You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# Write the Weaviate environment file. Each comment must be on its own line:
# in the original, comments were fused onto the assignment lines, which would
# corrupt every value below.
cat << EOF > src/vector_db_example/weaviate/.env
OPENAI_APIKEY=$OPENAI_API_KEY
# Sets the default number of objects to be returned in a query.
QUERY_DEFAULTS_LIMIT=25
# Allow users to interact with weaviate without auth
AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true
# Where should Weaviate Standalone store its data?
PERSISTENCE_DATA_PATH=/var/lib/weaviate
# Default vectorizer module to use
DEFAULT_VECTORIZER_MODULE=text2vec-openai
# Which modules to enable in the setup?
ENABLE_MODULES=text2vec-openai
# Hostname of the weaviate instance
CLUSTER_HOSTNAME=node1
EOF
@app.route("/ask", methods=["GET"])
def ask():
    """Answer a user question using chat context retrieved from Weaviate.

    Query string:
        q: the user's question.

    Returns a JSON dict with the assistant's response and the merged
    context that was sent to OpenAI.
    """
    # Get the question from the user
    question = request.args.get("q")
    # Build the user prompt
    user_prompt = {
        "role": "user",
        "content": question
    }
    # Get the nearest context from Weaviate
    context = weaviate_nearest_interactions(
        question,
        weaviate_certainty,
        weaviate_limit,
    )
    # Get the latest interactions from Weaviate
    latest_interactions = weaviate_latest_interactions(
        interactions_limit
    )
    # Merge the two contexts and de-duplicate them (dicts are not hashable,
    # so round-trip through tuples of items; NOTE: this does not preserve
    # message order).
    global_context = latest_interactions["data"] + context["data"]
    global_context = [
        dict(t) for t in {
            tuple(d.items()) for d in global_context
        }
    ]
    # Build the message list to send to OpenAI
    messages = [system_prompt] + global_context + [user_prompt]
    # Send the messages to OpenAI
    response = openai_client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=200,
        temperature=1.2,
    )
    content = response.choices[0].message.content.strip()
    # Save the user prompt and the answer in Weaviate
    assistant_prompt = {
        "role": "assistant",
        "content": content
    }
    data = [
        user_prompt,
        assistant_prompt
    ]
    weaviate_save_data(
        data
    )
    # Return the response to the user
    return {
        "response": assistant_prompt["content"],
        "global_context": global_context,
    }
def weaviate_nearest_interactions(query, certainty, limit):
    """Return stored interactions semantically nearest to *query*.

    Args:
        query: Free-text string to search against message contents.
        certainty: Minimum near-text certainty (0..1) for a match.
        limit: Maximum number of objects to return.

    Returns:
        A dict with a "data" key holding the matching objects. On error
        the problem is logged and ``{"data": []}`` is returned so callers
        that concatenate ``result["data"]`` do not crash on ``None``.
    """
    try:
        # Get the nearest context from Weaviate
        result = weaviate_client.query.get(
            class_name=weaviate_class_name,
            properties=[
                "role",
                "content"
            ]
        ).with_near_text({
            "concepts": [query],
            "certainty": certainty
        }).with_limit(
            limit
        ).do()
        return {
            "data": result['data']['Get'][weaviate_class_name]
        }
    except Exception as e:
        app.logger.error(f"Error while searching: {e}")
        # Fail soft: the original fell through and returned None here,
        # which breaks callers indexing into the result.
        return {"data": []}
def weaviate_latest_interactions(limit):
    """Return up to *limit* stored interactions from Weaviate.

    Args:
        limit: Maximum number of objects to return.

    Returns:
        A dict with a "data" key holding the objects. On error the
        problem is logged and ``{"data": []}`` is returned so callers
        that concatenate ``result["data"]`` do not crash on ``None``.
    """
    try:
        # Get the latest interactions from Weaviate
        result = weaviate_client.query.get(
            class_name=weaviate_class_name,
            properties=[
                "role",
                "content"
            ]
        ).with_limit(
            limit
        ).do()
        return {
            "data": result['data']['Get'][weaviate_class_name]
        }
    except Exception as e:
        app.logger.error(f"Error while searching: {e}")
        # Fail soft: the original fell through and returned None here.
        return {"data": []}
# Weaviate schema: a single class that stores chat messages, each message
# carrying a role (user/assistant) and its text content.
# NOTE(review): the "string" dataType is deprecated in newer Weaviate
# releases in favor of "text" — confirm against the server version in use.
schema= {
    "classes": [
        {
            "class": weaviate_class_name,
            "description": "A class to store chat messages",
            "properties": [
                {
                    "name": "content",
                    "description": "The content of the chat message",
                    "dataType": ["text"],
                },
                {
                    "name": "role",
                    "description": "The role of the message",
                    "dataType": ["string"],
                },
            ],
        }
    ]
}
def weaviate_create_schema():
    """Create the chat-message schema in Weaviate, logging the outcome.

    Relies on the module-level ``schema`` definition; errors are logged
    rather than raised.
    """
    try:
        # Create the schema in Weaviate
        weaviate_client.schema.create(schema)
        app.logger.info("Schema successfully created.")
    except Exception as e:
        app.logger.error(f"Failed to create schema: {e}")
def weaviate_delete_data():
    """Delete the chat-message class (and all its objects) from Weaviate.

    Returns:
        None on success; on failure, an ``(error_body, 500)`` tuple usable
        as a Flask error response. The error return sits inside the
        ``except`` block because it references ``e``.
    """
    try:
        weaviate_client.schema.delete_class(
            class_name=weaviate_class_name
        )
        app.logger.info("Data successfully reset.")
    except Exception as e:
        app.logger.error(f"Error while deleting class: {e}")
        return {"error in weaviate_reset": str(e)}, 500
# Build and start the example stack, then exercise the /ask endpoint.
# The first two requests seed the conversation; the third needs both
# stored facts to be answered correctly.
cd src/vector_db_example
docker-compose up --build
# I have a cat
curl http://127.0.0.1:5000/ask?q=I%20have%20a%20cat
# I have a dog
curl http://127.0.0.1:5000/ask?q=I%20have%20a%20dog
# How many pets do I have?
curl http://127.0.0.1:5000/ask?q=How%20many%20pets%20do%20I%20have%3F
{
"global_context": [
{
"content": "I have a dog",
"role": "user"
},
{
"content": "That's wonderful! Cats make great companions. Is there anything specific you need help with regarding your cat?",
"role": "assistant"
},
{
"content": "That's great! Dogs are wonderful pets too. Is there anything specific you need help with regarding your dog?",
"role": "assistant"
},
{
"content": "I have a cat",
"role": "user"
}
],
"response": "Based on your previous message, you mentioned having a dog. If you have a cat in addition to a dog, then you would have two pets."
}
Example 2: Using Weaviate and OpenAI in Semantic Search
# Weaviate class definition for Simple English Wikipedia articles.
# In the original, comments were fused onto the dict-key lines, which
# commented out the keys themselves and broke the literal.
article_class = {
    "class": weaviate_class_name,
    "description":
        "An article from the Simple English Wikipedia data set",
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        # Match how OpenAI created the embeddings
        # for the `content` (`text`) field
        "text2vec-openai": {
            "model": "ada",
            "modelVersion": "002",
            "type": "text",
            "vectorizeClassName": False
        }
    },
    "properties": [
        {
            "name": "title",
            "description": "The title of the article",
            "dataType": ["text"],
            # Don't vectorize the title
            "moduleConfig": {"text2vec-openai": {"skip": True}}
        },
        {
            "name": "content",
            "description": "The content of the article",
            "dataType": ["text"],
        }
    ]
}
def weaviate_import_data():
    """Import articles from ``data/data.csv`` into Weaviate in batches.

    The CSV carries precomputed embedding vectors (``content_vector``),
    so objects are inserted with their vectors instead of being
    re-vectorized on import.
    """
    # Counter to show progress on the console
    counter = 0
    interval = 100
    csv_iterator = pd.read_csv(
        'data/data.csv',
        usecols=[
            'id',
            'url',
            'title',
            'text',
            'content_vector'
        ],
        # number of rows per chunk
        chunksize=100,
        # limit the number of rows to import
        nrows=100
    )
    # Configure batch
    weaviate_client.batch.configure(
        batch_size=100
    )
    with weaviate_client.batch as batch:
        for chunk in csv_iterator:
            for _, row in chunk.iterrows():
                properties = {
                    "title": row.title,
                    "content": row.text,
                    "url": row.url
                }
                # Convert the vector from CSV string back to array of floats
                vector = ast.literal_eval(
                    row.content_vector
                )
                # Add the object to the batch, and set its vector embedding
                batch.add_data_object(
                    properties,
                    class_name=weaviate_class_name,
                    vector=vector
                )
                # Calculate and display progress
                counter += 1
                if counter % interval == 0:
                    app.logger.debug(f"Imported {counter} articles...")
    app.logger.debug(f"Finished importing {counter} articles.")
# Grouped-task prompt handed to Weaviate's generative module; {content}
# is replaced by the matched articles' text.
prompt = (
    "Extract the list of topics discussed in these articles:"
    "{content}"
)
# Write the generative-search Flask app. The heredoc is quoted ('EOT') so
# the shell does not expand anything inside the Python source.
cat << 'EOT' > src/vector_db_generative_search/app/app.py
import weaviate, os, ast
from flask import Flask, request
from openai import OpenAI
import pandas as pd

app = Flask(__name__)
openai_api_key = os.getenv("OPENAI_API_KEY")
model = "gpt-3.5-turbo"
weaviate_class_name = "Article"
openai_client = OpenAI(
    api_key=openai_api_key
)
weaviate_client = weaviate.Client(
    url="http://weaviate:8080",
    # Module API keys are passed as extra request headers; the original
    # passed this dict as auth_client_secret, which expects an
    # AuthCredentials object and would fail in weaviate-client v3.
    additional_headers={
        "X-OpenAI-Api-Key": openai_api_key
    }
)
# Weaviate class definition for Simple English Wikipedia articles.
# In the original, comments were fused onto the dict-key lines, which
# commented out the keys themselves and broke the literal.
article_class = {
    "class": weaviate_class_name,
    "description":
        "An article from the Simple English Wikipedia data set",
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        # Match how OpenAI created the embeddings
        # for the `content` (`text`) field
        "text2vec-openai": {
            "model": "ada",
            "modelVersion": "002",
            "type": "text",
            "vectorizeClassName": False
        }
    },
    "properties": [
        {
            "name": "title",
            "description": "The title of the article",
            "dataType": ["text"],
            # Don't vectorize the title
            "moduleConfig": {"text2vec-openai": {"skip": True}}
        },
        {
            "name": "content",
            "description": "The content of the article",
            "dataType": ["text"],
        }
    ]
}
def weaviate_import_data():
    """Import articles from ``data/data.csv`` into Weaviate in batches.

    The CSV carries precomputed embedding vectors (``content_vector``),
    so objects are inserted with their vectors instead of being
    re-vectorized on import.
    """
    # Progress counters for console logging
    counter = 0
    interval = 100
    csv_iterator = pd.read_csv(
        'data/data.csv',
        usecols=[
            'id',
            'url',
            'title',
            'text',
            'content_vector'
        ],
        # number of rows per chunk
        chunksize=100,
        # limit the number of rows to import
        nrows=100
    )
    # Configure batch
    weaviate_client.batch.configure(
        batch_size=100
    )
    with weaviate_client.batch as batch:
        for chunk in csv_iterator:
            for _, row in chunk.iterrows():
                properties = {
                    "title": row.title,
                    "content": row.text,
                    "url": row.url
                }
                # Convert the vector from CSV string back to array of floats
                vector = ast.literal_eval(
                    row.content_vector
                )
                # Add the object to the batch, and set its vector embedding
                batch.add_data_object(
                    properties,
                    class_name=weaviate_class_name,
                    vector=vector
                )
                # Calculate and display progress
                counter += 1
                if counter % interval == 0:
                    app.logger.debug(f"Imported {counter} articles...")
    app.logger.debug(f"Finished importing {counter} articles.")
def weaviate_semantic_search(query, prompt):
    """Run a semantic search for *query* with a grouped generative task.

    Args:
        query: Free-text search string.
        prompt: Grouped-task prompt applied across the matched articles.

    Returns:
        The list of matching Article objects, each including the vector
        distance; the first object also carries the generated grouped
        result under ``_additional.generate``.
    """
    nearText = {
        "concepts": [query],
    }
    properties = [
        "title",
        "content",
        "_additional {distance}"
    ]
    limit = 3
    response = weaviate_client.query.get(
        class_name=weaviate_class_name,
        properties=properties,
    ).with_generate(
        grouped_task=prompt
    ).with_near_text(
        nearText
    ).with_limit(
        limit
    ).do()
    result = response['data']['Get'][weaviate_class_name]
    return result

# Reset the schema and (re)import the data set on startup.
weaviate_client.schema.delete_all()
weaviate_import_data()
@app.route("/ask", methods=["GET"])
def ask():
    """Answer a semantic-search question over the imported articles.

    Query string:
        q: the user's question.
    """
    # Get the question from the user
    question = request.args.get("q")
    prompt = """Extract the list of topics discussed in these articles: {content} """
    context = weaviate_semantic_search(
        question,
        prompt
    )
    return {
        "response": context
    }
EOT
{
"response": [
{
"_additional": {
"distance": 0.17569739,
"generate": {
"error": null,
"groupedResult": "The list of topics discussed in these articles are:\n\n1. Animals (zoology, palaeontology, cellular respiration, metabolism, cell membranes)\n2. Plants (multicellular eukaryotic organisms)\n3. Grouping animals..."
}
},
"content": "Animals (or Metazoa) are living creatures with many cells...",
"title": "Animal"
},
{
"_additional": {
"distance": 0.21414834,
"generate": null
},
"content": "A browser is a name given to any animal, usually a herbivorous mammal ..",
"title": "Browser"
},
{
"_additional": {
"distance": 0.22182149,
"generate": null
},
"content": "Being is also a present tense part of to be..",
"title": "Being"
}
]
}
# System prompt template; {content} is filled with the stored discussion
# context before the message is sent to the model.
prompt = (
    "You are a helpful assistant. "
    "You are having a discussion with a user. "
    "This is the context of the discussion:"
    "{content}"
)