RomyMy committed
Commit 4eaf3da • 0 Parent(s)

first logic

.env_example ADDED
@@ -0,0 +1,2 @@
+ REDIS_KEY = ''
+ OPENAI_API_KEY = ''
.gitignore ADDED
@@ -0,0 +1,2 @@
+ product_data.csv
+ .env
__pycache__/database.cpython-311.pyc ADDED
Binary file (610 Bytes)
__pycache__/preprocess.cpython-311.pyc ADDED
Binary file (1.35 kB)
__pycache__/utilities.cpython-311.pyc ADDED
Binary file (2.19 kB)
chatbot.py ADDED
@@ -0,0 +1,76 @@
+ from langchain.prompts import PromptTemplate
+ from langchain.llms import OpenAI
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.chains import LLMChain
+ from langchain.memory import ConversationBufferMemory
+ from redis.commands.search.query import Query
+ import time
+ import os
+ from dotenv import load_dotenv
+ import numpy as np
+ from database import redis_conn
+
+ load_dotenv()
+
+ llm = OpenAI(model_name="gpt-3.5-turbo", temperature=0.3, openai_api_key=os.getenv('OPENAI_API_KEY'))
+ prompt = PromptTemplate(
+     input_variables=["product_description"],
+     template="Create comma-separated product keywords to perform a query on an Amazon dataset for this user input: {product_description}",
+ )
+
+ chain = LLMChain(llm=llm, prompt=prompt)
+
+ userinput = input("Hey, I'm an e-commerce chatbot. How can I help you today? ")
+ print("User:", userinput)
+ # Run the chain, only specifying the input variable.
+ keywords = chain.run(userinput)
+
+ embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
+ # Vectorize the generated keywords
+ query_vector = embedding_model.embed_query(keywords)
+ query_vector = np.array(query_vector).astype(np.float32).tobytes()
+
+
+ # Prepare the KNN query against the vector index
+ ITEM_KEYWORD_EMBEDDING_FIELD = 'item_vector'
+ topK = 5
+ q = Query(f'*=>[KNN {topK} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]').sort_by('vector_score').paging(0, topK).return_fields('vector_score', 'item_name', 'primary_key', 'item_keywords').dialect(2)
+ params_dict = {"vec_param": query_vector}
+ # Execute the query
+ results = redis_conn.ft().search(q, query_params=params_dict)
+
+ full_result_string = ''
+ for product in results.docs:
+     full_result_string += product.item_name + ' ' + product.item_keywords + ' ' + product.primary_key + "\n\n\n"
+
+ # Compose the response
+ template = """You are a chatbot. Be kind, detailed and nice. Present the given queried search result in a nice way as an answer to the user input. Don't ask questions back! Just use the given context.
+
+ {chat_history}
+ Human: {user_msg}
+ Chatbot:"""
+
+ prompt = PromptTemplate(
+     input_variables=["chat_history", "user_msg"],
+     template=template
+ )
+ memory = ConversationBufferMemory(memory_key="chat_history")
+ llm_chain = LLMChain(
+     llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8, openai_api_key=os.getenv('OPENAI_API_KEY')),
+     prompt=prompt,
+     verbose=False,
+     memory=memory,
+ )
+
+ answer = llm_chain.predict(user_msg=f"{full_result_string} ---\n\n {userinput}")
+ print("Bot:", answer)
+ time.sleep(0.5)
+
+ while True:
+     follow_up = input("Anything else you want to ask about this topic? ")
+     print("User:", follow_up)
+     answer = llm_chain.predict(
+         user_msg=follow_up
+     )
+     print("Bot:", answer)
+     time.sleep(0.5)
database.py ADDED
@@ -0,0 +1,15 @@
+ import redis
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+ redis_key = os.getenv('REDIS_KEY')
+
+
+
+ redis_conn = redis.Redis(
+     host='redis-10923.c10.us-east-1-4.ec2.cloud.redislabs.com',
+     port=10923,
+     password=redis_key)
+
+ print('connected to redis')
preprocess.py ADDED
@@ -0,0 +1,48 @@
+ from langchain.embeddings import OpenAIEmbeddings
+ import os
+ import pandas as pd
+ import numpy as np
+ from dotenv import load_dotenv
+ from database import redis_conn
+ from utilities import create_flat_index, load_vectors
+
+ load_dotenv()
+ openai_api_key = os.getenv("OPENAI_API_KEY")
+
+ # set maximum length for text fields
+ MAX_TEXT_LENGTH = 512
+
+ def auto_truncate(text: str):
+     return text[0:MAX_TEXT_LENGTH]
+
+ data = pd.read_csv('product_data.csv', converters={'bullet_point': auto_truncate, 'item_keywords': auto_truncate, 'item_name': auto_truncate})
+ data['primary_key'] = data['item_id'] + '-' + data['domain_name']
+ data.drop(columns=['item_id', 'domain_name'], inplace=True)
+ data['item_keywords'].replace('', np.nan, inplace=True)
+ data.dropna(subset=['item_keywords'], inplace=True)
+ data.reset_index(drop=True, inplace=True)
+ data_metadata = data.head(500).to_dict(orient='index')
+
+ # generate embeddings (vectors) for the item keywords
+ # embedding_model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')
+ embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
+
+ # get the item keywords attribute for each product and encode them into vector embeddings
+ item_keywords = [data_metadata[i]['item_keywords'] for i in data_metadata.keys()]
+ item_keywords_vectors = [embedding_model.embed_query(item) for item in item_keywords]
+
+ TEXT_EMBEDDING_DIMENSION = 1536  # OpenAI embeddings are 1536-dimensional; 768 matched the commented-out SentenceTransformer model
+ NUMBER_PRODUCTS = 500
+
+ print('Loading and indexing ' + str(NUMBER_PRODUCTS) + ' products')
+ # flush all data
+ redis_conn.flushall()
+ # create flat index & load vectors
+ create_flat_index(redis_conn, NUMBER_PRODUCTS, TEXT_EMBEDDING_DIMENSION, 'COSINE')
+ load_vectors(redis_conn, data_metadata, item_keywords_vectors)
+
+
+
+
+
+
readme.md ADDED
@@ -0,0 +1,3 @@
+ An ***e-commerce chatbot*** that goes through the Amazon product dataset and suggests the most suitable goods for the user's needs.
+ By combining product embeddings with large language models, built on LangChain and Redis, the chatbot acts like a real salesperson: it understands the client's request, efficiently searches for relevant product recommendations based on the user's description, and presents them in an engaging and informative manner.
+ **Link to download the Amazon product dataset**: https://drive.google.com/file/d/1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj/view
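The flow the readme describes condenses to the following minimal sketch, assuming the index has already been built by preprocess.py and the keys from .env_example are set; recommend and top_k are illustrative names, the rest mirrors chatbot.py and utilities.py:

    # Minimal sketch: embed the request, run a KNN search, hand the matches to the LLM as context.
    import numpy as np
    from langchain.embeddings import OpenAIEmbeddings
    from redis.commands.search.query import Query
    from database import redis_conn  # connection object defined in this commit (also loads .env)

    def recommend(user_request: str, top_k: int = 5):
        # Embed the request with the same model used at indexing time (preprocess.py).
        vec = np.array(OpenAIEmbeddings().embed_query(user_request), dtype=np.float32).tobytes()
        # KNN search on the 'item_vector' field of the FLAT index.
        q = (Query(f'*=>[KNN {top_k} @item_vector $vec AS score]')
             .sort_by('score')
             .return_fields('item_name', 'item_keywords', 'score')
             .dialect(2))
        res = redis_conn.ft().search(q, query_params={'vec': vec})
        # These hits become the context string that chatbot.py feeds to the LLM prompt.
        return [(doc.item_name, doc.score) for doc in res.docs]

The full chatbot.py additionally asks the LLM to turn the free-form request into keywords before embedding, and wraps the retrieved products in a conversational prompt with memory.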
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ langchain == 0.0.242
+ openai == 0.27.8
+ redis == 5.0.1
+ pandas == 2.0.3
+ sentence-transformers == 2.2.2
+ python-dotenv
utilities.py ADDED
@@ -0,0 +1,32 @@
+ from redis import Redis
+ from redis.commands.search.field import VectorField
+ from redis.commands.search.field import TextField
+ from redis.commands.search.field import TagField
+ from redis.commands.search.result import Result
+ import numpy as np
+
+ def load_vectors(client: Redis, product_metadata, vector_dict):
+     p = client.pipeline(transaction=False)
+     for index in product_metadata.keys():
+         # hash key
+         key = 'product:' + str(index) + ':' + product_metadata[index]['primary_key']
+
+         # hash values
+         item_metadata = product_metadata[index]
+         item_keywords_vector = np.array(vector_dict[index], dtype=np.float32).tobytes()
+         item_metadata['item_vector'] = item_keywords_vector
+
+         # HSET
+         p.hset(key, mapping=item_metadata)
+
+     p.execute()
+
+ def create_flat_index(redis_conn, number_of_vectors, vector_dimensions=512, distance_metric='L2'):
+     redis_conn.ft().create_index([
+         VectorField('item_vector', "FLAT", {"TYPE": "FLOAT32", "DIM": vector_dimensions, "DISTANCE_METRIC": distance_metric, "INITIAL_CAP": number_of_vectors, "BLOCK_SIZE": number_of_vectors}),
+         TagField("product_type"),
+         TextField("item_name"),
+         TextField("item_keywords"),
+         TagField("country")
+     ])
+