RomyMy committed
Commit 4eaf3da • 0 Parent(s)

first logic

.env_example ADDED
@@ -0,0 +1,2 @@
+ REDIS_KEY = ''
+ OPENAI_API_KEY = ''
.gitignore ADDED
@@ -0,0 +1,2 @@
+ product_data.csv
+ .env
__pycache__/database.cpython-311.pyc ADDED
Binary file (610 Bytes)
__pycache__/preprocess.cpython-311.pyc ADDED
Binary file (1.35 kB)
__pycache__/utilities.cpython-311.pyc ADDED
Binary file (2.19 kB)
chatbot.py ADDED
@@ -0,0 +1,76 @@
+ from langchain.prompts import PromptTemplate
+ from langchain.llms import OpenAI
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.chains import LLMChain
+ from langchain.memory import ConversationBufferMemory
+ from redis.commands.search.query import Query
+ import time
+ import os
+ from dotenv import load_dotenv
+ import numpy as np
+ from database import redis_conn
+
+ load_dotenv()
+
+ llm = OpenAI(model_name="gpt-3.5-turbo", temperature=0.3, openai_api_key=os.getenv('OPENAI_API_KEY'))
+ prompt = PromptTemplate(
+     input_variables=["product_description"],
+     template="Create comma-separated product keywords to perform a query on an Amazon dataset for this user input: {product_description}",
+ )
+
+ chain = LLMChain(llm=llm, prompt=prompt)
+
+ userinput = input("Hey, I'm an e-commerce chatbot. How can I help you today? ")
+ print("User:", userinput)
+ # Run the chain, only specifying the input variable.
+ keywords = chain.run(userinput)
+
+ embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
+ # Vectorize the generated keywords
+ query_vector = embedding_model.embed_query(keywords)
+ query_vector = np.array(query_vector).astype(np.float32).tobytes()
+
+
+ # Prepare the KNN query against the vector index
+ ITEM_KEYWORD_EMBEDDING_FIELD = 'item_vector'
+ topK = 5
+ q = Query(f'*=>[KNN {topK} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]').sort_by('vector_score').paging(0, topK).return_fields('vector_score', 'item_name', 'primary_key', 'item_keywords').dialect(2)
+ params_dict = {"vec_param": query_vector}
+ # Execute the query
+ results = redis_conn.ft().search(q, query_params=params_dict)
+
+ full_result_string = ''
+ for product in results.docs:
+     full_result_string += product.item_name + ' ' + product.item_keywords + ' ' + product.primary_key + "\n\n\n"
+
+ # Compose the response
+ template = """You are a chatbot. Be kind, detailed and nice. Present the given queried search result in a nice way as an answer to the user input. Don't ask questions back! Just use the given context.
+
+ {chat_history}
+ Human: {user_msg}
+ Chatbot:"""
+
+ prompt = PromptTemplate(
+     input_variables=["chat_history", "user_msg"],
+     template=template
+ )
+ memory = ConversationBufferMemory(memory_key="chat_history")
+ llm_chain = LLMChain(
+     llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8, openai_api_key=os.getenv('OPENAI_API_KEY')),
+     prompt=prompt,
+     verbose=False,
+     memory=memory,
+ )
+
+ answer = llm_chain.predict(user_msg=f"{full_result_string} ---\n\n {userinput}")
+ print("Bot:", answer)
+ time.sleep(0.5)
+
+ while True:
+     follow_up = input("Anything else you want to ask about this topic? ")
+     print("User:", follow_up)
+     answer = llm_chain.predict(
+         user_msg=follow_up
+     )
+     print("Bot:", answer)
+     time.sleep(0.5)
database.py ADDED
@@ -0,0 +1,15 @@
+ import redis
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+ redis_key = os.getenv('REDIS_KEY')
+
+
+
+ redis_conn = redis.Redis(
+     host='redis-10923.c10.us-east-1-4.ec2.cloud.redislabs.com',
+     port=10923,
+     password=redis_key)
+
+ print('connected to redis')
preprocess.py ADDED
@@ -0,0 +1,48 @@
+ from langchain.embeddings import OpenAIEmbeddings
+ import os
+ import pandas as pd
+ import numpy as np
+ from dotenv import load_dotenv
+ from database import redis_conn
+ from utilities import create_flat_index, load_vectors
+
+ load_dotenv()
+ openai_api_key = os.getenv("OPENAI_API_KEY")
+
+ # set maximum length for text fields
+ MAX_TEXT_LENGTH = 512
+
+ def auto_truncate(text: str):
+     return text[0:MAX_TEXT_LENGTH]
+
+ data = pd.read_csv('product_data.csv', converters={'bullet_point': auto_truncate, 'item_keywords': auto_truncate, 'item_name': auto_truncate})
+ data['primary_key'] = data['item_id'] + '-' + data['domain_name']
+ data.drop(columns=['item_id', 'domain_name'], inplace=True)
+ data['item_keywords'].replace('', np.nan, inplace=True)
+ data.dropna(subset=['item_keywords'], inplace=True)
+ data.reset_index(drop=True, inplace=True)
+ data_metadata = data.head(500).to_dict(orient='index')
+
+ # generate embeddings (vectors) for the item keywords
+ # embedding_model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')
+ embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
+
+ # get the item keywords attribute for each product and encode them into vector embeddings
+ item_keywords = [data_metadata[i]['item_keywords'] for i in data_metadata.keys()]
+ item_keywords_vectors = [embedding_model.embed_query(item) for item in item_keywords]
+
+ TEXT_EMBEDDING_DIMENSION = 1536  # OpenAI embeddings are 1536-dimensional; 768 matched the commented-out SentenceTransformer model
+ NUMBER_PRODUCTS = 500
+
+ print('Loading and indexing ' + str(NUMBER_PRODUCTS) + ' products')
+ # flush all data
+ redis_conn.flushall()
+ # create flat index & load vectors
+ create_flat_index(redis_conn, NUMBER_PRODUCTS, TEXT_EMBEDDING_DIMENSION, 'COSINE')
+ load_vectors(redis_conn, data_metadata, item_keywords_vectors)
+
+
+
+
+
+
readme.md ADDED
@@ -0,0 +1,3 @@
+ An ***e-commerce chatbot*** that goes through the Amazon product dataset and suggests the most suitable goods for the user's needs.
+ By combining product embeddings with large language models, built on LangChain and Redis, the chatbot acts like a real salesperson: it understands the client's request, efficiently searches for relevant product recommendations based on the user's description, and presents them in an engaging and informative manner.
+ **Link to download the Amazon product dataset**: https://drive.google.com/file/d/1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj/view
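The flow the readme describes condenses to the following minimal sketch, assuming the index has already been built by preprocess.py and the keys from .env_example are set; recommend and top_k are illustrative names, the rest mirrors chatbot.py and utilities.py:

    # Minimal sketch: embed the request, run a KNN search, hand the matches to the LLM as context.
    import numpy as np
    from langchain.embeddings import OpenAIEmbeddings
    from redis.commands.search.query import Query
    from database import redis_conn  # connection object defined in this commit (also loads .env)

    def recommend(user_request: str, top_k: int = 5):
        # Embed the request with the same model used at indexing time (preprocess.py).
        vec = np.array(OpenAIEmbeddings().embed_query(user_request), dtype=np.float32).tobytes()
        # KNN search on the 'item_vector' field of the FLAT index.
        q = (Query(f'*=>[KNN {top_k} @item_vector $vec AS score]')
             .sort_by('score')
             .return_fields('item_name', 'item_keywords', 'score')
             .dialect(2))
        res = redis_conn.ft().search(q, query_params={'vec': vec})
        # These hits become the context string that chatbot.py feeds to the LLM prompt.
        return [(doc.item_name, doc.score) for doc in res.docs]

The full chatbot.py additionally asks the LLM to turn the free-form request into keywords before embedding, and wraps the retrieved products in a conversational prompt with memory.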
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ langchain == 0.0.242
+ openai == 0.27.8
+ redis == 5.0.1
+ pandas == 2.0.3
+ sentence-transformers == 2.2.2
+ python-dotenv
utilities.py ADDED
@@ -0,0 +1,32 @@
+ from redis import Redis
+ from redis.commands.search.field import VectorField
+ from redis.commands.search.field import TextField
+ from redis.commands.search.field import TagField
+ from redis.commands.search.result import Result
+ import numpy as np
+
+ def load_vectors(client: Redis, product_metadata, vector_dict):
+     p = client.pipeline(transaction=False)
+     for index in product_metadata.keys():
+         # hash key
+         key = 'product:' + str(index) + ':' + product_metadata[index]['primary_key']
+
+         # hash values
+         item_metadata = product_metadata[index]
+         item_keywords_vector = np.array(vector_dict[index], dtype=np.float32).tobytes()
+         item_metadata['item_vector'] = item_keywords_vector
+
+         # HSET
+         p.hset(key, mapping=item_metadata)
+
+     p.execute()
+
+ def create_flat_index(redis_conn, number_of_vectors, vector_dimensions=512, distance_metric='L2'):
+     redis_conn.ft().create_index([
+         VectorField('item_vector', "FLAT", {"TYPE": "FLOAT32", "DIM": vector_dimensions, "DISTANCE_METRIC": distance_metric, "INITIAL_CAP": number_of_vectors, "BLOCK_SIZE": number_of_vectors}),
+         TagField("product_type"),
+         TextField("item_name"),
+         TextField("item_keywords"),
+         TagField("country")
+     ])
+