# Redis LangChain OpenAI eCommerce Chatbot

In [1]:
# Install requirements
!pip install -r requirements.txt

/usr/bin/zsh: /home/green/miniconda3/lib/libtinfo.so.6: no version information available (required by /usr/bin/zsh)

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Download the dataset
!gdown --id 1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj

/usr/bin/zsh: /home/green/miniconda3/lib/libtinfo.so.6: no version information available (required by /usr/bin/zsh)
Downloading...
From (uriginal): https://drive.google.com/uc?id=1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj
From (redirected): https://drive.google.com/uc?id=1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj&confirm=t&uuid=f678b48d-4f3e-44f9-bf60-03ca828cb67c
To: /home/green/code/gatech/ai_atl/inital_work/product_data.csv
100%|████████████████████████████████████████| 225M/225M [00:09<00:00, 24.0MB/s]


## Preprocess dataset

In [None]:
import pandas as pd

# Upper bound, in characters, for the long free-text product fields
MAX_TEXT_LENGTH = 512


def auto_truncate(val):
    """Return ``val`` cut down to at most ``MAX_TEXT_LENGTH`` leading characters.

    Values that are already short enough come back unchanged.
    """
    limit = MAX_TEXT_LENGTH
    return val[0:limit]

# Columns holding long free text; each one is clipped by auto_truncate while
# the CSV is being parsed
TRUNCATED_COLUMNS = ['bullet_point', 'item_keywords', 'item_name']

# Read the product catalogue, applying the same truncating converter to
# every long-text column
all_prods_df = pd.read_csv(
    "product_data.csv",
    converters=dict.fromkeys(TRUNCATED_COLUMNS, auto_truncate),
)

In [None]:
# Construct a primary key from item ID and domain name
all_prods_df['primary_key'] = (
    all_prods_df['item_id'] + '-' + all_prods_df['domain_name']
)

# Keep only products that actually have keywords (neither missing nor '').
# An explicit boolean filter is used instead of
# `df[col].replace('', None, inplace=True)`: that chained in-place call acts on
# a column selection, which no longer updates the frame under pandas
# Copy-on-Write, and `replace(..., None)` has meant "pad-fill" in some versions.
has_keywords = all_prods_df['item_keywords'].fillna('').ne('')

# Drop the rows without keywords and rebuild a clean 0..n-1 index
all_prods_df = all_prods_df.loc[has_keywords].reset_index(drop=True)

all_prods_df.head()

In [None]:
# Size of the product subset to work with
NUMBER_PRODUCTS = 2500

# Take the first NUMBER_PRODUCTS products with non-empty item keywords ...
product_subset_df = all_prods_df.iloc[:NUMBER_PRODUCTS]

# ... and turn them into a {row index: {column name: value}} mapping
product_metadata = product_subset_df.to_dict(orient='index')

In [None]:
# Check one of the products: display the record stored under row index 0
# to see which metadata fields each product carries
product_metadata[0]

## Set up Redis as a vector db

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.redis import Redis as RedisVectorStore

# product metadata that we'll store alongside our vectors (one dict per product)
metadatas = list(product_metadata.values())

# data that will be embedded and converted to vectors: the product names,
# in the same order as the metadata records
texts = [product['item_name'] for product in metadatas]

# we will use OpenAI as our embeddings provider
embedding = OpenAIEmbeddings()

# name of the Redis search index to create
index_name = "products"

# assumes you have a redis stack server running within your docker compose network
redis_url = "redis://redis:6379"

# create the index and load Redis with the documents
vectorstore = RedisVectorStore.from_texts(
    texts=texts,
    embedding=embedding,
    metadatas=metadatas,
    redis_url=redis_url,
    index_name=index_name,
)

## Build the ChatBot with ConversationalRetrievalChain

In [None]:
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import (
 ConversationalRetrievalChain,
 LLMChain
)
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.prompts.prompt import PromptTemplate

# Prompt 1: condense the chat history plus the latest user input into a single
# standalone question (this is what gets sent to the retriever).
template = """Given the following chat history and a follow up question, rephrase the follow up input question to be a standalone question.
Or end the conversation if it seems like it's done.

Chat History:\"""
{chat_history}
\"""

Follow Up Input: \"""
{question}
\"""

Standalone question:"""

condense_question_prompt = PromptTemplate.from_template(template)

# Prompt 2: answer the standalone question from the retrieved product context.
# The {question} placeholder must be present here: without it the template only
# has {context} and the answering LLM never sees what the shopper asked.
template = """You are a friendly, conversational retail shopping assistant. Use the following context including product names, descriptions, and keywords to show the shopper whats available, help find what they want, and answer any questions.
It's ok if you don't know the answer.

Context:\"""
{context}
\"""

Question:\"""
{question}
\"""

Helpful Answer:"""

qa_prompt = PromptTemplate.from_template(template)


# Two OpenAI LLMs are used.
# 1) temperature-0 model for rewriting follow-ups into standalone questions
llm = OpenAI(temperature=0)

# 2) streaming model for the answer itself; its callback manager carries a
#    StreamingStdOutCallbackHandler
stdout_callbacks = CallbackManager([StreamingStdOutCallbackHandler()])
streaming_llm = OpenAI(
    streaming=True,
    callback_manager=stdout_callbacks,
    verbose=True,
    temperature=0.2,
    max_tokens=150,
)

# question creation chain: non-streaming LLM + condense-question prompt
question_generator = LLMChain(prompt=condense_question_prompt, llm=llm)

# question answering chain: streaming LLM + QA prompt, using the "stuff" chain type
doc_chain = load_qa_chain(llm=streaming_llm, prompt=qa_prompt, chain_type="stuff")

# tie retrieval, question rewriting and answering together
chatbot = ConversationalRetrievalChain(
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    retriever=vectorstore.as_retriever(),
)

In [None]:
# create a chat history buffer
chat_history = []

# typing one of these words (or just pressing Enter) ends the conversation
EXIT_COMMANDS = {"exit", "quit"}

# gather user input for the first question to kick off the bot
question = input("Hi! What are you looking for today?")

# keep the bot running in a loop to simulate a conversation; the loop ends on
# blank input or an exit word so the cell can finish instead of looping forever
while question.strip() and question.strip().lower() not in EXIT_COMMANDS:
    result = chatbot(
        {"question": question, "chat_history": chat_history}
    )
    print("\n")
    # remember the exchange so follow-up questions can refer back to it
    chat_history.append((result["question"], result["answer"]))
    try:
        question = input()
    except (EOFError, KeyboardInterrupt):
        # input stream closed or kernel interrupted: stop without a traceback
        break

## Customize your chains for even better performance

In [None]:
import json

from langchain.schema import BaseRetriever
from langchain.vectorstores import VectorStore
from langchain.schema import Document
from pydantic import BaseModel


class RedisProductRetriever(BaseRetriever, BaseModel):
    """Retriever that rebuilds each search hit's text from its product metadata.

    Hits come from ``vectorstore.similarity_search``; the page content of every
    returned document is replaced by the product's name, description and
    keywords joined into one string.
    """

    vectorstore: VectorStore

    class Config:
        # presumably required so pydantic accepts the VectorStore-typed field
        arbitrary_types_allowed = True

    def combine_metadata(self, doc) -> str:
        """Build one descriptive string from ``doc.metadata``.

        Reads the ``item_name``, ``bullet_point`` and ``item_keywords`` entries
        and concatenates them with their labels.
        """
        fields = doc.metadata
        name_part = "Item Name: " + fields["item_name"] + ". "
        description_part = "Item Description: " + fields["bullet_point"] + ". "
        keywords_part = "Item Keywords: " + fields["item_keywords"] + "."
        return name_part + description_part + keywords_part

    def get_relevant_documents(self, query):
        """Return the similarity-search hits for ``query`` as new Documents.

        Each result keeps the hit's metadata and uses the combined metadata
        string as its page content.
        """
        return [
            Document(
                page_content=self.combine_metadata(hit),
                metadata=hit.metadata,
            )
            for hit in self.vectorstore.similarity_search(query)
        ]

### Set up ChatBot with new retriever

In [None]:
# wrap the existing vector store in the custom product retriever
redis_product_retriever = RedisProductRetriever(vectorstore=vectorstore)

# rebuild the chatbot: same question generator and answering chain as before,
# only the retriever is swapped
chatbot = ConversationalRetrievalChain(
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    retriever=redis_product_retriever,
)

### Retry

In [None]:
# create a chat history buffer
chat_history = []

# typing one of these words (or just pressing Enter) ends the conversation
EXIT_COMMANDS = {"exit", "quit"}

# gather user input for the first question to kick off the bot
question = input("Hi! What are you looking for today?")

# keep the bot running in a loop to simulate a conversation; the loop ends on
# blank input or an exit word so the cell can finish instead of looping forever
while question.strip() and question.strip().lower() not in EXIT_COMMANDS:
    result = chatbot(
        {"question": question, "chat_history": chat_history}
    )
    print("\n")
    # remember the exchange so follow-up questions can refer back to it
    chat_history.append((result["question"], result["answer"]))
    try:
        question = input()
    except (EOFError, KeyboardInterrupt):
        # input stream closed or kernel interrupted: stop without a traceback
        break