"""Gradio chatbot that recommends biographies via a LangChain retrieval chain.

At import time the script indexes every text file under ``./bios/`` into a
Chroma vector store, builds a ``ConversationalRetrievalChain`` on top of it and
launches a ``gr.ChatInterface`` whose callback is :func:`predict`.
"""
import os
import pickle

from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document
import openai
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
import cohere
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.llms import Cohere
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from tqdm import tqdm
import gradio as gr
from langchain import LLMChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)
from langchain.schema import AIMessage, HumanMessage
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
# from langchain.memory import Memory
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

# Instruction text prepended to every user message before it reaches the chain.
PROMPT_PREFIX = (
    "you are a language model that gives book recommendation based on your context. "
)


def _load_pairs(pickle_path):
    """Return the pickled sequence stored at *pickle_path*, or ``[]`` if absent.

    A missing file yields an empty list so the app can still start without
    the optional title/URL data.
    """
    if not os.path.isfile(pickle_path):
        return []
    # NOTE(security): unpickling can execute arbitrary code; this file must
    # only ever be a trusted artifact shipped with the app.
    with open(pickle_path, 'rb') as fh:
        return pickle.load(fh)


# --- Build the vector index -------------------------------------------------
documents = []
path = './bios/'
# Delete the collection of a default-constructed Chroma store before indexing
# (presumably to avoid stale vectors from a previous run -- confirm).
Chroma().delete_collection()
for file in os.listdir(path):
    file_path = os.path.join(path, file)
    # Skip sub-directories and other non-file entries; TextLoader needs a file.
    if not os.path.isfile(file_path):
        continue
    loader = TextLoader(file_path, encoding='unicode_escape')
    documents += loader.load()

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
# embeddings = CohereEmbeddings(model='embed-english-v3.0')
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings)
retriever = docsearch.as_retriever()

# --- Build the conversational retrieval chain -------------------------------
# cohereLLM=Cohere(model='command')
# NOTE: despite its name, this variable currently holds an OpenAI LLM.
cohereLLM = OpenAI()

# Initialize the CohereRerank compressor and the ContextualCompressionRetriever
# NOTE(review): compression_retriever is built here but the chain below is
# wired to the plain `retriever`.
compressor = CohereRerank(user_agent='MyTool/1.0 (Linux; x86_64)')
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

# delete this to return to production state
# NOTE(review): `memory` is constructed but never passed to the chain.
memory = ConversationSummaryMemory(
    llm=cohereLLM, memory_key="chat_history", return_messages=True
)

question_generator = LLMChain(llm=cohereLLM, prompt=CONDENSE_QUESTION_PROMPT)
doc_chain = load_qa_with_sources_chain(cohereLLM, chain_type="refine")
rag_chain = chain = ConversationalRetrievalChain(
    retriever=retriever,
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    return_source_documents=True,
)

# Optional lookup table (presumably book title -> Amazon URL, judging by the
# names -- confirm). It is only referenced by the disabled link feature
# described in `predict`.
btuTuples = _load_pairs('./bookTitleUrlTuples.pkl')
bookTitleUrlDict = dict(btuTuples)

# NOTE(review): module-level state -- every call to `predict` in this process
# appends to the same list, so it is shared by all chats and grows unbounded.
chat_history = []


def predict(message: str, history):
    """Answer one chat turn with the retrieval chain and record the exchange.

    Args:
        message: The user's text from the Gradio textbox.
        history: Conversation history supplied by ``gr.ChatInterface``. It is
            not used; the module-level ``chat_history`` is passed to the
            chain instead.

    Returns:
        The ``"answer"`` entry of the chain result.

    Two experiments remain disabled here: re-asking the chain when the file
    stem of the top source document does not appear in the answer, and
    appending a ``linkToAmazon`` line looked up in ``bookTitleUrlDict``. The
    source-document lookup that only served them was removed because it
    raised ``IndexError`` whenever the chain returned no source documents.
    """
    question = PROMPT_PREFIX + message
    result = rag_chain({"question": question, "chat_history": chat_history})
    answer = result['answer']
    # The prefixed question (not the raw message) is what gets recorded.
    chat_history.extend(
        [HumanMessage(content=question), AIMessage(content=answer)]
    )
    return answer


# Launch stays at module level so running or importing this file starts the UI.
gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height='auto'),
    textbox=gr.Textbox(placeholder="Recommend a book on someone who..."),
    title="Amazon But Better",
    description=(
        "Amazon started out with selling books. However, searching books on "
        "Amazon is tedious and inaccurate if you don't know what you are exactly looking for. **Why not "
        "make it faster and easier with LLMs:).** This chatbot's context is based on almost all the non-sponsored "
        "Kindle ebooks found in the biography section of amazon.ca (1195 items)."
    ),
).launch()