import os
import pickle

from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document
import openai
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
import cohere
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.llms import Cohere
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from tqdm import tqdm
import gradio as gr
from langchain import LLMChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)
from langchain.schema import AIMessage, HumanMessage
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
# from langchain.memory import Memory
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

# ---------------------------------------------------------------------------
# Build the vector index over the text files found in `path`.
# ---------------------------------------------------------------------------
documents = []
path = './bios/'

# Instantiate a default Chroma store and delete its collection before
# indexing. Presumably this clears entries left over from a previous run in
# the same process -- TODO confirm.
Chroma().delete_collection()

# Sorted so the index is built in the same order on every run.
for file in sorted(os.listdir(path)):
    # Same string the original built; it ends up in each document's
    # metadata["source"], which downstream code parses for the file name.
    file_path = f'{path}{file}'
    if not os.path.isfile(file_path):
        # os.listdir also returns sub-directories; TextLoader cannot read
        # those, so skip anything that is not a regular file.
        continue
    # NOTE(review): 'unicode_escape' interprets backslash sequences in the
    # file contents; confirm this is intended rather than e.g. 'utf-8'.
    loader = TextLoader(file_path, encoding='unicode_escape')
    documents.extend(loader.load())

# Split into 500-character chunks with no overlap, embed them with Cohere,
# and store the vectors in Chroma.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = CohereEmbeddings(model='embed-english-v3.0')
docsearch = Chroma.from_documents(texts, embeddings)

# Retriever limited to the single best match (k=1).
retriever = docsearch.as_retriever(search_kwargs={'k': 1})
cohereLLM = Cohere(model='command')

# Initialize the CohereRerank compressor and the ContextualCompressionRetriever.
# NOTE(review): compression_retriever is constructed here but the chain below
# is given docsearch.as_retriever() instead, so the reranker is currently
# unused -- confirm whether it should be wired in.
compressor = CohereRerank(user_agent='MyTool/1.0 (Linux; x86_64)')
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever,
)

# delete this to return to production state
# NOTE(review): `memory` is not passed to any chain in this file.
memory = ConversationSummaryMemory(
    llm=cohereLLM,
    memory_key="chat_history",
    return_messages=True,
)

question_generator = LLMChain(llm=cohereLLM, prompt=CONDENSE_QUESTION_PROMPT)
doc_chain = load_qa_with_sources_chain(cohereLLM, chain_type="refine")
rag_chain = chain = ConversationalRetrievalChain(
    retriever=docsearch.as_retriever(),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    return_source_documents=True,
)

# Lookup table: book title (the source file name without extension) -> URL.
# SECURITY: pickle executes arbitrary code on load; only ever load a file
# that was produced by this project.
with open('./bookTitleUrlTuples.pkl', 'rb') as btu_file:
    btuTuples = pickle.load(btu_file)
bookTitleUrlDict = dict(btuTuples)


def predict(message: str, history) -> str:
    """Answer one book-recommendation request, appending the book's link.

    Experimentation with memory and the conversational retrieval chain
    resulted in worse performance, less usefulness and more hallucination.
    Hence this chatbot gives one-shot answers with zero memory: every call
    runs the chain with an empty chat history, and nothing is stored between
    calls (a module-level history list would be shared by every user of the
    Gradio app). The notebooks at github.com/mehrdad-es/Amazon-But-Better can
    be used to repeat that experimentation.

    Args:
        message: The user's request as typed into the chat box.
        history: Conversation history supplied by ``gr.ChatInterface``;
            accepted to satisfy the callback signature but not used.

    Returns:
        The chain's answer, followed by ``---`` and a ``link:`` line when the
        top source document maps to a known URL; otherwise the answer alone.
    """
    # Separate the instruction text from the user's words; without the spaces
    # the three pieces fuse into a single run of text.
    prompt = (
        "you are a language model that gives book recommendation based on your context"
        + " " + message + " "
        + 'just give the book title and author'
    )
    result = rag_chain({"question": prompt, "chat_history": []})
    answer = result['answer']

    # Retrieval may return nothing; in that case there is no link to attach.
    source_documents = result.get("source_documents") or []
    if not source_documents:
        return answer

    # The source path's file name, minus its extension, is the lookup key.
    bookNamePath = source_documents[0].metadata.get("source", "")
    book_title = os.path.splitext(os.path.basename(bookNamePath))[0]
    url = bookTitleUrlDict.get(book_title)
    if url is None:
        return answer
    return answer + f'''---\nlink: {url}'''


demo = gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height='auto'),
    textbox=gr.Textbox(placeholder="Recommend a book on someone who..."),
    title="Amazon But Better",
    description=(
        "Amazon started out with selling books. "
        "However, searching books on "
        "Amazon is tedious and inaccurate if you don't know what you are "
        "exactly looking for. **Why not "
        "make it faster and easier with LLMs:).** This chatbot's context is "
        "based on almost all the non-sponsored "
        "Kindle ebooks found in the biography section of amazon.ca (1195 items)."
    ),
)
demo.launch()