|
from langchain.chains import RetrievalQA |
|
from langchain.chains import RetrievalQAWithSourcesChain |
|
from langchain.document_loaders import TextLoader |
|
from langchain.docstore.document import Document |
|
import openai |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.llms import OpenAI |
|
import cohere |
|
from langchain.embeddings.cohere import CohereEmbeddings |
|
from langchain.llms import Cohere |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.vectorstores import Chroma |
|
import os |
|
from tqdm import tqdm |
|
import pickle |
|
import gradio as gr |
|
from langchain import LLMChain |
|
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain |
|
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT |
|
from langchain.memory import ConversationSummaryMemory |
|
from langchain.chains import ConversationalRetrievalChain |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.chains import LLMChain |
|
from langchain.prompts import ( |
|
ChatPromptTemplate, |
|
HumanMessagePromptTemplate, |
|
MessagesPlaceholder, |
|
SystemMessagePromptTemplate, |
|
) |
|
from langchain.schema import AIMessage,HumanMessage |
|
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain |
|
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT |
|
|
|
from langchain.retrievers import ContextualCompressionRetriever |
|
from langchain.retrievers.document_compressors import CohereRerank |
|
|
|
|
|
# --- Corpus loading and vector index -----------------------------------------
# Every entry found in ./bios/ is read with TextLoader, the loaded documents
# are split into chunks, embedded with Cohere and stored in a Chroma index.

# Delete the collection of a default-constructed Chroma store before indexing.
# NOTE(review): presumably this clears leftovers from a previous run - confirm.
Chroma().delete_collection()

path = './bios/'
documents = []

for file in os.listdir(path):
    # `path` already ends with a slash, so plain concatenation yields the
    # full file path.
    loader = TextLoader(f'{path}{file}', encoding='unicode_escape')
    documents.extend(loader.load())

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

embeddings = CohereEmbeddings(model='embed-english-v3.0')
docsearch = Chroma.from_documents(texts, embeddings)

# Retriever limited to a single hit (k=1); it is the base retriever handed to
# the ContextualCompressionRetriever built further down in this file.
retriever = docsearch.as_retriever(search_kwargs={'k': 1})
|
# --- LLM, reranker, memory and retrieval chain --------------------------------

# Single Cohere LLM instance shared by the memory, the question generator and
# the document-combining chain below.
cohereLLM = Cohere(model='command')

# Reranking retriever layered on top of `retriever`.
# NOTE(review): `compression_retriever` is not referenced anywhere else in the
# visible source; the chain below uses `docsearch.as_retriever()` instead.
compressor = CohereRerank(user_agent='MyTool/1.0 (Linux; x86_64)')
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

# NOTE(review): `memory` is constructed but never passed to the chain in the
# visible source.
memory = ConversationSummaryMemory(
    llm=cohereLLM,
    memory_key="chat_history",
    return_messages=True,
)

# Pieces of the conversational chain: one LLMChain driven by
# CONDENSE_QUESTION_PROMPT and one "refine" QA-with-sources chain.
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=cohereLLM)
doc_chain = load_qa_with_sources_chain(cohereLLM, chain_type="refine")

chain = ConversationalRetrievalChain(
    retriever=docsearch.as_retriever(),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    return_source_documents=True,
)
# Both names refer to the same chain object.
rag_chain = chain
|
|
|
|
|
# --- Lookup table loaded from disk ---------------------------------------------
# NOTE(security): pickle.load can execute arbitrary code while unpickling. Keep
# this only as long as bookTitleUrlTuples.pkl is a trusted file shipped with
# the application; never point it at user-supplied data.
# The `with` block closes the file handle as soon as the data is loaded.
with open('./bookTitleUrlTuples.pkl', 'rb') as pkl_file:
    btuTuples = pickle.load(pkl_file)

# Each pickled entry is a (key, value) pair. predict() looks entries up by the
# extension-less file name of a source document and shows the value as a link.
bookTitleUrlDict = dict(btuTuples)

# Module-level list of chat messages; as a global it is shared by everything
# running in this process.
chat_history = []
|
def predict(message, history):
    """Answer one book-recommendation request and append the book's link.

    Experimentation with memory and the conversational retrieval chain
    resulted in lower performance and usefulness and in more hallucination.
    Hence, this chatbot provides one-shot answers with zero memory. You can
    use the code in the GitHub notebooks to repeat that experimentation:
    github.com/mehrdad-es/Amazon-But-Better

    Args:
        message: Text the user typed into the chat box.
        history: Conversation so far, as passed in by ``gr.ChatInterface``.
            Deliberately ignored, because every answer is one-shot.

    Returns:
        The chain's answer. When the first retrieved source document maps to
        a known entry in ``bookTitleUrlDict``, ``---\\nlink: <url>`` is
        appended to it; otherwise the bare answer is returned.
    """
    # Keep the instruction, the user's request and the output constraint on
    # separate lines so they do not run together into a single garbled
    # sentence.
    prompt = "\n".join((
        "you are a language model that gives book recommendation based on your context",
        message.strip(),
        "just give the book title and author",
    ))

    # Zero memory: each call starts from an empty history, so an answer never
    # depends on earlier requests (including those of other users served by
    # the same process) and no global state grows between calls.
    result = rag_chain({"question": prompt, "chat_history": []})
    answer = result["answer"]

    # Retrieval can come back empty; in that case there is nothing to link.
    sources = result.get("source_documents") or []
    if not sources:
        return answer

    bookNamePath = sources[0].metadata.get("source", "")
    # File name without directory and without its last four characters.
    # NOTE(review): assumes a 4-character extension such as ".txt" - confirm
    # against the files in ./bios/ and the keys of bookTitleUrlDict.
    book_key = bookNamePath.split("/")[-1][:-4]

    url = bookTitleUrlDict.get(book_key)
    if url is None:
        # Unknown title: return the answer rather than fail with a KeyError.
        return answer

    return answer + f'''---\nlink: {url}'''
|
|
|
# --- Web UI ---------------------------------------------------------------------
# Build the Gradio chat interface around predict() and start it immediately.
gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height='auto'),
    textbox=gr.Textbox(placeholder="Recommend a book on someone who..."),
    title="Amazon But Better",
    # Adjacent string literals are joined into one description string.
    description=(
        "Amazon started out with selling books. However, searching books on "
        "Amazon is tedious and inaccurate if you don't know what you are exactly looking for. **Why not "
        "make it faster and easier with LLMs:).** This chatbot's context is based on almost all the non-sponsored "
        "Kindle ebooks found in the biography section of amazon.ca (1195 items)."
    ),
).launch()