Spaces:

murdadesmaeeli
/

Amazon-But-Better

Paused

Mehrdad Esmaeili

Update app.py

0325e85 8 months ago

4.34 kB

	from langchain.chains import RetrievalQA
	from langchain.chains import RetrievalQAWithSourcesChain
	from langchain.document_loaders import TextLoader
	from langchain.docstore.document import Document
	import openai
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.llms import OpenAI
	import cohere
	from langchain.embeddings.cohere import CohereEmbeddings
	from langchain.llms import Cohere
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.vectorstores import Chroma
	import os
	from tqdm import tqdm
	import pickle
	import gradio as gr
	from langchain import LLMChain
	from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
	from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
	from langchain.memory import ConversationSummaryMemory
	from langchain.chains import ConversationalRetrievalChain
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.chains import LLMChain
	from langchain.prompts import (
	ChatPromptTemplate,
	HumanMessagePromptTemplate,
	MessagesPlaceholder,
	SystemMessagePromptTemplate,
	)
	from langchain.schema import AIMessage,HumanMessage
	from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
	from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
	# from langchain.memory import Memory
	from langchain.retrievers import ContextualCompressionRetriever
	from langchain.retrievers.document_compressors import CohereRerank


	# --- Corpus loading, indexing and retriever construction -----------------
	documents = []
	path = './bios/'

	# Instantiate a default Chroma store and delete its collection before the
	# index below is built.
	Chroma().delete_collection()

	# Load every directory entry found under `path` as a text document.
	for file in os.listdir(path):
	    loader = TextLoader(path + file, encoding='unicode_escape')
	    documents.extend(loader.load())

	# Split the loaded documents (chunk_size=500, no overlap), embed them with
	# Cohere and index the chunks in Chroma.
	text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
	texts = text_splitter.split_documents(documents)
	embeddings = CohereEmbeddings(model='embed-english-v3.0')
	docsearch = Chroma.from_documents(texts, embeddings)

	# Single-result retriever over the index, plus the Cohere LLM.
	retriever = docsearch.as_retriever(search_kwargs={'k': 1})
	cohereLLM = Cohere(model='command')

	# Wrap the retriever with a CohereRerank compressor.
	compressor = CohereRerank(user_agent='MyTool/1.0 (Linux; x86_64)')
	compression_retriever = ContextualCompressionRetriever(
	    base_compressor=compressor,
	    base_retriever=retriever,
	)

	# delete this to return to production state
	# Summary-style conversation memory driven by the Cohere LLM. It is created
	# here but is not passed to the chain assembled below.
	memory = ConversationSummaryMemory(
	    llm=cohereLLM,
	    memory_key="chat_history",
	    return_messages=True,
	)

	# Sub-chains: one built from CONDENSE_QUESTION_PROMPT, and one "refine"
	# question-answering-with-sources chain.
	question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=cohereLLM)
	doc_chain = load_qa_with_sources_chain(cohereLLM, chain_type="refine")

	# The conversational retrieval chain; `rag_chain` and `chain` are two names
	# for the same object.
	chain = ConversationalRetrievalChain(
	    retriever=docsearch.as_retriever(),
	    question_generator=question_generator,
	    combine_docs_chain=doc_chain,
	    return_source_documents=True,
	)
	rag_chain = chain
	#

	# Load the pickled pairs (presumably (book title, URL) -- confirm against the
	# script that produced the file) and index them by their first element.
	# NOTE: unpickling can execute arbitrary code; only ship a
	# bookTitleUrlTuples.pkl that comes from a trusted source.
	with open('./bookTitleUrlTuples.pkl', 'rb') as pickle_file:
	    # The context manager closes the handle once loading is done.
	    btuTuples = pickle.load(pickle_file)
	bookTitleUrlDict = {title: url for title, url in btuTuples}

	# Module-level conversation history list; starts empty.
	chat_history = []
	def predict(message, history):
	    """Answer a book-recommendation request with one memoryless chain call.

	    Experimentation with memory and the conversational retrieval chain
	    resulted in worse performance, less useful answers and more
	    hallucination. Hence this chat bot provides one-shot answers with zero
	    memory. The code in the GitHub notebooks can be used to repeat that
	    experimentation: github.com/mehrdad-es/Amazon-But-Better

	    Args:
	        message: Text typed by the user.
	        history: Conversation so far as passed in by the chat UI. Unused,
	            because every request is answered independently.

	    Returns:
	        The chain's answer. When the first source document maps to an entry
	        in ``bookTitleUrlDict``, the answer is followed by ``---``, a line
	        break and ``link: <url>``; otherwise the bare answer is returned.
	    """
	    # Keep the instruction, the user's text and the format hint on separate
	    # lines so the words are not glued together in the prompt.
	    prompt = (
	        "you are a language model that gives book recommendation based on your context\n"
	        f"{message}\n"
	        "just give the book title and author"
	    )

	    # Always send an empty history: the bot is documented as memoryless, and
	    # a shared module-level list would mix the conversations of all users
	    # and grow without bound.
	    result = rag_chain({"question": prompt, "chat_history": []})
	    answer = result['answer']

	    # Without a source document there is nothing to link to.
	    source_documents = result.get("source_documents") or []
	    if not source_documents:
	        return answer

	    # The lookup key is the source file name minus its last four characters
	    # (presumably the ".txt" extension of the bios files -- confirm).
	    bookNamePath = source_documents[0].metadata.get("source", "")
	    book_key = bookNamePath.split("/")[-1][:-4]
	    url = bookTitleUrlDict.get(book_key)
	    if url is None:
	        # Unknown title: return the answer rather than failing the request.
	        return answer

	    return answer + f'''---\nlink: {url}'''

	# Build the chat UI around `predict`, then start serving it. The description
	# literal uses backslash line continuations, so its continuation lines are
	# part of the string and must keep their exact leading whitespace.
	demo = gr.ChatInterface(
	    fn=predict,
	    chatbot=gr.Chatbot(height='auto'),
	    textbox=gr.Textbox(placeholder="Recommend a book on someone who..."),
	    title="Amazon But Better",
	    description="Amazon started out with selling books. However, searching books on \
	Amazon is tedious and inaccurate if you don't know what you are exactly looking for. **Why not \
	make it faster and easier with LLMs:).** This chatbot's context is based on almost all the non-sponsored \
	Kindle ebooks found in the biography section of amazon.ca (1195 items).",
	)
	demo.launch()