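# "Amazon But Better": a Gradio chatbot that recommends Kindle biography ebooks.
# Bio text files are embedded with Cohere, indexed in a Chroma vector store, and
# queried one-shot through a LangChain RetrievalQA chain backed by Cohere's
# `command` model.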
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.llms import Cohere
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import os
import pickle
import gradio as gr
# Imports used only by the commented-out memory experiments below:
# from langchain.memory import ConversationSummaryMemory
# from langchain.chains import ConversationalRetrievalChain
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.chains import LLMChain
# from langchain.prompts import (
#     ChatPromptTemplate,
#     HumanMessagePromptTemplate,
#     MessagesPlaceholder,
#     SystemMessagePromptTemplate,
# )
# from langchain.schema import AIMessage, HumanMessage
documents = []
path = './bios/'

# Load every biography text file in ./bios as a LangChain Document.
for file in os.listdir(path):
    loader = TextLoader(f'{path}{file}', encoding='unicode_escape')
    documents += loader.load()
print(f'Loaded {len(documents)} documents from {path}')
# --- Memory-less chat: each query is answered in one shot, with no history ---
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

embeddings = CohereEmbeddings(model='embed-english-v3.0')
docsearch = Chroma.from_documents(texts, embeddings)

# RetrievalQA with the "stuff" chain type: the single best-matching chunk (k=1)
# is stuffed into the prompt sent to Cohere's `command` model.
qa = RetrievalQA.from_chain_type(
    llm=Cohere(model='command'),
    chain_type='stuff',
    retriever=docsearch.as_retriever(search_kwargs={'k': 1}),
    return_source_documents=True,
)
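# Illustrative smoke test (hypothetical query, not part of the app flow):
# qa({'query': 'Recommend a book about a mountaineer'})['result']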

# Map book title -> Amazon URL, built from stored (title, url) tuples.
btuTuples = pickle.load(open('./bookTitleUrlTuples.pkl', 'rb'))
bookTitleUrlDict = {title: url for title, url in btuTuples}
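# Assumed shape of bookTitleUrlTuples.pkl (illustrative only):
#   [('Some Biography Title', 'https://www.amazon.ca/dp/...'), ...]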
def predict(message, history):
    '''Experimentation with memory and a conversational retrieval chain led to worse
    performance, lower usefulness, and more hallucination, so this chatbot gives
    one-shot answers with zero memory. The notebooks for that experimentation are at
    github.com/mehrdad-es/Amazon-But-Better.'''
    # history_langchain_format = []
    # for human, ai in history:
    #     history_langchain_format.append(HumanMessage(content=human))
    #     history_langchain_format.append(AIMessage(content=ai))
    # history_langchain_format.append(HumanMessage(content=message))
    # gpt_response = qa({'query':history_langchain_format})
    # return gpt_response['result']
    # gpt_response = qa({'query':''.join(history)+f'.\n given the previous conversation respond using the following prompt:{message}'})
    # # print(gpt_response)
    # history.append((f'HumanMessage:{message}',f'AIMessage: {gpt_response},'))
    # # history=history_langchain_format
    # return gpt_response['result']
    
    message="you are a language model that gives book recommendation based on your context"+message+\
    'just give the book title and author'
    result = qa({"query": message})
    if result['result'] not in ["I don't know", "I don't know."]:
        bookNamePath = result["source_documents"][0].metadata["source"]
        return (
            result['result']
            + '\n---\n***Ignore the description below if the chatbot was unsure about '
            'its response or if the response is not about the book shown below***\n'
            + f'Amazon Kindle ebook description is:\n {result["source_documents"][0].page_content}'
            + f'\n**from this file:** {bookNamePath}\n'
            + f'**link==>** {bookTitleUrlDict[bookNamePath.split("/")[-1][:-4]]}'
        )
    else:
        return result['result']

gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height='auto'),
    textbox=gr.Textbox(placeholder="Recommend a book on someone who..."),
    title="Amazon But Better",
    description="Amazon started out selling books. However, searching for books on "
    "Amazon is tedious and inaccurate if you don't know exactly what you are looking for. "
    "**Why not make it faster and easier with LLMs :).** This chatbot's context is all "
    "the non-sponsored Kindle ebooks in the biography section of amazon.ca (1,195 items).",
).launch()