Mehrdad Esmaeili committed
Commit eda774c
Parent: e2378cc

Create app.py

Files changed (1)
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
+ from langchain.chains import RetrievalQA
+ from langchain.chains import RetrievalQAWithSourcesChain
+ from langchain.document_loaders import TextLoader
+ from langchain.docstore.document import Document
+ import openai
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.llms import OpenAI
+ import cohere
+ from langchain.embeddings.cohere import CohereEmbeddings
+ from langchain.llms import Cohere
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import Chroma
+ import os
+ from tqdm import tqdm
+ import gradio as gr
+
+ # Load every text file in ./bios/ as a LangChain document.
+ documents = []
+ path = './bios/'
+ # path = './augBios/'
+ for file in os.listdir(path):
+     loader = TextLoader(f'{path}{file}', encoding='unicode_escape')
+     # loader.load()[0].metadata['category']='biography'
+     # print(loader.load()[0].metadata)
+     documents += loader.load()
+ # print(documents)
+ print(len(documents))
+
+ # Split the documents into 500-character chunks and index them in Chroma.
+ text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
+ texts = text_splitter.split_documents(documents)
+
+ # embeddings = OpenAIEmbeddings()
+ embeddings = CohereEmbeddings()
+ docsearch = Chroma.from_documents(texts, embeddings)
+
+ # Retrieval-augmented QA chain: answer from the single closest chunk (k=1)
+ # and return that chunk as the source document.
+ qa = RetrievalQA.from_chain_type(
+     llm=Cohere(),
+     chain_type="stuff",
+     retriever=docsearch.as_retriever(search_kwargs={'k': 1}),
+     return_source_documents=True,
+ )
+
+
+ def predict(message, history):
+     # history_langchain_format = []
+     # for human, ai in history:
+     #     history_langchain_format.append(HumanMessage(content=human))
+     #     history_langchain_format.append(AIMessage(content=ai))
+     # history_langchain_format.append(HumanMessage(content=message))
+     # gpt_response = llm(history_langchain_format)
+     # return gpt_response.content
+     message += '? just give me the book title-Author'
+     result = qa({"query": message})
+     # r1=docsearch.similarity_search_with_score(query=q,k=3)
+     # print([(item[-2].metadata,item[-1]) for item in r1],
+     #       '\n\n',result['result'],f'|| {result["source_documents"][0].metadata}','\n*****\n')
+     # ChatInterface expects a single string reply, so fold the source metadata into it.
+     return f"{result['result']} || source is==> {result['source_documents'][0].metadata}"
+
+
+ gr.ChatInterface(predict).launch()
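
For quick local testing of the same RetrievalQA-over-Chroma pattern that app.py uses, the following is a minimal, self-contained sketch. It assumes COHERE_API_KEY is exported and that langchain, chromadb, and cohere are installed; the inline sample document and query are illustrative stand-ins for the ./bios/ folder, not part of this commit.

# Minimal sketch of the app's retrieval pattern (assumptions noted above).
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.llms import Cohere
from langchain.vectorstores import Chroma

# One inline example document instead of loading files from ./bios/.
docs = [Document(page_content="Example bio chunk mentioning a book and its author.",
                 metadata={"source": "example.txt"})]
store = Chroma.from_documents(docs, CohereEmbeddings())
qa = RetrievalQA.from_chain_type(
    llm=Cohere(),
    chain_type="stuff",
    retriever=store.as_retriever(search_kwargs={"k": 1}),
    return_source_documents=True,
)
out = qa({"query": "Which book is mentioned? just give me the book title-Author"})
print(out["result"], out["source_documents"][0].metadata)

Swapping Cohere()/CohereEmbeddings() for OpenAI()/OpenAIEmbeddings(), as the commented-out line in app.py suggests, would require OPENAI_API_KEY instead.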