Spaces:
Runtime error
Runtime error
Mehrdad Esmaeili
committed on
Commit
•
eda774c
1
Parent(s):
e2378cc
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

import cohere
import gradio as gr  # needed by the gr.ChatInterface(...) call at the bottom of the file
import openai
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import Cohere, OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from tqdm import tqdm

# --- Corpus ingestion -------------------------------------------------------
# Load every biography text file from the corpus directory into LangChain
# Document objects.
documents = []
path = './bios/'
# path = './augBios/'
for fname in os.listdir(path):
    loader = TextLoader(f'{path}{fname}', encoding='unicode_escape')
    documents.extend(loader.load())
print(len(documents))

# Split the documents into chunks of at most 500 characters (no overlap)
# so each chunk can be embedded and retrieved independently.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Embed the chunks with Cohere and index them in an in-memory Chroma store.
# embeddings = OpenAIEmbeddings()
embeddings = CohereEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings)

# Retrieval-QA chain: fetch the single closest chunk (k=1) and "stuff" it
# into the LLM prompt; also return the source document for attribution.
qa = RetrievalQA.from_chain_type(
    llm=Cohere(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k': 1}),
    return_source_documents=True,
)
def predict(message, history):
    """Answer a chat message by retrieving the closest biography chunk.

    Parameters
    ----------
    message : str
        The user's question (a book/author lookup).
    history : list
        Chat history supplied by ``gr.ChatInterface``; unused here.

    Returns
    -------
    tuple[str, str]
        The chain's answer text and a marker string naming the source
        document's metadata.
        NOTE(review): ``gr.ChatInterface`` normally expects a single string
        reply from its ``fn`` — confirm the tuple renders as intended.
    """
    # Steer the LLM toward a terse "Title-Author" style answer.
    message += '? just give me the book title-Author'
    result = qa({"query": message})
    return result['result'], f'|| source is==> {result["source_documents"][0].metadata}'
# Start the Gradio chat UI, wiring each user message through predict().
gr.ChatInterface(predict).launch()