evanrsl committed on
Commit
1bca2b2
1 Parent(s): 17de7c2

initialize

Browse files
Files changed (3) hide show
  1. Dockerfile +14 -0
  2. app.py +143 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ WORKDIR /code
7
+
8
+ COPY ./requirements.txt /code/requirements.txt
9
+
10
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
+
12
+ COPY . .
13
+
14
+ CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.chains import (
8
+ ConversationalRetrievalChain,
9
+ )
10
+ from langchain.document_loaders import PyPDFLoader, TextLoader
11
+ from langchain.chat_models import ChatOpenAI
12
+ from langchain.prompts.chat import (
13
+ ChatPromptTemplate,
14
+ SystemMessagePromptTemplate,
15
+ HumanMessagePromptTemplate,
16
+ )
17
+ from langchain.docstore.document import Document
18
+ from langchain.memory import ChatMessageHistory, ConversationBufferMemory
19
+ from chainlit.types import AskFileResponse
20
+
21
+ import chainlit as cl
22
+
23
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
24
+
25
+ system_template = """Use the following pieces of context to answer the users question.
26
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
27
+ ALWAYS return a "SOURCES" part in your answer.
28
+ The "SOURCES" part should be a reference to the source of the document from which you got your answer.
29
+
30
+ And if the user greets with greetings like Hi, hello, How are you, etc reply accordingly as well.
31
+
32
+ Example of your response should be:
33
+
34
+ The answer is foo
35
+ SOURCES: xyz
36
+
37
+
38
+ Begin!
39
+ ----------------
40
+ {summaries}"""
41
+ messages = [
42
+ SystemMessagePromptTemplate.from_template(system_template),
43
+ HumanMessagePromptTemplate.from_template("{question}"),
44
+ ]
45
+ prompt = ChatPromptTemplate.from_messages(messages)
46
+ chain_type_kwargs = {"prompt": prompt}
47
+
48
+ welcome_message = """Welcome to the Chainlit PDF QA demo! To get started:
49
+ 1. Upload a PDF or text file
50
+ 2. Ask a question about the file
51
+ """
52
+
53
def process_file(file: AskFileResponse):
    """Load an uploaded PDF and split it into retrieval-sized text chunks.

    Args:
        file: Chainlit upload response; ``file.path`` points at the stored
            PDF on disk.

    Returns:
        A list of plain strings, one per chunk (page metadata is dropped).
    """
    pypdf_loader = PyPDFLoader(file.path)
    # Fix: pass the module-level splitter (chunk_size=1000, overlap=100)
    # explicitly — previously load_and_split() silently used its own default
    # splitter and the configured text_splitter was dead code.
    texts = pypdf_loader.load_and_split(text_splitter=text_splitter)
    return [text.page_content for text in texts]
58
+
59
@cl.on_chat_start
async def on_chat_start():
    """Per-session setup: ask for a PDF, index it, and stash a QA chain.

    Flow:
      1. Block until the user uploads a PDF (re-asking after each timeout).
      2. Chunk the PDF and embed the chunks into an in-memory Chroma store.
      3. Build a ConversationalRetrievalChain with buffer memory and store it
         in the user session for the on_message handler.
    """
    files = None

    # Wait for the user to upload a file; AskFileMessage returns None on
    # timeout, so loop until we actually get one.
    # Fix: `is None` instead of `== None` (PEP 8 E711).
    while files is None:
        files = await cl.AskFileMessage(
            content=welcome_message,
            accept=["application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_feedback=True
    )
    await msg.send()

    # Load and chunk the uploaded file.
    texts = process_file(file)

    # Create a metadata entry per chunk so answers can reference a source id.
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # Create a Chroma vector store; from_texts is blocking (embedding calls),
    # so run it off the event loop via make_async.
    embeddings = OpenAIEmbeddings()
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    message_history = ChatMessageHistory()

    # Memory keys match what ConversationalRetrievalChain reads/writes
    # ("chat_history" in, "answer" out).
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Create a chain that uses the Chroma vector store.
    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready.
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)
116
+
117
+
118
@cl.on_message
async def main(message):
    """Answer a user message with the session's retrieval chain.

    Runs the ConversationalRetrievalChain stored by on_chat_start, attaches
    each retrieved chunk as a Chainlit Text element, and appends a
    "Sources: ..." (or "No sources found") line to the answer.
    """
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()

    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    text_elements = []  # type: List[cl.Text]

    if source_documents:
        for source_idx, source_doc in enumerate(source_documents):
            source_name = f"source_{source_idx}"
            # Create the text element referenced in the message
            text_elements.append(
                cl.Text(content=source_doc.page_content, name=source_name)
            )
        source_names = [text_el.name for text_el in text_elements]
        answer += f"\nSources: {', '.join(source_names)}"
    else:
        # Fix: this message was previously unreachable — it sat behind
        # `if source_names:` nested inside `if source_documents:`, and
        # source_names is nonempty whenever source_documents is. Hoisting it
        # here makes the no-retrieval case actually report.
        answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Runtime dependencies for the Chainlit PDF QA app (app.py).
pypdf==3.8.1
pinecone-client==2.2.1  # NOTE(review): app.py uses Chroma, not Pinecone — likely removable; confirm
tiktoken==0.3.3
langchain
chainlit
# Fix: dependencies used by app.py but previously missing:
chromadb  # Chroma.from_texts vector store backend
openai  # OpenAIEmbeddings / ChatOpenAI client