shoshana-levitt committed on
Commit
09315ae
1 Parent(s): b806895

first commit

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. Dockerfile +7 -0
  3. app.py +132 -0
  4. chainlit.md +1 -0
  5. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
Dockerfile ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
FROM python:3.9

# Create and switch to a non-root user (Hugging Face Spaces convention);
# previously the user was created but never activated, so everything ran as root.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR /app

# Install dependencies first so this layer is cached across code changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

# BUG FIX: app.py is a Chainlit script (it defines no ASGI `app` object),
# so `uvicorn app:app` would fail at startup. Launch via the chainlit CLI.
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Chainlit app: chat with an uploaded PDF via a retrieval-QA chain."""

import os
import tempfile

import chainlit as cl
from dotenv import load_dotenv
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Load OPENAI_API_KEY (and any other secrets) from a local .env file.
load_dotenv()

# Shared splitter: ~1000-character chunks overlapping by 100 characters.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

system_template = """ Try to find detailed information

Begin!
----------------
{summaries}"""

messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]

prompt = ChatPromptTemplate.from_messages(messages)
34
@cl.on_chat_start
async def init():
    """On chat start: ask the user for a PDF, index it into Chroma, and
    store a RetrievalQAWithSourcesChain (plus chunk metadata) in the session."""
    files = None

    # Block until the user uploads a file.
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a file to start chatting!", accept=["pdf"]
        ).send()

    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # Persist the upload to disk so PyPDFLoader can read it by path.
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        temp.write(file.content)
        temp_path = temp.name

    try:
        # Load the PDF into per-page documents (page content + page-number metadata).
        loader = PyPDFLoader(temp_path)
        pages = loader.load_and_split()
    finally:
        # BUG FIX: the delete=False temp file was previously leaked on every
        # upload; remove it once the loader has consumed it.
        os.remove(temp_path)

    # Flatten the pages into one string, then chunk it for embedding.
    text = " ".join(page.page_content for page in pages)
    texts = text_splitter.split_text(text)

    # One synthetic source id per chunk so answers can cite specific chunks.
    metadatas = [{"source": f"{i}-word"} for i in range(len(texts))]

    # Build the Chroma vector store off the event loop (embedding is blocking I/O).
    embeddings = OpenAIEmbeddings()
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    # QA chain over the vector store.
    # NOTE(review): the module-level `prompt` is constructed but never passed
    # here; if the custom prompt is intended, add
    # chain_type_kwargs={"prompt": prompt} — confirm before changing behavior.
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        ChatOpenAI(temperature=0),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
    )

    # Stash chunks + metadata so process_response can resolve cited sources.
    cl.user_session.set("metadatas", metadatas)
    cl.user_session.set("texts", texts)

    # Let the user know the system is ready.
    msg.content = f"`{file.name}` processed. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)
88
+
89
@cl.on_message
async def process_response(message):
    """Answer a user question with the session's QA chain, attaching the
    cited source chunks as text elements on the reply."""
    chain = cl.user_session.get("chain")

    if chain is None:
        await cl.Message(content="The system is not initialized. Please upload a PDF file first.").send()
        return

    # Run the retrieval-QA chain on the user's question.
    response = await chain.acall({
        "question": message.content
    })

    answer = response["answer"]
    # BUG FIX: the chain output may omit "sources"; response["sources"] would
    # raise KeyError — default to an empty string instead.
    sources = response.get("sources", "").strip()
    source_elements = []

    # Chunk texts and their synthetic source ids saved by init().
    metadatas = cl.user_session.get("metadatas")
    all_sources = [m["source"] for m in metadatas]
    texts = cl.user_session.get("texts")

    if sources:
        found_sources = []

        # Resolve each cited source id back to its chunk text.
        for source in sources.split(","):
            # The model sometimes appends a trailing period; strip punctuation.
            source_name = source.strip().replace(".", "")
            try:
                index = all_sources.index(source_name)
            except ValueError:
                # Hallucinated / unknown source id — skip it.
                continue
            found_sources.append(source_name)
            source_elements.append(cl.Text(content=texts[index], name=source_name))

        if found_sources:
            answer += f"\nSources: {', '.join(found_sources)}"
        else:
            answer += "\nNo sources found"

    await cl.Message(content=answer, elements=source_elements).send()
chainlit.md ADDED
@@ -0,0 +1 @@
 
 
1
+ # Chatbot
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ chainlit
4
+ langchain
5
+ openai
6
+ chromadb
7
+ pypdf
8
+ python-dotenv