Chris Alexiuk
committed on
Commit
•
0775a33
1
Parent(s):
3e7467d
Update app.py
Browse files
app.py
CHANGED
@@ -14,26 +14,26 @@ import arxiv
|
|
14 |
import chainlit as cl
|
15 |
from chainlit import user_session
|
16 |
|
17 |
-
@cl.langchain_factory
|
18 |
-
def init():
|
19 |
arxiv_query = None
|
20 |
|
21 |
# Wait for the user to ask an Arxiv question
|
22 |
while arxiv_query == None:
|
23 |
-
arxiv_query = cl.AskUserMessage(
|
24 |
content="Please enter a topic to begin!", timeout=15
|
25 |
).send()
|
26 |
|
27 |
# Obtain the top 30 results from Arxiv for the query
|
28 |
search = arxiv.Search(
|
29 |
query=arxiv_query["content"],
|
30 |
-
max_results=
|
31 |
sort_by=arxiv.SortCriterion.Relevance,
|
32 |
)
|
33 |
|
|
|
34 |
# download each of the pdfs
|
35 |
pdf_data = []
|
36 |
-
|
37 |
for result in search.results():
|
38 |
loader = PyMuPDFLoader(result.pdf_url)
|
39 |
loaded_pdf = loader.load()
|
@@ -48,12 +48,15 @@ def init():
|
|
48 |
embeddings = OpenAIEmbeddings(
|
49 |
disallowed_special=(),
|
50 |
)
|
|
|
|
|
|
|
51 |
docsearch = Chroma.from_documents(pdf_data, embeddings)
|
52 |
|
53 |
# Create a chain that uses the Chroma vector store
|
54 |
chain = RetrievalQAWithSourcesChain.from_chain_type(
|
55 |
ChatOpenAI(
|
56 |
-
model_name="gpt-
|
57 |
temperature=0,
|
58 |
),
|
59 |
chain_type="stuff",
|
@@ -62,7 +65,7 @@ def init():
|
|
62 |
)
|
63 |
|
64 |
# Let the user know that the system is ready
|
65 |
-
cl.Message(
|
66 |
content=f"We found a few papers about `{arxiv_query['content']}` you can now ask questions!"
|
67 |
).send()
|
68 |
|
@@ -70,7 +73,7 @@ def init():
|
|
70 |
|
71 |
|
72 |
@cl.langchain_postprocess
|
73 |
-
def process_response(res):
|
74 |
answer = res["answer"]
|
75 |
source_elements_dict = {}
|
76 |
source_elements = []
|
@@ -94,7 +97,7 @@ def process_response(res):
|
|
94 |
page_numbers = ", ".join([str(x) for x in source["page_number"]])
|
95 |
text_for_source = f"Page Number(s): {page_numbers}\nURL: {source['url']}"
|
96 |
source_elements.append(
|
97 |
-
cl.Text(name=title,
|
98 |
)
|
99 |
|
100 |
-
cl.Message(content=answer, elements=source_elements).send()
|
|
|
14 |
import chainlit as cl
|
15 |
from chainlit import user_session
|
16 |
|
17 |
+
@cl.langchain_factory(use_async=True)
|
18 |
+
async def init():
|
19 |
arxiv_query = None
|
20 |
|
21 |
# Wait for the user to ask an Arxiv question
|
22 |
while arxiv_query == None:
|
23 |
+
arxiv_query = await cl.AskUserMessage(
|
24 |
content="Please enter a topic to begin!", timeout=15
|
25 |
).send()
|
26 |
|
27 |
# Obtain the top 3 results from Arxiv for the query
|
28 |
search = arxiv.Search(
|
29 |
query=arxiv_query["content"],
|
30 |
+
max_results=3,
|
31 |
sort_by=arxiv.SortCriterion.Relevance,
|
32 |
)
|
33 |
|
34 |
+
await cl.Message(content="Downloading and chunking articles...").send()
|
35 |
# download each of the pdfs
|
36 |
pdf_data = []
|
|
|
37 |
for result in search.results():
|
38 |
loader = PyMuPDFLoader(result.pdf_url)
|
39 |
loaded_pdf = loader.load()
|
|
|
48 |
embeddings = OpenAIEmbeddings(
|
49 |
disallowed_special=(),
|
50 |
)
|
51 |
+
|
52 |
+
# If this operation takes too long, make_async allows it to run in a thread
|
53 |
+
# docsearch = await cl.make_async(Chroma.from_documents)(pdf_data, embeddings)
|
54 |
docsearch = Chroma.from_documents(pdf_data, embeddings)
|
55 |
|
56 |
# Create a chain that uses the Chroma vector store
|
57 |
chain = RetrievalQAWithSourcesChain.from_chain_type(
|
58 |
ChatOpenAI(
|
59 |
+
model_name="gpt-3.5-turbo-16k",
|
60 |
temperature=0,
|
61 |
),
|
62 |
chain_type="stuff",
|
|
|
65 |
)
|
66 |
|
67 |
# Let the user know that the system is ready
|
68 |
+
await cl.Message(
|
69 |
content=f"We found a few papers about `{arxiv_query['content']}` you can now ask questions!"
|
70 |
).send()
|
71 |
|
|
|
73 |
|
74 |
|
75 |
@cl.langchain_postprocess
|
76 |
+
async def process_response(res):
|
77 |
answer = res["answer"]
|
78 |
source_elements_dict = {}
|
79 |
source_elements = []
|
|
|
97 |
page_numbers = ", ".join([str(x) for x in source["page_number"]])
|
98 |
text_for_source = f"Page Number(s): {page_numbers}\nURL: {source['url']}"
|
99 |
source_elements.append(
|
100 |
+
cl.Text(name=title, content=text_for_source, display="inline")
|
101 |
)
|
102 |
|
103 |
+
await cl.Message(content=answer, elements=source_elements).send()
|