Spaces:
Running
Running
Commit
·
053d913
1
Parent(s):
e203fb5
added search engine to app, fixed please wait warning
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import streamlit as st #Web App
|
|
2 |
import os
|
3 |
from PIL import Image
|
4 |
from utils import *
|
|
|
5 |
|
6 |
import pickle
|
7 |
docs = None
|
@@ -14,7 +15,7 @@ st.image(image, width=1000)
|
|
14 |
|
15 |
#title
|
16 |
st.title("Answering questions from scientific papers")
|
17 |
-
st.markdown("##### This tool will allow you to ask questions and get answers based on scientific papers. It uses OpenAI's GPT models, and you must have your own API key. Each query is about 10k tokens, which costs about only $0.20 on your own API key
|
18 |
st.markdown("##### Current version searches on [ArXiv](https://arxiv.org) papers only. 🚧Under development🚧")
|
19 |
st.markdown("Used libraries:\n * [PaperQA](https://github.com/whitead/paper-qa) \n* [langchain](https://github.com/hwchase17/langchain)")
|
20 |
|
@@ -23,7 +24,8 @@ api_key_url = 'https://help.openai.com/en/articles/4936850-where-do-i-find-my-se
|
|
23 |
api_key = st.text_input('OpenAI API Key',
|
24 |
placeholder='sk-...',
|
25 |
help=f"['What is that?']({api_key_url})",
|
26 |
-
type="password"
|
|
|
27 |
|
28 |
os.environ["OPENAI_API_KEY"] = f"{api_key}" #
|
29 |
if len(api_key) != 51:
|
@@ -34,18 +36,23 @@ max_results_current = 5
|
|
34 |
max_results = max_results_current
|
35 |
def search_click_callback(search_query, max_results):
|
36 |
global pdf_info, pdf_citation
|
37 |
-
pdf_info, pdf_citation = call_arXiv_API(f'{search_query}', max_results=max_results)
|
38 |
-
download_pdf(pdf_info)
|
|
|
|
|
|
|
|
|
|
|
39 |
return pdf_info
|
40 |
|
41 |
with st.form(key='columns_in_form', clear_on_submit = False):
|
42 |
c1, c2 = st.columns([8,1])
|
43 |
with c1:
|
44 |
-
search_query = st.text_input("Input search query here:", placeholder='Keywords for most relevant search...', value=''
|
45 |
)#search_query, max_results_current))
|
46 |
|
47 |
with c2:
|
48 |
-
max_results = st.
|
49 |
max_results_current = max_results_current
|
50 |
searchButton = st.form_submit_button(label = 'Search')
|
51 |
|
@@ -60,34 +67,38 @@ if searchButton:
|
|
60 |
def answer_callback(question_query):
|
61 |
import paperqa
|
62 |
global docs
|
63 |
-
progress_text = "Please wait..."
|
64 |
-
# my_bar = st.progress(0, text = progress_text)
|
65 |
-
st.info('Please wait...', icon="🔥")
|
66 |
if docs is None:
|
67 |
-
# my_bar.progress(0.2, "Please wait...")
|
68 |
pdf_info = st.session_state['pdf_info']
|
69 |
-
# print('buliding docs')
|
70 |
docs = paperqa.Docs()
|
71 |
-
pdf_paths = [f"{p[4]}/{p[0]}.pdf" for p in pdf_info]
|
72 |
pdf_citations = [p[5] for p in pdf_info]
|
73 |
print(list(zip(pdf_paths, pdf_citations)))
|
74 |
-
|
75 |
for d, c in zip(pdf_paths, pdf_citations):
|
76 |
-
# print(d,c)
|
77 |
docs.add(d, c)
|
78 |
-
|
79 |
answer = docs.query(question_query)
|
80 |
-
|
81 |
-
# my_bar.progress(1.0, "Done!")
|
82 |
-
st.success('Voila!')
|
83 |
return answer.formatted_answer
|
84 |
|
85 |
form = st.form(key='question_form')
|
86 |
question_query = form.text_input("What do you wanna know from these papers?", placeholder='Input questions here...',
|
87 |
-
value='')
|
88 |
submitButton = form.form_submit_button('Submit')
|
89 |
|
90 |
if submitButton:
|
91 |
with st.expander("Found papers:", expanded=True):
|
92 |
st.write(f"{st.session_state['all_reference_text']}")
|
93 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import os
|
3 |
from PIL import Image
|
4 |
from utils import *
|
5 |
+
import asyncio
|
6 |
|
7 |
import pickle
|
8 |
docs = None
|
|
|
15 |
|
16 |
#title
|
17 |
st.title("Answering questions from scientific papers")
|
18 |
+
st.markdown("##### This tool will allow you to ask questions and get answers based on scientific papers. It uses OpenAI's GPT models, and you must have your own API key. Each query is about 10k tokens, which costs about only $0.20 on your own API key, charged by OpenAI.")
|
19 |
st.markdown("##### Current version searches on [ArXiv](https://arxiv.org) papers only. 🚧Under development🚧")
|
20 |
st.markdown("Used libraries:\n * [PaperQA](https://github.com/whitead/paper-qa) \n* [langchain](https://github.com/hwchase17/langchain)")
|
21 |
|
|
|
24 |
# Ask the visitor for their own OpenAI API key. The field is masked
# (type="password") and deliberately has NO pre-filled `value`: a secret key
# hard-coded in source is readable by anyone who can view the repository or
# its history, and it would silently be used for every visitor's queries.
# NOTE(review): any key that was ever committed as a default here should be
# treated as compromised and revoked/rotated with the provider.
api_key = st.text_input('OpenAI API Key',
                        placeholder='sk-...',
                        help=f"['What is that?']({api_key_url})",
                        type="password")
|
29 |
|
30 |
os.environ["OPENAI_API_KEY"] = f"{api_key}" #
|
31 |
if len(api_key) != 51:
|
|
|
36 |
max_results = max_results_current
|
37 |
def search_click_callback(search_query, max_results):
|
38 |
global pdf_info, pdf_citation
|
39 |
+
# pdf_info, pdf_citation = call_arXiv_API(f'{search_query}', max_results=max_results)
|
40 |
+
# download_pdf(pdf_info)
|
41 |
+
XRxiv_servers = ['rxiv']
|
42 |
+
search_engines = XRxivQuery(search_query, max_results, XRxiv_servers=XRxiv_servers)
|
43 |
+
pdf_info = search_engines.call_API()
|
44 |
+
search_engines.download_pdf()
|
45 |
+
|
46 |
return pdf_info
|
47 |
|
48 |
with st.form(key='columns_in_form', clear_on_submit = False):
|
49 |
c1, c2 = st.columns([8,1])
|
50 |
with c1:
|
51 |
+
search_query = st.text_input("Input search query here:", placeholder='Keywords for most relevant search...', value='CFD Modeling'
|
52 |
)#search_query, max_results_current))
|
53 |
|
54 |
with c2:
|
55 |
+
max_results = st.number_input("Max papers", value=max_results_current)
|
56 |
max_results_current = max_results_current
|
57 |
searchButton = st.form_submit_button(label = 'Search')
|
58 |
|
|
|
67 |
def answer_callback(question_query):
|
68 |
import paperqa
|
69 |
global docs
|
|
|
|
|
|
|
70 |
if docs is None:
|
|
|
71 |
pdf_info = st.session_state['pdf_info']
|
|
|
72 |
docs = paperqa.Docs()
|
73 |
+
pdf_paths = [f"{p[4]}/{p[0].replace(':','').replace('/','').replace('.','')}.pdf" for p in pdf_info]
|
74 |
pdf_citations = [p[5] for p in pdf_info]
|
75 |
print(list(zip(pdf_paths, pdf_citations)))
|
|
|
76 |
for d, c in zip(pdf_paths, pdf_citations):
|
|
|
77 |
docs.add(d, c)
|
78 |
+
docs._build_faiss_index()
|
79 |
answer = docs.query(question_query)
|
80 |
+
st.success('Voila! 😃')
|
|
|
|
|
81 |
return answer.formatted_answer
|
82 |
|
83 |
form = st.form(key='question_form')
|
84 |
question_query = form.text_input("What do you wanna know from these papers?", placeholder='Input questions here...',
|
85 |
+
value='Write about CFD modeling')
|
86 |
submitButton = form.form_submit_button('Submit')
|
87 |
|
88 |
if submitButton:
|
89 |
with st.expander("Found papers:", expanded=True):
|
90 |
st.write(f"{st.session_state['all_reference_text']}")
|
91 |
+
with st.spinner('⏳ Please wait...'):
|
92 |
+
start = time.time()
|
93 |
+
final_answer = answer_callback(question_query)
|
94 |
+
length_answer = len(final_answer)
|
95 |
+
st.text_area("Answer:", final_answer, height=max(length_answer//4, 100))
|
96 |
+
end = time.time()
|
97 |
+
clock_time = end - start
|
98 |
+
with st.empty():
|
99 |
+
st.write(f"✔️ Task completed in {clock_time:.2f} seconds.")
|
100 |
+
|
101 |
+
|
102 |
+
|
103 |
+
|
104 |
+
|
utils.py
CHANGED
@@ -154,7 +154,7 @@ class XRxivQuery:
|
|
154 |
# os.remove(f'./{folder_name}/*')
|
155 |
# print(pdf_info)
|
156 |
all_reference_text = []
|
157 |
-
for i,p in enumerate(stqdm(self.all_pdf_info, desc='Searching and downloading papers')):
|
158 |
pdf_title=p[0]
|
159 |
pdf_category=p[3]
|
160 |
pdf_url=p[1]
|
|
|
154 |
# os.remove(f'./{folder_name}/*')
|
155 |
# print(pdf_info)
|
156 |
all_reference_text = []
|
157 |
+
for i,p in enumerate(stqdm(self.all_pdf_info, desc='🔍 Searching and downloading papers')):
|
158 |
pdf_title=p[0]
|
159 |
pdf_category=p[3]
|
160 |
pdf_url=p[1]
|