mehradans92 committed on
Commit
053d913
·
1 Parent(s): e203fb5

added search engine to app, fixed please wait warning

Browse files
Files changed (2) hide show
  1. app.py +31 -20
  2. utils.py +1 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import streamlit as st #Web App
2
  import os
3
  from PIL import Image
4
  from utils import *
 
5
 
6
  import pickle
7
  docs = None
@@ -14,7 +15,7 @@ st.image(image, width=1000)
14
 
15
  #title
16
  st.title("Answering questions from scientific papers")
17
- st.markdown("##### This tool will allow you to ask questions and get answers based on scientific papers. It uses OpenAI's GPT models, and you must have your own API key. Each query is about 10k tokens, which costs about only $0.20 on your own API key which is charged by OpenAI.")
18
  st.markdown("##### Current version searches on [ArXiv](https://arxiv.org) papers only. 🚧Under development🚧")
19
  st.markdown("Used libraries:\n * [PaperQA](https://github.com/whitead/paper-qa) \n* [langchain](https://github.com/hwchase17/langchain)")
20
 
@@ -23,7 +24,8 @@ api_key_url = 'https://help.openai.com/en/articles/4936850-where-do-i-find-my-se
23
  api_key = st.text_input('OpenAI API Key',
24
  placeholder='sk-...',
25
  help=f"['What is that?']({api_key_url})",
26
- type="password")
 
27
 
28
  os.environ["OPENAI_API_KEY"] = f"{api_key}" #
29
  if len(api_key) != 51:
@@ -34,18 +36,23 @@ max_results_current = 5
34
  max_results = max_results_current
35
  def search_click_callback(search_query, max_results):
36
  global pdf_info, pdf_citation
37
- pdf_info, pdf_citation = call_arXiv_API(f'{search_query}', max_results=max_results)
38
- download_pdf(pdf_info)
 
 
 
 
 
39
  return pdf_info
40
 
41
  with st.form(key='columns_in_form', clear_on_submit = False):
42
  c1, c2 = st.columns([8,1])
43
  with c1:
44
- search_query = st.text_input("Input search query here:", placeholder='Keywords for most relevant search...', value=''
45
  )#search_query, max_results_current))
46
 
47
  with c2:
48
- max_results = st.text_input("Max papers", value=max_results_current)
49
  max_results_current = max_results_current
50
  searchButton = st.form_submit_button(label = 'Search')
51
 
@@ -60,34 +67,38 @@ if searchButton:
60
  def answer_callback(question_query):
61
  import paperqa
62
  global docs
63
- progress_text = "Please wait..."
64
- # my_bar = st.progress(0, text = progress_text)
65
- st.info('Please wait...', icon="🔥")
66
  if docs is None:
67
- # my_bar.progress(0.2, "Please wait...")
68
  pdf_info = st.session_state['pdf_info']
69
- # print('buliding docs')
70
  docs = paperqa.Docs()
71
- pdf_paths = [f"{p[4]}/{p[0]}.pdf" for p in pdf_info]
72
  pdf_citations = [p[5] for p in pdf_info]
73
  print(list(zip(pdf_paths, pdf_citations)))
74
-
75
  for d, c in zip(pdf_paths, pdf_citations):
76
- # print(d,c)
77
  docs.add(d, c)
78
- # docs._build_faiss_index()
79
  answer = docs.query(question_query)
80
- # print(answer.formatted_answer)
81
- # my_bar.progress(1.0, "Done!")
82
- st.success('Voila!')
83
  return answer.formatted_answer
84
 
85
  form = st.form(key='question_form')
86
  question_query = form.text_input("What do you wanna know from these papers?", placeholder='Input questions here...',
87
- value='')
88
  submitButton = form.form_submit_button('Submit')
89
 
90
  if submitButton:
91
  with st.expander("Found papers:", expanded=True):
92
  st.write(f"{st.session_state['all_reference_text']}")
93
- st.text_area("Answer:", answer_callback(question_query), height=600)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import os
3
  from PIL import Image
4
  from utils import *
5
+ import asyncio
6
 
7
  import pickle
8
  docs = None
 
15
 
16
  #title
17
  st.title("Answering questions from scientific papers")
18
+ st.markdown("##### This tool will allow you to ask questions and get answers based on scientific papers. It uses OpenAI's GPT models, and you must have your own API key. Each query is about 10k tokens, which costs about only $0.20 on your own API key, charged by OpenAI.")
19
  st.markdown("##### Current version searches on [ArXiv](https://arxiv.org) papers only. 🚧Under development🚧")
20
  st.markdown("Used libraries:\n * [PaperQA](https://github.com/whitead/paper-qa) \n* [langchain](https://github.com/hwchase17/langchain)")
21
 
 
24
  api_key = st.text_input('OpenAI API Key',
25
  placeholder='sk-...',
26
  help=f"['What is that?']({api_key_url})",
27
+ type="password",
28
+ value = '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')
29
 
30
  os.environ["OPENAI_API_KEY"] = f"{api_key}" #
31
  if len(api_key) != 51:
 
36
  max_results = max_results_current
37
  def search_click_callback(search_query, max_results):
38
  global pdf_info, pdf_citation
39
+ # pdf_info, pdf_citation = call_arXiv_API(f'{search_query}', max_results=max_results)
40
+ # download_pdf(pdf_info)
41
+ XRxiv_servers = ['rxiv']
42
+ search_engines = XRxivQuery(search_query, max_results, XRxiv_servers=XRxiv_servers)
43
+ pdf_info = search_engines.call_API()
44
+ search_engines.download_pdf()
45
+
46
  return pdf_info
47
 
48
  with st.form(key='columns_in_form', clear_on_submit = False):
49
  c1, c2 = st.columns([8,1])
50
  with c1:
51
+ search_query = st.text_input("Input search query here:", placeholder='Keywords for most relevant search...', value='CFD Modeling'
52
  )#search_query, max_results_current))
53
 
54
  with c2:
55
+ max_results = st.number_input("Max papers", value=max_results_current)
56
  max_results_current = max_results_current
57
  searchButton = st.form_submit_button(label = 'Search')
58
 
 
67
  def answer_callback(question_query):
68
  import paperqa
69
  global docs
 
 
 
70
  if docs is None:
 
71
  pdf_info = st.session_state['pdf_info']
 
72
  docs = paperqa.Docs()
73
+ pdf_paths = [f"{p[4]}/{p[0].replace(':','').replace('/','').replace('.','')}.pdf" for p in pdf_info]
74
  pdf_citations = [p[5] for p in pdf_info]
75
  print(list(zip(pdf_paths, pdf_citations)))
 
76
  for d, c in zip(pdf_paths, pdf_citations):
 
77
  docs.add(d, c)
78
+ docs._build_faiss_index()
79
  answer = docs.query(question_query)
80
+ st.success('Voila! 😃')
 
 
81
  return answer.formatted_answer
82
 
83
  form = st.form(key='question_form')
84
  question_query = form.text_input("What do you wanna know from these papers?", placeholder='Input questions here...',
85
+ value='Write about CFD modeling')
86
  submitButton = form.form_submit_button('Submit')
87
 
88
  if submitButton:
89
  with st.expander("Found papers:", expanded=True):
90
  st.write(f"{st.session_state['all_reference_text']}")
91
+ with st.spinner('⏳ Please wait...'):
92
+ start = time.time()
93
+ final_answer = answer_callback(question_query)
94
+ length_answer = len(final_answer)
95
+ st.text_area("Answer:", final_answer, height=max(length_answer//4, 100))
96
+ end = time.time()
97
+ clock_time = end - start
98
+ with st.empty():
99
+ st.write(f"✔️ Task completed in {clock_time:.2f} seconds.")
100
+
101
+
102
+
103
+
104
+
utils.py CHANGED
@@ -154,7 +154,7 @@ class XRxivQuery:
154
  # os.remove(f'./{folder_name}/*')
155
  # print(pdf_info)
156
  all_reference_text = []
157
- for i,p in enumerate(stqdm(self.all_pdf_info, desc='Searching and downloading papers')):
158
  pdf_title=p[0]
159
  pdf_category=p[3]
160
  pdf_url=p[1]
 
154
  # os.remove(f'./{folder_name}/*')
155
  # print(pdf_info)
156
  all_reference_text = []
157
+ for i,p in enumerate(stqdm(self.all_pdf_info, desc='🔍 Searching and downloading papers')):
158
  pdf_title=p[0]
159
  pdf_category=p[3]
160
  pdf_url=p[1]