File size: 4,890 Bytes
b84e3bd
df44d29
51c1624
75c3b48
053d913
df44d29
 
b6f12dc
1ef9098
409fff7
741dc63
9580320
 
51c1624
02129a7
51c1624
409fff7
06d5048
053d913
a14bf30
1b7fefd
ca323aa
 
df44d29
 
 
 
 
 
053d913
310f57e
ce5740f
df44d29
ce5740f
 
a2f7c22
df44d29
76d5fd8
df44d29
6cbbc77
df44d29
053d913
 
 
 
b6f12dc
df44d29
 
e428506
df44d29
310f57e
 
df44d29
 
053d913
df44d29
e428506
 
 
1b1fe26
e428506
1b1fe26
e428506
1b1fe26
e428506
1b1fe26
6cbbc77
df44d29
 
 
6cbbc77
 
 
 
 
 
 
 
 
 
b6f12dc
6cbbc77
b6f12dc
1ef9098
 
a63b5cf
 
6cbbc77
a2f7c22
b6f12dc
 
 
 
053d913
b6f12dc
 
ce5740f
b6f12dc
bacf688
6cbbc77
053d913
df44d29
 
6cbbc77
 
 
 
a0656de
6cbbc77
 
 
df44d29
 
ce5740f
 
053d913
 
6cbbc77
053d913
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import streamlit as st  # Web App
import os
from PIL import Image
from utils import *
import asyncio

import pickle
# Module-level state: `docs` holds the paperqa document collection once
# answer_callback has built it; `api_key` is a placeholder that the
# text input further down overwrites.
docs = None
api_key = ' '


st.set_page_config(layout="wide")

# Banner image across the top of the page.
image = Image.open('arxiv_decode.png')
st.image(image, width=1000)

# Title and introductory text.
st.title("Answering questions from scientific papers")
st.markdown(
    "##### This tool will allow you to ask questions and get answers based on "
    "scientific papers. It uses OpenAI's GPT models, and you must have your own "
    "API key. Each query is about 10k tokens, which costs about only $0.20 on "
    "your own API key, charged by OpenAI."
)
st.markdown(
    "##### Current version searches on different pre-print servers including "
    "[arXiv](https://arxiv.org), "
    "[chemRxiv](https://chemrxiv.org/engage/chemrxiv/public-dashboard), "
    "[bioRxiv](https://www.biorxiv.org/) and "
    "[medRxiv](https://www.medrxiv.org/). 🚧Under development🚧"
)
st.markdown(
    "Used libraries:\n"
    " * [PaperQA](https://github.com/whitead/paper-qa) \n"
    "* [langchain](https://github.com/hwchase17/langchain)"
)
st.markdown(
    "See this [tweet](https://twitter.com/MehradAnsari/status/1627649959204888576) "
    "for a demo."
)


# Help-tooltip link explaining where to find an OpenAI secret key.
api_key_url = 'https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key'

# Masked input for the user's key; whitespace picked up by copy/paste is
# stripped so it neither reaches the environment nor trips the check below.
api_key = st.text_input(
    'OpenAI API Key',
    placeholder='sk-...',
    help=f"['What is that?']({api_key_url})",
    type="password",
    value='',
).strip()

# Exported via the process environment.
# NOTE(review): presumably read by the OpenAI client used through
# paperqa/langchain -- confirm. os.environ is process-wide, so this value is
# visible to everything running in the same Python process.
os.environ["OPENAI_API_KEY"] = api_key

# OpenAI keys do not have a fixed length: legacy keys are 51 characters but
# newer ones (e.g. "sk-proj-...") are longer. Check the "sk-" prefix and a
# minimum length instead of requiring exactly 51 characters.
if not api_key.startswith('sk-') or len(api_key) < 51:
    st.warning('Please enter a valid OpenAI API key.', icon="⚠️")

# Default number of papers per search: `max_results_current` seeds the
# "Max papers" input and `max_results` starts out at the same value.
max_results_current = max_results = 5
def search_click_callback(search_query, max_results, XRxiv_servers=None):
    """Run a pre-print search and download the PDFs it finds.

    Args:
        search_query: Query text forwarded to ``XRxivQuery``.
        max_results: Maximum number of papers, forwarded to ``XRxivQuery``.
        XRxiv_servers: List of server identifiers to search. ``None`` (the
            default) is treated as an empty list; a fresh list is created
            per call rather than sharing one mutable default.

    Returns:
        The value returned by ``XRxivQuery.call_API()``. It is also stored
        in the module-level ``pdf_info``.
    """
    global pdf_info
    servers = [] if XRxiv_servers is None else XRxiv_servers
    search_engines = XRxivQuery(search_query, max_results, XRxiv_servers=servers)
    pdf_info = search_engines.call_API()
    # Fetch the PDFs for the results found by call_API().
    search_engines.download_pdf()

    return pdf_info

# Search form: query text, paper limit and pre-print server selection.
# Widgets inside a form only deliver their values when "Search" is pressed.
with st.form(key='columns_in_form', clear_on_submit=False):
    c1, c2 = st.columns([5, 0.8])
    with c1:
        search_query = st.text_input(
            "Input search query here:",
            placeholder='Keywords for most relevant search...',
            value='',
        )

    with c2:
        # min_value stops the user from requesting zero or a negative
        # number of papers.
        max_results = st.number_input(
            "Max papers",
            min_value=1,
            value=max_results_current,
        )
    st.markdown('Pre-print server')
    # One checkbox per supported server, laid out side by side.
    checks = st.columns(4)
    with checks[0]:
        ArXiv_check = st.checkbox('arXiv')
    with checks[1]:
        ChemArXiv_check = st.checkbox('chemRxiv')
    with checks[2]:
        BioArXiv_check = st.checkbox('bioRxiv')
    with checks[3]:
        MedrXiv_check = st.checkbox('medRxiv')

    searchButton = st.form_submit_button(label='Search')

if searchButton:
    # Map each checkbox to the server identifier passed to the search.
    # NOTE(review): arXiv is sent as 'rxiv' -- presumably the identifier
    # XRxivQuery expects; confirm against utils.
    _server_choices = (
        (ArXiv_check, 'rxiv'),
        (ChemArXiv_check, 'chemrxiv'),
        (BioArXiv_check, 'biorxiv'),
        (MedrXiv_check, 'medrxiv'),
    )
    XRxiv_servers = [name for checked, name in _server_choices if checked]

    pdf_info = search_click_callback(search_query, max_results, XRxiv_servers=XRxiv_servers)
    # Stored in session state because answer_callback reads the results
    # from st.session_state['pdf_info'].
    st.session_state['pdf_info'] = pdf_info


def answer_callback(question_query, word_count):
    """Answer a question from the downloaded papers using paperqa.

    When the module-level ``docs`` is ``None``, a ``paperqa.Docs``
    collection is created and filled from ``st.session_state['pdf_info']``.
    For each entry, index 4 is the path prefix, index 0 becomes the file
    name once ':', '/' and '.' are removed, and index 5 is the citation.

    Args:
        question_query: The question passed to ``docs.query``.
        word_count: Integer used in the ``'use N words'`` length prompt.

    Returns:
        The ``formatted_answer`` attribute of the query result.
    """
    import paperqa
    global docs
    if docs is None:
        papers = st.session_state['pdf_info']
        docs = paperqa.Docs()
        # Characters removed from the identifier to form the PDF file name.
        dropped = str.maketrans('', '', ':/.')
        pdf_paths = [f"{paper[4]}/{paper[0].translate(dropped)}.pdf" for paper in papers]
        pdf_citations = [paper[5] for paper in papers]
        path_citation_pairs = list(zip(pdf_paths, pdf_citations))
        print(path_citation_pairs)
        for pdf_path, citation in path_citation_pairs:
            docs.add(pdf_path, citation)
    # The text index is rebuilt before every query.
    docs._build_texts_index()
    length_prompt = f'use {word_count:d} words'
    answer = docs.query(question_query, length_prompt=length_prompt)
    st.success('Voila! 😃')
    return answer.formatted_answer

# Question form: the question text plus a slider for the suggested answer
# length (30 to 300 words, default 100).
with st.form(key='question_form', clear_on_submit=False):
    c1, c2 = st.columns([6, 2])
    with c1:
        question_query = st.text_input(
            "What do you wanna know from these papers?",
            placeholder='Input questions here...',
            value='',
        )
    with c2:
        word_count = st.slider(
            "Suggested number of words in your answer?",
            min_value=30,
            max_value=300,
            value=100,
        )
    submitButton = st.form_submit_button(label='Submit')

if submitButton:
    # `time` is not imported explicitly among this file's visible imports,
    # so import it here instead of relying on `from utils import *`.
    import time

    # Both entries are needed below: 'pdf_info' by answer_callback and
    # 'all_reference_text' for the paper list.
    # NOTE(review): 'all_reference_text' is not set anywhere in this file --
    # presumably it is populated by the search code in utils; confirm.
    _required_keys = ('pdf_info', 'all_reference_text')
    if any(key not in st.session_state for key in _required_keys):
        # Submitting before any search used to raise a KeyError; show a
        # warning instead.
        st.warning('Please search for papers before asking a question.', icon="⚠️")
    else:
        with st.expander("Found papers:", expanded=True):
            st.write(f"{st.session_state['all_reference_text']}")
        with st.spinner('⏳ Please wait...'):
            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            final_answer = answer_callback(question_query, word_count)
            length_answer = len(final_answer)
            # Scale the text area with the answer length, at least 100px high.
            st.text_area("Answer:", final_answer, height=max(length_answer//4, 100))
            end = time.perf_counter()
            clock_time = end - start
            with st.empty():
                st.write(f"✔️ Task completed in {clock_time:.2f} seconds.")