Spaces:
Runtime error
Runtime error
First commit
Browse files- .gitignore +160 -0
- README.md +19 -5
- app.py +180 -0
- assets/zotero-logo.png +0 -0
- functions.py +330 -0
- models.py +71 -0
- requirements.txt +5 -0
- style.css +17 -0
.gitignore
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
-
title: Zotero
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.20.1
|
8 |
app_file: app.py
|
@@ -10,4 +10,18 @@ pinned: false
|
|
10 |
license: mit
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Zotero QA
|
3 |
+
emoji: π
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.20.1
|
8 |
app_file: app.py
|
|
|
10 |
license: mit
|
11 |
---
|
12 |
|
13 |
+
# Zotero QA
|
14 |
+
|
15 |
+
Fan Li ([@FanLi_RnD](https://twitter.com/FanLi_RnD)) - https://apex974.com/articles/literature-reviews-with-paper-qa-and-zotero
|
16 |
+
|
17 |
+
This tool allows you to ask questions based on your Zotero library. It was built upon [Paper QA](https://github.com/whitead/paper-qa), [LangChain AI](https://github.com/hwchase17/langchain) and [pyZotero](https://github.com/urschrei/pyzotero).
|
18 |
+
|
19 |
+
You are required to provide your own [OpenAI API key](https://platform.openai.com/overview).
|
20 |
+
|
21 |
+
You also need a Zotero API key and your user ID (or shared group ID):
|
22 |
+
|
23 |
+
- To create a Zotero API key, visit https://www.zotero.org/settings/keys/new
|
24 |
+
- To access your own library, select "User".
|
25 |
+
- To access a shared group, select "Group".
|
26 |
+
- Personal User ID can be found at https://www.zotero.org/settings/keys.
|
27 |
+
- Group ID is part of the group URL (e.g. 4952526).
|
app.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
from pathlib import Path
|
5 |
+
from functions import (
|
6 |
+
reset_answer,
|
7 |
+
fetch_collections,
|
8 |
+
select_collection,
|
9 |
+
reset_collection,
|
10 |
+
handle_submit,
|
11 |
+
)
|
12 |
+
from models import (
|
13 |
+
Icons,
|
14 |
+
Message,
|
15 |
+
Messages,
|
16 |
+
)
|
17 |
+
|
18 |
+
# Pull OPENAI_API_KEY / ZOTERO_API_KEY / ZOTERO_LIBRARY_ID from a local .env
# file (if any) so the credential boxes below can be pre-filled.
load_dotenv()

# Resolve the stylesheet next to this script instead of the current working
# directory, so the app also starts when launched from another folder.
css_style = (Path(__file__).parent / 'style.css').read_text(encoding='utf-8')

with gr.Blocks(css=css_style) as demo:
    # ---- Per-session state -------------------------------------------------
    zot = gr.State(None)             # Zotero client returned by fetch_collections
    zot_collections = gr.State([])   # raw collection records from Zotero
    data = gr.State([])
    messages = gr.State(Messages([
        Message(
            Icons.INFO, "Please provide all the required OpenAI and Zotero information in the left panel."),
    ]))

    # ---- Header ------------------------------------------------------------
    with gr.Row():
        gr.Markdown("""
        <h2>
        <img src='file/assets/zotero-logo.png' alt="Zotero Logo" style="width: 3.2rem; display: inline; margin-right: 10px;" />
        Zotero Q&A
        </h2>

        Fan Li ([@FanLi_RnD](https://twitter.com/FanLi_RnD)) - https://apex974.com/articles/literature-reviews-with-paper-qa-and-zotero

        This tool allows you to ask questions based on your Zotero library. It was built upon [Paper QA](https://github.com/whitead/paper-qa), [LangChain AI](https://github.com/hwchase17/langchain) and [pyZotero](https://github.com/urschrei/pyzotero).
        """)

    with gr.Row():
        # ---- Left panel: credentials and collection picker -----------------
        with gr.Column(scale=1):
            openai_api_key = gr.Textbox(
                label="OpenAI API Key", type="password", value=os.getenv('OPENAI_API_KEY'))

            gr.HTML()

            zot_api_key = gr.Textbox(
                label="Zotero API Key", type="password", value=os.getenv('ZOTERO_API_KEY'))
            zot_library_type = gr.Radio(choices=[
                "User", "Group"], label="Zotero Library Type", value="User", elem_id="zotero-library-type")
            zot_library_id = gr.Textbox(
                label="Zotero User/Group ID", value=os.getenv('ZOTERO_LIBRARY_ID'))

            # Hidden until collections have been fetched.
            zot_selected_col = gr.Radio(
                [], label="Zotero Collection", elem_id="zotero-collection", visible=False)
            zot_msg = gr.HTML("""
            <div style="padding: 1rem; background-color: #fffbe7; font-size: 0.8rem;">
            <ul style="margin-bottom: 0;">
            <li>Click <a href="https://www.zotero.org/settings/keys/new" target="_blank">here</a> to create Zotero API key.</li>
            <li>To access your own library, select "User".</li>
            <li>To access a shared group, select "Group".</li>
            <li>Personal User ID can be found <a href="https://www.zotero.org/settings/keys" target="_blank">here</a>.</li>
            <li style="margin-bottom: 0;">Group ID is part of the group URL (e.g. 4952526).</li>
            </ul>
            </div>
            """, visible=True)
            zot_fetch_col_btn = gr.Button('Fetch Collections')

        # ---- Right panel: question, progress messages, answer --------------
        with gr.Column(scale=3):

            # Read-only until a collection is selected (see select_collection).
            question = gr.Textbox(
                placeholder="You have to select a Zotero collection to proceed", label="Question", interactive=False, value="What predictive models are used in materials discovery?")

            gr.HTML()

            with gr.Box():
                with gr.Accordion("Messages"):
                    # Render the board explicitly to HTML; the event handlers
                    # likewise always send str(messages) to this component.
                    msg_board = gr.HTML(str(messages.value))

            answer = gr.HTML(None, elem_id="answer")

    # ---- Event wiring ------------------------------------------------------
    openai_api_key.change(reset_answer, inputs=[], outputs=[
        answer], show_progress=False)

    # Changing any Zotero credential invalidates the fetched collections.
    for zot_field in (zot_api_key, zot_library_type, zot_library_id):
        zot_field.change(
            reset_collection,
            inputs=[],
            outputs=[zot_selected_col, zot_fetch_col_btn, question, answer],
            show_progress=False,
        )

    zot_fetch_col_btn.click(
        fn=fetch_collections,
        inputs=[zot_library_id, zot_library_type, zot_api_key, messages],
        outputs=[zot, zot_collections, zot_selected_col,
                 zot_fetch_col_btn, zot_msg, messages, msg_board],
        show_progress=False,
    )
    zot_selected_col.change(
        fn=select_collection,
        inputs=[zot_selected_col, messages],
        outputs=[question, messages, msg_board, answer],
        show_progress=False,
    )

    question.submit(
        fn=handle_submit,
        inputs=[zot, zot_selected_col, zot_collections, question, messages],
        outputs=[messages, msg_board, answer],
        show_progress=False,
    )
|
129 |
+
|
130 |
+
# with gr.Accordion("See Docs:", open=False):
|
131 |
+
# dataset = gr.Dataframe(
|
132 |
+
# headers=["filepath", "citation string", "key"],
|
133 |
+
# datatype=["str", "str", "str"],
|
134 |
+
# col_count=(3, "fixed"),
|
135 |
+
# interactive=False,
|
136 |
+
# label="Documents and Citations",
|
137 |
+
# overflow_row_behaviour='paginate',
|
138 |
+
# max_rows=5
|
139 |
+
# )
|
140 |
+
# buildb = gr.Textbox("β οΈWaiting for documents and key...",
|
141 |
+
# label="msg_board", interactive=False, show_label=True,
|
142 |
+
# max_lines=1)
|
143 |
+
# stats = gr.Dataframe(headers=['Docs', 'Chunks'],
|
144 |
+
# datatype=['number', 'number'],
|
145 |
+
# col_count=(2, "fixed"),
|
146 |
+
# interactive=False,
|
147 |
+
# label="Doc Stats")
|
148 |
+
# openai_api_key.change(validate_dataset, inputs=[
|
149 |
+
# dataset, openai_api_key], outputs=[buildb])
|
150 |
+
# dataset.change(validate_dataset, inputs=[
|
151 |
+
# dataset, openai_api_key], outputs=[buildb])
|
152 |
+
# uploaded_files.change(request_pathname, inputs=[
|
153 |
+
# uploaded_files, data, openai_api_key], outputs=[stats, data, dataset, buildb])
|
154 |
+
# download.click(fn=download_repo, inputs=[
|
155 |
+
# gh_repo, data, openai_api_key], outputs=[stats, data, dataset, buildb])
|
156 |
+
# query = gr.Textbox(
|
157 |
+
# placeholder="Enter your question here...", label="Question")
|
158 |
+
# with gr.Row():
|
159 |
+
# length = gr.Slider(25, 200, value=100, step=5,
|
160 |
+
# label='Words in answer')
|
161 |
+
# marg = gr.Checkbox(True, label='Max marginal relevance')
|
162 |
+
# k = gr.Slider(1, 20, value=10, step=1,
|
163 |
+
# label='Chunks to examine')
|
164 |
+
# sources = gr.Slider(1, 10, value=5, step=1,
|
165 |
+
# label='Contexts to include')
|
166 |
+
|
167 |
+
# ask = gr.Button("Ask Question")
|
168 |
+
# answer = gr.Markdown(label="Answer")
|
169 |
+
# with gr.Accordion("Context", open=True):
|
170 |
+
# context = gr.Markdown(label="Context")
|
171 |
+
|
172 |
+
# with gr.Accordion("Raw Text", open=False):
|
173 |
+
# passages = gr.Markdown(label="Passages")
|
174 |
+
# ask.click(fn=do_ask, inputs=[query, buildb,
|
175 |
+
# openai_api_key, dataset,
|
176 |
+
# length, marg, k, sources,
|
177 |
+
# docs], outputs=[answer, context, passages, docs, stats])
|
178 |
+
|
179 |
+
demo.queue(concurrency_count=10, api_open=False)
|
180 |
+
demo.launch()
|
assets/zotero-logo.png
ADDED
functions.py
ADDED
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import re
|
3 |
+
import requests
|
4 |
+
import tempfile
|
5 |
+
import time
|
6 |
+
from pyzotero import zotero
|
7 |
+
from paperqa import Docs
|
8 |
+
from lxml import html
|
9 |
+
from models import Icons, Message, Messages
|
10 |
+
|
11 |
+
|
12 |
+
def reset_answer():
    """Return a Gradio update that sets the answer panel's value to None."""
    cleared_answer = gr.HTML.update(value=None)
    return cleared_answer
|
14 |
+
|
15 |
+
|
16 |
+
def fetch_collections(id, type, key, messages):
    """Fetch the top-level collections of a Zotero library.

    Args:
        id: Zotero user or group ID; converted with ``int()``.
        type: Library type label ("User" or "Group" in the UI); lower-cased
            before being passed to pyzotero.
        key: Zotero API key.
        messages: The session's ``Messages`` board; a status line is appended.

    Returns:
        A 7-tuple, one value per output wired to the "Fetch Collections"
        button: the Zotero client, the raw collection records, an update for
        the collection radio, an update for the fetch button, an update for
        the help box, the ``messages`` object and an update for the board.
        On failure the client is ``None`` and the collection list is empty.
    """
    try:
        # The client is built inside the try block so that a blank or
        # non-numeric ID (int() raising ValueError) is reported on the
        # message board like any other failure instead of escaping.
        zot = zotero.Zotero(int(id), type.lower(), key)
        collections = zot.collections_top()
        # Radio labels look like "<name> (<item count>)"; select_collection
        # and handle_submit rely on this exact format.
        collection_names = [
            f"{x['data']['name']} ({x['meta']['numItems']})" for x in collections]
        messages.append(
            Message(Icons.INFO, "Please select a Zotero collection to proceed."))
        return (
            zot,
            collections,
            gr.Radio.update(choices=collection_names,
                            visible=True, interactive=True),
            gr.Button.update(visible=False),
            gr.HTML.update(visible=False),
            messages,
            gr.HTML.update(value=str(messages)),
        )
    except Exception as e:
        messages.append(
            Message(Icons.ERR, f"Error occurred when fetching Zotero collection: {e}"))
        print({'messages': str(messages)})
        # NOTE(review): the bare None entries presumably reset the radio and
        # the help box values - confirm that blanking the help text on
        # failure is intended.
        return (
            None,
            [],
            None,
            gr.Button.update(visible=True),
            None,
            messages,
            gr.HTML.update(value=str(messages)),
        )
|
47 |
+
|
48 |
+
|
49 |
+
def select_collection(collection, messages):
    """React to a change of the selected Zotero collection.

    Args:
        collection: The radio label, formatted "<name> (<item count>)", or
            ``None`` when nothing is selected.
        messages: The session's ``Messages`` board; replaced by a single
            confirmation line when a collection is selected.

    Returns:
        A 4-tuple matching the outputs wired to the radio's ``change`` event:
        the question box, the ``messages`` object, the message board and the
        answer panel.
    """
    if collection is None:
        # The event is wired to four outputs (question, messages, msg_board,
        # answer), so four values must be returned; the answer panel gets a
        # no-op update.
        return None, messages, gr.HTML.update(), gr.HTML.update()

    # Strip the trailing " (<count>)" to recover the bare collection name.
    collection_name = re.sub(r'\s\(\d+\)$', '', collection)
    messages.set([Message(
        Icons.OK, f"Selected collection: <span style='font-weight: bold'>{collection_name}</span>. Please type your question and hit \"Enter\".")])
    return (
        gr.Text.update(
            placeholder="Please type your question and hit \"Enter\".", interactive=True),
        messages,
        gr.HTML.update(value=str(messages)),
        gr.HTML.update(value=None),
    )
|
62 |
+
|
63 |
+
|
64 |
+
def search_attachments(id, type, key, collection, queries=(), limit=10):
    """Search a Zotero collection for PDF attachments matching the queries.

    Args:
        id: Zotero user or group ID; converted with ``int()``.
        type: Library type label; lower-cased before use.
        key: Zotero API key.
        collection: Collection record; only its ``'key'`` entry is read.
        queries: Iterable of search strings, one Zotero search per entry.
        limit: Maximum items per search and maximum attachments returned.

    Returns:
        ``(parents, attachments, message)`` where ``parents`` is the set of
        parent item keys, ``attachments`` the de-duplicated PDF attachment
        records and ``message`` an HTML status snippet. On any error the
        result is ``([], [], message)`` with the error text in ``message``.
    """
    try:
        zot = zotero.Zotero(int(id), type.lower(), key)
        searches = [zot.collection_items(
            collection['key'],
            q=q,
            limit=limit,
            itemType='attachment',
            qmode='everything'
        ) for q in queries]

        # Keying by item key drops attachments found by more than one query
        # while keeping first-seen order.
        unique_pdfs = {
            item['key']: item
            for search in searches
            for item in search
            if item['data']['contentType'] == 'application/pdf'
        }
        attachments = list(unique_pdfs.values())[:limit]

        parents = {a['data']['parentItem'] for a in attachments}

        if attachments:
            attachment_word = 'attachments' if len(attachments) > 1 else 'attachment'
            article_word = 'articles' if len(parents) > 1 else 'article'
            # Status glyphs come from the shared Icons enum so this helper
            # agrees with the rest of the message board.
            message = f"<div>{Icons.SUCCESS} Found {len(attachments)} PDF {attachment_word} from {len(parents)} {article_word}.</div>"
        else:
            message = f"<div>{Icons.ERR} No results. Make sure to index your PDF attachments in Zotero.</div>"
        return parents, attachments, message

    except Exception as e:
        message = f"<div>{Icons.WARN} Error occurred when searching in Zotero: {e}</div>"
        return [], [], message
|
86 |
+
|
87 |
+
|
88 |
+
def download_attachment(id, type, key, attachment):
    """Return the raw bytes of a Zotero attachment.

    Args:
        id: Zotero user or group ID; converted with ``int()``.
        type: Library type label; lower-cased before use.
        key: Zotero API key.
        attachment: Attachment record; ``data.linkMode``, ``key`` and (for
            URL imports) ``data.url`` are read.

    Returns:
        The file content: fetched through the Zotero API for
        ``imported_file`` attachments, or downloaded from ``data.url`` for
        ``imported_url`` attachments.

    Raises:
        ValueError: If the link mode is neither of the two supported ones.
        requests.RequestException: If the URL download times out or the
            server answers with an HTTP error status.
    """
    zot = zotero.Zotero(int(id), type.lower(), key)
    link_mode = attachment['data']['linkMode']

    if link_mode == 'imported_file':
        return zot.file(attachment['key'])
    if link_mode == 'imported_url':
        # A timeout keeps a stalled server from hanging the request forever,
        # and raise_for_status() stops an HTTP error page from being returned
        # as if it were the PDF.
        res = requests.get(attachment['data']['url'], timeout=60)
        res.raise_for_status()
        return res.content
    raise ValueError(
        f'Unsupported link mode: {link_mode} for {attachment["key"]}.')
|
100 |
+
|
101 |
+
|
102 |
+
def reset_collection():
    """Return the UI to its "no collection fetched" state.

    Returns:
        A 4-tuple of Gradio updates, in the order the Zotero credential
        fields wire them: hide and empty the collection radio, show the fetch
        button again, lock the question box, and clear the answer panel.
    """
    return (
        gr.Radio.update(choices=[], visible=False),
        # This slot targets the "Fetch Collections" button, so use the Button
        # update helper, as fetch_collections does for the same component.
        gr.Button.update(visible=True),
        gr.Text.update(
            placeholder="You have to select a Zotero collection to proceed", interactive=False),
        gr.HTML.update(value=None),
    )
|
110 |
+
|
111 |
+
|
112 |
+
def _board(messages, answer=None):
    """Build the (messages, message-board update, answer) triple that handle_submit yields."""
    return messages, gr.HTML.update(value=str(messages)), answer


def _attachment_link(attachment):
    """Return an HTML link to the attachment's Zotero page, labelled with its title."""
    return f"<a href='{attachment['links']['alternate']['href']}' target='_blank'>{attachment['data']['title']}</a>"


def handle_submit(zot, collection_name, collections, question, messages):
    """Answer ``question`` from the PDFs of the selected Zotero collection.

    This is a generator: after every step it yields a
    ``(messages, msg_board update, answer)`` triple so the UI can show
    progress. The answer slot stays ``None`` until the final yield.

    Steps: generate search queries with Paper QA, search the collection for
    PDF attachments, fetch a bibliography entry per parent item, download and
    index each PDF, build the vector index, then synthesize the answer.

    Args:
        zot: Zotero client produced by ``fetch_collections``.
        collection_name: Selected radio label, "<name> (<item count>)".
        collections: Raw collection records from ``fetch_collections``.
        question: The user's question.
        messages: The session's ``Messages`` board; reset, then appended to.
    """
    # Nothing can be searched without a client and a selected collection;
    # report that instead of failing on None further down.
    if zot is None or not collection_name:
        messages.append(Message(
            Icons.ERR, "Please fetch and select a Zotero collection before asking a question."))
        yield _board(messages)
        return

    collection_name_only = re.sub(r'\s\(\d+\)$', '', collection_name)
    messages.set([Message(
        Icons.OK, f"Selected collection: <span style='font-weight: bold'>{collection_name_only}</span>.")])
    yield _board(messages)

    docs = Docs()

    # Generate search queries from the question by Paper QA
    try:
        question_prompt = 'A "keyword search" is a list of no more than 3 words, which separated by whitespace only and with no boolean operators (e.g. "dog canine puppy"). Avoid adding any new words not in the question unless they are synonyms to the existing words.'
        queries = [x.strip('"').lower() for x in
                   docs.generate_search_query(question + '\n' + question_prompt)]
        query_str = ", ".join(
            [f"<span style='font-weight: bold;'>{q}</span>" for q in queries])
        messages.append(
            Message(Icons.WAIT, f"Searching your Zotero collection for {query_str}."))
        yield _board(messages)
    except Exception as e:
        messages.append(
            Message(Icons.ERR, f"Error occurred when generating search queries: {e}"))
        yield _board(messages)
        return

    # Search for attachments in Zotero
    try:
        # The radio label is "<name> (<count>)"; rebuild it per record to
        # find the collection the user picked.
        collection = [
            x for x in collections if f"{x['data']['name']} ({x['meta']['numItems']})" == collection_name][0]
        searches = [zot.collection_items(
            collection['key'],
            q=q,
            limit=10,
            itemType='attachment',
            qmode='everything'
        ) for q in queries]
        # Keying by item key removes attachments hit by several queries.
        attachments = list({
            item['key']: item
            for search in searches
            for item in search
            if item['data']['contentType'] == 'application/pdf'
        }.values())[:10]

        parent_keys = {a['data']['parentItem'] for a in attachments}
        if attachments:
            messages.append(Message(
                Icons.SUCCESS, f"Found {len(attachments)} PDF {'attachments' if len(attachments) > 1 else 'attachment'} from {len(parent_keys)} {'articles' if len(parent_keys) > 1 else 'article'}."))
            yield _board(messages)
        else:
            messages.append(Message(
                Icons.ERR, "No results. Make sure to index your PDF attachments in Zotero and try rephrasing your question."))
            yield _board(messages)
            return

    except Exception as e:
        messages.append(
            Message(Icons.ERR, f"Error occurred when searching in Zotero: {e}"))
        yield _board(messages)
        return

    # Compile citation metadata
    citation_dict = {}   # attachment key -> bibliography HTML
    bib_by_parent = {}   # parent item key -> bibliography HTML (one fetch per parent)
    messages.append(
        Message(Icons.WAIT, "Fetching attachment bibliography information."))
    yield _board(messages)
    for attachment in attachments:
        parent_id = attachment["data"]["parentItem"]
        try:
            if parent_id in bib_by_parent:
                citation_dict[attachment["key"]] = bib_by_parent[parent_id]
            else:
                parent = zot.item(
                    attachment["data"]["parentItem"], content="bib", style="nature")[0]
                bib = f"""
                {html.fragment_fromstring(parent).xpath("normalize-space(div[2])")}
                <a href="{attachment['links']['alternate']['href']}" target="_blank" class="zotero-link">Open in Zotero</a>
                """
                bib_by_parent[parent_id] = bib
                citation_dict[attachment["key"]] = bib
        except Exception as e:
            messages.append(Message(
                Icons.WARN, f"Failed to retrieve bibliography for PDF attachment {_attachment_link(attachment)}: {e}"))
            yield _board(messages)

    # Index attachments
    available_attachments = 0
    for attachment in attachments:
        try:
            link_mode = attachment['data']['linkMode']

            if link_mode in ['imported_file', 'imported_url']:
                if link_mode == 'imported_file':
                    attachment_content = zot.file(attachment['key'])
                else:
                    # Bound the download and reject HTTP error pages so they
                    # are not indexed as if they were the PDF.
                    response = requests.get(
                        attachment['data']['url'], timeout=60)
                    response.raise_for_status()
                    attachment_content = response.content
                # The context manager closes (and thereby deletes) the
                # temporary file as soon as Paper QA has been handed its path.
                with tempfile.NamedTemporaryFile(suffix=".pdf") as temp_file:
                    temp_file.write(attachment_content)
                    temp_file.flush()
                    docs.add(temp_file.name, citation_dict[attachment["key"]])
                messages.append(Message(
                    Icons.INDEX, f"Indexed PDF attachment: {_attachment_link(attachment)}."))
                available_attachments += 1
            else:
                messages.append(Message(
                    Icons.WARN, f"Unable to access linked PDF attachment {_attachment_link(attachment)}: The file is not in Zotero online storage."))
            yield _board(messages)
        except Exception as e:
            messages.append(Message(
                Icons.WARN, f"Failed to retrieve PDF attachment {_attachment_link(attachment)}: {e}"))
            yield _board(messages)

    # Build vector index
    if available_attachments == 0:
        messages.append(Message(
            Icons.ERR, "No answer. Unable to access any PDF attachments from your Zotero online storage or public URLs."))
        yield _board(messages)
        return

    # Pluralize on the number of PDFs actually indexed.
    attachment_word = 'attachment' if available_attachments == 1 else 'attachments'
    if docs._faiss_index is None:
        messages.append(Message(
            Icons.WAIT, f"Building vector index based on {available_attachments} available PDF {attachment_word}"))
        yield _board(messages)
        docs._build_faiss_index()

    # Synthesize response
    messages.append(Message(
        Icons.WAIT, f"Creating answer. This will loop through all available PDF {attachment_word} and may take {'a few' if available_attachments > 2 else 'a couple of'} minutes."))
    yield _board(messages)
    try:
        start_time = time.monotonic()
        total_time = 0
        answer = None
        for answer in docs.query_gen(question):
            end_time = time.monotonic()
            time_dif = end_time - start_time
            # Post a heartbeat at most once every five seconds.
            if time_dif > 5:
                start_time = end_time
                total_time += time_dif
                messages.append(Message(
                    Icons.INFO, f"Still in progress: {total_time:.1f} seconds"))
                yield _board(messages)
        if answer is None:
            raise RuntimeError("Paper QA did not return an answer")

        answer_text = '\n'.join(
            [f"<div>{x}</div>" for x in answer.answer.split('\n')])
        # Each reference entry starts with "<n>."; drop that prefix because
        # the <ol> numbers the items itself. Blank entries are skipped so an
        # answer without references still renders.
        references = '\n'.join([f"<li>{x.split('.', 1)[-1]}</li>"
                                for x in answer.references.split('\n\n') if x.strip()])
        formatted_answer = f"""
        <div>{answer_text}</div>

        <h4 style="font-size: 1rem;">References:</h4>
        <ol>
        {references}
        </ol>

        <div>Tokens Used: {answer.tokens} Cost: ${answer.tokens/1000 * 0.002:.2f}</div>
        """.strip()
        messages.append(Message(
            Icons.OK, "Answer created."))
        yield _board(messages, gr.HTML.update(value=formatted_answer))
    except Exception as e:
        messages.append(Message(
            Icons.ERR, f"Error occurred when creating answer: {e}"))
        yield _board(messages)
        return
|
models.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from enum import Enum
|
3 |
+
|
4 |
+
class Icons(Enum):
    """Status glyphs that prefix lines on the message board.

    ``str(member)`` yields the glyph itself, so members can be dropped
    straight into f-strings.
    """

    # NOTE(review): these literals were recovered from mis-decoded UTF-8.
    # INFO, SUCCESS and WARN are determined by the surviving bytes; DOC, ERR,
    # INDEX, OK and WAIT are best guesses - confirm against the original file.
    DOC = "📄"
    ERR = "❌"
    INDEX = "🗃️"
    INFO = "ℹ️"
    OK = "👌"
    SUCCESS = "✅"
    WAIT = "⌛"
    WARN = "⚠️"

    def __str__(self):
        return str(self.value)
|
15 |
+
|
16 |
+
class Message():
    """A single message-board entry: an icon plus its text."""

    def __init__(self, icon, content):
        self.icon, self.content = icon, content

    def __str__(self):
        # Rendered as "<icon> <content>".
        return "{} {}".format(self.icon, self.content)
|
23 |
+
|
24 |
+
class Messages():
    """An ordered list of message-board entries that renders itself to HTML."""

    def __init__(self, messages=None):
        # A fresh list per instance: a mutable default argument would be
        # shared by every Messages() created without arguments.
        self.messages = [] if messages is None else messages

    def __str__(self):
        # Each entry is wrapped in its own <div> inside the styled container.
        return f"""
<div class="messages" style="padding: 1rem; background-color: #fffbe7; font-size: 0.8rem;">
{("").join([f"<div>{x}</div>" for x in self.messages])}
</div>
"""

    def append(self, new_message):
        """Add one entry to the end of the board."""
        self.messages.append(new_message)

    def set(self, messages):
        """Replace all entries with the given list."""
        self.messages = messages
|
40 |
+
|
41 |
+
# class Message():
|
42 |
+
|
43 |
+
# def standing_by(self):
|
44 |
+
# return "<div>π Standing by...</div>"
|
45 |
+
|
46 |
+
# def not_ready(self):
|
47 |
+
# return """
|
48 |
+
# <div style="padding: 1rem; background-color: #fffbe7; font-size: 0.8rem;">
|
49 |
+
# You have to select a Zotero collection to proceed.
|
50 |
+
# </div>
|
51 |
+
# """
|
52 |
+
|
53 |
+
# def openai_api_key(self):
|
54 |
+
# return """
|
55 |
+
# <div style="padding: 1rem; background-color: #fcd7da; font-size: 0.8rem;">
|
56 |
+
# OpenAI API key is either missing or incorrect.
|
57 |
+
# </div>
|
58 |
+
# """
|
59 |
+
|
60 |
+
# def use_queries(queries):
|
61 |
+
# query_str = ", ".join([f"<span style="font-weight: bold;">{q}</span>" for q in queries])
|
62 |
+
# return f"<div>Search your Zotero collection for {query_str}"
|
63 |
+
|
64 |
+
|
65 |
+
|
66 |
+
# def update_status(messages):
|
67 |
+
# return gr.HTML.update(f"""
|
68 |
+
# <div class="messages" style="padding: 1rem; background-color: #fffbe7; font-size: 0.8rem;">
|
69 |
+
# {("").join(messages)}
|
70 |
+
# </div>
|
71 |
+
# """)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
paper-qa
|
2 |
+
gradio
|
3 |
+
requests
|
4 |
+
python-dotenv
|
5 |
+
lxml
|
style.css
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#zotero-library-type label {
|
2 |
+
width: 48.5%;
|
3 |
+
}
|
4 |
+
|
5 |
+
#zotero-collection label {
|
6 |
+
width: 100%;
|
7 |
+
display: block;
|
8 |
+
}
|
9 |
+
|
10 |
+
.zotero-link {
|
11 |
+
font-size: 0.75rem;
|
12 |
+
color: #2d7ea9;
|
13 |
+
}
|
14 |
+
|
15 |
+
#answer .generating{
|
16 |
+
display: none;
|
17 |
+
}
|