Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -32,7 +32,9 @@ st.info("""This Application currently only works with arxiv and acl anthology we
|
|
32 |
1) Arxiv:- https://arxiv.org/abs/paper_unique_identifier
|
33 |
2) ACL Anthology:- https://aclanthology.org/paper_unique_identifier/
|
34 |
|
35 |
-
This Application uses the recently released Meta Nougat Visual Transformer for processing Papers
|
|
|
|
|
36 |
user_input = st.text_input("Enter the arxiv or acl anthology url of the paper", "https://aclanthology.org/2023.semeval-1.266/")
|
37 |
|
38 |
|
@@ -55,16 +57,33 @@ def initialize_session_state():
|
|
55 |
initialize_session_state()
|
56 |
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
def get_paper_content(url: str) -> str:
|
59 |
-
with st.spinner(text="Using Nougat(https://facebookresearch.github.io/nougat/) to read the paper contents and get the markdown representation of the paper"):
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
64 |
|
65 |
|
66 |
def index_paper_content(content: str):
|
67 |
-
with st.spinner(text="Indexing the paper – hang tight! This should take
|
68 |
try:
|
69 |
LLM_USER_ID = 'openai'
|
70 |
LLM_APP_ID = 'chat-completion'
|
@@ -95,7 +114,7 @@ def index_paper_content(content: str):
|
|
95 |
|
96 |
|
97 |
def generate_insights():
|
98 |
-
with st.spinner(text="Generating insights on the paper and preparing the Chatbot"):
|
99 |
try:
|
100 |
LLM_USER_ID = 'openai'
|
101 |
LLM_APP_ID = 'chat-completion'
|
@@ -133,28 +152,28 @@ def generate_insights():
|
|
133 |
)
|
134 |
|
135 |
response_key_insights = query_engine.query("Generate core crux insights, contributions and results of the paper as Key Topics and thier content in markdown format where each Key Topic is in bold followed by its content")
|
136 |
-
|
137 |
except Exception as e:
|
138 |
print(str(e))
|
139 |
response_key_insights = "Error While Generating Insights"
|
140 |
-
|
141 |
-
st.session_state.paper_insights = response_key_insights.response
|
142 |
|
143 |
|
144 |
if st.button("Read and Index Paper"):
|
145 |
paper_content = get_paper_content(url=user_input)
|
146 |
|
147 |
-
if
|
148 |
-
|
149 |
-
st.markdown(paper_content)
|
150 |
|
151 |
-
|
152 |
-
st.
|
153 |
-
|
|
|
|
|
154 |
|
155 |
|
156 |
if st.session_state.paper_content is not None:
|
157 |
-
with st.expander("See Paper Contents"):
|
158 |
st.markdown(st.session_state.paper_content)
|
159 |
|
160 |
if st.session_state.paper_insights is not None:
|
@@ -292,5 +311,5 @@ if st.session_state.vector_store is not None:
|
|
292 |
else:
|
293 |
response = f"This query cannot be processed as it has been detected to be {reason}"
|
294 |
st.write(response)
|
295 |
-
message = {"role": "assistant", "content": response
|
296 |
st.session_state.messages.append(message)
|
|
|
32 |
1) Arxiv:- https://arxiv.org/abs/paper_unique_identifier
|
33 |
2) ACL Anthology:- https://aclanthology.org/paper_unique_identifier/
|
34 |
|
35 |
+
This Application uses the recently released Meta Nougat Visual Transformer for processing Papers.
|
36 |
+
|
37 |
+
The Nougat Transformer is inferenced through a deployed app I created on the Modal platform(https://modal.com/) and uses T4 GPU as hardware""", icon="ℹ️")
|
38 |
user_input = st.text_input("Enter the arxiv or acl anthology url of the paper", "https://aclanthology.org/2023.semeval-1.266/")
|
39 |
|
40 |
|
|
|
57 |
initialize_session_state()
|
58 |
|
59 |
|
60 |
+
def is_arxiv_url(url: str) -> bool:
    """Return True when *url* starts with an arXiv abstract-page address.

    The check is a prefix match: the string must begin with
    ``http://arxiv.org/abs/`` or ``https://arxiv.org/abs/`` and be followed
    by at least one character (other than a newline).  Nothing is verified
    about the identifier itself.
    """
    # Imported locally so the helper does not rely on a module-level import.
    import re

    matched = re.compile(r'https?://arxiv\.org/abs/.+').match(url)
    return matched is not None
|
65 |
+
|
66 |
+
|
67 |
+
def is_acl_anthology_url(url: str) -> bool:
    """Return True when *url* starts with an ACL Anthology paper address.

    Accepted shape: ``http(s)://aclanthology.org/<identifier>/`` where
    ``<identifier>`` is a non-empty path segment containing no slash or
    whitespace.  The trailing slash after the identifier is required.
    The check is a prefix match, so anything may follow that slash.

    Args:
        url: The address typed by the user.

    Returns:
        True if the address has the shape above, False otherwise.
    """
    # Imported locally so the helper does not rely on a module-level import.
    import re

    # `[^/\s]+` (instead of a lazy `.*?`) rejects an empty identifier such as
    # "https://aclanthology.org//"; `https?` mirrors the scheme handling of
    # is_arxiv_url so both validators treat http/https the same way.
    acl_anthology_pattern = r'https?://aclanthology\.org/[^/\s]+/'
    return bool(re.match(acl_anthology_pattern, url))
|
72 |
+
|
73 |
+
|
74 |
def get_paper_content(url: str) -> str:
    """Fetch the paper at *url* through the deployed Modal function.

    While a Streamlit spinner is shown, the URL is validated with
    ``is_arxiv_url`` / ``is_acl_anthology_url``.  For an accepted URL the
    Modal function ``main`` of the app ``streamlit-hack`` is looked up and
    called with the URL; its result is stored in
    ``st.session_state.paper_content`` and returned.  For a rejected URL a
    fixed error message is returned and the session state is left untouched.

    NOTE(review): the remote result is presumably the paper's markdown
    text (per the spinner message) -- confirm against the Modal app.
    """
    with st.spinner(text="Using Nougat(https://facebookresearch.github.io/nougat/) to read the paper contents and get the markdown representation of the paper – hang tight! This should take 1-2 minutes"):
        supported = is_arxiv_url(url=url) or is_acl_anthology_url(url=url)
        if not supported:
            # Callers detect this exact sentinel text, so it must not change.
            return 'Invalid URL. Please provide a valid ArXiv or ACL Anthology URL.'

        remote_fn = modal.Function.lookup("streamlit-hack", "main")
        paper_markdown = remote_fn.call(url)
        st.session_state.paper_content = paper_markdown
        return paper_markdown
|
83 |
|
84 |
|
85 |
def index_paper_content(content: str):
|
86 |
+
with st.spinner(text="Indexing the paper – hang tight! This should take 1-2 minutes"):
|
87 |
try:
|
88 |
LLM_USER_ID = 'openai'
|
89 |
LLM_APP_ID = 'chat-completion'
|
|
|
114 |
|
115 |
|
116 |
def generate_insights():
|
117 |
+
with st.spinner(text="Generating insights on the paper and preparing the Chatbot. Hang tight! this should take 1-2 mins."):
|
118 |
try:
|
119 |
LLM_USER_ID = 'openai'
|
120 |
LLM_APP_ID = 'chat-completion'
|
|
|
152 |
)
|
153 |
|
154 |
response_key_insights = query_engine.query("Generate core crux insights, contributions and results of the paper as Key Topics and thier content in markdown format where each Key Topic is in bold followed by its content")
|
155 |
+
st.session_state.paper_insights = response_key_insights.response
|
156 |
except Exception as e:
|
157 |
print(str(e))
|
158 |
response_key_insights = "Error While Generating Insights"
|
159 |
+
st.session_state.paper_insights = response_key_insights
|
|
|
160 |
|
161 |
|
162 |
if st.button("Read and Index Paper"):
|
163 |
paper_content = get_paper_content(url=user_input)
|
164 |
|
165 |
+
if 'Invalid URL. Please provide a valid ArXiv or ACL Anthology URL.' in paper_content:
|
166 |
+
st.write('Invalid URL. Please provide a valid ArXiv or ACL Anthology URL.')
|
|
|
167 |
|
168 |
+
else:
|
169 |
+
if st.session_state.paper_content is not None:
|
170 |
+
result = index_paper_content(content=paper_content)
|
171 |
+
st.write(result)
|
172 |
+
generate_insights()
|
173 |
|
174 |
|
175 |
if st.session_state.paper_content is not None:
|
176 |
+
with st.expander("See Research Paper Contents"):
|
177 |
st.markdown(st.session_state.paper_content)
|
178 |
|
179 |
if st.session_state.paper_insights is not None:
|
|
|
311 |
else:
|
312 |
response = f"This query cannot be processed as it has been detected to be {reason}"
|
313 |
st.write(response)
|
314 |
+
message = {"role": "assistant", "content": response}
|
315 |
st.session_state.messages.append(message)
|