bpHigh committed on
Commit
1d57e0d
1 Parent(s): 6d22dbf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -19
app.py CHANGED
@@ -32,7 +32,9 @@ st.info("""This Application currently only works with arxiv and acl anthology we
32
  1) Arxiv:- https://arxiv.org/abs/paper_unique_identifier
33
  2) ACL Anthology:- https://aclanthology.org/paper_unique_identifier/
34
 
35
- This Application uses the recently released Meta Nougat Visual Transformer for processing Papers""", icon="ℹ️")
 
 
36
  user_input = st.text_input("Enter the arxiv or acl anthology url of the paper", "https://aclanthology.org/2023.semeval-1.266/")
37
 
38
 
@@ -55,16 +57,33 @@ def initialize_session_state():
55
  initialize_session_state()
56
 
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def get_paper_content(url: str) -> str:
59
- with st.spinner(text="Using Nougat(https://facebookresearch.github.io/nougat/) to read the paper contents and get the markdown representation of the paper"):
60
- f = modal.Function.lookup("streamlit-hack", "main")
61
- output = f.call(url)
62
- st.session_state.paper_content = output
63
- return output
 
 
 
64
 
65
 
66
  def index_paper_content(content: str):
67
- with st.spinner(text="Indexing the paper – hang tight! This should take 3-5 minutes"):
68
  try:
69
  LLM_USER_ID = 'openai'
70
  LLM_APP_ID = 'chat-completion'
@@ -95,7 +114,7 @@ def index_paper_content(content: str):
95
 
96
 
97
  def generate_insights():
98
- with st.spinner(text="Generating insights on the paper and preparing the Chatbot"):
99
  try:
100
  LLM_USER_ID = 'openai'
101
  LLM_APP_ID = 'chat-completion'
@@ -133,28 +152,28 @@ def generate_insights():
133
  )
134
 
135
  response_key_insights = query_engine.query("Generate core crux insights, contributions and results of the paper as Key Topics and thier content in markdown format where each Key Topic is in bold followed by its content")
136
-
137
  except Exception as e:
138
  print(str(e))
139
  response_key_insights = "Error While Generating Insights"
140
-
141
- st.session_state.paper_insights = response_key_insights.response
142
 
143
 
144
  if st.button("Read and Index Paper"):
145
  paper_content = get_paper_content(url=user_input)
146
 
147
- if st.session_state.paper_content is not None:
148
- with st.expander("See Paper Contents"):
149
- st.markdown(paper_content)
150
 
151
- result = index_paper_content(content=paper_content)
152
- st.write(result)
153
- generate_insights()
 
 
154
 
155
 
156
  if st.session_state.paper_content is not None:
157
- with st.expander("See Paper Contents"):
158
  st.markdown(st.session_state.paper_content)
159
 
160
  if st.session_state.paper_insights is not None:
@@ -292,5 +311,5 @@ if st.session_state.vector_store is not None:
292
  else:
293
  response = f"This query cannot be processed as it has been detected to be {reason}"
294
  st.write(response)
295
- message = {"role": "assistant", "content": response.response}
296
  st.session_state.messages.append(message)
 
32
  1) Arxiv:- https://arxiv.org/abs/paper_unique_identifier
33
  2) ACL Anthology:- https://aclanthology.org/paper_unique_identifier/
34
 
35
+ This Application uses the recently released Meta Nougat Visual Transformer for processing Papers.
36
+
37
+ The Nougat Transformer is inferenced through a deployed app I created on the Modal platform(https://modal.com/) and uses T4 GPU as hardware""", icon="ℹ️")
38
  user_input = st.text_input("Enter the arxiv or acl anthology url of the paper", "https://aclanthology.org/2023.semeval-1.266/")
39
 
40
 
 
57
  initialize_session_state()
58
 
59
 
60
def is_arxiv_url(url: str) -> bool:
    """Return True when *url* begins with an arXiv abstract-page address.

    The accepted shape is ``http://`` or ``https://`` followed by
    ``arxiv.org/abs/<identifier>``. The identifier must start with at least
    one non-whitespace character, so a bare ``.../abs/`` or one followed only
    by blanks is rejected instead of being forwarded to the paper reader.

    Matching uses ``re.match`` and is therefore anchored at the start of the
    string only; anything after the identifier (version suffix, query
    string) is tolerated.

    Args:
        url: The address typed by the user.

    Returns:
        True if the address looks like an arXiv abstract URL, else False.
    """
    # Local import keeps the helper self-contained; file-level imports are
    # not relied upon here.
    import re

    # \S+ (rather than .+) refuses identifiers made up solely of whitespace.
    arxiv_pattern = r'https?://arxiv\.org/abs/\S+'
    return bool(re.match(arxiv_pattern, url))
65
+
66
+
67
def is_acl_anthology_url(url: str) -> bool:
    """Return True when *url* begins with an ACL Anthology paper address.

    The accepted shape is ``https://aclanthology.org/<identifier>/``: the
    scheme must be ``https``, the identifier must contain at least one
    character that is neither a slash nor whitespace, and the trailing slash
    is required. An empty identifier (``https://aclanthology.org//``) is
    rejected.

    Matching uses ``re.match`` and is therefore anchored at the start of the
    string only; text after the trailing slash is tolerated.

    Args:
        url: The address typed by the user.

    Returns:
        True if the address looks like an ACL Anthology paper URL, else
        False.
    """
    # Local import keeps the helper self-contained; file-level imports are
    # not relied upon here.
    import re

    # [^/\s]+ demands a non-empty identifier; the previous lazy `.*?` also
    # matched zero characters and so accepted a double slash.
    acl_anthology_pattern = r'https://aclanthology\.org/[^/\s]+/'
    return bool(re.match(acl_anthology_pattern, url))
72
+
73
+
74
  def get_paper_content(url: str) -> str:
75
+ with st.spinner(text="Using Nougat(https://facebookresearch.github.io/nougat/) to read the paper contents and get the markdown representation of the paper – hang tight! This should take 1-2 minutes"):
76
+ if is_arxiv_url(url=url) or is_acl_anthology_url(url=url):
77
+ f = modal.Function.lookup("streamlit-hack", "main")
78
+ output = f.call(url)
79
+ st.session_state.paper_content = output
80
+ return output
81
+ else:
82
+ return 'Invalid URL. Please provide a valid ArXiv or ACL Anthology URL.'
83
 
84
 
85
  def index_paper_content(content: str):
86
+ with st.spinner(text="Indexing the paper – hang tight! This should take 1-2 minutes"):
87
  try:
88
  LLM_USER_ID = 'openai'
89
  LLM_APP_ID = 'chat-completion'
 
114
 
115
 
116
  def generate_insights():
117
+ with st.spinner(text="Generating insights on the paper and preparing the Chatbot. Hang tight! this should take 1-2 mins."):
118
  try:
119
  LLM_USER_ID = 'openai'
120
  LLM_APP_ID = 'chat-completion'
 
152
  )
153
 
154
  response_key_insights = query_engine.query("Generate core crux insights, contributions and results of the paper as Key Topics and thier content in markdown format where each Key Topic is in bold followed by its content")
155
+ st.session_state.paper_insights = response_key_insights.response
156
  except Exception as e:
157
  print(str(e))
158
  response_key_insights = "Error While Generating Insights"
159
+ st.session_state.paper_insights = response_key_insights
 
160
 
161
 
162
  if st.button("Read and Index Paper"):
163
  paper_content = get_paper_content(url=user_input)
164
 
165
+ if 'Invalid URL. Please provide a valid ArXiv or ACL Anthology URL.' in paper_content:
166
+ st.write('Invalid URL. Please provide a valid ArXiv or ACL Anthology URL.')
 
167
 
168
+ else:
169
+ if st.session_state.paper_content is not None:
170
+ result = index_paper_content(content=paper_content)
171
+ st.write(result)
172
+ generate_insights()
173
 
174
 
175
  if st.session_state.paper_content is not None:
176
+ with st.expander("See Research Paper Contents"):
177
  st.markdown(st.session_state.paper_content)
178
 
179
  if st.session_state.paper_insights is not None:
 
311
  else:
312
  response = f"This query cannot be processed as it has been detected to be {reason}"
313
  st.write(response)
314
+ message = {"role": "assistant", "content": response}
315
  st.session_state.messages.append(message)