Spaces:

bpHigh
/

AI-Research-Buddy

Paused

App Files Files Community

bpHigh committed on Sep 27, 2023

Commit

1d57e0d

•

1 Parent(s): 6d22dbf

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -19

app.py CHANGED Viewed

@@ -32,7 +32,9 @@ st.info("""This Application currently only works with arxiv and acl anthology we
 1) Arxiv:- https://arxiv.org/abs/paper_unique_identifier
 2) ACL Anthology:- https://aclanthology.org/paper_unique_identifier/
-This Application uses the recently released Meta Nougat Visual Transformer for processing Papers""", icon="ℹ️")
 user_input = st.text_input("Enter the arxiv or acl anthology url of the paper", "https://aclanthology.org/2023.semeval-1.266/")
@@ -55,16 +57,33 @@ def initialize_session_state():
 initialize_session_state()
 def get_paper_content(url: str) -> str:
-    with st.spinner(text="Using Nougat(https://facebookresearch.github.io/nougat/) to read the paper contents and get the markdown representation of the paper"):
-        f = modal.Function.lookup("streamlit-hack", "main")
-        output = f.call(url)
-        st.session_state.paper_content = output
-        return output
 def index_paper_content(content: str):
-    with st.spinner(text="Indexing the paper – hang tight! This should take 3-5 minutes"):
         try:
             LLM_USER_ID = 'openai'
             LLM_APP_ID = 'chat-completion'
@@ -95,7 +114,7 @@ def index_paper_content(content: str):
 def generate_insights():
-    with st.spinner(text="Generating insights on the paper and preparing the Chatbot"):
         try:
             LLM_USER_ID = 'openai'
             LLM_APP_ID = 'chat-completion'
@@ -133,28 +152,28 @@ def generate_insights():
             )
             response_key_insights = query_engine.query("Generate core crux insights, contributions and results of the paper as Key Topics and thier content in markdown format where each Key Topic is in bold followed by its content")
         except Exception as e:
             print(str(e))
             response_key_insights = "Error While Generating Insights"
-        st.session_state.paper_insights = response_key_insights.response
 if st.button("Read and Index Paper"):
     paper_content = get_paper_content(url=user_input)
-    if st.session_state.paper_content is not None:
-        with st.expander("See Paper Contents"):
-            st.markdown(paper_content)
-        result = index_paper_content(content=paper_content)
-        st.write(result)
-        generate_insights()
 if st.session_state.paper_content is not None:
-    with st.expander("See Paper Contents"):
         st.markdown(st.session_state.paper_content)
 if st.session_state.paper_insights is not None:
@@ -292,5 +311,5 @@ if st.session_state.vector_store is not None:
                 else:
                     response = f"This query cannot be processed as it has been detected to be {reason}"
                     st.write(response)
-                    message = {"role": "assistant", "content": response.response}
                     st.session_state.messages.append(message)

 1) Arxiv:- https://arxiv.org/abs/paper_unique_identifier
 2) ACL Anthology:- https://aclanthology.org/paper_unique_identifier/
+This Application uses the recently released Meta Nougat Visual Transformer for processing Papers.
+The Nougat Transformer is inferenced through a deployed app I created on the Modal platform(https://modal.com/) and uses T4 GPU as hardware""", icon="ℹ️")
 user_input = st.text_input("Enter the arxiv or acl anthology url of the paper", "https://aclanthology.org/2023.semeval-1.266/")
 initialize_session_state()
+def is_arxiv_url(url: str) -> bool:  # True when url begins with an arXiv abstract-page address
+    import re  # imported at function scope in this commit
+    arxiv_pattern = r'https?://arxiv\.org/abs/.+'  # http or https, then at least one character after /abs/
+    return bool(re.match(arxiv_pattern, url))  # re.match anchors only at the start of url
+def is_acl_anthology_url(url: str) -> bool:  # True when url begins with an ACL Anthology address
+    import re  # imported at function scope in this commit
+    acl_anthology_pattern = r'https://aclanthology\.org/.*?/'  # https only (no http); a further '/' must appear after the host, so an identifier with no '/' after it does not match
+    return bool(re.match(acl_anthology_pattern, url))  # re.match anchors only at the start of url
 def get_paper_content(url: str) -> str:  # returns the remote function's output for a supported URL, otherwise an error message string
+    with st.spinner(text="Using Nougat(https://facebookresearch.github.io/nougat/) to read the paper contents and get the markdown representation of the paper  – hang tight! This should take 1-2 minutes"):
+        if is_arxiv_url(url=url) or is_acl_anthology_url(url=url):  # only arXiv / ACL Anthology URLs are forwarded
+            f = modal.Function.lookup("streamlit-hack", "main")  # looks up the "main" function of the "streamlit-hack" Modal app
+            output = f.call(url)  # presumably the paper's markdown text, per the spinner message; confirm against the Modal app
+            st.session_state.paper_content = output  # kept in session state so later top-level code can read it
+            return output
+        else:
+            return 'Invalid URL. Please provide a valid ArXiv or ACL Anthology URL.'  # session state is left untouched on this path; the caller matches on this exact text
 def index_paper_content(content: str):
+    with st.spinner(text="Indexing the paper – hang tight! This should take 1-2 minutes"):
         try:
             LLM_USER_ID = 'openai'
             LLM_APP_ID = 'chat-completion'
 def generate_insights():
+    with st.spinner(text="Generating insights on the paper and preparing the Chatbot. Hang tight! this should take 1-2 mins."):
         try:
             LLM_USER_ID = 'openai'
             LLM_APP_ID = 'chat-completion'
             )
             response_key_insights = query_engine.query("Generate core crux insights, contributions and results of the paper as Key Topics and thier content in markdown format where each Key Topic is in bold followed by its content")
+            st.session_state.paper_insights = response_key_insights.response
         except Exception as e:
             print(str(e))
             response_key_insights = "Error While Generating Insights"
+            st.session_state.paper_insights = response_key_insights
 if st.button("Read and Index Paper"):  # runs the read -> index -> insights pipeline when the button is pressed
     paper_content = get_paper_content(url=user_input)
+    if 'Invalid URL. Please provide a valid ArXiv or ACL Anthology URL.' in paper_content:  # substring test for the error text get_paper_content returns; NOTE(review): assumes paper_content is a str - confirm what the remote call can return
+        st.write('Invalid URL. Please provide a valid ArXiv or ACL Anthology URL.')
+    else:
+        if st.session_state.paper_content is not None:  # index only when paper content is present in session state
+            result = index_paper_content(content=paper_content)
+            st.write(result)  # shows whatever index_paper_content returned
+            generate_insights()  # writes st.session_state.paper_insights (success or error text)
 if st.session_state.paper_content is not None:  # outside the button branch, so the expander is drawn whenever paper content is stored
+    with st.expander("See Research Paper Contents"):
         st.markdown(st.session_state.paper_content)
 if st.session_state.paper_insights is not None:
                 else:
                     response = f"This query cannot be processed as it has been detected to be {reason}"
                     st.write(response)
+                    message = {"role": "assistant", "content": response}
                     st.session_state.messages.append(message)