taesiri committed on
Commit
efed853
1 Parent(s): 7e8de53
Files changed (1) hide show
  1. app.py +53 -10
app.py CHANGED
@@ -1,22 +1,25 @@
1
  import os
2
  import re
 
 
3
 
4
- from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic
5
  import gradio as gr
6
  import requests
7
-
8
- import arxiv
9
  from arxiv_latex_extractor import get_paper_content
10
- import requests
 
 
 
11
 
12
  from coreservice import app
13
- from fastapi.staticfiles import StaticFiles
14
 
 
15
 
16
  LEADING_PROMPT = "Read the following paper:"
17
 
18
 
19
-
20
  def replace_texttt(text):
21
  return re.sub(r"\\texttt\{(.*?)\}", r"*\1*", text)
22
 
@@ -102,24 +105,66 @@ class ContextualQA:
102
  self.client = None
103
 
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  def load_context(paper_id):
106
  global LEADING_PROMPT
107
 
108
- # First, try to get the paper from Hugging Face
 
 
 
109
  latex_source = get_paper_from_huggingface(paper_id)
 
110
 
111
- # If not found, use arxiv_latex_extractor
112
  if not latex_source:
113
  try:
114
  latex_source = get_paper_content(paper_id)
 
115
  except Exception as e:
116
  return None, [(f"Error loading paper with id {paper_id}: {e}",)]
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
119
  qa_model = ContextualQA(client, model="claude-2.0")
120
  context = f"{LEADING_PROMPT}\n{latex_source}"
121
  qa_model.load_text(context)
122
 
 
123
  title, abstract = get_paper_info(paper_id)
124
  title = replace_texttt(title)
125
  abstract = replace_texttt(abstract)
@@ -229,7 +274,5 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
229
 
230
  btn_clear.click(clear_context, outputs=[chatbot])
231
 
232
- # demo.launch()
233
-
234
  app.mount("/js", StaticFiles(directory="js"), name="js")
235
  gr.mount_gradio_app(app, demo, path="/")
 
1
  import os
2
  import re
3
+ import tempfile
4
+ import os
5
 
6
+ import arxiv
7
  import gradio as gr
8
  import requests
9
+ from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic
 
10
  from arxiv_latex_extractor import get_paper_content
11
+ from fastapi.staticfiles import StaticFiles
12
+ from huggingface_hub import HfApi
13
+
14
+ hf_api = HfApi()
15
 
16
  from coreservice import app
 
17
 
18
+ hf_api = HfApi()
19
 
20
  LEADING_PROMPT = "Read the following paper:"
21
 
22
 
 
23
  def replace_texttt(text):
24
  return re.sub(r"\\texttt\{(.*?)\}", r"*\1*", text)
25
 
 
105
  self.client = None
106
 
107
 
108
def clean_paper_id(raw_id):
    """Normalize user input into a bare arXiv paper identifier.

    Accepts either a plain identifier (e.g. ``2310.12345``) or text
    containing an arXiv abstract URL (e.g.
    ``https://arxiv.org/abs/2310.12345v2``). Surrounding whitespace is
    removed, the identifier is extracted from the URL when one is present,
    and trailing dots are dropped.

    Args:
        raw_id: Identifier or URL string as entered by the user.

    Returns:
        The cleaned identifier string.
    """
    cleaned_id = raw_id.strip()

    # Extract the paper ID from an arXiv abstract URL if present.
    match = re.search(r"arxiv\.org\/abs\/([\w\.]+)", cleaned_id)
    if match:
        cleaned_id = match.group(1)

    # The capture class above also accepts ".", so a sentence-ending dot
    # right after a URL ends up inside the match. Strip trailing dots on
    # both the URL and the plain-ID path; a valid ID never ends with one.
    return cleaned_id.rstrip(".")
121
+
122
+
123
  def load_context(paper_id):
124
  global LEADING_PROMPT
125
 
126
+ # Clean the paper_id to remove spaces or extract ID from URL
127
+ paper_id = clean_paper_id(paper_id)
128
+
129
+ # Check if the paper is already on Hugging Face
130
  latex_source = get_paper_from_huggingface(paper_id)
131
+ paper_downloaded = False
132
 
133
+ # If not found on Hugging Face, use arxiv_latex_extractor
134
  if not latex_source:
135
  try:
136
  latex_source = get_paper_content(paper_id)
137
+ paper_downloaded = True
138
  except Exception as e:
139
  return None, [(f"Error loading paper with id {paper_id}: {e}",)]
140
 
141
+ if paper_downloaded:
142
+ # Save the LaTeX content to a temporary file
143
+ with tempfile.NamedTemporaryFile(
144
+ mode="w+", suffix=".tex", delete=False
145
+ ) as tmp_file:
146
+ tmp_file.write(latex_source)
147
+ temp_file_path = tmp_file.name
148
+
149
+ # Upload the paper to Hugging Face
150
+ try:
151
+ if os.path.getsize(temp_file_path) > 1:
152
+ hf_api.upload_file(
153
+ path_or_fileobj=temp_file_path,
154
+ path_in_repo=f"papers/{paper_id}.tex",
155
+ repo_id="taesiri/arxiv_db",
156
+ repo_type="dataset",
157
+ )
158
+ except Exception as e:
159
+ print(f"Error uploading paper with id {paper_id}: {e}")
160
+
161
+ # Initialize the Anthropic client and QA model
162
  client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
163
  qa_model = ContextualQA(client, model="claude-2.0")
164
  context = f"{LEADING_PROMPT}\n{latex_source}"
165
  qa_model.load_text(context)
166
 
167
+ # Get the paper's title and abstract
168
  title, abstract = get_paper_info(paper_id)
169
  title = replace_texttt(title)
170
  abstract = replace_texttt(abstract)
 
274
 
275
  btn_clear.click(clear_context, outputs=[chatbot])
276
 
 
 
277
  app.mount("/js", StaticFiles(directory="js"), name="js")
278
  gr.mount_gradio_app(app, demo, path="/")