import os import re import tempfile import os import arxiv import gradio as gr import requests from anthropic import Anthropic from arxiv_latex_extractor import get_paper_content from huggingface_hub import HfApi LEADING_PROMPT = "Read the following paper:" custom_css = """ div#component-4 #chatbot { height: 800px !important; } rowZ""" ga_script = """ """ def replace_texttt(text): return re.sub(r"\\texttt\{(.*?)\}", r"*\1*", text) def get_paper_info(paper_id): # Create a search query with the arXiv ID search = arxiv.Search(id_list=[paper_id]) # Fetch the paper using its arXiv ID paper = next(search.results(), None) if paper is not None: # Return the paper's title and abstract # remove new lines title_ = paper.title.replace("\n", " ").replace("\r", " ") summary_ = paper.summary.replace("\n", " ").replace("\r", " ") return title_, summary_ else: return None, None def get_paper_from_huggingface(paper_id): top_level_paper_id = paper_id.split(".")[0] path_in_repo = f"papers/{top_level_paper_id}/{paper_id}.tex" try: url = ( f"https://huggingface.co/datasets/taesiri/arxiv_db/raw/main/{path_in_repo}" ) response = requests.get(url) response.raise_for_status() return response.text except Exception as e: return None class ContextualQA: def __init__(self, client, model="claude-3-opus-20240229", initial_context=""): self.client = client self.model = model self.context = initial_context # Set the initial context here self.questions = [] self.responses = [] def load_text(self, text): self.context = text # Update the context with new text def ask_question(self, question): # Prepare the messages list with previous Q&A pairs and the current context messages = [{ "role": "user", "content": [{"type": "text", "text": "Read the document bleow and answer the questions\n"+self.context}] }] messages.append({ "role": "assistant", "content": [{"type": "text", "text": "The document is loaded. You can now ask questions."}] }) for q, a in zip(self.questions, self.responses): messages.append({ "role": "user", "content": [{"type": "text", "text": q}] }) messages.append({ "role": "assistant", "content": [{"type": "text", "text": a}] }) # Add the new question messages.append({ "role": "user", "content": [{"type": "text", "text": question}] }) # Create the message with the system context and the list of messages response = self.client.messages.create( model=self.model, max_tokens=1024, system=self.context, # Pass the context directly as a string messages=messages, temperature=0 # Assuming you want deterministic responses ) # Assuming the response object has a 'content' attribute that contains the answer answer = response.content[0].text self.questions.append(question) self.responses.append(answer) return answer def clear_context(self): self.context = "" self.questions = [] self.responses = [] def __getstate__(self): state = self.__dict__.copy() del state["client"] return state def __setstate__(self, state): self.__dict__.update(state) self.client = None def clean_paper_id(raw_id): # Remove any leading/trailing spaces cleaned_id = raw_id.strip() # Extract paper ID from ArXiv URL if present match = re.search(r"arxiv\.org\/abs\/([\w\.]+)", cleaned_id) if match: cleaned_id = match.group(1) else: # Remove trailing dot if present cleaned_id = re.sub(r"\.$", "", cleaned_id) return cleaned_id def load_context(paper_id): global LEADING_PROMPT # Clean the paper_id to remove spaces or extract ID from URL paper_id = clean_paper_id(paper_id) # Check if the paper is already on Hugging Face latex_source = get_paper_from_huggingface(paper_id) paper_downloaded = False # If not found on Hugging Face, use arxiv_latex_extractor if not latex_source: try: latex_source = get_paper_content(paper_id) paper_downloaded = True except Exception as e: return None, [(f"Error loading paper with id {paper_id}: {e}",)] if paper_downloaded: # Save the LaTeX content to a temporary file with tempfile.NamedTemporaryFile( mode="w+", suffix=".tex", delete=False ) as tmp_file: tmp_file.write(latex_source) temp_file_path = tmp_file.name # Upload the paper to Hugging Face try: if os.path.getsize(temp_file_path) > 1: hf_api = HfApi(token=os.environ["HUGGINGFACE_TOKEN"]) top_level_paper_id = paper_id.split(".")[0] path_in_repo = f"papers/{top_level_paper_id}/{paper_id}.tex" hf_api.upload_file( path_or_fileobj=temp_file_path, path_in_repo=path_in_repo, repo_id="taesiri/arxiv_db", repo_type="dataset", ) except Exception as e: print(f"Error uploading paper with id {paper_id}: {e}") # Initialize the Anthropic client and QA model client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]) qa_model = ContextualQA(client, model="claude-3-opus-20240229") context = f"{LEADING_PROMPT}\n{latex_source}" qa_model.load_text(context) # Get the paper's title and abstract title, abstract = get_paper_info(paper_id) title = replace_texttt(title) abstract = replace_texttt(abstract) return ( qa_model, [ ( f"Load the paper with id {paper_id}.", f"\n**Title**: {title}\n\n**Abstract**: {abstract}\n\nPaper loaded. You can now ask questions.", ) ], ) def answer_fn(qa_model, question, chat_history): # if question is empty, tell user that they need to ask a question if question == "": chat_history.append(("No Question Asked", "Please ask a question.")) return qa_model, chat_history, "" client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]) qa_model.client = client try: answer = qa_model.ask_question(question) except Exception as e: chat_history.append(("Error Asking Question", str(e))) return qa_model, chat_history, "" chat_history.append((question, answer)) return qa_model, chat_history, "" def clear_context(): return [] with gr.Blocks( theme=gr.themes.Soft(), css=custom_css, title="ArXiv QA with Claude", head=ga_script ) as demo: gr.HTML( """
claude-3-opus-20240229
🔥- Ask Questions and Get Answers Instantly