"""Gradio app for asking Claude 2.0 questions about the LaTeX source of arXiv papers."""

import os
import re

import arxiv
import gradio as gr
import requests
from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic
from arxiv_latex_extractor import get_paper_content

LEADING_PROMPT = "Read the following paper and answer the question below:"


def replace_texttt(text):
    # Render LaTeX \texttt{...} spans as Markdown emphasis for display.
    return re.sub(r"\\texttt\{(.*?)\}", r"*\1*", text)


def get_paper_info(paper_id):
    # Fetch the paper's metadata from arXiv by its ID.
    search = arxiv.Search(id_list=[paper_id])
    paper = next(search.results(), None)
    if paper is not None:
        return paper.title, paper.summary
    return None, None


def get_paper_from_huggingface(paper_id):
    # Try the pre-extracted LaTeX source cached in the taesiri/arxiv_db dataset.
    try:
        url = f"https://huggingface.co/datasets/taesiri/arxiv_db/raw/main/papers/{paper_id}.tex"
        response = requests.get(url)
        response.raise_for_status()  # Raises HTTPError on an unsuccessful status code
        return response.text
    except Exception:
        return None


class ContextualQA:
    def __init__(self, client, model="claude-2.0"):
        self.client = client
        self.model = model
        self.context = ""
        self.questions = []
        self.responses = []

    def load_text(self, text):
        self.context = text

    def ask_question(self, question):
        if self.questions:
            # The first question-answer pair omits HUMAN_PROMPT, since the
            # final prompt below already opens with it before the context.
            first_pair = f"Question: {self.questions[0]}\n{AI_PROMPT} Answer: {self.responses[0]}"
            # Subsequent pairs include both HUMAN_PROMPT and AI_PROMPT.
            subsequent_pairs = "\n".join(
                f"{HUMAN_PROMPT} Question: {q}\n{AI_PROMPT} Answer: {a}"
                for q, a in zip(self.questions[1:], self.responses[1:])
            )
            history_context = f"{first_pair}\n{subsequent_pairs}"
        else:
            history_context = ""

        full_context = f"{self.context}\n\n{history_context}\n"
        prompt = f"{HUMAN_PROMPT} {full_context} {HUMAN_PROMPT} {question} {AI_PROMPT}"

        # Save the assembled prompt to disk for examination.
        with open("prompt.txt", "w") as f:
            f.write(prompt)

        response = self.client.completions.create(
            prompt=prompt,
            stop_sequences=[HUMAN_PROMPT],
            max_tokens_to_sample=6000,
            model=self.model,
            stream=False,
        )
        answer = response.completion
        self.questions.append(question)
        self.responses.append(answer)
        return answer

    def clear_context(self):
        self.context = ""
        self.questions = []
        self.responses = []

    def __getstate__(self):
        # Drop the API client before pickling (e.g. when gr.State copies the
        # object); it is not picklable and is re-attached on each request.
        state = self.__dict__.copy()
        del state["client"]
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.client = None


def load_context(paper_id):
    # First, try to get the paper from Hugging Face.
    latex_source = get_paper_from_huggingface(paper_id)

    # If not found, fall back to arxiv_latex_extractor.
    if not latex_source:
        try:
            latex_source = get_paper_content(paper_id)
        except Exception as e:
            return None, [(f"Load the paper with id {paper_id}.", f"Error loading paper: {e}")]

    client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    qa_model = ContextualQA(client, model="claude-2.0")
    context = f"{LEADING_PROMPT}\n{latex_source}"
    qa_model.load_text(context)

    title, abstract = get_paper_info(paper_id)
    # Strip LaTeX markup from the title and abstract before displaying them.
    if title is not None:
        title = replace_texttt(title)
    if abstract is not None:
        abstract = replace_texttt(abstract)

    return (
        qa_model,
        [
            (
                f"Load the paper with id {paper_id}.",
                f"\n**Title**: {title}\n\n**Abstract**: {abstract}\n\n"
                "Paper loaded. \nYou can now ask questions.",
            )
        ],
    )
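
# A minimal sketch of driving ContextualQA outside the UI (an illustration,
# not executed by the app; assumes ANTHROPIC_API_KEY is set and that
# `latex_source` holds a paper's LaTeX). Kept as a comment so importing this
# module has no side effects:
#
#   qa = ContextualQA(Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]))
#   qa.load_text(f"{LEADING_PROMPT}\n{latex_source}")
#   print(qa.ask_question("What is the main contribution of this paper?"))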
def answer_fn(qa_model, question, chat_history):
    # If no paper has been loaded yet, there is nothing to answer against.
    if qa_model is None:
        chat_history.append(("No Paper Loaded", "Please load a paper first."))
        return qa_model, chat_history, ""

    # If the question is empty, tell the user that they need to ask one.
    if question == "":
        chat_history.append(("No Question Asked", "Please ask a question."))
        return qa_model, chat_history, ""

    # Re-attach a fresh client; __getstate__ drops it whenever gr.State
    # copies the model between events.
    client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    qa_model.client = client

    try:
        answer = qa_model.ask_question(question)
    except Exception as e:
        chat_history.append(("Error Asking Question", str(e)))
        return qa_model, chat_history, ""

    chat_history.append((question, answer))
    return qa_model, chat_history, ""


def clear_context():
    # Clear the chat history shown in the Chatbot component.
    return []
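
# Why the handler above re-attaches the client (a sketch for illustration
# only, not executed by the app): gr.State may pickle the ContextualQA
# instance between events, and __getstate__ deliberately removes the
# unpicklable Anthropic client.
#
#   import pickle
#   qa = ContextualQA(client=None)
#   restored = pickle.loads(pickle.dumps(qa))
#   restored.client is None  # True: a client must be attached before use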

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML(
        """
        <h1 style="text-align: center;">Explore ArXiv Papers in Depth with claude-2.0 - Ask Questions and Get Answers Instantly</h1>

""" ) gr.HTML( """

        <p style="text-align: center;">Explore the depths of ArXiv papers with our interactive app, powered by the advanced claude-2.0 model. Ask detailed questions and get immediate, context-rich answers from academic papers.</p>

""" ) gr.HTML( """
        <p style="text-align: center;">Duplicate the Space with your Anthropic API Key | Follow me on Twitter for more updates: @taesiri</p>
""" ) with gr.Column(): with gr.Row(): paper_id_input = gr.Textbox(label="Enter Paper ID", value="2108.07258") btn_load = gr.Button("Load Paper") qa_model = gr.State() with gr.Column(): chatbot = gr.Chatbot().style(color_map=("blue", "yellow")) question_txt = gr.Textbox( label="Question", lines=1, placeholder="Type your question here..." ) btn_answer = gr.Button("Answer Question") btn_clear = gr.Button("Clear Chat") gr.HTML( """
        <p style="text-align: center;">All inputs are sent to Anthropic's Claude endpoints; see Anthropic's privacy policy for details.</p>
""" ) gr.Markdown( "## Acknowledgements\n" "This project is made possible through the generous support of " "[Anthropic](https://www.anthropic.com/), who provided free access to the `Claude-2.0` API." ) btn_load.click(load_context, inputs=[paper_id_input], outputs=[qa_model, chatbot]) btn_answer.click( answer_fn, inputs=[qa_model, question_txt, chatbot], outputs=[qa_model, chatbot, question_txt], ) question_txt.submit( answer_fn, inputs=[qa_model, question_txt, chatbot], outputs=[qa_model, chatbot, question_txt], ) btn_clear.click(clear_context, outputs=[chatbot]) demo.launch()