Spaces:

dippatel1994
/

Research-Paper-QA

Runtime error

App Files Files Community

dippatel1994 committed on Mar 17

Commit

eff7d81

•

1 Parent(s): a9a35c7

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -54

app.py CHANGED Viewed

@@ -1,54 +1,41 @@
-import gradio as gr
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForQuestionAnswering
-class ResearchPaperQAModel:
-    """Load a question-answering model and answer questions about a research paper.
-
-    The tokenizer and the TensorFlow question-answering model are both loaded
-    from the same pretrained checkpoint name. Answers are extracted from the
-    paper text when it is given, otherwise from the abstract.
-    """
-    def __init__(self, model_name):
-        # Tokenizer and model come from the same checkpoint name
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = TFAutoModelForQuestionAnswering.from_pretrained(model_name)
-    def answer_question(self, question, abstract, paper_text):
-        # Returns the answer as a string decoded from the selected context tokens.
-        # Pick the context: the paper text when provided, otherwise the abstract
-        if not paper_text:
-            context = abstract
-        else:
-            context = paper_text
-        # Tokenize the question/context pair into TensorFlow tensors
-        # NOTE(review): no truncation is requested here, so a very long context is
-        # passed to the model at full length - confirm the model accepts it
-        inputs = self.tokenizer(question, context, return_tensors="tf")
-        # Get the start and end logits for the answer
-        outputs = self.model(**inputs)
-        # [0] takes the logits of the first sequence (only one pair was tokenized)
-        start_logits, end_logits = outputs.start_logits[0].numpy(), outputs.end_logits[0].numpy()
-        # Find the highest-scoring token positions for the start and end of the answer
-        start_index = tf.argmax(start_logits, axis=-1).numpy()
-        end_index = tf.argmax(end_logits, axis=-1).numpy()
-        # Convert token indices to actual tokens
-        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"].numpy().squeeze())
-        # Slice is inclusive of end_index; it is empty when end_index < start_index,
-        # because the two positions are chosen independently
-        answer_tokens = tokens[start_index : end_index + 1]
-        # Convert answer tokens back to a string
-        answer = self.tokenizer.convert_tokens_to_string(answer_tokens)
-        return answer
-# Checkpoint name handed to ResearchPaperQAModel, which loads both tokenizer and model from it
-model = "bert-large-uncased-whole-word-masking-finetuned-squad" # Model name
-paper_model = ResearchPaperQAModel(model) #Create an instance of the model
-# Create a Gradio interface
-iface = gr.Interface(
-    # The three text inputs are passed positionally, i.e. as
-    # (question, abstract, paper_text) per answer_question's signature
-    fn=paper_model.answer_question,
-    inputs=["text", "text", "text"],
-    outputs="text",
-    # NOTE(review): presumably re-runs the function as inputs change - confirm against Gradio docs
-    live=True,
-    title="Ask question to research paper",
-    # NOTE(review): this text mentions a paper title and a list of questions, but the
-    # inputs wired above are a single question, the abstract and the paper text
-    description="Enter title of research paper, abstract, research paper content(optional) and list of questions to get answers."
-)
-# Launch the Gradio interface
-# NOTE(review): share=True presumably requests a public share link - confirm this is intended
-iface.launch(share=True)

+import streamlit as st
+import requests
+from transformers import pipeline, BertTokenizer
+# Checkpoint used for extractive question answering
+QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"
+# Streamlit re-executes this script on every interaction; cache_resource keeps
+# the loaded model alive across reruns instead of rebuilding it for each question
+@st.cache_resource
+def _load_qa_pipeline():
+    """Build the question-answering pipeline once and reuse it afterwards."""
+    return pipeline("question-answering", model=QA_MODEL_NAME)
+# Function to generate answers using the BERT model
+def generate_answers(chunks, question):
+    """Answer a question from the text of a paper.
+
+    Args:
+        chunks: Iterable of text pieces; they are joined with single spaces
+            to form the context handed to the model.
+        question: The question to answer.
+
+    Returns:
+        The answer span extracted by the model, or an empty string when the
+        question or the joined context is blank.
+    """
+    # Concatenate chunks into a single text
+    paper_text = ' '.join(chunks)
+    # The pipeline raises on a blank question or context, so report "no answer" instead
+    if not (question and question.strip() and paper_text.strip()):
+        return ""
+    # Keyword arguments are the documented call form of the question-answering pipeline
+    result = _load_qa_pipeline()(question=question, context=paper_text)
+    return result['answer']
+# Streamlit app: ask for a paper link and a question, download the paper, show the answer
+st.title("Research Paper Question Answering")
+paper_link = st.text_input("Enter the link to the research paper (Arxiv link):")
+question = st.text_input("Enter your question:")
+if st.button("Generate Answer"):
+    if not (paper_link and question):
+        st.warning("Please provide both the paper link and the question.")
+    else:
+        # Download the research paper
+        try:
+            # Bound the wait so an unresponsive host cannot hang the app
+            response = requests.get(paper_link, timeout=30)
+            # Treat 4xx/5xx replies as failures rather than using an error page as paper text
+            response.raise_for_status()
+        except requests.RequestException as error:
+            # Covers malformed URLs, connection failures, timeouts and HTTP error statuses
+            st.error(f"Could not download the paper: {error}")
+        else:
+            # NOTE: response.text is the raw response body; for a web page this still
+            # includes the HTML markup, which is passed to the model unchanged
+            words = response.text.split()
+            # Split the paper text into chunks of 512 words, cutting only at word
+            # boundaries so that re-joining the chunks with spaces keeps every word intact
+            paper_chunks = [' '.join(words[i:i + 512]) for i in range(0, len(words), 512)]
+            if not paper_chunks:
+                st.warning("The downloaded document contains no text.")
+            else:
+                # Generate answer based on chunks
+                answer = generate_answers(paper_chunks, question)
+                st.success("Answer generated successfully!")
+                st.text("Generated Answer:")
+                st.write(answer)