Spaces:

mushroomsolutions
/

Image_Annotation

Runtime error

App Files Community

srinivas-mushroom committed on Mar 8, 2023

Commit

f33afb3

1 Parent(s): 4af8357

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -16

app.py CHANGED Viewed

@@ -10,7 +10,16 @@ model_name = "distilbert-base-cased-distilled-squad"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForQuestionAnswering.from_pretrained(model_name)
-def answer_questions(pdf_file, questions):
     # Load PDF file and extract text
     pdf_reader = PyPDF2.PdfFileReader(io.BytesIO(pdf_file.read()))
     text = ""
@@ -19,27 +28,23 @@ def answer_questions(pdf_file, questions):
         text += page.extractText()
     text = text.strip()
-    answers = []
-    for question in questions:
-        # Tokenize question and text
-        input_ids = tokenizer.encode(question, text)
-        # Perform question answering
-        outputs = model(torch.tensor([input_ids]), return_dict=True)
-        answer_start = outputs.start_logits.argmax().item()
-        answer_end = outputs.end_logits.argmax().item()
-        answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end+1]))
-        answers.append(answer)
-    return answers
 inputs = [
     gr.inputs.File(label="PDF document"),
-    gr.inputs.Textbox(label="Questions (one per line)", type="text")
 ]
-outputs = gr.outputs.Textarea(label="Answers")
 gr.Interface(fn=answer_questions, inputs=inputs, outputs=outputs, title="PDF Question Answering Tool",
-             description="Upload a PDF document and ask multiple questions. The app will use a pre-trained model to find the answers.").launch()

 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+# Define a list of pre-defined questions
+predefined_questions = [
+    "What is the purpose of this document?",
+    "What is the main topic of the document?",
+    "Who is the target audience?",
+    "What is the author's main argument?",
+    "What is the conclusion of the document?",
+]
+def answer_questions(pdf_file, question):
     # Load PDF file and extract text
     pdf_reader = PyPDF2.PdfFileReader(io.BytesIO(pdf_file.read()))
     text = ""
         text += page.extractText()
     text = text.strip()
+    # Tokenize question and text
+    input_ids = tokenizer.encode(question, text)
+    # Perform question answering
+    outputs = model(torch.tensor([input_ids]), return_dict=True)
+    answer_start = outputs.start_logits.argmax().item()
+    answer_end = outputs.end_logits.argmax().item()
+    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end+1]))
+    return answer
 inputs = [
     gr.inputs.File(label="PDF document"),
+    gr.inputs.Dropdown(label="Question", choices=predefined_questions),
 ]
+outputs = gr.outputs.Textbox(label="Answer")
 gr.Interface(fn=answer_questions, inputs=inputs, outputs=outputs, title="PDF Question Answering Tool",
+             description="Upload a PDF document and select a question from the dropdown. The app will use a pre-trained model to find the answer.").launch()