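# Scraped Hugging Face Space file-view header for app.py (not part of the program):
#   author avatar: srinivas-mushroom's picture
#   commit: "Update app.py" (f33afb3)
#   page links: raw | history | blame | contribute | delete
#   scan status: No virus
#   file size: 1.83 kB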
import gradio as gr
import PyPDF2
import io
import requests
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
# Checkpoint identifier shared by both loaders below, so the tokenizer's
# vocabulary always matches the question-answering model's weights.
model_name = "distilbert-base-cased-distilled-squad"

# Download (or reuse a cached copy of) the pre-trained QA model and its tokenizer.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Questions offered in the UI dropdown; the user picks one of these rather
# than typing free text.
predefined_questions = [
    "What is the purpose of this document?",
    "What is the main topic of the document?",
    "Who is the target audience?",
    "What is the author's main argument?",
    "What is the conclusion of the document?",
]
def _read_pdf_bytes(pdf_file):
    """Return the raw bytes behind *pdf_file*.

    *pdf_file* may be raw bytes, a filesystem path, or a file-like object
    (such as the wrapper Gradio passes for an uploaded file).
    """
    if isinstance(pdf_file, (bytes, bytearray)):
        return bytes(pdf_file)
    if isinstance(pdf_file, str):
        with open(pdf_file, "rb") as handle:
            return handle.read()

    data = b""
    read = getattr(pdf_file, "read", None)
    if callable(read):
        try:
            if hasattr(pdf_file, "seek"):
                # The stream may have been left at EOF by whoever wrote it.
                pdf_file.seek(0)
            data = read()
        except (OSError, ValueError):
            # Closed or unreadable stream: try the on-disk path below.
            data = b""
    if not data:
        # NOTE(review): some Gradio releases appear to pass a wrapper whose
        # own stream is empty while `.name` points at the uploaded file --
        # confirm against the installed Gradio version.
        path = getattr(pdf_file, "name", None)
        if isinstance(path, str):
            with open(path, "rb") as handle:
                data = handle.read()
    return data


def _extract_pdf_text(pdf_bytes):
    """Return the text of every page of the PDF, joined by newlines."""
    # PyPDF2 3.0 removed PdfFileReader / getNumPages / getPage / extractText.
    # Prefer the current names and fall back to the legacy ones so the code
    # runs on either side of that change.
    reader_cls = getattr(PyPDF2, "PdfReader", None) or PyPDF2.PdfFileReader
    reader = reader_cls(io.BytesIO(pdf_bytes))
    if hasattr(reader, "pages"):
        pages = reader.pages
    else:
        pages = [reader.getPage(i) for i in range(reader.getNumPages())]

    parts = []
    for page in pages:
        extract = getattr(page, "extract_text", None) or page.extractText
        parts.append(extract() or "")
    # A newline between pages keeps the last word of one page from fusing
    # with the first word of the next before tokenization.
    return "\n".join(parts).strip()


def _best_span(start_logits, end_logits, lo, hi):
    """Return ``(score, start, end)`` of the best answer span in ``[lo, hi)``.

    Only spans with ``start <= end`` are considered; *score* is the sum of
    the start and end logits. The returned indices are positions in the
    full (un-sliced) sequence.
    """
    width = hi - lo
    # scores[i, j] = start_logit[lo + i] + end_logit[lo + j]
    scores = start_logits[lo:hi].unsqueeze(1) + end_logits[lo:hi].unsqueeze(0)
    positions = torch.arange(width, device=scores.device)
    ends_before_start = positions.unsqueeze(1) > positions.unsqueeze(0)
    scores = scores.masked_fill(ends_before_start, float("-inf"))
    start, end = divmod(int(scores.argmax()), width)
    return scores[start, end].item(), lo + start, lo + end


def answer_questions(pdf_file, question):
    """Answer *question* from the text of an uploaded PDF.

    The document is split into overlapping token windows that each fit the
    model's maximum input length; every window is scored and the
    highest-scoring span across all windows is returned.

    Args:
        pdf_file: Uploaded PDF as bytes, a path, or a file-like object.
        question: The question to answer.

    Returns:
        The decoded answer span, or ``""`` when no file or question was
        given or the PDF contains no extractable text.

    Raises:
        ValueError: If the question alone does not fit in the model input.
    """
    if pdf_file is None or not question:
        return ""

    text = _extract_pdf_text(_read_pdf_bytes(pdf_file))
    if not text:
        return ""

    question_ids = tokenizer.encode(question, add_special_tokens=False)
    context_ids = tokenizer.encode(text, add_special_tokens=False)
    if not context_ids:
        return ""

    # Feeding more positions than the model has embeddings for raises an
    # indexing error, so every window must fit within this limit.
    max_len = min(
        tokenizer.model_max_length,
        getattr(model.config, "max_position_embeddings", 512),
    )
    window = (
        max_len
        - len(question_ids)
        - tokenizer.num_special_tokens_to_add(pair=True)
    )
    if window <= 0:
        raise ValueError("question is too long for the model's input size")
    # Half-window overlap so an answer straddling a boundary is still seen
    # whole by at least one window.
    step = max(1, window // 2)

    # BERT-style pair layout used by this tokenizer:
    # [CLS] question [SEP] context [SEP]
    context_start = len(question_ids) + 2

    best_score = float("-inf")
    best_ids = []
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        for offset in range(0, len(context_ids), step):
            chunk = context_ids[offset:offset + window]
            input_ids = tokenizer.build_inputs_with_special_tokens(
                question_ids, chunk
            )
            outputs = model(torch.tensor([input_ids]), return_dict=True)
            score, start, end = _best_span(
                outputs.start_logits[0],
                outputs.end_logits[0],
                context_start,
                context_start + len(chunk),
            )
            if score > best_score:
                best_score = score
                best_ids = input_ids[start:end + 1]
            if offset + window >= len(context_ids):
                # This window reached the end of the document.
                break

    return tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(best_ids)
    )
def _gradio_component(name, legacy_namespace):
    """Return the Gradio component class called *name*.

    The ``gr.inputs`` / ``gr.outputs`` namespaces were deprecated in
    Gradio 3 and removed in Gradio 4, so the top-level class is preferred;
    the legacy namespace is used only when the top-level name is missing.
    """
    component = getattr(gr, name, None)
    if component is None:
        component = getattr(getattr(gr, legacy_namespace), name)
    return component


# Input widgets: the PDF to read and the question to ask about it.
inputs = [
    _gradio_component("File", "inputs")(label="PDF document"),
    _gradio_component("Dropdown", "inputs")(
        label="Question", choices=predefined_questions
    ),
]
# Output widget showing the extracted answer.
outputs = _gradio_component("Textbox", "outputs")(label="Answer")

# Build the web UI and start serving it (runs when this file is executed).
demo = gr.Interface(
    fn=answer_questions,
    inputs=inputs,
    outputs=outputs,
    title="PDF Question Answering Tool",
    description="Upload a PDF document and select a question from the dropdown. The app will use a pre-trained model to find the answer.",
)
demo.launch()