File size: 1,830 Bytes
002247f
bf3d25c
63459a6
bf3d25c
 
63459a6
 
 
 
 
 
 
f33afb3
 
 
 
 
 
 
 
 
 
bf3d25c
 
 
 
 
 
 
 
f33afb3
 
bf3d25c
f33afb3
 
 
 
 
bf3d25c
f33afb3
63459a6
 
 
f33afb3
63459a6
 
f33afb3
63459a6
bf3d25c
f33afb3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
import PyPDF2
import io
import requests
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# Checkpoint identifier shared by the model and its tokenizer so the two
# always come from the same pre-trained release.
model_name = "distilbert-base-cased-distilled-squad"

# Load the question-answering model and the matching tokenizer once, at
# import time, so every request handled later reuses the same objects.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fixed set of questions the user can choose from; the order here is the
# order in which they are offered as dropdown choices.
predefined_questions = [
    "What is the purpose of this document?",
    "What is the main topic of the document?",
    "Who is the target audience?",
    "What is the author's main argument?",
    "What is the conclusion of the document?",
]

def _extract_pdf_text(pdf_file):
    """Return the stripped, concatenated text of every page of ``pdf_file``.

    ``pdf_file`` is any object with a ``read()`` method returning the raw PDF
    bytes. Works with both the current PyPDF2 API (``PdfReader``) and the
    legacy camelCase API (``PdfFileReader``), whichever is installed.
    """
    stream = io.BytesIO(pdf_file.read())
    if hasattr(PyPDF2, "PdfReader"):
        # Preferred API; the camelCase names below were removed in PyPDF2 3.0.
        reader = PyPDF2.PdfReader(stream)
        page_texts = [page.extract_text() for page in reader.pages]
    else:
        reader = PyPDF2.PdfFileReader(stream)
        page_texts = [
            reader.getPage(index).extractText()
            for index in range(reader.getNumPages())
        ]
    # A page may yield no text at all; treat that as an empty string.
    return "".join(piece or "" for piece in page_texts).strip()


def answer_questions(pdf_file, question):
    """Answer ``question`` using the text extracted from an uploaded PDF.

    The document is split into overlapping windows that each fit within the
    model's maximum sequence length, the model is run on every window, and
    the highest-scoring valid answer span across all windows is returned.

    Args:
        pdf_file: File-like object (must provide ``read()``) holding the PDF,
            or ``None`` when nothing was uploaded.
        question: The question to answer, as a string.

    Returns:
        The answer text. A short explanatory message is returned when the
        upload or question is missing or the PDF contains no extractable
        text; an empty string is returned when no valid span is found.
    """
    if pdf_file is None:
        return "Please upload a PDF document."
    if not question:
        return "Please select a question."

    text = _extract_pdf_text(pdf_file)
    if not text:
        return "No text could be extracted from the PDF."

    # The model cannot attend past its position-embedding table, so longer
    # documents must be windowed rather than encoded in one piece.
    max_len = min(
        tokenizer.model_max_length,
        model.config.max_position_embeddings,
    )
    encoded = tokenizer(
        question,
        text,
        truncation="only_second",  # never cut the question, only the document
        max_length=max_len,
        stride=128,  # overlap so an answer on a window boundary is not split
        return_overflowing_tokens=True,
    )
    windows = encoded["input_ids"]
    if windows and isinstance(windows[0], int):
        # A tokenizer that does not split into windows returns one flat list.
        windows = [windows]

    best_score = None
    best_answer = ""
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for input_ids in windows:
            outputs = model(torch.tensor([input_ids]), return_dict=True)
            start_logits = outputs.start_logits[0]
            end_logits = outputs.end_logits[0]
            answer_start = start_logits.argmax().item()
            answer_end = end_logits.argmax().item()
            if answer_end < answer_start:
                # Inverted span: this window has no usable prediction.
                continue
            score = (
                start_logits[answer_start] + end_logits[answer_end]
            ).item()
            if best_score is None or score > best_score:
                best_score = score
                best_answer = tokenizer.decode(
                    input_ids[answer_start:answer_end + 1],
                    skip_special_tokens=True,
                )

    return best_answer.strip()

# Input widgets, listed in the same order as the parameters of
# answer_questions: the uploaded PDF first, then the chosen question.
inputs = [
    gr.inputs.File(label="PDF document"),
    gr.inputs.Dropdown(choices=predefined_questions, label="Question"),
]

# Single text widget that displays the string returned by answer_questions.
outputs = gr.outputs.Textbox(label="Answer")

# Assemble the interface and start serving it.
gr.Interface(
    fn=answer_questions,
    inputs=inputs,
    outputs=outputs,
    title="PDF Question Answering Tool",
    description=(
        "Upload a PDF document and select a question from the dropdown. "
        "The app will use a pre-trained model to find the answer."
    ),
).launch()