|
import fitz |
|
import gradio as gr |
|
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline |
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Either a plain ``str`` filesystem path, or an object whose
            ``name`` attribute holds the path (the only form the original
            code accepted).

    Returns:
        The text of all pages joined in page order, or an empty string when
        no page yields any text.
    """
    # A plain string is already a path; anything else is expected to expose
    # the path through ``.name``.
    path = pdf_path if isinstance(pdf_path, str) else pdf_path.name

    # The context manager closes the document even if a page fails to parse.
    with fitz.open(path) as document:
        return "".join(page.get_text() for page in document)
|
|
|
|
|
# Checkpoint identifier shared by the model and its tokenizer.
model_name = "distilbert-base-cased-distilled-squad"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Question-answering pipeline, built once when the module is loaded.
qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
|
|
|
|
|
def answer_question(pdf_file, question):
    """Answer a question using the text of an uploaded PDF as context.

    Args:
        pdf_file: The uploaded PDF, in whatever form
            ``extract_text_from_pdf`` accepts, or ``None`` when nothing was
            uploaded.
        question: The question text entered by the user.

    Returns:
        The ``'answer'`` field of the pipeline result, or a short
        explanatory message when the upload is missing, the question is
        blank, or the PDF contains no extractable text.
    """
    # Guard clauses: report unusable input as text instead of letting the
    # extraction or the model call raise.
    if pdf_file is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    content = extract_text_from_pdf(pdf_file)

    # A PDF may yield no text at all (e.g. image-only pages); skip the model
    # call rather than hand it an empty context.
    if not content.strip():
        return "No extractable text was found in the PDF."

    result = qa_pipeline(question=question, context=content)
    return result["answer"]
|
|
|
|
|
# Web UI: a PDF upload and a question box wired to ``answer_question``,
# with the answer rendered as plain text.
iface = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.File(label="PDF File", file_types=[".pdf"]),
        gr.Textbox(lines=2, placeholder="Ask a question..."),
    ],
    outputs="text",
    title="DistilBERT Question Answering",
    description="Upload a PDF and ask questions based on the content of the PDF.",
)

# Start the Gradio app.
iface.launch()