jit / app.py
Isanka's picture
Update app.py
893c729 verified
raw
history blame contribute delete
No virus
1.43 kB
import fitz # PyMuPDF
import gradio as gr
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Either a filesystem path given as a ``str``, or an object
            that exposes the file's path through a ``.name`` attribute
            (e.g. a temporary-file wrapper).
            NOTE(review): which of the two the upload widget passes depends
            on the installed Gradio version -- confirm.

    Returns:
        The text of all pages joined in page order. Empty string when no
        page yields any text.
    """
    # Accept a plain path as well as a file-like wrapper carrying the path.
    path = pdf_path if isinstance(pdf_path, str) else pdf_path.name

    # The context manager closes the document even if extraction raises,
    # so the underlying file handle is not leaked across requests.
    with fitz.open(path) as document:
        return "".join(page.get_text() for page in document)
# Checkpoint name shared by the tokenizer and the model loaded below.
model_name = "distilbert-base-cased-distilled-squad"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# Build the question-answering pipeline once at module load; answer_question
# reuses this single instance for every call.
qa_pipeline = pipeline(
    task="question-answering",
    model=model,
    tokenizer=tokenizer,
)
def answer_question(pdf_file, question):
    """Answer a question using the text of an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF, in whatever form
            ``extract_text_from_pdf`` accepts, or ``None`` when nothing was
            uploaded.
        question: The user's question as text.

    Returns:
        The ``'answer'`` field of the question-answering pipeline result, or
        a short explanatory message when the inputs cannot be processed
        (no file, blank question, or a PDF with no extractable text).
    """
    # Validate inputs first so the user gets a readable message instead of
    # an exception raised deeper in the PDF or model code.
    if pdf_file is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # Extract text from the uploaded PDF file.
    content = extract_text_from_pdf(pdf_file)

    # Image-only PDFs can yield no text at all; the pipeline cannot
    # answer against an empty context, so stop here.
    if not content.strip():
        return "No extractable text was found in the PDF."

    # Get the answer using the question-answering pipeline.
    result = qa_pipeline(question=question.strip(), context=content)
    return result["answer"]
# Input widgets, listed in the same order as answer_question's parameters:
# first the PDF upload, then the question box.
_pdf_input = gr.File(label="PDF File", file_types=[".pdf"])
_question_input = gr.Textbox(lines=2, placeholder="Ask a question...")

# Wire the widgets to answer_question; the returned string is shown as text.
iface = gr.Interface(
    fn=answer_question,
    inputs=[_pdf_input, _question_input],
    outputs="text",
    title="DistilBERT Question Answering",
    description="Upload a PDF and ask questions based on the content of the PDF.",
)

# Start serving the interface.
iface.launch()