Spaces:
Sleeping
Sleeping
from pathlib import Path | |
from typing import Union | |
import PyPDF2 | |
from transformers import pipeline | |
import gradio as gr | |
# Question-answering pipeline backed by the deepset/roberta-base-squad2 model.
# Built once at module load and used by answer_doc_question.
nlp = pipeline(
    task='question-answering',
    model='deepset/roberta-base-squad2',
)
def extract_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Return the text of every page of a PDF as a single string.

    Pages are joined with a newline so that the last word of one page is not
    fused with the first word of the next one. Pages that yield no text are
    skipped.

    Args:
        pdf_file: Filesystem path of the PDF to read.

    Returns:
        The extracted text, or an empty string when no page yields any text.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_file, 'rb') as pdf_file_obj:
        pdf_reader = PyPDF2.PdfReader(pdf_file_obj)
        # Extraction stays inside the ``with`` block so the reader's
        # underlying file object is still open while pages are read.
        # ``or ''`` guards against a page for which no text is returned.
        page_texts = [page.extract_text() or '' for page in pdf_reader.pages]
    return '\n'.join(text for text in page_texts if text.strip())
def answer_doc_question(pdf_file, question):
    """Answer a question about an uploaded PDF document.

    Args:
        pdf_file: The uploaded document, given either as a filesystem path
            (``str`` or ``Path``) or as a file-like object that exposes its
            path through a ``name`` attribute. ``None`` means no upload.
        question: The question to ask about the document.

    Returns:
        The ``'answer'`` string produced by the question-answering pipeline,
        or a short message when the upload, the question, or the document
        text is missing.
    """
    if pdf_file is None:
        return "Please upload a PDF document."
    if not question or not question.strip():
        return "Please enter a question."

    # A File component configured with type="filepath" passes a plain path
    # string, which has no ``.name``; file-like uploads carry the path in
    # ``.name``. A ``Path`` is used as is because its ``.name`` is only the
    # final path component.
    if isinstance(pdf_file, (str, Path)):
        pdf_path = pdf_file
    else:
        pdf_path = pdf_file.name

    # Extract text from PDF
    context = extract_text_from_pdf(pdf_path)
    if not context.strip():
        # Without any context there is nothing to extract an answer from.
        return "No text could be extracted from this PDF."

    # Prepare question-answering input
    QA_input = {
        'question': question,
        'context': context,
    }
    # Get answer
    res = nlp(QA_input, max_answer_length=500)
    return res['answer']
# Define Gradio interface: a PDF upload plus a free-text question, answered
# as plain text by answer_doc_question.
pdf_input = gr.File(type="filepath", label="Upload a PDF document and ask a question about it.")
question = gr.Textbox(label="Type a question regarding the uploaded document here.")
iface = gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
)

# Launch the interface only when run as a script, so that importing this
# module does not start a server.
if __name__ == "__main__":
    iface.launch()