pdf-gpt / app.py
swamisharan's picture
Update app.py
d17f001 verified
from pathlib import Path
from typing import Union
import PyPDF2
from transformers import pipeline
import gradio as gr
# Initialize the question-answering pipeline once, at import time, so that
# every call to answer_doc_question reuses the same pipeline object instead
# of constructing a new one per request.
nlp = pipeline('question-answering', model='deepset/roberta-base-squad2')
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        The text of all pages, joined with a newline between consecutive
        pages. A page that yields no text contributes an empty string.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    with open(pdf_file, 'rb') as pdf_file_obj:
        pdf_reader = PyPDF2.PdfReader(pdf_file_obj)
        # Extract while the file is still open. `or ''` is a defensive guard
        # so a page whose extraction yields nothing (empty or None) cannot
        # break the join below.
        page_texts = [page.extract_text() or '' for page in pdf_reader.pages]
    # Join with a newline rather than '' so the last word of one page is not
    # fused with the first word of the next page.
    return '\n'.join(page_texts)
def answer_doc_question(pdf_file, question):
    """Answer a question using the text of an uploaded PDF as context.

    Args:
        pdf_file: The uploaded document. May be a plain filesystem path or
            an object exposing the path through a ``name`` attribute;
            ``None`` means nothing was uploaded.
        question: The question to ask about the document.

    Returns:
        The answer string produced by the question-answering pipeline, or a
        short explanatory message when the upload, the question, or the
        extracted document text is missing.
    """
    # Guard clauses: report missing inputs to the user instead of failing
    # with an AttributeError or a pipeline error.
    if pdf_file is None:
        return "Please upload a PDF document."
    if not question or not question.strip():
        return "Please enter a question."

    # Extract text from PDF. Accept both a bare path and a file-like wrapper
    # that carries its path in `.name`.
    pdf_path = getattr(pdf_file, 'name', pdf_file)
    context = extract_text_from_pdf(pdf_path)
    if not context.strip():
        # Nothing to search in (e.g. a PDF with no extractable text layer).
        return "No extractable text was found in the PDF."

    # Prepare question-answering input
    QA_input = {
        'question': question,
        'context': context
    }
    # Get answer
    res = nlp(QA_input, max_answer_length=500)
    return res['answer']
# Define Gradio interface
# Uploads are restricted to PDFs because the handler can only parse PDF
# files; other file types would otherwise fail later inside the PDF reader.
pdf_input = gr.File(
    type="filepath",
    file_types=[".pdf"],
    label="Upload a PDF document and ask a question about it.",
)
question = gr.Textbox(label="Type a question regarding the uploaded document here.")
# The two inputs are passed to answer_doc_question in this order; its return
# value is rendered as plain text.
iface = gr.Interface(fn=answer_doc_question, inputs=[pdf_input, question], outputs="text")
# Launch the interface
iface.launch()