"""Gradio app that answers questions about an uploaded PDF document.

The text of the PDF is extracted with PyPDF2 and passed, together with the
user's question, to an extractive question-answering pipeline.
"""

from pathlib import Path
from typing import Union

import gradio as gr
import PyPDF2
from transformers import pipeline

# Initialize question-answering pipeline (loaded once at import time so that
# every request reuses the same model instance).
nlp = pipeline('question-answering', model='deepset/roberta-base-squad2')


def extract_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Return the text of all pages of *pdf_file*, separated by newlines.

    Args:
        pdf_file: Path of the PDF document to read.

    Returns:
        The concatenated page texts. Pages for which the reader yields no
        text contribute an empty string.
    """
    with open(pdf_file, 'rb') as pdf_file_obj:
        pdf_reader = PyPDF2.PdfReader(pdf_file_obj)
        # Extraction must happen while the file is still open. `or ''`
        # guards against a page whose extract_text() result is None/empty.
        page_texts = [page.extract_text() or '' for page in pdf_reader.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next page.
    return '\n'.join(page_texts)


def answer_doc_question(pdf_file, question):
    """Answer *question* using the text of the uploaded PDF as context.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input. With
            ``type="filepath"`` this is expected to be a path string; an
            object exposing the path as ``.name`` is accepted as well.
        question: The question typed by the user.

    Returns:
        The answer span reported by the question-answering pipeline.

    Raises:
        gr.Error: If no file was uploaded, the question is blank, or no
            text could be extracted from the PDF.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF document first.")
    if not question or not question.strip():
        raise gr.Error("Please type a question about the uploaded document.")

    # A str/Path is already the path. Checking this first matters because
    # Path objects also have a `.name` attribute (the final path component),
    # which is not what we want to open.
    if isinstance(pdf_file, (str, Path)):
        pdf_path = pdf_file
    else:
        pdf_path = pdf_file.name

    # Extract text from PDF
    context = extract_text_from_pdf(pdf_path)
    if not context.strip():
        # Without any context there is nothing for the model to search.
        raise gr.Error(
            "No text could be extracted from this PDF "
            "(it may contain only scanned images)."
        )

    # Prepare question-answering input
    qa_input = {
        'question': question,
        'context': context,
    }

    # Get answer
    res = nlp(qa_input, max_answer_length=500)
    return res['answer']


# Define Gradio interface
pdf_input = gr.File(type="filepath", label="Upload a PDF document and ask a question about it.")
question = gr.Textbox(label="Type a question regarding the uploaded document here.")
iface = gr.Interface(fn=answer_doc_question, inputs=[pdf_input, question], outputs="text")

# Launch the interface
iface.launch()