"""Gradio demo: extract the text of an uploaded PDF and answer a question about it."""

import functools

import gradio as gr
from pdfminer.high_level import extract_pages, extract_text
from pdfminer.layout import LTTextContainer
from transformers import pipeline

# Hugging Face model used for extractive question answering.
QA_MODEL_NAME = 'distilbert-base-cased-distilled-squad'


def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of every text container in a PDF.

    Args:
        pdf_file_path: Path of the PDF, passed straight to pdfminer's
            ``extract_pages``.

    Returns:
        The text of all ``LTTextContainer`` layout elements, in the order
        pdfminer yields them. Empty string if the PDF has no such elements.
    """
    # Join once at the end instead of growing a string with ``+=`` in a loop.
    return "".join(
        element.get_text()
        for page_layout in extract_pages(pdf_file_path)
        for element in page_layout
        if isinstance(element, LTTextContainer)
    )


def _resolve_pdf_path(pdf_file):
    """Return the filesystem path for an uploaded-file value.

    Accepts either an object exposing the path as ``.name`` (what the original
    code relied on) or a value that is already a path; the latter is returned
    unchanged.
    """
    return getattr(pdf_file, "name", pdf_file)


def extract_text_from_pdf_file(pdf_file):
    """Extract text from an uploaded file value.

    Args:
        pdf_file: Uploaded file; either an object with a ``.name`` path
            attribute or a path.

    Returns:
        The extracted text, as produced by ``extract_text_from_pdf``.
    """
    return extract_text_from_pdf(_resolve_pdf_path(pdf_file))


@functools.lru_cache(maxsize=1)
def _get_question_answerer():
    """Build the question-answering pipeline once and reuse it across requests.

    Construction is deferred to the first call so that importing this module
    does not load the model.
    """
    return pipeline("question-answering", model=QA_MODEL_NAME)


def question_answering(pdf_file, question):
    """Answer ``question`` using the text of ``pdf_file`` as context.

    Args:
        pdf_file: Uploaded PDF (object with a ``.name`` path attribute, or a
            path). ``None`` when nothing was uploaded.
        question: The question to ask about the PDF's content.

    Returns:
        The ``'answer'`` field of the pipeline result, or a short explanatory
        message when the file, the question, or the extracted text is missing.
    """
    # Guard the inputs up front: a missing upload, blank question, or PDF with
    # no extractable text would otherwise surface as an unhandled exception.
    if pdf_file is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    context = extract_text_from_pdf_file(pdf_file)
    if not context.strip():
        return "No extractable text was found in the PDF."

    result = _get_question_answerer()(question=question, context=context)
    return result['answer']


title = 'PDF Text Extraction and Question Answering Demo'

iface = gr.Interface(
    fn=question_answering,
    inputs=["file", "text"],
    outputs="text",
    title=title,
    description="Upload a PDF file and ask a question about its content to get an answer.",
    # NOTE(review): "peach" is kept as-is; confirm the installed Gradio
    # version still accepts this theme name.
    theme="peach",
)

if __name__ == "__main__":
    # Only start the server when run as a script, not when imported.
    iface.launch()