# Spaces: Sleeping
import functools

import gradio as gr
from pdfminer.high_level import extract_pages, extract_text
from pdfminer.layout import LTTextContainer
from transformers import pipeline
def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of all text containers in a PDF.

    Args:
        pdf_file_path: Value handed to pdfminer's ``extract_pages``; the
            callers in this file pass the path of the uploaded file.

    Returns:
        str: The ``get_text()`` output of every ``LTTextContainer`` element,
        page by page in iteration order, joined without a separator. An
        empty string when no such element is found.
    """
    # Gather the pieces and join once instead of growing a string with
    # ``+=`` for every layout element.
    fragments = []
    for page_layout in extract_pages(pdf_file_path):
        fragments.extend(
            element.get_text()
            for element in page_layout
            if isinstance(element, LTTextContainer)
        )
    return "".join(fragments)
def extract_text_from_pdf_file(pdf_file):
    """Extract the text of a PDF given a file object.

    Args:
        pdf_file: Object whose ``name`` attribute is forwarded to
            ``extract_text_from_pdf``.

    Returns:
        The value returned by ``extract_text_from_pdf`` for ``pdf_file.name``.
    """
    return extract_text_from_pdf(pdf_file.name)
@functools.lru_cache(maxsize=1)
def _get_question_answerer():
    """Build the question-answering pipeline once and reuse it afterwards."""
    return pipeline("question-answering", model='distilbert-base-cased-distilled-squad')


def question_answering(pdf_file, question):
    """Answer a question using the text extracted from an uploaded PDF.

    Args:
        pdf_file: The uploaded file: either an object whose ``name``
            attribute is the file path, or the path itself. ``None`` when
            nothing was uploaded.
        question: The question text.

    Returns:
        str: The ``'answer'`` field of the pipeline result, or a short
        explanatory message when the upload or the question is missing or
        the PDF yields no text.
    """
    # Validate the cheap inputs first so nothing is parsed or loaded for a
    # request that cannot be answered.
    if pdf_file is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # NOTE(review): presumably some Gradio versions hand the function a plain
    # path string instead of a file wrapper with ``.name`` - confirm against
    # the installed version. Both shapes are accepted here.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    context = extract_text_from_pdf(pdf_path)
    if not context.strip():
        return "No extractable text was found in the PDF."

    # The pipeline is cached, so the model is only loaded on the first call.
    result = _get_question_answerer()(question=question, context=context)
    return result['answer']
# Heading passed to the interface below.
title = 'PDF Text Extraction and Question Answering Demo'

# One file input and one text input are passed to question_answering;
# its return value is shown in a single text output.
iface = gr.Interface(
    fn=question_answering,
    inputs=["file", "text"],
    outputs="text",
    title=title,
    description="Upload a PDF file and ask a question about its content to get an answer.",
    theme="peach",
)

# Start serving the interface.
iface.launch()