# text_pdf / app.py
# Akshayram1's picture
# Update app.py
# 4af4ff7 verified
import functools

import gradio as gr
from pdfminer.high_level import extract_pages, extract_text
from pdfminer.layout import LTTextContainer
from transformers import pipeline
def extract_text_from_pdf(pdf_file_path):
    """Return the text of every text container found in a PDF.

    Args:
        pdf_file_path: Location of the PDF, passed straight to
            ``extract_pages``.

    Returns:
        The text of all ``LTTextContainer`` elements, concatenated in the
        order the pages and their elements are yielded. An empty string
        when no such element is found.
    """
    # Gather the fragments and join once at the end rather than growing a
    # string with ``+=`` for every element.
    fragments = []
    for page_layout in extract_pages(pdf_file_path):
        fragments.extend(
            element.get_text()
            for element in page_layout
            if isinstance(element, LTTextContainer)
        )
    return "".join(fragments)
def extract_text_from_pdf_file(pdf_file):
    """Extract the text of an uploaded PDF.

    Args:
        pdf_file: Either the path to the PDF as a string, or an object
            whose ``name`` attribute holds that path.

    Returns:
        The text returned by ``extract_text_from_pdf`` for that path.
    """
    # An upload may arrive as a plain path string instead of a file wrapper
    # (presumably depending on the Gradio version/config - confirm); a
    # string is used as-is, anything else is expected to expose ``.name``.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    return extract_text_from_pdf(pdf_path)
@functools.lru_cache(maxsize=1)
def _get_question_answerer():
    """Build the question-answering pipeline once and reuse it afterwards.

    The pipeline does not depend on the request, so it is created on first
    use and cached instead of being rebuilt for every question.
    """
    return pipeline("question-answering", model='distilbert-base-cased-distilled-squad')


def question_answering(pdf_file, question):
    """Answer ``question`` using the text extracted from an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF: either its path as a string or an
            object whose ``name`` attribute holds the path. ``None`` when
            nothing was uploaded.
        question: The question to ask about the PDF's content.

    Returns:
        The ``'answer'`` field of the pipeline result, or a short message
        explaining which input is missing or unusable.
    """
    # Validate the inputs up front so the user gets a readable message
    # instead of an exception from attribute access or from the pipeline.
    if pdf_file is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # A plain path string is used as-is; anything else is expected to
    # expose the path as ``.name``.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    context = extract_text_from_pdf(pdf_path)
    if not context.strip():
        # e.g. a PDF without any extractable text layer.
        return "No text could be extracted from the PDF."

    result = _get_question_answerer()(question=question, context=context)
    return result['answer']
# Heading displayed for the demo.
title = 'PDF Text Extraction and Question Answering Demo'

# Expose question_answering through a Gradio interface: a file input and a
# text input feed the function, and its return value is shown as text.
iface = gr.Interface(
    fn=question_answering,
    inputs=["file", "text"],
    outputs="text",
    title=title,
    description="Upload a PDF file and ask a question about its content to get an answer.",
    theme="peach",
)

# Launch the interface when this script runs.
iface.launch()