Spaces:

swamisharan
/

pdf-gpt

Runtime error

pdf-gpt / app.py

Update app.py

d17f001 verified over 1 year ago

1.2 kB

	from pathlib import Path
	from typing import Union
	import PyPDF2
	from transformers import pipeline
	import gradio as gr

	# Build the extractive question-answering pipeline once, at module load,
	# so that answer_doc_question can reuse the same object on every call.
	nlp = pipeline(
	    task='question-answering',
	    model='deepset/roberta-base-squad2',
	)

	# Function to extract text from PDF
	def extract_text_from_pdf(pdf_file: Union[str, Path]) -> str:
	    """Return the text of every page of a PDF as a single string.

	    Args:
	        pdf_file: Filesystem path of the PDF to read.

	    Returns:
	        The extracted text of all pages in page order, separated by
	        newlines. A page that yields no text contributes an empty string.
	    """
	    with open(pdf_file, 'rb') as pdf_file_obj:
	        pdf_reader = PyPDF2.PdfReader(pdf_file_obj)
	        # Extract while the file is still open. Pages are joined with a
	        # newline so the last word of one page is not glued to the first
	        # word of the next; `or ''` is a defensive guard so a falsy
	        # extraction result cannot break the join.
	        return '\n'.join(
	            page.extract_text() or '' for page in pdf_reader.pages
	        )

	def answer_doc_question(pdf_file, question):
	    """Answer a question about an uploaded PDF using the QA pipeline.

	    Args:
	        pdf_file: The uploaded PDF, given either as a filesystem path
	            (``str`` or ``Path``) or as an object exposing the path
	            through a ``.name`` attribute. ``None`` means nothing was
	            uploaded.
	        question: The question to ask about the document.

	    Returns:
	        The answer string produced by the pipeline, or a short
	        explanatory message when the upload, the question, or the
	        extractable text is missing.
	    """
	    if pdf_file is None:
	        return "Please upload a PDF document."
	    if not question or not question.strip():
	        return "Please type a question."

	    # The File input is declared with type="filepath", which hands this
	    # function a plain path rather than a file object, so `.name` must
	    # only be used when the argument is not already a path.
	    if isinstance(pdf_file, (str, Path)):
	        pdf_path = pdf_file
	    else:
	        pdf_path = pdf_file.name

	    context = extract_text_from_pdf(pdf_path)

	    # An empty context (e.g. a PDF with no text layer) cannot be
	    # answered from; report it instead of passing it to the pipeline.
	    if not context.strip():
	        return "No text could be extracted from this PDF."

	    qa_input = {
	        'question': question,
	        'context': context,
	    }
	    res = nlp(qa_input, max_answer_length=500)

	    return res['answer']

	# Assemble the Gradio UI: a PDF upload and a question box are passed to
	# answer_doc_question, and its return value is rendered as text.
	pdf_input = gr.File(
	    type="filepath",
	    label="Upload a PDF document and ask a question about it.",
	)
	question = gr.Textbox(
	    label="Type a question regarding the uploaded document here.",
	)
	iface = gr.Interface(
	    fn=answer_doc_question,
	    inputs=[pdf_input, question],
	    outputs="text",
	)

	# Start serving the interface.
	iface.launch()