Spaces:

Isanka
/

jit

Sleeping

jit / app.py

Update app.py

893c729 verified over 1 year ago

1.43 kB

	import fitz # PyMuPDF
	import gradio as gr
	from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

	# Function to extract text from PDF
	def extract_text_from_pdf(pdf_path):
	    """Return the concatenated text of every page in a PDF.

	    Args:
	        pdf_path: Either a filesystem path given as a ``str``, or an
	            object that exposes the path through a ``.name`` attribute
	            (such as the uploaded-file object handed over by the UI).

	    Returns:
	        The text of all pages joined in page order; ``""`` when no page
	        yields any text.
	    """
	    # Plain strings are used as-is; anything else must carry its path in
	    # ``.name``, which is what the original implementation relied on.
	    path = pdf_path if isinstance(pdf_path, str) else pdf_path.name
	    # The context manager closes the document even if a page fails to
	    # parse, so the file handle is never leaked.
	    with fitz.open(path) as document:
	        # join() avoids the repeated copying of ``text += ...`` in a loop.
	        return "".join(page.get_text() for page in document)

	# Checkpoint name shared by the tokenizer and the QA model
	model_name = "distilbert-base-cased-distilled-squad"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForQuestionAnswering.from_pretrained(model_name)

	# Build the question-answering pipeline once, at module load, from the
	# objects created above
	qa_pipeline = pipeline(
	    task="question-answering",
	    model=model,
	    tokenizer=tokenizer,
	)

	# Function to answer questions based on the PDF content
	def answer_question(pdf_file, question):
	    """Answer a question using the text of an uploaded PDF.

	    Args:
	        pdf_file: The uploaded PDF, forwarded unchanged to
	            ``extract_text_from_pdf``; ``None`` when nothing was uploaded.
	        question: The question to answer, as typed by the user.

	    Returns:
	        The ``'answer'`` field of the question-answering pipeline result,
	        or a short message saying what is missing when there is no file,
	        no question, or no extractable text.
	    """
	    # Guard clauses: a missing upload would fail on attribute access in
	    # the extractor, and the pipeline rejects an empty question or context,
	    # so report the problem to the user instead of raising.
	    if pdf_file is None:
	        return "Please upload a PDF file."
	    if not question or not question.strip():
	        return "Please enter a question."
	    # Extract text from the uploaded PDF file
	    content = extract_text_from_pdf(pdf_file)
	    # Image-only (scanned) PDFs produce no text at all
	    if not content.strip():
	        return "No extractable text was found in the PDF."
	    # Get the answer using the question-answering pipeline
	    result = qa_pipeline(question=question, context=content)
	    return result['answer']

	# Gradio UI: a PDF upload and a question box as inputs, plain text as output
	iface = gr.Interface(
	    title="DistilBERT Question Answering",
	    description="Upload a PDF and ask questions based on the content of the PDF.",
	    fn=answer_question,
	    inputs=[
	        gr.File(label="PDF File", file_types=[".pdf"]),
	        gr.Textbox(lines=2, placeholder="Ask a question..."),
	    ],
	    outputs="text",
	)

	# Start the app
	iface.launch()