Spaces:

ivyblossom
/

question-answering

Running

App Files Files Community

question-answering / app.py

ivyblossom

Update app.py

3dd11fc 11 months ago

raw history blame contribute delete

No virus

1.82 kB

	import os
	import streamlit as st
	from transformers import pipeline
	from PyPDF2 import PdfReader
	import tempfile

	@st.cache_resource(show_spinner=False)
	def _load_question_answerer():
	    """Build the extractive question-answering pipeline once per process.

	    Loading the model and tokenizer is the expensive part of answering, so
	    the pipeline object is kept as a shared resource instead of being
	    rebuilt every time a new question set or document is submitted.
	    """
	    model_name = "distilbert-base-cased-distilled-squad"
	    return pipeline("question-answering", model=model_name, tokenizer=model_name)


	@st.cache_data(show_spinner=False)
	def question_answering(questions, pdf_text):
	    """Answer each question against the text extracted from a PDF.

	    Args:
	        questions: Iterable of question strings.
	        pdf_text: The document text used as the answering context.

	    Returns:
	        A list with exactly one result dict per entry of ``questions``, in
	        the same order. Each dict has at least the ``'answer'`` and
	        ``'score'`` keys. Blank questions, or any question asked against a
	        blank context, yield an empty answer with a score of 0.0 rather
	        than being sent to the model.
	    """
	    questions = list(questions)
	    if not questions:
	        # Nothing to answer: avoid loading the model at all.
	        return []

	    # The transformers QA pipeline rejects empty question/context strings,
	    # so those cases are answered with a placeholder. Keeping one result per
	    # input preserves alignment for callers that zip questions with answers.
	    has_context = bool(pdf_text and pdf_text.strip())
	    question_answerer = _load_question_answerer() if has_context else None

	    answers = []
	    for question in questions:
	        if question_answerer is None or not question or not question.strip():
	            answers.append({"score": 0.0, "start": 0, "end": 0, "answer": ""})
	        else:
	            answers.append(question_answerer(question=question, context=pdf_text))

	    return answers

	def main():
	    """Render the Streamlit page: upload a PDF, ask questions, show answers.

	    The page collects a PDF upload and a newline-separated list of
	    questions. When the "Answer" button is pressed, the PDF text is
	    extracted, every non-blank question is answered against it, and the
	    results are shown as a table of question, answer and score.
	    """
	    st.title("Question Answering on PDF Files")

	    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])

	    st.write("Enter your question(s) below (separate multiple questions with new lines):")
	    # Drop blank lines (empty text area, trailing newline, spacing between
	    # questions) so they are neither sent to the model nor shown as rows.
	    questions = [
	        line.strip()
	        for line in st.text_area("Questions").split('\n')
	        if line.strip()
	    ]

	    if not st.button("Answer"):
	        return

	    if uploaded_file is None:
	        st.warning("Please upload a PDF file first.")
	        return

	    if not questions:
	        st.warning("Please enter at least one question.")
	        return

	    # Read the PDF straight from the in-memory upload. Writing it to the
	    # shared temp directory under the user-supplied file name would leave
	    # files behind and lets the upload name influence a filesystem path.
	    pdf_reader = PdfReader(uploaded_file)
	    # extract_text() can yield None/empty for pages without a text layer.
	    pdf_text = "\n".join(
	        (pdf_page.extract_text() or "") for pdf_page in pdf_reader.pages
	    )

	    if not pdf_text.strip():
	        st.error("No extractable text was found in the uploaded PDF.")
	        return

	    answers = question_answering(questions, pdf_text)

	    # Display the results as a table with a header row.
	    table_data = [["Question", "Answer", "Score"]]
	    for question, answer in zip(questions, answers):
	        table_data.append([question, answer['answer'], f"{answer['score']:.2f}"])

	    st.write("Questions and Answers:")
	    st.table(table_data)

	# Launch the app only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()