# Spaces: Running
import os
import tempfile

import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline
# Function to perform question-answering | |
def question_answering(questions, pdf_text):
    """Answer each question against the extracted PDF text.

    Args:
        questions: Iterable of question strings.
        pdf_text: The context string the answers are extracted from.

    Returns:
        A list with exactly one result dict per input question, in the same
        order, so callers can ``zip`` it with ``questions``. Answerable
        questions get the dict produced by the Hugging Face
        question-answering pipeline (callers in this file read its
        ``"answer"`` and ``"score"`` keys). Blank questions, or any question
        when ``pdf_text`` is empty, get a placeholder with an empty answer
        and a score of 0.0 instead of being sent to the model.
    """
    questions = list(questions)
    has_context = bool(pdf_text and pdf_text.strip())
    # The pipeline rejects an empty question or context, so decide up front
    # which entries can actually be sent to the model.
    answerable = [
        bool(has_context and question and question.strip())
        for question in questions
    ]

    question_answerer = None
    if any(answerable):
        # Loading the model is expensive; only do it when there is at least
        # one question that will really be answered.
        question_answerer = pipeline(
            "question-answering",
            model="distilbert-base-cased-distilled-squad",
            tokenizer="distilbert-base-cased-distilled-squad",
        )

    answers = []
    for question, can_answer in zip(questions, answerable):
        if can_answer:
            answers.append(question_answerer(question=question, context=pdf_text))
        else:
            # Fresh dict per entry so callers may mutate results safely.
            # Keys follow the result shape documented for the pipeline.
            answers.append({"score": 0.0, "start": 0, "end": 0, "answer": ""})
    return answers
def main():
    """Render the Streamlit page: upload a PDF, ask questions, show answers.

    The page shows a PDF uploader, a text area taking one question per line,
    and an "Answer" button. When the button is pressed with a PDF and at
    least one non-blank question, the PDF text is extracted, every question
    is answered against it via ``question_answering``, and the results are
    shown as a table of question, answer and score.
    """
    st.title("Question Answering on PDF Files")
    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
    st.write("Enter your question(s) below (separate multiple questions with new lines):")
    raw_questions = st.text_area("Questions") or ""
    # Ignore blank lines (including the one produced by a trailing newline)
    # so empty questions are never sent to the model.
    questions = [line.strip() for line in raw_questions.splitlines() if line.strip()]

    if not st.button("Answer"):
        return
    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        return
    if not questions:
        st.warning("Please enter at least one question.")
        return

    # The uploaded file is a file-like object, so it is parsed in memory.
    # This avoids writing to a temp path built from the client-supplied
    # filename, which was never cleaned up and could collide between users.
    pdf_reader = PdfReader(uploaded_file)
    # Guard against pages that yield no text (e.g. scanned images).
    pdf_text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
    if not pdf_text.strip():
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    answers = question_answering(questions, pdf_text)

    # One mapping per row so "Question"/"Answer"/"Score" become real column
    # headers instead of being rendered as the first data row.
    table_data = [
        {
            "Question": question,
            "Answer": answer["answer"],
            "Score": f"{answer['score']:.2f}",
        }
        for question, answer in zip(questions, answers)
    ]
    st.write("Questions and Answers:")
    st.table(table_data)
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()