Spaces:
Sleeping
Sleeping
| import os | |
| import streamlit as st | |
| from transformers import pipeline | |
| from PyPDF2 import PdfReader | |
| import tempfile | |
| # Function to perform question-answering | |
def question_answering(questions, pdf_text):
    """Answer each question against ``pdf_text`` with an extractive QA model.

    Args:
        questions: Sequence of question strings. Entries that are empty,
            whitespace-only or ``None`` are not sent to the model.
        pdf_text: The context string the answers are extracted from.

    Returns:
        A list with exactly one entry per input question, in input order.
        For a real question the entry is whatever the pipeline returns
        (callers in this file read its ``'answer'`` and ``'score'`` keys).
        For a blank question the entry is a placeholder dict with an empty
        answer and a score of ``0.0``, so that ``zip(questions, answers)``
        in the caller stays aligned.
    """
    questions = list(questions)

    # One fresh placeholder per slot (never a shared dict) so a caller that
    # mutates one result cannot affect another.
    answers = [
        {"score": 0.0, "start": 0, "end": 0, "answer": ""} for _ in questions
    ]

    # The question-answering pipeline rejects empty question strings, so
    # only non-blank entries are forwarded to it.
    pending = [
        index
        for index, question in enumerate(questions)
        if question and question.strip()
    ]
    if not pending:
        # Nothing to answer: skip the (expensive) model load entirely.
        return answers

    # NOTE: the pipeline is rebuilt on every call; it is constructed once
    # here and reused for all questions of this call.
    question_answerer = pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        tokenizer="distilbert-base-cased-distilled-squad",
    )
    for index in pending:
        answers[index] = question_answerer(
            question=questions[index], context=pdf_text
        )
    return answers
def main():
    """Render the Streamlit page: upload a PDF, ask questions, show answers.

    The page shows a PDF uploader, a multi-line text area (one question per
    line) and an "Answer" button. When the button is pressed, the text of
    every PDF page is extracted and joined with newlines, each question is
    answered via ``question_answering``, and the results are displayed as a
    table of question, answer and score (two decimals).
    """
    st.title("Question Answering on PDF Files")
    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
    st.write("Enter your question(s) below (separate multiple questions with new lines):")
    raw_questions = st.text_area("Questions") or ""

    # Blank lines (including the single empty string produced by an empty
    # text area) are dropped: an empty question cannot be answered and
    # would make the QA pipeline fail.
    questions = [line.strip() for line in raw_questions.splitlines() if line.strip()]

    if not st.button("Answer"):
        return
    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        return
    if not questions:
        st.warning("Please enter at least one question.")
        return

    # Read the upload directly instead of copying it into the shared temp
    # directory under the client-supplied file name: that copy could
    # collide between users, was never removed, and trusted an external
    # name as a path component. The uploaded object is file-like, which
    # PdfReader accepts.
    pdf_reader = PdfReader(uploaded_file)

    # `or ""` guards against a page yielding no text object at all.
    pdf_text = "\n".join((pdf_page.extract_text() or "") for pdf_page in pdf_reader.pages)
    if not pdf_text.strip():
        # E.g. a scanned PDF without a text layer; an empty context would
        # otherwise be passed on to the model.
        st.error("No extractable text was found in this PDF.")
        return

    answers = question_answering(questions, pdf_text)

    # Display the results as a table whose first row holds the headers.
    table_data = [["Question", "Answer", "Score"]]
    for question, answer in zip(questions, answers):
        table_data.append([question, answer['answer'], f"{answer['score']:.2f}"])
    st.write("Questions and Answers:")
    st.table(table_data)
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()