import streamlit as st
import torch
import fitz  # PyMuPDF
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"


@st.cache_resource
def load_model():
    # Cache the model and tokenizer so they are loaded only once per session
    qa_model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return qa_model, tokenizer


# Function to process the uploaded PDF file and answer a question about it
def process_pdf(uploaded_file, question, qa_model, tokenizer):
    # Only run once a file has been uploaded and a question entered
    if uploaded_file is None or not question:
        return

    # Read the upload as bytes and open it in memory with PyMuPDF
    file_contents = uploaded_file.read()
    try:
        doc = fitz.open(stream=file_contents, filetype="pdf")
    except Exception:
        st.error("Error occurred while opening the PDF file.")
        return

    # Extract plain text from every page
    text = ""
    for page in doc:
        text += page.get_text()

    # Tokenize the question/context pair; long PDFs are truncated to the
    # model's 512-token limit
    inputs = tokenizer(question, text, return_tensors="pt",
                       max_length=512, truncation=True)

    # Perform extractive question answering
    with torch.no_grad():
        outputs = qa_model(**inputs)
    start_scores = outputs.start_logits
    end_scores = outputs.end_logits

    # Decode and display the highest-scoring answer span for each batch item
    for i, (start, end) in enumerate(zip(start_scores, end_scores)):
        start_idx = int(start.argmax())
        end_idx = int(end.argmax())
        answer = tokenizer.decode(inputs["input_ids"][i][start_idx:end_idx + 1],
                                  skip_special_tokens=True)
        st.write("Answer:", answer)
        st.write("---")


# Main function
def main():
    # Load the question answering model and tokenizer
    qa_model, tokenizer = load_model()

    # Set title and description
    st.title("PDF QA Generator")
    st.write("Upload a PDF file and get answers to your questions!")

    # Create a sidebar for file upload
    st.sidebar.title("Upload File")
    uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type=["pdf"])

    # The extractive QA model needs a question as well as the PDF text
    question = st.text_input("Ask a question about the PDF")

    # Process the uploaded PDF file
    process_pdf(uploaded_file, question, qa_model, tokenizer)


if __name__ == "__main__":
    main()
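
# Usage (a minimal sketch, assuming this script is saved as app.py and that
# streamlit, transformers, torch, and PyMuPDF are installed):
#
#   pip install streamlit transformers torch pymupdf
#   streamlit run app.py
#
# Streamlit serves the app locally (by default at http://localhost:8501);
# upload a PDF in the sidebar, type a question, and the extracted answer
# span is displayed on the page.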