import streamlit as st
import torch
import fitz  # PyMuPDF
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"


@st.cache_resource
def load_model():
    # Cache the model and tokenizer so they are loaded only once per session
    qa_model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return qa_model, tokenizer


# Function to process the uploaded PDF file and answer a question about it
def process_pdf(uploaded_file, question, qa_model, tokenizer):
    # Only run once a file has been uploaded and a question entered
    if uploaded_file is None or not question:
        return

    # Read the upload as bytes and open it in memory with PyMuPDF
    file_contents = uploaded_file.read()
    try:
        doc = fitz.open(stream=file_contents, filetype="pdf")
    except Exception:
        st.error("Error occurred while opening the PDF file.")
        return

    # Extract plain text from every page
    text = ""
    for page in doc:
        text += page.get_text()

    # Tokenize the question/context pair; long PDFs are truncated to the
    # model's 512-token limit
    inputs = tokenizer(question, text, return_tensors="pt",
                       max_length=512, truncation=True)

    # Perform extractive question answering
    with torch.no_grad():
        outputs = qa_model(**inputs)
    start_scores = outputs.start_logits
    end_scores = outputs.end_logits

    # Decode and display the highest-scoring answer span for each batch item
    for i, (start, end) in enumerate(zip(start_scores, end_scores)):
        start_idx = int(start.argmax())
        end_idx = int(end.argmax())
        answer = tokenizer.decode(inputs["input_ids"][i][start_idx:end_idx + 1],
                                  skip_special_tokens=True)
        st.write("Answer:", answer)
        st.write("---")


# Main function
def main():
    # Load the question answering model and tokenizer
    qa_model, tokenizer = load_model()

    # Set title and description
    st.title("PDF QA Generator")
    st.write("Upload a PDF file and get answers to your questions!")

    # Create a sidebar for file upload
    st.sidebar.title("Upload File")
    uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type=["pdf"])

    # The extractive QA model needs a question as well as the PDF text
    question = st.text_input("Ask a question about the PDF")

    # Process the uploaded PDF file
    process_pdf(uploaded_file, question, qa_model, tokenizer)


if __name__ == "__main__":
    main()
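
# Usage (a minimal sketch, assuming this script is saved as app.py and that
# streamlit, transformers, torch, and PyMuPDF are installed):
#
#   pip install streamlit transformers torch pymupdf
#   streamlit run app.py
#
# Streamlit serves the app locally (by default at http://localhost:8501);
# upload a PDF in the sidebar, type a question, and the extracted answer
# span is displayed on the page.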