import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
import fitz # PyMuPDF
# Function to process the uploaded PDF file
def process_pdf(uploaded_file, qa_model, tokenizer):
    """Extract the text of an uploaded PDF and display the model's answer span.

    Args:
        uploaded_file: File-like object whose ``read()`` yields the PDF bytes,
            or ``None`` when nothing has been uploaded yet (presumably the
            value returned by ``st.file_uploader`` -- confirm against caller).
        qa_model: Callable model whose output exposes ``start_logits`` and
            ``end_logits``, one row per encoded sequence.
        tokenizer: Tokenizer used to encode the text (truncated to 512 tokens)
            and to decode the selected answer span.

    Returns:
        None. Answers, warnings and errors are rendered through Streamlit.
    """
    # Nothing to do until a file has been uploaded.
    if uploaded_file is None:
        return

    # Read the file as bytes
    file_contents = uploaded_file.read()

    # Open the PDF from memory. PyMuPDF signals unreadable input by raising
    # rather than returning None, so the failure path has to be an except
    # clause. NOTE(review): RuntimeError/ValueError are assumed to cover the
    # exceptions raised for empty or corrupt data -- verify against the
    # installed PyMuPDF version.
    try:
        doc = fitz.open(stream=file_contents, filetype="pdf")
    except (RuntimeError, ValueError):
        st.error("Error occurred while opening the PDF file.")
        return

    # Always release the document, even if text extraction fails.
    try:
        text = "".join(page.get_text() for page in doc)
    finally:
        doc.close()

    # A PDF without a text layer (e.g. scanned pages) gives the model nothing
    # to work with, so stop here instead of decoding special tokens.
    if not text.strip():
        st.warning("No extractable text was found in the PDF file.")
        return

    # Tokenize the text; anything beyond 512 tokens is dropped.
    # NOTE(review): only the document text is encoded, with no question.
    # Extractive QA models normally take a (question, context) pair --
    # confirm this is the intended input.
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

    # Perform question answering
    outputs = qa_model(**inputs)
    start_scores = outputs.start_logits
    end_scores = outputs.end_logits

    # Display the highest-scoring answer span for each encoded sequence.
    for token_ids, start, end in zip(inputs["input_ids"], start_scores, end_scores):
        first = int(start.argmax())
        last = int(end.argmax())
        # If the predicted end precedes the start, the slice (and therefore
        # the decoded answer) is empty.
        answer = tokenizer.decode(token_ids[first:last + 1])
        st.write("Answer:", answer)
# Main function
def main():
    """Build the Streamlit page and run QA on whatever PDF the user uploads.

    Loads the question-answering model and its tokenizer, renders the page
    title, description and sidebar uploader, then passes the uploader's result
    to ``process_pdf``.
    """
    # Both the model and the tokenizer come from the same checkpoint.
    checkpoint = "bert-large-uncased-whole-word-masking-finetuned-squad"
    model = AutoModelForQuestionAnswering.from_pretrained(checkpoint)
    tok = AutoTokenizer.from_pretrained(checkpoint)

    # Page header
    st.title("PDF QA Generator")
    st.write("Upload a PDF file and generate questions and answers!")

    # Sidebar: file upload widget restricted to PDFs
    sidebar = st.sidebar
    sidebar.title("Upload File")
    pdf_upload = sidebar.file_uploader("Choose a PDF file", type=["pdf"])

    # Process the uploaded PDF file
    process_pdf(pdf_upload, model, tok)
if __name__ == "__main__":