# PDF QA Generator -- Streamlit Space app
import fitz  # PyMuPDF
import streamlit as st
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
def process_pdf(uploaded_file, qa_model, tokenizer):
    """Extract the text of an uploaded PDF and run the QA model over it.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` from ``st.file_uploader``,
            or ``None`` when nothing has been uploaded yet.
        qa_model: a HuggingFace ``AutoModelForQuestionAnswering`` instance.
        tokenizer: the tokenizer matching ``qa_model``.

    Side effects:
        Writes the decoded answer span(s) to the Streamlit page via
        ``st.write``; shows ``st.error`` if the PDF cannot be opened.
    """
    if uploaded_file is None:
        return  # nothing to do until the user uploads a file

    file_contents = uploaded_file.read()
    try:
        # BUG FIX: raw bytes must be passed via ``stream=``. The first
        # positional argument of fitz.open() is a *filename*, so passing
        # the file's bytes there raised instead of opening the document.
        doc = fitz.open(stream=file_contents, filetype="pdf")
    except Exception:
        # fitz.open raises on bad input rather than returning None, so the
        # original ``if doc is not None`` check could never report failure.
        st.error("Error occurred while opening the PDF file.")
        return

    with doc:  # ensure the document handle is closed after extraction
        # join() avoids the quadratic cost of repeated ``text +=``
        text = "".join(page.get_text() for page in doc)

    # NOTE(review): a SQuAD-style QA model expects a (question, context)
    # pair; here the whole document is fed as a single sequence, so the
    # extracted "answer" spans are of limited meaning -- confirm intended UX.
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        outputs = qa_model(**inputs)

    # One (start, end) logit vector per batch item (batch size is 1 here).
    for i, (start, end) in enumerate(zip(outputs.start_logits, outputs.end_logits)):
        answer = tokenizer.decode(
            inputs["input_ids"][i][start.argmax():end.argmax() + 1]
        )
        st.write("Answer:", answer)
        st.write("---")
@st.cache_resource
def _load_qa_components():
    """Load and cache the QA model + tokenizer.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the BERT-large checkpoint (~1.3 GB) would be reloaded
    on each rerun. ``st.cache_resource`` keeps one shared instance alive.
    """
    checkpoint = "bert-large-uncased-whole-word-masking-finetuned-squad"
    model = AutoModelForQuestionAnswering.from_pretrained(checkpoint)
    tok = AutoTokenizer.from_pretrained(checkpoint)
    return model, tok


def main():
    """Entry point: render the page, accept a PDF upload, and process it."""
    qa_model, tokenizer = _load_qa_components()

    # Page header and instructions.
    st.title("PDF QA Generator")
    st.write("Upload a PDF file and generate questions and answers!")

    # Sidebar upload widget; returns None until the user picks a file.
    st.sidebar.title("Upload File")
    uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type=['pdf'])

    # process_pdf handles the None (no upload yet) case itself.
    process_pdf(uploaded_file, qa_model, tokenizer)
# Launch the app only when the script is executed directly, not on import.
if __name__ == "__main__":
    main()