Spaces:

aps19
/

doc_summaryLLM

Runtime error

App Files Files Community

doc_summaryLLM / app.py

aps19

Update app.py

445e71a about 1 year ago

raw

history blame

1.95 kB

	import streamlit as st
	import fitz # PyMuPDF
	from transformers import T5Tokenizer, T5ForConditionalGeneration
	from transformers import pipeline

	# Model and tokenizer loading
	checkpoint = "./model/LaMini-Flan-T5-248M"
	tokenizer = T5Tokenizer.from_pretrained(checkpoint)
	base_model = T5ForConditionalGeneration.from_pretrained(checkpoint)

	# LLM pipeline
	def llm_pipeline(pdf_contents, max_length=500, min_length=50):
	# Extract text from the PDF contents
	pdf_document = fitz.open(stream=pdf_contents, filetype="pdf")
	pdf_text = ""
	for page_num in range(pdf_document.page_count):
	page = pdf_document.load_page(page_num)
	pdf_text += page.get_text()

	# Use the pipeline to generate the summary
	pipe_sum = pipeline(
	'summarization',
	model=base_model,
	tokenizer=tokenizer,
	max_length=max_length,
	min_length=min_length
	)

	result = pipe_sum(pdf_text)
	summary = result[0]['summary_text']
	return summary

	# Streamlit code
	st.set_page_config(layout="wide")

	def main():
	st.title("Document Summarization App using a Smaller Model")

	# Button to upload a PDF file
	uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

	if uploaded_file is not None:
	max_length = st.slider("Maximum Summary Length", min_value=50, max_value=1000, step=20, value=500)
	min_length = st.slider("Minimum Summary Length", min_value=10, max_value=500, step=10, value=50)

	if st.button("Summarize"):
	# Check if the uploaded file is a PDF
	if uploaded_file.type == "application/pdf":
	summary = llm_pipeline(uploaded_file.read(), max_length, min_length)

	# Display the summary
	st.info("Summarization Complete")
	st.success(summary)

	else:
	st.error("Please upload a valid PDF file.")


	if __name__ == "__main__":
	main()