"""Streamlit app that summarizes an uploaded PDF with a local T5 model."""

import streamlit as st
import fitz  # PyMuPDF
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline

# Model and tokenizer loading
checkpoint = "./model/LaMini-Flan-T5-248M"
tokenizer = T5Tokenizer.from_pretrained(checkpoint)
base_model = T5ForConditionalGeneration.from_pretrained(checkpoint)


def _extract_pdf_text(pdf_contents: bytes) -> str:
    """Return the concatenated text of every page in an in-memory PDF."""
    pdf_document = fitz.open(stream=pdf_contents, filetype="pdf")
    try:
        return "".join(
            pdf_document.load_page(page_num).get_text()
            for page_num in range(pdf_document.page_count)
        )
    finally:
        # Release the document even if text extraction fails part-way.
        pdf_document.close()


# LLM pipeline
def llm_pipeline(pdf_contents: bytes, max_length: int = 500, min_length: int = 50) -> str:
    """Summarize the text of a PDF given as raw bytes.

    Args:
        pdf_contents: The raw bytes of the PDF file.
        max_length: Maximum length passed to the summarization pipeline.
        min_length: Minimum length passed to the summarization pipeline.

    Returns:
        The generated summary, or an empty string when the PDF contains no
        extractable text (for example, a scan without a text layer).
    """
    pdf_text = _extract_pdf_text(pdf_contents)
    if not pdf_text.strip():
        # Nothing to summarize; avoid running the model on an empty prompt.
        return ""

    # NOTE(review): the full document text is passed to the model as-is;
    # very long PDFs may exceed the model's input limit. Consider chunking
    # or truncation - confirm the desired behavior.
    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=max_length,
        min_length=min_length,
    )
    result = pipe_sum(pdf_text)
    return result[0]['summary_text']


# Streamlit code
st.set_page_config(layout="wide")


def main() -> None:
    """Render the upload form and display the summary of the uploaded PDF."""
    st.title("Document Summarization using a Language Model")

    # Button to upload a PDF file
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    max_length = st.slider(
        "Maximum Summary Length", min_value=50, max_value=1000, step=20, value=500
    )
    min_length = st.slider(
        "Minimum Summary Length", min_value=10, max_value=500, step=10, value=50
    )

    if not st.button("Summarize"):
        return

    # Check if the uploaded file is a PDF
    if uploaded_file.type != "application/pdf":
        st.error("Please upload a valid PDF file.")
        return

    # The two sliders are independent, so the bounds can be contradictory.
    if min_length > max_length:
        st.error("Minimum summary length cannot exceed the maximum summary length.")
        return

    # getvalue() returns the whole buffer regardless of the current stream
    # position, unlike read(), which returns b"" once the buffer is consumed.
    summary = llm_pipeline(uploaded_file.getvalue(), max_length, min_length)

    if not summary:
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    # Display the summary
    st.info("Summarization Complete")
    st.success(summary)


if __name__ == "__main__":
    main()