"""Streamlit app that summarizes an uploaded PDF/text file or pasted text."""

import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import pipeline

# Maximum number of characters handed to the summarizer in a single call.
DEFAULT_CHUNK_SIZE = 1500
# Length bounds passed to the summarizer for each chunk summary.
DEFAULT_MAX_LENGTH = 150
DEFAULT_MIN_LENGTH = 30

# Initialize the summarizer: device 0 when CUDA is available, otherwise -1.
# NOTE(review): Streamlit re-executes this script on interaction, so the model
# is presumably re-created each run -- consider caching it; confirm against the
# Streamlit version in use.
device = 0 if torch.cuda.is_available() else -1
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)


def split_into_chunks(text, chunk_size=DEFAULT_CHUNK_SIZE):
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per piece; must be positive.

    Returns:
        A list of non-blank pieces in their original order. Pieces that
        contain only whitespace are dropped, so blank input yields ``[]``.

    Raises:
        ValueError: If *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    pieces = (
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    )
    return [piece for piece in pieces if piece.strip()]


def extract_text_from_pdf(pdf_file, chunk_size=DEFAULT_CHUNK_SIZE):
    """Extract the text of every page of a PDF and return it in chunks.

    Args:
        pdf_file: A path or file-like object accepted by ``PdfReader``.
        chunk_size: Maximum number of characters per returned chunk.

    Returns:
        A list of text chunks; empty when no page yields any text.
    """
    reader = PdfReader(pdf_file)
    # Skip pages for which extract_text() returns nothing.
    page_texts = (page.extract_text() for page in reader.pages)
    text = "".join(page_text for page_text in page_texts if page_text)
    return split_into_chunks(text, chunk_size)


def extract_text_from_txt(txt_file):
    """Read a binary file-like object and decode its content as UTF-8.

    Args:
        txt_file: An object whose ``read()`` returns bytes.

    Returns:
        The decoded text.

    Raises:
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    return txt_file.read().decode("utf-8")


def summarize_chunk(
    chunk,
    max_length=DEFAULT_MAX_LENGTH,
    min_length=DEFAULT_MIN_LENGTH,
):
    """Summarize one chunk of text with the module-level summarizer.

    Args:
        chunk: The text to summarize.
        max_length: Upper length bound forwarded to the summarizer.
        min_length: Lower length bound forwarded to the summarizer.

    Returns:
        The ``summary_text`` of the first result returned by the summarizer.
    """
    # The length bounds are explicit parameters with defaults; previously they
    # were read from undefined globals, which raised NameError on every call.
    results = summarizer(
        chunk,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )
    return results[0]["summary_text"]


# Streamlit interface
st.subheader("Generate Summary for PDF or Text Files")

# Input: either an uploaded file or text typed into the text area.
uploaded_file = st.file_uploader("Upload a PDF or Text file", type=["pdf", "txt"])
user_text = st.text_area("Or write your text here:", "")

if uploaded_file or user_text:
    try:
        # An uploaded file takes precedence over the text area.
        if uploaded_file:
            if uploaded_file.type == "application/pdf":
                text_chunks = extract_text_from_pdf(uploaded_file)
            elif uploaded_file.type == "text/plain":
                # Chunk plain-text uploads the same way as PDFs so a long
                # file is not sent to the model as one oversized input.
                text_chunks = split_into_chunks(
                    extract_text_from_txt(uploaded_file)
                )
            else:
                st.error("Unsupported file type.")
                # None (rather than []) marks "already reported an error".
                text_chunks = None
        else:
            text_chunks = split_into_chunks(user_text)

        if text_chunks:
            # Summarize chunks sequentially, then join the partial summaries.
            summaries = [summarize_chunk(chunk) for chunk in text_chunks]
            final_summary = " ".join(summaries)

            st.subheader("Summary")
            st.write(final_summary)

            # Allow starting over with another file.
            if st.button("Upload another file"):
                # Prefer st.rerun when this Streamlit version provides it and
                # fall back to the older experimental name otherwise.
                # NOTE(review): a rerun alone may not clear the uploader or
                # text area -- confirm the intended reset behavior.
                trigger_rerun = getattr(st, "rerun", None) or st.experimental_rerun
                trigger_rerun()
        elif text_chunks is not None:
            # Input was supplied but contained no text to summarize.
            st.warning("No text could be extracted to summarize.")
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the app.
        st.error(f"An error occurred: {str(e)}")