"""Streamlit app that summarizes pasted text or an uploaded PDF/DOCX/TXT file."""

import concurrent.futures

import docx
import fitz  # PyMuPDF
import streamlit as st
from transformers import pipeline

# Summarization pipeline, shared by every call to summarize_chunk().
# NOTE(review): Streamlit re-runs the script on each interaction, so this
# presumably re-creates the pipeline every rerun; consider caching it
# (e.g. st.cache_resource) if the installed Streamlit version provides it.
pipe = pipeline("summarization", model="facebook/bart-large-cnn")


def chunk_text(text, chunk_size=512):
    """Split *text* into consecutive slices of at most *chunk_size* characters.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be >= 1.

    Returns:
        A list of substrings that concatenate back to *text* (empty list for
        empty input).

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    # A non-positive step would either raise an opaque range() error (0) or
    # silently drop the whole text (negative), so reject it explicitly.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]


def summarize_chunk(chunk):
    """Return the summary text the module-level pipeline produces for *chunk*."""
    return pipe(chunk)[0]['summary_text']


def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: A binary file-like object whose ``read()`` returns the PDF bytes.

    Returns:
        The text of all pages joined in page order.
    """
    # The context manager closes the document even if text extraction fails.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def extract_text_from_docx(file):
    """Return the paragraphs of a DOCX document joined by newlines.

    Args:
        file: A path or file-like object accepted by ``docx.Document``.
    """
    doc = docx.Document(file)
    return "\n".join(para.text for para in doc.paragraphs)


def _read_uploaded_file(uploaded_file):
    """Return the text of an uploaded file, or None if its MIME type is unsupported."""
    mime_type = uploaded_file.type
    if mime_type == "application/pdf":
        return extract_text_from_pdf(uploaded_file)
    if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return extract_text_from_docx(uploaded_file)
    if mime_type == "text/plain":
        # Replace undecodable bytes rather than crashing on non-UTF-8 uploads.
        return uploaded_file.read().decode("utf-8", errors="replace")
    return None


def _summarize_text(text):
    """Summarize *text* chunk by chunk and join the partial summaries with spaces."""
    chunks = chunk_text(text)
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map preserves input order, so the summaries stay in sequence.
        summaries = list(executor.map(summarize_chunk, chunks))
    return ' '.join(summaries)


def main():
    """Render the UI and summarize the entered text or the uploaded file.

    Text typed into the text area takes precedence over an uploaded file.
    """
    st.title("Text Summarization App")

    input_text = st.text_area("Enter Text (Due to the Free CPU Basic Hardware being used, it takes more time for the output, please keep the prompt minimal)")
    uploaded_file = st.file_uploader("Upload a file", type=['pdf', 'txt', 'doc', 'docx'])

    if not st.button("Summarize"):
        return

    if input_text and input_text.strip():
        text = input_text
    elif uploaded_file is not None:
        text = _read_uploaded_file(uploaded_file)
        if text is None:
            st.error("Unsupported file type")
            return
    else:
        st.warning("Please enter some text or upload a file to summarize.")
        return

    # Nothing to summarize (e.g. a scanned PDF with no text layer).
    if not text.strip():
        st.warning("No text found to summarize.")
        return

    st.subheader("Summary")
    st.write(_summarize_text(text))


if __name__ == "__main__":
    main()