"""Streamlit front end that summarizes user-supplied text."""

import re

import nltk
import streamlit as st

# Ensure the sentence tokenizer data is available. Streamlit re-executes this
# script on every widget interaction, so only invoke the downloader when the
# resource is actually missing instead of on each rerun.
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    nltk.download("punkt_tab")

# Imported after the tokenizer data check so the original execution order
# (download first, then import the summarizer) is preserved.
from summarizer import Summarizer  # noqa: E402

# Replace these with your actual model and tokenizer paths.
MODEL_PATH = r"server_side_summarize_news"
TOKENIZER_PATH = r"server_side_summarize_news"


def clean_text(text: str) -> str:
    """Normalize whitespace in *text*.

    Every run of whitespace characters (spaces, tabs, newlines, ...) is
    collapsed into a single space, and leading/trailing whitespace is
    removed. Expand this function if additional cleaning is needed.

    Args:
        text: The raw input string.

    Returns:
        The whitespace-normalized string.
    """
    return re.sub(r"\s+", " ", text).strip()


@st.cache_resource
def _load_summarizer(model_path: str, tokenizer_path: str) -> Summarizer:
    """Build the summarizer once and reuse it across script reruns.

    Without caching, the model would be re-initialized on every button
    click or text edit, because Streamlit reruns the whole script each time.
    NOTE(review): the cached instance is shared by all sessions of this
    server process; confirm that Summarizer is safe to reuse across calls.
    """
    return Summarizer(model_path, tokenizer_path)


# Module-level handle kept under the same name as before.
summarizer = _load_summarizer(MODEL_PATH, TOKENIZER_PATH)

# Set up the Streamlit UI.
st.title("Text Summarizer")
st.write("Enter your text below to generate a summary:")

# Text area for user input.
input_text = st.text_area("Input Text", height=200)

# When the 'Summarize' button is pressed, process the input.
if st.button("Summarize"):
    if not input_text.strip():
        st.warning("Please enter some text to summarize.")
    else:
        cleaned_text = clean_text(input_text)
        try:
            summary = summarizer.iterative_summarization(cleaned_text)
        except Exception as e:
            # Top-level UI boundary: surface the failure to the user rather
            # than letting the app crash with a traceback.
            st.error(f"An error occurred during summarization: {e}")
        else:
            st.subheader("Summary")
            st.write(summary)