import streamlit as st
import re
import nltk
nltk.download('punkt_tab')  # Ensure sentence tokenizers are available.
# Import your summarizer. The model and tokenizer paths are configured below (MODEL_PATH / TOKENIZER_PATH).
from summarizer import Summarizer

def clean_text(text: str) -> str:
    """Normalize the whitespace in ``text``.

    Each run of one or more whitespace characters (spaces, tabs, newlines,
    ...) is replaced by a single space, and whitespace at either end of the
    result is removed. Extend this function if further cleaning is needed.
    """
    single_spaced = re.sub(r'\s+', ' ', text)
    return single_spaced.strip()

# Locations handed to the Summarizer constructor. Both currently point at the
# same directory; adjust them to match your setup.
MODEL_PATH = r"server_side_summarize_news"          # Replace with your model path.
TOKENIZER_PATH = r"server_side_summarize_news"    # Replace with your tokenizer path.


@st.cache_resource
def _load_summarizer(model_path, tokenizer_path):
    """Construct the Summarizer once and reuse it across script reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the Summarizer would be constructed again
    on each rerun (presumably reloading the model from disk -- confirm
    against the Summarizer implementation). ``st.cache_resource`` keeps a
    single instance per distinct (model_path, tokenizer_path) pair.

    NOTE(review): the cached instance is shared by all sessions of the app;
    confirm the Summarizer is safe to share that way.
    """
    return Summarizer(model_path, tokenizer_path)


summarizer = _load_summarizer(MODEL_PATH, TOKENIZER_PATH)

# Page heading and a short prompt for the user.
st.title("Text Summarizer")
st.write("Enter your text below to generate a summary:")

# Multi-line box holding the text to be summarized.
input_text = st.text_area("Input Text", height=200)

# Capture the button state once so the branches below read as a flat chain.
summarize_clicked = st.button("Summarize")

if summarize_clicked and not input_text.strip():
    # Nothing but whitespace was entered: ask for real input instead.
    st.warning("Please enter some text to summarize.")
elif summarize_clicked:
    # Normalize whitespace before handing the text to the summarizer.
    normalized_input = clean_text(input_text)
    try:
        # Produce the summary and render it under its own heading.
        result = summarizer.iterative_summarization(normalized_input)
        st.subheader("Summary")
        st.write(result)
    except Exception as e:
        # Surface any failure in the page rather than crashing the app.
        st.error(f"An error occurred during summarization: {e}")