| | import streamlit as st |
| | import time |
| | import re |
| | from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM |
| |
|
| | |
# Configure the Streamlit page once, before any other st.* call.
# NOTE(review): the page_icon glyph appears mojibake-mangled in this file
# (likely originally a phone emoji) — confirm the intended character.
st.set_page_config(page_title="WhatsApp Chat Analyzer", page_icon="π±", layout="wide")
@st.cache_resource
def load_pipeline():
    """Build the summarization pipeline for the fine-tuned Hub model.

    Cached with st.cache_resource so the model is downloaded and
    instantiated once per server process, not once per rerun.

    Returns:
        A transformers summarization pipeline, or None when loading
        fails (the error is surfaced in the UI via st.error).
    """
    model_id = "AishaniS/text_summarizer"
    try:
        tok = AutoTokenizer.from_pretrained(model_id)
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        return pipeline("summarization", model=seq2seq, tokenizer=tok)
    except Exception as err:
        # Boundary handler: show the failure in the app instead of crashing.
        st.error(f"Error loading model from Hugging Face: {err}")
        return None
| |
|
# Module-level handle to the (cached) pipeline; None if loading failed.
summarizer = load_pipeline()
| |
|
| | |
# Leading "date, time - " stamp WhatsApp puts before each message, e.g.
# "24/12/25, 09:38 - ". Optional seconds and am/pm marker cover export
# locales that include them ("3/1/2024, 9:05 pm - "). Compiled once at
# import time instead of on every call.
_TIMESTAMP_PREFIX = re.compile(
    r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}(?::\d{2})?(?:\s?[AaPp][Mm])?\s-\s'
)

# Substrings marking system lines that carry no conversational content.
_NOISE_MARKERS = ("<Media omitted>", "Messages and calls are end-to-end encrypted")


def clean_whatsapp_log(text):
    """Strip WhatsApp export boilerplate, keeping only "Name: Message" lines.

    Target line format: "24/12/25, 09:38 - Name: Message".

    Args:
        text: Raw contents of an exported chat, one message per line.

    Returns:
        Newline-joined messages with the timestamp prefix removed; media
        placeholders, the encryption notice, and blank lines are dropped.
    """
    clean_lines = []
    for line in text.split('\n'):
        # Skip system noise (media placeholders, encryption notice).
        if any(marker in line for marker in _NOISE_MARKERS):
            continue
        cleaned_line = _TIMESTAMP_PREFIX.sub('', line).strip()
        if cleaned_line:
            clean_lines.append(cleaned_line)
    return "\n".join(clean_lines)
| |
|
| | |
def chunk_text(text, max_chars=2000):
    """Split text into newline-aligned chunks of roughly max_chars each.

    Lines are never split; a single line longer than max_chars becomes
    its own (oversized) chunk.

    Args:
        text: The text to split, with '\n' line separators.
        max_chars: Soft upper bound on chunk size in characters.

    Returns:
        List of chunk strings; each ends with '\n'. Never contains an
        empty chunk (the original appended the initial empty accumulator
        when the very first line already exceeded max_chars).
    """
    chunks = []
    current_chunk = ""
    for line in text.split('\n'):
        if len(current_chunk) + len(line) < max_chars:
            current_chunk += line + "\n"
        else:
            # Flush only a non-empty accumulator — avoids emitting an
            # empty chunk when the first line is >= max_chars.
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = line + "\n"
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
| |
|
| | |
# --- Page header and file input ---
# NOTE(review): the emoji in the title and the status markers ("π±", "β")
# appear mojibake-mangled in this file — confirm the original glyphs
# (likely a phone emoji and check/cross marks) before shipping.
st.title("π± Real-Time WhatsApp Summarizer")
# Show which model backs the app and whether it loaded successfully.
st.markdown(f"**Model:** `AishaniS/text_summarizer` | **Status:** {'β Loaded' if summarizer else 'β Error'}")
st.markdown("Upload your exported `_chat.txt` file to analyze conversation.")

# Streamlit returns None until the user supplies a .txt file.
uploaded_file = st.file_uploader("Choose a file", type=['txt'])
| |
|
# --- Main flow: runs only when a file is uploaded AND the model loaded ---
if uploaded_file and summarizer:
    # NOTE(review): assumes the export is UTF-8; a differently-encoded file
    # would raise UnicodeDecodeError here — confirm whether that needs handling.
    raw_text = uploaded_file.getvalue().decode("utf-8")

    # Strip timestamps and system lines before summarization.
    clean_text = clean_whatsapp_log(raw_text)

    # Side-by-side layout: cleaned input on the left, summary on the right.
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("π Processed Chat")
        st.text_area("Cleaned Input", clean_text, height=400)

    with col2:
        st.subheader("π€ AI Summary")
        if st.button("Generate Summary"):
            if not clean_text:
                st.warning("Chat is empty after cleaning. Check the file format.")
            else:
                with st.spinner("Analyzing..."):
                    start_time = time.time()

                    chunks = chunk_text(clean_text)
                    summary_parts = []

                    # Only the first 3 chunks (~6000 chars) are summarized;
                    # longer chats are silently truncated. NOTE(review):
                    # consider telling the user when truncation happens.
                    for i, chunk in enumerate(chunks[:3]):
                        try:
                            res = summarizer(chunk, max_length=128, min_length=30, do_sample=False)
                            summary_parts.append(res[0]['summary_text'])
                        except Exception as e:
                            # Best-effort: a failing chunk is skipped with a
                            # warning rather than aborting the whole summary.
                            st.warning(f"Could not summarize chunk {i+1}: {e}")

                    final_summary = " ".join(summary_parts)

                    end_time = time.time()
                    latency = end_time - start_time

                    st.success(final_summary)
                    st.info(f"β±οΈ Model Latency: {latency:.2f} seconds")