# utils/summarizer.py
"""Executive-summary helpers built on a Hugging Face summarization pipeline.

The module loads ``facebook/bart-large-cnn`` once at import time and exposes:

* ``clean_text``       - whitespace normalisation
* ``split_text``       - sentence-aware chunking with a hard size cap
* ``summarize_text``   - model-backed summary (bullets or paragraphs)
* ``fallback_summary`` - model-free summary made of the leading sentences
"""

import logging
import re
import textwrap
from typing import List

try:
    from transformers import pipeline
except ImportError:  # heavy optional dependency; text helpers must still import
    pipeline = None

logger = logging.getLogger(__name__)

# Glyphs are spelled as escapes so they survive any re-encoding of this file.
_DOC_ICON = "\U0001F4C4"  # "page facing up" emoji used in summary headers
_BULLET = "\u2022"  # bullet character used for list items

_MODEL_NAME = "facebook/bart-large-cnn"

# A sentence boundary is any whitespace run that directly follows a period.
_SENTENCE_BOUNDARY = re.compile(r"(?<=\.)\s+")


# ========== Load Summarization Pipeline ==========

def _load_summarizer():
    """Build the summarization pipeline, or return None when it is unavailable.

    Failures are logged instead of raised so that importing this module never
    breaks callers that can live with ``fallback_summary``.
    """
    if pipeline is None:
        logger.warning(
            "transformers is not installed; only fallback summaries are available"
        )
        return None
    try:
        return pipeline("summarization", model=_MODEL_NAME)
    except Exception:  # import-time boundary: degrade to the fallback path
        logger.warning(
            "Could not load summarization model %s", _MODEL_NAME, exc_info=True
        )
        return None


summarizer = _load_summarizer()


# ========== Text Helpers ==========

def clean_text(text: str) -> str:
    """Collapse every run of whitespace (spaces, tabs, newlines) to one space.

    Leading and trailing whitespace is removed.
    """
    return " ".join(text.split())


def _split_sentences(text: str) -> List[str]:
    """Split *text* after each period, keeping the period on its sentence."""
    return [part for part in _SENTENCE_BOUNDARY.split(text.strip()) if part]


def _sentence_bodies(text: str) -> List[str]:
    """Return the non-empty sentences of *text* without their trailing periods."""
    bodies = (sentence.rstrip(".").strip() for sentence in _split_sentences(text))
    return [body for body in bodies if body]


def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
    """Break *text* into chunks of at most *max_chunk_len* characters.

    Whole sentences are packed greedily into each chunk. A single sentence
    longer than the limit is wrapped at word boundaries (or mid-word if it has
    none) so that no chunk ever exceeds the limit.

    Returns:
        The chunks in reading order; an empty list for blank input. No chunk
        is empty.

    Raises:
        ValueError: if *max_chunk_len* is smaller than 1.
    """
    if max_chunk_len < 1:
        raise ValueError("max_chunk_len must be at least 1")

    chunks: List[str] = []
    pending: List[str] = []  # sentences collected for the chunk being built
    pending_len = 0  # length of " ".join(pending)

    for sentence in _split_sentences(text):
        if len(sentence) > max_chunk_len:
            # Oversized sentence: flush what we have, then hard-wrap it.
            if pending:
                chunks.append(" ".join(pending))
                pending, pending_len = [], 0
            chunks.extend(textwrap.wrap(sentence, width=max_chunk_len))
            continue

        # +1 accounts for the joining space when the chunk is not empty.
        grown_len = pending_len + len(sentence) + (1 if pending else 0)
        if pending and grown_len > max_chunk_len:
            chunks.append(" ".join(pending))
            pending, pending_len = [sentence], len(sentence)
        else:
            pending.append(sentence)
            pending_len = grown_len

    if pending:
        chunks.append(" ".join(pending))
    return chunks


# ========== Summarization Functions ==========

def summarize_text(text: str, as_paragraph: bool = False, fallback: bool = True) -> str:
    """Generate an executive summary of *text*.

    Params:
    - text: the document to summarise.
    - as_paragraph: True -> one paragraph per chunk, separated by blank lines;
      False -> one bullet point per summary sentence.
    - fallback: True -> if the model is unavailable or fails, return
      ``fallback_summary(text)``; False -> return an error message instead.

    Returns:
        The formatted summary, "No input provided." for blank input, or
        "An error occurred: ..." when summarisation fails and *fallback* is
        False. This function does not raise for model failures.
    """
    if not text or not text.strip():
        return "No input provided."

    try:
        if summarizer is None:
            raise RuntimeError("summarization model is not available")

        summaries = []
        for chunk in split_text(clean_text(text)):
            result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(result[0]["summary_text"].strip())

        if as_paragraph:
            return f"{_DOC_ICON} Executive Summary:\n\n" + "\n\n".join(summaries)

        # Otherwise -> one bullet per sentence, each ending in a single period.
        bullet_points = [
            f"{_BULLET} {body}."
            for summary in summaries
            for body in _sentence_bodies(summary)
        ]
        return f"{_DOC_ICON} Executive Summary:\n" + "\n".join(bullet_points)

    except Exception as exc:
        # Deliberate best-effort boundary: record the cause, then degrade.
        logger.warning("Summarization failed: %s", exc, exc_info=True)
        if fallback:
            return fallback_summary(text)
        return f"An error occurred: {exc}"


# ========== Fallback Summary (manual) ==========

def fallback_summary(text: str, max_lines: int = 5) -> str:
    """Return the first *max_lines* sentences of *text* as a bullet list.

    Used as a pseudo-summary when the model cannot be used. Empty fragments
    are dropped before the limit is applied, and bullets carry no trailing
    period.
    """
    points = [f"{_BULLET} {body}" for body in _sentence_bodies(text)[:max_lines]]
    return f"{_DOC_ICON} (Fallback Summary)\n" + "\n".join(points)