| | """ |
| | PDF Report Exporter for Musora Sentiment Analysis Dashboard. |
| | |
| | Generates a comprehensive PDF report from the filtered dashboard data and |
| | Plotly visualizations. |
| | |
| | Dependencies: |
| | fpdf2 — PDF assembly (pip install fpdf2) |
| | kaleido — Plotly PNG rendering (pip install kaleido) |
| | """ |
| |
|
| | import os |
| | import sys |
| | import tempfile |
| | import logging |
| | from datetime import datetime |
| | from pathlib import Path |
| |
|
| | |
| | |
| | _parent = Path(__file__).resolve().parent.parent |
| | if str(_parent) not in sys.path: |
| | sys.path.insert(0, str(_parent)) |
| |
|
| | import plotly.io as pio |
| | from fpdf import FPDF |
| |
|
| | from utils.metrics import SentimentMetrics |
| | from utils.data_processor import SentimentDataProcessor |
| | from visualizations.sentiment_charts import SentimentCharts |
| | from visualizations.distribution_charts import DistributionCharts |
| | from visualizations.demographic_charts import DemographicCharts |
| |
|
| | logger = logging.getLogger(__name__) |
| |
|
| | |
| | |
| | |
| | _DESCRIPTIONS = { |
| | "executive_summary": ( |
| | "A top-level snapshot of community sentiment across all Musora brands and platforms. " |
| | "All findings are based on comments processed through the AI sentiment analysis pipeline." |
| | ), |
| | "sentiment": ( |
| | "Every comment is assigned one of five sentiment levels: " |
| | "Very Positive, Positive, Neutral, Negative, or Very Negative. " |
| | "The pie chart shows how those levels split across all analyzed comments. " |
| | "The Sentiment Score (0-100) converts the average rating to a percentage scale: " |
| | "50 = perfectly neutral, above 60 = primarily positive." |
| | ), |
| | "brand": ( |
| | "Sentiment broken down by Musora brand (Drumeo, Pianote, Guitareo, Singeo, etc.). " |
| | "Shows both the count and percentage of each sentiment level per brand, " |
| | "helping identify which brands receive the most positive or negative feedback." |
| | ), |
| | "platform": ( |
| | "Sentiment broken down by platform (Facebook, Instagram, YouTube, Twitter, Musora App). " |
| | "Helps compare audience sentiment across channels." |
| | ), |
| | "intent": ( |
| | "Beyond positive/negative, the AI identifies the intent behind each comment: " |
| | "praise, questions, requests, feedback, suggestions, humor, off-topic, or spam. " |
| | "Understanding intent helps prioritize community management." |
| | ), |
| | "cross_dimensional": ( |
| | "Cross-dimensional analysis reveals patterns across both brand and platform simultaneously. " |
| | "The heatmaps show comment volume and negative sentiment concentration by combination." |
| | ), |
| | "volume": ( |
| | "Volume analysis shows the distribution of comments across platforms and brands, " |
| | "indicating where the most community engagement is happening." |
| | ), |
| | "reply_requirements": ( |
| | "Comments flagged as requiring a reply, broken down by brand and platform. " |
| | "The urgency breakdown helps prioritize community management resources." |
| | ), |
| | "demographics": ( |
| | "Demographics data is available for Musora App comments and is derived from user profiles. " |
| | "Note: These charts reflect only users who have filled in their profile information - " |
| | "they do not represent all community members." |
| | ), |
| | "language": ( |
| | "Language distribution shows what languages comments are written in. " |
| | "Non-English comments are automatically translated for analysis." |
| | ), |
| | } |
| |
|
| | |
| | |
| | |
| | _PRIMARY_HEX = "#1982C4" |
| | _PRIMARY_RGB = (25, 130, 196) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | class MusoraPDF(FPDF): |
| | """Custom FPDF subclass with Musora branding and layout helpers.""" |
| |
|
| | PRIMARY = _PRIMARY_RGB |
| | WHITE = (255, 255, 255) |
| | GRAY = (180, 180, 180) |
| | LIGHT_GRAY = (240, 240, 240) |
| |
|
| | def __init__(self): |
| | super().__init__(orientation="P", unit="mm", format="A4") |
| | self.set_auto_page_break(auto=True, margin=20) |
| |
|
| | |
| | |
| | |
| |
|
| | @staticmethod |
| | def _sanitize(text: str) -> str: |
| | """Strip characters outside Latin-1 (required by the Helvetica font).""" |
| | if not isinstance(text, str): |
| | text = str(text) |
| | return text.encode("latin-1", errors="ignore").decode("latin-1") |
| |
|
| | |
| | |
| | |
| |
|
| | def header(self): |
| | if self.page_no() > 1: |
| | self.set_font("Helvetica", "B", 8) |
| | self.set_text_color(*self.GRAY) |
| | self.cell(0, 6, "Musora Sentiment Analysis Report", align="L") |
| | self.cell( |
| | 0, 6, f"Page {self.page_no()}", align="R", |
| | new_x="LMARGIN", new_y="NEXT", |
| | ) |
| | self.set_draw_color(*self.PRIMARY) |
| | self.set_line_width(0.5) |
| | self.line(10, self.get_y(), 200, self.get_y()) |
| | self.ln(4) |
| |
|
| | def footer(self): |
| | self.set_y(-15) |
| | self.set_font("Helvetica", "I", 7) |
| | self.set_text_color(*self.GRAY) |
| | self.cell( |
| | 0, 10, |
| | f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M')} | Confidential", |
| | align="C", |
| | ) |
| |
|
| | |
| | |
| | |
| |
|
| | def check_page_break(self, needed_mm: float) -> None: |
| | """Add a page break if less than *needed_mm* mm remain on the page.""" |
| | if self.get_y() + needed_mm > self.h - 20: |
| | self.add_page() |
| |
|
| | def section_header(self, title: str) -> None: |
| | """Bold, brand-coloured section heading with an underline rule.""" |
| | title = self._sanitize(title) |
| | self.check_page_break(20) |
| | self.ln(4) |
| | self.set_font("Helvetica", "B", 14) |
| | self.set_text_color(*self.PRIMARY) |
| | self.cell(0, 10, title, new_x="LMARGIN", new_y="NEXT") |
| | self.set_draw_color(*self.PRIMARY) |
| | self.set_line_width(0.3) |
| | self.line(10, self.get_y(), 200, self.get_y()) |
| | self.ln(3) |
| | self.set_text_color(0, 0, 0) |
| |
|
| | def subsection_header(self, title: str) -> None: |
| | """Lighter subsection heading.""" |
| | title = self._sanitize(title) |
| | self.check_page_break(15) |
| | self.ln(2) |
| | self.set_font("Helvetica", "B", 11) |
| | self.set_text_color(60, 60, 60) |
| | self.cell(0, 8, title, new_x="LMARGIN", new_y="NEXT") |
| | self.ln(1) |
| | self.set_text_color(0, 0, 0) |
| |
|
| | def section_description(self, text: str) -> None: |
| | """Italicised description block beneath a section header.""" |
| | text = self._sanitize(text) |
| | self.set_font("Helvetica", "I", 9) |
| | self.set_text_color(80, 80, 80) |
| | self.multi_cell(0, 5, text) |
| | self.ln(4) |
| | self.set_text_color(0, 0, 0) |
| |
|
| | def body_text(self, text: str) -> None: |
| | """Standard paragraph text.""" |
| | text = self._sanitize(text) |
| | self.set_font("Helvetica", "", 9) |
| | self.set_text_color(50, 50, 50) |
| | self.multi_cell(0, 5, text) |
| | self.ln(2) |
| | self.set_text_color(0, 0, 0) |
| |
|
| | def callout_box( |
| | self, |
| | text: str, |
| | bg_color: tuple = (240, 248, 255), |
| | border_color: tuple = None, |
| | ) -> None: |
| | """Lightly-coloured info/callout box with a left accent bar.""" |
| | if border_color is None: |
| | border_color = self.PRIMARY |
| | text = self._sanitize(text) |
| | self.check_page_break(20) |
| | x, w = 10, 180 |
| | approx_lines = max(2, len(text) // 90 + text.count("\n") + 1) |
| | h = approx_lines * 5 + 6 |
| | y = self.get_y() |
| | self.set_fill_color(*bg_color) |
| | self.rect(x, y, w, h, style="F") |
| | self.set_fill_color(*border_color) |
| | self.rect(x, y, 3, h, style="F") |
| | self.set_font("Helvetica", "", 8.5) |
| | self.set_text_color(40, 40, 40) |
| | self.set_xy(x + 5, y + 3) |
| | self.multi_cell(w - 7, 4.5, text) |
| | self.set_y(y + h + 3) |
| | self.set_text_color(0, 0, 0) |
| |
|
| | def metric_row(self, metrics: list) -> None: |
| | """ |
| | Horizontal row of metric tiles. |
| | |
| | Args: |
| | metrics: list of (label, value) tuples. |
| | """ |
| | self.check_page_break(18) |
| | n = len(metrics) |
| | if n == 0: |
| | return |
| | box_w = (190 - (n - 1) * 3) / n |
| | x0 = 10 |
| | y = self.get_y() |
| | for i, (label, value) in enumerate(metrics): |
| | x = x0 + i * (box_w + 3) |
| | self.set_fill_color(245, 245, 245) |
| | self.rect(x, y, box_w, 14, style="F") |
| | self.set_xy(x, y + 1) |
| | self.set_font("Helvetica", "B", 10) |
| | self.set_text_color(*self.PRIMARY) |
| | self.cell(box_w, 6, self._sanitize(str(value)), align="C") |
| | self.set_xy(x, y + 7) |
| | self.set_font("Helvetica", "", 7) |
| | self.set_text_color(100, 100, 100) |
| | self.cell(box_w, 5, self._sanitize(str(label)), align="C") |
| | self.set_text_color(0, 0, 0) |
| | self.set_y(y + 16) |
| |
|
| | def add_table( |
| | self, |
| | headers: list, |
| | rows: list, |
| | col_widths: list = None, |
| | ) -> None: |
| | """ |
| | Styled data table with alternating row shading. |
| | |
| | Args: |
| | headers: Column header strings. |
| | rows: List of row tuples/lists. |
| | col_widths: Optional column widths in mm. |
| | """ |
| | self.check_page_break(10 + len(rows) * 6) |
| | n = len(headers) |
| | if col_widths is None: |
| | col_widths = [190 / n] * n |
| | |
| | self.set_font("Helvetica", "B", 8) |
| | self.set_fill_color(*self.PRIMARY) |
| | self.set_text_color(*self.WHITE) |
| | for i, hdr in enumerate(headers): |
| | self.cell(col_widths[i], 7, self._sanitize(hdr), border=1, fill=True, align="C") |
| | self.ln() |
| | |
| | self.set_font("Helvetica", "", 8) |
| | self.set_text_color(0, 0, 0) |
| | for row_idx, row in enumerate(rows): |
| | self.set_fill_color(250, 250, 250) if row_idx % 2 == 0 else self.set_fill_color(*self.WHITE) |
| | for i, cell_val in enumerate(row): |
| | self.cell(col_widths[i], 6, self._sanitize(str(cell_val)), border=1, fill=True, align="C") |
| | self.ln() |
| | self.ln(2) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | class DashboardPDFExporter: |
| | """ |
| | Generates a comprehensive PDF report from the Musora Sentiment dashboard. |
| | |
| | Usage:: |
| | |
| | exporter = DashboardPDFExporter() |
| | pdf_bytes = exporter.generate_report(filtered_df, filter_info) |
| | |
| | The *filter_info* dict (optional) maps human-readable filter names to their |
| | selected values and is shown on the cover page. |
| | """ |
| |
|
| | |
| | RENDER_SCALE = 3 |
| |
|
| | def __init__(self): |
| | self.sentiment_charts = SentimentCharts() |
| | self.distribution_charts = DistributionCharts() |
| | self.demographic_charts = DemographicCharts() |
| | self.processor = SentimentDataProcessor() |
| | self._temp_files: list[str] = [] |
| |
|
| | |
| | |
| | |
| |
|
| | def generate_report(self, df, filter_info: dict = None) -> bytes: |
| | """ |
| | Build and return the full PDF report. |
| | |
| | Args: |
| | df: Filtered dashboard DataFrame. |
| | filter_info: Optional dict of active filter descriptions shown on |
| | the cover page, e.g. {"Platforms": ["facebook"], |
| | "Brands": ["drumeo"]}. |
| | |
| | Returns: |
| | bytes: Raw PDF file contents ready for st.download_button. |
| | """ |
| | self.pdf = MusoraPDF() |
| | try: |
| | self._add_cover_page(df, filter_info) |
| | self._add_executive_summary(df) |
| | self._add_sentiment_section(df) |
| | self._add_brand_section(df) |
| | self._add_platform_section(df) |
| | self._add_intent_section(df) |
| | self._add_cross_dimensional_section(df) |
| | self._add_volume_section(df) |
| | self._add_reply_requirements_section(df) |
| | if self._has_demographics(df): |
| | self._add_demographics_section(df) |
| | if "detected_language" in df.columns: |
| | self._add_language_section(df) |
| | self._add_data_summary(df, filter_info) |
| |
|
| | return bytes(self.pdf.output()) |
| | finally: |
| | self._cleanup_temp_files() |
| |
|
| | |
| | |
| | |
| |
|
| | def _prepare_fig_for_pdf(self, fig, is_side_by_side: bool = False) -> None: |
| | """Apply white background, readable fonts, and automargin to a Plotly figure.""" |
| | base_fs = 13 if is_side_by_side else 14 |
| | fig.update_layout( |
| | paper_bgcolor="white", |
| | plot_bgcolor="white", |
| | font=dict(color="black", size=base_fs), |
| | title_font_size=base_fs + 4, |
| | margin=( |
| | dict(l=60, r=40, t=60, b=60) |
| | if is_side_by_side |
| | else dict(l=80, r=40, t=60, b=80) |
| | ), |
| | ) |
| | fig.update_xaxes(automargin=True) |
| | fig.update_yaxes(automargin=True) |
| | if fig.layout.showlegend is not False: |
| | fig.update_layout(legend_font_size=base_fs - 2) |
| |
|
| | def _fig_to_temp_path( |
| | self, fig, width: int = 800, height: int = 400, is_side_by_side: bool = False |
| | ) -> str: |
| | """Render a Plotly figure to a temporary high-DPI PNG and return the path.""" |
| | self._prepare_fig_for_pdf(fig, is_side_by_side=is_side_by_side) |
| | img_bytes = pio.to_image( |
| | fig, |
| | format="png", |
| | width=width, |
| | height=height, |
| | scale=self.RENDER_SCALE, |
| | engine="kaleido", |
| | ) |
| | tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False) |
| | tmp.write(img_bytes) |
| | tmp.close() |
| | self._temp_files.append(tmp.name) |
| | return tmp.name |
| |
|
| | def _add_chart(self, fig, width: int = 180, img_width: int = 800, img_height: int = 400) -> None: |
| | """Render one figure full-width on the current PDF page.""" |
| | try: |
| | path = self._fig_to_temp_path(fig, img_width, img_height) |
| | h_mm = width * (img_height / img_width) |
| | self.pdf.check_page_break(h_mm + 5) |
| | self.pdf.image(path, x=10, w=width) |
| | self.pdf.ln(3) |
| | except Exception: |
| | logger.exception("Chart render failed") |
| | self.pdf.body_text("[Chart could not be rendered]") |
| |
|
| | def _add_two_charts(self, fig1, fig2, width: int = 92) -> None: |
| | """Render two figures side-by-side.""" |
| | try: |
| | p1 = self._fig_to_temp_path(fig1, 700, 450, is_side_by_side=True) |
| | p2 = self._fig_to_temp_path(fig2, 700, 450, is_side_by_side=True) |
| | h_mm = width * (450 / 700) |
| | self.pdf.check_page_break(h_mm + 5) |
| | y = self.pdf.get_y() |
| | self.pdf.image(p1, x=10, y=y, w=width) |
| | self.pdf.image(p2, x=10 + width + 4, y=y, w=width) |
| | self.pdf.set_y(y + h_mm + 3) |
| | except Exception: |
| | logger.exception("Side-by-side chart render failed") |
| | self.pdf.body_text("[Charts could not be rendered]") |
| |
|
| | def _cleanup_temp_files(self) -> None: |
| | for path in self._temp_files: |
| | try: |
| | os.unlink(path) |
| | except OSError: |
| | pass |
| | self._temp_files.clear() |
| |
|
| | |
| | |
| | |
| |
|
| | @staticmethod |
| | def _has_demographics(df) -> bool: |
| | return ( |
| | "platform" in df.columns |
| | and "musora_app" in df["platform"].values |
| | and "age_group" in df.columns |
| | and "timezone" in df.columns |
| | and "experience_level" in df.columns |
| | ) |
| |
|
| | @staticmethod |
| | def _filter_summary(filter_info: dict) -> str: |
| | if not filter_info: |
| | return "No filters applied - showing all data." |
| | parts = [] |
| | for key, value in filter_info.items(): |
| | if value: |
| | display = ( |
| | value if isinstance(value, str) |
| | else ", ".join(str(v) for v in value) |
| | ) |
| | parts.append(f"{key}: {display}") |
| | return "; ".join(parts) if parts else "No filters applied." |
| |
|
| | @staticmethod |
| | def _date_range_str(df) -> str: |
| | if "comment_timestamp" not in df.columns or df.empty: |
| | return "N/A" |
| | valid = df["comment_timestamp"].dropna() |
| | if valid.empty: |
| | return "N/A" |
| | return ( |
| | f"{valid.min().strftime('%b %d, %Y')} to {valid.max().strftime('%b %d, %Y')}" |
| | ) |
| |
|
| | |
| | |
| | |
| |
|
| | def _add_cover_page(self, df, filter_info: dict) -> None: |
| | self.pdf.add_page() |
| | self.pdf.ln(40) |
| |
|
| | r, g, b = MusoraPDF.PRIMARY |
| | self.pdf.set_fill_color(r, g, b) |
| | self.pdf.rect(0, 60, 210, 4, style="F") |
| |
|
| | self.pdf.ln(20) |
| | self.pdf.set_font("Helvetica", "B", 28) |
| | self.pdf.set_text_color(r, g, b) |
| | self.pdf.cell(0, 15, "Musora", align="C", new_x="LMARGIN", new_y="NEXT") |
| |
|
| | self.pdf.set_font("Helvetica", "", 16) |
| | self.pdf.set_text_color(80, 80, 80) |
| | self.pdf.cell( |
| | 0, 10, "Sentiment Analysis Report", |
| | align="C", new_x="LMARGIN", new_y="NEXT", |
| | ) |
| |
|
| | self.pdf.ln(10) |
| | self.pdf.set_draw_color(r, g, b) |
| | self.pdf.set_line_width(0.5) |
| | self.pdf.line(60, self.pdf.get_y(), 150, self.pdf.get_y()) |
| | self.pdf.ln(10) |
| |
|
| | self.pdf.set_font("Helvetica", "", 12) |
| | self.pdf.set_text_color(100, 100, 100) |
| | self.pdf.cell( |
| | 0, 8, |
| | f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}", |
| | align="C", new_x="LMARGIN", new_y="NEXT", |
| | ) |
| |
|
| | self.pdf.ln(5) |
| | self.pdf.set_font("Helvetica", "", 10) |
| | self.pdf.cell( |
| | 0, 7, |
| | f"Total Comments Analyzed: {len(df):,}", |
| | align="C", new_x="LMARGIN", new_y="NEXT", |
| | ) |
| |
|
| | date_str = self._date_range_str(df) |
| | if date_str != "N/A": |
| | self.pdf.ln(3) |
| | self.pdf.set_font("Helvetica", "I", 9) |
| | self.pdf.set_text_color(120, 120, 120) |
| | self.pdf.cell( |
| | 0, 6, |
| | MusoraPDF._sanitize(f"Data period: {date_str}"), |
| | align="C", new_x="LMARGIN", new_y="NEXT", |
| | ) |
| |
|
| | if filter_info: |
| | self.pdf.ln(8) |
| | self.pdf.set_font("Helvetica", "B", 9) |
| | self.pdf.set_text_color(80, 80, 80) |
| | self.pdf.cell(0, 6, "Active Filters:", align="C", new_x="LMARGIN", new_y="NEXT") |
| | self.pdf.set_font("Helvetica", "", 9) |
| | for key, value in filter_info.items(): |
| | if value: |
| | display = ( |
| | value if isinstance(value, str) |
| | else ", ".join(str(v) for v in value) |
| | ) |
| | self.pdf.cell( |
| | 0, 5, |
| | MusoraPDF._sanitize(f"{key}: {display}"), |
| | align="C", new_x="LMARGIN", new_y="NEXT", |
| | ) |
| |
|
| | self.pdf.ln(20) |
| | self.pdf.set_font("Helvetica", "I", 8) |
| | self.pdf.set_text_color(150, 150, 150) |
| | self.pdf.cell( |
| | 0, 6, "Confidential - For Internal Use Only", |
| | align="C", new_x="LMARGIN", new_y="NEXT", |
| | ) |
| | self.pdf.cell( |
| | 0, 6, "Data Source: Snowflake | Musora Sentiment Pipeline", |
| | align="C", new_x="LMARGIN", new_y="NEXT", |
| | ) |
| |
|
| | def _add_executive_summary(self, df) -> None: |
| | self.pdf.add_page() |
| | self.pdf.section_header("Executive Summary") |
| | self.pdf.section_description(_DESCRIPTIONS["executive_summary"]) |
| |
|
| | metrics = SentimentMetrics.calculate_overall_metrics(df) |
| | normalized_score = ((metrics["avg_sentiment_score"] + 2) / 4) * 100 |
| |
|
| | |
| | neg_pct = metrics["negative_pct"] |
| | health = "Healthy" if neg_pct < 20 else ("Moderate" if neg_pct < 35 else "Needs Attention") |
| |
|
| | |
| | brands = sorted(df["brand"].dropna().unique().tolist()) if "brand" in df.columns else [] |
| | platforms = sorted(df["platform"].dropna().unique().tolist()) if "platform" in df.columns else [] |
| | brands_str = ", ".join(str(b).title() for b in brands[:6]) if brands else "all brands" |
| | platforms_str = ", ".join(str(p).title() for p in platforms[:6]) if platforms else "all platforms" |
| |
|
| | narrative = ( |
| | f"This report analyzes {metrics['total_comments']:,} comments across {brands_str} " |
| | f"on {platforms_str}. " |
| | f"Overall sentiment is {metrics['positive_pct']:.1f}% positive and " |
| | f"{metrics['negative_pct']:.1f}% negative, " |
| | f"with {metrics['reply_required_pct']:.1f}% of comments requiring a reply." |
| | ) |
| | self.pdf.body_text(narrative) |
| |
|
| | |
| | r, g, b = MusoraPDF.PRIMARY |
| | self.pdf.set_font("Helvetica", "B", 11) |
| | self.pdf.set_text_color(r, g, b) |
| | self.pdf.cell( |
| | 0, 8, f"Overall Sentiment Health: {health}", |
| | new_x="LMARGIN", new_y="NEXT", |
| | ) |
| | self.pdf.ln(2) |
| | self.pdf.set_text_color(0, 0, 0) |
| |
|
| | |
| | self.pdf.metric_row([ |
| | ("Total Comments", f"{metrics['total_comments']:,}"), |
| | ("Positive %", f"{metrics['positive_pct']:.1f}%"), |
| | ("Negative %", f"{metrics['negative_pct']:.1f}%"), |
| | ("Sentiment Score", f"{normalized_score:.0f}/100"), |
| | ]) |
| | self.pdf.metric_row([ |
| | ("Reply Required", f"{metrics['total_reply_required']:,}"), |
| | ("Reply Rate %", f"{metrics['reply_required_pct']:.1f}%"), |
| | ("Brands Analyzed", str(len(brands))), |
| | ("Platforms Analyzed", str(len(platforms))), |
| | ]) |
| |
|
| | |
| | self.pdf.ln(2) |
| | self.pdf.callout_box( |
| | "How to read the Sentiment Score:\n" |
| | "Each comment is rated Very Positive (+2), Positive (+1), Neutral (0), " |
| | "Negative (-1), or Very Negative (-2). " |
| | "The Score (0-100) converts the average: 50 = perfectly neutral, " |
| | "above 60 = primarily positive, below 40 = primarily negative.", |
| | ) |
| |
|
| | |
| | self.pdf.subsection_header("Key Findings") |
| | for finding in self._generate_key_findings(df, metrics): |
| | self.pdf.body_text(f" * {finding}") |
| |
|
| | def _generate_key_findings(self, df, metrics: dict) -> list: |
| | findings = [] |
| |
|
| | |
| | if metrics["positive_pct"] > 50: |
| | findings.append( |
| | f"Sentiment is predominantly positive at {metrics['positive_pct']:.1f}%." |
| | ) |
| | elif metrics["negative_pct"] > 30: |
| | findings.append( |
| | f"Negative sentiment is elevated at {metrics['negative_pct']:.1f}% - " |
| | f"consider targeted community management." |
| | ) |
| | else: |
| | findings.append( |
| | f"Sentiment is balanced: {metrics['positive_pct']:.1f}% positive, " |
| | f"{metrics['negative_pct']:.1f}% negative." |
| | ) |
| |
|
| | |
| | if "brand" in df.columns and not df.empty: |
| | top_brand = df["brand"].value_counts().index[0] |
| | top_count = df["brand"].value_counts().iloc[0] |
| | findings.append( |
| | f"Most discussed brand: {str(top_brand).title()} " |
| | f"({top_count:,} comments, {top_count / len(df) * 100:.1f}% of total)." |
| | ) |
| |
|
| | |
| | if metrics["reply_required_pct"] > 10: |
| | findings.append( |
| | f"{metrics['total_reply_required']:,} comments " |
| | f"({metrics['reply_required_pct']:.1f}%) require a reply." |
| | ) |
| |
|
| | |
| | if "platform" in df.columns and not df.empty: |
| | top_platform = df["platform"].value_counts().index[0] |
| | plat_count = df["platform"].value_counts().iloc[0] |
| | findings.append( |
| | f"Most active platform: {str(top_platform).title()} " |
| | f"({plat_count:,} comments)." |
| | ) |
| |
|
| | return findings[:4] |
| |
|
| | def _add_sentiment_section(self, df) -> None: |
| | self.pdf.add_page() |
| | self.pdf.section_header("Sentiment Distribution") |
| | self.pdf.section_description(_DESCRIPTIONS["sentiment"]) |
| |
|
| | metrics = SentimentMetrics.calculate_overall_metrics(df) |
| | normalized_score = ((metrics["avg_sentiment_score"] + 2) / 4) * 100 |
| |
|
| | pie = self.sentiment_charts.create_sentiment_pie_chart(df, title="Sentiment Distribution") |
| | gauge = self.sentiment_charts.create_sentiment_score_gauge( |
| | metrics["avg_sentiment_score"], title="Overall Sentiment Score" |
| | ) |
| | self._add_two_charts(pie, gauge) |
| |
|
| | self.pdf.body_text( |
| | f"Across {metrics['total_comments']:,} analyzed comments: " |
| | f"{metrics['positive_pct']:.1f}% positive, " |
| | f"{100 - metrics['positive_pct'] - metrics['negative_pct']:.1f}% neutral, " |
| | f"{metrics['negative_pct']:.1f}% negative. " |
| | f"Sentiment Score: {normalized_score:.0f}/100 " |
| | f"(raw average: {metrics['avg_sentiment_score']:.2f} on a -2 to +2 scale)." |
| | ) |
| |
|
| | def _add_brand_section(self, df) -> None: |
| | if "brand" not in df.columns or df["brand"].nunique() == 0: |
| | return |
| |
|
| | self.pdf.add_page() |
| | self.pdf.section_header("Sentiment by Brand") |
| | self.pdf.section_description(_DESCRIPTIONS["brand"]) |
| |
|
| | bar = self.sentiment_charts.create_sentiment_bar_chart( |
| | df, group_by="brand", title="Sentiment Distribution by Brand" |
| | ) |
| | pct = self.sentiment_charts.create_sentiment_percentage_bar_chart( |
| | df, group_by="brand", title="Sentiment by Brand (%)" |
| | ) |
| | self._add_two_charts(bar, pct) |
| |
|
| | |
| | brand_metrics = SentimentMetrics.calculate_brand_metrics(df) |
| | rows = [] |
| | for brand, m in sorted(brand_metrics.items()): |
| | score = ((m["avg_sentiment_score"] + 2) / 4) * 100 |
| | rows.append(( |
| | str(brand).title(), |
| | f"{m['total_comments']:,}", |
| | f"{m['positive_pct']:.1f}%", |
| | f"{m['negative_pct']:.1f}%", |
| | f"{m['reply_required_pct']:.1f}%", |
| | f"{score:.0f}/100", |
| | )) |
| | self.pdf.subsection_header("Brand Metrics Summary") |
| | self.pdf.add_table( |
| | headers=["Brand", "Comments", "Positive %", "Negative %", "Reply Rate", "Score"], |
| | rows=rows, |
| | col_widths=[38, 32, 30, 30, 30, 30], |
| | ) |
| |
|
| | def _add_platform_section(self, df) -> None: |
| | if "platform" not in df.columns or df["platform"].nunique() == 0: |
| | return |
| |
|
| | self.pdf.add_page() |
| | self.pdf.section_header("Sentiment by Platform") |
| | self.pdf.section_description(_DESCRIPTIONS["platform"]) |
| |
|
| | bar = self.sentiment_charts.create_sentiment_bar_chart( |
| | df, group_by="platform", title="Sentiment Distribution by Platform" |
| | ) |
| | pct = self.sentiment_charts.create_sentiment_percentage_bar_chart( |
| | df, group_by="platform", title="Sentiment by Platform (%)" |
| | ) |
| | self._add_two_charts(bar, pct) |
| |
|
| | |
| | platform_metrics = SentimentMetrics.calculate_platform_metrics(df) |
| | rows = [] |
| | for platform, m in sorted(platform_metrics.items()): |
| | score = ((m["avg_sentiment_score"] + 2) / 4) * 100 |
| | rows.append(( |
| | str(platform).title(), |
| | f"{m['total_comments']:,}", |
| | f"{m['positive_pct']:.1f}%", |
| | f"{m['negative_pct']:.1f}%", |
| | f"{m['reply_required_pct']:.1f}%", |
| | f"{score:.0f}/100", |
| | )) |
| | self.pdf.subsection_header("Platform Metrics Summary") |
| | self.pdf.add_table( |
| | headers=["Platform", "Comments", "Positive %", "Negative %", "Reply Rate", "Score"], |
| | rows=rows, |
| | col_widths=[38, 32, 30, 30, 30, 30], |
| | ) |
| |
|
| | def _add_intent_section(self, df) -> None: |
| | if "intent" not in df.columns: |
| | return |
| |
|
| | self.pdf.add_page() |
| | self.pdf.section_header("Intent Analysis") |
| | self.pdf.section_description(_DESCRIPTIONS["intent"]) |
| |
|
| | intent_bar = self.distribution_charts.create_intent_bar_chart( |
| | df, title="Intent Distribution", orientation="h" |
| | ) |
| | intent_pie = self.distribution_charts.create_intent_pie_chart( |
| | df, title="Intent Distribution" |
| | ) |
| | self._add_two_charts(intent_bar, intent_pie) |
| |
|
| | def _add_cross_dimensional_section(self, df) -> None: |
| | if "brand" not in df.columns or "platform" not in df.columns: |
| | return |
| |
|
| | self.pdf.add_page() |
| | self.pdf.section_header("Cross-Dimensional Analysis") |
| | self.pdf.section_description(_DESCRIPTIONS["cross_dimensional"]) |
| |
|
| | matrix = self.distribution_charts.create_brand_platform_matrix( |
| | df, title="Brand-Platform Comment Matrix" |
| | ) |
| | heatmap = self.sentiment_charts.create_sentiment_heatmap( |
| | df, |
| | row_dimension="brand", |
| | col_dimension="platform", |
| | title="Negative Sentiment Heatmap", |
| | ) |
| | self._add_two_charts(matrix, heatmap) |
| |
|
| | def _add_volume_section(self, df) -> None: |
| | has_platform = "platform" in df.columns |
| | has_brand = "brand" in df.columns |
| | if not has_platform and not has_brand: |
| | return |
| |
|
| | self.pdf.add_page() |
| | self.pdf.section_header("Volume Analysis") |
| | self.pdf.section_description(_DESCRIPTIONS["volume"]) |
| |
|
| | if has_platform and has_brand: |
| | platform_dist = self.distribution_charts.create_platform_distribution( |
| | df, title="Comments by Platform" |
| | ) |
| | brand_dist = self.distribution_charts.create_brand_distribution( |
| | df, title="Comments by Brand" |
| | ) |
| | self._add_two_charts(platform_dist, brand_dist) |
| | elif has_platform: |
| | self._add_chart( |
| | self.distribution_charts.create_platform_distribution(df, title="Comments by Platform") |
| | ) |
| | else: |
| | self._add_chart( |
| | self.distribution_charts.create_brand_distribution(df, title="Comments by Brand") |
| | ) |
| |
|
| | def _add_reply_requirements_section(self, df) -> None: |
| | if "requires_reply" not in df.columns: |
| | return |
| |
|
| | self.pdf.add_page() |
| | self.pdf.section_header("Reply Requirements Analysis") |
| | self.pdf.section_description(_DESCRIPTIONS["reply_requirements"]) |
| |
|
| | urgency = SentimentMetrics.calculate_response_urgency(df) |
| | self.pdf.metric_row([ |
| | ("Urgent", str(urgency["urgent_count"])), |
| | ("High Priority", str(urgency["high_priority_count"])), |
| | ("Medium Priority", str(urgency["medium_priority_count"])), |
| | ("Low Priority", str(urgency["low_priority_count"])), |
| | ]) |
| | self.pdf.ln(3) |
| |
|
| | has_brand = "brand" in df.columns |
| | has_platform = "platform" in df.columns |
| | if has_brand and has_platform: |
| | reply_brand = self.distribution_charts.create_reply_required_chart( |
| | df, group_by="brand", title="Comments Requiring Reply by Brand" |
| | ) |
| | reply_platform = self.distribution_charts.create_reply_required_chart( |
| | df, group_by="platform", title="Comments Requiring Reply by Platform" |
| | ) |
| | self._add_two_charts(reply_brand, reply_platform) |
| | elif has_brand: |
| | self._add_chart( |
| | self.distribution_charts.create_reply_required_chart( |
| | df, group_by="brand", title="Comments Requiring Reply by Brand" |
| | ) |
| | ) |
| |
|
| | def _add_demographics_section(self, df) -> None: |
| | df_musora = df[df["platform"] == "musora_app"].copy() |
| | if df_musora.empty: |
| | return |
| |
|
| | self.pdf.add_page() |
| | self.pdf.section_header("Demographics Analysis (Musora App)") |
| | self.pdf.section_description(_DESCRIPTIONS["demographics"]) |
| | self.pdf.body_text(f"Analyzing demographics for {len(df_musora):,} Musora App comments.") |
| |
|
| | |
| | age_dist = self.processor.get_demographics_distribution(df_musora, "age_group") |
| | if not age_dist.empty: |
| | self.pdf.subsection_header("Age Distribution") |
| | self._add_chart( |
| | self.demographic_charts.create_age_distribution_chart( |
| | age_dist, title="Comments by Age Group" |
| | ), |
| | img_height=350, |
| | ) |
| |
|
| | |
| | region_dist = self.processor.get_timezone_regions_distribution(df_musora) |
| | if not region_dist.empty: |
| | self.pdf.subsection_header("Geographic Distribution") |
| | self._add_chart( |
| | self.demographic_charts.create_region_distribution_chart( |
| | region_dist, title="Comments by Region" |
| | ), |
| | img_height=350, |
| | ) |
| |
|
| | |
| | exp_dist = self.processor.get_experience_level_distribution(df_musora, use_groups=True) |
| | if not exp_dist.empty: |
| | self.pdf.subsection_header("Experience Level Distribution") |
| | self._add_chart( |
| | self.demographic_charts.create_experience_distribution_chart( |
| | exp_dist, title="Comments by Experience Group", use_groups=True |
| | ), |
| | img_height=350, |
| | ) |
| |
|
| | def _add_language_section(self, df) -> None: |
| | self.pdf.add_page() |
| | self.pdf.section_header("Language Distribution") |
| | self.pdf.section_description(_DESCRIPTIONS["language"]) |
| | self._add_chart( |
| | self.distribution_charts.create_language_distribution(df, top_n=10, title="Top 10 Languages") |
| | ) |
| |
|
| | def _add_data_summary(self, df, filter_info: dict) -> None: |
| | self.pdf.add_page() |
| | self.pdf.section_header("Data Summary") |
| |
|
| | self.pdf.body_text( |
| | f"Report generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" |
| | ) |
| | self.pdf.body_text(f"Total records in report: {len(df):,}") |
| |
|
| | date_str = self._date_range_str(df) |
| | if date_str != "N/A": |
| | self.pdf.body_text(f"Data range: {date_str}") |
| |
|
| | self.pdf.body_text(f"Active filters: {self._filter_summary(filter_info)}") |
| |
|
| | if "brand" in df.columns: |
| | brands = sorted(str(b).title() for b in df["brand"].dropna().unique()) |
| | self.pdf.body_text(f"Brands included: {', '.join(brands)}") |
| |
|
| | if "platform" in df.columns: |
| | platforms = sorted(str(p).title() for p in df["platform"].dropna().unique()) |
| | self.pdf.body_text(f"Platforms included: {', '.join(platforms)}") |
| |
|
| | self.pdf.ln(5) |
| | self.pdf.callout_box( |
| | "Data source: Snowflake - SOCIAL_MEDIA_DB.ML_FEATURES.COMMENT_SENTIMENT_FEATURES " |
| | "and SOCIAL_MEDIA_DB.ML_FEATURES.MUSORA_COMMENT_SENTIMENT_FEATURES.\n" |
| | "This report is confidential and intended for internal Musora team use only.", |
| | bg_color=(245, 245, 245), |
| | ) |