Danialebrat's picture
Deploying sentiment analysis project
9858829
"""
PDF Report Exporter for Musora Sentiment Analysis Dashboard.
Generates a comprehensive PDF report from the filtered dashboard data and
Plotly visualizations.
Dependencies:
fpdf2 — PDF assembly (pip install fpdf2)
kaleido — Plotly PNG rendering (pip install kaleido)
"""
import os
import sys
import tempfile
import logging
from datetime import datetime
from pathlib import Path
# Ensure the visualization package root is importable when this module is
# loaded directly (e.g., during testing outside Streamlit).
_parent = Path(__file__).resolve().parent.parent
if str(_parent) not in sys.path:
sys.path.insert(0, str(_parent))
import plotly.io as pio
from fpdf import FPDF
from utils.metrics import SentimentMetrics
from utils.data_processor import SentimentDataProcessor
from visualizations.sentiment_charts import SentimentCharts
from visualizations.distribution_charts import DistributionCharts
from visualizations.demographic_charts import DemographicCharts
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Section descriptions — plain-language context shown below each section header.
# ---------------------------------------------------------------------------
# Maps a section key to the explanatory paragraph printed under that section's
# header. The report-section builders look entries up by key (for example
# _DESCRIPTIONS["executive_summary"]) and pass the text to
# MusoraPDF.section_description().
_DESCRIPTIONS = {
    "executive_summary": (
        "A top-level snapshot of community sentiment across all Musora brands and platforms. "
        "All findings are based on comments processed through the AI sentiment analysis pipeline."
    ),
    "sentiment": (
        "Every comment is assigned one of five sentiment levels: "
        "Very Positive, Positive, Neutral, Negative, or Very Negative. "
        "The pie chart shows how those levels split across all analyzed comments. "
        "The Sentiment Score (0-100) converts the average rating to a percentage scale: "
        "50 = perfectly neutral, above 60 = primarily positive."
    ),
    "brand": (
        "Sentiment broken down by Musora brand (Drumeo, Pianote, Guitareo, Singeo, etc.). "
        "Shows both the count and percentage of each sentiment level per brand, "
        "helping identify which brands receive the most positive or negative feedback."
    ),
    "platform": (
        "Sentiment broken down by platform (Facebook, Instagram, YouTube, Twitter, Musora App). "
        "Helps compare audience sentiment across channels."
    ),
    "intent": (
        "Beyond positive/negative, the AI identifies the intent behind each comment: "
        "praise, questions, requests, feedback, suggestions, humor, off-topic, or spam. "
        "Understanding intent helps prioritize community management."
    ),
    "cross_dimensional": (
        "Cross-dimensional analysis reveals patterns across both brand and platform simultaneously. "
        "The heatmaps show comment volume and negative sentiment concentration by combination."
    ),
    "volume": (
        "Volume analysis shows the distribution of comments across platforms and brands, "
        "indicating where the most community engagement is happening."
    ),
    "reply_requirements": (
        "Comments flagged as requiring a reply, broken down by brand and platform. "
        "The urgency breakdown helps prioritize community management resources."
    ),
    "demographics": (
        "Demographics data is available for Musora App comments and is derived from user profiles. "
        "Note: These charts reflect only users who have filled in their profile information - "
        "they do not represent all community members."
    ),
    "language": (
        "Language distribution shows what languages comments are written in. "
        "Non-English comments are automatically translated for analysis."
    ),
}
# ---------------------------------------------------------------------------
# Musora brand colours
# ---------------------------------------------------------------------------
# Brand blue as a CSS-style hex string (not referenced in the visible part of this file).
_PRIMARY_HEX = "#1982C4"
# The same colour as an (R, G, B) tuple; exposed to the PDF class as MusoraPDF.PRIMARY.
_PRIMARY_RGB = (25, 130, 196)
# ---------------------------------------------------------------------------
# PDF document class
# ---------------------------------------------------------------------------
class MusoraPDF(FPDF):
    """Custom FPDF subclass with Musora branding and layout helpers.

    The document is A4 portrait measured in millimetres. Every helper draws
    with the built-in Helvetica font, so text is passed through
    :meth:`_sanitize` before it is written.
    """

    PRIMARY = _PRIMARY_RGB
    WHITE = (255, 255, 255)
    GRAY = (180, 180, 180)
    LIGHT_GRAY = (240, 240, 240)

    # Space (mm) kept free at the bottom of every page for the footer. Used
    # both for FPDF's automatic page break and for check_page_break().
    BOTTOM_MARGIN = 20
    # Height (mm) consumed by the running header on pages after the first:
    # one 6 mm cell followed by a 4 mm gap (see header()).
    HEADER_BAND = 10

    # Typographic characters that Latin-1 cannot encode, mapped to close
    # equivalents so they are not silently dropped by _sanitize().
    _TRANSLITERATION = str.maketrans({
        "\u2013": "-",    # en dash
        "\u2014": "-",    # em dash
        "\u2018": "'",    # left single quote
        "\u2019": "'",    # right single quote
        "\u201c": '"',    # left double quote
        "\u201d": '"',    # right double quote
        "\u2026": "...",  # ellipsis
        "\u2022": "*",    # bullet
    })

    def __init__(self):
        super().__init__(orientation="P", unit="mm", format="A4")
        self.set_auto_page_break(auto=True, margin=self.BOTTOM_MARGIN)
        # Captured once so that every page footer shows the same timestamp,
        # even when rendering the report spans more than a minute.
        self._generated_at = datetime.now()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _sanitize(text: str) -> str:
        """Return *text* restricted to Latin-1 (required by the Helvetica font).

        Non-string input is converted with ``str()``. Common typographic
        punctuation is first replaced by an ASCII equivalent; any remaining
        character outside Latin-1 is dropped.
        """
        if not isinstance(text, str):
            text = str(text)
        text = text.translate(MusoraPDF._TRANSLITERATION)
        return text.encode("latin-1", errors="ignore").decode("latin-1")

    # ------------------------------------------------------------------
    # FPDF overrides
    # ------------------------------------------------------------------
    def header(self):
        """Draw the running title, page number and rule on every page except the first."""
        if self.page_no() <= 1:
            return
        self.set_font("Helvetica", "B", 8)
        self.set_text_color(*self.GRAY)
        self.cell(0, 6, "Musora Sentiment Analysis Report", align="L")
        self.cell(
            0, 6, f"Page {self.page_no()}", align="R",
            new_x="LMARGIN", new_y="NEXT",
        )
        self.set_draw_color(*self.PRIMARY)
        self.set_line_width(0.5)
        self.line(10, self.get_y(), 200, self.get_y())
        self.ln(4)

    def footer(self):
        """Draw the centred generation timestamp 15 mm above the page bottom."""
        self.set_y(-15)
        self.set_font("Helvetica", "I", 7)
        self.set_text_color(*self.GRAY)
        self.cell(
            0, 10,
            f"Generated on {self._generated_at.strftime('%Y-%m-%d %H:%M')} | Confidential",
            align="C",
        )

    # ------------------------------------------------------------------
    # Layout primitives
    # ------------------------------------------------------------------
    def check_page_break(self, needed_mm: float) -> None:
        """Add a page break if less than *needed_mm* mm remain on the page."""
        if self.get_y() + needed_mm > self.h - self.BOTTOM_MARGIN:
            self.add_page()

    def section_header(self, title: str) -> None:
        """Bold, brand-coloured section heading with an underline rule."""
        title = self._sanitize(title)
        self.check_page_break(20)
        self.ln(4)
        self.set_font("Helvetica", "B", 14)
        self.set_text_color(*self.PRIMARY)
        self.cell(0, 10, title, new_x="LMARGIN", new_y="NEXT")
        self.set_draw_color(*self.PRIMARY)
        self.set_line_width(0.3)
        self.line(10, self.get_y(), 200, self.get_y())
        self.ln(3)
        self.set_text_color(0, 0, 0)

    def subsection_header(self, title: str) -> None:
        """Lighter subsection heading."""
        title = self._sanitize(title)
        self.check_page_break(15)
        self.ln(2)
        self.set_font("Helvetica", "B", 11)
        self.set_text_color(60, 60, 60)
        self.cell(0, 8, title, new_x="LMARGIN", new_y="NEXT")
        self.ln(1)
        self.set_text_color(0, 0, 0)

    def section_description(self, text: str) -> None:
        """Italicised description block beneath a section header."""
        text = self._sanitize(text)
        self.set_font("Helvetica", "I", 9)
        self.set_text_color(80, 80, 80)
        self.multi_cell(0, 5, text)
        self.ln(4)
        self.set_text_color(0, 0, 0)

    def body_text(self, text: str) -> None:
        """Standard paragraph text."""
        text = self._sanitize(text)
        self.set_font("Helvetica", "", 9)
        self.set_text_color(50, 50, 50)
        self.multi_cell(0, 5, text)
        self.ln(2)
        self.set_text_color(0, 0, 0)

    def callout_box(
        self,
        text: str,
        bg_color: tuple = (240, 248, 255),
        border_color: tuple = None,
    ) -> None:
        """Lightly-coloured info/callout box with a left accent bar.

        Args:
            text: Box content; ``\\n`` starts a new line.
            bg_color: (R, G, B) fill of the box.
            border_color: (R, G, B) of the 3 mm accent bar; defaults to PRIMARY.
        """
        if border_color is None:
            border_color = self.PRIMARY
        text = self._sanitize(text)

        x, w = 10, 180
        # Height is estimated (about 90 characters per line) because the
        # background rectangle has to be drawn before the text is laid out.
        approx_lines = max(2, len(text) // 90 + text.count("\n") + 1)
        h = approx_lines * 5 + 6

        # Reserve room for the whole box plus its trailing gap, so the
        # rectangle and its text never end up on different pages.
        self.check_page_break(max(20, h + 3))
        y = self.get_y()

        self.set_fill_color(*bg_color)
        self.rect(x, y, w, h, style="F")
        self.set_fill_color(*border_color)
        self.rect(x, y, 3, h, style="F")

        self.set_font("Helvetica", "", 8.5)
        self.set_text_color(40, 40, 40)
        self.set_xy(x + 5, y + 3)
        self.multi_cell(w - 7, 4.5, text)

        self.set_y(y + h + 3)
        self.set_text_color(0, 0, 0)

    def metric_row(self, metrics: list) -> None:
        """
        Horizontal row of metric tiles.

        Args:
            metrics: list of (label, value) tuples. An empty list draws
                nothing and does not move the cursor.
        """
        n = len(metrics)
        if n == 0:
            # Return before the page-break check so an empty row can never
            # trigger a new page.
            return
        self.check_page_break(18)

        gap = 3
        box_w = (190 - (n - 1) * gap) / n
        x0 = 10
        y = self.get_y()
        for i, (label, value) in enumerate(metrics):
            x = x0 + i * (box_w + gap)
            self.set_fill_color(245, 245, 245)
            self.rect(x, y, box_w, 14, style="F")
            # Value on top, in brand colour.
            self.set_xy(x, y + 1)
            self.set_font("Helvetica", "B", 10)
            self.set_text_color(*self.PRIMARY)
            self.cell(box_w, 6, self._sanitize(str(value)), align="C")
            # Caption underneath.
            self.set_xy(x, y + 7)
            self.set_font("Helvetica", "", 7)
            self.set_text_color(100, 100, 100)
            self.cell(box_w, 5, self._sanitize(str(label)), align="C")
        self.set_text_color(0, 0, 0)
        self.set_y(y + 16)

    def add_table(
        self,
        headers: list,
        rows: list,
        col_widths: list = None,
    ) -> None:
        """
        Styled data table with alternating row shading.

        Args:
            headers: Column header strings. An empty list draws nothing.
            rows: List of row tuples/lists.
            col_widths: Optional column widths in mm; defaults to equal
                shares of 190 mm.
        """
        n = len(headers)
        if n == 0:
            # Nothing to draw; also avoids dividing by zero below.
            return
        if col_widths is None:
            col_widths = [190 / n] * n

        header_h, row_h = 7, 6
        table_h = 10 + len(rows) * row_h
        fresh_page_h = self.h - self.BOTTOM_MARGIN - self.t_margin - self.HEADER_BAND
        if table_h <= fresh_page_h:
            # Small table: keep it together on one page.
            self.check_page_break(table_h)
        else:
            # A table taller than a page can never fit, so forcing a break
            # would only orphan the preceding heading. Require just the
            # header and first row; automatic page breaks split the rest.
            self.check_page_break(header_h + row_h)

        # Header
        self.set_font("Helvetica", "B", 8)
        self.set_fill_color(*self.PRIMARY)
        self.set_text_color(*self.WHITE)
        for i, hdr in enumerate(headers):
            self.cell(col_widths[i], header_h, self._sanitize(hdr), border=1, fill=True, align="C")
        self.ln()

        # Rows
        self.set_font("Helvetica", "", 8)
        self.set_text_color(0, 0, 0)
        for row_idx, row in enumerate(rows):
            shade = (250, 250, 250) if row_idx % 2 == 0 else self.WHITE
            self.set_fill_color(*shade)
            for i, cell_val in enumerate(row):
                self.cell(col_widths[i], row_h, self._sanitize(str(cell_val)), border=1, fill=True, align="C")
            self.ln()
        self.ln(2)
# ---------------------------------------------------------------------------
# Main exporter
# ---------------------------------------------------------------------------
class DashboardPDFExporter:
    """
    Generates a comprehensive PDF report from the Musora Sentiment dashboard.

    Usage::

        exporter = DashboardPDFExporter()
        pdf_bytes = exporter.generate_report(filtered_df, filter_info)

    The *filter_info* dict (optional) maps human-readable filter names to their
    selected values and is shown on the cover page.
    """

    # Kaleido scale factor: 3× ≈ 300 DPI at A4 print size.
    RENDER_SCALE = 3

    def __init__(self):
        self.sentiment_charts = SentimentCharts()
        self.distribution_charts = DistributionCharts()
        self.demographic_charts = DemographicCharts()
        self.processor = SentimentDataProcessor()
        # Paths of PNGs rendered for the current report; removed by
        # _cleanup_temp_files() once the PDF bytes have been produced.
        self._temp_files: list[str] = []

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------
    def generate_report(self, df, filter_info: dict = None) -> bytes:
        """
        Build and return the full PDF report.

        Args:
            df: Filtered dashboard DataFrame.
            filter_info: Optional dict of active filter descriptions shown on
                the cover page, e.g. {"Platforms": ["facebook"],
                "Brands": ["drumeo"]}.

        Returns:
            bytes: Raw PDF file contents ready for st.download_button.
        """
        self.pdf = MusoraPDF()
        try:
            self._add_cover_page(df, filter_info)
            self._add_executive_summary(df)
            self._add_sentiment_section(df)
            self._add_brand_section(df)
            self._add_platform_section(df)
            self._add_intent_section(df)
            self._add_cross_dimensional_section(df)
            self._add_volume_section(df)
            self._add_reply_requirements_section(df)
            if self._has_demographics(df):
                self._add_demographics_section(df)
            if "detected_language" in df.columns:
                self._add_language_section(df)
            self._add_data_summary(df, filter_info)
            return bytes(self.pdf.output())
        finally:
            # Rendered chart images are only needed while the PDF is built.
            self._cleanup_temp_files()

    # ------------------------------------------------------------------
    # Chart rendering helpers
    # ------------------------------------------------------------------
    def _prepare_fig_for_pdf(self, fig, is_side_by_side: bool = False) -> None:
        """Apply white background, readable fonts, and automargin to a Plotly figure.

        The figure is modified in place. Side-by-side figures get a slightly
        smaller base font and tighter margins.
        """
        base_fs = 13 if is_side_by_side else 14
        if is_side_by_side:
            margin = dict(l=60, r=40, t=60, b=60)
        else:
            margin = dict(l=80, r=40, t=60, b=80)
        fig.update_layout(
            paper_bgcolor="white",
            plot_bgcolor="white",
            font=dict(color="black", size=base_fs),
            title_font_size=base_fs + 4,
            margin=margin,
        )
        fig.update_xaxes(automargin=True)
        fig.update_yaxes(automargin=True)
        if fig.layout.showlegend is not False:
            fig.update_layout(legend_font_size=base_fs - 2)

    def _fig_to_temp_path(
        self, fig, width: int = 800, height: int = 400, is_side_by_side: bool = False
    ) -> str:
        """Render a Plotly figure to a temporary high-DPI PNG and return the path.

        The path is recorded in ``self._temp_files`` so that
        :meth:`_cleanup_temp_files` can delete it later.
        """
        self._prepare_fig_for_pdf(fig, is_side_by_side=is_side_by_side)
        img_bytes = pio.to_image(
            fig,
            format="png",
            width=width,
            height=height,
            scale=self.RENDER_SCALE,
            engine="kaleido",
        )
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            # Register the path before writing: if the write fails, the file
            # handle is still closed and the file is still cleaned up.
            self._temp_files.append(tmp.name)
            tmp.write(img_bytes)
        return tmp.name

    def _add_chart(self, fig, width: int = 180, img_width: int = 800, img_height: int = 400) -> None:
        """Render one figure full-width on the current PDF page.

        Args:
            fig: Plotly figure to render.
            width: Width of the image on the page, in mm.
            img_width: Render width in pixels (before RENDER_SCALE).
            img_height: Render height in pixels (before RENDER_SCALE).

        A rendering failure is logged and replaced by a placeholder line so
        that the rest of the report is still produced.
        """
        try:
            path = self._fig_to_temp_path(fig, img_width, img_height)
            h_mm = width * (img_height / img_width)
            self.pdf.check_page_break(h_mm + 5)
            self.pdf.image(path, x=10, w=width)
            self.pdf.ln(3)
        except Exception:
            logger.exception("Chart render failed")
            self.pdf.body_text("[Chart could not be rendered]")

    def _add_two_charts(self, fig1, fig2, width: int = 92) -> None:
        """Render two figures side-by-side.

        Both figures are rendered at 700x450 px and placed *width* mm wide
        with a 4 mm gutter. If either render fails, a single placeholder line
        is written instead.
        """
        try:
            p1 = self._fig_to_temp_path(fig1, 700, 450, is_side_by_side=True)
            p2 = self._fig_to_temp_path(fig2, 700, 450, is_side_by_side=True)
            h_mm = width * (450 / 700)
            self.pdf.check_page_break(h_mm + 5)
            y = self.pdf.get_y()
            self.pdf.image(p1, x=10, y=y, w=width)
            self.pdf.image(p2, x=10 + width + 4, y=y, w=width)
            self.pdf.set_y(y + h_mm + 3)
        except Exception:
            logger.exception("Side-by-side chart render failed")
            self.pdf.body_text("[Charts could not be rendered]")

    def _cleanup_temp_files(self) -> None:
        """Delete every recorded temporary image, ignoring files already gone."""
        for path in self._temp_files:
            try:
                os.unlink(path)
            except OSError:
                # Best-effort cleanup: a missing or locked file must not mask
                # the result (or the original error) of report generation.
                pass
        self._temp_files.clear()

    # ------------------------------------------------------------------
    # Data helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _normalize_score(avg_sentiment_score: float) -> float:
        """Convert an average rating on the -2..+2 scale to the 0..100 score."""
        return ((avg_sentiment_score + 2) / 4) * 100

    @staticmethod
    def _has_demographics(df) -> bool:
        """Return True when *df* has Musora App rows and all demographic columns."""
        return (
            "platform" in df.columns
            and "musora_app" in df["platform"].values
            and "age_group" in df.columns
            and "timezone" in df.columns
            and "experience_level" in df.columns
        )

    @staticmethod
    def _format_filter_value(value) -> str:
        """Render one filter value: strings as-is, iterables comma-joined, scalars via str()."""
        if isinstance(value, str):
            return value
        try:
            return ", ".join(str(v) for v in value)
        except TypeError:
            # Scalar (int, date, ...) rather than a collection of selections.
            return str(value)

    @staticmethod
    def _active_filters(filter_info: dict) -> list:
        """Return ``"name: value"`` strings for every filter with a truthy value."""
        if not filter_info:
            return []
        return [
            f"{key}: {DashboardPDFExporter._format_filter_value(value)}"
            for key, value in filter_info.items()
            if value
        ]

    @staticmethod
    def _filter_summary(filter_info: dict) -> str:
        """Return a one-line, semicolon-separated description of the active filters."""
        if not filter_info:
            return "No filters applied - showing all data."
        parts = DashboardPDFExporter._active_filters(filter_info)
        return "; ".join(parts) if parts else "No filters applied."

    @staticmethod
    def _date_range_str(df) -> str:
        """Return ``"Mon DD, YYYY to Mon DD, YYYY"`` for ``comment_timestamp``, or ``"N/A"``."""
        if "comment_timestamp" not in df.columns or df.empty:
            return "N/A"
        valid = df["comment_timestamp"].dropna()
        if valid.empty:
            return "N/A"
        first, last = valid.min(), valid.max()
        return f"{first.strftime('%b %d, %Y')} to {last.strftime('%b %d, %Y')}"

    @staticmethod
    def _top_by_volume(df, column: str):
        """Return ``(value, count)`` for the most frequent value of *column*.

        Returns None when the column is absent or holds no non-null values.
        """
        if column not in df.columns:
            return None
        counts = df[column].value_counts()
        if counts.empty:
            return None
        return counts.index[0], counts.iloc[0]

    @staticmethod
    def _metrics_table_rows(metrics_by_group: dict) -> list:
        """Build summary-table rows from a ``{group: metrics}`` mapping.

        Each row is (name, comments, positive %, negative %, reply rate,
        score) as display strings, ordered by group name.
        """
        rows = []
        for group, m in sorted(metrics_by_group.items(), key=lambda kv: str(kv[0])):
            score = DashboardPDFExporter._normalize_score(m["avg_sentiment_score"])
            rows.append((
                str(group).title(),
                f"{m['total_comments']:,}",
                f"{m['positive_pct']:.1f}%",
                f"{m['negative_pct']:.1f}%",
                f"{m['reply_required_pct']:.1f}%",
                f"{score:.0f}/100",
            ))
        return rows

    # ------------------------------------------------------------------
    # Report sections
    # ------------------------------------------------------------------
    def _add_cover_page(self, df, filter_info: dict) -> None:
        """Add the title page: branding, timestamp, comment count, period and filters."""
        pdf = self.pdf
        pdf.add_page()
        pdf.ln(40)
        r, g, b = MusoraPDF.PRIMARY

        # Full-width accent band.
        pdf.set_fill_color(r, g, b)
        pdf.rect(0, 60, 210, 4, style="F")
        pdf.ln(20)

        pdf.set_font("Helvetica", "B", 28)
        pdf.set_text_color(r, g, b)
        pdf.cell(0, 15, "Musora", align="C", new_x="LMARGIN", new_y="NEXT")
        pdf.set_font("Helvetica", "", 16)
        pdf.set_text_color(80, 80, 80)
        pdf.cell(
            0, 10, "Sentiment Analysis Report",
            align="C", new_x="LMARGIN", new_y="NEXT",
        )
        pdf.ln(10)
        pdf.set_draw_color(r, g, b)
        pdf.set_line_width(0.5)
        pdf.line(60, pdf.get_y(), 150, pdf.get_y())
        pdf.ln(10)

        pdf.set_font("Helvetica", "", 12)
        pdf.set_text_color(100, 100, 100)
        pdf.cell(
            0, 8,
            f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}",
            align="C", new_x="LMARGIN", new_y="NEXT",
        )
        pdf.ln(5)
        pdf.set_font("Helvetica", "", 10)
        pdf.cell(
            0, 7,
            f"Total Comments Analyzed: {len(df):,}",
            align="C", new_x="LMARGIN", new_y="NEXT",
        )

        date_str = self._date_range_str(df)
        if date_str != "N/A":
            pdf.ln(3)
            pdf.set_font("Helvetica", "I", 9)
            pdf.set_text_color(120, 120, 120)
            pdf.cell(
                0, 6,
                MusoraPDF._sanitize(f"Data period: {date_str}"),
                align="C", new_x="LMARGIN", new_y="NEXT",
            )

        # Only show the heading when at least one filter actually has a value.
        active_filters = self._active_filters(filter_info)
        if active_filters:
            pdf.ln(8)
            pdf.set_font("Helvetica", "B", 9)
            pdf.set_text_color(80, 80, 80)
            pdf.cell(0, 6, "Active Filters:", align="C", new_x="LMARGIN", new_y="NEXT")
            pdf.set_font("Helvetica", "", 9)
            for line in active_filters:
                pdf.cell(
                    0, 5,
                    MusoraPDF._sanitize(line),
                    align="C", new_x="LMARGIN", new_y="NEXT",
                )

        pdf.ln(20)
        pdf.set_font("Helvetica", "I", 8)
        pdf.set_text_color(150, 150, 150)
        pdf.cell(
            0, 6, "Confidential - For Internal Use Only",
            align="C", new_x="LMARGIN", new_y="NEXT",
        )
        pdf.cell(
            0, 6, "Data Source: Snowflake | Musora Sentiment Pipeline",
            align="C", new_x="LMARGIN", new_y="NEXT",
        )

    def _add_executive_summary(self, df) -> None:
        """Add the executive summary: narrative, health label, metric tiles and key findings."""
        self.pdf.add_page()
        self.pdf.section_header("Executive Summary")
        self.pdf.section_description(_DESCRIPTIONS["executive_summary"])

        metrics = SentimentMetrics.calculate_overall_metrics(df)
        normalized_score = self._normalize_score(metrics["avg_sentiment_score"])

        # Health label
        neg_pct = metrics["negative_pct"]
        if neg_pct < 20:
            health = "Healthy"
        elif neg_pct < 35:
            health = "Moderate"
        else:
            health = "Needs Attention"

        # Opening narrative
        brands = sorted(df["brand"].dropna().unique().tolist()) if "brand" in df.columns else []
        platforms = sorted(df["platform"].dropna().unique().tolist()) if "platform" in df.columns else []
        # At most six names are listed to keep the sentence readable.
        brands_str = ", ".join(str(b).title() for b in brands[:6]) if brands else "all brands"
        platforms_str = ", ".join(str(p).title() for p in platforms[:6]) if platforms else "all platforms"
        narrative = (
            f"This report analyzes {metrics['total_comments']:,} comments across {brands_str} "
            f"on {platforms_str}. "
            f"Overall sentiment is {metrics['positive_pct']:.1f}% positive and "
            f"{metrics['negative_pct']:.1f}% negative, "
            f"with {metrics['reply_required_pct']:.1f}% of comments requiring a reply."
        )
        self.pdf.body_text(narrative)

        # Health status
        r, g, b = MusoraPDF.PRIMARY
        self.pdf.set_font("Helvetica", "B", 11)
        self.pdf.set_text_color(r, g, b)
        self.pdf.cell(
            0, 8, f"Overall Sentiment Health: {health}",
            new_x="LMARGIN", new_y="NEXT",
        )
        self.pdf.ln(2)
        self.pdf.set_text_color(0, 0, 0)

        # Metric tiles — two rows
        self.pdf.metric_row([
            ("Total Comments", f"{metrics['total_comments']:,}"),
            ("Positive %", f"{metrics['positive_pct']:.1f}%"),
            ("Negative %", f"{metrics['negative_pct']:.1f}%"),
            ("Sentiment Score", f"{normalized_score:.0f}/100"),
        ])
        self.pdf.metric_row([
            ("Reply Required", f"{metrics['total_reply_required']:,}"),
            ("Reply Rate %", f"{metrics['reply_required_pct']:.1f}%"),
            ("Brands Analyzed", str(len(brands))),
            ("Platforms Analyzed", str(len(platforms))),
        ])

        # Score explanation
        self.pdf.ln(2)
        self.pdf.callout_box(
            "How to read the Sentiment Score:\n"
            "Each comment is rated Very Positive (+2), Positive (+1), Neutral (0), "
            "Negative (-1), or Very Negative (-2). "
            "The Score (0-100) converts the average: 50 = perfectly neutral, "
            "above 60 = primarily positive, below 40 = primarily negative.",
        )

        # Key findings
        self.pdf.subsection_header("Key Findings")
        for finding in self._generate_key_findings(df, metrics):
            self.pdf.body_text(f" * {finding}")

    def _generate_key_findings(self, df, metrics: dict) -> list:
        """Return up to four one-sentence findings for the executive summary.

        Args:
            df: Filtered dashboard DataFrame; the ``brand`` and ``platform``
                columns are used when present.
            metrics: Overall metrics; reads ``positive_pct``, ``negative_pct``,
                ``reply_required_pct`` and ``total_reply_required``.
        """
        findings = []

        # Sentiment summary
        if metrics["positive_pct"] > 50:
            findings.append(
                f"Sentiment is predominantly positive at {metrics['positive_pct']:.1f}%."
            )
        elif metrics["negative_pct"] > 30:
            findings.append(
                f"Negative sentiment is elevated at {metrics['negative_pct']:.1f}% - "
                f"consider targeted community management."
            )
        else:
            findings.append(
                f"Sentiment is balanced: {metrics['positive_pct']:.1f}% positive, "
                f"{metrics['negative_pct']:.1f}% negative."
            )

        # Top brand by volume (skipped when the column has no non-null values)
        top_brand = self._top_by_volume(df, "brand")
        if top_brand is not None:
            name, count = top_brand
            findings.append(
                f"Most discussed brand: {str(name).title()} "
                f"({count:,} comments, {count / len(df) * 100:.1f}% of total)."
            )

        # Reply urgency
        if metrics["reply_required_pct"] > 10:
            findings.append(
                f"{metrics['total_reply_required']:,} comments "
                f"({metrics['reply_required_pct']:.1f}%) require a reply."
            )

        # Top platform by volume
        top_platform = self._top_by_volume(df, "platform")
        if top_platform is not None:
            name, count = top_platform
            findings.append(
                f"Most active platform: {str(name).title()} "
                f"({count:,} comments)."
            )

        return findings[:4]

    def _add_sentiment_section(self, df) -> None:
        """Add the overall sentiment page: pie chart, score gauge and a summary sentence."""
        self.pdf.add_page()
        self.pdf.section_header("Sentiment Distribution")
        self.pdf.section_description(_DESCRIPTIONS["sentiment"])

        metrics = SentimentMetrics.calculate_overall_metrics(df)
        normalized_score = self._normalize_score(metrics["avg_sentiment_score"])

        pie = self.sentiment_charts.create_sentiment_pie_chart(df, title="Sentiment Distribution")
        gauge = self.sentiment_charts.create_sentiment_score_gauge(
            metrics["avg_sentiment_score"], title="Overall Sentiment Score"
        )
        self._add_two_charts(pie, gauge)

        # Neutral is whatever is left over; clamp so float error can never
        # print a negative share such as "-0.0%".
        neutral_pct = max(0.0, 100 - metrics["positive_pct"] - metrics["negative_pct"])
        self.pdf.body_text(
            f"Across {metrics['total_comments']:,} analyzed comments: "
            f"{metrics['positive_pct']:.1f}% positive, "
            f"{neutral_pct:.1f}% neutral, "
            f"{metrics['negative_pct']:.1f}% negative. "
            f"Sentiment Score: {normalized_score:.0f}/100 "
            f"(raw average: {metrics['avg_sentiment_score']:.2f} on a -2 to +2 scale)."
        )

    def _add_brand_section(self, df) -> None:
        """Add the per-brand page: count and percentage charts plus a metrics table.

        Skipped when *df* has no ``brand`` column or no non-null brand values.
        """
        if "brand" not in df.columns or df["brand"].nunique() == 0:
            return
        self.pdf.add_page()
        self.pdf.section_header("Sentiment by Brand")
        self.pdf.section_description(_DESCRIPTIONS["brand"])

        bar = self.sentiment_charts.create_sentiment_bar_chart(
            df, group_by="brand", title="Sentiment Distribution by Brand"
        )
        pct = self.sentiment_charts.create_sentiment_percentage_bar_chart(
            df, group_by="brand", title="Sentiment by Brand (%)"
        )
        self._add_two_charts(bar, pct)

        # Summary table
        rows = self._metrics_table_rows(SentimentMetrics.calculate_brand_metrics(df))
        self.pdf.subsection_header("Brand Metrics Summary")
        self.pdf.add_table(
            headers=["Brand", "Comments", "Positive %", "Negative %", "Reply Rate", "Score"],
            rows=rows,
            col_widths=[38, 32, 30, 30, 30, 30],
        )

    def _add_platform_section(self, df) -> None:
        """Add the per-platform page: count and percentage charts plus a metrics table.

        Skipped when *df* has no ``platform`` column or no non-null platform values.
        """
        if "platform" not in df.columns or df["platform"].nunique() == 0:
            return
        self.pdf.add_page()
        self.pdf.section_header("Sentiment by Platform")
        self.pdf.section_description(_DESCRIPTIONS["platform"])

        bar = self.sentiment_charts.create_sentiment_bar_chart(
            df, group_by="platform", title="Sentiment Distribution by Platform"
        )
        pct = self.sentiment_charts.create_sentiment_percentage_bar_chart(
            df, group_by="platform", title="Sentiment by Platform (%)"
        )
        self._add_two_charts(bar, pct)

        # Summary table
        rows = self._metrics_table_rows(SentimentMetrics.calculate_platform_metrics(df))
        self.pdf.subsection_header("Platform Metrics Summary")
        self.pdf.add_table(
            headers=["Platform", "Comments", "Positive %", "Negative %", "Reply Rate", "Score"],
            rows=rows,
            col_widths=[38, 32, 30, 30, 30, 30],
        )

    def _add_intent_section(self, df) -> None:
        """Add the intent page (bar and pie chart); skipped without an ``intent`` column."""
        if "intent" not in df.columns:
            return
        self.pdf.add_page()
        self.pdf.section_header("Intent Analysis")
        self.pdf.section_description(_DESCRIPTIONS["intent"])

        intent_bar = self.distribution_charts.create_intent_bar_chart(
            df, title="Intent Distribution", orientation="h"
        )
        intent_pie = self.distribution_charts.create_intent_pie_chart(
            df, title="Intent Distribution"
        )
        self._add_two_charts(intent_bar, intent_pie)

    def _add_cross_dimensional_section(self, df) -> None:
        """Add the brand x platform page; requires both ``brand`` and ``platform`` columns."""
        if "brand" not in df.columns or "platform" not in df.columns:
            return
        self.pdf.add_page()
        self.pdf.section_header("Cross-Dimensional Analysis")
        self.pdf.section_description(_DESCRIPTIONS["cross_dimensional"])

        matrix = self.distribution_charts.create_brand_platform_matrix(
            df, title="Brand-Platform Comment Matrix"
        )
        heatmap = self.sentiment_charts.create_sentiment_heatmap(
            df,
            row_dimension="brand",
            col_dimension="platform",
            title="Negative Sentiment Heatmap",
        )
        self._add_two_charts(matrix, heatmap)

    def _add_volume_section(self, df) -> None:
        """Add the volume page with whichever of the platform/brand charts are available."""
        has_platform = "platform" in df.columns
        has_brand = "brand" in df.columns
        if not has_platform and not has_brand:
            return
        self.pdf.add_page()
        self.pdf.section_header("Volume Analysis")
        self.pdf.section_description(_DESCRIPTIONS["volume"])

        if has_platform and has_brand:
            platform_dist = self.distribution_charts.create_platform_distribution(
                df, title="Comments by Platform"
            )
            brand_dist = self.distribution_charts.create_brand_distribution(
                df, title="Comments by Brand"
            )
            self._add_two_charts(platform_dist, brand_dist)
        elif has_platform:
            self._add_chart(
                self.distribution_charts.create_platform_distribution(df, title="Comments by Platform")
            )
        else:
            self._add_chart(
                self.distribution_charts.create_brand_distribution(df, title="Comments by Brand")
            )

    def _add_reply_requirements_section(self, df) -> None:
        """Add the reply-requirements page: urgency tiles plus per-brand/platform charts.

        Skipped without a ``requires_reply`` column. Charts are drawn for
        whichever of ``brand`` and ``platform`` are present.
        """
        if "requires_reply" not in df.columns:
            return
        self.pdf.add_page()
        self.pdf.section_header("Reply Requirements Analysis")
        self.pdf.section_description(_DESCRIPTIONS["reply_requirements"])

        urgency = SentimentMetrics.calculate_response_urgency(df)
        self.pdf.metric_row([
            ("Urgent", str(urgency["urgent_count"])),
            ("High Priority", str(urgency["high_priority_count"])),
            ("Medium Priority", str(urgency["medium_priority_count"])),
            ("Low Priority", str(urgency["low_priority_count"])),
        ])
        self.pdf.ln(3)

        has_brand = "brand" in df.columns
        has_platform = "platform" in df.columns
        if has_brand and has_platform:
            reply_brand = self.distribution_charts.create_reply_required_chart(
                df, group_by="brand", title="Comments Requiring Reply by Brand"
            )
            reply_platform = self.distribution_charts.create_reply_required_chart(
                df, group_by="platform", title="Comments Requiring Reply by Platform"
            )
            self._add_two_charts(reply_brand, reply_platform)
        elif has_brand:
            self._add_chart(
                self.distribution_charts.create_reply_required_chart(
                    df, group_by="brand", title="Comments Requiring Reply by Brand"
                )
            )
        elif has_platform:
            # Mirrors _add_volume_section: a platform-only dataset still gets its chart.
            self._add_chart(
                self.distribution_charts.create_reply_required_chart(
                    df, group_by="platform", title="Comments Requiring Reply by Platform"
                )
            )

    def _add_demographics_section(self, df) -> None:
        """Add age, region and experience charts for rows whose platform is ``musora_app``.

        Each chart is skipped when its distribution is empty.
        """
        df_musora = df[df["platform"] == "musora_app"].copy()
        if df_musora.empty:
            return
        self.pdf.add_page()
        self.pdf.section_header("Demographics Analysis (Musora App)")
        self.pdf.section_description(_DESCRIPTIONS["demographics"])
        self.pdf.body_text(f"Analyzing demographics for {len(df_musora):,} Musora App comments.")

        # Age
        age_dist = self.processor.get_demographics_distribution(df_musora, "age_group")
        if not age_dist.empty:
            self.pdf.subsection_header("Age Distribution")
            self._add_chart(
                self.demographic_charts.create_age_distribution_chart(
                    age_dist, title="Comments by Age Group"
                ),
                img_height=350,
            )

        # Region
        region_dist = self.processor.get_timezone_regions_distribution(df_musora)
        if not region_dist.empty:
            self.pdf.subsection_header("Geographic Distribution")
            self._add_chart(
                self.demographic_charts.create_region_distribution_chart(
                    region_dist, title="Comments by Region"
                ),
                img_height=350,
            )

        # Experience
        exp_dist = self.processor.get_experience_level_distribution(df_musora, use_groups=True)
        if not exp_dist.empty:
            self.pdf.subsection_header("Experience Level Distribution")
            self._add_chart(
                self.demographic_charts.create_experience_distribution_chart(
                    exp_dist, title="Comments by Experience Group", use_groups=True
                ),
                img_height=350,
            )

    def _add_language_section(self, df) -> None:
        """Add the language page with a chart of the ten most common languages."""
        self.pdf.add_page()
        self.pdf.section_header("Language Distribution")
        self.pdf.section_description(_DESCRIPTIONS["language"])
        self._add_chart(
            self.distribution_charts.create_language_distribution(df, top_n=10, title="Top 10 Languages")
        )

    def _add_data_summary(self, df, filter_info: dict) -> None:
        """Add the closing page: timestamp, record count, date range, filters and data source."""
        self.pdf.add_page()
        self.pdf.section_header("Data Summary")
        self.pdf.body_text(
            f"Report generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )
        self.pdf.body_text(f"Total records in report: {len(df):,}")

        date_str = self._date_range_str(df)
        if date_str != "N/A":
            self.pdf.body_text(f"Data range: {date_str}")
        self.pdf.body_text(f"Active filters: {self._filter_summary(filter_info)}")

        if "brand" in df.columns:
            brands = sorted(str(b).title() for b in df["brand"].dropna().unique())
            self.pdf.body_text(f"Brands included: {', '.join(brands)}")
        if "platform" in df.columns:
            platforms = sorted(str(p).title() for p in df["platform"].dropna().unique())
            self.pdf.body_text(f"Platforms included: {', '.join(platforms)}")

        self.pdf.ln(5)
        self.pdf.callout_box(
            "Data source: Snowflake - SOCIAL_MEDIA_DB.ML_FEATURES.COMMENT_SENTIMENT_FEATURES "
            "and SOCIAL_MEDIA_DB.ML_FEATURES.MUSORA_COMMENT_SENTIMENT_FEATURES.\n"
            "This report is confidential and intended for internal Musora team use only.",
            bg_color=(245, 245, 245),
        )