Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Running

App Files Files Community

Sentiment_analysis / visualization /utils /pdf_exporter.py

Danialebrat

Deploying sentiment analysis project

9858829 13 days ago

raw

history blame contribute delete

36.9 kB

	"""
	PDF Report Exporter for Musora Sentiment Analysis Dashboard.

	Generates a comprehensive PDF report from the filtered dashboard data and
	Plotly visualizations.

	Dependencies:
	fpdf2 — PDF assembly (pip install fpdf2)
	kaleido — Plotly PNG rendering (pip install kaleido)
	"""

	import os
	import sys
	import tempfile
	import logging
	from datetime import datetime
	from pathlib import Path

	# Put the directory two levels above this file at the front of sys.path
	# (unless it is already listed) so the absolute ``utils.*`` and
	# ``visualizations.*`` imports below resolve when the module is loaded
	# directly, e.g. during testing outside Streamlit.
	_parent = Path(__file__).resolve().parents[1]
	if not any(entry == str(_parent) for entry in sys.path):
	    sys.path.insert(0, str(_parent))

	import plotly.io as pio
	from fpdf import FPDF

	from utils.metrics import SentimentMetrics
	from utils.data_processor import SentimentDataProcessor
	from visualizations.sentiment_charts import SentimentCharts
	from visualizations.distribution_charts import DistributionCharts
	from visualizations.demographic_charts import DemographicCharts

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)

	# ---------------------------------------------------------------------------
	# Section descriptions: plain-language context printed beneath each section
	# header. Keys are looked up by the report-section builders.
	# ---------------------------------------------------------------------------
	_DESCRIPTIONS = {
	    # Executive summary page
	    "executive_summary": (
	        "A top-level snapshot of community sentiment across all "
	        "Musora brands and platforms. All findings are based on "
	        "comments processed through the AI sentiment analysis pipeline."
	    ),
	    # Sentiment distribution page
	    "sentiment": (
	        "Every comment is assigned one of five sentiment levels: Very Positive, "
	        "Positive, Neutral, Negative, or Very Negative. The pie chart shows how "
	        "those levels split across all analyzed comments. The Sentiment Score "
	        "(0-100) converts the average rating to a percentage scale: 50 = perfectly "
	        "neutral, above 60 = primarily positive."
	    ),
	    # Sentiment by brand page
	    "brand": (
	        "Sentiment broken down by Musora brand (Drumeo, Pianote, Guitareo, "
	        "Singeo, etc.). Shows both the count and percentage of each sentiment "
	        "level per brand, helping identify which brands receive the most "
	        "positive or negative feedback."
	    ),
	    # Sentiment by platform page
	    "platform": (
	        "Sentiment broken down by platform (Facebook, Instagram, YouTube, "
	        "Twitter, Musora App). Helps compare audience sentiment across channels."
	    ),
	    # Intent analysis page
	    "intent": (
	        "Beyond positive/negative, the AI identifies the intent behind each "
	        "comment: praise, questions, requests, feedback, suggestions, humor, "
	        "off-topic, or spam. Understanding intent helps prioritize community "
	        "management."
	    ),
	    # Brand x platform page
	    "cross_dimensional": (
	        "Cross-dimensional analysis reveals patterns across both brand and "
	        "platform simultaneously. The heatmaps show comment volume and negative "
	        "sentiment concentration by combination."
	    ),
	    # Volume page
	    "volume": (
	        "Volume analysis shows the distribution of comments across platforms "
	        "and brands, indicating where the most community engagement is happening."
	    ),
	    # Reply requirements page
	    "reply_requirements": (
	        "Comments flagged as requiring a reply, broken down by brand and "
	        "platform. The urgency breakdown helps prioritize community management "
	        "resources."
	    ),
	    # Demographics page
	    "demographics": (
	        "Demographics data is available for Musora App comments and is derived "
	        "from user profiles. Note: These charts reflect only users who have "
	        "filled in their profile information - they do not represent all "
	        "community members."
	    ),
	    # Language page
	    "language": (
	        "Language distribution shows what languages comments are written "
	        "in. Non-English comments are automatically translated for analysis."
	    ),
	}

	# ---------------------------------------------------------------------------
	# Musora brand colours
	# ---------------------------------------------------------------------------
	# Primary brand colour. The RGB triple is decoded from the hex string
	# (pairs of hex digits after the leading '#').
	_PRIMARY_HEX = "#1982C4"
	_PRIMARY_RGB = tuple(int(_PRIMARY_HEX[i:i + 2], 16) for i in (1, 3, 5))


	# ---------------------------------------------------------------------------
	# PDF document class
	# ---------------------------------------------------------------------------

	class MusoraPDF(FPDF):
	    """A4 portrait PDF document with Musora branding and layout helpers.

	    On top of FPDF this class adds a running header (every page after the
	    first), a timestamped footer, and layout primitives for section
	    headings, paragraphs, callout boxes, metric tiles and tables. Text is
	    drawn with the built-in Helvetica font, so the helpers pass their input
	    through ``_sanitize`` before rendering it.
	    """

	    # Colours as (r, g, b) tuples.
	    PRIMARY = _PRIMARY_RGB
	    WHITE = (255, 255, 255)
	    GRAY = (180, 180, 180)
	    LIGHT_GRAY = (240, 240, 240)

	    # Space (mm) kept free at the bottom of every page. Used both for FPDF's
	    # automatic page break and for check_page_break() so the two agree.
	    _BOTTOM_MARGIN_MM = 20

	    def __init__(self):
	        """Create an A4 portrait document (mm units) with auto page breaks."""
	        super().__init__(orientation="P", unit="mm", format="A4")
	        self.set_auto_page_break(auto=True, margin=self._BOTTOM_MARGIN_MM)

	    # ------------------------------------------------------------------
	    # Helpers
	    # ------------------------------------------------------------------

	    @staticmethod
	    def _sanitize(text: str) -> str:
	        """Strip characters outside Latin-1 (required by the Helvetica font).

	        Non-string input is converted with ``str()`` first. Characters that
	        cannot be encoded as Latin-1 are dropped, not replaced.
	        """
	        if not isinstance(text, str):
	            text = str(text)
	        return text.encode("latin-1", errors="ignore").decode("latin-1")

	    # ------------------------------------------------------------------
	    # FPDF overrides
	    # ------------------------------------------------------------------

	    def header(self):
	        """Draw the running header (title, page number, rule) on pages 2+."""
	        if self.page_no() <= 1:
	            # The first page gets no running header.
	            return
	        self.set_font("Helvetica", "B", 8)
	        self.set_text_color(*self.GRAY)
	        self.cell(0, 6, "Musora Sentiment Analysis Report", align="L")
	        self.cell(
	            0, 6, f"Page {self.page_no()}", align="R",
	            new_x="LMARGIN", new_y="NEXT",
	        )
	        self.set_draw_color(*self.PRIMARY)
	        self.set_line_width(0.5)
	        self.line(10, self.get_y(), 200, self.get_y())
	        self.ln(4)

	    def footer(self):
	        """Draw the centred generation timestamp / confidentiality footer."""
	        self.set_y(-15)
	        self.set_font("Helvetica", "I", 7)
	        self.set_text_color(*self.GRAY)
	        # Plain "|" separator: the previous "\|" was an invalid escape
	        # sequence and printed a stray backslash in the footer.
	        self.cell(
	            0, 10,
	            f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M')} | Confidential",
	            align="C",
	        )

	    # ------------------------------------------------------------------
	    # Layout primitives
	    # ------------------------------------------------------------------

	    def check_page_break(self, needed_mm: float) -> None:
	        """Start a new page if fewer than ``needed_mm`` mm remain above the bottom margin."""
	        if self.get_y() + needed_mm > self.h - self._BOTTOM_MARGIN_MM:
	            self.add_page()

	    def section_header(self, title: str) -> None:
	        """Bold, brand-coloured section heading with an underline rule."""
	        title = self._sanitize(title)
	        self.check_page_break(20)
	        self.ln(4)
	        self.set_font("Helvetica", "B", 14)
	        self.set_text_color(*self.PRIMARY)
	        self.cell(0, 10, title, new_x="LMARGIN", new_y="NEXT")
	        self.set_draw_color(*self.PRIMARY)
	        self.set_line_width(0.3)
	        self.line(10, self.get_y(), 200, self.get_y())
	        self.ln(3)
	        self.set_text_color(0, 0, 0)

	    def subsection_header(self, title: str) -> None:
	        """Lighter subsection heading."""
	        title = self._sanitize(title)
	        self.check_page_break(15)
	        self.ln(2)
	        self.set_font("Helvetica", "B", 11)
	        self.set_text_color(60, 60, 60)
	        self.cell(0, 8, title, new_x="LMARGIN", new_y="NEXT")
	        self.ln(1)
	        self.set_text_color(0, 0, 0)

	    def section_description(self, text: str) -> None:
	        """Italicised description block beneath a section header."""
	        text = self._sanitize(text)
	        self.set_font("Helvetica", "I", 9)
	        self.set_text_color(80, 80, 80)
	        self.multi_cell(0, 5, text)
	        self.ln(4)
	        self.set_text_color(0, 0, 0)

	    def body_text(self, text: str) -> None:
	        """Standard paragraph text."""
	        text = self._sanitize(text)
	        self.set_font("Helvetica", "", 9)
	        self.set_text_color(50, 50, 50)
	        self.multi_cell(0, 5, text)
	        self.ln(2)
	        self.set_text_color(0, 0, 0)

	    def callout_box(
	        self,
	        text: str,
	        bg_color: tuple = (240, 248, 255),
	        border_color: tuple = None,
	    ) -> None:
	        """Lightly-coloured info/callout box with a left accent bar.

	        Args:
	            text: Box content; newlines start new lines inside the box.
	            bg_color: (r, g, b) fill colour of the box.
	            border_color: (r, g, b) colour of the 3 mm accent bar; defaults
	                to the PRIMARY brand colour.
	        """
	        if border_color is None:
	            border_color = self.PRIMARY
	        text = self._sanitize(text)
	        x, w = 10, 180
	        # The box height is estimated up front (about 90 characters per line,
	        # at least two lines) because the background is drawn before the text.
	        approx_lines = max(2, len(text) // 90 + text.count("\n") + 1)
	        h = approx_lines * 5 + 6
	        # Reserve room for the whole box plus the 3 mm gap that follows it,
	        # so the background and its text cannot end up on different pages.
	        self.check_page_break(h + 3)
	        y = self.get_y()
	        self.set_fill_color(*bg_color)
	        self.rect(x, y, w, h, style="F")
	        self.set_fill_color(*border_color)
	        self.rect(x, y, 3, h, style="F")
	        self.set_font("Helvetica", "", 8.5)
	        self.set_text_color(40, 40, 40)
	        self.set_xy(x + 5, y + 3)
	        self.multi_cell(w - 7, 4.5, text)
	        self.set_y(y + h + 3)
	        self.set_text_color(0, 0, 0)

	    def metric_row(self, metrics: list) -> None:
	        """
	        Horizontal row of metric tiles.

	        Args:
	            metrics: list of (label, value) tuples. An empty list draws
	                nothing and leaves the cursor where it was.
	        """
	        n = len(metrics)
	        if n == 0:
	            # Bail out before the page-break check so an empty row can
	            # never trigger a new page.
	            return
	        self.check_page_break(18)
	        gap = 3
	        box_w = (190 - (n - 1) * gap) / n
	        x0 = 10
	        y = self.get_y()
	        for i, (label, value) in enumerate(metrics):
	            x = x0 + i * (box_w + gap)
	            self.set_fill_color(245, 245, 245)
	            self.rect(x, y, box_w, 14, style="F")
	            # Value on top, in bold brand colour.
	            self.set_xy(x, y + 1)
	            self.set_font("Helvetica", "B", 10)
	            self.set_text_color(*self.PRIMARY)
	            self.cell(box_w, 6, self._sanitize(str(value)), align="C")
	            # Label underneath, smaller and grey.
	            self.set_xy(x, y + 7)
	            self.set_font("Helvetica", "", 7)
	            self.set_text_color(100, 100, 100)
	            self.cell(box_w, 5, self._sanitize(str(label)), align="C")
	        self.set_text_color(0, 0, 0)
	        self.set_y(y + 16)

	    def add_table(
	        self,
	        headers: list,
	        rows: list,
	        col_widths: list = None,
	    ) -> None:
	        """
	        Styled data table with alternating row shading.

	        Args:
	            headers: Column header strings. An empty list draws nothing.
	            rows: List of row tuples/lists. Cells beyond the number of
	                column widths are ignored.
	            col_widths: Optional column widths in mm; defaults to equal
	                shares of 190 mm.
	        """
	        n = len(headers)
	        if n == 0:
	            # Nothing to draw; also avoids dividing by zero below.
	            return
	        if col_widths is None:
	            col_widths = [190 / n] * n

	        needed = 10 + len(rows) * 6
	        usable = self.h - self.t_margin - self._BOTTOM_MARGIN_MM
	        if needed > usable:
	            # The table cannot fit on a single page anyway: just keep the
	            # header with its first row and let the automatic page break
	            # split the remaining rows.
	            needed = 7 + 6
	        self.check_page_break(needed)

	        # Header
	        self.set_font("Helvetica", "B", 8)
	        self.set_fill_color(*self.PRIMARY)
	        self.set_text_color(*self.WHITE)
	        for width, hdr in zip(col_widths, headers):
	            self.cell(width, 7, self._sanitize(hdr), border=1, fill=True, align="C")
	        self.ln()

	        # Rows
	        self.set_font("Helvetica", "", 8)
	        self.set_text_color(0, 0, 0)
	        for row_idx, row in enumerate(rows):
	            # Even rows get a faint grey fill, odd rows stay white.
	            if row_idx % 2 == 0:
	                self.set_fill_color(250, 250, 250)
	            else:
	                self.set_fill_color(*self.WHITE)
	            for width, cell_val in zip(col_widths, row):
	                self.cell(width, 6, self._sanitize(str(cell_val)), border=1, fill=True, align="C")
	            self.ln()
	        self.ln(2)


	# ---------------------------------------------------------------------------
	# Main exporter
	# ---------------------------------------------------------------------------

	class DashboardPDFExporter:
	    """
	    Generates a comprehensive PDF report from the Musora Sentiment dashboard.

	    Usage::

	        exporter = DashboardPDFExporter()
	        pdf_bytes = exporter.generate_report(filtered_df, filter_info)

	    The filter_info dict (optional) maps human-readable filter names to their
	    selected values and is shown on the cover page.
	    """

	    # Kaleido scale factor: 3× ≈ 300 DPI at A4 print size.
	    RENDER_SCALE = 3

	    # Column widths (mm) of the six-column brand/platform summary tables.
	    _SUMMARY_COL_WIDTHS = (38, 32, 30, 30, 30, 30)

	    def __init__(self):
	        """Create the chart builders, the data processor and the temp-file registry."""
	        self.sentiment_charts = SentimentCharts()
	        self.distribution_charts = DistributionCharts()
	        self.demographic_charts = DemographicCharts()
	        self.processor = SentimentDataProcessor()
	        # Paths of rendered chart PNGs, removed again after each report.
	        self._temp_files: list[str] = []
	        # The document under construction; replaced by generate_report().
	        self.pdf = None

	    # ------------------------------------------------------------------
	    # Public entry point
	    # ------------------------------------------------------------------

	    def generate_report(self, df, filter_info: dict = None) -> bytes:
	        """
	        Build and return the full PDF report.

	        Args:
	            df: Filtered dashboard DataFrame.
	            filter_info: Optional dict of active filter descriptions shown on
	                the cover page, e.g. {"Platforms": ["facebook"],
	                "Brands": ["drumeo"]}.

	        Returns:
	            bytes: Raw PDF file contents ready for st.download_button.

	        Temporary chart images are deleted whether or not the build succeeds.
	        """
	        self.pdf = MusoraPDF()
	        try:
	            self._add_cover_page(df, filter_info)
	            self._add_executive_summary(df)
	            self._add_sentiment_section(df)
	            self._add_brand_section(df)
	            self._add_platform_section(df)
	            self._add_intent_section(df)
	            self._add_cross_dimensional_section(df)
	            self._add_volume_section(df)
	            self._add_reply_requirements_section(df)
	            if self._has_demographics(df):
	                self._add_demographics_section(df)
	            if "detected_language" in df.columns:
	                self._add_language_section(df)
	            self._add_data_summary(df, filter_info)

	            return bytes(self.pdf.output())
	        finally:
	            self._cleanup_temp_files()

	    # ------------------------------------------------------------------
	    # Chart rendering helpers
	    # ------------------------------------------------------------------

	    def _prepare_fig_for_pdf(self, fig, is_side_by_side: bool = False) -> None:
	        """Apply white background, readable fonts, and automargin to a Plotly figure.

	        The figure is modified in place. Side-by-side figures get a slightly
	        smaller base font and tighter margins.
	        """
	        base_fs = 13 if is_side_by_side else 14
	        if is_side_by_side:
	            margin = dict(l=60, r=40, t=60, b=60)
	        else:
	            margin = dict(l=80, r=40, t=60, b=80)
	        fig.update_layout(
	            paper_bgcolor="white",
	            plot_bgcolor="white",
	            font=dict(color="black", size=base_fs),
	            title_font_size=base_fs + 4,
	            margin=margin,
	        )
	        fig.update_xaxes(automargin=True)
	        fig.update_yaxes(automargin=True)
	        # Only touch the legend font when the legend is not explicitly hidden.
	        if fig.layout.showlegend is not False:
	            fig.update_layout(legend_font_size=base_fs - 2)

	    def _fig_to_temp_path(
	        self, fig, width: int = 800, height: int = 400, is_side_by_side: bool = False
	    ) -> str:
	        """Render a Plotly figure to a temporary high-DPI PNG and return the path.

	        The file is registered in ``self._temp_files`` so that
	        ``_cleanup_temp_files`` removes it later.
	        """
	        self._prepare_fig_for_pdf(fig, is_side_by_side=is_side_by_side)
	        img_bytes = pio.to_image(
	            fig,
	            format="png",
	            width=width,
	            height=height,
	            scale=self.RENDER_SCALE,
	            engine="kaleido",
	        )
	        tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
	        # Register the path before writing so that a failed write still
	        # leaves the file on the clean-up list; ``with`` closes the handle
	        # even if the write raises.
	        self._temp_files.append(tmp.name)
	        with tmp:
	            tmp.write(img_bytes)
	        return tmp.name

	    def _add_chart(self, fig, width: int = 180, img_width: int = 800, img_height: int = 400) -> None:
	        """Render one figure full-width on the current PDF page.

	        Any failure is logged and replaced by a placeholder line so that a
	        single broken chart does not abort the whole report.
	        """
	        try:
	            path = self._fig_to_temp_path(fig, img_width, img_height)
	            # Height on paper follows the rendered aspect ratio.
	            h_mm = width * (img_height / img_width)
	            self.pdf.check_page_break(h_mm + 5)
	            self.pdf.image(path, x=10, w=width)
	            self.pdf.ln(3)
	        except Exception:
	            logger.exception("Chart render failed")
	            self.pdf.body_text("[Chart could not be rendered]")

	    def _add_two_charts(self, fig1, fig2, width: int = 92) -> None:
	        """Render two figures side-by-side.

	        Both figures are rendered at 700x450 px; failures are logged and
	        replaced by a placeholder line.
	        """
	        try:
	            p1 = self._fig_to_temp_path(fig1, 700, 450, is_side_by_side=True)
	            p2 = self._fig_to_temp_path(fig2, 700, 450, is_side_by_side=True)
	            h_mm = width * (450 / 700)
	            self.pdf.check_page_break(h_mm + 5)
	            y = self.pdf.get_y()
	            self.pdf.image(p1, x=10, y=y, w=width)
	            self.pdf.image(p2, x=10 + width + 4, y=y, w=width)
	            # Images placed with an explicit y do not move the cursor.
	            self.pdf.set_y(y + h_mm + 3)
	        except Exception:
	            logger.exception("Side-by-side chart render failed")
	            self.pdf.body_text("[Charts could not be rendered]")

	    def _cleanup_temp_files(self) -> None:
	        """Delete all registered temporary chart images (best effort)."""
	        for path in self._temp_files:
	            try:
	                os.unlink(path)
	            except OSError:
	                # Best effort: a file that is already gone or locked must
	                # not mask the result (or error) of the report build.
	                pass
	        self._temp_files.clear()

	    # ------------------------------------------------------------------
	    # Data helpers
	    # ------------------------------------------------------------------

	    @staticmethod
	    def _has_demographics(df) -> bool:
	        """Return True if df has Musora App rows and all demographic columns."""
	        required = ("platform", "age_group", "timezone", "experience_level")
	        if any(col not in df.columns for col in required):
	            return False
	        return "musora_app" in df["platform"].values

	    @staticmethod
	    def _normalized_score(avg_score: float) -> float:
	        """Map an average sentiment rating on the -2..+2 scale to 0-100."""
	        return ((avg_score + 2) / 4) * 100

	    @staticmethod
	    def _format_filter_value(value) -> str:
	        """Return a display string for one filter value.

	        Strings are used as-is, iterables are comma-joined, and any other
	        value (e.g. a bool or number) falls back to ``str(value)``.
	        """
	        if isinstance(value, str):
	            return value
	        try:
	            return ", ".join(str(v) for v in value)
	        except TypeError:
	            # Not iterable: a scalar filter value.
	            return str(value)

	    @staticmethod
	    def _filter_summary(filter_info: dict) -> str:
	        """Return a one-line "key: values; key: values" summary of active filters.

	        Filters with falsy values are skipped.
	        """
	        if not filter_info:
	            return "No filters applied - showing all data."
	        parts = [
	            f"{key}: {DashboardPDFExporter._format_filter_value(value)}"
	            for key, value in filter_info.items()
	            if value
	        ]
	        return "; ".join(parts) if parts else "No filters applied."

	    @staticmethod
	    def _date_range_str(df) -> str:
	        """Return "Mon DD, YYYY to Mon DD, YYYY" for comment_timestamp, or "N/A".

	        "N/A" is returned when the column is missing or holds no non-null
	        values.
	        """
	        if "comment_timestamp" not in df.columns or df.empty:
	            return "N/A"
	        valid = df["comment_timestamp"].dropna()
	        if valid.empty:
	            return "N/A"
	        first, last = valid.min(), valid.max()
	        return f"{first.strftime('%b %d, %Y')} to {last.strftime('%b %d, %Y')}"

	    @staticmethod
	    def _top_category(df, column: str):
	        """Return (value, count) of the most frequent non-null value in column.

	        Returns None when the column is missing or has no non-null values.
	        """
	        if column not in df.columns:
	            return None
	        counts = df[column].value_counts()
	        if counts.empty:
	            return None
	        return counts.index[0], int(counts.iloc[0])

	    def _centered_line(self, text: str, height: float) -> None:
	        """Write one centred line and move the cursor to the next line."""
	        self.pdf.cell(0, height, text, align="C", new_x="LMARGIN", new_y="NEXT")

	    # ------------------------------------------------------------------
	    # Report sections
	    # ------------------------------------------------------------------

	    def _add_cover_page(self, df, filter_info: dict) -> None:
	        """Add the cover page: title, timestamp, totals, date range and filters."""
	        pdf = self.pdf
	        pdf.add_page()
	        pdf.ln(40)

	        # Full-width brand-coloured band.
	        r, g, b = MusoraPDF.PRIMARY
	        pdf.set_fill_color(r, g, b)
	        pdf.rect(0, 60, 210, 4, style="F")

	        pdf.ln(20)
	        pdf.set_font("Helvetica", "B", 28)
	        pdf.set_text_color(r, g, b)
	        self._centered_line("Musora", 15)

	        pdf.set_font("Helvetica", "", 16)
	        pdf.set_text_color(80, 80, 80)
	        self._centered_line("Sentiment Analysis Report", 10)

	        pdf.ln(10)
	        pdf.set_draw_color(r, g, b)
	        pdf.set_line_width(0.5)
	        pdf.line(60, pdf.get_y(), 150, pdf.get_y())
	        pdf.ln(10)

	        pdf.set_font("Helvetica", "", 12)
	        pdf.set_text_color(100, 100, 100)
	        self._centered_line(
	            f"Generated: {datetime.now().strftime('%B %d, %Y at %H:%M')}", 8
	        )

	        pdf.ln(5)
	        pdf.set_font("Helvetica", "", 10)
	        self._centered_line(f"Total Comments Analyzed: {len(df):,}", 7)

	        date_str = self._date_range_str(df)
	        if date_str != "N/A":
	            pdf.ln(3)
	            pdf.set_font("Helvetica", "I", 9)
	            pdf.set_text_color(120, 120, 120)
	            self._centered_line(MusoraPDF._sanitize(f"Data period: {date_str}"), 6)

	        if filter_info:
	            pdf.ln(8)
	            pdf.set_font("Helvetica", "B", 9)
	            pdf.set_text_color(80, 80, 80)
	            self._centered_line("Active Filters:", 6)
	            pdf.set_font("Helvetica", "", 9)
	            for key, value in filter_info.items():
	                if value:
	                    display = self._format_filter_value(value)
	                    self._centered_line(MusoraPDF._sanitize(f"{key}: {display}"), 5)

	        pdf.ln(20)
	        pdf.set_font("Helvetica", "I", 8)
	        pdf.set_text_color(150, 150, 150)
	        self._centered_line("Confidential - For Internal Use Only", 6)
	        # Plain "|" separator: the previous "\|" was an invalid escape
	        # sequence and printed a stray backslash on the cover page.
	        self._centered_line("Data Source: Snowflake | Musora Sentiment Pipeline", 6)

	    def _add_executive_summary(self, df) -> None:
	        """Add the executive summary: narrative, health label, tiles, findings."""
	        self.pdf.add_page()
	        self.pdf.section_header("Executive Summary")
	        self.pdf.section_description(_DESCRIPTIONS["executive_summary"])

	        metrics = SentimentMetrics.calculate_overall_metrics(df)
	        normalized_score = self._normalized_score(metrics["avg_sentiment_score"])

	        # Health label, driven by the share of negative comments.
	        neg_pct = metrics["negative_pct"]
	        if neg_pct < 20:
	            health = "Healthy"
	        elif neg_pct < 35:
	            health = "Moderate"
	        else:
	            health = "Needs Attention"

	        # Opening narrative (at most six brands / platforms are named).
	        brands = sorted(df["brand"].dropna().unique().tolist()) if "brand" in df.columns else []
	        platforms = sorted(df["platform"].dropna().unique().tolist()) if "platform" in df.columns else []
	        brands_str = ", ".join(str(b).title() for b in brands[:6]) if brands else "all brands"
	        platforms_str = ", ".join(str(p).title() for p in platforms[:6]) if platforms else "all platforms"

	        narrative = (
	            f"This report analyzes {metrics['total_comments']:,} comments across {brands_str} "
	            f"on {platforms_str}. "
	            f"Overall sentiment is {metrics['positive_pct']:.1f}% positive and "
	            f"{metrics['negative_pct']:.1f}% negative, "
	            f"with {metrics['reply_required_pct']:.1f}% of comments requiring a reply."
	        )
	        self.pdf.body_text(narrative)

	        # Health status
	        r, g, b = MusoraPDF.PRIMARY
	        self.pdf.set_font("Helvetica", "B", 11)
	        self.pdf.set_text_color(r, g, b)
	        self.pdf.cell(
	            0, 8, f"Overall Sentiment Health: {health}",
	            new_x="LMARGIN", new_y="NEXT",
	        )
	        self.pdf.ln(2)
	        self.pdf.set_text_color(0, 0, 0)

	        # Metric tiles — two rows
	        self.pdf.metric_row([
	            ("Total Comments", f"{metrics['total_comments']:,}"),
	            ("Positive %", f"{metrics['positive_pct']:.1f}%"),
	            ("Negative %", f"{metrics['negative_pct']:.1f}%"),
	            ("Sentiment Score", f"{normalized_score:.0f}/100"),
	        ])
	        self.pdf.metric_row([
	            ("Reply Required", f"{metrics['total_reply_required']:,}"),
	            ("Reply Rate %", f"{metrics['reply_required_pct']:.1f}%"),
	            ("Brands Analyzed", str(len(brands))),
	            ("Platforms Analyzed", str(len(platforms))),
	        ])

	        # Score explanation
	        self.pdf.ln(2)
	        self.pdf.callout_box(
	            "How to read the Sentiment Score:\n"
	            "Each comment is rated Very Positive (+2), Positive (+1), Neutral (0), "
	            "Negative (-1), or Very Negative (-2). "
	            "The Score (0-100) converts the average: 50 = perfectly neutral, "
	            "above 60 = primarily positive, below 40 = primarily negative.",
	        )

	        # Key findings
	        self.pdf.subsection_header("Key Findings")
	        for finding in self._generate_key_findings(df, metrics):
	            self.pdf.body_text(f" * {finding}")

	    def _generate_key_findings(self, df, metrics: dict) -> list:
	        """Return up to four one-sentence findings for the executive summary.

	        Args:
	            df: Filtered dashboard DataFrame.
	            metrics: Overall metrics dict; reads positive_pct, negative_pct,
	                reply_required_pct and total_reply_required.

	        Brand / platform findings are skipped when the column is missing or
	        holds no non-null values.
	        """
	        findings = []

	        # Sentiment summary
	        if metrics["positive_pct"] > 50:
	            findings.append(
	                f"Sentiment is predominantly positive at {metrics['positive_pct']:.1f}%."
	            )
	        elif metrics["negative_pct"] > 30:
	            findings.append(
	                f"Negative sentiment is elevated at {metrics['negative_pct']:.1f}% - "
	                f"consider targeted community management."
	            )
	        else:
	            findings.append(
	                f"Sentiment is balanced: {metrics['positive_pct']:.1f}% positive, "
	                f"{metrics['negative_pct']:.1f}% negative."
	            )

	        # Top brand by volume
	        top_brand = self._top_category(df, "brand")
	        if top_brand is not None:
	            name, count = top_brand
	            findings.append(
	                f"Most discussed brand: {str(name).title()} "
	                f"({count:,} comments, {count / len(df) * 100:.1f}% of total)."
	            )

	        # Reply urgency
	        if metrics["reply_required_pct"] > 10:
	            findings.append(
	                f"{metrics['total_reply_required']:,} comments "
	                f"({metrics['reply_required_pct']:.1f}%) require a reply."
	            )

	        # Top platform by volume
	        top_platform = self._top_category(df, "platform")
	        if top_platform is not None:
	            name, count = top_platform
	            findings.append(
	                f"Most active platform: {str(name).title()} "
	                f"({count:,} comments)."
	            )

	        return findings[:4]

	    def _add_sentiment_section(self, df) -> None:
	        """Add the sentiment distribution page (pie chart, gauge, summary line)."""
	        self.pdf.add_page()
	        self.pdf.section_header("Sentiment Distribution")
	        self.pdf.section_description(_DESCRIPTIONS["sentiment"])

	        metrics = SentimentMetrics.calculate_overall_metrics(df)
	        normalized_score = self._normalized_score(metrics["avg_sentiment_score"])

	        pie = self.sentiment_charts.create_sentiment_pie_chart(df, title="Sentiment Distribution")
	        gauge = self.sentiment_charts.create_sentiment_score_gauge(
	            metrics["avg_sentiment_score"], title="Overall Sentiment Score"
	        )
	        self._add_two_charts(pie, gauge)

	        # Neutral is whatever is left over; clamp at zero so floating-point
	        # round-off cannot print "-0.0%".
	        neutral_pct = max(0.0, 100 - metrics["positive_pct"] - metrics["negative_pct"])
	        self.pdf.body_text(
	            f"Across {metrics['total_comments']:,} analyzed comments: "
	            f"{metrics['positive_pct']:.1f}% positive, "
	            f"{neutral_pct:.1f}% neutral, "
	            f"{metrics['negative_pct']:.1f}% negative. "
	            f"Sentiment Score: {normalized_score:.0f}/100 "
	            f"(raw average: {metrics['avg_sentiment_score']:.2f} on a -2 to +2 scale)."
	        )

	    def _add_breakdown_section(self, df, column: str, label: str, metrics_fn) -> None:
	        """Add a "Sentiment by <label>" page: two bar charts and a summary table.

	        Args:
	            df: Filtered dashboard DataFrame.
	            column: Column to group by; also the _DESCRIPTIONS key.
	            label: Capitalised display name used in titles and the table.
	            metrics_fn: Callable taking df and returning a mapping of group
	                value to its metrics dict.

	        Nothing is added when the column is missing or has no values.
	        """
	        if column not in df.columns or df[column].nunique() == 0:
	            return

	        self.pdf.add_page()
	        self.pdf.section_header(f"Sentiment by {label}")
	        self.pdf.section_description(_DESCRIPTIONS[column])

	        bar = self.sentiment_charts.create_sentiment_bar_chart(
	            df, group_by=column, title=f"Sentiment Distribution by {label}"
	        )
	        pct = self.sentiment_charts.create_sentiment_percentage_bar_chart(
	            df, group_by=column, title=f"Sentiment by {label} (%)"
	        )
	        self._add_two_charts(bar, pct)

	        # Summary table, one row per group. Sorting on the string form keeps
	        # the ordering for string keys and tolerates mixed-type keys.
	        rows = []
	        for group, m in sorted(metrics_fn(df).items(), key=lambda item: str(item[0])):
	            score = self._normalized_score(m["avg_sentiment_score"])
	            rows.append((
	                str(group).title(),
	                f"{m['total_comments']:,}",
	                f"{m['positive_pct']:.1f}%",
	                f"{m['negative_pct']:.1f}%",
	                f"{m['reply_required_pct']:.1f}%",
	                f"{score:.0f}/100",
	            ))
	        self.pdf.subsection_header(f"{label} Metrics Summary")
	        self.pdf.add_table(
	            headers=[label, "Comments", "Positive %", "Negative %", "Reply Rate", "Score"],
	            rows=rows,
	            col_widths=list(self._SUMMARY_COL_WIDTHS),
	        )

	    def _add_brand_section(self, df) -> None:
	        """Add the per-brand sentiment page (skipped without brand data)."""
	        self._add_breakdown_section(
	            df, "brand", "Brand", SentimentMetrics.calculate_brand_metrics
	        )

	    def _add_platform_section(self, df) -> None:
	        """Add the per-platform sentiment page (skipped without platform data)."""
	        self._add_breakdown_section(
	            df, "platform", "Platform", SentimentMetrics.calculate_platform_metrics
	        )

	    def _add_intent_section(self, df) -> None:
	        """Add the intent analysis page (bar + pie); skipped without an intent column."""
	        if "intent" not in df.columns:
	            return

	        self.pdf.add_page()
	        self.pdf.section_header("Intent Analysis")
	        self.pdf.section_description(_DESCRIPTIONS["intent"])

	        intent_bar = self.distribution_charts.create_intent_bar_chart(
	            df, title="Intent Distribution", orientation="h"
	        )
	        intent_pie = self.distribution_charts.create_intent_pie_chart(
	            df, title="Intent Distribution"
	        )
	        self._add_two_charts(intent_bar, intent_pie)

	    def _add_cross_dimensional_section(self, df) -> None:
	        """Add the brand x platform page; needs both columns."""
	        if "brand" not in df.columns or "platform" not in df.columns:
	            return

	        self.pdf.add_page()
	        self.pdf.section_header("Cross-Dimensional Analysis")
	        self.pdf.section_description(_DESCRIPTIONS["cross_dimensional"])

	        matrix = self.distribution_charts.create_brand_platform_matrix(
	            df, title="Brand-Platform Comment Matrix"
	        )
	        heatmap = self.sentiment_charts.create_sentiment_heatmap(
	            df,
	            row_dimension="brand",
	            col_dimension="platform",
	            title="Negative Sentiment Heatmap",
	        )
	        self._add_two_charts(matrix, heatmap)

	    def _add_volume_section(self, df) -> None:
	        """Add the volume page with whichever of platform / brand charts apply."""
	        has_platform = "platform" in df.columns
	        has_brand = "brand" in df.columns
	        if not has_platform and not has_brand:
	            return

	        self.pdf.add_page()
	        self.pdf.section_header("Volume Analysis")
	        self.pdf.section_description(_DESCRIPTIONS["volume"])

	        if has_platform and has_brand:
	            platform_dist = self.distribution_charts.create_platform_distribution(
	                df, title="Comments by Platform"
	            )
	            brand_dist = self.distribution_charts.create_brand_distribution(
	                df, title="Comments by Brand"
	            )
	            self._add_two_charts(platform_dist, brand_dist)
	        elif has_platform:
	            self._add_chart(
	                self.distribution_charts.create_platform_distribution(df, title="Comments by Platform")
	            )
	        else:
	            self._add_chart(
	                self.distribution_charts.create_brand_distribution(df, title="Comments by Brand")
	            )

	    def _add_reply_requirements_section(self, df) -> None:
	        """Add the reply requirements page: urgency tiles plus reply charts.

	        Skipped when df has no requires_reply column. Charts are drawn for
	        whichever of the brand / platform columns are present.
	        """
	        if "requires_reply" not in df.columns:
	            return

	        self.pdf.add_page()
	        self.pdf.section_header("Reply Requirements Analysis")
	        self.pdf.section_description(_DESCRIPTIONS["reply_requirements"])

	        urgency = SentimentMetrics.calculate_response_urgency(df)
	        self.pdf.metric_row([
	            ("Urgent", str(urgency["urgent_count"])),
	            ("High Priority", str(urgency["high_priority_count"])),
	            ("Medium Priority", str(urgency["medium_priority_count"])),
	            ("Low Priority", str(urgency["low_priority_count"])),
	        ])
	        self.pdf.ln(3)

	        has_brand = "brand" in df.columns
	        has_platform = "platform" in df.columns
	        if has_brand and has_platform:
	            reply_brand = self.distribution_charts.create_reply_required_chart(
	                df, group_by="brand", title="Comments Requiring Reply by Brand"
	            )
	            reply_platform = self.distribution_charts.create_reply_required_chart(
	                df, group_by="platform", title="Comments Requiring Reply by Platform"
	            )
	            self._add_two_charts(reply_brand, reply_platform)
	        elif has_brand:
	            self._add_chart(
	                self.distribution_charts.create_reply_required_chart(
	                    df, group_by="brand", title="Comments Requiring Reply by Brand"
	                )
	            )
	        elif has_platform:
	            # Platform-only data previously produced no chart at all.
	            self._add_chart(
	                self.distribution_charts.create_reply_required_chart(
	                    df, group_by="platform", title="Comments Requiring Reply by Platform"
	                )
	            )

	    def _add_demographics_section(self, df) -> None:
	        """Add the demographics page for Musora App rows (age, region, experience).

	        Skipped when df holds no rows with platform == "musora_app"; each
	        chart is skipped when its distribution is empty.
	        """
	        df_musora = df[df["platform"] == "musora_app"].copy()
	        if df_musora.empty:
	            return

	        self.pdf.add_page()
	        self.pdf.section_header("Demographics Analysis (Musora App)")
	        self.pdf.section_description(_DESCRIPTIONS["demographics"])
	        self.pdf.body_text(f"Analyzing demographics for {len(df_musora):,} Musora App comments.")

	        # Age
	        age_dist = self.processor.get_demographics_distribution(df_musora, "age_group")
	        if not age_dist.empty:
	            self.pdf.subsection_header("Age Distribution")
	            self._add_chart(
	                self.demographic_charts.create_age_distribution_chart(
	                    age_dist, title="Comments by Age Group"
	                ),
	                img_height=350,
	            )

	        # Region
	        region_dist = self.processor.get_timezone_regions_distribution(df_musora)
	        if not region_dist.empty:
	            self.pdf.subsection_header("Geographic Distribution")
	            self._add_chart(
	                self.demographic_charts.create_region_distribution_chart(
	                    region_dist, title="Comments by Region"
	                ),
	                img_height=350,
	            )

	        # Experience
	        exp_dist = self.processor.get_experience_level_distribution(df_musora, use_groups=True)
	        if not exp_dist.empty:
	            self.pdf.subsection_header("Experience Level Distribution")
	            self._add_chart(
	                self.demographic_charts.create_experience_distribution_chart(
	                    exp_dist, title="Comments by Experience Group", use_groups=True
	                ),
	                img_height=350,
	            )

	    def _add_language_section(self, df) -> None:
	        """Add the language distribution page (top 10 languages chart)."""
	        self.pdf.add_page()
	        self.pdf.section_header("Language Distribution")
	        self.pdf.section_description(_DESCRIPTIONS["language"])
	        self._add_chart(
	            self.distribution_charts.create_language_distribution(df, top_n=10, title="Top 10 Languages")
	        )

	    def _add_data_summary(self, df, filter_info: dict) -> None:
	        """Add the closing page: generation time, record count, filters, sources."""
	        self.pdf.add_page()
	        self.pdf.section_header("Data Summary")

	        self.pdf.body_text(
	            f"Report generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
	        )
	        self.pdf.body_text(f"Total records in report: {len(df):,}")

	        date_str = self._date_range_str(df)
	        if date_str != "N/A":
	            self.pdf.body_text(f"Data range: {date_str}")

	        self.pdf.body_text(f"Active filters: {self._filter_summary(filter_info)}")

	        if "brand" in df.columns:
	            brands = sorted(str(b).title() for b in df["brand"].dropna().unique())
	            self.pdf.body_text(f"Brands included: {', '.join(brands)}")

	        if "platform" in df.columns:
	            platforms = sorted(str(p).title() for p in df["platform"].dropna().unique())
	            self.pdf.body_text(f"Platforms included: {', '.join(platforms)}")

	        self.pdf.ln(5)
	        self.pdf.callout_box(
	            "Data source: Snowflake - SOCIAL_MEDIA_DB.ML_FEATURES.COMMENT_SENTIMENT_FEATURES "
	            "and SOCIAL_MEDIA_DB.ML_FEATURES.MUSORA_COMMENT_SENTIMENT_FEATURES.\n"
	            "This report is confidential and intended for internal Musora team use only.",
	            bg_color=(245, 245, 245),
	        )