# Danialebrat's picture
# Deploying sentiment analysis project
# 9858829
"""
Main Dashboard Page
Displays overall sentiment distributions by brand and platform
"""
import streamlit as st
import sys
from pathlib import Path
# Add parent directory to path
parent_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(parent_dir))
from utils.data_processor import SentimentDataProcessor
from utils.metrics import SentimentMetrics
from utils.pdf_exporter import DashboardPDFExporter
from visualizations.sentiment_charts import SentimentCharts
from visualizations.distribution_charts import DistributionCharts
from visualizations.demographic_charts import DemographicCharts
from visualizations.content_cards import ContentCards
def render_dashboard(df):
    """
    Render the main dashboard page.

    Sections are drawn top to bottom: PDF export, an optional performance
    tip, summary cards, health indicator, overall / brand / platform
    sentiment, intent, cross-dimensional matrices, volume, reply
    requirements, Musora App demographics, language distribution, temporal
    trends and the hierarchical sunburst.

    Args:
        df: Sentiment dataframe. Optional sections are gated on columns that
            this function checks itself: ``platform`` plus the demographic
            columns, ``detected_language`` and ``comment_timestamp``.
    """
    st.title("📊 Sentiment Analysis Dashboard")

    # ── PDF Report ────────────────────────────────────────────────────────────
    _render_pdf_export(df)
    st.markdown("---")

    # Performance tip; its trailing divider is only drawn together with the
    # tip so that two rules never appear back to back.
    if len(df) > 10000:
        st.info(f"💡 **Performance Tip**: Loaded {len(df):,} comments. Use the global filters in the sidebar to narrow down your analysis for faster performance.")
        st.markdown("---")

    # Initialize components
    sentiment_charts = SentimentCharts()
    distribution_charts = DistributionCharts()
    processor = SentimentDataProcessor()

    # Display overall summary statistics
    ContentCards.display_summary_stats(df)
    st.markdown("---")

    # Calculate overall metrics
    overall_metrics = SentimentMetrics.calculate_overall_metrics(df)

    # Display health indicator, centred by two unused outer columns
    _, center_col, _ = st.columns([1, 2, 1])
    with center_col:
        ContentCards.display_health_indicator(overall_metrics['negative_pct'])
    st.markdown("---")

    _render_overall_sentiment(df, sentiment_charts, overall_metrics)
    st.markdown("---")

    _render_sentiment_by_group(
        df, sentiment_charts,
        group_by='brand', icon="🏢", label="Brand",
        metrics_fn=SentimentMetrics.calculate_brand_metrics,
    )
    st.markdown("---")

    _render_sentiment_by_group(
        df, sentiment_charts,
        group_by='platform', icon="🌐", label="Platform",
        metrics_fn=SentimentMetrics.calculate_platform_metrics,
    )
    st.markdown("---")

    _render_intent_analysis(df, distribution_charts)
    st.markdown("---")

    _render_cross_dimensional(df, sentiment_charts, distribution_charts)
    st.markdown("---")

    _render_volume_analysis(df, distribution_charts)
    st.markdown("---")

    _render_reply_requirements(df, distribution_charts)
    st.markdown("---")

    # Demographics Analysis (for musora_app only)
    _render_demographics(df, processor)

    # Language Distribution (if available)
    if 'detected_language' in df.columns:
        st.markdown("## 🌍 Language Distribution")
        lang_dist = distribution_charts.create_language_distribution(df, top_n=10, title="Top 10 Languages")
        st.plotly_chart(lang_dist, use_container_width=True)
        st.markdown("---")

    # Temporal trends (if timestamp available)
    _render_temporal_trends(df, sentiment_charts)

    # Hierarchical sunburst
    with st.expander("🌟 Hierarchical View", expanded=False):
        st.markdown("**Interactive Brand > Platform > Sentiment Distribution**")
        sunburst = distribution_charts.create_combined_distribution_sunburst(
            df, title="Brand > Platform > Sentiment Distribution"
        )
        st.plotly_chart(sunburst, use_container_width=True)


def _describe_active_filters(global_filters):
    """Return a ``{label: value}`` dict for the global filters that are set."""
    filter_info = {}
    labelled_keys = (
        ("platforms", "Platforms"),
        ("brands", "Brands"),
        ("sentiments", "Sentiments"),
    )
    for key, label in labelled_keys:
        if global_filters.get(key):
            filter_info[label] = global_filters[key]

    date_range = global_filters.get("date_range")
    if date_range:
        filter_info["Date Range"] = f"{date_range[0]} to {date_range[1]}"
    return filter_info


def _render_pdf_export(df):
    """Render the collapsed expander that generates and offers the PDF report."""
    # Function-scope import keeps this block self-contained.
    from datetime import datetime

    with st.expander("📄 Export PDF Report", expanded=False):
        st.markdown(
            "Generate a comprehensive PDF report of the current dashboard view. "
            "The report includes all charts, metrics, and a data summary. "
            "Active global filters are reflected in the report."
        )
        if not st.button("Generate PDF Report", type="primary", use_container_width=True):
            return

        with st.spinner("Generating PDF report — this may take 30–60 seconds…"):
            # UI boundary: any failure is reported on the page instead of
            # aborting the rest of the dashboard.
            try:
                # Build a human-readable description of active filters
                filter_info = _describe_active_filters(
                    st.session_state.get("global_filters", {}) or {}
                )

                exporter = DashboardPDFExporter()
                pdf_bytes = exporter.generate_report(df, filter_info or None)

                timestamp = datetime.now().strftime('%Y%m%d_%H%M')
                filename = f"musora_sentiment_report_{timestamp}.pdf"

                st.success("Report generated successfully!")
                # NOTE(review): this button lives inside the `st.button`
                # branch, so it is only drawn on the run triggered by the
                # click above — confirm that is acceptable, or cache the
                # bytes in st.session_state.
                st.download_button(
                    label="Download PDF Report",
                    data=pdf_bytes,
                    file_name=filename,
                    mime="application/pdf",
                    use_container_width=True,
                )
            except Exception as e:
                st.error(f"Failed to generate report: {e}")
                st.exception(e)


def _render_overall_sentiment(df, sentiment_charts, overall_metrics):
    """Render the overall sentiment pie, score gauge and headline metrics."""
    st.markdown("## 🎯 Overall Sentiment Distribution")
    pie_col, gauge_col = st.columns(2)

    with pie_col:
        # Sentiment pie chart
        sentiment_pie = sentiment_charts.create_sentiment_pie_chart(df, title="Overall Sentiment Distribution")
        st.plotly_chart(sentiment_pie, use_container_width=True)

    with gauge_col:
        # Sentiment score gauge
        sentiment_gauge = sentiment_charts.create_sentiment_score_gauge(
            overall_metrics['avg_sentiment_score'],
            title="Overall Sentiment Score"
        )
        st.plotly_chart(sentiment_gauge, use_container_width=True)

        # Additional metrics, shown beneath the gauge
        positive_col, reply_col = st.columns(2)
        with positive_col:
            st.metric("Positive %", f"{overall_metrics['positive_pct']:.1f}%")
        with reply_col:
            st.metric("Reply Rate %", f"{overall_metrics['reply_required_pct']:.1f}%")


def _render_sentiment_by_group(df, sentiment_charts, group_by, icon, label, metrics_fn):
    """Render the sentiment charts and metrics table for one grouping column.

    Args:
        df: Sentiment dataframe.
        sentiment_charts: Chart factory used for both bar charts.
        group_by: Column name handed to the chart factory ('brand'/'platform').
        icon: Emoji shown in the section heading.
        label: Human-readable group name used in headings and the table.
        metrics_fn: Callable taking ``df`` and returning a mapping of group
            name to a metrics dict.
    """
    st.markdown(f"## {icon} Sentiment Analysis by {label}")
    count_col, pct_col = st.columns(2)

    with count_col:
        # Stacked bar chart
        count_chart = sentiment_charts.create_sentiment_bar_chart(
            df, group_by=group_by, title=f"Sentiment Distribution by {label}"
        )
        st.plotly_chart(count_chart, use_container_width=True)

    with pct_col:
        # Percentage bar chart
        pct_chart = sentiment_charts.create_sentiment_percentage_bar_chart(
            df, group_by=group_by, title=f"Sentiment Distribution by {label} (%)"
        )
        st.plotly_chart(pct_chart, use_container_width=True)

    # Metrics table
    with st.expander(f"📈 Detailed {label} Metrics"):
        rows = [
            {
                # str() guards against non-string group keys (e.g. NaN).
                label: str(name).title(),
                'Total Comments': metrics['total_comments'],
                'Replies Needed': metrics['total_reply_required'],
                'Negative %': f"{metrics['negative_pct']:.1f}%",
                'Positive %': f"{metrics['positive_pct']:.1f}%",
                'Avg Sentiment Score': f"{metrics['avg_sentiment_score']:.2f}",
            }
            for name, metrics in metrics_fn(df).items()
        ]
        st.table(rows)


def _render_intent_analysis(df, distribution_charts):
    """Render the intent bar chart and pie chart side by side."""
    st.markdown("## 🎭 Intent Analysis")
    bar_col, pie_col = st.columns(2)

    with bar_col:
        # Intent bar chart
        intent_bar = distribution_charts.create_intent_bar_chart(
            df, title="Intent Distribution", orientation='h'
        )
        st.plotly_chart(intent_bar, use_container_width=True)

    with pie_col:
        # Intent pie chart
        intent_pie = distribution_charts.create_intent_pie_chart(df, title="Intent Distribution")
        st.plotly_chart(intent_pie, use_container_width=True)


def _render_cross_dimensional(df, sentiment_charts, distribution_charts):
    """Render the brand-platform comment matrix and the sentiment heatmap."""
    st.markdown("## 🔀 Cross-Dimensional Analysis")
    matrix_col, heatmap_col = st.columns(2)

    with matrix_col:
        # Heatmap showing comment distribution
        brand_platform_matrix = distribution_charts.create_brand_platform_matrix(
            df, title="Brand-Platform Comment Matrix"
        )
        st.plotly_chart(brand_platform_matrix, use_container_width=True)

    with heatmap_col:
        # Sentiment heatmap
        sentiment_heatmap = sentiment_charts.create_sentiment_heatmap(
            df, row_dimension='brand', col_dimension='platform', title="Negative Sentiment Heatmap"
        )
        st.plotly_chart(sentiment_heatmap, use_container_width=True)


def _render_volume_analysis(df, distribution_charts):
    """Render comment volume by platform and by brand."""
    st.markdown("## 📊 Volume Analysis")
    platform_col, brand_col = st.columns(2)

    with platform_col:
        # Platform distribution
        platform_dist = distribution_charts.create_platform_distribution(df, title="Comments by Platform")
        st.plotly_chart(platform_dist, use_container_width=True)

    with brand_col:
        # Brand distribution
        brand_dist = distribution_charts.create_brand_distribution(df, title="Comments by Brand")
        st.plotly_chart(brand_dist, use_container_width=True)


def _render_reply_requirements(df, distribution_charts):
    """Render reply-required charts and the response urgency breakdown."""
    st.markdown("## ⚠️ Reply Requirements Analysis")
    brand_col, platform_col = st.columns(2)

    with brand_col:
        # Reply required by brand
        reply_brand = distribution_charts.create_reply_required_chart(
            df, group_by='brand', title="Comments Requiring Reply by Brand"
        )
        st.plotly_chart(reply_brand, use_container_width=True)

    with platform_col:
        # Reply required by platform
        reply_platform = distribution_charts.create_reply_required_chart(
            df, group_by='platform', title="Comments Requiring Reply by Platform"
        )
        st.plotly_chart(reply_platform, use_container_width=True)

    # Response urgency metrics
    urgency_metrics = SentimentMetrics.calculate_response_urgency(df)
    st.markdown("### 🚨 Response Urgency Breakdown")

    urgency_levels = (
        ("🔴 Urgent", 'urgent_count', "Negative sentiment + requires reply"),
        ("🟠 High Priority", 'high_priority_count', "Neutral with feedback/request"),
        ("🟡 Medium Priority", 'medium_priority_count', "Positive requiring reply"),
        ("🟢 Low Priority", 'low_priority_count', "Very positive requiring reply"),
    )
    for column, (label, key, help_text) in zip(st.columns(len(urgency_levels)), urgency_levels):
        with column:
            st.metric(label, urgency_metrics[key], help=help_text)


def _write_top_group(label, distribution, group_column):
    """Write the first row of a distribution frame as a 'most active' line."""
    if distribution.empty:
        return
    top = distribution.iloc[0]
    st.write(f"**{label}:** {top[group_column]} ({top['count']:,} comments, {top['percentage']:.1f}%)")


def _write_most_negative_group(sentiment_by_group, group_column):
    """Write the group with the largest summed negative/very_negative share."""
    if sentiment_by_group.empty:
        return
    is_negative = sentiment_by_group['sentiment_polarity'].isin(['negative', 'very_negative'])
    negative_share = (
        sentiment_by_group[is_negative]
        .groupby(group_column)['percentage']
        .sum()
        .reset_index()
    )
    if len(negative_share) == 0:
        return
    worst = negative_share.sort_values('percentage', ascending=False).iloc[0]
    st.write(f"**Highest Negative Sentiment:** {worst[group_column]} ({worst['percentage']:.1f}% negative)")


def _render_demographics(df, processor):
    """Render the Musora App demographics section when the data supports it.

    The section is skipped unless ``df`` has ``musora_app`` rows and all
    demographic columns read below, and at least one grouped column holds a
    value other than 'Unknown'.
    """
    # Check if we have musora_app data and demographic fields
    if 'platform' not in df.columns or 'musora_app' not in df['platform'].values:
        return

    # The grouped columns are the ones actually read below, so they must be
    # part of the gate too; otherwise a frame with only the raw columns would
    # raise KeyError instead of skipping the section.
    grouped_columns = ('age_group', 'timezone_region', 'experience_group')
    required_columns = ('age_group', 'timezone', 'experience_level') + grouped_columns
    if any(column not in df.columns for column in required_columns):
        return

    # Filter for musora_app data only
    df_musora = df[df['platform'] == 'musora_app'].copy()

    # Check if we have any demographic data (not all Unknown)
    has_valid_demographics = any(
        (df_musora[column] != 'Unknown').any() for column in grouped_columns
    )
    if df_musora.empty or not has_valid_demographics:
        return

    st.markdown("## 👥 Demographics Analysis (Musora App)")
    st.info(f"📊 Analyzing demographics for **{len(df_musora):,}** Musora App comments")

    # Initialize demographic charts
    demographic_charts = DemographicCharts()

    # Get demographic summary
    demo_summary = processor.get_demographics_summary(df_musora)
    _render_demographics_summary(demo_summary)
    st.markdown("---")

    _render_age_section(df_musora, processor, demographic_charts)
    st.markdown("---")

    _render_geographic_section(df_musora, processor, demographic_charts)
    st.markdown("---")

    _render_experience_section(df_musora, processor, demographic_charts, demo_summary)
    st.markdown("---")


def _render_demographics_summary(demo_summary):
    """Render the four headline demographic metrics."""
    coverage_col, age_col, region_col, experience_col = st.columns(4)

    with coverage_col:
        st.metric(
            "Comments with Demographics",
            f"{demo_summary['users_with_demographics']:,}",
            f"{demo_summary['coverage_percentage']:.1f}% coverage"
        )

    with age_col:
        avg_age = demo_summary['avg_age']
        st.metric("Average Age", f"{avg_age:.1f} years" if avg_age is not None else "N/A")

    with region_col:
        st.metric("Most Common Region", demo_summary['most_common_region'])

    with experience_col:
        avg_experience = demo_summary['avg_experience']
        st.metric(
            "Avg Experience",
            f"{avg_experience:.1f}/10" if avg_experience is not None else "N/A",
        )


def _render_age_section(df_musora, processor, demographic_charts):
    """Render age distribution charts and insights for Musora App comments."""
    st.markdown("### 🎂 Age Distribution")

    age_dist = processor.get_demographics_distribution(df_musora, 'age_group')
    age_sentiment = processor.get_demographics_by_sentiment(df_musora, 'age_group')

    if age_dist.empty:
        st.info("No age data available for visualization")
        return

    dist_col, sentiment_col = st.columns(2)

    with dist_col:
        age_chart = demographic_charts.create_age_distribution_chart(
            age_dist,
            title="Comments by Age Group"
        )
        st.plotly_chart(age_chart, use_container_width=True)

    with sentiment_col:
        age_sent_chart = demographic_charts.create_age_sentiment_chart(
            age_sentiment,
            title="Sentiment Distribution by Age Group"
        )
        st.plotly_chart(age_sent_chart, use_container_width=True)

    # Insights
    with st.expander("💡 Age Insights"):
        _write_top_group("Most Active Age Group", age_dist, 'age_group')
        # Find age group with most negative sentiment
        _write_most_negative_group(age_sentiment, 'age_group')


def _render_geographic_section(df_musora, processor, demographic_charts):
    """Render timezone and region charts and insights for Musora App comments."""
    st.markdown("### 🌍 Geographic Distribution")

    # Get timezone data
    top_timezones = processor.get_top_timezones(df_musora, top_n=15)
    region_dist = processor.get_timezone_regions_distribution(df_musora)
    region_sentiment = processor.get_demographics_by_sentiment(df_musora, 'timezone_region')

    if top_timezones.empty and region_dist.empty:
        st.info("No timezone/region data available for visualization")
        return

    # Top timezones
    if not top_timezones.empty:
        st.markdown("#### Top 15 Timezones")
        timezone_chart = demographic_charts.create_timezone_chart(
            top_timezones,
            title="Most Common Timezones",
            top_n=15
        )
        st.plotly_chart(timezone_chart, use_container_width=True)

    # Regional distribution
    if not region_dist.empty:
        st.markdown("#### Regional Distribution")
        dist_col, sentiment_col = st.columns(2)

        with dist_col:
            region_chart = demographic_charts.create_region_distribution_chart(
                region_dist,
                title="Comments by Region"
            )
            st.plotly_chart(region_chart, use_container_width=True)

        with sentiment_col:
            if not region_sentiment.empty:
                region_sent_chart = demographic_charts.create_region_sentiment_chart(
                    region_sentiment,
                    title="Sentiment Distribution by Region"
                )
                st.plotly_chart(region_sent_chart, use_container_width=True)

    # Insights
    with st.expander("💡 Geographic Insights"):
        _write_top_group("Most Common Timezone", top_timezones, 'timezone')
        _write_top_group("Most Active Region", region_dist, 'timezone_region')
        # Find region with most negative sentiment
        _write_most_negative_group(region_sentiment, 'timezone_region')


def _render_experience_section(df_musora, processor, demographic_charts, demo_summary):
    """Render experience-level charts and insights for Musora App comments."""
    st.markdown("### 🎯 Experience Level Distribution")

    # Get both detailed and grouped experience data
    exp_dist_detailed = processor.get_experience_level_distribution(df_musora, use_groups=False)
    exp_dist_grouped = processor.get_experience_level_distribution(df_musora, use_groups=True)
    exp_sentiment_grouped = processor.get_demographics_by_sentiment(df_musora, 'experience_group')

    if exp_dist_detailed.empty and exp_dist_grouped.empty:
        st.info("No experience level data available for visualization")
        return

    # Tabs for detailed vs grouped view
    detailed_tab, grouped_tab = st.tabs(["📊 Detailed (0-10)", "📊 Grouped (Beginner/Intermediate/Advanced)"])

    with detailed_tab:
        if not exp_dist_detailed.empty:
            exp_chart_detailed = demographic_charts.create_experience_distribution_chart(
                exp_dist_detailed,
                title="Comments by Experience Level (0-10 Scale)",
                use_groups=False
            )
            st.plotly_chart(exp_chart_detailed, use_container_width=True)
        else:
            st.info("No detailed experience level data available")

    with grouped_tab:
        if not exp_dist_grouped.empty:
            dist_col, sentiment_col = st.columns(2)

            with dist_col:
                exp_chart_grouped = demographic_charts.create_experience_distribution_chart(
                    exp_dist_grouped,
                    title="Comments by Experience Group",
                    use_groups=True
                )
                st.plotly_chart(exp_chart_grouped, use_container_width=True)

            with sentiment_col:
                if not exp_sentiment_grouped.empty:
                    exp_sent_chart = demographic_charts.create_experience_sentiment_chart(
                        exp_sentiment_grouped,
                        title="Sentiment by Experience Group",
                        use_groups=True
                    )
                    st.plotly_chart(exp_sent_chart, use_container_width=True)
        else:
            st.info("No grouped experience level data available")

    # Insights
    with st.expander("💡 Experience Insights"):
        _write_top_group("Most Active Group", exp_dist_grouped, 'experience_group')
        # Find experience group with most negative sentiment
        _write_most_negative_group(exp_sentiment_grouped, 'experience_group')

        if demo_summary['avg_experience'] is not None:
            st.write(f"**Average Experience Level:** {demo_summary['avg_experience']:.2f}/10")
            st.write(f"**Most Common Experience Group:** {demo_summary.get('most_common_experience', 'Unknown')}")


def _render_temporal_trends(df, sentiment_charts):
    """Render the sentiment timeline expander when timestamps are available."""
    if 'comment_timestamp' not in df.columns or df.empty:
        return

    granularity_labels = {'D': 'Daily', 'W': 'Weekly', 'M': 'Monthly'}

    with st.expander("📈 Temporal Trends", expanded=False):
        # Frequency selector, kept narrow by an unused wider column beside it
        selector_col, _ = st.columns([1, 3])
        with selector_col:
            freq = st.selectbox(
                "Time Granularity",
                options=list(granularity_labels),
                format_func=granularity_labels.get,
                index=1  # Default to Weekly
            )

        sentiment_timeline = sentiment_charts.create_sentiment_timeline(df, freq=freq, title="Sentiment Trends Over Time")
        st.plotly_chart(sentiment_timeline, use_container_width=True)