Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Running

App Files Files Community

Sentiment_analysis / visualization /components /dashboard.py

Danialebrat

Deploying sentiment analysis project

9858829 13 days ago

raw

history blame contribute delete

25.3 kB

	"""
	Main Dashboard Page
	Displays overall sentiment distributions by brand and platform
	"""
	import streamlit as st
	import sys
	from pathlib import Path

	# Add parent directory to path.
	# `parent_dir` is the directory two levels above this file; appending it to
	# sys.path lets the absolute `utils.*` / `visualizations.*` imports below be
	# resolved from there (presumably those packages live in that directory --
	# confirm against the repo layout). It must run before those imports.
	parent_dir = Path(__file__).resolve().parent.parent
	sys.path.append(str(parent_dir))

	from utils.data_processor import SentimentDataProcessor
	from utils.metrics import SentimentMetrics
	from utils.pdf_exporter import DashboardPDFExporter
	from visualizations.sentiment_charts import SentimentCharts
	from visualizations.distribution_charts import DistributionCharts
	from visualizations.demographic_charts import DemographicCharts
	from visualizations.content_cards import ContentCards


	def render_dashboard(df):
	    """
	    Render the main dashboard page.

	    Sections are emitted top to bottom in a fixed order: PDF export, summary
	    cards, overall sentiment, per-brand and per-platform breakdowns, intent,
	    cross-dimensional heatmaps, volume, reply requirements, Musora App
	    demographics (only when the data supports it), language distribution,
	    temporal trends and the hierarchical sunburst.

	    Args:
	        df: Sentiment dataframe to visualise.
	    """
	    st.title("📊 Sentiment Analysis Dashboard")

	    _render_pdf_export(df)

	    st.markdown("---")

	    # Performance tip. The trailing rule lives inside the branch so that two
	    # horizontal rules are not stacked when the tip is hidden.
	    if len(df) > 10000:
	        st.info(f"💡 Performance Tip: Loaded {len(df):,} comments. Use the global filters in the sidebar to narrow down your analysis for faster performance.")
	        st.markdown("---")

	    # Initialize components
	    sentiment_charts = SentimentCharts()
	    distribution_charts = DistributionCharts()
	    processor = SentimentDataProcessor()

	    # Display overall summary statistics
	    ContentCards.display_summary_stats(df)

	    st.markdown("---")

	    _render_overall_sentiment(df, sentiment_charts)

	    st.markdown("---")

	    _render_group_sentiment(
	        df,
	        sentiment_charts,
	        group_by='brand',
	        heading="## 🏢 Sentiment Analysis by Brand",
	        label="Brand",
	        calculate_metrics=SentimentMetrics.calculate_brand_metrics,
	    )

	    st.markdown("---")

	    _render_group_sentiment(
	        df,
	        sentiment_charts,
	        group_by='platform',
	        heading="## 🌐 Sentiment Analysis by Platform",
	        label="Platform",
	        calculate_metrics=SentimentMetrics.calculate_platform_metrics,
	    )

	    st.markdown("---")

	    # Intent Analysis
	    st.markdown("## 🎭 Intent Analysis")
	    _render_chart_pair(
	        lambda: distribution_charts.create_intent_bar_chart(
	            df, title="Intent Distribution", orientation='h'
	        ),
	        lambda: distribution_charts.create_intent_pie_chart(df, title="Intent Distribution"),
	    )

	    st.markdown("---")

	    # Brand-Platform Matrix
	    st.markdown("## 🔀 Cross-Dimensional Analysis")
	    _render_chart_pair(
	        lambda: distribution_charts.create_brand_platform_matrix(
	            df, title="Brand-Platform Comment Matrix"
	        ),
	        lambda: sentiment_charts.create_sentiment_heatmap(
	            df, row_dimension='brand', col_dimension='platform', title="Negative Sentiment Heatmap"
	        ),
	    )

	    st.markdown("---")

	    # Platform and Brand Distribution
	    st.markdown("## 📊 Volume Analysis")
	    _render_chart_pair(
	        lambda: distribution_charts.create_platform_distribution(df, title="Comments by Platform"),
	        lambda: distribution_charts.create_brand_distribution(df, title="Comments by Brand"),
	    )

	    st.markdown("---")

	    _render_reply_requirements(df, distribution_charts)

	    # A single rule here: the original emitted two identical rules back to back.
	    st.markdown("---")

	    _render_demographics(df, processor)

	    # Language Distribution (if available)
	    if 'detected_language' in df.columns:
	        st.markdown("## 🌍 Language Distribution")

	        lang_dist = distribution_charts.create_language_distribution(df, top_n=10, title="Top 10 Languages")
	        st.plotly_chart(lang_dist, use_container_width=True)

	        st.markdown("---")

	    # Temporal trends (if timestamp available)
	    if 'comment_timestamp' in df.columns and not df.empty:
	        with st.expander("📈 Temporal Trends", expanded=False):
	            # Frequency selector sits in the narrow left column; the right one stays empty.
	            freq_col, _ = st.columns([1, 3])

	            with freq_col:
	                freq = st.selectbox(
	                    "Time Granularity",
	                    options=['D', 'W', 'M'],
	                    format_func=lambda x: {'D': 'Daily', 'W': 'Weekly', 'M': 'Monthly'}[x],
	                    index=1  # Default to Weekly
	                )

	            sentiment_timeline = sentiment_charts.create_sentiment_timeline(df, freq=freq, title="Sentiment Trends Over Time")
	            st.plotly_chart(sentiment_timeline, use_container_width=True)

	    # Hierarchical sunburst
	    with st.expander("🌟 Hierarchical View", expanded=False):
	        st.markdown("Interactive Brand > Platform > Sentiment Distribution")
	        sunburst = distribution_charts.create_combined_distribution_sunburst(
	            df, title="Brand > Platform > Sentiment Distribution"
	        )
	        st.plotly_chart(sunburst, use_container_width=True)


	def _render_chart_pair(make_left, make_right):
	    """Draw two Plotly figures side by side in equal-width columns.

	    The figures are passed as zero-argument factories and built *inside* the
	    column context, exactly where the original inline code built them, so any
	    Streamlit output a chart builder might emit lands in the same column.
	    """
	    left_col, right_col = st.columns(2)
	    with left_col:
	        st.plotly_chart(make_left(), use_container_width=True)
	    with right_col:
	        st.plotly_chart(make_right(), use_container_width=True)


	def _build_filter_info():
	    """Return a label -> value dict describing the active global filters."""
	    global_filters = st.session_state.get("global_filters", {})
	    filter_info = {}
	    for label, key in (("Platforms", "platforms"), ("Brands", "brands"), ("Sentiments", "sentiments")):
	        if global_filters.get(key):
	            filter_info[label] = global_filters[key]

	    # Only describe the range when both endpoints exist; a one-element range
	    # (possible if the value comes straight from a date picker mid-selection)
	    # previously raised IndexError and failed the whole export.
	    date_range = global_filters.get("date_range")
	    if date_range and len(date_range) >= 2:
	        filter_info["Date Range"] = f"{date_range[0]} to {date_range[1]}"
	    return filter_info


	def _render_pdf_export(df):
	    """Render the collapsible 'Export PDF Report' panel and its download button."""
	    from datetime import datetime  # local import keeps this block self-contained

	    with st.expander("📄 Export PDF Report", expanded=False):
	        st.markdown(
	            "Generate a comprehensive PDF report of the current dashboard view. "
	            "The report includes all charts, metrics, and a data summary. "
	            "Active global filters are reflected in the report."
	        )
	        if not st.button("Generate PDF Report", type="primary", use_container_width=True):
	            return

	        with st.spinner("Generating PDF report — this may take 30–60 seconds…"):
	            # Broad catch is deliberate: this is a UI boundary, and the
	            # failure is surfaced to the user together with the traceback.
	            try:
	                # Build a human-readable description of active filters
	                filter_info = _build_filter_info()

	                exporter = DashboardPDFExporter()
	                pdf_bytes = exporter.generate_report(df, filter_info or None)

	                timestamp = datetime.now().strftime('%Y%m%d_%H%M')
	                filename = f"musora_sentiment_report_{timestamp}.pdf"

	                st.success("Report generated successfully!")
	                st.download_button(
	                    label="Download PDF Report",
	                    data=pdf_bytes,
	                    file_name=filename,
	                    mime="application/pdf",
	                    use_container_width=True,
	                )
	            except Exception as e:
	                st.error(f"Failed to generate report: {e}")
	                st.exception(e)


	def _render_overall_sentiment(df, sentiment_charts):
	    """Render the health indicator followed by the overall pie chart, gauge and headline metrics."""
	    # Calculate overall metrics
	    overall_metrics = SentimentMetrics.calculate_overall_metrics(df)

	    # Display health indicator, centred by two empty side columns
	    _, center_col, _ = st.columns([1, 2, 1])
	    with center_col:
	        ContentCards.display_health_indicator(overall_metrics['negative_pct'])

	    st.markdown("---")

	    # Overall sentiment distribution
	    st.markdown("## 🎯 Overall Sentiment Distribution")

	    pie_col, gauge_col = st.columns(2)

	    with pie_col:
	        # Sentiment pie chart
	        sentiment_pie = sentiment_charts.create_sentiment_pie_chart(df, title="Overall Sentiment Distribution")
	        st.plotly_chart(sentiment_pie, use_container_width=True)

	    with gauge_col:
	        # Sentiment score gauge
	        sentiment_gauge = sentiment_charts.create_sentiment_score_gauge(
	            overall_metrics['avg_sentiment_score'],
	            title="Overall Sentiment Score"
	        )
	        st.plotly_chart(sentiment_gauge, use_container_width=True)

	        # Additional metrics.
	        # NOTE(review): placed under the gauge (inside this column); confirm
	        # against the intended layout.
	        positive_col, reply_col = st.columns(2)
	        with positive_col:
	            st.metric("Positive %", f"{overall_metrics['positive_pct']:.1f}%")
	        with reply_col:
	            st.metric("Reply Rate %", f"{overall_metrics['reply_required_pct']:.1f}%")


	def _render_group_sentiment(df, sentiment_charts, group_by, heading, label, calculate_metrics):
	    """Render one 'Sentiment Analysis by <label>' section (used for brand and platform).

	    Args:
	        df: Sentiment dataframe.
	        sentiment_charts: Chart builder providing the two bar-chart methods.
	        group_by: Column name handed to the chart builders ('brand' or 'platform').
	        heading: Markdown heading for the section.
	        label: Capitalised dimension name used in chart titles and the table.
	        calculate_metrics: Callable taking ``df`` and returning a mapping of
	            group name -> metrics dict; invoked inside the expander.
	    """
	    st.markdown(heading)

	    _render_chart_pair(
	        # Stacked bar chart
	        lambda: sentiment_charts.create_sentiment_bar_chart(
	            df, group_by=group_by, title=f"Sentiment Distribution by {label}"
	        ),
	        # Percentage bar chart
	        lambda: sentiment_charts.create_sentiment_percentage_bar_chart(
	            df, group_by=group_by, title=f"Sentiment Distribution by {label} (%)"
	        ),
	    )

	    # Metrics table
	    with st.expander(f"📈 Detailed {label} Metrics"):
	        rows = [
	            {
	                label: group.title(),
	                'Total Comments': metrics['total_comments'],
	                'Replies Needed': metrics['total_reply_required'],
	                'Negative %': f"{metrics['negative_pct']:.1f}%",
	                'Positive %': f"{metrics['positive_pct']:.1f}%",
	                'Avg Sentiment Score': f"{metrics['avg_sentiment_score']:.2f}",
	            }
	            for group, metrics in calculate_metrics(df).items()
	        ]
	        st.table(rows)


	def _render_reply_requirements(df, distribution_charts):
	    """Render the reply-required charts and the four urgency counters."""
	    st.markdown("## ⚠️ Reply Requirements Analysis")

	    _render_chart_pair(
	        # Reply required by brand
	        lambda: distribution_charts.create_reply_required_chart(
	            df, group_by='brand', title="Comments Requiring Reply by Brand"
	        ),
	        # Reply required by platform
	        lambda: distribution_charts.create_reply_required_chart(
	            df, group_by='platform', title="Comments Requiring Reply by Platform"
	        ),
	    )

	    # Response urgency metrics
	    urgency_metrics = SentimentMetrics.calculate_response_urgency(df)

	    st.markdown("### 🚨 Response Urgency Breakdown")
	    tiers = (
	        ("🔴 Urgent", 'urgent_count', "Negative sentiment + requires reply"),
	        ("🟠 High Priority", 'high_priority_count', "Neutral with feedback/request"),
	        ("🟡 Medium Priority", 'medium_priority_count', "Positive requiring reply"),
	        ("🟢 Low Priority", 'low_priority_count', "Very positive requiring reply"),
	    )
	    for column, (tier_label, metric_key, help_text) in zip(st.columns(len(tiers)), tiers):
	        with column:
	            st.metric(tier_label, urgency_metrics[metric_key], help=help_text)


	def _write_top_entry(distribution, name_col, label):
	    """Write the first row of a distribution frame as '<label>: name (count, pct)'."""
	    if distribution.empty:
	        return
	    top = distribution.iloc[0]
	    st.write(f"{label}: {top[name_col]} ({top['count']:,} comments, {top['percentage']:.1f}%)")


	def _write_highest_negative(sentiment_by_group, group_col):
	    """Write the group whose summed negative + very_negative percentage is largest."""
	    if sentiment_by_group.empty:
	        return
	    is_negative = sentiment_by_group['sentiment_polarity'].isin(['negative', 'very_negative'])
	    negative_share = (
	        sentiment_by_group[is_negative]
	        .groupby(group_col)['percentage']
	        .sum()
	        .reset_index()
	    )
	    if negative_share.empty:
	        return
	    worst = negative_share.sort_values('percentage', ascending=False).iloc[0]
	    st.write(f"Highest Negative Sentiment: {worst[group_col]} ({worst['percentage']:.1f}% negative)")


	def _render_demographics(df, processor):
	    """Render the Musora App demographics block, or nothing when the data cannot support it.

	    The block is shown only when ``df`` has the platform/age/timezone/experience
	    columns, contains 'musora_app' rows, and at least one demographic column
	    holds a value other than 'Unknown'.
	    """
	    # Demographics Analysis (for musora_app only)
	    required_columns = ('platform', 'age_group', 'timezone', 'experience_level')
	    if any(column not in df.columns for column in required_columns):
	        return

	    # Filter for musora_app data only
	    df_musora = df[df['platform'] == 'musora_app'].copy()
	    if df_musora.empty:
	        return

	    # Check if we have any demographic data (not all Unknown).
	    # The gate above guarantees 'timezone' / 'experience_level', not the
	    # derived 'timezone_region' / 'experience_group' columns, so only probe
	    # the ones that are present instead of raising KeyError.
	    # NOTE(review): the processor calls below still receive those derived
	    # column names; confirm they tolerate a missing column.
	    probe_columns = [
	        column
	        for column in ('age_group', 'timezone_region', 'experience_group')
	        if column in df_musora.columns
	    ]
	    if not any((df_musora[column] != 'Unknown').any() for column in probe_columns):
	        return

	    st.markdown("## 👥 Demographics Analysis (Musora App)")
	    st.info(f"📊 Analyzing demographics for {len(df_musora):,} Musora App comments")

	    # Initialize demographic charts
	    demographic_charts = DemographicCharts()

	    # Get demographic summary
	    demo_summary = processor.get_demographics_summary(df_musora)

	    # Display summary metrics
	    coverage_col, age_col, region_col, experience_col = st.columns(4)

	    with coverage_col:
	        st.metric(
	            "Comments with Demographics",
	            f"{demo_summary['users_with_demographics']:,}",
	            f"{demo_summary['coverage_percentage']:.1f}% coverage"
	        )

	    with age_col:
	        if demo_summary['avg_age'] is not None:
	            st.metric("Average Age", f"{demo_summary['avg_age']:.1f} years")
	        else:
	            st.metric("Average Age", "N/A")

	    with region_col:
	        st.metric("Most Common Region", demo_summary['most_common_region'])

	    with experience_col:
	        if demo_summary['avg_experience'] is not None:
	            st.metric("Avg Experience", f"{demo_summary['avg_experience']:.1f}/10")
	        else:
	            st.metric("Avg Experience", "N/A")

	    st.markdown("---")

	    _render_age_section(df_musora, processor, demographic_charts)

	    st.markdown("---")

	    _render_geography_section(df_musora, processor, demographic_charts)

	    st.markdown("---")

	    _render_experience_section(df_musora, processor, demographic_charts, demo_summary)

	    st.markdown("---")


	def _render_age_section(df_musora, processor, demographic_charts):
	    """Render the age-group distribution, its sentiment split and the insights expander."""
	    st.markdown("### 🎂 Age Distribution")

	    age_dist = processor.get_demographics_distribution(df_musora, 'age_group')
	    age_sentiment = processor.get_demographics_by_sentiment(df_musora, 'age_group')

	    if age_dist.empty:
	        st.info("No age data available for visualization")
	        return

	    _render_chart_pair(
	        lambda: demographic_charts.create_age_distribution_chart(
	            age_dist,
	            title="Comments by Age Group"
	        ),
	        lambda: demographic_charts.create_age_sentiment_chart(
	            age_sentiment,
	            title="Sentiment Distribution by Age Group"
	        ),
	    )

	    # Insights
	    with st.expander("💡 Age Insights"):
	        _write_top_entry(age_dist, 'age_group', "Most Active Age Group")
	        # Find age group with most negative sentiment
	        _write_highest_negative(age_sentiment, 'age_group')


	def _render_geography_section(df_musora, processor, demographic_charts):
	    """Render the top-timezone chart, the regional charts and the geographic insights."""
	    st.markdown("### 🌍 Geographic Distribution")

	    # Get timezone data
	    top_timezones = processor.get_top_timezones(df_musora, top_n=15)
	    region_dist = processor.get_timezone_regions_distribution(df_musora)
	    region_sentiment = processor.get_demographics_by_sentiment(df_musora, 'timezone_region')

	    if top_timezones.empty and region_dist.empty:
	        st.info("No timezone/region data available for visualization")
	        return

	    # Top timezones
	    if not top_timezones.empty:
	        st.markdown("#### Top 15 Timezones")
	        timezone_chart = demographic_charts.create_timezone_chart(
	            top_timezones,
	            title="Most Common Timezones",
	            top_n=15
	        )
	        st.plotly_chart(timezone_chart, use_container_width=True)

	    # Regional distribution
	    if not region_dist.empty:
	        st.markdown("#### Regional Distribution")
	        region_col, region_sentiment_col = st.columns(2)

	        with region_col:
	            region_chart = demographic_charts.create_region_distribution_chart(
	                region_dist,
	                title="Comments by Region"
	            )
	            st.plotly_chart(region_chart, use_container_width=True)

	        with region_sentiment_col:
	            # The right column stays empty when there is no per-region sentiment.
	            if not region_sentiment.empty:
	                region_sent_chart = demographic_charts.create_region_sentiment_chart(
	                    region_sentiment,
	                    title="Sentiment Distribution by Region"
	                )
	                st.plotly_chart(region_sent_chart, use_container_width=True)

	    # Insights
	    with st.expander("💡 Geographic Insights"):
	        _write_top_entry(top_timezones, 'timezone', "Most Common Timezone")
	        _write_top_entry(region_dist, 'timezone_region', "Most Active Region")
	        # Find region with most negative sentiment
	        _write_highest_negative(region_sentiment, 'timezone_region')


	def _render_experience_section(df_musora, processor, demographic_charts, demo_summary):
	    """Render the detailed/grouped experience tabs and the experience insights."""
	    st.markdown("### 🎯 Experience Level Distribution")

	    # Get both detailed and grouped experience data
	    exp_dist_detailed = processor.get_experience_level_distribution(df_musora, use_groups=False)
	    exp_dist_grouped = processor.get_experience_level_distribution(df_musora, use_groups=True)
	    exp_sentiment_grouped = processor.get_demographics_by_sentiment(df_musora, 'experience_group')

	    if exp_dist_detailed.empty and exp_dist_grouped.empty:
	        st.info("No experience level data available for visualization")
	        return

	    # Tabs for detailed vs grouped view
	    detailed_tab, grouped_tab = st.tabs(["📊 Detailed (0-10)", "📊 Grouped (Beginner/Intermediate/Advanced)"])

	    with detailed_tab:
	        if not exp_dist_detailed.empty:
	            exp_chart_detailed = demographic_charts.create_experience_distribution_chart(
	                exp_dist_detailed,
	                title="Comments by Experience Level (0-10 Scale)",
	                use_groups=False
	            )
	            st.plotly_chart(exp_chart_detailed, use_container_width=True)
	        else:
	            st.info("No detailed experience level data available")

	    with grouped_tab:
	        if not exp_dist_grouped.empty:
	            group_col, group_sentiment_col = st.columns(2)

	            with group_col:
	                exp_chart_grouped = demographic_charts.create_experience_distribution_chart(
	                    exp_dist_grouped,
	                    title="Comments by Experience Group",
	                    use_groups=True
	                )
	                st.plotly_chart(exp_chart_grouped, use_container_width=True)

	            with group_sentiment_col:
	                if not exp_sentiment_grouped.empty:
	                    exp_sent_chart = demographic_charts.create_experience_sentiment_chart(
	                        exp_sentiment_grouped,
	                        title="Sentiment by Experience Group",
	                        use_groups=True
	                    )
	                    st.plotly_chart(exp_sent_chart, use_container_width=True)
	        else:
	            st.info("No grouped experience level data available")

	    # Insights
	    with st.expander("💡 Experience Insights"):
	        _write_top_entry(exp_dist_grouped, 'experience_group', "Most Active Group")
	        # Find experience group with most negative sentiment
	        _write_highest_negative(exp_sentiment_grouped, 'experience_group')

	        if demo_summary['avg_experience'] is not None:
	            st.write(f"Average Experience Level: {demo_summary['avg_experience']:.2f}/10")
	            st.write(f"Most Common Experience Group: {demo_summary.get('most_common_experience', 'Unknown')}")