# Danialebrat's picture
# Deploying sentiment analysis project
# 9858829
"""
Distribution visualization components using Plotly
Creates charts for intent, language, and other distributions
"""
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
import json
from pathlib import Path
class DistributionCharts:
    """
    Creates distribution visualizations

    Every public ``create_*`` method takes a pandas DataFrame and returns a
    ``plotly.graph_objects.Figure``. Colors, the intent display order and the
    default chart height are read from a JSON configuration file when the
    instance is constructed.
    """

    # Neutral gray used whenever a category has no configured color.
    _FALLBACK_COLOR = '#CCCCCC'

    def __init__(self, config_path=None):
        """
        Initialize with configuration

        The configuration must provide ``color_schemes.intent``,
        ``color_schemes.platform``, ``color_schemes.brand``, ``intent_order``
        and ``dashboard.chart_height``.

        Args:
            config_path: Path to configuration file. Defaults to
                ``<parent of this file's directory>/config/viz_config.json``.

        Raises:
            OSError: If the configuration file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If one of the required keys is missing.
        """
        if config_path is None:
            config_path = Path(__file__).parent.parent / "config" / "viz_config.json"

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config_path, 'r', encoding='utf-8') as f:
            self.config = json.load(f)

        color_schemes = self.config['color_schemes']
        self.intent_colors = color_schemes['intent']
        self.platform_colors = color_schemes['platform']
        self.brand_colors = color_schemes['brand']
        self.intent_order = self.config['intent_order']
        self.chart_height = self.config['dashboard']['chart_height']

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _ordered_intent_counts(self, df):
        """
        Count comma-separated intent labels, ordered by ``self.intent_order``.

        Each cell of ``df['intent']`` may hold several labels separated by
        commas; every label is counted once per row it appears in. Missing
        values are ignored. Labels that are not listed in
        ``self.intent_order`` are left out of the result.

        Args:
            df: Sentiment dataframe with an ``intent`` column

        Returns:
            pandas.Series mapping intent label -> count, in configured order
        """
        labels = (
            df['intent']
            .dropna()
            # Keeps the .str accessor usable when the column is all-NaN
            # (float dtype) instead of raising AttributeError.
            .astype(str)
            .str.split(',')
            .explode()
            .str.strip()
        )
        counts = labels.value_counts()
        present = [intent for intent in self.intent_order if intent in counts.index]
        return counts.loc[present]

    @staticmethod
    def _category_colors(palette, categories):
        """
        Look up one color per category.

        Categories missing from ``palette`` use ``palette['default']`` when
        that key exists, otherwise a neutral gray, so a configuration without
        a ``default`` entry no longer raises ``KeyError``.

        Args:
            palette: Mapping of category name -> color string
            categories: Iterable of category names

        Returns:
            list of color strings, one per category, in input order
        """
        default_color = palette.get('default', DistributionCharts._FALLBACK_COLOR)
        return [palette.get(category, default_color) for category in categories]

    def _vertical_count_bar(self, counts, colors, title, category_label,
                            hover_label="Comments"):
        """
        Build a vertical bar chart from a Series of counts.

        Args:
            counts: pandas.Series mapping category -> count
            colors: Single color string or list of colors, one per bar
            title: Chart title
            category_label: X-axis title
            hover_label: Word shown before the count in the hover text

        Returns:
            plotly.graph_objects.Figure
        """
        fig = go.Figure(data=[go.Bar(
            x=counts.index,
            y=counts.values,
            marker=dict(color=colors),
            text=counts.values,
            textposition='auto',
            hovertemplate=f'<b>%{{x}}</b><br>{hover_label}: %{{y}}<extra></extra>'
        )])
        fig.update_layout(
            title=title,
            xaxis_title=category_label,
            yaxis_title="Number of Comments",
            height=self.chart_height
        )
        return fig

    # ------------------------------------------------------------------
    # Public chart builders
    # ------------------------------------------------------------------

    def create_intent_bar_chart(self, df, title="Intent Distribution", orientation='h'):
        """
        Create bar chart for intent distribution (handles multi-label)

        Only intents listed in the configured ``intent_order`` are plotted.

        Args:
            df: Sentiment dataframe
            title: Chart title
            orientation: 'h' for horizontal; any other value gives vertical

        Returns:
            plotly.graph_objects.Figure
        """
        intent_counts = self._ordered_intent_counts(df)
        colors = [
            self.intent_colors.get(intent, self._FALLBACK_COLOR)
            for intent in intent_counts.index
        ]

        if orientation != 'h':
            return self._vertical_count_bar(
                intent_counts, colors, title, "Intent", hover_label="Count"
            )

        fig = go.Figure(data=[go.Bar(
            y=intent_counts.index,
            x=intent_counts.values,
            orientation='h',
            marker=dict(color=colors),
            text=intent_counts.values,
            textposition='auto',
            hovertemplate='<b>%{y}</b><br>Count: %{x}<extra></extra>'
        )])
        fig.update_layout(
            title=title,
            xaxis_title="Number of Comments",
            yaxis_title="Intent",
            height=self.chart_height,
            # Horizontal bars are sorted by size, so the configured intent
            # order does not decide bar position in this orientation.
            yaxis={'categoryorder': 'total ascending'}
        )
        return fig

    def create_intent_pie_chart(self, df, title="Intent Distribution"):
        """
        Create pie chart for intent distribution

        Only intents listed in the configured ``intent_order`` are plotted.

        Args:
            df: Sentiment dataframe
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        intent_counts = self._ordered_intent_counts(df)
        colors = [
            self.intent_colors.get(intent, self._FALLBACK_COLOR)
            for intent in intent_counts.index
        ]

        fig = go.Figure(data=[go.Pie(
            labels=intent_counts.index,
            values=intent_counts.values,
            marker=dict(colors=colors),
            textinfo='label+percent',
            textposition='auto',
            hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>'
        )])
        fig.update_layout(
            title=title,
            height=self.chart_height,
            showlegend=True,
            legend=dict(orientation="v", yanchor="middle", y=0.5, xanchor="left", x=1.05)
        )
        return fig

    def create_platform_distribution(self, df, title="Comments by Platform"):
        """
        Create bar chart for platform distribution

        Args:
            df: Sentiment dataframe
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        platform_counts = df['platform'].value_counts()
        colors = self._category_colors(self.platform_colors, platform_counts.index)
        return self._vertical_count_bar(platform_counts, colors, title, "Platform")

    def create_brand_distribution(self, df, title="Comments by Brand"):
        """
        Create bar chart for brand distribution

        Args:
            df: Sentiment dataframe
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        brand_counts = df['brand'].value_counts()
        colors = self._category_colors(self.brand_colors, brand_counts.index)
        return self._vertical_count_bar(brand_counts, colors, title, "Brand")

    def create_language_distribution(self, df, top_n=10, title="Language Distribution"):
        """
        Create bar chart for language distribution

        Args:
            df: Sentiment dataframe
            top_n: Number of top languages to show
            title: Chart title

        Returns:
            plotly.graph_objects.Figure. When ``df`` has no
            ``detected_language`` column, an empty figure carrying a
            "No language data available" annotation is returned instead.
        """
        if 'detected_language' not in df.columns:
            return go.Figure().add_annotation(
                text="No language data available",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False
            )

        lang_counts = df['detected_language'].value_counts().head(top_n)
        return self._vertical_count_bar(lang_counts, '#2196F3', title, "Language")

    def create_combined_distribution_sunburst(self, df, title="Hierarchical Distribution"):
        """
        Create sunburst chart showing hierarchical distribution
        (Brand > Platform > Sentiment)

        Args:
            df: Sentiment dataframe

            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        hierarchy = ['brand', 'platform', 'sentiment_polarity']

        # One row per (brand, platform, sentiment) combination with its size.
        sunburst_data = df.groupby(hierarchy).size().reset_index(name='count')

        fig = px.sunburst(
            sunburst_data,
            path=hierarchy,
            values='count',
            title=title,
            # Fixed height; this chart does not use the configured chart_height.
            height=500
        )
        fig.update_layout(
            margin=dict(t=50, l=0, r=0, b=0)
        )
        return fig

    def create_brand_platform_matrix(self, df, title="Brand-Platform Comment Matrix"):
        """
        Create heatmap showing comment distribution across brands and platforms

        Args:
            df: Sentiment dataframe
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        # Rows are brands, columns are platforms, cells are comment counts.
        matrix_data = pd.crosstab(df['brand'], df['platform'])

        fig = go.Figure(data=go.Heatmap(
            z=matrix_data.values,
            x=matrix_data.columns,
            y=matrix_data.index,
            colorscale='Blues',
            text=matrix_data.values,
            texttemplate='%{text}',
            textfont={"size": 14},
            hovertemplate='<b>%{y} - %{x}</b><br>Comments: %{z}<extra></extra>',
            colorbar=dict(title="Comments")
        ))
        fig.update_layout(
            title=title,
            xaxis_title="Platform",
            yaxis_title="Brand",
            height=self.chart_height
        )
        return fig

    def create_reply_required_chart(self, df, group_by='brand', title="Comments Requiring Reply"):
        """
        Create stacked bar chart showing reply requirements

        Args:
            df: Sentiment dataframe
            group_by: Column to group by
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        reply_data = df.groupby([group_by, 'requires_reply']).size().reset_index(name='count')
        reply_pivot = reply_data.pivot(
            index=group_by, columns='requires_reply', values='count'
        ).fillna(0)

        # (column value, legend name, bar color, hover label) per trace,
        # listed in stacking order.
        trace_specs = (
            (False, 'No Reply Needed', '#81C784', 'No Reply'),
            (True, 'Reply Required', '#FF7043', 'Reply Required'),
        )

        fig = go.Figure()
        for flag, name, color, hover_label in trace_specs:
            # A trace is added only when that value occurs in the data.
            if flag not in reply_pivot.columns:
                continue
            fig.add_trace(go.Bar(
                name=name,
                x=reply_pivot.index,
                y=reply_pivot[flag],
                marker_color=color,
                hovertemplate=f'<b>%{{x}}</b><br>{hover_label}: %{{y}}<extra></extra>'
            ))

        fig.update_layout(
            title=title,
            xaxis_title=group_by.capitalize(),
            yaxis_title="Number of Comments",
            barmode='stack',
            height=self.chart_height,
            legend=dict(title="Reply Status", orientation="v", yanchor="top", y=1, xanchor="left", x=1.02)
        )
        return fig

    def create_engagement_scatter(self, content_summary_df, title="Content Engagement Analysis"):
        """
        Create scatter plot showing content engagement

        Plots ``total_comments`` against ``negative_percentage``; marker size
        follows ``reply_required_count`` and marker color follows
        ``negative_percentage``. ``content_description`` is added to the
        hover text.

        Args:
            content_summary_df: DataFrame with content summary statistics
            title: Chart title

        Returns:
            plotly.graph_objects.Figure
        """
        fig = px.scatter(
            content_summary_df,
            x='total_comments',
            y='negative_percentage',
            size='reply_required_count',
            color='negative_percentage',
            hover_data=['content_description'],
            title=title,
            labels={
                'total_comments': 'Total Comments',
                'negative_percentage': 'Negative Sentiment %',
                'reply_required_count': 'Replies Required'
            },
            color_continuous_scale='RdYlGn_r',
            height=self.chart_height
        )
        fig.update_layout(
            xaxis_title="Total Comments",
            yaxis_title="Negative Sentiment %",
            coloraxis_colorbar=dict(title="Negative %")
        )
        return fig