"""
Metrics calculation for the sentiment analysis dashboard.

Provides key performance indicators and statistical metrics.
"""
| | import pandas as pd |
| | import numpy as np |
| | from typing import Dict, List, Tuple |
| |
|
| |
|
class SentimentMetrics:
    """
    Calculates various metrics for sentiment analysis.

    All methods are static and operate on a pandas DataFrame of comments.
    The columns read are 'sentiment_polarity', 'requires_reply', 'intent',
    'brand', 'platform' and 'comment_timestamp', depending on the method.
    """

    # Numeric score per polarity label; labels outside this table are
    # treated as "not scorable" and ignored when averaging.
    _SENTIMENT_WEIGHTS = {
        'very_negative': -2,
        'negative': -1,
        'neutral': 0,
        'positive': 1,
        'very_positive': 2,
    }
    _NEGATIVE_SENTIMENTS = ['negative', 'very_negative']
    _POSITIVE_SENTIMENTS = ['positive', 'very_positive']

    # Weight applied to each intent count; unknown intents default to 1.
    _INTENT_PRIORITY_WEIGHTS = {
        'feedback_negative': 5,
        'request': 4,
        'question': 4,
        'suggestion': 3,
        'praise': 2,
        'humor_sarcasm': 1,
        'off_topic': 1,
        'spam_selfpromo': 0,
    }

    # (exclusive upper bound on negative %, (status, color)), checked in order.
    _HEALTH_BANDS = (
        (10, ("Excellent", "green")),
        (20, ("Good", "lightgreen")),
        (30, ("Fair", "orange")),
        (50, ("Poor", "darkorange")),
    )

    @staticmethod
    def _mean_sentiment_score(df, default=0.0):
        """Return the mean polarity weight of df, or `default` if no row is scorable."""
        scores = df['sentiment_polarity'].map(SentimentMetrics._SENTIMENT_WEIGHTS).dropna()
        if scores.empty:
            return default
        return float(scores.astype(float).mean())

    @staticmethod
    def _metrics_by_column(df, column):
        """Return overall metrics for each non-missing value of `column`."""
        # NaN keys are dropped: `df[column] == NaN` never matches, so they
        # would only produce an empty, all-zero entry.
        return {
            key: SentimentMetrics.calculate_overall_metrics(df[df[column] == key])
            for key in df[column].dropna().unique()
        }

    @staticmethod
    def _rows_in_period(df, period):
        """Return the rows whose 'comment_timestamp' lies in the inclusive period."""
        start, end = pd.Timestamp(period[0]), pd.Timestamp(period[1])
        stamps = df['comment_timestamp']
        return df[(stamps >= start) & (stamps <= end)]

    @staticmethod
    def calculate_overall_metrics(df: pd.DataFrame) -> dict:
        """
        Calculate overall summary metrics

        Args:
            df: Sentiment dataframe with a 'sentiment_polarity' column and,
                optionally, a 'requires_reply' column

        Returns:
            dict: Overall metrics with keys 'total_comments',
                'total_reply_required', 'reply_required_pct',
                'avg_sentiment_score', 'negative_pct', 'positive_pct' and
                'sentiment_distribution' (label -> percentage). Percentages
                and the average score are 0 when there is nothing to measure.
        """
        total_comments = len(df)
        polarity = df['sentiment_polarity']

        # 'requires_reply' is optional; without it nothing counts as needing a reply.
        if 'requires_reply' in df.columns:
            total_reply_required = int(df['requires_reply'].sum())
        else:
            total_reply_required = 0

        def pct(count):
            return float(count / total_comments * 100) if total_comments > 0 else 0

        sentiment_dist = polarity.value_counts(normalize=True) * 100

        return {
            'total_comments': total_comments,
            'total_reply_required': total_reply_required,
            'reply_required_pct': pct(total_reply_required),
            'avg_sentiment_score': SentimentMetrics._mean_sentiment_score(df),
            'negative_pct': pct(polarity.isin(SentimentMetrics._NEGATIVE_SENTIMENTS).sum()),
            'positive_pct': pct(polarity.isin(SentimentMetrics._POSITIVE_SENTIMENTS).sum()),
            'sentiment_distribution': sentiment_dist.to_dict(),
        }

    @staticmethod
    def calculate_brand_metrics(df: pd.DataFrame) -> dict:
        """
        Calculate metrics by brand

        Args:
            df: Sentiment dataframe with a 'brand' column

        Returns:
            dict: Overall metrics keyed by brand, in order of first
                appearance; rows with a missing brand are skipped
        """
        return SentimentMetrics._metrics_by_column(df, 'brand')

    @staticmethod
    def calculate_platform_metrics(df: pd.DataFrame) -> dict:
        """
        Calculate metrics by platform

        Args:
            df: Sentiment dataframe with a 'platform' column

        Returns:
            dict: Overall metrics keyed by platform, in order of first
                appearance; rows with a missing platform are skipped
        """
        return SentimentMetrics._metrics_by_column(df, 'platform')

    @staticmethod
    def calculate_content_engagement_score(content_df: pd.DataFrame) -> float:
        """
        Calculate engagement score for a content piece

        The score is the sum of four capped components:
        comment volume (0-30, saturating at 100 comments), average sentiment
        (0-40), intent diversity (0-20, saturating at 8 distinct intents) and
        share of comments requiring a reply (0-10).

        Args:
            content_df: DataFrame for a single content

        Returns:
            float: Engagement score (0-100)
        """
        comment_count = len(content_df)
        if comment_count == 0:
            return 0.0

        comment_score = min(comment_count / 100 * 30, 30)

        # Map the average from [-2, 2] onto [0, 40]. When no comment carries a
        # recognised polarity the average defaults to neutral (0) instead of
        # propagating NaN into the total.
        avg_sentiment = SentimentMetrics._mean_sentiment_score(content_df)
        sentiment_score = ((avg_sentiment + 2) / 4) * 40

        # 'intent' holds comma-separated labels; drop missing values and the
        # empty tokens left by stray commas so they are not counted as intents.
        intents = (
            content_df['intent']
            .dropna()
            .astype(str)
            .str.split(',')
            .explode()
            .str.strip()
        )
        unique_intents = intents[intents != ''].nunique()
        intent_score = min(unique_intents / 8 * 20, 20)

        # 'requires_reply' is optional, as in calculate_overall_metrics.
        if 'requires_reply' in content_df.columns:
            reply_rate = content_df['requires_reply'].sum() / comment_count
        else:
            reply_rate = 0
        interaction_score = reply_rate * 10

        total_score = comment_score + sentiment_score + intent_score + interaction_score
        return float(round(total_score, 2))

    @staticmethod
    def get_sentiment_health_status(negative_pct) -> tuple:
        """
        Determine health status based on negative sentiment percentage

        Args:
            negative_pct: Percentage of negative sentiments

        Returns:
            tuple: (status, color); "Critical" when no band matches
                (50% and above)
        """
        for upper_bound, status in SentimentMetrics._HEALTH_BANDS:
            if negative_pct < upper_bound:
                return status
        return ("Critical", "red")

    @staticmethod
    def calculate_intent_priority_score(intent_counts: dict) -> dict:
        """
        Calculate priority score for different intents

        Args:
            intent_counts: Dictionary of intent counts

        Returns:
            dict: Priority scores for each intent (count * weight; intents
                without a configured weight use a weight of 1)
        """
        weights = SentimentMetrics._INTENT_PRIORITY_WEIGHTS
        return {
            intent: count * weights.get(intent, 1)
            for intent, count in intent_counts.items()
        }

    @staticmethod
    def calculate_response_urgency(df: pd.DataFrame) -> dict:
        """
        Calculate response urgency metrics

        Only comments flagged as requiring a reply are considered:
        negative ones are urgent, neutral ones mentioning a negative-feedback
        or request intent are high priority, positive ones medium priority
        and very positive ones low priority.

        Args:
            df: Sentiment dataframe

        Returns:
            dict: Urgency metrics ('urgent_count', 'high_priority_count',
                'medium_priority_count', 'low_priority_count')
        """
        counts = {
            'urgent_count': 0,
            'high_priority_count': 0,
            'medium_priority_count': 0,
            'low_priority_count': 0,
        }

        # Without the flag column nothing is known to need a reply.
        if 'requires_reply' not in df.columns:
            return counts

        needs_reply = df[df['requires_reply'].eq(True)]
        if needs_reply.empty:
            return counts

        polarity = needs_reply['sentiment_polarity']
        actionable_intent = needs_reply['intent'].str.contains(
            'feedback_negative|request', na=False
        )

        counts['urgent_count'] = int(polarity.isin(SentimentMetrics._NEGATIVE_SENTIMENTS).sum())
        counts['high_priority_count'] = int(((polarity == 'neutral') & actionable_intent).sum())
        counts['medium_priority_count'] = int((polarity == 'positive').sum())
        counts['low_priority_count'] = int((polarity == 'very_positive').sum())
        return counts

    @staticmethod
    def calculate_trend_indicator(df, current_period, previous_period, metric='sentiment_score'):
        """
        Calculate trend indicator comparing two periods

        Args:
            df: Sentiment dataframe
            current_period: Tuple of (start_date, end_date) for current period
            previous_period: Tuple of (start_date, end_date) for previous period
            metric: Metric to compare; 'sentiment_score' compares the mean
                polarity weight, any other value compares comment counts

        Returns:
            dict: Trend information. Always has 'trend' and 'change'
                (percent change relative to the previous period);
                'current_value' and 'previous_value' are included when both
                periods contain measurable data. Without a
                'comment_timestamp' column, or when either period has no
                data, the result is {'trend': 'stable', 'change': 0}.
        """
        if 'comment_timestamp' not in df.columns:
            return {'trend': 'stable', 'change': 0}

        current_df = SentimentMetrics._rows_in_period(df, current_period)
        previous_df = SentimentMetrics._rows_in_period(df, previous_period)
        if current_df.empty or previous_df.empty:
            return {'trend': 'stable', 'change': 0}

        is_sentiment = metric == 'sentiment_score'
        if is_sentiment:
            current_value = SentimentMetrics._mean_sentiment_score(current_df, default=None)
            previous_value = SentimentMetrics._mean_sentiment_score(previous_df, default=None)
            # No recognised polarity label in a period: nothing to compare.
            if current_value is None or previous_value is None:
                return {'trend': 'stable', 'change': 0}
        else:
            current_value = len(current_df)
            previous_value = len(previous_df)

        # Divide by the magnitude of the baseline: sentiment scores can be
        # negative, and a signed denominator would flip the direction of the
        # change (e.g. -1.0 -> -0.5 must read as an improvement).
        # A zero baseline has no defined percent change and is reported as 0.
        if previous_value != 0:
            change = (current_value - previous_value) / abs(previous_value) * 100
        else:
            change = 0

        if abs(change) < 5:
            trend = 'stable'
        elif change > 0:
            trend = 'improving' if is_sentiment else 'increasing'
        else:
            trend = 'declining' if is_sentiment else 'decreasing'

        return {
            'trend': trend,
            'change': round(change, 2),
            'current_value': round(current_value, 2),
            'previous_value': round(previous_value, 2),
        }