Spaces:

MusoraProductDepartment
/

Sentiment_analysis

Sleeping

App Files Files Community

Sentiment_analysis / visualization /utils /metrics.py

Danialebrat

Deploying sentiment analysis project

9858829 13 days ago

raw

history blame contribute delete

9.82 kB

	"""
	Metrics calculation for sentiment analysis dashboard
	Provides key performance indicators and statistical metrics
	"""
	import pandas as pd
	import numpy as np
	from typing import Dict, List, Tuple


	class SentimentMetrics:
	    """
	    Calculates various metrics for sentiment analysis

	    Every method is static and works on a pandas DataFrame of comments.
	    Columns read by the methods below: 'sentiment_polarity', 'intent',
	    'requires_reply', 'brand', 'platform' and 'comment_timestamp'.
	    """

	    # Numeric weight of each polarity label. Labels missing from this table
	    # map to NaN and are therefore ignored when a mean score is computed.
	    _SENTIMENT_WEIGHTS = {
	        'very_negative': -2,
	        'negative': -1,
	        'neutral': 0,
	        'positive': 1,
	        'very_positive': 2,
	    }
	    _NEGATIVE_SENTIMENTS = ['negative', 'very_negative']
	    _POSITIVE_SENTIMENTS = ['positive', 'very_positive']

	    # Priority weights (higher = more urgent); unlisted intents use the default.
	    _INTENT_PRIORITY_WEIGHTS = {
	        'feedback_negative': 5,
	        'request': 4,
	        'question': 4,
	        'suggestion': 3,
	        'praise': 2,
	        'humor_sarcasm': 1,
	        'off_topic': 1,
	        'spam_selfpromo': 0,
	    }
	    _DEFAULT_INTENT_PRIORITY = 1

	    # Raw-string regex alternation: matches an intent string containing
	    # either 'feedback_negative' or 'request'. The pipe must NOT be escaped,
	    # otherwise it is matched as a literal '|' character.
	    _HIGH_PRIORITY_INTENT_PATTERN = r'feedback_negative|request'

	    # (exclusive upper bound on negative %, status, color), checked in order.
	    _HEALTH_THRESHOLDS = (
	        (10, "Excellent", "green"),
	        (20, "Good", "lightgreen"),
	        (30, "Fair", "orange"),
	        (50, "Poor", "darkorange"),
	    )

	    @staticmethod
	    def _average_sentiment(polarity):
	        """Return the mean weight of the recognised labels, or None if there are none."""
	        if len(polarity) == 0:
	            return None
	        score = polarity.map(SentimentMetrics._SENTIMENT_WEIGHTS).mean()
	        return None if pd.isna(score) else float(score)

	    @staticmethod
	    def _percentage(count, total):
	        """Return count as a percentage of total, or 0 when total is 0."""
	        return (count / total * 100) if total > 0 else 0

	    @staticmethod
	    def _metrics_by(df, column):
	        """Return calculate_overall_metrics() for each distinct value of column."""
	        # A single groupby pass replaces one full-frame boolean filter per value.
	        # sort=False keeps first-appearance order; observed=True ignores unused
	        # categories of a categorical column; rows whose key is missing are
	        # skipped (groupby's default) instead of yielding an empty entry.
	        grouped = df.groupby(column, sort=False, observed=True)
	        return {
	            key: SentimentMetrics.calculate_overall_metrics(group)
	            for key, group in grouped
	        }

	    @staticmethod
	    def calculate_overall_metrics(df: pd.DataFrame) -> Dict[str, object]:
	        """
	        Calculate overall summary metrics

	        Args:
	            df: Sentiment dataframe; must have a 'sentiment_polarity' column,
	                'requires_reply' is optional

	        Returns:
	            dict: Overall metrics with keys 'total_comments',
	                'total_reply_required', 'reply_required_pct',
	                'avg_sentiment_score', 'negative_pct', 'positive_pct' and
	                'sentiment_distribution' (label -> percentage)
	        """
	        total_comments = len(df)
	        polarity = df['sentiment_polarity']

	        if 'requires_reply' in df.columns:
	            total_reply_required = df['requires_reply'].sum()
	        else:
	            total_reply_required = 0

	        # Sentiment distribution as percentages of all labelled comments
	        sentiment_dist = polarity.value_counts(normalize=True) * 100

	        # Fall back to 0.0 (like the percentage fields) when no comment has a
	        # recognised label, instead of leaking NaN to the caller.
	        avg_sentiment_score = SentimentMetrics._average_sentiment(polarity)
	        if avg_sentiment_score is None:
	            avg_sentiment_score = 0.0

	        negative_count = polarity.isin(SentimentMetrics._NEGATIVE_SENTIMENTS).sum()
	        positive_count = polarity.isin(SentimentMetrics._POSITIVE_SENTIMENTS).sum()

	        return {
	            'total_comments': total_comments,
	            'total_reply_required': int(total_reply_required),
	            'reply_required_pct': SentimentMetrics._percentage(
	                total_reply_required, total_comments
	            ),
	            'avg_sentiment_score': avg_sentiment_score,
	            'negative_pct': SentimentMetrics._percentage(negative_count, total_comments),
	            'positive_pct': SentimentMetrics._percentage(positive_count, total_comments),
	            'sentiment_distribution': sentiment_dist.to_dict(),
	        }

	    @staticmethod
	    def calculate_brand_metrics(df: pd.DataFrame) -> Dict[object, Dict[str, object]]:
	        """
	        Calculate metrics by brand

	        Args:
	            df: Sentiment dataframe with a 'brand' column

	        Returns:
	            dict: Metrics by brand (rows with a missing brand are skipped)
	        """
	        return SentimentMetrics._metrics_by(df, 'brand')

	    @staticmethod
	    def calculate_platform_metrics(df: pd.DataFrame) -> Dict[object, Dict[str, object]]:
	        """
	        Calculate metrics by platform

	        Args:
	            df: Sentiment dataframe with a 'platform' column

	        Returns:
	            dict: Metrics by platform (rows with a missing platform are skipped)
	        """
	        return SentimentMetrics._metrics_by(df, 'platform')

	    @staticmethod
	    def calculate_content_engagement_score(content_df: pd.DataFrame) -> float:
	        """
	        Calculate engagement score for a content piece

	        The score is the sum of four parts:
	        comment volume (max 30), sentiment positivity (max 40),
	        intent diversity (max 20) and reply requirement rate (max 10).

	        Args:
	            content_df: DataFrame for a single content

	        Returns:
	            float: Engagement score (0-100), 0 for an empty DataFrame
	        """
	        comment_count = len(content_df)
	        if comment_count == 0:
	            return 0

	        # Max 30 points, reached at 100+ comments
	        comment_score = min(comment_count / 100 * 30, 30)

	        # Mean weight lies in [-2, 2]; rescale to 0-40. When no comment has a
	        # recognised label the content is scored as neutral rather than NaN.
	        avg_sentiment = SentimentMetrics._average_sentiment(
	            content_df['sentiment_polarity']
	        )
	        if avg_sentiment is None:
	            avg_sentiment = 0.0
	        sentiment_score = ((avg_sentiment + 2) / 4) * 40

	        # 'intent' holds comma-separated labels; count distinct non-blank ones.
	        # Max 20 points, reached at 8 unique intents.
	        intents = content_df['intent'].str.split(',').explode().str.strip()
	        unique_intents = len(set(intents.dropna()) - {''})
	        intent_score = min(unique_intents / 8 * 20, 20)

	        # Share of comments that require a reply, worth up to 10 points
	        reply_rate = content_df['requires_reply'].sum() / comment_count
	        interaction_score = reply_rate * 10

	        total_score = comment_score + sentiment_score + intent_score + interaction_score
	        return round(float(total_score), 2)

	    @staticmethod
	    def get_sentiment_health_status(negative_pct: float) -> Tuple[str, str]:
	        """
	        Determine health status based on negative sentiment percentage

	        Args:
	            negative_pct: Percentage of negative sentiments

	        Returns:
	            tuple: (status, color)
	        """
	        for upper_bound, status, color in SentimentMetrics._HEALTH_THRESHOLDS:
	            if negative_pct < upper_bound:
	                return (status, color)
	        # 50% or more (or a value that compares below no threshold, e.g. NaN)
	        return ("Critical", "red")

	    @staticmethod
	    def calculate_intent_priority_score(intent_counts: Dict[str, int]) -> Dict[str, int]:
	        """
	        Calculate priority score for different intents

	        Args:
	            intent_counts: Dictionary of intent counts

	        Returns:
	            dict: Priority scores for each intent (count * priority weight)
	        """
	        weights = SentimentMetrics._INTENT_PRIORITY_WEIGHTS
	        fallback = SentimentMetrics._DEFAULT_INTENT_PRIORITY
	        return {
	            intent: count * weights.get(intent, fallback)
	            for intent, count in intent_counts.items()
	        }

	    @staticmethod
	    def calculate_response_urgency(df: pd.DataFrame) -> Dict[str, int]:
	        """
	        Calculate response urgency metrics

	        Only comments whose 'requires_reply' equals True are classified:
	        negative polarity -> urgent; neutral polarity with a
	        'feedback_negative' or 'request' intent -> high priority;
	        positive -> medium priority; very positive -> low priority.

	        Args:
	            df: Sentiment dataframe

	        Returns:
	            dict: Urgency metrics ('urgent_count', 'high_priority_count',
	                'medium_priority_count', 'low_priority_count')
	        """
	        needs_reply = df[df['requires_reply'].eq(True)]

	        if len(needs_reply) == 0:
	            return {
	                'urgent_count': 0,
	                'high_priority_count': 0,
	                'medium_priority_count': 0,
	                'low_priority_count': 0
	            }

	        polarity = needs_reply['sentiment_polarity']
	        # na=False: a comment without an intent never counts as high priority
	        wants_action = needs_reply['intent'].str.contains(
	            SentimentMetrics._HIGH_PRIORITY_INTENT_PATTERN, na=False
	        )

	        return {
	            'urgent_count': int(polarity.isin(SentimentMetrics._NEGATIVE_SENTIMENTS).sum()),
	            'high_priority_count': int(((polarity == 'neutral') & wants_action).sum()),
	            'medium_priority_count': int((polarity == 'positive').sum()),
	            'low_priority_count': int((polarity == 'very_positive').sum())
	        }

	    @staticmethod
	    def calculate_trend_indicator(df, current_period, previous_period, metric='sentiment_score'):
	        """
	        Calculate trend indicator comparing two periods

	        Args:
	            df: Sentiment dataframe
	            current_period: Tuple of (start_date, end_date) for current period
	            previous_period: Tuple of (start_date, end_date) for previous period
	            metric: Metric to compare; 'sentiment_score' compares the mean
	                sentiment weight, any other value compares comment counts

	        Returns:
	            dict: Trend information. Always has 'trend' and 'change' (percent);
	                'current_value' and 'previous_value' are included only when
	                both periods contain usable data.
	        """
	        if 'comment_timestamp' not in df.columns:
	            return {'trend': 'stable', 'change': 0}

	        timestamps = df['comment_timestamp']

	        def rows_in(period):
	            # Both period bounds are inclusive
	            start = pd.Timestamp(period[0])
	            end = pd.Timestamp(period[1])
	            return df[(timestamps >= start) & (timestamps <= end)]

	        current_df = rows_in(current_period)
	        previous_df = rows_in(previous_period)

	        if len(current_df) == 0 or len(previous_df) == 0:
	            return {'trend': 'stable', 'change': 0}

	        # Calculate metric for each period
	        if metric == 'sentiment_score':
	            current_value = SentimentMetrics._average_sentiment(
	                current_df['sentiment_polarity']
	            )
	            previous_value = SentimentMetrics._average_sentiment(
	                previous_df['sentiment_polarity']
	            )
	            # No recognised labels in a period: nothing to compare
	            if current_value is None or previous_value is None:
	                return {'trend': 'stable', 'change': 0}
	        else:
	            current_value = len(current_df)
	            previous_value = len(previous_df)

	        # Percent change relative to the magnitude of the previous value.
	        # Dividing by abs() keeps the sign of the change correct when the
	        # previous sentiment score is negative (e.g. -1 -> -0.5 is +50%).
	        # NOTE: a previous value of exactly 0 has no defined percent change
	        # and is reported as 0.
	        if previous_value != 0:
	            change = (current_value - previous_value) / abs(previous_value) * 100
	        else:
	            change = 0

	        # Determine trend; changes smaller than 5% count as stable
	        is_sentiment = metric == 'sentiment_score'
	        if abs(change) < 5:
	            trend = 'stable'
	        elif change > 0:
	            trend = 'improving' if is_sentiment else 'increasing'
	        else:
	            trend = 'declining' if is_sentiment else 'decreasing'

	        return {
	            'trend': trend,
	            'change': round(change, 2),
	            'current_value': round(current_value, 2),
	            'previous_value': round(previous_value, 2)
	        }