# CxSentimentAnalysisAI / stage4_batch_analysis.py
# riazmo's picture
# Upload 11 files
# 087ac11 verified
"""
Stage 4: Batch Analysis & Aggregation
- Aggregate insights across all processed reviews
- Identify patterns, trends, critical issues
- Generate actionable recommendations
"""
import json
from typing import Dict, Any, List
from collections import Counter
class Stage4BatchAnalysis:
    """
    Stage 4: Batch-level intelligence and recommendations.

    Reduces a list of per-review dicts to a single insights dict holding
    category distributions, critical issues, quick wins, a churn-risk score
    and textual recommendations. The review keys read here are prefixed
    ``stage1_``, ``stage2_`` and ``stage3_`` (presumably written by earlier
    pipeline stages -- verify against the producers). Progress is reported
    with ``print``.
    """

    # Maximum number of entries kept in the critical-issue and quick-win lists.
    _TOP_N = 10

    def __init__(self):
        print(" 📊 Stage 4: Batch Analysis initialized")

    def analyze_batch(self, reviews: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze a batch of processed reviews.

        Args:
            reviews: Review dicts. A missing sentiment counts as 'NEUTRAL';
                other missing categories count as 'unknown'.

        Returns:
            Insights dict with totals, per-category counts and full
            distributions, model agreement rate and churn risk (both in
            percent), plus the 'critical_issues', 'quick_wins' and
            'recommendations' lists. An empty batch yields the same keys
            with zero / empty values.
        """
        if not reviews:
            print(" ⚠️ No reviews to analyze")
            return self._empty_insights()

        total = len(reviews)
        print(f"\n 📊 Analyzing batch of {total} reviews...")

        # Sentiment distribution
        sentiment_counts = self._count_field(reviews, 'stage3_final_sentiment', 'NEUTRAL')
        print(f" 📈 Sentiment: "
              f"POS={sentiment_counts.get('POSITIVE', 0)}, "
              f"NEU={sentiment_counts.get('NEUTRAL', 0)}, "
              f"NEG={sentiment_counts.get('NEGATIVE', 0)}")

        # Priority distribution
        priority_counts = self._count_field(reviews, 'stage1_llm1_priority', 'unknown')
        print(f" 🎯 Priority: "
              f"Critical={priority_counts.get('critical', 0)}, "
              f"High={priority_counts.get('high', 0)}, "
              f"Medium={priority_counts.get('medium', 0)}, "
              f"Low={priority_counts.get('low', 0)}")

        # Department routing
        dept_counts = self._count_field(reviews, 'stage1_llm1_department', 'unknown')
        print(f" 🏢 Departments: "
              f"Eng={dept_counts.get('engineering', 0)}, "
              f"UX={dept_counts.get('ux', 0)}, "
              f"Support={dept_counts.get('support', 0)}, "
              f"Business={dept_counts.get('business', 0)}")

        # Emotion and review-type distributions (reported only in the result)
        emotion_counts = self._count_field(reviews, 'stage1_llm2_emotion', 'unknown')
        type_counts = self._count_field(reviews, 'stage1_llm1_type', 'unknown')

        # Identify critical issues
        critical_issues = self._identify_critical_issues(reviews)
        print(f" 🚨 Critical Issues: {len(critical_issues)}")

        # Identify quick wins
        quick_wins = self._identify_quick_wins(reviews)
        print(f" ⚡ Quick Wins: {len(quick_wins)}")

        # Calculate churn risk
        churn_risk = self._calculate_churn_risk(reviews)
        print(f" ⚠️ Churn Risk: {churn_risk:.1f}%")

        # Model agreement rate; total is non-zero because of the guard above
        agreement_count = sum(1 for r in reviews if r.get('stage2_agreement', False))
        agreement_rate = agreement_count / total * 100
        print(f" 🤝 Model Agreement: {agreement_rate:.1f}%")

        # Generate recommendations
        recommendations = self._generate_recommendations(
            sentiment_counts, priority_counts, dept_counts,
            critical_issues, quick_wins, churn_risk
        )

        # Compile batch insights
        return {
            'total_reviews': total,
            # Sentiment
            'sentiment_positive': sentiment_counts.get('POSITIVE', 0),
            'sentiment_neutral': sentiment_counts.get('NEUTRAL', 0),
            'sentiment_negative': sentiment_counts.get('NEGATIVE', 0),
            'sentiment_distribution': dict(sentiment_counts),
            # Priority
            'priority_critical': priority_counts.get('critical', 0),
            'priority_high': priority_counts.get('high', 0),
            'priority_medium': priority_counts.get('medium', 0),
            'priority_low': priority_counts.get('low', 0),
            'priority_distribution': dict(priority_counts),
            # Department
            'dept_engineering': dept_counts.get('engineering', 0),
            'dept_ux': dept_counts.get('ux', 0),
            'dept_support': dept_counts.get('support', 0),
            'dept_business': dept_counts.get('business', 0),
            'department_distribution': dict(dept_counts),
            # Additional insights
            'emotion_distribution': dict(emotion_counts),
            'type_distribution': dict(type_counts),
            'model_agreement_rate': agreement_rate,
            'churn_risk': churn_risk,
            # Actionable lists
            'critical_issues': critical_issues,
            'quick_wins': quick_wins,
            'recommendations': recommendations,
        }

    @staticmethod
    def _count_field(reviews: List[Dict], key: str, default: str) -> Counter:
        """Count how often each value of ``key`` occurs, using ``default`` when absent."""
        return Counter(review.get(key, default) for review in reviews)

    @staticmethod
    def _coerce_rating(raw: Any, default: float) -> float:
        """Return ``raw`` as a float, or ``default`` if it is None or not numeric."""
        try:
            return float(raw)
        except (TypeError, ValueError):
            return default

    def _identify_critical_issues(self, reviews: List[Dict]) -> List[Dict]:
        """
        Identify critical issues requiring immediate attention.

        A review qualifies when its priority is 'critical', or when it is
        NEGATIVE and flagged for human review. Results are sorted by rating
        (lowest first) and truncated to the first ``_TOP_N`` entries.
        """
        critical = []
        for review in reviews:
            is_critical = review.get('stage1_llm1_priority', '') == 'critical'
            negative_and_flagged = (
                review.get('stage3_final_sentiment', '') == 'NEGATIVE'
                and review.get('stage3_needs_human_review', False)
            )
            if is_critical or negative_and_flagged:
                critical.append({
                    'review_id': review.get('review_id', 'unknown'),
                    'type': review.get('stage1_llm1_type', 'unknown'),
                    'department': review.get('stage1_llm1_department', 'unknown'),
                    'reasoning': review.get('stage3_reasoning', ''),
                    'action': review.get('stage3_action_recommendation', ''),
                    'rating': review.get('rating', 0)
                })

        # Sort on a coerced key so a None / non-numeric rating cannot raise
        # TypeError; the stored 'rating' value itself is left untouched.
        critical.sort(key=lambda issue: self._coerce_rating(issue['rating'], 0))
        return critical[:self._TOP_N]

    def _identify_quick_wins(self, reviews: List[Dict]) -> List[Dict]:
        """
        Identify easy-to-fix issues for quick wins.

        A quick win is a review of type 'suggestion' with 'low' or 'medium'
        priority. At most ``_TOP_N`` entries are returned, in input order.
        """
        quick_wins = []
        for review in reviews:
            if review.get('stage1_llm1_type', '') != 'suggestion':
                continue
            if review.get('stage1_llm1_priority', '') not in ('low', 'medium'):
                continue
            # review_text may be present but None; treat that as empty text.
            text = review.get('review_text') or ''
            quick_wins.append({
                'review_id': review.get('review_id', 'unknown'),
                'suggestion': str(text)[:100],
                'department': review.get('stage1_llm1_department', 'unknown'),
                'action': review.get('stage3_action_recommendation', ''),
                'rating': review.get('rating', 0)
            })
        return quick_wins[:self._TOP_N]

    def _calculate_churn_risk(self, reviews: List[Dict]) -> float:
        """
        Calculate overall churn risk percentage.

        Each review scores 2 points if its user type is 'churning_user',
        otherwise 1 point if it is NEGATIVE with rating <= 2 or has rating 1.
        The result is the score as a percentage of the maximum (2 per
        review), capped at 100. A missing or unusable rating counts as 3.
        """
        if not reviews:
            return 0.0

        churn_indicators = 0
        for review in reviews:
            user_type = review.get('stage1_llm2_user_type', '')
            sentiment = review.get('stage3_final_sentiment', '')
            rating = self._coerce_rating(review.get('rating', 3), 3)
            if user_type == 'churning_user':
                churn_indicators += 2
            elif sentiment == 'NEGATIVE' and rating <= 2:
                churn_indicators += 1
            elif rating == 1:
                churn_indicators += 1

        max_possible = len(reviews) * 2
        churn_risk = churn_indicators / max_possible * 100
        return min(churn_risk, 100.0)

    def _generate_recommendations(self, sentiment_counts, priority_counts,
                                  dept_counts, critical_issues, quick_wins,
                                  churn_risk) -> List[str]:
        """
        Generate actionable recommendations.

        Emits, in order: a negative-sentiment warning (above 40% or above
        25%), an urgent note when any critical-priority review exists, the
        busiest department, a churn warning above 30%, and a quick-win note.
        ``critical_issues`` is accepted but not used by the current rules.
        """
        recommendations = []

        # Sentiment-based
        total = sum(sentiment_counts.values())
        if total > 0:
            neg_pct = sentiment_counts.get('NEGATIVE', 0) / total * 100
            if neg_pct > 40:
                recommendations.append(
                    f"🚨 HIGH: {neg_pct:.0f}% negative sentiment. Immediate investigation needed."
                )
            elif neg_pct > 25:
                recommendations.append(
                    f"⚠️ MEDIUM: {neg_pct:.0f}% negative sentiment. Monitor closely."
                )

        # Priority-based
        critical_count = priority_counts.get('critical', 0)
        if critical_count > 0:
            recommendations.append(
                f"🔥 URGENT: {critical_count} critical issues require immediate attention."
            )

        # Department-based: on a tie, the department seen first wins
        if dept_counts:
            top_dept = max(dept_counts, key=dept_counts.get)
            top_count = dept_counts[top_dept]
            recommendations.append(
                f"🎯 FOCUS: {top_count} issues routed to {top_dept} department."
            )

        # Churn risk
        if churn_risk > 30:
            recommendations.append(
                f"⚠️ CHURN: {churn_risk:.0f}% churn risk detected. Implement retention strategy."
            )

        # Quick wins
        if quick_wins:
            recommendations.append(
                f"⚡ OPPORTUNITY: {len(quick_wins)} quick wins available for easy improvements."
            )

        return recommendations

    def _empty_insights(self) -> Dict[str, Any]:
        """
        Return the insights structure for an empty batch.

        Carries exactly the same keys as a non-empty ``analyze_batch``
        result so callers can index either one without a KeyError.
        """
        return {
            'total_reviews': 0,
            'sentiment_positive': 0,
            'sentiment_neutral': 0,
            'sentiment_negative': 0,
            'sentiment_distribution': {},
            'priority_critical': 0,
            'priority_high': 0,
            'priority_medium': 0,
            'priority_low': 0,
            'priority_distribution': {},
            'dept_engineering': 0,
            'dept_ux': 0,
            'dept_support': 0,
            'dept_business': 0,
            'department_distribution': {},
            'emotion_distribution': {},
            'type_distribution': {},
            'model_agreement_rate': 0.0,
            'churn_risk': 0.0,
            'critical_issues': [],
            'quick_wins': [],
            'recommendations': []
        }
if __name__ == "__main__":
    # Manual smoke test: push two hand-written reviews through Stage 4 and
    # dump the resulting insights as JSON.
    divider = "=" * 60
    print(f"\n{divider}")
    print("🧪 TESTING STAGE 4 BATCH ANALYSIS")
    print(divider)

    # A critical, negative bug report that is flagged for human review.
    crash_report = {
        'review_id': '001',
        'review_text': 'App crashes!',
        'rating': 1,
        'stage1_llm1_type': 'bug_report',
        'stage1_llm1_department': 'engineering',
        'stage1_llm1_priority': 'critical',
        'stage1_llm2_user_type': 'power_user',
        'stage1_llm2_emotion': 'frustration',
        'stage2_agreement': True,
        'stage3_final_sentiment': 'NEGATIVE',
        'stage3_needs_human_review': True,
        'stage3_reasoning': 'Critical bug',
        'stage3_action_recommendation': 'Fix immediately',
    }

    # A positive, low-priority review without reasoning/action fields.
    praise = {
        'review_id': '002',
        'review_text': 'Great app!',
        'rating': 5,
        'stage1_llm1_type': 'praise',
        'stage1_llm1_department': 'ux',
        'stage1_llm1_priority': 'low',
        'stage1_llm2_user_type': 'regular_user',
        'stage1_llm2_emotion': 'joy',
        'stage2_agreement': True,
        'stage3_final_sentiment': 'POSITIVE',
        'stage3_needs_human_review': False,
    }

    analyzer = Stage4BatchAnalysis()
    batch_insights = analyzer.analyze_batch([crash_report, praise])

    print("\n📊 BATCH INSIGHTS:")
    print(json.dumps(batch_insights, indent=2))
    print("\n✅ Stage 4 test complete!")