pravinai committed
Commit 3236276 · verified · 1 Parent(s): 0dbd30e

Add sentiment_analyzer.py

Files changed (1)
  1. sentiment_analyzer.py +950 -0
sentiment_analyzer.py ADDED
@@ -0,0 +1,950 @@
"""
SentilensAI - Advanced Sentiment Analysis for AI Chatbot Messages

This module provides comprehensive sentiment analysis capabilities specifically designed
for analyzing AI chatbot conversations using LangChain integration and multiple ML models.

Features:
- Multi-model sentiment analysis (VADER, TextBlob, spaCy, Transformers)
- LangChain integration for intelligent conversation analysis
- Real-time sentiment tracking for chatbot interactions
- Advanced emotion detection and classification
- Context-aware sentiment analysis for conversational AI

Author: Pravin Selvamuthu
Repository: https://github.com/kernelseed/sentilens-ai
"""
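# Quick-start sketch (illustrative; assumes the required NLTK resources can be downloaded
# and that no OpenAI key is configured, so only the local analyzers run):
#
#     from sentiment_analyzer import SentilensAIAnalyzer
#
#     analyzer = SentilensAIAnalyzer()
#     result = analyzer.analyze_sentiment("I love this chatbot!", method="ensemble")
#     print(result.sentiment, result.confidence, result.emotions)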
import re
import json
import logging
from typing import Dict, List, Tuple, Optional, Union, Any
from datetime import datetime
from dataclasses import dataclass
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
# NLP Libraries
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# LangChain Integration
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.llms import OpenAI
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.output_parsers import BaseOutputParser
# Transformers for advanced sentiment analysis
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
    import torch
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False

# Multilingual support
try:
    from multilingual_sentiment import MultilingualSentimentAnalyzer, MultilingualSentimentResult
    MULTILINGUAL_AVAILABLE = True
except ImportError:
    MULTILINGUAL_AVAILABLE = False

# spaCy for advanced NLP
try:
    import spacy
    SPACY_AVAILABLE = True
except ImportError:
    SPACY_AVAILABLE = False
# Download required NLTK data
try:
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
    nltk.download('wordnet', quiet=True)
    nltk.download('vader_lexicon', quiet=True)
except Exception:
    pass

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass
class SentimentResult:
    """Data class for sentiment analysis results"""
    text: str
    sentiment: str  # positive, negative, neutral
    confidence: float
    polarity: float  # -1 to 1
    subjectivity: float  # 0 to 1
    emotions: Dict[str, float]
    timestamp: datetime
    model_used: str
    metadata: Dict[str, Any]


@dataclass
class ChatbotMessage:
    """Data class for chatbot message analysis"""
    message_id: str
    user_message: str
    bot_response: str
    timestamp: datetime
    conversation_id: str
    user_sentiment: SentimentResult
    bot_sentiment: SentimentResult
    conversation_sentiment: str
    satisfaction_score: float
class SentimentOutputParser(BaseOutputParser):
    """Custom output parser for LangChain sentiment analysis"""

    def parse(self, text: str) -> Dict[str, Any]:
        """Parse sentiment analysis output from LLM"""
        try:
            # Try to parse as JSON first
            if text.strip().startswith('{'):
                return json.loads(text)

            # Extract sentiment information using regex
            sentiment_match = re.search(r'sentiment["\']?\s*:\s*["\']?(\w+)', text, re.IGNORECASE)
            confidence_match = re.search(r'confidence["\']?\s*:\s*([0-9.]+)', text, re.IGNORECASE)
            polarity_match = re.search(r'polarity["\']?\s*:\s*([-0-9.]+)', text, re.IGNORECASE)

            result = {
                'sentiment': sentiment_match.group(1).lower() if sentiment_match else 'neutral',
                'confidence': float(confidence_match.group(1)) if confidence_match else 0.5,
                'polarity': float(polarity_match.group(1)) if polarity_match else 0.0,
                'raw_output': text
            }

            return result
        except Exception as e:
            logger.warning(f"Failed to parse sentiment output: {e}")
            return {
                'sentiment': 'neutral',
                'confidence': 0.5,
                'polarity': 0.0,
                'raw_output': text
            }
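# Illustrative behaviour of the regex fallback in SentimentOutputParser.parse()
# (hand-traced, not an executed fixture): a non-JSON LLM reply such as
#     "sentiment: positive, confidence: 0.92"
# parses to {'sentiment': 'positive', 'confidence': 0.92, 'polarity': 0.0, 'raw_output': ...}
# because the polarity pattern finds no match and the 0.0 default applies.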
class SentilensAIAnalyzer:
    """
    Advanced sentiment analysis for AI chatbot messages using multiple models and LangChain
    """

    def __init__(self, openai_api_key: Optional[str] = None, model_cache_dir: str = "./model_cache",
                 enable_multilingual: bool = True):
        """
        Initialize the SentilensAI analyzer

        Args:
            openai_api_key: OpenAI API key for LangChain integration
            model_cache_dir: Directory to cache downloaded models
            enable_multilingual: Enable multilingual support for English, Spanish, and Chinese
        """
        self.model_cache_dir = Path(model_cache_dir)
        self.model_cache_dir.mkdir(exist_ok=True)

        # Multilingual support
        self.enable_multilingual = enable_multilingual and MULTILINGUAL_AVAILABLE
        if self.enable_multilingual:
            try:
                self.multilingual_analyzer = MultilingualSentimentAnalyzer()
                logger.info("✅ Multilingual support enabled (English, Spanish, Chinese)")
            except Exception as e:
                logger.warning(f"Failed to initialize multilingual analyzer: {e}")
                self.enable_multilingual = False
                self.multilingual_analyzer = None  # keep the attribute defined on failure
        else:
            self.multilingual_analyzer = None

        # Initialize sentiment analyzers
        self.vader_analyzer = SentimentIntensityAnalyzer()
        self.lemmatizer = WordNetLemmatizer()

        # Load stopwords
        try:
            self.stop_words = set(stopwords.words('english'))
        except LookupError:
            self.stop_words = set()

        # Initialize spaCy model
        self.spacy_model = None
        if SPACY_AVAILABLE:
            try:
                self.spacy_model = spacy.load("en_core_web_sm")
            except OSError:
                logger.warning("spaCy model 'en_core_web_sm' not found. Install with: python -m spacy download en_core_web_sm")

        # Initialize transformers pipeline
        self.transformers_pipeline = None
        if TRANSFORMERS_AVAILABLE:
            try:
                self.transformers_pipeline = pipeline(
                    "sentiment-analysis",
                    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
                    cache_dir=self.model_cache_dir
                )
            except Exception as e:
                logger.warning(f"Failed to load transformers pipeline: {e}")

        # Initialize LangChain components
        self.llm = None
        self.sentiment_chain = None
        if openai_api_key:
            try:
                self.llm = OpenAI(api_key=openai_api_key, temperature=0.1)
                self._setup_langchain_components()
            except Exception as e:
                logger.warning(f"Failed to initialize OpenAI LLM: {e}")

        # Emotion detection patterns
        self.emotion_patterns = {
            'joy': [r'\b(happy|joy|excited|great|wonderful|amazing|fantastic|love|adore)\b'],
            'sadness': [r'\b(sad|depressed|upset|disappointed|hurt|grief|sorrow)\b'],
            'anger': [r'\b(angry|mad|furious|rage|annoyed|irritated|frustrated)\b'],
            'fear': [r'\b(scared|afraid|worried|anxious|nervous|terrified|panic)\b'],
            'surprise': [r'\b(surprised|shocked|amazed|wow|incredible|unbelievable)\b'],
            'disgust': [r'\b(disgusted|revolted|sick|gross|nasty|awful|terrible)\b']
        }
    def _setup_langchain_components(self):
        """Setup LangChain components for sentiment analysis"""
        if not self.llm:
            return

        # Create sentiment analysis prompt template
        sentiment_prompt = PromptTemplate(
            input_variables=["text", "context"],
            template="""
Analyze the sentiment of the following text from an AI chatbot conversation.
Consider the context of the conversation and provide a detailed sentiment analysis.

Text: "{text}"
Context: "{context}"

Please provide your analysis in the following JSON format:
{{
    "sentiment": "positive|negative|neutral",
    "confidence": 0.0-1.0,
    "polarity": -1.0 to 1.0,
    "reasoning": "Brief explanation of your analysis",
    "emotions": {{
        "joy": 0.0-1.0,
        "sadness": 0.0-1.0,
        "anger": 0.0-1.0,
        "fear": 0.0-1.0,
        "surprise": 0.0-1.0,
        "disgust": 0.0-1.0
    }}
}}
"""
        )

        # Create the sentiment analysis chain
        self.sentiment_chain = LLMChain(
            llm=self.llm,
            prompt=sentiment_prompt,
            output_parser=SentimentOutputParser()
        )
    def preprocess_text(self, text: str) -> str:
        """
        Preprocess text for sentiment analysis

        Args:
            text: Input text to preprocess

        Returns:
            Preprocessed text
        """
        if not text:
            return ""

        # Convert to lowercase
        text = text.lower()

        # Remove URLs, mentions, and hashtags
        text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
        text = re.sub(r'@\w+|#\w+', '', text)

        # Remove extra whitespace
        text = re.sub(r'\s+', ' ', text).strip()

        # Remove special characters but keep basic punctuation
        text = re.sub(r'[^\w\s\.\!\?\,\;\:]', '', text)

        return text
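    # Hand-traced example of the preprocessing above:
    #     "I LOVE this!!!   Visit https://example.com @support #help"
    # is lowercased, the URL, mention and hashtag are dropped, whitespace is collapsed and
    # basic punctuation is kept, leaving roughly "i love this!!! visit".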
    def extract_emotions(self, text: str) -> Dict[str, float]:
        """
        Extract emotion scores from text using pattern matching

        Args:
            text: Input text

        Returns:
            Dictionary of emotion scores
        """
        emotions = {emotion: 0.0 for emotion in self.emotion_patterns.keys()}

        for emotion, patterns in self.emotion_patterns.items():
            for pattern in patterns:
                matches = re.findall(pattern, text, re.IGNORECASE)
                emotions[emotion] += len(matches) * 0.1  # Simple scoring

        # Normalize scores
        total_score = sum(emotions.values())
        if total_score > 0:
            emotions = {k: min(v / total_score, 1.0) for k, v in emotions.items()}

        return emotions
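    # Hand-traced example of the scoring above: in "I'm happy but a bit scared" the joy
    # pattern matches once and the fear pattern matches once, so both start at 0.1; after
    # normalising by the 0.2 total, joy and fear each end up at 0.5 and the other emotions
    # stay at 0.0.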
    def analyze_with_vader(self, text: str) -> Dict[str, Any]:
        """Analyze sentiment using VADER"""
        scores = self.vader_analyzer.polarity_scores(text)

        # Determine sentiment
        if scores['compound'] >= 0.05:
            sentiment = 'positive'
        elif scores['compound'] <= -0.05:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        return {
            'sentiment': sentiment,
            'confidence': abs(scores['compound']),
            'polarity': scores['compound'],
            'subjectivity': 0.5,  # VADER doesn't provide subjectivity
            'scores': scores
        }

    def analyze_with_textblob(self, text: str) -> Dict[str, Any]:
        """Analyze sentiment using TextBlob"""
        blob = TextBlob(text)

        # Determine sentiment
        if blob.sentiment.polarity > 0.1:
            sentiment = 'positive'
        elif blob.sentiment.polarity < -0.1:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        return {
            'sentiment': sentiment,
            'confidence': abs(blob.sentiment.polarity),
            'polarity': blob.sentiment.polarity,
            'subjectivity': blob.sentiment.subjectivity
        }
    def analyze_with_spacy(self, text: str) -> Dict[str, Any]:
        """Analyze sentiment using spaCy (if available)"""
        if not self.spacy_model:
            return self.analyze_with_textblob(text)  # Fallback

        doc = self.spacy_model(text)

        # Simple sentiment analysis using spaCy's token attributes
        positive_words = 0
        negative_words = 0
        total_words = 0

        for token in doc:
            if not token.is_stop and not token.is_punct and token.is_alpha:
                total_words += 1
                # Simple heuristic based on word sentiment
                if token.lemma_.lower() in ['good', 'great', 'excellent', 'amazing', 'wonderful']:
                    positive_words += 1
                elif token.lemma_.lower() in ['bad', 'terrible', 'awful', 'horrible', 'worst']:
                    negative_words += 1

        if total_words == 0:
            polarity = 0.0
        else:
            polarity = (positive_words - negative_words) / total_words

        # Determine sentiment
        if polarity > 0.1:
            sentiment = 'positive'
        elif polarity < -0.1:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        return {
            'sentiment': sentiment,
            'confidence': abs(polarity),
            'polarity': polarity,
            'subjectivity': 0.5  # spaCy doesn't provide subjectivity
        }
    def analyze_with_transformers(self, text: str) -> Dict[str, Any]:
        """Analyze sentiment using Transformers (if available)"""
        if not self.transformers_pipeline:
            return self.analyze_with_textblob(text)  # Fallback

        try:
            result = self.transformers_pipeline(text)[0]

            # Map transformer labels to our format; depending on the model revision the
            # label may be 'LABEL_0'..'LABEL_2' or already 'negative'/'neutral'/'positive'
            label_mapping = {
                'LABEL_0': 'negative',
                'LABEL_1': 'neutral',
                'LABEL_2': 'positive'
            }

            raw_label = result['label']
            sentiment = label_mapping.get(raw_label, raw_label.lower())
            if sentiment not in ('positive', 'negative', 'neutral'):
                sentiment = 'neutral'
            confidence = result['score']

            # Estimate polarity from confidence and sentiment
            if sentiment == 'positive':
                polarity = confidence
            elif sentiment == 'negative':
                polarity = -confidence
            else:
                polarity = 0.0

            return {
                'sentiment': sentiment,
                'confidence': confidence,
                'polarity': polarity,
                'subjectivity': 0.5  # Transformers don't provide subjectivity
            }
        except Exception as e:
            logger.warning(f"Transformers analysis failed: {e}")
            return self.analyze_with_textblob(text)  # Fallback
    def analyze_with_langchain(self, text: str, context: str = "") -> Dict[str, Any]:
        """Analyze sentiment using LangChain and LLM"""
        if not self.sentiment_chain:
            return self.analyze_with_textblob(text)  # Fallback

        try:
            result = self.sentiment_chain.run(text=text, context=context)

            # Ensure we have the required fields
            if not isinstance(result, dict):
                result = {'sentiment': 'neutral', 'confidence': 0.5, 'polarity': 0.0}

            # Validate and normalize the result
            sentiment = result.get('sentiment', 'neutral')
            if sentiment not in ['positive', 'negative', 'neutral']:
                sentiment = 'neutral'

            confidence = max(0.0, min(1.0, float(result.get('confidence', 0.5))))
            polarity = max(-1.0, min(1.0, float(result.get('polarity', 0.0))))

            # Extract emotions if available
            emotions = result.get('emotions', {})
            if not isinstance(emotions, dict):
                emotions = self.extract_emotions(text)

            return {
                'sentiment': sentiment,
                'confidence': confidence,
                'polarity': polarity,
                'subjectivity': 0.5,  # LLM doesn't provide subjectivity
                'emotions': emotions,
                'reasoning': result.get('reasoning', '')
            }
        except Exception as e:
            logger.warning(f"LangChain analysis failed: {e}")
            return self.analyze_with_textblob(text)  # Fallback
    def analyze_sentiment(self, text: str, method: str = 'ensemble', context: str = "") -> SentimentResult:
        """
        Analyze sentiment using specified method

        Args:
            text: Text to analyze
            method: Analysis method ('vader', 'textblob', 'spacy', 'transformers', 'langchain', 'ensemble')
            context: Additional context for analysis

        Returns:
            SentimentResult object
        """
        if not text or not text.strip():
            return SentimentResult(
                text=text,
                sentiment='neutral',
                confidence=0.0,
                polarity=0.0,
                subjectivity=0.0,
                emotions={},
                timestamp=datetime.now(),
                model_used=method,
                metadata={}
            )

        # Preprocess text
        processed_text = self.preprocess_text(text)

        if method == 'ensemble':
            # Use ensemble of all available methods
            results = []

            # VADER
            vader_result = self.analyze_with_vader(processed_text)
            results.append(vader_result)

            # TextBlob
            textblob_result = self.analyze_with_textblob(processed_text)
            results.append(textblob_result)

            # spaCy
            spacy_result = self.analyze_with_spacy(processed_text)
            results.append(spacy_result)

            # Transformers
            if self.transformers_pipeline:
                transformers_result = self.analyze_with_transformers(processed_text)
                results.append(transformers_result)

            # LangChain
            if self.sentiment_chain:
                langchain_result = self.analyze_with_langchain(processed_text, context)
                results.append(langchain_result)

            # Ensemble voting
            sentiment_votes = [r['sentiment'] for r in results]
            sentiment_counts = {s: sentiment_votes.count(s) for s in set(sentiment_votes)}
            final_sentiment = max(sentiment_counts, key=sentiment_counts.get)

            # Average confidence and polarity
            avg_confidence = np.mean([r['confidence'] for r in results])
            avg_polarity = np.mean([r['polarity'] for r in results])
            avg_subjectivity = np.mean([r.get('subjectivity', 0.5) for r in results])

            # Combine emotions
            all_emotions = {}
            for result in results:
                if 'emotions' in result:
                    for emotion, score in result['emotions'].items():
                        all_emotions[emotion] = all_emotions.get(emotion, 0) + score
            emotions = {k: v / len(results) for k, v in all_emotions.items()}

            if not emotions:
                emotions = self.extract_emotions(processed_text)

            final_result = {
                'sentiment': final_sentiment,
                'confidence': avg_confidence,
                'polarity': avg_polarity,
                'subjectivity': avg_subjectivity,
                'emotions': emotions
            }

        else:
            # Use specific method
            if method == 'vader':
                final_result = self.analyze_with_vader(processed_text)
            elif method == 'textblob':
                final_result = self.analyze_with_textblob(processed_text)
            elif method == 'spacy':
                final_result = self.analyze_with_spacy(processed_text)
            elif method == 'transformers':
                final_result = self.analyze_with_transformers(processed_text)
            elif method == 'langchain':
                final_result = self.analyze_with_langchain(processed_text, context)
            else:
                raise ValueError(f"Unknown method: {method}")

        # Extract emotions if not provided
        if 'emotions' not in final_result:
            final_result['emotions'] = self.extract_emotions(processed_text)

        return SentimentResult(
            text=text,
            sentiment=final_result['sentiment'],
            confidence=final_result['confidence'],
            polarity=final_result['polarity'],
            subjectivity=final_result.get('subjectivity', 0.5),
            emotions=final_result['emotions'],
            timestamp=datetime.now(),
            model_used=method,
            metadata=final_result
        )
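    # Ensemble behaviour in brief (hand-traced): with only the local models available the
    # ensemble collects three votes (VADER, TextBlob, spaCy); if they come back as
    # ['positive', 'positive', 'neutral'], the majority label 'positive' wins, and the
    # reported confidence and polarity are the plain means of the three models' values.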
    def analyze_sentiment_multilingual(self, text: str, target_language: Optional[str] = None,
                                       enable_cross_language: bool = False) -> 'MultilingualSentimentResult':
        """
        Analyze sentiment with multilingual support (English, Spanish, Chinese)

        Args:
            text: Text to analyze
            target_language: Specific language to use ('en', 'es', 'zh') or None for auto-detection
            enable_cross_language: Enable cross-language consensus analysis

        Returns:
            MultilingualSentimentResult object
        """
        # The return annotation is a string so the module still imports when the optional
        # multilingual_sentiment dependency is missing.
        if not self.enable_multilingual or not self.multilingual_analyzer:
            # Fallback to regular analysis
            regular_result = self.analyze_sentiment(text, method='ensemble')
            return MultilingualSentimentResult(
                text=text,
                detected_language='en',
                language_confidence=0.5,
                sentiment=regular_result.sentiment,
                confidence=regular_result.confidence,
                emotions=regular_result.emotions,
                methods_used=[regular_result.model_used],
                language_specific_analysis={'fallback': True}
            )

        return self.multilingual_analyzer.analyze_sentiment_multilingual(
            text, target_language, enable_cross_language
        )
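    # Illustrative call (a sketch; it assumes the optional multilingual_sentiment module is
    # importable, otherwise the English-only fallback above is used):
    #
    #     result = analyzer.analyze_sentiment_multilingual("¡Me encanta este chatbot!",
    #                                                      target_language='es')
    #     print(result.detected_language, result.sentiment)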
    def analyze_conversation_multilingual(self, conversation: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze a conversation with multilingual support

        Args:
            conversation: Conversation dictionary with messages

        Returns:
            Dictionary with multilingual analysis results
        """
        if not self.enable_multilingual or not self.multilingual_analyzer:
            # Fallback to regular analysis
            messages = conversation.get('messages', [])
            regular_results = []
            for msg in messages:
                user_text = msg.get('user', '')
                bot_text = msg.get('bot', '')
                if user_text:
                    regular_results.append(self.analyze_sentiment(user_text))
                if bot_text:
                    regular_results.append(self.analyze_sentiment(bot_text))
            return {'fallback': True, 'results': regular_results}

        return self.multilingual_analyzer.analyze_conversation_multilingual(conversation)

    def get_supported_languages(self) -> List[str]:
        """Get list of supported languages for multilingual analysis"""
        if self.enable_multilingual and self.multilingual_analyzer:
            return self.multilingual_analyzer.get_supported_languages()
        return ['en']  # Default to English only

    def get_language_name(self, language_code: str) -> str:
        """Get human-readable language name"""
        if self.enable_multilingual and self.multilingual_analyzer:
            return self.multilingual_analyzer.get_language_name(language_code)
        return {'en': 'English'}.get(language_code, language_code)
    def analyze_chatbot_conversation(self, messages: List[Dict[str, Any]]) -> List[ChatbotMessage]:
        """
        Analyze a complete chatbot conversation

        Args:
            messages: List of message dictionaries with 'user', 'bot', 'timestamp', 'conversation_id'

        Returns:
            List of ChatbotMessage objects
        """
        results = []

        for i, msg in enumerate(messages):
            user_text = msg.get('user', '')
            bot_text = msg.get('bot', '')
            timestamp = msg.get('timestamp', datetime.now())
            conversation_id = msg.get('conversation_id', f'conv_{i}')
            message_id = msg.get('message_id', f'{conversation_id}_{i}')

            # Analyze user message
            user_sentiment = self.analyze_sentiment(user_text, method='ensemble')

            # Analyze bot response
            bot_sentiment = self.analyze_sentiment(bot_text, method='ensemble', context=user_text)

            # Determine overall conversation sentiment
            if user_sentiment.sentiment == bot_sentiment.sentiment:
                conversation_sentiment = user_sentiment.sentiment
            else:
                # Use weighted average based on confidence
                user_weight = user_sentiment.confidence
                bot_weight = bot_sentiment.confidence
                total_weight = user_weight + bot_weight

                if total_weight > 0:
                    user_polarity_weighted = user_sentiment.polarity * (user_weight / total_weight)
                    bot_polarity_weighted = bot_sentiment.polarity * (bot_weight / total_weight)
                    combined_polarity = user_polarity_weighted + bot_polarity_weighted

                    if combined_polarity > 0.1:
                        conversation_sentiment = 'positive'
                    elif combined_polarity < -0.1:
                        conversation_sentiment = 'negative'
                    else:
                        conversation_sentiment = 'neutral'
                else:
                    conversation_sentiment = 'neutral'

            # Calculate satisfaction score (0-1)
            satisfaction_score = self._calculate_satisfaction_score(user_sentiment, bot_sentiment)

            chatbot_message = ChatbotMessage(
                message_id=message_id,
                user_message=user_text,
                bot_response=bot_text,
                timestamp=timestamp,
                conversation_id=conversation_id,
                user_sentiment=user_sentiment,
                bot_sentiment=bot_sentiment,
                conversation_sentiment=conversation_sentiment,
                satisfaction_score=satisfaction_score
            )

            results.append(chatbot_message)

        return results
    def _calculate_satisfaction_score(self, user_sentiment: SentimentResult, bot_sentiment: SentimentResult) -> float:
        """Calculate satisfaction score based on sentiment alignment"""
        # Base score from user sentiment
        base_score = (user_sentiment.polarity + 1) / 2  # Convert -1,1 to 0,1

        # Adjust based on bot response sentiment
        if user_sentiment.sentiment == 'positive' and bot_sentiment.sentiment == 'positive':
            adjustment = 0.2
        elif user_sentiment.sentiment == 'negative' and bot_sentiment.sentiment == 'positive':
            adjustment = 0.3  # Bot being positive to a negative user is good
        elif user_sentiment.sentiment == 'neutral' and bot_sentiment.sentiment == 'positive':
            adjustment = 0.1
        else:
            adjustment = -0.1

        # Factor in confidence
        confidence_factor = (user_sentiment.confidence + bot_sentiment.confidence) / 2

        final_score = base_score + adjustment
        final_score = max(0.0, min(1.0, final_score))  # Clamp to 0-1

        return final_score * confidence_factor
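    # Hand-traced example of the score above: a negative user turn with polarity -0.6 gives
    # a base score of (-0.6 + 1) / 2 = 0.2; a positive bot reply adds the 0.3 adjustment,
    # giving 0.5; with user confidence 0.6 and bot confidence 0.8 the confidence factor is
    # 0.7, so the returned satisfaction score is 0.5 * 0.7 = 0.35.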
    def get_sentiment_summary(self, results: List[SentimentResult]) -> Dict[str, Any]:
        """Get summary statistics for sentiment analysis results"""
        if not results:
            return {}

        sentiments = [r.sentiment for r in results]
        confidences = [r.confidence for r in results]
        polarities = [r.polarity for r in results]

        sentiment_counts = {s: sentiments.count(s) for s in set(sentiments)}
        total = len(sentiments)

        return {
            'total_messages': total,
            'sentiment_distribution': {k: v / total for k, v in sentiment_counts.items()},
            'average_confidence': np.mean(confidences),
            'average_polarity': np.mean(polarities),
            'sentiment_trend': sentiments,
            'confidence_trend': confidences,
            'polarity_trend': polarities
        }
    def export_results(self, results: List[Union[SentimentResult, ChatbotMessage]],
                       filename: str, format: str = 'json') -> str:
        """
        Export analysis results to file

        Args:
            results: List of analysis results
            filename: Output filename
            format: Export format ('json', 'csv', 'excel')

        Returns:
            Path to exported file
        """
        output_path = Path(filename)

        if format == 'json':
            # Convert results to dictionaries
            data = []
            for result in results:
                if isinstance(result, SentimentResult):
                    data.append({
                        'text': result.text,
                        'sentiment': result.sentiment,
                        'confidence': result.confidence,
                        'polarity': result.polarity,
                        'subjectivity': result.subjectivity,
                        'emotions': result.emotions,
                        'timestamp': result.timestamp.isoformat(),
                        'model_used': result.model_used
                    })
                elif isinstance(result, ChatbotMessage):
                    data.append({
                        'message_id': result.message_id,
                        'user_message': result.user_message,
                        'bot_response': result.bot_response,
                        'timestamp': result.timestamp.isoformat(),
                        'conversation_id': result.conversation_id,
                        'user_sentiment': result.user_sentiment.sentiment,
                        'user_confidence': result.user_sentiment.confidence,
                        'user_polarity': result.user_sentiment.polarity,
                        'bot_sentiment': result.bot_sentiment.sentiment,
                        'bot_confidence': result.bot_sentiment.confidence,
                        'bot_polarity': result.bot_sentiment.polarity,
                        'conversation_sentiment': result.conversation_sentiment,
                        'satisfaction_score': result.satisfaction_score
                    })

            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)

        elif format == 'csv':
            # Convert to DataFrame and save as CSV
            data = []
            for result in results:
                if isinstance(result, SentimentResult):
                    data.append({
                        'text': result.text,
                        'sentiment': result.sentiment,
                        'confidence': result.confidence,
                        'polarity': result.polarity,
                        'subjectivity': result.subjectivity,
                        'timestamp': result.timestamp.isoformat(),
                        'model_used': result.model_used
                    })
                elif isinstance(result, ChatbotMessage):
                    data.append({
                        'message_id': result.message_id,
                        'user_message': result.user_message,
                        'bot_response': result.bot_response,
                        'timestamp': result.timestamp.isoformat(),
                        'conversation_id': result.conversation_id,
                        'user_sentiment': result.user_sentiment.sentiment,
                        'user_confidence': result.user_sentiment.confidence,
                        'bot_sentiment': result.bot_sentiment.sentiment,
                        'bot_confidence': result.bot_sentiment.confidence,
                        'conversation_sentiment': result.conversation_sentiment,
                        'satisfaction_score': result.satisfaction_score
                    })

            df = pd.DataFrame(data)
            df.to_csv(output_path, index=False, encoding='utf-8')

        elif format == 'excel':
            # Convert to DataFrame and save as Excel
            data = []
            for result in results:
                if isinstance(result, SentimentResult):
                    data.append({
                        'text': result.text,
                        'sentiment': result.sentiment,
                        'confidence': result.confidence,
                        'polarity': result.polarity,
                        'subjectivity': result.subjectivity,
                        'timestamp': result.timestamp.isoformat(),
                        'model_used': result.model_used
                    })
                elif isinstance(result, ChatbotMessage):
                    data.append({
                        'message_id': result.message_id,
                        'user_message': result.user_message,
                        'bot_response': result.bot_response,
                        'timestamp': result.timestamp.isoformat(),
                        'conversation_id': result.conversation_id,
                        'user_sentiment': result.user_sentiment.sentiment,
                        'user_confidence': result.user_sentiment.confidence,
                        'bot_sentiment': result.bot_sentiment.sentiment,
                        'bot_confidence': result.bot_sentiment.confidence,
                        'conversation_sentiment': result.conversation_sentiment,
                        'satisfaction_score': result.satisfaction_score
                    })

            df = pd.DataFrame(data)
            df.to_excel(output_path, index=False, engine='openpyxl')

        else:
            raise ValueError(f"Unsupported format: {format}")

        return str(output_path)
def main():
    """Demo function to showcase SentilensAI capabilities"""
    print("🤖 SentilensAI - Advanced Sentiment Analysis for AI Chatbot Messages")
    print("=" * 70)

    # Initialize analyzer
    analyzer = SentilensAIAnalyzer()

    # Sample chatbot messages
    sample_messages = [
        {
            'user': 'I love this chatbot! It\'s so helpful and friendly.',
            'bot': 'Thank you so much! I\'m thrilled to hear that you\'re having a great experience. Is there anything else I can help you with today?',
            'timestamp': datetime.now(),
            'conversation_id': 'demo_001'
        },
        {
            'user': 'This is terrible. The bot keeps giving me wrong answers.',
            'bot': 'I apologize for the confusion. Let me help you get the correct information. Could you please provide more details about what you\'re looking for?',
            'timestamp': datetime.now(),
            'conversation_id': 'demo_002'
        },
        {
            'user': 'Can you help me with my account balance?',
            'bot': 'Of course! I\'d be happy to help you check your account balance. Please provide your account number or login credentials.',
            'timestamp': datetime.now(),
            'conversation_id': 'demo_003'
        }
    ]

    print("\n📊 Analyzing sample chatbot conversations...")

    # Analyze conversations
    results = analyzer.analyze_chatbot_conversation(sample_messages)

    # Display results
    for i, result in enumerate(results, 1):
        print(f"\n--- Conversation {i} ---")
        print(f"User: {result.user_message}")
        print(f"Bot: {result.bot_response}")
        print(f"User Sentiment: {result.user_sentiment.sentiment} (confidence: {result.user_sentiment.confidence:.2f})")
        print(f"Bot Sentiment: {result.bot_sentiment.sentiment} (confidence: {result.bot_sentiment.confidence:.2f})")
        print(f"Conversation Sentiment: {result.conversation_sentiment}")
        print(f"Satisfaction Score: {result.satisfaction_score:.2f}")

    # Get summary
    sentiment_results = [r.user_sentiment for r in results] + [r.bot_sentiment for r in results]
    summary = analyzer.get_sentiment_summary(sentiment_results)

    print("\n📈 Summary Statistics:")
    print(f"Total Messages: {summary['total_messages']}")
    print(f"Sentiment Distribution: {summary['sentiment_distribution']}")
    print(f"Average Confidence: {summary['average_confidence']:.2f}")
    print(f"Average Polarity: {summary['average_polarity']:.2f}")

    # Export results
    output_file = analyzer.export_results(results, 'sentiment_analysis_results.json', 'json')
    print(f"\n💾 Results exported to: {output_file}")

    print("\n✅ SentilensAI demo completed successfully!")
    print("🚀 Ready for production use with LangChain and ML models!")


if __name__ == "__main__":
    main()