pythonprincess committed on
Commit
f2ac8aa
·
verified ·
1 Parent(s): 1fe9570

Delete sentiment_utils.py

Browse files
Files changed (1) hide show
  1. sentiment_utils.py +0 -396
sentiment_utils.py DELETED
@@ -1,396 +0,0 @@
1
- # models/sentiment/sentiment_utils.py
2
-
3
- """
4
- Sentiment Analysis Model Utilities for PENNY Project
5
- Handles text sentiment classification for user input analysis and content moderation.
6
- Provides async sentiment analysis with structured error handling and logging.
7
- """
8
-
9
- import asyncio
10
- import time
11
- from typing import Dict, Any, Optional, List
12
-
13
- # --- Logging Imports ---
14
- from app.logging_utils import log_interaction, sanitize_for_logging
15
-
16
- # --- Model Loader Import ---
17
- try:
18
- from app.model_loader import load_model_pipeline
19
- MODEL_LOADER_AVAILABLE = True
20
- except ImportError:
21
- MODEL_LOADER_AVAILABLE = False
22
- import logging
23
- logging.getLogger(__name__).warning("Could not import load_model_pipeline. Sentiment service unavailable.")
24
-
25
- # Global variable to store the loaded pipeline for re-use; stays None until _initialize_sentiment_pipeline() stores a loaded pipeline here
26
- SENTIMENT_PIPELINE: Optional[Any] = None
27
- AGENT_NAME = "penny-sentiment-agent"  # name passed to load_model_pipeline() and shown in initialization log entries
28
- INITIALIZATION_ATTEMPTED = False  # set to True on the first initialization attempt so the load is tried only once
29
-
30
-
31
def _initialize_sentiment_pipeline() -> bool:
    """
    Load the sentiment pipeline into ``SENTIMENT_PIPELINE`` at most once.

    The first call marks ``INITIALIZATION_ATTEMPTED`` and tries to load the
    model named by ``AGENT_NAME``; every later call only reports whether a
    pipeline object is currently stored. Each step is recorded through
    ``log_interaction`` under the "sentiment_initialization" intent.

    Returns:
        bool: True if a pipeline is loaded, False otherwise.
    """
    global SENTIMENT_PIPELINE, INITIALIZATION_ATTEMPTED

    def _report(**fields: Any) -> None:
        # Every event emitted by this function shares the same intent tag.
        log_interaction(intent="sentiment_initialization", **fields)

    # Later calls never retry; they just report the stored state.
    if INITIALIZATION_ATTEMPTED:
        return SENTIMENT_PIPELINE is not None
    INITIALIZATION_ATTEMPTED = True

    # Without the loader import there is nothing to call.
    if not MODEL_LOADER_AVAILABLE:
        _report(success=False, error="model_loader unavailable")
        return False

    try:
        # success=None marks the "load in progress" entry.
        _report(success=None, details=f"Loading {AGENT_NAME}")
        SENTIMENT_PIPELINE = load_model_pipeline(AGENT_NAME)

        loaded = SENTIMENT_PIPELINE is not None
        if loaded:
            _report(
                success=True,
                details=f"Model {AGENT_NAME} loaded successfully"
            )
        else:
            _report(success=False, error="Pipeline returned None")
        return loaded

    except Exception as exc:
        # Any failure while loading is logged and reported as "not loaded".
        _report(success=False, error=str(exc))
        return False
84
-
85
-
86
- # Attempt initialization at module load; the return value is ignored here, and load failures are logged by the initializer (it catches exceptions raised while loading)
87
- _initialize_sentiment_pipeline()
88
-
89
-
90
def is_sentiment_available() -> bool:
    """
    Report whether a sentiment pipeline object is currently loaded.

    Returns:
        bool: False while ``SENTIMENT_PIPELINE`` is None, True otherwise.
    """
    if SENTIMENT_PIPELINE is None:
        return False
    return True
98
-
99
-
100
async def get_sentiment_analysis(
    text: str,
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Classify the sentiment of a single text with the loaded pipeline.

    The pipeline call is dispatched to the running event loop's default
    executor and awaited. Every outcome is recorded through
    ``log_interaction``.

    Args:
        text: The text to analyze. Must be a non-blank string of at most
            10,000 characters.
        tenant_id: Optional tenant identifier, forwarded to the log entries.

    Returns:
        A dictionary containing:
        - label (str): Label reported by the pipeline; "UNKNOWN" when no
          pipeline is loaded; "ERROR" when the input or the model output is
          rejected or inference raises.
        - score (float): Score reported by the pipeline (0.0 on any failure).
        - available (bool): False when no pipeline is loaded or inference
          raised, True otherwise.
        - message (str, optional): Human-readable reason on failure.
        - error (str, optional): ``str()`` of the exception when inference
          raised.
        - response_time_ms (int, optional): Elapsed milliseconds; present on
          success and when inference raised.

    Raises:
        asyncio.CancelledError: Re-raised after logging when the task is
            cancelled while awaiting inference.
    """
    start_time = time.time()
    max_chars = 10000          # longest input accepted for analysis
    slow_threshold_ms = 3000   # slower analyses get an additional log entry

    def _rejected(message: str) -> Dict[str, Any]:
        # Response for a request refused while the service itself is up.
        return {
            "label": "ERROR",
            "score": 0.0,
            "available": True,
            "message": message
        }

    # Check availability
    if not is_sentiment_available():
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Sentiment pipeline not available",
            fallback_used=True
        )
        return {
            "label": "UNKNOWN",
            "score": 0.0,
            "available": False,
            "message": "Sentiment analysis is temporarily unavailable."
        }

    # Validate input. The type test comes first so .strip() is only called on
    # str; blank or whitespace-only text is rejected, the same filter
    # analyze_sentiment_batch applies to its items.
    if not isinstance(text, str) or not text.strip():
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Invalid text input"
        )
        return _rejected("Invalid text input provided.")

    # Check text length (prevent processing extremely long texts)
    if len(text) > max_chars:
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error=f"Text too long: {len(text)} characters",
            text_preview=sanitize_for_logging(text[:100])
        )
        return _rejected(
            f"Text is too long for sentiment analysis (max {max_chars:,} characters)."
        )

    try:
        # Read the global once so the worker thread calls the exact object
        # that was present when this request was accepted.
        pipeline = SENTIMENT_PIPELINE

        # get_running_loop() is the documented way to obtain the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()

        # Run model inference in the default executor. The text is wrapped in
        # a list and the first element of the returned list is used.
        results = await loop.run_in_executor(None, pipeline, [text])

        response_time_ms = int((time.time() - start_time) * 1000)

        # Validate results: a non-empty list is required.
        if not isinstance(results, list) or not results:
            log_interaction(
                intent="sentiment_analysis",
                tenant_id=tenant_id,
                success=False,
                error="Empty or invalid model output",
                response_time_ms=response_time_ms,
                text_preview=sanitize_for_logging(text[:100])
            )
            return _rejected("Sentiment analysis returned unexpected format.")

        result = results[0]

        # Validate result structure: a dict carrying both 'label' and 'score'.
        if not isinstance(result, dict) or 'label' not in result or 'score' not in result:
            log_interaction(
                intent="sentiment_analysis",
                tenant_id=tenant_id,
                success=False,
                error="Invalid result structure",
                response_time_ms=response_time_ms,
                text_preview=sanitize_for_logging(text[:100])
            )
            return _rejected("Sentiment analysis returned unexpected format.")

        # Log slow analysis
        if response_time_ms > slow_threshold_ms:
            log_interaction(
                intent="sentiment_analysis_slow",
                tenant_id=tenant_id,
                success=True,
                response_time_ms=response_time_ms,
                details="Slow sentiment analysis detected",
                text_length=len(text)
            )

        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=True,
            response_time_ms=response_time_ms,
            sentiment_label=result.get('label'),
            sentiment_score=result.get('score'),
            text_length=len(text)
        )

        return {
            "label": result['label'],
            "score": float(result['score']),
            "available": True,
            "response_time_ms": response_time_ms
        }

    except asyncio.CancelledError:
        # Record the cancellation, then let it propagate to the caller.
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Analysis cancelled"
        )
        raise

    except Exception as e:
        response_time_ms = int((time.time() - start_time) * 1000)

        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            text_preview=sanitize_for_logging(text[:100]),
            fallback_used=True
        )

        return {
            "label": "ERROR",
            "score": 0.0,
            "available": False,
            "message": "An error occurred during sentiment analysis.",
            "error": str(e),
            "response_time_ms": response_time_ms
        }
276
-
277
-
278
async def analyze_sentiment_batch(
    texts: List[str],
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Classify the sentiment of several texts in one pipeline call.

    Items that are not strings, are blank, or exceed 10,000 characters are
    dropped, and at most the first 100 remaining texts are analyzed. The
    returned results therefore line up with the retained texts, not
    necessarily with the positions in ``texts``.

    Args:
        texts: List of text strings to analyze.
        tenant_id: Optional tenant identifier, forwarded to the log entries.

    Returns:
        A dictionary containing:
        - results (list): Pipeline output for the analyzed texts (empty on
          any failure).
        - available (bool): False when no pipeline is loaded or inference
          raised, True otherwise.
        - total_analyzed (int): Number of results returned by the pipeline.
        - skipped (int, optional): Number of input items that were not sent
          to the pipeline; present on success.
        - message (str, optional): Human-readable reason on failure.
        - error (str, optional): ``str()`` of the exception when inference
          raised.
        - response_time_ms (int, optional): Elapsed milliseconds; present on
          success and when inference raised.

    Raises:
        asyncio.CancelledError: Re-raised after logging when the task is
            cancelled while awaiting inference.
    """
    start_time = time.time()
    max_batch_size = 100   # most texts sent to the pipeline in one call
    max_chars = 10000      # same per-text limit get_sentiment_analysis enforces

    # Check availability
    if not is_sentiment_available():
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Sentiment pipeline not available",
            # Input has not been validated yet, so only measure sized objects.
            batch_size=len(texts) if hasattr(texts, "__len__") else 0
        )
        return {
            "results": [],
            "available": False,
            "total_analyzed": 0,
            "message": "Sentiment analysis is temporarily unavailable."
        }

    # Validate input
    if not texts or not isinstance(texts, list):
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Invalid texts input"
        )
        return {
            "results": [],
            "available": True,
            "total_analyzed": 0,
            "message": "Invalid batch input provided."
        }

    # Keep only non-blank strings within the length limit, then cap the batch.
    valid_texts = [
        t for t in texts
        if isinstance(t, str) and t.strip() and len(t) <= max_chars
    ][:max_batch_size]
    skipped = len(texts) - len(valid_texts)

    if not valid_texts:
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="No valid texts in batch"
        )
        return {
            "results": [],
            "available": True,
            "total_analyzed": 0,
            "message": "No valid texts provided for analysis."
        }

    try:
        # Read the global once so the worker thread calls the exact object
        # that was present when this request was accepted.
        pipeline = SENTIMENT_PIPELINE

        # get_running_loop() is the documented way to obtain the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()

        # Run batch inference in the default executor.
        results = await loop.run_in_executor(None, pipeline, valid_texts)

        response_time_ms = int((time.time() - start_time) * 1000)
        analyzed = results if results else []

        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=True,
            response_time_ms=response_time_ms,
            batch_size=len(valid_texts),
            total_analyzed=len(analyzed)
        )

        return {
            "results": analyzed,
            "available": True,
            "total_analyzed": len(analyzed),
            "skipped": skipped,
            "response_time_ms": response_time_ms
        }

    except asyncio.CancelledError:
        # Record the cancellation, then let it propagate to the caller,
        # as get_sentiment_analysis does.
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Analysis cancelled"
        )
        raise

    except Exception as e:
        response_time_ms = int((time.time() - start_time) * 1000)

        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            batch_size=len(valid_texts)
        )

        return {
            "results": [],
            "available": False,
            "total_analyzed": 0,
            "message": "An error occurred during batch sentiment analysis.",
            "error": str(e),
            "response_time_ms": response_time_ms
        }