petter2025 committed on
Commit
13ad9ff
·
verified ·
1 Parent(s): f1c15c1

Update voice_narrator.py

Browse files
Files changed (1) hide show
  1. voice_narrator.py +142 -136
voice_narrator.py CHANGED
@@ -1,177 +1,183 @@
1
  """
2
- Voice Handler for Hathora Text-to-Speech Integration
3
- Converts metrics into natural, conversational speech for audio synthesis
4
  """
5
 
6
  import os
 
7
  import logging
8
- from typing import Dict, Any, Optional
9
- import hathora
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
 
14
class VoiceHandler:
    """
    Handles conversion of metrics to natural speech and Hathora TTS integration.

    The API key is read only from the ``HATHORA_API_KEY`` environment
    variable. When the key is missing, or the Hathora client cannot be
    constructed, ``self.client`` is ``None`` and the synthesis methods
    return ``None`` instead of raising.
    """

    # Action code -> phrase that completes the sentence "... so we have <phrase>."
    _ACTION_PHRASES = {
        "traffic_shift": "initiated a traffic shift",
        "scale_up": "scaled up the infrastructure",
        "scale_down": "scaled down the infrastructure",
        "restart": "restarted the affected services",
        "rollback": "performed a rollback",
        "alert": "sent alerts to the team",
        "no_action": "determined no immediate action is required",
        "investigate": "flagged this for investigation",
    }

    def __init__(self):
        """Initialize the VoiceHandler with Hathora API configuration."""
        # SECURITY: the key must come from the environment only. A token that
        # was ever committed as an in-code fallback has to be treated as
        # compromised and revoked; never re-introduce a literal default here.
        self.api_key = self._load_api_key()

        if not self.api_key:
            logger.warning("HATHORA_API_KEY not set. Voice synthesis will not work.")
            self.client = None
        else:
            try:
                self.client = hathora.Hathora(api_key=self.api_key)
                logger.info("Hathora client initialized successfully with kokoro model")
            except Exception as e:
                # Best effort: a broken client disables voice instead of
                # breaking whoever constructs the handler.
                logger.error(f"Failed to initialize Hathora client: {e}")
                self.client = None

    @staticmethod
    def _load_api_key() -> Optional[str]:
        """
        Read the Hathora API key from the ``HATHORA_API_KEY`` environment variable.

        Returns:
            The key with surrounding whitespace removed, or None when the
            variable is unset, empty or whitespace-only.
        """
        key = os.getenv("HATHORA_API_KEY", "").strip()
        return key or None

    def speak_metrics(self, metrics: Dict[str, Any]) -> Optional[str]:
        """
        Convert metrics dictionary into natural conversational speech and synthesize with Hathora

        Args:
            metrics: Dictionary containing:
                - confidence: str (e.g., "24.9%")
                - forecast: str (e.g., "1 critical risks")
                - insights: str (e.g., "High error rate")
                - actions: str (e.g., "traffic_shift")
              Missing keys fall back to placeholder phrases.

        Returns:
            Path to the generated MP3 audio file, or None if synthesis failed

        Example input:
            {
                "confidence": "24.9%",
                "forecast": "1 critical risks",
                "insights": "High error rate",
                "actions": "traffic_shift"
            }

        Example output sentence (values are inserted verbatim, only the
        insight is lower-cased):
            "Anomaly detected with a confidence of 24.9% and high error rate.
            We are currently forecasting 1 critical risks, so we have initiated a traffic shift."
        """
        try:
            # Extract metrics with defaults
            confidence = metrics.get("confidence", "unknown confidence")
            forecast = metrics.get("forecast", "no forecast available")
            insights = metrics.get("insights", "no specific insights")
            actions = metrics.get("actions", "no action")

            # Convert action code to natural language
            action_text = self._format_action(actions)

            # Construct natural conversational sentence
            speech_text = (
                f"Anomaly detected with a confidence of {confidence} and {insights.lower()}. "
                f"We are currently forecasting {forecast}, so we have {action_text}."
            )

            logger.info(f"Generated speech text: {speech_text}")

            # Send to Hathora for synthesis
            return self._synthesize_with_hathora(speech_text)

        except Exception as e:
            # Any malformed input (e.g. a non-dict or a non-string insight)
            # ends up here; the caller only ever sees None.
            logger.error(f"Error in speak_metrics: {e}", exc_info=True)
            return None

    def _format_action(self, action: str) -> str:
        """
        Convert action code into natural language

        Args:
            action: Action code (e.g., "traffic_shift", "scale_up", "restart")

        Returns:
            Natural language description of the action; unknown codes are
            echoed back as "taken the action: <code>".
        """
        return self._ACTION_PHRASES.get(action, f"taken the action: {action}")

    def _synthesize_with_hathora(self, text: str, output_filename: str = "alert_audio.mp3") -> Optional[str]:
        """
        Send text to Hathora TTS API for speech synthesis using the kokoro model

        Args:
            text: The text to convert to speech
            output_filename: Name of the output MP3 file

        Returns:
            Path to the generated MP3 file, or None if synthesis failed
        """
        if not self.client:
            logger.error("Cannot synthesize: Hathora client not initialized")
            return None

        try:
            logger.info("Synthesizing speech with Hathora kokoro model...")

            # Use the kokoro model with the af_sarah voice
            response = self.client.text_to_speech.convert(
                "kokoro",
                text,
                voice="af_sarah",  # Professional female voice
                speed=1.0
            )

            # Save the audio file
            response.save(output_filename)
            logger.info(f"Successfully generated audio: {output_filename}")

            return output_filename

        except Exception as e:
            logger.error(f"Error during Hathora synthesis: {e}", exc_info=True)
            return None

    def speak_alert(self, diagnosis: str, action: str, output_filename: str = "alert.mp3") -> Optional[str]:
        """
        Generate alert voice for incidents (simplified version for direct use)

        Args:
            diagnosis: The diagnosis text (e.g., "Payment gateway timeout")
            action: The action being taken (e.g., "REROUTE", "RESTART")
            output_filename: Name of output file

        Returns:
            Path to the generated MP3 file, or None if synthesis failed
        """
        if not self.client:
            logger.warning("Voice synthesis disabled: Hathora client not available")
            return None

        try:
            # Create natural speech text: "SCALE_UP" is spoken as "scale up"
            action_lower = action.lower().replace("_", " ")
            speech_text = f"Critical alert detected. {diagnosis}. Initiating {action_lower}."

            logger.info(f"Generating alert audio: {speech_text}")

            # Synthesize
            return self._synthesize_with_hathora(speech_text, output_filename)

        except Exception as e:
            logger.error(f"Error generating alert audio: {e}", exc_info=True)
            return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Voice Narration for Incident Alerts
3
+ Supports multiple TTS providers with graceful fallback
4
  """
5
 
6
  import os
7
+ import requests
8
  import logging
9
+ from typing import Optional, Dict, Any
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
 
14
class VoiceNarrator:
    """
    Narrate critical incidents using a TTS REST API.

    The narrator is enabled only when an API key is found in the
    environment. Every failure path (disabled, low severity, HTTP error,
    timeout, unexpected exception) results in ``None`` rather than an
    exception, so voice narration can never break the calling application.
    """

    # Upper-case severities that are worth narrating; everything else is skipped.
    _NARRATED_SEVERITIES = ("HIGH", "CRITICAL")

    def __init__(self):
        """Read the API key and endpoint from the environment and set ``enabled``."""
        # Check for API key (set in HF Secrets). Strip so that a
        # whitespace-only secret does not count as a usable key.
        self.api_key = (
            os.environ.get("HATHORA_VOICE_API_KEY", "").strip()
            or os.environ.get("VOICE_API_KEY", "").strip()
        )

        # API endpoint (update with actual Hathora endpoint)
        self.api_endpoint = os.environ.get(
            "VOICE_API_ENDPOINT",
            "https://api.hathora.dev/v1/tts"  # PLACEHOLDER
        )

        self.enabled = bool(self.api_key)

        if self.enabled:
            logger.info("✅ Voice narrator initialized with API key")
        else:
            logger.warning("⚠️ Voice narrator disabled (no API key found)")

    @staticmethod
    def _normalize_severity(severity):
        """
        Return *severity* trimmed and upper-cased when it is a string.

        Non-string values are returned unchanged so the membership and
        equality checks that follow behave exactly as they would on the raw
        value.
        """
        if isinstance(severity, str):
            return severity.strip().upper()
        return severity

    def narrate_incident(
        self,
        component: str,
        severity: str,
        latency: float,
        error_rate: float,
        root_cause: str = "Unknown",
        recovery_action: str = "Investigating"
    ) -> Optional[str]:
        """
        Generate voice narration for a critical incident

        Args:
            component: Component name; dashes are spoken as spaces.
            severity: Incident severity. Only "HIGH" and "CRITICAL" are
                narrated; the comparison ignores case and surrounding
                whitespace.
            latency: Latency value, spoken as whole milliseconds.
            error_rate: Error rate as a fraction (multiplied by 100 when spoken).
            root_cause: Root cause text; "Unknown" or empty means
                "under investigation".
            recovery_action: Recovery action text.

        Returns:
            Audio URL (str) if successful, None if failed
        """
        if not self.enabled:
            logger.debug("Voice narration skipped (disabled)")
            return None

        # Only narrate HIGH and CRITICAL incidents
        severity = self._normalize_severity(severity)
        if severity not in self._NARRATED_SEVERITIES:
            return None

        try:
            # Build the narration text
            narration_text = self._build_narration(
                component, severity, latency, error_rate, root_cause, recovery_action
            )

            # Call TTS API
            audio_url = self._call_tts_api(narration_text)

            if audio_url:
                logger.info(f"✅ Generated voice narration for {component}")
                return audio_url

            logger.warning("Voice API returned no audio URL")
            return None

        except Exception as e:
            # Silent fail - don't break the app
            logger.error(f"Voice narration failed: {e}", exc_info=True)
            return None

    def _build_narration(
        self,
        component: str,
        severity: str,
        latency: float,
        error_rate: float,
        root_cause: str,
        recovery_action: str
    ) -> str:
        """
        Build the narration text.

        The sentence order is: severity-specific intro, metrics, root cause,
        recovery action, closing call to attention.
        """
        # Format component name (remove dashes, capitalize)
        component_spoken = component.replace("-", " ").title()

        # Severity-specific intro; anything that is not CRITICAL gets the
        # high-priority wording.
        if self._normalize_severity(severity) == "CRITICAL":
            intro = f"Critical alert. {component_spoken} is experiencing severe failure."
        else:
            intro = f"High priority alert. {component_spoken} degradation detected."

        # Metrics
        metrics = f"Latency: {int(latency)} milliseconds. Error rate: {error_rate*100:.0f} percent."

        # Root cause (if available)
        if root_cause and root_cause != "Unknown":
            cause = f"Root cause: {root_cause}."
        else:
            cause = "Root cause under investigation."

        # Recovery action
        action = f"Recovery action: {recovery_action}."

        full_text = f"{intro} {metrics} {cause} {action} Immediate attention required."

        logger.debug(f"Narration text: {full_text[:100]}...")
        return full_text

    def _call_tts_api(self, text: str) -> Optional[str]:
        """
        Call TTS API to generate audio

        GENERIC implementation - adapt to actual Hathora Voice API format

        Args:
            text: Narration text to synthesize.

        Returns:
            The first non-empty value among the ``audio_url``, ``url`` and
            ``audioUrl`` keys of the JSON response, or None on a non-200
            status, a timeout or any other error.
        """
        try:
            # Make REST API call
            response = requests.post(
                self.api_endpoint,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "text": text,
                    "voice": "en-US-neural",
                    "speed": 1.0,
                    "format": "mp3"
                },
                timeout=10.0
            )

            if response.status_code != 200:
                logger.error(f"TTS API error: {response.status_code} - {response.text[:200]}")
                return None

            data = response.json()

            # Extract audio URL; the response key name is not fixed yet, so
            # the known spellings are tried in turn.
            return data.get("audio_url") or data.get("url") or data.get("audioUrl")

        except requests.exceptions.Timeout:
            logger.warning("TTS API timeout")
            return None
        except Exception as e:
            # Covers connection errors, invalid JSON and unexpected payload shapes.
            logger.error(f"TTS API call failed: {e}")
            return None
168
+
169
+
170
# Process-wide narrator, created lazily on the first get_narrator() call.
_narrator = None


def get_narrator() -> VoiceNarrator:
    """Return the shared VoiceNarrator, constructing it on first use."""
    global _narrator
    if _narrator is not None:
        return _narrator
    _narrator = VoiceNarrator()
    return _narrator
179
+
180
+
181
# Backward compatibility aliases: keep the previous public names importable.
# NOTE(review): VoiceNarrator defines narrate_incident() but not the legacy
# speak_metrics()/speak_alert() methods, so old call sites that use those
# would still fail at call time - confirm no such callers remain.
get_voice_handler = get_narrator
VoiceHandler = VoiceNarrator