Spaces:

A-R-F
/

Agentic-Reliability-Framework-API

Running

App Files Files Community

petter2025 committed on Dec 5, 2025

Commit

13ad9ff

verified ·

1 Parent(s): f1c15c1

Update voice_narrator.py

Browse files

Files changed (1) hide show

voice_narrator.py +142 -136

voice_narrator.py CHANGED Viewed

@@ -1,177 +1,183 @@
 """
-Voice Handler for Hathora Text-to-Speech Integration
-Converts metrics into natural, conversational speech for audio synthesis
 """
 import os
 import logging
-from typing import Dict, Any, Optional
-import hathora
 logger = logging.getLogger(__name__)
-class VoiceHandler:
     """
-    Handles conversion of metrics to natural speech and Hathora TTS integration
     """
     def __init__(self):
-        """Initialize the VoiceHandler with Hathora API configuration"""
-        self.api_key = os.getenv(
-            "HATHORA_API_KEY",
-            "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IkVFcDg4bkpRU25idmVtMFIyTWZTRyJ9.eyJlbWFpbCI6InNhbWlzb2NpYWw3MkBnbWFpbC5jb20iLCJpc3MiOiJodHRwczovL2F1dGguaGF0aG9yYS5jb20vIiwic3ViIjoiZ29vZ2xlLW9hdXRoMnwxMTc1OTY2NDkzODcyNjUzNDQxOTgiLCJhdWQiOlsiaHR0cHM6Ly9jbG91ZC5oYXRob3JhLmNvbSIsImh0dHBzOi8vZGV2LXRjaHA2aW45LnVzLmF1dGgwLmNvbS91c2VyaW5mbyJdLCJpYXQiOjE3MzM4OTIxODYsImV4cCI6MTczMzk3ODU4Niwic2NvcGUiOiJvcGVuaWQgcHJvZmlsZSBlbWFpbCBvZmZsaW5lX2FjY2VzcyIsImF6cCI6IkROdnJ0RnY2NHJVdEExM1dIQjdvZ1BUMEI0SmtZb1AxIn0.VQwUFdDPOLLI2jOE0qbeRrZzp6oA5svMCaL2bmaMYiXZgOZHT5f5kpJSE8D5KBUwCJv9OyrGmyHCinxTtAthBLeliUL3OPUfORnbA6NfJnZ5Y-kM1IWIVI8a-in3HcWvxV5XUK2Rfq2QvMpQO_w9BnLh2JaMdwb-tsG6IeokH5owqo5sivQKJ8AiLb1ypuGFkcMENEOj0n0upnrd6A6-NDuUGms_x3Gx6d6k9Ih9iH9NAUKcm97AVwxXxBLM15nkZF1BDKztMM_A3YlI_aLfn3Oj7QLPRMsnHxz-tyWaVWLV0-wcYAzRVsrzCj-t6LEyRGBZ3fO19-KIEFvol12Wsw"
         )
-        if not self.api_key:
-            logger.warning("HATHORA_API_KEY not set. Voice synthesis will not work.")
-            self.client = None
         else:
-            try:
-                self.client = hathora.Hathora(api_key=self.api_key)
-                logger.info("Hathora client initialized successfully with kokoro model")
-            except Exception as e:
-                logger.error(f"Failed to initialize Hathora client: {e}")
-                self.client = None
-    def speak_metrics(self, metrics: Dict[str, Any]) -> Optional[str]:
         """
-        Convert metrics dictionary into natural conversational speech and synthesize with Hathora
-        Args:
-            metrics: Dictionary containing:
-                - confidence: str (e.g., "24.9%")
-                - forecast: str (e.g., "1 critical risks")
-                - insights: str (e.g., "High error rate")
-                - actions: str (e.g., "traffic_shift")
         Returns:
-            Path to the generated MP3 audio file, or None if synthesis failed
-        Example input:
-            {
-                "confidence": "24.9%",
-                "forecast": "1 critical risks",
-                "insights": "High error rate",
-                "actions": "traffic_shift"
-            }
-        Example output sentence:
-            "Anomaly detected with a confidence of 24.9% and a high error rate.
-             We are currently forecasting 1 critical risk, so we have initiated a traffic shift."
         """
         try:
-            # Extract metrics with defaults
-            confidence = metrics.get("confidence", "unknown confidence")
-            forecast = metrics.get("forecast", "no forecast available")
-            insights = metrics.get("insights", "no specific insights")
-            actions = metrics.get("actions", "no action")
-            # Convert action code to natural language
-            action_text = self._format_action(actions)
-            # Construct natural conversational sentence
-            speech_text = (
-                f"Anomaly detected with a confidence of {confidence} and {insights.lower()}. "
-                f"We are currently forecasting {forecast}, so we have {action_text}."
             )
-            logger.info(f"Generated speech text: {speech_text}")
-            # Send to Hathora for synthesis
-            audio_path = self._synthesize_with_hathora(speech_text)
-            return audio_path
         except Exception as e:
-            logger.error(f"Error in speak_metrics: {e}", exc_info=True)
             return None
-    def _format_action(self, action: str) -> str:
-        """
-        Convert action code into natural language
-        Args:
-            action: Action code (e.g., "traffic_shift", "scale_up", "restart")
-        Returns:
-            Natural language description of the action
-        """
-        action_map = {
-            "traffic_shift": "initiated a traffic shift",
-            "scale_up": "scaled up the infrastructure",
-            "scale_down": "scaled down the infrastructure",
-            "restart": "restarted the affected services",
-            "rollback": "performed a rollback",
-            "alert": "sent alerts to the team",
-            "no_action": "determined no immediate action is required",
-            "investigate": "flagged this for investigation"
-        }
-        return action_map.get(action, f"taken the action: {action}")
-    def _synthesize_with_hathora(self, text: str, output_filename: str = "alert_audio.mp3") -> Optional[str]:
-        """
-        Send text to Hathora TTS API for speech synthesis using the kokoro model
-        Args:
-            text: The text to convert to speech
-            output_filename: Name of the output MP3 file
-        Returns:
-            Path to the generated MP3 file, or None if synthesis failed
-        """
-        if not self.client:
-            logger.error("Cannot synthesize: Hathora client not initialized")
-            return None
-        try:
-            logger.info(f"Synthesizing speech with Hathora kokoro model...")
-            # Use the kokoro model with af_sarah voice (same as your working test)
-            response = self.client.text_to_speech.convert(
-                "kokoro",
-                text,
-                voice="af_sarah",  # Professional female voice
-                speed=1.0
-            )
-            # Save the audio file
-            response.save(output_filename)
-            logger.info(f"Successfully generated audio: {output_filename}")
-            return output_filename
-        except Exception as e:
-            logger.error(f"Error during Hathora synthesis: {e}", exc_info=True)
-            return None
-    def speak_alert(self, diagnosis: str, action: str, output_filename: str = "alert.mp3") -> Optional[str]:
         """
-        Generate alert voice for incidents (simplified version for direct use)
-        Args:
-            diagnosis: The diagnosis from Claude (e.g., "Payment gateway timeout")
-            action: The action being taken (e.g., "REROUTE", "RESTART")
-            output_filename: Name of output file
-        Returns:
-            Path to the generated MP3 file, or None if synthesis failed
         """
-        if not self.client:
-            logger.warning("Voice synthesis disabled: Hathora client not available")
-            return None
         try:
-            # Create natural speech text
-            action_lower = action.lower().replace("_", " ")
-            speech_text = f"Critical alert detected. {diagnosis}. Initiating {action_lower}."
-            logger.info(f"Generating alert audio: {speech_text}")
-            # Synthesize
-            return self._synthesize_with_hathora(speech_text, output_filename)
         except Exception as e:
-            logger.error(f"Error generating alert audio: {e}", exc_info=True)
-            return None

 """
+Voice Narration for Incident Alerts
+Supports multiple TTS providers with graceful fallback
 """
 import os
+import requests
 import logging
+from typing import Optional, Dict, Any
 logger = logging.getLogger(__name__)
+class VoiceNarrator:
     """
+    Narrate critical incidents using TTS API
+    Supports:
+    - Hathora Voice API (REST)
+    - Generic TTS APIs
+    - Graceful fallback (silent fail)
     """
     def __init__(self):
+        # Check for API key (set in HF Secrets)
+        self.api_key = os.environ.get("HATHORA_VOICE_API_KEY", "") or \
+                       os.environ.get("VOICE_API_KEY", "")
+        # API endpoint (update with actual Hathora endpoint)
+        self.api_endpoint = os.environ.get(
+            "VOICE_API_ENDPOINT",
+            "https://api.hathora.dev/v1/tts"  # PLACEHOLDER
         )
+        self.enabled = bool(self.api_key)
+        if self.enabled:
+            logger.info("✅ Voice narrator initialized with API key")
         else:
+            logger.warning("⚠️ Voice narrator disabled (no API key found)")
+    def narrate_incident(
+        self,
+        component: str,
+        severity: str,
+        latency: float,
+        error_rate: float,
+        root_cause: str = "Unknown",
+        recovery_action: str = "Investigating"
+    ) -> Optional[str]:
         """
+        Generate voice narration for a critical incident
         Returns:
+            Audio URL (str) if successful; None if narration is disabled, severity is not HIGH or CRITICAL, or the TTS call fails
         """
+        if not self.enabled:
+            logger.debug("Voice narration skipped (disabled)")
+            return None
+        # Only narrate HIGH and CRITICAL incidents
+        if severity not in ["HIGH", "CRITICAL"]:
+            return None
         try:
+            # Build dramatic narration text (30-60 seconds when spoken)
+            narration_text = self._build_narration(
+                component, severity, latency, error_rate, root_cause, recovery_action
             )
+            # Call TTS API
+            audio_url = self._call_tts_api(narration_text)
+            if audio_url:
+                logger.info(f"✅ Generated voice narration for {component}")
+                return audio_url
+            else:
+                logger.warning("Voice API returned no audio URL")
+                return None
         except Exception as e:
+            # Silent fail - don't break the app
+            logger.error(f"Voice narration failed: {e}", exc_info=True)
             return None
+    def _build_narration(
+        self,
+        component: str,
+        severity: str,
+        latency: float,
+        error_rate: float,
+        root_cause: str,
+        recovery_action: str
+    ) -> str:
+        """Build dramatic narration text"""
+        # Format component name for speech (replace dashes with spaces, title-case each word)
+        component_spoken = component.replace("-", " ").title()
+        # Severity-specific intro
+        if severity == "CRITICAL":
+            intro = f"Critical alert. {component_spoken} is experiencing severe failure."
+        else:
+            intro = f"High priority alert. {component_spoken} degradation detected."
+        # Metrics
+        metrics = f"Latency: {int(latency)} milliseconds. Error rate: {error_rate*100:.0f} percent."
+        # Root cause (if available)
+        if root_cause and root_cause != "Unknown":
+            cause = f"Root cause: {root_cause}."
+        else:
+            cause = "Root cause under investigation."
+        # Recovery action
+        action = f"Recovery action: {recovery_action}."
+        # Combine into ~30-60 second narration
+        full_text = f"{intro} {metrics} {cause} {action} Immediate attention required."
+        logger.debug(f"Narration text: {full_text[:100]}...")
+        return full_text
+    def _call_tts_api(self, text: str) -> Optional[str]:
         """
+        Call TTS API to generate audio
+        GENERIC implementation - adapt to actual Hathora Voice API format
         """
         try:
+            # Make REST API call
+            response = requests.post(
+                self.api_endpoint,
+                headers={
+                    "Authorization": f"Bearer {self.api_key}",
+                    "Content-Type": "application/json"
+                },
+                json={
+                    "text": text,
+                    "voice": "en-US-neural",
+                    "speed": 1.0,
+                    "format": "mp3"
+                },
+                timeout=10.0
+            )
+            if response.status_code == 200:
+                data = response.json()
+                # Extract audio URL
+                audio_url = data.get("audio_url") or \
+                           data.get("url") or \
+                           data.get("audioUrl")
+                return audio_url
+            else:
+                logger.error(f"TTS API error: {response.status_code} - {response.text[:200]}")
+                return None
+        except requests.exceptions.Timeout:
+            logger.warning("TTS API timeout")
+            return None
         except Exception as e:
+            logger.error(f"TTS API call failed: {e}")
+            return None
+# Singleton instance
+_narrator = None
+def get_narrator() -> VoiceNarrator:
+    """Get global narrator instance"""
+    global _narrator
+    if _narrator is None:
+        _narrator = VoiceNarrator()
+    return _narrator
+# Backward compatibility aliases
+VoiceHandler = VoiceNarrator
+get_voice_handler = get_narrator