invincible-jha committed: Update app.py

app.py CHANGED
@@ -1,4 +1,7 @@
-#
 import gradio as gr
 import torch
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
@@ -10,6 +13,10 @@ import warnings
 import os
 from scipy.stats import kurtosis, skew
 from anthropic import Anthropic

 # Suppress warnings for cleaner output
 warnings.filterwarnings('ignore')
@@ -21,9 +28,15 @@ emotion_tokenizer = None
 emotion_model = None
 clinical_analyzer = None

-# Part 2: Model Loading and Initialization
 def load_models():
-    """Load and initialize speech and emotion analysis models.
     global processor, whisper_model, emotion_tokenizer, emotion_model

     try:
@@ -48,9 +61,16 @@ def load_models():
         print(f"Error loading models: {str(e)}")
         return False

-# Part 3: Voice Feature Extraction
 def extract_prosodic_features(waveform, sr):
-    """Extract voice features including pitch, energy, and rhythm patterns.
     try:
         # Input validation
         if waveform is None or len(waveform) == 0:
@@ -91,7 +111,7 @@ def extract_prosodic_features(waveform, sr):
         print(f"Pitch extraction error: {e}")
         features.update({'pitch_mean': 160.0, 'pitch_std': 0.0, 'pitch_range': 0.0})

-        # Energy analysis
         try:
             rms = librosa.feature.rms(
                 y=waveform,
@@ -109,7 +129,7 @@ def extract_prosodic_features(waveform, sr):
         print(f"Energy extraction error: {e}")
         features.update({'energy_mean': 0.02, 'energy_std': 0.0, 'energy_range': 0.0})

-        # Rhythm analysis
         try:
             onset_env = librosa.onset.onset_strength(
                 y=waveform,
@@ -125,6 +145,7 @@ def extract_prosodic_features(waveform, sr):
                 aggregate=None
             )[0]

             features['tempo'] = float(tempo) if 40 <= tempo <= 240 else 120.0

         except Exception as e:
@@ -136,7 +157,6 @@ def extract_prosodic_features(waveform, sr):
         print(f"Feature extraction failed: {e}")
         return None

-# Part 4: Clinical Analysis Integration
 class ClinicalVoiceAnalyzer:
     """Analyze voice characteristics for psychological indicators."""

@@ -144,15 +164,25 @@ class ClinicalVoiceAnalyzer:
         """Initialize the clinical analyzer with API and reference ranges."""
         self.anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
         self.model = "claude-3-opus-20240229"
         self.reference_ranges = {
-            'pitch': {'min': 150, 'max': 400},
-            'tempo': {'min': 90, 'max': 130},
             'energy': {'min': 0.01, 'max': 0.05}
         }
         print("Clinical analyzer ready")

     def analyze_voice_metrics(self, features, emotions, transcription):
-        """Generate clinical insights from voice and emotion data.
         try:
             prompt = self._create_clinical_prompt(features, emotions, transcription)
             response = self.anthropic.messages.create(
@@ -208,9 +238,15 @@ Basic Voice Analysis (API Unavailable):
 - Voice Energy Level: {features['energy_mean']:.4f}
 - Primary Emotion: {dominant_emotion[0]} ({dominant_emotion[1]:.1%} confidence)"""

-# Part 5: Visualization Functions
 def create_feature_plots(features):
-    """Create interactive visualizations of voice features.
     try:
         fig = go.Figure()

@@ -266,7 +302,14 @@ def create_feature_plots(features):
         return None

 def create_emotion_plot(emotions):
-    """Create visualization of emotional analysis.
     try:
         fig = go.Figure(data=[
             go.Bar(
@@ -291,9 +334,18 @@ def create_emotion_plot(emotions):
         print(f"Emotion plot error: {e}")
         return None

-# Part 6: Main Analysis Function
 def analyze_audio(audio_input):
-    """Process audio input and generate comprehensive analysis.
     try:
         # Validate input
         if audio_input is None:
@@ -365,6 +417,19 @@ Voice Characteristics:
 - Speech Rate (Tempo): {features['tempo']:.2f} BPM
 - Voice Energy: {features['energy_mean']:.4f}

 Dominant Emotion: {max(emotion_scores.items(), key=lambda x: x[1])[0]}
 Emotion Confidence: {max(emotion_scores.values()):.2%}

@@ -379,7 +444,7 @@ Recording Duration: {duration:.2f} seconds
         print(error_msg)
         return error_msg, None, None

-#
 try:
     print("===== Application Startup =====")

@@ -391,6 +456,33 @@ try:
     clinical_analyzer = ClinicalVoiceAnalyzer()
     print("Clinical analyzer initialized")

     # Create Gradio interface
     demo = gr.Interface(
         fn=analyze_audio,
@@ -405,59 +497,26 @@ try:
             gr.HTML(label="Voice Feature Analysis")
         ],
         title="Voice Analysis System with Clinical Interpretation",
-    description=
-    This
-
-
-
-
-
-    - Voice quality metrics
-
-    2. Clinical Analysis:
-    - Mental health indicators
-    - Emotional state evaluation
-    - Risk assessment
-    - Clinical recommendations
-
-    3. Emotional Content:
-    - Emotion detection (6 basic emotions)
-    - Emotional intensity analysis
-
-    For optimal description="""
-    This application provides comprehensive voice analysis with clinical insights:
-
-    1. Voice Features:
-    - Pitch analysis (fundamental frequency and variation)
-    - Energy patterns (volume and intensity)
-    - Speech rate (words per minute)
-    - Voice quality metrics
-
-    2. Clinical Analysis:
-    - Mental health indicators
-    - Emotional state evaluation
-    - Risk assessment
-    - Clinical recommendations
-
-    3. Emotional Content:
-    - Emotion detection (6 basic emotions)
-    - Emotional intensity analysis
-
-    For optimal results:
-    - Record in a quiet environment
-    - Speak clearly and naturally
-    - Keep recordings between 1-5 seconds
-    - Maintain consistent volume
-
-    Upload an audio file or record directly through your microphone.
-    """,
         examples=None,
-        cache_examples=False
     )

-    # Launch the interface
     if __name__ == "__main__":
-        demo.launch(

 except Exception as e:
     print(f"Error during application startup: {str(e)}")
+# app.py - Voice Analysis System with Clinical Interpretation
+# This application provides comprehensive voice analysis with mental health insights
+# using voice biomarkers, emotion detection, and clinical interpretation.
+
 import gradio as gr
 import torch
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import os
 from scipy.stats import kurtosis, skew
 from anthropic import Anthropic
+from dotenv import load_dotenv
+
+# Load environment variables for API keys
+load_dotenv()

 # Suppress warnings for cleaner output
 warnings.filterwarnings('ignore')
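The new load_dotenv() call pulls the Anthropic key from a local .env file into the process environment before the analyzer reads it with os.getenv. A minimal, self-contained illustration; the .env layout in the comment is an assumption, only the ANTHROPIC_API_KEY name comes from the code:

# Assumed .env file placed next to app.py:
#   ANTHROPIC_API_KEY=<your key>
import os
from dotenv import load_dotenv

load_dotenv()                                  # copy .env entries into os.environ
api_key = os.getenv("ANTHROPIC_API_KEY")       # None if the variable is missing
print("key configured:", api_key is not None)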
 emotion_model = None
 clinical_analyzer = None

 def load_models():
+    """Load and initialize speech recognition and emotion analysis models.
+
+    This function handles the initialization of both Whisper (for speech recognition)
+    and the emotion detection model, setting them up for CPU-based inference.
+
+    Returns:
+        bool: True if all models loaded successfully, False otherwise
+    """
     global processor, whisper_model, emotion_tokenizer, emotion_model

     try:

         print(f"Error loading models: {str(e)}")
         return False

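The body of load_models between try: and the error handler sits outside the hunk context. A minimal sketch of what it might contain, assuming CPU inference as the new docstring states; the checkpoint names (openai/whisper-tiny, j-hartmann/emotion-english-distilroberta-base) are placeholders, not read from this commit:

from transformers import (WhisperProcessor, WhisperForConditionalGeneration,
                          AutoTokenizer, AutoModelForSequenceClassification)

def load_models_sketch():
    try:
        # Whisper handles speech-to-text; everything stays on CPU
        processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
        whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
        whisper_model.to("cpu").eval()

        # A text emotion classifier applied to the transcription (placeholder checkpoint)
        emotion_name = "j-hartmann/emotion-english-distilroberta-base"
        emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_name)
        emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_name)
        emotion_model.to("cpu").eval()
        return processor, whisper_model, emotion_tokenizer, emotion_model
    except Exception as e:
        print(f"Error loading models: {str(e)}")
        return None

Unlike the app's function, this sketch returns the loaded objects instead of setting module-level globals.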
|
64 |
def extract_prosodic_features(waveform, sr):
|
65 |
+
"""Extract voice features including pitch, energy, and rhythm patterns.
|
66 |
+
|
67 |
+
Args:
|
68 |
+
waveform (numpy.ndarray): Audio signal data
|
69 |
+
sr (int): Sampling rate of the audio
|
70 |
+
|
71 |
+
Returns:
|
72 |
+
dict: Dictionary containing extracted features or None if extraction fails
|
73 |
+
"""
|
74 |
try:
|
75 |
# Input validation
|
76 |
if waveform is None or len(waveform) == 0:
|
|
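The pitch block itself is elided between this context and the error handler below; only the fallback values (pitch_mean 160.0 Hz) are visible. A hedged sketch of one way to fill it in with librosa.piptrack; the frequency bounds and the per-frame selection are assumptions:

import numpy as np
import librosa

def pitch_features_sketch(waveform, sr):
    pitches, magnitudes = librosa.piptrack(y=waveform, sr=sr, fmin=50, fmax=500)
    # strongest pitch candidate per frame; unvoiced frames come out as 0
    best = pitches[magnitudes.argmax(axis=0), np.arange(pitches.shape[1])]
    voiced = best[best > 0]
    if voiced.size == 0:
        return {'pitch_mean': 160.0, 'pitch_std': 0.0, 'pitch_range': 0.0}
    return {'pitch_mean': float(np.mean(voiced)),
            'pitch_std': float(np.std(voiced)),
            'pitch_range': float(np.ptp(voiced))}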
         print(f"Pitch extraction error: {e}")
         features.update({'pitch_mean': 160.0, 'pitch_std': 0.0, 'pitch_range': 0.0})

+        # Energy analysis with noise handling
         try:
             rms = librosa.feature.rms(
                 y=waveform,
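The rms call is cut off after y=waveform, so its remaining arguments and the statistics derived from it are not shown. A short sketch of the likely shape of this step; the frame and hop sizes are assumptions, and the feature names mirror the fallback dict below:

import numpy as np
import librosa

def energy_features_sketch(waveform, frame_length=2048, hop_length=512):
    rms = librosa.feature.rms(y=waveform, frame_length=frame_length, hop_length=hop_length)[0]
    return {'energy_mean': float(np.mean(rms)),
            'energy_std': float(np.std(rms)),
            'energy_range': float(np.ptp(rms))}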
         print(f"Energy extraction error: {e}")
         features.update({'energy_mean': 0.02, 'energy_std': 0.0, 'energy_range': 0.0})

+        # Rhythm analysis with tempo validation
         try:
             onset_env = librosa.onset.onset_strength(
                 y=waveform,
                 aggregate=None
             )[0]

+            # Validate tempo within normal speech range (40-240 BPM)
             features['tempo'] = float(tempo) if 40 <= tempo <= 240 else 120.0

         except Exception as e:

         print(f"Feature extraction failed: {e}")
         return None

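Between onset_strength and the tempo validation, the call that actually produces tempo is elided; only aggregate=None and )[0] survive in the context. A sketch consistent with those fragments; note that librosa.beat.tempo is deprecated in recent librosa releases in favour of librosa.feature.rhythm.tempo, so the exact call used here is an assumption:

import librosa

def tempo_feature_sketch(waveform, sr):
    onset_env = librosa.onset.onset_strength(y=waveform, sr=sr)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr, aggregate=None)[0]
    # same clamp as the diff: keep plausible speech tempos, otherwise fall back to 120 BPM
    return float(tempo) if 40 <= tempo <= 240 else 120.0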
 class ClinicalVoiceAnalyzer:
     """Analyze voice characteristics for psychological indicators."""

         """Initialize the clinical analyzer with API and reference ranges."""
         self.anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
         self.model = "claude-3-opus-20240229"
+        # Define normal ranges for voice metrics based on clinical research
         self.reference_ranges = {
+            'pitch': {'min': 150, 'max': 400},   # Hz
+            'tempo': {'min': 90, 'max': 130},    # BPM
             'energy': {'min': 0.01, 'max': 0.05}
         }
         print("Clinical analyzer ready")

     def analyze_voice_metrics(self, features, emotions, transcription):
+        """Generate clinical insights from voice and emotion data.
+
+        Args:
+            features (dict): Extracted voice features
+            emotions (dict): Detected emotion scores
+            transcription (str): Speech content
+
+        Returns:
+            str: Formatted clinical analysis or backup analysis if API fails
+        """
         try:
             prompt = self._create_clinical_prompt(features, emotions, transcription)
             response = self.anthropic.messages.create(
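The arguments of the messages.create call are outside the hunk. A minimal sketch of the Anthropic Messages API usage implied by self.model and the prompt built above; the max_tokens value is an assumption:

from anthropic import Anthropic
import os

def clinical_analysis_sketch(prompt: str, model: str = "claude-3-opus-20240229") -> str:
    client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
    response = client.messages.create(
        model=model,
        max_tokens=1000,                                 # assumed; not visible in the diff
        messages=[{"role": "user", "content": prompt}],
    )
    return response.content[0].text                      # first content block of the reply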
 - Voice Energy Level: {features['energy_mean']:.4f}
 - Primary Emotion: {dominant_emotion[0]} ({dominant_emotion[1]:.1%} confidence)"""

 def create_feature_plots(features):
+    """Create interactive visualizations of voice features.
+
+    Args:
+        features (dict): Dictionary of extracted voice features
+
+    Returns:
+        str: HTML representation of the interactive plots
+    """
     try:
         fig = go.Figure()

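The traces added to fig are not part of the hunk. A hedged sketch of a feature plot that would satisfy the docstring's contract of returning HTML; the feature selection and layout are assumptions:

import plotly.graph_objects as go

def feature_plot_sketch(features: dict) -> str:
    names = ['pitch_mean', 'pitch_std', 'tempo', 'energy_mean']
    fig = go.Figure()
    fig.add_trace(go.Bar(x=names, y=[features.get(n, 0.0) for n in names]))
    fig.update_layout(title='Voice Feature Analysis', template='plotly_white')
    return fig.to_html(include_plotlyjs='cdn', full_html=False)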
         return None

 def create_emotion_plot(emotions):
+    """Create visualization of emotional analysis.
+
+    Args:
+        emotions (dict): Dictionary of emotion scores
+
+    Returns:
+        str: HTML representation of the emotion plot
+    """
     try:
         fig = go.Figure(data=[
             go.Bar(
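Only the opening of the go.Bar trace is visible. A compact sketch of the rest, again returning HTML as the docstring promises; the axis titles and range are assumptions:

import plotly.graph_objects as go

def emotion_plot_sketch(emotions: dict) -> str:
    fig = go.Figure(data=[go.Bar(x=list(emotions.keys()), y=list(emotions.values()))])
    fig.update_layout(title='Emotion Analysis', yaxis_title='Confidence',
                      yaxis_range=[0, 1], template='plotly_white')
    return fig.to_html(include_plotlyjs='cdn', full_html=False)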
         print(f"Emotion plot error: {e}")
         return None

 def analyze_audio(audio_input):
+    """Process audio input and generate comprehensive analysis.
+
+    This is the main function that coordinates the entire analysis pipeline,
+    including feature extraction, emotion detection, and clinical interpretation.
+
+    Args:
+        audio_input: Audio file path or tuple containing audio data
+
+    Returns:
+        tuple: (analysis_summary, emotion_visualization, feature_visualization)
+    """
     try:
         # Validate input
         if audio_input is None:
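The pipeline steps inside analyze_audio (resampling, transcription, emotion scoring) are elided. A sketch of the Whisper transcription step the docstring describes, using the processor and whisper_model loaded earlier; the 16 kHz resample is an assumption based on Whisper's expected input:

import librosa
import torch

def transcribe_sketch(audio_path, processor, whisper_model) -> str:
    waveform, sr = librosa.load(audio_path, sr=16000, mono=True)   # Whisper expects 16 kHz mono
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = whisper_model.generate(inputs.input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]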
 - Speech Rate (Tempo): {features['tempo']:.2f} BPM
 - Voice Energy: {features['energy_mean']:.4f}

+Dominant Emotion: {max(emotion_scores.items(), key=lambda x: x[1])[0]}
+Emotion Confidence: {max(emotion_scores.values()):.2%}
+
+        summary = f"""Voice Analysis Summary:
+
+Speech Content:
+{transcription}
+
+Voice Characteristics:
+- Average Pitch: {features['pitch_mean']:.2f} Hz
+- Pitch Variation: {features['pitch_std']:.2f} Hz
+- Speech Rate (Tempo): {features['tempo']:.2f} BPM
+- Voice Energy: {features['energy_mean']:.4f}
+
 Dominant Emotion: {max(emotion_scores.items(), key=lambda x: x[1])[0]}
 Emotion Confidence: {max(emotion_scores.values()):.2%}

         print(error_msg)
         return error_msg, None, None

+# Application initialization and Gradio interface setup
 try:
     print("===== Application Startup =====")

     clinical_analyzer = ClinicalVoiceAnalyzer()
     print("Clinical analyzer initialized")

+    # Define the interface description
+    description = """This application provides comprehensive voice analysis with clinical insights:
+
+1. Voice Features:
+- Pitch analysis (fundamental frequency and variation)
+- Energy patterns (volume and intensity)
+- Speech rate (words per minute)
+- Voice quality metrics
+
+2. Clinical Analysis:
+- Mental health indicators
+- Emotional state evaluation
+- Risk assessment
+- Clinical recommendations
+
+3. Emotional Content:
+- Emotion detection (6 basic emotions)
+- Emotional intensity analysis
+
+For optimal results:
+- Record in a quiet environment
+- Speak clearly and naturally
+- Keep recordings between 1-5 seconds
+- Maintain consistent volume
+
+Upload an audio file or record directly through your microphone."""
+
     # Create Gradio interface
     demo = gr.Interface(
         fn=analyze_audio,
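The inputs and the first two outputs of gr.Interface fall outside the hunk. A sketch of a matching wiring; the component labels other than "Voice Feature Analysis" are assumptions, and the audio recording argument is source= in Gradio 3.x but sources= in 4.x:

import gradio as gr

demo_sketch = gr.Interface(
    fn=analyze_audio,                          # defined earlier in app.py
    inputs=gr.Audio(type="filepath"),          # add source/sources kwargs per Gradio version
    outputs=[
        gr.Textbox(label="Analysis Summary"),
        gr.HTML(label="Emotion Analysis"),
        gr.HTML(label="Voice Feature Analysis"),
    ],
    title="Voice Analysis System with Clinical Interpretation",
)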
             gr.HTML(label="Voice Feature Analysis")
         ],
         title="Voice Analysis System with Clinical Interpretation",
+        description=description,
+        article="""This system uses advanced AI models to analyze voice patterns and provide mental health insights.
+The analysis combines speech recognition, emotion detection, and clinical interpretation to offer
+a comprehensive understanding of psychological indicators present in voice characteristics.
+
+Note: This tool is for informational purposes only and should not be used as a substitute for
+professional medical advice, diagnosis, or treatment.""",
         examples=None,
+        cache_examples=False,
+        theme="default"
     )

+    # Launch the interface with additional configuration
     if __name__ == "__main__":
+        demo.launch(
+            server_name="0.0.0.0",    # Allow external access
+            server_port=7860,         # Default Gradio port
+            share=False,              # Disable public URL generation
+            debug=False               # Disable debug mode in production
+        )

 except Exception as e:
     print(f"Error during application startup: {str(e)}")