invincible-jha committed on
Commit f420a80 · verified · 1 Parent(s): beebdff

Update app.py

Files changed (1):
  1. app.py +333 -72
app.py CHANGED
@@ -1,4 +1,4 @@
- # Import necessary libraries for the voice analysis system
  import gradio as gr
  import torch
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
@@ -10,58 +10,138 @@ import warnings
  import os
  from scipy.stats import kurtosis, skew
  from anthropic import Anthropic
- from typing import Dict, Optional, Tuple

- # Suppress unnecessary warnings for cleaner output
  warnings.filterwarnings('ignore')

- # Initialize global variables for model storage
  processor = None
  whisper_model = None
  emotion_tokenizer = None
  emotion_model = None
  clinical_analyzer = None

  def load_models():
-     """Initialize and load all required machine learning models.
-
-     This function handles the loading of both the Whisper speech recognition model
-     and the emotion detection model. It includes proper error handling and
-     device management for optimal performance.
-
-     Returns:
-         bool: True if all models loaded successfully, False otherwise
-     """
      global processor, whisper_model, emotion_tokenizer, emotion_model

      try:
-         # Load the Whisper model for speech recognition
          print("Loading Whisper model...")
          processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
          whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")

-         # Load the emotion detection model
          print("Loading emotion model...")
          emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
          emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")

-         # Move models to CPU for consistent performance
          device = "cpu"
          whisper_model.to(device)
          emotion_model.to(device)

          print("Models loaded successfully!")
          return True
-
      except Exception as e:
          print(f"Error loading models: {str(e)}")
          return False

  class ClinicalVoiceAnalyzer:
-     """Clinical voice analysis system using Anthropic's Claude for interpretation."""

      def __init__(self):
-         """Initialize the clinical analyzer with reference ranges and API client."""
          self.anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
          self.model = "claude-3-opus-20240229"
          self.reference_ranges = {
@@ -69,86 +149,243 @@ class ClinicalVoiceAnalyzer:
              'tempo': {'min': 90, 'max': 130},
              'energy': {'min': 0.01, 'max': 0.05}
          }
-         print("Clinical analyzer initialized successfully")

-     def analyze_voice_metrics(self, features: Dict, emotions: Dict, transcription: str) -> str:
-         """Perform comprehensive clinical analysis of voice characteristics."""
          try:
-             prompt = self._create_analysis_prompt(features, emotions, transcription)
              response = self.anthropic.messages.create(
                  model=self.model,
                  max_tokens=1000,
-                 messages=[{
-                     "role": "user",
-                     "content": prompt
-                 }]
              )
-             return self._format_clinical_response(response.content)
          except Exception as e:
-             print(f"Error in clinical analysis: {str(e)}")
-             return self._generate_fallback_analysis(features, emotions)

-     def _create_analysis_prompt(self, features: Dict, emotions: Dict, transcription: str) -> str:
-         """Create a detailed prompt for clinical analysis."""
-         return f"""As a clinical voice analysis expert specializing in mental health assessment,
- provide a detailed psychological evaluation based on the following data:

- Voice Characteristics:
- - Pitch: {features['pitch_mean']:.2f} Hz (Normal range: {self.reference_ranges['pitch']['min']}-{self.reference_ranges['pitch']['max']} Hz)
  - Pitch Variation: {features['pitch_std']:.2f} Hz
- - Speech Rate: {features['tempo']:.2f} BPM (Normal range: {self.reference_ranges['tempo']['min']}-{self.reference_ranges['tempo']['max']} BPM)
- - Voice Energy: {features['energy_mean']:.4f} (Normal range: {self.reference_ranges['energy']['min']}-{self.reference_ranges['energy']['max']})

- Emotional Analysis:
  {', '.join(f'{emotion}: {score:.1%}' for emotion, score in emotions.items())}

  Speech Content:
  "{transcription}"

- Please provide a comprehensive clinical assessment including:
- 1. Analysis of voice characteristics and their psychological significance
- 2. Emotional state evaluation and potential underlying patterns
- 3. Assessment of anxiety and depression indicators
  4. Stress level evaluation
- 5. Clinical recommendations and considerations

- Present the analysis in clear sections with specific observations and clinical insights."""

-     def _format_clinical_response(self, analysis: str) -> str:
-         """Format the clinical analysis for clear presentation."""
-         return f"""
- Clinical Analysis:
- {analysis}
- """
-
-     def _generate_fallback_analysis(self, features: Dict, emotions: Dict) -> str:
          """Generate basic analysis when API is unavailable."""
-         dominant_emotion = max(emotions.items(), key=lambda x: x[1])[0]
-         pitch_status = "elevated" if features['pitch_mean'] > self.reference_ranges['pitch']['max'] else \
-             "reduced" if features['pitch_mean'] < self.reference_ranges['pitch']['min'] else "normal"

          return f"""
- Basic Clinical Analysis (API Unavailable):

- Voice Pattern Analysis:
- - Pitch is {pitch_status} ({features['pitch_mean']:.2f} Hz)
- - Speech rate shows {features['tempo']:.2f} BPM
- - Voice energy indicates {features['energy_mean']:.4f} level

- Emotional Indication:
- - Primary emotional tone: {dominant_emotion}
- - Confidence: {max(emotions.values()):.1%}

- Note: This is a basic analysis. For detailed clinical interpretation, please ensure API connectivity.
- """

- [Rest of your existing code for extract_prosodic_features, create_feature_plots, create_emotion_plot, and analyze_audio functions...]

- # Initialize the application with clinical analysis capability
  try:
      print("===== Application Startup =====")
      if not load_models():
-         raise RuntimeError("Failed to load required models")

      # Initialize clinical analyzer
      clinical_analyzer = ClinicalVoiceAnalyzer()
@@ -167,9 +404,28 @@ try:
          gr.HTML(label="Emotion Analysis"),
          gr.HTML(label="Voice Feature Analysis")
      ],
-     title="Advanced Voice Analysis System with Clinical Interpretation",
      description="""
- This application provides comprehensive voice analysis with clinical interpretation:

  1. Voice Features:
  - Pitch analysis (fundamental frequency and variation)
@@ -192,9 +448,14 @@ try:
  - Speak clearly and naturally
  - Keep recordings between 1-5 seconds
  - Maintain consistent volume
- """
      )

  if __name__ == "__main__":
      demo.launch()
+ # Part 1: Essential Imports and Setup
  import gradio as gr
  import torch
  from transformers import WhisperProcessor, WhisperForConditionalGeneration

  import os
  from scipy.stats import kurtosis, skew
  from anthropic import Anthropic

+ # Suppress warnings for cleaner output
  warnings.filterwarnings('ignore')

+ # Initialize global model variables
  processor = None
  whisper_model = None
  emotion_tokenizer = None
  emotion_model = None
  clinical_analyzer = None

+ # Part 2: Model Loading and Initialization
  def load_models():
+     """Load and initialize speech and emotion analysis models."""
      global processor, whisper_model, emotion_tokenizer, emotion_model

      try:
+         # Initialize speech recognition (Whisper) model
          print("Loading Whisper model...")
          processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
          whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")

+         # Initialize emotion detection model
          print("Loading emotion model...")
          emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
          emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")

+         # Set models to CPU for consistent performance
          device = "cpu"
          whisper_model.to(device)
          emotion_model.to(device)

          print("Models loaded successfully!")
          return True
      except Exception as e:
          print(f"Error loading models: {str(e)}")
          return False
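A quick smoke test of this loader (not part of the commit; it assumes the file's existing numpy import as np and simply transcribes one second of silence) could look like:

if load_models():
    silence = np.zeros(16000, dtype=np.float32)  # 1 s of silence at the 16 kHz rate used by the app
    feats = processor(silence, sampling_rate=16000, return_tensors="pt").input_features
    with torch.no_grad():
        ids = whisper_model.generate(feats)
    print(processor.batch_decode(ids, skip_special_tokens=True)[0])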
+ # Part 3: Voice Feature Extraction
+ def extract_prosodic_features(waveform, sr):
+     """Extract voice features including pitch, energy, and rhythm patterns."""
+     try:
+         # Input validation
+         if waveform is None or len(waveform) == 0:
+             return None
+
+         features = {}
+
+         # Pitch analysis with enhanced accuracy
+         try:
+             pitches, magnitudes = librosa.piptrack(
+                 y=waveform,
+                 sr=sr,
+                 fmin=50,        # Minimum human voice frequency
+                 fmax=2000,      # Maximum human voice frequency
+                 n_mels=128,     # Frequency resolution
+                 hop_length=512,
+                 win_length=2048
+             )
+
+             # Extract valid pitch contour
+             f0_contour = [
+                 pitches[magnitudes[:, t].argmax(), t]
+                 for t in range(pitches.shape[1])
+                 if 50 <= pitches[magnitudes[:, t].argmax(), t] <= 2000
+             ]
+
+             # Calculate pitch statistics
+             if f0_contour:
+                 features['pitch_mean'] = float(np.mean(f0_contour))
+                 features['pitch_std'] = float(np.std(f0_contour))
+                 features['pitch_range'] = float(np.ptp(f0_contour))
+             else:
+                 features['pitch_mean'] = 160.0  # Default adult pitch
+                 features['pitch_std'] = 0.0
+                 features['pitch_range'] = 0.0
+
+         except Exception as e:
+             print(f"Pitch extraction error: {e}")
+             features.update({'pitch_mean': 160.0, 'pitch_std': 0.0, 'pitch_range': 0.0})
+
+         # Energy analysis
+         try:
+             rms = librosa.feature.rms(
+                 y=waveform,
+                 frame_length=2048,
+                 hop_length=512,
+                 center=True
+             )[0]
+
+             features.update({
+                 'energy_mean': float(np.mean(rms)),
+                 'energy_std': float(np.std(rms)),
+                 'energy_range': float(np.ptp(rms))
+             })
+         except Exception as e:
+             print(f"Energy extraction error: {e}")
+             features.update({'energy_mean': 0.02, 'energy_std': 0.0, 'energy_range': 0.0})
+
+         # Rhythm analysis
+         try:
+             onset_env = librosa.onset.onset_strength(
+                 y=waveform,
+                 sr=sr,
+                 hop_length=512,
+                 aggregate=np.median
+             )
+
+             tempo = librosa.beat.tempo(
+                 onset_envelope=onset_env,
+                 sr=sr,
+                 hop_length=512,
+                 aggregate=None
+             )[0]
+
+             features['tempo'] = float(tempo) if 40 <= tempo <= 240 else 120.0
+
+         except Exception as e:
+             print(f"Rhythm extraction error: {e}")
+             features['tempo'] = 120.0
+
+         return features
+     except Exception as e:
+         print(f"Feature extraction failed: {e}")
+         return None
+
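A hedged usage sketch (not part of the commit) that exercises the extractor on a synthetic 220 Hz tone; np is the numpy alias the app already imports:

sr_test = 16000
t = np.linspace(0, 1.0, sr_test, endpoint=False)
tone = (0.1 * np.sin(2 * np.pi * 220.0 * t)).astype(np.float32)  # 1 s test tone
print(extract_prosodic_features(tone, sr_test))
# Expect the keys populated above: pitch_mean/std/range, energy_mean/std/range, tempo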
+ # Part 4: Clinical Analysis Integration
  class ClinicalVoiceAnalyzer:
+     """Analyze voice characteristics for psychological indicators."""

      def __init__(self):
+         """Initialize the clinical analyzer with API and reference ranges."""
          self.anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
          self.model = "claude-3-opus-20240229"
          self.reference_ranges = {

              'tempo': {'min': 90, 'max': 130},
              'energy': {'min': 0.01, 'max': 0.05}
          }
+         print("Clinical analyzer ready")

+     def analyze_voice_metrics(self, features, emotions, transcription):
+         """Generate clinical insights from voice and emotion data."""
          try:
+             prompt = self._create_clinical_prompt(features, emotions, transcription)
              response = self.anthropic.messages.create(
                  model=self.model,
                  max_tokens=1000,
+                 messages=[{"role": "user", "content": prompt}]
              )
+             return self._format_analysis(response.content)
          except Exception as e:
+             print(f"Clinical analysis error: {e}")
+             return self._generate_backup_analysis(features, emotions)

+     def _create_clinical_prompt(self, features, emotions, transcription):
+         """Create detailed prompt for clinical analysis."""
+         return f"""As a clinical voice analysis expert, provide a psychological assessment of:

+ Voice Metrics:
+ - Pitch: {features['pitch_mean']:.2f} Hz (Normal: {self.reference_ranges['pitch']['min']}-{self.reference_ranges['pitch']['max']} Hz)
  - Pitch Variation: {features['pitch_std']:.2f} Hz
+ - Speech Rate: {features['tempo']:.2f} BPM (Normal: {self.reference_ranges['tempo']['min']}-{self.reference_ranges['tempo']['max']} BPM)
+ - Voice Energy: {features['energy_mean']:.4f}

+ Emotions Detected:
  {', '.join(f'{emotion}: {score:.1%}' for emotion, score in emotions.items())}

  Speech Content:
  "{transcription}"

+ Provide:
+ 1. Voice characteristic analysis
+ 2. Emotional state assessment
+ 3. Anxiety/depression indicators
  4. Stress level evaluation
+ 5. Clinical recommendations"""

+     def _format_analysis(self, analysis):
+         """Format the clinical analysis output."""
+         return f"\nClinical Assessment:\n{analysis}"

+     def _generate_backup_analysis(self, features, emotions):
          """Generate basic analysis when API is unavailable."""
+         dominant_emotion = max(emotions.items(), key=lambda x: x[1])
+         pitch_status = (
+             "elevated" if features['pitch_mean'] > self.reference_ranges['pitch']['max']
+             else "reduced" if features['pitch_mean'] < self.reference_ranges['pitch']['min']
+             else "normal"
+         )

          return f"""
+ Basic Voice Analysis (API Unavailable):
+ - Pitch Status: {pitch_status} ({features['pitch_mean']:.2f} Hz)
+ - Speech Rate: {features['tempo']:.2f} BPM
+ - Voice Energy Level: {features['energy_mean']:.4f}
+ - Primary Emotion: {dominant_emotion[0]} ({dominant_emotion[1]:.1%} confidence)"""
+
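A small offline sketch (not part of the commit) of the prompt and fallback paths; it assumes ANTHROPIC_API_KEY is set, since __init__ builds the client from that variable, and the dictionaries below are made-up values:

analyzer = ClinicalVoiceAnalyzer()
demo_features = {'pitch_mean': 215.0, 'pitch_std': 22.0, 'tempo': 138.0, 'energy_mean': 0.032}
demo_emotions = {'neutral': 0.55, 'joy': 0.30, 'sadness': 0.15}
print(analyzer._create_clinical_prompt(demo_features, demo_emotions, "short test sentence"))
print(analyzer._generate_backup_analysis(demo_features, demo_emotions))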
+ # Part 5: Visualization Functions
+ def create_feature_plots(features):
+     """Create interactive visualizations of voice features."""
+     try:
+         fig = go.Figure()
+
+         # Pitch visualization
+         pitch_data = {
+             'Mean': features['pitch_mean'],
+             'Std Dev': features['pitch_std'],
+             'Range': features['pitch_range']
+         }
+         fig.add_trace(go.Bar(
+             name='Pitch Features (Hz)',
+             x=list(pitch_data.keys()),
+             y=list(pitch_data.values()),
+             marker_color='blue'
+         ))
+
+         # Energy visualization
+         energy_data = {
+             'Mean': features['energy_mean'],
+             'Std Dev': features['energy_std'],
+             'Range': features['energy_range']
+         }
+         fig.add_trace(go.Bar(
+             name='Energy Features',
+             x=[f"Energy {k}" for k in energy_data.keys()],
+             y=list(energy_data.values()),
+             marker_color='red'
+         ))
+
+         # Tempo visualization
+         fig.add_trace(go.Scatter(
+             name='Speech Rate (BPM)',
+             x=['Tempo'],
+             y=[features['tempo']],
+             mode='markers',
+             marker=dict(size=15, color='green')
+         ))
+
+         # Layout configuration
+         fig.update_layout(
+             title='Voice Feature Analysis',
+             showlegend=True,
+             height=600,
+             barmode='group',
+             xaxis_title='Feature Type',
+             yaxis_title='Value',
+             template='plotly_white'
+         )
+
+         return fig.to_html(include_plotlyjs=True)
+     except Exception as e:
+         print(f"Plot creation error: {e}")
+         return None
+
+ def create_emotion_plot(emotions):
+     """Create visualization of emotional analysis."""
+     try:
+         fig = go.Figure(data=[
+             go.Bar(
+                 x=list(emotions.keys()),
+                 y=list(emotions.values()),
+                 marker_color=['#FF9999', '#66B2FF', '#99FF99',
+                               '#FFCC99', '#FF99CC', '#99FFFF']
+             )
+         ])
+
+         fig.update_layout(
+             title='Emotion Analysis',
+             xaxis_title='Emotion',
+             yaxis_title='Confidence Score',
+             yaxis_range=[0, 1],
+             template='plotly_white',
+             height=400
+         )
+
+         return fig.to_html(include_plotlyjs=True)
+     except Exception as e:
+         print(f"Emotion plot error: {e}")
+         return None
+
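Both helpers return standalone HTML strings (Plotly is embedded via include_plotlyjs=True), so a minimal check outside Gradio (not part of the commit; the filename is illustrative) is to write one to disk and open it in a browser:

emotion_html = create_emotion_plot({'anger': 0.05, 'fear': 0.05, 'joy': 0.50,
                                    'neutral': 0.30, 'sadness': 0.05, 'surprise': 0.05})
if emotion_html:
    with open("emotion_plot.html", "w") as f:  # illustrative output path
        f.write(emotion_html)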
+ # Part 6: Main Analysis Function
+ def analyze_audio(audio_input):
+     """Process audio input and generate comprehensive analysis."""
+     try:
+         # Validate input
+         if audio_input is None:
+             return "Please provide an audio input", None, None
+
+         # Load audio
+         audio_path = audio_input[0] if isinstance(audio_input, tuple) else audio_input
+         waveform, sr = librosa.load(audio_path, sr=16000, duration=30)
+
+         # Validate duration
+         duration = len(waveform) / sr
+         if duration < 0.5:
+             return "Audio too short (minimum 0.5 seconds needed)", None, None
+
+         # Extract features
+         features = extract_prosodic_features(waveform, sr)
+         if features is None:
+             return "Feature extraction failed", None, None
+
+         # Generate visualizations
+         feature_viz = create_feature_plots(features)
+
+         # Perform speech recognition
+         inputs = processor(waveform, sampling_rate=sr, return_tensors="pt").input_features
+         with torch.no_grad():
+             predicted_ids = whisper_model.generate(inputs)
+         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+
+         # Analyze emotions
+         emotion_inputs = emotion_tokenizer(
+             transcription,
+             return_tensors="pt",
+             padding=True,
+             truncation=True,
+             max_length=512
+         )
+
+         with torch.no_grad():
+             emotion_outputs = emotion_model(**emotion_inputs)
+             emotions = torch.nn.functional.softmax(emotion_outputs.logits, dim=-1)
+
+         # Process emotion scores
+         emotion_labels = ['anger', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
+         emotion_scores = {
+             label: float(score)
+             for label, score in zip(emotion_labels, emotions[0].cpu().numpy())
+         }
+
+         emotion_viz = create_emotion_plot(emotion_scores)
+
+         # Generate clinical analysis
+         global clinical_analyzer
+         if clinical_analyzer is None:
+             clinical_analyzer = ClinicalVoiceAnalyzer()
+
+         clinical_analysis = clinical_analyzer.analyze_voice_metrics(
+             features, emotion_scores, transcription
+         )
+
+         # Create comprehensive summary
+         summary = f"""Voice Analysis Summary:

+ Speech Content:
+ {transcription}

+ Voice Characteristics:
+ - Average Pitch: {features['pitch_mean']:.2f} Hz
+ - Pitch Variation: {features['pitch_std']:.2f} Hz
+ - Speech Rate (Tempo): {features['tempo']:.2f} BPM
+ - Voice Energy: {features['energy_mean']:.4f}

+ Dominant Emotion: {max(emotion_scores.items(), key=lambda x: x[1])[0]}
+ Emotion Confidence: {max(emotion_scores.values()):.2%}

+ Recording Duration: {duration:.2f} seconds

+ {clinical_analysis}"""
+
+         return summary, emotion_viz, feature_viz
+
+     except Exception as e:
+         error_msg = f"Analysis failed: {str(e)}"
+         print(error_msg)
+         return error_msg, None, None
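Since the function accepts a plain file path (the tuple branch above covers Gradio's other audio formats), a local dry run (not part of the commit; the path below is a placeholder) is simply:

summary, emotion_html, feature_html = analyze_audio("sample.wav")  # placeholder path
print(summary)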
+ # Part 7: Application Initialization
  try:
      print("===== Application Startup =====")
+
+     # Load required models
      if not load_models():
+         raise RuntimeError("Model loading failed")

      # Initialize clinical analyzer
      clinical_analyzer = ClinicalVoiceAnalyzer()

          gr.HTML(label="Emotion Analysis"),
          gr.HTML(label="Voice Feature Analysis")
      ],
+     title="Voice Analysis System with Clinical Interpretation",
      description="""
+ This application provides comprehensive voice analysis with clinical insights:
+
+ 1. Voice Features:
+ - Pitch analysis (fundamental frequency and variation)
+ - Energy patterns (volume and intensity)
+ - Speech rate (words per minute)
+ - Voice quality metrics
+
+ 2. Clinical Analysis:
+ - Mental health indicators
+ - Emotional state evaluation
+ - Risk assessment
+ - Clinical recommendations
+
+ 3. Emotional Content:
+ - Emotion detection (6 basic emotions)
+ - Emotional intensity analysis
+
+ For optimal results:

  - Speak clearly and naturally
  - Keep recordings between 1-5 seconds
  - Maintain consistent volume
+
+ Upload an audio file or record directly through your microphone.
+ """,
+     examples=None,
+     cache_examples=False
      )

+ # Launch the interface
  if __name__ == "__main__":
      demo.launch()