Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -44,14 +44,16 @@ def predict_text_emotion(text):
|
|
| 44 |
# Extract audio features and predict emotion
|
| 45 |
def extract_audio_features(audio_data, sample_rate):
|
| 46 |
if not isinstance(audio_data, np.ndarray):
|
| 47 |
-
audio_data = np.array(audio_data, dtype=np.float32) # Ensure it
|
| 48 |
else:
|
| 49 |
audio_data = audio_data.astype(np.float32) # Convert to float32
|
| 50 |
|
| 51 |
-
mfcc =
|
| 52 |
-
|
|
|
|
| 53 |
return features
|
| 54 |
|
|
|
|
| 55 |
def predict_audio_emotion(audio_data, sample_rate):
|
| 56 |
features = extract_audio_features(audio_data, sample_rate)
|
| 57 |
features = np.reshape(features, (1, 40)) # Match model expected input
|
|
|
|
| 44 |
# Extract audio features and predict emotion
|
| 45 |
def extract_audio_features(audio_data, sample_rate):
|
| 46 |
if not isinstance(audio_data, np.ndarray):
|
| 47 |
+
audio_data = np.array(audio_data, dtype=np.float32) # Ensure it's a NumPy array with float type
|
| 48 |
else:
|
| 49 |
audio_data = audio_data.astype(np.float32) # Convert to float32
|
| 50 |
|
| 51 |
+
mfcc = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=704)
|
| 52 |
+
mfcc = np.mean(mfcc.T, axis=0) # Compute mean across time
|
| 53 |
+
features = np.expand_dims(mfcc, axis=0) # Add batch dimension
|
| 54 |
return features
|
| 55 |
|
| 56 |
+
|
| 57 |
def predict_audio_emotion(audio_data, sample_rate):
|
| 58 |
features = extract_audio_features(audio_data, sample_rate)
|
| 59 |
features = np.reshape(features, (1, 40)) # Match model expected input
|