"""
Speech Emotion Recognition Inference Script
"""
import argparse

import librosa
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder

def extract_feature(data, sr, mfcc=True, chroma=True, mel=True):
    """
    Extract MFCC, chroma, and mel-spectrogram features from a waveform
    and stack them into a single 1-D numpy array. With all three groups
    enabled this yields 40 + 12 + 128 = 180 values per clip.
    """
    result = np.array([])
    if mfcc:
        # 40 MFCCs, averaged over time to a fixed-length vector
        mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        # Chroma features (12 pitch classes) computed from the STFT magnitude
        stft = np.abs(librosa.stft(data))
        chroma_feat = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
        result = np.hstack((result, chroma_feat))
    if mel:
        # Mel spectrogram (128 bands by default), averaged over time
        mel_feat = np.mean(librosa.feature.melspectrogram(y=data, sr=sr).T, axis=0)
        result = np.hstack((result, mel_feat))
    return result
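
# A minimal usage sketch (illustrative; 'example.wav' is a placeholder path):
#   data, sr = librosa.load('example.wav', sr=22050)
#   feats = extract_feature(data, sr)   # -> shape (180,) with the defaults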

def predict_emotion(audio_path, model_path='trained_model.h5'):
    """
    Predict the emotion expressed in an audio file.

    Returns a (label, emoji, confidence_vector) tuple, where
    confidence_vector contains the model's per-class scores.
    """
    # Load audio, resampled to 22,050 Hz (librosa's default rate)
    data, sr = librosa.load(audio_path, sr=22050)
    # Extract the 180-dimensional feature vector
    feature = extract_feature(data, sr, mfcc=True, chroma=True, mel=True)
    # Reshape (180,) -> (1, 180, 1): a batch of one, with a trailing
    # channel axis as expected by a Conv1D-style Keras model
    feature = np.expand_dims(feature, axis=0)
    feature = np.expand_dims(feature, axis=2)
    # Load the trained model and predict
    model = load_model(model_path)
    prediction = model.predict(feature)
    predicted_class = np.argmax(prediction, axis=1)
    # Emotion codes (RAVDESS naming convention) mapped to labels;
    # only the label names are used below
    emotions = {
        '01': 'Neutral',
        '02': 'Calm',
        '03': 'Happy',
        '04': 'Sad',
        '05': 'Angry',
        '06': 'Fearful',
        '07': 'Disgust',
        '08': 'Surprised'
    }
    emojis = {
        'Neutral': '😐',
        'Calm': '😌',
        'Happy': '😊',
        'Sad': '😢',
        'Angry': '😠',
        'Fearful': '😨',
        'Disgust': '🀒',
        'Surprised': '😲'
    }
    # LabelEncoder sorts labels alphabetically (Angry, Calm, Disgust,
    # Fearful, Happy, Neutral, Sad, Surprised); this ordering must match
    # the encoding used at training time for the indices to line up
    label_encoder = LabelEncoder()
    label_encoder.fit(list(emotions.values()))
    predicted_emotion = label_encoder.inverse_transform(predicted_class)[0]
    return predicted_emotion, emojis[predicted_emotion], prediction[0]
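
# A minimal usage sketch (illustrative; 'speech.wav' is a placeholder path
# and the model file must exist):
#   label, emoji, scores = predict_emotion('speech.wav')
#   # 'scores' follows LabelEncoder's alphabetical class order:
#   # Angry, Calm, Disgust, Fearful, Happy, Neutral, Sad, Surprised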

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Predict emotion from an audio file')
    parser.add_argument('audio_path', help='Path to the audio file')
    parser.add_argument('--model', default='trained_model.h5',
                        help='Path to the trained Keras model file')
    args = parser.parse_args()
    try:
        emotion, emoji, confidence = predict_emotion(args.audio_path, args.model)
        print(f"Predicted Emotion: {emotion} {emoji}")
        print(f"Confidence scores: {confidence}")
    except Exception as e:
        print(f"Error: {e}")