# Fast_api/emotion/emo_predict.py: speech emotion recognition with Wav2Vec2
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
import librosa
import torch
# Load the feature extractor and model
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("r-f/wav2vec-english-speech-emotion-recognition")
model = Wav2Vec2ForSequenceClassification.from_pretrained("r-f/wav2vec-english-speech-emotion-recognition")
model.eval()  # put the model in inference mode (disables dropout)
def predict_emotion(audio_path):
    """Predict the emotion expressed in an audio file and return its label."""
    # Load audio as mono at 16 kHz, the sampling rate the model expects
    audio, rate = librosa.load(audio_path, sr=16000)
    # Convert the waveform into padded model input tensors
    inputs = feature_extractor(audio, sampling_rate=rate, return_tensors="pt", padding=True)
    # Run inference without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)
    # Turn logits into probabilities and pick the most likely class
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    pred_id = torch.argmax(probs, dim=-1).item()
    # Map the class index to its human-readable emotion label
    emotion = model.config.id2label[pred_id]
    return emotion
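
# A small sketch (not part of the original file): a hypothetical helper that
# returns the full label -> probability mapping instead of only the top label.
# The name predict_emotion_scores is an assumption for illustration.
def predict_emotion_scores(audio_path):
    """Return a dict mapping each emotion label to its predicted probability."""
    audio, rate = librosa.load(audio_path, sr=16000)
    inputs = feature_extractor(audio, sampling_rate=rate, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Softmax over the single batch item yields one probability per label
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
    return {model.config.id2label[i]: float(p) for i, p in enumerate(probs)}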
# Example usage
# emotion = predict_emotion(r"D:\Intern\shankh\audio_samples\anga.wav")
# print(f"Predicted Emotion: {emotion}")
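
# A minimal sketch of how predict_emotion might be exposed as a FastAPI route,
# kept commented out like the example above. The route path, app object, and
# temp-file handling are assumptions; the repo's real API wiring may differ.
# from fastapi import FastAPI, File, UploadFile
# import tempfile
#
# app = FastAPI()
#
# @app.post("/predict-emotion")
# async def predict_emotion_endpoint(file: UploadFile = File(...)):
#     # Write the upload to a temp file so librosa can load it from a path
#     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
#         tmp.write(await file.read())
#         tmp_path = tmp.name
#     return {"emotion": predict_emotion(tmp_path)}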