Edit model card

Working example of using pretrained model to predict emotion in local audio file


def predict_emotion_hubert(audio_file):
    """ inspired by an example from https://github.com/m3hrdadfi/soxan """
    from audio_models import HubertForSpeechClassification
    from transformers import  Wav2Vec2FeatureExtractor, AutoConfig
    import torch.nn.functional as F
    import torch
    import numpy as np
    from pydub import AudioSegment

    model = HubertForSpeechClassification.from_pretrained("Rajaram1996/Hubert_emotion") # Downloading: 362M
    feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/hubert-base-ls960")
    sampling_rate=16000 # defined by the model; must convert mp3 to this rate.
    config = AutoConfig.from_pretrained("Rajaram1996/Hubert_emotion")

    def speech_file_to_array(path, sampling_rate):
        # using torchaudio...
        # speech_array, _sampling_rate = torchaudio.load(path)
        # resampler = torchaudio.transforms.Resample(_sampling_rate, sampling_rate)
        # speech = resampler(speech_array).squeeze().numpy()
        sound = AudioSegment.from_file(path)
        sound = sound.set_frame_rate(sampling_rate)
        sound_array = np.array(sound.get_array_of_samples())
        return sound_array

    sound_array = speech_file_to_array(audio_file, sampling_rate)
    inputs = feature_extractor(sound_array, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
    inputs = {key: inputs[key].to("cpu").float() for key in inputs}

    with torch.no_grad():
        logits = model(**inputs).logits

    scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
    outputs = [{
        "emo": config.id2label[i],
        "score": round(score * 100, 1)}
        for i, score in enumerate(scores)
    ]
    return [row for row in sorted(outputs, key=lambda x:x["score"], reverse=True) if row['score'] != '0.0%'][:2]

result = predict_emotion_hubert("male-crying.mp3")
>>> result
[{'emo': 'male_sad', 'score': 91.0}, {'emo': 'male_fear', 'score': 4.8}]
Downloads last month
6,618
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Spaces using Rajaram1996/Hubert_emotion 2