File size: 1,637 Bytes
8a69132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from speechbrain.inference.interfaces import foreign_class
from custom_interface import CustomEncoderWav2vec2Classifier
from speechbrain.pretrained import EncoderClassifier

# Single source of truth for the two strings the script reuses below.
MODEL_SOURCE = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
AUDIO_FILE = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav"

# foreign_class() is SpeechBrain's hook for loading a pretrained model behind
# a custom (non-stock) Python interface class shipped alongside the model.
classifier = foreign_class(
    source=MODEL_SOURCE,
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)

# Fetch the pretrained checkpoint (hyperparameters + module weights).
checkpoint = EncoderClassifier.from_hparams(
    source=MODEL_SOURCE,
    savedir="./",  # directory where the downloaded model files are cached
)

# The custom interface expects hparams as a plain dict, not a namespace.
hparams_dict = vars(checkpoint.hparams)

# OpenVINO runtime configuration: CPU device, tuned for low latency.
device = "cpu"
ov_opts = {"device_name": device, "PERFORMANCE_HINT": "LATENCY"}

instance = CustomEncoderWav2vec2Classifier(
    modules=checkpoint.mods,
    hparams=hparams_dict,
    model=classifier.mods["wav2vec2"].model,
    audio_file_path=AUDIO_FILE,
    backend="openvino",
    ov_opts=ov_opts,
    save_ov_model=False,
)


# OpenVINO inference
print("=" * 30)
print(f"[INFO] Inference Device: {ov_opts['device_name']}")
print("=" * 30)
print("\n[INFO] Performing OpenVINO inference...")
out_prob, score, index, text_lab = instance.classify_file(AUDIO_FILE)
# NOTE(review): in the standard SpeechBrain interface, classify_file() returns
# `text_lab` already decoded -- one label string per batch item (batch size 1
# here) -- so the prediction is text_lab[0]. The previous `text_lab[index - 1]`
# re-indexed that decoded list with the *class* index, which raises IndexError
# or picks the wrong entry except by coincidence. Confirm against
# custom_interface.py if its return contract differs.
print(f"[RESULT] OpenVINO Inference Output: {text_lab[0]}")