# NOTE: removed web-viewer extraction residue (file size, commit hash, line-number gutter)
# that was not part of the original Python source and broke parsing.
from speechbrain.inference.interfaces import foreign_class
from custom_interface import CustomEncoderWav2vec2Classifier
# FIX: `speechbrain.pretrained` is deprecated; use the `speechbrain.inference`
# package instead (this file already imports from it on the line above).
from speechbrain.inference import EncoderClassifier

# HuggingFace repo providing both the custom interface and the checkpoint.
MODEL_SOURCE = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
# Sample utterance shipped alongside the model repo.
AUDIO_PATH = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav"

# foreign_class is SpeechBrain's hook for loading a user-supplied PyTorch
# inference interface (here: custom_interface.CustomEncoderWav2vec2Classifier).
classifier = foreign_class(
    source=MODEL_SOURCE,
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)

# Fetch the pretrained checkpoint: its modules and hyperparameters are reused
# to construct the OpenVINO-backed instance below.
checkpoint = EncoderClassifier.from_hparams(
    source=MODEL_SOURCE,
    savedir="./",  # directory where the downloaded model files are cached
)

# The custom interface expects hparams as a plain dict, not a namespace object.
hparams_dict = vars(checkpoint.hparams)

# OpenVINO runtime options; the LATENCY hint optimizes for single-request
# response time rather than batch throughput.
device = "cpu"
ov_opts = {"device_name": device, "PERFORMANCE_HINT": "LATENCY"}

# Wrap the wav2vec2 backbone with the OpenVINO-enabled custom interface.
instance = CustomEncoderWav2vec2Classifier(
    modules=checkpoint.mods,
    hparams=hparams_dict,
    model=classifier.mods["wav2vec2"].model,
    audio_file_path=AUDIO_PATH,
    backend="openvino",
    ov_opts=ov_opts,
    save_ov_model=False,
)

# Run OpenVINO inference on the sample file and report the predicted emotion.
print("=" * 30)
print(f"[INFO] Inference Device: {ov_opts['device_name']}")
print("=" * 30)
print("\n[INFO] Performing OpenVINO inference...")
out_prob, score, index, text_lab = instance.classify_file(AUDIO_PATH)
# NOTE(review): `text_lab[index-1]` assumes text_lab is indexable by the class
# index returned by classify_file — verify against custom_interface.py;
# SpeechBrain's stock interface returns the decoded label directly in text_lab.
print(f"[RESULT] OpenVINO Inference Output: {text_lab[index-1]}")