"""Emotion recognition on IEMOCAP with a SpeechBrain wav2vec2 classifier.

Loads the pretrained ``speechbrain/emotion-recognition-wav2vec2-IEMOCAP``
model and runs inference on a sample audio file using either the OpenVINO
or the PyTorch backend (selected via ``backend`` below).
"""

from speechbrain.inference.interfaces import foreign_class
from custom_interface import CustomEncoderWav2vec2Classifier
from speechbrain.pretrained import EncoderClassifier
import openvino.properties.hint as hints

# Hub repo that hosts the model checkpoint and the custom interface module.
MODEL_SOURCE = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
# Sample utterance shipped with the model repo.
AUDIO_FILE = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav"

# foreign_class is SpeechBrain's hook for loading custom PyTorch model
# interfaces that live alongside the checkpoint on the Hub.
classifier = foreign_class(
    source=MODEL_SOURCE,
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)

# Download the checkpoint files and hyperparameters.
checkpoint = EncoderClassifier.from_hparams(
    source=MODEL_SOURCE,
    savedir="./",  # directory to cache the downloaded model
)

# hparams is a namespace-like object; the custom interface expects a dict.
hparams_dict = vars(checkpoint.hparams)

# --- Inference backend selection -------------------------------------------
backend = "openvino"  # "openvino" or "pytorch"
torch_device = "cpu"

if backend == "openvino":
    # OpenVINO runs on CPU here; reject any other torch device up front so we
    # never reach inference with `instance` undefined.
    if torch_device != "cpu":
        raise ValueError(
            "OpenVINO backend does not support CUDA devices. "
            "Please use cpu for torch_device."
        )
    # OpenVINO inference optimization parameters: favor throughput.
    config = {hints.performance_mode: hints.PerformanceMode.THROUGHPUT}
    opts = {"ov_device": "CPU", "config": config}
    instance = CustomEncoderWav2vec2Classifier(
        modules=checkpoint.mods,
        hparams=hparams_dict,
        model=classifier.mods["wav2vec2"].model,
        audio_file_path=AUDIO_FILE,
        backend="openvino",
        opts=opts,
        torch_device=torch_device,
        save_ov_model=False,
    )
    device_label = opts["ov_device"]
    backend_label = "OpenVINO"
elif backend == "pytorch":
    opts = {"torch_device": torch_device}
    instance = CustomEncoderWav2vec2Classifier(
        modules=checkpoint.mods,
        hparams=hparams_dict,
        model=classifier.mods["wav2vec2"].model,
        audio_file_path=AUDIO_FILE,
        backend="pytorch",
        opts=opts,
        torch_device=torch_device,
    )
    device_label = opts["torch_device"]
    backend_label = "PyTorch"
else:
    # Fail loudly instead of falling through with `instance` undefined.
    raise ValueError(f"Unsupported backend: {backend!r}")

# --- Run inference ----------------------------------------------------------
print("=" * 30)
print(f"[INFO] Inference Device: {device_label}")
print("=" * 30)
print(f"\n[INFO] Performing {backend_label} inference...")

out_prob, score, index, text_lab = instance.classify_file(AUDIO_FILE)
# NOTE(review): `index - 1` treats the predicted index as 1-based; SpeechBrain
# classifiers typically return a 0-based index — verify against the custom
# interface's classify_file implementation.
print(f"[RESULT] Inference output label: {text_lab[index - 1]}")