"""Run speech emotion recognition on a sample file with an OpenVINO backend.

The script loads the ``speechbrain/emotion-recognition-wav2vec2-IEMOCAP``
model twice -- once through ``foreign_class`` (to obtain the wav2vec2 model)
and once through ``EncoderClassifier.from_hparams`` (to obtain the modules and
hyperparameters) -- then builds a ``CustomEncoderWav2vec2Classifier`` with
``backend="openvino"`` and classifies a sample audio file.
"""

from speechbrain.inference.classifiers import EncoderClassifier
from speechbrain.inference.interfaces import foreign_class

from custom_interface import CustomEncoderWav2vec2Classifier

# Model identifier and sample audio path, shared by every step below.
MODEL_SOURCE = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
AUDIO_FILE = f"{MODEL_SOURCE}/anger.wav"

# Function in SpeechBrain to load and use custom PyTorch models
classifier = foreign_class(
    source=MODEL_SOURCE,
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)

# Model checkpoint files
checkpoint = EncoderClassifier.from_hparams(
    source=MODEL_SOURCE,
    savedir="./",  # Directory to save the model
)

# Convert hparams to a dictionary
hparams_dict = vars(checkpoint.hparams)

# OpenVINO inference optimization parameters
device = "cpu"
ov_opts = {"device_name": device, "PERFORMANCE_HINT": "LATENCY"}

# Build the OpenVINO-backed classifier from the checkpoint's modules and
# hyperparameters, reusing the wav2vec2 model loaded via foreign_class.
instance = CustomEncoderWav2vec2Classifier(
    modules=checkpoint.mods,
    hparams=hparams_dict,
    model=classifier.mods["wav2vec2"].model,
    audio_file_path=AUDIO_FILE,
    backend="openvino",
    ov_opts=ov_opts,
    save_ov_model=False,
)

# OpenVINO inference
print("=" * 30)
print(f"[INFO] Inference Device: {ov_opts['device_name']}")
print("=" * 30)

print("\n[INFO] Performing OpenVINO inference...")
out_prob, score, index, text_lab = instance.classify_file(AUDIO_FILE)

# NOTE(review): the `index-1` offset depends on what the custom
# `classify_file` returns for `index` and `text_lab`; confirm against
# custom_interface.py before relying on it for other inputs.
print(f"[RESULT] OpenVINO Inference Output: {text_lab[index-1]}")