psakamoori
committed on
Commit
•
50b4d3f
1
Parent(s):
606f39a
Add backend and target device check
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
from speechbrain.inference.interfaces import foreign_class
|
2 |
from custom_interface import CustomEncoderWav2vec2Classifier
|
3 |
from speechbrain.pretrained import EncoderClassifier
|
|
|
4 |
|
5 |
# Function in SpeechBrain to load and use custom PyTorch models
|
6 |
classifier = foreign_class(
|
@@ -18,22 +19,45 @@ checkpoint = EncoderClassifier.from_hparams(
|
|
18 |
# Convert hparams to a dictionary
|
19 |
hparams_dict = vars(checkpoint.hparams)
|
20 |
|
21 |
-
#
|
22 |
-
|
23 |
-
|
24 |
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
hparams=hparams_dict, model=classifier.mods["wav2vec2"].model,
|
27 |
audio_file_path="speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav",
|
28 |
backend="openvino",
|
29 |
-
|
|
|
30 |
save_ov_model=False)
|
|
|
|
|
|
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
# OpenVINO inference
|
34 |
print("=" * 30)
|
35 |
-
|
36 |
-
print("
|
37 |
-
print("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
out_prob, score, index, text_lab = instance.classify_file("speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav")
|
39 |
print(f"[RESULT] OpenVINO Inference Output: {text_lab[index-1]}")
|
|
|
1 |
from speechbrain.inference.interfaces import foreign_class
|
2 |
from custom_interface import CustomEncoderWav2vec2Classifier
|
3 |
from speechbrain.pretrained import EncoderClassifier
|
4 |
+
import openvino.properties.hint as hints
|
5 |
|
6 |
# Function in SpeechBrain to load and use custom PyTorch models
|
7 |
classifier = foreign_class(
|
|
|
# Convert hparams to a plain dictionary so individual entries can be handed
# to the custom classifier constructor below.
hparams_dict = vars(checkpoint.hparams)

# Inference backend selection: "openvino" or "pytorch".
backend = "openvino"
torch_device = "cpu"

# Audio clip used both for model export tracing and for inference.
audio_file = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav"

if backend == "openvino":
    if torch_device != "cpu":
        # The OpenVINO export path needs the source torch model on CPU;
        # fail fast with a clear message instead of a downstream error.
        raise ValueError(
            f"OpenVINO backend does not support torch_device={torch_device!r}. "
            "Please use cpu for torch_device."
        )
    # OpenVINO inference optimization parameters. A request-count hint
    # (hints.num_requests) can be added here for further throughput tuning.
    config = {hints.performance_mode: hints.PerformanceMode.THROUGHPUT}
    ov_opts = {"ov_device": "CPU", "config": config}
    instance = CustomEncoderWav2vec2Classifier(
        modules=checkpoint.mods,
        hparams=hparams_dict,
        model=classifier.mods["wav2vec2"].model,
        audio_file_path=audio_file,
        backend="openvino",
        opts=ov_opts,
        torch_device=torch_device,
        save_ov_model=False,
    )
elif backend == "pytorch":
    torch_opts = {"torch_device": torch_device}
    instance = CustomEncoderWav2vec2Classifier(
        modules=checkpoint.mods,
        hparams=hparams_dict,
        model=classifier.mods["wav2vec2"].model,
        audio_file_path=audio_file,
        backend="pytorch",
        opts=torch_opts,
        torch_device=torch_device,
    )
else:
    # Without this guard an unrecognized backend would fall through both
    # branches and `instance` would be unbound (NameError at classify_file).
    raise ValueError(
        f"Unsupported backend: {backend!r}. Use 'openvino' or 'pytorch'."
    )

# Report the selected backend/device, then run inference.
if backend == "openvino":
    backend_label = "OpenVINO"
    device_label = ov_opts["ov_device"]
else:
    backend_label = "PyTorch"
    device_label = torch_opts["torch_device"]

print("=" * 30)
print(f"[INFO] Inference Device: {device_label}")
print("=" * 30)
print(f"\n[INFO] Performing {backend_label} inference...")

out_prob, score, index, text_lab = instance.classify_file(audio_file)
# Use the backend label here too — the original hardcoded "OpenVINO" even
# when the PyTorch path was taken.
print(f"[RESULT] {backend_label} Inference Output: {text_lab[index-1]}")
|