psakamoori
committed on
Commit
•
b02e0f2
1
Parent(s):
50b4d3f
Add torch_device and update ov opts params
Browse files- custom_interface.py +13 -8
custom_interface.py
CHANGED
@@ -36,8 +36,10 @@ class CustomEncoderWav2vec2Classifier(Pretrained):
|
|
36 |
"""
|
37 |
|
38 |
def __init__(self, *args, model=None,
|
39 |
-
audio_file_path=None,
|
40 |
-
|
|
|
|
|
41 |
save_ov_model=False,
|
42 |
**kwargs):
|
43 |
super().__init__(*args, **kwargs)
|
@@ -49,23 +51,26 @@ class CustomEncoderWav2vec2Classifier(Pretrained):
|
|
49 |
|
50 |
self.core = ov.Core()
|
51 |
self.ov_model = None
|
52 |
-
|
53 |
if model:
|
54 |
print("\n[INFO] Preparing OpenVINO model...")
|
55 |
self.get_ov_model(model, audio_file_path)
|
56 |
print("[SUCCESS] OpenVINO IR model compiled for inference!\n")
|
57 |
if self.ov_model:
|
58 |
-
self.device = ov_opts["device_name"]
|
59 |
print("[INFO] Compiling OpenVINO IR model for inference...")
|
60 |
-
self.compiled_model = self.core.compile_model(self.ov_model,
|
|
|
|
|
61 |
print("[SUCCESS] OpenVINO IR model compiled for inference!\n")
|
62 |
-
|
63 |
if save_ov_model:
|
64 |
# set to default path
|
65 |
print("[INFO] Saving OpenVINO IR model to disk!\n")
|
66 |
ov_ir_file_path = "./openvino_model/fp32/speechbrain_emotion_recog_ov_ir_model.xml"
|
67 |
ov.save_model(self.ov_model, ov_ir_file_path)
|
68 |
print(f"[SUCCESS] OpenVINO IR model file saved at {ov_ir_file_path}!\n")
|
|
|
|
|
69 |
|
70 |
def encode_batch(self, wavs, wav_lens=None, normalize=False):
|
71 |
"""Encodes the input audio into a single vector embedding.
|
@@ -100,10 +105,10 @@ class CustomEncoderWav2vec2Classifier(Pretrained):
|
|
100 |
|
101 |
# Assign full length if wav_lens is not assigned
|
102 |
if wav_lens is None:
|
103 |
-
wav_lens = torch.ones(wavs.shape[0], device=self.
|
104 |
|
105 |
# Storing waveform in the specified device
|
106 |
-
wavs, wav_lens = wavs.to(self.
|
107 |
wavs = wavs.float()
|
108 |
|
109 |
if self.backend == "pytorch":
|
|
|
36 |
"""
|
37 |
|
38 |
def __init__(self, *args, model=None,
|
39 |
+
audio_file_path=None,
|
40 |
+
backend="pytorch",
|
41 |
+
opts=None,
|
42 |
+
torch_device="cpu",
|
43 |
save_ov_model=False,
|
44 |
**kwargs):
|
45 |
super().__init__(*args, **kwargs)
|
|
|
51 |
|
52 |
self.core = ov.Core()
|
53 |
self.ov_model = None
|
54 |
+
self.torch_device = torch_device
|
55 |
if model:
|
56 |
print("\n[INFO] Preparing OpenVINO model...")
|
57 |
self.get_ov_model(model, audio_file_path)
|
58 |
print("[SUCCESS] OpenVINO IR model compiled for inference!\n")
|
59 |
if self.ov_model:
|
|
|
60 |
print("[INFO] Compiling OpenVINO IR model for inference...")
|
61 |
+
self.compiled_model = self.core.compile_model(self.ov_model,
|
62 |
+
device_name=opts["ov_device"],
|
63 |
+
config=opts["config"])
|
64 |
print("[SUCCESS] OpenVINO IR model compiled for inference!\n")
|
65 |
+
# Flag to save openvino ir model file to disk
|
66 |
if save_ov_model:
|
67 |
# set to default path
|
68 |
print("[INFO] Saving OpenVINO IR model to disk!\n")
|
69 |
ov_ir_file_path = "./openvino_model/fp32/speechbrain_emotion_recog_ov_ir_model.xml"
|
70 |
ov.save_model(self.ov_model, ov_ir_file_path)
|
71 |
print(f"[SUCCESS] OpenVINO IR model file saved at {ov_ir_file_path}!\n")
|
72 |
+
elif backend == "pytorch":
|
73 |
+
self.torch_device = opts["torch_device"]
|
74 |
|
75 |
def encode_batch(self, wavs, wav_lens=None, normalize=False):
|
76 |
"""Encodes the input audio into a single vector embedding.
|
|
|
105 |
|
106 |
# Assign full length if wav_lens is not assigned
|
107 |
if wav_lens is None:
|
108 |
+
wav_lens = torch.ones(wavs.shape[0], device=self.torch_device)
|
109 |
|
110 |
# Storing waveform in the specified device
|
111 |
+
wavs, wav_lens = wavs.to(self.torch_device), wav_lens.to(self.torch_device)
|
112 |
wavs = wavs.float()
|
113 |
|
114 |
if self.backend == "pytorch":
|