Aduc_sdr

Paused

App Files Files Community

euiia committed on Sep 4

Commit

3480092

verified ·

1 Parent(s): 9fcad90

Rename managers/audio_specialist.py to managers/mmaudio_manager.py

Browse files

Files changed (1) hide show

managers/{audio_specialist.py → mmaudio_manager.py} +47 -61

managers/{audio_specialist.py → mmaudio_manager.py} RENAMED Viewed

@@ -1,10 +1,10 @@
-# audio_specialist.py
 #
 # Copyright (C) August 4, 2025  Carlos Rodrigues dos Santos
 #
-# Version: 2.2.0
 #
-# This file defines the Audio Specialist for the ADUC-SDR framework. It is responsible
 # for generating audio synchronized with video clips. This version has been refactored
 # to be self-contained by automatically cloning the MMAudio dependency from its
 # official repository, making the framework more portable and easier to set up.
@@ -27,11 +27,42 @@ DEPS_DIR = Path("./deps")
 MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
 MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
-class AudioSpecialist:
     """
-    Specialist responsible for generating audio for video fragments.
-    Manages the loading and unloading of audio models from VRAM and handles
-    its own code dependencies by cloning the MMAudio repository.
     """
     def __init__(self, workspace_dir):
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -39,10 +70,7 @@ class AudioSpecialist:
         self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
         self.workspace_dir = workspace_dir
-        self._mmaudio_modules_loaded = False
-        self._setup_dependencies()
-        self._lazy_load_mmaudio_modules()
         self.model_config: 'ModelConfig' = self.all_model_cfg['large_44k_v2']
         self.net: 'MMAudio' = None
         self.feature_utils: 'FeaturesUtils' = None
@@ -50,53 +78,11 @@ class AudioSpecialist:
         self._load_models_to_cpu()
-    def _setup_dependencies(self):
-        """
-        Checks for the MMAudio repository locally. If not found, clones it.
-        Then, it adds the repository to the Python path to make its modules importable.
-        """
-        if not MMAUDIO_REPO_DIR.exists():
-            logger.info(f"MMAudio repository not found at '{MMAUDIO_REPO_DIR}'. Cloning from GitHub...")
-            try:
-                DEPS_DIR.mkdir(exist_ok=True)
-                subprocess.run(
-                    ["git", "clone", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)],
-                    check=True, capture_output=True, text=True
-                )
-                logger.info("MMAudio repository cloned successfully.")
-            except subprocess.CalledProcessError as e:
-                logger.error(f"Failed to clone MMAudio repository. Git stderr: {e.stderr}")
-                raise RuntimeError("Could not clone the required MMAudio dependency from GitHub.")
-        else:
-            logger.info("Found local MMAudio repository.")
-        if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path:
-            sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve()))
-            logger.info(f"Added '{MMAUDIO_REPO_DIR.resolve()}' to sys.path.")
-    def _lazy_load_mmaudio_modules(self):
-        """Dynamically imports MMAudio modules only when needed."""
-        if self._mmaudio_modules_loaded:
-            return
-        # These globals are now populated by the lazy loader
-        global ModelConfig, all_model_cfg, mmaudio_generate, load_video, make_video
-        global FlowMatching, MMAudio, get_my_mmaudio, FeaturesUtils, SequenceConfig
-        from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
-        from mmaudio.model.flow_matching import FlowMatching
-        from mmaudio.model.networks import MMAudio, get_my_mmaudio
-        from mmaudio.model.utils.features_utils import FeaturesUtils
-        from mmaudio.model.sequence_config import SequenceConfig
-        self.all_model_cfg = all_model_cfg
-        self._mmaudio_modules_loaded = True
-        logger.info("MMAudio modules have been dynamically loaded.")
     def _adjust_paths_for_repo(self):
         """Adjusts the checkpoint paths in the model config to point inside the cloned repo."""
         for cfg_key in self.all_model_cfg:
             cfg = self.all_model_cfg[cfg_key]
             cfg.model_path = MMAUDIO_REPO_DIR / cfg.model_path
             cfg.vae_path = MMAUDIO_REPO_DIR / cfg.vae_path
             if cfg.bigvgan_16k_path is not None:
@@ -128,7 +114,7 @@ class AudioSpecialist:
             self.feature_utils = self.feature_utils.eval()
             self.net.to(self.cpu_device)
             self.feature_utils.to(self.cpu_device)
-            logger.info("Audio Specialist ready on CPU.")
         except Exception as e:
             logger.error(f"Failed to load audio models: {e}", exc_info=True)
             self.net = None
@@ -136,14 +122,14 @@ class AudioSpecialist:
     def to_gpu(self):
         """Moves the models and utilities to the GPU before inference."""
         if self.device == 'cpu': return
-        logger.info(f"Moving Audio Specialist to GPU ({self.device})...")
         self.net.to(self.device, self.dtype)
         self.feature_utils.to(self.device, self.dtype)
     def to_cpu(self):
         """Moves the models back to CPU and clears VRAM after inference."""
         if self.device == 'cpu': return
-        logger.info("Unloading Audio Specialist from GPU...")
         self.net.to(self.cpu_device)
         self.feature_utils.to(self.cpu_device)
         gc.collect()
@@ -201,12 +187,12 @@ class AudioSpecialist:
         finally:
             self.to_cpu()
-# Singleton instantiation
 try:
     with open("config.yaml", 'r') as f:
         config = yaml.safe_load(f)
     WORKSPACE_DIR = config['application']['workspace_dir']
-    audio_specialist_singleton = AudioSpecialist(workspace_dir=WORKSPACE_DIR)
 except Exception as e:
-    logger.error(f"Could not initialize AudioSpecialist: {e}", exc_info=True)
-    audio_specialist_singleton = None

+# managers/mmaudio_manager.py
 #
 # Copyright (C) August 4, 2025  Carlos Rodrigues dos Santos
 #
+# Version: 2.3.0
 #
+# This file defines the MMAudioManager for the ADUC-SDR framework. It is responsible
 # for generating audio synchronized with video clips. This version has been refactored
 # to be self-contained by automatically cloning the MMAudio dependency from its
 # official repository, making the framework more portable and easier to set up.
 MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
 MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
+def setup_mmaudio_dependencies():
     """
+    Ensure the MMAudio repository is cloned locally and importable.
+
+    When MMAUDIO_REPO_DIR does not exist, performs a shallow (depth 1)
+    `git clone` of MMAUDIO_REPO_URL into it. In every case the resolved
+    repository path is then placed at the front of sys.path if it is not
+    already present. This function is called once at import time, directly
+    after its definition.
+
+    Raises:
+        RuntimeError: if `git clone` exits with a non-zero status. The
+            original CalledProcessError is chained as the cause.
+    """
+    if not MMAUDIO_REPO_DIR.exists():
+        logger.info(f"MMAudio repository not found at '{MMAUDIO_REPO_DIR}'. Cloning from GitHub...")
+        try:
+            # parents=True is harmless for a single-level DEPS_DIR and keeps
+            # this working if DEPS_DIR is ever changed to a nested path.
+            DEPS_DIR.mkdir(parents=True, exist_ok=True)
+            subprocess.run(
+                ["git", "clone", "--depth", "1", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)],
+                check=True, capture_output=True, text=True
+            )
+            logger.info("MMAudio repository cloned successfully.")
+        except subprocess.CalledProcessError as e:
+            logger.error(f"Failed to clone MMAudio repository. Git stderr: {e.stderr}")
+            # Chain the cause so the git failure stays visible in the traceback.
+            raise RuntimeError("Could not clone the required MMAudio dependency from GitHub.") from e
+    else:
+        logger.info("Found local MMAudio repository.")
+    # Resolve once: the same absolute path is checked, inserted and logged.
+    repo_path = str(MMAUDIO_REPO_DIR.resolve())
+    if repo_path not in sys.path:
+        sys.path.insert(0, repo_path)
+        logger.info(f"Added '{repo_path}' to sys.path.")
+setup_mmaudio_dependencies()
+from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
+from mmaudio.model.flow_matching import FlowMatching
+from mmaudio.model.networks import MMAudio, get_my_mmaudio
+from mmaudio.model.utils.features_utils import FeaturesUtils
+from mmaudio.model.sequence_config import SequenceConfig
+class MMAudioManager:
+    """
+    Manages the MMAudio model for audio generation tasks.
     """
     def __init__(self, workspace_dir):
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
         self.workspace_dir = workspace_dir
+        self.all_model_cfg = all_model_cfg
         self.model_config: 'ModelConfig' = self.all_model_cfg['large_44k_v2']
         self.net: 'MMAudio' = None
         self.feature_utils: 'FeaturesUtils' = None
         self._load_models_to_cpu()
     def _adjust_paths_for_repo(self):
         """Adjusts the checkpoint paths in the model config to point inside the cloned repo."""
         for cfg_key in self.all_model_cfg:
             cfg = self.all_model_cfg[cfg_key]
+            # The paths in the original config are relative, so we join them with our repo path
             cfg.model_path = MMAUDIO_REPO_DIR / cfg.model_path
             cfg.vae_path = MMAUDIO_REPO_DIR / cfg.vae_path
             if cfg.bigvgan_16k_path is not None:
             self.feature_utils = self.feature_utils.eval()
             self.net.to(self.cpu_device)
             self.feature_utils.to(self.cpu_device)
+            logger.info("MMAudioManager ready on CPU.")
         except Exception as e:
             logger.error(f"Failed to load audio models: {e}", exc_info=True)
             self.net = None
     def to_gpu(self):
         """Moves the models and utilities to the GPU before inference."""
         if self.device == 'cpu': return
+        logger.info(f"Moving MMAudioManager to GPU ({self.device})...")
         self.net.to(self.device, self.dtype)
         self.feature_utils.to(self.device, self.dtype)
     def to_cpu(self):
         """Moves the models back to CPU and clears VRAM after inference."""
         if self.device == 'cpu': return
+        logger.info("Unloading MMAudioManager from GPU...")
         self.net.to(self.cpu_device)
         self.feature_utils.to(self.cpu_device)
         gc.collect()
         finally:
             self.to_cpu()
+# --- Singleton Instantiation ---
 try:
     with open("config.yaml", 'r') as f:
         config = yaml.safe_load(f)
     WORKSPACE_DIR = config['application']['workspace_dir']
+    mmaudio_manager_singleton = MMAudioManager(workspace_dir=WORKSPACE_DIR)
 except Exception as e:
+    logger.error(f"Could not initialize MMAudioManager: {e}", exc_info=True)
+    mmaudio_manager_singleton = None