Rename managers/audio_specialist.py to managers/mmaudio_manager.py
Browse files
managers/{audio_specialist.py → mmaudio_manager.py}
RENAMED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
-
#
|
| 2 |
#
|
| 3 |
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
|
| 4 |
#
|
| 5 |
-
# Version: 2.
|
| 6 |
#
|
| 7 |
-
# This file defines the
|
| 8 |
# for generating audio synchronized with video clips. This version has been refactored
|
| 9 |
# to be self-contained by automatically cloning the MMAudio dependency from its
|
| 10 |
# official repository, making the framework more portable and easier to set up.
|
|
@@ -27,11 +27,42 @@ DEPS_DIR = Path("./deps")
|
|
| 27 |
MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
|
| 28 |
MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
|
| 29 |
|
| 30 |
-
|
| 31 |
"""
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
"""
|
| 36 |
def __init__(self, workspace_dir):
|
| 37 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -39,10 +70,7 @@ class AudioSpecialist:
|
|
| 39 |
self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
|
| 40 |
self.workspace_dir = workspace_dir
|
| 41 |
|
| 42 |
-
self.
|
| 43 |
-
self._setup_dependencies()
|
| 44 |
-
self._lazy_load_mmaudio_modules()
|
| 45 |
-
|
| 46 |
self.model_config: 'ModelConfig' = self.all_model_cfg['large_44k_v2']
|
| 47 |
self.net: 'MMAudio' = None
|
| 48 |
self.feature_utils: 'FeaturesUtils' = None
|
|
@@ -50,53 +78,11 @@ class AudioSpecialist:
|
|
| 50 |
|
| 51 |
self._load_models_to_cpu()
|
| 52 |
|
| 53 |
-
def _setup_dependencies(self):
|
| 54 |
-
"""
|
| 55 |
-
Checks for the MMAudio repository locally. If not found, clones it.
|
| 56 |
-
Then, it adds the repository to the Python path to make its modules importable.
|
| 57 |
-
"""
|
| 58 |
-
if not MMAUDIO_REPO_DIR.exists():
|
| 59 |
-
logger.info(f"MMAudio repository not found at '{MMAUDIO_REPO_DIR}'. Cloning from GitHub...")
|
| 60 |
-
try:
|
| 61 |
-
DEPS_DIR.mkdir(exist_ok=True)
|
| 62 |
-
subprocess.run(
|
| 63 |
-
["git", "clone", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)],
|
| 64 |
-
check=True, capture_output=True, text=True
|
| 65 |
-
)
|
| 66 |
-
logger.info("MMAudio repository cloned successfully.")
|
| 67 |
-
except subprocess.CalledProcessError as e:
|
| 68 |
-
logger.error(f"Failed to clone MMAudio repository. Git stderr: {e.stderr}")
|
| 69 |
-
raise RuntimeError("Could not clone the required MMAudio dependency from GitHub.")
|
| 70 |
-
else:
|
| 71 |
-
logger.info("Found local MMAudio repository.")
|
| 72 |
-
|
| 73 |
-
if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path:
|
| 74 |
-
sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve()))
|
| 75 |
-
logger.info(f"Added '{MMAUDIO_REPO_DIR.resolve()}' to sys.path.")
|
| 76 |
-
|
| 77 |
-
def _lazy_load_mmaudio_modules(self):
|
| 78 |
-
"""Dynamically imports MMAudio modules only when needed."""
|
| 79 |
-
if self._mmaudio_modules_loaded:
|
| 80 |
-
return
|
| 81 |
-
|
| 82 |
-
# These globals are now populated by the lazy loader
|
| 83 |
-
global ModelConfig, all_model_cfg, mmaudio_generate, load_video, make_video
|
| 84 |
-
global FlowMatching, MMAudio, get_my_mmaudio, FeaturesUtils, SequenceConfig
|
| 85 |
-
|
| 86 |
-
from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
|
| 87 |
-
from mmaudio.model.flow_matching import FlowMatching
|
| 88 |
-
from mmaudio.model.networks import MMAudio, get_my_mmaudio
|
| 89 |
-
from mmaudio.model.utils.features_utils import FeaturesUtils
|
| 90 |
-
from mmaudio.model.sequence_config import SequenceConfig
|
| 91 |
-
|
| 92 |
-
self.all_model_cfg = all_model_cfg
|
| 93 |
-
self._mmaudio_modules_loaded = True
|
| 94 |
-
logger.info("MMAudio modules have been dynamically loaded.")
|
| 95 |
-
|
| 96 |
def _adjust_paths_for_repo(self):
|
| 97 |
"""Adjusts the checkpoint paths in the model config to point inside the cloned repo."""
|
| 98 |
for cfg_key in self.all_model_cfg:
|
| 99 |
cfg = self.all_model_cfg[cfg_key]
|
|
|
|
| 100 |
cfg.model_path = MMAUDIO_REPO_DIR / cfg.model_path
|
| 101 |
cfg.vae_path = MMAUDIO_REPO_DIR / cfg.vae_path
|
| 102 |
if cfg.bigvgan_16k_path is not None:
|
|
@@ -128,7 +114,7 @@ class AudioSpecialist:
|
|
| 128 |
self.feature_utils = self.feature_utils.eval()
|
| 129 |
self.net.to(self.cpu_device)
|
| 130 |
self.feature_utils.to(self.cpu_device)
|
| 131 |
-
logger.info("
|
| 132 |
except Exception as e:
|
| 133 |
logger.error(f"Failed to load audio models: {e}", exc_info=True)
|
| 134 |
self.net = None
|
|
@@ -136,14 +122,14 @@ class AudioSpecialist:
|
|
| 136 |
def to_gpu(self):
|
| 137 |
"""Moves the models and utilities to the GPU before inference."""
|
| 138 |
if self.device == 'cpu': return
|
| 139 |
-
logger.info(f"Moving
|
| 140 |
self.net.to(self.device, self.dtype)
|
| 141 |
self.feature_utils.to(self.device, self.dtype)
|
| 142 |
|
| 143 |
def to_cpu(self):
|
| 144 |
"""Moves the models back to CPU and clears VRAM after inference."""
|
| 145 |
if self.device == 'cpu': return
|
| 146 |
-
logger.info("Unloading
|
| 147 |
self.net.to(self.cpu_device)
|
| 148 |
self.feature_utils.to(self.cpu_device)
|
| 149 |
gc.collect()
|
|
@@ -201,12 +187,12 @@ class AudioSpecialist:
|
|
| 201 |
finally:
|
| 202 |
self.to_cpu()
|
| 203 |
|
| 204 |
-
# Singleton
|
| 205 |
try:
|
| 206 |
with open("config.yaml", 'r') as f:
|
| 207 |
config = yaml.safe_load(f)
|
| 208 |
WORKSPACE_DIR = config['application']['workspace_dir']
|
| 209 |
-
|
| 210 |
except Exception as e:
|
| 211 |
-
logger.error(f"Could not initialize
|
| 212 |
-
|
|
|
|
| 1 |
+
# managers/mmaudio_manager.py
|
| 2 |
#
|
| 3 |
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
|
| 4 |
#
|
| 5 |
+
# Version: 2.3.0
|
| 6 |
#
|
| 7 |
+
# This file defines the MMAudioManager for the ADUC-SDR framework. It is responsible
|
| 8 |
# for generating audio synchronized with video clips. This version has been refactored
|
| 9 |
# to be self-contained by automatically cloning the MMAudio dependency from its
|
| 10 |
# official repository, making the framework more portable and easier to set up.
|
|
|
|
| 27 |
MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
|
| 28 |
MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
|
| 29 |
|
| 30 |
+
def setup_mmaudio_dependencies():
    """
    Ensures the MMAudio repository is cloned and available in the sys.path.
    This function is run once when the module is first imported.

    If ``MMAUDIO_REPO_DIR`` does not exist, a shallow (``--depth 1``) clone of
    ``MMAUDIO_REPO_URL`` is created there. The resolved repository path is then
    prepended to ``sys.path`` unless it is already present.

    Raises:
        RuntimeError: If ``git clone`` exits with a non-zero status. The
            original ``CalledProcessError`` is attached as ``__cause__``.
        FileNotFoundError: If the ``git`` executable cannot be launched
            (logged, then re-raised unchanged).
    """
    if not MMAUDIO_REPO_DIR.exists():
        logger.info(f"MMAudio repository not found at '{MMAUDIO_REPO_DIR}'. Cloning from GitHub...")
        # parents=True so a nested DEPS_DIR does not fail on a missing parent directory.
        DEPS_DIR.mkdir(parents=True, exist_ok=True)
        try:
            subprocess.run(
                ["git", "clone", "--depth", "1", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)],
                check=True, capture_output=True, text=True
            )
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to clone MMAudio repository. Git stderr: {e.stderr}")
            # Chain the cause so the git exit status and output stay visible in tracebacks.
            raise RuntimeError("Could not clone the required MMAudio dependency from GitHub.") from e
        except FileNotFoundError:
            # subprocess could not start 'git' at all; explain why before propagating.
            logger.error("Failed to clone MMAudio repository: the 'git' executable was not found.")
            raise
        logger.info("MMAudio repository cloned successfully.")
    else:
        logger.info("Found local MMAudio repository.")

    # Resolve once; the same absolute path is used for the membership test, the insert and the log.
    repo_path = str(MMAUDIO_REPO_DIR.resolve())
    if repo_path not in sys.path:
        sys.path.insert(0, repo_path)
        logger.info(f"Added '{repo_path}' to sys.path.")
|
| 53 |
+
|
| 54 |
+
setup_mmaudio_dependencies()
|
| 55 |
+
|
| 56 |
+
from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
|
| 57 |
+
from mmaudio.model.flow_matching import FlowMatching
|
| 58 |
+
from mmaudio.model.networks import MMAudio, get_my_mmaudio
|
| 59 |
+
from mmaudio.model.utils.features_utils import FeaturesUtils
|
| 60 |
+
from mmaudio.model.sequence_config import SequenceConfig
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class MMAudioManager:
|
| 64 |
+
"""
|
| 65 |
+
Manages the MMAudio model for audio generation tasks.
|
| 66 |
"""
|
| 67 |
def __init__(self, workspace_dir):
|
| 68 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 70 |
self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
|
| 71 |
self.workspace_dir = workspace_dir
|
| 72 |
|
| 73 |
+
self.all_model_cfg = all_model_cfg
|
|
|
|
|
|
|
|
|
|
| 74 |
self.model_config: 'ModelConfig' = self.all_model_cfg['large_44k_v2']
|
| 75 |
self.net: 'MMAudio' = None
|
| 76 |
self.feature_utils: 'FeaturesUtils' = None
|
|
|
|
| 78 |
|
| 79 |
self._load_models_to_cpu()
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
def _adjust_paths_for_repo(self):
|
| 82 |
"""Adjusts the checkpoint paths in the model config to point inside the cloned repo."""
|
| 83 |
for cfg_key in self.all_model_cfg:
|
| 84 |
cfg = self.all_model_cfg[cfg_key]
|
| 85 |
+
# The paths in the original config are relative, so we join them with our repo path
|
| 86 |
cfg.model_path = MMAUDIO_REPO_DIR / cfg.model_path
|
| 87 |
cfg.vae_path = MMAUDIO_REPO_DIR / cfg.vae_path
|
| 88 |
if cfg.bigvgan_16k_path is not None:
|
|
|
|
| 114 |
self.feature_utils = self.feature_utils.eval()
|
| 115 |
self.net.to(self.cpu_device)
|
| 116 |
self.feature_utils.to(self.cpu_device)
|
| 117 |
+
logger.info("MMAudioManager ready on CPU.")
|
| 118 |
except Exception as e:
|
| 119 |
logger.error(f"Failed to load audio models: {e}", exc_info=True)
|
| 120 |
self.net = None
|
|
|
|
| 122 |
def to_gpu(self):
    """Transfer the network and the feature utilities to the GPU ahead of inference.

    Does nothing when ``self.device`` is ``'cpu'``. Otherwise both objects are
    moved to ``self.device`` and converted to ``self.dtype``.
    """
    if self.device != 'cpu':
        logger.info(f"Moving MMAudioManager to GPU ({self.device})...")
        # Same order as before: the network first, then the feature utilities.
        for component in (self.net, self.feature_utils):
            component.to(self.device, self.dtype)
|
| 128 |
|
| 129 |
def to_cpu(self):
|
| 130 |
"""Moves the models back to CPU and clears VRAM after inference."""
|
| 131 |
if self.device == 'cpu': return
|
| 132 |
+
logger.info("Unloading MMAudioManager from GPU...")
|
| 133 |
self.net.to(self.cpu_device)
|
| 134 |
self.feature_utils.to(self.cpu_device)
|
| 135 |
gc.collect()
|
|
|
|
| 187 |
finally:
|
| 188 |
self.to_cpu()
|
| 189 |
|
| 190 |
+
# --- Singleton Instantiation ---
|
| 191 |
# Build the module-level MMAudioManager instance at import time.
# "config.yaml" is a relative path, so it is looked up in the current working directory.
try:
    # Explicit UTF-8 so parsing does not depend on the platform's locale encoding.
    with open("config.yaml", 'r', encoding="utf-8") as f:
        config = yaml.safe_load(f)
    WORKSPACE_DIR = config['application']['workspace_dir']
    mmaudio_manager_singleton = MMAudioManager(workspace_dir=WORKSPACE_DIR)
except Exception as e:
    # Import-time boundary: any failure (missing file, bad YAML, missing key, constructor error)
    # is logged with its traceback and the singleton is left as None instead of aborting the import.
    logger.error(f"Could not initialize MMAudioManager: {e}", exc_info=True)
    mmaudio_manager_singleton = None
|