euiia committed on
Commit
3480092
·
verified ·
1 Parent(s): 9fcad90

Rename managers/audio_specialist.py to managers/mmaudio_manager.py

Browse files
managers/{audio_specialist.py → mmaudio_manager.py} RENAMED
@@ -1,10 +1,10 @@
1
- # audio_specialist.py
2
  #
3
  # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
4
  #
5
- # Version: 2.2.0
6
  #
7
- # This file defines the Audio Specialist for the ADUC-SDR framework. It is responsible
8
  # for generating audio synchronized with video clips. This version has been refactored
9
  # to be self-contained by automatically cloning the MMAudio dependency from its
10
  # official repository, making the framework more portable and easier to set up.
@@ -27,11 +27,42 @@ DEPS_DIR = Path("./deps")
27
  MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
28
  MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
29
 
30
- class AudioSpecialist:
31
  """
32
- Specialist responsible for generating audio for video fragments.
33
- Manages the loading and unloading of audio models from VRAM and handles
34
- its own code dependencies by cloning the MMAudio repository.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  """
36
  def __init__(self, workspace_dir):
37
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -39,10 +70,7 @@ class AudioSpecialist:
39
  self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
40
  self.workspace_dir = workspace_dir
41
 
42
- self._mmaudio_modules_loaded = False
43
- self._setup_dependencies()
44
- self._lazy_load_mmaudio_modules()
45
-
46
  self.model_config: 'ModelConfig' = self.all_model_cfg['large_44k_v2']
47
  self.net: 'MMAudio' = None
48
  self.feature_utils: 'FeaturesUtils' = None
@@ -50,53 +78,11 @@ class AudioSpecialist:
50
 
51
  self._load_models_to_cpu()
52
 
53
- def _setup_dependencies(self):
54
- """
55
- Checks for the MMAudio repository locally. If not found, clones it.
56
- Then, it adds the repository to the Python path to make its modules importable.
57
- """
58
- if not MMAUDIO_REPO_DIR.exists():
59
- logger.info(f"MMAudio repository not found at '{MMAUDIO_REPO_DIR}'. Cloning from GitHub...")
60
- try:
61
- DEPS_DIR.mkdir(exist_ok=True)
62
- subprocess.run(
63
- ["git", "clone", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)],
64
- check=True, capture_output=True, text=True
65
- )
66
- logger.info("MMAudio repository cloned successfully.")
67
- except subprocess.CalledProcessError as e:
68
- logger.error(f"Failed to clone MMAudio repository. Git stderr: {e.stderr}")
69
- raise RuntimeError("Could not clone the required MMAudio dependency from GitHub.")
70
- else:
71
- logger.info("Found local MMAudio repository.")
72
-
73
- if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path:
74
- sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve()))
75
- logger.info(f"Added '{MMAUDIO_REPO_DIR.resolve()}' to sys.path.")
76
-
77
- def _lazy_load_mmaudio_modules(self):
78
- """Dynamically imports MMAudio modules only when needed."""
79
- if self._mmaudio_modules_loaded:
80
- return
81
-
82
- # These globals are now populated by the lazy loader
83
- global ModelConfig, all_model_cfg, mmaudio_generate, load_video, make_video
84
- global FlowMatching, MMAudio, get_my_mmaudio, FeaturesUtils, SequenceConfig
85
-
86
- from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
87
- from mmaudio.model.flow_matching import FlowMatching
88
- from mmaudio.model.networks import MMAudio, get_my_mmaudio
89
- from mmaudio.model.utils.features_utils import FeaturesUtils
90
- from mmaudio.model.sequence_config import SequenceConfig
91
-
92
- self.all_model_cfg = all_model_cfg
93
- self._mmaudio_modules_loaded = True
94
- logger.info("MMAudio modules have been dynamically loaded.")
95
-
96
  def _adjust_paths_for_repo(self):
97
  """Adjusts the checkpoint paths in the model config to point inside the cloned repo."""
98
  for cfg_key in self.all_model_cfg:
99
  cfg = self.all_model_cfg[cfg_key]
 
100
  cfg.model_path = MMAUDIO_REPO_DIR / cfg.model_path
101
  cfg.vae_path = MMAUDIO_REPO_DIR / cfg.vae_path
102
  if cfg.bigvgan_16k_path is not None:
@@ -128,7 +114,7 @@ class AudioSpecialist:
128
  self.feature_utils = self.feature_utils.eval()
129
  self.net.to(self.cpu_device)
130
  self.feature_utils.to(self.cpu_device)
131
- logger.info("Audio Specialist ready on CPU.")
132
  except Exception as e:
133
  logger.error(f"Failed to load audio models: {e}", exc_info=True)
134
  self.net = None
@@ -136,14 +122,14 @@ class AudioSpecialist:
136
  def to_gpu(self):
137
  """Moves the models and utilities to the GPU before inference."""
138
  if self.device == 'cpu': return
139
- logger.info(f"Moving Audio Specialist to GPU ({self.device})...")
140
  self.net.to(self.device, self.dtype)
141
  self.feature_utils.to(self.device, self.dtype)
142
 
143
  def to_cpu(self):
144
  """Moves the models back to CPU and clears VRAM after inference."""
145
  if self.device == 'cpu': return
146
- logger.info("Unloading Audio Specialist from GPU...")
147
  self.net.to(self.cpu_device)
148
  self.feature_utils.to(self.cpu_device)
149
  gc.collect()
@@ -201,12 +187,12 @@ class AudioSpecialist:
201
  finally:
202
  self.to_cpu()
203
 
204
- # Singleton instantiation
205
  try:
206
  with open("config.yaml", 'r') as f:
207
  config = yaml.safe_load(f)
208
  WORKSPACE_DIR = config['application']['workspace_dir']
209
- audio_specialist_singleton = AudioSpecialist(workspace_dir=WORKSPACE_DIR)
210
  except Exception as e:
211
- logger.error(f"Could not initialize AudioSpecialist: {e}", exc_info=True)
212
- audio_specialist_singleton = None
 
1
+ # managers/mmaudio_manager.py
2
  #
3
  # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
4
  #
5
+ # Version: 2.3.0
6
  #
7
+ # This file defines the MMAudioManager for the ADUC-SDR framework. It is responsible
8
  # for generating audio synchronized with video clips. This version has been refactored
9
  # to be self-contained by automatically cloning the MMAudio dependency from its
10
  # official repository, making the framework more portable and easier to set up.
 
27
  MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
28
  MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
29
 
30
+ def setup_mmaudio_dependencies():
31
  """
32
+ Ensures the MMAudio repository is cloned and available in the sys.path.
33
+ This function is run once when the module is first imported.
34
+ """
35
+ if not MMAUDIO_REPO_DIR.exists():
36
+ logger.info(f"MMAudio repository not found at '{MMAUDIO_REPO_DIR}'. Cloning from GitHub...")
37
+ try:
38
+ DEPS_DIR.mkdir(exist_ok=True)
39
+ subprocess.run(
40
+ ["git", "clone", "--depth", "1", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)],
41
+ check=True, capture_output=True, text=True
42
+ )
43
+ logger.info("MMAudio repository cloned successfully.")
44
+ except subprocess.CalledProcessError as e:
45
+ logger.error(f"Failed to clone MMAudio repository. Git stderr: {e.stderr}")
46
+ raise RuntimeError("Could not clone the required MMAudio dependency from GitHub.")
47
+ else:
48
+ logger.info("Found local MMAudio repository.")
49
+
50
+ if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path:
51
+ sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve()))
52
+ logger.info(f"Added '{MMAUDIO_REPO_DIR.resolve()}' to sys.path.")
53
+
54
+ setup_mmaudio_dependencies()
55
+
56
+ from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
57
+ from mmaudio.model.flow_matching import FlowMatching
58
+ from mmaudio.model.networks import MMAudio, get_my_mmaudio
59
+ from mmaudio.model.utils.features_utils import FeaturesUtils
60
+ from mmaudio.model.sequence_config import SequenceConfig
61
+
62
+
63
+ class MMAudioManager:
64
+ """
65
+ Manages the MMAudio model for audio generation tasks.
66
  """
67
  def __init__(self, workspace_dir):
68
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
 
70
  self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32
71
  self.workspace_dir = workspace_dir
72
 
73
+ self.all_model_cfg = all_model_cfg
 
 
 
74
  self.model_config: 'ModelConfig' = self.all_model_cfg['large_44k_v2']
75
  self.net: 'MMAudio' = None
76
  self.feature_utils: 'FeaturesUtils' = None
 
78
 
79
  self._load_models_to_cpu()
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def _adjust_paths_for_repo(self):
82
  """Adjusts the checkpoint paths in the model config to point inside the cloned repo."""
83
  for cfg_key in self.all_model_cfg:
84
  cfg = self.all_model_cfg[cfg_key]
85
+ # The paths in the original config are relative, so we join them with our repo path
86
  cfg.model_path = MMAUDIO_REPO_DIR / cfg.model_path
87
  cfg.vae_path = MMAUDIO_REPO_DIR / cfg.vae_path
88
  if cfg.bigvgan_16k_path is not None:
 
114
  self.feature_utils = self.feature_utils.eval()
115
  self.net.to(self.cpu_device)
116
  self.feature_utils.to(self.cpu_device)
117
+ logger.info("MMAudioManager ready on CPU.")
118
  except Exception as e:
119
  logger.error(f"Failed to load audio models: {e}", exc_info=True)
120
  self.net = None
 
122
  def to_gpu(self):
123
  """Moves the models and utilities to the GPU before inference."""
124
  if self.device == 'cpu': return
125
+ logger.info(f"Moving MMAudioManager to GPU ({self.device})...")
126
  self.net.to(self.device, self.dtype)
127
  self.feature_utils.to(self.device, self.dtype)
128
 
129
  def to_cpu(self):
130
  """Moves the models back to CPU and clears VRAM after inference."""
131
  if self.device == 'cpu': return
132
+ logger.info("Unloading MMAudioManager from GPU...")
133
  self.net.to(self.cpu_device)
134
  self.feature_utils.to(self.cpu_device)
135
  gc.collect()
 
187
  finally:
188
  self.to_cpu()
189
 
190
+ # --- Singleton Instantiation ---
191
  try:
192
  with open("config.yaml", 'r') as f:
193
  config = yaml.safe_load(f)
194
  WORKSPACE_DIR = config['application']['workspace_dir']
195
+ mmaudio_manager_singleton = MMAudioManager(workspace_dir=WORKSPACE_DIR)
196
  except Exception as e:
197
+ logger.error(f"Could not initialize MMAudioManager: {e}", exc_info=True)
198
+ mmaudio_manager_singleton = None