Commit ·
1dcac2d
1
Parent(s): 04fdc6c
Fix HunyuanFoley: pre-download SigLIP2, use local_files_only=True
Browse files
SigLIP2 (google/siglip2-base-patch16-512, ~1.5 GB) was being downloaded
from the HF network inside every ZeroGPU GPU worker call, consuming ~4s of
GPU budget and risking timeouts/rate-limits.
Fixes:
- Add _dl_siglip2() startup download alongside other model downloads
- Add to parallel download pool (max_workers 7→8)
- Add local_files_only=True to both from_pretrained call sites in
model_utils.py (class-based path and load_model function path)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
HunyuanVideo-Foley/hunyuanvideo_foley/utils/model_utils.py
CHANGED
|
@@ -69,7 +69,8 @@ class OffloadModelManager:
|
|
| 69 |
transforms.ToTensor(),
|
| 70 |
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
|
| 71 |
])
|
| 72 |
-
self._siglip2_model = AutoModel.from_pretrained("google/siglip2-base-patch16-512").to(self.device).eval()
|
|
|
|
| 73 |
logger.info("SigLIP2 model loaded")
|
| 74 |
return self._siglip2_model, self._siglip2_preprocess
|
| 75 |
|
|
@@ -315,7 +316,8 @@ def load_model(model_path, config_path, device, enable_offload=False, model_size
|
|
| 315 |
transforms.ToTensor(),
|
| 316 |
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
|
| 317 |
])
|
| 318 |
-
siglip2_model = AutoModel.from_pretrained("google/siglip2-base-patch16-512").to(device).eval()
|
|
|
|
| 319 |
logger.info("SigLIP2 model and preprocessing pipeline loaded successfully")
|
| 320 |
|
| 321 |
# clap text-encoder
|
|
|
|
| 69 |
transforms.ToTensor(),
|
| 70 |
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
|
| 71 |
])
|
| 72 |
+
self._siglip2_model = AutoModel.from_pretrained("google/siglip2-base-patch16-512",
|
| 73 |
+
local_files_only=True).to(self.device).eval()
|
| 74 |
logger.info("SigLIP2 model loaded")
|
| 75 |
return self._siglip2_model, self._siglip2_preprocess
|
| 76 |
|
|
|
|
| 316 |
transforms.ToTensor(),
|
| 317 |
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
|
| 318 |
])
|
| 319 |
+
siglip2_model = AutoModel.from_pretrained("google/siglip2-base-patch16-512",
|
| 320 |
+
local_files_only=True).to(device).eval()
|
| 321 |
logger.info("SigLIP2 model and preprocessing pipeline loaded successfully")
|
| 322 |
|
| 323 |
# clap text-encoder
|
app.py
CHANGED
|
@@ -82,6 +82,11 @@ def _dl_hunyuan():
|
|
| 82 |
cache_dir=CACHE_DIR, local_dir=str(HUNYUAN_MODEL_DIR), local_dir_use_symlinks=False)
|
| 83 |
print("HunyuanVideoFoley checkpoints downloaded.")
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
def _dl_clap():
|
| 86 |
"""Pre-download CLAP so from_pretrained() hits local cache inside the ZeroGPU worker."""
|
| 87 |
snapshot_download(repo_id="laion/larger_clap_general")
|
|
@@ -108,7 +113,7 @@ def _dl_bigvgan():
|
|
| 108 |
|
| 109 |
print("[startup] Starting parallel checkpoint + model downloads…")
|
| 110 |
_t_dl_start = time.perf_counter()
|
| 111 |
-
with ThreadPoolExecutor(max_workers=7) as _pool:
|
| 112 |
_fut_taro = _pool.submit(_dl_taro)
|
| 113 |
_fut_mmaudio = _pool.submit(_dl_mmaudio)
|
| 114 |
_fut_hunyuan = _pool.submit(_dl_hunyuan)
|
|
@@ -116,9 +121,11 @@ with ThreadPoolExecutor(max_workers=7) as _pool:
|
|
| 116 |
_fut_clip = _pool.submit(_dl_clip)
|
| 117 |
_fut_aldm2 = _pool.submit(_dl_audioldm2)
|
| 118 |
_fut_bigvgan = _pool.submit(_dl_bigvgan)
|
|
|
|
| 119 |
# Raise any download exceptions immediately
|
| 120 |
for _fut in as_completed([_fut_taro, _fut_mmaudio, _fut_hunyuan,
|
| 121 |
-
_fut_clap, _fut_clip, _fut_aldm2, _fut_bigvgan]):
|
|
|
|
| 122 |
_fut.result()
|
| 123 |
|
| 124 |
cavp_ckpt_path, onset_ckpt_path, taro_ckpt_path = _fut_taro.result()
|
|
|
|
| 82 |
cache_dir=CACHE_DIR, local_dir=str(HUNYUAN_MODEL_DIR), local_dir_use_symlinks=False)
|
| 83 |
print("HunyuanVideoFoley checkpoints downloaded.")
|
| 84 |
|
| 85 |
+
def _dl_siglip2():
|
| 86 |
+
"""Pre-download SigLIP2 (~1.5 GB) used by HunyuanFoley's visual encoder."""
|
| 87 |
+
snapshot_download(repo_id="google/siglip2-base-patch16-512")
|
| 88 |
+
print("SigLIP2 pre-downloaded.")
|
| 89 |
+
|
| 90 |
def _dl_clap():
|
| 91 |
"""Pre-download CLAP so from_pretrained() hits local cache inside the ZeroGPU worker."""
|
| 92 |
snapshot_download(repo_id="laion/larger_clap_general")
|
|
|
|
| 113 |
|
| 114 |
print("[startup] Starting parallel checkpoint + model downloads…")
|
| 115 |
_t_dl_start = time.perf_counter()
|
| 116 |
+
with ThreadPoolExecutor(max_workers=8) as _pool:
|
| 117 |
_fut_taro = _pool.submit(_dl_taro)
|
| 118 |
_fut_mmaudio = _pool.submit(_dl_mmaudio)
|
| 119 |
_fut_hunyuan = _pool.submit(_dl_hunyuan)
|
|
|
|
| 121 |
_fut_clip = _pool.submit(_dl_clip)
|
| 122 |
_fut_aldm2 = _pool.submit(_dl_audioldm2)
|
| 123 |
_fut_bigvgan = _pool.submit(_dl_bigvgan)
|
| 124 |
+
_fut_siglip2 = _pool.submit(_dl_siglip2)
|
| 125 |
# Raise any download exceptions immediately
|
| 126 |
for _fut in as_completed([_fut_taro, _fut_mmaudio, _fut_hunyuan,
|
| 127 |
+
_fut_clap, _fut_clip, _fut_aldm2, _fut_bigvgan,
|
| 128 |
+
_fut_siglip2]):
|
| 129 |
_fut.result()
|
| 130 |
|
| 131 |
cavp_ckpt_path, onset_ckpt_path, taro_ckpt_path = _fut_taro.result()
|