BoxOfColors Claude Sonnet 4.6 committed on
Commit
1dcac2d
·
1 Parent(s): 04fdc6c

Fix HunyuanFoley: pre-download SigLIP2, use local_files_only=True

Browse files

SigLIP2 (google/siglip2-base-patch16-512, ~1.5 GB) was being downloaded
from the HF network inside every ZeroGPU GPU worker call, consuming ~4s of
GPU budget and risking timeouts/rate-limits.

Fixes:
- Add _dl_siglip2() startup download alongside other model downloads
- Add it to the parallel download pool (max_workers 7→8)
- Add local_files_only=True to both from_pretrained call sites in
model_utils.py (class-based path and load_model function path)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

HunyuanVideo-Foley/hunyuanvideo_foley/utils/model_utils.py CHANGED
@@ -69,7 +69,8 @@ class OffloadModelManager:
69
  transforms.ToTensor(),
70
  transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
71
  ])
72
- self._siglip2_model = AutoModel.from_pretrained("google/siglip2-base-patch16-512").to(self.device).eval()
 
73
  logger.info("SigLIP2 model loaded")
74
  return self._siglip2_model, self._siglip2_preprocess
75
 
@@ -315,7 +316,8 @@ def load_model(model_path, config_path, device, enable_offload=False, model_size
315
  transforms.ToTensor(),
316
  transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
317
  ])
318
- siglip2_model = AutoModel.from_pretrained("google/siglip2-base-patch16-512").to(device).eval()
 
319
  logger.info("SigLIP2 model and preprocessing pipeline loaded successfully")
320
 
321
  # clap text-encoder
 
69
  transforms.ToTensor(),
70
  transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
71
  ])
72
+ self._siglip2_model = AutoModel.from_pretrained("google/siglip2-base-patch16-512",
73
+ local_files_only=True).to(self.device).eval()
74
  logger.info("SigLIP2 model loaded")
75
  return self._siglip2_model, self._siglip2_preprocess
76
 
 
316
  transforms.ToTensor(),
317
  transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
318
  ])
319
+ siglip2_model = AutoModel.from_pretrained("google/siglip2-base-patch16-512",
320
+ local_files_only=True).to(device).eval()
321
  logger.info("SigLIP2 model and preprocessing pipeline loaded successfully")
322
 
323
  # clap text-encoder
app.py CHANGED
@@ -82,6 +82,11 @@ def _dl_hunyuan():
82
  cache_dir=CACHE_DIR, local_dir=str(HUNYUAN_MODEL_DIR), local_dir_use_symlinks=False)
83
  print("HunyuanVideoFoley checkpoints downloaded.")
84
 
 
 
 
 
 
85
  def _dl_clap():
86
  """Pre-download CLAP so from_pretrained() hits local cache inside the ZeroGPU worker."""
87
  snapshot_download(repo_id="laion/larger_clap_general")
@@ -108,7 +113,7 @@ def _dl_bigvgan():
108
 
109
  print("[startup] Starting parallel checkpoint + model downloads…")
110
  _t_dl_start = time.perf_counter()
111
- with ThreadPoolExecutor(max_workers=7) as _pool:
112
  _fut_taro = _pool.submit(_dl_taro)
113
  _fut_mmaudio = _pool.submit(_dl_mmaudio)
114
  _fut_hunyuan = _pool.submit(_dl_hunyuan)
@@ -116,9 +121,11 @@ with ThreadPoolExecutor(max_workers=7) as _pool:
116
  _fut_clip = _pool.submit(_dl_clip)
117
  _fut_aldm2 = _pool.submit(_dl_audioldm2)
118
  _fut_bigvgan = _pool.submit(_dl_bigvgan)
 
119
  # Raise any download exceptions immediately
120
  for _fut in as_completed([_fut_taro, _fut_mmaudio, _fut_hunyuan,
121
- _fut_clap, _fut_clip, _fut_aldm2, _fut_bigvgan]):
 
122
  _fut.result()
123
 
124
  cavp_ckpt_path, onset_ckpt_path, taro_ckpt_path = _fut_taro.result()
 
82
  cache_dir=CACHE_DIR, local_dir=str(HUNYUAN_MODEL_DIR), local_dir_use_symlinks=False)
83
  print("HunyuanVideoFoley checkpoints downloaded.")
84
 
85
+ def _dl_siglip2():
86
+ """Pre-download SigLIP2 (~1.5 GB) used by HunyuanFoley's visual encoder."""
87
+ snapshot_download(repo_id="google/siglip2-base-patch16-512")
88
+ print("SigLIP2 pre-downloaded.")
89
+
90
  def _dl_clap():
91
  """Pre-download CLAP so from_pretrained() hits local cache inside the ZeroGPU worker."""
92
  snapshot_download(repo_id="laion/larger_clap_general")
 
113
 
114
  print("[startup] Starting parallel checkpoint + model downloads…")
115
  _t_dl_start = time.perf_counter()
116
+ with ThreadPoolExecutor(max_workers=8) as _pool:
117
  _fut_taro = _pool.submit(_dl_taro)
118
  _fut_mmaudio = _pool.submit(_dl_mmaudio)
119
  _fut_hunyuan = _pool.submit(_dl_hunyuan)
 
121
  _fut_clip = _pool.submit(_dl_clip)
122
  _fut_aldm2 = _pool.submit(_dl_audioldm2)
123
  _fut_bigvgan = _pool.submit(_dl_bigvgan)
124
+ _fut_siglip2 = _pool.submit(_dl_siglip2)
125
  # Raise any download exceptions immediately
126
  for _fut in as_completed([_fut_taro, _fut_mmaudio, _fut_hunyuan,
127
+ _fut_clap, _fut_clip, _fut_aldm2, _fut_bigvgan,
128
+ _fut_siglip2]):
129
  _fut.result()
130
 
131
  cavp_ckpt_path, onset_ckpt_path, taro_ckpt_path = _fut_taro.result()