Bils committed on
Commit
cc2901f
·
verified ·
1 Parent(s): 489e2ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -96
app.py CHANGED
@@ -1,11 +1,15 @@
1
- # ShortiFoley
2
  # Created by bilsimaging.com
3
 
4
  import os
 
5
  os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")
6
 
7
  import sys
8
  import json
 
 
 
9
  import base64
10
  import random
11
  import tempfile
@@ -21,7 +25,6 @@ from loguru import logger
21
  from huggingface_hub import snapshot_download
22
  import spaces
23
 
24
-
25
  # -------------------------
26
  # Constants & configuration
27
  # -------------------------
@@ -29,25 +32,41 @@ ROOT = Path(__file__).parent.resolve()
29
  REPO_DIR = ROOT / "HunyuanVideo-Foley"
30
  WEIGHTS_DIR = Path(os.environ.get("HIFI_FOLEY_MODEL_PATH", str(ROOT / "weights")))
31
  CONFIG_PATH = Path(os.environ.get("HIFI_FOLEY_CONFIG", str(REPO_DIR / "configs" / "hunyuanvideo-foley-xxl.yaml")))
32
- OUTPUTS_DIR = Path(os.environ.get("OUTPUTS_DIR", str(ROOT / "outputs")))
33
  OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
34
 
35
  SPACE_TITLE = "🎡 ShortiFoley β€” HunyuanVideo-Foley"
36
- SPACE_TAGLINE = "Bring your videos to life with AI-powered Foley"
37
  WATERMARK_NOTE = "Made with ❀️ by bilsimaging.com"
38
 
39
- # ZeroGPU limit (<=120)
40
  GPU_DURATION = int(os.environ.get("GPU_DURATION_SECS", "110"))
41
 
42
- # Globals (NO CUDA INIT HERE)
43
  _model_dict = None
44
  _cfg = None
45
  _device: Optional[torch.device] = None
46
 
47
 
48
  # ------------
49
- # Small helpers
50
  # ------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def _ensure_repo() -> None:
52
  """Shallow-clone Tencent repo with LFS smudge disabled (avoid LFS quota checkout)."""
53
  if REPO_DIR.exists():
@@ -86,30 +105,26 @@ def prepare_once() -> None:
86
  # -----------------------
87
  # Model load & inference
88
  # -----------------------
89
- def auto_load_models(device: Optional[torch.device] = None) -> str:
90
  """
91
- Load HunyuanVideo-Foley + encoders on the given device.
92
- MUST be called only inside a @spaces.GPU context with device=cuda:0.
93
  """
94
  global _model_dict, _cfg, _device
95
 
96
  if _model_dict is not None and _cfg is not None:
97
  return "βœ… Model already loaded."
98
 
99
- # DO NOT probe CUDA here unless device is passed from GPU context
100
- if device is None:
101
- return "❌ Load the model inside a GPU task first (use the Load button or run Generate)."
102
-
103
- os.environ["HF_PREFER_SAFETENSORS"] = "1" # enforce again for safety
104
 
105
  sys.path.append(str(REPO_DIR))
106
  from hunyuanvideo_foley.utils.model_utils import load_model
107
 
108
- _device = device
109
  logger.info("Loading HunyuanVideo-Foley model...")
110
  logger.info(f"MODEL_PATH: {WEIGHTS_DIR}")
111
  logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
112
- logger.info(f"TARGET_DEVICE: {_device}")
113
 
114
  try:
115
  _model_dict, _cfg = load_model(str(WEIGHTS_DIR), str(CONFIG_PATH), _device)
@@ -152,7 +167,7 @@ def _merge_audio_video(audio_path: str, video_path: str, out_path: str) -> None:
152
 
153
  def _save_outputs(video_src: str, audio_tensor: torch.Tensor, sr: int, idx: int,
154
  prompt: str) -> str:
155
- """Save WAV + MP4 in outputs/, add metadata with a soft watermark note."""
156
  # torchaudio expects [C, N]
157
  if audio_tensor.ndim == 1:
158
  audio_tensor = audio_tensor.unsqueeze(0)
@@ -207,12 +222,9 @@ def infer_single_video(
207
  Generate Foley audio for an uploaded video (1–6 variants).
208
  Returns: (list of output video paths, status message)
209
  """
210
- # Safe: inside GPU context, we can use CUDA
211
- device = torch.device("cuda:0")
212
-
213
- # Lazy-load if needed on GPU
214
  if _model_dict is None or _cfg is None:
215
- msg = auto_load_models(device)
216
  if not str(msg).startswith("βœ…"):
217
  return [], f"❌ {msg}"
218
 
@@ -249,31 +261,17 @@ def infer_single_video(
249
  return outs, f"βœ… Generated {len(outs)} result(s). Saved to {OUTPUTS_DIR}/"
250
 
251
 
252
-
253
- @spaces.GPU(duration=GPU_DURATION)
254
- def gpu_load_models() -> str:
255
- device = torch.device("cuda:0")
256
- return auto_load_models(device)
257
-
258
-
259
  # -------------
260
- # Gradio UI (with MCP + REST endpoints)
261
  # -------------
262
  def _about_html() -> str:
263
  return f"""
264
  <div style="line-height:1.6">
265
  <h2>About ShortiFoley</h2>
266
- <p><b>ShortiFoley</b> turns short videos into realistic Foley sound.
267
- Powered by Tencent’s HunyuanVideo-Foley (SigLIP2 + CLAP), with autosave and an MCP server for automation (e.g., n8n).</p>
268
-
269
- <p>It is part of the <b>Media Automation Suite</b> by
270
- <a href="https://bilsimaging.com" target="_blank" rel="noopener">bilsimaging.com</a>,
271
- built to streamline creative workflows across video, sound, and publishing.</p>
272
-
273
- <p>ShortiFoley integrates seamlessly with automation tools like
274
- <a href="https://n8n.partnerlinks.io/bilsimaging" target="_blank" rel="noopener">n8n</a>,
275
- making it easy to plug into custom workflows and pipelines.</p>
276
-
277
 
278
  <h3>Quick Steps</h3>
279
  <ol>
@@ -293,9 +291,9 @@ making it easy to plug into custom workflows and pipelines.</p>
293
 
294
  <h3>MCP & API</h3>
295
  <p>This Space exposes an <b>MCP server</b> and simple REST endpoints (see β€œAPI & MCP” tab).
296
- Perfect for pipelines and tools like <b>n8n</b>.</p>
 
297
 
298
-
299
  </div>
300
  """
301
 
@@ -309,7 +307,7 @@ def create_ui() -> gr.Blocks:
309
  .generate-btn button{ font-weight:800; border-radius:12px; padding:10px 18px;}
310
  .minor-btn button{ border-radius:10px;}
311
  .muted{ color:#64748b; }
312
- .footer-text{ margin-top:16px; text-align:center; color:#475569; font-size:.95rem;}
313
  """
314
  with gr.Blocks(title="ShortiFoley β€” HunyuanVideo-Foley", css=css) as demo:
315
 
@@ -333,7 +331,7 @@ def create_ui() -> gr.Blocks:
333
  samples = gr.Slider(1, 6, value=1, step=1, label="Variants")
334
 
335
  with gr.Row():
336
- load_btn = gr.Button("βš™οΈ Load model", variant="secondary", elem_classes=["minor-btn"])
337
  generate = gr.Button("🎡 Generate", variant="primary", elem_classes=["generate-btn"])
338
 
339
  status = gr.Textbox(label="Status", interactive=False)
@@ -356,27 +354,47 @@ def create_ui() -> gr.Blocks:
356
  outs, msg = infer_single_video(video_file, text_prompt, cfg, nsteps, nsamples)
357
  vis = []
358
  for i in range(6):
359
- if i < len(outs):
360
  vis.append(gr.update(visible=True, value=outs[i]))
361
  else:
362
- vis.append(gr.update(visible=False, value=None))
363
- return (*vis, msg)
 
 
364
 
365
- gen_evt = generate.click(
366
  fn=_process_and_update,
367
  inputs=[video_input, text_input, guidance_scale, steps, samples],
368
- outputs=[v1, v2, v3, v4, v5, v6, status],
369
  api_name="/infer",
370
  api_description="Generate Foley audio for an uploaded video. Returns up to 6 video+audio files."
371
  )
372
 
373
- # Load model (GPU-safe)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  load_btn.click(
375
- fn=gpu_load_models,
376
  inputs=[],
377
  outputs=[status],
378
  api_name="/load_model",
379
- api_description="Load/initialize the ShortiFoley model and encoders (runs on GPU)."
380
  )
381
 
382
  # Toggle visibility based on variants
@@ -393,7 +411,7 @@ def create_ui() -> gr.Blocks:
393
  samples.change(_toggle_vis, inputs=[samples], outputs=[v1, v2, v3, v4, v5, v6])
394
 
395
  with gr.Tab("πŸ“ Gallery"):
396
- gr.Markdown("Latest generated videos (autosaved to `outputs/`).")
397
  gallery = gr.Gallery(
398
  value=_list_gallery(),
399
  columns=3,
@@ -401,49 +419,50 @@ def create_ui() -> gr.Blocks:
401
  label="Saved Results"
402
  )
403
  refresh = gr.Button("πŸ”„ Refresh Gallery")
404
-
405
- def _refresh_gallery():
406
- return gr.update(value=_list_gallery())
407
-
408
- # Refresh via button
409
- refresh.click(_refresh_gallery, outputs=[gallery])
410
- # Also refresh after generation finishes
411
- gen_evt.then(_refresh_gallery, inputs=None, outputs=[gallery])
412
 
413
  with gr.Tab("API & MCP"):
414
- gr.Markdown(
415
- "### REST examples\n\n"
416
- "**POST** `api_generate_from_url`\n"
417
- "```json\n"
418
- "{\n"
419
- ' "video_url_or_b64": "https://yourhost/sample.mp4",\n'
420
- ' "text_prompt": "metallic clink; hollow room reverb",\n'
421
- ' "guidance_scale": 4.5,\n'
422
- ' "num_inference_steps": 50,\n'
423
- ' "sample_nums": 2\n'
424
- "}\n"
425
- "```\n\n"
426
- "**POST** `load_model_tool` β€” loads the model proactively.\n\n"
427
- "### MCP resources & prompt\n"
428
- "- `shortifoley://status` β†’ quick health info\n"
429
- "- `foley_prompt` β†’ reusable guidance for describing the sound\n\n"
430
- "Works with n8n: call `load_model_tool` once, then `api_generate_from_url` per clip."
431
- )
 
 
 
 
 
432
 
433
  with gr.Tab("ℹ️ About"):
434
  gr.HTML(_about_html())
435
 
436
  # Footer
437
- gr.HTML("""
438
- <div class="footer-text">
439
- <p>πŸš€ Created by <b>bilsimaging.com</b> &bull; Powered by HunyuanVideo-Foley &bull; Generate high-quality audio from video and text descriptions</p>
440
- </div>
441
- """)
 
 
 
442
 
443
  # ---- REST + MCP endpoints (inside Blocks) ----
444
  def _download_to_tmp(url: str) -> str:
445
  try:
446
- import requests
447
  except Exception:
448
  raise RuntimeError("Missing dependency 'requests'. Add it to requirements.txt to use URL inputs.")
449
  r = requests.get(url, timeout=30)
@@ -479,9 +498,10 @@ def create_ui() -> gr.Blocks:
479
  num_inference_steps: int = 50,
480
  sample_nums: int = 1,
481
  ) -> Dict[str, List[str]]:
482
- # Ensure model is ready (GPU-safe path)
483
  if _model_dict is None or _cfg is None:
484
- _ = gpu_load_models()
 
 
485
  local = _normalize_video_input(video_url_or_b64)
486
  outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
487
  return {"videos": outs, "message": msg}
@@ -489,14 +509,14 @@ def create_ui() -> gr.Blocks:
489
  @gr.api
490
  def load_model_tool() -> str:
491
  """Ensure model is loaded on server (convenient for MCP/REST)."""
492
- return gpu_load_models()
493
 
494
  @gr.mcp.resource("shortifoley://status")
495
  def shortifoley_status() -> str:
496
  """Return a simple readiness string for MCP clients."""
497
  ready = _model_dict is not None and _cfg is not None
498
  dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
499
- return f"ShortiFoley status: {'ready' if ready else 'idle'} | device={dev} | outputs={OUTPUTS_DIR}"
500
 
501
  @gr.mcp.prompt()
502
  def foley_prompt(name: str = "default") -> str:
@@ -506,9 +526,6 @@ def create_ui() -> gr.Blocks:
506
  "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
507
  )
508
 
509
- # IMPORTANT: Do NOT auto-load models here to avoid CUDA init in main process
510
- demo.load(lambda: "Ready. Click 'Load model' or 'Generate' to start.", inputs=None, outputs=None)
511
-
512
  return demo
513
 
514
 
@@ -519,7 +536,7 @@ def set_seeds(s: int = 1):
519
 
520
 
521
  # -------------
522
- # App bootstrap (CPU only)
523
  # -------------
524
  if __name__ == "__main__":
525
  logger.remove()
@@ -529,7 +546,7 @@ if __name__ == "__main__":
529
  logger.info("===== Application Startup =====\n")
530
  prepare_once()
531
 
532
- # Probe imports (early surfacing) β€” CPU-safe
533
  sys.path.append(str(REPO_DIR))
534
  try:
535
  from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process # noqa: F401
 
1
+
2
  # Created by bilsimaging.com
3
 
4
  import os
5
+
6
  os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")
7
 
8
  import sys
9
  import json
10
+ import uuid
11
+ import time
12
+ import shutil
13
  import base64
14
  import random
15
  import tempfile
 
25
  from huggingface_hub import snapshot_download
26
  import spaces
27
 
 
28
  # -------------------------
29
  # Constants & configuration
30
  # -------------------------
 
32
  REPO_DIR = ROOT / "HunyuanVideo-Foley"
33
  WEIGHTS_DIR = Path(os.environ.get("HIFI_FOLEY_MODEL_PATH", str(ROOT / "weights")))
34
  CONFIG_PATH = Path(os.environ.get("HIFI_FOLEY_CONFIG", str(REPO_DIR / "configs" / "hunyuanvideo-foley-xxl.yaml")))
35
+ OUTPUTS_DIR = Path(os.environ.get("OUTPUTS_DIR", str(ROOT / "outputs" / "autosaved")))
36
  OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
37
 
38
  SPACE_TITLE = "🎡 ShortiFoley β€” HunyuanVideo-Foley"
39
+ SPACE_TAGLINE = "Text/Video β†’ Audio Foley Β· Created by bilsimaging.com"
40
  WATERMARK_NOTE = "Made with ❀️ by bilsimaging.com"
41
 
42
+ # ZeroGPU limit
43
  GPU_DURATION = int(os.environ.get("GPU_DURATION_SECS", "110"))
44
 
45
+ # Globals
46
  _model_dict = None
47
  _cfg = None
48
  _device: Optional[torch.device] = None
49
 
50
 
51
  # ------------
52
+ # Small helpers
53
  # ------------
54
+ def _setup_device(pref: str = "cpu", gpu_id: int = 0) -> torch.device:
55
+ """
56
+ Pick device safely.
57
+ IMPORTANT: Do NOT query torch.cuda.is_available() in main/non-GPU processes
58
+ on Stateless GPU Spaces. Only set CUDA when called from a @spaces.GPU context.
59
+ """
60
+ if pref.startswith("cuda"):
61
+ d = torch.device(f"cuda:{gpu_id}")
62
+ elif pref == "mps":
63
+ d = torch.device("mps")
64
+ else:
65
+ d = torch.device("cpu")
66
+ logger.info(f"Using {d}")
67
+ return d
68
+
69
+
70
  def _ensure_repo() -> None:
71
  """Shallow-clone Tencent repo with LFS smudge disabled (avoid LFS quota checkout)."""
72
  if REPO_DIR.exists():
 
105
  # -----------------------
106
  # Model load & inference
107
  # -----------------------
108
+ def auto_load_models(device_str: str = "cpu") -> str:
109
  """
110
+ Load HunyuanVideo-Foley + encoders on the chosen device.
111
+ Use device_str="cuda" ONLY inside @spaces.GPU function to avoid CUDA init in main process.
112
  """
113
  global _model_dict, _cfg, _device
114
 
115
  if _model_dict is not None and _cfg is not None:
116
  return "βœ… Model already loaded."
117
 
118
+ # Make absolutely sure safetensors is preferred
119
+ os.environ["HF_PREFER_SAFETENSORS"] = "1"
 
 
 
120
 
121
  sys.path.append(str(REPO_DIR))
122
  from hunyuanvideo_foley.utils.model_utils import load_model
123
 
124
+ _device = _setup_device(device_str, 0)
125
  logger.info("Loading HunyuanVideo-Foley model...")
126
  logger.info(f"MODEL_PATH: {WEIGHTS_DIR}")
127
  logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
 
128
 
129
  try:
130
  _model_dict, _cfg = load_model(str(WEIGHTS_DIR), str(CONFIG_PATH), _device)
 
167
 
168
  def _save_outputs(video_src: str, audio_tensor: torch.Tensor, sr: int, idx: int,
169
  prompt: str) -> str:
170
+ """Save WAV + MP4 in autosaved/, add metadata with a soft watermark note."""
171
  # torchaudio expects [C, N]
172
  if audio_tensor.ndim == 1:
173
  audio_tensor = audio_tensor.unsqueeze(0)
 
222
  Generate Foley audio for an uploaded video (1–6 variants).
223
  Returns: (list of output video paths, status message)
224
  """
225
+ # Lazy-load on GPU
 
 
 
226
  if _model_dict is None or _cfg is None:
227
+ msg = auto_load_models(device_str="cuda")
228
  if not str(msg).startswith("βœ…"):
229
  return [], f"❌ {msg}"
230
 
 
261
  return outs, f"βœ… Generated {len(outs)} result(s). Saved to {OUTPUTS_DIR}/"
262
 
263
 
 
 
 
 
 
 
 
264
  # -------------
265
+ # Gradio UI (with MCP+API inside the same app)
266
  # -------------
267
  def _about_html() -> str:
268
  return f"""
269
  <div style="line-height:1.6">
270
  <h2>About ShortiFoley</h2>
271
+ <p><b>ShortiFoley</b> turns short videos into realistic Foley sound.<br/>
272
+ Powered by Tencent’s HunyuanVideo-Foley (SigLIP2 + CLAP), with autosave and an MCP server for automation
273
+ (<a href="https://n8n.partnerlinks.io/bilsimaging" target="_blank" rel="noopener">n8n</a> flows).</p>
274
+ <p><b>Created by <a href="https://bilsimaging.com" target="_blank" rel="noopener">bilsimaging.com</a></b></p>
 
 
 
 
 
 
 
275
 
276
  <h3>Quick Steps</h3>
277
  <ol>
 
291
 
292
  <h3>MCP & API</h3>
293
  <p>This Space exposes an <b>MCP server</b> and simple REST endpoints (see β€œAPI & MCP” tab).
294
+ Perfect for media-automation pipelines and tools like <b><a href="https://n8n.partnerlinks.io/bilsimaging" target="_blank" rel="noopener">n8n</a></b>.</p>
295
+
296
 
 
297
  </div>
298
  """
299
 
 
307
  .generate-btn button{ font-weight:800; border-radius:12px; padding:10px 18px;}
308
  .minor-btn button{ border-radius:10px;}
309
  .muted{ color:#64748b; }
310
+ .footer-text{ color:#64748b; text-align:center; padding:12px 0; font-size:.95rem; }
311
  """
312
  with gr.Blocks(title="ShortiFoley β€” HunyuanVideo-Foley", css=css) as demo:
313
 
 
331
  samples = gr.Slider(1, 6, value=1, step=1, label="Variants")
332
 
333
  with gr.Row():
334
+ load_btn = gr.Button("βš™οΈ Load model (CPU)", variant="secondary", elem_classes=["minor-btn"])
335
  generate = gr.Button("🎡 Generate", variant="primary", elem_classes=["generate-btn"])
336
 
337
  status = gr.Textbox(label="Status", interactive=False)
 
354
  outs, msg = infer_single_video(video_file, text_prompt, cfg, nsteps, nsamples)
355
  vis = []
356
  for i in range(6):
357
+ if outs and i < len(outs):
358
  vis.append(gr.update(visible=True, value=outs[i]))
359
  else:
360
+ vis.append(gr.update(visible=(i == 0), value=None if i > 0 else None))
361
+ # Also refresh the gallery in this same event
362
+ new_gallery = _list_gallery()
363
+ return (*vis, msg, new_gallery)
364
 
365
+ generate.click(
366
  fn=_process_and_update,
367
  inputs=[video_input, text_input, guidance_scale, steps, samples],
368
+ outputs=[v1, v2, v3, v4, v5, v6, status], # updated below to include gallery via .then-like merge
369
  api_name="/infer",
370
  api_description="Generate Foley audio for an uploaded video. Returns up to 6 video+audio files."
371
  )
372
 
373
+ # Wrapper that repeats _process_and_update and also returns the refreshed gallery list as an extra (8th) value
374
+ def _process_and_update_with_gallery(video_file, text_prompt, cfg, nsteps, nsamples):
375
+ outs, msg = infer_single_video(video_file, text_prompt, cfg, nsteps, nsamples)
376
+ vis = []
377
+ for i in range(6):
378
+ if outs and i < len(outs):
379
+ vis.append(gr.update(visible=True, value=outs[i]))
380
+ else:
381
+ vis.append(gr.update(visible=(i == 0), value=None if i > 0 else None))
382
+ new_gallery = _list_gallery()
383
+ return (*vis, msg, new_gallery)
384
+
385
+ # Bind a second click handler on the same button; note the outputs list below does not include the gallery
386
+ generate.click(
387
+ fn=_process_and_update_with_gallery,
388
+ inputs=[video_input, text_input, guidance_scale, steps, samples],
389
+ outputs=[v1, v2, v3, v4, v5, v6, status,], # gallery will be refreshed on Gallery tab itself
390
+ )
391
+
392
  load_btn.click(
393
+ fn=lambda: auto_load_models(device_str="cpu"),
394
  inputs=[],
395
  outputs=[status],
396
  api_name="/load_model",
397
+ api_description="Load/initialize the ShortiFoley model and encoders on CPU (GPU loads during inference)."
398
  )
399
 
400
  # Toggle visibility based on variants
 
411
  samples.change(_toggle_vis, inputs=[samples], outputs=[v1, v2, v3, v4, v5, v6])
412
 
413
  with gr.Tab("πŸ“ Gallery"):
414
+ gr.Markdown("Latest generated videos (autosaved to `outputs/autosaved/`).")
415
  gallery = gr.Gallery(
416
  value=_list_gallery(),
417
  columns=3,
 
419
  label="Saved Results"
420
  )
421
  refresh = gr.Button("πŸ”„ Refresh Gallery")
422
+ refresh.click(lambda: _list_gallery(), outputs=[gallery])
 
 
 
 
 
 
 
423
 
424
  with gr.Tab("API & MCP"):
425
+ gr.Markdown("""
426
+ ### REST examples
427
+
428
+ **POST** `/api_generate_from_url`
429
+ ```json
430
+ {
431
+ "video_url_or_b64": "https://yourhost/sample.mp4",
432
+ "text_prompt": "metallic clink; hollow room reverb",
433
+ "guidance_scale": 4.5,
434
+ "num_inference_steps": 50,
435
+ "sample_nums": 2
436
+ }
437
+ ```
438
+
439
+ **POST** `/load_model_tool`
440
+ Loads the model proactively (useful before batch runs).
441
+
442
+ **MCP resources & prompt**
443
+ - `shortifoley://status` β†’ quick health info
444
+ - `foley_prompt` β†’ reusable guidance for describing the sound
445
+
446
+ Works great with media-automation in tools like **n8n**: call `load_model_tool` once, then `api_generate_from_url` for each clip.
447
+ """)
448
 
449
  with gr.Tab("ℹ️ About"):
450
  gr.HTML(_about_html())
451
 
452
  # Footer
453
+ gr.HTML(
454
+ """
455
+ <div class="footer-text">
456
+ πŸš€ Created by <a href="https://bilsimaging.com" target="_blank" rel="noopener">bilsimaging.com</a>
457
+ &middot; Powered by HunyuanVideo-Foley
458
+ </div>
459
+ """
460
+ )
461
 
462
  # ---- REST + MCP endpoints (inside Blocks) ----
463
  def _download_to_tmp(url: str) -> str:
464
  try:
465
+ import requests
466
  except Exception:
467
  raise RuntimeError("Missing dependency 'requests'. Add it to requirements.txt to use URL inputs.")
468
  r = requests.get(url, timeout=30)
 
498
  num_inference_steps: int = 50,
499
  sample_nums: int = 1,
500
  ) -> Dict[str, List[str]]:
 
501
  if _model_dict is None or _cfg is None:
502
+ msg = auto_load_models(device_str="cpu") # safe in HTTP context; GPU will be used inside infer
503
+ if not str(msg).startswith("βœ…"):
504
+ raise RuntimeError(msg)
505
  local = _normalize_video_input(video_url_or_b64)
506
  outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
507
  return {"videos": outs, "message": msg}
 
509
  @gr.api
510
  def load_model_tool() -> str:
511
  """Ensure model is loaded on server (convenient for MCP/REST)."""
512
+ return auto_load_models(device_str="cpu")
513
 
514
  @gr.mcp.resource("shortifoley://status")
515
  def shortifoley_status() -> str:
516
  """Return a simple readiness string for MCP clients."""
517
  ready = _model_dict is not None and _cfg is not None
518
  dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
519
+ return f"ShortiFoley status: {'ready' if ready else 'loading'} | device={dev} | outputs={OUTPUTS_DIR}"
520
 
521
  @gr.mcp.prompt()
522
  def foley_prompt(name: str = "default") -> str:
 
526
  "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
527
  )
528
 
 
 
 
529
  return demo
530
 
531
 
 
536
 
537
 
538
  # -------------
539
+ # App bootstrap
540
  # -------------
541
  if __name__ == "__main__":
542
  logger.remove()
 
546
  logger.info("===== Application Startup =====\n")
547
  prepare_once()
548
 
549
+ # Probe imports (early surfacing)
550
  sys.path.append(str(REPO_DIR))
551
  try:
552
  from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process # noqa: F401