Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,15 @@
|
|
1 |
-
|
2 |
# Created by bilsimaging.com
|
3 |
|
4 |
import os
|
|
|
5 |
os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")
|
6 |
|
7 |
import sys
|
8 |
import json
|
|
|
|
|
|
|
9 |
import base64
|
10 |
import random
|
11 |
import tempfile
|
@@ -21,7 +25,6 @@ from loguru import logger
|
|
21 |
from huggingface_hub import snapshot_download
|
22 |
import spaces
|
23 |
|
24 |
-
|
25 |
# -------------------------
|
26 |
# Constants & configuration
|
27 |
# -------------------------
|
@@ -29,25 +32,41 @@ ROOT = Path(__file__).parent.resolve()
|
|
29 |
REPO_DIR = ROOT / "HunyuanVideo-Foley"
|
30 |
WEIGHTS_DIR = Path(os.environ.get("HIFI_FOLEY_MODEL_PATH", str(ROOT / "weights")))
|
31 |
CONFIG_PATH = Path(os.environ.get("HIFI_FOLEY_CONFIG", str(REPO_DIR / "configs" / "hunyuanvideo-foley-xxl.yaml")))
|
32 |
-
OUTPUTS_DIR = Path(os.environ.get("OUTPUTS_DIR", str(ROOT / "outputs")))
|
33 |
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
|
34 |
|
35 |
SPACE_TITLE = "π΅ ShortiFoley β HunyuanVideo-Foley"
|
36 |
-
SPACE_TAGLINE = "
|
37 |
WATERMARK_NOTE = "Made with β€οΈ by bilsimaging.com"
|
38 |
|
39 |
-
# ZeroGPU limit
|
40 |
GPU_DURATION = int(os.environ.get("GPU_DURATION_SECS", "110"))
|
41 |
|
42 |
-
# Globals
|
43 |
_model_dict = None
|
44 |
_cfg = None
|
45 |
_device: Optional[torch.device] = None
|
46 |
|
47 |
|
48 |
# ------------
|
49 |
-
# Small helpers
|
50 |
# ------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
def _ensure_repo() -> None:
|
52 |
"""Shallow-clone Tencent repo with LFS smudge disabled (avoid LFS quota checkout)."""
|
53 |
if REPO_DIR.exists():
|
@@ -86,30 +105,26 @@ def prepare_once() -> None:
|
|
86 |
# -----------------------
|
87 |
# Model load & inference
|
88 |
# -----------------------
|
89 |
-
def auto_load_models(
|
90 |
"""
|
91 |
-
Load HunyuanVideo-Foley + encoders on the
|
92 |
-
|
93 |
"""
|
94 |
global _model_dict, _cfg, _device
|
95 |
|
96 |
if _model_dict is not None and _cfg is not None:
|
97 |
return "β
Model already loaded."
|
98 |
|
99 |
-
#
|
100 |
-
|
101 |
-
return "β Load the model inside a GPU task first (use the Load button or run Generate)."
|
102 |
-
|
103 |
-
os.environ["HF_PREFER_SAFETENSORS"] = "1" # enforce again for safety
|
104 |
|
105 |
sys.path.append(str(REPO_DIR))
|
106 |
from hunyuanvideo_foley.utils.model_utils import load_model
|
107 |
|
108 |
-
_device =
|
109 |
logger.info("Loading HunyuanVideo-Foley model...")
|
110 |
logger.info(f"MODEL_PATH: {WEIGHTS_DIR}")
|
111 |
logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
|
112 |
-
logger.info(f"TARGET_DEVICE: {_device}")
|
113 |
|
114 |
try:
|
115 |
_model_dict, _cfg = load_model(str(WEIGHTS_DIR), str(CONFIG_PATH), _device)
|
@@ -152,7 +167,7 @@ def _merge_audio_video(audio_path: str, video_path: str, out_path: str) -> None:
|
|
152 |
|
153 |
def _save_outputs(video_src: str, audio_tensor: torch.Tensor, sr: int, idx: int,
|
154 |
prompt: str) -> str:
|
155 |
-
"""Save WAV + MP4 in
|
156 |
# torchaudio expects [C, N]
|
157 |
if audio_tensor.ndim == 1:
|
158 |
audio_tensor = audio_tensor.unsqueeze(0)
|
@@ -207,12 +222,9 @@ def infer_single_video(
|
|
207 |
Generate Foley audio for an uploaded video (1β6 variants).
|
208 |
Returns: (list of output video paths, status message)
|
209 |
"""
|
210 |
-
#
|
211 |
-
device = torch.device("cuda:0")
|
212 |
-
|
213 |
-
# Lazy-load if needed on GPU
|
214 |
if _model_dict is None or _cfg is None:
|
215 |
-
msg = auto_load_models(
|
216 |
if not str(msg).startswith("β
"):
|
217 |
return [], f"β {msg}"
|
218 |
|
@@ -249,31 +261,17 @@ def infer_single_video(
|
|
249 |
return outs, f"β
Generated {len(outs)} result(s). Saved to {OUTPUTS_DIR}/"
|
250 |
|
251 |
|
252 |
-
|
253 |
-
@spaces.GPU(duration=GPU_DURATION)
def gpu_load_models() -> str:
    """
    Load the models on ``cuda:0`` and return the loader's status message.

    The function is wrapped with ``spaces.GPU`` using ``GPU_DURATION`` as the
    requested duration; the actual loading is delegated to
    ``auto_load_models``.
    """
    return auto_load_models(torch.device("cuda:0"))
|
257 |
-
|
258 |
-
|
259 |
# -------------
|
260 |
-
# Gradio UI (with MCP
|
261 |
# -------------
|
262 |
def _about_html() -> str:
|
263 |
return f"""
|
264 |
<div style="line-height:1.6">
|
265 |
<h2>About ShortiFoley</h2>
|
266 |
-
<p><b>ShortiFoley</b> turns short videos into realistic Foley sound
|
267 |
-
Powered by Tencentβs HunyuanVideo-Foley (SigLIP2 + CLAP), with autosave and an MCP server for automation
|
268 |
-
|
269 |
-
<p>
|
270 |
-
<a href="https://bilsimaging.com" target="_blank" rel="noopener">bilsimaging.com</a>,
|
271 |
-
built to streamline creative workflows across video, sound, and publishing.</p>
|
272 |
-
|
273 |
-
<p>ShortiFoley integrates seamlessly with automation tools like
|
274 |
-
<a href="https://n8n.partnerlinks.io/bilsimaging" target="_blank" rel="noopener">n8n</a>,
|
275 |
-
making it easy to plug into custom workflows and pipelines.</p>
|
276 |
-
|
277 |
|
278 |
<h3>Quick Steps</h3>
|
279 |
<ol>
|
@@ -293,9 +291,9 @@ making it easy to plug into custom workflows and pipelines.</p>
|
|
293 |
|
294 |
<h3>MCP & API</h3>
|
295 |
<p>This Space exposes an <b>MCP server</b> and simple REST endpoints (see βAPI & MCPβ tab).
|
296 |
-
Perfect for pipelines and tools like <b>n8n</b>.</p>
|
|
|
297 |
|
298 |
-
|
299 |
</div>
|
300 |
"""
|
301 |
|
@@ -309,7 +307,7 @@ def create_ui() -> gr.Blocks:
|
|
309 |
.generate-btn button{ font-weight:800; border-radius:12px; padding:10px 18px;}
|
310 |
.minor-btn button{ border-radius:10px;}
|
311 |
.muted{ color:#64748b; }
|
312 |
-
.footer-text{
|
313 |
"""
|
314 |
with gr.Blocks(title="ShortiFoley β HunyuanVideo-Foley", css=css) as demo:
|
315 |
|
@@ -333,7 +331,7 @@ def create_ui() -> gr.Blocks:
|
|
333 |
samples = gr.Slider(1, 6, value=1, step=1, label="Variants")
|
334 |
|
335 |
with gr.Row():
|
336 |
-
load_btn = gr.Button("βοΈ Load model", variant="secondary", elem_classes=["minor-btn"])
|
337 |
generate = gr.Button("π΅ Generate", variant="primary", elem_classes=["generate-btn"])
|
338 |
|
339 |
status = gr.Textbox(label="Status", interactive=False)
|
@@ -356,27 +354,47 @@ def create_ui() -> gr.Blocks:
|
|
356 |
outs, msg = infer_single_video(video_file, text_prompt, cfg, nsteps, nsamples)
|
357 |
vis = []
|
358 |
for i in range(6):
|
359 |
-
if i < len(outs):
|
360 |
vis.append(gr.update(visible=True, value=outs[i]))
|
361 |
else:
|
362 |
-
vis.append(gr.update(visible=
|
363 |
-
|
|
|
|
|
364 |
|
365 |
-
|
366 |
fn=_process_and_update,
|
367 |
inputs=[video_input, text_input, guidance_scale, steps, samples],
|
368 |
-
outputs=[v1, v2, v3, v4, v5, v6, status],
|
369 |
api_name="/infer",
|
370 |
api_description="Generate Foley audio for an uploaded video. Returns up to 6 video+audio files."
|
371 |
)
|
372 |
|
373 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
load_btn.click(
|
375 |
-
fn=
|
376 |
inputs=[],
|
377 |
outputs=[status],
|
378 |
api_name="/load_model",
|
379 |
-
api_description="Load/initialize the ShortiFoley model and encoders (
|
380 |
)
|
381 |
|
382 |
# Toggle visibility based on variants
|
@@ -393,7 +411,7 @@ def create_ui() -> gr.Blocks:
|
|
393 |
samples.change(_toggle_vis, inputs=[samples], outputs=[v1, v2, v3, v4, v5, v6])
|
394 |
|
395 |
with gr.Tab("π Gallery"):
|
396 |
-
gr.Markdown("Latest generated videos (autosaved to `outputs/`).")
|
397 |
gallery = gr.Gallery(
|
398 |
value=_list_gallery(),
|
399 |
columns=3,
|
@@ -401,49 +419,50 @@ def create_ui() -> gr.Blocks:
|
|
401 |
label="Saved Results"
|
402 |
)
|
403 |
refresh = gr.Button("π Refresh Gallery")
|
404 |
-
|
405 |
-
def _refresh_gallery():
    """Return a Gradio update whose value is the current gallery listing."""
    latest_items = _list_gallery()
    return gr.update(value=latest_items)
|
407 |
-
|
408 |
-
# Refresh via button
|
409 |
-
refresh.click(_refresh_gallery, outputs=[gallery])
|
410 |
-
# Also refresh after generation finishes
|
411 |
-
gen_evt.then(_refresh_gallery, inputs=None, outputs=[gallery])
|
412 |
|
413 |
with gr.Tab("API & MCP"):
|
414 |
-
gr.Markdown(
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
|
|
|
|
|
|
|
|
|
|
432 |
|
433 |
with gr.Tab("βΉοΈ About"):
|
434 |
gr.HTML(_about_html())
|
435 |
|
436 |
# Footer
|
437 |
-
gr.HTML(
|
438 |
-
|
439 |
-
<
|
440 |
-
|
441 |
-
|
|
|
|
|
|
|
442 |
|
443 |
# ---- REST + MCP endpoints (inside Blocks) ----
|
444 |
def _download_to_tmp(url: str) -> str:
|
445 |
try:
|
446 |
-
import requests
|
447 |
except Exception:
|
448 |
raise RuntimeError("Missing dependency 'requests'. Add it to requirements.txt to use URL inputs.")
|
449 |
r = requests.get(url, timeout=30)
|
@@ -479,9 +498,10 @@ def create_ui() -> gr.Blocks:
|
|
479 |
num_inference_steps: int = 50,
|
480 |
sample_nums: int = 1,
|
481 |
) -> Dict[str, List[str]]:
|
482 |
-
# Ensure model is ready (GPU-safe path)
|
483 |
if _model_dict is None or _cfg is None:
|
484 |
-
|
|
|
|
|
485 |
local = _normalize_video_input(video_url_or_b64)
|
486 |
outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
|
487 |
return {"videos": outs, "message": msg}
|
@@ -489,14 +509,14 @@ def create_ui() -> gr.Blocks:
|
|
489 |
@gr.api
|
490 |
def load_model_tool() -> str:
|
491 |
"""Ensure model is loaded on server (convenient for MCP/REST)."""
|
492 |
-
return
|
493 |
|
494 |
@gr.mcp.resource("shortifoley://status")
|
495 |
def shortifoley_status() -> str:
|
496 |
"""Return a simple readiness string for MCP clients."""
|
497 |
ready = _model_dict is not None and _cfg is not None
|
498 |
dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
|
499 |
-
return f"ShortiFoley status: {'ready' if ready else '
|
500 |
|
501 |
@gr.mcp.prompt()
|
502 |
def foley_prompt(name: str = "default") -> str:
|
@@ -506,9 +526,6 @@ def create_ui() -> gr.Blocks:
|
|
506 |
"Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
|
507 |
)
|
508 |
|
509 |
-
# IMPORTANT: Do NOT auto-load models here to avoid CUDA init in main process
|
510 |
-
demo.load(lambda: "Ready. Click 'Load model' or 'Generate' to start.", inputs=None, outputs=None)
|
511 |
-
|
512 |
return demo
|
513 |
|
514 |
|
@@ -519,7 +536,7 @@ def set_seeds(s: int = 1):
|
|
519 |
|
520 |
|
521 |
# -------------
|
522 |
-
# App bootstrap
|
523 |
# -------------
|
524 |
if __name__ == "__main__":
|
525 |
logger.remove()
|
@@ -529,7 +546,7 @@ if __name__ == "__main__":
|
|
529 |
logger.info("===== Application Startup =====\n")
|
530 |
prepare_once()
|
531 |
|
532 |
-
# Probe imports (early surfacing)
|
533 |
sys.path.append(str(REPO_DIR))
|
534 |
try:
|
535 |
from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process # noqa: F401
|
|
|
1 |
+
|
2 |
# Created by bilsimaging.com
|
3 |
|
4 |
import os
|
5 |
+
|
6 |
os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")
|
7 |
|
8 |
import sys
|
9 |
import json
|
10 |
+
import uuid
|
11 |
+
import time
|
12 |
+
import shutil
|
13 |
import base64
|
14 |
import random
|
15 |
import tempfile
|
|
|
25 |
from huggingface_hub import snapshot_download
|
26 |
import spaces
|
27 |
|
|
|
28 |
# -------------------------
|
29 |
# Constants & configuration
|
30 |
# -------------------------
|
|
|
32 |
REPO_DIR = ROOT / "HunyuanVideo-Foley"
|
33 |
WEIGHTS_DIR = Path(os.environ.get("HIFI_FOLEY_MODEL_PATH", str(ROOT / "weights")))
|
34 |
CONFIG_PATH = Path(os.environ.get("HIFI_FOLEY_CONFIG", str(REPO_DIR / "configs" / "hunyuanvideo-foley-xxl.yaml")))
|
35 |
+
OUTPUTS_DIR = Path(os.environ.get("OUTPUTS_DIR", str(ROOT / "outputs" / "autosaved")))
|
36 |
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
|
37 |
|
38 |
SPACE_TITLE = "π΅ ShortiFoley β HunyuanVideo-Foley"
|
39 |
+
SPACE_TAGLINE = "Text/Video β Audio Foley Β· Created by bilsimaging.com"
|
40 |
WATERMARK_NOTE = "Made with β€οΈ by bilsimaging.com"
|
41 |
|
42 |
+
# ZeroGPU limit
|
43 |
GPU_DURATION = int(os.environ.get("GPU_DURATION_SECS", "110"))
|
44 |
|
45 |
+
# Globals
|
46 |
_model_dict = None
|
47 |
_cfg = None
|
48 |
_device: Optional[torch.device] = None
|
49 |
|
50 |
|
51 |
# ------------
|
52 |
+
# Small helpers
|
53 |
# ------------
|
54 |
+
def _setup_device(pref: str = "cpu", gpu_id: int = 0) -> torch.device:
    """
    Translate a device preference string into a ``torch.device``.

    Only the device object is constructed here; this function never calls
    ``torch.cuda.is_available()`` or any other CUDA query. On Stateless GPU
    Spaces, request a CUDA device only from inside a ``@spaces.GPU`` function,
    never from the main/non-GPU process.

    Args:
        pref: Any string starting with "cuda" selects ``cuda:{gpu_id}`` (an
            index embedded in ``pref`` itself is ignored); "mps" selects MPS;
            every other value falls back to CPU.
        gpu_id: CUDA ordinal used when a CUDA device is requested.

    Returns:
        The selected device, which is also logged at INFO level.
    """
    if pref.startswith("cuda"):
        target = f"cuda:{gpu_id}"
    else:
        target = "mps" if pref == "mps" else "cpu"

    chosen = torch.device(target)
    logger.info(f"Using {chosen}")
    return chosen
|
68 |
+
|
69 |
+
|
70 |
def _ensure_repo() -> None:
|
71 |
"""Shallow-clone Tencent repo with LFS smudge disabled (avoid LFS quota checkout)."""
|
72 |
if REPO_DIR.exists():
|
|
|
105 |
# -----------------------
|
106 |
# Model load & inference
|
107 |
# -----------------------
|
108 |
+
def auto_load_models(device_str: str = "cpu") -> str:
|
109 |
"""
|
110 |
+
Load HunyuanVideo-Foley + encoders on the chosen device.
|
111 |
+
Use device_str="cuda" ONLY inside @spaces.GPU function to avoid CUDA init in main process.
|
112 |
"""
|
113 |
global _model_dict, _cfg, _device
|
114 |
|
115 |
if _model_dict is not None and _cfg is not None:
|
116 |
return "β
Model already loaded."
|
117 |
|
118 |
+
# Make absolutely sure safetensors is preferred
|
119 |
+
os.environ["HF_PREFER_SAFETENSORS"] = "1"
|
|
|
|
|
|
|
120 |
|
121 |
sys.path.append(str(REPO_DIR))
|
122 |
from hunyuanvideo_foley.utils.model_utils import load_model
|
123 |
|
124 |
+
_device = _setup_device(device_str, 0)
|
125 |
logger.info("Loading HunyuanVideo-Foley model...")
|
126 |
logger.info(f"MODEL_PATH: {WEIGHTS_DIR}")
|
127 |
logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
|
|
|
128 |
|
129 |
try:
|
130 |
_model_dict, _cfg = load_model(str(WEIGHTS_DIR), str(CONFIG_PATH), _device)
|
|
|
167 |
|
168 |
def _save_outputs(video_src: str, audio_tensor: torch.Tensor, sr: int, idx: int,
|
169 |
prompt: str) -> str:
|
170 |
+
"""Save WAV + MP4 in autosaved/, add metadata with a soft watermark note."""
|
171 |
# torchaudio expects [C, N]
|
172 |
if audio_tensor.ndim == 1:
|
173 |
audio_tensor = audio_tensor.unsqueeze(0)
|
|
|
222 |
Generate Foley audio for an uploaded video (1β6 variants).
|
223 |
Returns: (list of output video paths, status message)
|
224 |
"""
|
225 |
+
# Lazy-load on GPU
|
|
|
|
|
|
|
226 |
if _model_dict is None or _cfg is None:
|
227 |
+
msg = auto_load_models(device_str="cuda")
|
228 |
if not str(msg).startswith("β
"):
|
229 |
return [], f"β {msg}"
|
230 |
|
|
|
261 |
return outs, f"β
Generated {len(outs)} result(s). Saved to {OUTPUTS_DIR}/"
|
262 |
|
263 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
# -------------
|
265 |
+
# Gradio UI (with MCP+API inside the same app)
|
266 |
# -------------
|
267 |
def _about_html() -> str:
|
268 |
return f"""
|
269 |
<div style="line-height:1.6">
|
270 |
<h2>About ShortiFoley</h2>
|
271 |
+
<p><b>ShortiFoley</b> turns short videos into realistic Foley sound.<br/>
|
272 |
+
Powered by Tencentβs HunyuanVideo-Foley (SigLIP2 + CLAP), with autosave and an MCP server for automation
|
273 |
+
(<a href="https://n8n.partnerlinks.io/bilsimaging" target="_blank" rel="noopener">n8n</a> flows).</p>
|
274 |
+
<p><b>Created by <a href="https://bilsimaging.com" target="_blank" rel="noopener">bilsimaging.com</a></b></p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
|
276 |
<h3>Quick Steps</h3>
|
277 |
<ol>
|
|
|
291 |
|
292 |
<h3>MCP & API</h3>
|
293 |
<p>This Space exposes an <b>MCP server</b> and simple REST endpoints (see βAPI & MCPβ tab).
|
294 |
+
Perfect for media-automation pipelines and tools like <b><a href="https://n8n.partnerlinks.io/bilsimaging" target="_blank" rel="noopener">n8n</a></b>.</p>
|
295 |
+
|
296 |
|
|
|
297 |
</div>
|
298 |
"""
|
299 |
|
|
|
307 |
.generate-btn button{ font-weight:800; border-radius:12px; padding:10px 18px;}
|
308 |
.minor-btn button{ border-radius:10px;}
|
309 |
.muted{ color:#64748b; }
|
310 |
+
.footer-text{ color:#64748b; text-align:center; padding:12px 0; font-size:.95rem; }
|
311 |
"""
|
312 |
with gr.Blocks(title="ShortiFoley β HunyuanVideo-Foley", css=css) as demo:
|
313 |
|
|
|
331 |
samples = gr.Slider(1, 6, value=1, step=1, label="Variants")
|
332 |
|
333 |
with gr.Row():
|
334 |
+
load_btn = gr.Button("βοΈ Load model (CPU)", variant="secondary", elem_classes=["minor-btn"])
|
335 |
generate = gr.Button("π΅ Generate", variant="primary", elem_classes=["generate-btn"])
|
336 |
|
337 |
status = gr.Textbox(label="Status", interactive=False)
|
|
|
354 |
outs, msg = infer_single_video(video_file, text_prompt, cfg, nsteps, nsamples)
|
355 |
vis = []
|
356 |
for i in range(6):
|
357 |
+
if outs and i < len(outs):
|
358 |
vis.append(gr.update(visible=True, value=outs[i]))
|
359 |
else:
|
360 |
+
vis.append(gr.update(visible=(i == 0), value=None if i > 0 else None))
|
361 |
+
# Also refresh the gallery in this same event
|
362 |
+
new_gallery = _list_gallery()
|
363 |
+
return (*vis, msg, new_gallery)
|
364 |
|
365 |
+
generate.click(
|
366 |
fn=_process_and_update,
|
367 |
inputs=[video_input, text_input, guidance_scale, steps, samples],
|
368 |
+
outputs=[v1, v2, v3, v4, v5, v6, status], # updated below to include gallery via .then-like merge
|
369 |
api_name="/infer",
|
370 |
api_description="Generate Foley audio for an uploaded video. Returns up to 6 video+audio files."
|
371 |
)
|
372 |
|
373 |
+
# Workaround: extend outputs to include gallery refresh using a wrapper
|
374 |
+
def _process_and_update_with_gallery(video_file, text_prompt, cfg, nsteps, nsamples):
    """
    Run inference and build the UI updates for the six result players.

    Args:
        video_file: Video input forwarded to ``infer_single_video``.
        text_prompt: Text prompt forwarded to ``infer_single_video``.
        cfg: Guidance scale forwarded to ``infer_single_video``.
        nsteps: Number of inference steps forwarded to ``infer_single_video``.
        nsamples: Number of variants forwarded to ``infer_single_video``.

    Returns:
        A 7-tuple: one ``gr.update`` per video slot (six in total) followed by
        the status message. This matches the seven output components listed by
        the ``generate.click`` binding that uses this handler; the gallery
        component is not among those outputs, so no gallery listing is
        computed or returned here.
    """
    outs, msg = infer_single_video(video_file, text_prompt, cfg, nsteps, nsamples)
    results = outs or []  # tolerate a falsy result, as the original guard did

    vis = [
        # Slots with a result are shown with it; empty slots are cleared and
        # only the first one stays visible so the layout keeps one player.
        gr.update(visible=True, value=results[i])
        if i < len(results)
        else gr.update(visible=(i == 0), value=None)
        for i in range(6)
    ]
    return (*vis, msg)
|
384 |
+
|
385 |
+
# Re-bind with gallery as extra output
|
386 |
+
generate.click(
|
387 |
+
fn=_process_and_update_with_gallery,
|
388 |
+
inputs=[video_input, text_input, guidance_scale, steps, samples],
|
389 |
+
outputs=[v1, v2, v3, v4, v5, v6, status,], # gallery will be refreshed on Gallery tab itself
|
390 |
+
)
|
391 |
+
|
392 |
load_btn.click(
|
393 |
+
fn=lambda: auto_load_models(device_str="cpu"),
|
394 |
inputs=[],
|
395 |
outputs=[status],
|
396 |
api_name="/load_model",
|
397 |
+
api_description="Load/initialize the ShortiFoley model and encoders on CPU (GPU loads during inference)."
|
398 |
)
|
399 |
|
400 |
# Toggle visibility based on variants
|
|
|
411 |
samples.change(_toggle_vis, inputs=[samples], outputs=[v1, v2, v3, v4, v5, v6])
|
412 |
|
413 |
with gr.Tab("π Gallery"):
|
414 |
+
gr.Markdown("Latest generated videos (autosaved to `outputs/autosaved/`).")
|
415 |
gallery = gr.Gallery(
|
416 |
value=_list_gallery(),
|
417 |
columns=3,
|
|
|
419 |
label="Saved Results"
|
420 |
)
|
421 |
refresh = gr.Button("π Refresh Gallery")
|
422 |
+
refresh.click(lambda: _list_gallery(), outputs=[gallery])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
423 |
|
424 |
with gr.Tab("API & MCP"):
|
425 |
+
gr.Markdown("""
|
426 |
+
### REST examples
|
427 |
+
|
428 |
+
**POST** `/api_generate_from_url`
|
429 |
+
```json
|
430 |
+
{
|
431 |
+
"video_url_or_b64": "https://yourhost/sample.mp4",
|
432 |
+
"text_prompt": "metallic clink; hollow room reverb",
|
433 |
+
"guidance_scale": 4.5,
|
434 |
+
"num_inference_steps": 50,
|
435 |
+
"sample_nums": 2
|
436 |
+
}
|
437 |
+
```
|
438 |
+
|
439 |
+
**POST** `/load_model_tool`
|
440 |
+
Loads the model proactively (useful before batch runs).
|
441 |
+
|
442 |
+
**MCP resources & prompt**
|
443 |
+
- `shortifoley://status` β quick health info
|
444 |
+
- `foley_prompt` β reusable guidance for describing the sound
|
445 |
+
|
446 |
+
Works great with media-automation in tools like **n8n**: call `load_model_tool` once, then `api_generate_from_url` for each clip.
|
447 |
+
""")
|
448 |
|
449 |
with gr.Tab("βΉοΈ About"):
|
450 |
gr.HTML(_about_html())
|
451 |
|
452 |
# Footer
|
453 |
+
gr.HTML(
|
454 |
+
"""
|
455 |
+
<div class="footer-text">
|
456 |
+
π Created by <a href="https://bilsimaging.com" target="_blank" rel="noopener">bilsimaging.com</a>
|
457 |
+
· Powered by HunyuanVideo-Foley
|
458 |
+
</div>
|
459 |
+
"""
|
460 |
+
)
|
461 |
|
462 |
# ---- REST + MCP endpoints (inside Blocks) ----
|
463 |
def _download_to_tmp(url: str) -> str:
|
464 |
try:
|
465 |
+
import requests
|
466 |
except Exception:
|
467 |
raise RuntimeError("Missing dependency 'requests'. Add it to requirements.txt to use URL inputs.")
|
468 |
r = requests.get(url, timeout=30)
|
|
|
498 |
num_inference_steps: int = 50,
|
499 |
sample_nums: int = 1,
|
500 |
) -> Dict[str, List[str]]:
|
|
|
501 |
if _model_dict is None or _cfg is None:
|
502 |
+
msg = auto_load_models(device_str="cpu") # safe in HTTP context; GPU will be used inside infer
|
503 |
+
if not str(msg).startswith("β
"):
|
504 |
+
raise RuntimeError(msg)
|
505 |
local = _normalize_video_input(video_url_or_b64)
|
506 |
outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
|
507 |
return {"videos": outs, "message": msg}
|
|
|
509 |
@gr.api
def load_model_tool() -> str:
    """
    Make sure the model is loaded on the server (handy for MCP/REST clients).

    Delegates to ``auto_load_models`` with ``device_str="cpu"`` and returns
    its status message unchanged.
    """
    status_message = auto_load_models(device_str="cpu")
    return status_message
|
513 |
|
514 |
@gr.mcp.resource("shortifoley://status")
def shortifoley_status() -> str:
    """
    Build a one-line readiness string for MCP clients.

    The state is "ready" once both ``_model_dict`` and ``_cfg`` are set and
    "loading" otherwise. The device is reported as "cuda" or "mps" when
    ``_device`` has that type, and as "cpu" in every other case (including
    when no device has been chosen yet).
    """
    if _model_dict is not None and _cfg is not None:
        state = "ready"
    else:
        state = "loading"

    if _device and _device.type in ("cuda", "mps"):
        device_name = _device.type
    else:
        device_name = "cpu"

    return f"ShortiFoley status: {state} | device={device_name} | outputs={OUTPUTS_DIR}"
|
520 |
|
521 |
@gr.mcp.prompt()
|
522 |
def foley_prompt(name: str = "default") -> str:
|
|
|
526 |
"Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
|
527 |
)
|
528 |
|
|
|
|
|
|
|
529 |
return demo
|
530 |
|
531 |
|
|
|
536 |
|
537 |
|
538 |
# -------------
|
539 |
+
# App bootstrap
|
540 |
# -------------
|
541 |
if __name__ == "__main__":
|
542 |
logger.remove()
|
|
|
546 |
logger.info("===== Application Startup =====\n")
|
547 |
prepare_once()
|
548 |
|
549 |
+
# Probe imports (early surfacing)
|
550 |
sys.path.append(str(REPO_DIR))
|
551 |
try:
|
552 |
from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process # noqa: F401
|