Spaces:
Running on Zero
Running on Zero
Commit ·
bb533eb
1
Parent(s): c45a944
Fix regen: bypass Svelte binding — call Gradio queue API directly from JS
Browse files
- Replace DOM event dispatch (which Svelte ignores) with direct
  POST to /gradio_api/queue/join using the handler's api_name
- Remove hidden trigger textboxes and relay pattern (no longer needed)
- Embed state_json in waveform HTML data-state attribute so JS reads it
- Add elem_id to all input components so JS can read their DOM values
- Register per-slot regen handlers via gr.Button.click() with api_name
so they get stable fn_index entries in gradio_config.dependencies
- Simplify _make_output_slots to just (grps, vids, waveforms)
- Update _splice_and_save to embed updated state in returned waveform HTML
app.py
CHANGED
|
@@ -833,8 +833,13 @@ def _splice_and_save(new_wav, seg_idx, meta, slot_id):
|
|
| 833 |
updated_meta["audio_path"] = audio_path
|
| 834 |
updated_meta["video_path"] = video_path
|
| 835 |
|
| 836 |
-
|
| 837 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 838 |
return video_path, audio_path, updated_meta, waveform_html
|
| 839 |
|
| 840 |
|
|
@@ -1154,7 +1159,8 @@ def _build_regen_pending_html(segments: list, regen_seg_idx: int, slot_id: str,
|
|
| 1154 |
|
| 1155 |
|
| 1156 |
def _build_waveform_html(audio_path: str, segments: list, slot_id: str,
|
| 1157 |
-
hidden_input_id: str
|
|
|
|
| 1158 |
"""Return a self-contained HTML block with a Canvas waveform (display only),
|
| 1159 |
segment boundary markers, and a download link.
|
| 1160 |
|
|
@@ -1370,8 +1376,13 @@ def _build_waveform_html(audio_path: str, segments: list, slot_id: str,
|
|
| 1370 |
import html as _html
|
| 1371 |
srcdoc = _html.escape(iframe_inner, quote=True)
|
| 1372 |
|
|
|
|
|
|
|
|
|
|
| 1373 |
return f"""
|
| 1374 |
<div id="wf_container_{slot_id}"
|
|
|
|
|
|
|
| 1375 |
style="background:#1a1a1a;border-radius:8px;padding:10px;margin-top:6px;position:relative;">
|
| 1376 |
<div style="position:relative;width:100%;height:80px;">
|
| 1377 |
<iframe id="wf_iframe_{slot_id}"
|
|
@@ -1396,81 +1407,53 @@ def _build_waveform_html(audio_path: str, segments: list, slot_id: str,
|
|
| 1396 |
def _make_output_slots(tab_prefix: str) -> tuple:
|
| 1397 |
"""Build MAX_SLOTS output groups for one tab.
|
| 1398 |
|
| 1399 |
-
Each slot has: video
|
| 1400 |
-
|
| 1401 |
-
|
| 1402 |
-
|
| 1403 |
-
|
| 1404 |
-
|
| 1405 |
-
outputs of the same event (which causes Gradio
|
| 1406 |
-
5 "Too many arguments" even with SSR disabled).
|
| 1407 |
-
Returns (grps, vids, waveforms, regen_triggers, seg_states, seg_state_reads).
|
| 1408 |
"""
|
| 1409 |
-
grps, vids, waveforms
|
| 1410 |
for i in range(MAX_SLOTS):
|
| 1411 |
with gr.Group(visible=(i == 0)) as g:
|
| 1412 |
-
slot_id = f"{tab_prefix}_{i}"
|
| 1413 |
vids.append(gr.Video(label=f"Generation {i+1} — Video"))
|
| 1414 |
waveforms.append(gr.HTML(
|
| 1415 |
value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>",
|
| 1416 |
))
|
| 1417 |
-
# Regen trigger: CSS-hidden so JS can find and write to it.
|
| 1418 |
-
regen_triggers.append(gr.Textbox(
|
| 1419 |
-
value="",
|
| 1420 |
-
elem_id=f"regen_trigger_{slot_id}",
|
| 1421 |
-
elem_classes=["wf-hidden-input"],
|
| 1422 |
-
label="",
|
| 1423 |
-
show_label=False,
|
| 1424 |
-
))
|
| 1425 |
-
# Write-only state: updated by main gen and regen outputs.
|
| 1426 |
-
seg_states.append(gr.Textbox(
|
| 1427 |
-
value="",
|
| 1428 |
-
elem_classes=["wf-hidden-input"],
|
| 1429 |
-
label="",
|
| 1430 |
-
show_label=False,
|
| 1431 |
-
))
|
| 1432 |
-
# Read-only mirror: fed into regen handler inputs only.
|
| 1433 |
-
# Stays in sync via a .change() relay wired after slot creation.
|
| 1434 |
-
seg_state_reads.append(gr.Textbox(
|
| 1435 |
-
value="",
|
| 1436 |
-
elem_classes=["wf-hidden-input"],
|
| 1437 |
-
label="",
|
| 1438 |
-
show_label=False,
|
| 1439 |
-
))
|
| 1440 |
grps.append(g)
|
| 1441 |
-
return grps, vids, waveforms
|
| 1442 |
|
| 1443 |
|
| 1444 |
def _unpack_outputs(flat: list, n: int, tab_prefix: str) -> list:
|
| 1445 |
"""Turn a flat _pad_outputs list into Gradio update lists.
|
| 1446 |
|
| 1447 |
flat has MAX_SLOTS * 3 items: [vid0, aud0, meta0, vid1, aud1, meta1, ...]
|
| 1448 |
-
Returns updates for vids + waveforms
|
| 1449 |
Group visibility is handled separately via .then() to avoid Gradio 5 SSR
|
| 1450 |
'Too many arguments' caused by mixing gr.Group updates with other outputs.
|
|
|
|
|
|
|
| 1451 |
"""
|
| 1452 |
n = int(n)
|
| 1453 |
vid_updates = []
|
| 1454 |
wave_updates = []
|
| 1455 |
-
state_updates= []
|
| 1456 |
for i in range(MAX_SLOTS):
|
| 1457 |
vid_path = flat[i * 3]
|
| 1458 |
aud_path = flat[i * 3 + 1]
|
| 1459 |
meta = flat[i * 3 + 2]
|
| 1460 |
vid_updates.append(gr.update(value=vid_path))
|
| 1461 |
if aud_path and meta:
|
| 1462 |
-
slot_id
|
| 1463 |
-
|
| 1464 |
-
html = _build_waveform_html(aud_path, meta["segments"], slot_id,
|
|
|
|
| 1465 |
wave_updates.append(gr.update(value=html))
|
| 1466 |
-
# Serialize meta to JSON string (seg_states are now gr.Textbox)
|
| 1467 |
-
state_updates.append(gr.update(value=json.dumps(meta)))
|
| 1468 |
else:
|
| 1469 |
wave_updates.append(gr.update(
|
| 1470 |
value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>"
|
| 1471 |
))
|
| 1472 |
-
|
| 1473 |
-
return vid_updates + wave_updates + state_updates
|
| 1474 |
|
| 1475 |
|
| 1476 |
def _on_video_upload_taro(video_file, num_steps, crossfade_s):
|
|
@@ -1501,28 +1484,138 @@ _SLOT_CSS = """
|
|
| 1501 |
max-height: 60vh !important;
|
| 1502 |
object-fit: contain;
|
| 1503 |
}
|
| 1504 |
-
/*
|
| 1505 |
-
Gradio 5 SSR omits visible=False components from the DOM entirely,
|
| 1506 |
-
so JS can never find them. CSS-hidden components are always in the DOM. */
|
| 1507 |
-
.wf-hidden-input {
|
| 1508 |
-
position: absolute !important;
|
| 1509 |
-
left: -9999px !important;
|
| 1510 |
-
width: 1px !important;
|
| 1511 |
-
height: 1px !important;
|
| 1512 |
-
overflow: hidden !important;
|
| 1513 |
-
pointer-events: none !important;
|
| 1514 |
-
opacity: 0 !important;
|
| 1515 |
-
}
|
| 1516 |
"""
|
| 1517 |
|
| 1518 |
_GLOBAL_JS = """
|
| 1519 |
() => {
|
| 1520 |
// Global postMessage handler for waveform iframe events.
|
| 1521 |
// Runs once on page load (Gradio js= parameter).
|
| 1522 |
-
// Handles: popup open/close relay, regen trigger.
|
| 1523 |
if (window._wf_global_listener) return; // already registered
|
| 1524 |
window._wf_global_listener = true;
|
| 1525 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1526 |
// Shared popup element created once and reused across all slots
|
| 1527 |
let _popup = null;
|
| 1528 |
let _pendingSlot = null, _pendingIdx = null;
|
|
@@ -1554,40 +1647,6 @@ _GLOBAL_JS = """
|
|
| 1554 |
_pendingSlot = null; _pendingIdx = null;
|
| 1555 |
}
|
| 1556 |
|
| 1557 |
-
function fireRegen(slot_id, idx) {
|
| 1558 |
-
const el = document.getElementById('regen_trigger_' + slot_id);
|
| 1559 |
-
if (!el) { console.warn('[fireRegen] regen_trigger element not found:', slot_id); return; }
|
| 1560 |
-
const input = el.querySelector('input, textarea');
|
| 1561 |
-
if (!input) { console.warn('[fireRegen] no input inside regen_trigger:', slot_id); return; }
|
| 1562 |
-
|
| 1563 |
-
// Use native setter to bypass Svelte's controlled-input tracking.
|
| 1564 |
-
// Timestamp suffix ensures repeat clicks on the same segment always
|
| 1565 |
-
// produce a new value so Svelte's change detection always fires.
|
| 1566 |
-
// State JSON is passed via a separate Gradio input (seg_state_read),
|
| 1567 |
-
// not embedded in the trigger string — Gradio's own state is reliable,
|
| 1568 |
-
// whereas reading the DOM input.value returns '' for Svelte-controlled inputs.
|
| 1569 |
-
// IMPORTANT: Gradio 5 renders Textbox as <textarea>, NOT <input>.
|
| 1570 |
-
// Must use HTMLTextAreaElement.prototype setter — using HTMLInputElement.prototype
|
| 1571 |
-
// on a textarea causes "TypeError: Illegal invocation" and silently aborts.
|
| 1572 |
-
function setNative(val) {
|
| 1573 |
-
const proto = input.tagName === 'TEXTAREA'
|
| 1574 |
-
? HTMLTextAreaElement.prototype
|
| 1575 |
-
: HTMLInputElement.prototype;
|
| 1576 |
-
const desc = Object.getOwnPropertyDescriptor(proto, 'value');
|
| 1577 |
-
if (desc && desc.set) desc.set.call(input, val);
|
| 1578 |
-
else input.value = val;
|
| 1579 |
-
input.dispatchEvent(new Event('input', {bubbles: true}));
|
| 1580 |
-
input.dispatchEvent(new Event('change', {bubbles: true}));
|
| 1581 |
-
}
|
| 1582 |
-
// Encode: "slot_id|seg_idx|timestamp"
|
| 1583 |
-
const triggerVal = slot_id + '|' + idx + '|' + Date.now();
|
| 1584 |
-
setNative(triggerVal);
|
| 1585 |
-
console.log('[fireRegen] fired trigger for', slot_id, 'seg', idx);
|
| 1586 |
-
|
| 1587 |
-
const lbl = document.getElementById('wf_seglabel_' + slot_id);
|
| 1588 |
-
if (lbl) lbl.textContent = 'Regenerating Seg ' + (idx + 1) + '...';
|
| 1589 |
-
}
|
| 1590 |
-
|
| 1591 |
window.addEventListener('message', function(e) {
|
| 1592 |
const d = e.data;
|
| 1593 |
if (!d || d.type !== 'wf_popup') return;
|
|
@@ -1598,7 +1657,7 @@ _GLOBAL_JS = """
|
|
| 1598 |
_pendingIdx = d.seg_idx;
|
| 1599 |
const lbl = document.getElementById('_wf_popup_lbl');
|
| 1600 |
if (lbl) lbl.textContent = 'Seg ' + (d.seg_idx + 1) +
|
| 1601 |
-
' (' + d.t0.toFixed(2) + 's
|
| 1602 |
p.style.display = 'block';
|
| 1603 |
p.style.left = (d.x + 10) + 'px';
|
| 1604 |
p.style.top = (d.y + 10) + 'px';
|
|
@@ -1631,20 +1690,18 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
|
|
| 1631 |
with gr.Row():
|
| 1632 |
with gr.Column():
|
| 1633 |
taro_video = gr.Video(label="Input Video")
|
| 1634 |
-
taro_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
| 1635 |
-
taro_cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=7.5, step=0.5)
|
| 1636 |
-
taro_steps = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1)
|
| 1637 |
-
taro_mode = gr.Radio(label="Sampling Mode", choices=["sde", "ode"], value="sde")
|
| 1638 |
-
taro_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
|
| 1639 |
-
taro_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3")
|
| 1640 |
taro_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
|
| 1641 |
taro_btn = gr.Button("Generate", variant="primary")
|
| 1642 |
|
| 1643 |
with gr.Column():
|
| 1644 |
(taro_slot_grps, taro_slot_vids,
|
| 1645 |
-
taro_slot_waves
|
| 1646 |
-
taro_slot_states,
|
| 1647 |
-
taro_slot_state_reads) = _make_output_slots("taro")
|
| 1648 |
|
| 1649 |
for trigger in [taro_video, taro_steps, taro_cf_dur]:
|
| 1650 |
trigger.change(
|
|
@@ -1674,63 +1731,58 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
|
|
| 1674 |
fn=_run_taro,
|
| 1675 |
inputs=[taro_video, taro_seed, taro_cfg, taro_steps, taro_mode,
|
| 1676 |
taro_cf_dur, taro_cf_db, taro_samples],
|
| 1677 |
-
outputs=taro_slot_vids + taro_slot_waves
|
| 1678 |
).then(
|
| 1679 |
fn=_update_slot_visibility,
|
| 1680 |
inputs=[taro_samples],
|
| 1681 |
outputs=taro_slot_grps,
|
| 1682 |
))
|
| 1683 |
|
| 1684 |
-
#
|
| 1685 |
-
|
| 1686 |
-
|
| 1687 |
-
|
| 1688 |
-
#
|
| 1689 |
-
|
|
|
|
|
|
|
| 1690 |
_slot_id = f"taro_{_i}"
|
|
|
|
|
|
|
| 1691 |
print(f"[startup] registering regen handler for slot {_slot_id}")
|
| 1692 |
def _make_taro_regen(_si, _sid):
|
| 1693 |
-
def _do(
|
| 1694 |
-
print(f"[regen TARO]
|
| 1695 |
-
if not trigger_val:
|
| 1696 |
-
print(f"[regen TARO] early-exit: trigger_val empty")
|
| 1697 |
-
yield gr.update(), gr.update(), gr.update(); return
|
| 1698 |
if not state_json:
|
| 1699 |
print(f"[regen TARO] early-exit: state_json empty")
|
| 1700 |
-
yield gr.update(), gr.update()
|
| 1701 |
-
# Trigger format: "slot_id|seg_idx|timestamp"
|
| 1702 |
-
parts = trigger_val.split("|", 2)
|
| 1703 |
-
if len(parts) < 2 or parts[0] != _sid:
|
| 1704 |
-
print(f"[regen TARO] early-exit: parts[0]={parts[0]!r} expected={_sid!r}")
|
| 1705 |
-
yield gr.update(), gr.update(), gr.update(); return
|
| 1706 |
-
seg_idx = int(parts[1])
|
| 1707 |
-
print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — acquiring lock")
|
| 1708 |
lock = _get_slot_lock(_sid)
|
| 1709 |
with lock:
|
| 1710 |
print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
|
| 1711 |
-
state
|
| 1712 |
pending_html = _build_regen_pending_html(
|
| 1713 |
-
state["segments"], seg_idx, _sid,
|
| 1714 |
-
f"regen_trigger_{_sid}"
|
| 1715 |
)
|
| 1716 |
-
yield gr.update(), gr.update(value=pending_html)
|
| 1717 |
print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — calling regen_taro_segment")
|
| 1718 |
try:
|
| 1719 |
vid, aud, new_meta_json, html = regen_taro_segment(
|
| 1720 |
-
video, seg_idx, state_json,
|
| 1721 |
seed, cfg, steps, mode, cf_dur, cf_db, _sid,
|
| 1722 |
)
|
| 1723 |
print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
|
| 1724 |
except Exception as _e:
|
| 1725 |
print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
|
| 1726 |
raise
|
| 1727 |
-
yield gr.update(value=vid), gr.update(value=html)
|
| 1728 |
return _do
|
| 1729 |
-
|
| 1730 |
fn=_make_taro_regen(_i, _slot_id),
|
| 1731 |
-
inputs=[
|
| 1732 |
-
|
| 1733 |
-
|
|
|
|
|
|
|
| 1734 |
)
|
| 1735 |
|
| 1736 |
# ---------------------------------------------------------- #
|
|
@@ -1740,21 +1792,19 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
|
|
| 1740 |
with gr.Row():
|
| 1741 |
with gr.Column():
|
| 1742 |
mma_video = gr.Video(label="Input Video")
|
| 1743 |
-
mma_prompt = gr.Textbox(label="Prompt", placeholder="e.g. footsteps on gravel")
|
| 1744 |
-
mma_neg = gr.Textbox(label="Negative Prompt", placeholder="music, speech")
|
| 1745 |
-
mma_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
| 1746 |
-
mma_cfg = gr.Slider(label="CFG Strength", minimum=1, maximum=10, value=4.5, step=0.5)
|
| 1747 |
-
mma_steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=25, step=1)
|
| 1748 |
-
mma_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
|
| 1749 |
-
mma_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3")
|
| 1750 |
mma_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
|
| 1751 |
mma_btn = gr.Button("Generate", variant="primary")
|
| 1752 |
|
| 1753 |
with gr.Column():
|
| 1754 |
(mma_slot_grps, mma_slot_vids,
|
| 1755 |
-
mma_slot_waves
|
| 1756 |
-
mma_slot_states,
|
| 1757 |
-
mma_slot_state_reads) = _make_output_slots("mma")
|
| 1758 |
|
| 1759 |
mma_samples.change(
|
| 1760 |
fn=_update_slot_visibility,
|
|
@@ -1775,60 +1825,50 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
|
|
| 1775 |
fn=_run_mmaudio,
|
| 1776 |
inputs=[mma_video, mma_prompt, mma_neg, mma_seed,
|
| 1777 |
mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_samples],
|
| 1778 |
-
outputs=mma_slot_vids + mma_slot_waves
|
| 1779 |
).then(
|
| 1780 |
fn=_update_slot_visibility,
|
| 1781 |
inputs=[mma_samples],
|
| 1782 |
outputs=mma_slot_grps,
|
| 1783 |
))
|
| 1784 |
|
| 1785 |
-
|
| 1786 |
-
for
|
| 1787 |
-
_st.change(fn=lambda v: v, inputs=[_st], outputs=[_str])
|
| 1788 |
-
|
| 1789 |
-
for _i, _rtrig in enumerate(mma_slot_rtrigs):
|
| 1790 |
_slot_id = f"mma_{_i}"
|
|
|
|
|
|
|
| 1791 |
def _make_mma_regen(_si, _sid):
|
| 1792 |
-
def _do(
|
| 1793 |
-
print(f"[regen MMA]
|
| 1794 |
-
if not trigger_val:
|
| 1795 |
-
print(f"[regen MMA] early-exit: trigger_val empty")
|
| 1796 |
-
yield gr.update(), gr.update(), gr.update(); return
|
| 1797 |
if not state_json:
|
| 1798 |
print(f"[regen MMA] early-exit: state_json empty")
|
| 1799 |
-
yield gr.update(), gr.update()
|
| 1800 |
-
parts = trigger_val.split("|", 2)
|
| 1801 |
-
if len(parts) < 2 or parts[0] != _sid:
|
| 1802 |
-
print(f"[regen MMA] early-exit: parts[0]={parts[0]!r} expected={_sid!r}")
|
| 1803 |
-
yield gr.update(), gr.update(), gr.update(); return
|
| 1804 |
-
seg_idx = int(parts[1])
|
| 1805 |
-
print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — acquiring lock")
|
| 1806 |
lock = _get_slot_lock(_sid)
|
| 1807 |
with lock:
|
| 1808 |
-
|
| 1809 |
-
state = json.loads(state_json)
|
| 1810 |
pending_html = _build_regen_pending_html(
|
| 1811 |
-
state["segments"], seg_idx, _sid,
|
| 1812 |
-
f"regen_trigger_{_sid}"
|
| 1813 |
)
|
| 1814 |
-
yield gr.update(), gr.update(value=pending_html)
|
| 1815 |
print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — calling regen_mmaudio_segment")
|
| 1816 |
try:
|
| 1817 |
vid, aud, new_meta_json, html = regen_mmaudio_segment(
|
| 1818 |
-
video, seg_idx, state_json,
|
| 1819 |
prompt, neg, seed, cfg, steps, cf_dur, cf_db, _sid,
|
| 1820 |
)
|
| 1821 |
print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
|
| 1822 |
except Exception as _e:
|
| 1823 |
print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
|
| 1824 |
raise
|
| 1825 |
-
yield gr.update(value=vid), gr.update(value=html)
|
| 1826 |
return _do
|
| 1827 |
-
|
| 1828 |
fn=_make_mma_regen(_i, _slot_id),
|
| 1829 |
-
inputs=[
|
| 1830 |
-
|
| 1831 |
-
|
|
|
|
|
|
|
| 1832 |
)
|
| 1833 |
|
| 1834 |
# ---------------------------------------------------------- #
|
|
@@ -1838,22 +1878,20 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
|
|
| 1838 |
with gr.Row():
|
| 1839 |
with gr.Column():
|
| 1840 |
hf_video = gr.Video(label="Input Video")
|
| 1841 |
-
hf_prompt = gr.Textbox(label="Prompt", placeholder="e.g. rain hitting a metal roof")
|
| 1842 |
-
hf_neg = gr.Textbox(label="Negative Prompt", value="noisy, harsh")
|
| 1843 |
-
hf_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
| 1844 |
-
hf_guidance = gr.Slider(label="Guidance Scale", minimum=1, maximum=10, value=4.5, step=0.5)
|
| 1845 |
-
hf_steps = gr.Slider(label="Steps", minimum=10, maximum=100, value=50, step=5)
|
| 1846 |
-
hf_size = gr.Radio(label="Model Size", choices=["xl", "xxl"], value="xxl")
|
| 1847 |
-
hf_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
|
| 1848 |
-
hf_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3")
|
| 1849 |
hf_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
|
| 1850 |
hf_btn = gr.Button("Generate", variant="primary")
|
| 1851 |
|
| 1852 |
with gr.Column():
|
| 1853 |
(hf_slot_grps, hf_slot_vids,
|
| 1854 |
-
hf_slot_waves
|
| 1855 |
-
hf_slot_states,
|
| 1856 |
-
hf_slot_state_reads) = _make_output_slots("hf")
|
| 1857 |
|
| 1858 |
hf_samples.change(
|
| 1859 |
fn=_update_slot_visibility,
|
|
@@ -1874,60 +1912,50 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
|
|
| 1874 |
fn=_run_hunyuan,
|
| 1875 |
inputs=[hf_video, hf_prompt, hf_neg, hf_seed,
|
| 1876 |
hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_samples],
|
| 1877 |
-
outputs=hf_slot_vids + hf_slot_waves
|
| 1878 |
).then(
|
| 1879 |
fn=_update_slot_visibility,
|
| 1880 |
inputs=[hf_samples],
|
| 1881 |
outputs=hf_slot_grps,
|
| 1882 |
))
|
| 1883 |
|
| 1884 |
-
|
| 1885 |
-
for
|
| 1886 |
-
_st.change(fn=lambda v: v, inputs=[_st], outputs=[_str])
|
| 1887 |
-
|
| 1888 |
-
for _i, _rtrig in enumerate(hf_slot_rtrigs):
|
| 1889 |
_slot_id = f"hf_{_i}"
|
|
|
|
|
|
|
| 1890 |
def _make_hf_regen(_si, _sid):
|
| 1891 |
-
def _do(
|
| 1892 |
-
print(f"[regen HF]
|
| 1893 |
-
if not trigger_val:
|
| 1894 |
-
print(f"[regen HF] early-exit: trigger_val empty")
|
| 1895 |
-
yield gr.update(), gr.update(), gr.update(); return
|
| 1896 |
if not state_json:
|
| 1897 |
print(f"[regen HF] early-exit: state_json empty")
|
| 1898 |
-
yield gr.update(), gr.update()
|
| 1899 |
-
parts = trigger_val.split("|", 2)
|
| 1900 |
-
if len(parts) < 2 or parts[0] != _sid:
|
| 1901 |
-
print(f"[regen HF] early-exit: parts[0]={parts[0]!r} expected={_sid!r}")
|
| 1902 |
-
yield gr.update(), gr.update(), gr.update(); return
|
| 1903 |
-
seg_idx = int(parts[1])
|
| 1904 |
-
print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — acquiring lock")
|
| 1905 |
lock = _get_slot_lock(_sid)
|
| 1906 |
with lock:
|
| 1907 |
-
|
| 1908 |
-
state = json.loads(state_json)
|
| 1909 |
pending_html = _build_regen_pending_html(
|
| 1910 |
-
state["segments"], seg_idx, _sid,
|
| 1911 |
-
f"regen_trigger_{_sid}"
|
| 1912 |
)
|
| 1913 |
-
yield gr.update(), gr.update(value=pending_html)
|
| 1914 |
print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — calling regen_hunyuan_segment")
|
| 1915 |
try:
|
| 1916 |
vid, aud, new_meta_json, html = regen_hunyuan_segment(
|
| 1917 |
-
video, seg_idx, state_json,
|
| 1918 |
prompt, neg, seed, guidance, steps, size, cf_dur, cf_db, _sid,
|
| 1919 |
)
|
| 1920 |
print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
|
| 1921 |
except Exception as _e:
|
| 1922 |
print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
|
| 1923 |
raise
|
| 1924 |
-
yield gr.update(value=vid), gr.update(value=html)
|
| 1925 |
return _do
|
| 1926 |
-
|
| 1927 |
fn=_make_hf_regen(_i, _slot_id),
|
| 1928 |
-
inputs=[
|
| 1929 |
-
|
| 1930 |
-
|
|
|
|
|
|
|
| 1931 |
)
|
| 1932 |
|
| 1933 |
# ---- Cross-tab video sync ----
|
|
|
|
| 833 |
updated_meta["audio_path"] = audio_path
|
| 834 |
updated_meta["video_path"] = video_path
|
| 835 |
|
| 836 |
+
# Serialise for embedding in waveform HTML data-state (wavs as lists for JSON)
|
| 837 |
+
_serialised_meta = dict(updated_meta)
|
| 838 |
+
_serialised_meta["wavs"] = [w.tolist() for w in wavs]
|
| 839 |
+
state_json_new = json.dumps(_serialised_meta)
|
| 840 |
+
|
| 841 |
+
waveform_html = _build_waveform_html(audio_path, segments, slot_id, "",
|
| 842 |
+
state_json=state_json_new)
|
| 843 |
return video_path, audio_path, updated_meta, waveform_html
|
| 844 |
|
| 845 |
|
|
|
|
| 1159 |
|
| 1160 |
|
| 1161 |
def _build_waveform_html(audio_path: str, segments: list, slot_id: str,
|
| 1162 |
+
hidden_input_id: str, state_json: str = "",
|
| 1163 |
+
fn_index: int = -1) -> str:
|
| 1164 |
"""Return a self-contained HTML block with a Canvas waveform (display only),
|
| 1165 |
segment boundary markers, and a download link.
|
| 1166 |
|
|
|
|
| 1376 |
import html as _html
|
| 1377 |
srcdoc = _html.escape(iframe_inner, quote=True)
|
| 1378 |
|
| 1379 |
+
import html as _html2
|
| 1380 |
+
state_escaped = _html2.escape(state_json or "", quote=True)
|
| 1381 |
+
|
| 1382 |
return f"""
|
| 1383 |
<div id="wf_container_{slot_id}"
|
| 1384 |
+
data-fn-index="{fn_index}"
|
| 1385 |
+
data-state="{state_escaped}"
|
| 1386 |
style="background:#1a1a1a;border-radius:8px;padding:10px;margin-top:6px;position:relative;">
|
| 1387 |
<div style="position:relative;width:100%;height:80px;">
|
| 1388 |
<iframe id="wf_iframe_{slot_id}"
|
|
|
|
| 1407 |
def _make_output_slots(tab_prefix: str) -> tuple:
|
| 1408 |
"""Build MAX_SLOTS output groups for one tab.
|
| 1409 |
|
| 1410 |
+
Each slot has: video and waveform HTML.
|
| 1411 |
+
Regen is triggered via direct Gradio queue API calls from JS (no hidden
|
| 1412 |
+
trigger textboxes needed — DOM event dispatch is unreliable in Gradio 5
|
| 1413 |
+
Svelte components). State JSON is embedded in the waveform HTML's
|
| 1414 |
+
data-state attribute and passed directly in the queue API payload.
|
| 1415 |
+
Returns (grps, vids, waveforms).
|
|
|
|
|
|
|
|
|
|
| 1416 |
"""
|
| 1417 |
+
grps, vids, waveforms = [], [], []
|
| 1418 |
for i in range(MAX_SLOTS):
|
| 1419 |
with gr.Group(visible=(i == 0)) as g:
|
|
|
|
| 1420 |
vids.append(gr.Video(label=f"Generation {i+1} — Video"))
|
| 1421 |
waveforms.append(gr.HTML(
|
| 1422 |
value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>",
|
| 1423 |
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1424 |
grps.append(g)
|
| 1425 |
+
return grps, vids, waveforms
|
| 1426 |
|
| 1427 |
|
| 1428 |
def _unpack_outputs(flat: list, n: int, tab_prefix: str) -> list:
|
| 1429 |
"""Turn a flat _pad_outputs list into Gradio update lists.
|
| 1430 |
|
| 1431 |
flat has MAX_SLOTS * 3 items: [vid0, aud0, meta0, vid1, aud1, meta1, ...]
|
| 1432 |
+
Returns updates for vids + waveforms only (NOT grps).
|
| 1433 |
Group visibility is handled separately via .then() to avoid Gradio 5 SSR
|
| 1434 |
'Too many arguments' caused by mixing gr.Group updates with other outputs.
|
| 1435 |
+
State JSON is embedded in the waveform HTML data-state attribute so JS
|
| 1436 |
+
can read it when calling the Gradio queue API for regen.
|
| 1437 |
"""
|
| 1438 |
n = int(n)
|
| 1439 |
vid_updates = []
|
| 1440 |
wave_updates = []
|
|
|
|
| 1441 |
for i in range(MAX_SLOTS):
|
| 1442 |
vid_path = flat[i * 3]
|
| 1443 |
aud_path = flat[i * 3 + 1]
|
| 1444 |
meta = flat[i * 3 + 2]
|
| 1445 |
vid_updates.append(gr.update(value=vid_path))
|
| 1446 |
if aud_path and meta:
|
| 1447 |
+
slot_id = f"{tab_prefix}_{i}"
|
| 1448 |
+
state_json = json.dumps(meta)
|
| 1449 |
+
html = _build_waveform_html(aud_path, meta["segments"], slot_id,
|
| 1450 |
+
"", state_json=state_json)
|
| 1451 |
wave_updates.append(gr.update(value=html))
|
|
|
|
|
|
|
| 1452 |
else:
|
| 1453 |
wave_updates.append(gr.update(
|
| 1454 |
value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>"
|
| 1455 |
))
|
| 1456 |
+
return vid_updates + wave_updates
|
|
|
|
| 1457 |
|
| 1458 |
|
| 1459 |
def _on_video_upload_taro(video_file, num_steps, crossfade_s):
|
|
|
|
| 1484 |
max-height: 60vh !important;
|
| 1485 |
object-fit: contain;
|
| 1486 |
}
|
| 1487 |
+
/* No hidden trigger inputs needed — regen uses direct Gradio queue API calls. */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1488 |
"""
|
| 1489 |
|
| 1490 |
_GLOBAL_JS = """
|
| 1491 |
() => {
|
| 1492 |
// Global postMessage handler for waveform iframe events.
|
| 1493 |
// Runs once on page load (Gradio js= parameter).
|
| 1494 |
+
// Handles: popup open/close relay, regen trigger via Gradio queue API.
|
| 1495 |
if (window._wf_global_listener) return; // already registered
|
| 1496 |
window._wf_global_listener = true;
|
| 1497 |
|
| 1498 |
+
// Cache: api_name -> fn_index, built once from gradio_config.dependencies
|
| 1499 |
+
let _fnIndexCache = null;
|
| 1500 |
+
function getFnIndex(apiName) {
|
| 1501 |
+
if (!_fnIndexCache) {
|
| 1502 |
+
_fnIndexCache = {};
|
| 1503 |
+
const deps = window.gradio_config && window.gradio_config.dependencies;
|
| 1504 |
+
if (deps) deps.forEach(function(d, i) {
|
| 1505 |
+
if (d.api_name) _fnIndexCache[d.api_name] = i;
|
| 1506 |
+
});
|
| 1507 |
+
}
|
| 1508 |
+
return _fnIndexCache[apiName];
|
| 1509 |
+
}
|
| 1510 |
+
|
| 1511 |
+
// Read a component's current DOM value by elem_id.
|
| 1512 |
+
// For Number/Slider: reads the <input type="number"> or <input type="range">.
|
| 1513 |
+
// For Textbox/Radio: reads the <textarea> or checked <input type="radio">.
|
| 1514 |
+
// Returns null if not found.
|
| 1515 |
+
function readComponentValue(elemId) {
|
| 1516 |
+
const el = document.getElementById(elemId);
|
| 1517 |
+
if (!el) return null;
|
| 1518 |
+
const numInput = el.querySelector('input[type="number"]');
|
| 1519 |
+
if (numInput) return parseFloat(numInput.value);
|
| 1520 |
+
const rangeInput = el.querySelector('input[type="range"]');
|
| 1521 |
+
if (rangeInput) return parseFloat(rangeInput.value);
|
| 1522 |
+
const radio = el.querySelector('input[type="radio"]:checked');
|
| 1523 |
+
if (radio) return radio.value;
|
| 1524 |
+
const ta = el.querySelector('textarea');
|
| 1525 |
+
if (ta) return ta.value;
|
| 1526 |
+
const txt = el.querySelector('input[type="text"], input:not([type])');
|
| 1527 |
+
if (txt) return txt.value;
|
| 1528 |
+
return null;
|
| 1529 |
+
}
|
| 1530 |
+
|
| 1531 |
+
// Fire regen for a given slot and segment by posting directly to the
|
| 1532 |
+
// Gradio queue API — bypasses Svelte binding entirely.
|
| 1533 |
+
function fireRegen(slot_id, seg_idx) {
|
| 1534 |
+
// Determine tab prefix from slot_id (e.g. "taro_0" -> "taro")
|
| 1535 |
+
const prefix = slot_id.split('_')[0];
|
| 1536 |
+
const slotNum = parseInt(slot_id.split('_')[1], 10);
|
| 1537 |
+
|
| 1538 |
+
// Build api_name for this slot's regen handler
|
| 1539 |
+
const apiName = 'regen_' + prefix + '_' + slotNum;
|
| 1540 |
+
const fnIndex = getFnIndex(apiName);
|
| 1541 |
+
if (fnIndex === undefined) {
|
| 1542 |
+
console.warn('[fireRegen] fn_index not found for api_name:', apiName, 'cache:', _fnIndexCache);
|
| 1543 |
+
return;
|
| 1544 |
+
}
|
| 1545 |
+
|
| 1546 |
+
// Read state_json from the waveform container data-state attribute
|
| 1547 |
+
const container = document.getElementById('wf_container_' + slot_id);
|
| 1548 |
+
const stateJson = container ? (container.getAttribute('data-state') || '') : '';
|
| 1549 |
+
if (!stateJson) {
|
| 1550 |
+
console.warn('[fireRegen] no state_json for slot', slot_id);
|
| 1551 |
+
return;
|
| 1552 |
+
}
|
| 1553 |
+
|
| 1554 |
+
// Read current input values from DOM by elem_id
|
| 1555 |
+
let data;
|
| 1556 |
+
if (prefix === 'taro') {
|
| 1557 |
+
const video = null; // video is a file component — pass null, server uses its own state
|
| 1558 |
+
data = [
|
| 1559 |
+
seg_idx,
|
| 1560 |
+
stateJson,
|
| 1561 |
+
video,
|
| 1562 |
+
readComponentValue('taro_seed'),
|
| 1563 |
+
readComponentValue('taro_cfg'),
|
| 1564 |
+
readComponentValue('taro_steps'),
|
| 1565 |
+
readComponentValue('taro_mode'),
|
| 1566 |
+
readComponentValue('taro_cf_dur'),
|
| 1567 |
+
readComponentValue('taro_cf_db')
|
| 1568 |
+
];
|
| 1569 |
+
} else if (prefix === 'mma') {
|
| 1570 |
+
data = [
|
| 1571 |
+
seg_idx,
|
| 1572 |
+
stateJson,
|
| 1573 |
+
null, // video
|
| 1574 |
+
readComponentValue('mma_prompt'),
|
| 1575 |
+
readComponentValue('mma_neg'),
|
| 1576 |
+
readComponentValue('mma_seed'),
|
| 1577 |
+
readComponentValue('mma_cfg'),
|
| 1578 |
+
readComponentValue('mma_steps'),
|
| 1579 |
+
readComponentValue('mma_cf_dur'),
|
| 1580 |
+
readComponentValue('mma_cf_db')
|
| 1581 |
+
];
|
| 1582 |
+
} else {
|
| 1583 |
+
data = [
|
| 1584 |
+
seg_idx,
|
| 1585 |
+
stateJson,
|
| 1586 |
+
null, // video
|
| 1587 |
+
readComponentValue('hf_prompt'),
|
| 1588 |
+
readComponentValue('hf_neg'),
|
| 1589 |
+
readComponentValue('hf_seed'),
|
| 1590 |
+
readComponentValue('hf_guidance'),
|
| 1591 |
+
readComponentValue('hf_steps'),
|
| 1592 |
+
readComponentValue('hf_size'),
|
| 1593 |
+
readComponentValue('hf_cf_dur'),
|
| 1594 |
+
readComponentValue('hf_cf_db')
|
| 1595 |
+
];
|
| 1596 |
+
}
|
| 1597 |
+
|
| 1598 |
+
console.log('[fireRegen] calling api', apiName, 'fn_index', fnIndex, 'seg', seg_idx);
|
| 1599 |
+
|
| 1600 |
+
fetch('/gradio_api/queue/join', {
|
| 1601 |
+
method: 'POST',
|
| 1602 |
+
headers: {'Content-Type': 'application/json'},
|
| 1603 |
+
body: JSON.stringify({
|
| 1604 |
+
data: data,
|
| 1605 |
+
fn_index: fnIndex,
|
| 1606 |
+
session_hash: window.__gradio_session_hash__,
|
| 1607 |
+
event_data: null,
|
| 1608 |
+
trigger_id: null
|
| 1609 |
+
})
|
| 1610 |
+
}).then(function(r) { return r.json(); }).then(function(j) {
|
| 1611 |
+
console.log('[fireRegen] queued, event_id:', j.event_id);
|
| 1612 |
+
const lbl = document.getElementById('wf_seglabel_' + slot_id);
|
| 1613 |
+
if (lbl) lbl.textContent = 'Regenerating Seg ' + (seg_idx + 1) + '...';
|
| 1614 |
+
}).catch(function(e) {
|
| 1615 |
+
console.error('[fireRegen] fetch error:', e);
|
| 1616 |
+
});
|
| 1617 |
+
}
|
| 1618 |
+
|
| 1619 |
// Shared popup element created once and reused across all slots
|
| 1620 |
let _popup = null;
|
| 1621 |
let _pendingSlot = null, _pendingIdx = null;
|
|
|
|
| 1647 |
_pendingSlot = null; _pendingIdx = null;
|
| 1648 |
}
|
| 1649 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1650 |
window.addEventListener('message', function(e) {
|
| 1651 |
const d = e.data;
|
| 1652 |
if (!d || d.type !== 'wf_popup') return;
|
|
|
|
| 1657 |
_pendingIdx = d.seg_idx;
|
| 1658 |
const lbl = document.getElementById('_wf_popup_lbl');
|
| 1659 |
if (lbl) lbl.textContent = 'Seg ' + (d.seg_idx + 1) +
|
| 1660 |
+
' (' + d.t0.toFixed(2) + 's \u2013 ' + d.t1.toFixed(2) + 's)';
|
| 1661 |
p.style.display = 'block';
|
| 1662 |
p.style.left = (d.x + 10) + 'px';
|
| 1663 |
p.style.top = (d.y + 10) + 'px';
|
|
|
|
| 1690 |
with gr.Row():
    # Left column: the input video and every TARO generation control.
    # The elem_id values are the ids the waveform JS hands to
    # readComponentValue() when it assembles a regen request.
    with gr.Column():
        taro_video = gr.Video(label="Input Video")
        taro_seed = gr.Number(
            label="Seed (-1 = random)",
            value=get_random_seed(),
            precision=0,
            elem_id="taro_seed",
        )
        taro_cfg = gr.Slider(
            label="CFG Scale",
            minimum=1,
            maximum=15,
            value=7.5,
            step=0.5,
            elem_id="taro_cfg",
        )
        taro_steps = gr.Slider(
            label="Sampling Steps",
            minimum=10,
            maximum=50,
            value=25,
            step=1,
            elem_id="taro_steps",
        )
        taro_mode = gr.Radio(
            label="Sampling Mode",
            choices=["sde", "ode"],
            value="sde",
            elem_id="taro_mode",
        )
        taro_cf_dur = gr.Slider(
            label="Crossfade Duration (s)",
            minimum=0,
            maximum=8,
            value=2,
            step=0.1,
            elem_id="taro_cf_dur",
        )
        taro_cf_db = gr.Textbox(
            label="Crossfade Boost (dB)",
            value="3",
            elem_id="taro_cf_db",
        )
        # Number of output slots to fill, capped at MAX_SLOTS.
        taro_samples = gr.Slider(
            label="Generations",
            minimum=1,
            maximum=MAX_SLOTS,
            value=1,
            step=1,
        )
        taro_btn = gr.Button("Generate", variant="primary")

    # Right column: the output slots, returned as three parallel lists
    # (groups, videos, waveform HTML blocks).
    with gr.Column():
        taro_slot_grps, taro_slot_vids, taro_slot_waves = _make_output_slots("taro")
|
|
|
|
|
|
|
| 1705 |
|
| 1706 |
for trigger in [taro_video, taro_steps, taro_cf_dur]:
|
| 1707 |
trigger.change(
|
|
|
|
| 1731 |
fn=_run_taro,
|
| 1732 |
inputs=[taro_video, taro_seed, taro_cfg, taro_steps, taro_mode,
|
| 1733 |
taro_cf_dur, taro_cf_db, taro_samples],
|
| 1734 |
+
outputs=taro_slot_vids + taro_slot_waves,
|
| 1735 |
).then(
|
| 1736 |
fn=_update_slot_visibility,
|
| 1737 |
inputs=[taro_samples],
|
| 1738 |
outputs=taro_slot_grps,
|
| 1739 |
))
|
| 1740 |
|
| 1741 |
+
# Per-slot regen handlers for TARO.
#
# The waveform JS POSTs directly to /gradio_api/queue/join with a fn_index
# (discovered at runtime from gradio_config.dependencies by api_name) and
#   data = [seg_idx, state_json, video, seed, cfg, steps, mode, cf_dur, cf_db]
# Each handler is attached to an invisible gr.Button only so that Gradio
# registers it as a dependency with its own fn_index and api_name.
#
# Gradio preprocesses every payload entry with the component sitting at the
# same position in `inputs`. The first two positions therefore need
# components whose type matches what the JS sends: an integer segment index
# and a JSON *string*. Reusing the seed gr.Number (precision=0) for the
# state slot would make Gradio try to round the JSON string as a number and
# fail before the handler runs, so dedicated hidden placeholders are used.
taro_regen_seg_idx = gr.Number(visible=False, precision=0)
taro_regen_state = gr.Textbox(visible=False)

def _make_taro_regen(_si, _sid):
    """Build the regen generator for one TARO output slot.

    Args:
        _si: Slot index (kept for signature compatibility; not used).
        _sid: Slot id string such as ``"taro_0"``; selects the slot lock
            and is forwarded to the HTML builder and the regen call.

    Returns:
        A generator function suitable as a Gradio event handler with two
        outputs: (slot video, slot waveform HTML).
    """
    def _do(seg_idx, state_json, video, seed, cfg, steps, mode, cf_dur, cf_db):
        """Regenerate one segment and stream UI updates.

        Yields ``(video_update, waveform_update)`` pairs: first a pending
        waveform while the segment is being regenerated, then the final
        video and waveform. All generation parameters are forwarded
        unchanged to ``regen_taro_segment``. Exceptions raised by that
        call are logged and re-raised.
        """
        print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json) if state_json else 0}")
        if not state_json:
            # Nothing to splice into: leave both outputs untouched.
            print(f"[regen TARO] early-exit: state_json empty")
            yield gr.update(), gr.update()
            return
        lock = _get_slot_lock(_sid)
        with lock:
            print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
            state = json.loads(state_json)
            pending_html = _build_regen_pending_html(
                state["segments"], int(seg_idx), _sid, ""
            )
            # First update: keep the video, swap the waveform for the
            # "pending" rendering.
            yield gr.update(), gr.update(value=pending_html)
            print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — calling regen_taro_segment")
            try:
                # Only the video path and waveform HTML are shown; the
                # audio path and updated meta JSON are not used here.
                vid, _aud, _new_meta_json, html = regen_taro_segment(
                    video, int(seg_idx), state_json,
                    seed, cfg, steps, mode, cf_dur, cf_db, _sid,
                )
                print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
            except Exception as _e:
                print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
                raise
        # Final update: publish the regenerated video and waveform.
        yield gr.update(value=vid), gr.update(value=html)
    return _do

taro_regen_btns = []
for _i in range(MAX_SLOTS):
    _slot_id = f"taro_{_i}"
    _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
    taro_regen_btns.append(_btn)
    print(f"[startup] registering regen handler for slot {_slot_id}")
    _btn.click(
        fn=_make_taro_regen(_i, _slot_id),
        inputs=[taro_regen_seg_idx, taro_regen_state,  # seg_idx, state_json
                taro_video, taro_seed, taro_cfg, taro_steps,
                taro_mode, taro_cf_dur, taro_cf_db],
        outputs=[taro_slot_vids[_i], taro_slot_waves[_i]],
        api_name=f"regen_taro_{_i}",
    )
|
| 1787 |
|
| 1788 |
# ---------------------------------------------------------- #
|
|
|
|
| 1792 |
with gr.Row():
    # Left column: the input video and every MMAudio generation control.
    # The elem_id values are the ids the waveform JS hands to
    # readComponentValue() when it assembles a regen request.
    with gr.Column():
        mma_video = gr.Video(label="Input Video")
        mma_prompt = gr.Textbox(
            label="Prompt",
            placeholder="e.g. footsteps on gravel",
            elem_id="mma_prompt",
        )
        mma_neg = gr.Textbox(
            label="Negative Prompt",
            placeholder="music, speech",
            elem_id="mma_neg",
        )
        mma_seed = gr.Number(
            label="Seed (-1 = random)",
            value=get_random_seed(),
            precision=0,
            elem_id="mma_seed",
        )
        mma_cfg = gr.Slider(
            label="CFG Strength",
            minimum=1,
            maximum=10,
            value=4.5,
            step=0.5,
            elem_id="mma_cfg",
        )
        mma_steps = gr.Slider(
            label="Steps",
            minimum=10,
            maximum=50,
            value=25,
            step=1,
            elem_id="mma_steps",
        )
        mma_cf_dur = gr.Slider(
            label="Crossfade Duration (s)",
            minimum=0,
            maximum=8,
            value=2,
            step=0.1,
            elem_id="mma_cf_dur",
        )
        mma_cf_db = gr.Textbox(
            label="Crossfade Boost (dB)",
            value="3",
            elem_id="mma_cf_db",
        )
        # Number of output slots to fill, capped at MAX_SLOTS.
        mma_samples = gr.Slider(
            label="Generations",
            minimum=1,
            maximum=MAX_SLOTS,
            value=1,
            step=1,
        )
        mma_btn = gr.Button("Generate", variant="primary")

    # Right column: the output slots, returned as three parallel lists
    # (groups, videos, waveform HTML blocks).
    with gr.Column():
        mma_slot_grps, mma_slot_vids, mma_slot_waves = _make_output_slots("mma")
|
|
|
|
|
|
|
| 1808 |
|
| 1809 |
mma_samples.change(
|
| 1810 |
fn=_update_slot_visibility,
|
|
|
|
| 1825 |
fn=_run_mmaudio,
|
| 1826 |
inputs=[mma_video, mma_prompt, mma_neg, mma_seed,
|
| 1827 |
mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_samples],
|
| 1828 |
+
outputs=mma_slot_vids + mma_slot_waves,
|
| 1829 |
).then(
|
| 1830 |
fn=_update_slot_visibility,
|
| 1831 |
inputs=[mma_samples],
|
| 1832 |
outputs=mma_slot_grps,
|
| 1833 |
))
|
| 1834 |
|
| 1835 |
+
# Per-slot regen handlers for MMAudio.
#
# The waveform JS POSTs directly to /gradio_api/queue/join with
#   data = [seg_idx, state_json, video, prompt, neg, seed, cfg, steps,
#           cf_dur, cf_db]
# Each handler is attached to an invisible gr.Button only so that Gradio
# registers it as a dependency with its own api_name.
#
# Gradio preprocesses every payload entry with the component sitting at the
# same position in `inputs`, so the first two positions need components that
# match what the JS sends: an integer segment index and a JSON *string*.
# Reusing the seed gr.Number (precision=0) for the state slot would make
# Gradio try to round the JSON string as a number and fail before the
# handler runs, so dedicated hidden placeholders are used instead.
mma_regen_seg_idx = gr.Number(visible=False, precision=0)
mma_regen_state = gr.Textbox(visible=False)

def _make_mma_regen(_si, _sid):
    """Build the regen generator for one MMAudio output slot.

    Args:
        _si: Slot index (kept for signature compatibility; not used).
        _sid: Slot id string such as ``"mma_0"``; selects the slot lock
            and is forwarded to the HTML builder and the regen call.

    Returns:
        A generator function suitable as a Gradio event handler with two
        outputs: (slot video, slot waveform HTML).
    """
    def _do(seg_idx, state_json, video, prompt, neg, seed, cfg, steps, cf_dur, cf_db):
        """Regenerate one segment and stream UI updates.

        Yields ``(video_update, waveform_update)`` pairs: first a pending
        waveform while the segment is being regenerated, then the final
        video and waveform. All generation parameters are forwarded
        unchanged to ``regen_mmaudio_segment``. Exceptions raised by that
        call are logged and re-raised.
        """
        print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json) if state_json else 0}")
        if not state_json:
            # Nothing to splice into: leave both outputs untouched.
            print(f"[regen MMA] early-exit: state_json empty")
            yield gr.update(), gr.update()
            return
        lock = _get_slot_lock(_sid)
        with lock:
            state = json.loads(state_json)
            pending_html = _build_regen_pending_html(
                state["segments"], int(seg_idx), _sid, ""
            )
            # First update: keep the video, swap the waveform for the
            # "pending" rendering.
            yield gr.update(), gr.update(value=pending_html)
            print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — calling regen_mmaudio_segment")
            try:
                # Only the video path and waveform HTML are shown; the
                # audio path and updated meta JSON are not used here.
                vid, _aud, _new_meta_json, html = regen_mmaudio_segment(
                    video, int(seg_idx), state_json,
                    prompt, neg, seed, cfg, steps, cf_dur, cf_db, _sid,
                )
                print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
            except Exception as _e:
                print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
                raise
        # Final update: publish the regenerated video and waveform.
        yield gr.update(value=vid), gr.update(value=html)
    return _do

mma_regen_btns = []
for _i in range(MAX_SLOTS):
    _slot_id = f"mma_{_i}"
    _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
    mma_regen_btns.append(_btn)
    _btn.click(
        fn=_make_mma_regen(_i, _slot_id),
        inputs=[mma_regen_seg_idx, mma_regen_state,  # seg_idx, state_json
                mma_video, mma_prompt, mma_neg, mma_seed,
                mma_cfg, mma_steps, mma_cf_dur, mma_cf_db],
        outputs=[mma_slot_vids[_i], mma_slot_waves[_i]],
        api_name=f"regen_mma_{_i}",
    )
|
| 1873 |
|
| 1874 |
# ---------------------------------------------------------- #
|
|
|
|
| 1878 |
with gr.Row():
    # Left column: the input video and every HunyuanFoley generation
    # control. The elem_id values are the ids the waveform JS hands to
    # readComponentValue() when it assembles a regen request.
    with gr.Column():
        hf_video = gr.Video(label="Input Video")
        hf_prompt = gr.Textbox(
            label="Prompt",
            placeholder="e.g. rain hitting a metal roof",
            elem_id="hf_prompt",
        )
        hf_neg = gr.Textbox(
            label="Negative Prompt",
            value="noisy, harsh",
            elem_id="hf_neg",
        )
        hf_seed = gr.Number(
            label="Seed (-1 = random)",
            value=get_random_seed(),
            precision=0,
            elem_id="hf_seed",
        )
        hf_guidance = gr.Slider(
            label="Guidance Scale",
            minimum=1,
            maximum=10,
            value=4.5,
            step=0.5,
            elem_id="hf_guidance",
        )
        hf_steps = gr.Slider(
            label="Steps",
            minimum=10,
            maximum=100,
            value=50,
            step=5,
            elem_id="hf_steps",
        )
        hf_size = gr.Radio(
            label="Model Size",
            choices=["xl", "xxl"],
            value="xxl",
            elem_id="hf_size",
        )
        hf_cf_dur = gr.Slider(
            label="Crossfade Duration (s)",
            minimum=0,
            maximum=8,
            value=2,
            step=0.1,
            elem_id="hf_cf_dur",
        )
        hf_cf_db = gr.Textbox(
            label="Crossfade Boost (dB)",
            value="3",
            elem_id="hf_cf_db",
        )
        # Number of output slots to fill, capped at MAX_SLOTS.
        hf_samples = gr.Slider(
            label="Generations",
            minimum=1,
            maximum=MAX_SLOTS,
            value=1,
            step=1,
        )
        hf_btn = gr.Button("Generate", variant="primary")

    # Right column: the output slots, returned as three parallel lists
    # (groups, videos, waveform HTML blocks).
    with gr.Column():
        hf_slot_grps, hf_slot_vids, hf_slot_waves = _make_output_slots("hf")
|
|
|
|
|
|
|
| 1895 |
|
| 1896 |
hf_samples.change(
|
| 1897 |
fn=_update_slot_visibility,
|
|
|
|
| 1912 |
fn=_run_hunyuan,
|
| 1913 |
inputs=[hf_video, hf_prompt, hf_neg, hf_seed,
|
| 1914 |
hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_samples],
|
| 1915 |
+
outputs=hf_slot_vids + hf_slot_waves,
|
| 1916 |
).then(
|
| 1917 |
fn=_update_slot_visibility,
|
| 1918 |
inputs=[hf_samples],
|
| 1919 |
outputs=hf_slot_grps,
|
| 1920 |
))
|
| 1921 |
|
| 1922 |
+
# Per-slot regen handlers for HunyuanFoley.
#
# The waveform JS POSTs directly to /gradio_api/queue/join with
#   data = [seg_idx, state_json, video, prompt, neg, seed, guidance, steps,
#           size, cf_dur, cf_db]
# Each handler is attached to an invisible gr.Button only so that Gradio
# registers it as a dependency with its own api_name.
#
# Gradio preprocesses every payload entry with the component sitting at the
# same position in `inputs`, so the first two positions need components that
# match what the JS sends: an integer segment index and a JSON *string*.
# Reusing the seed gr.Number (precision=0) for the state slot would make
# Gradio try to round the JSON string as a number and fail before the
# handler runs, so dedicated hidden placeholders are used instead.
hf_regen_seg_idx = gr.Number(visible=False, precision=0)
hf_regen_state = gr.Textbox(visible=False)

def _make_hf_regen(_si, _sid):
    """Build the regen generator for one HunyuanFoley output slot.

    Args:
        _si: Slot index (kept for signature compatibility; not used).
        _sid: Slot id string such as ``"hf_0"``; selects the slot lock
            and is forwarded to the HTML builder and the regen call.

    Returns:
        A generator function suitable as a Gradio event handler with two
        outputs: (slot video, slot waveform HTML).
    """
    def _do(seg_idx, state_json, video, prompt, neg, seed, guidance, steps, size, cf_dur, cf_db):
        """Regenerate one segment and stream UI updates.

        Yields ``(video_update, waveform_update)`` pairs: first a pending
        waveform while the segment is being regenerated, then the final
        video and waveform. All generation parameters are forwarded
        unchanged to ``regen_hunyuan_segment``. Exceptions raised by that
        call are logged and re-raised.
        """
        print(f"[regen HF] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json) if state_json else 0}")
        if not state_json:
            # Nothing to splice into: leave both outputs untouched.
            print(f"[regen HF] early-exit: state_json empty")
            yield gr.update(), gr.update()
            return
        lock = _get_slot_lock(_sid)
        with lock:
            state = json.loads(state_json)
            pending_html = _build_regen_pending_html(
                state["segments"], int(seg_idx), _sid, ""
            )
            # First update: keep the video, swap the waveform for the
            # "pending" rendering.
            yield gr.update(), gr.update(value=pending_html)
            print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — calling regen_hunyuan_segment")
            try:
                # Only the video path and waveform HTML are shown; the
                # audio path and updated meta JSON are not used here.
                vid, _aud, _new_meta_json, html = regen_hunyuan_segment(
                    video, int(seg_idx), state_json,
                    prompt, neg, seed, guidance, steps, size, cf_dur, cf_db, _sid,
                )
                print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
            except Exception as _e:
                print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
                raise
        # Final update: publish the regenerated video and waveform.
        yield gr.update(value=vid), gr.update(value=html)
    return _do

hf_regen_btns = []
for _i in range(MAX_SLOTS):
    _slot_id = f"hf_{_i}"
    _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
    hf_regen_btns.append(_btn)
    _btn.click(
        fn=_make_hf_regen(_i, _slot_id),
        inputs=[hf_regen_seg_idx, hf_regen_state,  # seg_idx, state_json
                hf_video, hf_prompt, hf_neg, hf_seed,
                hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db],
        outputs=[hf_slot_vids[_i], hf_slot_waves[_i]],
        api_name=f"regen_hf_{_i}",
    )
|
| 1960 |
|
| 1961 |
# ---- Cross-tab video sync ----
|