Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

BoxOfColors committed 5 days ago

Commit

bb533eb

1 Parent(s): c45a944

Fix regen: bypass Svelte binding — call Gradio queue API directly from JS

- Replace DOM event dispatch (which Svelte ignores) with a direct
  POST to /gradio_api/queue/join using the handler's api_name
- Remove hidden trigger textboxes and relay pattern (no longer needed)
- Embed state_json in the waveform HTML's data-state attribute so JS can read it
- Add elem_id to all input components so JS can read their DOM values
- Register per-slot regen handlers via gr.Button.click() with api_name
  so they get stable fn_index entries in gradio_config.dependencies
- Simplify _make_output_slots to return just (grps, vids, waveforms)
- Update _splice_and_save to embed the updated state in the returned waveform HTML

Files changed (1) hide show

app.py +241 -213

app.py CHANGED Viewed

@@ -833,8 +833,13 @@ def _splice_and_save(new_wav, seg_idx, meta, slot_id):
     updated_meta["audio_path"] = audio_path
     updated_meta["video_path"] = video_path
-    hidden_el_id  = f"regen_trigger_{slot_id}"
-    waveform_html = _build_waveform_html(audio_path, segments, slot_id, hidden_el_id)
     return video_path, audio_path, updated_meta, waveform_html
@@ -1154,7 +1159,8 @@ def _build_regen_pending_html(segments: list, regen_seg_idx: int, slot_id: str,
 def _build_waveform_html(audio_path: str, segments: list, slot_id: str,
-                         hidden_input_id: str) -> str:
     """Return a self-contained HTML block with a Canvas waveform (display only),
     segment boundary markers, and a download link.
@@ -1370,8 +1376,13 @@ def _build_waveform_html(audio_path: str, segments: list, slot_id: str,
     import html as _html
     srcdoc = _html.escape(iframe_inner, quote=True)
     return f"""
 <div id="wf_container_{slot_id}"
      style="background:#1a1a1a;border-radius:8px;padding:10px;margin-top:6px;position:relative;">
   <div style="position:relative;width:100%;height:80px;">
     <iframe id="wf_iframe_{slot_id}"
@@ -1396,81 +1407,53 @@ def _build_waveform_html(audio_path: str, segments: list, slot_id: str,
 def _make_output_slots(tab_prefix: str) -> tuple:
     """Build MAX_SLOTS output groups for one tab.
-    Each slot has: video, waveform HTML, hidden regen trigger textbox,
-    and TWO state textboxes:
-      - seg_states      (write): written by main gen + regen; also an output
-      - seg_state_reads (read):  mirrors seg_states via .change() relay;
-                                 used as input-only for regen handlers so that
-                                 no component ever appears in BOTH inputs AND
-                                 outputs of the same event (which causes Gradio
-                                 5 "Too many arguments" even with SSR disabled).
-    Returns (grps, vids, waveforms, regen_triggers, seg_states, seg_state_reads).
     """
-    grps, vids, waveforms, regen_triggers, seg_states, seg_state_reads = [], [], [], [], [], []
     for i in range(MAX_SLOTS):
         with gr.Group(visible=(i == 0)) as g:
-            slot_id = f"{tab_prefix}_{i}"
             vids.append(gr.Video(label=f"Generation {i+1} — Video"))
             waveforms.append(gr.HTML(
                 value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>",
             ))
-            # Regen trigger: CSS-hidden so JS can find and write to it.
-            regen_triggers.append(gr.Textbox(
-                value="",
-                elem_id=f"regen_trigger_{slot_id}",
-                elem_classes=["wf-hidden-input"],
-                label="",
-                show_label=False,
-            ))
-            # Write-only state: updated by main gen and regen outputs.
-            seg_states.append(gr.Textbox(
-                value="",
-                elem_classes=["wf-hidden-input"],
-                label="",
-                show_label=False,
-            ))
-            # Read-only mirror: fed into regen handler inputs only.
-            # Stays in sync via a .change() relay wired after slot creation.
-            seg_state_reads.append(gr.Textbox(
-                value="",
-                elem_classes=["wf-hidden-input"],
-                label="",
-                show_label=False,
-            ))
         grps.append(g)
-    return grps, vids, waveforms, regen_triggers, seg_states, seg_state_reads
 def _unpack_outputs(flat: list, n: int, tab_prefix: str) -> list:
     """Turn a flat _pad_outputs list into Gradio update lists.
     flat has MAX_SLOTS * 3 items: [vid0, aud0, meta0, vid1, aud1, meta1, ...]
-    Returns updates for vids + waveforms + seg_states only (NOT grps).
     Group visibility is handled separately via .then() to avoid Gradio 5 SSR
     'Too many arguments' caused by mixing gr.Group updates with other outputs.
     """
     n = int(n)
     vid_updates  = []
     wave_updates = []
-    state_updates= []
     for i in range(MAX_SLOTS):
         vid_path  = flat[i * 3]
         aud_path  = flat[i * 3 + 1]
         meta      = flat[i * 3 + 2]
         vid_updates.append(gr.update(value=vid_path))
         if aud_path and meta:
-            slot_id       = f"{tab_prefix}_{i}"
-            hidden_el_id  = f"regen_trigger_{slot_id}"
-            html = _build_waveform_html(aud_path, meta["segments"], slot_id, hidden_el_id)
             wave_updates.append(gr.update(value=html))
-            # Serialize meta to JSON string (seg_states are now gr.Textbox)
-            state_updates.append(gr.update(value=json.dumps(meta)))
         else:
             wave_updates.append(gr.update(
                 value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>"
             ))
-            state_updates.append(gr.update(value=""))
-    return vid_updates + wave_updates + state_updates
 def _on_video_upload_taro(video_file, num_steps, crossfade_s):
@@ -1501,28 +1484,138 @@ _SLOT_CSS = """
     max-height: 60vh !important;
     object-fit: contain;
 }
-/* Regen trigger and state textboxes are CSS-hidden, NOT visible=False.
-   Gradio 5 SSR omits visible=False components from the DOM entirely,
-   so JS can never find them. CSS-hidden components are always in the DOM. */
-.wf-hidden-input {
-    position: absolute !important;
-    left: -9999px !important;
-    width: 1px !important;
-    height: 1px !important;
-    overflow: hidden !important;
-    pointer-events: none !important;
-    opacity: 0 !important;
-}
 """
 _GLOBAL_JS = """
 () => {
   // Global postMessage handler for waveform iframe events.
   // Runs once on page load (Gradio js= parameter).
-  // Handles: popup open/close relay, regen trigger.
   if (window._wf_global_listener) return;  // already registered
   window._wf_global_listener = true;
   // Shared popup element created once and reused across all slots
   let _popup = null;
   let _pendingSlot = null, _pendingIdx = null;
@@ -1554,40 +1647,6 @@ _GLOBAL_JS = """
     _pendingSlot = null; _pendingIdx = null;
   }
-  function fireRegen(slot_id, idx) {
-    const el = document.getElementById('regen_trigger_' + slot_id);
-    if (!el) { console.warn('[fireRegen] regen_trigger element not found:', slot_id); return; }
-    const input = el.querySelector('input, textarea');
-    if (!input) { console.warn('[fireRegen] no input inside regen_trigger:', slot_id); return; }
-    // Use native setter to bypass Svelte's controlled-input tracking.
-    // Timestamp suffix ensures repeat clicks on the same segment always
-    // produce a new value so Svelte's change detection always fires.
-    // State JSON is passed via a separate Gradio input (seg_state_read),
-    // not embedded in the trigger string — Gradio's own state is reliable,
-    // whereas reading the DOM input.value returns '' for Svelte-controlled inputs.
-    // IMPORTANT: Gradio 5 renders Textbox as <textarea>, NOT <input>.
-    // Must use HTMLTextAreaElement.prototype setter — using HTMLInputElement.prototype
-    // on a textarea causes "TypeError: Illegal invocation" and silently aborts.
-    function setNative(val) {
-      const proto = input.tagName === 'TEXTAREA'
-        ? HTMLTextAreaElement.prototype
-        : HTMLInputElement.prototype;
-      const desc = Object.getOwnPropertyDescriptor(proto, 'value');
-      if (desc && desc.set) desc.set.call(input, val);
-      else input.value = val;
-      input.dispatchEvent(new Event('input',  {bubbles: true}));
-      input.dispatchEvent(new Event('change', {bubbles: true}));
-    }
-    // Encode: "slot_id|seg_idx|timestamp"
-    const triggerVal = slot_id + '|' + idx + '|' + Date.now();
-    setNative(triggerVal);
-    console.log('[fireRegen] fired trigger for', slot_id, 'seg', idx);
-    const lbl = document.getElementById('wf_seglabel_' + slot_id);
-    if (lbl) lbl.textContent = 'Regenerating Seg ' + (idx + 1) + '...';
-  }
   window.addEventListener('message', function(e) {
     const d = e.data;
     if (!d || d.type !== 'wf_popup') return;
@@ -1598,7 +1657,7 @@ _GLOBAL_JS = """
     _pendingIdx  = d.seg_idx;
     const lbl = document.getElementById('_wf_popup_lbl');
     if (lbl) lbl.textContent = 'Seg ' + (d.seg_idx + 1) +
-      '  (' + d.t0.toFixed(2) + 's – ' + d.t1.toFixed(2) + 's)';
     p.style.display = 'block';
     p.style.left = (d.x + 10) + 'px';
     p.style.top  = (d.y + 10) + 'px';
@@ -1631,20 +1690,18 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
             with gr.Row():
                 with gr.Column():
                     taro_video   = gr.Video(label="Input Video")
-                    taro_seed    = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
-                    taro_cfg     = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=7.5, step=0.5)
-                    taro_steps   = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1)
-                    taro_mode    = gr.Radio(label="Sampling Mode", choices=["sde", "ode"], value="sde")
-                    taro_cf_dur  = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
-                    taro_cf_db   = gr.Textbox(label="Crossfade Boost (dB)", value="3")
                     taro_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
                     taro_btn     = gr.Button("Generate", variant="primary")
                 with gr.Column():
                     (taro_slot_grps, taro_slot_vids,
-                     taro_slot_waves, taro_slot_rtrigs,
-                     taro_slot_states,
-                     taro_slot_state_reads) = _make_output_slots("taro")
             for trigger in [taro_video, taro_steps, taro_cf_dur]:
                 trigger.change(
@@ -1674,63 +1731,58 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 fn=_run_taro,
                 inputs=[taro_video, taro_seed, taro_cfg, taro_steps, taro_mode,
                         taro_cf_dur, taro_cf_db, taro_samples],
-                outputs=taro_slot_vids + taro_slot_waves + taro_slot_states,
             ).then(
                 fn=_update_slot_visibility,
                 inputs=[taro_samples],
                 outputs=taro_slot_grps,
             ))
-            # Relay: keep seg_state_reads in sync with seg_states (write→read mirror)
-            for _st, _str in zip(taro_slot_states, taro_slot_state_reads):
-                _st.change(fn=lambda v: v, inputs=[_st], outputs=[_str])
-            # Per-slot regen trigger wiring for TARO
-            for _i, _rtrig in enumerate(taro_slot_rtrigs):
                 _slot_id = f"taro_{_i}"
                 print(f"[startup] registering regen handler for slot {_slot_id}")
                 def _make_taro_regen(_si, _sid):
-                    def _do(trigger_val, video, seed, cfg, steps, mode, cf_dur, cf_db, state_json):
-                        print(f"[regen TARO] trigger_val={trigger_val!r} state_json_len={len(state_json) if state_json else 0}")
-                        if not trigger_val:
-                            print(f"[regen TARO] early-exit: trigger_val empty")
-                            yield gr.update(), gr.update(), gr.update(); return
                         if not state_json:
                             print(f"[regen TARO] early-exit: state_json empty")
-                            yield gr.update(), gr.update(), gr.update(); return
-                        # Trigger format: "slot_id|seg_idx|timestamp"
-                        parts = trigger_val.split("|", 2)
-                        if len(parts) < 2 or parts[0] != _sid:
-                            print(f"[regen TARO] early-exit: parts[0]={parts[0]!r} expected={_sid!r}")
-                            yield gr.update(), gr.update(), gr.update(); return
-                        seg_idx = int(parts[1])
-                        print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — acquiring lock")
                         lock = _get_slot_lock(_sid)
                         with lock:
                             print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
-                            state   = json.loads(state_json)
                             pending_html = _build_regen_pending_html(
-                                state["segments"], seg_idx, _sid,
-                                f"regen_trigger_{_sid}"
                             )
-                            yield gr.update(), gr.update(value=pending_html), gr.update(value=state_json)
                             print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — calling regen_taro_segment")
                             try:
                                 vid, aud, new_meta_json, html = regen_taro_segment(
-                                    video, seg_idx, state_json,
                                     seed, cfg, steps, mode, cf_dur, cf_db, _sid,
                                 )
                                 print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
                             except Exception as _e:
                                 print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
                                 raise
-                            yield gr.update(value=vid), gr.update(value=html), gr.update(value=new_meta_json)
                     return _do
-                _rtrig.change(
                     fn=_make_taro_regen(_i, _slot_id),
-                    inputs=[_rtrig, taro_video, taro_seed, taro_cfg, taro_steps,
-                            taro_mode, taro_cf_dur, taro_cf_db, taro_slot_state_reads[_i]],
-                    outputs=[taro_slot_vids[_i], taro_slot_waves[_i], taro_slot_states[_i]],
                 )
         # ---------------------------------------------------------- #
@@ -1740,21 +1792,19 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
             with gr.Row():
                 with gr.Column():
                     mma_video    = gr.Video(label="Input Video")
-                    mma_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. footsteps on gravel")
-                    mma_neg      = gr.Textbox(label="Negative Prompt", placeholder="music, speech")
-                    mma_seed     = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
-                    mma_cfg      = gr.Slider(label="CFG Strength", minimum=1, maximum=10, value=4.5, step=0.5)
-                    mma_steps    = gr.Slider(label="Steps", minimum=10, maximum=50, value=25, step=1)
-                    mma_cf_dur   = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
-                    mma_cf_db    = gr.Textbox(label="Crossfade Boost (dB)", value="3")
                     mma_samples  = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
                     mma_btn      = gr.Button("Generate", variant="primary")
                 with gr.Column():
                     (mma_slot_grps, mma_slot_vids,
-                     mma_slot_waves, mma_slot_rtrigs,
-                     mma_slot_states,
-                     mma_slot_state_reads) = _make_output_slots("mma")
             mma_samples.change(
                 fn=_update_slot_visibility,
@@ -1775,60 +1825,50 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 fn=_run_mmaudio,
                 inputs=[mma_video, mma_prompt, mma_neg, mma_seed,
                         mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_samples],
-                outputs=mma_slot_vids + mma_slot_waves + mma_slot_states,
             ).then(
                 fn=_update_slot_visibility,
                 inputs=[mma_samples],
                 outputs=mma_slot_grps,
             ))
-            # Relay: keep mma_slot_state_reads in sync with mma_slot_states
-            for _st, _str in zip(mma_slot_states, mma_slot_state_reads):
-                _st.change(fn=lambda v: v, inputs=[_st], outputs=[_str])
-            for _i, _rtrig in enumerate(mma_slot_rtrigs):
                 _slot_id = f"mma_{_i}"
                 def _make_mma_regen(_si, _sid):
-                    def _do(trigger_val, video, prompt, neg, seed, cfg, steps, cf_dur, cf_db, state_json):
-                        print(f"[regen MMA] trigger_val={trigger_val!r} state_json_len={len(state_json) if state_json else 0}")
-                        if not trigger_val:
-                            print(f"[regen MMA] early-exit: trigger_val empty")
-                            yield gr.update(), gr.update(), gr.update(); return
                         if not state_json:
                             print(f"[regen MMA] early-exit: state_json empty")
-                            yield gr.update(), gr.update(), gr.update(); return
-                        parts = trigger_val.split("|", 2)
-                        if len(parts) < 2 or parts[0] != _sid:
-                            print(f"[regen MMA] early-exit: parts[0]={parts[0]!r} expected={_sid!r}")
-                            yield gr.update(), gr.update(), gr.update(); return
-                        seg_idx = int(parts[1])
-                        print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — acquiring lock")
                         lock = _get_slot_lock(_sid)
                         with lock:
-                            print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
-                            state   = json.loads(state_json)
                             pending_html = _build_regen_pending_html(
-                                state["segments"], seg_idx, _sid,
-                                f"regen_trigger_{_sid}"
                             )
-                            yield gr.update(), gr.update(value=pending_html), gr.update(value=state_json)
                             print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — calling regen_mmaudio_segment")
                             try:
                                 vid, aud, new_meta_json, html = regen_mmaudio_segment(
-                                    video, seg_idx, state_json,
                                     prompt, neg, seed, cfg, steps, cf_dur, cf_db, _sid,
                                 )
                                 print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
                             except Exception as _e:
                                 print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
                                 raise
-                            yield gr.update(value=vid), gr.update(value=html), gr.update(value=new_meta_json)
                     return _do
-                _rtrig.change(
                     fn=_make_mma_regen(_i, _slot_id),
-                    inputs=[_rtrig, mma_video, mma_prompt, mma_neg, mma_seed,
-                            mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_slot_state_reads[_i]],
-                    outputs=[mma_slot_vids[_i], mma_slot_waves[_i], mma_slot_states[_i]],
                 )
         # ---------------------------------------------------------- #
@@ -1838,22 +1878,20 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
             with gr.Row():
                 with gr.Column():
                     hf_video    = gr.Video(label="Input Video")
-                    hf_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. rain hitting a metal roof")
-                    hf_neg      = gr.Textbox(label="Negative Prompt", value="noisy, harsh")
-                    hf_seed     = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
-                    hf_guidance = gr.Slider(label="Guidance Scale", minimum=1, maximum=10, value=4.5, step=0.5)
-                    hf_steps    = gr.Slider(label="Steps", minimum=10, maximum=100, value=50, step=5)
-                    hf_size     = gr.Radio(label="Model Size", choices=["xl", "xxl"], value="xxl")
-                    hf_cf_dur   = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
-                    hf_cf_db    = gr.Textbox(label="Crossfade Boost (dB)", value="3")
                     hf_samples  = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
                     hf_btn      = gr.Button("Generate", variant="primary")
                 with gr.Column():
                     (hf_slot_grps, hf_slot_vids,
-                     hf_slot_waves, hf_slot_rtrigs,
-                     hf_slot_states,
-                     hf_slot_state_reads) = _make_output_slots("hf")
             hf_samples.change(
                 fn=_update_slot_visibility,
@@ -1874,60 +1912,50 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 fn=_run_hunyuan,
                 inputs=[hf_video, hf_prompt, hf_neg, hf_seed,
                         hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_samples],
-                outputs=hf_slot_vids + hf_slot_waves + hf_slot_states,
             ).then(
                 fn=_update_slot_visibility,
                 inputs=[hf_samples],
                 outputs=hf_slot_grps,
             ))
-            # Relay: keep hf_slot_state_reads in sync with hf_slot_states
-            for _st, _str in zip(hf_slot_states, hf_slot_state_reads):
-                _st.change(fn=lambda v: v, inputs=[_st], outputs=[_str])
-            for _i, _rtrig in enumerate(hf_slot_rtrigs):
                 _slot_id = f"hf_{_i}"
                 def _make_hf_regen(_si, _sid):
-                    def _do(trigger_val, video, prompt, neg, seed, guidance, steps, size, cf_dur, cf_db, state_json):
-                        print(f"[regen HF] trigger_val={trigger_val!r} state_json_len={len(state_json) if state_json else 0}")
-                        if not trigger_val:
-                            print(f"[regen HF] early-exit: trigger_val empty")
-                            yield gr.update(), gr.update(), gr.update(); return
                         if not state_json:
                             print(f"[regen HF] early-exit: state_json empty")
-                            yield gr.update(), gr.update(), gr.update(); return
-                        parts = trigger_val.split("|", 2)
-                        if len(parts) < 2 or parts[0] != _sid:
-                            print(f"[regen HF] early-exit: parts[0]={parts[0]!r} expected={_sid!r}")
-                            yield gr.update(), gr.update(), gr.update(); return
-                        seg_idx = int(parts[1])
-                        print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — acquiring lock")
                         lock = _get_slot_lock(_sid)
                         with lock:
-                            print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
-                            state   = json.loads(state_json)
                             pending_html = _build_regen_pending_html(
-                                state["segments"], seg_idx, _sid,
-                                f"regen_trigger_{_sid}"
                             )
-                            yield gr.update(), gr.update(value=pending_html), gr.update(value=state_json)
                             print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — calling regen_hunyuan_segment")
                             try:
                                 vid, aud, new_meta_json, html = regen_hunyuan_segment(
-                                    video, seg_idx, state_json,
                                     prompt, neg, seed, guidance, steps, size, cf_dur, cf_db, _sid,
                                 )
                                 print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
                             except Exception as _e:
                                 print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
                                 raise
-                            yield gr.update(value=vid), gr.update(value=html), gr.update(value=new_meta_json)
                     return _do
-                _rtrig.change(
                     fn=_make_hf_regen(_i, _slot_id),
-                    inputs=[_rtrig, hf_video, hf_prompt, hf_neg, hf_seed,
-                            hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_slot_state_reads[_i]],
-                    outputs=[hf_slot_vids[_i], hf_slot_waves[_i], hf_slot_states[_i]],
                 )
     # ---- Cross-tab video sync ----

     updated_meta["audio_path"] = audio_path
     updated_meta["video_path"] = video_path
+    # Serialise for embedding in waveform HTML data-state (wavs as lists for JSON)
+    _serialised_meta            = dict(updated_meta)
+    _serialised_meta["wavs"]    = [w.tolist() for w in wavs]
+    state_json_new = json.dumps(_serialised_meta)
+    waveform_html = _build_waveform_html(audio_path, segments, slot_id, "",
+                                         state_json=state_json_new)
     return video_path, audio_path, updated_meta, waveform_html
 def _build_waveform_html(audio_path: str, segments: list, slot_id: str,
+                         hidden_input_id: str, state_json: str = "",
+                         fn_index: int = -1) -> str:
     """Return a self-contained HTML block with a Canvas waveform (display only),
     segment boundary markers, and a download link.
     import html as _html
     srcdoc = _html.escape(iframe_inner, quote=True)
+    import html as _html2
+    state_escaped  = _html2.escape(state_json or "", quote=True)
     return f"""
 <div id="wf_container_{slot_id}"
+     data-fn-index="{fn_index}"
+     data-state="{state_escaped}"
      style="background:#1a1a1a;border-radius:8px;padding:10px;margin-top:6px;position:relative;">
   <div style="position:relative;width:100%;height:80px;">
     <iframe id="wf_iframe_{slot_id}"
 def _make_output_slots(tab_prefix: str) -> tuple:
     """Build MAX_SLOTS output groups for one tab.
+    Each slot has: video and waveform HTML.
+    Regen is triggered via direct Gradio queue API calls from JS (no hidden
+    trigger textboxes needed — DOM event dispatch is unreliable in Gradio 5
+    Svelte components).  State JSON is embedded in the waveform HTML's
+    data-state attribute and passed directly in the queue API payload.
+    Returns (grps, vids, waveforms).
     """
+    grps, vids, waveforms = [], [], []
     for i in range(MAX_SLOTS):
         with gr.Group(visible=(i == 0)) as g:
             vids.append(gr.Video(label=f"Generation {i+1} — Video"))
             waveforms.append(gr.HTML(
                 value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>",
             ))
         grps.append(g)
+    return grps, vids, waveforms
 def _unpack_outputs(flat: list, n: int, tab_prefix: str) -> list:
     """Turn a flat _pad_outputs list into Gradio update lists.
     flat has MAX_SLOTS * 3 items: [vid0, aud0, meta0, vid1, aud1, meta1, ...]
+    Returns updates for vids + waveforms only (NOT grps).
     Group visibility is handled separately via .then() to avoid Gradio 5 SSR
     'Too many arguments' caused by mixing gr.Group updates with other outputs.
+    State JSON is embedded in the waveform HTML data-state attribute so JS
+    can read it when calling the Gradio queue API for regen.
     """
     n = int(n)
     vid_updates  = []
     wave_updates = []
     for i in range(MAX_SLOTS):
         vid_path  = flat[i * 3]
         aud_path  = flat[i * 3 + 1]
         meta      = flat[i * 3 + 2]
         vid_updates.append(gr.update(value=vid_path))
         if aud_path and meta:
+            slot_id      = f"{tab_prefix}_{i}"
+            state_json   = json.dumps(meta)
+            html = _build_waveform_html(aud_path, meta["segments"], slot_id,
+                                        "", state_json=state_json)
             wave_updates.append(gr.update(value=html))
         else:
             wave_updates.append(gr.update(
                 value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>"
             ))
+    return vid_updates + wave_updates
 def _on_video_upload_taro(video_file, num_steps, crossfade_s):
     max-height: 60vh !important;
     object-fit: contain;
 }
+/* No hidden trigger inputs needed — regen uses direct Gradio queue API calls. */
 """
 _GLOBAL_JS = """
 () => {
   // Global postMessage handler for waveform iframe events.
   // Runs once on page load (Gradio js= parameter).
+  // Handles: popup open/close relay, regen trigger via Gradio queue API.
   if (window._wf_global_listener) return;  // already registered
   window._wf_global_listener = true;
+  // Cache: api_name -> fn_index, built once from gradio_config.dependencies
+  let _fnIndexCache = null;
+  function getFnIndex(apiName) {
+    if (!_fnIndexCache) {
+      _fnIndexCache = {};
+      const deps = window.gradio_config && window.gradio_config.dependencies;
+      if (deps) deps.forEach(function(d, i) {
+        if (d.api_name) _fnIndexCache[d.api_name] = i;
+      });
+    }
+    return _fnIndexCache[apiName];
+  }
+  // Read a component's current DOM value by elem_id.
+  // For Number/Slider: reads the <input type="number"> or <input type="range">.
+  // For Textbox/Radio: reads the <textarea> or checked <input type="radio">.
+  // Returns null if not found.
+  function readComponentValue(elemId) {
+    const el = document.getElementById(elemId);
+    if (!el) return null;
+    const numInput = el.querySelector('input[type="number"]');
+    if (numInput) return parseFloat(numInput.value);
+    const rangeInput = el.querySelector('input[type="range"]');
+    if (rangeInput) return parseFloat(rangeInput.value);
+    const radio = el.querySelector('input[type="radio"]:checked');
+    if (radio) return radio.value;
+    const ta = el.querySelector('textarea');
+    if (ta) return ta.value;
+    const txt = el.querySelector('input[type="text"], input:not([type])');
+    if (txt) return txt.value;
+    return null;
+  }
+  // Fire regen for a given slot and segment by posting directly to the
+  // Gradio queue API — bypasses Svelte binding entirely.
+  function fireRegen(slot_id, seg_idx) {
+    // Determine tab prefix from slot_id (e.g. "taro_0" -> "taro")
+    const prefix = slot_id.split('_')[0];
+    const slotNum = parseInt(slot_id.split('_')[1], 10);
+    // Build api_name for this slot's regen handler
+    const apiName = 'regen_' + prefix + '_' + slotNum;
+    const fnIndex = getFnIndex(apiName);
+    if (fnIndex === undefined) {
+      console.warn('[fireRegen] fn_index not found for api_name:', apiName, 'cache:', _fnIndexCache);
+      return;
+    }
+    // Read state_json from the waveform container data-state attribute
+    const container = document.getElementById('wf_container_' + slot_id);
+    const stateJson  = container ? (container.getAttribute('data-state') || '') : '';
+    if (!stateJson) {
+      console.warn('[fireRegen] no state_json for slot', slot_id);
+      return;
+    }
+    // Read current input values from DOM by elem_id
+    let data;
+    if (prefix === 'taro') {
+      const video = null;  // video is a file component — pass null, server uses its own state
+      data = [
+        seg_idx,
+        stateJson,
+        video,
+        readComponentValue('taro_seed'),
+        readComponentValue('taro_cfg'),
+        readComponentValue('taro_steps'),
+        readComponentValue('taro_mode'),
+        readComponentValue('taro_cf_dur'),
+        readComponentValue('taro_cf_db')
+      ];
+    } else if (prefix === 'mma') {
+      data = [
+        seg_idx,
+        stateJson,
+        null,  // video
+        readComponentValue('mma_prompt'),
+        readComponentValue('mma_neg'),
+        readComponentValue('mma_seed'),
+        readComponentValue('mma_cfg'),
+        readComponentValue('mma_steps'),
+        readComponentValue('mma_cf_dur'),
+        readComponentValue('mma_cf_db')
+      ];
+    } else {
+      data = [
+        seg_idx,
+        stateJson,
+        null,  // video
+        readComponentValue('hf_prompt'),
+        readComponentValue('hf_neg'),
+        readComponentValue('hf_seed'),
+        readComponentValue('hf_guidance'),
+        readComponentValue('hf_steps'),
+        readComponentValue('hf_size'),
+        readComponentValue('hf_cf_dur'),
+        readComponentValue('hf_cf_db')
+      ];
+    }
+    console.log('[fireRegen] calling api', apiName, 'fn_index', fnIndex, 'seg', seg_idx);
+    fetch('/gradio_api/queue/join', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({
+        data: data,
+        fn_index: fnIndex,
+        session_hash: window.__gradio_session_hash__,
+        event_data: null,
+        trigger_id: null
+      })
+    }).then(function(r) { return r.json(); }).then(function(j) {
+      console.log('[fireRegen] queued, event_id:', j.event_id);
+      const lbl = document.getElementById('wf_seglabel_' + slot_id);
+      if (lbl) lbl.textContent = 'Regenerating Seg ' + (seg_idx + 1) + '...';
+    }).catch(function(e) {
+      console.error('[fireRegen] fetch error:', e);
+    });
+  }
   // Shared popup element created once and reused across all slots
   let _popup = null;
   let _pendingSlot = null, _pendingIdx = null;
     _pendingSlot = null; _pendingIdx = null;
   }
   window.addEventListener('message', function(e) {
     const d = e.data;
     if (!d || d.type !== 'wf_popup') return;
     _pendingIdx  = d.seg_idx;
     const lbl = document.getElementById('_wf_popup_lbl');
     if (lbl) lbl.textContent = 'Seg ' + (d.seg_idx + 1) +
+      '  (' + d.t0.toFixed(2) + 's \u2013 ' + d.t1.toFixed(2) + 's)';
     p.style.display = 'block';
     p.style.left = (d.x + 10) + 'px';
     p.style.top  = (d.y + 10) + 'px';
             with gr.Row():
                 with gr.Column():
                     taro_video   = gr.Video(label="Input Video")
+                    taro_seed    = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0, elem_id="taro_seed")
+                    taro_cfg     = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=7.5, step=0.5, elem_id="taro_cfg")
+                    taro_steps   = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1, elem_id="taro_steps")
+                    taro_mode    = gr.Radio(label="Sampling Mode", choices=["sde", "ode"], value="sde", elem_id="taro_mode")
+                    taro_cf_dur  = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1, elem_id="taro_cf_dur")
+                    taro_cf_db   = gr.Textbox(label="Crossfade Boost (dB)", value="3", elem_id="taro_cf_db")
                     taro_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
                     taro_btn     = gr.Button("Generate", variant="primary")
                 with gr.Column():
                     (taro_slot_grps, taro_slot_vids,
+                     taro_slot_waves) = _make_output_slots("taro")
             for trigger in [taro_video, taro_steps, taro_cf_dur]:
                 trigger.change(
                 fn=_run_taro,
                 inputs=[taro_video, taro_seed, taro_cfg, taro_steps, taro_mode,
                         taro_cf_dur, taro_cf_db, taro_samples],
+                outputs=taro_slot_vids + taro_slot_waves,
             ).then(
                 fn=_update_slot_visibility,
                 inputs=[taro_samples],
                 outputs=taro_slot_grps,
             ))
+            # Per-slot regen handlers for TARO.
+            # JS calls /gradio_api/queue/join directly with fn_index + data array:
+            #   data = [seg_idx, state_json, video_path_or_null, seed, cfg, steps, mode, cf_dur, cf_db]
+            # fn_index is discovered at runtime from gradio_config.dependencies by api_name.
+            # The handlers are registered via a dummy gr.Button click so Gradio assigns them
+            # a stable fn_index and api_name.
+            taro_regen_btns = []
+            for _i in range(MAX_SLOTS):
                 _slot_id = f"taro_{_i}"
+                _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
+                taro_regen_btns.append(_btn)
                 print(f"[startup] registering regen handler for slot {_slot_id}")
                 # Factory that binds one slot id to a TARO regen handler.
                 # _sid is captured by the returned generator; _si is accepted but
                 # not used in the body below.
                 def _make_taro_regen(_si, _sid):
                     # Generator handler. Each yield is a pair of gr.update() values,
                     # registered below against this slot's video and waveform outputs.
+                    def _do(seg_idx, state_json, video, seed, cfg, steps, mode, cf_dur, cf_db):
+                        print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json) if state_json else 0}")
                         # Without state there is nothing to regenerate: emit one
                         # no-op update pair and stop.
                         if not state_json:
                             print(f"[regen TARO] early-exit: state_json empty")
+                            yield gr.update(), gr.update(); return
                         lock = _get_slot_lock(_sid)
                         # The lock is held for the rest of the generator, including
                         # across both yields below.
                         with lock:
                             print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
+                            state        = json.loads(state_json)
                             # NOTE(review): the trailing "" fills the helper's fourth
                             # positional parameter; its meaning is not visible here.
                             pending_html = _build_regen_pending_html(
+                                state["segments"], int(seg_idx), _sid, ""
                             )
                             # First update: leave the video as is, show the pending HTML.
+                            yield gr.update(), gr.update(value=pending_html)
                             print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — calling regen_taro_segment")
                             try:
                                 # aud and new_meta_json are returned but not used below.
                                 vid, aud, new_meta_json, html = regen_taro_segment(
+                                    video, int(seg_idx), state_json,
                                     seed, cfg, steps, mode, cf_dur, cf_db, _sid,
                                 )
                                 print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
                             except Exception as _e:
                                 # Log, then re-raise so the failure is not swallowed.
                                 print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
                                 raise
                             # Final update: new video and waveform HTML.
+                            yield gr.update(value=vid), gr.update(value=html)
                     return _do
+                _btn.click(
                     fn=_make_taro_regen(_i, _slot_id),
+                    inputs=[taro_seed, taro_seed,   # seg_idx, state_json placeholders
+                            taro_video, taro_seed, taro_cfg, taro_steps,
+                            taro_mode, taro_cf_dur, taro_cf_db],
+                    outputs=[taro_slot_vids[_i], taro_slot_waves[_i]],
+                    api_name=f"regen_taro_{_i}",
                 )
         # ---------------------------------------------------------- #
             with gr.Row():
                 with gr.Column():
                     mma_video    = gr.Video(label="Input Video")
+                    mma_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. footsteps on gravel", elem_id="mma_prompt")
+                    mma_neg      = gr.Textbox(label="Negative Prompt", placeholder="music, speech", elem_id="mma_neg")
+                    mma_seed     = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0, elem_id="mma_seed")
+                    mma_cfg      = gr.Slider(label="CFG Strength", minimum=1, maximum=10, value=4.5, step=0.5, elem_id="mma_cfg")
+                    mma_steps    = gr.Slider(label="Steps", minimum=10, maximum=50, value=25, step=1, elem_id="mma_steps")
+                    mma_cf_dur   = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1, elem_id="mma_cf_dur")
+                    mma_cf_db    = gr.Textbox(label="Crossfade Boost (dB)", value="3", elem_id="mma_cf_db")
                     mma_samples  = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
                     mma_btn      = gr.Button("Generate", variant="primary")
                 with gr.Column():
                     (mma_slot_grps, mma_slot_vids,
+                     mma_slot_waves) = _make_output_slots("mma")
             mma_samples.change(
                 fn=_update_slot_visibility,
                 fn=_run_mmaudio,
                 inputs=[mma_video, mma_prompt, mma_neg, mma_seed,
                         mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_samples],
+                outputs=mma_slot_vids + mma_slot_waves,
             ).then(
                 fn=_update_slot_visibility,
                 inputs=[mma_samples],
                 outputs=mma_slot_grps,
             ))
+            mma_regen_btns = []
+            for _i in range(MAX_SLOTS):
                 _slot_id = f"mma_{_i}"
+                _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
+                mma_regen_btns.append(_btn)
                 # Factory that binds one slot id to an MMAudio regen handler.
                 # _sid is captured by the returned generator; _si is accepted but
                 # not used in the body below.
                 def _make_mma_regen(_si, _sid):
                     # Generator handler. Each yield is a pair of gr.update() values,
                     # registered below against this slot's video and waveform outputs.
+                    def _do(seg_idx, state_json, video, prompt, neg, seed, cfg, steps, cf_dur, cf_db):
+                        print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json) if state_json else 0}")
                         # Without state there is nothing to regenerate: emit one
                         # no-op update pair and stop.
                         if not state_json:
                             print(f"[regen MMA] early-exit: state_json empty")
+                            yield gr.update(), gr.update(); return
                         lock = _get_slot_lock(_sid)
                         # The lock is held for the rest of the generator, including
                         # across both yields below.
                         with lock:
+                            state        = json.loads(state_json)
                             # NOTE(review): the trailing "" fills the helper's fourth
                             # positional parameter; its meaning is not visible here.
                             pending_html = _build_regen_pending_html(
+                                state["segments"], int(seg_idx), _sid, ""
                             )
                             # First update: leave the video as is, show the pending HTML.
+                            yield gr.update(), gr.update(value=pending_html)
                             print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — calling regen_mmaudio_segment")
                             try:
                                 # aud and new_meta_json are returned but not used below.
                                 vid, aud, new_meta_json, html = regen_mmaudio_segment(
+                                    video, int(seg_idx), state_json,
                                     prompt, neg, seed, cfg, steps, cf_dur, cf_db, _sid,
                                 )
                                 print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
                             except Exception as _e:
                                 # Log, then re-raise so the failure is not swallowed.
                                 print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
                                 raise
                             # Final update: new video and waveform HTML.
+                            yield gr.update(value=vid), gr.update(value=html)
                     return _do
+                _btn.click(
                     fn=_make_mma_regen(_i, _slot_id),
+                    inputs=[mma_seed, mma_seed,   # seg_idx, state_json placeholders
+                            mma_video, mma_prompt, mma_neg, mma_seed,
+                            mma_cfg, mma_steps, mma_cf_dur, mma_cf_db],
+                    outputs=[mma_slot_vids[_i], mma_slot_waves[_i]],
+                    api_name=f"regen_mma_{_i}",
                 )
         # ---------------------------------------------------------- #
             with gr.Row():
                 with gr.Column():
                     hf_video    = gr.Video(label="Input Video")
+                    hf_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. rain hitting a metal roof", elem_id="hf_prompt")
+                    hf_neg      = gr.Textbox(label="Negative Prompt", value="noisy, harsh", elem_id="hf_neg")
+                    hf_seed     = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0, elem_id="hf_seed")
+                    hf_guidance = gr.Slider(label="Guidance Scale", minimum=1, maximum=10, value=4.5, step=0.5, elem_id="hf_guidance")
+                    hf_steps    = gr.Slider(label="Steps", minimum=10, maximum=100, value=50, step=5, elem_id="hf_steps")
+                    hf_size     = gr.Radio(label="Model Size", choices=["xl", "xxl"], value="xxl", elem_id="hf_size")
+                    hf_cf_dur   = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1, elem_id="hf_cf_dur")
+                    hf_cf_db    = gr.Textbox(label="Crossfade Boost (dB)", value="3", elem_id="hf_cf_db")
                     hf_samples  = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
                     hf_btn      = gr.Button("Generate", variant="primary")
                 with gr.Column():
                     (hf_slot_grps, hf_slot_vids,
+                     hf_slot_waves) = _make_output_slots("hf")
             hf_samples.change(
                 fn=_update_slot_visibility,
                 fn=_run_hunyuan,
                 inputs=[hf_video, hf_prompt, hf_neg, hf_seed,
                         hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_samples],
+                outputs=hf_slot_vids + hf_slot_waves,
             ).then(
                 fn=_update_slot_visibility,
                 inputs=[hf_samples],
                 outputs=hf_slot_grps,
             ))
+            hf_regen_btns = []
+            for _i in range(MAX_SLOTS):
                 _slot_id = f"hf_{_i}"
+                _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
+                hf_regen_btns.append(_btn)
                 # Factory that binds one slot id to a Hunyuan regen handler.
                 # _sid is captured by the returned generator; _si is accepted but
                 # not used in the body below.
                 def _make_hf_regen(_si, _sid):
                     # Generator handler. Each yield is a pair of gr.update() values,
                     # registered below against this slot's video and waveform outputs.
+                    def _do(seg_idx, state_json, video, prompt, neg, seed, guidance, steps, size, cf_dur, cf_db):
+                        print(f"[regen HF] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json) if state_json else 0}")
                         # Without state there is nothing to regenerate: emit one
                         # no-op update pair and stop.
                         if not state_json:
                             print(f"[regen HF] early-exit: state_json empty")
+                            yield gr.update(), gr.update(); return
                         lock = _get_slot_lock(_sid)
                         # The lock is held for the rest of the generator, including
                         # across both yields below.
                         with lock:
+                            state        = json.loads(state_json)
                             # NOTE(review): the trailing "" fills the helper's fourth
                             # positional parameter; its meaning is not visible here.
                             pending_html = _build_regen_pending_html(
+                                state["segments"], int(seg_idx), _sid, ""
                             )
                             # First update: leave the video as is, show the pending HTML.
+                            yield gr.update(), gr.update(value=pending_html)
                             print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — calling regen_hunyuan_segment")
                             try:
                                 # aud and new_meta_json are returned but not used below.
                                 vid, aud, new_meta_json, html = regen_hunyuan_segment(
+                                    video, int(seg_idx), state_json,
                                     prompt, neg, seed, guidance, steps, size, cf_dur, cf_db, _sid,
                                 )
                                 print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
                             except Exception as _e:
                                 # Log, then re-raise so the failure is not swallowed.
                                 print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
                                 raise
                             # Final update: new video and waveform HTML.
+                            yield gr.update(value=vid), gr.update(value=html)
                     return _do
+                _btn.click(
                     fn=_make_hf_regen(_i, _slot_id),
+                    inputs=[hf_seed, hf_seed,   # seg_idx, state_json placeholders
+                            hf_video, hf_prompt, hf_neg, hf_seed,
+                            hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db],
+                    outputs=[hf_slot_vids[_i], hf_slot_waves[_i]],
+                    api_name=f"regen_hf_{_i}",
                 )
     # ---- Cross-tab video sync ----