Spaces:

yoyolicoris
/

diffvox

Running

App Files Files Community

yoyolicoris committed on May 13

Commit

6bd893f

1 Parent(s): 644e3c2

feat: separate direct and wet audio outputs and enable compressor control

Browse files

Files changed (1) hide show

app.py +134 -13

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import pyloudnorm as pyln
 from hydra.utils import instantiate
 from soxr import resample
 from functools import partial
 from modules.utils import chain_functions, vec2statedict, get_chunks
 from modules.fx import clip_delay_eq_Q
@@ -109,6 +110,7 @@ def z2fx():
 @torch.no_grad()
 def fx2z():
     state_dict = fx.state_dict()
     flattened = torch.cat([state_dict[k].flatten() for k in param_keys])
     x = flattened[feature_mask]
@@ -133,10 +135,24 @@ def inference(audio):
     if y.shape[1] != 1:
         y = y.mean(dim=1, keepdim=True)
-    rendered = fx(y).squeeze(0).T.numpy()
     if np.max(np.abs(rendered)) > 1:
-        rendered = rendered / np.max(np.abs(rendered))
-    return (44100, (rendered * 32768).astype(np.int16))
 def get_important_pcs(n=10, **kwargs):
@@ -294,12 +310,17 @@ def plot_t60():
 @torch.no_grad()
-def upatePEQ(eq, attr_name, value):
-    match type(getattr(eq.params, attr_name)):
         case torch.nn.Parameter:
-            getattr(eq.params, attr_name).data.copy_(value)
         case _:
-            setattr(eq.params, attr_name, torch.tensor(value))
 with gr.Blocks() as demo:
@@ -388,11 +409,15 @@ with gr.Blocks() as demo:
             audio_output = gr.Audio(
                 type="numpy", label="Output Audio", interactive=False, loop=True
             )
-            _ = gr.Markdown("## Parametric EQ")
-            peq_plot = gr.Plot(
-                plot_eq(), label="PEQ Frequency Response", elem_id="peq-plot"
             )
     with gr.Row():
         with gr.Column(min_width=160):
             _ = gr.Markdown("High Pass")
@@ -514,7 +539,63 @@ with gr.Blocks() as demo:
                 label="Q",
             )
-    comp_plot = gr.Plot(plot_comp(), label="Compressor Curve", elem_id="comp-plot")
     delay_plot = gr.Plot(
         plot_delay(), label="Delay Frequency Response", elem_id="delay-plot"
     )
@@ -558,7 +639,7 @@ with gr.Blocks() as demo:
     ):
         s.input(
             lambda *args, eq=eq, attr_name=attr_name: chain_functions(  # chain_functions(
-                lambda args: (upatePEQ(eq, attr_name, args[0]), args[1]),
                 lambda args: (fx2z(), args[1]),
                 lambda args: args[1],
                 lambda i: update_pc(i) + [model2json(), plot_eq()],
@@ -569,6 +650,30 @@ with gr.Blocks() as demo:
             outputs=update_pc_outputs + [json_output, peq_plot],
         )
     render_button.click(
         # lambda *args: (
         #     lambda x: (
@@ -582,6 +687,8 @@ with gr.Blocks() as demo:
         ],
         outputs=[
             audio_output,
         ],
     )
@@ -600,6 +707,13 @@ with gr.Blocks() as demo:
         lp.params.Q.item(),
         hp.params.freq.item(),
         hp.params.Q.item(),
     ]
     update_fx_outputs = [
         pk1_freq,
@@ -616,6 +730,13 @@ with gr.Blocks() as demo:
         lp_q,
         hp_freq,
         hp_q,
     ]
     update_plots = lambda: [
         plot_eq(),

 from hydra.utils import instantiate
 from soxr import resample
 from functools import partial
+from torchcomp import coef2ms, ms2coef
 from modules.utils import chain_functions, vec2statedict, get_chunks
 from modules.fx import clip_delay_eq_Q
 @torch.no_grad()
 def fx2z():
+    plt.close("all")
     state_dict = fx.state_dict()
     flattened = torch.cat([state_dict[k].flatten() for k in param_keys])
     x = flattened[feature_mask]
     if y.shape[1] != 1:
         y = y.mean(dim=1, keepdim=True)
+    direct, wet = fx(y)
+    direct = direct.squeeze(0).T.numpy()
+    wet = wet.squeeze(0).T.numpy()
+    rendered = direct + wet
+    # rendered = fx(y).squeeze(0).T.numpy()
     if np.max(np.abs(rendered)) > 1:
+        scaler = np.max(np.abs(rendered))
+        rendered = rendered / scaler
+        direct = direct / scaler
+        wet = wet / scaler
+    return (
+        (44100, (rendered * 32768).astype(np.int16)),
+        (44100, (direct * 32768).astype(np.int16)),
+        (
+            44100,
+            (wet * 32768).astype(np.int16),
+        ),
+    )
 def get_important_pcs(n=10, **kwargs):
 @torch.no_grad()
+def update_param(m, attr_name, value):
+    match type(getattr(m.params, attr_name)):
         case torch.nn.Parameter:
+            getattr(m.params, attr_name).data.copy_(value)
         case _:
+            setattr(m.params, attr_name, torch.tensor(value))
+@torch.no_grad()
+def update_atrt(comp, attr_name, value):
+    setattr(comp.params, attr_name, ms2coef(torch.tensor(value), 44100))
 with gr.Blocks() as demo:
             audio_output = gr.Audio(
                 type="numpy", label="Output Audio", interactive=False, loop=True
             )
+            direct_output = gr.Audio(
+                type="numpy", label="Direct Audio", interactive=False, loop=True
+            )
+            wet_output = gr.Audio(
+                type="numpy", label="Wet Audio", interactive=False, loop=True
             )
+    _ = gr.Markdown("## Parametric EQ")
+    peq_plot = gr.Plot(plot_eq(), label="PEQ Frequency Response", elem_id="peq-plot")
     with gr.Row():
         with gr.Column(min_width=160):
             _ = gr.Markdown("High Pass")
                 label="Q",
             )
+    _ = gr.Markdown("## Compressor and Expander")
+    with gr.Row():
+        with gr.Column():
+            comp = fx[6]
+            cmp_th = gr.Slider(
+                minimum=-60,
+                maximum=0,
+                value=comp.params.cmp_th.item(),
+                interactive=True,
+                label="Comp. Threshold (dB)",
+            )
+            cmp_ratio = gr.Slider(
+                minimum=1,
+                maximum=20,
+                value=comp.params.cmp_ratio.item(),
+                interactive=True,
+                label="Comp. Ratio",
+            )
+            make_up = gr.Slider(
+                minimum=-12,
+                maximum=12,
+                value=comp.params.make_up.item(),
+                interactive=True,
+                label="Make Up (dB)",
+            )
+            attack_time = gr.Slider(
+                minimum=0.1,
+                maximum=100,
+                value=coef2ms(comp.params.at, 44100).item(),
+                interactive=True,
+                label="Attack Time (ms)",
+            )
+            release_time = gr.Slider(
+                minimum=50,
+                maximum=1000,
+                value=coef2ms(comp.params.rt, 44100).item(),
+                interactive=True,
+                label="Release Time (ms)",
+            )
+            exp_ratio = gr.Slider(
+                minimum=0,
+                maximum=1,
+                value=comp.params.exp_ratio.item(),
+                interactive=True,
+                label="Exp. Ratio",
+            )
+            exp_th = gr.Slider(
+                minimum=-80,
+                maximum=0,
+                value=comp.params.exp_th.item(),
+                interactive=True,
+                label="Exp. Threshold (dB)",
+            )
+        with gr.Column():
+            comp_plot = gr.Plot(
+                plot_comp(), label="Compressor Curve", elem_id="comp-plot"
+            )
     delay_plot = gr.Plot(
         plot_delay(), label="Delay Frequency Response", elem_id="delay-plot"
     )
     ):
         s.input(
             lambda *args, eq=eq, attr_name=attr_name: chain_functions(  # chain_functions(
+                lambda args: (update_param(eq, attr_name, args[0]), args[1]),
                 lambda args: (fx2z(), args[1]),
                 lambda args: args[1],
                 lambda i: update_pc(i) + [model2json(), plot_eq()],
             outputs=update_pc_outputs + [json_output, peq_plot],
         )
+    for f, s, attr_name in zip(
+        [update_param] * 5 + [update_atrt] * 2,
+        [
+            cmp_th,
+            cmp_ratio,
+            make_up,
+            exp_ratio,
+            exp_th,
+            attack_time,
+            release_time,
+        ],
+        ["cmp_th", "cmp_ratio", "make_up", "exp_ratio", "exp_th", "at", "rt"],
+    ):
+        s.input(
+            lambda *args, attr_name=attr_name, f=f: chain_functions(
+                lambda args: (f(comp, attr_name, args[0]), args[1]),
+                lambda args: (fx2z(), args[1]),
+                lambda args: args[1],
+                lambda i: update_pc(i) + [model2json(), plot_comp()],
+            )(args),
+            inputs=[s, extra_pc_dropdown],
+            outputs=update_pc_outputs + [json_output, comp_plot],
+        )
     render_button.click(
         # lambda *args: (
         #     lambda x: (
         ],
         outputs=[
             audio_output,
+            direct_output,
+            wet_output,
         ],
     )
         lp.params.Q.item(),
         hp.params.freq.item(),
         hp.params.Q.item(),
+        comp.params.cmp_th.item(),
+        comp.params.cmp_ratio.item(),
+        comp.params.make_up.item(),
+        comp.params.exp_th.item(),
+        comp.params.exp_ratio.item(),
+        coef2ms(comp.params.at, 44100).item(),
+        coef2ms(comp.params.rt, 44100).item(),
     ]
     update_fx_outputs = [
         pk1_freq,
         lp_q,
         hp_freq,
         hp_q,
+        cmp_th,
+        cmp_ratio,
+        make_up,
+        exp_th,
+        exp_ratio,
+        attack_time,
+        release_time,
     ]
     update_plots = lambda: [
         plot_eq(),