Spaces:

szin94
/

dmsp

Sleeping

App Files Files Community

szin94 committed on Jul 7

Commit

bc3e180

•

1 Parent(s): 14500e9

first commit

Browse files

Files changed (17) hide show

.gitattributes +1 -0
.gitignore +17 -0
README.md +1 -1
app.py +166 -0
ckpt/config.yaml +16 -0
ckpt/dmsp.ckpt +3 -0
ckpt/pitch.yaml +25 -0
requirements.txt +5 -0
src/model/nn/blocks.py +208 -0
src/model/nn/ddsp.py +69 -0
src/model/nn/dmsp.py +63 -0
src/model/nn/synthesizer.py +125 -0
src/utils/audio.py +219 -0
src/utils/control.py +61 -0
src/utils/ddsp.py +175 -0
src/utils/misc.py +336 -0
src/utils/plot.py +1132 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ckpt/dmsp.ckpt filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,17 @@

+.*.swp
+.*.py.swp
+*/.*.py.swp
+*/*/.*.py.swp
+*/*/*/.*.py.swp
+__pycache__/
+*/__pycache__/
+*/*/__pycache__/
+*/*/*/__pycache__/
+src/*.py
+src/configs
+src/dataset
+src/task
+src/model/cpp
+src/model/*.py
+check.py

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Dmsp
 emoji: 😻
 colorFrom: indigo
 colorTo: red

 ---
+title: dmsp
 emoji: 😻
 colorFrom: indigo
 colorTo: red

app.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import os
+import glob
+import yaml
+import torch
+import __main__
+import numpy as np
+import soundfile as sf
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+import gradio as gr
+from src.model.nn.synthesizer import Synthesizer
+from src.utils.misc import triangular, downsample
+from src.utils.plot import state_video as plot_state_video
+from src.utils.audio import mel_basis, state_to_wav
+from src.utils.control import vibrato as control_vibrato
+class ConfigArgument:
+    def __getitem__(self,key):
+        return getattr(self, key)
+    def __setitem__(self,key,value):
+        return setattr(self, key, value)
+setattr(__main__, "ConfigArgument", ConfigArgument)
+def filter_state_dict(ckpt):
+    out_dict = {}
+    for key in ckpt.keys():
+        new_key = key[6:] if str(key)[:6] == 'model.' else key
+        out_dict[new_key] = ckpt[key]
+    return out_dict
+def flush(directory):
+    os.makedirs(directory, exist_ok=True)
+    files = glob.glob(f'{directory}/*')
+    for f in files:
+        os.remove(f)
+def add_glissando(f_0, Nt, sr, glissando, max_t):
+    front = int(0.2 * np.random.rand() * sr * max_t)
+    rear =  int((0.2 * np.random.rand() + 0.3) * sr * max_t)
+    middle = max(0, len(f_0) - front - rear)
+    ramp = glissando * torch.cat((torch.zeros(front), torch.linspace(0,1,middle), torch.ones(rear)), dim=-1)
+    return f_0 * (1 + ramp)
+def plot_spectrogram(path, x, n_fft=2048, hop_length=512, n_mel=256, samplerate=48000, max_duration=1):
+    x_wave = np.zeros(int(max_duration * samplerate))
+    x_wave[:len(x)] += x
+    x_spec = librosa.stft(
+        x_wave, n_fft=n_fft, hop_length=hop_length, win_length=n_fft, pad_mode='reflect')
+    mag = np.abs(x_spec) # (n_frames, n_freq)
+    mel_fbank = mel_basis(samplerate, n_fft, n_mel) # (n_mel, n_freq)
+    mel = np.einsum('ij,jk->ik', mel_fbank, mag) # (n_frames, n_mel)
+    plt.figure(figsize=(7,7))
+    librosa.display.specshow(mel)
+    plt.xticks([])
+    plt.yticks([])
+    plt.clim([0, 30])
+    plt.tight_layout()
+    plt.savefig(path, transparent=True)
+    plt.close('all')
+    plt.clf()
+with open("ckpt/config.yaml") as stream:
+    configs = yaml.safe_load(stream)
+with open("ckpt/pitch.yaml") as stream:
+    pitch_dict = yaml.safe_load(stream)
+def get_data(duration, resolution, note, glissando, vibrato, stiffness, tension, pluck, amplitude):
+    sr = configs['sr']
+    Nt = int(duration * sr)
+    Nx = int(resolution)
+    xgrid = torch.linspace(0,1,Nx)
+    tgrid = torch.arange(Nt) / sr
+    pitch = pitch_dict[note]
+    t60_min_1=20.; t60_max_1=30.; t60_min_2=30.; t60_max_2=30.
+    t60_diff_max=5.
+    T60 = torch.Tensor([[[1000., 25.],[100., 30.]]])
+    Nw = int(Nt / configs['block_size']) + 1
+    xg, tg = torch.meshgrid(xgrid, tgrid, indexing='ij')
+    ka = torch.Tensor([stiffness]).view(-1,1) # (1,1)
+    al = torch.Tensor([tension]).view(-1,1) # (1,1)
+    f_0 = torch.ones(Nt) * pitch # (Nt)
+    nx  = torch.Tensor([[[Nx]]]).float()
+    p_x = torch.ones_like(nx) * pluck
+    p_a = torch.ones_like(nx) * amplitude
+    u_0 = triangular(Nx, nx, p_x, p_a) # (1, 1, Nx)
+    f_0 = add_glissando(f_0, Nt, sr, glissando, Nt / sr)
+    f_0 = f_0 + control_vibrato(f_0.view(1,-1), 1/sr, mf=[3.,5.], ma=vibrato)
+    f_0 = downsample(f_0, factor=configs['block_size'])
+    xg  = xg[:,0].view(-1,1) # (Nx, 1)
+    tg  = tg                 # (Nx, Nt)
+    ka  = ka.repeat(Nx,1)    # (Nx, 1)
+    al  = al.repeat(Nx,1)    # (Nx, 1)
+    T60 = T60                # (Nx, 1, 1)
+    f_0 = f_0.repeat(Nx,1)   # (Nx, Nw)
+    u_0 = u_0.repeat(Nx,1,1) # (Nx, 1, Nx)
+    params = [xg, tg, ka, al, T60, None, None]
+    return params, f_0, u_0
+def run(duration, resolution, pitch, glissando, vibrato, stiffness, tension, pluck, amplitude):
+    checkpoint = torch.load('ckpt/dmsp.ckpt', map_location='cpu')
+    checkpoint = filter_state_dict(checkpoint['state_dict'])
+    model = Synthesizer(**configs)
+    model.load_state_dict(checkpoint)
+    params, f_0, u_0 = get_data( \
+        duration, resolution, pitch, glissando, vibrato, stiffness, tension, pluck, amplitude)
+    with torch.no_grad():
+        ut, mode_input, mode_output = model(params, f_0, u_0)
+    ut = ut.detach() # (Nx, Nt)
+    ut_wave = configs['gain'] * ut.mean(0)
+    save_dir = 'results'
+    prefix = 'dmsp'
+    fname = 'output'
+    flush(save_dir)
+    audio_name = f'{save_dir}/{fname}.wav'
+    video_name = f'{save_dir}/{prefix}-{fname}.mp4'
+    spec_name  = f'{save_dir}/spec.png'
+    ut = ut.numpy().T
+    ut_wave = ut_wave.numpy()
+    maxy = 0.022
+    sf.write(audio_name, ut_wave, samplerate=configs['sr'])
+    plot_spectrogram(spec_name, ut_wave, samplerate=configs['sr'])
+    plot_state_video(save_dir, ut, configs['sr'], prefix=prefix, fname=fname, maxy=maxy)
+    return spec_name, video_name
+pitch_list = ["G2", "Ab2", "A2", "Bb2", "B2", "C3", "Db3", "D3", "Eb3", "E3", "F3", "Gb3", "G3", "Ab3", "A3", "Bb3", "B3", "C4", "Db4", "D4", "Eb4", "E4", "F4", "Gb4", "G4",]
+duration   = gr.Slider(0.1, 1.0, value=1.0, label="Time Duration")
+resolution = gr.Slider(128, 256, value=256, label="Space Resolution", info='Reduce to simulate faster. Recommended to leave it as 256.')
+pitch      = gr.Dropdown(pitch_list, value="C3", label="Pitch", info="Specify the fundamental frequency as a musical note.")
+glissando  = gr.Slider(-0.4, 0.4, value=0, label="Glissando", info='Set +/- to ascend (+) or descend (-) the pitch')
+vibrato    = gr.Slider(0, 0.25, value=0, label="Vibrato", info='Set larger value to add more vibrato')
+stiffness  = gr.Slider(0.011, 0.029, value=0.02, label="Stiffness", info='Stiffness can change the resulting pitch. Specify low values when tension is high')
+tension    = gr.Slider(1.0,  25, value=4, label="Tension", info='Tension can introduce non-linear effects such as pitch glide. Specify low values when stiffness is high')
+pluck      = gr.Slider(0.12, 0.5, value=0.2, label="Pluck Position", info='Peak position of an initial condition')
+amplitude  = gr.Slider(0.001, 0.02, value=0.015, label="Pluck Amplitude", info='Peak amplitude of an initial condition')
+demo = gr.Interface(
+    fn=run,
+    inputs=[
+        duration, resolution, pitch, glissando, vibrato,
+        stiffness, tension, pluck, amplitude,
+    ],
+    outputs=[
+        gr.Image(),
+        gr.Video(format='mp4', include_audio=True),
+    ],
+)
+demo.launch()

ckpt/config.yaml ADDED Viewed

	@@ -0,0 +1,16 @@

+n_modes: 40
+n_bands: 65
+embed_dim: 128
+use_precomputed_mod: False
+harmonic: 'inharmonic'
+hidden_dim: 512
+block_size: 256
+sr: 48000
+gain: 100
+x_scale: [0., 1.]
+t_scale: [0., .3]
+gamma_scale: [196, 880]
+kappa_scale: [.01, .03]
+alpha_scale: [1., 30.]
+sig_0_scale: [0., 0.7]
+sig_1_scale: [0., 0.00001]

ckpt/dmsp.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:071f2f176f1ced82e287ee5d41ffbe36dc6f613e7e5b1dea4e19e9609f82655b
+size 104471835

ckpt/pitch.yaml ADDED Viewed

	@@ -0,0 +1,25 @@

+G2 :  98.00
+Ab2:  103.83
+A2 :  110.00
+Bb2:  116.54
+B2 :  123.47
+C3 :  130.81
+Db3:  138.59
+D3 :  146.83
+Eb3:  155.56
+E3 :  164.81
+F3 :  174.61
+Gb3:  185.00
+G3 :  196.00
+Ab3:  207.65
+A3 :  220.00
+Bb3:  233.08
+B3 :  246.94
+C4 :  261.63
+Db4:  277.18
+D4 :  293.66
+Eb4:  311.13
+E4 :  329.63
+F4 :  349.23
+Gb4:  369.99
+G4 :  392.00

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+torch==1.12.1
+numpy==1.24.3
+einops
+librosa
+omegaconf

src/model/nn/blocks.py ADDED Viewed

	@@ -0,0 +1,208 @@

+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from einops import rearrange
+from src.utils import misc as utils
+def apply_gain(x, gain, fn=None):
+    gain = fn(gain) if fn is not None else gain
+    x_list = x.chunk(len(gain), -1)
+    x_list = [gain[i] * x_i for i, x_i in enumerate(x_list)]
+    return torch.cat(x_list, dim=-1)
+class FMBlock(nn.Module):
+    def __init__(self, input_dim, embed_dim, num_features):
+        super().__init__()
+        concat_size = embed_dim * num_features + embed_dim
+        feature_dim = embed_dim * num_features
+        self.rff2 = RFF2(input_dim, embed_dim//2)
+        self.tmlp = mlp(concat_size, feature_dim, 5)
+        self.proj = nn.Linear(concat_size, 2*input_dim)
+        self.activation = nn.GLU(dim=-1)
+        gain_in = torch.randn(num_features) / 2
+        gain_out = torch.Tensor([0.1])
+        self.register_parameter('gain_in', nn.Parameter(gain_in, requires_grad=True))
+        self.register_parameter('gain_out', nn.Parameter(gain_out, requires_grad=True))
+    def forward(self, input, feature, slider, omega):
+        ''' input  : (B T input_dim)
+            feature: (B T feature_dim)
+            slider : (B T 1)
+        '''
+        _input = input / (1.3*math.pi) - 1
+        _input = self.rff2(_input)
+        feature = apply_gain(feature, self.gain_in, torch.tanh)
+        x = torch.cat((_input, feature), dim=-1)
+        x = torch.cat((self.tmlp(x), _input), dim=-1)
+        x = self.activation(self.proj(x))
+        gate = torch.tanh((slider - 1) * self.gain_out)
+        return input + omega * x * gate
+class AMBlock(nn.Module):
+    def __init__(self, input_dim, embed_dim, num_features):
+        super().__init__()
+        concat_size = embed_dim * num_features + embed_dim
+        feature_dim = embed_dim * num_features
+        self.rff2 = RFF2(input_dim, embed_dim//2)
+        self.tmlp = mlp(concat_size, feature_dim, 5)
+        self.proj = nn.Linear(concat_size, 2*input_dim)
+        self.activation = nn.GLU(dim=-1)
+        gain_in = torch.randn(num_features) / 2
+        self.register_parameter('gain_in', nn.Parameter(gain_in, requires_grad=True))
+    def forward(self, input, feature, slider):
+        ''' input  : (B T input_dim)
+            feature: (B T feature_dim)
+            slider : (B T 1)
+        '''
+        _input = input * 110 - 0.55
+        _input = self.rff2(_input)
+        feature = apply_gain(feature, self.gain_in, torch.tanh)
+        x = torch.cat((_input, feature), dim=-1)
+        x = torch.cat((self.tmlp(x), _input), dim=-1)
+        x = self.activation(self.proj(x))
+        return input * (1 + x)
+class ModBlock(nn.Module):
+    def __init__(self, input_dim, feature_dim, embed_dim):
+        super().__init__()
+        cat_size = 1+feature_dim
+        self.tmlp = mlp(cat_size, feature_dim, 2)
+        self.proj = nn.Linear(cat_size, 2)
+        self.activation = nn.GLU(dim=-1)
+    def forward(self, input, feature, slider):
+        ''' input  : (B T input_dim)
+            feature: (B T feature_dim)
+            slider : (B T 1)
+        '''
+        input   =   input.unsqueeze(-1) # (B T input_dim 1)
+        feature = feature.unsqueeze(-2).repeat(1,1,input.size(-2),1)
+        x = torch.cat((input, feature), dim=-1)
+        x = torch.cat((self.tmlp(x), input), dim=-1)
+        x = self.activation(self.proj(x))
+        return (input * (1 + x)).squeeze(-1)
+def mlp(in_size, hidden_size, n_layers):
+    channels = [in_size] + (n_layers) * [hidden_size]
+    net = []
+    for i in range(n_layers):
+        net.append(nn.Linear(channels[i], channels[i + 1]))
+        #net.append(nn.LayerNorm(channels[i + 1]))
+        net.append(nn.PReLU())
+    return nn.Sequential(*net)
+class RFF2(nn.Module):
+    """ Random Fourier Features Module """
+    def __init__(self, input_dim, embed_dim, scale=1.):
+        super().__init__()
+        #N = torch.randn(input_dim, embed_dim)
+        N = torch.ones((input_dim, embed_dim)) / input_dim / embed_dim
+        N = nn.Parameter(N, requires_grad=False)
+        e = torch.Tensor([scale])
+        e = nn.Parameter(e, requires_grad=True)
+        self.register_buffer('N', N)
+        self.register_parameter('e', e)
+    def forward(self, x):
+        ''' x: (Bs, Nt, input_dim)
+            -> (Bs, Nt, embed_dim)
+        '''
+        B = self.e * self.N
+        x_embd = utils.fourier_feature(x, B)
+        return x_embd
+class RFF(nn.Module):
+    """ Random Fourier Features Module """
+    def __init__(self, scales, embed_dim):
+        super().__init__()
+        input_dim = len(scales)
+        N = torch.randn(input_dim, embed_dim)
+        N = nn.Parameter(N, requires_grad=False)
+        e = torch.Tensor(scales).view(-1,1)
+        e = nn.Parameter(e, requires_grad=True)
+        self.register_buffer('N', N)
+        self.register_parameter('e', e)
+    def forward(self, x):
+        ''' x: (Bs, Nt, input_dim)
+            -> (Bs, Nt, input_dim*embed_dim)
+        '''
+        xs = x.chunk(self.N.size(0), -1) # (Bs, Nt, 1) * input_dim
+        Ns = self.N.chunk(self.N.size(0), 0) # (1, embed_dim) * input_dim
+        Bs = [torch.pow(10, self.e[i]) * N for i, N in enumerate(Ns)]
+        x_embd = [utils.fourier_feature(xs[i], B) for i, B in enumerate(Bs)]
+        return torch.cat(x_embd, dim=-1)
+class ModeEstimator(nn.Module):
+    def __init__(self, n_modes, hidden_dim, kappa_scale=None, gamma_scale=None, inharmonic=True, sr=48000):
+        super().__init__()
+        self.sr = sr
+        self.kappa_scale = kappa_scale
+        self.gamma_scale = gamma_scale
+        self.rff = RFF([1.]*5, hidden_dim//2)
+        self.a_mlp = mlp(5*hidden_dim, hidden_dim, 2)
+        self.a_proj = nn.Linear(hidden_dim, n_modes)
+        self.tanh = nn.Tanh()
+        if inharmonic:
+            self.f_mlp = mlp(5*hidden_dim, hidden_dim, 2)
+            self.f_proj = nn.Linear(hidden_dim, n_modes)
+            self.sigmoid = nn.Sigmoid()
+        else:
+            self.f_mlp = None
+            self.f_proj = None
+            self.sigmoid = nn.Sigmoid()
+    def forward(self, u_0, x_p, kappa, gamma):
+        ''' u_0   : (b, 1, x)
+            x_p   : (b, 1, 1)
+            kappa : (b, 1, 1)
+            gamma : (b, 1, 1)
+        '''
+        p_x = torch.argmax(u_0, dim=-1, keepdim=True) / 255. # (b, 1, 1)
+        p_a = torch.max(u_0, dim=-1, keepdim=True).values / 0.02 # (b, 1, 1)
+        kappa = self.normalize_kappa(kappa)
+        gamma = self.normalize_gamma(gamma)
+        con = torch.cat((p_x, p_a, x_p, kappa, gamma), dim=-1) # (b, 1, 5)
+        con = self.rff(con) # (b, 1, 3*hidden_dim)
+        mode_amps = self.a_mlp(con) # (b, 1, k)
+        mode_amps = self.tanh(1e-3 * self.a_proj(mode_amps)) # (b, 1, m)
+        if self.f_mlp is not None:
+            mode_freq = self.f_mlp(con) # (b, 1, k)
+            mode_freq = 0.3 * self.sigmoid(self.f_proj(mode_freq)) # (b, 1, m)
+            mode_freq = mode_freq.cumsum(-1)
+        else:
+            int_mults = torch.ones_like(mode_amps).cumsum(-1) # (b, 1, k)
+            omega = gamma / self.sr * (2*math.pi)
+            mode_freq = omega * int_mults
+        return mode_amps, mode_freq
+    def normalize_gamma(self, x):
+        if self.gamma_scale is not None:
+            minval = min(self.gamma_scale)
+            denval = max(self.gamma_scale) - minval
+            x = (x - minval) / denval
+        return x
+    def normalize_kappa(self, x):
+        if self.kappa_scale is not None:
+            minval = min(self.kappa_scale)
+            denval = max(self.kappa_scale) - minval
+            x = (x - minval) / denval
+        return x

src/model/nn/ddsp.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import torch
+import torch.nn as nn
+from src.model.nn.blocks import FMBlock, AMBlock
+from src.utils.ddsp import upsample
+from src.utils.ddsp import remove_above_nyquist_mode
+from src.utils.ddsp import amp_to_impulse_response, fft_convolve
+from src.utils.ddsp import modal_synth
+from src.utils.ddsp import resample
+import math
+class DDSP(nn.Module):
+    def __init__(self,
+            feature_size, hidden_size,
+            n_modes, n_bands, sampling_rate, block_size,
+            fm=False,
+        ):
+        super().__init__()
+        self.n_modes = n_modes
+        self.freq_modulator = FMBlock(n_modes, feature_size) if fm else None
+        self.coef_modulator = AMBlock(n_modes, feature_size)
+        self.noise_proj = nn.Linear(feature_size, n_bands)
+        noise_gate = nn.Parameter(torch.tensor([1e-2]), requires_grad=True)
+        self.register_parameter("noise_gate", noise_gate)
+        self.register_buffer("sampling_rate", torch.tensor(sampling_rate))
+        self.register_buffer("block_size", torch.tensor(block_size))
+    def forward(self, hidden, mode_freq, mode_coef, times, alpha, lengths):
+        ''' hidden    : (Bs,  1, hidden_size)
+            mode_freq : (Bs, Nt, n_modes)
+            mode_coef : (Bs,  1, n_modes)
+            times     : (Bs, Nt, 1)
+        '''
+        if self.freq_modulator is None:
+            freq_m = mode_freq # integer multiples
+        else:
+            freq_m = self.freq_modulator(mode_freq, hidden)
+        coef_m = self.coef_modulator(mode_coef, hidden, times)
+        #==============================
+        # harmonic part
+        #==============================
+        freqs = freq_m / (2*math.pi) * self.sampling_rate
+        coef_m = remove_above_nyquist_mode(coef_m, freqs, self.sampling_rate) # (Bs, Nt, n_modes)
+        freq_s = upsample(freq_m, self.block_size).narrow(1,0,lengths)
+        coef_s = upsample(coef_m, self.block_size).narrow(1,0,lengths)
+        harmonic = modal_synth(freq_s, coef_s, self.sampling_rate)
+        #==============================
+        # noise part
+        #==============================
+        ngate = torch.tanh((alpha - 1) * self.noise_gate)
+        param = ngate * torch.sigmoid(self.noise_proj(hidden) - 5)
+        impulse = amp_to_impulse_response(param, self.block_size)
+        noise = torch.rand(
+            impulse.shape[0],
+            impulse.shape[1],
+            self.block_size,
+        ).to(impulse) * 2 - 1
+        noise = fft_convolve(noise, impulse).contiguous()
+        noise = noise.reshape(noise.shape[0], -1, 1).narrow(1,0,lengths)
+        signal = harmonic + noise
+        return signal.squeeze(-1), freq_m, coef_m

src/model/nn/dmsp.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import torch
+import torch.nn as nn
+from src.model.nn.blocks import FMBlock, AMBlock, ModBlock
+from src.utils.ddsp import scale_function, remove_above_nyquist, upsample
+from src.utils.ddsp import remove_above_nyquist_mode
+from src.utils.ddsp import harmonic_synth, amp_to_impulse_response, fft_convolve
+from src.utils.ddsp import modal_synth
+from src.utils.ddsp import resample
+import math
+class DMSP(nn.Module):
+    def __init__(self,
+            embed_dim, hidden_size, n_features,
+            n_modes, n_bands, sampling_rate, block_size,
+        ):
+        super().__init__()
+        self.n_modes = n_modes
+        self.freq_modulator = FMBlock(n_modes, embed_dim, n_features)
+        self.coef_modulator = AMBlock(n_modes, embed_dim, n_features)
+        self.proj_noise = nn.Linear(n_features*embed_dim, n_bands)
+        self.register_buffer("sampling_rate", torch.tensor(sampling_rate))
+        self.register_buffer("block_size", torch.tensor(block_size))
+    def forward(self, hidden, mode_freq, mode_coef, times, alpha, omega, lengths):
+        ''' hidden    : (Bs,  1, hidden_size)
+            mode_freq : (Bs, Nt, n_modes)
+            mode_coef : (Bs,  1, n_modes)
+            times     : (Bs, Nt, 1)
+        '''
+        freq_m = self.freq_modulator(mode_freq, hidden, alpha, omega)
+        coef_m = self.coef_modulator(mode_coef, hidden, times)
+        #==============================
+        # harmonic part
+        #==============================
+        freqs = freq_m / (2*math.pi) * self.sampling_rate
+        coef_m = remove_above_nyquist_mode(coef_m, freqs, self.sampling_rate) # (Bs, Nt, n_modes)
+        freq_s = upsample(freq_m, self.block_size).narrow(1,0,lengths)
+        coef_s = upsample(coef_m, self.block_size).narrow(1,0,lengths)
+        harmonic = modal_synth(freq_s, coef_s, self.sampling_rate)
+        #==============================
+        # noise part
+        #==============================
+        param = scale_function(self.proj_noise(hidden) - 5)
+        impulse = amp_to_impulse_response(param, self.block_size)
+        noise = torch.rand(
+            impulse.shape[0],
+            impulse.shape[1],
+            self.block_size,
+        ).to(impulse) * 2 - 1
+        noise = fft_convolve(noise, impulse).contiguous()
+        noise = noise.reshape(noise.shape[0], -1, 1).narrow(1,0,lengths)
+        signal = harmonic + noise
+        return signal.squeeze(-1), freq_m, coef_m

src/model/nn/synthesizer.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from src.utils import audio as audio
+class Synthesizer(nn.Module):
+    """ Synthesizer Network """
+    def __init__(self,
+            embed_dim=64,
+            x_scale=1, t_scale=1,
+            gamma_scale=0, kappa_scale=0, alpha_scale=0, sig_0_scale=0, sig_1_scale=0,
+            **kwargs):
+        super().__init__()
+        self.sr=kwargs['sr']
+        hidden_dim=kwargs['hidden_dim']
+        self.n_modes = kwargs['n_modes']
+        inharmonic = kwargs['harmonic'].lower() == 'inharmonic'
+        self.x_scale = x_scale
+        self.t_scale = t_scale
+        self.gamma_scale = gamma_scale
+        self.kappa_scale = kappa_scale
+        self.alpha_scale = alpha_scale
+        self.sig_0_scale = sig_0_scale
+        self.sig_1_scale = sig_1_scale
+        from src.model.nn.blocks import RFF, ModeEstimator
+        n_feats = 7
+        self.material_encoder = RFF([1.]*n_feats, embed_dim // 2)
+        feature_size = embed_dim * n_feats
+        self.mode_estimator = ModeEstimator(
+            self.n_modes, embed_dim, kappa_scale, gamma_scale,
+            inharmonic=inharmonic,
+        )
+        if inharmonic:
+            from src.model.nn.dmsp import DMSP
+            self.net = DMSP(
+                embed_dim=embed_dim,
+                hidden_size=hidden_dim,
+                n_features=n_feats,
+                n_modes=kwargs['n_modes'],
+                n_bands=kwargs['n_bands'],
+                block_size=kwargs['block_size'],
+                sampling_rate=kwargs['sr'],
+            )
+        else:
+            from src.model.nn.ddsp import DDSP
+            self.net = DDSP(
+                feature_size=feature_size,
+                hidden_size=hidden_dim,
+                n_modes=kwargs['n_modes'],
+                n_bands=kwargs['n_bands'],
+                block_size=kwargs['block_size'],
+                sampling_rate=kwargs['sr'],
+                fm=kwargs['ddsp_frequency_modulation'],
+            )
+    def forward(self, params, pitch, initial):
+        ''' params : input parameters
+            pitch  : fundamental frequency in Hz
+            initial: initial condition
+        '''
+        space, times, kappa, alpha, t60, mode_freq, mode_coef = params
+        f_0 = pitch.unsqueeze(2)    # (b, frames, 1)
+        times = times.unsqueeze(-1) # (b, sample, 1)
+        kappa = kappa.unsqueeze(-1) # (b, 1, 1)
+        alpha = alpha.unsqueeze(-1) # (b, 1, 1)
+        space = space.unsqueeze(-1) # (b, 1, 1)
+        gamma = 2*f_0               # (b, frames, 1)
+        omega = f_0 / self.sr * (2*math.pi) # (b, t, 1)
+        relf0 = omega - omega.narrow(1,0,1) # (b, t, 1)
+        in_coef, in_freq = self.mode_estimator(initial, space, kappa, gamma.narrow(1,9,1))
+        mode_coef = in_coef if mode_coef is None else mode_coef
+        mode_freq = in_freq if mode_freq is None else mode_freq
+        mode_freq = mode_freq + relf0 # linear FM
+        Nt = times.size(1)     # total number of samples
+        Nf = mode_freq.size(1) # total number of frames
+        frames = self.get_frame_time(times, Nf)
+        space = space.repeat(1,f_0.size(1),1) # (b, frames, 1)
+        alpha = alpha.repeat(1,f_0.size(1),1) # (b, frames, 1)
+        kappa = kappa.repeat(1,f_0.size(1),1) # (b, frames, 1)
+        sigma = audio.T60_to_sigma(t60, f_0, 2*f_0*kappa) # (b, frames, 2)
+        # fourier features
+        feat = [space, frames, kappa, alpha, sigma, gamma]
+        feat = self.normalize_params(feat)
+        feat = self.material_encoder(feat) # (b, frames, n_feats * embed_dim)
+        damping = torch.exp(- frames * sigma.narrow(-1,0,1))
+        mode_coef = mode_coef * damping
+        ut, ut_freq, ut_coef = self.net(feat, mode_freq, mode_coef, frames, alpha, omega, Nt)
+        return ut, [in_freq, in_coef], [ut_freq, ut_coef]
+    def get_frame_time(self, times, Nf):
+        t_0 = times.narrow(1,0,1) # (Bs, 1, 1)
+        t_k = torch.ones_like(t_0).repeat(1,Nf,1).cumsum(1) / self.sr
+        t_k = t_k + t_0 # (Bs, Nt, 1)
+        return t_k
+    def normalize_params(self, params):
+        def rescale(var, scale):
+            minval = min(scale)
+            denval = max(scale) - minval
+            return (var - minval) / denval
+        space, times, kappa, alpha, sigma, gamma = params
+        sig_0, sig_1 = sigma.chunk(2, -1)
+        space = rescale(space, self.x_scale)
+        times = rescale(times - max(self.t_scale), self.t_scale)
+        kappa = rescale(kappa, self.kappa_scale)
+        alpha = rescale(alpha, self.alpha_scale)
+        sig_0 = rescale(sig_0, self.sig_0_scale)
+        sig_1 = rescale(sig_1, self.sig_1_scale)
+        gamma = rescale(gamma, self.gamma_scale)
+        sigma = torch.cat((sig_0, sig_1), dim=-1)
+        return torch.cat([space, times, kappa, alpha, sigma, gamma], dim=-1)

src/utils/audio.py ADDED Viewed

	@@ -0,0 +1,219 @@

+import math
+import torch
+import torch.nn.functional as F
+import numpy as np
+import librosa
+import soundfile as sf
+from einops import rearrange
+eps = np.finfo(np.float32).eps
+def calculate_rms(amp):
+    if isinstance(amp, torch.Tensor):
+        return amp.pow(2).mean(-1, keepdim=True).pow(.5)
+    elif isinstance(amp, np.ndarray):
+        return np.sqrt(np.mean(np.square(amp), axis=-1) + eps)
+    else:
+        raise TypeError(f"argument 'amp' must be torch.Tensor or np.ndarray. got: {type(amp)}")
+def dB2amp(dB):
+    return np.power(10., dB/20.)
+def amp2dB(amp):
+    return 20. * np.log10(amp)
+def rms_normalize(wav, ref_dBFS=-23.0, skip_nan=True):
+    exists_nan = np.isnan(np.sum(wav))
+    if not skip_nan:
+        assert not exists_nan, np.isnan(wav)
+    if exists_nan:
+        return wav, 1.
+    # RMS normalize
+    # value_dBFS = 20*log10(rms(signal) * sqrt(2)) = 20*log10(rms(signal)) + 3.0103
+    rms = calculate_rms(wav)
+    if isinstance(ref_dBFS, torch.Tensor):
+        ref_linear = torch.pow(10, (ref_dBFS-3.0103)/20.)
+    else:
+        ref_linear = np.power(10, (ref_dBFS-3.0103)/20.)
+    gain = ref_linear / (rms + eps)
+    wav = gain * wav
+    return wav, gain
+def ell_infty_normalize(wav, skip_nan=True):
+    if isinstance(wav, np.ndarray):
+        ''' numpy '''
+        exists_nan = np.isnan(np.sum(wav))
+        if not skip_nan:
+            assert not exists_nan, np.isnan(wav)
+        if exists_nan:
+            return wav, 1.
+        maxv = np.max(np.abs(wav), axis=-1)
+        # 1 if maxv == 0 else 1. / maxv
+        if len(list(maxv.shape)) == 0:
+            gain = 1 if maxv==0 else 1. / maxv
+        else:
+            gain = 1. / maxv; gain[maxv==0] = 1
+    elif isinstance(wav, torch.Tensor):
+        ''' torch '''
+        exists_nan = torch.isnan(wav.sum())
+        if not skip_nan:
+            assert not exists_nan, torch.isnan(wav)
+        if exists_nan:
+            return wav, 1.
+        maxv = wav.abs().max(-1).values.unsqueeze(-1)
+        # 1 if maxv == 0 else 1. / maxv
+        gain = torch.where(maxv.eq(0),
+            torch.ones_like(maxv), 1. / maxv)
+    else:
+        assert False, wav
+    wav = gain * wav
+    return wav, gain
+def dB_RMS(wav):
+    if isinstance(wav, torch.Tensor):
+        return 20 * torch.log10(calculate_rms(wav))
+    elif isinstance(wav, np.ndarray):
+        return 20 * np.log10(calculate_rms(wav))
+def mel_basis(sr, n_fft, n_mel):
+    return librosa.filters.mel(sr=sr,n_fft=n_fft,n_mels=n_mel,fmin=0,fmax=sr//2,norm=1)
+def inv_mel_basis(sr, n_fft, n_mel):
+    return librosa.filters.mel(
+        sr=sr, n_fft=n_fft, n_mels=n_mel, norm=None, fmin=0, fmax=sr//2,
+    ).T
+def lin_to_mel(linspec, sr, n_fft, n_mel=80):
+    basis = mel_basis(sr, n_fft, n_mel)
+    return basis @ linspec
+def save_waves(est, save_dir, sr=16000):
+    data = []
+    batch_size = inp.shape[0]
+    for b in range(batch_size):
+        est_wav = est[b,0].squeeze()
+        wave_path = f"{save_dir}/{b}.wav"
+        sf.write(wave_path, est_wav, samplerate=sr)
+def get_inverse_window(forward_window, frame_length, frame_step):
+    denom = torch.square(forward_window)
+    overlaps = -(-frame_length // frame_step)  # Ceiling division.
+    denom = F.pad(denom, (0, overlaps * frame_step - frame_length))
+    denom = denom.reshape(overlaps, frame_step)
+    denom = denom.sum(0, keepdims=True)
+    denom = denom.tile(overlaps, 1)
+    denom = denom.reshape(overlaps * frame_step)
+    return forward_window / denom[:frame_length]
+def state_to_wav(state, normalize=True, sr=48000):
+    ''' state: (Bs, Nt, Nx) '''
+    assert len(list(state.shape)) == 3, state.shape
+    Nt = state.size(1)
+    vel = ((state.narrow(1,1,Nt-1) - state.narrow(1,0,Nt-1)) * sr).sum(-1)
+    return ell_infty_normalize(vel)[0] if normalize else vel
+def state_to_spec(x, window):
+    ''' x: (Bs, Nt, Nx, Ch)
+        -> (Bs, Nt, Nx, Ch*n_fft*2)
+    '''
+    Bs, Nt, Nx, Ch = x.shape
+    n_ffts = window.size(-1)
+    n_freq = n_ffts // 2 + 1
+    hop_length = n_ffts // 4
+    x = rearrange(x, 'b t x c -> (b x c) t')
+    s = torch.stft(x, n_ffts, hop_length=hop_length, window=window)
+    s = rearrange(s, '(b x c) f t k -> b t x (c f k)',
+        b=Bs, x=Nx, c=Ch, f=n_freq, k=2)
+    return s
+def spec_to_state(x, window, length):
+    ''' x: (Bs, Nt, Nx, Ch*n_fft*2)
+        -> (Bs, Nt, Nx, Ch)
+    '''
+    Bs, Nt, Nx, _ = x.shape
+    n_ffts = window.size(-1)
+    n_freq = n_ffts // 2 + 1
+    x = rearrange(x, 'b t x (c f k) -> (b x c) f t k', f=n_freq, k=2)
+    x = torch.istft(x, n_ffts, length=length, window=window)
+    x = rearrange(x, '(b x c) t -> b t x c', b=Bs, x=Nx)
+    return x
def to_spec(x, window, reduce_channel=True):
    ''' STFT of a batch of signals with real/imag parts as real numbers.

        x     : (Bs, Nt)
        window: (n_fft,) analysis window; hop is n_fft // 4
        -> (Bs, n_frames, Nf*2) if reduce_channel==True
        -> (Bs, n_frames, Nf,2) otherwise
           where Nf = n_fft // 2 + 1
    '''
    n_ffts = window.size(-1)
    hop_length = n_ffts // 4
    # `return_complex` is mandatory for real input on current PyTorch;
    # view_as_real gives back the trailing (real, imag) axis.
    s = torch.stft(x, n_ffts, hop_length=hop_length, window=window,
        return_complex=True)
    s = torch.view_as_real(s).transpose(1, 2)   # (Bs, n_frames, Nf, 2)
    if reduce_channel:
        # Merge (frequency, real/imag) into a single feature axis.
        s = s.flatten(2)
    return s
def from_spec(x, window, length):
    ''' Inverse STFT of a spectrogram with interleaved real/imag features.

        x     : (Bs, n_frames, Nf*2) with Nf = n_fft // 2 + 1
        window: (n_fft,) synthesis window; its length sets n_fft
        length: (int) number of time samples to reconstruct
        -> (Bs, length)
    '''
    Bs, Nt, _ = x.shape
    n_ffts = window.size(-1)
    n_freq = n_ffts // 2 + 1
    # Split the feature axis back into (frequency, real/imag) and move
    # frequency in front of time, as torch.istft expects.
    x = x.reshape(Bs, Nt, n_freq, 2).transpose(1, 2)
    # torch.istft needs a complex tensor on current PyTorch.
    x = torch.view_as_complex(x.contiguous())
    return torch.istft(x, n_ffts, length=length, window=window)
def adjust_gain(y, x, minmax, ref_dBFS=-23.0):
    ''' Rescale `y` relative to the level of `x`, then apply a random gain.

        y       : signal to rescale
        x       : reference signal
        minmax  : [min, max] range (in dB) the random gain is drawn from,
                  uniformly and independently per leading index of `y`
        ref_dBFS: reference level used for both normalisation gains
        NOTE(review): `calculate_rms` and `eps` are defined elsewhere in
        this module; their exact definitions are not visible here.
    '''
    lo, hi = minmax[0], minmax[1]
    gain_db = (hi - lo) * torch.rand_like(y.narrow(-1,0,1)) + lo
    ref_amp = np.power(10, (ref_dBFS-3.0103)/20.)
    rand_amp = torch.pow(10, (gain_db-3.0103)/20.)
    rms_x = calculate_rms(x)
    rms_y = calculate_rms(y)
    scale_x = ref_amp / (rms_x + eps)
    scale_y = ref_amp / (rms_y + eps)
    # Bring y to the reference level, undo the gain x would need to reach
    # it, then divide by the random linear gain.
    return y * scale_y / scale_x / rand_amp
def degrade(x, rir, noise):
    ''' Convolve `x` with an impulse response and add level-adjusted noise.

        x    : (Bs, Nt)
        rir  : (Bs, Nt)
        noise: (Bs, Nt)
        -> (Bs, Nt)
    '''
    n_sig = x.size(-1)
    n_rir = rir.size(-1)
    # Zero-pad both operands before the FFT product, then keep the first
    # Nt samples of the result.
    spec_x = torch.fft.rfft(F.pad(x,   (0,n_rir)))
    spec_w = torch.fft.rfft(F.pad(rir, (0,n_rir)))
    wet = torch.fft.irfft(spec_x * spec_w).narrow(-1,0,n_sig)
    reverbed = adjust_gain(wet, x, [-0, 30])          # labelled "ser" in the original
    scaled_noise = adjust_gain(noise, reverbed, [10, 30])  # labelled "snr" in the original
    return reverbed + scaled_noise
def T60_to_sigma(T60, f_0, K):
    ''' Convert two (frequency, T60) pairs into two damping coefficients.

        T60 : (Bs, 2, 2)  [[T60_freq_1, T60_1], [T60_freq_2, T60_2]]
        f_0 : (Bs, Nt, 1) fundamental frequency
        K   : (Bs, Nt, 1) kappa (K == gamma * kappa_rel)
     -> sig : (Bs, Nt, 2)
    '''
    gamma = f_0 * 2

    def _zeta(freq):
        # Same expression for both reference frequencies.
        return - gamma.pow(2) + (gamma.pow(4) + 4 * K.pow(2) * (2 * math.pi * freq).pow(2)).pow(.5)

    (freq1, time1), (freq2, time2) = (
        T60.narrow(1,i,1).chunk(2,-1) for i in (0, 1)
    )
    zeta1 = _zeta(freq1)
    zeta2 = _zeta(freq2)
    scale = 6 * math.log(10)
    spread = zeta1 - zeta2
    sig0 = scale * (- zeta2 / time1 + zeta1 / time2) / spread
    sig1 = scale * (1 / time1 - 1 / time2) / spread
    return torch.cat((sig0, sig1), dim=-1)

src/utils/control.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import torch
+import numpy as np
+import torch.nn.functional as F
def constant(f0, n, dtype=None):
    ''' Hold each pitch value for `n` samples.

        f0 (batch_size,) tensor, or a Python scalar (treated as batch of 1)
        n  (int) number of samples
        dtype: dtype of the ones tensor the values are multiplied with
        -> (batch_size, n)
    '''
    f0 = torch.as_tensor(f0)
    # Build the ones on f0's device so accelerator inputs do not hit a
    # device mismatch.
    hold = torch.ones(1, n, dtype=dtype, device=f0.device)
    return f0.unsqueeze(-1) * hold
def linear(f1, f2, n):
    ''' Linear ramp from `f1` to `f2` over `n` samples (endpoints included).

        f1 (batch_size,) tensor, or a Python scalar (treated as batch of 1)
        f2 (batch_size,) tensor, or a Python scalar (treated as batch of 1)
        n  (int)
        -> (batch_size, n)
    '''
    def _as_batch(f):
        # Accept scalars and integer values; F.interpolate needs floats.
        f = torch.as_tensor(f)
        if not f.is_floating_point():
            f = f.to(torch.get_default_dtype())
        return f.reshape(1) if f.dim() == 0 else f

    ends = torch.stack((_as_batch(f1), _as_batch(f2)), dim=-1)   # (batch_size, 2)
    ramp = F.interpolate(ends.unsqueeze(1), size=n, mode='linear', align_corners=True)
    return ramp.squeeze(1)                                       # (batch_size, n)
def glissando(f1, f2, n, mode='linear'):
    ''' Pitch glide from `f1` to `f2` over `n` samples.

        Only mode='linear' is implemented (delegates to `linear`); any
        other mode raises NotImplementedError carrying the mode name.
    '''
    if mode != 'linear':
        raise NotImplementedError(mode)
    return linear(f1, f2, n)
def vibrato(f0, k, mf=(3, 5), ma=0.05, ma_in_hz=False):
    ''' Add a randomly parameterised vibrato to a pitch contour.

        f0 (batch_size, n): pitch contour
        k  (float): time step, 1/sr
        mf (sequence): modulation frequency range ([min, max]); the rate is
            drawn uniformly from it, once per batch item
        ma (float): maximum modulation amplitude; the actual amplitude is
            drawn uniformly from [0, ma) per batch item
        ma_in_hz (bool): ma is given in Hz (else: ma is given as a weighting factor of f0)
        -> (batch_size, n)
    '''
    ff = f0.narrow(-1,0,1)  # (B, 1) template for the per-item random draws
    # Sample inside [min, max] as documented (the previous expression
    # `mf[1] * rand + mf[0]` reached up to min + max).
    mod_frq = (mf[1] - mf[0]) * torch.rand_like(ff) + mf[0]  # (B, 1)
    mod_amp = ma * torch.rand_like(ff)  # (B, 1)
    nt = f0.size(-1)  # total time
    # Vibrato onset, somewhere in the first half; drawn on f0's device.
    vt = torch.floor((nt // 2) * torch.rand(f0.size(0), device=f0.device).view(-1,1))
    t = torch.ones_like(f0).cumsum(-1)
    m = t.gt(vt)  # zero the modulation for samples at or before the onset
    vibra = m * mod_amp * (1 - torch.cos(2 * np.pi * mod_frq * (t - vt) * k)) / 2
    if not ma_in_hz:
        vibra = vibra * f0
    # Random sign: the pitch deviates either upwards or downwards.
    return f0 + vibra * torch.randn_like(ff).sign()
def triangle_with_velocity(vel, n, sr_t, sr_x, max_u=.1):
    ''' Displacement profile generated from a constant velocity.

        vel    (batch_size,) velocity
        n      (int) number of samples
        sr_t   (int) sampling rate in time
        sr_x   (int) sampling rate in space
        max_u  (float) maximum displacement
        -> (batch_size, n), raised to the 5th power and clamped to <= 0.01
    '''
    step = vel.view(-1,1) * sr_x / sr_t    # m/s to non-dimensional quantity
    steps = step * torch.ones_like(step).repeat(1,n)
    travelled = steps.cumsum(1)
    # Triangle that rises with the travelled distance and folds back at
    # max_u; negative parts are cut by the relu.
    profile = torch.relu(max_u - (max_u - travelled).abs() - steps)
    return profile.pow(5).clamp(max=0.01)

src/utils/ddsp.py ADDED Viewed

	@@ -0,0 +1,175 @@

+import torch
+import torch.nn as nn
+import torch.fft as fft
+import numpy as np
+import librosa as li
+import math
def safe_log(x):
    ''' Natural log of `x` offset by 1e-7 so that zeros stay finite. '''
    shifted = x + 1e-7
    return shifted.log()
@torch.no_grad()
def mean_std_loudness(dataset):
    ''' Running average of per-item loudness mean and standard deviation.

        dataset: iterable of 3-tuples whose third element is a loudness tensor
        -> (mean, std) averaged over items; (0, 0) for an empty dataset
    '''
    avg_mean, avg_std = 0, 0
    for count, (_, _, loudness) in enumerate(dataset, start=1):
        # Incremental mean update; avoids storing per-item statistics.
        avg_mean += (loudness.mean().item() - avg_mean) / count
        avg_std += (loudness.std().item() - avg_std) / count
    return avg_mean, avg_std
def multiscale_fft(signal, scales, overlap):
    ''' Magnitude STFTs of `signal` at several FFT sizes.

        signal : tensor accepted by torch.stft
        scales : iterable of FFT sizes (also used as window length)
        overlap: fraction of overlap between frames; hop = size * (1 - overlap)
        -> list with one magnitude spectrogram per scale
    '''
    def _magnitude(n_fft):
        spec = torch.stft(
            signal,
            n_fft=n_fft,
            hop_length=int(n_fft * (1 - overlap)),
            win_length=n_fft,
            window=torch.hann_window(n_fft).to(signal),
            center=True,
            normalized=True,
            return_complex=True,
        )
        return spec.abs()

    return [_magnitude(scale) for scale in scales]
def resample(x, factor: int):
    ''' Upsample along the frame axis by zero-stuffing and Hann smoothing.

        x: (batch, frame, channel)
        factor: integer upsampling factor
        -> (batch, frame * factor, channel)
    '''
    n_batch, n_frame, n_chan = x.shape
    # Treat every channel as its own single-channel sequence for conv1d.
    flat = x.permute(0, 2, 1).reshape(n_batch * n_chan, 1, n_frame)
    kernel = torch.hann_window(
        factor * 2,
        dtype=flat.dtype,
        device=flat.device,
    ).reshape(1, 1, -1)
    stuffed = flat.new_zeros(flat.shape[0], flat.shape[1], factor * flat.shape[2])
    stuffed[..., ::factor] = flat
    # Repeat the final value at the very end so the tail has an anchor.
    stuffed[..., -1:] = flat[..., -1:]
    stuffed = torch.nn.functional.pad(stuffed, [factor, factor])
    smooth = torch.nn.functional.conv1d(stuffed, kernel)[..., :-1]
    return smooth.reshape(n_batch, n_chan, factor * n_frame).permute(0, 2, 1)
def upsample(signal, factor):
    ''' Linear interpolation along the frame axis.

        signal: (batch, frame, channel)
        -> (batch, frame * factor, channel)
    '''
    channels_first = signal.transpose(1, 2)
    target_len = channels_first.shape[-1] * factor
    stretched = nn.functional.interpolate(channels_first, size=target_len, mode='linear')
    return stretched.transpose(1, 2)
def remove_above_nyquist(amplitudes, pitch, sampling_rate):
    ''' Attenuate harmonics whose frequency reaches the Nyquist limit.

        amplitudes: (batch, frames, n_harmonics)
        pitch: (batch, frames, 1)
        -> amplitudes scaled by 1 + 1e-4 below Nyquist and by 1e-4 otherwise
    '''
    n_harm = amplitudes.shape[-1]
    # Cumulative sum of the repeated pitch gives f0, 2*f0, 3*f0, ...
    harmonic_freqs = torch.cumsum(pitch.repeat(1,1,n_harm), dim=-1)
    nyquist = sampling_rate / 2
    gate = harmonic_freqs.lt(nyquist).float() + 1e-4
    return amplitudes * gate
def remove_above_nyquist_mode(amplitudes, frequencies, sampling_rate):
    ''' Attenuate modes whose frequency reaches the Nyquist limit.

        amplitudes: (batch, frames, n_harmonics)
        frequencies: (batch, frames, n_harmonics)
        -> amplitudes scaled by 1 + 1e-4 below Nyquist and by 1e-4 otherwise
    '''
    nyquist = sampling_rate / 2
    gate = frequencies.lt(nyquist).float() + 1e-4
    return amplitudes * gate
def scale_function(x):
    ''' Map any real input into the open range 0 ~ 2 via a sharpened sigmoid. '''
    squashed = torch.sigmoid(x)
    return 2 * squashed.pow(math.log(10)) + 1e-7
def extract_loudness(signal, sampling_rate, block_size, n_fft=2048):
    ''' A-weighted log-magnitude loudness, one value per hop.

        signal       : numpy audio signal accepted by librosa.stft
        sampling_rate: (int) sampling rate in Hz
        block_size   : (int) hop length in samples
        n_fft        : (int) FFT / window size
        -> mean over frequency of the A-weighted log-magnitude STFT, with
           the last frame dropped
    '''
    S = li.stft(
        signal,
        n_fft=n_fft,
        hop_length=block_size,
        win_length=n_fft,
        center=True,
    )
    S = np.log(abs(S) + 1e-7)
    # Keyword arguments: recent librosa releases reject positional sr/n_fft.
    f = li.fft_frequencies(sr=sampling_rate, n_fft=n_fft)
    a_weight = li.A_weighting(f)
    S = S + a_weight.reshape(-1, 1)
    S = np.mean(S, 0)[..., :-1]
    return S
def extract_pitch(signal, sampling_rate, block_size):
    ''' Frame-wise pitch track estimated with CREPE.

        signal       : audio signal passed to crepe.predict
        sampling_rate: (int) sampling rate in Hz
        block_size   : (int) hop length in samples
        -> array of signal.shape[-1] // block_size pitch values; the CREPE
           output is linearly resampled when its length differs
    '''
    # This module's import block does not import `crepe`; import it here,
    # on first use, so the rest of the module loads without it.
    import crepe

    length = signal.shape[-1] // block_size
    f0 = crepe.predict(
        signal,
        sampling_rate,
        step_size=int(1000 * block_size / sampling_rate),
        verbose=1,
        center=True,
        viterbi=True,
    )
    # Take the second element of the prediction tuple and drop its last frame.
    f0 = f0[1].reshape(-1)[:-1]
    if f0.shape[-1] != length:
        f0 = np.interp(
            np.linspace(0, 1, length, endpoint=False),
            np.linspace(0, 1, f0.shape[-1], endpoint=False),
            f0,
        )
    return f0
def harmonic_synth(pitch, amplitudes, sampling_rate):
    ''' Additive synthesis of a harmonic series.

        pitch     : (batch, time, 1) instantaneous frequency
        amplitudes: (batch, time, n_harmonic)
        -> (batch, time, 1) sum of sinusoids at integer multiples of the
           accumulated phase
    '''
    n_harmonic = amplitudes.shape[-1]
    phase = torch.cumsum(2 * math.pi * pitch / sampling_rate, 1)
    multiples = torch.arange(1, n_harmonic + 1).to(phase)
    partials = torch.sin(phase * multiples) * amplitudes
    return partials.sum(-1, keepdim=True)
def modal_synth(modes, amplitude, sampling_rate, n_chunks=16):
    ''' Sum of cosines whose phases are the running sum of `modes`.

        modes    : per-sample phase increments, time on axis 1
        amplitude: per-sample amplitudes, same layout as `modes`
        n_chunks : the time axis is processed in this many chunks, carrying
                   the last phase of each chunk into the next
        NOTE: `sampling_rate` is accepted but not used by the computation.
    '''
    freq_chunks = modes.chunk(n_chunks, 1)
    amp_chunks = amplitude.chunk(n_chunks, 1)
    carry = torch.zeros_like(freq_chunks[0])
    pieces = []
    for freq, amp in zip(freq_chunks, amp_chunks):
        phase = freq.cumsum(1) + carry
        pieces.append((torch.cos(phase) * amp).sum(-1, keepdim=True))
        # Last phase of this chunk seeds the next one.
        carry = phase.narrow(1,-1,1)
    return torch.cat(pieces, 1)
def amp_to_impulse_response(amp, target_size):
    ''' Turn a magnitude response into a windowed impulse response.

        amp        : (..., n_bins) magnitudes, used as a zero-phase spectrum
        target_size: length the impulse response is zero-padded to
        -> (..., target_size)
    '''
    spectrum = torch.complex(amp, torch.zeros_like(amp))
    ir = fft.irfft(spectrum)
    filter_size = ir.shape[-1]
    # Centre the response, taper it with a Hann window, pad, and rotate it
    # back so the response starts at index 0 again.
    ir = torch.roll(ir, filter_size // 2, -1)
    taper = torch.hann_window(filter_size, dtype=ir.dtype, device=ir.device)
    ir = ir * taper
    ir = nn.functional.pad(ir, (0, int(target_size) - int(filter_size)))
    return torch.roll(ir, -filter_size // 2, -1)
def fft_convolve(signal, kernel):
    ''' Convolve `signal` with `kernel` along the last axis using FFTs.

        Both inputs are doubled in length by zero padding (signal on the
        right, kernel on the left); the second half of the product is
        returned.
    '''
    padded_signal = nn.functional.pad(signal, (0, signal.shape[-1]))
    padded_kernel = nn.functional.pad(kernel, (kernel.shape[-1], 0))
    product = fft.rfft(padded_signal) * fft.rfft(padded_kernel)
    full = fft.irfft(product)
    half = full.shape[-1] // 2
    return full[..., half:]

src/utils/misc.py ADDED Viewed

	@@ -0,0 +1,336 @@

+import os
+import yaml
+import torch
+import numpy as np
+import torch.nn.functional as F
+from scipy.interpolate import RectBivariateSpline
+from contextlib import contextmanager,redirect_stderr,redirect_stdout
+from os import devnull
# Alphabet (digits, lower case, upper case) that random_str draws from.
chars = list('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
@contextmanager
def suppress_stdout_stderr():
    """Context manager that sends stdout and stderr to devnull.

    Yields the (stderr_target, stdout_target) pair; both are the same
    devnull file object, which is closed on exit.
    """
    with open(devnull, 'w') as sink, \
            redirect_stderr(sink) as err, \
            redirect_stdout(sink) as out:
        yield (err, out)
def batchify(x, batch_size, n_samples):
    ''' Placeholder: not implemented, always returns None. '''
    return None
def random_str(length=8):
    ''' Random alphanumeric string of `length` characters.

        Characters are drawn with replacement from the module-level `chars`
        list using NumPy's global random state.
    '''
    picked = np.random.choice(chars, length)
    return "".join(picked)
def sqrt(x):
    ''' Square root that works for torch tensors and plain numbers alike. '''
    if isinstance(x, torch.Tensor):
        return x.pow(.5)
    return x**.5
def soft_bow(v_rel, a=100):
    ''' Smooth bow friction curve sqrt(2a) * v * exp(-a v^2 + 1/2).

        v_rel: tensor of relative velocities
        a    : sharpness of the curve
    '''
    gain = np.sqrt(2*a)
    envelope = torch.exp(-a * v_rel**2 + 0.5)
    return gain * v_rel * envelope
def hard_bow(v_rel, a=5, eps=0.1, hard_sign=True):
    ''' Bow friction curve with an exponential decay towards `eps`.

        v_rel    : tensor of relative velocities
        a        : decay rate of the exponential
        eps      : magnitude the curve settles to for large |v_rel|
        hard_sign: use sign(v_rel); otherwise the smooth tanh(100 * v_rel)
    '''
    if hard_sign:
        direction = torch.sign(v_rel)
    else:
        direction = torch.tanh(100 * v_rel)
    magnitude = eps + (1-eps) * torch.exp(-a * v_rel.abs())
    return direction * magnitude
def raised_cosine(N, h, ctr, wid, n):
    ''' Normalised raised-cosine bump sampled on a spatial grid.

        N      (int): number of maximal samples in space
        h    (float): spatial grid cell width
        ctr  (B,1,1): center points for each batch
        wid  (B,1,1): width lengths for each batch
        n       (B,): number of actual samples in space
        -> (batch_size, N, 1), absolute values summing to 1 along axis 1
    '''
    grid = torch.linspace(h, 1, N).to(ctr.device).view(1,-1,1)   # (1, N, 1)
    center = (ctr * n / N)
    width = wid / N
    # Product of the signed distances to both edges is negative only
    # inside the window, so the relu/sign pair yields a 0/1 indicator.
    inside = torch.sign(torch.relu(-(grid - center - width / 2) * (grid - center + width / 2)))
    bump = 0.5 * inside * (1 + torch.cos(2 * np.pi * (grid - center) / width))
    return bump / bump.abs().sum(1, keepdim=True)  # (batch_size, N, 1)
def floor_dirac_delta(n, ctr, N):
    ''' Boolean one-hot marker at grid index floor(ctr * n).

        n   : number of samples in space, per batch item
        ctr : center point (relative position), per batch item
        N   (int): number of grid points
        -> (batch_size, N, 1) boolean tensor
    '''
    batch = ctr.numel()
    # Grid indices 0 .. N-1, one row per batch item.
    grid = torch.arange(N, dtype=ctr.dtype, device=ctr.device).view(1, N, 1).expand(batch, N, 1)
    target = torch.floor(ctr * n).view(-1,1,1)
    return torch.floor(grid).eq(target)  # (batch_size, N, 1)
def triangular(N, n, p_x, p_a):
    ''' Triangular profile along the spatial axis.

        N    (int): number of maximal samples in space
        n    (B,  1, 1): number of actual samples in space
        p_x  (B, Nt, 1): peak position
        p_a  (B, Nt, 1): peak amplitude
        -> (B, Nt, N) pointwise minimum of a rising and a falling ramp;
           all zeros where p_x <= 0
    '''
    zero = torch.zeros_like(p_x)
    inactive = p_x.le(0)
    slope_l = torch.where(inactive, zero, p_a / p_x / n)
    slope_r = torch.where(inactive, zero, p_a / (1-p_x) / n)
    # Each ramp is a cumulative sum of its slope, shifted to start at zero.
    ramp_l = (slope_l * torch.ones_like(slope_l).repeat(1,1,N)).cumsum(2) - slope_l
    ramp_l = ramp_l.clamp(min=0)
    ramp_r = (slope_r * torch.ones_like(slope_r).repeat(1,1,N)).cumsum(2) - slope_r * (N-n+1)
    ramp_r = ramp_r.clamp(min=0).flip(2)
    tri = torch.minimum(ramp_l, ramp_r)
    assert not torch.isnan(tri).any(), torch.isnan(tri.flatten(1).sum(1))
    return tri
def pre_shaper(x, sr, velocity=10):
    ''' Fade-in: scale `x` by a tanh ramp growing along the last axis.

        sr      : sampling rate used to convert sample index to time
        velocity: steepness of the ramp
    '''
    sample_idx = torch.ones_like(x).cumsum(-1)
    ramp = torch.tanh(sample_idx / sr * velocity)
    return ramp * x
def post_shaper(x, sr, pulloff, velocity=100):
    ''' Fade-out: scale `x` by a reversed tanh ramp and silence the tail.

        sr      : sampling rate used to convert sample index to time
        pulloff : duration (in seconds) removed from the end
        velocity: steepness of the ramp
    '''
    n_total = x.size(-1)
    offset = n_total - int(sr * pulloff)
    ramp = torch.tanh(torch.ones_like(x).cumsum(-1) / sr * velocity).flip(-1)
    # Shift the falling ramp to the left by `offset` samples and zero-fill
    # the freed tail.
    shifted = F.pad(ramp.narrow(-1,offset,n_total-offset), (0,offset))
    return shifted * x
def random_uniform(floor, ceiling, size=None, weight=None, dtype=None):
    ''' Uniform samples in [floor, ceiling), optionally weighted.

        floor, ceiling: bounds of the range
        size  : int, tuple or list; None yields a single 0-dim sample
        weight: multiplies the unit samples before scaling (default: ones)
        dtype : dtype the samples are cast to after drawing
    '''
    if size is None:
        # The default used to become (None,), which torch rejects.
        size = ()
    elif isinstance(size, list):
        size = tuple(size)
    elif not isinstance(size, tuple):
        size = (size,)
    if weight is None: weight = torch.ones(size, dtype=dtype)
    # NOTE: torch.rand(..., dtype=dtype) for dtype \in [torch.float32, torch.float64]
    #       can result in different random number generation
    #       (for different precisions; despite fixing the random seed.)
    #       Hence: draw in the default precision first, cast afterwards.
    unit = torch.rand(size=size)
    if dtype is not None:
        unit = unit.to(dtype)
    return (ceiling - floor) * unit * weight + floor
def equidistant(floor, ceiling, steps, dtype=None):
    ''' `steps` evenly spaced values from floor to ceiling, cast to `dtype`. '''
    grid = torch.linspace(floor, ceiling, steps)
    return grid.to(dtype)
def get_masks(model_name, bs, disjoint=True):
    ''' Per-item switches deciding which excitation is applied.

        model_name: names ending in 'bow', 'hammer' or 'pluck' give fixed
                    float masks; any other name draws random boolean masks
        bs        : batch size
        disjoint  : for random masks, never enable bow and hammer together.
                    Allowing both (e.g., bowing over hammered strings) may
                    be a charming choice, but it can also drive the
                    simulation unstable.
        -> [bow_mask, hammer_mask], each of shape (bs, 1, 1)
    '''
    def _fixed(bow_on, hammer_on):
        fill = {True: torch.ones, False: torch.zeros}
        return [
            fill[bow_on](size=(bs,)).view(-1,1,1),
            fill[hammer_on](size=(bs,)).view(-1,1,1),
        ]

    if model_name.endswith('bow'):
        return _fixed(True, False)
    if model_name.endswith('hammer'):
        return _fixed(False, True)
    if model_name.endswith('pluck'):
        return _fixed(False, False)

    # Unknown model: flip a fair coin per item and per excitation.
    bow_mask = torch.rand(size=(bs,)).gt(0.5).view(-1,1,1)
    hammer_mask = torch.rand(size=(bs,)).gt(0.5).view(-1,1,1)
    if disjoint:
        # Bowing wins: the hammer is switched off wherever the bow is on.
        hammer_mask = hammer_mask & ~bow_mask
    return [bow_mask, hammer_mask]
def f0_interpolate(f0_1, n_frames, tmax):
    ''' Linearly resample a pitch track to `n_frames` points over [0, tmax].

        f0_1: (N,) numpy array of pitch values, assumed evenly spaced in time
        -> (n_frames,) numpy array
    '''
    source_times = np.linspace(0, tmax, f0_1.shape[0])
    target_times = np.linspace(0, tmax, n_frames)
    return np.interp(target_times, source_times, f0_1)
def interpolate1d(u, xaxis, xvals, k=5):
    ''' Spline interpolation of a single spatial profile.

        u: (1, Nx)
        xaxis: (1, Nx_input)
        xvals: (1, Nx_output)
        -> (1, Nx_output)
    '''
    # RectBivariateSpline is 2-D, so the profile is stacked k times along
    # an artificial first axis (degree 1 there, degree k in space).
    dummy_axis = np.arange(k)[:,None] / k
    stacked = u.repeat(k,0)
    spline = RectBivariateSpline(dummy_axis, xaxis, stacked, kx=1, ky=k)
    evaluated = spline(dummy_axis, xvals, grid=True)
    return evaluated[k//2][None,:]
def interpolate(u, taxis, xaxis, xvals, kx=5, ky=5):
    ''' Spline-resample a space-time field onto new spatial positions.

        u: (Nt, Nx)
        taxis: (Nt, 1)
        xaxis: (1, Nx_input)
        xvals: (1, Nx_output)
        kx, ky: spline degrees along time and space
        -> (Nt, Nx_output)
    '''
    spline = RectBivariateSpline(taxis, xaxis, u, kx=kx, ky=ky)
    # Time samples are kept; only the spatial axis is re-evaluated.
    return spline(taxis, xvals, grid=True)
def torch_interpolate(x, scale_factor):
    ''' Rescale the last axis, then pad back to the input length.

        The padding is split between both ends; when the difference is odd
        the right end gets the extra sample.
    '''
    scaled = F.interpolate(x, scale_factor=scale_factor)
    deficit = x.size(-1) - scaled.size(-1)
    left = deficit // 2
    right = deficit - left
    return F.pad(scaled, (left, right))
def minmax_normalize(x, dim=-1):
    ''' Shift and scale `x` along `dim` so it spans 0 ~ 1.

        NOTE: a slice that is constant along `dim` divides by zero.
    '''
    shifted = x - x.amin(dim, keepdim=True)
    return shifted / shifted.amax(dim, keepdim=True)
def get_minmax(x):
    ''' (min, max) of a numpy array, or (None, None) if its sum is NaN. '''
    total = x.sum()
    if np.isnan(total):
        return None, None
    lowest = np.nan_to_num(x.min())
    highest = np.nan_to_num(x.max())
    return lowest, highest
def select_with_batched_index(input, dim, index):
    ''' input: (bs, ..., n, ...)
        dim  : (int)
        index: (bs, ..., 1, ...) index to select on dim `dim`
     -> out  : (bs, ..., 1, ...) for each batch, select `index`-th element on dim `dim`
    '''
    # The message used to reference `input.shpae`, so a failing check
    # raised AttributeError instead of reporting the mismatching shapes.
    assert input.size(0) == index.size(0), [input.shape, index.shape]
    bs = input.size(0)
    # Handle each batch item on its own, then stitch the results together.
    selected = [
        batched_index_select(item, dim, idx)
        for item, idx in zip(input.chunk(bs, 0), index.chunk(bs, 0))
    ]
    return torch.cat(selected, dim=0)
def batched_index_select(input, dim, index):
    ''' input: (..., n, ...)
        dim  : (int)
        index: (..., k, ...) index to select on dim `dim`
     -> out  : (..., k, ...) select k out of n elements on dim `dim`
    '''
    ndim = input.dim()
    axis = dim % ndim
    # Lay the indices out along `axis`, then copy them across every other
    # axis so torch.gather picks the same positions everywhere.
    view_shape = [-1 if k == axis else 1 for k in range(ndim)]
    repeats = [1 if k == axis else size for k, size in enumerate(input.shape)]
    gather_idx = index.to(torch.int64).view(view_shape).repeat(repeats)
    return torch.gather(input, dim, gather_idx)
def random_index(max_N, idx_N):
    ''' Draw `idx_N` random indices from range(max_N).

        Without replacement when enough indices exist, otherwise with
        replacement.
    '''
    if idx_N <= max_N:
        # choosing without replacement
        return torch.randperm(max_N)[:idx_N]
    # choosing with replacement
    return torch.randint(0, max_N, (idx_N,))
def ell_infty_normalize(x, normalize_dims=1):
    ''' Scale `x` so the peak absolute value is (just under) one.

        normalize_dims: number of leading axes normalised independently;
                        the peak is taken over all remaining axes
        -> (normalised x, gain), gain broadcastable against x
    '''
    tiny = torch.finfo(x.dtype).eps
    lead_shape = list(x.shape)[:normalize_dims]
    n_trailing = x.dim() - normalize_dims
    peak = x.abs().flatten(normalize_dims).max(normalize_dims).values + tiny
    gain = 1. / peak.view(lead_shape + [1] * n_trailing)
    return x * gain, gain
def sinusoidal_embedding(x, n, gain=10000, dim=-1):
    ''' Sinusoidal embedding of `x` with geometrically spaced frequencies.

        let `x` be normalized to be in the nondimensional (0 ~ 1) range
        x   : tensor of any shape
        n   : (even int) embedding size: n/2 sines followed by n/2 cosines
        gain: ratio between the highest and the lowest frequency
        dim : axis (of the unsqueezed input) holding the embedding
        -> list(x.shape) + [n] for the default dim=-1
    '''
    assert n % 2 == 0, n
    x = x.unsqueeze(-1)
    shape = [1] * x.dim(); shape[dim] = -1 # e.g., [1,1,-1]
    half_n = n // 2
    expnt = torch.arange(half_n, device=x.device, dtype=x.dtype).view(shape)
    # With a single frequency (n == 2) the spacing is undefined; guard the
    # division so the embedding is sin(x), cos(x) instead of NaN.
    denom = max(half_n - 1, 1)
    freqs = torch.exp(expnt * -(np.log(gain) / denom))
    angle = x * freqs
    return torch.cat((torch.sin(angle), torch.cos(angle)), dim)
def fourier_feature(x, B):
    ''' Fourier-feature mapping of the last axis.

        x: (Bs, ..., in_dim)
        B: (in_dim, out_dim) projection matrix, or None to return x untouched
        -> (Bs, ..., 2*out_dim): sines followed by cosines of 2*pi*x @ B
    '''
    if B is None:
        return x
    projected = (2.*np.pi*x) @ B
    return torch.cat((torch.sin(projected), torch.cos(projected)), dim=-1)
def save_simulation_data(directory, excitation_type, overall_results, constants):
    ''' Write simulation outputs and their parameters into `directory`.

        directory      : output folder (created if missing)
        excitation_type: stored verbatim in the YAML summary
        overall_results: dict holding 'string_params', 'hammer_params' and
                         'bow_params' sequences plus the simulation arrays
        constants      : sequence; items 1 and 2 are stored as theta_t
                         and lambda_c
        Files written: simulation.npz, string_params.npz, hammer_params.npz,
        bow_params.npz and simulation_config.yaml.
        Raises KeyError when one of the three parameter entries is missing.
    '''
    os.makedirs(directory, exist_ok=True)

    # Work on a shallow copy: popping from `overall_results` directly would
    # strip the parameter entries from the caller's dict.
    simulation_dict = dict(overall_results)
    string_params = simulation_dict.pop('string_params')
    hammer_params = simulation_dict.pop('hammer_params')
    bow_params    = simulation_dict.pop('bow_params')

    string_keys = ('kappa', 'alpha', 'u0', 'v0', 'f0', 'pos', 'T60', 'target_f0')
    hammer_keys = ('x_H', 'v_H', 'u_H', 'w_H', 'M_r', 'alpha')
    bow_keys    = ('x_B', 'v_B', 'F_B', 'phi_0', 'phi_1', 'wid_B')
    string_dict = {key: string_params[i] for i, key in enumerate(string_keys)}
    hammer_dict = {key: hammer_params[i] for i, key in enumerate(hammer_keys)}
    bow_dict    = {key: bow_params[i]    for i, key in enumerate(bow_keys)}

    def sample(val):
        # First element of array-likes exposing `.item`; plain numbers pass through.
        try:
            _val = val.item(0)
        except AttributeError as err:
            if isinstance(val, float) or isinstance(val, int):
                _val = val
            else:
                raise err
        return _val

    short_configuration = {
        'excitation_type': excitation_type,
        'theta_t' : constants[1],
        'lambda_c': constants[2],
    }
    sections = (
        ('value-string', string_dict),
        ('value-hammer', hammer_dict),
        ('value-bow', bow_dict),
    )
    for section, params in sections:
        short_configuration[section] = {key: sample(val) for key, val in params.items()}

    np.savez_compressed(f'{directory}/simulation.npz', **simulation_dict)
    np.savez_compressed(f'{directory}/string_params.npz', **string_dict)
    np.savez_compressed(f'{directory}/hammer_params.npz', **hammer_dict)
    np.savez_compressed(f'{directory}/bow_params.npz',    **bow_dict)
    with open(f"{directory}/simulation_config.yaml", 'w') as f:
        yaml.dump(short_configuration, f, default_flow_style=False)
def add_noise(x, c, vals, eps=1e-5):
    ''' Add Gaussian noise to `x` wherever `c` equals one of `vals`.

        x   : tensor to perturb
        c   : tensor compared against each entry of `vals`
        vals: iterable of values marking where noise is added
        eps : noise standard deviation
        One noise tensor is drawn and reused for every value in `vals`.
    '''
    noise = eps * torch.randn_like(x)
    for val in vals:
        hit = (c == val).to(c.dtype)
        x = x + hit * noise
    return x
def downsample(x, factor=None, size=None):
    ''' Linearly resample the time axis.

        x: (Bs, Nt) -> (Bs, ceil(Nt / factor)) when `factor` is given
                    -> (Bs, size)              when `size` is given
        Exactly one of `factor` and `size` may be set.
    '''
    if size is not None:
        assert factor is None, [factor, size]
    else:
        # Ceiling division: a partial trailing block still yields a sample.
        whole, rest = divmod(x.size(1), factor)
        size = whole + bool(rest)
    resampled = F.interpolate(x.unsqueeze(1), size=size, mode='linear')
    return resampled.squeeze(1)
if __name__ == '__main__':
    # Smoke test: draw one raised-cosine bump and save it as an image.
    #   N      (int): number of maximal samples in space
    #   h    (float): spatial grid cell width
    #   ctr  (B,1,1): center points for each batch
    #   wid  (B,1,1): width lengths for each batch
    #   n       (B,): number of actual samples in space
    N, B = 10, 1
    h = 1 / N
    ctr = torch.full((B, 1, 1), 0.5)
    wid = torch.ones(B, 1, 1)
    n = torch.full((B,), float(N))
    c = raised_cosine(N, h, ctr, wid, n)
    print(c.shape)

    import matplotlib.pyplot as plt
    plt.figure()
    plt.plot(c[0,:,0])
    plt.savefig('asdf.png')

src/utils/plot.py ADDED Viewed

	@@ -0,0 +1,1132 @@

+import os
+import shutil
+import subprocess
+import torch
+import torch.nn.functional as F
+import numpy as np
+import librosa
+import matplotlib.pyplot as plt
+import scipy
+from src.utils.control import *
+from src.utils.misc import soft_bow, hard_bow, sinusoidal_embedding
+from src.utils.audio import rms_normalize
+import soundfile as sf
# Figure text is rendered through LaTeX with a serif font family.
plt.rcParams.update({
    'text.usetex': True,
    'font.family': 'serif',
})
def gt_param(TF=5, sr=44100):
    ''' Hand-written reference bowing gesture.

        TF: duration in seconds
        sr: sampling rate in Hz
        -> [x_bow, v_bow, F_bow, f0], each a 1-D tensor of int(sr * TF)
           samples (bow position, bow velocity, bow force, pitch)
    '''
    # `sr` used to be overwritten with 44100 here, ignoring the argument.
    NF = int(sr * TF)
    k = 1 / sr
    TRANS = int(0.05 * sr)
    eighth = NF // 8
    quarter = NF // 4

    def hz(value):
        # constant / glissando / vibrato operate on (batch_size,) tensors.
        return torch.tensor([float(value)])

    x_bow = torch.linspace(0.25, 0.45, NF)
    v_bow = 0.1 * torch.tanh(torch.linspace(0., 10, NF))
    # Four bowed segments, each followed by TRANS samples without force,
    # then a sustained stroke and a final rest.
    F_bow = torch.cat((
        torch.linspace(100, 120, eighth - TRANS), torch.zeros(TRANS),
        100 * torch.ones(eighth - TRANS), torch.zeros(TRANS),
        100 * torch.ones(eighth - TRANS), torch.zeros(TRANS),
        torch.linspace(100, 80, eighth - TRANS), torch.zeros(TRANS),
        80 * torch.ones(quarter),
        torch.zeros(quarter),
    ), dim=-1)
    # NOTE(review): the vibrato call used to be (207.65, NF//4, k, 5, 10),
    # which does not match vibrato(f0, k, mf, ma, ma_in_hz). 5 and 10 are
    # read here as modulation rate and depth in Hz, with mf following the
    # documented [min, max] convention -- confirm against the intended gesture.
    f0 = torch.cat((
        glissando(hz(98), hz(110), eighth),
        constant(hz(130.81), eighth),
        glissando(hz(146.83), hz(164.81), eighth),
        constant(hz(207.65), eighth),
        vibrato(constant(hz(207.65), quarter), k, mf=[5, 5], ma=10, ma_in_hz=True),
        constant(hz(207.65), quarter),
    ), dim=-1).squeeze(0)
    # Integer division may leave the curves a few samples short: pad in front.
    F_bow = F.pad(F_bow, (NF-F_bow.size(-1),0))
    f0 = F.pad(f0, (NF-f0.size(-1),0))
    return [x_bow, v_bow, F_bow, f0]
def param(est_param, gt_param, save_path):
    ''' Plot estimated against reference control curves in four stacked panels.

        est_param: sequence whose first four items are tensors
                   (bow position, bow velocity, bow force, f0)
        gt_param : sequence of the four matching reference tensors
        save_path: file the figure is written to
        Reference curves are dotted blue, estimates solid black.
    '''
    est_x, est_v, est_F, est_f0 = (item.detach().cpu().numpy() for item in est_param[:4])
    ref_x, ref_v, ref_F, ref_f0 = (item.cpu().numpy() for item in gt_param)
    panels = (
        ('bow pos', ref_x, est_x),
        ('bow vel', ref_v, est_v),
        ('bow force', ref_F, est_F),
        ('f0', ref_f0, est_f0),
    )
    fig, axes = plt.subplots(figsize=(7,7), nrows=4, ncols=1)
    for axis, (label, reference, estimate) in zip(axes, panels):
        axis.plot(reference, 'b:')
        axis.plot(estimate, 'k-')
        axis.axhline(y=0, c='k', lw=.5)
        axis.set_ylabel(label)
    plt.tight_layout()
    plt.savefig(save_path)
    plt.clf()
    plt.close()
def simulation_data(
        save_dir,
        uout, zout, v_r_out, F_H_out, u_H_out,
        state_u, state_z,
        string_params, bow_params, hammer_params,
        **kwargs,
    ):
    ''' Save diagnostic figures for one simulated string into `save_dir`.

        uout, zout      : output signals (the first 1000 samples are drawn)
        v_r_out         : relative bow velocity over time
        F_H_out, u_H_out: hammer force / displacement over time
        state_u, state_z: states whose last row is drawn over space
        string_params   : 8-item sequence (kappa, alpha, u0, v0, f0, pos, T60, target_f0)
        bow_params      : 6-item sequence (x_b, v_b, F_b, phi_0, phi_1, wid_b)
        hammer_params   : 6-item sequence (x_H, v_H, u_H, w_H, M_r, alpha_H)
        kwargs          : accepted and ignored
        Files written: string.png, bow.png, bow-velforce.pdf, hammer.png.
    '''
    def _save_and_close(path):
        plt.tight_layout()
        plt.savefig(path)
        plt.clf()
        plt.close()

    sr = 48000  # sampling rate assumed for the finite difference and the ms axis
    N = min(1000, uout.shape[0])
    kappa, alpha, u0, v0, f0, pos, T60, target_f0 = string_params
    x_b, v_b, F_b, phi_0, phi_1, wid_b = bow_params
    x_H, v_H, u_H, w_H, M_r, alpha_H = hammer_params
    max_disp = np.max(np.abs(uout[:N]))
    rels = torch.linspace(-1,1,100)
    prof = hard_bow(rels, phi_0, phi_1)

    # plot string params
    fig, ax = plt.subplots(figsize=(7,7), nrows=5, ncols=1)
    ax[0].plot(f0, 'k-')
    ax[0].axhline(y=0, c='k', lw=.5)
    ax[0].set_ylabel('f0')
    ax[0].set_ylim([0, 500])
    state_rows = (
        (1, state_u, 'transverse state'),
        (2, state_z, 'longitudinal state'),
    )
    for row, state, label in state_rows:
        ax[row].plot(np.linspace(0,1,state.shape[-1]), state[-1], 'k-')
        ax[row].axvline(x=pos, c='r', lw=.5)
        # The bow marker for the longitudinal panel used to be drawn on
        # ax[1] a second time; each panel now gets its own marker.
        ax[row].axvline(x=x_b[-1], c='b', lw=.5)
        ax[row].set_ylabel(label)
    ax[3].plot(np.arange(N), uout[:N], 'k-')
    ax[3].axhline(y=0, c='k', lw=.5)
    ax[3].set_ylabel('output')
    ax[3].set_ylim([-max_disp, max_disp])
    ax[4].plot(np.arange(N), zout[:N], 'k-')
    ax[4].axhline(y=0, c='k', lw=.5)
    ax[4].set_ylabel('output')
    for axis in ax:
        axis.yaxis.tick_right()
    _save_and_close(f"{save_dir}/string.png")

    # plot bow params
    fig, ax = plt.subplots(figsize=(7,7), nrows=3, ncols=2)
    ax[0,0].plot(x_b, 'k-')
    ax[1,0].plot(v_b, 'k-')
    ax[2,0].plot(F_b, 'k-')
    ax[0,1].plot(rels.numpy(), prof.numpy(), 'k-')
    ax[1,1].plot(np.arange(N), v_r_out[:N], 'k-')
    ax[2,1].plot(np.arange(N), v_r_out[-N:], 'k-')
    bow_panels = (
        (ax[0,0], 'bowing position',   [0, 1]),
        (ax[0,1], 'bow friction fn',   [-1.5, 1.5]),
        (ax[1,0], 'bowing velocity',   [0, 0.5]),
        (ax[1,1], 'rel vel (attack)',  [-2, 2]),
        (ax[2,0], 'bowing force',      [0, 100]),
        (ax[2,1], 'rel vel (release)', [-2, 2]),
    )
    for axis, label, ylim in bow_panels:
        axis.axhline(y=0, c='k', lw=.5)
        axis.set_ylabel(label)
        axis.yaxis.tick_right()
        axis.set_ylim(ylim)
    _save_and_close(f"{save_dir}/bow.png")

    # friction coefficient against relative velocity
    Nt = len(v_r_out)
    Nx = state_u.shape[-1]
    a_f = (v_r_out[1:] - v_r_out[:Nt-1]) * sr
    F_f = a_f / Nx
    mu = F_f / F_b[-(Nt-1):]
    vr = v_r_out[:Nt-1]
    rels = torch.linspace(np.min(vr)-.1,np.max(vr)+.1,100)
    prof = hard_bow(rels, phi_0, phi_1)
    fig, ax = plt.subplots(figsize=(4,4), nrows=1, ncols=1)
    ax.fill_between(rels.numpy(), prof.numpy(), alpha=0.2, facecolor='r')
    ax.plot(vr, mu, 'k-')
    ax.axhline(y=0, c='k', lw=.5)
    ax.set_xlabel('Relative velocity')
    ax.set_ylabel('Friction coefficient')
    ax.set_ylim([-1.5, 1.5])
    _save_and_close(f"{save_dir}/bow-velforce.pdf")

    # plot hammer signals over the first milliseconds
    fig, ax = plt.subplots(figsize=(7,7), nrows=2, ncols=1)
    # ms
    t_1 = 0; Nt_1 = int(sr * t_1 * 1e-3)
    t_2 = 8; Nt_2 = int(sr * t_2 * 1e-3)
    time = np.linspace(t_1, t_2, Nt_2 - Nt_1)
    ax[0].plot(time, u_H_out[Nt_1:Nt_2], 'k-')
    ax[0].axhline(y=0, c='k', lw=.5)
    ax[0].set_ylabel('hammer displacement')
    ax[0].yaxis.tick_right()
    ax[1].plot(time, F_H_out[Nt_1:Nt_2], 'k-')
    ax[1].axhline(y=0, c='k', lw=.5)
    ax[1].set_ylabel('hammer force')
    ax[1].yaxis.tick_right()
    _save_and_close(f"{save_dir}/hammer.png")
def state_specs(save_path, analytic, estimate, simulate):
    ''' Compare three space-time states side by side and save the figure.

        save_path: file the figure is written to
        analytic, estimate, simulate: (Nt, Nx) numpy arrays labelled
            'Modal', 'Ours' and 'FDTD' in the figure
        Left column: the three states (every 100th time step).
        Right column: mid-string traces of all three, then the differences
        of 'Modal' and 'Ours' to 'FDTD'.
    '''
    tf = 100
    Nt, Nx = simulate.shape
    nt = Nt // tf
    nx = Nx // 2
    diff_ana = analytic - simulate
    diff_est = estimate - simulate
    maxval = np.max(np.abs(simulate))
    maxerr = max(np.max(np.abs(diff_ana)), np.max(np.abs(diff_est)))
    nrows = 3; ncols = 2
    fig, ax = plt.subplots(ncols=ncols, nrows=nrows, figsize=(7,7))

    def _show(data, axis, limit):
        # Symmetric colour limits keep zero at the centre of the colormap.
        mesh = librosa.display.specshow(data[0::tf].T, cmap='coolwarm', ax=axis)
        mesh.set_clim([-limit, +limit])

    _show(simulate, ax[0,0], maxval)
    _show(analytic, ax[1,0], maxval)
    _show(estimate, ax[2,0], maxval)
    _show(diff_ana, ax[1,1], maxerr)
    _show(diff_est, ax[2,1], maxerr)
    ax[0,1].plot(simulate[:nt,nx], c='goldenrod', label='FDTD')
    ax[0,1].plot(analytic[:nt,nx], c='r', label='Modal')
    ax[0,1].plot(estimate[:nt,nx], c='g', label='Ours')
    titles = ['FDTD', 'Modal', 'Ours']
    for i, title in enumerate(titles):
        ax[i,0].set_ylabel(title)
    for axis in ax.flat:
        axis.set_xticks([])
        axis.set_yticks([])
    ax[0,1].legend(
        loc='lower center', bbox_to_anchor=(.95,-0.5),
        ncol=1, fancybox=True,
        handlelength=1., handletextpad=0.1, columnspacing=.5, fontsize=7,
    )
    fig.tight_layout()
    fig.subplots_adjust(wspace=0)
    fig.subplots_adjust(hspace=0)
    plt.savefig(save_path, bbox_inches='tight')
    # Closing is the last step: a plt.clf() after close('all') would open
    # a fresh empty figure and leave it behind.
    plt.close('all')
def state_video(save_dir, state_u, sr, framerate=100, trim_front=True, verbose=False, prefix=None, fname='output', maxy=None):
    ''' Render a string state as an mp4 and mux it with `{fname}.wav`.

        save_dir  : folder holding `{fname}.wav`; receives the videos
        state_u   : (Nt, Nx) array, or a list [state_u, state_v] where the
                    second state is drawn semi-transparently underneath
        sr        : sampling rate of the state in Hz
        framerate : input frame rate passed to ffmpeg
        trim_front: only keep the first sr/55 samples (one period at 55 Hz)
        verbose   : show ffmpeg's log output
        prefix    : file-name prefix, 'fdtd' when None
        fname     : base name of the audio file and the videos
        maxy      : y-axis limit; defaults to the peak of |state_u|
        Requires the `ffmpeg` executable. Frames are rendered into
        `{save_dir}/temp`, which is removed afterwards.
    '''
    if isinstance(state_u, list):
        state_v = state_u[1]
        state_u = state_u[0]
    else:
        state_v = None
    if trim_front:
        n_keep = int(sr / 55) # for 55 Hz (A1)
        state_u = state_u[:n_keep]
        state_v = state_v[:n_keep] if state_v is not None else None
        # At least one sample per frame: a state shorter than `framerate`
        # samples would otherwise give a zero stride.
        downs = max(1, int(state_u.shape[0]/framerate))
    else:
        downs = 100
    Nt, Nx = state_u.shape
    maxy = np.max(np.abs(state_u)) if maxy is None else maxy
    locs = np.linspace(0, 1, Nx)
    prefix = 'fdtd' if prefix is None else prefix

    frame_dir = f'{save_dir}/temp'
    # Created up front so the cleanup below also works without any frame.
    os.makedirs(frame_dir, exist_ok=True)
    try:
        for j in range(Nt // downs):
            plt.figure(figsize=(5,2))
            if state_v is not None:
                plt.plot(locs, state_v[j * downs], c='k', alpha=0.5)
            plt.plot(locs, state_u[j * downs], c='k')
            plt.xlim([0, 1])
            plt.ylim([-maxy, maxy])
            plt.xticks([])
            plt.yticks([])
            plt.tight_layout()
            plt.savefig(f'{frame_dir}/file{j:02d}.png')
            plt.clf()
            plt.close("all")

        silent_path = f'{save_dir}/{prefix}-{fname}-silent_video.mp4'
        silent_video = ['ffmpeg',
            '-framerate', f'{framerate}',
            '-i', f'{save_dir}/temp/file%02d.png',
            '-r', '30', '-pix_fmt', 'yuv420p', '-y',
            silent_path]
        output_video = ['ffmpeg',
            '-i', silent_path,
            '-i', f'{save_dir}/{fname}.wav',
            '-c:v', 'copy', '-map', '0:v', '-map', '1:a',
            '-shortest', '-y',
            f'{save_dir}/{prefix}-{fname}.mp4']
        if not verbose:
            silent_video += ['-loglevel', 'quiet']
            output_video += ['-loglevel', 'quiet']
        subprocess.call(silent_video, stdout=subprocess.DEVNULL)
        subprocess.call(output_video, stdout=subprocess.DEVNULL)
    finally:
        # Remove the rendered frames even when plotting or ffmpeg fails.
        shutil.rmtree(frame_dir)
+def rainbowgram(
+    save_path, out, sr, n_fft=2**13, hop_length=None,
+    f0_input=None, f0_estimate=None, modes=None, colorbar=True,
+):
+    L = 32
+    if out.shape[-1] > 2*n_fft:
+        hop_length = n_fft // L if hop_length is None else hop_length
+    else:
+        n_fft = out.shape[-1] // 2
+        hop_length = n_fft // L
+    t_max = out.shape[-1] / sr
+    out, gain = rms_normalize(out)
+    D = librosa.stft(out, n_fft=n_fft, hop_length=hop_length, pad_mode='reflect')
+    mag, phase = librosa.magphase(D)
+    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
+    times = librosa.times_like(D, sr=sr, hop_length=hop_length)
+    phase_exp = 2 * np.pi * np.multiply.outer(freqs, times)
+    unwrapped_phase = np.unwrap((np.angle(phase)-phase_exp) / (L/4), axis=1)
+    unwrapped_phase_diff = np.diff(unwrapped_phase, axis=1, prepend=0)
+    alpha = librosa.amplitude_to_db(mag, ref=np.max) / 80 + 1
+    #width = 2.5; height = 1.9
+    width = 7; height = 7
+    fig, ax = plt.subplots(figsize=(width,height))
+    spec = librosa.display.specshow(
+        unwrapped_phase_diff, cmap='hsv', alpha=alpha,
+        n_fft=n_fft, hop_length=hop_length, sr=sr, ax=ax,
+        y_axis='log', x_axis='time',
+    )
+    ax.set_facecolor('#000')
+    if colorbar:
+        cbar = fig.colorbar(spec, ticks=[-np.pi, -np.pi/2, 0, np.pi/2, np.pi], ax=ax)
+        cbar.ax.set(yticklabels=['$-\pi$', '$-\pi/2$', "$0$", '$\pi/2$', '$\pi$']);
+    def add_plot(freqs, label=None, ls=None, lw=2., dashes=(None,None)):
+        x = np.linspace(1/sr, t_max, freqs.shape[-1])
+        freqs = np.interp(times, x, freqs)
+        line, = ax.plot(times - times[0], freqs, label=label, color='white', lw=lw, ls=ls, dashes=dashes)
+        return line
+    freq_ticks = [0, 128, 512, 2048, 8192, sr // 2]
+    time_ticks = [0, 1, 2]
+    if f0_input is not None:
+        add_plot(f0_input, "f0_input", dashes=(10,5))
+        freq_ticks += [f0_input[0]]
+    if f0_estimate is not None:
+        add_plot(f0_estimate, "f0_estimate", dashes=(2,5))
+        freq_ticks += [] if f0_input is not None else [f0_estimate[0]]
+    if modes is not None:
+        for im, m in enumerate(modes):
+            l = add_plot(m, f"mode {im}")
+            l.set_dashes([5,10,1,10])
+    #ax.set_xticks(time_ticks)
+    #ax.set_yticks(freq_ticks)
+    ax.set_xticks([])
+    ax.set_yticks([])
+    ax.xaxis.set_visible(False)
+    ax.yaxis.set_visible(False)
+    plt.tight_layout()
+    plt.savefig(save_path, bbox_inches='tight', pad_inches=-1e-6)
+    plt.clf()
+    plt.close("all")
+def phase_diagram(
+        save_path, x, s,
+        xmin, xmax,
+        dxmin, dxmax,
+        ddxmin, ddxmax,
+        sr, tau=1, label='$u$'):
+    dx  = (x[tau:] - x[:-tau]) / (tau / sr)
+    ddx = (x[2*tau:] - 2*x[tau:-tau] + x[:-2*tau]) / (2*tau / sr)
+    if s is not None:
+        if s.shape[0] > x.shape[0]:
+            s = s[:x.shape[0]]
+        dsdt = (s[tau:] - s[:-tau]) / (tau / sr)
+        _dsdt = np.mean(np.abs(dsdt), axis=0)
+        spax = np.arange(len(_dsdt))
+    if s is not None:
+        fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(8,3.5), width_ratios=[4, 1])
+        ax[0,0].axhline(y=0, color='gray', ls='-', lw=0.3)
+        ax[0,0].plot(x, 'k-', lw=0.5)
+        ax[0,0].set_xlim([0,len(x)])
+        ax[0,0].set_ylim([xmin,xmax])
+        ax[0,0].set_xticks([])
+        ax[0,0].set_yticks([])
+        #ax[0,0].set_xlabel('$t$')
+        ax[0,0].set_ylabel(label)
+        ax[0,1].axhline(y=0, color='gray', ls='-', lw=0.3)
+        ax[0,1].axvline(x=0, color='gray', ls='-', lw=0.3)
+        ax[0,1].plot(dx, x[tau:], 'k-', lw=0.5)
+        ax[0,1].set_xlim([dxmin,dxmax])
+        ax[0,1].set_ylim([xmin,xmax])
+        ax[0,1].set_xticks([])
+        ax[0,1].set_yticks([])
+        #ax[0,1].set_xlabel('$d$'+label+'$/dt$')
+        #state = librosa.display.specshow(s.T, cmap='coolwarm', ax=ax[1,0])
+        state = librosa.display.specshow(dsdt.T, cmap='coolwarm', ax=ax[1,0])
+        maxabs = np.max(np.abs(dsdt))
+        state.set_clim([-maxabs, +maxabs])
+        ax[1,0].set_xlim([0,x.shape[0]])
+        ax[1,0].set_xlabel('$t$')
+        ax[1,0].set_ylabel('$x$')
+        _dsdt = np.pad( _dsdt, (1,1))
+        _spax = np.pad(  spax, (1,1), mode='edge')
+        ax[1,1].fill_between(+ _dsdt, _spax, alpha=0.2, facecolor='k')
+        ax[1,1].fill_between(- _dsdt, _spax, alpha=0.2, facecolor='k')
+        ax[1,1].axvline(x=0, color='k', ls='-', lw=1.0)
+        ax[1,1].set_ylim([spax[0], spax[-1]])
+        ax[1,1].set_xticks([])
+        ax[1,1].set_yticks([])
+        ax[1,1].set_xlabel('$d$'+label+'$/dt$')
+    else:
+        fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(8,2), width_ratios=[4, 1])
+        ax[0].axhline(y=0, color='gray', ls='-', lw=0.3)
+        ax[0].plot(x, 'k-', lw=0.5)
+        ax[0].set_xlim([0,len(x)])
+        ax[0].set_ylim([xmin,xmax])
+        ax[0].set_xticks([])
+        ax[0].set_yticks([])
+        ax[0].set_xlabel('$t$')
+        ax[0].set_ylabel(label)
+        ax[1].axhline(y=0, color='gray', ls='-', lw=0.3)
+        ax[1].axvline(x=0, color='gray', ls='-', lw=0.3)
+        ax[1].plot(dx, x[tau:], 'k-', lw=0.5)
+        ax[1].set_xlim([dxmin,dxmax])
+        ax[1].set_ylim([xmin,xmax])
+        ax[1].set_xticks([])
+        ax[1].set_yticks([])
+        ax[1].set_xlabel('$d$'+label+'$/dt$')
+    plt.tight_layout()
+    plt.subplots_adjust(wspace=0.)
+    plt.subplots_adjust(hspace=0.)
+    plt.savefig(save_path, bbox_inches='tight', transparent=True)
+    plt.clf()
+    plt.close("all")
+    #fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(3.5,2))
+    #ax[0].axvline(x=0, color='gray', ls='-', lw=0.3)
+    #ax[0].axhline(y=0, color='gray', ls='-', lw=0.3)
+    #ax[0].plot(x[2*tau:], ddx, 'k-', lw=0.5)
+    #ax[0].set_xlim([xmin,xmax])
+    #ax[0].set_ylim([ddxmin,ddxmax])
+    #ax[0].set_xticks([])
+    #ax[0].set_yticks([])
+    #ax[0].set_xlabel(label)
+    #ax[0].set_ylabel('$d^2$'+label+'$/dt^2$')
+    #ax[1].axvline(x=0, color='gray', ls='-', lw=0.3)
+    #ax[1].axhline(y=0, color='gray', ls='-', lw=0.3)
+    #ax[1].plot(dx[tau:], ddx, 'k-', lw=0.5)
+    #ax[1].set_xlim([dxmin, dxmax])
+    #ax[1].set_ylim([ddxmin,ddxmax])
+    #ax[1].set_xticks([])
+    #ax[1].set_yticks([])
+    #ax[1].set_xlabel('$d$'+label+'$/dt$')
+    #plt.tight_layout()
+    #plt.subplots_adjust(wspace=0.)
+    #plt.subplots_adjust(hspace=0.)
+    #save_dir = save_path.split('/')[:-1]
+    #save_name = save_path.split('/')[-1]
+    #save_path_2 = '/'.join(save_dir+[save_name.replace('phs', 'dphs')])
+    #plt.savefig(save_path_2, bbox_inches='tight', transparent=True)
+    #plt.clf()
+    #plt.close("all")
+def xt_grid_embedding(save_path, x, t, embed_dim=32, t_gain=1e-6, x_gain=1e-2):
+    t = t * 1000
+    Bs,  _, Nx = x.shape
+    Bs, Nt,  _ = t.shape
+    t_embd = sinusoidal_embedding(t.unsqueeze(-1), n=embed_dim, gain=t_gain) # (Bs, 1,Nx,1,embed_dim)
+    x_embd = sinusoidal_embedding(x.unsqueeze(-1), n=embed_dim, gain=x_gain) # (Bs,Nt, 1,1,embed_dim)
+    t_axis = t.squeeze().detach().cpu().numpy()
+    x_axis = x.squeeze().detach().cpu().numpy()
+    t_embd = t_embd.squeeze().detach().cpu().numpy()
+    x_embd = x_embd.squeeze().detach().cpu().numpy()
+    assert len(list(t_embd.shape)) == 2, t_embd.shape
+    assert len(list(x_embd.shape)) == 2, x_embd.shape
+    e = np.arange(embed_dim)
+    fig, ax = plt.subplots(figsize=(13,7), nrows=1, ncols=2)
+    librosa.display.specshow(t_embd, ax=ax[0], x_coords=e, y_coords=t_axis)
+    librosa.display.specshow(x_embd, ax=ax[1], x_coords=e, y_coords=x_axis)
+    ax[0].set_title("t embed")
+    ax[0].set_xlabel("embedding dim")
+    ax[0].set_ylabel("time")
+    ax[0].set_yticks(t_axis[0::10])
+    ax[1].set_title("x embed")
+    ax[1].set_xlabel("embedding dim")
+    ax[1].set_ylabel("space")
+    ax[1].set_yticks(x_axis[0::10])
+    ax[1].yaxis.set_label_position("right")
+    ax[1].yaxis.tick_right()
+    plt.tight_layout()
+    plt.subplots_adjust(wspace=0.)
+    plt.subplots_adjust(hspace=0.)
+    plt.savefig(save_path)
+    plt.clf()
+    plt.close("all")
+def logedc(save_path, logedc, tmax):
+    time = np.linspace(0, tmax, logedc.shape[0])
+    fig, ax = plt.subplots(figsize=(3,3))
+    ax.plot(time, logedc)
+    ax.set_xlabel("Time (s)")
+    ax.set_ylabel("Energy (dB)")
+    plt.tight_layout()
+    plt.savefig(save_path)
+    plt.clf()
+    plt.close("all")
+def f0curve(save_path, f0_input, f0_estimate, first_mode, tmax):
+    time = np.linspace(0, tmax, len(f0_estimate))
+    fig, ax = plt.subplots(figsize=(3,3))
+    ax.plot(time, f0_input, label='$f_0$')
+    ax.plot(time, f0_estimate, label='$f_0^{(\\tt est)}$')
+    ax.plot(time, first_mode, label='$\hat{f_0}$')
+    ax.set_xlabel("Time (s)")
+    ax.set_ylabel("Frequency (Hz)")
+    ax.set_ylim(0, 200)
+    plt.legend()
+    plt.tight_layout()
+    plt.savefig(save_path)
+    plt.clf()
+    plt.close("all")
+def spectrum(save_path, out, f0_input, f0_estimate, modes, sr, n_fft=2**14, ylabel=None):
+    t_max = out.shape[-1] / sr
+    n_fft = min(n_fft, out.shape[-1])
+    cr = int(f0_estimate.shape[-1] / t_max)   # crepe framerate
+    simulated = out[-n_fft:]
+    f0_input    =    f0_input[-1]
+    f0_estimate = f0_estimate[-1]
+    modes = [m[-1] for m in modes]
+    simulated_fr = 20 * np.log10(np.abs(np.fft.rfft(simulated, n_fft)))
+    freqs = np.linspace(0, sr/2 / 1000, int(n_fft/2+1))
+    n_freqs = 1024
+    fig, ax = plt.subplots(figsize=(4,2))
+    lw = 0.7
+    ax.plot(freqs[:n_freqs], simulated_fr[:n_freqs], 'k', lw=1.)
+    ax.axvline(x=f0_input / 1000, c='r', ls='-', lw=lw, label='$f_0$')
+    ax.axvline(x=f0_estimate / 1000, c='g', ls='--', lw=lw, label='$f_0^{(\\tt est)}$')
+    for i, m in enumerate(modes):
+        if i == 0:
+            ax.axvline(x=m / 1000, c='b', ls='-.', lw=lw, label='$\hat{f_p}$')
+        else:
+            ax.axvline(x=m / 1000, c='b', ls='-.', lw=lw)
+    ax.set_xticks([0, 0.5, 1, 1.5, 2])
+    plt.xlim([0, 2])
+    plt.xlabel('Frequency (kHz)')
+    plt.ylabel(ylabel)
+    plt.legend(ncol=3, fancybox=True)
+    plt.tight_layout()
+    plt.savefig(save_path, bbox_inches='tight')
+    plt.clf()
+    plt.close("all")
+def spectrum_uz(save_path, uout, zout, f0_input, f0_estimate, modes, sr, n_fft=2**14):
+    t_max = uout.shape[-1] / sr
+    n_fft = min(n_fft, uout.shape[-1])
+    cr = int(f0_estimate.shape[-1] / t_max)   # crepe framerate
+    simulated_u = uout[-n_fft:]
+    simulated_z = zout[-n_fft:]
+    f0_input    =    f0_input[-1]
+    f0_estimate = f0_estimate[-1]
+    modes = [m[-1] for m in modes]
+    simulated_fr_u = 20 * np.log10(np.abs(np.fft.rfft(simulated_u, n_fft)))
+    simulated_fr_z = 20 * np.log10(np.abs(np.fft.rfft(simulated_z, n_fft)))
+    freqs = np.linspace(0, sr/2 / 1000, int(n_fft/2+1))
+    n_freqs = 1024
+    fig, ax = plt.subplots(figsize=(2.5,2), ncols=1, nrows=2)
+    #fig, ax = plt.subplots(figsize=(4,2), ncols=1, nrows=2)
+    lw = 1.
+    lw_fr = .5
+    al = .5
+    ax[0].axhline(y=0, c='k', lw=0.5, alpha=al)
+    ax[0].plot(freqs[:n_freqs], simulated_fr_u[:n_freqs], 'k', lw=lw_fr)
+    ax[0].axvline(x=f0_input / 1000, c='r', ls='-', lw=lw, label='$f_0$', alpha=al)
+    ax[0].axvline(x=f0_estimate / 1000, c='g', ls='--', lw=lw, label='$f_0^{(\\tt est)}$', alpha=al)
+    for i, m in enumerate(modes):
+        if i == 0:
+            ax[0].axvline(x=m / 1000, c='b', ls=':', lw=lw, label='$\hat{f_p}$', alpha=al)
+        else:
+            ax[0].axvline(x=m / 1000, c='b', ls=':', lw=lw, alpha=al)
+    ax[0].set_xticks([0, 0.5, 1, 1.5, 2])
+    ax[0].set_xlim([0, 2])
+    ax[0].set_ylabel('$|u|$')
+    ax[0].xaxis.set_label_position('top')
+    ax[0].yaxis.tick_right()
+    ax[0].xaxis.tick_top()
+    ax[1].axhline(y=0, c='k', lw=0.3, alpha=al)
+    ax[1].plot(freqs[:n_freqs], simulated_fr_z[:n_freqs], 'k', lw=lw_fr)
+    ax[1].axvline(x=f0_input / 1000, c='r', ls='-', lw=lw, label='$f_0$', alpha=al)
+    ax[1].axvline(x=f0_estimate / 1000, c='g', ls='--', lw=lw, label='$f_0^{(\\tt est)}$', alpha=al)
+    for i, m in enumerate(modes):
+        if i == 0:
+            ax[1].axvline(x=m / 1000, c='b', ls=':', lw=lw, label='$\hat{f_p}$', alpha=al)
+        else:
+            ax[1].axvline(x=m / 1000, c='b', ls=':', lw=lw, alpha=al)
+    ax[1].set_xticks([])
+    ax[1].set_xlim([0, 2])
+    ax[1].set_xlabel('Frequency (kHz)')
+    ax[1].set_ylabel('$|\zeta|$')
+    ax[1].yaxis.tick_right()
+    #ax[1].xaxis.set_label_coords(0.2, -0.05)
+    #plt.legend(loc='lower center', bbox_to_anchor=(0.7,-0.4), ncol=3, fancybox=True, handletextpad=0.1, columnspacing=1.)
+    #ax[1].xaxis.set_label_coords(0.2, -0.1)
+    #plt.legend(loc='lower center', bbox_to_anchor=(0.7,-0.8), ncol=3, fancybox=True, handletextpad=0.1, columnspacing=1.)
+    ax[1].xaxis.set_label_coords(0.3, -0.1)
+    plt.legend(loc='lower center', bbox_to_anchor=(.95,-0.5), ncol=3, fancybox=True, handlelength=1., handletextpad=0.1, columnspacing=.5, fontsize=7)
+    plt.tight_layout()
+    plt.subplots_adjust(wspace=0.)
+    plt.subplots_adjust(hspace=0.)
+    plt.savefig(save_path, bbox_inches='tight', transparent=True, pad_inches=-1e-6)
+    plt.clf()
+    plt.close("all")
+def scatter_xy(save_path, x, y_dict, xlabel, ylabel, xticks=[], yticks=[]):
+    fig, ax = plt.subplots(figsize=(2.5,2.5))
+    for y_label in y_dict.keys():
+        ax.scatter(x, y_dict[y_label], label=y_label, s=1.)
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel(ylabel)
+    ax.set_xticks(xticks)
+    ax.set_yticks(yticks)
+    plt.legend()
+    plt.tight_layout()
+    plt.savefig(save_path, bbox_inches='tight', transparent=True)
+    plt.clf()
+    plt.close("all")
+def scatter_kappa(save_path, total_summary, ss=.3):
+    f0_diffs, f0_ground, kappa, alpha = total_summary
+    def moving_average(x, n):
+        assert n % 2 == 1, n
+        x = np.pad(x, (n//2, n//2), 'symmetric')
+        return np.convolve(x, np.ones(n) / n, 'valid')
+    sorted_kf = sorted(zip(kappa, f0_ground))
+    sorted_kappa     = [k for k, f in sorted_kf]
+    sorted_f0_ground = [f for k, f in sorted_kf]
+    sorted_kappa     = sorted_kappa[0::40]     + [sorted_kappa[-1]]
+    sorted_f0_ground = sorted_f0_ground[0::40] + [sorted_f0_ground[-1]]
+    diff_max = max(f0_diffs) + 3.
+    xticks = [5,10,15,20]
+    yticks = [0,10,20,30,40,50,60]
+    fig, ax = plt.subplots(figsize=(2.5,2), nrows=1, ncols=1)
+    #cm = plt.cm.get_cmap('RdYlBu')
+    cm = plt.cm.get_cmap('plasma')
+    ax.plot(sorted_kappa, sorted_f0_ground, 'k-', lw=1.0, alpha=0.5)
+    sc = ax.scatter(kappa, f0_diffs, c=alpha, s=ss,
+        vmin=min(alpha), vmax=max(alpha), cmap=cm)
+    cbar = plt.colorbar(sc)
+    cbar.ax.set_title(r'$\alpha$')
+    cbar.ax.set_yticks([1,10,20,25])
+    ax.set_xticks(xticks)
+    ax.set_yticks(yticks)
+    ax.set_ylim([0,60])
+    for xt in xticks: ax.axvline(xt, c='k', ls='-', lw=0.5, alpha=0.3)
+    for yt in yticks: ax.axhline(yt, c='k', ls='-', lw=0.5, alpha=0.3)
+    ax.set_xlabel('$\kappa$')
+    ax.set_ylabel(r'$|f_0^{(\tt est)} - f_0|$ (Hz)')
+    ax.xaxis.tick_top()
+    plt.tight_layout()
+    #plt.subplots_adjust(wspace=0.)
+    #plt.subplots_adjust(hspace=0.)
+    plt.savefig(save_path, bbox_inches='tight', transparent=True, pad_inches=-1e-5)
+    plt.clf()
+    plt.close("all")
+def scatter_pluck(save_path, total_summary, ss=.3, al=0.7):
+    cmap = {
+        '$|f_0^{(\\tt est)} - f_0|$'       : 'orchid',
+        '$|f_0^{(\\tt est)} - \hat{f_0}|$' : 'cadetblue',
+    }
+    f0_diffs, kappa, alpha, p_x, p_a = total_summary
+    diff_max = max([max(item) for k, item in f0_diffs.items()]) + 3.
+    ncols = 3 if alpha is None else 4
+    fig, ax = plt.subplots(figsize=(4., 2), nrows=1, ncols=ncols)
+    # kappa
+    for y_label in f0_diffs.keys():
+        ax[0].scatter(kappa, f0_diffs[y_label], c=cmap[y_label], label=y_label, s=ss, alpha=al)
+    ax[0].axvline(x=5.88, c='k', ls='--', lw=0.5)
+    #ax[0].axhline(y=6, c='k', ls='--', lw=0.5)
+    #ax[0].axhline(y=1, c='k', ls='--', lw=0.5)
+    ax[0].set_xlabel('$\kappa$')
+    ax[0].set_ylabel('Detune')
+    #ax[0].set_ylim([0, 10])
+    ax[0].set_ylim([0, diff_max])
+    ax[0].set_xticks([2,5,8])
+    ax[0].set_yticks([])
+    ax[0].xaxis.tick_top()
+    # p_x
+    for y_label in f0_diffs.keys():
+        ax[1].scatter(p_x, f0_diffs[y_label], c=cmap[y_label], label=y_label, s=ss, alpha=al)
+    #ax[1].axhline(y=6, c='k', ls='--', lw=0.5)
+    #ax[1].axhline(y=1, c='k', ls='--', lw=0.5)
+    ax[1].set_xlabel('$p_x$')
+    ax[1].set_ylim([0, diff_max])
+    ax[1].set_xticks([-0.5, 0])
+    ax[1].set_yticks([])
+    ax[1].xaxis.tick_top()
+    ax[1].yaxis.tick_right()
+    # p_a
+    p_a = [x * 1e3 for x in p_a]
+    for y_label in f0_diffs.keys():
+        ax[2].scatter(p_a, f0_diffs[y_label], c=cmap[y_label], label=y_label, s=ss, alpha=al)
+    #ax[2].axhline(y=6, c='k', ls='--', lw=0.5)
+    #ax[2].axhline(y=1, c='k', ls='--', lw=0.5)
+    ax[2].set_xlabel('$p_a\\times10^{3}$')
+    ax[2].set_ylim([0, diff_max])
+    ax[2].set_xticks([1, 4, 7, 10])
+    ax[2].set_yticks([0,5,10])
+    ax[2].xaxis.tick_top()
+    ax[2].yaxis.tick_right()
+    # alpha
+    if alpha is not None:
+        for y_label in f0_diffs.keys():
+            ax[3].scatter(alpha, f0_diffs[y_label], c=cmap[y_label], label=y_label, s=ss, alpha=al)
+        ax[3].axhline(y=6, c='k', ls='--', lw=0.5)
+        ax[3].axhline(y=1, c='k', ls='--', lw=0.5)
+        ax[3].set_xlabel('$\\alpha$')
+        ax[3].set_ylim([0, diff_max])
+        #ax[3].set_xticks([1,2,3,4])
+        ax[2].set_yticks([])
+        ax[3].set_yticks([0,5,10])
+        ax[3].xaxis.tick_top()
+    plt.tight_layout()
+    plt.legend(loc='lower center', bbox_to_anchor=(-0.5, -1.2), ncol=2, fancybox=True, handletextpad=0.02, columnspacing=.2, markerscale=5., fontsize=7)
+    plt.subplots_adjust(wspace=0.)
+    plt.subplots_adjust(hspace=0.)
+    plt.savefig(save_path, bbox_inches='tight', transparent=True, pad_inches=-1e-5)
+    plt.clf()
+    plt.close("all")
+def time_experiment(save_path, gpu_summary, cpu_summary):
+    n_criteria = len(list(gpu_summary.keys()))
+    fig, ax = plt.subplots(figsize=(5, 1.66), nrows=1, ncols=n_criteria)
+    config = {
+        'Batch size'       : [4, 16, 64, 256, 1024],
+        '$N_t$'            : [0.25, 0.50, 1.00, 2.00, 4.00],
+        '$N_x^{(\\tt t)}+N_x^{(\\tt l)}$' : [20, 40, 80, 160,  320],
+        '$N_x^{(\\tt l)}$' : [1, 2, 3, 4],
+    }
+    xlims = {
+        'Batch size'       : [2,1800],
+        '$N_t$'            : [6000, 300000],
+        #'$N_x^{(\\tt t)}$' : [15, 160],
+        '$N_x^{(\\tt t)}+N_x^{(\\tt l)}$' : [70, 1900],
+        '$N_x^{(\\tt l)}$' : [15, 160],
+    }
+    def f0_to_NtNl(f0, k=1/48000, theta_t=0.5 + 2/(np.pi**2), kappa_rel=0.03):
+        gamma =  2*f0
+        kappa = gamma * kappa_rel
+        IHP = (np.pi * kappa / gamma)**2         # inharmonicity parameter (>0); eq 7.21
+        K = pow(IHP, .5) * (gamma / np.pi)          # set parameters
+        h = pow( \
+            (gamma**2 * k**2 + pow(gamma**4 * k**4 + 16 * K**2 * k**2 * (2 * theta_t - 1), .5)) \
+          / (2 * (2 * theta_t - 1)) \
+        , .5)
+        N_t = int(1/h)
+        alpha = 1
+        h = gamma * alpha * k
+        N_l = int(1/h)
+        return N_t + N_l
+    def alpha_to_Nl(alpha, gamma=600, k=1/48000):
+        h = gamma * alpha * k
+        N_l = int(1/h)
+        return N_l
+    for i, criterion in enumerate(config.keys()):
+        if criterion == '$N_t$':
+            config[criterion] = [int(c * 48000) for c in config[criterion]]
+        if criterion == '$N_x^{(\\tt t)}+N_x^{(\\tt l)}$':
+            config[criterion] = [f0_to_NtNl(c) for c in config[criterion]]
+        if criterion == '$N_x^{(\\tt l)}$':
+            config[criterion] = [alpha_to_Nl(c) for c in config[criterion]]
+    print(config)
+    for i, criterion in enumerate(gpu_summary.keys()):
+        conf_list = config[criterion]
+        gpu_times = gpu_summary[criterion]
+        cpu_times = cpu_summary[criterion]
+        if i == 0:
+            # divide by number of batch
+            #gpu_times = [gpu_times[k] / conf_list[k] for k in range(len(gpu_times))]
+            #cpu_times = [cpu_times[k] / conf_list[k] for k in range(len(cpu_times))]
+            pass
+        elif i > 1:
+            conf_list = list(reversed(conf_list))
+            gpu_times = list(reversed(gpu_times))
+            cpu_times = list(reversed(cpu_times))
+        gpu_times = [gpu_times[k] / gpu_times[0] for k in range(len(gpu_times))]
+        cpu_times = [cpu_times[k] / cpu_times[0] for k in range(len(cpu_times))]
+        lin_times = [conf_list[k] / conf_list[0] for k in range(len(gpu_times))]
+        thicklw = 0.8
+        ax[i].axhline(y=100,  c='lightgray', lw=thicklw, ls=':')
+        ax[i].axhline(y=10,   c='lightgray', lw=thicklw, ls=':')
+        ax[i].axhline(y=1,    c='lightgray', lw=thicklw, ls='-')
+        ax[i].plot(conf_list[:len(cpu_times)], cpu_times, 'kD--', lw=.9, label="CPU", mfc='lightgray')
+        ax[i].plot(conf_list[:len(gpu_times)], gpu_times, 'ko-',  lw=.9, label="GPU", mfc='white')
+        ax[i].fill_between(conf_list[:len(cpu_times)], lin_times, alpha=.2)
+        ax[i].set_xlabel(criterion)
+        if i == 0:
+            ax[i].set_ylabel('Relative time')
+        ax[i].set_xscale('log')
+        ax[i].set_yscale('log')
+        ax[i].set_ylim([0.5, 1e3])
+        ax[i].set_xlim(xlims[criterion])
+        ax[i].xaxis.set_label_position('top')
+        ax[i].yaxis.tick_right()
+        if i < len(list(gpu_summary.keys()))-1:
+            ax[i].set_yticks([])
+        else:
+            ax[i].set_yticks([1, 10, 100, 1000])
+    plt.tight_layout()
+    #plt.legend(loc='lower center', bbox_to_anchor=(-0.5, -0.75), ncol=2, fancybox=True, handletextpad=0.1, columnspacing=1.)
+    #plt.legend(loc='lower right', ncol=2, fancybox=True)
+    plt.legend(loc='upper right', ncol=2, fancybox=True)
+    plt.subplots_adjust(wspace=0.)
+    plt.subplots_adjust(hspace=0.)
+    plt.savefig(save_path, bbox_inches='tight', transparent=True)
+    plt.clf()
+    plt.close("all")
+def est_tar_specs(est, tar, inp, plot_path, wave_path, sr=16000):
+    data = []
+    batch_size = est["wav"].shape[0]
+    for b in range(batch_size):
+        logspecs = []
+        difspecs = []
+        nrows  = 4;  ncols = 2
+        height = 8; widths = 7
+        specfig, ax = plt.subplots(nrows, ncols, figsize=(widths,height))
+        diff_0 = tar["logmag"][b] - est["logmag"][b]
+        logspecs.append(
+            librosa.display.specshow(
+            inp["logmag"][b].numpy().T,  cmap='magma', ax=ax[0,0]))
+        logspecs.append(
+            librosa.display.specshow(
+            est["logmag"][b].numpy().T,  cmap='magma', ax=ax[1,0]))
+        logspecs.append(
+            librosa.display.specshow(
+            tar["logmag"][b].numpy().T,  cmap='magma', ax=ax[2,0]))
+        difspecs.append(
+            librosa.display.specshow(
+            diff_0.numpy().T, cmap='bwr',  ax=ax[3,0]))
+        diff_0 = tar["logmel"][b] - est["logmel"][b]
+        logspecs.append(
+            librosa.display.specshow(
+            inp["logmel"][b].numpy().T,  cmap='magma',ax=ax[0,1]))
+        logspecs.append(
+            librosa.display.specshow(
+            est["logmel"][b].numpy().T,  cmap='magma',ax=ax[1,1]))
+        logspecs.append(
+            librosa.display.specshow(
+            tar["logmel"][b].numpy().T,  cmap='magma',ax=ax[2,1]))
+        difspecs.append(
+            librosa.display.specshow(
+            diff_0.numpy().T, cmap='bwr', ax=ax[3,1]))
+        for spec in logspecs:
+            spec.set_clim([-60, 30])
+        for spec in difspecs:
+            spec.set_clim([-20, 20])
+        titles = ['Analytic', 'Estimate', 'Original', 'Difference']
+        for i, title in enumerate(titles):
+            ax[i,0].set_ylabel(title)
+        specfig.tight_layout()
+        specfig.subplots_adjust(wspace=0)
+        specfig.subplots_adjust(hspace=0)
+        specfig.savefig(plot_path)
+        plt.close('all')
+        plt.clf()
+        inp_wav = inp["wav"][b].squeeze()
+        sf.write(wave_path.replace('.wav', f"-{b}-inp.wav"), inp_wav, samplerate=sr)
+        est_wav = est["wav"][b].squeeze()
+        sf.write(wave_path.replace('.wav', f"-{b}-est.wav"), est_wav, samplerate=sr)
+        tar_wav = tar["wav"][b].squeeze()
+        sf.write(wave_path.replace('.wav', f"-{b}-tar.wav"), tar_wav, samplerate=sr)
+        d  = [ wandb.Image(specfig) ]
+        d += [ wandb.Audio(inp_wav, sample_rate=sr) ]
+        d += [ wandb.Audio(est_wav, sample_rate=sr) ]
+        d += [ wandb.Audio(tar_wav, sample_rate=sr) ]
+        data.append(d)
+    columns  = ["spec"]
+    columns += ["analytic", "estimate", "original"]
+    return {
+        "columns": columns,
+        "data": data,
+    }
+def rde_specs(factors, est, sim, plot_path, wave_path, sr=16000):
+    data = []
+    num_factors = len(factors)
+    # plot_path = f'test/plot/rde.png'
+    mag_path = plot_path.replace('rde.png', 'rde-mag.png')
+    mel_path = plot_path.replace('rde.png', 'rde-mel.png')
+    seu_path = plot_path.replace('rde.png', 'rde-state-pinn-u.png')
+    sez_path = plot_path.replace('rde.png', 'rde-state-pinn-z.png')
+    ssu_path = plot_path.replace('rde.png', 'rde-state-fdtd-u.png')
+    ssz_path = plot_path.replace('rde.png', 'rde-state-fdtd-z.png')
+    #==============================
+    # plot logmag
+    #==============================
+    specs = []
+    magfig, ax = plt.subplots(nrows=num_factors, ncols=2, figsize=(5,7))
+    for i in range(num_factors):
+        specs.append(librosa.display.specshow(
+            sim["logmag"][i].numpy().T, cmap='magma',ax=ax[i,0]))
+        specs.append(librosa.display.specshow(
+            est["logmag"][i].numpy().T, cmap='magma',ax=ax[i,1]))
+    for spec in specs: spec.set_clim([-60, 30])
+    for i, fc in enumerate(factors): ax[i,0].set_ylabel(r"$x\times" + f"{fc}$")
+    ax[0,0].set_title('FDTD')
+    ax[0,1].set_title('PINN')
+    magfig.tight_layout()
+    magfig.subplots_adjust(wspace=0)
+    magfig.subplots_adjust(hspace=0)
+    magfig.savefig(mag_path)
+    plt.close('all')
+    plt.clf()
+    #==============================
+    # plot logmel
+    #==============================
+    specs = []
+    melfig, ax = plt.subplots(nrows=num_factors, ncols=2, figsize=(5,7))
+    for i in range(num_factors):
+        specs.append(librosa.display.specshow(
+            sim["logmel"][i].numpy().T, cmap='magma',ax=ax[i,0]))
+        specs.append(librosa.display.specshow(
+            est["logmel"][i].numpy().T, cmap='magma',ax=ax[i,1]))
+    for spec in specs: spec.set_clim([-60, 30])
+    for i, fc in enumerate(factors): ax[i,0].set_ylabel(r"$x\times" + f"{fc}$")
+    ax[0,0].set_title('FDTD')
+    ax[0,1].set_title('PINN')
+    melfig.tight_layout()
+    melfig.subplots_adjust(wspace=0)
+    melfig.subplots_adjust(hspace=0)
+    melfig.savefig(mel_path)
+    plt.close('all')
+    plt.clf()
+    #==============================
+    # plot state
+    #==============================
+    u_states = []; dustates = []
+    z_states = []; dzstates = []
+    eu_fig, eu_ax = plt.subplots(num_factors, 2, figsize=(7,7))
+    ez_fig, ez_ax = plt.subplots(num_factors, 2, figsize=(7,7))
+    su_fig, su_ax = plt.subplots(num_factors, 2, figsize=(7,7))
+    sz_fig, sz_ax = plt.subplots(num_factors, 2, figsize=(7,7))
+    u_max = 0
+    z_max = 0
+    cm = 'coolwarm'
+    for i, fc in enumerate(factors):
+        e_dif = est["state"][i] - est["state"][-1]
+        s_dif = sim["state"][i] - sim["state"][-1]
+        Nt = int(sr * 30 / 1000)
+        u_states.append(librosa.display.specshow(sim["state"][i][:Nt,:,0].numpy().T, cmap=cm,ax=su_ax[i,0]))
+        u_states.append(librosa.display.specshow(est["state"][i][:Nt,:,0].numpy().T, cmap=cm,ax=eu_ax[i,0]))
+        dustates.append(librosa.display.specshow(s_dif[:Nt,:,0].numpy().T,           cmap=cm,ax=su_ax[i,1]))
+        dustates.append(librosa.display.specshow(e_dif[:Nt,:,0].numpy().T,           cmap=cm,ax=eu_ax[i,1]))
+        z_states.append(librosa.display.specshow(sim["state"][i][:Nt,:,1].numpy().T, cmap=cm,ax=sz_ax[i,0]))
+        z_states.append(librosa.display.specshow(est["state"][i][:Nt,:,1].numpy().T, cmap=cm,ax=ez_ax[i,0]))
+        dzstates.append(librosa.display.specshow(s_dif[:Nt,:,1].numpy().T,           cmap=cm,ax=sz_ax[i,1]))
+        dzstates.append(librosa.display.specshow(e_dif[:Nt,:,1].numpy().T,           cmap=cm,ax=ez_ax[i,1]))
+        u_max = max(u_max, sim["state"][i][:Nt,:,0].abs().max(), est["state"][i][:Nt,:,0].abs().max())
+        z_max = max(z_max, sim["state"][i][:Nt,:,1].abs().max(), est["state"][i][:Nt,:,1].abs().max())
+        su_ax[i,0].set_ylabel(r"$x\times" + f"{fc}$")
+        eu_ax[i,0].set_ylabel(r"$x\times" + f"{fc}$")
+        sz_ax[i,0].set_ylabel(r"$x\times" + f"{fc}$")
+        ez_ax[i,0].set_ylabel(r"$x\times" + f"{fc}$")
+    for stat in u_states: stat.set_clim([-u_max, u_max])
+    for stat in z_states: stat.set_clim([-z_max, z_max])
+    for stat in dustates: stat.set_clim([-u_max/10, u_max/10])
+    for stat in dzstates: stat.set_clim([-z_max/10, z_max/10])
+    eu_fig.tight_layout(); eu_fig.subplots_adjust(wspace=0); eu_fig.subplots_adjust(hspace=0)
+    ez_fig.tight_layout(); ez_fig.subplots_adjust(wspace=0); ez_fig.subplots_adjust(hspace=0)
+    su_fig.tight_layout(); su_fig.subplots_adjust(wspace=0); su_fig.subplots_adjust(hspace=0)
+    sz_fig.tight_layout(); sz_fig.subplots_adjust(wspace=0); sz_fig.subplots_adjust(hspace=0)
+    eu_fig.savefig(seu_path)
+    ez_fig.savefig(sez_path)
+    su_fig.savefig(ssu_path)
+    sz_fig.savefig(ssz_path)
+    plt.close('all')
+    plt.clf()
+    for i, factor in enumerate(factors):
+        fstr = f"{factor:.1f}".replace('.', '_')
+        # wave_path = f'test/wave/rde.wav'
+        we_path = wave_path.replace('rde.wav', f'rde-pinn-{fstr}.wav')
+        ws_path = wave_path.replace('rde.wav', f'rde-fdtd-{fstr}.wav')
+        est_wav = est["wav"][i].squeeze()
+        sim_wav = sim["wav"][i].squeeze()
+        sf.write(we_path, est_wav, samplerate=sr)
+        sf.write(ws_path, sim_wav, samplerate=sr)
+    d  = [ wandb.Image(magfig) ]; columns  = ["logmag"]
+    d += [ wandb.Image(melfig) ]; columns += ["logmel"]
+    d += [ wandb.Image(eu_fig) ]; columns += ["PINN-u"]
+    d += [ wandb.Image(su_fig) ]; columns += ["FDTD-u"]
+    d += [ wandb.Image(ez_fig) ]; columns += ["PINN-z"]
+    d += [ wandb.Image(sz_fig) ]; columns += ["FDTD-z"]
+    d += [ wandb.Audio(est_wav, sample_rate=sr) ]; columns += ["PINN wav"]
+    d += [ wandb.Audio(sim_wav, sample_rate=sr) ]; columns += ["FDTD wav"]
+    data.append(d)
+    return {
+        "columns": columns,
+        "data": data,
+    }