RemFx / remfx /effects.py
mattricesound's picture
Update to latest classifier inference
568c3f1
raw
history blame
No virus
22.9 kB
import torch
import torchaudio
import numpy as np
import scipy.signal
import scipy.stats
import pyloudnorm as pyln
from torchvision.transforms import Compose, RandomApply
from typing import List
from pedalboard import (
Pedalboard,
Chorus,
Reverb,
Compressor,
Phaser,
Delay,
Distortion,
Limiter,
)
# Explicit export list, left empty: `from <this module> import *` binds no names,
# so every helper and effect class below has to be imported by name.
__all__ = []
def loguniform(low=0, high=1):
    """Draw a single sample from SciPy's log-uniform distribution.

    Args:
        low: Lower bound, forwarded as the first shape parameter.
        high: Upper bound, forwarded as the second shape parameter.

    Returns:
        Whatever ``scipy.stats.loguniform.rvs(low, high)`` produces for one draw.

    NOTE(review): SciPy documents the shape constraint ``0 < a < b`` for this
    distribution, so the default ``low=0`` looks unusable as-is -- confirm that
    every caller passes a positive ``low``.
    """
    distribution = scipy.stats.loguniform
    sample = distribution.rvs(low, high)
    return sample
def rand(low=0, high=1):
    """Draw one value by rescaling a ``torch.rand`` sample to start at ``low``.

    Args:
        low: Value returned when the underlying unit sample is 0.
        high: Upper end of the range; the unit sample is scaled by ``high - low``.

    Returns:
        A NumPy scalar (the single ``torch.rand`` element, scaled and shifted).
    """
    unit_sample = torch.rand(1).numpy()[0]
    span = high - low
    return (unit_sample * span) + low
def randint(low=0, high=1):
    """Draw one integer from the closed range ``[low, high]``.

    ``torch.randint`` excludes its upper bound, so ``high + 1`` is passed to
    make ``high`` itself reachable.

    Args:
        low: Smallest value that can be returned.
        high: Largest value that can be returned.

    Returns:
        A NumPy integer scalar taken from the one-element draw.
    """
    exclusive_upper = high + 1
    draw = torch.randint(low, exclusive_upper, (1,))
    return draw.numpy()[0]
def biqaud(
    gain_db: float,
    cutoff_freq: float,
    q_factor: float,
    sample_rate: float,
    filter_type: str,
):
    """Use design parameters to generate coefficients for a specific filter type.

    Args:
        gain_db (float): Shelving/peaking filter gain in dB.
        cutoff_freq (float): Cutoff frequency in Hz.
        q_factor (float): Q factor.
        sample_rate (float): Sample rate in Hz.
        filter_type (str): Filter type.
            One of "low_shelf", "high_shelf", or "peaking".

    Returns:
        b (np.ndarray): Numerator filter coefficients stored as [b0, b1, b2]
        a (np.ndarray): Denominator filter coefficients stored as [a0, a1, a2]

        Both arrays are divided by ``a0``, so ``a[0]`` is always 1.

    Raises:
        ValueError: If ``filter_type`` is not one of the supported names.
    """
    A = 10 ** (gain_db / 40.0)
    w0 = 2.0 * np.pi * (cutoff_freq / sample_rate)
    alpha = np.sin(w0) / (2.0 * q_factor)

    cos_w0 = np.cos(w0)
    sqrt_A = np.sqrt(A)

    if filter_type == "high_shelf":
        b0 = A * ((A + 1) + (A - 1) * cos_w0 + 2 * sqrt_A * alpha)
        b1 = -2 * A * ((A - 1) + (A + 1) * cos_w0)
        b2 = A * ((A + 1) + (A - 1) * cos_w0 - 2 * sqrt_A * alpha)
        a0 = (A + 1) - (A - 1) * cos_w0 + 2 * sqrt_A * alpha
        a1 = 2 * ((A - 1) - (A + 1) * cos_w0)
        a2 = (A + 1) - (A - 1) * cos_w0 - 2 * sqrt_A * alpha
    elif filter_type == "low_shelf":
        b0 = A * ((A + 1) - (A - 1) * cos_w0 + 2 * sqrt_A * alpha)
        b1 = 2 * A * ((A - 1) - (A + 1) * cos_w0)
        b2 = A * ((A + 1) - (A - 1) * cos_w0 - 2 * sqrt_A * alpha)
        a0 = (A + 1) + (A - 1) * cos_w0 + 2 * sqrt_A * alpha
        a1 = -2 * ((A - 1) + (A + 1) * cos_w0)
        a2 = (A + 1) + (A - 1) * cos_w0 - 2 * sqrt_A * alpha
    elif filter_type == "peaking":
        b0 = 1 + alpha * A
        b1 = -2 * cos_w0
        b2 = 1 - alpha * A
        a0 = 1 + alpha / A
        a1 = -2 * cos_w0
        a2 = 1 - alpha / A
    else:
        # Fail loudly instead of falling through to an UnboundLocalError on b0.
        raise ValueError(
            f"Unsupported filter_type {filter_type!r}; "
            'expected "low_shelf", "high_shelf", or "peaking".'
        )

    # Normalize both polynomials by a0 so the returned denominator is monic.
    b = np.array([b0, b1, b2]) / a0
    a = np.array([a0, a1, a2]) / a0

    return b, a
def parametric_eq(
    x: np.ndarray,
    sample_rate: float,
    low_shelf_gain_db: float = 0.0,
    low_shelf_cutoff_freq: float = 80.0,
    low_shelf_q_factor: float = 0.707,
    band_gains_db: List[float] = [0.0],
    band_cutoff_freqs: List[float] = [300.0],
    band_q_factors: List[float] = [0.707],
    high_shelf_gain_db: float = 0.0,
    high_shelf_cutoff_freq: float = 1000.0,
    high_shelf_q_factor: float = 0.707,
    dtype=np.float32,
):
    """Multiband parametric EQ.

    Low-shelf -> Band 1 -> ... -> Band N -> High-shelf

    Each stage is a biquad designed by ``biqaud`` and applied with
    ``scipy.signal.lfilter`` (which filters along the last axis by default).

    Args:
        x (np.ndarray): Audio samples to filter.
        sample_rate (float): Sample rate in Hz.
        low_shelf_gain_db (float): Low-shelf gain in dB.
        low_shelf_cutoff_freq (float): Low-shelf cutoff frequency in Hz.
        low_shelf_q_factor (float): Low-shelf Q factor.
        band_gains_db (List[float]): Gain in dB of each peaking band.
        band_cutoff_freqs (List[float]): Centre frequency in Hz of each band.
        band_q_factors (List[float]): Q factor of each band.
        high_shelf_gain_db (float): High-shelf gain in dB.
        high_shelf_cutoff_freq (float): High-shelf cutoff frequency in Hz.
        high_shelf_q_factor (float): High-shelf Q factor.
        dtype: NumPy dtype the filtered signal is cast to before returning.

    Returns:
        np.ndarray: The filtered signal, cast to ``dtype``.

    Raises:
        ValueError: If the three per-band lists do not have the same length.
    """
    # The list defaults are only read, never mutated, so sharing them is safe.
    if not (len(band_gains_db) == len(band_cutoff_freqs) == len(band_q_factors)):
        # Explicit check (not an assert) so it survives `python -O`; otherwise
        # zip() below would silently drop the unmatched band parameters.
        raise ValueError(
            "band_gains_db, band_cutoff_freqs and band_q_factors "
            "must all have the same length"
        )

    # Describe the cascade in processing order: low shelf, peaks, high shelf.
    stages = [
        (low_shelf_gain_db, low_shelf_cutoff_freq, low_shelf_q_factor, "low_shelf")
    ]
    stages.extend(
        (gain_db, cutoff_freq, q_factor, "peaking")
        for gain_db, cutoff_freq, q_factor in zip(
            band_gains_db, band_cutoff_freqs, band_q_factors
        )
    )
    stages.append(
        (high_shelf_gain_db, high_shelf_cutoff_freq, high_shelf_q_factor, "high_shelf")
    )

    for gain_db, cutoff_freq, q_factor, filter_type in stages:
        b, a = biqaud(gain_db, cutoff_freq, q_factor, sample_rate, filter_type)
        x = scipy.signal.lfilter(b, a, x)

    return x.astype(dtype)
class RandomParametricEQ(torch.nn.Module):
    """Parametric EQ whose settings are re-drawn on every forward call.

    Gains and Q factors are drawn with ``rand`` from the configured ranges.
    The peaking-band cutoffs are drawn with ``loguniform`` from
    ``[min_cutoff_freq, max_cutoff_freq]``; the shelf cutoffs use the fixed
    ranges 20-200 Hz (low shelf) and 8000-16000 Hz (high shelf).
    """

    def __init__(
        self,
        sample_rate: float,
        num_bands: int = 3,
        min_gain_db: float = -6.0,
        max_gain_db: float = +6.0,
        min_cutoff_freq: float = 1000.0,
        max_cutoff_freq: float = 10000.0,
        min_q_factor: float = 0.1,
        max_q_factor: float = 4.0,
    ):
        super().__init__()
        self.sample_rate = sample_rate
        self.num_bands = num_bands
        self.min_gain_db, self.max_gain_db = min_gain_db, max_gain_db
        self.min_cutoff_freq, self.max_cutoff_freq = min_cutoff_freq, max_cutoff_freq
        self.min_q_factor, self.max_q_factor = min_q_factor, max_q_factor

    def forward(self, x: torch.Tensor):
        """Filter ``x`` with a freshly sampled EQ curve.

        Args:
            x (torch.Tensor): Audio samples with shape (chs, seq_len). The same
                filter is applied to every channel along the final dimension.

        Returns:
            torch.Tensor: The equalized audio, converted back from NumPy.
        """
        gain_range = (self.min_gain_db, self.max_gain_db)
        q_range = (self.min_q_factor, self.max_q_factor)

        # Shelves first, then bands: the draw order is kept fixed so that the
        # random streams are consumed in a stable sequence.
        low_gain = rand(*gain_range)
        low_cutoff = loguniform(20.0, 200.0)
        low_q = rand(*q_range)

        high_gain = rand(*gain_range)
        high_cutoff = loguniform(8000.0, 16000.0)
        high_q = rand(*q_range)

        # One (gain, cutoff, q) triple per peaking band.
        bands = [
            (
                rand(*gain_range),
                loguniform(self.min_cutoff_freq, self.max_cutoff_freq),
                rand(*q_range),
            )
            for _ in range(self.num_bands)
        ]

        equalized = parametric_eq(
            x.numpy(),
            self.sample_rate,
            low_shelf_gain_db=low_gain,
            low_shelf_cutoff_freq=low_cutoff,
            low_shelf_q_factor=low_q,
            band_gains_db=[gain for gain, _, _ in bands],
            band_cutoff_freqs=[cutoff for _, cutoff, _ in bands],
            band_q_factors=[q for _, _, q in bands],
            high_shelf_gain_db=high_gain,
            high_shelf_cutoff_freq=high_cutoff,
            high_shelf_q_factor=high_q,
        )
        return torch.from_numpy(equalized)
def stereo_widener(x: torch.Tensor, width: torch.Tensor):
    """Rebalance the mid and side components of a two-channel signal.

    Index 0 and index 1 along the first dimension are taken as left and right.
    They are converted to mid/side, the mid part is scaled by
    ``2 * (1 - width)`` and the side part by ``2 * width``, and the result is
    converted back to left/right. ``width = 0.5`` therefore leaves the signal
    unchanged.

    Args:
        x (torch.Tensor): Audio whose first dimension holds the two channels.
        width: Balance between mid (0) and side (1) content.

    Returns:
        torch.Tensor: New tensor with left and right stacked on dimension 0.
    """
    norm = np.sqrt(2)
    left_in, right_in = x[0, ...], x[1, ...]

    # Encode to mid/side; these are fresh tensors, so scaling them in place
    # below does not touch the caller's data.
    mid = (left_in + right_in) / norm
    side = (left_in - right_in) / norm

    mid *= 2 * (1 - width)
    side *= 2 * width

    # Decode back to left/right.
    return torch.stack(((mid + side) / norm, (mid - side) / norm), dim=0)
class RandomStereoWidener(torch.nn.Module):
    """Apply ``stereo_widener`` with a width drawn anew on every call."""

    def __init__(
        self,
        sample_rate: float,
        min_width: float = 0.0,
        max_width: float = 1.0,
    ) -> None:
        super().__init__()
        self.sample_rate = sample_rate
        self.min_width, self.max_width = min_width, max_width

    def forward(self, x: torch.Tensor):
        """Widen/narrow ``x`` using a width drawn from [min_width, max_width]."""
        return stereo_widener(x, rand(self.min_width, self.max_width))
class RandomVolumeAutomation(torch.nn.Module):
    """Apply a random piecewise-linear gain curve (in dB) along the last axis.

    The signal is split into a random number of segments with random relative
    lengths. Within each segment the gain ramps linearly (in dB) from the
    previous segment's end gain to a newly drawn end gain; the first segment
    starts at 0 dB.
    """

    def __init__(
        self,
        sample_rate: float,
        min_segments: int = 1,
        max_segments: int = 3,
        min_gain_db: float = -6.0,
        max_gain_db: float = 6.0,
    ) -> None:
        super().__init__()
        self.sample_rate = sample_rate
        self.min_segments = min_segments
        self.max_segments = max_segments
        self.min_gain_db = min_gain_db
        self.max_gain_db = max_gain_db

    def forward(self, x: torch.Tensor):
        """Return a gain-automated copy of ``x``.

        Args:
            x (torch.Tensor): Audio whose last dimension is time.

        Returns:
            torch.Tensor: A new tensor; the input is not modified.
        """
        num_samples = x.shape[-1]
        gain_db = torch.zeros(num_samples).type_as(x)

        num_segments = randint(self.min_segments, self.max_segments)
        concentration = [rand(0, 10) for _ in range(num_segments)]
        proportions = np.random.dirichlet(concentration, 1)
        segment_lengths = (num_samples * proportions).astype("int")[0]

        if len(segment_lengths) > 0:
            # Truncating to int can drop samples; hand the leftover to the last
            # segment so the ramp covers the whole signal instead of leaving
            # the tail at 0 dB.
            segment_lengths[-1] = num_samples - segment_lengths[:-1].sum()

        segment_start = 0
        start_gain_db = 0
        for segment_samples in segment_lengths:
            segment_samples = int(segment_samples)
            # sample random end gain
            end_gain_db = rand(self.min_gain_db, self.max_gain_db)
            segment_stop = segment_start + segment_samples
            gain_db[segment_start:segment_stop] = torch.linspace(
                start_gain_db, end_gain_db, steps=segment_samples
            )
            # The next segment continues from where this one ended.
            segment_start = segment_stop
            start_gain_db = end_gain_db

        # Out-of-place multiply: do not overwrite the caller's tensor.
        return x * 10 ** (gain_db / 20.0)
class RandomPedalboardCompressor(torch.nn.Module):
    """Run audio through a pedalboard ``Compressor`` with random settings.

    Threshold, ratio, attack and release are each drawn with ``rand`` from
    their configured [min, max] range on every forward call.
    """

    def __init__(
        self,
        sample_rate: float,
        min_threshold_db: float = -42.0,
        max_threshold_db: float = -6.0,
        min_ratio: float = 1.5,
        max_ratio: float = 4.0,
        min_attack_ms: float = 1.0,
        max_attack_ms: float = 50.0,
        min_release_ms: float = 10.0,
        max_release_ms: float = 250.0,
    ) -> None:
        super().__init__()
        self.sample_rate = sample_rate
        self.min_threshold_db, self.max_threshold_db = min_threshold_db, max_threshold_db
        self.min_ratio, self.max_ratio = min_ratio, max_ratio
        self.min_attack_ms, self.max_attack_ms = min_attack_ms, max_attack_ms
        self.min_release_ms, self.max_release_ms = min_release_ms, max_release_ms

    def forward(self, x: torch.Tensor):
        """Compress ``x`` and return the result as a new tensor."""
        # Dict entries are evaluated top to bottom, which fixes the draw order.
        settings = {
            "threshold_db": rand(self.min_threshold_db, self.max_threshold_db),
            "ratio": rand(self.min_ratio, self.max_ratio),
            "attack_ms": rand(self.min_attack_ms, self.max_attack_ms),
            "release_ms": rand(self.min_release_ms, self.max_release_ms),
        }
        board = Pedalboard([Compressor(**settings)])

        # process audio using the pedalboard
        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)
class RandomPedalboardDelay(torch.nn.Module):
    """Run audio through a pedalboard ``Delay`` with random settings.

    The delay time is drawn with ``loguniform``; feedback and mix are drawn
    with ``rand``. The constructor keyword ``max_delay_sconds`` keeps its
    existing spelling so current keyword callers continue to work.
    """

    def __init__(
        self,
        sample_rate: float,
        min_delay_seconds: float = 0.1,
        max_delay_sconds: float = 1.0,
        min_feedback: float = 0.05,
        max_feedback: float = 0.6,
        min_mix: float = 0.0,
        max_mix: float = 0.7,
    ) -> None:
        super().__init__()
        self.sample_rate = sample_rate
        self.min_delay_seconds, self.max_delay_seconds = (
            min_delay_seconds,
            max_delay_sconds,
        )
        self.min_feedback, self.max_feedback = min_feedback, max_feedback
        self.min_mix, self.max_mix = min_mix, max_mix

    def forward(self, x: torch.Tensor):
        """Apply the randomly configured delay to ``x``."""
        # Dict entries are evaluated top to bottom, which fixes the draw order.
        settings = {
            "delay_seconds": loguniform(self.min_delay_seconds, self.max_delay_seconds),
            "feedback": rand(self.min_feedback, self.max_feedback),
            "mix": rand(self.min_mix, self.max_mix),
        }
        board = Pedalboard([Delay(**settings)])
        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)
class RandomPedalboardChorus(torch.nn.Module):
    """Run audio through a pedalboard ``Chorus`` with random settings.

    Rate, depth, centre delay, feedback and mix are each drawn with ``rand``
    from their configured [min, max] range on every forward call.
    """

    def __init__(
        self,
        sample_rate: float,
        min_rate_hz: float = 0.25,
        max_rate_hz: float = 4.0,
        min_depth: float = 0.0,
        max_depth: float = 0.6,
        min_centre_delay_ms: float = 5.0,
        max_centre_delay_ms: float = 10.0,
        min_feedback: float = 0.1,
        max_feedback: float = 0.6,
        min_mix: float = 0.1,
        max_mix: float = 0.7,
    ) -> None:
        super().__init__()
        self.sample_rate = sample_rate
        self.min_rate_hz, self.max_rate_hz = min_rate_hz, max_rate_hz
        self.min_depth, self.max_depth = min_depth, max_depth
        self.min_centre_delay_ms, self.max_centre_delay_ms = (
            min_centre_delay_ms,
            max_centre_delay_ms,
        )
        self.min_feedback, self.max_feedback = min_feedback, max_feedback
        self.min_mix, self.max_mix = min_mix, max_mix

    def forward(self, x: torch.Tensor):
        """Apply the randomly configured chorus to ``x``."""
        # Dict entries are evaluated top to bottom, which fixes the draw order.
        settings = {
            "rate_hz": rand(self.min_rate_hz, self.max_rate_hz),
            "depth": rand(self.min_depth, self.max_depth),
            "centre_delay_ms": rand(self.min_centre_delay_ms, self.max_centre_delay_ms),
            "feedback": rand(self.min_feedback, self.max_feedback),
            "mix": rand(self.min_mix, self.max_mix),
        }
        board = Pedalboard([Chorus(**settings)])

        # process audio using the pedalboard
        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)
class RandomPedalboardPhaser(torch.nn.Module):
    """Run audio through a pedalboard ``Phaser`` with random settings.

    Rate, depth, centre frequency, feedback and mix are each drawn with
    ``rand`` from their configured [min, max] range on every forward call.
    """

    def __init__(
        self,
        sample_rate: float,
        min_rate_hz: float = 0.25,
        max_rate_hz: float = 5.0,
        min_depth: float = 0.1,
        max_depth: float = 0.6,
        min_centre_frequency_hz: float = 200.0,
        max_centre_frequency_hz: float = 600.0,
        min_feedback: float = 0.1,
        max_feedback: float = 0.6,
        min_mix: float = 0.1,
        max_mix: float = 0.7,
    ) -> None:
        super().__init__()
        self.sample_rate = sample_rate
        self.min_rate_hz = min_rate_hz
        self.max_rate_hz = max_rate_hz
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.min_centre_frequency_hz = min_centre_frequency_hz
        self.max_centre_frequency_hz = max_centre_frequency_hz
        self.min_feedback = min_feedback
        self.max_feedback = max_feedback
        self.min_mix = min_mix
        self.max_mix = max_mix

    def forward(self, x: torch.Tensor):
        """Apply the randomly configured phaser to ``x``.

        Args:
            x (torch.Tensor): Audio tensor; it is handed to pedalboard as a
                NumPy array together with ``self.sample_rate``.

        Returns:
            torch.Tensor: The processed audio.
        """
        board = Pedalboard()

        rate_hz = rand(self.min_rate_hz, self.max_rate_hz)
        depth = rand(self.min_depth, self.max_depth)
        # Draw across the full configured range. The upper bound used to be
        # the minimum again, which pinned the centre frequency to its minimum.
        centre_frequency_hz = rand(
            self.min_centre_frequency_hz, self.max_centre_frequency_hz
        )
        feedback = rand(self.min_feedback, self.max_feedback)
        mix = rand(self.min_mix, self.max_mix)

        board.append(
            Phaser(
                rate_hz=rate_hz,
                depth=depth,
                centre_frequency_hz=centre_frequency_hz,
                feedback=feedback,
                mix=mix,
            )
        )

        # process audio using the pedalboard
        return torch.from_numpy(board(x.numpy(), self.sample_rate))
class RandomPedalboardLimiter(torch.nn.Module):
    """Run audio through a pedalboard ``Limiter`` with random settings.

    Threshold and release are drawn with ``rand`` from their configured
    [min, max] range on every forward call.
    """

    def __init__(
        self,
        sample_rate: float,
        min_threshold_db: float = -32.0,
        max_threshold_db: float = -6.0,
        min_release_ms: float = 10.0,
        max_release_ms: float = 300.0,
    ) -> None:
        super().__init__()
        self.sample_rate = sample_rate
        self.min_threshold_db, self.max_threshold_db = min_threshold_db, max_threshold_db
        self.min_release_ms, self.max_release_ms = min_release_ms, max_release_ms

    def forward(self, x: torch.Tensor):
        """Apply the randomly configured limiter to ``x``."""
        # Dict entries are evaluated top to bottom, which fixes the draw order.
        settings = {
            "threshold_db": rand(self.min_threshold_db, self.max_threshold_db),
            "release_ms": rand(self.min_release_ms, self.max_release_ms),
        }
        board = Pedalboard([Limiter(**settings)])
        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)
class RandomPedalboardDistortion(torch.nn.Module):
    """Run audio through a pedalboard ``Distortion`` with a random drive.

    The drive (in dB) is drawn with ``rand`` from [min_drive_db, max_drive_db]
    on every forward call.
    """

    def __init__(
        self,
        sample_rate: float,
        min_drive_db: float = -20.0,
        max_drive_db: float = 12.0,
    ):
        super().__init__()
        self.sample_rate = sample_rate
        self.min_drive_db, self.max_drive_db = min_drive_db, max_drive_db

    def forward(self, x: torch.Tensor):
        """Apply the randomly driven distortion to ``x``."""
        plugin = Distortion(drive_db=rand(self.min_drive_db, self.max_drive_db))
        board = Pedalboard([plugin])
        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)
class RandomSoxReverb(torch.nn.Module):
    """Apply the SoX ``reverb`` effect with randomly drawn settings.

    SoX renders the wet signal only; the wet/dry balance is then mixed manually
    with a randomly drawn ``wet_dry`` factor. ``room_scale`` is passed to SoX
    as the room-scale argument, ``stereo_depth`` as stereo-depth, and
    ``pre_delay`` as pre-delay.
    """

    def __init__(
        self,
        sample_rate: float,
        min_reverberance: float = 10.0,
        max_reverberance: float = 100.0,
        min_high_freq_damping: float = 0.0,
        max_high_freq_damping: float = 100.0,
        min_wet_dry: float = 0.0,
        max_wet_dry: float = 1.0,
        min_room_scale: float = 5.0,
        max_room_scale: float = 100.0,
        min_stereo_depth: float = 20.0,
        max_stereo_depth: float = 100.0,
        min_pre_delay: float = 0.0,
        max_pre_delay: float = 100.0,
    ) -> None:
        super().__init__()
        self.sample_rate = sample_rate
        self.min_reverberance = min_reverberance
        self.max_reverberance = max_reverberance
        self.min_high_freq_damping = min_high_freq_damping
        self.max_high_freq_damping = max_high_freq_damping
        self.min_wet_dry = min_wet_dry
        self.max_wet_dry = max_wet_dry
        self.min_room_scale = min_room_scale
        self.max_room_scale = max_room_scale
        self.min_stereo_depth = min_stereo_depth
        self.max_stereo_depth = max_stereo_depth
        self.min_pre_delay = min_pre_delay
        self.max_pre_delay = max_pre_delay

    def forward(self, x: torch.Tensor):
        """Return a wet/dry mix of ``x`` and its SoX-reverberated version.

        Args:
            x (torch.Tensor): Audio passed to SoX with ``channels_first=True``.

        Returns:
            torch.Tensor: ``x * (1 - wet_dry) + wet * wet_dry``.
        """
        reverberance = rand(self.min_reverberance, self.max_reverberance)
        high_freq_damping = rand(self.min_high_freq_damping, self.max_high_freq_damping)
        room_scale = rand(self.min_room_scale, self.max_room_scale)
        stereo_depth = rand(self.min_stereo_depth, self.max_stereo_depth)
        wet_dry = rand(self.min_wet_dry, self.max_wet_dry)
        pre_delay = rand(self.min_pre_delay, self.max_pre_delay)

        # SoX syntax: reverb [-w|--wet-only] [reverberance [HF-damping
        # [room-scale [stereo-depth [pre-delay [wet-gain]]]]]]
        # The flag has to come first; the last positional slot is the numeric
        # wet-gain, so a trailing "--wet-only" does not match the syntax.
        effects = [
            [
                "reverb",
                "--wet-only",
                f"{reverberance}",
                f"{high_freq_damping}",
                f"{room_scale}",
                f"{stereo_depth}",
                f"{pre_delay}",
            ]
        ]
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, self.sample_rate, effects, channels_first=True
        )

        # manual wet/dry mix
        return (x * (1 - wet_dry)) + (y * wet_dry)
class RandomPedalboardReverb(torch.nn.Module):
    """Run audio through a pedalboard ``Reverb`` with random settings.

    Room size, damping, wet/dry balance and width are each drawn with ``rand``
    from their configured [min, max] range. The drawn ``wet_dry`` value sets
    the plugin's wet level, and ``1 - wet_dry`` its dry level.
    """

    def __init__(
        self,
        sample_rate: float,
        min_room_size: float = 0.0,
        max_room_size: float = 1.0,
        min_damping: float = 0.0,
        max_damping: float = 1.0,
        min_wet_dry: float = 0.0,
        max_wet_dry: float = 0.7,
        min_width: float = 0.0,
        max_width: float = 1.0,
    ) -> None:
        super().__init__()
        self.sample_rate = sample_rate
        self.min_room_size, self.max_room_size = min_room_size, max_room_size
        self.min_damping, self.max_damping = min_damping, max_damping
        self.min_wet_dry, self.max_wet_dry = min_wet_dry, max_wet_dry
        self.min_width, self.max_width = min_width, max_width

    def forward(self, x: torch.Tensor):
        """Apply the randomly configured reverb to ``x``."""
        # Draw order: room size, damping, wet/dry, width.
        room_size = rand(self.min_room_size, self.max_room_size)
        damping = rand(self.min_damping, self.max_damping)
        wet_level = rand(self.min_wet_dry, self.max_wet_dry)
        width = rand(self.min_width, self.max_width)

        plugin = Reverb(
            room_size=room_size,
            damping=damping,
            wet_level=wet_level,
            dry_level=(1 - wet_level),
            width=width,
        )
        board = Pedalboard([plugin])
        processed = board(x.numpy(), self.sample_rate)
        return torch.from_numpy(processed)
class LoudnessNormalize(torch.nn.Module):
    """Scale audio so its integrated loudness matches a target level.

    Loudness is measured with a ``pyloudnorm`` meter built for the given
    sample rate. The correction is clamped to [-120, 40] dB before being
    applied as a linear gain.
    """

    def __init__(self, sample_rate: float, target_lufs_db: float = -32.0) -> None:
        super().__init__()
        self.meter = pyln.Meter(sample_rate)
        self.target_lufs_db = target_lufs_db

    def forward(self, x: torch.Tensor):
        """Return ``x`` multiplied by the gain that reaches the target loudness.

        Args:
            x (torch.Tensor): 2-D audio tensor; its two dimensions are swapped
                before the data is handed to the meter.
        """
        meter_input = x.permute(1, 0).numpy()
        measured_lufs_db = self.meter.integrated_loudness(meter_input)

        correction_db = torch.tensor([self.target_lufs_db - measured_lufs_db]).float()
        correction_db = correction_db.clamp(-120, 40.0)

        linear_gain = 10.0 ** (correction_db / 20.0)
        return linear_gain * x
class RandomAudioEffectsChannel(torch.nn.Module):
    """Chain of randomly applied effects followed by loudness normalization.

    Each effect is wrapped in ``RandomApply`` with its own probability, in the
    fixed order: EQ, distortion, delay, chorus, phaser, compressor, reverb,
    stereo widener, limiter, volume automation. ``LoudnessNormalize`` always
    runs last.
    """

    def __init__(
        self,
        sample_rate: float,
        parametric_eq_prob: float = 0.7,
        distortion_prob: float = 0.01,
        delay_prob: float = 0.1,
        chorus_prob: float = 0.01,
        phaser_prob: float = 0.01,
        compressor_prob: float = 0.4,
        reverb_prob: float = 0.2,
        stereo_widener_prob: float = 0.3,
        limiter_prob: float = 0.3,
        vol_automation_prob: float = 0.7,
        target_lufs_db: float = -32.0,
    ) -> None:
        super().__init__()

        # (effect class, probability of applying it), in processing order.
        stages = [
            (RandomParametricEQ, parametric_eq_prob),
            (RandomPedalboardDistortion, distortion_prob),
            (RandomPedalboardDelay, delay_prob),
            (RandomPedalboardChorus, chorus_prob),
            (RandomPedalboardPhaser, phaser_prob),
            (RandomPedalboardCompressor, compressor_prob),
            (RandomPedalboardReverb, reverb_prob),
            (RandomStereoWidener, stereo_widener_prob),
            (RandomPedalboardLimiter, limiter_prob),
            (RandomVolumeAutomation, vol_automation_prob),
        ]
        chain = [
            RandomApply([effect_cls(sample_rate)], p=probability)
            for effect_cls, probability in stages
        ]
        # Normalization is unconditional, so it is not wrapped in RandomApply.
        chain.append(LoudnessNormalize(sample_rate, target_lufs_db=target_lufs_db))

        self.transforms = Compose(chain)

    def forward(self, x: torch.Tensor):
        """Pass ``x`` through the composed effect chain."""
        return self.transforms(x)
# Pedalboard-backed effect classes (the classes themselves, not instances).
# The phaser and limiter classes are defined in this file but are currently
# commented out of this list.
Pedalboard_Effects = [
    RandomPedalboardReverb,
    RandomPedalboardChorus,
    RandomPedalboardDelay,
    RandomPedalboardDistortion,
    RandomPedalboardCompressor,
    # RandomPedalboardPhaser,
    # RandomPedalboardLimiter,
]