azamat committed on
Commit
05b81bd
1 Parent(s): fbc84be
Files changed (1) hide show
  1. stft_loss.py +184 -0
stft_loss.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adapted from https://github.com/kan-bayashi/ParallelWaveGAN
2
+
3
+ # Original Copyright 2019 Tomoki Hayashi
4
+ # MIT License (https://opensource.org/licenses/MIT)
5
+
6
+ """STFT-based Loss modules."""
7
+
8
+ import torch
9
+ import torch.nn.functional as F
10
+
11
try:
    from distutils.version import LooseVersion
except ImportError:
    # distutils was removed from the standard library in Python 3.12.
    # The name is kept defined so the module still imports.
    LooseVersion = None

# Compare only the numeric "major.minor" prefix so that local build tags
# (e.g. "2.1.0+cu118") do not take part in the comparison.
is_pytorch_17plus = tuple(
    int(part) for part in torch.__version__.split("+")[0].split(".")[:2]
) >= (1, 7)
14
+
15
+
16
def stft(x, fft_size, hop_size, win_length, window):
    """Perform STFT and convert to magnitude spectrogram.

    Args:
        x (Tensor): Input signal tensor (B, T).
        fft_size (int): FFT size.
        hop_size (int): Hop size.
        win_length (int): Window length.
        window (Tensor): Window tensor forwarded to ``torch.stft``.

    Returns:
        Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).

    """
    try:
        # Ask for complex output and view it as (..., 2) real/imag pairs;
        # ``return_complex=False`` is a deprecated path in ``torch.stft``.
        x_stft = torch.view_as_real(
            torch.stft(
                x, fft_size, hop_size, win_length, window, return_complex=True
            )
        )
    except TypeError:
        # PyTorch < 1.7 has no ``return_complex`` keyword and already
        # returns the (..., 2) real/imag layout.
        x_stft = torch.stft(x, fft_size, hop_size, win_length, window)
    real = x_stft[..., 0]
    imag = x_stft[..., 1]

    # NOTE(kan-bayashi): clamp is needed to avoid nan or inf
    power = torch.clamp(real**2 + imag**2, min=1e-7)
    # torch.stft puts frequency before time; swap to (B, #frames, #freq_bins).
    return torch.sqrt(power).transpose(2, 1)
39
+
40
+
41
class SpectralConvergenceLoss(torch.nn.Module):
    """Spectral convergence loss module."""

    def __init__(self):
        """Initialize spectral convergence loss module."""
        super(SpectralConvergenceLoss, self).__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).

        Returns:
            Tensor: Spectral convergence loss value.

        """
        # Frobenius norm of the error relative to the Frobenius norm of the
        # target; both norms are taken over the whole tensor.
        error_norm = torch.norm(y_mag - x_mag, p="fro")
        target_norm = torch.norm(y_mag, p="fro")
        return error_norm / target_norm
60
+
61
+
62
class LogSTFTMagnitudeLoss(torch.nn.Module):
    """Log STFT magnitude loss module."""

    def __init__(self):
        """Initialize log STFT magnitude loss module."""
        super(LogSTFTMagnitudeLoss, self).__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).

        Returns:
            Tensor: Log STFT magnitude loss value.

        """
        # L1 distance between the log-magnitudes, with F.l1_loss's default
        # reduction.
        log_target = torch.log(y_mag)
        log_predicted = torch.log(x_mag)
        return F.l1_loss(log_target, log_predicted)
81
+
82
+
83
class STFTLoss(torch.nn.Module):
    """STFT loss module."""

    def __init__(
        self, fft_size=1024, shift_size=120, win_length=600, window="hann_window",
        band="full"
    ):
        """Initialize STFT loss module.

        Args:
            fft_size (int): FFT size.
            shift_size (int): Hop size passed to the STFT.
            win_length (int): Window length.
            window (str): Name of a window function in ``torch``
                (resolved with ``getattr(torch, window)``).
            band (str): "full" to compare every frequency bin, "high" to
                compare only the upper half of the frequency bins.

        """
        super(STFTLoss, self).__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length
        self.band = band

        self.spectral_convergence_loss = SpectralConvergenceLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()
        # NOTE(kan-bayashi): Use register_buffer to fix #223
        self.register_buffer("window", getattr(torch, window)(win_length))

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: Log STFT magnitude loss value.

        Raises:
            NotImplementedError: If ``self.band`` is neither "full" nor "high".

        """
        x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window)
        y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window)

        if self.band == "high":
            # The magnitudes are laid out as (B, #frames, #freq_bins), so the
            # frequency axis is the LAST one. Slicing axis 1 would drop time
            # frames instead of low-frequency bins.
            first_high_bin = x_mag.shape[-1] // 2
            x_mag = x_mag[..., first_high_bin:]
            y_mag = y_mag[..., first_high_bin:]
        elif self.band != "full":
            raise NotImplementedError(
                "band must be 'full' or 'high', got {!r}".format(self.band)
            )

        sc_loss = self.spectral_convergence_loss(x_mag, y_mag)
        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)

        return sc_loss, mag_loss
128
+
129
+
130
class MultiResolutionSTFTLoss(torch.nn.Module):
    """Multi resolution STFT loss module."""

    def __init__(
        self, fft_sizes=(1024, 2048, 512), hop_sizes=(120, 240, 50), win_lengths=(600, 1200, 240),
        window="hann_window", sc_lambda=0.1, mag_lambda=0.1, band="full"
    ):
        """Initialize Multi resolution STFT loss module.

        Args:
            fft_sizes (sequence): FFT sizes, one per resolution.
            hop_sizes (sequence): Hop sizes, one per resolution.
            win_lengths (sequence): Window lengths, one per resolution.
            window (str): Window function type.
            *_lambda (float): a balancing factor across different losses.
            band (str): high-band or full-band loss

        Raises:
            ValueError: If the three size sequences differ in length.

        """
        super(MultiResolutionSTFTLoss, self).__init__()
        self.sc_lambda = sc_lambda
        self.mag_lambda = mag_lambda

        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not (len(fft_sizes) == len(hop_sizes) == len(win_lengths)):
            raise ValueError(
                "fft_sizes, hop_sizes and win_lengths must have the same length, "
                "got {}, {} and {}".format(
                    len(fft_sizes), len(hop_sizes), len(win_lengths)
                )
            )
        self.stft_losses = torch.nn.ModuleList(
            [
                STFTLoss(fs, ss, wl, window, band)
                for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths)
            ]
        )

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T) or (B, #subband, T).
            y (Tensor): Groundtruth signal (B, T) or (B, #subband, T).

        Returns:
            Tensor: Multi resolution spectral convergence loss value.
            Tensor: Multi resolution log STFT magnitude loss value.

        """
        if x.dim() == 3:
            # (B, C, T) -> (B x C, T); reshape (unlike view) also accepts
            # non-contiguous input.
            x = x.reshape(-1, x.size(2))
            y = y.reshape(-1, y.size(2))

        sc_loss = 0.0
        mag_loss = 0.0
        for loss_fn in self.stft_losses:
            sc_l, mag_l = loss_fn(x, y)
            # Out-of-place accumulation keeps each per-resolution term intact.
            sc_loss = sc_loss + sc_l
            mag_loss = mag_loss + mag_l

        # Weight each total, then average over the number of resolutions.
        num_resolutions = len(self.stft_losses)
        sc_loss = sc_loss * self.sc_lambda / num_resolutions
        mag_loss = mag_loss * self.mag_lambda / num_resolutions

        return sc_loss, mag_loss