from torch import nn


class ZeroTemporalPad(nn.Module):
    """Pad sequences to equal length in the temporal dimension."""

    def __init__(self, kernel_size, dilation):
        super().__init__()
        total_pad = dilation * (kernel_size - 1)
        begin = total_pad // 2
        end = total_pad - begin
        self.pad_layer = nn.ZeroPad2d((0, 0, begin, end))

    def forward(self, x):
        return self.pad_layer(x)
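
# Illustrative usage sketch, not part of the original module. nn.ZeroPad2d pads
# the last two dims as (left, right, top, bottom), so ZeroTemporalPad grows
# dim -2 by dilation * (kernel_size - 1); with (B, T, D) input that is the
# temporal axis. Shapes below are arbitrary examples.
def _zero_temporal_pad_example():
    import torch

    pad = ZeroTemporalPad(kernel_size=5, dilation=2)
    x = torch.randn(1, 37, 80)  # (B, T, D)
    assert pad(x).shape == (1, 37 + 2 * (5 - 1), 80)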
class Conv1dBN(nn.Module):
    """1d convolution with batch normalization.

    conv1d -> relu -> BN.

    Note:
        Batch normalization is applied after ReLU, following the original implementation.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        kernel_size (int): kernel size for convolutional filters.
        dilation (int): dilation for convolution layers.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super().__init__()
        padding = dilation * (kernel_size - 1)
        pad_s = padding // 2
        pad_e = padding - pad_s
        self.conv1d = nn.Conv1d(in_channels, out_channels, kernel_size, dilation=dilation)
        self.pad = nn.ZeroPad2d((pad_s, pad_e, 0, 0))  # uneven left and right padding
        self.norm = nn.BatchNorm1d(out_channels)

    def forward(self, x):
        o = self.conv1d(x)
        o = self.pad(o)
        o = nn.functional.relu(o)
        o = self.norm(o)
        return o
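
# Illustrative usage sketch, not part of the original module. Conv1dBN convolves
# without padding and then zero-pads the time axis back, so the temporal length
# is preserved while the channel count changes. Shapes are arbitrary examples;
# batch size > 1 keeps BatchNorm1d statistics well-defined in training mode.
def _conv1d_bn_example():
    import torch

    layer = Conv1dBN(in_channels=80, out_channels=128, kernel_size=3, dilation=2)
    x = torch.randn(4, 80, 50)  # (B, C, T)
    assert layer(x).shape == (4, 128, 50)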
class Conv1dBNBlock(nn.Module):
    """1d convolutional block with batch normalization. It is a set of conv1d -> relu -> BN blocks.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        hidden_channels (int): number of inner convolution channels.
        kernel_size (int): kernel size for convolutional filters.
        dilation (int): dilation for convolution layers.
        num_conv_blocks (int, optional): number of convolutional blocks. Defaults to 2.
    """

    def __init__(self, in_channels, out_channels, hidden_channels, kernel_size, dilation, num_conv_blocks=2):
        super().__init__()
        conv_bn_blocks = []
        for idx in range(num_conv_blocks):
            layer = Conv1dBN(
                in_channels if idx == 0 else hidden_channels,
                out_channels if idx == (num_conv_blocks - 1) else hidden_channels,
                kernel_size,
                dilation,
            )
            conv_bn_blocks.append(layer)
        self.conv_bn_blocks = nn.Sequential(*conv_bn_blocks)

    def forward(self, x):
        """
        Shapes:
            x: (B, D, T)
        """
        return self.conv_bn_blocks(x)
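
# Illustrative usage sketch, not part of the original module. A Conv1dBNBlock
# chains num_conv_blocks Conv1dBN layers, mapping
# in_channels -> hidden_channels -> ... -> out_channels at constant length.
def _conv1d_bn_block_example():
    import torch

    block = Conv1dBNBlock(in_channels=80, out_channels=128, hidden_channels=256, kernel_size=3, dilation=1)
    x = torch.randn(4, 80, 50)  # (B, D, T), matching the docstring shape note
    assert block(x).shape == (4, 128, 50)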
class ResidualConv1dBNBlock(nn.Module):
    """Residual convolutional blocks with batch normalization.

    Each block has 'num_conv_blocks' conv layers, and 'num_res_blocks' such blocks are connected
    with residual connections.

        conv_block = (conv1d -> relu -> bn) x 'num_conv_blocks'
        residual_conv_block = (x -> conv_block -> + ->) x 'num_res_blocks'
                               ' - - - - - - - - - ^

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        hidden_channels (int): number of inner convolution channels.
        kernel_size (int): kernel size for convolutional filters.
        dilations (list): dilations for each convolution layer.
        num_res_blocks (int, optional): number of residual blocks. Defaults to 13.
        num_conv_blocks (int, optional): number of convolutional blocks in each residual block. Defaults to 2.
    """

    def __init__(
        self, in_channels, out_channels, hidden_channels, kernel_size, dilations, num_res_blocks=13, num_conv_blocks=2
    ):
        super().__init__()
        assert len(dilations) == num_res_blocks
        self.res_blocks = nn.ModuleList()
        for idx, dilation in enumerate(dilations):
            block = Conv1dBNBlock(
                in_channels if idx == 0 else hidden_channels,
                out_channels if (idx + 1) == len(dilations) else hidden_channels,
                hidden_channels,
                kernel_size,
                dilation,
                num_conv_blocks,
            )
            self.res_blocks.append(block)

    def forward(self, x, x_mask=None):
        if x_mask is None:
            x_mask = 1.0
        o = x * x_mask
        for block in self.res_blocks:
            res = o
            o = block(o)
            o = o + res
            o = o * x_mask  # x_mask is 1.0 when no mask is given, so this is a no-op then
        return o
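
# Illustrative usage sketch, not part of the original module. The residual add
# `o = o + res` requires each Conv1dBNBlock to preserve the channel count, so
# in practice in_channels, hidden_channels, and out_channels should all match,
# as below. All shapes and hyperparameters here are arbitrary examples.
def _residual_conv1d_bn_block_example():
    import torch

    net = ResidualConv1dBNBlock(
        in_channels=256,
        out_channels=256,
        hidden_channels=256,
        kernel_size=4,
        dilations=4 * [1, 2, 4] + [1],  # 13 entries, one per residual block
        num_res_blocks=13,
        num_conv_blocks=2,
    )
    x = torch.randn(4, 256, 50)  # (B, D, T)
    x_mask = torch.ones(4, 1, 50)  # binary time mask, broadcast over channels
    assert net(x, x_mask).shape == (4, 256, 50)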