fx

Browse files

Files changed (4) hide show

audiocraft/__init__.py +0 -0
audiocraft/conv.py +18 -36
audiocraft/lm.py +2 -2
audiocraft/seanet.py +3 -0

audiocraft/__init__.py ADDED Viewed

File without changes

audiocraft/conv.py CHANGED Viewed

@@ -114,20 +114,7 @@ class NormConv1d(nn.Module):
         return x
-class NormConv2d(nn.Module):
-    """Wrapper around Conv2d and normalization applied to this conv
-    to provide a uniform interface across normalization approaches.
-    """
-    def __init__(self, *args, norm: str = 'none', norm_kwargs: tp.Dict[str, tp.Any] = {}, **kwargs):
-        super().__init__()
-        self.conv = apply_parametrization_norm(nn.Conv2d(*args, **kwargs), norm)
-        self.norm = get_norm_module(self.conv, causal=False, norm=norm, **norm_kwargs)
-        self.norm_type = norm
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.norm(x)
-        return x
 class NormConvTranspose1d(nn.Module):
@@ -147,30 +134,25 @@ class NormConvTranspose1d(nn.Module):
         return x
-class NormConvTranspose2d(nn.Module):
-    """Wrapper around ConvTranspose2d and normalization applied to this conv
-    to provide a uniform interface across normalization approaches.
-    """
-    def __init__(self, *args, norm: str = 'none', norm_kwargs: tp.Dict[str, tp.Any] = {}, **kwargs):
-        super().__init__()
-        self.convtr = apply_parametrization_norm(nn.ConvTranspose2d(*args, **kwargs), norm)
-        self.norm = get_norm_module(self.convtr, causal=False, norm=norm, **norm_kwargs)
-    def forward(self, x):
-        x = self.convtr(x)
-        x = self.norm(x)
-        return x
 class StreamableConv1d(nn.Module):
     """Conv1d with some builtin handling of asymmetric or causal padding
     and normalization.
     """
-    def __init__(self, in_channels: int, out_channels: int,
-                 kernel_size: int, stride: int = 1, dilation: int = 1,
-                 groups: int = 1, bias: bool = True, causal: bool = False,
-                 norm: str = 'none', norm_kwargs: tp.Dict[str, tp.Any] = {},
-                 pad_mode: str = 'reflect'):
         super().__init__()
         # warn user on unusual setup between dilation and stride
         if stride > 1 and dilation > 1:
@@ -192,12 +174,15 @@ class StreamableConv1d(nn.Module):
         extra_padding = get_extra_padding_for_conv1d(x, kernel_size, stride, padding_total)
         if self.causal:
             # Left padding for causal
-            x = pad1d(x, (padding_total, extra_padding), mode=self.pad_mode)
         else:
             # Asymmetric padding required for odd strides
             padding_right = padding_total // 2
             padding_left = padding_total - padding_right
             x = pad1d(x, (padding_left, padding_right + extra_padding), mode=self.pad_mode)
         return self.conv(x)
@@ -230,13 +215,10 @@ class StreamableConvTranspose1d(nn.Module):
         # as removing it here would require also passing the length at the matching layer
         # in the encoder.
         if self.causal:
-            # Trim the padding on the right according to the specified ratio
-            # if trim_right_ratio = 1.0, trim everything from right
-            padding_right = math.ceil(padding_total * self.trim_right_ratio)
-            padding_left = padding_total - padding_right
-            y = unpad1d(y, (padding_left, padding_right))
         else:
             # Asymmetric padding required for odd strides
             padding_right = padding_total // 2
             padding_left = padding_total - padding_right
             y = unpad1d(y, (padding_left, padding_right))

         return x
 class NormConvTranspose1d(nn.Module):
         return x
 class StreamableConv1d(nn.Module):
     """Conv1d with some builtin handling of asymmetric or causal padding
     and normalization.
     """
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 dilation=1,
+                 groups=1,
+                 bias=True,
+                 causal=False,
+                 norm='none',
+                 norm_kwargs={},
+                 pad_mode='reflect'):
         super().__init__()
         # warn user on unusual setup between dilation and stride
         if stride > 1 and dilation > 1:
         extra_padding = get_extra_padding_for_conv1d(x, kernel_size, stride, padding_total)
         if self.causal:
             # Left padding for causal
+            # x = pad1d(x, (padding_total, extra_padding), mode=self.pad_mode)
+            print('\n   \n\n\nn\n\n\nnCAUSAL N\n\n\n')
         else:
             # Asymmetric padding required for odd strides
             padding_right = padding_total // 2
             padding_left = padding_total - padding_right
             x = pad1d(x, (padding_left, padding_right + extra_padding), mode=self.pad_mode)
+            # print(f'\n   \/n\n\n\nANTICaus N {x.shape=}\n')
+            # ANTICaus CONV OLD_SHAPE=torch.Size([1, 512, 280]) x.shape=torch.Size([1, 512, 282])
         return self.conv(x)
         # as removing it here would require also passing the length at the matching layer
         # in the encoder.
         if self.causal:
+            print('\n   \n\n\nn\n\n\nnCAUSAL T\n\n\n\n\n')
         else:
             # Asymmetric padding required for odd strides
+            # print('\n   \n\n\nn\n\n\nnANTICAUSAL T\n\n\n')
             padding_right = padding_total // 2
             padding_left = padding_total - padding_right
             y = unpad1d(y, (padding_left, padding_right))

audiocraft/lm.py CHANGED Viewed

@@ -435,7 +435,7 @@ class LMModel(StreamingModule):
                 # print('Set All to Special')
                 # RUNS with = 2047 just different of self.special_token_id  -> 2047 is drill noise
-                # next_token[:] = self.special_token_id
@@ -451,7 +451,7 @@ class LMModel(StreamingModule):
         unconditional_state.clear()
         out_codes, _, _ = pattern.revert_pattern_sequence(gen_sequence, special_token=unknown_token)
-        print(f'{out_codes.shape=} {out_codes.min()}  {out_codes.max()}\n')
         out_start_offset = start_offset if remove_prompts else 0
         out_codes = out_codes[..., out_start_offset:max_gen_len]

                 # print('Set All to Special')
                 # RUNS with = 2047 just different of self.special_token_id  -> 2047 is drill noise
+                # next_token[:] = self.special_token_id
         unconditional_state.clear()
         out_codes, _, _ = pattern.revert_pattern_sequence(gen_sequence, special_token=unknown_token)
+        print(f' <=> CODES {out_codes.shape=} {out_codes.min()}  {out_codes.max()}\n')   # ARRIVES here also if special
         out_start_offset = start_offset if remove_prompts else 0
         out_codes = out_codes[..., out_start_offset:max_gen_len]

audiocraft/seanet.py CHANGED Viewed

@@ -143,5 +143,8 @@ class SEANetDecoder(nn.Module):
         self.model = nn.Sequential(*model)
     def forward(self, z):
         y = self.model(z)
         return y

         self.model = nn.Sequential(*model)
     def forward(self, z):
+        print(f'\n   Enter seanet with shape {z.shape}\n')  # arrives here with (1,128,35)
+        # Open question: why would this convnet be sensitive to the values in z such that it crashes?
         y = self.model(z)
+        print(f'\n   Exit seanet with shape {y.shape}\n')  # arrives here with (1,128,35)
         return y