Christian J. Steinmetz committed · commit 29f23c3 · 1 parent: 0b430bb

moving to multi-way binary classification task

Files changed:
- remfx/classifier.py (+19, -7)
- remfx/models.py (+70, -50)
remfx/classifier.py CHANGED

@@ -170,7 +170,11 @@ class Cnn14(nn.Module):
         self.conv_block6 = ConvBlock(in_channels=1024, out_channels=2048)
 
         self.fc1 = nn.Linear(2048, 2048, bias=True)
-        self.fc_audioset = nn.Linear(2048, num_classes, bias=True)
+
+        # self.fc_audioset = nn.Linear(2048, num_classes, bias=True)
+        self.heads = torch.nn.ModuleList()
+        for _ in range(num_classes):
+            self.heads.append(nn.Linear(2048, 1, bias=True))
 
         self.init_weight()
 
@@ -186,7 +190,7 @@ class Cnn14(nn.Module):
     def init_weight(self):
         init_bn(self.bn0)
         init_layer(self.fc1)
-        init_layer(self.fc_audioset)
+        # init_layer(self.fc_audioset)
 
     def forward(self, x: torch.Tensor, train: bool = False):
         """
@@ -206,9 +210,12 @@ class Cnn14(nn.Module):
         # axs[1].imshow(x[0, :, :, :].detach().squeeze().cpu().numpy())
         # plt.savefig("spec_augment.png", dpi=300)
 
-        x = x.permute(0, 2, 1, 3)
-        x = self.bn0(x)
-        x = x.permute(0, 2, 1, 3)
+        # x = x.permute(0, 2, 1, 3)
+        # x = self.bn0(x)
+        # x = x.permute(0, 2, 1, 3)
+
+        # apply standardization
+        x = (x - x.mean(dim=0, keepdim=True)) / x.std(dim=0, keepdim=True)
 
         x = self.conv_block1(x, pool_size=(2, 2), pool_type="avg")
         x = F.dropout(x, p=0.2, training=train)
@@ -229,9 +236,14 @@ class Cnn14(nn.Module):
         x = x1 + x2
         x = F.dropout(x, p=0.5, training=train)
         x = F.relu_(self.fc1(x))
-        clipwise_output = self.fc_audioset(x)
-
-        return clipwise_output
+
+        outputs = []
+        for head in self.heads:
+            outputs.append(torch.sigmoid(head(x)))
+
+        # clipwise_output = self.fc_audioset(x)
+
+        return outputs
 
 
 class ConvBlock(nn.Module):
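
This hunk swaps Cnn14's single fc_audioset output layer for one independent sigmoid head per effect, so each effect gets its own binary present/absent decision. A minimal self-contained sketch of that pattern (the 2048-dim embedding size matches the diff; the batch size and class count below are illustrative):

    import torch
    import torch.nn as nn


    class MultiHeadBinaryClassifier(nn.Module):
        # one Linear(embed_dim, 1) head per class, mirroring the diff,
        # instead of a single Linear(embed_dim, num_classes) output layer
        def __init__(self, embed_dim=2048, num_classes=5):
            super().__init__()
            self.heads = nn.ModuleList(
                [nn.Linear(embed_dim, 1, bias=True) for _ in range(num_classes)]
            )

        def forward(self, x):
            # each head independently answers "is this effect present?"
            return [torch.sigmoid(head(x)) for head in self.heads]


    emb = torch.randn(8, 2048)                 # illustrative batch of embeddings
    probs = MultiHeadBinaryClassifier()(emb)   # list of five (8, 1) probability tensors

Unlike a softmax over classes, the sigmoids are not mutually exclusive, which is what makes multiple simultaneous effects representable.
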
remfx/models.py CHANGED

@@ -423,13 +423,20 @@ def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 1.0):
     """
     batch_size = x.size(0)
     if alpha > 0:
-        lam = np.random.beta(alpha, alpha)
+        # lam = np.random.beta(alpha, alpha)
+        lam = np.random.uniform(0.25, 0.75, batch_size)
+        lam = torch.from_numpy(lam).float().to(x.device).view(batch_size, 1, 1)
     else:
         lam = 1
 
-    index = torch.randperm(batch_size).to(x.device)
-    mixed_x = lam * x + (1 - lam) * x[index, :]
-    mixed_y = torch.logical_or(y, y[index, :]).float()
+    print(lam)
+    if np.random.rand() > 0.5:
+        index = torch.randperm(batch_size).to(x.device)
+        mixed_x = lam * x + (1 - lam) * x[index, :]
+        mixed_y = torch.logical_or(y, y[index, :]).float()
+    else:
+        mixed_x = x
+        mixed_y = y
 
     return mixed_x, mixed_y, lam
 
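
The reworked mixup draws a per-example lam from U(0.25, 0.75) instead of one Beta sample shared across the batch, only mixes with probability 0.5, and unions the two examples' multi-hot labels. A usage sketch, assuming (batch, channels, samples) audio and (batch, num_classes) multi-hot labels as elsewhere in this file, and that the function is importable as remfx.models.mixup:

    import torch
    from remfx.models import mixup

    x = torch.randn(4, 1, 48000)           # (batch, channels, samples) audio
    y = (torch.rand(4, 5) > 0.5).float()   # multi-hot effect-presence labels

    mixed_x, mixed_y, lam = mixup(x, y)
    # when the coin flip mixes, lam has shape (4, 1, 1) so it broadcasts over
    # channels and samples; mixed_y is the logical OR of the two source labels,
    # i.e. a mixed clip "contains" every effect present in either source clip
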
@@ -454,38 +461,52 @@ class FXClassifier(pl.LightningModule):
         self.label_smoothing = label_smoothing
 
         self.loss_fn = torch.nn.CrossEntropyLoss(label_smoothing=label_smoothing)
+        self.loss_fn = torch.nn.BCELoss()
 
-        self.train_f1 = torchmetrics.classification.MultilabelF1Score(
-            5, average="none", multidim_average="global"
-        )
-        self.val_f1 = torchmetrics.classification.MultilabelF1Score(
-            5, average="none", multidim_average="global"
-        )
-        self.test_f1 = torchmetrics.classification.MultilabelF1Score(
-            5, average="none", multidim_average="global"
-        )
+        if False:
+            self.train_f1 = torchmetrics.classification.MultilabelF1Score(
+                5, average="none", multidim_average="global"
+            )
+            self.val_f1 = torchmetrics.classification.MultilabelF1Score(
+                5, average="none", multidim_average="global"
+            )
+            self.test_f1 = torchmetrics.classification.MultilabelF1Score(
+                5, average="none", multidim_average="global"
+            )
 
-        self.train_f1_avg = torchmetrics.classification.MultilabelF1Score(
-            5, threshold=0.5, average="macro", multidim_average="global"
-        )
-        self.val_f1_avg = torchmetrics.classification.MultilabelF1Score(
-            5, threshold=0.5, average="macro", multidim_average="global"
-        )
-        self.test_f1_avg = torchmetrics.classification.MultilabelF1Score(
-            5, threshold=0.5, average="macro", multidim_average="global"
-        )
+            self.train_f1_avg = torchmetrics.classification.MultilabelF1Score(
+                5, threshold=0.5, average="macro", multidim_average="global"
+            )
+            self.val_f1_avg = torchmetrics.classification.MultilabelF1Score(
+                5, threshold=0.5, average="macro", multidim_average="global"
+            )
+            self.test_f1_avg = torchmetrics.classification.MultilabelF1Score(
+                5, threshold=0.5, average="macro", multidim_average="global"
+            )
 
-        self.metrics = {
-            "train": self.train_acc,
-            "valid": self.val_acc,
-            "test": self.test_acc,
-        }
-
-        self.avg_metrics = {
-            "train": self.train_f1_avg,
-            "valid": self.val_f1_avg,
-            "test": self.test_f1_avg,
-        }
+            self.metrics = {
+                "train": self.train_acc,
+                "valid": self.val_acc,
+                "test": self.test_acc,
+            }
+
+            self.avg_metrics = {
+                "train": self.train_f1_avg,
+                "valid": self.val_f1_avg,
+                "test": self.test_f1_avg,
+            }
+
+        self.metrics = torch.nn.ModuleDict()
+        for effect in self.effects:
+            self.metrics[f"train_{effect}_acc"] = torchmetrics.classification.Accuracy(
+                task="binary"
+            )
+            self.metrics[f"valid_{effect}_acc"] = torchmetrics.classification.Accuracy(
+                task="binary"
+            )
+            self.metrics[f"test_{effect}_acc"] = torchmetrics.classification.Accuracy(
+                task="binary"
+            )
 
     def forward(self, x: torch.Tensor, train: bool = False):
         return self.network(x, train=train)
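
With one binary head per effect, the per-mode metrics become a torch.nn.ModuleDict of binary accuracies keyed by "{mode}_{effect}_acc". A small sketch of how those metric objects behave (the effect names below are placeholders; the real list comes from self.effects):

    import torch
    import torchmetrics

    effects = ["distortion", "chorus", "delay"]   # placeholder names
    metrics = torch.nn.ModuleDict()
    for mode in ("train", "valid", "test"):
        for effect in effects:
            metrics[f"{mode}_{effect}_acc"] = torchmetrics.classification.Accuracy(
                task="binary"
            )

    preds = torch.rand(8)                  # sigmoid outputs of one head
    target = torch.randint(0, 2, (8,))     # 0/1 presence labels
    acc = metrics["train_distortion_acc"](preds, target)  # thresholds at 0.5

Registering the metrics in a ModuleDict rather than a plain dict lets Lightning move their internal state to the right device along with the model.
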
@@ -496,15 +517,15 @@ class FXClassifier(pl.LightningModule):
 
         if mode == "train" and self.mixup:
             x_mixed, label_mixed, lam = mixup(x, wet_label)
-            [four removed lines illegible in source; the second began "loss ="]
+            outputs = self(x_mixed, train)
+            loss = 0
+            for idx, output in enumerate(outputs):
+                loss += self.loss_fn(output.squeeze(-1), label_mixed[..., idx])
         else:
-            [four removed lines illegible in source; the second began "loss ="]
+            outputs = self(x, train)
+            loss = 0
+            for idx, output in enumerate(outputs):
+                loss += self.loss_fn(output.squeeze(-1), wet_label[..., idx])
 
         self.log(
             f"{mode}_loss",
@@ -516,26 +537,25 @@ class FXClassifier(pl.LightningModule):
             sync_dist=True,
         )
 
-        [two removed lines illegible in source]
+        acc_metrics = []
         for idx, effect_name in enumerate(self.effects):
+            acc_metric = self.metrics[f"{mode}_{effect_name}_acc"](
+                outputs[idx].squeeze(-1), wet_label[..., idx]
+            )
             self.log(
-                f"{mode}… [two removed lines illegible past this point]
+                f"{mode}_{effect_name}_acc",
+                acc_metric,
                 on_step=True,
                 on_epoch=True,
                 prog_bar=True,
                 logger=True,
                 sync_dist=True,
             )
-        [one removed line illegible in source]
-        avg_metrics = self.avg_metrics[mode](
-            torch.sigmoid(pred_label), wet_label.long()
-        )
+            acc_metrics.append(acc_metric)
 
         self.log(
-            f"{mode}… [two removed lines illegible past this point]
+            f"{mode}_avg_acc",
+            torch.mean(torch.stack(acc_metrics)),
             on_step=True,
             on_epoch=True,
             prog_bar=True,
[remainder of the diff truncated in source]
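
The new training step sums an independent BCE term per head against the matching column of the multi-hot target. A self-contained sketch of that loss computation, with random stand-ins for the network outputs and labels (sizes are illustrative):

    import torch

    loss_fn = torch.nn.BCELoss()
    batch, num_classes = 8, 5
    # stand-ins for the classifier's list of (batch, 1) sigmoid outputs
    outputs = [torch.rand(batch, 1, requires_grad=True) for _ in range(num_classes)]
    wet_label = (torch.rand(batch, num_classes) > 0.5).float()  # multi-hot target

    loss = 0
    for idx, output in enumerate(outputs):
        # squeeze (batch, 1) -> (batch,) to match the label column
        loss += loss_fn(output.squeeze(-1), wet_label[..., idx])
    loss.backward()   # one summed objective trains all heads jointly

Since the network already applies sigmoid to each head, plain BCELoss matches its outputs; keeping the heads in logit space and using BCEWithLogitsLoss would be the numerically safer variant of the same objective.
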