mattricesound committed
Commit d8f7979 • 1 Parent(s): 12e94ae

Restore classifier, move shell scripts to scripts
README.md CHANGED
@@ -16,12 +16,12 @@ This repo can be used for many different tasks. Here are some examples.
 ## Run RemFX Detect on a single file
 First, need to download the checkpoints from [zenodo](https://zenodo.org/record/8179396)
 ```
-./download_checkpoints.sh
-./remfx_detect.sh wet.wav -o dry.wav
+scripts/download_checkpoints.sh
+scripts/remfx_detect.sh wet.wav -o dry.wav
 ```
 ## Download the [General Purpose Audio Effect Removal evaluation datasets](https://zenodo.org/record/8187288)
 ```
-./download_eval_datasets.sh
+scripts/download_eval_datasets.sh
 ```
 
 ## Download the starter datasets
@@ -73,28 +73,28 @@ Also note that the training assumes you have a GPU. To train on CPU, set `accele
 First download the General Purpose Audio Effect Removal evaluation datasets (see above).
 To use the pretrained RemFX model, download the checkpoints
 ```
-./download_checkpoints.sh
+scripts/download_checkpoints.sh
 ```
 Then run the evaluation script, select the RemFX configuration, between `remfx_oracle`, `remfx_detect`, and `remfx_all`. Then select N, the number of effects to remove.
 ```
-./eval.sh remfx_detect 0-0
-./eval.sh remfx_detect 1-1
-./eval.sh remfx_detect 2-2
-./eval.sh remfx_detect 3-3
-./eval.sh remfx_detect 4-4
-./eval.sh remfx_detect 5-5
+scripts/eval.sh remfx_detect 0-0
+scripts/eval.sh remfx_detect 1-1
+scripts/eval.sh remfx_detect 2-2
+scripts/eval.sh remfx_detect 3-3
+scripts/eval.sh remfx_detect 4-4
+scripts/eval.sh remfx_detect 5-5
 
 ```
 To eval a custom monolithic model, first train a model (see Training)
 Then run the evaluation script, with the config used and checkpoint_path.
 ```
-./eval.sh distortion_aug 0-0 -ckpt "logs/ckpts/2023-07-26-10-10-27/epoch\=05-valid_loss\=8.623.ckpt"
+scripts/eval.sh distortion_aug 0-0 -ckpt "logs/ckpts/2023-07-26-10-10-27/epoch\=05-valid_loss\=8.623.ckpt"
 ```
 
 To eval a custom effect-specific model as part of the inference chain, first train a model (see Training), then edit `cfg/exp/remfx_{desired_configuration}.yaml -> ckpts -> {effect}`.
 Then run the evaluation script.
 ```
-./eval.sh remfx_detect 0-0
+scripts/eval.sh remfx_detect 0-0
 ```
 
 The script assumes that RemFX_eval_datasets is in the top-level directory.
remfx/classifier.py CHANGED
@@ -1,11 +1,9 @@
 import torch
 import torchaudio
 import torch.nn as nn
-
-# import hearbaseline
-
-# import hearbaseline.vggish
-# import hearbaseline.wav2vec2
+import hearbaseline
+import hearbaseline.vggish
+import hearbaseline.wav2vec2
 
 import wav2clip_hear
 import panns_hear
@@ -173,10 +171,10 @@ class Cnn14(nn.Module):
 
         self.fc1 = nn.Linear(2048, 2048, bias=True)
 
-        self.fc_audioset = nn.Linear(2048, num_classes, bias=True)
-        # self.heads = torch.nn.ModuleList()
-        # for _ in range(num_classes):
-        #     self.heads.append(nn.Linear(2048, 1, bias=True))
+        # self.fc_audioset = nn.Linear(2048, num_classes, bias=True)
+        self.heads = torch.nn.ModuleList()
+        for _ in range(num_classes):
+            self.heads.append(nn.Linear(2048, 1, bias=True))
 
         self.init_weight()
 
@@ -192,7 +190,7 @@ class Cnn14(nn.Module):
     def init_weight(self):
         init_bn(self.bn0)
         init_layer(self.fc1)
-        init_layer(self.fc_audioset)
+        # init_layer(self.fc_audioset)
 
     def forward(self, x: torch.Tensor, train: bool = False):
         """
@@ -212,12 +210,12 @@ class Cnn14(nn.Module):
         # axs[1].imshow(x[0, :, :, :].detach().squeeze().cpu().numpy())
         # plt.savefig("spec_augment.png", dpi=300)
 
-        x = x.permute(0, 2, 1, 3)
-        x = self.bn0(x)
-        x = x.permute(0, 2, 1, 3)
+        # x = x.permute(0, 2, 1, 3)
+        # x = self.bn0(x)
+        # x = x.permute(0, 2, 1, 3)
 
         # apply standardization
-        # x = (x - x.mean(dim=0, keepdim=True)) / x.std(dim=0, keepdim=True)
+        x = (x - x.mean(dim=0, keepdim=True)) / x.std(dim=0, keepdim=True)
 
         x = self.conv_block1(x, pool_size=(2, 2), pool_type="avg")
         x = F.dropout(x, p=0.2, training=train)
@@ -239,13 +237,13 @@ class Cnn14(nn.Module):
         x = F.dropout(x, p=0.5, training=train)
         x = F.relu_(self.fc1(x))
 
-        # outputs = []
-        # for head in self.heads:
-        #     outputs.append(torch.sigmoid(head(x)))
+        outputs = []
+        for head in self.heads:
+            outputs.append(torch.sigmoid(head(x)))
+
+        # clipwise_output = self.fc_audioset(x)
 
-        clipwise_output = self.fc_audioset(x)
-        return clipwise_output
-        # return outputs
+        return outputs
 
 
 class ConvBlock(nn.Module):
@@ -296,4 +294,4 @@ class ConvBlock(nn.Module):
         else:
             raise Exception("Incorrect argument!")
 
-        return x
+        return x
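
For context on the restored head: the diff above swaps Cnn14's single `fc_audioset` projection for one sigmoid head per effect class, which turns the network into a multi-label detector (several effects can be flagged on the same clip). A minimal sketch of that pattern, with illustrative names around the 2048-wide embedding from the diff:

```
import torch
import torch.nn as nn


class MultiHeadSketch(nn.Module):
    # Illustrative stand-in for the restored Cnn14 head, not the repo's API.
    def __init__(self, embed_dim: int = 2048, num_classes: int = 5):
        super().__init__()
        # One independent binary head per effect class, as in the diff above.
        self.heads = nn.ModuleList(
            [nn.Linear(embed_dim, 1, bias=True) for _ in range(num_classes)]
        )

    def forward(self, x: torch.Tensor):
        # Each head emits its own probability, so predictions are not
        # mutually exclusive across classes.
        return [torch.sigmoid(head(x)) for head in self.heads]


# A batch of 4 embeddings yields num_classes tensors of shape (4, 1).
probs = MultiHeadSketch()(torch.randn(4, 2048))
```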
remfx/models.py CHANGED
@@ -143,17 +143,8 @@ class RemFXChainInference(pl.LightningModule):
             prog_bar=True,
             sync_dist=True,
         )
-        # print(f"Input_{metric}", negate * self.metrics[metric](x, y))
-        # print(f"test_{metric}", negate * self.metrics[metric](output, y))
-        # self.output_str += f"{negate * self.metrics[metric](x, y).item():.4f},{negate * self.metrics[metric](output, y).item():.4f},"
-        # self.output_str += "\n"
         return loss
 
-    def on_test_end(self) -> None:
-        pass
-        # with open("output.csv", "w") as f:
-        #     f.write(self.output_str)
-
     def sample(self, batch):
         return self.forward(batch, 0)[1]
 
@@ -438,7 +429,6 @@ def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 1.0):
 
     return mixed_x, mixed_y, lam
 
-
 class FXClassifier(pl.LightningModule):
     def __init__(
         self,
@@ -458,42 +448,7 @@ class FXClassifier(pl.LightningModule):
         self.mixup = mixup
         self.label_smoothing = label_smoothing
 
-        self.loss_fn = torch.nn.CrossEntropyLoss(label_smoothing=label_smoothing)
         self.loss_fn = torch.nn.BCELoss()
-
-        if False:
-            self.train_f1 = torchmetrics.classification.MultilabelF1Score(
-                5, average="none", multidim_average="global"
-            )
-            self.val_f1 = torchmetrics.classification.MultilabelF1Score(
-                5, average="none", multidim_average="global"
-            )
-            self.test_f1 = torchmetrics.classification.MultilabelF1Score(
-                5, average="none", multidim_average="global"
-            )
-
-            self.train_f1_avg = torchmetrics.classification.MultilabelF1Score(
-                5, threshold=0.5, average="macro", multidim_average="global"
-            )
-            self.val_f1_avg = torchmetrics.classification.MultilabelF1Score(
-                5, threshold=0.5, average="macro", multidim_average="global"
-            )
-            self.test_f1_avg = torchmetrics.classification.MultilabelF1Score(
-                5, threshold=0.5, average="macro", multidim_average="global"
-            )
-
-            self.metrics = {
-                "train": self.train_acc,
-                "valid": self.val_acc,
-                "test": self.test_acc,
-            }
-
-            self.avg_metrics = {
-                "train": self.train_f1_avg,
-                "valid": self.val_f1_avg,
-                "test": self.test_f1_avg,
-            }
-
         self.metrics = torch.nn.ModuleDict()
         for effect in self.effects:
             self.metrics[f"train_{effect}_acc"] = torchmetrics.classification.Accuracy(
@@ -578,4 +533,4 @@ class FXClassifier(pl.LightningModule):
             lr=self.lr,
             weight_decay=self.lr_weight_decay,
         )
-        return optimizer
+        return optimizer
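
The loss change above pairs with those sigmoid heads: `torch.nn.BCELoss` expects inputs already in [0, 1] and an independent 0/1 target per effect, whereas the removed `CrossEntropyLoss` assumes one mutually exclusive class. A minimal illustration of the pairing (the five-effect width matches the surrounding code; the batch size is illustrative):

```
import torch

loss_fn = torch.nn.BCELoss()
pred = torch.sigmoid(torch.randn(4, 5))       # per-effect probabilities
target = torch.randint(0, 2, (4, 5)).float()  # independent 0/1 labels
loss = loss_fn(pred, target)                  # scalar multi-label loss
```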
scripts/chain_inference.py CHANGED
@@ -45,6 +45,7 @@ def main(cfg: DictConfig):
 
     logger = hydra.utils.instantiate(cfg.logger, _convert_="partial")
    log.info(f"Instantiating trainer <{cfg.trainer._target_}>.")
+    cfg.trainer.accelerator = "gpu" if torch.cuda.is_available() else "cpu"
    trainer = hydra.utils.instantiate(
        cfg.trainer, callbacks=callbacks, logger=logger, _convert_="partial"
    )
@@ -68,6 +69,7 @@ def main(cfg: DictConfig):
        shuffle_effect_order=cfg.inference_effects_shuffle,
        use_all_effect_models=cfg.inference_use_all_effect_models,
    )
+
    trainer.test(model=inference_model, datamodule=datamodule)
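
The added accelerator line selects the device at runtime via `torch.cuda.is_available()`, so chain inference falls back to CPU automatically instead of assuming a GPU — the same concern the README raises for training with the `accelerator` setting.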
download_ckpts.sh → scripts/download_ckpts.sh RENAMED
File without changes
download_eval_datasets.sh → scripts/download_eval_datasets.sh RENAMED
File without changes
eval.sh → scripts/eval.sh RENAMED
@@ -1,13 +1,13 @@
 #! /bin/bash
 
 # Example usage:
-# ./eval.sh remfx_detect 0-0
-# ./eval.sh distortion_aug 0-0 -ckpt logs/ckpts/2023-01-21-12-21-44
+# scripts/eval.sh remfx_detect 0-0
+# scripts/eval.sh distortion_aug 0-0 -ckpt logs/ckpts/2023-01-21-12-21-44
 # First 2 arguments are required, third argument is optional
 
 # Default value for the optional parameter
 ckpt_path=""
-
+export DATASET_ROOT=RemFX_eval_datasets
 # Function to display script usage
 function display_usage {
     echo "Usage: $0 <experiment> <dataset> [-ckpt {ckpt_path}]"
remfx_detect.sh → scripts/remfx_detect.sh RENAMED
@@ -1,7 +1,7 @@
 #! /bin/bash
 
 # Example usage:
-# ./remfx_detect.sh wet.wav -o examples/output.wav
+# scripts/remfx_detect.sh wet.wav -o examples/output.wav
 # first argument is required, second argument is optional
 
 # Check if first argument is empty