BernardoTorres committed on
Commit
e60d96b
1 Parent(s): fc91e6e

add paper models

Browse files
contrastive-vc/config.yaml ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pytorch_lightning==1.9.3
2
+ seed_everything: 123
3
+ trainer:
4
+ logger:
5
+ class_path: pytorch_lightning.loggers.TensorBoardLogger
6
+ init_args:
7
+ save_dir: logs
8
+ name: exp_contrastive_reg_sameclip
9
+ version: null
10
+ log_graph: false
11
+ default_hp_metric: true
12
+ prefix: ''
13
+ sub_dir: null
14
+ enable_checkpointing: true
15
+ callbacks:
16
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
17
+ init_args:
18
+ dirpath: null
19
+ filename: best-val-loss-{epoch}-{step}
20
+ monitor: loss/val
21
+ verbose: false
22
+ save_last: null
23
+ save_top_k: 1
24
+ save_weights_only: false
25
+ mode: min
26
+ auto_insert_metric_name: true
27
+ every_n_train_steps: null
28
+ train_time_interval: null
29
+ every_n_epochs: null
30
+ save_on_train_epoch_end: null
31
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
32
+ init_args:
33
+ dirpath: null
34
+ filename: best-eer-val-{epoch}-{step}
35
+ monitor: EER evaluation proj/val
36
+ verbose: false
37
+ save_last: null
38
+ save_top_k: 1
39
+ save_weights_only: false
40
+ mode: min
41
+ auto_insert_metric_name: true
42
+ every_n_train_steps: null
43
+ train_time_interval: null
44
+ every_n_epochs: null
45
+ save_on_train_epoch_end: null
46
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
47
+ init_args:
48
+ dirpath: null
49
+ filename: best-rank-val-{epoch}-{step}
50
+ monitor: Order evaluation mean proj/val
51
+ verbose: false
52
+ save_last: null
53
+ save_top_k: 1
54
+ save_weights_only: false
55
+ mode: min
56
+ auto_insert_metric_name: true
57
+ every_n_train_steps: null
58
+ train_time_interval: null
59
+ every_n_epochs: null
60
+ save_on_train_epoch_end: null
61
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
62
+ init_args:
63
+ dirpath: null
64
+ filename: best-alignment-val-{epoch}-{step}
65
+ monitor: Alignment evaluation proj/val
66
+ verbose: false
67
+ save_last: null
68
+ save_top_k: 1
69
+ save_weights_only: false
70
+ mode: min
71
+ auto_insert_metric_name: true
72
+ every_n_train_steps: null
73
+ train_time_interval: null
74
+ every_n_epochs: null
75
+ save_on_train_epoch_end: null
76
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
77
+ init_args:
78
+ dirpath: null
79
+ filename: best-uniformity-val-{epoch}-{step}
80
+ monitor: Uniformity evaluation proj/val
81
+ verbose: false
82
+ save_last: null
83
+ save_top_k: 1
84
+ save_weights_only: false
85
+ mode: min
86
+ auto_insert_metric_name: true
87
+ every_n_train_steps: null
88
+ train_time_interval: null
89
+ every_n_epochs: null
90
+ save_on_train_epoch_end: null
91
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
92
+ init_args:
93
+ dirpath: null
94
+ filename: cptk-{epoch}-{step}
95
+ monitor: null
96
+ verbose: false
97
+ save_last: null
98
+ save_top_k: -1
99
+ save_weights_only: false
100
+ mode: min
101
+ auto_insert_metric_name: true
102
+ every_n_train_steps: null
103
+ train_time_interval: null
104
+ every_n_epochs: 25
105
+ save_on_train_epoch_end: null
106
+ - class_path: callbacks.evaluation.OrderEvaluation
107
+ init_args:
108
+ log_n_epochs: 5
109
+ on_train: true
110
+ use_projection: true
111
+ - class_path: callbacks.evaluation.EEREvaluation
112
+ init_args:
113
+ use_more_neg: false
114
+ log_n_epochs: 5
115
+ on_train: false
116
+ use_projection: true
117
+ - class_path: callbacks.evaluation.HypersphereEvaluation
118
+ init_args:
119
+ log_n_epochs: 5
120
+ on_train: true
121
+ use_projection: true
122
+ default_root_dir: null
123
+ gradient_clip_val: null
124
+ gradient_clip_algorithm: null
125
+ num_nodes: 1
126
+ num_processes: null
127
+ devices: 1
128
+ gpus: null
129
+ auto_select_gpus: null
130
+ tpu_cores: null
131
+ ipus: null
132
+ enable_progress_bar: true
133
+ overfit_batches: 0.0
134
+ track_grad_norm: -1
135
+ check_val_every_n_epoch: 1
136
+ fast_dev_run: false
137
+ accumulate_grad_batches: null
138
+ max_epochs: 100000
139
+ min_epochs: null
140
+ max_steps: 1000000000
141
+ min_steps: null
142
+ max_time: null
143
+ limit_train_batches: null
144
+ limit_val_batches: null
145
+ limit_test_batches: null
146
+ limit_predict_batches: null
147
+ val_check_interval: null
148
+ log_every_n_steps: 50
149
+ accelerator: gpu
150
+ strategy: ddp
151
+ sync_batchnorm: false
152
+ precision: 32
153
+ enable_model_summary: true
154
+ num_sanity_val_steps: 2
155
+ resume_from_checkpoint: null
156
+ profiler: null
157
+ benchmark: null
158
+ deterministic: null
159
+ reload_dataloaders_every_n_epochs: 0
160
+ auto_lr_find: false
161
+ replace_sampler_ddp: true
162
+ detect_anomaly: false
163
+ auto_scale_batch_size: false
164
+ plugins: null
165
+ amp_backend: null
166
+ amp_level: null
167
+ move_metrics_to_cpu: false
168
+ multiple_trainloader_mode: max_size_cycle
169
+ inference_mode: true
170
+ ckpt_path: null
171
+ model:
172
+ class_path: models.trainer.ContrastiveTrainer
173
+ init_args:
174
+ feature_extractor:
175
+ spec_layer: melspectogram
176
+ n_fft: 2048
177
+ hop_length: 512
178
+ backbone:
179
+ backbone: efficientnet_b0
180
+ pretrained: true
181
+ embedding_dim: 1000
182
+ projection:
183
+ input_dim: 1000
184
+ output_dim: 128
185
+ l2_normalize: true
186
+ optimizer1_init:
187
+ class_path: torch.optim.Adam
188
+ init_args:
189
+ lr: 0.0001
190
+ weight_decay: 1.0e-05
191
+ use_contrastive_loss: true
192
+ temp: 0.2
193
+ nr_negative: 250
194
+ decouple: true
195
+ use_norm_reg: false
196
+ max_norm_hinge: 4.0
197
+ norm_hinge_fact: 10.0
198
+ use_invariance_loss: false
199
+ fact_inv_loss: 1.0
200
+ use_covariance_reg: true
201
+ fact_cov: 100.0
202
+ use_variance_reg: true
203
+ fact_var: 25.0
204
+ gamma: 1.0
205
+ use_vicreg_loss: false
206
+ use_align_loss: false
207
+ fact_align_loss: 0.25
208
+ fact_unif_loss: 0.5
209
+ use_uniform_loss: false
210
+ mask_batch: false
211
+ compute_test_loss: false
212
+ data:
213
+ class_path: data.vocals.VocalsDataModule
214
+ init_args:
215
+ augs_neg:
216
+ enable: false
217
+ gaussian_noise: 0.5
218
+ pitch_shift_naive: 0
219
+ time_stretch: 0
220
+ gain: 0.5
221
+ shift: 0
222
+ parametric_eq: 0
223
+ tanh_distortion: 0
224
+ time_mask: 0
225
+ formant_shift_parselmouth: 0
226
+ pitch_shift_parselmouth: 0
227
+ pitch_range_parselmouth: 0
228
+ pitch_shift_parselmouth_prob: 0
229
+ positive_examples: same_clip
230
+ dataset_dirs:
231
+ - tencys_vocals
232
+ - ghero_vocals_3
233
+ - ghero_vocals_4
234
+ batch_size: 120
235
+ batch_size_val: 120
236
+ nr_samples: 176000
237
+ normalize: true
238
+ num_workers: 40
239
+ sr: 44100
240
+ batch_sampling_mode: sample_clips
241
+ eval_frac: 0.105
242
+ group_name_is_folder: true
243
+ group_by_artist: true
244
+ augs:
245
+ enable: true
246
+ gaussian_noise: 0.5
247
+ pitch_shift_naive: 0
248
+ time_stretch: 0
249
+ gain: 0.5
250
+ shift: 0
251
+ parametric_eq: 0
252
+ tanh_distortion: 0
253
+ time_mask: 0.5
254
+ formant_shift_parselmouth: 0
255
+ pitch_shift_parselmouth:
256
+ - 1
257
+ - 1.3
258
+ pitch_range_parselmouth: 1.5
259
+ pitch_shift_parselmouth_prob: 0.5
260
+ transform_override: false
261
+ verbose: true
262
+ use_random_loader: false
263
+ max_groups: -1
264
+ multi_epoch: 1
265
+ classification: false
contrastive-vc/encoder.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ backbone: efficientnet_b0
2
+ embedding_dim: 1000
3
+ pretrained: true
contrastive-vc/feature_extractor.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ hop_length: 512
2
+ n_fft: 2048
3
+ spec_layer: melspectogram
contrastive-vc/hyperparams.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ encoder:
2
+ backbone: efficientnet_b0
3
+ embedding_dim: 1000
4
+ pretrained: true
5
+ feature_extractor:
6
+ hop_length: 512
7
+ n_fft: 2048
8
+ spec_layer: melspectogram
contrastive-vc/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f72bb45396412b75b9a0ff811f9ce48c577e524f85f8f0c8fd7a4c57c495fc
3
+ size 38765709
contrastive-vc/model.ts ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7063f1ffff6855495636ba9d863ca932b711dc0bb83a7f5691e62a596dd18c50
3
+ size 38969566
contrastive/config.yaml ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pytorch_lightning==1.9.3
2
+ seed_everything: 123
3
+ trainer:
4
+ logger:
5
+ class_path: pytorch_lightning.loggers.TensorBoardLogger
6
+ init_args:
7
+ save_dir: logs
8
+ name: exp_contrastive_sameclip_v2
9
+ version: null
10
+ log_graph: false
11
+ default_hp_metric: true
12
+ prefix: ''
13
+ sub_dir: null
14
+ enable_checkpointing: true
15
+ callbacks:
16
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
17
+ init_args:
18
+ dirpath: null
19
+ filename: best-val-loss-{epoch}-{step}
20
+ monitor: loss/val
21
+ verbose: false
22
+ save_last: null
23
+ save_top_k: 1
24
+ save_weights_only: false
25
+ mode: min
26
+ auto_insert_metric_name: true
27
+ every_n_train_steps: null
28
+ train_time_interval: null
29
+ every_n_epochs: null
30
+ save_on_train_epoch_end: null
31
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
32
+ init_args:
33
+ dirpath: null
34
+ filename: best-eer-val-{epoch}-{step}
35
+ monitor: EER evaluation proj/val
36
+ verbose: false
37
+ save_last: null
38
+ save_top_k: 1
39
+ save_weights_only: false
40
+ mode: min
41
+ auto_insert_metric_name: true
42
+ every_n_train_steps: null
43
+ train_time_interval: null
44
+ every_n_epochs: null
45
+ save_on_train_epoch_end: null
46
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
47
+ init_args:
48
+ dirpath: null
49
+ filename: best-rank-val-{epoch}-{step}
50
+ monitor: Order evaluation mean proj/val
51
+ verbose: false
52
+ save_last: null
53
+ save_top_k: 1
54
+ save_weights_only: false
55
+ mode: min
56
+ auto_insert_metric_name: true
57
+ every_n_train_steps: null
58
+ train_time_interval: null
59
+ every_n_epochs: null
60
+ save_on_train_epoch_end: null
61
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
62
+ init_args:
63
+ dirpath: null
64
+ filename: best-alignment-val-{epoch}-{step}
65
+ monitor: Alignment evaluation proj/val
66
+ verbose: false
67
+ save_last: null
68
+ save_top_k: 1
69
+ save_weights_only: false
70
+ mode: min
71
+ auto_insert_metric_name: true
72
+ every_n_train_steps: null
73
+ train_time_interval: null
74
+ every_n_epochs: null
75
+ save_on_train_epoch_end: null
76
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
77
+ init_args:
78
+ dirpath: null
79
+ filename: best-uniformity-val-{epoch}-{step}
80
+ monitor: Uniformity evaluation proj/val
81
+ verbose: false
82
+ save_last: null
83
+ save_top_k: 1
84
+ save_weights_only: false
85
+ mode: min
86
+ auto_insert_metric_name: true
87
+ every_n_train_steps: null
88
+ train_time_interval: null
89
+ every_n_epochs: null
90
+ save_on_train_epoch_end: null
91
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
92
+ init_args:
93
+ dirpath: null
94
+ filename: cptk-{epoch}-{step}
95
+ monitor: null
96
+ verbose: false
97
+ save_last: null
98
+ save_top_k: -1
99
+ save_weights_only: false
100
+ mode: min
101
+ auto_insert_metric_name: true
102
+ every_n_train_steps: null
103
+ train_time_interval: null
104
+ every_n_epochs: 25
105
+ save_on_train_epoch_end: null
106
+ - class_path: callbacks.evaluation.OrderEvaluation
107
+ init_args:
108
+ log_n_epochs: 5
109
+ on_train: true
110
+ use_projection: true
111
+ - class_path: callbacks.evaluation.EEREvaluation
112
+ init_args:
113
+ use_more_neg: false
114
+ log_n_epochs: 5
115
+ on_train: false
116
+ use_projection: true
117
+ - class_path: callbacks.evaluation.HypersphereEvaluation
118
+ init_args:
119
+ log_n_epochs: 5
120
+ on_train: true
121
+ use_projection: true
122
+ default_root_dir: null
123
+ gradient_clip_val: null
124
+ gradient_clip_algorithm: null
125
+ num_nodes: 1
126
+ num_processes: null
127
+ devices: null
128
+ gpus: 1
129
+ auto_select_gpus: null
130
+ tpu_cores: null
131
+ ipus: null
132
+ enable_progress_bar: true
133
+ overfit_batches: 0.0
134
+ track_grad_norm: -1
135
+ check_val_every_n_epoch: 1
136
+ fast_dev_run: false
137
+ accumulate_grad_batches: null
138
+ max_epochs: 100000
139
+ min_epochs: null
140
+ max_steps: 1000000000
141
+ min_steps: null
142
+ max_time: null
143
+ limit_train_batches: null
144
+ limit_val_batches: null
145
+ limit_test_batches: null
146
+ limit_predict_batches: null
147
+ val_check_interval: null
148
+ log_every_n_steps: 50
149
+ accelerator: gpu
150
+ strategy: null
151
+ sync_batchnorm: false
152
+ precision: 32
153
+ enable_model_summary: true
154
+ num_sanity_val_steps: 2
155
+ resume_from_checkpoint: null
156
+ profiler: null
157
+ benchmark: null
158
+ deterministic: null
159
+ reload_dataloaders_every_n_epochs: 0
160
+ auto_lr_find: false
161
+ replace_sampler_ddp: true
162
+ detect_anomaly: false
163
+ auto_scale_batch_size: false
164
+ plugins: null
165
+ amp_backend: null
166
+ amp_level: null
167
+ move_metrics_to_cpu: false
168
+ multiple_trainloader_mode: max_size_cycle
169
+ inference_mode: true
170
+ ckpt_path: null
171
+ model:
172
+ class_path: models.trainer.ContrastiveTrainer
173
+ init_args:
174
+ feature_extractor:
175
+ spec_layer: melspectogram
176
+ n_fft: 2048
177
+ hop_length: 512
178
+ backbone:
179
+ backbone: efficientnet_b0
180
+ pretrained: true
181
+ embedding_dim: 1000
182
+ projection:
183
+ input_dim: 1000
184
+ output_dim: 128
185
+ l2_normalize: true
186
+ optimizer1_init:
187
+ class_path: torch.optim.Adam
188
+ init_args:
189
+ lr: 0.0001
190
+ weight_decay: 1.0e-05
191
+ use_contrastive_loss: true
192
+ temp: 0.2
193
+ nr_negative: 250
194
+ decouple: true
195
+ use_norm_reg: false
196
+ max_norm_hinge: 4.0
197
+ norm_hinge_fact: 10.0
198
+ use_invariance_loss: false
199
+ fact_inv_loss: 1.0
200
+ use_covariance_reg: false
201
+ fact_cov: 1.0
202
+ use_variance_reg: false
203
+ fact_var: 1.0
204
+ gamma: 1.0
205
+ use_vicreg_loss: false
206
+ use_align_loss: false
207
+ fact_align_loss: 0.25
208
+ fact_unif_loss: 0.5
209
+ use_uniform_loss: false
210
+ mask_batch: false
211
+ compute_test_loss: false
212
+ data:
213
+ class_path: data.vocals.VocalsDataModule
214
+ init_args:
215
+ augs_neg:
216
+ enable: false
217
+ gaussian_noise: 0.5
218
+ pitch_shift_naive: 0
219
+ time_stretch: 0
220
+ gain: 0.5
221
+ shift: 0
222
+ parametric_eq: 0
223
+ tanh_distortion: 0
224
+ time_mask: 0
225
+ formant_shift_parselmouth: 0
226
+ pitch_shift_parselmouth: 0
227
+ pitch_range_parselmouth: 0
228
+ pitch_shift_parselmouth_prob: 0
229
+ positive_examples: same_clip
230
+ dataset_dirs:
231
+ - tencys_vocals
232
+ - ghero_vocals_3
233
+ - ghero_vocals_4
234
+ batch_size: 140
235
+ batch_size_val: 120
236
+ nr_samples: 176000
237
+ normalize: true
238
+ num_workers: 40
239
+ sr: 44100
240
+ batch_sampling_mode: sample_clips
241
+ eval_frac: 0.1
242
+ group_name_is_folder: true
243
+ group_by_artist: true
244
+ augs:
245
+ enable: true
246
+ gaussian_noise: 0.5
247
+ pitch_shift_naive: 0
248
+ time_stretch: 0
249
+ gain: 0.5
250
+ shift: 0
251
+ parametric_eq: 0
252
+ tanh_distortion: 0
253
+ time_mask: 0.5
254
+ formant_shift_parselmouth: 0
255
+ pitch_shift_parselmouth:
256
+ - 1
257
+ - 1.3
258
+ pitch_range_parselmouth: 1.5
259
+ pitch_shift_parselmouth_prob: 0.5
260
+ transform_override: false
261
+ verbose: true
262
+ use_random_loader: false
263
+ max_groups: -1
264
+ multi_epoch: 1
265
+ classification: false
contrastive/encoder.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ backbone: efficientnet_b0
2
+ embedding_dim: 1000
3
+ pretrained: true
contrastive/feature_extractor.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ hop_length: 512
2
+ n_fft: 2048
3
+ spec_layer: melspectogram
contrastive/hyperparams.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ encoder:
2
+ backbone: efficientnet_b0
3
+ embedding_dim: 1000
4
+ pretrained: false
5
+ feature_extractor:
6
+ hop_length: 512
7
+ n_fft: 2048
8
+ spec_layer: melspectogram
contrastive/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:381f2964da906ed85ddaa2453f2ce7a16b84d741ad0f23b1e5357972f2c2692b
3
+ size 38786209
contrastive/model.ts ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97f17b9de1848877ed16801cb873b9c615fcf65cc23fa1c43dfa0d0fe948f40a
3
+ size 38995676
unif-align/config.yaml ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pytorch_lightning==1.9.3
2
+ seed_everything: 123
3
+ trainer:
4
+ logger:
5
+ class_path: pytorch_lightning.loggers.TensorBoardLogger
6
+ init_args:
7
+ save_dir: logs
8
+ name: exp_uniformity_sameclip
9
+ version: null
10
+ log_graph: false
11
+ default_hp_metric: true
12
+ prefix: ''
13
+ sub_dir: null
14
+ enable_checkpointing: true
15
+ callbacks:
16
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
17
+ init_args:
18
+ dirpath: null
19
+ filename: best-val-loss-{epoch}-{step}
20
+ monitor: loss/val
21
+ verbose: false
22
+ save_last: null
23
+ save_top_k: 1
24
+ save_weights_only: false
25
+ mode: min
26
+ auto_insert_metric_name: true
27
+ every_n_train_steps: null
28
+ train_time_interval: null
29
+ every_n_epochs: null
30
+ save_on_train_epoch_end: null
31
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
32
+ init_args:
33
+ dirpath: null
34
+ filename: best-eer-val-{epoch}-{step}
35
+ monitor: EER evaluation proj/val
36
+ verbose: false
37
+ save_last: null
38
+ save_top_k: 1
39
+ save_weights_only: false
40
+ mode: min
41
+ auto_insert_metric_name: true
42
+ every_n_train_steps: null
43
+ train_time_interval: null
44
+ every_n_epochs: null
45
+ save_on_train_epoch_end: null
46
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
47
+ init_args:
48
+ dirpath: null
49
+ filename: best-rank-val-{epoch}-{step}
50
+ monitor: Order evaluation mean proj/val
51
+ verbose: false
52
+ save_last: null
53
+ save_top_k: 1
54
+ save_weights_only: false
55
+ mode: min
56
+ auto_insert_metric_name: true
57
+ every_n_train_steps: null
58
+ train_time_interval: null
59
+ every_n_epochs: null
60
+ save_on_train_epoch_end: null
61
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
62
+ init_args:
63
+ dirpath: null
64
+ filename: best-alignment-val-{epoch}-{step}
65
+ monitor: Alignment evaluation proj/val
66
+ verbose: false
67
+ save_last: null
68
+ save_top_k: 1
69
+ save_weights_only: false
70
+ mode: min
71
+ auto_insert_metric_name: true
72
+ every_n_train_steps: null
73
+ train_time_interval: null
74
+ every_n_epochs: null
75
+ save_on_train_epoch_end: null
76
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
77
+ init_args:
78
+ dirpath: null
79
+ filename: best-uniformity-val-{epoch}-{step}
80
+ monitor: Uniformity evaluation proj/val
81
+ verbose: false
82
+ save_last: null
83
+ save_top_k: 1
84
+ save_weights_only: false
85
+ mode: min
86
+ auto_insert_metric_name: true
87
+ every_n_train_steps: null
88
+ train_time_interval: null
89
+ every_n_epochs: null
90
+ save_on_train_epoch_end: null
91
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
92
+ init_args:
93
+ dirpath: null
94
+ filename: cptk-{epoch}-{step}
95
+ monitor: null
96
+ verbose: false
97
+ save_last: null
98
+ save_top_k: -1
99
+ save_weights_only: false
100
+ mode: min
101
+ auto_insert_metric_name: true
102
+ every_n_train_steps: null
103
+ train_time_interval: null
104
+ every_n_epochs: 25
105
+ save_on_train_epoch_end: null
106
+ - class_path: callbacks.evaluation.OrderEvaluation
107
+ init_args:
108
+ log_n_epochs: 5
109
+ on_train: true
110
+ use_projection: true
111
+ - class_path: callbacks.evaluation.EEREvaluation
112
+ init_args:
113
+ use_more_neg: false
114
+ log_n_epochs: 5
115
+ on_train: false
116
+ use_projection: true
117
+ - class_path: callbacks.evaluation.HypersphereEvaluation
118
+ init_args:
119
+ log_n_epochs: 5
120
+ on_train: true
121
+ use_projection: true
122
+ default_root_dir: null
123
+ gradient_clip_val: null
124
+ gradient_clip_algorithm: null
125
+ num_nodes: 1
126
+ num_processes: null
127
+ devices: null
128
+ gpus: 2
129
+ auto_select_gpus: null
130
+ tpu_cores: null
131
+ ipus: null
132
+ enable_progress_bar: true
133
+ overfit_batches: 0.0
134
+ track_grad_norm: -1
135
+ check_val_every_n_epoch: 1
136
+ fast_dev_run: false
137
+ accumulate_grad_batches: null
138
+ max_epochs: 100000
139
+ min_epochs: null
140
+ max_steps: 1000000000
141
+ min_steps: null
142
+ max_time: null
143
+ limit_train_batches: null
144
+ limit_val_batches: null
145
+ limit_test_batches: null
146
+ limit_predict_batches: null
147
+ val_check_interval: null
148
+ log_every_n_steps: 50
149
+ accelerator: gpu
150
+ strategy: null
151
+ sync_batchnorm: false
152
+ precision: 32
153
+ enable_model_summary: true
154
+ num_sanity_val_steps: 2
155
+ resume_from_checkpoint: null
156
+ profiler: null
157
+ benchmark: null
158
+ deterministic: null
159
+ reload_dataloaders_every_n_epochs: 0
160
+ auto_lr_find: false
161
+ replace_sampler_ddp: true
162
+ detect_anomaly: false
163
+ auto_scale_batch_size: false
164
+ plugins: null
165
+ amp_backend: null
166
+ amp_level: null
167
+ move_metrics_to_cpu: false
168
+ multiple_trainloader_mode: max_size_cycle
169
+ inference_mode: true
170
+ ckpt_path: null
171
+ model:
172
+ class_path: models.trainer.ContrastiveTrainer
173
+ init_args:
174
+ feature_extractor:
175
+ spec_layer: melspectogram
176
+ n_fft: 2048
177
+ hop_length: 512
178
+ backbone:
179
+ backbone: efficientnet_b0
180
+ pretrained: true
181
+ embedding_dim: 1000
182
+ projection:
183
+ input_dim: 1000
184
+ output_dim: 128
185
+ l2_normalize: true
186
+ optimizer1_init:
187
+ class_path: torch.optim.Adam
188
+ init_args:
189
+ lr: 0.0001
190
+ weight_decay: 1.0e-05
191
+ use_contrastive_loss: false
192
+ temp: 0.1
193
+ nr_negative: 64
194
+ decouple: false
195
+ use_norm_reg: false
196
+ max_norm_hinge: 4.0
197
+ norm_hinge_fact: 10.0
198
+ use_invariance_loss: false
199
+ fact_inv_loss: 1.0
200
+ use_covariance_reg: false
201
+ fact_cov: 1.0
202
+ use_variance_reg: false
203
+ fact_var: 1.0
204
+ gamma: 1.0
205
+ use_vicreg_loss: false
206
+ use_align_loss: true
207
+ fact_align_loss: 1.0
208
+ fact_unif_loss: 1.0
209
+ use_uniform_loss: true
210
+ mask_batch: false
211
+ compute_test_loss: false
212
+ data:
213
+ class_path: data.vocals.VocalsDataModule
214
+ init_args:
215
+ augs_neg:
216
+ enable: false
217
+ gaussian_noise: 0.5
218
+ pitch_shift_naive: 0
219
+ time_stretch: 0
220
+ gain: 0.5
221
+ shift: 0
222
+ parametric_eq: 0
223
+ tanh_distortion: 0
224
+ time_mask: 0
225
+ formant_shift_parselmouth: 0
226
+ pitch_shift_parselmouth: 0
227
+ pitch_range_parselmouth: 0
228
+ pitch_shift_parselmouth_prob: 0
229
+ positive_examples: same_clip
230
+ dataset_dirs:
231
+ - tencys_vocals
232
+ - ghero_vocals_3
233
+ - ghero_vocals_4
234
+ batch_size: 55
235
+ batch_size_val: 55
236
+ nr_samples: 176000
237
+ normalize: true
238
+ num_workers: 40
239
+ sr: 44100
240
+ batch_sampling_mode: sample_clips
241
+ eval_frac: 0.11
242
+ group_name_is_folder: true
243
+ group_by_artist: true
244
+ augs:
245
+ enable: true
246
+ gaussian_noise: 0.5
247
+ pitch_shift_naive: 0
248
+ time_stretch: 0
249
+ gain: 0.5
250
+ shift: 0
251
+ parametric_eq: 0
252
+ tanh_distortion: 0
253
+ time_mask: 0.5
254
+ formant_shift_parselmouth: 0
255
+ pitch_shift_parselmouth:
256
+ - 1
257
+ - 1.3
258
+ pitch_range_parselmouth: 1.5
259
+ pitch_shift_parselmouth_prob: 0.5
260
+ transform_override: false
261
+ verbose: true
262
+ use_random_loader: false
263
+ max_groups: -1
264
+ multi_epoch: 1
265
+ classification: false
unif-align/encoder.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ backbone: efficientnet_b0
2
+ embedding_dim: 1000
3
+ pretrained: true
unif-align/feature_extractor.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ hop_length: 512
2
+ n_fft: 2048
3
+ spec_layer: melspectogram
unif-align/hyperparams.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ encoder:
2
+ backbone: efficientnet_b0
3
+ embedding_dim: 1000
4
+ pretrained: true
5
+ feature_extractor:
6
+ hop_length: 512
7
+ n_fft: 2048
8
+ spec_layer: melspectogram
unif-align/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd51dde0840cf75ac049c11f631f738a636395f0493f6e8508f9bb36ca73db31
3
+ size 38765709
unif-align/model.ts ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8b2ebb000ed138b0d92fa25248c7dc91e9a7189ce3e0600121791dfca496cab
3
+ size 38970610
vicreg/config.yaml ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pytorch_lightning==1.9.3
2
+ seed_everything: 123
3
+ trainer:
4
+ logger:
5
+ class_path: pytorch_lightning.loggers.TensorBoardLogger
6
+ init_args:
7
+ save_dir: logs
8
+ name: exp_vicreg_sameclip
9
+ version: null
10
+ log_graph: false
11
+ default_hp_metric: true
12
+ prefix: ''
13
+ sub_dir: null
14
+ enable_checkpointing: true
15
+ callbacks:
16
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
17
+ init_args:
18
+ dirpath: null
19
+ filename: best-val-loss-{epoch}-{step}
20
+ monitor: loss/val
21
+ verbose: false
22
+ save_last: null
23
+ save_top_k: 1
24
+ save_weights_only: false
25
+ mode: min
26
+ auto_insert_metric_name: true
27
+ every_n_train_steps: null
28
+ train_time_interval: null
29
+ every_n_epochs: null
30
+ save_on_train_epoch_end: null
31
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
32
+ init_args:
33
+ dirpath: null
34
+ filename: best-eer-val-{epoch}-{step}
35
+ monitor: EER evaluation proj/val
36
+ verbose: false
37
+ save_last: null
38
+ save_top_k: 1
39
+ save_weights_only: false
40
+ mode: min
41
+ auto_insert_metric_name: true
42
+ every_n_train_steps: null
43
+ train_time_interval: null
44
+ every_n_epochs: null
45
+ save_on_train_epoch_end: null
46
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
47
+ init_args:
48
+ dirpath: null
49
+ filename: best-rank-val-{epoch}-{step}
50
+ monitor: Order evaluation mean proj/val
51
+ verbose: false
52
+ save_last: null
53
+ save_top_k: 1
54
+ save_weights_only: false
55
+ mode: min
56
+ auto_insert_metric_name: true
57
+ every_n_train_steps: null
58
+ train_time_interval: null
59
+ every_n_epochs: null
60
+ save_on_train_epoch_end: null
61
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
62
+ init_args:
63
+ dirpath: null
64
+ filename: best-alignment-val-{epoch}-{step}
65
+ monitor: Alignment evaluation proj/val
66
+ verbose: false
67
+ save_last: null
68
+ save_top_k: 1
69
+ save_weights_only: false
70
+ mode: min
71
+ auto_insert_metric_name: true
72
+ every_n_train_steps: null
73
+ train_time_interval: null
74
+ every_n_epochs: null
75
+ save_on_train_epoch_end: null
76
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
77
+ init_args:
78
+ dirpath: null
79
+ filename: best-uniformity-val-{epoch}-{step}
80
+ monitor: Uniformity evaluation proj/val
81
+ verbose: false
82
+ save_last: null
83
+ save_top_k: 1
84
+ save_weights_only: false
85
+ mode: min
86
+ auto_insert_metric_name: true
87
+ every_n_train_steps: null
88
+ train_time_interval: null
89
+ every_n_epochs: null
90
+ save_on_train_epoch_end: null
91
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
92
+ init_args:
93
+ dirpath: null
94
+ filename: cptk-{epoch}-{step}
95
+ monitor: null
96
+ verbose: false
97
+ save_last: null
98
+ save_top_k: -1
99
+ save_weights_only: false
100
+ mode: min
101
+ auto_insert_metric_name: true
102
+ every_n_train_steps: null
103
+ train_time_interval: null
104
+ every_n_epochs: 25
105
+ save_on_train_epoch_end: null
106
+ - class_path: callbacks.evaluation.OrderEvaluation
107
+ init_args:
108
+ log_n_epochs: 5
109
+ on_train: true
110
+ use_projection: true
111
+ - class_path: callbacks.evaluation.EEREvaluation
112
+ init_args:
113
+ use_more_neg: false
114
+ log_n_epochs: 5
115
+ on_train: true
116
+ use_projection: true
117
+ - class_path: callbacks.evaluation.HypersphereEvaluation
118
+ init_args:
119
+ log_n_epochs: 5
120
+ on_train: true
121
+ use_projection: true
122
+ default_root_dir: null
123
+ gradient_clip_val: null
124
+ gradient_clip_algorithm: null
125
+ num_nodes: 1
126
+ num_processes: null
127
+ devices: null
128
+ gpus: 2
129
+ auto_select_gpus: null
130
+ tpu_cores: null
131
+ ipus: null
132
+ enable_progress_bar: true
133
+ overfit_batches: 0.0
134
+ track_grad_norm: -1
135
+ check_val_every_n_epoch: 1
136
+ fast_dev_run: false
137
+ accumulate_grad_batches: null
138
+ max_epochs: 100000
139
+ min_epochs: null
140
+ max_steps: 1000000000
141
+ min_steps: null
142
+ max_time: null
143
+ limit_train_batches: null
144
+ limit_val_batches: null
145
+ limit_test_batches: null
146
+ limit_predict_batches: null
147
+ val_check_interval: null
148
+ log_every_n_steps: 50
149
+ accelerator: gpu
150
+ strategy: null
151
+ sync_batchnorm: false
152
+ precision: 32
153
+ enable_model_summary: true
154
+ num_sanity_val_steps: 2
155
+ resume_from_checkpoint: null
156
+ profiler: null
157
+ benchmark: null
158
+ deterministic: null
159
+ reload_dataloaders_every_n_epochs: 0
160
+ auto_lr_find: false
161
+ replace_sampler_ddp: true
162
+ detect_anomaly: false
163
+ auto_scale_batch_size: false
164
+ plugins: null
165
+ amp_backend: null
166
+ amp_level: null
167
+ move_metrics_to_cpu: false
168
+ multiple_trainloader_mode: max_size_cycle
169
+ inference_mode: true
170
+ ckpt_path: null
171
+ model:
172
+ class_path: models.trainer.ContrastiveTrainer
173
+ init_args:
174
+ feature_extractor:
175
+ spec_layer: melspectogram
176
+ n_fft: 2048
177
+ hop_length: 512
178
+ backbone:
179
+ backbone: efficientnet_b0
180
+ pretrained: true
181
+ embedding_dim: 1000
182
+ projection:
183
+ input_dim: 1000
184
+ output_dim: 128
185
+ l2_normalize: true
186
+ optimizer1_init:
187
+ class_path: torch.optim.Adam
188
+ init_args:
189
+ lr: 0.0001
190
+ weight_decay: 1.0e-05
191
+ use_contrastive_loss: false
192
+ temp: 0.1
193
+ nr_negative: 64
194
+ decouple: false
195
+ use_norm_reg: false
196
+ max_norm_hinge: 4.0
197
+ norm_hinge_fact: 10.0
198
+ use_invariance_loss: true
199
+ fact_inv_loss: 25.0
200
+ use_covariance_reg: true
201
+ fact_cov: 100.0
202
+ use_variance_reg: true
203
+ fact_var: 25.0
204
+ gamma: 1.0
205
+ use_vicreg_loss: false
206
+ use_align_loss: false
207
+ fact_align_loss: 0.25
208
+ fact_unif_loss: 0.5
209
+ use_uniform_loss: false
210
+ mask_batch: false
211
+ compute_test_loss: false
212
+ data:
213
+ class_path: data.vocals.VocalsDataModule
214
+ init_args:
215
+ augs_neg:
216
+ enable: false
217
+ gaussian_noise: 0.5
218
+ pitch_shift_naive: 0
219
+ time_stretch: 0
220
+ gain: 0.5
221
+ shift: 0
222
+ parametric_eq: 0
223
+ tanh_distortion: 0
224
+ time_mask: 0
225
+ formant_shift_parselmouth: 0
226
+ pitch_shift_parselmouth: 0
227
+ pitch_range_parselmouth: 0
228
+ pitch_shift_parselmouth_prob: 0
229
+ positive_examples: same_clip
230
+ dataset_dirs:
231
+ - tencys_vocals
232
+ - ghero_vocals_3
233
+ - ghero_vocals_4
234
+ batch_size: 55
235
+ batch_size_val: 55
236
+ nr_samples: 176000
237
+ normalize: true
238
+ num_workers: 40
239
+ sr: 44100
240
+ batch_sampling_mode: sample_clips
241
+ eval_frac: 0.11
242
+ group_name_is_folder: true
243
+ group_by_artist: true
244
+ augs:
245
+ enable: true
246
+ gaussian_noise: 0.5
247
+ pitch_shift_naive: 0
248
+ time_stretch: 0
249
+ gain: 0.5
250
+ shift: 0
251
+ parametric_eq: 0
252
+ tanh_distortion: 0
253
+ time_mask: 0.5
254
+ formant_shift_parselmouth: 0
255
+ pitch_shift_parselmouth:
256
+ - 1
257
+ - 1.3
258
+ pitch_range_parselmouth: 1.5
259
+ pitch_shift_parselmouth_prob: 0.5
260
+ transform_override: false
261
+ verbose: true
262
+ use_random_loader: false
263
+ max_groups: -1
264
+ multi_epoch: 1
265
+ classification: false
vicreg/encoder.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ backbone: efficientnet_b0
2
+ embedding_dim: 1000
3
+ pretrained: true
vicreg/feature_extractor.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ hop_length: 512
2
+ n_fft: 2048
3
+ spec_layer: melspectogram
vicreg/hyperparams.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ encoder:
2
+ backbone: efficientnet_b0
3
+ embedding_dim: 1000
4
+ pretrained: true
5
+ feature_extractor:
6
+ hop_length: 512
7
+ n_fft: 2048
8
+ spec_layer: melspectogram
vicreg/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bccb3c2ec820feb16f109d33439886b77c8b0a236267f0ff71276ad562171cae
3
+ size 38765709
vicreg/model.ts ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d2c1af904ee08cb2c163307fc54c5381774774a42c96eeb2896f475e2c8aeb
3
+ size 38970610