simpleoier committed on
Commit
8372fea
1 Parent(s): 0c8d698

Update model

Browse files
Files changed (19) hide show
  1. README.md +327 -0
  2. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/config.yaml +257 -0
  3. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/acc_m.png +0 -0
  4. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/acc_u.png +0 -0
  5. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/backward_time.png +0 -0
  6. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/correct_m.png +0 -0
  7. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/correct_u.png +0 -0
  8. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/count_m.png +0 -0
  9. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/count_u.png +0 -0
  10. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/forward_time.png +0 -0
  11. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/gpu_max_cached_mem_GB.png +0 -0
  12. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/iter_time.png +0 -0
  13. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/loss.png +0 -0
  14. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/optim0_lr0.png +0 -0
  15. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/optim_step_time.png +0 -0
  16. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/train_time.png +0 -0
  17. exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/valid.loss.ave.pth +3 -0
  18. exp/kmeans_iter0_mfcc_train_960_portion0.1/km_100.mdl +0 -0
  19. meta.yaml +8 -0
README.md CHANGED
@@ -1,3 +1,330 @@
1
  ---
 
 
 
 
 
 
 
2
  license: cc-by-4.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - self-supervised-learning
6
+ language: en
7
+ datasets:
8
+ - librispeech
9
  license: cc-by-4.0
10
  ---
11
+
12
+ ## ESPnet2 SSL model
13
+
14
+ ### `simpleoier/simpleoier_librispeech_hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw`
15
+
16
+ This model was trained by simpleoier using the librispeech recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 753f40d61813436d4e76660904d02eaed7a6649e
26
+ pip install -e .
27
+ cd egs2/librispeech/ssl1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model simpleoier/simpleoier_librispeech_hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw
29
+ ```
30
+
31
+
32
+
33
+ ## SSL config
34
+
35
+ <details><summary>expand</summary>
36
+
37
+ ```
38
+ config: conf/tuning/train_ssl_torchaudiohubert_base_960h_pretrain_it0.yaml
39
+ print_config: false
40
+ log_level: INFO
41
+ dry_run: false
42
+ iterator_type: sequence
43
+ output_dir: exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw
44
+ ngpu: 1
45
+ seed: 0
46
+ num_workers: 64
47
+ num_att_plot: 3
48
+ dist_backend: nccl
49
+ dist_init_method: env://
50
+ dist_world_size: 8
51
+ dist_rank: 0
52
+ local_rank: 0
53
+ dist_master_addr: localhost
54
+ dist_master_port: 45091
55
+ dist_launcher: null
56
+ multiprocessing_distributed: true
57
+ unused_parameters: true
58
+ sharded_ddp: false
59
+ cudnn_enabled: true
60
+ cudnn_benchmark: false
61
+ cudnn_deterministic: true
62
+ collect_stats: false
63
+ write_collected_feats: false
64
+ max_epoch: 250
65
+ patience: null
66
+ val_scheduler_criterion:
67
+ - valid
68
+ - loss
69
+ early_stopping_criterion:
70
+ - valid
71
+ - loss
72
+ - min
73
+ best_model_criterion:
74
+ - - valid
75
+ - loss
76
+ - min
77
+ keep_nbest_models: 10
78
+ nbest_averaging_interval: 0
79
+ grad_clip: 5.0
80
+ grad_clip_type: 2.0
81
+ grad_noise: false
82
+ accum_grad: 2
83
+ no_forward_run: false
84
+ resume: true
85
+ train_dtype: float32
86
+ use_amp: true
87
+ log_interval: null
88
+ use_matplotlib: true
89
+ use_tensorboard: true
90
+ create_graph_in_tensorboard: false
91
+ use_wandb: false
92
+ wandb_project: null
93
+ wandb_id: null
94
+ wandb_entity: null
95
+ wandb_name: null
96
+ wandb_model_log_interval: -1
97
+ detect_anomaly: false
98
+ pretrain_path: null
99
+ init_param: []
100
+ ignore_init_mismatch: false
101
+ freeze_param: []
102
+ num_iters_per_epoch: null
103
+ batch_size: 20
104
+ valid_batch_size: null
105
+ batch_bins: 48000000
106
+ valid_batch_bins: null
107
+ train_shape_file:
108
+ - exp/hubert_iter0_stats_raw/train/speech_shape
109
+ - exp/hubert_iter0_stats_raw/train/text_shape.word
110
+ valid_shape_file:
111
+ - exp/hubert_iter0_stats_raw/valid/speech_shape
112
+ - exp/hubert_iter0_stats_raw/valid/text_shape.word
113
+ batch_type: numel
114
+ valid_batch_type: null
115
+ fold_length:
116
+ - 80000
117
+ - 400
118
+ sort_in_batch: descending
119
+ sort_batch: descending
120
+ multiple_iterator: false
121
+ chunk_length: 500
122
+ chunk_shift_ratio: 0.5
123
+ num_cache_chunks: 1024
124
+ train_data_path_and_name_and_type:
125
+ - - dump/raw/train_960/wav.scp
126
+ - speech
127
+ - sound
128
+ - - dump/raw/train_960/text.km.kmeans_iter0_mfcc_train_960_portion0.1
129
+ - text
130
+ - text
131
+ valid_data_path_and_name_and_type:
132
+ - - dump/raw/dev/wav.scp
133
+ - speech
134
+ - sound
135
+ - - dump/raw/dev/text.km.kmeans_iter0_mfcc_train_960_portion0.1
136
+ - text
137
+ - text
138
+ allow_variable_data_keys: false
139
+ max_cache_size: 0.0
140
+ max_cache_fd: 32
141
+ valid_max_cache_size: null
142
+ optim: adam
143
+ optim_conf:
144
+ lr: 0.0005
145
+ scheduler: warmuplr
146
+ scheduler_conf:
147
+ warmup_steps: 32000
148
+ token_list:
149
+ - '81'
150
+ - '5'
151
+ - '79'
152
+ - '84'
153
+ - '27'
154
+ - '35'
155
+ - '67'
156
+ - '56'
157
+ - '10'
158
+ - '99'
159
+ - '24'
160
+ - '3'
161
+ - '48'
162
+ - '8'
163
+ - '42'
164
+ - '16'
165
+ - '32'
166
+ - '31'
167
+ - '47'
168
+ - '43'
169
+ - '20'
170
+ - '73'
171
+ - '49'
172
+ - '86'
173
+ - '18'
174
+ - '64'
175
+ - '34'
176
+ - '59'
177
+ - '95'
178
+ - '0'
179
+ - '52'
180
+ - '44'
181
+ - '61'
182
+ - '57'
183
+ - '30'
184
+ - '1'
185
+ - '93'
186
+ - '6'
187
+ - '69'
188
+ - '19'
189
+ - '7'
190
+ - '65'
191
+ - '28'
192
+ - '89'
193
+ - '2'
194
+ - '96'
195
+ - '91'
196
+ - '72'
197
+ - '38'
198
+ - '78'
199
+ - '26'
200
+ - '13'
201
+ - '39'
202
+ - '94'
203
+ - '4'
204
+ - '88'
205
+ - '85'
206
+ - '51'
207
+ - '82'
208
+ - '41'
209
+ - '50'
210
+ - '21'
211
+ - '80'
212
+ - '97'
213
+ - '87'
214
+ - '25'
215
+ - '54'
216
+ - '12'
217
+ - '40'
218
+ - '60'
219
+ - '29'
220
+ - '11'
221
+ - '53'
222
+ - '71'
223
+ - '83'
224
+ - '74'
225
+ - '68'
226
+ - '55'
227
+ - '62'
228
+ - '76'
229
+ - '45'
230
+ - '75'
231
+ - '92'
232
+ - '46'
233
+ - '36'
234
+ - '66'
235
+ - '22'
236
+ - '77'
237
+ - '23'
238
+ - '63'
239
+ - '37'
240
+ - '58'
241
+ - '33'
242
+ - '15'
243
+ - '17'
244
+ - '90'
245
+ - '98'
246
+ - '14'
247
+ - '70'
248
+ - '9'
249
+ - <unk>
250
+ - <sos/eos>
251
+ init: null
252
+ collate_fn_conf:
253
+ label_downsampling: 2
254
+ pad: false
255
+ rand_crop: true
256
+ input_size: 1
257
+ num_classes: 100
258
+ use_preprocessor: true
259
+ token_type: word
260
+ bpemodel: null
261
+ non_linguistic_symbols: null
262
+ cleaner: null
263
+ g2p: null
264
+ speech_volume_normalize: null
265
+ rir_scp: null
266
+ rir_apply_prob: 1.0
267
+ noise_scp: null
268
+ noise_apply_prob: 1.0
269
+ noise_db_range: '13_15'
270
+ pred_masked_weight: 1.0
271
+ pred_nomask_weight: 0.0
272
+ loss_weights: 0.0
273
+ frontend: null
274
+ frontend_conf: {}
275
+ specaug: null
276
+ specaug_conf: {}
277
+ normalize: null
278
+ normalize_conf: {}
279
+ preencoder: null
280
+ preencoder_conf: {}
281
+ encoder: torchaudio_hubert
282
+ encoder_conf:
283
+ encoder_projection_dropout: 0.1
284
+ encoder_attention_dropout: 0.1
285
+ encoder_ff_interm_dropout: 0.0
286
+ encoder_dropout: 0.1
287
+ encoder_layer_drop: 0.05
288
+ model: torchaudio
289
+ model_conf: {}
290
+ required:
291
+ - output_dir
292
+ - token_list
293
+ version: '202209'
294
+ distributed: true
295
+ ```
296
+
297
+ </details>
298
+
299
+
300
+
301
+ ### Citing ESPnet
302
+
303
+ ```bibtex
304
+ @inproceedings{watanabe2018espnet,
305
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
306
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
307
+ year={2018},
308
+ booktitle={Proceedings of Interspeech},
309
+ pages={2207--2211},
310
+ doi={10.21437/Interspeech.2018-1456},
311
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
312
+ }
313
+
314
+
315
+
316
+
317
+ ```
318
+
319
+ or arXiv:
320
+
321
+ ```bibtex
322
+ @misc{watanabe2018espnet,
323
+ title={ESPnet: End-to-End Speech Processing Toolkit},
324
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
325
+ year={2018},
326
+ eprint={1804.00015},
327
+ archivePrefix={arXiv},
328
+ primaryClass={cs.CL}
329
+ }
330
+ ```
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/config.yaml ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_ssl_torchaudiohubert_base_960h_pretrain_it0.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 64
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 8
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 45091
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: true
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 250
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 2
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: true
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ create_graph_in_tensorboard: false
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 20
67
+ valid_batch_size: null
68
+ batch_bins: 48000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/hubert_iter0_stats_raw/train/speech_shape
72
+ - exp/hubert_iter0_stats_raw/train/text_shape.word
73
+ valid_shape_file:
74
+ - exp/hubert_iter0_stats_raw/valid/speech_shape
75
+ - exp/hubert_iter0_stats_raw/valid/text_shape.word
76
+ batch_type: numel
77
+ valid_batch_type: null
78
+ fold_length:
79
+ - 80000
80
+ - 400
81
+ sort_in_batch: descending
82
+ sort_batch: descending
83
+ multiple_iterator: false
84
+ chunk_length: 500
85
+ chunk_shift_ratio: 0.5
86
+ num_cache_chunks: 1024
87
+ train_data_path_and_name_and_type:
88
+ - - dump/raw/train_960/wav.scp
89
+ - speech
90
+ - sound
91
+ - - dump/raw/train_960/text.km.kmeans_iter0_mfcc_train_960_portion0.1
92
+ - text
93
+ - text
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/raw/dev/wav.scp
96
+ - speech
97
+ - sound
98
+ - - dump/raw/dev/text.km.kmeans_iter0_mfcc_train_960_portion0.1
99
+ - text
100
+ - text
101
+ allow_variable_data_keys: false
102
+ max_cache_size: 0.0
103
+ max_cache_fd: 32
104
+ valid_max_cache_size: null
105
+ optim: adam
106
+ optim_conf:
107
+ lr: 0.0005
108
+ scheduler: warmuplr
109
+ scheduler_conf:
110
+ warmup_steps: 32000
111
+ token_list:
112
+ - '81'
113
+ - '5'
114
+ - '79'
115
+ - '84'
116
+ - '27'
117
+ - '35'
118
+ - '67'
119
+ - '56'
120
+ - '10'
121
+ - '99'
122
+ - '24'
123
+ - '3'
124
+ - '48'
125
+ - '8'
126
+ - '42'
127
+ - '16'
128
+ - '32'
129
+ - '31'
130
+ - '47'
131
+ - '43'
132
+ - '20'
133
+ - '73'
134
+ - '49'
135
+ - '86'
136
+ - '18'
137
+ - '64'
138
+ - '34'
139
+ - '59'
140
+ - '95'
141
+ - '0'
142
+ - '52'
143
+ - '44'
144
+ - '61'
145
+ - '57'
146
+ - '30'
147
+ - '1'
148
+ - '93'
149
+ - '6'
150
+ - '69'
151
+ - '19'
152
+ - '7'
153
+ - '65'
154
+ - '28'
155
+ - '89'
156
+ - '2'
157
+ - '96'
158
+ - '91'
159
+ - '72'
160
+ - '38'
161
+ - '78'
162
+ - '26'
163
+ - '13'
164
+ - '39'
165
+ - '94'
166
+ - '4'
167
+ - '88'
168
+ - '85'
169
+ - '51'
170
+ - '82'
171
+ - '41'
172
+ - '50'
173
+ - '21'
174
+ - '80'
175
+ - '97'
176
+ - '87'
177
+ - '25'
178
+ - '54'
179
+ - '12'
180
+ - '40'
181
+ - '60'
182
+ - '29'
183
+ - '11'
184
+ - '53'
185
+ - '71'
186
+ - '83'
187
+ - '74'
188
+ - '68'
189
+ - '55'
190
+ - '62'
191
+ - '76'
192
+ - '45'
193
+ - '75'
194
+ - '92'
195
+ - '46'
196
+ - '36'
197
+ - '66'
198
+ - '22'
199
+ - '77'
200
+ - '23'
201
+ - '63'
202
+ - '37'
203
+ - '58'
204
+ - '33'
205
+ - '15'
206
+ - '17'
207
+ - '90'
208
+ - '98'
209
+ - '14'
210
+ - '70'
211
+ - '9'
212
+ - <unk>
213
+ - <sos/eos>
214
+ init: null
215
+ collate_fn_conf:
216
+ label_downsampling: 2
217
+ pad: false
218
+ rand_crop: true
219
+ input_size: 1
220
+ num_classes: 100
221
+ use_preprocessor: true
222
+ token_type: word
223
+ bpemodel: null
224
+ non_linguistic_symbols: null
225
+ cleaner: null
226
+ g2p: null
227
+ speech_volume_normalize: null
228
+ rir_scp: null
229
+ rir_apply_prob: 1.0
230
+ noise_scp: null
231
+ noise_apply_prob: 1.0
232
+ noise_db_range: '13_15'
233
+ pred_masked_weight: 1.0
234
+ pred_nomask_weight: 0.0
235
+ loss_weights: 0.0
236
+ frontend: null
237
+ frontend_conf: {}
238
+ specaug: null
239
+ specaug_conf: {}
240
+ normalize: null
241
+ normalize_conf: {}
242
+ preencoder: null
243
+ preencoder_conf: {}
244
+ encoder: torchaudio_hubert
245
+ encoder_conf:
246
+ encoder_projection_dropout: 0.1
247
+ encoder_attention_dropout: 0.1
248
+ encoder_ff_interm_dropout: 0.0
249
+ encoder_dropout: 0.1
250
+ encoder_layer_drop: 0.05
251
+ model: torchaudio
252
+ model_conf: {}
253
+ required:
254
+ - output_dir
255
+ - token_list
256
+ version: '202209'
257
+ distributed: true
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/acc_m.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/acc_u.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/backward_time.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/correct_m.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/correct_u.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/count_m.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/count_u.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/forward_time.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/gpu_max_cached_mem_GB.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/iter_time.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/loss.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/optim0_lr0.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/optim_step_time.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/images/train_time.png ADDED
exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/valid.loss.ave.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:313bb75a1a1c35c95eb6ee62f0773be1c6ba58798828a335f00b82e3af14022c
3
+ size 378481137
exp/kmeans_iter0_mfcc_train_960_portion0.1/km_100.mdl ADDED
Binary file (16.9 kB). View file
 
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202209'
2
+ files:
3
+ model_file: exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/valid.loss.ave.pth
4
+ python: "3.9.15 (main, Nov 24 2022, 14:31:59) \n[GCC 11.2.0]"
5
+ timestamp: 1672839599.248442
6
+ torch: 1.13.0
7
+ yaml_files:
8
+ train_config: exp/hubert_iter0_train_ssl_torchaudiohubert_base_960h_pretrain_it0_raw/config.yaml