Jakob Poncelet committed
Commit b4d3e53
1 Parent(s): e7e1c5c

First model version

checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13ee473f13953e0d9053f39367251669e2a006af3d99f5b007dfe2a79b35f851
+ size 1140805865
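checkpoint_best.pt is tracked with Git LFS, so the repository itself only stores the three-line pointer above; an LFS-aware client (for example git lfs pull after cloning) fetches the actual ~1.1 GB checkpoint, whose SHA-256 is the oid field. A minimal verification sketch in Python; the local file name is a placeholder for wherever the checkpoint was downloaded:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so the ~1.1 GB checkpoint never sits fully in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid copied from the LFS pointer above.
EXPECTED_OID = "13ee473f13953e0d9053f39367251669e2a006af3d99f5b007dfe2a79b35f851"

# "checkpoint_best.pt" is a placeholder for the locally fetched file.
assert sha256_of("checkpoint_best.pt") == EXPECTED_OID, "checksum mismatch"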
dict.ltr.txt ADDED
@@ -0,0 +1,89 @@
+ | 1425817
+ e 1178319
+ n 633984
+ a 534584
+ i 432845
+ t 426822
+ d 368464
+ r 363974
+ o 362103
+ s 243285
+ l 240045
+ h 194637
+ g 193570
+ k 170494
+ m 163029
+ u 157460
+ v 141792
+ j 130640
+ w 111117
+ z 97895
+ b 97538
+ p 82921
+ c 75002
+ f 51103
+ ' 20301
+ X 7489
+ é 7286
+ y 5108
+ è 5063
+ - 4778
+ ë 3361
+ x 2022
+ q 682
+ ï 493
+ ü 264
+ ö 157
+ à 139
+ ê 80
+ ç 39
+ 1 38
+ á 38
+ ä 35
+ 2 34
+ 32 30
+ áx 16
+ 38 15
+ 16 13
+ 35 12
+ 4 11
+ ô 10
+ â 10
+ 21 10
+ & 10
+ ñ 8
+ 314 6
+ î 5
+ 12 5
+ 24 5
+ 19 5
+ 17 5
+ 40 4
+ 10 4
+ 130 4
+ 52 4
+ 5 3
+ 8 3
+ 20 3
+ 3 3
+ Ö 3
+ í 3
+ 6 2
+ 04 2
+ 313 2
+ ó 2
+ 201 1
+ 67 1
+ 499 1
+ 7 1
+ 45 1
+ 198 1
+ 25 1
+ 902 1
+ xq 1
+ 3xx 1
+ 66 1
+ ù 1
+ ò 1
+ Å 1
+ ú 1
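dict.ltr.txt is a fairseq-style dictionary for the letter (ltr) targets: one token per line followed by its corpus frequency, most frequent first, with | acting as the word separator. A small parsing sketch, assuming fairseq's usual convention of reserving the first four indices for <s>, <pad>, </s> and <unk> (so the CTC output layer would have 89 + 4 = 93 classes):

# Dependency-free parse of the "symbol frequency" lines above.
SPECIALS = ["<s>", "<pad>", "</s>", "<unk>"]  # assumed fairseq default specials

def load_ltr_dict(path="dict.ltr.txt"):
    symbols = list(SPECIALS)
    counts = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.rstrip("\n")
            if not line:
                continue
            sym, count = line.rsplit(" ", 1)  # token and frequency are space-separated
            symbols.append(sym)
            counts[sym] = int(count)
    return {sym: idx for idx, sym in enumerate(symbols)}, counts

sym2idx, counts = load_ltr_dict()
print(len(sym2idx), sym2idx["|"])  # expect 93 symbols, with "|" at index 4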
finetuning_config.yaml ADDED
@@ -0,0 +1,65 @@
+ # @package _group_
+
+ common:
+   memory_efficient_fp16: true
+   log_format: json
+   log_interval: 100
+   model_parallel_size: 1
+
+ checkpoint:
+   no_epoch_checkpoints: true
+   best_checkpoint_metric: wer
+   save_dir: /esat/spchtemp/scratch/jponcele/selfsupervised_exps/result/finetune_VW_base_all
+
+ task:
+   _name: audio_pretraining
+   data: /users/spraak/jponcele/BenchmarkingSS/data/cgn_phone_10ms_w2v2_all
+   normalize: true #false
+   labels: ltr
+   segments: true
+   max_length: 800000
+
+ dataset:
+   num_workers: 6
+   batch_size: 4
+   max_tokens: 32000000
+   skip_invalid_size_inputs_valid_test: true
+   valid_subset: test
+   data_buffer_size: 2
+
+ distributed_training:
+   ddp_backend: legacy_ddp
+   distributed_world_size: 1
+
+ criterion:
+   _name: ctc
+   zero_infinity: true
+
+ optimization:
+   max_update: 500000
+   lr: [0.00003]
+   sentence_avg: true
+   update_freq: [4]
+
+ optimizer:
+   _name: adam
+   adam_betas: (0.9,0.98)
+   adam_eps: 1e-08
+
+ lr_scheduler:
+   _name: tri_stage
+   phase_ratio: [0.1, 0.4, 0.5]
+   final_lr_scale: 0.05
+
+ model:
+   _name: wav2vec_ctc
+   w2v_path: /esat/spchtemp/scratch/jponcele/selfsupervised_exps/result/pretrain_w2v2_cgn-unsup-VW_base/checkpoint_74_250000.pt
+   apply_mask: true
+   mask_prob: 0.65
+   mask_channel_prob: 0.5
+   mask_channel_length: 64
+   layerdrop: 0.1
+   activation_dropout: 0.1
+   feature_grad_mult: 0.0
+   freeze_finetune_updates: 0
+
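finetuning_config.yaml is a Hydra config for fairseq's CTC fine-tuning: the pretrained encoder referenced by w2v_path gets a character-level output head (wav2vec_ctc), time and channel masking stay on during fine-tuning, and the convolutional feature extractor is frozen (feature_grad_mult: 0.0). A hedged inference sketch using the resulting checkpoint_best.pt with a recent fairseq install; the blank-at-index-0 and |-as-space conventions are assumptions from the standard wav2vec 2.0 letter recipe, not guarantees of this checkpoint:

import torch
from fairseq import checkpoint_utils

# Load the fine-tuned wav2vec_ctc model together with its task and letter dictionary.
models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
    ["checkpoint_best.pt"],
    arg_overrides={"data": "."},  # placeholder: a directory that contains dict.ltr.txt
)
model = models[0].eval()
letters = task.target_dictionary

# "waveform" stands in for a mono 16 kHz float tensor of shape (1, num_samples),
# layer-normalized as the config requests (normalize: true).
waveform = torch.randn(1, 16000)

with torch.no_grad():
    net_output = model(source=waveform, padding_mask=None)
    log_probs = model.get_normalized_probs(net_output, log_probs=True)  # (T, B, vocab)

# Greedy CTC decode: argmax per frame, collapse repeats, drop the blank (index 0).
ids = log_probs.argmax(dim=-1).squeeze(1).tolist()
prev, hyp = None, []
for i in ids:
    if i != prev and i != 0:
        hyp.append(letters[i])
    prev = i
print("".join(hyp).replace("|", " ").strip())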
pretraining_config.yaml ADDED
@@ -0,0 +1,63 @@
+ # @package _group_
+
+ common:
+   memory_efficient_fp16: true
+   log_format: json
+   log_interval: 100
+
+ checkpoint:
+   save_interval_updates: 10000
+   keep_interval_updates: 1
+   no_epoch_checkpoints: true
+   save_dir: /esat/spchtemp/scratch/jponcele/selfsupervised_exps/result/pretrain_w2v2_cgn-unsup-VW_base
+
+ task:
+   _name: audio_pretraining
+   data: /users/spraak/jponcele/BenchmarkingSS/data/cgn_unsup_VW_w2v2
+   max_sample_size: 250000
+   min_sample_size: 4000
+   segments: true
+   normalize: true
+
+ dataset:
+   num_workers: 6
+   #batch_size: 4
+   max_tokens: 1400000
+   skip_invalid_size_inputs_valid_test: true
+   valid_subset: test
+   data_buffer_size: 1 #2
+   required_batch_size_multiple: 1 #default=8
+
+ distributed_training:
+   distributed_world_size: 1
+   ddp_backend: legacy_ddp
+
+ criterion:
+   _name: wav2vec
+   infonce: true
+   log_keys: ["prob_perplexity","code_perplexity","temp"]
+   loss_weights: [0.1, 10]
+
+ optimization:
+   max_update: 400000
+   lr: [0.0005]
+   update_freq: [32]
+
+ optimizer:
+   _name: adam
+   adam_betas: (0.9,0.98)
+   adam_eps: 1e-06
+   weight_decay: 0.01
+
+ lr_scheduler:
+   _name: polynomial_decay
+   warmup_updates: 50000
+
+ model:
+   _name: wav2vec2
+   quantize_targets: true
+   final_dim: 256
+   encoder_layerdrop: 0.05
+   dropout_input: 0.1
+   dropout_features: 0.1
+   feature_grad_mult: 0.1
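pretraining_config.yaml follows the wav2vec 2.0 BASE self-supervised recipe (InfoNCE loss with quantized targets, polynomial learning-rate decay with 50k warmup updates), compensating for a single GPU (distributed_world_size: 1) with gradient accumulation (update_freq: [32]). Configs like this are normally consumed by fairseq-hydra-train; a launch sketch where the config directory and the dotted overrides for the cluster-specific paths are placeholders:

import subprocess

# Hypothetical layout: the YAML above saved as config/pretraining_config.yaml.
subprocess.run(
    [
        "fairseq-hydra-train",
        "--config-dir", "config",
        "--config-name", "pretraining_config",
        # Dotted Hydra overrides replace the hard-coded cluster paths; values are placeholders.
        "task.data=/path/to/manifest_dir",
        "checkpoint.save_dir=/path/to/save_dir",
    ],
    check=True,
)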