marinone94 committed on
Commit 4621ac3 • 1 Parent(s): 216b1e3

Training in progress, step 500

Files changed (42)
  1. .ipynb_checkpoints/run-checkpoint.sh +3 -3
  2. checkpoint-200/scaler.pt +0 -3
  3. checkpoint-200/trainer_state.json +0 -172
  4. checkpoint-250/optimizer.pt +0 -3
  5. checkpoint-250/trainer_state.json +0 -211
  6. checkpoint-350/config.json +0 -107
  7. checkpoint-350/preprocessor_config.json +0 -9
  8. checkpoint-350/pytorch_model.bin +0 -3
  9. checkpoint-350/rng_state.pth +0 -3
  10. checkpoint-350/scheduler.pt +0 -3
  11. checkpoint-350/training_args.bin +0 -3
  12. {checkpoint-200 β†’ checkpoint-400}/config.json +0 -0
  13. {checkpoint-300 β†’ checkpoint-400}/optimizer.pt +1 -1
  14. {checkpoint-200 β†’ checkpoint-400}/preprocessor_config.json +0 -0
  15. {checkpoint-250 β†’ checkpoint-400}/pytorch_model.bin +1 -1
  16. {checkpoint-250 β†’ checkpoint-400}/rng_state.pth +1 -1
  17. {checkpoint-300 β†’ checkpoint-400}/scaler.pt +1 -1
  18. {checkpoint-200 β†’ checkpoint-400}/scheduler.pt +1 -1
  19. {checkpoint-350 β†’ checkpoint-400}/trainer_state.json +42 -3
  20. {checkpoint-200 β†’ checkpoint-400}/training_args.bin +0 -0
  21. {checkpoint-250 β†’ checkpoint-450}/config.json +0 -0
  22. {checkpoint-350 β†’ checkpoint-450}/optimizer.pt +1 -1
  23. {checkpoint-250 β†’ checkpoint-450}/preprocessor_config.json +0 -0
  24. {checkpoint-200 β†’ checkpoint-450}/pytorch_model.bin +1 -1
  25. {checkpoint-300 β†’ checkpoint-450}/rng_state.pth +1 -1
  26. {checkpoint-250 β†’ checkpoint-450}/scaler.pt +1 -1
  27. {checkpoint-300 β†’ checkpoint-450}/scheduler.pt +1 -1
  28. {checkpoint-300 β†’ checkpoint-450}/trainer_state.json +120 -3
  29. {checkpoint-250 β†’ checkpoint-450}/training_args.bin +0 -0
  30. {checkpoint-300 β†’ checkpoint-500}/config.json +0 -0
  31. {checkpoint-200 β†’ checkpoint-500}/optimizer.pt +2 -2
  32. {checkpoint-300 β†’ checkpoint-500}/preprocessor_config.json +0 -0
  33. {checkpoint-300 β†’ checkpoint-500}/pytorch_model.bin +1 -1
  34. {checkpoint-200 β†’ checkpoint-500}/rng_state.pth +2 -2
  35. {checkpoint-350 β†’ checkpoint-500}/scaler.pt +1 -1
  36. {checkpoint-250 β†’ checkpoint-500}/scheduler.pt +1 -1
  37. checkpoint-500/trainer_state.json +55 -0
  38. {checkpoint-300 β†’ checkpoint-500}/training_args.bin +1 -1
  39. pytorch_model.bin +1 -1
  40. run.sh +3 -3
  41. special_tokens_map.json +1 -1
  42. training_args.bin +1 -1
.ipynb_checkpoints/run-checkpoint.sh CHANGED
@@ -14,9 +14,9 @@ python run_speech_recognition_ctc.py \
14
  --evaluation_strategy="steps" \
15
  --text_column_name="sentence" \
16
  --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
17
- --save_steps="50" \
18
- --eval_steps="50" \
19
- --logging_steps="10" \
20
  --layerdrop="0.0" \
21
  --activation_dropout="0.1" \
22
  --save_total_limit="3" \
 
14
  --evaluation_strategy="steps" \
15
  --text_column_name="sentence" \
16
  --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
17
+ --save_steps="500" \
18
+ --eval_steps="500" \
19
+ --logging_steps="100" \
20
  --layerdrop="0.0" \
21
  --activation_dropout="0.1" \
22
  --save_total_limit="3" \
checkpoint-200/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:456d3f8c3511ae0b0f0b3bf14cf84027d3dd6e2dd5258c9c8a92b9132d6ccfef
3
- size 559
 
 
 
 
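The deleted and renamed binary files in this commit are tracked with Git LFS, so each diff above only touches a three-line pointer stub: the spec version, the sha256 object id, and the object size in bytes. As a rough, purely illustrative sketch (not part of this repository's code), a clone made without fetching LFS objects (for example with GIT_LFS_SKIP_SMUDGE=1) leaves these paths containing the pointer text, which can be inspected like this; the path used below is just one of the files listed above.

from pathlib import Path

def read_lfs_pointer(path):
    # Parse a Git LFS pointer file ("version ...", "oid sha256:...", "size ...")
    # into a dict keyed by the first word of each line.
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

# Hypothetical usage; assumes the repo was cloned without smudging LFS objects.
pointer = read_lfs_pointer("checkpoint-500/pytorch_model.bin")
print(pointer.get("oid"), pointer.get("size"))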
checkpoint-200/trainer_state.json DELETED
@@ -1,172 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.580130529369108,
5
- "global_step": 200,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 3.75e-05,
13
- "loss": 12.1562,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.06,
18
- "learning_rate": 7.125e-05,
19
- "loss": 8.7679,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.09,
24
- "learning_rate": 7.398952095808383e-05,
25
- "loss": 5.3683,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.12,
30
- "learning_rate": 7.286676646706586e-05,
31
- "loss": 4.3219,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.15,
36
- "learning_rate": 7.17440119760479e-05,
37
- "loss": 3.7182,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.15,
42
- "eval_loss": 3.836604595184326,
43
- "eval_runtime": 133.4846,
44
- "eval_samples_per_second": 34.611,
45
- "eval_steps_per_second": 4.33,
46
- "eval_wer": 1.0,
47
- "step": 50
48
- },
49
- {
50
- "epoch": 0.17,
51
- "learning_rate": 7.062125748502993e-05,
52
- "loss": 3.478,
53
- "step": 60
54
- },
55
- {
56
- "epoch": 0.2,
57
- "learning_rate": 6.949850299401197e-05,
58
- "loss": 3.4492,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.23,
63
- "learning_rate": 6.837574850299401e-05,
64
- "loss": 3.3928,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.26,
69
- "learning_rate": 6.725299401197604e-05,
70
- "loss": 3.3183,
71
- "step": 90
72
- },
73
- {
74
- "epoch": 0.29,
75
- "learning_rate": 6.613023952095809e-05,
76
- "loss": 3.2075,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 0.29,
81
- "eval_loss": 3.258362293243408,
82
- "eval_runtime": 126.6078,
83
- "eval_samples_per_second": 36.491,
84
- "eval_steps_per_second": 4.565,
85
- "eval_wer": 1.0,
86
- "step": 100
87
- },
88
- {
89
- "epoch": 0.32,
90
- "learning_rate": 6.500748502994012e-05,
91
- "loss": 3.14,
92
- "step": 110
93
- },
94
- {
95
- "epoch": 0.35,
96
- "learning_rate": 6.388473053892215e-05,
97
- "loss": 3.1281,
98
- "step": 120
99
- },
100
- {
101
- "epoch": 0.38,
102
- "learning_rate": 6.276197604790418e-05,
103
- "loss": 3.0987,
104
- "step": 130
105
- },
106
- {
107
- "epoch": 0.41,
108
- "learning_rate": 6.163922155688622e-05,
109
- "loss": 3.1003,
110
- "step": 140
111
- },
112
- {
113
- "epoch": 0.44,
114
- "learning_rate": 6.0516467065868256e-05,
115
- "loss": 3.0922,
116
- "step": 150
117
- },
118
- {
119
- "epoch": 0.44,
120
- "eval_loss": 3.127869129180908,
121
- "eval_runtime": 126.3837,
122
- "eval_samples_per_second": 36.555,
123
- "eval_steps_per_second": 4.573,
124
- "eval_wer": 1.0,
125
- "step": 150
126
- },
127
- {
128
- "epoch": 0.46,
129
- "learning_rate": 5.9393712574850293e-05,
130
- "loss": 3.0588,
131
- "step": 160
132
- },
133
- {
134
- "epoch": 0.49,
135
- "learning_rate": 5.827095808383233e-05,
136
- "loss": 3.0477,
137
- "step": 170
138
- },
139
- {
140
- "epoch": 0.52,
141
- "learning_rate": 5.714820359281436e-05,
142
- "loss": 3.045,
143
- "step": 180
144
- },
145
- {
146
- "epoch": 0.55,
147
- "learning_rate": 5.602544910179641e-05,
148
- "loss": 3.0439,
149
- "step": 190
150
- },
151
- {
152
- "epoch": 0.58,
153
- "learning_rate": 5.490269461077844e-05,
154
- "loss": 3.0846,
155
- "step": 200
156
- },
157
- {
158
- "epoch": 0.58,
159
- "eval_loss": 3.079519271850586,
160
- "eval_runtime": 125.7215,
161
- "eval_samples_per_second": 36.748,
162
- "eval_steps_per_second": 4.597,
163
- "eval_wer": 1.0,
164
- "step": 200
165
- }
166
- ],
167
- "max_steps": 688,
168
- "num_train_epochs": 2,
169
- "total_flos": 5.906333355279667e+17,
170
- "trial_name": null,
171
- "trial_params": null
172
- }
 
 
 
 
checkpoint-250/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:25575d34d06948a00999a341b6ef425486d94285aac957915fb8f09abecc3531
3
- size 2490361937
 
 
 
 
checkpoint-250/trainer_state.json DELETED
@@ -1,211 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.7251631617113851,
5
- "global_step": 250,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 3.75e-05,
13
- "loss": 12.1562,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.06,
18
- "learning_rate": 7.125e-05,
19
- "loss": 8.7679,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.09,
24
- "learning_rate": 7.398952095808383e-05,
25
- "loss": 5.3683,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.12,
30
- "learning_rate": 7.286676646706586e-05,
31
- "loss": 4.3219,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.15,
36
- "learning_rate": 7.17440119760479e-05,
37
- "loss": 3.7182,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.15,
42
- "eval_loss": 3.836604595184326,
43
- "eval_runtime": 133.4846,
44
- "eval_samples_per_second": 34.611,
45
- "eval_steps_per_second": 4.33,
46
- "eval_wer": 1.0,
47
- "step": 50
48
- },
49
- {
50
- "epoch": 0.17,
51
- "learning_rate": 7.062125748502993e-05,
52
- "loss": 3.478,
53
- "step": 60
54
- },
55
- {
56
- "epoch": 0.2,
57
- "learning_rate": 6.949850299401197e-05,
58
- "loss": 3.4492,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.23,
63
- "learning_rate": 6.837574850299401e-05,
64
- "loss": 3.3928,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.26,
69
- "learning_rate": 6.725299401197604e-05,
70
- "loss": 3.3183,
71
- "step": 90
72
- },
73
- {
74
- "epoch": 0.29,
75
- "learning_rate": 6.613023952095809e-05,
76
- "loss": 3.2075,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 0.29,
81
- "eval_loss": 3.258362293243408,
82
- "eval_runtime": 126.6078,
83
- "eval_samples_per_second": 36.491,
84
- "eval_steps_per_second": 4.565,
85
- "eval_wer": 1.0,
86
- "step": 100
87
- },
88
- {
89
- "epoch": 0.32,
90
- "learning_rate": 6.500748502994012e-05,
91
- "loss": 3.14,
92
- "step": 110
93
- },
94
- {
95
- "epoch": 0.35,
96
- "learning_rate": 6.388473053892215e-05,
97
- "loss": 3.1281,
98
- "step": 120
99
- },
100
- {
101
- "epoch": 0.38,
102
- "learning_rate": 6.276197604790418e-05,
103
- "loss": 3.0987,
104
- "step": 130
105
- },
106
- {
107
- "epoch": 0.41,
108
- "learning_rate": 6.163922155688622e-05,
109
- "loss": 3.1003,
110
- "step": 140
111
- },
112
- {
113
- "epoch": 0.44,
114
- "learning_rate": 6.0516467065868256e-05,
115
- "loss": 3.0922,
116
- "step": 150
117
- },
118
- {
119
- "epoch": 0.44,
120
- "eval_loss": 3.127869129180908,
121
- "eval_runtime": 126.3837,
122
- "eval_samples_per_second": 36.555,
123
- "eval_steps_per_second": 4.573,
124
- "eval_wer": 1.0,
125
- "step": 150
126
- },
127
- {
128
- "epoch": 0.46,
129
- "learning_rate": 5.9393712574850293e-05,
130
- "loss": 3.0588,
131
- "step": 160
132
- },
133
- {
134
- "epoch": 0.49,
135
- "learning_rate": 5.827095808383233e-05,
136
- "loss": 3.0477,
137
- "step": 170
138
- },
139
- {
140
- "epoch": 0.52,
141
- "learning_rate": 5.714820359281436e-05,
142
- "loss": 3.045,
143
- "step": 180
144
- },
145
- {
146
- "epoch": 0.55,
147
- "learning_rate": 5.602544910179641e-05,
148
- "loss": 3.0439,
149
- "step": 190
150
- },
151
- {
152
- "epoch": 0.58,
153
- "learning_rate": 5.490269461077844e-05,
154
- "loss": 3.0846,
155
- "step": 200
156
- },
157
- {
158
- "epoch": 0.58,
159
- "eval_loss": 3.079519271850586,
160
- "eval_runtime": 125.7215,
161
- "eval_samples_per_second": 36.748,
162
- "eval_steps_per_second": 4.597,
163
- "eval_wer": 1.0,
164
- "step": 200
165
- },
166
- {
167
- "epoch": 0.61,
168
- "learning_rate": 5.3779940119760477e-05,
169
- "loss": 3.0512,
170
- "step": 210
171
- },
172
- {
173
- "epoch": 0.64,
174
- "learning_rate": 5.265718562874251e-05,
175
- "loss": 3.0143,
176
- "step": 220
177
- },
178
- {
179
- "epoch": 0.67,
180
- "learning_rate": 5.1534431137724546e-05,
181
- "loss": 3.0387,
182
- "step": 230
183
- },
184
- {
185
- "epoch": 0.7,
186
- "learning_rate": 5.0411676646706584e-05,
187
- "loss": 3.0311,
188
- "step": 240
189
- },
190
- {
191
- "epoch": 0.73,
192
- "learning_rate": 4.9288922155688615e-05,
193
- "loss": 3.0417,
194
- "step": 250
195
- },
196
- {
197
- "epoch": 0.73,
198
- "eval_loss": 3.069390058517456,
199
- "eval_runtime": 125.7339,
200
- "eval_samples_per_second": 36.744,
201
- "eval_steps_per_second": 4.597,
202
- "eval_wer": 1.0,
203
- "step": 250
204
- }
205
- ],
206
- "max_steps": 688,
207
- "num_train_epochs": 2,
208
- "total_flos": 7.370807236652851e+17,
209
- "trial_name": null,
210
- "trial_params": null
211
- }
 
 
 
 
checkpoint-350/config.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
- "activation_dropout": 0.1,
4
- "adapter_kernel_size": 3,
5
- "adapter_stride": 2,
6
- "add_adapter": false,
7
- "apply_spec_augment": true,
8
- "architectures": [
9
- "Wav2Vec2ForCTC"
10
- ],
11
- "attention_dropout": 0.0,
12
- "bos_token_id": 1,
13
- "classifier_proj_size": 256,
14
- "codevector_dim": 768,
15
- "contrastive_logits_temperature": 0.1,
16
- "conv_bias": true,
17
- "conv_dim": [
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512
25
- ],
26
- "conv_kernel": [
27
- 10,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2
34
- ],
35
- "conv_stride": [
36
- 5,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2
43
- ],
44
- "ctc_loss_reduction": "mean",
45
- "ctc_zero_infinity": false,
46
- "diversity_loss_weight": 0.1,
47
- "do_stable_layer_norm": true,
48
- "eos_token_id": 2,
49
- "feat_extract_activation": "gelu",
50
- "feat_extract_dropout": 0.0,
51
- "feat_extract_norm": "layer",
52
- "feat_proj_dropout": 0.0,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.0,
55
- "hidden_act": "gelu",
56
- "hidden_dropout": 0.0,
57
- "hidden_size": 1024,
58
- "initializer_range": 0.02,
59
- "intermediate_size": 4096,
60
- "layer_norm_eps": 1e-05,
61
- "layerdrop": 0.0,
62
- "mask_feature_length": 64,
63
- "mask_feature_min_masks": 0,
64
- "mask_feature_prob": 0.25,
65
- "mask_time_length": 10,
66
- "mask_time_min_masks": 2,
67
- "mask_time_prob": 0.75,
68
- "model_type": "wav2vec2",
69
- "num_adapter_layers": 3,
70
- "num_attention_heads": 16,
71
- "num_codevector_groups": 2,
72
- "num_codevectors_per_group": 320,
73
- "num_conv_pos_embedding_groups": 16,
74
- "num_conv_pos_embeddings": 128,
75
- "num_feat_extract_layers": 7,
76
- "num_hidden_layers": 24,
77
- "num_negatives": 100,
78
- "output_hidden_size": 1024,
79
- "pad_token_id": 34,
80
- "proj_codevector_dim": 768,
81
- "tdnn_dilation": [
82
- 1,
83
- 2,
84
- 3,
85
- 1,
86
- 1
87
- ],
88
- "tdnn_dim": [
89
- 512,
90
- 512,
91
- 512,
92
- 512,
93
- 1500
94
- ],
95
- "tdnn_kernel": [
96
- 5,
97
- 3,
98
- 3,
99
- 1,
100
- 1
101
- ],
102
- "torch_dtype": "float32",
103
- "transformers_version": "4.16.0.dev0",
104
- "use_weighted_layer_sum": false,
105
- "vocab_size": 37,
106
- "xvector_output_dim": 512
107
- }
 
 
 
 
checkpoint-350/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0,
7
- "return_attention_mask": true,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
checkpoint-350/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0d76767012f4da5dc0d5f53230af28368c23a384f4a2b862a3afb09aea2dc6d
3
- size 1262075377
 
 
 
 
checkpoint-350/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4671c6ef20752fdeca41dd978833212c15422e660369baad61a24c693eba960d
3
- size 14567
 
 
 
 
checkpoint-350/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:68cb16be7c67302d24fc36708cbe6b5ff6ca823143d0ed4ccd59b12de9852185
3
- size 623
 
 
 
 
checkpoint-350/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:77806a16ec6ef209c8a5c1c085159a0bc8e45c23f2d143f2c13e01527f13b5b2
3
- size 2991
 
 
 
 
{checkpoint-200 β†’ checkpoint-400}/config.json RENAMED
File without changes
{checkpoint-300 β†’ checkpoint-400}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fbc6f4dd07d8d67fd707ed0307012427b86b156035171248cffd740587b141d
3
  size 2490362385
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16fdeeb953ce80d82d1ba249d99ae68b45e80c943e36a80b8a6517c275a7b594
3
  size 2490362385
{checkpoint-200 β†’ checkpoint-400}/preprocessor_config.json RENAMED
File without changes
{checkpoint-250 β†’ checkpoint-400}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23de3d5e56e89844f6d19c6036e4e2b363f725b01482cc092d77475cb79971ed
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65431ecd0a10e000abb091aa8e3064f6e344c3e7771a07e792b6c0470ee8092c
3
  size 1262075377
{checkpoint-250 β†’ checkpoint-400}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3ca012043e7838ae908c34417eeb6fac92f1f9b5c9fe391cf53fd8efcf0ad26
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a426a909abceadbfb6682c295322fafea2b96fc77d190014718f2a843f386bc4
3
  size 14567
{checkpoint-300 β†’ checkpoint-400}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d459085befd098f94535d03ce7c7b44c036f922e66bbf06dd7d1df1f2271db85
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3828e6a0e34e076fa271e3dd1c08f47dc96711f380d9585d89de05befff54169
3
  size 559
{checkpoint-200 β†’ checkpoint-400}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efd672a046efb6eb5df47d5237a07689c59887dac098586f96bf610f5cf17f77
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b73abf21b39473fffe8d985c5918328870ddc7f68ec8e4b77b7e7ea44506a1b1
3
  size 623
{checkpoint-350 β†’ checkpoint-400}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0174039158810733,
5
- "global_step": 350,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -279,11 +279,50 @@
279
  "eval_steps_per_second": 3.875,
280
  "eval_wer": 1.0,
281
  "step": 350
282
  }
283
  ],
284
  "max_steps": 688,
285
  "num_train_epochs": 2,
286
- "total_flos": 1.0437262031798554e+18,
287
  "trial_name": null,
288
  "trial_params": null
289
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1624365482233503,
5
+ "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
279
  "eval_steps_per_second": 3.875,
280
  "eval_wer": 1.0,
281
  "step": 350
282
+ },
283
+ {
284
+ "epoch": 1.05,
285
+ "learning_rate": 3.693862275449102e-05,
286
+ "loss": 2.9665,
287
+ "step": 360
288
+ },
289
+ {
290
+ "epoch": 1.08,
291
+ "learning_rate": 3.581586826347305e-05,
292
+ "loss": 2.9641,
293
+ "step": 370
294
+ },
295
+ {
296
+ "epoch": 1.1,
297
+ "learning_rate": 3.469311377245509e-05,
298
+ "loss": 2.9484,
299
+ "step": 380
300
+ },
301
+ {
302
+ "epoch": 1.13,
303
+ "learning_rate": 3.3570359281437126e-05,
304
+ "loss": 2.9494,
305
+ "step": 390
306
+ },
307
+ {
308
+ "epoch": 1.16,
309
+ "learning_rate": 3.244760479041916e-05,
310
+ "loss": 2.9698,
311
+ "step": 400
312
+ },
313
+ {
314
+ "epoch": 1.16,
315
+ "eval_loss": 2.989494562149048,
316
+ "eval_runtime": 150.6903,
317
+ "eval_samples_per_second": 30.659,
318
+ "eval_steps_per_second": 3.836,
319
+ "eval_wer": 1.0,
320
+ "step": 400
321
  }
322
  ],
323
  "max_steps": 688,
324
  "num_train_epochs": 2,
325
+ "total_flos": 1.1900340345446784e+18,
326
  "trial_name": null,
327
  "trial_params": null
328
  }
{checkpoint-200 β†’ checkpoint-400}/training_args.bin RENAMED
File without changes
{checkpoint-250 β†’ checkpoint-450}/config.json RENAMED
File without changes
{checkpoint-350 β†’ checkpoint-450}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:646667c125c12f737d1ffee17d6371268d6575f3796018a92eb4102f5f1fff0b
3
  size 2490362385
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d20a2e7a06e4c6fe1ca1763518fe26e3bd509e54e5c6ce336b1cdd4ad352fc8
3
  size 2490362385
{checkpoint-250 β†’ checkpoint-450}/preprocessor_config.json RENAMED
File without changes
{checkpoint-200 β†’ checkpoint-450}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81a809773f4c41661a588636b358c9e5380d7596cf519d3864f59d078d6b5d56
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a67aef87c3664c6c90fef5a16264df1a1e9ec09017448f7dd3962f7e7ed3cd
3
  size 1262075377
{checkpoint-300 β†’ checkpoint-450}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4e6a8eb3cbe35497679345db620fc16f097b4fd6a7597d0d1e70c31e40c4556
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:171f5d4a1db63b5ecb87951d275cbcef882e72c7875f0439f386ec2a0cc474fc
3
  size 14503
{checkpoint-250 β†’ checkpoint-450}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4b33a302d3359c12eb2b6ea50d6c9c2f406dda2633a8f61a78ad84ec0805e1f
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b437a2168639a7226a6616cf299b21e2671732a153df5f941f17a2c38a20459
3
  size 559
{checkpoint-300 β†’ checkpoint-450}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7af1e86e64792f71c1d8769a03e0e23b66ad421c106349a338e419060870b38
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89db00b13c6183184d8d5a3b77b2b0c26843c6c480825e0951a03deff712e541
3
  size 623
{checkpoint-300 β†’ checkpoint-450}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8701957940536621,
5
- "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -240,11 +240,128 @@
240
  "eval_steps_per_second": 4.369,
241
  "eval_wer": 1.0,
242
  "step": 300
243
  }
244
  ],
245
  "max_steps": 688,
246
  "num_train_epochs": 2,
247
- "total_flos": 8.840514496994611e+17,
248
  "trial_name": null,
249
  "trial_params": null
250
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3074691805656273,
5
+ "global_step": 450,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
240
  "eval_steps_per_second": 4.369,
241
  "eval_wer": 1.0,
242
  "step": 300
243
+ },
244
+ {
245
+ "epoch": 0.9,
246
+ "learning_rate": 4.255239520958083e-05,
247
+ "loss": 3.0052,
248
+ "step": 310
249
+ },
250
+ {
251
+ "epoch": 0.93,
252
+ "learning_rate": 4.142964071856287e-05,
253
+ "loss": 2.9826,
254
+ "step": 320
255
+ },
256
+ {
257
+ "epoch": 0.96,
258
+ "learning_rate": 4.030688622754491e-05,
259
+ "loss": 2.9747,
260
+ "step": 330
261
+ },
262
+ {
263
+ "epoch": 0.99,
264
+ "learning_rate": 3.918413173652694e-05,
265
+ "loss": 2.9617,
266
+ "step": 340
267
+ },
268
+ {
269
+ "epoch": 1.02,
270
+ "learning_rate": 3.806137724550898e-05,
271
+ "loss": 3.2053,
272
+ "step": 350
273
+ },
274
+ {
275
+ "epoch": 1.02,
276
+ "eval_loss": 2.984886407852173,
277
+ "eval_runtime": 149.1508,
278
+ "eval_samples_per_second": 30.975,
279
+ "eval_steps_per_second": 3.875,
280
+ "eval_wer": 1.0,
281
+ "step": 350
282
+ },
283
+ {
284
+ "epoch": 1.05,
285
+ "learning_rate": 3.693862275449102e-05,
286
+ "loss": 2.9665,
287
+ "step": 360
288
+ },
289
+ {
290
+ "epoch": 1.08,
291
+ "learning_rate": 3.581586826347305e-05,
292
+ "loss": 2.9641,
293
+ "step": 370
294
+ },
295
+ {
296
+ "epoch": 1.1,
297
+ "learning_rate": 3.469311377245509e-05,
298
+ "loss": 2.9484,
299
+ "step": 380
300
+ },
301
+ {
302
+ "epoch": 1.13,
303
+ "learning_rate": 3.3570359281437126e-05,
304
+ "loss": 2.9494,
305
+ "step": 390
306
+ },
307
+ {
308
+ "epoch": 1.16,
309
+ "learning_rate": 3.244760479041916e-05,
310
+ "loss": 2.9698,
311
+ "step": 400
312
+ },
313
+ {
314
+ "epoch": 1.16,
315
+ "eval_loss": 2.989494562149048,
316
+ "eval_runtime": 150.6903,
317
+ "eval_samples_per_second": 30.659,
318
+ "eval_steps_per_second": 3.836,
319
+ "eval_wer": 1.0,
320
+ "step": 400
321
+ },
322
+ {
323
+ "epoch": 1.19,
324
+ "learning_rate": 3.1324850299401195e-05,
325
+ "loss": 2.9664,
326
+ "step": 410
327
+ },
328
+ {
329
+ "epoch": 1.22,
330
+ "learning_rate": 3.020209580838323e-05,
331
+ "loss": 2.9494,
332
+ "step": 420
333
+ },
334
+ {
335
+ "epoch": 1.25,
336
+ "learning_rate": 2.9079341317365265e-05,
337
+ "loss": 2.935,
338
+ "step": 430
339
+ },
340
+ {
341
+ "epoch": 1.28,
342
+ "learning_rate": 2.7956586826347306e-05,
343
+ "loss": 2.9397,
344
+ "step": 440
345
+ },
346
+ {
347
+ "epoch": 1.31,
348
+ "learning_rate": 2.683383233532934e-05,
349
+ "loss": 2.9485,
350
+ "step": 450
351
+ },
352
+ {
353
+ "epoch": 1.31,
354
+ "eval_loss": 2.9584460258483887,
355
+ "eval_runtime": 140.6358,
356
+ "eval_samples_per_second": 32.851,
357
+ "eval_steps_per_second": 4.11,
358
+ "eval_wer": 1.0,
359
+ "step": 450
360
  }
361
  ],
362
  "max_steps": 688,
363
  "num_train_epochs": 2,
364
+ "total_flos": 1.336010418574825e+18,
365
  "trial_name": null,
366
  "trial_params": null
367
  }
{checkpoint-250 β†’ checkpoint-450}/training_args.bin RENAMED
File without changes
{checkpoint-300 β†’ checkpoint-500}/config.json RENAMED
File without changes
{checkpoint-200 β†’ checkpoint-500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1202a5092b4eef4129f21d94d892672eedd0f405c7b97384c527938ad263ff2
3
- size 2490361937
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dbd398c0993452243cb7df79d58793489c5b41d1a1000e547261b81959e2f45
3
+ size 2490362385
{checkpoint-300 β†’ checkpoint-500}/preprocessor_config.json RENAMED
File without changes
{checkpoint-300 β†’ checkpoint-500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8edb565b658376cb9cf7a44bbc1bfea5a26ebcd17d3f36739a8535e85a13dcee
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed159635bd947770842e25d915d8157fd198d2f06d5db2476c6663627e3beee7
3
  size 1262075377
{checkpoint-200 β†’ checkpoint-500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aac8c2cbd9ad36e5da5c9bde6c85c2a957009b424972b91ca2f61d198a65abaf
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32b82ac75b07e68dc3bc90e76f55a339f64dce724d87a9ae3c69ee46df441867
3
+ size 14503
{checkpoint-350 β†’ checkpoint-500}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11a38190a29b2e515a160c453a45f3b2acc23e2c8c2240009e053ed0dbf017f0
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c8bceadebe118ff459b01a775a8e9b38a6b8302c162d022f78d3646163e6486
3
  size 559
{checkpoint-250 β†’ checkpoint-500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d0c28659ec00a57e0df47b2243fcc9567217741ac741e245ef295e398e80890
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15019bcd1c8fc8b1bd39c46d9e1196c2fa76648918a0024eb84229f57debcf7e
3
  size 623
checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.4525018129079044,
5
+ "global_step": 500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.29,
12
+ "learning_rate": 6.613023952095809e-05,
13
+ "loss": 5.1206,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.58,
18
+ "learning_rate": 5.490269461077844e-05,
19
+ "loss": 3.0901,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.87,
24
+ "learning_rate": 4.36751497005988e-05,
25
+ "loss": 3.0224,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 1.16,
30
+ "learning_rate": 3.244760479041916e-05,
31
+ "loss": 2.9922,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 1.45,
36
+ "learning_rate": 2.1220059880239517e-05,
37
+ "loss": 2.9357,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 1.45,
42
+ "eval_loss": 2.9458744525909424,
43
+ "eval_runtime": 138.8724,
44
+ "eval_samples_per_second": 33.268,
45
+ "eval_steps_per_second": 4.162,
46
+ "eval_wer": 1.0,
47
+ "step": 500
48
+ }
49
+ ],
50
+ "max_steps": 688,
51
+ "num_train_epochs": 2,
52
+ "total_flos": 1.4827194756605722e+18,
53
+ "trial_name": null,
54
+ "trial_params": null
55
+ }
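The new checkpoint-500/trainer_state.json keeps the running log_history shown above: a training-loss entry every 100 steps and an evaluation entry at step 500, matching the new logging and eval intervals. As a hedged sketch of how that log could be inspected once the checkpoint is available locally (the path assumes a local clone of this repo), the JSON can be read directly:

import json

with open("checkpoint-500/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry a "loss" key; evaluation entries carry "eval_loss"/"eval_wer".
train_log = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_log = [(e["step"], e["eval_loss"], e["eval_wer"]) for e in state["log_history"] if "eval_loss" in e]

print("last train loss:", train_log[-1])
print("last eval (step, loss, wer):", eval_log[-1])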
{checkpoint-300 β†’ checkpoint-500}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77806a16ec6ef209c8a5c1c085159a0bc8e45c23f2d143f2c13e01527f13b5b2
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c8daeeea1ded5d5b75ab1c6033b9bed25c9ac1f192a365842399932683cfcc
3
  size 2991
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0d76767012f4da5dc0d5f53230af28368c23a384f4a2b862a3afb09aea2dc6d
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed159635bd947770842e25d915d8157fd198d2f06d5db2476c6663627e3beee7
3
  size 1262075377
run.sh CHANGED
@@ -14,9 +14,9 @@ python run_speech_recognition_ctc.py \
14
  --evaluation_strategy="steps" \
15
  --text_column_name="sentence" \
16
  --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
17
- --save_steps="50" \
18
- --eval_steps="50" \
19
- --logging_steps="10" \
20
  --layerdrop="0.0" \
21
  --activation_dropout="0.1" \
22
  --save_total_limit="3" \
 
14
  --evaluation_strategy="steps" \
15
  --text_column_name="sentence" \
16
  --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
17
+ --save_steps="500" \
18
+ --eval_steps="500" \
19
+ --logging_steps="100" \
20
  --layerdrop="0.0" \
21
  --activation_dropout="0.1" \
22
  --save_total_limit="3" \
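The run.sh change raises the checkpoint/eval interval from 50 to 500 steps and the logging interval from 10 to 100, which is why only checkpoint-400/450/500 survive under save_total_limit=3. These flags are passed through run_speech_recognition_ctc.py to the Hugging Face Trainer; as a rough sketch (the output_dir and any omitted arguments here are placeholders, not copied from the actual script), the cadence corresponds to TrainingArguments like:

from transformers import TrainingArguments

# Sketch of the checkpointing/evaluation cadence set in run.sh; learning rate,
# batch sizes and the rest of the script's arguments are intentionally omitted.
training_args = TrainingArguments(
    output_dir="./",              # placeholder
    evaluation_strategy="steps",
    save_steps=500,
    eval_steps=500,
    logging_steps=100,
    save_total_limit=3,
)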
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
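The special_tokens_map.json diff appends another <s>/</s> pair to additional_special_tokens, growing the list from six to eight entries; the same pair appears to be re-added each time the tokenizer is saved during training. A small, purely illustrative check for this duplication (assuming the file is read from a local clone) could be:

import json

with open("special_tokens_map.json") as f:
    smap = json.load(f)

# Count the (repeated) additional special tokens and list the distinct ones.
extra = [tok["content"] for tok in smap.get("additional_special_tokens", [])]
print(len(extra), "additional special tokens:", sorted(set(extra)))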
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77806a16ec6ef209c8a5c1c085159a0bc8e45c23f2d143f2c13e01527f13b5b2
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c8daeeea1ded5d5b75ab1c6033b9bed25c9ac1f192a365842399932683cfcc
3
  size 2991