marinone94 committed on
Commit
6012db9
‒
1 Parent(s): 32a526a

Training in progress, step 1600

Browse files
Files changed (27) hide show
  1. checkpoint-1200/scaler.pt +0 -3
  2. checkpoint-1200/trainer_state.json +0 -484
  3. checkpoint-1300/scheduler.pt +0 -3
  4. checkpoint-1400/config.json +0 -115
  5. checkpoint-1400/optimizer.pt +0 -3
  6. checkpoint-1400/preprocessor_config.json +0 -9
  7. checkpoint-1400/pytorch_model.bin +0 -3
  8. checkpoint-1400/rng_state.pth +0 -3
  9. checkpoint-1400/training_args.bin +0 -3
  10. {checkpoint-1200 → checkpoint-1500}/config.json +0 -0
  11. {checkpoint-1200 → checkpoint-1500}/optimizer.pt +1 -1
  12. {checkpoint-1200 → checkpoint-1500}/preprocessor_config.json +0 -0
  13. {checkpoint-1300 → checkpoint-1500}/pytorch_model.bin +1 -1
  14. {checkpoint-1300 → checkpoint-1500}/rng_state.pth +1 -1
  15. {checkpoint-1400 → checkpoint-1500}/scaler.pt +1 -1
  16. {checkpoint-1200 → checkpoint-1500}/scheduler.pt +1 -1
  17. {checkpoint-1400 → checkpoint-1500}/trainer_state.json +42 -3
  18. {checkpoint-1200 → checkpoint-1500}/training_args.bin +0 -0
  19. {checkpoint-1300 → checkpoint-1600}/config.json +0 -0
  20. {checkpoint-1300 → checkpoint-1600}/optimizer.pt +1 -1
  21. {checkpoint-1300 → checkpoint-1600}/preprocessor_config.json +0 -0
  22. {checkpoint-1200 → checkpoint-1600}/pytorch_model.bin +1 -1
  23. {checkpoint-1200 → checkpoint-1600}/rng_state.pth +1 -1
  24. {checkpoint-1300 → checkpoint-1600}/scaler.pt +1 -1
  25. {checkpoint-1400 → checkpoint-1600}/scheduler.pt +1 -1
  26. {checkpoint-1300 → checkpoint-1600}/trainer_state.json +120 -3
  27. {checkpoint-1300 → checkpoint-1600}/training_args.bin +0 -0
checkpoint-1200/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:19c7277eaca0850ae3e9b6790b3d002d820169cce0671185e672c28c8ae8e056
3
- size 559
 
 
 
 
checkpoint-1200/trainer_state.json DELETED
@@ -1,484 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 13.185792349726777,
5
- "global_step": 1200,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.22,
12
- "learning_rate": 2.5e-06,
13
- "loss": 3.5867,
14
- "step": 20
15
- },
16
- {
17
- "epoch": 0.44,
18
- "learning_rate": 5e-06,
19
- "loss": 3.5457,
20
- "step": 40
21
- },
22
- {
23
- "epoch": 0.66,
24
- "learning_rate": 7.5e-06,
25
- "loss": 3.4513,
26
- "step": 60
27
- },
28
- {
29
- "epoch": 0.87,
30
- "learning_rate": 1e-05,
31
- "loss": 3.3432,
32
- "step": 80
33
- },
34
- {
35
- "epoch": 1.1,
36
- "learning_rate": 1.25e-05,
37
- "loss": 3.3533,
38
- "step": 100
39
- },
40
- {
41
- "epoch": 1.1,
42
- "eval_loss": 3.2806732654571533,
43
- "eval_runtime": 190.4728,
44
- "eval_samples_per_second": 25.426,
45
- "eval_steps_per_second": 0.798,
46
- "eval_wer": 1.0,
47
- "step": 100
48
- },
49
- {
50
- "epoch": 1.32,
51
- "learning_rate": 1.5e-05,
52
- "loss": 3.2217,
53
- "step": 120
54
- },
55
- {
56
- "epoch": 1.54,
57
- "learning_rate": 1.7500000000000002e-05,
58
- "loss": 3.1765,
59
- "step": 140
60
- },
61
- {
62
- "epoch": 1.75,
63
- "learning_rate": 2e-05,
64
- "loss": 3.1408,
65
- "step": 160
66
- },
67
- {
68
- "epoch": 1.97,
69
- "learning_rate": 2.2499999999999998e-05,
70
- "loss": 3.1165,
71
- "step": 180
72
- },
73
- {
74
- "epoch": 2.2,
75
- "learning_rate": 2.5e-05,
76
- "loss": 3.1709,
77
- "step": 200
78
- },
79
- {
80
- "epoch": 2.2,
81
- "eval_loss": 3.1325438022613525,
82
- "eval_runtime": 192.4978,
83
- "eval_samples_per_second": 25.159,
84
- "eval_steps_per_second": 0.79,
85
- "eval_wer": 1.0,
86
- "step": 200
87
- },
88
- {
89
- "epoch": 2.42,
90
- "learning_rate": 2.75e-05,
91
- "loss": 3.079,
92
- "step": 220
93
- },
94
- {
95
- "epoch": 2.63,
96
- "learning_rate": 3e-05,
97
- "loss": 3.0677,
98
- "step": 240
99
- },
100
- {
101
- "epoch": 2.85,
102
- "learning_rate": 3.2500000000000004e-05,
103
- "loss": 3.0656,
104
- "step": 260
105
- },
106
- {
107
- "epoch": 3.08,
108
- "learning_rate": 3.5000000000000004e-05,
109
- "loss": 3.1463,
110
- "step": 280
111
- },
112
- {
113
- "epoch": 3.3,
114
- "learning_rate": 3.75e-05,
115
- "loss": 3.0573,
116
- "step": 300
117
- },
118
- {
119
- "epoch": 3.3,
120
- "eval_loss": 3.0614514350891113,
121
- "eval_runtime": 194.36,
122
- "eval_samples_per_second": 24.918,
123
- "eval_steps_per_second": 0.782,
124
- "eval_wer": 1.0,
125
- "step": 300
126
- },
127
- {
128
- "epoch": 3.51,
129
- "learning_rate": 4e-05,
130
- "loss": 3.0511,
131
- "step": 320
132
- },
133
- {
134
- "epoch": 3.73,
135
- "learning_rate": 4.25e-05,
136
- "loss": 3.0358,
137
- "step": 340
138
- },
139
- {
140
- "epoch": 3.95,
141
- "learning_rate": 4.4999999999999996e-05,
142
- "loss": 3.0416,
143
- "step": 360
144
- },
145
- {
146
- "epoch": 4.17,
147
- "learning_rate": 4.75e-05,
148
- "loss": 3.1053,
149
- "step": 380
150
- },
151
- {
152
- "epoch": 4.39,
153
- "learning_rate": 5e-05,
154
- "loss": 3.0314,
155
- "step": 400
156
- },
157
- {
158
- "epoch": 4.39,
159
- "eval_loss": 3.0990231037139893,
160
- "eval_runtime": 198.3688,
161
- "eval_samples_per_second": 24.414,
162
- "eval_steps_per_second": 0.766,
163
- "eval_wer": 1.0,
164
- "step": 400
165
- },
166
- {
167
- "epoch": 4.61,
168
- "learning_rate": 5.25e-05,
169
- "loss": 3.0309,
170
- "step": 420
171
- },
172
- {
173
- "epoch": 4.83,
174
- "learning_rate": 5.5e-05,
175
- "loss": 3.0259,
176
- "step": 440
177
- },
178
- {
179
- "epoch": 5.05,
180
- "learning_rate": 5.75e-05,
181
- "loss": 3.0998,
182
- "step": 460
183
- },
184
- {
185
- "epoch": 5.27,
186
- "learning_rate": 6e-05,
187
- "loss": 3.0152,
188
- "step": 480
189
- },
190
- {
191
- "epoch": 5.49,
192
- "learning_rate": 6.25e-05,
193
- "loss": 3.0129,
194
- "step": 500
195
- },
196
- {
197
- "epoch": 5.49,
198
- "eval_loss": 3.039973497390747,
199
- "eval_runtime": 190.8567,
200
- "eval_samples_per_second": 25.375,
201
- "eval_steps_per_second": 0.796,
202
- "eval_wer": 1.0,
203
- "step": 500
204
- },
205
- {
206
- "epoch": 5.71,
207
- "learning_rate": 6.500000000000001e-05,
208
- "loss": 3.0088,
209
- "step": 520
210
- },
211
- {
212
- "epoch": 5.93,
213
- "learning_rate": 6.75e-05,
214
- "loss": 3.0051,
215
- "step": 540
216
- },
217
- {
218
- "epoch": 6.15,
219
- "learning_rate": 7.000000000000001e-05,
220
- "loss": 3.073,
221
- "step": 560
222
- },
223
- {
224
- "epoch": 6.37,
225
- "learning_rate": 7.25e-05,
226
- "loss": 3.0031,
227
- "step": 580
228
- },
229
- {
230
- "epoch": 6.59,
231
- "learning_rate": 7.5e-05,
232
- "loss": 2.9964,
233
- "step": 600
234
- },
235
- {
236
- "epoch": 6.59,
237
- "eval_loss": 2.998962640762329,
238
- "eval_runtime": 193.5213,
239
- "eval_samples_per_second": 25.026,
240
- "eval_steps_per_second": 0.785,
241
- "eval_wer": 1.0,
242
- "step": 600
243
- },
244
- {
245
- "epoch": 6.81,
246
- "learning_rate": 7.75e-05,
247
- "loss": 2.9921,
248
- "step": 620
249
- },
250
- {
251
- "epoch": 7.03,
252
- "learning_rate": 8e-05,
253
- "loss": 3.0665,
254
- "step": 640
255
- },
256
- {
257
- "epoch": 7.25,
258
- "learning_rate": 8.25e-05,
259
- "loss": 2.9826,
260
- "step": 660
261
- },
262
- {
263
- "epoch": 7.47,
264
- "learning_rate": 8.5e-05,
265
- "loss": 2.9689,
266
- "step": 680
267
- },
268
- {
269
- "epoch": 7.69,
270
- "learning_rate": 8.75e-05,
271
- "loss": 2.9602,
272
- "step": 700
273
- },
274
- {
275
- "epoch": 7.69,
276
- "eval_loss": 2.9620397090911865,
277
- "eval_runtime": 193.5851,
278
- "eval_samples_per_second": 25.017,
279
- "eval_steps_per_second": 0.785,
280
- "eval_wer": 1.0,
281
- "step": 700
282
- },
283
- {
284
- "epoch": 7.91,
285
- "learning_rate": 8.999999999999999e-05,
286
- "loss": 2.9639,
287
- "step": 720
288
- },
289
- {
290
- "epoch": 8.13,
291
- "learning_rate": 9.25e-05,
292
- "loss": 3.0215,
293
- "step": 740
294
- },
295
- {
296
- "epoch": 8.35,
297
- "learning_rate": 9.5e-05,
298
- "loss": 2.9454,
299
- "step": 760
300
- },
301
- {
302
- "epoch": 8.57,
303
- "learning_rate": 9.750000000000001e-05,
304
- "loss": 2.9239,
305
- "step": 780
306
- },
307
- {
308
- "epoch": 8.79,
309
- "learning_rate": 0.0001,
310
- "loss": 2.8756,
311
- "step": 800
312
- },
313
- {
314
- "epoch": 8.79,
315
- "eval_loss": 2.7302000522613525,
316
- "eval_runtime": 191.8065,
317
- "eval_samples_per_second": 25.249,
318
- "eval_steps_per_second": 0.792,
319
- "eval_wer": 1.0,
320
- "step": 800
321
- },
322
- {
323
- "epoch": 9.01,
324
- "learning_rate": 0.0001025,
325
- "loss": 2.8933,
326
- "step": 820
327
- },
328
- {
329
- "epoch": 9.23,
330
- "learning_rate": 0.000105,
331
- "loss": 2.7318,
332
- "step": 840
333
- },
334
- {
335
- "epoch": 9.45,
336
- "learning_rate": 0.0001075,
337
- "loss": 2.5941,
338
- "step": 860
339
- },
340
- {
341
- "epoch": 9.67,
342
- "learning_rate": 0.00011,
343
- "loss": 2.4441,
344
- "step": 880
345
- },
346
- {
347
- "epoch": 9.89,
348
- "learning_rate": 0.00011250000000000001,
349
- "loss": 2.2931,
350
- "step": 900
351
- },
352
- {
353
- "epoch": 9.89,
354
- "eval_loss": 1.5057899951934814,
355
- "eval_runtime": 196.4368,
356
- "eval_samples_per_second": 24.654,
357
- "eval_steps_per_second": 0.774,
358
- "eval_wer": 0.9775759296054499,
359
- "step": 900
360
- },
361
- {
362
- "epoch": 10.11,
363
- "learning_rate": 0.000115,
364
- "loss": 2.1999,
365
- "step": 920
366
- },
367
- {
368
- "epoch": 10.33,
369
- "learning_rate": 0.0001175,
370
- "loss": 2.0574,
371
- "step": 940
372
- },
373
- {
374
- "epoch": 10.55,
375
- "learning_rate": 0.00012,
376
- "loss": 1.9672,
377
- "step": 960
378
- },
379
- {
380
- "epoch": 10.77,
381
- "learning_rate": 0.0001225,
382
- "loss": 1.9015,
383
- "step": 980
384
- },
385
- {
386
- "epoch": 10.98,
387
- "learning_rate": 0.000125,
388
- "loss": 1.8427,
389
- "step": 1000
390
- },
391
- {
392
- "epoch": 10.98,
393
- "eval_loss": 0.9154536724090576,
394
- "eval_runtime": 203.1168,
395
- "eval_samples_per_second": 23.843,
396
- "eval_steps_per_second": 0.748,
397
- "eval_wer": 0.7832245245529378,
398
- "step": 1000
399
- },
400
- {
401
- "epoch": 11.21,
402
- "learning_rate": 0.0001275,
403
- "loss": 1.8155,
404
- "step": 1020
405
- },
406
- {
407
- "epoch": 11.43,
408
- "learning_rate": 0.00013000000000000002,
409
- "loss": 1.6767,
410
- "step": 1040
411
- },
412
- {
413
- "epoch": 11.64,
414
- "learning_rate": 0.00013250000000000002,
415
- "loss": 1.5184,
416
- "step": 1060
417
- },
418
- {
419
- "epoch": 11.86,
420
- "learning_rate": 0.000135,
421
- "loss": 1.4505,
422
- "step": 1080
423
- },
424
- {
425
- "epoch": 12.09,
426
- "learning_rate": 0.0001375,
427
- "loss": 1.4286,
428
- "step": 1100
429
- },
430
- {
431
- "epoch": 12.09,
432
- "eval_loss": 0.4074769914150238,
433
- "eval_runtime": 194.7798,
434
- "eval_samples_per_second": 24.864,
435
- "eval_steps_per_second": 0.78,
436
- "eval_wer": 0.37964802724950325,
437
- "step": 1100
438
- },
439
- {
440
- "epoch": 12.31,
441
- "learning_rate": 0.00014000000000000001,
442
- "loss": 1.3478,
443
- "step": 1120
444
- },
445
- {
446
- "epoch": 12.52,
447
- "learning_rate": 0.0001425,
448
- "loss": 1.3164,
449
- "step": 1140
450
- },
451
- {
452
- "epoch": 12.74,
453
- "learning_rate": 0.000145,
454
- "loss": 1.269,
455
- "step": 1160
456
- },
457
- {
458
- "epoch": 12.96,
459
- "learning_rate": 0.0001475,
460
- "loss": 1.2292,
461
- "step": 1180
462
- },
463
- {
464
- "epoch": 13.19,
465
- "learning_rate": 0.00015,
466
- "loss": 1.2229,
467
- "step": 1200
468
- },
469
- {
470
- "epoch": 13.19,
471
- "eval_loss": 0.28933778405189514,
472
- "eval_runtime": 192.6398,
473
- "eval_samples_per_second": 25.14,
474
- "eval_steps_per_second": 0.789,
475
- "eval_wer": 0.26520011353959694,
476
- "step": 1200
477
- }
478
- ],
479
- "max_steps": 4550,
480
- "num_train_epochs": 50,
481
- "total_flos": 1.8579484602378154e+19,
482
- "trial_name": null,
483
- "trial_params": null
484
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1300/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:49a74fb1217961243834b43dc5bb531167031d0d67fb9ade238f561b7c566d57
3
- size 623
 
 
 
 
checkpoint-1400/config.json DELETED
@@ -1,115 +0,0 @@
1
- {
2
- "_name_or_path": "marinone94/xls-r-300m-sv-robust",
3
- "activation_dropout": 0.1,
4
- "adapter_kernel_size": 3,
5
- "adapter_stride": 2,
6
- "add_adapter": false,
7
- "apply_spec_augment": true,
8
- "architectures": [
9
- "Wav2Vec2ForCTC"
10
- ],
11
- "attention_dropout": 0.0,
12
- "bos_token_id": 1,
13
- "classifier_proj_size": 256,
14
- "codevector_dim": 768,
15
- "contrastive_logits_temperature": 0.1,
16
- "conv_bias": true,
17
- "conv_dim": [
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512
25
- ],
26
- "conv_kernel": [
27
- 10,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2
34
- ],
35
- "conv_stride": [
36
- 5,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2
43
- ],
44
- "ctc_loss_reduction": "mean",
45
- "ctc_zero_infinity": false,
46
- "diversity_loss_weight": 0.1,
47
- "do_stable_layer_norm": true,
48
- "eos_token_id": 2,
49
- "feat_extract_activation": "gelu",
50
- "feat_extract_dropout": 0.0,
51
- "feat_extract_norm": "layer",
52
- "feat_proj_dropout": 0.0,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.0,
55
- "hidden_act": "gelu",
56
- "hidden_dropout": 0.0,
57
- "hidden_size": 1024,
58
- "initializer_range": 0.02,
59
- "intermediate_size": 4096,
60
- "layer_norm_eps": 1e-05,
61
- "layerdrop": 0.0,
62
- "mask_channel_length": 10,
63
- "mask_channel_min_space": 1,
64
- "mask_channel_other": 0.0,
65
- "mask_channel_prob": 0.0,
66
- "mask_channel_selection": "static",
67
- "mask_feature_length": 64,
68
- "mask_feature_min_masks": 0,
69
- "mask_feature_prob": 0.25,
70
- "mask_time_length": 10,
71
- "mask_time_min_masks": 2,
72
- "mask_time_min_space": 1,
73
- "mask_time_other": 0.0,
74
- "mask_time_prob": 0.75,
75
- "mask_time_selection": "static",
76
- "model_type": "wav2vec2",
77
- "num_adapter_layers": 3,
78
- "num_attention_heads": 16,
79
- "num_codevector_groups": 2,
80
- "num_codevectors_per_group": 320,
81
- "num_conv_pos_embedding_groups": 16,
82
- "num_conv_pos_embeddings": 128,
83
- "num_feat_extract_layers": 7,
84
- "num_hidden_layers": 24,
85
- "num_negatives": 100,
86
- "output_hidden_size": 1024,
87
- "pad_token_id": 31,
88
- "proj_codevector_dim": 768,
89
- "tdnn_dilation": [
90
- 1,
91
- 2,
92
- 3,
93
- 1,
94
- 1
95
- ],
96
- "tdnn_dim": [
97
- 512,
98
- 512,
99
- 512,
100
- 512,
101
- 1500
102
- ],
103
- "tdnn_kernel": [
104
- 5,
105
- 3,
106
- 3,
107
- 1,
108
- 1
109
- ],
110
- "torch_dtype": "float32",
111
- "transformers_version": "4.17.0.dev0",
112
- "use_weighted_layer_sum": false,
113
- "vocab_size": 34,
114
- "xvector_output_dim": 512
115
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1400/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:066471e5b1e42d95f381420c5eb456280eaebbcce841ac5387ae7bfde1fc9de6
3
- size 2490337809
 
 
 
 
checkpoint-1400/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0,
7
- "return_attention_mask": true,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-1400/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a459f72ab853af3d30be6993d7c2796ebd50d28c9cae60928258ccc702d6e46
3
- size 1262063089
 
 
 
 
checkpoint-1400/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1f4e020badfa95b92140e362fadf8866d696357e6c8984f31eba89adcf57045
3
- size 14631
 
 
 
 
checkpoint-1400/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:61c53503eb760641941023d2dcab7bb32b620bf895e31db5c3910b2760135b07
3
- size 3055
 
 
 
 
{checkpoint-1200 → checkpoint-1500}/config.json RENAMED
File without changes
{checkpoint-1200 → checkpoint-1500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f873560d6b841c6e56a47519788a591ecb8d7154bdb93981b70877292ed84a8a
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59d9e961a47ab1f00dd325d343d6b3a2c56a477a890edfff182fab52ad7d1961
3
  size 2490337809
{checkpoint-1200 → checkpoint-1500}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1300 → checkpoint-1500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c32f633d461c6e73651e2cad62b1f33ec6fa6c731ff331293bd3ea3ab178533
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22c881807ec99ef3592933c11ac25ff336dab472ee8669960c08f6bb055cb11c
3
  size 1262063089
{checkpoint-1300 → checkpoint-1500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4a3aa97bd65a21ca0d5bb2262a5c675bb5847b62e059527a92ac37b03aa6555
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26a16fb06bd95550585922193ec31f3a9a5878ee3e77efc87c1b8bdb4a21d142
3
  size 14567
{checkpoint-1400 → checkpoint-1500}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26339048a0b88d77fc1a5e9abedb9d0653c3cd4b707b2f8b4a992e2612d53b7f
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18b77bf231929dcf1b0885a05986daead51666518206e8005c99cd124832d9f9
3
  size 559
{checkpoint-1200 → checkpoint-1500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1da2dbf5dd242dfa72930d12fb2ab32ad49f7cdbe466d03a63bc27fa38c23ac
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6d0ac6fcb7f6a54e2016654eacc790bd572cb9149b97999e246f807a8519d7d
3
  size 623
{checkpoint-1400 → checkpoint-1500}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.382513661202186,
5
- "global_step": 1400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -552,11 +552,50 @@
552
  "eval_steps_per_second": 0.802,
553
  "eval_wer": 0.19727504967357365,
554
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
555
  }
556
  ],
557
  "max_steps": 4550,
558
  "num_train_epochs": 50,
559
- "total_flos": 2.168370158639202e+19,
560
  "trial_name": null,
561
  "trial_params": null
562
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.48087431693989,
5
+ "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
552
  "eval_steps_per_second": 0.802,
553
  "eval_wer": 0.19727504967357365,
554
  "step": 1400
555
+ },
556
+ {
557
+ "epoch": 15.6,
558
+ "learning_rate": 0.0001775,
559
+ "loss": 1.0733,
560
+ "step": 1420
561
+ },
562
+ {
563
+ "epoch": 15.82,
564
+ "learning_rate": 0.00017999999999999998,
565
+ "loss": 1.0635,
566
+ "step": 1440
567
+ },
568
+ {
569
+ "epoch": 16.04,
570
+ "learning_rate": 0.0001825,
571
+ "loss": 1.1079,
572
+ "step": 1460
573
+ },
574
+ {
575
+ "epoch": 16.26,
576
+ "learning_rate": 0.000185,
577
+ "loss": 1.0467,
578
+ "step": 1480
579
+ },
580
+ {
581
+ "epoch": 16.48,
582
+ "learning_rate": 0.0001875,
583
+ "loss": 1.0667,
584
+ "step": 1500
585
+ },
586
+ {
587
+ "epoch": 16.48,
588
+ "eval_loss": 0.21288961172103882,
589
+ "eval_runtime": 189.0078,
590
+ "eval_samples_per_second": 25.623,
591
+ "eval_steps_per_second": 0.804,
592
+ "eval_wer": 0.18944081748509792,
593
+ "step": 1500
594
  }
595
  ],
596
  "max_steps": 4550,
597
  "num_train_epochs": 50,
598
+ "total_flos": 2.31847014520748e+19,
599
  "trial_name": null,
600
  "trial_params": null
601
  }
{checkpoint-1200 → checkpoint-1500}/training_args.bin RENAMED
File without changes
{checkpoint-1300 → checkpoint-1600}/config.json RENAMED
File without changes
{checkpoint-1300 → checkpoint-1600}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9e0206a01417bef4d11f2a95335ad840a1721c6f678b089bdf7abdcafa51906
3
  size 2490337809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c01538eb4bd0c6517e58a239a0edad2ba490de930aaa3809478517bc233bd339
3
  size 2490337809
{checkpoint-1300 → checkpoint-1600}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1200 → checkpoint-1600}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a5f2f033aca18e07a43815c17c1eceec65f7cc103e7316a2bde0dd377abb87e
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:577857a0d5063da507a1ec462e3b3478b15131b86a5760953cbe69453453fe49
3
  size 1262063089
{checkpoint-1200 → checkpoint-1600}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a240849fae515c3fea78a4c2538f3ef3837d994e58f8156dfb367f177292bd30
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32079a7ecfdaedcafafe628415e8789ccbf50b12b21038d2ca0bc5ad993d93dd
3
  size 14567
{checkpoint-1300 → checkpoint-1600}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f30aff976137ec8a8f02bc3ed293492299fd63571aec46a1b1426491c85fe082
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45cc99d310862a4180e2b863b5866b132e05e88ff0bb7c56d68724e754f0549e
3
  size 559
{checkpoint-1400 → checkpoint-1600}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce6cad226e370a6c6a2392ca8fc4c8eed33526e65dfa1b9d115b105ea51c8462
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a5627b7bd7a23e3bd273e778f370a9ce7118842c3616c776314b677675af7a5
3
  size 623
{checkpoint-1300 → checkpoint-1600}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.28415300546448,
5
- "global_step": 1300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -513,11 +513,128 @@
513
  "eval_steps_per_second": 0.766,
514
  "eval_wer": 0.2253760999148453,
515
  "step": 1300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
  }
517
  ],
518
  "max_steps": 4550,
519
  "num_train_epochs": 50,
520
- "total_flos": 2.013668332353175e+19,
521
  "trial_name": null,
522
  "trial_params": null
523
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.579234972677597,
5
+ "global_step": 1600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
513
  "eval_steps_per_second": 0.766,
514
  "eval_wer": 0.2253760999148453,
515
  "step": 1300
516
+ },
517
+ {
518
+ "epoch": 14.5,
519
+ "learning_rate": 0.000165,
520
+ "loss": 1.1217,
521
+ "step": 1320
522
+ },
523
+ {
524
+ "epoch": 14.72,
525
+ "learning_rate": 0.0001675,
526
+ "loss": 1.1236,
527
+ "step": 1340
528
+ },
529
+ {
530
+ "epoch": 14.94,
531
+ "learning_rate": 0.00017,
532
+ "loss": 1.0969,
533
+ "step": 1360
534
+ },
535
+ {
536
+ "epoch": 15.16,
537
+ "learning_rate": 0.0001725,
538
+ "loss": 1.0971,
539
+ "step": 1380
540
+ },
541
+ {
542
+ "epoch": 15.38,
543
+ "learning_rate": 0.000175,
544
+ "loss": 1.0663,
545
+ "step": 1400
546
+ },
547
+ {
548
+ "epoch": 15.38,
549
+ "eval_loss": 0.22192780673503876,
550
+ "eval_runtime": 189.4566,
551
+ "eval_samples_per_second": 25.563,
552
+ "eval_steps_per_second": 0.802,
553
+ "eval_wer": 0.19727504967357365,
554
+ "step": 1400
555
+ },
556
+ {
557
+ "epoch": 15.6,
558
+ "learning_rate": 0.0001775,
559
+ "loss": 1.0733,
560
+ "step": 1420
561
+ },
562
+ {
563
+ "epoch": 15.82,
564
+ "learning_rate": 0.00017999999999999998,
565
+ "loss": 1.0635,
566
+ "step": 1440
567
+ },
568
+ {
569
+ "epoch": 16.04,
570
+ "learning_rate": 0.0001825,
571
+ "loss": 1.1079,
572
+ "step": 1460
573
+ },
574
+ {
575
+ "epoch": 16.26,
576
+ "learning_rate": 0.000185,
577
+ "loss": 1.0467,
578
+ "step": 1480
579
+ },
580
+ {
581
+ "epoch": 16.48,
582
+ "learning_rate": 0.0001875,
583
+ "loss": 1.0667,
584
+ "step": 1500
585
+ },
586
+ {
587
+ "epoch": 16.48,
588
+ "eval_loss": 0.21288961172103882,
589
+ "eval_runtime": 189.0078,
590
+ "eval_samples_per_second": 25.623,
591
+ "eval_steps_per_second": 0.804,
592
+ "eval_wer": 0.18944081748509792,
593
+ "step": 1500
594
+ },
595
+ {
596
+ "epoch": 16.7,
597
+ "learning_rate": 0.00019,
598
+ "loss": 1.0204,
599
+ "step": 1520
600
+ },
601
+ {
602
+ "epoch": 16.92,
603
+ "learning_rate": 0.00019250000000000002,
604
+ "loss": 1.0356,
605
+ "step": 1540
606
+ },
607
+ {
608
+ "epoch": 17.14,
609
+ "learning_rate": 0.00019500000000000002,
610
+ "loss": 1.0538,
611
+ "step": 1560
612
+ },
613
+ {
614
+ "epoch": 17.36,
615
+ "learning_rate": 0.0001975,
616
+ "loss": 1.0228,
617
+ "step": 1580
618
+ },
619
+ {
620
+ "epoch": 17.58,
621
+ "learning_rate": 0.0002,
622
+ "loss": 1.0193,
623
+ "step": 1600
624
+ },
625
+ {
626
+ "epoch": 17.58,
627
+ "eval_loss": 0.1991206556558609,
628
+ "eval_runtime": 191.1717,
629
+ "eval_samples_per_second": 25.333,
630
+ "eval_steps_per_second": 0.795,
631
+ "eval_wer": 0.17885325007096226,
632
+ "step": 1600
633
  }
634
  ],
635
  "max_steps": 4550,
636
  "num_train_epochs": 50,
637
+ "total_flos": 2.4776551891108737e+19,
638
  "trial_name": null,
639
  "trial_params": null
640
  }
{checkpoint-1300 → checkpoint-1600}/training_args.bin RENAMED
File without changes