lsb commited on
Commit
6a384b6
1 Parent(s): ddb3a9f

add tokenizer

Browse files
checkpoint-1500/config.json DELETED
@@ -1,109 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/wav2vec2-base-it-voxpopuli",
3
- "activation_dropout": 0.1,
4
- "adapter_kernel_size": 3,
5
- "adapter_stride": 2,
6
- "add_adapter": false,
7
- "apply_spec_augment": true,
8
- "architectures": [
9
- "Wav2Vec2ForCTC"
10
- ],
11
- "attention_dropout": 0.1,
12
- "bos_token_id": 1,
13
- "classifier_proj_size": 256,
14
- "codevector_dim": 256,
15
- "contrastive_logits_temperature": 0.1,
16
- "conv_bias": false,
17
- "conv_dim": [
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512
25
- ],
26
- "conv_kernel": [
27
- 10,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2
34
- ],
35
- "conv_stride": [
36
- 5,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2
43
- ],
44
- "ctc_loss_reduction": "mean",
45
- "ctc_zero_infinity": false,
46
- "diversity_loss_weight": 0.1,
47
- "do_stable_layer_norm": false,
48
- "eos_token_id": 2,
49
- "feat_extract_activation": "gelu",
50
- "feat_extract_dropout": 0.0,
51
- "feat_extract_norm": "group",
52
- "feat_proj_dropout": 0.1,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.1,
55
- "gradient_checkpointing": false,
56
- "hidden_act": "gelu",
57
- "hidden_dropout": 0.1,
58
- "hidden_dropout_prob": 0.1,
59
- "hidden_size": 768,
60
- "initializer_range": 0.02,
61
- "intermediate_size": 3072,
62
- "layer_norm_eps": 1e-05,
63
- "layerdrop": 0.1,
64
- "mask_feature_length": 10,
65
- "mask_feature_min_masks": 0,
66
- "mask_feature_prob": 0.0,
67
- "mask_time_length": 10,
68
- "mask_time_min_masks": 2,
69
- "mask_time_prob": 0.05,
70
- "model_type": "wav2vec2",
71
- "num_adapter_layers": 3,
72
- "num_attention_heads": 12,
73
- "num_codevector_groups": 2,
74
- "num_codevectors_per_group": 320,
75
- "num_conv_pos_embedding_groups": 16,
76
- "num_conv_pos_embeddings": 128,
77
- "num_feat_extract_layers": 7,
78
- "num_hidden_layers": 12,
79
- "num_negatives": 100,
80
- "output_hidden_size": 768,
81
- "pad_token_id": 25,
82
- "proj_codevector_dim": 256,
83
- "tdnn_dilation": [
84
- 1,
85
- 2,
86
- 3,
87
- 1,
88
- 1
89
- ],
90
- "tdnn_dim": [
91
- 512,
92
- 512,
93
- 512,
94
- 512,
95
- 1500
96
- ],
97
- "tdnn_kernel": [
98
- 5,
99
- 3,
100
- 3,
101
- 1,
102
- 1
103
- ],
104
- "torch_dtype": "float32",
105
- "transformers_version": "4.16.1",
106
- "use_weighted_layer_sum": false,
107
- "vocab_size": 32,
108
- "xvector_output_dim": 512
109
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1500/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09193c765095b0edd50bbf764532b0a78939a909d4dccdfd6eeea5ecfa2d02dc
3
- size 721685265
 
 
 
 
checkpoint-1500/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "return_attention_mask": false,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-1500/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b7f06101e1e8d6a711b5db6d76ed9a5bc122a74a8cc304c9d31fe46f693d853
3
- size 377670039
 
 
 
 
checkpoint-1500/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1531a7e0f905290b8961b8267ec7ef6cc83660736b9a4acaec582bf5f0e57520
3
- size 14567
 
 
 
 
checkpoint-1500/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffce1add4731473c42f90f9591511f4cbeeff8f9e059b22cd53faea5621924cb
3
- size 559
 
 
 
 
checkpoint-1500/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d212565847c568a9d9ec141c2168db2ea3ae4a7aab188a7409e99ecfd29bf6a1
3
- size 623
 
 
 
 
checkpoint-1500/trainer_state.json DELETED
@@ -1,1843 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.4191114836546521,
5
- "global_step": 1500,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 1e-08,
13
- "loss": 24.3697,
14
- "step": 5
15
- },
16
- {
17
- "epoch": 0.0,
18
- "learning_rate": 3.0000000000000004e-08,
19
- "loss": 17.6279,
20
- "step": 10
21
- },
22
- {
23
- "epoch": 0.0,
24
- "learning_rate": 5.5e-08,
25
- "loss": 17.3553,
26
- "step": 15
27
- },
28
- {
29
- "epoch": 0.01,
30
- "learning_rate": 8e-08,
31
- "loss": 16.801,
32
- "step": 20
33
- },
34
- {
35
- "epoch": 0.01,
36
- "learning_rate": 1.0500000000000001e-07,
37
- "loss": 15.1858,
38
- "step": 25
39
- },
40
- {
41
- "epoch": 0.01,
42
- "learning_rate": 1.3e-07,
43
- "loss": 14.8524,
44
- "step": 30
45
- },
46
- {
47
- "epoch": 0.01,
48
- "learning_rate": 1.5500000000000002e-07,
49
- "loss": 14.8696,
50
- "step": 35
51
- },
52
- {
53
- "epoch": 0.01,
54
- "learning_rate": 1.8e-07,
55
- "loss": 14.5785,
56
- "step": 40
57
- },
58
- {
59
- "epoch": 0.01,
60
- "learning_rate": 2.0500000000000002e-07,
61
- "loss": 15.0479,
62
- "step": 45
63
- },
64
- {
65
- "epoch": 0.01,
66
- "learning_rate": 2.3000000000000002e-07,
67
- "loss": 14.5691,
68
- "step": 50
69
- },
70
- {
71
- "epoch": 0.02,
72
- "learning_rate": 2.55e-07,
73
- "loss": 18.4354,
74
- "step": 55
75
- },
76
- {
77
- "epoch": 0.02,
78
- "learning_rate": 2.8e-07,
79
- "loss": 18.1091,
80
- "step": 60
81
- },
82
- {
83
- "epoch": 0.02,
84
- "learning_rate": 3.0500000000000004e-07,
85
- "loss": 17.4985,
86
- "step": 65
87
- },
88
- {
89
- "epoch": 0.02,
90
- "learning_rate": 3.3e-07,
91
- "loss": 15.3995,
92
- "step": 70
93
- },
94
- {
95
- "epoch": 0.02,
96
- "learning_rate": 3.55e-07,
97
- "loss": 15.2441,
98
- "step": 75
99
- },
100
- {
101
- "epoch": 0.02,
102
- "learning_rate": 3.8e-07,
103
- "loss": 14.5564,
104
- "step": 80
105
- },
106
- {
107
- "epoch": 0.02,
108
- "learning_rate": 4.0500000000000004e-07,
109
- "loss": 14.824,
110
- "step": 85
111
- },
112
- {
113
- "epoch": 0.03,
114
- "learning_rate": 4.3e-07,
115
- "loss": 14.9045,
116
- "step": 90
117
- },
118
- {
119
- "epoch": 0.03,
120
- "learning_rate": 4.5500000000000004e-07,
121
- "loss": 14.4084,
122
- "step": 95
123
- },
124
- {
125
- "epoch": 0.03,
126
- "learning_rate": 4.800000000000001e-07,
127
- "loss": 14.5942,
128
- "step": 100
129
- },
130
- {
131
- "epoch": 0.03,
132
- "learning_rate": 5.05e-07,
133
- "loss": 18.0525,
134
- "step": 105
135
- },
136
- {
137
- "epoch": 0.03,
138
- "learning_rate": 5.3e-07,
139
- "loss": 16.5825,
140
- "step": 110
141
- },
142
- {
143
- "epoch": 0.03,
144
- "learning_rate": 5.550000000000001e-07,
145
- "loss": 17.0772,
146
- "step": 115
147
- },
148
- {
149
- "epoch": 0.03,
150
- "learning_rate": 5.800000000000001e-07,
151
- "loss": 15.6476,
152
- "step": 120
153
- },
154
- {
155
- "epoch": 0.03,
156
- "learning_rate": 6.05e-07,
157
- "loss": 14.2361,
158
- "step": 125
159
- },
160
- {
161
- "epoch": 0.04,
162
- "learning_rate": 6.3e-07,
163
- "loss": 14.3617,
164
- "step": 130
165
- },
166
- {
167
- "epoch": 0.04,
168
- "learning_rate": 6.550000000000001e-07,
169
- "loss": 14.0812,
170
- "step": 135
171
- },
172
- {
173
- "epoch": 0.04,
174
- "learning_rate": 6.800000000000001e-07,
175
- "loss": 13.7279,
176
- "step": 140
177
- },
178
- {
179
- "epoch": 0.04,
180
- "learning_rate": 7.05e-07,
181
- "loss": 13.5363,
182
- "step": 145
183
- },
184
- {
185
- "epoch": 0.04,
186
- "learning_rate": 7.3e-07,
187
- "loss": 14.8066,
188
- "step": 150
189
- },
190
- {
191
- "epoch": 0.04,
192
- "learning_rate": 7.5e-07,
193
- "loss": 17.2667,
194
- "step": 155
195
- },
196
- {
197
- "epoch": 0.04,
198
- "learning_rate": 7.750000000000001e-07,
199
- "loss": 15.1567,
200
- "step": 160
201
- },
202
- {
203
- "epoch": 0.05,
204
- "learning_rate": 8.000000000000001e-07,
205
- "loss": 16.3138,
206
- "step": 165
207
- },
208
- {
209
- "epoch": 0.05,
210
- "learning_rate": 8.250000000000001e-07,
211
- "loss": 13.1312,
212
- "step": 170
213
- },
214
- {
215
- "epoch": 0.05,
216
- "learning_rate": 8.500000000000001e-07,
217
- "loss": 12.7545,
218
- "step": 175
219
- },
220
- {
221
- "epoch": 0.05,
222
- "learning_rate": 8.75e-07,
223
- "loss": 13.0234,
224
- "step": 180
225
- },
226
- {
227
- "epoch": 0.05,
228
- "learning_rate": 9.000000000000001e-07,
229
- "loss": 13.2179,
230
- "step": 185
231
- },
232
- {
233
- "epoch": 0.05,
234
- "learning_rate": 9.25e-07,
235
- "loss": 13.0916,
236
- "step": 190
237
- },
238
- {
239
- "epoch": 0.05,
240
- "learning_rate": 9.500000000000001e-07,
241
- "loss": 11.6765,
242
- "step": 195
243
- },
244
- {
245
- "epoch": 0.06,
246
- "learning_rate": 9.750000000000002e-07,
247
- "loss": 12.5044,
248
- "step": 200
249
- },
250
- {
251
- "epoch": 0.06,
252
- "learning_rate": 1.0000000000000002e-06,
253
- "loss": 15.0522,
254
- "step": 205
255
- },
256
- {
257
- "epoch": 0.06,
258
- "learning_rate": 1.025e-06,
259
- "loss": 12.4961,
260
- "step": 210
261
- },
262
- {
263
- "epoch": 0.06,
264
- "learning_rate": 1.0500000000000001e-06,
265
- "loss": 11.6473,
266
- "step": 215
267
- },
268
- {
269
- "epoch": 0.06,
270
- "learning_rate": 1.075e-06,
271
- "loss": 10.5303,
272
- "step": 220
273
- },
274
- {
275
- "epoch": 0.06,
276
- "learning_rate": 1.1e-06,
277
- "loss": 12.0959,
278
- "step": 225
279
- },
280
- {
281
- "epoch": 0.06,
282
- "learning_rate": 1.125e-06,
283
- "loss": 10.7053,
284
- "step": 230
285
- },
286
- {
287
- "epoch": 0.07,
288
- "learning_rate": 1.1500000000000002e-06,
289
- "loss": 9.5165,
290
- "step": 235
291
- },
292
- {
293
- "epoch": 0.07,
294
- "learning_rate": 1.175e-06,
295
- "loss": 11.838,
296
- "step": 240
297
- },
298
- {
299
- "epoch": 0.07,
300
- "learning_rate": 1.2000000000000002e-06,
301
- "loss": 9.5919,
302
- "step": 245
303
- },
304
- {
305
- "epoch": 0.07,
306
- "learning_rate": 1.2250000000000001e-06,
307
- "loss": 9.7433,
308
- "step": 250
309
- },
310
- {
311
- "epoch": 0.07,
312
- "learning_rate": 1.25e-06,
313
- "loss": 10.6129,
314
- "step": 255
315
- },
316
- {
317
- "epoch": 0.07,
318
- "learning_rate": 1.275e-06,
319
- "loss": 9.6936,
320
- "step": 260
321
- },
322
- {
323
- "epoch": 0.07,
324
- "learning_rate": 1.3e-06,
325
- "loss": 10.8679,
326
- "step": 265
327
- },
328
- {
329
- "epoch": 0.08,
330
- "learning_rate": 1.3250000000000002e-06,
331
- "loss": 12.5521,
332
- "step": 270
333
- },
334
- {
335
- "epoch": 0.08,
336
- "learning_rate": 1.3500000000000002e-06,
337
- "loss": 8.2902,
338
- "step": 275
339
- },
340
- {
341
- "epoch": 0.08,
342
- "learning_rate": 1.3750000000000002e-06,
343
- "loss": 8.7901,
344
- "step": 280
345
- },
346
- {
347
- "epoch": 0.08,
348
- "learning_rate": 1.4000000000000001e-06,
349
- "loss": 7.9864,
350
- "step": 285
351
- },
352
- {
353
- "epoch": 0.08,
354
- "learning_rate": 1.425e-06,
355
- "loss": 8.0575,
356
- "step": 290
357
- },
358
- {
359
- "epoch": 0.08,
360
- "learning_rate": 1.45e-06,
361
- "loss": 7.4836,
362
- "step": 295
363
- },
364
- {
365
- "epoch": 0.08,
366
- "learning_rate": 1.475e-06,
367
- "loss": 10.0019,
368
- "step": 300
369
- },
370
- {
371
- "epoch": 0.09,
372
- "learning_rate": 1.5e-06,
373
- "loss": 11.417,
374
- "step": 305
375
- },
376
- {
377
- "epoch": 0.09,
378
- "learning_rate": 1.525e-06,
379
- "loss": 8.4366,
380
- "step": 310
381
- },
382
- {
383
- "epoch": 0.09,
384
- "learning_rate": 1.5500000000000002e-06,
385
- "loss": 8.6453,
386
- "step": 315
387
- },
388
- {
389
- "epoch": 0.09,
390
- "learning_rate": 1.5750000000000002e-06,
391
- "loss": 7.6421,
392
- "step": 320
393
- },
394
- {
395
- "epoch": 0.09,
396
- "learning_rate": 1.6000000000000001e-06,
397
- "loss": 8.7056,
398
- "step": 325
399
- },
400
- {
401
- "epoch": 0.09,
402
- "learning_rate": 1.6250000000000001e-06,
403
- "loss": 7.4756,
404
- "step": 330
405
- },
406
- {
407
- "epoch": 0.09,
408
- "learning_rate": 1.6500000000000003e-06,
409
- "loss": 6.6272,
410
- "step": 335
411
- },
412
- {
413
- "epoch": 0.09,
414
- "learning_rate": 1.6750000000000003e-06,
415
- "loss": 6.9702,
416
- "step": 340
417
- },
418
- {
419
- "epoch": 0.1,
420
- "learning_rate": 1.7000000000000002e-06,
421
- "loss": 6.971,
422
- "step": 345
423
- },
424
- {
425
- "epoch": 0.1,
426
- "learning_rate": 1.725e-06,
427
- "loss": 6.4695,
428
- "step": 350
429
- },
430
- {
431
- "epoch": 0.1,
432
- "learning_rate": 1.75e-06,
433
- "loss": 9.6089,
434
- "step": 355
435
- },
436
- {
437
- "epoch": 0.1,
438
- "learning_rate": 1.7750000000000002e-06,
439
- "loss": 7.6838,
440
- "step": 360
441
- },
442
- {
443
- "epoch": 0.1,
444
- "learning_rate": 1.8000000000000001e-06,
445
- "loss": 6.6076,
446
- "step": 365
447
- },
448
- {
449
- "epoch": 0.1,
450
- "learning_rate": 1.825e-06,
451
- "loss": 8.1119,
452
- "step": 370
453
- },
454
- {
455
- "epoch": 0.1,
456
- "learning_rate": 1.85e-06,
457
- "loss": 8.6286,
458
- "step": 375
459
- },
460
- {
461
- "epoch": 0.11,
462
- "learning_rate": 1.8750000000000003e-06,
463
- "loss": 6.2768,
464
- "step": 380
465
- },
466
- {
467
- "epoch": 0.11,
468
- "learning_rate": 1.9000000000000002e-06,
469
- "loss": 6.9159,
470
- "step": 385
471
- },
472
- {
473
- "epoch": 0.11,
474
- "learning_rate": 1.925e-06,
475
- "loss": 7.0885,
476
- "step": 390
477
- },
478
- {
479
- "epoch": 0.11,
480
- "learning_rate": 1.9500000000000004e-06,
481
- "loss": 8.5395,
482
- "step": 395
483
- },
484
- {
485
- "epoch": 0.11,
486
- "learning_rate": 1.975e-06,
487
- "loss": 8.7365,
488
- "step": 400
489
- },
490
- {
491
- "epoch": 0.11,
492
- "learning_rate": 2.0000000000000003e-06,
493
- "loss": 8.7254,
494
- "step": 405
495
- },
496
- {
497
- "epoch": 0.11,
498
- "learning_rate": 2.025e-06,
499
- "loss": 6.4878,
500
- "step": 410
501
- },
502
- {
503
- "epoch": 0.12,
504
- "learning_rate": 2.05e-06,
505
- "loss": 10.4269,
506
- "step": 415
507
- },
508
- {
509
- "epoch": 0.12,
510
- "learning_rate": 2.075e-06,
511
- "loss": 6.2613,
512
- "step": 420
513
- },
514
- {
515
- "epoch": 0.12,
516
- "learning_rate": 2.1000000000000002e-06,
517
- "loss": 6.6001,
518
- "step": 425
519
- },
520
- {
521
- "epoch": 0.12,
522
- "learning_rate": 2.125e-06,
523
- "loss": 6.0585,
524
- "step": 430
525
- },
526
- {
527
- "epoch": 0.12,
528
- "learning_rate": 2.15e-06,
529
- "loss": 6.2137,
530
- "step": 435
531
- },
532
- {
533
- "epoch": 0.12,
534
- "learning_rate": 2.1750000000000004e-06,
535
- "loss": 8.026,
536
- "step": 440
537
- },
538
- {
539
- "epoch": 0.12,
540
- "learning_rate": 2.2e-06,
541
- "loss": 8.5144,
542
- "step": 445
543
- },
544
- {
545
- "epoch": 0.13,
546
- "learning_rate": 2.2250000000000003e-06,
547
- "loss": 6.092,
548
- "step": 450
549
- },
550
- {
551
- "epoch": 0.13,
552
- "learning_rate": 2.25e-06,
553
- "loss": 8.8018,
554
- "step": 455
555
- },
556
- {
557
- "epoch": 0.13,
558
- "learning_rate": 2.2750000000000002e-06,
559
- "loss": 6.2074,
560
- "step": 460
561
- },
562
- {
563
- "epoch": 0.13,
564
- "learning_rate": 2.3000000000000004e-06,
565
- "loss": 10.6086,
566
- "step": 465
567
- },
568
- {
569
- "epoch": 0.13,
570
- "learning_rate": 2.325e-06,
571
- "loss": 6.5399,
572
- "step": 470
573
- },
574
- {
575
- "epoch": 0.13,
576
- "learning_rate": 2.35e-06,
577
- "loss": 7.0942,
578
- "step": 475
579
- },
580
- {
581
- "epoch": 0.13,
582
- "learning_rate": 2.375e-06,
583
- "loss": 6.4347,
584
- "step": 480
585
- },
586
- {
587
- "epoch": 0.14,
588
- "learning_rate": 2.4000000000000003e-06,
589
- "loss": 7.1602,
590
- "step": 485
591
- },
592
- {
593
- "epoch": 0.14,
594
- "learning_rate": 2.425e-06,
595
- "loss": 5.4093,
596
- "step": 490
597
- },
598
- {
599
- "epoch": 0.14,
600
- "learning_rate": 2.4500000000000003e-06,
601
- "loss": 6.618,
602
- "step": 495
603
- },
604
- {
605
- "epoch": 0.14,
606
- "learning_rate": 2.475e-06,
607
- "loss": 8.3884,
608
- "step": 500
609
- },
610
- {
611
- "epoch": 0.14,
612
- "eval_loss": 7.4285359382629395,
613
- "eval_runtime": 109.7145,
614
- "eval_samples_per_second": 10.883,
615
- "eval_steps_per_second": 1.367,
616
- "eval_wer": 1.0,
617
- "step": 500
618
- },
619
- {
620
- "epoch": 0.14,
621
- "learning_rate": 2.5e-06,
622
- "loss": 8.5122,
623
- "step": 505
624
- },
625
- {
626
- "epoch": 0.14,
627
- "learning_rate": 2.5250000000000004e-06,
628
- "loss": 9.0237,
629
- "step": 510
630
- },
631
- {
632
- "epoch": 0.14,
633
- "learning_rate": 2.55e-06,
634
- "loss": 6.343,
635
- "step": 515
636
- },
637
- {
638
- "epoch": 0.15,
639
- "learning_rate": 2.5750000000000003e-06,
640
- "loss": 6.3103,
641
- "step": 520
642
- },
643
- {
644
- "epoch": 0.15,
645
- "learning_rate": 2.6e-06,
646
- "loss": 7.6124,
647
- "step": 525
648
- },
649
- {
650
- "epoch": 0.15,
651
- "learning_rate": 2.6250000000000003e-06,
652
- "loss": 7.1252,
653
- "step": 530
654
- },
655
- {
656
- "epoch": 0.15,
657
- "learning_rate": 2.6500000000000005e-06,
658
- "loss": 7.0198,
659
- "step": 535
660
- },
661
- {
662
- "epoch": 0.15,
663
- "learning_rate": 2.6750000000000002e-06,
664
- "loss": 6.3734,
665
- "step": 540
666
- },
667
- {
668
- "epoch": 0.15,
669
- "learning_rate": 2.7000000000000004e-06,
670
- "loss": 4.7063,
671
- "step": 545
672
- },
673
- {
674
- "epoch": 0.15,
675
- "learning_rate": 2.7250000000000006e-06,
676
- "loss": 7.7081,
677
- "step": 550
678
- },
679
- {
680
- "epoch": 0.16,
681
- "learning_rate": 2.7500000000000004e-06,
682
- "loss": 5.9,
683
- "step": 555
684
- },
685
- {
686
- "epoch": 0.16,
687
- "learning_rate": 2.7750000000000005e-06,
688
- "loss": 5.634,
689
- "step": 560
690
- },
691
- {
692
- "epoch": 0.16,
693
- "learning_rate": 2.8000000000000003e-06,
694
- "loss": 8.6987,
695
- "step": 565
696
- },
697
- {
698
- "epoch": 0.16,
699
- "learning_rate": 2.825e-06,
700
- "loss": 6.5511,
701
- "step": 570
702
- },
703
- {
704
- "epoch": 0.16,
705
- "learning_rate": 2.85e-06,
706
- "loss": 7.6908,
707
- "step": 575
708
- },
709
- {
710
- "epoch": 0.16,
711
- "learning_rate": 2.875e-06,
712
- "loss": 7.4105,
713
- "step": 580
714
- },
715
- {
716
- "epoch": 0.16,
717
- "learning_rate": 2.9e-06,
718
- "loss": 5.802,
719
- "step": 585
720
- },
721
- {
722
- "epoch": 0.16,
723
- "learning_rate": 2.925e-06,
724
- "loss": 6.0835,
725
- "step": 590
726
- },
727
- {
728
- "epoch": 0.17,
729
- "learning_rate": 2.95e-06,
730
- "loss": 7.8744,
731
- "step": 595
732
- },
733
- {
734
- "epoch": 0.17,
735
- "learning_rate": 2.9750000000000003e-06,
736
- "loss": 6.1134,
737
- "step": 600
738
- },
739
- {
740
- "epoch": 0.17,
741
- "learning_rate": 3e-06,
742
- "loss": 6.6416,
743
- "step": 605
744
- },
745
- {
746
- "epoch": 0.17,
747
- "learning_rate": 3.0250000000000003e-06,
748
- "loss": 8.7177,
749
- "step": 610
750
- },
751
- {
752
- "epoch": 0.17,
753
- "learning_rate": 3.05e-06,
754
- "loss": 6.2383,
755
- "step": 615
756
- },
757
- {
758
- "epoch": 0.17,
759
- "learning_rate": 3.075e-06,
760
- "loss": 6.2413,
761
- "step": 620
762
- },
763
- {
764
- "epoch": 0.17,
765
- "learning_rate": 3.1000000000000004e-06,
766
- "loss": 5.4088,
767
- "step": 625
768
- },
769
- {
770
- "epoch": 0.18,
771
- "learning_rate": 3.125e-06,
772
- "loss": 4.3896,
773
- "step": 630
774
- },
775
- {
776
- "epoch": 0.18,
777
- "learning_rate": 3.1500000000000003e-06,
778
- "loss": 5.6138,
779
- "step": 635
780
- },
781
- {
782
- "epoch": 0.18,
783
- "learning_rate": 3.175e-06,
784
- "loss": 5.0078,
785
- "step": 640
786
- },
787
- {
788
- "epoch": 0.18,
789
- "learning_rate": 3.2000000000000003e-06,
790
- "loss": 6.9994,
791
- "step": 645
792
- },
793
- {
794
- "epoch": 0.18,
795
- "learning_rate": 3.2250000000000005e-06,
796
- "loss": 5.4662,
797
- "step": 650
798
- },
799
- {
800
- "epoch": 0.18,
801
- "learning_rate": 3.2500000000000002e-06,
802
- "loss": 5.144,
803
- "step": 655
804
- },
805
- {
806
- "epoch": 0.18,
807
- "learning_rate": 3.2750000000000004e-06,
808
- "loss": 5.4199,
809
- "step": 660
810
- },
811
- {
812
- "epoch": 0.19,
813
- "learning_rate": 3.3000000000000006e-06,
814
- "loss": 6.5677,
815
- "step": 665
816
- },
817
- {
818
- "epoch": 0.19,
819
- "learning_rate": 3.3250000000000004e-06,
820
- "loss": 6.168,
821
- "step": 670
822
- },
823
- {
824
- "epoch": 0.19,
825
- "learning_rate": 3.3500000000000005e-06,
826
- "loss": 6.3248,
827
- "step": 675
828
- },
829
- {
830
- "epoch": 0.19,
831
- "learning_rate": 3.3750000000000003e-06,
832
- "loss": 5.6306,
833
- "step": 680
834
- },
835
- {
836
- "epoch": 0.19,
837
- "learning_rate": 3.4000000000000005e-06,
838
- "loss": 4.4895,
839
- "step": 685
840
- },
841
- {
842
- "epoch": 0.19,
843
- "learning_rate": 3.4250000000000007e-06,
844
- "loss": 4.7549,
845
- "step": 690
846
- },
847
- {
848
- "epoch": 0.19,
849
- "learning_rate": 3.45e-06,
850
- "loss": 3.8521,
851
- "step": 695
852
- },
853
- {
854
- "epoch": 0.2,
855
- "learning_rate": 3.475e-06,
856
- "loss": 7.8289,
857
- "step": 700
858
- },
859
- {
860
- "epoch": 0.2,
861
- "learning_rate": 3.5e-06,
862
- "loss": 8.7039,
863
- "step": 705
864
- },
865
- {
866
- "epoch": 0.2,
867
- "learning_rate": 3.525e-06,
868
- "loss": 7.9723,
869
- "step": 710
870
- },
871
- {
872
- "epoch": 0.2,
873
- "learning_rate": 3.5500000000000003e-06,
874
- "loss": 7.4315,
875
- "step": 715
876
- },
877
- {
878
- "epoch": 0.2,
879
- "learning_rate": 3.575e-06,
880
- "loss": 5.6061,
881
- "step": 720
882
- },
883
- {
884
- "epoch": 0.2,
885
- "learning_rate": 3.6000000000000003e-06,
886
- "loss": 5.4848,
887
- "step": 725
888
- },
889
- {
890
- "epoch": 0.2,
891
- "learning_rate": 3.625e-06,
892
- "loss": 4.1425,
893
- "step": 730
894
- },
895
- {
896
- "epoch": 0.21,
897
- "learning_rate": 3.65e-06,
898
- "loss": 5.5031,
899
- "step": 735
900
- },
901
- {
902
- "epoch": 0.21,
903
- "learning_rate": 3.6750000000000004e-06,
904
- "loss": 4.4556,
905
- "step": 740
906
- },
907
- {
908
- "epoch": 0.21,
909
- "learning_rate": 3.7e-06,
910
- "loss": 5.9729,
911
- "step": 745
912
- },
913
- {
914
- "epoch": 0.21,
915
- "learning_rate": 3.7250000000000003e-06,
916
- "loss": 4.5424,
917
- "step": 750
918
- },
919
- {
920
- "epoch": 0.21,
921
- "learning_rate": 3.7500000000000005e-06,
922
- "loss": 7.1968,
923
- "step": 755
924
- },
925
- {
926
- "epoch": 0.21,
927
- "learning_rate": 3.7750000000000003e-06,
928
- "loss": 6.0854,
929
- "step": 760
930
- },
931
- {
932
- "epoch": 0.21,
933
- "learning_rate": 3.8000000000000005e-06,
934
- "loss": 5.1684,
935
- "step": 765
936
- },
937
- {
938
- "epoch": 0.22,
939
- "learning_rate": 3.825000000000001e-06,
940
- "loss": 4.8008,
941
- "step": 770
942
- },
943
- {
944
- "epoch": 0.22,
945
- "learning_rate": 3.85e-06,
946
- "loss": 5.2927,
947
- "step": 775
948
- },
949
- {
950
- "epoch": 0.22,
951
- "learning_rate": 3.875e-06,
952
- "loss": 4.1035,
953
- "step": 780
954
- },
955
- {
956
- "epoch": 0.22,
957
- "learning_rate": 3.900000000000001e-06,
958
- "loss": 4.083,
959
- "step": 785
960
- },
961
- {
962
- "epoch": 0.22,
963
- "learning_rate": 3.9250000000000005e-06,
964
- "loss": 4.2797,
965
- "step": 790
966
- },
967
- {
968
- "epoch": 0.22,
969
- "learning_rate": 3.95e-06,
970
- "loss": 6.4263,
971
- "step": 795
972
- },
973
- {
974
- "epoch": 0.22,
975
- "learning_rate": 3.975000000000001e-06,
976
- "loss": 6.5447,
977
- "step": 800
978
- },
979
- {
980
- "epoch": 0.22,
981
- "learning_rate": 4.000000000000001e-06,
982
- "loss": 5.9181,
983
- "step": 805
984
- },
985
- {
986
- "epoch": 0.23,
987
- "learning_rate": 4.0250000000000004e-06,
988
- "loss": 6.3199,
989
- "step": 810
990
- },
991
- {
992
- "epoch": 0.23,
993
- "learning_rate": 4.05e-06,
994
- "loss": 7.4045,
995
- "step": 815
996
- },
997
- {
998
- "epoch": 0.23,
999
- "learning_rate": 4.075e-06,
1000
- "loss": 4.7224,
1001
- "step": 820
1002
- },
1003
- {
1004
- "epoch": 0.23,
1005
- "learning_rate": 4.1e-06,
1006
- "loss": 4.3196,
1007
- "step": 825
1008
- },
1009
- {
1010
- "epoch": 0.23,
1011
- "learning_rate": 4.125e-06,
1012
- "loss": 6.7817,
1013
- "step": 830
1014
- },
1015
- {
1016
- "epoch": 0.23,
1017
- "learning_rate": 4.15e-06,
1018
- "loss": 3.9509,
1019
- "step": 835
1020
- },
1021
- {
1022
- "epoch": 0.23,
1023
- "learning_rate": 4.175e-06,
1024
- "loss": 5.2207,
1025
- "step": 840
1026
- },
1027
- {
1028
- "epoch": 0.24,
1029
- "learning_rate": 4.2000000000000004e-06,
1030
- "loss": 4.5517,
1031
- "step": 845
1032
- },
1033
- {
1034
- "epoch": 0.24,
1035
- "learning_rate": 4.225e-06,
1036
- "loss": 5.7441,
1037
- "step": 850
1038
- },
1039
- {
1040
- "epoch": 0.24,
1041
- "learning_rate": 4.25e-06,
1042
- "loss": 6.9507,
1043
- "step": 855
1044
- },
1045
- {
1046
- "epoch": 0.24,
1047
- "learning_rate": 4.2750000000000006e-06,
1048
- "loss": 4.8579,
1049
- "step": 860
1050
- },
1051
- {
1052
- "epoch": 0.24,
1053
- "learning_rate": 4.3e-06,
1054
- "loss": 5.6729,
1055
- "step": 865
1056
- },
1057
- {
1058
- "epoch": 0.24,
1059
- "learning_rate": 4.325e-06,
1060
- "loss": 5.7697,
1061
- "step": 870
1062
- },
1063
- {
1064
- "epoch": 0.24,
1065
- "learning_rate": 4.350000000000001e-06,
1066
- "loss": 4.7595,
1067
- "step": 875
1068
- },
1069
- {
1070
- "epoch": 0.25,
1071
- "learning_rate": 4.3750000000000005e-06,
1072
- "loss": 4.6507,
1073
- "step": 880
1074
- },
1075
- {
1076
- "epoch": 0.25,
1077
- "learning_rate": 4.4e-06,
1078
- "loss": 5.0327,
1079
- "step": 885
1080
- },
1081
- {
1082
- "epoch": 0.25,
1083
- "learning_rate": 4.425e-06,
1084
- "loss": 4.3302,
1085
- "step": 890
1086
- },
1087
- {
1088
- "epoch": 0.25,
1089
- "learning_rate": 4.450000000000001e-06,
1090
- "loss": 3.2291,
1091
- "step": 895
1092
- },
1093
- {
1094
- "epoch": 0.25,
1095
- "learning_rate": 4.475e-06,
1096
- "loss": 3.5748,
1097
- "step": 900
1098
- },
1099
- {
1100
- "epoch": 0.25,
1101
- "learning_rate": 4.5e-06,
1102
- "loss": 2.9844,
1103
- "step": 905
1104
- },
1105
- {
1106
- "epoch": 0.25,
1107
- "learning_rate": 4.525000000000001e-06,
1108
- "loss": 3.2969,
1109
- "step": 910
1110
- },
1111
- {
1112
- "epoch": 0.26,
1113
- "learning_rate": 4.5500000000000005e-06,
1114
- "loss": 3.2062,
1115
- "step": 915
1116
- },
1117
- {
1118
- "epoch": 0.26,
1119
- "learning_rate": 4.575e-06,
1120
- "loss": 3.1091,
1121
- "step": 920
1122
- },
1123
- {
1124
- "epoch": 0.26,
1125
- "learning_rate": 4.600000000000001e-06,
1126
- "loss": 3.288,
1127
- "step": 925
1128
- },
1129
- {
1130
- "epoch": 0.26,
1131
- "learning_rate": 4.625000000000001e-06,
1132
- "loss": 2.8966,
1133
- "step": 930
1134
- },
1135
- {
1136
- "epoch": 0.26,
1137
- "learning_rate": 4.65e-06,
1138
- "loss": 2.9091,
1139
- "step": 935
1140
- },
1141
- {
1142
- "epoch": 0.26,
1143
- "learning_rate": 4.675000000000001e-06,
1144
- "loss": 3.212,
1145
- "step": 940
1146
- },
1147
- {
1148
- "epoch": 0.26,
1149
- "learning_rate": 4.7e-06,
1150
- "loss": 2.8426,
1151
- "step": 945
1152
- },
1153
- {
1154
- "epoch": 0.27,
1155
- "learning_rate": 4.7250000000000005e-06,
1156
- "loss": 3.1961,
1157
- "step": 950
1158
- },
1159
- {
1160
- "epoch": 0.27,
1161
- "learning_rate": 4.75e-06,
1162
- "loss": 2.9483,
1163
- "step": 955
1164
- },
1165
- {
1166
- "epoch": 0.27,
1167
- "learning_rate": 4.775e-06,
1168
- "loss": 2.9817,
1169
- "step": 960
1170
- },
1171
- {
1172
- "epoch": 0.27,
1173
- "learning_rate": 4.800000000000001e-06,
1174
- "loss": 3.0139,
1175
- "step": 965
1176
- },
1177
- {
1178
- "epoch": 0.27,
1179
- "learning_rate": 4.825e-06,
1180
- "loss": 2.8438,
1181
- "step": 970
1182
- },
1183
- {
1184
- "epoch": 0.27,
1185
- "learning_rate": 4.85e-06,
1186
- "loss": 3.008,
1187
- "step": 975
1188
- },
1189
- {
1190
- "epoch": 0.27,
1191
- "learning_rate": 4.875e-06,
1192
- "loss": 2.9319,
1193
- "step": 980
1194
- },
1195
- {
1196
- "epoch": 0.28,
1197
- "learning_rate": 4.9000000000000005e-06,
1198
- "loss": 2.8704,
1199
- "step": 985
1200
- },
1201
- {
1202
- "epoch": 0.28,
1203
- "learning_rate": 4.925e-06,
1204
- "loss": 3.0068,
1205
- "step": 990
1206
- },
1207
- {
1208
- "epoch": 0.28,
1209
- "learning_rate": 4.95e-06,
1210
- "loss": 2.8494,
1211
- "step": 995
1212
- },
1213
- {
1214
- "epoch": 0.28,
1215
- "learning_rate": 4.975000000000001e-06,
1216
- "loss": 2.8652,
1217
- "step": 1000
1218
- },
1219
- {
1220
- "epoch": 0.28,
1221
- "eval_loss": 2.881692886352539,
1222
- "eval_runtime": 141.6494,
1223
- "eval_samples_per_second": 8.429,
1224
- "eval_steps_per_second": 1.059,
1225
- "eval_wer": 1.0,
1226
- "step": 1000
1227
- },
1228
- {
1229
- "epoch": 0.28,
1230
- "learning_rate": 5e-06,
1231
- "loss": 2.9299,
1232
- "step": 1005
1233
- },
1234
- {
1235
- "epoch": 0.28,
1236
- "learning_rate": 4.998122559327126e-06,
1237
- "loss": 2.855,
1238
- "step": 1010
1239
- },
1240
- {
1241
- "epoch": 0.28,
1242
- "learning_rate": 4.996245118654251e-06,
1243
- "loss": 2.8683,
1244
- "step": 1015
1245
- },
1246
- {
1247
- "epoch": 0.28,
1248
- "learning_rate": 4.994367677981376e-06,
1249
- "loss": 2.8563,
1250
- "step": 1020
1251
- },
1252
- {
1253
- "epoch": 0.29,
1254
- "learning_rate": 4.992490237308501e-06,
1255
- "loss": 2.9462,
1256
- "step": 1025
1257
- },
1258
- {
1259
- "epoch": 0.29,
1260
- "learning_rate": 4.990612796635627e-06,
1261
- "loss": 2.8713,
1262
- "step": 1030
1263
- },
1264
- {
1265
- "epoch": 0.29,
1266
- "learning_rate": 4.988735355962752e-06,
1267
- "loss": 2.9464,
1268
- "step": 1035
1269
- },
1270
- {
1271
- "epoch": 0.29,
1272
- "learning_rate": 4.986857915289877e-06,
1273
- "loss": 2.8587,
1274
- "step": 1040
1275
- },
1276
- {
1277
- "epoch": 0.29,
1278
- "learning_rate": 4.984980474617002e-06,
1279
- "loss": 2.9491,
1280
- "step": 1045
1281
- },
1282
- {
1283
- "epoch": 0.29,
1284
- "learning_rate": 4.9831030339441276e-06,
1285
- "loss": 2.8847,
1286
- "step": 1050
1287
- },
1288
- {
1289
- "epoch": 0.29,
1290
- "learning_rate": 4.981225593271253e-06,
1291
- "loss": 2.9443,
1292
- "step": 1055
1293
- },
1294
- {
1295
- "epoch": 0.3,
1296
- "learning_rate": 4.979348152598378e-06,
1297
- "loss": 2.9,
1298
- "step": 1060
1299
- },
1300
- {
1301
- "epoch": 0.3,
1302
- "learning_rate": 4.977470711925503e-06,
1303
- "loss": 2.8448,
1304
- "step": 1065
1305
- },
1306
- {
1307
- "epoch": 0.3,
1308
- "learning_rate": 4.9755932712526285e-06,
1309
- "loss": 2.862,
1310
- "step": 1070
1311
- },
1312
- {
1313
- "epoch": 0.3,
1314
- "learning_rate": 4.973715830579754e-06,
1315
- "loss": 2.8782,
1316
- "step": 1075
1317
- },
1318
- {
1319
- "epoch": 0.3,
1320
- "learning_rate": 4.97183838990688e-06,
1321
- "loss": 2.866,
1322
- "step": 1080
1323
- },
1324
- {
1325
- "epoch": 0.3,
1326
- "learning_rate": 4.969960949234004e-06,
1327
- "loss": 2.9187,
1328
- "step": 1085
1329
- },
1330
- {
1331
- "epoch": 0.3,
1332
- "learning_rate": 4.96808350856113e-06,
1333
- "loss": 2.8627,
1334
- "step": 1090
1335
- },
1336
- {
1337
- "epoch": 0.31,
1338
- "learning_rate": 4.966206067888255e-06,
1339
- "loss": 2.8636,
1340
- "step": 1095
1341
- },
1342
- {
1343
- "epoch": 0.31,
1344
- "learning_rate": 4.964328627215381e-06,
1345
- "loss": 2.9201,
1346
- "step": 1100
1347
- },
1348
- {
1349
- "epoch": 0.31,
1350
- "learning_rate": 4.962451186542506e-06,
1351
- "loss": 2.9078,
1352
- "step": 1105
1353
- },
1354
- {
1355
- "epoch": 0.31,
1356
- "learning_rate": 4.9605737458696305e-06,
1357
- "loss": 2.8853,
1358
- "step": 1110
1359
- },
1360
- {
1361
- "epoch": 0.31,
1362
- "learning_rate": 4.9586963051967565e-06,
1363
- "loss": 2.8612,
1364
- "step": 1115
1365
- },
1366
- {
1367
- "epoch": 0.31,
1368
- "learning_rate": 4.956818864523881e-06,
1369
- "loss": 2.8607,
1370
- "step": 1120
1371
- },
1372
- {
1373
- "epoch": 0.31,
1374
- "learning_rate": 4.954941423851007e-06,
1375
- "loss": 2.845,
1376
- "step": 1125
1377
- },
1378
- {
1379
- "epoch": 0.32,
1380
- "learning_rate": 4.953063983178132e-06,
1381
- "loss": 2.8778,
1382
- "step": 1130
1383
- },
1384
- {
1385
- "epoch": 0.32,
1386
- "learning_rate": 4.9511865425052575e-06,
1387
- "loss": 3.1876,
1388
- "step": 1135
1389
- },
1390
- {
1391
- "epoch": 0.32,
1392
- "learning_rate": 4.949309101832383e-06,
1393
- "loss": 2.8603,
1394
- "step": 1140
1395
- },
1396
- {
1397
- "epoch": 0.32,
1398
- "learning_rate": 4.947431661159508e-06,
1399
- "loss": 2.8403,
1400
- "step": 1145
1401
- },
1402
- {
1403
- "epoch": 0.32,
1404
- "learning_rate": 4.945554220486633e-06,
1405
- "loss": 2.8791,
1406
- "step": 1150
1407
- },
1408
- {
1409
- "epoch": 0.32,
1410
- "learning_rate": 4.9436767798137585e-06,
1411
- "loss": 2.8604,
1412
- "step": 1155
1413
- },
1414
- {
1415
- "epoch": 0.32,
1416
- "learning_rate": 4.941799339140884e-06,
1417
- "loss": 2.8595,
1418
- "step": 1160
1419
- },
1420
- {
1421
- "epoch": 0.33,
1422
- "learning_rate": 4.939921898468009e-06,
1423
- "loss": 2.9011,
1424
- "step": 1165
1425
- },
1426
- {
1427
- "epoch": 0.33,
1428
- "learning_rate": 4.938044457795134e-06,
1429
- "loss": 2.86,
1430
- "step": 1170
1431
- },
1432
- {
1433
- "epoch": 0.33,
1434
- "learning_rate": 4.936167017122259e-06,
1435
- "loss": 2.8583,
1436
- "step": 1175
1437
- },
1438
- {
1439
- "epoch": 0.33,
1440
- "learning_rate": 4.934289576449385e-06,
1441
- "loss": 2.9274,
1442
- "step": 1180
1443
- },
1444
- {
1445
- "epoch": 0.33,
1446
- "learning_rate": 4.93241213577651e-06,
1447
- "loss": 2.9472,
1448
- "step": 1185
1449
- },
1450
- {
1451
- "epoch": 0.33,
1452
- "learning_rate": 4.930534695103635e-06,
1453
- "loss": 3.4067,
1454
- "step": 1190
1455
- },
1456
- {
1457
- "epoch": 0.33,
1458
- "learning_rate": 4.92865725443076e-06,
1459
- "loss": 2.9363,
1460
- "step": 1195
1461
- },
1462
- {
1463
- "epoch": 0.34,
1464
- "learning_rate": 4.926779813757886e-06,
1465
- "loss": 2.8608,
1466
- "step": 1200
1467
- },
1468
- {
1469
- "epoch": 0.34,
1470
- "learning_rate": 4.924902373085011e-06,
1471
- "loss": 3.356,
1472
- "step": 1205
1473
- },
1474
- {
1475
- "epoch": 0.34,
1476
- "learning_rate": 4.923024932412136e-06,
1477
- "loss": 2.8678,
1478
- "step": 1210
1479
- },
1480
- {
1481
- "epoch": 0.34,
1482
- "learning_rate": 4.921147491739261e-06,
1483
- "loss": 2.8664,
1484
- "step": 1215
1485
- },
1486
- {
1487
- "epoch": 0.34,
1488
- "learning_rate": 4.919270051066387e-06,
1489
- "loss": 2.8897,
1490
- "step": 1220
1491
- },
1492
- {
1493
- "epoch": 0.34,
1494
- "learning_rate": 4.917392610393512e-06,
1495
- "loss": 2.8783,
1496
- "step": 1225
1497
- },
1498
- {
1499
- "epoch": 0.34,
1500
- "learning_rate": 4.915515169720637e-06,
1501
- "loss": 2.8886,
1502
- "step": 1230
1503
- },
1504
- {
1505
- "epoch": 0.35,
1506
- "learning_rate": 4.913637729047762e-06,
1507
- "loss": 2.8534,
1508
- "step": 1235
1509
- },
1510
- {
1511
- "epoch": 0.35,
1512
- "learning_rate": 4.9117602883748875e-06,
1513
- "loss": 2.8322,
1514
- "step": 1240
1515
- },
1516
- {
1517
- "epoch": 0.35,
1518
- "learning_rate": 4.909882847702014e-06,
1519
- "loss": 2.8148,
1520
- "step": 1245
1521
- },
1522
- {
1523
- "epoch": 0.35,
1524
- "learning_rate": 4.908005407029138e-06,
1525
- "loss": 2.832,
1526
- "step": 1250
1527
- },
1528
- {
1529
- "epoch": 0.35,
1530
- "learning_rate": 4.906127966356263e-06,
1531
- "loss": 2.8783,
1532
- "step": 1255
1533
- },
1534
- {
1535
- "epoch": 0.35,
1536
- "learning_rate": 4.9042505256833885e-06,
1537
- "loss": 2.8255,
1538
- "step": 1260
1539
- },
1540
- {
1541
- "epoch": 0.35,
1542
- "learning_rate": 4.902373085010514e-06,
1543
- "loss": 2.8893,
1544
- "step": 1265
1545
- },
1546
- {
1547
- "epoch": 0.35,
1548
- "learning_rate": 4.90049564433764e-06,
1549
- "loss": 2.9132,
1550
- "step": 1270
1551
- },
1552
- {
1553
- "epoch": 0.36,
1554
- "learning_rate": 4.898618203664764e-06,
1555
- "loss": 2.8366,
1556
- "step": 1275
1557
- },
1558
- {
1559
- "epoch": 0.36,
1560
- "learning_rate": 4.89674076299189e-06,
1561
- "loss": 2.8717,
1562
- "step": 1280
1563
- },
1564
- {
1565
- "epoch": 0.36,
1566
- "learning_rate": 4.894863322319015e-06,
1567
- "loss": 3.0332,
1568
- "step": 1285
1569
- },
1570
- {
1571
- "epoch": 0.36,
1572
- "learning_rate": 4.892985881646141e-06,
1573
- "loss": 2.8806,
1574
- "step": 1290
1575
- },
1576
- {
1577
- "epoch": 0.36,
1578
- "learning_rate": 4.891108440973266e-06,
1579
- "loss": 2.8377,
1580
- "step": 1295
1581
- },
1582
- {
1583
- "epoch": 0.36,
1584
- "learning_rate": 4.88923100030039e-06,
1585
- "loss": 2.8136,
1586
- "step": 1300
1587
- },
1588
- {
1589
- "epoch": 0.36,
1590
- "learning_rate": 4.8873535596275165e-06,
1591
- "loss": 2.9869,
1592
- "step": 1305
1593
- },
1594
- {
1595
- "epoch": 0.37,
1596
- "learning_rate": 4.885476118954641e-06,
1597
- "loss": 2.8812,
1598
- "step": 1310
1599
- },
1600
- {
1601
- "epoch": 0.37,
1602
- "learning_rate": 4.883598678281767e-06,
1603
- "loss": 2.83,
1604
- "step": 1315
1605
- },
1606
- {
1607
- "epoch": 0.37,
1608
- "learning_rate": 4.881721237608892e-06,
1609
- "loss": 2.8324,
1610
- "step": 1320
1611
- },
1612
- {
1613
- "epoch": 0.37,
1614
- "learning_rate": 4.8798437969360175e-06,
1615
- "loss": 2.829,
1616
- "step": 1325
1617
- },
1618
- {
1619
- "epoch": 0.37,
1620
- "learning_rate": 4.877966356263143e-06,
1621
- "loss": 2.834,
1622
- "step": 1330
1623
- },
1624
- {
1625
- "epoch": 0.37,
1626
- "learning_rate": 4.876088915590268e-06,
1627
- "loss": 3.1544,
1628
- "step": 1335
1629
- },
1630
- {
1631
- "epoch": 0.37,
1632
- "learning_rate": 4.874211474917393e-06,
1633
- "loss": 2.8126,
1634
- "step": 1340
1635
- },
1636
- {
1637
- "epoch": 0.38,
1638
- "learning_rate": 4.8723340342445184e-06,
1639
- "loss": 2.8915,
1640
- "step": 1345
1641
- },
1642
- {
1643
- "epoch": 0.38,
1644
- "learning_rate": 4.870456593571644e-06,
1645
- "loss": 3.0785,
1646
- "step": 1350
1647
- },
1648
- {
1649
- "epoch": 0.38,
1650
- "learning_rate": 4.868579152898769e-06,
1651
- "loss": 2.8628,
1652
- "step": 1355
1653
- },
1654
- {
1655
- "epoch": 0.38,
1656
- "learning_rate": 4.866701712225894e-06,
1657
- "loss": 2.8263,
1658
- "step": 1360
1659
- },
1660
- {
1661
- "epoch": 0.38,
1662
- "learning_rate": 4.864824271553019e-06,
1663
- "loss": 2.8055,
1664
- "step": 1365
1665
- },
1666
- {
1667
- "epoch": 0.38,
1668
- "learning_rate": 4.862946830880145e-06,
1669
- "loss": 2.837,
1670
- "step": 1370
1671
- },
1672
- {
1673
- "epoch": 0.38,
1674
- "learning_rate": 4.86106939020727e-06,
1675
- "loss": 2.8109,
1676
- "step": 1375
1677
- },
1678
- {
1679
- "epoch": 0.39,
1680
- "learning_rate": 4.859191949534395e-06,
1681
- "loss": 2.8313,
1682
- "step": 1380
1683
- },
1684
- {
1685
- "epoch": 0.39,
1686
- "learning_rate": 4.85731450886152e-06,
1687
- "loss": 2.8085,
1688
- "step": 1385
1689
- },
1690
- {
1691
- "epoch": 0.39,
1692
- "learning_rate": 4.855437068188646e-06,
1693
- "loss": 2.9006,
1694
- "step": 1390
1695
- },
1696
- {
1697
- "epoch": 0.39,
1698
- "learning_rate": 4.853559627515771e-06,
1699
- "loss": 2.76,
1700
- "step": 1395
1701
- },
1702
- {
1703
- "epoch": 0.39,
1704
- "learning_rate": 4.851682186842896e-06,
1705
- "loss": 2.7712,
1706
- "step": 1400
1707
- },
1708
- {
1709
- "epoch": 0.39,
1710
- "learning_rate": 4.849804746170021e-06,
1711
- "loss": 2.8082,
1712
- "step": 1405
1713
- },
1714
- {
1715
- "epoch": 0.39,
1716
- "learning_rate": 4.8479273054971466e-06,
1717
- "loss": 2.9101,
1718
- "step": 1410
1719
- },
1720
- {
1721
- "epoch": 0.4,
1722
- "learning_rate": 4.846049864824272e-06,
1723
- "loss": 2.8121,
1724
- "step": 1415
1725
- },
1726
- {
1727
- "epoch": 0.4,
1728
- "learning_rate": 4.844172424151397e-06,
1729
- "loss": 2.7971,
1730
- "step": 1420
1731
- },
1732
- {
1733
- "epoch": 0.4,
1734
- "learning_rate": 4.842294983478522e-06,
1735
- "loss": 2.8376,
1736
- "step": 1425
1737
- },
1738
- {
1739
- "epoch": 0.4,
1740
- "learning_rate": 4.8404175428056475e-06,
1741
- "loss": 2.7967,
1742
- "step": 1430
1743
- },
1744
- {
1745
- "epoch": 0.4,
1746
- "learning_rate": 4.838540102132774e-06,
1747
- "loss": 2.8066,
1748
- "step": 1435
1749
- },
1750
- {
1751
- "epoch": 0.4,
1752
- "learning_rate": 4.836662661459898e-06,
1753
- "loss": 2.7832,
1754
- "step": 1440
1755
- },
1756
- {
1757
- "epoch": 0.4,
1758
- "learning_rate": 4.834785220787023e-06,
1759
- "loss": 2.7981,
1760
- "step": 1445
1761
- },
1762
- {
1763
- "epoch": 0.41,
1764
- "learning_rate": 4.8329077801141485e-06,
1765
- "loss": 2.743,
1766
- "step": 1450
1767
- },
1768
- {
1769
- "epoch": 0.41,
1770
- "learning_rate": 4.831030339441274e-06,
1771
- "loss": 2.8466,
1772
- "step": 1455
1773
- },
1774
- {
1775
- "epoch": 0.41,
1776
- "learning_rate": 4.8291528987684e-06,
1777
- "loss": 2.8222,
1778
- "step": 1460
1779
- },
1780
- {
1781
- "epoch": 0.41,
1782
- "learning_rate": 4.827275458095524e-06,
1783
- "loss": 2.802,
1784
- "step": 1465
1785
- },
1786
- {
1787
- "epoch": 0.41,
1788
- "learning_rate": 4.82539801742265e-06,
1789
- "loss": 2.6981,
1790
- "step": 1470
1791
- },
1792
- {
1793
- "epoch": 0.41,
1794
- "learning_rate": 4.823520576749775e-06,
1795
- "loss": 2.7656,
1796
- "step": 1475
1797
- },
1798
- {
1799
- "epoch": 0.41,
1800
- "learning_rate": 4.821643136076901e-06,
1801
- "loss": 2.6938,
1802
- "step": 1480
1803
- },
1804
- {
1805
- "epoch": 0.41,
1806
- "learning_rate": 4.819765695404026e-06,
1807
- "loss": 2.7163,
1808
- "step": 1485
1809
- },
1810
- {
1811
- "epoch": 0.42,
1812
- "learning_rate": 4.81788825473115e-06,
1813
- "loss": 2.7057,
1814
- "step": 1490
1815
- },
1816
- {
1817
- "epoch": 0.42,
1818
- "learning_rate": 4.8160108140582765e-06,
1819
- "loss": 2.7461,
1820
- "step": 1495
1821
- },
1822
- {
1823
- "epoch": 0.42,
1824
- "learning_rate": 4.814133373385401e-06,
1825
- "loss": 2.7789,
1826
- "step": 1500
1827
- },
1828
- {
1829
- "epoch": 0.42,
1830
- "eval_loss": 2.633471965789795,
1831
- "eval_runtime": 154.8101,
1832
- "eval_samples_per_second": 7.713,
1833
- "eval_steps_per_second": 0.969,
1834
- "eval_wer": 1.0,
1835
- "step": 1500
1836
- }
1837
- ],
1838
- "max_steps": 14316,
1839
- "num_train_epochs": 4,
1840
- "total_flos": 2.1018630215006784e+17,
1841
- "trial_name": null,
1842
- "trial_params": null
1843
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1500/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f05c6a97d90fd87e90a83547a454ff788a066d3d434f2903b0a9058eae5eb5d
3
- size 2991
 
 
 
 
checkpoint-2000/config.json DELETED
@@ -1,109 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/wav2vec2-base-it-voxpopuli",
3
- "activation_dropout": 0.1,
4
- "adapter_kernel_size": 3,
5
- "adapter_stride": 2,
6
- "add_adapter": false,
7
- "apply_spec_augment": true,
8
- "architectures": [
9
- "Wav2Vec2ForCTC"
10
- ],
11
- "attention_dropout": 0.1,
12
- "bos_token_id": 1,
13
- "classifier_proj_size": 256,
14
- "codevector_dim": 256,
15
- "contrastive_logits_temperature": 0.1,
16
- "conv_bias": false,
17
- "conv_dim": [
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512
25
- ],
26
- "conv_kernel": [
27
- 10,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2
34
- ],
35
- "conv_stride": [
36
- 5,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2
43
- ],
44
- "ctc_loss_reduction": "mean",
45
- "ctc_zero_infinity": false,
46
- "diversity_loss_weight": 0.1,
47
- "do_stable_layer_norm": false,
48
- "eos_token_id": 2,
49
- "feat_extract_activation": "gelu",
50
- "feat_extract_dropout": 0.0,
51
- "feat_extract_norm": "group",
52
- "feat_proj_dropout": 0.1,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.1,
55
- "gradient_checkpointing": false,
56
- "hidden_act": "gelu",
57
- "hidden_dropout": 0.1,
58
- "hidden_dropout_prob": 0.1,
59
- "hidden_size": 768,
60
- "initializer_range": 0.02,
61
- "intermediate_size": 3072,
62
- "layer_norm_eps": 1e-05,
63
- "layerdrop": 0.1,
64
- "mask_feature_length": 10,
65
- "mask_feature_min_masks": 0,
66
- "mask_feature_prob": 0.0,
67
- "mask_time_length": 10,
68
- "mask_time_min_masks": 2,
69
- "mask_time_prob": 0.05,
70
- "model_type": "wav2vec2",
71
- "num_adapter_layers": 3,
72
- "num_attention_heads": 12,
73
- "num_codevector_groups": 2,
74
- "num_codevectors_per_group": 320,
75
- "num_conv_pos_embedding_groups": 16,
76
- "num_conv_pos_embeddings": 128,
77
- "num_feat_extract_layers": 7,
78
- "num_hidden_layers": 12,
79
- "num_negatives": 100,
80
- "output_hidden_size": 768,
81
- "pad_token_id": 25,
82
- "proj_codevector_dim": 256,
83
- "tdnn_dilation": [
84
- 1,
85
- 2,
86
- 3,
87
- 1,
88
- 1
89
- ],
90
- "tdnn_dim": [
91
- 512,
92
- 512,
93
- 512,
94
- 512,
95
- 1500
96
- ],
97
- "tdnn_kernel": [
98
- 5,
99
- 3,
100
- 3,
101
- 1,
102
- 1
103
- ],
104
- "torch_dtype": "float32",
105
- "transformers_version": "4.16.1",
106
- "use_weighted_layer_sum": false,
107
- "vocab_size": 32,
108
- "xvector_output_dim": 512
109
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:32a02fe3a48cb8f108292c24e359935169139497ac2ce1a31dd7ebe631cc860a
3
- size 721685265
 
 
 
 
checkpoint-2000/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "return_attention_mask": false,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-2000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ceee4d99fac0e428a3e6c42cf3d28b660b22a63293d37a437f057bc00425c96e
3
- size 377670039
 
 
 
 
checkpoint-2000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cf469464ced309b82e7569855461608a70adf8c27e2aca970016a751298af6e
3
- size 14503
 
 
 
 
checkpoint-2000/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:209c0328055994bede183a9757b67dd653e3674aa457a155edc49f9c54d5c4c4
3
- size 559
 
 
 
 
checkpoint-2000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:98620f310b72cdbfd5eae5ee0c8d95e77ec17d00770e12c509c6b9232279a6de
3
- size 623
 
 
 
 
checkpoint-2000/trainer_state.json DELETED
@@ -1,2452 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.5588153115395362,
5
- "global_step": 2000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 1e-08,
13
- "loss": 24.3697,
14
- "step": 5
15
- },
16
- {
17
- "epoch": 0.0,
18
- "learning_rate": 3.0000000000000004e-08,
19
- "loss": 17.6279,
20
- "step": 10
21
- },
22
- {
23
- "epoch": 0.0,
24
- "learning_rate": 5.5e-08,
25
- "loss": 17.3553,
26
- "step": 15
27
- },
28
- {
29
- "epoch": 0.01,
30
- "learning_rate": 8e-08,
31
- "loss": 16.801,
32
- "step": 20
33
- },
34
- {
35
- "epoch": 0.01,
36
- "learning_rate": 1.0500000000000001e-07,
37
- "loss": 15.1858,
38
- "step": 25
39
- },
40
- {
41
- "epoch": 0.01,
42
- "learning_rate": 1.3e-07,
43
- "loss": 14.8524,
44
- "step": 30
45
- },
46
- {
47
- "epoch": 0.01,
48
- "learning_rate": 1.5500000000000002e-07,
49
- "loss": 14.8696,
50
- "step": 35
51
- },
52
- {
53
- "epoch": 0.01,
54
- "learning_rate": 1.8e-07,
55
- "loss": 14.5785,
56
- "step": 40
57
- },
58
- {
59
- "epoch": 0.01,
60
- "learning_rate": 2.0500000000000002e-07,
61
- "loss": 15.0479,
62
- "step": 45
63
- },
64
- {
65
- "epoch": 0.01,
66
- "learning_rate": 2.3000000000000002e-07,
67
- "loss": 14.5691,
68
- "step": 50
69
- },
70
- {
71
- "epoch": 0.02,
72
- "learning_rate": 2.55e-07,
73
- "loss": 18.4354,
74
- "step": 55
75
- },
76
- {
77
- "epoch": 0.02,
78
- "learning_rate": 2.8e-07,
79
- "loss": 18.1091,
80
- "step": 60
81
- },
82
- {
83
- "epoch": 0.02,
84
- "learning_rate": 3.0500000000000004e-07,
85
- "loss": 17.4985,
86
- "step": 65
87
- },
88
- {
89
- "epoch": 0.02,
90
- "learning_rate": 3.3e-07,
91
- "loss": 15.3995,
92
- "step": 70
93
- },
94
- {
95
- "epoch": 0.02,
96
- "learning_rate": 3.55e-07,
97
- "loss": 15.2441,
98
- "step": 75
99
- },
100
- {
101
- "epoch": 0.02,
102
- "learning_rate": 3.8e-07,
103
- "loss": 14.5564,
104
- "step": 80
105
- },
106
- {
107
- "epoch": 0.02,
108
- "learning_rate": 4.0500000000000004e-07,
109
- "loss": 14.824,
110
- "step": 85
111
- },
112
- {
113
- "epoch": 0.03,
114
- "learning_rate": 4.3e-07,
115
- "loss": 14.9045,
116
- "step": 90
117
- },
118
- {
119
- "epoch": 0.03,
120
- "learning_rate": 4.5500000000000004e-07,
121
- "loss": 14.4084,
122
- "step": 95
123
- },
124
- {
125
- "epoch": 0.03,
126
- "learning_rate": 4.800000000000001e-07,
127
- "loss": 14.5942,
128
- "step": 100
129
- },
130
- {
131
- "epoch": 0.03,
132
- "learning_rate": 5.05e-07,
133
- "loss": 18.0525,
134
- "step": 105
135
- },
136
- {
137
- "epoch": 0.03,
138
- "learning_rate": 5.3e-07,
139
- "loss": 16.5825,
140
- "step": 110
141
- },
142
- {
143
- "epoch": 0.03,
144
- "learning_rate": 5.550000000000001e-07,
145
- "loss": 17.0772,
146
- "step": 115
147
- },
148
- {
149
- "epoch": 0.03,
150
- "learning_rate": 5.800000000000001e-07,
151
- "loss": 15.6476,
152
- "step": 120
153
- },
154
- {
155
- "epoch": 0.03,
156
- "learning_rate": 6.05e-07,
157
- "loss": 14.2361,
158
- "step": 125
159
- },
160
- {
161
- "epoch": 0.04,
162
- "learning_rate": 6.3e-07,
163
- "loss": 14.3617,
164
- "step": 130
165
- },
166
- {
167
- "epoch": 0.04,
168
- "learning_rate": 6.550000000000001e-07,
169
- "loss": 14.0812,
170
- "step": 135
171
- },
172
- {
173
- "epoch": 0.04,
174
- "learning_rate": 6.800000000000001e-07,
175
- "loss": 13.7279,
176
- "step": 140
177
- },
178
- {
179
- "epoch": 0.04,
180
- "learning_rate": 7.05e-07,
181
- "loss": 13.5363,
182
- "step": 145
183
- },
184
- {
185
- "epoch": 0.04,
186
- "learning_rate": 7.3e-07,
187
- "loss": 14.8066,
188
- "step": 150
189
- },
190
- {
191
- "epoch": 0.04,
192
- "learning_rate": 7.5e-07,
193
- "loss": 17.2667,
194
- "step": 155
195
- },
196
- {
197
- "epoch": 0.04,
198
- "learning_rate": 7.750000000000001e-07,
199
- "loss": 15.1567,
200
- "step": 160
201
- },
202
- {
203
- "epoch": 0.05,
204
- "learning_rate": 8.000000000000001e-07,
205
- "loss": 16.3138,
206
- "step": 165
207
- },
208
- {
209
- "epoch": 0.05,
210
- "learning_rate": 8.250000000000001e-07,
211
- "loss": 13.1312,
212
- "step": 170
213
- },
214
- {
215
- "epoch": 0.05,
216
- "learning_rate": 8.500000000000001e-07,
217
- "loss": 12.7545,
218
- "step": 175
219
- },
220
- {
221
- "epoch": 0.05,
222
- "learning_rate": 8.75e-07,
223
- "loss": 13.0234,
224
- "step": 180
225
- },
226
- {
227
- "epoch": 0.05,
228
- "learning_rate": 9.000000000000001e-07,
229
- "loss": 13.2179,
230
- "step": 185
231
- },
232
- {
233
- "epoch": 0.05,
234
- "learning_rate": 9.25e-07,
235
- "loss": 13.0916,
236
- "step": 190
237
- },
238
- {
239
- "epoch": 0.05,
240
- "learning_rate": 9.500000000000001e-07,
241
- "loss": 11.6765,
242
- "step": 195
243
- },
244
- {
245
- "epoch": 0.06,
246
- "learning_rate": 9.750000000000002e-07,
247
- "loss": 12.5044,
248
- "step": 200
249
- },
250
- {
251
- "epoch": 0.06,
252
- "learning_rate": 1.0000000000000002e-06,
253
- "loss": 15.0522,
254
- "step": 205
255
- },
256
- {
257
- "epoch": 0.06,
258
- "learning_rate": 1.025e-06,
259
- "loss": 12.4961,
260
- "step": 210
261
- },
262
- {
263
- "epoch": 0.06,
264
- "learning_rate": 1.0500000000000001e-06,
265
- "loss": 11.6473,
266
- "step": 215
267
- },
268
- {
269
- "epoch": 0.06,
270
- "learning_rate": 1.075e-06,
271
- "loss": 10.5303,
272
- "step": 220
273
- },
274
- {
275
- "epoch": 0.06,
276
- "learning_rate": 1.1e-06,
277
- "loss": 12.0959,
278
- "step": 225
279
- },
280
- {
281
- "epoch": 0.06,
282
- "learning_rate": 1.125e-06,
283
- "loss": 10.7053,
284
- "step": 230
285
- },
286
- {
287
- "epoch": 0.07,
288
- "learning_rate": 1.1500000000000002e-06,
289
- "loss": 9.5165,
290
- "step": 235
291
- },
292
- {
293
- "epoch": 0.07,
294
- "learning_rate": 1.175e-06,
295
- "loss": 11.838,
296
- "step": 240
297
- },
298
- {
299
- "epoch": 0.07,
300
- "learning_rate": 1.2000000000000002e-06,
301
- "loss": 9.5919,
302
- "step": 245
303
- },
304
- {
305
- "epoch": 0.07,
306
- "learning_rate": 1.2250000000000001e-06,
307
- "loss": 9.7433,
308
- "step": 250
309
- },
310
- {
311
- "epoch": 0.07,
312
- "learning_rate": 1.25e-06,
313
- "loss": 10.6129,
314
- "step": 255
315
- },
316
- {
317
- "epoch": 0.07,
318
- "learning_rate": 1.275e-06,
319
- "loss": 9.6936,
320
- "step": 260
321
- },
322
- {
323
- "epoch": 0.07,
324
- "learning_rate": 1.3e-06,
325
- "loss": 10.8679,
326
- "step": 265
327
- },
328
- {
329
- "epoch": 0.08,
330
- "learning_rate": 1.3250000000000002e-06,
331
- "loss": 12.5521,
332
- "step": 270
333
- },
334
- {
335
- "epoch": 0.08,
336
- "learning_rate": 1.3500000000000002e-06,
337
- "loss": 8.2902,
338
- "step": 275
339
- },
340
- {
341
- "epoch": 0.08,
342
- "learning_rate": 1.3750000000000002e-06,
343
- "loss": 8.7901,
344
- "step": 280
345
- },
346
- {
347
- "epoch": 0.08,
348
- "learning_rate": 1.4000000000000001e-06,
349
- "loss": 7.9864,
350
- "step": 285
351
- },
352
- {
353
- "epoch": 0.08,
354
- "learning_rate": 1.425e-06,
355
- "loss": 8.0575,
356
- "step": 290
357
- },
358
- {
359
- "epoch": 0.08,
360
- "learning_rate": 1.45e-06,
361
- "loss": 7.4836,
362
- "step": 295
363
- },
364
- {
365
- "epoch": 0.08,
366
- "learning_rate": 1.475e-06,
367
- "loss": 10.0019,
368
- "step": 300
369
- },
370
- {
371
- "epoch": 0.09,
372
- "learning_rate": 1.5e-06,
373
- "loss": 11.417,
374
- "step": 305
375
- },
376
- {
377
- "epoch": 0.09,
378
- "learning_rate": 1.525e-06,
379
- "loss": 8.4366,
380
- "step": 310
381
- },
382
- {
383
- "epoch": 0.09,
384
- "learning_rate": 1.5500000000000002e-06,
385
- "loss": 8.6453,
386
- "step": 315
387
- },
388
- {
389
- "epoch": 0.09,
390
- "learning_rate": 1.5750000000000002e-06,
391
- "loss": 7.6421,
392
- "step": 320
393
- },
394
- {
395
- "epoch": 0.09,
396
- "learning_rate": 1.6000000000000001e-06,
397
- "loss": 8.7056,
398
- "step": 325
399
- },
400
- {
401
- "epoch": 0.09,
402
- "learning_rate": 1.6250000000000001e-06,
403
- "loss": 7.4756,
404
- "step": 330
405
- },
406
- {
407
- "epoch": 0.09,
408
- "learning_rate": 1.6500000000000003e-06,
409
- "loss": 6.6272,
410
- "step": 335
411
- },
412
- {
413
- "epoch": 0.09,
414
- "learning_rate": 1.6750000000000003e-06,
415
- "loss": 6.9702,
416
- "step": 340
417
- },
418
- {
419
- "epoch": 0.1,
420
- "learning_rate": 1.7000000000000002e-06,
421
- "loss": 6.971,
422
- "step": 345
423
- },
424
- {
425
- "epoch": 0.1,
426
- "learning_rate": 1.725e-06,
427
- "loss": 6.4695,
428
- "step": 350
429
- },
430
- {
431
- "epoch": 0.1,
432
- "learning_rate": 1.75e-06,
433
- "loss": 9.6089,
434
- "step": 355
435
- },
436
- {
437
- "epoch": 0.1,
438
- "learning_rate": 1.7750000000000002e-06,
439
- "loss": 7.6838,
440
- "step": 360
441
- },
442
- {
443
- "epoch": 0.1,
444
- "learning_rate": 1.8000000000000001e-06,
445
- "loss": 6.6076,
446
- "step": 365
447
- },
448
- {
449
- "epoch": 0.1,
450
- "learning_rate": 1.825e-06,
451
- "loss": 8.1119,
452
- "step": 370
453
- },
454
- {
455
- "epoch": 0.1,
456
- "learning_rate": 1.85e-06,
457
- "loss": 8.6286,
458
- "step": 375
459
- },
460
- {
461
- "epoch": 0.11,
462
- "learning_rate": 1.8750000000000003e-06,
463
- "loss": 6.2768,
464
- "step": 380
465
- },
466
- {
467
- "epoch": 0.11,
468
- "learning_rate": 1.9000000000000002e-06,
469
- "loss": 6.9159,
470
- "step": 385
471
- },
472
- {
473
- "epoch": 0.11,
474
- "learning_rate": 1.925e-06,
475
- "loss": 7.0885,
476
- "step": 390
477
- },
478
- {
479
- "epoch": 0.11,
480
- "learning_rate": 1.9500000000000004e-06,
481
- "loss": 8.5395,
482
- "step": 395
483
- },
484
- {
485
- "epoch": 0.11,
486
- "learning_rate": 1.975e-06,
487
- "loss": 8.7365,
488
- "step": 400
489
- },
490
- {
491
- "epoch": 0.11,
492
- "learning_rate": 2.0000000000000003e-06,
493
- "loss": 8.7254,
494
- "step": 405
495
- },
496
- {
497
- "epoch": 0.11,
498
- "learning_rate": 2.025e-06,
499
- "loss": 6.4878,
500
- "step": 410
501
- },
502
- {
503
- "epoch": 0.12,
504
- "learning_rate": 2.05e-06,
505
- "loss": 10.4269,
506
- "step": 415
507
- },
508
- {
509
- "epoch": 0.12,
510
- "learning_rate": 2.075e-06,
511
- "loss": 6.2613,
512
- "step": 420
513
- },
514
- {
515
- "epoch": 0.12,
516
- "learning_rate": 2.1000000000000002e-06,
517
- "loss": 6.6001,
518
- "step": 425
519
- },
520
- {
521
- "epoch": 0.12,
522
- "learning_rate": 2.125e-06,
523
- "loss": 6.0585,
524
- "step": 430
525
- },
526
- {
527
- "epoch": 0.12,
528
- "learning_rate": 2.15e-06,
529
- "loss": 6.2137,
530
- "step": 435
531
- },
532
- {
533
- "epoch": 0.12,
534
- "learning_rate": 2.1750000000000004e-06,
535
- "loss": 8.026,
536
- "step": 440
537
- },
538
- {
539
- "epoch": 0.12,
540
- "learning_rate": 2.2e-06,
541
- "loss": 8.5144,
542
- "step": 445
543
- },
544
- {
545
- "epoch": 0.13,
546
- "learning_rate": 2.2250000000000003e-06,
547
- "loss": 6.092,
548
- "step": 450
549
- },
550
- {
551
- "epoch": 0.13,
552
- "learning_rate": 2.25e-06,
553
- "loss": 8.8018,
554
- "step": 455
555
- },
556
- {
557
- "epoch": 0.13,
558
- "learning_rate": 2.2750000000000002e-06,
559
- "loss": 6.2074,
560
- "step": 460
561
- },
562
- {
563
- "epoch": 0.13,
564
- "learning_rate": 2.3000000000000004e-06,
565
- "loss": 10.6086,
566
- "step": 465
567
- },
568
- {
569
- "epoch": 0.13,
570
- "learning_rate": 2.325e-06,
571
- "loss": 6.5399,
572
- "step": 470
573
- },
574
- {
575
- "epoch": 0.13,
576
- "learning_rate": 2.35e-06,
577
- "loss": 7.0942,
578
- "step": 475
579
- },
580
- {
581
- "epoch": 0.13,
582
- "learning_rate": 2.375e-06,
583
- "loss": 6.4347,
584
- "step": 480
585
- },
586
- {
587
- "epoch": 0.14,
588
- "learning_rate": 2.4000000000000003e-06,
589
- "loss": 7.1602,
590
- "step": 485
591
- },
592
- {
593
- "epoch": 0.14,
594
- "learning_rate": 2.425e-06,
595
- "loss": 5.4093,
596
- "step": 490
597
- },
598
- {
599
- "epoch": 0.14,
600
- "learning_rate": 2.4500000000000003e-06,
601
- "loss": 6.618,
602
- "step": 495
603
- },
604
- {
605
- "epoch": 0.14,
606
- "learning_rate": 2.475e-06,
607
- "loss": 8.3884,
608
- "step": 500
609
- },
610
- {
611
- "epoch": 0.14,
612
- "eval_loss": 7.4285359382629395,
613
- "eval_runtime": 109.7145,
614
- "eval_samples_per_second": 10.883,
615
- "eval_steps_per_second": 1.367,
616
- "eval_wer": 1.0,
617
- "step": 500
618
- },
619
- {
620
- "epoch": 0.14,
621
- "learning_rate": 2.5e-06,
622
- "loss": 8.5122,
623
- "step": 505
624
- },
625
- {
626
- "epoch": 0.14,
627
- "learning_rate": 2.5250000000000004e-06,
628
- "loss": 9.0237,
629
- "step": 510
630
- },
631
- {
632
- "epoch": 0.14,
633
- "learning_rate": 2.55e-06,
634
- "loss": 6.343,
635
- "step": 515
636
- },
637
- {
638
- "epoch": 0.15,
639
- "learning_rate": 2.5750000000000003e-06,
640
- "loss": 6.3103,
641
- "step": 520
642
- },
643
- {
644
- "epoch": 0.15,
645
- "learning_rate": 2.6e-06,
646
- "loss": 7.6124,
647
- "step": 525
648
- },
649
- {
650
- "epoch": 0.15,
651
- "learning_rate": 2.6250000000000003e-06,
652
- "loss": 7.1252,
653
- "step": 530
654
- },
655
- {
656
- "epoch": 0.15,
657
- "learning_rate": 2.6500000000000005e-06,
658
- "loss": 7.0198,
659
- "step": 535
660
- },
661
- {
662
- "epoch": 0.15,
663
- "learning_rate": 2.6750000000000002e-06,
664
- "loss": 6.3734,
665
- "step": 540
666
- },
667
- {
668
- "epoch": 0.15,
669
- "learning_rate": 2.7000000000000004e-06,
670
- "loss": 4.7063,
671
- "step": 545
672
- },
673
- {
674
- "epoch": 0.15,
675
- "learning_rate": 2.7250000000000006e-06,
676
- "loss": 7.7081,
677
- "step": 550
678
- },
679
- {
680
- "epoch": 0.16,
681
- "learning_rate": 2.7500000000000004e-06,
682
- "loss": 5.9,
683
- "step": 555
684
- },
685
- {
686
- "epoch": 0.16,
687
- "learning_rate": 2.7750000000000005e-06,
688
- "loss": 5.634,
689
- "step": 560
690
- },
691
- {
692
- "epoch": 0.16,
693
- "learning_rate": 2.8000000000000003e-06,
694
- "loss": 8.6987,
695
- "step": 565
696
- },
697
- {
698
- "epoch": 0.16,
699
- "learning_rate": 2.825e-06,
700
- "loss": 6.5511,
701
- "step": 570
702
- },
703
- {
704
- "epoch": 0.16,
705
- "learning_rate": 2.85e-06,
706
- "loss": 7.6908,
707
- "step": 575
708
- },
709
- {
710
- "epoch": 0.16,
711
- "learning_rate": 2.875e-06,
712
- "loss": 7.4105,
713
- "step": 580
714
- },
715
- {
716
- "epoch": 0.16,
717
- "learning_rate": 2.9e-06,
718
- "loss": 5.802,
719
- "step": 585
720
- },
721
- {
722
- "epoch": 0.16,
723
- "learning_rate": 2.925e-06,
724
- "loss": 6.0835,
725
- "step": 590
726
- },
727
- {
728
- "epoch": 0.17,
729
- "learning_rate": 2.95e-06,
730
- "loss": 7.8744,
731
- "step": 595
732
- },
733
- {
734
- "epoch": 0.17,
735
- "learning_rate": 2.9750000000000003e-06,
736
- "loss": 6.1134,
737
- "step": 600
738
- },
739
- {
740
- "epoch": 0.17,
741
- "learning_rate": 3e-06,
742
- "loss": 6.6416,
743
- "step": 605
744
- },
745
- {
746
- "epoch": 0.17,
747
- "learning_rate": 3.0250000000000003e-06,
748
- "loss": 8.7177,
749
- "step": 610
750
- },
751
- {
752
- "epoch": 0.17,
753
- "learning_rate": 3.05e-06,
754
- "loss": 6.2383,
755
- "step": 615
756
- },
757
- {
758
- "epoch": 0.17,
759
- "learning_rate": 3.075e-06,
760
- "loss": 6.2413,
761
- "step": 620
762
- },
763
- {
764
- "epoch": 0.17,
765
- "learning_rate": 3.1000000000000004e-06,
766
- "loss": 5.4088,
767
- "step": 625
768
- },
769
- {
770
- "epoch": 0.18,
771
- "learning_rate": 3.125e-06,
772
- "loss": 4.3896,
773
- "step": 630
774
- },
775
- {
776
- "epoch": 0.18,
777
- "learning_rate": 3.1500000000000003e-06,
778
- "loss": 5.6138,
779
- "step": 635
780
- },
781
- {
782
- "epoch": 0.18,
783
- "learning_rate": 3.175e-06,
784
- "loss": 5.0078,
785
- "step": 640
786
- },
787
- {
788
- "epoch": 0.18,
789
- "learning_rate": 3.2000000000000003e-06,
790
- "loss": 6.9994,
791
- "step": 645
792
- },
793
- {
794
- "epoch": 0.18,
795
- "learning_rate": 3.2250000000000005e-06,
796
- "loss": 5.4662,
797
- "step": 650
798
- },
799
- {
800
- "epoch": 0.18,
801
- "learning_rate": 3.2500000000000002e-06,
802
- "loss": 5.144,
803
- "step": 655
804
- },
805
- {
806
- "epoch": 0.18,
807
- "learning_rate": 3.2750000000000004e-06,
808
- "loss": 5.4199,
809
- "step": 660
810
- },
811
- {
812
- "epoch": 0.19,
813
- "learning_rate": 3.3000000000000006e-06,
814
- "loss": 6.5677,
815
- "step": 665
816
- },
817
- {
818
- "epoch": 0.19,
819
- "learning_rate": 3.3250000000000004e-06,
820
- "loss": 6.168,
821
- "step": 670
822
- },
823
- {
824
- "epoch": 0.19,
825
- "learning_rate": 3.3500000000000005e-06,
826
- "loss": 6.3248,
827
- "step": 675
828
- },
829
- {
830
- "epoch": 0.19,
831
- "learning_rate": 3.3750000000000003e-06,
832
- "loss": 5.6306,
833
- "step": 680
834
- },
835
- {
836
- "epoch": 0.19,
837
- "learning_rate": 3.4000000000000005e-06,
838
- "loss": 4.4895,
839
- "step": 685
840
- },
841
- {
842
- "epoch": 0.19,
843
- "learning_rate": 3.4250000000000007e-06,
844
- "loss": 4.7549,
845
- "step": 690
846
- },
847
- {
848
- "epoch": 0.19,
849
- "learning_rate": 3.45e-06,
850
- "loss": 3.8521,
851
- "step": 695
852
- },
853
- {
854
- "epoch": 0.2,
855
- "learning_rate": 3.475e-06,
856
- "loss": 7.8289,
857
- "step": 700
858
- },
859
- {
860
- "epoch": 0.2,
861
- "learning_rate": 3.5e-06,
862
- "loss": 8.7039,
863
- "step": 705
864
- },
865
- {
866
- "epoch": 0.2,
867
- "learning_rate": 3.525e-06,
868
- "loss": 7.9723,
869
- "step": 710
870
- },
871
- {
872
- "epoch": 0.2,
873
- "learning_rate": 3.5500000000000003e-06,
874
- "loss": 7.4315,
875
- "step": 715
876
- },
877
- {
878
- "epoch": 0.2,
879
- "learning_rate": 3.575e-06,
880
- "loss": 5.6061,
881
- "step": 720
882
- },
883
- {
884
- "epoch": 0.2,
885
- "learning_rate": 3.6000000000000003e-06,
886
- "loss": 5.4848,
887
- "step": 725
888
- },
889
- {
890
- "epoch": 0.2,
891
- "learning_rate": 3.625e-06,
892
- "loss": 4.1425,
893
- "step": 730
894
- },
895
- {
896
- "epoch": 0.21,
897
- "learning_rate": 3.65e-06,
898
- "loss": 5.5031,
899
- "step": 735
900
- },
901
- {
902
- "epoch": 0.21,
903
- "learning_rate": 3.6750000000000004e-06,
904
- "loss": 4.4556,
905
- "step": 740
906
- },
907
- {
908
- "epoch": 0.21,
909
- "learning_rate": 3.7e-06,
910
- "loss": 5.9729,
911
- "step": 745
912
- },
913
- {
914
- "epoch": 0.21,
915
- "learning_rate": 3.7250000000000003e-06,
916
- "loss": 4.5424,
917
- "step": 750
918
- },
919
- {
920
- "epoch": 0.21,
921
- "learning_rate": 3.7500000000000005e-06,
922
- "loss": 7.1968,
923
- "step": 755
924
- },
925
- {
926
- "epoch": 0.21,
927
- "learning_rate": 3.7750000000000003e-06,
928
- "loss": 6.0854,
929
- "step": 760
930
- },
931
- {
932
- "epoch": 0.21,
933
- "learning_rate": 3.8000000000000005e-06,
934
- "loss": 5.1684,
935
- "step": 765
936
- },
937
- {
938
- "epoch": 0.22,
939
- "learning_rate": 3.825000000000001e-06,
940
- "loss": 4.8008,
941
- "step": 770
942
- },
943
- {
944
- "epoch": 0.22,
945
- "learning_rate": 3.85e-06,
946
- "loss": 5.2927,
947
- "step": 775
948
- },
949
- {
950
- "epoch": 0.22,
951
- "learning_rate": 3.875e-06,
952
- "loss": 4.1035,
953
- "step": 780
954
- },
955
- {
956
- "epoch": 0.22,
957
- "learning_rate": 3.900000000000001e-06,
958
- "loss": 4.083,
959
- "step": 785
960
- },
961
- {
962
- "epoch": 0.22,
963
- "learning_rate": 3.9250000000000005e-06,
964
- "loss": 4.2797,
965
- "step": 790
966
- },
967
- {
968
- "epoch": 0.22,
969
- "learning_rate": 3.95e-06,
970
- "loss": 6.4263,
971
- "step": 795
972
- },
973
- {
974
- "epoch": 0.22,
975
- "learning_rate": 3.975000000000001e-06,
976
- "loss": 6.5447,
977
- "step": 800
978
- },
979
- {
980
- "epoch": 0.22,
981
- "learning_rate": 4.000000000000001e-06,
982
- "loss": 5.9181,
983
- "step": 805
984
- },
985
- {
986
- "epoch": 0.23,
987
- "learning_rate": 4.0250000000000004e-06,
988
- "loss": 6.3199,
989
- "step": 810
990
- },
991
- {
992
- "epoch": 0.23,
993
- "learning_rate": 4.05e-06,
994
- "loss": 7.4045,
995
- "step": 815
996
- },
997
- {
998
- "epoch": 0.23,
999
- "learning_rate": 4.075e-06,
1000
- "loss": 4.7224,
1001
- "step": 820
1002
- },
1003
- {
1004
- "epoch": 0.23,
1005
- "learning_rate": 4.1e-06,
1006
- "loss": 4.3196,
1007
- "step": 825
1008
- },
1009
- {
1010
- "epoch": 0.23,
1011
- "learning_rate": 4.125e-06,
1012
- "loss": 6.7817,
1013
- "step": 830
1014
- },
1015
- {
1016
- "epoch": 0.23,
1017
- "learning_rate": 4.15e-06,
1018
- "loss": 3.9509,
1019
- "step": 835
1020
- },
1021
- {
1022
- "epoch": 0.23,
1023
- "learning_rate": 4.175e-06,
1024
- "loss": 5.2207,
1025
- "step": 840
1026
- },
1027
- {
1028
- "epoch": 0.24,
1029
- "learning_rate": 4.2000000000000004e-06,
1030
- "loss": 4.5517,
1031
- "step": 845
1032
- },
1033
- {
1034
- "epoch": 0.24,
1035
- "learning_rate": 4.225e-06,
1036
- "loss": 5.7441,
1037
- "step": 850
1038
- },
1039
- {
1040
- "epoch": 0.24,
1041
- "learning_rate": 4.25e-06,
1042
- "loss": 6.9507,
1043
- "step": 855
1044
- },
1045
- {
1046
- "epoch": 0.24,
1047
- "learning_rate": 4.2750000000000006e-06,
1048
- "loss": 4.8579,
1049
- "step": 860
1050
- },
1051
- {
1052
- "epoch": 0.24,
1053
- "learning_rate": 4.3e-06,
1054
- "loss": 5.6729,
1055
- "step": 865
1056
- },
1057
- {
1058
- "epoch": 0.24,
1059
- "learning_rate": 4.325e-06,
1060
- "loss": 5.7697,
1061
- "step": 870
1062
- },
1063
- {
1064
- "epoch": 0.24,
1065
- "learning_rate": 4.350000000000001e-06,
1066
- "loss": 4.7595,
1067
- "step": 875
1068
- },
1069
- {
1070
- "epoch": 0.25,
1071
- "learning_rate": 4.3750000000000005e-06,
1072
- "loss": 4.6507,
1073
- "step": 880
1074
- },
1075
- {
1076
- "epoch": 0.25,
1077
- "learning_rate": 4.4e-06,
1078
- "loss": 5.0327,
1079
- "step": 885
1080
- },
1081
- {
1082
- "epoch": 0.25,
1083
- "learning_rate": 4.425e-06,
1084
- "loss": 4.3302,
1085
- "step": 890
1086
- },
1087
- {
1088
- "epoch": 0.25,
1089
- "learning_rate": 4.450000000000001e-06,
1090
- "loss": 3.2291,
1091
- "step": 895
1092
- },
1093
- {
1094
- "epoch": 0.25,
1095
- "learning_rate": 4.475e-06,
1096
- "loss": 3.5748,
1097
- "step": 900
1098
- },
1099
- {
1100
- "epoch": 0.25,
1101
- "learning_rate": 4.5e-06,
1102
- "loss": 2.9844,
1103
- "step": 905
1104
- },
1105
- {
1106
- "epoch": 0.25,
1107
- "learning_rate": 4.525000000000001e-06,
1108
- "loss": 3.2969,
1109
- "step": 910
1110
- },
1111
- {
1112
- "epoch": 0.26,
1113
- "learning_rate": 4.5500000000000005e-06,
1114
- "loss": 3.2062,
1115
- "step": 915
1116
- },
1117
- {
1118
- "epoch": 0.26,
1119
- "learning_rate": 4.575e-06,
1120
- "loss": 3.1091,
1121
- "step": 920
1122
- },
1123
- {
1124
- "epoch": 0.26,
1125
- "learning_rate": 4.600000000000001e-06,
1126
- "loss": 3.288,
1127
- "step": 925
1128
- },
1129
- {
1130
- "epoch": 0.26,
1131
- "learning_rate": 4.625000000000001e-06,
1132
- "loss": 2.8966,
1133
- "step": 930
1134
- },
1135
- {
1136
- "epoch": 0.26,
1137
- "learning_rate": 4.65e-06,
1138
- "loss": 2.9091,
1139
- "step": 935
1140
- },
1141
- {
1142
- "epoch": 0.26,
1143
- "learning_rate": 4.675000000000001e-06,
1144
- "loss": 3.212,
1145
- "step": 940
1146
- },
1147
- {
1148
- "epoch": 0.26,
1149
- "learning_rate": 4.7e-06,
1150
- "loss": 2.8426,
1151
- "step": 945
1152
- },
1153
- {
1154
- "epoch": 0.27,
1155
- "learning_rate": 4.7250000000000005e-06,
1156
- "loss": 3.1961,
1157
- "step": 950
1158
- },
1159
- {
1160
- "epoch": 0.27,
1161
- "learning_rate": 4.75e-06,
1162
- "loss": 2.9483,
1163
- "step": 955
1164
- },
1165
- {
1166
- "epoch": 0.27,
1167
- "learning_rate": 4.775e-06,
1168
- "loss": 2.9817,
1169
- "step": 960
1170
- },
1171
- {
1172
- "epoch": 0.27,
1173
- "learning_rate": 4.800000000000001e-06,
1174
- "loss": 3.0139,
1175
- "step": 965
1176
- },
1177
- {
1178
- "epoch": 0.27,
1179
- "learning_rate": 4.825e-06,
1180
- "loss": 2.8438,
1181
- "step": 970
1182
- },
1183
- {
1184
- "epoch": 0.27,
1185
- "learning_rate": 4.85e-06,
1186
- "loss": 3.008,
1187
- "step": 975
1188
- },
1189
- {
1190
- "epoch": 0.27,
1191
- "learning_rate": 4.875e-06,
1192
- "loss": 2.9319,
1193
- "step": 980
1194
- },
1195
- {
1196
- "epoch": 0.28,
1197
- "learning_rate": 4.9000000000000005e-06,
1198
- "loss": 2.8704,
1199
- "step": 985
1200
- },
1201
- {
1202
- "epoch": 0.28,
1203
- "learning_rate": 4.925e-06,
1204
- "loss": 3.0068,
1205
- "step": 990
1206
- },
1207
- {
1208
- "epoch": 0.28,
1209
- "learning_rate": 4.95e-06,
1210
- "loss": 2.8494,
1211
- "step": 995
1212
- },
1213
- {
1214
- "epoch": 0.28,
1215
- "learning_rate": 4.975000000000001e-06,
1216
- "loss": 2.8652,
1217
- "step": 1000
1218
- },
1219
- {
1220
- "epoch": 0.28,
1221
- "eval_loss": 2.881692886352539,
1222
- "eval_runtime": 141.6494,
1223
- "eval_samples_per_second": 8.429,
1224
- "eval_steps_per_second": 1.059,
1225
- "eval_wer": 1.0,
1226
- "step": 1000
1227
- },
1228
- {
1229
- "epoch": 0.28,
1230
- "learning_rate": 5e-06,
1231
- "loss": 2.9299,
1232
- "step": 1005
1233
- },
1234
- {
1235
- "epoch": 0.28,
1236
- "learning_rate": 4.998122559327126e-06,
1237
- "loss": 2.855,
1238
- "step": 1010
1239
- },
1240
- {
1241
- "epoch": 0.28,
1242
- "learning_rate": 4.996245118654251e-06,
1243
- "loss": 2.8683,
1244
- "step": 1015
1245
- },
1246
- {
1247
- "epoch": 0.28,
1248
- "learning_rate": 4.994367677981376e-06,
1249
- "loss": 2.8563,
1250
- "step": 1020
1251
- },
1252
- {
1253
- "epoch": 0.29,
1254
- "learning_rate": 4.992490237308501e-06,
1255
- "loss": 2.9462,
1256
- "step": 1025
1257
- },
1258
- {
1259
- "epoch": 0.29,
1260
- "learning_rate": 4.990612796635627e-06,
1261
- "loss": 2.8713,
1262
- "step": 1030
1263
- },
1264
- {
1265
- "epoch": 0.29,
1266
- "learning_rate": 4.988735355962752e-06,
1267
- "loss": 2.9464,
1268
- "step": 1035
1269
- },
1270
- {
1271
- "epoch": 0.29,
1272
- "learning_rate": 4.986857915289877e-06,
1273
- "loss": 2.8587,
1274
- "step": 1040
1275
- },
1276
- {
1277
- "epoch": 0.29,
1278
- "learning_rate": 4.984980474617002e-06,
1279
- "loss": 2.9491,
1280
- "step": 1045
1281
- },
1282
- {
1283
- "epoch": 0.29,
1284
- "learning_rate": 4.9831030339441276e-06,
1285
- "loss": 2.8847,
1286
- "step": 1050
1287
- },
1288
- {
1289
- "epoch": 0.29,
1290
- "learning_rate": 4.981225593271253e-06,
1291
- "loss": 2.9443,
1292
- "step": 1055
1293
- },
1294
- {
1295
- "epoch": 0.3,
1296
- "learning_rate": 4.979348152598378e-06,
1297
- "loss": 2.9,
1298
- "step": 1060
1299
- },
1300
- {
1301
- "epoch": 0.3,
1302
- "learning_rate": 4.977470711925503e-06,
1303
- "loss": 2.8448,
1304
- "step": 1065
1305
- },
1306
- {
1307
- "epoch": 0.3,
1308
- "learning_rate": 4.9755932712526285e-06,
1309
- "loss": 2.862,
1310
- "step": 1070
1311
- },
1312
- {
1313
- "epoch": 0.3,
1314
- "learning_rate": 4.973715830579754e-06,
1315
- "loss": 2.8782,
1316
- "step": 1075
1317
- },
1318
- {
1319
- "epoch": 0.3,
1320
- "learning_rate": 4.97183838990688e-06,
1321
- "loss": 2.866,
1322
- "step": 1080
1323
- },
1324
- {
1325
- "epoch": 0.3,
1326
- "learning_rate": 4.969960949234004e-06,
1327
- "loss": 2.9187,
1328
- "step": 1085
1329
- },
1330
- {
1331
- "epoch": 0.3,
1332
- "learning_rate": 4.96808350856113e-06,
1333
- "loss": 2.8627,
1334
- "step": 1090
1335
- },
1336
- {
1337
- "epoch": 0.31,
1338
- "learning_rate": 4.966206067888255e-06,
1339
- "loss": 2.8636,
1340
- "step": 1095
1341
- },
1342
- {
1343
- "epoch": 0.31,
1344
- "learning_rate": 4.964328627215381e-06,
1345
- "loss": 2.9201,
1346
- "step": 1100
1347
- },
1348
- {
1349
- "epoch": 0.31,
1350
- "learning_rate": 4.962451186542506e-06,
1351
- "loss": 2.9078,
1352
- "step": 1105
1353
- },
1354
- {
1355
- "epoch": 0.31,
1356
- "learning_rate": 4.9605737458696305e-06,
1357
- "loss": 2.8853,
1358
- "step": 1110
1359
- },
1360
- {
1361
- "epoch": 0.31,
1362
- "learning_rate": 4.9586963051967565e-06,
1363
- "loss": 2.8612,
1364
- "step": 1115
1365
- },
1366
- {
1367
- "epoch": 0.31,
1368
- "learning_rate": 4.956818864523881e-06,
1369
- "loss": 2.8607,
1370
- "step": 1120
1371
- },
1372
- {
1373
- "epoch": 0.31,
1374
- "learning_rate": 4.954941423851007e-06,
1375
- "loss": 2.845,
1376
- "step": 1125
1377
- },
1378
- {
1379
- "epoch": 0.32,
1380
- "learning_rate": 4.953063983178132e-06,
1381
- "loss": 2.8778,
1382
- "step": 1130
1383
- },
1384
- {
1385
- "epoch": 0.32,
1386
- "learning_rate": 4.9511865425052575e-06,
1387
- "loss": 3.1876,
1388
- "step": 1135
1389
- },
1390
- {
1391
- "epoch": 0.32,
1392
- "learning_rate": 4.949309101832383e-06,
1393
- "loss": 2.8603,
1394
- "step": 1140
1395
- },
1396
- {
1397
- "epoch": 0.32,
1398
- "learning_rate": 4.947431661159508e-06,
1399
- "loss": 2.8403,
1400
- "step": 1145
1401
- },
1402
- {
1403
- "epoch": 0.32,
1404
- "learning_rate": 4.945554220486633e-06,
1405
- "loss": 2.8791,
1406
- "step": 1150
1407
- },
1408
- {
1409
- "epoch": 0.32,
1410
- "learning_rate": 4.9436767798137585e-06,
1411
- "loss": 2.8604,
1412
- "step": 1155
1413
- },
1414
- {
1415
- "epoch": 0.32,
1416
- "learning_rate": 4.941799339140884e-06,
1417
- "loss": 2.8595,
1418
- "step": 1160
1419
- },
1420
- {
1421
- "epoch": 0.33,
1422
- "learning_rate": 4.939921898468009e-06,
1423
- "loss": 2.9011,
1424
- "step": 1165
1425
- },
1426
- {
1427
- "epoch": 0.33,
1428
- "learning_rate": 4.938044457795134e-06,
1429
- "loss": 2.86,
1430
- "step": 1170
1431
- },
1432
- {
1433
- "epoch": 0.33,
1434
- "learning_rate": 4.936167017122259e-06,
1435
- "loss": 2.8583,
1436
- "step": 1175
1437
- },
1438
- {
1439
- "epoch": 0.33,
1440
- "learning_rate": 4.934289576449385e-06,
1441
- "loss": 2.9274,
1442
- "step": 1180
1443
- },
1444
- {
1445
- "epoch": 0.33,
1446
- "learning_rate": 4.93241213577651e-06,
1447
- "loss": 2.9472,
1448
- "step": 1185
1449
- },
1450
- {
1451
- "epoch": 0.33,
1452
- "learning_rate": 4.930534695103635e-06,
1453
- "loss": 3.4067,
1454
- "step": 1190
1455
- },
1456
- {
1457
- "epoch": 0.33,
1458
- "learning_rate": 4.92865725443076e-06,
1459
- "loss": 2.9363,
1460
- "step": 1195
1461
- },
1462
- {
1463
- "epoch": 0.34,
1464
- "learning_rate": 4.926779813757886e-06,
1465
- "loss": 2.8608,
1466
- "step": 1200
1467
- },
1468
- {
1469
- "epoch": 0.34,
1470
- "learning_rate": 4.924902373085011e-06,
1471
- "loss": 3.356,
1472
- "step": 1205
1473
- },
1474
- {
1475
- "epoch": 0.34,
1476
- "learning_rate": 4.923024932412136e-06,
1477
- "loss": 2.8678,
1478
- "step": 1210
1479
- },
1480
- {
1481
- "epoch": 0.34,
1482
- "learning_rate": 4.921147491739261e-06,
1483
- "loss": 2.8664,
1484
- "step": 1215
1485
- },
1486
- {
1487
- "epoch": 0.34,
1488
- "learning_rate": 4.919270051066387e-06,
1489
- "loss": 2.8897,
1490
- "step": 1220
1491
- },
1492
- {
1493
- "epoch": 0.34,
1494
- "learning_rate": 4.917392610393512e-06,
1495
- "loss": 2.8783,
1496
- "step": 1225
1497
- },
1498
- {
1499
- "epoch": 0.34,
1500
- "learning_rate": 4.915515169720637e-06,
1501
- "loss": 2.8886,
1502
- "step": 1230
1503
- },
1504
- {
1505
- "epoch": 0.35,
1506
- "learning_rate": 4.913637729047762e-06,
1507
- "loss": 2.8534,
1508
- "step": 1235
1509
- },
1510
- {
1511
- "epoch": 0.35,
1512
- "learning_rate": 4.9117602883748875e-06,
1513
- "loss": 2.8322,
1514
- "step": 1240
1515
- },
1516
- {
1517
- "epoch": 0.35,
1518
- "learning_rate": 4.909882847702014e-06,
1519
- "loss": 2.8148,
1520
- "step": 1245
1521
- },
1522
- {
1523
- "epoch": 0.35,
1524
- "learning_rate": 4.908005407029138e-06,
1525
- "loss": 2.832,
1526
- "step": 1250
1527
- },
1528
- {
1529
- "epoch": 0.35,
1530
- "learning_rate": 4.906127966356263e-06,
1531
- "loss": 2.8783,
1532
- "step": 1255
1533
- },
1534
- {
1535
- "epoch": 0.35,
1536
- "learning_rate": 4.9042505256833885e-06,
1537
- "loss": 2.8255,
1538
- "step": 1260
1539
- },
1540
- {
1541
- "epoch": 0.35,
1542
- "learning_rate": 4.902373085010514e-06,
1543
- "loss": 2.8893,
1544
- "step": 1265
1545
- },
1546
- {
1547
- "epoch": 0.35,
1548
- "learning_rate": 4.90049564433764e-06,
1549
- "loss": 2.9132,
1550
- "step": 1270
1551
- },
1552
- {
1553
- "epoch": 0.36,
1554
- "learning_rate": 4.898618203664764e-06,
1555
- "loss": 2.8366,
1556
- "step": 1275
1557
- },
1558
- {
1559
- "epoch": 0.36,
1560
- "learning_rate": 4.89674076299189e-06,
1561
- "loss": 2.8717,
1562
- "step": 1280
1563
- },
1564
- {
1565
- "epoch": 0.36,
1566
- "learning_rate": 4.894863322319015e-06,
1567
- "loss": 3.0332,
1568
- "step": 1285
1569
- },
1570
- {
1571
- "epoch": 0.36,
1572
- "learning_rate": 4.892985881646141e-06,
1573
- "loss": 2.8806,
1574
- "step": 1290
1575
- },
1576
- {
1577
- "epoch": 0.36,
1578
- "learning_rate": 4.891108440973266e-06,
1579
- "loss": 2.8377,
1580
- "step": 1295
1581
- },
1582
- {
1583
- "epoch": 0.36,
1584
- "learning_rate": 4.88923100030039e-06,
1585
- "loss": 2.8136,
1586
- "step": 1300
1587
- },
1588
- {
1589
- "epoch": 0.36,
1590
- "learning_rate": 4.8873535596275165e-06,
1591
- "loss": 2.9869,
1592
- "step": 1305
1593
- },
1594
- {
1595
- "epoch": 0.37,
1596
- "learning_rate": 4.885476118954641e-06,
1597
- "loss": 2.8812,
1598
- "step": 1310
1599
- },
1600
- {
1601
- "epoch": 0.37,
1602
- "learning_rate": 4.883598678281767e-06,
1603
- "loss": 2.83,
1604
- "step": 1315
1605
- },
1606
- {
1607
- "epoch": 0.37,
1608
- "learning_rate": 4.881721237608892e-06,
1609
- "loss": 2.8324,
1610
- "step": 1320
1611
- },
1612
- {
1613
- "epoch": 0.37,
1614
- "learning_rate": 4.8798437969360175e-06,
1615
- "loss": 2.829,
1616
- "step": 1325
1617
- },
1618
- {
1619
- "epoch": 0.37,
1620
- "learning_rate": 4.877966356263143e-06,
1621
- "loss": 2.834,
1622
- "step": 1330
1623
- },
1624
- {
1625
- "epoch": 0.37,
1626
- "learning_rate": 4.876088915590268e-06,
1627
- "loss": 3.1544,
1628
- "step": 1335
1629
- },
1630
- {
1631
- "epoch": 0.37,
1632
- "learning_rate": 4.874211474917393e-06,
1633
- "loss": 2.8126,
1634
- "step": 1340
1635
- },
1636
- {
1637
- "epoch": 0.38,
1638
- "learning_rate": 4.8723340342445184e-06,
1639
- "loss": 2.8915,
1640
- "step": 1345
1641
- },
1642
- {
1643
- "epoch": 0.38,
1644
- "learning_rate": 4.870456593571644e-06,
1645
- "loss": 3.0785,
1646
- "step": 1350
1647
- },
1648
- {
1649
- "epoch": 0.38,
1650
- "learning_rate": 4.868579152898769e-06,
1651
- "loss": 2.8628,
1652
- "step": 1355
1653
- },
1654
- {
1655
- "epoch": 0.38,
1656
- "learning_rate": 4.866701712225894e-06,
1657
- "loss": 2.8263,
1658
- "step": 1360
1659
- },
1660
- {
1661
- "epoch": 0.38,
1662
- "learning_rate": 4.864824271553019e-06,
1663
- "loss": 2.8055,
1664
- "step": 1365
1665
- },
1666
- {
1667
- "epoch": 0.38,
1668
- "learning_rate": 4.862946830880145e-06,
1669
- "loss": 2.837,
1670
- "step": 1370
1671
- },
1672
- {
1673
- "epoch": 0.38,
1674
- "learning_rate": 4.86106939020727e-06,
1675
- "loss": 2.8109,
1676
- "step": 1375
1677
- },
1678
- {
1679
- "epoch": 0.39,
1680
- "learning_rate": 4.859191949534395e-06,
1681
- "loss": 2.8313,
1682
- "step": 1380
1683
- },
1684
- {
1685
- "epoch": 0.39,
1686
- "learning_rate": 4.85731450886152e-06,
1687
- "loss": 2.8085,
1688
- "step": 1385
1689
- },
1690
- {
1691
- "epoch": 0.39,
1692
- "learning_rate": 4.855437068188646e-06,
1693
- "loss": 2.9006,
1694
- "step": 1390
1695
- },
1696
- {
1697
- "epoch": 0.39,
1698
- "learning_rate": 4.853559627515771e-06,
1699
- "loss": 2.76,
1700
- "step": 1395
1701
- },
1702
- {
1703
- "epoch": 0.39,
1704
- "learning_rate": 4.851682186842896e-06,
1705
- "loss": 2.7712,
1706
- "step": 1400
1707
- },
1708
- {
1709
- "epoch": 0.39,
1710
- "learning_rate": 4.849804746170021e-06,
1711
- "loss": 2.8082,
1712
- "step": 1405
1713
- },
1714
- {
1715
- "epoch": 0.39,
1716
- "learning_rate": 4.8479273054971466e-06,
1717
- "loss": 2.9101,
1718
- "step": 1410
1719
- },
1720
- {
1721
- "epoch": 0.4,
1722
- "learning_rate": 4.846049864824272e-06,
1723
- "loss": 2.8121,
1724
- "step": 1415
1725
- },
1726
- {
1727
- "epoch": 0.4,
1728
- "learning_rate": 4.844172424151397e-06,
1729
- "loss": 2.7971,
1730
- "step": 1420
1731
- },
1732
- {
1733
- "epoch": 0.4,
1734
- "learning_rate": 4.842294983478522e-06,
1735
- "loss": 2.8376,
1736
- "step": 1425
1737
- },
1738
- {
1739
- "epoch": 0.4,
1740
- "learning_rate": 4.8404175428056475e-06,
1741
- "loss": 2.7967,
1742
- "step": 1430
1743
- },
1744
- {
1745
- "epoch": 0.4,
1746
- "learning_rate": 4.838540102132774e-06,
1747
- "loss": 2.8066,
1748
- "step": 1435
1749
- },
1750
- {
1751
- "epoch": 0.4,
1752
- "learning_rate": 4.836662661459898e-06,
1753
- "loss": 2.7832,
1754
- "step": 1440
1755
- },
1756
- {
1757
- "epoch": 0.4,
1758
- "learning_rate": 4.834785220787023e-06,
1759
- "loss": 2.7981,
1760
- "step": 1445
1761
- },
1762
- {
1763
- "epoch": 0.41,
1764
- "learning_rate": 4.8329077801141485e-06,
1765
- "loss": 2.743,
1766
- "step": 1450
1767
- },
1768
- {
1769
- "epoch": 0.41,
1770
- "learning_rate": 4.831030339441274e-06,
1771
- "loss": 2.8466,
1772
- "step": 1455
1773
- },
1774
- {
1775
- "epoch": 0.41,
1776
- "learning_rate": 4.8291528987684e-06,
1777
- "loss": 2.8222,
1778
- "step": 1460
1779
- },
1780
- {
1781
- "epoch": 0.41,
1782
- "learning_rate": 4.827275458095524e-06,
1783
- "loss": 2.802,
1784
- "step": 1465
1785
- },
1786
- {
1787
- "epoch": 0.41,
1788
- "learning_rate": 4.82539801742265e-06,
1789
- "loss": 2.6981,
1790
- "step": 1470
1791
- },
1792
- {
1793
- "epoch": 0.41,
1794
- "learning_rate": 4.823520576749775e-06,
1795
- "loss": 2.7656,
1796
- "step": 1475
1797
- },
1798
- {
1799
- "epoch": 0.41,
1800
- "learning_rate": 4.821643136076901e-06,
1801
- "loss": 2.6938,
1802
- "step": 1480
1803
- },
1804
- {
1805
- "epoch": 0.41,
1806
- "learning_rate": 4.819765695404026e-06,
1807
- "loss": 2.7163,
1808
- "step": 1485
1809
- },
1810
- {
1811
- "epoch": 0.42,
1812
- "learning_rate": 4.81788825473115e-06,
1813
- "loss": 2.7057,
1814
- "step": 1490
1815
- },
1816
- {
1817
- "epoch": 0.42,
1818
- "learning_rate": 4.8160108140582765e-06,
1819
- "loss": 2.7461,
1820
- "step": 1495
1821
- },
1822
- {
1823
- "epoch": 0.42,
1824
- "learning_rate": 4.814133373385401e-06,
1825
- "loss": 2.7789,
1826
- "step": 1500
1827
- },
1828
- {
1829
- "epoch": 0.42,
1830
- "eval_loss": 2.633471965789795,
1831
- "eval_runtime": 154.8101,
1832
- "eval_samples_per_second": 7.713,
1833
- "eval_steps_per_second": 0.969,
1834
- "eval_wer": 1.0,
1835
- "step": 1500
1836
- },
1837
- {
1838
- "epoch": 0.42,
1839
- "learning_rate": 4.812255932712527e-06,
1840
- "loss": 2.7559,
1841
- "step": 1505
1842
- },
1843
- {
1844
- "epoch": 0.42,
1845
- "learning_rate": 4.810378492039652e-06,
1846
- "loss": 3.0039,
1847
- "step": 1510
1848
- },
1849
- {
1850
- "epoch": 0.42,
1851
- "learning_rate": 4.8085010513667774e-06,
1852
- "loss": 2.6165,
1853
- "step": 1515
1854
- },
1855
- {
1856
- "epoch": 0.42,
1857
- "learning_rate": 4.806623610693903e-06,
1858
- "loss": 3.5613,
1859
- "step": 1520
1860
- },
1861
- {
1862
- "epoch": 0.43,
1863
- "learning_rate": 4.804746170021028e-06,
1864
- "loss": 2.5695,
1865
- "step": 1525
1866
- },
1867
- {
1868
- "epoch": 0.43,
1869
- "learning_rate": 4.802868729348153e-06,
1870
- "loss": 2.6025,
1871
- "step": 1530
1872
- },
1873
- {
1874
- "epoch": 0.43,
1875
- "learning_rate": 4.800991288675278e-06,
1876
- "loss": 2.6736,
1877
- "step": 1535
1878
- },
1879
- {
1880
- "epoch": 0.43,
1881
- "learning_rate": 4.799113848002404e-06,
1882
- "loss": 2.5119,
1883
- "step": 1540
1884
- },
1885
- {
1886
- "epoch": 0.43,
1887
- "learning_rate": 4.797236407329529e-06,
1888
- "loss": 2.7166,
1889
- "step": 1545
1890
- },
1891
- {
1892
- "epoch": 0.43,
1893
- "learning_rate": 4.795358966656654e-06,
1894
- "loss": 2.5151,
1895
- "step": 1550
1896
- },
1897
- {
1898
- "epoch": 0.43,
1899
- "learning_rate": 4.793481525983779e-06,
1900
- "loss": 2.5859,
1901
- "step": 1555
1902
- },
1903
- {
1904
- "epoch": 0.44,
1905
- "learning_rate": 4.791604085310905e-06,
1906
- "loss": 2.4956,
1907
- "step": 1560
1908
- },
1909
- {
1910
- "epoch": 0.44,
1911
- "learning_rate": 4.78972664463803e-06,
1912
- "loss": 2.638,
1913
- "step": 1565
1914
- },
1915
- {
1916
- "epoch": 0.44,
1917
- "learning_rate": 4.787849203965155e-06,
1918
- "loss": 2.5352,
1919
- "step": 1570
1920
- },
1921
- {
1922
- "epoch": 0.44,
1923
- "learning_rate": 4.78597176329228e-06,
1924
- "loss": 2.4668,
1925
- "step": 1575
1926
- },
1927
- {
1928
- "epoch": 0.44,
1929
- "learning_rate": 4.7840943226194056e-06,
1930
- "loss": 2.5222,
1931
- "step": 1580
1932
- },
1933
- {
1934
- "epoch": 0.44,
1935
- "learning_rate": 4.782216881946531e-06,
1936
- "loss": 2.6859,
1937
- "step": 1585
1938
- },
1939
- {
1940
- "epoch": 0.44,
1941
- "learning_rate": 4.780339441273656e-06,
1942
- "loss": 2.4609,
1943
- "step": 1590
1944
- },
1945
- {
1946
- "epoch": 0.45,
1947
- "learning_rate": 4.778462000600781e-06,
1948
- "loss": 2.4346,
1949
- "step": 1595
1950
- },
1951
- {
1952
- "epoch": 0.45,
1953
- "learning_rate": 4.7765845599279065e-06,
1954
- "loss": 2.4685,
1955
- "step": 1600
1956
- },
1957
- {
1958
- "epoch": 0.45,
1959
- "learning_rate": 4.774707119255032e-06,
1960
- "loss": 2.4568,
1961
- "step": 1605
1962
- },
1963
- {
1964
- "epoch": 0.45,
1965
- "learning_rate": 4.772829678582157e-06,
1966
- "loss": 2.2941,
1967
- "step": 1610
1968
- },
1969
- {
1970
- "epoch": 0.45,
1971
- "learning_rate": 4.770952237909282e-06,
1972
- "loss": 2.2794,
1973
- "step": 1615
1974
- },
1975
- {
1976
- "epoch": 0.45,
1977
- "learning_rate": 4.7690747972364075e-06,
1978
- "loss": 2.4405,
1979
- "step": 1620
1980
- },
1981
- {
1982
- "epoch": 0.45,
1983
- "learning_rate": 4.7671973565635336e-06,
1984
- "loss": 2.3013,
1985
- "step": 1625
1986
- },
1987
- {
1988
- "epoch": 0.46,
1989
- "learning_rate": 4.765319915890658e-06,
1990
- "loss": 2.2069,
1991
- "step": 1630
1992
- },
1993
- {
1994
- "epoch": 0.46,
1995
- "learning_rate": 4.763442475217783e-06,
1996
- "loss": 2.1934,
1997
- "step": 1635
1998
- },
1999
- {
2000
- "epoch": 0.46,
2001
- "learning_rate": 4.7615650345449084e-06,
2002
- "loss": 2.1879,
2003
- "step": 1640
2004
- },
2005
- {
2006
- "epoch": 0.46,
2007
- "learning_rate": 4.759687593872034e-06,
2008
- "loss": 2.1863,
2009
- "step": 1645
2010
- },
2011
- {
2012
- "epoch": 0.46,
2013
- "learning_rate": 4.75781015319916e-06,
2014
- "loss": 2.2998,
2015
- "step": 1650
2016
- },
2017
- {
2018
- "epoch": 0.46,
2019
- "learning_rate": 4.755932712526284e-06,
2020
- "loss": 2.3798,
2021
- "step": 1655
2022
- },
2023
- {
2024
- "epoch": 0.46,
2025
- "learning_rate": 4.75405527185341e-06,
2026
- "loss": 2.0779,
2027
- "step": 1660
2028
- },
2029
- {
2030
- "epoch": 0.47,
2031
- "learning_rate": 4.752177831180535e-06,
2032
- "loss": 2.4298,
2033
- "step": 1665
2034
- },
2035
- {
2036
- "epoch": 0.47,
2037
- "learning_rate": 4.750300390507661e-06,
2038
- "loss": 2.0593,
2039
- "step": 1670
2040
- },
2041
- {
2042
- "epoch": 0.47,
2043
- "learning_rate": 4.748422949834786e-06,
2044
- "loss": 2.0854,
2045
- "step": 1675
2046
- },
2047
- {
2048
- "epoch": 0.47,
2049
- "learning_rate": 4.746545509161911e-06,
2050
- "loss": 2.0465,
2051
- "step": 1680
2052
- },
2053
- {
2054
- "epoch": 0.47,
2055
- "learning_rate": 4.7446680684890365e-06,
2056
- "loss": 2.1006,
2057
- "step": 1685
2058
- },
2059
- {
2060
- "epoch": 0.47,
2061
- "learning_rate": 4.742790627816161e-06,
2062
- "loss": 2.1144,
2063
- "step": 1690
2064
- },
2065
- {
2066
- "epoch": 0.47,
2067
- "learning_rate": 4.740913187143287e-06,
2068
- "loss": 2.355,
2069
- "step": 1695
2070
- },
2071
- {
2072
- "epoch": 0.47,
2073
- "learning_rate": 4.739035746470412e-06,
2074
- "loss": 2.0663,
2075
- "step": 1700
2076
- },
2077
- {
2078
- "epoch": 0.48,
2079
- "learning_rate": 4.737158305797537e-06,
2080
- "loss": 1.9655,
2081
- "step": 1705
2082
- },
2083
- {
2084
- "epoch": 0.48,
2085
- "learning_rate": 4.735280865124663e-06,
2086
- "loss": 1.9764,
2087
- "step": 1710
2088
- },
2089
- {
2090
- "epoch": 0.48,
2091
- "learning_rate": 4.733403424451788e-06,
2092
- "loss": 2.1639,
2093
- "step": 1715
2094
- },
2095
- {
2096
- "epoch": 0.48,
2097
- "learning_rate": 4.731525983778913e-06,
2098
- "loss": 1.9601,
2099
- "step": 1720
2100
- },
2101
- {
2102
- "epoch": 0.48,
2103
- "learning_rate": 4.729648543106038e-06,
2104
- "loss": 2.0582,
2105
- "step": 1725
2106
- },
2107
- {
2108
- "epoch": 0.48,
2109
- "learning_rate": 4.727771102433164e-06,
2110
- "loss": 1.8999,
2111
- "step": 1730
2112
- },
2113
- {
2114
- "epoch": 0.48,
2115
- "learning_rate": 4.725893661760289e-06,
2116
- "loss": 2.1299,
2117
- "step": 1735
2118
- },
2119
- {
2120
- "epoch": 0.49,
2121
- "learning_rate": 4.724016221087414e-06,
2122
- "loss": 1.9507,
2123
- "step": 1740
2124
- },
2125
- {
2126
- "epoch": 0.49,
2127
- "learning_rate": 4.722138780414539e-06,
2128
- "loss": 1.9841,
2129
- "step": 1745
2130
- },
2131
- {
2132
- "epoch": 0.49,
2133
- "learning_rate": 4.7202613397416646e-06,
2134
- "loss": 2.2819,
2135
- "step": 1750
2136
- },
2137
- {
2138
- "epoch": 0.49,
2139
- "learning_rate": 4.71838389906879e-06,
2140
- "loss": 2.0013,
2141
- "step": 1755
2142
- },
2143
- {
2144
- "epoch": 0.49,
2145
- "learning_rate": 4.716506458395915e-06,
2146
- "loss": 1.8868,
2147
- "step": 1760
2148
- },
2149
- {
2150
- "epoch": 0.49,
2151
- "learning_rate": 4.71462901772304e-06,
2152
- "loss": 1.8533,
2153
- "step": 1765
2154
- },
2155
- {
2156
- "epoch": 0.49,
2157
- "learning_rate": 4.7127515770501655e-06,
2158
- "loss": 2.3557,
2159
- "step": 1770
2160
- },
2161
- {
2162
- "epoch": 0.5,
2163
- "learning_rate": 4.710874136377291e-06,
2164
- "loss": 1.7171,
2165
- "step": 1775
2166
- },
2167
- {
2168
- "epoch": 0.5,
2169
- "learning_rate": 4.708996695704416e-06,
2170
- "loss": 1.88,
2171
- "step": 1780
2172
- },
2173
- {
2174
- "epoch": 0.5,
2175
- "learning_rate": 4.707119255031541e-06,
2176
- "loss": 2.1387,
2177
- "step": 1785
2178
- },
2179
- {
2180
- "epoch": 0.5,
2181
- "learning_rate": 4.7052418143586665e-06,
2182
- "loss": 1.8137,
2183
- "step": 1790
2184
- },
2185
- {
2186
- "epoch": 0.5,
2187
- "learning_rate": 4.703364373685792e-06,
2188
- "loss": 1.8639,
2189
- "step": 1795
2190
- },
2191
- {
2192
- "epoch": 0.5,
2193
- "learning_rate": 4.701486933012917e-06,
2194
- "loss": 2.2175,
2195
- "step": 1800
2196
- },
2197
- {
2198
- "epoch": 0.5,
2199
- "learning_rate": 4.699609492340042e-06,
2200
- "loss": 1.7335,
2201
- "step": 1805
2202
- },
2203
- {
2204
- "epoch": 0.51,
2205
- "learning_rate": 4.6977320516671675e-06,
2206
- "loss": 2.1544,
2207
- "step": 1810
2208
- },
2209
- {
2210
- "epoch": 0.51,
2211
- "learning_rate": 4.6958546109942935e-06,
2212
- "loss": 2.1503,
2213
- "step": 1815
2214
- },
2215
- {
2216
- "epoch": 0.51,
2217
- "learning_rate": 4.693977170321418e-06,
2218
- "loss": 2.3852,
2219
- "step": 1820
2220
- },
2221
- {
2222
- "epoch": 0.51,
2223
- "learning_rate": 4.692099729648544e-06,
2224
- "loss": 1.9649,
2225
- "step": 1825
2226
- },
2227
- {
2228
- "epoch": 0.51,
2229
- "learning_rate": 4.690222288975668e-06,
2230
- "loss": 1.9533,
2231
- "step": 1830
2232
- },
2233
- {
2234
- "epoch": 0.51,
2235
- "learning_rate": 4.688344848302794e-06,
2236
- "loss": 1.7411,
2237
- "step": 1835
2238
- },
2239
- {
2240
- "epoch": 0.51,
2241
- "learning_rate": 4.68646740762992e-06,
2242
- "loss": 1.8008,
2243
- "step": 1840
2244
- },
2245
- {
2246
- "epoch": 0.52,
2247
- "learning_rate": 4.684589966957044e-06,
2248
- "loss": 2.0014,
2249
- "step": 1845
2250
- },
2251
- {
2252
- "epoch": 0.52,
2253
- "learning_rate": 4.68271252628417e-06,
2254
- "loss": 1.7596,
2255
- "step": 1850
2256
- },
2257
- {
2258
- "epoch": 0.52,
2259
- "learning_rate": 4.680835085611295e-06,
2260
- "loss": 1.6665,
2261
- "step": 1855
2262
- },
2263
- {
2264
- "epoch": 0.52,
2265
- "learning_rate": 4.678957644938421e-06,
2266
- "loss": 1.6203,
2267
- "step": 1860
2268
- },
2269
- {
2270
- "epoch": 0.52,
2271
- "learning_rate": 4.677080204265546e-06,
2272
- "loss": 2.5619,
2273
- "step": 1865
2274
- },
2275
- {
2276
- "epoch": 0.52,
2277
- "learning_rate": 4.675202763592671e-06,
2278
- "loss": 1.8384,
2279
- "step": 1870
2280
- },
2281
- {
2282
- "epoch": 0.52,
2283
- "learning_rate": 4.673325322919796e-06,
2284
- "loss": 1.7171,
2285
- "step": 1875
2286
- },
2287
- {
2288
- "epoch": 0.53,
2289
- "learning_rate": 4.671447882246921e-06,
2290
- "loss": 1.594,
2291
- "step": 1880
2292
- },
2293
- {
2294
- "epoch": 0.53,
2295
- "learning_rate": 4.669570441574047e-06,
2296
- "loss": 1.5099,
2297
- "step": 1885
2298
- },
2299
- {
2300
- "epoch": 0.53,
2301
- "learning_rate": 4.667693000901172e-06,
2302
- "loss": 1.5688,
2303
- "step": 1890
2304
- },
2305
- {
2306
- "epoch": 0.53,
2307
- "learning_rate": 4.665815560228297e-06,
2308
- "loss": 1.9561,
2309
- "step": 1895
2310
- },
2311
- {
2312
- "epoch": 0.53,
2313
- "learning_rate": 4.663938119555423e-06,
2314
- "loss": 1.7206,
2315
- "step": 1900
2316
- },
2317
- {
2318
- "epoch": 0.53,
2319
- "learning_rate": 4.662060678882548e-06,
2320
- "loss": 1.6661,
2321
- "step": 1905
2322
- },
2323
- {
2324
- "epoch": 0.53,
2325
- "learning_rate": 4.660183238209673e-06,
2326
- "loss": 1.5018,
2327
- "step": 1910
2328
- },
2329
- {
2330
- "epoch": 0.54,
2331
- "learning_rate": 4.658305797536798e-06,
2332
- "loss": 1.7274,
2333
- "step": 1915
2334
- },
2335
- {
2336
- "epoch": 0.54,
2337
- "learning_rate": 4.656428356863924e-06,
2338
- "loss": 1.5007,
2339
- "step": 1920
2340
- },
2341
- {
2342
- "epoch": 0.54,
2343
- "learning_rate": 4.654550916191049e-06,
2344
- "loss": 1.4662,
2345
- "step": 1925
2346
- },
2347
- {
2348
- "epoch": 0.54,
2349
- "learning_rate": 4.652673475518174e-06,
2350
- "loss": 1.4984,
2351
- "step": 1930
2352
- },
2353
- {
2354
- "epoch": 0.54,
2355
- "learning_rate": 4.650796034845299e-06,
2356
- "loss": 1.3899,
2357
- "step": 1935
2358
- },
2359
- {
2360
- "epoch": 0.54,
2361
- "learning_rate": 4.6489185941724245e-06,
2362
- "loss": 1.4297,
2363
- "step": 1940
2364
- },
2365
- {
2366
- "epoch": 0.54,
2367
- "learning_rate": 4.64704115349955e-06,
2368
- "loss": 1.7618,
2369
- "step": 1945
2370
- },
2371
- {
2372
- "epoch": 0.54,
2373
- "learning_rate": 4.645163712826675e-06,
2374
- "loss": 1.6308,
2375
- "step": 1950
2376
- },
2377
- {
2378
- "epoch": 0.55,
2379
- "learning_rate": 4.6432862721538e-06,
2380
- "loss": 1.6705,
2381
- "step": 1955
2382
- },
2383
- {
2384
- "epoch": 0.55,
2385
- "learning_rate": 4.6414088314809255e-06,
2386
- "loss": 1.6243,
2387
- "step": 1960
2388
- },
2389
- {
2390
- "epoch": 0.55,
2391
- "learning_rate": 4.639531390808051e-06,
2392
- "loss": 1.331,
2393
- "step": 1965
2394
- },
2395
- {
2396
- "epoch": 0.55,
2397
- "learning_rate": 4.637653950135176e-06,
2398
- "loss": 1.913,
2399
- "step": 1970
2400
- },
2401
- {
2402
- "epoch": 0.55,
2403
- "learning_rate": 4.635776509462301e-06,
2404
- "loss": 1.6333,
2405
- "step": 1975
2406
- },
2407
- {
2408
- "epoch": 0.55,
2409
- "learning_rate": 4.6338990687894265e-06,
2410
- "loss": 1.4198,
2411
- "step": 1980
2412
- },
2413
- {
2414
- "epoch": 0.55,
2415
- "learning_rate": 4.632021628116552e-06,
2416
- "loss": 2.0835,
2417
- "step": 1985
2418
- },
2419
- {
2420
- "epoch": 0.56,
2421
- "learning_rate": 4.630144187443677e-06,
2422
- "loss": 1.3693,
2423
- "step": 1990
2424
- },
2425
- {
2426
- "epoch": 0.56,
2427
- "learning_rate": 4.628266746770802e-06,
2428
- "loss": 1.5302,
2429
- "step": 1995
2430
- },
2431
- {
2432
- "epoch": 0.56,
2433
- "learning_rate": 4.6263893060979274e-06,
2434
- "loss": 1.8702,
2435
- "step": 2000
2436
- },
2437
- {
2438
- "epoch": 0.56,
2439
- "eval_loss": 1.0430355072021484,
2440
- "eval_runtime": 152.8141,
2441
- "eval_samples_per_second": 7.813,
2442
- "eval_steps_per_second": 0.982,
2443
- "eval_wer": 1.0,
2444
- "step": 2000
2445
- }
2446
- ],
2447
- "max_steps": 14316,
2448
- "num_train_epochs": 4,
2449
- "total_flos": 2.7924067835086464e+17,
2450
- "trial_name": null,
2451
- "trial_params": null
2452
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f05c6a97d90fd87e90a83547a454ff788a066d3d434f2903b0a9058eae5eb5d
3
- size 2991