TenzinGayche committed on
Commit
0d45503
•
1 Parent(s): e68c6d8

Training in progress, step 2500

Browse files
{checkpoint-1300 → checkpoint-2400}/config.json RENAMED
File without changes
{checkpoint-1300 → checkpoint-2400}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60c1666c0a22afd442f94ad9a2fb031f26ef23fc68ac37235ef5e164c93d256d
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21ec413dbf6dce3f66f0847998faf0727145082a010278c772b5854d5056f118
3
  size 2490946501
{checkpoint-1300 → checkpoint-2400}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1400 → checkpoint-2400}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:525988dbd5f8d7b2e059acdea4c0a6bc193e607eee39c65af058fb9d8d7a8539
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e4f13af4571dc4fc776d37a670bd005d274231a78ff352023ba5c4fadd8614
3
  size 1262344621
{checkpoint-1400 → checkpoint-2400}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e195c0cbe62b1c3287e8c8ade092811fd748dee533be1e8e2f44adb3a467e7d
3
  size 14639
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89ee801de1abf33cd6ebc4f0b38240fe2dfecaa66947acc7503d6a21ded7f76f
3
  size 14639
{checkpoint-1400 → checkpoint-2400}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:beab7ac9352e542e0ab61f68c372e6cd98f7db9ae003386f401b5ce518bf6451
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dcc907e7c7cdb7f74446d1478d9321af62972312f69212a9da63fbf093fd591
3
  size 557
{checkpoint-1300 → checkpoint-2400}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25421b1b72b7f9c181ce4b25601b3a2bfa4015a8cac8ada8a61cd22ffcfde034
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3212d7bddbe681e165c9fd72ab4f4e8de8c824c01389aa4a534800797bb190ca
3
  size 627
{checkpoint-1400 → checkpoint-2400}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.523809523809524,
5
- "global_step": 1400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -300,11 +300,221 @@
300
  "eval_samples_per_second": 23.669,
301
  "eval_steps_per_second": 2.979,
302
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  }
304
  ],
305
  "max_steps": 3675,
306
  "num_train_epochs": 25,
307
- "total_flos": 5.445185739163822e+18,
308
  "trial_name": null,
309
  "trial_params": null
310
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.3265306122449,
5
+ "global_step": 2400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
300
  "eval_samples_per_second": 23.669,
301
  "eval_steps_per_second": 2.979,
302
  "step": 1400
303
+ },
304
+ {
305
+ "epoch": 9.86,
306
+ "learning_rate": 0.00021051968503937006,
307
+ "loss": 0.4415,
308
+ "step": 1450
309
+ },
310
+ {
311
+ "epoch": 10.2,
312
+ "learning_rate": 0.00020579527559055118,
313
+ "loss": 0.417,
314
+ "step": 1500
315
+ },
316
+ {
317
+ "epoch": 10.2,
318
+ "eval_cer": 0.17156367193869232,
319
+ "eval_loss": 0.3833578824996948,
320
+ "eval_runtime": 24.455,
321
+ "eval_samples_per_second": 24.044,
322
+ "eval_steps_per_second": 3.026,
323
+ "step": 1500
324
+ },
325
+ {
326
+ "epoch": 10.54,
327
+ "learning_rate": 0.00020107086614173225,
328
+ "loss": 0.3959,
329
+ "step": 1550
330
+ },
331
+ {
332
+ "epoch": 10.88,
333
+ "learning_rate": 0.00019634645669291338,
334
+ "loss": 0.4074,
335
+ "step": 1600
336
+ },
337
+ {
338
+ "epoch": 10.88,
339
+ "eval_cer": 0.16301972933311593,
340
+ "eval_loss": 0.3626195192337036,
341
+ "eval_runtime": 24.5184,
342
+ "eval_samples_per_second": 23.982,
343
+ "eval_steps_per_second": 3.018,
344
+ "step": 1600
345
+ },
346
+ {
347
+ "epoch": 11.22,
348
+ "learning_rate": 0.00019162204724409448,
349
+ "loss": 0.3584,
350
+ "step": 1650
351
+ },
352
+ {
353
+ "epoch": 11.56,
354
+ "learning_rate": 0.0001868976377952756,
355
+ "loss": 0.3682,
356
+ "step": 1700
357
+ },
358
+ {
359
+ "epoch": 11.56,
360
+ "eval_cer": 0.15685635088863525,
361
+ "eval_loss": 0.36927542090415955,
362
+ "eval_runtime": 24.3751,
363
+ "eval_samples_per_second": 24.123,
364
+ "eval_steps_per_second": 3.036,
365
+ "step": 1700
366
+ },
367
+ {
368
+ "epoch": 11.9,
369
+ "learning_rate": 0.00018217322834645667,
370
+ "loss": 0.3455,
371
+ "step": 1750
372
+ },
373
+ {
374
+ "epoch": 12.24,
375
+ "learning_rate": 0.00017744881889763777,
376
+ "loss": 0.3245,
377
+ "step": 1800
378
+ },
379
+ {
380
+ "epoch": 12.24,
381
+ "eval_cer": 0.15816076960704387,
382
+ "eval_loss": 0.3740461468696594,
383
+ "eval_runtime": 24.7162,
384
+ "eval_samples_per_second": 23.79,
385
+ "eval_steps_per_second": 2.994,
386
+ "step": 1800
387
+ },
388
+ {
389
+ "epoch": 12.59,
390
+ "learning_rate": 0.0001727244094488189,
391
+ "loss": 0.3208,
392
+ "step": 1850
393
+ },
394
+ {
395
+ "epoch": 12.93,
396
+ "learning_rate": 0.000168,
397
+ "loss": 0.3063,
398
+ "step": 1900
399
+ },
400
+ {
401
+ "epoch": 12.93,
402
+ "eval_cer": 0.15904125224196966,
403
+ "eval_loss": 0.3622555434703827,
404
+ "eval_runtime": 24.4729,
405
+ "eval_samples_per_second": 24.027,
406
+ "eval_steps_per_second": 3.024,
407
+ "step": 1900
408
+ },
409
+ {
410
+ "epoch": 13.27,
411
+ "learning_rate": 0.0001632755905511811,
412
+ "loss": 0.3019,
413
+ "step": 1950
414
+ },
415
+ {
416
+ "epoch": 13.61,
417
+ "learning_rate": 0.00015855118110236219,
418
+ "loss": 0.2945,
419
+ "step": 2000
420
+ },
421
+ {
422
+ "epoch": 13.61,
423
+ "eval_cer": 0.16634599706505787,
424
+ "eval_loss": 0.3725011348724365,
425
+ "eval_runtime": 25.0023,
426
+ "eval_samples_per_second": 23.518,
427
+ "eval_steps_per_second": 2.96,
428
+ "step": 2000
429
+ },
430
+ {
431
+ "epoch": 13.95,
432
+ "learning_rate": 0.0001538267716535433,
433
+ "loss": 0.279,
434
+ "step": 2050
435
+ },
436
+ {
437
+ "epoch": 14.29,
438
+ "learning_rate": 0.0001491023622047244,
439
+ "loss": 0.2674,
440
+ "step": 2100
441
+ },
442
+ {
443
+ "epoch": 14.29,
444
+ "eval_cer": 0.15731289744007826,
445
+ "eval_loss": 0.3531067371368408,
446
+ "eval_runtime": 24.8381,
447
+ "eval_samples_per_second": 23.673,
448
+ "eval_steps_per_second": 2.979,
449
+ "step": 2100
450
+ },
451
+ {
452
+ "epoch": 14.63,
453
+ "learning_rate": 0.0001443779527559055,
454
+ "loss": 0.2584,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 14.97,
459
+ "learning_rate": 0.0001396535433070866,
460
+ "loss": 0.2796,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 14.97,
465
+ "eval_cer": 0.14808413500733736,
466
+ "eval_loss": 0.3606802523136139,
467
+ "eval_runtime": 24.8151,
468
+ "eval_samples_per_second": 23.695,
469
+ "eval_steps_per_second": 2.982,
470
+ "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
  }
514
  ],
515
  "max_steps": 3675,
516
  "num_train_epochs": 25,
517
+ "total_flos": 9.330801216434254e+18,
518
  "trial_name": null,
519
  "trial_params": null
520
  }
{checkpoint-1300 → checkpoint-2400}/training_args.bin RENAMED
File without changes
{checkpoint-1400 → checkpoint-2500}/config.json RENAMED
File without changes
{checkpoint-1400 → checkpoint-2500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e5e6891b1564f4b96d98ec7f0ef55e8529b27ce8ed8b89164463ea2b9d4e22c
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c295d37c841b2d9274e1c2ff590e71f0a545a4dfe10d0f547dca1cf116d93838
3
  size 2490946501
{checkpoint-1400 → checkpoint-2500}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1300 → checkpoint-2500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bc90199dc2ecc37a23a362572fba266636f4b6878e99b8110257efd797480e7
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a368a48ff43ab4bc8e301dc5a41abb75b9de3b052aeffd6edcf4765b37523a8d
3
  size 1262344621
{checkpoint-1300 → checkpoint-2500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e6254add3221c3bb1601a93aa46c1e8325da0f3ff08620c5c64a7e934c66208
3
- size 14575
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8575e4b2ab78e0afbc81dc9eacc30543d16798a08a6fac89cde25f7c7f9d9a06
3
+ size 14703
{checkpoint-1300 → checkpoint-2500}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aef98761cd842a9942d5f99a607d8d1f1e746566961234fad871f99a1c4bb56e
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c0deb49ff9261d5b5d7609a0755c25d7a947f998b4734a1525c3a096a987646
3
  size 557
{checkpoint-1400 → checkpoint-2500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f5e93a0a05373e052e8c26d47329bbd69b6ce25046ec38875cdf3bacb435a33
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1dc16f2d67a6830e5fca5f7b55d822dad401a11ddc497a9f782324d540f77d8
3
  size 627
{checkpoint-1300 → checkpoint-2500}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.843537414965986,
5
- "global_step": 1300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -279,11 +279,263 @@
279
  "eval_samples_per_second": 23.629,
280
  "eval_steps_per_second": 2.974,
281
  "step": 1300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  }
283
  ],
284
  "max_steps": 3675,
285
  "num_train_epochs": 25,
286
- "total_flos": 5.062688283872989e+18,
287
  "trial_name": null,
288
  "trial_params": null
289
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.006802721088434,
5
+ "global_step": 2500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
279
  "eval_samples_per_second": 23.629,
280
  "eval_steps_per_second": 2.974,
281
  "step": 1300
282
+ },
283
+ {
284
+ "epoch": 9.18,
285
+ "learning_rate": 0.00021996850393700784,
286
+ "loss": 0.4621,
287
+ "step": 1350
288
+ },
289
+ {
290
+ "epoch": 9.52,
291
+ "learning_rate": 0.00021524409448818896,
292
+ "loss": 0.4583,
293
+ "step": 1400
294
+ },
295
+ {
296
+ "epoch": 9.52,
297
+ "eval_cer": 0.1760313060492418,
298
+ "eval_loss": 0.41417357325553894,
299
+ "eval_runtime": 24.8425,
300
+ "eval_samples_per_second": 23.669,
301
+ "eval_steps_per_second": 2.979,
302
+ "step": 1400
303
+ },
304
+ {
305
+ "epoch": 9.86,
306
+ "learning_rate": 0.00021051968503937006,
307
+ "loss": 0.4415,
308
+ "step": 1450
309
+ },
310
+ {
311
+ "epoch": 10.2,
312
+ "learning_rate": 0.00020579527559055118,
313
+ "loss": 0.417,
314
+ "step": 1500
315
+ },
316
+ {
317
+ "epoch": 10.2,
318
+ "eval_cer": 0.17156367193869232,
319
+ "eval_loss": 0.3833578824996948,
320
+ "eval_runtime": 24.455,
321
+ "eval_samples_per_second": 24.044,
322
+ "eval_steps_per_second": 3.026,
323
+ "step": 1500
324
+ },
325
+ {
326
+ "epoch": 10.54,
327
+ "learning_rate": 0.00020107086614173225,
328
+ "loss": 0.3959,
329
+ "step": 1550
330
+ },
331
+ {
332
+ "epoch": 10.88,
333
+ "learning_rate": 0.00019634645669291338,
334
+ "loss": 0.4074,
335
+ "step": 1600
336
+ },
337
+ {
338
+ "epoch": 10.88,
339
+ "eval_cer": 0.16301972933311593,
340
+ "eval_loss": 0.3626195192337036,
341
+ "eval_runtime": 24.5184,
342
+ "eval_samples_per_second": 23.982,
343
+ "eval_steps_per_second": 3.018,
344
+ "step": 1600
345
+ },
346
+ {
347
+ "epoch": 11.22,
348
+ "learning_rate": 0.00019162204724409448,
349
+ "loss": 0.3584,
350
+ "step": 1650
351
+ },
352
+ {
353
+ "epoch": 11.56,
354
+ "learning_rate": 0.0001868976377952756,
355
+ "loss": 0.3682,
356
+ "step": 1700
357
+ },
358
+ {
359
+ "epoch": 11.56,
360
+ "eval_cer": 0.15685635088863525,
361
+ "eval_loss": 0.36927542090415955,
362
+ "eval_runtime": 24.3751,
363
+ "eval_samples_per_second": 24.123,
364
+ "eval_steps_per_second": 3.036,
365
+ "step": 1700
366
+ },
367
+ {
368
+ "epoch": 11.9,
369
+ "learning_rate": 0.00018217322834645667,
370
+ "loss": 0.3455,
371
+ "step": 1750
372
+ },
373
+ {
374
+ "epoch": 12.24,
375
+ "learning_rate": 0.00017744881889763777,
376
+ "loss": 0.3245,
377
+ "step": 1800
378
+ },
379
+ {
380
+ "epoch": 12.24,
381
+ "eval_cer": 0.15816076960704387,
382
+ "eval_loss": 0.3740461468696594,
383
+ "eval_runtime": 24.7162,
384
+ "eval_samples_per_second": 23.79,
385
+ "eval_steps_per_second": 2.994,
386
+ "step": 1800
387
+ },
388
+ {
389
+ "epoch": 12.59,
390
+ "learning_rate": 0.0001727244094488189,
391
+ "loss": 0.3208,
392
+ "step": 1850
393
+ },
394
+ {
395
+ "epoch": 12.93,
396
+ "learning_rate": 0.000168,
397
+ "loss": 0.3063,
398
+ "step": 1900
399
+ },
400
+ {
401
+ "epoch": 12.93,
402
+ "eval_cer": 0.15904125224196966,
403
+ "eval_loss": 0.3622555434703827,
404
+ "eval_runtime": 24.4729,
405
+ "eval_samples_per_second": 24.027,
406
+ "eval_steps_per_second": 3.024,
407
+ "step": 1900
408
+ },
409
+ {
410
+ "epoch": 13.27,
411
+ "learning_rate": 0.0001632755905511811,
412
+ "loss": 0.3019,
413
+ "step": 1950
414
+ },
415
+ {
416
+ "epoch": 13.61,
417
+ "learning_rate": 0.00015855118110236219,
418
+ "loss": 0.2945,
419
+ "step": 2000
420
+ },
421
+ {
422
+ "epoch": 13.61,
423
+ "eval_cer": 0.16634599706505787,
424
+ "eval_loss": 0.3725011348724365,
425
+ "eval_runtime": 25.0023,
426
+ "eval_samples_per_second": 23.518,
427
+ "eval_steps_per_second": 2.96,
428
+ "step": 2000
429
+ },
430
+ {
431
+ "epoch": 13.95,
432
+ "learning_rate": 0.0001538267716535433,
433
+ "loss": 0.279,
434
+ "step": 2050
435
+ },
436
+ {
437
+ "epoch": 14.29,
438
+ "learning_rate": 0.0001491023622047244,
439
+ "loss": 0.2674,
440
+ "step": 2100
441
+ },
442
+ {
443
+ "epoch": 14.29,
444
+ "eval_cer": 0.15731289744007826,
445
+ "eval_loss": 0.3531067371368408,
446
+ "eval_runtime": 24.8381,
447
+ "eval_samples_per_second": 23.673,
448
+ "eval_steps_per_second": 2.979,
449
+ "step": 2100
450
+ },
451
+ {
452
+ "epoch": 14.63,
453
+ "learning_rate": 0.0001443779527559055,
454
+ "loss": 0.2584,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 14.97,
459
+ "learning_rate": 0.0001396535433070866,
460
+ "loss": 0.2796,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 14.97,
465
+ "eval_cer": 0.14808413500733736,
466
+ "eval_loss": 0.3606802523136139,
467
+ "eval_runtime": 24.8151,
468
+ "eval_samples_per_second": 23.695,
469
+ "eval_steps_per_second": 2.982,
470
+ "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
  }
535
  ],
536
  "max_steps": 3675,
537
  "num_train_epochs": 25,
538
+ "total_flos": 9.701648869870967e+18,
539
  "trial_name": null,
540
  "trial_params": null
541
  }
{checkpoint-1400 → checkpoint-2500}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b720f0acc795f0ce8dfdc5369d926dbb4f0c576ab9a917bc26dcfdd9c8bb55f7
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a368a48ff43ab4bc8e301dc5a41abb75b9de3b052aeffd6edcf4765b37523a8d
3
  size 1262344621