TenzinGayche committed on
Commit
2f23d51
•
1 Parent(s): 0d45503

Training in progress, step 2700

Browse files
{checkpoint-1500 → checkpoint-2600}/config.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-2600}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1524d6204dd1d9571e106450aa78a0047caccd33264142d1b1d96598293949c
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:206ea63887407ec8e34ca0976f176fba15fe66228df233f838fc0cfed90548e6
3
  size 2490946501
{checkpoint-1500 → checkpoint-2600}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1600 → checkpoint-2600}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ad6357a9e5c9dd5dd5741823e00aedddfec42d30f8ce46a6d034dcff4543a39
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a5094bb42dbf2cd0fc5974c6f43818537a4fbd29c7708022c2289caa021655
3
  size 1262344621
{checkpoint-1600 → checkpoint-2600}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d10c6bd9d48134cf8eeac7d42482eafcf53211a5ac7f6981efd7e55707c3d0a
3
  size 14639
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e74e38f1a7bf8084691d64ac5100949d2f0fd9bd1ef1d5f1b8fad59bf6082a13
3
  size 14639
{checkpoint-1600 → checkpoint-2600}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3908abcd123f77ca1ba6f0cc89ccfb45a998a20677c58b28be69a032cbd2ff26
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08c40f435b50444d345428e533971e24aa2c54e344b905b88782a34a910c8c42
3
  size 557
{checkpoint-1500 → checkpoint-2600}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0b099dabe4ed5cc0419fc6b3ed457d34c91b920fb1ebdcb44ad9cc504edc495
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7307abb316278b32ba0ea0e5ef2e773c3508f659c0c03a41baa3412f33cf5d8a
3
  size 627
{checkpoint-1600 → checkpoint-2600}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.884353741496598,
5
- "global_step": 1600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -342,11 +342,221 @@
342
  "eval_samples_per_second": 23.982,
343
  "eval_steps_per_second": 3.018,
344
  "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  }
346
  ],
347
  "max_steps": 3675,
348
  "num_train_epochs": 25,
349
- "total_flos": 6.224068721891923e+18,
350
  "trial_name": null,
351
  "trial_params": null
352
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.687074829931973,
5
+ "global_step": 2600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
342
  "eval_samples_per_second": 23.982,
343
  "eval_steps_per_second": 3.018,
344
  "step": 1600
345
+ },
346
+ {
347
+ "epoch": 11.22,
348
+ "learning_rate": 0.00019162204724409448,
349
+ "loss": 0.3584,
350
+ "step": 1650
351
+ },
352
+ {
353
+ "epoch": 11.56,
354
+ "learning_rate": 0.0001868976377952756,
355
+ "loss": 0.3682,
356
+ "step": 1700
357
+ },
358
+ {
359
+ "epoch": 11.56,
360
+ "eval_cer": 0.15685635088863525,
361
+ "eval_loss": 0.36927542090415955,
362
+ "eval_runtime": 24.3751,
363
+ "eval_samples_per_second": 24.123,
364
+ "eval_steps_per_second": 3.036,
365
+ "step": 1700
366
+ },
367
+ {
368
+ "epoch": 11.9,
369
+ "learning_rate": 0.00018217322834645667,
370
+ "loss": 0.3455,
371
+ "step": 1750
372
+ },
373
+ {
374
+ "epoch": 12.24,
375
+ "learning_rate": 0.00017744881889763777,
376
+ "loss": 0.3245,
377
+ "step": 1800
378
+ },
379
+ {
380
+ "epoch": 12.24,
381
+ "eval_cer": 0.15816076960704387,
382
+ "eval_loss": 0.3740461468696594,
383
+ "eval_runtime": 24.7162,
384
+ "eval_samples_per_second": 23.79,
385
+ "eval_steps_per_second": 2.994,
386
+ "step": 1800
387
+ },
388
+ {
389
+ "epoch": 12.59,
390
+ "learning_rate": 0.0001727244094488189,
391
+ "loss": 0.3208,
392
+ "step": 1850
393
+ },
394
+ {
395
+ "epoch": 12.93,
396
+ "learning_rate": 0.000168,
397
+ "loss": 0.3063,
398
+ "step": 1900
399
+ },
400
+ {
401
+ "epoch": 12.93,
402
+ "eval_cer": 0.15904125224196966,
403
+ "eval_loss": 0.3622555434703827,
404
+ "eval_runtime": 24.4729,
405
+ "eval_samples_per_second": 24.027,
406
+ "eval_steps_per_second": 3.024,
407
+ "step": 1900
408
+ },
409
+ {
410
+ "epoch": 13.27,
411
+ "learning_rate": 0.0001632755905511811,
412
+ "loss": 0.3019,
413
+ "step": 1950
414
+ },
415
+ {
416
+ "epoch": 13.61,
417
+ "learning_rate": 0.00015855118110236219,
418
+ "loss": 0.2945,
419
+ "step": 2000
420
+ },
421
+ {
422
+ "epoch": 13.61,
423
+ "eval_cer": 0.16634599706505787,
424
+ "eval_loss": 0.3725011348724365,
425
+ "eval_runtime": 25.0023,
426
+ "eval_samples_per_second": 23.518,
427
+ "eval_steps_per_second": 2.96,
428
+ "step": 2000
429
+ },
430
+ {
431
+ "epoch": 13.95,
432
+ "learning_rate": 0.0001538267716535433,
433
+ "loss": 0.279,
434
+ "step": 2050
435
+ },
436
+ {
437
+ "epoch": 14.29,
438
+ "learning_rate": 0.0001491023622047244,
439
+ "loss": 0.2674,
440
+ "step": 2100
441
+ },
442
+ {
443
+ "epoch": 14.29,
444
+ "eval_cer": 0.15731289744007826,
445
+ "eval_loss": 0.3531067371368408,
446
+ "eval_runtime": 24.8381,
447
+ "eval_samples_per_second": 23.673,
448
+ "eval_steps_per_second": 2.979,
449
+ "step": 2100
450
+ },
451
+ {
452
+ "epoch": 14.63,
453
+ "learning_rate": 0.0001443779527559055,
454
+ "loss": 0.2584,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 14.97,
459
+ "learning_rate": 0.0001396535433070866,
460
+ "loss": 0.2796,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 14.97,
465
+ "eval_cer": 0.14808413500733736,
466
+ "eval_loss": 0.3606802523136139,
467
+ "eval_runtime": 24.8151,
468
+ "eval_samples_per_second": 23.695,
469
+ "eval_steps_per_second": 2.982,
470
+ "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
  }
556
  ],
557
  "max_steps": 3675,
558
  "num_train_epochs": 25,
559
+ "total_flos": 1.009919824454501e+19,
560
  "trial_name": null,
561
  "trial_params": null
562
  }
{checkpoint-1500 → checkpoint-2600}/training_args.bin RENAMED
File without changes
{checkpoint-1600 → checkpoint-2700}/config.json RENAMED
File without changes
{checkpoint-1600 → checkpoint-2700}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c09c17d29e4fc326c42244511bcaa78618ea3678b475aeb30bae0cec3cdde19
3
  size 2490946501
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f92a5dc51136ef95412fd140ff1588b4bd021fae2e4e1d0c3ff1d627fb549e7
3
  size 2490946501
{checkpoint-1600 → checkpoint-2700}/preprocessor_config.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-2700}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15cc6fb216f3da35028c9798afff67e0a5388271f5aaac6060d15e7551569f26
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bba14f51afadd438f5716f846b339431901a15de8cc5ac24dc52e61e72244c4
3
  size 1262344621
{checkpoint-1500 → checkpoint-2700}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a25122640aee963f4ee6e1769a451ca48fbe76be99ac9653e1a79eca2e58761c
3
- size 14575
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:431a3896c72aed8c52c291336a3ad85d896044a4bbceea1bc54f8c6c2977c933
3
+ size 14639
{checkpoint-1500 → checkpoint-2700}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe96bf231849321ba4efb19c6b36f95dd54d4afaab7ccaedf38fefc92983569e
3
  size 557
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1739fe3b23e1ecf0ff622a8248f947392f9a7d39eeb86ebb549697db7a136562
3
  size 557
{checkpoint-1600 → checkpoint-2700}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8de202cb6256f9113a1b311d18ed82c476ea245c321354355f79fe2b8918b62
3
  size 627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76fb0d2abf88d73ac50a41ede59c29dd01e496baa78a6a932c6cb64f4d7c0fd7
3
  size 627
{checkpoint-1500 → checkpoint-2700}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.204081632653061,
5
- "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -321,11 +321,263 @@
321
  "eval_samples_per_second": 24.044,
322
  "eval_steps_per_second": 3.026,
323
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  }
325
  ],
326
  "max_steps": 3675,
327
  "num_train_epochs": 25,
328
- "total_flos": 5.82944974613242e+18,
329
  "trial_name": null,
330
  "trial_params": null
331
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 18.367346938775512,
5
+ "global_step": 2700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
321
  "eval_samples_per_second": 24.044,
322
  "eval_steps_per_second": 3.026,
323
  "step": 1500
324
+ },
325
+ {
326
+ "epoch": 10.54,
327
+ "learning_rate": 0.00020107086614173225,
328
+ "loss": 0.3959,
329
+ "step": 1550
330
+ },
331
+ {
332
+ "epoch": 10.88,
333
+ "learning_rate": 0.00019634645669291338,
334
+ "loss": 0.4074,
335
+ "step": 1600
336
+ },
337
+ {
338
+ "epoch": 10.88,
339
+ "eval_cer": 0.16301972933311593,
340
+ "eval_loss": 0.3626195192337036,
341
+ "eval_runtime": 24.5184,
342
+ "eval_samples_per_second": 23.982,
343
+ "eval_steps_per_second": 3.018,
344
+ "step": 1600
345
+ },
346
+ {
347
+ "epoch": 11.22,
348
+ "learning_rate": 0.00019162204724409448,
349
+ "loss": 0.3584,
350
+ "step": 1650
351
+ },
352
+ {
353
+ "epoch": 11.56,
354
+ "learning_rate": 0.0001868976377952756,
355
+ "loss": 0.3682,
356
+ "step": 1700
357
+ },
358
+ {
359
+ "epoch": 11.56,
360
+ "eval_cer": 0.15685635088863525,
361
+ "eval_loss": 0.36927542090415955,
362
+ "eval_runtime": 24.3751,
363
+ "eval_samples_per_second": 24.123,
364
+ "eval_steps_per_second": 3.036,
365
+ "step": 1700
366
+ },
367
+ {
368
+ "epoch": 11.9,
369
+ "learning_rate": 0.00018217322834645667,
370
+ "loss": 0.3455,
371
+ "step": 1750
372
+ },
373
+ {
374
+ "epoch": 12.24,
375
+ "learning_rate": 0.00017744881889763777,
376
+ "loss": 0.3245,
377
+ "step": 1800
378
+ },
379
+ {
380
+ "epoch": 12.24,
381
+ "eval_cer": 0.15816076960704387,
382
+ "eval_loss": 0.3740461468696594,
383
+ "eval_runtime": 24.7162,
384
+ "eval_samples_per_second": 23.79,
385
+ "eval_steps_per_second": 2.994,
386
+ "step": 1800
387
+ },
388
+ {
389
+ "epoch": 12.59,
390
+ "learning_rate": 0.0001727244094488189,
391
+ "loss": 0.3208,
392
+ "step": 1850
393
+ },
394
+ {
395
+ "epoch": 12.93,
396
+ "learning_rate": 0.000168,
397
+ "loss": 0.3063,
398
+ "step": 1900
399
+ },
400
+ {
401
+ "epoch": 12.93,
402
+ "eval_cer": 0.15904125224196966,
403
+ "eval_loss": 0.3622555434703827,
404
+ "eval_runtime": 24.4729,
405
+ "eval_samples_per_second": 24.027,
406
+ "eval_steps_per_second": 3.024,
407
+ "step": 1900
408
+ },
409
+ {
410
+ "epoch": 13.27,
411
+ "learning_rate": 0.0001632755905511811,
412
+ "loss": 0.3019,
413
+ "step": 1950
414
+ },
415
+ {
416
+ "epoch": 13.61,
417
+ "learning_rate": 0.00015855118110236219,
418
+ "loss": 0.2945,
419
+ "step": 2000
420
+ },
421
+ {
422
+ "epoch": 13.61,
423
+ "eval_cer": 0.16634599706505787,
424
+ "eval_loss": 0.3725011348724365,
425
+ "eval_runtime": 25.0023,
426
+ "eval_samples_per_second": 23.518,
427
+ "eval_steps_per_second": 2.96,
428
+ "step": 2000
429
+ },
430
+ {
431
+ "epoch": 13.95,
432
+ "learning_rate": 0.0001538267716535433,
433
+ "loss": 0.279,
434
+ "step": 2050
435
+ },
436
+ {
437
+ "epoch": 14.29,
438
+ "learning_rate": 0.0001491023622047244,
439
+ "loss": 0.2674,
440
+ "step": 2100
441
+ },
442
+ {
443
+ "epoch": 14.29,
444
+ "eval_cer": 0.15731289744007826,
445
+ "eval_loss": 0.3531067371368408,
446
+ "eval_runtime": 24.8381,
447
+ "eval_samples_per_second": 23.673,
448
+ "eval_steps_per_second": 2.979,
449
+ "step": 2100
450
+ },
451
+ {
452
+ "epoch": 14.63,
453
+ "learning_rate": 0.0001443779527559055,
454
+ "loss": 0.2584,
455
+ "step": 2150
456
+ },
457
+ {
458
+ "epoch": 14.97,
459
+ "learning_rate": 0.0001396535433070866,
460
+ "loss": 0.2796,
461
+ "step": 2200
462
+ },
463
+ {
464
+ "epoch": 14.97,
465
+ "eval_cer": 0.14808413500733736,
466
+ "eval_loss": 0.3606802523136139,
467
+ "eval_runtime": 24.8151,
468
+ "eval_samples_per_second": 23.695,
469
+ "eval_steps_per_second": 2.982,
470
+ "step": 2200
471
+ },
472
+ {
473
+ "epoch": 15.31,
474
+ "learning_rate": 0.0001349291338582677,
475
+ "loss": 0.2462,
476
+ "step": 2250
477
+ },
478
+ {
479
+ "epoch": 15.65,
480
+ "learning_rate": 0.0001302047244094488,
481
+ "loss": 0.256,
482
+ "step": 2300
483
+ },
484
+ {
485
+ "epoch": 15.65,
486
+ "eval_cer": 0.15819338007500408,
487
+ "eval_loss": 0.3580550253391266,
488
+ "eval_runtime": 24.5695,
489
+ "eval_samples_per_second": 23.932,
490
+ "eval_steps_per_second": 3.012,
491
+ "step": 2300
492
+ },
493
+ {
494
+ "epoch": 15.99,
495
+ "learning_rate": 0.00012548031496062992,
496
+ "loss": 0.2524,
497
+ "step": 2350
498
+ },
499
+ {
500
+ "epoch": 16.33,
501
+ "learning_rate": 0.00012075590551181102,
502
+ "loss": 0.2219,
503
+ "step": 2400
504
+ },
505
+ {
506
+ "epoch": 16.33,
507
+ "eval_cer": 0.14801891407141693,
508
+ "eval_loss": 0.35925593972206116,
509
+ "eval_runtime": 24.982,
510
+ "eval_samples_per_second": 23.537,
511
+ "eval_steps_per_second": 2.962,
512
+ "step": 2400
513
+ },
514
+ {
515
+ "epoch": 16.67,
516
+ "learning_rate": 0.0001160314960629921,
517
+ "loss": 0.2364,
518
+ "step": 2450
519
+ },
520
+ {
521
+ "epoch": 17.01,
522
+ "learning_rate": 0.00011130708661417321,
523
+ "loss": 0.2291,
524
+ "step": 2500
525
+ },
526
+ {
527
+ "epoch": 17.01,
528
+ "eval_cer": 0.1471058209685309,
529
+ "eval_loss": 0.35567909479141235,
530
+ "eval_runtime": 24.4749,
531
+ "eval_samples_per_second": 24.025,
532
+ "eval_steps_per_second": 3.024,
533
+ "step": 2500
534
+ },
535
+ {
536
+ "epoch": 17.35,
537
+ "learning_rate": 0.00010658267716535431,
538
+ "loss": 0.2045,
539
+ "step": 2550
540
+ },
541
+ {
542
+ "epoch": 17.69,
543
+ "learning_rate": 0.00010185826771653542,
544
+ "loss": 0.2172,
545
+ "step": 2600
546
+ },
547
+ {
548
+ "epoch": 17.69,
549
+ "eval_cer": 0.14792108266753629,
550
+ "eval_loss": 0.3606509566307068,
551
+ "eval_runtime": 25.1105,
552
+ "eval_samples_per_second": 23.416,
553
+ "eval_steps_per_second": 2.947,
554
+ "step": 2600
555
+ },
556
+ {
557
+ "epoch": 18.03,
558
+ "learning_rate": 9.713385826771652e-05,
559
+ "loss": 0.2271,
560
+ "step": 2650
561
+ },
562
+ {
563
+ "epoch": 18.37,
564
+ "learning_rate": 9.240944881889763e-05,
565
+ "loss": 0.1858,
566
+ "step": 2700
567
+ },
568
+ {
569
+ "epoch": 18.37,
570
+ "eval_cer": 0.15144301320723952,
571
+ "eval_loss": 0.3589307963848114,
572
+ "eval_runtime": 24.5005,
573
+ "eval_samples_per_second": 24.0,
574
+ "eval_steps_per_second": 3.02,
575
+ "step": 2700
576
  }
577
  ],
578
  "max_steps": 3675,
579
  "num_train_epochs": 25,
580
+ "total_flos": 1.0494071921698755e+19,
581
  "trial_name": null,
582
  "trial_params": null
583
  }
{checkpoint-1600 → checkpoint-2700}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a368a48ff43ab4bc8e301dc5a41abb75b9de3b052aeffd6edcf4765b37523a8d
3
  size 1262344621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bba14f51afadd438f5716f846b339431901a15de8cc5ac24dc52e61e72244c4
3
  size 1262344621