mutisya committed on
Commit 59b213f
1 Parent(s): b25620a

Training in progress, epoch 4, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:058e54c275bf98f7ed004457ae9f9318569d65fce5cbe9ed2c527096ddc694c2
+oid sha256:df6fbabae4d2f897b20cea2f5dd58950edcfa618b25a5aea08bb44e77a77cc4d
 size 2460359008
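
Each of these checkpoint files is tracked with Git LFS, so the repository only versions a small pointer per file (spec version, sha256 oid, byte size); the diff above just swaps the oid as the epoch-4 weights replace the epoch-3 ones. As a minimal sketch, not part of this repo, a downloaded copy can be checked against the new pointer with nothing but the standard library (the helper name lfs_sha256 is ours):

import hashlib

def lfs_sha256(path, chunk_size=1 << 20):
    # Stream the file so a multi-GB checkpoint never sits fully in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid taken from the new pointer in this commit
expected = "df6fbabae4d2f897b20cea2f5dd58950edcfa618b25a5aea08bb44e77a77cc4d"
actual = lfs_sha256("last-checkpoint/model.safetensors")
print("pointer matches file" if actual == expected else "mismatch: " + actual)
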
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e780f49a81031e7a2bb19bd1191d8b3257d5779abdc037edea157d37247972f
+oid sha256:f032ee9fb8aee92d9585018aec96570e3613bb1e6cfccece998d5e79383a8e6a
 size 4921031637
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a1a0f1b02e62311ced3353adb26ff8f2fc8de119d0b12230c2a311f3a184ce8
+oid sha256:d6bf42a83e06896ca85f0cf80d2b2a54efd61aa8b9e5ea095e181d4355e9278f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:657bea0924aade758c792ce4b215c578a2732cae7083827d465063b9d7ebfe12
+oid sha256:3f801a9def587f3e8f73edf722aff5b9463c74ee1de5e83ebd494295436516c3
 size 1064
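
model.safetensors, optimizer.pt, scheduler.pt and rng_state.pth, together with trainer_state.json below, are the pieces the Hugging Face Trainer writes for each checkpoint and reads back when a run resumes via resume_from_checkpoint. A rough inspection sketch, assuming local copies plus torch and safetensors installed (nothing here is prescribed by the repo itself):

import torch
from safetensors.torch import load_file

# Model weights: a flat dict of tensors (~2.46 GB, per the LFS pointer above).
state_dict = load_file("last-checkpoint/model.safetensors")
n_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {n_params / 1e6:.1f}M parameters")

# Optimizer, LR scheduler and RNG snapshots are ordinary torch serializations.
# weights_only=False because they hold Python/NumPy RNG objects, not just tensors;
# only do this for checkpoints you trust.
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)
print(sorted(optimizer_state.keys()))  # typically ['param_groups', 'state']
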
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.9999296583741706,
+  "epoch": 4.0,
   "eval_steps": 500,
-  "global_step": 31986,
+  "global_step": 42649,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -508,6 +508,180 @@
       "eval_samples_per_second": 17.293,
       "eval_steps_per_second": 2.162,
       "step": 31986
+    },
+    {
+      "epoch": 3.00124270205632,
+      "grad_norm": 0.8884554505348206,
+      "learning_rate": 1.9997186268992684e-05,
+      "loss": 1.0903,
+      "step": 32000
+    },
+    {
+      "epoch": 3.04813711927595,
+      "grad_norm": 0.9535645842552185,
+      "learning_rate": 1.9528231101106737e-05,
+      "loss": 1.0308,
+      "step": 32500
+    },
+    {
+      "epoch": 3.09503153649558,
+      "grad_norm": 0.8955056667327881,
+      "learning_rate": 1.9059275933220786e-05,
+      "loss": 1.0411,
+      "step": 33000
+    },
+    {
+      "epoch": 3.1419259537152104,
+      "grad_norm": 0.8737803101539612,
+      "learning_rate": 1.8590320765334836e-05,
+      "loss": 1.0337,
+      "step": 33500
+    },
+    {
+      "epoch": 3.1888203709348404,
+      "grad_norm": 0.9888309240341187,
+      "learning_rate": 1.8121365597448885e-05,
+      "loss": 1.0315,
+      "step": 34000
+    },
+    {
+      "epoch": 3.2357147881544703,
+      "grad_norm": 0.8544078469276428,
+      "learning_rate": 1.7652410429562935e-05,
+      "loss": 1.0334,
+      "step": 34500
+    },
+    {
+      "epoch": 3.2826092053741003,
+      "grad_norm": 0.9137187600135803,
+      "learning_rate": 1.7183455261676984e-05,
+      "loss": 1.0287,
+      "step": 35000
+    },
+    {
+      "epoch": 3.3295036225937302,
+      "grad_norm": 0.9270204305648804,
+      "learning_rate": 1.6714500093791037e-05,
+      "loss": 1.0322,
+      "step": 35500
+    },
+    {
+      "epoch": 3.37639803981336,
+      "grad_norm": 1.0035383701324463,
+      "learning_rate": 1.6246482836240855e-05,
+      "loss": 1.0291,
+      "step": 36000
+    },
+    {
+      "epoch": 3.42329245703299,
+      "grad_norm": 0.87139892578125,
+      "learning_rate": 1.5777527668354904e-05,
+      "loss": 1.0275,
+      "step": 36500
+    },
+    {
+      "epoch": 3.47018687425262,
+      "grad_norm": 0.8827325701713562,
+      "learning_rate": 1.5308572500468957e-05,
+      "loss": 1.0304,
+      "step": 37000
+    },
+    {
+      "epoch": 3.51708129147225,
+      "grad_norm": 0.8981931805610657,
+      "learning_rate": 1.4839617332583005e-05,
+      "loss": 1.0296,
+      "step": 37500
+    },
+    {
+      "epoch": 3.5639757086918804,
+      "grad_norm": 0.8706479668617249,
+      "learning_rate": 1.4370662164697055e-05,
+      "loss": 1.0222,
+      "step": 38000
+    },
+    {
+      "epoch": 3.6108701259115104,
+      "grad_norm": 0.9338583946228027,
+      "learning_rate": 1.3901706996811106e-05,
+      "loss": 1.0271,
+      "step": 38500
+    },
+    {
+      "epoch": 3.6577645431311403,
+      "grad_norm": 0.9135944843292236,
+      "learning_rate": 1.3433689739260927e-05,
+      "loss": 1.0284,
+      "step": 39000
+    },
+    {
+      "epoch": 3.7046589603507702,
+      "grad_norm": 0.9738103747367859,
+      "learning_rate": 1.296567248171075e-05,
+      "loss": 1.0312,
+      "step": 39500
+    },
+    {
+      "epoch": 3.7515533775704,
+      "grad_norm": 0.9819686412811279,
+      "learning_rate": 1.2496717313824799e-05,
+      "loss": 1.0263,
+      "step": 40000
+    },
+    {
+      "epoch": 3.79844779479003,
+      "grad_norm": 0.8923668265342712,
+      "learning_rate": 1.2027762145938848e-05,
+      "loss": 1.0331,
+      "step": 40500
+    },
+    {
+      "epoch": 3.84534221200966,
+      "grad_norm": 0.9926149249076843,
+      "learning_rate": 1.1558806978052898e-05,
+      "loss": 1.0233,
+      "step": 41000
+    },
+    {
+      "epoch": 3.8922366292292905,
+      "grad_norm": 0.8542903661727905,
+      "learning_rate": 1.109078972050272e-05,
+      "loss": 1.0257,
+      "step": 41500
+    },
+    {
+      "epoch": 3.93913104644892,
+      "grad_norm": 0.8875910043716431,
+      "learning_rate": 1.062183455261677e-05,
+      "loss": 1.0304,
+      "step": 42000
+    },
+    {
+      "epoch": 3.9860254636685504,
+      "grad_norm": 0.9772380590438843,
+      "learning_rate": 1.0152879384730821e-05,
+      "loss": 1.0198,
+      "step": 42500
+    },
+    {
+      "epoch": 4.0,
+      "eval_bleu_eng_Latn-kam_Latn": 0.4666093377048015,
+      "eval_bleu_eng_Latn-kik_Latn": 2.229006349816899,
+      "eval_bleu_eng_Latn-luo_Latn": 6.981679468332617,
+      "eval_bleu_eng_Latn-mer_Latn": 0.07746222799067638,
+      "eval_bleu_eng_Latn-som_Latn": 13.603096448156661,
+      "eval_bleu_eng_Latn-swh_Latn": 50.52106297558092,
+      "eval_bleu_kam_Latn-eng_Latn": 29.285398745670836,
+      "eval_bleu_kik_Latn-eng_Latn": 39.99303600842124,
+      "eval_bleu_luo_Latn-eng_Latn": 38.135666779535974,
+      "eval_bleu_mer_Latn-eng_Latn": 33.29506320125331,
+      "eval_bleu_som_Latn-eng_Latn": 52.3043943429511,
+      "eval_bleu_swh_Latn-eng_Latn": 64.78388013811517,
+      "eval_loss": 1.0888630151748657,
+      "eval_runtime": 3672.1113,
+      "eval_samples_per_second": 16.639,
+      "eval_steps_per_second": 2.08,
+      "step": 42649
     }
   ],
   "logging_steps": 500,
@@ -527,7 +701,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4534043401188147e+17,
+  "total_flos": 1.9376583813685248e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null