philschmid HF staff committed on
Commit
d508a96
•
1 Parent(s): 6744131

Training in progress, step 1400

Browse files
Files changed (39) hide show
  1. checkpoint-1000/latest +0 -1
  2. {checkpoint-1000 → checkpoint-1400}/config.json +0 -0
  3. {checkpoint-1000 → checkpoint-1400}/generation_config.json +0 -0
  4. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  20. checkpoint-1400/latest +1 -0
  21. {checkpoint-1000 → checkpoint-1400}/model-00001-of-00002.safetensors +1 -1
  22. {checkpoint-1000 → checkpoint-1400}/model-00002-of-00002.safetensors +1 -1
  23. {checkpoint-1000 → checkpoint-1400}/model.safetensors.index.json +0 -0
  24. {checkpoint-1000 → checkpoint-1400}/rng_state_0.pth +0 -0
  25. {checkpoint-1000 → checkpoint-1400}/rng_state_1.pth +0 -0
  26. {checkpoint-1000 → checkpoint-1400}/rng_state_2.pth +0 -0
  27. {checkpoint-1000 → checkpoint-1400}/rng_state_3.pth +0 -0
  28. {checkpoint-1000 → checkpoint-1400}/rng_state_4.pth +0 -0
  29. {checkpoint-1000 → checkpoint-1400}/rng_state_5.pth +0 -0
  30. {checkpoint-1000 → checkpoint-1400}/rng_state_6.pth +0 -0
  31. {checkpoint-1000 → checkpoint-1400}/rng_state_7.pth +0 -0
  32. {checkpoint-1000 → checkpoint-1400}/special_tokens_map.json +0 -0
  33. {checkpoint-1000 → checkpoint-1400}/tokenizer.json +0 -0
  34. {checkpoint-1000 → checkpoint-1400}/tokenizer.model +0 -0
  35. {checkpoint-1000 → checkpoint-1400}/tokenizer_config.json +0 -0
  36. {checkpoint-1000 → checkpoint-1400}/trainer_state.json +243 -3
  37. {checkpoint-1000 → checkpoint-1400}/training_args.bin +0 -0
  38. {checkpoint-1000 → checkpoint-1400}/zero_to_fp32.py +0 -0
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-1000/latest DELETED
@@ -1 +0,0 @@
1
- global_step1000
 
 
{checkpoint-1000 → checkpoint-1400}/config.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/generation_config.json RENAMED
File without changes
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5703ed353a444bcb887496e437dcc7b16f8bdaae831e4e6d4f7ae38a220a5df3
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d48928da7236975b7d563675900115bd40723d9bb946a5e12aeb3802cda65a29
3
  size 10107626487
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65d6937e778c81679ecadc56c59bb90e942cd63ffc72959111931f27b22bea57
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d0660e0465b031017aa8ac057dadaad3fefbbe9a853e985e3fbcc3b9e3d14b
3
  size 10107626487
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7078c4a9cb10247701dadff610cea1a671df2436f25c85ce7ec7f830d5864cff
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3b54faf82a144d566ec1e13391ced58d82ba950098490d54bc7e6c12b2f665
3
  size 10107626487
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:632c044c424e36e9ac4d04c3db4d08e0453493e42ad88a6bbfdceadec83fd685
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8730071eb5352476076fecea2c16332b76414b1587e9170294cd8be64d658ae4
3
  size 10107626487
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c31e28791a65e07de46a9f174419774d315db15b86b1c34705af7a4bc7d99a66
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1ecb36573935c513134e905740b8404414b0756f654afb39ce67f9eed0f089c
3
  size 10107626487
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07b03db6da0fdf58a5abbc86c3e32353bc4ea225df4814dd9b0c87094cba0a38
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2977e65540cd59b6e490d355dee2caa45edce69df4ccad7d08cd12e64717c58d
3
  size 10107626487
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1621abeb507c8e04e54646297d7dfc104a12de8750d5ef2e450703166e2e6a23
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a91e4e9224ed4db284c54c1c60b66d8d8db36f96ef264a6d0724d17d6d4159e2
3
  size 10107626487
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb1c8f3f16701d2f9fe90ecb59b247f72b3a0bde6d3db2267a5c44807cf77842
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a2ea44cd053990b0f804df2960b863b7ee45e2d96229c63aeb1b4f124b33f1
3
  size 10107626487
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4658b6b3c7f7a48dd296534d1ec3dc6d546541ef01a0a3c11c403c23380b9050
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b427970d78d97a2990a387d0111436e57780839b349873130d3acb3276e6590
3
  size 168086
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc1c982bfd73913563caf1b20156081f202141ef2b4942de7b04a60a2735599a
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25cd95fea28ed0b382448d3984a3b9c57253251be9b58ca7a14bdb60e0044800
3
  size 168086
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c42c53335aafb877cfbba5749485ee3cd61082a493faf56f57f8709eb5e6252
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a12da95cea77c0e4bf6c95cf547c52358a1a3dcc47d277ebc6a798c9caa01814
3
  size 168086
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccd7e1a57d35ed384ee88e36e9a684ec710295b14dacde4ea9e97a2e0dd5e2f4
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883c364ce237d2eaae2ccaffd1020d9bf975a0709445f76ec935f476a9e0355c
3
  size 168086
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa0a5cc62566f10c81232feb782efb1f9f3ef3a38928981584b06095d904ad93
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f7dee3b066702da3217434a6fdf3be471ea2359e4a7f550407604a5b0eac806
3
  size 168086
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8585a4986217ec7db11028c99ffd2fb8f753ad9973841c9b2be8b872fa0f5af7
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:144d70ba74905a12764aa691b542b267d9b7798360cf8174acfb33c6916e7df6
3
  size 168086
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:689aff8732618feb10643e53dc24544b9e68c3fe79059dd39c4324f792c95f8e
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c9ff5c63b0581bc3bf5296b9188a1bf794ee1128efd17617a1b6d6c5bd71ef6
3
  size 168086
{checkpoint-1000/global_step1000 → checkpoint-1400/global_step1400}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:200527511d1203250da233f27a50a276bf203ca991380ba5c7c096f1955a2752
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddbafdcfd970d545721c1221e491df7ab9c50322e6312b7ec1b036b6481b3dcb
3
  size 168086
checkpoint-1400/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step1400
{checkpoint-1000 → checkpoint-1400}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11913c7f1753ca039f36c4c2b6bbefb982404ad4203fe8ba96a2f892793f3922
3
  size 9976576392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d81d64070bf6ada487dbdc2aaec06e52c932926918d08c4298cb20772e593337
3
  size 9976576392
{checkpoint-1000 → checkpoint-1400}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9877b1573d48279b0f52a3d16935fdb3bf67e40baa64bc8774bbb8005dd3519
3
  size 3500296504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df84e357f53f857462e754af1e9a7f50bdbf8395868f7a7403b6b72c5d94b5a7
3
  size 3500296504
{checkpoint-1000 → checkpoint-1400}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/rng_state_0.pth RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/rng_state_1.pth RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/rng_state_2.pth RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/rng_state_3.pth RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/rng_state_4.pth RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/rng_state_5.pth RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/rng_state_6.pth RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/rng_state_7.pth RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/special_tokens_map.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/tokenizer.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/tokenizer.model RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/tokenizer_config.json RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3333333333333333,
5
- "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -606,11 +606,251 @@
606
  "learning_rate": 0.0003,
607
  "loss": 0.7357,
608
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  }
610
  ],
611
  "max_steps": 3000,
612
  "num_train_epochs": 9223372036854775807,
613
- "total_flos": 418759311360000.0,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4666666666666667,
5
+ "global_step": 1400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
606
  "learning_rate": 0.0003,
607
  "loss": 0.7357,
608
  "step": 1000
609
+ },
610
+ {
611
+ "epoch": 0.34,
612
+ "learning_rate": 0.0003,
613
+ "loss": 0.6983,
614
+ "step": 1010
615
+ },
616
+ {
617
+ "epoch": 0.34,
618
+ "learning_rate": 0.0003,
619
+ "loss": 0.6989,
620
+ "step": 1020
621
+ },
622
+ {
623
+ "epoch": 0.34,
624
+ "learning_rate": 0.0003,
625
+ "loss": 0.6692,
626
+ "step": 1030
627
+ },
628
+ {
629
+ "epoch": 0.35,
630
+ "learning_rate": 0.0003,
631
+ "loss": 0.6553,
632
+ "step": 1040
633
+ },
634
+ {
635
+ "epoch": 0.35,
636
+ "learning_rate": 0.0003,
637
+ "loss": 0.6272,
638
+ "step": 1050
639
+ },
640
+ {
641
+ "epoch": 0.35,
642
+ "learning_rate": 0.0003,
643
+ "loss": 0.6092,
644
+ "step": 1060
645
+ },
646
+ {
647
+ "epoch": 0.36,
648
+ "learning_rate": 0.0003,
649
+ "loss": 0.604,
650
+ "step": 1070
651
+ },
652
+ {
653
+ "epoch": 0.36,
654
+ "learning_rate": 0.0003,
655
+ "loss": 0.6494,
656
+ "step": 1080
657
+ },
658
+ {
659
+ "epoch": 0.36,
660
+ "learning_rate": 0.0003,
661
+ "loss": 0.6155,
662
+ "step": 1090
663
+ },
664
+ {
665
+ "epoch": 0.37,
666
+ "learning_rate": 0.0003,
667
+ "loss": 0.5713,
668
+ "step": 1100
669
+ },
670
+ {
671
+ "epoch": 0.37,
672
+ "learning_rate": 0.0003,
673
+ "loss": 0.5358,
674
+ "step": 1110
675
+ },
676
+ {
677
+ "epoch": 0.37,
678
+ "learning_rate": 0.0003,
679
+ "loss": 0.5005,
680
+ "step": 1120
681
+ },
682
+ {
683
+ "epoch": 0.38,
684
+ "learning_rate": 0.0003,
685
+ "loss": 0.483,
686
+ "step": 1130
687
+ },
688
+ {
689
+ "epoch": 0.38,
690
+ "learning_rate": 0.0003,
691
+ "loss": 0.4736,
692
+ "step": 1140
693
+ },
694
+ {
695
+ "epoch": 0.38,
696
+ "learning_rate": 0.0003,
697
+ "loss": 0.4585,
698
+ "step": 1150
699
+ },
700
+ {
701
+ "epoch": 0.39,
702
+ "learning_rate": 0.0003,
703
+ "loss": 0.4442,
704
+ "step": 1160
705
+ },
706
+ {
707
+ "epoch": 0.39,
708
+ "learning_rate": 0.0003,
709
+ "loss": 0.4392,
710
+ "step": 1170
711
+ },
712
+ {
713
+ "epoch": 0.39,
714
+ "learning_rate": 0.0003,
715
+ "loss": 0.4084,
716
+ "step": 1180
717
+ },
718
+ {
719
+ "epoch": 0.4,
720
+ "learning_rate": 0.0003,
721
+ "loss": 0.4104,
722
+ "step": 1190
723
+ },
724
+ {
725
+ "epoch": 0.4,
726
+ "learning_rate": 0.0003,
727
+ "loss": 0.3855,
728
+ "step": 1200
729
+ },
730
+ {
731
+ "epoch": 0.4,
732
+ "learning_rate": 0.0003,
733
+ "loss": 0.3662,
734
+ "step": 1210
735
+ },
736
+ {
737
+ "epoch": 0.41,
738
+ "learning_rate": 0.0003,
739
+ "loss": 0.3727,
740
+ "step": 1220
741
+ },
742
+ {
743
+ "epoch": 0.41,
744
+ "learning_rate": 0.0003,
745
+ "loss": 0.3563,
746
+ "step": 1230
747
+ },
748
+ {
749
+ "epoch": 0.41,
750
+ "learning_rate": 0.0003,
751
+ "loss": 0.3297,
752
+ "step": 1240
753
+ },
754
+ {
755
+ "epoch": 0.42,
756
+ "learning_rate": 0.0003,
757
+ "loss": 0.314,
758
+ "step": 1250
759
+ },
760
+ {
761
+ "epoch": 0.42,
762
+ "learning_rate": 0.0003,
763
+ "loss": 0.2996,
764
+ "step": 1260
765
+ },
766
+ {
767
+ "epoch": 0.42,
768
+ "learning_rate": 0.0003,
769
+ "loss": 0.3,
770
+ "step": 1270
771
+ },
772
+ {
773
+ "epoch": 0.43,
774
+ "learning_rate": 0.0003,
775
+ "loss": 0.2773,
776
+ "step": 1280
777
+ },
778
+ {
779
+ "epoch": 0.43,
780
+ "learning_rate": 0.0003,
781
+ "loss": 0.2665,
782
+ "step": 1290
783
+ },
784
+ {
785
+ "epoch": 0.43,
786
+ "learning_rate": 0.0003,
787
+ "loss": 0.2646,
788
+ "step": 1300
789
+ },
790
+ {
791
+ "epoch": 0.44,
792
+ "learning_rate": 0.0003,
793
+ "loss": 0.2406,
794
+ "step": 1310
795
+ },
796
+ {
797
+ "epoch": 0.44,
798
+ "learning_rate": 0.0003,
799
+ "loss": 0.2206,
800
+ "step": 1320
801
+ },
802
+ {
803
+ "epoch": 0.44,
804
+ "learning_rate": 0.0003,
805
+ "loss": 0.2325,
806
+ "step": 1330
807
+ },
808
+ {
809
+ "epoch": 0.45,
810
+ "learning_rate": 0.0003,
811
+ "loss": 0.2152,
812
+ "step": 1340
813
+ },
814
+ {
815
+ "epoch": 0.45,
816
+ "learning_rate": 0.0003,
817
+ "loss": 0.2011,
818
+ "step": 1350
819
+ },
820
+ {
821
+ "epoch": 0.45,
822
+ "learning_rate": 0.0003,
823
+ "loss": 0.1953,
824
+ "step": 1360
825
+ },
826
+ {
827
+ "epoch": 0.46,
828
+ "learning_rate": 0.0003,
829
+ "loss": 0.1862,
830
+ "step": 1370
831
+ },
832
+ {
833
+ "epoch": 0.46,
834
+ "learning_rate": 0.0003,
835
+ "loss": 0.1814,
836
+ "step": 1380
837
+ },
838
+ {
839
+ "epoch": 0.46,
840
+ "learning_rate": 0.0003,
841
+ "loss": 0.1677,
842
+ "step": 1390
843
+ },
844
+ {
845
+ "epoch": 0.47,
846
+ "learning_rate": 0.0003,
847
+ "loss": 0.1657,
848
+ "step": 1400
849
  }
850
  ],
851
  "max_steps": 3000,
852
  "num_train_epochs": 9223372036854775807,
853
+ "total_flos": 586263035904000.0,
854
  "trial_name": null,
855
  "trial_params": null
856
  }
{checkpoint-1000 → checkpoint-1400}/training_args.bin RENAMED
File without changes
{checkpoint-1000 → checkpoint-1400}/zero_to_fp32.py RENAMED
File without changes
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88f41ffe5f5f16201f54cd7acfed3173d526460b94c4275a088ef0eba3eb4bac
3
- size 24561
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b5454467270806d8ae2ed14569e1e08a427c329f3a4cebd109224d30bd23d58
3
+ size 26131