philschmid (HF staff) committed
Commit dd5b800 • 1 Parent(s): 42d082a

Training in progress, step 1600
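Commits like this one are typically generated automatically when the Hugging Face `Trainer` is configured to push intermediate checkpoints to the Hub while training runs. The sketch below is a minimal, assumed configuration that would produce step-based checkpoint pushes with an automatic "Training in progress, step N" commit message; the actual model, dataset, and `hub_strategy` for this run are not recorded in the commit and are assumptions here.

```python
from transformers import TrainingArguments

# Minimal sketch (assumed values, not taken from this repository):
# save a checkpoint every 400 optimizer steps and push it to the Hub.
training_args = TrainingArguments(
    output_dir="checkpoint-dir",     # local output dir mirrored to the Hub repo (assumed name)
    save_strategy="steps",
    save_steps=400,                  # matches the checkpoint-1200 -> checkpoint-1600 spacing
    push_to_hub=True,
    hub_strategy="all_checkpoints",  # assumption: push full checkpoint folders, not just final weights
    bf16=True,                       # assumption, consistent with the bf16 ZeRO optimizer shards
    learning_rate=3e-4,              # matches "learning_rate": 0.0003 in trainer_state.json
    max_steps=3000,                  # matches "max_steps": 3000 in trainer_state.json
)

# A Trainer built with these args would then push during trainer.train():
# trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)
# trainer.train()
```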

Files changed (39)
  1. checkpoint-1200/latest +0 -1
  2. {checkpoint-1200 β†’ checkpoint-1600}/config.json +0 -0
  3. {checkpoint-1200 β†’ checkpoint-1600}/generation_config.json +0 -0
  4. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  20. checkpoint-1600/latest +1 -0
  21. {checkpoint-1200 β†’ checkpoint-1600}/model-00001-of-00002.safetensors +1 -1
  22. {checkpoint-1200 β†’ checkpoint-1600}/model-00002-of-00002.safetensors +1 -1
  23. {checkpoint-1200 β†’ checkpoint-1600}/model.safetensors.index.json +0 -0
  24. {checkpoint-1200 β†’ checkpoint-1600}/rng_state_0.pth +0 -0
  25. {checkpoint-1200 β†’ checkpoint-1600}/rng_state_1.pth +0 -0
  26. {checkpoint-1200 β†’ checkpoint-1600}/rng_state_2.pth +0 -0
  27. {checkpoint-1200 β†’ checkpoint-1600}/rng_state_3.pth +0 -0
  28. {checkpoint-1200 β†’ checkpoint-1600}/rng_state_4.pth +0 -0
  29. {checkpoint-1200 β†’ checkpoint-1600}/rng_state_5.pth +0 -0
  30. {checkpoint-1200 β†’ checkpoint-1600}/rng_state_6.pth +0 -0
  31. {checkpoint-1200 β†’ checkpoint-1600}/rng_state_7.pth +0 -0
  32. {checkpoint-1200 β†’ checkpoint-1600}/special_tokens_map.json +0 -0
  33. {checkpoint-1200 β†’ checkpoint-1600}/tokenizer.json +0 -0
  34. {checkpoint-1200 β†’ checkpoint-1600}/tokenizer.model +0 -0
  35. {checkpoint-1200 β†’ checkpoint-1600}/tokenizer_config.json +0 -0
  36. {checkpoint-1200 β†’ checkpoint-1600}/trainer_state.json +243 -3
  37. {checkpoint-1200 β†’ checkpoint-1600}/training_args.bin +0 -0
  38. {checkpoint-1200 β†’ checkpoint-1600}/zero_to_fp32.py +0 -0
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-1200/latest DELETED
@@ -1 +0,0 @@
- global_step1200
{checkpoint-1200 β†’ checkpoint-1600}/config.json RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/generation_config.json RENAMED
File without changes
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4fb262812656b3c83b6881b1dd67c5767d8042011235ae6468454646240b5c07
+ oid sha256:1a340884351df9f228d3ba5317543b112e21edc4d1572228a4abd7118e419a6b
  size 10107626487
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7df89141b87c7b3401cf3dc6d0f0e35b07ee4007b03d264482fa67df22e6e386
+ oid sha256:27f75017ef2953071a44cde3f1155976fb76dde43b6395726f2683ee1ec2c250
  size 10107626487
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:304104b8210689a208414ce338a1a8170d07fd50ede690c06cc17f711af7d23b
+ oid sha256:fdfad233cd4ea82dc4f02a8e5b074984bb6a29a18f6262838ec7b7f1630e0ac8
  size 10107626487
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:11e7a70ad03df41c4584486b35cde5c60d0c66a22c3f5c4048017d438d1d7dd5
+ oid sha256:5919bc8e571e808ff3b2bcb9ad7014597b2ee31e29993555c14491a33d11f095
  size 10107626487
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:11e5c69f087a06dfa4218e7205637830d59e2eb6ab4496e20b6099fcccbc9b07
+ oid sha256:8c354eb1ceeeb29fb3a5f70328aa4850898429dd56b0f49cfb1b15ac4000f975
  size 10107626487
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:37dd48878dd48bb7d2a1f2ea052444d84a13e6f4cbab8f1d27904757ae116bd0
+ oid sha256:fead0347839e08434f2eed3165dad3e70691836ea6e9cf64ef56551331997bca
  size 10107626487
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4d614936212979e10ebba5d41469df3f0f5d966c2da7d3029cfa1ec1e9dd897a
+ oid sha256:f8ffe9fbb7a126ec39112724337caecc4eae7d8e492d63490d8567816ba07929
  size 10107626487
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3266f649233e5304749c7c1f07769559935f00b03922e28bff0442991ec9388a
+ oid sha256:96e21de250ee92a08fe926befd4f33f363a7c9bd7ca8bbb6aef12bce9df04133
  size 10107626487
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ee838bbe2d8ed873c80d9a5740fcb7f54c91e439e0f4bdde882b4456ea232c7c
+ oid sha256:91599707db75d7b03814882cbcbb8b854875d9e88102c0831463b1e42ed49ab1
  size 168086
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4326e6a49ee2354fb513f4f301a33c21ffb84a9a51a4e3d0ed33bc12d02bd10b
+ oid sha256:6831020de473339b10b00488798cd0193af1763454bfa118f482faf07d70a44d
  size 168086
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d6c42d8a6eeae2786121f541dc5d4a06728de74fdd785c4e82bf7be14708e787
+ oid sha256:ddb9abf6f8ac61756727e9fcc585fd5a1a70c1a20ca3eb82c79388ae9fe06779
  size 168086
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ca8f493f0fa1429963ac77a6e815a9f189220490b4f4457f7be2b942cdc01726
+ oid sha256:a24b271888c79b57824d2821f84a48dcb6381041a8b0c6c06463ebd491bb032d
  size 168086
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7c1911bcfb001e09fe12e3ee69beaea09f5f021dc647d8b1040b0d66821cc448
+ oid sha256:93af96588d797250ee8a47897e3e963aa391dabb815f6714e949496001e60f03
  size 168086
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6c7c6cda3f5f3d82146b47b1ad4b836ecfff16ca2e586ea073dd665626f8757e
+ oid sha256:288e31b1bddda038ec9427bc5bb7b953b1d58f1e7c11dbbf18e74dc37512d377
  size 168086
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:23cb97ab9d849ada9216fd67b47e13c5ebfbeaeb019c59c09a9cb3298aa097e6
+ oid sha256:bab9e0ea1b5660b9e2221c0e11c6a20f979a0b1641b3514c45386900d1001699
  size 168086
{checkpoint-1200/global_step1200 β†’ checkpoint-1600/global_step1600}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5bb17570ce5f68b67d4232815d8eb3b18765ca6071753b7ef99040ac8710b3ec
+ oid sha256:122e850bb9d0ec838955d8d4d6eabbb7c4437e56754813e6645e6ab2a32d64c8
  size 168086
checkpoint-1600/latest ADDED
@@ -0,0 +1 @@
+ global_step1600
{checkpoint-1200 β†’ checkpoint-1600}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:050176f4baa1bd8c33427c5122e69e830566029acf7a4cca7415b3b41047fcbd
+ oid sha256:1bb8059dad24ddb21134712cb2d7c09e74afdb9c0ad8407d026104190bec0ae3
  size 9976576392
{checkpoint-1200 β†’ checkpoint-1600}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:30aa4a97478659826d8dbb57cb3e6ba545204647306edfcfec56c36a5b191c85
+ oid sha256:bf47ea3281e028d797e91e4208dafdd29c9912946218b26b9fc740a42513cda2
  size 3500296504
{checkpoint-1200 β†’ checkpoint-1600}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/rng_state_0.pth RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/rng_state_1.pth RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/rng_state_2.pth RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/rng_state_3.pth RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/rng_state_4.pth RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/rng_state_5.pth RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/rng_state_6.pth RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/rng_state_7.pth RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/special_tokens_map.json RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/tokenizer.json RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/tokenizer.model RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/tokenizer_config.json RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.4,
- "global_step": 1200,
+ "epoch": 0.5333333333333333,
+ "global_step": 1600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -726,11 +726,251 @@
  "learning_rate": 0.0003,
  "loss": 0.3855,
  "step": 1200
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 0.0003,
+ "loss": 0.3662,
+ "step": 1210
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.0003,
+ "loss": 0.3727,
+ "step": 1220
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.0003,
+ "loss": 0.3563,
+ "step": 1230
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 0.0003,
+ "loss": 0.3297,
+ "step": 1240
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0003,
+ "loss": 0.314,
+ "step": 1250
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0003,
+ "loss": 0.2996,
+ "step": 1260
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 0.0003,
+ "loss": 0.3,
+ "step": 1270
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0003,
+ "loss": 0.2773,
+ "step": 1280
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0003,
+ "loss": 0.2665,
+ "step": 1290
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 0.0003,
+ "loss": 0.2646,
+ "step": 1300
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0003,
+ "loss": 0.2406,
+ "step": 1310
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0003,
+ "loss": 0.2206,
+ "step": 1320
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 0.0003,
+ "loss": 0.2325,
+ "step": 1330
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.0003,
+ "loss": 0.2152,
+ "step": 1340
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.0003,
+ "loss": 0.2011,
+ "step": 1350
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 0.0003,
+ "loss": 0.1953,
+ "step": 1360
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0003,
+ "loss": 0.1862,
+ "step": 1370
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0003,
+ "loss": 0.1814,
+ "step": 1380
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 0.0003,
+ "loss": 0.1677,
+ "step": 1390
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0003,
+ "loss": 0.1657,
+ "step": 1400
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0003,
+ "loss": 0.155,
+ "step": 1410
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0003,
+ "loss": 0.1517,
+ "step": 1420
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0003,
+ "loss": 0.1481,
+ "step": 1430
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0003,
+ "loss": 0.1396,
+ "step": 1440
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0003,
+ "loss": 0.1301,
+ "step": 1450
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.0003,
+ "loss": 0.1274,
+ "step": 1460
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.0003,
+ "loss": 0.1244,
+ "step": 1470
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.0003,
+ "loss": 0.1172,
+ "step": 1480
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0003,
+ "loss": 0.1177,
+ "step": 1490
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0003,
+ "loss": 0.1118,
+ "step": 1500
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0003,
+ "loss": 0.113,
+ "step": 1510
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0003,
+ "loss": 0.3687,
+ "step": 1520
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0003,
+ "loss": 0.1801,
+ "step": 1530
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0003,
+ "loss": 0.1534,
+ "step": 1540
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0003,
+ "loss": 0.1248,
+ "step": 1550
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0003,
+ "loss": 0.1091,
+ "step": 1560
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0003,
+ "loss": 0.0965,
+ "step": 1570
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0003,
+ "loss": 0.098,
+ "step": 1580
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0003,
+ "loss": 0.097,
+ "step": 1590
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0003,
+ "loss": 0.0885,
+ "step": 1600
  }
  ],
  "max_steps": 3000,
  "num_train_epochs": 9223372036854775807,
- "total_flos": 502511173632000.0,
+ "total_flos": 670014898176000.0,
  "trial_name": null,
  "trial_params": null
  }
{checkpoint-1200 β†’ checkpoint-1600}/training_args.bin RENAMED
File without changes
{checkpoint-1200 β†’ checkpoint-1600}/zero_to_fp32.py RENAMED
File without changes
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:315e08b502208730f4cc5abee6046d569656431e4bc2fa327e702fbff33f5015
- size 27701
+ oid sha256:677d38e5d0f2898fe52eabe578b18234ab0985e274317f79127ce656648b82b9
+ size 29271