philschmid (HF staff) committed on
Commit
10b336a
•
1 Parent(s): 072be9d

Training in progress, step 1800

Files changed (39)
  1. checkpoint-1400/latest +0 -1
  2. {checkpoint-1400 → checkpoint-1800}/config.json +0 -0
  3. {checkpoint-1400 → checkpoint-1800}/generation_config.json +0 -0
  4. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  20. checkpoint-1800/latest +1 -0
  21. {checkpoint-1400 → checkpoint-1800}/model-00001-of-00002.safetensors +1 -1
  22. {checkpoint-1400 → checkpoint-1800}/model-00002-of-00002.safetensors +1 -1
  23. {checkpoint-1400 → checkpoint-1800}/model.safetensors.index.json +0 -0
  24. {checkpoint-1400 → checkpoint-1800}/rng_state_0.pth +0 -0
  25. {checkpoint-1400 → checkpoint-1800}/rng_state_1.pth +0 -0
  26. {checkpoint-1400 → checkpoint-1800}/rng_state_2.pth +0 -0
  27. {checkpoint-1400 → checkpoint-1800}/rng_state_3.pth +0 -0
  28. {checkpoint-1400 → checkpoint-1800}/rng_state_4.pth +0 -0
  29. {checkpoint-1400 → checkpoint-1800}/rng_state_5.pth +0 -0
  30. {checkpoint-1400 → checkpoint-1800}/rng_state_6.pth +0 -0
  31. {checkpoint-1400 → checkpoint-1800}/rng_state_7.pth +0 -0
  32. {checkpoint-1400 → checkpoint-1800}/special_tokens_map.json +0 -0
  33. {checkpoint-1400 → checkpoint-1800}/tokenizer.json +0 -0
  34. {checkpoint-1400 → checkpoint-1800}/tokenizer.model +0 -0
  35. {checkpoint-1400 → checkpoint-1800}/tokenizer_config.json +0 -0
  36. {checkpoint-1400 → checkpoint-1800}/trainer_state.json +243 -3
  37. {checkpoint-1400 → checkpoint-1800}/training_args.bin +0 -0
  38. {checkpoint-1400 → checkpoint-1800}/zero_to_fp32.py +0 -0
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-1400/latest DELETED
@@ -1 +0,0 @@
- global_step1400
{checkpoint-1400 → checkpoint-1800}/config.json RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/generation_config.json RENAMED
File without changes
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d48928da7236975b7d563675900115bd40723d9bb946a5e12aeb3802cda65a29
+ oid sha256:65ac051edb6a7231e7ff80bf7b6ff872456d98c899aa44c7f0de57ec100e241a
 size 10107626487
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:60d0660e0465b031017aa8ac057dadaad3fefbbe9a853e985e3fbcc3b9e3d14b
+ oid sha256:f63f4158c79b9097827e52efe21c448978712c950ba30906df8d4e81779f0705
 size 10107626487
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:fc3b54faf82a144d566ec1e13391ced58d82ba950098490d54bc7e6c12b2f665
+ oid sha256:a06a42dbae3be95f0f13a0c6a7e3d8fff63f06a1b88e67198a327c7c3f2b4fdf
 size 10107626487
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:8730071eb5352476076fecea2c16332b76414b1587e9170294cd8be64d658ae4
+ oid sha256:a2d439a4859cf52b20fb93dc23b4dba8c8083d3d58b8bd2b35129a82e4f9847f
 size 10107626487
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:e1ecb36573935c513134e905740b8404414b0756f654afb39ce67f9eed0f089c
+ oid sha256:56e9a0ca5c754b3520440fc3506fe5a4e46f2a2eade8194719f6c07e5ee7b32e
 size 10107626487
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:2977e65540cd59b6e490d355dee2caa45edce69df4ccad7d08cd12e64717c58d
+ oid sha256:ea9a156039d51fadfd2736deea73e96bf8dfb0dbf11a6c3b10eb075a358c9b8f
 size 10107626487
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:a91e4e9224ed4db284c54c1c60b66d8d8db36f96ef264a6d0724d17d6d4159e2
+ oid sha256:103ef36796553ded7aa4fea4a7e84fb84384939f9f20cda0a94ecf5087a56b7d
 size 10107626487
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:88a2ea44cd053990b0f804df2960b863b7ee45e2d96229c63aeb1b4f124b33f1
+ oid sha256:478d4c670c95787a5e189a35cf100f0c722538933effcc1efdf43d7a3a79ec87
 size 10107626487
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5b427970d78d97a2990a387d0111436e57780839b349873130d3acb3276e6590
+ oid sha256:1a3559441b3540dc1b5a49a8fb7253c34731a3906e17ecc6f79b18ee56a4ba0e
 size 168086
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:25cd95fea28ed0b382448d3984a3b9c57253251be9b58ca7a14bdb60e0044800
+ oid sha256:2c7f165ac77d9d755737a9b47099cbc30144ddc12bbdf2a30877c884fe0aa613
 size 168086
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:a12da95cea77c0e4bf6c95cf547c52358a1a3dcc47d277ebc6a798c9caa01814
+ oid sha256:1d7943fb9f5e3008adcd3ed98b51af946f70d290a7337c4ffbe0eb7a9b97be6c
 size 168086
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:883c364ce237d2eaae2ccaffd1020d9bf975a0709445f76ec935f476a9e0355c
+ oid sha256:734fbce19436e77d545003a57bdb0fcc5fbd70f67630be95f23d1f26b813fc8b
 size 168086
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1f7dee3b066702da3217434a6fdf3be471ea2359e4a7f550407604a5b0eac806
+ oid sha256:fb7a3c3a029b300ee81735e4b5d95db67561d27acbcc726de134c1749f3d5442
 size 168086
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:144d70ba74905a12764aa691b542b267d9b7798360cf8174acfb33c6916e7df6
+ oid sha256:afac81b2fa8bcebadbac9d41f1d41c618419c5aa49f521a8b868d88affb1b81b
 size 168086
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1c9ff5c63b0581bc3bf5296b9188a1bf794ee1128efd17617a1b6d6c5bd71ef6
+ oid sha256:0c36b334041e232c5d22af305a776884cf9a86554d427f772411f35d878d40e0
 size 168086
{checkpoint-1400/global_step1400 → checkpoint-1800/global_step1800}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ddbafdcfd970d545721c1221e491df7ab9c50322e6312b7ec1b036b6481b3dcb
+ oid sha256:4741731558644f602064546705ffa83f9879c68a3314de93ea623d611af243e5
 size 168086
checkpoint-1800/latest ADDED
@@ -0,0 +1 @@
+ global_step1800
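
Note: the "latest" file is just a tag naming the current DeepSpeed shard folder (here global_step1800); the zero_to_fp32.py helper shipped inside each checkpoint folder uses it to locate the per-rank ZeRO shards and merge them into a single fp32 state dict. A minimal sketch of that consolidation, assuming DeepSpeed is installed (the output filename is illustrative, not part of this commit):

    # Merge the per-rank ZeRO shards referenced by checkpoint-1800/latest
    # (global_step1800) into one fp32 state dict and save it with torch.
    import torch
    from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

    state_dict = get_fp32_state_dict_from_zero_checkpoint("checkpoint-1800")
    torch.save(state_dict, "checkpoint-1800/pytorch_model_fp32.bin")  # illustrative output path

The bundled script does the same thing from the command line, e.g. python zero_to_fp32.py checkpoint-1800 pytorch_model_fp32.bin.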
{checkpoint-1400 → checkpoint-1800}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d81d64070bf6ada487dbdc2aaec06e52c932926918d08c4298cb20772e593337
+ oid sha256:cc8b8d03a8fe3e6b5c85db79b65e4586478a90485d98555e56952666cdae15e5
 size 9976576392
{checkpoint-1400 → checkpoint-1800}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:df84e357f53f857462e754af1e9a7f50bdbf8395868f7a7403b6b72c5d94b5a7
+ oid sha256:ad1f107946442c1ba1bd23d3ab4b3b2906ea95617d47c6b2f8fe6b989084b50d
 size 3500296504
{checkpoint-1400 → checkpoint-1800}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/rng_state_0.pth RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/rng_state_1.pth RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/rng_state_2.pth RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/rng_state_3.pth RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/rng_state_4.pth RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/rng_state_5.pth RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/rng_state_6.pth RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/rng_state_7.pth RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/special_tokens_map.json RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/tokenizer.json RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/tokenizer.model RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/tokenizer_config.json RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
- "epoch": 0.4666666666666667,
- "global_step": 1400,
+ "epoch": 0.6,
+ "global_step": 1800,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -846,11 +846,251 @@
 "learning_rate": 0.0003,
 "loss": 0.1657,
 "step": 1400
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0003,
+ "loss": 0.155,
+ "step": 1410
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 0.0003,
+ "loss": 0.1517,
+ "step": 1420
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0003,
+ "loss": 0.1481,
+ "step": 1430
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0003,
+ "loss": 0.1396,
+ "step": 1440
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 0.0003,
+ "loss": 0.1301,
+ "step": 1450
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.0003,
+ "loss": 0.1274,
+ "step": 1460
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.0003,
+ "loss": 0.1244,
+ "step": 1470
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 0.0003,
+ "loss": 0.1172,
+ "step": 1480
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0003,
+ "loss": 0.1177,
+ "step": 1490
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0003,
+ "loss": 0.1118,
+ "step": 1500
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 0.0003,
+ "loss": 0.113,
+ "step": 1510
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0003,
+ "loss": 0.3687,
+ "step": 1520
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0003,
+ "loss": 0.1801,
+ "step": 1530
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 0.0003,
+ "loss": 0.1534,
+ "step": 1540
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0003,
+ "loss": 0.1248,
+ "step": 1550
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0003,
+ "loss": 0.1091,
+ "step": 1560
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 0.0003,
+ "loss": 0.0965,
+ "step": 1570
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0003,
+ "loss": 0.098,
+ "step": 1580
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0003,
+ "loss": 0.097,
+ "step": 1590
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 0.0003,
+ "loss": 0.0885,
+ "step": 1600
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.0003,
+ "loss": 0.0795,
+ "step": 1610
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.0003,
+ "loss": 0.0843,
+ "step": 1620
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 0.0003,
+ "loss": 0.0734,
+ "step": 1630
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0003,
+ "loss": 0.0744,
+ "step": 1640
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0003,
+ "loss": 0.0794,
+ "step": 1650
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 0.0003,
+ "loss": 0.0708,
+ "step": 1660
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0003,
+ "loss": 0.1133,
+ "step": 1670
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0003,
+ "loss": 0.1582,
+ "step": 1680
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 0.0003,
+ "loss": 0.1106,
+ "step": 1690
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.0003,
+ "loss": 0.0904,
+ "step": 1700
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.0003,
+ "loss": 0.0785,
+ "step": 1710
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 0.0003,
+ "loss": 0.0747,
+ "step": 1720
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0003,
+ "loss": 0.0704,
+ "step": 1730
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0003,
+ "loss": 0.0634,
+ "step": 1740
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 0.0003,
+ "loss": 0.0629,
+ "step": 1750
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.0003,
+ "loss": 0.057,
+ "step": 1760
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.0003,
+ "loss": 0.0563,
+ "step": 1770
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 0.0003,
+ "loss": 0.054,
+ "step": 1780
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0003,
+ "loss": 0.0532,
+ "step": 1790
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0003,
+ "loss": 0.0525,
+ "step": 1800
 }
 ],
 "max_steps": 3000,
 "num_train_epochs": 9223372036854775807,
- "total_flos": 586263035904000.0,
+ "total_flos": 753766760448000.0,
 "trial_name": null,
 "trial_params": null
 }
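
For reference, the updated bookkeeping is internally consistent: the epoch field tracks global_step divided by 3,000 steps per epoch (inferred from the 1400/0.4667 and 1800/0.6 pairs, not a stored field), training is bounded by max_steps rather than epochs (hence the 64-bit sentinel in num_train_epochs), and total_flos grows linearly with the step count. A small sanity-check sketch using only values copied from the diff above:

    # Values taken from the trainer_state.json diff; steps_per_epoch is an inference.
    steps_per_epoch = 3000
    assert abs(1400 / steps_per_epoch - 0.4666666666666667) < 1e-12
    assert 1800 / steps_per_epoch == 0.6
    # total_flos scales with the number of optimizer steps (1400 -> 1800).
    assert 586263035904000 // 1400 * 1800 == 753766760448000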
{checkpoint-1400 → checkpoint-1800}/training_args.bin RENAMED
File without changes
{checkpoint-1400 → checkpoint-1800}/zero_to_fp32.py RENAMED
File without changes
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:959f335ea05d3561ef4372cd1e8d8c2c8124e9a070658a308f82975d8ed3a93b
- size 30841
+ oid sha256:953ae36b2f89bfca964195d3702f47bd8b1f095dbf1e56005adcfb5972042fe7
+ size 32411