philschmid HF staff committed on
Commit
3dd94b9
•
1 Parent(s): 10b336a

Training in progress, step 1900

Browse files
Files changed (39) hide show
  1. checkpoint-1500/latest +0 -1
  2. {checkpoint-1500 → checkpoint-1900}/config.json +0 -0
  3. {checkpoint-1500 → checkpoint-1900}/generation_config.json +0 -0
  4. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  20. checkpoint-1900/latest +1 -0
  21. {checkpoint-1500 → checkpoint-1900}/model-00001-of-00002.safetensors +1 -1
  22. {checkpoint-1500 → checkpoint-1900}/model-00002-of-00002.safetensors +1 -1
  23. {checkpoint-1500 → checkpoint-1900}/model.safetensors.index.json +0 -0
  24. {checkpoint-1500 → checkpoint-1900}/rng_state_0.pth +0 -0
  25. {checkpoint-1500 → checkpoint-1900}/rng_state_1.pth +0 -0
  26. {checkpoint-1500 → checkpoint-1900}/rng_state_2.pth +0 -0
  27. {checkpoint-1500 → checkpoint-1900}/rng_state_3.pth +0 -0
  28. {checkpoint-1500 → checkpoint-1900}/rng_state_4.pth +0 -0
  29. {checkpoint-1500 → checkpoint-1900}/rng_state_5.pth +0 -0
  30. {checkpoint-1500 → checkpoint-1900}/rng_state_6.pth +0 -0
  31. {checkpoint-1500 → checkpoint-1900}/rng_state_7.pth +0 -0
  32. {checkpoint-1500 → checkpoint-1900}/special_tokens_map.json +0 -0
  33. {checkpoint-1500 → checkpoint-1900}/tokenizer.json +0 -0
  34. {checkpoint-1500 → checkpoint-1900}/tokenizer.model +0 -0
  35. {checkpoint-1500 → checkpoint-1900}/tokenizer_config.json +0 -0
  36. {checkpoint-1500 → checkpoint-1900}/trainer_state.json +243 -3
  37. {checkpoint-1500 → checkpoint-1900}/training_args.bin +0 -0
  38. {checkpoint-1500 → checkpoint-1900}/zero_to_fp32.py +0 -0
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-1500/latest DELETED
@@ -1 +0,0 @@
1
- global_step1500
 
 
{checkpoint-1500 → checkpoint-1900}/config.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/generation_config.json RENAMED
File without changes
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be234974669f3fadc6f6cbd37d452cd9934b5baf0c5c8c7e1c6b4137198abe52
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47aebfb81f8363ed05b74e6e078681b273c5c586465aaeef3d06ab2733851a41
3
  size 10107626487
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efc2e8be7f1f3ab9f8f99201560b172df4044f01701bc038d6290d215cfae01c
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c4b9eee899438e56990c5b63498d9dcb5a31753aaf89888cc5b0f2ae771706
3
  size 10107626487
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2165ab382bfdfdbf16b93de6450522cccf99b62071bca0b967949cf333ac657e
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea0f41d07ac677f422404956c83f402b623d1e9d38f80d96aa7df1573be8ff3
3
  size 10107626487
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ffd46eeebd3371a3b4b86eb3043327577493233d73a94c9bb477d60b233368c
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff2158c21bc75a93ba75193e8cd8dfad392a2398d8707967fcb537e61891b1a3
3
  size 10107626487
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b0718ea2367838043f31bbcecb9244b1ed91ffef1d2ca1462dd26c04c7489c
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c811cf5b1861c8cb662c054b12a7c96a038902adb71f04de6bf96a65b6246ec4
3
  size 10107626487
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54231c179632b2326d7b09b3385f441573f6500c70b342193c4260c7c8bd085e
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9369b4ef2bf8dddb4168a4446f9088e8f3fb3b051aff7a7e19a3049c2070e54
3
  size 10107626487
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd30beca9133984f1cb9d79a189892ed188f27c8388db7aa58c1da5602b68251
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef94ff510dd0d525c7d5571350b2b024f9cab3ccce19b0a137485160166c1b8
3
  size 10107626487
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffc12faa6160bd48d1e8d7c22fd5dacfb9b2c72fd9b8c0b15b28e54a7ceebad9
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa3a389cbb94be741314103205c427d4a3571699efd16e0fd5f242bcb0c9f9f8
3
  size 10107626487
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eee19a14911d3ed28d1d543bd79d2dd862e5a570f2d967e42a87991d0a33e8c6
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ccec6639f6021228a72cacd939c896abbd5ea2c92885d42b8682b9ec5808e12
3
  size 168086
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3c0127bac24efc303b80e612fc9e224dd30b89da7db5488f6c14771d46648f2
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb94590dee0991ff663b4bfea29d11e9fb5258d9f6b09f9dc2c4ed958a22cbbc
3
  size 168086
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09ee6e175062ef86f7fc01d12a5bc90ac8148ba0918a8debb63553579b33b7fa
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd1a137c362ef4d5e939a77f90b7f1238a607611aca192d48e5c14e5590f89c8
3
  size 168086
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d50b37adf6fa6b474bee839f1d619ee75ce3c807e483c9db094f19f9db3e95f
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00c67c840ba12469a70530ece889168c13d1c236e3c47f561627f2fb998a5811
3
  size 168086
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fa6ab2edb8180b9dd9b85477f05b17f0d390568d5145a16e8aa261e96cc1d1b
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6cd53ec91a613c2b080addf94707266554f06bea53f22e5f2410638b7c2f359
3
  size 168086
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e330ab2aa79060a8373472b9f063fd27cab075cee334e9d1a8bb9172811b145c
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8380e0444737b81d5c7bb2a3201de3b50244230313758be4830937fbfcdd62e
3
  size 168086
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49a42a6db08c6f1abc3d971a342c8b8af2eafd434e02783f9576a53acf666928
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99848a79deee2a4543bbb55739697dc1b1e83d7124d2b9100b203e0aea8ede67
3
  size 168086
{checkpoint-1500/global_step1500 → checkpoint-1900/global_step1900}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8251c8a3a28153639016d2e924ecd80a8c9779255ab274fcbccce7098548386c
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c98e12012ad590a146a8cf1c7bce43258a07f093310465ea9b884c26768dfbb4
3
  size 168086
checkpoint-1900/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step1900
{checkpoint-1500 → checkpoint-1900}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9eb4326fec531d0679ee14fa4bdac4c9f46ba12c00419f7e9672fa8f3f3fad0a
3
  size 9976576392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0250158745c111e3a3bd8c0c63bb88e9f24ad6e3b56dea72fd77a43559e57cb
3
  size 9976576392
{checkpoint-1500 → checkpoint-1900}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37b31b0207fede92f81ca33b70043c969525893bbe5d409f4c636566178f19b9
3
  size 3500296504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf8e87043471e756376e41c4bb6a0d60729025052f4bc413ae38b154f3eaa9d9
3
  size 3500296504
{checkpoint-1500 → checkpoint-1900}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/rng_state_0.pth RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/rng_state_1.pth RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/rng_state_2.pth RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/rng_state_3.pth RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/rng_state_4.pth RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/rng_state_5.pth RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/rng_state_6.pth RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/rng_state_7.pth RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/special_tokens_map.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/tokenizer.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/tokenizer.model RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/tokenizer_config.json RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5,
5
- "global_step": 1500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -906,11 +906,251 @@
906
  "learning_rate": 0.0003,
907
  "loss": 0.1118,
908
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
909
  }
910
  ],
911
  "max_steps": 3000,
912
  "num_train_epochs": 9223372036854775807,
913
- "total_flos": 628138967040000.0,
914
  "trial_name": null,
915
  "trial_params": null
916
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6333333333333333,
5
+ "global_step": 1900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
906
  "learning_rate": 0.0003,
907
  "loss": 0.1118,
908
  "step": 1500
909
+ },
910
+ {
911
+ "epoch": 0.5,
912
+ "learning_rate": 0.0003,
913
+ "loss": 0.113,
914
+ "step": 1510
915
+ },
916
+ {
917
+ "epoch": 0.51,
918
+ "learning_rate": 0.0003,
919
+ "loss": 0.3687,
920
+ "step": 1520
921
+ },
922
+ {
923
+ "epoch": 0.51,
924
+ "learning_rate": 0.0003,
925
+ "loss": 0.1801,
926
+ "step": 1530
927
+ },
928
+ {
929
+ "epoch": 0.51,
930
+ "learning_rate": 0.0003,
931
+ "loss": 0.1534,
932
+ "step": 1540
933
+ },
934
+ {
935
+ "epoch": 0.52,
936
+ "learning_rate": 0.0003,
937
+ "loss": 0.1248,
938
+ "step": 1550
939
+ },
940
+ {
941
+ "epoch": 0.52,
942
+ "learning_rate": 0.0003,
943
+ "loss": 0.1091,
944
+ "step": 1560
945
+ },
946
+ {
947
+ "epoch": 0.52,
948
+ "learning_rate": 0.0003,
949
+ "loss": 0.0965,
950
+ "step": 1570
951
+ },
952
+ {
953
+ "epoch": 0.53,
954
+ "learning_rate": 0.0003,
955
+ "loss": 0.098,
956
+ "step": 1580
957
+ },
958
+ {
959
+ "epoch": 0.53,
960
+ "learning_rate": 0.0003,
961
+ "loss": 0.097,
962
+ "step": 1590
963
+ },
964
+ {
965
+ "epoch": 0.53,
966
+ "learning_rate": 0.0003,
967
+ "loss": 0.0885,
968
+ "step": 1600
969
+ },
970
+ {
971
+ "epoch": 0.54,
972
+ "learning_rate": 0.0003,
973
+ "loss": 0.0795,
974
+ "step": 1610
975
+ },
976
+ {
977
+ "epoch": 0.54,
978
+ "learning_rate": 0.0003,
979
+ "loss": 0.0843,
980
+ "step": 1620
981
+ },
982
+ {
983
+ "epoch": 0.54,
984
+ "learning_rate": 0.0003,
985
+ "loss": 0.0734,
986
+ "step": 1630
987
+ },
988
+ {
989
+ "epoch": 0.55,
990
+ "learning_rate": 0.0003,
991
+ "loss": 0.0744,
992
+ "step": 1640
993
+ },
994
+ {
995
+ "epoch": 0.55,
996
+ "learning_rate": 0.0003,
997
+ "loss": 0.0794,
998
+ "step": 1650
999
+ },
1000
+ {
1001
+ "epoch": 0.55,
1002
+ "learning_rate": 0.0003,
1003
+ "loss": 0.0708,
1004
+ "step": 1660
1005
+ },
1006
+ {
1007
+ "epoch": 0.56,
1008
+ "learning_rate": 0.0003,
1009
+ "loss": 0.1133,
1010
+ "step": 1670
1011
+ },
1012
+ {
1013
+ "epoch": 0.56,
1014
+ "learning_rate": 0.0003,
1015
+ "loss": 0.1582,
1016
+ "step": 1680
1017
+ },
1018
+ {
1019
+ "epoch": 0.56,
1020
+ "learning_rate": 0.0003,
1021
+ "loss": 0.1106,
1022
+ "step": 1690
1023
+ },
1024
+ {
1025
+ "epoch": 0.57,
1026
+ "learning_rate": 0.0003,
1027
+ "loss": 0.0904,
1028
+ "step": 1700
1029
+ },
1030
+ {
1031
+ "epoch": 0.57,
1032
+ "learning_rate": 0.0003,
1033
+ "loss": 0.0785,
1034
+ "step": 1710
1035
+ },
1036
+ {
1037
+ "epoch": 0.57,
1038
+ "learning_rate": 0.0003,
1039
+ "loss": 0.0747,
1040
+ "step": 1720
1041
+ },
1042
+ {
1043
+ "epoch": 0.58,
1044
+ "learning_rate": 0.0003,
1045
+ "loss": 0.0704,
1046
+ "step": 1730
1047
+ },
1048
+ {
1049
+ "epoch": 0.58,
1050
+ "learning_rate": 0.0003,
1051
+ "loss": 0.0634,
1052
+ "step": 1740
1053
+ },
1054
+ {
1055
+ "epoch": 0.58,
1056
+ "learning_rate": 0.0003,
1057
+ "loss": 0.0629,
1058
+ "step": 1750
1059
+ },
1060
+ {
1061
+ "epoch": 0.59,
1062
+ "learning_rate": 0.0003,
1063
+ "loss": 0.057,
1064
+ "step": 1760
1065
+ },
1066
+ {
1067
+ "epoch": 0.59,
1068
+ "learning_rate": 0.0003,
1069
+ "loss": 0.0563,
1070
+ "step": 1770
1071
+ },
1072
+ {
1073
+ "epoch": 0.59,
1074
+ "learning_rate": 0.0003,
1075
+ "loss": 0.054,
1076
+ "step": 1780
1077
+ },
1078
+ {
1079
+ "epoch": 0.6,
1080
+ "learning_rate": 0.0003,
1081
+ "loss": 0.0532,
1082
+ "step": 1790
1083
+ },
1084
+ {
1085
+ "epoch": 0.6,
1086
+ "learning_rate": 0.0003,
1087
+ "loss": 0.0525,
1088
+ "step": 1800
1089
+ },
1090
+ {
1091
+ "epoch": 0.6,
1092
+ "learning_rate": 0.0003,
1093
+ "loss": 0.0519,
1094
+ "step": 1810
1095
+ },
1096
+ {
1097
+ "epoch": 0.61,
1098
+ "learning_rate": 0.0003,
1099
+ "loss": 0.0478,
1100
+ "step": 1820
1101
+ },
1102
+ {
1103
+ "epoch": 0.61,
1104
+ "learning_rate": 0.0003,
1105
+ "loss": 0.0473,
1106
+ "step": 1830
1107
+ },
1108
+ {
1109
+ "epoch": 0.61,
1110
+ "learning_rate": 0.0003,
1111
+ "loss": 0.0473,
1112
+ "step": 1840
1113
+ },
1114
+ {
1115
+ "epoch": 0.62,
1116
+ "learning_rate": 0.0003,
1117
+ "loss": 0.0492,
1118
+ "step": 1850
1119
+ },
1120
+ {
1121
+ "epoch": 0.62,
1122
+ "learning_rate": 0.0003,
1123
+ "loss": 0.0451,
1124
+ "step": 1860
1125
+ },
1126
+ {
1127
+ "epoch": 0.62,
1128
+ "learning_rate": 0.0003,
1129
+ "loss": 0.0453,
1130
+ "step": 1870
1131
+ },
1132
+ {
1133
+ "epoch": 0.63,
1134
+ "learning_rate": 0.0003,
1135
+ "loss": 0.0439,
1136
+ "step": 1880
1137
+ },
1138
+ {
1139
+ "epoch": 0.63,
1140
+ "learning_rate": 0.0003,
1141
+ "loss": 0.0435,
1142
+ "step": 1890
1143
+ },
1144
+ {
1145
+ "epoch": 0.63,
1146
+ "learning_rate": 0.0003,
1147
+ "loss": 0.0435,
1148
+ "step": 1900
1149
  }
1150
  ],
1151
  "max_steps": 3000,
1152
  "num_train_epochs": 9223372036854775807,
1153
+ "total_flos": 795642691584000.0,
1154
  "trial_name": null,
1155
  "trial_params": null
1156
  }
{checkpoint-1500 → checkpoint-1900}/training_args.bin RENAMED
File without changes
{checkpoint-1500 → checkpoint-1900}/zero_to_fp32.py RENAMED
File without changes
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:953ae36b2f89bfca964195d3702f47bd8b1f095dbf1e56005adcfb5972042fe7
3
- size 32411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1542e5f72a0deb50ea56f278bcbfb04cc7ef3686cad25fabc136934d1b2ae0e6
3
+ size 33981