hemanth-kj committed on
Commit
6715c53
•
1 Parent(s): 72cacfc

Training in progress, step 3750

Browse files
Files changed (27) hide show
  1. last-checkpoint/README.md +0 -1
  2. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  3. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  4. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  5. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  7. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  8. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  9. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  10. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  11. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  12. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  13. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  14. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  15. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  16. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  17. last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/trainer_state.json +2253 -3
last-checkpoint/README.md CHANGED
@@ -5,6 +5,5 @@ library_name: peft
5
 
6
  ### Framework versions
7
 
8
- - PEFT 0.4.0.dev0
9
 
10
  - PEFT 0.4.0.dev0
 
5
 
6
  ### Framework versions
7
 
 
8
 
9
  - PEFT 0.4.0.dev0
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc3aba48715eba0d62814c116be69cdd8db40c6bc42ce0caf09bc8fd54ff0bad
3
  size 6508524919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1e478c6b534ac1a6b3724122ba6eadc55c9ebd14b8c8a3c77a4070b5fecf18b
3
  size 6508524919
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:482603883dc54fe0438c3e4d2df1653c84bf964b9ee4150b6df3ed7c6c65a0ad
3
  size 39324734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7904e9b27c6f63f3fa7db70a98212d63295b71fac68ed438eff83c57c218b8a4
3
  size 39324734
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac6cda5d42888c8bd34166fdf0673c46cc2cc6720eb9d95e7f52ae00c200eee4
3
  size 6508524919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c7f87564e6231ab85520e9f53484a46cd0ba70c16747048130c451c483a0ad1
3
  size 6508524919
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2212e8fb1026d9c4a234a31b92fcbc3b9757e4661b361ba24c109fc438bd5efc
3
  size 39324734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b493617a1c5f0b725e613a361de24b572b3ffa31bad16f66769d6c3e64f8b31f
3
  size 39324734
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e0efab9f86a1fc4cdefe9209f280e916eb88e9f639304743a62c915be58130a
3
  size 6508524919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a3493442f0b865ac4957812c5d91470c29fb7b82f07e877fd4496eda970c20
3
  size 6508524919
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c1d2000930eb4b557d099f2ddd740c2f3eb3cee1071cac00368cfc8d837f4d
3
  size 39324734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8466a86c49e8236c78f7387ca8759ca4d6149928ec2b5b68cc53a2cf5964c1ac
3
  size 39324734
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67f9ce15873371920fc37cb57c0c541b4ebfc5a589c2f344f8900182569870e4
3
  size 6508524919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32ef82faa62873b20f6a31b3066eb743684c6539bc69d39cd8f4e254afcee227
3
  size 6508524919
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e658ab4cd3a5fb2a89ca0cc414ca8859577c5643624125d62142c5fb1e5ea419
3
  size 39324734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34da03236277c4d33da2eb226487d7f0402647525de8cf3eb9fe938fdd80b19f
3
  size 39324734
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfbc572ac2614b0f71af3cece56fd993f587bc61dd630482ade88c179db97a9f
3
  size 6508524919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c54e8d22446098c827f41f6cfce50b601ae02ad77c7ac0f1763863e304db9cc8
3
  size 6508524919
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d1938abf09346d77d8fc8232f5b275addd5d5c5a3b0741ddb694c65cb509083
3
  size 39324734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a30be550513dc83fe62650233c1755dbc7f1fabcfc514008f08df44747e1110a
3
  size 39324734
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56a74cc6cf0057fda63ded0033822c845711207b58de56e6571f53dc7626c1d5
3
  size 6508524919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99a982fa3883066192d867416d3993db8ed0549d069664b3d9d6339665e6504
3
  size 6508524919
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4135022cae2c7b096309646b76bc23b5b2be5ef498db8b11dad0f656f57cc589
3
  size 39324734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2a9de517df626a32a0a6be3fe0584b5a93357f29e5bdaf8d2470aafa149331d
3
  size 39324734
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a07cf5614bb028d36f6496e22a69c00112170bb48a7fa5769c999304030ab0e1
3
  size 6508524919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d1bc13993452afb59ed904e00ddb76573791405ab1c8362e525688d25e4695
3
  size 6508524919
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a86ac1a1fea9b8099833730b3bb980c521629a67af4f8ddfbe286dd38bfdb1
3
  size 39324734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d00093a950f59861e781d901e6ef8e3eeeba09754c690ed0c16419b138db547
3
  size 39324734
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acc28e5d4b9ee438f714f9ba46d05afb2425a8ac1bb7fa1fc700214d52cbd723
3
  size 6508524919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8ce1839dc0971f32364a192d5e8cfa35ebf685c0b4a8c150cf3fcdfe9010933
3
  size 6508524919
last-checkpoint/{global_step1875 → global_step3750}/zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:647e8061c41695b8f1c8597798fc9bb8112c1b108e26a0bc6483fb4754c5af8e
3
  size 39324734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dceaa97d8787b904a9222ee2f66ba95d2f3d704f59c1251822139b844914d590
3
  size 39324734
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1875
 
1
+ global_step3750
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:285a047ead58adb79ee83ad0db3adbbda92d36a204c3cc5c668c1f063f1637c4
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbe79feaa7e801abdfdff085f376707d4056b6625d388ab81d8946abe948c28c
3
  size 21687
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60722f7ef4638cf0aae77ec645716aada21f8d8687b56f379dbc90416a04543e
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c673eddb8087820d16ca257d7839924b5ea273ad632c53d36011d296ec26e4e
3
  size 21687
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29dcfebb85aa2f8ea2e4a051ecf97ef2dd4581dd56230e161c1cc07fbda1e938
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a62c8fa55abfdd5a7ab39d300c11fded08d1ab85729d1a508b2215e932eabac
3
  size 21687
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c0d3ac388816bd596a86ef1cba1c1f18e9bef020642f802d9bef6d67ab7a3b6
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:495a045a626834c2c50a62bd785234c8ce9da6f544f1db8c3db02c8866676839
3
  size 21687
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d55982f4a550feb1d76343d94cf85e3a96a0bafff49d034b23e3cf828350f12d
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:518c42482f7f9346fa44766c52d3ccc958203799e7e28df95dc60534729e6c39
3
  size 21687
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3472a83b969c6766805fbb6eac6e06e1fb0d1b413c8af2805c1a3f29eddf796
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:566154a9ab3f68cc0088de3aeb028246caf75e199c4db3e3544dd0ecca522df2
3
  size 21687
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f99091da41f7eae5cc1767b133376cec72866d89d8f1312ec50333f9096a4b7a
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b38d8e70ad291aaf6e4877862e21efc5b4922a9f3edbfc1aade9751a312510
3
  size 21687
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c9584f36f14255afedeab1ad3c2cbf4ae4ac6d7e38ed1a1a53e90e42ab75cba
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4e2a53d294da1431ca48b0893e7d3d6218fb45ed1f1839078d777d3fa0d6bca
3
  size 21687
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7203227045716482,
5
- "global_step": 1875,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2256,11 +2256,2261 @@
2256
  "learning_rate": 0.00012825250192455737,
2257
  "loss": 0.0,
2258
  "step": 1875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2259
  }
2260
  ],
2261
  "max_steps": 5206,
2262
  "num_train_epochs": 2,
2263
- "total_flos": 28526020919296.0,
2264
  "trial_name": null,
2265
  "trial_params": null
2266
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.440645409143296,
5
+ "global_step": 3750,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2256
  "learning_rate": 0.00012825250192455737,
2257
  "loss": 0.0,
2258
  "step": 1875
2259
+ },
2260
+ {
2261
+ "epoch": 0.72,
2262
+ "learning_rate": 0.00012806004618937645,
2263
+ "loss": 0.0,
2264
+ "step": 1880
2265
+ },
2266
+ {
2267
+ "epoch": 0.72,
2268
+ "learning_rate": 0.00012786759045419553,
2269
+ "loss": 0.0,
2270
+ "step": 1885
2271
+ },
2272
+ {
2273
+ "epoch": 0.73,
2274
+ "learning_rate": 0.00012767513471901462,
2275
+ "loss": 0.0,
2276
+ "step": 1890
2277
+ },
2278
+ {
2279
+ "epoch": 0.73,
2280
+ "learning_rate": 0.00012748267898383373,
2281
+ "loss": 0.0,
2282
+ "step": 1895
2283
+ },
2284
+ {
2285
+ "epoch": 0.73,
2286
+ "learning_rate": 0.0001272902232486528,
2287
+ "loss": 0.0,
2288
+ "step": 1900
2289
+ },
2290
+ {
2291
+ "epoch": 0.73,
2292
+ "learning_rate": 0.00012709776751347192,
2293
+ "loss": 0.0,
2294
+ "step": 1905
2295
+ },
2296
+ {
2297
+ "epoch": 0.73,
2298
+ "learning_rate": 0.000126905311778291,
2299
+ "loss": 0.0,
2300
+ "step": 1910
2301
+ },
2302
+ {
2303
+ "epoch": 0.74,
2304
+ "learning_rate": 0.00012671285604311008,
2305
+ "loss": 0.0,
2306
+ "step": 1915
2307
+ },
2308
+ {
2309
+ "epoch": 0.74,
2310
+ "learning_rate": 0.00012652040030792917,
2311
+ "loss": 0.0,
2312
+ "step": 1920
2313
+ },
2314
+ {
2315
+ "epoch": 0.74,
2316
+ "learning_rate": 0.00012632794457274828,
2317
+ "loss": 0.0,
2318
+ "step": 1925
2319
+ },
2320
+ {
2321
+ "epoch": 0.74,
2322
+ "learning_rate": 0.00012613548883756736,
2323
+ "loss": 0.0,
2324
+ "step": 1930
2325
+ },
2326
+ {
2327
+ "epoch": 0.74,
2328
+ "learning_rate": 0.00012594303310238647,
2329
+ "loss": 0.0,
2330
+ "step": 1935
2331
+ },
2332
+ {
2333
+ "epoch": 0.75,
2334
+ "learning_rate": 0.00012575057736720555,
2335
+ "loss": 0.0,
2336
+ "step": 1940
2337
+ },
2338
+ {
2339
+ "epoch": 0.75,
2340
+ "learning_rate": 0.00012555812163202466,
2341
+ "loss": 0.0,
2342
+ "step": 1945
2343
+ },
2344
+ {
2345
+ "epoch": 0.75,
2346
+ "learning_rate": 0.00012536566589684372,
2347
+ "loss": 0.0,
2348
+ "step": 1950
2349
+ },
2350
+ {
2351
+ "epoch": 0.75,
2352
+ "learning_rate": 0.0001251732101616628,
2353
+ "loss": 0.0,
2354
+ "step": 1955
2355
+ },
2356
+ {
2357
+ "epoch": 0.75,
2358
+ "learning_rate": 0.0001249807544264819,
2359
+ "loss": 0.0,
2360
+ "step": 1960
2361
+ },
2362
+ {
2363
+ "epoch": 0.75,
2364
+ "learning_rate": 0.000124788298691301,
2365
+ "loss": 0.0,
2366
+ "step": 1965
2367
+ },
2368
+ {
2369
+ "epoch": 0.76,
2370
+ "learning_rate": 0.0001245958429561201,
2371
+ "loss": 0.0,
2372
+ "step": 1970
2373
+ },
2374
+ {
2375
+ "epoch": 0.76,
2376
+ "learning_rate": 0.00012440338722093919,
2377
+ "loss": 0.0,
2378
+ "step": 1975
2379
+ },
2380
+ {
2381
+ "epoch": 0.76,
2382
+ "learning_rate": 0.0001242109314857583,
2383
+ "loss": 0.0,
2384
+ "step": 1980
2385
+ },
2386
+ {
2387
+ "epoch": 0.76,
2388
+ "learning_rate": 0.00012401847575057738,
2389
+ "loss": 0.0,
2390
+ "step": 1985
2391
+ },
2392
+ {
2393
+ "epoch": 0.76,
2394
+ "learning_rate": 0.00012382602001539646,
2395
+ "loss": 0.0,
2396
+ "step": 1990
2397
+ },
2398
+ {
2399
+ "epoch": 0.77,
2400
+ "learning_rate": 0.00012363356428021554,
2401
+ "loss": 0.0,
2402
+ "step": 1995
2403
+ },
2404
+ {
2405
+ "epoch": 0.77,
2406
+ "learning_rate": 0.00012344110854503465,
2407
+ "loss": 0.0,
2408
+ "step": 2000
2409
+ },
2410
+ {
2411
+ "epoch": 0.77,
2412
+ "learning_rate": 0.00012324865280985374,
2413
+ "loss": 0.0,
2414
+ "step": 2005
2415
+ },
2416
+ {
2417
+ "epoch": 0.77,
2418
+ "learning_rate": 0.00012305619707467285,
2419
+ "loss": 0.0,
2420
+ "step": 2010
2421
+ },
2422
+ {
2423
+ "epoch": 0.77,
2424
+ "learning_rate": 0.00012286374133949193,
2425
+ "loss": 0.0,
2426
+ "step": 2015
2427
+ },
2428
+ {
2429
+ "epoch": 0.78,
2430
+ "learning_rate": 0.000122671285604311,
2431
+ "loss": 0.0,
2432
+ "step": 2020
2433
+ },
2434
+ {
2435
+ "epoch": 0.78,
2436
+ "learning_rate": 0.0001224788298691301,
2437
+ "loss": 0.0,
2438
+ "step": 2025
2439
+ },
2440
+ {
2441
+ "epoch": 0.78,
2442
+ "learning_rate": 0.00012228637413394918,
2443
+ "loss": 0.0,
2444
+ "step": 2030
2445
+ },
2446
+ {
2447
+ "epoch": 0.78,
2448
+ "learning_rate": 0.0001220939183987683,
2449
+ "loss": 0.0,
2450
+ "step": 2035
2451
+ },
2452
+ {
2453
+ "epoch": 0.78,
2454
+ "learning_rate": 0.00012190146266358737,
2455
+ "loss": 0.0,
2456
+ "step": 2040
2457
+ },
2458
+ {
2459
+ "epoch": 0.79,
2460
+ "learning_rate": 0.00012170900692840648,
2461
+ "loss": 0.0,
2462
+ "step": 2045
2463
+ },
2464
+ {
2465
+ "epoch": 0.79,
2466
+ "learning_rate": 0.00012151655119322556,
2467
+ "loss": 0.0,
2468
+ "step": 2050
2469
+ },
2470
+ {
2471
+ "epoch": 0.79,
2472
+ "learning_rate": 0.00012132409545804466,
2473
+ "loss": 0.0,
2474
+ "step": 2055
2475
+ },
2476
+ {
2477
+ "epoch": 0.79,
2478
+ "learning_rate": 0.00012113163972286374,
2479
+ "loss": 0.0,
2480
+ "step": 2060
2481
+ },
2482
+ {
2483
+ "epoch": 0.79,
2484
+ "learning_rate": 0.00012093918398768285,
2485
+ "loss": 0.0,
2486
+ "step": 2065
2487
+ },
2488
+ {
2489
+ "epoch": 0.8,
2490
+ "learning_rate": 0.00012074672825250193,
2491
+ "loss": 0.0,
2492
+ "step": 2070
2493
+ },
2494
+ {
2495
+ "epoch": 0.8,
2496
+ "learning_rate": 0.00012055427251732103,
2497
+ "loss": 0.0,
2498
+ "step": 2075
2499
+ },
2500
+ {
2501
+ "epoch": 0.8,
2502
+ "learning_rate": 0.00012036181678214011,
2503
+ "loss": 0.0,
2504
+ "step": 2080
2505
+ },
2506
+ {
2507
+ "epoch": 0.8,
2508
+ "learning_rate": 0.0001201693610469592,
2509
+ "loss": 0.0,
2510
+ "step": 2085
2511
+ },
2512
+ {
2513
+ "epoch": 0.8,
2514
+ "learning_rate": 0.00011997690531177829,
2515
+ "loss": 0.0,
2516
+ "step": 2090
2517
+ },
2518
+ {
2519
+ "epoch": 0.8,
2520
+ "learning_rate": 0.00011978444957659738,
2521
+ "loss": 0.0,
2522
+ "step": 2095
2523
+ },
2524
+ {
2525
+ "epoch": 0.81,
2526
+ "learning_rate": 0.00011959199384141649,
2527
+ "loss": 0.0,
2528
+ "step": 2100
2529
+ },
2530
+ {
2531
+ "epoch": 0.81,
2532
+ "learning_rate": 0.00011939953810623557,
2533
+ "loss": 0.0,
2534
+ "step": 2105
2535
+ },
2536
+ {
2537
+ "epoch": 0.81,
2538
+ "learning_rate": 0.00011920708237105466,
2539
+ "loss": 0.0,
2540
+ "step": 2110
2541
+ },
2542
+ {
2543
+ "epoch": 0.81,
2544
+ "learning_rate": 0.00011901462663587375,
2545
+ "loss": 0.0,
2546
+ "step": 2115
2547
+ },
2548
+ {
2549
+ "epoch": 0.81,
2550
+ "learning_rate": 0.00011882217090069286,
2551
+ "loss": 0.0,
2552
+ "step": 2120
2553
+ },
2554
+ {
2555
+ "epoch": 0.82,
2556
+ "learning_rate": 0.00011862971516551194,
2557
+ "loss": 0.0,
2558
+ "step": 2125
2559
+ },
2560
+ {
2561
+ "epoch": 0.82,
2562
+ "learning_rate": 0.00011843725943033104,
2563
+ "loss": 0.0,
2564
+ "step": 2130
2565
+ },
2566
+ {
2567
+ "epoch": 0.82,
2568
+ "learning_rate": 0.00011824480369515012,
2569
+ "loss": 0.0,
2570
+ "step": 2135
2571
+ },
2572
+ {
2573
+ "epoch": 0.82,
2574
+ "learning_rate": 0.00011805234795996923,
2575
+ "loss": 0.0,
2576
+ "step": 2140
2577
+ },
2578
+ {
2579
+ "epoch": 0.82,
2580
+ "learning_rate": 0.0001178598922247883,
2581
+ "loss": 0.0,
2582
+ "step": 2145
2583
+ },
2584
+ {
2585
+ "epoch": 0.83,
2586
+ "learning_rate": 0.00011766743648960738,
2587
+ "loss": 0.0,
2588
+ "step": 2150
2589
+ },
2590
+ {
2591
+ "epoch": 0.83,
2592
+ "learning_rate": 0.00011747498075442649,
2593
+ "loss": 0.0,
2594
+ "step": 2155
2595
+ },
2596
+ {
2597
+ "epoch": 0.83,
2598
+ "learning_rate": 0.00011728252501924557,
2599
+ "loss": 0.0,
2600
+ "step": 2160
2601
+ },
2602
+ {
2603
+ "epoch": 0.83,
2604
+ "learning_rate": 0.00011709006928406467,
2605
+ "loss": 0.0,
2606
+ "step": 2165
2607
+ },
2608
+ {
2609
+ "epoch": 0.83,
2610
+ "learning_rate": 0.00011689761354888375,
2611
+ "loss": 0.0,
2612
+ "step": 2170
2613
+ },
2614
+ {
2615
+ "epoch": 0.84,
2616
+ "learning_rate": 0.00011670515781370286,
2617
+ "loss": 0.0,
2618
+ "step": 2175
2619
+ },
2620
+ {
2621
+ "epoch": 0.84,
2622
+ "learning_rate": 0.00011651270207852195,
2623
+ "loss": 0.0,
2624
+ "step": 2180
2625
+ },
2626
+ {
2627
+ "epoch": 0.84,
2628
+ "learning_rate": 0.00011632024634334104,
2629
+ "loss": 0.0,
2630
+ "step": 2185
2631
+ },
2632
+ {
2633
+ "epoch": 0.84,
2634
+ "learning_rate": 0.00011612779060816012,
2635
+ "loss": 0.0,
2636
+ "step": 2190
2637
+ },
2638
+ {
2639
+ "epoch": 0.84,
2640
+ "learning_rate": 0.00011593533487297923,
2641
+ "loss": 0.0,
2642
+ "step": 2195
2643
+ },
2644
+ {
2645
+ "epoch": 0.85,
2646
+ "learning_rate": 0.00011574287913779832,
2647
+ "loss": 0.0,
2648
+ "step": 2200
2649
+ },
2650
+ {
2651
+ "epoch": 0.85,
2652
+ "learning_rate": 0.00011555042340261741,
2653
+ "loss": 0.0,
2654
+ "step": 2205
2655
+ },
2656
+ {
2657
+ "epoch": 0.85,
2658
+ "learning_rate": 0.0001153579676674365,
2659
+ "loss": 0.0,
2660
+ "step": 2210
2661
+ },
2662
+ {
2663
+ "epoch": 0.85,
2664
+ "learning_rate": 0.00011516551193225558,
2665
+ "loss": 0.0,
2666
+ "step": 2215
2667
+ },
2668
+ {
2669
+ "epoch": 0.85,
2670
+ "learning_rate": 0.00011497305619707468,
2671
+ "loss": 0.0,
2672
+ "step": 2220
2673
+ },
2674
+ {
2675
+ "epoch": 0.85,
2676
+ "learning_rate": 0.00011478060046189376,
2677
+ "loss": 0.0,
2678
+ "step": 2225
2679
+ },
2680
+ {
2681
+ "epoch": 0.86,
2682
+ "learning_rate": 0.00011458814472671287,
2683
+ "loss": 0.0,
2684
+ "step": 2230
2685
+ },
2686
+ {
2687
+ "epoch": 0.86,
2688
+ "learning_rate": 0.00011439568899153195,
2689
+ "loss": 0.0,
2690
+ "step": 2235
2691
+ },
2692
+ {
2693
+ "epoch": 0.86,
2694
+ "learning_rate": 0.00011420323325635105,
2695
+ "loss": 0.0,
2696
+ "step": 2240
2697
+ },
2698
+ {
2699
+ "epoch": 0.86,
2700
+ "learning_rate": 0.00011401077752117013,
2701
+ "loss": 0.0,
2702
+ "step": 2245
2703
+ },
2704
+ {
2705
+ "epoch": 0.86,
2706
+ "learning_rate": 0.00011381832178598924,
2707
+ "loss": 0.0,
2708
+ "step": 2250
2709
+ },
2710
+ {
2711
+ "epoch": 0.87,
2712
+ "learning_rate": 0.00011362586605080832,
2713
+ "loss": 0.0,
2714
+ "step": 2255
2715
+ },
2716
+ {
2717
+ "epoch": 0.87,
2718
+ "learning_rate": 0.00011343341031562742,
2719
+ "loss": 0.0,
2720
+ "step": 2260
2721
+ },
2722
+ {
2723
+ "epoch": 0.87,
2724
+ "learning_rate": 0.0001132409545804465,
2725
+ "loss": 0.0,
2726
+ "step": 2265
2727
+ },
2728
+ {
2729
+ "epoch": 0.87,
2730
+ "learning_rate": 0.00011304849884526561,
2731
+ "loss": 0.0,
2732
+ "step": 2270
2733
+ },
2734
+ {
2735
+ "epoch": 0.87,
2736
+ "learning_rate": 0.00011285604311008468,
2737
+ "loss": 0.0,
2738
+ "step": 2275
2739
+ },
2740
+ {
2741
+ "epoch": 0.88,
2742
+ "learning_rate": 0.00011266358737490376,
2743
+ "loss": 0.0,
2744
+ "step": 2280
2745
+ },
2746
+ {
2747
+ "epoch": 0.88,
2748
+ "learning_rate": 0.00011247113163972287,
2749
+ "loss": 0.0,
2750
+ "step": 2285
2751
+ },
2752
+ {
2753
+ "epoch": 0.88,
2754
+ "learning_rate": 0.00011227867590454196,
2755
+ "loss": 0.0,
2756
+ "step": 2290
2757
+ },
2758
+ {
2759
+ "epoch": 0.88,
2760
+ "learning_rate": 0.00011208622016936105,
2761
+ "loss": 0.0,
2762
+ "step": 2295
2763
+ },
2764
+ {
2765
+ "epoch": 0.88,
2766
+ "learning_rate": 0.00011189376443418013,
2767
+ "loss": 0.0,
2768
+ "step": 2300
2769
+ },
2770
+ {
2771
+ "epoch": 0.89,
2772
+ "learning_rate": 0.00011170130869899924,
2773
+ "loss": 0.0,
2774
+ "step": 2305
2775
+ },
2776
+ {
2777
+ "epoch": 0.89,
2778
+ "learning_rate": 0.00011150885296381833,
2779
+ "loss": 0.0,
2780
+ "step": 2310
2781
+ },
2782
+ {
2783
+ "epoch": 0.89,
2784
+ "learning_rate": 0.00011131639722863742,
2785
+ "loss": 0.0,
2786
+ "step": 2315
2787
+ },
2788
+ {
2789
+ "epoch": 0.89,
2790
+ "learning_rate": 0.0001111239414934565,
2791
+ "loss": 0.0,
2792
+ "step": 2320
2793
+ },
2794
+ {
2795
+ "epoch": 0.89,
2796
+ "learning_rate": 0.00011093148575827562,
2797
+ "loss": 0.0,
2798
+ "step": 2325
2799
+ },
2800
+ {
2801
+ "epoch": 0.9,
2802
+ "learning_rate": 0.00011073903002309469,
2803
+ "loss": 0.0,
2804
+ "step": 2330
2805
+ },
2806
+ {
2807
+ "epoch": 0.9,
2808
+ "learning_rate": 0.0001105465742879138,
2809
+ "loss": 0.0,
2810
+ "step": 2335
2811
+ },
2812
+ {
2813
+ "epoch": 0.9,
2814
+ "learning_rate": 0.00011035411855273288,
2815
+ "loss": 0.0,
2816
+ "step": 2340
2817
+ },
2818
+ {
2819
+ "epoch": 0.9,
2820
+ "learning_rate": 0.00011016166281755196,
2821
+ "loss": 0.0,
2822
+ "step": 2345
2823
+ },
2824
+ {
2825
+ "epoch": 0.9,
2826
+ "learning_rate": 0.00010996920708237106,
2827
+ "loss": 0.0,
2828
+ "step": 2350
2829
+ },
2830
+ {
2831
+ "epoch": 0.9,
2832
+ "learning_rate": 0.00010977675134719014,
2833
+ "loss": 0.0,
2834
+ "step": 2355
2835
+ },
2836
+ {
2837
+ "epoch": 0.91,
2838
+ "learning_rate": 0.00010958429561200925,
2839
+ "loss": 0.0,
2840
+ "step": 2360
2841
+ },
2842
+ {
2843
+ "epoch": 0.91,
2844
+ "learning_rate": 0.00010939183987682833,
2845
+ "loss": 0.0,
2846
+ "step": 2365
2847
+ },
2848
+ {
2849
+ "epoch": 0.91,
2850
+ "learning_rate": 0.00010919938414164743,
2851
+ "loss": 0.0,
2852
+ "step": 2370
2853
+ },
2854
+ {
2855
+ "epoch": 0.91,
2856
+ "learning_rate": 0.00010900692840646651,
2857
+ "loss": 0.0,
2858
+ "step": 2375
2859
+ },
2860
+ {
2861
+ "epoch": 0.91,
2862
+ "learning_rate": 0.00010881447267128562,
2863
+ "loss": 0.0,
2864
+ "step": 2380
2865
+ },
2866
+ {
2867
+ "epoch": 0.92,
2868
+ "learning_rate": 0.0001086220169361047,
2869
+ "loss": 0.0,
2870
+ "step": 2385
2871
+ },
2872
+ {
2873
+ "epoch": 0.92,
2874
+ "learning_rate": 0.0001084295612009238,
2875
+ "loss": 0.0,
2876
+ "step": 2390
2877
+ },
2878
+ {
2879
+ "epoch": 0.92,
2880
+ "learning_rate": 0.00010823710546574288,
2881
+ "loss": 0.0,
2882
+ "step": 2395
2883
+ },
2884
+ {
2885
+ "epoch": 0.92,
2886
+ "learning_rate": 0.00010804464973056199,
2887
+ "loss": 0.0,
2888
+ "step": 2400
2889
+ },
2890
+ {
2891
+ "epoch": 0.92,
2892
+ "learning_rate": 0.00010785219399538106,
2893
+ "loss": 0.0,
2894
+ "step": 2405
2895
+ },
2896
+ {
2897
+ "epoch": 0.93,
2898
+ "learning_rate": 0.00010765973826020015,
2899
+ "loss": 0.0,
2900
+ "step": 2410
2901
+ },
2902
+ {
2903
+ "epoch": 0.93,
2904
+ "learning_rate": 0.00010746728252501925,
2905
+ "loss": 0.0,
2906
+ "step": 2415
2907
+ },
2908
+ {
2909
+ "epoch": 0.93,
2910
+ "learning_rate": 0.00010727482678983834,
2911
+ "loss": 0.0,
2912
+ "step": 2420
2913
+ },
2914
+ {
2915
+ "epoch": 0.93,
2916
+ "learning_rate": 0.00010708237105465743,
2917
+ "loss": 0.0,
2918
+ "step": 2425
2919
+ },
2920
+ {
2921
+ "epoch": 0.93,
2922
+ "learning_rate": 0.00010688991531947652,
2923
+ "loss": 0.0,
2924
+ "step": 2430
2925
+ },
2926
+ {
2927
+ "epoch": 0.94,
2928
+ "learning_rate": 0.00010669745958429563,
2929
+ "loss": 0.0,
2930
+ "step": 2435
2931
+ },
2932
+ {
2933
+ "epoch": 0.94,
2934
+ "learning_rate": 0.00010650500384911471,
2935
+ "loss": 0.0,
2936
+ "step": 2440
2937
+ },
2938
+ {
2939
+ "epoch": 0.94,
2940
+ "learning_rate": 0.0001063125481139338,
2941
+ "loss": 0.0,
2942
+ "step": 2445
2943
+ },
2944
+ {
2945
+ "epoch": 0.94,
2946
+ "learning_rate": 0.00010612009237875289,
2947
+ "loss": 0.0,
2948
+ "step": 2450
2949
+ },
2950
+ {
2951
+ "epoch": 0.94,
2952
+ "learning_rate": 0.000105927636643572,
2953
+ "loss": 0.0,
2954
+ "step": 2455
2955
+ },
2956
+ {
2957
+ "epoch": 0.95,
2958
+ "learning_rate": 0.00010573518090839107,
2959
+ "loss": 0.0,
2960
+ "step": 2460
2961
+ },
2962
+ {
2963
+ "epoch": 0.95,
2964
+ "learning_rate": 0.00010554272517321018,
2965
+ "loss": 0.0,
2966
+ "step": 2465
2967
+ },
2968
+ {
2969
+ "epoch": 0.95,
2970
+ "learning_rate": 0.00010535026943802926,
2971
+ "loss": 0.0,
2972
+ "step": 2470
2973
+ },
2974
+ {
2975
+ "epoch": 0.95,
2976
+ "learning_rate": 0.00010515781370284834,
2977
+ "loss": 0.0,
2978
+ "step": 2475
2979
+ },
2980
+ {
2981
+ "epoch": 0.95,
2982
+ "learning_rate": 0.00010496535796766744,
2983
+ "loss": 0.0,
2984
+ "step": 2480
2985
+ },
2986
+ {
2987
+ "epoch": 0.95,
2988
+ "learning_rate": 0.00010477290223248652,
2989
+ "loss": 0.0,
2990
+ "step": 2485
2991
+ },
2992
+ {
2993
+ "epoch": 0.96,
2994
+ "learning_rate": 0.00010458044649730563,
2995
+ "loss": 0.0,
2996
+ "step": 2490
2997
+ },
2998
+ {
2999
+ "epoch": 0.96,
3000
+ "learning_rate": 0.00010438799076212471,
3001
+ "loss": 0.0,
3002
+ "step": 2495
3003
+ },
3004
+ {
3005
+ "epoch": 0.96,
3006
+ "learning_rate": 0.00010419553502694381,
3007
+ "loss": 0.0,
3008
+ "step": 2500
3009
+ },
3010
+ {
3011
+ "epoch": 0.96,
3012
+ "learning_rate": 0.0001040030792917629,
3013
+ "loss": 0.0,
3014
+ "step": 2505
3015
+ },
3016
+ {
3017
+ "epoch": 0.96,
3018
+ "learning_rate": 0.000103810623556582,
3019
+ "loss": 0.0,
3020
+ "step": 2510
3021
+ },
3022
+ {
3023
+ "epoch": 0.97,
3024
+ "learning_rate": 0.00010361816782140109,
3025
+ "loss": 0.0,
3026
+ "step": 2515
3027
+ },
3028
+ {
3029
+ "epoch": 0.97,
3030
+ "learning_rate": 0.00010342571208622018,
3031
+ "loss": 0.0,
3032
+ "step": 2520
3033
+ },
3034
+ {
3035
+ "epoch": 0.97,
3036
+ "learning_rate": 0.00010323325635103927,
3037
+ "loss": 0.0,
3038
+ "step": 2525
3039
+ },
3040
+ {
3041
+ "epoch": 0.97,
3042
+ "learning_rate": 0.00010304080061585836,
3043
+ "loss": 0.0,
3044
+ "step": 2530
3045
+ },
3046
+ {
3047
+ "epoch": 0.97,
3048
+ "learning_rate": 0.00010284834488067744,
3049
+ "loss": 0.0,
3050
+ "step": 2535
3051
+ },
3052
+ {
3053
+ "epoch": 0.98,
3054
+ "learning_rate": 0.00010265588914549653,
3055
+ "loss": 0.0,
3056
+ "step": 2540
3057
+ },
3058
+ {
3059
+ "epoch": 0.98,
3060
+ "learning_rate": 0.00010246343341031564,
3061
+ "loss": 0.0,
3062
+ "step": 2545
3063
+ },
3064
+ {
3065
+ "epoch": 0.98,
3066
+ "learning_rate": 0.00010227097767513472,
3067
+ "loss": 0.0,
3068
+ "step": 2550
3069
+ },
3070
+ {
3071
+ "epoch": 0.98,
3072
+ "learning_rate": 0.00010207852193995382,
3073
+ "loss": 0.0,
3074
+ "step": 2555
3075
+ },
3076
+ {
3077
+ "epoch": 0.98,
3078
+ "learning_rate": 0.0001018860662047729,
3079
+ "loss": 0.0,
3080
+ "step": 2560
3081
+ },
3082
+ {
3083
+ "epoch": 0.99,
3084
+ "learning_rate": 0.00010169361046959201,
3085
+ "loss": 0.0,
3086
+ "step": 2565
3087
+ },
3088
+ {
3089
+ "epoch": 0.99,
3090
+ "learning_rate": 0.00010150115473441109,
3091
+ "loss": 0.0,
3092
+ "step": 2570
3093
+ },
3094
+ {
3095
+ "epoch": 0.99,
3096
+ "learning_rate": 0.00010130869899923019,
3097
+ "loss": 0.0,
3098
+ "step": 2575
3099
+ },
3100
+ {
3101
+ "epoch": 0.99,
3102
+ "learning_rate": 0.00010111624326404927,
3103
+ "loss": 0.0,
3104
+ "step": 2580
3105
+ },
3106
+ {
3107
+ "epoch": 0.99,
3108
+ "learning_rate": 0.00010092378752886838,
3109
+ "loss": 0.0,
3110
+ "step": 2585
3111
+ },
3112
+ {
3113
+ "epoch": 1.0,
3114
+ "learning_rate": 0.00010073133179368745,
3115
+ "loss": 0.0,
3116
+ "step": 2590
3117
+ },
3118
+ {
3119
+ "epoch": 1.0,
3120
+ "learning_rate": 0.00010053887605850656,
3121
+ "loss": 0.0,
3122
+ "step": 2595
3123
+ },
3124
+ {
3125
+ "epoch": 1.0,
3126
+ "learning_rate": 0.00010034642032332564,
3127
+ "loss": 0.0,
3128
+ "step": 2600
3129
+ },
3130
+ {
3131
+ "epoch": 1.0,
3132
+ "learning_rate": 0.00010015396458814473,
3133
+ "loss": 0.0,
3134
+ "step": 2605
3135
+ },
3136
+ {
3137
+ "epoch": 1.0,
3138
+ "learning_rate": 9.996150885296382e-05,
3139
+ "loss": 0.0,
3140
+ "step": 2610
3141
+ },
3142
+ {
3143
+ "epoch": 1.0,
3144
+ "learning_rate": 9.976905311778292e-05,
3145
+ "loss": 0.0,
3146
+ "step": 2615
3147
+ },
3148
+ {
3149
+ "epoch": 1.01,
3150
+ "learning_rate": 9.957659738260201e-05,
3151
+ "loss": 0.0,
3152
+ "step": 2620
3153
+ },
3154
+ {
3155
+ "epoch": 1.01,
3156
+ "learning_rate": 9.93841416474211e-05,
3157
+ "loss": 0.0,
3158
+ "step": 2625
3159
+ },
3160
+ {
3161
+ "epoch": 1.01,
3162
+ "learning_rate": 9.919168591224018e-05,
3163
+ "loss": 0.0,
3164
+ "step": 2630
3165
+ },
3166
+ {
3167
+ "epoch": 1.01,
3168
+ "learning_rate": 9.899923017705928e-05,
3169
+ "loss": 0.0,
3170
+ "step": 2635
3171
+ },
3172
+ {
3173
+ "epoch": 1.01,
3174
+ "learning_rate": 9.880677444187837e-05,
3175
+ "loss": 0.0,
3176
+ "step": 2640
3177
+ },
3178
+ {
3179
+ "epoch": 1.02,
3180
+ "learning_rate": 9.861431870669747e-05,
3181
+ "loss": 0.0,
3182
+ "step": 2645
3183
+ },
3184
+ {
3185
+ "epoch": 1.02,
3186
+ "learning_rate": 9.842186297151655e-05,
3187
+ "loss": 0.0,
3188
+ "step": 2650
3189
+ },
3190
+ {
3191
+ "epoch": 1.02,
3192
+ "learning_rate": 9.822940723633565e-05,
3193
+ "loss": 0.0,
3194
+ "step": 2655
3195
+ },
3196
+ {
3197
+ "epoch": 1.02,
3198
+ "learning_rate": 9.803695150115474e-05,
3199
+ "loss": 0.0,
3200
+ "step": 2660
3201
+ },
3202
+ {
3203
+ "epoch": 1.02,
3204
+ "learning_rate": 9.784449576597383e-05,
3205
+ "loss": 0.0,
3206
+ "step": 2665
3207
+ },
3208
+ {
3209
+ "epoch": 1.03,
3210
+ "learning_rate": 9.765204003079292e-05,
3211
+ "loss": 0.0,
3212
+ "step": 2670
3213
+ },
3214
+ {
3215
+ "epoch": 1.03,
3216
+ "learning_rate": 9.745958429561202e-05,
3217
+ "loss": 0.0,
3218
+ "step": 2675
3219
+ },
3220
+ {
3221
+ "epoch": 1.03,
3222
+ "learning_rate": 9.726712856043112e-05,
3223
+ "loss": 0.0,
3224
+ "step": 2680
3225
+ },
3226
+ {
3227
+ "epoch": 1.03,
3228
+ "learning_rate": 9.70746728252502e-05,
3229
+ "loss": 0.0,
3230
+ "step": 2685
3231
+ },
3232
+ {
3233
+ "epoch": 1.03,
3234
+ "learning_rate": 9.68822170900693e-05,
3235
+ "loss": 0.0,
3236
+ "step": 2690
3237
+ },
3238
+ {
3239
+ "epoch": 1.04,
3240
+ "learning_rate": 9.668976135488838e-05,
3241
+ "loss": 0.0,
3242
+ "step": 2695
3243
+ },
3244
+ {
3245
+ "epoch": 1.04,
3246
+ "learning_rate": 9.649730561970747e-05,
3247
+ "loss": 0.0,
3248
+ "step": 2700
3249
+ },
3250
+ {
3251
+ "epoch": 1.04,
3252
+ "learning_rate": 9.630484988452656e-05,
3253
+ "loss": 0.0,
3254
+ "step": 2705
3255
+ },
3256
+ {
3257
+ "epoch": 1.04,
3258
+ "learning_rate": 9.611239414934565e-05,
3259
+ "loss": 0.0,
3260
+ "step": 2710
3261
+ },
3262
+ {
3263
+ "epoch": 1.04,
3264
+ "learning_rate": 9.591993841416475e-05,
3265
+ "loss": 0.0,
3266
+ "step": 2715
3267
+ },
3268
+ {
3269
+ "epoch": 1.04,
3270
+ "learning_rate": 9.572748267898383e-05,
3271
+ "loss": 0.0,
3272
+ "step": 2720
3273
+ },
3274
+ {
3275
+ "epoch": 1.05,
3276
+ "learning_rate": 9.553502694380293e-05,
3277
+ "loss": 0.0,
3278
+ "step": 2725
3279
+ },
3280
+ {
3281
+ "epoch": 1.05,
3282
+ "learning_rate": 9.534257120862202e-05,
3283
+ "loss": 0.0,
3284
+ "step": 2730
3285
+ },
3286
+ {
3287
+ "epoch": 1.05,
3288
+ "learning_rate": 9.515011547344112e-05,
3289
+ "loss": 0.0,
3290
+ "step": 2735
3291
+ },
3292
+ {
3293
+ "epoch": 1.05,
3294
+ "learning_rate": 9.49576597382602e-05,
3295
+ "loss": 0.0,
3296
+ "step": 2740
3297
+ },
3298
+ {
3299
+ "epoch": 1.05,
3300
+ "learning_rate": 9.47652040030793e-05,
3301
+ "loss": 0.0,
3302
+ "step": 2745
3303
+ },
3304
+ {
3305
+ "epoch": 1.06,
3306
+ "learning_rate": 9.45727482678984e-05,
3307
+ "loss": 0.0,
3308
+ "step": 2750
3309
+ },
3310
+ {
3311
+ "epoch": 1.06,
3312
+ "learning_rate": 9.438029253271748e-05,
3313
+ "loss": 0.0,
3314
+ "step": 2755
3315
+ },
3316
+ {
3317
+ "epoch": 1.06,
3318
+ "learning_rate": 9.418783679753656e-05,
3319
+ "loss": 0.0,
3320
+ "step": 2760
3321
+ },
3322
+ {
3323
+ "epoch": 1.06,
3324
+ "learning_rate": 9.399538106235566e-05,
3325
+ "loss": 0.0,
3326
+ "step": 2765
3327
+ },
3328
+ {
3329
+ "epoch": 1.06,
3330
+ "learning_rate": 9.380292532717475e-05,
3331
+ "loss": 0.0,
3332
+ "step": 2770
3333
+ },
3334
+ {
3335
+ "epoch": 1.07,
3336
+ "learning_rate": 9.361046959199384e-05,
3337
+ "loss": 0.0,
3338
+ "step": 2775
3339
+ },
3340
+ {
3341
+ "epoch": 1.07,
3342
+ "learning_rate": 9.341801385681293e-05,
3343
+ "loss": 0.0,
3344
+ "step": 2780
3345
+ },
3346
+ {
3347
+ "epoch": 1.07,
3348
+ "learning_rate": 9.322555812163203e-05,
3349
+ "loss": 0.0,
3350
+ "step": 2785
3351
+ },
3352
+ {
3353
+ "epoch": 1.07,
3354
+ "learning_rate": 9.303310238645113e-05,
3355
+ "loss": 0.0,
3356
+ "step": 2790
3357
+ },
3358
+ {
3359
+ "epoch": 1.07,
3360
+ "learning_rate": 9.284064665127021e-05,
3361
+ "loss": 0.0,
3362
+ "step": 2795
3363
+ },
3364
+ {
3365
+ "epoch": 1.08,
3366
+ "learning_rate": 9.26481909160893e-05,
3367
+ "loss": 0.0,
3368
+ "step": 2800
3369
+ },
3370
+ {
3371
+ "epoch": 1.08,
3372
+ "learning_rate": 9.24557351809084e-05,
3373
+ "loss": 0.0,
3374
+ "step": 2805
3375
+ },
3376
+ {
3377
+ "epoch": 1.08,
3378
+ "learning_rate": 9.226327944572748e-05,
3379
+ "loss": 0.0,
3380
+ "step": 2810
3381
+ },
3382
+ {
3383
+ "epoch": 1.08,
3384
+ "learning_rate": 9.207082371054658e-05,
3385
+ "loss": 0.0,
3386
+ "step": 2815
3387
+ },
3388
+ {
3389
+ "epoch": 1.08,
3390
+ "learning_rate": 9.187836797536568e-05,
3391
+ "loss": 0.0,
3392
+ "step": 2820
3393
+ },
3394
+ {
3395
+ "epoch": 1.09,
3396
+ "learning_rate": 9.168591224018476e-05,
3397
+ "loss": 0.0,
3398
+ "step": 2825
3399
+ },
3400
+ {
3401
+ "epoch": 1.09,
3402
+ "learning_rate": 9.149345650500386e-05,
3403
+ "loss": 0.0,
3404
+ "step": 2830
3405
+ },
3406
+ {
3407
+ "epoch": 1.09,
3408
+ "learning_rate": 9.130100076982294e-05,
3409
+ "loss": 0.0,
3410
+ "step": 2835
3411
+ },
3412
+ {
3413
+ "epoch": 1.09,
3414
+ "learning_rate": 9.110854503464203e-05,
3415
+ "loss": 0.0,
3416
+ "step": 2840
3417
+ },
3418
+ {
3419
+ "epoch": 1.09,
3420
+ "learning_rate": 9.091608929946113e-05,
3421
+ "loss": 0.0,
3422
+ "step": 2845
3423
+ },
3424
+ {
3425
+ "epoch": 1.09,
3426
+ "learning_rate": 9.072363356428021e-05,
3427
+ "loss": 0.0,
3428
+ "step": 2850
3429
+ },
3430
+ {
3431
+ "epoch": 1.1,
3432
+ "learning_rate": 9.053117782909931e-05,
3433
+ "loss": 0.0,
3434
+ "step": 2855
3435
+ },
3436
+ {
3437
+ "epoch": 1.1,
3438
+ "learning_rate": 9.03387220939184e-05,
3439
+ "loss": 0.0,
3440
+ "step": 2860
3441
+ },
3442
+ {
3443
+ "epoch": 1.1,
3444
+ "learning_rate": 9.01462663587375e-05,
3445
+ "loss": 0.0,
3446
+ "step": 2865
3447
+ },
3448
+ {
3449
+ "epoch": 1.1,
3450
+ "learning_rate": 8.995381062355659e-05,
3451
+ "loss": 0.0,
3452
+ "step": 2870
3453
+ },
3454
+ {
3455
+ "epoch": 1.1,
3456
+ "learning_rate": 8.976135488837568e-05,
3457
+ "loss": 0.0,
3458
+ "step": 2875
3459
+ },
3460
+ {
3461
+ "epoch": 1.11,
3462
+ "learning_rate": 8.956889915319478e-05,
3463
+ "loss": 0.0,
3464
+ "step": 2880
3465
+ },
3466
+ {
3467
+ "epoch": 1.11,
3468
+ "learning_rate": 8.937644341801386e-05,
3469
+ "loss": 0.0,
3470
+ "step": 2885
3471
+ },
3472
+ {
3473
+ "epoch": 1.11,
3474
+ "learning_rate": 8.918398768283294e-05,
3475
+ "loss": 0.0,
3476
+ "step": 2890
3477
+ },
3478
+ {
3479
+ "epoch": 1.11,
3480
+ "learning_rate": 8.899153194765204e-05,
3481
+ "loss": 0.0,
3482
+ "step": 2895
3483
+ },
3484
+ {
3485
+ "epoch": 1.11,
3486
+ "learning_rate": 8.879907621247114e-05,
3487
+ "loss": 0.0,
3488
+ "step": 2900
3489
+ },
3490
+ {
3491
+ "epoch": 1.12,
3492
+ "learning_rate": 8.860662047729022e-05,
3493
+ "loss": 0.0,
3494
+ "step": 2905
3495
+ },
3496
+ {
3497
+ "epoch": 1.12,
3498
+ "learning_rate": 8.841416474210932e-05,
3499
+ "loss": 0.0,
3500
+ "step": 2910
3501
+ },
3502
+ {
3503
+ "epoch": 1.12,
3504
+ "learning_rate": 8.822170900692841e-05,
3505
+ "loss": 0.0,
3506
+ "step": 2915
3507
+ },
3508
+ {
3509
+ "epoch": 1.12,
3510
+ "learning_rate": 8.802925327174751e-05,
3511
+ "loss": 0.0,
3512
+ "step": 2920
3513
+ },
3514
+ {
3515
+ "epoch": 1.12,
3516
+ "learning_rate": 8.783679753656659e-05,
3517
+ "loss": 0.0,
3518
+ "step": 2925
3519
+ },
3520
+ {
3521
+ "epoch": 1.13,
3522
+ "learning_rate": 8.764434180138569e-05,
3523
+ "loss": 0.0,
3524
+ "step": 2930
3525
+ },
3526
+ {
3527
+ "epoch": 1.13,
3528
+ "learning_rate": 8.745188606620478e-05,
3529
+ "loss": 0.0,
3530
+ "step": 2935
3531
+ },
3532
+ {
3533
+ "epoch": 1.13,
3534
+ "learning_rate": 8.725943033102387e-05,
3535
+ "loss": 0.0,
3536
+ "step": 2940
3537
+ },
3538
+ {
3539
+ "epoch": 1.13,
3540
+ "learning_rate": 8.706697459584296e-05,
3541
+ "loss": 0.0,
3542
+ "step": 2945
3543
+ },
3544
+ {
3545
+ "epoch": 1.13,
3546
+ "learning_rate": 8.687451886066205e-05,
3547
+ "loss": 0.0,
3548
+ "step": 2950
3549
+ },
3550
+ {
3551
+ "epoch": 1.14,
3552
+ "learning_rate": 8.668206312548114e-05,
3553
+ "loss": 0.0,
3554
+ "step": 2955
3555
+ },
3556
+ {
3557
+ "epoch": 1.14,
3558
+ "learning_rate": 8.648960739030024e-05,
3559
+ "loss": 0.0,
3560
+ "step": 2960
3561
+ },
3562
+ {
3563
+ "epoch": 1.14,
3564
+ "learning_rate": 8.629715165511932e-05,
3565
+ "loss": 0.0,
3566
+ "step": 2965
3567
+ },
3568
+ {
3569
+ "epoch": 1.14,
3570
+ "learning_rate": 8.610469591993842e-05,
3571
+ "loss": 0.0,
3572
+ "step": 2970
3573
+ },
3574
+ {
3575
+ "epoch": 1.14,
3576
+ "learning_rate": 8.591224018475751e-05,
3577
+ "loss": 0.0,
3578
+ "step": 2975
3579
+ },
3580
+ {
3581
+ "epoch": 1.14,
3582
+ "learning_rate": 8.57197844495766e-05,
3583
+ "loss": 0.0,
3584
+ "step": 2980
3585
+ },
3586
+ {
3587
+ "epoch": 1.15,
3588
+ "learning_rate": 8.552732871439569e-05,
3589
+ "loss": 0.0,
3590
+ "step": 2985
3591
+ },
3592
+ {
3593
+ "epoch": 1.15,
3594
+ "learning_rate": 8.533487297921479e-05,
3595
+ "loss": 0.0,
3596
+ "step": 2990
3597
+ },
3598
+ {
3599
+ "epoch": 1.15,
3600
+ "learning_rate": 8.514241724403388e-05,
3601
+ "loss": 0.0,
3602
+ "step": 2995
3603
+ },
3604
+ {
3605
+ "epoch": 1.15,
3606
+ "learning_rate": 8.494996150885297e-05,
3607
+ "loss": 0.0,
3608
+ "step": 3000
3609
+ },
3610
+ {
3611
+ "epoch": 1.15,
3612
+ "learning_rate": 8.475750577367206e-05,
3613
+ "loss": 0.0,
3614
+ "step": 3005
3615
+ },
3616
+ {
3617
+ "epoch": 1.16,
3618
+ "learning_rate": 8.456505003849116e-05,
3619
+ "loss": 0.0,
3620
+ "step": 3010
3621
+ },
3622
+ {
3623
+ "epoch": 1.16,
3624
+ "learning_rate": 8.437259430331024e-05,
3625
+ "loss": 0.0,
3626
+ "step": 3015
3627
+ },
3628
+ {
3629
+ "epoch": 1.16,
3630
+ "learning_rate": 8.418013856812933e-05,
3631
+ "loss": 0.0,
3632
+ "step": 3020
3633
+ },
3634
+ {
3635
+ "epoch": 1.16,
3636
+ "learning_rate": 8.398768283294842e-05,
3637
+ "loss": 0.0,
3638
+ "step": 3025
3639
+ },
3640
+ {
3641
+ "epoch": 1.16,
3642
+ "learning_rate": 8.379522709776752e-05,
3643
+ "loss": 0.0,
3644
+ "step": 3030
3645
+ },
3646
+ {
3647
+ "epoch": 1.17,
3648
+ "learning_rate": 8.36027713625866e-05,
3649
+ "loss": 0.0,
3650
+ "step": 3035
3651
+ },
3652
+ {
3653
+ "epoch": 1.17,
3654
+ "learning_rate": 8.34103156274057e-05,
3655
+ "loss": 0.0,
3656
+ "step": 3040
3657
+ },
3658
+ {
3659
+ "epoch": 1.17,
3660
+ "learning_rate": 8.32178598922248e-05,
3661
+ "loss": 0.0,
3662
+ "step": 3045
3663
+ },
3664
+ {
3665
+ "epoch": 1.17,
3666
+ "learning_rate": 8.302540415704389e-05,
3667
+ "loss": 0.0,
3668
+ "step": 3050
3669
+ },
3670
+ {
3671
+ "epoch": 1.17,
3672
+ "learning_rate": 8.283294842186297e-05,
3673
+ "loss": 0.0,
3674
+ "step": 3055
3675
+ },
3676
+ {
3677
+ "epoch": 1.18,
3678
+ "learning_rate": 8.264049268668207e-05,
3679
+ "loss": 0.0,
3680
+ "step": 3060
3681
+ },
3682
+ {
3683
+ "epoch": 1.18,
3684
+ "learning_rate": 8.244803695150117e-05,
3685
+ "loss": 0.0,
3686
+ "step": 3065
3687
+ },
3688
+ {
3689
+ "epoch": 1.18,
3690
+ "learning_rate": 8.225558121632025e-05,
3691
+ "loss": 0.0,
3692
+ "step": 3070
3693
+ },
3694
+ {
3695
+ "epoch": 1.18,
3696
+ "learning_rate": 8.206312548113934e-05,
3697
+ "loss": 0.0,
3698
+ "step": 3075
3699
+ },
3700
+ {
3701
+ "epoch": 1.18,
3702
+ "learning_rate": 8.187066974595843e-05,
3703
+ "loss": 0.0,
3704
+ "step": 3080
3705
+ },
3706
+ {
3707
+ "epoch": 1.19,
3708
+ "learning_rate": 8.167821401077752e-05,
3709
+ "loss": 0.0,
3710
+ "step": 3085
3711
+ },
3712
+ {
3713
+ "epoch": 1.19,
3714
+ "learning_rate": 8.148575827559662e-05,
3715
+ "loss": 0.0,
3716
+ "step": 3090
3717
+ },
3718
+ {
3719
+ "epoch": 1.19,
3720
+ "learning_rate": 8.12933025404157e-05,
3721
+ "loss": 0.0,
3722
+ "step": 3095
3723
+ },
3724
+ {
3725
+ "epoch": 1.19,
3726
+ "learning_rate": 8.11008468052348e-05,
3727
+ "loss": 0.0,
3728
+ "step": 3100
3729
+ },
3730
+ {
3731
+ "epoch": 1.19,
3732
+ "learning_rate": 8.09083910700539e-05,
3733
+ "loss": 0.0,
3734
+ "step": 3105
3735
+ },
3736
+ {
3737
+ "epoch": 1.19,
3738
+ "learning_rate": 8.071593533487298e-05,
3739
+ "loss": 0.0,
3740
+ "step": 3110
3741
+ },
3742
+ {
3743
+ "epoch": 1.2,
3744
+ "learning_rate": 8.052347959969207e-05,
3745
+ "loss": 0.0,
3746
+ "step": 3115
3747
+ },
3748
+ {
3749
+ "epoch": 1.2,
3750
+ "learning_rate": 8.033102386451117e-05,
3751
+ "loss": 0.0,
3752
+ "step": 3120
3753
+ },
3754
+ {
3755
+ "epoch": 1.2,
3756
+ "learning_rate": 8.013856812933027e-05,
3757
+ "loss": 0.0,
3758
+ "step": 3125
3759
+ },
3760
+ {
3761
+ "epoch": 1.2,
3762
+ "learning_rate": 7.994611239414935e-05,
3763
+ "loss": 0.0,
3764
+ "step": 3130
3765
+ },
3766
+ {
3767
+ "epoch": 1.2,
3768
+ "learning_rate": 7.975365665896845e-05,
3769
+ "loss": 0.0,
3770
+ "step": 3135
3771
+ },
3772
+ {
3773
+ "epoch": 1.21,
3774
+ "learning_rate": 7.956120092378754e-05,
3775
+ "loss": 0.0,
3776
+ "step": 3140
3777
+ },
3778
+ {
3779
+ "epoch": 1.21,
3780
+ "learning_rate": 7.936874518860663e-05,
3781
+ "loss": 0.0,
3782
+ "step": 3145
3783
+ },
3784
+ {
3785
+ "epoch": 1.21,
3786
+ "learning_rate": 7.917628945342571e-05,
3787
+ "loss": 0.0,
3788
+ "step": 3150
3789
+ },
3790
+ {
3791
+ "epoch": 1.21,
3792
+ "learning_rate": 7.89838337182448e-05,
3793
+ "loss": 0.0,
3794
+ "step": 3155
3795
+ },
3796
+ {
3797
+ "epoch": 1.21,
3798
+ "learning_rate": 7.87913779830639e-05,
3799
+ "loss": 0.0,
3800
+ "step": 3160
3801
+ },
3802
+ {
3803
+ "epoch": 1.22,
3804
+ "learning_rate": 7.859892224788298e-05,
3805
+ "loss": 0.0,
3806
+ "step": 3165
3807
+ },
3808
+ {
3809
+ "epoch": 1.22,
3810
+ "learning_rate": 7.840646651270208e-05,
3811
+ "loss": 0.0,
3812
+ "step": 3170
3813
+ },
3814
+ {
3815
+ "epoch": 1.22,
3816
+ "learning_rate": 7.821401077752118e-05,
3817
+ "loss": 0.0,
3818
+ "step": 3175
3819
+ },
3820
+ {
3821
+ "epoch": 1.22,
3822
+ "learning_rate": 7.802155504234027e-05,
3823
+ "loss": 0.0,
3824
+ "step": 3180
3825
+ },
3826
+ {
3827
+ "epoch": 1.22,
3828
+ "learning_rate": 7.782909930715935e-05,
3829
+ "loss": 0.0,
3830
+ "step": 3185
3831
+ },
3832
+ {
3833
+ "epoch": 1.23,
3834
+ "learning_rate": 7.763664357197845e-05,
3835
+ "loss": 0.0,
3836
+ "step": 3190
3837
+ },
3838
+ {
3839
+ "epoch": 1.23,
3840
+ "learning_rate": 7.744418783679755e-05,
3841
+ "loss": 0.0,
3842
+ "step": 3195
3843
+ },
3844
+ {
3845
+ "epoch": 1.23,
3846
+ "learning_rate": 7.725173210161663e-05,
3847
+ "loss": 0.0,
3848
+ "step": 3200
3849
+ },
3850
+ {
3851
+ "epoch": 1.23,
3852
+ "learning_rate": 7.705927636643573e-05,
3853
+ "loss": 0.0,
3854
+ "step": 3205
3855
+ },
3856
+ {
3857
+ "epoch": 1.23,
3858
+ "learning_rate": 7.686682063125481e-05,
3859
+ "loss": 0.0,
3860
+ "step": 3210
3861
+ },
3862
+ {
3863
+ "epoch": 1.24,
3864
+ "learning_rate": 7.66743648960739e-05,
3865
+ "loss": 0.0,
3866
+ "step": 3215
3867
+ },
3868
+ {
3869
+ "epoch": 1.24,
3870
+ "learning_rate": 7.648190916089299e-05,
3871
+ "loss": 0.0,
3872
+ "step": 3220
3873
+ },
3874
+ {
3875
+ "epoch": 1.24,
3876
+ "learning_rate": 7.628945342571208e-05,
3877
+ "loss": 0.0,
3878
+ "step": 3225
3879
+ },
3880
+ {
3881
+ "epoch": 1.24,
3882
+ "learning_rate": 7.609699769053118e-05,
3883
+ "loss": 0.0,
3884
+ "step": 3230
3885
+ },
3886
+ {
3887
+ "epoch": 1.24,
3888
+ "learning_rate": 7.590454195535028e-05,
3889
+ "loss": 0.0,
3890
+ "step": 3235
3891
+ },
3892
+ {
3893
+ "epoch": 1.24,
3894
+ "learning_rate": 7.571208622016936e-05,
3895
+ "loss": 0.0,
3896
+ "step": 3240
3897
+ },
3898
+ {
3899
+ "epoch": 1.25,
3900
+ "learning_rate": 7.551963048498846e-05,
3901
+ "loss": 0.0,
3902
+ "step": 3245
3903
+ },
3904
+ {
3905
+ "epoch": 1.25,
3906
+ "learning_rate": 7.532717474980755e-05,
3907
+ "loss": 0.0,
3908
+ "step": 3250
3909
+ },
3910
+ {
3911
+ "epoch": 1.25,
3912
+ "learning_rate": 7.513471901462664e-05,
3913
+ "loss": 0.0,
3914
+ "step": 3255
3915
+ },
3916
+ {
3917
+ "epoch": 1.25,
3918
+ "learning_rate": 7.494226327944573e-05,
3919
+ "loss": 0.0,
3920
+ "step": 3260
3921
+ },
3922
+ {
3923
+ "epoch": 1.25,
3924
+ "learning_rate": 7.474980754426483e-05,
3925
+ "loss": 0.0,
3926
+ "step": 3265
3927
+ },
3928
+ {
3929
+ "epoch": 1.26,
3930
+ "learning_rate": 7.455735180908392e-05,
3931
+ "loss": 0.0,
3932
+ "step": 3270
3933
+ },
3934
+ {
3935
+ "epoch": 1.26,
3936
+ "learning_rate": 7.436489607390301e-05,
3937
+ "loss": 0.0,
3938
+ "step": 3275
3939
+ },
3940
+ {
3941
+ "epoch": 1.26,
3942
+ "learning_rate": 7.417244033872209e-05,
3943
+ "loss": 0.0,
3944
+ "step": 3280
3945
+ },
3946
+ {
3947
+ "epoch": 1.26,
3948
+ "learning_rate": 7.397998460354119e-05,
3949
+ "loss": 0.0,
3950
+ "step": 3285
3951
+ },
3952
+ {
3953
+ "epoch": 1.26,
3954
+ "learning_rate": 7.378752886836028e-05,
3955
+ "loss": 0.0,
3956
+ "step": 3290
3957
+ },
3958
+ {
3959
+ "epoch": 1.27,
3960
+ "learning_rate": 7.359507313317937e-05,
3961
+ "loss": 0.0,
3962
+ "step": 3295
3963
+ },
3964
+ {
3965
+ "epoch": 1.27,
3966
+ "learning_rate": 7.340261739799846e-05,
3967
+ "loss": 0.0,
3968
+ "step": 3300
3969
+ },
3970
+ {
3971
+ "epoch": 1.27,
3972
+ "learning_rate": 7.321016166281756e-05,
3973
+ "loss": 0.0,
3974
+ "step": 3305
3975
+ },
3976
+ {
3977
+ "epoch": 1.27,
3978
+ "learning_rate": 7.301770592763665e-05,
3979
+ "loss": 0.0,
3980
+ "step": 3310
3981
+ },
3982
+ {
3983
+ "epoch": 1.27,
3984
+ "learning_rate": 7.282525019245574e-05,
3985
+ "loss": 0.0,
3986
+ "step": 3315
3987
+ },
3988
+ {
3989
+ "epoch": 1.28,
3990
+ "learning_rate": 7.263279445727483e-05,
3991
+ "loss": 0.0,
3992
+ "step": 3320
3993
+ },
3994
+ {
3995
+ "epoch": 1.28,
3996
+ "learning_rate": 7.244033872209393e-05,
3997
+ "loss": 0.0,
3998
+ "step": 3325
3999
+ },
4000
+ {
4001
+ "epoch": 1.28,
4002
+ "learning_rate": 7.224788298691301e-05,
4003
+ "loss": 0.0,
4004
+ "step": 3330
4005
+ },
4006
+ {
4007
+ "epoch": 1.28,
4008
+ "learning_rate": 7.205542725173211e-05,
4009
+ "loss": 0.0,
4010
+ "step": 3335
4011
+ },
4012
+ {
4013
+ "epoch": 1.28,
4014
+ "learning_rate": 7.186297151655119e-05,
4015
+ "loss": 0.0,
4016
+ "step": 3340
4017
+ },
4018
+ {
4019
+ "epoch": 1.29,
4020
+ "learning_rate": 7.167051578137029e-05,
4021
+ "loss": 0.0,
4022
+ "step": 3345
4023
+ },
4024
+ {
4025
+ "epoch": 1.29,
4026
+ "learning_rate": 7.147806004618937e-05,
4027
+ "loss": 0.0,
4028
+ "step": 3350
4029
+ },
4030
+ {
4031
+ "epoch": 1.29,
4032
+ "learning_rate": 7.128560431100847e-05,
4033
+ "loss": 0.0,
4034
+ "step": 3355
4035
+ },
4036
+ {
4037
+ "epoch": 1.29,
4038
+ "learning_rate": 7.109314857582756e-05,
4039
+ "loss": 0.0,
4040
+ "step": 3360
4041
+ },
4042
+ {
4043
+ "epoch": 1.29,
4044
+ "learning_rate": 7.090069284064666e-05,
4045
+ "loss": 0.0,
4046
+ "step": 3365
4047
+ },
4048
+ {
4049
+ "epoch": 1.29,
4050
+ "learning_rate": 7.070823710546574e-05,
4051
+ "loss": 0.0,
4052
+ "step": 3370
4053
+ },
4054
+ {
4055
+ "epoch": 1.3,
4056
+ "learning_rate": 7.051578137028484e-05,
4057
+ "loss": 0.0,
4058
+ "step": 3375
4059
+ },
4060
+ {
4061
+ "epoch": 1.3,
4062
+ "learning_rate": 7.032332563510393e-05,
4063
+ "loss": 0.0,
4064
+ "step": 3380
4065
+ },
4066
+ {
4067
+ "epoch": 1.3,
4068
+ "learning_rate": 7.013086989992302e-05,
4069
+ "loss": 0.0,
4070
+ "step": 3385
4071
+ },
4072
+ {
4073
+ "epoch": 1.3,
4074
+ "learning_rate": 6.993841416474211e-05,
4075
+ "loss": 0.0,
4076
+ "step": 3390
4077
+ },
4078
+ {
4079
+ "epoch": 1.3,
4080
+ "learning_rate": 6.974595842956121e-05,
4081
+ "loss": 0.0,
4082
+ "step": 3395
4083
+ },
4084
+ {
4085
+ "epoch": 1.31,
4086
+ "learning_rate": 6.955350269438029e-05,
4087
+ "loss": 0.0,
4088
+ "step": 3400
4089
+ },
4090
+ {
4091
+ "epoch": 1.31,
4092
+ "learning_rate": 6.936104695919939e-05,
4093
+ "loss": 0.0,
4094
+ "step": 3405
4095
+ },
4096
+ {
4097
+ "epoch": 1.31,
4098
+ "learning_rate": 6.916859122401847e-05,
4099
+ "loss": 0.0,
4100
+ "step": 3410
4101
+ },
4102
+ {
4103
+ "epoch": 1.31,
4104
+ "learning_rate": 6.897613548883757e-05,
4105
+ "loss": 0.0,
4106
+ "step": 3415
4107
+ },
4108
+ {
4109
+ "epoch": 1.31,
4110
+ "learning_rate": 6.878367975365666e-05,
4111
+ "loss": 0.0,
4112
+ "step": 3420
4113
+ },
4114
+ {
4115
+ "epoch": 1.32,
4116
+ "learning_rate": 6.859122401847575e-05,
4117
+ "loss": 0.0,
4118
+ "step": 3425
4119
+ },
4120
+ {
4121
+ "epoch": 1.32,
4122
+ "learning_rate": 6.839876828329484e-05,
4123
+ "loss": 0.0,
4124
+ "step": 3430
4125
+ },
4126
+ {
4127
+ "epoch": 1.32,
4128
+ "learning_rate": 6.820631254811394e-05,
4129
+ "loss": 0.0,
4130
+ "step": 3435
4131
+ },
4132
+ {
4133
+ "epoch": 1.32,
4134
+ "learning_rate": 6.801385681293304e-05,
4135
+ "loss": 0.0,
4136
+ "step": 3440
4137
+ },
4138
+ {
4139
+ "epoch": 1.32,
4140
+ "learning_rate": 6.782140107775212e-05,
4141
+ "loss": 0.0,
4142
+ "step": 3445
4143
+ },
4144
+ {
4145
+ "epoch": 1.33,
4146
+ "learning_rate": 6.762894534257122e-05,
4147
+ "loss": 0.0,
4148
+ "step": 3450
4149
+ },
4150
+ {
4151
+ "epoch": 1.33,
4152
+ "learning_rate": 6.743648960739031e-05,
4153
+ "loss": 0.0,
4154
+ "step": 3455
4155
+ },
4156
+ {
4157
+ "epoch": 1.33,
4158
+ "learning_rate": 6.72440338722094e-05,
4159
+ "loss": 0.0,
4160
+ "step": 3460
4161
+ },
4162
+ {
4163
+ "epoch": 1.33,
4164
+ "learning_rate": 6.705157813702848e-05,
4165
+ "loss": 0.0,
4166
+ "step": 3465
4167
+ },
4168
+ {
4169
+ "epoch": 1.33,
4170
+ "learning_rate": 6.685912240184757e-05,
4171
+ "loss": 0.0,
4172
+ "step": 3470
4173
+ },
4174
+ {
4175
+ "epoch": 1.33,
4176
+ "learning_rate": 6.666666666666667e-05,
4177
+ "loss": 0.0,
4178
+ "step": 3475
4179
+ },
4180
+ {
4181
+ "epoch": 1.34,
4182
+ "learning_rate": 6.647421093148575e-05,
4183
+ "loss": 0.0,
4184
+ "step": 3480
4185
+ },
4186
+ {
4187
+ "epoch": 1.34,
4188
+ "learning_rate": 6.628175519630485e-05,
4189
+ "loss": 0.0,
4190
+ "step": 3485
4191
+ },
4192
+ {
4193
+ "epoch": 1.34,
4194
+ "learning_rate": 6.608929946112395e-05,
4195
+ "loss": 0.0,
4196
+ "step": 3490
4197
+ },
4198
+ {
4199
+ "epoch": 1.34,
4200
+ "learning_rate": 6.589684372594304e-05,
4201
+ "loss": 0.0,
4202
+ "step": 3495
4203
+ },
4204
+ {
4205
+ "epoch": 1.34,
4206
+ "learning_rate": 6.570438799076212e-05,
4207
+ "loss": 0.0,
4208
+ "step": 3500
4209
+ },
4210
+ {
4211
+ "epoch": 1.35,
4212
+ "learning_rate": 6.551193225558122e-05,
4213
+ "loss": 0.0,
4214
+ "step": 3505
4215
+ },
4216
+ {
4217
+ "epoch": 1.35,
4218
+ "learning_rate": 6.531947652040032e-05,
4219
+ "loss": 0.0,
4220
+ "step": 3510
4221
+ },
4222
+ {
4223
+ "epoch": 1.35,
4224
+ "learning_rate": 6.51270207852194e-05,
4225
+ "loss": 0.0,
4226
+ "step": 3515
4227
+ },
4228
+ {
4229
+ "epoch": 1.35,
4230
+ "learning_rate": 6.49345650500385e-05,
4231
+ "loss": 0.0,
4232
+ "step": 3520
4233
+ },
4234
+ {
4235
+ "epoch": 1.35,
4236
+ "learning_rate": 6.474210931485759e-05,
4237
+ "loss": 0.0,
4238
+ "step": 3525
4239
+ },
4240
+ {
4241
+ "epoch": 1.36,
4242
+ "learning_rate": 6.454965357967668e-05,
4243
+ "loss": 0.0,
4244
+ "step": 3530
4245
+ },
4246
+ {
4247
+ "epoch": 1.36,
4248
+ "learning_rate": 6.435719784449577e-05,
4249
+ "loss": 0.0,
4250
+ "step": 3535
4251
+ },
4252
+ {
4253
+ "epoch": 1.36,
4254
+ "learning_rate": 6.416474210931485e-05,
4255
+ "loss": 0.0,
4256
+ "step": 3540
4257
+ },
4258
+ {
4259
+ "epoch": 1.36,
4260
+ "learning_rate": 6.397228637413395e-05,
4261
+ "loss": 0.0,
4262
+ "step": 3545
4263
+ },
4264
+ {
4265
+ "epoch": 1.36,
4266
+ "learning_rate": 6.377983063895305e-05,
4267
+ "loss": 0.0,
4268
+ "step": 3550
4269
+ },
4270
+ {
4271
+ "epoch": 1.37,
4272
+ "learning_rate": 6.358737490377213e-05,
4273
+ "loss": 0.0,
4274
+ "step": 3555
4275
+ },
4276
+ {
4277
+ "epoch": 1.37,
4278
+ "learning_rate": 6.339491916859123e-05,
4279
+ "loss": 0.0,
4280
+ "step": 3560
4281
+ },
4282
+ {
4283
+ "epoch": 1.37,
4284
+ "learning_rate": 6.320246343341032e-05,
4285
+ "loss": 0.0,
4286
+ "step": 3565
4287
+ },
4288
+ {
4289
+ "epoch": 1.37,
4290
+ "learning_rate": 6.301000769822942e-05,
4291
+ "loss": 0.0,
4292
+ "step": 3570
4293
+ },
4294
+ {
4295
+ "epoch": 1.37,
4296
+ "learning_rate": 6.28175519630485e-05,
4297
+ "loss": 0.0,
4298
+ "step": 3575
4299
+ },
4300
+ {
4301
+ "epoch": 1.38,
4302
+ "learning_rate": 6.26250962278676e-05,
4303
+ "loss": 0.0,
4304
+ "step": 3580
4305
+ },
4306
+ {
4307
+ "epoch": 1.38,
4308
+ "learning_rate": 6.24326404926867e-05,
4309
+ "loss": 0.0,
4310
+ "step": 3585
4311
+ },
4312
+ {
4313
+ "epoch": 1.38,
4314
+ "learning_rate": 6.224018475750578e-05,
4315
+ "loss": 0.0,
4316
+ "step": 3590
4317
+ },
4318
+ {
4319
+ "epoch": 1.38,
4320
+ "learning_rate": 6.204772902232486e-05,
4321
+ "loss": 0.0,
4322
+ "step": 3595
4323
+ },
4324
+ {
4325
+ "epoch": 1.38,
4326
+ "learning_rate": 6.185527328714396e-05,
4327
+ "loss": 0.0,
4328
+ "step": 3600
4329
+ },
4330
+ {
4331
+ "epoch": 1.38,
4332
+ "learning_rate": 6.166281755196305e-05,
4333
+ "loss": 0.0,
4334
+ "step": 3605
4335
+ },
4336
+ {
4337
+ "epoch": 1.39,
4338
+ "learning_rate": 6.147036181678213e-05,
4339
+ "loss": 0.0,
4340
+ "step": 3610
4341
+ },
4342
+ {
4343
+ "epoch": 1.39,
4344
+ "learning_rate": 6.127790608160123e-05,
4345
+ "loss": 0.0,
4346
+ "step": 3615
4347
+ },
4348
+ {
4349
+ "epoch": 1.39,
4350
+ "learning_rate": 6.108545034642033e-05,
4351
+ "loss": 0.0,
4352
+ "step": 3620
4353
+ },
4354
+ {
4355
+ "epoch": 1.39,
4356
+ "learning_rate": 6.089299461123942e-05,
4357
+ "loss": 0.0,
4358
+ "step": 3625
4359
+ },
4360
+ {
4361
+ "epoch": 1.39,
4362
+ "learning_rate": 6.070053887605851e-05,
4363
+ "loss": 0.0,
4364
+ "step": 3630
4365
+ },
4366
+ {
4367
+ "epoch": 1.4,
4368
+ "learning_rate": 6.05080831408776e-05,
4369
+ "loss": 0.0,
4370
+ "step": 3635
4371
+ },
4372
+ {
4373
+ "epoch": 1.4,
4374
+ "learning_rate": 6.031562740569669e-05,
4375
+ "loss": 0.0,
4376
+ "step": 3640
4377
+ },
4378
+ {
4379
+ "epoch": 1.4,
4380
+ "learning_rate": 6.012317167051579e-05,
4381
+ "loss": 0.0,
4382
+ "step": 3645
4383
+ },
4384
+ {
4385
+ "epoch": 1.4,
4386
+ "learning_rate": 5.993071593533488e-05,
4387
+ "loss": 0.0,
4388
+ "step": 3650
4389
+ },
4390
+ {
4391
+ "epoch": 1.4,
4392
+ "learning_rate": 5.9738260200153974e-05,
4393
+ "loss": 0.0,
4394
+ "step": 3655
4395
+ },
4396
+ {
4397
+ "epoch": 1.41,
4398
+ "learning_rate": 5.954580446497306e-05,
4399
+ "loss": 0.0,
4400
+ "step": 3660
4401
+ },
4402
+ {
4403
+ "epoch": 1.41,
4404
+ "learning_rate": 5.935334872979215e-05,
4405
+ "loss": 0.0,
4406
+ "step": 3665
4407
+ },
4408
+ {
4409
+ "epoch": 1.41,
4410
+ "learning_rate": 5.9160892994611236e-05,
4411
+ "loss": 0.0,
4412
+ "step": 3670
4413
+ },
4414
+ {
4415
+ "epoch": 1.41,
4416
+ "learning_rate": 5.896843725943033e-05,
4417
+ "loss": 0.0,
4418
+ "step": 3675
4419
+ },
4420
+ {
4421
+ "epoch": 1.41,
4422
+ "learning_rate": 5.877598152424942e-05,
4423
+ "loss": 0.0,
4424
+ "step": 3680
4425
+ },
4426
+ {
4427
+ "epoch": 1.42,
4428
+ "learning_rate": 5.858352578906852e-05,
4429
+ "loss": 0.0,
4430
+ "step": 3685
4431
+ },
4432
+ {
4433
+ "epoch": 1.42,
4434
+ "learning_rate": 5.839107005388761e-05,
4435
+ "loss": 0.0,
4436
+ "step": 3690
4437
+ },
4438
+ {
4439
+ "epoch": 1.42,
4440
+ "learning_rate": 5.8198614318706704e-05,
4441
+ "loss": 0.0,
4442
+ "step": 3695
4443
+ },
4444
+ {
4445
+ "epoch": 1.42,
4446
+ "learning_rate": 5.8006158583525794e-05,
4447
+ "loss": 0.0,
4448
+ "step": 3700
4449
+ },
4450
+ {
4451
+ "epoch": 1.42,
4452
+ "learning_rate": 5.781370284834488e-05,
4453
+ "loss": 0.0,
4454
+ "step": 3705
4455
+ },
4456
+ {
4457
+ "epoch": 1.43,
4458
+ "learning_rate": 5.762124711316398e-05,
4459
+ "loss": 0.0,
4460
+ "step": 3710
4461
+ },
4462
+ {
4463
+ "epoch": 1.43,
4464
+ "learning_rate": 5.742879137798307e-05,
4465
+ "loss": 0.0,
4466
+ "step": 3715
4467
+ },
4468
+ {
4469
+ "epoch": 1.43,
4470
+ "learning_rate": 5.7236335642802165e-05,
4471
+ "loss": 0.0,
4472
+ "step": 3720
4473
+ },
4474
+ {
4475
+ "epoch": 1.43,
4476
+ "learning_rate": 5.704387990762125e-05,
4477
+ "loss": 0.0,
4478
+ "step": 3725
4479
+ },
4480
+ {
4481
+ "epoch": 1.43,
4482
+ "learning_rate": 5.685142417244034e-05,
4483
+ "loss": 0.0,
4484
+ "step": 3730
4485
+ },
4486
+ {
4487
+ "epoch": 1.43,
4488
+ "learning_rate": 5.665896843725943e-05,
4489
+ "loss": 0.0,
4490
+ "step": 3735
4491
+ },
4492
+ {
4493
+ "epoch": 1.44,
4494
+ "learning_rate": 5.6466512702078524e-05,
4495
+ "loss": 0.0,
4496
+ "step": 3740
4497
+ },
4498
+ {
4499
+ "epoch": 1.44,
4500
+ "learning_rate": 5.627405696689761e-05,
4501
+ "loss": 0.0,
4502
+ "step": 3745
4503
+ },
4504
+ {
4505
+ "epoch": 1.44,
4506
+ "learning_rate": 5.608160123171671e-05,
4507
+ "loss": 0.0,
4508
+ "step": 3750
4509
  }
4510
  ],
4511
  "max_steps": 5206,
4512
  "num_train_epochs": 2,
4513
+ "total_flos": 57215852847104.0,
4514
  "trial_name": null,
4515
  "trial_params": null
4516
  }