Nadav commited on
Commit
b4ac146
1 Parent(s): ac6491d

Training in progress, step 75000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99c98283f4eee4bb758c71ec62014a67a4552b0758eea313df980226947b2ade
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76c4615a4cfb3d6284272ff52bb78802562addac25bc47aecac63ac87926f3e1
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd35fbb667c7c8278fafcb67c5061ab4d6f220dcd06ca61480a2cb447bf773e4
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7146f8d52213faf72703e230b94defd59fa1f96a4226817fcd5fb6b8b82642d0
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8876c50e1ce3fb4970a80eb0614074ef9eef11af29d4d4a411c34a56cba5b25f
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9592ccee7cd881c490789d0c081e96e3bd0737279468e6c4dda1fabb2546355a
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:128b78bf39c57c4ee291c8fde7c2cc6eead3eaf12fff730867105475c3c602a9
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bcd13dbe306fa5a2ee25614c30d95796b3d386c2395432ebb37f537a50477e0
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cdae046482883391f6321c0f8158f1008a657f80a5068452ca97c45f0f69142
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b1cc3df0b7ebdcaaea83c1d1aa72c613ada98ab7f2fb32d66ee37180c5fcdf
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.353125544140693,
5
- "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -958,11 +958,79 @@
958
  "eval_samples_per_second": 55.227,
959
  "eval_steps_per_second": 0.873,
960
  "step": 70000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
961
  }
962
  ],
963
  "max_steps": 100000,
964
  "num_train_epochs": 9,
965
- "total_flos": 3.29682408031201e+21,
966
  "trial_name": null,
967
  "trial_params": null
968
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.788438098554762,
5
+ "global_step": 75000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
958
  "eval_samples_per_second": 55.227,
959
  "eval_steps_per_second": 0.873,
960
  "step": 70000
961
+ },
962
+ {
963
+ "epoch": 5.4,
964
+ "learning_rate": 2.8166833668202425e-05,
965
+ "loss": 0.4087,
966
+ "step": 70500
967
+ },
968
+ {
969
+ "epoch": 5.44,
970
+ "learning_rate": 2.759969559696268e-05,
971
+ "loss": 0.4088,
972
+ "step": 71000
973
+ },
974
+ {
975
+ "epoch": 5.48,
976
+ "learning_rate": 2.703941452170851e-05,
977
+ "loss": 0.4073,
978
+ "step": 71500
979
+ },
980
+ {
981
+ "epoch": 5.53,
982
+ "learning_rate": 2.648612868341161e-05,
983
+ "loss": 0.4084,
984
+ "step": 72000
985
+ },
986
+ {
987
+ "epoch": 5.57,
988
+ "learning_rate": 2.5939974597073566e-05,
989
+ "loss": 0.4077,
990
+ "step": 72500
991
+ },
992
+ {
993
+ "epoch": 5.61,
994
+ "learning_rate": 2.5402157452548983e-05,
995
+ "loss": 0.4072,
996
+ "step": 73000
997
+ },
998
+ {
999
+ "epoch": 5.66,
1000
+ "learning_rate": 2.487065441284431e-05,
1001
+ "loss": 0.4079,
1002
+ "step": 73500
1003
+ },
1004
+ {
1005
+ "epoch": 5.7,
1006
+ "learning_rate": 2.4346681719201903e-05,
1007
+ "loss": 0.4076,
1008
+ "step": 74000
1009
+ },
1010
+ {
1011
+ "epoch": 5.74,
1012
+ "learning_rate": 2.383036865404354e-05,
1013
+ "loss": 0.4069,
1014
+ "step": 74500
1015
+ },
1016
+ {
1017
+ "epoch": 5.79,
1018
+ "learning_rate": 2.332184260989229e-05,
1019
+ "loss": 0.4078,
1020
+ "step": 75000
1021
+ },
1022
+ {
1023
+ "epoch": 5.79,
1024
+ "eval_loss": 0.3861904740333557,
1025
+ "eval_runtime": 75.9418,
1026
+ "eval_samples_per_second": 65.84,
1027
+ "eval_steps_per_second": 1.04,
1028
+ "step": 75000
1029
  }
1030
  ],
1031
  "max_steps": 100000,
1032
  "num_train_epochs": 9,
1033
+ "total_flos": 3.5323171476767716e+21,
1034
  "trial_name": null,
1035
  "trial_params": null
1036
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd35fbb667c7c8278fafcb67c5061ab4d6f220dcd06ca61480a2cb447bf773e4
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7146f8d52213faf72703e230b94defd59fa1f96a4226817fcd5fb6b8b82642d0
3
  size 449471589