aseratus1 committed
Commit 0d418de · verified · 1 Parent(s): 886e6db

Training in progress, step 1350, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:308c8112cd436e4e3cf0b4d1c1d899765b81713470eab2d742f0a53f08d8e8fb
+oid sha256:200a88f61a7e88a93fb00c5dc2da4ba537a81cd94444730c2f73356bcb9fa588
 size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87dbd6589d1029104817f2f43217594eb6430a5218ba1db9e5fb602f538d0ae4
+oid sha256:75023e07d60e4e00600120d9257e00fe03185b56bfc95b72efdb36dd2222933b
 size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8a340e73e7fff823d019deed088bd52f0fc0271528bcd905e694a55cb4ced90
+oid sha256:ddf91ecd669f186faeccc2ddc419b2e245a4aca4accac80b04b3de8a0ea96372
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f130e00059750e08fde0557f1628570250cc11d981d01f55b80e33b935a3162
+oid sha256:6769de87f07ffd3568aa81b9a1fe6ecade894861382192b5a67c276923ba90ef
 size 1064
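
Each of the four files above is stored with Git LFS, so the diff touches only the three-line pointer file (version, oid sha256, size): the checkpoint binaries changed, the sizes did not. A minimal sketch, assuming the binaries have been downloaded next to their pointers (the paths in the example are hypothetical), for checking a local file against its pointer:

# Sketch, not part of the commit: verify a downloaded checkpoint file against
# the oid/size recorded in its Git LFS pointer. Paths are assumptions.
import hashlib
import os

def read_lfs_pointer(pointer_path):
    # Parse the "key value" lines of a Git LFS pointer into a dict.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_lfs_object(pointer_path, object_path):
    # True if object_path matches the sha256 oid and byte size in the pointer.
    fields = read_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])
    if os.path.getsize(object_path) != expected_size:
        return False
    sha = hashlib.sha256()
    with open(object_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Example with hypothetical local paths:
# verify_lfs_object("pointers/adapter_model.safetensors",
#                   "last-checkpoint/adapter_model.safetensors")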
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.45658260583877563,
-  "best_model_checkpoint": "miner_id_24/checkpoint-1200",
-  "epoch": 0.4549763033175355,
+  "best_metric": 0.4398421049118042,
+  "best_model_checkpoint": "miner_id_24/checkpoint-1350",
+  "epoch": 0.5118483412322274,
   "eval_steps": 150,
-  "global_step": 1200,
+  "global_step": 1350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -919,6 +919,119 @@
       "eval_samples_per_second": 21.043,
       "eval_steps_per_second": 5.263,
       "step": 1200
+    },
+    {
+      "epoch": 0.45876777251184836,
+      "grad_norm": 0.8443006277084351,
+      "learning_rate": 5.809736120478817e-05,
+      "loss": 0.7046,
+      "step": 1210
+    },
+    {
+      "epoch": 0.46255924170616114,
+      "grad_norm": 0.7299622893333435,
+      "learning_rate": 5.749783770253315e-05,
+      "loss": 0.4708,
+      "step": 1220
+    },
+    {
+      "epoch": 0.4663507109004739,
+      "grad_norm": 0.7422939538955688,
+      "learning_rate": 5.6897209353880885e-05,
+      "loss": 0.4123,
+      "step": 1230
+    },
+    {
+      "epoch": 0.47014218009478675,
+      "grad_norm": 0.723355770111084,
+      "learning_rate": 5.629556466462376e-05,
+      "loss": 0.3656,
+      "step": 1240
+    },
+    {
+      "epoch": 0.47393364928909953,
+      "grad_norm": 0.611301839351654,
+      "learning_rate": 5.5692992290317366e-05,
+      "loss": 0.2865,
+      "step": 1250
+    },
+    {
+      "epoch": 0.4777251184834123,
+      "grad_norm": 0.7408523559570312,
+      "learning_rate": 5.508958102321666e-05,
+      "loss": 0.7044,
+      "step": 1260
+    },
+    {
+      "epoch": 0.4815165876777251,
+      "grad_norm": 0.779376745223999,
+      "learning_rate": 5.448541977919195e-05,
+      "loss": 0.4626,
+      "step": 1270
+    },
+    {
+      "epoch": 0.48530805687203793,
+      "grad_norm": 0.824704110622406,
+      "learning_rate": 5.388059758462658e-05,
+      "loss": 0.4017,
+      "step": 1280
+    },
+    {
+      "epoch": 0.4890995260663507,
+      "grad_norm": 0.7132182717323303,
+      "learning_rate": 5.327520356329853e-05,
+      "loss": 0.3587,
+      "step": 1290
+    },
+    {
+      "epoch": 0.4928909952606635,
+      "grad_norm": 0.6510176062583923,
+      "learning_rate": 5.266932692324747e-05,
+      "loss": 0.2709,
+      "step": 1300
+    },
+    {
+      "epoch": 0.4966824644549763,
+      "grad_norm": 0.8185328245162964,
+      "learning_rate": 5.206305694362959e-05,
+      "loss": 0.6967,
+      "step": 1310
+    },
+    {
+      "epoch": 0.500473933649289,
+      "grad_norm": 0.8479395508766174,
+      "learning_rate": 5.1456482961561656e-05,
+      "loss": 0.4754,
+      "step": 1320
+    },
+    {
+      "epoch": 0.5042654028436019,
+      "grad_norm": 0.7257512211799622,
+      "learning_rate": 5.084969435895691e-05,
+      "loss": 0.4183,
+      "step": 1330
+    },
+    {
+      "epoch": 0.5080568720379147,
+      "grad_norm": 0.6470847725868225,
+      "learning_rate": 5.024278054935403e-05,
+      "loss": 0.3574,
+      "step": 1340
+    },
+    {
+      "epoch": 0.5118483412322274,
+      "grad_norm": 0.5835993885993958,
+      "learning_rate": 4.963583096474159e-05,
+      "loss": 0.2612,
+      "step": 1350
+    },
+    {
+      "epoch": 0.5118483412322274,
+      "eval_loss": 0.4398421049118042,
+      "eval_runtime": 211.1019,
+      "eval_samples_per_second": 21.042,
+      "eval_steps_per_second": 5.263,
+      "step": 1350
     }
   ],
   "logging_steps": 10,
@@ -947,7 +1060,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0646334581998879e+18,
+  "total_flos": 1.197948345721553e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null