besimray commited on
Commit
6265c42
·
verified ·
1 Parent(s): 53cb4a0

Training in progress, step 130, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7890d150186359623165f222ca4830c2a039fc03af0d56fe99e682d415fc31f6
3
  size 90207248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f94ce60ad18907bfe378be4ba63c3cb07211d25772e7578153e59c360d0334
3
  size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d30705974fe66df92448604887f4ff2019bdf74272d27933dc0a7e38b3354a13
3
  size 46057082
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af22c1f6025a1e35c54cfaffb9aa264061cac162f2b52caade7115ff260d713e
3
  size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98bbd4b4faf598bcf20005208e29928176693d773d6281a511c45efeae497be2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad4c4d2d769c6f52183fdfe62140ef02b36aa1e936b1d8050f51672d3d58fb1e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e549a35cd7e532c378c88126565a201f68fd1d73868bbbba082980ce1de2c27
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3511d75105f53c278279e3dade6f856082c8693b0424c0bf567bdcf23028dd2b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.6011497974395752,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.0338863395693611,
5
  "eval_steps": 10,
6
- "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -951,6 +951,84 @@
951
  "eval_samples_per_second": 5.596,
952
  "eval_steps_per_second": 5.596,
953
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
954
  }
955
  ],
956
  "logging_steps": 1,
@@ -965,7 +1043,7 @@
965
  "early_stopping_threshold": 0.0
966
  },
967
  "attributes": {
968
- "early_stopping_patience_counter": 2
969
  }
970
  },
971
  "TrainerControl": {
@@ -979,7 +1057,7 @@
979
  "attributes": {}
980
  }
981
  },
982
- "total_flos": 1.174564302225408e+16,
983
  "train_batch_size": 1,
984
  "trial_name": null,
985
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.5968632698059082,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-130",
4
+ "epoch": 0.03671020120014119,
5
  "eval_steps": 10,
6
+ "global_step": 130,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
951
  "eval_samples_per_second": 5.596,
952
  "eval_steps_per_second": 5.596,
953
  "step": 120
954
+ },
955
+ {
956
+ "epoch": 0.03416872573243911,
957
+ "grad_norm": 1.4283939599990845,
958
+ "learning_rate": 0.00017572742764761055,
959
+ "loss": 1.4789,
960
+ "step": 121
961
+ },
962
+ {
963
+ "epoch": 0.03445111189551712,
964
+ "grad_norm": 1.3361456394195557,
965
+ "learning_rate": 0.00017530714660036112,
966
+ "loss": 0.784,
967
+ "step": 122
968
+ },
969
+ {
970
+ "epoch": 0.03473349805859513,
971
+ "grad_norm": 1.0861424207687378,
972
+ "learning_rate": 0.00017488376997127283,
973
+ "loss": 2.2809,
974
+ "step": 123
975
+ },
976
+ {
977
+ "epoch": 0.035015884221673135,
978
+ "grad_norm": 4.459283351898193,
979
+ "learning_rate": 0.0001744573151637007,
980
+ "loss": 1.4483,
981
+ "step": 124
982
+ },
983
+ {
984
+ "epoch": 0.03529827038475115,
985
+ "grad_norm": 1.324436902999878,
986
+ "learning_rate": 0.00017402779970753155,
987
+ "loss": 2.6136,
988
+ "step": 125
989
+ },
990
+ {
991
+ "epoch": 0.03558065654782916,
992
+ "grad_norm": 3.7964041233062744,
993
+ "learning_rate": 0.0001735952412584635,
994
+ "loss": 1.092,
995
+ "step": 126
996
+ },
997
+ {
998
+ "epoch": 0.035863042710907164,
999
+ "grad_norm": 2.560436725616455,
1000
+ "learning_rate": 0.00017315965759728014,
1001
+ "loss": 1.4307,
1002
+ "step": 127
1003
+ },
1004
+ {
1005
+ "epoch": 0.036145428873985176,
1006
+ "grad_norm": 1.473990797996521,
1007
+ "learning_rate": 0.00017272106662911973,
1008
+ "loss": 1.1344,
1009
+ "step": 128
1010
+ },
1011
+ {
1012
+ "epoch": 0.03642781503706318,
1013
+ "grad_norm": 3.3736298084259033,
1014
+ "learning_rate": 0.00017227948638273916,
1015
+ "loss": 1.5746,
1016
+ "step": 129
1017
+ },
1018
+ {
1019
+ "epoch": 0.03671020120014119,
1020
+ "grad_norm": 1.5858126878738403,
1021
+ "learning_rate": 0.00017183493500977278,
1022
+ "loss": 1.3798,
1023
+ "step": 130
1024
+ },
1025
+ {
1026
+ "epoch": 0.03671020120014119,
1027
+ "eval_loss": 1.5968632698059082,
1028
+ "eval_runtime": 133.2364,
1029
+ "eval_samples_per_second": 5.599,
1030
+ "eval_steps_per_second": 5.599,
1031
+ "step": 130
1032
  }
1033
  ],
1034
  "logging_steps": 1,
 
1043
  "early_stopping_threshold": 0.0
1044
  },
1045
  "attributes": {
1046
+ "early_stopping_patience_counter": 0
1047
  }
1048
  },
1049
  "TrainerControl": {
 
1057
  "attributes": {}
1058
  }
1059
  },
1060
+ "total_flos": 1.272444660744192e+16,
1061
  "train_batch_size": 1,
1062
  "trial_name": null,
1063
  "trial_params": null