mikhail-panzo committed on
Commit
62e7aa5
1 Parent(s): fa93c98

Training in progress, step 7500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1af62ce679fec44c6a2ef505e7a86dd3be2443d46dc17f477cf0c8f9b14685
3
  size 577789320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:403cfbffc7ee4007065e4824a75bc209469efb1d3eaf4b78bdcdc4b8e425ae79
3
  size 577789320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aba3b3463e55db8f4fec1eefbd85db6b8463a3cf70a9bc8f8d66daa21d39bfe
3
  size 1155772233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f82a7c0b927ee4293785e8b97633d22c4746dbad1b0c92376ee3f5578e79116
3
  size 1155772233
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b29b7ac3fd9a0e682d95173845901bfa8ffe79dceb66372f749527a981b4aec
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:795fe76cfd6ac22612e53f90e3708fe6447c8d1c969f78a57978bb17493086b1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbbb103ab5c01e2a91659adff089a9b4717297cd8e8e74182bd3fa3fb8f51b04
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ef84ad524da6dbaaadb8576fa258a66ac5d4b080583a172fefc45887ff84f46
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.41427454352378845,
3
  "best_model_checkpoint": "mikhail_panzo/ceb_b128_le3_s8000/checkpoint-500",
4
- "epoch": 549.0196078431372,
5
  "eval_steps": 500,
6
- "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1099,6 +1099,84 @@
1099
  "eval_samples_per_second": 26.722,
1100
  "eval_steps_per_second": 3.415,
1101
  "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1102
  }
1103
  ],
1104
  "logging_steps": 50,
@@ -1118,7 +1196,7 @@
1118
  "attributes": {}
1119
  }
1120
  },
1121
- "total_flos": 1.5229554622744013e+17,
1122
  "train_batch_size": 32,
1123
  "trial_name": null,
1124
  "trial_params": null
 
1
  {
2
  "best_metric": 0.41427454352378845,
3
  "best_model_checkpoint": "mikhail_panzo/ceb_b128_le3_s8000/checkpoint-500",
4
+ "epoch": 588.2352941176471,
5
  "eval_steps": 500,
6
+ "global_step": 7500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1099
  "eval_samples_per_second": 26.722,
1100
  "eval_steps_per_second": 3.415,
1101
  "step": 7000
1102
+ },
1103
+ {
1104
+ "epoch": 552.9411764705883,
1105
+ "grad_norm": 0.06083720177412033,
1106
+ "learning_rate": 0.00015866666666666668,
1107
+ "loss": 1.42,
1108
+ "step": 7050
1109
+ },
1110
+ {
1111
+ "epoch": 556.8627450980392,
1112
+ "grad_norm": 0.07482324540615082,
1113
+ "learning_rate": 0.00015033333333333335,
1114
+ "loss": 1.4184,
1115
+ "step": 7100
1116
+ },
1117
+ {
1118
+ "epoch": 560.7843137254902,
1119
+ "grad_norm": 0.05342550203204155,
1120
+ "learning_rate": 0.00014199999999999998,
1121
+ "loss": 1.4186,
1122
+ "step": 7150
1123
+ },
1124
+ {
1125
+ "epoch": 564.7058823529412,
1126
+ "grad_norm": 0.09053777158260345,
1127
+ "learning_rate": 0.00013366666666666667,
1128
+ "loss": 1.4191,
1129
+ "step": 7200
1130
+ },
1131
+ {
1132
+ "epoch": 568.6274509803922,
1133
+ "grad_norm": 0.08072460442781448,
1134
+ "learning_rate": 0.00012533333333333334,
1135
+ "loss": 1.42,
1136
+ "step": 7250
1137
+ },
1138
+ {
1139
+ "epoch": 572.5490196078431,
1140
+ "grad_norm": 0.09836099296808243,
1141
+ "learning_rate": 0.00011700000000000001,
1142
+ "loss": 1.4193,
1143
+ "step": 7300
1144
+ },
1145
+ {
1146
+ "epoch": 576.4705882352941,
1147
+ "grad_norm": 0.05738500505685806,
1148
+ "learning_rate": 0.00010866666666666666,
1149
+ "loss": 1.42,
1150
+ "step": 7350
1151
+ },
1152
+ {
1153
+ "epoch": 580.3921568627451,
1154
+ "grad_norm": 0.06691340357065201,
1155
+ "learning_rate": 0.00010033333333333334,
1156
+ "loss": 1.4185,
1157
+ "step": 7400
1158
+ },
1159
+ {
1160
+ "epoch": 584.3137254901961,
1161
+ "grad_norm": 0.06185409054160118,
1162
+ "learning_rate": 9.2e-05,
1163
+ "loss": 1.4172,
1164
+ "step": 7450
1165
+ },
1166
+ {
1167
+ "epoch": 588.2352941176471,
1168
+ "grad_norm": 0.14216652512550354,
1169
+ "learning_rate": 8.366666666666666e-05,
1170
+ "loss": 1.4194,
1171
+ "step": 7500
1172
+ },
1173
+ {
1174
+ "epoch": 588.2352941176471,
1175
+ "eval_loss": 1.4361063241958618,
1176
+ "eval_runtime": 6.733,
1177
+ "eval_samples_per_second": 26.734,
1178
+ "eval_steps_per_second": 3.416,
1179
+ "step": 7500
1180
  }
1181
  ],
1182
  "logging_steps": 50,
 
1196
  "attributes": {}
1197
  }
1198
  },
1199
+ "total_flos": 1.631701918768069e+17,
1200
  "train_batch_size": 32,
1201
  "trial_name": null,
1202
  "trial_params": null