mikhail-panzo commited on
Commit
a5a21f9
1 Parent(s): 87dfa98

Training in progress, step 8000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:188d0d1c582e736e2eec2d1d5382875e9492db41c1fbe88ec1c106466a2b07ec
3
  size 577789320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eceda0d19725c7e2e2c77b3a104966fcc3d57681a4b523ecdf21f43f5fa0ee51
3
  size 577789320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a712edfd21efaec231d91aaab20ede1aa0cfd4ebe31363aa2fdfa6f036773d5
3
  size 1155772233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec8c680684da01ca7c440c427816b3a17cdb48da1e3800ff44e6295258d3292d
3
  size 1155772233
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f87f712128c90495b5215cdd4ffe095bd74b9df4d54c07a1c61e31f0311ddf7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c0b2f427b1052fb24dceb73f158cfa57198b87c4992deba7fedea2db5e9bd1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46a6ecce960b3a775d6eb50742229bab81345e6cd64cb3dc52202422c15ec1c7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc91c63b416eaf729d985969526750ffbadb96ebf18f73584ff35d9ca5f08c2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.41767898201942444,
3
  "best_model_checkpoint": "mikhail_panzo/fil_b64_le4_s8000/checkpoint-2000",
4
- "epoch": 333.3333333333333,
5
  "eval_steps": 500,
6
- "global_step": 7500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1177,6 +1177,84 @@
1177
  "eval_samples_per_second": 21.009,
1178
  "eval_steps_per_second": 2.643,
1179
  "step": 7500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1180
  }
1181
  ],
1182
  "logging_steps": 50,
@@ -1191,12 +1269,12 @@
1191
  "should_evaluate": false,
1192
  "should_log": false,
1193
  "should_save": true,
1194
- "should_training_stop": false
1195
  },
1196
  "attributes": {}
1197
  }
1198
  },
1199
- "total_flos": 1.0882778098582685e+17,
1200
  "train_batch_size": 32,
1201
  "trial_name": null,
1202
  "trial_params": null
 
1
  {
2
  "best_metric": 0.41767898201942444,
3
  "best_model_checkpoint": "mikhail_panzo/fil_b64_le4_s8000/checkpoint-2000",
4
+ "epoch": 355.55555555555554,
5
  "eval_steps": 500,
6
+ "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1177
  "eval_samples_per_second": 21.009,
1178
  "eval_steps_per_second": 2.643,
1179
  "step": 7500
1180
+ },
1181
+ {
1182
+ "epoch": 335.55555555555554,
1183
+ "grad_norm": 0.7335402965545654,
1184
+ "learning_rate": 7.5666666666666665e-06,
1185
+ "loss": 0.3512,
1186
+ "step": 7550
1187
+ },
1188
+ {
1189
+ "epoch": 337.77777777777777,
1190
+ "grad_norm": 0.5599180459976196,
1191
+ "learning_rate": 6.733333333333333e-06,
1192
+ "loss": 0.3449,
1193
+ "step": 7600
1194
+ },
1195
+ {
1196
+ "epoch": 340.0,
1197
+ "grad_norm": 0.6823892593383789,
1198
+ "learning_rate": 5.9e-06,
1199
+ "loss": 0.3431,
1200
+ "step": 7650
1201
+ },
1202
+ {
1203
+ "epoch": 342.22222222222223,
1204
+ "grad_norm": 0.6472559571266174,
1205
+ "learning_rate": 5.066666666666667e-06,
1206
+ "loss": 0.3407,
1207
+ "step": 7700
1208
+ },
1209
+ {
1210
+ "epoch": 344.44444444444446,
1211
+ "grad_norm": 0.6288905143737793,
1212
+ "learning_rate": 4.233333333333333e-06,
1213
+ "loss": 0.3439,
1214
+ "step": 7750
1215
+ },
1216
+ {
1217
+ "epoch": 346.6666666666667,
1218
+ "grad_norm": 0.8924105167388916,
1219
+ "learning_rate": 3.4000000000000005e-06,
1220
+ "loss": 0.3458,
1221
+ "step": 7800
1222
+ },
1223
+ {
1224
+ "epoch": 348.8888888888889,
1225
+ "grad_norm": 0.6726309657096863,
1226
+ "learning_rate": 2.566666666666667e-06,
1227
+ "loss": 0.3421,
1228
+ "step": 7850
1229
+ },
1230
+ {
1231
+ "epoch": 351.1111111111111,
1232
+ "grad_norm": 0.45373550057411194,
1233
+ "learning_rate": 1.7333333333333334e-06,
1234
+ "loss": 0.3392,
1235
+ "step": 7900
1236
+ },
1237
+ {
1238
+ "epoch": 353.3333333333333,
1239
+ "grad_norm": 0.7311224341392517,
1240
+ "learning_rate": 9e-07,
1241
+ "loss": 0.3432,
1242
+ "step": 7950
1243
+ },
1244
+ {
1245
+ "epoch": 355.55555555555554,
1246
+ "grad_norm": 0.7168406248092651,
1247
+ "learning_rate": 6.666666666666667e-08,
1248
+ "loss": 0.3371,
1249
+ "step": 8000
1250
+ },
1251
+ {
1252
+ "epoch": 355.55555555555554,
1253
+ "eval_loss": 0.4246142506599426,
1254
+ "eval_runtime": 7.4147,
1255
+ "eval_samples_per_second": 21.444,
1256
+ "eval_steps_per_second": 2.697,
1257
+ "step": 8000
1258
  }
1259
  ],
1260
  "logging_steps": 50,
 
1269
  "should_evaluate": false,
1270
  "should_log": false,
1271
  "should_save": true,
1272
+ "should_training_stop": true
1273
  },
1274
  "attributes": {}
1275
  }
1276
  },
1277
+ "total_flos": 1.160835352354054e+17,
1278
  "train_batch_size": 32,
1279
  "trial_name": null,
1280
  "trial_params": null