lesso05 committed on
Commit
b2b9928
·
verified ·
1 Parent(s): fb91611

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0096973dbb7308622b4e714f1ddd57059b21d39bb6fd2785bef1c3de3cfe11e6
3
  size 35237104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c2b161e8bc9876a4f57639483ab4f03ecf4650ac7a020e045d2c08e4c277d74
3
  size 35237104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f929d657bd6388739fa3b1100aa223d832081a8a59553e7ba86015b74d1cbe6
3
- size 18810036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1734b51ad697d4c7c595278b77f79965d95999e3deb80a31ea76ad67e2d864d
3
+ size 18810356
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45ef98acf7ed689cfa32e1240be213a46133386c9425f564f19bcfcd37891b80
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:550860cff63886cd432a55f8848c8d62e312a5d9b9bf22bfda8663babbdbb92f
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1c5d32644c7486ba3c019c882e1c59bae084188a76560e3aa6cc8b5092d956b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f398ec605d9c9cdba56060aebcb72b7c65adf12fa1c75b756614fa39cc370089
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.8600337505340576,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
- "epoch": 0.011287955751213455,
5
  "eval_steps": 50,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -230,6 +230,49 @@
230
  "eval_samples_per_second": 50.698,
231
  "eval_steps_per_second": 12.677,
232
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  }
234
  ],
235
  "logging_steps": 10,
@@ -258,7 +301,7 @@
258
  "attributes": {}
259
  }
260
  },
261
- "total_flos": 2257488154460160.0,
262
  "train_batch_size": 4,
263
  "trial_name": null,
264
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.802130699157715,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.013545546901456146,
5
  "eval_steps": 50,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
230
  "eval_samples_per_second": 50.698,
231
  "eval_steps_per_second": 12.677,
232
  "step": 250
233
+ },
234
+ {
235
+ "epoch": 0.011739473981261994,
236
+ "grad_norm": 2.9359610080718994,
237
+ "learning_rate": 0.00011321416748493448,
238
+ "loss": 2.7982,
239
+ "step": 260
240
+ },
241
+ {
242
+ "epoch": 0.012190992211310531,
243
+ "grad_norm": 2.9160430431365967,
244
+ "learning_rate": 0.00010607719841200637,
245
+ "loss": 2.6641,
246
+ "step": 270
247
+ },
248
+ {
249
+ "epoch": 0.01264251044135907,
250
+ "grad_norm": 3.4751181602478027,
251
+ "learning_rate": 9.892280158799368e-05,
252
+ "loss": 2.6787,
253
+ "step": 280
254
+ },
255
+ {
256
+ "epoch": 0.013094028671407609,
257
+ "grad_norm": 4.7677388191223145,
258
+ "learning_rate": 9.178583251506553e-05,
259
+ "loss": 2.9618,
260
+ "step": 290
261
+ },
262
+ {
263
+ "epoch": 0.013545546901456146,
264
+ "grad_norm": 5.446451663970947,
265
+ "learning_rate": 8.470106178913964e-05,
266
+ "loss": 3.2181,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 0.013545546901456146,
271
+ "eval_loss": 2.802130699157715,
272
+ "eval_runtime": 181.988,
273
+ "eval_samples_per_second": 51.245,
274
+ "eval_steps_per_second": 12.814,
275
+ "step": 300
276
  }
277
  ],
278
  "logging_steps": 10,
 
301
  "attributes": {}
302
  }
303
  },
304
+ "total_flos": 2708084592476160.0,
305
  "train_batch_size": 4,
306
  "trial_name": null,
307
  "trial_params": null