auxyus committed (verified)
Commit: 85a6b3c
Parent: 31426d0

Training in progress, step 450, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f732bef2952e9d32f19e762db18002aed6c12f6c825354edc05263531b046467
+oid sha256:7fd98b116783dd2e2128b57f7754943e7c8c78af1a8c7737870d2430b059f158
 size 335604696
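
Both the old and new versions of this file are Git LFS pointers, so only the sha256 oid changes while the payload size stays at 335604696 bytes. A minimal sketch for checking a locally downloaded copy against the new pointer (the file path and helper name are illustrative, not part of this repo):

    import hashlib
    import os

    def verify_lfs_pointer(local_path, expected_oid, expected_size):
        """Compare a downloaded file against the oid/size from its LFS pointer."""
        if os.path.getsize(local_path) != expected_size:
            return False
        sha = hashlib.sha256()
        with open(local_path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                sha.update(chunk)
        return sha.hexdigest() == expected_oid

    # Values copied from the new pointer above.
    print(verify_lfs_pointer(
        "last-checkpoint/adapter_model.safetensors",
        "7fd98b116783dd2e2128b57f7754943e7c8c78af1a8c7737870d2430b059f158",
        335604696,
    ))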
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2feeebd9398df71afacce59452b2c73c4adb2295cd53299eb4c2ded241ccce8f
+oid sha256:51b03fc9ef7d2e5ec63f54ba8c56e82313d28b728558511fdc3ef093966f003d
 size 170920532
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:574041858ad47610f1228962be219c1774ebe5acbf20c9a7bf53d14a3ca80f21
+oid sha256:05c6c8fe118793e2efe5d50edd9a3cde907f8de2cd1ffd9277dcf906663b389d
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8d9346c4fcc90fb1ec8546736583b76a4fae6bc25cb93181337c187d15da94a
+oid sha256:70f0f789b56065211b8c0b1a5e2a97dd0b5b08a816bbbe288fb6f9c677282af9
 size 1064
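
optimizer.pt, scheduler.pt, and rng_state.pth are the files the Hugging Face Trainer reloads when a run is resumed with trainer.train(resume_from_checkpoint="last-checkpoint"). A small sketch for inspecting what they hold, assuming a local copy of this repo; the exact scheduler keys depend on which optimizer and scheduler classes the original run used, which this commit does not show:

    import torch

    # Both files are torch-saved state dicts; map_location keeps tensors on CPU.
    optim_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
    sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu")

    print("optimizer param groups:", len(optim_state["param_groups"]))
    print("current lr per group:", [g["lr"] for g in optim_state["param_groups"]])
    print("scheduler state keys:", sorted(sched_state.keys()))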
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6793892979621887,
-  "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.07821666014861166,
+  "best_metric": 0.598429799079895,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.11732499022291748,
   "eval_steps": 50,
-  "global_step": 300,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -273,6 +273,135 @@
       "eval_samples_per_second": 12.602,
       "eval_steps_per_second": 3.151,
       "step": 300
+    },
+    {
+      "epoch": 0.08082388215356538,
+      "grad_norm": 7.278568744659424,
+      "learning_rate": 9.733794785622253e-05,
+      "loss": 2.2598,
+      "step": 310
+    },
+    {
+      "epoch": 0.08343110415851909,
+      "grad_norm": 11.45936393737793,
+      "learning_rate": 9.202138944469168e-05,
+      "loss": 2.221,
+      "step": 320
+    },
+    {
+      "epoch": 0.08603832616347282,
+      "grad_norm": 11.150221824645996,
+      "learning_rate": 8.672744727162781e-05,
+      "loss": 2.7018,
+      "step": 330
+    },
+    {
+      "epoch": 0.08864554816842654,
+      "grad_norm": 14.54114055633545,
+      "learning_rate": 8.147112759128859e-05,
+      "loss": 2.7212,
+      "step": 340
+    },
+    {
+      "epoch": 0.09125277017338027,
+      "grad_norm": 39.82421112060547,
+      "learning_rate": 7.626733001288851e-05,
+      "loss": 3.3754,
+      "step": 350
+    },
+    {
+      "epoch": 0.09125277017338027,
+      "eval_loss": 0.6544287800788879,
+      "eval_runtime": 512.4276,
+      "eval_samples_per_second": 12.607,
+      "eval_steps_per_second": 3.152,
+      "step": 350
+    },
+    {
+      "epoch": 0.09385999217833399,
+      "grad_norm": 10.35319709777832,
+      "learning_rate": 7.113080526603792e-05,
+      "loss": 2.0052,
+      "step": 360
+    },
+    {
+      "epoch": 0.0964672141832877,
+      "grad_norm": 11.164114952087402,
+      "learning_rate": 6.607611338819697e-05,
+      "loss": 2.3054,
+      "step": 370
+    },
+    {
+      "epoch": 0.09907443618824142,
+      "grad_norm": 10.16714859008789,
+      "learning_rate": 6.111758245266794e-05,
+      "loss": 2.4965,
+      "step": 380
+    },
+    {
+      "epoch": 0.10168165819319515,
+      "grad_norm": 10.978752136230469,
+      "learning_rate": 5.626926795411447e-05,
+      "loss": 2.8824,
+      "step": 390
+    },
+    {
+      "epoch": 0.10428888019814887,
+      "grad_norm": 19.6451473236084,
+      "learning_rate": 5.1544912966734994e-05,
+      "loss": 3.0805,
+      "step": 400
+    },
+    {
+      "epoch": 0.10428888019814887,
+      "eval_loss": 0.6194283962249756,
+      "eval_runtime": 512.9361,
+      "eval_samples_per_second": 12.594,
+      "eval_steps_per_second": 3.149,
+      "step": 400
+    },
+    {
+      "epoch": 0.1068961022031026,
+      "grad_norm": 8.461108207702637,
+      "learning_rate": 4.695790918802576e-05,
+      "loss": 1.9141,
+      "step": 410
+    },
+    {
+      "epoch": 0.10950332420805632,
+      "grad_norm": 10.8994140625,
+      "learning_rate": 4.252125897855932e-05,
+      "loss": 2.2111,
+      "step": 420
+    },
+    {
+      "epoch": 0.11211054621301003,
+      "grad_norm": 9.634496688842773,
+      "learning_rate": 3.824753850538082e-05,
+      "loss": 2.1574,
+      "step": 430
+    },
+    {
+      "epoch": 0.11471776821796376,
+      "grad_norm": 12.223336219787598,
+      "learning_rate": 3.414886209349615e-05,
+      "loss": 2.7663,
+      "step": 440
+    },
+    {
+      "epoch": 0.11732499022291748,
+      "grad_norm": 20.796768188476562,
+      "learning_rate": 3.0236847886501542e-05,
+      "loss": 3.095,
+      "step": 450
+    },
+    {
+      "epoch": 0.11732499022291748,
+      "eval_loss": 0.598429799079895,
+      "eval_runtime": 512.9195,
+      "eval_samples_per_second": 12.595,
+      "eval_steps_per_second": 3.149,
+      "step": 450
     }
   ],
   "logging_steps": 10,
@@ -301,7 +430,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.487523283595428e+17,
+  "total_flos": 6.736589326437581e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null