mikhail-panzo commited on
Commit
4f11644
1 Parent(s): 423d4f2

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65cc40d94c31ec94c9b98763487736e129c44c2fd99d58b70a1ed20a361a1eaa
3
  size 577789320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51f6e02c9049d9eb2741a87e15c4995bca735aa623c1eacd8c972af7d785b970
3
  size 577789320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adb391bb8e15ea2bb7244598232e46d31016900192d4dc478b9ba1a9edca860c
3
  size 1155772233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0fe8f85e6ddb45c62b62781d7c657ebd54e09d3ba4af58ee61f07f59f2d325
3
  size 1155772233
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:968291a21d5bd54cba05589e1f0110365f5d188066676d4b8853115288d17bb8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:373099bcd0d3ee5ceb0d1d94a6fdc048cf24515e4576a9744fed78c8d5e543ab
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a04aa548ba1997bc55fa261cc2851282a66f1a8d19fe3862e3573f33f7d76f5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f37eacb952a10eea80d10445d29e75511960fff96b96070d64acac281d799da8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.3953019976615906,
3
  "best_model_checkpoint": "mikhail_panzo/ceb_b64_le4_s4000/checkpoint-2500",
4
- "epoch": 99.00990099009901,
5
  "eval_steps": 500,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -397,6 +397,84 @@
397
  "eval_samples_per_second": 25.844,
398
  "eval_steps_per_second": 3.302,
399
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  }
401
  ],
402
  "logging_steps": 50,
@@ -416,7 +494,7 @@
416
  "attributes": {}
417
  }
418
  },
419
- "total_flos": 2.701326498107371e+16,
420
  "train_batch_size": 16,
421
  "trial_name": null,
422
  "trial_params": null
 
1
  {
2
  "best_metric": 0.3953019976615906,
3
  "best_model_checkpoint": "mikhail_panzo/ceb_b64_le4_s4000/checkpoint-2500",
4
+ "epoch": 118.81188118811882,
5
  "eval_steps": 500,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
397
  "eval_samples_per_second": 25.844,
398
  "eval_steps_per_second": 3.302,
399
  "step": 2500
400
+ },
401
+ {
402
+ "epoch": 100.99009900990099,
403
+ "grad_norm": 2.3658339977264404,
404
+ "learning_rate": 7.255e-05,
405
+ "loss": 0.3793,
406
+ "step": 2550
407
+ },
408
+ {
409
+ "epoch": 102.97029702970298,
410
+ "grad_norm": 1.4890061616897583,
411
+ "learning_rate": 7.005000000000001e-05,
412
+ "loss": 0.3823,
413
+ "step": 2600
414
+ },
415
+ {
416
+ "epoch": 104.95049504950495,
417
+ "grad_norm": 2.134577512741089,
418
+ "learning_rate": 6.755e-05,
419
+ "loss": 0.3735,
420
+ "step": 2650
421
+ },
422
+ {
423
+ "epoch": 106.93069306930693,
424
+ "grad_norm": 1.2545772790908813,
425
+ "learning_rate": 6.505e-05,
426
+ "loss": 0.3788,
427
+ "step": 2700
428
+ },
429
+ {
430
+ "epoch": 108.91089108910892,
431
+ "grad_norm": 1.011292815208435,
432
+ "learning_rate": 6.255e-05,
433
+ "loss": 0.3762,
434
+ "step": 2750
435
+ },
436
+ {
437
+ "epoch": 110.89108910891089,
438
+ "grad_norm": 1.4628199338912964,
439
+ "learning_rate": 6.005000000000001e-05,
440
+ "loss": 0.3742,
441
+ "step": 2800
442
+ },
443
+ {
444
+ "epoch": 112.87128712871286,
445
+ "grad_norm": 1.661889910697937,
446
+ "learning_rate": 5.755e-05,
447
+ "loss": 0.3778,
448
+ "step": 2850
449
+ },
450
+ {
451
+ "epoch": 114.85148514851485,
452
+ "grad_norm": 1.1926722526550293,
453
+ "learning_rate": 5.505e-05,
454
+ "loss": 0.3713,
455
+ "step": 2900
456
+ },
457
+ {
458
+ "epoch": 116.83168316831683,
459
+ "grad_norm": 0.9905158877372742,
460
+ "learning_rate": 5.255e-05,
461
+ "loss": 0.3714,
462
+ "step": 2950
463
+ },
464
+ {
465
+ "epoch": 118.81188118811882,
466
+ "grad_norm": 0.7342912554740906,
467
+ "learning_rate": 5.005e-05,
468
+ "loss": 0.3688,
469
+ "step": 3000
470
+ },
471
+ {
472
+ "epoch": 118.81188118811882,
473
+ "eval_loss": 0.39639872312545776,
474
+ "eval_runtime": 7.0442,
475
+ "eval_samples_per_second": 25.553,
476
+ "eval_steps_per_second": 3.265,
477
+ "step": 3000
478
  }
479
  ],
480
  "logging_steps": 50,
 
494
  "attributes": {}
495
  }
496
  },
497
+ "total_flos": 3.2419037301352416e+16,
498
  "train_batch_size": 16,
499
  "trial_name": null,
500
  "trial_params": null