mikhail-panzo commited on
Commit
e286fc4
1 Parent(s): 3466009

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbd982b8acccd1b2d14990bc63f8c329f2341fd56be13b837d9d0673ddbcc639
3
  size 577789320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a736e732330ece0b457cbaf5c5b4351b5d3db83c02791cf60a402f193e6be32
3
  size 577789320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ba982a667b225612226666295d284a7db39c4d14a96acdf4def9ad659ee35ee
3
  size 1155772233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:621173d0409fc5035f339553890479dbf13f20436f2751f19fa0d3f772c19566
3
  size 1155772233
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:968291a21d5bd54cba05589e1f0110365f5d188066676d4b8853115288d17bb8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:373099bcd0d3ee5ceb0d1d94a6fdc048cf24515e4576a9744fed78c8d5e543ab
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b382f91d6a62c99969921e0d8014a3b1f89a198a6a81ab888e1194bc144c13d5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71cdf44715446c06dd4fba004a7a18790345b0ae4b9e2bcb1cb99e4155dbc62d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.39783453941345215,
3
- "best_model_checkpoint": "mikhail_panzo/ceb_b64_le4_s8000/checkpoint-2500",
4
- "epoch": 99.00990099009901,
5
  "eval_steps": 500,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -397,6 +397,84 @@
397
  "eval_samples_per_second": 23.289,
398
  "eval_steps_per_second": 2.976,
399
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  }
401
  ],
402
  "logging_steps": 50,
@@ -416,7 +494,7 @@
416
  "attributes": {}
417
  }
418
  },
419
- "total_flos": 2.701326498107371e+16,
420
  "train_batch_size": 16,
421
  "trial_name": null,
422
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.3978251516819,
3
+ "best_model_checkpoint": "mikhail_panzo/ceb_b64_le4_s8000/checkpoint-3000",
4
+ "epoch": 118.81188118811882,
5
  "eval_steps": 500,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
397
  "eval_samples_per_second": 23.289,
398
  "eval_steps_per_second": 2.976,
399
  "step": 2500
400
+ },
401
+ {
402
+ "epoch": 100.99009900990099,
403
+ "grad_norm": 1.1166267395019531,
404
+ "learning_rate": 9.085e-05,
405
+ "loss": 0.3824,
406
+ "step": 2550
407
+ },
408
+ {
409
+ "epoch": 102.97029702970298,
410
+ "grad_norm": 1.4629709720611572,
411
+ "learning_rate": 9.001666666666667e-05,
412
+ "loss": 0.3829,
413
+ "step": 2600
414
+ },
415
+ {
416
+ "epoch": 104.95049504950495,
417
+ "grad_norm": 2.9931211471557617,
418
+ "learning_rate": 8.918333333333334e-05,
419
+ "loss": 0.3756,
420
+ "step": 2650
421
+ },
422
+ {
423
+ "epoch": 106.93069306930693,
424
+ "grad_norm": 1.6760491132736206,
425
+ "learning_rate": 8.834999999999999e-05,
426
+ "loss": 0.3815,
427
+ "step": 2700
428
+ },
429
+ {
430
+ "epoch": 108.91089108910892,
431
+ "grad_norm": 1.8942713737487793,
432
+ "learning_rate": 8.751666666666668e-05,
433
+ "loss": 0.3773,
434
+ "step": 2750
435
+ },
436
+ {
437
+ "epoch": 110.89108910891089,
438
+ "grad_norm": 1.110032081604004,
439
+ "learning_rate": 8.668333333333334e-05,
440
+ "loss": 0.3747,
441
+ "step": 2800
442
+ },
443
+ {
444
+ "epoch": 112.87128712871286,
445
+ "grad_norm": 1.3915964365005493,
446
+ "learning_rate": 8.585000000000001e-05,
447
+ "loss": 0.3796,
448
+ "step": 2850
449
+ },
450
+ {
451
+ "epoch": 114.85148514851485,
452
+ "grad_norm": 2.8676748275756836,
453
+ "learning_rate": 8.501666666666667e-05,
454
+ "loss": 0.3731,
455
+ "step": 2900
456
+ },
457
+ {
458
+ "epoch": 116.83168316831683,
459
+ "grad_norm": 1.0008431673049927,
460
+ "learning_rate": 8.418333333333334e-05,
461
+ "loss": 0.3747,
462
+ "step": 2950
463
+ },
464
+ {
465
+ "epoch": 118.81188118811882,
466
+ "grad_norm": 2.071352243423462,
467
+ "learning_rate": 8.335e-05,
468
+ "loss": 0.3726,
469
+ "step": 3000
470
+ },
471
+ {
472
+ "epoch": 118.81188118811882,
473
+ "eval_loss": 0.3978251516819,
474
+ "eval_runtime": 8.1696,
475
+ "eval_samples_per_second": 22.033,
476
+ "eval_steps_per_second": 2.815,
477
+ "step": 3000
478
  }
479
  ],
480
  "logging_steps": 50,
 
494
  "attributes": {}
495
  }
496
  },
497
+ "total_flos": 3.2419037301352416e+16,
498
  "train_batch_size": 16,
499
  "trial_name": null,
500
  "trial_params": null