pakawadeep committed on
Commit
e2e3c2f
1 Parent(s): fdb9bf8

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d5794c68522903362bd13bff8a3a9d4ddbbdc08ed274ca763782dd8af324f41
3
  size 40036488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edb1b8a303fc9b3efcf849062236f63c083e832b9046d873cf9992c715843b93
3
  size 40036488
last-checkpoint/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abd7d6494f445634ed0c5199eae72b6d115d61e4207ed306563079f7f118a58a
3
+ size 239914192
last-checkpoint/global_step500/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:439671f20c83645773f281ff7dc11fd332dece646507f0f9ceb23efecbbe4c75
3
+ size 143772857
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step475
 
1
+ global_step500
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10ef9acbd4fa0d6f3cf9ef31e36c3f42b685148e0b2bdf384912e243beb8a4d6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3480915ffec0a67b3b569a0849907362275927ef32fee988b0a6bace7e965c6
3
  size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.22400377269511906,
5
  "eval_steps": 1000,
6
- "global_step": 475,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3332,6 +3332,181 @@
3332
  "learning_rate": 1.09979633401222e-05,
3333
  "loss": 1.3586,
3334
  "step": 475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3335
  }
3336
  ],
3337
  "logging_steps": 1,
@@ -3339,7 +3514,7 @@
3339
  "num_input_tokens_seen": 0,
3340
  "num_train_epochs": 1,
3341
  "save_steps": 25,
3342
- "total_flos": 4986911766085632.0,
3343
  "train_batch_size": 2,
3344
  "trial_name": null,
3345
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.23579344494223062,
5
  "eval_steps": 1000,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3332
  "learning_rate": 1.09979633401222e-05,
3333
  "loss": 1.3586,
3334
  "step": 475
3335
+ },
3336
+ {
3337
+ "epoch": 0.22447535958500353,
3338
+ "grad_norm": 1.565455350231205,
3339
+ "learning_rate": 1.059063136456212e-05,
3340
+ "loss": 1.2235,
3341
+ "step": 476
3342
+ },
3343
+ {
3344
+ "epoch": 0.224946946474888,
3345
+ "grad_norm": 1.4820242487973796,
3346
+ "learning_rate": 1.0183299389002038e-05,
3347
+ "loss": 0.731,
3348
+ "step": 477
3349
+ },
3350
+ {
3351
+ "epoch": 0.22541853336477247,
3352
+ "grad_norm": 1.5511126800670054,
3353
+ "learning_rate": 9.775967413441956e-06,
3354
+ "loss": 1.1143,
3355
+ "step": 478
3356
+ },
3357
+ {
3358
+ "epoch": 0.22589012025465693,
3359
+ "grad_norm": 1.9312597502922166,
3360
+ "learning_rate": 9.368635437881874e-06,
3361
+ "loss": 1.8651,
3362
+ "step": 479
3363
+ },
3364
+ {
3365
+ "epoch": 0.22636170714454137,
3366
+ "grad_norm": 2.332740151981935,
3367
+ "learning_rate": 8.961303462321792e-06,
3368
+ "loss": 1.5795,
3369
+ "step": 480
3370
+ },
3371
+ {
3372
+ "epoch": 0.22683329403442584,
3373
+ "grad_norm": 1.6625740665773265,
3374
+ "learning_rate": 8.55397148676171e-06,
3375
+ "loss": 0.8778,
3376
+ "step": 481
3377
+ },
3378
+ {
3379
+ "epoch": 0.2273048809243103,
3380
+ "grad_norm": 2.0164940904700503,
3381
+ "learning_rate": 8.14663951120163e-06,
3382
+ "loss": 1.2201,
3383
+ "step": 482
3384
+ },
3385
+ {
3386
+ "epoch": 0.22777646781419478,
3387
+ "grad_norm": 2.308672133920739,
3388
+ "learning_rate": 7.739307535641548e-06,
3389
+ "loss": 1.6128,
3390
+ "step": 483
3391
+ },
3392
+ {
3393
+ "epoch": 0.22824805470407922,
3394
+ "grad_norm": 3.6223058225453437,
3395
+ "learning_rate": 7.3319755600814665e-06,
3396
+ "loss": 1.1935,
3397
+ "step": 484
3398
+ },
3399
+ {
3400
+ "epoch": 0.22871964159396369,
3401
+ "grad_norm": 1.898440262906297,
3402
+ "learning_rate": 6.9246435845213855e-06,
3403
+ "loss": 1.3363,
3404
+ "step": 485
3405
+ },
3406
+ {
3407
+ "epoch": 0.22919122848384815,
3408
+ "grad_norm": 1.6065163269115639,
3409
+ "learning_rate": 6.517311608961303e-06,
3410
+ "loss": 1.0665,
3411
+ "step": 486
3412
+ },
3413
+ {
3414
+ "epoch": 0.22966281537373262,
3415
+ "grad_norm": 1.8701305737526783,
3416
+ "learning_rate": 6.109979633401222e-06,
3417
+ "loss": 1.6185,
3418
+ "step": 487
3419
+ },
3420
+ {
3421
+ "epoch": 0.23013440226361706,
3422
+ "grad_norm": 1.6728117270550722,
3423
+ "learning_rate": 5.702647657841141e-06,
3424
+ "loss": 1.4654,
3425
+ "step": 488
3426
+ },
3427
+ {
3428
+ "epoch": 0.23060598915350153,
3429
+ "grad_norm": 1.7046091455062304,
3430
+ "learning_rate": 5.29531568228106e-06,
3431
+ "loss": 1.0988,
3432
+ "step": 489
3433
+ },
3434
+ {
3435
+ "epoch": 0.231077576043386,
3436
+ "grad_norm": 1.6534105108434365,
3437
+ "learning_rate": 4.887983706720978e-06,
3438
+ "loss": 1.1103,
3439
+ "step": 490
3440
+ },
3441
+ {
3442
+ "epoch": 0.23154916293327046,
3443
+ "grad_norm": 1.4043753102008933,
3444
+ "learning_rate": 4.480651731160896e-06,
3445
+ "loss": 0.656,
3446
+ "step": 491
3447
+ },
3448
+ {
3449
+ "epoch": 0.2320207498231549,
3450
+ "grad_norm": 1.9403853927340866,
3451
+ "learning_rate": 4.073319755600815e-06,
3452
+ "loss": 1.6598,
3453
+ "step": 492
3454
+ },
3455
+ {
3456
+ "epoch": 0.23249233671303937,
3457
+ "grad_norm": 1.5035036811542513,
3458
+ "learning_rate": 3.6659877800407332e-06,
3459
+ "loss": 1.1763,
3460
+ "step": 493
3461
+ },
3462
+ {
3463
+ "epoch": 0.23296392360292384,
3464
+ "grad_norm": 2.0180415371872273,
3465
+ "learning_rate": 3.2586558044806514e-06,
3466
+ "loss": 1.2816,
3467
+ "step": 494
3468
+ },
3469
+ {
3470
+ "epoch": 0.2334355104928083,
3471
+ "grad_norm": 1.9388592781834748,
3472
+ "learning_rate": 2.8513238289205704e-06,
3473
+ "loss": 1.4855,
3474
+ "step": 495
3475
+ },
3476
+ {
3477
+ "epoch": 0.23390709738269277,
3478
+ "grad_norm": 2.3566969268057516,
3479
+ "learning_rate": 2.443991853360489e-06,
3480
+ "loss": 1.3763,
3481
+ "step": 496
3482
+ },
3483
+ {
3484
+ "epoch": 0.23437868427257721,
3485
+ "grad_norm": 1.506615118830032,
3486
+ "learning_rate": 2.0366598778004075e-06,
3487
+ "loss": 0.9294,
3488
+ "step": 497
3489
+ },
3490
+ {
3491
+ "epoch": 0.23485027116246168,
3492
+ "grad_norm": 1.9049222731064772,
3493
+ "learning_rate": 1.6293279022403257e-06,
3494
+ "loss": 1.5868,
3495
+ "step": 498
3496
+ },
3497
+ {
3498
+ "epoch": 0.23532185805234615,
3499
+ "grad_norm": 2.0393885793885573,
3500
+ "learning_rate": 1.2219959266802445e-06,
3501
+ "loss": 1.3604,
3502
+ "step": 499
3503
+ },
3504
+ {
3505
+ "epoch": 0.23579344494223062,
3506
+ "grad_norm": 1.53222897433984,
3507
+ "learning_rate": 8.146639511201628e-07,
3508
+ "loss": 1.0879,
3509
+ "step": 500
3510
  }
3511
  ],
3512
  "logging_steps": 1,
 
3514
  "num_input_tokens_seen": 0,
3515
  "num_train_epochs": 1,
3516
  "save_steps": 25,
3517
+ "total_flos": 5267949826867200.0,
3518
  "train_batch_size": 2,
3519
  "trial_name": null,
3520
  "trial_params": null