hariniiiiiiiiii commited on
Commit
d0fc634
1 Parent(s): 024a457

Training in progress, step 5000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979fae33b211827802e243d4b2113809f0f0cecb9b7c1d248e072e2b037b2cb0
3
  size 4115013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35d81815d2717de634f2a36953a7237bebc0aed6d6f05bf4b444356bcd0bbcf5
3
  size 4115013
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19f4336db612efb7d62e5471628f98da8a89b2945716dfd11cbdd02c45bd395f
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c87f4fb413b45320b3f6b962097316ca8762605322043e16b3d8b94d7c09674a
3
  size 2329702453
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4c4dc079a6baea2794599f0f64419cf3494f7a5dbc363ce3a63466ab6608372
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0d39dfe3569fd98eb88e2a2c2eaaf8e508e20a76576f7fbb2655866ff3eba6b
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eea54d69309341d06e47639645523531f831191f588becdb9503cc4509f35e8f
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6982685cb2f1f3ac97aedb27d2a37d776ee4b9327cce88693a07d917b95ae056
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.891628652447293,
5
- "global_step": 4500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3246,11 +3246,371 @@
3246
  "eval_samples_per_second": 0.238,
3247
  "eval_steps_per_second": 0.238,
3248
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3249
  }
3250
  ],
3251
  "max_steps": 5060,
3252
  "num_train_epochs": 10,
3253
- "total_flos": 1.0952350126776115e+17,
3254
  "trial_name": null,
3255
  "trial_params": null
3256
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.879792873874985,
5
+ "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3246
  "eval_samples_per_second": 0.238,
3247
  "eval_steps_per_second": 0.238,
3248
  "step": 4500
3249
+ },
3250
+ {
3251
+ "epoch": 8.91,
3252
+ "learning_rate": 5.533199195171026e-05,
3253
+ "loss": 0.1363,
3254
+ "step": 4510
3255
+ },
3256
+ {
3257
+ "epoch": 8.93,
3258
+ "learning_rate": 5.4325955734406444e-05,
3259
+ "loss": 0.0738,
3260
+ "step": 4520
3261
+ },
3262
+ {
3263
+ "epoch": 8.95,
3264
+ "learning_rate": 5.331991951710262e-05,
3265
+ "loss": 0.0649,
3266
+ "step": 4530
3267
+ },
3268
+ {
3269
+ "epoch": 8.97,
3270
+ "learning_rate": 5.2313883299798795e-05,
3271
+ "loss": 0.0971,
3272
+ "step": 4540
3273
+ },
3274
+ {
3275
+ "epoch": 8.99,
3276
+ "learning_rate": 5.130784708249498e-05,
3277
+ "loss": 0.0688,
3278
+ "step": 4550
3279
+ },
3280
+ {
3281
+ "epoch": 9.01,
3282
+ "learning_rate": 5.030181086519115e-05,
3283
+ "loss": 0.0837,
3284
+ "step": 4560
3285
+ },
3286
+ {
3287
+ "epoch": 9.03,
3288
+ "learning_rate": 4.929577464788732e-05,
3289
+ "loss": 0.0891,
3290
+ "step": 4570
3291
+ },
3292
+ {
3293
+ "epoch": 9.05,
3294
+ "learning_rate": 4.82897384305835e-05,
3295
+ "loss": 0.0744,
3296
+ "step": 4580
3297
+ },
3298
+ {
3299
+ "epoch": 9.07,
3300
+ "learning_rate": 4.728370221327968e-05,
3301
+ "loss": 0.0906,
3302
+ "step": 4590
3303
+ },
3304
+ {
3305
+ "epoch": 9.09,
3306
+ "learning_rate": 4.6277665995975854e-05,
3307
+ "loss": 0.1102,
3308
+ "step": 4600
3309
+ },
3310
+ {
3311
+ "epoch": 9.09,
3312
+ "eval_loss": 0.9806769490242004,
3313
+ "eval_rouge1": 0.12615384615384617,
3314
+ "eval_rouge2": 0.08636363636363635,
3315
+ "eval_rougeL": 0.13615384615384613,
3316
+ "eval_rougeLsum": 0.12615384615384617,
3317
+ "eval_runtime": 88.592,
3318
+ "eval_samples_per_second": 0.226,
3319
+ "eval_steps_per_second": 0.226,
3320
+ "step": 4600
3321
+ },
3322
+ {
3323
+ "epoch": 9.11,
3324
+ "learning_rate": 4.527162977867203e-05,
3325
+ "loss": 0.072,
3326
+ "step": 4610
3327
+ },
3328
+ {
3329
+ "epoch": 9.13,
3330
+ "learning_rate": 4.426559356136821e-05,
3331
+ "loss": 0.0729,
3332
+ "step": 4620
3333
+ },
3334
+ {
3335
+ "epoch": 9.15,
3336
+ "learning_rate": 4.325955734406439e-05,
3337
+ "loss": 0.0884,
3338
+ "step": 4630
3339
+ },
3340
+ {
3341
+ "epoch": 9.17,
3342
+ "learning_rate": 4.225352112676056e-05,
3343
+ "loss": 0.0782,
3344
+ "step": 4640
3345
+ },
3346
+ {
3347
+ "epoch": 9.19,
3348
+ "learning_rate": 4.124748490945674e-05,
3349
+ "loss": 0.0879,
3350
+ "step": 4650
3351
+ },
3352
+ {
3353
+ "epoch": 9.21,
3354
+ "learning_rate": 4.024144869215292e-05,
3355
+ "loss": 0.1,
3356
+ "step": 4660
3357
+ },
3358
+ {
3359
+ "epoch": 9.23,
3360
+ "learning_rate": 3.9235412474849096e-05,
3361
+ "loss": 0.0867,
3362
+ "step": 4670
3363
+ },
3364
+ {
3365
+ "epoch": 9.25,
3366
+ "learning_rate": 3.822937625754527e-05,
3367
+ "loss": 0.0881,
3368
+ "step": 4680
3369
+ },
3370
+ {
3371
+ "epoch": 9.27,
3372
+ "learning_rate": 3.7223340040241454e-05,
3373
+ "loss": 0.0903,
3374
+ "step": 4690
3375
+ },
3376
+ {
3377
+ "epoch": 9.29,
3378
+ "learning_rate": 3.621730382293763e-05,
3379
+ "loss": 0.0942,
3380
+ "step": 4700
3381
+ },
3382
+ {
3383
+ "epoch": 9.29,
3384
+ "eval_loss": 0.9865831136703491,
3385
+ "eval_rouge1": 0.13999999999999999,
3386
+ "eval_rouge2": 0.09772727272727272,
3387
+ "eval_rougeL": 0.14615384615384613,
3388
+ "eval_rougeLsum": 0.13999999999999999,
3389
+ "eval_runtime": 86.651,
3390
+ "eval_samples_per_second": 0.231,
3391
+ "eval_steps_per_second": 0.231,
3392
+ "step": 4700
3393
+ },
3394
+ {
3395
+ "epoch": 9.31,
3396
+ "learning_rate": 3.5211267605633805e-05,
3397
+ "loss": 0.1079,
3398
+ "step": 4710
3399
+ },
3400
+ {
3401
+ "epoch": 9.33,
3402
+ "learning_rate": 3.420523138832998e-05,
3403
+ "loss": 0.0807,
3404
+ "step": 4720
3405
+ },
3406
+ {
3407
+ "epoch": 9.35,
3408
+ "learning_rate": 3.319919517102616e-05,
3409
+ "loss": 0.105,
3410
+ "step": 4730
3411
+ },
3412
+ {
3413
+ "epoch": 9.37,
3414
+ "learning_rate": 3.219315895372234e-05,
3415
+ "loss": 0.095,
3416
+ "step": 4740
3417
+ },
3418
+ {
3419
+ "epoch": 9.39,
3420
+ "learning_rate": 3.118712273641851e-05,
3421
+ "loss": 0.0965,
3422
+ "step": 4750
3423
+ },
3424
+ {
3425
+ "epoch": 9.41,
3426
+ "learning_rate": 3.018108651911469e-05,
3427
+ "loss": 0.1001,
3428
+ "step": 4760
3429
+ },
3430
+ {
3431
+ "epoch": 9.43,
3432
+ "learning_rate": 2.9175050301810868e-05,
3433
+ "loss": 0.0782,
3434
+ "step": 4770
3435
+ },
3436
+ {
3437
+ "epoch": 9.45,
3438
+ "learning_rate": 2.8169014084507043e-05,
3439
+ "loss": 0.115,
3440
+ "step": 4780
3441
+ },
3442
+ {
3443
+ "epoch": 9.47,
3444
+ "learning_rate": 2.7162977867203222e-05,
3445
+ "loss": 0.071,
3446
+ "step": 4790
3447
+ },
3448
+ {
3449
+ "epoch": 9.49,
3450
+ "learning_rate": 2.6156941649899397e-05,
3451
+ "loss": 0.129,
3452
+ "step": 4800
3453
+ },
3454
+ {
3455
+ "epoch": 9.49,
3456
+ "eval_loss": 0.9853466153144836,
3457
+ "eval_rouge1": 0.12837606837606838,
3458
+ "eval_rouge2": 0.08636363636363635,
3459
+ "eval_rougeL": 0.13615384615384613,
3460
+ "eval_rougeLsum": 0.1294871794871795,
3461
+ "eval_runtime": 86.6444,
3462
+ "eval_samples_per_second": 0.231,
3463
+ "eval_steps_per_second": 0.231,
3464
+ "step": 4800
3465
+ },
3466
+ {
3467
+ "epoch": 9.5,
3468
+ "learning_rate": 2.5150905432595576e-05,
3469
+ "loss": 0.1285,
3470
+ "step": 4810
3471
+ },
3472
+ {
3473
+ "epoch": 9.52,
3474
+ "learning_rate": 2.414486921529175e-05,
3475
+ "loss": 0.0747,
3476
+ "step": 4820
3477
+ },
3478
+ {
3479
+ "epoch": 9.54,
3480
+ "learning_rate": 2.3138832997987927e-05,
3481
+ "loss": 0.0702,
3482
+ "step": 4830
3483
+ },
3484
+ {
3485
+ "epoch": 9.56,
3486
+ "learning_rate": 2.2132796780684106e-05,
3487
+ "loss": 0.1029,
3488
+ "step": 4840
3489
+ },
3490
+ {
3491
+ "epoch": 9.58,
3492
+ "learning_rate": 2.112676056338028e-05,
3493
+ "loss": 0.102,
3494
+ "step": 4850
3495
+ },
3496
+ {
3497
+ "epoch": 9.6,
3498
+ "learning_rate": 2.012072434607646e-05,
3499
+ "loss": 0.0909,
3500
+ "step": 4860
3501
+ },
3502
+ {
3503
+ "epoch": 9.62,
3504
+ "learning_rate": 1.9114688128772636e-05,
3505
+ "loss": 0.0849,
3506
+ "step": 4870
3507
+ },
3508
+ {
3509
+ "epoch": 9.64,
3510
+ "learning_rate": 1.8108651911468815e-05,
3511
+ "loss": 0.1216,
3512
+ "step": 4880
3513
+ },
3514
+ {
3515
+ "epoch": 9.66,
3516
+ "learning_rate": 1.710261569416499e-05,
3517
+ "loss": 0.1016,
3518
+ "step": 4890
3519
+ },
3520
+ {
3521
+ "epoch": 9.68,
3522
+ "learning_rate": 1.609657947686117e-05,
3523
+ "loss": 0.0949,
3524
+ "step": 4900
3525
+ },
3526
+ {
3527
+ "epoch": 9.68,
3528
+ "eval_loss": 0.9819391965866089,
3529
+ "eval_rouge1": 0.1911111111111111,
3530
+ "eval_rouge2": 0.09772727272727272,
3531
+ "eval_rougeL": 0.19615384615384612,
3532
+ "eval_rougeLsum": 0.1923076923076923,
3533
+ "eval_runtime": 87.241,
3534
+ "eval_samples_per_second": 0.229,
3535
+ "eval_steps_per_second": 0.229,
3536
+ "step": 4900
3537
+ },
3538
+ {
3539
+ "epoch": 9.7,
3540
+ "learning_rate": 1.5090543259557344e-05,
3541
+ "loss": 0.0692,
3542
+ "step": 4910
3543
+ },
3544
+ {
3545
+ "epoch": 9.72,
3546
+ "learning_rate": 1.4084507042253522e-05,
3547
+ "loss": 0.0653,
3548
+ "step": 4920
3549
+ },
3550
+ {
3551
+ "epoch": 9.74,
3552
+ "learning_rate": 1.3078470824949699e-05,
3553
+ "loss": 0.0797,
3554
+ "step": 4930
3555
+ },
3556
+ {
3557
+ "epoch": 9.76,
3558
+ "learning_rate": 1.2072434607645874e-05,
3559
+ "loss": 0.0905,
3560
+ "step": 4940
3561
+ },
3562
+ {
3563
+ "epoch": 9.78,
3564
+ "learning_rate": 1.1066398390342053e-05,
3565
+ "loss": 0.0868,
3566
+ "step": 4950
3567
+ },
3568
+ {
3569
+ "epoch": 9.8,
3570
+ "learning_rate": 1.006036217303823e-05,
3571
+ "loss": 0.0964,
3572
+ "step": 4960
3573
+ },
3574
+ {
3575
+ "epoch": 9.82,
3576
+ "learning_rate": 9.054325955734407e-06,
3577
+ "loss": 0.0913,
3578
+ "step": 4970
3579
+ },
3580
+ {
3581
+ "epoch": 9.84,
3582
+ "learning_rate": 8.048289738430584e-06,
3583
+ "loss": 0.0708,
3584
+ "step": 4980
3585
+ },
3586
+ {
3587
+ "epoch": 9.86,
3588
+ "learning_rate": 7.042253521126761e-06,
3589
+ "loss": 0.1102,
3590
+ "step": 4990
3591
+ },
3592
+ {
3593
+ "epoch": 9.88,
3594
+ "learning_rate": 6.036217303822937e-06,
3595
+ "loss": 0.0852,
3596
+ "step": 5000
3597
+ },
3598
+ {
3599
+ "epoch": 9.88,
3600
+ "eval_loss": 0.9852367639541626,
3601
+ "eval_rouge1": 0.12615384615384617,
3602
+ "eval_rouge2": 0.08636363636363635,
3603
+ "eval_rougeL": 0.13615384615384613,
3604
+ "eval_rougeLsum": 0.12615384615384617,
3605
+ "eval_runtime": 87.473,
3606
+ "eval_samples_per_second": 0.229,
3607
+ "eval_steps_per_second": 0.229,
3608
+ "step": 5000
3609
  }
3610
  ],
3611
  "max_steps": 5060,
3612
  "num_train_epochs": 10,
3613
+ "total_flos": 1.2175308798022656e+17,
3614
  "trial_name": null,
3615
  "trial_params": null
3616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19f4336db612efb7d62e5471628f98da8a89b2945716dfd11cbdd02c45bd395f
3
  size 2329702453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c87f4fb413b45320b3f6b962097316ca8762605322043e16b3d8b94d7c09674a
3
  size 2329702453
runs/Feb11_14-00-34_74e5e7b42358/events.out.tfevents.1676124921.74e5e7b42358.292.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e58a04a0e477b9d2ba2e1e319284b1a33bbd6d6814f990542656072fb5f45d84
3
- size 14428
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23fbf43aa6b57ea727d191f2793201bb108b3520d3fbf7a5842b8ba63cc79714
3
+ size 24648