danurahul commited on
Commit
c276d13
1 Parent(s): c0b9689

Initial commit

Browse files
Files changed (4) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +483 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f23fe1da9d7b0d262eb827700691cf93218fcc1ab92881c019c3bf663b065d8
3
  size 655348487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d55c7235936d0242cf392bec1a2ea0817b12aa635dec8779c8a4eacdb4938ed
3
  size 655348487
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a823b809bd0a2dff786f1c5ff0e0323d2af862b1b3dfbee9e1b34a9f48e4dda8
3
  size 333975623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:492888ed30a69737bf3290cd99f0992bde40cc710fa370b0ccded98a91f294b8
3
  size 333975623
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f78d7980d4c582b39ed334cb97ac267ed42b605e3763293c7f4f9ed34e3a350f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f51219b2cb4bc18c5893bf94b3f2834354e493062268fb89b16464b4a442a743
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 120.2843083652269,
5
- "global_step": 440000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5286,11 +5286,491 @@
5286
  "learning_rate": 4.3991797676008206e-05,
5287
  "loss": 1.9818,
5288
  "step": 440000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5289
  }
5290
  ],
5291
  "max_steps": 3658000,
5292
  "num_train_epochs": 1000,
5293
- "total_flos": 885711431443415040,
5294
  "trial_name": null,
5295
  "trial_params": null
5296
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 131.21924548933845,
5
+ "global_step": 480000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5286
  "learning_rate": 4.3991797676008206e-05,
5287
  "loss": 1.9818,
5288
  "step": 440000
5289
+ },
5290
+ {
5291
+ "epoch": 120.42,
5292
+ "learning_rate": 4.398496240601504e-05,
5293
+ "loss": 1.9914,
5294
+ "step": 440500
5295
+ },
5296
+ {
5297
+ "epoch": 120.56,
5298
+ "learning_rate": 4.3978127136021876e-05,
5299
+ "loss": 1.9949,
5300
+ "step": 441000
5301
+ },
5302
+ {
5303
+ "epoch": 120.69,
5304
+ "learning_rate": 4.397129186602871e-05,
5305
+ "loss": 2.0031,
5306
+ "step": 441500
5307
+ },
5308
+ {
5309
+ "epoch": 120.83,
5310
+ "learning_rate": 4.3964456596035545e-05,
5311
+ "loss": 2.0048,
5312
+ "step": 442000
5313
+ },
5314
+ {
5315
+ "epoch": 120.97,
5316
+ "learning_rate": 4.395762132604238e-05,
5317
+ "loss": 2.018,
5318
+ "step": 442500
5319
+ },
5320
+ {
5321
+ "epoch": 121.1,
5322
+ "learning_rate": 4.3950786056049215e-05,
5323
+ "loss": 1.9803,
5324
+ "step": 443000
5325
+ },
5326
+ {
5327
+ "epoch": 121.24,
5328
+ "learning_rate": 4.3943950786056056e-05,
5329
+ "loss": 1.9769,
5330
+ "step": 443500
5331
+ },
5332
+ {
5333
+ "epoch": 121.38,
5334
+ "learning_rate": 4.393711551606289e-05,
5335
+ "loss": 1.9786,
5336
+ "step": 444000
5337
+ },
5338
+ {
5339
+ "epoch": 121.51,
5340
+ "learning_rate": 4.393028024606972e-05,
5341
+ "loss": 1.99,
5342
+ "step": 444500
5343
+ },
5344
+ {
5345
+ "epoch": 121.65,
5346
+ "learning_rate": 4.3923444976076554e-05,
5347
+ "loss": 1.9929,
5348
+ "step": 445000
5349
+ },
5350
+ {
5351
+ "epoch": 121.79,
5352
+ "learning_rate": 4.3916609706083396e-05,
5353
+ "loss": 2.0034,
5354
+ "step": 445500
5355
+ },
5356
+ {
5357
+ "epoch": 121.92,
5358
+ "learning_rate": 4.390977443609023e-05,
5359
+ "loss": 2.0121,
5360
+ "step": 446000
5361
+ },
5362
+ {
5363
+ "epoch": 122.06,
5364
+ "learning_rate": 4.3902939166097065e-05,
5365
+ "loss": 1.9899,
5366
+ "step": 446500
5367
+ },
5368
+ {
5369
+ "epoch": 122.2,
5370
+ "learning_rate": 4.389610389610389e-05,
5371
+ "loss": 1.9649,
5372
+ "step": 447000
5373
+ },
5374
+ {
5375
+ "epoch": 122.33,
5376
+ "learning_rate": 4.3889268626110735e-05,
5377
+ "loss": 1.9764,
5378
+ "step": 447500
5379
+ },
5380
+ {
5381
+ "epoch": 122.47,
5382
+ "learning_rate": 4.388243335611757e-05,
5383
+ "loss": 1.9795,
5384
+ "step": 448000
5385
+ },
5386
+ {
5387
+ "epoch": 122.61,
5388
+ "learning_rate": 4.3875598086124404e-05,
5389
+ "loss": 1.9988,
5390
+ "step": 448500
5391
+ },
5392
+ {
5393
+ "epoch": 122.74,
5394
+ "learning_rate": 4.386876281613124e-05,
5395
+ "loss": 1.9975,
5396
+ "step": 449000
5397
+ },
5398
+ {
5399
+ "epoch": 122.88,
5400
+ "learning_rate": 4.3861927546138074e-05,
5401
+ "loss": 2.007,
5402
+ "step": 449500
5403
+ },
5404
+ {
5405
+ "epoch": 123.02,
5406
+ "learning_rate": 4.385509227614491e-05,
5407
+ "loss": 2.0015,
5408
+ "step": 450000
5409
+ },
5410
+ {
5411
+ "epoch": 123.15,
5412
+ "learning_rate": 4.3848257006151743e-05,
5413
+ "loss": 1.9606,
5414
+ "step": 450500
5415
+ },
5416
+ {
5417
+ "epoch": 123.29,
5418
+ "learning_rate": 4.384142173615858e-05,
5419
+ "loss": 1.9646,
5420
+ "step": 451000
5421
+ },
5422
+ {
5423
+ "epoch": 123.43,
5424
+ "learning_rate": 4.383458646616542e-05,
5425
+ "loss": 1.9758,
5426
+ "step": 451500
5427
+ },
5428
+ {
5429
+ "epoch": 123.56,
5430
+ "learning_rate": 4.382775119617225e-05,
5431
+ "loss": 1.9893,
5432
+ "step": 452000
5433
+ },
5434
+ {
5435
+ "epoch": 123.7,
5436
+ "learning_rate": 4.382091592617908e-05,
5437
+ "loss": 1.9943,
5438
+ "step": 452500
5439
+ },
5440
+ {
5441
+ "epoch": 123.84,
5442
+ "learning_rate": 4.3814080656185924e-05,
5443
+ "loss": 1.9997,
5444
+ "step": 453000
5445
+ },
5446
+ {
5447
+ "epoch": 123.97,
5448
+ "learning_rate": 4.380724538619276e-05,
5449
+ "loss": 2.0071,
5450
+ "step": 453500
5451
+ },
5452
+ {
5453
+ "epoch": 124.11,
5454
+ "learning_rate": 4.3800410116199594e-05,
5455
+ "loss": 1.9643,
5456
+ "step": 454000
5457
+ },
5458
+ {
5459
+ "epoch": 124.25,
5460
+ "learning_rate": 4.379357484620643e-05,
5461
+ "loss": 1.9644,
5462
+ "step": 454500
5463
+ },
5464
+ {
5465
+ "epoch": 124.38,
5466
+ "learning_rate": 4.378673957621326e-05,
5467
+ "loss": 1.9718,
5468
+ "step": 455000
5469
+ },
5470
+ {
5471
+ "epoch": 124.52,
5472
+ "learning_rate": 4.37799043062201e-05,
5473
+ "loss": 1.9801,
5474
+ "step": 455500
5475
+ },
5476
+ {
5477
+ "epoch": 124.66,
5478
+ "learning_rate": 4.377306903622693e-05,
5479
+ "loss": 1.9836,
5480
+ "step": 456000
5481
+ },
5482
+ {
5483
+ "epoch": 124.79,
5484
+ "learning_rate": 4.376623376623377e-05,
5485
+ "loss": 1.9961,
5486
+ "step": 456500
5487
+ },
5488
+ {
5489
+ "epoch": 124.93,
5490
+ "learning_rate": 4.37593984962406e-05,
5491
+ "loss": 1.9996,
5492
+ "step": 457000
5493
+ },
5494
+ {
5495
+ "epoch": 125.07,
5496
+ "learning_rate": 4.375256322624744e-05,
5497
+ "loss": 1.9744,
5498
+ "step": 457500
5499
+ },
5500
+ {
5501
+ "epoch": 125.21,
5502
+ "learning_rate": 4.374572795625427e-05,
5503
+ "loss": 1.9558,
5504
+ "step": 458000
5505
+ },
5506
+ {
5507
+ "epoch": 125.34,
5508
+ "learning_rate": 4.373889268626111e-05,
5509
+ "loss": 1.9701,
5510
+ "step": 458500
5511
+ },
5512
+ {
5513
+ "epoch": 125.48,
5514
+ "learning_rate": 4.373205741626795e-05,
5515
+ "loss": 1.971,
5516
+ "step": 459000
5517
+ },
5518
+ {
5519
+ "epoch": 125.62,
5520
+ "learning_rate": 4.372522214627478e-05,
5521
+ "loss": 1.9868,
5522
+ "step": 459500
5523
+ },
5524
+ {
5525
+ "epoch": 125.75,
5526
+ "learning_rate": 4.371838687628161e-05,
5527
+ "loss": 1.9827,
5528
+ "step": 460000
5529
+ },
5530
+ {
5531
+ "epoch": 125.89,
5532
+ "learning_rate": 4.3711551606288446e-05,
5533
+ "loss": 1.9944,
5534
+ "step": 460500
5535
+ },
5536
+ {
5537
+ "epoch": 126.03,
5538
+ "learning_rate": 4.370471633629529e-05,
5539
+ "loss": 1.9864,
5540
+ "step": 461000
5541
+ },
5542
+ {
5543
+ "epoch": 126.16,
5544
+ "learning_rate": 4.369788106630212e-05,
5545
+ "loss": 1.9455,
5546
+ "step": 461500
5547
+ },
5548
+ {
5549
+ "epoch": 126.3,
5550
+ "learning_rate": 4.369104579630896e-05,
5551
+ "loss": 1.9598,
5552
+ "step": 462000
5553
+ },
5554
+ {
5555
+ "epoch": 126.44,
5556
+ "learning_rate": 4.368421052631579e-05,
5557
+ "loss": 1.9695,
5558
+ "step": 462500
5559
+ },
5560
+ {
5561
+ "epoch": 126.57,
5562
+ "learning_rate": 4.367737525632263e-05,
5563
+ "loss": 1.9791,
5564
+ "step": 463000
5565
+ },
5566
+ {
5567
+ "epoch": 126.71,
5568
+ "learning_rate": 4.367053998632946e-05,
5569
+ "loss": 1.9733,
5570
+ "step": 463500
5571
+ },
5572
+ {
5573
+ "epoch": 126.85,
5574
+ "learning_rate": 4.3663704716336296e-05,
5575
+ "loss": 1.9899,
5576
+ "step": 464000
5577
+ },
5578
+ {
5579
+ "epoch": 126.98,
5580
+ "learning_rate": 4.365686944634314e-05,
5581
+ "loss": 1.9975,
5582
+ "step": 464500
5583
+ },
5584
+ {
5585
+ "epoch": 127.12,
5586
+ "learning_rate": 4.3650034176349966e-05,
5587
+ "loss": 1.9522,
5588
+ "step": 465000
5589
+ },
5590
+ {
5591
+ "epoch": 127.26,
5592
+ "learning_rate": 4.36431989063568e-05,
5593
+ "loss": 1.9489,
5594
+ "step": 465500
5595
+ },
5596
+ {
5597
+ "epoch": 127.39,
5598
+ "learning_rate": 4.3636363636363636e-05,
5599
+ "loss": 1.961,
5600
+ "step": 466000
5601
+ },
5602
+ {
5603
+ "epoch": 127.53,
5604
+ "learning_rate": 4.362952836637048e-05,
5605
+ "loss": 1.9685,
5606
+ "step": 466500
5607
+ },
5608
+ {
5609
+ "epoch": 127.67,
5610
+ "learning_rate": 4.362269309637731e-05,
5611
+ "loss": 1.9775,
5612
+ "step": 467000
5613
+ },
5614
+ {
5615
+ "epoch": 127.8,
5616
+ "learning_rate": 4.361585782638414e-05,
5617
+ "loss": 1.983,
5618
+ "step": 467500
5619
+ },
5620
+ {
5621
+ "epoch": 127.94,
5622
+ "learning_rate": 4.3609022556390975e-05,
5623
+ "loss": 1.9919,
5624
+ "step": 468000
5625
+ },
5626
+ {
5627
+ "epoch": 128.08,
5628
+ "learning_rate": 4.3602187286397816e-05,
5629
+ "loss": 1.9609,
5630
+ "step": 468500
5631
+ },
5632
+ {
5633
+ "epoch": 128.21,
5634
+ "learning_rate": 4.359535201640465e-05,
5635
+ "loss": 1.9508,
5636
+ "step": 469000
5637
+ },
5638
+ {
5639
+ "epoch": 128.35,
5640
+ "learning_rate": 4.3588516746411486e-05,
5641
+ "loss": 1.9594,
5642
+ "step": 469500
5643
+ },
5644
+ {
5645
+ "epoch": 128.49,
5646
+ "learning_rate": 4.358168147641832e-05,
5647
+ "loss": 1.9627,
5648
+ "step": 470000
5649
+ },
5650
+ {
5651
+ "epoch": 128.62,
5652
+ "learning_rate": 4.3574846206425156e-05,
5653
+ "loss": 1.9675,
5654
+ "step": 470500
5655
+ },
5656
+ {
5657
+ "epoch": 128.76,
5658
+ "learning_rate": 4.356801093643199e-05,
5659
+ "loss": 1.9777,
5660
+ "step": 471000
5661
+ },
5662
+ {
5663
+ "epoch": 128.9,
5664
+ "learning_rate": 4.3561175666438825e-05,
5665
+ "loss": 1.9811,
5666
+ "step": 471500
5667
+ },
5668
+ {
5669
+ "epoch": 129.03,
5670
+ "learning_rate": 4.355434039644567e-05,
5671
+ "loss": 1.975,
5672
+ "step": 472000
5673
+ },
5674
+ {
5675
+ "epoch": 129.17,
5676
+ "learning_rate": 4.3547505126452495e-05,
5677
+ "loss": 1.9393,
5678
+ "step": 472500
5679
+ },
5680
+ {
5681
+ "epoch": 129.31,
5682
+ "learning_rate": 4.354066985645933e-05,
5683
+ "loss": 1.9465,
5684
+ "step": 473000
5685
+ },
5686
+ {
5687
+ "epoch": 129.44,
5688
+ "learning_rate": 4.3533834586466164e-05,
5689
+ "loss": 1.9567,
5690
+ "step": 473500
5691
+ },
5692
+ {
5693
+ "epoch": 129.58,
5694
+ "learning_rate": 4.3526999316473006e-05,
5695
+ "loss": 1.963,
5696
+ "step": 474000
5697
+ },
5698
+ {
5699
+ "epoch": 129.72,
5700
+ "learning_rate": 4.352016404647984e-05,
5701
+ "loss": 1.9731,
5702
+ "step": 474500
5703
+ },
5704
+ {
5705
+ "epoch": 129.85,
5706
+ "learning_rate": 4.3513328776486675e-05,
5707
+ "loss": 1.9787,
5708
+ "step": 475000
5709
+ },
5710
+ {
5711
+ "epoch": 129.99,
5712
+ "learning_rate": 4.3506493506493503e-05,
5713
+ "loss": 1.9779,
5714
+ "step": 475500
5715
+ },
5716
+ {
5717
+ "epoch": 130.13,
5718
+ "learning_rate": 4.3499658236500345e-05,
5719
+ "loss": 1.9351,
5720
+ "step": 476000
5721
+ },
5722
+ {
5723
+ "epoch": 130.26,
5724
+ "learning_rate": 4.349282296650718e-05,
5725
+ "loss": 1.9439,
5726
+ "step": 476500
5727
+ },
5728
+ {
5729
+ "epoch": 130.4,
5730
+ "learning_rate": 4.3485987696514015e-05,
5731
+ "loss": 1.953,
5732
+ "step": 477000
5733
+ },
5734
+ {
5735
+ "epoch": 130.54,
5736
+ "learning_rate": 4.347915242652085e-05,
5737
+ "loss": 1.9584,
5738
+ "step": 477500
5739
+ },
5740
+ {
5741
+ "epoch": 130.67,
5742
+ "learning_rate": 4.3472317156527684e-05,
5743
+ "loss": 1.9635,
5744
+ "step": 478000
5745
+ },
5746
+ {
5747
+ "epoch": 130.81,
5748
+ "learning_rate": 4.346548188653452e-05,
5749
+ "loss": 1.9722,
5750
+ "step": 478500
5751
+ },
5752
+ {
5753
+ "epoch": 130.95,
5754
+ "learning_rate": 4.3458646616541354e-05,
5755
+ "loss": 1.9806,
5756
+ "step": 479000
5757
+ },
5758
+ {
5759
+ "epoch": 131.08,
5760
+ "learning_rate": 4.345181134654819e-05,
5761
+ "loss": 1.9505,
5762
+ "step": 479500
5763
+ },
5764
+ {
5765
+ "epoch": 131.22,
5766
+ "learning_rate": 4.344497607655503e-05,
5767
+ "loss": 1.9355,
5768
+ "step": 480000
5769
  }
5770
  ],
5771
  "max_steps": 3658000,
5772
  "num_train_epochs": 1000,
5773
+ "total_flos": 966230618169802752,
5774
  "trial_name": null,
5775
  "trial_params": null
5776
  }