longcld commited on
Commit
1168686
1 Parent(s): b99e558
Files changed (5) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +213 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc9f6fe010e512320a90ce220ed55a10ff624f259e8a75c32268b9132a34e934
3
  size 352532601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cde460e97bad04f84a56ae7ff816ff1cdeff3b03a09474465cd292b7b6c31a4
3
  size 352532601
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e576da7e6d3188bac283997ec40b96be395c180937d3e0cd64a81c475eee7f5a
3
  size 688496379
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bb094630c57c0076f7966d01611056cb86c230bb1ce7b15571b9741513a969a
3
  size 688496379
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4137b71a9c6c90a0b8880aa5775cd23b0f08ddb88acac61c8656a0072ea5157c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8da5ab5e4c8996c9e9ca7255e01ed8f4458a8c14e30fe5aa54d259faeef742fb
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4fc6a6f383126201b9ed524a41f33bc30e031454336d5411887b80a7f125f0c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffecd9835fe8fdd7e333d29c70739f7ca849a8c7b68d33c0cc301eaacafda24e
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.3538515843492895,
5
- "global_step": 58500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3516,11 +3516,221 @@
3516
  "learning_rate": 3.6461388074291305e-05,
3517
  "loss": 2.7954,
3518
  "step": 58500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3519
  }
3520
  ],
3521
  "max_steps": 92070,
3522
  "num_train_epochs": 10,
3523
- "total_flos": 1.1996750282539008e+17,
3524
  "trial_name": null,
3525
  "trial_params": null
3526
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.73398680387738,
5
+ "global_step": 62000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3516
  "learning_rate": 3.6461388074291305e-05,
3517
  "loss": 2.7954,
3518
  "step": 58500
3519
+ },
3520
+ {
3521
+ "epoch": 6.36,
3522
+ "learning_rate": 3.6352775062452485e-05,
3523
+ "loss": 2.7937,
3524
+ "step": 58600
3525
+ },
3526
+ {
3527
+ "epoch": 6.38,
3528
+ "learning_rate": 3.6244162050613665e-05,
3529
+ "loss": 2.7426,
3530
+ "step": 58700
3531
+ },
3532
+ {
3533
+ "epoch": 6.39,
3534
+ "learning_rate": 3.6135549038774845e-05,
3535
+ "loss": 2.8019,
3536
+ "step": 58800
3537
+ },
3538
+ {
3539
+ "epoch": 6.4,
3540
+ "learning_rate": 3.602693602693603e-05,
3541
+ "loss": 2.7112,
3542
+ "step": 58900
3543
+ },
3544
+ {
3545
+ "epoch": 6.41,
3546
+ "learning_rate": 3.591832301509721e-05,
3547
+ "loss": 2.809,
3548
+ "step": 59000
3549
+ },
3550
+ {
3551
+ "epoch": 6.42,
3552
+ "learning_rate": 3.580971000325839e-05,
3553
+ "loss": 2.7244,
3554
+ "step": 59100
3555
+ },
3556
+ {
3557
+ "epoch": 6.43,
3558
+ "learning_rate": 3.570109699141958e-05,
3559
+ "loss": 2.7719,
3560
+ "step": 59200
3561
+ },
3562
+ {
3563
+ "epoch": 6.44,
3564
+ "learning_rate": 3.559248397958075e-05,
3565
+ "loss": 2.9352,
3566
+ "step": 59300
3567
+ },
3568
+ {
3569
+ "epoch": 6.45,
3570
+ "learning_rate": 3.548387096774194e-05,
3571
+ "loss": 2.711,
3572
+ "step": 59400
3573
+ },
3574
+ {
3575
+ "epoch": 6.46,
3576
+ "learning_rate": 3.537525795590312e-05,
3577
+ "loss": 2.7434,
3578
+ "step": 59500
3579
+ },
3580
+ {
3581
+ "epoch": 6.47,
3582
+ "learning_rate": 3.5266644944064304e-05,
3583
+ "loss": 2.7573,
3584
+ "step": 59600
3585
+ },
3586
+ {
3587
+ "epoch": 6.48,
3588
+ "learning_rate": 3.5158031932225484e-05,
3589
+ "loss": 2.6963,
3590
+ "step": 59700
3591
+ },
3592
+ {
3593
+ "epoch": 6.5,
3594
+ "learning_rate": 3.5049418920386664e-05,
3595
+ "loss": 2.8267,
3596
+ "step": 59800
3597
+ },
3598
+ {
3599
+ "epoch": 6.51,
3600
+ "learning_rate": 3.4940805908547844e-05,
3601
+ "loss": 2.7864,
3602
+ "step": 59900
3603
+ },
3604
+ {
3605
+ "epoch": 6.52,
3606
+ "learning_rate": 3.4832192896709024e-05,
3607
+ "loss": 2.7585,
3608
+ "step": 60000
3609
+ },
3610
+ {
3611
+ "epoch": 6.53,
3612
+ "learning_rate": 3.472357988487021e-05,
3613
+ "loss": 2.714,
3614
+ "step": 60100
3615
+ },
3616
+ {
3617
+ "epoch": 6.54,
3618
+ "learning_rate": 3.461496687303139e-05,
3619
+ "loss": 2.7202,
3620
+ "step": 60200
3621
+ },
3622
+ {
3623
+ "epoch": 6.55,
3624
+ "learning_rate": 3.450635386119258e-05,
3625
+ "loss": 2.8636,
3626
+ "step": 60300
3627
+ },
3628
+ {
3629
+ "epoch": 6.56,
3630
+ "learning_rate": 3.439774084935376e-05,
3631
+ "loss": 2.8138,
3632
+ "step": 60400
3633
+ },
3634
+ {
3635
+ "epoch": 6.57,
3636
+ "learning_rate": 3.428912783751494e-05,
3637
+ "loss": 2.74,
3638
+ "step": 60500
3639
+ },
3640
+ {
3641
+ "epoch": 6.58,
3642
+ "learning_rate": 3.418051482567612e-05,
3643
+ "loss": 2.7704,
3644
+ "step": 60600
3645
+ },
3646
+ {
3647
+ "epoch": 6.59,
3648
+ "learning_rate": 3.40719018138373e-05,
3649
+ "loss": 2.7326,
3650
+ "step": 60700
3651
+ },
3652
+ {
3653
+ "epoch": 6.6,
3654
+ "learning_rate": 3.396328880199848e-05,
3655
+ "loss": 2.7531,
3656
+ "step": 60800
3657
+ },
3658
+ {
3659
+ "epoch": 6.61,
3660
+ "learning_rate": 3.385467579015966e-05,
3661
+ "loss": 2.67,
3662
+ "step": 60900
3663
+ },
3664
+ {
3665
+ "epoch": 6.63,
3666
+ "learning_rate": 3.374606277832085e-05,
3667
+ "loss": 2.7537,
3668
+ "step": 61000
3669
+ },
3670
+ {
3671
+ "epoch": 6.64,
3672
+ "learning_rate": 3.363744976648203e-05,
3673
+ "loss": 2.7866,
3674
+ "step": 61100
3675
+ },
3676
+ {
3677
+ "epoch": 6.65,
3678
+ "learning_rate": 3.35288367546432e-05,
3679
+ "loss": 2.7174,
3680
+ "step": 61200
3681
+ },
3682
+ {
3683
+ "epoch": 6.66,
3684
+ "learning_rate": 3.342022374280439e-05,
3685
+ "loss": 2.7214,
3686
+ "step": 61300
3687
+ },
3688
+ {
3689
+ "epoch": 6.67,
3690
+ "learning_rate": 3.331161073096557e-05,
3691
+ "loss": 2.7205,
3692
+ "step": 61400
3693
+ },
3694
+ {
3695
+ "epoch": 6.68,
3696
+ "learning_rate": 3.3202997719126756e-05,
3697
+ "loss": 2.8244,
3698
+ "step": 61500
3699
+ },
3700
+ {
3701
+ "epoch": 6.69,
3702
+ "learning_rate": 3.3094384707287936e-05,
3703
+ "loss": 2.7776,
3704
+ "step": 61600
3705
+ },
3706
+ {
3707
+ "epoch": 6.7,
3708
+ "learning_rate": 3.2985771695449116e-05,
3709
+ "loss": 2.7184,
3710
+ "step": 61700
3711
+ },
3712
+ {
3713
+ "epoch": 6.71,
3714
+ "learning_rate": 3.2877158683610296e-05,
3715
+ "loss": 2.7534,
3716
+ "step": 61800
3717
+ },
3718
+ {
3719
+ "epoch": 6.72,
3720
+ "learning_rate": 3.2768545671771476e-05,
3721
+ "loss": 2.7492,
3722
+ "step": 61900
3723
+ },
3724
+ {
3725
+ "epoch": 6.73,
3726
+ "learning_rate": 3.265993265993266e-05,
3727
+ "loss": 2.7351,
3728
+ "step": 62000
3729
  }
3730
  ],
3731
  "max_steps": 92070,
3732
  "num_train_epochs": 10,
3733
+ "total_flos": 1.2714223024540877e+17,
3734
  "trial_name": null,
3735
  "trial_params": null
3736
  }