jflotz commited on
Commit
55d0152
1 Parent(s): 3cd6a82

Training in progress, step 990000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa6e21575dd459731b96c75fb2eff44427788a2b21e2cba9f9983669023c697a
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb4cd9e789adbd1802119018bcfc4f0b6dba2541ced8918776537c19936d2aa3
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:081e5eabe8ef9a2817820443cfba02d1a6ecee053832fff6fbfbe29c77150986
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21e636c80ed6aaf4e2b5d21598685c1a08b0a8d8edf7041e56552898357162ca
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac26c3fd0e63c430cd827768cc149848f896e9fbf6df9ca8358e31c62102f594
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1caabd63b797b525c8b3557d0ed6bdcb32c060e6354cffd8a2f88412a58c50
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1f60f9446cba0320cf9ced93c4b14816af8d6988d011f7cc2f5b01e8ada101d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2358905887cd0ce80c53b6e8a0174e039c4c5bd62c6c91c86f0312f9b46fcf7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.929814973846515,
5
- "global_step": 980000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -19606,11 +19606,211 @@
19606
  "eval_samples_per_second": 887.915,
19607
  "eval_steps_per_second": 13.916,
19608
  "step": 980000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19609
  }
19610
  ],
19611
  "max_steps": 1000000,
19612
  "num_train_epochs": 12,
19613
- "total_flos": 6.869770816498864e+22,
19614
  "trial_name": null,
19615
  "trial_params": null
19616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.041343698069438,
5
+ "global_step": 990000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
19606
  "eval_samples_per_second": 887.915,
19607
  "eval_steps_per_second": 13.916,
19608
  "step": 980000
19609
+ },
19610
+ {
19611
+ "epoch": 10.94,
19612
+ "learning_rate": 1.0145492062574731e-05,
19613
+ "loss": 0.1798,
19614
+ "step": 980500
19615
+ },
19616
+ {
19617
+ "epoch": 10.94,
19618
+ "learning_rate": 1.0138129010020992e-05,
19619
+ "loss": 0.1797,
19620
+ "step": 981000
19621
+ },
19622
+ {
19623
+ "epoch": 10.94,
19624
+ "eval_loss": 0.17310407757759094,
19625
+ "eval_runtime": 2.575,
19626
+ "eval_samples_per_second": 892.044,
19627
+ "eval_steps_per_second": 13.981,
19628
+ "step": 981000
19629
+ },
19630
+ {
19631
+ "epoch": 10.95,
19632
+ "learning_rate": 1.0130956957154867e-05,
19633
+ "loss": 0.1796,
19634
+ "step": 981500
19635
+ },
19636
+ {
19637
+ "epoch": 10.95,
19638
+ "learning_rate": 1.0123975923584488e-05,
19639
+ "loss": 0.1795,
19640
+ "step": 982000
19641
+ },
19642
+ {
19643
+ "epoch": 10.95,
19644
+ "eval_loss": 0.17133940756320953,
19645
+ "eval_runtime": 2.6289,
19646
+ "eval_samples_per_second": 873.748,
19647
+ "eval_steps_per_second": 13.694,
19648
+ "step": 982000
19649
+ },
19650
+ {
19651
+ "epoch": 10.96,
19652
+ "learning_rate": 1.0117185928395721e-05,
19653
+ "loss": 0.1797,
19654
+ "step": 982500
19655
+ },
19656
+ {
19657
+ "epoch": 10.96,
19658
+ "learning_rate": 1.0110586990152152e-05,
19659
+ "loss": 0.1796,
19660
+ "step": 983000
19661
+ },
19662
+ {
19663
+ "epoch": 10.96,
19664
+ "eval_loss": 0.17200584709644318,
19665
+ "eval_runtime": 2.6007,
19666
+ "eval_samples_per_second": 883.236,
19667
+ "eval_steps_per_second": 13.843,
19668
+ "step": 983000
19669
+ },
19670
+ {
19671
+ "epoch": 10.97,
19672
+ "learning_rate": 1.0104179126895039e-05,
19673
+ "loss": 0.1797,
19674
+ "step": 983500
19675
+ },
19676
+ {
19677
+ "epoch": 10.97,
19678
+ "learning_rate": 1.0097962356143219e-05,
19679
+ "loss": 0.1797,
19680
+ "step": 984000
19681
+ },
19682
+ {
19683
+ "epoch": 10.97,
19684
+ "eval_loss": 0.17025373876094818,
19685
+ "eval_runtime": 2.5803,
19686
+ "eval_samples_per_second": 890.197,
19687
+ "eval_steps_per_second": 13.952,
19688
+ "step": 984000
19689
+ },
19690
+ {
19691
+ "epoch": 10.98,
19692
+ "learning_rate": 1.009193669489312e-05,
19693
+ "loss": 0.1797,
19694
+ "step": 984500
19695
+ },
19696
+ {
19697
+ "epoch": 10.99,
19698
+ "learning_rate": 1.0086102159618668e-05,
19699
+ "loss": 0.1796,
19700
+ "step": 985000
19701
+ },
19702
+ {
19703
+ "epoch": 10.99,
19704
+ "eval_loss": 0.17083962261676788,
19705
+ "eval_runtime": 2.5712,
19706
+ "eval_samples_per_second": 893.348,
19707
+ "eval_steps_per_second": 14.001,
19708
+ "step": 985000
19709
+ },
19710
+ {
19711
+ "epoch": 10.99,
19712
+ "learning_rate": 1.0080458766271252e-05,
19713
+ "loss": 0.1798,
19714
+ "step": 985500
19715
+ },
19716
+ {
19717
+ "epoch": 11.0,
19718
+ "learning_rate": 1.0075006530279694e-05,
19719
+ "loss": 0.1797,
19720
+ "step": 986000
19721
+ },
19722
+ {
19723
+ "epoch": 11.0,
19724
+ "eval_loss": 0.1690717339515686,
19725
+ "eval_runtime": 2.588,
19726
+ "eval_samples_per_second": 887.569,
19727
+ "eval_steps_per_second": 13.911,
19728
+ "step": 986000
19729
+ },
19730
+ {
19731
+ "epoch": 11.0,
19732
+ "learning_rate": 1.0069745466550205e-05,
19733
+ "loss": 0.1794,
19734
+ "step": 986500
19735
+ },
19736
+ {
19737
+ "epoch": 11.01,
19738
+ "learning_rate": 1.0064675589466339e-05,
19739
+ "loss": 0.1796,
19740
+ "step": 987000
19741
+ },
19742
+ {
19743
+ "epoch": 11.01,
19744
+ "eval_loss": 0.16997100412845612,
19745
+ "eval_runtime": 2.5938,
19746
+ "eval_samples_per_second": 885.56,
19747
+ "eval_steps_per_second": 13.879,
19748
+ "step": 987000
19749
+ },
19750
+ {
19751
+ "epoch": 11.01,
19752
+ "learning_rate": 1.005979691288893e-05,
19753
+ "loss": 0.1795,
19754
+ "step": 987500
19755
+ },
19756
+ {
19757
+ "epoch": 11.02,
19758
+ "learning_rate": 1.0055109450156098e-05,
19759
+ "loss": 0.1791,
19760
+ "step": 988000
19761
+ },
19762
+ {
19763
+ "epoch": 11.02,
19764
+ "eval_loss": 0.1697554588317871,
19765
+ "eval_runtime": 2.5898,
19766
+ "eval_samples_per_second": 886.931,
19767
+ "eval_steps_per_second": 13.901,
19768
+ "step": 988000
19769
+ },
19770
+ {
19771
+ "epoch": 11.02,
19772
+ "learning_rate": 1.0050613214083197e-05,
19773
+ "loss": 0.1797,
19774
+ "step": 988500
19775
+ },
19776
+ {
19777
+ "epoch": 11.03,
19778
+ "learning_rate": 1.0046308216962759e-05,
19779
+ "loss": 0.1795,
19780
+ "step": 989000
19781
+ },
19782
+ {
19783
+ "epoch": 11.03,
19784
+ "eval_loss": 0.1698392927646637,
19785
+ "eval_runtime": 2.6586,
19786
+ "eval_samples_per_second": 863.992,
19787
+ "eval_steps_per_second": 13.541,
19788
+ "step": 989000
19789
+ },
19790
+ {
19791
+ "epoch": 11.04,
19792
+ "learning_rate": 1.0042194470564472e-05,
19793
+ "loss": 0.1796,
19794
+ "step": 989500
19795
+ },
19796
+ {
19797
+ "epoch": 11.04,
19798
+ "learning_rate": 1.0038271986135177e-05,
19799
+ "loss": 0.1799,
19800
+ "step": 990000
19801
+ },
19802
+ {
19803
+ "epoch": 11.04,
19804
+ "eval_loss": 0.16946464776992798,
19805
+ "eval_runtime": 2.6152,
19806
+ "eval_samples_per_second": 878.327,
19807
+ "eval_steps_per_second": 13.766,
19808
+ "step": 990000
19809
  }
19810
  ],
19811
  "max_steps": 1000000,
19812
  "num_train_epochs": 12,
19813
+ "total_flos": 6.9398656010816955e+22,
19814
  "trial_name": null,
19815
  "trial_params": null
19816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:081e5eabe8ef9a2817820443cfba02d1a6ecee053832fff6fbfbe29c77150986
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21e636c80ed6aaf4e2b5d21598685c1a08b0a8d8edf7041e56552898357162ca
3
  size 449471589