jflotz commited on
Commit
78e9441
1 Parent(s): 3a6c403

Training in progress, step 500000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bba175f94af3131ddc7e585c8d0c85376ebd1433f20a5a01a35d8488fc39885b
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e10f3bf4c9fdd6d04e4a45594e02e70886380f6c627ce307a2167e61722f396
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc6bd31ef6b75d2ff57b791613279c5afe6c8244312a64f00fb084519b8aaac6
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6f6ff68ce8f2343aee8e2f40adc1f066ed4b3aebc05b692c06e92c1edcc2d09
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37572fbf72a1446a290380546bc3e45a2e14961acceac2bf85c43bfce749553b
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37572fbf72a1446a290380546bc3e45a2e14961acceac2bf85c43bfce749553b
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37572fbf72a1446a290380546bc3e45a2e14961acceac2bf85c43bfce749553b
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37572fbf72a1446a290380546bc3e45a2e14961acceac2bf85c43bfce749553b
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37572fbf72a1446a290380546bc3e45a2e14961acceac2bf85c43bfce749553b
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37572fbf72a1446a290380546bc3e45a2e14961acceac2bf85c43bfce749553b
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37572fbf72a1446a290380546bc3e45a2e14961acceac2bf85c43bfce749553b
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37572fbf72a1446a290380546bc3e45a2e14961acceac2bf85c43bfce749553b
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d798ff13d72fe751bc0ea721c37eb1e98064dde5819b90f3504db53fdceee97
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53a074c65f43dfe683b5f0e988de79e3365c939ebb4b13c8f9ce84b59bdb64a7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.929693076374019,
5
- "global_step": 490000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9806,11 +9806,211 @@
9806
  "eval_samples_per_second": 1128.67,
9807
  "eval_steps_per_second": 17.689,
9808
  "step": 490000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9809
  }
9810
  ],
9811
  "max_steps": 500000,
9812
  "num_train_epochs": 12,
9813
- "total_flos": 1.5654585257336347e+22,
9814
  "trial_name": null,
9815
  "trial_params": null
9816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.152748037116346,
5
+ "global_step": 500000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9806
  "eval_samples_per_second": 1128.67,
9807
  "eval_steps_per_second": 17.689,
9808
  "step": 490000
9809
+ },
9810
+ {
9811
+ "epoch": 10.94,
9812
+ "learning_rate": 1.0286124377900624e-05,
9813
+ "loss": 0.2541,
9814
+ "step": 490500
9815
+ },
9816
+ {
9817
+ "epoch": 10.95,
9818
+ "learning_rate": 1.0256807270282153e-05,
9819
+ "loss": 0.2537,
9820
+ "step": 491000
9821
+ },
9822
+ {
9823
+ "epoch": 10.95,
9824
+ "eval_loss": 0.23797546327114105,
9825
+ "eval_runtime": 1.9751,
9826
+ "eval_samples_per_second": 1162.982,
9827
+ "eval_steps_per_second": 18.227,
9828
+ "step": 491000
9829
+ },
9830
+ {
9831
+ "epoch": 10.96,
9832
+ "learning_rate": 1.0229073051029455e-05,
9833
+ "loss": 0.2539,
9834
+ "step": 491500
9835
+ },
9836
+ {
9837
+ "epoch": 10.97,
9838
+ "learning_rate": 1.020292202343952e-05,
9839
+ "loss": 0.254,
9840
+ "step": 492000
9841
+ },
9842
+ {
9843
+ "epoch": 10.97,
9844
+ "eval_loss": 0.23647533357143402,
9845
+ "eval_runtime": 2.0099,
9846
+ "eval_samples_per_second": 1142.868,
9847
+ "eval_steps_per_second": 17.912,
9848
+ "step": 492000
9849
+ },
9850
+ {
9851
+ "epoch": 10.99,
9852
+ "learning_rate": 1.0178354473495813e-05,
9853
+ "loss": 0.2539,
9854
+ "step": 492500
9855
+ },
9856
+ {
9857
+ "epoch": 11.0,
9858
+ "learning_rate": 1.0155370669865077e-05,
9859
+ "loss": 0.254,
9860
+ "step": 493000
9861
+ },
9862
+ {
9863
+ "epoch": 11.0,
9864
+ "eval_loss": 0.2374097853899002,
9865
+ "eval_runtime": 1.9968,
9866
+ "eval_samples_per_second": 1150.347,
9867
+ "eval_steps_per_second": 18.029,
9868
+ "step": 493000
9869
+ },
9870
+ {
9871
+ "epoch": 11.01,
9872
+ "learning_rate": 1.0133970863894557e-05,
9873
+ "loss": 0.2537,
9874
+ "step": 493500
9875
+ },
9876
+ {
9877
+ "epoch": 11.02,
9878
+ "learning_rate": 1.0114155289609061e-05,
9879
+ "loss": 0.2535,
9880
+ "step": 494000
9881
+ },
9882
+ {
9883
+ "epoch": 11.02,
9884
+ "eval_loss": 0.23701806366443634,
9885
+ "eval_runtime": 2.0405,
9886
+ "eval_samples_per_second": 1125.682,
9887
+ "eval_steps_per_second": 17.642,
9888
+ "step": 494000
9889
+ },
9890
+ {
9891
+ "epoch": 11.03,
9892
+ "learning_rate": 1.0095924163708572e-05,
9893
+ "loss": 0.2542,
9894
+ "step": 494500
9895
+ },
9896
+ {
9897
+ "epoch": 11.04,
9898
+ "learning_rate": 1.0079277685565724e-05,
9899
+ "loss": 0.2538,
9900
+ "step": 495000
9901
+ },
9902
+ {
9903
+ "epoch": 11.04,
9904
+ "eval_loss": 0.23448336124420166,
9905
+ "eval_runtime": 1.9969,
9906
+ "eval_samples_per_second": 1150.279,
9907
+ "eval_steps_per_second": 18.028,
9908
+ "step": 495000
9909
+ },
9910
+ {
9911
+ "epoch": 11.05,
9912
+ "learning_rate": 1.0064216037223772e-05,
9913
+ "loss": 0.2536,
9914
+ "step": 495500
9915
+ },
9916
+ {
9917
+ "epoch": 11.06,
9918
+ "learning_rate": 1.0050739383394454e-05,
9919
+ "loss": 0.2539,
9920
+ "step": 496000
9921
+ },
9922
+ {
9923
+ "epoch": 11.06,
9924
+ "eval_loss": 0.23559238016605377,
9925
+ "eval_runtime": 2.0311,
9926
+ "eval_samples_per_second": 1130.902,
9927
+ "eval_steps_per_second": 17.724,
9928
+ "step": 496000
9929
+ },
9930
+ {
9931
+ "epoch": 11.07,
9932
+ "learning_rate": 1.003884787145633e-05,
9933
+ "loss": 0.2532,
9934
+ "step": 496500
9935
+ },
9936
+ {
9937
+ "epoch": 11.09,
9938
+ "learning_rate": 1.002854163145305e-05,
9939
+ "loss": 0.2533,
9940
+ "step": 497000
9941
+ },
9942
+ {
9943
+ "epoch": 11.09,
9944
+ "eval_loss": 0.23612073063850403,
9945
+ "eval_runtime": 2.0421,
9946
+ "eval_samples_per_second": 1124.803,
9947
+ "eval_steps_per_second": 17.629,
9948
+ "step": 497000
9949
+ },
9950
+ {
9951
+ "epoch": 11.1,
9952
+ "learning_rate": 1.0019820776091995e-05,
9953
+ "loss": 0.2531,
9954
+ "step": 497500
9955
+ },
9956
+ {
9957
+ "epoch": 11.11,
9958
+ "learning_rate": 1.0012685400743077e-05,
9959
+ "loss": 0.2533,
9960
+ "step": 498000
9961
+ },
9962
+ {
9963
+ "epoch": 11.11,
9964
+ "eval_loss": 0.23812700808048248,
9965
+ "eval_runtime": 2.036,
9966
+ "eval_samples_per_second": 1128.176,
9967
+ "eval_steps_per_second": 17.681,
9968
+ "step": 498000
9969
+ },
9970
+ {
9971
+ "epoch": 11.12,
9972
+ "learning_rate": 1.0007135583437572e-05,
9973
+ "loss": 0.2531,
9974
+ "step": 498500
9975
+ },
9976
+ {
9977
+ "epoch": 11.13,
9978
+ "learning_rate": 1.0003171384867436e-05,
9979
+ "loss": 0.2534,
9980
+ "step": 499000
9981
+ },
9982
+ {
9983
+ "epoch": 11.13,
9984
+ "eval_loss": 0.23714645206928253,
9985
+ "eval_runtime": 2.0313,
9986
+ "eval_samples_per_second": 1130.821,
9987
+ "eval_steps_per_second": 17.723,
9988
+ "step": 499000
9989
+ },
9990
+ {
9991
+ "epoch": 11.14,
9992
+ "learning_rate": 1.0000792848384467e-05,
9993
+ "loss": 0.2535,
9994
+ "step": 499500
9995
+ },
9996
+ {
9997
+ "epoch": 11.15,
9998
+ "learning_rate": 1e-05,
9999
+ "loss": 0.2535,
10000
+ "step": 500000
10001
+ },
10002
+ {
10003
+ "epoch": 11.15,
10004
+ "eval_loss": 0.23556514084339142,
10005
+ "eval_runtime": 1.9635,
10006
+ "eval_samples_per_second": 1169.857,
10007
+ "eval_steps_per_second": 18.335,
10008
+ "step": 500000
10009
  }
10010
  ],
10011
  "max_steps": 500000,
10012
  "num_train_epochs": 12,
10013
+ "total_flos": 1.5974043941849432e+22,
10014
  "trial_name": null,
10015
  "trial_params": null
10016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc6bd31ef6b75d2ff57b791613279c5afe6c8244312a64f00fb084519b8aaac6
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6f6ff68ce8f2343aee8e2f40adc1f066ed4b3aebc05b692c06e92c1edcc2d09
3
  size 102501541