joelniklaus commited on
Commit
94d1da9
1 Parent(s): ccb0702

Training in progress, step 1000000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69c2a3f0d71cdb35d08c4188dc0759de44bd72d54dd0e64ce39a1cc599afb440
3
  size 885325017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6709c31ff0729a1d7c432a7387ac7045f87daa988613a212726f338bc80a9f5
3
  size 885325017
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c65138ee8a8889f23fd090f031b33c3c7187319b021a3e7673c53dffc8e14f9e
3
  size 442675755
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92b18fe4bb5da26c52d0fd64a7e1283a9392787bf8fa2100118119861935f53c
3
  size 442675755
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3146b2cf60fe575602b3dab14d07f2e3b1c9322dc666bc5e2455f1aba9e69d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c696e3facd4608b3fc117b6b37f55cf9f2b0bc62746ffe7180114a3b5d128e
3
  size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3146b2cf60fe575602b3dab14d07f2e3b1c9322dc666bc5e2455f1aba9e69d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c696e3facd4608b3fc117b6b37f55cf9f2b0bc62746ffe7180114a3b5d128e
3
  size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3146b2cf60fe575602b3dab14d07f2e3b1c9322dc666bc5e2455f1aba9e69d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c696e3facd4608b3fc117b6b37f55cf9f2b0bc62746ffe7180114a3b5d128e
3
  size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3146b2cf60fe575602b3dab14d07f2e3b1c9322dc666bc5e2455f1aba9e69d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c696e3facd4608b3fc117b6b37f55cf9f2b0bc62746ffe7180114a3b5d128e
3
  size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3146b2cf60fe575602b3dab14d07f2e3b1c9322dc666bc5e2455f1aba9e69d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c696e3facd4608b3fc117b6b37f55cf9f2b0bc62746ffe7180114a3b5d128e
3
  size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3146b2cf60fe575602b3dab14d07f2e3b1c9322dc666bc5e2455f1aba9e69d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c696e3facd4608b3fc117b6b37f55cf9f2b0bc62746ffe7180114a3b5d128e
3
  size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3146b2cf60fe575602b3dab14d07f2e3b1c9322dc666bc5e2455f1aba9e69d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c696e3facd4608b3fc117b6b37f55cf9f2b0bc62746ffe7180114a3b5d128e
3
  size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a3146b2cf60fe575602b3dab14d07f2e3b1c9322dc666bc5e2455f1aba9e69d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c696e3facd4608b3fc117b6b37f55cf9f2b0bc62746ffe7180114a3b5d128e
3
  size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc2c2c8416f63b11e9c82d6dac05baa6ad73177ac658621e099b23ff71f2f801
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:906bc3ed48818cc1785b6a98c1e064532a322520b99cdf458cfd827674d9b7ec
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.073748,
5
- "global_step": 950000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5858,11 +5858,319 @@
5858
  "eval_samples_per_second": 447.176,
5859
  "eval_steps_per_second": 3.577,
5860
  "step": 950000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5861
  }
5862
  ],
5863
  "max_steps": 1000000,
5864
  "num_train_epochs": 9223372036854775807,
5865
- "total_flos": 1.6003204479826526e+19,
5866
  "trial_name": null,
5867
  "trial_params": null
5868
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.123748,
5
+ "global_step": 1000000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5858
  "eval_samples_per_second": 447.176,
5859
  "eval_steps_per_second": 3.577,
5860
  "step": 950000
5861
+ },
5862
+ {
5863
+ "epoch": 6.07,
5864
+ "learning_rate": 6.549893279788277e-07,
5865
+ "loss": 0.6979,
5866
+ "step": 951000
5867
+ },
5868
+ {
5869
+ "epoch": 6.08,
5870
+ "learning_rate": 6.285834552247128e-07,
5871
+ "loss": 0.6093,
5872
+ "step": 952000
5873
+ },
5874
+ {
5875
+ "epoch": 6.08,
5876
+ "learning_rate": 6.027175003719354e-07,
5877
+ "loss": 0.5832,
5878
+ "step": 953000
5879
+ },
5880
+ {
5881
+ "epoch": 6.08,
5882
+ "learning_rate": 5.773917462864264e-07,
5883
+ "loss": 0.6034,
5884
+ "step": 954000
5885
+ },
5886
+ {
5887
+ "epoch": 6.08,
5888
+ "learning_rate": 5.526064699265753e-07,
5889
+ "loss": 0.7171,
5890
+ "step": 955000
5891
+ },
5892
+ {
5893
+ "epoch": 6.08,
5894
+ "learning_rate": 5.283619423401998e-07,
5895
+ "loss": 0.6829,
5896
+ "step": 956000
5897
+ },
5898
+ {
5899
+ "epoch": 6.08,
5900
+ "learning_rate": 5.046584286615697e-07,
5901
+ "loss": 0.6362,
5902
+ "step": 957000
5903
+ },
5904
+ {
5905
+ "epoch": 6.08,
5906
+ "learning_rate": 4.814961881085045e-07,
5907
+ "loss": 0.5757,
5908
+ "step": 958000
5909
+ },
5910
+ {
5911
+ "epoch": 6.08,
5912
+ "learning_rate": 4.5887547397955864e-07,
5913
+ "loss": 0.6047,
5914
+ "step": 959000
5915
+ },
5916
+ {
5917
+ "epoch": 6.08,
5918
+ "learning_rate": 4.367965336512403e-07,
5919
+ "loss": 0.6948,
5920
+ "step": 960000
5921
+ },
5922
+ {
5923
+ "epoch": 6.08,
5924
+ "learning_rate": 4.1525960857530243e-07,
5925
+ "loss": 0.7152,
5926
+ "step": 961000
5927
+ },
5928
+ {
5929
+ "epoch": 6.09,
5930
+ "learning_rate": 3.9426493427611177e-07,
5931
+ "loss": 0.6197,
5932
+ "step": 962000
5933
+ },
5934
+ {
5935
+ "epoch": 6.09,
5936
+ "learning_rate": 3.738127403480507e-07,
5937
+ "loss": 0.5809,
5938
+ "step": 963000
5939
+ },
5940
+ {
5941
+ "epoch": 6.09,
5942
+ "learning_rate": 3.5390325045304706e-07,
5943
+ "loss": 0.5836,
5944
+ "step": 964000
5945
+ },
5946
+ {
5947
+ "epoch": 6.09,
5948
+ "learning_rate": 3.3453668231809286e-07,
5949
+ "loss": 0.7279,
5950
+ "step": 965000
5951
+ },
5952
+ {
5953
+ "epoch": 6.09,
5954
+ "learning_rate": 3.157132477328628e-07,
5955
+ "loss": 0.6954,
5956
+ "step": 966000
5957
+ },
5958
+ {
5959
+ "epoch": 6.09,
5960
+ "learning_rate": 2.9743315254743833e-07,
5961
+ "loss": 0.6144,
5962
+ "step": 967000
5963
+ },
5964
+ {
5965
+ "epoch": 6.09,
5966
+ "learning_rate": 2.796965966699927e-07,
5967
+ "loss": 0.5801,
5968
+ "step": 968000
5969
+ },
5970
+ {
5971
+ "epoch": 6.09,
5972
+ "learning_rate": 2.625037740646763e-07,
5973
+ "loss": 0.6001,
5974
+ "step": 969000
5975
+ },
5976
+ {
5977
+ "epoch": 6.09,
5978
+ "learning_rate": 2.458548727494292e-07,
5979
+ "loss": 0.7176,
5980
+ "step": 970000
5981
+ },
5982
+ {
5983
+ "epoch": 6.09,
5984
+ "learning_rate": 2.2975007479397738e-07,
5985
+ "loss": 0.6872,
5986
+ "step": 971000
5987
+ },
5988
+ {
5989
+ "epoch": 6.1,
5990
+ "learning_rate": 2.1418955631781202e-07,
5991
+ "loss": 0.6369,
5992
+ "step": 972000
5993
+ },
5994
+ {
5995
+ "epoch": 6.1,
5996
+ "learning_rate": 1.9917348748826335e-07,
5997
+ "loss": 0.5778,
5998
+ "step": 973000
5999
+ },
6000
+ {
6001
+ "epoch": 6.1,
6002
+ "learning_rate": 1.847020325186577e-07,
6003
+ "loss": 0.599,
6004
+ "step": 974000
6005
+ },
6006
+ {
6007
+ "epoch": 6.1,
6008
+ "learning_rate": 1.7077534966650766e-07,
6009
+ "loss": 0.6961,
6010
+ "step": 975000
6011
+ },
6012
+ {
6013
+ "epoch": 6.1,
6014
+ "learning_rate": 1.5739359123178587e-07,
6015
+ "loss": 0.7163,
6016
+ "step": 976000
6017
+ },
6018
+ {
6019
+ "epoch": 6.1,
6020
+ "learning_rate": 1.4455690355525964e-07,
6021
+ "loss": 0.6199,
6022
+ "step": 977000
6023
+ },
6024
+ {
6025
+ "epoch": 6.1,
6026
+ "learning_rate": 1.3226542701689215e-07,
6027
+ "loss": 0.5833,
6028
+ "step": 978000
6029
+ },
6030
+ {
6031
+ "epoch": 6.1,
6032
+ "learning_rate": 1.2051929603428825e-07,
6033
+ "loss": 0.5774,
6034
+ "step": 979000
6035
+ },
6036
+ {
6037
+ "epoch": 6.1,
6038
+ "learning_rate": 1.0931863906127327e-07,
6039
+ "loss": 0.7263,
6040
+ "step": 980000
6041
+ },
6042
+ {
6043
+ "epoch": 6.1,
6044
+ "learning_rate": 9.866357858642205e-08,
6045
+ "loss": 0.6966,
6046
+ "step": 981000
6047
+ },
6048
+ {
6049
+ "epoch": 6.11,
6050
+ "learning_rate": 8.855423113177664e-08,
6051
+ "loss": 0.6213,
6052
+ "step": 982000
6053
+ },
6054
+ {
6055
+ "epoch": 6.11,
6056
+ "learning_rate": 7.899070725153613e-08,
6057
+ "loss": 0.579,
6058
+ "step": 983000
6059
+ },
6060
+ {
6061
+ "epoch": 6.11,
6062
+ "learning_rate": 6.997311153086883e-08,
6063
+ "loss": 0.6005,
6064
+ "step": 984000
6065
+ },
6066
+ {
6067
+ "epoch": 6.11,
6068
+ "learning_rate": 6.150154258476315e-08,
6069
+ "loss": 0.7129,
6070
+ "step": 985000
6071
+ },
6072
+ {
6073
+ "epoch": 6.11,
6074
+ "learning_rate": 5.3576093056922906e-08,
6075
+ "loss": 0.6897,
6076
+ "step": 986000
6077
+ },
6078
+ {
6079
+ "epoch": 6.11,
6080
+ "learning_rate": 4.619684961881254e-08,
6081
+ "loss": 0.6395,
6082
+ "step": 987000
6083
+ },
6084
+ {
6085
+ "epoch": 6.11,
6086
+ "learning_rate": 3.936389296864129e-08,
6087
+ "loss": 0.5797,
6088
+ "step": 988000
6089
+ },
6090
+ {
6091
+ "epoch": 6.11,
6092
+ "learning_rate": 3.3077297830541584e-08,
6093
+ "loss": 0.5937,
6094
+ "step": 989000
6095
+ },
6096
+ {
6097
+ "epoch": 6.11,
6098
+ "learning_rate": 2.7337132953697554e-08,
6099
+ "loss": 0.6948,
6100
+ "step": 990000
6101
+ },
6102
+ {
6103
+ "epoch": 6.11,
6104
+ "learning_rate": 2.214346111164556e-08,
6105
+ "loss": 0.7171,
6106
+ "step": 991000
6107
+ },
6108
+ {
6109
+ "epoch": 6.12,
6110
+ "learning_rate": 1.749633910153592e-08,
6111
+ "loss": 0.6254,
6112
+ "step": 992000
6113
+ },
6114
+ {
6115
+ "epoch": 6.12,
6116
+ "learning_rate": 1.3395817743561134e-08,
6117
+ "loss": 0.5856,
6118
+ "step": 993000
6119
+ },
6120
+ {
6121
+ "epoch": 6.12,
6122
+ "learning_rate": 9.841941880361916e-09,
6123
+ "loss": 0.5713,
6124
+ "step": 994000
6125
+ },
6126
+ {
6127
+ "epoch": 6.12,
6128
+ "learning_rate": 6.834750376549792e-09,
6129
+ "loss": 0.7225,
6130
+ "step": 995000
6131
+ },
6132
+ {
6133
+ "epoch": 6.12,
6134
+ "learning_rate": 4.3742761183018784e-09,
6135
+ "loss": 0.6983,
6136
+ "step": 996000
6137
+ },
6138
+ {
6139
+ "epoch": 6.12,
6140
+ "learning_rate": 2.4605460129556445e-09,
6141
+ "loss": 0.626,
6142
+ "step": 997000
6143
+ },
6144
+ {
6145
+ "epoch": 6.12,
6146
+ "learning_rate": 1.0935809887702154e-09,
6147
+ "loss": 0.5772,
6148
+ "step": 998000
6149
+ },
6150
+ {
6151
+ "epoch": 6.12,
6152
+ "learning_rate": 2.7339599464326627e-10,
6153
+ "loss": 0.598,
6154
+ "step": 999000
6155
+ },
6156
+ {
6157
+ "epoch": 6.12,
6158
+ "learning_rate": 0.0,
6159
+ "loss": 0.7085,
6160
+ "step": 1000000
6161
+ },
6162
+ {
6163
+ "epoch": 6.12,
6164
+ "eval_loss": 0.47990772128105164,
6165
+ "eval_runtime": 10.5537,
6166
+ "eval_samples_per_second": 473.766,
6167
+ "eval_steps_per_second": 3.79,
6168
+ "step": 1000000
6169
  }
6170
  ],
6171
  "max_steps": 1000000,
6172
  "num_train_epochs": 9223372036854775807,
6173
+ "total_flos": 1.6845474410117726e+19,
6174
  "trial_name": null,
6175
  "trial_params": null
6176
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c65138ee8a8889f23fd090f031b33c3c7187319b021a3e7673c53dffc8e14f9e
3
  size 442675755
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92b18fe4bb5da26c52d0fd64a7e1283a9392787bf8fa2100118119861935f53c
3
  size 442675755
runs/Dec27_23-39-04_t1v-n-6071ee6f-w-0/events.out.tfevents.1672184365.t1v-n-6071ee6f-w-0.108001.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3416fba1663bed0ceb7da268696a6250ba565b786001d9ed7fe5a803fca0224a
3
- size 160977
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00318c70372e189e574b61ad78972e58f0a1a6d389587262a37adad04212402a
3
+ size 169253