plip commited on
Commit
cf7a449
1 Parent(s): d83e249

Training in progress, step 110000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5a8740e850a7025efc4240518d5b20a0266625ee7a4df5824989ffd90a771e4
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d58137113791019cd3d543973897cce79aa9a3f674e715923996321558c96b13
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9743dfff2c56ecc42fa49782ae324bef5653923fd6c7bcd04473781c4c5cb59f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f59c2ef88b3bc3cd7bdeb23ee182242569e553f7337d36848967067fc05c479
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ffde1f04d1013b5eec7ae73480f479d0d281fd050bfb67dcf4ffaf69a11bc9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7786e0d240c1817a80f936fe537093f6b0f81238abcccea2c0e618f1ac9e9438
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a36392859753540b5ff28a6690e0fb35c1157de322529d1ae210898db91ddda7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.329970409375801,
5
- "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2006,11 +2006,211 @@
2006
  "eval_samples_per_second": 1563.737,
2007
  "eval_steps_per_second": 24.9,
2008
  "step": 100000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2009
  }
2010
  ],
2011
  "max_steps": 500000,
2012
  "num_train_epochs": 12,
2013
- "total_flos": 3.1948663937631865e+21,
2014
  "trial_name": null,
2015
  "trial_params": null
2016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.562967450313381,
5
+ "global_step": 110000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2006
  "eval_samples_per_second": 1563.737,
2007
  "eval_steps_per_second": 24.9,
2008
  "step": 100000
2009
+ },
2010
+ {
2011
+ "epoch": 2.34,
2012
+ "learning_rate": 0.0002822947773032956,
2013
+ "loss": 0.3022,
2014
+ "step": 100500
2015
+ },
2016
+ {
2017
+ "epoch": 2.35,
2018
+ "learning_rate": 0.0002820644686063602,
2019
+ "loss": 0.3023,
2020
+ "step": 101000
2021
+ },
2022
+ {
2023
+ "epoch": 2.35,
2024
+ "eval_loss": 0.2856578528881073,
2025
+ "eval_runtime": 1.4144,
2026
+ "eval_samples_per_second": 1553.982,
2027
+ "eval_steps_per_second": 24.745,
2028
+ "step": 101000
2029
+ },
2030
+ {
2031
+ "epoch": 2.36,
2032
+ "learning_rate": 0.00028183277035275363,
2033
+ "loss": 0.3022,
2034
+ "step": 101500
2035
+ },
2036
+ {
2037
+ "epoch": 2.38,
2038
+ "learning_rate": 0.0002815996850762909,
2039
+ "loss": 0.3024,
2040
+ "step": 102000
2041
+ },
2042
+ {
2043
+ "epoch": 2.38,
2044
+ "eval_loss": 0.28789734840393066,
2045
+ "eval_runtime": 1.8256,
2046
+ "eval_samples_per_second": 1203.971,
2047
+ "eval_steps_per_second": 19.172,
2048
+ "step": 102000
2049
+ },
2050
+ {
2051
+ "epoch": 2.39,
2052
+ "learning_rate": 0.00028136521532595515,
2053
+ "loss": 0.3022,
2054
+ "step": 102500
2055
+ },
2056
+ {
2057
+ "epoch": 2.4,
2058
+ "learning_rate": 0.00028112936366587023,
2059
+ "loss": 0.3018,
2060
+ "step": 103000
2061
+ },
2062
+ {
2063
+ "epoch": 2.4,
2064
+ "eval_loss": 0.28655195236206055,
2065
+ "eval_runtime": 1.4347,
2066
+ "eval_samples_per_second": 1532.011,
2067
+ "eval_steps_per_second": 24.395,
2068
+ "step": 103000
2069
+ },
2070
+ {
2071
+ "epoch": 2.41,
2072
+ "learning_rate": 0.00028089213267527184,
2073
+ "loss": 0.3018,
2074
+ "step": 103500
2075
+ },
2076
+ {
2077
+ "epoch": 2.42,
2078
+ "learning_rate": 0.0002806535249484803,
2079
+ "loss": 0.3015,
2080
+ "step": 104000
2081
+ },
2082
+ {
2083
+ "epoch": 2.42,
2084
+ "eval_loss": 0.2860538959503174,
2085
+ "eval_runtime": 1.4232,
2086
+ "eval_samples_per_second": 1544.364,
2087
+ "eval_steps_per_second": 24.592,
2088
+ "step": 104000
2089
+ },
2090
+ {
2091
+ "epoch": 2.43,
2092
+ "learning_rate": 0.00028041354309487135,
2093
+ "loss": 0.3013,
2094
+ "step": 104500
2095
+ },
2096
+ {
2097
+ "epoch": 2.45,
2098
+ "learning_rate": 0.0002801721897388482,
2099
+ "loss": 0.301,
2100
+ "step": 105000
2101
+ },
2102
+ {
2103
+ "epoch": 2.45,
2104
+ "eval_loss": 0.28530260920524597,
2105
+ "eval_runtime": 1.4494,
2106
+ "eval_samples_per_second": 1516.459,
2107
+ "eval_steps_per_second": 24.147,
2108
+ "step": 105000
2109
+ },
2110
+ {
2111
+ "epoch": 2.46,
2112
+ "learning_rate": 0.0002799294675198124,
2113
+ "loss": 0.3018,
2114
+ "step": 105500
2115
+ },
2116
+ {
2117
+ "epoch": 2.47,
2118
+ "learning_rate": 0.00027968537909213524,
2119
+ "loss": 0.3008,
2120
+ "step": 106000
2121
+ },
2122
+ {
2123
+ "epoch": 2.47,
2124
+ "eval_loss": 0.285876989364624,
2125
+ "eval_runtime": 1.4506,
2126
+ "eval_samples_per_second": 1515.222,
2127
+ "eval_steps_per_second": 24.128,
2128
+ "step": 106000
2129
+ },
2130
+ {
2131
+ "epoch": 2.48,
2132
+ "learning_rate": 0.0002794399271251287,
2133
+ "loss": 0.3013,
2134
+ "step": 106500
2135
+ },
2136
+ {
2137
+ "epoch": 2.49,
2138
+ "learning_rate": 0.0002791931143030162,
2139
+ "loss": 0.3006,
2140
+ "step": 107000
2141
+ },
2142
+ {
2143
+ "epoch": 2.49,
2144
+ "eval_loss": 0.28289294242858887,
2145
+ "eval_runtime": 1.4223,
2146
+ "eval_samples_per_second": 1545.438,
2147
+ "eval_steps_per_second": 24.609,
2148
+ "step": 107000
2149
+ },
2150
+ {
2151
+ "epoch": 2.5,
2152
+ "learning_rate": 0.00027894494332490315,
2153
+ "loss": 0.3001,
2154
+ "step": 107500
2155
+ },
2156
+ {
2157
+ "epoch": 2.52,
2158
+ "learning_rate": 0.0002786954169047476,
2159
+ "loss": 0.3003,
2160
+ "step": 108000
2161
+ },
2162
+ {
2163
+ "epoch": 2.52,
2164
+ "eval_loss": 0.2835444509983063,
2165
+ "eval_runtime": 1.4445,
2166
+ "eval_samples_per_second": 1521.603,
2167
+ "eval_steps_per_second": 24.229,
2168
+ "step": 108000
2169
+ },
2170
+ {
2171
+ "epoch": 2.53,
2172
+ "learning_rate": 0.0002784445377713306,
2173
+ "loss": 0.2999,
2174
+ "step": 108500
2175
+ },
2176
+ {
2177
+ "epoch": 2.54,
2178
+ "learning_rate": 0.0002781923086682261,
2179
+ "loss": 0.2998,
2180
+ "step": 109000
2181
+ },
2182
+ {
2183
+ "epoch": 2.54,
2184
+ "eval_loss": 0.27993467450141907,
2185
+ "eval_runtime": 1.4177,
2186
+ "eval_samples_per_second": 1550.426,
2187
+ "eval_steps_per_second": 24.688,
2188
+ "step": 109000
2189
+ },
2190
+ {
2191
+ "epoch": 2.55,
2192
+ "learning_rate": 0.0002779387323537711,
2193
+ "loss": 0.2994,
2194
+ "step": 109500
2195
+ },
2196
+ {
2197
+ "epoch": 2.56,
2198
+ "learning_rate": 0.0002776838116010356,
2199
+ "loss": 0.3001,
2200
+ "step": 110000
2201
+ },
2202
+ {
2203
+ "epoch": 2.56,
2204
+ "eval_loss": 0.28160494565963745,
2205
+ "eval_runtime": 1.4324,
2206
+ "eval_samples_per_second": 1534.54,
2207
+ "eval_steps_per_second": 24.435,
2208
+ "step": 110000
2209
  }
2210
  ],
2211
  "max_steps": 500000,
2212
  "num_train_epochs": 12,
2213
+ "total_flos": 3.5143535325376865e+21,
2214
  "trial_name": null,
2215
  "trial_params": null
2216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9743dfff2c56ecc42fa49782ae324bef5653923fd6c7bcd04473781c4c5cb59f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f59c2ef88b3bc3cd7bdeb23ee182242569e553f7337d36848967067fc05c479
3
  size 102501541