Training in progress, step 80000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +303 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100172997
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61757e92d02b06dda1da003da57fa0b18bc1cc2b413fc514841b017d0d63c3c8
|
3 |
size 100172997
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 146774203
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81a90871ae24751a566fb99821bee5e29d062c303c164fcd6aeac08948cab240
|
3 |
size 146774203
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7978600af4170dc4592efcab1d33d1582d45b26dc998a10a280a81e23e422deb
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 246899880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7331ea7e49edb5d8c1485934eca953ca913987924fdd220c26d2fc895357dc9
|
3 |
size 246899880
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -2106,11 +2106,311 @@
|
|
2106 |
"learning_rate": 0.00046983100340983056,
|
2107 |
"loss": 3.2461,
|
2108 |
"step": 70000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2109 |
}
|
2110 |
],
|
2111 |
"max_steps": 500000,
|
2112 |
"num_train_epochs": 3,
|
2113 |
-
"total_flos": 1.
|
2114 |
"trial_name": null,
|
2115 |
"trial_params": null
|
2116 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.3437902879243661,
|
5 |
+
"global_step": 80000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
2106 |
"learning_rate": 0.00046983100340983056,
|
2107 |
"loss": 3.2461,
|
2108 |
"step": 70000
|
2109 |
+
},
|
2110 |
+
{
|
2111 |
+
"epoch": 0.3,
|
2112 |
+
"learning_rate": 0.00047183105799939297,
|
2113 |
+
"loss": 3.2495,
|
2114 |
+
"step": 70200
|
2115 |
+
},
|
2116 |
+
{
|
2117 |
+
"epoch": 0.3,
|
2118 |
+
"learning_rate": 0.0004738319577694809,
|
2119 |
+
"loss": 3.2325,
|
2120 |
+
"step": 70400
|
2121 |
+
},
|
2122 |
+
{
|
2123 |
+
"epoch": 0.3,
|
2124 |
+
"learning_rate": 0.0004758336676119636,
|
2125 |
+
"loss": 3.2335,
|
2126 |
+
"step": 70600
|
2127 |
+
},
|
2128 |
+
{
|
2129 |
+
"epoch": 0.3,
|
2130 |
+
"learning_rate": 0.0004778361524044967,
|
2131 |
+
"loss": 3.2499,
|
2132 |
+
"step": 70800
|
2133 |
+
},
|
2134 |
+
{
|
2135 |
+
"epoch": 0.31,
|
2136 |
+
"learning_rate": 0.00047983937701113794,
|
2137 |
+
"loss": 3.2323,
|
2138 |
+
"step": 71000
|
2139 |
+
},
|
2140 |
+
{
|
2141 |
+
"epoch": 0.31,
|
2142 |
+
"learning_rate": 0.00048184330628296484,
|
2143 |
+
"loss": 3.221,
|
2144 |
+
"step": 71200
|
2145 |
+
},
|
2146 |
+
{
|
2147 |
+
"epoch": 0.31,
|
2148 |
+
"learning_rate": 0.0004838479050586898,
|
2149 |
+
"loss": 3.2344,
|
2150 |
+
"step": 71400
|
2151 |
+
},
|
2152 |
+
{
|
2153 |
+
"epoch": 0.31,
|
2154 |
+
"learning_rate": 0.0004858531381652792,
|
2155 |
+
"loss": 3.241,
|
2156 |
+
"step": 71600
|
2157 |
+
},
|
2158 |
+
{
|
2159 |
+
"epoch": 0.31,
|
2160 |
+
"learning_rate": 0.0004878589704185682,
|
2161 |
+
"loss": 3.2434,
|
2162 |
+
"step": 71800
|
2163 |
+
},
|
2164 |
+
{
|
2165 |
+
"epoch": 0.31,
|
2166 |
+
"learning_rate": 0.00048986536662388,
|
2167 |
+
"loss": 3.2375,
|
2168 |
+
"step": 72000
|
2169 |
+
},
|
2170 |
+
{
|
2171 |
+
"epoch": 0.31,
|
2172 |
+
"learning_rate": 0.0004918722915766426,
|
2173 |
+
"loss": 3.2237,
|
2174 |
+
"step": 72200
|
2175 |
+
},
|
2176 |
+
{
|
2177 |
+
"epoch": 0.31,
|
2178 |
+
"learning_rate": 0.0004938797100630057,
|
2179 |
+
"loss": 3.2665,
|
2180 |
+
"step": 72400
|
2181 |
+
},
|
2182 |
+
{
|
2183 |
+
"epoch": 0.31,
|
2184 |
+
"learning_rate": 0.0004958875868604602,
|
2185 |
+
"loss": 3.218,
|
2186 |
+
"step": 72600
|
2187 |
+
},
|
2188 |
+
{
|
2189 |
+
"epoch": 0.31,
|
2190 |
+
"learning_rate": 0.0004978958867384551,
|
2191 |
+
"loss": 3.2474,
|
2192 |
+
"step": 72800
|
2193 |
+
},
|
2194 |
+
{
|
2195 |
+
"epoch": 0.31,
|
2196 |
+
"learning_rate": 0.000499904574459016,
|
2197 |
+
"loss": 3.2423,
|
2198 |
+
"step": 73000
|
2199 |
+
},
|
2200 |
+
{
|
2201 |
+
"epoch": 0.31,
|
2202 |
+
"learning_rate": 0.000501913614777363,
|
2203 |
+
"loss": 3.2288,
|
2204 |
+
"step": 73200
|
2205 |
+
},
|
2206 |
+
{
|
2207 |
+
"epoch": 0.32,
|
2208 |
+
"learning_rate": 0.0005039229724425303,
|
2209 |
+
"loss": 3.2148,
|
2210 |
+
"step": 73400
|
2211 |
+
},
|
2212 |
+
{
|
2213 |
+
"epoch": 0.32,
|
2214 |
+
"learning_rate": 0.0005059326121979831,
|
2215 |
+
"loss": 3.2383,
|
2216 |
+
"step": 73600
|
2217 |
+
},
|
2218 |
+
{
|
2219 |
+
"epoch": 0.32,
|
2220 |
+
"learning_rate": 0.0005079424987822374,
|
2221 |
+
"loss": 3.236,
|
2222 |
+
"step": 73800
|
2223 |
+
},
|
2224 |
+
{
|
2225 |
+
"epoch": 0.32,
|
2226 |
+
"learning_rate": 0.0005099525969294778,
|
2227 |
+
"loss": 3.2231,
|
2228 |
+
"step": 74000
|
2229 |
+
},
|
2230 |
+
{
|
2231 |
+
"epoch": 0.32,
|
2232 |
+
"learning_rate": 0.0005119628713701773,
|
2233 |
+
"loss": 3.2215,
|
2234 |
+
"step": 74200
|
2235 |
+
},
|
2236 |
+
{
|
2237 |
+
"epoch": 0.32,
|
2238 |
+
"learning_rate": 0.0005139732868317155,
|
2239 |
+
"loss": 3.227,
|
2240 |
+
"step": 74400
|
2241 |
+
},
|
2242 |
+
{
|
2243 |
+
"epoch": 0.32,
|
2244 |
+
"learning_rate": 0.0005159838080389977,
|
2245 |
+
"loss": 3.2006,
|
2246 |
+
"step": 74600
|
2247 |
+
},
|
2248 |
+
{
|
2249 |
+
"epoch": 0.32,
|
2250 |
+
"learning_rate": 0.0005179943997150736,
|
2251 |
+
"loss": 3.2426,
|
2252 |
+
"step": 74800
|
2253 |
+
},
|
2254 |
+
{
|
2255 |
+
"epoch": 0.32,
|
2256 |
+
"learning_rate": 0.0005200050265817561,
|
2257 |
+
"loss": 3.2194,
|
2258 |
+
"step": 75000
|
2259 |
+
},
|
2260 |
+
{
|
2261 |
+
"epoch": 0.32,
|
2262 |
+
"learning_rate": 0.0005220156533602416,
|
2263 |
+
"loss": 3.223,
|
2264 |
+
"step": 75200
|
2265 |
+
},
|
2266 |
+
{
|
2267 |
+
"epoch": 0.32,
|
2268 |
+
"learning_rate": 0.0005240262447717271,
|
2269 |
+
"loss": 3.224,
|
2270 |
+
"step": 75400
|
2271 |
+
},
|
2272 |
+
{
|
2273 |
+
"epoch": 0.32,
|
2274 |
+
"learning_rate": 0.0005260367655380307,
|
2275 |
+
"loss": 3.2138,
|
2276 |
+
"step": 75600
|
2277 |
+
},
|
2278 |
+
{
|
2279 |
+
"epoch": 0.33,
|
2280 |
+
"learning_rate": 0.0005280471803822096,
|
2281 |
+
"loss": 3.2123,
|
2282 |
+
"step": 75800
|
2283 |
+
},
|
2284 |
+
{
|
2285 |
+
"epoch": 0.33,
|
2286 |
+
"learning_rate": 0.0005300574540291803,
|
2287 |
+
"loss": 3.2208,
|
2288 |
+
"step": 76000
|
2289 |
+
},
|
2290 |
+
{
|
2291 |
+
"epoch": 0.33,
|
2292 |
+
"learning_rate": 0.000532067551206336,
|
2293 |
+
"loss": 3.2199,
|
2294 |
+
"step": 76200
|
2295 |
+
},
|
2296 |
+
{
|
2297 |
+
"epoch": 0.33,
|
2298 |
+
"learning_rate": 0.0005340774366441665,
|
2299 |
+
"loss": 3.2367,
|
2300 |
+
"step": 76400
|
2301 |
+
},
|
2302 |
+
{
|
2303 |
+
"epoch": 0.33,
|
2304 |
+
"learning_rate": 0.0005360870750768769,
|
2305 |
+
"loss": 3.21,
|
2306 |
+
"step": 76600
|
2307 |
+
},
|
2308 |
+
{
|
2309 |
+
"epoch": 0.33,
|
2310 |
+
"learning_rate": 0.0005380964312430063,
|
2311 |
+
"loss": 3.2221,
|
2312 |
+
"step": 76800
|
2313 |
+
},
|
2314 |
+
{
|
2315 |
+
"epoch": 0.33,
|
2316 |
+
"learning_rate": 0.0005401054698860466,
|
2317 |
+
"loss": 3.2021,
|
2318 |
+
"step": 77000
|
2319 |
+
},
|
2320 |
+
{
|
2321 |
+
"epoch": 0.33,
|
2322 |
+
"learning_rate": 0.0005421141557550603,
|
2323 |
+
"loss": 3.2352,
|
2324 |
+
"step": 77200
|
2325 |
+
},
|
2326 |
+
{
|
2327 |
+
"epoch": 0.33,
|
2328 |
+
"learning_rate": 0.0005441224536053012,
|
2329 |
+
"loss": 3.1962,
|
2330 |
+
"step": 77400
|
2331 |
+
},
|
2332 |
+
{
|
2333 |
+
"epoch": 0.33,
|
2334 |
+
"learning_rate": 0.0005461303281988298,
|
2335 |
+
"loss": 3.2127,
|
2336 |
+
"step": 77600
|
2337 |
+
},
|
2338 |
+
{
|
2339 |
+
"epoch": 0.33,
|
2340 |
+
"learning_rate": 0.000548137744305134,
|
2341 |
+
"loss": 3.2307,
|
2342 |
+
"step": 77800
|
2343 |
+
},
|
2344 |
+
{
|
2345 |
+
"epoch": 0.34,
|
2346 |
+
"learning_rate": 0.0005501446667017461,
|
2347 |
+
"loss": 3.2053,
|
2348 |
+
"step": 78000
|
2349 |
+
},
|
2350 |
+
{
|
2351 |
+
"epoch": 0.34,
|
2352 |
+
"learning_rate": 0.0005521510601748613,
|
2353 |
+
"loss": 3.1936,
|
2354 |
+
"step": 78200
|
2355 |
+
},
|
2356 |
+
{
|
2357 |
+
"epoch": 0.34,
|
2358 |
+
"learning_rate": 0.0005541568895199552,
|
2359 |
+
"loss": 3.2019,
|
2360 |
+
"step": 78400
|
2361 |
+
},
|
2362 |
+
{
|
2363 |
+
"epoch": 0.34,
|
2364 |
+
"learning_rate": 0.0005561621195424016,
|
2365 |
+
"loss": 3.203,
|
2366 |
+
"step": 78600
|
2367 |
+
},
|
2368 |
+
{
|
2369 |
+
"epoch": 0.34,
|
2370 |
+
"learning_rate": 0.0005581667150580907,
|
2371 |
+
"loss": 3.2125,
|
2372 |
+
"step": 78800
|
2373 |
+
},
|
2374 |
+
{
|
2375 |
+
"epoch": 0.34,
|
2376 |
+
"learning_rate": 0.0005601706408940451,
|
2377 |
+
"loss": 3.2205,
|
2378 |
+
"step": 79000
|
2379 |
+
},
|
2380 |
+
{
|
2381 |
+
"epoch": 0.34,
|
2382 |
+
"learning_rate": 0.0005621738618890382,
|
2383 |
+
"loss": 3.2215,
|
2384 |
+
"step": 79200
|
2385 |
+
},
|
2386 |
+
{
|
2387 |
+
"epoch": 0.34,
|
2388 |
+
"learning_rate": 0.0005641763428942106,
|
2389 |
+
"loss": 3.2052,
|
2390 |
+
"step": 79400
|
2391 |
+
},
|
2392 |
+
{
|
2393 |
+
"epoch": 0.34,
|
2394 |
+
"learning_rate": 0.0005661780487736866,
|
2395 |
+
"loss": 3.2249,
|
2396 |
+
"step": 79600
|
2397 |
+
},
|
2398 |
+
{
|
2399 |
+
"epoch": 0.34,
|
2400 |
+
"learning_rate": 0.0005681789444051913,
|
2401 |
+
"loss": 3.1952,
|
2402 |
+
"step": 79800
|
2403 |
+
},
|
2404 |
+
{
|
2405 |
+
"epoch": 0.34,
|
2406 |
+
"learning_rate": 0.0005701789946806666,
|
2407 |
+
"loss": 3.1995,
|
2408 |
+
"step": 80000
|
2409 |
}
|
2410 |
],
|
2411 |
"max_steps": 500000,
|
2412 |
"num_train_epochs": 3,
|
2413 |
+
"total_flos": 1.2750639857664e+17,
|
2414 |
"trial_name": null,
|
2415 |
"trial_params": null
|
2416 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 146774203
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81a90871ae24751a566fb99821bee5e29d062c303c164fcd6aeac08948cab240
|
3 |
size 146774203
|