Training in progress, step 350, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +371 -5
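The files above follow the standard Hugging Face Trainer checkpoint layout (adapter weights, optimizer and scheduler state, one RNG snapshot per process, and the trainer state), so training can normally be resumed from this folder rather than restarted. A minimal sketch, assuming the run is rebuilt with the same model, data and arguments as the original; the model path and dataset variables below are placeholders, not values recorded in this commit:

from transformers import AutoModelForCausalLM, Trainer, TrainingArguments

# Placeholders: the base model and datasets are not stored in the checkpoint
# and must be recreated exactly as in the original run.
model = AutoModelForCausalLM.from_pretrained("path/to/base-model")
train_ds = eval_ds = None  # rebuild the original train/eval splits here

args = TrainingArguments(
    output_dir="miner_id_24",       # parent of "best_model_checkpoint" in trainer_state.json
    per_device_train_batch_size=1,  # "train_batch_size": 1 in trainer_state.json
    logging_steps=1,                # "logging_steps": 1 in trainer_state.json
)
trainer = Trainer(model=model, args=args, train_dataset=train_ds, eval_dataset=eval_ds)

# Restores weights, optimizer, scheduler and per-process RNG state from the
# files listed in this commit, then continues from global step 350.
trainer.train(resume_from_checkpoint="last-checkpoint")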
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:30263ee37a2d68a7092f89eb8fa7493fe6f8735c772e021332473d7fcac0680b
 size 335604696
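Each binary in this commit is tracked with Git LFS, so the diff above only touches the small pointer file: the oid sha256 line is the hash of the new blob and size is its byte count (unchanged at 335604696 bytes, as expected for an adapter whose shape did not change). As a quick integrity check, the hash of a downloaded copy can be compared against the pointer; a minimal sketch (the local path is an assumption):

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so large checkpoints do not need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "30263ee37a2d68a7092f89eb8fa7493fe6f8735c772e021332473d7fcac0680b"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
print("OK" if actual == expected else f"hash mismatch: {actual}")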
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b46b0498e23611388191f2115cb6dceb214463dd39243d7304b3cad927146750
 size 671466706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d6999b9489bb7350f42339a6e403dbb7f9b68ad8f2757bcf005b72ef226794cb
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7ce49388e1c9528f986f55439875157d89a4871e8e7acd10dbbd2c4a9c1bc530
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:52b00fa031576decf3df4a9b5c441241f45eac22271ca0abe7bfa2975f83834a
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6b61c20b54b5fd3227b51531ad3983ccdfc8e54598920e059bbf33d95d6d3558
 size 15024
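The four rng_state_{0..3}.pth files hold one random-number-generator snapshot per training process (four ranks here), which is what lets a resumed run reproduce the same shuffling and dropout as an uninterrupted one. A minimal sketch of saving and restoring such a snapshot with plain PyTorch; the exact dictionary layout the Trainer writes may differ, this is only illustrative:

import random
import numpy as np
import torch

def save_rng_state(path):
    state = {
        "python": random.getstate(),
        "numpy": np.random.get_state(),
        "torch_cpu": torch.get_rng_state(),
    }
    if torch.cuda.is_available():
        state["torch_cuda"] = torch.cuda.get_rng_state_all()
    torch.save(state, path)

def load_rng_state(path):
    state = torch.load(path)
    random.setstate(state["python"])
    np.random.set_state(state["numpy"])
    torch.set_rng_state(state["torch_cpu"])
    if torch.cuda.is_available() and "torch_cuda" in state:
        torch.cuda.set_rng_state_all(state["torch_cuda"])

# One file per rank, mirroring rng_state_0.pth ... rng_state_3.pth above.
save_rng_state("rng_state_0.pth")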
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4112ab6a51bcb64d14b5b501c1d18a2f1bd9a7dc366fede565410f8f2533667f
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 2.3020389080047607,
   "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch":
+  "epoch": 2.224428997020854,
   "eval_steps": 25,
-  "global_step":
+  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2211,6 +2211,372 @@
       "eval_samples_per_second": 33.113,
       "eval_steps_per_second": 8.609,
       "step": 300
+    },
+    {
+      "epoch": 1.9130089374379344,
+      "grad_norm": 0.23434974253177643,
+      "learning_rate": 4.366487945997335e-05,
+      "loss": 2.1907,
+      "step": 301
+    },
+    {
+      "epoch": 1.9193644488579942,
+      "grad_norm": 0.22699026763439178,
+      "learning_rate": 4.337710167617577e-05,
+      "loss": 2.2486,
+      "step": 302
+    },
+    {
+      "epoch": 1.9257199602780535,
+      "grad_norm": 0.24884426593780518,
+      "learning_rate": 4.308983231560384e-05,
+      "loss": 2.2289,
+      "step": 303
+    },
+    {
+      "epoch": 1.9320754716981132,
+      "grad_norm": 0.24829219281673431,
+      "learning_rate": 4.2803083944350016e-05,
+      "loss": 2.2107,
+      "step": 304
+    },
+    {
+      "epoch": 1.9384309831181727,
+      "grad_norm": 0.2585359513759613,
+      "learning_rate": 4.2516869105717004e-05,
+      "loss": 2.2945,
+      "step": 305
+    },
+    {
+      "epoch": 1.9447864945382323,
+      "grad_norm": 0.28458529710769653,
+      "learning_rate": 4.223120031966903e-05,
+      "loss": 2.2366,
+      "step": 306
+    },
+    {
+      "epoch": 1.951142005958292,
+      "grad_norm": 0.3298734426498413,
+      "learning_rate": 4.1946090082284264e-05,
+      "loss": 2.3884,
+      "step": 307
+    },
+    {
+      "epoch": 1.9574975173783515,
+      "grad_norm": 0.22929073870182037,
+      "learning_rate": 4.166155086520809e-05,
+      "loss": 2.1712,
+      "step": 308
+    },
+    {
+      "epoch": 1.963853028798411,
+      "grad_norm": 0.20995154976844788,
+      "learning_rate": 4.137759511510766e-05,
+      "loss": 2.1756,
+      "step": 309
+    },
+    {
+      "epoch": 1.9702085402184708,
+      "grad_norm": 0.22254884243011475,
+      "learning_rate": 4.109423525312738e-05,
+      "loss": 2.2643,
+      "step": 310
+    },
+    {
+      "epoch": 1.9765640516385303,
+      "grad_norm": 0.23102299869060516,
+      "learning_rate": 4.081148367434554e-05,
+      "loss": 2.2153,
+      "step": 311
+    },
+    {
+      "epoch": 1.9829195630585899,
+      "grad_norm": 0.25142958760261536,
+      "learning_rate": 4.052935274723222e-05,
+      "loss": 2.1985,
+      "step": 312
+    },
+    {
+      "epoch": 1.9892750744786496,
+      "grad_norm": 0.24793897569179535,
+      "learning_rate": 4.02478548131081e-05,
+      "loss": 2.256,
+      "step": 313
+    },
+    {
+      "epoch": 1.995630585898709,
+      "grad_norm": 0.28536108136177063,
+      "learning_rate": 3.9967002185604805e-05,
+      "loss": 2.3481,
+      "step": 314
+    },
+    {
+      "epoch": 2.0019860973187686,
+      "grad_norm": 0.5548765063285828,
+      "learning_rate": 3.968680715012606e-05,
+      "loss": 3.6251,
+      "step": 315
+    },
+    {
+      "epoch": 2.0083416087388284,
+      "grad_norm": 0.22422701120376587,
+      "learning_rate": 3.940728196331045e-05,
+      "loss": 2.0935,
+      "step": 316
+    },
+    {
+      "epoch": 2.0146971201588877,
+      "grad_norm": 0.20903456211090088,
+      "learning_rate": 3.912843885249515e-05,
+      "loss": 2.1515,
+      "step": 317
+    },
+    {
+      "epoch": 2.0210526315789474,
+      "grad_norm": 0.20581288635730743,
+      "learning_rate": 3.885029001518119e-05,
+      "loss": 2.1726,
+      "step": 318
+    },
+    {
+      "epoch": 2.0274081429990067,
+      "grad_norm": 0.22312361001968384,
+      "learning_rate": 3.857284761849975e-05,
+      "loss": 2.1979,
+      "step": 319
+    },
+    {
+      "epoch": 2.0337636544190665,
+      "grad_norm": 0.22348229587078094,
+      "learning_rate": 3.829612379868006e-05,
+      "loss": 2.2117,
+      "step": 320
+    },
+    {
+      "epoch": 2.0401191658391262,
+      "grad_norm": 0.22266767919063568,
+      "learning_rate": 3.8020130660518495e-05,
+      "loss": 2.2157,
+      "step": 321
+    },
+    {
+      "epoch": 2.0464746772591855,
+      "grad_norm": 0.23664413392543793,
+      "learning_rate": 3.774488027684898e-05,
+      "loss": 2.2099,
+      "step": 322
+    },
+    {
+      "epoch": 2.0528301886792453,
+      "grad_norm": 0.24889320135116577,
+      "learning_rate": 3.7470384688015e-05,
+      "loss": 2.1944,
+      "step": 323
+    },
+    {
+      "epoch": 2.059185700099305,
+      "grad_norm": 0.2663976848125458,
+      "learning_rate": 3.7196655901342836e-05,
+      "loss": 2.1792,
+      "step": 324
+    },
+    {
+      "epoch": 2.0655412115193643,
+      "grad_norm": 0.2890424132347107,
+      "learning_rate": 3.692370589061639e-05,
+      "loss": 2.2045,
+      "step": 325
+    },
+    {
+      "epoch": 2.0655412115193643,
+      "eval_loss": 2.3015811443328857,
+      "eval_runtime": 1.5096,
+      "eval_samples_per_second": 33.122,
+      "eval_steps_per_second": 8.612,
+      "step": 325
+    },
+    {
+      "epoch": 2.071896722939424,
+      "grad_norm": 0.3092862665653229,
+      "learning_rate": 3.665154659555336e-05,
+      "loss": 2.2105,
+      "step": 326
+    },
+    {
+      "epoch": 2.078252234359484,
+      "grad_norm": 0.2822588086128235,
+      "learning_rate": 3.638018992128296e-05,
+      "loss": 1.8203,
+      "step": 327
+    },
+    {
+      "epoch": 2.084607745779543,
+      "grad_norm": 0.2667306363582611,
+      "learning_rate": 3.6109647737825246e-05,
+      "loss": 2.5053,
+      "step": 328
+    },
+    {
+      "epoch": 2.090963257199603,
+      "grad_norm": 0.2170848250389099,
+      "learning_rate": 3.583993187957173e-05,
+      "loss": 2.1155,
+      "step": 329
+    },
+    {
+      "epoch": 2.097318768619662,
+      "grad_norm": 0.23939752578735352,
+      "learning_rate": 3.557105414476782e-05,
+      "loss": 2.1199,
+      "step": 330
+    },
+    {
+      "epoch": 2.103674280039722,
+      "grad_norm": 0.24040144681930542,
+      "learning_rate": 3.530302629499667e-05,
+      "loss": 2.2042,
+      "step": 331
+    },
+    {
+      "epoch": 2.1100297914597816,
+      "grad_norm": 0.23685196042060852,
+      "learning_rate": 3.503586005466474e-05,
+      "loss": 2.0769,
+      "step": 332
+    },
+    {
+      "epoch": 2.116385302879841,
+      "grad_norm": 0.23992370069026947,
+      "learning_rate": 3.476956711048885e-05,
+      "loss": 2.1787,
+      "step": 333
+    },
+    {
+      "epoch": 2.1227408142999007,
+      "grad_norm": 0.25517672300338745,
+      "learning_rate": 3.4504159110985035e-05,
+      "loss": 2.2335,
+      "step": 334
+    },
+    {
+      "epoch": 2.1290963257199604,
+      "grad_norm": 0.2546270787715912,
+      "learning_rate": 3.423964766595906e-05,
+      "loss": 2.1222,
+      "step": 335
+    },
+    {
+      "epoch": 2.1354518371400197,
+      "grad_norm": 0.2679215371608734,
+      "learning_rate": 3.3976044345998365e-05,
+      "loss": 2.1973,
+      "step": 336
+    },
+    {
+      "epoch": 2.1418073485600795,
+      "grad_norm": 0.2851313650608063,
+      "learning_rate": 3.371336068196617e-05,
+      "loss": 2.2163,
+      "step": 337
+    },
+    {
+      "epoch": 2.1481628599801392,
+      "grad_norm": 0.31128042936325073,
+      "learning_rate": 3.345160816449687e-05,
+      "loss": 2.1715,
+      "step": 338
+    },
+    {
+      "epoch": 2.1545183714001985,
+      "grad_norm": 0.3457167148590088,
+      "learning_rate": 3.3190798243493595e-05,
+      "loss": 2.2396,
+      "step": 339
+    },
+    {
+      "epoch": 2.1608738828202583,
+      "grad_norm": 0.32169675827026367,
+      "learning_rate": 3.293094232762715e-05,
+      "loss": 2.1081,
+      "step": 340
+    },
+    {
+      "epoch": 2.1672293942403176,
+      "grad_norm": 0.24396829307079315,
+      "learning_rate": 3.26720517838371e-05,
+      "loss": 2.0669,
+      "step": 341
+    },
+    {
+      "epoch": 2.1735849056603773,
+      "grad_norm": 0.25499778985977173,
+      "learning_rate": 3.241413793683458e-05,
+      "loss": 2.1445,
+      "step": 342
+    },
+    {
+      "epoch": 2.179940417080437,
+      "grad_norm": 0.25378331542015076,
+      "learning_rate": 3.215721206860673e-05,
+      "loss": 2.1485,
+      "step": 343
+    },
+    {
+      "epoch": 2.1862959285004964,
+      "grad_norm": 0.2579626441001892,
+      "learning_rate": 3.19012854179234e-05,
+      "loss": 2.1483,
+      "step": 344
+    },
+    {
+      "epoch": 2.192651439920556,
+      "grad_norm": 0.2530902326107025,
+      "learning_rate": 3.164636917984534e-05,
+      "loss": 2.135,
+      "step": 345
+    },
+    {
+      "epoch": 2.199006951340616,
+      "grad_norm": 0.26239219307899475,
+      "learning_rate": 3.1392474505234604e-05,
+      "loss": 2.169,
+      "step": 346
+    },
+    {
+      "epoch": 2.205362462760675,
+      "grad_norm": 0.275020033121109,
+      "learning_rate": 3.11396125002668e-05,
+      "loss": 2.2371,
+      "step": 347
+    },
+    {
+      "epoch": 2.211717974180735,
+      "grad_norm": 0.27179548144340515,
+      "learning_rate": 3.088779422594514e-05,
+      "loss": 2.1947,
+      "step": 348
+    },
+    {
+      "epoch": 2.2180734856007946,
+      "grad_norm": 0.2944357097148895,
+      "learning_rate": 3.063703069761679e-05,
+      "loss": 2.2554,
+      "step": 349
+    },
+    {
+      "epoch": 2.224428997020854,
+      "grad_norm": 0.31469622254371643,
+      "learning_rate": 3.0387332884490805e-05,
+      "loss": 2.1767,
+      "step": 350
+    },
+    {
+      "epoch": 2.224428997020854,
+      "eval_loss": 2.3065717220306396,
+      "eval_runtime": 1.5093,
+      "eval_samples_per_second": 33.127,
+      "eval_steps_per_second": 8.613,
+      "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2225,7 +2591,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -2234,12 +2600,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 8.35531706424361e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
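Since trainer_state.json is plain JSON, the entries added in this commit (per-step training loss for steps 301-350, the evaluations at steps 325 and 350, and the early-stopping flags that set should_training_stop to true) can be inspected directly. A minimal sketch, assuming the file sits at last-checkpoint/trainer_state.json:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"], "epoch:", state["epoch"])
print("best_metric:", state["best_metric"], "from", state["best_model_checkpoint"])

# log_history mixes training and evaluation records; split them by their keys.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]
print("last train loss:", train_logs[-1]["loss"], "at step", train_logs[-1]["step"])
for e in eval_logs[-2:]:
    print("eval_loss", e["eval_loss"], "at step", e["step"])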