TracyTank committed
Commit 9e04294 · verified · 1 Parent(s): 5c1c7d6

Training in progress, step 350, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6b441e5a407e166d75de29ae94a3955500ad7021ce6dca594b68aff59f6d6ed
+oid sha256:30263ee37a2d68a7092f89eb8fa7493fe6f8735c772e021332473d7fcac0680b
 size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aadc289b586300b6c580bc1a6eca495740414d61ea7683171879f63084fc162e
+oid sha256:b46b0498e23611388191f2115cb6dceb214463dd39243d7304b3cad927146750
 size 671466706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78a5855da155279f93abc13f1bfe89fdf931186bd840a200d37c3b15753ce1a9
+oid sha256:d6999b9489bb7350f42339a6e403dbb7f9b68ad8f2757bcf005b72ef226794cb
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6686cf7ca61e0053c16815e8d590e1349f054c8a931e8bf4f04bd67e016e11bb
+oid sha256:7ce49388e1c9528f986f55439875157d89a4871e8e7acd10dbbd2c4a9c1bc530
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1180d1447ec654e562fe520e2b7e53b29f2bc13d63e52f6f84d0b46af7e8e585
+oid sha256:52b00fa031576decf3df4a9b5c441241f45eac22271ca0abe7bfa2975f83834a
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e4e36ed27da4c00b2842665e4d78dfd418210e96c76a5f25a475da6320fd1f7
+oid sha256:6b61c20b54b5fd3227b51531ad3983ccdfc8e54598920e059bbf33d95d6d3558
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35854f452c47ab12eebe0e36f0be134b8fb0b52560a5a462ad8966fc50622447
+oid sha256:4112ab6a51bcb64d14b5b501c1d18a2f1bd9a7dc366fede565410f8f2533667f
 size 1064
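
The files above are Git LFS pointer files (spec v1): each stores only a version line, the sha256 oid of the stored object, and its size in bytes, while the actual binary lives in LFS storage. A minimal sketch of checking a downloaded blob against its pointer, assuming the object has already been fetched locally (the path and chunk size are illustrative; the expected oid is taken from the adapter_model.safetensors pointer above):

    import hashlib

    # Hypothetical local path; expected oid copied from the new LFS pointer above.
    path = "last-checkpoint/adapter_model.safetensors"
    expected_oid = "30263ee37a2d68a7092f89eb8fa7493fe6f8735c772e021332473d7fcac0680b"

    sha = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha.update(chunk)

    print("oid matches" if sha.hexdigest() == expected_oid else "oid mismatch")

If the local file is still a pointer rather than the real object, `git lfs pull` fetches the blob first.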
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 2.3020389080047607,
   "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 1.906653426017875,
+  "epoch": 2.224428997020854,
   "eval_steps": 25,
-  "global_step": 300,
+  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2211,6 +2211,372 @@
       "eval_samples_per_second": 33.113,
       "eval_steps_per_second": 8.609,
       "step": 300
+    },
+    {
+      "epoch": 1.9130089374379344,
+      "grad_norm": 0.23434974253177643,
+      "learning_rate": 4.366487945997335e-05,
+      "loss": 2.1907,
+      "step": 301
+    },
+    {
+      "epoch": 1.9193644488579942,
+      "grad_norm": 0.22699026763439178,
+      "learning_rate": 4.337710167617577e-05,
+      "loss": 2.2486,
+      "step": 302
+    },
+    {
+      "epoch": 1.9257199602780535,
+      "grad_norm": 0.24884426593780518,
+      "learning_rate": 4.308983231560384e-05,
+      "loss": 2.2289,
+      "step": 303
+    },
+    {
+      "epoch": 1.9320754716981132,
+      "grad_norm": 0.24829219281673431,
+      "learning_rate": 4.2803083944350016e-05,
+      "loss": 2.2107,
+      "step": 304
+    },
+    {
+      "epoch": 1.9384309831181727,
+      "grad_norm": 0.2585359513759613,
+      "learning_rate": 4.2516869105717004e-05,
+      "loss": 2.2945,
+      "step": 305
+    },
+    {
+      "epoch": 1.9447864945382323,
+      "grad_norm": 0.28458529710769653,
+      "learning_rate": 4.223120031966903e-05,
+      "loss": 2.2366,
+      "step": 306
+    },
+    {
+      "epoch": 1.951142005958292,
+      "grad_norm": 0.3298734426498413,
+      "learning_rate": 4.1946090082284264e-05,
+      "loss": 2.3884,
+      "step": 307
+    },
+    {
+      "epoch": 1.9574975173783515,
+      "grad_norm": 0.22929073870182037,
+      "learning_rate": 4.166155086520809e-05,
+      "loss": 2.1712,
+      "step": 308
+    },
+    {
+      "epoch": 1.963853028798411,
+      "grad_norm": 0.20995154976844788,
+      "learning_rate": 4.137759511510766e-05,
+      "loss": 2.1756,
+      "step": 309
+    },
+    {
+      "epoch": 1.9702085402184708,
+      "grad_norm": 0.22254884243011475,
+      "learning_rate": 4.109423525312738e-05,
+      "loss": 2.2643,
+      "step": 310
+    },
+    {
+      "epoch": 1.9765640516385303,
+      "grad_norm": 0.23102299869060516,
+      "learning_rate": 4.081148367434554e-05,
+      "loss": 2.2153,
+      "step": 311
+    },
+    {
+      "epoch": 1.9829195630585899,
+      "grad_norm": 0.25142958760261536,
+      "learning_rate": 4.052935274723222e-05,
+      "loss": 2.1985,
+      "step": 312
+    },
+    {
+      "epoch": 1.9892750744786496,
+      "grad_norm": 0.24793897569179535,
+      "learning_rate": 4.02478548131081e-05,
+      "loss": 2.256,
+      "step": 313
+    },
+    {
+      "epoch": 1.995630585898709,
+      "grad_norm": 0.28536108136177063,
+      "learning_rate": 3.9967002185604805e-05,
+      "loss": 2.3481,
+      "step": 314
+    },
+    {
+      "epoch": 2.0019860973187686,
+      "grad_norm": 0.5548765063285828,
+      "learning_rate": 3.968680715012606e-05,
+      "loss": 3.6251,
+      "step": 315
+    },
+    {
+      "epoch": 2.0083416087388284,
+      "grad_norm": 0.22422701120376587,
+      "learning_rate": 3.940728196331045e-05,
+      "loss": 2.0935,
+      "step": 316
+    },
+    {
+      "epoch": 2.0146971201588877,
+      "grad_norm": 0.20903456211090088,
+      "learning_rate": 3.912843885249515e-05,
+      "loss": 2.1515,
+      "step": 317
+    },
+    {
+      "epoch": 2.0210526315789474,
+      "grad_norm": 0.20581288635730743,
+      "learning_rate": 3.885029001518119e-05,
+      "loss": 2.1726,
+      "step": 318
+    },
+    {
+      "epoch": 2.0274081429990067,
+      "grad_norm": 0.22312361001968384,
+      "learning_rate": 3.857284761849975e-05,
+      "loss": 2.1979,
+      "step": 319
+    },
+    {
+      "epoch": 2.0337636544190665,
+      "grad_norm": 0.22348229587078094,
+      "learning_rate": 3.829612379868006e-05,
+      "loss": 2.2117,
+      "step": 320
+    },
+    {
+      "epoch": 2.0401191658391262,
+      "grad_norm": 0.22266767919063568,
+      "learning_rate": 3.8020130660518495e-05,
+      "loss": 2.2157,
+      "step": 321
+    },
+    {
+      "epoch": 2.0464746772591855,
+      "grad_norm": 0.23664413392543793,
+      "learning_rate": 3.774488027684898e-05,
+      "loss": 2.2099,
+      "step": 322
+    },
+    {
+      "epoch": 2.0528301886792453,
+      "grad_norm": 0.24889320135116577,
+      "learning_rate": 3.7470384688015e-05,
+      "loss": 2.1944,
+      "step": 323
+    },
+    {
+      "epoch": 2.059185700099305,
+      "grad_norm": 0.2663976848125458,
+      "learning_rate": 3.7196655901342836e-05,
+      "loss": 2.1792,
+      "step": 324
+    },
+    {
+      "epoch": 2.0655412115193643,
+      "grad_norm": 0.2890424132347107,
+      "learning_rate": 3.692370589061639e-05,
+      "loss": 2.2045,
+      "step": 325
+    },
+    {
+      "epoch": 2.0655412115193643,
+      "eval_loss": 2.3015811443328857,
+      "eval_runtime": 1.5096,
+      "eval_samples_per_second": 33.122,
+      "eval_steps_per_second": 8.612,
+      "step": 325
+    },
+    {
+      "epoch": 2.071896722939424,
+      "grad_norm": 0.3092862665653229,
+      "learning_rate": 3.665154659555336e-05,
+      "loss": 2.2105,
+      "step": 326
+    },
+    {
+      "epoch": 2.078252234359484,
+      "grad_norm": 0.2822588086128235,
+      "learning_rate": 3.638018992128296e-05,
+      "loss": 1.8203,
+      "step": 327
+    },
+    {
+      "epoch": 2.084607745779543,
+      "grad_norm": 0.2667306363582611,
+      "learning_rate": 3.6109647737825246e-05,
+      "loss": 2.5053,
+      "step": 328
+    },
+    {
+      "epoch": 2.090963257199603,
+      "grad_norm": 0.2170848250389099,
+      "learning_rate": 3.583993187957173e-05,
+      "loss": 2.1155,
+      "step": 329
+    },
+    {
+      "epoch": 2.097318768619662,
+      "grad_norm": 0.23939752578735352,
+      "learning_rate": 3.557105414476782e-05,
+      "loss": 2.1199,
+      "step": 330
+    },
+    {
+      "epoch": 2.103674280039722,
+      "grad_norm": 0.24040144681930542,
+      "learning_rate": 3.530302629499667e-05,
+      "loss": 2.2042,
+      "step": 331
+    },
+    {
+      "epoch": 2.1100297914597816,
+      "grad_norm": 0.23685196042060852,
+      "learning_rate": 3.503586005466474e-05,
+      "loss": 2.0769,
+      "step": 332
+    },
+    {
+      "epoch": 2.116385302879841,
+      "grad_norm": 0.23992370069026947,
+      "learning_rate": 3.476956711048885e-05,
+      "loss": 2.1787,
+      "step": 333
+    },
+    {
+      "epoch": 2.1227408142999007,
+      "grad_norm": 0.25517672300338745,
+      "learning_rate": 3.4504159110985035e-05,
+      "loss": 2.2335,
+      "step": 334
+    },
+    {
+      "epoch": 2.1290963257199604,
+      "grad_norm": 0.2546270787715912,
+      "learning_rate": 3.423964766595906e-05,
+      "loss": 2.1222,
+      "step": 335
+    },
+    {
+      "epoch": 2.1354518371400197,
+      "grad_norm": 0.2679215371608734,
+      "learning_rate": 3.3976044345998365e-05,
+      "loss": 2.1973,
+      "step": 336
+    },
+    {
+      "epoch": 2.1418073485600795,
+      "grad_norm": 0.2851313650608063,
+      "learning_rate": 3.371336068196617e-05,
+      "loss": 2.2163,
+      "step": 337
+    },
+    {
+      "epoch": 2.1481628599801392,
+      "grad_norm": 0.31128042936325073,
+      "learning_rate": 3.345160816449687e-05,
+      "loss": 2.1715,
+      "step": 338
+    },
+    {
+      "epoch": 2.1545183714001985,
+      "grad_norm": 0.3457167148590088,
+      "learning_rate": 3.3190798243493595e-05,
+      "loss": 2.2396,
+      "step": 339
+    },
+    {
+      "epoch": 2.1608738828202583,
+      "grad_norm": 0.32169675827026367,
+      "learning_rate": 3.293094232762715e-05,
+      "loss": 2.1081,
+      "step": 340
+    },
+    {
+      "epoch": 2.1672293942403176,
+      "grad_norm": 0.24396829307079315,
+      "learning_rate": 3.26720517838371e-05,
+      "loss": 2.0669,
+      "step": 341
+    },
+    {
+      "epoch": 2.1735849056603773,
+      "grad_norm": 0.25499778985977173,
+      "learning_rate": 3.241413793683458e-05,
+      "loss": 2.1445,
+      "step": 342
+    },
+    {
+      "epoch": 2.179940417080437,
+      "grad_norm": 0.25378331542015076,
+      "learning_rate": 3.215721206860673e-05,
+      "loss": 2.1485,
+      "step": 343
+    },
+    {
+      "epoch": 2.1862959285004964,
+      "grad_norm": 0.2579626441001892,
+      "learning_rate": 3.19012854179234e-05,
+      "loss": 2.1483,
+      "step": 344
+    },
+    {
+      "epoch": 2.192651439920556,
+      "grad_norm": 0.2530902326107025,
+      "learning_rate": 3.164636917984534e-05,
+      "loss": 2.135,
+      "step": 345
+    },
+    {
+      "epoch": 2.199006951340616,
+      "grad_norm": 0.26239219307899475,
+      "learning_rate": 3.1392474505234604e-05,
+      "loss": 2.169,
+      "step": 346
+    },
+    {
+      "epoch": 2.205362462760675,
+      "grad_norm": 0.275020033121109,
+      "learning_rate": 3.11396125002668e-05,
+      "loss": 2.2371,
+      "step": 347
+    },
+    {
+      "epoch": 2.211717974180735,
+      "grad_norm": 0.27179548144340515,
+      "learning_rate": 3.088779422594514e-05,
+      "loss": 2.1947,
+      "step": 348
+    },
+    {
+      "epoch": 2.2180734856007946,
+      "grad_norm": 0.2944357097148895,
+      "learning_rate": 3.063703069761679e-05,
+      "loss": 2.2554,
+      "step": 349
+    },
+    {
+      "epoch": 2.224428997020854,
+      "grad_norm": 0.31469622254371643,
+      "learning_rate": 3.0387332884490805e-05,
+      "loss": 2.1767,
+      "step": 350
+    },
+    {
+      "epoch": 2.224428997020854,
+      "eval_loss": 2.3065717220306396,
+      "eval_runtime": 1.5093,
+      "eval_samples_per_second": 33.127,
+      "eval_steps_per_second": 8.613,
+      "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2225,7 +2591,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
+        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
@@ -2234,12 +2600,12 @@
       "should_evaluate": false,
       "should_log": false,
       "should_save": true,
-      "should_training_stop": false
+      "should_training_stop": true
     },
     "attributes": {}
   }
 },
-  "total_flos": 7.161700340780237e+18,
+  "total_flos": 8.35531706424361e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null