ZeroUniqueness committed on
Commit
d761575
•
1 Parent(s): b65f702

Training in progress, step 37000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23c2d32c288feead4318a69d3ff414e535f97637bea0a9ae28d2ce95cd3de348
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd9546c32a7c69f32e36c612415bdd464231e08b43267c0c48a31c7f40c0d408
3
  size 500897101
{checkpoint-33000 → checkpoint-36000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-33000 → checkpoint-36000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-33000 → checkpoint-36000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42cbea41e45d2d0b755f9666264a178377e0d657920d99d2ac36083b14a3fce
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23c2d32c288feead4318a69d3ff414e535f97637bea0a9ae28d2ce95cd3de348
3
  size 500897101
{checkpoint-33000/adapter_model → checkpoint-37000}/README.md RENAMED
File without changes
{checkpoint-33000/adapter_model → checkpoint-37000}/adapter_config.json RENAMED
File without changes
{checkpoint-33000/adapter_model → checkpoint-37000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42cbea41e45d2d0b755f9666264a178377e0d657920d99d2ac36083b14a3fce
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd9546c32a7c69f32e36c612415bdd464231e08b43267c0c48a31c7f40c0d408
3
  size 500897101
{checkpoint-33000 → checkpoint-37000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d118e7ceed7e898d194344dd82e1f57237c110a54ae13d845e1624b5ac6a2be7
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66752d28f3b7ca12e1b483bbb5bd48fa49998d34e026c908100c9cd122ff144f
3
  size 1001723453
{checkpoint-33000 → checkpoint-37000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:169992729d29d9e2e5dcb82f149024cae6d9e402f3075f5eab6b5fc3fbaa107d
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12a1e84087c196e7e618617b884516dfa94b6d7a7e668c1d280c6b4c8239f845
3
  size 14575
{checkpoint-33000 → checkpoint-37000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be0f09eabe27444782208394bfcda4067efa0ba31e368befa219e0b566bccb1c
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d7046cc5cdc134adf08950434c6180a09920d2a73dfbc0c6da7d0543135abf2
3
  size 627
{checkpoint-33000 → checkpoint-37000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.5719351172447205,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-33000",
4
- "epoch": 1.2303791804928974,
5
- "global_step": 33000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2250,11 +2250,283 @@
2250
  "eval_samples_per_second": 0.428,
2251
  "eval_steps_per_second": 0.428,
2252
  "step": 33000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2253
  }
2254
  ],
2255
  "max_steps": 80463,
2256
  "num_train_epochs": 3,
2257
- "total_flos": 9.255791880986788e+18,
2258
  "trial_name": null,
2259
  "trial_params": null
2260
  }
 
1
  {
2
+ "best_metric": 0.5555862188339233,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-37000",
4
+ "epoch": 1.3795160508556727,
5
+ "global_step": 37000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2250
  "eval_samples_per_second": 0.428,
2251
  "eval_steps_per_second": 0.428,
2252
  "step": 33000
2253
+ },
2254
+ {
2255
+ "epoch": 1.23,
2256
+ "learning_rate": 0.0001275078855425007,
2257
+ "loss": 0.4971,
2258
+ "step": 33100
2259
+ },
2260
+ {
2261
+ "epoch": 1.24,
2262
+ "learning_rate": 0.0001271322532054803,
2263
+ "loss": 0.4977,
2264
+ "step": 33200
2265
+ },
2266
+ {
2267
+ "epoch": 1.24,
2268
+ "learning_rate": 0.0001267562071541254,
2269
+ "loss": 0.499,
2270
+ "step": 33300
2271
+ },
2272
+ {
2273
+ "epoch": 1.25,
2274
+ "learning_rate": 0.00012637975312241022,
2275
+ "loss": 0.5044,
2276
+ "step": 33400
2277
+ },
2278
+ {
2279
+ "epoch": 1.25,
2280
+ "learning_rate": 0.00012600289685052996,
2281
+ "loss": 0.5019,
2282
+ "step": 33500
2283
+ },
2284
+ {
2285
+ "epoch": 1.25,
2286
+ "learning_rate": 0.00012562564408481327,
2287
+ "loss": 0.5225,
2288
+ "step": 33600
2289
+ },
2290
+ {
2291
+ "epoch": 1.26,
2292
+ "learning_rate": 0.00012524800057763438,
2293
+ "loss": 0.5503,
2294
+ "step": 33700
2295
+ },
2296
+ {
2297
+ "epoch": 1.26,
2298
+ "learning_rate": 0.00012486997208732573,
2299
+ "loss": 0.5025,
2300
+ "step": 33800
2301
+ },
2302
+ {
2303
+ "epoch": 1.26,
2304
+ "learning_rate": 0.0001244915643780899,
2305
+ "loss": 0.5187,
2306
+ "step": 33900
2307
+ },
2308
+ {
2309
+ "epoch": 1.27,
2310
+ "learning_rate": 0.00012411278321991195,
2311
+ "loss": 0.5199,
2312
+ "step": 34000
2313
+ },
2314
+ {
2315
+ "epoch": 1.27,
2316
+ "eval_loss": 0.5665221810340881,
2317
+ "eval_runtime": 1263.0264,
2318
+ "eval_samples_per_second": 0.429,
2319
+ "eval_steps_per_second": 0.429,
2320
+ "step": 34000
2321
+ },
2322
+ {
2323
+ "epoch": 1.27,
2324
+ "learning_rate": 0.00012373363438847117,
2325
+ "loss": 0.5135,
2326
+ "step": 34100
2327
+ },
2328
+ {
2329
+ "epoch": 1.28,
2330
+ "learning_rate": 0.00012335412366505324,
2331
+ "loss": 0.5065,
2332
+ "step": 34200
2333
+ },
2334
+ {
2335
+ "epoch": 1.28,
2336
+ "learning_rate": 0.000122974256836462,
2337
+ "loss": 0.5223,
2338
+ "step": 34300
2339
+ },
2340
+ {
2341
+ "epoch": 1.28,
2342
+ "learning_rate": 0.00012259403969493114,
2343
+ "loss": 0.4946,
2344
+ "step": 34400
2345
+ },
2346
+ {
2347
+ "epoch": 1.29,
2348
+ "learning_rate": 0.00012221347803803605,
2349
+ "loss": 0.5105,
2350
+ "step": 34500
2351
+ },
2352
+ {
2353
+ "epoch": 1.29,
2354
+ "learning_rate": 0.00012183257766860514,
2355
+ "loss": 0.4812,
2356
+ "step": 34600
2357
+ },
2358
+ {
2359
+ "epoch": 1.29,
2360
+ "learning_rate": 0.00012145134439463178,
2361
+ "loss": 0.4981,
2362
+ "step": 34700
2363
+ },
2364
+ {
2365
+ "epoch": 1.3,
2366
+ "learning_rate": 0.0001210697840291852,
2367
+ "loss": 0.5038,
2368
+ "step": 34800
2369
+ },
2370
+ {
2371
+ "epoch": 1.3,
2372
+ "learning_rate": 0.00012068790239032241,
2373
+ "loss": 0.5551,
2374
+ "step": 34900
2375
+ },
2376
+ {
2377
+ "epoch": 1.3,
2378
+ "learning_rate": 0.00012030570530099902,
2379
+ "loss": 0.4964,
2380
+ "step": 35000
2381
+ },
2382
+ {
2383
+ "epoch": 1.3,
2384
+ "eval_loss": 0.562954843044281,
2385
+ "eval_runtime": 1252.1434,
2386
+ "eval_samples_per_second": 0.433,
2387
+ "eval_steps_per_second": 0.433,
2388
+ "step": 35000
2389
+ },
2390
+ {
2391
+ "epoch": 1.31,
2392
+ "learning_rate": 0.00011992319858898077,
2393
+ "loss": 0.4952,
2394
+ "step": 35100
2395
+ },
2396
+ {
2397
+ "epoch": 1.31,
2398
+ "learning_rate": 0.0001195403880867545,
2399
+ "loss": 0.5157,
2400
+ "step": 35200
2401
+ },
2402
+ {
2403
+ "epoch": 1.32,
2404
+ "learning_rate": 0.00011915727963143922,
2405
+ "loss": 0.4973,
2406
+ "step": 35300
2407
+ },
2408
+ {
2409
+ "epoch": 1.32,
2410
+ "learning_rate": 0.00011877387906469721,
2411
+ "loss": 0.4884,
2412
+ "step": 35400
2413
+ },
2414
+ {
2415
+ "epoch": 1.32,
2416
+ "learning_rate": 0.00011839019223264489,
2417
+ "loss": 0.5017,
2418
+ "step": 35500
2419
+ },
2420
+ {
2421
+ "epoch": 1.33,
2422
+ "learning_rate": 0.00011800622498576363,
2423
+ "loss": 0.5157,
2424
+ "step": 35600
2425
+ },
2426
+ {
2427
+ "epoch": 1.33,
2428
+ "learning_rate": 0.00011762198317881059,
2429
+ "loss": 0.4774,
2430
+ "step": 35700
2431
+ },
2432
+ {
2433
+ "epoch": 1.33,
2434
+ "learning_rate": 0.0001172374726707295,
2435
+ "loss": 0.4855,
2436
+ "step": 35800
2437
+ },
2438
+ {
2439
+ "epoch": 1.34,
2440
+ "learning_rate": 0.00011685269932456115,
2441
+ "loss": 0.5134,
2442
+ "step": 35900
2443
+ },
2444
+ {
2445
+ "epoch": 1.34,
2446
+ "learning_rate": 0.00011646766900735422,
2447
+ "loss": 0.5143,
2448
+ "step": 36000
2449
+ },
2450
+ {
2451
+ "epoch": 1.34,
2452
+ "eval_loss": 0.5594063997268677,
2453
+ "eval_runtime": 1270.0722,
2454
+ "eval_samples_per_second": 0.427,
2455
+ "eval_steps_per_second": 0.427,
2456
+ "step": 36000
2457
+ },
2458
+ {
2459
+ "epoch": 1.35,
2460
+ "learning_rate": 0.00011608238759007561,
2461
+ "loss": 0.5268,
2462
+ "step": 36100
2463
+ },
2464
+ {
2465
+ "epoch": 1.35,
2466
+ "learning_rate": 0.00011569686094752101,
2467
+ "loss": 0.5179,
2468
+ "step": 36200
2469
+ },
2470
+ {
2471
+ "epoch": 1.35,
2472
+ "learning_rate": 0.00011531109495822545,
2473
+ "loss": 0.5236,
2474
+ "step": 36300
2475
+ },
2476
+ {
2477
+ "epoch": 1.36,
2478
+ "learning_rate": 0.00011492509550437339,
2479
+ "loss": 0.5197,
2480
+ "step": 36400
2481
+ },
2482
+ {
2483
+ "epoch": 1.36,
2484
+ "learning_rate": 0.0001145388684717092,
2485
+ "loss": 0.5109,
2486
+ "step": 36500
2487
+ },
2488
+ {
2489
+ "epoch": 1.36,
2490
+ "learning_rate": 0.00011415241974944744,
2491
+ "loss": 0.5126,
2492
+ "step": 36600
2493
+ },
2494
+ {
2495
+ "epoch": 1.37,
2496
+ "learning_rate": 0.00011376575523018296,
2497
+ "loss": 0.501,
2498
+ "step": 36700
2499
+ },
2500
+ {
2501
+ "epoch": 1.37,
2502
+ "learning_rate": 0.00011337888080980115,
2503
+ "loss": 0.4888,
2504
+ "step": 36800
2505
+ },
2506
+ {
2507
+ "epoch": 1.38,
2508
+ "learning_rate": 0.00011299180238738789,
2509
+ "loss": 0.5324,
2510
+ "step": 36900
2511
+ },
2512
+ {
2513
+ "epoch": 1.38,
2514
+ "learning_rate": 0.00011260452586513981,
2515
+ "loss": 0.5053,
2516
+ "step": 37000
2517
+ },
2518
+ {
2519
+ "epoch": 1.38,
2520
+ "eval_loss": 0.5555862188339233,
2521
+ "eval_runtime": 1324.5375,
2522
+ "eval_samples_per_second": 0.409,
2523
+ "eval_steps_per_second": 0.409,
2524
+ "step": 37000
2525
  }
2526
  ],
2527
  "max_steps": 80463,
2528
  "num_train_epochs": 3,
2529
+ "total_flos": 1.0375131020284969e+19,
2530
  "trial_name": null,
2531
  "trial_params": null
2532
  }
{checkpoint-33000 → checkpoint-37000}/training_args.bin RENAMED
File without changes