warmestman commited on
Commit
2323d53
1 Parent(s): 6d08abd

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13b0b8605d056f8ee7ce244a61451dfe6635de98c1b69d84b5289c60f0ab8eaa
3
  size 4993448880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d7f288875c8726d4f74614c4b4d578bf9f6c37fad3979dd0d09151a806084ec
3
  size 4993448880
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e6087bf4500e0ab3b2cab5f97f24befceaa6ebbfdae315b4bd2490c05a5d511
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52fbb2dcbc66d7c4ed8d2f213452b756beef85e83d9865c31ecac6cbf30b67a6
3
  size 1180663192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13e72aba5acacb0929adffd5014b3aaa0ab0fa8f3ab6b8a64e401afdf7596199
3
  size 3095446256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:931c79968a2b40db2a2f1274cfd99c95dae4c27c65afcce2ab924300046d3108
3
  size 3095446256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33ddc40084ed0bf1ba89d8a115875f6b967da1b3460fbfda40f8da959e6708a6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:233fa719614ba3255a065a1e0c9c0afee77a964a4cb4878352b10cf17f61bece
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1b89bc8aed79bb7d8a2beccf0e2fe565be37ff238354d0b75a836cc8219e4fe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11d4981b9ead74252fd412088033260fc7419a972daef228681b9d5d3c51ee44
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 41.91738628238271,
3
  "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-6000",
4
- "epoch": 53.89221556886228,
5
  "eval_steps": 1000,
6
- "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2248,6 +2248,255 @@
2248
  "eval_steps_per_second": 0.089,
2249
  "eval_wer": 42.20889940047302,
2250
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2251
  }
2252
  ],
2253
  "logging_steps": 25,
@@ -2255,7 +2504,7 @@
2255
  "num_input_tokens_seen": 0,
2256
  "num_train_epochs": 120,
2257
  "save_steps": 1000,
2258
- "total_flos": 4.8869955391389696e+20,
2259
  "train_batch_size": 16,
2260
  "trial_name": null,
2261
  "trial_params": null
 
1
  {
2
  "best_metric": 41.91738628238271,
3
  "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-6000",
4
+ "epoch": 59.880239520958085,
5
  "eval_steps": 1000,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2248
  "eval_steps_per_second": 0.089,
2249
  "eval_wer": 42.20889940047302,
2250
  "step": 9000
2251
+ },
2252
+ {
2253
+ "epoch": 54.04,
2254
+ "learning_rate": 5.629230769230769e-07,
2255
+ "loss": 0.0003,
2256
+ "step": 9025
2257
+ },
2258
+ {
2259
+ "epoch": 54.19,
2260
+ "learning_rate": 5.616410256410256e-07,
2261
+ "loss": 0.0003,
2262
+ "step": 9050
2263
+ },
2264
+ {
2265
+ "epoch": 54.34,
2266
+ "learning_rate": 5.603589743589743e-07,
2267
+ "loss": 0.0003,
2268
+ "step": 9075
2269
+ },
2270
+ {
2271
+ "epoch": 54.49,
2272
+ "learning_rate": 5.59076923076923e-07,
2273
+ "loss": 0.0003,
2274
+ "step": 9100
2275
+ },
2276
+ {
2277
+ "epoch": 54.64,
2278
+ "learning_rate": 5.577948717948717e-07,
2279
+ "loss": 0.0003,
2280
+ "step": 9125
2281
+ },
2282
+ {
2283
+ "epoch": 54.79,
2284
+ "learning_rate": 5.565128205128204e-07,
2285
+ "loss": 0.0003,
2286
+ "step": 9150
2287
+ },
2288
+ {
2289
+ "epoch": 54.94,
2290
+ "learning_rate": 5.552307692307692e-07,
2291
+ "loss": 0.0003,
2292
+ "step": 9175
2293
+ },
2294
+ {
2295
+ "epoch": 55.09,
2296
+ "learning_rate": 5.539487179487179e-07,
2297
+ "loss": 0.0002,
2298
+ "step": 9200
2299
+ },
2300
+ {
2301
+ "epoch": 55.24,
2302
+ "learning_rate": 5.526666666666666e-07,
2303
+ "loss": 0.0002,
2304
+ "step": 9225
2305
+ },
2306
+ {
2307
+ "epoch": 55.39,
2308
+ "learning_rate": 5.513846153846153e-07,
2309
+ "loss": 0.0002,
2310
+ "step": 9250
2311
+ },
2312
+ {
2313
+ "epoch": 55.54,
2314
+ "learning_rate": 5.501025641025641e-07,
2315
+ "loss": 0.0002,
2316
+ "step": 9275
2317
+ },
2318
+ {
2319
+ "epoch": 55.69,
2320
+ "learning_rate": 5.488205128205128e-07,
2321
+ "loss": 0.0003,
2322
+ "step": 9300
2323
+ },
2324
+ {
2325
+ "epoch": 55.84,
2326
+ "learning_rate": 5.475384615384615e-07,
2327
+ "loss": 0.0002,
2328
+ "step": 9325
2329
+ },
2330
+ {
2331
+ "epoch": 55.99,
2332
+ "learning_rate": 5.462564102564102e-07,
2333
+ "loss": 0.0002,
2334
+ "step": 9350
2335
+ },
2336
+ {
2337
+ "epoch": 56.14,
2338
+ "learning_rate": 5.44974358974359e-07,
2339
+ "loss": 0.0002,
2340
+ "step": 9375
2341
+ },
2342
+ {
2343
+ "epoch": 56.29,
2344
+ "learning_rate": 5.436923076923077e-07,
2345
+ "loss": 0.0002,
2346
+ "step": 9400
2347
+ },
2348
+ {
2349
+ "epoch": 56.44,
2350
+ "learning_rate": 5.424102564102564e-07,
2351
+ "loss": 0.0002,
2352
+ "step": 9425
2353
+ },
2354
+ {
2355
+ "epoch": 56.59,
2356
+ "learning_rate": 5.411282051282051e-07,
2357
+ "loss": 0.0002,
2358
+ "step": 9450
2359
+ },
2360
+ {
2361
+ "epoch": 56.74,
2362
+ "learning_rate": 5.398461538461539e-07,
2363
+ "loss": 0.0002,
2364
+ "step": 9475
2365
+ },
2366
+ {
2367
+ "epoch": 56.89,
2368
+ "learning_rate": 5.385641025641026e-07,
2369
+ "loss": 0.0002,
2370
+ "step": 9500
2371
+ },
2372
+ {
2373
+ "epoch": 57.04,
2374
+ "learning_rate": 5.372820512820513e-07,
2375
+ "loss": 0.0002,
2376
+ "step": 9525
2377
+ },
2378
+ {
2379
+ "epoch": 57.19,
2380
+ "learning_rate": 5.36e-07,
2381
+ "loss": 0.0002,
2382
+ "step": 9550
2383
+ },
2384
+ {
2385
+ "epoch": 57.34,
2386
+ "learning_rate": 5.347179487179488e-07,
2387
+ "loss": 0.0002,
2388
+ "step": 9575
2389
+ },
2390
+ {
2391
+ "epoch": 57.49,
2392
+ "learning_rate": 5.334358974358975e-07,
2393
+ "loss": 0.0002,
2394
+ "step": 9600
2395
+ },
2396
+ {
2397
+ "epoch": 57.63,
2398
+ "learning_rate": 5.321538461538462e-07,
2399
+ "loss": 0.0002,
2400
+ "step": 9625
2401
+ },
2402
+ {
2403
+ "epoch": 57.78,
2404
+ "learning_rate": 5.308717948717949e-07,
2405
+ "loss": 0.0002,
2406
+ "step": 9650
2407
+ },
2408
+ {
2409
+ "epoch": 57.93,
2410
+ "learning_rate": 5.295897435897437e-07,
2411
+ "loss": 0.0002,
2412
+ "step": 9675
2413
+ },
2414
+ {
2415
+ "epoch": 58.08,
2416
+ "learning_rate": 5.283076923076923e-07,
2417
+ "loss": 0.0002,
2418
+ "step": 9700
2419
+ },
2420
+ {
2421
+ "epoch": 58.23,
2422
+ "learning_rate": 5.27025641025641e-07,
2423
+ "loss": 0.0002,
2424
+ "step": 9725
2425
+ },
2426
+ {
2427
+ "epoch": 58.38,
2428
+ "learning_rate": 5.257435897435897e-07,
2429
+ "loss": 0.0002,
2430
+ "step": 9750
2431
+ },
2432
+ {
2433
+ "epoch": 58.53,
2434
+ "learning_rate": 5.244615384615385e-07,
2435
+ "loss": 0.0002,
2436
+ "step": 9775
2437
+ },
2438
+ {
2439
+ "epoch": 58.68,
2440
+ "learning_rate": 5.231794871794871e-07,
2441
+ "loss": 0.0002,
2442
+ "step": 9800
2443
+ },
2444
+ {
2445
+ "epoch": 58.83,
2446
+ "learning_rate": 5.218974358974358e-07,
2447
+ "loss": 0.0002,
2448
+ "step": 9825
2449
+ },
2450
+ {
2451
+ "epoch": 58.98,
2452
+ "learning_rate": 5.206153846153845e-07,
2453
+ "loss": 0.0002,
2454
+ "step": 9850
2455
+ },
2456
+ {
2457
+ "epoch": 59.13,
2458
+ "learning_rate": 5.193333333333332e-07,
2459
+ "loss": 0.0002,
2460
+ "step": 9875
2461
+ },
2462
+ {
2463
+ "epoch": 59.28,
2464
+ "learning_rate": 5.18051282051282e-07,
2465
+ "loss": 0.0002,
2466
+ "step": 9900
2467
+ },
2468
+ {
2469
+ "epoch": 59.43,
2470
+ "learning_rate": 5.167692307692307e-07,
2471
+ "loss": 0.0002,
2472
+ "step": 9925
2473
+ },
2474
+ {
2475
+ "epoch": 59.58,
2476
+ "learning_rate": 5.154871794871794e-07,
2477
+ "loss": 0.0002,
2478
+ "step": 9950
2479
+ },
2480
+ {
2481
+ "epoch": 59.73,
2482
+ "learning_rate": 5.142051282051281e-07,
2483
+ "loss": 0.0002,
2484
+ "step": 9975
2485
+ },
2486
+ {
2487
+ "epoch": 59.88,
2488
+ "learning_rate": 5.129230769230769e-07,
2489
+ "loss": 0.0002,
2490
+ "step": 10000
2491
+ },
2492
+ {
2493
+ "epoch": 59.88,
2494
+ "eval_loss": 0.7664361596107483,
2495
+ "eval_runtime": 596.9914,
2496
+ "eval_samples_per_second": 0.702,
2497
+ "eval_steps_per_second": 0.089,
2498
+ "eval_wer": 42.69842142896431,
2499
+ "step": 10000
2500
  }
2501
  ],
2502
  "logging_steps": 25,
 
2504
  "num_input_tokens_seen": 0,
2505
  "num_train_epochs": 120,
2506
  "save_steps": 1000,
2507
+ "total_flos": 5.429983718493389e+20,
2508
  "train_batch_size": 16,
2509
  "trial_name": null,
2510
  "trial_params": null