warmestman commited on
Commit
882d11c
1 Parent(s): 947f61a

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c2d0e6dc3a0a3cb0e78c67a617f73907c8782cfdb1d92576f72fe63fe84ce1d
3
  size 4993448880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80b1f8d231b5ff1859039320fa0188cee6c8c45459e38193a22bc1676daee37c
3
  size 4993448880
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f09bb99c6964c0121b34c9f37c63e5bcf6766e01d4f789e5b6ad5bef47ff520
3
  size 1180663192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9006daf5d78d90bf22b0c813d9e776720f90a688af4a7bd80cbc7dc7c09b019
3
  size 1180663192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47d1730c3ecc75ca8b98338f8305200732ffa3ff6c6f8d8301f20b5d21e51360
3
  size 3095446256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba2a236d039fcf4f5348b184a304acbdf17c1575f2f90a02947dc19886042b45
3
  size 3095446256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:943488f319c579b17e95c507cdca4305465e095da760375c2bd0063ba7eb635c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad76ae298f288aa3230881b9dfb28684fc083c10f565ddf65a09a40de5f4dc0c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:160e35db71592c700222e3cea1c8f7a567a820242110e1c69d8a8d82e01ac9b2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:318a20b90d087d5ab98fe55e815e26cf3c50d3ca88ad22e2ca83eebaaef1c1d2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 41.504867719047354,
3
- "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-1000",
4
- "epoch": 5.9880239520958085,
5
  "eval_steps": 1000,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -256,6 +256,255 @@
256
  "eval_steps_per_second": 0.091,
257
  "eval_wer": 41.504867719047354,
258
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  }
260
  ],
261
  "logging_steps": 25,
@@ -263,7 +512,7 @@
263
  "num_input_tokens_seen": 0,
264
  "num_train_epochs": 60,
265
  "save_steps": 1000,
266
- "total_flos": 5.43090104303616e+19,
267
  "train_batch_size": 16,
268
  "trial_name": null,
269
  "trial_params": null
 
1
  {
2
+ "best_metric": 38.2982234200539,
3
+ "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-2000",
4
+ "epoch": 11.976047904191617,
5
  "eval_steps": 1000,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
256
  "eval_steps_per_second": 0.091,
257
  "eval_wer": 41.504867719047354,
258
  "step": 1000
259
+ },
260
+ {
261
+ "epoch": 6.14,
262
+ "learning_rate": 9.447368421052633e-05,
263
+ "loss": 0.0455,
264
+ "step": 1025
265
+ },
266
+ {
267
+ "epoch": 6.29,
268
+ "learning_rate": 9.421052631578949e-05,
269
+ "loss": 0.0471,
270
+ "step": 1050
271
+ },
272
+ {
273
+ "epoch": 6.44,
274
+ "learning_rate": 9.394736842105264e-05,
275
+ "loss": 0.0471,
276
+ "step": 1075
277
+ },
278
+ {
279
+ "epoch": 6.59,
280
+ "learning_rate": 9.36842105263158e-05,
281
+ "loss": 0.0465,
282
+ "step": 1100
283
+ },
284
+ {
285
+ "epoch": 6.74,
286
+ "learning_rate": 9.342105263157896e-05,
287
+ "loss": 0.0486,
288
+ "step": 1125
289
+ },
290
+ {
291
+ "epoch": 6.89,
292
+ "learning_rate": 9.315789473684211e-05,
293
+ "loss": 0.044,
294
+ "step": 1150
295
+ },
296
+ {
297
+ "epoch": 7.04,
298
+ "learning_rate": 9.289473684210527e-05,
299
+ "loss": 0.0421,
300
+ "step": 1175
301
+ },
302
+ {
303
+ "epoch": 7.19,
304
+ "learning_rate": 9.263157894736843e-05,
305
+ "loss": 0.0333,
306
+ "step": 1200
307
+ },
308
+ {
309
+ "epoch": 7.34,
310
+ "learning_rate": 9.236842105263158e-05,
311
+ "loss": 0.0323,
312
+ "step": 1225
313
+ },
314
+ {
315
+ "epoch": 7.49,
316
+ "learning_rate": 9.210526315789474e-05,
317
+ "loss": 0.0289,
318
+ "step": 1250
319
+ },
320
+ {
321
+ "epoch": 7.63,
322
+ "learning_rate": 9.18421052631579e-05,
323
+ "loss": 0.0339,
324
+ "step": 1275
325
+ },
326
+ {
327
+ "epoch": 7.78,
328
+ "learning_rate": 9.157894736842105e-05,
329
+ "loss": 0.0332,
330
+ "step": 1300
331
+ },
332
+ {
333
+ "epoch": 7.93,
334
+ "learning_rate": 9.131578947368421e-05,
335
+ "loss": 0.0701,
336
+ "step": 1325
337
+ },
338
+ {
339
+ "epoch": 8.08,
340
+ "learning_rate": 9.105263157894738e-05,
341
+ "loss": 0.0601,
342
+ "step": 1350
343
+ },
344
+ {
345
+ "epoch": 8.23,
346
+ "learning_rate": 9.078947368421054e-05,
347
+ "loss": 0.0285,
348
+ "step": 1375
349
+ },
350
+ {
351
+ "epoch": 8.38,
352
+ "learning_rate": 9.052631578947369e-05,
353
+ "loss": 0.0258,
354
+ "step": 1400
355
+ },
356
+ {
357
+ "epoch": 8.53,
358
+ "learning_rate": 9.026315789473685e-05,
359
+ "loss": 0.0262,
360
+ "step": 1425
361
+ },
362
+ {
363
+ "epoch": 8.68,
364
+ "learning_rate": 9e-05,
365
+ "loss": 0.0256,
366
+ "step": 1450
367
+ },
368
+ {
369
+ "epoch": 8.83,
370
+ "learning_rate": 8.973684210526316e-05,
371
+ "loss": 0.0286,
372
+ "step": 1475
373
+ },
374
+ {
375
+ "epoch": 8.98,
376
+ "learning_rate": 8.947368421052632e-05,
377
+ "loss": 0.0265,
378
+ "step": 1500
379
+ },
380
+ {
381
+ "epoch": 9.13,
382
+ "learning_rate": 8.921052631578948e-05,
383
+ "loss": 0.0208,
384
+ "step": 1525
385
+ },
386
+ {
387
+ "epoch": 9.28,
388
+ "learning_rate": 8.894736842105263e-05,
389
+ "loss": 0.0237,
390
+ "step": 1550
391
+ },
392
+ {
393
+ "epoch": 9.43,
394
+ "learning_rate": 8.868421052631579e-05,
395
+ "loss": 0.0235,
396
+ "step": 1575
397
+ },
398
+ {
399
+ "epoch": 9.58,
400
+ "learning_rate": 8.842105263157894e-05,
401
+ "loss": 0.0224,
402
+ "step": 1600
403
+ },
404
+ {
405
+ "epoch": 9.73,
406
+ "learning_rate": 8.81578947368421e-05,
407
+ "loss": 0.024,
408
+ "step": 1625
409
+ },
410
+ {
411
+ "epoch": 9.88,
412
+ "learning_rate": 8.789473684210526e-05,
413
+ "loss": 0.0246,
414
+ "step": 1650
415
+ },
416
+ {
417
+ "epoch": 10.03,
418
+ "learning_rate": 8.763157894736841e-05,
419
+ "loss": 0.0248,
420
+ "step": 1675
421
+ },
422
+ {
423
+ "epoch": 10.18,
424
+ "learning_rate": 8.736842105263158e-05,
425
+ "loss": 0.0173,
426
+ "step": 1700
427
+ },
428
+ {
429
+ "epoch": 10.33,
430
+ "learning_rate": 8.710526315789474e-05,
431
+ "loss": 0.0174,
432
+ "step": 1725
433
+ },
434
+ {
435
+ "epoch": 10.48,
436
+ "learning_rate": 8.68421052631579e-05,
437
+ "loss": 0.0215,
438
+ "step": 1750
439
+ },
440
+ {
441
+ "epoch": 10.63,
442
+ "learning_rate": 8.657894736842105e-05,
443
+ "loss": 0.0236,
444
+ "step": 1775
445
+ },
446
+ {
447
+ "epoch": 10.78,
448
+ "learning_rate": 8.631578947368421e-05,
449
+ "loss": 0.0203,
450
+ "step": 1800
451
+ },
452
+ {
453
+ "epoch": 10.93,
454
+ "learning_rate": 8.605263157894738e-05,
455
+ "loss": 0.0192,
456
+ "step": 1825
457
+ },
458
+ {
459
+ "epoch": 11.08,
460
+ "learning_rate": 8.578947368421054e-05,
461
+ "loss": 0.0172,
462
+ "step": 1850
463
+ },
464
+ {
465
+ "epoch": 11.23,
466
+ "learning_rate": 8.552631578947369e-05,
467
+ "loss": 0.0197,
468
+ "step": 1875
469
+ },
470
+ {
471
+ "epoch": 11.38,
472
+ "learning_rate": 8.526315789473685e-05,
473
+ "loss": 0.0188,
474
+ "step": 1900
475
+ },
476
+ {
477
+ "epoch": 11.53,
478
+ "learning_rate": 8.5e-05,
479
+ "loss": 0.0197,
480
+ "step": 1925
481
+ },
482
+ {
483
+ "epoch": 11.68,
484
+ "learning_rate": 8.473684210526316e-05,
485
+ "loss": 0.0197,
486
+ "step": 1950
487
+ },
488
+ {
489
+ "epoch": 11.83,
490
+ "learning_rate": 8.447368421052632e-05,
491
+ "loss": 0.0212,
492
+ "step": 1975
493
+ },
494
+ {
495
+ "epoch": 11.98,
496
+ "learning_rate": 8.421052631578948e-05,
497
+ "loss": 0.0183,
498
+ "step": 2000
499
+ },
500
+ {
501
+ "epoch": 11.98,
502
+ "eval_loss": 0.49957072734832764,
503
+ "eval_runtime": 578.898,
504
+ "eval_samples_per_second": 0.724,
505
+ "eval_steps_per_second": 0.092,
506
+ "eval_wer": 38.2982234200539,
507
+ "step": 2000
508
  }
509
  ],
510
  "logging_steps": 25,
 
512
  "num_input_tokens_seen": 0,
513
  "num_train_epochs": 60,
514
  "save_steps": 1000,
515
+ "total_flos": 1.0860782836580352e+20,
516
  "train_batch_size": 16,
517
  "trial_name": null,
518
  "trial_params": null