marinone94 committed on
Commit
bc5c400
•
1 Parent(s): d71bd83

Training in progress, step 800

Browse files
{checkpoint-500 → checkpoint-800}/config.json RENAMED
File without changes
{checkpoint-500 → checkpoint-800}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1851d90640fae1429c8bc6e2baa16e7ce7bcbc2c9e0360292b57c63705f63c3
3
  size 2490346001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad7ea02ec7c343ab141220559f7cfc2e88de82783306133f00b7a0f427c9391a
3
  size 2490346001
{checkpoint-500 → checkpoint-800}/preprocessor_config.json RENAMED
File without changes
{checkpoint-500 → checkpoint-800}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d37d8b46fff81cb7c0632707f6755c972760efacc89d90558f854dc54bd880e
3
  size 1262067185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa43bdbed16b5fc10ebee537908dcf5e5f5e67381037463baf9a181a9926057d
3
  size 1262067185
{checkpoint-500 → checkpoint-800}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aba1dab228290b3826ffb1f6e748f9ca7c766ef71c09288a7f28a9b4fc91edd7
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b18fc220a6ff2583766eb993c583889da7a20bad4aad048832b1c72e10236be1
3
+ size 14631
{checkpoint-500 → checkpoint-800}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c304e72bfea7592d4ddcc6e3b3b1a9bcab2965ed75957d798b3560c75dbe7d8
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea98ff9335db6568bd2c2133c9e5febb1265494530eb77d55ae8ee8b21d5e4e
3
  size 559
{checkpoint-500 → checkpoint-800}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d329f7de106210c9b555181c590d8fd032215809ecb1e289ee83b0ce08346df0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66bd4c9006c0fa0d3e816d59ca98b89719c7ee4d17d76fad0d1502cc27f1f6a6
3
  size 623
{checkpoint-500 → checkpoint-800}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2306805074971165,
5
- "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -351,11 +351,218 @@
351
  "eval_steps_per_second": 1.114,
352
  "eval_wer": 1.0,
353
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  }
355
  ],
356
  "max_steps": 6501,
357
  "num_train_epochs": 3,
358
- "total_flos": 9.466107317116815e+18,
359
  "trial_name": null,
360
  "trial_params": null
361
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3690888119953864,
5
+ "global_step": 800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
351
  "eval_steps_per_second": 1.114,
352
  "eval_wer": 1.0,
353
  "step": 500
354
+ },
355
+ {
356
+ "epoch": 0.24,
357
+ "learning_rate": 7.06789638932496e-05,
358
+ "loss": 2.9352,
359
+ "step": 510
360
+ },
361
+ {
362
+ "epoch": 0.24,
363
+ "learning_rate": 7.056122448979591e-05,
364
+ "loss": 2.9405,
365
+ "step": 520
366
+ },
367
+ {
368
+ "epoch": 0.24,
369
+ "learning_rate": 7.044348508634222e-05,
370
+ "loss": 2.9125,
371
+ "step": 530
372
+ },
373
+ {
374
+ "epoch": 0.25,
375
+ "learning_rate": 7.032574568288854e-05,
376
+ "loss": 2.9401,
377
+ "step": 540
378
+ },
379
+ {
380
+ "epoch": 0.25,
381
+ "learning_rate": 7.021978021978021e-05,
382
+ "loss": 3.1443,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 0.26,
387
+ "learning_rate": 7.010204081632653e-05,
388
+ "loss": 2.7692,
389
+ "step": 560
390
+ },
391
+ {
392
+ "epoch": 0.26,
393
+ "learning_rate": 6.998430141287284e-05,
394
+ "loss": 2.7574,
395
+ "step": 570
396
+ },
397
+ {
398
+ "epoch": 0.27,
399
+ "learning_rate": 6.986656200941914e-05,
400
+ "loss": 2.706,
401
+ "step": 580
402
+ },
403
+ {
404
+ "epoch": 0.27,
405
+ "learning_rate": 6.974882260596545e-05,
406
+ "loss": 2.7155,
407
+ "step": 590
408
+ },
409
+ {
410
+ "epoch": 0.28,
411
+ "learning_rate": 6.964285714285713e-05,
412
+ "loss": 2.8747,
413
+ "step": 600
414
+ },
415
+ {
416
+ "epoch": 0.28,
417
+ "eval_loss": 2.5006003379821777,
418
+ "eval_runtime": 129.3111,
419
+ "eval_samples_per_second": 35.728,
420
+ "eval_steps_per_second": 1.121,
421
+ "eval_wer": 1.0,
422
+ "step": 600
423
+ },
424
+ {
425
+ "epoch": 0.28,
426
+ "learning_rate": 6.952511773940345e-05,
427
+ "loss": 2.5368,
428
+ "step": 610
429
+ },
430
+ {
431
+ "epoch": 0.29,
432
+ "learning_rate": 6.940737833594976e-05,
433
+ "loss": 2.5942,
434
+ "step": 620
435
+ },
436
+ {
437
+ "epoch": 0.29,
438
+ "learning_rate": 6.928963893249607e-05,
439
+ "loss": 2.4907,
440
+ "step": 630
441
+ },
442
+ {
443
+ "epoch": 0.3,
444
+ "learning_rate": 6.917189952904238e-05,
445
+ "loss": 2.472,
446
+ "step": 640
447
+ },
448
+ {
449
+ "epoch": 0.3,
450
+ "learning_rate": 6.907770800627943e-05,
451
+ "loss": 2.6826,
452
+ "step": 650
453
+ },
454
+ {
455
+ "epoch": 0.3,
456
+ "learning_rate": 6.895996860282574e-05,
457
+ "loss": 2.3167,
458
+ "step": 660
459
+ },
460
+ {
461
+ "epoch": 0.31,
462
+ "learning_rate": 6.884222919937205e-05,
463
+ "loss": 2.3457,
464
+ "step": 670
465
+ },
466
+ {
467
+ "epoch": 0.31,
468
+ "learning_rate": 6.872448979591836e-05,
469
+ "loss": 2.3532,
470
+ "step": 680
471
+ },
472
+ {
473
+ "epoch": 0.32,
474
+ "learning_rate": 6.860675039246468e-05,
475
+ "loss": 2.4822,
476
+ "step": 690
477
+ },
478
+ {
479
+ "epoch": 0.32,
480
+ "learning_rate": 6.850078492935636e-05,
481
+ "loss": 3.1002,
482
+ "step": 700
483
+ },
484
+ {
485
+ "epoch": 0.32,
486
+ "eval_loss": 2.819200277328491,
487
+ "eval_runtime": 129.5044,
488
+ "eval_samples_per_second": 35.674,
489
+ "eval_steps_per_second": 1.12,
490
+ "eval_wer": 0.9999694058618369,
491
+ "step": 700
492
+ },
493
+ {
494
+ "epoch": 0.33,
495
+ "learning_rate": 6.838304552590267e-05,
496
+ "loss": 2.9139,
497
+ "step": 710
498
+ },
499
+ {
500
+ "epoch": 0.33,
501
+ "learning_rate": 6.826530612244897e-05,
502
+ "loss": 2.9957,
503
+ "step": 720
504
+ },
505
+ {
506
+ "epoch": 0.34,
507
+ "learning_rate": 6.814756671899528e-05,
508
+ "loss": 2.9842,
509
+ "step": 730
510
+ },
511
+ {
512
+ "epoch": 0.34,
513
+ "learning_rate": 6.802982731554159e-05,
514
+ "loss": 3.2804,
515
+ "step": 740
516
+ },
517
+ {
518
+ "epoch": 0.35,
519
+ "learning_rate": 6.792386185243327e-05,
520
+ "loss": 3.4944,
521
+ "step": 750
522
+ },
523
+ {
524
+ "epoch": 0.35,
525
+ "learning_rate": 6.780612244897958e-05,
526
+ "loss": 2.9905,
527
+ "step": 760
528
+ },
529
+ {
530
+ "epoch": 0.36,
531
+ "learning_rate": 6.768838304552589e-05,
532
+ "loss": 2.9692,
533
+ "step": 770
534
+ },
535
+ {
536
+ "epoch": 0.36,
537
+ "learning_rate": 6.757064364207221e-05,
538
+ "loss": 2.9747,
539
+ "step": 780
540
+ },
541
+ {
542
+ "epoch": 0.36,
543
+ "learning_rate": 6.745290423861852e-05,
544
+ "loss": 3.294,
545
+ "step": 790
546
+ },
547
+ {
548
+ "epoch": 0.37,
549
+ "learning_rate": 6.73469387755102e-05,
550
+ "loss": 3.5998,
551
+ "step": 800
552
+ },
553
+ {
554
+ "epoch": 0.37,
555
+ "eval_loss": 3.059178590774536,
556
+ "eval_runtime": 129.2337,
557
+ "eval_samples_per_second": 35.749,
558
+ "eval_steps_per_second": 1.122,
559
+ "eval_wer": 1.0,
560
+ "step": 800
561
  }
562
  ],
563
  "max_steps": 6501,
564
  "num_train_epochs": 3,
565
+ "total_flos": 1.5122477119825152e+19,
566
  "trial_name": null,
567
  "trial_params": null
568
  }
{checkpoint-500 → checkpoint-800}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f82fd4ef9d1ee16be752146f67271d9079ee4c70b3fc0deb5e49afca834a8366
3
  size 1262067185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa43bdbed16b5fc10ebee537908dcf5e5f5e67381037463baf9a181a9926057d
3
  size 1262067185