{ "best_metric": null, "best_model_checkpoint": null, "epoch": 150.0, "global_step": 3450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.43, "learning_rate": 4.2e-06, "loss": 20.1322, "step": 10 }, { "epoch": 0.87, "learning_rate": 1.02e-05, "loss": 20.2807, "step": 20 }, { "epoch": 1.3, "learning_rate": 1.5599999999999996e-05, "loss": 19.9863, "step": 30 }, { "epoch": 1.74, "learning_rate": 2.1599999999999996e-05, "loss": 16.998, "step": 40 }, { "epoch": 2.17, "learning_rate": 2.7599999999999997e-05, "loss": 11.753, "step": 50 }, { "epoch": 2.61, "learning_rate": 3.36e-05, "loss": 8.5319, "step": 60 }, { "epoch": 3.04, "learning_rate": 3.96e-05, "loss": 7.0837, "step": 70 }, { "epoch": 3.48, "learning_rate": 4.56e-05, "loss": 6.0396, "step": 80 }, { "epoch": 3.91, "learning_rate": 5.1599999999999994e-05, "loss": 5.5626, "step": 90 }, { "epoch": 4.35, "learning_rate": 5.76e-05, "loss": 5.1697, "step": 100 }, { "epoch": 4.78, "learning_rate": 6.359999999999999e-05, "loss": 4.7468, "step": 110 }, { "epoch": 5.22, "learning_rate": 6.96e-05, "loss": 4.6084, "step": 120 }, { "epoch": 5.65, "learning_rate": 7.56e-05, "loss": 4.0859, "step": 130 }, { "epoch": 6.09, "learning_rate": 8.16e-05, "loss": 3.9953, "step": 140 }, { "epoch": 6.52, "learning_rate": 8.759999999999999e-05, "loss": 3.7522, "step": 150 }, { "epoch": 6.96, "learning_rate": 9.36e-05, "loss": 3.5674, "step": 160 }, { "epoch": 7.39, "learning_rate": 9.96e-05, "loss": 3.4598, "step": 170 }, { "epoch": 7.83, "learning_rate": 0.00010559999999999998, "loss": 3.42, "step": 180 }, { "epoch": 8.26, "learning_rate": 0.00011159999999999999, "loss": 3.3579, "step": 190 }, { "epoch": 8.7, "learning_rate": 0.0001176, "loss": 3.3237, "step": 200 }, { "epoch": 9.13, "learning_rate": 0.0001236, "loss": 3.2857, "step": 210 }, { "epoch": 9.57, "learning_rate": 0.00012959999999999998, "loss": 3.2553, "step": 220 }, { "epoch": 10.0, "learning_rate": 0.0001356, "loss": 3.2545, "step": 230 }, { "epoch": 10.43, "learning_rate": 0.00014159999999999997, "loss": 3.2053, "step": 240 }, { "epoch": 10.87, "learning_rate": 0.00014759999999999998, "loss": 3.249, "step": 250 }, { "epoch": 11.3, "learning_rate": 0.0001536, "loss": 3.2334, "step": 260 }, { "epoch": 11.74, "learning_rate": 0.0001596, "loss": 3.2278, "step": 270 }, { "epoch": 12.17, "learning_rate": 0.0001656, "loss": 3.206, "step": 280 }, { "epoch": 12.61, "learning_rate": 0.00017159999999999997, "loss": 3.2164, "step": 290 }, { "epoch": 13.04, "learning_rate": 0.00017759999999999998, "loss": 3.2048, "step": 300 }, { "epoch": 13.48, "learning_rate": 0.0001836, "loss": 3.2024, "step": 310 }, { "epoch": 13.91, "learning_rate": 0.00018959999999999997, "loss": 3.2113, "step": 320 }, { "epoch": 14.35, "learning_rate": 0.00019559999999999998, "loss": 3.2023, "step": 330 }, { "epoch": 14.78, "learning_rate": 0.0002016, "loss": 3.2135, "step": 340 }, { "epoch": 15.22, "learning_rate": 0.00020759999999999998, "loss": 3.2014, "step": 350 }, { "epoch": 15.65, "learning_rate": 0.00021359999999999996, "loss": 3.1402, "step": 360 }, { "epoch": 16.09, "learning_rate": 0.00021959999999999997, "loss": 3.093, "step": 370 }, { "epoch": 16.52, "learning_rate": 0.00022559999999999998, "loss": 2.9955, "step": 380 }, { "epoch": 16.96, "learning_rate": 0.0002316, "loss": 2.8186, "step": 390 }, { "epoch": 17.39, "learning_rate": 0.0002376, "loss": 2.5288, "step": 400 }, { "epoch": 17.39, "eval_loss": 2.0544939041137695, "eval_runtime": 2.3101, "eval_samples_per_second": 96.532, "eval_steps_per_second": 3.03, "eval_wer": 0.6038511871377827, "step": 400 }, { "epoch": 17.83, "learning_rate": 0.00024359999999999999, "loss": 2.1609, "step": 410 }, { "epoch": 18.26, "learning_rate": 0.00024959999999999994, "loss": 1.8764, "step": 420 }, { "epoch": 18.7, "learning_rate": 0.0002556, "loss": 1.6154, "step": 430 }, { "epoch": 19.13, "learning_rate": 0.00026159999999999996, "loss": 1.4481, "step": 440 }, { "epoch": 19.57, "learning_rate": 0.0002676, "loss": 1.2572, "step": 450 }, { "epoch": 20.0, "learning_rate": 0.0002736, "loss": 1.2195, "step": 460 }, { "epoch": 20.43, "learning_rate": 0.00027959999999999997, "loss": 1.0625, "step": 470 }, { "epoch": 20.87, "learning_rate": 0.00028559999999999995, "loss": 1.0071, "step": 480 }, { "epoch": 21.3, "learning_rate": 0.0002916, "loss": 0.9312, "step": 490 }, { "epoch": 21.74, "learning_rate": 0.00029759999999999997, "loss": 0.8633, "step": 500 }, { "epoch": 22.17, "learning_rate": 0.00029052631578947366, "loss": 0.8454, "step": 510 }, { "epoch": 22.61, "learning_rate": 0.0002747368421052631, "loss": 0.7471, "step": 520 }, { "epoch": 23.04, "learning_rate": 0.0002589473684210526, "loss": 0.7783, "step": 530 }, { "epoch": 23.48, "learning_rate": 0.00024315789473684207, "loss": 0.64, "step": 540 }, { "epoch": 23.91, "learning_rate": 0.00022736842105263157, "loss": 0.6077, "step": 550 }, { "epoch": 24.35, "learning_rate": 0.00021157894736842102, "loss": 0.5887, "step": 560 }, { "epoch": 24.78, "learning_rate": 0.0001957894736842105, "loss": 0.5389, "step": 570 }, { "epoch": 25.22, "learning_rate": 0.00017999999999999998, "loss": 0.4779, "step": 580 }, { "epoch": 25.65, "learning_rate": 0.00016421052631578948, "loss": 0.4971, "step": 590 }, { "epoch": 26.09, "learning_rate": 0.00014842105263157893, "loss": 0.4876, "step": 600 }, { "epoch": 26.52, "learning_rate": 0.0001326315789473684, "loss": 0.4645, "step": 610 }, { "epoch": 26.96, "learning_rate": 0.00011684210526315788, "loss": 0.3915, "step": 620 }, { "epoch": 27.39, "learning_rate": 0.00010105263157894735, "loss": 0.3831, "step": 630 }, { "epoch": 27.83, "learning_rate": 8.526315789473684e-05, "loss": 0.3738, "step": 640 }, { "epoch": 28.26, "learning_rate": 6.947368421052631e-05, "loss": 0.3274, "step": 650 }, { "epoch": 28.7, "learning_rate": 5.3684210526315784e-05, "loss": 0.3508, "step": 660 }, { "epoch": 29.13, "learning_rate": 3.789473684210526e-05, "loss": 0.3293, "step": 670 }, { "epoch": 29.57, "learning_rate": 2.2105263157894733e-05, "loss": 0.3146, "step": 680 }, { "epoch": 30.0, "learning_rate": 6.3157894736842095e-06, "loss": 0.2893, "step": 690 }, { "epoch": 30.0, "step": 690, "total_flos": 3.7988686760446525e+18, "train_loss": 3.560509018276049, "train_runtime": 533.4671, "train_samples_per_second": 40.49, "train_steps_per_second": 1.293 }, { "epoch": 30.43, "learning_rate": 5.999999999999999e-06, "loss": 0.3166, "step": 700 }, { "epoch": 30.87, "learning_rate": 1.1999999999999999e-05, "loss": 0.3066, "step": 710 }, { "epoch": 31.3, "learning_rate": 1.7999999999999997e-05, "loss": 0.2917, "step": 720 }, { "epoch": 31.74, "learning_rate": 2.3999999999999997e-05, "loss": 0.2866, "step": 730 }, { "epoch": 32.17, "learning_rate": 2.9999999999999997e-05, "loss": 0.2835, "step": 740 }, { "epoch": 32.61, "learning_rate": 3.5999999999999994e-05, "loss": 0.2765, "step": 750 }, { "epoch": 33.04, "learning_rate": 4.2e-05, "loss": 0.2951, "step": 760 }, { "epoch": 33.48, "learning_rate": 4.7999999999999994e-05, "loss": 0.2685, "step": 770 }, { "epoch": 33.91, "learning_rate": 5.399999999999999e-05, "loss": 0.3011, "step": 780 }, { "epoch": 34.35, "learning_rate": 5.94e-05, "loss": 0.273, "step": 790 }, { "epoch": 34.78, "learning_rate": 6.479999999999999e-05, "loss": 0.2616, "step": 800 }, { "epoch": 34.78, "eval_loss": 0.4040215313434601, "eval_runtime": 2.307, "eval_samples_per_second": 96.662, "eval_steps_per_second": 3.034, "eval_wer": 0.12422882781828379, "step": 800 }, { "epoch": 35.22, "learning_rate": 7.079999999999999e-05, "loss": 0.2468, "step": 810 }, { "epoch": 35.65, "learning_rate": 7.68e-05, "loss": 0.2571, "step": 820 }, { "epoch": 36.09, "learning_rate": 8.28e-05, "loss": 0.2412, "step": 830 }, { "epoch": 36.52, "learning_rate": 8.879999999999999e-05, "loss": 0.2392, "step": 840 }, { "epoch": 36.96, "learning_rate": 9.479999999999999e-05, "loss": 0.2594, "step": 850 }, { "epoch": 37.39, "learning_rate": 0.0001008, "loss": 0.2413, "step": 860 }, { "epoch": 37.83, "learning_rate": 0.00010679999999999998, "loss": 0.2502, "step": 870 }, { "epoch": 38.26, "learning_rate": 0.00011279999999999999, "loss": 0.248, "step": 880 }, { "epoch": 38.7, "learning_rate": 0.0001188, "loss": 0.2272, "step": 890 }, { "epoch": 39.13, "learning_rate": 0.00012479999999999997, "loss": 0.2359, "step": 900 }, { "epoch": 39.57, "learning_rate": 0.00013079999999999998, "loss": 0.2299, "step": 910 }, { "epoch": 40.0, "learning_rate": 0.0001368, "loss": 0.2321, "step": 920 }, { "epoch": 40.43, "learning_rate": 0.00014279999999999997, "loss": 0.2385, "step": 930 }, { "epoch": 40.87, "learning_rate": 0.00014879999999999998, "loss": 0.2183, "step": 940 }, { "epoch": 41.3, "learning_rate": 0.0001548, "loss": 0.2318, "step": 950 }, { "epoch": 41.74, "learning_rate": 0.0001608, "loss": 0.241, "step": 960 }, { "epoch": 42.17, "learning_rate": 0.0001668, "loss": 0.2314, "step": 970 }, { "epoch": 42.61, "learning_rate": 0.00017279999999999997, "loss": 0.2195, "step": 980 }, { "epoch": 43.04, "learning_rate": 0.00017879999999999998, "loss": 0.232, "step": 990 }, { "epoch": 43.48, "learning_rate": 0.0001848, "loss": 0.196, "step": 1000 }, { "epoch": 43.91, "learning_rate": 0.00019079999999999998, "loss": 0.2384, "step": 1010 }, { "epoch": 44.35, "learning_rate": 0.00019679999999999999, "loss": 0.2293, "step": 1020 }, { "epoch": 44.78, "learning_rate": 0.0002028, "loss": 0.2241, "step": 1030 }, { "epoch": 45.22, "learning_rate": 0.00020879999999999998, "loss": 0.1895, "step": 1040 }, { "epoch": 45.65, "learning_rate": 0.00021479999999999996, "loss": 0.2248, "step": 1050 }, { "epoch": 46.09, "learning_rate": 0.00022079999999999997, "loss": 0.2288, "step": 1060 }, { "epoch": 46.52, "learning_rate": 0.00022679999999999998, "loss": 0.2115, "step": 1070 }, { "epoch": 46.96, "learning_rate": 0.0002328, "loss": 0.2041, "step": 1080 }, { "epoch": 47.39, "learning_rate": 0.0002388, "loss": 0.2281, "step": 1090 }, { "epoch": 47.83, "learning_rate": 0.0002448, "loss": 0.2306, "step": 1100 }, { "epoch": 48.26, "learning_rate": 0.00025079999999999997, "loss": 0.2039, "step": 1110 }, { "epoch": 48.7, "learning_rate": 0.00025679999999999995, "loss": 0.2167, "step": 1120 }, { "epoch": 49.13, "learning_rate": 0.0002628, "loss": 0.2339, "step": 1130 }, { "epoch": 49.57, "learning_rate": 0.0002688, "loss": 0.2366, "step": 1140 }, { "epoch": 50.0, "learning_rate": 0.0002748, "loss": 0.1972, "step": 1150 }, { "epoch": 50.0, "step": 1150, "total_flos": 6.332515423493292e+18, "train_loss": 0.09717807769775391, "train_runtime": 228.6677, "train_samples_per_second": 157.434, "train_steps_per_second": 5.029 }, { "epoch": 52.17, "learning_rate": 3.675e-06, "loss": 1.0586, "step": 1200 }, { "epoch": 56.52, "learning_rate": 1.1099999999999999e-05, "loss": 0.9511, "step": 1300 }, { "epoch": 60.87, "learning_rate": 1.8599999999999998e-05, "loss": 0.8827, "step": 1400 }, { "epoch": 65.22, "learning_rate": 2.6024999999999996e-05, "loss": 0.8123, "step": 1500 }, { "epoch": 65.22, "eval_loss": 0.38979411125183105, "eval_runtime": 2.5496, "eval_samples_per_second": 87.465, "eval_steps_per_second": 2.746, "eval_wer": 0.11226397457468686, "step": 1500 }, { "epoch": 69.57, "learning_rate": 3.3524999999999995e-05, "loss": 0.7657, "step": 1600 }, { "epoch": 73.91, "learning_rate": 4.1025e-05, "loss": 0.7197, "step": 1700 }, { "epoch": 78.26, "learning_rate": 4.845e-05, "loss": 0.6831, "step": 1800 }, { "epoch": 82.61, "learning_rate": 5.595e-05, "loss": 0.6424, "step": 1900 }, { "epoch": 86.96, "learning_rate": 6.345e-05, "loss": 0.6314, "step": 2000 }, { "epoch": 86.96, "eval_loss": 0.3785865306854248, "eval_runtime": 2.183, "eval_samples_per_second": 102.155, "eval_steps_per_second": 3.207, "eval_wer": 0.10431856421761077, "step": 2000 }, { "epoch": 91.3, "learning_rate": 7.094999999999999e-05, "loss": 0.5952, "step": 2100 }, { "epoch": 95.65, "learning_rate": 7.359183673469387e-05, "loss": 0.5769, "step": 2200 }, { "epoch": 100.0, "learning_rate": 7.053061224489794e-05, "loss": 0.5631, "step": 2300 }, { "epoch": 104.35, "learning_rate": 6.746938775510203e-05, "loss": 0.5318, "step": 2400 }, { "epoch": 108.7, "learning_rate": 6.440816326530611e-05, "loss": 0.5046, "step": 2500 }, { "epoch": 108.7, "eval_loss": 0.38604071736335754, "eval_runtime": 2.1232, "eval_samples_per_second": 105.03, "eval_steps_per_second": 3.297, "eval_wer": 0.10515984296130118, "step": 2500 }, { "epoch": 113.04, "learning_rate": 6.13469387755102e-05, "loss": 0.5035, "step": 2600 }, { "epoch": 117.39, "learning_rate": 5.828571428571428e-05, "loss": 0.4646, "step": 2700 }, { "epoch": 121.74, "learning_rate": 5.525510204081632e-05, "loss": 0.4552, "step": 2800 }, { "epoch": 126.09, "learning_rate": 5.21938775510204e-05, "loss": 0.4378, "step": 2900 }, { "epoch": 130.43, "learning_rate": 4.9132653061224486e-05, "loss": 0.4264, "step": 3000 }, { "epoch": 130.43, "eval_loss": 0.37559226155281067, "eval_runtime": 2.1713, "eval_samples_per_second": 102.705, "eval_steps_per_second": 3.224, "eval_wer": 0.10001869508319312, "step": 3000 }, { "epoch": 134.78, "learning_rate": 4.607142857142857e-05, "loss": 0.4114, "step": 3100 }, { "epoch": 139.13, "learning_rate": 4.3010204081632646e-05, "loss": 0.3945, "step": 3200 }, { "epoch": 143.48, "learning_rate": 3.994897959183673e-05, "loss": 0.3842, "step": 3300 }, { "epoch": 147.83, "learning_rate": 3.688775510204081e-05, "loss": 0.3757, "step": 3400 }, { "epoch": 150.0, "step": 3450, "total_flos": 1.8994436456459534e+19, "train_loss": 0.38979322350543477, "train_runtime": 1100.4507, "train_samples_per_second": 98.142, "train_steps_per_second": 3.135 } ], "max_steps": 3450, "num_train_epochs": 150, "total_flos": 1.8994436456459534e+19, "trial_name": null, "trial_params": null }