{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 1035, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 8e-06, "loss": 11.0428, "step": 10 }, { "epoch": 0.29, "learning_rate": 1.8e-05, "loss": 11.5863, "step": 20 }, { "epoch": 0.43, "learning_rate": 2.7e-05, "loss": 10.2859, "step": 30 }, { "epoch": 0.58, "learning_rate": 3.7e-05, "loss": 7.7802, "step": 40 }, { "epoch": 0.72, "learning_rate": 4.7000000000000004e-05, "loss": 5.4079, "step": 50 }, { "epoch": 0.87, "learning_rate": 5.7e-05, "loss": 4.1036, "step": 60 }, { "epoch": 1.01, "learning_rate": 6.7e-05, "loss": 3.9029, "step": 70 }, { "epoch": 1.16, "learning_rate": 7.7e-05, "loss": 3.4366, "step": 80 }, { "epoch": 1.3, "learning_rate": 8.7e-05, "loss": 3.3931, "step": 90 }, { "epoch": 1.45, "learning_rate": 9.7e-05, "loss": 3.1944, "step": 100 }, { "epoch": 1.59, "learning_rate": 0.000107, "loss": 3.043, "step": 110 }, { "epoch": 1.74, "learning_rate": 0.00011700000000000001, "loss": 3.0573, "step": 120 }, { "epoch": 1.88, "learning_rate": 0.000127, "loss": 2.9868, "step": 130 }, { "epoch": 2.03, "learning_rate": 0.00013700000000000002, "loss": 2.9365, "step": 140 }, { "epoch": 2.17, "learning_rate": 0.000147, "loss": 2.9301, "step": 150 }, { "epoch": 2.32, "learning_rate": 0.000157, "loss": 2.9331, "step": 160 }, { "epoch": 2.46, "learning_rate": 0.00016700000000000002, "loss": 2.9344, "step": 170 }, { "epoch": 2.61, "learning_rate": 0.000177, "loss": 2.9127, "step": 180 }, { "epoch": 2.75, "learning_rate": 0.000187, "loss": 2.9461, "step": 190 }, { "epoch": 2.9, "learning_rate": 0.00019700000000000002, "loss": 2.9012, "step": 200 }, { "epoch": 3.04, "learning_rate": 0.000207, "loss": 2.8907, "step": 210 }, { "epoch": 3.19, "learning_rate": 0.00021700000000000002, "loss": 2.9403, "step": 220 }, { "epoch": 3.33, "learning_rate": 0.00022700000000000002, "loss": 2.8832, "step": 230 }, { "epoch": 3.48, "learning_rate": 0.000237, "loss": 2.9188, "step": 240 }, { "epoch": 3.62, "learning_rate": 0.000247, "loss": 2.861, "step": 250 }, { "epoch": 3.77, "learning_rate": 0.000257, "loss": 2.8522, "step": 260 }, { "epoch": 3.91, "learning_rate": 0.00026700000000000004, "loss": 2.7281, "step": 270 }, { "epoch": 4.06, "learning_rate": 0.000277, "loss": 2.5301, "step": 280 }, { "epoch": 4.2, "learning_rate": 0.000287, "loss": 2.2375, "step": 290 }, { "epoch": 4.35, "learning_rate": 0.000297, "loss": 1.7317, "step": 300 }, { "epoch": 4.49, "learning_rate": 0.000307, "loss": 1.1891, "step": 310 }, { "epoch": 4.64, "learning_rate": 0.000317, "loss": 0.9012, "step": 320 }, { "epoch": 4.78, "learning_rate": 0.00032700000000000003, "loss": 0.8128, "step": 330 }, { "epoch": 4.93, "learning_rate": 0.000337, "loss": 0.7676, "step": 340 }, { "epoch": 5.07, "learning_rate": 0.000347, "loss": 0.8468, "step": 350 }, { "epoch": 5.22, "learning_rate": 0.000357, "loss": 0.7011, "step": 360 }, { "epoch": 5.36, "learning_rate": 0.000367, "loss": 0.5223, "step": 370 }, { "epoch": 5.51, "learning_rate": 0.000377, "loss": 0.4903, "step": 380 }, { "epoch": 5.65, "learning_rate": 0.00038700000000000003, "loss": 0.4989, "step": 390 }, { "epoch": 5.8, "learning_rate": 0.00039700000000000005, "loss": 0.5719, "step": 400 }, { "epoch": 5.94, "learning_rate": 0.00040699999999999997, "loss": 0.4448, "step": 410 }, { "epoch": 6.09, "learning_rate": 0.000417, "loss": 0.4106, "step": 420 }, { "epoch": 6.23, "learning_rate": 0.000427, "loss": 0.3305, "step": 430 }, { "epoch": 6.38, "learning_rate": 0.000437, "loss": 0.3317, "step": 440 }, { "epoch": 6.52, "learning_rate": 0.000447, "loss": 0.3787, "step": 450 }, { "epoch": 6.67, "learning_rate": 0.00045700000000000005, "loss": 0.3884, "step": 460 }, { "epoch": 6.81, "learning_rate": 0.000467, "loss": 0.3162, "step": 470 }, { "epoch": 6.96, "learning_rate": 0.000477, "loss": 0.2973, "step": 480 }, { "epoch": 7.1, "learning_rate": 0.000487, "loss": 0.2614, "step": 490 }, { "epoch": 7.25, "learning_rate": 0.000497, "loss": 0.265, "step": 500 }, { "epoch": 7.25, "eval_loss": 0.48176705837249756, "eval_runtime": 597.1948, "eval_samples_per_second": 5.683, "eval_steps_per_second": 0.712, "eval_wer": 0.44399247261461594, "step": 500 }, { "epoch": 7.39, "learning_rate": 0.0004934579439252336, "loss": 0.2784, "step": 510 }, { "epoch": 7.54, "learning_rate": 0.00048411214953271025, "loss": 0.2536, "step": 520 }, { "epoch": 7.68, "learning_rate": 0.0004747663551401869, "loss": 0.2452, "step": 530 }, { "epoch": 7.83, "learning_rate": 0.00046542056074766355, "loss": 0.2731, "step": 540 }, { "epoch": 7.97, "learning_rate": 0.00045607476635514015, "loss": 0.2524, "step": 550 }, { "epoch": 8.12, "learning_rate": 0.00044672897196261686, "loss": 0.2179, "step": 560 }, { "epoch": 8.26, "learning_rate": 0.0004373831775700935, "loss": 0.1944, "step": 570 }, { "epoch": 8.41, "learning_rate": 0.0004280373831775701, "loss": 0.1866, "step": 580 }, { "epoch": 8.55, "learning_rate": 0.00041869158878504677, "loss": 0.179, "step": 590 }, { "epoch": 8.7, "learning_rate": 0.0004093457943925234, "loss": 0.1778, "step": 600 }, { "epoch": 8.84, "learning_rate": 0.0004, "loss": 0.1683, "step": 610 }, { "epoch": 8.99, "learning_rate": 0.00039065420560747667, "loss": 0.1796, "step": 620 }, { "epoch": 9.13, "learning_rate": 0.00038130841121495327, "loss": 0.17, "step": 630 }, { "epoch": 9.28, "learning_rate": 0.0003719626168224299, "loss": 0.1274, "step": 640 }, { "epoch": 9.42, "learning_rate": 0.0003626168224299066, "loss": 0.1498, "step": 650 }, { "epoch": 9.57, "learning_rate": 0.0003532710280373832, "loss": 0.1164, "step": 660 }, { "epoch": 9.71, "learning_rate": 0.00034392523364485983, "loss": 0.1308, "step": 670 }, { "epoch": 9.86, "learning_rate": 0.0003345794392523365, "loss": 0.1364, "step": 680 }, { "epoch": 10.0, "learning_rate": 0.0003252336448598131, "loss": 0.124, "step": 690 }, { "epoch": 10.14, "learning_rate": 0.00031588785046728974, "loss": 0.1068, "step": 700 }, { "epoch": 10.29, "learning_rate": 0.0003065420560747664, "loss": 0.1209, "step": 710 }, { "epoch": 10.43, "learning_rate": 0.000297196261682243, "loss": 0.1348, "step": 720 }, { "epoch": 10.58, "learning_rate": 0.00028785046728971965, "loss": 0.1014, "step": 730 }, { "epoch": 10.72, "learning_rate": 0.0002785046728971963, "loss": 0.0974, "step": 740 }, { "epoch": 10.87, "learning_rate": 0.0002691588785046729, "loss": 0.097, "step": 750 }, { "epoch": 11.01, "learning_rate": 0.00025981308411214955, "loss": 0.1105, "step": 760 }, { "epoch": 11.16, "learning_rate": 0.0002504672897196262, "loss": 0.0849, "step": 770 }, { "epoch": 11.3, "learning_rate": 0.0002411214953271028, "loss": 0.0777, "step": 780 }, { "epoch": 11.45, "learning_rate": 0.00023177570093457946, "loss": 0.0741, "step": 790 }, { "epoch": 11.59, "learning_rate": 0.00022242990654205608, "loss": 0.083, "step": 800 }, { "epoch": 11.74, "learning_rate": 0.0002130841121495327, "loss": 0.0989, "step": 810 }, { "epoch": 11.88, "learning_rate": 0.00020373831775700934, "loss": 0.0944, "step": 820 }, { "epoch": 12.03, "learning_rate": 0.000194392523364486, "loss": 0.0713, "step": 830 }, { "epoch": 12.17, "learning_rate": 0.00018504672897196262, "loss": 0.0823, "step": 840 }, { "epoch": 12.32, "learning_rate": 0.00017570093457943924, "loss": 0.0632, "step": 850 }, { "epoch": 12.46, "learning_rate": 0.0001663551401869159, "loss": 0.0671, "step": 860 }, { "epoch": 12.61, "learning_rate": 0.00015700934579439252, "loss": 0.0722, "step": 870 }, { "epoch": 12.75, "learning_rate": 0.00014766355140186915, "loss": 0.0657, "step": 880 }, { "epoch": 12.9, "learning_rate": 0.0001383177570093458, "loss": 0.0749, "step": 890 }, { "epoch": 13.04, "learning_rate": 0.00012897196261682243, "loss": 0.0727, "step": 900 }, { "epoch": 13.19, "learning_rate": 0.00011962616822429907, "loss": 0.0741, "step": 910 }, { "epoch": 13.33, "learning_rate": 0.00011028037383177571, "loss": 0.0511, "step": 920 }, { "epoch": 13.48, "learning_rate": 0.00010093457943925234, "loss": 0.0461, "step": 930 }, { "epoch": 13.62, "learning_rate": 9.158878504672898e-05, "loss": 0.0517, "step": 940 }, { "epoch": 13.77, "learning_rate": 8.224299065420562e-05, "loss": 0.0533, "step": 950 }, { "epoch": 13.91, "learning_rate": 7.289719626168224e-05, "loss": 0.0577, "step": 960 }, { "epoch": 14.06, "learning_rate": 6.355140186915888e-05, "loss": 0.0571, "step": 970 }, { "epoch": 14.2, "learning_rate": 5.420560747663551e-05, "loss": 0.048, "step": 980 }, { "epoch": 14.35, "learning_rate": 4.485981308411215e-05, "loss": 0.0542, "step": 990 }, { "epoch": 14.49, "learning_rate": 3.551401869158879e-05, "loss": 0.0522, "step": 1000 }, { "epoch": 14.49, "eval_loss": 0.39019492268562317, "eval_runtime": 581.0044, "eval_samples_per_second": 5.842, "eval_steps_per_second": 0.731, "eval_wer": 0.2725472310562171, "step": 1000 }, { "epoch": 14.64, "learning_rate": 2.6168224299065423e-05, "loss": 0.0524, "step": 1010 }, { "epoch": 14.78, "learning_rate": 1.6822429906542056e-05, "loss": 0.0501, "step": 1020 }, { "epoch": 14.93, "learning_rate": 7.476635514018692e-06, "loss": 0.0425, "step": 1030 }, { "epoch": 15.0, "step": 1035, "total_flos": 1.5036330849795359e+19, "train_loss": 1.3250362157965627, "train_runtime": 8304.6494, "train_samples_per_second": 3.963, "train_steps_per_second": 0.125 } ], "max_steps": 1035, "num_train_epochs": 15, "total_flos": 1.5036330849795359e+19, "trial_name": null, "trial_params": null }