{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 1035, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 8e-06, "loss": 11.3437, "step": 10 }, { "epoch": 0.29, "learning_rate": 1.7000000000000003e-05, "loss": 12.2411, "step": 20 }, { "epoch": 0.43, "learning_rate": 2.7e-05, "loss": 11.8912, "step": 30 }, { "epoch": 0.58, "learning_rate": 3.7e-05, "loss": 10.4142, "step": 40 }, { "epoch": 0.72, "learning_rate": 4.7000000000000004e-05, "loss": 7.7542, "step": 50 }, { "epoch": 0.87, "learning_rate": 5.7e-05, "loss": 4.6198, "step": 60 }, { "epoch": 1.01, "learning_rate": 6.7e-05, "loss": 3.8278, "step": 70 }, { "epoch": 1.16, "learning_rate": 7.7e-05, "loss": 3.2769, "step": 80 }, { "epoch": 1.3, "learning_rate": 8.7e-05, "loss": 3.2017, "step": 90 }, { "epoch": 1.45, "learning_rate": 9.7e-05, "loss": 3.0602, "step": 100 }, { "epoch": 1.59, "learning_rate": 0.000107, "loss": 2.963, "step": 110 }, { "epoch": 1.74, "learning_rate": 0.00011700000000000001, "loss": 3.0003, "step": 120 }, { "epoch": 1.88, "learning_rate": 0.000127, "loss": 2.9574, "step": 130 }, { "epoch": 2.03, "learning_rate": 0.00013700000000000002, "loss": 2.9113, "step": 140 }, { "epoch": 2.17, "learning_rate": 0.000147, "loss": 2.9198, "step": 150 }, { "epoch": 2.32, "learning_rate": 0.000157, "loss": 2.9141, "step": 160 }, { "epoch": 2.46, "learning_rate": 0.00016700000000000002, "loss": 2.9118, "step": 170 }, { "epoch": 2.61, "learning_rate": 0.000177, "loss": 2.8995, "step": 180 }, { "epoch": 2.75, "learning_rate": 0.000187, "loss": 2.9353, "step": 190 }, { "epoch": 2.9, "learning_rate": 0.00019700000000000002, "loss": 2.8971, "step": 200 }, { "epoch": 3.04, "learning_rate": 0.000207, "loss": 2.8813, "step": 210 }, { "epoch": 3.19, "learning_rate": 0.00021700000000000002, "loss": 2.903, "step": 220 }, { "epoch": 3.33, "learning_rate": 0.00022700000000000002, "loss": 2.8678, "step": 230 }, { "epoch": 3.48, "learning_rate": 0.000237, "loss": 2.9094, "step": 240 }, { "epoch": 3.62, "learning_rate": 0.000247, "loss": 2.887, "step": 250 }, { "epoch": 3.77, "learning_rate": 0.000257, "loss": 2.9137, "step": 260 }, { "epoch": 3.91, "learning_rate": 0.00026700000000000004, "loss": 2.8704, "step": 270 }, { "epoch": 4.06, "learning_rate": 0.000277, "loss": 2.67, "step": 280 }, { "epoch": 4.2, "learning_rate": 0.000287, "loss": 2.3059, "step": 290 }, { "epoch": 4.35, "learning_rate": 0.000297, "loss": 1.834, "step": 300 }, { "epoch": 4.49, "learning_rate": 0.000307, "loss": 1.0162, "step": 310 }, { "epoch": 4.64, "learning_rate": 0.000317, "loss": 0.6458, "step": 320 }, { "epoch": 4.78, "learning_rate": 0.00032700000000000003, "loss": 0.5226, "step": 330 }, { "epoch": 4.93, "learning_rate": 0.000337, "loss": 0.5121, "step": 340 }, { "epoch": 5.07, "learning_rate": 0.000347, "loss": 0.5253, "step": 350 }, { "epoch": 5.22, "learning_rate": 0.000357, "loss": 0.3705, "step": 360 }, { "epoch": 5.36, "learning_rate": 0.000367, "loss": 0.296, "step": 370 }, { "epoch": 5.51, "learning_rate": 0.000377, "loss": 0.29, "step": 380 }, { "epoch": 5.65, "learning_rate": 0.00038700000000000003, "loss": 0.289, "step": 390 }, { "epoch": 5.8, "learning_rate": 0.00039700000000000005, "loss": 0.2989, "step": 400 }, { "epoch": 5.94, "learning_rate": 0.00040699999999999997, "loss": 0.2531, "step": 410 }, { "epoch": 6.09, "learning_rate": 0.000417, "loss": 0.2184, "step": 420 }, { "epoch": 6.23, "learning_rate": 0.000427, "loss": 0.1819, "step": 430 }, { "epoch": 6.38, "learning_rate": 0.000437, "loss": 0.1702, "step": 440 }, { "epoch": 6.52, "learning_rate": 0.000447, "loss": 0.189, "step": 450 }, { "epoch": 6.67, "learning_rate": 0.00045700000000000005, "loss": 0.2173, "step": 460 }, { "epoch": 6.81, "learning_rate": 0.000467, "loss": 0.1703, "step": 470 }, { "epoch": 6.96, "learning_rate": 0.000477, "loss": 0.2397, "step": 480 }, { "epoch": 7.1, "learning_rate": 0.000487, "loss": 0.1703, "step": 490 }, { "epoch": 7.25, "learning_rate": 0.000497, "loss": 0.1612, "step": 500 }, { "epoch": 7.25, "eval_loss": 0.3108726739883423, "eval_runtime": 608.0436, "eval_samples_per_second": 5.582, "eval_steps_per_second": 0.699, "eval_wer": 0.2548134999876735, "step": 500 }, { "epoch": 7.39, "learning_rate": 0.0004934579439252336, "loss": 0.1538, "step": 510 }, { "epoch": 7.54, "learning_rate": 0.00048411214953271025, "loss": 0.1432, "step": 520 }, { "epoch": 7.68, "learning_rate": 0.0004747663551401869, "loss": 0.1354, "step": 530 }, { "epoch": 7.83, "learning_rate": 0.00046542056074766355, "loss": 0.1766, "step": 540 }, { "epoch": 7.97, "learning_rate": 0.00045607476635514015, "loss": 0.1654, "step": 550 }, { "epoch": 8.12, "learning_rate": 0.00044672897196261686, "loss": 0.1267, "step": 560 }, { "epoch": 8.26, "learning_rate": 0.0004373831775700935, "loss": 0.1313, "step": 570 }, { "epoch": 8.41, "learning_rate": 0.0004280373831775701, "loss": 0.1213, "step": 580 }, { "epoch": 8.55, "learning_rate": 0.00041869158878504677, "loss": 0.1221, "step": 590 }, { "epoch": 8.7, "learning_rate": 0.0004093457943925234, "loss": 0.119, "step": 600 }, { "epoch": 8.84, "learning_rate": 0.0004, "loss": 0.1085, "step": 610 }, { "epoch": 8.99, "learning_rate": 0.00039065420560747667, "loss": 0.1185, "step": 620 }, { "epoch": 9.13, "learning_rate": 0.00038130841121495327, "loss": 0.1115, "step": 630 }, { "epoch": 9.28, "learning_rate": 0.0003719626168224299, "loss": 0.0799, "step": 640 }, { "epoch": 9.42, "learning_rate": 0.0003626168224299066, "loss": 0.1101, "step": 650 }, { "epoch": 9.57, "learning_rate": 0.0003532710280373832, "loss": 0.0795, "step": 660 }, { "epoch": 9.71, "learning_rate": 0.00034392523364485983, "loss": 0.1179, "step": 670 }, { "epoch": 9.86, "learning_rate": 0.0003345794392523365, "loss": 0.1134, "step": 680 }, { "epoch": 10.0, "learning_rate": 0.0003252336448598131, "loss": 0.0883, "step": 690 }, { "epoch": 10.14, "learning_rate": 0.00031588785046728974, "loss": 0.0795, "step": 700 }, { "epoch": 10.29, "learning_rate": 0.0003065420560747664, "loss": 0.1018, "step": 710 }, { "epoch": 10.43, "learning_rate": 0.000297196261682243, "loss": 0.1054, "step": 720 }, { "epoch": 10.58, "learning_rate": 0.00028785046728971965, "loss": 0.0795, "step": 730 }, { "epoch": 10.72, "learning_rate": 0.0002785046728971963, "loss": 0.0796, "step": 740 }, { "epoch": 10.87, "learning_rate": 0.0002691588785046729, "loss": 0.0668, "step": 750 }, { "epoch": 11.01, "learning_rate": 0.00025981308411214955, "loss": 0.0732, "step": 760 }, { "epoch": 11.16, "learning_rate": 0.0002504672897196262, "loss": 0.061, "step": 770 }, { "epoch": 11.3, "learning_rate": 0.0002411214953271028, "loss": 0.0578, "step": 780 }, { "epoch": 11.45, "learning_rate": 0.00023177570093457946, "loss": 0.0555, "step": 790 }, { "epoch": 11.59, "learning_rate": 0.00022242990654205608, "loss": 0.0588, "step": 800 }, { "epoch": 11.74, "learning_rate": 0.0002130841121495327, "loss": 0.0699, "step": 810 }, { "epoch": 11.88, "learning_rate": 0.00020373831775700934, "loss": 0.0676, "step": 820 }, { "epoch": 12.03, "learning_rate": 0.000194392523364486, "loss": 0.0555, "step": 830 }, { "epoch": 12.17, "learning_rate": 0.00018504672897196262, "loss": 0.0612, "step": 840 }, { "epoch": 12.32, "learning_rate": 0.00017570093457943924, "loss": 0.0393, "step": 850 }, { "epoch": 12.46, "learning_rate": 0.0001663551401869159, "loss": 0.0508, "step": 860 }, { "epoch": 12.61, "learning_rate": 0.00015700934579439252, "loss": 0.0494, "step": 870 }, { "epoch": 12.75, "learning_rate": 0.00014766355140186915, "loss": 0.0445, "step": 880 }, { "epoch": 12.9, "learning_rate": 0.0001383177570093458, "loss": 0.0538, "step": 890 }, { "epoch": 13.04, "learning_rate": 0.00012897196261682243, "loss": 0.0509, "step": 900 }, { "epoch": 13.19, "learning_rate": 0.00011962616822429907, "loss": 0.0552, "step": 910 }, { "epoch": 13.33, "learning_rate": 0.00011028037383177571, "loss": 0.0363, "step": 920 }, { "epoch": 13.48, "learning_rate": 0.00010093457943925234, "loss": 0.0332, "step": 930 }, { "epoch": 13.62, "learning_rate": 9.158878504672898e-05, "loss": 0.0394, "step": 940 }, { "epoch": 13.77, "learning_rate": 8.224299065420562e-05, "loss": 0.0385, "step": 950 }, { "epoch": 13.91, "learning_rate": 7.289719626168224e-05, "loss": 0.042, "step": 960 }, { "epoch": 14.06, "learning_rate": 6.355140186915888e-05, "loss": 0.0427, "step": 970 }, { "epoch": 14.2, "learning_rate": 5.420560747663551e-05, "loss": 0.0352, "step": 980 }, { "epoch": 14.35, "learning_rate": 4.485981308411215e-05, "loss": 0.0448, "step": 990 }, { "epoch": 14.49, "learning_rate": 3.551401869158879e-05, "loss": 0.0433, "step": 1000 }, { "epoch": 14.49, "eval_loss": 0.2945314943790436, "eval_runtime": 593.7298, "eval_samples_per_second": 5.716, "eval_steps_per_second": 0.716, "eval_wer": 0.18705881386156514, "step": 1000 }, { "epoch": 14.64, "learning_rate": 2.6168224299065423e-05, "loss": 0.0409, "step": 1010 }, { "epoch": 14.78, "learning_rate": 1.6822429906542056e-05, "loss": 0.0354, "step": 1020 }, { "epoch": 14.93, "learning_rate": 7.476635514018692e-06, "loss": 0.0321, "step": 1030 }, { "epoch": 15.0, "step": 1035, "total_flos": 1.5036330849795359e+19, "train_loss": 1.3424202077342693, "train_runtime": 8416.5784, "train_samples_per_second": 3.91, "train_steps_per_second": 0.123 } ], "max_steps": 1035, "num_train_epochs": 15, "total_flos": 1.5036330849795359e+19, "trial_name": null, "trial_params": null }