{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "global_step": 1035,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "learning_rate": 8e-06,
      "loss": 11.3437,
      "step": 10
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 12.2411,
      "step": 20
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.7e-05,
      "loss": 11.8912,
      "step": 30
    },
    {
      "epoch": 0.58,
      "learning_rate": 3.7e-05,
      "loss": 10.4142,
      "step": 40
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.7000000000000004e-05,
      "loss": 7.7542,
      "step": 50
    },
    {
      "epoch": 0.87,
      "learning_rate": 5.7e-05,
      "loss": 4.6198,
      "step": 60
    },
    {
      "epoch": 1.01,
      "learning_rate": 6.7e-05,
      "loss": 3.8278,
      "step": 70
    },
    {
      "epoch": 1.16,
      "learning_rate": 7.7e-05,
      "loss": 3.2769,
      "step": 80
    },
    {
      "epoch": 1.3,
      "learning_rate": 8.7e-05,
      "loss": 3.2017,
      "step": 90
    },
    {
      "epoch": 1.45,
      "learning_rate": 9.7e-05,
      "loss": 3.0602,
      "step": 100
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.000107,
      "loss": 2.963,
      "step": 110
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.00011700000000000001,
      "loss": 3.0003,
      "step": 120
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.000127,
      "loss": 2.9574,
      "step": 130
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.00013700000000000002,
      "loss": 2.9113,
      "step": 140
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.000147,
      "loss": 2.9198,
      "step": 150
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.000157,
      "loss": 2.9141,
      "step": 160
    },
    {
      "epoch": 2.46,
      "learning_rate": 0.00016700000000000002,
      "loss": 2.9118,
      "step": 170
    },
    {
      "epoch": 2.61,
      "learning_rate": 0.000177,
      "loss": 2.8995,
      "step": 180
    },
    {
      "epoch": 2.75,
      "learning_rate": 0.000187,
      "loss": 2.9353,
      "step": 190
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.00019700000000000002,
      "loss": 2.8971,
      "step": 200
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.000207,
      "loss": 2.8813,
      "step": 210
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.00021700000000000002,
      "loss": 2.903,
      "step": 220
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.00022700000000000002,
      "loss": 2.8678,
      "step": 230
    },
    {
      "epoch": 3.48,
      "learning_rate": 0.000237,
      "loss": 2.9094,
      "step": 240
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.000247,
      "loss": 2.887,
      "step": 250
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.000257,
      "loss": 2.9137,
      "step": 260
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.00026700000000000004,
      "loss": 2.8704,
      "step": 270
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.000277,
      "loss": 2.67,
      "step": 280
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.000287,
      "loss": 2.3059,
      "step": 290
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.000297,
      "loss": 1.834,
      "step": 300
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.000307,
      "loss": 1.0162,
      "step": 310
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.000317,
      "loss": 0.6458,
      "step": 320
    },
    {
      "epoch": 4.78,
      "learning_rate": 0.00032700000000000003,
      "loss": 0.5226,
      "step": 330
    },
    {
      "epoch": 4.93,
      "learning_rate": 0.000337,
      "loss": 0.5121,
      "step": 340
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.000347,
      "loss": 0.5253,
      "step": 350
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.000357,
      "loss": 0.3705,
      "step": 360
    },
    {
      "epoch": 5.36,
      "learning_rate": 0.000367,
      "loss": 0.296,
      "step": 370
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.000377,
      "loss": 0.29,
      "step": 380
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.00038700000000000003,
      "loss": 0.289,
      "step": 390
    },
    {
      "epoch": 5.8,
      "learning_rate": 0.00039700000000000005,
      "loss": 0.2989,
      "step": 400
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.00040699999999999997,
      "loss": 0.2531,
      "step": 410
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.000417,
      "loss": 0.2184,
      "step": 420
    },
    {
      "epoch": 6.23,
      "learning_rate": 0.000427,
      "loss": 0.1819,
      "step": 430
    },
    {
      "epoch": 6.38,
      "learning_rate": 0.000437,
      "loss": 0.1702,
      "step": 440
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.000447,
      "loss": 0.189,
      "step": 450
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.00045700000000000005,
      "loss": 0.2173,
      "step": 460
    },
    {
      "epoch": 6.81,
      "learning_rate": 0.000467,
      "loss": 0.1703,
      "step": 470
    },
    {
      "epoch": 6.96,
      "learning_rate": 0.000477,
      "loss": 0.2397,
      "step": 480
    },
    {
      "epoch": 7.1,
      "learning_rate": 0.000487,
      "loss": 0.1703,
      "step": 490
    },
    {
      "epoch": 7.25,
      "learning_rate": 0.000497,
      "loss": 0.1612,
      "step": 500
    },
    {
      "epoch": 7.25,
      "eval_loss": 0.3108726739883423,
      "eval_runtime": 608.0436,
      "eval_samples_per_second": 5.582,
      "eval_steps_per_second": 0.699,
      "eval_wer": 0.2548134999876735,
      "step": 500
    },
    {
      "epoch": 7.39,
      "learning_rate": 0.0004934579439252336,
      "loss": 0.1538,
      "step": 510
    },
    {
      "epoch": 7.54,
      "learning_rate": 0.00048411214953271025,
      "loss": 0.1432,
      "step": 520
    },
    {
      "epoch": 7.68,
      "learning_rate": 0.0004747663551401869,
      "loss": 0.1354,
      "step": 530
    },
    {
      "epoch": 7.83,
      "learning_rate": 0.00046542056074766355,
      "loss": 0.1766,
      "step": 540
    },
    {
      "epoch": 7.97,
      "learning_rate": 0.00045607476635514015,
      "loss": 0.1654,
      "step": 550
    },
    {
      "epoch": 8.12,
      "learning_rate": 0.00044672897196261686,
      "loss": 0.1267,
      "step": 560
    },
    {
      "epoch": 8.26,
      "learning_rate": 0.0004373831775700935,
      "loss": 0.1313,
      "step": 570
    },
    {
      "epoch": 8.41,
      "learning_rate": 0.0004280373831775701,
      "loss": 0.1213,
      "step": 580
    },
    {
      "epoch": 8.55,
      "learning_rate": 0.00041869158878504677,
      "loss": 0.1221,
      "step": 590
    },
    {
      "epoch": 8.7,
      "learning_rate": 0.0004093457943925234,
      "loss": 0.119,
      "step": 600
    },
    {
      "epoch": 8.84,
      "learning_rate": 0.0004,
      "loss": 0.1085,
      "step": 610
    },
    {
      "epoch": 8.99,
      "learning_rate": 0.00039065420560747667,
      "loss": 0.1185,
      "step": 620
    },
    {
      "epoch": 9.13,
      "learning_rate": 0.00038130841121495327,
      "loss": 0.1115,
      "step": 630
    },
    {
      "epoch": 9.28,
      "learning_rate": 0.0003719626168224299,
      "loss": 0.0799,
      "step": 640
    },
    {
      "epoch": 9.42,
      "learning_rate": 0.0003626168224299066,
      "loss": 0.1101,
      "step": 650
    },
    {
      "epoch": 9.57,
      "learning_rate": 0.0003532710280373832,
      "loss": 0.0795,
      "step": 660
    },
    {
      "epoch": 9.71,
      "learning_rate": 0.00034392523364485983,
      "loss": 0.1179,
      "step": 670
    },
    {
      "epoch": 9.86,
      "learning_rate": 0.0003345794392523365,
      "loss": 0.1134,
      "step": 680
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.0003252336448598131,
      "loss": 0.0883,
      "step": 690
    },
    {
      "epoch": 10.14,
      "learning_rate": 0.00031588785046728974,
      "loss": 0.0795,
      "step": 700
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.0003065420560747664,
      "loss": 0.1018,
      "step": 710
    },
    {
      "epoch": 10.43,
      "learning_rate": 0.000297196261682243,
      "loss": 0.1054,
      "step": 720
    },
    {
      "epoch": 10.58,
      "learning_rate": 0.00028785046728971965,
      "loss": 0.0795,
      "step": 730
    },
    {
      "epoch": 10.72,
      "learning_rate": 0.0002785046728971963,
      "loss": 0.0796,
      "step": 740
    },
    {
      "epoch": 10.87,
      "learning_rate": 0.0002691588785046729,
      "loss": 0.0668,
      "step": 750
    },
    {
      "epoch": 11.01,
      "learning_rate": 0.00025981308411214955,
      "loss": 0.0732,
      "step": 760
    },
    {
      "epoch": 11.16,
      "learning_rate": 0.0002504672897196262,
      "loss": 0.061,
      "step": 770
    },
    {
      "epoch": 11.3,
      "learning_rate": 0.0002411214953271028,
      "loss": 0.0578,
      "step": 780
    },
    {
      "epoch": 11.45,
      "learning_rate": 0.00023177570093457946,
      "loss": 0.0555,
      "step": 790
    },
    {
      "epoch": 11.59,
      "learning_rate": 0.00022242990654205608,
      "loss": 0.0588,
      "step": 800
    },
    {
      "epoch": 11.74,
      "learning_rate": 0.0002130841121495327,
      "loss": 0.0699,
      "step": 810
    },
    {
      "epoch": 11.88,
      "learning_rate": 0.00020373831775700934,
      "loss": 0.0676,
      "step": 820
    },
    {
      "epoch": 12.03,
      "learning_rate": 0.000194392523364486,
      "loss": 0.0555,
      "step": 830
    },
    {
      "epoch": 12.17,
      "learning_rate": 0.00018504672897196262,
      "loss": 0.0612,
      "step": 840
    },
    {
      "epoch": 12.32,
      "learning_rate": 0.00017570093457943924,
      "loss": 0.0393,
      "step": 850
    },
    {
      "epoch": 12.46,
      "learning_rate": 0.0001663551401869159,
      "loss": 0.0508,
      "step": 860
    },
    {
      "epoch": 12.61,
      "learning_rate": 0.00015700934579439252,
      "loss": 0.0494,
      "step": 870
    },
    {
      "epoch": 12.75,
      "learning_rate": 0.00014766355140186915,
      "loss": 0.0445,
      "step": 880
    },
    {
      "epoch": 12.9,
      "learning_rate": 0.0001383177570093458,
      "loss": 0.0538,
      "step": 890
    },
    {
      "epoch": 13.04,
      "learning_rate": 0.00012897196261682243,
      "loss": 0.0509,
      "step": 900
    },
    {
      "epoch": 13.19,
      "learning_rate": 0.00011962616822429907,
      "loss": 0.0552,
      "step": 910
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.00011028037383177571,
      "loss": 0.0363,
      "step": 920
    },
    {
      "epoch": 13.48,
      "learning_rate": 0.00010093457943925234,
      "loss": 0.0332,
      "step": 930
    },
    {
      "epoch": 13.62,
      "learning_rate": 9.158878504672898e-05,
      "loss": 0.0394,
      "step": 940
    },
    {
      "epoch": 13.77,
      "learning_rate": 8.224299065420562e-05,
      "loss": 0.0385,
      "step": 950
    },
    {
      "epoch": 13.91,
      "learning_rate": 7.289719626168224e-05,
      "loss": 0.042,
      "step": 960
    },
    {
      "epoch": 14.06,
      "learning_rate": 6.355140186915888e-05,
      "loss": 0.0427,
      "step": 970
    },
    {
      "epoch": 14.2,
      "learning_rate": 5.420560747663551e-05,
      "loss": 0.0352,
      "step": 980
    },
    {
      "epoch": 14.35,
      "learning_rate": 4.485981308411215e-05,
      "loss": 0.0448,
      "step": 990
    },
    {
      "epoch": 14.49,
      "learning_rate": 3.551401869158879e-05,
      "loss": 0.0433,
      "step": 1000
    },
    {
      "epoch": 14.49,
      "eval_loss": 0.2945314943790436,
      "eval_runtime": 593.7298,
      "eval_samples_per_second": 5.716,
      "eval_steps_per_second": 0.716,
      "eval_wer": 0.18705881386156514,
      "step": 1000
    },
    {
      "epoch": 14.64,
      "learning_rate": 2.6168224299065423e-05,
      "loss": 0.0409,
      "step": 1010
    },
    {
      "epoch": 14.78,
      "learning_rate": 1.6822429906542056e-05,
      "loss": 0.0354,
      "step": 1020
    },
    {
      "epoch": 14.93,
      "learning_rate": 7.476635514018692e-06,
      "loss": 0.0321,
      "step": 1030
    },
    {
      "epoch": 15.0,
      "step": 1035,
      "total_flos": 1.5036330849795359e+19,
      "train_loss": 1.3424202077342693,
      "train_runtime": 8416.5784,
      "train_samples_per_second": 3.91,
      "train_steps_per_second": 0.123
    }
  ],
  "max_steps": 1035,
  "num_train_epochs": 15,
  "total_flos": 1.5036330849795359e+19,
  "trial_name": null,
  "trial_params": null
}