|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 1035, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8e-06, |
|
"loss": 11.0428, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.8e-05, |
|
"loss": 11.5863, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.7e-05, |
|
"loss": 10.2859, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.7e-05, |
|
"loss": 7.7802, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.7000000000000004e-05, |
|
"loss": 5.4079, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5.7e-05, |
|
"loss": 4.1036, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.7e-05, |
|
"loss": 3.9029, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 7.7e-05, |
|
"loss": 3.4366, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.7e-05, |
|
"loss": 3.3931, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.7e-05, |
|
"loss": 3.1944, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.000107, |
|
"loss": 3.043, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00011700000000000001, |
|
"loss": 3.0573, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.000127, |
|
"loss": 2.9868, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00013700000000000002, |
|
"loss": 2.9365, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.000147, |
|
"loss": 2.9301, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.000157, |
|
"loss": 2.9331, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.00016700000000000002, |
|
"loss": 2.9344, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.000177, |
|
"loss": 2.9127, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.000187, |
|
"loss": 2.9461, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.00019700000000000002, |
|
"loss": 2.9012, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.000207, |
|
"loss": 2.8907, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00021700000000000002, |
|
"loss": 2.9403, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00022700000000000002, |
|
"loss": 2.8832, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.000237, |
|
"loss": 2.9188, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.000247, |
|
"loss": 2.861, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.000257, |
|
"loss": 2.8522, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.00026700000000000004, |
|
"loss": 2.7281, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.000277, |
|
"loss": 2.5301, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.000287, |
|
"loss": 2.2375, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.000297, |
|
"loss": 1.7317, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.000307, |
|
"loss": 1.1891, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.000317, |
|
"loss": 0.9012, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.00032700000000000003, |
|
"loss": 0.8128, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.000337, |
|
"loss": 0.7676, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.000347, |
|
"loss": 0.8468, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.000357, |
|
"loss": 0.7011, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.000367, |
|
"loss": 0.5223, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.000377, |
|
"loss": 0.4903, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.00038700000000000003, |
|
"loss": 0.4989, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.00039700000000000005, |
|
"loss": 0.5719, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00040699999999999997, |
|
"loss": 0.4448, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.000417, |
|
"loss": 0.4106, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.000427, |
|
"loss": 0.3305, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.000437, |
|
"loss": 0.3317, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.000447, |
|
"loss": 0.3787, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.00045700000000000005, |
|
"loss": 0.3884, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.000467, |
|
"loss": 0.3162, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.000477, |
|
"loss": 0.2973, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.000487, |
|
"loss": 0.2614, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.000497, |
|
"loss": 0.265, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_loss": 0.48176705837249756, |
|
"eval_runtime": 597.1948, |
|
"eval_samples_per_second": 5.683, |
|
"eval_steps_per_second": 0.712, |
|
"eval_wer": 0.44399247261461594, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.0004934579439252336, |
|
"loss": 0.2784, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.00048411214953271025, |
|
"loss": 0.2536, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.0004747663551401869, |
|
"loss": 0.2452, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00046542056074766355, |
|
"loss": 0.2731, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.00045607476635514015, |
|
"loss": 0.2524, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.00044672897196261686, |
|
"loss": 0.2179, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.0004373831775700935, |
|
"loss": 0.1944, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 0.0004280373831775701, |
|
"loss": 0.1866, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 0.00041869158878504677, |
|
"loss": 0.179, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.0004093457943925234, |
|
"loss": 0.1778, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 0.0004, |
|
"loss": 0.1683, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.00039065420560747667, |
|
"loss": 0.1796, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 0.00038130841121495327, |
|
"loss": 0.17, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 0.0003719626168224299, |
|
"loss": 0.1274, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 0.0003626168224299066, |
|
"loss": 0.1498, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.0003532710280373832, |
|
"loss": 0.1164, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.00034392523364485983, |
|
"loss": 0.1308, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 0.0003345794392523365, |
|
"loss": 0.1364, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0003252336448598131, |
|
"loss": 0.124, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 0.00031588785046728974, |
|
"loss": 0.1068, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 0.0003065420560747664, |
|
"loss": 0.1209, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 0.000297196261682243, |
|
"loss": 0.1348, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 0.00028785046728971965, |
|
"loss": 0.1014, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 0.0002785046728971963, |
|
"loss": 0.0974, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.0002691588785046729, |
|
"loss": 0.097, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 0.00025981308411214955, |
|
"loss": 0.1105, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 0.0002504672897196262, |
|
"loss": 0.0849, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 0.0002411214953271028, |
|
"loss": 0.0777, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 0.00023177570093457946, |
|
"loss": 0.0741, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 0.00022242990654205608, |
|
"loss": 0.083, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 0.0002130841121495327, |
|
"loss": 0.0989, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 0.00020373831775700934, |
|
"loss": 0.0944, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 0.000194392523364486, |
|
"loss": 0.0713, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 0.00018504672897196262, |
|
"loss": 0.0823, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 0.00017570093457943924, |
|
"loss": 0.0632, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 0.0001663551401869159, |
|
"loss": 0.0671, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 0.00015700934579439252, |
|
"loss": 0.0722, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 0.00014766355140186915, |
|
"loss": 0.0657, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.0001383177570093458, |
|
"loss": 0.0749, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 0.00012897196261682243, |
|
"loss": 0.0727, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 0.00011962616822429907, |
|
"loss": 0.0741, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 0.00011028037383177571, |
|
"loss": 0.0511, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 0.00010093457943925234, |
|
"loss": 0.0461, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 9.158878504672898e-05, |
|
"loss": 0.0517, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 8.224299065420562e-05, |
|
"loss": 0.0533, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 7.289719626168224e-05, |
|
"loss": 0.0577, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 6.355140186915888e-05, |
|
"loss": 0.0571, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 5.420560747663551e-05, |
|
"loss": 0.048, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 4.485981308411215e-05, |
|
"loss": 0.0542, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 3.551401869158879e-05, |
|
"loss": 0.0522, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"eval_loss": 0.39019492268562317, |
|
"eval_runtime": 581.0044, |
|
"eval_samples_per_second": 5.842, |
|
"eval_steps_per_second": 0.731, |
|
"eval_wer": 0.2725472310562171, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 2.6168224299065423e-05, |
|
"loss": 0.0524, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 1.6822429906542056e-05, |
|
"loss": 0.0501, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 7.476635514018692e-06, |
|
"loss": 0.0425, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 1035, |
|
"total_flos": 1.5036330849795359e+19, |
|
"train_loss": 1.3250362157965627, |
|
"train_runtime": 8304.6494, |
|
"train_samples_per_second": 3.963, |
|
"train_steps_per_second": 0.125 |
|
} |
|
], |
|
"max_steps": 1035, |
|
"num_train_epochs": 15, |
|
"total_flos": 1.5036330849795359e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|