|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 27.272727272727273, |
|
"global_step": 3600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.3999999999999997e-05, |
|
"loss": 17.8657, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.7999999999999994e-05, |
|
"loss": 12.104, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.199999999999999e-05, |
|
"loss": 4.7542, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 9.599999999999999e-05, |
|
"loss": 3.5251, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 3.2968, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00014399999999999998, |
|
"loss": 3.1885, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.000168, |
|
"loss": 3.173, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00019199999999999998, |
|
"loss": 3.1139, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00021599999999999996, |
|
"loss": 3.103, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 3.0862, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 3.177677869796753, |
|
"eval_runtime": 53.6086, |
|
"eval_samples_per_second": 22.403, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00026399999999999997, |
|
"loss": 3.0227, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 2.6934, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.0002982658959537572, |
|
"loss": 2.1587, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00029479768786127165, |
|
"loss": 1.7134, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.0002913294797687861, |
|
"loss": 1.4758, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.00028786127167630053, |
|
"loss": 1.311, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.000284393063583815, |
|
"loss": 1.1944, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.00028092485549132947, |
|
"loss": 1.082, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0002774566473988439, |
|
"loss": 1.024, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.00027398843930635835, |
|
"loss": 0.9718, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 1.0074659585952759, |
|
"eval_runtime": 53.837, |
|
"eval_samples_per_second": 22.308, |
|
"eval_wer": 0.8955532574974147, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.0002705202312138728, |
|
"loss": 0.8674, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.0002670520231213873, |
|
"loss": 0.8369, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.0002635838150289017, |
|
"loss": 0.8437, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.00026011560693641616, |
|
"loss": 0.7578, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.00025664739884393063, |
|
"loss": 0.7275, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.00025317919075144504, |
|
"loss": 0.7207, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.0002497109826589595, |
|
"loss": 0.6415, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.000246242774566474, |
|
"loss": 0.6446, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.00024277456647398842, |
|
"loss": 0.6243, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.0002393063583815029, |
|
"loss": 0.6085, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 0.8931847214698792, |
|
"eval_runtime": 53.9711, |
|
"eval_samples_per_second": 22.253, |
|
"eval_wer": 0.797015807357069, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 0.00023583815028901733, |
|
"loss": 0.5279, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 0.00023236994219653174, |
|
"loss": 0.5279, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0002289017341040462, |
|
"loss": 0.5421, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 0.00022543352601156065, |
|
"loss": 0.4856, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 0.00022196531791907512, |
|
"loss": 0.5182, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 0.00021849710982658956, |
|
"loss": 0.4756, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 0.00021502890173410403, |
|
"loss": 0.479, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 0.00021156069364161847, |
|
"loss": 0.4419, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 0.00020809248554913294, |
|
"loss": 0.448, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 0.00020462427745664738, |
|
"loss": 0.4295, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"eval_loss": 0.9030922055244446, |
|
"eval_runtime": 54.1136, |
|
"eval_samples_per_second": 22.194, |
|
"eval_wer": 0.7748559609986704, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"learning_rate": 0.00020115606936416184, |
|
"loss": 0.3976, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 0.00019768786127167629, |
|
"loss": 0.3699, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 0.00019421965317919073, |
|
"loss": 0.4159, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 0.0001907514450867052, |
|
"loss": 0.3221, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 0.00018728323699421963, |
|
"loss": 0.3642, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.0001838150289017341, |
|
"loss": 0.3832, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 0.00018034682080924854, |
|
"loss": 0.3367, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 0.000176878612716763, |
|
"loss": 0.3152, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 0.00017341040462427745, |
|
"loss": 0.3246, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 0.00016994219653179192, |
|
"loss": 0.3012, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"eval_loss": 0.9842168688774109, |
|
"eval_runtime": 54.3687, |
|
"eval_samples_per_second": 22.09, |
|
"eval_wer": 0.7636283055104152, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 0.00016647398843930633, |
|
"loss": 0.3051, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 0.00016300578034682077, |
|
"loss": 0.2966, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 0.00015953757225433524, |
|
"loss": 0.309, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 0.00015606936416184968, |
|
"loss": 0.2553, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 0.00015260115606936415, |
|
"loss": 0.279, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 0.0001491329479768786, |
|
"loss": 0.2962, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"learning_rate": 0.00014566473988439306, |
|
"loss": 0.2829, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 17.58, |
|
"learning_rate": 0.0001421965317919075, |
|
"loss": 0.2428, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 17.88, |
|
"learning_rate": 0.00013872832369942194, |
|
"loss": 0.2434, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 0.0001352601156069364, |
|
"loss": 0.2513, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"eval_loss": 1.0579547882080078, |
|
"eval_runtime": 84.4423, |
|
"eval_samples_per_second": 14.223, |
|
"eval_wer": 0.7682080070911508, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 18.48, |
|
"learning_rate": 0.00013179190751445085, |
|
"loss": 0.2331, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"learning_rate": 0.00012832369942196532, |
|
"loss": 0.2275, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 19.09, |
|
"learning_rate": 0.00012485549132947976, |
|
"loss": 0.2275, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 19.39, |
|
"learning_rate": 0.00012138728323699421, |
|
"loss": 0.2174, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 0.00011791907514450866, |
|
"loss": 0.2202, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0001144508670520231, |
|
"loss": 0.2291, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 20.3, |
|
"learning_rate": 0.00011098265895953756, |
|
"loss": 0.1909, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 20.61, |
|
"learning_rate": 0.00010751445086705201, |
|
"loss": 0.1897, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 0.00010404624277456647, |
|
"loss": 0.22, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 21.21, |
|
"learning_rate": 0.00010057803468208092, |
|
"loss": 0.194, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 21.21, |
|
"eval_loss": 1.0953465700149536, |
|
"eval_runtime": 79.9574, |
|
"eval_samples_per_second": 15.02, |
|
"eval_wer": 0.7524006500221598, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 21.52, |
|
"learning_rate": 9.710982658959536e-05, |
|
"loss": 0.1862, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 21.82, |
|
"learning_rate": 9.364161849710982e-05, |
|
"loss": 0.2012, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 22.12, |
|
"learning_rate": 9.017341040462427e-05, |
|
"loss": 0.1823, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 22.42, |
|
"learning_rate": 8.670520231213873e-05, |
|
"loss": 0.1885, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 8.323699421965317e-05, |
|
"loss": 0.1616, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"learning_rate": 7.976878612716762e-05, |
|
"loss": 0.1893, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 7.630057803468207e-05, |
|
"loss": 0.1879, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 23.64, |
|
"learning_rate": 7.283236994219653e-05, |
|
"loss": 0.1759, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"learning_rate": 6.936416184971097e-05, |
|
"loss": 0.1703, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 24.24, |
|
"learning_rate": 6.589595375722542e-05, |
|
"loss": 0.1679, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 24.24, |
|
"eval_loss": 1.1073198318481445, |
|
"eval_runtime": 79.6701, |
|
"eval_samples_per_second": 15.075, |
|
"eval_wer": 0.7289112128822574, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 24.55, |
|
"learning_rate": 6.242774566473988e-05, |
|
"loss": 0.1663, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 24.85, |
|
"learning_rate": 5.895953757225433e-05, |
|
"loss": 0.1726, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 25.15, |
|
"learning_rate": 5.549132947976878e-05, |
|
"loss": 0.1733, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 25.45, |
|
"learning_rate": 5.2023121387283234e-05, |
|
"loss": 0.1764, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 25.76, |
|
"learning_rate": 4.855491329479768e-05, |
|
"loss": 0.14, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"learning_rate": 4.5086705202312136e-05, |
|
"loss": 0.164, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 26.36, |
|
"learning_rate": 4.161849710982658e-05, |
|
"loss": 0.1569, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 3.815028901734104e-05, |
|
"loss": 0.1674, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"learning_rate": 3.4682080924855485e-05, |
|
"loss": 0.1635, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 3.121387283236994e-05, |
|
"loss": 0.1372, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"eval_loss": 1.149294376373291, |
|
"eval_runtime": 54.3637, |
|
"eval_samples_per_second": 22.092, |
|
"eval_wer": 0.7225587235928498, |
|
"step": 3600 |
|
} |
|
], |
|
"max_steps": 3960, |
|
"num_train_epochs": 30, |
|
"total_flos": 1.107485248885501e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|