{ "best_metric": null, "best_model_checkpoint": null, "epoch": 27.272727272727273, "global_step": 3600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3, "learning_rate": 2.3999999999999997e-05, "loss": 17.8657, "step": 40 }, { "epoch": 0.61, "learning_rate": 4.7999999999999994e-05, "loss": 12.104, "step": 80 }, { "epoch": 0.91, "learning_rate": 7.199999999999999e-05, "loss": 4.7542, "step": 120 }, { "epoch": 1.21, "learning_rate": 9.599999999999999e-05, "loss": 3.5251, "step": 160 }, { "epoch": 1.52, "learning_rate": 0.00011999999999999999, "loss": 3.2968, "step": 200 }, { "epoch": 1.82, "learning_rate": 0.00014399999999999998, "loss": 3.1885, "step": 240 }, { "epoch": 2.12, "learning_rate": 0.000168, "loss": 3.173, "step": 280 }, { "epoch": 2.42, "learning_rate": 0.00019199999999999998, "loss": 3.1139, "step": 320 }, { "epoch": 2.73, "learning_rate": 0.00021599999999999996, "loss": 3.103, "step": 360 }, { "epoch": 3.03, "learning_rate": 0.00023999999999999998, "loss": 3.0862, "step": 400 }, { "epoch": 3.03, "eval_loss": 3.177677869796753, "eval_runtime": 53.6086, "eval_samples_per_second": 22.403, "eval_wer": 1.0, "step": 400 }, { "epoch": 3.33, "learning_rate": 0.00026399999999999997, "loss": 3.0227, "step": 440 }, { "epoch": 3.64, "learning_rate": 0.00028799999999999995, "loss": 2.6934, "step": 480 }, { "epoch": 3.94, "learning_rate": 0.0002982658959537572, "loss": 2.1587, "step": 520 }, { "epoch": 4.24, "learning_rate": 0.00029479768786127165, "loss": 1.7134, "step": 560 }, { "epoch": 4.55, "learning_rate": 0.0002913294797687861, "loss": 1.4758, "step": 600 }, { "epoch": 4.85, "learning_rate": 0.00028786127167630053, "loss": 1.311, "step": 640 }, { "epoch": 5.15, "learning_rate": 0.000284393063583815, "loss": 1.1944, "step": 680 }, { "epoch": 5.45, "learning_rate": 0.00028092485549132947, "loss": 1.082, "step": 720 }, { "epoch": 5.76, "learning_rate": 0.0002774566473988439, "loss": 1.024, "step": 760 }, { "epoch": 6.06, "learning_rate": 0.00027398843930635835, "loss": 0.9718, "step": 800 }, { "epoch": 6.06, "eval_loss": 1.0074659585952759, "eval_runtime": 53.837, "eval_samples_per_second": 22.308, "eval_wer": 0.8955532574974147, "step": 800 }, { "epoch": 6.36, "learning_rate": 0.0002705202312138728, "loss": 0.8674, "step": 840 }, { "epoch": 6.67, "learning_rate": 0.0002670520231213873, "loss": 0.8369, "step": 880 }, { "epoch": 6.97, "learning_rate": 0.0002635838150289017, "loss": 0.8437, "step": 920 }, { "epoch": 7.27, "learning_rate": 0.00026011560693641616, "loss": 0.7578, "step": 960 }, { "epoch": 7.58, "learning_rate": 0.00025664739884393063, "loss": 0.7275, "step": 1000 }, { "epoch": 7.88, "learning_rate": 0.00025317919075144504, "loss": 0.7207, "step": 1040 }, { "epoch": 8.18, "learning_rate": 0.0002497109826589595, "loss": 0.6415, "step": 1080 }, { "epoch": 8.48, "learning_rate": 0.000246242774566474, "loss": 0.6446, "step": 1120 }, { "epoch": 8.79, "learning_rate": 0.00024277456647398842, "loss": 0.6243, "step": 1160 }, { "epoch": 9.09, "learning_rate": 0.0002393063583815029, "loss": 0.6085, "step": 1200 }, { "epoch": 9.09, "eval_loss": 0.8931847214698792, "eval_runtime": 53.9711, "eval_samples_per_second": 22.253, "eval_wer": 0.797015807357069, "step": 1200 }, { "epoch": 9.39, "learning_rate": 0.00023583815028901733, "loss": 0.5279, "step": 1240 }, { "epoch": 9.7, "learning_rate": 0.00023236994219653174, "loss": 0.5279, "step": 1280 }, { "epoch": 10.0, "learning_rate": 0.0002289017341040462, "loss": 0.5421, "step": 1320 }, { "epoch": 10.3, "learning_rate": 0.00022543352601156065, "loss": 0.4856, "step": 1360 }, { "epoch": 10.61, "learning_rate": 0.00022196531791907512, "loss": 0.5182, "step": 1400 }, { "epoch": 10.91, "learning_rate": 0.00021849710982658956, "loss": 0.4756, "step": 1440 }, { "epoch": 11.21, "learning_rate": 0.00021502890173410403, "loss": 0.479, "step": 1480 }, { "epoch": 11.52, "learning_rate": 0.00021156069364161847, "loss": 0.4419, "step": 1520 }, { "epoch": 11.82, "learning_rate": 0.00020809248554913294, "loss": 0.448, "step": 1560 }, { "epoch": 12.12, "learning_rate": 0.00020462427745664738, "loss": 0.4295, "step": 1600 }, { "epoch": 12.12, "eval_loss": 0.9030922055244446, "eval_runtime": 54.1136, "eval_samples_per_second": 22.194, "eval_wer": 0.7748559609986704, "step": 1600 }, { "epoch": 12.42, "learning_rate": 0.00020115606936416184, "loss": 0.3976, "step": 1640 }, { "epoch": 12.73, "learning_rate": 0.00019768786127167629, "loss": 0.3699, "step": 1680 }, { "epoch": 13.03, "learning_rate": 0.00019421965317919073, "loss": 0.4159, "step": 1720 }, { "epoch": 13.33, "learning_rate": 0.0001907514450867052, "loss": 0.3221, "step": 1760 }, { "epoch": 13.64, "learning_rate": 0.00018728323699421963, "loss": 0.3642, "step": 1800 }, { "epoch": 13.94, "learning_rate": 0.0001838150289017341, "loss": 0.3832, "step": 1840 }, { "epoch": 14.24, "learning_rate": 0.00018034682080924854, "loss": 0.3367, "step": 1880 }, { "epoch": 14.55, "learning_rate": 0.000176878612716763, "loss": 0.3152, "step": 1920 }, { "epoch": 14.85, "learning_rate": 0.00017341040462427745, "loss": 0.3246, "step": 1960 }, { "epoch": 15.15, "learning_rate": 0.00016994219653179192, "loss": 0.3012, "step": 2000 }, { "epoch": 15.15, "eval_loss": 0.9842168688774109, "eval_runtime": 54.3687, "eval_samples_per_second": 22.09, "eval_wer": 0.7636283055104152, "step": 2000 }, { "epoch": 15.45, "learning_rate": 0.00016647398843930633, "loss": 0.3051, "step": 2040 }, { "epoch": 15.76, "learning_rate": 0.00016300578034682077, "loss": 0.2966, "step": 2080 }, { "epoch": 16.06, "learning_rate": 0.00015953757225433524, "loss": 0.309, "step": 2120 }, { "epoch": 16.36, "learning_rate": 0.00015606936416184968, "loss": 0.2553, "step": 2160 }, { "epoch": 16.67, "learning_rate": 0.00015260115606936415, "loss": 0.279, "step": 2200 }, { "epoch": 16.97, "learning_rate": 0.0001491329479768786, "loss": 0.2962, "step": 2240 }, { "epoch": 17.27, "learning_rate": 0.00014566473988439306, "loss": 0.2829, "step": 2280 }, { "epoch": 17.58, "learning_rate": 0.0001421965317919075, "loss": 0.2428, "step": 2320 }, { "epoch": 17.88, "learning_rate": 0.00013872832369942194, "loss": 0.2434, "step": 2360 }, { "epoch": 18.18, "learning_rate": 0.0001352601156069364, "loss": 0.2513, "step": 2400 }, { "epoch": 18.18, "eval_loss": 1.0579547882080078, "eval_runtime": 84.4423, "eval_samples_per_second": 14.223, "eval_wer": 0.7682080070911508, "step": 2400 }, { "epoch": 18.48, "learning_rate": 0.00013179190751445085, "loss": 0.2331, "step": 2440 }, { "epoch": 18.79, "learning_rate": 0.00012832369942196532, "loss": 0.2275, "step": 2480 }, { "epoch": 19.09, "learning_rate": 0.00012485549132947976, "loss": 0.2275, "step": 2520 }, { "epoch": 19.39, "learning_rate": 0.00012138728323699421, "loss": 0.2174, "step": 2560 }, { "epoch": 19.7, "learning_rate": 0.00011791907514450866, "loss": 0.2202, "step": 2600 }, { "epoch": 20.0, "learning_rate": 0.0001144508670520231, "loss": 0.2291, "step": 2640 }, { "epoch": 20.3, "learning_rate": 0.00011098265895953756, "loss": 0.1909, "step": 2680 }, { "epoch": 20.61, "learning_rate": 0.00010751445086705201, "loss": 0.1897, "step": 2720 }, { "epoch": 20.91, "learning_rate": 0.00010404624277456647, "loss": 0.22, "step": 2760 }, { "epoch": 21.21, "learning_rate": 0.00010057803468208092, "loss": 0.194, "step": 2800 }, { "epoch": 21.21, "eval_loss": 1.0953465700149536, "eval_runtime": 79.9574, "eval_samples_per_second": 15.02, "eval_wer": 0.7524006500221598, "step": 2800 }, { "epoch": 21.52, "learning_rate": 9.710982658959536e-05, "loss": 0.1862, "step": 2840 }, { "epoch": 21.82, "learning_rate": 9.364161849710982e-05, "loss": 0.2012, "step": 2880 }, { "epoch": 22.12, "learning_rate": 9.017341040462427e-05, "loss": 0.1823, "step": 2920 }, { "epoch": 22.42, "learning_rate": 8.670520231213873e-05, "loss": 0.1885, "step": 2960 }, { "epoch": 22.73, "learning_rate": 8.323699421965317e-05, "loss": 0.1616, "step": 3000 }, { "epoch": 23.03, "learning_rate": 7.976878612716762e-05, "loss": 0.1893, "step": 3040 }, { "epoch": 23.33, "learning_rate": 7.630057803468207e-05, "loss": 0.1879, "step": 3080 }, { "epoch": 23.64, "learning_rate": 7.283236994219653e-05, "loss": 0.1759, "step": 3120 }, { "epoch": 23.94, "learning_rate": 6.936416184971097e-05, "loss": 0.1703, "step": 3160 }, { "epoch": 24.24, "learning_rate": 6.589595375722542e-05, "loss": 0.1679, "step": 3200 }, { "epoch": 24.24, "eval_loss": 1.1073198318481445, "eval_runtime": 79.6701, "eval_samples_per_second": 15.075, "eval_wer": 0.7289112128822574, "step": 3200 }, { "epoch": 24.55, "learning_rate": 6.242774566473988e-05, "loss": 0.1663, "step": 3240 }, { "epoch": 24.85, "learning_rate": 5.895953757225433e-05, "loss": 0.1726, "step": 3280 }, { "epoch": 25.15, "learning_rate": 5.549132947976878e-05, "loss": 0.1733, "step": 3320 }, { "epoch": 25.45, "learning_rate": 5.2023121387283234e-05, "loss": 0.1764, "step": 3360 }, { "epoch": 25.76, "learning_rate": 4.855491329479768e-05, "loss": 0.14, "step": 3400 }, { "epoch": 26.06, "learning_rate": 4.5086705202312136e-05, "loss": 0.164, "step": 3440 }, { "epoch": 26.36, "learning_rate": 4.161849710982658e-05, "loss": 0.1569, "step": 3480 }, { "epoch": 26.67, "learning_rate": 3.815028901734104e-05, "loss": 0.1674, "step": 3520 }, { "epoch": 26.97, "learning_rate": 3.4682080924855485e-05, "loss": 0.1635, "step": 3560 }, { "epoch": 27.27, "learning_rate": 3.121387283236994e-05, "loss": 0.1372, "step": 3600 }, { "epoch": 27.27, "eval_loss": 1.149294376373291, "eval_runtime": 54.3637, "eval_samples_per_second": 22.092, "eval_wer": 0.7225587235928498, "step": 3600 } ], "max_steps": 3960, "num_train_epochs": 30, "total_flos": 1.107485248885501e+19, "trial_name": null, "trial_params": null }