{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.093366093366093, "global_step": 407, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0, "loss": 1.8118, "step": 4 }, { "epoch": 0.02, "learning_rate": 3.6585365853658536e-07, "loss": 1.8122, "step": 8 }, { "epoch": 0.03, "learning_rate": 8.53658536585366e-07, "loss": 1.8174, "step": 12 }, { "epoch": 0.04, "learning_rate": 1.3414634146341465e-06, "loss": 1.7616, "step": 16 }, { "epoch": 0.05, "learning_rate": 1.8292682926829268e-06, "loss": 1.6875, "step": 20 }, { "epoch": 0.06, "learning_rate": 2.317073170731708e-06, "loss": 1.5201, "step": 24 }, { "epoch": 0.07, "learning_rate": 2.8048780487804884e-06, "loss": 1.3982, "step": 28 }, { "epoch": 0.08, "learning_rate": 3.292682926829269e-06, "loss": 1.3541, "step": 32 }, { "epoch": 0.09, "learning_rate": 3.780487804878049e-06, "loss": 1.2092, "step": 36 }, { "epoch": 0.1, "learning_rate": 4.268292682926829e-06, "loss": 1.1599, "step": 40 }, { "epoch": 0.1, "eval_loss": 1.142654299736023, "eval_runtime": 101.9854, "eval_samples_per_second": 3.863, "eval_steps_per_second": 0.127, "eval_wer": 15.213946117274169, "step": 40 }, { "epoch": 1.01, "learning_rate": 4.75609756097561e-06, "loss": 1.0124, "step": 44 }, { "epoch": 1.02, "learning_rate": 5.243902439024391e-06, "loss": 0.9171, "step": 48 }, { "epoch": 1.03, "learning_rate": 5.731707317073171e-06, "loss": 0.8027, "step": 52 }, { "epoch": 1.04, "learning_rate": 6.219512195121951e-06, "loss": 0.7284, "step": 56 }, { "epoch": 1.05, "learning_rate": 6.707317073170733e-06, "loss": 0.6185, "step": 60 }, { "epoch": 1.06, "learning_rate": 7.1951219512195125e-06, "loss": 0.57, "step": 64 }, { "epoch": 1.07, "learning_rate": 7.682926829268293e-06, "loss": 0.4985, "step": 68 }, { "epoch": 1.08, "learning_rate": 8.170731707317073e-06, "loss": 0.488, "step": 72 }, { "epoch": 1.09, "learning_rate": 8.658536585365854e-06, "loss": 0.4569, "step": 76 }, { "epoch": 1.1, "learning_rate": 9.146341463414635e-06, "loss": 0.4655, "step": 80 }, { "epoch": 1.1, "eval_loss": 0.5613037943840027, "eval_runtime": 91.9697, "eval_samples_per_second": 4.284, "eval_steps_per_second": 0.141, "eval_wer": 17.591125198098258, "step": 80 }, { "epoch": 2.0, "learning_rate": 9.634146341463415e-06, "loss": 0.425, "step": 84 }, { "epoch": 2.01, "learning_rate": 9.96923076923077e-06, "loss": 0.4162, "step": 88 }, { "epoch": 2.02, "learning_rate": 9.846153846153848e-06, "loss": 0.3809, "step": 92 }, { "epoch": 2.03, "learning_rate": 9.723076923076924e-06, "loss": 0.3533, "step": 96 }, { "epoch": 2.04, "learning_rate": 9.600000000000001e-06, "loss": 0.3511, "step": 100 }, { "epoch": 2.05, "learning_rate": 9.476923076923079e-06, "loss": 0.3475, "step": 104 }, { "epoch": 2.06, "learning_rate": 9.353846153846155e-06, "loss": 0.321, "step": 108 }, { "epoch": 2.07, "learning_rate": 9.230769230769232e-06, "loss": 0.2859, "step": 112 }, { "epoch": 2.08, "learning_rate": 9.107692307692308e-06, "loss": 0.3191, "step": 116 }, { "epoch": 2.09, "learning_rate": 8.984615384615386e-06, "loss": 0.2753, "step": 120 }, { "epoch": 2.09, "eval_loss": 0.5241264700889587, "eval_runtime": 88.0526, "eval_samples_per_second": 4.475, "eval_steps_per_second": 0.148, "eval_wer": 17.21321467755699, "step": 120 }, { "epoch": 3.0, "learning_rate": 8.861538461538463e-06, "loss": 0.3104, "step": 124 }, { "epoch": 3.01, "learning_rate": 8.73846153846154e-06, "loss": 0.2734, "step": 128 }, { "epoch": 3.02, "learning_rate": 8.615384615384617e-06, "loss": 0.2608, "step": 132 }, { "epoch": 3.03, "learning_rate": 8.492307692307693e-06, "loss": 0.2509, "step": 136 }, { "epoch": 3.04, "learning_rate": 8.36923076923077e-06, "loss": 0.2548, "step": 140 }, { "epoch": 3.05, "learning_rate": 8.246153846153848e-06, "loss": 0.2469, "step": 144 }, { "epoch": 3.06, "learning_rate": 8.123076923076924e-06, "loss": 0.2231, "step": 148 }, { "epoch": 3.07, "learning_rate": 8.000000000000001e-06, "loss": 0.2138, "step": 152 }, { "epoch": 3.08, "learning_rate": 7.876923076923077e-06, "loss": 0.2349, "step": 156 }, { "epoch": 3.09, "learning_rate": 7.753846153846155e-06, "loss": 0.2077, "step": 160 }, { "epoch": 3.09, "eval_loss": 0.5241798758506775, "eval_runtime": 88.5317, "eval_samples_per_second": 4.45, "eval_steps_per_second": 0.147, "eval_wer": 17.26197732536877, "step": 160 }, { "epoch": 3.1, "learning_rate": 7.630769230769232e-06, "loss": 0.2322, "step": 164 }, { "epoch": 4.01, "learning_rate": 7.507692307692308e-06, "loss": 0.2036, "step": 168 }, { "epoch": 4.02, "learning_rate": 7.384615384615386e-06, "loss": 0.2058, "step": 172 }, { "epoch": 4.03, "learning_rate": 7.261538461538462e-06, "loss": 0.1797, "step": 176 }, { "epoch": 4.04, "learning_rate": 7.1384615384615385e-06, "loss": 0.186, "step": 180 }, { "epoch": 4.05, "learning_rate": 7.015384615384616e-06, "loss": 0.2035, "step": 184 }, { "epoch": 4.06, "learning_rate": 6.892307692307693e-06, "loss": 0.1794, "step": 188 }, { "epoch": 4.07, "learning_rate": 6.76923076923077e-06, "loss": 0.1589, "step": 192 }, { "epoch": 4.08, "learning_rate": 6.646153846153846e-06, "loss": 0.1879, "step": 196 }, { "epoch": 4.09, "learning_rate": 6.523076923076923e-06, "loss": 0.1636, "step": 200 }, { "epoch": 4.09, "eval_loss": 0.5289868712425232, "eval_runtime": 95.5188, "eval_samples_per_second": 4.125, "eval_steps_per_second": 0.136, "eval_wer": 17.66426916981592, "step": 200 }, { "epoch": 4.1, "learning_rate": 6.4000000000000006e-06, "loss": 0.1767, "step": 204 }, { "epoch": 5.01, "learning_rate": 6.276923076923077e-06, "loss": 0.1657, "step": 208 }, { "epoch": 5.02, "learning_rate": 6.153846153846155e-06, "loss": 0.1607, "step": 212 }, { "epoch": 5.03, "learning_rate": 6.030769230769231e-06, "loss": 0.1458, "step": 216 }, { "epoch": 5.04, "learning_rate": 5.907692307692308e-06, "loss": 0.1541, "step": 220 }, { "epoch": 5.05, "learning_rate": 5.784615384615385e-06, "loss": 0.1494, "step": 224 }, { "epoch": 5.06, "learning_rate": 5.661538461538462e-06, "loss": 0.144, "step": 228 }, { "epoch": 5.07, "learning_rate": 5.538461538461539e-06, "loss": 0.1311, "step": 232 }, { "epoch": 5.08, "learning_rate": 5.415384615384615e-06, "loss": 0.1411, "step": 236 }, { "epoch": 5.09, "learning_rate": 5.292307692307693e-06, "loss": 0.1322, "step": 240 }, { "epoch": 5.09, "eval_loss": 0.5350630283355713, "eval_runtime": 92.5111, "eval_samples_per_second": 4.259, "eval_steps_per_second": 0.141, "eval_wer": 18.2128489576984, "step": 240 }, { "epoch": 5.1, "learning_rate": 5.16923076923077e-06, "loss": 0.1436, "step": 244 }, { "epoch": 6.0, "learning_rate": 5.046153846153846e-06, "loss": 0.1375, "step": 248 }, { "epoch": 6.01, "learning_rate": 4.923076923076924e-06, "loss": 0.1361, "step": 252 }, { "epoch": 6.02, "learning_rate": 4.800000000000001e-06, "loss": 0.129, "step": 256 }, { "epoch": 6.03, "learning_rate": 4.676923076923077e-06, "loss": 0.1127, "step": 260 }, { "epoch": 6.04, "learning_rate": 4.553846153846154e-06, "loss": 0.1266, "step": 264 }, { "epoch": 6.05, "learning_rate": 4.430769230769232e-06, "loss": 0.1193, "step": 268 }, { "epoch": 6.06, "learning_rate": 4.307692307692308e-06, "loss": 0.1127, "step": 272 }, { "epoch": 6.07, "learning_rate": 4.184615384615385e-06, "loss": 0.1064, "step": 276 }, { "epoch": 6.08, "learning_rate": 4.061538461538462e-06, "loss": 0.123, "step": 280 }, { "epoch": 6.08, "eval_loss": 0.5429388284683228, "eval_runtime": 91.5818, "eval_samples_per_second": 4.302, "eval_steps_per_second": 0.142, "eval_wer": 18.907716689016212, "step": 280 }, { "epoch": 6.09, "learning_rate": 3.938461538461539e-06, "loss": 0.1057, "step": 284 }, { "epoch": 7.0, "learning_rate": 3.815384615384616e-06, "loss": 0.1258, "step": 288 }, { "epoch": 7.01, "learning_rate": 3.692307692307693e-06, "loss": 0.1108, "step": 292 }, { "epoch": 7.02, "learning_rate": 3.5692307692307692e-06, "loss": 0.1115, "step": 296 }, { "epoch": 7.03, "learning_rate": 3.4461538461538464e-06, "loss": 0.0998, "step": 300 }, { "epoch": 7.04, "learning_rate": 3.323076923076923e-06, "loss": 0.1106, "step": 304 }, { "epoch": 7.05, "learning_rate": 3.2000000000000003e-06, "loss": 0.1045, "step": 308 }, { "epoch": 7.06, "learning_rate": 3.0769230769230774e-06, "loss": 0.0908, "step": 312 }, { "epoch": 7.07, "learning_rate": 2.953846153846154e-06, "loss": 0.0931, "step": 316 }, { "epoch": 7.08, "learning_rate": 2.830769230769231e-06, "loss": 0.1074, "step": 320 }, { "epoch": 7.08, "eval_loss": 0.5500437021255493, "eval_runtime": 104.0907, "eval_samples_per_second": 3.785, "eval_steps_per_second": 0.125, "eval_wer": 19.054004632451544, "step": 320 }, { "epoch": 7.09, "learning_rate": 2.7076923076923076e-06, "loss": 0.0937, "step": 324 }, { "epoch": 7.1, "learning_rate": 2.584615384615385e-06, "loss": 0.1091, "step": 328 }, { "epoch": 8.01, "learning_rate": 2.461538461538462e-06, "loss": 0.0951, "step": 332 }, { "epoch": 8.02, "learning_rate": 2.3384615384615387e-06, "loss": 0.1003, "step": 336 }, { "epoch": 8.03, "learning_rate": 2.215384615384616e-06, "loss": 0.0836, "step": 340 }, { "epoch": 8.04, "learning_rate": 2.0923076923076926e-06, "loss": 0.0907, "step": 344 }, { "epoch": 8.05, "learning_rate": 1.9692307692307693e-06, "loss": 0.1013, "step": 348 }, { "epoch": 8.06, "learning_rate": 1.8461538461538465e-06, "loss": 0.0891, "step": 352 }, { "epoch": 8.07, "learning_rate": 1.7230769230769232e-06, "loss": 0.077, "step": 356 }, { "epoch": 8.08, "learning_rate": 1.6000000000000001e-06, "loss": 0.1007, "step": 360 }, { "epoch": 8.08, "eval_loss": 0.5552565455436707, "eval_runtime": 88.458, "eval_samples_per_second": 4.454, "eval_steps_per_second": 0.147, "eval_wer": 19.310008533463368, "step": 360 }, { "epoch": 8.09, "learning_rate": 1.476923076923077e-06, "loss": 0.0849, "step": 364 }, { "epoch": 8.1, "learning_rate": 1.3538461538461538e-06, "loss": 0.0971, "step": 368 }, { "epoch": 9.01, "learning_rate": 1.230769230769231e-06, "loss": 0.0876, "step": 372 }, { "epoch": 9.02, "learning_rate": 1.107692307692308e-06, "loss": 0.0879, "step": 376 }, { "epoch": 9.03, "learning_rate": 9.846153846153847e-07, "loss": 0.0805, "step": 380 }, { "epoch": 9.04, "learning_rate": 8.615384615384616e-07, "loss": 0.0888, "step": 384 }, { "epoch": 9.05, "learning_rate": 7.384615384615385e-07, "loss": 0.0858, "step": 388 }, { "epoch": 9.06, "learning_rate": 6.153846153846155e-07, "loss": 0.0825, "step": 392 }, { "epoch": 9.07, "learning_rate": 4.923076923076923e-07, "loss": 0.0748, "step": 396 }, { "epoch": 9.08, "learning_rate": 3.6923076923076927e-07, "loss": 0.0876, "step": 400 }, { "epoch": 9.08, "eval_loss": 0.5568162202835083, "eval_runtime": 89.7223, "eval_samples_per_second": 4.391, "eval_steps_per_second": 0.145, "eval_wer": 19.3465805193222, "step": 400 }, { "epoch": 9.09, "learning_rate": 2.4615384615384616e-07, "loss": 0.0802, "step": 404 }, { "epoch": 9.09, "step": 407, "total_flos": 6.36398180352e+17, "train_loss": 0.35074408769753995, "train_runtime": 2707.3827, "train_samples_per_second": 9.621, "train_steps_per_second": 0.15 } ], "max_steps": 407, "num_train_epochs": 9223372036854775807, "total_flos": 6.36398180352e+17, "trial_name": null, "trial_params": null }