{ "best_metric": 158.1031454574485, "best_model_checkpoint": "./checkpoint-88", "epoch": 2.3214285714285716, "global_step": 112, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0, "loss": 1.6569, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.347826086956522e-07, "loss": 1.6564, "step": 2 }, { "epoch": 0.03, "learning_rate": 8.695652173913044e-07, "loss": 1.6518, "step": 3 }, { "epoch": 0.04, "learning_rate": 1.3043478260869566e-06, "loss": 1.6233, "step": 4 }, { "epoch": 0.04, "learning_rate": 1.7391304347826088e-06, "loss": 1.712, "step": 5 }, { "epoch": 0.05, "learning_rate": 2.173913043478261e-06, "loss": 1.6476, "step": 6 }, { "epoch": 0.06, "learning_rate": 2.6086956521739132e-06, "loss": 1.5716, "step": 7 }, { "epoch": 0.07, "learning_rate": 3.043478260869566e-06, "loss": 1.5909, "step": 8 }, { "epoch": 0.08, "learning_rate": 3.4782608695652175e-06, "loss": 1.5905, "step": 9 }, { "epoch": 0.09, "learning_rate": 3.91304347826087e-06, "loss": 1.5486, "step": 10 }, { "epoch": 0.1, "learning_rate": 4.347826086956522e-06, "loss": 1.5299, "step": 11 }, { "epoch": 0.1, "eval_loss": 1.5621598958969116, "eval_runtime": 105.0814, "eval_samples_per_second": 3.14, "eval_steps_per_second": 0.105, "eval_wer": 219.67108414497844, "step": 11 }, { "epoch": 0.11, "learning_rate": 4.782608695652174e-06, "loss": 1.4537, "step": 12 }, { "epoch": 0.12, "learning_rate": 5.2173913043478265e-06, "loss": 1.3729, "step": 13 }, { "epoch": 0.12, "learning_rate": 5.652173913043479e-06, "loss": 1.4921, "step": 14 }, { "epoch": 0.13, "learning_rate": 6.086956521739132e-06, "loss": 1.5069, "step": 15 }, { "epoch": 0.14, "learning_rate": 6.521739130434783e-06, "loss": 1.388, "step": 16 }, { "epoch": 0.15, "learning_rate": 6.956521739130435e-06, "loss": 1.3857, "step": 17 }, { "epoch": 0.16, "learning_rate": 7.391304347826087e-06, "loss": 1.3389, "step": 18 }, { "epoch": 0.17, "learning_rate": 7.82608695652174e-06, "loss": 1.3089, "step": 19 }, { "epoch": 0.18, "learning_rate": 8.260869565217392e-06, "loss": 1.2118, "step": 20 }, { "epoch": 0.19, "learning_rate": 8.695652173913044e-06, "loss": 1.1634, "step": 21 }, { "epoch": 0.2, "learning_rate": 9.130434782608697e-06, "loss": 1.1908, "step": 22 }, { "epoch": 0.2, "eval_loss": 1.3651723861694336, "eval_runtime": 110.8115, "eval_samples_per_second": 2.978, "eval_steps_per_second": 0.099, "eval_wer": 192.2401405077439, "step": 22 }, { "epoch": 0.21, "learning_rate": 9.565217391304349e-06, "loss": 1.1192, "step": 23 }, { "epoch": 0.21, "learning_rate": 1e-05, "loss": 1.2042, "step": 24 }, { "epoch": 0.22, "learning_rate": 9.887640449438202e-06, "loss": 1.1448, "step": 25 }, { "epoch": 0.23, "learning_rate": 9.775280898876405e-06, "loss": 1.1393, "step": 26 }, { "epoch": 0.24, "learning_rate": 9.662921348314608e-06, "loss": 1.1482, "step": 27 }, { "epoch": 0.25, "learning_rate": 9.55056179775281e-06, "loss": 1.179, "step": 28 }, { "epoch": 0.26, "learning_rate": 9.438202247191012e-06, "loss": 0.9847, "step": 29 }, { "epoch": 0.27, "learning_rate": 9.325842696629213e-06, "loss": 1.1149, "step": 30 }, { "epoch": 0.28, "learning_rate": 9.213483146067417e-06, "loss": 1.0657, "step": 31 }, { "epoch": 0.29, "learning_rate": 9.101123595505619e-06, "loss": 1.0028, "step": 32 }, { "epoch": 0.29, "learning_rate": 8.988764044943822e-06, "loss": 1.1161, "step": 33 }, { "epoch": 0.29, "eval_loss": 1.1921106576919556, "eval_runtime": 103.0628, "eval_samples_per_second": 3.202, "eval_steps_per_second": 0.107, "eval_wer": 200.23950183618075, "step": 33 }, { "epoch": 0.3, "learning_rate": 8.876404494382023e-06, "loss": 1.0931, "step": 34 }, { "epoch": 0.31, "learning_rate": 8.764044943820226e-06, "loss": 1.0635, "step": 35 }, { "epoch": 0.32, "learning_rate": 8.651685393258428e-06, "loss": 1.0671, "step": 36 }, { "epoch": 0.33, "learning_rate": 8.53932584269663e-06, "loss": 1.0251, "step": 37 }, { "epoch": 0.34, "learning_rate": 8.426966292134832e-06, "loss": 1.0486, "step": 38 }, { "epoch": 1.01, "learning_rate": 8.314606741573035e-06, "loss": 0.9926, "step": 39 }, { "epoch": 1.02, "learning_rate": 8.202247191011237e-06, "loss": 1.0232, "step": 40 }, { "epoch": 1.03, "learning_rate": 8.08988764044944e-06, "loss": 0.9421, "step": 41 }, { "epoch": 1.04, "learning_rate": 7.97752808988764e-06, "loss": 0.9541, "step": 42 }, { "epoch": 1.04, "learning_rate": 7.865168539325843e-06, "loss": 0.9639, "step": 43 }, { "epoch": 1.05, "learning_rate": 7.752808988764046e-06, "loss": 0.9216, "step": 44 }, { "epoch": 1.05, "eval_loss": 1.1263455152511597, "eval_runtime": 93.6663, "eval_samples_per_second": 3.523, "eval_steps_per_second": 0.117, "eval_wer": 186.52403001756346, "step": 44 }, { "epoch": 1.06, "learning_rate": 7.640449438202247e-06, "loss": 0.9097, "step": 45 }, { "epoch": 1.07, "learning_rate": 7.5280898876404495e-06, "loss": 0.8688, "step": 46 }, { "epoch": 1.08, "learning_rate": 7.415730337078652e-06, "loss": 0.9019, "step": 47 }, { "epoch": 1.09, "learning_rate": 7.303370786516854e-06, "loss": 0.9135, "step": 48 }, { "epoch": 1.1, "learning_rate": 7.191011235955056e-06, "loss": 0.9033, "step": 49 }, { "epoch": 1.11, "learning_rate": 7.078651685393258e-06, "loss": 0.8575, "step": 50 }, { "epoch": 1.12, "learning_rate": 6.966292134831461e-06, "loss": 0.8276, "step": 51 }, { "epoch": 1.12, "learning_rate": 6.853932584269663e-06, "loss": 0.9276, "step": 52 }, { "epoch": 1.13, "learning_rate": 6.741573033707865e-06, "loss": 0.9186, "step": 53 }, { "epoch": 1.14, "learning_rate": 6.629213483146067e-06, "loss": 0.8693, "step": 54 }, { "epoch": 1.15, "learning_rate": 6.51685393258427e-06, "loss": 0.8441, "step": 55 }, { "epoch": 1.15, "eval_loss": 1.0945535898208618, "eval_runtime": 102.0356, "eval_samples_per_second": 3.234, "eval_steps_per_second": 0.108, "eval_wer": 179.32300814306242, "step": 55 }, { "epoch": 1.16, "learning_rate": 6.404494382022472e-06, "loss": 0.8189, "step": 56 }, { "epoch": 1.17, "learning_rate": 6.292134831460674e-06, "loss": 0.8047, "step": 57 }, { "epoch": 1.18, "learning_rate": 6.179775280898876e-06, "loss": 0.7858, "step": 58 }, { "epoch": 1.19, "learning_rate": 6.06741573033708e-06, "loss": 0.7312, "step": 59 }, { "epoch": 1.2, "learning_rate": 5.955056179775281e-06, "loss": 0.7638, "step": 60 }, { "epoch": 1.21, "learning_rate": 5.842696629213483e-06, "loss": 0.7374, "step": 61 }, { "epoch": 1.21, "learning_rate": 5.730337078651685e-06, "loss": 0.7986, "step": 62 }, { "epoch": 1.22, "learning_rate": 5.617977528089889e-06, "loss": 0.762, "step": 63 }, { "epoch": 1.23, "learning_rate": 5.50561797752809e-06, "loss": 0.785, "step": 64 }, { "epoch": 1.24, "learning_rate": 5.393258426966292e-06, "loss": 0.8349, "step": 65 }, { "epoch": 1.25, "learning_rate": 5.280898876404494e-06, "loss": 0.8505, "step": 66 }, { "epoch": 1.25, "eval_loss": 1.0748353004455566, "eval_runtime": 93.3893, "eval_samples_per_second": 3.534, "eval_steps_per_second": 0.118, "eval_wer": 159.68385757624142, "step": 66 }, { "epoch": 1.26, "learning_rate": 5.168539325842698e-06, "loss": 0.7309, "step": 67 }, { "epoch": 1.27, "learning_rate": 5.0561797752809e-06, "loss": 0.8261, "step": 68 }, { "epoch": 1.28, "learning_rate": 4.943820224719101e-06, "loss": 0.8053, "step": 69 }, { "epoch": 1.29, "learning_rate": 4.831460674157304e-06, "loss": 0.7672, "step": 70 }, { "epoch": 1.29, "learning_rate": 4.719101123595506e-06, "loss": 0.8692, "step": 71 }, { "epoch": 1.3, "learning_rate": 4.606741573033709e-06, "loss": 0.8588, "step": 72 }, { "epoch": 1.31, "learning_rate": 4.494382022471911e-06, "loss": 0.8277, "step": 73 }, { "epoch": 1.32, "learning_rate": 4.382022471910113e-06, "loss": 0.8387, "step": 74 }, { "epoch": 1.33, "learning_rate": 4.269662921348315e-06, "loss": 0.801, "step": 75 }, { "epoch": 1.34, "learning_rate": 4.157303370786518e-06, "loss": 0.7801, "step": 76 }, { "epoch": 2.01, "learning_rate": 4.04494382022472e-06, "loss": 0.7844, "step": 77 }, { "epoch": 2.01, "eval_loss": 1.0585265159606934, "eval_runtime": 87.6428, "eval_samples_per_second": 3.765, "eval_steps_per_second": 0.126, "eval_wer": 163.2923519080313, "step": 77 }, { "epoch": 2.02, "learning_rate": 3.932584269662922e-06, "loss": 0.8227, "step": 78 }, { "epoch": 2.03, "learning_rate": 3.820224719101124e-06, "loss": 0.757, "step": 79 }, { "epoch": 2.04, "learning_rate": 3.707865168539326e-06, "loss": 0.7713, "step": 80 }, { "epoch": 2.04, "learning_rate": 3.595505617977528e-06, "loss": 0.7782, "step": 81 }, { "epoch": 2.05, "learning_rate": 3.4831460674157306e-06, "loss": 0.7468, "step": 82 }, { "epoch": 2.06, "learning_rate": 3.3707865168539327e-06, "loss": 0.7457, "step": 83 }, { "epoch": 2.07, "learning_rate": 3.258426966292135e-06, "loss": 0.7126, "step": 84 }, { "epoch": 2.08, "learning_rate": 3.146067415730337e-06, "loss": 0.7476, "step": 85 }, { "epoch": 2.09, "learning_rate": 3.03370786516854e-06, "loss": 0.76, "step": 86 }, { "epoch": 2.1, "learning_rate": 2.9213483146067416e-06, "loss": 0.7673, "step": 87 }, { "epoch": 2.11, "learning_rate": 2.8089887640449444e-06, "loss": 0.7208, "step": 88 }, { "epoch": 2.11, "eval_loss": 1.0490810871124268, "eval_runtime": 106.8845, "eval_samples_per_second": 3.087, "eval_steps_per_second": 0.103, "eval_wer": 158.1031454574485, "step": 88 }, { "epoch": 2.12, "learning_rate": 2.696629213483146e-06, "loss": 0.7045, "step": 89 }, { "epoch": 2.12, "learning_rate": 2.584269662921349e-06, "loss": 0.7887, "step": 90 }, { "epoch": 2.13, "learning_rate": 2.4719101123595505e-06, "loss": 0.7821, "step": 91 }, { "epoch": 2.14, "learning_rate": 2.359550561797753e-06, "loss": 0.7487, "step": 92 }, { "epoch": 2.15, "learning_rate": 2.2471910112359554e-06, "loss": 0.7281, "step": 93 }, { "epoch": 2.16, "learning_rate": 2.1348314606741574e-06, "loss": 0.7084, "step": 94 }, { "epoch": 2.17, "learning_rate": 2.02247191011236e-06, "loss": 0.6971, "step": 95 }, { "epoch": 2.18, "learning_rate": 1.910112359550562e-06, "loss": 0.6869, "step": 96 }, { "epoch": 2.19, "learning_rate": 1.797752808988764e-06, "loss": 0.6411, "step": 97 }, { "epoch": 2.2, "learning_rate": 1.6853932584269663e-06, "loss": 0.6658, "step": 98 }, { "epoch": 2.21, "learning_rate": 1.5730337078651686e-06, "loss": 0.6481, "step": 99 }, { "epoch": 2.21, "eval_loss": 1.046801209449768, "eval_runtime": 89.0792, "eval_samples_per_second": 3.705, "eval_steps_per_second": 0.123, "eval_wer": 158.51828197349514, "step": 99 }, { "epoch": 2.21, "learning_rate": 1.4606741573033708e-06, "loss": 0.7064, "step": 100 }, { "epoch": 2.22, "learning_rate": 1.348314606741573e-06, "loss": 0.6699, "step": 101 }, { "epoch": 2.23, "learning_rate": 1.2359550561797752e-06, "loss": 0.6974, "step": 102 }, { "epoch": 2.24, "learning_rate": 1.1235955056179777e-06, "loss": 0.7523, "step": 103 }, { "epoch": 2.25, "learning_rate": 1.01123595505618e-06, "loss": 0.7621, "step": 104 }, { "epoch": 2.26, "learning_rate": 8.98876404494382e-07, "loss": 0.657, "step": 105 }, { "epoch": 2.27, "learning_rate": 7.865168539325843e-07, "loss": 0.7443, "step": 106 }, { "epoch": 2.28, "learning_rate": 6.741573033707865e-07, "loss": 0.729, "step": 107 }, { "epoch": 2.29, "learning_rate": 5.617977528089888e-07, "loss": 0.6983, "step": 108 }, { "epoch": 2.29, "learning_rate": 4.49438202247191e-07, "loss": 0.7963, "step": 109 }, { "epoch": 2.3, "learning_rate": 3.3707865168539325e-07, "loss": 0.7912, "step": 110 }, { "epoch": 2.3, "eval_loss": 1.045613408088684, "eval_runtime": 91.8976, "eval_samples_per_second": 3.591, "eval_steps_per_second": 0.12, "eval_wer": 168.6092926712438, "step": 110 }, { "epoch": 2.31, "learning_rate": 2.247191011235955e-07, "loss": 0.7626, "step": 111 }, { "epoch": 2.32, "learning_rate": 1.1235955056179776e-07, "loss": 0.7754, "step": 112 }, { "epoch": 2.32, "step": 112, "total_flos": 1.7415399333888e+17, "train_loss": 0.9721650715385165, "train_runtime": 1491.7987, "train_samples_per_second": 4.805, "train_steps_per_second": 0.075 } ], "max_steps": 112, "num_train_epochs": 9223372036854775807, "total_flos": 1.7415399333888e+17, "trial_name": null, "trial_params": null }