{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.994708994709, "global_step": 9400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.06, "learning_rate": 4.950000000000001e-06, "loss": 13.5577, "step": 100 }, { "epoch": 2.13, "learning_rate": 9.950000000000001e-06, "loss": 5.868, "step": 200 }, { "epoch": 3.19, "learning_rate": 1.4950000000000001e-05, "loss": 4.1355, "step": 300 }, { "epoch": 4.25, "learning_rate": 1.995e-05, "loss": 3.5988, "step": 400 }, { "epoch": 5.32, "learning_rate": 2.495e-05, "loss": 3.3036, "step": 500 }, { "epoch": 5.32, "eval_loss": 3.262770652770996, "eval_runtime": 136.0729, "eval_samples_per_second": 20.166, "eval_steps_per_second": 2.521, "eval_wer": 1.0, "step": 500 }, { "epoch": 6.38, "learning_rate": 2.995e-05, "loss": 3.2243, "step": 600 }, { "epoch": 7.44, "learning_rate": 3.495e-05, "loss": 3.1823, "step": 700 }, { "epoch": 8.51, "learning_rate": 3.995e-05, "loss": 3.1371, "step": 800 }, { "epoch": 9.57, "learning_rate": 4.495e-05, "loss": 3.0942, "step": 900 }, { "epoch": 10.63, "learning_rate": 4.995e-05, "loss": 2.9734, "step": 1000 }, { "epoch": 10.63, "eval_loss": 2.5676724910736084, "eval_runtime": 135.3086, "eval_samples_per_second": 20.28, "eval_steps_per_second": 2.535, "eval_wer": 0.9980321266761246, "step": 1000 }, { "epoch": 11.7, "learning_rate": 5.495e-05, "loss": 2.2815, "step": 1100 }, { "epoch": 12.76, "learning_rate": 5.995000000000001e-05, "loss": 1.6698, "step": 1200 }, { "epoch": 13.83, "learning_rate": 6.494999999999999e-05, "loss": 1.4895, "step": 1300 }, { "epoch": 14.89, "learning_rate": 6.995e-05, "loss": 1.3959, "step": 1400 }, { "epoch": 15.95, "learning_rate": 7.495e-05, "loss": 1.3466, "step": 1500 }, { "epoch": 15.95, "eval_loss": 0.44553351402282715, "eval_runtime": 134.1872, "eval_samples_per_second": 20.449, "eval_steps_per_second": 2.556, "eval_wer": 0.6306347535581895, "step": 1500 }, { "epoch": 17.02, "learning_rate": 7.995e-05, "loss": 1.3288, "step": 1600 }, { "epoch": 18.08, "learning_rate": 8.495e-05, "loss": 1.2866, "step": 1700 }, { "epoch": 19.15, "learning_rate": 8.995e-05, "loss": 1.2619, "step": 1800 }, { "epoch": 20.21, "learning_rate": 9.495e-05, "loss": 1.2496, "step": 1900 }, { "epoch": 21.28, "learning_rate": 9.99e-05, "loss": 1.2424, "step": 2000 }, { "epoch": 21.28, "eval_loss": 0.3603059649467468, "eval_runtime": 134.7251, "eval_samples_per_second": 20.367, "eval_steps_per_second": 2.546, "eval_wer": 0.5301359205528351, "step": 2000 }, { "epoch": 22.34, "learning_rate": 9.867567567567569e-05, "loss": 1.2253, "step": 2100 }, { "epoch": 23.4, "learning_rate": 9.732432432432433e-05, "loss": 1.209, "step": 2200 }, { "epoch": 24.47, "learning_rate": 9.597297297297298e-05, "loss": 1.1984, "step": 2300 }, { "epoch": 25.53, "learning_rate": 9.462162162162162e-05, "loss": 1.1735, "step": 2400 }, { "epoch": 26.59, "learning_rate": 9.327027027027028e-05, "loss": 1.1655, "step": 2500 }, { "epoch": 26.59, "eval_loss": 0.3164927661418915, "eval_runtime": 135.1028, "eval_samples_per_second": 20.31, "eval_steps_per_second": 2.539, "eval_wer": 0.4739828840785319, "step": 2500 }, { "epoch": 27.66, "learning_rate": 9.191891891891893e-05, "loss": 1.1529, "step": 2600 }, { "epoch": 28.72, "learning_rate": 9.056756756756757e-05, "loss": 1.1408, "step": 2700 }, { "epoch": 29.78, "learning_rate": 8.921621621621622e-05, "loss": 1.1201, "step": 2800 }, { "epoch": 30.85, "learning_rate": 8.787837837837838e-05, "loss": 1.1108, "step": 2900 }, { "epoch": 31.91, "learning_rate": 8.652702702702703e-05, "loss": 1.1026, "step": 3000 }, { "epoch": 31.91, "eval_loss": 0.2930183410644531, "eval_runtime": 135.7738, "eval_samples_per_second": 20.21, "eval_steps_per_second": 2.526, "eval_wer": 0.4400256281177063, "step": 3000 }, { "epoch": 32.97, "learning_rate": 8.517567567567568e-05, "loss": 1.1035, "step": 3100 }, { "epoch": 34.04, "learning_rate": 8.382432432432433e-05, "loss": 1.0976, "step": 3200 }, { "epoch": 35.11, "learning_rate": 8.247297297297298e-05, "loss": 1.0717, "step": 3300 }, { "epoch": 36.17, "learning_rate": 8.112162162162162e-05, "loss": 1.0778, "step": 3400 }, { "epoch": 37.23, "learning_rate": 7.977027027027028e-05, "loss": 1.0655, "step": 3500 }, { "epoch": 37.23, "eval_loss": 0.26754099130630493, "eval_runtime": 134.1895, "eval_samples_per_second": 20.449, "eval_steps_per_second": 2.556, "eval_wer": 0.41590773877625736, "step": 3500 }, { "epoch": 38.3, "learning_rate": 7.841891891891892e-05, "loss": 1.0588, "step": 3600 }, { "epoch": 39.36, "learning_rate": 7.706756756756757e-05, "loss": 1.0401, "step": 3700 }, { "epoch": 40.42, "learning_rate": 7.571621621621621e-05, "loss": 1.0296, "step": 3800 }, { "epoch": 41.49, "learning_rate": 7.436486486486487e-05, "loss": 1.0218, "step": 3900 }, { "epoch": 42.55, "learning_rate": 7.301351351351352e-05, "loss": 1.0239, "step": 4000 }, { "epoch": 42.55, "eval_loss": 0.25800037384033203, "eval_runtime": 134.1224, "eval_samples_per_second": 20.459, "eval_steps_per_second": 2.557, "eval_wer": 0.3912864399798636, "step": 4000 }, { "epoch": 43.61, "learning_rate": 7.166216216216216e-05, "loss": 1.0226, "step": 4100 }, { "epoch": 44.68, "learning_rate": 7.031081081081081e-05, "loss": 1.0165, "step": 4200 }, { "epoch": 45.74, "learning_rate": 6.895945945945947e-05, "loss": 1.0083, "step": 4300 }, { "epoch": 46.8, "learning_rate": 6.760810810810811e-05, "loss": 0.9981, "step": 4400 }, { "epoch": 47.87, "learning_rate": 6.625675675675676e-05, "loss": 0.9938, "step": 4500 }, { "epoch": 47.87, "eval_loss": 0.23732751607894897, "eval_runtime": 135.9562, "eval_samples_per_second": 20.183, "eval_steps_per_second": 2.523, "eval_wer": 0.36977712690494713, "step": 4500 }, { "epoch": 48.93, "learning_rate": 6.49054054054054e-05, "loss": 0.9717, "step": 4600 }, { "epoch": 49.99, "learning_rate": 6.355405405405406e-05, "loss": 0.9795, "step": 4700 }, { "epoch": 51.06, "learning_rate": 6.22027027027027e-05, "loss": 0.9804, "step": 4800 }, { "epoch": 52.13, "learning_rate": 6.085135135135135e-05, "loss": 0.9655, "step": 4900 }, { "epoch": 53.19, "learning_rate": 5.95e-05, "loss": 0.9655, "step": 5000 }, { "epoch": 53.19, "eval_loss": 0.2379022240638733, "eval_runtime": 134.3498, "eval_samples_per_second": 20.424, "eval_steps_per_second": 2.553, "eval_wer": 0.3674889021097433, "step": 5000 }, { "epoch": 54.25, "learning_rate": 5.8148648648648655e-05, "loss": 0.9616, "step": 5100 }, { "epoch": 55.32, "learning_rate": 5.67972972972973e-05, "loss": 0.9633, "step": 5200 }, { "epoch": 56.38, "learning_rate": 5.544594594594595e-05, "loss": 0.94, "step": 5300 }, { "epoch": 57.44, "learning_rate": 5.40945945945946e-05, "loss": 0.9355, "step": 5400 }, { "epoch": 58.51, "learning_rate": 5.274324324324325e-05, "loss": 0.9374, "step": 5500 }, { "epoch": 58.51, "eval_loss": 0.24859154224395752, "eval_runtime": 135.3597, "eval_samples_per_second": 20.272, "eval_steps_per_second": 2.534, "eval_wer": 0.3794792000366116, "step": 5500 }, { "epoch": 59.57, "learning_rate": 5.1391891891891894e-05, "loss": 0.93, "step": 5600 }, { "epoch": 60.63, "learning_rate": 5.0040540540540546e-05, "loss": 0.9212, "step": 5700 }, { "epoch": 61.7, "learning_rate": 4.868918918918919e-05, "loss": 0.9233, "step": 5800 }, { "epoch": 62.76, "learning_rate": 4.733783783783784e-05, "loss": 0.914, "step": 5900 }, { "epoch": 63.83, "learning_rate": 4.598648648648649e-05, "loss": 0.9065, "step": 6000 }, { "epoch": 63.83, "eval_loss": 0.22428132593631744, "eval_runtime": 136.639, "eval_samples_per_second": 20.082, "eval_steps_per_second": 2.51, "eval_wer": 0.3405336140222415, "step": 6000 }, { "epoch": 64.89, "learning_rate": 4.463513513513514e-05, "loss": 0.9023, "step": 6100 }, { "epoch": 65.95, "learning_rate": 4.3283783783783785e-05, "loss": 0.9105, "step": 6200 }, { "epoch": 67.02, "learning_rate": 4.193243243243244e-05, "loss": 0.907, "step": 6300 }, { "epoch": 68.08, "learning_rate": 4.058108108108108e-05, "loss": 0.8939, "step": 6400 }, { "epoch": 69.15, "learning_rate": 3.9229729729729734e-05, "loss": 0.888, "step": 6500 }, { "epoch": 69.15, "eval_loss": 0.21568605303764343, "eval_runtime": 139.7942, "eval_samples_per_second": 19.629, "eval_steps_per_second": 2.454, "eval_wer": 0.3277195551690998, "step": 6500 }, { "epoch": 70.21, "learning_rate": 3.78918918918919e-05, "loss": 0.8866, "step": 6600 }, { "epoch": 71.28, "learning_rate": 3.654054054054054e-05, "loss": 0.8881, "step": 6700 }, { "epoch": 72.34, "learning_rate": 3.5189189189189195e-05, "loss": 0.8849, "step": 6800 }, { "epoch": 73.4, "learning_rate": 3.383783783783784e-05, "loss": 0.8648, "step": 6900 }, { "epoch": 74.47, "learning_rate": 3.2500000000000004e-05, "loss": 0.8646, "step": 7000 }, { "epoch": 74.47, "eval_loss": 0.21030458807945251, "eval_runtime": 135.9762, "eval_samples_per_second": 20.18, "eval_steps_per_second": 2.522, "eval_wer": 0.3287721385748936, "step": 7000 }, { "epoch": 75.53, "learning_rate": 3.114864864864865e-05, "loss": 0.8767, "step": 7100 }, { "epoch": 76.59, "learning_rate": 2.97972972972973e-05, "loss": 0.8655, "step": 7200 }, { "epoch": 77.66, "learning_rate": 2.8445945945945946e-05, "loss": 0.8672, "step": 7300 }, { "epoch": 78.72, "learning_rate": 2.7094594594594598e-05, "loss": 0.8625, "step": 7400 }, { "epoch": 79.78, "learning_rate": 2.5743243243243243e-05, "loss": 0.8602, "step": 7500 }, { "epoch": 79.78, "eval_loss": 0.20880180597305298, "eval_runtime": 135.4639, "eval_samples_per_second": 20.256, "eval_steps_per_second": 2.532, "eval_wer": 0.32378380852134914, "step": 7500 }, { "epoch": 80.85, "learning_rate": 2.4391891891891895e-05, "loss": 0.8515, "step": 7600 }, { "epoch": 81.91, "learning_rate": 2.3040540540540543e-05, "loss": 0.8554, "step": 7700 }, { "epoch": 82.97, "learning_rate": 2.1689189189189192e-05, "loss": 0.8563, "step": 7800 }, { "epoch": 84.04, "learning_rate": 2.033783783783784e-05, "loss": 0.8655, "step": 7900 }, { "epoch": 85.11, "learning_rate": 1.898648648648649e-05, "loss": 0.8442, "step": 8000 }, { "epoch": 85.11, "eval_loss": 0.2045026570558548, "eval_runtime": 138.8412, "eval_samples_per_second": 19.764, "eval_steps_per_second": 2.47, "eval_wer": 0.32657544277149786, "step": 8000 }, { "epoch": 86.17, "learning_rate": 1.7635135135135137e-05, "loss": 0.8323, "step": 8100 }, { "epoch": 87.23, "learning_rate": 1.6283783783783786e-05, "loss": 0.8384, "step": 8200 }, { "epoch": 88.3, "learning_rate": 1.4932432432432433e-05, "loss": 0.8391, "step": 8300 }, { "epoch": 89.36, "learning_rate": 1.3581081081081081e-05, "loss": 0.8292, "step": 8400 }, { "epoch": 90.42, "learning_rate": 1.222972972972973e-05, "loss": 0.8335, "step": 8500 }, { "epoch": 90.42, "eval_loss": 0.20376762747764587, "eval_runtime": 136.8377, "eval_samples_per_second": 20.053, "eval_steps_per_second": 2.507, "eval_wer": 0.3240583954967736, "step": 8500 }, { "epoch": 91.49, "learning_rate": 1.0878378378378378e-05, "loss": 0.8314, "step": 8600 }, { "epoch": 92.55, "learning_rate": 9.527027027027027e-06, "loss": 0.8254, "step": 8700 }, { "epoch": 93.61, "learning_rate": 8.175675675675675e-06, "loss": 0.8231, "step": 8800 }, { "epoch": 94.68, "learning_rate": 6.8243243243243244e-06, "loss": 0.8164, "step": 8900 }, { "epoch": 95.74, "learning_rate": 5.472972972972974e-06, "loss": 0.8288, "step": 9000 }, { "epoch": 95.74, "eval_loss": 0.20242640376091003, "eval_runtime": 135.0305, "eval_samples_per_second": 20.321, "eval_steps_per_second": 2.54, "eval_wer": 0.32799414214452427, "step": 9000 }, { "epoch": 96.8, "learning_rate": 4.121621621621622e-06, "loss": 0.816, "step": 9100 }, { "epoch": 97.87, "learning_rate": 2.7702702702702708e-06, "loss": 0.8163, "step": 9200 }, { "epoch": 98.93, "learning_rate": 1.418918918918919e-06, "loss": 0.8126, "step": 9300 }, { "epoch": 99.99, "learning_rate": 6.756756756756757e-08, "loss": 0.8084, "step": 9400 }, { "epoch": 99.99, "step": 9400, "total_flos": 1.0839754269306731e+20, "train_loss": 1.3946523244330225, "train_runtime": 41836.5581, "train_samples_per_second": 14.423, "train_steps_per_second": 0.225 } ], "max_steps": 9400, "num_train_epochs": 100, "total_flos": 1.0839754269306731e+20, "trial_name": null, "trial_params": null }