{ "best_metric": 0.22902172803878784, "best_model_checkpoint": "./checkpoint-11500", "epoch": 15.0, "global_step": 11670, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 1.2933333333333334e-05, "loss": 14.1317, "step": 100 }, { "epoch": 0.26, "learning_rate": 2.6266666666666667e-05, "loss": 5.2635, "step": 200 }, { "epoch": 0.39, "learning_rate": 3.960000000000001e-05, "loss": 3.8268, "step": 300 }, { "epoch": 0.51, "learning_rate": 5.293333333333334e-05, "loss": 3.2266, "step": 400 }, { "epoch": 0.64, "learning_rate": 6.626666666666666e-05, "loss": 3.0952, "step": 500 }, { "epoch": 0.64, "eval_loss": 3.0981762409210205, "eval_runtime": 326.629, "eval_samples_per_second": 25.414, "eval_steps_per_second": 0.796, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.77, "learning_rate": 7.960000000000001e-05, "loss": 3.0583, "step": 600 }, { "epoch": 0.9, "learning_rate": 9.293333333333333e-05, "loss": 3.0349, "step": 700 }, { "epoch": 1.03, "learning_rate": 0.00010626666666666667, "loss": 2.9355, "step": 800 }, { "epoch": 1.16, "learning_rate": 0.00011960000000000001, "loss": 2.3372, "step": 900 }, { "epoch": 1.29, "learning_rate": 0.00013293333333333333, "loss": 1.7975, "step": 1000 }, { "epoch": 1.29, "eval_loss": 0.7887413501739502, "eval_runtime": 341.1915, "eval_samples_per_second": 24.329, "eval_steps_per_second": 0.762, "eval_wer": 0.5651080072872386, "step": 1000 }, { "epoch": 1.41, "learning_rate": 0.00014626666666666665, "loss": 1.6159, "step": 1100 }, { "epoch": 1.54, "learning_rate": 0.0001596, "loss": 1.5287, "step": 1200 }, { "epoch": 1.67, "learning_rate": 0.00017293333333333335, "loss": 1.4876, "step": 1300 }, { "epoch": 1.8, "learning_rate": 0.00018626666666666668, "loss": 1.4606, "step": 1400 }, { "epoch": 1.93, "learning_rate": 0.0001996, "loss": 1.4138, "step": 1500 }, { "epoch": 1.93, "eval_loss": 0.523814857006073, "eval_runtime": 341.7084, "eval_samples_per_second": 24.293, "eval_steps_per_second": 0.761, "eval_wer": 0.43889997397414765, "step": 1500 }, { "epoch": 2.06, "learning_rate": 0.00019809242871189774, "loss": 1.4258, "step": 1600 }, { "epoch": 2.19, "learning_rate": 0.000196125860373648, "loss": 1.3778, "step": 1700 }, { "epoch": 2.31, "learning_rate": 0.00019415929203539823, "loss": 1.3518, "step": 1800 }, { "epoch": 2.44, "learning_rate": 0.00019219272369714848, "loss": 1.3692, "step": 1900 }, { "epoch": 2.57, "learning_rate": 0.00019022615535889875, "loss": 1.344, "step": 2000 }, { "epoch": 2.57, "eval_loss": 0.4774917662143707, "eval_runtime": 337.4262, "eval_samples_per_second": 24.601, "eval_steps_per_second": 0.771, "eval_wer": 0.4318209421358549, "step": 2000 }, { "epoch": 2.7, "learning_rate": 0.000188259587020649, "loss": 1.3167, "step": 2100 }, { "epoch": 2.83, "learning_rate": 0.00018629301868239921, "loss": 1.3126, "step": 2200 }, { "epoch": 2.96, "learning_rate": 0.00018432645034414946, "loss": 1.3161, "step": 2300 }, { "epoch": 3.08, "learning_rate": 0.0001823598820058997, "loss": 1.2738, "step": 2400 }, { "epoch": 3.21, "learning_rate": 0.00018039331366764995, "loss": 1.2737, "step": 2500 }, { "epoch": 3.21, "eval_loss": 0.46475061774253845, "eval_runtime": 336.0925, "eval_samples_per_second": 24.699, "eval_steps_per_second": 0.774, "eval_wer": 0.4074954454758393, "step": 2500 }, { "epoch": 3.34, "learning_rate": 0.0001784267453294002, "loss": 1.262, "step": 2600 }, { "epoch": 3.47, "learning_rate": 0.00017646017699115044, "loss": 1.2496, "step": 2700 }, { "epoch": 3.6, "learning_rate": 0.0001744936086529007, "loss": 1.2612, "step": 2800 }, { "epoch": 3.73, "learning_rate": 0.00017252704031465093, "loss": 1.2197, "step": 2900 }, { "epoch": 3.86, "learning_rate": 0.0001705604719764012, "loss": 1.2554, "step": 3000 }, { "epoch": 3.86, "eval_loss": 0.4068518280982971, "eval_runtime": 331.555, "eval_samples_per_second": 25.037, "eval_steps_per_second": 0.784, "eval_wer": 0.36781469593129174, "step": 3000 }, { "epoch": 3.98, "learning_rate": 0.00016859390363815145, "loss": 1.2204, "step": 3100 }, { "epoch": 4.11, "learning_rate": 0.0001666273352999017, "loss": 1.2113, "step": 3200 }, { "epoch": 4.24, "learning_rate": 0.00016466076696165194, "loss": 1.2053, "step": 3300 }, { "epoch": 4.37, "learning_rate": 0.00016269419862340216, "loss": 1.2019, "step": 3400 }, { "epoch": 4.5, "learning_rate": 0.0001607276302851524, "loss": 1.1996, "step": 3500 }, { "epoch": 4.5, "eval_loss": 0.3914338946342468, "eval_runtime": 333.5366, "eval_samples_per_second": 24.888, "eval_steps_per_second": 0.78, "eval_wer": 0.36679101240565626, "step": 3500 }, { "epoch": 4.63, "learning_rate": 0.00015876106194690265, "loss": 1.1903, "step": 3600 }, { "epoch": 4.76, "learning_rate": 0.0001567944936086529, "loss": 1.1866, "step": 3700 }, { "epoch": 4.88, "learning_rate": 0.00015482792527040314, "loss": 1.1678, "step": 3800 }, { "epoch": 5.01, "learning_rate": 0.0001528613569321534, "loss": 1.1854, "step": 3900 }, { "epoch": 5.14, "learning_rate": 0.00015089478859390363, "loss": 1.1427, "step": 4000 }, { "epoch": 5.14, "eval_loss": 0.36938655376434326, "eval_runtime": 326.2453, "eval_samples_per_second": 25.444, "eval_steps_per_second": 0.797, "eval_wer": 0.3571614470373905, "step": 4000 }, { "epoch": 5.27, "learning_rate": 0.0001489282202556539, "loss": 1.1393, "step": 4100 }, { "epoch": 5.4, "learning_rate": 0.00014696165191740415, "loss": 1.1182, "step": 4200 }, { "epoch": 5.53, "learning_rate": 0.0001449950835791544, "loss": 1.127, "step": 4300 }, { "epoch": 5.66, "learning_rate": 0.00014302851524090464, "loss": 1.1431, "step": 4400 }, { "epoch": 5.78, "learning_rate": 0.0001410619469026549, "loss": 1.1372, "step": 4500 }, { "epoch": 5.78, "eval_loss": 0.3567572236061096, "eval_runtime": 325.458, "eval_samples_per_second": 25.506, "eval_steps_per_second": 0.799, "eval_wer": 0.3500824151990978, "step": 4500 }, { "epoch": 5.91, "learning_rate": 0.0001390953785644051, "loss": 1.1226, "step": 4600 }, { "epoch": 6.04, "learning_rate": 0.00013712881022615535, "loss": 1.1019, "step": 4700 }, { "epoch": 6.17, "learning_rate": 0.0001351622418879056, "loss": 1.1031, "step": 4800 }, { "epoch": 6.3, "learning_rate": 0.00013321533923303834, "loss": 1.0882, "step": 4900 }, { "epoch": 6.43, "learning_rate": 0.00013124877089478858, "loss": 1.0831, "step": 5000 }, { "epoch": 6.43, "eval_loss": 0.3331395387649536, "eval_runtime": 327.2346, "eval_samples_per_second": 25.367, "eval_steps_per_second": 0.795, "eval_wer": 0.3253058037650733, "step": 5000 }, { "epoch": 6.56, "learning_rate": 0.00012928220255653886, "loss": 1.1039, "step": 5100 }, { "epoch": 6.68, "learning_rate": 0.0001273156342182891, "loss": 1.0683, "step": 5200 }, { "epoch": 6.81, "learning_rate": 0.00012534906588003935, "loss": 1.0773, "step": 5300 }, { "epoch": 6.94, "learning_rate": 0.0001233824975417896, "loss": 1.1002, "step": 5400 }, { "epoch": 7.07, "learning_rate": 0.00012141592920353984, "loss": 1.1074, "step": 5500 }, { "epoch": 7.07, "eval_loss": 0.333199679851532, "eval_runtime": 329.8287, "eval_samples_per_second": 25.168, "eval_steps_per_second": 0.788, "eval_wer": 0.3352129782250369, "step": 5500 }, { "epoch": 7.2, "learning_rate": 0.00011944936086529008, "loss": 1.0341, "step": 5600 }, { "epoch": 7.33, "learning_rate": 0.00011748279252704033, "loss": 1.0812, "step": 5700 }, { "epoch": 7.46, "learning_rate": 0.00011551622418879056, "loss": 1.0709, "step": 5800 }, { "epoch": 7.58, "learning_rate": 0.00011354965585054081, "loss": 1.0843, "step": 5900 }, { "epoch": 7.71, "learning_rate": 0.00011158308751229105, "loss": 1.0536, "step": 6000 }, { "epoch": 7.71, "eval_loss": 0.3130946755409241, "eval_runtime": 327.9895, "eval_samples_per_second": 25.309, "eval_steps_per_second": 0.793, "eval_wer": 0.3151557213498742, "step": 6000 }, { "epoch": 7.84, "learning_rate": 0.0001096165191740413, "loss": 1.0239, "step": 6100 }, { "epoch": 7.97, "learning_rate": 0.00010764995083579154, "loss": 1.0383, "step": 6200 }, { "epoch": 8.1, "learning_rate": 0.00010568338249754179, "loss": 1.0157, "step": 6300 }, { "epoch": 8.23, "learning_rate": 0.00010371681415929205, "loss": 1.0128, "step": 6400 }, { "epoch": 8.35, "learning_rate": 0.0001017502458210423, "loss": 1.0248, "step": 6500 }, { "epoch": 8.35, "eval_loss": 0.30239033699035645, "eval_runtime": 328.0553, "eval_samples_per_second": 25.304, "eval_steps_per_second": 0.793, "eval_wer": 0.3022642491541598, "step": 6500 }, { "epoch": 8.48, "learning_rate": 9.978367748279254e-05, "loss": 0.9989, "step": 6600 }, { "epoch": 8.61, "learning_rate": 9.781710914454277e-05, "loss": 1.0151, "step": 6700 }, { "epoch": 8.74, "learning_rate": 9.585054080629302e-05, "loss": 0.9914, "step": 6800 }, { "epoch": 8.87, "learning_rate": 9.388397246804326e-05, "loss": 0.9893, "step": 6900 }, { "epoch": 9.0, "learning_rate": 9.193706981317602e-05, "loss": 1.0075, "step": 7000 }, { "epoch": 9.0, "eval_loss": 0.2947603166103363, "eval_runtime": 326.8764, "eval_samples_per_second": 25.395, "eval_steps_per_second": 0.795, "eval_wer": 0.3028368179057864, "step": 7000 }, { "epoch": 9.13, "learning_rate": 8.997050147492626e-05, "loss": 0.9851, "step": 7100 }, { "epoch": 9.25, "learning_rate": 8.800393313667651e-05, "loss": 0.973, "step": 7200 }, { "epoch": 9.38, "learning_rate": 8.605703048180925e-05, "loss": 0.9623, "step": 7300 }, { "epoch": 9.51, "learning_rate": 8.40904621435595e-05, "loss": 0.9598, "step": 7400 }, { "epoch": 9.64, "learning_rate": 8.212389380530974e-05, "loss": 0.979, "step": 7500 }, { "epoch": 9.64, "eval_loss": 0.27962473034858704, "eval_runtime": 329.4084, "eval_samples_per_second": 25.2, "eval_steps_per_second": 0.789, "eval_wer": 0.2852953934241346, "step": 7500 }, { "epoch": 9.77, "learning_rate": 8.015732546705999e-05, "loss": 0.9582, "step": 7600 }, { "epoch": 9.9, "learning_rate": 7.819075712881023e-05, "loss": 0.9467, "step": 7700 }, { "epoch": 10.03, "learning_rate": 7.622418879056048e-05, "loss": 0.907, "step": 7800 }, { "epoch": 10.15, "learning_rate": 7.425762045231072e-05, "loss": 0.944, "step": 7900 }, { "epoch": 10.28, "learning_rate": 7.229105211406097e-05, "loss": 0.9594, "step": 8000 }, { "epoch": 10.28, "eval_loss": 0.2719425559043884, "eval_runtime": 329.4201, "eval_samples_per_second": 25.199, "eval_steps_per_second": 0.789, "eval_wer": 0.2789103843150863, "step": 8000 }, { "epoch": 10.41, "learning_rate": 7.032448377581121e-05, "loss": 0.9559, "step": 8100 }, { "epoch": 10.54, "learning_rate": 6.835791543756146e-05, "loss": 0.9368, "step": 8200 }, { "epoch": 10.67, "learning_rate": 6.63913470993117e-05, "loss": 0.9362, "step": 8300 }, { "epoch": 10.8, "learning_rate": 6.442477876106195e-05, "loss": 0.9134, "step": 8400 }, { "epoch": 10.93, "learning_rate": 6.24582104228122e-05, "loss": 0.9172, "step": 8500 }, { "epoch": 10.93, "eval_loss": 0.2620205879211426, "eval_runtime": 325.6384, "eval_samples_per_second": 25.491, "eval_steps_per_second": 0.798, "eval_wer": 0.26950637633382496, "step": 8500 }, { "epoch": 11.05, "learning_rate": 6.049164208456244e-05, "loss": 0.9179, "step": 8600 }, { "epoch": 11.18, "learning_rate": 5.8525073746312686e-05, "loss": 0.9157, "step": 8700 }, { "epoch": 11.31, "learning_rate": 5.655850540806293e-05, "loss": 0.8974, "step": 8800 }, { "epoch": 11.44, "learning_rate": 5.459193706981318e-05, "loss": 0.8994, "step": 8900 }, { "epoch": 11.57, "learning_rate": 5.262536873156343e-05, "loss": 0.9047, "step": 9000 }, { "epoch": 11.57, "eval_loss": 0.25371646881103516, "eval_runtime": 328.7713, "eval_samples_per_second": 25.249, "eval_steps_per_second": 0.791, "eval_wer": 0.2596165524420925, "step": 9000 }, { "epoch": 11.7, "learning_rate": 5.065880039331367e-05, "loss": 0.8816, "step": 9100 }, { "epoch": 11.83, "learning_rate": 4.869223205506391e-05, "loss": 0.9007, "step": 9200 }, { "epoch": 11.95, "learning_rate": 4.672566371681416e-05, "loss": 0.9035, "step": 9300 }, { "epoch": 12.08, "learning_rate": 4.475909537856441e-05, "loss": 0.8705, "step": 9400 }, { "epoch": 12.21, "learning_rate": 4.279252704031465e-05, "loss": 0.8777, "step": 9500 }, { "epoch": 12.21, "eval_loss": 0.24379895627498627, "eval_runtime": 329.8391, "eval_samples_per_second": 25.167, "eval_steps_per_second": 0.788, "eval_wer": 0.25250281946733755, "step": 9500 }, { "epoch": 12.34, "learning_rate": 4.0825958702064895e-05, "loss": 0.8734, "step": 9600 }, { "epoch": 12.47, "learning_rate": 3.887905604719764e-05, "loss": 0.8886, "step": 9700 }, { "epoch": 12.6, "learning_rate": 3.691248770894789e-05, "loss": 0.8626, "step": 9800 }, { "epoch": 12.72, "learning_rate": 3.4945919370698134e-05, "loss": 0.8669, "step": 9900 }, { "epoch": 12.85, "learning_rate": 3.297935103244838e-05, "loss": 0.8629, "step": 10000 }, { "epoch": 12.85, "eval_loss": 0.2408979833126068, "eval_runtime": 327.8875, "eval_samples_per_second": 25.317, "eval_steps_per_second": 0.793, "eval_wer": 0.24934501604927561, "step": 10000 }, { "epoch": 12.98, "learning_rate": 3.1012782694198625e-05, "loss": 0.8489, "step": 10100 }, { "epoch": 13.11, "learning_rate": 2.904621435594887e-05, "loss": 0.8356, "step": 10200 }, { "epoch": 13.24, "learning_rate": 2.7079646017699116e-05, "loss": 0.8596, "step": 10300 }, { "epoch": 13.37, "learning_rate": 2.5113077679449358e-05, "loss": 0.8401, "step": 10400 }, { "epoch": 13.5, "learning_rate": 2.3146509341199607e-05, "loss": 0.8575, "step": 10500 }, { "epoch": 13.5, "eval_loss": 0.2366442084312439, "eval_runtime": 327.7324, "eval_samples_per_second": 25.329, "eval_steps_per_second": 0.793, "eval_wer": 0.24396633989763164, "step": 10500 }, { "epoch": 13.62, "learning_rate": 2.1179941002949856e-05, "loss": 0.8343, "step": 10600 }, { "epoch": 13.75, "learning_rate": 1.9213372664700098e-05, "loss": 0.8308, "step": 10700 }, { "epoch": 13.88, "learning_rate": 1.7246804326450343e-05, "loss": 0.8431, "step": 10800 }, { "epoch": 14.01, "learning_rate": 1.5280235988200592e-05, "loss": 0.8468, "step": 10900 }, { "epoch": 14.14, "learning_rate": 1.3313667649950836e-05, "loss": 0.8361, "step": 11000 }, { "epoch": 14.14, "eval_loss": 0.23166431486606598, "eval_runtime": 329.2059, "eval_samples_per_second": 25.215, "eval_steps_per_second": 0.79, "eval_wer": 0.23848356033660104, "step": 11000 }, { "epoch": 14.27, "learning_rate": 1.1347099311701081e-05, "loss": 0.824, "step": 11100 }, { "epoch": 14.4, "learning_rate": 9.380530973451327e-06, "loss": 0.8252, "step": 11200 }, { "epoch": 14.52, "learning_rate": 7.4336283185840714e-06, "loss": 0.8286, "step": 11300 }, { "epoch": 14.65, "learning_rate": 5.467059980334317e-06, "loss": 0.8077, "step": 11400 }, { "epoch": 14.78, "learning_rate": 3.500491642084563e-06, "loss": 0.8126, "step": 11500 }, { "epoch": 14.78, "eval_loss": 0.22902172803878784, "eval_runtime": 327.6124, "eval_samples_per_second": 25.338, "eval_steps_per_second": 0.794, "eval_wer": 0.23818860067667216, "step": 11500 }, { "epoch": 14.91, "learning_rate": 1.5339233038348083e-06, "loss": 0.8118, "step": 11600 }, { "epoch": 15.0, "step": 11670, "total_flos": 4.014305196020058e+19, "train_loss": 1.3307904394651542, "train_runtime": 23640.5221, "train_samples_per_second": 15.785, "train_steps_per_second": 0.494 } ], "max_steps": 11670, "num_train_epochs": 15, "total_flos": 4.014305196020058e+19, "trial_name": null, "trial_params": null }