{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.3350592567023358, "global_step": 2900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.0305343511450381e-05, "loss": 13.0539, "step": 20 }, { "epoch": 0.02, "learning_rate": 2.118320610687023e-05, "loss": 20.9649, "step": 40 }, { "epoch": 0.03, "learning_rate": 3.206106870229007e-05, "loss": 21.1001, "step": 60 }, { "epoch": 0.04, "learning_rate": 4.3511450381679383e-05, "loss": 5.5852, "step": 80 }, { "epoch": 0.05, "learning_rate": 5.438931297709923e-05, "loss": 5.2002, "step": 100 }, { "epoch": 0.05, "eval_loss": 3.5950469970703125, "eval_runtime": 194.8685, "eval_samples_per_second": 24.853, "eval_steps_per_second": 0.78, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.06, "learning_rate": 6.583969465648854e-05, "loss": 3.281, "step": 120 }, { "epoch": 0.06, "learning_rate": 7.495301487862176e-05, "loss": 3.3442, "step": 140 }, { "epoch": 0.07, "learning_rate": 7.472983555207517e-05, "loss": 3.5229, "step": 160 }, { "epoch": 0.08, "learning_rate": 7.449490994518402e-05, "loss": 3.089, "step": 180 }, { "epoch": 0.09, "learning_rate": 7.427173061863743e-05, "loss": 3.482, "step": 200 }, { "epoch": 0.09, "eval_loss": 3.274627923965454, "eval_runtime": 194.5649, "eval_samples_per_second": 24.891, "eval_steps_per_second": 0.781, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.1, "learning_rate": 7.403680501174628e-05, "loss": 3.1517, "step": 220 }, { "epoch": 0.11, "learning_rate": 7.380187940485512e-05, "loss": 3.1617, "step": 240 }, { "epoch": 0.12, "learning_rate": 7.357870007830852e-05, "loss": 3.2971, "step": 260 }, { "epoch": 0.13, "learning_rate": 7.334377447141738e-05, "loss": 3.022, "step": 280 }, { "epoch": 0.14, "learning_rate": 7.312059514487078e-05, "loss": 3.3322, "step": 300 }, { "epoch": 0.14, "eval_loss": 3.0716073513031006, "eval_runtime": 196.1674, "eval_samples_per_second": 24.688, "eval_steps_per_second": 0.775, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.15, "learning_rate": 7.288566953797964e-05, "loss": 2.9964, "step": 320 }, { "epoch": 0.16, "learning_rate": 7.265074393108849e-05, "loss": 3.0777, "step": 340 }, { "epoch": 0.17, "learning_rate": 7.242756460454189e-05, "loss": 3.2703, "step": 360 }, { "epoch": 0.17, "learning_rate": 7.219263899765073e-05, "loss": 3.1161, "step": 380 }, { "epoch": 0.18, "learning_rate": 7.195771339075958e-05, "loss": 3.2435, "step": 400 }, { "epoch": 0.18, "eval_loss": 3.063333034515381, "eval_runtime": 190.5938, "eval_samples_per_second": 25.41, "eval_steps_per_second": 0.798, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.19, "learning_rate": 7.172278778386843e-05, "loss": 2.9993, "step": 420 }, { "epoch": 0.2, "learning_rate": 7.148786217697727e-05, "loss": 3.0555, "step": 440 }, { "epoch": 0.21, "learning_rate": 7.126468285043069e-05, "loss": 3.1879, "step": 460 }, { "epoch": 0.22, "learning_rate": 7.102975724353953e-05, "loss": 2.9861, "step": 480 }, { "epoch": 0.23, "learning_rate": 7.080657791699295e-05, "loss": 3.0833, "step": 500 }, { "epoch": 0.23, "eval_loss": 2.9769718647003174, "eval_runtime": 195.2639, "eval_samples_per_second": 24.802, "eval_steps_per_second": 0.778, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.24, "learning_rate": 7.05716523101018e-05, "loss": 3.1608, "step": 520 }, { "epoch": 0.25, "learning_rate": 7.033672670321064e-05, "loss": 3.0206, "step": 540 }, { "epoch": 0.26, "learning_rate": 7.01018010963195e-05, "loss": 3.0523, "step": 560 }, { "epoch": 0.27, "learning_rate": 6.986687548942835e-05, "loss": 3.0222, "step": 580 }, { "epoch": 0.28, "learning_rate": 6.964369616288175e-05, "loss": 3.0913, "step": 600 }, { "epoch": 0.28, "eval_loss": 3.052091598510742, "eval_runtime": 189.8149, "eval_samples_per_second": 25.514, "eval_steps_per_second": 0.801, "eval_wer": 1.0, "step": 600 }, { "epoch": 0.29, "learning_rate": 6.94087705559906e-05, "loss": 3.1987, "step": 620 }, { "epoch": 0.29, "learning_rate": 6.917384494909944e-05, "loss": 3.1504, "step": 640 }, { "epoch": 0.3, "learning_rate": 6.893891934220829e-05, "loss": 3.2406, "step": 660 }, { "epoch": 0.31, "learning_rate": 6.870399373531715e-05, "loss": 3.0632, "step": 680 }, { "epoch": 0.32, "learning_rate": 6.848081440877055e-05, "loss": 3.1487, "step": 700 }, { "epoch": 0.32, "eval_loss": 3.1951305866241455, "eval_runtime": 196.8636, "eval_samples_per_second": 24.601, "eval_steps_per_second": 0.772, "eval_wer": 1.0, "step": 700 }, { "epoch": 0.33, "learning_rate": 6.824588880187941e-05, "loss": 3.298, "step": 720 }, { "epoch": 0.34, "learning_rate": 6.801096319498825e-05, "loss": 3.1669, "step": 740 }, { "epoch": 0.35, "learning_rate": 6.778778386844165e-05, "loss": 3.2651, "step": 760 }, { "epoch": 0.36, "learning_rate": 6.75528582615505e-05, "loss": 3.1562, "step": 780 }, { "epoch": 0.37, "learning_rate": 6.732967893500391e-05, "loss": 3.3254, "step": 800 }, { "epoch": 0.37, "eval_loss": 3.0070509910583496, "eval_runtime": 192.0978, "eval_samples_per_second": 25.211, "eval_steps_per_second": 0.791, "eval_wer": 1.0, "step": 800 }, { "epoch": 0.38, "learning_rate": 6.709475332811276e-05, "loss": 3.0487, "step": 820 }, { "epoch": 0.39, "learning_rate": 6.68598277212216e-05, "loss": 3.1936, "step": 840 }, { "epoch": 0.4, "learning_rate": 6.663664839467502e-05, "loss": 3.323, "step": 860 }, { "epoch": 0.41, "learning_rate": 6.640172278778387e-05, "loss": 2.998, "step": 880 }, { "epoch": 0.41, "learning_rate": 6.617854346123726e-05, "loss": 3.4698, "step": 900 }, { "epoch": 0.41, "eval_loss": 2.995044469833374, "eval_runtime": 195.1464, "eval_samples_per_second": 24.817, "eval_steps_per_second": 0.779, "eval_wer": 1.0, "step": 900 }, { "epoch": 0.42, "learning_rate": 6.594361785434611e-05, "loss": 3.0048, "step": 920 }, { "epoch": 0.43, "learning_rate": 6.570869224745496e-05, "loss": 3.1447, "step": 940 }, { "epoch": 0.44, "learning_rate": 6.548551292090837e-05, "loss": 3.3347, "step": 960 }, { "epoch": 0.45, "learning_rate": 6.525058731401722e-05, "loss": 2.9795, "step": 980 }, { "epoch": 0.46, "learning_rate": 6.503915426781518e-05, "loss": 3.5905, "step": 1000 }, { "epoch": 0.46, "eval_loss": 2.9961767196655273, "eval_runtime": 188.3505, "eval_samples_per_second": 25.713, "eval_steps_per_second": 0.807, "eval_wer": 1.0, "step": 1000 }, { "epoch": 0.47, "learning_rate": 6.480422866092403e-05, "loss": 2.9884, "step": 1020 }, { "epoch": 0.48, "learning_rate": 6.456930305403288e-05, "loss": 3.175, "step": 1040 }, { "epoch": 0.49, "learning_rate": 6.434612372748629e-05, "loss": 3.3163, "step": 1060 }, { "epoch": 0.5, "learning_rate": 6.411119812059513e-05, "loss": 3.0063, "step": 1080 }, { "epoch": 0.51, "learning_rate": 6.387627251370398e-05, "loss": 3.4965, "step": 1100 }, { "epoch": 0.51, "eval_loss": 3.0483579635620117, "eval_runtime": 193.4167, "eval_samples_per_second": 25.039, "eval_steps_per_second": 0.786, "eval_wer": 1.0, "step": 1100 }, { "epoch": 0.52, "learning_rate": 6.364134690681284e-05, "loss": 2.9967, "step": 1120 }, { "epoch": 0.52, "learning_rate": 6.340642129992169e-05, "loss": 3.2329, "step": 1140 }, { "epoch": 0.53, "learning_rate": 6.31832419733751e-05, "loss": 3.3814, "step": 1160 }, { "epoch": 0.54, "learning_rate": 6.294831636648395e-05, "loss": 3.0108, "step": 1180 }, { "epoch": 0.55, "learning_rate": 6.272513703993735e-05, "loss": 3.6227, "step": 1200 }, { "epoch": 0.55, "eval_loss": 3.0750181674957275, "eval_runtime": 193.4538, "eval_samples_per_second": 25.034, "eval_steps_per_second": 0.786, "eval_wer": 1.0, "step": 1200 }, { "epoch": 0.56, "learning_rate": 6.24902114330462e-05, "loss": 2.9917, "step": 1220 }, { "epoch": 0.57, "learning_rate": 6.225528582615504e-05, "loss": 3.1386, "step": 1240 }, { "epoch": 0.58, "learning_rate": 6.203210649960845e-05, "loss": 3.3022, "step": 1260 }, { "epoch": 0.59, "learning_rate": 6.17971808927173e-05, "loss": 3.0139, "step": 1280 }, { "epoch": 0.6, "learning_rate": 6.157400156617071e-05, "loss": 3.2921, "step": 1300 }, { "epoch": 0.6, "eval_loss": 3.019115924835205, "eval_runtime": 194.2188, "eval_samples_per_second": 24.936, "eval_steps_per_second": 0.783, "eval_wer": 1.0, "step": 1300 }, { "epoch": 0.61, "learning_rate": 6.133907595927956e-05, "loss": 3.0432, "step": 1320 }, { "epoch": 0.62, "learning_rate": 6.11041503523884e-05, "loss": 3.0951, "step": 1340 }, { "epoch": 0.63, "learning_rate": 6.088097102584181e-05, "loss": 3.189, "step": 1360 }, { "epoch": 0.64, "learning_rate": 6.064604541895066e-05, "loss": 3.1296, "step": 1380 }, { "epoch": 0.64, "learning_rate": 6.0422866092404065e-05, "loss": 3.2907, "step": 1400 }, { "epoch": 0.64, "eval_loss": 3.06713604927063, "eval_runtime": 192.5311, "eval_samples_per_second": 25.154, "eval_steps_per_second": 0.789, "eval_wer": 1.0, "step": 1400 }, { "epoch": 0.65, "learning_rate": 6.018794048551292e-05, "loss": 3.1849, "step": 1420 }, { "epoch": 0.66, "learning_rate": 5.9953014878621765e-05, "loss": 3.1682, "step": 1440 }, { "epoch": 0.67, "learning_rate": 5.9729835552075165e-05, "loss": 3.2002, "step": 1460 }, { "epoch": 0.68, "learning_rate": 5.949490994518402e-05, "loss": 3.2207, "step": 1480 }, { "epoch": 0.69, "learning_rate": 5.9271730618637424e-05, "loss": 3.1716, "step": 1500 }, { "epoch": 0.69, "eval_loss": 3.093482732772827, "eval_runtime": 193.22, "eval_samples_per_second": 25.065, "eval_steps_per_second": 0.787, "eval_wer": 1.0, "step": 1500 }, { "epoch": 0.7, "learning_rate": 5.903680501174628e-05, "loss": 3.2105, "step": 1520 }, { "epoch": 0.71, "learning_rate": 5.8801879404855124e-05, "loss": 3.173, "step": 1540 }, { "epoch": 0.72, "learning_rate": 5.857870007830853e-05, "loss": 3.2066, "step": 1560 }, { "epoch": 0.73, "learning_rate": 5.8343774471417384e-05, "loss": 3.2196, "step": 1580 }, { "epoch": 0.74, "learning_rate": 5.812059514487078e-05, "loss": 3.1788, "step": 1600 }, { "epoch": 0.74, "eval_loss": 3.092198133468628, "eval_runtime": 194.0448, "eval_samples_per_second": 24.958, "eval_steps_per_second": 0.783, "eval_wer": 1.0, "step": 1600 }, { "epoch": 0.75, "learning_rate": 5.788566953797963e-05, "loss": 3.2075, "step": 1620 }, { "epoch": 0.75, "learning_rate": 5.765074393108848e-05, "loss": 3.1772, "step": 1640 }, { "epoch": 0.76, "learning_rate": 5.742756460454189e-05, "loss": 3.2101, "step": 1660 }, { "epoch": 0.77, "learning_rate": 5.7192638997650736e-05, "loss": 3.2115, "step": 1680 }, { "epoch": 0.78, "learning_rate": 5.69812059514487e-05, "loss": 3.2986, "step": 1700 }, { "epoch": 0.78, "eval_loss": 3.092197895050049, "eval_runtime": 194.3305, "eval_samples_per_second": 24.921, "eval_steps_per_second": 0.782, "eval_wer": 1.0, "step": 1700 }, { "epoch": 0.79, "learning_rate": 5.674628034455755e-05, "loss": 3.2063, "step": 1720 }, { "epoch": 0.8, "learning_rate": 5.65113547376664e-05, "loss": 3.1723, "step": 1740 }, { "epoch": 0.81, "learning_rate": 5.627642913077525e-05, "loss": 3.0945, "step": 1760 }, { "epoch": 0.82, "learning_rate": 5.6041503523884095e-05, "loss": 3.2153, "step": 1780 }, { "epoch": 0.83, "learning_rate": 5.580657791699295e-05, "loss": 3.0761, "step": 1800 }, { "epoch": 0.83, "eval_loss": 3.092197895050049, "eval_runtime": 192.7837, "eval_samples_per_second": 25.121, "eval_steps_per_second": 0.788, "eval_wer": 1.0, "step": 1800 }, { "epoch": 0.84, "learning_rate": 5.5571652310101795e-05, "loss": 3.2061, "step": 1820 }, { "epoch": 0.85, "learning_rate": 5.533672670321064e-05, "loss": 3.1768, "step": 1840 }, { "epoch": 0.86, "learning_rate": 5.5101801096319495e-05, "loss": 3.096, "step": 1860 }, { "epoch": 0.87, "learning_rate": 5.486687548942834e-05, "loss": 3.213, "step": 1880 }, { "epoch": 0.87, "learning_rate": 5.464369616288175e-05, "loss": 3.1832, "step": 1900 }, { "epoch": 0.87, "eval_loss": 3.092197895050049, "eval_runtime": 196.1769, "eval_samples_per_second": 24.687, "eval_steps_per_second": 0.775, "eval_wer": 1.0, "step": 1900 }, { "epoch": 0.88, "learning_rate": 5.44087705559906e-05, "loss": 3.2046, "step": 1920 }, { "epoch": 0.89, "learning_rate": 5.417384494909945e-05, "loss": 3.1743, "step": 1940 }, { "epoch": 0.9, "learning_rate": 5.395066562255285e-05, "loss": 3.2055, "step": 1960 }, { "epoch": 0.91, "learning_rate": 5.37157400156617e-05, "loss": 3.2152, "step": 1980 }, { "epoch": 0.92, "learning_rate": 5.349256068911511e-05, "loss": 3.1845, "step": 2000 }, { "epoch": 0.92, "eval_loss": 3.092197895050049, "eval_runtime": 195.9315, "eval_samples_per_second": 24.718, "eval_steps_per_second": 0.776, "eval_wer": 1.0, "step": 2000 }, { "epoch": 0.93, "learning_rate": 5.3257635082223954e-05, "loss": 3.2093, "step": 2020 }, { "epoch": 0.94, "learning_rate": 5.302270947533281e-05, "loss": 3.1748, "step": 2040 }, { "epoch": 0.95, "learning_rate": 5.279953014878621e-05, "loss": 3.199, "step": 2060 }, { "epoch": 0.96, "learning_rate": 5.2564604541895067e-05, "loss": 3.2169, "step": 2080 }, { "epoch": 0.97, "learning_rate": 5.2341425215348466e-05, "loss": 3.1817, "step": 2100 }, { "epoch": 0.97, "eval_loss": 3.092197895050049, "eval_runtime": 193.8566, "eval_samples_per_second": 24.982, "eval_steps_per_second": 0.784, "eval_wer": 1.0, "step": 2100 }, { "epoch": 0.98, "learning_rate": 5.210649960845731e-05, "loss": 3.2075, "step": 2120 }, { "epoch": 0.98, "learning_rate": 5.1871574001566166e-05, "loss": 3.1693, "step": 2140 }, { "epoch": 0.99, "learning_rate": 5.164839467501957e-05, "loss": 3.2049, "step": 2160 }, { "epoch": 1.0, "learning_rate": 5.1425215348472984e-05, "loss": 3.3657, "step": 2180 }, { "epoch": 1.01, "learning_rate": 5.119028974158183e-05, "loss": 3.2209, "step": 2200 }, { "epoch": 1.01, "eval_loss": 3.092197895050049, "eval_runtime": 194.751, "eval_samples_per_second": 24.868, "eval_steps_per_second": 0.78, "eval_wer": 1.0, "step": 2200 }, { "epoch": 1.02, "learning_rate": 5.095536413469068e-05, "loss": 3.0961, "step": 2220 }, { "epoch": 1.03, "learning_rate": 5.073218480814408e-05, "loss": 3.2735, "step": 2240 }, { "epoch": 1.04, "learning_rate": 5.049725920125293e-05, "loss": 3.1904, "step": 2260 }, { "epoch": 1.05, "learning_rate": 5.0274079874706336e-05, "loss": 3.1892, "step": 2280 }, { "epoch": 1.06, "learning_rate": 5.003915426781519e-05, "loss": 3.2138, "step": 2300 }, { "epoch": 1.06, "eval_loss": 3.092197895050049, "eval_runtime": 195.285, "eval_samples_per_second": 24.8, "eval_steps_per_second": 0.778, "eval_wer": 1.0, "step": 2300 }, { "epoch": 1.07, "learning_rate": 4.980422866092404e-05, "loss": 3.0819, "step": 2320 }, { "epoch": 1.08, "learning_rate": 4.958104933437744e-05, "loss": 3.272, "step": 2340 }, { "epoch": 1.09, "learning_rate": 4.9346123727486296e-05, "loss": 3.2011, "step": 2360 }, { "epoch": 1.1, "learning_rate": 4.9122944400939695e-05, "loss": 3.1918, "step": 2380 }, { "epoch": 1.1, "learning_rate": 4.888801879404854e-05, "loss": 3.2154, "step": 2400 }, { "epoch": 1.1, "eval_loss": 3.092197895050049, "eval_runtime": 196.4218, "eval_samples_per_second": 24.656, "eval_steps_per_second": 0.774, "eval_wer": 1.0, "step": 2400 }, { "epoch": 1.11, "learning_rate": 4.8653093187157396e-05, "loss": 3.0946, "step": 2420 }, { "epoch": 1.12, "learning_rate": 4.84299138606108e-05, "loss": 3.2753, "step": 2440 }, { "epoch": 1.13, "learning_rate": 4.8194988253719655e-05, "loss": 3.1829, "step": 2460 }, { "epoch": 1.14, "learning_rate": 4.797180892717306e-05, "loss": 3.1839, "step": 2480 }, { "epoch": 1.15, "learning_rate": 4.773688332028191e-05, "loss": 3.2185, "step": 2500 }, { "epoch": 1.15, "eval_loss": 3.092197895050049, "eval_runtime": 192.8929, "eval_samples_per_second": 25.107, "eval_steps_per_second": 0.788, "eval_wer": 1.0, "step": 2500 }, { "epoch": 1.16, "learning_rate": 4.750195771339076e-05, "loss": 3.0971, "step": 2520 }, { "epoch": 1.17, "learning_rate": 4.727877838684416e-05, "loss": 3.2719, "step": 2540 }, { "epoch": 1.18, "learning_rate": 4.704385277995301e-05, "loss": 3.1974, "step": 2560 }, { "epoch": 1.19, "learning_rate": 4.680892717306186e-05, "loss": 3.0775, "step": 2580 }, { "epoch": 1.2, "learning_rate": 4.657400156617071e-05, "loss": 3.2201, "step": 2600 }, { "epoch": 1.2, "eval_loss": 3.092197895050049, "eval_runtime": 192.9488, "eval_samples_per_second": 25.1, "eval_steps_per_second": 0.788, "eval_wer": 1.0, "step": 2600 }, { "epoch": 1.21, "learning_rate": 4.6339075959279554e-05, "loss": 3.0946, "step": 2620 }, { "epoch": 1.22, "learning_rate": 4.611589663273297e-05, "loss": 3.2708, "step": 2640 }, { "epoch": 1.22, "learning_rate": 4.5880971025841814e-05, "loss": 3.1949, "step": 2660 }, { "epoch": 1.23, "learning_rate": 4.564604541895066e-05, "loss": 3.0763, "step": 2680 }, { "epoch": 1.24, "learning_rate": 4.5411119812059514e-05, "loss": 3.2184, "step": 2700 }, { "epoch": 1.24, "eval_loss": 3.092197895050049, "eval_runtime": 189.7646, "eval_samples_per_second": 25.521, "eval_steps_per_second": 0.801, "eval_wer": 1.0, "step": 2700 }, { "epoch": 1.25, "learning_rate": 4.517619420516836e-05, "loss": 3.0962, "step": 2720 }, { "epoch": 1.26, "learning_rate": 4.495301487862176e-05, "loss": 3.2795, "step": 2740 }, { "epoch": 1.27, "learning_rate": 4.4718089271730614e-05, "loss": 3.1901, "step": 2760 }, { "epoch": 1.28, "learning_rate": 4.449490994518402e-05, "loss": 3.1902, "step": 2780 }, { "epoch": 1.29, "learning_rate": 4.425998433829287e-05, "loss": 3.2196, "step": 2800 }, { "epoch": 1.29, "eval_loss": 3.092197895050049, "eval_runtime": 192.1661, "eval_samples_per_second": 25.202, "eval_steps_per_second": 0.791, "eval_wer": 1.0, "step": 2800 }, { "epoch": 1.3, "learning_rate": 4.402505873140172e-05, "loss": 3.0923, "step": 2820 }, { "epoch": 1.31, "learning_rate": 4.3801879404855126e-05, "loss": 3.2783, "step": 2840 }, { "epoch": 1.32, "learning_rate": 4.356695379796398e-05, "loss": 3.195, "step": 2860 }, { "epoch": 1.33, "learning_rate": 4.335552075176194e-05, "loss": 3.3041, "step": 2880 }, { "epoch": 1.34, "learning_rate": 4.3120595144870784e-05, "loss": 3.2158, "step": 2900 }, { "epoch": 1.34, "eval_loss": 3.092197895050049, "eval_runtime": 194.7934, "eval_samples_per_second": 24.862, "eval_steps_per_second": 0.78, "eval_wer": 1.0, "step": 2900 } ], "max_steps": 6516, "num_train_epochs": 3, "total_flos": 5.539671173723811e+19, "trial_name": null, "trial_params": null }