{ "best_metric": null, "best_model_checkpoint": null, "epoch": 76.49968454258675, "global_step": 40392, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5, "learning_rate": 0.0002971500949968334, "loss": 28.5848, "step": 400 }, { "epoch": 0.5, "eval_cer": 1.0, "eval_loss": 6.502501964569092, "eval_runtime": 11.659, "eval_samples_per_second": 44.344, "eval_steps_per_second": 5.575, "step": 400 }, { "epoch": 1.01, "learning_rate": 0.000293350221659278, "loss": 6.3672, "step": 800 }, { "epoch": 1.01, "eval_cer": 1.0, "eval_loss": 6.260684490203857, "eval_runtime": 10.5591, "eval_samples_per_second": 48.963, "eval_steps_per_second": 6.156, "step": 800 }, { "epoch": 1.51, "learning_rate": 0.0002895503483217226, "loss": 6.2055, "step": 1200 }, { "epoch": 1.51, "eval_cer": 1.0, "eval_loss": 6.158237934112549, "eval_runtime": 10.6391, "eval_samples_per_second": 48.594, "eval_steps_per_second": 6.11, "step": 1200 }, { "epoch": 2.02, "learning_rate": 0.00028575047498416716, "loss": 5.7804, "step": 1600 }, { "epoch": 2.02, "eval_cer": 0.9059449866903283, "eval_loss": 4.536555767059326, "eval_runtime": 10.5927, "eval_samples_per_second": 48.807, "eval_steps_per_second": 6.136, "step": 1600 }, { "epoch": 2.52, "learning_rate": 0.00028195060164661176, "loss": 3.9161, "step": 2000 }, { "epoch": 2.52, "eval_cer": 0.8118899733806566, "eval_loss": 3.134929895401001, "eval_runtime": 10.6118, "eval_samples_per_second": 48.72, "eval_steps_per_second": 6.125, "step": 2000 }, { "epoch": 3.03, "learning_rate": 0.00027815072830905636, "loss": 3.0555, "step": 2400 }, { "epoch": 3.03, "eval_cer": 0.7320319432120674, "eval_loss": 2.483675241470337, "eval_runtime": 10.6525, "eval_samples_per_second": 48.533, "eval_steps_per_second": 6.102, "step": 2400 }, { "epoch": 3.54, "learning_rate": 0.00027435085497150096, "loss": 2.4529, "step": 2800 }, { "epoch": 3.54, "eval_cer": 0.6789707187222715, "eval_loss": 2.2075281143188477, "eval_runtime": 10.7002, "eval_samples_per_second": 48.317, "eval_steps_per_second": 6.075, "step": 2800 }, { "epoch": 4.04, "learning_rate": 0.0002705509816339455, "loss": 2.1371, "step": 3200 }, { "epoch": 4.04, "eval_cer": 0.5902395740905058, "eval_loss": 1.737073302268982, "eval_runtime": 10.7189, "eval_samples_per_second": 48.232, "eval_steps_per_second": 6.064, "step": 3200 }, { "epoch": 4.55, "learning_rate": 0.0002667511082963901, "loss": 1.83, "step": 3600 }, { "epoch": 4.55, "eval_cer": 0.5771073646850045, "eval_loss": 1.6357048749923706, "eval_runtime": 10.6531, "eval_samples_per_second": 48.53, "eval_steps_per_second": 6.102, "step": 3600 }, { "epoch": 5.05, "learning_rate": 0.0002629512349588347, "loss": 1.7147, "step": 4000 }, { "epoch": 5.05, "eval_cer": 0.540550133096717, "eval_loss": 1.4679120779037476, "eval_runtime": 10.6485, "eval_samples_per_second": 48.551, "eval_steps_per_second": 6.104, "step": 4000 }, { "epoch": 5.56, "learning_rate": 0.00025915136162127925, "loss": 1.5428, "step": 4400 }, { "epoch": 5.56, "eval_cer": 0.5210292812777285, "eval_loss": 1.4024958610534668, "eval_runtime": 11.1389, "eval_samples_per_second": 46.414, "eval_steps_per_second": 5.835, "step": 4400 }, { "epoch": 6.06, "learning_rate": 0.00025535148828372385, "loss": 1.4859, "step": 4800 }, { "epoch": 6.06, "eval_cer": 0.5094942324755989, "eval_loss": 1.3681739568710327, "eval_runtime": 10.7225, "eval_samples_per_second": 48.217, "eval_steps_per_second": 6.062, "step": 4800 }, { "epoch": 6.57, "learning_rate": 0.00025155161494616845, "loss": 1.359, "step": 5200 }, { "epoch": 6.57, "eval_cer": 0.49955634427684115, "eval_loss": 1.3149375915527344, "eval_runtime": 10.7188, "eval_samples_per_second": 48.233, "eval_steps_per_second": 6.064, "step": 5200 }, { "epoch": 7.07, "learning_rate": 0.00024775174160861305, "loss": 1.3425, "step": 5600 }, { "epoch": 7.07, "eval_cer": 0.4913930789707187, "eval_loss": 1.3069249391555786, "eval_runtime": 10.672, "eval_samples_per_second": 48.444, "eval_steps_per_second": 6.091, "step": 5600 }, { "epoch": 7.58, "learning_rate": 0.00024395186827105763, "loss": 1.2121, "step": 6000 }, { "epoch": 7.58, "eval_cer": 0.49032830523513754, "eval_loss": 1.284098744392395, "eval_runtime": 10.6314, "eval_samples_per_second": 48.629, "eval_steps_per_second": 6.114, "step": 6000 }, { "epoch": 8.08, "learning_rate": 0.0002401519949335022, "loss": 1.1872, "step": 6400 }, { "epoch": 8.08, "eval_cer": 0.4727595385980479, "eval_loss": 1.2425189018249512, "eval_runtime": 10.4012, "eval_samples_per_second": 49.706, "eval_steps_per_second": 6.249, "step": 6400 }, { "epoch": 8.59, "learning_rate": 0.00023635212159594677, "loss": 1.0969, "step": 6800 }, { "epoch": 8.59, "eval_cer": 0.47346938775510206, "eval_loss": 1.2218185663223267, "eval_runtime": 10.6834, "eval_samples_per_second": 48.393, "eval_steps_per_second": 6.084, "step": 6800 }, { "epoch": 9.09, "learning_rate": 0.00023255224825839138, "loss": 1.0807, "step": 7200 }, { "epoch": 9.09, "eval_cer": 0.4603371783496007, "eval_loss": 1.2110862731933594, "eval_runtime": 10.7448, "eval_samples_per_second": 48.116, "eval_steps_per_second": 6.049, "step": 7200 }, { "epoch": 9.6, "learning_rate": 0.00022875237492083595, "loss": 0.9964, "step": 7600 }, { "epoch": 9.6, "eval_cer": 0.44986690328305234, "eval_loss": 1.1391839981079102, "eval_runtime": 10.7437, "eval_samples_per_second": 48.121, "eval_steps_per_second": 6.05, "step": 7600 }, { "epoch": 10.1, "learning_rate": 0.00022495250158328055, "loss": 0.9758, "step": 8000 }, { "epoch": 10.1, "eval_cer": 0.4433007985803017, "eval_loss": 1.115509033203125, "eval_runtime": 10.6429, "eval_samples_per_second": 48.577, "eval_steps_per_second": 6.107, "step": 8000 }, { "epoch": 10.61, "learning_rate": 0.00022115262824572512, "loss": 0.8896, "step": 8400 }, { "epoch": 10.61, "eval_cer": 0.4456078083407276, "eval_loss": 1.134329080581665, "eval_runtime": 10.6834, "eval_samples_per_second": 48.393, "eval_steps_per_second": 6.084, "step": 8400 }, { "epoch": 11.11, "learning_rate": 0.00021735275490816972, "loss": 0.869, "step": 8800 }, { "epoch": 11.11, "eval_cer": 0.4413487133984028, "eval_loss": 1.1351521015167236, "eval_runtime": 10.6859, "eval_samples_per_second": 48.381, "eval_steps_per_second": 6.083, "step": 8800 }, { "epoch": 11.62, "learning_rate": 0.0002135528815706143, "loss": 0.8204, "step": 9200 }, { "epoch": 11.62, "eval_cer": 0.4431233362910382, "eval_loss": 1.1095759868621826, "eval_runtime": 10.6541, "eval_samples_per_second": 48.526, "eval_steps_per_second": 6.101, "step": 9200 }, { "epoch": 12.12, "learning_rate": 0.00020975300823305887, "loss": 0.7935, "step": 9600 }, { "epoch": 12.12, "eval_cer": 0.4427684117125111, "eval_loss": 1.1288646459579468, "eval_runtime": 10.7946, "eval_samples_per_second": 47.894, "eval_steps_per_second": 6.022, "step": 9600 }, { "epoch": 12.63, "learning_rate": 0.00020595313489550347, "loss": 0.728, "step": 10000 }, { "epoch": 12.63, "eval_cer": 0.4321206743566992, "eval_loss": 1.086965799331665, "eval_runtime": 10.816, "eval_samples_per_second": 47.8, "eval_steps_per_second": 6.01, "step": 10000 }, { "epoch": 13.13, "learning_rate": 0.00020215326155794804, "loss": 0.7185, "step": 10400 }, { "epoch": 13.13, "eval_cer": 0.42058562555456963, "eval_loss": 1.0575684309005737, "eval_runtime": 10.6718, "eval_samples_per_second": 48.446, "eval_steps_per_second": 6.091, "step": 10400 }, { "epoch": 13.64, "learning_rate": 0.00019835338822039264, "loss": 0.6604, "step": 10800 }, { "epoch": 13.64, "eval_cer": 0.4262644188110027, "eval_loss": 1.0773364305496216, "eval_runtime": 10.6968, "eval_samples_per_second": 48.332, "eval_steps_per_second": 6.077, "step": 10800 }, { "epoch": 14.14, "learning_rate": 0.00019455351488283722, "loss": 0.6319, "step": 11200 }, { "epoch": 14.14, "eval_cer": 0.41543921916592724, "eval_loss": 1.0636992454528809, "eval_runtime": 10.6335, "eval_samples_per_second": 48.62, "eval_steps_per_second": 6.113, "step": 11200 }, { "epoch": 14.65, "learning_rate": 0.00019075364154528182, "loss": 0.5949, "step": 11600 }, { "epoch": 14.65, "eval_cer": 0.41774622892635316, "eval_loss": 1.0470980405807495, "eval_runtime": 10.6966, "eval_samples_per_second": 48.333, "eval_steps_per_second": 6.077, "step": 11600 }, { "epoch": 15.15, "learning_rate": 0.0001869537682077264, "loss": 0.5729, "step": 12000 }, { "epoch": 15.15, "eval_cer": 0.4111801242236025, "eval_loss": 1.069692850112915, "eval_runtime": 10.6729, "eval_samples_per_second": 48.44, "eval_steps_per_second": 6.09, "step": 12000 }, { "epoch": 15.66, "learning_rate": 0.000183153894870171, "loss": 0.5408, "step": 12400 }, { "epoch": 15.66, "eval_cer": 0.4157941437444543, "eval_loss": 1.042482614517212, "eval_runtime": 10.665, "eval_samples_per_second": 48.476, "eval_steps_per_second": 6.095, "step": 12400 }, { "epoch": 16.16, "learning_rate": 0.00017935402153261557, "loss": 0.5246, "step": 12800 }, { "epoch": 16.16, "eval_cer": 0.4085181898846495, "eval_loss": 1.0480538606643677, "eval_runtime": 10.7079, "eval_samples_per_second": 48.282, "eval_steps_per_second": 6.07, "step": 12800 }, { "epoch": 16.67, "learning_rate": 0.00017555414819506014, "loss": 0.4757, "step": 13200 }, { "epoch": 16.67, "eval_cer": 0.4065661047027507, "eval_loss": 1.0319401025772095, "eval_runtime": 10.661, "eval_samples_per_second": 48.494, "eval_steps_per_second": 6.097, "step": 13200 }, { "epoch": 17.17, "learning_rate": 0.00017175427485750474, "loss": 0.4694, "step": 13600 }, { "epoch": 17.17, "eval_cer": 0.402661934338953, "eval_loss": 1.0221748352050781, "eval_runtime": 10.6738, "eval_samples_per_second": 48.436, "eval_steps_per_second": 6.09, "step": 13600 }, { "epoch": 17.68, "learning_rate": 0.0001679544015199493, "loss": 0.4514, "step": 14000 }, { "epoch": 17.68, "eval_cer": 0.4010647737355812, "eval_loss": 1.0336159467697144, "eval_runtime": 10.6673, "eval_samples_per_second": 48.466, "eval_steps_per_second": 6.093, "step": 14000 }, { "epoch": 18.18, "learning_rate": 0.00016415452818239391, "loss": 0.4479, "step": 14400 }, { "epoch": 18.18, "eval_cer": 0.40301685891748, "eval_loss": 1.0329766273498535, "eval_runtime": 11.2617, "eval_samples_per_second": 45.908, "eval_steps_per_second": 5.772, "step": 14400 }, { "epoch": 18.69, "learning_rate": 0.0001603546548448385, "loss": 0.4206, "step": 14800 }, { "epoch": 18.69, "eval_cer": 0.3953859804791482, "eval_loss": 1.0453214645385742, "eval_runtime": 10.4709, "eval_samples_per_second": 49.375, "eval_steps_per_second": 6.208, "step": 14800 }, { "epoch": 19.19, "learning_rate": 0.0001565547815072831, "loss": 0.4025, "step": 15200 }, { "epoch": 19.19, "eval_cer": 0.4, "eval_loss": 1.0425928831100464, "eval_runtime": 10.7087, "eval_samples_per_second": 48.278, "eval_steps_per_second": 6.07, "step": 15200 }, { "epoch": 19.7, "learning_rate": 0.00015275490816972766, "loss": 0.368, "step": 15600 }, { "epoch": 19.7, "eval_cer": 0.391659272404614, "eval_loss": 1.0207164287567139, "eval_runtime": 10.853, "eval_samples_per_second": 47.637, "eval_steps_per_second": 5.989, "step": 15600 }, { "epoch": 20.2, "learning_rate": 0.00014895503483217226, "loss": 0.3652, "step": 16000 }, { "epoch": 20.2, "eval_cer": 0.3877551020408163, "eval_loss": 1.019087791442871, "eval_runtime": 10.7301, "eval_samples_per_second": 48.182, "eval_steps_per_second": 6.058, "step": 16000 }, { "epoch": 20.71, "learning_rate": 0.00014515516149461683, "loss": 0.3362, "step": 16400 }, { "epoch": 20.71, "eval_cer": 0.38846495119787045, "eval_loss": 1.0187304019927979, "eval_runtime": 10.6995, "eval_samples_per_second": 48.32, "eval_steps_per_second": 6.075, "step": 16400 }, { "epoch": 21.21, "learning_rate": 0.0001413552881570614, "loss": 0.354, "step": 16800 }, { "epoch": 21.21, "eval_cer": 0.3881100266193434, "eval_loss": 1.0370773077011108, "eval_runtime": 10.6833, "eval_samples_per_second": 48.393, "eval_steps_per_second": 6.084, "step": 16800 }, { "epoch": 21.72, "learning_rate": 0.000137555414819506, "loss": 0.3296, "step": 17200 }, { "epoch": 21.72, "eval_cer": 0.3893522626441881, "eval_loss": 1.0535281896591187, "eval_runtime": 10.7771, "eval_samples_per_second": 47.972, "eval_steps_per_second": 6.031, "step": 17200 }, { "epoch": 22.22, "learning_rate": 0.00013375554148195058, "loss": 0.3134, "step": 17600 }, { "epoch": 22.22, "eval_cer": 0.3877551020408163, "eval_loss": 1.0371551513671875, "eval_runtime": 10.68, "eval_samples_per_second": 48.408, "eval_steps_per_second": 6.086, "step": 17600 }, { "epoch": 22.73, "learning_rate": 0.00012995566814439518, "loss": 0.3077, "step": 18000 }, { "epoch": 22.73, "eval_cer": 0.39077196095829636, "eval_loss": 1.0353987216949463, "eval_runtime": 10.4328, "eval_samples_per_second": 49.555, "eval_steps_per_second": 6.23, "step": 18000 }, { "epoch": 23.23, "learning_rate": 0.00012615579480683976, "loss": 0.289, "step": 18400 }, { "epoch": 23.23, "eval_cer": 0.3934338952972493, "eval_loss": 1.0498236417770386, "eval_runtime": 10.6646, "eval_samples_per_second": 48.478, "eval_steps_per_second": 6.095, "step": 18400 }, { "epoch": 23.74, "learning_rate": 0.00012235592146928436, "loss": 0.2753, "step": 18800 }, { "epoch": 23.74, "eval_cer": 0.39023957409050575, "eval_loss": 1.0461602210998535, "eval_runtime": 10.685, "eval_samples_per_second": 48.386, "eval_steps_per_second": 6.083, "step": 18800 }, { "epoch": 24.24, "learning_rate": 0.00011855604813172893, "loss": 0.2791, "step": 19200 }, { "epoch": 24.24, "eval_cer": 0.38846495119787045, "eval_loss": 1.07412588596344, "eval_runtime": 10.7167, "eval_samples_per_second": 48.243, "eval_steps_per_second": 6.065, "step": 19200 }, { "epoch": 24.75, "learning_rate": 0.00011475617479417352, "loss": 0.2757, "step": 19600 }, { "epoch": 24.75, "eval_cer": 0.385980479148181, "eval_loss": 1.0546280145645142, "eval_runtime": 10.5367, "eval_samples_per_second": 49.067, "eval_steps_per_second": 6.169, "step": 19600 }, { "epoch": 25.25, "learning_rate": 0.0001109563014566181, "loss": 0.2533, "step": 20000 }, { "epoch": 25.25, "eval_cer": 0.3817213842058563, "eval_loss": 1.0429767370224, "eval_runtime": 10.7598, "eval_samples_per_second": 48.049, "eval_steps_per_second": 6.041, "step": 20000 }, { "epoch": 25.76, "learning_rate": 0.00010715642811906269, "loss": 0.2499, "step": 20400 }, { "epoch": 25.76, "eval_cer": 0.38456078083407275, "eval_loss": 1.0354866981506348, "eval_runtime": 10.6619, "eval_samples_per_second": 48.49, "eval_steps_per_second": 6.096, "step": 20400 }, { "epoch": 26.26, "learning_rate": 0.00010335655478150728, "loss": 0.2407, "step": 20800 }, { "epoch": 26.26, "eval_cer": 0.38101153504880214, "eval_loss": 1.0512378215789795, "eval_runtime": 10.667, "eval_samples_per_second": 48.467, "eval_steps_per_second": 6.094, "step": 20800 }, { "epoch": 26.77, "learning_rate": 9.955668144395185e-05, "loss": 0.2373, "step": 21200 }, { "epoch": 26.77, "eval_cer": 0.3758651286601597, "eval_loss": 1.032917857170105, "eval_runtime": 10.6927, "eval_samples_per_second": 48.351, "eval_steps_per_second": 6.079, "step": 21200 }, { "epoch": 27.27, "learning_rate": 9.575680810639644e-05, "loss": 0.2295, "step": 21600 }, { "epoch": 27.27, "eval_cer": 0.3785270629991127, "eval_loss": 1.031385064125061, "eval_runtime": 10.7343, "eval_samples_per_second": 48.163, "eval_steps_per_second": 6.055, "step": 21600 }, { "epoch": 27.78, "learning_rate": 9.195693476884103e-05, "loss": 0.2186, "step": 22000 }, { "epoch": 27.78, "eval_cer": 0.3742679680567879, "eval_loss": 1.028822422027588, "eval_runtime": 10.6374, "eval_samples_per_second": 48.602, "eval_steps_per_second": 6.111, "step": 22000 }, { "epoch": 28.28, "learning_rate": 8.815706143128561e-05, "loss": 0.2084, "step": 22400 }, { "epoch": 28.28, "eval_cer": 0.37373558118899736, "eval_loss": 1.0298017263412476, "eval_runtime": 10.6689, "eval_samples_per_second": 48.459, "eval_steps_per_second": 6.092, "step": 22400 }, { "epoch": 28.79, "learning_rate": 8.43571880937302e-05, "loss": 0.2066, "step": 22800 }, { "epoch": 28.79, "eval_cer": 0.37497781721384205, "eval_loss": 1.0195808410644531, "eval_runtime": 10.4803, "eval_samples_per_second": 49.331, "eval_steps_per_second": 6.202, "step": 22800 }, { "epoch": 29.29, "learning_rate": 8.055731475617479e-05, "loss": 0.1933, "step": 23200 }, { "epoch": 29.29, "eval_cer": 0.380301685891748, "eval_loss": 1.0443964004516602, "eval_runtime": 10.6391, "eval_samples_per_second": 48.594, "eval_steps_per_second": 6.11, "step": 23200 }, { "epoch": 29.8, "learning_rate": 7.675744141861937e-05, "loss": 0.1875, "step": 23600 }, { "epoch": 29.8, "eval_cer": 0.3691215616681455, "eval_loss": 1.0274165868759155, "eval_runtime": 10.6901, "eval_samples_per_second": 48.363, "eval_steps_per_second": 6.08, "step": 23600 }, { "epoch": 30.3, "learning_rate": 7.295756808106396e-05, "loss": 0.184, "step": 24000 }, { "epoch": 30.3, "eval_cer": 0.37267080745341613, "eval_loss": 1.0159742832183838, "eval_runtime": 10.6909, "eval_samples_per_second": 48.359, "eval_steps_per_second": 6.08, "step": 24000 }, { "epoch": 30.81, "learning_rate": 6.915769474350855e-05, "loss": 0.1864, "step": 24400 }, { "epoch": 30.81, "eval_cer": 0.37089618456078083, "eval_loss": 1.0185551643371582, "eval_runtime": 10.6616, "eval_samples_per_second": 48.492, "eval_steps_per_second": 6.097, "step": 24400 }, { "epoch": 31.31, "learning_rate": 6.535782140595312e-05, "loss": 0.176, "step": 24800 }, { "epoch": 31.31, "eval_cer": 0.3682342502218279, "eval_loss": 1.01682710647583, "eval_runtime": 10.7487, "eval_samples_per_second": 48.099, "eval_steps_per_second": 6.047, "step": 24800 }, { "epoch": 31.82, "learning_rate": 6.155794806839771e-05, "loss": 0.1734, "step": 25200 }, { "epoch": 31.82, "eval_cer": 0.3685891748003549, "eval_loss": 1.0079487562179565, "eval_runtime": 10.6916, "eval_samples_per_second": 48.356, "eval_steps_per_second": 6.08, "step": 25200 }, { "epoch": 32.32, "learning_rate": 5.7758074730842294e-05, "loss": 0.1686, "step": 25600 }, { "epoch": 32.32, "eval_cer": 0.37107364685004435, "eval_loss": 1.0045541524887085, "eval_runtime": 10.6896, "eval_samples_per_second": 48.365, "eval_steps_per_second": 6.081, "step": 25600 }, { "epoch": 32.83, "learning_rate": 5.395820139328688e-05, "loss": 0.1636, "step": 26000 }, { "epoch": 32.83, "eval_cer": 0.366282165039929, "eval_loss": 1.0012236833572388, "eval_runtime": 10.6269, "eval_samples_per_second": 48.65, "eval_steps_per_second": 6.117, "step": 26000 }, { "epoch": 33.33, "learning_rate": 5.015832805573147e-05, "loss": 0.1584, "step": 26400 }, { "epoch": 33.33, "eval_cer": 0.3634427684117125, "eval_loss": 0.9943842887878418, "eval_runtime": 10.7058, "eval_samples_per_second": 48.292, "eval_steps_per_second": 6.071, "step": 26400 }, { "epoch": 33.84, "learning_rate": 4.635845471817606e-05, "loss": 0.1592, "step": 26800 }, { "epoch": 33.84, "eval_cer": 0.3678793256433008, "eval_loss": 0.9912722110748291, "eval_runtime": 10.6845, "eval_samples_per_second": 48.388, "eval_steps_per_second": 6.084, "step": 26800 }, { "epoch": 34.34, "learning_rate": 4.255858138062065e-05, "loss": 0.1574, "step": 27200 }, { "epoch": 34.34, "eval_cer": 0.36876663708961843, "eval_loss": 1.0088311433792114, "eval_runtime": 10.6592, "eval_samples_per_second": 48.503, "eval_steps_per_second": 6.098, "step": 27200 }, { "epoch": 34.85, "learning_rate": 3.875870804306523e-05, "loss": 0.1537, "step": 27600 }, { "epoch": 34.85, "eval_cer": 0.3645075421472937, "eval_loss": 0.9913118481636047, "eval_runtime": 11.1744, "eval_samples_per_second": 46.266, "eval_steps_per_second": 5.817, "step": 27600 }, { "epoch": 35.35, "learning_rate": 3.495883470550981e-05, "loss": 0.1461, "step": 28000 }, { "epoch": 35.35, "eval_cer": 0.3634427684117125, "eval_loss": 0.9954361915588379, "eval_runtime": 10.4022, "eval_samples_per_second": 49.701, "eval_steps_per_second": 6.249, "step": 28000 }, { "epoch": 35.86, "learning_rate": 3.1158961367954396e-05, "loss": 0.1462, "step": 28400 }, { "epoch": 35.86, "eval_cer": 0.35989352262644186, "eval_loss": 0.9881103038787842, "eval_runtime": 10.6493, "eval_samples_per_second": 48.548, "eval_steps_per_second": 6.104, "step": 28400 }, { "epoch": 36.36, "learning_rate": 2.7359088030398983e-05, "loss": 0.1412, "step": 28800 }, { "epoch": 36.36, "eval_cer": 0.3593611357586513, "eval_loss": 0.9881191849708557, "eval_runtime": 11.1771, "eval_samples_per_second": 46.255, "eval_steps_per_second": 5.815, "step": 28800 }, { "epoch": 36.87, "learning_rate": 2.3559214692843567e-05, "loss": 0.1382, "step": 29200 }, { "epoch": 36.87, "eval_cer": 0.36184560780834074, "eval_loss": 0.9879063963890076, "eval_runtime": 10.6397, "eval_samples_per_second": 48.592, "eval_steps_per_second": 6.109, "step": 29200 }, { "epoch": 37.37, "learning_rate": 1.9759341355288154e-05, "loss": 0.1395, "step": 29600 }, { "epoch": 37.37, "eval_cer": 0.3582963620230701, "eval_loss": 0.9859166145324707, "eval_runtime": 10.6674, "eval_samples_per_second": 48.465, "eval_steps_per_second": 6.093, "step": 29600 }, { "epoch": 37.88, "learning_rate": 1.595946801773274e-05, "loss": 0.1375, "step": 30000 }, { "epoch": 37.88, "eval_cer": 0.36024844720496896, "eval_loss": 0.9944302439689636, "eval_runtime": 10.7121, "eval_samples_per_second": 48.263, "eval_steps_per_second": 6.068, "step": 30000 }, { "epoch": 38.38, "learning_rate": 1.2159594680177326e-05, "loss": 0.1277, "step": 30400 }, { "epoch": 38.38, "eval_cer": 0.3611357586512866, "eval_loss": 0.9873452186584473, "eval_runtime": 10.6816, "eval_samples_per_second": 48.401, "eval_steps_per_second": 6.085, "step": 30400 }, { "epoch": 38.89, "learning_rate": 8.359721342621911e-06, "loss": 0.1356, "step": 30800 }, { "epoch": 38.89, "eval_cer": 0.36007098491570544, "eval_loss": 0.9833679795265198, "eval_runtime": 10.6711, "eval_samples_per_second": 48.449, "eval_steps_per_second": 6.091, "step": 30800 }, { "epoch": 59.09, "learning_rate": 7.859515899383008e-05, "loss": 0.141, "step": 31200 }, { "epoch": 59.09, "eval_cer": 0.36539485359361135, "eval_loss": 1.0076383352279663, "eval_runtime": 11.8848, "eval_samples_per_second": 43.501, "eval_steps_per_second": 5.469, "step": 31200 }, { "epoch": 59.85, "learning_rate": 7.574750830564784e-05, "loss": 0.1391, "step": 31600 }, { "epoch": 59.85, "eval_cer": 0.363265306122449, "eval_loss": 1.0228257179260254, "eval_runtime": 10.7532, "eval_samples_per_second": 48.079, "eval_steps_per_second": 6.045, "step": 31600 }, { "epoch": 60.61, "learning_rate": 7.289985761746559e-05, "loss": 0.1444, "step": 32000 }, { "epoch": 60.61, "eval_cer": 0.36876663708961843, "eval_loss": 1.0302114486694336, "eval_runtime": 10.5859, "eval_samples_per_second": 48.838, "eval_steps_per_second": 6.14, "step": 32000 }, { "epoch": 61.36, "learning_rate": 7.005220692928333e-05, "loss": 0.1396, "step": 32400 }, { "epoch": 61.36, "eval_cer": 0.3634427684117125, "eval_loss": 1.0219813585281372, "eval_runtime": 10.7349, "eval_samples_per_second": 48.161, "eval_steps_per_second": 6.055, "step": 32400 }, { "epoch": 62.12, "learning_rate": 6.720455624110109e-05, "loss": 0.1383, "step": 32800 }, { "epoch": 62.12, "eval_cer": 0.3625554569653949, "eval_loss": 1.0074561834335327, "eval_runtime": 10.7365, "eval_samples_per_second": 48.154, "eval_steps_per_second": 6.054, "step": 32800 }, { "epoch": 62.88, "learning_rate": 6.435690555291883e-05, "loss": 0.1338, "step": 33200 }, { "epoch": 62.88, "eval_cer": 0.36131322094055013, "eval_loss": 1.009969711303711, "eval_runtime": 10.8491, "eval_samples_per_second": 47.654, "eval_steps_per_second": 5.991, "step": 33200 }, { "epoch": 63.64, "learning_rate": 6.150925486473658e-05, "loss": 0.1322, "step": 33600 }, { "epoch": 63.64, "eval_cer": 0.35989352262644186, "eval_loss": 1.0064263343811035, "eval_runtime": 10.7017, "eval_samples_per_second": 48.31, "eval_steps_per_second": 6.074, "step": 33600 }, { "epoch": 64.39, "learning_rate": 5.866160417655434e-05, "loss": 0.1313, "step": 34000 }, { "epoch": 64.39, "eval_cer": 0.3611357586512866, "eval_loss": 1.0025349855422974, "eval_runtime": 10.8036, "eval_samples_per_second": 47.855, "eval_steps_per_second": 6.017, "step": 34000 }, { "epoch": 65.15, "learning_rate": 5.581395348837209e-05, "loss": 0.1275, "step": 34400 }, { "epoch": 65.15, "eval_cer": 0.3625554569653949, "eval_loss": 0.9986574649810791, "eval_runtime": 10.7283, "eval_samples_per_second": 48.19, "eval_steps_per_second": 6.059, "step": 34400 }, { "epoch": 65.91, "learning_rate": 5.296630280018984e-05, "loss": 0.125, "step": 34800 }, { "epoch": 65.91, "eval_cer": 0.36574977817213844, "eval_loss": 1.010204553604126, "eval_runtime": 10.8039, "eval_samples_per_second": 47.853, "eval_steps_per_second": 6.016, "step": 34800 }, { "epoch": 66.67, "learning_rate": 5.011865211200759e-05, "loss": 0.121, "step": 35200 }, { "epoch": 66.67, "eval_cer": 0.36308784383318543, "eval_loss": 1.0088319778442383, "eval_runtime": 10.4383, "eval_samples_per_second": 49.529, "eval_steps_per_second": 6.227, "step": 35200 }, { "epoch": 67.42, "learning_rate": 4.727100142382534e-05, "loss": 0.1247, "step": 35600 }, { "epoch": 67.42, "eval_cer": 0.3648624667258208, "eval_loss": 1.0154913663864136, "eval_runtime": 10.7106, "eval_samples_per_second": 48.27, "eval_steps_per_second": 6.069, "step": 35600 }, { "epoch": 68.18, "learning_rate": 4.442335073564309e-05, "loss": 0.1164, "step": 36000 }, { "epoch": 68.18, "eval_cer": 0.3622005323868678, "eval_loss": 0.9949304461479187, "eval_runtime": 10.713, "eval_samples_per_second": 48.259, "eval_steps_per_second": 6.067, "step": 36000 }, { "epoch": 68.94, "learning_rate": 4.157570004746084e-05, "loss": 0.1112, "step": 36400 }, { "epoch": 68.94, "eval_cer": 0.3609582963620231, "eval_loss": 1.00165593624115, "eval_runtime": 10.7166, "eval_samples_per_second": 48.243, "eval_steps_per_second": 6.065, "step": 36400 }, { "epoch": 69.7, "learning_rate": 3.872804935927859e-05, "loss": 0.1143, "step": 36800 }, { "epoch": 69.7, "eval_cer": 0.3595385980479148, "eval_loss": 0.9980924725532532, "eval_runtime": 10.7622, "eval_samples_per_second": 48.038, "eval_steps_per_second": 6.04, "step": 36800 }, { "epoch": 70.45, "learning_rate": 3.588039867109634e-05, "loss": 0.109, "step": 37200 }, { "epoch": 70.45, "eval_cer": 0.3604259094942325, "eval_loss": 1.001591682434082, "eval_runtime": 10.6962, "eval_samples_per_second": 48.335, "eval_steps_per_second": 6.077, "step": 37200 }, { "epoch": 71.21, "learning_rate": 3.303274798291409e-05, "loss": 0.1066, "step": 37600 }, { "epoch": 71.21, "eval_cer": 0.35918367346938773, "eval_loss": 0.9884746074676514, "eval_runtime": 10.6604, "eval_samples_per_second": 48.497, "eval_steps_per_second": 6.097, "step": 37600 }, { "epoch": 71.97, "learning_rate": 3.0185097294731845e-05, "loss": 0.1042, "step": 38000 }, { "epoch": 71.97, "eval_cer": 0.36007098491570544, "eval_loss": 0.9990329742431641, "eval_runtime": 10.7259, "eval_samples_per_second": 48.201, "eval_steps_per_second": 6.06, "step": 38000 }, { "epoch": 72.73, "learning_rate": 2.7337446606549593e-05, "loss": 0.1024, "step": 38400 }, { "epoch": 72.73, "eval_cer": 0.36007098491570544, "eval_loss": 0.9916397333145142, "eval_runtime": 10.721, "eval_samples_per_second": 48.223, "eval_steps_per_second": 6.063, "step": 38400 }, { "epoch": 73.48, "learning_rate": 2.448979591836734e-05, "loss": 0.1064, "step": 38800 }, { "epoch": 73.48, "eval_cer": 0.35811889973380656, "eval_loss": 0.9944778084754944, "eval_runtime": 10.7694, "eval_samples_per_second": 48.006, "eval_steps_per_second": 6.036, "step": 38800 }, { "epoch": 74.24, "learning_rate": 2.1642145230185097e-05, "loss": 0.1019, "step": 39200 }, { "epoch": 74.24, "eval_cer": 0.3566992014196983, "eval_loss": 0.9997159838676453, "eval_runtime": 10.7395, "eval_samples_per_second": 48.14, "eval_steps_per_second": 6.052, "step": 39200 }, { "epoch": 75.0, "learning_rate": 1.8794494542002845e-05, "loss": 0.0977, "step": 39600 }, { "epoch": 75.0, "eval_cer": 0.35616681455190774, "eval_loss": 0.9909945130348206, "eval_runtime": 10.7111, "eval_samples_per_second": 48.268, "eval_steps_per_second": 6.068, "step": 39600 }, { "epoch": 75.76, "learning_rate": 1.5946843853820597e-05, "loss": 0.097, "step": 40000 }, { "epoch": 75.76, "eval_cer": 0.35598935226264417, "eval_loss": 0.9969141483306885, "eval_runtime": 10.7789, "eval_samples_per_second": 47.964, "eval_steps_per_second": 6.03, "step": 40000 } ], "max_steps": 42240, "num_train_epochs": 80, "total_flos": 9.124217746582361e+19, "trial_name": null, "trial_params": null }