{ "best_metric": 1.18520188331604, "best_model_checkpoint": "batoula187/wav2vec2-xlsr-arabic2/checkpoint-1800", "epoch": 20.027725563909776, "eval_steps": 200, "global_step": 14200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.42283298097251587, "grad_norm": 0.43856075406074524, "learning_rate": 0.00011999999999999999, "loss": 0.1174, "step": 200 }, { "epoch": 0.42283298097251587, "eval_loss": 1.2291227579116821, "eval_runtime": 64.2106, "eval_samples_per_second": 16.321, "eval_steps_per_second": 2.04, "eval_wer": 0.6153393487642213, "step": 200 }, { "epoch": 0.8456659619450317, "grad_norm": 5.825935363769531, "learning_rate": 0.00023999999999999998, "loss": 0.098, "step": 400 }, { "epoch": 0.8456659619450317, "eval_loss": 1.232483148574829, "eval_runtime": 64.9125, "eval_samples_per_second": 16.145, "eval_steps_per_second": 2.018, "eval_wer": 0.6275009807767752, "step": 400 }, { "epoch": 1.2684989429175475, "grad_norm": 0.9385707378387451, "learning_rate": 0.00029780861943024103, "loss": 0.1301, "step": 600 }, { "epoch": 1.2684989429175475, "eval_loss": 1.1968672275543213, "eval_runtime": 63.9557, "eval_samples_per_second": 16.386, "eval_steps_per_second": 2.048, "eval_wer": 0.6127893291486858, "step": 600 }, { "epoch": 1.6913319238900635, "grad_norm": 0.5655494332313538, "learning_rate": 0.00029346968590211834, "loss": 0.1514, "step": 800 }, { "epoch": 1.6913319238900635, "eval_loss": 1.2292981147766113, "eval_runtime": 64.6544, "eval_samples_per_second": 16.209, "eval_steps_per_second": 2.026, "eval_wer": 0.6488819144762652, "step": 800 }, { "epoch": 2.1141649048625792, "grad_norm": 1.0750213861465454, "learning_rate": 0.0002890869247626004, "loss": 0.1494, "step": 1000 }, { "epoch": 2.1141649048625792, "eval_loss": 1.3062251806259155, "eval_runtime": 63.5313, "eval_samples_per_second": 16.496, "eval_steps_per_second": 2.062, "eval_wer": 0.670066692820714, "step": 1000 }, { "epoch": 2.536997885835095, "grad_norm": 0.8127875328063965, "learning_rate": 0.0002847041636230825, "loss": 0.1382, "step": 1200 }, { "epoch": 2.536997885835095, "eval_loss": 1.2222816944122314, "eval_runtime": 63.8482, "eval_samples_per_second": 16.414, "eval_steps_per_second": 2.052, "eval_wer": 0.6261278932914869, "step": 1200 }, { "epoch": 2.9598308668076108, "grad_norm": 0.8811041116714478, "learning_rate": 0.00028032140248356464, "loss": 0.1382, "step": 1400 }, { "epoch": 2.9598308668076108, "eval_loss": 1.3115968704223633, "eval_runtime": 65.0526, "eval_samples_per_second": 16.11, "eval_steps_per_second": 2.014, "eval_wer": 0.6506473126716359, "step": 1400 }, { "epoch": 3.382663847780127, "grad_norm": 0.7833051085472107, "learning_rate": 0.0002759386413440467, "loss": 0.1239, "step": 1600 }, { "epoch": 3.382663847780127, "eval_loss": 1.1977170705795288, "eval_runtime": 64.8745, "eval_samples_per_second": 16.154, "eval_steps_per_second": 2.019, "eval_wer": 0.6188701451549627, "step": 1600 }, { "epoch": 3.8054968287526427, "grad_norm": 0.4267895817756653, "learning_rate": 0.0002715558802045288, "loss": 0.1228, "step": 1800 }, { "epoch": 3.8054968287526427, "eval_loss": 1.18520188331604, "eval_runtime": 64.2457, "eval_samples_per_second": 16.312, "eval_steps_per_second": 2.039, "eval_wer": 0.6280894468418988, "step": 1800 }, { "epoch": 4.2283298097251585, "grad_norm": 0.7231135964393616, "learning_rate": 0.00026717311906501094, "loss": 0.1117, "step": 2000 }, { "epoch": 4.2283298097251585, "eval_loss": 1.3370016813278198, "eval_runtime": 64.7812, "eval_samples_per_second": 16.178, "eval_steps_per_second": 2.022, "eval_wer": 0.6494703805413887, "step": 2000 }, { "epoch": 4.651162790697675, "grad_norm": 0.40635955333709717, "learning_rate": 0.000262790357925493, "loss": 0.1118, "step": 2200 }, { "epoch": 4.651162790697675, "eval_loss": 1.3265222311019897, "eval_runtime": 63.5949, "eval_samples_per_second": 16.479, "eval_steps_per_second": 2.06, "eval_wer": 0.6431934091800706, "step": 2200 }, { "epoch": 5.07399577167019, "grad_norm": 0.9417168498039246, "learning_rate": 0.0002584075967859751, "loss": 0.1101, "step": 2400 }, { "epoch": 5.07399577167019, "eval_loss": 1.345849633216858, "eval_runtime": 64.4165, "eval_samples_per_second": 16.269, "eval_steps_per_second": 2.034, "eval_wer": 0.6310317771675167, "step": 2400 }, { "epoch": 5.496828752642706, "grad_norm": 0.7866037487983704, "learning_rate": 0.00025402483564645724, "loss": 0.1328, "step": 2600 }, { "epoch": 5.496828752642706, "eval_loss": 1.2545326948165894, "eval_runtime": 64.0398, "eval_samples_per_second": 16.365, "eval_steps_per_second": 2.046, "eval_wer": 0.6341702628481758, "step": 2600 }, { "epoch": 5.9196617336152215, "grad_norm": 0.47020798921585083, "learning_rate": 0.00024964207450693936, "loss": 0.1384, "step": 2800 }, { "epoch": 5.9196617336152215, "eval_loss": 1.2806007862091064, "eval_runtime": 64.3065, "eval_samples_per_second": 16.297, "eval_steps_per_second": 2.037, "eval_wer": 0.6265202040015693, "step": 2800 }, { "epoch": 6.342494714587738, "grad_norm": 0.605645477771759, "learning_rate": 0.0002452593133674214, "loss": 0.1334, "step": 3000 }, { "epoch": 6.342494714587738, "eval_loss": 1.2484089136123657, "eval_runtime": 65.2815, "eval_samples_per_second": 16.054, "eval_steps_per_second": 2.007, "eval_wer": 0.6369164378187524, "step": 3000 }, { "epoch": 6.765327695560254, "grad_norm": 0.6708455085754395, "learning_rate": 0.00024087655222790357, "loss": 0.1383, "step": 3200 }, { "epoch": 6.765327695560254, "eval_loss": 1.2701318264007568, "eval_runtime": 63.6121, "eval_samples_per_second": 16.475, "eval_steps_per_second": 2.059, "eval_wer": 0.6479011377010593, "step": 3200 }, { "epoch": 7.188160676532769, "grad_norm": 0.3204992711544037, "learning_rate": 0.00023649379108838566, "loss": 0.1281, "step": 3400 }, { "epoch": 7.188160676532769, "eval_loss": 1.1926395893096924, "eval_runtime": 65.1953, "eval_samples_per_second": 16.075, "eval_steps_per_second": 2.009, "eval_wer": 0.6314240878775991, "step": 3400 }, { "epoch": 7.6109936575052854, "grad_norm": 2.2028682231903076, "learning_rate": 0.00023211102994886775, "loss": 0.1232, "step": 3600 }, { "epoch": 7.6109936575052854, "eval_loss": 1.2255371809005737, "eval_runtime": 65.3961, "eval_samples_per_second": 16.025, "eval_steps_per_second": 2.003, "eval_wer": 0.6186739897999215, "step": 3600 }, { "epoch": 8.033826638477802, "grad_norm": 0.33132538199424744, "learning_rate": 2e-05, "loss": 0.0727, "step": 3800 }, { "epoch": 8.033826638477802, "eval_loss": 1.23982572555542, "eval_runtime": 66.6836, "eval_samples_per_second": 15.716, "eval_steps_per_second": 1.575, "eval_wer": 0.6014123185562966, "step": 3800 }, { "epoch": 8.456659619450317, "grad_norm": 0.3985452950000763, "learning_rate": 4e-05, "loss": 0.0749, "step": 4000 }, { "epoch": 8.456659619450317, "eval_loss": 1.2319424152374268, "eval_runtime": 67.0281, "eval_samples_per_second": 15.635, "eval_steps_per_second": 1.567, "eval_wer": 0.595723813260102, "step": 4000 }, { "epoch": 8.879492600422832, "grad_norm": 0.5187695622444153, "learning_rate": 4.955849889624724e-05, "loss": 0.0734, "step": 4200 }, { "epoch": 8.879492600422832, "eval_loss": 1.2246508598327637, "eval_runtime": 66.6194, "eval_samples_per_second": 15.731, "eval_steps_per_second": 1.576, "eval_wer": 0.5878775990584543, "step": 4200 }, { "epoch": 9.30232558139535, "grad_norm": 0.3908683955669403, "learning_rate": 4.867549668874172e-05, "loss": 0.0684, "step": 4400 }, { "epoch": 9.30232558139535, "eval_loss": 1.347394585609436, "eval_runtime": 65.574, "eval_samples_per_second": 15.982, "eval_steps_per_second": 1.601, "eval_wer": 0.6135739505688506, "step": 4400 }, { "epoch": 9.725158562367865, "grad_norm": 1.09392511844635, "learning_rate": 4.779690949227373e-05, "loss": 0.073, "step": 4600 }, { "epoch": 9.725158562367865, "eval_loss": 1.2836501598358154, "eval_runtime": 76.5173, "eval_samples_per_second": 13.696, "eval_steps_per_second": 1.372, "eval_wer": 0.5935661043546488, "step": 4600 }, { "epoch": 10.14799154334038, "grad_norm": 0.786432683467865, "learning_rate": 4.691390728476822e-05, "loss": 0.0728, "step": 4800 }, { "epoch": 10.14799154334038, "eval_loss": 1.247693657875061, "eval_runtime": 66.6059, "eval_samples_per_second": 15.734, "eval_steps_per_second": 1.576, "eval_wer": 0.5910160847391134, "step": 4800 }, { "epoch": 10.570824524312897, "grad_norm": 0.22806741297245026, "learning_rate": 4.6030905077262693e-05, "loss": 0.0718, "step": 5000 }, { "epoch": 10.570824524312897, "eval_loss": 1.2471730709075928, "eval_runtime": 67.4477, "eval_samples_per_second": 15.538, "eval_steps_per_second": 1.557, "eval_wer": 0.5867006669282071, "step": 5000 }, { "epoch": 10.993657505285412, "grad_norm": 1.541914463043213, "learning_rate": 4.5147902869757175e-05, "loss": 0.0685, "step": 5200 }, { "epoch": 10.993657505285412, "eval_loss": 1.2693225145339966, "eval_runtime": 65.4788, "eval_samples_per_second": 16.005, "eval_steps_per_second": 1.604, "eval_wer": 0.5788544527265594, "step": 5200 }, { "epoch": 11.416490486257928, "grad_norm": 0.4897485673427582, "learning_rate": 4.4264900662251656e-05, "loss": 0.0649, "step": 5400 }, { "epoch": 11.416490486257928, "eval_loss": 1.2164980173110962, "eval_runtime": 66.1024, "eval_samples_per_second": 15.854, "eval_steps_per_second": 1.588, "eval_wer": 0.5786582973715182, "step": 5400 }, { "epoch": 11.839323467230443, "grad_norm": 0.269406795501709, "learning_rate": 4.338189845474614e-05, "loss": 0.0632, "step": 5600 }, { "epoch": 11.839323467230443, "eval_loss": 1.2446550130844116, "eval_runtime": 69.6183, "eval_samples_per_second": 15.054, "eval_steps_per_second": 1.508, "eval_wer": 0.5841506473126716, "step": 5600 }, { "epoch": 12.26215644820296, "grad_norm": 0.6048020720481873, "learning_rate": 4.249889624724062e-05, "loss": 0.0625, "step": 5800 }, { "epoch": 12.26215644820296, "eval_loss": 1.308754324913025, "eval_runtime": 67.8481, "eval_samples_per_second": 15.446, "eval_steps_per_second": 1.548, "eval_wer": 0.5806198509219301, "step": 5800 }, { "epoch": 12.684989429175475, "grad_norm": 0.2880701720714569, "learning_rate": 4.16158940397351e-05, "loss": 0.061, "step": 6000 }, { "epoch": 12.684989429175475, "eval_loss": 1.3398616313934326, "eval_runtime": 67.8965, "eval_samples_per_second": 15.435, "eval_steps_per_second": 1.546, "eval_wer": 0.5923891722244017, "step": 6000 }, { "epoch": 13.10782241014799, "grad_norm": 0.419969767332077, "learning_rate": 4.073289183222958e-05, "loss": 0.0595, "step": 6200 }, { "epoch": 13.10782241014799, "eval_loss": 1.3048608303070068, "eval_runtime": 66.7124, "eval_samples_per_second": 15.709, "eval_steps_per_second": 1.574, "eval_wer": 0.5768928991761475, "step": 6200 }, { "epoch": 13.530655391120508, "grad_norm": 0.17939791083335876, "learning_rate": 3.9849889624724064e-05, "loss": 0.0608, "step": 6400 }, { "epoch": 13.530655391120508, "eval_loss": 1.2737226486206055, "eval_runtime": 67.084, "eval_samples_per_second": 15.622, "eval_steps_per_second": 1.565, "eval_wer": 0.573362102785406, "step": 6400 }, { "epoch": 13.953488372093023, "grad_norm": 0.32987338304519653, "learning_rate": 3.8966887417218545e-05, "loss": 0.0596, "step": 6600 }, { "epoch": 13.953488372093023, "eval_loss": 1.228838324546814, "eval_runtime": 66.3842, "eval_samples_per_second": 15.787, "eval_steps_per_second": 1.582, "eval_wer": 0.5747351902706944, "step": 6600 }, { "epoch": 14.376321353065538, "grad_norm": 1.2768590450286865, "learning_rate": 3.808388520971303e-05, "loss": 0.0565, "step": 6800 }, { "epoch": 14.376321353065538, "eval_loss": 1.2599496841430664, "eval_runtime": 66.2771, "eval_samples_per_second": 15.812, "eval_steps_per_second": 1.584, "eval_wer": 0.5676735974892114, "step": 6800 }, { "epoch": 14.799154334038056, "grad_norm": 0.410118043422699, "learning_rate": 3.720088300220751e-05, "loss": 0.0568, "step": 7000 }, { "epoch": 14.799154334038056, "eval_loss": 1.2704553604125977, "eval_runtime": 66.0979, "eval_samples_per_second": 15.855, "eval_steps_per_second": 1.589, "eval_wer": 0.5621812475480581, "step": 7000 }, { "epoch": 15.221987315010571, "grad_norm": 0.3582391142845154, "learning_rate": 3.631788079470198e-05, "loss": 0.0538, "step": 7200 }, { "epoch": 15.221987315010571, "eval_loss": 1.3540122509002686, "eval_runtime": 66.4126, "eval_samples_per_second": 15.78, "eval_steps_per_second": 1.581, "eval_wer": 0.5837583366025892, "step": 7200 }, { "epoch": 15.644820295983086, "grad_norm": 0.261311411857605, "learning_rate": 3.5439293598234e-05, "loss": 0.0585, "step": 7400 }, { "epoch": 15.644820295983086, "eval_loss": 1.3333740234375, "eval_runtime": 66.9559, "eval_samples_per_second": 15.652, "eval_steps_per_second": 1.568, "eval_wer": 0.5798352295017654, "step": 7400 }, { "epoch": 16.067653276955603, "grad_norm": 0.3650034964084625, "learning_rate": 3.455629139072848e-05, "loss": 0.0548, "step": 7600 }, { "epoch": 16.067653276955603, "eval_loss": 1.3312934637069702, "eval_runtime": 66.7147, "eval_samples_per_second": 15.709, "eval_steps_per_second": 1.574, "eval_wer": 0.5723813260102001, "step": 7600 }, { "epoch": 16.49048625792812, "grad_norm": 0.4263480305671692, "learning_rate": 3.367328918322296e-05, "loss": 0.0526, "step": 7800 }, { "epoch": 16.49048625792812, "eval_loss": 1.3299189805984497, "eval_runtime": 67.2527, "eval_samples_per_second": 15.583, "eval_steps_per_second": 1.561, "eval_wer": 0.5719890153001177, "step": 7800 }, { "epoch": 16.913319238900634, "grad_norm": 0.41519472002983093, "learning_rate": 3.2790286975717444e-05, "loss": 0.0577, "step": 8000 }, { "epoch": 16.913319238900634, "eval_loss": 1.3206344842910767, "eval_runtime": 67.3412, "eval_samples_per_second": 15.563, "eval_steps_per_second": 1.559, "eval_wer": 0.5829737151824245, "step": 8000 }, { "epoch": 17.33615221987315, "grad_norm": 0.9854322075843811, "learning_rate": 3.190728476821192e-05, "loss": 0.0513, "step": 8200 }, { "epoch": 17.33615221987315, "eval_loss": 1.3500277996063232, "eval_runtime": 66.908, "eval_samples_per_second": 15.663, "eval_steps_per_second": 1.569, "eval_wer": 0.5786582973715182, "step": 8200 }, { "epoch": 17.758985200845665, "grad_norm": 0.4509384334087372, "learning_rate": 3.10242825607064e-05, "loss": 0.0506, "step": 8400 }, { "epoch": 17.758985200845665, "eval_loss": 1.3184752464294434, "eval_runtime": 66.0674, "eval_samples_per_second": 15.863, "eval_steps_per_second": 1.589, "eval_wer": 0.5698313063946646, "step": 8400 }, { "epoch": 18.181818181818183, "grad_norm": 0.20881137251853943, "learning_rate": 3.0145695364238412e-05, "loss": 0.0498, "step": 8600 }, { "epoch": 18.181818181818183, "eval_loss": 1.3655884265899658, "eval_runtime": 65.7835, "eval_samples_per_second": 15.931, "eval_steps_per_second": 1.596, "eval_wer": 0.5800313848568066, "step": 8600 }, { "epoch": 18.6046511627907, "grad_norm": 8.674209594726562, "learning_rate": 2.9262693156732894e-05, "loss": 0.0515, "step": 8800 }, { "epoch": 18.6046511627907, "eval_loss": 1.3253123760223389, "eval_runtime": 65.9715, "eval_samples_per_second": 15.886, "eval_steps_per_second": 1.592, "eval_wer": 0.5668889760690466, "step": 8800 }, { "epoch": 19.027484143763214, "grad_norm": 0.2627590298652649, "learning_rate": 2.83841059602649e-05, "loss": 0.05, "step": 9000 }, { "epoch": 19.027484143763214, "eval_loss": 1.3410530090332031, "eval_runtime": 65.3408, "eval_samples_per_second": 16.039, "eval_steps_per_second": 1.607, "eval_wer": 0.5810121616320125, "step": 9000 }, { "epoch": 19.45031712473573, "grad_norm": 0.3467521667480469, "learning_rate": 2.7501103752759385e-05, "loss": 0.048, "step": 9200 }, { "epoch": 19.45031712473573, "eval_loss": 1.3627517223358154, "eval_runtime": 67.7618, "eval_samples_per_second": 15.466, "eval_steps_per_second": 1.55, "eval_wer": 0.5729697920753236, "step": 9200 }, { "epoch": 19.873150105708245, "grad_norm": 0.3944872319698334, "learning_rate": 2.6618101545253866e-05, "loss": 0.049, "step": 9400 }, { "epoch": 19.873150105708245, "eval_loss": 1.3700072765350342, "eval_runtime": 71.6559, "eval_samples_per_second": 14.625, "eval_steps_per_second": 1.465, "eval_wer": 0.5729697920753236, "step": 9400 }, { "epoch": 20.29598308668076, "grad_norm": 0.63824862241745, "learning_rate": 2.5735099337748348e-05, "loss": 0.0469, "step": 9600 }, { "epoch": 20.29598308668076, "eval_loss": 1.364588975906372, "eval_runtime": 75.7846, "eval_samples_per_second": 13.829, "eval_steps_per_second": 1.386, "eval_wer": 0.5717928599450764, "step": 9600 }, { "epoch": 20.718816067653275, "grad_norm": 0.29639732837677, "learning_rate": 2.4852097130242826e-05, "loss": 0.0474, "step": 9800 }, { "epoch": 20.718816067653275, "eval_loss": 1.4190597534179688, "eval_runtime": 72.4218, "eval_samples_per_second": 14.471, "eval_steps_per_second": 1.45, "eval_wer": 0.5786582973715182, "step": 9800 }, { "epoch": 21.141649048625794, "grad_norm": 0.3832600712776184, "learning_rate": 2.396909492273731e-05, "loss": 0.0488, "step": 10000 }, { "epoch": 21.141649048625794, "eval_loss": 1.3449620008468628, "eval_runtime": 76.5571, "eval_samples_per_second": 13.689, "eval_steps_per_second": 1.372, "eval_wer": 0.575323656335818, "step": 10000 }, { "epoch": 21.56448202959831, "grad_norm": 0.44069257378578186, "learning_rate": 2.308609271523179e-05, "loss": 0.0466, "step": 10200 }, { "epoch": 21.56448202959831, "eval_loss": 1.2960785627365112, "eval_runtime": 72.0544, "eval_samples_per_second": 14.545, "eval_steps_per_second": 1.457, "eval_wer": 0.5612004707728521, "step": 10200 }, { "epoch": 21.987315010570825, "grad_norm": 0.336251437664032, "learning_rate": 2.220309050772627e-05, "loss": 0.0462, "step": 10400 }, { "epoch": 21.987315010570825, "eval_loss": 1.3378616571426392, "eval_runtime": 66.2158, "eval_samples_per_second": 15.827, "eval_steps_per_second": 1.586, "eval_wer": 0.5731659474303649, "step": 10400 }, { "epoch": 22.41014799154334, "grad_norm": 0.24060240387916565, "learning_rate": 2.1320088300220752e-05, "loss": 0.0479, "step": 10600 }, { "epoch": 22.41014799154334, "eval_loss": 1.364053726196289, "eval_runtime": 65.0016, "eval_samples_per_second": 16.123, "eval_steps_per_second": 1.615, "eval_wer": 0.5755198116908592, "step": 10600 }, { "epoch": 22.832980972515855, "grad_norm": 0.28006625175476074, "learning_rate": 2.0437086092715233e-05, "loss": 0.0475, "step": 10800 }, { "epoch": 22.832980972515855, "eval_loss": 1.3316149711608887, "eval_runtime": 66.7406, "eval_samples_per_second": 15.703, "eval_steps_per_second": 1.573, "eval_wer": 0.5751275009807768, "step": 10800 }, { "epoch": 23.25581395348837, "grad_norm": 1.4817793369293213, "learning_rate": 1.9554083885209715e-05, "loss": 0.0461, "step": 11000 }, { "epoch": 23.25581395348837, "eval_loss": 1.4020766019821167, "eval_runtime": 65.2476, "eval_samples_per_second": 16.062, "eval_steps_per_second": 1.609, "eval_wer": 0.5778736759513535, "step": 11000 }, { "epoch": 23.67864693446089, "grad_norm": 0.13971921801567078, "learning_rate": 1.8671081677704193e-05, "loss": 0.0443, "step": 11200 }, { "epoch": 23.67864693446089, "eval_loss": 1.3807997703552246, "eval_runtime": 65.7311, "eval_samples_per_second": 15.944, "eval_steps_per_second": 1.597, "eval_wer": 0.5766967438211064, "step": 11200 }, { "epoch": 24.101479915433405, "grad_norm": 1.3055016994476318, "learning_rate": 1.7788079470198678e-05, "loss": 0.0448, "step": 11400 }, { "epoch": 24.101479915433405, "eval_loss": 1.4156945943832397, "eval_runtime": 65.6108, "eval_samples_per_second": 15.973, "eval_steps_per_second": 1.6, "eval_wer": 0.5778736759513535, "step": 11400 }, { "epoch": 16.360902255639097, "grad_norm": 0.8831915259361267, "learning_rate": 2.8240928882438317e-05, "loss": 0.1948, "step": 11600 }, { "epoch": 16.360902255639097, "eval_loss": 0.8630273342132568, "eval_runtime": 66.0147, "eval_samples_per_second": 15.875, "eval_steps_per_second": 1.591, "eval_wer": 0.5619850921930168, "step": 11600 }, { "epoch": 16.642857142857142, "grad_norm": 3.0019664764404297, "learning_rate": 2.7663280116110306e-05, "loss": 0.1658, "step": 11800 }, { "epoch": 16.642857142857142, "eval_loss": 0.933027982711792, "eval_runtime": 65.0734, "eval_samples_per_second": 16.105, "eval_steps_per_second": 1.614, "eval_wer": 0.569242840329541, "step": 11800 }, { "epoch": 16.924812030075188, "grad_norm": 1.1889337301254272, "learning_rate": 2.7082728592162554e-05, "loss": 0.1632, "step": 12000 }, { "epoch": 16.924812030075188, "eval_loss": 0.8790073394775391, "eval_runtime": 66.0534, "eval_samples_per_second": 15.866, "eval_steps_per_second": 1.59, "eval_wer": 0.5517850137308749, "step": 12000 }, { "epoch": 17.206766917293233, "grad_norm": 0.7651678919792175, "learning_rate": 2.6502177068214808e-05, "loss": 0.1373, "step": 12200 }, { "epoch": 17.206766917293233, "eval_loss": 0.9278713464736938, "eval_runtime": 66.2333, "eval_samples_per_second": 15.823, "eval_steps_per_second": 1.585, "eval_wer": 0.5455080423695566, "step": 12200 }, { "epoch": 17.48872180451128, "grad_norm": 0.5034601092338562, "learning_rate": 2.5921625544267052e-05, "loss": 0.1233, "step": 12400 }, { "epoch": 17.48872180451128, "eval_loss": 1.0113743543624878, "eval_runtime": 64.1889, "eval_samples_per_second": 16.327, "eval_steps_per_second": 1.636, "eval_wer": 0.5633581796783053, "step": 12400 }, { "epoch": 17.770676691729324, "grad_norm": 0.6201246380805969, "learning_rate": 2.5341074020319307e-05, "loss": 0.1223, "step": 12600 }, { "epoch": 17.770676691729324, "eval_loss": 1.0203057527542114, "eval_runtime": 65.0796, "eval_samples_per_second": 16.103, "eval_steps_per_second": 1.613, "eval_wer": 0.5637504903883876, "step": 12600 }, { "epoch": 18.05263157894737, "grad_norm": 0.47344139218330383, "learning_rate": 2.4760522496371554e-05, "loss": 0.1207, "step": 12800 }, { "epoch": 18.05263157894737, "eval_loss": 1.0659551620483398, "eval_runtime": 65.8453, "eval_samples_per_second": 15.916, "eval_steps_per_second": 1.595, "eval_wer": 0.5723813260102001, "step": 12800 }, { "epoch": 18.334586466165412, "grad_norm": 0.9515664577484131, "learning_rate": 2.4179970972423805e-05, "loss": 0.1009, "step": 13000 }, { "epoch": 18.334586466165412, "eval_loss": 1.0872668027877808, "eval_runtime": 66.957, "eval_samples_per_second": 15.652, "eval_steps_per_second": 1.568, "eval_wer": 0.5666928207140055, "step": 13000 }, { "epoch": 18.616541353383457, "grad_norm": 2.7835988998413086, "learning_rate": 2.3599419448476053e-05, "loss": 0.106, "step": 13200 }, { "epoch": 18.616541353383457, "eval_loss": 1.1188278198242188, "eval_runtime": 65.7163, "eval_samples_per_second": 15.947, "eval_steps_per_second": 1.598, "eval_wer": 0.5666928207140055, "step": 13200 }, { "epoch": 18.898496240601503, "grad_norm": 0.33580583333969116, "learning_rate": 2.3018867924528304e-05, "loss": 0.0989, "step": 13400 }, { "epoch": 18.898496240601503, "eval_loss": 1.0954115390777588, "eval_runtime": 67.9533, "eval_samples_per_second": 15.422, "eval_steps_per_second": 1.545, "eval_wer": 0.5688505296194586, "step": 13400 }, { "epoch": 19.18045112781955, "grad_norm": 0.20191040635108948, "learning_rate": 2.243831640058055e-05, "loss": 0.0981, "step": 13600 }, { "epoch": 19.18045112781955, "eval_loss": 1.1167967319488525, "eval_runtime": 68.7147, "eval_samples_per_second": 15.251, "eval_steps_per_second": 1.528, "eval_wer": 0.5635543350333464, "step": 13600 }, { "epoch": 19.463815789473685, "grad_norm": 0.5584314465522766, "learning_rate": 2.186066763425254e-05, "loss": 0.0858, "step": 13800 }, { "epoch": 19.463815789473685, "eval_loss": 1.1654815673828125, "eval_runtime": 67.1626, "eval_samples_per_second": 15.604, "eval_steps_per_second": 1.563, "eval_wer": 0.5668889760690466, "step": 13800 }, { "epoch": 19.74577067669173, "grad_norm": 0.5650402307510376, "learning_rate": 2.128011611030479e-05, "loss": 0.0851, "step": 14000 }, { "epoch": 19.74577067669173, "eval_loss": 1.1516063213348389, "eval_runtime": 66.1494, "eval_samples_per_second": 15.843, "eval_steps_per_second": 1.587, "eval_wer": 0.5596312279325225, "step": 14000 }, { "epoch": 20.027725563909776, "grad_norm": 0.35859623551368713, "learning_rate": 2.069956458635704e-05, "loss": 0.0929, "step": 14200 }, { "epoch": 20.027725563909776, "eval_loss": 1.1067341566085815, "eval_runtime": 65.8487, "eval_samples_per_second": 15.915, "eval_steps_per_second": 1.595, "eval_wer": 0.5545311887014516, "step": 14200 } ], "logging_steps": 200, "max_steps": 17725, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.5363964686252196e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }