|
{ |
|
"best_metric": 1.18520188331604, |
|
"best_model_checkpoint": "batoula187/wav2vec2-xlsr-arabic2/checkpoint-1800", |
|
"epoch": 21.719454887218046, |
|
"eval_steps": 200, |
|
"global_step": 15400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.42283298097251587, |
|
"grad_norm": 0.43856075406074524, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 0.1174, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42283298097251587, |
|
"eval_loss": 1.2291227579116821, |
|
"eval_runtime": 64.2106, |
|
"eval_samples_per_second": 16.321, |
|
"eval_steps_per_second": 2.04, |
|
"eval_wer": 0.6153393487642213, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8456659619450317, |
|
"grad_norm": 5.825935363769531, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.098, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8456659619450317, |
|
"eval_loss": 1.232483148574829, |
|
"eval_runtime": 64.9125, |
|
"eval_samples_per_second": 16.145, |
|
"eval_steps_per_second": 2.018, |
|
"eval_wer": 0.6275009807767752, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2684989429175475, |
|
"grad_norm": 0.9385707378387451, |
|
"learning_rate": 0.00029780861943024103, |
|
"loss": 0.1301, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2684989429175475, |
|
"eval_loss": 1.1968672275543213, |
|
"eval_runtime": 63.9557, |
|
"eval_samples_per_second": 16.386, |
|
"eval_steps_per_second": 2.048, |
|
"eval_wer": 0.6127893291486858, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6913319238900635, |
|
"grad_norm": 0.5655494332313538, |
|
"learning_rate": 0.00029346968590211834, |
|
"loss": 0.1514, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6913319238900635, |
|
"eval_loss": 1.2292981147766113, |
|
"eval_runtime": 64.6544, |
|
"eval_samples_per_second": 16.209, |
|
"eval_steps_per_second": 2.026, |
|
"eval_wer": 0.6488819144762652, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1141649048625792, |
|
"grad_norm": 1.0750213861465454, |
|
"learning_rate": 0.0002890869247626004, |
|
"loss": 0.1494, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.1141649048625792, |
|
"eval_loss": 1.3062251806259155, |
|
"eval_runtime": 63.5313, |
|
"eval_samples_per_second": 16.496, |
|
"eval_steps_per_second": 2.062, |
|
"eval_wer": 0.670066692820714, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.536997885835095, |
|
"grad_norm": 0.8127875328063965, |
|
"learning_rate": 0.0002847041636230825, |
|
"loss": 0.1382, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.536997885835095, |
|
"eval_loss": 1.2222816944122314, |
|
"eval_runtime": 63.8482, |
|
"eval_samples_per_second": 16.414, |
|
"eval_steps_per_second": 2.052, |
|
"eval_wer": 0.6261278932914869, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.9598308668076108, |
|
"grad_norm": 0.8811041116714478, |
|
"learning_rate": 0.00028032140248356464, |
|
"loss": 0.1382, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.9598308668076108, |
|
"eval_loss": 1.3115968704223633, |
|
"eval_runtime": 65.0526, |
|
"eval_samples_per_second": 16.11, |
|
"eval_steps_per_second": 2.014, |
|
"eval_wer": 0.6506473126716359, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.382663847780127, |
|
"grad_norm": 0.7833051085472107, |
|
"learning_rate": 0.0002759386413440467, |
|
"loss": 0.1239, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.382663847780127, |
|
"eval_loss": 1.1977170705795288, |
|
"eval_runtime": 64.8745, |
|
"eval_samples_per_second": 16.154, |
|
"eval_steps_per_second": 2.019, |
|
"eval_wer": 0.6188701451549627, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.8054968287526427, |
|
"grad_norm": 0.4267895817756653, |
|
"learning_rate": 0.0002715558802045288, |
|
"loss": 0.1228, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.8054968287526427, |
|
"eval_loss": 1.18520188331604, |
|
"eval_runtime": 64.2457, |
|
"eval_samples_per_second": 16.312, |
|
"eval_steps_per_second": 2.039, |
|
"eval_wer": 0.6280894468418988, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.2283298097251585, |
|
"grad_norm": 0.7231135964393616, |
|
"learning_rate": 0.00026717311906501094, |
|
"loss": 0.1117, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.2283298097251585, |
|
"eval_loss": 1.3370016813278198, |
|
"eval_runtime": 64.7812, |
|
"eval_samples_per_second": 16.178, |
|
"eval_steps_per_second": 2.022, |
|
"eval_wer": 0.6494703805413887, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.651162790697675, |
|
"grad_norm": 0.40635955333709717, |
|
"learning_rate": 0.000262790357925493, |
|
"loss": 0.1118, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.651162790697675, |
|
"eval_loss": 1.3265222311019897, |
|
"eval_runtime": 63.5949, |
|
"eval_samples_per_second": 16.479, |
|
"eval_steps_per_second": 2.06, |
|
"eval_wer": 0.6431934091800706, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.07399577167019, |
|
"grad_norm": 0.9417168498039246, |
|
"learning_rate": 0.0002584075967859751, |
|
"loss": 0.1101, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.07399577167019, |
|
"eval_loss": 1.345849633216858, |
|
"eval_runtime": 64.4165, |
|
"eval_samples_per_second": 16.269, |
|
"eval_steps_per_second": 2.034, |
|
"eval_wer": 0.6310317771675167, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.496828752642706, |
|
"grad_norm": 0.7866037487983704, |
|
"learning_rate": 0.00025402483564645724, |
|
"loss": 0.1328, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.496828752642706, |
|
"eval_loss": 1.2545326948165894, |
|
"eval_runtime": 64.0398, |
|
"eval_samples_per_second": 16.365, |
|
"eval_steps_per_second": 2.046, |
|
"eval_wer": 0.6341702628481758, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.9196617336152215, |
|
"grad_norm": 0.47020798921585083, |
|
"learning_rate": 0.00024964207450693936, |
|
"loss": 0.1384, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.9196617336152215, |
|
"eval_loss": 1.2806007862091064, |
|
"eval_runtime": 64.3065, |
|
"eval_samples_per_second": 16.297, |
|
"eval_steps_per_second": 2.037, |
|
"eval_wer": 0.6265202040015693, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.342494714587738, |
|
"grad_norm": 0.605645477771759, |
|
"learning_rate": 0.0002452593133674214, |
|
"loss": 0.1334, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.342494714587738, |
|
"eval_loss": 1.2484089136123657, |
|
"eval_runtime": 65.2815, |
|
"eval_samples_per_second": 16.054, |
|
"eval_steps_per_second": 2.007, |
|
"eval_wer": 0.6369164378187524, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.765327695560254, |
|
"grad_norm": 0.6708455085754395, |
|
"learning_rate": 0.00024087655222790357, |
|
"loss": 0.1383, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.765327695560254, |
|
"eval_loss": 1.2701318264007568, |
|
"eval_runtime": 63.6121, |
|
"eval_samples_per_second": 16.475, |
|
"eval_steps_per_second": 2.059, |
|
"eval_wer": 0.6479011377010593, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.188160676532769, |
|
"grad_norm": 0.3204992711544037, |
|
"learning_rate": 0.00023649379108838566, |
|
"loss": 0.1281, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.188160676532769, |
|
"eval_loss": 1.1926395893096924, |
|
"eval_runtime": 65.1953, |
|
"eval_samples_per_second": 16.075, |
|
"eval_steps_per_second": 2.009, |
|
"eval_wer": 0.6314240878775991, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.6109936575052854, |
|
"grad_norm": 2.2028682231903076, |
|
"learning_rate": 0.00023211102994886775, |
|
"loss": 0.1232, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.6109936575052854, |
|
"eval_loss": 1.2255371809005737, |
|
"eval_runtime": 65.3961, |
|
"eval_samples_per_second": 16.025, |
|
"eval_steps_per_second": 2.003, |
|
"eval_wer": 0.6186739897999215, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.033826638477802, |
|
"grad_norm": 0.33132538199424744, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0727, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 8.033826638477802, |
|
"eval_loss": 1.23982572555542, |
|
"eval_runtime": 66.6836, |
|
"eval_samples_per_second": 15.716, |
|
"eval_steps_per_second": 1.575, |
|
"eval_wer": 0.6014123185562966, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 8.456659619450317, |
|
"grad_norm": 0.3985452950000763, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0749, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.456659619450317, |
|
"eval_loss": 1.2319424152374268, |
|
"eval_runtime": 67.0281, |
|
"eval_samples_per_second": 15.635, |
|
"eval_steps_per_second": 1.567, |
|
"eval_wer": 0.595723813260102, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.879492600422832, |
|
"grad_norm": 0.5187695622444153, |
|
"learning_rate": 4.955849889624724e-05, |
|
"loss": 0.0734, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 8.879492600422832, |
|
"eval_loss": 1.2246508598327637, |
|
"eval_runtime": 66.6194, |
|
"eval_samples_per_second": 15.731, |
|
"eval_steps_per_second": 1.576, |
|
"eval_wer": 0.5878775990584543, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 9.30232558139535, |
|
"grad_norm": 0.3908683955669403, |
|
"learning_rate": 4.867549668874172e-05, |
|
"loss": 0.0684, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.30232558139535, |
|
"eval_loss": 1.347394585609436, |
|
"eval_runtime": 65.574, |
|
"eval_samples_per_second": 15.982, |
|
"eval_steps_per_second": 1.601, |
|
"eval_wer": 0.6135739505688506, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.725158562367865, |
|
"grad_norm": 1.09392511844635, |
|
"learning_rate": 4.779690949227373e-05, |
|
"loss": 0.073, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 9.725158562367865, |
|
"eval_loss": 1.2836501598358154, |
|
"eval_runtime": 76.5173, |
|
"eval_samples_per_second": 13.696, |
|
"eval_steps_per_second": 1.372, |
|
"eval_wer": 0.5935661043546488, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 10.14799154334038, |
|
"grad_norm": 0.786432683467865, |
|
"learning_rate": 4.691390728476822e-05, |
|
"loss": 0.0728, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 10.14799154334038, |
|
"eval_loss": 1.247693657875061, |
|
"eval_runtime": 66.6059, |
|
"eval_samples_per_second": 15.734, |
|
"eval_steps_per_second": 1.576, |
|
"eval_wer": 0.5910160847391134, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 10.570824524312897, |
|
"grad_norm": 0.22806741297245026, |
|
"learning_rate": 4.6030905077262693e-05, |
|
"loss": 0.0718, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.570824524312897, |
|
"eval_loss": 1.2471730709075928, |
|
"eval_runtime": 67.4477, |
|
"eval_samples_per_second": 15.538, |
|
"eval_steps_per_second": 1.557, |
|
"eval_wer": 0.5867006669282071, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.993657505285412, |
|
"grad_norm": 1.541914463043213, |
|
"learning_rate": 4.5147902869757175e-05, |
|
"loss": 0.0685, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 10.993657505285412, |
|
"eval_loss": 1.2693225145339966, |
|
"eval_runtime": 65.4788, |
|
"eval_samples_per_second": 16.005, |
|
"eval_steps_per_second": 1.604, |
|
"eval_wer": 0.5788544527265594, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 11.416490486257928, |
|
"grad_norm": 0.4897485673427582, |
|
"learning_rate": 4.4264900662251656e-05, |
|
"loss": 0.0649, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 11.416490486257928, |
|
"eval_loss": 1.2164980173110962, |
|
"eval_runtime": 66.1024, |
|
"eval_samples_per_second": 15.854, |
|
"eval_steps_per_second": 1.588, |
|
"eval_wer": 0.5786582973715182, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 11.839323467230443, |
|
"grad_norm": 0.269406795501709, |
|
"learning_rate": 4.338189845474614e-05, |
|
"loss": 0.0632, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 11.839323467230443, |
|
"eval_loss": 1.2446550130844116, |
|
"eval_runtime": 69.6183, |
|
"eval_samples_per_second": 15.054, |
|
"eval_steps_per_second": 1.508, |
|
"eval_wer": 0.5841506473126716, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 12.26215644820296, |
|
"grad_norm": 0.6048020720481873, |
|
"learning_rate": 4.249889624724062e-05, |
|
"loss": 0.0625, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 12.26215644820296, |
|
"eval_loss": 1.308754324913025, |
|
"eval_runtime": 67.8481, |
|
"eval_samples_per_second": 15.446, |
|
"eval_steps_per_second": 1.548, |
|
"eval_wer": 0.5806198509219301, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 12.684989429175475, |
|
"grad_norm": 0.2880701720714569, |
|
"learning_rate": 4.16158940397351e-05, |
|
"loss": 0.061, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.684989429175475, |
|
"eval_loss": 1.3398616313934326, |
|
"eval_runtime": 67.8965, |
|
"eval_samples_per_second": 15.435, |
|
"eval_steps_per_second": 1.546, |
|
"eval_wer": 0.5923891722244017, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.10782241014799, |
|
"grad_norm": 0.419969767332077, |
|
"learning_rate": 4.073289183222958e-05, |
|
"loss": 0.0595, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 13.10782241014799, |
|
"eval_loss": 1.3048608303070068, |
|
"eval_runtime": 66.7124, |
|
"eval_samples_per_second": 15.709, |
|
"eval_steps_per_second": 1.574, |
|
"eval_wer": 0.5768928991761475, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 13.530655391120508, |
|
"grad_norm": 0.17939791083335876, |
|
"learning_rate": 3.9849889624724064e-05, |
|
"loss": 0.0608, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 13.530655391120508, |
|
"eval_loss": 1.2737226486206055, |
|
"eval_runtime": 67.084, |
|
"eval_samples_per_second": 15.622, |
|
"eval_steps_per_second": 1.565, |
|
"eval_wer": 0.573362102785406, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 13.953488372093023, |
|
"grad_norm": 0.32987338304519653, |
|
"learning_rate": 3.8966887417218545e-05, |
|
"loss": 0.0596, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 13.953488372093023, |
|
"eval_loss": 1.228838324546814, |
|
"eval_runtime": 66.3842, |
|
"eval_samples_per_second": 15.787, |
|
"eval_steps_per_second": 1.582, |
|
"eval_wer": 0.5747351902706944, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 14.376321353065538, |
|
"grad_norm": 1.2768590450286865, |
|
"learning_rate": 3.808388520971303e-05, |
|
"loss": 0.0565, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 14.376321353065538, |
|
"eval_loss": 1.2599496841430664, |
|
"eval_runtime": 66.2771, |
|
"eval_samples_per_second": 15.812, |
|
"eval_steps_per_second": 1.584, |
|
"eval_wer": 0.5676735974892114, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 14.799154334038056, |
|
"grad_norm": 0.410118043422699, |
|
"learning_rate": 3.720088300220751e-05, |
|
"loss": 0.0568, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 14.799154334038056, |
|
"eval_loss": 1.2704553604125977, |
|
"eval_runtime": 66.0979, |
|
"eval_samples_per_second": 15.855, |
|
"eval_steps_per_second": 1.589, |
|
"eval_wer": 0.5621812475480581, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 15.221987315010571, |
|
"grad_norm": 0.3582391142845154, |
|
"learning_rate": 3.631788079470198e-05, |
|
"loss": 0.0538, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 15.221987315010571, |
|
"eval_loss": 1.3540122509002686, |
|
"eval_runtime": 66.4126, |
|
"eval_samples_per_second": 15.78, |
|
"eval_steps_per_second": 1.581, |
|
"eval_wer": 0.5837583366025892, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 15.644820295983086, |
|
"grad_norm": 0.261311411857605, |
|
"learning_rate": 3.5439293598234e-05, |
|
"loss": 0.0585, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 15.644820295983086, |
|
"eval_loss": 1.3333740234375, |
|
"eval_runtime": 66.9559, |
|
"eval_samples_per_second": 15.652, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wer": 0.5798352295017654, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 16.067653276955603, |
|
"grad_norm": 0.3650034964084625, |
|
"learning_rate": 3.455629139072848e-05, |
|
"loss": 0.0548, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 16.067653276955603, |
|
"eval_loss": 1.3312934637069702, |
|
"eval_runtime": 66.7147, |
|
"eval_samples_per_second": 15.709, |
|
"eval_steps_per_second": 1.574, |
|
"eval_wer": 0.5723813260102001, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 16.49048625792812, |
|
"grad_norm": 0.4263480305671692, |
|
"learning_rate": 3.367328918322296e-05, |
|
"loss": 0.0526, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 16.49048625792812, |
|
"eval_loss": 1.3299189805984497, |
|
"eval_runtime": 67.2527, |
|
"eval_samples_per_second": 15.583, |
|
"eval_steps_per_second": 1.561, |
|
"eval_wer": 0.5719890153001177, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 16.913319238900634, |
|
"grad_norm": 0.41519472002983093, |
|
"learning_rate": 3.2790286975717444e-05, |
|
"loss": 0.0577, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.913319238900634, |
|
"eval_loss": 1.3206344842910767, |
|
"eval_runtime": 67.3412, |
|
"eval_samples_per_second": 15.563, |
|
"eval_steps_per_second": 1.559, |
|
"eval_wer": 0.5829737151824245, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 17.33615221987315, |
|
"grad_norm": 0.9854322075843811, |
|
"learning_rate": 3.190728476821192e-05, |
|
"loss": 0.0513, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 17.33615221987315, |
|
"eval_loss": 1.3500277996063232, |
|
"eval_runtime": 66.908, |
|
"eval_samples_per_second": 15.663, |
|
"eval_steps_per_second": 1.569, |
|
"eval_wer": 0.5786582973715182, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 17.758985200845665, |
|
"grad_norm": 0.4509384334087372, |
|
"learning_rate": 3.10242825607064e-05, |
|
"loss": 0.0506, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 17.758985200845665, |
|
"eval_loss": 1.3184752464294434, |
|
"eval_runtime": 66.0674, |
|
"eval_samples_per_second": 15.863, |
|
"eval_steps_per_second": 1.589, |
|
"eval_wer": 0.5698313063946646, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 18.181818181818183, |
|
"grad_norm": 0.20881137251853943, |
|
"learning_rate": 3.0145695364238412e-05, |
|
"loss": 0.0498, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 18.181818181818183, |
|
"eval_loss": 1.3655884265899658, |
|
"eval_runtime": 65.7835, |
|
"eval_samples_per_second": 15.931, |
|
"eval_steps_per_second": 1.596, |
|
"eval_wer": 0.5800313848568066, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 18.6046511627907, |
|
"grad_norm": 8.674209594726562, |
|
"learning_rate": 2.9262693156732894e-05, |
|
"loss": 0.0515, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 18.6046511627907, |
|
"eval_loss": 1.3253123760223389, |
|
"eval_runtime": 65.9715, |
|
"eval_samples_per_second": 15.886, |
|
"eval_steps_per_second": 1.592, |
|
"eval_wer": 0.5668889760690466, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 19.027484143763214, |
|
"grad_norm": 0.2627590298652649, |
|
"learning_rate": 2.83841059602649e-05, |
|
"loss": 0.05, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 19.027484143763214, |
|
"eval_loss": 1.3410530090332031, |
|
"eval_runtime": 65.3408, |
|
"eval_samples_per_second": 16.039, |
|
"eval_steps_per_second": 1.607, |
|
"eval_wer": 0.5810121616320125, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 19.45031712473573, |
|
"grad_norm": 0.3467521667480469, |
|
"learning_rate": 2.7501103752759385e-05, |
|
"loss": 0.048, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 19.45031712473573, |
|
"eval_loss": 1.3627517223358154, |
|
"eval_runtime": 67.7618, |
|
"eval_samples_per_second": 15.466, |
|
"eval_steps_per_second": 1.55, |
|
"eval_wer": 0.5729697920753236, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 19.873150105708245, |
|
"grad_norm": 0.3944872319698334, |
|
"learning_rate": 2.6618101545253866e-05, |
|
"loss": 0.049, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 19.873150105708245, |
|
"eval_loss": 1.3700072765350342, |
|
"eval_runtime": 71.6559, |
|
"eval_samples_per_second": 14.625, |
|
"eval_steps_per_second": 1.465, |
|
"eval_wer": 0.5729697920753236, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 20.29598308668076, |
|
"grad_norm": 0.63824862241745, |
|
"learning_rate": 2.5735099337748348e-05, |
|
"loss": 0.0469, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 20.29598308668076, |
|
"eval_loss": 1.364588975906372, |
|
"eval_runtime": 75.7846, |
|
"eval_samples_per_second": 13.829, |
|
"eval_steps_per_second": 1.386, |
|
"eval_wer": 0.5717928599450764, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 20.718816067653275, |
|
"grad_norm": 0.29639732837677, |
|
"learning_rate": 2.4852097130242826e-05, |
|
"loss": 0.0474, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 20.718816067653275, |
|
"eval_loss": 1.4190597534179688, |
|
"eval_runtime": 72.4218, |
|
"eval_samples_per_second": 14.471, |
|
"eval_steps_per_second": 1.45, |
|
"eval_wer": 0.5786582973715182, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 21.141649048625794, |
|
"grad_norm": 0.3832600712776184, |
|
"learning_rate": 2.396909492273731e-05, |
|
"loss": 0.0488, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 21.141649048625794, |
|
"eval_loss": 1.3449620008468628, |
|
"eval_runtime": 76.5571, |
|
"eval_samples_per_second": 13.689, |
|
"eval_steps_per_second": 1.372, |
|
"eval_wer": 0.575323656335818, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 21.56448202959831, |
|
"grad_norm": 0.44069257378578186, |
|
"learning_rate": 2.308609271523179e-05, |
|
"loss": 0.0466, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 21.56448202959831, |
|
"eval_loss": 1.2960785627365112, |
|
"eval_runtime": 72.0544, |
|
"eval_samples_per_second": 14.545, |
|
"eval_steps_per_second": 1.457, |
|
"eval_wer": 0.5612004707728521, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 21.987315010570825, |
|
"grad_norm": 0.336251437664032, |
|
"learning_rate": 2.220309050772627e-05, |
|
"loss": 0.0462, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 21.987315010570825, |
|
"eval_loss": 1.3378616571426392, |
|
"eval_runtime": 66.2158, |
|
"eval_samples_per_second": 15.827, |
|
"eval_steps_per_second": 1.586, |
|
"eval_wer": 0.5731659474303649, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 22.41014799154334, |
|
"grad_norm": 0.24060240387916565, |
|
"learning_rate": 2.1320088300220752e-05, |
|
"loss": 0.0479, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 22.41014799154334, |
|
"eval_loss": 1.364053726196289, |
|
"eval_runtime": 65.0016, |
|
"eval_samples_per_second": 16.123, |
|
"eval_steps_per_second": 1.615, |
|
"eval_wer": 0.5755198116908592, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 22.832980972515855, |
|
"grad_norm": 0.28006625175476074, |
|
"learning_rate": 2.0437086092715233e-05, |
|
"loss": 0.0475, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 22.832980972515855, |
|
"eval_loss": 1.3316149711608887, |
|
"eval_runtime": 66.7406, |
|
"eval_samples_per_second": 15.703, |
|
"eval_steps_per_second": 1.573, |
|
"eval_wer": 0.5751275009807768, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 23.25581395348837, |
|
"grad_norm": 1.4817793369293213, |
|
"learning_rate": 1.9554083885209715e-05, |
|
"loss": 0.0461, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 23.25581395348837, |
|
"eval_loss": 1.4020766019821167, |
|
"eval_runtime": 65.2476, |
|
"eval_samples_per_second": 16.062, |
|
"eval_steps_per_second": 1.609, |
|
"eval_wer": 0.5778736759513535, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 23.67864693446089, |
|
"grad_norm": 0.13971921801567078, |
|
"learning_rate": 1.8671081677704193e-05, |
|
"loss": 0.0443, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 23.67864693446089, |
|
"eval_loss": 1.3807997703552246, |
|
"eval_runtime": 65.7311, |
|
"eval_samples_per_second": 15.944, |
|
"eval_steps_per_second": 1.597, |
|
"eval_wer": 0.5766967438211064, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 24.101479915433405, |
|
"grad_norm": 1.3055016994476318, |
|
"learning_rate": 1.7788079470198678e-05, |
|
"loss": 0.0448, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 24.101479915433405, |
|
"eval_loss": 1.4156945943832397, |
|
"eval_runtime": 65.6108, |
|
"eval_samples_per_second": 15.973, |
|
"eval_steps_per_second": 1.6, |
|
"eval_wer": 0.5778736759513535, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 16.360902255639097, |
|
"grad_norm": 0.8831915259361267, |
|
"learning_rate": 2.8240928882438317e-05, |
|
"loss": 0.1948, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 16.360902255639097, |
|
"eval_loss": 0.8630273342132568, |
|
"eval_runtime": 66.0147, |
|
"eval_samples_per_second": 15.875, |
|
"eval_steps_per_second": 1.591, |
|
"eval_wer": 0.5619850921930168, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 16.642857142857142, |
|
"grad_norm": 3.0019664764404297, |
|
"learning_rate": 2.7663280116110306e-05, |
|
"loss": 0.1658, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 16.642857142857142, |
|
"eval_loss": 0.933027982711792, |
|
"eval_runtime": 65.0734, |
|
"eval_samples_per_second": 16.105, |
|
"eval_steps_per_second": 1.614, |
|
"eval_wer": 0.569242840329541, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 16.924812030075188, |
|
"grad_norm": 1.1889337301254272, |
|
"learning_rate": 2.7082728592162554e-05, |
|
"loss": 0.1632, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 16.924812030075188, |
|
"eval_loss": 0.8790073394775391, |
|
"eval_runtime": 66.0534, |
|
"eval_samples_per_second": 15.866, |
|
"eval_steps_per_second": 1.59, |
|
"eval_wer": 0.5517850137308749, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 17.206766917293233, |
|
"grad_norm": 0.7651678919792175, |
|
"learning_rate": 2.6502177068214808e-05, |
|
"loss": 0.1373, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 17.206766917293233, |
|
"eval_loss": 0.9278713464736938, |
|
"eval_runtime": 66.2333, |
|
"eval_samples_per_second": 15.823, |
|
"eval_steps_per_second": 1.585, |
|
"eval_wer": 0.5455080423695566, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 17.48872180451128, |
|
"grad_norm": 0.5034601092338562, |
|
"learning_rate": 2.5921625544267052e-05, |
|
"loss": 0.1233, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 17.48872180451128, |
|
"eval_loss": 1.0113743543624878, |
|
"eval_runtime": 64.1889, |
|
"eval_samples_per_second": 16.327, |
|
"eval_steps_per_second": 1.636, |
|
"eval_wer": 0.5633581796783053, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 17.770676691729324, |
|
"grad_norm": 0.6201246380805969, |
|
"learning_rate": 2.5341074020319307e-05, |
|
"loss": 0.1223, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 17.770676691729324, |
|
"eval_loss": 1.0203057527542114, |
|
"eval_runtime": 65.0796, |
|
"eval_samples_per_second": 16.103, |
|
"eval_steps_per_second": 1.613, |
|
"eval_wer": 0.5637504903883876, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 18.05263157894737, |
|
"grad_norm": 0.47344139218330383, |
|
"learning_rate": 2.4760522496371554e-05, |
|
"loss": 0.1207, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 18.05263157894737, |
|
"eval_loss": 1.0659551620483398, |
|
"eval_runtime": 65.8453, |
|
"eval_samples_per_second": 15.916, |
|
"eval_steps_per_second": 1.595, |
|
"eval_wer": 0.5723813260102001, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 18.334586466165412, |
|
"grad_norm": 0.9515664577484131, |
|
"learning_rate": 2.4179970972423805e-05, |
|
"loss": 0.1009, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 18.334586466165412, |
|
"eval_loss": 1.0872668027877808, |
|
"eval_runtime": 66.957, |
|
"eval_samples_per_second": 15.652, |
|
"eval_steps_per_second": 1.568, |
|
"eval_wer": 0.5666928207140055, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 18.616541353383457, |
|
"grad_norm": 2.7835988998413086, |
|
"learning_rate": 2.3599419448476053e-05, |
|
"loss": 0.106, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 18.616541353383457, |
|
"eval_loss": 1.1188278198242188, |
|
"eval_runtime": 65.7163, |
|
"eval_samples_per_second": 15.947, |
|
"eval_steps_per_second": 1.598, |
|
"eval_wer": 0.5666928207140055, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 18.898496240601503, |
|
"grad_norm": 0.33580583333969116, |
|
"learning_rate": 2.3018867924528304e-05, |
|
"loss": 0.0989, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 18.898496240601503, |
|
"eval_loss": 1.0954115390777588, |
|
"eval_runtime": 67.9533, |
|
"eval_samples_per_second": 15.422, |
|
"eval_steps_per_second": 1.545, |
|
"eval_wer": 0.5688505296194586, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 19.18045112781955, |
|
"grad_norm": 0.20191040635108948, |
|
"learning_rate": 2.243831640058055e-05, |
|
"loss": 0.0981, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 19.18045112781955, |
|
"eval_loss": 1.1167967319488525, |
|
"eval_runtime": 68.7147, |
|
"eval_samples_per_second": 15.251, |
|
"eval_steps_per_second": 1.528, |
|
"eval_wer": 0.5635543350333464, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 19.463815789473685, |
|
"grad_norm": 0.5584314465522766, |
|
"learning_rate": 2.186066763425254e-05, |
|
"loss": 0.0858, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 19.463815789473685, |
|
"eval_loss": 1.1654815673828125, |
|
"eval_runtime": 67.1626, |
|
"eval_samples_per_second": 15.604, |
|
"eval_steps_per_second": 1.563, |
|
"eval_wer": 0.5668889760690466, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 19.74577067669173, |
|
"grad_norm": 0.5650402307510376, |
|
"learning_rate": 2.128011611030479e-05, |
|
"loss": 0.0851, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 19.74577067669173, |
|
"eval_loss": 1.1516063213348389, |
|
"eval_runtime": 66.1494, |
|
"eval_samples_per_second": 15.843, |
|
"eval_steps_per_second": 1.587, |
|
"eval_wer": 0.5596312279325225, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 20.027725563909776, |
|
"grad_norm": 0.35859623551368713, |
|
"learning_rate": 2.069956458635704e-05, |
|
"loss": 0.0929, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 20.027725563909776, |
|
"eval_loss": 1.1067341566085815, |
|
"eval_runtime": 65.8487, |
|
"eval_samples_per_second": 15.915, |
|
"eval_steps_per_second": 1.595, |
|
"eval_wer": 0.5545311887014516, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 20.309680451127818, |
|
"grad_norm": 0.4309854507446289, |
|
"learning_rate": 2.011901306240929e-05, |
|
"loss": 0.0816, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 20.309680451127818, |
|
"eval_loss": 1.147910714149475, |
|
"eval_runtime": 66.4223, |
|
"eval_samples_per_second": 15.778, |
|
"eval_steps_per_second": 1.581, |
|
"eval_wer": 0.5608081600627697, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 20.591635338345863, |
|
"grad_norm": 0.8902326226234436, |
|
"learning_rate": 1.9538461538461537e-05, |
|
"loss": 0.0853, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 20.591635338345863, |
|
"eval_loss": 1.157361388206482, |
|
"eval_runtime": 66.7554, |
|
"eval_samples_per_second": 15.699, |
|
"eval_steps_per_second": 1.573, |
|
"eval_wer": 0.5625735582581405, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 20.87359022556391, |
|
"grad_norm": 0.7075725793838501, |
|
"learning_rate": 1.8960812772133526e-05, |
|
"loss": 0.0823, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 20.87359022556391, |
|
"eval_loss": 1.178560495376587, |
|
"eval_runtime": 67.6139, |
|
"eval_samples_per_second": 15.5, |
|
"eval_steps_per_second": 1.553, |
|
"eval_wer": 0.5655158885837583, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 21.155545112781954, |
|
"grad_norm": 0.28010666370391846, |
|
"learning_rate": 1.8380261248185777e-05, |
|
"loss": 0.0837, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 21.155545112781954, |
|
"eval_loss": 1.180931568145752, |
|
"eval_runtime": 66.8333, |
|
"eval_samples_per_second": 15.681, |
|
"eval_steps_per_second": 1.571, |
|
"eval_wer": 0.5617889368379757, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 21.4375, |
|
"grad_norm": 0.8791319131851196, |
|
"learning_rate": 1.7799709724238025e-05, |
|
"loss": 0.0806, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 21.4375, |
|
"eval_loss": 1.1776329278945923, |
|
"eval_runtime": 66.6056, |
|
"eval_samples_per_second": 15.734, |
|
"eval_steps_per_second": 1.576, |
|
"eval_wer": 0.5547273440564927, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 21.719454887218046, |
|
"grad_norm": 0.5617702007293701, |
|
"learning_rate": 1.721915820029028e-05, |
|
"loss": 0.0819, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 21.719454887218046, |
|
"eval_loss": 1.1667627096176147, |
|
"eval_runtime": 68.9763, |
|
"eval_samples_per_second": 15.194, |
|
"eval_steps_per_second": 1.522, |
|
"eval_wer": 0.5580619850921931, |
|
"step": 15400 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 17725, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.916231529580259e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|