{ "best_metric": 1.18520188331604, "best_model_checkpoint": "batoula187/wav2vec2-xlsr-arabic2/checkpoint-1800", "epoch": 12.684989429175475, "eval_steps": 200, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.42283298097251587, "grad_norm": 0.43856075406074524, "learning_rate": 0.00011999999999999999, "loss": 0.1174, "step": 200 }, { "epoch": 0.42283298097251587, "eval_loss": 1.2291227579116821, "eval_runtime": 64.2106, "eval_samples_per_second": 16.321, "eval_steps_per_second": 2.04, "eval_wer": 0.6153393487642213, "step": 200 }, { "epoch": 0.8456659619450317, "grad_norm": 5.825935363769531, "learning_rate": 0.00023999999999999998, "loss": 0.098, "step": 400 }, { "epoch": 0.8456659619450317, "eval_loss": 1.232483148574829, "eval_runtime": 64.9125, "eval_samples_per_second": 16.145, "eval_steps_per_second": 2.018, "eval_wer": 0.6275009807767752, "step": 400 }, { "epoch": 1.2684989429175475, "grad_norm": 0.9385707378387451, "learning_rate": 0.00029780861943024103, "loss": 0.1301, "step": 600 }, { "epoch": 1.2684989429175475, "eval_loss": 1.1968672275543213, "eval_runtime": 63.9557, "eval_samples_per_second": 16.386, "eval_steps_per_second": 2.048, "eval_wer": 0.6127893291486858, "step": 600 }, { "epoch": 1.6913319238900635, "grad_norm": 0.5655494332313538, "learning_rate": 0.00029346968590211834, "loss": 0.1514, "step": 800 }, { "epoch": 1.6913319238900635, "eval_loss": 1.2292981147766113, "eval_runtime": 64.6544, "eval_samples_per_second": 16.209, "eval_steps_per_second": 2.026, "eval_wer": 0.6488819144762652, "step": 800 }, { "epoch": 2.1141649048625792, "grad_norm": 1.0750213861465454, "learning_rate": 0.0002890869247626004, "loss": 0.1494, "step": 1000 }, { "epoch": 2.1141649048625792, "eval_loss": 1.3062251806259155, "eval_runtime": 63.5313, "eval_samples_per_second": 16.496, "eval_steps_per_second": 2.062, "eval_wer": 0.670066692820714, "step": 1000 }, { "epoch": 2.536997885835095, "grad_norm": 0.8127875328063965, "learning_rate": 0.0002847041636230825, "loss": 0.1382, "step": 1200 }, { "epoch": 2.536997885835095, "eval_loss": 1.2222816944122314, "eval_runtime": 63.8482, "eval_samples_per_second": 16.414, "eval_steps_per_second": 2.052, "eval_wer": 0.6261278932914869, "step": 1200 }, { "epoch": 2.9598308668076108, "grad_norm": 0.8811041116714478, "learning_rate": 0.00028032140248356464, "loss": 0.1382, "step": 1400 }, { "epoch": 2.9598308668076108, "eval_loss": 1.3115968704223633, "eval_runtime": 65.0526, "eval_samples_per_second": 16.11, "eval_steps_per_second": 2.014, "eval_wer": 0.6506473126716359, "step": 1400 }, { "epoch": 3.382663847780127, "grad_norm": 0.7833051085472107, "learning_rate": 0.0002759386413440467, "loss": 0.1239, "step": 1600 }, { "epoch": 3.382663847780127, "eval_loss": 1.1977170705795288, "eval_runtime": 64.8745, "eval_samples_per_second": 16.154, "eval_steps_per_second": 2.019, "eval_wer": 0.6188701451549627, "step": 1600 }, { "epoch": 3.8054968287526427, "grad_norm": 0.4267895817756653, "learning_rate": 0.0002715558802045288, "loss": 0.1228, "step": 1800 }, { "epoch": 3.8054968287526427, "eval_loss": 1.18520188331604, "eval_runtime": 64.2457, "eval_samples_per_second": 16.312, "eval_steps_per_second": 2.039, "eval_wer": 0.6280894468418988, "step": 1800 }, { "epoch": 4.2283298097251585, "grad_norm": 0.7231135964393616, "learning_rate": 0.00026717311906501094, "loss": 0.1117, "step": 2000 }, { "epoch": 4.2283298097251585, "eval_loss": 1.3370016813278198, "eval_runtime": 64.7812, "eval_samples_per_second": 16.178, "eval_steps_per_second": 2.022, "eval_wer": 0.6494703805413887, "step": 2000 }, { "epoch": 4.651162790697675, "grad_norm": 0.40635955333709717, "learning_rate": 0.000262790357925493, "loss": 0.1118, "step": 2200 }, { "epoch": 4.651162790697675, "eval_loss": 1.3265222311019897, "eval_runtime": 63.5949, "eval_samples_per_second": 16.479, "eval_steps_per_second": 2.06, "eval_wer": 0.6431934091800706, "step": 2200 }, { "epoch": 5.07399577167019, "grad_norm": 0.9417168498039246, "learning_rate": 0.0002584075967859751, "loss": 0.1101, "step": 2400 }, { "epoch": 5.07399577167019, "eval_loss": 1.345849633216858, "eval_runtime": 64.4165, "eval_samples_per_second": 16.269, "eval_steps_per_second": 2.034, "eval_wer": 0.6310317771675167, "step": 2400 }, { "epoch": 5.496828752642706, "grad_norm": 0.7866037487983704, "learning_rate": 0.00025402483564645724, "loss": 0.1328, "step": 2600 }, { "epoch": 5.496828752642706, "eval_loss": 1.2545326948165894, "eval_runtime": 64.0398, "eval_samples_per_second": 16.365, "eval_steps_per_second": 2.046, "eval_wer": 0.6341702628481758, "step": 2600 }, { "epoch": 5.9196617336152215, "grad_norm": 0.47020798921585083, "learning_rate": 0.00024964207450693936, "loss": 0.1384, "step": 2800 }, { "epoch": 5.9196617336152215, "eval_loss": 1.2806007862091064, "eval_runtime": 64.3065, "eval_samples_per_second": 16.297, "eval_steps_per_second": 2.037, "eval_wer": 0.6265202040015693, "step": 2800 }, { "epoch": 6.342494714587738, "grad_norm": 0.605645477771759, "learning_rate": 0.0002452593133674214, "loss": 0.1334, "step": 3000 }, { "epoch": 6.342494714587738, "eval_loss": 1.2484089136123657, "eval_runtime": 65.2815, "eval_samples_per_second": 16.054, "eval_steps_per_second": 2.007, "eval_wer": 0.6369164378187524, "step": 3000 }, { "epoch": 6.765327695560254, "grad_norm": 0.6708455085754395, "learning_rate": 0.00024087655222790357, "loss": 0.1383, "step": 3200 }, { "epoch": 6.765327695560254, "eval_loss": 1.2701318264007568, "eval_runtime": 63.6121, "eval_samples_per_second": 16.475, "eval_steps_per_second": 2.059, "eval_wer": 0.6479011377010593, "step": 3200 }, { "epoch": 7.188160676532769, "grad_norm": 0.3204992711544037, "learning_rate": 0.00023649379108838566, "loss": 0.1281, "step": 3400 }, { "epoch": 7.188160676532769, "eval_loss": 1.1926395893096924, "eval_runtime": 65.1953, "eval_samples_per_second": 16.075, "eval_steps_per_second": 2.009, "eval_wer": 0.6314240878775991, "step": 3400 }, { "epoch": 7.6109936575052854, "grad_norm": 2.2028682231903076, "learning_rate": 0.00023211102994886775, "loss": 0.1232, "step": 3600 }, { "epoch": 7.6109936575052854, "eval_loss": 1.2255371809005737, "eval_runtime": 65.3961, "eval_samples_per_second": 16.025, "eval_steps_per_second": 2.003, "eval_wer": 0.6186739897999215, "step": 3600 }, { "epoch": 8.033826638477802, "grad_norm": 0.33132538199424744, "learning_rate": 2e-05, "loss": 0.0727, "step": 3800 }, { "epoch": 8.033826638477802, "eval_loss": 1.23982572555542, "eval_runtime": 66.6836, "eval_samples_per_second": 15.716, "eval_steps_per_second": 1.575, "eval_wer": 0.6014123185562966, "step": 3800 }, { "epoch": 8.456659619450317, "grad_norm": 0.3985452950000763, "learning_rate": 4e-05, "loss": 0.0749, "step": 4000 }, { "epoch": 8.456659619450317, "eval_loss": 1.2319424152374268, "eval_runtime": 67.0281, "eval_samples_per_second": 15.635, "eval_steps_per_second": 1.567, "eval_wer": 0.595723813260102, "step": 4000 }, { "epoch": 8.879492600422832, "grad_norm": 0.5187695622444153, "learning_rate": 4.955849889624724e-05, "loss": 0.0734, "step": 4200 }, { "epoch": 8.879492600422832, "eval_loss": 1.2246508598327637, "eval_runtime": 66.6194, "eval_samples_per_second": 15.731, "eval_steps_per_second": 1.576, "eval_wer": 0.5878775990584543, "step": 4200 }, { "epoch": 9.30232558139535, "grad_norm": 0.3908683955669403, "learning_rate": 4.867549668874172e-05, "loss": 0.0684, "step": 4400 }, { "epoch": 9.30232558139535, "eval_loss": 1.347394585609436, "eval_runtime": 65.574, "eval_samples_per_second": 15.982, "eval_steps_per_second": 1.601, "eval_wer": 0.6135739505688506, "step": 4400 }, { "epoch": 9.725158562367865, "grad_norm": 1.09392511844635, "learning_rate": 4.779690949227373e-05, "loss": 0.073, "step": 4600 }, { "epoch": 9.725158562367865, "eval_loss": 1.2836501598358154, "eval_runtime": 76.5173, "eval_samples_per_second": 13.696, "eval_steps_per_second": 1.372, "eval_wer": 0.5935661043546488, "step": 4600 }, { "epoch": 10.14799154334038, "grad_norm": 0.786432683467865, "learning_rate": 4.691390728476822e-05, "loss": 0.0728, "step": 4800 }, { "epoch": 10.14799154334038, "eval_loss": 1.247693657875061, "eval_runtime": 66.6059, "eval_samples_per_second": 15.734, "eval_steps_per_second": 1.576, "eval_wer": 0.5910160847391134, "step": 4800 }, { "epoch": 10.570824524312897, "grad_norm": 0.22806741297245026, "learning_rate": 4.6030905077262693e-05, "loss": 0.0718, "step": 5000 }, { "epoch": 10.570824524312897, "eval_loss": 1.2471730709075928, "eval_runtime": 67.4477, "eval_samples_per_second": 15.538, "eval_steps_per_second": 1.557, "eval_wer": 0.5867006669282071, "step": 5000 }, { "epoch": 10.993657505285412, "grad_norm": 1.541914463043213, "learning_rate": 4.5147902869757175e-05, "loss": 0.0685, "step": 5200 }, { "epoch": 10.993657505285412, "eval_loss": 1.2693225145339966, "eval_runtime": 65.4788, "eval_samples_per_second": 16.005, "eval_steps_per_second": 1.604, "eval_wer": 0.5788544527265594, "step": 5200 }, { "epoch": 11.416490486257928, "grad_norm": 0.4897485673427582, "learning_rate": 4.4264900662251656e-05, "loss": 0.0649, "step": 5400 }, { "epoch": 11.416490486257928, "eval_loss": 1.2164980173110962, "eval_runtime": 66.1024, "eval_samples_per_second": 15.854, "eval_steps_per_second": 1.588, "eval_wer": 0.5786582973715182, "step": 5400 }, { "epoch": 11.839323467230443, "grad_norm": 0.269406795501709, "learning_rate": 4.338189845474614e-05, "loss": 0.0632, "step": 5600 }, { "epoch": 11.839323467230443, "eval_loss": 1.2446550130844116, "eval_runtime": 69.6183, "eval_samples_per_second": 15.054, "eval_steps_per_second": 1.508, "eval_wer": 0.5841506473126716, "step": 5600 }, { "epoch": 12.26215644820296, "grad_norm": 0.6048020720481873, "learning_rate": 4.249889624724062e-05, "loss": 0.0625, "step": 5800 }, { "epoch": 12.26215644820296, "eval_loss": 1.308754324913025, "eval_runtime": 67.8481, "eval_samples_per_second": 15.446, "eval_steps_per_second": 1.548, "eval_wer": 0.5806198509219301, "step": 5800 }, { "epoch": 12.684989429175475, "grad_norm": 0.2880701720714569, "learning_rate": 4.16158940397351e-05, "loss": 0.061, "step": 6000 }, { "epoch": 12.684989429175475, "eval_loss": 1.3398616313934326, "eval_runtime": 67.8965, "eval_samples_per_second": 15.435, "eval_steps_per_second": 1.546, "eval_wer": 0.5923891722244017, "step": 6000 } ], "logging_steps": 200, "max_steps": 11825, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.9205431597482693e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }