{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.6775106082036775, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14144271570014144, "eval_loss": 3.3999907970428467, "eval_runtime": 162.0918, "eval_samples_per_second": 34.894, "eval_steps_per_second": 4.362, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.2828854314002829, "eval_loss": 2.9512617588043213, "eval_runtime": 159.7591, "eval_samples_per_second": 35.403, "eval_steps_per_second": 4.425, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.4243281471004243, "eval_loss": 1.9453805685043335, "eval_runtime": 160.8922, "eval_samples_per_second": 35.154, "eval_steps_per_second": 4.394, "eval_wer": 0.9325480252282904, "step": 300 }, { "epoch": 0.5657708628005658, "eval_loss": 1.1412426233291626, "eval_runtime": 161.9235, "eval_samples_per_second": 34.93, "eval_steps_per_second": 4.366, "eval_wer": 0.773025629503619, "step": 400 }, { "epoch": 0.7072135785007072, "grad_norm": 2.79933500289917, "learning_rate": 0.00029699999999999996, "loss": 3.2243, "step": 500 }, { "epoch": 0.7072135785007072, "eval_loss": 0.9250076413154602, "eval_runtime": 160.568, "eval_samples_per_second": 35.225, "eval_steps_per_second": 4.403, "eval_wer": 0.67890099661376, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 0.8018025159835815, "eval_runtime": 160.6715, "eval_samples_per_second": 35.202, "eval_steps_per_second": 4.4, "eval_wer": 0.5982892266213028, "step": 600 }, { "epoch": 0.9900990099009901, "eval_loss": 0.7181887030601501, "eval_runtime": 160.5511, "eval_samples_per_second": 35.229, "eval_steps_per_second": 4.404, "eval_wer": 0.5408675835727239, "step": 700 }, { "epoch": 1.1315417256011315, "eval_loss": 0.6198393106460571, "eval_runtime": 161.1454, "eval_samples_per_second": 35.099, "eval_steps_per_second": 4.387, "eval_wer": 0.4769944311598273, "step": 800 }, { "epoch": 1.272984441301273, "eval_loss": 0.6102356910705566, "eval_runtime": 160.7517, "eval_samples_per_second": 35.185, "eval_steps_per_second": 4.398, "eval_wer": 0.47124905714881804, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 1.0946769714355469, "learning_rate": 0.00022928571428571426, "loss": 0.7983, "step": 1000 }, { "epoch": 1.4144271570014144, "eval_loss": 0.5605342984199524, "eval_runtime": 160.7985, "eval_samples_per_second": 35.174, "eval_steps_per_second": 4.397, "eval_wer": 0.4425863812167996, "step": 1000 }, { "epoch": 1.5558698727015559, "eval_loss": 0.5336170196533203, "eval_runtime": 160.9878, "eval_samples_per_second": 35.133, "eval_steps_per_second": 4.392, "eval_wer": 0.4162186451830335, "step": 1100 }, { "epoch": 1.6973125884016973, "eval_loss": 0.5258753299713135, "eval_runtime": 161.623, "eval_samples_per_second": 34.995, "eval_steps_per_second": 4.374, "eval_wer": 0.41161271685577183, "step": 1200 }, { "epoch": 1.8387553041018387, "eval_loss": 0.4960057735443115, "eval_runtime": 161.2649, "eval_samples_per_second": 35.073, "eval_steps_per_second": 4.384, "eval_wer": 0.3872349986358749, "step": 1300 }, { "epoch": 1.9801980198019802, "eval_loss": 0.48569169640541077, "eval_runtime": 161.7331, "eval_samples_per_second": 34.971, "eval_steps_per_second": 4.371, "eval_wer": 0.38676959124392163, "step": 1400 }, { "epoch": 2.1216407355021216, "grad_norm": 1.4740066528320312, "learning_rate": 0.0001582857142857143, "loss": 0.6274, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 0.46894121170043945, "eval_runtime": 161.6698, "eval_samples_per_second": 34.985, "eval_steps_per_second": 4.373, "eval_wer": 0.36555343358315545, "step": 1500 }, { "epoch": 2.263083451202263, "eval_loss": 0.4679875373840332, "eval_runtime": 161.7805, "eval_samples_per_second": 34.961, "eval_steps_per_second": 4.37, "eval_wer": 0.3562131886825761, "step": 1600 }, { "epoch": 2.4045261669024045, "eval_loss": 0.4536493718624115, "eval_runtime": 162.2799, "eval_samples_per_second": 34.853, "eval_steps_per_second": 4.357, "eval_wer": 0.3535330840461556, "step": 1700 }, { "epoch": 2.545968882602546, "eval_loss": 0.44862428307533264, "eval_runtime": 161.7828, "eval_samples_per_second": 34.96, "eval_steps_per_second": 4.37, "eval_wer": 0.3500826499333986, "step": 1800 }, { "epoch": 2.6874115983026874, "eval_loss": 0.43955981731414795, "eval_runtime": 162.5762, "eval_samples_per_second": 34.79, "eval_steps_per_second": 4.349, "eval_wer": 0.35051596026383786, "step": 1900 }, { "epoch": 2.828854314002829, "grad_norm": 0.927208662033081, "learning_rate": 8.728571428571428e-05, "loss": 0.4939, "step": 2000 }, { "epoch": 2.828854314002829, "eval_loss": 0.4298574924468994, "eval_runtime": 162.5099, "eval_samples_per_second": 34.804, "eval_steps_per_second": 4.351, "eval_wer": 0.3340020221148754, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 0.4291674494743347, "eval_runtime": 162.0566, "eval_samples_per_second": 34.901, "eval_steps_per_second": 4.363, "eval_wer": 0.33114538364012774, "step": 2100 }, { "epoch": 3.1117397454031117, "eval_loss": 0.42761147022247314, "eval_runtime": 162.7807, "eval_samples_per_second": 34.746, "eval_steps_per_second": 4.343, "eval_wer": 0.3270690568278474, "step": 2200 }, { "epoch": 3.253182461103253, "eval_loss": 0.4232628345489502, "eval_runtime": 161.9234, "eval_samples_per_second": 34.93, "eval_steps_per_second": 4.366, "eval_wer": 0.3260419508593988, "step": 2300 }, { "epoch": 3.3946251768033946, "eval_loss": 0.4192351996898651, "eval_runtime": 162.4093, "eval_samples_per_second": 34.826, "eval_steps_per_second": 4.353, "eval_wer": 0.32230264319301566, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 0.7725916504859924, "learning_rate": 1.614285714285714e-05, "loss": 0.4072, "step": 2500 }, { "epoch": 3.536067892503536, "eval_loss": 0.4178767800331116, "eval_runtime": 162.2884, "eval_samples_per_second": 34.852, "eval_steps_per_second": 4.356, "eval_wer": 0.31946205324902505, "step": 2500 }, { "epoch": 3.6775106082036775, "eval_loss": 0.4158582091331482, "eval_runtime": 162.2061, "eval_samples_per_second": 34.869, "eval_steps_per_second": 4.359, "eval_wer": 0.3171350162892587, "step": 2600 }, { "epoch": 3.6775106082036775, "step": 2600, "total_flos": 9.931497341662648e+18, "train_loss": 1.0819015429570125, "train_runtime": 7138.5232, "train_samples_per_second": 11.655, "train_steps_per_second": 0.364 } ], "logging_steps": 500, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 9.931497341662648e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }