{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.536067892503536, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14144271570014144, "eval_loss": 3.7463672161102295, "eval_runtime": 153.5435, "eval_samples_per_second": 36.836, "eval_steps_per_second": 4.605, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.2828854314002829, "eval_loss": 2.9399216175079346, "eval_runtime": 150.7703, "eval_samples_per_second": 37.514, "eval_steps_per_second": 4.689, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.4243281471004243, "eval_loss": 2.5961458683013916, "eval_runtime": 151.4236, "eval_samples_per_second": 37.352, "eval_steps_per_second": 4.669, "eval_wer": 0.9991041576682503, "step": 300 }, { "epoch": 0.5657708628005658, "eval_loss": 1.1618728637695312, "eval_runtime": 152.5862, "eval_samples_per_second": 37.068, "eval_steps_per_second": 4.633, "eval_wer": 0.7905328662155461, "step": 400 }, { "epoch": 0.7072135785007072, "grad_norm": 1.7047498226165771, "learning_rate": 0.00029699999999999996, "loss": 3.5448, "step": 500 }, { "epoch": 0.7072135785007072, "eval_loss": 0.946560800075531, "eval_runtime": 153.1427, "eval_samples_per_second": 36.933, "eval_steps_per_second": 4.617, "eval_wer": 0.6897506038937147, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 0.7894724607467651, "eval_runtime": 151.7162, "eval_samples_per_second": 37.28, "eval_steps_per_second": 4.66, "eval_wer": 0.6110604533602086, "step": 600 }, { "epoch": 0.9900990099009901, "eval_loss": 0.6820164918899536, "eval_runtime": 152.0086, "eval_samples_per_second": 37.208, "eval_steps_per_second": 4.651, "eval_wer": 0.5378893314776599, "step": 700 }, { "epoch": 1.1315417256011315, "eval_loss": 0.6039016842842102, "eval_runtime": 152.002, "eval_samples_per_second": 37.21, "eval_steps_per_second": 4.651, "eval_wer": 0.47239685815296506, "step": 800 }, { "epoch": 1.272984441301273, "eval_loss": 0.5631398558616638, "eval_runtime": 153.0321, "eval_samples_per_second": 36.96, "eval_steps_per_second": 4.62, "eval_wer": 0.46745372814384667, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 0.8790757656097412, "learning_rate": 0.00022574999999999996, "loss": 0.7808, "step": 1000 }, { "epoch": 1.4144271570014144, "eval_loss": 0.5279428958892822, "eval_runtime": 152.9768, "eval_samples_per_second": 36.973, "eval_steps_per_second": 4.622, "eval_wer": 0.4291084769080642, "step": 1000 }, { "epoch": 1.5558698727015559, "eval_loss": 0.5024306178092957, "eval_runtime": 152.8927, "eval_samples_per_second": 36.993, "eval_steps_per_second": 4.624, "eval_wer": 0.39940170529986724, "step": 1100 }, { "epoch": 1.6973125884016973, "eval_loss": 0.4894837439060211, "eval_runtime": 153.3103, "eval_samples_per_second": 36.892, "eval_steps_per_second": 4.612, "eval_wer": 0.3894514565436483, "step": 1200 }, { "epoch": 1.8387553041018387, "eval_loss": 0.4595918357372284, "eval_runtime": 153.0651, "eval_samples_per_second": 36.952, "eval_steps_per_second": 4.619, "eval_wer": 0.3695829534002016, "step": 1300 }, { "epoch": 1.9801980198019802, "eval_loss": 0.44729524850845337, "eval_runtime": 154.1998, "eval_samples_per_second": 36.68, "eval_steps_per_second": 4.585, "eval_wer": 0.3610884484330758, "step": 1400 }, { "epoch": 2.1216407355021216, "grad_norm": 0.8205087184906006, "learning_rate": 0.0001512, "loss": 0.6005, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 0.43324384093284607, "eval_runtime": 150.9169, "eval_samples_per_second": 37.478, "eval_steps_per_second": 4.685, "eval_wer": 0.3474268528738942, "step": 1500 }, { "epoch": 2.263083451202263, "eval_loss": 0.4268616735935211, "eval_runtime": 152.3911, "eval_samples_per_second": 37.115, "eval_steps_per_second": 4.639, "eval_wer": 0.3418118411159636, "step": 1600 }, { "epoch": 2.4045261669024045, "eval_loss": 0.4155045449733734, "eval_runtime": 153.4832, "eval_samples_per_second": 36.851, "eval_steps_per_second": 4.606, "eval_wer": 0.33606885188206875, "step": 1700 }, { "epoch": 2.545968882602546, "eval_loss": 0.4121190905570984, "eval_runtime": 153.7529, "eval_samples_per_second": 36.786, "eval_steps_per_second": 4.598, "eval_wer": 0.32143142806865993, "step": 1800 }, { "epoch": 2.6874115983026874, "eval_loss": 0.4145391285419464, "eval_runtime": 159.3319, "eval_samples_per_second": 35.498, "eval_steps_per_second": 4.437, "eval_wer": 0.3366447505239078, "step": 1900 }, { "epoch": 2.828854314002829, "grad_norm": 0.8615767359733582, "learning_rate": 7.664999999999999e-05, "loss": 0.4666, "step": 2000 }, { "epoch": 2.828854314002829, "eval_loss": 0.39387884736061096, "eval_runtime": 153.1343, "eval_samples_per_second": 36.935, "eval_steps_per_second": 4.617, "eval_wer": 0.3114171905744589, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 0.38894009590148926, "eval_runtime": 152.8789, "eval_samples_per_second": 36.997, "eval_steps_per_second": 4.625, "eval_wer": 0.30807377901489336, "step": 2100 }, { "epoch": 3.1117397454031117, "eval_loss": 0.3909347653388977, "eval_runtime": 154.4919, "eval_samples_per_second": 36.61, "eval_steps_per_second": 4.576, "eval_wer": 0.30644206619634945, "step": 2200 }, { "epoch": 3.253182461103253, "eval_loss": 0.3874327838420868, "eval_runtime": 153.3025, "eval_samples_per_second": 36.894, "eval_steps_per_second": 4.612, "eval_wer": 0.3015469277407176, "step": 2300 }, { "epoch": 3.3946251768033946, "eval_loss": 0.386868953704834, "eval_runtime": 153.0443, "eval_samples_per_second": 36.957, "eval_steps_per_second": 4.62, "eval_wer": 0.29833149365711636, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 0.6678842902183533, "learning_rate": 1.9499999999999995e-06, "loss": 0.3805, "step": 2500 }, { "epoch": 3.536067892503536, "eval_loss": 0.3846580684185028, "eval_runtime": 154.6331, "eval_samples_per_second": 36.577, "eval_steps_per_second": 4.572, "eval_wer": 0.29666778646958136, "step": 2500 }, { "epoch": 3.536067892503536, "step": 2500, "total_flos": 9.55129266706546e+18, "train_loss": 1.1546670959472656, "train_runtime": 6557.446, "train_samples_per_second": 12.2, "train_steps_per_second": 0.381 } ], "logging_steps": 500, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 9.55129266706546e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }