{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 300, "global_step": 7070, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4243281471004243, "eval_loss": 5.990268707275391, "eval_runtime": 138.1214, "eval_samples_per_second": 40.949, "eval_steps_per_second": 5.119, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.7072135785007072, "grad_norm": 2.022184371948242, "learning_rate": 4.9500000000000004e-05, "loss": 7.061, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 3.0451483726501465, "eval_runtime": 135.2034, "eval_samples_per_second": 41.833, "eval_steps_per_second": 5.229, "eval_wer": 1.0, "step": 600 }, { "epoch": 1.272984441301273, "eval_loss": 2.9642043113708496, "eval_runtime": 136.8192, "eval_samples_per_second": 41.339, "eval_steps_per_second": 5.167, "eval_wer": 1.0, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 0.7561541795730591, "learning_rate": 4.624048706240488e-05, "loss": 3.0081, "step": 1000 }, { "epoch": 1.6973125884016973, "eval_loss": 2.956415891647339, "eval_runtime": 136.6446, "eval_samples_per_second": 41.392, "eval_steps_per_second": 5.174, "eval_wer": 1.0, "step": 1200 }, { "epoch": 2.1216407355021216, "grad_norm": 0.3668934106826782, "learning_rate": 4.245053272450533e-05, "loss": 2.9733, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 2.947998285293579, "eval_runtime": 136.4831, "eval_samples_per_second": 41.441, "eval_steps_per_second": 5.18, "eval_wer": 1.0, "step": 1500 }, { "epoch": 2.545968882602546, "eval_loss": 2.945077657699585, "eval_runtime": 136.3573, "eval_samples_per_second": 41.479, "eval_steps_per_second": 5.185, "eval_wer": 1.0, "step": 1800 }, { "epoch": 2.828854314002829, "grad_norm": 1.441468596458435, "learning_rate": 3.866057838660579e-05, "loss": 2.9454, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 2.9147346019744873, "eval_runtime": 135.861, "eval_samples_per_second": 41.631, "eval_steps_per_second": 5.204, "eval_wer": 1.0, "step": 2100 }, { "epoch": 3.3946251768033946, "eval_loss": 2.9019417762756348, "eval_runtime": 136.0133, "eval_samples_per_second": 41.584, "eval_steps_per_second": 5.198, "eval_wer": 1.0, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 0.46694883704185486, "learning_rate": 3.487062404870624e-05, "loss": 2.9064, "step": 2500 }, { "epoch": 3.818953323903819, "eval_loss": 2.884958505630493, "eval_runtime": 136.0891, "eval_samples_per_second": 41.561, "eval_steps_per_second": 5.195, "eval_wer": 1.0, "step": 2700 }, { "epoch": 4.243281471004243, "grad_norm": 0.4952280819416046, "learning_rate": 3.10882800608828e-05, "loss": 2.9048, "step": 3000 }, { "epoch": 4.243281471004243, "eval_loss": 2.8812334537506104, "eval_runtime": 136.4568, "eval_samples_per_second": 41.449, "eval_steps_per_second": 5.181, "eval_wer": 1.0, "step": 3000 }, { "epoch": 4.667609618104668, "eval_loss": 2.884371042251587, "eval_runtime": 136.787, "eval_samples_per_second": 41.349, "eval_steps_per_second": 5.169, "eval_wer": 1.0, "step": 3300 }, { "epoch": 4.9504950495049505, "grad_norm": 0.8865047097206116, "learning_rate": 2.7290715372907157e-05, "loss": 2.8965, "step": 3500 }, { "epoch": 5.091937765205092, "eval_loss": 2.9125277996063232, "eval_runtime": 136.3564, "eval_samples_per_second": 41.48, "eval_steps_per_second": 5.185, "eval_wer": 1.0, "step": 3600 }, { "epoch": 5.516265912305516, "eval_loss": 2.898144006729126, "eval_runtime": 136.0768, "eval_samples_per_second": 41.565, "eval_steps_per_second": 5.196, "eval_wer": 1.0, "step": 3900 }, { "epoch": 5.657708628005658, "grad_norm": 0.3529145121574402, "learning_rate": 2.3508371385083716e-05, "loss": 2.9261, "step": 4000 }, { "epoch": 5.9405940594059405, "eval_loss": 2.905318260192871, "eval_runtime": 136.6781, "eval_samples_per_second": 41.382, "eval_steps_per_second": 5.173, "eval_wer": 1.0, "step": 4200 }, { "epoch": 6.364922206506365, "grad_norm": 0.22229251265525818, "learning_rate": 1.971841704718417e-05, "loss": 2.9273, "step": 4500 }, { "epoch": 6.364922206506365, "eval_loss": 2.916677951812744, "eval_runtime": 136.7502, "eval_samples_per_second": 41.36, "eval_steps_per_second": 5.17, "eval_wer": 1.0, "step": 4500 }, { "epoch": 6.789250353606789, "eval_loss": 2.911259651184082, "eval_runtime": 136.484, "eval_samples_per_second": 41.441, "eval_steps_per_second": 5.18, "eval_wer": 1.0, "step": 4800 }, { "epoch": 7.072135785007072, "grad_norm": 1.7586228847503662, "learning_rate": 1.592846270928463e-05, "loss": 2.9302, "step": 5000 }, { "epoch": 7.2135785007072135, "eval_loss": 2.9133317470550537, "eval_runtime": 135.9523, "eval_samples_per_second": 41.603, "eval_steps_per_second": 5.2, "eval_wer": 1.0, "step": 5100 }, { "epoch": 7.637906647807638, "eval_loss": 2.921302080154419, "eval_runtime": 136.5228, "eval_samples_per_second": 41.429, "eval_steps_per_second": 5.179, "eval_wer": 1.0, "step": 5400 }, { "epoch": 7.779349363507779, "grad_norm": 0.6302638649940491, "learning_rate": 1.2146118721461187e-05, "loss": 2.9397, "step": 5500 }, { "epoch": 8.062234794908063, "eval_loss": 2.9251174926757812, "eval_runtime": 136.4335, "eval_samples_per_second": 41.456, "eval_steps_per_second": 5.182, "eval_wer": 1.0, "step": 5700 }, { "epoch": 8.486562942008486, "grad_norm": 0.5835816860198975, "learning_rate": 8.340943683409437e-06, "loss": 2.937, "step": 6000 }, { "epoch": 8.486562942008486, "eval_loss": 2.921030282974243, "eval_runtime": 136.1229, "eval_samples_per_second": 41.551, "eval_steps_per_second": 5.194, "eval_wer": 1.0, "step": 6000 }, { "epoch": 8.910891089108912, "eval_loss": 2.92145037651062, "eval_runtime": 137.1094, "eval_samples_per_second": 41.252, "eval_steps_per_second": 5.156, "eval_wer": 1.0, "step": 6300 }, { "epoch": 9.193776520509195, "grad_norm": 0.7211419939994812, "learning_rate": 4.558599695585997e-06, "loss": 2.9406, "step": 6500 }, { "epoch": 9.335219236209335, "eval_loss": 2.917142629623413, "eval_runtime": 136.4229, "eval_samples_per_second": 41.459, "eval_steps_per_second": 5.182, "eval_wer": 1.0, "step": 6600 }, { "epoch": 9.75954738330976, "eval_loss": 2.9176828861236572, "eval_runtime": 137.2177, "eval_samples_per_second": 41.219, "eval_steps_per_second": 5.152, "eval_wer": 1.0, "step": 6900 }, { "epoch": 9.900990099009901, "grad_norm": 0.0, "learning_rate": 7.762557077625571e-07, "loss": 2.9378, "step": 7000 }, { "epoch": 10.0, "step": 7070, "total_flos": 2.4662883830172946e+19, "train_loss": 3.228043903960534, "train_runtime": 12366.081, "train_samples_per_second": 18.293, "train_steps_per_second": 0.572 } ], "logging_steps": 500, "max_steps": 7070, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 600, "total_flos": 2.4662883830172946e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }