{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.536067892503536, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14144271570014144, "eval_loss": 4.035430908203125, "eval_runtime": 153.5895, "eval_samples_per_second": 36.825, "eval_steps_per_second": 4.603, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.2828854314002829, "eval_loss": 3.097731590270996, "eval_runtime": 149.7934, "eval_samples_per_second": 37.759, "eval_steps_per_second": 4.72, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.4243281471004243, "eval_loss": 3.0768725872039795, "eval_runtime": 151.0662, "eval_samples_per_second": 37.441, "eval_steps_per_second": 4.68, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.5657708628005658, "eval_loss": 1.373844027519226, "eval_runtime": 150.8632, "eval_samples_per_second": 37.491, "eval_steps_per_second": 4.686, "eval_wer": 0.8913622648777043, "step": 400 }, { "epoch": 0.7072135785007072, "grad_norm": 2.2486917972564697, "learning_rate": 0.0002964, "loss": 3.7586, "step": 500 }, { "epoch": 0.7072135785007072, "eval_loss": 1.0915191173553467, "eval_runtime": 151.7555, "eval_samples_per_second": 37.27, "eval_steps_per_second": 4.659, "eval_wer": 0.7692431148488156, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 0.9360549449920654, "eval_runtime": 152.9408, "eval_samples_per_second": 36.982, "eval_steps_per_second": 4.623, "eval_wer": 0.6854818000898761, "step": 600 }, { "epoch": 0.9900990099009901, "eval_loss": 0.8494995832443237, "eval_runtime": 151.5832, "eval_samples_per_second": 37.313, "eval_steps_per_second": 4.664, "eval_wer": 0.6247191371894459, "step": 700 }, { "epoch": 1.1315417256011315, "eval_loss": 0.6885886788368225, "eval_runtime": 151.913, "eval_samples_per_second": 37.232, "eval_steps_per_second": 4.654, "eval_wer": 0.5397059767606086, "step": 800 }, { "epoch": 1.272984441301273, "eval_loss": 0.6703779101371765, "eval_runtime": 153.2384, "eval_samples_per_second": 36.91, "eval_steps_per_second": 4.614, "eval_wer": 0.531215895230147, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 0.8719882369041443, "learning_rate": 0.0002259, "loss": 0.8877, "step": 1000 }, { "epoch": 1.4144271570014144, "eval_loss": 0.62369304895401, "eval_runtime": 153.3779, "eval_samples_per_second": 36.876, "eval_steps_per_second": 4.61, "eval_wer": 0.495040765230789, "step": 1000 }, { "epoch": 1.5558698727015559, "eval_loss": 0.5992260575294495, "eval_runtime": 152.5492, "eval_samples_per_second": 37.077, "eval_steps_per_second": 4.635, "eval_wer": 0.4767606085895872, "step": 1100 }, { "epoch": 1.6973125884016973, "eval_loss": 0.5729629397392273, "eval_runtime": 152.035, "eval_samples_per_second": 37.202, "eval_steps_per_second": 4.65, "eval_wer": 0.4521730756885151, "step": 1200 }, { "epoch": 1.8387553041018387, "eval_loss": 0.5504249930381775, "eval_runtime": 152.762, "eval_samples_per_second": 37.025, "eval_steps_per_second": 4.628, "eval_wer": 0.4417891763497464, "step": 1300 }, { "epoch": 1.9801980198019802, "eval_loss": 0.5288310050964355, "eval_runtime": 152.1876, "eval_samples_per_second": 37.165, "eval_steps_per_second": 4.646, "eval_wer": 0.42586826731719846, "step": 1400 }, { "epoch": 2.1216407355021216, "grad_norm": 0.7933566570281982, "learning_rate": 0.00015134999999999997, "loss": 0.6844, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 0.5165240168571472, "eval_runtime": 152.8573, "eval_samples_per_second": 37.002, "eval_steps_per_second": 4.625, "eval_wer": 0.4216633498106182, "step": 1500 }, { "epoch": 2.263083451202263, "eval_loss": 0.5071681141853333, "eval_runtime": 152.7102, "eval_samples_per_second": 37.037, "eval_steps_per_second": 4.63, "eval_wer": 0.41930410220196446, "step": 1600 }, { "epoch": 2.4045261669024045, "eval_loss": 0.49842193722724915, "eval_runtime": 153.4952, "eval_samples_per_second": 36.848, "eval_steps_per_second": 4.606, "eval_wer": 0.41546831867496953, "step": 1700 }, { "epoch": 2.545968882602546, "eval_loss": 0.4882013499736786, "eval_runtime": 152.4926, "eval_samples_per_second": 37.09, "eval_steps_per_second": 4.636, "eval_wer": 0.4096905694292868, "step": 1800 }, { "epoch": 2.6874115983026874, "eval_loss": 0.4803565740585327, "eval_runtime": 152.0827, "eval_samples_per_second": 37.19, "eval_steps_per_second": 4.649, "eval_wer": 0.40800539256596263, "step": 1900 }, { "epoch": 2.828854314002829, "grad_norm": 0.7038583159446716, "learning_rate": 7.68e-05, "loss": 0.537, "step": 2000 }, { "epoch": 2.828854314002829, "eval_loss": 0.470017671585083, "eval_runtime": 152.5003, "eval_samples_per_second": 37.088, "eval_steps_per_second": 4.636, "eval_wer": 0.39272645567182385, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 0.46773043274879456, "eval_runtime": 152.3208, "eval_samples_per_second": 37.132, "eval_steps_per_second": 4.642, "eval_wer": 0.3884573409514027, "step": 2100 }, { "epoch": 3.1117397454031117, "eval_loss": 0.4683248996734619, "eval_runtime": 152.8021, "eval_samples_per_second": 37.015, "eval_steps_per_second": 4.627, "eval_wer": 0.38572895936316365, "step": 2200 }, { "epoch": 3.253182461103253, "eval_loss": 0.4618384838104248, "eval_runtime": 152.4829, "eval_samples_per_second": 37.093, "eval_steps_per_second": 4.637, "eval_wer": 0.3792289914617706, "step": 2300 }, { "epoch": 3.3946251768033946, "eval_loss": 0.46039119362831116, "eval_runtime": 152.4542, "eval_samples_per_second": 37.1, "eval_steps_per_second": 4.637, "eval_wer": 0.3762759196250883, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 0.6644078493118286, "learning_rate": 2.1e-06, "loss": 0.4434, "step": 2500 }, { "epoch": 3.536067892503536, "eval_loss": 0.4588949382305145, "eval_runtime": 152.8982, "eval_samples_per_second": 36.992, "eval_steps_per_second": 4.624, "eval_wer": 0.3742697566925595, "step": 2500 }, { "epoch": 3.536067892503536, "step": 2500, "total_flos": 9.55169606524761e+18, "train_loss": 1.2622116821289062, "train_runtime": 6522.8506, "train_samples_per_second": 12.265, "train_steps_per_second": 0.383 } ], "logging_steps": 500, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 9.55169606524761e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }