{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.536067892503536, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14144271570014144, "eval_loss": 1.2105046510696411, "eval_runtime": 283.0228, "eval_samples_per_second": 19.984, "eval_steps_per_second": 2.498, "eval_wer": 0.8709347154900737, "step": 100 }, { "epoch": 0.2828854314002829, "eval_loss": 0.9787197113037109, "eval_runtime": 281.4976, "eval_samples_per_second": 20.093, "eval_steps_per_second": 2.512, "eval_wer": 0.6986290412887332, "step": 200 }, { "epoch": 0.4243281471004243, "eval_loss": 1.190703272819519, "eval_runtime": 281.4959, "eval_samples_per_second": 20.093, "eval_steps_per_second": 2.512, "eval_wer": 0.7126745692758074, "step": 300 }, { "epoch": 0.5657708628005658, "eval_loss": 1.0559463500976562, "eval_runtime": 280.2535, "eval_samples_per_second": 20.182, "eval_steps_per_second": 2.523, "eval_wer": 0.7169298203516181, "step": 400 }, { "epoch": 0.7072135785007072, "grad_norm": 5.1558146476745605, "learning_rate": 0.00029699999999999996, "loss": 1.4456, "step": 500 }, { "epoch": 0.7072135785007072, "eval_loss": 1.2105939388275146, "eval_runtime": 281.5867, "eval_samples_per_second": 20.086, "eval_steps_per_second": 2.511, "eval_wer": 0.794404184863464, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 1.0231719017028809, "eval_runtime": 280.7447, "eval_samples_per_second": 20.146, "eval_steps_per_second": 2.518, "eval_wer": 0.7033002191614276, "step": 600 }, { "epoch": 0.9900990099009901, "eval_loss": 1.038698673248291, "eval_runtime": 281.8275, "eval_samples_per_second": 20.069, "eval_steps_per_second": 2.509, "eval_wer": 0.7335988865959591, "step": 700 }, { "epoch": 1.1315417256011315, "eval_loss": 0.7234079837799072, "eval_runtime": 280.5616, "eval_samples_per_second": 20.16, "eval_steps_per_second": 2.52, "eval_wer": 0.5223240709635104, "step": 800 }, { "epoch": 1.272984441301273, "eval_loss": 0.7242198586463928, "eval_runtime": 286.25, "eval_samples_per_second": 19.759, "eval_steps_per_second": 2.47, "eval_wer": 0.5566220345219242, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 1.7350859642028809, "learning_rate": 0.0002259, "loss": 0.9155, "step": 1000 }, { "epoch": 1.4144271570014144, "eval_loss": 0.7096899151802063, "eval_runtime": 280.6062, "eval_samples_per_second": 20.156, "eval_steps_per_second": 2.52, "eval_wer": 0.5258754459215178, "step": 1000 }, { "epoch": 1.5558698727015559, "eval_loss": 0.6368164420127869, "eval_runtime": 282.6165, "eval_samples_per_second": 20.013, "eval_steps_per_second": 2.502, "eval_wer": 0.47965957991393515, "step": 1100 }, { "epoch": 1.6973125884016973, "eval_loss": 0.6065136194229126, "eval_runtime": 280.4494, "eval_samples_per_second": 20.168, "eval_steps_per_second": 2.521, "eval_wer": 0.4652941082369503, "step": 1200 }, { "epoch": 1.8387553041018387, "eval_loss": 0.6206967830657959, "eval_runtime": 282.4382, "eval_samples_per_second": 20.026, "eval_steps_per_second": 2.503, "eval_wer": 0.4716929820351618, "step": 1300 }, { "epoch": 1.9801980198019802, "eval_loss": 0.5924867987632751, "eval_runtime": 281.7785, "eval_samples_per_second": 20.073, "eval_steps_per_second": 2.509, "eval_wer": 0.470669162227448, "step": 1400 }, { "epoch": 2.1216407355021216, "grad_norm": 1.3663442134857178, "learning_rate": 0.0001512, "loss": 0.7436, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 0.5382007360458374, "eval_runtime": 284.0884, "eval_samples_per_second": 19.909, "eval_steps_per_second": 2.489, "eval_wer": 0.40460079026091406, "step": 1500 }, { "epoch": 2.263083451202263, "eval_loss": 0.5200654864311218, "eval_runtime": 282.3736, "eval_samples_per_second": 20.03, "eval_steps_per_second": 2.504, "eval_wer": 0.3995616771448225, "step": 1600 }, { "epoch": 2.4045261669024045, "eval_loss": 0.4883446991443634, "eval_runtime": 283.5276, "eval_samples_per_second": 19.949, "eval_steps_per_second": 2.494, "eval_wer": 0.36979091679864345, "step": 1700 }, { "epoch": 2.545968882602546, "eval_loss": 0.47043663263320923, "eval_runtime": 282.1263, "eval_samples_per_second": 20.048, "eval_steps_per_second": 2.506, "eval_wer": 0.36585560941274337, "step": 1800 }, { "epoch": 2.6874115983026874, "eval_loss": 0.4443446099758148, "eval_runtime": 282.7688, "eval_samples_per_second": 20.002, "eval_steps_per_second": 2.5, "eval_wer": 0.3521460223000752, "step": 1900 }, { "epoch": 2.828854314002829, "grad_norm": 1.0020660161972046, "learning_rate": 7.635e-05, "loss": 0.5645, "step": 2000 }, { "epoch": 2.828854314002829, "eval_loss": 0.4469930827617645, "eval_runtime": 282.9465, "eval_samples_per_second": 19.99, "eval_steps_per_second": 2.499, "eval_wer": 0.34761881908784054, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 0.41922062635421753, "eval_runtime": 281.1649, "eval_samples_per_second": 20.116, "eval_steps_per_second": 2.515, "eval_wer": 0.3241669466173953, "step": 2100 }, { "epoch": 3.1117397454031117, "eval_loss": 0.41775766015052795, "eval_runtime": 282.3459, "eval_samples_per_second": 20.032, "eval_steps_per_second": 2.504, "eval_wer": 0.3160883684471533, "step": 2200 }, { "epoch": 3.253182461103253, "eval_loss": 0.4122001826763153, "eval_runtime": 282.9107, "eval_samples_per_second": 19.992, "eval_steps_per_second": 2.499, "eval_wer": 0.305370254835149, "step": 2300 }, { "epoch": 3.3946251768033946, "eval_loss": 0.396011620759964, "eval_runtime": 280.8684, "eval_samples_per_second": 20.138, "eval_steps_per_second": 2.517, "eval_wer": 0.2990353697749196, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 0.5122537612915039, "learning_rate": 1.9499999999999995e-06, "loss": 0.4232, "step": 2500 }, { "epoch": 3.536067892503536, "eval_loss": 0.3903259038925171, "eval_runtime": 282.302, "eval_samples_per_second": 20.035, "eval_steps_per_second": 2.504, "eval_wer": 0.2956919582153541, "step": 2500 }, { "epoch": 3.536067892503536, "step": 2500, "total_flos": 6.538015641955614e+19, "train_loss": 0.8184805297851563, "train_runtime": 14471.5924, "train_samples_per_second": 5.528, "train_steps_per_second": 0.173 } ], "logging_steps": 500, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 6.538015641955614e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }