{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5807200929152149, "eval_steps": 100, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019357336430507164, "eval_loss": 3.5493686199188232, "eval_runtime": 162.4972, "eval_samples_per_second": 34.807, "eval_steps_per_second": 4.351, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.03871467286101433, "eval_loss": 3.0426220893859863, "eval_runtime": 160.0022, "eval_samples_per_second": 35.35, "eval_steps_per_second": 4.419, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.05807200929152149, "eval_loss": 2.8965415954589844, "eval_runtime": 160.2187, "eval_samples_per_second": 35.302, "eval_steps_per_second": 4.413, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.07742934572202866, "eval_loss": 1.826250433921814, "eval_runtime": 161.1204, "eval_samples_per_second": 35.104, "eval_steps_per_second": 4.388, "eval_wer": 0.9828762176822712, "step": 400 }, { "epoch": 0.09678668215253582, "grad_norm": 4.890473365783691, "learning_rate": 0.0002982, "loss": 3.9715, "step": 500 }, { "epoch": 0.09678668215253582, "eval_loss": 1.3860080242156982, "eval_runtime": 161.4467, "eval_samples_per_second": 35.033, "eval_steps_per_second": 4.379, "eval_wer": 0.8748856542183563, "step": 500 }, { "epoch": 0.11614401858304298, "eval_loss": 1.308407187461853, "eval_runtime": 161.0068, "eval_samples_per_second": 35.129, "eval_steps_per_second": 4.391, "eval_wer": 0.8153456051098522, "step": 600 }, { "epoch": 0.13550135501355012, "eval_loss": 1.0549893379211426, "eval_runtime": 161.8736, "eval_samples_per_second": 34.941, "eval_steps_per_second": 4.368, "eval_wer": 0.7336906806181894, "step": 700 }, { "epoch": 0.1548586914440573, "eval_loss": 1.0011754035949707, "eval_runtime": 161.7881, "eval_samples_per_second": 34.959, "eval_steps_per_second": 4.37, "eval_wer": 0.7190062749755259, "step": 800 }, { "epoch": 0.17421602787456447, "eval_loss": 0.913667619228363, "eval_runtime": 162.0369, "eval_samples_per_second": 34.906, "eval_steps_per_second": 4.363, "eval_wer": 0.6752258830704049, "step": 900 }, { "epoch": 0.19357336430507163, "grad_norm": 3.201047658920288, "learning_rate": 0.0002406, "loss": 1.0155, "step": 1000 }, { "epoch": 0.19357336430507163, "eval_loss": 0.8486206531524658, "eval_runtime": 161.6295, "eval_samples_per_second": 34.994, "eval_steps_per_second": 4.374, "eval_wer": 0.6469483718765547, "step": 1000 }, { "epoch": 0.2129307007355788, "eval_loss": 0.8534524440765381, "eval_runtime": 161.4191, "eval_samples_per_second": 35.039, "eval_steps_per_second": 4.38, "eval_wer": 0.6111761968191812, "step": 1100 }, { "epoch": 0.23228803716608595, "eval_loss": 0.8349705934524536, "eval_runtime": 162.6145, "eval_samples_per_second": 34.782, "eval_steps_per_second": 4.348, "eval_wer": 0.6192807048514708, "step": 1200 }, { "epoch": 0.2516453735965931, "eval_loss": 0.7680675983428955, "eval_runtime": 161.7995, "eval_samples_per_second": 34.957, "eval_steps_per_second": 4.37, "eval_wer": 0.5669785431143779, "step": 1300 }, { "epoch": 0.27100271002710025, "eval_loss": 0.7377049326896667, "eval_runtime": 162.5136, "eval_samples_per_second": 34.803, "eval_steps_per_second": 4.35, "eval_wer": 0.5559211054227985, "step": 1400 }, { "epoch": 0.29036004645760743, "grad_norm": 5.448112487792969, "learning_rate": 0.00018059999999999997, "loss": 0.7987, "step": 1500 }, { "epoch": 0.29036004645760743, "eval_loss": 0.7129804491996765, "eval_runtime": 162.7944, "eval_samples_per_second": 34.743, "eval_steps_per_second": 4.343, "eval_wer": 0.5437242220474715, "step": 1500 }, { "epoch": 0.3097173828881146, "eval_loss": 0.7039781808853149, "eval_runtime": 162.4512, "eval_samples_per_second": 34.817, "eval_steps_per_second": 4.352, "eval_wer": 0.5451846383463594, "step": 1600 }, { "epoch": 0.32907471931862176, "eval_loss": 0.6728500127792358, "eval_runtime": 166.2937, "eval_samples_per_second": 34.012, "eval_steps_per_second": 4.252, "eval_wer": 0.5050954085153504, "step": 1700 }, { "epoch": 0.34843205574912894, "eval_loss": 0.6646420359611511, "eval_runtime": 164.3307, "eval_samples_per_second": 34.418, "eval_steps_per_second": 4.302, "eval_wer": 0.511338286979827, "step": 1800 }, { "epoch": 0.3677893921796361, "eval_loss": 0.6530969142913818, "eval_runtime": 163.5666, "eval_samples_per_second": 34.579, "eval_steps_per_second": 4.322, "eval_wer": 0.49691065782927574, "step": 1900 }, { "epoch": 0.38714672861014326, "grad_norm": 2.6550886631011963, "learning_rate": 0.00012059999999999999, "loss": 0.6851, "step": 2000 }, { "epoch": 0.38714672861014326, "eval_loss": 0.6413969397544861, "eval_runtime": 162.7473, "eval_samples_per_second": 34.753, "eval_steps_per_second": 4.344, "eval_wer": 0.5037954775240326, "step": 2000 }, { "epoch": 0.4065040650406504, "eval_loss": 0.6108531355857849, "eval_runtime": 163.3073, "eval_samples_per_second": 34.634, "eval_steps_per_second": 4.329, "eval_wer": 0.467654186259248, "step": 2100 }, { "epoch": 0.4258614014711576, "eval_loss": 0.6034538745880127, "eval_runtime": 164.2548, "eval_samples_per_second": 34.434, "eval_steps_per_second": 4.304, "eval_wer": 0.46924299080419185, "step": 2200 }, { "epoch": 0.4452187379016647, "eval_loss": 0.5801683664321899, "eval_runtime": 163.2622, "eval_samples_per_second": 34.644, "eval_steps_per_second": 4.33, "eval_wer": 0.458955882588949, "step": 2300 }, { "epoch": 0.4645760743321719, "eval_loss": 0.571967363357544, "eval_runtime": 164.105, "eval_samples_per_second": 34.466, "eval_steps_per_second": 4.308, "eval_wer": 0.44545906822230424, "step": 2400 }, { "epoch": 0.48393341076267904, "grad_norm": 6.736985683441162, "learning_rate": 6.0599999999999996e-05, "loss": 0.5979, "step": 2500 }, { "epoch": 0.48393341076267904, "eval_loss": 0.569513201713562, "eval_runtime": 163.6068, "eval_samples_per_second": 34.571, "eval_steps_per_second": 4.321, "eval_wer": 0.4425542841552856, "step": 2500 }, { "epoch": 0.5032907471931862, "eval_loss": 0.5556703209877014, "eval_runtime": 163.3416, "eval_samples_per_second": 34.627, "eval_steps_per_second": 4.328, "eval_wer": 0.43513986294554735, "step": 2600 }, { "epoch": 0.5226480836236934, "eval_loss": 0.5499459505081177, "eval_runtime": 163.0587, "eval_samples_per_second": 34.687, "eval_steps_per_second": 4.336, "eval_wer": 0.4269711607902297, "step": 2700 }, { "epoch": 0.5420054200542005, "eval_loss": 0.5451160073280334, "eval_runtime": 163.5278, "eval_samples_per_second": 34.587, "eval_steps_per_second": 4.323, "eval_wer": 0.425815666575725, "step": 2800 }, { "epoch": 0.5613627564847077, "eval_loss": 0.5382982492446899, "eval_runtime": 163.1776, "eval_samples_per_second": 34.662, "eval_steps_per_second": 4.333, "eval_wer": 0.42172329123268765, "step": 2900 }, { "epoch": 0.5807200929152149, "grad_norm": 4.4167070388793945, "learning_rate": 6e-07, "loss": 0.5753, "step": 3000 }, { "epoch": 0.5807200929152149, "eval_loss": 0.5355480313301086, "eval_runtime": 164.5272, "eval_samples_per_second": 34.377, "eval_steps_per_second": 4.297, "eval_wer": 0.41856173067355684, "step": 3000 }, { "epoch": 0.5807200929152149, "step": 3000, "total_flos": 3.3874766991231493e+18, "train_loss": 1.2740035095214843, "train_runtime": 6100.931, "train_samples_per_second": 3.934, "train_steps_per_second": 0.492 } ], "logging_steps": 500, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 400, "total_flos": 3.3874766991231493e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }