{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.3166023166023164, "eval_steps": 200, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07722007722007722, "eval_loss": Infinity, "eval_runtime": 223.773, "eval_samples_per_second": 31.38, "eval_steps_per_second": 0.492, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.15444015444015444, "eval_loss": Infinity, "eval_runtime": 201.0458, "eval_samples_per_second": 34.927, "eval_steps_per_second": 0.547, "eval_wer": 0.8963291911755158, "step": 400 }, { "epoch": 0.19305019305019305, "grad_norm": 4.3846588134765625, "learning_rate": 0.00024799999999999996, "loss": 3.9177, "step": 500 }, { "epoch": 0.23166023166023167, "eval_loss": Infinity, "eval_runtime": 194.7647, "eval_samples_per_second": 36.054, "eval_steps_per_second": 0.565, "eval_wer": 0.7594821653487074, "step": 600 }, { "epoch": 0.3088803088803089, "eval_loss": Infinity, "eval_runtime": 194.859, "eval_samples_per_second": 36.036, "eval_steps_per_second": 0.565, "eval_wer": 0.7512108345344293, "step": 800 }, { "epoch": 0.3861003861003861, "grad_norm": 3.36423921585083, "learning_rate": 0.00027805555555555553, "loss": 0.9791, "step": 1000 }, { "epoch": 0.3861003861003861, "eval_loss": Infinity, "eval_runtime": 195.6019, "eval_samples_per_second": 35.899, "eval_steps_per_second": 0.562, "eval_wer": 0.598444418474803, "step": 1000 }, { "epoch": 0.46332046332046334, "eval_loss": Infinity, "eval_runtime": 197.8836, "eval_samples_per_second": 35.486, "eval_steps_per_second": 0.556, "eval_wer": 0.5867710646254528, "step": 1200 }, { "epoch": 0.5405405405405406, "eval_loss": Infinity, "eval_runtime": 203.8782, "eval_samples_per_second": 34.442, "eval_steps_per_second": 0.54, "eval_wer": 0.5255476348149014, "step": 1400 }, { "epoch": 0.5791505791505791, "grad_norm": 2.6850786209106445, "learning_rate": 0.0002503333333333333, "loss": 0.805, "step": 1500 }, { "epoch": 0.6177606177606177, "eval_loss": Infinity, "eval_runtime": 199.2388, "eval_samples_per_second": 35.244, "eval_steps_per_second": 0.552, "eval_wer": 0.5281575837845559, "step": 1600 }, { "epoch": 0.694980694980695, "eval_loss": Infinity, "eval_runtime": 194.9574, "eval_samples_per_second": 36.018, "eval_steps_per_second": 0.564, "eval_wer": 0.4768805266643294, "step": 1800 }, { "epoch": 0.7722007722007722, "grad_norm": 2.9242658615112305, "learning_rate": 0.0002226111111111111, "loss": 0.7184, "step": 2000 }, { "epoch": 0.7722007722007722, "eval_loss": Infinity, "eval_runtime": 198.941, "eval_samples_per_second": 35.297, "eval_steps_per_second": 0.553, "eval_wer": 0.4743095321569086, "step": 2000 }, { "epoch": 0.8494208494208494, "eval_loss": Infinity, "eval_runtime": 207.8762, "eval_samples_per_second": 33.78, "eval_steps_per_second": 0.529, "eval_wer": 0.46802487891654654, "step": 2200 }, { "epoch": 0.9266409266409267, "eval_loss": Infinity, "eval_runtime": 215.534, "eval_samples_per_second": 32.58, "eval_steps_per_second": 0.51, "eval_wer": 0.457026735745913, "step": 2400 }, { "epoch": 0.9652509652509652, "grad_norm": 3.865280866622925, "learning_rate": 0.00019483333333333332, "loss": 0.6704, "step": 2500 }, { "epoch": 1.0038610038610039, "eval_loss": Infinity, "eval_runtime": 212.4212, "eval_samples_per_second": 33.057, "eval_steps_per_second": 0.518, "eval_wer": 0.4252528793840001, "step": 2600 }, { "epoch": 1.0810810810810811, "eval_loss": Infinity, "eval_runtime": 211.2545, "eval_samples_per_second": 33.24, "eval_steps_per_second": 0.521, "eval_wer": 0.4163972316362173, "step": 2800 }, { "epoch": 1.1583011583011582, "grad_norm": 0.6646206974983215, "learning_rate": 0.00016716666666666665, "loss": 0.5664, "step": 3000 }, { "epoch": 1.1583011583011582, "eval_loss": Infinity, "eval_runtime": 202.1583, "eval_samples_per_second": 34.735, "eval_steps_per_second": 0.544, "eval_wer": 0.41592977808941345, "step": 3000 }, { "epoch": 1.2355212355212355, "eval_loss": Infinity, "eval_runtime": 198.8952, "eval_samples_per_second": 35.305, "eval_steps_per_second": 0.553, "eval_wer": 0.3995039798475582, "step": 3200 }, { "epoch": 1.3127413127413128, "eval_loss": Infinity, "eval_runtime": 197.172, "eval_samples_per_second": 35.614, "eval_steps_per_second": 0.558, "eval_wer": 0.3940633399555919, "step": 3400 }, { "epoch": 1.3513513513513513, "grad_norm": 1.06748366355896, "learning_rate": 0.00013944444444444442, "loss": 0.5359, "step": 3500 }, { "epoch": 1.3899613899613898, "eval_loss": Infinity, "eval_runtime": 194.4658, "eval_samples_per_second": 36.109, "eval_steps_per_second": 0.566, "eval_wer": 0.38185760845571526, "step": 3600 }, { "epoch": 1.4671814671814671, "eval_loss": Infinity, "eval_runtime": 201.7117, "eval_samples_per_second": 34.812, "eval_steps_per_second": 0.545, "eval_wer": 0.3810785192110423, "step": 3800 }, { "epoch": 1.5444015444015444, "grad_norm": 0.8601678013801575, "learning_rate": 0.00011166666666666667, "loss": 0.5172, "step": 4000 }, { "epoch": 1.5444015444015444, "eval_loss": Infinity, "eval_runtime": 196.5164, "eval_samples_per_second": 35.732, "eval_steps_per_second": 0.56, "eval_wer": 0.36905457520158935, "step": 4000 }, { "epoch": 1.6216216216216215, "eval_loss": Infinity, "eval_runtime": 193.7969, "eval_samples_per_second": 36.234, "eval_steps_per_second": 0.568, "eval_wer": 0.36086115331177854, "step": 4200 }, { "epoch": 1.698841698841699, "eval_loss": Infinity, "eval_runtime": 196.3411, "eval_samples_per_second": 35.764, "eval_steps_per_second": 0.56, "eval_wer": 0.3599652006804046, "step": 4400 }, { "epoch": 1.7374517374517375, "grad_norm": 0.6527121663093567, "learning_rate": 8.394444444444443e-05, "loss": 0.4817, "step": 4500 }, { "epoch": 1.776061776061776, "eval_loss": Infinity, "eval_runtime": 194.384, "eval_samples_per_second": 36.124, "eval_steps_per_second": 0.566, "eval_wer": 0.35086284133847534, "step": 4600 }, { "epoch": 1.8532818532818531, "eval_loss": Infinity, "eval_runtime": 196.7828, "eval_samples_per_second": 35.684, "eval_steps_per_second": 0.559, "eval_wer": 0.3529663822990924, "step": 4800 }, { "epoch": 1.9305019305019306, "grad_norm": 0.7631692886352539, "learning_rate": 5.6166666666666665e-05, "loss": 0.4818, "step": 5000 }, { "epoch": 1.9305019305019306, "eval_loss": Infinity, "eval_runtime": 194.6791, "eval_samples_per_second": 36.07, "eval_steps_per_second": 0.565, "eval_wer": 0.34340955423110386, "step": 5000 }, { "epoch": 2.0077220077220077, "eval_loss": Infinity, "eval_runtime": 200.454, "eval_samples_per_second": 35.03, "eval_steps_per_second": 0.549, "eval_wer": 0.336254918000857, "step": 5200 }, { "epoch": 2.0849420849420848, "eval_loss": Infinity, "eval_runtime": 193.7233, "eval_samples_per_second": 36.248, "eval_steps_per_second": 0.568, "eval_wer": 0.33718982509446455, "step": 5400 }, { "epoch": 2.1235521235521237, "grad_norm": 1.005771279335022, "learning_rate": 2.844444444444444e-05, "loss": 0.4196, "step": 5500 }, { "epoch": 2.1621621621621623, "eval_loss": Infinity, "eval_runtime": 198.8274, "eval_samples_per_second": 35.317, "eval_steps_per_second": 0.553, "eval_wer": 0.3320348512588784, "step": 5600 }, { "epoch": 2.2393822393822393, "eval_loss": Infinity, "eval_runtime": 197.3434, "eval_samples_per_second": 35.583, "eval_steps_per_second": 0.557, "eval_wer": 0.3292690844402893, "step": 5800 }, { "epoch": 2.3166023166023164, "grad_norm": 1.702697515487671, "learning_rate": 7.222222222222222e-07, "loss": 0.3743, "step": 6000 }, { "epoch": 2.3166023166023164, "eval_loss": Infinity, "eval_runtime": 192.6595, "eval_samples_per_second": 36.448, "eval_steps_per_second": 0.571, "eval_wer": 0.3263604845935102, "step": 6000 }, { "epoch": 2.3166023166023164, "step": 6000, "total_flos": 1.2607274134194512e+19, "train_loss": 0.8722912038167318, "train_runtime": 9671.7075, "train_samples_per_second": 9.926, "train_steps_per_second": 0.62 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2607274134194512e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }