{ "best_metric": null, "best_model_checkpoint": null, "epoch": 11.680164252309797, "eval_steps": 300, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.37, "learning_rate": 5.96e-05, "loss": 14.3929, "step": 300 }, { "epoch": 0.37, "eval_cer": 0.9869306169517587, "eval_loss": 7.0630784034729, "eval_runtime": 45.4918, "eval_samples_per_second": 48.822, "eval_steps_per_second": 6.111, "step": 300 }, { "epoch": 0.73, "learning_rate": 9.895209580838324e-05, "loss": 8.6544, "step": 600 }, { "epoch": 0.73, "eval_cer": 0.8301460695752451, "eval_loss": 5.885794639587402, "eval_runtime": 39.1068, "eval_samples_per_second": 56.793, "eval_steps_per_second": 7.109, "step": 600 }, { "epoch": 1.1, "learning_rate": 9.575491873396066e-05, "loss": 4.2963, "step": 900 }, { "epoch": 1.1, "eval_cer": 0.5212857966557756, "eval_loss": 2.0850701332092285, "eval_runtime": 39.3849, "eval_samples_per_second": 56.392, "eval_steps_per_second": 7.059, "step": 900 }, { "epoch": 1.46, "learning_rate": 9.256843455945254e-05, "loss": 2.416, "step": 1200 }, { "epoch": 1.46, "eval_cer": 0.42461080146069574, "eval_loss": 1.6146297454833984, "eval_runtime": 39.2918, "eval_samples_per_second": 56.526, "eval_steps_per_second": 7.075, "step": 1200 }, { "epoch": 1.83, "learning_rate": 8.936056458511549e-05, "loss": 2.0879, "step": 1500 }, { "epoch": 1.83, "eval_cer": 0.38155871612531234, "eval_loss": 1.3970342874526978, "eval_runtime": 39.3238, "eval_samples_per_second": 56.48, "eval_steps_per_second": 7.07, "step": 1500 }, { "epoch": 2.19, "learning_rate": 8.615269461077845e-05, "loss": 1.8739, "step": 1800 }, { "epoch": 2.19, "eval_cer": 0.3361522198731501, "eval_loss": 1.259849190711975, "eval_runtime": 39.8822, "eval_samples_per_second": 55.689, "eval_steps_per_second": 6.971, "step": 1800 }, { "epoch": 2.56, "learning_rate": 8.29448246364414e-05, "loss": 1.7464, "step": 2100 }, { "epoch": 2.56, "eval_cer": 0.3469632904093792, "eval_loss": 1.1771619319915771, "eval_runtime": 39.4164, "eval_samples_per_second": 56.347, "eval_steps_per_second": 7.053, "step": 2100 }, { "epoch": 2.92, "learning_rate": 7.973695466210437e-05, "loss": 1.7025, "step": 2400 }, { "epoch": 2.92, "eval_cer": 0.31635594849125503, "eval_loss": 1.1029127836227417, "eval_runtime": 39.436, "eval_samples_per_second": 56.319, "eval_steps_per_second": 7.049, "step": 2400 }, { "epoch": 3.29, "learning_rate": 7.652908468776732e-05, "loss": 1.5982, "step": 2700 }, { "epoch": 3.29, "eval_cer": 0.3095329617528349, "eval_loss": 1.049662709236145, "eval_runtime": 39.3386, "eval_samples_per_second": 56.459, "eval_steps_per_second": 7.067, "step": 2700 }, { "epoch": 3.65, "learning_rate": 7.332121471343029e-05, "loss": 1.5632, "step": 3000 }, { "epoch": 3.65, "eval_cer": 0.30030751489525276, "eval_loss": 1.013907790184021, "eval_runtime": 39.4665, "eval_samples_per_second": 56.276, "eval_steps_per_second": 7.044, "step": 3000 }, { "epoch": 4.02, "learning_rate": 7.011334473909324e-05, "loss": 1.5063, "step": 3300 }, { "epoch": 4.02, "eval_cer": 0.28858350951374206, "eval_loss": 0.9488086104393005, "eval_runtime": 39.4805, "eval_samples_per_second": 56.256, "eval_steps_per_second": 7.041, "step": 3300 }, { "epoch": 4.38, "learning_rate": 6.69054747647562e-05, "loss": 1.4507, "step": 3600 }, { "epoch": 4.38, "eval_cer": 0.272583125120123, "eval_loss": 0.9192214012145996, "eval_runtime": 39.3363, "eval_samples_per_second": 56.462, "eval_steps_per_second": 7.067, "step": 3600 }, { "epoch": 4.75, "learning_rate": 6.369760479041916e-05, "loss": 1.4029, "step": 3900 }, { "epoch": 4.75, "eval_cer": 0.2764751105131655, "eval_loss": 0.9218717217445374, "eval_runtime": 39.3534, "eval_samples_per_second": 56.437, "eval_steps_per_second": 7.064, "step": 3900 }, { "epoch": 5.11, "learning_rate": 6.050042771599658e-05, "loss": 1.3126, "step": 4200 }, { "epoch": 5.11, "eval_cer": 0.27450509321545263, "eval_loss": 0.9048557281494141, "eval_runtime": 39.4768, "eval_samples_per_second": 56.261, "eval_steps_per_second": 7.042, "step": 4200 }, { "epoch": 5.48, "learning_rate": 5.729255774165954e-05, "loss": 1.3053, "step": 4500 }, { "epoch": 5.48, "eval_cer": 0.2652315971554872, "eval_loss": 0.8531870245933533, "eval_runtime": 39.4442, "eval_samples_per_second": 56.307, "eval_steps_per_second": 7.048, "step": 4500 }, { "epoch": 5.84, "learning_rate": 5.40846877673225e-05, "loss": 1.2989, "step": 4800 }, { "epoch": 5.84, "eval_cer": 0.24903901595233519, "eval_loss": 0.8212350010871887, "eval_runtime": 39.4345, "eval_samples_per_second": 56.321, "eval_steps_per_second": 7.05, "step": 4800 }, { "epoch": 6.21, "learning_rate": 5.087681779298546e-05, "loss": 1.2403, "step": 5100 }, { "epoch": 6.21, "eval_cer": 0.26124351335767826, "eval_loss": 0.8196715116500854, "eval_runtime": 39.3565, "eval_samples_per_second": 56.433, "eval_steps_per_second": 7.064, "step": 5100 }, { "epoch": 6.57, "learning_rate": 4.766894781864842e-05, "loss": 1.1903, "step": 5400 }, { "epoch": 6.57, "eval_cer": 0.2607630213338459, "eval_loss": 0.8173399567604065, "eval_runtime": 39.5209, "eval_samples_per_second": 56.198, "eval_steps_per_second": 7.034, "step": 5400 }, { "epoch": 6.94, "learning_rate": 4.446107784431138e-05, "loss": 1.2313, "step": 5700 }, { "epoch": 6.94, "eval_cer": 0.24995195079761676, "eval_loss": 0.8241677284240723, "eval_runtime": 39.4127, "eval_samples_per_second": 56.352, "eval_steps_per_second": 7.054, "step": 5700 }, { "epoch": 7.3, "learning_rate": 4.125320786997434e-05, "loss": 1.1554, "step": 6000 }, { "epoch": 7.3, "eval_cer": 0.24529117816644244, "eval_loss": 0.7795117497444153, "eval_runtime": 39.3516, "eval_samples_per_second": 56.44, "eval_steps_per_second": 7.065, "step": 6000 }, { "epoch": 7.67, "learning_rate": 3.80453378956373e-05, "loss": 1.1243, "step": 6300 }, { "epoch": 7.67, "eval_cer": 0.2526427061310782, "eval_loss": 0.782616913318634, "eval_runtime": 39.456, "eval_samples_per_second": 56.291, "eval_steps_per_second": 7.046, "step": 6300 }, { "epoch": 8.03, "learning_rate": 3.483746792130026e-05, "loss": 1.099, "step": 6600 }, { "epoch": 8.03, "eval_cer": 0.23010763021333847, "eval_loss": 0.7462431192398071, "eval_runtime": 39.3849, "eval_samples_per_second": 56.392, "eval_steps_per_second": 7.059, "step": 6600 }, { "epoch": 8.4, "learning_rate": 3.1629597946963216e-05, "loss": 1.0777, "step": 6900 }, { "epoch": 8.4, "eval_cer": 0.22544685758216412, "eval_loss": 0.7633857131004333, "eval_runtime": 39.2653, "eval_samples_per_second": 56.564, "eval_steps_per_second": 7.08, "step": 6900 }, { "epoch": 8.76, "learning_rate": 2.8421727972626178e-05, "loss": 1.0901, "step": 7200 }, { "epoch": 8.76, "eval_cer": 0.23880453584470498, "eval_loss": 0.7462579607963562, "eval_runtime": 39.2693, "eval_samples_per_second": 56.558, "eval_steps_per_second": 7.079, "step": 7200 }, { "epoch": 9.13, "learning_rate": 2.5224550898203592e-05, "loss": 1.0049, "step": 7500 }, { "epoch": 9.13, "eval_cer": 0.22155487218912165, "eval_loss": 0.7342504858970642, "eval_runtime": 39.3738, "eval_samples_per_second": 56.408, "eval_steps_per_second": 7.061, "step": 7500 }, { "epoch": 9.49, "learning_rate": 2.2016680923866555e-05, "loss": 1.0011, "step": 7800 }, { "epoch": 9.49, "eval_cer": 0.22674418604651161, "eval_loss": 0.7101256251335144, "eval_runtime": 39.3531, "eval_samples_per_second": 56.438, "eval_steps_per_second": 7.064, "step": 7800 }, { "epoch": 9.86, "learning_rate": 1.8808810949529513e-05, "loss": 1.0084, "step": 8100 }, { "epoch": 9.86, "eval_cer": 0.22189121660580435, "eval_loss": 0.698137640953064, "eval_runtime": 39.3187, "eval_samples_per_second": 56.487, "eval_steps_per_second": 7.07, "step": 8100 }, { "epoch": 10.22, "learning_rate": 1.5600940975192476e-05, "loss": 0.9547, "step": 8400 }, { "epoch": 10.22, "eval_cer": 0.22222756102248703, "eval_loss": 0.7049764394760132, "eval_runtime": 39.2625, "eval_samples_per_second": 56.568, "eval_steps_per_second": 7.081, "step": 8400 }, { "epoch": 10.59, "learning_rate": 1.2393071000855433e-05, "loss": 0.9304, "step": 8700 }, { "epoch": 10.59, "eval_cer": 0.2266961368441284, "eval_loss": 0.6863571405410767, "eval_runtime": 39.4555, "eval_samples_per_second": 56.291, "eval_steps_per_second": 7.046, "step": 8700 }, { "epoch": 10.95, "learning_rate": 9.185201026518392e-06, "loss": 0.9044, "step": 9000 }, { "epoch": 10.95, "eval_cer": 0.2206419373438401, "eval_loss": 0.6961002945899963, "eval_runtime": 39.4203, "eval_samples_per_second": 56.341, "eval_steps_per_second": 7.052, "step": 9000 }, { "epoch": 11.32, "learning_rate": 5.977331052181352e-06, "loss": 0.9054, "step": 9300 }, { "epoch": 11.32, "eval_cer": 0.21146453968864118, "eval_loss": 0.6892764568328857, "eval_runtime": 39.4357, "eval_samples_per_second": 56.32, "eval_steps_per_second": 7.049, "step": 9300 }, { "epoch": 11.68, "learning_rate": 2.7694610778443115e-06, "loss": 0.9067, "step": 9600 }, { "epoch": 11.68, "eval_cer": 0.21718239477224677, "eval_loss": 0.682004451751709, "eval_runtime": 39.4317, "eval_samples_per_second": 56.325, "eval_steps_per_second": 7.05, "step": 9600 } ], "logging_steps": 300, "max_steps": 9852, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 600, "total_flos": 1.5092741663533305e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }