{ "best_metric": 53.62439467312349, "best_model_checkpoint": "./checkpoint-300", "epoch": 3.566371681415929, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 3e-06, "loss": 4.1495, "step": 10 }, { "epoch": 0.24, "learning_rate": 6.333333333333333e-06, "loss": 2.9287, "step": 20 }, { "epoch": 0.35, "learning_rate": 9.666666666666667e-06, "loss": 2.0462, "step": 30 }, { "epoch": 0.47, "learning_rate": 9.666666666666667e-06, "loss": 1.6138, "step": 40 }, { "epoch": 0.59, "learning_rate": 9.296296296296296e-06, "loss": 1.3862, "step": 50 }, { "epoch": 0.71, "learning_rate": 8.925925925925927e-06, "loss": 1.2604, "step": 60 }, { "epoch": 0.83, "learning_rate": 8.555555555555556e-06, "loss": 1.1436, "step": 70 }, { "epoch": 0.94, "learning_rate": 8.185185185185187e-06, "loss": 1.168, "step": 80 }, { "epoch": 1.07, "learning_rate": 7.814814814814816e-06, "loss": 1.1041, "step": 90 }, { "epoch": 1.19, "learning_rate": 7.444444444444445e-06, "loss": 0.9683, "step": 100 }, { "epoch": 1.19, "eval_cer": 131.61659035460045, "eval_loss": 0.8811978697776794, "eval_runtime": 593.3196, "eval_samples_per_second": 0.863, "eval_steps_per_second": 0.431, "eval_wer": 139.37651331719127, "step": 100 }, { "epoch": 1.31, "learning_rate": 7.074074074074074e-06, "loss": 0.909, "step": 110 }, { "epoch": 1.42, "learning_rate": 6.703703703703704e-06, "loss": 0.9213, "step": 120 }, { "epoch": 1.54, "learning_rate": 6.333333333333333e-06, "loss": 0.9092, "step": 130 }, { "epoch": 1.66, "learning_rate": 5.962962962962963e-06, "loss": 0.8481, "step": 140 }, { "epoch": 1.78, "learning_rate": 5.5925925925925926e-06, "loss": 0.8471, "step": 150 }, { "epoch": 1.9, "learning_rate": 5.2222222222222226e-06, "loss": 0.8504, "step": 160 }, { "epoch": 2.02, "learning_rate": 4.851851851851852e-06, "loss": 0.8264, "step": 170 }, { "epoch": 2.14, "learning_rate": 4.481481481481482e-06, "loss": 0.7236, "step": 180 }, { "epoch": 2.26, "learning_rate": 4.111111111111111e-06, "loss": 0.6898, "step": 190 }, { "epoch": 2.38, "learning_rate": 3.740740740740741e-06, "loss": 0.6848, "step": 200 }, { "epoch": 2.38, "eval_cer": 151.33685371478225, "eval_loss": 0.7542899250984192, "eval_runtime": 551.6472, "eval_samples_per_second": 0.928, "eval_steps_per_second": 0.464, "eval_wer": 145.9972760290557, "step": 200 }, { "epoch": 2.5, "learning_rate": 3.3703703703703705e-06, "loss": 0.7021, "step": 210 }, { "epoch": 2.61, "learning_rate": 3e-06, "loss": 0.6956, "step": 220 }, { "epoch": 2.73, "learning_rate": 2.6296296296296297e-06, "loss": 0.629, "step": 230 }, { "epoch": 2.85, "learning_rate": 2.2592592592592592e-06, "loss": 0.6661, "step": 240 }, { "epoch": 2.97, "learning_rate": 1.888888888888889e-06, "loss": 0.6251, "step": 250 }, { "epoch": 3.09, "learning_rate": 1.5185185185185186e-06, "loss": 0.6852, "step": 260 }, { "epoch": 3.21, "learning_rate": 1.1481481481481482e-06, "loss": 0.5772, "step": 270 }, { "epoch": 3.33, "learning_rate": 7.777777777777779e-07, "loss": 0.5592, "step": 280 }, { "epoch": 3.45, "learning_rate": 4.074074074074075e-07, "loss": 0.5845, "step": 290 }, { "epoch": 3.57, "learning_rate": 3.703703703703704e-08, "loss": 0.5548, "step": 300 }, { "epoch": 3.57, "eval_cer": 22.68473647271147, "eval_loss": 0.6978507041931152, "eval_runtime": 832.954, "eval_samples_per_second": 0.615, "eval_steps_per_second": 0.307, "eval_wer": 53.62439467312349, "step": 300 } ], "max_steps": 300, "num_train_epochs": 4, "total_flos": 2.79033223569408e+18, "trial_name": null, "trial_params": null }