{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 64050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 9.825795424664739e-05, "loss": 7.6544, "step": 4270 }, { "epoch": 1.0, "eval_cer": 0.21212121212121213, "eval_loss": 1.1680512428283691, "eval_new_wer": 0.18095238095238095, "eval_old_wer": 0.4, "eval_runtime": 7.6679, "eval_samples_per_second": 27.387, "eval_steps_per_second": 3.521, "step": 4270 }, { "epoch": 2.0, "learning_rate": 9.124046805153827e-05, "loss": 0.867, "step": 8540 }, { "epoch": 2.0, "eval_cer": 0.14545454545454545, "eval_loss": 0.9439918398857117, "eval_new_wer": 0.10952380952380952, "eval_old_wer": 0.28095238095238095, "eval_runtime": 7.5859, "eval_samples_per_second": 27.683, "eval_steps_per_second": 3.559, "step": 8540 }, { "epoch": 3.0, "learning_rate": 8.42246252958191e-05, "loss": 0.5681, "step": 12810 }, { "epoch": 3.0, "eval_cer": 0.10735930735930736, "eval_loss": 0.6605939865112305, "eval_new_wer": 0.0761904761904762, "eval_old_wer": 0.22857142857142856, "eval_runtime": 7.6378, "eval_samples_per_second": 27.495, "eval_steps_per_second": 3.535, "step": 12810 }, { "epoch": 4.0, "learning_rate": 7.721042597948987e-05, "loss": 0.4336, "step": 17080 }, { "epoch": 4.0, "eval_cer": 0.10216450216450217, "eval_loss": 0.581123948097229, "eval_new_wer": 0.08095238095238096, "eval_old_wer": 0.19523809523809524, "eval_runtime": 7.6234, "eval_samples_per_second": 27.547, "eval_steps_per_second": 3.542, "step": 17080 }, { "epoch": 5.0, "learning_rate": 7.019458322377071e-05, "loss": 0.3415, "step": 21350 }, { "epoch": 5.0, "eval_cer": 0.08484848484848485, "eval_loss": 0.742690920829773, "eval_new_wer": 0.05714285714285714, "eval_old_wer": 0.1619047619047619, "eval_runtime": 7.6897, "eval_samples_per_second": 27.309, "eval_steps_per_second": 3.511, "step": 21350 }, { "epoch": 6.0, "learning_rate": 6.318202734683146e-05, "loss": 0.271, "step": 25620 }, { "epoch": 6.0, "eval_cer": 0.09264069264069263, "eval_loss": 0.5863712430000305, "eval_new_wer": 0.09523809523809523, "eval_old_wer": 0.1761904761904762, "eval_runtime": 7.7063, "eval_samples_per_second": 27.25, "eval_steps_per_second": 3.504, "step": 25620 }, { "epoch": 7.0, "learning_rate": 5.617111490928215e-05, "loss": 0.2626, "step": 29890 }, { "epoch": 7.0, "eval_cer": 0.14285714285714285, "eval_loss": 1.4601606130599976, "eval_new_wer": 0.14285714285714285, "eval_old_wer": 0.2714285714285714, "eval_runtime": 7.7229, "eval_samples_per_second": 27.192, "eval_steps_per_second": 3.496, "step": 29890 }, { "epoch": 8.0, "learning_rate": 4.9155272153562984e-05, "loss": 0.2013, "step": 34160 }, { "epoch": 8.0, "eval_cer": 0.10043290043290043, "eval_loss": 0.9108497500419617, "eval_new_wer": 0.11428571428571428, "eval_old_wer": 0.18095238095238095, "eval_runtime": 7.7082, "eval_samples_per_second": 27.244, "eval_steps_per_second": 3.503, "step": 34160 }, { "epoch": 9.0, "learning_rate": 4.213778595845386e-05, "loss": 0.1484, "step": 38430 }, { "epoch": 9.0, "eval_cer": 0.14025974025974025, "eval_loss": 1.427252173423767, "eval_new_wer": 0.1380952380952381, "eval_old_wer": 0.2, "eval_runtime": 7.7333, "eval_samples_per_second": 27.155, "eval_steps_per_second": 3.491, "step": 38430 }, { "epoch": 10.0, "learning_rate": 3.5121943202734684e-05, "loss": 0.1179, "step": 42700 }, { "epoch": 10.0, "eval_cer": 0.1264069264069264, "eval_loss": 1.0122177600860596, "eval_new_wer": 0.1523809523809524, "eval_old_wer": 0.18095238095238095, "eval_runtime": 7.7665, "eval_samples_per_second": 27.039, "eval_steps_per_second": 3.476, "step": 42700 }, { "epoch": 11.0, "learning_rate": 2.8106100447015514e-05, "loss": 0.0987, "step": 46970 }, { "epoch": 11.0, "eval_cer": 0.08831168831168831, "eval_loss": 0.7208192944526672, "eval_new_wer": 0.0761904761904762, "eval_old_wer": 0.14285714285714285, "eval_runtime": 7.7985, "eval_samples_per_second": 26.928, "eval_steps_per_second": 3.462, "step": 46970 }, { "epoch": 12.0, "learning_rate": 2.1090257691296347e-05, "loss": 0.0771, "step": 51240 }, { "epoch": 12.0, "eval_cer": 0.11428571428571428, "eval_loss": 1.2593973875045776, "eval_new_wer": 0.11428571428571428, "eval_old_wer": 0.1761904761904762, "eval_runtime": 7.5989, "eval_samples_per_second": 27.635, "eval_steps_per_second": 3.553, "step": 51240 }, { "epoch": 13.0, "learning_rate": 1.4076058374967133e-05, "loss": 0.0603, "step": 55510 }, { "epoch": 13.0, "eval_cer": 0.10735930735930736, "eval_loss": 1.1333523988723755, "eval_new_wer": 0.11428571428571428, "eval_old_wer": 0.1619047619047619, "eval_runtime": 7.5623, "eval_samples_per_second": 27.77, "eval_steps_per_second": 3.57, "step": 55510 }, { "epoch": 14.0, "learning_rate": 7.061859058637918e-06, "loss": 0.0489, "step": 59780 }, { "epoch": 14.0, "eval_cer": 0.09177489177489177, "eval_loss": 0.9029120802879333, "eval_new_wer": 0.10476190476190476, "eval_old_wer": 0.12857142857142856, "eval_runtime": 7.6393, "eval_samples_per_second": 27.489, "eval_steps_per_second": 3.534, "step": 59780 }, { "epoch": 15.0, "learning_rate": 4.7659742308703654e-08, "loss": 0.0409, "step": 64050 }, { "epoch": 15.0, "eval_cer": 0.09264069264069263, "eval_loss": 1.0212522745132446, "eval_new_wer": 0.1, "eval_old_wer": 0.1380952380952381, "eval_runtime": 7.658, "eval_samples_per_second": 27.422, "eval_steps_per_second": 3.526, "step": 64050 } ], "max_steps": 64050, "num_train_epochs": 15, "total_flos": 5.079400634281409e+19, "trial_name": null, "trial_params": null }