{ "best_metric": null, "best_model_checkpoint": null, "epoch": 799.8, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 49.8, "learning_rate": 5.5799999999999994e-05, "loss": 42.4143, "step": 100 }, { "epoch": 49.8, "eval_loss": 21.511550903320312, "eval_runtime": 22.3742, "eval_samples_per_second": 2.413, "eval_steps_per_second": 0.313, "eval_wer": 1.0, "step": 100 }, { "epoch": 99.8, "learning_rate": 0.0001158, "loss": 5.9884, "step": 200 }, { "epoch": 99.8, "eval_loss": 31.79755401611328, "eval_runtime": 0.9499, "eval_samples_per_second": 56.846, "eval_steps_per_second": 7.369, "eval_wer": 1.0, "step": 200 }, { "epoch": 149.8, "learning_rate": 0.00017579999999999996, "loss": 4.0043, "step": 300 }, { "epoch": 149.8, "eval_loss": 3.482863187789917, "eval_runtime": 0.9439, "eval_samples_per_second": 57.208, "eval_steps_per_second": 7.416, "eval_wer": 1.0, "step": 300 }, { "epoch": 199.8, "learning_rate": 0.00023579999999999999, "loss": 3.653, "step": 400 }, { "epoch": 199.8, "eval_loss": 3.6417062282562256, "eval_runtime": 0.9393, "eval_samples_per_second": 57.489, "eval_steps_per_second": 7.452, "eval_wer": 1.0, "step": 400 }, { "epoch": 249.8, "learning_rate": 0.0002958, "loss": 3.5207, "step": 500 }, { "epoch": 249.8, "eval_loss": 3.508103370666504, "eval_runtime": 0.9443, "eval_samples_per_second": 57.187, "eval_steps_per_second": 7.413, "eval_wer": 1.0, "step": 500 }, { "epoch": 299.8, "learning_rate": 0.0002746363636363636, "loss": 3.63, "step": 600 }, { "epoch": 299.8, "eval_loss": 3.4835526943206787, "eval_runtime": 0.943, "eval_samples_per_second": 57.264, "eval_steps_per_second": 7.423, "eval_wer": 1.0, "step": 600 }, { "epoch": 349.8, "learning_rate": 0.00024736363636363633, "loss": 3.648, "step": 700 }, { "epoch": 349.8, "eval_loss": 3.4515135288238525, "eval_runtime": 0.9803, "eval_samples_per_second": 55.084, "eval_steps_per_second": 7.141, "eval_wer": 1.0, "step": 700 }, { "epoch": 399.8, "learning_rate": 0.00022009090909090907, "loss": 3.6448, "step": 800 }, { "epoch": 399.8, "eval_loss": 3.4646780490875244, "eval_runtime": 0.9384, "eval_samples_per_second": 57.547, "eval_steps_per_second": 7.46, "eval_wer": 1.0, "step": 800 }, { "epoch": 449.8, "learning_rate": 0.0001928181818181818, "loss": 3.6872, "step": 900 }, { "epoch": 449.8, "eval_loss": 3.4371423721313477, "eval_runtime": 0.938, "eval_samples_per_second": 57.568, "eval_steps_per_second": 7.463, "eval_wer": 1.0, "step": 900 }, { "epoch": 499.8, "learning_rate": 0.0001655454545454545, "loss": 3.6892, "step": 1000 }, { "epoch": 499.8, "eval_loss": 3.4336819648742676, "eval_runtime": 0.9476, "eval_samples_per_second": 56.988, "eval_steps_per_second": 7.387, "eval_wer": 1.0, "step": 1000 }, { "epoch": 549.8, "learning_rate": 0.00013827272727272728, "loss": 3.684, "step": 1100 }, { "epoch": 549.8, "eval_loss": 3.437472343444824, "eval_runtime": 0.94, "eval_samples_per_second": 57.449, "eval_steps_per_second": 7.447, "eval_wer": 1.0, "step": 1100 }, { "epoch": 599.8, "learning_rate": 0.00011099999999999999, "loss": 3.6843, "step": 1200 }, { "epoch": 599.8, "eval_loss": 3.4452097415924072, "eval_runtime": 0.9463, "eval_samples_per_second": 57.067, "eval_steps_per_second": 7.398, "eval_wer": 1.0, "step": 1200 }, { "epoch": 649.8, "learning_rate": 8.372727272727272e-05, "loss": 3.6842, "step": 1300 }, { "epoch": 649.8, "eval_loss": 3.441551446914673, "eval_runtime": 0.9462, "eval_samples_per_second": 57.068, "eval_steps_per_second": 7.398, "eval_wer": 1.0, "step": 1300 }, { "epoch": 699.8, "learning_rate": 5.645454545454545e-05, "loss": 3.6819, "step": 1400 }, { "epoch": 699.8, "eval_loss": 3.4498231410980225, "eval_runtime": 0.9402, "eval_samples_per_second": 57.435, "eval_steps_per_second": 7.445, "eval_wer": 1.0, "step": 1400 }, { "epoch": 749.8, "learning_rate": 2.918181818181818e-05, "loss": 3.6832, "step": 1500 }, { "epoch": 749.8, "eval_loss": 3.4524271488189697, "eval_runtime": 0.9511, "eval_samples_per_second": 56.778, "eval_steps_per_second": 7.36, "eval_wer": 1.0, "step": 1500 }, { "epoch": 799.8, "learning_rate": 1.909090909090909e-06, "loss": 3.6828, "step": 1600 }, { "epoch": 799.8, "eval_loss": 3.4495346546173096, "eval_runtime": 0.9541, "eval_samples_per_second": 56.598, "eval_steps_per_second": 7.337, "eval_wer": 1.0, "step": 1600 } ], "max_steps": 1600, "num_train_epochs": 800, "total_flos": 1.8719390163492864e+18, "trial_name": null, "trial_params": null }