{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.17, "learning_rate": 5.82e-05, "loss": 11.8246, "step": 100 }, { "epoch": 8.33, "learning_rate": 0.0001182, "loss": 3.9245, "step": 200 }, { "epoch": 12.5, "learning_rate": 0.00017819999999999997, "loss": 3.3755, "step": 300 }, { "epoch": 16.67, "learning_rate": 0.0002382, "loss": 3.2775, "step": 400 }, { "epoch": 20.83, "learning_rate": 0.0002982, "loss": 2.3957, "step": 500 }, { "epoch": 20.83, "eval_loss": 1.0925403833389282, "eval_runtime": 10.3822, "eval_samples_per_second": 20.516, "eval_steps_per_second": 1.348, "eval_wer": 0.8111332007952287, "step": 500 }, { "epoch": 25.0, "learning_rate": 0.0002846842105263158, "loss": 1.5517, "step": 600 }, { "epoch": 29.17, "learning_rate": 0.0002688947368421052, "loss": 1.3096, "step": 700 }, { "epoch": 33.33, "learning_rate": 0.0002531052631578947, "loss": 1.2066, "step": 800 }, { "epoch": 37.5, "learning_rate": 0.0002373157894736842, "loss": 1.1194, "step": 900 }, { "epoch": 41.67, "learning_rate": 0.00022152631578947366, "loss": 1.0351, "step": 1000 }, { "epoch": 41.67, "eval_loss": 0.7836905717849731, "eval_runtime": 11.7781, "eval_samples_per_second": 18.084, "eval_steps_per_second": 1.189, "eval_wer": 0.6573889993373094, "step": 1000 }, { "epoch": 45.83, "learning_rate": 0.00020573684210526314, "loss": 0.955, "step": 1100 }, { "epoch": 50.0, "learning_rate": 0.00018994736842105263, "loss": 0.8928, "step": 1200 }, { "epoch": 54.17, "learning_rate": 0.00017415789473684208, "loss": 0.8239, "step": 1300 }, { "epoch": 58.33, "learning_rate": 0.00015836842105263158, "loss": 0.7782, "step": 1400 }, { "epoch": 62.5, "learning_rate": 0.00014257894736842105, "loss": 0.7396, "step": 1500 }, { "epoch": 62.5, "eval_loss": 0.7674338221549988, "eval_runtime": 10.3819, "eval_samples_per_second": 20.516, "eval_steps_per_second": 1.348, "eval_wer": 0.6083499005964215, "step": 1500 }, { "epoch": 66.67, "learning_rate": 0.00012678947368421052, "loss": 0.6811, "step": 1600 }, { "epoch": 70.83, "learning_rate": 0.00011099999999999999, "loss": 0.6438, "step": 1700 }, { "epoch": 75.0, "learning_rate": 9.521052631578946e-05, "loss": 0.6074, "step": 1800 }, { "epoch": 79.17, "learning_rate": 7.942105263157894e-05, "loss": 0.5791, "step": 1900 }, { "epoch": 83.33, "learning_rate": 6.363157894736841e-05, "loss": 0.5385, "step": 2000 }, { "epoch": 83.33, "eval_loss": 0.801542341709137, "eval_runtime": 10.3837, "eval_samples_per_second": 20.513, "eval_steps_per_second": 1.348, "eval_wer": 0.5811795891318754, "step": 2000 }, { "epoch": 87.5, "learning_rate": 4.784210526315789e-05, "loss": 0.5167, "step": 2100 }, { "epoch": 91.67, "learning_rate": 3.205263157894736e-05, "loss": 0.495, "step": 2200 }, { "epoch": 95.83, "learning_rate": 1.626315789473684e-05, "loss": 0.4769, "step": 2300 }, { "epoch": 100.0, "learning_rate": 4.7368421052631574e-07, "loss": 0.4558, "step": 2400 }, { "epoch": 100.0, "step": 2400, "total_flos": 1.3881399754298573e+19, "train_loss": 1.675173250834147, "train_runtime": 5040.7553, "train_samples_per_second": 14.938, "train_steps_per_second": 0.476 } ], "max_steps": 2400, "num_train_epochs": 100, "total_flos": 1.3881399754298573e+19, "trial_name": null, "trial_params": null }