{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.98804780876495, "global_step": 3100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.22, "learning_rate": 3.7125e-06, "loss": 17.9778, "step": 100 }, { "epoch": 6.45, "learning_rate": 7.4625e-06, "loss": 10.1788, "step": 200 }, { "epoch": 9.67, "learning_rate": 1.1212499999999998e-05, "loss": 7.1097, "step": 300 }, { "epoch": 12.89, "learning_rate": 1.49625e-05, "loss": 5.9162, "step": 400 }, { "epoch": 16.13, "learning_rate": 1.8712499999999997e-05, "loss": 4.917, "step": 500 }, { "epoch": 16.13, "eval_loss": 4.89633846282959, "eval_runtime": 284.283, "eval_samples_per_second": 9.473, "eval_steps_per_second": 0.594, "eval_wer": 1.0, "step": 500 }, { "epoch": 19.35, "learning_rate": 2.2462499999999997e-05, "loss": 4.1291, "step": 600 }, { "epoch": 22.57, "learning_rate": 2.6212499999999997e-05, "loss": 3.6844, "step": 700 }, { "epoch": 25.8, "learning_rate": 2.99625e-05, "loss": 3.5213, "step": 800 }, { "epoch": 29.03, "learning_rate": 3.37125e-05, "loss": 3.4601, "step": 900 }, { "epoch": 32.25, "learning_rate": 3.7462499999999996e-05, "loss": 3.3585, "step": 1000 }, { "epoch": 32.25, "eval_loss": 3.3068907260894775, "eval_runtime": 283.3986, "eval_samples_per_second": 9.503, "eval_steps_per_second": 0.596, "eval_wer": 1.0000410576449335, "step": 1000 }, { "epoch": 35.48, "learning_rate": 4.12125e-05, "loss": 3.2956, "step": 1100 }, { "epoch": 38.7, "learning_rate": 4.4962499999999995e-05, "loss": 2.8135, "step": 1200 }, { "epoch": 41.92, "learning_rate": 4.871249999999999e-05, "loss": 2.0147, "step": 1300 }, { "epoch": 45.16, "learning_rate": 5.2424999999999994e-05, "loss": 1.7332, "step": 1400 }, { "epoch": 48.38, "learning_rate": 5.6175e-05, "loss": 1.5873, "step": 1500 }, { "epoch": 48.38, "eval_loss": 0.8273974061012268, "eval_runtime": 283.1526, "eval_samples_per_second": 9.511, "eval_steps_per_second": 0.597, "eval_wer": 1.006076531450156, "step": 1500 }, { "epoch": 51.61, "learning_rate": 5.9925e-05, "loss": 1.476, "step": 1600 }, { "epoch": 54.83, "learning_rate": 6.367499999999999e-05, "loss": 1.4204, "step": 1700 }, { "epoch": 58.06, "learning_rate": 6.7425e-05, "loss": 1.357, "step": 1800 }, { "epoch": 61.29, "learning_rate": 7.11375e-05, "loss": 1.3037, "step": 1900 }, { "epoch": 64.51, "learning_rate": 7.48875e-05, "loss": 1.2654, "step": 2000 }, { "epoch": 64.51, "eval_loss": 0.6249768137931824, "eval_runtime": 282.7219, "eval_samples_per_second": 9.525, "eval_steps_per_second": 0.598, "eval_wer": 1.0075546066677616, "step": 2000 }, { "epoch": 67.73, "learning_rate": 6.838636363636363e-05, "loss": 1.2231, "step": 2100 }, { "epoch": 70.96, "learning_rate": 6.156818181818181e-05, "loss": 1.1802, "step": 2200 }, { "epoch": 74.19, "learning_rate": 5.481818181818182e-05, "loss": 1.1484, "step": 2300 }, { "epoch": 77.41, "learning_rate": 4.7999999999999994e-05, "loss": 1.1185, "step": 2400 }, { "epoch": 80.64, "learning_rate": 4.118181818181818e-05, "loss": 1.0917, "step": 2500 }, { "epoch": 80.64, "eval_loss": 0.5459514856338501, "eval_runtime": 282.9764, "eval_samples_per_second": 9.517, "eval_steps_per_second": 0.597, "eval_wer": 1.0056248973558877, "step": 2500 }, { "epoch": 83.86, "learning_rate": 3.436363636363636e-05, "loss": 1.0663, "step": 2600 }, { "epoch": 87.1, "learning_rate": 2.754545454545454e-05, "loss": 1.0487, "step": 2700 }, { "epoch": 90.32, "learning_rate": 2.0727272727272725e-05, "loss": 1.0214, "step": 2800 }, { "epoch": 93.54, "learning_rate": 1.3909090909090907e-05, "loss": 1.016, "step": 2900 }, { "epoch": 96.76, "learning_rate": 7.09090909090909e-06, "loss": 1.0001, "step": 3000 }, { "epoch": 96.76, "eval_loss": 0.5304259061813354, "eval_runtime": 282.7392, "eval_samples_per_second": 9.525, "eval_steps_per_second": 0.598, "eval_wer": 1.0082525866316308, "step": 3000 }, { "epoch": 99.99, "learning_rate": 2.727272727272727e-07, "loss": 0.9947, "step": 3100 }, { "epoch": 99.99, "step": 3100, "total_flos": 5.0567037565307716e+19, "train_loss": 3.0460862658100742, "train_runtime": 81000.8052, "train_samples_per_second": 4.939, "train_steps_per_second": 0.038 } ], "max_steps": 3100, "num_train_epochs": 100, "total_flos": 5.0567037565307716e+19, "trial_name": null, "trial_params": null }