{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 5900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.69, "learning_rate": 5.88e-05, "loss": 7.6789, "step": 100 }, { "epoch": 3.39, "learning_rate": 0.0001188, "loss": 3.2952, "step": 200 }, { "epoch": 5.08, "learning_rate": 0.00017879999999999998, "loss": 3.0495, "step": 300 }, { "epoch": 6.78, "learning_rate": 0.0002388, "loss": 2.7948, "step": 400 }, { "epoch": 8.47, "learning_rate": 0.0002988, "loss": 1.8597, "step": 500 }, { "epoch": 8.47, "eval_loss": 0.7730758786201477, "eval_runtime": 49.5825, "eval_samples_per_second": 16.135, "eval_steps_per_second": 16.135, "eval_wer": 0.7211251598238386, "step": 500 }, { "epoch": 10.17, "learning_rate": 0.00029455555555555555, "loss": 1.4933, "step": 600 }, { "epoch": 11.86, "learning_rate": 0.000289, "loss": 1.365, "step": 700 }, { "epoch": 13.56, "learning_rate": 0.0002834444444444444, "loss": 1.3187, "step": 800 }, { "epoch": 15.25, "learning_rate": 0.0002778888888888889, "loss": 1.2744, "step": 900 }, { "epoch": 16.95, "learning_rate": 0.0002723333333333333, "loss": 1.2508, "step": 1000 }, { "epoch": 16.95, "eval_loss": 0.5367600321769714, "eval_runtime": 48.7576, "eval_samples_per_second": 16.408, "eval_steps_per_second": 16.408, "eval_wer": 0.5989487143060094, "step": 1000 }, { "epoch": 18.64, "learning_rate": 0.0002667777777777778, "loss": 1.1856, "step": 1100 }, { "epoch": 20.34, "learning_rate": 0.0002612222222222222, "loss": 1.1821, "step": 1200 }, { "epoch": 22.03, "learning_rate": 0.00025566666666666663, "loss": 1.1284, "step": 1300 }, { "epoch": 23.73, "learning_rate": 0.0002501111111111111, "loss": 1.1204, "step": 1400 }, { "epoch": 25.42, "learning_rate": 0.00024455555555555553, "loss": 1.1066, "step": 1500 }, { "epoch": 25.42, "eval_loss": 0.5033634901046753, "eval_runtime": 48.524, "eval_samples_per_second": 16.487, "eval_steps_per_second": 16.487, "eval_wer": 0.553345645688308, "step": 1500 }, { "epoch": 27.12, "learning_rate": 0.00023905555555555553, "loss": 1.0826, "step": 1600 }, { "epoch": 28.81, "learning_rate": 0.00023349999999999998, "loss": 1.0375, "step": 1700 }, { "epoch": 30.51, "learning_rate": 0.00022794444444444443, "loss": 1.0304, "step": 1800 }, { "epoch": 32.2, "learning_rate": 0.00022238888888888889, "loss": 1.0207, "step": 1900 }, { "epoch": 33.9, "learning_rate": 0.0002168333333333333, "loss": 1.0064, "step": 2000 }, { "epoch": 33.9, "eval_loss": 0.46856507658958435, "eval_runtime": 48.9718, "eval_samples_per_second": 16.336, "eval_steps_per_second": 16.336, "eval_wer": 0.5114362835630061, "step": 2000 }, { "epoch": 35.59, "learning_rate": 0.00021127777777777776, "loss": 0.9979, "step": 2100 }, { "epoch": 37.29, "learning_rate": 0.0002057222222222222, "loss": 0.9902, "step": 2200 }, { "epoch": 38.98, "learning_rate": 0.00020016666666666666, "loss": 0.9511, "step": 2300 }, { "epoch": 40.68, "learning_rate": 0.00019461111111111109, "loss": 0.9584, "step": 2400 }, { "epoch": 42.37, "learning_rate": 0.00018905555555555554, "loss": 0.9324, "step": 2500 }, { "epoch": 42.37, "eval_loss": 0.49271178245544434, "eval_runtime": 48.7262, "eval_samples_per_second": 16.418, "eval_steps_per_second": 16.418, "eval_wer": 0.5056115925557607, "step": 2500 }, { "epoch": 44.07, "learning_rate": 0.0001835, "loss": 0.9287, "step": 2600 }, { "epoch": 45.76, "learning_rate": 0.00017794444444444444, "loss": 0.9264, "step": 2700 }, { "epoch": 47.46, "learning_rate": 0.00017238888888888886, "loss": 0.9031, "step": 2800 }, { "epoch": 49.15, "learning_rate": 0.00016683333333333331, "loss": 0.8996, "step": 2900 }, { "epoch": 50.85, "learning_rate": 0.00016127777777777776, "loss": 0.876, "step": 3000 }, { "epoch": 50.85, "eval_loss": 0.4733906686306, "eval_runtime": 48.5334, "eval_samples_per_second": 16.484, "eval_steps_per_second": 16.484, "eval_wer": 0.4794715158403182, "step": 3000 }, { "epoch": 52.54, "learning_rate": 0.0001557222222222222, "loss": 0.8432, "step": 3100 }, { "epoch": 54.24, "learning_rate": 0.00015016666666666664, "loss": 0.8592, "step": 3200 }, { "epoch": 55.93, "learning_rate": 0.0001446111111111111, "loss": 0.8322, "step": 3300 }, { "epoch": 57.63, "learning_rate": 0.00013905555555555554, "loss": 0.8286, "step": 3400 }, { "epoch": 59.32, "learning_rate": 0.0001335, "loss": 0.8082, "step": 3500 }, { "epoch": 59.32, "eval_loss": 0.474797785282135, "eval_runtime": 48.7764, "eval_samples_per_second": 16.401, "eval_steps_per_second": 16.401, "eval_wer": 0.4798977127432874, "step": 3500 }, { "epoch": 61.02, "learning_rate": 0.00012794444444444442, "loss": 0.8101, "step": 3600 }, { "epoch": 62.71, "learning_rate": 0.0001223888888888889, "loss": 0.7805, "step": 3700 }, { "epoch": 64.41, "learning_rate": 0.00011683333333333332, "loss": 0.7741, "step": 3800 }, { "epoch": 66.1, "learning_rate": 0.00011127777777777777, "loss": 0.773, "step": 3900 }, { "epoch": 67.8, "learning_rate": 0.0001057222222222222, "loss": 0.7604, "step": 4000 }, { "epoch": 67.8, "eval_loss": 0.49494636058807373, "eval_runtime": 48.9958, "eval_samples_per_second": 16.328, "eval_steps_per_second": 16.328, "eval_wer": 0.46910072453473506, "step": 4000 }, { "epoch": 69.49, "learning_rate": 0.00010016666666666666, "loss": 0.7527, "step": 4100 }, { "epoch": 71.19, "learning_rate": 9.46111111111111e-05, "loss": 0.7354, "step": 4200 }, { "epoch": 72.88, "learning_rate": 8.905555555555555e-05, "loss": 0.7334, "step": 4300 }, { "epoch": 74.58, "learning_rate": 8.349999999999998e-05, "loss": 0.7286, "step": 4400 }, { "epoch": 76.27, "learning_rate": 7.794444444444445e-05, "loss": 0.7241, "step": 4500 }, { "epoch": 76.27, "eval_loss": 0.5090368390083313, "eval_runtime": 49.0851, "eval_samples_per_second": 16.298, "eval_steps_per_second": 16.298, "eval_wer": 0.4627077709901975, "step": 4500 }, { "epoch": 77.97, "learning_rate": 7.238888888888889e-05, "loss": 0.7065, "step": 4600 }, { "epoch": 79.66, "learning_rate": 6.683333333333332e-05, "loss": 0.6978, "step": 4700 }, { "epoch": 81.36, "learning_rate": 6.127777777777777e-05, "loss": 0.688, "step": 4800 }, { "epoch": 83.05, "learning_rate": 5.572222222222222e-05, "loss": 0.6725, "step": 4900 }, { "epoch": 84.75, "learning_rate": 5.016666666666666e-05, "loss": 0.6739, "step": 5000 }, { "epoch": 84.75, "eval_loss": 0.49671733379364014, "eval_runtime": 50.1856, "eval_samples_per_second": 15.941, "eval_steps_per_second": 15.941, "eval_wer": 0.4452336979684614, "step": 5000 }, { "epoch": 86.44, "learning_rate": 4.4611111111111106e-05, "loss": 0.6675, "step": 5100 }, { "epoch": 88.14, "learning_rate": 3.905555555555555e-05, "loss": 0.6548, "step": 5200 }, { "epoch": 89.83, "learning_rate": 3.3499999999999994e-05, "loss": 0.6465, "step": 5300 }, { "epoch": 91.53, "learning_rate": 2.8e-05, "loss": 0.6471, "step": 5400 }, { "epoch": 93.22, "learning_rate": 2.2444444444444444e-05, "loss": 0.6447, "step": 5500 }, { "epoch": 93.22, "eval_loss": 0.5071204900741577, "eval_runtime": 50.5031, "eval_samples_per_second": 15.841, "eval_steps_per_second": 15.841, "eval_wer": 0.4436709759909078, "step": 5500 }, { "epoch": 94.92, "learning_rate": 1.6888888888888888e-05, "loss": 0.6362, "step": 5600 }, { "epoch": 96.61, "learning_rate": 1.1333333333333332e-05, "loss": 0.6434, "step": 5700 }, { "epoch": 98.31, "learning_rate": 5.777777777777778e-06, "loss": 0.6269, "step": 5800 }, { "epoch": 100.0, "learning_rate": 2.222222222222222e-07, "loss": 0.6369, "step": 5900 }, { "epoch": 100.0, "step": 5900, "total_flos": 3.4027105250131104e+19, "train_loss": 1.1293389116707495, "train_runtime": 13204.1924, "train_samples_per_second": 14.064, "train_steps_per_second": 0.447 } ], "max_steps": 5900, "num_train_epochs": 100, "total_flos": 3.4027105250131104e+19, "trial_name": null, "trial_params": null }