{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 300, "global_step": 16425, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "eval_loss": 3.293024778366089, "eval_runtime": 329.2436, "eval_samples_per_second": 33.516, "eval_steps_per_second": 1.048, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.46, "learning_rate": 0.0007389237223065434, "loss": 5.6462, "step": 500 }, { "epoch": 0.55, "eval_loss": 3.415907859802246, "eval_runtime": 328.5335, "eval_samples_per_second": 33.589, "eval_steps_per_second": 1.05, "eval_wer": 1.0, "step": 600 }, { "epoch": 0.82, "eval_loss": 3.4422173500061035, "eval_runtime": 338.1217, "eval_samples_per_second": 32.636, "eval_steps_per_second": 1.02, "eval_wer": 1.0, "step": 900 }, { "epoch": 0.91, "learning_rate": 0.0014838065068897525, "loss": 3.3522, "step": 1000 }, { "epoch": 1.1, "eval_loss": 3.3719358444213867, "eval_runtime": 340.7912, "eval_samples_per_second": 32.381, "eval_steps_per_second": 1.012, "eval_wer": 1.0, "step": 1200 }, { "epoch": 1.37, "learning_rate": 0.002228689291472962, "loss": 3.2605, "step": 1500 }, { "epoch": 1.37, "eval_loss": 3.4025769233703613, "eval_runtime": 335.1987, "eval_samples_per_second": 32.921, "eval_steps_per_second": 1.029, "eval_wer": 1.0, "step": 1500 }, { "epoch": 1.64, "eval_loss": 3.444835662841797, "eval_runtime": 332.4199, "eval_samples_per_second": 33.196, "eval_steps_per_second": 1.038, "eval_wer": 1.0, "step": 1800 }, { "epoch": 1.83, "learning_rate": 0.0029735720760561708, "loss": 3.2766, "step": 2000 }, { "epoch": 1.92, "eval_loss": 3.473637104034424, "eval_runtime": 334.5697, "eval_samples_per_second": 32.983, "eval_steps_per_second": 1.031, "eval_wer": 0.9999093792478477, "step": 2100 }, { "epoch": 2.19, "eval_loss": 3.982806444168091, "eval_runtime": 342.0917, "eval_samples_per_second": 32.257, "eval_steps_per_second": 1.009, "eval_wer": 1.0, "step": 2400 }, { "epoch": 2.28, "learning_rate": 0.0037184548606393796, "loss": 3.2853, "step": 2500 }, { "epoch": 2.47, "eval_loss": 3.553187370300293, "eval_runtime": 329.7168, "eval_samples_per_second": 33.468, "eval_steps_per_second": 1.046, "eval_wer": 1.0, "step": 2700 }, { "epoch": 2.74, "learning_rate": 0.004460358114084256, "loss": 3.3389, "step": 3000 }, { "epoch": 2.74, "eval_loss": 3.781858444213867, "eval_runtime": 323.3731, "eval_samples_per_second": 34.125, "eval_steps_per_second": 1.067, "eval_wer": 1.0, "step": 3000 }, { "epoch": 3.01, "eval_loss": 3.2249505519866943, "eval_runtime": 323.988, "eval_samples_per_second": 34.06, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 3300 }, { "epoch": 3.2, "learning_rate": 0.005205240898667465, "loss": 3.2186, "step": 3500 }, { "epoch": 3.29, "eval_loss": 3.2372846603393555, "eval_runtime": 324.034, "eval_samples_per_second": 34.055, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 3600 }, { "epoch": 3.56, "eval_loss": 3.2161905765533447, "eval_runtime": 323.7754, "eval_samples_per_second": 34.082, "eval_steps_per_second": 1.066, "eval_wer": 1.0, "step": 3900 }, { "epoch": 3.65, "learning_rate": 0.005950123683250674, "loss": 3.1916, "step": 4000 }, { "epoch": 3.84, "eval_loss": 3.2367777824401855, "eval_runtime": 323.3817, "eval_samples_per_second": 34.124, "eval_steps_per_second": 1.067, "eval_wer": 1.0, "step": 4200 }, { "epoch": 4.11, "learning_rate": 0.0066950064678338835, "loss": 3.2188, "step": 4500 }, { "epoch": 4.11, "eval_loss": 3.2376551628112793, "eval_runtime": 323.9231, "eval_samples_per_second": 34.067, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 4500 }, { "epoch": 4.38, "eval_loss": 3.4206786155700684, "eval_runtime": 324.1643, "eval_samples_per_second": 34.041, "eval_steps_per_second": 1.064, "eval_wer": 1.0, "step": 4800 }, { "epoch": 4.57, "learning_rate": 0.007152364497567974, "loss": 5.3067, "step": 5000 }, { "epoch": 4.66, "eval_loss": NaN, "eval_runtime": 324.7836, "eval_samples_per_second": 33.976, "eval_steps_per_second": 1.062, "eval_wer": 1.0, "step": 5100 }, { "epoch": 4.93, "eval_loss": NaN, "eval_runtime": 323.844, "eval_samples_per_second": 34.075, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 5400 }, { "epoch": 5.02, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 5500 }, { "epoch": 5.21, "eval_loss": NaN, "eval_runtime": 323.0895, "eval_samples_per_second": 34.155, "eval_steps_per_second": 1.068, "eval_wer": 1.0, "step": 5700 }, { "epoch": 5.48, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 6000 }, { "epoch": 5.48, "eval_loss": NaN, "eval_runtime": 323.9944, "eval_samples_per_second": 34.059, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 6000 }, { "epoch": 5.75, "eval_loss": NaN, "eval_runtime": 324.3089, "eval_samples_per_second": 34.026, "eval_steps_per_second": 1.064, "eval_wer": 1.0, "step": 6300 }, { "epoch": 5.94, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 6500 }, { "epoch": 6.03, "eval_loss": NaN, "eval_runtime": 323.6949, "eval_samples_per_second": 34.091, "eval_steps_per_second": 1.066, "eval_wer": 1.0, "step": 6600 }, { "epoch": 6.3, "eval_loss": NaN, "eval_runtime": 323.4351, "eval_samples_per_second": 34.118, "eval_steps_per_second": 1.067, "eval_wer": 1.0, "step": 6900 }, { "epoch": 6.39, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 7000 }, { "epoch": 6.58, "eval_loss": NaN, "eval_runtime": 323.8786, "eval_samples_per_second": 34.071, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 7200 }, { "epoch": 6.85, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 7500 }, { "epoch": 6.85, "eval_loss": NaN, "eval_runtime": 323.8341, "eval_samples_per_second": 34.076, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 7500 }, { "epoch": 7.12, "eval_loss": NaN, "eval_runtime": 323.3556, "eval_samples_per_second": 34.127, "eval_steps_per_second": 1.067, "eval_wer": 1.0, "step": 7800 }, { "epoch": 7.31, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 8000 }, { "epoch": 7.4, "eval_loss": NaN, "eval_runtime": 323.4333, "eval_samples_per_second": 34.118, "eval_steps_per_second": 1.067, "eval_wer": 1.0, "step": 8100 }, { "epoch": 7.67, "eval_loss": NaN, "eval_runtime": 323.7546, "eval_samples_per_second": 34.084, "eval_steps_per_second": 1.066, "eval_wer": 1.0, "step": 8400 }, { "epoch": 7.76, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 8500 }, { "epoch": 7.95, "eval_loss": NaN, "eval_runtime": 323.2121, "eval_samples_per_second": 34.142, "eval_steps_per_second": 1.067, "eval_wer": 1.0, "step": 8700 }, { "epoch": 8.22, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 9000 }, { "epoch": 8.22, "eval_loss": NaN, "eval_runtime": 322.4572, "eval_samples_per_second": 34.222, "eval_steps_per_second": 1.07, "eval_wer": 1.0, "step": 9000 }, { "epoch": 8.49, "eval_loss": NaN, "eval_runtime": 318.5677, "eval_samples_per_second": 34.639, "eval_steps_per_second": 1.083, "eval_wer": 1.0, "step": 9300 }, { "epoch": 8.68, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 9500 }, { "epoch": 8.77, "eval_loss": NaN, "eval_runtime": 319.654, "eval_samples_per_second": 34.522, "eval_steps_per_second": 1.079, "eval_wer": 1.0, "step": 9600 }, { "epoch": 9.04, "eval_loss": NaN, "eval_runtime": 320.9193, "eval_samples_per_second": 34.386, "eval_steps_per_second": 1.075, "eval_wer": 1.0, "step": 9900 }, { "epoch": 9.13, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 10000 }, { "epoch": 9.32, "eval_loss": NaN, "eval_runtime": 322.8537, "eval_samples_per_second": 34.18, "eval_steps_per_second": 1.069, "eval_wer": 1.0, "step": 10200 }, { "epoch": 9.59, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 10500 }, { "epoch": 9.59, "eval_loss": NaN, "eval_runtime": 321.9757, "eval_samples_per_second": 34.273, "eval_steps_per_second": 1.072, "eval_wer": 1.0, "step": 10500 }, { "epoch": 9.86, "eval_loss": NaN, "eval_runtime": 323.2439, "eval_samples_per_second": 34.138, "eval_steps_per_second": 1.067, "eval_wer": 1.0, "step": 10800 }, { "epoch": 10.05, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 11000 }, { "epoch": 10.14, "eval_loss": NaN, "eval_runtime": 323.9537, "eval_samples_per_second": 34.064, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 11100 }, { "epoch": 10.41, "eval_loss": NaN, "eval_runtime": 323.8438, "eval_samples_per_second": 34.075, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 11400 }, { "epoch": 10.5, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 11500 }, { "epoch": 10.68, "eval_loss": NaN, "eval_runtime": 323.9188, "eval_samples_per_second": 34.067, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 11700 }, { "epoch": 10.96, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 12000 }, { "epoch": 10.96, "eval_loss": NaN, "eval_runtime": 323.8826, "eval_samples_per_second": 34.071, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 12000 }, { "epoch": 11.23, "eval_loss": NaN, "eval_runtime": 325.2969, "eval_samples_per_second": 33.923, "eval_steps_per_second": 1.061, "eval_wer": 1.0, "step": 12300 }, { "epoch": 11.42, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 12500 }, { "epoch": 11.51, "eval_loss": NaN, "eval_runtime": 324.7333, "eval_samples_per_second": 33.982, "eval_steps_per_second": 1.062, "eval_wer": 1.0, "step": 12600 }, { "epoch": 11.78, "eval_loss": NaN, "eval_runtime": 324.3204, "eval_samples_per_second": 34.025, "eval_steps_per_second": 1.064, "eval_wer": 1.0, "step": 12900 }, { "epoch": 11.87, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 13000 }, { "epoch": 12.05, "eval_loss": NaN, "eval_runtime": 324.8823, "eval_samples_per_second": 33.966, "eval_steps_per_second": 1.062, "eval_wer": 1.0, "step": 13200 }, { "epoch": 12.33, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 13500 }, { "epoch": 12.33, "eval_loss": NaN, "eval_runtime": 323.9396, "eval_samples_per_second": 34.065, "eval_steps_per_second": 1.065, "eval_wer": 1.0, "step": 13500 }, { "epoch": 12.6, "eval_loss": NaN, "eval_runtime": 326.2533, "eval_samples_per_second": 33.823, "eval_steps_per_second": 1.057, "eval_wer": 1.0, "step": 13800 }, { "epoch": 12.79, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 14000 }, { "epoch": 12.88, "eval_loss": NaN, "eval_runtime": 323.7094, "eval_samples_per_second": 34.089, "eval_steps_per_second": 1.066, "eval_wer": 1.0, "step": 14100 }, { "epoch": 13.15, "eval_loss": NaN, "eval_runtime": 325.3499, "eval_samples_per_second": 33.917, "eval_steps_per_second": 1.06, "eval_wer": 1.0, "step": 14400 }, { "epoch": 13.24, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 14500 }, { "epoch": 13.42, "eval_loss": NaN, "eval_runtime": 325.7039, "eval_samples_per_second": 33.88, "eval_steps_per_second": 1.059, "eval_wer": 1.0, "step": 14700 }, { "epoch": 13.7, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 15000 }, { "epoch": 13.7, "eval_loss": NaN, "eval_runtime": 323.516, "eval_samples_per_second": 34.11, "eval_steps_per_second": 1.066, "eval_wer": 1.0, "step": 15000 }, { "epoch": 13.97, "eval_loss": NaN, "eval_runtime": 471.9655, "eval_samples_per_second": 23.381, "eval_steps_per_second": 0.731, "eval_wer": 1.0, "step": 15300 }, { "epoch": 14.16, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 15500 }, { "epoch": 14.25, "eval_loss": NaN, "eval_runtime": 324.2368, "eval_samples_per_second": 34.034, "eval_steps_per_second": 1.064, "eval_wer": 1.0, "step": 15600 }, { "epoch": 14.52, "eval_loss": NaN, "eval_runtime": 325.6971, "eval_samples_per_second": 33.881, "eval_steps_per_second": 1.059, "eval_wer": 1.0, "step": 15900 }, { "epoch": 14.61, "learning_rate": 0.007152364497567974, "loss": 0.0, "step": 16000 }, { "epoch": 14.79, "eval_loss": NaN, "eval_runtime": 325.4219, "eval_samples_per_second": 33.91, "eval_steps_per_second": 1.06, "eval_wer": 1.0, "step": 16200 }, { "epoch": 15.0, "step": 16425, "total_flos": 6.442470243808035e+19, "train_loss": 1.129231213434646, "train_runtime": 45647.7738, "train_samples_per_second": 14.392, "train_steps_per_second": 0.36 } ], "logging_steps": 500, "max_steps": 16425, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 400, "total_flos": 6.442470243808035e+19, "train_batch_size": 20, "trial_name": null, "trial_params": null }