{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4722536806342017, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.056625141562853906, "eval_loss": 3.599447011947632, "eval_runtime": 153.6415, "eval_samples_per_second": 36.813, "eval_steps_per_second": 4.602, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.11325028312570781, "eval_loss": 3.0223705768585205, "eval_runtime": 151.1357, "eval_samples_per_second": 37.423, "eval_steps_per_second": 4.678, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.16987542468856173, "eval_loss": 1.9626648426055908, "eval_runtime": 153.3369, "eval_samples_per_second": 36.886, "eval_steps_per_second": 4.611, "eval_wer": 0.9060519009484682, "step": 300 }, { "epoch": 0.22650056625141562, "eval_loss": 0.9921229481697083, "eval_runtime": 154.0126, "eval_samples_per_second": 36.724, "eval_steps_per_second": 4.591, "eval_wer": 0.6951581582706103, "step": 400 }, { "epoch": 0.28312570781426954, "grad_norm": 1.7500211000442505, "learning_rate": 0.0002982, "loss": 3.6184, "step": 500 }, { "epoch": 0.28312570781426954, "eval_loss": 0.8504024147987366, "eval_runtime": 153.4805, "eval_samples_per_second": 36.852, "eval_steps_per_second": 4.606, "eval_wer": 0.6157981736771998, "step": 500 }, { "epoch": 0.33975084937712347, "eval_loss": 0.7935531735420227, "eval_runtime": 152.4296, "eval_samples_per_second": 37.106, "eval_steps_per_second": 4.638, "eval_wer": 0.5963634029304618, "step": 600 }, { "epoch": 0.39637599093997733, "eval_loss": 0.7600815892219543, "eval_runtime": 156.4822, "eval_samples_per_second": 36.145, "eval_steps_per_second": 4.518, "eval_wer": 0.5606072764038452, "step": 700 }, { "epoch": 0.45300113250283125, "eval_loss": 0.6909334063529968, "eval_runtime": 152.429, "eval_samples_per_second": 37.106, "eval_steps_per_second": 4.638, "eval_wer": 0.5127666062172008, "step": 800 }, { "epoch": 0.5096262740656852, "eval_loss": 0.6616554856300354, "eval_runtime": 152.3205, "eval_samples_per_second": 37.132, "eval_steps_per_second": 4.642, "eval_wer": 0.4916146426794627, "step": 900 }, { "epoch": 0.5662514156285391, "grad_norm": 3.9567878246307373, "learning_rate": 0.00022914285714285712, "loss": 0.4662, "step": 1000 }, { "epoch": 0.5662514156285391, "eval_loss": 0.6466770172119141, "eval_runtime": 152.3467, "eval_samples_per_second": 37.126, "eval_steps_per_second": 4.641, "eval_wer": 0.4811510006258927, "step": 1000 }, { "epoch": 0.622876557191393, "eval_loss": 0.6144490838050842, "eval_runtime": 152.8109, "eval_samples_per_second": 37.013, "eval_steps_per_second": 4.627, "eval_wer": 0.46394697565437887, "step": 1100 }, { "epoch": 0.6795016987542469, "eval_loss": 0.5942133069038391, "eval_runtime": 157.9769, "eval_samples_per_second": 35.803, "eval_steps_per_second": 4.475, "eval_wer": 0.45369196450064997, "step": 1200 }, { "epoch": 0.7361268403171007, "eval_loss": 0.5675162076950073, "eval_runtime": 165.0294, "eval_samples_per_second": 34.273, "eval_steps_per_second": 4.284, "eval_wer": 0.4351077658840333, "step": 1300 }, { "epoch": 0.7927519818799547, "eval_loss": 0.5538555383682251, "eval_runtime": 154.4584, "eval_samples_per_second": 36.618, "eval_steps_per_second": 4.577, "eval_wer": 0.42295902810097735, "step": 1400 }, { "epoch": 0.8493771234428086, "grad_norm": 1.9575612545013428, "learning_rate": 0.00015785714285714285, "loss": 0.3508, "step": 1500 }, { "epoch": 0.8493771234428086, "eval_loss": 0.5447892546653748, "eval_runtime": 152.2149, "eval_samples_per_second": 37.158, "eval_steps_per_second": 4.645, "eval_wer": 0.41440516120749143, "step": 1500 }, { "epoch": 0.9060022650056625, "eval_loss": 0.5326293110847473, "eval_runtime": 157.3458, "eval_samples_per_second": 35.946, "eval_steps_per_second": 4.493, "eval_wer": 0.406621623790342, "step": 1600 }, { "epoch": 0.9626274065685164, "eval_loss": 0.5154709219932556, "eval_runtime": 154.1323, "eval_samples_per_second": 36.696, "eval_steps_per_second": 4.587, "eval_wer": 0.3989343775577346, "step": 1700 }, { "epoch": 1.0192525481313703, "eval_loss": 0.5067195296287537, "eval_runtime": 153.6675, "eval_samples_per_second": 36.807, "eval_steps_per_second": 4.601, "eval_wer": 0.38559804849866, "step": 1800 }, { "epoch": 1.0758776896942241, "eval_loss": 0.49161842465400696, "eval_runtime": 153.2473, "eval_samples_per_second": 36.908, "eval_steps_per_second": 4.613, "eval_wer": 0.3724061562163984, "step": 1900 }, { "epoch": 1.1325028312570782, "grad_norm": 0.9259862303733826, "learning_rate": 8.671428571428571e-05, "loss": 0.2774, "step": 2000 }, { "epoch": 1.1325028312570782, "eval_loss": 0.48551619052886963, "eval_runtime": 153.8121, "eval_samples_per_second": 36.772, "eval_steps_per_second": 4.597, "eval_wer": 0.36971000304922086, "step": 2000 }, { "epoch": 1.189127972819932, "eval_loss": 0.4797590672969818, "eval_runtime": 152.9797, "eval_samples_per_second": 36.972, "eval_steps_per_second": 4.622, "eval_wer": 0.3661311806904078, "step": 2100 }, { "epoch": 1.245753114382786, "eval_loss": 0.47735241055488586, "eval_runtime": 152.7086, "eval_samples_per_second": 37.038, "eval_steps_per_second": 4.63, "eval_wer": 0.36462261879924895, "step": 2200 }, { "epoch": 1.3023782559456398, "eval_loss": 0.46992629766464233, "eval_runtime": 152.5925, "eval_samples_per_second": 37.066, "eval_steps_per_second": 4.633, "eval_wer": 0.3584760315193144, "step": 2300 }, { "epoch": 1.3590033975084936, "eval_loss": 0.4651219844818115, "eval_runtime": 153.5802, "eval_samples_per_second": 36.828, "eval_steps_per_second": 4.603, "eval_wer": 0.3550255974065574, "step": 2400 }, { "epoch": 1.4156285390713477, "grad_norm": 0.43387308716773987, "learning_rate": 1.5428571428571428e-05, "loss": 0.2328, "step": 2500 }, { "epoch": 1.4156285390713477, "eval_loss": 0.4611109495162964, "eval_runtime": 153.4779, "eval_samples_per_second": 36.852, "eval_steps_per_second": 4.607, "eval_wer": 0.3570156152204266, "step": 2500 }, { "epoch": 1.4722536806342017, "eval_loss": 0.4593363106250763, "eval_runtime": 153.5126, "eval_samples_per_second": 36.844, "eval_steps_per_second": 4.605, "eval_wer": 0.3553465680216976, "step": 2600 }, { "epoch": 1.4722536806342017, "step": 2600, "total_flos": 2.3804122051094954e+19, "train_loss": 0.9595056893275334, "train_runtime": 10902.6713, "train_samples_per_second": 15.262, "train_steps_per_second": 0.238 } ], "logging_steps": 500, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 400, "total_flos": 2.3804122051094954e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }