{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.6775106082036775, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14144271570014144, "eval_loss": 3.626366138458252, "eval_runtime": 164.728, "eval_samples_per_second": 34.335, "eval_steps_per_second": 4.292, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.2828854314002829, "eval_loss": 3.1079769134521484, "eval_runtime": 161.0986, "eval_samples_per_second": 35.109, "eval_steps_per_second": 4.389, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.4243281471004243, "eval_loss": 2.5440056324005127, "eval_runtime": 161.942, "eval_samples_per_second": 34.926, "eval_steps_per_second": 4.366, "eval_wer": 0.9862761130617415, "step": 300 }, { "epoch": 0.5657708628005658, "eval_loss": 1.2381267547607422, "eval_runtime": 162.1273, "eval_samples_per_second": 34.886, "eval_steps_per_second": 4.361, "eval_wer": 0.8110079681913649, "step": 400 }, { "epoch": 0.7072135785007072, "grad_norm": 3.555830955505371, "learning_rate": 0.00029699999999999996, "loss": 3.4586, "step": 500 }, { "epoch": 0.7072135785007072, "eval_loss": 1.0035638809204102, "eval_runtime": 161.1941, "eval_samples_per_second": 35.088, "eval_steps_per_second": 4.386, "eval_wer": 0.7184358616709153, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 0.8524216413497925, "eval_runtime": 162.0442, "eval_samples_per_second": 34.904, "eval_steps_per_second": 4.363, "eval_wer": 0.6379042213778398, "step": 600 }, { "epoch": 0.9900990099009901, "eval_loss": 0.7411457300186157, "eval_runtime": 162.8526, "eval_samples_per_second": 34.731, "eval_steps_per_second": 4.341, "eval_wer": 0.5673288121462813, "step": 700 }, { "epoch": 1.1315417256011315, "eval_loss": 0.6202757954597473, "eval_runtime": 162.5052, "eval_samples_per_second": 34.805, "eval_steps_per_second": 4.351, "eval_wer": 0.48383114488640916, "step": 800 }, { "epoch": 1.272984441301273, "eval_loss": 0.6016646027565002, "eval_runtime": 162.6225, "eval_samples_per_second": 34.78, "eval_steps_per_second": 4.347, "eval_wer": 0.48198739839353566, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 1.029418706893921, "learning_rate": 0.00022928571428571426, "loss": 0.8146, "step": 1000 }, { "epoch": 1.4144271570014144, "eval_loss": 0.5637466311454773, "eval_runtime": 163.5441, "eval_samples_per_second": 34.584, "eval_steps_per_second": 4.323, "eval_wer": 0.4461706186971927, "step": 1000 }, { "epoch": 1.5558698727015559, "eval_loss": 0.5380699038505554, "eval_runtime": 163.2757, "eval_samples_per_second": 34.641, "eval_steps_per_second": 4.33, "eval_wer": 0.4207589822519359, "step": 1100 }, { "epoch": 1.6973125884016973, "eval_loss": 0.5155439972877502, "eval_runtime": 163.2795, "eval_samples_per_second": 34.64, "eval_steps_per_second": 4.33, "eval_wer": 0.4043416221762622, "step": 1200 }, { "epoch": 1.8387553041018387, "eval_loss": 0.4857912063598633, "eval_runtime": 163.399, "eval_samples_per_second": 34.615, "eval_steps_per_second": 4.327, "eval_wer": 0.39034518140862234, "step": 1300 }, { "epoch": 1.9801980198019802, "eval_loss": 0.47579917311668396, "eval_runtime": 162.8853, "eval_samples_per_second": 34.724, "eval_steps_per_second": 4.34, "eval_wer": 0.38234492488737115, "step": 1400 }, { "epoch": 2.1216407355021216, "grad_norm": 1.043891429901123, "learning_rate": 0.0001582857142857143, "loss": 0.6294, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 0.46035128831863403, "eval_runtime": 163.2756, "eval_samples_per_second": 34.641, "eval_steps_per_second": 4.33, "eval_wer": 0.36421207894441504, "step": 1500 }, { "epoch": 2.263083451202263, "eval_loss": 0.45305728912353516, "eval_runtime": 164.2898, "eval_samples_per_second": 34.427, "eval_steps_per_second": 4.303, "eval_wer": 0.35355041444214647, "step": 1600 }, { "epoch": 2.4045261669024045, "eval_loss": 0.44349414110183716, "eval_runtime": 164.2629, "eval_samples_per_second": 34.433, "eval_steps_per_second": 4.304, "eval_wer": 0.3510813973995158, "step": 1700 }, { "epoch": 2.545968882602546, "eval_loss": 0.4366327226161957, "eval_runtime": 164.0684, "eval_samples_per_second": 34.473, "eval_steps_per_second": 4.309, "eval_wer": 0.3498949866127972, "step": 1800 }, { "epoch": 2.6874115983026874, "eval_loss": 0.43093863129615784, "eval_runtime": 164.1859, "eval_samples_per_second": 34.449, "eval_steps_per_second": 4.306, "eval_wer": 0.34513331088772387, "step": 1900 }, { "epoch": 2.828854314002829, "grad_norm": 0.763507604598999, "learning_rate": 8.728571428571428e-05, "loss": 0.4914, "step": 2000 }, { "epoch": 2.828854314002829, "eval_loss": 0.4252130389213562, "eval_runtime": 164.7353, "eval_samples_per_second": 34.334, "eval_steps_per_second": 4.292, "eval_wer": 0.3367803376460969, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 0.42176735401153564, "eval_runtime": 163.889, "eval_samples_per_second": 34.511, "eval_steps_per_second": 4.314, "eval_wer": 0.3285075272954644, "step": 2100 }, { "epoch": 3.1117397454031117, "eval_loss": 0.42082250118255615, "eval_runtime": 165.1043, "eval_samples_per_second": 34.257, "eval_steps_per_second": 4.282, "eval_wer": 0.32510862071729757, "step": 2200 }, { "epoch": 3.253182461103253, "eval_loss": 0.41444018483161926, "eval_runtime": 164.0969, "eval_samples_per_second": 34.467, "eval_steps_per_second": 4.308, "eval_wer": 0.32363362352299874, "step": 2300 }, { "epoch": 3.3946251768033946, "eval_loss": 0.41402822732925415, "eval_runtime": 164.4996, "eval_samples_per_second": 34.383, "eval_steps_per_second": 4.298, "eval_wer": 0.31654722395908486, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 0.8644358515739441, "learning_rate": 1.614285714285714e-05, "loss": 0.4011, "step": 2500 }, { "epoch": 3.536067892503536, "eval_loss": 0.4133088290691376, "eval_runtime": 164.6412, "eval_samples_per_second": 34.353, "eval_steps_per_second": 4.294, "eval_wer": 0.31574559504913985, "step": 2500 }, { "epoch": 3.6775106082036775, "eval_loss": 0.4086272120475769, "eval_runtime": 163.5841, "eval_samples_per_second": 34.575, "eval_steps_per_second": 4.322, "eval_wer": 0.3122344604235807, "step": 2600 }, { "epoch": 3.6775106082036775, "step": 2600, "total_flos": 9.93185228450562e+18, "train_loss": 1.1287707856985238, "train_runtime": 7180.9983, "train_samples_per_second": 11.586, "train_steps_per_second": 0.362 } ], "logging_steps": 500, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 9.93185228450562e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }