{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.536067892503536, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14144271570014144, "eval_loss": 4.031618595123291, "eval_runtime": 161.4756, "eval_samples_per_second": 35.027, "eval_steps_per_second": 4.378, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.2828854314002829, "eval_loss": 3.0548782348632812, "eval_runtime": 158.3539, "eval_samples_per_second": 35.717, "eval_steps_per_second": 4.465, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.4243281471004243, "eval_loss": 2.5533947944641113, "eval_runtime": 158.9465, "eval_samples_per_second": 35.584, "eval_steps_per_second": 4.448, "eval_wer": 0.9862296976311228, "step": 300 }, { "epoch": 0.5657708628005658, "eval_loss": 1.4279608726501465, "eval_runtime": 159.2587, "eval_samples_per_second": 35.515, "eval_steps_per_second": 4.439, "eval_wer": 0.8846536560313283, "step": 400 }, { "epoch": 0.7072135785007072, "grad_norm": 1.9099421501159668, "learning_rate": 0.00029699999999999996, "loss": 3.6818, "step": 500 }, { "epoch": 0.7072135785007072, "eval_loss": 1.1378353834152222, "eval_runtime": 160.4319, "eval_samples_per_second": 35.255, "eval_steps_per_second": 4.407, "eval_wer": 0.7958368106824164, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 0.9263126254081726, "eval_runtime": 159.8222, "eval_samples_per_second": 35.389, "eval_steps_per_second": 4.424, "eval_wer": 0.6777299865185851, "step": 600 }, { "epoch": 0.9900990099009901, "eval_loss": 0.8501134514808655, "eval_runtime": 160.338, "eval_samples_per_second": 35.275, "eval_steps_per_second": 4.409, "eval_wer": 0.6388104256275278, "step": 700 }, { "epoch": 1.1315417256011315, "eval_loss": 0.6984566450119019, "eval_runtime": 159.8325, "eval_samples_per_second": 35.387, "eval_steps_per_second": 4.423, "eval_wer": 0.5563972523592476, "step": 800 }, { "epoch": 1.272984441301273, "eval_loss": 0.6664860248565674, "eval_runtime": 162.9316, "eval_samples_per_second": 34.714, "eval_steps_per_second": 4.339, "eval_wer": 0.5401393079540348, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 0.8423302173614502, "learning_rate": 0.00022574999999999996, "loss": 0.895, "step": 1000 }, { "epoch": 1.4144271570014144, "eval_loss": 0.6228350400924683, "eval_runtime": 161.5875, "eval_samples_per_second": 35.003, "eval_steps_per_second": 4.375, "eval_wer": 0.5000802465173011, "step": 1000 }, { "epoch": 1.5558698727015559, "eval_loss": 0.5974757075309753, "eval_runtime": 160.961, "eval_samples_per_second": 35.139, "eval_steps_per_second": 4.392, "eval_wer": 0.48359761186364514, "step": 1100 }, { "epoch": 1.6973125884016973, "eval_loss": 0.5826218128204346, "eval_runtime": 162.2219, "eval_samples_per_second": 34.866, "eval_steps_per_second": 4.358, "eval_wer": 0.4677248507414778, "step": 1200 }, { "epoch": 1.8387553041018387, "eval_loss": 0.5473496317863464, "eval_runtime": 164.7294, "eval_samples_per_second": 34.335, "eval_steps_per_second": 4.292, "eval_wer": 0.4512903639982025, "step": 1300 }, { "epoch": 1.9801980198019802, "eval_loss": 0.5284178256988525, "eval_runtime": 168.0547, "eval_samples_per_second": 33.656, "eval_steps_per_second": 4.207, "eval_wer": 0.4376966039673878, "step": 1400 }, { "epoch": 2.1216407355021216, "grad_norm": 1.0099021196365356, "learning_rate": 0.0001512, "loss": 0.687, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 0.5137470960617065, "eval_runtime": 160.3266, "eval_samples_per_second": 35.278, "eval_steps_per_second": 4.41, "eval_wer": 0.42363741413622646, "step": 1500 }, { "epoch": 2.263083451202263, "eval_loss": 0.5110819339752197, "eval_runtime": 161.254, "eval_samples_per_second": 35.075, "eval_steps_per_second": 4.384, "eval_wer": 0.4103004429607755, "step": 1600 }, { "epoch": 2.4045261669024045, "eval_loss": 0.49603915214538574, "eval_runtime": 160.5657, "eval_samples_per_second": 35.225, "eval_steps_per_second": 4.403, "eval_wer": 0.4084226744559286, "step": 1700 }, { "epoch": 2.545968882602546, "eval_loss": 0.4876905679702759, "eval_runtime": 161.8676, "eval_samples_per_second": 34.942, "eval_steps_per_second": 4.368, "eval_wer": 0.40148937536110935, "step": 1800 }, { "epoch": 2.6874115983026874, "eval_loss": 0.47859108448028564, "eval_runtime": 161.5548, "eval_samples_per_second": 35.01, "eval_steps_per_second": 4.376, "eval_wer": 0.39929062078705785, "step": 1900 }, { "epoch": 2.828854314002829, "grad_norm": 0.5755736231803894, "learning_rate": 7.664999999999999e-05, "loss": 0.5319, "step": 2000 }, { "epoch": 2.828854314002829, "eval_loss": 0.47312092781066895, "eval_runtime": 162.5051, "eval_samples_per_second": 34.805, "eval_steps_per_second": 4.351, "eval_wer": 0.39299929383064774, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 0.4668172001838684, "eval_runtime": 162.21, "eval_samples_per_second": 34.868, "eval_steps_per_second": 4.359, "eval_wer": 0.3877190729922321, "step": 2100 }, { "epoch": 3.1117397454031117, "eval_loss": 0.46727854013442993, "eval_runtime": 161.4684, "eval_samples_per_second": 35.029, "eval_steps_per_second": 4.379, "eval_wer": 0.3849585927970726, "step": 2200 }, { "epoch": 3.253182461103253, "eval_loss": 0.46300554275512695, "eval_runtime": 161.1936, "eval_samples_per_second": 35.088, "eval_steps_per_second": 4.386, "eval_wer": 0.3804326892212878, "step": 2300 }, { "epoch": 3.3946251768033946, "eval_loss": 0.4593857228755951, "eval_runtime": 161.5089, "eval_samples_per_second": 35.02, "eval_steps_per_second": 4.377, "eval_wer": 0.3768697438531168, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 0.887208104133606, "learning_rate": 1.9499999999999995e-06, "loss": 0.4355, "step": 2500 }, { "epoch": 3.536067892503536, "eval_loss": 0.4583967626094818, "eval_runtime": 162.1219, "eval_samples_per_second": 34.887, "eval_steps_per_second": 4.361, "eval_wer": 0.37831418116453747, "step": 2500 }, { "epoch": 3.536067892503536, "step": 2500, "total_flos": 9.55169606524761e+18, "train_loss": 1.2462444946289062, "train_runtime": 6962.0027, "train_samples_per_second": 11.491, "train_steps_per_second": 0.359 } ], "logging_steps": 500, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 9.55169606524761e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }