{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "eval_loss": 4.132408142089844, "eval_runtime": 7.7892, "eval_samples_per_second": 33.893, "eval_steps_per_second": 4.237, "eval_wer": 1.0, "step": 50 }, { "epoch": 0.71, "eval_loss": 3.357618570327759, "eval_runtime": 7.7699, "eval_samples_per_second": 33.977, "eval_steps_per_second": 4.247, "eval_wer": 1.0, "step": 100 }, { "epoch": 1.07, "eval_loss": 3.0935420989990234, "eval_runtime": 8.3088, "eval_samples_per_second": 31.773, "eval_steps_per_second": 3.972, "eval_wer": 1.0, "step": 150 }, { "epoch": 1.43, "eval_loss": 3.000765085220337, "eval_runtime": 7.8476, "eval_samples_per_second": 33.641, "eval_steps_per_second": 4.205, "eval_wer": 1.2494669509594882, "step": 200 }, { "epoch": 1.79, "eval_loss": 2.8665499687194824, "eval_runtime": 7.9301, "eval_samples_per_second": 33.291, "eval_steps_per_second": 4.161, "eval_wer": 1.0874200426439233, "step": 250 }, { "epoch": 2.14, "eval_loss": 2.7977161407470703, "eval_runtime": 7.8406, "eval_samples_per_second": 33.671, "eval_steps_per_second": 4.209, "eval_wer": 1.537313432835821, "step": 300 }, { "epoch": 2.5, "eval_loss": 2.618842601776123, "eval_runtime": 8.0288, "eval_samples_per_second": 32.882, "eval_steps_per_second": 4.11, "eval_wer": 1.2345415778251598, "step": 350 }, { "epoch": 2.86, "eval_loss": 2.3096766471862793, "eval_runtime": 8.0346, "eval_samples_per_second": 32.858, "eval_steps_per_second": 4.107, "eval_wer": 1.1279317697228144, "step": 400 }, { "epoch": 3.21, "eval_loss": 1.8073012828826904, "eval_runtime": 7.9049, "eval_samples_per_second": 33.397, "eval_steps_per_second": 4.175, "eval_wer": 1.251599147121535, "step": 450 }, { "epoch": 3.57, "learning_rate": 0.0002409, "loss": 3.5589, "step": 500 }, { "epoch": 3.57, "eval_loss": 1.3744713068008423, "eval_runtime": 7.9139, "eval_samples_per_second": 33.359, "eval_steps_per_second": 4.17, "eval_wer": 1.0895522388059702, "step": 500 }, { "epoch": 3.93, "eval_loss": 1.1971436738967896, "eval_runtime": 8.4443, "eval_samples_per_second": 31.264, "eval_steps_per_second": 3.908, "eval_wer": 1.2921108742004264, "step": 550 }, { "epoch": 4.29, "eval_loss": 1.0361448526382446, "eval_runtime": 7.8907, "eval_samples_per_second": 33.457, "eval_steps_per_second": 4.182, "eval_wer": 0.9872068230277186, "step": 600 }, { "epoch": 4.64, "eval_loss": 1.0113328695297241, "eval_runtime": 8.6808, "eval_samples_per_second": 30.412, "eval_steps_per_second": 3.802, "eval_wer": 1.1556503198294243, "step": 650 }, { "epoch": 5.0, "eval_loss": 0.9761010408401489, "eval_runtime": 8.374, "eval_samples_per_second": 31.526, "eval_steps_per_second": 3.941, "eval_wer": 0.9509594882729211, "step": 700 }, { "epoch": 5.36, "eval_loss": 0.8795022368431091, "eval_runtime": 8.0897, "eval_samples_per_second": 32.634, "eval_steps_per_second": 4.079, "eval_wer": 1.1279317697228144, "step": 750 }, { "epoch": 5.71, "eval_loss": 0.8115519881248474, "eval_runtime": 7.9853, "eval_samples_per_second": 33.061, "eval_steps_per_second": 4.133, "eval_wer": 0.8869936034115139, "step": 800 }, { "epoch": 6.07, "eval_loss": 0.7683095932006836, "eval_runtime": 8.0654, "eval_samples_per_second": 32.732, "eval_steps_per_second": 4.092, "eval_wer": 0.9275053304904051, "step": 850 }, { "epoch": 6.43, "eval_loss": 0.7249290943145752, "eval_runtime": 7.8449, "eval_samples_per_second": 33.652, "eval_steps_per_second": 4.207, "eval_wer": 1.0255863539445629, "step": 900 }, { "epoch": 6.79, "eval_loss": 0.7122178077697754, "eval_runtime": 8.1435, "eval_samples_per_second": 32.419, "eval_steps_per_second": 4.052, "eval_wer": 0.9211087420042644, "step": 950 }, { "epoch": 7.14, "learning_rate": 0.00016634999999999998, "loss": 1.5095, "step": 1000 }, { "epoch": 7.14, "eval_loss": 0.7041318416595459, "eval_runtime": 9.0145, "eval_samples_per_second": 29.286, "eval_steps_per_second": 3.661, "eval_wer": 1.0319829424307037, "step": 1000 }, { "epoch": 7.5, "eval_loss": 0.678531289100647, "eval_runtime": 8.7862, "eval_samples_per_second": 30.047, "eval_steps_per_second": 3.756, "eval_wer": 0.8699360341151386, "step": 1050 }, { "epoch": 7.86, "eval_loss": 0.7056036591529846, "eval_runtime": 8.0033, "eval_samples_per_second": 32.986, "eval_steps_per_second": 4.123, "eval_wer": 0.9680170575692963, "step": 1100 }, { "epoch": 8.21, "eval_loss": 0.6487303972244263, "eval_runtime": 8.7104, "eval_samples_per_second": 30.309, "eval_steps_per_second": 3.789, "eval_wer": 0.8550106609808102, "step": 1150 }, { "epoch": 8.57, "eval_loss": 0.5972908139228821, "eval_runtime": 7.951, "eval_samples_per_second": 33.204, "eval_steps_per_second": 4.15, "eval_wer": 0.7889125799573561, "step": 1200 }, { "epoch": 8.93, "eval_loss": 0.5955255627632141, "eval_runtime": 7.9427, "eval_samples_per_second": 33.238, "eval_steps_per_second": 4.155, "eval_wer": 0.8443496801705757, "step": 1250 }, { "epoch": 9.29, "eval_loss": 0.5822768211364746, "eval_runtime": 7.8596, "eval_samples_per_second": 33.59, "eval_steps_per_second": 4.199, "eval_wer": 0.8017057569296375, "step": 1300 }, { "epoch": 9.64, "eval_loss": 0.5886873006820679, "eval_runtime": 7.8662, "eval_samples_per_second": 33.561, "eval_steps_per_second": 4.195, "eval_wer": 0.7569296375266524, "step": 1350 }, { "epoch": 10.0, "eval_loss": 0.5869713425636292, "eval_runtime": 7.8909, "eval_samples_per_second": 33.456, "eval_steps_per_second": 4.182, "eval_wer": 0.7569296375266524, "step": 1400 }, { "epoch": 10.36, "eval_loss": 0.5845889449119568, "eval_runtime": 7.8445, "eval_samples_per_second": 33.654, "eval_steps_per_second": 4.207, "eval_wer": 0.7484008528784648, "step": 1450 }, { "epoch": 10.71, "learning_rate": 9.18e-05, "loss": 1.1157, "step": 1500 }, { "epoch": 10.71, "eval_loss": 0.5864734053611755, "eval_runtime": 8.0229, "eval_samples_per_second": 32.906, "eval_steps_per_second": 4.113, "eval_wer": 0.7547974413646056, "step": 1500 }, { "epoch": 11.07, "eval_loss": 0.5586370825767517, "eval_runtime": 7.8673, "eval_samples_per_second": 33.557, "eval_steps_per_second": 4.195, "eval_wer": 0.7334754797441365, "step": 1550 }, { "epoch": 11.43, "eval_loss": 0.5573432445526123, "eval_runtime": 7.8679, "eval_samples_per_second": 33.554, "eval_steps_per_second": 4.194, "eval_wer": 0.744136460554371, "step": 1600 }, { "epoch": 11.79, "eval_loss": 0.5594019889831543, "eval_runtime": 7.9618, "eval_samples_per_second": 33.158, "eval_steps_per_second": 4.145, "eval_wer": 0.7292110874200426, "step": 1650 }, { "epoch": 12.14, "eval_loss": 0.5614868998527527, "eval_runtime": 7.8272, "eval_samples_per_second": 33.729, "eval_steps_per_second": 4.216, "eval_wer": 0.7569296375266524, "step": 1700 }, { "epoch": 12.5, "eval_loss": 0.5569693446159363, "eval_runtime": 8.0892, "eval_samples_per_second": 32.636, "eval_steps_per_second": 4.08, "eval_wer": 0.7654584221748401, "step": 1750 }, { "epoch": 12.86, "eval_loss": 0.5408880710601807, "eval_runtime": 7.8701, "eval_samples_per_second": 33.545, "eval_steps_per_second": 4.193, "eval_wer": 0.7121535181236673, "step": 1800 }, { "epoch": 13.21, "eval_loss": 0.5358032584190369, "eval_runtime": 7.8977, "eval_samples_per_second": 33.427, "eval_steps_per_second": 4.178, "eval_wer": 0.6652452025586354, "step": 1850 }, { "epoch": 13.57, "eval_loss": 0.5394359827041626, "eval_runtime": 7.8219, "eval_samples_per_second": 33.751, "eval_steps_per_second": 4.219, "eval_wer": 0.6823027718550106, "step": 1900 }, { "epoch": 13.93, "eval_loss": 0.5434439778327942, "eval_runtime": 7.8516, "eval_samples_per_second": 33.624, "eval_steps_per_second": 4.203, "eval_wer": 0.6993603411513859, "step": 1950 }, { "epoch": 14.29, "learning_rate": 1.74e-05, "loss": 0.8658, "step": 2000 }, { "epoch": 14.29, "eval_loss": 0.5396074056625366, "eval_runtime": 7.9269, "eval_samples_per_second": 33.304, "eval_steps_per_second": 4.163, "eval_wer": 0.6823027718550106, "step": 2000 }, { "epoch": 14.64, "eval_loss": 0.5431792736053467, "eval_runtime": 7.8451, "eval_samples_per_second": 33.651, "eval_steps_per_second": 4.206, "eval_wer": 0.6780383795309168, "step": 2050 }, { "epoch": 15.0, "eval_loss": 0.5424522757530212, "eval_runtime": 7.9063, "eval_samples_per_second": 33.391, "eval_steps_per_second": 4.174, "eval_wer": 0.6865671641791045, "step": 2100 }, { "epoch": 15.0, "step": 2100, "total_flos": 2.38289603930769e+18, "train_loss": 1.7177187274751209, "train_runtime": 3292.1945, "train_samples_per_second": 20.344, "train_steps_per_second": 0.638 } ], "max_steps": 2100, "num_train_epochs": 15, "total_flos": 2.38289603930769e+18, "trial_name": null, "trial_params": null }