{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.8312570781426953, "eval_steps": 100, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.056625141562853906, "eval_loss": 3.596607208251953, "eval_runtime": 152.7562, "eval_samples_per_second": 37.026, "eval_steps_per_second": 4.628, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.11325028312570781, "eval_loss": 3.5807790756225586, "eval_runtime": 150.6166, "eval_samples_per_second": 37.552, "eval_steps_per_second": 4.694, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.16987542468856173, "eval_loss": 2.264209270477295, "eval_runtime": 151.9721, "eval_samples_per_second": 37.217, "eval_steps_per_second": 4.652, "eval_wer": 0.9883969122626823, "step": 300 }, { "epoch": 0.22650056625141562, "eval_loss": 1.0231246948242188, "eval_runtime": 152.2425, "eval_samples_per_second": 37.151, "eval_steps_per_second": 4.644, "eval_wer": 0.7425976151883296, "step": 400 }, { "epoch": 0.28312570781426954, "grad_norm": 1.5508322715759277, "learning_rate": 0.0002982, "loss": 3.6826, "step": 500 }, { "epoch": 0.28312570781426954, "eval_loss": 0.9161245822906494, "eval_runtime": 152.0295, "eval_samples_per_second": 37.203, "eval_steps_per_second": 4.65, "eval_wer": 0.645680537946751, "step": 500 }, { "epoch": 0.33975084937712347, "eval_loss": 0.7942017316818237, "eval_runtime": 151.7127, "eval_samples_per_second": 37.281, "eval_steps_per_second": 4.66, "eval_wer": 0.5827382003177609, "step": 600 }, { "epoch": 0.39637599093997733, "eval_loss": 0.7551485300064087, "eval_runtime": 152.436, "eval_samples_per_second": 37.104, "eval_steps_per_second": 4.638, "eval_wer": 0.5417021071720884, "step": 700 }, { "epoch": 0.45300113250283125, "eval_loss": 0.7242529392242432, "eval_runtime": 151.7599, "eval_samples_per_second": 37.269, "eval_steps_per_second": 4.659, "eval_wer": 0.5352345492770136, "step": 800 }, { "epoch": 0.5096262740656852, "eval_loss": 0.666481077671051, "eval_runtime": 152.1768, "eval_samples_per_second": 37.167, "eval_steps_per_second": 4.646, "eval_wer": 0.49851551090497664, "step": 900 }, { "epoch": 0.5662514156285391, "grad_norm": 1.319555401802063, "learning_rate": 0.00026693333333333333, "loss": 0.4714, "step": 1000 }, { "epoch": 0.5662514156285391, "eval_loss": 0.6402752995491028, "eval_runtime": 152.9026, "eval_samples_per_second": 36.991, "eval_steps_per_second": 4.624, "eval_wer": 0.48628653046813564, "step": 1000 }, { "epoch": 0.622876557191393, "eval_loss": 0.6219765543937683, "eval_runtime": 153.2975, "eval_samples_per_second": 36.896, "eval_steps_per_second": 4.612, "eval_wer": 0.4727415705092199, "step": 1100 }, { "epoch": 0.6795016987542469, "eval_loss": 0.6100151538848877, "eval_runtime": 152.7807, "eval_samples_per_second": 37.02, "eval_steps_per_second": 4.628, "eval_wer": 0.46418770361573397, "step": 1200 }, { "epoch": 0.7361268403171007, "eval_loss": 0.586254358291626, "eval_runtime": 152.4833, "eval_samples_per_second": 37.093, "eval_steps_per_second": 4.637, "eval_wer": 0.4441912342925005, "step": 1300 }, { "epoch": 0.7927519818799547, "eval_loss": 0.5667628645896912, "eval_runtime": 154.0765, "eval_samples_per_second": 36.709, "eval_steps_per_second": 4.589, "eval_wer": 0.43844586028149124, "step": 1400 }, { "epoch": 0.8493771234428086, "grad_norm": 1.6093908548355103, "learning_rate": 0.00023366666666666664, "loss": 0.3661, "step": 1500 }, { "epoch": 0.8493771234428086, "eval_loss": 0.5753483176231384, "eval_runtime": 154.2844, "eval_samples_per_second": 36.66, "eval_steps_per_second": 4.582, "eval_wer": 0.44213702235560337, "step": 1500 }, { "epoch": 0.9060022650056625, "eval_loss": 0.5641067624092102, "eval_runtime": 153.3007, "eval_samples_per_second": 36.895, "eval_steps_per_second": 4.612, "eval_wer": 0.43111168172553804, "step": 1600 }, { "epoch": 0.9626274065685164, "eval_loss": 0.5466340780258179, "eval_runtime": 154.2149, "eval_samples_per_second": 36.676, "eval_steps_per_second": 4.585, "eval_wer": 0.41525573333761295, "step": 1700 }, { "epoch": 1.0192525481313703, "eval_loss": 0.5303572416305542, "eval_runtime": 152.6129, "eval_samples_per_second": 37.061, "eval_steps_per_second": 4.633, "eval_wer": 0.40233666607822055, "step": 1800 }, { "epoch": 1.0758776896942241, "eval_loss": 0.5279697179794312, "eval_runtime": 153.0429, "eval_samples_per_second": 36.957, "eval_steps_per_second": 4.62, "eval_wer": 0.3938469933077627, "step": 1900 }, { "epoch": 1.1325028312570782, "grad_norm": 0.6171467900276184, "learning_rate": 0.00020046666666666667, "loss": 0.3069, "step": 2000 }, { "epoch": 1.1325028312570782, "eval_loss": 0.5219519138336182, "eval_runtime": 153.2427, "eval_samples_per_second": 36.909, "eval_steps_per_second": 4.614, "eval_wer": 0.3902200253566786, "step": 2000 }, { "epoch": 1.189127972819932, "eval_loss": 0.50642991065979, "eval_runtime": 153.5923, "eval_samples_per_second": 36.825, "eval_steps_per_second": 4.603, "eval_wer": 0.3928519844008281, "step": 2100 }, { "epoch": 1.245753114382786, "eval_loss": 0.5032854676246643, "eval_runtime": 152.9375, "eval_samples_per_second": 36.982, "eval_steps_per_second": 4.623, "eval_wer": 0.38232414822423005, "step": 2200 }, { "epoch": 1.3023782559456398, "eval_loss": 0.4920494556427002, "eval_runtime": 153.4126, "eval_samples_per_second": 36.868, "eval_steps_per_second": 4.608, "eval_wer": 0.37728490956652916, "step": 2300 }, { "epoch": 1.3590033975084936, "eval_loss": 0.49545198678970337, "eval_runtime": 153.0386, "eval_samples_per_second": 36.958, "eval_steps_per_second": 4.62, "eval_wer": 0.3736739901462021, "step": 2400 }, { "epoch": 1.4156285390713477, "grad_norm": 0.5370103716850281, "learning_rate": 0.0001672, "loss": 0.264, "step": 2500 }, { "epoch": 1.4156285390713477, "eval_loss": 0.4954512119293213, "eval_runtime": 152.6004, "eval_samples_per_second": 37.064, "eval_steps_per_second": 4.633, "eval_wer": 0.3739147181075573, "step": 2500 }, { "epoch": 1.4722536806342017, "eval_loss": 0.4845733046531677, "eval_runtime": 150.9099, "eval_samples_per_second": 37.479, "eval_steps_per_second": 4.685, "eval_wer": 0.3651040747219592, "step": 2600 }, { "epoch": 1.5288788221970555, "eval_loss": 0.47606581449508667, "eval_runtime": 150.159, "eval_samples_per_second": 37.667, "eval_steps_per_second": 4.708, "eval_wer": 0.36447818202243587, "step": 2700 }, { "epoch": 1.5855039637599093, "eval_loss": 0.4672282934188843, "eval_runtime": 151.2978, "eval_samples_per_second": 37.383, "eval_steps_per_second": 4.673, "eval_wer": 0.36266469804689383, "step": 2800 }, { "epoch": 1.6421291053227633, "eval_loss": 0.4644935131072998, "eval_runtime": 151.1778, "eval_samples_per_second": 37.413, "eval_steps_per_second": 4.677, "eval_wer": 0.36399672609972555, "step": 2900 }, { "epoch": 1.6987542468856174, "grad_norm": 1.252485990524292, "learning_rate": 0.00013386666666666665, "loss": 0.2498, "step": 3000 }, { "epoch": 1.6987542468856174, "eval_loss": 0.46515557169914246, "eval_runtime": 152.0904, "eval_samples_per_second": 37.188, "eval_steps_per_second": 4.649, "eval_wer": 0.3588772447882396, "step": 3000 }, { "epoch": 1.7553793884484712, "eval_loss": 0.45957261323928833, "eval_runtime": 153.0761, "eval_samples_per_second": 36.949, "eval_steps_per_second": 4.619, "eval_wer": 0.35335655020782847, "step": 3100 }, { "epoch": 1.812004530011325, "eval_loss": 0.450503408908844, "eval_runtime": 153.7448, "eval_samples_per_second": 36.788, "eval_steps_per_second": 4.599, "eval_wer": 0.35098136765579113, "step": 3200 }, { "epoch": 1.8686296715741788, "eval_loss": 0.4490334093570709, "eval_runtime": 152.7992, "eval_samples_per_second": 37.016, "eval_steps_per_second": 4.627, "eval_wer": 0.3522010559933238, "step": 3300 }, { "epoch": 1.9252548131370328, "eval_loss": 0.44600149989128113, "eval_runtime": 153.6597, "eval_samples_per_second": 36.809, "eval_steps_per_second": 4.601, "eval_wer": 0.34557301279067903, "step": 3400 }, { "epoch": 1.9818799546998869, "grad_norm": 0.38641658425331116, "learning_rate": 0.00010059999999999999, "loss": 0.2304, "step": 3500 }, { "epoch": 1.9818799546998869, "eval_loss": 0.44401755928993225, "eval_runtime": 152.5541, "eval_samples_per_second": 37.075, "eval_steps_per_second": 4.634, "eval_wer": 0.3404535314791931, "step": 3500 }, { "epoch": 2.0385050962627407, "eval_loss": 0.44931888580322266, "eval_runtime": 151.9569, "eval_samples_per_second": 37.221, "eval_steps_per_second": 4.653, "eval_wer": 0.3424756463545762, "step": 3600 }, { "epoch": 2.0951302378255945, "eval_loss": 0.4386073052883148, "eval_runtime": 152.2426, "eval_samples_per_second": 37.151, "eval_steps_per_second": 4.644, "eval_wer": 0.33743640769687533, "step": 3700 }, { "epoch": 2.1517553793884483, "eval_loss": 0.4424333870410919, "eval_runtime": 152.4948, "eval_samples_per_second": 37.09, "eval_steps_per_second": 4.636, "eval_wer": 0.33806230039639873, "step": 3800 }, { "epoch": 2.2083805209513026, "eval_loss": 0.43851807713508606, "eval_runtime": 151.5663, "eval_samples_per_second": 37.317, "eval_steps_per_second": 4.665, "eval_wer": 0.3372598738585483, "step": 3900 }, { "epoch": 2.2650056625141564, "grad_norm": 0.3740696609020233, "learning_rate": 6.739999999999998e-05, "loss": 0.1947, "step": 4000 }, { "epoch": 2.2650056625141564, "eval_loss": 0.4393535852432251, "eval_runtime": 151.3645, "eval_samples_per_second": 37.367, "eval_steps_per_second": 4.671, "eval_wer": 0.3354624384137632, "step": 4000 }, { "epoch": 2.32163080407701, "eval_loss": 0.4353909194469452, "eval_runtime": 151.9247, "eval_samples_per_second": 37.229, "eval_steps_per_second": 4.654, "eval_wer": 0.33242926610068846, "step": 4100 }, { "epoch": 2.378255945639864, "eval_loss": 0.4344673752784729, "eval_runtime": 151.7721, "eval_samples_per_second": 37.266, "eval_steps_per_second": 4.658, "eval_wer": 0.3324774116929595, "step": 4200 }, { "epoch": 2.434881087202718, "eval_loss": 0.4342668950557709, "eval_runtime": 151.7151, "eval_samples_per_second": 37.28, "eval_steps_per_second": 4.66, "eval_wer": 0.33311935292323985, "step": 4300 }, { "epoch": 2.491506228765572, "eval_loss": 0.4516645669937134, "eval_runtime": 150.7864, "eval_samples_per_second": 37.51, "eval_steps_per_second": 4.689, "eval_wer": 0.3349328368987819, "step": 4400 }, { "epoch": 2.548131370328426, "grad_norm": 1.0771362781524658, "learning_rate": 3.4133333333333334e-05, "loss": 0.1996, "step": 4500 }, { "epoch": 2.548131370328426, "eval_loss": 0.45709919929504395, "eval_runtime": 151.5098, "eval_samples_per_second": 37.331, "eval_steps_per_second": 4.666, "eval_wer": 0.3327662852465857, "step": 4500 }, { "epoch": 2.6047565118912797, "eval_loss": 0.44231659173965454, "eval_runtime": 151.9331, "eval_samples_per_second": 37.227, "eval_steps_per_second": 4.653, "eval_wer": 0.3254963008136605, "step": 4600 }, { "epoch": 2.661381653454134, "eval_loss": 0.44050487875938416, "eval_runtime": 151.3798, "eval_samples_per_second": 37.363, "eval_steps_per_second": 4.67, "eval_wer": 0.3253518640368474, "step": 4700 }, { "epoch": 2.7180067950169873, "eval_loss": 0.4436578154563904, "eval_runtime": 152.4074, "eval_samples_per_second": 37.111, "eval_steps_per_second": 4.639, "eval_wer": 0.3237791080226605, "step": 4800 }, { "epoch": 2.7746319365798415, "eval_loss": 0.44009852409362793, "eval_runtime": 152.2847, "eval_samples_per_second": 37.141, "eval_steps_per_second": 4.643, "eval_wer": 0.3232334579769222, "step": 4900 }, { "epoch": 2.8312570781426953, "grad_norm": 0.8818329572677612, "learning_rate": 7.999999999999999e-07, "loss": 0.2024, "step": 5000 }, { "epoch": 2.8312570781426953, "eval_loss": 0.44040822982788086, "eval_runtime": 151.6855, "eval_samples_per_second": 37.288, "eval_steps_per_second": 4.661, "eval_wer": 0.32349023446903435, "step": 5000 }, { "epoch": 2.8312570781426953, "step": 5000, "total_flos": 4.569633767265734e+19, "train_loss": 0.6167776947021484, "train_runtime": 19530.6251, "train_samples_per_second": 16.385, "train_steps_per_second": 0.256 } ], "logging_steps": 500, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "total_flos": 4.569633767265734e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }