{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.739938080495356, "eval_steps": 100, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15479876160990713, "eval_loss": 3.5586698055267334, "eval_runtime": 163.4718, "eval_samples_per_second": 34.599, "eval_steps_per_second": 4.325, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.30959752321981426, "eval_loss": 3.2505505084991455, "eval_runtime": 161.1897, "eval_samples_per_second": 35.089, "eval_steps_per_second": 4.386, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.46439628482972134, "eval_loss": 2.774010181427002, "eval_runtime": 161.9005, "eval_samples_per_second": 34.935, "eval_steps_per_second": 4.367, "eval_wer": 0.9999518544077289, "step": 300 }, { "epoch": 0.6191950464396285, "eval_loss": 1.11959707736969, "eval_runtime": 162.1425, "eval_samples_per_second": 34.883, "eval_steps_per_second": 4.36, "eval_wer": 0.7807449727977404, "step": 400 }, { "epoch": 0.7739938080495357, "grad_norm": 2.676631212234497, "learning_rate": 0.00029699999999999996, "loss": 3.6484, "step": 500 }, { "epoch": 0.7739938080495357, "eval_loss": 0.9134386777877808, "eval_runtime": 162.7792, "eval_samples_per_second": 34.746, "eval_steps_per_second": 4.343, "eval_wer": 0.6538813371635827, "step": 500 }, { "epoch": 0.9287925696594427, "eval_loss": 0.7674785256385803, "eval_runtime": 162.7057, "eval_samples_per_second": 34.762, "eval_steps_per_second": 4.345, "eval_wer": 0.5923191731796954, "step": 600 }, { "epoch": 1.08359133126935, "eval_loss": 0.7207810282707214, "eval_runtime": 163.9828, "eval_samples_per_second": 34.491, "eval_steps_per_second": 4.311, "eval_wer": 0.5289595737510231, "step": 700 }, { "epoch": 1.238390092879257, "eval_loss": 0.6209472417831421, "eval_runtime": 163.6218, "eval_samples_per_second": 34.568, "eval_steps_per_second": 4.321, "eval_wer": 0.4744748118309769, "step": 800 }, { "epoch": 1.3931888544891642, "eval_loss": 0.6220189332962036, "eval_runtime": 162.6081, "eval_samples_per_second": 34.783, "eval_steps_per_second": 4.348, "eval_wer": 0.47879186660461237, "step": 900 }, { "epoch": 1.5479876160990713, "grad_norm": 0.3950090706348419, "learning_rate": 0.0002672, "loss": 0.6286, "step": 1000 }, { "epoch": 1.5479876160990713, "eval_loss": 0.5738953351974487, "eval_runtime": 162.8483, "eval_samples_per_second": 34.732, "eval_steps_per_second": 4.341, "eval_wer": 0.4588114458121359, "step": 1000 }, { "epoch": 1.7027863777089784, "eval_loss": 0.564153790473938, "eval_runtime": 164.0752, "eval_samples_per_second": 34.472, "eval_steps_per_second": 4.309, "eval_wer": 0.4262008313138932, "step": 1100 }, { "epoch": 1.8575851393188856, "eval_loss": 0.5511888265609741, "eval_runtime": 164.5846, "eval_samples_per_second": 34.365, "eval_steps_per_second": 4.296, "eval_wer": 0.42080852497953813, "step": 1200 }, { "epoch": 2.0123839009287927, "eval_loss": 0.527522623538971, "eval_runtime": 162.1572, "eval_samples_per_second": 34.88, "eval_steps_per_second": 4.36, "eval_wer": 0.38652886328256647, "step": 1300 }, { "epoch": 2.1671826625387, "eval_loss": 0.4955059587955475, "eval_runtime": 160.6015, "eval_samples_per_second": 35.218, "eval_steps_per_second": 4.402, "eval_wer": 0.37545537706023013, "step": 1400 }, { "epoch": 2.321981424148607, "grad_norm": 0.4153783619403839, "learning_rate": 0.00023393333333333332, "loss": 0.4816, "step": 1500 }, { "epoch": 2.321981424148607, "eval_loss": 0.4908938407897949, "eval_runtime": 160.2948, "eval_samples_per_second": 35.285, "eval_steps_per_second": 4.411, "eval_wer": 0.37325672834651985, "step": 1500 }, { "epoch": 2.476780185758514, "eval_loss": 0.4982919991016388, "eval_runtime": 162.1379, "eval_samples_per_second": 34.884, "eval_steps_per_second": 4.36, "eval_wer": 0.37279132095456663, "step": 1600 }, { "epoch": 2.6315789473684212, "eval_loss": 0.48909762501716614, "eval_runtime": 161.7559, "eval_samples_per_second": 34.966, "eval_steps_per_second": 4.371, "eval_wer": 0.36550528799088444, "step": 1700 }, { "epoch": 2.7863777089783284, "eval_loss": 0.47961312532424927, "eval_runtime": 160.2886, "eval_samples_per_second": 35.286, "eval_steps_per_second": 4.411, "eval_wer": 0.3570958578742116, "step": 1800 }, { "epoch": 2.9411764705882355, "eval_loss": 0.46432051062583923, "eval_runtime": 162.1185, "eval_samples_per_second": 34.888, "eval_steps_per_second": 4.361, "eval_wer": 0.3591982154033798, "step": 1900 }, { "epoch": 3.0959752321981426, "grad_norm": 0.8931769728660583, "learning_rate": 0.00020079999999999997, "loss": 0.4017, "step": 2000 }, { "epoch": 3.0959752321981426, "eval_loss": 0.5084750652313232, "eval_runtime": 162.3398, "eval_samples_per_second": 34.841, "eval_steps_per_second": 4.355, "eval_wer": 0.3697902457030059, "step": 2000 }, { "epoch": 3.2507739938080498, "eval_loss": 0.6755269169807434, "eval_runtime": 163.0508, "eval_samples_per_second": 34.689, "eval_steps_per_second": 4.336, "eval_wer": 0.4530018776780986, "step": 2100 }, { "epoch": 3.405572755417957, "eval_loss": 0.710012674331665, "eval_runtime": 161.8251, "eval_samples_per_second": 34.951, "eval_steps_per_second": 4.369, "eval_wer": 0.5108247339956027, "step": 2200 }, { "epoch": 3.560371517027864, "eval_loss": 0.8310704231262207, "eval_runtime": 162.1264, "eval_samples_per_second": 34.886, "eval_steps_per_second": 4.361, "eval_wer": 0.5642984384779574, "step": 2300 }, { "epoch": 3.715170278637771, "eval_loss": 0.7031980156898499, "eval_runtime": 166.0393, "eval_samples_per_second": 34.064, "eval_steps_per_second": 4.258, "eval_wer": 0.5028807112708832, "step": 2400 }, { "epoch": 3.8699690402476783, "grad_norm": 4.817399024963379, "learning_rate": 0.00016766666666666666, "loss": 0.6839, "step": 2500 }, { "epoch": 3.8699690402476783, "eval_loss": 0.7070674896240234, "eval_runtime": 164.0683, "eval_samples_per_second": 34.473, "eval_steps_per_second": 4.309, "eval_wer": 0.5006660140264159, "step": 2500 }, { "epoch": 4.024767801857585, "eval_loss": 0.8223607540130615, "eval_runtime": 164.3114, "eval_samples_per_second": 34.422, "eval_steps_per_second": 4.303, "eval_wer": 0.5069409895524065, "step": 2600 }, { "epoch": 4.179566563467493, "eval_loss": 0.8343736529350281, "eval_runtime": 163.8342, "eval_samples_per_second": 34.523, "eval_steps_per_second": 4.315, "eval_wer": 0.5162491373914718, "step": 2700 }, { "epoch": 4.3343653250774, "eval_loss": 0.9089197516441345, "eval_runtime": 162.2031, "eval_samples_per_second": 34.87, "eval_steps_per_second": 4.359, "eval_wer": 0.5620195471104621, "step": 2800 }, { "epoch": 4.489164086687307, "eval_loss": 0.9664539098739624, "eval_runtime": 163.3849, "eval_samples_per_second": 34.618, "eval_steps_per_second": 4.327, "eval_wer": 0.5640095649243312, "step": 2900 }, { "epoch": 4.643962848297214, "grad_norm": 7.185929775238037, "learning_rate": 0.00013446666666666666, "loss": 0.8292, "step": 3000 }, { "epoch": 4.643962848297214, "eval_loss": 0.9127740859985352, "eval_runtime": 162.7094, "eval_samples_per_second": 34.761, "eval_steps_per_second": 4.345, "eval_wer": 0.5414774277414902, "step": 3000 }, { "epoch": 4.798761609907121, "eval_loss": 1.1924536228179932, "eval_runtime": 161.8545, "eval_samples_per_second": 34.945, "eval_steps_per_second": 4.368, "eval_wer": 0.5938598321323683, "step": 3100 }, { "epoch": 4.953560371517028, "eval_loss": 1.4327375888824463, "eval_runtime": 167.5589, "eval_samples_per_second": 33.755, "eval_steps_per_second": 4.219, "eval_wer": 0.6999406204361991, "step": 3200 }, { "epoch": 5.108359133126935, "eval_loss": 1.2741221189498901, "eval_runtime": 166.0465, "eval_samples_per_second": 34.063, "eval_steps_per_second": 4.258, "eval_wer": 0.7826707964885815, "step": 3300 }, { "epoch": 5.2631578947368425, "eval_loss": 1.9348175525665283, "eval_runtime": 163.4154, "eval_samples_per_second": 34.611, "eval_steps_per_second": 4.326, "eval_wer": 0.8741795188650479, "step": 3400 }, { "epoch": 5.41795665634675, "grad_norm": 0.6057500839233398, "learning_rate": 0.00010126666666666666, "loss": 1.4131, "step": 3500 }, { "epoch": 5.41795665634675, "eval_loss": 1.9216177463531494, "eval_runtime": 161.6687, "eval_samples_per_second": 34.985, "eval_steps_per_second": 4.373, "eval_wer": 0.9870167386175795, "step": 3500 }, { "epoch": 5.572755417956657, "eval_loss": 1.8565247058868408, "eval_runtime": 164.0672, "eval_samples_per_second": 34.474, "eval_steps_per_second": 4.309, "eval_wer": 0.9367045946943557, "step": 3600 }, { "epoch": 5.727554179566564, "eval_loss": 1.7827845811843872, "eval_runtime": 163.0587, "eval_samples_per_second": 34.687, "eval_steps_per_second": 4.336, "eval_wer": 0.8240278602493941, "step": 3700 }, { "epoch": 5.882352941176471, "eval_loss": 1.6846531629562378, "eval_runtime": 162.8654, "eval_samples_per_second": 34.728, "eval_steps_per_second": 4.341, "eval_wer": 0.8059090690247308, "step": 3800 }, { "epoch": 6.037151702786378, "eval_loss": 1.6440324783325195, "eval_runtime": 163.0292, "eval_samples_per_second": 34.693, "eval_steps_per_second": 4.337, "eval_wer": 0.7983823080996935, "step": 3900 }, { "epoch": 6.191950464396285, "grad_norm": 0.5905019640922546, "learning_rate": 6.806666666666666e-05, "loss": 1.7728, "step": 4000 }, { "epoch": 6.191950464396285, "eval_loss": 1.6765395402908325, "eval_runtime": 166.4929, "eval_samples_per_second": 33.971, "eval_steps_per_second": 4.246, "eval_wer": 0.8053313219174785, "step": 4000 }, { "epoch": 6.346749226006192, "eval_loss": 1.6733070611953735, "eval_runtime": 163.1125, "eval_samples_per_second": 34.675, "eval_steps_per_second": 4.334, "eval_wer": 0.8024265378504598, "step": 4100 }, { "epoch": 6.5015479876160995, "eval_loss": 1.6601324081420898, "eval_runtime": 163.6995, "eval_samples_per_second": 34.551, "eval_steps_per_second": 4.319, "eval_wer": 0.7899568294522636, "step": 4200 }, { "epoch": 6.656346749226007, "eval_loss": 1.6604827642440796, "eval_runtime": 162.8495, "eval_samples_per_second": 34.731, "eval_steps_per_second": 4.341, "eval_wer": 0.7972910080082168, "step": 4300 }, { "epoch": 6.811145510835914, "eval_loss": 1.6598834991455078, "eval_runtime": 163.8109, "eval_samples_per_second": 34.528, "eval_steps_per_second": 4.316, "eval_wer": 0.7804560992441142, "step": 4400 }, { "epoch": 6.965944272445821, "grad_norm": 0.9840993881225586, "learning_rate": 3.493333333333333e-05, "loss": 1.6777, "step": 4500 }, { "epoch": 6.965944272445821, "eval_loss": 1.635949730873108, "eval_runtime": 163.8566, "eval_samples_per_second": 34.518, "eval_steps_per_second": 4.315, "eval_wer": 0.7693184188987499, "step": 4500 }, { "epoch": 7.120743034055727, "eval_loss": 1.6399564743041992, "eval_runtime": 164.6612, "eval_samples_per_second": 34.349, "eval_steps_per_second": 4.294, "eval_wer": 0.7651137038404134, "step": 4600 }, { "epoch": 7.275541795665634, "eval_loss": 1.6759086847305298, "eval_runtime": 161.5503, "eval_samples_per_second": 35.011, "eval_steps_per_second": 4.376, "eval_wer": 0.7672481584310956, "step": 4700 }, { "epoch": 7.430340557275541, "eval_loss": 1.6848710775375366, "eval_runtime": 162.0136, "eval_samples_per_second": 34.911, "eval_steps_per_second": 4.364, "eval_wer": 0.7686283320761984, "step": 4800 }, { "epoch": 7.585139318885449, "eval_loss": 1.6857717037200928, "eval_runtime": 161.3723, "eval_samples_per_second": 35.049, "eval_steps_per_second": 4.381, "eval_wer": 0.7689974482836096, "step": 4900 }, { "epoch": 7.739938080495356, "grad_norm": 1.753746747970581, "learning_rate": 1.8e-06, "loss": 1.683, "step": 5000 }, { "epoch": 7.739938080495356, "eval_loss": 1.6955511569976807, "eval_runtime": 160.8529, "eval_samples_per_second": 35.163, "eval_steps_per_second": 4.395, "eval_wer": 0.7701529424981143, "step": 5000 }, { "epoch": 7.739938080495356, "step": 5000, "total_flos": 4.528229347801944e+19, "train_loss": 1.3220068420410156, "train_runtime": 20543.6538, "train_samples_per_second": 15.577, "train_steps_per_second": 0.243 } ], "logging_steps": 500, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 400, "total_flos": 4.528229347801944e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }