|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.739938080495356, |
|
"eval_steps": 100, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15479876160990713, |
|
"eval_loss": 3.5586698055267334, |
|
"eval_runtime": 163.4718, |
|
"eval_samples_per_second": 34.599, |
|
"eval_steps_per_second": 4.325, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.30959752321981426, |
|
"eval_loss": 3.2505505084991455, |
|
"eval_runtime": 161.1897, |
|
"eval_samples_per_second": 35.089, |
|
"eval_steps_per_second": 4.386, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.46439628482972134, |
|
"eval_loss": 2.774010181427002, |
|
"eval_runtime": 161.9005, |
|
"eval_samples_per_second": 34.935, |
|
"eval_steps_per_second": 4.367, |
|
"eval_wer": 0.9999518544077289, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6191950464396285, |
|
"eval_loss": 1.11959707736969, |
|
"eval_runtime": 162.1425, |
|
"eval_samples_per_second": 34.883, |
|
"eval_steps_per_second": 4.36, |
|
"eval_wer": 0.7807449727977404, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7739938080495357, |
|
"grad_norm": 2.676631212234497, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.6484, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7739938080495357, |
|
"eval_loss": 0.9134386777877808, |
|
"eval_runtime": 162.7792, |
|
"eval_samples_per_second": 34.746, |
|
"eval_steps_per_second": 4.343, |
|
"eval_wer": 0.6538813371635827, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9287925696594427, |
|
"eval_loss": 0.7674785256385803, |
|
"eval_runtime": 162.7057, |
|
"eval_samples_per_second": 34.762, |
|
"eval_steps_per_second": 4.345, |
|
"eval_wer": 0.5923191731796954, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.08359133126935, |
|
"eval_loss": 0.7207810282707214, |
|
"eval_runtime": 163.9828, |
|
"eval_samples_per_second": 34.491, |
|
"eval_steps_per_second": 4.311, |
|
"eval_wer": 0.5289595737510231, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.238390092879257, |
|
"eval_loss": 0.6209472417831421, |
|
"eval_runtime": 163.6218, |
|
"eval_samples_per_second": 34.568, |
|
"eval_steps_per_second": 4.321, |
|
"eval_wer": 0.4744748118309769, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3931888544891642, |
|
"eval_loss": 0.6220189332962036, |
|
"eval_runtime": 162.6081, |
|
"eval_samples_per_second": 34.783, |
|
"eval_steps_per_second": 4.348, |
|
"eval_wer": 0.47879186660461237, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.5479876160990713, |
|
"grad_norm": 0.3950090706348419, |
|
"learning_rate": 0.0002672, |
|
"loss": 0.6286, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5479876160990713, |
|
"eval_loss": 0.5738953351974487, |
|
"eval_runtime": 162.8483, |
|
"eval_samples_per_second": 34.732, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.4588114458121359, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7027863777089784, |
|
"eval_loss": 0.564153790473938, |
|
"eval_runtime": 164.0752, |
|
"eval_samples_per_second": 34.472, |
|
"eval_steps_per_second": 4.309, |
|
"eval_wer": 0.4262008313138932, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8575851393188856, |
|
"eval_loss": 0.5511888265609741, |
|
"eval_runtime": 164.5846, |
|
"eval_samples_per_second": 34.365, |
|
"eval_steps_per_second": 4.296, |
|
"eval_wer": 0.42080852497953813, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0123839009287927, |
|
"eval_loss": 0.527522623538971, |
|
"eval_runtime": 162.1572, |
|
"eval_samples_per_second": 34.88, |
|
"eval_steps_per_second": 4.36, |
|
"eval_wer": 0.38652886328256647, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.1671826625387, |
|
"eval_loss": 0.4955059587955475, |
|
"eval_runtime": 160.6015, |
|
"eval_samples_per_second": 35.218, |
|
"eval_steps_per_second": 4.402, |
|
"eval_wer": 0.37545537706023013, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.321981424148607, |
|
"grad_norm": 0.4153783619403839, |
|
"learning_rate": 0.00023393333333333332, |
|
"loss": 0.4816, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.321981424148607, |
|
"eval_loss": 0.4908938407897949, |
|
"eval_runtime": 160.2948, |
|
"eval_samples_per_second": 35.285, |
|
"eval_steps_per_second": 4.411, |
|
"eval_wer": 0.37325672834651985, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.476780185758514, |
|
"eval_loss": 0.4982919991016388, |
|
"eval_runtime": 162.1379, |
|
"eval_samples_per_second": 34.884, |
|
"eval_steps_per_second": 4.36, |
|
"eval_wer": 0.37279132095456663, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"eval_loss": 0.48909762501716614, |
|
"eval_runtime": 161.7559, |
|
"eval_samples_per_second": 34.966, |
|
"eval_steps_per_second": 4.371, |
|
"eval_wer": 0.36550528799088444, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.7863777089783284, |
|
"eval_loss": 0.47961312532424927, |
|
"eval_runtime": 160.2886, |
|
"eval_samples_per_second": 35.286, |
|
"eval_steps_per_second": 4.411, |
|
"eval_wer": 0.3570958578742116, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"eval_loss": 0.46432051062583923, |
|
"eval_runtime": 162.1185, |
|
"eval_samples_per_second": 34.888, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.3591982154033798, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.0959752321981426, |
|
"grad_norm": 0.8931769728660583, |
|
"learning_rate": 0.00020079999999999997, |
|
"loss": 0.4017, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0959752321981426, |
|
"eval_loss": 0.5084750652313232, |
|
"eval_runtime": 162.3398, |
|
"eval_samples_per_second": 34.841, |
|
"eval_steps_per_second": 4.355, |
|
"eval_wer": 0.3697902457030059, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.2507739938080498, |
|
"eval_loss": 0.6755269169807434, |
|
"eval_runtime": 163.0508, |
|
"eval_samples_per_second": 34.689, |
|
"eval_steps_per_second": 4.336, |
|
"eval_wer": 0.4530018776780986, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.405572755417957, |
|
"eval_loss": 0.710012674331665, |
|
"eval_runtime": 161.8251, |
|
"eval_samples_per_second": 34.951, |
|
"eval_steps_per_second": 4.369, |
|
"eval_wer": 0.5108247339956027, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.560371517027864, |
|
"eval_loss": 0.8310704231262207, |
|
"eval_runtime": 162.1264, |
|
"eval_samples_per_second": 34.886, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.5642984384779574, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.715170278637771, |
|
"eval_loss": 0.7031980156898499, |
|
"eval_runtime": 166.0393, |
|
"eval_samples_per_second": 34.064, |
|
"eval_steps_per_second": 4.258, |
|
"eval_wer": 0.5028807112708832, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.8699690402476783, |
|
"grad_norm": 4.817399024963379, |
|
"learning_rate": 0.00016766666666666666, |
|
"loss": 0.6839, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.8699690402476783, |
|
"eval_loss": 0.7070674896240234, |
|
"eval_runtime": 164.0683, |
|
"eval_samples_per_second": 34.473, |
|
"eval_steps_per_second": 4.309, |
|
"eval_wer": 0.5006660140264159, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.024767801857585, |
|
"eval_loss": 0.8223607540130615, |
|
"eval_runtime": 164.3114, |
|
"eval_samples_per_second": 34.422, |
|
"eval_steps_per_second": 4.303, |
|
"eval_wer": 0.5069409895524065, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.179566563467493, |
|
"eval_loss": 0.8343736529350281, |
|
"eval_runtime": 163.8342, |
|
"eval_samples_per_second": 34.523, |
|
"eval_steps_per_second": 4.315, |
|
"eval_wer": 0.5162491373914718, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.3343653250774, |
|
"eval_loss": 0.9089197516441345, |
|
"eval_runtime": 162.2031, |
|
"eval_samples_per_second": 34.87, |
|
"eval_steps_per_second": 4.359, |
|
"eval_wer": 0.5620195471104621, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.489164086687307, |
|
"eval_loss": 0.9664539098739624, |
|
"eval_runtime": 163.3849, |
|
"eval_samples_per_second": 34.618, |
|
"eval_steps_per_second": 4.327, |
|
"eval_wer": 0.5640095649243312, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.643962848297214, |
|
"grad_norm": 7.185929775238037, |
|
"learning_rate": 0.00013446666666666666, |
|
"loss": 0.8292, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.643962848297214, |
|
"eval_loss": 0.9127740859985352, |
|
"eval_runtime": 162.7094, |
|
"eval_samples_per_second": 34.761, |
|
"eval_steps_per_second": 4.345, |
|
"eval_wer": 0.5414774277414902, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.798761609907121, |
|
"eval_loss": 1.1924536228179932, |
|
"eval_runtime": 161.8545, |
|
"eval_samples_per_second": 34.945, |
|
"eval_steps_per_second": 4.368, |
|
"eval_wer": 0.5938598321323683, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.953560371517028, |
|
"eval_loss": 1.4327375888824463, |
|
"eval_runtime": 167.5589, |
|
"eval_samples_per_second": 33.755, |
|
"eval_steps_per_second": 4.219, |
|
"eval_wer": 0.6999406204361991, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.108359133126935, |
|
"eval_loss": 1.2741221189498901, |
|
"eval_runtime": 166.0465, |
|
"eval_samples_per_second": 34.063, |
|
"eval_steps_per_second": 4.258, |
|
"eval_wer": 0.7826707964885815, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"eval_loss": 1.9348175525665283, |
|
"eval_runtime": 163.4154, |
|
"eval_samples_per_second": 34.611, |
|
"eval_steps_per_second": 4.326, |
|
"eval_wer": 0.8741795188650479, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.41795665634675, |
|
"grad_norm": 0.6057500839233398, |
|
"learning_rate": 0.00010126666666666666, |
|
"loss": 1.4131, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.41795665634675, |
|
"eval_loss": 1.9216177463531494, |
|
"eval_runtime": 161.6687, |
|
"eval_samples_per_second": 34.985, |
|
"eval_steps_per_second": 4.373, |
|
"eval_wer": 0.9870167386175795, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.572755417956657, |
|
"eval_loss": 1.8565247058868408, |
|
"eval_runtime": 164.0672, |
|
"eval_samples_per_second": 34.474, |
|
"eval_steps_per_second": 4.309, |
|
"eval_wer": 0.9367045946943557, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.727554179566564, |
|
"eval_loss": 1.7827845811843872, |
|
"eval_runtime": 163.0587, |
|
"eval_samples_per_second": 34.687, |
|
"eval_steps_per_second": 4.336, |
|
"eval_wer": 0.8240278602493941, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.882352941176471, |
|
"eval_loss": 1.6846531629562378, |
|
"eval_runtime": 162.8654, |
|
"eval_samples_per_second": 34.728, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.8059090690247308, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.037151702786378, |
|
"eval_loss": 1.6440324783325195, |
|
"eval_runtime": 163.0292, |
|
"eval_samples_per_second": 34.693, |
|
"eval_steps_per_second": 4.337, |
|
"eval_wer": 0.7983823080996935, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.191950464396285, |
|
"grad_norm": 0.5905019640922546, |
|
"learning_rate": 6.806666666666666e-05, |
|
"loss": 1.7728, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.191950464396285, |
|
"eval_loss": 1.6765395402908325, |
|
"eval_runtime": 166.4929, |
|
"eval_samples_per_second": 33.971, |
|
"eval_steps_per_second": 4.246, |
|
"eval_wer": 0.8053313219174785, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.346749226006192, |
|
"eval_loss": 1.6733070611953735, |
|
"eval_runtime": 163.1125, |
|
"eval_samples_per_second": 34.675, |
|
"eval_steps_per_second": 4.334, |
|
"eval_wer": 0.8024265378504598, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.5015479876160995, |
|
"eval_loss": 1.6601324081420898, |
|
"eval_runtime": 163.6995, |
|
"eval_samples_per_second": 34.551, |
|
"eval_steps_per_second": 4.319, |
|
"eval_wer": 0.7899568294522636, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.656346749226007, |
|
"eval_loss": 1.6604827642440796, |
|
"eval_runtime": 162.8495, |
|
"eval_samples_per_second": 34.731, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.7972910080082168, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 6.811145510835914, |
|
"eval_loss": 1.6598834991455078, |
|
"eval_runtime": 163.8109, |
|
"eval_samples_per_second": 34.528, |
|
"eval_steps_per_second": 4.316, |
|
"eval_wer": 0.7804560992441142, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.965944272445821, |
|
"grad_norm": 0.9840993881225586, |
|
"learning_rate": 3.493333333333333e-05, |
|
"loss": 1.6777, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.965944272445821, |
|
"eval_loss": 1.635949730873108, |
|
"eval_runtime": 163.8566, |
|
"eval_samples_per_second": 34.518, |
|
"eval_steps_per_second": 4.315, |
|
"eval_wer": 0.7693184188987499, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.120743034055727, |
|
"eval_loss": 1.6399564743041992, |
|
"eval_runtime": 164.6612, |
|
"eval_samples_per_second": 34.349, |
|
"eval_steps_per_second": 4.294, |
|
"eval_wer": 0.7651137038404134, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.275541795665634, |
|
"eval_loss": 1.6759086847305298, |
|
"eval_runtime": 161.5503, |
|
"eval_samples_per_second": 35.011, |
|
"eval_steps_per_second": 4.376, |
|
"eval_wer": 0.7672481584310956, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.430340557275541, |
|
"eval_loss": 1.6848710775375366, |
|
"eval_runtime": 162.0136, |
|
"eval_samples_per_second": 34.911, |
|
"eval_steps_per_second": 4.364, |
|
"eval_wer": 0.7686283320761984, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.585139318885449, |
|
"eval_loss": 1.6857717037200928, |
|
"eval_runtime": 161.3723, |
|
"eval_samples_per_second": 35.049, |
|
"eval_steps_per_second": 4.381, |
|
"eval_wer": 0.7689974482836096, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 7.739938080495356, |
|
"grad_norm": 1.753746747970581, |
|
"learning_rate": 1.8e-06, |
|
"loss": 1.683, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.739938080495356, |
|
"eval_loss": 1.6955511569976807, |
|
"eval_runtime": 160.8529, |
|
"eval_samples_per_second": 35.163, |
|
"eval_steps_per_second": 4.395, |
|
"eval_wer": 0.7701529424981143, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.739938080495356, |
|
"step": 5000, |
|
"total_flos": 4.528229347801944e+19, |
|
"train_loss": 1.3220068420410156, |
|
"train_runtime": 20543.6538, |
|
"train_samples_per_second": 15.577, |
|
"train_steps_per_second": 0.243 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 400, |
|
"total_flos": 4.528229347801944e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|