|
{ |
|
"best_metric": 0.21656855629922317, |
|
"best_model_checkpoint": "/cluster/home/torstefl/Master/saved_model/W2V/Combined/BB-NB-RUND-1b-ll/checkpoint-38778", |
|
"epoch": 23.0, |
|
"eval_steps": 500, |
|
"global_step": 38778, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 40.23420715332031, |
|
"learning_rate": 1.994419168941462e-05, |
|
"loss": 0.7914, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bigbrother_loss": 1.3365620374679565, |
|
"eval_bigbrother_runtime": 55.0794, |
|
"eval_bigbrother_samples_per_second": 24.873, |
|
"eval_bigbrother_steps_per_second": 0.781, |
|
"eval_bigbrother_wer": 0.4995569161363087, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_BB_NB_RUND_loss": 0.7492167949676514, |
|
"eval_BB_NB_RUND_runtime": 219.5304, |
|
"eval_BB_NB_RUND_samples_per_second": 14.768, |
|
"eval_BB_NB_RUND_steps_per_second": 0.465, |
|
"eval_BB_NB_RUND_wer": 0.29118619466172085, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_rundkast_loss": 0.2676470875740051, |
|
"eval_rundkast_runtime": 39.1567, |
|
"eval_rundkast_samples_per_second": 34.196, |
|
"eval_rundkast_steps_per_second": 1.073, |
|
"eval_rundkast_wer": 0.1707726763717805, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_nb_samtale_loss": 0.44947025179862976, |
|
"eval_nb_samtale_runtime": 46.7998, |
|
"eval_nb_samtale_samples_per_second": 11.389, |
|
"eval_nb_samtale_steps_per_second": 0.363, |
|
"eval_nb_samtale_wer": 0.20320638020833334, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 34.448326110839844, |
|
"learning_rate": 1.943312101910828e-05, |
|
"loss": 0.6845, |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bigbrother_loss": 1.3286552429199219, |
|
"eval_bigbrother_runtime": 56.6035, |
|
"eval_bigbrother_samples_per_second": 24.203, |
|
"eval_bigbrother_steps_per_second": 0.76, |
|
"eval_bigbrother_wer": 0.4742608555546604, |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_BB_NB_RUND_loss": 0.7409276366233826, |
|
"eval_BB_NB_RUND_runtime": 208.4781, |
|
"eval_BB_NB_RUND_samples_per_second": 15.551, |
|
"eval_BB_NB_RUND_steps_per_second": 0.489, |
|
"eval_BB_NB_RUND_wer": 0.27532725855441764, |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_rundkast_loss": 0.25520867109298706, |
|
"eval_rundkast_runtime": 40.2745, |
|
"eval_rundkast_samples_per_second": 33.247, |
|
"eval_rundkast_steps_per_second": 1.043, |
|
"eval_rundkast_wer": 0.15853463445848665, |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_nb_samtale_loss": 0.45029357075691223, |
|
"eval_nb_samtale_runtime": 48.0222, |
|
"eval_nb_samtale_samples_per_second": 11.099, |
|
"eval_nb_samtale_steps_per_second": 0.354, |
|
"eval_nb_samtale_wer": 0.19246419270833334, |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 10.3843355178833, |
|
"learning_rate": 1.8921747042766152e-05, |
|
"loss": 0.6323, |
|
"step": 5058 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bigbrother_loss": 1.217179775238037, |
|
"eval_bigbrother_runtime": 56.073, |
|
"eval_bigbrother_samples_per_second": 24.432, |
|
"eval_bigbrother_steps_per_second": 0.767, |
|
"eval_bigbrother_wer": 0.44896479497301217, |
|
"step": 5058 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_BB_NB_RUND_loss": 0.6835288405418396, |
|
"eval_BB_NB_RUND_runtime": 193.4829, |
|
"eval_BB_NB_RUND_samples_per_second": 16.756, |
|
"eval_BB_NB_RUND_steps_per_second": 0.527, |
|
"eval_BB_NB_RUND_wer": 0.2632583393812327, |
|
"step": 5058 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_rundkast_loss": 0.2418593019247055, |
|
"eval_rundkast_runtime": 40.4751, |
|
"eval_rundkast_samples_per_second": 33.082, |
|
"eval_rundkast_steps_per_second": 1.038, |
|
"eval_rundkast_wer": 0.15301551751719725, |
|
"step": 5058 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_nb_samtale_loss": 0.42157283425331116, |
|
"eval_nb_samtale_runtime": 47.3972, |
|
"eval_nb_samtale_samples_per_second": 11.245, |
|
"eval_nb_samtale_steps_per_second": 0.359, |
|
"eval_nb_samtale_wer": 0.18790690104166666, |
|
"step": 5058 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 17.625219345092773, |
|
"learning_rate": 1.8410979678495603e-05, |
|
"loss": 0.5949, |
|
"step": 6744 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bigbrother_loss": 1.384012222290039, |
|
"eval_bigbrother_runtime": 55.0197, |
|
"eval_bigbrother_samples_per_second": 24.9, |
|
"eval_bigbrother_steps_per_second": 0.782, |
|
"eval_bigbrother_wer": 0.44872311286554417, |
|
"step": 6744 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_BB_NB_RUND_loss": 0.7864356637001038, |
|
"eval_BB_NB_RUND_runtime": 181.8859, |
|
"eval_BB_NB_RUND_samples_per_second": 17.824, |
|
"eval_BB_NB_RUND_steps_per_second": 0.561, |
|
"eval_BB_NB_RUND_wer": 0.25925328602532055, |
|
"step": 6744 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_rundkast_loss": 0.3103683590888977, |
|
"eval_rundkast_runtime": 40.029, |
|
"eval_rundkast_samples_per_second": 33.451, |
|
"eval_rundkast_steps_per_second": 1.049, |
|
"eval_rundkast_wer": 0.14637657974724044, |
|
"step": 6744 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_nb_samtale_loss": 0.4464314579963684, |
|
"eval_nb_samtale_runtime": 47.5761, |
|
"eval_nb_samtale_samples_per_second": 11.203, |
|
"eval_nb_samtale_steps_per_second": 0.357, |
|
"eval_nb_samtale_wer": 0.182861328125, |
|
"step": 6744 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 14.08320426940918, |
|
"learning_rate": 1.7899605702153475e-05, |
|
"loss": 0.5659, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bigbrother_loss": 1.3207027912139893, |
|
"eval_bigbrother_runtime": 55.6797, |
|
"eval_bigbrother_samples_per_second": 24.605, |
|
"eval_bigbrother_steps_per_second": 0.772, |
|
"eval_bigbrother_wer": 0.4434061065012487, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_BB_NB_RUND_loss": 0.7573540210723877, |
|
"eval_BB_NB_RUND_runtime": 179.8263, |
|
"eval_BB_NB_RUND_samples_per_second": 18.029, |
|
"eval_BB_NB_RUND_steps_per_second": 0.567, |
|
"eval_BB_NB_RUND_wer": 0.2573448377818993, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_rundkast_loss": 0.31218844652175903, |
|
"eval_rundkast_runtime": 40.0113, |
|
"eval_rundkast_samples_per_second": 33.466, |
|
"eval_rundkast_steps_per_second": 1.05, |
|
"eval_rundkast_wer": 0.1435770276755719, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_nb_samtale_loss": 0.4283309280872345, |
|
"eval_nb_samtale_runtime": 47.4461, |
|
"eval_nb_samtale_samples_per_second": 11.234, |
|
"eval_nb_samtale_steps_per_second": 0.358, |
|
"eval_nb_samtale_wer": 0.18497721354166666, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 20.013416290283203, |
|
"learning_rate": 1.7388231725811344e-05, |
|
"loss": 1.6171144283853038e+30, |
|
"step": 10116 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bigbrother_loss": 1.1751562356948853, |
|
"eval_bigbrother_runtime": 55.6865, |
|
"eval_bigbrother_samples_per_second": 24.602, |
|
"eval_bigbrother_steps_per_second": 0.772, |
|
"eval_bigbrother_wer": 0.4262466768710223, |
|
"step": 10116 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_BB_NB_RUND_loss": 0.6712942719459534, |
|
"eval_BB_NB_RUND_runtime": 186.6214, |
|
"eval_BB_NB_RUND_samples_per_second": 17.372, |
|
"eval_BB_NB_RUND_steps_per_second": 0.547, |
|
"eval_BB_NB_RUND_wer": 0.25070558825901135, |
|
"step": 10116 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_rundkast_loss": 0.26327571272850037, |
|
"eval_rundkast_runtime": 40.3853, |
|
"eval_rundkast_samples_per_second": 33.156, |
|
"eval_rundkast_steps_per_second": 1.04, |
|
"eval_rundkast_wer": 0.14549672052471604, |
|
"step": 10116 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_nb_samtale_loss": 0.4015786945819855, |
|
"eval_nb_samtale_runtime": 48.2988, |
|
"eval_nb_samtale_samples_per_second": 11.035, |
|
"eval_nb_samtale_steps_per_second": 0.352, |
|
"eval_nb_samtale_wer": 0.18123372395833334, |
|
"step": 10116 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 11.362643241882324, |
|
"learning_rate": 1.6877161055505007e-05, |
|
"loss": 0.5188, |
|
"step": 11802 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bigbrother_loss": 1.2031224966049194, |
|
"eval_bigbrother_runtime": 56.0574, |
|
"eval_bigbrother_samples_per_second": 24.439, |
|
"eval_bigbrother_steps_per_second": 0.767, |
|
"eval_bigbrother_wer": 0.4243937807137678, |
|
"step": 11802 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_BB_NB_RUND_loss": 0.6787930130958557, |
|
"eval_BB_NB_RUND_runtime": 188.339, |
|
"eval_BB_NB_RUND_samples_per_second": 17.214, |
|
"eval_BB_NB_RUND_steps_per_second": 0.542, |
|
"eval_BB_NB_RUND_wer": 0.24680805311399617, |
|
"step": 11802 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_rundkast_loss": 0.2649306356906891, |
|
"eval_rundkast_runtime": 40.4917, |
|
"eval_rundkast_samples_per_second": 33.069, |
|
"eval_rundkast_steps_per_second": 1.037, |
|
"eval_rundkast_wer": 0.14077747560390338, |
|
"step": 11802 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_nb_samtale_loss": 0.37048718333244324, |
|
"eval_nb_samtale_runtime": 48.165, |
|
"eval_nb_samtale_samples_per_second": 11.066, |
|
"eval_nb_samtale_steps_per_second": 0.353, |
|
"eval_nb_samtale_wer": 0.17569986979166666, |
|
"step": 11802 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 14.940473556518555, |
|
"learning_rate": 1.6366090385198667e-05, |
|
"loss": 0.4948, |
|
"step": 13488 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bigbrother_loss": 1.2190874814987183, |
|
"eval_bigbrother_runtime": 56.6486, |
|
"eval_bigbrother_samples_per_second": 24.184, |
|
"eval_bigbrother_steps_per_second": 0.759, |
|
"eval_bigbrother_wer": 0.41134294691049705, |
|
"step": 13488 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_BB_NB_RUND_loss": 0.7082846164703369, |
|
"eval_BB_NB_RUND_runtime": 187.8309, |
|
"eval_BB_NB_RUND_samples_per_second": 17.26, |
|
"eval_BB_NB_RUND_steps_per_second": 0.543, |
|
"eval_BB_NB_RUND_wer": 0.24280299975808403, |
|
"step": 13488 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_rundkast_loss": 0.3050954043865204, |
|
"eval_rundkast_runtime": 40.703, |
|
"eval_rundkast_samples_per_second": 32.897, |
|
"eval_rundkast_steps_per_second": 1.032, |
|
"eval_rundkast_wer": 0.1406974884018557, |
|
"step": 13488 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_nb_samtale_loss": 0.4080573618412018, |
|
"eval_nb_samtale_runtime": 48.4037, |
|
"eval_nb_samtale_samples_per_second": 11.012, |
|
"eval_nb_samtale_steps_per_second": 0.351, |
|
"eval_nb_samtale_wer": 0.176513671875, |
|
"step": 13488 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 14.726886749267578, |
|
"learning_rate": 1.585471640885654e-05, |
|
"loss": 0.4819, |
|
"step": 15174 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bigbrother_loss": 1.1392048597335815, |
|
"eval_bigbrother_runtime": 56.0533, |
|
"eval_bigbrother_samples_per_second": 24.441, |
|
"eval_bigbrother_steps_per_second": 0.767, |
|
"eval_bigbrother_wer": 0.4111012648030291, |
|
"step": 15174 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_BB_NB_RUND_loss": 0.6559087634086609, |
|
"eval_BB_NB_RUND_runtime": 178.4738, |
|
"eval_BB_NB_RUND_samples_per_second": 18.165, |
|
"eval_BB_NB_RUND_steps_per_second": 0.572, |
|
"eval_BB_NB_RUND_wer": 0.24307179528532646, |
|
"step": 15174 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_rundkast_loss": 0.2665271759033203, |
|
"eval_rundkast_runtime": 40.869, |
|
"eval_rundkast_samples_per_second": 32.763, |
|
"eval_rundkast_steps_per_second": 1.028, |
|
"eval_rundkast_wer": 0.13973764197728364, |
|
"step": 15174 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_nb_samtale_loss": 0.39354923367500305, |
|
"eval_nb_samtale_runtime": 48.6692, |
|
"eval_nb_samtale_samples_per_second": 10.951, |
|
"eval_nb_samtale_steps_per_second": 0.349, |
|
"eval_nb_samtale_wer": 0.178466796875, |
|
"step": 15174 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 28.40872573852539, |
|
"learning_rate": 1.53436457385502e-05, |
|
"loss": 0.4676, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bigbrother_loss": 1.2027722597122192, |
|
"eval_bigbrother_runtime": 56.4265, |
|
"eval_bigbrother_samples_per_second": 24.279, |
|
"eval_bigbrother_steps_per_second": 0.762, |
|
"eval_bigbrother_wer": 0.3986143559171836, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_BB_NB_RUND_loss": 0.7014443278312683, |
|
"eval_BB_NB_RUND_runtime": 184.3191, |
|
"eval_BB_NB_RUND_samples_per_second": 17.589, |
|
"eval_BB_NB_RUND_steps_per_second": 0.553, |
|
"eval_BB_NB_RUND_wer": 0.2354648818643658, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_rundkast_loss": 0.3014816343784332, |
|
"eval_rundkast_runtime": 40.6222, |
|
"eval_rundkast_samples_per_second": 32.962, |
|
"eval_rundkast_steps_per_second": 1.034, |
|
"eval_rundkast_wer": 0.13389857622780355, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_nb_samtale_loss": 0.4174867272377014, |
|
"eval_nb_samtale_runtime": 50.8259, |
|
"eval_nb_samtale_samples_per_second": 10.487, |
|
"eval_nb_samtale_steps_per_second": 0.334, |
|
"eval_nb_samtale_wer": 0.174072265625, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 17.167695999145508, |
|
"learning_rate": 1.483257506824386e-05, |
|
"loss": 0.4529, |
|
"step": 18546 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bigbrother_loss": 1.2520931959152222, |
|
"eval_bigbrother_runtime": 56.0621, |
|
"eval_bigbrother_samples_per_second": 24.437, |
|
"eval_bigbrother_steps_per_second": 0.767, |
|
"eval_bigbrother_wer": 0.4011117376943527, |
|
"step": 18546 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_BB_NB_RUND_loss": 0.7142683863639832, |
|
"eval_BB_NB_RUND_runtime": 188.0039, |
|
"eval_BB_NB_RUND_samples_per_second": 17.244, |
|
"eval_BB_NB_RUND_steps_per_second": 0.543, |
|
"eval_BB_NB_RUND_wer": 0.2372389323441658, |
|
"step": 18546 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_rundkast_loss": 0.2879612147808075, |
|
"eval_rundkast_runtime": 41.0453, |
|
"eval_rundkast_samples_per_second": 32.622, |
|
"eval_rundkast_steps_per_second": 1.023, |
|
"eval_rundkast_wer": 0.1372580387138058, |
|
"step": 18546 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_nb_samtale_loss": 0.4028375446796417, |
|
"eval_nb_samtale_runtime": 48.6427, |
|
"eval_nb_samtale_samples_per_second": 10.957, |
|
"eval_nb_samtale_steps_per_second": 0.349, |
|
"eval_nb_samtale_wer": 0.17342122395833334, |
|
"step": 18546 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 10.578996658325195, |
|
"learning_rate": 1.432120109190173e-05, |
|
"loss": 0.4393, |
|
"step": 20232 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bigbrother_loss": 1.1568479537963867, |
|
"eval_bigbrother_runtime": 56.7267, |
|
"eval_bigbrother_samples_per_second": 24.151, |
|
"eval_bigbrother_steps_per_second": 0.758, |
|
"eval_bigbrother_wer": 0.3946668814952066, |
|
"step": 20232 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_BB_NB_RUND_loss": 0.6711928844451904, |
|
"eval_BB_NB_RUND_runtime": 189.16, |
|
"eval_BB_NB_RUND_samples_per_second": 17.139, |
|
"eval_BB_NB_RUND_steps_per_second": 0.539, |
|
"eval_BB_NB_RUND_wer": 0.23304572211918392, |
|
"step": 20232 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_rundkast_loss": 0.281593918800354, |
|
"eval_rundkast_runtime": 40.9816, |
|
"eval_rundkast_samples_per_second": 32.673, |
|
"eval_rundkast_steps_per_second": 1.025, |
|
"eval_rundkast_wer": 0.13509838425851864, |
|
"step": 20232 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_nb_samtale_loss": 0.40170300006866455, |
|
"eval_nb_samtale_runtime": 48.5059, |
|
"eval_nb_samtale_samples_per_second": 10.988, |
|
"eval_nb_samtale_steps_per_second": 0.35, |
|
"eval_nb_samtale_wer": 0.17008463541666666, |
|
"step": 20232 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 26.488574981689453, |
|
"learning_rate": 1.3810130421595392e-05, |
|
"loss": 0.4286, |
|
"step": 21918 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bigbrother_loss": 1.211380958557129, |
|
"eval_bigbrother_runtime": 56.1397, |
|
"eval_bigbrother_samples_per_second": 24.403, |
|
"eval_bigbrother_steps_per_second": 0.766, |
|
"eval_bigbrother_wer": 0.39313622814790944, |
|
"step": 21918 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_BB_NB_RUND_loss": 0.7042691707611084, |
|
"eval_BB_NB_RUND_runtime": 192.1708, |
|
"eval_BB_NB_RUND_samples_per_second": 16.87, |
|
"eval_BB_NB_RUND_steps_per_second": 0.531, |
|
"eval_BB_NB_RUND_wer": 0.23051904416310512, |
|
"step": 21918 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_rundkast_loss": 0.2999264895915985, |
|
"eval_rundkast_runtime": 40.6906, |
|
"eval_rundkast_samples_per_second": 32.907, |
|
"eval_rundkast_steps_per_second": 1.032, |
|
"eval_rundkast_wer": 0.1297392417213246, |
|
"step": 21918 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_nb_samtale_loss": 0.41735681891441345, |
|
"eval_nb_samtale_runtime": 48.1494, |
|
"eval_nb_samtale_samples_per_second": 11.07, |
|
"eval_nb_samtale_steps_per_second": 0.353, |
|
"eval_nb_samtale_wer": 0.169189453125, |
|
"step": 21918 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 8.799247741699219, |
|
"learning_rate": 1.3298756445253262e-05, |
|
"loss": 0.4158, |
|
"step": 23604 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bigbrother_loss": 1.2186354398727417, |
|
"eval_bigbrother_runtime": 55.8345, |
|
"eval_bigbrother_samples_per_second": 24.537, |
|
"eval_bigbrother_steps_per_second": 0.77, |
|
"eval_bigbrother_wer": 0.39225006042052685, |
|
"step": 23604 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_BB_NB_RUND_loss": 0.7078685760498047, |
|
"eval_BB_NB_RUND_runtime": 188.634, |
|
"eval_BB_NB_RUND_samples_per_second": 17.187, |
|
"eval_BB_NB_RUND_steps_per_second": 0.541, |
|
"eval_BB_NB_RUND_wer": 0.23111039432303845, |
|
"step": 23604 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_rundkast_loss": 0.3022992014884949, |
|
"eval_rundkast_runtime": 40.9858, |
|
"eval_rundkast_samples_per_second": 32.67, |
|
"eval_rundkast_steps_per_second": 1.025, |
|
"eval_rundkast_wer": 0.1311790113581827, |
|
"step": 23604 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_nb_samtale_loss": 0.4138488471508026, |
|
"eval_nb_samtale_runtime": 48.2354, |
|
"eval_nb_samtale_samples_per_second": 11.05, |
|
"eval_nb_samtale_steps_per_second": 0.352, |
|
"eval_nb_samtale_wer": 0.16943359375, |
|
"step": 23604 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 8.913851737976074, |
|
"learning_rate": 1.2787989080982713e-05, |
|
"loss": 0.4061, |
|
"step": 25290 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bigbrother_loss": 1.2142188549041748, |
|
"eval_bigbrother_runtime": 56.7489, |
|
"eval_bigbrother_samples_per_second": 24.141, |
|
"eval_bigbrother_steps_per_second": 0.758, |
|
"eval_bigbrother_wer": 0.3879803431885926, |
|
"step": 25290 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_BB_NB_RUND_loss": 0.7142959833145142, |
|
"eval_BB_NB_RUND_runtime": 176.8357, |
|
"eval_BB_NB_RUND_samples_per_second": 18.333, |
|
"eval_BB_NB_RUND_steps_per_second": 0.577, |
|
"eval_BB_NB_RUND_wer": 0.2274816547052657, |
|
"step": 25290 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_rundkast_loss": 0.325125128030777, |
|
"eval_rundkast_runtime": 41.4419, |
|
"eval_rundkast_samples_per_second": 32.31, |
|
"eval_rundkast_steps_per_second": 1.013, |
|
"eval_rundkast_wer": 0.13093904975203968, |
|
"step": 25290 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_nb_samtale_loss": 0.4068756699562073, |
|
"eval_nb_samtale_runtime": 48.9343, |
|
"eval_nb_samtale_samples_per_second": 10.892, |
|
"eval_nb_samtale_steps_per_second": 0.347, |
|
"eval_nb_samtale_wer": 0.16357421875, |
|
"step": 25290 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 16.82915496826172, |
|
"learning_rate": 1.2276615104640583e-05, |
|
"loss": 0.3964, |
|
"step": 26976 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bigbrother_loss": 1.1660621166229248, |
|
"eval_bigbrother_runtime": 56.8428, |
|
"eval_bigbrother_samples_per_second": 24.102, |
|
"eval_bigbrother_steps_per_second": 0.756, |
|
"eval_bigbrother_wer": 0.38072987996455326, |
|
"step": 26976 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_BB_NB_RUND_loss": 0.6708107590675354, |
|
"eval_BB_NB_RUND_runtime": 211.8358, |
|
"eval_BB_NB_RUND_samples_per_second": 15.304, |
|
"eval_BB_NB_RUND_steps_per_second": 0.482, |
|
"eval_BB_NB_RUND_wer": 0.22318092626938688, |
|
"step": 26976 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_rundkast_loss": 0.2847665250301361, |
|
"eval_rundkast_runtime": 40.762, |
|
"eval_rundkast_samples_per_second": 32.849, |
|
"eval_rundkast_steps_per_second": 1.03, |
|
"eval_rundkast_wer": 0.126219804831227, |
|
"step": 26976 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_nb_samtale_loss": 0.3676682114601135, |
|
"eval_nb_samtale_runtime": 48.3802, |
|
"eval_nb_samtale_samples_per_second": 11.017, |
|
"eval_nb_samtale_steps_per_second": 0.351, |
|
"eval_nb_samtale_wer": 0.162353515625, |
|
"step": 26976 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 12.924592018127441, |
|
"learning_rate": 1.1765544434334245e-05, |
|
"loss": 0.3839, |
|
"step": 28662 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bigbrother_loss": 1.2347418069839478, |
|
"eval_bigbrother_runtime": 56.4815, |
|
"eval_bigbrother_samples_per_second": 24.256, |
|
"eval_bigbrother_steps_per_second": 0.761, |
|
"eval_bigbrother_wer": 0.3837911866591477, |
|
"step": 28662 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_BB_NB_RUND_loss": 0.7269824147224426, |
|
"eval_BB_NB_RUND_runtime": 198.7713, |
|
"eval_BB_NB_RUND_samples_per_second": 16.31, |
|
"eval_BB_NB_RUND_steps_per_second": 0.513, |
|
"eval_BB_NB_RUND_wer": 0.2254656882509475, |
|
"step": 28662 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_rundkast_loss": 0.33907437324523926, |
|
"eval_rundkast_runtime": 40.907, |
|
"eval_rundkast_samples_per_second": 32.733, |
|
"eval_rundkast_steps_per_second": 1.027, |
|
"eval_rundkast_wer": 0.12925931850903855, |
|
"step": 28662 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_nb_samtale_loss": 0.39654919505119324, |
|
"eval_nb_samtale_runtime": 48.4458, |
|
"eval_nb_samtale_samples_per_second": 11.002, |
|
"eval_nb_samtale_steps_per_second": 0.351, |
|
"eval_nb_samtale_wer": 0.163818359375, |
|
"step": 28662 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 56.129154205322266, |
|
"learning_rate": 1.1254170457992115e-05, |
|
"loss": 0.3754, |
|
"step": 30348 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bigbrother_loss": 1.2182375192642212, |
|
"eval_bigbrother_runtime": 56.0084, |
|
"eval_bigbrother_samples_per_second": 24.461, |
|
"eval_bigbrother_steps_per_second": 0.768, |
|
"eval_bigbrother_wer": 0.3763796020301297, |
|
"step": 30348 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_BB_NB_RUND_loss": 0.6986347436904907, |
|
"eval_BB_NB_RUND_runtime": 210.8249, |
|
"eval_BB_NB_RUND_samples_per_second": 15.378, |
|
"eval_BB_NB_RUND_steps_per_second": 0.484, |
|
"eval_BB_NB_RUND_wer": 0.22167567131682928, |
|
"step": 30348 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_rundkast_loss": 0.2828354239463806, |
|
"eval_rundkast_runtime": 40.4262, |
|
"eval_rundkast_samples_per_second": 33.122, |
|
"eval_rundkast_steps_per_second": 1.039, |
|
"eval_rundkast_wer": 0.12525995840665494, |
|
"step": 30348 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_nb_samtale_loss": 0.40772777795791626, |
|
"eval_nb_samtale_runtime": 47.7849, |
|
"eval_nb_samtale_samples_per_second": 11.154, |
|
"eval_nb_samtale_steps_per_second": 0.356, |
|
"eval_nb_samtale_wer": 0.16300455729166666, |
|
"step": 30348 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 25.304649353027344, |
|
"learning_rate": 1.0743099787685776e-05, |
|
"loss": 0.366, |
|
"step": 32034 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bigbrother_loss": 1.1887106895446777, |
|
"eval_bigbrother_runtime": 56.2869, |
|
"eval_bigbrother_samples_per_second": 24.34, |
|
"eval_bigbrother_steps_per_second": 0.764, |
|
"eval_bigbrother_wer": 0.3763796020301297, |
|
"step": 32034 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_BB_NB_RUND_loss": 0.7024869918823242, |
|
"eval_BB_NB_RUND_runtime": 194.4983, |
|
"eval_BB_NB_RUND_samples_per_second": 16.669, |
|
"eval_BB_NB_RUND_steps_per_second": 0.524, |
|
"eval_BB_NB_RUND_wer": 0.22148751444775958, |
|
"step": 32034 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_rundkast_loss": 0.3243892788887024, |
|
"eval_rundkast_runtime": 42.0254, |
|
"eval_rundkast_samples_per_second": 31.862, |
|
"eval_rundkast_steps_per_second": 0.999, |
|
"eval_rundkast_wer": 0.125979843225084, |
|
"step": 32034 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_nb_samtale_loss": 0.4026784598827362, |
|
"eval_nb_samtale_runtime": 47.8774, |
|
"eval_nb_samtale_samples_per_second": 11.133, |
|
"eval_nb_samtale_steps_per_second": 0.355, |
|
"eval_nb_samtale_wer": 0.162353515625, |
|
"step": 32034 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.739073753356934, |
|
"learning_rate": 1.0231725811343647e-05, |
|
"loss": 0.3577, |
|
"step": 33720 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bigbrother_loss": 1.2659293413162231, |
|
"eval_bigbrother_runtime": 55.6963, |
|
"eval_bigbrother_samples_per_second": 24.598, |
|
"eval_bigbrother_steps_per_second": 0.772, |
|
"eval_bigbrother_wer": 0.37339885603802464, |
|
"step": 33720 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_BB_NB_RUND_loss": 0.7312197685241699, |
|
"eval_BB_NB_RUND_runtime": 175.0676, |
|
"eval_BB_NB_RUND_samples_per_second": 18.519, |
|
"eval_BB_NB_RUND_steps_per_second": 0.583, |
|
"eval_BB_NB_RUND_wer": 0.2187189205171626, |
|
"step": 33720 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_rundkast_loss": 0.3108943998813629, |
|
"eval_rundkast_runtime": 40.3883, |
|
"eval_rundkast_samples_per_second": 33.153, |
|
"eval_rundkast_steps_per_second": 1.04, |
|
"eval_rundkast_wer": 0.12270036794112942, |
|
"step": 33720 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_nb_samtale_loss": 0.41297149658203125, |
|
"eval_nb_samtale_runtime": 48.2786, |
|
"eval_nb_samtale_samples_per_second": 11.04, |
|
"eval_nb_samtale_steps_per_second": 0.352, |
|
"eval_nb_samtale_wer": 0.16015625, |
|
"step": 33720 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 11.251120567321777, |
|
"learning_rate": 9.720655141037308e-06, |
|
"loss": 0.3504, |
|
"step": 35406 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bigbrother_loss": 1.274532437324524, |
|
"eval_bigbrother_runtime": 57.087, |
|
"eval_bigbrother_samples_per_second": 23.998, |
|
"eval_bigbrother_steps_per_second": 0.753, |
|
"eval_bigbrother_wer": 0.37275437041811005, |
|
"step": 35406 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_BB_NB_RUND_loss": 0.7424564957618713, |
|
"eval_BB_NB_RUND_runtime": 190.1044, |
|
"eval_BB_NB_RUND_samples_per_second": 17.054, |
|
"eval_BB_NB_RUND_steps_per_second": 0.537, |
|
"eval_BB_NB_RUND_wer": 0.22003601860065047, |
|
"step": 35406 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_rundkast_loss": 0.3286913335323334, |
|
"eval_rundkast_runtime": 40.9067, |
|
"eval_rundkast_samples_per_second": 32.733, |
|
"eval_rundkast_steps_per_second": 1.027, |
|
"eval_rundkast_wer": 0.12494000959846424, |
|
"step": 35406 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_nb_samtale_loss": 0.4145500361919403, |
|
"eval_nb_samtale_runtime": 48.4768, |
|
"eval_nb_samtale_samples_per_second": 10.995, |
|
"eval_nb_samtale_steps_per_second": 0.351, |
|
"eval_nb_samtale_wer": 0.16243489583333334, |
|
"step": 35406 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 9.809260368347168, |
|
"learning_rate": 9.209584470730968e-06, |
|
"loss": 0.3439, |
|
"step": 37092 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bigbrother_loss": 1.3220369815826416, |
|
"eval_bigbrother_runtime": 57.3394, |
|
"eval_bigbrother_samples_per_second": 23.893, |
|
"eval_bigbrother_steps_per_second": 0.75, |
|
"eval_bigbrother_wer": 0.3744461451703859, |
|
"step": 37092 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_BB_NB_RUND_loss": 0.7814194560050964, |
|
"eval_BB_NB_RUND_runtime": 216.0386, |
|
"eval_BB_NB_RUND_samples_per_second": 15.007, |
|
"eval_BB_NB_RUND_steps_per_second": 0.472, |
|
"eval_BB_NB_RUND_wer": 0.22033169368061714, |
|
"step": 37092 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_rundkast_loss": 0.36451050639152527, |
|
"eval_rundkast_runtime": 41.6479, |
|
"eval_rundkast_samples_per_second": 32.15, |
|
"eval_rundkast_steps_per_second": 1.008, |
|
"eval_rundkast_wer": 0.12589985602303633, |
|
"step": 37092 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_nb_samtale_loss": 0.4385349452495575, |
|
"eval_nb_samtale_runtime": 48.7109, |
|
"eval_nb_samtale_samples_per_second": 10.942, |
|
"eval_nb_samtale_steps_per_second": 0.349, |
|
"eval_nb_samtale_wer": 0.16015625, |
|
"step": 37092 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 12.92618179321289, |
|
"learning_rate": 8.69851380042463e-06, |
|
"loss": 0.3378, |
|
"step": 38778 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bigbrother_loss": 1.227617859840393, |
|
"eval_bigbrother_runtime": 57.2357, |
|
"eval_bigbrother_samples_per_second": 23.936, |
|
"eval_bigbrother_steps_per_second": 0.751, |
|
"eval_bigbrother_wer": 0.3678401675662612, |
|
"step": 38778 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_BB_NB_RUND_loss": 0.7239949107170105, |
|
"eval_BB_NB_RUND_runtime": 206.829, |
|
"eval_BB_NB_RUND_samples_per_second": 15.675, |
|
"eval_BB_NB_RUND_steps_per_second": 0.493, |
|
"eval_BB_NB_RUND_wer": 0.21656855629922317, |
|
"step": 38778 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_rundkast_loss": 0.3332752287387848, |
|
"eval_rundkast_runtime": 41.5376, |
|
"eval_rundkast_samples_per_second": 32.236, |
|
"eval_rundkast_steps_per_second": 1.011, |
|
"eval_rundkast_wer": 0.12286034234522476, |
|
"step": 38778 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_nb_samtale_loss": 0.4117254614830017, |
|
"eval_nb_samtale_runtime": 48.8538, |
|
"eval_nb_samtale_samples_per_second": 10.91, |
|
"eval_nb_samtale_steps_per_second": 0.348, |
|
"eval_nb_samtale_wer": 0.1591796875, |
|
"step": 38778 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 67440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 6.56874003739803e+20, |
|
"train_batch_size": 48, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|