|
{ |
|
"best_metric": 0.14368463395012068, |
|
"best_model_checkpoint": "/cluster/home/torstefl/Master/saved_model/W2V/Combined/NB-RUND/22.05/checkpoint-48581", |
|
"epoch": 37.0, |
|
"eval_steps": 500, |
|
"global_step": 48581, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.1965060234069824, |
|
"learning_rate": 8.740000000000001e-05, |
|
"loss": 0.5607, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bigbrother_loss": 1.985207200050354, |
|
"eval_bigbrother_runtime": 42.9617, |
|
"eval_bigbrother_samples_per_second": 31.889, |
|
"eval_bigbrother_steps_per_second": 1.001, |
|
"eval_bigbrother_wer": 0.5811649077579957, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_NB_RUND_loss": 0.3991325795650482, |
|
"eval_NB_RUND_runtime": 118.1565, |
|
"eval_NB_RUND_samples_per_second": 15.894, |
|
"eval_NB_RUND_steps_per_second": 0.499, |
|
"eval_NB_RUND_wer": 0.21319388576025744, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_rundkast_loss": 0.3412451446056366, |
|
"eval_rundkast_runtime": 33.4748, |
|
"eval_rundkast_samples_per_second": 40.18, |
|
"eval_rundkast_steps_per_second": 1.285, |
|
"eval_rundkast_wer": 0.1870824053452116, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_nb_samtale_loss": 0.5455772876739502, |
|
"eval_nb_samtale_runtime": 38.7172, |
|
"eval_nb_samtale_samples_per_second": 13.766, |
|
"eval_nb_samtale_steps_per_second": 0.439, |
|
"eval_nb_samtale_wer": 0.23990885416666666, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.949782371520996, |
|
"learning_rate": 9.779694237553901e-05, |
|
"loss": 0.4692, |
|
"step": 2626 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bigbrother_loss": 1.9343265295028687, |
|
"eval_bigbrother_runtime": 41.3509, |
|
"eval_bigbrother_samples_per_second": 33.131, |
|
"eval_bigbrother_steps_per_second": 1.04, |
|
"eval_bigbrother_wer": 0.5653750100700878, |
|
"step": 2626 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_NB_RUND_loss": 0.3552016317844391, |
|
"eval_NB_RUND_runtime": 86.1673, |
|
"eval_NB_RUND_samples_per_second": 21.795, |
|
"eval_NB_RUND_steps_per_second": 0.685, |
|
"eval_NB_RUND_wer": 0.1984714400643604, |
|
"step": 2626 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_rundkast_loss": 0.29250362515449524, |
|
"eval_rundkast_runtime": 33.3182, |
|
"eval_rundkast_samples_per_second": 40.368, |
|
"eval_rundkast_steps_per_second": 1.291, |
|
"eval_rundkast_wer": 0.17181037225580656, |
|
"step": 2626 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_nb_samtale_loss": 0.5133547782897949, |
|
"eval_nb_samtale_runtime": 37.6446, |
|
"eval_nb_samtale_samples_per_second": 14.159, |
|
"eval_nb_samtale_steps_per_second": 0.452, |
|
"eval_nb_samtale_wer": 0.224609375, |
|
"step": 2626 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.300737142562866, |
|
"learning_rate": 9.522344178753431e-05, |
|
"loss": 0.4264, |
|
"step": 3939 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bigbrother_loss": 1.831033706665039, |
|
"eval_bigbrother_runtime": 41.3456, |
|
"eval_bigbrother_samples_per_second": 33.135, |
|
"eval_bigbrother_steps_per_second": 1.04, |
|
"eval_bigbrother_wer": 0.5553049222589221, |
|
"step": 3939 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_NB_RUND_loss": 0.33576926589012146, |
|
"eval_NB_RUND_runtime": 85.9654, |
|
"eval_NB_RUND_samples_per_second": 21.846, |
|
"eval_NB_RUND_steps_per_second": 0.686, |
|
"eval_NB_RUND_wer": 0.19296057924376508, |
|
"step": 3939 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_rundkast_loss": 0.2803370952606201, |
|
"eval_rundkast_runtime": 33.2875, |
|
"eval_rundkast_samples_per_second": 40.406, |
|
"eval_rundkast_steps_per_second": 1.292, |
|
"eval_rundkast_wer": 0.16457206490614062, |
|
"step": 3939 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_nb_samtale_loss": 0.47577109932899475, |
|
"eval_nb_samtale_runtime": 37.7954, |
|
"eval_nb_samtale_samples_per_second": 14.102, |
|
"eval_nb_samtale_steps_per_second": 0.45, |
|
"eval_nb_samtale_wer": 0.22102864583333334, |
|
"step": 3939 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.673075199127197, |
|
"learning_rate": 9.26499411995296e-05, |
|
"loss": 0.397, |
|
"step": 5252 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bigbrother_loss": 1.7913014888763428, |
|
"eval_bigbrother_runtime": 41.2837, |
|
"eval_bigbrother_samples_per_second": 33.185, |
|
"eval_bigbrother_steps_per_second": 1.042, |
|
"eval_bigbrother_wer": 0.5413679207282688, |
|
"step": 5252 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_NB_RUND_loss": 0.3278275728225708, |
|
"eval_NB_RUND_runtime": 85.8041, |
|
"eval_NB_RUND_samples_per_second": 21.887, |
|
"eval_NB_RUND_steps_per_second": 0.688, |
|
"eval_NB_RUND_wer": 0.1829847144006436, |
|
"step": 5252 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_rundkast_loss": 0.2707855999469757, |
|
"eval_rundkast_runtime": 33.1752, |
|
"eval_rundkast_samples_per_second": 40.542, |
|
"eval_rundkast_steps_per_second": 1.296, |
|
"eval_rundkast_wer": 0.15431116767419661, |
|
"step": 5252 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_nb_samtale_loss": 0.47150808572769165, |
|
"eval_nb_samtale_runtime": 37.7895, |
|
"eval_nb_samtale_samples_per_second": 14.104, |
|
"eval_nb_samtale_steps_per_second": 0.45, |
|
"eval_nb_samtale_wer": 0.21158854166666666, |
|
"step": 5252 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.968538999557495, |
|
"learning_rate": 9.00764406115249e-05, |
|
"loss": 0.3742, |
|
"step": 6565 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bigbrother_loss": 1.9187724590301514, |
|
"eval_bigbrother_runtime": 41.3465, |
|
"eval_bigbrother_samples_per_second": 33.135, |
|
"eval_bigbrother_steps_per_second": 1.04, |
|
"eval_bigbrother_wer": 0.5373398856038024, |
|
"step": 6565 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_NB_RUND_loss": 0.3254011571407318, |
|
"eval_NB_RUND_runtime": 85.8831, |
|
"eval_NB_RUND_samples_per_second": 21.867, |
|
"eval_NB_RUND_steps_per_second": 0.687, |
|
"eval_NB_RUND_wer": 0.17666934835076428, |
|
"step": 6565 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_rundkast_loss": 0.2743120491504669, |
|
"eval_rundkast_runtime": 32.9681, |
|
"eval_rundkast_samples_per_second": 40.797, |
|
"eval_rundkast_steps_per_second": 1.304, |
|
"eval_rundkast_wer": 0.14937957365574292, |
|
"step": 6565 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_nb_samtale_loss": 0.453761488199234, |
|
"eval_nb_samtale_runtime": 38.2375, |
|
"eval_nb_samtale_samples_per_second": 13.939, |
|
"eval_nb_samtale_steps_per_second": 0.445, |
|
"eval_nb_samtale_wer": 0.20475260416666666, |
|
"step": 6565 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.727647304534912, |
|
"learning_rate": 8.750490003920032e-05, |
|
"loss": 0.3559, |
|
"step": 7878 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bigbrother_loss": 1.8001114130020142, |
|
"eval_bigbrother_runtime": 41.331, |
|
"eval_bigbrother_samples_per_second": 33.147, |
|
"eval_bigbrother_steps_per_second": 1.04, |
|
"eval_bigbrother_wer": 0.5302505437847418, |
|
"step": 7878 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_NB_RUND_loss": 0.31470128893852234, |
|
"eval_NB_RUND_runtime": 85.2967, |
|
"eval_NB_RUND_samples_per_second": 22.017, |
|
"eval_NB_RUND_steps_per_second": 0.692, |
|
"eval_NB_RUND_wer": 0.1740949316170555, |
|
"step": 7878 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_rundkast_loss": 0.26562032103538513, |
|
"eval_rundkast_runtime": 33.1186, |
|
"eval_rundkast_samples_per_second": 40.612, |
|
"eval_rundkast_steps_per_second": 1.298, |
|
"eval_rundkast_wer": 0.14802736239261852, |
|
"step": 7878 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_nb_samtale_loss": 0.43801799416542053, |
|
"eval_nb_samtale_runtime": 37.7602, |
|
"eval_nb_samtale_samples_per_second": 14.115, |
|
"eval_nb_samtale_steps_per_second": 0.45, |
|
"eval_nb_samtale_wer": 0.20100911458333334, |
|
"step": 7878 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 4.560679912567139, |
|
"learning_rate": 8.493139945119561e-05, |
|
"loss": 0.3425, |
|
"step": 9191 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bigbrother_loss": 1.8266634941101074, |
|
"eval_bigbrother_runtime": 41.178, |
|
"eval_bigbrother_samples_per_second": 33.27, |
|
"eval_bigbrother_steps_per_second": 1.044, |
|
"eval_bigbrother_wer": 0.5329090469668896, |
|
"step": 9191 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_NB_RUND_loss": 0.3031991124153137, |
|
"eval_NB_RUND_runtime": 85.2734, |
|
"eval_NB_RUND_samples_per_second": 22.023, |
|
"eval_NB_RUND_steps_per_second": 0.692, |
|
"eval_NB_RUND_wer": 0.168141592920354, |
|
"step": 9191 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_rundkast_loss": 0.2632894814014435, |
|
"eval_rundkast_runtime": 33.1093, |
|
"eval_rundkast_samples_per_second": 40.623, |
|
"eval_rundkast_steps_per_second": 1.299, |
|
"eval_rundkast_wer": 0.14007317849188675, |
|
"step": 9191 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_nb_samtale_loss": 0.4041052460670471, |
|
"eval_nb_samtale_runtime": 37.7437, |
|
"eval_nb_samtale_samples_per_second": 14.122, |
|
"eval_nb_samtale_steps_per_second": 0.45, |
|
"eval_nb_samtale_wer": 0.1962890625, |
|
"step": 9191 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.754389524459839, |
|
"learning_rate": 8.235985887887103e-05, |
|
"loss": 0.3264, |
|
"step": 10504 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bigbrother_loss": 1.930435299873352, |
|
"eval_bigbrother_runtime": 41.8539, |
|
"eval_bigbrother_samples_per_second": 32.733, |
|
"eval_bigbrother_steps_per_second": 1.027, |
|
"eval_bigbrother_wer": 0.5231612019656812, |
|
"step": 10504 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_NB_RUND_loss": 0.3151450455188751, |
|
"eval_NB_RUND_runtime": 106.9286, |
|
"eval_NB_RUND_samples_per_second": 17.563, |
|
"eval_NB_RUND_steps_per_second": 0.552, |
|
"eval_NB_RUND_wer": 0.16790024135156878, |
|
"step": 10504 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_rundkast_loss": 0.2726036012172699, |
|
"eval_rundkast_runtime": 37.1374, |
|
"eval_rundkast_samples_per_second": 36.217, |
|
"eval_rundkast_steps_per_second": 1.158, |
|
"eval_rundkast_wer": 0.1422208081450843, |
|
"step": 10504 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_nb_samtale_loss": 0.422342449426651, |
|
"eval_nb_samtale_runtime": 37.6557, |
|
"eval_nb_samtale_samples_per_second": 14.155, |
|
"eval_nb_samtale_steps_per_second": 0.451, |
|
"eval_nb_samtale_wer": 0.19490559895833334, |
|
"step": 10504 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.9258495569229126, |
|
"learning_rate": 7.978635829086632e-05, |
|
"loss": 0.3126, |
|
"step": 11817 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bigbrother_loss": 1.8571630716323853, |
|
"eval_bigbrother_runtime": 41.3864, |
|
"eval_bigbrother_samples_per_second": 33.103, |
|
"eval_bigbrother_steps_per_second": 1.039, |
|
"eval_bigbrother_wer": 0.524611294610489, |
|
"step": 11817 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_NB_RUND_loss": 0.3117910325527191, |
|
"eval_NB_RUND_runtime": 86.1461, |
|
"eval_NB_RUND_samples_per_second": 21.8, |
|
"eval_NB_RUND_steps_per_second": 0.685, |
|
"eval_NB_RUND_wer": 0.16287208366854383, |
|
"step": 11817 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_rundkast_loss": 0.27052268385887146, |
|
"eval_rundkast_runtime": 33.2454, |
|
"eval_rundkast_samples_per_second": 40.457, |
|
"eval_rundkast_steps_per_second": 1.293, |
|
"eval_rundkast_wer": 0.1399140948138721, |
|
"step": 11817 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_nb_samtale_loss": 0.41521695256233215, |
|
"eval_nb_samtale_runtime": 37.6049, |
|
"eval_nb_samtale_samples_per_second": 14.174, |
|
"eval_nb_samtale_steps_per_second": 0.452, |
|
"eval_nb_samtale_wer": 0.18636067708333334, |
|
"step": 11817 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.801841735839844, |
|
"learning_rate": 7.721285770286163e-05, |
|
"loss": 0.3009, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bigbrother_loss": 1.9679045677185059, |
|
"eval_bigbrother_runtime": 41.3496, |
|
"eval_bigbrother_samples_per_second": 33.132, |
|
"eval_bigbrother_steps_per_second": 1.04, |
|
"eval_bigbrother_wer": 0.5192942882461935, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_NB_RUND_loss": 0.31853994727134705, |
|
"eval_NB_RUND_runtime": 85.1737, |
|
"eval_NB_RUND_samples_per_second": 22.049, |
|
"eval_NB_RUND_steps_per_second": 0.693, |
|
"eval_NB_RUND_wer": 0.16182622687047465, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_rundkast_loss": 0.2728247046470642, |
|
"eval_rundkast_runtime": 32.73, |
|
"eval_rundkast_samples_per_second": 41.094, |
|
"eval_rundkast_steps_per_second": 1.314, |
|
"eval_rundkast_wer": 0.13880050906776964, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_nb_samtale_loss": 0.4334540069103241, |
|
"eval_nb_samtale_runtime": 37.2959, |
|
"eval_nb_samtale_samples_per_second": 14.291, |
|
"eval_nb_samtale_steps_per_second": 0.456, |
|
"eval_nb_samtale_wer": 0.185302734375, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.734304428100586, |
|
"learning_rate": 7.464131713053705e-05, |
|
"loss": 0.2911, |
|
"step": 14443 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bigbrother_loss": 2.009200096130371, |
|
"eval_bigbrother_runtime": 41.4463, |
|
"eval_bigbrother_samples_per_second": 33.055, |
|
"eval_bigbrother_steps_per_second": 1.037, |
|
"eval_bigbrother_wer": 0.5190526061387255, |
|
"step": 14443 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_NB_RUND_loss": 0.3390868902206421, |
|
"eval_NB_RUND_runtime": 85.2442, |
|
"eval_NB_RUND_samples_per_second": 22.031, |
|
"eval_NB_RUND_steps_per_second": 0.692, |
|
"eval_NB_RUND_wer": 0.16061946902654867, |
|
"step": 14443 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_rundkast_loss": 0.3032366931438446, |
|
"eval_rundkast_runtime": 33.1436, |
|
"eval_rundkast_samples_per_second": 40.581, |
|
"eval_rundkast_steps_per_second": 1.297, |
|
"eval_rundkast_wer": 0.1363347120585428, |
|
"step": 14443 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_nb_samtale_loss": 0.4296092391014099, |
|
"eval_nb_samtale_runtime": 37.8422, |
|
"eval_nb_samtale_samples_per_second": 14.085, |
|
"eval_nb_samtale_steps_per_second": 0.449, |
|
"eval_nb_samtale_wer": 0.184814453125, |
|
"step": 14443 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 3.582427740097046, |
|
"learning_rate": 7.206781654253235e-05, |
|
"loss": 0.2795, |
|
"step": 15756 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bigbrother_loss": 2.00494122505188, |
|
"eval_bigbrother_runtime": 41.5443, |
|
"eval_bigbrother_samples_per_second": 32.977, |
|
"eval_bigbrother_steps_per_second": 1.035, |
|
"eval_bigbrother_wer": 0.5176025134939176, |
|
"step": 15756 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_NB_RUND_loss": 0.3187481760978699, |
|
"eval_NB_RUND_runtime": 85.8022, |
|
"eval_NB_RUND_samples_per_second": 21.888, |
|
"eval_NB_RUND_steps_per_second": 0.688, |
|
"eval_NB_RUND_wer": 0.15901045856798068, |
|
"step": 15756 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_rundkast_loss": 0.2752975821495056, |
|
"eval_rundkast_runtime": 33.305, |
|
"eval_rundkast_samples_per_second": 40.384, |
|
"eval_rundkast_steps_per_second": 1.291, |
|
"eval_rundkast_wer": 0.1350620426344257, |
|
"step": 15756 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_nb_samtale_loss": 0.42811375856399536, |
|
"eval_nb_samtale_runtime": 37.713, |
|
"eval_nb_samtale_samples_per_second": 14.133, |
|
"eval_nb_samtale_steps_per_second": 0.451, |
|
"eval_nb_samtale_wer": 0.18310546875, |
|
"step": 15756 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.836462020874023, |
|
"learning_rate": 6.94982359858879e-05, |
|
"loss": 0.2712, |
|
"step": 17069 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bigbrother_loss": 1.9352320432662964, |
|
"eval_bigbrother_runtime": 41.182, |
|
"eval_bigbrother_samples_per_second": 33.267, |
|
"eval_bigbrother_steps_per_second": 1.044, |
|
"eval_bigbrother_wer": 0.5195359703536615, |
|
"step": 17069 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_NB_RUND_loss": 0.305833101272583, |
|
"eval_NB_RUND_runtime": 84.7929, |
|
"eval_NB_RUND_samples_per_second": 22.148, |
|
"eval_NB_RUND_steps_per_second": 0.696, |
|
"eval_NB_RUND_wer": 0.1578037007240547, |
|
"step": 17069 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_rundkast_loss": 0.26513412594795227, |
|
"eval_rundkast_runtime": 32.9292, |
|
"eval_rundkast_samples_per_second": 40.845, |
|
"eval_rundkast_steps_per_second": 1.306, |
|
"eval_rundkast_wer": 0.13434616608335984, |
|
"step": 17069 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_nb_samtale_loss": 0.4084182679653168, |
|
"eval_nb_samtale_runtime": 37.2213, |
|
"eval_nb_samtale_samples_per_second": 14.32, |
|
"eval_nb_samtale_steps_per_second": 0.457, |
|
"eval_nb_samtale_wer": 0.18131510416666666, |
|
"step": 17069 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 3.973949909210205, |
|
"learning_rate": 6.692473539788319e-05, |
|
"loss": 0.2655, |
|
"step": 18382 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bigbrother_loss": 1.9412481784820557, |
|
"eval_bigbrother_runtime": 41.0966, |
|
"eval_bigbrother_samples_per_second": 33.336, |
|
"eval_bigbrother_steps_per_second": 1.046, |
|
"eval_bigbrother_wer": 0.5118021429146862, |
|
"step": 18382 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_NB_RUND_loss": 0.3095969557762146, |
|
"eval_NB_RUND_runtime": 84.8936, |
|
"eval_NB_RUND_samples_per_second": 22.122, |
|
"eval_NB_RUND_steps_per_second": 0.695, |
|
"eval_NB_RUND_wer": 0.15530973451327434, |
|
"step": 18382 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_rundkast_loss": 0.2749696373939514, |
|
"eval_rundkast_runtime": 33.0503, |
|
"eval_rundkast_samples_per_second": 40.696, |
|
"eval_rundkast_steps_per_second": 1.301, |
|
"eval_rundkast_wer": 0.1327553293032135, |
|
"step": 18382 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_nb_samtale_loss": 0.395680695772171, |
|
"eval_nb_samtale_runtime": 37.1829, |
|
"eval_nb_samtale_samples_per_second": 14.335, |
|
"eval_nb_samtale_steps_per_second": 0.457, |
|
"eval_nb_samtale_wer": 0.17789713541666666, |
|
"step": 18382 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.9156057834625244, |
|
"learning_rate": 6.435123480987849e-05, |
|
"loss": 0.2557, |
|
"step": 19695 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bigbrother_loss": 1.9280781745910645, |
|
"eval_bigbrother_runtime": 42.2614, |
|
"eval_bigbrother_samples_per_second": 32.417, |
|
"eval_bigbrother_steps_per_second": 1.017, |
|
"eval_bigbrother_wer": 0.5109159751873036, |
|
"step": 19695 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_NB_RUND_loss": 0.2862784266471863, |
|
"eval_NB_RUND_runtime": 85.5508, |
|
"eval_NB_RUND_samples_per_second": 21.952, |
|
"eval_NB_RUND_steps_per_second": 0.69, |
|
"eval_NB_RUND_wer": 0.1563958165728077, |
|
"step": 19695 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_rundkast_loss": 0.2504226267337799, |
|
"eval_rundkast_runtime": 33.4378, |
|
"eval_rundkast_samples_per_second": 40.224, |
|
"eval_rundkast_steps_per_second": 1.286, |
|
"eval_rundkast_wer": 0.13426662424435254, |
|
"step": 19695 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_nb_samtale_loss": 0.37682539224624634, |
|
"eval_nb_samtale_runtime": 37.3809, |
|
"eval_nb_samtale_samples_per_second": 14.259, |
|
"eval_nb_samtale_steps_per_second": 0.455, |
|
"eval_nb_samtale_wer": 0.17887369791666666, |
|
"step": 19695 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 4.767539024353027, |
|
"learning_rate": 6.17796942375539e-05, |
|
"loss": 0.2497, |
|
"step": 21008 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bigbrother_loss": 2.0177502632141113, |
|
"eval_bigbrother_runtime": 41.1529, |
|
"eval_bigbrother_samples_per_second": 33.291, |
|
"eval_bigbrother_steps_per_second": 1.045, |
|
"eval_bigbrother_wer": 0.5074518649802626, |
|
"step": 21008 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_NB_RUND_loss": 0.32014137506484985, |
|
"eval_NB_RUND_runtime": 85.0566, |
|
"eval_NB_RUND_samples_per_second": 22.079, |
|
"eval_NB_RUND_steps_per_second": 0.694, |
|
"eval_NB_RUND_wer": 0.15522928399034594, |
|
"step": 21008 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_rundkast_loss": 0.28833797574043274, |
|
"eval_rundkast_runtime": 33.0799, |
|
"eval_rundkast_samples_per_second": 40.659, |
|
"eval_rundkast_steps_per_second": 1.3, |
|
"eval_rundkast_wer": 0.13219853643016227, |
|
"step": 21008 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_nb_samtale_loss": 0.40036484599113464, |
|
"eval_nb_samtale_runtime": 37.391, |
|
"eval_nb_samtale_samples_per_second": 14.255, |
|
"eval_nb_samtale_steps_per_second": 0.455, |
|
"eval_nb_samtale_wer": 0.17838541666666666, |
|
"step": 21008 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 2.875094413757324, |
|
"learning_rate": 5.9206193649549204e-05, |
|
"loss": 0.2424, |
|
"step": 22321 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bigbrother_loss": 2.1849660873413086, |
|
"eval_bigbrother_runtime": 41.489, |
|
"eval_bigbrother_samples_per_second": 33.021, |
|
"eval_bigbrother_steps_per_second": 1.036, |
|
"eval_bigbrother_wer": 0.51494401031177, |
|
"step": 22321 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_NB_RUND_loss": 0.32200556993484497, |
|
"eval_NB_RUND_runtime": 85.425, |
|
"eval_NB_RUND_samples_per_second": 21.984, |
|
"eval_NB_RUND_steps_per_second": 0.691, |
|
"eval_NB_RUND_wer": 0.15289621882542237, |
|
"step": 22321 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_rundkast_loss": 0.28678199648857117, |
|
"eval_rundkast_runtime": 33.351, |
|
"eval_rundkast_samples_per_second": 40.329, |
|
"eval_rundkast_steps_per_second": 1.289, |
|
"eval_rundkast_wer": 0.13180082723512568, |
|
"step": 22321 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_nb_samtale_loss": 0.4106721878051758, |
|
"eval_nb_samtale_runtime": 37.5903, |
|
"eval_nb_samtale_samples_per_second": 14.179, |
|
"eval_nb_samtale_steps_per_second": 0.452, |
|
"eval_nb_samtale_wer": 0.17415364583333334, |
|
"step": 22321 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.949888229370117, |
|
"learning_rate": 5.66326930615445e-05, |
|
"loss": 0.2374, |
|
"step": 23634 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bigbrother_loss": 2.1056079864501953, |
|
"eval_bigbrother_runtime": 41.6333, |
|
"eval_bigbrother_samples_per_second": 32.906, |
|
"eval_bigbrother_steps_per_second": 1.033, |
|
"eval_bigbrother_wer": 0.5105937323773463, |
|
"step": 23634 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_NB_RUND_loss": 0.32035475969314575, |
|
"eval_NB_RUND_runtime": 87.7221, |
|
"eval_NB_RUND_samples_per_second": 21.409, |
|
"eval_NB_RUND_steps_per_second": 0.673, |
|
"eval_NB_RUND_wer": 0.15329847144006437, |
|
"step": 23634 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_rundkast_loss": 0.29013389348983765, |
|
"eval_rundkast_runtime": 33.2901, |
|
"eval_rundkast_samples_per_second": 40.402, |
|
"eval_rundkast_steps_per_second": 1.292, |
|
"eval_rundkast_wer": 0.13005090677696468, |
|
"step": 23634 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_nb_samtale_loss": 0.39669641852378845, |
|
"eval_nb_samtale_runtime": 38.5507, |
|
"eval_nb_samtale_samples_per_second": 13.826, |
|
"eval_nb_samtale_steps_per_second": 0.441, |
|
"eval_nb_samtale_wer": 0.17724609375, |
|
"step": 23634 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 5.62293004989624, |
|
"learning_rate": 5.4063112504900036e-05, |
|
"loss": 0.2313, |
|
"step": 24947 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bigbrother_loss": 2.0347869396209717, |
|
"eval_bigbrother_runtime": 41.8028, |
|
"eval_bigbrother_samples_per_second": 32.773, |
|
"eval_bigbrother_steps_per_second": 1.029, |
|
"eval_bigbrother_wer": 0.503584951260775, |
|
"step": 24947 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_NB_RUND_loss": 0.3127482533454895, |
|
"eval_NB_RUND_runtime": 85.6096, |
|
"eval_NB_RUND_samples_per_second": 21.937, |
|
"eval_NB_RUND_steps_per_second": 0.689, |
|
"eval_NB_RUND_wer": 0.15100563153660498, |
|
"step": 24947 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_rundkast_loss": 0.2715360224246979, |
|
"eval_rundkast_runtime": 33.3269, |
|
"eval_rundkast_samples_per_second": 40.358, |
|
"eval_rundkast_steps_per_second": 1.29, |
|
"eval_rundkast_wer": 0.1292554883868915, |
|
"step": 24947 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_nb_samtale_loss": 0.4169124960899353, |
|
"eval_nb_samtale_runtime": 37.555, |
|
"eval_nb_samtale_samples_per_second": 14.193, |
|
"eval_nb_samtale_steps_per_second": 0.453, |
|
"eval_nb_samtale_wer": 0.17350260416666666, |
|
"step": 24947 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.86464262008667, |
|
"learning_rate": 5.148961191689534e-05, |
|
"loss": 0.2227, |
|
"step": 26260 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bigbrother_loss": 2.1283321380615234, |
|
"eval_bigbrother_runtime": 41.6851, |
|
"eval_bigbrother_samples_per_second": 32.865, |
|
"eval_bigbrother_steps_per_second": 1.032, |
|
"eval_bigbrother_wer": 0.5016514944010312, |
|
"step": 26260 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_NB_RUND_loss": 0.3282929062843323, |
|
"eval_NB_RUND_runtime": 86.0672, |
|
"eval_NB_RUND_samples_per_second": 21.82, |
|
"eval_NB_RUND_steps_per_second": 0.686, |
|
"eval_NB_RUND_wer": 0.15160901045856798, |
|
"step": 26260 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_rundkast_loss": 0.2934817969799042, |
|
"eval_rundkast_runtime": 33.1568, |
|
"eval_rundkast_samples_per_second": 40.565, |
|
"eval_rundkast_steps_per_second": 1.297, |
|
"eval_rundkast_wer": 0.1306076996500159, |
|
"step": 26260 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_nb_samtale_loss": 0.41641756892204285, |
|
"eval_nb_samtale_runtime": 37.7201, |
|
"eval_nb_samtale_samples_per_second": 14.13, |
|
"eval_nb_samtale_steps_per_second": 0.451, |
|
"eval_nb_samtale_wer": 0.17301432291666666, |
|
"step": 26260 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 3.0193545818328857, |
|
"learning_rate": 4.8916111328890636e-05, |
|
"loss": 0.2214, |
|
"step": 27573 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bigbrother_loss": 2.2207038402557373, |
|
"eval_bigbrother_runtime": 41.5705, |
|
"eval_bigbrother_samples_per_second": 32.956, |
|
"eval_bigbrother_steps_per_second": 1.034, |
|
"eval_bigbrother_wer": 0.50656569725288, |
|
"step": 27573 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_NB_RUND_loss": 0.32080039381980896, |
|
"eval_NB_RUND_runtime": 85.8822, |
|
"eval_NB_RUND_samples_per_second": 21.867, |
|
"eval_NB_RUND_steps_per_second": 0.687, |
|
"eval_NB_RUND_wer": 0.1498793242156074, |
|
"step": 27573 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_rundkast_loss": 0.28392452001571655, |
|
"eval_rundkast_runtime": 33.3668, |
|
"eval_rundkast_samples_per_second": 40.31, |
|
"eval_rundkast_steps_per_second": 1.289, |
|
"eval_rundkast_wer": 0.1287782373528476, |
|
"step": 27573 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_nb_samtale_loss": 0.4135919511318207, |
|
"eval_nb_samtale_runtime": 37.8061, |
|
"eval_nb_samtale_samples_per_second": 14.098, |
|
"eval_nb_samtale_steps_per_second": 0.45, |
|
"eval_nb_samtale_wer": 0.17097981770833334, |
|
"step": 27573 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 3.5664854049682617, |
|
"learning_rate": 4.6344570756566055e-05, |
|
"loss": 0.2162, |
|
"step": 28886 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bigbrother_loss": 2.1843111515045166, |
|
"eval_bigbrother_runtime": 40.759, |
|
"eval_bigbrother_samples_per_second": 33.612, |
|
"eval_bigbrother_steps_per_second": 1.055, |
|
"eval_bigbrother_wer": 0.5060823330379441, |
|
"step": 28886 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_NB_RUND_loss": 0.33185243606567383, |
|
"eval_NB_RUND_runtime": 84.3565, |
|
"eval_NB_RUND_samples_per_second": 22.263, |
|
"eval_NB_RUND_steps_per_second": 0.699, |
|
"eval_NB_RUND_wer": 0.1508447304907482, |
|
"step": 28886 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_rundkast_loss": 0.2995389699935913, |
|
"eval_rundkast_runtime": 32.6931, |
|
"eval_rundkast_samples_per_second": 41.14, |
|
"eval_rundkast_steps_per_second": 1.315, |
|
"eval_rundkast_wer": 0.12869869551384028, |
|
"step": 28886 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_nb_samtale_loss": 0.4128870964050293, |
|
"eval_nb_samtale_runtime": 36.8088, |
|
"eval_nb_samtale_samples_per_second": 14.48, |
|
"eval_nb_samtale_steps_per_second": 0.462, |
|
"eval_nb_samtale_wer": 0.173828125, |
|
"step": 28886 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 4.249322414398193, |
|
"learning_rate": 4.37749901999216e-05, |
|
"loss": 0.2125, |
|
"step": 30199 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bigbrother_loss": 2.460817575454712, |
|
"eval_bigbrother_runtime": 40.6315, |
|
"eval_bigbrother_samples_per_second": 33.718, |
|
"eval_bigbrother_steps_per_second": 1.058, |
|
"eval_bigbrother_wer": 0.5047128010956256, |
|
"step": 30199 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_NB_RUND_loss": 0.34763312339782715, |
|
"eval_NB_RUND_runtime": 84.2518, |
|
"eval_NB_RUND_samples_per_second": 22.29, |
|
"eval_NB_RUND_steps_per_second": 0.7, |
|
"eval_NB_RUND_wer": 0.14774738535800483, |
|
"step": 30199 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_rundkast_loss": 0.3159354031085968, |
|
"eval_rundkast_runtime": 32.702, |
|
"eval_rundkast_samples_per_second": 41.129, |
|
"eval_rundkast_steps_per_second": 1.315, |
|
"eval_rundkast_wer": 0.12758510976773782, |
|
"step": 30199 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_nb_samtale_loss": 0.42722073197364807, |
|
"eval_nb_samtale_runtime": 36.8013, |
|
"eval_nb_samtale_samples_per_second": 14.483, |
|
"eval_nb_samtale_steps_per_second": 0.462, |
|
"eval_nb_samtale_wer": 0.16796875, |
|
"step": 30199 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 3.8950681686401367, |
|
"learning_rate": 4.12014896119169e-05, |
|
"loss": 0.2054, |
|
"step": 31512 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_bigbrother_loss": 2.2196638584136963, |
|
"eval_bigbrother_runtime": 41.3959, |
|
"eval_bigbrother_samples_per_second": 33.095, |
|
"eval_bigbrother_steps_per_second": 1.039, |
|
"eval_bigbrother_wer": 0.4956900024168211, |
|
"step": 31512 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_NB_RUND_loss": 0.31967854499816895, |
|
"eval_NB_RUND_runtime": 85.4915, |
|
"eval_NB_RUND_samples_per_second": 21.967, |
|
"eval_NB_RUND_steps_per_second": 0.69, |
|
"eval_NB_RUND_wer": 0.14903459372485922, |
|
"step": 31512 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_rundkast_loss": 0.28724637627601624, |
|
"eval_rundkast_runtime": 32.6772, |
|
"eval_rundkast_samples_per_second": 41.16, |
|
"eval_rundkast_steps_per_second": 1.316, |
|
"eval_rundkast_wer": 0.12726694241170855, |
|
"step": 31512 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_nb_samtale_loss": 0.401165634393692, |
|
"eval_nb_samtale_runtime": 37.0545, |
|
"eval_nb_samtale_samples_per_second": 14.384, |
|
"eval_nb_samtale_steps_per_second": 0.459, |
|
"eval_nb_samtale_wer": 0.17106119791666666, |
|
"step": 31512 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 3.483825922012329, |
|
"learning_rate": 3.8627989023912194e-05, |
|
"loss": 0.2016, |
|
"step": 32825 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_bigbrother_loss": 2.3513925075531006, |
|
"eval_bigbrother_runtime": 40.9522, |
|
"eval_bigbrother_samples_per_second": 33.454, |
|
"eval_bigbrother_steps_per_second": 1.05, |
|
"eval_bigbrother_wer": 0.5014098122935632, |
|
"step": 32825 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_NB_RUND_loss": 0.3408574163913727, |
|
"eval_NB_RUND_runtime": 84.79, |
|
"eval_NB_RUND_samples_per_second": 22.149, |
|
"eval_NB_RUND_steps_per_second": 0.696, |
|
"eval_NB_RUND_wer": 0.14714400643604184, |
|
"step": 32825 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_rundkast_loss": 0.3113822042942047, |
|
"eval_rundkast_runtime": 33.2613, |
|
"eval_rundkast_samples_per_second": 40.437, |
|
"eval_rundkast_steps_per_second": 1.293, |
|
"eval_rundkast_wer": 0.12599427298759147, |
|
"step": 32825 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_nb_samtale_loss": 0.41482433676719666, |
|
"eval_nb_samtale_runtime": 36.9383, |
|
"eval_nb_samtale_samples_per_second": 14.429, |
|
"eval_nb_samtale_steps_per_second": 0.46, |
|
"eval_nb_samtale_wer": 0.16861979166666666, |
|
"step": 32825 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 5.882791042327881, |
|
"learning_rate": 3.6054488435907494e-05, |
|
"loss": 0.1986, |
|
"step": 34138 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_bigbrother_loss": 2.2447853088378906, |
|
"eval_bigbrother_runtime": 41.1993, |
|
"eval_bigbrother_samples_per_second": 33.253, |
|
"eval_bigbrother_steps_per_second": 1.044, |
|
"eval_bigbrother_wer": 0.4957705631193104, |
|
"step": 34138 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_NB_RUND_loss": 0.3391417860984802, |
|
"eval_NB_RUND_runtime": 86.9591, |
|
"eval_NB_RUND_samples_per_second": 21.596, |
|
"eval_NB_RUND_steps_per_second": 0.678, |
|
"eval_NB_RUND_wer": 0.14734513274336283, |
|
"step": 34138 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_rundkast_loss": 0.3099968433380127, |
|
"eval_rundkast_runtime": 32.6851, |
|
"eval_rundkast_samples_per_second": 41.15, |
|
"eval_rundkast_steps_per_second": 1.316, |
|
"eval_rundkast_wer": 0.12575564747056953, |
|
"step": 34138 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_nb_samtale_loss": 0.41232603788375854, |
|
"eval_nb_samtale_runtime": 37.0492, |
|
"eval_nb_samtale_samples_per_second": 14.386, |
|
"eval_nb_samtale_steps_per_second": 0.459, |
|
"eval_nb_samtale_wer": 0.16951497395833334, |
|
"step": 34138 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 2.837421178817749, |
|
"learning_rate": 3.348098784790279e-05, |
|
"loss": 0.1953, |
|
"step": 35451 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_bigbrother_loss": 2.240429401397705, |
|
"eval_bigbrother_runtime": 41.126, |
|
"eval_bigbrother_samples_per_second": 33.312, |
|
"eval_bigbrother_steps_per_second": 1.046, |
|
"eval_bigbrother_wer": 0.49552888101184245, |
|
"step": 35451 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_NB_RUND_loss": 0.3420700132846832, |
|
"eval_NB_RUND_runtime": 84.1024, |
|
"eval_NB_RUND_samples_per_second": 22.33, |
|
"eval_NB_RUND_steps_per_second": 0.702, |
|
"eval_NB_RUND_wer": 0.14738535800482702, |
|
"step": 35451 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_rundkast_loss": 0.3145124018192291, |
|
"eval_rundkast_runtime": 32.9045, |
|
"eval_rundkast_samples_per_second": 40.876, |
|
"eval_rundkast_steps_per_second": 1.307, |
|
"eval_rundkast_wer": 0.12591473114858415, |
|
"step": 35451 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_nb_samtale_loss": 0.4117932617664337, |
|
"eval_nb_samtale_runtime": 36.8227, |
|
"eval_nb_samtale_samples_per_second": 14.475, |
|
"eval_nb_samtale_steps_per_second": 0.462, |
|
"eval_nb_samtale_wer": 0.16943359375, |
|
"step": 35451 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 3.3868250846862793, |
|
"learning_rate": 3.0909447275578206e-05, |
|
"loss": 0.1939, |
|
"step": 36764 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_bigbrother_loss": 2.409648895263672, |
|
"eval_bigbrother_runtime": 41.1082, |
|
"eval_bigbrother_samples_per_second": 33.327, |
|
"eval_bigbrother_steps_per_second": 1.046, |
|
"eval_bigbrother_wer": 0.49536775960686374, |
|
"step": 36764 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_NB_RUND_loss": 0.34609299898147583, |
|
"eval_NB_RUND_runtime": 86.3212, |
|
"eval_NB_RUND_samples_per_second": 21.756, |
|
"eval_NB_RUND_steps_per_second": 0.683, |
|
"eval_NB_RUND_wer": 0.14569589702333066, |
|
"step": 36764 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_rundkast_loss": 0.31334388256073, |
|
"eval_rundkast_runtime": 32.8372, |
|
"eval_rundkast_samples_per_second": 40.96, |
|
"eval_rundkast_steps_per_second": 1.309, |
|
"eval_rundkast_wer": 0.12368755965637926, |
|
"step": 36764 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_nb_samtale_loss": 0.42863455414772034, |
|
"eval_nb_samtale_runtime": 37.033, |
|
"eval_nb_samtale_samples_per_second": 14.393, |
|
"eval_nb_samtale_steps_per_second": 0.459, |
|
"eval_nb_samtale_wer": 0.16829427083333334, |
|
"step": 36764 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 3.9038360118865967, |
|
"learning_rate": 2.8337906703253626e-05, |
|
"loss": 0.1883, |
|
"step": 38077 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_bigbrother_loss": 2.32140851020813, |
|
"eval_bigbrother_runtime": 40.9774, |
|
"eval_bigbrother_samples_per_second": 33.433, |
|
"eval_bigbrother_steps_per_second": 1.049, |
|
"eval_bigbrother_wer": 0.4960122452267784, |
|
"step": 38077 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_NB_RUND_loss": 0.35102906823158264, |
|
"eval_NB_RUND_runtime": 84.5748, |
|
"eval_NB_RUND_samples_per_second": 22.205, |
|
"eval_NB_RUND_steps_per_second": 0.698, |
|
"eval_NB_RUND_wer": 0.14646017699115044, |
|
"step": 38077 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_rundkast_loss": 0.31922492384910583, |
|
"eval_rundkast_runtime": 32.7767, |
|
"eval_rundkast_samples_per_second": 41.035, |
|
"eval_rundkast_steps_per_second": 1.312, |
|
"eval_rundkast_wer": 0.12496022908049634, |
|
"step": 38077 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_nb_samtale_loss": 0.4308791756629944, |
|
"eval_nb_samtale_runtime": 36.5599, |
|
"eval_nb_samtale_samples_per_second": 14.579, |
|
"eval_nb_samtale_steps_per_second": 0.465, |
|
"eval_nb_samtale_wer": 0.167724609375, |
|
"step": 38077 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 2.5224320888519287, |
|
"learning_rate": 2.5764406115248922e-05, |
|
"loss": 0.1857, |
|
"step": 39390 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_bigbrother_loss": 2.3643717765808105, |
|
"eval_bigbrother_runtime": 41.8962, |
|
"eval_bigbrother_samples_per_second": 32.7, |
|
"eval_bigbrother_steps_per_second": 1.026, |
|
"eval_bigbrother_wer": 0.496415048739225, |
|
"step": 39390 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_NB_RUND_loss": 0.3557446002960205, |
|
"eval_NB_RUND_runtime": 143.0815, |
|
"eval_NB_RUND_samples_per_second": 13.125, |
|
"eval_NB_RUND_steps_per_second": 0.412, |
|
"eval_NB_RUND_wer": 0.14617860016090103, |
|
"step": 39390 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_rundkast_loss": 0.3290172815322876, |
|
"eval_rundkast_runtime": 32.9137, |
|
"eval_rundkast_samples_per_second": 40.864, |
|
"eval_rundkast_steps_per_second": 1.306, |
|
"eval_rundkast_wer": 0.12623289850461342, |
|
"step": 39390 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_nb_samtale_loss": 0.42328643798828125, |
|
"eval_nb_samtale_runtime": 36.9569, |
|
"eval_nb_samtale_samples_per_second": 14.422, |
|
"eval_nb_samtale_steps_per_second": 0.46, |
|
"eval_nb_samtale_wer": 0.16715494791666666, |
|
"step": 39390 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 8.252937316894531, |
|
"learning_rate": 2.319090552724422e-05, |
|
"loss": 0.184, |
|
"step": 40703 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_bigbrother_loss": 2.3923449516296387, |
|
"eval_bigbrother_runtime": 41.1367, |
|
"eval_bigbrother_samples_per_second": 33.304, |
|
"eval_bigbrother_steps_per_second": 1.045, |
|
"eval_bigbrother_wer": 0.4919036493998228, |
|
"step": 40703 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_NB_RUND_loss": 0.35123324394226074, |
|
"eval_NB_RUND_runtime": 85.5059, |
|
"eval_NB_RUND_samples_per_second": 21.963, |
|
"eval_NB_RUND_steps_per_second": 0.69, |
|
"eval_NB_RUND_wer": 0.14473049074818986, |
|
"step": 40703 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_rundkast_loss": 0.31798994541168213, |
|
"eval_rundkast_runtime": 33.3879, |
|
"eval_rundkast_samples_per_second": 40.284, |
|
"eval_rundkast_steps_per_second": 1.288, |
|
"eval_rundkast_wer": 0.1243238943684378, |
|
"step": 40703 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_nb_samtale_loss": 0.4355938732624054, |
|
"eval_nb_samtale_runtime": 37.0847, |
|
"eval_nb_samtale_samples_per_second": 14.372, |
|
"eval_nb_samtale_steps_per_second": 0.458, |
|
"eval_nb_samtale_wer": 0.16536458333333334, |
|
"step": 40703 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 4.856067657470703, |
|
"learning_rate": 2.061936495491964e-05, |
|
"loss": 0.1795, |
|
"step": 42016 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_bigbrother_loss": 2.3352065086364746, |
|
"eval_bigbrother_runtime": 41.1475, |
|
"eval_bigbrother_samples_per_second": 33.295, |
|
"eval_bigbrother_steps_per_second": 1.045, |
|
"eval_bigbrother_wer": 0.4930314992346733, |
|
"step": 42016 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_NB_RUND_loss": 0.3572410047054291, |
|
"eval_NB_RUND_runtime": 105.2885, |
|
"eval_NB_RUND_samples_per_second": 17.837, |
|
"eval_NB_RUND_steps_per_second": 0.56, |
|
"eval_NB_RUND_wer": 0.14477071600965405, |
|
"step": 42016 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_rundkast_loss": 0.32713791728019714, |
|
"eval_rundkast_runtime": 33.0242, |
|
"eval_rundkast_samples_per_second": 40.728, |
|
"eval_rundkast_steps_per_second": 1.302, |
|
"eval_rundkast_wer": 0.1243238943684378, |
|
"step": 42016 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_nb_samtale_loss": 0.43330347537994385, |
|
"eval_nb_samtale_runtime": 36.808, |
|
"eval_nb_samtale_samples_per_second": 14.481, |
|
"eval_nb_samtale_steps_per_second": 0.462, |
|
"eval_nb_samtale_wer": 0.16560872395833334, |
|
"step": 42016 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 5.330729961395264, |
|
"learning_rate": 1.8045864366914934e-05, |
|
"loss": 0.1797, |
|
"step": 43329 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_bigbrother_loss": 2.4726736545562744, |
|
"eval_bigbrother_runtime": 41.9313, |
|
"eval_bigbrother_samples_per_second": 32.672, |
|
"eval_bigbrother_steps_per_second": 1.025, |
|
"eval_bigbrother_wer": 0.4939982276645452, |
|
"step": 43329 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_NB_RUND_loss": 0.36341410875320435, |
|
"eval_NB_RUND_runtime": 84.523, |
|
"eval_NB_RUND_samples_per_second": 22.219, |
|
"eval_NB_RUND_steps_per_second": 0.698, |
|
"eval_NB_RUND_wer": 0.14710378117457765, |
|
"step": 43329 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_rundkast_loss": 0.3344702422618866, |
|
"eval_rundkast_runtime": 32.8493, |
|
"eval_rundkast_samples_per_second": 40.945, |
|
"eval_rundkast_steps_per_second": 1.309, |
|
"eval_rundkast_wer": 0.12750556792873052, |
|
"step": 43329 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_nb_samtale_loss": 0.43638789653778076, |
|
"eval_nb_samtale_runtime": 36.859, |
|
"eval_nb_samtale_samples_per_second": 14.46, |
|
"eval_nb_samtale_steps_per_second": 0.461, |
|
"eval_nb_samtale_wer": 0.16756184895833334, |
|
"step": 43329 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"grad_norm": 2.472703218460083, |
|
"learning_rate": 1.5474323794590357e-05, |
|
"loss": 0.1792, |
|
"step": 44642 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_bigbrother_loss": 2.369781255722046, |
|
"eval_bigbrother_runtime": 41.5232, |
|
"eval_bigbrother_samples_per_second": 32.994, |
|
"eval_bigbrother_steps_per_second": 1.036, |
|
"eval_bigbrother_wer": 0.4910980423749295, |
|
"step": 44642 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_NB_RUND_loss": 0.3522418141365051, |
|
"eval_NB_RUND_runtime": 86.176, |
|
"eval_NB_RUND_samples_per_second": 21.793, |
|
"eval_NB_RUND_steps_per_second": 0.685, |
|
"eval_NB_RUND_wer": 0.14509251810136767, |
|
"step": 44642 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_rundkast_loss": 0.3241034746170044, |
|
"eval_rundkast_runtime": 32.7879, |
|
"eval_rundkast_samples_per_second": 41.021, |
|
"eval_rundkast_steps_per_second": 1.311, |
|
"eval_rundkast_wer": 0.12360801781737193, |
|
"step": 44642 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_nb_samtale_loss": 0.42330336570739746, |
|
"eval_nb_samtale_runtime": 37.3979, |
|
"eval_nb_samtale_samples_per_second": 14.252, |
|
"eval_nb_samtale_steps_per_second": 0.455, |
|
"eval_nb_samtale_wer": 0.16707356770833334, |
|
"step": 44642 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 5.900428295135498, |
|
"learning_rate": 1.2900823206585652e-05, |
|
"loss": 0.175, |
|
"step": 45955 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_bigbrother_loss": 2.3766987323760986, |
|
"eval_bigbrother_runtime": 42.6165, |
|
"eval_bigbrother_samples_per_second": 32.147, |
|
"eval_bigbrother_steps_per_second": 1.009, |
|
"eval_bigbrother_wer": 0.48900346411020706, |
|
"step": 45955 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_NB_RUND_loss": 0.359698623418808, |
|
"eval_NB_RUND_runtime": 84.4668, |
|
"eval_NB_RUND_samples_per_second": 22.234, |
|
"eval_NB_RUND_steps_per_second": 0.698, |
|
"eval_NB_RUND_wer": 0.14400643604183427, |
|
"step": 45955 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_rundkast_loss": 0.32820314168930054, |
|
"eval_rundkast_runtime": 33.145, |
|
"eval_rundkast_samples_per_second": 40.579, |
|
"eval_rundkast_steps_per_second": 1.297, |
|
"eval_rundkast_wer": 0.12313076678332803, |
|
"step": 45955 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_nb_samtale_loss": 0.438490629196167, |
|
"eval_nb_samtale_runtime": 36.783, |
|
"eval_nb_samtale_samples_per_second": 14.49, |
|
"eval_nb_samtale_steps_per_second": 0.462, |
|
"eval_nb_samtale_wer": 0.16552734375, |
|
"step": 45955 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 9.065438270568848, |
|
"learning_rate": 1.0329282634261075e-05, |
|
"loss": 0.1735, |
|
"step": 47268 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_bigbrother_loss": 2.3984920978546143, |
|
"eval_bigbrother_runtime": 41.0024, |
|
"eval_bigbrother_samples_per_second": 33.413, |
|
"eval_bigbrother_steps_per_second": 1.049, |
|
"eval_bigbrother_wer": 0.4888423427052284, |
|
"step": 47268 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_NB_RUND_loss": 0.3673810064792633, |
|
"eval_NB_RUND_runtime": 85.7426, |
|
"eval_NB_RUND_samples_per_second": 21.903, |
|
"eval_NB_RUND_steps_per_second": 0.688, |
|
"eval_NB_RUND_wer": 0.14384553499597746, |
|
"step": 47268 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_rundkast_loss": 0.3365083932876587, |
|
"eval_rundkast_runtime": 32.9335, |
|
"eval_rundkast_samples_per_second": 40.84, |
|
"eval_rundkast_steps_per_second": 1.306, |
|
"eval_rundkast_wer": 0.12336939230034999, |
|
"step": 47268 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_nb_samtale_loss": 0.44506001472473145, |
|
"eval_nb_samtale_runtime": 36.8094, |
|
"eval_nb_samtale_samples_per_second": 14.48, |
|
"eval_nb_samtale_steps_per_second": 0.462, |
|
"eval_nb_samtale_wer": 0.16463216145833334, |
|
"step": 47268 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"grad_norm": 18.192665100097656, |
|
"learning_rate": 7.757742061936496e-06, |
|
"loss": 0.1731, |
|
"step": 48581 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_bigbrother_loss": 2.399851083755493, |
|
"eval_bigbrother_runtime": 41.7999, |
|
"eval_bigbrother_samples_per_second": 32.775, |
|
"eval_bigbrother_steps_per_second": 1.029, |
|
"eval_bigbrother_wer": 0.49117860307741884, |
|
"step": 48581 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_NB_RUND_loss": 0.3626614809036255, |
|
"eval_NB_RUND_runtime": 84.556, |
|
"eval_NB_RUND_samples_per_second": 22.21, |
|
"eval_NB_RUND_steps_per_second": 0.698, |
|
"eval_NB_RUND_wer": 0.14368463395012068, |
|
"step": 48581 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_rundkast_loss": 0.33268144726753235, |
|
"eval_rundkast_runtime": 33.4742, |
|
"eval_rundkast_samples_per_second": 40.18, |
|
"eval_rundkast_steps_per_second": 1.285, |
|
"eval_rundkast_wer": 0.12289214126630607, |
|
"step": 48581 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_nb_samtale_loss": 0.4378024637699127, |
|
"eval_nb_samtale_runtime": 36.9248, |
|
"eval_nb_samtale_samples_per_second": 14.435, |
|
"eval_nb_samtale_steps_per_second": 0.46, |
|
"eval_nb_samtale_wer": 0.164794921875, |
|
"step": 48581 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 52520, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 2.707743528931453e+20, |
|
"train_batch_size": 48, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|