{
  "best_metric": 0.017104836092177864,
  "best_model_checkpoint": "./whisper-large-finetuned/checkpoint-1000",
  "epoch": 1.7428073234524848,
  "eval_steps": 125,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008718395815170008,
      "grad_norm": 934.4418334960938,
      "learning_rate": 2.0833333333333335e-08,
      "loss": 31.0573,
      "step": 5
    },
    {
      "epoch": 0.017436791630340016,
      "grad_norm": 941.2315063476562,
      "learning_rate": 1.2500000000000002e-07,
      "loss": 30.6931,
      "step": 10
    },
    {
      "epoch": 0.026155187445510025,
      "grad_norm": 905.4633178710938,
      "learning_rate": 2.291666666666667e-07,
      "loss": 30.2051,
      "step": 15
    },
    {
      "epoch": 0.03487358326068003,
      "grad_norm": 606.8495483398438,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 27.5929,
      "step": 20
    },
    {
      "epoch": 0.043591979075850044,
      "grad_norm": 322.0547180175781,
      "learning_rate": 4.3750000000000005e-07,
      "loss": 22.3102,
      "step": 25
    },
    {
      "epoch": 0.05231037489102005,
      "grad_norm": 289.5645751953125,
      "learning_rate": 5.416666666666667e-07,
      "loss": 16.8524,
      "step": 30
    },
    {
      "epoch": 0.06102877070619006,
      "grad_norm": 180.1189422607422,
      "learning_rate": 6.458333333333333e-07,
      "loss": 13.885,
      "step": 35
    },
    {
      "epoch": 0.06974716652136007,
      "grad_norm": 136.20899963378906,
      "learning_rate": 7.5e-07,
      "loss": 10.6818,
      "step": 40
    },
    {
      "epoch": 0.07846556233653008,
      "grad_norm": 112.72056579589844,
      "learning_rate": 8.541666666666666e-07,
      "loss": 8.517,
      "step": 45
    },
    {
      "epoch": 0.08718395815170009,
      "grad_norm": 92.7685546875,
      "learning_rate": 9.583333333333334e-07,
      "loss": 6.7615,
      "step": 50
    },
    {
      "epoch": 0.0959023539668701,
      "grad_norm": 82.76348876953125,
      "learning_rate": 1.0625000000000002e-06,
      "loss": 5.2322,
      "step": 55
    },
    {
      "epoch": 0.1046207497820401,
      "grad_norm": 62.46050262451172,
      "learning_rate": 1.1666666666666668e-06,
      "loss": 4.2162,
      "step": 60
    },
    {
      "epoch": 0.11333914559721012,
      "grad_norm": 65.8958969116211,
      "learning_rate": 1.2708333333333334e-06,
      "loss": 3.2443,
      "step": 65
    },
    {
      "epoch": 0.12205754141238012,
      "grad_norm": 40.446800231933594,
      "learning_rate": 1.3750000000000002e-06,
      "loss": 2.6679,
      "step": 70
    },
    {
      "epoch": 0.13077593722755013,
      "grad_norm": 29.84249496459961,
      "learning_rate": 1.4791666666666668e-06,
      "loss": 2.2718,
      "step": 75
    },
    {
      "epoch": 0.13949433304272013,
      "grad_norm": 34.509098052978516,
      "learning_rate": 1.5833333333333336e-06,
      "loss": 1.7758,
      "step": 80
    },
    {
      "epoch": 0.14821272885789014,
      "grad_norm": 38.756187438964844,
      "learning_rate": 1.6875000000000001e-06,
      "loss": 1.6045,
      "step": 85
    },
    {
      "epoch": 0.15693112467306017,
      "grad_norm": 37.90249252319336,
      "learning_rate": 1.7916666666666667e-06,
      "loss": 1.4181,
      "step": 90
    },
    {
      "epoch": 0.16564952048823017,
      "grad_norm": 62.175453186035156,
      "learning_rate": 1.8958333333333335e-06,
      "loss": 1.3154,
      "step": 95
    },
    {
      "epoch": 0.17436791630340018,
      "grad_norm": 21.30702781677246,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.1437,
      "step": 100
    },
    {
      "epoch": 0.18308631211857018,
      "grad_norm": 40.73722839355469,
      "learning_rate": 2.1041666666666667e-06,
      "loss": 1.0276,
      "step": 105
    },
    {
      "epoch": 0.1918047079337402,
      "grad_norm": 39.23003005981445,
      "learning_rate": 2.2083333333333335e-06,
      "loss": 0.9993,
      "step": 110
    },
    {
      "epoch": 0.2005231037489102,
      "grad_norm": 31.393991470336914,
      "learning_rate": 2.3125e-06,
      "loss": 0.9329,
      "step": 115
    },
    {
      "epoch": 0.2092414995640802,
      "grad_norm": 33.02350997924805,
      "learning_rate": 2.4166666666666667e-06,
      "loss": 0.8536,
      "step": 120
    },
    {
      "epoch": 0.21795989537925023,
      "grad_norm": 28.391830444335938,
      "learning_rate": 2.5208333333333335e-06,
      "loss": 0.7985,
      "step": 125
    },
    {
      "epoch": 0.21795989537925023,
      "eval_cer": 0.0605322947095099,
      "eval_loss": 0.10038114339113235,
      "eval_runtime": 211.5139,
      "eval_samples_per_second": 1.754,
      "eval_steps_per_second": 0.222,
      "step": 125
    },
    {
      "epoch": 0.22667829119442023,
      "grad_norm": 29.52627182006836,
      "learning_rate": 2.625e-06,
      "loss": 0.7896,
      "step": 130
    },
    {
      "epoch": 0.23539668700959024,
      "grad_norm": 25.15435028076172,
      "learning_rate": 2.7291666666666667e-06,
      "loss": 0.7167,
      "step": 135
    },
    {
      "epoch": 0.24411508282476024,
      "grad_norm": 27.16618537902832,
      "learning_rate": 2.8333333333333335e-06,
      "loss": 0.6817,
      "step": 140
    },
    {
      "epoch": 0.2528334786399303,
      "grad_norm": 29.221731185913086,
      "learning_rate": 2.9375e-06,
      "loss": 0.7135,
      "step": 145
    },
    {
      "epoch": 0.26155187445510025,
      "grad_norm": 40.65930938720703,
      "learning_rate": 3.041666666666667e-06,
      "loss": 0.643,
      "step": 150
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 36.81560134887695,
      "learning_rate": 3.1458333333333334e-06,
      "loss": 0.6227,
      "step": 155
    },
    {
      "epoch": 0.27898866608544026,
      "grad_norm": 27.88620376586914,
      "learning_rate": 3.2500000000000002e-06,
      "loss": 0.6461,
      "step": 160
    },
    {
      "epoch": 0.2877070619006103,
      "grad_norm": 35.246299743652344,
      "learning_rate": 3.3541666666666666e-06,
      "loss": 0.602,
      "step": 165
    },
    {
      "epoch": 0.29642545771578027,
      "grad_norm": 21.056604385375977,
      "learning_rate": 3.4583333333333334e-06,
      "loss": 0.5815,
      "step": 170
    },
    {
      "epoch": 0.3051438535309503,
      "grad_norm": 32.8330078125,
      "learning_rate": 3.5624999999999998e-06,
      "loss": 0.5754,
      "step": 175
    },
    {
      "epoch": 0.31386224934612034,
      "grad_norm": 18.76894187927246,
      "learning_rate": 3.666666666666667e-06,
      "loss": 0.5358,
      "step": 180
    },
    {
      "epoch": 0.3225806451612903,
      "grad_norm": 22.874624252319336,
      "learning_rate": 3.770833333333334e-06,
      "loss": 0.5767,
      "step": 185
    },
    {
      "epoch": 0.33129904097646035,
      "grad_norm": 31.20766830444336,
      "learning_rate": 3.875e-06,
      "loss": 0.5296,
      "step": 190
    },
    {
      "epoch": 0.3400174367916303,
      "grad_norm": 15.421257972717285,
      "learning_rate": 3.979166666666667e-06,
      "loss": 0.5882,
      "step": 195
    },
    {
      "epoch": 0.34873583260680036,
      "grad_norm": 28.616559982299805,
      "learning_rate": 4.083333333333334e-06,
      "loss": 0.5434,
      "step": 200
    },
    {
      "epoch": 0.35745422842197033,
      "grad_norm": 16.531875610351562,
      "learning_rate": 4.1875e-06,
      "loss": 0.5753,
      "step": 205
    },
    {
      "epoch": 0.36617262423714037,
      "grad_norm": 22.337430953979492,
      "learning_rate": 4.2916666666666665e-06,
      "loss": 0.5279,
      "step": 210
    },
    {
      "epoch": 0.3748910200523104,
      "grad_norm": 23.263813018798828,
      "learning_rate": 4.395833333333334e-06,
      "loss": 0.4958,
      "step": 215
    },
    {
      "epoch": 0.3836094158674804,
      "grad_norm": 17.456787109375,
      "learning_rate": 4.5e-06,
      "loss": 0.5024,
      "step": 220
    },
    {
      "epoch": 0.3923278116826504,
      "grad_norm": 29.84246063232422,
      "learning_rate": 4.604166666666667e-06,
      "loss": 0.5349,
      "step": 225
    },
    {
      "epoch": 0.4010462074978204,
      "grad_norm": 21.097015380859375,
      "learning_rate": 4.708333333333334e-06,
      "loss": 0.4826,
      "step": 230
    },
    {
      "epoch": 0.4097646033129904,
      "grad_norm": 14.620965957641602,
      "learning_rate": 4.8125e-06,
      "loss": 0.5414,
      "step": 235
    },
    {
      "epoch": 0.4184829991281604,
      "grad_norm": 22.258108139038086,
      "learning_rate": 4.9166666666666665e-06,
      "loss": 0.4802,
      "step": 240
    },
    {
      "epoch": 0.4272013949433304,
      "grad_norm": 16.524280548095703,
      "learning_rate": 5.020833333333334e-06,
      "loss": 0.478,
      "step": 245
    },
    {
      "epoch": 0.43591979075850046,
      "grad_norm": 19.307289123535156,
      "learning_rate": 5.125e-06,
      "loss": 0.4407,
      "step": 250
    },
    {
      "epoch": 0.43591979075850046,
      "eval_cer": 0.03700097370983447,
      "eval_loss": 0.05736248940229416,
      "eval_runtime": 212.9084,
      "eval_samples_per_second": 1.743,
      "eval_steps_per_second": 0.221,
      "step": 250
    },
    {
      "epoch": 0.44463818657367044,
      "grad_norm": 11.922791481018066,
      "learning_rate": 5.229166666666667e-06,
      "loss": 0.458,
      "step": 255
    },
    {
      "epoch": 0.45335658238884047,
      "grad_norm": 12.10986614227295,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.4751,
      "step": 260
    },
    {
      "epoch": 0.46207497820401044,
      "grad_norm": 28.33596420288086,
      "learning_rate": 5.4375e-06,
      "loss": 0.416,
      "step": 265
    },
    {
      "epoch": 0.4707933740191805,
      "grad_norm": 27.863170623779297,
      "learning_rate": 5.541666666666667e-06,
      "loss": 0.4643,
      "step": 270
    },
    {
      "epoch": 0.47951176983435045,
      "grad_norm": 18.146421432495117,
      "learning_rate": 5.645833333333334e-06,
      "loss": 0.4753,
      "step": 275
    },
    {
      "epoch": 0.4882301656495205,
      "grad_norm": 11.3834867477417,
      "learning_rate": 5.750000000000001e-06,
      "loss": 0.3643,
      "step": 280
    },
    {
      "epoch": 0.4969485614646905,
      "grad_norm": 32.558372497558594,
      "learning_rate": 5.854166666666667e-06,
      "loss": 0.4635,
      "step": 285
    },
    {
      "epoch": 0.5056669572798606,
      "grad_norm": 16.592920303344727,
      "learning_rate": 5.958333333333334e-06,
      "loss": 0.398,
      "step": 290
    },
    {
      "epoch": 0.5143853530950305,
      "grad_norm": 17.8409423828125,
      "learning_rate": 6.0625e-06,
      "loss": 0.4353,
      "step": 295
    },
    {
      "epoch": 0.5231037489102005,
      "grad_norm": 12.89478588104248,
      "learning_rate": 6.166666666666667e-06,
      "loss": 0.3784,
      "step": 300
    },
    {
      "epoch": 0.5318221447253705,
      "grad_norm": 18.778263092041016,
      "learning_rate": 6.25e-06,
      "loss": 0.3828,
      "step": 305
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 16.030410766601562,
      "learning_rate": 6.25e-06,
      "loss": 0.41,
      "step": 310
    },
    {
      "epoch": 0.5492589363557105,
      "grad_norm": 28.651365280151367,
      "learning_rate": 6.25e-06,
      "loss": 0.4401,
      "step": 315
    },
    {
      "epoch": 0.5579773321708805,
      "grad_norm": 24.014606475830078,
      "learning_rate": 6.25e-06,
      "loss": 0.3942,
      "step": 320
    },
    {
      "epoch": 0.5666957279860506,
      "grad_norm": 23.663330078125,
      "learning_rate": 6.25e-06,
      "loss": 0.4192,
      "step": 325
    },
    {
      "epoch": 0.5754141238012206,
      "grad_norm": 18.65936851501465,
      "learning_rate": 6.25e-06,
      "loss": 0.3927,
      "step": 330
    },
    {
      "epoch": 0.5841325196163906,
      "grad_norm": 13.435624122619629,
      "learning_rate": 6.25e-06,
      "loss": 0.3786,
      "step": 335
    },
    {
      "epoch": 0.5928509154315605,
      "grad_norm": 13.244964599609375,
      "learning_rate": 6.25e-06,
      "loss": 0.3748,
      "step": 340
    },
    {
      "epoch": 0.6015693112467306,
      "grad_norm": 13.424527168273926,
      "learning_rate": 6.25e-06,
      "loss": 0.3506,
      "step": 345
    },
    {
      "epoch": 0.6102877070619006,
      "grad_norm": 19.737199783325195,
      "learning_rate": 6.25e-06,
      "loss": 0.3287,
      "step": 350
    },
    {
      "epoch": 0.6190061028770706,
      "grad_norm": 28.116437911987305,
      "learning_rate": 6.25e-06,
      "loss": 0.4045,
      "step": 355
    },
    {
      "epoch": 0.6277244986922407,
      "grad_norm": 10.127391815185547,
      "learning_rate": 6.25e-06,
      "loss": 0.3479,
      "step": 360
    },
    {
      "epoch": 0.6364428945074107,
      "grad_norm": 14.744454383850098,
      "learning_rate": 6.25e-06,
      "loss": 0.353,
      "step": 365
    },
    {
      "epoch": 0.6451612903225806,
      "grad_norm": 12.78295612335205,
      "learning_rate": 6.25e-06,
      "loss": 0.3954,
      "step": 370
    },
    {
      "epoch": 0.6538796861377506,
      "grad_norm": 14.76408863067627,
      "learning_rate": 6.25e-06,
      "loss": 0.407,
      "step": 375
    },
    {
      "epoch": 0.6538796861377506,
      "eval_cer": 0.028367413177539758,
      "eval_loss": 0.04341413080692291,
      "eval_runtime": 212.2095,
      "eval_samples_per_second": 1.748,
      "eval_steps_per_second": 0.221,
      "step": 375
    },
    {
      "epoch": 0.6625980819529207,
      "grad_norm": 16.073122024536133,
      "learning_rate": 6.25e-06,
      "loss": 0.3726,
      "step": 380
    },
    {
      "epoch": 0.6713164777680907,
      "grad_norm": 17.39419174194336,
      "learning_rate": 6.25e-06,
      "loss": 0.3617,
      "step": 385
    },
    {
      "epoch": 0.6800348735832606,
      "grad_norm": 13.058511734008789,
      "learning_rate": 6.25e-06,
      "loss": 0.3645,
      "step": 390
    },
    {
      "epoch": 0.6887532693984307,
      "grad_norm": 13.417937278747559,
      "learning_rate": 6.25e-06,
      "loss": 0.32,
      "step": 395
    },
    {
      "epoch": 0.6974716652136007,
      "grad_norm": 14.515336036682129,
      "learning_rate": 6.25e-06,
      "loss": 0.3277,
      "step": 400
    },
    {
      "epoch": 0.7061900610287707,
      "grad_norm": 14.097831726074219,
      "learning_rate": 6.25e-06,
      "loss": 0.3659,
      "step": 405
    },
    {
      "epoch": 0.7149084568439407,
      "grad_norm": 11.983433723449707,
      "learning_rate": 6.25e-06,
      "loss": 0.3619,
      "step": 410
    },
    {
      "epoch": 0.7236268526591108,
      "grad_norm": 9.440328598022461,
      "learning_rate": 6.25e-06,
      "loss": 0.3428,
      "step": 415
    },
    {
      "epoch": 0.7323452484742807,
      "grad_norm": 25.6270809173584,
      "learning_rate": 6.25e-06,
      "loss": 0.3351,
      "step": 420
    },
    {
      "epoch": 0.7410636442894507,
      "grad_norm": 10.264219284057617,
      "learning_rate": 6.25e-06,
      "loss": 0.3555,
      "step": 425
    },
    {
      "epoch": 0.7497820401046208,
      "grad_norm": 19.086257934570312,
      "learning_rate": 6.25e-06,
      "loss": 0.3809,
      "step": 430
    },
    {
      "epoch": 0.7585004359197908,
      "grad_norm": 11.476728439331055,
      "learning_rate": 6.25e-06,
      "loss": 0.3065,
      "step": 435
    },
    {
      "epoch": 0.7672188317349607,
      "grad_norm": 9.463808059692383,
      "learning_rate": 6.25e-06,
      "loss": 0.3436,
      "step": 440
    },
    {
      "epoch": 0.7759372275501307,
      "grad_norm": 11.51846981048584,
      "learning_rate": 6.25e-06,
      "loss": 0.3266,
      "step": 445
    },
    {
      "epoch": 0.7846556233653008,
      "grad_norm": 13.305411338806152,
      "learning_rate": 6.25e-06,
      "loss": 0.3176,
      "step": 450
    },
    {
      "epoch": 0.7933740191804708,
      "grad_norm": 13.012825965881348,
      "learning_rate": 6.25e-06,
      "loss": 0.3931,
      "step": 455
    },
    {
      "epoch": 0.8020924149956408,
      "grad_norm": 11.237920761108398,
      "learning_rate": 6.25e-06,
      "loss": 0.358,
      "step": 460
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 7.914185523986816,
      "learning_rate": 6.25e-06,
      "loss": 0.3237,
      "step": 465
    },
    {
      "epoch": 0.8195292066259808,
      "grad_norm": 13.393628120422363,
      "learning_rate": 6.25e-06,
      "loss": 0.3197,
      "step": 470
    },
    {
      "epoch": 0.8282476024411508,
      "grad_norm": 12.152388572692871,
      "learning_rate": 6.25e-06,
      "loss": 0.3181,
      "step": 475
    },
    {
      "epoch": 0.8369659982563208,
      "grad_norm": 11.488780975341797,
      "learning_rate": 6.25e-06,
      "loss": 0.284,
      "step": 480
    },
    {
      "epoch": 0.8456843940714909,
      "grad_norm": 11.495716094970703,
      "learning_rate": 6.25e-06,
      "loss": 0.3057,
      "step": 485
    },
    {
      "epoch": 0.8544027898866609,
      "grad_norm": 12.111288070678711,
      "learning_rate": 6.25e-06,
      "loss": 0.3214,
      "step": 490
    },
    {
      "epoch": 0.8631211857018308,
      "grad_norm": 10.114215850830078,
      "learning_rate": 6.25e-06,
      "loss": 0.3147,
      "step": 495
    },
    {
      "epoch": 0.8718395815170009,
      "grad_norm": 18.122196197509766,
      "learning_rate": 6.25e-06,
      "loss": 0.3517,
      "step": 500
    },
    {
      "epoch": 0.8718395815170009,
      "eval_cer": 0.02557611165206102,
      "eval_loss": 0.03728635609149933,
      "eval_runtime": 212.3354,
      "eval_samples_per_second": 1.747,
      "eval_steps_per_second": 0.221,
      "step": 500
    },
    {
      "epoch": 0.8805579773321709,
      "grad_norm": 10.484134674072266,
      "learning_rate": 6.25e-06,
      "loss": 0.3316,
      "step": 505
    },
    {
      "epoch": 0.8892763731473409,
      "grad_norm": 19.02334213256836,
      "learning_rate": 6.25e-06,
      "loss": 0.3026,
      "step": 510
    },
    {
      "epoch": 0.8979947689625108,
      "grad_norm": 10.848348617553711,
      "learning_rate": 6.25e-06,
      "loss": 0.295,
      "step": 515
    },
    {
      "epoch": 0.9067131647776809,
      "grad_norm": 16.650806427001953,
      "learning_rate": 6.25e-06,
      "loss": 0.2775,
      "step": 520
    },
    {
      "epoch": 0.9154315605928509,
      "grad_norm": 12.108776092529297,
      "learning_rate": 6.25e-06,
      "loss": 0.3105,
      "step": 525
    },
    {
      "epoch": 0.9241499564080209,
      "grad_norm": 11.745095252990723,
      "learning_rate": 6.25e-06,
      "loss": 0.2738,
      "step": 530
    },
    {
      "epoch": 0.932868352223191,
      "grad_norm": 14.852615356445312,
      "learning_rate": 6.25e-06,
      "loss": 0.2659,
      "step": 535
    },
    {
      "epoch": 0.941586748038361,
      "grad_norm": 9.099912643432617,
      "learning_rate": 6.25e-06,
      "loss": 0.2875,
      "step": 540
    },
    {
      "epoch": 0.9503051438535309,
      "grad_norm": 7.640926837921143,
      "learning_rate": 6.25e-06,
      "loss": 0.2907,
      "step": 545
    },
    {
      "epoch": 0.9590235396687009,
      "grad_norm": 9.235946655273438,
      "learning_rate": 6.25e-06,
      "loss": 0.3091,
      "step": 550
    },
    {
      "epoch": 0.967741935483871,
      "grad_norm": 11.265698432922363,
      "learning_rate": 6.25e-06,
      "loss": 0.2927,
      "step": 555
    },
    {
      "epoch": 0.976460331299041,
      "grad_norm": 8.588621139526367,
      "learning_rate": 6.25e-06,
      "loss": 0.2904,
      "step": 560
    },
    {
      "epoch": 0.985178727114211,
      "grad_norm": 12.887239456176758,
      "learning_rate": 6.25e-06,
      "loss": 0.2831,
      "step": 565
    },
    {
      "epoch": 0.993897122929381,
      "grad_norm": 9.062308311462402,
      "learning_rate": 6.25e-06,
      "loss": 0.2547,
      "step": 570
    },
    {
      "epoch": 1.001743679163034,
      "grad_norm": 7.798802852630615,
      "learning_rate": 6.25e-06,
      "loss": 0.2329,
      "step": 575
    },
    {
      "epoch": 1.010462074978204,
      "grad_norm": 9.294201850891113,
      "learning_rate": 6.25e-06,
      "loss": 0.1935,
      "step": 580
    },
    {
      "epoch": 1.019180470793374,
      "grad_norm": 9.507511138916016,
      "learning_rate": 6.25e-06,
      "loss": 0.192,
      "step": 585
    },
    {
      "epoch": 1.027898866608544,
      "grad_norm": 10.073638916015625,
      "learning_rate": 6.25e-06,
      "loss": 0.2104,
      "step": 590
    },
    {
      "epoch": 1.036617262423714,
      "grad_norm": 8.133487701416016,
      "learning_rate": 6.25e-06,
      "loss": 0.2347,
      "step": 595
    },
    {
      "epoch": 1.045335658238884,
      "grad_norm": 7.9588422775268555,
      "learning_rate": 6.25e-06,
      "loss": 0.2596,
      "step": 600
    },
    {
      "epoch": 1.054054054054054,
      "grad_norm": 14.125382423400879,
      "learning_rate": 6.25e-06,
      "loss": 0.2095,
      "step": 605
    },
    {
      "epoch": 1.0627724498692241,
      "grad_norm": 11.050345420837402,
      "learning_rate": 6.25e-06,
      "loss": 0.2371,
      "step": 610
    },
    {
      "epoch": 1.0714908456843941,
      "grad_norm": 11.05793571472168,
      "learning_rate": 6.25e-06,
      "loss": 0.2307,
      "step": 615
    },
    {
      "epoch": 1.080209241499564,
      "grad_norm": 9.359646797180176,
      "learning_rate": 6.25e-06,
      "loss": 0.2155,
      "step": 620
    },
    {
      "epoch": 1.088927637314734,
      "grad_norm": 8.282297134399414,
      "learning_rate": 6.25e-06,
      "loss": 0.2219,
      "step": 625
    },
    {
      "epoch": 1.088927637314734,
      "eval_cer": 0.02112950340798442,
      "eval_loss": 0.03238729014992714,
      "eval_runtime": 212.432,
      "eval_samples_per_second": 1.746,
      "eval_steps_per_second": 0.221,
      "step": 625
    },
    {
      "epoch": 1.097646033129904,
      "grad_norm": 12.268821716308594,
      "learning_rate": 6.25e-06,
      "loss": 0.2242,
      "step": 630
    },
    {
      "epoch": 1.106364428945074,
      "grad_norm": 8.24521255493164,
      "learning_rate": 6.25e-06,
      "loss": 0.2143,
      "step": 635
    },
    {
      "epoch": 1.1150828247602442,
      "grad_norm": 7.206003665924072,
      "learning_rate": 6.25e-06,
      "loss": 0.2086,
      "step": 640
    },
    {
      "epoch": 1.1238012205754142,
      "grad_norm": 9.055399894714355,
      "learning_rate": 6.25e-06,
      "loss": 0.2174,
      "step": 645
    },
    {
      "epoch": 1.1325196163905842,
      "grad_norm": 8.929832458496094,
      "learning_rate": 6.25e-06,
      "loss": 0.2133,
      "step": 650
    },
    {
      "epoch": 1.1412380122057542,
      "grad_norm": 11.186538696289062,
      "learning_rate": 6.25e-06,
      "loss": 0.2193,
      "step": 655
    },
    {
      "epoch": 1.1499564080209241,
      "grad_norm": 7.43967342376709,
      "learning_rate": 6.25e-06,
      "loss": 0.2042,
      "step": 660
    },
    {
      "epoch": 1.1586748038360941,
      "grad_norm": 8.988080024719238,
      "learning_rate": 6.25e-06,
      "loss": 0.2118,
      "step": 665
    },
    {
      "epoch": 1.167393199651264,
      "grad_norm": 9.018464088439941,
      "learning_rate": 6.25e-06,
      "loss": 0.1938,
      "step": 670
    },
    {
      "epoch": 1.176111595466434,
      "grad_norm": 9.74881649017334,
      "learning_rate": 6.25e-06,
      "loss": 0.2395,
      "step": 675
    },
    {
      "epoch": 1.1848299912816043,
      "grad_norm": 11.451117515563965,
      "learning_rate": 6.25e-06,
      "loss": 0.2105,
      "step": 680
    },
    {
      "epoch": 1.1935483870967742,
      "grad_norm": 9.217769622802734,
      "learning_rate": 6.25e-06,
      "loss": 0.2072,
      "step": 685
    },
    {
      "epoch": 1.2022667829119442,
      "grad_norm": 11.924041748046875,
      "learning_rate": 6.25e-06,
      "loss": 0.2578,
      "step": 690
    },
    {
      "epoch": 1.2109851787271142,
      "grad_norm": 11.975310325622559,
      "learning_rate": 6.25e-06,
      "loss": 0.2249,
      "step": 695
    },
    {
      "epoch": 1.2197035745422842,
      "grad_norm": 9.89647102355957,
      "learning_rate": 6.25e-06,
      "loss": 0.1899,
      "step": 700
    },
    {
      "epoch": 1.2284219703574542,
      "grad_norm": 10.440160751342773,
      "learning_rate": 6.25e-06,
      "loss": 0.1898,
      "step": 705
    },
    {
      "epoch": 1.2371403661726244,
      "grad_norm": 7.328766822814941,
      "learning_rate": 6.25e-06,
      "loss": 0.1654,
      "step": 710
    },
    {
      "epoch": 1.2458587619877943,
      "grad_norm": 9.291050910949707,
      "learning_rate": 6.25e-06,
      "loss": 0.2527,
      "step": 715
    },
    {
      "epoch": 1.2545771578029643,
      "grad_norm": 10.257084846496582,
      "learning_rate": 6.25e-06,
      "loss": 0.1948,
      "step": 720
    },
    {
      "epoch": 1.2632955536181343,
      "grad_norm": 13.300470352172852,
      "learning_rate": 6.25e-06,
      "loss": 0.2211,
      "step": 725
    },
    {
      "epoch": 1.2720139494333043,
      "grad_norm": 7.229800701141357,
      "learning_rate": 6.25e-06,
      "loss": 0.2218,
      "step": 730
    },
    {
      "epoch": 1.2807323452484742,
      "grad_norm": 7.776634216308594,
      "learning_rate": 6.25e-06,
      "loss": 0.2189,
      "step": 735
    },
    {
      "epoch": 1.2894507410636442,
      "grad_norm": 6.323727130889893,
      "learning_rate": 6.25e-06,
      "loss": 0.1901,
      "step": 740
    },
    {
      "epoch": 1.2981691368788142,
      "grad_norm": 7.206363201141357,
      "learning_rate": 6.25e-06,
      "loss": 0.1756,
      "step": 745
    },
    {
      "epoch": 1.3068875326939844,
      "grad_norm": 7.8909783363342285,
      "learning_rate": 6.25e-06,
      "loss": 0.1981,
      "step": 750
    },
    {
      "epoch": 1.3068875326939844,
      "eval_cer": 0.019636481661798117,
      "eval_loss": 0.03092861734330654,
      "eval_runtime": 212.2998,
      "eval_samples_per_second": 1.748,
      "eval_steps_per_second": 0.221,
      "step": 750
    },
    {
      "epoch": 1.3156059285091544,
      "grad_norm": 11.669926643371582,
      "learning_rate": 6.25e-06,
      "loss": 0.2066,
      "step": 755
    },
    {
      "epoch": 1.3243243243243243,
      "grad_norm": 8.25307559967041,
      "learning_rate": 6.25e-06,
      "loss": 0.1892,
      "step": 760
    },
    {
      "epoch": 1.3330427201394943,
      "grad_norm": 6.406688213348389,
      "learning_rate": 6.25e-06,
      "loss": 0.1877,
      "step": 765
    },
    {
      "epoch": 1.3417611159546643,
      "grad_norm": 8.468006134033203,
      "learning_rate": 6.25e-06,
      "loss": 0.2034,
      "step": 770
    },
    {
      "epoch": 1.3504795117698343,
      "grad_norm": 9.530508995056152,
      "learning_rate": 6.25e-06,
      "loss": 0.2026,
      "step": 775
    },
    {
      "epoch": 1.3591979075850045,
      "grad_norm": 6.790230751037598,
      "learning_rate": 6.25e-06,
      "loss": 0.219,
      "step": 780
    },
    {
      "epoch": 1.3679163034001744,
      "grad_norm": 8.093545913696289,
      "learning_rate": 6.25e-06,
      "loss": 0.1843,
      "step": 785
    },
    {
      "epoch": 1.3766346992153444,
      "grad_norm": 8.671226501464844,
      "learning_rate": 6.25e-06,
      "loss": 0.188,
      "step": 790
    },
    {
      "epoch": 1.3853530950305144,
      "grad_norm": 8.892730712890625,
      "learning_rate": 6.25e-06,
      "loss": 0.1962,
      "step": 795
    },
    {
      "epoch": 1.3940714908456844,
      "grad_norm": 10.313153266906738,
      "learning_rate": 6.25e-06,
      "loss": 0.1669,
      "step": 800
    },
    {
      "epoch": 1.4027898866608544,
      "grad_norm": 8.458233833312988,
      "learning_rate": 6.25e-06,
      "loss": 0.1923,
      "step": 805
    },
    {
      "epoch": 1.4115082824760243,
      "grad_norm": 7.510833740234375,
      "learning_rate": 6.25e-06,
      "loss": 0.2031,
      "step": 810
    },
    {
      "epoch": 1.4202266782911943,
      "grad_norm": 8.42331314086914,
      "learning_rate": 6.25e-06,
      "loss": 0.183,
      "step": 815
    },
    {
      "epoch": 1.4289450741063645,
      "grad_norm": 9.886040687561035,
      "learning_rate": 6.25e-06,
      "loss": 0.1964,
      "step": 820
    },
    {
      "epoch": 1.4376634699215345,
      "grad_norm": 6.524728298187256,
      "learning_rate": 6.25e-06,
      "loss": 0.1827,
      "step": 825
    },
    {
      "epoch": 1.4463818657367045,
      "grad_norm": 8.29251766204834,
      "learning_rate": 6.25e-06,
      "loss": 0.184,
      "step": 830
    },
    {
      "epoch": 1.4551002615518744,
      "grad_norm": 8.159611701965332,
      "learning_rate": 6.25e-06,
      "loss": 0.2232,
      "step": 835
    },
    {
      "epoch": 1.4638186573670444,
      "grad_norm": 7.022614479064941,
      "learning_rate": 6.25e-06,
      "loss": 0.1658,
      "step": 840
    },
    {
      "epoch": 1.4725370531822144,
      "grad_norm": 7.722076892852783,
      "learning_rate": 6.25e-06,
      "loss": 0.2011,
      "step": 845
    },
    {
      "epoch": 1.4812554489973846,
      "grad_norm": 6.505570411682129,
      "learning_rate": 6.25e-06,
      "loss": 0.1959,
      "step": 850
    },
    {
      "epoch": 1.4899738448125546,
      "grad_norm": 8.246548652648926,
      "learning_rate": 6.25e-06,
      "loss": 0.1859,
      "step": 855
    },
    {
      "epoch": 1.4986922406277245,
      "grad_norm": 6.680718421936035,
      "learning_rate": 6.25e-06,
      "loss": 0.1589,
      "step": 860
    },
    {
      "epoch": 1.5074106364428945,
      "grad_norm": 6.155304908752441,
      "learning_rate": 6.25e-06,
      "loss": 0.1619,
      "step": 865
    },
    {
      "epoch": 1.5161290322580645,
      "grad_norm": 8.872124671936035,
      "learning_rate": 6.25e-06,
      "loss": 0.2289,
      "step": 870
    },
    {
      "epoch": 1.5248474280732345,
      "grad_norm": 6.164592742919922,
      "learning_rate": 6.25e-06,
      "loss": 0.1718,
      "step": 875
    },
    {
      "epoch": 1.5248474280732345,
      "eval_cer": 0.018500486854917234,
      "eval_loss": 0.029056401923298836,
      "eval_runtime": 211.7367,
      "eval_samples_per_second": 1.752,
      "eval_steps_per_second": 0.222,
      "step": 875
    },
    {
      "epoch": 1.5335658238884045,
      "grad_norm": 6.420663833618164,
      "learning_rate": 6.25e-06,
      "loss": 0.1753,
      "step": 880
    },
    {
      "epoch": 1.5422842197035744,
      "grad_norm": 10.49184799194336,
      "learning_rate": 6.25e-06,
      "loss": 0.2049,
      "step": 885
    },
    {
      "epoch": 1.5510026155187444,
      "grad_norm": 7.615091800689697,
      "learning_rate": 6.25e-06,
      "loss": 0.1734,
      "step": 890
    },
    {
      "epoch": 1.5597210113339146,
      "grad_norm": 8.663217544555664,
      "learning_rate": 6.25e-06,
      "loss": 0.1645,
      "step": 895
    },
    {
      "epoch": 1.5684394071490846,
      "grad_norm": 6.248359203338623,
      "learning_rate": 6.25e-06,
      "loss": 0.1813,
      "step": 900
    },
    {
      "epoch": 1.5771578029642546,
      "grad_norm": 9.073200225830078,
      "learning_rate": 6.25e-06,
      "loss": 0.2042,
      "step": 905
    },
    {
      "epoch": 1.5858761987794245,
      "grad_norm": 5.36655855178833,
      "learning_rate": 6.25e-06,
      "loss": 0.1848,
      "step": 910
    },
    {
      "epoch": 1.5945945945945947,
      "grad_norm": 6.54818868637085,
      "learning_rate": 6.25e-06,
      "loss": 0.1829,
      "step": 915
    },
    {
      "epoch": 1.6033129904097647,
      "grad_norm": 8.669767379760742,
      "learning_rate": 6.25e-06,
      "loss": 0.1669,
      "step": 920
    },
    {
      "epoch": 1.6120313862249347,
      "grad_norm": 6.731302738189697,
      "learning_rate": 6.25e-06,
      "loss": 0.1855,
      "step": 925
    },
    {
      "epoch": 1.6207497820401047,
      "grad_norm": 6.575505256652832,
      "learning_rate": 6.25e-06,
      "loss": 0.174,
      "step": 930
    },
    {
      "epoch": 1.6294681778552746,
      "grad_norm": 7.167125701904297,
      "learning_rate": 6.25e-06,
      "loss": 0.2021,
      "step": 935
    },
    {
      "epoch": 1.6381865736704446,
      "grad_norm": 9.105996131896973,
      "learning_rate": 6.25e-06,
      "loss": 0.2631,
      "step": 940
    },
    {
      "epoch": 1.6469049694856146,
      "grad_norm": 7.38139009475708,
      "learning_rate": 6.25e-06,
      "loss": 0.192,
      "step": 945
    },
    {
      "epoch": 1.6556233653007846,
      "grad_norm": 7.435015678405762,
      "learning_rate": 6.25e-06,
      "loss": 0.163,
      "step": 950
    },
    {
      "epoch": 1.6643417611159546,
      "grad_norm": 9.437699317932129,
      "learning_rate": 6.25e-06,
      "loss": 0.1717,
      "step": 955
    },
    {
      "epoch": 1.6730601569311245,
      "grad_norm": 8.728166580200195,
      "learning_rate": 6.25e-06,
      "loss": 0.1718,
      "step": 960
    },
    {
      "epoch": 1.6817785527462947,
      "grad_norm": 8.472909927368164,
      "learning_rate": 6.25e-06,
      "loss": 0.1835,
      "step": 965
    },
    {
      "epoch": 1.6904969485614647,
      "grad_norm": 7.229650974273682,
      "learning_rate": 6.25e-06,
      "loss": 0.1783,
      "step": 970
    },
    {
      "epoch": 1.6992153443766347,
      "grad_norm": 8.300793647766113,
      "learning_rate": 6.25e-06,
      "loss": 0.1544,
      "step": 975
    },
    {
      "epoch": 1.7079337401918047,
      "grad_norm": 8.715877532958984,
      "learning_rate": 6.25e-06,
      "loss": 0.1943,
      "step": 980
    },
    {
      "epoch": 1.7166521360069749,
      "grad_norm": 8.236053466796875,
      "learning_rate": 6.25e-06,
      "loss": 0.1969,
      "step": 985
    },
    {
      "epoch": 1.7253705318221448,
      "grad_norm": 10.76419448852539,
      "learning_rate": 6.25e-06,
      "loss": 0.1807,
      "step": 990
    },
    {
      "epoch": 1.7340889276373148,
      "grad_norm": 7.493725776672363,
      "learning_rate": 6.25e-06,
      "loss": 0.2034,
      "step": 995
    },
    {
      "epoch": 1.7428073234524848,
      "grad_norm": 10.558133125305176,
      "learning_rate": 6.25e-06,
      "loss": 0.2042,
      "step": 1000
    },
    {
      "epoch": 1.7428073234524848,
      "eval_cer": 0.017104836092177864,
      "eval_loss": 0.028250334784388542,
      "eval_runtime": 212.8205,
      "eval_samples_per_second": 1.743,
      "eval_steps_per_second": 0.221,
      "step": 1000
    }
  ],
  "logging_steps": 5,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 125,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0905549552746496e+20,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}