4e-4-xlsr-wav2vec2 / trainer_state.json
soba1911's picture
Upload 7 files
430ee31 verified
raw
history blame contribute delete
No virus
24.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.23143277929728592,
"eval_steps": 10,
"global_step": 550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004207868714496108,
"grad_norm": 3.2594902515411377,
"learning_rate": 0.00039272727272727273,
"loss": 0.6971,
"step": 10
},
{
"epoch": 0.004207868714496108,
"eval_accuracy": 0.5914159417152405,
"eval_loss": 0.7148427963256836,
"eval_runtime": 585.2226,
"eval_samples_per_second": 8.122,
"eval_steps_per_second": 2.032,
"step": 10
},
{
"epoch": 0.008415737428992216,
"grad_norm": 22.503503799438477,
"learning_rate": 0.0003854545454545455,
"loss": 0.7436,
"step": 20
},
{
"epoch": 0.008415737428992216,
"eval_accuracy": 0.5914159417152405,
"eval_loss": 1.42451012134552,
"eval_runtime": 577.2378,
"eval_samples_per_second": 8.234,
"eval_steps_per_second": 2.06,
"step": 20
},
{
"epoch": 0.012623606143488323,
"grad_norm": 1.2163587808609009,
"learning_rate": 0.0003781818181818182,
"loss": 0.9446,
"step": 30
},
{
"epoch": 0.012623606143488323,
"eval_accuracy": 0.4085840582847595,
"eval_loss": 0.7073472738265991,
"eval_runtime": 581.0519,
"eval_samples_per_second": 8.18,
"eval_steps_per_second": 2.046,
"step": 30
},
{
"epoch": 0.016831474857984433,
"grad_norm": 2.1413767337799072,
"learning_rate": 0.0003709090909090909,
"loss": 0.6945,
"step": 40
},
{
"epoch": 0.016831474857984433,
"eval_accuracy": 0.5914159417152405,
"eval_loss": 0.7796906232833862,
"eval_runtime": 591.5892,
"eval_samples_per_second": 8.034,
"eval_steps_per_second": 2.01,
"step": 40
},
{
"epoch": 0.021039343572480537,
"grad_norm": 0.8763795495033264,
"learning_rate": 0.00036363636363636367,
"loss": 0.653,
"step": 50
},
{
"epoch": 0.021039343572480537,
"eval_accuracy": 0.5867872834205627,
"eval_loss": 0.6816809773445129,
"eval_runtime": 588.5616,
"eval_samples_per_second": 8.076,
"eval_steps_per_second": 2.02,
"step": 50
},
{
"epoch": 0.025247212286976645,
"grad_norm": 0.9567063450813293,
"learning_rate": 0.0003563636363636364,
"loss": 0.6479,
"step": 60
},
{
"epoch": 0.025247212286976645,
"eval_accuracy": 0.8102251291275024,
"eval_loss": 0.5443565845489502,
"eval_runtime": 581.8352,
"eval_samples_per_second": 8.169,
"eval_steps_per_second": 2.044,
"step": 60
},
{
"epoch": 0.029455081001472753,
"grad_norm": 4.155121803283691,
"learning_rate": 0.0003490909090909091,
"loss": 0.502,
"step": 70
},
{
"epoch": 0.029455081001472753,
"eval_accuracy": 0.5952030420303345,
"eval_loss": 1.1268996000289917,
"eval_runtime": 583.4269,
"eval_samples_per_second": 8.147,
"eval_steps_per_second": 2.038,
"step": 70
},
{
"epoch": 0.033662949715968865,
"grad_norm": 3.364042282104492,
"learning_rate": 0.0003418181818181818,
"loss": 0.6052,
"step": 80
},
{
"epoch": 0.033662949715968865,
"eval_accuracy": 0.8194824457168579,
"eval_loss": 0.6350404024124146,
"eval_runtime": 585.0166,
"eval_samples_per_second": 8.125,
"eval_steps_per_second": 2.032,
"step": 80
},
{
"epoch": 0.03787081843046497,
"grad_norm": 1.5822795629501343,
"learning_rate": 0.00033454545454545456,
"loss": 0.3917,
"step": 90
},
{
"epoch": 0.03787081843046497,
"eval_accuracy": 0.5914159417152405,
"eval_loss": 1.0552942752838135,
"eval_runtime": 579.8411,
"eval_samples_per_second": 8.197,
"eval_steps_per_second": 2.051,
"step": 90
},
{
"epoch": 0.042078687144961074,
"grad_norm": 1.6902482509613037,
"learning_rate": 0.0003272727272727273,
"loss": 0.648,
"step": 100
},
{
"epoch": 0.042078687144961074,
"eval_accuracy": 0.8116979002952576,
"eval_loss": 0.7334651947021484,
"eval_runtime": 577.8009,
"eval_samples_per_second": 8.226,
"eval_steps_per_second": 2.058,
"step": 100
},
{
"epoch": 0.046286555859457186,
"grad_norm": 1.05351984500885,
"learning_rate": 0.00032,
"loss": 0.431,
"step": 110
},
{
"epoch": 0.046286555859457186,
"eval_accuracy": 0.8480959534645081,
"eval_loss": 0.39465391635894775,
"eval_runtime": 576.7671,
"eval_samples_per_second": 8.241,
"eval_steps_per_second": 2.061,
"step": 110
},
{
"epoch": 0.05049442457395329,
"grad_norm": 0.6553493738174438,
"learning_rate": 0.00031272727272727273,
"loss": 0.5428,
"step": 120
},
{
"epoch": 0.05049442457395329,
"eval_accuracy": 0.8729223608970642,
"eval_loss": 0.38810551166534424,
"eval_runtime": 579.7258,
"eval_samples_per_second": 8.199,
"eval_steps_per_second": 2.051,
"step": 120
},
{
"epoch": 0.0547022932884494,
"grad_norm": 0.42037296295166016,
"learning_rate": 0.0003054545454545455,
"loss": 0.34,
"step": 130
},
{
"epoch": 0.0547022932884494,
"eval_accuracy": 0.8840731978416443,
"eval_loss": 0.35628741979599,
"eval_runtime": 582.4004,
"eval_samples_per_second": 8.161,
"eval_steps_per_second": 2.042,
"step": 130
},
{
"epoch": 0.05891016200294551,
"grad_norm": 0.47820377349853516,
"learning_rate": 0.0002981818181818182,
"loss": 0.398,
"step": 140
},
{
"epoch": 0.05891016200294551,
"eval_accuracy": 0.8842836022377014,
"eval_loss": 0.3024275004863739,
"eval_runtime": 581.4736,
"eval_samples_per_second": 8.174,
"eval_steps_per_second": 2.045,
"step": 140
},
{
"epoch": 0.06311803071744161,
"grad_norm": 0.9904082417488098,
"learning_rate": 0.0002909090909090909,
"loss": 0.1834,
"step": 150
},
{
"epoch": 0.06311803071744161,
"eval_accuracy": 0.87250155210495,
"eval_loss": 0.38005706667900085,
"eval_runtime": 580.8975,
"eval_samples_per_second": 8.182,
"eval_steps_per_second": 2.047,
"step": 150
},
{
"epoch": 0.06732589943193773,
"grad_norm": 1.6720079183578491,
"learning_rate": 0.0002836363636363637,
"loss": 0.5052,
"step": 160
},
{
"epoch": 0.06732589943193773,
"eval_accuracy": 0.9114243388175964,
"eval_loss": 0.23850664496421814,
"eval_runtime": 581.0949,
"eval_samples_per_second": 8.179,
"eval_steps_per_second": 2.046,
"step": 160
},
{
"epoch": 0.07153376814643383,
"grad_norm": 1.5653446912765503,
"learning_rate": 0.0002763636363636364,
"loss": 0.3953,
"step": 170
},
{
"epoch": 0.07153376814643383,
"eval_accuracy": 0.8544077277183533,
"eval_loss": 0.41211748123168945,
"eval_runtime": 581.5472,
"eval_samples_per_second": 8.173,
"eval_steps_per_second": 2.045,
"step": 170
},
{
"epoch": 0.07574163686092994,
"grad_norm": 1.7199907302856445,
"learning_rate": 0.0002690909090909091,
"loss": 0.407,
"step": 180
},
{
"epoch": 0.07574163686092994,
"eval_accuracy": 0.9044813513755798,
"eval_loss": 0.26600247621536255,
"eval_runtime": 580.7861,
"eval_samples_per_second": 8.184,
"eval_steps_per_second": 2.047,
"step": 180
},
{
"epoch": 0.07994950557542604,
"grad_norm": 2.303934335708618,
"learning_rate": 0.00026181818181818185,
"loss": 0.3571,
"step": 190
},
{
"epoch": 0.07994950557542604,
"eval_accuracy": 0.8844940066337585,
"eval_loss": 0.3394128680229187,
"eval_runtime": 584.97,
"eval_samples_per_second": 8.125,
"eval_steps_per_second": 2.033,
"step": 190
},
{
"epoch": 0.08415737428992215,
"grad_norm": 2.5085370540618896,
"learning_rate": 0.00025454545454545456,
"loss": 0.2747,
"step": 200
},
{
"epoch": 0.08415737428992215,
"eval_accuracy": 0.9147906303405762,
"eval_loss": 0.2246033400297165,
"eval_runtime": 585.8187,
"eval_samples_per_second": 8.113,
"eval_steps_per_second": 2.03,
"step": 200
},
{
"epoch": 0.08836524300441827,
"grad_norm": 4.109118461608887,
"learning_rate": 0.00024727272727272727,
"loss": 0.2863,
"step": 210
},
{
"epoch": 0.08836524300441827,
"eval_accuracy": 0.9244687557220459,
"eval_loss": 0.2438182830810547,
"eval_runtime": 582.1202,
"eval_samples_per_second": 8.165,
"eval_steps_per_second": 2.043,
"step": 210
},
{
"epoch": 0.09257311171891437,
"grad_norm": 0.5757103562355042,
"learning_rate": 0.00024,
"loss": 0.2334,
"step": 220
},
{
"epoch": 0.09257311171891437,
"eval_accuracy": 0.922154426574707,
"eval_loss": 0.21005088090896606,
"eval_runtime": 579.1924,
"eval_samples_per_second": 8.206,
"eval_steps_per_second": 2.053,
"step": 220
},
{
"epoch": 0.09678098043341048,
"grad_norm": 0.36710911989212036,
"learning_rate": 0.00023272727272727271,
"loss": 0.1744,
"step": 230
},
{
"epoch": 0.09678098043341048,
"eval_accuracy": 0.8529350161552429,
"eval_loss": 0.41234469413757324,
"eval_runtime": 589.1406,
"eval_samples_per_second": 8.068,
"eval_steps_per_second": 2.018,
"step": 230
},
{
"epoch": 0.10098884914790658,
"grad_norm": 3.2360424995422363,
"learning_rate": 0.00022545454545454545,
"loss": 0.1948,
"step": 240
},
{
"epoch": 0.10098884914790658,
"eval_accuracy": 0.9253103137016296,
"eval_loss": 0.22991585731506348,
"eval_runtime": 587.1353,
"eval_samples_per_second": 8.095,
"eval_steps_per_second": 2.025,
"step": 240
},
{
"epoch": 0.1051967178624027,
"grad_norm": 0.21486328542232513,
"learning_rate": 0.00021818181818181818,
"loss": 0.2382,
"step": 250
},
{
"epoch": 0.1051967178624027,
"eval_accuracy": 0.9322533011436462,
"eval_loss": 0.27035772800445557,
"eval_runtime": 587.222,
"eval_samples_per_second": 8.094,
"eval_steps_per_second": 2.025,
"step": 250
},
{
"epoch": 0.1094045865768988,
"grad_norm": 1.8511269092559814,
"learning_rate": 0.0002109090909090909,
"loss": 0.219,
"step": 260
},
{
"epoch": 0.1094045865768988,
"eval_accuracy": 0.9137386679649353,
"eval_loss": 0.3539877235889435,
"eval_runtime": 585.6282,
"eval_samples_per_second": 8.116,
"eval_steps_per_second": 2.03,
"step": 260
},
{
"epoch": 0.11361245529139491,
"grad_norm": 0.11713656038045883,
"learning_rate": 0.00020363636363636363,
"loss": 0.1122,
"step": 270
},
{
"epoch": 0.11361245529139491,
"eval_accuracy": 0.9339364767074585,
"eval_loss": 0.2783205509185791,
"eval_runtime": 585.5155,
"eval_samples_per_second": 8.118,
"eval_steps_per_second": 2.031,
"step": 270
},
{
"epoch": 0.11782032400589101,
"grad_norm": 2.4492409229278564,
"learning_rate": 0.00019636363636363636,
"loss": 0.1902,
"step": 280
},
{
"epoch": 0.11782032400589101,
"eval_accuracy": 0.9322533011436462,
"eval_loss": 0.28133705258369446,
"eval_runtime": 587.357,
"eval_samples_per_second": 8.092,
"eval_steps_per_second": 2.024,
"step": 280
},
{
"epoch": 0.12202819272038712,
"grad_norm": 0.09383056312799454,
"learning_rate": 0.0001890909090909091,
"loss": 0.1279,
"step": 290
},
{
"epoch": 0.12202819272038712,
"eval_accuracy": 0.9297285676002502,
"eval_loss": 0.22569426894187927,
"eval_runtime": 586.2579,
"eval_samples_per_second": 8.107,
"eval_steps_per_second": 2.028,
"step": 290
},
{
"epoch": 0.12623606143488322,
"grad_norm": 1.5099377632141113,
"learning_rate": 0.00018181818181818183,
"loss": 0.168,
"step": 300
},
{
"epoch": 0.12623606143488322,
"eval_accuracy": 0.9347780346870422,
"eval_loss": 0.2831152081489563,
"eval_runtime": 587.962,
"eval_samples_per_second": 8.084,
"eval_steps_per_second": 2.022,
"step": 300
},
{
"epoch": 0.13044393014937933,
"grad_norm": 16.816967010498047,
"learning_rate": 0.00017454545454545454,
"loss": 0.1351,
"step": 310
},
{
"epoch": 0.13044393014937933,
"eval_accuracy": 0.9293078184127808,
"eval_loss": 0.3322593569755554,
"eval_runtime": 586.8235,
"eval_samples_per_second": 8.1,
"eval_steps_per_second": 2.026,
"step": 310
},
{
"epoch": 0.13465179886387546,
"grad_norm": 0.6043083667755127,
"learning_rate": 0.00016727272727272728,
"loss": 0.0422,
"step": 320
},
{
"epoch": 0.13465179886387546,
"eval_accuracy": 0.9427729845046997,
"eval_loss": 0.28099876642227173,
"eval_runtime": 587.4493,
"eval_samples_per_second": 8.091,
"eval_steps_per_second": 2.024,
"step": 320
},
{
"epoch": 0.13885966757837157,
"grad_norm": 16.680456161499023,
"learning_rate": 0.00016,
"loss": 0.1904,
"step": 330
},
{
"epoch": 0.13885966757837157,
"eval_accuracy": 0.9349884390830994,
"eval_loss": 0.3474605977535248,
"eval_runtime": 579.304,
"eval_samples_per_second": 8.205,
"eval_steps_per_second": 2.052,
"step": 330
},
{
"epoch": 0.14306753629286767,
"grad_norm": 0.6815859079360962,
"learning_rate": 0.00015272727272727275,
"loss": 0.0864,
"step": 340
},
{
"epoch": 0.14306753629286767,
"eval_accuracy": 0.9438249468803406,
"eval_loss": 0.3012893497943878,
"eval_runtime": 588.316,
"eval_samples_per_second": 8.079,
"eval_steps_per_second": 2.021,
"step": 340
},
{
"epoch": 0.14727540500736377,
"grad_norm": 0.01422311831265688,
"learning_rate": 0.00014545454545454546,
"loss": 0.0198,
"step": 350
},
{
"epoch": 0.14727540500736377,
"eval_accuracy": 0.9335156679153442,
"eval_loss": 0.38236290216445923,
"eval_runtime": 589.516,
"eval_samples_per_second": 8.063,
"eval_steps_per_second": 2.017,
"step": 350
},
{
"epoch": 0.15148327372185988,
"grad_norm": 2.280247211456299,
"learning_rate": 0.0001381818181818182,
"loss": 0.2155,
"step": 360
},
{
"epoch": 0.15148327372185988,
"eval_accuracy": 0.9463496804237366,
"eval_loss": 0.3106628656387329,
"eval_runtime": 581.9576,
"eval_samples_per_second": 8.167,
"eval_steps_per_second": 2.043,
"step": 360
},
{
"epoch": 0.15569114243635598,
"grad_norm": 15.617796897888184,
"learning_rate": 0.00013090909090909093,
"loss": 0.2275,
"step": 370
},
{
"epoch": 0.15569114243635598,
"eval_accuracy": 0.9450873136520386,
"eval_loss": 0.2654193341732025,
"eval_runtime": 582.2813,
"eval_samples_per_second": 8.163,
"eval_steps_per_second": 2.042,
"step": 370
},
{
"epoch": 0.1598990111508521,
"grad_norm": 9.974563598632812,
"learning_rate": 0.00012363636363636364,
"loss": 0.1118,
"step": 380
},
{
"epoch": 0.1598990111508521,
"eval_accuracy": 0.9421418309211731,
"eval_loss": 0.2898730933666229,
"eval_runtime": 580.2758,
"eval_samples_per_second": 8.191,
"eval_steps_per_second": 2.049,
"step": 380
},
{
"epoch": 0.1641068798653482,
"grad_norm": 1.2296732664108276,
"learning_rate": 0.00011636363636363636,
"loss": 0.0258,
"step": 390
},
{
"epoch": 0.1641068798653482,
"eval_accuracy": 0.9570797681808472,
"eval_loss": 0.18523547053337097,
"eval_runtime": 580.5297,
"eval_samples_per_second": 8.187,
"eval_steps_per_second": 2.048,
"step": 390
},
{
"epoch": 0.1683147485798443,
"grad_norm": 1.1257351636886597,
"learning_rate": 0.00010909090909090909,
"loss": 0.0816,
"step": 400
},
{
"epoch": 0.1683147485798443,
"eval_accuracy": 0.9535030722618103,
"eval_loss": 0.18315376341342926,
"eval_runtime": 582.012,
"eval_samples_per_second": 8.166,
"eval_steps_per_second": 2.043,
"step": 400
},
{
"epoch": 0.17252261729434043,
"grad_norm": 0.1417863965034485,
"learning_rate": 0.00010181818181818181,
"loss": 0.1385,
"step": 410
},
{
"epoch": 0.17252261729434043,
"eval_accuracy": 0.9606564044952393,
"eval_loss": 0.1722693145275116,
"eval_runtime": 583.7175,
"eval_samples_per_second": 8.143,
"eval_steps_per_second": 2.037,
"step": 410
},
{
"epoch": 0.17673048600883653,
"grad_norm": 0.05042952299118042,
"learning_rate": 9.454545454545455e-05,
"loss": 0.1194,
"step": 420
},
{
"epoch": 0.17673048600883653,
"eval_accuracy": 0.9391962885856628,
"eval_loss": 0.26152685284614563,
"eval_runtime": 583.6437,
"eval_samples_per_second": 8.144,
"eval_steps_per_second": 2.037,
"step": 420
},
{
"epoch": 0.18093835472333264,
"grad_norm": 0.06302843242883682,
"learning_rate": 8.727272727272727e-05,
"loss": 0.2722,
"step": 430
},
{
"epoch": 0.18093835472333264,
"eval_accuracy": 0.9667578339576721,
"eval_loss": 0.1336488574743271,
"eval_runtime": 585.4585,
"eval_samples_per_second": 8.118,
"eval_steps_per_second": 2.031,
"step": 430
},
{
"epoch": 0.18514622343782874,
"grad_norm": 1.160020112991333,
"learning_rate": 8e-05,
"loss": 0.1969,
"step": 440
},
{
"epoch": 0.18514622343782874,
"eval_accuracy": 0.9520303010940552,
"eval_loss": 0.1606164574623108,
"eval_runtime": 582.6331,
"eval_samples_per_second": 8.158,
"eval_steps_per_second": 2.041,
"step": 440
},
{
"epoch": 0.18935409215232485,
"grad_norm": 0.8594697713851929,
"learning_rate": 7.272727272727273e-05,
"loss": 0.109,
"step": 450
},
{
"epoch": 0.18935409215232485,
"eval_accuracy": 0.9610772132873535,
"eval_loss": 0.13081230223178864,
"eval_runtime": 598.3887,
"eval_samples_per_second": 7.943,
"eval_steps_per_second": 1.987,
"step": 450
},
{
"epoch": 0.19356196086682095,
"grad_norm": 0.1255054622888565,
"learning_rate": 6.545454545454546e-05,
"loss": 0.1662,
"step": 460
},
{
"epoch": 0.19356196086682095,
"eval_accuracy": 0.9657058715820312,
"eval_loss": 0.1277003139257431,
"eval_runtime": 596.8696,
"eval_samples_per_second": 7.963,
"eval_steps_per_second": 1.992,
"step": 460
},
{
"epoch": 0.19776982958131706,
"grad_norm": 0.09831862151622772,
"learning_rate": 5.818181818181818e-05,
"loss": 0.0393,
"step": 470
},
{
"epoch": 0.19776982958131706,
"eval_accuracy": 0.964022696018219,
"eval_loss": 0.12812593579292297,
"eval_runtime": 594.9871,
"eval_samples_per_second": 7.988,
"eval_steps_per_second": 1.998,
"step": 470
},
{
"epoch": 0.20197769829581316,
"grad_norm": 0.08306915313005447,
"learning_rate": 5.090909090909091e-05,
"loss": 0.1268,
"step": 480
},
{
"epoch": 0.20197769829581316,
"eval_accuracy": 0.9644435048103333,
"eval_loss": 0.13266168534755707,
"eval_runtime": 586.2759,
"eval_samples_per_second": 8.107,
"eval_steps_per_second": 2.028,
"step": 480
},
{
"epoch": 0.20618556701030927,
"grad_norm": 0.08147989958524704,
"learning_rate": 4.3636363636363636e-05,
"loss": 0.0548,
"step": 490
},
{
"epoch": 0.20618556701030927,
"eval_accuracy": 0.9646539092063904,
"eval_loss": 0.14851805567741394,
"eval_runtime": 591.656,
"eval_samples_per_second": 8.033,
"eval_steps_per_second": 2.01,
"step": 490
},
{
"epoch": 0.2103934357248054,
"grad_norm": 0.6941895484924316,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.0484,
"step": 500
},
{
"epoch": 0.2103934357248054,
"eval_accuracy": 0.9629707336425781,
"eval_loss": 0.16351090371608734,
"eval_runtime": 589.9697,
"eval_samples_per_second": 8.056,
"eval_steps_per_second": 2.015,
"step": 500
},
{
"epoch": 0.2146013044393015,
"grad_norm": 0.04926018416881561,
"learning_rate": 2.909090909090909e-05,
"loss": 0.022,
"step": 510
},
{
"epoch": 0.2146013044393015,
"eval_accuracy": 0.964022696018219,
"eval_loss": 0.15824884176254272,
"eval_runtime": 597.3025,
"eval_samples_per_second": 7.957,
"eval_steps_per_second": 1.991,
"step": 510
},
{
"epoch": 0.2188091731537976,
"grad_norm": 0.043105900287628174,
"learning_rate": 2.1818181818181818e-05,
"loss": 0.1426,
"step": 520
},
{
"epoch": 0.2188091731537976,
"eval_accuracy": 0.9642331004142761,
"eval_loss": 0.15840177237987518,
"eval_runtime": 596.1264,
"eval_samples_per_second": 7.973,
"eval_steps_per_second": 1.995,
"step": 520
},
{
"epoch": 0.22301704186829371,
"grad_norm": 0.04364515841007233,
"learning_rate": 1.4545454545454545e-05,
"loss": 0.0611,
"step": 530
},
{
"epoch": 0.22301704186829371,
"eval_accuracy": 0.9636019468307495,
"eval_loss": 0.1682334691286087,
"eval_runtime": 590.7102,
"eval_samples_per_second": 8.046,
"eval_steps_per_second": 2.013,
"step": 530
},
{
"epoch": 0.22722491058278982,
"grad_norm": 0.9690969586372375,
"learning_rate": 7.272727272727272e-06,
"loss": 0.0668,
"step": 540
},
{
"epoch": 0.22722491058278982,
"eval_accuracy": 0.9633915424346924,
"eval_loss": 0.16814225912094116,
"eval_runtime": 593.6369,
"eval_samples_per_second": 8.007,
"eval_steps_per_second": 2.003,
"step": 540
},
{
"epoch": 0.23143277929728592,
"grad_norm": 0.053498703986406326,
"learning_rate": 0.0,
"loss": 0.0471,
"step": 550
},
{
"epoch": 0.23143277929728592,
"eval_accuracy": 0.964022696018219,
"eval_loss": 0.16578398644924164,
"eval_runtime": 583.1222,
"eval_samples_per_second": 8.151,
"eval_steps_per_second": 2.039,
"step": 550
}
],
"logging_steps": 10,
"max_steps": 550,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.208452205931052e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}