xlsr-wav2vec2-lr5e-4 / trainer_state.json
soba1911's picture
Upload 6 files
e69357d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21670606776989756,
"eval_steps": 10,
"global_step": 550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003940110323089046,
"grad_norm": 2.0866503715515137,
"learning_rate": 0.0004909090909090909,
"loss": 0.4138,
"step": 10
},
{
"epoch": 0.003940110323089046,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.4887285530567169,
"eval_runtime": 644.3572,
"eval_samples_per_second": 7.878,
"eval_steps_per_second": 1.969,
"step": 10
},
{
"epoch": 0.007880220646178092,
"grad_norm": 0.8669756650924683,
"learning_rate": 0.00048181818181818184,
"loss": 0.4995,
"step": 20
},
{
"epoch": 0.007880220646178092,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.38217219710350037,
"eval_runtime": 633.2128,
"eval_samples_per_second": 8.016,
"eval_steps_per_second": 2.004,
"step": 20
},
{
"epoch": 0.01182033096926714,
"grad_norm": 2.203610420227051,
"learning_rate": 0.0004727272727272727,
"loss": 0.382,
"step": 30
},
{
"epoch": 0.01182033096926714,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3638584017753601,
"eval_runtime": 642.1063,
"eval_samples_per_second": 7.905,
"eval_steps_per_second": 1.976,
"step": 30
},
{
"epoch": 0.015760441292356184,
"grad_norm": 0.812998354434967,
"learning_rate": 0.00046363636363636366,
"loss": 0.354,
"step": 40
},
{
"epoch": 0.015760441292356184,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3297870457172394,
"eval_runtime": 644.4864,
"eval_samples_per_second": 7.876,
"eval_steps_per_second": 1.969,
"step": 40
},
{
"epoch": 0.019700551615445233,
"grad_norm": 0.6705520749092102,
"learning_rate": 0.00045454545454545455,
"loss": 0.521,
"step": 50
},
{
"epoch": 0.019700551615445233,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.38316452503204346,
"eval_runtime": 645.7523,
"eval_samples_per_second": 7.861,
"eval_steps_per_second": 1.965,
"step": 50
},
{
"epoch": 0.02364066193853428,
"grad_norm": 1.1547324657440186,
"learning_rate": 0.00044545454545454543,
"loss": 0.3344,
"step": 60
},
{
"epoch": 0.02364066193853428,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3688468039035797,
"eval_runtime": 647.1212,
"eval_samples_per_second": 7.844,
"eval_steps_per_second": 1.961,
"step": 60
},
{
"epoch": 0.027580772261623327,
"grad_norm": 0.7195687890052795,
"learning_rate": 0.00043636363636363637,
"loss": 0.3524,
"step": 70
},
{
"epoch": 0.027580772261623327,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.33884838223457336,
"eval_runtime": 644.2281,
"eval_samples_per_second": 7.879,
"eval_steps_per_second": 1.97,
"step": 70
},
{
"epoch": 0.03152088258471237,
"grad_norm": 0.030797701328992844,
"learning_rate": 0.00042727272727272726,
"loss": 0.2702,
"step": 80
},
{
"epoch": 0.03152088258471237,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.32865411043167114,
"eval_runtime": 643.6443,
"eval_samples_per_second": 7.886,
"eval_steps_per_second": 1.972,
"step": 80
},
{
"epoch": 0.03546099290780142,
"grad_norm": 0.7822753190994263,
"learning_rate": 0.00041818181818181814,
"loss": 0.3767,
"step": 90
},
{
"epoch": 0.03546099290780142,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3282410800457001,
"eval_runtime": 647.7767,
"eval_samples_per_second": 7.836,
"eval_steps_per_second": 1.959,
"step": 90
},
{
"epoch": 0.039401103230890466,
"grad_norm": 0.7474893927574158,
"learning_rate": 0.00040909090909090913,
"loss": 0.2964,
"step": 100
},
{
"epoch": 0.039401103230890466,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3291880786418915,
"eval_runtime": 640.1347,
"eval_samples_per_second": 7.93,
"eval_steps_per_second": 1.982,
"step": 100
},
{
"epoch": 0.04334121355397951,
"grad_norm": 0.44683077931404114,
"learning_rate": 0.0004,
"loss": 0.3428,
"step": 110
},
{
"epoch": 0.04334121355397951,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.33626437187194824,
"eval_runtime": 640.0811,
"eval_samples_per_second": 7.93,
"eval_steps_per_second": 1.983,
"step": 110
},
{
"epoch": 0.04728132387706856,
"grad_norm": 0.07774700969457626,
"learning_rate": 0.00039090909090909096,
"loss": 0.3215,
"step": 120
},
{
"epoch": 0.04728132387706856,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.32750025391578674,
"eval_runtime": 641.6289,
"eval_samples_per_second": 7.911,
"eval_steps_per_second": 1.978,
"step": 120
},
{
"epoch": 0.0512214342001576,
"grad_norm": 0.8798918128013611,
"learning_rate": 0.00038181818181818184,
"loss": 0.3524,
"step": 130
},
{
"epoch": 0.0512214342001576,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3243106007575989,
"eval_runtime": 642.1452,
"eval_samples_per_second": 7.905,
"eval_steps_per_second": 1.976,
"step": 130
},
{
"epoch": 0.055161544523246654,
"grad_norm": 0.513219952583313,
"learning_rate": 0.00037272727272727273,
"loss": 0.3029,
"step": 140
},
{
"epoch": 0.055161544523246654,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.33765551447868347,
"eval_runtime": 644.4577,
"eval_samples_per_second": 7.876,
"eval_steps_per_second": 1.969,
"step": 140
},
{
"epoch": 0.0591016548463357,
"grad_norm": 0.1864446997642517,
"learning_rate": 0.00036363636363636367,
"loss": 0.494,
"step": 150
},
{
"epoch": 0.0591016548463357,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3399695158004761,
"eval_runtime": 644.9312,
"eval_samples_per_second": 7.871,
"eval_steps_per_second": 1.968,
"step": 150
},
{
"epoch": 0.06304176516942474,
"grad_norm": 0.6781743168830872,
"learning_rate": 0.00035454545454545455,
"loss": 0.2655,
"step": 160
},
{
"epoch": 0.06304176516942474,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.32840684056282043,
"eval_runtime": 638.0586,
"eval_samples_per_second": 7.955,
"eval_steps_per_second": 1.989,
"step": 160
},
{
"epoch": 0.06698187549251379,
"grad_norm": 0.4446357786655426,
"learning_rate": 0.00034545454545454544,
"loss": 0.3505,
"step": 170
},
{
"epoch": 0.06698187549251379,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.34705930948257446,
"eval_runtime": 636.2641,
"eval_samples_per_second": 7.978,
"eval_steps_per_second": 1.994,
"step": 170
},
{
"epoch": 0.07092198581560284,
"grad_norm": 0.5605026483535767,
"learning_rate": 0.0003363636363636364,
"loss": 0.2416,
"step": 180
},
{
"epoch": 0.07092198581560284,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3337305784225464,
"eval_runtime": 637.842,
"eval_samples_per_second": 7.958,
"eval_steps_per_second": 1.99,
"step": 180
},
{
"epoch": 0.07486209613869188,
"grad_norm": 0.48381492495536804,
"learning_rate": 0.00032727272727272726,
"loss": 0.3361,
"step": 190
},
{
"epoch": 0.07486209613869188,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3374550938606262,
"eval_runtime": 630.1852,
"eval_samples_per_second": 8.055,
"eval_steps_per_second": 2.014,
"step": 190
},
{
"epoch": 0.07880220646178093,
"grad_norm": 0.20769913494586945,
"learning_rate": 0.0003181818181818182,
"loss": 0.3264,
"step": 200
},
{
"epoch": 0.07880220646178093,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.32236042618751526,
"eval_runtime": 634.0108,
"eval_samples_per_second": 8.006,
"eval_steps_per_second": 2.002,
"step": 200
},
{
"epoch": 0.08274231678486997,
"grad_norm": 0.5153699517250061,
"learning_rate": 0.0003090909090909091,
"loss": 0.1682,
"step": 210
},
{
"epoch": 0.08274231678486997,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3330242931842804,
"eval_runtime": 630.7179,
"eval_samples_per_second": 8.048,
"eval_steps_per_second": 2.012,
"step": 210
},
{
"epoch": 0.08668242710795902,
"grad_norm": 0.5939351916313171,
"learning_rate": 0.0003,
"loss": 0.3564,
"step": 220
},
{
"epoch": 0.08668242710795902,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3238199055194855,
"eval_runtime": 639.7513,
"eval_samples_per_second": 7.934,
"eval_steps_per_second": 1.984,
"step": 220
},
{
"epoch": 0.09062253743104808,
"grad_norm": 0.5458611845970154,
"learning_rate": 0.0002909090909090909,
"loss": 0.2441,
"step": 230
},
{
"epoch": 0.09062253743104808,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.3024224638938904,
"eval_runtime": 640.3192,
"eval_samples_per_second": 7.927,
"eval_steps_per_second": 1.982,
"step": 230
},
{
"epoch": 0.09456264775413711,
"grad_norm": 4.142682075500488,
"learning_rate": 0.0002818181818181818,
"loss": 0.4017,
"step": 240
},
{
"epoch": 0.09456264775413711,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.28254833817481995,
"eval_runtime": 639.8776,
"eval_samples_per_second": 7.933,
"eval_steps_per_second": 1.983,
"step": 240
},
{
"epoch": 0.09850275807722617,
"grad_norm": 0.83643639087677,
"learning_rate": 0.00027272727272727274,
"loss": 0.2683,
"step": 250
},
{
"epoch": 0.09850275807722617,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.2727406322956085,
"eval_runtime": 637.4582,
"eval_samples_per_second": 7.963,
"eval_steps_per_second": 1.991,
"step": 250
},
{
"epoch": 0.1024428684003152,
"grad_norm": 2.7200253009796143,
"learning_rate": 0.0002636363636363636,
"loss": 0.3417,
"step": 260
},
{
"epoch": 0.1024428684003152,
"eval_accuracy": 0.8975571393966675,
"eval_loss": 0.2998380661010742,
"eval_runtime": 637.2677,
"eval_samples_per_second": 7.965,
"eval_steps_per_second": 1.991,
"step": 260
},
{
"epoch": 0.10638297872340426,
"grad_norm": 0.9870793223381042,
"learning_rate": 0.0002545454545454545,
"loss": 0.3689,
"step": 270
},
{
"epoch": 0.10638297872340426,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.25626352429389954,
"eval_runtime": 638.6518,
"eval_samples_per_second": 7.948,
"eval_steps_per_second": 1.987,
"step": 270
},
{
"epoch": 0.11032308904649331,
"grad_norm": 0.7646285891532898,
"learning_rate": 0.00024545454545454545,
"loss": 0.3017,
"step": 280
},
{
"epoch": 0.11032308904649331,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.2582588195800781,
"eval_runtime": 633.7229,
"eval_samples_per_second": 8.01,
"eval_steps_per_second": 2.002,
"step": 280
},
{
"epoch": 0.11426319936958235,
"grad_norm": 3.958172082901001,
"learning_rate": 0.00023636363636363636,
"loss": 0.3033,
"step": 290
},
{
"epoch": 0.11426319936958235,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.2637666165828705,
"eval_runtime": 635.1066,
"eval_samples_per_second": 7.992,
"eval_steps_per_second": 1.998,
"step": 290
},
{
"epoch": 0.1182033096926714,
"grad_norm": 3.4462485313415527,
"learning_rate": 0.00022727272727272727,
"loss": 0.1859,
"step": 300
},
{
"epoch": 0.1182033096926714,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.22763606905937195,
"eval_runtime": 635.7822,
"eval_samples_per_second": 7.984,
"eval_steps_per_second": 1.996,
"step": 300
},
{
"epoch": 0.12214342001576044,
"grad_norm": 0.9540772438049316,
"learning_rate": 0.00021818181818181818,
"loss": 0.2832,
"step": 310
},
{
"epoch": 0.12214342001576044,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.1975635588169098,
"eval_runtime": 642.0949,
"eval_samples_per_second": 7.905,
"eval_steps_per_second": 1.976,
"step": 310
},
{
"epoch": 0.12608353033884948,
"grad_norm": 0.45892244577407837,
"learning_rate": 0.00020909090909090907,
"loss": 0.2679,
"step": 320
},
{
"epoch": 0.12608353033884948,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.1894582062959671,
"eval_runtime": 642.7065,
"eval_samples_per_second": 7.898,
"eval_steps_per_second": 1.974,
"step": 320
},
{
"epoch": 0.13002364066193853,
"grad_norm": 0.4674457013607025,
"learning_rate": 0.0002,
"loss": 0.1966,
"step": 330
},
{
"epoch": 0.13002364066193853,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.193992018699646,
"eval_runtime": 641.7924,
"eval_samples_per_second": 7.909,
"eval_steps_per_second": 1.977,
"step": 330
},
{
"epoch": 0.13396375098502758,
"grad_norm": 0.4076831638813019,
"learning_rate": 0.00019090909090909092,
"loss": 0.2063,
"step": 340
},
{
"epoch": 0.13396375098502758,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.19286341965198517,
"eval_runtime": 639.6626,
"eval_samples_per_second": 7.935,
"eval_steps_per_second": 1.984,
"step": 340
},
{
"epoch": 0.13790386130811663,
"grad_norm": 0.5408686995506287,
"learning_rate": 0.00018181818181818183,
"loss": 0.2215,
"step": 350
},
{
"epoch": 0.13790386130811663,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.17871853709220886,
"eval_runtime": 636.151,
"eval_samples_per_second": 7.979,
"eval_steps_per_second": 1.995,
"step": 350
},
{
"epoch": 0.14184397163120568,
"grad_norm": 3.9466795921325684,
"learning_rate": 0.00017272727272727272,
"loss": 0.2226,
"step": 360
},
{
"epoch": 0.14184397163120568,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.22342762351036072,
"eval_runtime": 635.3665,
"eval_samples_per_second": 7.989,
"eval_steps_per_second": 1.997,
"step": 360
},
{
"epoch": 0.1457840819542947,
"grad_norm": 0.3433726131916046,
"learning_rate": 0.00016363636363636363,
"loss": 0.2688,
"step": 370
},
{
"epoch": 0.1457840819542947,
"eval_accuracy": 0.8358944058418274,
"eval_loss": 0.3028296232223511,
"eval_runtime": 626.8327,
"eval_samples_per_second": 8.098,
"eval_steps_per_second": 2.024,
"step": 370
},
{
"epoch": 0.14972419227738376,
"grad_norm": 0.269267201423645,
"learning_rate": 0.00015454545454545454,
"loss": 0.2317,
"step": 380
},
{
"epoch": 0.14972419227738376,
"eval_accuracy": 0.8861308097839355,
"eval_loss": 0.1874387264251709,
"eval_runtime": 634.1878,
"eval_samples_per_second": 8.004,
"eval_steps_per_second": 2.001,
"step": 380
},
{
"epoch": 0.1536643026004728,
"grad_norm": 0.16006210446357727,
"learning_rate": 0.00014545454545454546,
"loss": 0.2088,
"step": 390
},
{
"epoch": 0.1536643026004728,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.2302139550447464,
"eval_runtime": 631.9364,
"eval_samples_per_second": 8.032,
"eval_steps_per_second": 2.008,
"step": 390
},
{
"epoch": 0.15760441292356187,
"grad_norm": 0.5244100093841553,
"learning_rate": 0.00013636363636363637,
"loss": 0.4595,
"step": 400
},
{
"epoch": 0.15760441292356187,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.19357828795909882,
"eval_runtime": 642.9065,
"eval_samples_per_second": 7.895,
"eval_steps_per_second": 1.974,
"step": 400
},
{
"epoch": 0.16154452324665092,
"grad_norm": 0.5354598164558411,
"learning_rate": 0.00012727272727272725,
"loss": 0.15,
"step": 410
},
{
"epoch": 0.16154452324665092,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.18797340989112854,
"eval_runtime": 640.1417,
"eval_samples_per_second": 7.929,
"eval_steps_per_second": 1.982,
"step": 410
},
{
"epoch": 0.16548463356973994,
"grad_norm": 0.2795056998729706,
"learning_rate": 0.00011818181818181818,
"loss": 0.1919,
"step": 420
},
{
"epoch": 0.16548463356973994,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.1791592687368393,
"eval_runtime": 639.6272,
"eval_samples_per_second": 7.936,
"eval_steps_per_second": 1.984,
"step": 420
},
{
"epoch": 0.169424743892829,
"grad_norm": 0.2897014021873474,
"learning_rate": 0.00010909090909090909,
"loss": 0.3189,
"step": 430
},
{
"epoch": 0.169424743892829,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.1778295636177063,
"eval_runtime": 637.1833,
"eval_samples_per_second": 7.966,
"eval_steps_per_second": 1.992,
"step": 430
},
{
"epoch": 0.17336485421591805,
"grad_norm": 0.08481621742248535,
"learning_rate": 0.0001,
"loss": 0.2422,
"step": 440
},
{
"epoch": 0.17336485421591805,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.18217456340789795,
"eval_runtime": 639.7951,
"eval_samples_per_second": 7.934,
"eval_steps_per_second": 1.983,
"step": 440
},
{
"epoch": 0.1773049645390071,
"grad_norm": 0.3332684636116028,
"learning_rate": 9.090909090909092e-05,
"loss": 0.1599,
"step": 450
},
{
"epoch": 0.1773049645390071,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.18780100345611572,
"eval_runtime": 641.5947,
"eval_samples_per_second": 7.912,
"eval_steps_per_second": 1.978,
"step": 450
},
{
"epoch": 0.18124507486209615,
"grad_norm": 0.5597277879714966,
"learning_rate": 8.181818181818182e-05,
"loss": 0.2962,
"step": 460
},
{
"epoch": 0.18124507486209615,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.1818259209394455,
"eval_runtime": 638.6242,
"eval_samples_per_second": 7.948,
"eval_steps_per_second": 1.987,
"step": 460
},
{
"epoch": 0.18518518518518517,
"grad_norm": 0.9142216444015503,
"learning_rate": 7.272727272727273e-05,
"loss": 0.1295,
"step": 470
},
{
"epoch": 0.18518518518518517,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.1787974089384079,
"eval_runtime": 642.4129,
"eval_samples_per_second": 7.901,
"eval_steps_per_second": 1.975,
"step": 470
},
{
"epoch": 0.18912529550827423,
"grad_norm": 0.5384683012962341,
"learning_rate": 6.363636363636363e-05,
"loss": 0.2327,
"step": 480
},
{
"epoch": 0.18912529550827423,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.1720920354127884,
"eval_runtime": 637.6983,
"eval_samples_per_second": 7.96,
"eval_steps_per_second": 1.99,
"step": 480
},
{
"epoch": 0.19306540583136328,
"grad_norm": 0.3855592608451843,
"learning_rate": 5.4545454545454546e-05,
"loss": 0.2012,
"step": 490
},
{
"epoch": 0.19306540583136328,
"eval_accuracy": 0.8977541327476501,
"eval_loss": 0.17169128358364105,
"eval_runtime": 643.1115,
"eval_samples_per_second": 7.893,
"eval_steps_per_second": 1.973,
"step": 490
},
{
"epoch": 0.19700551615445233,
"grad_norm": 0.18903906643390656,
"learning_rate": 4.545454545454546e-05,
"loss": 0.2338,
"step": 500
},
{
"epoch": 0.19700551615445233,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.16954948008060455,
"eval_runtime": 641.432,
"eval_samples_per_second": 7.914,
"eval_steps_per_second": 1.978,
"step": 500
},
{
"epoch": 0.20094562647754138,
"grad_norm": 0.5222665667533875,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.261,
"step": 510
},
{
"epoch": 0.20094562647754138,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.16886387765407562,
"eval_runtime": 644.6602,
"eval_samples_per_second": 7.874,
"eval_steps_per_second": 1.968,
"step": 510
},
{
"epoch": 0.2048857368006304,
"grad_norm": 0.1900663524866104,
"learning_rate": 2.7272727272727273e-05,
"loss": 0.2295,
"step": 520
},
{
"epoch": 0.2048857368006304,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.16761720180511475,
"eval_runtime": 644.587,
"eval_samples_per_second": 7.875,
"eval_steps_per_second": 1.969,
"step": 520
},
{
"epoch": 0.20882584712371946,
"grad_norm": 0.7705594897270203,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.2785,
"step": 530
},
{
"epoch": 0.20882584712371946,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.16720621287822723,
"eval_runtime": 641.3858,
"eval_samples_per_second": 7.914,
"eval_steps_per_second": 1.979,
"step": 530
},
{
"epoch": 0.2127659574468085,
"grad_norm": 3.4368479251861572,
"learning_rate": 9.090909090909091e-06,
"loss": 0.2326,
"step": 540
},
{
"epoch": 0.2127659574468085,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.1670488715171814,
"eval_runtime": 635.73,
"eval_samples_per_second": 7.985,
"eval_steps_per_second": 1.996,
"step": 540
},
{
"epoch": 0.21670606776989756,
"grad_norm": 0.17799390852451324,
"learning_rate": 0.0,
"loss": 0.2048,
"step": 550
},
{
"epoch": 0.21670606776989756,
"eval_accuracy": 0.8983451724052429,
"eval_loss": 0.1670481413602829,
"eval_runtime": 633.0482,
"eval_samples_per_second": 8.018,
"eval_steps_per_second": 2.005,
"step": 550
}
],
"logging_steps": 10,
"max_steps": 550,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.667515335043259e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}