{ "best_metric": 0.3373493975903614, "best_model_checkpoint": "videomae-base-finetuned-numbers-augmented2/checkpoint-2816", "epoch": 3.2496448863636362, "eval_steps": 500, "global_step": 2816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0035511363636363635, "grad_norm": 11.042091369628906, "learning_rate": 1.7730496453900713e-07, "loss": 2.6254, "step": 10 }, { "epoch": 0.007102272727272727, "grad_norm": 15.068171501159668, "learning_rate": 3.5460992907801425e-07, "loss": 2.6399, "step": 20 }, { "epoch": 0.01065340909090909, "grad_norm": 12.725520133972168, "learning_rate": 5.319148936170213e-07, "loss": 2.5084, "step": 30 }, { "epoch": 0.014204545454545454, "grad_norm": 12.825174331665039, "learning_rate": 7.092198581560285e-07, "loss": 2.5423, "step": 40 }, { "epoch": 0.01775568181818182, "grad_norm": 19.975149154663086, "learning_rate": 8.865248226950356e-07, "loss": 2.6042, "step": 50 }, { "epoch": 0.02130681818181818, "grad_norm": 13.08024787902832, "learning_rate": 1.0638297872340427e-06, "loss": 2.4857, "step": 60 }, { "epoch": 0.024857954545454544, "grad_norm": 13.69168758392334, "learning_rate": 1.2411347517730497e-06, "loss": 2.5421, "step": 70 }, { "epoch": 0.028409090909090908, "grad_norm": 21.916378021240234, "learning_rate": 1.418439716312057e-06, "loss": 2.5581, "step": 80 }, { "epoch": 0.03196022727272727, "grad_norm": 44.70994567871094, "learning_rate": 1.595744680851064e-06, "loss": 2.5354, "step": 90 }, { "epoch": 0.03551136363636364, "grad_norm": 16.238237380981445, "learning_rate": 1.7730496453900712e-06, "loss": 2.4371, "step": 100 }, { "epoch": 0.0390625, "grad_norm": 11.415323257446289, "learning_rate": 1.9503546099290782e-06, "loss": 2.4871, "step": 110 }, { "epoch": 0.04261363636363636, "grad_norm": 8.98475456237793, "learning_rate": 2.1276595744680853e-06, "loss": 2.3905, "step": 120 }, { "epoch": 0.04616477272727273, "grad_norm": 11.144891738891602, "learning_rate": 2.3049645390070924e-06, "loss": 2.4819, "step": 130 }, { "epoch": 0.04971590909090909, "grad_norm": 15.341207504272461, "learning_rate": 2.4822695035460995e-06, "loss": 2.4592, "step": 140 }, { "epoch": 0.053267045454545456, "grad_norm": 12.760394096374512, "learning_rate": 2.6595744680851065e-06, "loss": 2.4855, "step": 150 }, { "epoch": 0.056818181818181816, "grad_norm": 13.143548965454102, "learning_rate": 2.836879432624114e-06, "loss": 2.4556, "step": 160 }, { "epoch": 0.060369318181818184, "grad_norm": 11.302803993225098, "learning_rate": 3.0141843971631207e-06, "loss": 2.4462, "step": 170 }, { "epoch": 0.06392045454545454, "grad_norm": 11.462811470031738, "learning_rate": 3.191489361702128e-06, "loss": 2.3793, "step": 180 }, { "epoch": 0.06747159090909091, "grad_norm": 14.533432960510254, "learning_rate": 3.368794326241135e-06, "loss": 2.3991, "step": 190 }, { "epoch": 0.07102272727272728, "grad_norm": 16.54037094116211, "learning_rate": 3.5460992907801423e-06, "loss": 2.4178, "step": 200 }, { "epoch": 0.07457386363636363, "grad_norm": 25.168344497680664, "learning_rate": 3.723404255319149e-06, "loss": 2.3658, "step": 210 }, { "epoch": 0.078125, "grad_norm": 14.53443431854248, "learning_rate": 3.9007092198581565e-06, "loss": 2.4974, "step": 220 }, { "epoch": 0.08167613636363637, "grad_norm": 13.964099884033203, "learning_rate": 4.078014184397163e-06, "loss": 2.3467, "step": 230 }, { "epoch": 0.08522727272727272, "grad_norm": 14.819562911987305, "learning_rate": 4.255319148936171e-06, "loss": 2.3952, "step": 240 }, { "epoch": 0.08877840909090909, "grad_norm": 9.89671802520752, "learning_rate": 4.432624113475177e-06, "loss": 2.4154, "step": 250 }, { "epoch": 0.09232954545454546, "grad_norm": 16.636743545532227, "learning_rate": 4.609929078014185e-06, "loss": 2.4225, "step": 260 }, { "epoch": 0.09588068181818182, "grad_norm": 12.833456993103027, "learning_rate": 4.787234042553192e-06, "loss": 2.4442, "step": 270 }, { "epoch": 0.09943181818181818, "grad_norm": 11.386968612670898, "learning_rate": 4.964539007092199e-06, "loss": 2.3345, "step": 280 }, { "epoch": 0.10298295454545454, "grad_norm": 15.00284481048584, "learning_rate": 4.984214680347277e-06, "loss": 2.3783, "step": 290 }, { "epoch": 0.10653409090909091, "grad_norm": 12.9210786819458, "learning_rate": 4.964483030781374e-06, "loss": 2.3678, "step": 300 }, { "epoch": 0.11008522727272728, "grad_norm": 46.81148910522461, "learning_rate": 4.94475138121547e-06, "loss": 2.3202, "step": 310 }, { "epoch": 0.11363636363636363, "grad_norm": 11.623534202575684, "learning_rate": 4.925019731649567e-06, "loss": 2.4116, "step": 320 }, { "epoch": 0.1171875, "grad_norm": 12.292633056640625, "learning_rate": 4.905288082083662e-06, "loss": 2.3704, "step": 330 }, { "epoch": 0.12073863636363637, "grad_norm": 13.08430004119873, "learning_rate": 4.885556432517759e-06, "loss": 2.351, "step": 340 }, { "epoch": 0.12428977272727272, "grad_norm": 63.07197570800781, "learning_rate": 4.865824782951855e-06, "loss": 2.3944, "step": 350 }, { "epoch": 0.1278409090909091, "grad_norm": 20.782880783081055, "learning_rate": 4.846093133385951e-06, "loss": 2.3507, "step": 360 }, { "epoch": 0.13139204545454544, "grad_norm": 11.26919174194336, "learning_rate": 4.8263614838200475e-06, "loss": 2.316, "step": 370 }, { "epoch": 0.13494318181818182, "grad_norm": 13.572308540344238, "learning_rate": 4.806629834254144e-06, "loss": 2.3005, "step": 380 }, { "epoch": 0.13849431818181818, "grad_norm": 17.582345962524414, "learning_rate": 4.78689818468824e-06, "loss": 2.2751, "step": 390 }, { "epoch": 0.14204545454545456, "grad_norm": 12.714279174804688, "learning_rate": 4.767166535122337e-06, "loss": 2.3287, "step": 400 }, { "epoch": 0.1455965909090909, "grad_norm": 11.136545181274414, "learning_rate": 4.747434885556433e-06, "loss": 2.3605, "step": 410 }, { "epoch": 0.14914772727272727, "grad_norm": 26.191139221191406, "learning_rate": 4.727703235990529e-06, "loss": 2.3669, "step": 420 }, { "epoch": 0.15269886363636365, "grad_norm": 15.782291412353516, "learning_rate": 4.707971586424626e-06, "loss": 2.3671, "step": 430 }, { "epoch": 0.15625, "grad_norm": 13.54359245300293, "learning_rate": 4.6882399368587215e-06, "loss": 2.3991, "step": 440 }, { "epoch": 0.15980113636363635, "grad_norm": 12.135380744934082, "learning_rate": 4.668508287292818e-06, "loss": 2.3314, "step": 450 }, { "epoch": 0.16335227272727273, "grad_norm": 12.856097221374512, "learning_rate": 4.6487766377269145e-06, "loss": 2.2192, "step": 460 }, { "epoch": 0.1669034090909091, "grad_norm": 15.03661823272705, "learning_rate": 4.62904498816101e-06, "loss": 2.3, "step": 470 }, { "epoch": 0.17045454545454544, "grad_norm": 11.673720359802246, "learning_rate": 4.609313338595107e-06, "loss": 2.2383, "step": 480 }, { "epoch": 0.17400568181818182, "grad_norm": 14.064128875732422, "learning_rate": 4.589581689029203e-06, "loss": 2.2481, "step": 490 }, { "epoch": 0.17755681818181818, "grad_norm": 15.709892272949219, "learning_rate": 4.5698500394633e-06, "loss": 2.3622, "step": 500 }, { "epoch": 0.18110795454545456, "grad_norm": 14.75145149230957, "learning_rate": 4.550118389897396e-06, "loss": 2.2797, "step": 510 }, { "epoch": 0.1846590909090909, "grad_norm": 12.432939529418945, "learning_rate": 4.530386740331492e-06, "loss": 2.2493, "step": 520 }, { "epoch": 0.18821022727272727, "grad_norm": 13.656457901000977, "learning_rate": 4.510655090765588e-06, "loss": 2.2605, "step": 530 }, { "epoch": 0.19176136363636365, "grad_norm": 16.014392852783203, "learning_rate": 4.490923441199685e-06, "loss": 2.2669, "step": 540 }, { "epoch": 0.1953125, "grad_norm": 17.114728927612305, "learning_rate": 4.4711917916337814e-06, "loss": 2.3496, "step": 550 }, { "epoch": 0.19886363636363635, "grad_norm": 13.276822090148926, "learning_rate": 4.451460142067877e-06, "loss": 2.1851, "step": 560 }, { "epoch": 0.20241477272727273, "grad_norm": 14.958836555480957, "learning_rate": 4.431728492501974e-06, "loss": 2.2624, "step": 570 }, { "epoch": 0.2059659090909091, "grad_norm": 17.03455924987793, "learning_rate": 4.41199684293607e-06, "loss": 2.1772, "step": 580 }, { "epoch": 0.20951704545454544, "grad_norm": 14.998839378356934, "learning_rate": 4.392265193370166e-06, "loss": 2.2558, "step": 590 }, { "epoch": 0.21306818181818182, "grad_norm": 16.826061248779297, "learning_rate": 4.372533543804262e-06, "loss": 2.2345, "step": 600 }, { "epoch": 0.21661931818181818, "grad_norm": 12.38198471069336, "learning_rate": 4.352801894238359e-06, "loss": 2.2144, "step": 610 }, { "epoch": 0.22017045454545456, "grad_norm": 12.347530364990234, "learning_rate": 4.3330702446724545e-06, "loss": 2.1875, "step": 620 }, { "epoch": 0.2237215909090909, "grad_norm": 17.95762825012207, "learning_rate": 4.313338595106552e-06, "loss": 2.1382, "step": 630 }, { "epoch": 0.22727272727272727, "grad_norm": 20.329545974731445, "learning_rate": 4.2936069455406475e-06, "loss": 2.1519, "step": 640 }, { "epoch": 0.23082386363636365, "grad_norm": 14.157989501953125, "learning_rate": 4.273875295974744e-06, "loss": 2.1994, "step": 650 }, { "epoch": 0.234375, "grad_norm": 17.65989112854004, "learning_rate": 4.2541436464088406e-06, "loss": 2.2021, "step": 660 }, { "epoch": 0.23792613636363635, "grad_norm": 19.491857528686523, "learning_rate": 4.234411996842936e-06, "loss": 2.25, "step": 670 }, { "epoch": 0.24147727272727273, "grad_norm": 14.291483879089355, "learning_rate": 4.214680347277033e-06, "loss": 2.1732, "step": 680 }, { "epoch": 0.2450284090909091, "grad_norm": 15.794211387634277, "learning_rate": 4.194948697711129e-06, "loss": 2.2288, "step": 690 }, { "epoch": 0.24857954545454544, "grad_norm": 13.537517547607422, "learning_rate": 4.175217048145225e-06, "loss": 2.2279, "step": 700 }, { "epoch": 0.25035511363636365, "eval_accuracy": 0.18244406196213425, "eval_f1": 0.1261780590834414, "eval_loss": 2.2644505500793457, "eval_precision": 0.25592730406989506, "eval_recall": 0.17920384277210025, "eval_runtime": 691.0928, "eval_samples_per_second": 0.841, "eval_steps_per_second": 0.211, "step": 705 }, { "epoch": 1.0017755681818181, "grad_norm": 16.53750228881836, "learning_rate": 4.1554853985793214e-06, "loss": 2.2194, "step": 710 }, { "epoch": 1.0053267045454546, "grad_norm": 14.238625526428223, "learning_rate": 4.135753749013418e-06, "loss": 2.1206, "step": 720 }, { "epoch": 1.0088778409090908, "grad_norm": 19.52684783935547, "learning_rate": 4.1160220994475145e-06, "loss": 2.0227, "step": 730 }, { "epoch": 1.0124289772727273, "grad_norm": 21.326141357421875, "learning_rate": 4.09629044988161e-06, "loss": 2.2306, "step": 740 }, { "epoch": 1.0159801136363635, "grad_norm": 18.599313735961914, "learning_rate": 4.076558800315707e-06, "loss": 2.0473, "step": 750 }, { "epoch": 1.01953125, "grad_norm": 21.073596954345703, "learning_rate": 4.056827150749803e-06, "loss": 2.0669, "step": 760 }, { "epoch": 1.0230823863636365, "grad_norm": 13.940530776977539, "learning_rate": 4.037095501183899e-06, "loss": 2.0881, "step": 770 }, { "epoch": 1.0266335227272727, "grad_norm": 23.56789207458496, "learning_rate": 4.017363851617996e-06, "loss": 2.0495, "step": 780 }, { "epoch": 1.0301846590909092, "grad_norm": 24.358179092407227, "learning_rate": 3.997632202052092e-06, "loss": 2.1131, "step": 790 }, { "epoch": 1.0337357954545454, "grad_norm": 23.59551239013672, "learning_rate": 3.977900552486188e-06, "loss": 2.2194, "step": 800 }, { "epoch": 1.0372869318181819, "grad_norm": 18.300479888916016, "learning_rate": 3.958168902920285e-06, "loss": 2.0495, "step": 810 }, { "epoch": 1.0408380681818181, "grad_norm": 14.824146270751953, "learning_rate": 3.9384372533543806e-06, "loss": 2.0368, "step": 820 }, { "epoch": 1.0443892045454546, "grad_norm": 30.449533462524414, "learning_rate": 3.918705603788477e-06, "loss": 1.9466, "step": 830 }, { "epoch": 1.0479403409090908, "grad_norm": 30.030054092407227, "learning_rate": 3.898973954222574e-06, "loss": 2.037, "step": 840 }, { "epoch": 1.0514914772727273, "grad_norm": 16.496719360351562, "learning_rate": 3.879242304656669e-06, "loss": 2.0618, "step": 850 }, { "epoch": 1.0550426136363635, "grad_norm": 30.52495574951172, "learning_rate": 3.859510655090766e-06, "loss": 1.9784, "step": 860 }, { "epoch": 1.05859375, "grad_norm": 16.27585792541504, "learning_rate": 3.839779005524862e-06, "loss": 2.0537, "step": 870 }, { "epoch": 1.0621448863636365, "grad_norm": 21.442113876342773, "learning_rate": 3.820047355958959e-06, "loss": 2.0197, "step": 880 }, { "epoch": 1.0656960227272727, "grad_norm": 23.343584060668945, "learning_rate": 3.8003157063930545e-06, "loss": 1.9195, "step": 890 }, { "epoch": 1.0692471590909092, "grad_norm": 12.5782470703125, "learning_rate": 3.7805840568271514e-06, "loss": 1.9142, "step": 900 }, { "epoch": 1.0727982954545454, "grad_norm": 14.708935737609863, "learning_rate": 3.7608524072612475e-06, "loss": 1.958, "step": 910 }, { "epoch": 1.0763494318181819, "grad_norm": 14.57020378112793, "learning_rate": 3.7411207576953436e-06, "loss": 1.9806, "step": 920 }, { "epoch": 1.0799005681818181, "grad_norm": 12.808838844299316, "learning_rate": 3.72138910812944e-06, "loss": 2.0507, "step": 930 }, { "epoch": 1.0834517045454546, "grad_norm": 20.186933517456055, "learning_rate": 3.7016574585635362e-06, "loss": 2.1607, "step": 940 }, { "epoch": 1.0870028409090908, "grad_norm": 17.26938819885254, "learning_rate": 3.6819258089976323e-06, "loss": 1.9388, "step": 950 }, { "epoch": 1.0905539772727273, "grad_norm": 18.56201934814453, "learning_rate": 3.662194159431729e-06, "loss": 1.984, "step": 960 }, { "epoch": 1.0941051136363635, "grad_norm": 13.663825035095215, "learning_rate": 3.642462509865825e-06, "loss": 1.9401, "step": 970 }, { "epoch": 1.09765625, "grad_norm": 18.881681442260742, "learning_rate": 3.622730860299921e-06, "loss": 1.914, "step": 980 }, { "epoch": 1.1012073863636365, "grad_norm": 25.957923889160156, "learning_rate": 3.602999210734018e-06, "loss": 1.9745, "step": 990 }, { "epoch": 1.1047585227272727, "grad_norm": 33.24240493774414, "learning_rate": 3.583267561168114e-06, "loss": 1.9489, "step": 1000 }, { "epoch": 1.1083096590909092, "grad_norm": 16.282686233520508, "learning_rate": 3.56353591160221e-06, "loss": 1.9327, "step": 1010 }, { "epoch": 1.1118607954545454, "grad_norm": 27.778982162475586, "learning_rate": 3.5438042620363066e-06, "loss": 1.801, "step": 1020 }, { "epoch": 1.1154119318181819, "grad_norm": 20.463930130004883, "learning_rate": 3.5240726124704027e-06, "loss": 1.9934, "step": 1030 }, { "epoch": 1.1189630681818181, "grad_norm": 26.343746185302734, "learning_rate": 3.504340962904499e-06, "loss": 1.915, "step": 1040 }, { "epoch": 1.1225142045454546, "grad_norm": 21.86429214477539, "learning_rate": 3.4846093133385953e-06, "loss": 1.9066, "step": 1050 }, { "epoch": 1.1260653409090908, "grad_norm": 21.5794620513916, "learning_rate": 3.4648776637726914e-06, "loss": 1.66, "step": 1060 }, { "epoch": 1.1296164772727273, "grad_norm": 14.89384937286377, "learning_rate": 3.4451460142067875e-06, "loss": 1.7305, "step": 1070 }, { "epoch": 1.1331676136363638, "grad_norm": 19.86824607849121, "learning_rate": 3.4254143646408845e-06, "loss": 2.0088, "step": 1080 }, { "epoch": 1.13671875, "grad_norm": 19.656740188598633, "learning_rate": 3.4056827150749806e-06, "loss": 2.007, "step": 1090 }, { "epoch": 1.1402698863636362, "grad_norm": 19.889986038208008, "learning_rate": 3.3859510655090767e-06, "loss": 1.9091, "step": 1100 }, { "epoch": 1.1438210227272727, "grad_norm": 34.58203887939453, "learning_rate": 3.366219415943173e-06, "loss": 1.982, "step": 1110 }, { "epoch": 1.1473721590909092, "grad_norm": 14.81122875213623, "learning_rate": 3.3464877663772693e-06, "loss": 1.916, "step": 1120 }, { "epoch": 1.1509232954545454, "grad_norm": 25.33427619934082, "learning_rate": 3.3267561168113654e-06, "loss": 1.9497, "step": 1130 }, { "epoch": 1.1544744318181819, "grad_norm": 18.72292709350586, "learning_rate": 3.3070244672454623e-06, "loss": 1.7237, "step": 1140 }, { "epoch": 1.1580255681818181, "grad_norm": 20.77664566040039, "learning_rate": 3.2872928176795584e-06, "loss": 1.8997, "step": 1150 }, { "epoch": 1.1615767045454546, "grad_norm": 23.85498809814453, "learning_rate": 3.2675611681136545e-06, "loss": 1.8602, "step": 1160 }, { "epoch": 1.1651278409090908, "grad_norm": 22.836532592773438, "learning_rate": 3.247829518547751e-06, "loss": 1.8654, "step": 1170 }, { "epoch": 1.1686789772727273, "grad_norm": 35.54890441894531, "learning_rate": 3.228097868981847e-06, "loss": 1.6553, "step": 1180 }, { "epoch": 1.1722301136363638, "grad_norm": 19.193073272705078, "learning_rate": 3.208366219415943e-06, "loss": 1.8181, "step": 1190 }, { "epoch": 1.17578125, "grad_norm": 18.36795425415039, "learning_rate": 3.1886345698500397e-06, "loss": 1.796, "step": 1200 }, { "epoch": 1.1793323863636362, "grad_norm": 28.257104873657227, "learning_rate": 3.1689029202841358e-06, "loss": 1.9526, "step": 1210 }, { "epoch": 1.1828835227272727, "grad_norm": 21.073034286499023, "learning_rate": 3.149171270718232e-06, "loss": 1.8972, "step": 1220 }, { "epoch": 1.1864346590909092, "grad_norm": 30.784828186035156, "learning_rate": 3.129439621152329e-06, "loss": 1.8178, "step": 1230 }, { "epoch": 1.1899857954545454, "grad_norm": 33.99855041503906, "learning_rate": 3.109707971586425e-06, "loss": 1.7748, "step": 1240 }, { "epoch": 1.1935369318181819, "grad_norm": 19.716716766357422, "learning_rate": 3.089976322020521e-06, "loss": 1.7614, "step": 1250 }, { "epoch": 1.1970880681818181, "grad_norm": 17.041454315185547, "learning_rate": 3.0702446724546175e-06, "loss": 1.7971, "step": 1260 }, { "epoch": 1.2006392045454546, "grad_norm": 32.49284744262695, "learning_rate": 3.0505130228887136e-06, "loss": 1.8446, "step": 1270 }, { "epoch": 1.2041903409090908, "grad_norm": 18.60474395751953, "learning_rate": 3.03078137332281e-06, "loss": 1.6414, "step": 1280 }, { "epoch": 1.2077414772727273, "grad_norm": 16.46165657043457, "learning_rate": 3.0110497237569062e-06, "loss": 1.9618, "step": 1290 }, { "epoch": 1.2112926136363638, "grad_norm": 23.773658752441406, "learning_rate": 2.9913180741910023e-06, "loss": 1.69, "step": 1300 }, { "epoch": 1.21484375, "grad_norm": 26.765705108642578, "learning_rate": 2.9715864246250992e-06, "loss": 1.8167, "step": 1310 }, { "epoch": 1.2183948863636362, "grad_norm": 28.61939811706543, "learning_rate": 2.9518547750591953e-06, "loss": 1.7087, "step": 1320 }, { "epoch": 1.2219460227272727, "grad_norm": 12.1895112991333, "learning_rate": 2.9321231254932914e-06, "loss": 1.6608, "step": 1330 }, { "epoch": 1.2254971590909092, "grad_norm": 36.88252258300781, "learning_rate": 2.912391475927388e-06, "loss": 1.8907, "step": 1340 }, { "epoch": 1.2290482954545454, "grad_norm": 12.692719459533691, "learning_rate": 2.892659826361484e-06, "loss": 1.7991, "step": 1350 }, { "epoch": 1.2325994318181819, "grad_norm": 17.56617546081543, "learning_rate": 2.87292817679558e-06, "loss": 1.7126, "step": 1360 }, { "epoch": 1.2361505681818181, "grad_norm": 26.137487411499023, "learning_rate": 2.853196527229677e-06, "loss": 1.6474, "step": 1370 }, { "epoch": 1.2397017045454546, "grad_norm": 18.798303604125977, "learning_rate": 2.833464877663773e-06, "loss": 1.5864, "step": 1380 }, { "epoch": 1.2432528409090908, "grad_norm": 29.38694953918457, "learning_rate": 2.8137332280978693e-06, "loss": 1.8485, "step": 1390 }, { "epoch": 1.2468039772727273, "grad_norm": 25.83952522277832, "learning_rate": 2.7940015785319658e-06, "loss": 1.7024, "step": 1400 }, { "epoch": 1.25, "eval_accuracy": 0.31669535283993117, "eval_f1": 0.2827610289664265, "eval_loss": 2.046189308166504, "eval_precision": 0.3353755833663839, "eval_recall": 0.31518980464596447, "eval_runtime": 747.7036, "eval_samples_per_second": 0.777, "eval_steps_per_second": 0.195, "step": 1409 }, { "epoch": 2.0003551136363638, "grad_norm": 20.87928009033203, "learning_rate": 2.774269928966062e-06, "loss": 1.6345, "step": 1410 }, { "epoch": 2.00390625, "grad_norm": 28.964290618896484, "learning_rate": 2.754538279400158e-06, "loss": 1.7143, "step": 1420 }, { "epoch": 2.0074573863636362, "grad_norm": 17.409208297729492, "learning_rate": 2.7348066298342545e-06, "loss": 1.6937, "step": 1430 }, { "epoch": 2.011008522727273, "grad_norm": 24.774322509765625, "learning_rate": 2.7150749802683506e-06, "loss": 1.6225, "step": 1440 }, { "epoch": 2.014559659090909, "grad_norm": 33.20758819580078, "learning_rate": 2.6953433307024466e-06, "loss": 1.6123, "step": 1450 }, { "epoch": 2.0181107954545454, "grad_norm": 24.023727416992188, "learning_rate": 2.6756116811365436e-06, "loss": 1.6963, "step": 1460 }, { "epoch": 2.0216619318181817, "grad_norm": 26.691741943359375, "learning_rate": 2.6558800315706397e-06, "loss": 1.7662, "step": 1470 }, { "epoch": 2.0252130681818183, "grad_norm": 31.09050178527832, "learning_rate": 2.6361483820047358e-06, "loss": 1.6466, "step": 1480 }, { "epoch": 2.0287642045454546, "grad_norm": 34.407222747802734, "learning_rate": 2.6164167324388323e-06, "loss": 1.577, "step": 1490 }, { "epoch": 2.032315340909091, "grad_norm": 21.151485443115234, "learning_rate": 2.5966850828729284e-06, "loss": 1.4755, "step": 1500 }, { "epoch": 2.035866477272727, "grad_norm": 33.150962829589844, "learning_rate": 2.5769534333070245e-06, "loss": 1.7999, "step": 1510 }, { "epoch": 2.0394176136363638, "grad_norm": 37.68964385986328, "learning_rate": 2.557221783741121e-06, "loss": 1.7355, "step": 1520 }, { "epoch": 2.04296875, "grad_norm": 21.464839935302734, "learning_rate": 2.537490134175217e-06, "loss": 1.5964, "step": 1530 }, { "epoch": 2.0465198863636362, "grad_norm": 33.7103157043457, "learning_rate": 2.517758484609313e-06, "loss": 1.6579, "step": 1540 }, { "epoch": 2.050071022727273, "grad_norm": 34.679019927978516, "learning_rate": 2.4980268350434097e-06, "loss": 1.5309, "step": 1550 }, { "epoch": 2.053622159090909, "grad_norm": 25.11505889892578, "learning_rate": 2.478295185477506e-06, "loss": 1.605, "step": 1560 }, { "epoch": 2.0571732954545454, "grad_norm": 14.84094524383545, "learning_rate": 2.4585635359116027e-06, "loss": 1.5956, "step": 1570 }, { "epoch": 2.0607244318181817, "grad_norm": 22.252281188964844, "learning_rate": 2.438831886345699e-06, "loss": 1.5511, "step": 1580 }, { "epoch": 2.0642755681818183, "grad_norm": 50.657798767089844, "learning_rate": 2.419100236779795e-06, "loss": 1.5066, "step": 1590 }, { "epoch": 2.0678267045454546, "grad_norm": 32.89131546020508, "learning_rate": 2.3993685872138914e-06, "loss": 1.6128, "step": 1600 }, { "epoch": 2.071377840909091, "grad_norm": 32.423648834228516, "learning_rate": 2.3796369376479875e-06, "loss": 1.6446, "step": 1610 }, { "epoch": 2.074928977272727, "grad_norm": 30.093395233154297, "learning_rate": 2.359905288082084e-06, "loss": 1.5277, "step": 1620 }, { "epoch": 2.0784801136363638, "grad_norm": 40.93949508666992, "learning_rate": 2.34017363851618e-06, "loss": 1.5847, "step": 1630 }, { "epoch": 2.08203125, "grad_norm": 21.456939697265625, "learning_rate": 2.320441988950276e-06, "loss": 1.5051, "step": 1640 }, { "epoch": 2.0855823863636362, "grad_norm": 13.460980415344238, "learning_rate": 2.3007103393843727e-06, "loss": 1.6683, "step": 1650 }, { "epoch": 2.089133522727273, "grad_norm": 25.33333969116211, "learning_rate": 2.2809786898184692e-06, "loss": 1.5145, "step": 1660 }, { "epoch": 2.092684659090909, "grad_norm": 35.350223541259766, "learning_rate": 2.2612470402525653e-06, "loss": 1.5161, "step": 1670 }, { "epoch": 2.0962357954545454, "grad_norm": 11.545449256896973, "learning_rate": 2.2415153906866614e-06, "loss": 1.5556, "step": 1680 }, { "epoch": 2.0997869318181817, "grad_norm": 24.93936538696289, "learning_rate": 2.221783741120758e-06, "loss": 1.6311, "step": 1690 }, { "epoch": 2.1033380681818183, "grad_norm": 17.35971450805664, "learning_rate": 2.202052091554854e-06, "loss": 1.6324, "step": 1700 }, { "epoch": 2.1068892045454546, "grad_norm": 30.070161819458008, "learning_rate": 2.1823204419889505e-06, "loss": 1.6366, "step": 1710 }, { "epoch": 2.110440340909091, "grad_norm": 30.212112426757812, "learning_rate": 2.1625887924230466e-06, "loss": 1.5943, "step": 1720 }, { "epoch": 2.113991477272727, "grad_norm": 19.462881088256836, "learning_rate": 2.1428571428571427e-06, "loss": 1.5983, "step": 1730 }, { "epoch": 2.1175426136363638, "grad_norm": 17.48008155822754, "learning_rate": 2.1231254932912392e-06, "loss": 1.5725, "step": 1740 }, { "epoch": 2.12109375, "grad_norm": 18.313573837280273, "learning_rate": 2.1033938437253358e-06, "loss": 1.6039, "step": 1750 }, { "epoch": 2.1246448863636362, "grad_norm": 12.866426467895508, "learning_rate": 2.083662194159432e-06, "loss": 1.4405, "step": 1760 }, { "epoch": 2.128196022727273, "grad_norm": 25.331958770751953, "learning_rate": 2.063930544593528e-06, "loss": 1.728, "step": 1770 }, { "epoch": 2.131747159090909, "grad_norm": 31.10738182067871, "learning_rate": 2.0441988950276245e-06, "loss": 1.6007, "step": 1780 }, { "epoch": 2.1352982954545454, "grad_norm": 27.759248733520508, "learning_rate": 2.0244672454617206e-06, "loss": 1.5672, "step": 1790 }, { "epoch": 2.1388494318181817, "grad_norm": 16.519384384155273, "learning_rate": 2.004735595895817e-06, "loss": 1.3412, "step": 1800 }, { "epoch": 2.1424005681818183, "grad_norm": 19.062610626220703, "learning_rate": 1.9850039463299136e-06, "loss": 1.6176, "step": 1810 }, { "epoch": 2.1459517045454546, "grad_norm": 48.56722640991211, "learning_rate": 1.9652722967640097e-06, "loss": 1.6479, "step": 1820 }, { "epoch": 2.149502840909091, "grad_norm": 22.719276428222656, "learning_rate": 1.9455406471981058e-06, "loss": 1.4365, "step": 1830 }, { "epoch": 2.153053977272727, "grad_norm": 12.384623527526855, "learning_rate": 1.9258089976322023e-06, "loss": 1.4842, "step": 1840 }, { "epoch": 2.1566051136363638, "grad_norm": 29.254623413085938, "learning_rate": 1.9060773480662986e-06, "loss": 1.4222, "step": 1850 }, { "epoch": 2.16015625, "grad_norm": 32.79444885253906, "learning_rate": 1.8863456985003947e-06, "loss": 1.6447, "step": 1860 }, { "epoch": 2.1637073863636362, "grad_norm": 21.711156845092773, "learning_rate": 1.8666140489344912e-06, "loss": 1.4027, "step": 1870 }, { "epoch": 2.167258522727273, "grad_norm": 29.14606285095215, "learning_rate": 1.8468823993685875e-06, "loss": 1.5075, "step": 1880 }, { "epoch": 2.170809659090909, "grad_norm": 35.72248077392578, "learning_rate": 1.8271507498026836e-06, "loss": 1.6284, "step": 1890 }, { "epoch": 2.1743607954545454, "grad_norm": 28.522951126098633, "learning_rate": 1.8074191002367799e-06, "loss": 1.5546, "step": 1900 }, { "epoch": 2.1779119318181817, "grad_norm": 14.657508850097656, "learning_rate": 1.7876874506708764e-06, "loss": 1.5282, "step": 1910 }, { "epoch": 2.1814630681818183, "grad_norm": 23.2537899017334, "learning_rate": 1.7679558011049725e-06, "loss": 1.4651, "step": 1920 }, { "epoch": 2.1850142045454546, "grad_norm": 21.244699478149414, "learning_rate": 1.7482241515390688e-06, "loss": 1.4957, "step": 1930 }, { "epoch": 2.188565340909091, "grad_norm": 42.69889831542969, "learning_rate": 1.7284925019731651e-06, "loss": 1.6205, "step": 1940 }, { "epoch": 2.192116477272727, "grad_norm": 32.654991149902344, "learning_rate": 1.7087608524072614e-06, "loss": 1.5725, "step": 1950 }, { "epoch": 2.1956676136363638, "grad_norm": 60.657169342041016, "learning_rate": 1.6890292028413577e-06, "loss": 1.5037, "step": 1960 }, { "epoch": 2.19921875, "grad_norm": 39.7910041809082, "learning_rate": 1.669297553275454e-06, "loss": 1.4801, "step": 1970 }, { "epoch": 2.2027698863636362, "grad_norm": 28.17043113708496, "learning_rate": 1.6495659037095501e-06, "loss": 1.4794, "step": 1980 }, { "epoch": 2.206321022727273, "grad_norm": 22.981891632080078, "learning_rate": 1.6298342541436466e-06, "loss": 1.5583, "step": 1990 }, { "epoch": 2.209872159090909, "grad_norm": 24.155668258666992, "learning_rate": 1.610102604577743e-06, "loss": 1.4282, "step": 2000 }, { "epoch": 2.2134232954545454, "grad_norm": 54.06626892089844, "learning_rate": 1.590370955011839e-06, "loss": 1.5627, "step": 2010 }, { "epoch": 2.2169744318181817, "grad_norm": 22.495920181274414, "learning_rate": 1.5706393054459353e-06, "loss": 1.4818, "step": 2020 }, { "epoch": 2.2205255681818183, "grad_norm": 36.265045166015625, "learning_rate": 1.5509076558800318e-06, "loss": 1.5929, "step": 2030 }, { "epoch": 2.2240767045454546, "grad_norm": 15.528562545776367, "learning_rate": 1.531176006314128e-06, "loss": 1.4649, "step": 2040 }, { "epoch": 2.227627840909091, "grad_norm": 26.963422775268555, "learning_rate": 1.5114443567482242e-06, "loss": 1.4863, "step": 2050 }, { "epoch": 2.231178977272727, "grad_norm": 16.62386131286621, "learning_rate": 1.4917127071823205e-06, "loss": 1.2891, "step": 2060 }, { "epoch": 2.2347301136363638, "grad_norm": 22.612010955810547, "learning_rate": 1.4719810576164168e-06, "loss": 1.4939, "step": 2070 }, { "epoch": 2.23828125, "grad_norm": 59.00920104980469, "learning_rate": 1.4522494080505131e-06, "loss": 1.471, "step": 2080 }, { "epoch": 2.2418323863636362, "grad_norm": 22.010278701782227, "learning_rate": 1.4325177584846095e-06, "loss": 1.471, "step": 2090 }, { "epoch": 2.245383522727273, "grad_norm": 25.769332885742188, "learning_rate": 1.4127861089187055e-06, "loss": 1.3952, "step": 2100 }, { "epoch": 2.248934659090909, "grad_norm": 31.15215301513672, "learning_rate": 1.393054459352802e-06, "loss": 1.3164, "step": 2110 }, { "epoch": 2.25, "eval_accuracy": 0.3080895008605852, "eval_f1": 0.25675363952610347, "eval_loss": 1.9758973121643066, "eval_precision": 0.30216996841958826, "eval_recall": 0.3084819823166105, "eval_runtime": 716.7915, "eval_samples_per_second": 0.811, "eval_steps_per_second": 0.204, "step": 2113 }, { "epoch": 3.0024857954545454, "grad_norm": 25.143760681152344, "learning_rate": 1.3733228097868984e-06, "loss": 1.4265, "step": 2120 }, { "epoch": 3.0060369318181817, "grad_norm": 33.473609924316406, "learning_rate": 1.3535911602209945e-06, "loss": 1.3055, "step": 2130 }, { "epoch": 3.0095880681818183, "grad_norm": 15.528383255004883, "learning_rate": 1.3338595106550908e-06, "loss": 1.332, "step": 2140 }, { "epoch": 3.0131392045454546, "grad_norm": 17.708980560302734, "learning_rate": 1.3141278610891873e-06, "loss": 1.3542, "step": 2150 }, { "epoch": 3.016690340909091, "grad_norm": 90.82689666748047, "learning_rate": 1.2943962115232834e-06, "loss": 1.551, "step": 2160 }, { "epoch": 3.020241477272727, "grad_norm": 45.7540397644043, "learning_rate": 1.2746645619573797e-06, "loss": 1.4612, "step": 2170 }, { "epoch": 3.0237926136363638, "grad_norm": 16.101266860961914, "learning_rate": 1.254932912391476e-06, "loss": 1.3709, "step": 2180 }, { "epoch": 3.02734375, "grad_norm": 13.82054615020752, "learning_rate": 1.2352012628255723e-06, "loss": 1.3762, "step": 2190 }, { "epoch": 3.0308948863636362, "grad_norm": 33.30424880981445, "learning_rate": 1.2154696132596686e-06, "loss": 1.4682, "step": 2200 }, { "epoch": 3.034446022727273, "grad_norm": 30.845436096191406, "learning_rate": 1.1957379636937649e-06, "loss": 1.4023, "step": 2210 }, { "epoch": 3.037997159090909, "grad_norm": 16.949199676513672, "learning_rate": 1.1760063141278612e-06, "loss": 1.3019, "step": 2220 }, { "epoch": 3.0415482954545454, "grad_norm": 19.528390884399414, "learning_rate": 1.1562746645619575e-06, "loss": 1.4701, "step": 2230 }, { "epoch": 3.0450994318181817, "grad_norm": 53.9114990234375, "learning_rate": 1.1365430149960538e-06, "loss": 1.3315, "step": 2240 }, { "epoch": 3.0486505681818183, "grad_norm": 32.42067337036133, "learning_rate": 1.11681136543015e-06, "loss": 1.5252, "step": 2250 }, { "epoch": 3.0522017045454546, "grad_norm": 16.656024932861328, "learning_rate": 1.0970797158642462e-06, "loss": 1.5387, "step": 2260 }, { "epoch": 3.055752840909091, "grad_norm": 22.413053512573242, "learning_rate": 1.0773480662983427e-06, "loss": 1.3136, "step": 2270 }, { "epoch": 3.059303977272727, "grad_norm": 11.095012664794922, "learning_rate": 1.0576164167324388e-06, "loss": 1.4976, "step": 2280 }, { "epoch": 3.0628551136363638, "grad_norm": 18.41863250732422, "learning_rate": 1.0378847671665353e-06, "loss": 1.383, "step": 2290 }, { "epoch": 3.06640625, "grad_norm": 26.215972900390625, "learning_rate": 1.0181531176006316e-06, "loss": 1.5059, "step": 2300 }, { "epoch": 3.0699573863636362, "grad_norm": 28.589454650878906, "learning_rate": 9.984214680347277e-07, "loss": 1.4089, "step": 2310 }, { "epoch": 3.073508522727273, "grad_norm": 50.552528381347656, "learning_rate": 9.786898184688242e-07, "loss": 1.4042, "step": 2320 }, { "epoch": 3.077059659090909, "grad_norm": 13.100271224975586, "learning_rate": 9.589581689029203e-07, "loss": 1.2665, "step": 2330 }, { "epoch": 3.0806107954545454, "grad_norm": 24.274110794067383, "learning_rate": 9.392265193370166e-07, "loss": 1.3727, "step": 2340 }, { "epoch": 3.0841619318181817, "grad_norm": 37.16689682006836, "learning_rate": 9.19494869771113e-07, "loss": 1.4553, "step": 2350 }, { "epoch": 3.0877130681818183, "grad_norm": 26.41681480407715, "learning_rate": 8.997632202052092e-07, "loss": 1.4915, "step": 2360 }, { "epoch": 3.0912642045454546, "grad_norm": 39.80120849609375, "learning_rate": 8.800315706393054e-07, "loss": 1.5277, "step": 2370 }, { "epoch": 3.094815340909091, "grad_norm": 20.090126037597656, "learning_rate": 8.602999210734018e-07, "loss": 1.3832, "step": 2380 }, { "epoch": 3.098366477272727, "grad_norm": 35.57830047607422, "learning_rate": 8.405682715074981e-07, "loss": 1.358, "step": 2390 }, { "epoch": 3.1019176136363638, "grad_norm": 15.675562858581543, "learning_rate": 8.208366219415943e-07, "loss": 1.3206, "step": 2400 }, { "epoch": 3.10546875, "grad_norm": 40.23310470581055, "learning_rate": 8.011049723756907e-07, "loss": 1.3537, "step": 2410 }, { "epoch": 3.1090198863636362, "grad_norm": 26.265541076660156, "learning_rate": 7.813733228097869e-07, "loss": 1.3955, "step": 2420 }, { "epoch": 3.112571022727273, "grad_norm": 25.306949615478516, "learning_rate": 7.616416732438831e-07, "loss": 1.3381, "step": 2430 }, { "epoch": 3.116122159090909, "grad_norm": 18.622577667236328, "learning_rate": 7.419100236779796e-07, "loss": 1.3047, "step": 2440 }, { "epoch": 3.1196732954545454, "grad_norm": 14.70700454711914, "learning_rate": 7.221783741120759e-07, "loss": 1.3387, "step": 2450 }, { "epoch": 3.1232244318181817, "grad_norm": 30.96035385131836, "learning_rate": 7.024467245461721e-07, "loss": 1.3884, "step": 2460 }, { "epoch": 3.1267755681818183, "grad_norm": 12.719250679016113, "learning_rate": 6.827150749802685e-07, "loss": 1.4549, "step": 2470 }, { "epoch": 3.1303267045454546, "grad_norm": 16.468534469604492, "learning_rate": 6.629834254143647e-07, "loss": 1.1182, "step": 2480 }, { "epoch": 3.133877840909091, "grad_norm": 21.350452423095703, "learning_rate": 6.432517758484611e-07, "loss": 1.318, "step": 2490 }, { "epoch": 3.137428977272727, "grad_norm": 33.729400634765625, "learning_rate": 6.235201262825573e-07, "loss": 1.3263, "step": 2500 }, { "epoch": 3.1409801136363638, "grad_norm": 42.90283966064453, "learning_rate": 6.037884767166536e-07, "loss": 1.3509, "step": 2510 }, { "epoch": 3.14453125, "grad_norm": 45.704078674316406, "learning_rate": 5.840568271507499e-07, "loss": 1.5069, "step": 2520 }, { "epoch": 3.1480823863636362, "grad_norm": 41.951148986816406, "learning_rate": 5.643251775848462e-07, "loss": 1.3066, "step": 2530 }, { "epoch": 3.151633522727273, "grad_norm": 12.798999786376953, "learning_rate": 5.445935280189425e-07, "loss": 1.3401, "step": 2540 }, { "epoch": 3.155184659090909, "grad_norm": 21.00889778137207, "learning_rate": 5.248618784530387e-07, "loss": 1.2965, "step": 2550 }, { "epoch": 3.1587357954545454, "grad_norm": 17.473670959472656, "learning_rate": 5.05130228887135e-07, "loss": 1.2885, "step": 2560 }, { "epoch": 3.1622869318181817, "grad_norm": 28.33868980407715, "learning_rate": 4.853985793212313e-07, "loss": 1.3802, "step": 2570 }, { "epoch": 3.1658380681818183, "grad_norm": 37.80378723144531, "learning_rate": 4.6566692975532754e-07, "loss": 1.3751, "step": 2580 }, { "epoch": 3.1693892045454546, "grad_norm": 37.315486907958984, "learning_rate": 4.4593528018942384e-07, "loss": 1.4067, "step": 2590 }, { "epoch": 3.172940340909091, "grad_norm": 26.541744232177734, "learning_rate": 4.2620363062352015e-07, "loss": 1.3521, "step": 2600 }, { "epoch": 3.176491477272727, "grad_norm": 13.96971321105957, "learning_rate": 4.064719810576165e-07, "loss": 1.4583, "step": 2610 }, { "epoch": 3.1800426136363638, "grad_norm": 19.711917877197266, "learning_rate": 3.867403314917127e-07, "loss": 1.3062, "step": 2620 }, { "epoch": 3.18359375, "grad_norm": 28.168289184570312, "learning_rate": 3.6700868192580906e-07, "loss": 1.4479, "step": 2630 }, { "epoch": 3.1871448863636362, "grad_norm": 16.958044052124023, "learning_rate": 3.4727703235990536e-07, "loss": 1.2349, "step": 2640 }, { "epoch": 3.190696022727273, "grad_norm": 14.27488899230957, "learning_rate": 3.2754538279400156e-07, "loss": 1.3124, "step": 2650 }, { "epoch": 3.194247159090909, "grad_norm": 39.53180694580078, "learning_rate": 3.078137332280979e-07, "loss": 1.3704, "step": 2660 }, { "epoch": 3.1977982954545454, "grad_norm": 30.55574607849121, "learning_rate": 2.8808208366219417e-07, "loss": 1.4764, "step": 2670 }, { "epoch": 3.2013494318181817, "grad_norm": 32.31404113769531, "learning_rate": 2.6835043409629047e-07, "loss": 1.3712, "step": 2680 }, { "epoch": 3.2049005681818183, "grad_norm": 17.91683578491211, "learning_rate": 2.486187845303868e-07, "loss": 1.3872, "step": 2690 }, { "epoch": 3.2084517045454546, "grad_norm": 27.588768005371094, "learning_rate": 2.2888713496448305e-07, "loss": 1.3406, "step": 2700 }, { "epoch": 3.212002840909091, "grad_norm": 16.939321517944336, "learning_rate": 2.0915548539857933e-07, "loss": 1.4795, "step": 2710 }, { "epoch": 3.215553977272727, "grad_norm": 30.011600494384766, "learning_rate": 1.8942383583267563e-07, "loss": 1.2874, "step": 2720 }, { "epoch": 3.2191051136363638, "grad_norm": 38.46843719482422, "learning_rate": 1.696921862667719e-07, "loss": 1.4501, "step": 2730 }, { "epoch": 3.22265625, "grad_norm": 28.307205200195312, "learning_rate": 1.4996053670086821e-07, "loss": 1.3977, "step": 2740 }, { "epoch": 3.2262073863636362, "grad_norm": 36.628013610839844, "learning_rate": 1.302288871349645e-07, "loss": 1.2878, "step": 2750 }, { "epoch": 3.229758522727273, "grad_norm": 38.92404556274414, "learning_rate": 1.1049723756906078e-07, "loss": 1.3168, "step": 2760 }, { "epoch": 3.233309659090909, "grad_norm": 30.143110275268555, "learning_rate": 9.076558800315707e-08, "loss": 1.3171, "step": 2770 }, { "epoch": 3.2368607954545454, "grad_norm": 21.9947509765625, "learning_rate": 7.103393843725336e-08, "loss": 1.3034, "step": 2780 }, { "epoch": 3.2404119318181817, "grad_norm": 29.996990203857422, "learning_rate": 5.1302288871349646e-08, "loss": 1.3983, "step": 2790 }, { "epoch": 3.2439630681818183, "grad_norm": 67.70350646972656, "learning_rate": 3.1570639305445937e-08, "loss": 1.5066, "step": 2800 }, { "epoch": 3.2475142045454546, "grad_norm": 15.010125160217285, "learning_rate": 1.1838989739542227e-08, "loss": 1.3877, "step": 2810 }, { "epoch": 3.2496448863636362, "eval_accuracy": 0.3373493975903614, "eval_f1": 0.2839461670482319, "eval_loss": 1.9640586376190186, "eval_precision": 0.30306634843723573, "eval_recall": 0.3366896237317991, "eval_runtime": 724.6553, "eval_samples_per_second": 0.802, "eval_steps_per_second": 0.201, "step": 2816 }, { "epoch": 3.2496448863636362, "step": 2816, "total_flos": 1.4033059590817677e+19, "train_loss": 1.800246679986065, "train_runtime": 18585.0507, "train_samples_per_second": 0.606, "train_steps_per_second": 0.152 }, { "epoch": 3.2496448863636362, "eval_accuracy": 0.3268698060941828, "eval_f1": 0.2716015567288163, "eval_loss": 1.972208857536316, "eval_precision": 0.3970093449590282, "eval_recall": 0.32766441258833473, "eval_runtime": 907.789, "eval_samples_per_second": 0.795, "eval_steps_per_second": 0.199, "step": 2816 }, { "epoch": 3.2496448863636362, "eval_accuracy": 0.3268698060941828, "eval_f1": 0.2716015567288163, "eval_loss": 1.972209095954895, "eval_precision": 0.3970093449590282, "eval_recall": 0.32766441258833473, "eval_runtime": 909.7137, "eval_samples_per_second": 0.794, "eval_steps_per_second": 0.199, "step": 2816 } ], "logging_steps": 10, "max_steps": 2816, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 1.4033059590817677e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }