{ "best_metric": 5.022224426269531, "best_model_checkpoint": "./ViMedical_Diseases/checkpoint-400", "epoch": 84.76821192052981, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.85, "eval_accuracy": 0.0024875621890547263, "eval_f1": 1.2345221781909311e-05, "eval_loss": 6.415043830871582, "eval_runtime": 0.3946, "eval_samples_per_second": 3055.887, "eval_steps_per_second": 25.339, "step": 4 }, { "epoch": 1.91, "eval_accuracy": 0.0024875621890547263, "eval_f1": 1.2756729174639624e-05, "eval_loss": 6.412064075469971, "eval_runtime": 0.3736, "eval_samples_per_second": 3227.701, "eval_steps_per_second": 26.764, "step": 9 }, { "epoch": 2.97, "eval_accuracy": 0.0024875621890547263, "eval_f1": 1.338598487383709e-05, "eval_loss": 6.407182693481445, "eval_runtime": 0.3753, "eval_samples_per_second": 3213.746, "eval_steps_per_second": 26.648, "step": 14 }, { "epoch": 3.81, "eval_accuracy": 0.0041459369817578775, "eval_f1": 0.0005888539889881497, "eval_loss": 6.397731304168701, "eval_runtime": 0.3765, "eval_samples_per_second": 3202.793, "eval_steps_per_second": 26.557, "step": 18 }, { "epoch": 4.87, "eval_accuracy": 0.0041459369817578775, "eval_f1": 0.00024318292743308673, "eval_loss": 6.378692626953125, "eval_runtime": 0.375, "eval_samples_per_second": 3215.914, "eval_steps_per_second": 26.666, "step": 23 }, { "epoch": 5.93, "eval_accuracy": 0.006633499170812604, "eval_f1": 0.0006805627237639513, "eval_loss": 6.348721981048584, "eval_runtime": 0.3755, "eval_samples_per_second": 3212.024, "eval_steps_per_second": 26.634, "step": 28 }, { "epoch": 6.99, "eval_accuracy": 0.00912106135986733, "eval_f1": 0.0044415843789941045, "eval_loss": 6.305507183074951, "eval_runtime": 0.3784, "eval_samples_per_second": 3187.077, "eval_steps_per_second": 26.427, "step": 33 }, { "epoch": 7.84, "eval_accuracy": 0.015754560530679935, "eval_f1": 0.004968013448467957, "eval_loss": 6.288573265075684, "eval_runtime": 0.3772, "eval_samples_per_second": 3197.112, "eval_steps_per_second": 26.51, "step": 37 }, { "epoch": 8.9, "eval_accuracy": 0.01658374792703151, "eval_f1": 0.004671981131856493, "eval_loss": 6.297088623046875, "eval_runtime": 0.3776, "eval_samples_per_second": 3193.556, "eval_steps_per_second": 26.481, "step": 42 }, { "epoch": 9.96, "eval_accuracy": 0.013266998341625208, "eval_f1": 0.004042787456966562, "eval_loss": 6.216242790222168, "eval_runtime": 0.3779, "eval_samples_per_second": 3191.019, "eval_steps_per_second": 26.46, "step": 47 }, { "epoch": 10.81, "eval_accuracy": 0.020729684908789386, "eval_f1": 0.008522136976574753, "eval_loss": 6.187163352966309, "eval_runtime": 0.3792, "eval_samples_per_second": 3180.288, "eval_steps_per_second": 26.371, "step": 51 }, { "epoch": 11.87, "eval_accuracy": 0.029850746268656716, "eval_f1": 0.014040979403889549, "eval_loss": 6.177770137786865, "eval_runtime": 0.3792, "eval_samples_per_second": 3180.276, "eval_steps_per_second": 26.37, "step": 56 }, { "epoch": 12.93, "eval_accuracy": 0.02902155887230514, "eval_f1": 0.011104717761289505, "eval_loss": 6.158536911010742, "eval_runtime": 0.3786, "eval_samples_per_second": 3185.18, "eval_steps_per_second": 26.411, "step": 61 }, { "epoch": 13.99, "eval_accuracy": 0.02570480928689884, "eval_f1": 0.01033900277156658, "eval_loss": 6.1090803146362305, "eval_runtime": 0.38, "eval_samples_per_second": 3173.403, "eval_steps_per_second": 26.313, "step": 66 }, { "epoch": 14.83, "eval_accuracy": 0.03316749585406302, "eval_f1": 0.015474453231986877, "eval_loss": 6.08702278137207, "eval_runtime": 0.3792, "eval_samples_per_second": 3180.296, "eval_steps_per_second": 26.371, "step": 70 }, { "epoch": 15.89, "eval_accuracy": 0.04809286898839138, "eval_f1": 0.02513988397193506, "eval_loss": 6.0609517097473145, "eval_runtime": 0.3801, "eval_samples_per_second": 3172.69, "eval_steps_per_second": 26.308, "step": 75 }, { "epoch": 16.95, "eval_accuracy": 0.03814262023217247, "eval_f1": 0.01813347312377026, "eval_loss": 6.025730609893799, "eval_runtime": 0.3785, "eval_samples_per_second": 3185.844, "eval_steps_per_second": 26.417, "step": 80 }, { "epoch": 17.8, "eval_accuracy": 0.05555555555555555, "eval_f1": 0.0316611803261535, "eval_loss": 6.014459133148193, "eval_runtime": 0.3787, "eval_samples_per_second": 3184.258, "eval_steps_per_second": 26.403, "step": 84 }, { "epoch": 18.86, "eval_accuracy": 0.04311774461028192, "eval_f1": 0.02307944350501337, "eval_loss": 5.969913005828857, "eval_runtime": 0.3804, "eval_samples_per_second": 3169.946, "eval_steps_per_second": 26.285, "step": 89 }, { "epoch": 19.92, "eval_accuracy": 0.05638474295190713, "eval_f1": 0.0302836119107397, "eval_loss": 5.954006195068359, "eval_runtime": 0.3796, "eval_samples_per_second": 3176.755, "eval_steps_per_second": 26.341, "step": 94 }, { "epoch": 20.98, "eval_accuracy": 0.06301824212271974, "eval_f1": 0.03687442226058399, "eval_loss": 5.912012100219727, "eval_runtime": 0.3781, "eval_samples_per_second": 3189.303, "eval_steps_per_second": 26.445, "step": 99 }, { "epoch": 21.83, "eval_accuracy": 0.06384742951907131, "eval_f1": 0.03535293317796501, "eval_loss": 5.8892645835876465, "eval_runtime": 0.3799, "eval_samples_per_second": 3174.661, "eval_steps_per_second": 26.324, "step": 103 }, { "epoch": 22.89, "eval_accuracy": 0.07711442786069651, "eval_f1": 0.04528489909367382, "eval_loss": 5.885714530944824, "eval_runtime": 0.3814, "eval_samples_per_second": 3162.388, "eval_steps_per_second": 26.222, "step": 108 }, { "epoch": 23.95, "eval_accuracy": 0.06550580431177445, "eval_f1": 0.03852630441009513, "eval_loss": 5.839115142822266, "eval_runtime": 0.3786, "eval_samples_per_second": 3185.788, "eval_steps_per_second": 26.416, "step": 113 }, { "epoch": 24.79, "eval_accuracy": 0.07877280265339967, "eval_f1": 0.050102448183655314, "eval_loss": 5.812053203582764, "eval_runtime": 0.3783, "eval_samples_per_second": 3187.772, "eval_steps_per_second": 26.433, "step": 117 }, { "epoch": 25.85, "eval_accuracy": 0.0845771144278607, "eval_f1": 0.05181174832064436, "eval_loss": 5.789350986480713, "eval_runtime": 0.3785, "eval_samples_per_second": 3186.218, "eval_steps_per_second": 26.42, "step": 122 }, { "epoch": 26.91, "eval_accuracy": 0.09286898839137644, "eval_f1": 0.0554051006900966, "eval_loss": 5.8099284172058105, "eval_runtime": 0.3795, "eval_samples_per_second": 3177.946, "eval_steps_per_second": 26.351, "step": 127 }, { "epoch": 27.97, "eval_accuracy": 0.08208955223880597, "eval_f1": 0.052706293384051014, "eval_loss": 5.745517253875732, "eval_runtime": 0.379, "eval_samples_per_second": 3182.087, "eval_steps_per_second": 26.385, "step": 132 }, { "epoch": 28.82, "eval_accuracy": 0.08706467661691543, "eval_f1": 0.05317439161254717, "eval_loss": 5.725302696228027, "eval_runtime": 0.38, "eval_samples_per_second": 3173.908, "eval_steps_per_second": 26.318, "step": 136 }, { "epoch": 29.88, "eval_accuracy": 0.09618573797678276, "eval_f1": 0.05978131215599709, "eval_loss": 5.701379299163818, "eval_runtime": 0.3792, "eval_samples_per_second": 3180.126, "eval_steps_per_second": 26.369, "step": 141 }, { "epoch": 30.94, "eval_accuracy": 0.09286898839137644, "eval_f1": 0.057675351156170285, "eval_loss": 5.6744208335876465, "eval_runtime": 0.3792, "eval_samples_per_second": 3180.732, "eval_steps_per_second": 26.374, "step": 146 }, { "epoch": 31.79, "grad_norm": 2.17041015625, "learning_rate": 1.255e-05, "loss": 6.0949, "step": 150 }, { "epoch": 32.0, "eval_accuracy": 0.09950248756218906, "eval_f1": 0.06490404658367951, "eval_loss": 5.660266399383545, "eval_runtime": 0.379, "eval_samples_per_second": 3182.291, "eval_steps_per_second": 26.387, "step": 151 }, { "epoch": 32.85, "eval_accuracy": 0.09867330016583747, "eval_f1": 0.0630415651434736, "eval_loss": 5.6351704597473145, "eval_runtime": 0.3796, "eval_samples_per_second": 3176.904, "eval_steps_per_second": 26.342, "step": 155 }, { "epoch": 33.91, "eval_accuracy": 0.09867330016583747, "eval_f1": 0.060441362912511246, "eval_loss": 5.600429058074951, "eval_runtime": 0.3801, "eval_samples_per_second": 3172.485, "eval_steps_per_second": 26.306, "step": 160 }, { "epoch": 34.97, "eval_accuracy": 0.10281923714759536, "eval_f1": 0.0637386932721227, "eval_loss": 5.598119258880615, "eval_runtime": 0.3795, "eval_samples_per_second": 3177.533, "eval_steps_per_second": 26.348, "step": 165 }, { "epoch": 35.81, "eval_accuracy": 0.1011608623548922, "eval_f1": 0.06296000608037869, "eval_loss": 5.570890426635742, "eval_runtime": 0.3788, "eval_samples_per_second": 3183.869, "eval_steps_per_second": 26.4, "step": 169 }, { "epoch": 36.87, "eval_accuracy": 0.10530679933665009, "eval_f1": 0.06954752384683803, "eval_loss": 5.540558815002441, "eval_runtime": 0.3796, "eval_samples_per_second": 3176.978, "eval_steps_per_second": 26.343, "step": 174 }, { "epoch": 37.93, "eval_accuracy": 0.11442786069651742, "eval_f1": 0.07287036657509441, "eval_loss": 5.520463943481445, "eval_runtime": 0.3792, "eval_samples_per_second": 3180.006, "eval_steps_per_second": 26.368, "step": 179 }, { "epoch": 38.99, "eval_accuracy": 0.10862354892205639, "eval_f1": 0.07094764147857342, "eval_loss": 5.498680114746094, "eval_runtime": 0.3784, "eval_samples_per_second": 3187.079, "eval_steps_per_second": 26.427, "step": 184 }, { "epoch": 39.84, "eval_accuracy": 0.1111111111111111, "eval_f1": 0.06995636683961066, "eval_loss": 5.477349758148193, "eval_runtime": 0.3792, "eval_samples_per_second": 3180.04, "eval_steps_per_second": 26.368, "step": 188 }, { "epoch": 40.9, "eval_accuracy": 0.12023217247097844, "eval_f1": 0.0803838202287807, "eval_loss": 5.464529514312744, "eval_runtime": 0.3783, "eval_samples_per_second": 3188.288, "eval_steps_per_second": 26.437, "step": 193 }, { "epoch": 41.96, "eval_accuracy": 0.12189054726368159, "eval_f1": 0.0797876479133484, "eval_loss": 5.439937591552734, "eval_runtime": 0.3805, "eval_samples_per_second": 3169.807, "eval_steps_per_second": 26.284, "step": 198 }, { "epoch": 42.81, "eval_accuracy": 0.11774461028192372, "eval_f1": 0.07397147599836879, "eval_loss": 5.417842388153076, "eval_runtime": 0.3791, "eval_samples_per_second": 3181.28, "eval_steps_per_second": 26.379, "step": 202 }, { "epoch": 43.87, "eval_accuracy": 0.12271973466003316, "eval_f1": 0.08191085838440999, "eval_loss": 5.408046245574951, "eval_runtime": 0.379, "eval_samples_per_second": 3181.692, "eval_steps_per_second": 26.382, "step": 207 }, { "epoch": 44.93, "eval_accuracy": 0.1310116086235489, "eval_f1": 0.09138005527552184, "eval_loss": 5.374771595001221, "eval_runtime": 0.3787, "eval_samples_per_second": 3184.593, "eval_steps_per_second": 26.406, "step": 212 }, { "epoch": 45.99, "eval_accuracy": 0.12769485903814262, "eval_f1": 0.08429454245370348, "eval_loss": 5.369958400726318, "eval_runtime": 0.3796, "eval_samples_per_second": 3177.034, "eval_steps_per_second": 26.344, "step": 217 }, { "epoch": 46.83, "eval_accuracy": 0.1252072968490879, "eval_f1": 0.0828030322134829, "eval_loss": 5.345793724060059, "eval_runtime": 0.3798, "eval_samples_per_second": 3175.327, "eval_steps_per_second": 26.329, "step": 221 }, { "epoch": 47.89, "eval_accuracy": 0.12686567164179105, "eval_f1": 0.08616638865359265, "eval_loss": 5.33195686340332, "eval_runtime": 0.3775, "eval_samples_per_second": 3194.5, "eval_steps_per_second": 26.488, "step": 226 }, { "epoch": 48.95, "eval_accuracy": 0.13598673300165837, "eval_f1": 0.09754942424775702, "eval_loss": 5.319748878479004, "eval_runtime": 0.3785, "eval_samples_per_second": 3186.573, "eval_steps_per_second": 26.423, "step": 231 }, { "epoch": 49.8, "eval_accuracy": 0.13598673300165837, "eval_f1": 0.09534346865693065, "eval_loss": 5.305931091308594, "eval_runtime": 0.3795, "eval_samples_per_second": 3178.056, "eval_steps_per_second": 26.352, "step": 235 }, { "epoch": 50.86, "eval_accuracy": 0.13018242122719734, "eval_f1": 0.08991729329826506, "eval_loss": 5.287778377532959, "eval_runtime": 0.3796, "eval_samples_per_second": 3176.946, "eval_steps_per_second": 26.343, "step": 240 }, { "epoch": 51.92, "eval_accuracy": 0.1384742951907131, "eval_f1": 0.0985219901693966, "eval_loss": 5.267482757568359, "eval_runtime": 0.378, "eval_samples_per_second": 3190.643, "eval_steps_per_second": 26.456, "step": 245 }, { "epoch": 52.98, "eval_accuracy": 0.13764510779436154, "eval_f1": 0.09503565447256919, "eval_loss": 5.257174491882324, "eval_runtime": 0.3791, "eval_samples_per_second": 3181.268, "eval_steps_per_second": 26.379, "step": 250 }, { "epoch": 53.83, "eval_accuracy": 0.1417910447761194, "eval_f1": 0.09976792054154651, "eval_loss": 5.241861820220947, "eval_runtime": 0.378, "eval_samples_per_second": 3190.114, "eval_steps_per_second": 26.452, "step": 254 }, { "epoch": 54.89, "eval_accuracy": 0.14925373134328357, "eval_f1": 0.10946077502309857, "eval_loss": 5.227325439453125, "eval_runtime": 0.3793, "eval_samples_per_second": 3179.266, "eval_steps_per_second": 26.362, "step": 259 }, { "epoch": 55.95, "eval_accuracy": 0.1451077943615257, "eval_f1": 0.10515805134406893, "eval_loss": 5.209225654602051, "eval_runtime": 0.3796, "eval_samples_per_second": 3177.24, "eval_steps_per_second": 26.345, "step": 264 }, { "epoch": 56.79, "eval_accuracy": 0.14262023217247097, "eval_f1": 0.10262149479931104, "eval_loss": 5.20381498336792, "eval_runtime": 0.38, "eval_samples_per_second": 3174.088, "eval_steps_per_second": 26.319, "step": 268 }, { "epoch": 57.85, "eval_accuracy": 0.14759535655058043, "eval_f1": 0.10374650435743883, "eval_loss": 5.192402362823486, "eval_runtime": 0.3799, "eval_samples_per_second": 3174.635, "eval_steps_per_second": 26.324, "step": 273 }, { "epoch": 58.91, "eval_accuracy": 0.148424543946932, "eval_f1": 0.10865541288671039, "eval_loss": 5.173834800720215, "eval_runtime": 0.3803, "eval_samples_per_second": 3171.494, "eval_steps_per_second": 26.298, "step": 278 }, { "epoch": 59.97, "eval_accuracy": 0.15008291873963517, "eval_f1": 0.10904088165373273, "eval_loss": 5.164543628692627, "eval_runtime": 0.3779, "eval_samples_per_second": 3190.923, "eval_steps_per_second": 26.459, "step": 283 }, { "epoch": 60.82, "eval_accuracy": 0.15008291873963517, "eval_f1": 0.10954105182677536, "eval_loss": 5.1523356437683105, "eval_runtime": 0.3794, "eval_samples_per_second": 3178.663, "eval_steps_per_second": 26.357, "step": 287 }, { "epoch": 61.88, "eval_accuracy": 0.15339966832504145, "eval_f1": 0.11320704863201249, "eval_loss": 5.140935897827148, "eval_runtime": 0.3968, "eval_samples_per_second": 3039.575, "eval_steps_per_second": 25.204, "step": 292 }, { "epoch": 62.94, "eval_accuracy": 0.15671641791044777, "eval_f1": 0.11619378649382489, "eval_loss": 5.13328218460083, "eval_runtime": 0.3811, "eval_samples_per_second": 3164.429, "eval_steps_per_second": 26.239, "step": 297 }, { "epoch": 63.58, "grad_norm": 1.4214905500411987, "learning_rate": 5.050000000000001e-06, "loss": 5.3883, "step": 300 }, { "epoch": 64.0, "eval_accuracy": 0.15754560530679934, "eval_f1": 0.11639663398572815, "eval_loss": 5.12091064453125, "eval_runtime": 0.3803, "eval_samples_per_second": 3171.109, "eval_steps_per_second": 26.294, "step": 302 }, { "epoch": 64.85, "eval_accuracy": 0.15754560530679934, "eval_f1": 0.11683650415743181, "eval_loss": 5.114450454711914, "eval_runtime": 0.3804, "eval_samples_per_second": 3169.982, "eval_steps_per_second": 26.285, "step": 306 }, { "epoch": 65.91, "eval_accuracy": 0.1550580431177446, "eval_f1": 0.11358274444416293, "eval_loss": 5.104104995727539, "eval_runtime": 0.3928, "eval_samples_per_second": 3069.891, "eval_steps_per_second": 25.455, "step": 311 }, { "epoch": 66.97, "eval_accuracy": 0.15671641791044777, "eval_f1": 0.11432989925366752, "eval_loss": 5.097550868988037, "eval_runtime": 0.3806, "eval_samples_per_second": 3168.505, "eval_steps_per_second": 26.273, "step": 316 }, { "epoch": 67.81, "eval_accuracy": 0.1583747927031509, "eval_f1": 0.11862831647923934, "eval_loss": 5.090635776519775, "eval_runtime": 0.3895, "eval_samples_per_second": 3096.27, "eval_steps_per_second": 25.674, "step": 320 }, { "epoch": 68.87, "eval_accuracy": 0.1625207296849088, "eval_f1": 0.12161516717070989, "eval_loss": 5.080664157867432, "eval_runtime": 0.3789, "eval_samples_per_second": 3183.124, "eval_steps_per_second": 26.394, "step": 325 }, { "epoch": 69.93, "eval_accuracy": 0.16169154228855723, "eval_f1": 0.12018082388719613, "eval_loss": 5.074178218841553, "eval_runtime": 0.3782, "eval_samples_per_second": 3189.028, "eval_steps_per_second": 26.443, "step": 330 }, { "epoch": 70.99, "eval_accuracy": 0.1625207296849088, "eval_f1": 0.12052740787172442, "eval_loss": 5.066323757171631, "eval_runtime": 0.3784, "eval_samples_per_second": 3186.978, "eval_steps_per_second": 26.426, "step": 335 }, { "epoch": 71.84, "eval_accuracy": 0.1625207296849088, "eval_f1": 0.12160006289622655, "eval_loss": 5.062046527862549, "eval_runtime": 0.3801, "eval_samples_per_second": 3172.618, "eval_steps_per_second": 26.307, "step": 339 }, { "epoch": 72.9, "eval_accuracy": 0.16417910447761194, "eval_f1": 0.12152850729201306, "eval_loss": 5.056005954742432, "eval_runtime": 0.381, "eval_samples_per_second": 3165.275, "eval_steps_per_second": 26.246, "step": 344 }, { "epoch": 73.96, "eval_accuracy": 0.16666666666666666, "eval_f1": 0.12561182312709163, "eval_loss": 5.050036430358887, "eval_runtime": 0.3802, "eval_samples_per_second": 3171.695, "eval_steps_per_second": 26.299, "step": 349 }, { "epoch": 74.81, "eval_accuracy": 0.16832504145936983, "eval_f1": 0.12484080598728817, "eval_loss": 5.044373035430908, "eval_runtime": 0.38, "eval_samples_per_second": 3173.886, "eval_steps_per_second": 26.317, "step": 353 }, { "epoch": 75.87, "eval_accuracy": 0.17081260364842454, "eval_f1": 0.12924225508300144, "eval_loss": 5.041046619415283, "eval_runtime": 0.3807, "eval_samples_per_second": 3168.064, "eval_steps_per_second": 26.269, "step": 358 }, { "epoch": 76.93, "eval_accuracy": 0.16832504145936983, "eval_f1": 0.1266665967282326, "eval_loss": 5.036989688873291, "eval_runtime": 0.3914, "eval_samples_per_second": 3081.513, "eval_steps_per_second": 25.552, "step": 363 }, { "epoch": 77.99, "eval_accuracy": 0.17081260364842454, "eval_f1": 0.12804840525194425, "eval_loss": 5.0318603515625, "eval_runtime": 0.3794, "eval_samples_per_second": 3178.933, "eval_steps_per_second": 26.359, "step": 368 }, { "epoch": 78.83, "eval_accuracy": 0.16998341625207297, "eval_f1": 0.1290972468615459, "eval_loss": 5.03059720993042, "eval_runtime": 0.3795, "eval_samples_per_second": 3178.278, "eval_steps_per_second": 26.354, "step": 372 }, { "epoch": 79.89, "eval_accuracy": 0.17164179104477612, "eval_f1": 0.12944060169078692, "eval_loss": 5.027899265289307, "eval_runtime": 0.3787, "eval_samples_per_second": 3184.745, "eval_steps_per_second": 26.408, "step": 377 }, { "epoch": 80.95, "eval_accuracy": 0.1724709784411277, "eval_f1": 0.13072735697861348, "eval_loss": 5.026247501373291, "eval_runtime": 0.3797, "eval_samples_per_second": 3176.404, "eval_steps_per_second": 26.338, "step": 382 }, { "epoch": 81.8, "eval_accuracy": 0.16998341625207297, "eval_f1": 0.12880907562232394, "eval_loss": 5.024827003479004, "eval_runtime": 0.3798, "eval_samples_per_second": 3175.064, "eval_steps_per_second": 26.327, "step": 386 }, { "epoch": 82.86, "eval_accuracy": 0.17081260364842454, "eval_f1": 0.12912358374020652, "eval_loss": 5.0235443115234375, "eval_runtime": 0.379, "eval_samples_per_second": 3182.237, "eval_steps_per_second": 26.387, "step": 391 }, { "epoch": 83.92, "eval_accuracy": 0.17081260364842454, "eval_f1": 0.12920331329754803, "eval_loss": 5.022723197937012, "eval_runtime": 0.3809, "eval_samples_per_second": 3166.333, "eval_steps_per_second": 26.255, "step": 396 }, { "epoch": 84.77, "eval_accuracy": 0.17081260364842454, "eval_f1": 0.12896882646345054, "eval_loss": 5.022224426269531, "eval_runtime": 0.3794, "eval_samples_per_second": 3178.549, "eval_steps_per_second": 26.356, "step": 400 } ], "logging_steps": 150, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 2.010721223440829e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }