{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.46345811051693403, "eval_steps": 202, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0030897207367795603, "grad_norm": 122071.1171875, "learning_rate": 4.0000000000000004e-11, "loss": 28.8142, "step": 1 }, { "epoch": 0.006179441473559121, "grad_norm": 119214.984375, "learning_rate": 8.000000000000001e-11, "loss": 28.2909, "step": 2 }, { "epoch": 0.009269162210338681, "grad_norm": 126446.515625, "learning_rate": 1.2e-10, "loss": 27.4632, "step": 3 }, { "epoch": 0.012358882947118241, "grad_norm": 113183.5078125, "learning_rate": 1.6000000000000002e-10, "loss": 29.2478, "step": 4 }, { "epoch": 0.015448603683897801, "grad_norm": 122417.453125, "learning_rate": 2e-10, "loss": 27.6777, "step": 5 }, { "epoch": 0.018538324420677363, "grad_norm": 122875.0234375, "learning_rate": 1.9998728465660104e-10, "loss": 26.864, "step": 6 }, { "epoch": 0.02162804515745692, "grad_norm": 123327.859375, "learning_rate": 1.9994914186000328e-10, "loss": 27.4961, "step": 7 }, { "epoch": 0.024717765894236483, "grad_norm": 125148.296875, "learning_rate": 1.9988558131018187e-10, "loss": 27.0758, "step": 8 }, { "epoch": 0.027807486631016044, "grad_norm": 123805.0078125, "learning_rate": 1.9979661917102113e-10, "loss": 26.7955, "step": 9 }, { "epoch": 0.030897207367795602, "grad_norm": 137252.546875, "learning_rate": 1.996822780662041e-10, "loss": 23.848, "step": 10 }, { "epoch": 0.03398692810457516, "grad_norm": 123848.6953125, "learning_rate": 1.99542587073459e-10, "loss": 26.2061, "step": 11 }, { "epoch": 0.037076648841354726, "grad_norm": 103722.9296875, "learning_rate": 1.9937758171716467e-10, "loss": 30.3732, "step": 12 }, { "epoch": 0.040166369578134284, "grad_norm": 132914.09375, "learning_rate": 1.9918730395931647e-10, "loss": 25.836, "step": 13 }, { "epoch": 0.04325609031491384, "grad_norm": 122652.0546875, "learning_rate": 1.9897180218885506e-10, "loss": 26.7338, "step": 14 }, { "epoch": 0.04634581105169341, "grad_norm": 125711.875, "learning_rate": 1.9873113120936074e-10, "loss": 27.9079, "step": 15 }, { "epoch": 0.049435531788472965, "grad_norm": 131257.78125, "learning_rate": 1.9846535222511647e-10, "loss": 25.2777, "step": 16 }, { "epoch": 0.052525252525252523, "grad_norm": 135439.875, "learning_rate": 1.9817453282554334e-10, "loss": 25.622, "step": 17 }, { "epoch": 0.05561497326203209, "grad_norm": 112875.90625, "learning_rate": 1.97858746968012e-10, "loss": 28.8657, "step": 18 }, { "epoch": 0.05870469399881165, "grad_norm": 124934.046875, "learning_rate": 1.9751807495903485e-10, "loss": 27.4469, "step": 19 }, { "epoch": 0.061794414735591205, "grad_norm": 126672.046875, "learning_rate": 1.9715260343384348e-10, "loss": 26.0207, "step": 20 }, { "epoch": 0.06488413547237076, "grad_norm": 121388.1953125, "learning_rate": 1.9676242533435677e-10, "loss": 28.2452, "step": 21 }, { "epoch": 0.06797385620915032, "grad_norm": 126791.9921875, "learning_rate": 1.963476398855452e-10, "loss": 27.0343, "step": 22 }, { "epoch": 0.0710635769459299, "grad_norm": 120207.7734375, "learning_rate": 1.9590835257019716e-10, "loss": 27.06, "step": 23 }, { "epoch": 0.07415329768270945, "grad_norm": 119256.28125, "learning_rate": 1.9544467510209388e-10, "loss": 27.0009, "step": 24 }, { "epoch": 0.07724301841948901, "grad_norm": 132457.265625, "learning_rate": 1.9495672539760009e-10, "loss": 26.5873, "step": 25 }, { "epoch": 0.08033273915626857, "grad_norm": 130291.3515625, "learning_rate": 1.9444462754567682e-10, "loss": 27.6641, "step": 26 }, { "epoch": 0.08342245989304813, "grad_norm": 122393.9375, "learning_rate": 1.9390851177632496e-10, "loss": 26.8649, "step": 27 }, { "epoch": 0.08651218062982768, "grad_norm": 123576.078125, "learning_rate": 1.9334851442746664e-10, "loss": 28.0714, "step": 28 }, { "epoch": 0.08960190136660724, "grad_norm": 121570.703125, "learning_rate": 1.9276477791027375e-10, "loss": 26.8387, "step": 29 }, { "epoch": 0.09269162210338681, "grad_norm": 117390.9921875, "learning_rate": 1.9215745067295168e-10, "loss": 28.8172, "step": 30 }, { "epoch": 0.09578134284016637, "grad_norm": 127661.9609375, "learning_rate": 1.9152668716298797e-10, "loss": 27.3977, "step": 31 }, { "epoch": 0.09887106357694593, "grad_norm": 129576.4453125, "learning_rate": 1.9087264778787533e-10, "loss": 25.7966, "step": 32 }, { "epoch": 0.10196078431372549, "grad_norm": 117011.7578125, "learning_rate": 1.9019549887431877e-10, "loss": 29.9255, "step": 33 }, { "epoch": 0.10505050505050505, "grad_norm": 120770.2265625, "learning_rate": 1.894954126259376e-10, "loss": 27.4423, "step": 34 }, { "epoch": 0.1081402257872846, "grad_norm": 118635.3671875, "learning_rate": 1.8877256707947306e-10, "loss": 29.0251, "step": 35 }, { "epoch": 0.11122994652406418, "grad_norm": 122824.8203125, "learning_rate": 1.88027146059512e-10, "loss": 28.9256, "step": 36 }, { "epoch": 0.11431966726084374, "grad_norm": 126411.421875, "learning_rate": 1.872593391317394e-10, "loss": 26.5279, "step": 37 }, { "epoch": 0.1174093879976233, "grad_norm": 132717.421875, "learning_rate": 1.8646934155473023e-10, "loss": 24.6933, "step": 38 }, { "epoch": 0.12049910873440285, "grad_norm": 121920.703125, "learning_rate": 1.8565735423029405e-10, "loss": 27.9129, "step": 39 }, { "epoch": 0.12358882947118241, "grad_norm": 118110.3515625, "learning_rate": 1.8482358365238414e-10, "loss": 28.6163, "step": 40 }, { "epoch": 0.12667855020796198, "grad_norm": 120299.3359375, "learning_rate": 1.839682418545848e-10, "loss": 28.5066, "step": 41 }, { "epoch": 0.12976827094474153, "grad_norm": 140847.53125, "learning_rate": 1.8309154635618964e-10, "loss": 26.3515, "step": 42 }, { "epoch": 0.1328579916815211, "grad_norm": 112302.9609375, "learning_rate": 1.8219372010688515e-10, "loss": 28.644, "step": 43 }, { "epoch": 0.13594771241830064, "grad_norm": 123308.875, "learning_rate": 1.8127499143005265e-10, "loss": 27.6619, "step": 44 }, { "epoch": 0.13903743315508021, "grad_norm": 125378.4921875, "learning_rate": 1.8033559396470454e-10, "loss": 26.452, "step": 45 }, { "epoch": 0.1421271538918598, "grad_norm": 123326.15625, "learning_rate": 1.7937576660606797e-10, "loss": 26.3463, "step": 46 }, { "epoch": 0.14521687462863933, "grad_norm": 117453.265625, "learning_rate": 1.7839575344483237e-10, "loss": 29.4332, "step": 47 }, { "epoch": 0.1483065953654189, "grad_norm": 118000.7265625, "learning_rate": 1.773958037050753e-10, "loss": 28.7868, "step": 48 }, { "epoch": 0.15139631610219845, "grad_norm": 127199.6953125, "learning_rate": 1.7637617168088326e-10, "loss": 25.8592, "step": 49 }, { "epoch": 0.15448603683897802, "grad_norm": 115966.7265625, "learning_rate": 1.753371166716828e-10, "loss": 28.3176, "step": 50 }, { "epoch": 0.15757575757575756, "grad_norm": 120307.578125, "learning_rate": 1.7427890291629892e-10, "loss": 28.7395, "step": 51 }, { "epoch": 0.16066547831253714, "grad_norm": 110086.4296875, "learning_rate": 1.732017995257575e-10, "loss": 29.0903, "step": 52 }, { "epoch": 0.1637551990493167, "grad_norm": 125848.5, "learning_rate": 1.721060804148482e-10, "loss": 27.0135, "step": 53 }, { "epoch": 0.16684491978609625, "grad_norm": 138127.296875, "learning_rate": 1.7099202423246632e-10, "loss": 24.2078, "step": 54 }, { "epoch": 0.16993464052287582, "grad_norm": 126173.71875, "learning_rate": 1.6985991429075038e-10, "loss": 27.1029, "step": 55 }, { "epoch": 0.17302436125965537, "grad_norm": 113517.828125, "learning_rate": 1.687100384930338e-10, "loss": 28.6511, "step": 56 }, { "epoch": 0.17611408199643494, "grad_norm": 125443.1640625, "learning_rate": 1.6754268926062938e-10, "loss": 28.0516, "step": 57 }, { "epoch": 0.17920380273321448, "grad_norm": 120414.265625, "learning_rate": 1.6635816345846412e-10, "loss": 26.6481, "step": 58 }, { "epoch": 0.18229352346999406, "grad_norm": 142621.21875, "learning_rate": 1.6515676231958488e-10, "loss": 24.7811, "step": 59 }, { "epoch": 0.18538324420677363, "grad_norm": 128307.296875, "learning_rate": 1.6393879136855248e-10, "loss": 24.9562, "step": 60 }, { "epoch": 0.18847296494355317, "grad_norm": 131207.0625, "learning_rate": 1.6270456034374474e-10, "loss": 26.3174, "step": 61 }, { "epoch": 0.19156268568033274, "grad_norm": 126856.7578125, "learning_rate": 1.6145438311858797e-10, "loss": 24.581, "step": 62 }, { "epoch": 0.1946524064171123, "grad_norm": 130663.9921875, "learning_rate": 1.601885776217367e-10, "loss": 24.543, "step": 63 }, { "epoch": 0.19774212715389186, "grad_norm": 117330.453125, "learning_rate": 1.589074657562223e-10, "loss": 26.1565, "step": 64 }, { "epoch": 0.20083184789067143, "grad_norm": 126654.546875, "learning_rate": 1.5761137331759085e-10, "loss": 26.9999, "step": 65 }, { "epoch": 0.20392156862745098, "grad_norm": 122709.8984375, "learning_rate": 1.5630062991105098e-10, "loss": 26.9336, "step": 66 }, { "epoch": 0.20701128936423055, "grad_norm": 111839.421875, "learning_rate": 1.5497556886765316e-10, "loss": 29.4629, "step": 67 }, { "epoch": 0.2101010101010101, "grad_norm": 115566.3515625, "learning_rate": 1.536365271595212e-10, "loss": 28.8732, "step": 68 }, { "epoch": 0.21319073083778967, "grad_norm": 120251.84375, "learning_rate": 1.5228384531415808e-10, "loss": 28.2254, "step": 69 }, { "epoch": 0.2162804515745692, "grad_norm": 109853.484375, "learning_rate": 1.5091786732784717e-10, "loss": 28.8846, "step": 70 }, { "epoch": 0.21937017231134878, "grad_norm": 124965.2890625, "learning_rate": 1.495389405781719e-10, "loss": 27.476, "step": 71 }, { "epoch": 0.22245989304812835, "grad_norm": 123831.765625, "learning_rate": 1.4814741573567514e-10, "loss": 28.3641, "step": 72 }, { "epoch": 0.2255496137849079, "grad_norm": 122635.5625, "learning_rate": 1.467436466746814e-10, "loss": 26.8985, "step": 73 }, { "epoch": 0.22863933452168747, "grad_norm": 114768.140625, "learning_rate": 1.4532799038330386e-10, "loss": 27.3799, "step": 74 }, { "epoch": 0.23172905525846701, "grad_norm": 126590.0, "learning_rate": 1.4390080687266012e-10, "loss": 27.1862, "step": 75 }, { "epoch": 0.2348187759952466, "grad_norm": 121103.6640625, "learning_rate": 1.4246245908531884e-10, "loss": 28.5261, "step": 76 }, { "epoch": 0.23790849673202613, "grad_norm": 116113.4765625, "learning_rate": 1.410133128030009e-10, "loss": 29.2815, "step": 77 }, { "epoch": 0.2409982174688057, "grad_norm": 124221.265625, "learning_rate": 1.3955373655355853e-10, "loss": 28.5093, "step": 78 }, { "epoch": 0.24408793820558528, "grad_norm": 122242.0625, "learning_rate": 1.3808410151725631e-10, "loss": 27.9406, "step": 79 }, { "epoch": 0.24717765894236482, "grad_norm": 129292.234375, "learning_rate": 1.3660478143237748e-10, "loss": 25.4098, "step": 80 }, { "epoch": 0.25026737967914436, "grad_norm": 113323.875, "learning_rate": 1.351161525001795e-10, "loss": 30.2781, "step": 81 }, { "epoch": 0.25335710041592396, "grad_norm": 120109.671875, "learning_rate": 1.3361859328922368e-10, "loss": 28.7683, "step": 82 }, { "epoch": 0.2564468211527035, "grad_norm": 131119.078125, "learning_rate": 1.3211248463910263e-10, "loss": 26.0915, "step": 83 }, { "epoch": 0.25953654188948305, "grad_norm": 133106.078125, "learning_rate": 1.3059820956358996e-10, "loss": 25.8208, "step": 84 }, { "epoch": 0.26262626262626265, "grad_norm": 117423.5859375, "learning_rate": 1.290761531532374e-10, "loss": 28.491, "step": 85 }, { "epoch": 0.2657159833630422, "grad_norm": 135454.3125, "learning_rate": 1.2754670247744354e-10, "loss": 24.7346, "step": 86 }, { "epoch": 0.26880570409982174, "grad_norm": 125783.6484375, "learning_rate": 1.260102464860195e-10, "loss": 25.684, "step": 87 }, { "epoch": 0.2718954248366013, "grad_norm": 124170.6484375, "learning_rate": 1.2446717591027624e-10, "loss": 27.912, "step": 88 }, { "epoch": 0.2749851455733809, "grad_norm": 123135.8984375, "learning_rate": 1.2291788316365887e-10, "loss": 27.8059, "step": 89 }, { "epoch": 0.27807486631016043, "grad_norm": 127220.2890625, "learning_rate": 1.213627622419535e-10, "loss": 26.8872, "step": 90 }, { "epoch": 0.28116458704694, "grad_norm": 114622.0625, "learning_rate": 1.1980220862309098e-10, "loss": 27.7952, "step": 91 }, { "epoch": 0.2842543077837196, "grad_norm": 129463.1484375, "learning_rate": 1.182366191665744e-10, "loss": 25.9395, "step": 92 }, { "epoch": 0.2873440285204991, "grad_norm": 138215.8125, "learning_rate": 1.1666639201255506e-10, "loss": 25.4167, "step": 93 }, { "epoch": 0.29043374925727866, "grad_norm": 113110.0, "learning_rate": 1.1509192648058249e-10, "loss": 28.0616, "step": 94 }, { "epoch": 0.2935234699940582, "grad_norm": 120913.9765625, "learning_rate": 1.1351362296805485e-10, "loss": 28.1966, "step": 95 }, { "epoch": 0.2966131907308378, "grad_norm": 117201.953125, "learning_rate": 1.1193188284839518e-10, "loss": 27.8393, "step": 96 }, { "epoch": 0.29970291146761735, "grad_norm": 124783.2890625, "learning_rate": 1.1034710836897921e-10, "loss": 26.5054, "step": 97 }, { "epoch": 0.3027926322043969, "grad_norm": 127615.96875, "learning_rate": 1.0875970254884129e-10, "loss": 26.9284, "step": 98 }, { "epoch": 0.3058823529411765, "grad_norm": 132355.703125, "learning_rate": 1.0717006907618376e-10, "loss": 25.1614, "step": 99 }, { "epoch": 0.30897207367795604, "grad_norm": 132260.8125, "learning_rate": 1.0557861220571625e-10, "loss": 24.961, "step": 100 }, { "epoch": 0.3120617944147356, "grad_norm": 117828.6796875, "learning_rate": 1.0398573665585105e-10, "loss": 28.4477, "step": 101 }, { "epoch": 0.3151515151515151, "grad_norm": 130773.75, "learning_rate": 1.023918475057803e-10, "loss": 27.1515, "step": 102 }, { "epoch": 0.3182412358882947, "grad_norm": 121530.078125, "learning_rate": 1.0079735009246167e-10, "loss": 27.9482, "step": 103 }, { "epoch": 0.32133095662507427, "grad_norm": 122278.96875, "learning_rate": 9.920264990753837e-11, "loss": 28.2168, "step": 104 }, { "epoch": 0.3244206773618538, "grad_norm": 112929.9140625, "learning_rate": 9.760815249421973e-11, "loss": 29.3778, "step": 105 }, { "epoch": 0.3275103980986334, "grad_norm": 116076.59375, "learning_rate": 9.601426334414898e-11, "loss": 28.591, "step": 106 }, { "epoch": 0.33060011883541296, "grad_norm": 128873.78125, "learning_rate": 9.442138779428376e-11, "loss": 26.1022, "step": 107 }, { "epoch": 0.3336898395721925, "grad_norm": 127668.328125, "learning_rate": 9.282993092381625e-11, "loss": 25.6443, "step": 108 }, { "epoch": 0.33677956030897205, "grad_norm": 122660.5625, "learning_rate": 9.12402974511587e-11, "loss": 26.6564, "step": 109 }, { "epoch": 0.33986928104575165, "grad_norm": 120201.34375, "learning_rate": 8.965289163102078e-11, "loss": 25.9894, "step": 110 }, { "epoch": 0.3429590017825312, "grad_norm": 127018.890625, "learning_rate": 8.806811715160484e-11, "loss": 25.5922, "step": 111 }, { "epoch": 0.34604872251931074, "grad_norm": 126997.375, "learning_rate": 8.648637703194516e-11, "loss": 27.1782, "step": 112 }, { "epoch": 0.34913844325609034, "grad_norm": 111071.9921875, "learning_rate": 8.490807351941753e-11, "loss": 29.0618, "step": 113 }, { "epoch": 0.3522281639928699, "grad_norm": 109156.3125, "learning_rate": 8.333360798744496e-11, "loss": 31.8425, "step": 114 }, { "epoch": 0.3553178847296494, "grad_norm": 126704.4375, "learning_rate": 8.17633808334256e-11, "loss": 26.271, "step": 115 }, { "epoch": 0.35840760546642897, "grad_norm": 129102.53125, "learning_rate": 8.019779137690906e-11, "loss": 26.1617, "step": 116 }, { "epoch": 0.36149732620320857, "grad_norm": 116444.125, "learning_rate": 7.863723775804651e-11, "loss": 29.3636, "step": 117 }, { "epoch": 0.3645870469399881, "grad_norm": 119892.703125, "learning_rate": 7.708211683634111e-11, "loss": 28.351, "step": 118 }, { "epoch": 0.36767676767676766, "grad_norm": 133809.671875, "learning_rate": 7.553282408972381e-11, "loss": 25.5753, "step": 119 }, { "epoch": 0.37076648841354726, "grad_norm": 131220.359375, "learning_rate": 7.398975351398053e-11, "loss": 26.8812, "step": 120 }, { "epoch": 0.3738562091503268, "grad_norm": 116611.2734375, "learning_rate": 7.245329752255648e-11, "loss": 29.5172, "step": 121 }, { "epoch": 0.37694592988710635, "grad_norm": 133015.046875, "learning_rate": 7.092384684676262e-11, "loss": 25.8481, "step": 122 }, { "epoch": 0.38003565062388595, "grad_norm": 136814.09375, "learning_rate": 6.940179043641005e-11, "loss": 24.7821, "step": 123 }, { "epoch": 0.3831253713606655, "grad_norm": 111482.6875, "learning_rate": 6.788751536089739e-11, "loss": 28.6458, "step": 124 }, { "epoch": 0.38621509209744503, "grad_norm": 105425.6796875, "learning_rate": 6.638140671077632e-11, "loss": 30.565, "step": 125 }, { "epoch": 0.3893048128342246, "grad_norm": 126840.734375, "learning_rate": 6.488384749982054e-11, "loss": 24.9263, "step": 126 }, { "epoch": 0.3923945335710042, "grad_norm": 126449.0078125, "learning_rate": 6.339521856762254e-11, "loss": 27.0906, "step": 127 }, { "epoch": 0.3954842543077837, "grad_norm": 110226.4375, "learning_rate": 6.191589848274368e-11, "loss": 28.7828, "step": 128 }, { "epoch": 0.39857397504456327, "grad_norm": 117383.6171875, "learning_rate": 6.04462634464415e-11, "loss": 27.1669, "step": 129 }, { "epoch": 0.40166369578134287, "grad_norm": 129581.0390625, "learning_rate": 5.898668719699914e-11, "loss": 24.9744, "step": 130 }, { "epoch": 0.4047534165181224, "grad_norm": 126919.765625, "learning_rate": 5.753754091468115e-11, "loss": 26.5326, "step": 131 }, { "epoch": 0.40784313725490196, "grad_norm": 126623.8046875, "learning_rate": 5.6099193127339865e-11, "loss": 28.2028, "step": 132 }, { "epoch": 0.4109328579916815, "grad_norm": 128991.1171875, "learning_rate": 5.467200961669618e-11, "loss": 27.0198, "step": 133 }, { "epoch": 0.4140225787284611, "grad_norm": 127212.984375, "learning_rate": 5.325635332531864e-11, "loss": 27.5968, "step": 134 }, { "epoch": 0.41711229946524064, "grad_norm": 114877.2421875, "learning_rate": 5.1852584264324866e-11, "loss": 29.2745, "step": 135 }, { "epoch": 0.4202020202020202, "grad_norm": 110463.8828125, "learning_rate": 5.046105942182815e-11, "loss": 28.0633, "step": 136 }, { "epoch": 0.4232917409387998, "grad_norm": 124949.09375, "learning_rate": 4.908213267215287e-11, "loss": 27.0064, "step": 137 }, { "epoch": 0.42638146167557933, "grad_norm": 137615.125, "learning_rate": 4.771615468584194e-11, "loss": 25.8549, "step": 138 }, { "epoch": 0.4294711824123589, "grad_norm": 125163.0, "learning_rate": 4.636347284047877e-11, "loss": 28.2664, "step": 139 }, { "epoch": 0.4325609031491384, "grad_norm": 120346.09375, "learning_rate": 4.502443113234688e-11, "loss": 27.093, "step": 140 }, { "epoch": 0.435650623885918, "grad_norm": 130197.5234375, "learning_rate": 4.3699370088949064e-11, "loss": 25.7805, "step": 141 }, { "epoch": 0.43874034462269756, "grad_norm": 141218.203125, "learning_rate": 4.238862668240919e-11, "loss": 25.3932, "step": 142 }, { "epoch": 0.4418300653594771, "grad_norm": 124990.84375, "learning_rate": 4.1092534243777726e-11, "loss": 25.7902, "step": 143 }, { "epoch": 0.4449197860962567, "grad_norm": 131223.625, "learning_rate": 3.981142237826332e-11, "loss": 25.7262, "step": 144 }, { "epoch": 0.44800950683303625, "grad_norm": 119055.7109375, "learning_rate": 3.854561688141205e-11, "loss": 27.7554, "step": 145 }, { "epoch": 0.4510992275698158, "grad_norm": 127996.890625, "learning_rate": 3.729543965625526e-11, "loss": 25.1842, "step": 146 }, { "epoch": 0.45418894830659534, "grad_norm": 130496.4609375, "learning_rate": 3.606120863144753e-11, "loss": 26.7469, "step": 147 }, { "epoch": 0.45727866904337494, "grad_norm": 138091.28125, "learning_rate": 3.484323768041515e-11, "loss": 24.2791, "step": 148 }, { "epoch": 0.4603683897801545, "grad_norm": 114539.9140625, "learning_rate": 3.364183654153592e-11, "loss": 27.8677, "step": 149 }, { "epoch": 0.46345811051693403, "grad_norm": 129665.9140625, "learning_rate": 3.245731073937068e-11, "loss": 26.8488, "step": 150 } ], "logging_steps": 1, "max_steps": 202, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.489068960874496e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }