{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.999848249174855, "global_step": 105432, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.9810311468568605e-08, "loss": 0.6518, "step": 500 }, { "epoch": 0.04, "learning_rate": 4.962062293713722e-08, "loss": 0.6229, "step": 1000 }, { "epoch": 0.06, "learning_rate": 4.9430934405705826e-08, "loss": 0.5878, "step": 1500 }, { "epoch": 0.08, "learning_rate": 4.924124587427444e-08, "loss": 0.5654, "step": 2000 }, { "epoch": 0.09, "learning_rate": 4.9051557342843047e-08, "loss": 0.5387, "step": 2500 }, { "epoch": 0.11, "learning_rate": 4.886186881141166e-08, "loss": 0.512, "step": 3000 }, { "epoch": 0.13, "learning_rate": 4.867218027998027e-08, "loss": 0.4914, "step": 3500 }, { "epoch": 0.15, "learning_rate": 4.848249174854888e-08, "loss": 0.4605, "step": 4000 }, { "epoch": 0.17, "learning_rate": 4.829280321711749e-08, "loss": 0.445, "step": 4500 }, { "epoch": 0.19, "learning_rate": 4.81031146856861e-08, "loss": 0.4245, "step": 5000 }, { "epoch": 0.21, "learning_rate": 4.791342615425471e-08, "loss": 0.4061, "step": 5500 }, { "epoch": 0.23, "learning_rate": 4.772373762282332e-08, "loss": 0.3907, "step": 6000 }, { "epoch": 0.25, "learning_rate": 4.753404909139193e-08, "loss": 0.3671, "step": 6500 }, { "epoch": 0.27, "learning_rate": 4.7344360559960544e-08, "loss": 0.3672, "step": 7000 }, { "epoch": 0.28, "learning_rate": 4.715467202852915e-08, "loss": 0.3369, "step": 7500 }, { "epoch": 0.3, "learning_rate": 4.6964983497097764e-08, "loss": 0.33, "step": 8000 }, { "epoch": 0.32, "learning_rate": 4.677529496566637e-08, "loss": 0.3152, "step": 8500 }, { "epoch": 0.34, "learning_rate": 4.6585606434234985e-08, "loss": 0.2996, "step": 9000 }, { "epoch": 0.36, "learning_rate": 4.639591790280359e-08, "loss": 0.2835, "step": 9500 }, { "epoch": 0.38, "learning_rate": 4.6206229371372206e-08, "loss": 0.2747, "step": 10000 }, { "epoch": 0.4, "learning_rate": 4.601654083994081e-08, "loss": 0.2597, "step": 10500 }, { "epoch": 0.42, "learning_rate": 4.582685230850942e-08, "loss": 0.2509, "step": 11000 }, { "epoch": 0.44, "learning_rate": 4.5637163777078034e-08, "loss": 0.2321, "step": 11500 }, { "epoch": 0.46, "learning_rate": 4.544747524564664e-08, "loss": 0.2216, "step": 12000 }, { "epoch": 0.47, "learning_rate": 4.5257786714215255e-08, "loss": 0.2138, "step": 12500 }, { "epoch": 0.49, "learning_rate": 4.506809818278386e-08, "loss": 0.1964, "step": 13000 }, { "epoch": 0.51, "learning_rate": 4.4878409651352476e-08, "loss": 0.183, "step": 13500 }, { "epoch": 0.53, "learning_rate": 4.468872111992108e-08, "loss": 0.1738, "step": 14000 }, { "epoch": 0.55, "learning_rate": 4.4499032588489697e-08, "loss": 0.1628, "step": 14500 }, { "epoch": 0.57, "learning_rate": 4.4309344057058304e-08, "loss": 0.155, "step": 15000 }, { "epoch": 0.59, "learning_rate": 4.411965552562692e-08, "loss": 0.1433, "step": 15500 }, { "epoch": 0.61, "learning_rate": 4.3929966994195524e-08, "loss": 0.1336, "step": 16000 }, { "epoch": 0.63, "learning_rate": 4.374027846276414e-08, "loss": 0.125, "step": 16500 }, { "epoch": 0.64, "learning_rate": 4.3550589931332745e-08, "loss": 0.115, "step": 17000 }, { "epoch": 0.66, "learning_rate": 4.336090139990136e-08, "loss": 0.1095, "step": 17500 }, { "epoch": 0.68, "learning_rate": 4.317121286846997e-08, "loss": 0.1024, "step": 18000 }, { "epoch": 0.7, "learning_rate": 4.298152433703858e-08, "loss": 0.0947, "step": 18500 }, { "epoch": 0.72, "learning_rate": 4.2791835805607194e-08, "loss": 0.0908, "step": 19000 }, { "epoch": 0.74, "learning_rate": 4.26021472741758e-08, "loss": 0.0832, "step": 19500 }, { "epoch": 0.76, "learning_rate": 4.2412458742744414e-08, "loss": 0.0846, "step": 20000 }, { "epoch": 0.78, "learning_rate": 4.222277021131302e-08, "loss": 0.0722, "step": 20500 }, { "epoch": 0.8, "learning_rate": 4.2033081679881635e-08, "loss": 0.0673, "step": 21000 }, { "epoch": 0.82, "learning_rate": 4.184339314845024e-08, "loss": 0.0656, "step": 21500 }, { "epoch": 0.83, "learning_rate": 4.1653704617018856e-08, "loss": 0.0585, "step": 22000 }, { "epoch": 0.85, "learning_rate": 4.146401608558746e-08, "loss": 0.0589, "step": 22500 }, { "epoch": 0.87, "learning_rate": 4.127432755415608e-08, "loss": 0.0543, "step": 23000 }, { "epoch": 0.89, "learning_rate": 4.1084639022724684e-08, "loss": 0.0548, "step": 23500 }, { "epoch": 0.91, "learning_rate": 4.08949504912933e-08, "loss": 0.0496, "step": 24000 }, { "epoch": 0.93, "learning_rate": 4.0705261959861905e-08, "loss": 0.055, "step": 24500 }, { "epoch": 0.95, "learning_rate": 4.051557342843052e-08, "loss": 0.0437, "step": 25000 }, { "epoch": 0.97, "learning_rate": 4.0325884896999126e-08, "loss": 0.0429, "step": 25500 }, { "epoch": 0.99, "learning_rate": 4.013619636556774e-08, "loss": 0.0451, "step": 26000 }, { "epoch": 1.0, "eval_loss": 0.03802574425935745, "eval_runtime": 313.8581, "eval_samples_per_second": 125.974, "eval_steps_per_second": 15.749, "step": 26359 }, { "epoch": 1.01, "learning_rate": 3.9946507834136347e-08, "loss": 0.0423, "step": 26500 }, { "epoch": 1.02, "learning_rate": 3.975681930270496e-08, "loss": 0.0437, "step": 27000 }, { "epoch": 1.04, "learning_rate": 3.956713077127357e-08, "loss": 0.0409, "step": 27500 }, { "epoch": 1.06, "learning_rate": 3.937744223984218e-08, "loss": 0.034, "step": 28000 }, { "epoch": 1.08, "learning_rate": 3.918775370841079e-08, "loss": 0.0421, "step": 28500 }, { "epoch": 1.1, "learning_rate": 3.89980651769794e-08, "loss": 0.038, "step": 29000 }, { "epoch": 1.12, "learning_rate": 3.880837664554801e-08, "loss": 0.0391, "step": 29500 }, { "epoch": 1.14, "learning_rate": 3.861868811411662e-08, "loss": 0.0341, "step": 30000 }, { "epoch": 1.16, "learning_rate": 3.842899958268523e-08, "loss": 0.0438, "step": 30500 }, { "epoch": 1.18, "learning_rate": 3.8239311051253844e-08, "loss": 0.0398, "step": 31000 }, { "epoch": 1.2, "learning_rate": 3.804962251982245e-08, "loss": 0.0402, "step": 31500 }, { "epoch": 1.21, "learning_rate": 3.7859933988391064e-08, "loss": 0.0424, "step": 32000 }, { "epoch": 1.23, "learning_rate": 3.767024545695967e-08, "loss": 0.0381, "step": 32500 }, { "epoch": 1.25, "learning_rate": 3.748055692552828e-08, "loss": 0.027, "step": 33000 }, { "epoch": 1.27, "learning_rate": 3.729086839409689e-08, "loss": 0.0339, "step": 33500 }, { "epoch": 1.29, "learning_rate": 3.71011798626655e-08, "loss": 0.0318, "step": 34000 }, { "epoch": 1.31, "learning_rate": 3.691149133123411e-08, "loss": 0.0415, "step": 34500 }, { "epoch": 1.33, "learning_rate": 3.672180279980272e-08, "loss": 0.0323, "step": 35000 }, { "epoch": 1.35, "learning_rate": 3.6532114268371334e-08, "loss": 0.0349, "step": 35500 }, { "epoch": 1.37, "learning_rate": 3.634242573693994e-08, "loss": 0.0278, "step": 36000 }, { "epoch": 1.38, "learning_rate": 3.6152737205508555e-08, "loss": 0.0367, "step": 36500 }, { "epoch": 1.4, "learning_rate": 3.596304867407716e-08, "loss": 0.0292, "step": 37000 }, { "epoch": 1.42, "learning_rate": 3.5773360142645776e-08, "loss": 0.0322, "step": 37500 }, { "epoch": 1.44, "learning_rate": 3.558367161121438e-08, "loss": 0.0326, "step": 38000 }, { "epoch": 1.46, "learning_rate": 3.5393983079782997e-08, "loss": 0.0288, "step": 38500 }, { "epoch": 1.48, "learning_rate": 3.5204294548351604e-08, "loss": 0.0398, "step": 39000 }, { "epoch": 1.5, "learning_rate": 3.501460601692022e-08, "loss": 0.0265, "step": 39500 }, { "epoch": 1.52, "learning_rate": 3.4824917485488824e-08, "loss": 0.0372, "step": 40000 }, { "epoch": 1.54, "learning_rate": 3.463522895405744e-08, "loss": 0.042, "step": 40500 }, { "epoch": 1.56, "learning_rate": 3.4445540422626045e-08, "loss": 0.0383, "step": 41000 }, { "epoch": 1.57, "learning_rate": 3.425585189119466e-08, "loss": 0.0397, "step": 41500 }, { "epoch": 1.59, "learning_rate": 3.4066163359763266e-08, "loss": 0.0319, "step": 42000 }, { "epoch": 1.61, "learning_rate": 3.387647482833188e-08, "loss": 0.0286, "step": 42500 }, { "epoch": 1.63, "learning_rate": 3.368678629690049e-08, "loss": 0.033, "step": 43000 }, { "epoch": 1.65, "learning_rate": 3.3497097765469094e-08, "loss": 0.0361, "step": 43500 }, { "epoch": 1.67, "learning_rate": 3.330740923403771e-08, "loss": 0.0329, "step": 44000 }, { "epoch": 1.69, "learning_rate": 3.3117720702606315e-08, "loss": 0.0343, "step": 44500 }, { "epoch": 1.71, "learning_rate": 3.292803217117493e-08, "loss": 0.0302, "step": 45000 }, { "epoch": 1.73, "learning_rate": 3.2738343639743536e-08, "loss": 0.0237, "step": 45500 }, { "epoch": 1.75, "learning_rate": 3.254865510831215e-08, "loss": 0.0321, "step": 46000 }, { "epoch": 1.76, "learning_rate": 3.2358966576880757e-08, "loss": 0.0227, "step": 46500 }, { "epoch": 1.78, "learning_rate": 3.216927804544937e-08, "loss": 0.0324, "step": 47000 }, { "epoch": 1.8, "learning_rate": 3.197958951401798e-08, "loss": 0.0262, "step": 47500 }, { "epoch": 1.82, "learning_rate": 3.178990098258659e-08, "loss": 0.0337, "step": 48000 }, { "epoch": 1.84, "learning_rate": 3.16002124511552e-08, "loss": 0.0314, "step": 48500 }, { "epoch": 1.86, "learning_rate": 3.141052391972381e-08, "loss": 0.0433, "step": 49000 }, { "epoch": 1.88, "learning_rate": 3.122083538829242e-08, "loss": 0.0351, "step": 49500 }, { "epoch": 1.9, "learning_rate": 3.103114685686103e-08, "loss": 0.02, "step": 50000 }, { "epoch": 1.92, "learning_rate": 3.084145832542964e-08, "loss": 0.023, "step": 50500 }, { "epoch": 1.93, "learning_rate": 3.0651769793998254e-08, "loss": 0.0311, "step": 51000 }, { "epoch": 1.95, "learning_rate": 3.046208126256686e-08, "loss": 0.0231, "step": 51500 }, { "epoch": 1.97, "learning_rate": 3.0272392731135474e-08, "loss": 0.0277, "step": 52000 }, { "epoch": 1.99, "learning_rate": 3.008270419970408e-08, "loss": 0.0233, "step": 52500 }, { "epoch": 2.0, "eval_loss": 0.029595419764518738, "eval_runtime": 313.3391, "eval_samples_per_second": 126.183, "eval_steps_per_second": 15.775, "step": 52718 }, { "epoch": 2.01, "learning_rate": 2.9893015668272695e-08, "loss": 0.0279, "step": 53000 }, { "epoch": 2.03, "learning_rate": 2.9703327136841306e-08, "loss": 0.0324, "step": 53500 }, { "epoch": 2.05, "learning_rate": 2.9513638605409913e-08, "loss": 0.0245, "step": 54000 }, { "epoch": 2.07, "learning_rate": 2.9323950073978527e-08, "loss": 0.0397, "step": 54500 }, { "epoch": 2.09, "learning_rate": 2.9134261542547134e-08, "loss": 0.0306, "step": 55000 }, { "epoch": 2.11, "learning_rate": 2.8944573011115747e-08, "loss": 0.0215, "step": 55500 }, { "epoch": 2.12, "learning_rate": 2.8754884479684354e-08, "loss": 0.0227, "step": 56000 }, { "epoch": 2.14, "learning_rate": 2.8565195948252968e-08, "loss": 0.0246, "step": 56500 }, { "epoch": 2.16, "learning_rate": 2.8375507416821575e-08, "loss": 0.0318, "step": 57000 }, { "epoch": 2.18, "learning_rate": 2.818581888539019e-08, "loss": 0.0454, "step": 57500 }, { "epoch": 2.2, "learning_rate": 2.7996130353958796e-08, "loss": 0.0236, "step": 58000 }, { "epoch": 2.22, "learning_rate": 2.780644182252741e-08, "loss": 0.032, "step": 58500 }, { "epoch": 2.24, "learning_rate": 2.7616753291096017e-08, "loss": 0.0315, "step": 59000 }, { "epoch": 2.26, "learning_rate": 2.742706475966463e-08, "loss": 0.0367, "step": 59500 }, { "epoch": 2.28, "learning_rate": 2.7237376228233238e-08, "loss": 0.0244, "step": 60000 }, { "epoch": 2.3, "learning_rate": 2.704768769680185e-08, "loss": 0.0268, "step": 60500 }, { "epoch": 2.31, "learning_rate": 2.685799916537046e-08, "loss": 0.0197, "step": 61000 }, { "epoch": 2.33, "learning_rate": 2.6668310633939072e-08, "loss": 0.037, "step": 61500 }, { "epoch": 2.35, "learning_rate": 2.647862210250768e-08, "loss": 0.0239, "step": 62000 }, { "epoch": 2.37, "learning_rate": 2.6288933571076293e-08, "loss": 0.0392, "step": 62500 }, { "epoch": 2.39, "learning_rate": 2.60992450396449e-08, "loss": 0.0269, "step": 63000 }, { "epoch": 2.41, "learning_rate": 2.5909556508213514e-08, "loss": 0.016, "step": 63500 }, { "epoch": 2.43, "learning_rate": 2.571986797678212e-08, "loss": 0.0293, "step": 64000 }, { "epoch": 2.45, "learning_rate": 2.553017944535073e-08, "loss": 0.0368, "step": 64500 }, { "epoch": 2.47, "learning_rate": 2.5340490913919342e-08, "loss": 0.0173, "step": 65000 }, { "epoch": 2.48, "learning_rate": 2.5150802382487952e-08, "loss": 0.037, "step": 65500 }, { "epoch": 2.5, "learning_rate": 2.4961113851056563e-08, "loss": 0.0328, "step": 66000 }, { "epoch": 2.52, "learning_rate": 2.4771425319625177e-08, "loss": 0.0312, "step": 66500 }, { "epoch": 2.54, "learning_rate": 2.4581736788193787e-08, "loss": 0.0435, "step": 67000 }, { "epoch": 2.56, "learning_rate": 2.4392048256762397e-08, "loss": 0.0302, "step": 67500 }, { "epoch": 2.58, "learning_rate": 2.4202359725331008e-08, "loss": 0.0327, "step": 68000 }, { "epoch": 2.6, "learning_rate": 2.4012671193899618e-08, "loss": 0.0266, "step": 68500 }, { "epoch": 2.62, "learning_rate": 2.382298266246823e-08, "loss": 0.0242, "step": 69000 }, { "epoch": 2.64, "learning_rate": 2.363329413103684e-08, "loss": 0.0203, "step": 69500 }, { "epoch": 2.66, "learning_rate": 2.3443605599605446e-08, "loss": 0.025, "step": 70000 }, { "epoch": 2.67, "learning_rate": 2.3253917068174057e-08, "loss": 0.0373, "step": 70500 }, { "epoch": 2.69, "learning_rate": 2.3064228536742667e-08, "loss": 0.0357, "step": 71000 }, { "epoch": 2.71, "learning_rate": 2.2874540005311277e-08, "loss": 0.0294, "step": 71500 }, { "epoch": 2.73, "learning_rate": 2.2684851473879888e-08, "loss": 0.0361, "step": 72000 }, { "epoch": 2.75, "learning_rate": 2.2495162942448498e-08, "loss": 0.0271, "step": 72500 }, { "epoch": 2.77, "learning_rate": 2.230547441101711e-08, "loss": 0.0417, "step": 73000 }, { "epoch": 2.79, "learning_rate": 2.211578587958572e-08, "loss": 0.0348, "step": 73500 }, { "epoch": 2.81, "learning_rate": 2.192609734815433e-08, "loss": 0.0322, "step": 74000 }, { "epoch": 2.83, "learning_rate": 2.173640881672294e-08, "loss": 0.0245, "step": 74500 }, { "epoch": 2.85, "learning_rate": 2.154672028529155e-08, "loss": 0.0393, "step": 75000 }, { "epoch": 2.86, "learning_rate": 2.135703175386016e-08, "loss": 0.0275, "step": 75500 }, { "epoch": 2.88, "learning_rate": 2.116734322242877e-08, "loss": 0.0221, "step": 76000 }, { "epoch": 2.9, "learning_rate": 2.097765469099738e-08, "loss": 0.0401, "step": 76500 }, { "epoch": 2.92, "learning_rate": 2.0787966159565992e-08, "loss": 0.0392, "step": 77000 }, { "epoch": 2.94, "learning_rate": 2.0598277628134602e-08, "loss": 0.0271, "step": 77500 }, { "epoch": 2.96, "learning_rate": 2.0408589096703213e-08, "loss": 0.027, "step": 78000 }, { "epoch": 2.98, "learning_rate": 2.0218900565271823e-08, "loss": 0.0267, "step": 78500 }, { "epoch": 3.0, "learning_rate": 2.0029212033840434e-08, "loss": 0.0379, "step": 79000 }, { "epoch": 3.0, "eval_loss": 0.02849128097295761, "eval_runtime": 312.9013, "eval_samples_per_second": 126.359, "eval_steps_per_second": 15.797, "step": 79077 }, { "epoch": 3.02, "learning_rate": 1.9839523502409044e-08, "loss": 0.0335, "step": 79500 }, { "epoch": 3.04, "learning_rate": 1.9649834970977654e-08, "loss": 0.0283, "step": 80000 }, { "epoch": 3.05, "learning_rate": 1.946014643954626e-08, "loss": 0.0296, "step": 80500 }, { "epoch": 3.07, "learning_rate": 1.9270457908114872e-08, "loss": 0.0174, "step": 81000 }, { "epoch": 3.09, "learning_rate": 1.9080769376683482e-08, "loss": 0.0228, "step": 81500 }, { "epoch": 3.11, "learning_rate": 1.8891080845252093e-08, "loss": 0.0271, "step": 82000 }, { "epoch": 3.13, "learning_rate": 1.8701392313820703e-08, "loss": 0.0178, "step": 82500 }, { "epoch": 3.15, "learning_rate": 1.8511703782389314e-08, "loss": 0.0293, "step": 83000 }, { "epoch": 3.17, "learning_rate": 1.8322015250957924e-08, "loss": 0.0405, "step": 83500 }, { "epoch": 3.19, "learning_rate": 1.8132326719526538e-08, "loss": 0.0273, "step": 84000 }, { "epoch": 3.21, "learning_rate": 1.7942638188095148e-08, "loss": 0.0292, "step": 84500 }, { "epoch": 3.22, "learning_rate": 1.775294965666376e-08, "loss": 0.0233, "step": 85000 }, { "epoch": 3.24, "learning_rate": 1.756326112523237e-08, "loss": 0.0327, "step": 85500 }, { "epoch": 3.26, "learning_rate": 1.737357259380098e-08, "loss": 0.046, "step": 86000 }, { "epoch": 3.28, "learning_rate": 1.718388406236959e-08, "loss": 0.0195, "step": 86500 }, { "epoch": 3.3, "learning_rate": 1.69941955309382e-08, "loss": 0.0266, "step": 87000 }, { "epoch": 3.32, "learning_rate": 1.680450699950681e-08, "loss": 0.0241, "step": 87500 }, { "epoch": 3.34, "learning_rate": 1.661481846807542e-08, "loss": 0.0169, "step": 88000 }, { "epoch": 3.36, "learning_rate": 1.642512993664403e-08, "loss": 0.0283, "step": 88500 }, { "epoch": 3.38, "learning_rate": 1.6235441405212642e-08, "loss": 0.0369, "step": 89000 }, { "epoch": 3.4, "learning_rate": 1.6045752873781252e-08, "loss": 0.0295, "step": 89500 }, { "epoch": 3.41, "learning_rate": 1.5856064342349863e-08, "loss": 0.0308, "step": 90000 }, { "epoch": 3.43, "learning_rate": 1.5666375810918473e-08, "loss": 0.0299, "step": 90500 }, { "epoch": 3.45, "learning_rate": 1.5476687279487084e-08, "loss": 0.0218, "step": 91000 }, { "epoch": 3.47, "learning_rate": 1.528699874805569e-08, "loss": 0.0207, "step": 91500 }, { "epoch": 3.49, "learning_rate": 1.50973102166243e-08, "loss": 0.0214, "step": 92000 }, { "epoch": 3.51, "learning_rate": 1.490762168519291e-08, "loss": 0.0302, "step": 92500 }, { "epoch": 3.53, "learning_rate": 1.4717933153761522e-08, "loss": 0.0246, "step": 93000 }, { "epoch": 3.55, "learning_rate": 1.4528244622330132e-08, "loss": 0.0241, "step": 93500 }, { "epoch": 3.57, "learning_rate": 1.4338556090898743e-08, "loss": 0.0516, "step": 94000 }, { "epoch": 3.59, "learning_rate": 1.4148867559467353e-08, "loss": 0.0328, "step": 94500 }, { "epoch": 3.6, "learning_rate": 1.3959179028035964e-08, "loss": 0.0495, "step": 95000 }, { "epoch": 3.62, "learning_rate": 1.3769490496604574e-08, "loss": 0.0216, "step": 95500 }, { "epoch": 3.64, "learning_rate": 1.3579801965173184e-08, "loss": 0.0389, "step": 96000 }, { "epoch": 3.66, "learning_rate": 1.3390113433741795e-08, "loss": 0.0189, "step": 96500 }, { "epoch": 3.68, "learning_rate": 1.3200424902310405e-08, "loss": 0.0179, "step": 97000 }, { "epoch": 3.7, "learning_rate": 1.3010736370879016e-08, "loss": 0.032, "step": 97500 }, { "epoch": 3.72, "learning_rate": 1.2821047839447626e-08, "loss": 0.0329, "step": 98000 }, { "epoch": 3.74, "learning_rate": 1.2631359308016237e-08, "loss": 0.0292, "step": 98500 }, { "epoch": 3.76, "learning_rate": 1.2441670776584847e-08, "loss": 0.026, "step": 99000 }, { "epoch": 3.77, "learning_rate": 1.2251982245153457e-08, "loss": 0.0323, "step": 99500 }, { "epoch": 3.79, "learning_rate": 1.2062293713722068e-08, "loss": 0.0223, "step": 100000 }, { "epoch": 3.81, "learning_rate": 1.1872605182290678e-08, "loss": 0.0243, "step": 100500 }, { "epoch": 3.83, "learning_rate": 1.1682916650859289e-08, "loss": 0.0167, "step": 101000 }, { "epoch": 3.85, "learning_rate": 1.1493228119427899e-08, "loss": 0.0222, "step": 101500 }, { "epoch": 3.87, "learning_rate": 1.130353958799651e-08, "loss": 0.0218, "step": 102000 }, { "epoch": 3.89, "learning_rate": 1.111385105656512e-08, "loss": 0.0304, "step": 102500 }, { "epoch": 3.91, "learning_rate": 1.092416252513373e-08, "loss": 0.0357, "step": 103000 }, { "epoch": 3.93, "learning_rate": 1.073447399370234e-08, "loss": 0.0377, "step": 103500 }, { "epoch": 3.95, "learning_rate": 1.0544785462270951e-08, "loss": 0.0301, "step": 104000 }, { "epoch": 3.96, "learning_rate": 1.0355096930839562e-08, "loss": 0.0274, "step": 104500 }, { "epoch": 3.98, "learning_rate": 1.016540839940817e-08, "loss": 0.0276, "step": 105000 } ], "max_steps": 131795, "num_train_epochs": 5, "total_flos": 1.6644037006596096e+17, "trial_name": null, "trial_params": null }