{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 42694, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.622729721679287, "learning_rate": 3.9032006245121e-09, "loss": 0.5483, "step": 1 }, { "epoch": 0.0, "grad_norm": 4.9874238212397355, "learning_rate": 7.8064012490242e-09, "loss": 0.54, "step": 2 }, { "epoch": 0.0, "grad_norm": 4.5453329237395454, "learning_rate": 1.1709601873536301e-08, "loss": 0.51, "step": 3 }, { "epoch": 0.0, "grad_norm": 4.771523459488045, "learning_rate": 1.56128024980484e-08, "loss": 0.5275, "step": 4 }, { "epoch": 0.0, "grad_norm": 4.938423447703485, "learning_rate": 1.95160031225605e-08, "loss": 0.5251, "step": 5 }, { "epoch": 0.0, "grad_norm": 4.746415280407646, "learning_rate": 2.3419203747072602e-08, "loss": 0.5373, "step": 6 }, { "epoch": 0.0, "grad_norm": 5.103974884798016, "learning_rate": 2.7322404371584703e-08, "loss": 0.5306, "step": 7 }, { "epoch": 0.0, "grad_norm": 4.8332911295043655, "learning_rate": 3.12256049960968e-08, "loss": 0.5391, "step": 8 }, { "epoch": 0.0, "grad_norm": 4.914533251816914, "learning_rate": 3.51288056206089e-08, "loss": 0.5423, "step": 9 }, { "epoch": 0.0, "grad_norm": 4.942202520877101, "learning_rate": 3.9032006245121e-08, "loss": 0.5286, "step": 10 }, { "epoch": 0.0, "grad_norm": 4.672023119250784, "learning_rate": 4.293520686963311e-08, "loss": 0.5161, "step": 11 }, { "epoch": 0.0, "grad_norm": 4.956795172054546, "learning_rate": 4.6838407494145204e-08, "loss": 0.5351, "step": 12 }, { "epoch": 0.0, "grad_norm": 5.227337187609141, "learning_rate": 5.0741608118657305e-08, "loss": 0.5514, "step": 13 }, { "epoch": 0.0, "grad_norm": 4.472700621071287, "learning_rate": 5.4644808743169406e-08, "loss": 0.513, "step": 14 }, { "epoch": 0.0, "grad_norm": 4.576174860666567, "learning_rate": 5.8548009367681506e-08, "loss": 0.5277, "step": 15 }, { "epoch": 0.0, "grad_norm": 4.910034497586738, "learning_rate": 6.24512099921936e-08, "loss": 0.5313, "step": 16 }, { "epoch": 0.0, "grad_norm": 4.785311525245733, "learning_rate": 6.635441061670571e-08, "loss": 0.5216, "step": 17 }, { "epoch": 0.0, "grad_norm": 4.474570928291394, "learning_rate": 7.02576112412178e-08, "loss": 0.509, "step": 18 }, { "epoch": 0.0, "grad_norm": 4.869580247707969, "learning_rate": 7.41608118657299e-08, "loss": 0.5193, "step": 19 }, { "epoch": 0.0, "grad_norm": 4.8940671477221045, "learning_rate": 7.8064012490242e-08, "loss": 0.5437, "step": 20 }, { "epoch": 0.0, "grad_norm": 5.162550605877126, "learning_rate": 8.19672131147541e-08, "loss": 0.5574, "step": 21 }, { "epoch": 0.0, "grad_norm": 5.0781329772719745, "learning_rate": 8.587041373926622e-08, "loss": 0.5311, "step": 22 }, { "epoch": 0.0, "grad_norm": 4.802466955440946, "learning_rate": 8.977361436377831e-08, "loss": 0.5025, "step": 23 }, { "epoch": 0.0, "grad_norm": 4.757025990013422, "learning_rate": 9.367681498829041e-08, "loss": 0.5397, "step": 24 }, { "epoch": 0.0, "grad_norm": 4.580769103038741, "learning_rate": 9.758001561280251e-08, "loss": 0.556, "step": 25 }, { "epoch": 0.0, "grad_norm": 5.063035505775675, "learning_rate": 1.0148321623731461e-07, "loss": 0.5073, "step": 26 }, { "epoch": 0.0, "grad_norm": 4.393285113859669, "learning_rate": 1.053864168618267e-07, "loss": 0.5452, "step": 27 }, { "epoch": 0.0, "grad_norm": 4.871040167590817, "learning_rate": 1.0928961748633881e-07, "loss": 0.5523, "step": 28 }, { "epoch": 0.0, "grad_norm": 4.916487334487691, "learning_rate": 1.1319281811085091e-07, "loss": 0.5527, "step": 29 }, { "epoch": 0.0, "grad_norm": 4.681130170031395, "learning_rate": 1.1709601873536301e-07, "loss": 0.5347, "step": 30 }, { "epoch": 0.0, "grad_norm": 4.558809227525443, "learning_rate": 1.2099921935987511e-07, "loss": 0.5273, "step": 31 }, { "epoch": 0.0, "grad_norm": 4.625041196215798, "learning_rate": 1.249024199843872e-07, "loss": 0.5429, "step": 32 }, { "epoch": 0.0, "grad_norm": 4.416095401198277, "learning_rate": 1.2880562060889932e-07, "loss": 0.5248, "step": 33 }, { "epoch": 0.0, "grad_norm": 4.603944493910623, "learning_rate": 1.3270882123341143e-07, "loss": 0.541, "step": 34 }, { "epoch": 0.0, "grad_norm": 4.3215544422628405, "learning_rate": 1.366120218579235e-07, "loss": 0.5215, "step": 35 }, { "epoch": 0.0, "grad_norm": 4.4355540740489605, "learning_rate": 1.405152224824356e-07, "loss": 0.5228, "step": 36 }, { "epoch": 0.0, "grad_norm": 4.156788742243626, "learning_rate": 1.4441842310694772e-07, "loss": 0.5077, "step": 37 }, { "epoch": 0.0, "grad_norm": 4.323971066639269, "learning_rate": 1.483216237314598e-07, "loss": 0.5298, "step": 38 }, { "epoch": 0.0, "grad_norm": 4.035424996763108, "learning_rate": 1.522248243559719e-07, "loss": 0.4931, "step": 39 }, { "epoch": 0.0, "grad_norm": 4.21943051677546, "learning_rate": 1.56128024980484e-07, "loss": 0.5398, "step": 40 }, { "epoch": 0.0, "grad_norm": 4.222509976155011, "learning_rate": 1.6003122560499612e-07, "loss": 0.523, "step": 41 }, { "epoch": 0.0, "grad_norm": 3.916317451405703, "learning_rate": 1.639344262295082e-07, "loss": 0.5433, "step": 42 }, { "epoch": 0.0, "grad_norm": 4.0196983308015914, "learning_rate": 1.6783762685402032e-07, "loss": 0.5163, "step": 43 }, { "epoch": 0.0, "grad_norm": 4.294682649688215, "learning_rate": 1.7174082747853244e-07, "loss": 0.5211, "step": 44 }, { "epoch": 0.0, "grad_norm": 4.15139626010468, "learning_rate": 1.756440281030445e-07, "loss": 0.5184, "step": 45 }, { "epoch": 0.0, "grad_norm": 3.726393229513805, "learning_rate": 1.7954722872755661e-07, "loss": 0.505, "step": 46 }, { "epoch": 0.0, "grad_norm": 3.37925548303896, "learning_rate": 1.834504293520687e-07, "loss": 0.4903, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.8975201529597565, "learning_rate": 1.8735362997658082e-07, "loss": 0.4749, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.996512187875095, "learning_rate": 1.912568306010929e-07, "loss": 0.4784, "step": 49 }, { "epoch": 0.0, "grad_norm": 2.7024299768318, "learning_rate": 1.9516003122560502e-07, "loss": 0.4745, "step": 50 }, { "epoch": 0.0, "grad_norm": 3.048661356849958, "learning_rate": 1.9906323185011713e-07, "loss": 0.4738, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.6938356320245602, "learning_rate": 2.0296643247462922e-07, "loss": 0.4738, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.857796796791975, "learning_rate": 2.068696330991413e-07, "loss": 0.5026, "step": 53 }, { "epoch": 0.0, "grad_norm": 2.7440602131187415, "learning_rate": 2.107728337236534e-07, "loss": 0.4981, "step": 54 }, { "epoch": 0.0, "grad_norm": 2.6069629510706958, "learning_rate": 2.146760343481655e-07, "loss": 0.4931, "step": 55 }, { "epoch": 0.0, "grad_norm": 2.593446222285576, "learning_rate": 2.1857923497267762e-07, "loss": 0.4815, "step": 56 }, { "epoch": 0.0, "grad_norm": 2.425235101214889, "learning_rate": 2.224824355971897e-07, "loss": 0.4639, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.4653887582549388, "learning_rate": 2.2638563622170182e-07, "loss": 0.4747, "step": 58 }, { "epoch": 0.0, "grad_norm": 2.1512577978720966, "learning_rate": 2.3028883684621394e-07, "loss": 0.4764, "step": 59 }, { "epoch": 0.0, "grad_norm": 2.389804994952366, "learning_rate": 2.3419203747072603e-07, "loss": 0.4531, "step": 60 }, { "epoch": 0.0, "grad_norm": 2.3966539116445094, "learning_rate": 2.3809523809523811e-07, "loss": 0.4674, "step": 61 }, { "epoch": 0.0, "grad_norm": 2.293633641919212, "learning_rate": 2.4199843871975023e-07, "loss": 0.4779, "step": 62 }, { "epoch": 0.0, "grad_norm": 1.9282054190146631, "learning_rate": 2.459016393442623e-07, "loss": 0.4524, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.7125508870682322, "learning_rate": 2.498048399687744e-07, "loss": 0.4179, "step": 64 }, { "epoch": 0.0, "grad_norm": 1.7155722113406209, "learning_rate": 2.5370804059328654e-07, "loss": 0.4733, "step": 65 }, { "epoch": 0.0, "grad_norm": 1.579095880980918, "learning_rate": 2.5761124121779863e-07, "loss": 0.4784, "step": 66 }, { "epoch": 0.0, "grad_norm": 1.5536282684022076, "learning_rate": 2.615144418423107e-07, "loss": 0.4269, "step": 67 }, { "epoch": 0.0, "grad_norm": 1.5235193645160208, "learning_rate": 2.6541764246682286e-07, "loss": 0.4388, "step": 68 }, { "epoch": 0.0, "grad_norm": 1.4955293726691672, "learning_rate": 2.693208430913349e-07, "loss": 0.4523, "step": 69 }, { "epoch": 0.0, "grad_norm": 1.3792296221118685, "learning_rate": 2.73224043715847e-07, "loss": 0.4584, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.342382803840417, "learning_rate": 2.771272443403591e-07, "loss": 0.4351, "step": 71 }, { "epoch": 0.0, "grad_norm": 1.2782325828524457, "learning_rate": 2.810304449648712e-07, "loss": 0.4525, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.1864441639263879, "learning_rate": 2.849336455893833e-07, "loss": 0.4271, "step": 73 }, { "epoch": 0.0, "grad_norm": 1.0900708651070972, "learning_rate": 2.8883684621389544e-07, "loss": 0.4387, "step": 74 }, { "epoch": 0.0, "grad_norm": 1.1571683464380191, "learning_rate": 2.927400468384075e-07, "loss": 0.4029, "step": 75 }, { "epoch": 0.0, "grad_norm": 1.0543627668256164, "learning_rate": 2.966432474629196e-07, "loss": 0.4398, "step": 76 }, { "epoch": 0.0, "grad_norm": 1.3596442930379864, "learning_rate": 3.005464480874317e-07, "loss": 0.4323, "step": 77 }, { "epoch": 0.0, "grad_norm": 1.058102853758717, "learning_rate": 3.044496487119438e-07, "loss": 0.4313, "step": 78 }, { "epoch": 0.0, "grad_norm": 1.1093218857992502, "learning_rate": 3.0835284933645593e-07, "loss": 0.4474, "step": 79 }, { "epoch": 0.0, "grad_norm": 1.12681798118543, "learning_rate": 3.12256049960968e-07, "loss": 0.4371, "step": 80 }, { "epoch": 0.0, "grad_norm": 1.2531884097984334, "learning_rate": 3.161592505854801e-07, "loss": 0.4499, "step": 81 }, { "epoch": 0.0, "grad_norm": 1.2258256056301933, "learning_rate": 3.2006245120999224e-07, "loss": 0.4222, "step": 82 }, { "epoch": 0.0, "grad_norm": 1.195532558331112, "learning_rate": 3.2396565183450433e-07, "loss": 0.4466, "step": 83 }, { "epoch": 0.0, "grad_norm": 1.0892772233242887, "learning_rate": 3.278688524590164e-07, "loss": 0.4272, "step": 84 }, { "epoch": 0.0, "grad_norm": 1.0183091953336865, "learning_rate": 3.3177205308352856e-07, "loss": 0.4326, "step": 85 }, { "epoch": 0.0, "grad_norm": 0.9809369017337862, "learning_rate": 3.3567525370804065e-07, "loss": 0.4127, "step": 86 }, { "epoch": 0.0, "grad_norm": 1.1099685917276574, "learning_rate": 3.3957845433255274e-07, "loss": 0.4194, "step": 87 }, { "epoch": 0.0, "grad_norm": 0.9832506757005745, "learning_rate": 3.434816549570649e-07, "loss": 0.431, "step": 88 }, { "epoch": 0.0, "grad_norm": 0.9310636074872207, "learning_rate": 3.473848555815769e-07, "loss": 0.4062, "step": 89 }, { "epoch": 0.0, "grad_norm": 0.9390862304596608, "learning_rate": 3.51288056206089e-07, "loss": 0.4176, "step": 90 }, { "epoch": 0.0, "grad_norm": 1.0151679977414008, "learning_rate": 3.551912568306011e-07, "loss": 0.4021, "step": 91 }, { "epoch": 0.0, "grad_norm": 0.9825107320866411, "learning_rate": 3.5909445745511323e-07, "loss": 0.4234, "step": 92 }, { "epoch": 0.0, "grad_norm": 0.9484229899033223, "learning_rate": 3.629976580796253e-07, "loss": 0.4249, "step": 93 }, { "epoch": 0.0, "grad_norm": 0.8835717883353453, "learning_rate": 3.669008587041374e-07, "loss": 0.41, "step": 94 }, { "epoch": 0.0, "grad_norm": 0.9322549084740902, "learning_rate": 3.7080405932864954e-07, "loss": 0.4173, "step": 95 }, { "epoch": 0.0, "grad_norm": 1.0184301857541327, "learning_rate": 3.7470725995316163e-07, "loss": 0.4179, "step": 96 }, { "epoch": 0.0, "grad_norm": 0.871678964881241, "learning_rate": 3.786104605776737e-07, "loss": 0.4024, "step": 97 }, { "epoch": 0.0, "grad_norm": 0.9079726857593808, "learning_rate": 3.825136612021858e-07, "loss": 0.41, "step": 98 }, { "epoch": 0.0, "grad_norm": 0.9137816825133326, "learning_rate": 3.8641686182669795e-07, "loss": 0.3979, "step": 99 }, { "epoch": 0.0, "grad_norm": 0.8605166206910289, "learning_rate": 3.9032006245121003e-07, "loss": 0.4048, "step": 100 }, { "epoch": 0.0, "grad_norm": 0.8649526568124305, "learning_rate": 3.942232630757221e-07, "loss": 0.3991, "step": 101 }, { "epoch": 0.0, "grad_norm": 0.8429804711556066, "learning_rate": 3.9812646370023426e-07, "loss": 0.392, "step": 102 }, { "epoch": 0.0, "grad_norm": 0.9071801246586166, "learning_rate": 4.0202966432474635e-07, "loss": 0.4182, "step": 103 }, { "epoch": 0.0, "grad_norm": 0.8653628602837699, "learning_rate": 4.0593286494925844e-07, "loss": 0.431, "step": 104 }, { "epoch": 0.0, "grad_norm": 0.9057315997795852, "learning_rate": 4.0983606557377047e-07, "loss": 0.4221, "step": 105 }, { "epoch": 0.0, "grad_norm": 0.8715118742847457, "learning_rate": 4.137392661982826e-07, "loss": 0.4202, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.9256478701405499, "learning_rate": 4.176424668227947e-07, "loss": 0.4147, "step": 107 }, { "epoch": 0.01, "grad_norm": 0.7822126523481772, "learning_rate": 4.215456674473068e-07, "loss": 0.3861, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.8693223307081748, "learning_rate": 4.2544886807181893e-07, "loss": 0.4065, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.8697349675321939, "learning_rate": 4.29352068696331e-07, "loss": 0.4016, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.8945889816344377, "learning_rate": 4.332552693208431e-07, "loss": 0.4132, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.8526016397346817, "learning_rate": 4.3715846994535524e-07, "loss": 0.4008, "step": 112 }, { "epoch": 0.01, "grad_norm": 0.8691083507125095, "learning_rate": 4.4106167056986733e-07, "loss": 0.4161, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.8107423805281441, "learning_rate": 4.449648711943794e-07, "loss": 0.4247, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.8793459657506867, "learning_rate": 4.4886807181889156e-07, "loss": 0.3904, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.9299864241047985, "learning_rate": 4.5277127244340365e-07, "loss": 0.436, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.8725264489737143, "learning_rate": 4.5667447306791574e-07, "loss": 0.384, "step": 117 }, { "epoch": 0.01, "grad_norm": 0.880884707772859, "learning_rate": 4.605776736924279e-07, "loss": 0.4262, "step": 118 }, { "epoch": 0.01, "grad_norm": 0.8026771754707254, "learning_rate": 4.6448087431693996e-07, "loss": 0.3692, "step": 119 }, { "epoch": 0.01, "grad_norm": 0.8015380206629927, "learning_rate": 4.6838407494145205e-07, "loss": 0.3854, "step": 120 }, { "epoch": 0.01, "grad_norm": 0.8638432623568536, "learning_rate": 4.722872755659641e-07, "loss": 0.4059, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.8561786878973998, "learning_rate": 4.7619047619047623e-07, "loss": 0.3854, "step": 122 }, { "epoch": 0.01, "grad_norm": 0.8434541279647906, "learning_rate": 4.800936768149883e-07, "loss": 0.3985, "step": 123 }, { "epoch": 0.01, "grad_norm": 0.8010029303324552, "learning_rate": 4.839968774395005e-07, "loss": 0.3955, "step": 124 }, { "epoch": 0.01, "grad_norm": 0.8575199239511408, "learning_rate": 4.879000780640125e-07, "loss": 0.4276, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.9333959449873935, "learning_rate": 4.918032786885246e-07, "loss": 0.3787, "step": 126 }, { "epoch": 0.01, "grad_norm": 0.8199887116385266, "learning_rate": 4.957064793130368e-07, "loss": 0.4148, "step": 127 }, { "epoch": 0.01, "grad_norm": 0.8189211916310329, "learning_rate": 4.996096799375488e-07, "loss": 0.3949, "step": 128 }, { "epoch": 0.01, "grad_norm": 0.8682936020259665, "learning_rate": 5.035128805620609e-07, "loss": 0.4029, "step": 129 }, { "epoch": 0.01, "grad_norm": 0.850867465194446, "learning_rate": 5.074160811865731e-07, "loss": 0.3685, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.8460062322640085, "learning_rate": 5.113192818110851e-07, "loss": 0.3885, "step": 131 }, { "epoch": 0.01, "grad_norm": 0.9334319433179203, "learning_rate": 5.152224824355973e-07, "loss": 0.4033, "step": 132 }, { "epoch": 0.01, "grad_norm": 0.8798267253048739, "learning_rate": 5.191256830601094e-07, "loss": 0.4045, "step": 133 }, { "epoch": 0.01, "grad_norm": 0.8595835375977054, "learning_rate": 5.230288836846214e-07, "loss": 0.4199, "step": 134 }, { "epoch": 0.01, "grad_norm": 0.8094193552143988, "learning_rate": 5.269320843091336e-07, "loss": 0.3813, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.7766841480628787, "learning_rate": 5.308352849336457e-07, "loss": 0.3728, "step": 136 }, { "epoch": 0.01, "grad_norm": 0.8863757481993635, "learning_rate": 5.347384855581578e-07, "loss": 0.4106, "step": 137 }, { "epoch": 0.01, "grad_norm": 0.8394808241117865, "learning_rate": 5.386416861826698e-07, "loss": 0.3937, "step": 138 }, { "epoch": 0.01, "grad_norm": 0.8704725239537591, "learning_rate": 5.425448868071819e-07, "loss": 0.4015, "step": 139 }, { "epoch": 0.01, "grad_norm": 0.7665424854876717, "learning_rate": 5.46448087431694e-07, "loss": 0.3842, "step": 140 }, { "epoch": 0.01, "grad_norm": 0.8290444464877001, "learning_rate": 5.503512880562061e-07, "loss": 0.3934, "step": 141 }, { "epoch": 0.01, "grad_norm": 0.8390971274603042, "learning_rate": 5.542544886807182e-07, "loss": 0.3839, "step": 142 }, { "epoch": 0.01, "grad_norm": 0.9273351295576844, "learning_rate": 5.581576893052303e-07, "loss": 0.3563, "step": 143 }, { "epoch": 0.01, "grad_norm": 0.8232430355620092, "learning_rate": 5.620608899297424e-07, "loss": 0.3908, "step": 144 }, { "epoch": 0.01, "grad_norm": 0.8828034707607485, "learning_rate": 5.659640905542546e-07, "loss": 0.3966, "step": 145 }, { "epoch": 0.01, "grad_norm": 0.8837715130966162, "learning_rate": 5.698672911787666e-07, "loss": 0.3705, "step": 146 }, { "epoch": 0.01, "grad_norm": 0.8157209537631375, "learning_rate": 5.737704918032787e-07, "loss": 0.3789, "step": 147 }, { "epoch": 0.01, "grad_norm": 0.8402247679335287, "learning_rate": 5.776736924277909e-07, "loss": 0.384, "step": 148 }, { "epoch": 0.01, "grad_norm": 0.7965742448405706, "learning_rate": 5.815768930523029e-07, "loss": 0.3724, "step": 149 }, { "epoch": 0.01, "grad_norm": 0.9350150267765636, "learning_rate": 5.85480093676815e-07, "loss": 0.3886, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.8629841246695686, "learning_rate": 5.893832943013272e-07, "loss": 0.3886, "step": 151 }, { "epoch": 0.01, "grad_norm": 0.8355352522588186, "learning_rate": 5.932864949258392e-07, "loss": 0.4003, "step": 152 }, { "epoch": 0.01, "grad_norm": 0.8475807531624556, "learning_rate": 5.971896955503513e-07, "loss": 0.3891, "step": 153 }, { "epoch": 0.01, "grad_norm": 0.8430332622455138, "learning_rate": 6.010928961748634e-07, "loss": 0.3811, "step": 154 }, { "epoch": 0.01, "grad_norm": 0.8564816093873223, "learning_rate": 6.049960967993755e-07, "loss": 0.3828, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.8315223811544212, "learning_rate": 6.088992974238876e-07, "loss": 0.3739, "step": 156 }, { "epoch": 0.01, "grad_norm": 0.8654116410134031, "learning_rate": 6.128024980483997e-07, "loss": 0.3954, "step": 157 }, { "epoch": 0.01, "grad_norm": 0.8406165949837806, "learning_rate": 6.167056986729119e-07, "loss": 0.3828, "step": 158 }, { "epoch": 0.01, "grad_norm": 0.8546330898187493, "learning_rate": 6.206088992974239e-07, "loss": 0.3869, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.0049134609008348, "learning_rate": 6.24512099921936e-07, "loss": 0.4003, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.0098992238779199, "learning_rate": 6.284153005464482e-07, "loss": 0.3772, "step": 161 }, { "epoch": 0.01, "grad_norm": 0.8537267996296484, "learning_rate": 6.323185011709602e-07, "loss": 0.3869, "step": 162 }, { "epoch": 0.01, "grad_norm": 1.0770995434944979, "learning_rate": 6.362217017954723e-07, "loss": 0.3699, "step": 163 }, { "epoch": 0.01, "grad_norm": 0.9895019796252554, "learning_rate": 6.401249024199845e-07, "loss": 0.3697, "step": 164 }, { "epoch": 0.01, "grad_norm": 0.8398164417005768, "learning_rate": 6.440281030444965e-07, "loss": 0.3744, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.0164297114139453, "learning_rate": 6.479313036690087e-07, "loss": 0.3899, "step": 166 }, { "epoch": 0.01, "grad_norm": 0.8739587550577989, "learning_rate": 6.518345042935208e-07, "loss": 0.3801, "step": 167 }, { "epoch": 0.01, "grad_norm": 0.8620478437871912, "learning_rate": 6.557377049180328e-07, "loss": 0.4036, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.8733728650221982, "learning_rate": 6.59640905542545e-07, "loss": 0.3754, "step": 169 }, { "epoch": 0.01, "grad_norm": 0.8513921113064464, "learning_rate": 6.635441061670571e-07, "loss": 0.3728, "step": 170 }, { "epoch": 0.01, "grad_norm": 0.8044710772963816, "learning_rate": 6.674473067915692e-07, "loss": 0.3674, "step": 171 }, { "epoch": 0.01, "grad_norm": 0.8642165359896761, "learning_rate": 6.713505074160813e-07, "loss": 0.3933, "step": 172 }, { "epoch": 0.01, "grad_norm": 0.842025818834614, "learning_rate": 6.752537080405934e-07, "loss": 0.3884, "step": 173 }, { "epoch": 0.01, "grad_norm": 0.8190387791173283, "learning_rate": 6.791569086651055e-07, "loss": 0.3816, "step": 174 }, { "epoch": 0.01, "grad_norm": 0.8514099801083872, "learning_rate": 6.830601092896176e-07, "loss": 0.3686, "step": 175 }, { "epoch": 0.01, "grad_norm": 0.879852229257622, "learning_rate": 6.869633099141298e-07, "loss": 0.3905, "step": 176 }, { "epoch": 0.01, "grad_norm": 0.8189034929478652, "learning_rate": 6.908665105386417e-07, "loss": 0.3885, "step": 177 }, { "epoch": 0.01, "grad_norm": 0.8171522974714015, "learning_rate": 6.947697111631538e-07, "loss": 0.3936, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.8677961842949385, "learning_rate": 6.986729117876659e-07, "loss": 0.3729, "step": 179 }, { "epoch": 0.01, "grad_norm": 0.8394881049157396, "learning_rate": 7.02576112412178e-07, "loss": 0.3808, "step": 180 }, { "epoch": 0.01, "grad_norm": 0.8276512940997796, "learning_rate": 7.064793130366901e-07, "loss": 0.3706, "step": 181 }, { "epoch": 0.01, "grad_norm": 0.7879185309946193, "learning_rate": 7.103825136612022e-07, "loss": 0.3727, "step": 182 }, { "epoch": 0.01, "grad_norm": 0.8159215077864062, "learning_rate": 7.142857142857143e-07, "loss": 0.379, "step": 183 }, { "epoch": 0.01, "grad_norm": 0.9179275023653323, "learning_rate": 7.181889149102265e-07, "loss": 0.4055, "step": 184 }, { "epoch": 0.01, "grad_norm": 0.8143770557434388, "learning_rate": 7.220921155347385e-07, "loss": 0.3742, "step": 185 }, { "epoch": 0.01, "grad_norm": 0.8065611541557082, "learning_rate": 7.259953161592506e-07, "loss": 0.3741, "step": 186 }, { "epoch": 0.01, "grad_norm": 0.9488414891408218, "learning_rate": 7.298985167837628e-07, "loss": 0.388, "step": 187 }, { "epoch": 0.01, "grad_norm": 0.7912907748044055, "learning_rate": 7.338017174082748e-07, "loss": 0.3529, "step": 188 }, { "epoch": 0.01, "grad_norm": 0.8856310794509977, "learning_rate": 7.377049180327869e-07, "loss": 0.3678, "step": 189 }, { "epoch": 0.01, "grad_norm": 0.7996437661915726, "learning_rate": 7.416081186572991e-07, "loss": 0.3801, "step": 190 }, { "epoch": 0.01, "grad_norm": 0.8248744965873346, "learning_rate": 7.455113192818111e-07, "loss": 0.3917, "step": 191 }, { "epoch": 0.01, "grad_norm": 0.8293545467547411, "learning_rate": 7.494145199063233e-07, "loss": 0.3992, "step": 192 }, { "epoch": 0.01, "grad_norm": 0.8496822680632784, "learning_rate": 7.533177205308354e-07, "loss": 0.3576, "step": 193 }, { "epoch": 0.01, "grad_norm": 0.8519314323035403, "learning_rate": 7.572209211553474e-07, "loss": 0.3687, "step": 194 }, { "epoch": 0.01, "grad_norm": 0.8389642926270531, "learning_rate": 7.611241217798596e-07, "loss": 0.369, "step": 195 }, { "epoch": 0.01, "grad_norm": 0.8281081648829327, "learning_rate": 7.650273224043716e-07, "loss": 0.3809, "step": 196 }, { "epoch": 0.01, "grad_norm": 0.8724082065995259, "learning_rate": 7.689305230288838e-07, "loss": 0.3726, "step": 197 }, { "epoch": 0.01, "grad_norm": 0.8432177164396634, "learning_rate": 7.728337236533959e-07, "loss": 0.3867, "step": 198 }, { "epoch": 0.01, "grad_norm": 0.7997352132762979, "learning_rate": 7.767369242779079e-07, "loss": 0.3676, "step": 199 }, { "epoch": 0.01, "grad_norm": 0.918306503419813, "learning_rate": 7.806401249024201e-07, "loss": 0.3704, "step": 200 }, { "epoch": 0.01, "grad_norm": 0.8259143292954171, "learning_rate": 7.845433255269322e-07, "loss": 0.3639, "step": 201 }, { "epoch": 0.01, "grad_norm": 0.9683099092434378, "learning_rate": 7.884465261514442e-07, "loss": 0.3841, "step": 202 }, { "epoch": 0.01, "grad_norm": 0.935779296727939, "learning_rate": 7.923497267759564e-07, "loss": 0.3962, "step": 203 }, { "epoch": 0.01, "grad_norm": 0.8824398243252304, "learning_rate": 7.962529274004685e-07, "loss": 0.3944, "step": 204 }, { "epoch": 0.01, "grad_norm": 0.8624705316191792, "learning_rate": 8.001561280249806e-07, "loss": 0.3728, "step": 205 }, { "epoch": 0.01, "grad_norm": 0.9335831229963681, "learning_rate": 8.040593286494927e-07, "loss": 0.3651, "step": 206 }, { "epoch": 0.01, "grad_norm": 0.8536110174923287, "learning_rate": 8.079625292740048e-07, "loss": 0.3662, "step": 207 }, { "epoch": 0.01, "grad_norm": 0.9195563571065184, "learning_rate": 8.118657298985169e-07, "loss": 0.3634, "step": 208 }, { "epoch": 0.01, "grad_norm": 0.9168034533666126, "learning_rate": 8.157689305230289e-07, "loss": 0.3866, "step": 209 }, { "epoch": 0.01, "grad_norm": 0.8915190951248373, "learning_rate": 8.196721311475409e-07, "loss": 0.3581, "step": 210 }, { "epoch": 0.01, "grad_norm": 0.7508610533245367, "learning_rate": 8.235753317720531e-07, "loss": 0.3609, "step": 211 }, { "epoch": 0.01, "grad_norm": 0.8458555968444709, "learning_rate": 8.274785323965652e-07, "loss": 0.3895, "step": 212 }, { "epoch": 0.01, "grad_norm": 0.8370272432406768, "learning_rate": 8.313817330210773e-07, "loss": 0.3525, "step": 213 }, { "epoch": 0.01, "grad_norm": 0.9121891938215305, "learning_rate": 8.352849336455894e-07, "loss": 0.3809, "step": 214 }, { "epoch": 0.01, "grad_norm": 0.9731578753621841, "learning_rate": 8.391881342701015e-07, "loss": 0.3886, "step": 215 }, { "epoch": 0.01, "grad_norm": 0.8617924848601851, "learning_rate": 8.430913348946136e-07, "loss": 0.3749, "step": 216 }, { "epoch": 0.01, "grad_norm": 0.9266985942420548, "learning_rate": 8.469945355191257e-07, "loss": 0.3696, "step": 217 }, { "epoch": 0.01, "grad_norm": 0.8283383828534994, "learning_rate": 8.508977361436379e-07, "loss": 0.3575, "step": 218 }, { "epoch": 0.01, "grad_norm": 0.8267911525906587, "learning_rate": 8.548009367681499e-07, "loss": 0.3557, "step": 219 }, { "epoch": 0.01, "grad_norm": 0.8669836754699147, "learning_rate": 8.58704137392662e-07, "loss": 0.3569, "step": 220 }, { "epoch": 0.01, "grad_norm": 0.8727863985290725, "learning_rate": 8.626073380171742e-07, "loss": 0.4104, "step": 221 }, { "epoch": 0.01, "grad_norm": 0.8502890001538393, "learning_rate": 8.665105386416862e-07, "loss": 0.3761, "step": 222 }, { "epoch": 0.01, "grad_norm": 0.8432821638567171, "learning_rate": 8.704137392661983e-07, "loss": 0.3911, "step": 223 }, { "epoch": 0.01, "grad_norm": 0.8478264204129208, "learning_rate": 8.743169398907105e-07, "loss": 0.3597, "step": 224 }, { "epoch": 0.01, "grad_norm": 0.9926464471740118, "learning_rate": 8.782201405152225e-07, "loss": 0.3797, "step": 225 }, { "epoch": 0.01, "grad_norm": 0.8596454816076141, "learning_rate": 8.821233411397347e-07, "loss": 0.3813, "step": 226 }, { "epoch": 0.01, "grad_norm": 0.8311955008029471, "learning_rate": 8.860265417642468e-07, "loss": 0.3796, "step": 227 }, { "epoch": 0.01, "grad_norm": 0.8451651537533872, "learning_rate": 8.899297423887588e-07, "loss": 0.375, "step": 228 }, { "epoch": 0.01, "grad_norm": 0.9595522633911128, "learning_rate": 8.93832943013271e-07, "loss": 0.3551, "step": 229 }, { "epoch": 0.01, "grad_norm": 0.8133573369556113, "learning_rate": 8.977361436377831e-07, "loss": 0.3568, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.8716155259439982, "learning_rate": 9.016393442622952e-07, "loss": 0.3765, "step": 231 }, { "epoch": 0.01, "grad_norm": 0.8120406469827169, "learning_rate": 9.055425448868073e-07, "loss": 0.3579, "step": 232 }, { "epoch": 0.01, "grad_norm": 0.8606318289485451, "learning_rate": 9.094457455113194e-07, "loss": 0.3803, "step": 233 }, { "epoch": 0.01, "grad_norm": 0.8194303265100454, "learning_rate": 9.133489461358315e-07, "loss": 0.3523, "step": 234 }, { "epoch": 0.01, "grad_norm": 0.8542841425369457, "learning_rate": 9.172521467603436e-07, "loss": 0.3689, "step": 235 }, { "epoch": 0.01, "grad_norm": 0.8154494535834671, "learning_rate": 9.211553473848558e-07, "loss": 0.41, "step": 236 }, { "epoch": 0.01, "grad_norm": 0.9016423923225648, "learning_rate": 9.250585480093678e-07, "loss": 0.3803, "step": 237 }, { "epoch": 0.01, "grad_norm": 0.8101126516991347, "learning_rate": 9.289617486338799e-07, "loss": 0.3678, "step": 238 }, { "epoch": 0.01, "grad_norm": 0.8150552739944517, "learning_rate": 9.328649492583921e-07, "loss": 0.3592, "step": 239 }, { "epoch": 0.01, "grad_norm": 0.8075017563514328, "learning_rate": 9.367681498829041e-07, "loss": 0.3654, "step": 240 }, { "epoch": 0.01, "grad_norm": 0.8639730715576405, "learning_rate": 9.406713505074161e-07, "loss": 0.3949, "step": 241 }, { "epoch": 0.01, "grad_norm": 0.8649985737086766, "learning_rate": 9.445745511319282e-07, "loss": 0.3735, "step": 242 }, { "epoch": 0.01, "grad_norm": 0.7779703538392265, "learning_rate": 9.484777517564403e-07, "loss": 0.3468, "step": 243 }, { "epoch": 0.01, "grad_norm": 0.7756043331546959, "learning_rate": 9.523809523809525e-07, "loss": 0.3456, "step": 244 }, { "epoch": 0.01, "grad_norm": 0.8869001301007062, "learning_rate": 9.562841530054645e-07, "loss": 0.3914, "step": 245 }, { "epoch": 0.01, "grad_norm": 0.9152728351581307, "learning_rate": 9.601873536299766e-07, "loss": 0.357, "step": 246 }, { "epoch": 0.01, "grad_norm": 0.8484547366701196, "learning_rate": 9.640905542544888e-07, "loss": 0.3653, "step": 247 }, { "epoch": 0.01, "grad_norm": 0.9334156312717311, "learning_rate": 9.67993754879001e-07, "loss": 0.3741, "step": 248 }, { "epoch": 0.01, "grad_norm": 0.8073477833175995, "learning_rate": 9.718969555035128e-07, "loss": 0.3528, "step": 249 }, { "epoch": 0.01, "grad_norm": 0.7791154639115215, "learning_rate": 9.75800156128025e-07, "loss": 0.3458, "step": 250 }, { "epoch": 0.01, "grad_norm": 0.827754253079892, "learning_rate": 9.797033567525371e-07, "loss": 0.3617, "step": 251 }, { "epoch": 0.01, "grad_norm": 0.9181867159366851, "learning_rate": 9.836065573770493e-07, "loss": 0.3885, "step": 252 }, { "epoch": 0.01, "grad_norm": 0.8738737477561861, "learning_rate": 9.875097580015614e-07, "loss": 0.3583, "step": 253 }, { "epoch": 0.01, "grad_norm": 0.8483802367680947, "learning_rate": 9.914129586260735e-07, "loss": 0.3684, "step": 254 }, { "epoch": 0.01, "grad_norm": 0.7893604640723723, "learning_rate": 9.953161592505855e-07, "loss": 0.358, "step": 255 }, { "epoch": 0.01, "grad_norm": 0.8228454633646461, "learning_rate": 9.992193598750976e-07, "loss": 0.3606, "step": 256 }, { "epoch": 0.01, "grad_norm": 0.8232485379021844, "learning_rate": 1.0031225604996098e-06, "loss": 0.3551, "step": 257 }, { "epoch": 0.01, "grad_norm": 0.9054177513982167, "learning_rate": 1.0070257611241219e-06, "loss": 0.3733, "step": 258 }, { "epoch": 0.01, "grad_norm": 0.8421630388784828, "learning_rate": 1.010928961748634e-06, "loss": 0.3767, "step": 259 }, { "epoch": 0.01, "grad_norm": 0.8044634540922767, "learning_rate": 1.0148321623731462e-06, "loss": 0.3398, "step": 260 }, { "epoch": 0.01, "grad_norm": 0.876967592916659, "learning_rate": 1.018735362997658e-06, "loss": 0.3588, "step": 261 }, { "epoch": 0.01, "grad_norm": 0.786229799395679, "learning_rate": 1.0226385636221702e-06, "loss": 0.3389, "step": 262 }, { "epoch": 0.01, "grad_norm": 0.8793620270887019, "learning_rate": 1.0265417642466824e-06, "loss": 0.3849, "step": 263 }, { "epoch": 0.01, "grad_norm": 0.895174865533386, "learning_rate": 1.0304449648711945e-06, "loss": 0.3677, "step": 264 }, { "epoch": 0.01, "grad_norm": 0.7438654971848939, "learning_rate": 1.0343481654957067e-06, "loss": 0.3643, "step": 265 }, { "epoch": 0.01, "grad_norm": 0.8157931822853366, "learning_rate": 1.0382513661202188e-06, "loss": 0.38, "step": 266 }, { "epoch": 0.01, "grad_norm": 0.8478226736861759, "learning_rate": 1.0421545667447307e-06, "loss": 0.3641, "step": 267 }, { "epoch": 0.01, "grad_norm": 0.8455758508026294, "learning_rate": 1.0460577673692429e-06, "loss": 0.3597, "step": 268 }, { "epoch": 0.01, "grad_norm": 0.8426668186541952, "learning_rate": 1.049960967993755e-06, "loss": 0.3597, "step": 269 }, { "epoch": 0.01, "grad_norm": 0.8481281550900193, "learning_rate": 1.0538641686182672e-06, "loss": 0.3836, "step": 270 }, { "epoch": 0.01, "grad_norm": 0.8338404829688861, "learning_rate": 1.0577673692427793e-06, "loss": 0.3817, "step": 271 }, { "epoch": 0.01, "grad_norm": 0.8333230075189905, "learning_rate": 1.0616705698672914e-06, "loss": 0.3658, "step": 272 }, { "epoch": 0.01, "grad_norm": 0.8566408699730564, "learning_rate": 1.0655737704918034e-06, "loss": 0.3748, "step": 273 }, { "epoch": 0.01, "grad_norm": 0.9302395217241406, "learning_rate": 1.0694769711163155e-06, "loss": 0.3692, "step": 274 }, { "epoch": 0.01, "grad_norm": 0.7792405316129761, "learning_rate": 1.0733801717408274e-06, "loss": 0.3531, "step": 275 }, { "epoch": 0.01, "grad_norm": 0.785568066491356, "learning_rate": 1.0772833723653396e-06, "loss": 0.3335, "step": 276 }, { "epoch": 0.01, "grad_norm": 0.8183096192847972, "learning_rate": 1.0811865729898517e-06, "loss": 0.3584, "step": 277 }, { "epoch": 0.01, "grad_norm": 0.8707395551945405, "learning_rate": 1.0850897736143639e-06, "loss": 0.3592, "step": 278 }, { "epoch": 0.01, "grad_norm": 0.8564331128175929, "learning_rate": 1.088992974238876e-06, "loss": 0.3557, "step": 279 }, { "epoch": 0.01, "grad_norm": 0.75442588960338, "learning_rate": 1.092896174863388e-06, "loss": 0.3478, "step": 280 }, { "epoch": 0.01, "grad_norm": 0.8675448386502503, "learning_rate": 1.0967993754879e-06, "loss": 0.3661, "step": 281 }, { "epoch": 0.01, "grad_norm": 0.7844141655885469, "learning_rate": 1.1007025761124122e-06, "loss": 0.328, "step": 282 }, { "epoch": 0.01, "grad_norm": 0.7907726555649885, "learning_rate": 1.1046057767369243e-06, "loss": 0.3695, "step": 283 }, { "epoch": 0.01, "grad_norm": 0.7643962025408518, "learning_rate": 1.1085089773614365e-06, "loss": 0.3596, "step": 284 }, { "epoch": 0.01, "grad_norm": 0.8649157492648186, "learning_rate": 1.1124121779859486e-06, "loss": 0.3287, "step": 285 }, { "epoch": 0.01, "grad_norm": 0.8286153533040325, "learning_rate": 1.1163153786104606e-06, "loss": 0.3533, "step": 286 }, { "epoch": 0.01, "grad_norm": 0.8994413511836561, "learning_rate": 1.1202185792349727e-06, "loss": 0.3723, "step": 287 }, { "epoch": 0.01, "grad_norm": 0.7706577376571753, "learning_rate": 1.1241217798594848e-06, "loss": 0.3528, "step": 288 }, { "epoch": 0.01, "grad_norm": 0.8695843397602406, "learning_rate": 1.128024980483997e-06, "loss": 0.3779, "step": 289 }, { "epoch": 0.01, "grad_norm": 0.7945932437863785, "learning_rate": 1.1319281811085091e-06, "loss": 0.3446, "step": 290 }, { "epoch": 0.01, "grad_norm": 0.7659475754747396, "learning_rate": 1.1358313817330213e-06, "loss": 0.3398, "step": 291 }, { "epoch": 0.01, "grad_norm": 0.766518999389875, "learning_rate": 1.1397345823575332e-06, "loss": 0.338, "step": 292 }, { "epoch": 0.01, "grad_norm": 0.8039349334530823, "learning_rate": 1.1436377829820453e-06, "loss": 0.3639, "step": 293 }, { "epoch": 0.01, "grad_norm": 0.781255043681138, "learning_rate": 1.1475409836065575e-06, "loss": 0.3581, "step": 294 }, { "epoch": 0.01, "grad_norm": 0.7817919359225565, "learning_rate": 1.1514441842310696e-06, "loss": 0.3305, "step": 295 }, { "epoch": 0.01, "grad_norm": 0.7481266786865592, "learning_rate": 1.1553473848555818e-06, "loss": 0.3466, "step": 296 }, { "epoch": 0.01, "grad_norm": 0.8593968591736764, "learning_rate": 1.1592505854800939e-06, "loss": 0.3608, "step": 297 }, { "epoch": 0.01, "grad_norm": 0.7807133925661633, "learning_rate": 1.1631537861046058e-06, "loss": 0.3404, "step": 298 }, { "epoch": 0.01, "grad_norm": 0.8399250415345166, "learning_rate": 1.167056986729118e-06, "loss": 0.3535, "step": 299 }, { "epoch": 0.01, "grad_norm": 0.7767108289451815, "learning_rate": 1.17096018735363e-06, "loss": 0.3526, "step": 300 }, { "epoch": 0.01, "grad_norm": 0.9231001485814859, "learning_rate": 1.1748633879781422e-06, "loss": 0.3504, "step": 301 }, { "epoch": 0.01, "grad_norm": 0.8383685618989679, "learning_rate": 1.1787665886026544e-06, "loss": 0.3667, "step": 302 }, { "epoch": 0.01, "grad_norm": 0.8522508371664709, "learning_rate": 1.1826697892271665e-06, "loss": 0.3692, "step": 303 }, { "epoch": 0.01, "grad_norm": 0.8056954671293388, "learning_rate": 1.1865729898516785e-06, "loss": 0.3441, "step": 304 }, { "epoch": 0.01, "grad_norm": 0.917591351570124, "learning_rate": 1.1904761904761906e-06, "loss": 0.3595, "step": 305 }, { "epoch": 0.01, "grad_norm": 0.8931024407564067, "learning_rate": 1.1943793911007025e-06, "loss": 0.3568, "step": 306 }, { "epoch": 0.01, "grad_norm": 0.8206328706115729, "learning_rate": 1.1982825917252147e-06, "loss": 0.3539, "step": 307 }, { "epoch": 0.01, "grad_norm": 0.9266299245255428, "learning_rate": 1.2021857923497268e-06, "loss": 0.3446, "step": 308 }, { "epoch": 0.01, "grad_norm": 0.7958288846588454, "learning_rate": 1.206088992974239e-06, "loss": 0.3573, "step": 309 }, { "epoch": 0.01, "grad_norm": 0.9853350857336837, "learning_rate": 1.209992193598751e-06, "loss": 0.3708, "step": 310 }, { "epoch": 0.01, "grad_norm": 0.8704429163609152, "learning_rate": 1.2138953942232632e-06, "loss": 0.3616, "step": 311 }, { "epoch": 0.01, "grad_norm": 0.8901256306257386, "learning_rate": 1.2177985948477752e-06, "loss": 0.3632, "step": 312 }, { "epoch": 0.01, "grad_norm": 0.9157374494579276, "learning_rate": 1.2217017954722873e-06, "loss": 0.3386, "step": 313 }, { "epoch": 0.01, "grad_norm": 0.8865645877771393, "learning_rate": 1.2256049960967994e-06, "loss": 0.3583, "step": 314 }, { "epoch": 0.01, "grad_norm": 0.9480676186417357, "learning_rate": 1.2295081967213116e-06, "loss": 0.3442, "step": 315 }, { "epoch": 0.01, "grad_norm": 0.9670057857057475, "learning_rate": 1.2334113973458237e-06, "loss": 0.3599, "step": 316 }, { "epoch": 0.01, "grad_norm": 0.8177542909994509, "learning_rate": 1.2373145979703359e-06, "loss": 0.3424, "step": 317 }, { "epoch": 0.01, "grad_norm": 0.993529810533958, "learning_rate": 1.2412177985948478e-06, "loss": 0.3372, "step": 318 }, { "epoch": 0.01, "grad_norm": 1.001047400913079, "learning_rate": 1.24512099921936e-06, "loss": 0.3562, "step": 319 }, { "epoch": 0.01, "grad_norm": 0.9998908301919309, "learning_rate": 1.249024199843872e-06, "loss": 0.3725, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.0894599927783868, "learning_rate": 1.2529274004683842e-06, "loss": 0.3628, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.1128766999517847, "learning_rate": 1.2568306010928963e-06, "loss": 0.3613, "step": 322 }, { "epoch": 0.02, "grad_norm": 0.817380825237106, "learning_rate": 1.2607338017174085e-06, "loss": 0.3446, "step": 323 }, { "epoch": 0.02, "grad_norm": 0.8816290740598899, "learning_rate": 1.2646370023419204e-06, "loss": 0.3549, "step": 324 }, { "epoch": 0.02, "grad_norm": 1.0795860709011016, "learning_rate": 1.2685402029664326e-06, "loss": 0.3616, "step": 325 }, { "epoch": 0.02, "grad_norm": 0.8187722780129082, "learning_rate": 1.2724434035909447e-06, "loss": 0.3451, "step": 326 }, { "epoch": 0.02, "grad_norm": 0.8514763148761405, "learning_rate": 1.2763466042154568e-06, "loss": 0.3548, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.0239629661846263, "learning_rate": 1.280249804839969e-06, "loss": 0.3375, "step": 328 }, { "epoch": 0.02, "grad_norm": 0.8643184589149551, "learning_rate": 1.2841530054644811e-06, "loss": 0.3723, "step": 329 }, { "epoch": 0.02, "grad_norm": 0.7688847165240739, "learning_rate": 1.288056206088993e-06, "loss": 0.3626, "step": 330 }, { "epoch": 0.02, "grad_norm": 0.8019679337487924, "learning_rate": 1.2919594067135052e-06, "loss": 0.3458, "step": 331 }, { "epoch": 0.02, "grad_norm": 0.8870607693004382, "learning_rate": 1.2958626073380173e-06, "loss": 0.349, "step": 332 }, { "epoch": 0.02, "grad_norm": 0.813274208523567, "learning_rate": 1.2997658079625295e-06, "loss": 0.3817, "step": 333 }, { "epoch": 0.02, "grad_norm": 0.7950670703846899, "learning_rate": 1.3036690085870416e-06, "loss": 0.3519, "step": 334 }, { "epoch": 0.02, "grad_norm": 0.8002582670653976, "learning_rate": 1.3075722092115535e-06, "loss": 0.358, "step": 335 }, { "epoch": 0.02, "grad_norm": 0.8438170613319562, "learning_rate": 1.3114754098360657e-06, "loss": 0.3725, "step": 336 }, { "epoch": 0.02, "grad_norm": 0.8441469761018485, "learning_rate": 1.3153786104605778e-06, "loss": 0.3729, "step": 337 }, { "epoch": 0.02, "grad_norm": 0.7795518160675929, "learning_rate": 1.31928181108509e-06, "loss": 0.3494, "step": 338 }, { "epoch": 0.02, "grad_norm": 0.779972199092561, "learning_rate": 1.323185011709602e-06, "loss": 0.3433, "step": 339 }, { "epoch": 0.02, "grad_norm": 0.768110797866881, "learning_rate": 1.3270882123341142e-06, "loss": 0.3342, "step": 340 }, { "epoch": 0.02, "grad_norm": 0.7902255948559378, "learning_rate": 1.3309914129586262e-06, "loss": 0.3587, "step": 341 }, { "epoch": 0.02, "grad_norm": 0.8741388204821082, "learning_rate": 1.3348946135831383e-06, "loss": 0.3692, "step": 342 }, { "epoch": 0.02, "grad_norm": 0.8105155403995535, "learning_rate": 1.3387978142076505e-06, "loss": 0.3651, "step": 343 }, { "epoch": 0.02, "grad_norm": 0.7660183347929093, "learning_rate": 1.3427010148321626e-06, "loss": 0.3383, "step": 344 }, { "epoch": 0.02, "grad_norm": 0.784400512768931, "learning_rate": 1.3466042154566747e-06, "loss": 0.3359, "step": 345 }, { "epoch": 0.02, "grad_norm": 0.8357944787060468, "learning_rate": 1.3505074160811869e-06, "loss": 0.3564, "step": 346 }, { "epoch": 0.02, "grad_norm": 0.834558949504693, "learning_rate": 1.3544106167056988e-06, "loss": 0.382, "step": 347 }, { "epoch": 0.02, "grad_norm": 0.7747547740089832, "learning_rate": 1.358313817330211e-06, "loss": 0.3488, "step": 348 }, { "epoch": 0.02, "grad_norm": 0.794158243996675, "learning_rate": 1.362217017954723e-06, "loss": 0.3493, "step": 349 }, { "epoch": 0.02, "grad_norm": 0.8226191807757942, "learning_rate": 1.3661202185792352e-06, "loss": 0.3567, "step": 350 }, { "epoch": 0.02, "grad_norm": 0.7728555867369871, "learning_rate": 1.3700234192037474e-06, "loss": 0.3488, "step": 351 }, { "epoch": 0.02, "grad_norm": 0.8006623254319585, "learning_rate": 1.3739266198282595e-06, "loss": 0.3562, "step": 352 }, { "epoch": 0.02, "grad_norm": 0.7487582039811426, "learning_rate": 1.3778298204527712e-06, "loss": 0.3253, "step": 353 }, { "epoch": 0.02, "grad_norm": 0.7802908968035446, "learning_rate": 1.3817330210772834e-06, "loss": 0.3278, "step": 354 }, { "epoch": 0.02, "grad_norm": 0.8112031933685945, "learning_rate": 1.3856362217017955e-06, "loss": 0.3603, "step": 355 }, { "epoch": 0.02, "grad_norm": 0.7858256598927056, "learning_rate": 1.3895394223263076e-06, "loss": 0.339, "step": 356 }, { "epoch": 0.02, "grad_norm": 0.8503992905019468, "learning_rate": 1.3934426229508196e-06, "loss": 0.363, "step": 357 }, { "epoch": 0.02, "grad_norm": 0.7722171391186934, "learning_rate": 1.3973458235753317e-06, "loss": 0.3301, "step": 358 }, { "epoch": 0.02, "grad_norm": 0.7609929808068646, "learning_rate": 1.4012490241998439e-06, "loss": 0.3266, "step": 359 }, { "epoch": 0.02, "grad_norm": 0.8205844547130833, "learning_rate": 1.405152224824356e-06, "loss": 0.3569, "step": 360 }, { "epoch": 0.02, "grad_norm": 0.881581951184364, "learning_rate": 1.4090554254488681e-06, "loss": 0.3397, "step": 361 }, { "epoch": 0.02, "grad_norm": 0.8794442379259257, "learning_rate": 1.4129586260733803e-06, "loss": 0.3331, "step": 362 }, { "epoch": 0.02, "grad_norm": 0.8751525445249699, "learning_rate": 1.4168618266978922e-06, "loss": 0.3338, "step": 363 }, { "epoch": 0.02, "grad_norm": 0.8282031567235338, "learning_rate": 1.4207650273224043e-06, "loss": 0.3586, "step": 364 }, { "epoch": 0.02, "grad_norm": 0.802141115998238, "learning_rate": 1.4246682279469165e-06, "loss": 0.3384, "step": 365 }, { "epoch": 0.02, "grad_norm": 0.8452042586333027, "learning_rate": 1.4285714285714286e-06, "loss": 0.335, "step": 366 }, { "epoch": 0.02, "grad_norm": 0.8564701032071632, "learning_rate": 1.4324746291959408e-06, "loss": 0.3489, "step": 367 }, { "epoch": 0.02, "grad_norm": 0.8031651754266314, "learning_rate": 1.436377829820453e-06, "loss": 0.359, "step": 368 }, { "epoch": 0.02, "grad_norm": 0.8004393992736598, "learning_rate": 1.4402810304449648e-06, "loss": 0.3502, "step": 369 }, { "epoch": 0.02, "grad_norm": 0.8794244081868436, "learning_rate": 1.444184231069477e-06, "loss": 0.3686, "step": 370 }, { "epoch": 0.02, "grad_norm": 0.8474845011153278, "learning_rate": 1.4480874316939891e-06, "loss": 0.3602, "step": 371 }, { "epoch": 0.02, "grad_norm": 0.850385798584401, "learning_rate": 1.4519906323185013e-06, "loss": 0.3676, "step": 372 }, { "epoch": 0.02, "grad_norm": 0.801224430180162, "learning_rate": 1.4558938329430134e-06, "loss": 0.3602, "step": 373 }, { "epoch": 0.02, "grad_norm": 0.7523538668966292, "learning_rate": 1.4597970335675255e-06, "loss": 0.3502, "step": 374 }, { "epoch": 0.02, "grad_norm": 0.802511578909268, "learning_rate": 1.4637002341920375e-06, "loss": 0.348, "step": 375 }, { "epoch": 0.02, "grad_norm": 0.759246040931667, "learning_rate": 1.4676034348165496e-06, "loss": 0.3405, "step": 376 }, { "epoch": 0.02, "grad_norm": 0.7740017130875888, "learning_rate": 1.4715066354410618e-06, "loss": 0.3226, "step": 377 }, { "epoch": 0.02, "grad_norm": 0.8619942512661257, "learning_rate": 1.4754098360655739e-06, "loss": 0.3627, "step": 378 }, { "epoch": 0.02, "grad_norm": 0.7962185545607231, "learning_rate": 1.479313036690086e-06, "loss": 0.3561, "step": 379 }, { "epoch": 0.02, "grad_norm": 0.8627791768706268, "learning_rate": 1.4832162373145982e-06, "loss": 0.3762, "step": 380 }, { "epoch": 0.02, "grad_norm": 0.7702800702094907, "learning_rate": 1.48711943793911e-06, "loss": 0.3523, "step": 381 }, { "epoch": 0.02, "grad_norm": 0.7383564115527774, "learning_rate": 1.4910226385636222e-06, "loss": 0.3227, "step": 382 }, { "epoch": 0.02, "grad_norm": 0.814597968947982, "learning_rate": 1.4949258391881344e-06, "loss": 0.344, "step": 383 }, { "epoch": 0.02, "grad_norm": 0.9353176371168589, "learning_rate": 1.4988290398126465e-06, "loss": 0.3785, "step": 384 }, { "epoch": 0.02, "grad_norm": 0.7834930754783883, "learning_rate": 1.5027322404371587e-06, "loss": 0.3501, "step": 385 }, { "epoch": 0.02, "grad_norm": 0.8204511568682297, "learning_rate": 1.5066354410616708e-06, "loss": 0.3768, "step": 386 }, { "epoch": 0.02, "grad_norm": 0.7651396794596279, "learning_rate": 1.5105386416861827e-06, "loss": 0.3417, "step": 387 }, { "epoch": 0.02, "grad_norm": 0.8676874311826211, "learning_rate": 1.5144418423106949e-06, "loss": 0.358, "step": 388 }, { "epoch": 0.02, "grad_norm": 0.8568070450915463, "learning_rate": 1.518345042935207e-06, "loss": 0.3221, "step": 389 }, { "epoch": 0.02, "grad_norm": 0.8932836715259755, "learning_rate": 1.5222482435597192e-06, "loss": 0.3584, "step": 390 }, { "epoch": 0.02, "grad_norm": 0.8129000102178708, "learning_rate": 1.5261514441842313e-06, "loss": 0.3366, "step": 391 }, { "epoch": 0.02, "grad_norm": 0.8426653382976391, "learning_rate": 1.5300546448087432e-06, "loss": 0.3706, "step": 392 }, { "epoch": 0.02, "grad_norm": 0.802217831930987, "learning_rate": 1.5339578454332554e-06, "loss": 0.3671, "step": 393 }, { "epoch": 0.02, "grad_norm": 0.8063571420227426, "learning_rate": 1.5378610460577675e-06, "loss": 0.3576, "step": 394 }, { "epoch": 0.02, "grad_norm": 0.8112651659031157, "learning_rate": 1.5417642466822796e-06, "loss": 0.3312, "step": 395 }, { "epoch": 0.02, "grad_norm": 0.8256835436570796, "learning_rate": 1.5456674473067918e-06, "loss": 0.3351, "step": 396 }, { "epoch": 0.02, "grad_norm": 0.8402022792830862, "learning_rate": 1.549570647931304e-06, "loss": 0.3643, "step": 397 }, { "epoch": 0.02, "grad_norm": 0.813526207768186, "learning_rate": 1.5534738485558159e-06, "loss": 0.3265, "step": 398 }, { "epoch": 0.02, "grad_norm": 0.8091116971837633, "learning_rate": 1.557377049180328e-06, "loss": 0.3498, "step": 399 }, { "epoch": 0.02, "grad_norm": 0.8140598758568126, "learning_rate": 1.5612802498048401e-06, "loss": 0.3721, "step": 400 }, { "epoch": 0.02, "grad_norm": 0.797121709314926, "learning_rate": 1.5651834504293523e-06, "loss": 0.3281, "step": 401 }, { "epoch": 0.02, "grad_norm": 0.7913627484323329, "learning_rate": 1.5690866510538644e-06, "loss": 0.349, "step": 402 }, { "epoch": 0.02, "grad_norm": 0.7952131842491763, "learning_rate": 1.5729898516783766e-06, "loss": 0.3519, "step": 403 }, { "epoch": 0.02, "grad_norm": 0.7994612454319783, "learning_rate": 1.5768930523028885e-06, "loss": 0.3381, "step": 404 }, { "epoch": 0.02, "grad_norm": 0.848780452071145, "learning_rate": 1.5807962529274006e-06, "loss": 0.3542, "step": 405 }, { "epoch": 0.02, "grad_norm": 0.8085383683680807, "learning_rate": 1.5846994535519128e-06, "loss": 0.3768, "step": 406 }, { "epoch": 0.02, "grad_norm": 0.7806852450554176, "learning_rate": 1.588602654176425e-06, "loss": 0.3555, "step": 407 }, { "epoch": 0.02, "grad_norm": 0.943604616216737, "learning_rate": 1.592505854800937e-06, "loss": 0.3581, "step": 408 }, { "epoch": 0.02, "grad_norm": 0.8104476401182904, "learning_rate": 1.5964090554254492e-06, "loss": 0.3323, "step": 409 }, { "epoch": 0.02, "grad_norm": 0.8905921061110477, "learning_rate": 1.6003122560499611e-06, "loss": 0.3529, "step": 410 }, { "epoch": 0.02, "grad_norm": 0.8048260084533357, "learning_rate": 1.6042154566744733e-06, "loss": 0.3495, "step": 411 }, { "epoch": 0.02, "grad_norm": 0.7640329879124377, "learning_rate": 1.6081186572989854e-06, "loss": 0.3387, "step": 412 }, { "epoch": 0.02, "grad_norm": 0.933014397809207, "learning_rate": 1.6120218579234975e-06, "loss": 0.3413, "step": 413 }, { "epoch": 0.02, "grad_norm": 0.7826236535012769, "learning_rate": 1.6159250585480097e-06, "loss": 0.3331, "step": 414 }, { "epoch": 0.02, "grad_norm": 1.0190802204536085, "learning_rate": 1.6198282591725218e-06, "loss": 0.3534, "step": 415 }, { "epoch": 0.02, "grad_norm": 0.8691956315281144, "learning_rate": 1.6237314597970338e-06, "loss": 0.3499, "step": 416 }, { "epoch": 0.02, "grad_norm": 0.8043086326409871, "learning_rate": 1.6276346604215457e-06, "loss": 0.3573, "step": 417 }, { "epoch": 0.02, "grad_norm": 0.8647530798784214, "learning_rate": 1.6315378610460578e-06, "loss": 0.3702, "step": 418 }, { "epoch": 0.02, "grad_norm": 0.8913223909156825, "learning_rate": 1.63544106167057e-06, "loss": 0.3304, "step": 419 }, { "epoch": 0.02, "grad_norm": 0.7730725085178504, "learning_rate": 1.6393442622950819e-06, "loss": 0.3477, "step": 420 }, { "epoch": 0.02, "grad_norm": 0.9388426665671403, "learning_rate": 1.643247462919594e-06, "loss": 0.3385, "step": 421 }, { "epoch": 0.02, "grad_norm": 0.8645221080995654, "learning_rate": 1.6471506635441062e-06, "loss": 0.357, "step": 422 }, { "epoch": 0.02, "grad_norm": 0.8274723523201518, "learning_rate": 1.6510538641686183e-06, "loss": 0.3591, "step": 423 }, { "epoch": 0.02, "grad_norm": 0.8929913836533515, "learning_rate": 1.6549570647931305e-06, "loss": 0.3386, "step": 424 }, { "epoch": 0.02, "grad_norm": 1.0362822190573269, "learning_rate": 1.6588602654176426e-06, "loss": 0.3464, "step": 425 }, { "epoch": 0.02, "grad_norm": 0.83422142999996, "learning_rate": 1.6627634660421545e-06, "loss": 0.3325, "step": 426 }, { "epoch": 0.02, "grad_norm": 0.7559385796253701, "learning_rate": 1.6666666666666667e-06, "loss": 0.3315, "step": 427 }, { "epoch": 0.02, "grad_norm": 0.9445334816350274, "learning_rate": 1.6705698672911788e-06, "loss": 0.3385, "step": 428 }, { "epoch": 0.02, "grad_norm": 0.9551099975907847, "learning_rate": 1.674473067915691e-06, "loss": 0.3505, "step": 429 }, { "epoch": 0.02, "grad_norm": 0.8262714043431814, "learning_rate": 1.678376268540203e-06, "loss": 0.3307, "step": 430 }, { "epoch": 0.02, "grad_norm": 0.844363251156397, "learning_rate": 1.6822794691647152e-06, "loss": 0.3399, "step": 431 }, { "epoch": 0.02, "grad_norm": 0.9106213453465972, "learning_rate": 1.6861826697892272e-06, "loss": 0.3444, "step": 432 }, { "epoch": 0.02, "grad_norm": 1.0347643384515455, "learning_rate": 1.6900858704137393e-06, "loss": 0.3518, "step": 433 }, { "epoch": 0.02, "grad_norm": 0.7797503120162157, "learning_rate": 1.6939890710382514e-06, "loss": 0.3531, "step": 434 }, { "epoch": 0.02, "grad_norm": 0.9271815408352906, "learning_rate": 1.6978922716627636e-06, "loss": 0.3428, "step": 435 }, { "epoch": 0.02, "grad_norm": 0.8999507315772118, "learning_rate": 1.7017954722872757e-06, "loss": 0.3394, "step": 436 }, { "epoch": 0.02, "grad_norm": 0.8077804915501534, "learning_rate": 1.7056986729117879e-06, "loss": 0.3466, "step": 437 }, { "epoch": 0.02, "grad_norm": 0.8039344045918295, "learning_rate": 1.7096018735362998e-06, "loss": 0.346, "step": 438 }, { "epoch": 0.02, "grad_norm": 0.8076175608440448, "learning_rate": 1.713505074160812e-06, "loss": 0.3303, "step": 439 }, { "epoch": 0.02, "grad_norm": 0.7783493122245685, "learning_rate": 1.717408274785324e-06, "loss": 0.3333, "step": 440 }, { "epoch": 0.02, "grad_norm": 0.7739166584299653, "learning_rate": 1.7213114754098362e-06, "loss": 0.3259, "step": 441 }, { "epoch": 0.02, "grad_norm": 0.8752574513259891, "learning_rate": 1.7252146760343483e-06, "loss": 0.3529, "step": 442 }, { "epoch": 0.02, "grad_norm": 0.8036210209664938, "learning_rate": 1.7291178766588605e-06, "loss": 0.3484, "step": 443 }, { "epoch": 0.02, "grad_norm": 0.7855887385319112, "learning_rate": 1.7330210772833724e-06, "loss": 0.336, "step": 444 }, { "epoch": 0.02, "grad_norm": 0.8691927310521814, "learning_rate": 1.7369242779078846e-06, "loss": 0.3689, "step": 445 }, { "epoch": 0.02, "grad_norm": 0.819433098526287, "learning_rate": 1.7408274785323967e-06, "loss": 0.3593, "step": 446 }, { "epoch": 0.02, "grad_norm": 0.8044045223097964, "learning_rate": 1.7447306791569088e-06, "loss": 0.3364, "step": 447 }, { "epoch": 0.02, "grad_norm": 0.8553326165821837, "learning_rate": 1.748633879781421e-06, "loss": 0.3399, "step": 448 }, { "epoch": 0.02, "grad_norm": 0.8097907722122035, "learning_rate": 1.752537080405933e-06, "loss": 0.3518, "step": 449 }, { "epoch": 0.02, "grad_norm": 0.7829910801112184, "learning_rate": 1.756440281030445e-06, "loss": 0.3514, "step": 450 }, { "epoch": 0.02, "grad_norm": 0.7682096424024997, "learning_rate": 1.7603434816549572e-06, "loss": 0.3385, "step": 451 }, { "epoch": 0.02, "grad_norm": 0.8747405845664556, "learning_rate": 1.7642466822794693e-06, "loss": 0.3507, "step": 452 }, { "epoch": 0.02, "grad_norm": 0.7584845821798897, "learning_rate": 1.7681498829039815e-06, "loss": 0.3403, "step": 453 }, { "epoch": 0.02, "grad_norm": 0.7768556567715447, "learning_rate": 1.7720530835284936e-06, "loss": 0.3521, "step": 454 }, { "epoch": 0.02, "grad_norm": 0.7986084451646666, "learning_rate": 1.7759562841530055e-06, "loss": 0.3485, "step": 455 }, { "epoch": 0.02, "grad_norm": 0.8264624900959446, "learning_rate": 1.7798594847775177e-06, "loss": 0.3352, "step": 456 }, { "epoch": 0.02, "grad_norm": 0.7540310841896886, "learning_rate": 1.7837626854020298e-06, "loss": 0.3198, "step": 457 }, { "epoch": 0.02, "grad_norm": 0.8011072414974729, "learning_rate": 1.787665886026542e-06, "loss": 0.3391, "step": 458 }, { "epoch": 0.02, "grad_norm": 0.7915728264209736, "learning_rate": 1.791569086651054e-06, "loss": 0.3282, "step": 459 }, { "epoch": 0.02, "grad_norm": 0.7526082651562527, "learning_rate": 1.7954722872755662e-06, "loss": 0.3283, "step": 460 }, { "epoch": 0.02, "grad_norm": 0.8065224994878701, "learning_rate": 1.7993754879000782e-06, "loss": 0.3265, "step": 461 }, { "epoch": 0.02, "grad_norm": 0.720021576109131, "learning_rate": 1.8032786885245903e-06, "loss": 0.3378, "step": 462 }, { "epoch": 0.02, "grad_norm": 0.8009645488357455, "learning_rate": 1.8071818891491025e-06, "loss": 0.3249, "step": 463 }, { "epoch": 0.02, "grad_norm": 0.764199649808597, "learning_rate": 1.8110850897736146e-06, "loss": 0.3164, "step": 464 }, { "epoch": 0.02, "grad_norm": 0.8392410128448871, "learning_rate": 1.8149882903981267e-06, "loss": 0.3486, "step": 465 }, { "epoch": 0.02, "grad_norm": 0.8347846614582782, "learning_rate": 1.8188914910226389e-06, "loss": 0.3321, "step": 466 }, { "epoch": 0.02, "grad_norm": 0.8041869658807121, "learning_rate": 1.8227946916471508e-06, "loss": 0.3424, "step": 467 }, { "epoch": 0.02, "grad_norm": 0.8084940094695268, "learning_rate": 1.826697892271663e-06, "loss": 0.3264, "step": 468 }, { "epoch": 0.02, "grad_norm": 0.7905325945807548, "learning_rate": 1.830601092896175e-06, "loss": 0.3278, "step": 469 }, { "epoch": 0.02, "grad_norm": 0.8204026872768688, "learning_rate": 1.8345042935206872e-06, "loss": 0.3221, "step": 470 }, { "epoch": 0.02, "grad_norm": 0.8547949164112751, "learning_rate": 1.8384074941451994e-06, "loss": 0.3592, "step": 471 }, { "epoch": 0.02, "grad_norm": 0.8239886989922721, "learning_rate": 1.8423106947697115e-06, "loss": 0.3827, "step": 472 }, { "epoch": 0.02, "grad_norm": 0.7940628180165971, "learning_rate": 1.8462138953942234e-06, "loss": 0.3259, "step": 473 }, { "epoch": 0.02, "grad_norm": 0.7518707517527868, "learning_rate": 1.8501170960187356e-06, "loss": 0.3267, "step": 474 }, { "epoch": 0.02, "grad_norm": 0.8054700350826289, "learning_rate": 1.8540202966432477e-06, "loss": 0.3479, "step": 475 }, { "epoch": 0.02, "grad_norm": 0.7566510698996279, "learning_rate": 1.8579234972677599e-06, "loss": 0.3565, "step": 476 }, { "epoch": 0.02, "grad_norm": 0.7744965121296424, "learning_rate": 1.861826697892272e-06, "loss": 0.3582, "step": 477 }, { "epoch": 0.02, "grad_norm": 0.8333147887465753, "learning_rate": 1.8657298985167841e-06, "loss": 0.3582, "step": 478 }, { "epoch": 0.02, "grad_norm": 0.8705498982783497, "learning_rate": 1.869633099141296e-06, "loss": 0.3747, "step": 479 }, { "epoch": 0.02, "grad_norm": 0.8242185390839183, "learning_rate": 1.8735362997658082e-06, "loss": 0.3727, "step": 480 }, { "epoch": 0.02, "grad_norm": 0.8938283079590679, "learning_rate": 1.8774395003903201e-06, "loss": 0.3371, "step": 481 }, { "epoch": 0.02, "grad_norm": 0.8274775966846732, "learning_rate": 1.8813427010148323e-06, "loss": 0.3373, "step": 482 }, { "epoch": 0.02, "grad_norm": 0.7834754439680777, "learning_rate": 1.8852459016393442e-06, "loss": 0.3409, "step": 483 }, { "epoch": 0.02, "grad_norm": 0.802248199612125, "learning_rate": 1.8891491022638563e-06, "loss": 0.3189, "step": 484 }, { "epoch": 0.02, "grad_norm": 0.8423818357808843, "learning_rate": 1.8930523028883685e-06, "loss": 0.3512, "step": 485 }, { "epoch": 0.02, "grad_norm": 0.8779632662103783, "learning_rate": 1.8969555035128806e-06, "loss": 0.3421, "step": 486 }, { "epoch": 0.02, "grad_norm": 0.9029760535205037, "learning_rate": 1.9008587041373928e-06, "loss": 0.348, "step": 487 }, { "epoch": 0.02, "grad_norm": 0.8502347019942966, "learning_rate": 1.904761904761905e-06, "loss": 0.3604, "step": 488 }, { "epoch": 0.02, "grad_norm": 0.7793366770665221, "learning_rate": 1.908665105386417e-06, "loss": 0.3323, "step": 489 }, { "epoch": 0.02, "grad_norm": 0.8181185505467234, "learning_rate": 1.912568306010929e-06, "loss": 0.3516, "step": 490 }, { "epoch": 0.02, "grad_norm": 0.816062272805071, "learning_rate": 1.916471506635441e-06, "loss": 0.3431, "step": 491 }, { "epoch": 0.02, "grad_norm": 0.7872606792200858, "learning_rate": 1.9203747072599533e-06, "loss": 0.347, "step": 492 }, { "epoch": 0.02, "grad_norm": 0.8189155770202825, "learning_rate": 1.9242779078844654e-06, "loss": 0.3297, "step": 493 }, { "epoch": 0.02, "grad_norm": 0.8703111446645835, "learning_rate": 1.9281811085089775e-06, "loss": 0.3318, "step": 494 }, { "epoch": 0.02, "grad_norm": 0.7293655995524029, "learning_rate": 1.9320843091334897e-06, "loss": 0.3136, "step": 495 }, { "epoch": 0.02, "grad_norm": 0.8320973632647902, "learning_rate": 1.935987509758002e-06, "loss": 0.3467, "step": 496 }, { "epoch": 0.02, "grad_norm": 0.8177091031563406, "learning_rate": 1.939890710382514e-06, "loss": 0.3463, "step": 497 }, { "epoch": 0.02, "grad_norm": 0.7864970902017039, "learning_rate": 1.9437939110070257e-06, "loss": 0.3291, "step": 498 }, { "epoch": 0.02, "grad_norm": 0.8802739456156831, "learning_rate": 1.947697111631538e-06, "loss": 0.352, "step": 499 }, { "epoch": 0.02, "grad_norm": 0.8435116959311106, "learning_rate": 1.95160031225605e-06, "loss": 0.3355, "step": 500 }, { "epoch": 0.02, "grad_norm": 0.8284973943393457, "learning_rate": 1.955503512880562e-06, "loss": 0.3348, "step": 501 }, { "epoch": 0.02, "grad_norm": 0.758843137064754, "learning_rate": 1.9594067135050742e-06, "loss": 0.3312, "step": 502 }, { "epoch": 0.02, "grad_norm": 0.798607297413246, "learning_rate": 1.9633099141295864e-06, "loss": 0.3558, "step": 503 }, { "epoch": 0.02, "grad_norm": 0.7961600320688458, "learning_rate": 1.9672131147540985e-06, "loss": 0.3654, "step": 504 }, { "epoch": 0.02, "grad_norm": 0.8081626037113192, "learning_rate": 1.9711163153786107e-06, "loss": 0.349, "step": 505 }, { "epoch": 0.02, "grad_norm": 0.804960032378753, "learning_rate": 1.975019516003123e-06, "loss": 0.3245, "step": 506 }, { "epoch": 0.02, "grad_norm": 0.8523743024265802, "learning_rate": 1.978922716627635e-06, "loss": 0.3566, "step": 507 }, { "epoch": 0.02, "grad_norm": 0.7998472698069916, "learning_rate": 1.982825917252147e-06, "loss": 0.3394, "step": 508 }, { "epoch": 0.02, "grad_norm": 0.7866075552176608, "learning_rate": 1.986729117876659e-06, "loss": 0.3334, "step": 509 }, { "epoch": 0.02, "grad_norm": 0.8143604673651875, "learning_rate": 1.990632318501171e-06, "loss": 0.3584, "step": 510 }, { "epoch": 0.02, "grad_norm": 0.8260591693822977, "learning_rate": 1.994535519125683e-06, "loss": 0.346, "step": 511 }, { "epoch": 0.02, "grad_norm": 0.7724749985956596, "learning_rate": 1.9984387197501952e-06, "loss": 0.3464, "step": 512 }, { "epoch": 0.02, "grad_norm": 0.8456122078869557, "learning_rate": 2.0023419203747074e-06, "loss": 0.3453, "step": 513 }, { "epoch": 0.02, "grad_norm": 0.9175665164690844, "learning_rate": 2.0062451209992195e-06, "loss": 0.3529, "step": 514 }, { "epoch": 0.02, "grad_norm": 0.8131362241534775, "learning_rate": 2.0101483216237316e-06, "loss": 0.3249, "step": 515 }, { "epoch": 0.02, "grad_norm": 0.7875554564813882, "learning_rate": 2.0140515222482438e-06, "loss": 0.3314, "step": 516 }, { "epoch": 0.02, "grad_norm": 0.8893803970873522, "learning_rate": 2.017954722872756e-06, "loss": 0.3485, "step": 517 }, { "epoch": 0.02, "grad_norm": 0.851098871467831, "learning_rate": 2.021857923497268e-06, "loss": 0.3385, "step": 518 }, { "epoch": 0.02, "grad_norm": 0.7980840276586699, "learning_rate": 2.02576112412178e-06, "loss": 0.3395, "step": 519 }, { "epoch": 0.02, "grad_norm": 0.9884006798599159, "learning_rate": 2.0296643247462923e-06, "loss": 0.3595, "step": 520 }, { "epoch": 0.02, "grad_norm": 0.9385561217923857, "learning_rate": 2.033567525370804e-06, "loss": 0.3552, "step": 521 }, { "epoch": 0.02, "grad_norm": 0.7852475719899504, "learning_rate": 2.037470725995316e-06, "loss": 0.3384, "step": 522 }, { "epoch": 0.02, "grad_norm": 0.9147349639084852, "learning_rate": 2.0413739266198283e-06, "loss": 0.3391, "step": 523 }, { "epoch": 0.02, "grad_norm": 0.792848222220544, "learning_rate": 2.0452771272443405e-06, "loss": 0.3169, "step": 524 }, { "epoch": 0.02, "grad_norm": 0.7521658886883144, "learning_rate": 2.0491803278688526e-06, "loss": 0.3265, "step": 525 }, { "epoch": 0.02, "grad_norm": 0.8403936311289207, "learning_rate": 2.0530835284933648e-06, "loss": 0.3373, "step": 526 }, { "epoch": 0.02, "grad_norm": 0.9251397978580324, "learning_rate": 2.056986729117877e-06, "loss": 0.336, "step": 527 }, { "epoch": 0.02, "grad_norm": 0.7676392434229101, "learning_rate": 2.060889929742389e-06, "loss": 0.3464, "step": 528 }, { "epoch": 0.02, "grad_norm": 0.812699929509099, "learning_rate": 2.064793130366901e-06, "loss": 0.3671, "step": 529 }, { "epoch": 0.02, "grad_norm": 0.7716543087705485, "learning_rate": 2.0686963309914133e-06, "loss": 0.3308, "step": 530 }, { "epoch": 0.02, "grad_norm": 0.7972143681944185, "learning_rate": 2.0725995316159255e-06, "loss": 0.3093, "step": 531 }, { "epoch": 0.02, "grad_norm": 0.782879731854589, "learning_rate": 2.0765027322404376e-06, "loss": 0.3176, "step": 532 }, { "epoch": 0.02, "grad_norm": 0.7416915004747296, "learning_rate": 2.0804059328649493e-06, "loss": 0.3187, "step": 533 }, { "epoch": 0.03, "grad_norm": 0.8432968237623029, "learning_rate": 2.0843091334894615e-06, "loss": 0.3384, "step": 534 }, { "epoch": 0.03, "grad_norm": 0.7618461243507044, "learning_rate": 2.0882123341139736e-06, "loss": 0.3223, "step": 535 }, { "epoch": 0.03, "grad_norm": 0.8184523653891167, "learning_rate": 2.0921155347384857e-06, "loss": 0.331, "step": 536 }, { "epoch": 0.03, "grad_norm": 0.8061009058222663, "learning_rate": 2.096018735362998e-06, "loss": 0.3677, "step": 537 }, { "epoch": 0.03, "grad_norm": 0.7605093112940662, "learning_rate": 2.09992193598751e-06, "loss": 0.3277, "step": 538 }, { "epoch": 0.03, "grad_norm": 0.776005431590662, "learning_rate": 2.103825136612022e-06, "loss": 0.3194, "step": 539 }, { "epoch": 0.03, "grad_norm": 0.7690444714246482, "learning_rate": 2.1077283372365343e-06, "loss": 0.3265, "step": 540 }, { "epoch": 0.03, "grad_norm": 0.7829947334201941, "learning_rate": 2.1116315378610465e-06, "loss": 0.3391, "step": 541 }, { "epoch": 0.03, "grad_norm": 0.8448858469489935, "learning_rate": 2.1155347384855586e-06, "loss": 0.3608, "step": 542 }, { "epoch": 0.03, "grad_norm": 0.8192097246862647, "learning_rate": 2.1194379391100707e-06, "loss": 0.3443, "step": 543 }, { "epoch": 0.03, "grad_norm": 0.8177801352147176, "learning_rate": 2.123341139734583e-06, "loss": 0.3373, "step": 544 }, { "epoch": 0.03, "grad_norm": 0.796544203626462, "learning_rate": 2.1272443403590946e-06, "loss": 0.3361, "step": 545 }, { "epoch": 0.03, "grad_norm": 0.8322224622052117, "learning_rate": 2.1311475409836067e-06, "loss": 0.3549, "step": 546 }, { "epoch": 0.03, "grad_norm": 0.8053915510469698, "learning_rate": 2.135050741608119e-06, "loss": 0.36, "step": 547 }, { "epoch": 0.03, "grad_norm": 0.7917592617514915, "learning_rate": 2.138953942232631e-06, "loss": 0.311, "step": 548 }, { "epoch": 0.03, "grad_norm": 0.7580255127182716, "learning_rate": 2.1428571428571427e-06, "loss": 0.3277, "step": 549 }, { "epoch": 0.03, "grad_norm": 0.8822547698184935, "learning_rate": 2.146760343481655e-06, "loss": 0.3407, "step": 550 }, { "epoch": 0.03, "grad_norm": 0.843271279476342, "learning_rate": 2.150663544106167e-06, "loss": 0.3486, "step": 551 }, { "epoch": 0.03, "grad_norm": 0.8140940911711986, "learning_rate": 2.154566744730679e-06, "loss": 0.3329, "step": 552 }, { "epoch": 0.03, "grad_norm": 0.9034552942380556, "learning_rate": 2.1584699453551913e-06, "loss": 0.3537, "step": 553 }, { "epoch": 0.03, "grad_norm": 0.808335806406233, "learning_rate": 2.1623731459797034e-06, "loss": 0.3401, "step": 554 }, { "epoch": 0.03, "grad_norm": 0.8634246156617748, "learning_rate": 2.1662763466042156e-06, "loss": 0.3329, "step": 555 }, { "epoch": 0.03, "grad_norm": 0.8348786689394118, "learning_rate": 2.1701795472287277e-06, "loss": 0.3596, "step": 556 }, { "epoch": 0.03, "grad_norm": 0.7816203798414081, "learning_rate": 2.17408274785324e-06, "loss": 0.3396, "step": 557 }, { "epoch": 0.03, "grad_norm": 0.7823891520124928, "learning_rate": 2.177985948477752e-06, "loss": 0.3349, "step": 558 }, { "epoch": 0.03, "grad_norm": 0.778370223851375, "learning_rate": 2.181889149102264e-06, "loss": 0.3397, "step": 559 }, { "epoch": 0.03, "grad_norm": 0.8281540742887777, "learning_rate": 2.185792349726776e-06, "loss": 0.3365, "step": 560 }, { "epoch": 0.03, "grad_norm": 0.8393024894786068, "learning_rate": 2.189695550351288e-06, "loss": 0.3372, "step": 561 }, { "epoch": 0.03, "grad_norm": 0.8459807283262023, "learning_rate": 2.1935987509758e-06, "loss": 0.3499, "step": 562 }, { "epoch": 0.03, "grad_norm": 0.7951522396776779, "learning_rate": 2.1975019516003123e-06, "loss": 0.3404, "step": 563 }, { "epoch": 0.03, "grad_norm": 0.833758662299918, "learning_rate": 2.2014051522248244e-06, "loss": 0.3503, "step": 564 }, { "epoch": 0.03, "grad_norm": 0.8026196775710626, "learning_rate": 2.2053083528493366e-06, "loss": 0.3315, "step": 565 }, { "epoch": 0.03, "grad_norm": 0.7823428427861416, "learning_rate": 2.2092115534738487e-06, "loss": 0.3426, "step": 566 }, { "epoch": 0.03, "grad_norm": 0.8244970571097272, "learning_rate": 2.213114754098361e-06, "loss": 0.3409, "step": 567 }, { "epoch": 0.03, "grad_norm": 0.8197848051288171, "learning_rate": 2.217017954722873e-06, "loss": 0.3205, "step": 568 }, { "epoch": 0.03, "grad_norm": 0.7890611354954277, "learning_rate": 2.220921155347385e-06, "loss": 0.3537, "step": 569 }, { "epoch": 0.03, "grad_norm": 0.8957270945111125, "learning_rate": 2.2248243559718973e-06, "loss": 0.3298, "step": 570 }, { "epoch": 0.03, "grad_norm": 0.8064455501463538, "learning_rate": 2.2287275565964094e-06, "loss": 0.3408, "step": 571 }, { "epoch": 0.03, "grad_norm": 0.8259409832293229, "learning_rate": 2.232630757220921e-06, "loss": 0.3156, "step": 572 }, { "epoch": 0.03, "grad_norm": 0.7843405470720111, "learning_rate": 2.2365339578454333e-06, "loss": 0.3431, "step": 573 }, { "epoch": 0.03, "grad_norm": 0.7291240582266163, "learning_rate": 2.2404371584699454e-06, "loss": 0.3209, "step": 574 }, { "epoch": 0.03, "grad_norm": 0.6926258757702719, "learning_rate": 2.2443403590944575e-06, "loss": 0.3193, "step": 575 }, { "epoch": 0.03, "grad_norm": 0.8136685263039289, "learning_rate": 2.2482435597189697e-06, "loss": 0.3409, "step": 576 }, { "epoch": 0.03, "grad_norm": 0.7786278946607379, "learning_rate": 2.252146760343482e-06, "loss": 0.3458, "step": 577 }, { "epoch": 0.03, "grad_norm": 0.7629653236626819, "learning_rate": 2.256049960967994e-06, "loss": 0.3361, "step": 578 }, { "epoch": 0.03, "grad_norm": 0.885397145765798, "learning_rate": 2.259953161592506e-06, "loss": 0.3513, "step": 579 }, { "epoch": 0.03, "grad_norm": 0.8379005464133578, "learning_rate": 2.2638563622170182e-06, "loss": 0.3453, "step": 580 }, { "epoch": 0.03, "grad_norm": 0.8003694377306825, "learning_rate": 2.2677595628415304e-06, "loss": 0.3382, "step": 581 }, { "epoch": 0.03, "grad_norm": 0.7976949076061977, "learning_rate": 2.2716627634660425e-06, "loss": 0.3528, "step": 582 }, { "epoch": 0.03, "grad_norm": 0.7606944770531175, "learning_rate": 2.2755659640905547e-06, "loss": 0.3209, "step": 583 }, { "epoch": 0.03, "grad_norm": 0.8176711824838864, "learning_rate": 2.2794691647150664e-06, "loss": 0.33, "step": 584 }, { "epoch": 0.03, "grad_norm": 0.8225948771042985, "learning_rate": 2.2833723653395785e-06, "loss": 0.3481, "step": 585 }, { "epoch": 0.03, "grad_norm": 0.7864326962619818, "learning_rate": 2.2872755659640907e-06, "loss": 0.3433, "step": 586 }, { "epoch": 0.03, "grad_norm": 0.7862938810335386, "learning_rate": 2.291178766588603e-06, "loss": 0.3508, "step": 587 }, { "epoch": 0.03, "grad_norm": 0.7471045848938762, "learning_rate": 2.295081967213115e-06, "loss": 0.3332, "step": 588 }, { "epoch": 0.03, "grad_norm": 0.7554708567751323, "learning_rate": 2.298985167837627e-06, "loss": 0.3187, "step": 589 }, { "epoch": 0.03, "grad_norm": 0.7488333444906121, "learning_rate": 2.3028883684621392e-06, "loss": 0.324, "step": 590 }, { "epoch": 0.03, "grad_norm": 0.7916469200082464, "learning_rate": 2.3067915690866514e-06, "loss": 0.3222, "step": 591 }, { "epoch": 0.03, "grad_norm": 0.7789434123164025, "learning_rate": 2.3106947697111635e-06, "loss": 0.3498, "step": 592 }, { "epoch": 0.03, "grad_norm": 0.8368553226904428, "learning_rate": 2.3145979703356756e-06, "loss": 0.3659, "step": 593 }, { "epoch": 0.03, "grad_norm": 0.7750079989436724, "learning_rate": 2.3185011709601878e-06, "loss": 0.3265, "step": 594 }, { "epoch": 0.03, "grad_norm": 0.816126471368532, "learning_rate": 2.3224043715847e-06, "loss": 0.3315, "step": 595 }, { "epoch": 0.03, "grad_norm": 0.8118383918916602, "learning_rate": 2.3263075722092116e-06, "loss": 0.3291, "step": 596 }, { "epoch": 0.03, "grad_norm": 0.8051406399258569, "learning_rate": 2.3302107728337238e-06, "loss": 0.3427, "step": 597 }, { "epoch": 0.03, "grad_norm": 0.7621211317704851, "learning_rate": 2.334113973458236e-06, "loss": 0.3456, "step": 598 }, { "epoch": 0.03, "grad_norm": 0.8308707652739893, "learning_rate": 2.338017174082748e-06, "loss": 0.3645, "step": 599 }, { "epoch": 0.03, "grad_norm": 0.8660209210650036, "learning_rate": 2.34192037470726e-06, "loss": 0.3674, "step": 600 }, { "epoch": 0.03, "grad_norm": 0.8312514605364693, "learning_rate": 2.3458235753317723e-06, "loss": 0.34, "step": 601 }, { "epoch": 0.03, "grad_norm": 0.8801429915550012, "learning_rate": 2.3497267759562845e-06, "loss": 0.3617, "step": 602 }, { "epoch": 0.03, "grad_norm": 0.8208042665808435, "learning_rate": 2.3536299765807966e-06, "loss": 0.3225, "step": 603 }, { "epoch": 0.03, "grad_norm": 0.7462094770308701, "learning_rate": 2.3575331772053088e-06, "loss": 0.3364, "step": 604 }, { "epoch": 0.03, "grad_norm": 0.8091057086684812, "learning_rate": 2.361436377829821e-06, "loss": 0.3311, "step": 605 }, { "epoch": 0.03, "grad_norm": 0.8225695955214373, "learning_rate": 2.365339578454333e-06, "loss": 0.336, "step": 606 }, { "epoch": 0.03, "grad_norm": 0.8149114966891813, "learning_rate": 2.369242779078845e-06, "loss": 0.3444, "step": 607 }, { "epoch": 0.03, "grad_norm": 0.868508869614167, "learning_rate": 2.373145979703357e-06, "loss": 0.359, "step": 608 }, { "epoch": 0.03, "grad_norm": 0.7823059396365701, "learning_rate": 2.377049180327869e-06, "loss": 0.3215, "step": 609 }, { "epoch": 0.03, "grad_norm": 0.8442542971736989, "learning_rate": 2.380952380952381e-06, "loss": 0.3695, "step": 610 }, { "epoch": 0.03, "grad_norm": 0.7734463953532689, "learning_rate": 2.3848555815768933e-06, "loss": 0.3155, "step": 611 }, { "epoch": 0.03, "grad_norm": 0.776040340990316, "learning_rate": 2.388758782201405e-06, "loss": 0.334, "step": 612 }, { "epoch": 0.03, "grad_norm": 0.8442630728872506, "learning_rate": 2.392661982825917e-06, "loss": 0.3381, "step": 613 }, { "epoch": 0.03, "grad_norm": 0.8529305732296938, "learning_rate": 2.3965651834504293e-06, "loss": 0.3504, "step": 614 }, { "epoch": 0.03, "grad_norm": 0.8448447635977847, "learning_rate": 2.4004683840749415e-06, "loss": 0.3529, "step": 615 }, { "epoch": 0.03, "grad_norm": 0.8940693585726635, "learning_rate": 2.4043715846994536e-06, "loss": 0.3421, "step": 616 }, { "epoch": 0.03, "grad_norm": 0.8079194826483432, "learning_rate": 2.4082747853239657e-06, "loss": 0.3156, "step": 617 }, { "epoch": 0.03, "grad_norm": 0.7960200078427281, "learning_rate": 2.412177985948478e-06, "loss": 0.3675, "step": 618 }, { "epoch": 0.03, "grad_norm": 0.776318123439197, "learning_rate": 2.41608118657299e-06, "loss": 0.3317, "step": 619 }, { "epoch": 0.03, "grad_norm": 0.7936762898762251, "learning_rate": 2.419984387197502e-06, "loss": 0.3409, "step": 620 }, { "epoch": 0.03, "grad_norm": 0.8348333964379586, "learning_rate": 2.4238875878220143e-06, "loss": 0.3369, "step": 621 }, { "epoch": 0.03, "grad_norm": 0.796603314823004, "learning_rate": 2.4277907884465265e-06, "loss": 0.3196, "step": 622 }, { "epoch": 0.03, "grad_norm": 0.7152377817881694, "learning_rate": 2.431693989071038e-06, "loss": 0.3497, "step": 623 }, { "epoch": 0.03, "grad_norm": 0.8066787847079624, "learning_rate": 2.4355971896955503e-06, "loss": 0.3132, "step": 624 }, { "epoch": 0.03, "grad_norm": 0.771806254662875, "learning_rate": 2.4395003903200624e-06, "loss": 0.3402, "step": 625 }, { "epoch": 0.03, "grad_norm": 0.8927890026492508, "learning_rate": 2.4434035909445746e-06, "loss": 0.3244, "step": 626 }, { "epoch": 0.03, "grad_norm": 0.8386291161482408, "learning_rate": 2.4473067915690867e-06, "loss": 0.3386, "step": 627 }, { "epoch": 0.03, "grad_norm": 0.9151412413059024, "learning_rate": 2.451209992193599e-06, "loss": 0.3521, "step": 628 }, { "epoch": 0.03, "grad_norm": 0.8023348195093023, "learning_rate": 2.455113192818111e-06, "loss": 0.3425, "step": 629 }, { "epoch": 0.03, "grad_norm": 0.8951013803694953, "learning_rate": 2.459016393442623e-06, "loss": 0.3669, "step": 630 }, { "epoch": 0.03, "grad_norm": 0.8498256722189474, "learning_rate": 2.4629195940671353e-06, "loss": 0.3383, "step": 631 }, { "epoch": 0.03, "grad_norm": 0.8155112992464246, "learning_rate": 2.4668227946916474e-06, "loss": 0.334, "step": 632 }, { "epoch": 0.03, "grad_norm": 0.8183420547857655, "learning_rate": 2.4707259953161596e-06, "loss": 0.3579, "step": 633 }, { "epoch": 0.03, "grad_norm": 0.9084211502331642, "learning_rate": 2.4746291959406717e-06, "loss": 0.3423, "step": 634 }, { "epoch": 0.03, "grad_norm": 0.8097793659548036, "learning_rate": 2.4785323965651834e-06, "loss": 0.3381, "step": 635 }, { "epoch": 0.03, "grad_norm": 0.7629016132125738, "learning_rate": 2.4824355971896956e-06, "loss": 0.3411, "step": 636 }, { "epoch": 0.03, "grad_norm": 0.906423479129678, "learning_rate": 2.4863387978142077e-06, "loss": 0.3609, "step": 637 }, { "epoch": 0.03, "grad_norm": 0.7919661976209177, "learning_rate": 2.49024199843872e-06, "loss": 0.3301, "step": 638 }, { "epoch": 0.03, "grad_norm": 0.805424102406669, "learning_rate": 2.494145199063232e-06, "loss": 0.348, "step": 639 }, { "epoch": 0.03, "grad_norm": 0.8485367403383143, "learning_rate": 2.498048399687744e-06, "loss": 0.3386, "step": 640 }, { "epoch": 0.03, "grad_norm": 0.766144493946584, "learning_rate": 2.5019516003122563e-06, "loss": 0.3277, "step": 641 }, { "epoch": 0.03, "grad_norm": 0.8365029784346091, "learning_rate": 2.5058548009367684e-06, "loss": 0.3215, "step": 642 }, { "epoch": 0.03, "grad_norm": 0.8521290543890946, "learning_rate": 2.5097580015612806e-06, "loss": 0.3258, "step": 643 }, { "epoch": 0.03, "grad_norm": 0.8197005154682134, "learning_rate": 2.5136612021857927e-06, "loss": 0.3222, "step": 644 }, { "epoch": 0.03, "grad_norm": 0.8893944638116964, "learning_rate": 2.517564402810305e-06, "loss": 0.3387, "step": 645 }, { "epoch": 0.03, "grad_norm": 0.8568159129647409, "learning_rate": 2.521467603434817e-06, "loss": 0.3739, "step": 646 }, { "epoch": 0.03, "grad_norm": 0.8252722425646443, "learning_rate": 2.5253708040593287e-06, "loss": 0.3629, "step": 647 }, { "epoch": 0.03, "grad_norm": 0.7753439123265872, "learning_rate": 2.529274004683841e-06, "loss": 0.3514, "step": 648 }, { "epoch": 0.03, "grad_norm": 0.8356188546927573, "learning_rate": 2.533177205308353e-06, "loss": 0.3162, "step": 649 }, { "epoch": 0.03, "grad_norm": 0.7966738087447028, "learning_rate": 2.537080405932865e-06, "loss": 0.3405, "step": 650 }, { "epoch": 0.03, "grad_norm": 0.8369322969138862, "learning_rate": 2.5409836065573773e-06, "loss": 0.3397, "step": 651 }, { "epoch": 0.03, "grad_norm": 0.853806642287919, "learning_rate": 2.5448868071818894e-06, "loss": 0.333, "step": 652 }, { "epoch": 0.03, "grad_norm": 0.8100218455026337, "learning_rate": 2.5487900078064015e-06, "loss": 0.3537, "step": 653 }, { "epoch": 0.03, "grad_norm": 0.8339253808236127, "learning_rate": 2.5526932084309137e-06, "loss": 0.3567, "step": 654 }, { "epoch": 0.03, "grad_norm": 0.7663236726118475, "learning_rate": 2.556596409055426e-06, "loss": 0.3315, "step": 655 }, { "epoch": 0.03, "grad_norm": 0.7752952418477345, "learning_rate": 2.560499609679938e-06, "loss": 0.321, "step": 656 }, { "epoch": 0.03, "grad_norm": 0.8192925488822871, "learning_rate": 2.56440281030445e-06, "loss": 0.3124, "step": 657 }, { "epoch": 0.03, "grad_norm": 0.7589312094507481, "learning_rate": 2.5683060109289622e-06, "loss": 0.3265, "step": 658 }, { "epoch": 0.03, "grad_norm": 0.8104224889312353, "learning_rate": 2.572209211553474e-06, "loss": 0.3372, "step": 659 }, { "epoch": 0.03, "grad_norm": 0.7654378258806438, "learning_rate": 2.576112412177986e-06, "loss": 0.3416, "step": 660 }, { "epoch": 0.03, "grad_norm": 0.8181306454442836, "learning_rate": 2.5800156128024982e-06, "loss": 0.3542, "step": 661 }, { "epoch": 0.03, "grad_norm": 0.8051998490357577, "learning_rate": 2.5839188134270104e-06, "loss": 0.3226, "step": 662 }, { "epoch": 0.03, "grad_norm": 0.7630363952276362, "learning_rate": 2.5878220140515225e-06, "loss": 0.3399, "step": 663 }, { "epoch": 0.03, "grad_norm": 0.8426353402808956, "learning_rate": 2.5917252146760347e-06, "loss": 0.3429, "step": 664 }, { "epoch": 0.03, "grad_norm": 0.7858997353361868, "learning_rate": 2.595628415300547e-06, "loss": 0.3313, "step": 665 }, { "epoch": 0.03, "grad_norm": 0.7866297452184502, "learning_rate": 2.599531615925059e-06, "loss": 0.3273, "step": 666 }, { "epoch": 0.03, "grad_norm": 0.8221860059287672, "learning_rate": 2.603434816549571e-06, "loss": 0.3213, "step": 667 }, { "epoch": 0.03, "grad_norm": 0.8516859005441, "learning_rate": 2.6073380171740832e-06, "loss": 0.3355, "step": 668 }, { "epoch": 0.03, "grad_norm": 0.8617959254185017, "learning_rate": 2.6112412177985954e-06, "loss": 0.3563, "step": 669 }, { "epoch": 0.03, "grad_norm": 0.8167427412014266, "learning_rate": 2.615144418423107e-06, "loss": 0.3487, "step": 670 }, { "epoch": 0.03, "grad_norm": 0.7964710848187857, "learning_rate": 2.6190476190476192e-06, "loss": 0.3512, "step": 671 }, { "epoch": 0.03, "grad_norm": 0.7691816468401839, "learning_rate": 2.6229508196721314e-06, "loss": 0.3346, "step": 672 }, { "epoch": 0.03, "grad_norm": 0.877804691869427, "learning_rate": 2.6268540202966435e-06, "loss": 0.3426, "step": 673 }, { "epoch": 0.03, "grad_norm": 0.7923258060123479, "learning_rate": 2.6307572209211556e-06, "loss": 0.3429, "step": 674 }, { "epoch": 0.03, "grad_norm": 0.7807139797630203, "learning_rate": 2.6346604215456678e-06, "loss": 0.3219, "step": 675 }, { "epoch": 0.03, "grad_norm": 0.7899172759968026, "learning_rate": 2.63856362217018e-06, "loss": 0.3437, "step": 676 }, { "epoch": 0.03, "grad_norm": 0.8198910429728323, "learning_rate": 2.642466822794692e-06, "loss": 0.3454, "step": 677 }, { "epoch": 0.03, "grad_norm": 0.8529665656085381, "learning_rate": 2.646370023419204e-06, "loss": 0.3393, "step": 678 }, { "epoch": 0.03, "grad_norm": 0.8215443956177911, "learning_rate": 2.6502732240437163e-06, "loss": 0.3164, "step": 679 }, { "epoch": 0.03, "grad_norm": 0.8218102126971579, "learning_rate": 2.6541764246682285e-06, "loss": 0.3286, "step": 680 }, { "epoch": 0.03, "grad_norm": 0.8034161970149091, "learning_rate": 2.6580796252927406e-06, "loss": 0.3287, "step": 681 }, { "epoch": 0.03, "grad_norm": 0.7747677421244202, "learning_rate": 2.6619828259172523e-06, "loss": 0.3234, "step": 682 }, { "epoch": 0.03, "grad_norm": 0.7658455310349103, "learning_rate": 2.6658860265417645e-06, "loss": 0.326, "step": 683 }, { "epoch": 0.03, "grad_norm": 0.810968377218814, "learning_rate": 2.6697892271662766e-06, "loss": 0.3402, "step": 684 }, { "epoch": 0.03, "grad_norm": 0.8442301712666332, "learning_rate": 2.6736924277907888e-06, "loss": 0.3499, "step": 685 }, { "epoch": 0.03, "grad_norm": 0.8439489899628114, "learning_rate": 2.677595628415301e-06, "loss": 0.323, "step": 686 }, { "epoch": 0.03, "grad_norm": 0.8614225957489333, "learning_rate": 2.681498829039813e-06, "loss": 0.33, "step": 687 }, { "epoch": 0.03, "grad_norm": 0.8033934544338569, "learning_rate": 2.685402029664325e-06, "loss": 0.3377, "step": 688 }, { "epoch": 0.03, "grad_norm": 0.8043456222224746, "learning_rate": 2.6893052302888373e-06, "loss": 0.3204, "step": 689 }, { "epoch": 0.03, "grad_norm": 0.8096315000049439, "learning_rate": 2.6932084309133495e-06, "loss": 0.3195, "step": 690 }, { "epoch": 0.03, "grad_norm": 0.7984721034381544, "learning_rate": 2.6971116315378616e-06, "loss": 0.3313, "step": 691 }, { "epoch": 0.03, "grad_norm": 0.7805940232007262, "learning_rate": 2.7010148321623738e-06, "loss": 0.3403, "step": 692 }, { "epoch": 0.03, "grad_norm": 0.7827369219619398, "learning_rate": 2.704918032786886e-06, "loss": 0.3397, "step": 693 }, { "epoch": 0.03, "grad_norm": 0.8049744336281308, "learning_rate": 2.7088212334113976e-06, "loss": 0.3167, "step": 694 }, { "epoch": 0.03, "grad_norm": 0.7631759404007294, "learning_rate": 2.7127244340359097e-06, "loss": 0.3218, "step": 695 }, { "epoch": 0.03, "grad_norm": 0.7691376189334814, "learning_rate": 2.716627634660422e-06, "loss": 0.312, "step": 696 }, { "epoch": 0.03, "grad_norm": 0.7838346235163963, "learning_rate": 2.720530835284934e-06, "loss": 0.3492, "step": 697 }, { "epoch": 0.03, "grad_norm": 0.7862020941494849, "learning_rate": 2.724434035909446e-06, "loss": 0.3408, "step": 698 }, { "epoch": 0.03, "grad_norm": 0.8570278601809549, "learning_rate": 2.7283372365339583e-06, "loss": 0.3641, "step": 699 }, { "epoch": 0.03, "grad_norm": 0.8050586320636697, "learning_rate": 2.7322404371584705e-06, "loss": 0.3062, "step": 700 }, { "epoch": 0.03, "grad_norm": 0.8177632339533573, "learning_rate": 2.7361436377829826e-06, "loss": 0.3332, "step": 701 }, { "epoch": 0.03, "grad_norm": 0.779980016178864, "learning_rate": 2.7400468384074947e-06, "loss": 0.3122, "step": 702 }, { "epoch": 0.03, "grad_norm": 0.8569443672980638, "learning_rate": 2.743950039032007e-06, "loss": 0.3361, "step": 703 }, { "epoch": 0.03, "grad_norm": 0.7950587200221287, "learning_rate": 2.747853239656519e-06, "loss": 0.3497, "step": 704 }, { "epoch": 0.03, "grad_norm": 0.7932447591678385, "learning_rate": 2.7517564402810303e-06, "loss": 0.3356, "step": 705 }, { "epoch": 0.03, "grad_norm": 0.7541101258894791, "learning_rate": 2.7556596409055424e-06, "loss": 0.3446, "step": 706 }, { "epoch": 0.03, "grad_norm": 0.8553408312023306, "learning_rate": 2.7595628415300546e-06, "loss": 0.3457, "step": 707 }, { "epoch": 0.03, "grad_norm": 0.8124230545284448, "learning_rate": 2.7634660421545667e-06, "loss": 0.335, "step": 708 }, { "epoch": 0.03, "grad_norm": 0.8496890969766349, "learning_rate": 2.767369242779079e-06, "loss": 0.3597, "step": 709 }, { "epoch": 0.03, "grad_norm": 0.8451795950035875, "learning_rate": 2.771272443403591e-06, "loss": 0.3671, "step": 710 }, { "epoch": 0.03, "grad_norm": 0.7715712362898155, "learning_rate": 2.775175644028103e-06, "loss": 0.3507, "step": 711 }, { "epoch": 0.03, "grad_norm": 0.7788158978039329, "learning_rate": 2.7790788446526153e-06, "loss": 0.334, "step": 712 }, { "epoch": 0.03, "grad_norm": 0.7978813760225691, "learning_rate": 2.7829820452771274e-06, "loss": 0.325, "step": 713 }, { "epoch": 0.03, "grad_norm": 0.7763649021421601, "learning_rate": 2.786885245901639e-06, "loss": 0.3323, "step": 714 }, { "epoch": 0.03, "grad_norm": 0.8332951878326039, "learning_rate": 2.7907884465261513e-06, "loss": 0.3337, "step": 715 }, { "epoch": 0.03, "grad_norm": 0.8182831701712994, "learning_rate": 2.7946916471506634e-06, "loss": 0.3398, "step": 716 }, { "epoch": 0.03, "grad_norm": 0.7181141025956902, "learning_rate": 2.7985948477751756e-06, "loss": 0.325, "step": 717 }, { "epoch": 0.03, "grad_norm": 0.7495419041903385, "learning_rate": 2.8024980483996877e-06, "loss": 0.3321, "step": 718 }, { "epoch": 0.03, "grad_norm": 0.7689848336555103, "learning_rate": 2.8064012490242e-06, "loss": 0.3336, "step": 719 }, { "epoch": 0.03, "grad_norm": 0.8586580468246836, "learning_rate": 2.810304449648712e-06, "loss": 0.3506, "step": 720 }, { "epoch": 0.03, "grad_norm": 0.7725515440599748, "learning_rate": 2.814207650273224e-06, "loss": 0.3364, "step": 721 }, { "epoch": 0.03, "grad_norm": 0.8162205456560944, "learning_rate": 2.8181108508977363e-06, "loss": 0.3468, "step": 722 }, { "epoch": 0.03, "grad_norm": 0.7342182698461299, "learning_rate": 2.8220140515222484e-06, "loss": 0.3511, "step": 723 }, { "epoch": 0.03, "grad_norm": 0.7376438813335502, "learning_rate": 2.8259172521467606e-06, "loss": 0.3107, "step": 724 }, { "epoch": 0.03, "grad_norm": 0.7853931944131775, "learning_rate": 2.8298204527712727e-06, "loss": 0.3332, "step": 725 }, { "epoch": 0.03, "grad_norm": 0.8187099850589857, "learning_rate": 2.8337236533957844e-06, "loss": 0.3377, "step": 726 }, { "epoch": 0.03, "grad_norm": 0.8274370486385725, "learning_rate": 2.8376268540202966e-06, "loss": 0.3496, "step": 727 }, { "epoch": 0.03, "grad_norm": 0.7894338110040804, "learning_rate": 2.8415300546448087e-06, "loss": 0.3108, "step": 728 }, { "epoch": 0.03, "grad_norm": 0.8427486897690427, "learning_rate": 2.845433255269321e-06, "loss": 0.3401, "step": 729 }, { "epoch": 0.03, "grad_norm": 0.757772305733387, "learning_rate": 2.849336455893833e-06, "loss": 0.3213, "step": 730 }, { "epoch": 0.03, "grad_norm": 0.8923657203583834, "learning_rate": 2.853239656518345e-06, "loss": 0.3354, "step": 731 }, { "epoch": 0.03, "grad_norm": 0.7977685228531756, "learning_rate": 2.8571428571428573e-06, "loss": 0.3318, "step": 732 }, { "epoch": 0.03, "grad_norm": 0.809048644715256, "learning_rate": 2.8610460577673694e-06, "loss": 0.3346, "step": 733 }, { "epoch": 0.03, "grad_norm": 0.793338825118952, "learning_rate": 2.8649492583918815e-06, "loss": 0.3217, "step": 734 }, { "epoch": 0.03, "grad_norm": 0.7942420180904798, "learning_rate": 2.8688524590163937e-06, "loss": 0.3377, "step": 735 }, { "epoch": 0.03, "grad_norm": 0.8284604305290624, "learning_rate": 2.872755659640906e-06, "loss": 0.3268, "step": 736 }, { "epoch": 0.03, "grad_norm": 0.8219416473293603, "learning_rate": 2.8766588602654175e-06, "loss": 0.3317, "step": 737 }, { "epoch": 0.03, "grad_norm": 0.7896923942535435, "learning_rate": 2.8805620608899297e-06, "loss": 0.3154, "step": 738 }, { "epoch": 0.03, "grad_norm": 0.8486812000557402, "learning_rate": 2.884465261514442e-06, "loss": 0.3261, "step": 739 }, { "epoch": 0.03, "grad_norm": 0.7936555710601935, "learning_rate": 2.888368462138954e-06, "loss": 0.3116, "step": 740 }, { "epoch": 0.03, "grad_norm": 0.7450683315725468, "learning_rate": 2.892271662763466e-06, "loss": 0.3053, "step": 741 }, { "epoch": 0.03, "grad_norm": 0.8234800384273521, "learning_rate": 2.8961748633879782e-06, "loss": 0.3349, "step": 742 }, { "epoch": 0.03, "grad_norm": 0.8156383945114665, "learning_rate": 2.9000780640124904e-06, "loss": 0.3381, "step": 743 }, { "epoch": 0.03, "grad_norm": 0.8410110252236526, "learning_rate": 2.9039812646370025e-06, "loss": 0.354, "step": 744 }, { "epoch": 0.03, "grad_norm": 0.916437311505579, "learning_rate": 2.9078844652615147e-06, "loss": 0.3396, "step": 745 }, { "epoch": 0.03, "grad_norm": 0.7834399088297331, "learning_rate": 2.911787665886027e-06, "loss": 0.3251, "step": 746 }, { "epoch": 0.03, "grad_norm": 0.8552350688832298, "learning_rate": 2.915690866510539e-06, "loss": 0.3303, "step": 747 }, { "epoch": 0.04, "grad_norm": 0.7241434188411299, "learning_rate": 2.919594067135051e-06, "loss": 0.3089, "step": 748 }, { "epoch": 0.04, "grad_norm": 0.7812130167151623, "learning_rate": 2.923497267759563e-06, "loss": 0.3188, "step": 749 }, { "epoch": 0.04, "grad_norm": 0.8122469616416123, "learning_rate": 2.927400468384075e-06, "loss": 0.3372, "step": 750 }, { "epoch": 0.04, "grad_norm": 0.8029421057407501, "learning_rate": 2.931303669008587e-06, "loss": 0.3301, "step": 751 }, { "epoch": 0.04, "grad_norm": 0.7566646579184225, "learning_rate": 2.9352068696330992e-06, "loss": 0.3185, "step": 752 }, { "epoch": 0.04, "grad_norm": 0.7968556552003517, "learning_rate": 2.9391100702576114e-06, "loss": 0.3448, "step": 753 }, { "epoch": 0.04, "grad_norm": 0.8016038204651001, "learning_rate": 2.9430132708821235e-06, "loss": 0.333, "step": 754 }, { "epoch": 0.04, "grad_norm": 0.8618607621424933, "learning_rate": 2.9469164715066356e-06, "loss": 0.3184, "step": 755 }, { "epoch": 0.04, "grad_norm": 0.8302300219646712, "learning_rate": 2.9508196721311478e-06, "loss": 0.3337, "step": 756 }, { "epoch": 0.04, "grad_norm": 0.799091014412622, "learning_rate": 2.95472287275566e-06, "loss": 0.325, "step": 757 }, { "epoch": 0.04, "grad_norm": 0.7691156714790359, "learning_rate": 2.958626073380172e-06, "loss": 0.3311, "step": 758 }, { "epoch": 0.04, "grad_norm": 0.8121559532747387, "learning_rate": 2.962529274004684e-06, "loss": 0.3274, "step": 759 }, { "epoch": 0.04, "grad_norm": 0.8344345712853426, "learning_rate": 2.9664324746291963e-06, "loss": 0.3277, "step": 760 }, { "epoch": 0.04, "grad_norm": 0.8253657271612495, "learning_rate": 2.970335675253708e-06, "loss": 0.3427, "step": 761 }, { "epoch": 0.04, "grad_norm": 0.8156392379208636, "learning_rate": 2.97423887587822e-06, "loss": 0.3511, "step": 762 }, { "epoch": 0.04, "grad_norm": 0.7541797466669938, "learning_rate": 2.9781420765027323e-06, "loss": 0.3347, "step": 763 }, { "epoch": 0.04, "grad_norm": 0.8363411941041499, "learning_rate": 2.9820452771272445e-06, "loss": 0.3336, "step": 764 }, { "epoch": 0.04, "grad_norm": 0.7204706114714415, "learning_rate": 2.9859484777517566e-06, "loss": 0.3185, "step": 765 }, { "epoch": 0.04, "grad_norm": 0.8532660169644781, "learning_rate": 2.9898516783762688e-06, "loss": 0.3295, "step": 766 }, { "epoch": 0.04, "grad_norm": 0.7282820054166352, "learning_rate": 2.993754879000781e-06, "loss": 0.3193, "step": 767 }, { "epoch": 0.04, "grad_norm": 0.7971398304095669, "learning_rate": 2.997658079625293e-06, "loss": 0.3187, "step": 768 }, { "epoch": 0.04, "grad_norm": 0.8190063663785068, "learning_rate": 3.001561280249805e-06, "loss": 0.3194, "step": 769 }, { "epoch": 0.04, "grad_norm": 0.8616919609003421, "learning_rate": 3.0054644808743173e-06, "loss": 0.3084, "step": 770 }, { "epoch": 0.04, "grad_norm": 0.811698617625455, "learning_rate": 3.0093676814988295e-06, "loss": 0.3349, "step": 771 }, { "epoch": 0.04, "grad_norm": 0.7756913400165172, "learning_rate": 3.0132708821233416e-06, "loss": 0.3203, "step": 772 }, { "epoch": 0.04, "grad_norm": 0.9150608313252299, "learning_rate": 3.0171740827478533e-06, "loss": 0.358, "step": 773 }, { "epoch": 0.04, "grad_norm": 0.7522790028830695, "learning_rate": 3.0210772833723655e-06, "loss": 0.3302, "step": 774 }, { "epoch": 0.04, "grad_norm": 0.7782927956049192, "learning_rate": 3.0249804839968776e-06, "loss": 0.3072, "step": 775 }, { "epoch": 0.04, "grad_norm": 0.7995018967422126, "learning_rate": 3.0288836846213897e-06, "loss": 0.3155, "step": 776 }, { "epoch": 0.04, "grad_norm": 0.8307098312387007, "learning_rate": 3.032786885245902e-06, "loss": 0.3338, "step": 777 }, { "epoch": 0.04, "grad_norm": 0.7731822591171632, "learning_rate": 3.036690085870414e-06, "loss": 0.2989, "step": 778 }, { "epoch": 0.04, "grad_norm": 0.841444102203878, "learning_rate": 3.040593286494926e-06, "loss": 0.3577, "step": 779 }, { "epoch": 0.04, "grad_norm": 0.9089754300438045, "learning_rate": 3.0444964871194383e-06, "loss": 0.333, "step": 780 }, { "epoch": 0.04, "grad_norm": 0.8462727412075749, "learning_rate": 3.0483996877439504e-06, "loss": 0.3414, "step": 781 }, { "epoch": 0.04, "grad_norm": 0.8228273244503667, "learning_rate": 3.0523028883684626e-06, "loss": 0.3344, "step": 782 }, { "epoch": 0.04, "grad_norm": 0.8447388776746985, "learning_rate": 3.0562060889929747e-06, "loss": 0.3137, "step": 783 }, { "epoch": 0.04, "grad_norm": 0.8754394322361377, "learning_rate": 3.0601092896174864e-06, "loss": 0.3395, "step": 784 }, { "epoch": 0.04, "grad_norm": 0.838103587023006, "learning_rate": 3.0640124902419986e-06, "loss": 0.3337, "step": 785 }, { "epoch": 0.04, "grad_norm": 0.7500904029855162, "learning_rate": 3.0679156908665107e-06, "loss": 0.3338, "step": 786 }, { "epoch": 0.04, "grad_norm": 0.8624169718221015, "learning_rate": 3.071818891491023e-06, "loss": 0.3436, "step": 787 }, { "epoch": 0.04, "grad_norm": 1.052708452495042, "learning_rate": 3.075722092115535e-06, "loss": 0.3508, "step": 788 }, { "epoch": 0.04, "grad_norm": 0.8891424545077748, "learning_rate": 3.079625292740047e-06, "loss": 0.3318, "step": 789 }, { "epoch": 0.04, "grad_norm": 0.9109984378098749, "learning_rate": 3.0835284933645593e-06, "loss": 0.3675, "step": 790 }, { "epoch": 0.04, "grad_norm": 1.0039460159844047, "learning_rate": 3.0874316939890714e-06, "loss": 0.3392, "step": 791 }, { "epoch": 0.04, "grad_norm": 0.8005122878965755, "learning_rate": 3.0913348946135836e-06, "loss": 0.3538, "step": 792 }, { "epoch": 0.04, "grad_norm": 0.7644232596544708, "learning_rate": 3.0952380952380957e-06, "loss": 0.3504, "step": 793 }, { "epoch": 0.04, "grad_norm": 0.7994143003467342, "learning_rate": 3.099141295862608e-06, "loss": 0.3262, "step": 794 }, { "epoch": 0.04, "grad_norm": 0.8221448503875004, "learning_rate": 3.10304449648712e-06, "loss": 0.3246, "step": 795 }, { "epoch": 0.04, "grad_norm": 0.7647300450833371, "learning_rate": 3.1069476971116317e-06, "loss": 0.3218, "step": 796 }, { "epoch": 0.04, "grad_norm": 0.8265657721600739, "learning_rate": 3.110850897736144e-06, "loss": 0.3511, "step": 797 }, { "epoch": 0.04, "grad_norm": 0.8266120344513347, "learning_rate": 3.114754098360656e-06, "loss": 0.3158, "step": 798 }, { "epoch": 0.04, "grad_norm": 0.8209161618407386, "learning_rate": 3.118657298985168e-06, "loss": 0.3275, "step": 799 }, { "epoch": 0.04, "grad_norm": 0.7771361400125482, "learning_rate": 3.1225604996096803e-06, "loss": 0.3566, "step": 800 }, { "epoch": 0.04, "grad_norm": 0.793282863891631, "learning_rate": 3.1264637002341924e-06, "loss": 0.3359, "step": 801 }, { "epoch": 0.04, "grad_norm": 0.8635743381528056, "learning_rate": 3.1303669008587046e-06, "loss": 0.3387, "step": 802 }, { "epoch": 0.04, "grad_norm": 0.8536329729255786, "learning_rate": 3.1342701014832167e-06, "loss": 0.3282, "step": 803 }, { "epoch": 0.04, "grad_norm": 0.8376574890269507, "learning_rate": 3.138173302107729e-06, "loss": 0.3207, "step": 804 }, { "epoch": 0.04, "grad_norm": 0.9280302526645755, "learning_rate": 3.142076502732241e-06, "loss": 0.3331, "step": 805 }, { "epoch": 0.04, "grad_norm": 0.8134711744056579, "learning_rate": 3.145979703356753e-06, "loss": 0.3303, "step": 806 }, { "epoch": 0.04, "grad_norm": 0.7960738984951738, "learning_rate": 3.1498829039812653e-06, "loss": 0.3396, "step": 807 }, { "epoch": 0.04, "grad_norm": 0.8022116621207531, "learning_rate": 3.153786104605777e-06, "loss": 0.3508, "step": 808 }, { "epoch": 0.04, "grad_norm": 0.916071245771324, "learning_rate": 3.157689305230289e-06, "loss": 0.3507, "step": 809 }, { "epoch": 0.04, "grad_norm": 0.7857188471874412, "learning_rate": 3.1615925058548013e-06, "loss": 0.3443, "step": 810 }, { "epoch": 0.04, "grad_norm": 0.7441787885157019, "learning_rate": 3.1654957064793134e-06, "loss": 0.326, "step": 811 }, { "epoch": 0.04, "grad_norm": 0.7444036351495855, "learning_rate": 3.1693989071038255e-06, "loss": 0.3331, "step": 812 }, { "epoch": 0.04, "grad_norm": 0.8392784058471788, "learning_rate": 3.1733021077283377e-06, "loss": 0.3325, "step": 813 }, { "epoch": 0.04, "grad_norm": 0.803343334347028, "learning_rate": 3.17720530835285e-06, "loss": 0.3359, "step": 814 }, { "epoch": 0.04, "grad_norm": 0.7522728429933897, "learning_rate": 3.181108508977362e-06, "loss": 0.3316, "step": 815 }, { "epoch": 0.04, "grad_norm": 0.8134946439357837, "learning_rate": 3.185011709601874e-06, "loss": 0.3503, "step": 816 }, { "epoch": 0.04, "grad_norm": 0.8752313960312206, "learning_rate": 3.1889149102263862e-06, "loss": 0.3666, "step": 817 }, { "epoch": 0.04, "grad_norm": 0.7806651192201576, "learning_rate": 3.1928181108508984e-06, "loss": 0.3511, "step": 818 }, { "epoch": 0.04, "grad_norm": 0.7680533420472494, "learning_rate": 3.1967213114754105e-06, "loss": 0.3318, "step": 819 }, { "epoch": 0.04, "grad_norm": 0.83716733877137, "learning_rate": 3.2006245120999222e-06, "loss": 0.3374, "step": 820 }, { "epoch": 0.04, "grad_norm": 0.8204507134205091, "learning_rate": 3.2045277127244344e-06, "loss": 0.3365, "step": 821 }, { "epoch": 0.04, "grad_norm": 0.7842270747171057, "learning_rate": 3.2084309133489465e-06, "loss": 0.3227, "step": 822 }, { "epoch": 0.04, "grad_norm": 0.7417054584959047, "learning_rate": 3.2123341139734587e-06, "loss": 0.3293, "step": 823 }, { "epoch": 0.04, "grad_norm": 0.7188135181900579, "learning_rate": 3.216237314597971e-06, "loss": 0.316, "step": 824 }, { "epoch": 0.04, "grad_norm": 0.8035596046328799, "learning_rate": 3.220140515222483e-06, "loss": 0.3372, "step": 825 }, { "epoch": 0.04, "grad_norm": 0.7283596883233108, "learning_rate": 3.224043715846995e-06, "loss": 0.3278, "step": 826 }, { "epoch": 0.04, "grad_norm": 0.7509435619880571, "learning_rate": 3.2279469164715072e-06, "loss": 0.3239, "step": 827 }, { "epoch": 0.04, "grad_norm": 0.7211576057815914, "learning_rate": 3.2318501170960194e-06, "loss": 0.3213, "step": 828 }, { "epoch": 0.04, "grad_norm": 0.726394665486096, "learning_rate": 3.2357533177205315e-06, "loss": 0.3183, "step": 829 }, { "epoch": 0.04, "grad_norm": 0.7713345483006055, "learning_rate": 3.2396565183450436e-06, "loss": 0.3227, "step": 830 }, { "epoch": 0.04, "grad_norm": 0.8340463553704909, "learning_rate": 3.2435597189695554e-06, "loss": 0.3416, "step": 831 }, { "epoch": 0.04, "grad_norm": 0.7545648500036302, "learning_rate": 3.2474629195940675e-06, "loss": 0.3427, "step": 832 }, { "epoch": 0.04, "grad_norm": 0.7312445566337831, "learning_rate": 3.2513661202185792e-06, "loss": 0.3327, "step": 833 }, { "epoch": 0.04, "grad_norm": 0.8230221181091733, "learning_rate": 3.2552693208430914e-06, "loss": 0.3396, "step": 834 }, { "epoch": 0.04, "grad_norm": 0.8009610524453586, "learning_rate": 3.2591725214676035e-06, "loss": 0.32, "step": 835 }, { "epoch": 0.04, "grad_norm": 0.7457447458736481, "learning_rate": 3.2630757220921156e-06, "loss": 0.3063, "step": 836 }, { "epoch": 0.04, "grad_norm": 0.7684066144698827, "learning_rate": 3.2669789227166278e-06, "loss": 0.3165, "step": 837 }, { "epoch": 0.04, "grad_norm": 0.8522804128859687, "learning_rate": 3.27088212334114e-06, "loss": 0.3393, "step": 838 }, { "epoch": 0.04, "grad_norm": 0.7635423777074165, "learning_rate": 3.274785323965652e-06, "loss": 0.3181, "step": 839 }, { "epoch": 0.04, "grad_norm": 0.7636316455379989, "learning_rate": 3.2786885245901638e-06, "loss": 0.3481, "step": 840 }, { "epoch": 0.04, "grad_norm": 0.7962142143126167, "learning_rate": 3.282591725214676e-06, "loss": 0.3634, "step": 841 }, { "epoch": 0.04, "grad_norm": 0.797541005449039, "learning_rate": 3.286494925839188e-06, "loss": 0.341, "step": 842 }, { "epoch": 0.04, "grad_norm": 0.7915566085751801, "learning_rate": 3.2903981264637e-06, "loss": 0.3378, "step": 843 }, { "epoch": 0.04, "grad_norm": 0.8072465079859944, "learning_rate": 3.2943013270882123e-06, "loss": 0.3385, "step": 844 }, { "epoch": 0.04, "grad_norm": 0.769720696479069, "learning_rate": 3.2982045277127245e-06, "loss": 0.3264, "step": 845 }, { "epoch": 0.04, "grad_norm": 0.8117625605037423, "learning_rate": 3.3021077283372366e-06, "loss": 0.3332, "step": 846 }, { "epoch": 0.04, "grad_norm": 0.8080889046006523, "learning_rate": 3.3060109289617488e-06, "loss": 0.3328, "step": 847 }, { "epoch": 0.04, "grad_norm": 0.9152136741387104, "learning_rate": 3.309914129586261e-06, "loss": 0.3362, "step": 848 }, { "epoch": 0.04, "grad_norm": 0.718083494197403, "learning_rate": 3.313817330210773e-06, "loss": 0.2867, "step": 849 }, { "epoch": 0.04, "grad_norm": 0.775350908486815, "learning_rate": 3.317720530835285e-06, "loss": 0.312, "step": 850 }, { "epoch": 0.04, "grad_norm": 0.8789946035530294, "learning_rate": 3.321623731459797e-06, "loss": 0.3269, "step": 851 }, { "epoch": 0.04, "grad_norm": 0.793020921778407, "learning_rate": 3.325526932084309e-06, "loss": 0.3308, "step": 852 }, { "epoch": 0.04, "grad_norm": 0.7259971758792938, "learning_rate": 3.329430132708821e-06, "loss": 0.3297, "step": 853 }, { "epoch": 0.04, "grad_norm": 0.8153957475378928, "learning_rate": 3.3333333333333333e-06, "loss": 0.3226, "step": 854 }, { "epoch": 0.04, "grad_norm": 0.7650808299692285, "learning_rate": 3.3372365339578455e-06, "loss": 0.3195, "step": 855 }, { "epoch": 0.04, "grad_norm": 0.8111772008170571, "learning_rate": 3.3411397345823576e-06, "loss": 0.3313, "step": 856 }, { "epoch": 0.04, "grad_norm": 0.761890415525151, "learning_rate": 3.3450429352068697e-06, "loss": 0.3417, "step": 857 }, { "epoch": 0.04, "grad_norm": 0.8398770092534883, "learning_rate": 3.348946135831382e-06, "loss": 0.3438, "step": 858 }, { "epoch": 0.04, "grad_norm": 0.9113600145812258, "learning_rate": 3.352849336455894e-06, "loss": 0.3268, "step": 859 }, { "epoch": 0.04, "grad_norm": 0.7327985633088402, "learning_rate": 3.356752537080406e-06, "loss": 0.3147, "step": 860 }, { "epoch": 0.04, "grad_norm": 0.7747300652496784, "learning_rate": 3.3606557377049183e-06, "loss": 0.3369, "step": 861 }, { "epoch": 0.04, "grad_norm": 0.8365518812699518, "learning_rate": 3.3645589383294304e-06, "loss": 0.3526, "step": 862 }, { "epoch": 0.04, "grad_norm": 0.7376035705265241, "learning_rate": 3.368462138953942e-06, "loss": 0.3184, "step": 863 }, { "epoch": 0.04, "grad_norm": 0.7768389737953629, "learning_rate": 3.3723653395784543e-06, "loss": 0.3366, "step": 864 }, { "epoch": 0.04, "grad_norm": 0.7722353163854936, "learning_rate": 3.3762685402029664e-06, "loss": 0.3379, "step": 865 }, { "epoch": 0.04, "grad_norm": 0.7644296862178812, "learning_rate": 3.3801717408274786e-06, "loss": 0.3139, "step": 866 }, { "epoch": 0.04, "grad_norm": 0.7227476378947277, "learning_rate": 3.3840749414519907e-06, "loss": 0.32, "step": 867 }, { "epoch": 0.04, "grad_norm": 0.8275473037758273, "learning_rate": 3.387978142076503e-06, "loss": 0.3553, "step": 868 }, { "epoch": 0.04, "grad_norm": 0.865947688952979, "learning_rate": 3.391881342701015e-06, "loss": 0.3446, "step": 869 }, { "epoch": 0.04, "grad_norm": 0.8130923373070527, "learning_rate": 3.395784543325527e-06, "loss": 0.3263, "step": 870 }, { "epoch": 0.04, "grad_norm": 0.7676129307866603, "learning_rate": 3.3996877439500393e-06, "loss": 0.337, "step": 871 }, { "epoch": 0.04, "grad_norm": 0.7797667888693276, "learning_rate": 3.4035909445745514e-06, "loss": 0.337, "step": 872 }, { "epoch": 0.04, "grad_norm": 0.8253042591862586, "learning_rate": 3.4074941451990636e-06, "loss": 0.321, "step": 873 }, { "epoch": 0.04, "grad_norm": 0.8117768546323659, "learning_rate": 3.4113973458235757e-06, "loss": 0.3355, "step": 874 }, { "epoch": 0.04, "grad_norm": 0.7738043801327601, "learning_rate": 3.4153005464480874e-06, "loss": 0.3237, "step": 875 }, { "epoch": 0.04, "grad_norm": 0.7271047040670663, "learning_rate": 3.4192037470725996e-06, "loss": 0.3158, "step": 876 }, { "epoch": 0.04, "grad_norm": 0.8087840120826398, "learning_rate": 3.4231069476971117e-06, "loss": 0.3191, "step": 877 }, { "epoch": 0.04, "grad_norm": 0.7941412633035108, "learning_rate": 3.427010148321624e-06, "loss": 0.3388, "step": 878 }, { "epoch": 0.04, "grad_norm": 0.7877303502580432, "learning_rate": 3.430913348946136e-06, "loss": 0.3453, "step": 879 }, { "epoch": 0.04, "grad_norm": 0.8154554299572914, "learning_rate": 3.434816549570648e-06, "loss": 0.3351, "step": 880 }, { "epoch": 0.04, "grad_norm": 0.7704542209398879, "learning_rate": 3.4387197501951603e-06, "loss": 0.3334, "step": 881 }, { "epoch": 0.04, "grad_norm": 0.7610813178384717, "learning_rate": 3.4426229508196724e-06, "loss": 0.3363, "step": 882 }, { "epoch": 0.04, "grad_norm": 0.8123810668489388, "learning_rate": 3.4465261514441846e-06, "loss": 0.3122, "step": 883 }, { "epoch": 0.04, "grad_norm": 0.8531157012395201, "learning_rate": 3.4504293520686967e-06, "loss": 0.3422, "step": 884 }, { "epoch": 0.04, "grad_norm": 0.7917430412840865, "learning_rate": 3.454332552693209e-06, "loss": 0.363, "step": 885 }, { "epoch": 0.04, "grad_norm": 0.7724838395015216, "learning_rate": 3.458235753317721e-06, "loss": 0.3266, "step": 886 }, { "epoch": 0.04, "grad_norm": 0.7393769065386122, "learning_rate": 3.4621389539422327e-06, "loss": 0.2936, "step": 887 }, { "epoch": 0.04, "grad_norm": 1.0559936009065718, "learning_rate": 3.466042154566745e-06, "loss": 0.3553, "step": 888 }, { "epoch": 0.04, "grad_norm": 0.8293808528940515, "learning_rate": 3.469945355191257e-06, "loss": 0.3443, "step": 889 }, { "epoch": 0.04, "grad_norm": 0.7635639412811581, "learning_rate": 3.473848555815769e-06, "loss": 0.3178, "step": 890 }, { "epoch": 0.04, "grad_norm": 0.8130042775388293, "learning_rate": 3.4777517564402813e-06, "loss": 0.3094, "step": 891 }, { "epoch": 0.04, "grad_norm": 0.9537634622389648, "learning_rate": 3.4816549570647934e-06, "loss": 0.3189, "step": 892 }, { "epoch": 0.04, "grad_norm": 0.7418089558489026, "learning_rate": 3.4855581576893055e-06, "loss": 0.3367, "step": 893 }, { "epoch": 0.04, "grad_norm": 0.956840469122639, "learning_rate": 3.4894613583138177e-06, "loss": 0.3492, "step": 894 }, { "epoch": 0.04, "grad_norm": 0.8172849941417073, "learning_rate": 3.49336455893833e-06, "loss": 0.3452, "step": 895 }, { "epoch": 0.04, "grad_norm": 0.7510868761341295, "learning_rate": 3.497267759562842e-06, "loss": 0.3177, "step": 896 }, { "epoch": 0.04, "grad_norm": 0.8060394791917349, "learning_rate": 3.501170960187354e-06, "loss": 0.3353, "step": 897 }, { "epoch": 0.04, "grad_norm": 0.806716986079775, "learning_rate": 3.505074160811866e-06, "loss": 0.3048, "step": 898 }, { "epoch": 0.04, "grad_norm": 0.8600109143743565, "learning_rate": 3.508977361436378e-06, "loss": 0.3226, "step": 899 }, { "epoch": 0.04, "grad_norm": 0.7713881667032363, "learning_rate": 3.51288056206089e-06, "loss": 0.3228, "step": 900 }, { "epoch": 0.04, "grad_norm": 0.8015311464017524, "learning_rate": 3.5167837626854022e-06, "loss": 0.346, "step": 901 }, { "epoch": 0.04, "grad_norm": 0.942495571454301, "learning_rate": 3.5206869633099144e-06, "loss": 0.3549, "step": 902 }, { "epoch": 0.04, "grad_norm": 0.7194074341345336, "learning_rate": 3.5245901639344265e-06, "loss": 0.3305, "step": 903 }, { "epoch": 0.04, "grad_norm": 0.7931051777866895, "learning_rate": 3.5284933645589387e-06, "loss": 0.3153, "step": 904 }, { "epoch": 0.04, "grad_norm": 0.8029643392390938, "learning_rate": 3.532396565183451e-06, "loss": 0.3365, "step": 905 }, { "epoch": 0.04, "grad_norm": 0.7611429213543123, "learning_rate": 3.536299765807963e-06, "loss": 0.3275, "step": 906 }, { "epoch": 0.04, "grad_norm": 0.7455878952583745, "learning_rate": 3.540202966432475e-06, "loss": 0.3199, "step": 907 }, { "epoch": 0.04, "grad_norm": 0.9290360237593249, "learning_rate": 3.5441061670569872e-06, "loss": 0.3595, "step": 908 }, { "epoch": 0.04, "grad_norm": 0.7949664721782665, "learning_rate": 3.5480093676814994e-06, "loss": 0.3342, "step": 909 }, { "epoch": 0.04, "grad_norm": 0.8055983880398003, "learning_rate": 3.551912568306011e-06, "loss": 0.3361, "step": 910 }, { "epoch": 0.04, "grad_norm": 0.7882975494866825, "learning_rate": 3.5558157689305232e-06, "loss": 0.3192, "step": 911 }, { "epoch": 0.04, "grad_norm": 0.7909086937151064, "learning_rate": 3.5597189695550354e-06, "loss": 0.3285, "step": 912 }, { "epoch": 0.04, "grad_norm": 0.7765582277116957, "learning_rate": 3.5636221701795475e-06, "loss": 0.3142, "step": 913 }, { "epoch": 0.04, "grad_norm": 0.8367425350249224, "learning_rate": 3.5675253708040596e-06, "loss": 0.3329, "step": 914 }, { "epoch": 0.04, "grad_norm": 0.8409167641030671, "learning_rate": 3.5714285714285718e-06, "loss": 0.3098, "step": 915 }, { "epoch": 0.04, "grad_norm": 0.8301927914526555, "learning_rate": 3.575331772053084e-06, "loss": 0.3414, "step": 916 }, { "epoch": 0.04, "grad_norm": 0.8491807077348973, "learning_rate": 3.579234972677596e-06, "loss": 0.3327, "step": 917 }, { "epoch": 0.04, "grad_norm": 0.8440584749190931, "learning_rate": 3.583138173302108e-06, "loss": 0.3315, "step": 918 }, { "epoch": 0.04, "grad_norm": 0.8440893986467001, "learning_rate": 3.5870413739266203e-06, "loss": 0.3487, "step": 919 }, { "epoch": 0.04, "grad_norm": 0.859290425354415, "learning_rate": 3.5909445745511325e-06, "loss": 0.3163, "step": 920 }, { "epoch": 0.04, "grad_norm": 0.7919663481629915, "learning_rate": 3.5948477751756446e-06, "loss": 0.3058, "step": 921 }, { "epoch": 0.04, "grad_norm": 0.8251123311409907, "learning_rate": 3.5987509758001563e-06, "loss": 0.3264, "step": 922 }, { "epoch": 0.04, "grad_norm": 0.7946474838515742, "learning_rate": 3.6026541764246685e-06, "loss": 0.3276, "step": 923 }, { "epoch": 0.04, "grad_norm": 0.8532649866655985, "learning_rate": 3.6065573770491806e-06, "loss": 0.3395, "step": 924 }, { "epoch": 0.04, "grad_norm": 0.8548919937304804, "learning_rate": 3.6104605776736928e-06, "loss": 0.3369, "step": 925 }, { "epoch": 0.04, "grad_norm": 0.8955005454822654, "learning_rate": 3.614363778298205e-06, "loss": 0.3207, "step": 926 }, { "epoch": 0.04, "grad_norm": 0.7632648826424214, "learning_rate": 3.618266978922717e-06, "loss": 0.3437, "step": 927 }, { "epoch": 0.04, "grad_norm": 0.8072860604476365, "learning_rate": 3.622170179547229e-06, "loss": 0.3255, "step": 928 }, { "epoch": 0.04, "grad_norm": 0.8760802832467262, "learning_rate": 3.6260733801717413e-06, "loss": 0.3646, "step": 929 }, { "epoch": 0.04, "grad_norm": 0.8656503814453173, "learning_rate": 3.6299765807962535e-06, "loss": 0.3252, "step": 930 }, { "epoch": 0.04, "grad_norm": 0.8130250774495784, "learning_rate": 3.6338797814207656e-06, "loss": 0.3484, "step": 931 }, { "epoch": 0.04, "grad_norm": 0.7909203448417327, "learning_rate": 3.6377829820452777e-06, "loss": 0.3218, "step": 932 }, { "epoch": 0.04, "grad_norm": 0.6964803463182522, "learning_rate": 3.64168618266979e-06, "loss": 0.3119, "step": 933 }, { "epoch": 0.04, "grad_norm": 0.8720178364428258, "learning_rate": 3.6455893832943016e-06, "loss": 0.3593, "step": 934 }, { "epoch": 0.04, "grad_norm": 0.8584073671761802, "learning_rate": 3.6494925839188137e-06, "loss": 0.3391, "step": 935 }, { "epoch": 0.04, "grad_norm": 0.7920450003494458, "learning_rate": 3.653395784543326e-06, "loss": 0.3336, "step": 936 }, { "epoch": 0.04, "grad_norm": 0.8426930769580041, "learning_rate": 3.657298985167838e-06, "loss": 0.3311, "step": 937 }, { "epoch": 0.04, "grad_norm": 0.8689801617084886, "learning_rate": 3.66120218579235e-06, "loss": 0.3394, "step": 938 }, { "epoch": 0.04, "grad_norm": 0.8286000493056713, "learning_rate": 3.6651053864168623e-06, "loss": 0.3222, "step": 939 }, { "epoch": 0.04, "grad_norm": 0.800336495310161, "learning_rate": 3.6690085870413744e-06, "loss": 0.3549, "step": 940 }, { "epoch": 0.04, "grad_norm": 0.8617829184382465, "learning_rate": 3.6729117876658866e-06, "loss": 0.3202, "step": 941 }, { "epoch": 0.04, "grad_norm": 0.8689088516749263, "learning_rate": 3.6768149882903987e-06, "loss": 0.3409, "step": 942 }, { "epoch": 0.04, "grad_norm": 0.8013660151983368, "learning_rate": 3.680718188914911e-06, "loss": 0.3298, "step": 943 }, { "epoch": 0.04, "grad_norm": 0.7557451681966474, "learning_rate": 3.684621389539423e-06, "loss": 0.3365, "step": 944 }, { "epoch": 0.04, "grad_norm": 0.8439126755160394, "learning_rate": 3.6885245901639347e-06, "loss": 0.3452, "step": 945 }, { "epoch": 0.04, "grad_norm": 0.8430614543303967, "learning_rate": 3.692427790788447e-06, "loss": 0.3352, "step": 946 }, { "epoch": 0.04, "grad_norm": 0.7856132455875008, "learning_rate": 3.696330991412959e-06, "loss": 0.3264, "step": 947 }, { "epoch": 0.04, "grad_norm": 0.7387629642401166, "learning_rate": 3.700234192037471e-06, "loss": 0.3119, "step": 948 }, { "epoch": 0.04, "grad_norm": 0.813033092286266, "learning_rate": 3.7041373926619833e-06, "loss": 0.3565, "step": 949 }, { "epoch": 0.04, "grad_norm": 0.7596858845415385, "learning_rate": 3.7080405932864954e-06, "loss": 0.3049, "step": 950 }, { "epoch": 0.04, "grad_norm": 0.8340576417695552, "learning_rate": 3.7119437939110076e-06, "loss": 0.3385, "step": 951 }, { "epoch": 0.04, "grad_norm": 0.7365498775997931, "learning_rate": 3.7158469945355197e-06, "loss": 0.3221, "step": 952 }, { "epoch": 0.04, "grad_norm": 0.7615740593876276, "learning_rate": 3.719750195160032e-06, "loss": 0.3147, "step": 953 }, { "epoch": 0.04, "grad_norm": 0.8722114257350929, "learning_rate": 3.723653395784544e-06, "loss": 0.3215, "step": 954 }, { "epoch": 0.04, "grad_norm": 0.7598820261402331, "learning_rate": 3.727556596409056e-06, "loss": 0.326, "step": 955 }, { "epoch": 0.04, "grad_norm": 0.8207602557816609, "learning_rate": 3.7314597970335683e-06, "loss": 0.3339, "step": 956 }, { "epoch": 0.04, "grad_norm": 0.8192586863048623, "learning_rate": 3.73536299765808e-06, "loss": 0.3516, "step": 957 }, { "epoch": 0.04, "grad_norm": 0.7709055368938971, "learning_rate": 3.739266198282592e-06, "loss": 0.3276, "step": 958 }, { "epoch": 0.04, "grad_norm": 0.7502410411971993, "learning_rate": 3.7431693989071043e-06, "loss": 0.3234, "step": 959 }, { "epoch": 0.04, "grad_norm": 0.8536371932479321, "learning_rate": 3.7470725995316164e-06, "loss": 0.3399, "step": 960 }, { "epoch": 0.05, "grad_norm": 0.8389733897792988, "learning_rate": 3.750975800156128e-06, "loss": 0.3412, "step": 961 }, { "epoch": 0.05, "grad_norm": 0.8053147767949144, "learning_rate": 3.7548790007806403e-06, "loss": 0.3406, "step": 962 }, { "epoch": 0.05, "grad_norm": 0.8132929307436576, "learning_rate": 3.7587822014051524e-06, "loss": 0.3442, "step": 963 }, { "epoch": 0.05, "grad_norm": 0.7546123830385424, "learning_rate": 3.7626854020296646e-06, "loss": 0.3253, "step": 964 }, { "epoch": 0.05, "grad_norm": 0.8357659310098826, "learning_rate": 3.7665886026541763e-06, "loss": 0.322, "step": 965 }, { "epoch": 0.05, "grad_norm": 0.739658053499859, "learning_rate": 3.7704918032786884e-06, "loss": 0.3454, "step": 966 }, { "epoch": 0.05, "grad_norm": 0.7972718145466251, "learning_rate": 3.7743950039032006e-06, "loss": 0.3324, "step": 967 }, { "epoch": 0.05, "grad_norm": 0.8432896818295975, "learning_rate": 3.7782982045277127e-06, "loss": 0.3257, "step": 968 }, { "epoch": 0.05, "grad_norm": 0.8471889606538532, "learning_rate": 3.782201405152225e-06, "loss": 0.3561, "step": 969 }, { "epoch": 0.05, "grad_norm": 0.8179039774211231, "learning_rate": 3.786104605776737e-06, "loss": 0.3296, "step": 970 }, { "epoch": 0.05, "grad_norm": 0.9094698142095845, "learning_rate": 3.790007806401249e-06, "loss": 0.3572, "step": 971 }, { "epoch": 0.05, "grad_norm": 0.8035942030412484, "learning_rate": 3.7939110070257613e-06, "loss": 0.3269, "step": 972 }, { "epoch": 0.05, "grad_norm": 0.7625220616302513, "learning_rate": 3.7978142076502734e-06, "loss": 0.3254, "step": 973 }, { "epoch": 0.05, "grad_norm": 0.8092451289897646, "learning_rate": 3.8017174082747855e-06, "loss": 0.3277, "step": 974 }, { "epoch": 0.05, "grad_norm": 0.7539295998268221, "learning_rate": 3.8056206088992977e-06, "loss": 0.3027, "step": 975 }, { "epoch": 0.05, "grad_norm": 0.8355907557653678, "learning_rate": 3.80952380952381e-06, "loss": 0.3549, "step": 976 }, { "epoch": 0.05, "grad_norm": 0.7367614000037639, "learning_rate": 3.8134270101483215e-06, "loss": 0.3334, "step": 977 }, { "epoch": 0.05, "grad_norm": 0.835979463148373, "learning_rate": 3.817330210772834e-06, "loss": 0.3266, "step": 978 }, { "epoch": 0.05, "grad_norm": 0.80339628054785, "learning_rate": 3.821233411397346e-06, "loss": 0.325, "step": 979 }, { "epoch": 0.05, "grad_norm": 0.8255250685515131, "learning_rate": 3.825136612021858e-06, "loss": 0.3362, "step": 980 }, { "epoch": 0.05, "grad_norm": 0.7561570577557267, "learning_rate": 3.82903981264637e-06, "loss": 0.3303, "step": 981 }, { "epoch": 0.05, "grad_norm": 0.8062603122203911, "learning_rate": 3.832943013270882e-06, "loss": 0.3207, "step": 982 }, { "epoch": 0.05, "grad_norm": 0.7811281631079525, "learning_rate": 3.836846213895394e-06, "loss": 0.3275, "step": 983 }, { "epoch": 0.05, "grad_norm": 0.8213392735769245, "learning_rate": 3.8407494145199065e-06, "loss": 0.3326, "step": 984 }, { "epoch": 0.05, "grad_norm": 0.8465246372299461, "learning_rate": 3.844652615144419e-06, "loss": 0.3321, "step": 985 }, { "epoch": 0.05, "grad_norm": 0.7825737985816031, "learning_rate": 3.848555815768931e-06, "loss": 0.3135, "step": 986 }, { "epoch": 0.05, "grad_norm": 0.7129246477564651, "learning_rate": 3.852459016393443e-06, "loss": 0.3202, "step": 987 }, { "epoch": 0.05, "grad_norm": 0.7411345770728354, "learning_rate": 3.856362217017955e-06, "loss": 0.3211, "step": 988 }, { "epoch": 0.05, "grad_norm": 0.7596579543552242, "learning_rate": 3.860265417642467e-06, "loss": 0.3122, "step": 989 }, { "epoch": 0.05, "grad_norm": 0.8013433597816115, "learning_rate": 3.864168618266979e-06, "loss": 0.3374, "step": 990 }, { "epoch": 0.05, "grad_norm": 0.9370076083302152, "learning_rate": 3.8680718188914915e-06, "loss": 0.3078, "step": 991 }, { "epoch": 0.05, "grad_norm": 0.7062535591471397, "learning_rate": 3.871975019516004e-06, "loss": 0.3173, "step": 992 }, { "epoch": 0.05, "grad_norm": 0.8423517333186136, "learning_rate": 3.875878220140516e-06, "loss": 0.3155, "step": 993 }, { "epoch": 0.05, "grad_norm": 0.7985684676633483, "learning_rate": 3.879781420765028e-06, "loss": 0.3269, "step": 994 }, { "epoch": 0.05, "grad_norm": 0.7678495362241716, "learning_rate": 3.883684621389539e-06, "loss": 0.3198, "step": 995 }, { "epoch": 0.05, "grad_norm": 0.7336264797959, "learning_rate": 3.887587822014051e-06, "loss": 0.3157, "step": 996 }, { "epoch": 0.05, "grad_norm": 0.7860970439217574, "learning_rate": 3.8914910226385635e-06, "loss": 0.3321, "step": 997 }, { "epoch": 0.05, "grad_norm": 0.74218741684949, "learning_rate": 3.895394223263076e-06, "loss": 0.3169, "step": 998 }, { "epoch": 0.05, "grad_norm": 0.7517542650869181, "learning_rate": 3.899297423887588e-06, "loss": 0.3153, "step": 999 }, { "epoch": 0.05, "grad_norm": 0.801465554671392, "learning_rate": 3.9032006245121e-06, "loss": 0.3269, "step": 1000 }, { "epoch": 0.05, "grad_norm": 0.7868527073239706, "learning_rate": 3.907103825136612e-06, "loss": 0.3373, "step": 1001 }, { "epoch": 0.05, "grad_norm": 0.804160864761319, "learning_rate": 3.911007025761124e-06, "loss": 0.3352, "step": 1002 }, { "epoch": 0.05, "grad_norm": 0.8426601477000829, "learning_rate": 3.914910226385636e-06, "loss": 0.3324, "step": 1003 }, { "epoch": 0.05, "grad_norm": 0.8920566829923665, "learning_rate": 3.9188134270101485e-06, "loss": 0.3408, "step": 1004 }, { "epoch": 0.05, "grad_norm": 0.8282588490276088, "learning_rate": 3.922716627634661e-06, "loss": 0.3501, "step": 1005 }, { "epoch": 0.05, "grad_norm": 0.7460949682365215, "learning_rate": 3.926619828259173e-06, "loss": 0.3336, "step": 1006 }, { "epoch": 0.05, "grad_norm": 0.8711609145031212, "learning_rate": 3.930523028883685e-06, "loss": 0.3329, "step": 1007 }, { "epoch": 0.05, "grad_norm": 0.8356857906134444, "learning_rate": 3.934426229508197e-06, "loss": 0.3377, "step": 1008 }, { "epoch": 0.05, "grad_norm": 0.7673043430044278, "learning_rate": 3.938329430132709e-06, "loss": 0.3258, "step": 1009 }, { "epoch": 0.05, "grad_norm": 0.7941049656040756, "learning_rate": 3.942232630757221e-06, "loss": 0.3052, "step": 1010 }, { "epoch": 0.05, "grad_norm": 0.7121748038719863, "learning_rate": 3.9461358313817335e-06, "loss": 0.3143, "step": 1011 }, { "epoch": 0.05, "grad_norm": 0.8463781646553441, "learning_rate": 3.950039032006246e-06, "loss": 0.3565, "step": 1012 }, { "epoch": 0.05, "grad_norm": 0.8528091987633496, "learning_rate": 3.953942232630758e-06, "loss": 0.3291, "step": 1013 }, { "epoch": 0.05, "grad_norm": 0.806708986779555, "learning_rate": 3.95784543325527e-06, "loss": 0.3347, "step": 1014 }, { "epoch": 0.05, "grad_norm": 0.8037005023622362, "learning_rate": 3.961748633879782e-06, "loss": 0.3296, "step": 1015 }, { "epoch": 0.05, "grad_norm": 0.791312271351172, "learning_rate": 3.965651834504294e-06, "loss": 0.3167, "step": 1016 }, { "epoch": 0.05, "grad_norm": 0.761304011671788, "learning_rate": 3.969555035128806e-06, "loss": 0.3223, "step": 1017 }, { "epoch": 0.05, "grad_norm": 0.832443552891366, "learning_rate": 3.973458235753318e-06, "loss": 0.3224, "step": 1018 }, { "epoch": 0.05, "grad_norm": 0.9423490749563538, "learning_rate": 3.97736143637783e-06, "loss": 0.35, "step": 1019 }, { "epoch": 0.05, "grad_norm": 0.7920397638065396, "learning_rate": 3.981264637002342e-06, "loss": 0.3383, "step": 1020 }, { "epoch": 0.05, "grad_norm": 0.7909693782698485, "learning_rate": 3.985167837626854e-06, "loss": 0.3125, "step": 1021 }, { "epoch": 0.05, "grad_norm": 0.8597376202893422, "learning_rate": 3.989071038251366e-06, "loss": 0.3253, "step": 1022 }, { "epoch": 0.05, "grad_norm": 0.8934167196082788, "learning_rate": 3.992974238875878e-06, "loss": 0.349, "step": 1023 }, { "epoch": 0.05, "grad_norm": 0.7741811787120841, "learning_rate": 3.9968774395003904e-06, "loss": 0.3209, "step": 1024 }, { "epoch": 0.05, "grad_norm": 0.7970655032408883, "learning_rate": 4.000780640124903e-06, "loss": 0.3151, "step": 1025 }, { "epoch": 0.05, "grad_norm": 0.7975903965146152, "learning_rate": 4.004683840749415e-06, "loss": 0.3496, "step": 1026 }, { "epoch": 0.05, "grad_norm": 0.7843244476130126, "learning_rate": 4.008587041373927e-06, "loss": 0.3257, "step": 1027 }, { "epoch": 0.05, "grad_norm": 0.7164307517223025, "learning_rate": 4.012490241998439e-06, "loss": 0.3077, "step": 1028 }, { "epoch": 0.05, "grad_norm": 0.7942687617519383, "learning_rate": 4.016393442622951e-06, "loss": 0.3485, "step": 1029 }, { "epoch": 0.05, "grad_norm": 0.8241373083463113, "learning_rate": 4.020296643247463e-06, "loss": 0.3363, "step": 1030 }, { "epoch": 0.05, "grad_norm": 0.7809439780952002, "learning_rate": 4.0241998438719754e-06, "loss": 0.3297, "step": 1031 }, { "epoch": 0.05, "grad_norm": 0.7004532899521256, "learning_rate": 4.0281030444964876e-06, "loss": 0.315, "step": 1032 }, { "epoch": 0.05, "grad_norm": 0.8171960784407708, "learning_rate": 4.032006245121e-06, "loss": 0.3275, "step": 1033 }, { "epoch": 0.05, "grad_norm": 0.7536411607022162, "learning_rate": 4.035909445745512e-06, "loss": 0.3152, "step": 1034 }, { "epoch": 0.05, "grad_norm": 0.8398439956197662, "learning_rate": 4.039812646370024e-06, "loss": 0.3197, "step": 1035 }, { "epoch": 0.05, "grad_norm": 0.7750455541647057, "learning_rate": 4.043715846994536e-06, "loss": 0.3149, "step": 1036 }, { "epoch": 0.05, "grad_norm": 0.8192760096960554, "learning_rate": 4.047619047619048e-06, "loss": 0.3153, "step": 1037 }, { "epoch": 0.05, "grad_norm": 0.7772789659363601, "learning_rate": 4.05152224824356e-06, "loss": 0.337, "step": 1038 }, { "epoch": 0.05, "grad_norm": 0.794129215020833, "learning_rate": 4.0554254488680726e-06, "loss": 0.3133, "step": 1039 }, { "epoch": 0.05, "grad_norm": 0.7430636574188977, "learning_rate": 4.059328649492585e-06, "loss": 0.3167, "step": 1040 }, { "epoch": 0.05, "grad_norm": 0.7258586993887473, "learning_rate": 4.063231850117097e-06, "loss": 0.3039, "step": 1041 }, { "epoch": 0.05, "grad_norm": 0.7142964857046481, "learning_rate": 4.067135050741608e-06, "loss": 0.3163, "step": 1042 }, { "epoch": 0.05, "grad_norm": 0.8554870911499838, "learning_rate": 4.07103825136612e-06, "loss": 0.3549, "step": 1043 }, { "epoch": 0.05, "grad_norm": 0.8095567330424142, "learning_rate": 4.074941451990632e-06, "loss": 0.3258, "step": 1044 }, { "epoch": 0.05, "grad_norm": 0.7725747186393114, "learning_rate": 4.0788446526151446e-06, "loss": 0.3406, "step": 1045 }, { "epoch": 0.05, "grad_norm": 0.8177815876841619, "learning_rate": 4.082747853239657e-06, "loss": 0.3362, "step": 1046 }, { "epoch": 0.05, "grad_norm": 0.7846846849384338, "learning_rate": 4.086651053864169e-06, "loss": 0.3368, "step": 1047 }, { "epoch": 0.05, "grad_norm": 0.7329859164097651, "learning_rate": 4.090554254488681e-06, "loss": 0.3149, "step": 1048 }, { "epoch": 0.05, "grad_norm": 0.7518828854583361, "learning_rate": 4.094457455113193e-06, "loss": 0.3234, "step": 1049 }, { "epoch": 0.05, "grad_norm": 0.7570870948356919, "learning_rate": 4.098360655737705e-06, "loss": 0.3262, "step": 1050 }, { "epoch": 0.05, "grad_norm": 0.7815048592529774, "learning_rate": 4.102263856362217e-06, "loss": 0.3462, "step": 1051 }, { "epoch": 0.05, "grad_norm": 0.7622841807644124, "learning_rate": 4.1061670569867295e-06, "loss": 0.3284, "step": 1052 }, { "epoch": 0.05, "grad_norm": 0.7871223655339628, "learning_rate": 4.110070257611242e-06, "loss": 0.3286, "step": 1053 }, { "epoch": 0.05, "grad_norm": 0.7103371177686691, "learning_rate": 4.113973458235754e-06, "loss": 0.3394, "step": 1054 }, { "epoch": 0.05, "grad_norm": 0.7378037838581551, "learning_rate": 4.117876658860266e-06, "loss": 0.3226, "step": 1055 }, { "epoch": 0.05, "grad_norm": 0.7557597715521295, "learning_rate": 4.121779859484778e-06, "loss": 0.3394, "step": 1056 }, { "epoch": 0.05, "grad_norm": 0.814048254433811, "learning_rate": 4.12568306010929e-06, "loss": 0.3265, "step": 1057 }, { "epoch": 0.05, "grad_norm": 0.7870604106780377, "learning_rate": 4.129586260733802e-06, "loss": 0.3336, "step": 1058 }, { "epoch": 0.05, "grad_norm": 0.8232699770079824, "learning_rate": 4.1334894613583145e-06, "loss": 0.3473, "step": 1059 }, { "epoch": 0.05, "grad_norm": 0.7713489596151366, "learning_rate": 4.137392661982827e-06, "loss": 0.3374, "step": 1060 }, { "epoch": 0.05, "grad_norm": 0.7446546279052748, "learning_rate": 4.141295862607339e-06, "loss": 0.3219, "step": 1061 }, { "epoch": 0.05, "grad_norm": 0.7613819279560381, "learning_rate": 4.145199063231851e-06, "loss": 0.3157, "step": 1062 }, { "epoch": 0.05, "grad_norm": 0.8538321996694695, "learning_rate": 4.149102263856363e-06, "loss": 0.329, "step": 1063 }, { "epoch": 0.05, "grad_norm": 0.7409984387736864, "learning_rate": 4.153005464480875e-06, "loss": 0.3156, "step": 1064 }, { "epoch": 0.05, "grad_norm": 0.7911431659611726, "learning_rate": 4.1569086651053865e-06, "loss": 0.3419, "step": 1065 }, { "epoch": 0.05, "grad_norm": 0.74657095242217, "learning_rate": 4.160811865729899e-06, "loss": 0.3367, "step": 1066 }, { "epoch": 0.05, "grad_norm": 0.765269208802623, "learning_rate": 4.164715066354411e-06, "loss": 0.3194, "step": 1067 }, { "epoch": 0.05, "grad_norm": 0.8043592990405705, "learning_rate": 4.168618266978923e-06, "loss": 0.3113, "step": 1068 }, { "epoch": 0.05, "grad_norm": 0.8085620855377833, "learning_rate": 4.172521467603435e-06, "loss": 0.3647, "step": 1069 }, { "epoch": 0.05, "grad_norm": 0.7990228122089519, "learning_rate": 4.176424668227947e-06, "loss": 0.3182, "step": 1070 }, { "epoch": 0.05, "grad_norm": 0.9040155254741561, "learning_rate": 4.180327868852459e-06, "loss": 0.3515, "step": 1071 }, { "epoch": 0.05, "grad_norm": 0.7899225631269892, "learning_rate": 4.1842310694769715e-06, "loss": 0.3473, "step": 1072 }, { "epoch": 0.05, "grad_norm": 0.7690416713588238, "learning_rate": 4.188134270101484e-06, "loss": 0.3238, "step": 1073 }, { "epoch": 0.05, "grad_norm": 0.7630156058147718, "learning_rate": 4.192037470725996e-06, "loss": 0.307, "step": 1074 }, { "epoch": 0.05, "grad_norm": 0.8625253080202074, "learning_rate": 4.195940671350508e-06, "loss": 0.3093, "step": 1075 }, { "epoch": 0.05, "grad_norm": 0.8129731923198934, "learning_rate": 4.19984387197502e-06, "loss": 0.3329, "step": 1076 }, { "epoch": 0.05, "grad_norm": 0.7934095658317847, "learning_rate": 4.203747072599532e-06, "loss": 0.3389, "step": 1077 }, { "epoch": 0.05, "grad_norm": 0.7339282651268476, "learning_rate": 4.207650273224044e-06, "loss": 0.3135, "step": 1078 }, { "epoch": 0.05, "grad_norm": 0.7945362792054323, "learning_rate": 4.2115534738485565e-06, "loss": 0.3121, "step": 1079 }, { "epoch": 0.05, "grad_norm": 0.7785411781814795, "learning_rate": 4.215456674473069e-06, "loss": 0.3157, "step": 1080 }, { "epoch": 0.05, "grad_norm": 0.7245974787821303, "learning_rate": 4.219359875097581e-06, "loss": 0.3423, "step": 1081 }, { "epoch": 0.05, "grad_norm": 0.8041426868294155, "learning_rate": 4.223263075722093e-06, "loss": 0.3332, "step": 1082 }, { "epoch": 0.05, "grad_norm": 0.7626961137264079, "learning_rate": 4.227166276346605e-06, "loss": 0.329, "step": 1083 }, { "epoch": 0.05, "grad_norm": 0.7399745734037926, "learning_rate": 4.231069476971117e-06, "loss": 0.3294, "step": 1084 }, { "epoch": 0.05, "grad_norm": 0.9757282630972325, "learning_rate": 4.234972677595629e-06, "loss": 0.3196, "step": 1085 }, { "epoch": 0.05, "grad_norm": 0.7709814845647732, "learning_rate": 4.2388758782201415e-06, "loss": 0.3601, "step": 1086 }, { "epoch": 0.05, "grad_norm": 0.7085720097502731, "learning_rate": 4.242779078844654e-06, "loss": 0.3196, "step": 1087 }, { "epoch": 0.05, "grad_norm": 0.7276270226935244, "learning_rate": 4.246682279469166e-06, "loss": 0.3057, "step": 1088 }, { "epoch": 0.05, "grad_norm": 0.7154431407559079, "learning_rate": 4.250585480093677e-06, "loss": 0.3216, "step": 1089 }, { "epoch": 0.05, "grad_norm": 0.8219937295500883, "learning_rate": 4.254488680718189e-06, "loss": 0.3393, "step": 1090 }, { "epoch": 0.05, "grad_norm": 0.7696289547206345, "learning_rate": 4.258391881342701e-06, "loss": 0.3221, "step": 1091 }, { "epoch": 0.05, "grad_norm": 0.8078598793710751, "learning_rate": 4.2622950819672135e-06, "loss": 0.352, "step": 1092 }, { "epoch": 0.05, "grad_norm": 0.7878016353321163, "learning_rate": 4.266198282591726e-06, "loss": 0.3238, "step": 1093 }, { "epoch": 0.05, "grad_norm": 0.8214492840177962, "learning_rate": 4.270101483216238e-06, "loss": 0.3408, "step": 1094 }, { "epoch": 0.05, "grad_norm": 0.8315196647745042, "learning_rate": 4.27400468384075e-06, "loss": 0.3353, "step": 1095 }, { "epoch": 0.05, "grad_norm": 0.7505995354116095, "learning_rate": 4.277907884465262e-06, "loss": 0.3593, "step": 1096 }, { "epoch": 0.05, "grad_norm": 0.8479225598990868, "learning_rate": 4.281811085089773e-06, "loss": 0.352, "step": 1097 }, { "epoch": 0.05, "grad_norm": 0.7807060160062745, "learning_rate": 4.2857142857142855e-06, "loss": 0.3185, "step": 1098 }, { "epoch": 0.05, "grad_norm": 0.759009079029957, "learning_rate": 4.289617486338798e-06, "loss": 0.355, "step": 1099 }, { "epoch": 0.05, "grad_norm": 0.8196708590415941, "learning_rate": 4.29352068696331e-06, "loss": 0.3389, "step": 1100 }, { "epoch": 0.05, "grad_norm": 0.7949029707301941, "learning_rate": 4.297423887587822e-06, "loss": 0.3365, "step": 1101 }, { "epoch": 0.05, "grad_norm": 0.7775805454162443, "learning_rate": 4.301327088212334e-06, "loss": 0.3392, "step": 1102 }, { "epoch": 0.05, "grad_norm": 0.7482196592712799, "learning_rate": 4.305230288836846e-06, "loss": 0.3217, "step": 1103 }, { "epoch": 0.05, "grad_norm": 0.8059045956958294, "learning_rate": 4.309133489461358e-06, "loss": 0.3463, "step": 1104 }, { "epoch": 0.05, "grad_norm": 0.8128933000220175, "learning_rate": 4.3130366900858704e-06, "loss": 0.3233, "step": 1105 }, { "epoch": 0.05, "grad_norm": 0.7737591374993412, "learning_rate": 4.316939890710383e-06, "loss": 0.3373, "step": 1106 }, { "epoch": 0.05, "grad_norm": 0.8648982210545426, "learning_rate": 4.320843091334895e-06, "loss": 0.3066, "step": 1107 }, { "epoch": 0.05, "grad_norm": 0.8381258266262805, "learning_rate": 4.324746291959407e-06, "loss": 0.337, "step": 1108 }, { "epoch": 0.05, "grad_norm": 0.7819822885797783, "learning_rate": 4.328649492583919e-06, "loss": 0.3352, "step": 1109 }, { "epoch": 0.05, "grad_norm": 0.7930983079225373, "learning_rate": 4.332552693208431e-06, "loss": 0.3258, "step": 1110 }, { "epoch": 0.05, "grad_norm": 0.8068786105830926, "learning_rate": 4.336455893832943e-06, "loss": 0.3192, "step": 1111 }, { "epoch": 0.05, "grad_norm": 0.8421270210500106, "learning_rate": 4.3403590944574554e-06, "loss": 0.333, "step": 1112 }, { "epoch": 0.05, "grad_norm": 0.8311501988164857, "learning_rate": 4.3442622950819676e-06, "loss": 0.3459, "step": 1113 }, { "epoch": 0.05, "grad_norm": 0.8733144807634394, "learning_rate": 4.34816549570648e-06, "loss": 0.3475, "step": 1114 }, { "epoch": 0.05, "grad_norm": 0.7333203781218812, "learning_rate": 4.352068696330992e-06, "loss": 0.3278, "step": 1115 }, { "epoch": 0.05, "grad_norm": 0.7907541011156831, "learning_rate": 4.355971896955504e-06, "loss": 0.3332, "step": 1116 }, { "epoch": 0.05, "grad_norm": 0.870712017894598, "learning_rate": 4.359875097580016e-06, "loss": 0.3377, "step": 1117 }, { "epoch": 0.05, "grad_norm": 0.75078089759963, "learning_rate": 4.363778298204528e-06, "loss": 0.3123, "step": 1118 }, { "epoch": 0.05, "grad_norm": 0.7959570198877866, "learning_rate": 4.36768149882904e-06, "loss": 0.3587, "step": 1119 }, { "epoch": 0.05, "grad_norm": 0.8199562253507233, "learning_rate": 4.371584699453552e-06, "loss": 0.3457, "step": 1120 }, { "epoch": 0.05, "grad_norm": 0.7539389984923776, "learning_rate": 4.375487900078064e-06, "loss": 0.3314, "step": 1121 }, { "epoch": 0.05, "grad_norm": 0.7774541586558678, "learning_rate": 4.379391100702576e-06, "loss": 0.3205, "step": 1122 }, { "epoch": 0.05, "grad_norm": 0.7158416857315977, "learning_rate": 4.383294301327088e-06, "loss": 0.3128, "step": 1123 }, { "epoch": 0.05, "grad_norm": 0.8019237726485963, "learning_rate": 4.3871975019516e-06, "loss": 0.3439, "step": 1124 }, { "epoch": 0.05, "grad_norm": 0.7945134625901896, "learning_rate": 4.391100702576112e-06, "loss": 0.3417, "step": 1125 }, { "epoch": 0.05, "grad_norm": 0.8030387119487601, "learning_rate": 4.3950039032006245e-06, "loss": 0.326, "step": 1126 }, { "epoch": 0.05, "grad_norm": 0.7608939249537782, "learning_rate": 4.398907103825137e-06, "loss": 0.3106, "step": 1127 }, { "epoch": 0.05, "grad_norm": 0.843899363072372, "learning_rate": 4.402810304449649e-06, "loss": 0.3412, "step": 1128 }, { "epoch": 0.05, "grad_norm": 0.7520682965182158, "learning_rate": 4.406713505074161e-06, "loss": 0.3168, "step": 1129 }, { "epoch": 0.05, "grad_norm": 0.8160462118117454, "learning_rate": 4.410616705698673e-06, "loss": 0.328, "step": 1130 }, { "epoch": 0.05, "grad_norm": 0.7394665604249703, "learning_rate": 4.414519906323185e-06, "loss": 0.3201, "step": 1131 }, { "epoch": 0.05, "grad_norm": 0.7723673763030374, "learning_rate": 4.418423106947697e-06, "loss": 0.3244, "step": 1132 }, { "epoch": 0.05, "grad_norm": 0.717333581240294, "learning_rate": 4.4223263075722095e-06, "loss": 0.3157, "step": 1133 }, { "epoch": 0.05, "grad_norm": 0.7536719020079228, "learning_rate": 4.426229508196722e-06, "loss": 0.3232, "step": 1134 }, { "epoch": 0.05, "grad_norm": 0.7797077716990461, "learning_rate": 4.430132708821234e-06, "loss": 0.3314, "step": 1135 }, { "epoch": 0.05, "grad_norm": 0.7879258123244073, "learning_rate": 4.434035909445746e-06, "loss": 0.3317, "step": 1136 }, { "epoch": 0.05, "grad_norm": 0.7418047923204022, "learning_rate": 4.437939110070258e-06, "loss": 0.3287, "step": 1137 }, { "epoch": 0.05, "grad_norm": 0.7857658507133573, "learning_rate": 4.44184231069477e-06, "loss": 0.3122, "step": 1138 }, { "epoch": 0.05, "grad_norm": 0.773080902450866, "learning_rate": 4.445745511319282e-06, "loss": 0.306, "step": 1139 }, { "epoch": 0.05, "grad_norm": 0.7521305802775401, "learning_rate": 4.4496487119437945e-06, "loss": 0.3356, "step": 1140 }, { "epoch": 0.05, "grad_norm": 0.7887897219408405, "learning_rate": 4.453551912568307e-06, "loss": 0.3203, "step": 1141 }, { "epoch": 0.05, "grad_norm": 0.8836903646248546, "learning_rate": 4.457455113192819e-06, "loss": 0.3242, "step": 1142 }, { "epoch": 0.05, "grad_norm": 0.7574615135962971, "learning_rate": 4.461358313817331e-06, "loss": 0.3486, "step": 1143 }, { "epoch": 0.05, "grad_norm": 0.7813213846959247, "learning_rate": 4.465261514441842e-06, "loss": 0.3189, "step": 1144 }, { "epoch": 0.05, "grad_norm": 0.7729664775573555, "learning_rate": 4.469164715066354e-06, "loss": 0.3267, "step": 1145 }, { "epoch": 0.05, "grad_norm": 0.8018999834961694, "learning_rate": 4.4730679156908665e-06, "loss": 0.3285, "step": 1146 }, { "epoch": 0.05, "grad_norm": 0.7434686351484441, "learning_rate": 4.476971116315379e-06, "loss": 0.318, "step": 1147 }, { "epoch": 0.05, "grad_norm": 0.7505204664861658, "learning_rate": 4.480874316939891e-06, "loss": 0.3182, "step": 1148 }, { "epoch": 0.05, "grad_norm": 0.8528481712745205, "learning_rate": 4.484777517564403e-06, "loss": 0.3241, "step": 1149 }, { "epoch": 0.05, "grad_norm": 0.7722934197072976, "learning_rate": 4.488680718188915e-06, "loss": 0.3472, "step": 1150 }, { "epoch": 0.05, "grad_norm": 0.8744420640434646, "learning_rate": 4.492583918813427e-06, "loss": 0.3467, "step": 1151 }, { "epoch": 0.05, "grad_norm": 0.7834143405648434, "learning_rate": 4.496487119437939e-06, "loss": 0.3375, "step": 1152 }, { "epoch": 0.05, "grad_norm": 0.8180738040586236, "learning_rate": 4.5003903200624515e-06, "loss": 0.3301, "step": 1153 }, { "epoch": 0.05, "grad_norm": 0.8069896316404841, "learning_rate": 4.504293520686964e-06, "loss": 0.3672, "step": 1154 }, { "epoch": 0.05, "grad_norm": 0.777690308577652, "learning_rate": 4.508196721311476e-06, "loss": 0.3416, "step": 1155 }, { "epoch": 0.05, "grad_norm": 0.8071548121534299, "learning_rate": 4.512099921935988e-06, "loss": 0.3027, "step": 1156 }, { "epoch": 0.05, "grad_norm": 0.7825094984345531, "learning_rate": 4.5160031225605e-06, "loss": 0.3328, "step": 1157 }, { "epoch": 0.05, "grad_norm": 0.8247099962264008, "learning_rate": 4.519906323185012e-06, "loss": 0.3487, "step": 1158 }, { "epoch": 0.05, "grad_norm": 0.7914229657813877, "learning_rate": 4.523809523809524e-06, "loss": 0.3245, "step": 1159 }, { "epoch": 0.05, "grad_norm": 0.8319241680100073, "learning_rate": 4.5277127244340365e-06, "loss": 0.3341, "step": 1160 }, { "epoch": 0.05, "grad_norm": 0.7321441870462392, "learning_rate": 4.531615925058549e-06, "loss": 0.309, "step": 1161 }, { "epoch": 0.05, "grad_norm": 0.7903653449296587, "learning_rate": 4.535519125683061e-06, "loss": 0.322, "step": 1162 }, { "epoch": 0.05, "grad_norm": 0.7464466848416672, "learning_rate": 4.539422326307573e-06, "loss": 0.32, "step": 1163 }, { "epoch": 0.05, "grad_norm": 0.7513299849162668, "learning_rate": 4.543325526932085e-06, "loss": 0.3301, "step": 1164 }, { "epoch": 0.05, "grad_norm": 0.7455689059751355, "learning_rate": 4.547228727556597e-06, "loss": 0.2991, "step": 1165 }, { "epoch": 0.05, "grad_norm": 0.7340372003213809, "learning_rate": 4.551131928181109e-06, "loss": 0.3245, "step": 1166 }, { "epoch": 0.05, "grad_norm": 0.801099243879809, "learning_rate": 4.5550351288056215e-06, "loss": 0.3296, "step": 1167 }, { "epoch": 0.05, "grad_norm": 0.8091049232703141, "learning_rate": 4.558938329430133e-06, "loss": 0.3268, "step": 1168 }, { "epoch": 0.05, "grad_norm": 0.725198942359715, "learning_rate": 4.562841530054645e-06, "loss": 0.3347, "step": 1169 }, { "epoch": 0.05, "grad_norm": 0.8091911176240195, "learning_rate": 4.566744730679157e-06, "loss": 0.3184, "step": 1170 }, { "epoch": 0.05, "grad_norm": 0.7410087196063481, "learning_rate": 4.570647931303669e-06, "loss": 0.3367, "step": 1171 }, { "epoch": 0.05, "grad_norm": 0.8058079757133377, "learning_rate": 4.574551131928181e-06, "loss": 0.3248, "step": 1172 }, { "epoch": 0.05, "grad_norm": 0.8048682224120459, "learning_rate": 4.5784543325526935e-06, "loss": 0.3281, "step": 1173 }, { "epoch": 0.05, "grad_norm": 0.784922943494407, "learning_rate": 4.582357533177206e-06, "loss": 0.3378, "step": 1174 }, { "epoch": 0.06, "grad_norm": 0.7875099952505576, "learning_rate": 4.586260733801718e-06, "loss": 0.3485, "step": 1175 }, { "epoch": 0.06, "grad_norm": 0.716861225618717, "learning_rate": 4.59016393442623e-06, "loss": 0.3114, "step": 1176 }, { "epoch": 0.06, "grad_norm": 0.780846901801135, "learning_rate": 4.594067135050742e-06, "loss": 0.313, "step": 1177 }, { "epoch": 0.06, "grad_norm": 0.7577791047890048, "learning_rate": 4.597970335675254e-06, "loss": 0.3397, "step": 1178 }, { "epoch": 0.06, "grad_norm": 0.7690472310672557, "learning_rate": 4.601873536299766e-06, "loss": 0.3165, "step": 1179 }, { "epoch": 0.06, "grad_norm": 0.7733795422584592, "learning_rate": 4.6057767369242784e-06, "loss": 0.3417, "step": 1180 }, { "epoch": 0.06, "grad_norm": 0.8392925510948042, "learning_rate": 4.609679937548791e-06, "loss": 0.3424, "step": 1181 }, { "epoch": 0.06, "grad_norm": 0.7111725081416846, "learning_rate": 4.613583138173303e-06, "loss": 0.296, "step": 1182 }, { "epoch": 0.06, "grad_norm": 0.8513619451765813, "learning_rate": 4.617486338797815e-06, "loss": 0.3356, "step": 1183 }, { "epoch": 0.06, "grad_norm": 0.7875945959998719, "learning_rate": 4.621389539422327e-06, "loss": 0.3311, "step": 1184 }, { "epoch": 0.06, "grad_norm": 0.7669350867947335, "learning_rate": 4.625292740046839e-06, "loss": 0.3367, "step": 1185 }, { "epoch": 0.06, "grad_norm": 0.8337841795115746, "learning_rate": 4.629195940671351e-06, "loss": 0.3224, "step": 1186 }, { "epoch": 0.06, "grad_norm": 0.7717487122026646, "learning_rate": 4.6330991412958634e-06, "loss": 0.3048, "step": 1187 }, { "epoch": 0.06, "grad_norm": 0.7007504837538838, "learning_rate": 4.6370023419203756e-06, "loss": 0.3107, "step": 1188 }, { "epoch": 0.06, "grad_norm": 0.7785296339400172, "learning_rate": 4.640905542544888e-06, "loss": 0.3164, "step": 1189 }, { "epoch": 0.06, "grad_norm": 0.7227534369783942, "learning_rate": 4.6448087431694e-06, "loss": 0.3075, "step": 1190 }, { "epoch": 0.06, "grad_norm": 0.7524646935432423, "learning_rate": 4.648711943793911e-06, "loss": 0.3046, "step": 1191 }, { "epoch": 0.06, "grad_norm": 0.8396265499663016, "learning_rate": 4.652615144418423e-06, "loss": 0.3391, "step": 1192 }, { "epoch": 0.06, "grad_norm": 0.751680387631082, "learning_rate": 4.6565183450429354e-06, "loss": 0.3247, "step": 1193 }, { "epoch": 0.06, "grad_norm": 0.7541148833501374, "learning_rate": 4.6604215456674476e-06, "loss": 0.3177, "step": 1194 }, { "epoch": 0.06, "grad_norm": 0.768031674377618, "learning_rate": 4.66432474629196e-06, "loss": 0.3247, "step": 1195 }, { "epoch": 0.06, "grad_norm": 0.865291775756713, "learning_rate": 4.668227946916472e-06, "loss": 0.3519, "step": 1196 }, { "epoch": 0.06, "grad_norm": 0.78324554839197, "learning_rate": 4.672131147540984e-06, "loss": 0.3371, "step": 1197 }, { "epoch": 0.06, "grad_norm": 0.7515151649070249, "learning_rate": 4.676034348165496e-06, "loss": 0.3103, "step": 1198 }, { "epoch": 0.06, "grad_norm": 0.7658841521811186, "learning_rate": 4.679937548790008e-06, "loss": 0.3282, "step": 1199 }, { "epoch": 0.06, "grad_norm": 0.7444163221472007, "learning_rate": 4.68384074941452e-06, "loss": 0.3084, "step": 1200 }, { "epoch": 0.06, "grad_norm": 0.7900746582451963, "learning_rate": 4.6877439500390326e-06, "loss": 0.3335, "step": 1201 }, { "epoch": 0.06, "grad_norm": 0.7772416466347553, "learning_rate": 4.691647150663545e-06, "loss": 0.3271, "step": 1202 }, { "epoch": 0.06, "grad_norm": 0.7580981679055885, "learning_rate": 4.695550351288057e-06, "loss": 0.3307, "step": 1203 }, { "epoch": 0.06, "grad_norm": 0.8473319565326328, "learning_rate": 4.699453551912569e-06, "loss": 0.3419, "step": 1204 }, { "epoch": 0.06, "grad_norm": 0.7564314492569815, "learning_rate": 4.703356752537081e-06, "loss": 0.3253, "step": 1205 }, { "epoch": 0.06, "grad_norm": 0.717025328451318, "learning_rate": 4.707259953161593e-06, "loss": 0.33, "step": 1206 }, { "epoch": 0.06, "grad_norm": 0.7818112513165272, "learning_rate": 4.711163153786105e-06, "loss": 0.335, "step": 1207 }, { "epoch": 0.06, "grad_norm": 0.8781303663180692, "learning_rate": 4.7150663544106175e-06, "loss": 0.3339, "step": 1208 }, { "epoch": 0.06, "grad_norm": 0.8315431405979283, "learning_rate": 4.71896955503513e-06, "loss": 0.3444, "step": 1209 }, { "epoch": 0.06, "grad_norm": 0.8189351184214387, "learning_rate": 4.722872755659642e-06, "loss": 0.3406, "step": 1210 }, { "epoch": 0.06, "grad_norm": 0.735869229568121, "learning_rate": 4.726775956284154e-06, "loss": 0.3235, "step": 1211 }, { "epoch": 0.06, "grad_norm": 0.8294830764067048, "learning_rate": 4.730679156908666e-06, "loss": 0.3367, "step": 1212 }, { "epoch": 0.06, "grad_norm": 0.8354841612956521, "learning_rate": 4.734582357533178e-06, "loss": 0.3017, "step": 1213 }, { "epoch": 0.06, "grad_norm": 0.8227247343353387, "learning_rate": 4.73848555815769e-06, "loss": 0.323, "step": 1214 }, { "epoch": 0.06, "grad_norm": 0.824587040521365, "learning_rate": 4.742388758782202e-06, "loss": 0.3435, "step": 1215 }, { "epoch": 0.06, "grad_norm": 0.7341508886043664, "learning_rate": 4.746291959406714e-06, "loss": 0.3226, "step": 1216 }, { "epoch": 0.06, "grad_norm": 0.8777944438379092, "learning_rate": 4.750195160031226e-06, "loss": 0.3263, "step": 1217 }, { "epoch": 0.06, "grad_norm": 0.7584736045922514, "learning_rate": 4.754098360655738e-06, "loss": 0.3137, "step": 1218 }, { "epoch": 0.06, "grad_norm": 0.8176468832771439, "learning_rate": 4.75800156128025e-06, "loss": 0.3098, "step": 1219 }, { "epoch": 0.06, "grad_norm": 0.9508349247770249, "learning_rate": 4.761904761904762e-06, "loss": 0.3522, "step": 1220 }, { "epoch": 0.06, "grad_norm": 0.7255629930257858, "learning_rate": 4.7658079625292745e-06, "loss": 0.3204, "step": 1221 }, { "epoch": 0.06, "grad_norm": 0.7177808258446199, "learning_rate": 4.769711163153787e-06, "loss": 0.3153, "step": 1222 }, { "epoch": 0.06, "grad_norm": 0.8049481332762758, "learning_rate": 4.773614363778298e-06, "loss": 0.3197, "step": 1223 }, { "epoch": 0.06, "grad_norm": 0.8104592910423961, "learning_rate": 4.77751756440281e-06, "loss": 0.3498, "step": 1224 }, { "epoch": 0.06, "grad_norm": 0.8658183820118093, "learning_rate": 4.781420765027322e-06, "loss": 0.3196, "step": 1225 }, { "epoch": 0.06, "grad_norm": 0.8302675732882627, "learning_rate": 4.785323965651834e-06, "loss": 0.3396, "step": 1226 }, { "epoch": 0.06, "grad_norm": 0.7561091699667413, "learning_rate": 4.7892271662763465e-06, "loss": 0.3239, "step": 1227 }, { "epoch": 0.06, "grad_norm": 0.7889787861290779, "learning_rate": 4.793130366900859e-06, "loss": 0.3338, "step": 1228 }, { "epoch": 0.06, "grad_norm": 0.7059046769531109, "learning_rate": 4.797033567525371e-06, "loss": 0.3045, "step": 1229 }, { "epoch": 0.06, "grad_norm": 0.7423340821227207, "learning_rate": 4.800936768149883e-06, "loss": 0.3386, "step": 1230 }, { "epoch": 0.06, "grad_norm": 0.8360795460167664, "learning_rate": 4.804839968774395e-06, "loss": 0.3359, "step": 1231 }, { "epoch": 0.06, "grad_norm": 0.8222573732108486, "learning_rate": 4.808743169398907e-06, "loss": 0.3371, "step": 1232 }, { "epoch": 0.06, "grad_norm": 0.8485394399110076, "learning_rate": 4.812646370023419e-06, "loss": 0.3631, "step": 1233 }, { "epoch": 0.06, "grad_norm": 0.794027479491186, "learning_rate": 4.8165495706479315e-06, "loss": 0.3463, "step": 1234 }, { "epoch": 0.06, "grad_norm": 0.8185214444691623, "learning_rate": 4.820452771272444e-06, "loss": 0.3337, "step": 1235 }, { "epoch": 0.06, "grad_norm": 0.8221224991308261, "learning_rate": 4.824355971896956e-06, "loss": 0.3285, "step": 1236 }, { "epoch": 0.06, "grad_norm": 0.7919473301914005, "learning_rate": 4.828259172521468e-06, "loss": 0.346, "step": 1237 }, { "epoch": 0.06, "grad_norm": 0.7244893762319451, "learning_rate": 4.83216237314598e-06, "loss": 0.3118, "step": 1238 }, { "epoch": 0.06, "grad_norm": 0.8396373238994559, "learning_rate": 4.836065573770492e-06, "loss": 0.3577, "step": 1239 }, { "epoch": 0.06, "grad_norm": 0.9020800742116587, "learning_rate": 4.839968774395004e-06, "loss": 0.3102, "step": 1240 }, { "epoch": 0.06, "grad_norm": 0.7819052930665716, "learning_rate": 4.8438719750195165e-06, "loss": 0.32, "step": 1241 }, { "epoch": 0.06, "grad_norm": 0.7497677853256657, "learning_rate": 4.847775175644029e-06, "loss": 0.3151, "step": 1242 }, { "epoch": 0.06, "grad_norm": 0.7829821859204853, "learning_rate": 4.851678376268541e-06, "loss": 0.3434, "step": 1243 }, { "epoch": 0.06, "grad_norm": 0.8186020024751871, "learning_rate": 4.855581576893053e-06, "loss": 0.3477, "step": 1244 }, { "epoch": 0.06, "grad_norm": 0.7362084513663902, "learning_rate": 4.859484777517565e-06, "loss": 0.3035, "step": 1245 }, { "epoch": 0.06, "grad_norm": 0.8505777937565062, "learning_rate": 4.863387978142076e-06, "loss": 0.3177, "step": 1246 }, { "epoch": 0.06, "grad_norm": 0.8145690492577363, "learning_rate": 4.8672911787665885e-06, "loss": 0.3428, "step": 1247 }, { "epoch": 0.06, "grad_norm": 0.7818545959585905, "learning_rate": 4.871194379391101e-06, "loss": 0.3122, "step": 1248 }, { "epoch": 0.06, "grad_norm": 0.7960478442547329, "learning_rate": 4.875097580015613e-06, "loss": 0.3269, "step": 1249 }, { "epoch": 0.06, "grad_norm": 0.7779048211833903, "learning_rate": 4.879000780640125e-06, "loss": 0.3283, "step": 1250 }, { "epoch": 0.06, "grad_norm": 0.8249720164542179, "learning_rate": 4.882903981264637e-06, "loss": 0.3307, "step": 1251 }, { "epoch": 0.06, "grad_norm": 0.7943877729716782, "learning_rate": 4.886807181889149e-06, "loss": 0.3079, "step": 1252 }, { "epoch": 0.06, "grad_norm": 0.8034055560865014, "learning_rate": 4.890710382513661e-06, "loss": 0.321, "step": 1253 }, { "epoch": 0.06, "grad_norm": 0.8410481455135758, "learning_rate": 4.8946135831381735e-06, "loss": 0.3193, "step": 1254 }, { "epoch": 0.06, "grad_norm": 0.8264291944888741, "learning_rate": 4.898516783762686e-06, "loss": 0.3126, "step": 1255 }, { "epoch": 0.06, "grad_norm": 0.8531688397120727, "learning_rate": 4.902419984387198e-06, "loss": 0.3136, "step": 1256 }, { "epoch": 0.06, "grad_norm": 0.7685841005898182, "learning_rate": 4.90632318501171e-06, "loss": 0.3345, "step": 1257 }, { "epoch": 0.06, "grad_norm": 0.7830586414062322, "learning_rate": 4.910226385636222e-06, "loss": 0.3315, "step": 1258 }, { "epoch": 0.06, "grad_norm": 0.7813446500308224, "learning_rate": 4.914129586260734e-06, "loss": 0.3218, "step": 1259 }, { "epoch": 0.06, "grad_norm": 0.7735809271337962, "learning_rate": 4.918032786885246e-06, "loss": 0.3165, "step": 1260 }, { "epoch": 0.06, "grad_norm": 0.7361258705235983, "learning_rate": 4.9219359875097584e-06, "loss": 0.3013, "step": 1261 }, { "epoch": 0.06, "grad_norm": 0.7728615677939132, "learning_rate": 4.925839188134271e-06, "loss": 0.3308, "step": 1262 }, { "epoch": 0.06, "grad_norm": 0.7756592390849794, "learning_rate": 4.929742388758783e-06, "loss": 0.3228, "step": 1263 }, { "epoch": 0.06, "grad_norm": 0.771770736441463, "learning_rate": 4.933645589383295e-06, "loss": 0.3148, "step": 1264 }, { "epoch": 0.06, "grad_norm": 0.7056656979199595, "learning_rate": 4.937548790007807e-06, "loss": 0.3077, "step": 1265 }, { "epoch": 0.06, "grad_norm": 0.7556295649413937, "learning_rate": 4.941451990632319e-06, "loss": 0.3192, "step": 1266 }, { "epoch": 0.06, "grad_norm": 0.7550118187784288, "learning_rate": 4.945355191256831e-06, "loss": 0.3303, "step": 1267 }, { "epoch": 0.06, "grad_norm": 0.7752972144288165, "learning_rate": 4.9492583918813434e-06, "loss": 0.3488, "step": 1268 }, { "epoch": 0.06, "grad_norm": 0.7831803680685049, "learning_rate": 4.9531615925058556e-06, "loss": 0.3293, "step": 1269 }, { "epoch": 0.06, "grad_norm": 0.748514262293179, "learning_rate": 4.957064793130367e-06, "loss": 0.3254, "step": 1270 }, { "epoch": 0.06, "grad_norm": 0.8236744619318931, "learning_rate": 4.960967993754879e-06, "loss": 0.3477, "step": 1271 }, { "epoch": 0.06, "grad_norm": 0.7638744825055725, "learning_rate": 4.964871194379391e-06, "loss": 0.3251, "step": 1272 }, { "epoch": 0.06, "grad_norm": 0.7318747144658153, "learning_rate": 4.968774395003903e-06, "loss": 0.3181, "step": 1273 }, { "epoch": 0.06, "grad_norm": 0.7828499676619863, "learning_rate": 4.9726775956284154e-06, "loss": 0.3324, "step": 1274 }, { "epoch": 0.06, "grad_norm": 0.791571618969947, "learning_rate": 4.9765807962529276e-06, "loss": 0.3213, "step": 1275 }, { "epoch": 0.06, "grad_norm": 0.7951307041122969, "learning_rate": 4.98048399687744e-06, "loss": 0.3091, "step": 1276 }, { "epoch": 0.06, "grad_norm": 0.8255482691373225, "learning_rate": 4.984387197501952e-06, "loss": 0.3428, "step": 1277 }, { "epoch": 0.06, "grad_norm": 0.708282737480509, "learning_rate": 4.988290398126464e-06, "loss": 0.3132, "step": 1278 }, { "epoch": 0.06, "grad_norm": 0.7885071154472629, "learning_rate": 4.992193598750976e-06, "loss": 0.31, "step": 1279 }, { "epoch": 0.06, "grad_norm": 0.777552679317253, "learning_rate": 4.996096799375488e-06, "loss": 0.3457, "step": 1280 }, { "epoch": 0.06, "grad_norm": 0.764862573690078, "learning_rate": 5e-06, "loss": 0.3418, "step": 1281 }, { "epoch": 0.06, "grad_norm": 0.7803137656050225, "learning_rate": 4.999999992806565e-06, "loss": 0.3037, "step": 1282 }, { "epoch": 0.06, "grad_norm": 0.8110414517342917, "learning_rate": 4.999999971226257e-06, "loss": 0.3258, "step": 1283 }, { "epoch": 0.06, "grad_norm": 0.7133219635181552, "learning_rate": 4.9999999352590784e-06, "loss": 0.3227, "step": 1284 }, { "epoch": 0.06, "grad_norm": 0.7226255150842057, "learning_rate": 4.999999884905028e-06, "loss": 0.3035, "step": 1285 }, { "epoch": 0.06, "grad_norm": 0.8308273029600334, "learning_rate": 4.999999820164106e-06, "loss": 0.3536, "step": 1286 }, { "epoch": 0.06, "grad_norm": 0.7251481944860867, "learning_rate": 4.999999741036315e-06, "loss": 0.3498, "step": 1287 }, { "epoch": 0.06, "grad_norm": 0.8174791747152399, "learning_rate": 4.9999996475216525e-06, "loss": 0.3196, "step": 1288 }, { "epoch": 0.06, "grad_norm": 0.8069131948035422, "learning_rate": 4.999999539620122e-06, "loss": 0.3288, "step": 1289 }, { "epoch": 0.06, "grad_norm": 0.7485398226862541, "learning_rate": 4.999999417331721e-06, "loss": 0.3261, "step": 1290 }, { "epoch": 0.06, "grad_norm": 0.733823443482088, "learning_rate": 4.9999992806564526e-06, "loss": 0.3506, "step": 1291 }, { "epoch": 0.06, "grad_norm": 0.9072100948677695, "learning_rate": 4.999999129594316e-06, "loss": 0.3435, "step": 1292 }, { "epoch": 0.06, "grad_norm": 0.7612573448994624, "learning_rate": 4.999998964145313e-06, "loss": 0.3444, "step": 1293 }, { "epoch": 0.06, "grad_norm": 0.7939437529928963, "learning_rate": 4.999998784309444e-06, "loss": 0.3264, "step": 1294 }, { "epoch": 0.06, "grad_norm": 0.7609080159324426, "learning_rate": 4.999998590086711e-06, "loss": 0.3206, "step": 1295 }, { "epoch": 0.06, "grad_norm": 0.7384256141372285, "learning_rate": 4.999998381477114e-06, "loss": 0.335, "step": 1296 }, { "epoch": 0.06, "grad_norm": 0.6844258838632076, "learning_rate": 4.999998158480655e-06, "loss": 0.2974, "step": 1297 }, { "epoch": 0.06, "grad_norm": 0.8054200488914997, "learning_rate": 4.999997921097334e-06, "loss": 0.3343, "step": 1298 }, { "epoch": 0.06, "grad_norm": 0.7867870728390103, "learning_rate": 4.999997669327155e-06, "loss": 0.3235, "step": 1299 }, { "epoch": 0.06, "grad_norm": 0.7975287047884598, "learning_rate": 4.999997403170116e-06, "loss": 0.316, "step": 1300 }, { "epoch": 0.06, "grad_norm": 0.7509507853960665, "learning_rate": 4.9999971226262215e-06, "loss": 0.3171, "step": 1301 }, { "epoch": 0.06, "grad_norm": 0.741425898796729, "learning_rate": 4.999996827695472e-06, "loss": 0.2881, "step": 1302 }, { "epoch": 0.06, "grad_norm": 0.677456620155258, "learning_rate": 4.999996518377867e-06, "loss": 0.3076, "step": 1303 }, { "epoch": 0.06, "grad_norm": 0.7650894674918964, "learning_rate": 4.999996194673413e-06, "loss": 0.3415, "step": 1304 }, { "epoch": 0.06, "grad_norm": 0.6911346932621355, "learning_rate": 4.999995856582108e-06, "loss": 0.3017, "step": 1305 }, { "epoch": 0.06, "grad_norm": 1.2509123669028706, "learning_rate": 4.999995504103956e-06, "loss": 0.3249, "step": 1306 }, { "epoch": 0.06, "grad_norm": 0.7745646094052938, "learning_rate": 4.999995137238957e-06, "loss": 0.3375, "step": 1307 }, { "epoch": 0.06, "grad_norm": 0.6935304153293038, "learning_rate": 4.999994755987115e-06, "loss": 0.308, "step": 1308 }, { "epoch": 0.06, "grad_norm": 0.7912320672734515, "learning_rate": 4.9999943603484315e-06, "loss": 0.3292, "step": 1309 }, { "epoch": 0.06, "grad_norm": 0.8507651477028199, "learning_rate": 4.999993950322909e-06, "loss": 0.3415, "step": 1310 }, { "epoch": 0.06, "grad_norm": 0.7367441318116214, "learning_rate": 4.99999352591055e-06, "loss": 0.3186, "step": 1311 }, { "epoch": 0.06, "grad_norm": 0.7730847121976596, "learning_rate": 4.999993087111356e-06, "loss": 0.3304, "step": 1312 }, { "epoch": 0.06, "grad_norm": 0.7367448351282575, "learning_rate": 4.99999263392533e-06, "loss": 0.3136, "step": 1313 }, { "epoch": 0.06, "grad_norm": 0.7274886410459188, "learning_rate": 4.999992166352475e-06, "loss": 0.3145, "step": 1314 }, { "epoch": 0.06, "grad_norm": 0.6964111368551325, "learning_rate": 4.999991684392793e-06, "loss": 0.3253, "step": 1315 }, { "epoch": 0.06, "grad_norm": 0.7866002292207801, "learning_rate": 4.999991188046288e-06, "loss": 0.3344, "step": 1316 }, { "epoch": 0.06, "grad_norm": 0.7133987678637378, "learning_rate": 4.999990677312961e-06, "loss": 0.3118, "step": 1317 }, { "epoch": 0.06, "grad_norm": 0.7517369159089695, "learning_rate": 4.999990152192817e-06, "loss": 0.3251, "step": 1318 }, { "epoch": 0.06, "grad_norm": 0.728674657623226, "learning_rate": 4.999989612685858e-06, "loss": 0.2986, "step": 1319 }, { "epoch": 0.06, "grad_norm": 0.7471289297645265, "learning_rate": 4.999989058792087e-06, "loss": 0.3412, "step": 1320 }, { "epoch": 0.06, "grad_norm": 0.6840244527003992, "learning_rate": 4.999988490511507e-06, "loss": 0.3144, "step": 1321 }, { "epoch": 0.06, "grad_norm": 0.7410554736281589, "learning_rate": 4.9999879078441215e-06, "loss": 0.3281, "step": 1322 }, { "epoch": 0.06, "grad_norm": 0.7187145273078142, "learning_rate": 4.999987310789935e-06, "loss": 0.3048, "step": 1323 }, { "epoch": 0.06, "grad_norm": 0.7563440638011815, "learning_rate": 4.9999866993489485e-06, "loss": 0.3233, "step": 1324 }, { "epoch": 0.06, "grad_norm": 0.826675002511806, "learning_rate": 4.999986073521168e-06, "loss": 0.3373, "step": 1325 }, { "epoch": 0.06, "grad_norm": 0.7260286045265256, "learning_rate": 4.999985433306595e-06, "loss": 0.3316, "step": 1326 }, { "epoch": 0.06, "grad_norm": 0.8456393204595302, "learning_rate": 4.999984778705235e-06, "loss": 0.3164, "step": 1327 }, { "epoch": 0.06, "grad_norm": 0.7625339001232265, "learning_rate": 4.999984109717091e-06, "loss": 0.3324, "step": 1328 }, { "epoch": 0.06, "grad_norm": 0.707449583391776, "learning_rate": 4.999983426342165e-06, "loss": 0.3302, "step": 1329 }, { "epoch": 0.06, "grad_norm": 0.7240693800042678, "learning_rate": 4.9999827285804645e-06, "loss": 0.3247, "step": 1330 }, { "epoch": 0.06, "grad_norm": 0.7376891288395485, "learning_rate": 4.9999820164319914e-06, "loss": 0.3205, "step": 1331 }, { "epoch": 0.06, "grad_norm": 0.7594742731199269, "learning_rate": 4.99998128989675e-06, "loss": 0.3314, "step": 1332 }, { "epoch": 0.06, "grad_norm": 0.7393227447224908, "learning_rate": 4.999980548974745e-06, "loss": 0.3267, "step": 1333 }, { "epoch": 0.06, "grad_norm": 0.7442323249759815, "learning_rate": 4.99997979366598e-06, "loss": 0.3219, "step": 1334 }, { "epoch": 0.06, "grad_norm": 0.699437957714745, "learning_rate": 4.9999790239704594e-06, "loss": 0.2982, "step": 1335 }, { "epoch": 0.06, "grad_norm": 0.7149296770155923, "learning_rate": 4.999978239888188e-06, "loss": 0.3221, "step": 1336 }, { "epoch": 0.06, "grad_norm": 0.7507402452498291, "learning_rate": 4.9999774414191706e-06, "loss": 0.3158, "step": 1337 }, { "epoch": 0.06, "grad_norm": 0.7132009971878002, "learning_rate": 4.999976628563411e-06, "loss": 0.314, "step": 1338 }, { "epoch": 0.06, "grad_norm": 0.732670820746385, "learning_rate": 4.999975801320915e-06, "loss": 0.3112, "step": 1339 }, { "epoch": 0.06, "grad_norm": 0.6960867630576326, "learning_rate": 4.999974959691685e-06, "loss": 0.3212, "step": 1340 }, { "epoch": 0.06, "grad_norm": 0.7336747596530897, "learning_rate": 4.999974103675729e-06, "loss": 0.3138, "step": 1341 }, { "epoch": 0.06, "grad_norm": 0.7376761786809612, "learning_rate": 4.99997323327305e-06, "loss": 0.3342, "step": 1342 }, { "epoch": 0.06, "grad_norm": 0.7663278709694125, "learning_rate": 4.999972348483653e-06, "loss": 0.3336, "step": 1343 }, { "epoch": 0.06, "grad_norm": 0.7182471284919185, "learning_rate": 4.999971449307543e-06, "loss": 0.3092, "step": 1344 }, { "epoch": 0.06, "grad_norm": 0.6752330412016309, "learning_rate": 4.999970535744726e-06, "loss": 0.2974, "step": 1345 }, { "epoch": 0.06, "grad_norm": 0.8053265039608123, "learning_rate": 4.999969607795207e-06, "loss": 0.3367, "step": 1346 }, { "epoch": 0.06, "grad_norm": 0.7530869191830782, "learning_rate": 4.999968665458992e-06, "loss": 0.3015, "step": 1347 }, { "epoch": 0.06, "grad_norm": 0.7315717862149176, "learning_rate": 4.999967708736085e-06, "loss": 0.3263, "step": 1348 }, { "epoch": 0.06, "grad_norm": 0.7651342740255899, "learning_rate": 4.999966737626492e-06, "loss": 0.3268, "step": 1349 }, { "epoch": 0.06, "grad_norm": 0.8765825368834491, "learning_rate": 4.999965752130219e-06, "loss": 0.3456, "step": 1350 }, { "epoch": 0.06, "grad_norm": 0.7453257401213077, "learning_rate": 4.999964752247271e-06, "loss": 0.3182, "step": 1351 }, { "epoch": 0.06, "grad_norm": 0.7207060754579225, "learning_rate": 4.999963737977655e-06, "loss": 0.314, "step": 1352 }, { "epoch": 0.06, "grad_norm": 0.7723811117964019, "learning_rate": 4.999962709321376e-06, "loss": 0.3269, "step": 1353 }, { "epoch": 0.06, "grad_norm": 0.7672414775940899, "learning_rate": 4.99996166627844e-06, "loss": 0.3405, "step": 1354 }, { "epoch": 0.06, "grad_norm": 0.7376459107776716, "learning_rate": 4.999960608848852e-06, "loss": 0.3335, "step": 1355 }, { "epoch": 0.06, "grad_norm": 0.731540370103192, "learning_rate": 4.99995953703262e-06, "loss": 0.3109, "step": 1356 }, { "epoch": 0.06, "grad_norm": 0.7496925688781383, "learning_rate": 4.999958450829749e-06, "loss": 0.3233, "step": 1357 }, { "epoch": 0.06, "grad_norm": 0.7924665179991935, "learning_rate": 4.999957350240245e-06, "loss": 0.314, "step": 1358 }, { "epoch": 0.06, "grad_norm": 0.7467863568791201, "learning_rate": 4.999956235264115e-06, "loss": 0.3505, "step": 1359 }, { "epoch": 0.06, "grad_norm": 0.7425333397549844, "learning_rate": 4.9999551059013655e-06, "loss": 0.3091, "step": 1360 }, { "epoch": 0.06, "grad_norm": 0.7220379527817716, "learning_rate": 4.999953962152002e-06, "loss": 0.3104, "step": 1361 }, { "epoch": 0.06, "grad_norm": 0.7363486132695186, "learning_rate": 4.9999528040160325e-06, "loss": 0.3321, "step": 1362 }, { "epoch": 0.06, "grad_norm": 0.7845208892207514, "learning_rate": 4.999951631493463e-06, "loss": 0.3298, "step": 1363 }, { "epoch": 0.06, "grad_norm": 0.7652113944199916, "learning_rate": 4.999950444584299e-06, "loss": 0.3367, "step": 1364 }, { "epoch": 0.06, "grad_norm": 0.7699446091316131, "learning_rate": 4.99994924328855e-06, "loss": 0.3075, "step": 1365 }, { "epoch": 0.06, "grad_norm": 0.7817358011925266, "learning_rate": 4.9999480276062215e-06, "loss": 0.3425, "step": 1366 }, { "epoch": 0.06, "grad_norm": 0.7593612499750346, "learning_rate": 4.999946797537319e-06, "loss": 0.3364, "step": 1367 }, { "epoch": 0.06, "grad_norm": 0.7729739330217843, "learning_rate": 4.999945553081853e-06, "loss": 0.3191, "step": 1368 }, { "epoch": 0.06, "grad_norm": 0.8293784372725229, "learning_rate": 4.9999442942398265e-06, "loss": 0.3354, "step": 1369 }, { "epoch": 0.06, "grad_norm": 0.8294068613949712, "learning_rate": 4.999943021011251e-06, "loss": 0.3411, "step": 1370 }, { "epoch": 0.06, "grad_norm": 0.7718945851400763, "learning_rate": 4.99994173339613e-06, "loss": 0.3348, "step": 1371 }, { "epoch": 0.06, "grad_norm": 0.7874737232528474, "learning_rate": 4.999940431394473e-06, "loss": 0.3185, "step": 1372 }, { "epoch": 0.06, "grad_norm": 0.8004965991525651, "learning_rate": 4.999939115006288e-06, "loss": 0.3315, "step": 1373 }, { "epoch": 0.06, "grad_norm": 0.7780665180412123, "learning_rate": 4.999937784231581e-06, "loss": 0.3253, "step": 1374 }, { "epoch": 0.06, "grad_norm": 0.8331445053959585, "learning_rate": 4.999936439070361e-06, "loss": 0.3242, "step": 1375 }, { "epoch": 0.06, "grad_norm": 0.7505248455802971, "learning_rate": 4.999935079522635e-06, "loss": 0.317, "step": 1376 }, { "epoch": 0.06, "grad_norm": 0.7023018733888201, "learning_rate": 4.99993370558841e-06, "loss": 0.3306, "step": 1377 }, { "epoch": 0.06, "grad_norm": 0.7457684082937429, "learning_rate": 4.999932317267696e-06, "loss": 0.3193, "step": 1378 }, { "epoch": 0.06, "grad_norm": 0.7455622596608662, "learning_rate": 4.9999309145605e-06, "loss": 0.3207, "step": 1379 }, { "epoch": 0.06, "grad_norm": 0.7284889032760635, "learning_rate": 4.999929497466829e-06, "loss": 0.313, "step": 1380 }, { "epoch": 0.06, "grad_norm": 0.6766479267786115, "learning_rate": 4.999928065986692e-06, "loss": 0.2926, "step": 1381 }, { "epoch": 0.06, "grad_norm": 0.7498029346052265, "learning_rate": 4.999926620120098e-06, "loss": 0.328, "step": 1382 }, { "epoch": 0.06, "grad_norm": 0.7410110099764343, "learning_rate": 4.999925159867055e-06, "loss": 0.3283, "step": 1383 }, { "epoch": 0.06, "grad_norm": 0.7320006488126742, "learning_rate": 4.99992368522757e-06, "loss": 0.3225, "step": 1384 }, { "epoch": 0.06, "grad_norm": 0.7553792860215451, "learning_rate": 4.999922196201653e-06, "loss": 0.3147, "step": 1385 }, { "epoch": 0.06, "grad_norm": 0.7864043415783705, "learning_rate": 4.999920692789312e-06, "loss": 0.3253, "step": 1386 }, { "epoch": 0.06, "grad_norm": 0.7715901578313776, "learning_rate": 4.9999191749905556e-06, "loss": 0.3382, "step": 1387 }, { "epoch": 0.07, "grad_norm": 0.728328630068724, "learning_rate": 4.999917642805393e-06, "loss": 0.3232, "step": 1388 }, { "epoch": 0.07, "grad_norm": 0.7732240391462509, "learning_rate": 4.999916096233832e-06, "loss": 0.3304, "step": 1389 }, { "epoch": 0.07, "grad_norm": 0.7240590240714608, "learning_rate": 4.999914535275883e-06, "loss": 0.3118, "step": 1390 }, { "epoch": 0.07, "grad_norm": 0.7639291371417187, "learning_rate": 4.9999129599315544e-06, "loss": 0.3178, "step": 1391 }, { "epoch": 0.07, "grad_norm": 0.7580636566443026, "learning_rate": 4.999911370200854e-06, "loss": 0.3178, "step": 1392 }, { "epoch": 0.07, "grad_norm": 0.7975027762407995, "learning_rate": 4.999909766083792e-06, "loss": 0.3151, "step": 1393 }, { "epoch": 0.07, "grad_norm": 0.8284257291499109, "learning_rate": 4.999908147580379e-06, "loss": 0.3376, "step": 1394 }, { "epoch": 0.07, "grad_norm": 0.6870880785742166, "learning_rate": 4.999906514690622e-06, "loss": 0.3063, "step": 1395 }, { "epoch": 0.07, "grad_norm": 0.750755239452954, "learning_rate": 4.999904867414532e-06, "loss": 0.3087, "step": 1396 }, { "epoch": 0.07, "grad_norm": 0.7686038513243841, "learning_rate": 4.999903205752117e-06, "loss": 0.3053, "step": 1397 }, { "epoch": 0.07, "grad_norm": 0.6955423395534619, "learning_rate": 4.999901529703387e-06, "loss": 0.3145, "step": 1398 }, { "epoch": 0.07, "grad_norm": 0.7458412751939388, "learning_rate": 4.999899839268354e-06, "loss": 0.3094, "step": 1399 }, { "epoch": 0.07, "grad_norm": 0.8556588701793159, "learning_rate": 4.999898134447024e-06, "loss": 0.3531, "step": 1400 }, { "epoch": 0.07, "grad_norm": 0.7759435277767608, "learning_rate": 4.999896415239409e-06, "loss": 0.3136, "step": 1401 }, { "epoch": 0.07, "grad_norm": 0.713043956245841, "learning_rate": 4.9998946816455186e-06, "loss": 0.3031, "step": 1402 }, { "epoch": 0.07, "grad_norm": 0.721641871002048, "learning_rate": 4.999892933665362e-06, "loss": 0.3133, "step": 1403 }, { "epoch": 0.07, "grad_norm": 0.7847844087621879, "learning_rate": 4.9998911712989506e-06, "loss": 0.3443, "step": 1404 }, { "epoch": 0.07, "grad_norm": 0.7543272724175308, "learning_rate": 4.9998893945462935e-06, "loss": 0.3135, "step": 1405 }, { "epoch": 0.07, "grad_norm": 0.7515858786415736, "learning_rate": 4.999887603407402e-06, "loss": 0.3236, "step": 1406 }, { "epoch": 0.07, "grad_norm": 0.7276615670183029, "learning_rate": 4.999885797882284e-06, "loss": 0.3288, "step": 1407 }, { "epoch": 0.07, "grad_norm": 0.7678786528139937, "learning_rate": 4.999883977970953e-06, "loss": 0.3086, "step": 1408 }, { "epoch": 0.07, "grad_norm": 0.7839575915932258, "learning_rate": 4.999882143673418e-06, "loss": 0.3043, "step": 1409 }, { "epoch": 0.07, "grad_norm": 0.7796041764180076, "learning_rate": 4.999880294989689e-06, "loss": 0.3272, "step": 1410 }, { "epoch": 0.07, "grad_norm": 0.7574251431609124, "learning_rate": 4.999878431919777e-06, "loss": 0.3224, "step": 1411 }, { "epoch": 0.07, "grad_norm": 0.8005703637422873, "learning_rate": 4.999876554463694e-06, "loss": 0.3437, "step": 1412 }, { "epoch": 0.07, "grad_norm": 0.770765554335854, "learning_rate": 4.9998746626214486e-06, "loss": 0.335, "step": 1413 }, { "epoch": 0.07, "grad_norm": 0.760272510637501, "learning_rate": 4.999872756393054e-06, "loss": 0.3254, "step": 1414 }, { "epoch": 0.07, "grad_norm": 0.7500995242982944, "learning_rate": 4.9998708357785185e-06, "loss": 0.3016, "step": 1415 }, { "epoch": 0.07, "grad_norm": 0.7675302714379824, "learning_rate": 4.999868900777855e-06, "loss": 0.3571, "step": 1416 }, { "epoch": 0.07, "grad_norm": 0.7471692721093864, "learning_rate": 4.999866951391076e-06, "loss": 0.3136, "step": 1417 }, { "epoch": 0.07, "grad_norm": 0.7413781941137431, "learning_rate": 4.9998649876181895e-06, "loss": 0.3259, "step": 1418 }, { "epoch": 0.07, "grad_norm": 0.7385478956059102, "learning_rate": 4.999863009459209e-06, "loss": 0.3267, "step": 1419 }, { "epoch": 0.07, "grad_norm": 0.7287171944674179, "learning_rate": 4.9998610169141444e-06, "loss": 0.3151, "step": 1420 }, { "epoch": 0.07, "grad_norm": 0.8016330543785316, "learning_rate": 4.999859009983009e-06, "loss": 0.3311, "step": 1421 }, { "epoch": 0.07, "grad_norm": 0.7659098906624392, "learning_rate": 4.999856988665812e-06, "loss": 0.3151, "step": 1422 }, { "epoch": 0.07, "grad_norm": 0.7748308044546797, "learning_rate": 4.999854952962568e-06, "loss": 0.3208, "step": 1423 }, { "epoch": 0.07, "grad_norm": 0.8162940742738379, "learning_rate": 4.999852902873286e-06, "loss": 0.3456, "step": 1424 }, { "epoch": 0.07, "grad_norm": 0.8064536318089695, "learning_rate": 4.999850838397978e-06, "loss": 0.3314, "step": 1425 }, { "epoch": 0.07, "grad_norm": 0.7051252301364939, "learning_rate": 4.999848759536658e-06, "loss": 0.3278, "step": 1426 }, { "epoch": 0.07, "grad_norm": 0.7636065634705897, "learning_rate": 4.999846666289337e-06, "loss": 0.3212, "step": 1427 }, { "epoch": 0.07, "grad_norm": 0.7985170581778217, "learning_rate": 4.999844558656025e-06, "loss": 0.3118, "step": 1428 }, { "epoch": 0.07, "grad_norm": 0.696943496637969, "learning_rate": 4.9998424366367386e-06, "loss": 0.3355, "step": 1429 }, { "epoch": 0.07, "grad_norm": 0.7678619919042976, "learning_rate": 4.999840300231485e-06, "loss": 0.3443, "step": 1430 }, { "epoch": 0.07, "grad_norm": 0.7645393944549164, "learning_rate": 4.9998381494402794e-06, "loss": 0.3177, "step": 1431 }, { "epoch": 0.07, "grad_norm": 0.7960209204280255, "learning_rate": 4.999835984263134e-06, "loss": 0.3061, "step": 1432 }, { "epoch": 0.07, "grad_norm": 0.7852127326746268, "learning_rate": 4.99983380470006e-06, "loss": 0.3271, "step": 1433 }, { "epoch": 0.07, "grad_norm": 0.8245427438276254, "learning_rate": 4.999831610751071e-06, "loss": 0.3367, "step": 1434 }, { "epoch": 0.07, "grad_norm": 0.7055901907643555, "learning_rate": 4.99982940241618e-06, "loss": 0.295, "step": 1435 }, { "epoch": 0.07, "grad_norm": 0.7743251183803946, "learning_rate": 4.999827179695399e-06, "loss": 0.3116, "step": 1436 }, { "epoch": 0.07, "grad_norm": 0.7572678382918469, "learning_rate": 4.9998249425887404e-06, "loss": 0.3205, "step": 1437 }, { "epoch": 0.07, "grad_norm": 0.7530058536620665, "learning_rate": 4.999822691096218e-06, "loss": 0.3261, "step": 1438 }, { "epoch": 0.07, "grad_norm": 0.7935108533156817, "learning_rate": 4.999820425217844e-06, "loss": 0.337, "step": 1439 }, { "epoch": 0.07, "grad_norm": 0.6879200841488003, "learning_rate": 4.999818144953632e-06, "loss": 0.2973, "step": 1440 }, { "epoch": 0.07, "grad_norm": 0.7710208367699641, "learning_rate": 4.999815850303595e-06, "loss": 0.3216, "step": 1441 }, { "epoch": 0.07, "grad_norm": 0.7768023803389901, "learning_rate": 4.999813541267745e-06, "loss": 0.3284, "step": 1442 }, { "epoch": 0.07, "grad_norm": 0.7780844186171958, "learning_rate": 4.999811217846098e-06, "loss": 0.2999, "step": 1443 }, { "epoch": 0.07, "grad_norm": 0.9005667750952311, "learning_rate": 4.9998088800386645e-06, "loss": 0.3403, "step": 1444 }, { "epoch": 0.07, "grad_norm": 0.6656818167416058, "learning_rate": 4.99980652784546e-06, "loss": 0.3042, "step": 1445 }, { "epoch": 0.07, "grad_norm": 0.7849902303955985, "learning_rate": 4.999804161266497e-06, "loss": 0.3059, "step": 1446 }, { "epoch": 0.07, "grad_norm": 0.7511714916351702, "learning_rate": 4.9998017803017894e-06, "loss": 0.3003, "step": 1447 }, { "epoch": 0.07, "grad_norm": 0.7499509656137348, "learning_rate": 4.99979938495135e-06, "loss": 0.3238, "step": 1448 }, { "epoch": 0.07, "grad_norm": 0.7898348344375091, "learning_rate": 4.999796975215195e-06, "loss": 0.3292, "step": 1449 }, { "epoch": 0.07, "grad_norm": 0.7759340245363471, "learning_rate": 4.999794551093336e-06, "loss": 0.3229, "step": 1450 }, { "epoch": 0.07, "grad_norm": 0.7949613514841498, "learning_rate": 4.999792112585787e-06, "loss": 0.3254, "step": 1451 }, { "epoch": 0.07, "grad_norm": 0.811030214076513, "learning_rate": 4.999789659692564e-06, "loss": 0.3408, "step": 1452 }, { "epoch": 0.07, "grad_norm": 0.7863273664766185, "learning_rate": 4.999787192413679e-06, "loss": 0.2963, "step": 1453 }, { "epoch": 0.07, "grad_norm": 0.8046246008440711, "learning_rate": 4.999784710749146e-06, "loss": 0.3058, "step": 1454 }, { "epoch": 0.07, "grad_norm": 0.7505664921887646, "learning_rate": 4.999782214698982e-06, "loss": 0.3136, "step": 1455 }, { "epoch": 0.07, "grad_norm": 0.7931554508127876, "learning_rate": 4.9997797042631994e-06, "loss": 0.3439, "step": 1456 }, { "epoch": 0.07, "grad_norm": 0.7769282421116374, "learning_rate": 4.999777179441812e-06, "loss": 0.3191, "step": 1457 }, { "epoch": 0.07, "grad_norm": 0.88297802817566, "learning_rate": 4.999774640234836e-06, "loss": 0.3402, "step": 1458 }, { "epoch": 0.07, "grad_norm": 0.7878196274623868, "learning_rate": 4.9997720866422845e-06, "loss": 0.324, "step": 1459 }, { "epoch": 0.07, "grad_norm": 0.7209205083679588, "learning_rate": 4.9997695186641735e-06, "loss": 0.3226, "step": 1460 }, { "epoch": 0.07, "grad_norm": 0.7178222919139742, "learning_rate": 4.999766936300517e-06, "loss": 0.3329, "step": 1461 }, { "epoch": 0.07, "grad_norm": 0.7331462724333657, "learning_rate": 4.99976433955133e-06, "loss": 0.3262, "step": 1462 }, { "epoch": 0.07, "grad_norm": 0.6983354735218428, "learning_rate": 4.999761728416628e-06, "loss": 0.291, "step": 1463 }, { "epoch": 0.07, "grad_norm": 0.7515257028444886, "learning_rate": 4.999759102896425e-06, "loss": 0.3174, "step": 1464 }, { "epoch": 0.07, "grad_norm": 0.7171470173784439, "learning_rate": 4.999756462990737e-06, "loss": 0.3335, "step": 1465 }, { "epoch": 0.07, "grad_norm": 0.6550264664467874, "learning_rate": 4.999753808699579e-06, "loss": 0.3158, "step": 1466 }, { "epoch": 0.07, "grad_norm": 0.7401193701766067, "learning_rate": 4.999751140022965e-06, "loss": 0.3332, "step": 1467 }, { "epoch": 0.07, "grad_norm": 0.7367393117315025, "learning_rate": 4.9997484569609125e-06, "loss": 0.3255, "step": 1468 }, { "epoch": 0.07, "grad_norm": 0.7532629974878763, "learning_rate": 4.999745759513436e-06, "loss": 0.3427, "step": 1469 }, { "epoch": 0.07, "grad_norm": 0.7589488653499541, "learning_rate": 4.999743047680551e-06, "loss": 0.3315, "step": 1470 }, { "epoch": 0.07, "grad_norm": 0.7570716731130944, "learning_rate": 4.999740321462272e-06, "loss": 0.3199, "step": 1471 }, { "epoch": 0.07, "grad_norm": 0.6830572060485115, "learning_rate": 4.999737580858617e-06, "loss": 0.3019, "step": 1472 }, { "epoch": 0.07, "grad_norm": 0.7972202301333423, "learning_rate": 4.9997348258696e-06, "loss": 0.3377, "step": 1473 }, { "epoch": 0.07, "grad_norm": 0.7408353535294735, "learning_rate": 4.9997320564952364e-06, "loss": 0.3244, "step": 1474 }, { "epoch": 0.07, "grad_norm": 0.8500308092057246, "learning_rate": 4.999729272735545e-06, "loss": 0.3233, "step": 1475 }, { "epoch": 0.07, "grad_norm": 0.6767041635007625, "learning_rate": 4.9997264745905385e-06, "loss": 0.3163, "step": 1476 }, { "epoch": 0.07, "grad_norm": 0.7278111667740418, "learning_rate": 4.999723662060235e-06, "loss": 0.3325, "step": 1477 }, { "epoch": 0.07, "grad_norm": 0.7577688724372197, "learning_rate": 4.99972083514465e-06, "loss": 0.3303, "step": 1478 }, { "epoch": 0.07, "grad_norm": 0.7684262999074705, "learning_rate": 4.9997179938438e-06, "loss": 0.3365, "step": 1479 }, { "epoch": 0.07, "grad_norm": 0.7207956773728583, "learning_rate": 4.999715138157702e-06, "loss": 0.3158, "step": 1480 }, { "epoch": 0.07, "grad_norm": 0.660766685104361, "learning_rate": 4.99971226808637e-06, "loss": 0.304, "step": 1481 }, { "epoch": 0.07, "grad_norm": 0.7828411923637231, "learning_rate": 4.999709383629823e-06, "loss": 0.316, "step": 1482 }, { "epoch": 0.07, "grad_norm": 0.7390744229238987, "learning_rate": 4.999706484788076e-06, "loss": 0.3308, "step": 1483 }, { "epoch": 0.07, "grad_norm": 0.7107513398493261, "learning_rate": 4.9997035715611476e-06, "loss": 0.3405, "step": 1484 }, { "epoch": 0.07, "grad_norm": 0.7586528681082031, "learning_rate": 4.999700643949053e-06, "loss": 0.3364, "step": 1485 }, { "epoch": 0.07, "grad_norm": 0.7621367392687403, "learning_rate": 4.999697701951809e-06, "loss": 0.3408, "step": 1486 }, { "epoch": 0.07, "grad_norm": 0.6707334982158796, "learning_rate": 4.999694745569434e-06, "loss": 0.2888, "step": 1487 }, { "epoch": 0.07, "grad_norm": 0.6949320169518288, "learning_rate": 4.9996917748019435e-06, "loss": 0.3206, "step": 1488 }, { "epoch": 0.07, "grad_norm": 0.7900310038896375, "learning_rate": 4.999688789649355e-06, "loss": 0.3254, "step": 1489 }, { "epoch": 0.07, "grad_norm": 0.734224741489078, "learning_rate": 4.999685790111686e-06, "loss": 0.3351, "step": 1490 }, { "epoch": 0.07, "grad_norm": 0.7089561733450659, "learning_rate": 4.9996827761889535e-06, "loss": 0.3292, "step": 1491 }, { "epoch": 0.07, "grad_norm": 0.8157798321741538, "learning_rate": 4.999679747881174e-06, "loss": 0.3257, "step": 1492 }, { "epoch": 0.07, "grad_norm": 0.7462695793704598, "learning_rate": 4.999676705188367e-06, "loss": 0.3195, "step": 1493 }, { "epoch": 0.07, "grad_norm": 0.7522877880571439, "learning_rate": 4.999673648110549e-06, "loss": 0.3082, "step": 1494 }, { "epoch": 0.07, "grad_norm": 0.842096333993474, "learning_rate": 4.999670576647736e-06, "loss": 0.3408, "step": 1495 }, { "epoch": 0.07, "grad_norm": 0.7886679015240067, "learning_rate": 4.999667490799948e-06, "loss": 0.319, "step": 1496 }, { "epoch": 0.07, "grad_norm": 0.7329785823970988, "learning_rate": 4.9996643905672024e-06, "loss": 0.3237, "step": 1497 }, { "epoch": 0.07, "grad_norm": 0.7920290208959585, "learning_rate": 4.9996612759495155e-06, "loss": 0.3168, "step": 1498 }, { "epoch": 0.07, "grad_norm": 0.7016122990559039, "learning_rate": 4.999658146946907e-06, "loss": 0.3125, "step": 1499 }, { "epoch": 0.07, "grad_norm": 0.7297207495928555, "learning_rate": 4.999655003559394e-06, "loss": 0.3271, "step": 1500 }, { "epoch": 0.07, "grad_norm": 0.7888931666103407, "learning_rate": 4.999651845786994e-06, "loss": 0.3249, "step": 1501 }, { "epoch": 0.07, "grad_norm": 0.7481178407915542, "learning_rate": 4.999648673629727e-06, "loss": 0.3176, "step": 1502 }, { "epoch": 0.07, "grad_norm": 0.7070698464219193, "learning_rate": 4.9996454870876085e-06, "loss": 0.3128, "step": 1503 }, { "epoch": 0.07, "grad_norm": 0.7242977001610423, "learning_rate": 4.99964228616066e-06, "loss": 0.3177, "step": 1504 }, { "epoch": 0.07, "grad_norm": 0.7221293720659834, "learning_rate": 4.999639070848898e-06, "loss": 0.3165, "step": 1505 }, { "epoch": 0.07, "grad_norm": 0.7520132804809743, "learning_rate": 4.999635841152342e-06, "loss": 0.3285, "step": 1506 }, { "epoch": 0.07, "grad_norm": 0.7320463754698797, "learning_rate": 4.999632597071008e-06, "loss": 0.3169, "step": 1507 }, { "epoch": 0.07, "grad_norm": 0.8043506670384957, "learning_rate": 4.999629338604919e-06, "loss": 0.3306, "step": 1508 }, { "epoch": 0.07, "grad_norm": 0.7289483279065602, "learning_rate": 4.99962606575409e-06, "loss": 0.3004, "step": 1509 }, { "epoch": 0.07, "grad_norm": 0.7508220128921895, "learning_rate": 4.9996227785185415e-06, "loss": 0.3173, "step": 1510 }, { "epoch": 0.07, "grad_norm": 0.729523572859493, "learning_rate": 4.999619476898292e-06, "loss": 0.3284, "step": 1511 }, { "epoch": 0.07, "grad_norm": 0.7277889113116073, "learning_rate": 4.999616160893361e-06, "loss": 0.3188, "step": 1512 }, { "epoch": 0.07, "grad_norm": 0.7708974964846851, "learning_rate": 4.9996128305037675e-06, "loss": 0.3216, "step": 1513 }, { "epoch": 0.07, "grad_norm": 0.7328097206962179, "learning_rate": 4.999609485729531e-06, "loss": 0.3101, "step": 1514 }, { "epoch": 0.07, "grad_norm": 0.7900625240014929, "learning_rate": 4.999606126570669e-06, "loss": 0.3228, "step": 1515 }, { "epoch": 0.07, "grad_norm": 0.7664824236357681, "learning_rate": 4.999602753027202e-06, "loss": 0.3436, "step": 1516 }, { "epoch": 0.07, "grad_norm": 0.7795850783777316, "learning_rate": 4.99959936509915e-06, "loss": 0.3146, "step": 1517 }, { "epoch": 0.07, "grad_norm": 0.8129287625859359, "learning_rate": 4.999595962786532e-06, "loss": 0.3286, "step": 1518 }, { "epoch": 0.07, "grad_norm": 0.7631296667183622, "learning_rate": 4.9995925460893664e-06, "loss": 0.3145, "step": 1519 }, { "epoch": 0.07, "grad_norm": 0.7700571251955761, "learning_rate": 4.999589115007675e-06, "loss": 0.3089, "step": 1520 }, { "epoch": 0.07, "grad_norm": 0.7804865653166885, "learning_rate": 4.999585669541476e-06, "loss": 0.3189, "step": 1521 }, { "epoch": 0.07, "grad_norm": 0.8046219415553443, "learning_rate": 4.99958220969079e-06, "loss": 0.3372, "step": 1522 }, { "epoch": 0.07, "grad_norm": 0.7821502375022974, "learning_rate": 4.999578735455636e-06, "loss": 0.3187, "step": 1523 }, { "epoch": 0.07, "grad_norm": 0.7280757898553232, "learning_rate": 4.999575246836036e-06, "loss": 0.3161, "step": 1524 }, { "epoch": 0.07, "grad_norm": 0.8153552072473981, "learning_rate": 4.999571743832007e-06, "loss": 0.3429, "step": 1525 }, { "epoch": 0.07, "grad_norm": 0.904142585099536, "learning_rate": 4.999568226443572e-06, "loss": 0.3415, "step": 1526 }, { "epoch": 0.07, "grad_norm": 0.7654763676279214, "learning_rate": 4.99956469467075e-06, "loss": 0.3177, "step": 1527 }, { "epoch": 0.07, "grad_norm": 0.7520403034344559, "learning_rate": 4.999561148513561e-06, "loss": 0.313, "step": 1528 }, { "epoch": 0.07, "grad_norm": 0.7447390713355699, "learning_rate": 4.999557587972026e-06, "loss": 0.2968, "step": 1529 }, { "epoch": 0.07, "grad_norm": 0.7924378365296926, "learning_rate": 4.999554013046165e-06, "loss": 0.3085, "step": 1530 }, { "epoch": 0.07, "grad_norm": 0.8146373815794455, "learning_rate": 4.9995504237359994e-06, "loss": 0.3252, "step": 1531 }, { "epoch": 0.07, "grad_norm": 0.7184854850470002, "learning_rate": 4.999546820041549e-06, "loss": 0.306, "step": 1532 }, { "epoch": 0.07, "grad_norm": 0.7785688745390072, "learning_rate": 4.9995432019628355e-06, "loss": 0.3494, "step": 1533 }, { "epoch": 0.07, "grad_norm": 0.7954621537509475, "learning_rate": 4.999539569499878e-06, "loss": 0.3089, "step": 1534 }, { "epoch": 0.07, "grad_norm": 0.7117172999110207, "learning_rate": 4.9995359226527e-06, "loss": 0.3333, "step": 1535 }, { "epoch": 0.07, "grad_norm": 0.8265710724847547, "learning_rate": 4.9995322614213195e-06, "loss": 0.3233, "step": 1536 }, { "epoch": 0.07, "grad_norm": 0.7242801056379659, "learning_rate": 4.99952858580576e-06, "loss": 0.3178, "step": 1537 }, { "epoch": 0.07, "grad_norm": 0.7164278565347454, "learning_rate": 4.999524895806042e-06, "loss": 0.3237, "step": 1538 }, { "epoch": 0.07, "grad_norm": 0.7180861021551446, "learning_rate": 4.999521191422185e-06, "loss": 0.3354, "step": 1539 }, { "epoch": 0.07, "grad_norm": 0.6976371542360692, "learning_rate": 4.9995174726542135e-06, "loss": 0.3083, "step": 1540 }, { "epoch": 0.07, "grad_norm": 0.7035713162569168, "learning_rate": 4.999513739502147e-06, "loss": 0.3275, "step": 1541 }, { "epoch": 0.07, "grad_norm": 0.7501275657201921, "learning_rate": 4.999509991966006e-06, "loss": 0.316, "step": 1542 }, { "epoch": 0.07, "grad_norm": 0.6853848262537231, "learning_rate": 4.9995062300458136e-06, "loss": 0.3156, "step": 1543 }, { "epoch": 0.07, "grad_norm": 0.7140777882207315, "learning_rate": 4.999502453741591e-06, "loss": 0.3376, "step": 1544 }, { "epoch": 0.07, "grad_norm": 0.7091259844997619, "learning_rate": 4.999498663053361e-06, "loss": 0.3272, "step": 1545 }, { "epoch": 0.07, "grad_norm": 0.7557288394547415, "learning_rate": 4.999494857981143e-06, "loss": 0.3151, "step": 1546 }, { "epoch": 0.07, "grad_norm": 0.7194751115015698, "learning_rate": 4.9994910385249614e-06, "loss": 0.295, "step": 1547 }, { "epoch": 0.07, "grad_norm": 0.7801434553746144, "learning_rate": 4.999487204684836e-06, "loss": 0.3296, "step": 1548 }, { "epoch": 0.07, "grad_norm": 0.7949960143614095, "learning_rate": 4.999483356460791e-06, "loss": 0.2971, "step": 1549 }, { "epoch": 0.07, "grad_norm": 0.7577336533172907, "learning_rate": 4.999479493852847e-06, "loss": 0.3231, "step": 1550 }, { "epoch": 0.07, "grad_norm": 0.8611660494149874, "learning_rate": 4.9994756168610274e-06, "loss": 0.3315, "step": 1551 }, { "epoch": 0.07, "grad_norm": 0.7115035048995083, "learning_rate": 4.999471725485353e-06, "loss": 0.3352, "step": 1552 }, { "epoch": 0.07, "grad_norm": 0.720803129842589, "learning_rate": 4.9994678197258475e-06, "loss": 0.3211, "step": 1553 }, { "epoch": 0.07, "grad_norm": 0.8126696640878327, "learning_rate": 4.999463899582533e-06, "loss": 0.3217, "step": 1554 }, { "epoch": 0.07, "grad_norm": 0.7260695689138561, "learning_rate": 4.999459965055432e-06, "loss": 0.3066, "step": 1555 }, { "epoch": 0.07, "grad_norm": 0.738263222998644, "learning_rate": 4.999456016144566e-06, "loss": 0.3319, "step": 1556 }, { "epoch": 0.07, "grad_norm": 0.7454447197988133, "learning_rate": 4.99945205284996e-06, "loss": 0.3352, "step": 1557 }, { "epoch": 0.07, "grad_norm": 0.7676085745630984, "learning_rate": 4.999448075171636e-06, "loss": 0.3159, "step": 1558 }, { "epoch": 0.07, "grad_norm": 0.7544609261141583, "learning_rate": 4.999444083109616e-06, "loss": 0.3262, "step": 1559 }, { "epoch": 0.07, "grad_norm": 0.7596667273216736, "learning_rate": 4.999440076663923e-06, "loss": 0.344, "step": 1560 }, { "epoch": 0.07, "grad_norm": 0.7309542676809847, "learning_rate": 4.9994360558345815e-06, "loss": 0.3281, "step": 1561 }, { "epoch": 0.07, "grad_norm": 0.7050190677894967, "learning_rate": 4.9994320206216125e-06, "loss": 0.3012, "step": 1562 }, { "epoch": 0.07, "grad_norm": 0.7565841770688835, "learning_rate": 4.9994279710250415e-06, "loss": 0.3398, "step": 1563 }, { "epoch": 0.07, "grad_norm": 0.7108711216627874, "learning_rate": 4.999423907044891e-06, "loss": 0.3161, "step": 1564 }, { "epoch": 0.07, "grad_norm": 0.7468147261953452, "learning_rate": 4.9994198286811825e-06, "loss": 0.3256, "step": 1565 }, { "epoch": 0.07, "grad_norm": 0.7200574534664601, "learning_rate": 4.999415735933943e-06, "loss": 0.313, "step": 1566 }, { "epoch": 0.07, "grad_norm": 0.7762689688882982, "learning_rate": 4.999411628803192e-06, "loss": 0.3047, "step": 1567 }, { "epoch": 0.07, "grad_norm": 0.744724846615189, "learning_rate": 4.999407507288957e-06, "loss": 0.3294, "step": 1568 }, { "epoch": 0.07, "grad_norm": 0.7251104920048113, "learning_rate": 4.999403371391259e-06, "loss": 0.3146, "step": 1569 }, { "epoch": 0.07, "grad_norm": 0.6783811370715859, "learning_rate": 4.999399221110124e-06, "loss": 0.3045, "step": 1570 }, { "epoch": 0.07, "grad_norm": 0.7459990808042437, "learning_rate": 4.999395056445574e-06, "loss": 0.3399, "step": 1571 }, { "epoch": 0.07, "grad_norm": 0.7886185310807565, "learning_rate": 4.9993908773976335e-06, "loss": 0.3208, "step": 1572 }, { "epoch": 0.07, "grad_norm": 0.6951992277417983, "learning_rate": 4.9993866839663265e-06, "loss": 0.3117, "step": 1573 }, { "epoch": 0.07, "grad_norm": 0.6618623193122845, "learning_rate": 4.999382476151678e-06, "loss": 0.3066, "step": 1574 }, { "epoch": 0.07, "grad_norm": 0.7156384015624929, "learning_rate": 4.999378253953711e-06, "loss": 0.3158, "step": 1575 }, { "epoch": 0.07, "grad_norm": 0.6727640514860471, "learning_rate": 4.999374017372451e-06, "loss": 0.3077, "step": 1576 }, { "epoch": 0.07, "grad_norm": 0.7725779480492453, "learning_rate": 4.999369766407921e-06, "loss": 0.3327, "step": 1577 }, { "epoch": 0.07, "grad_norm": 0.7170565022267472, "learning_rate": 4.999365501060147e-06, "loss": 0.3186, "step": 1578 }, { "epoch": 0.07, "grad_norm": 0.6685425117008461, "learning_rate": 4.999361221329152e-06, "loss": 0.3082, "step": 1579 }, { "epoch": 0.07, "grad_norm": 0.7645416182266488, "learning_rate": 4.999356927214961e-06, "loss": 0.3472, "step": 1580 }, { "epoch": 0.07, "grad_norm": 0.7811348475060741, "learning_rate": 4.9993526187176e-06, "loss": 0.3389, "step": 1581 }, { "epoch": 0.07, "grad_norm": 0.7354481208722943, "learning_rate": 4.999348295837092e-06, "loss": 0.3197, "step": 1582 }, { "epoch": 0.07, "grad_norm": 0.7578029989797281, "learning_rate": 4.9993439585734635e-06, "loss": 0.3224, "step": 1583 }, { "epoch": 0.07, "grad_norm": 0.7282013324265326, "learning_rate": 4.999339606926738e-06, "loss": 0.3228, "step": 1584 }, { "epoch": 0.07, "grad_norm": 0.7545225962405183, "learning_rate": 4.999335240896942e-06, "loss": 0.3314, "step": 1585 }, { "epoch": 0.07, "grad_norm": 0.7736047193816242, "learning_rate": 4.9993308604841e-06, "loss": 0.3271, "step": 1586 }, { "epoch": 0.07, "grad_norm": 0.7338290995817963, "learning_rate": 4.999326465688237e-06, "loss": 0.3345, "step": 1587 }, { "epoch": 0.07, "grad_norm": 0.7038057895773236, "learning_rate": 4.999322056509378e-06, "loss": 0.3099, "step": 1588 }, { "epoch": 0.07, "grad_norm": 0.7088622231228038, "learning_rate": 4.999317632947549e-06, "loss": 0.3293, "step": 1589 }, { "epoch": 0.07, "grad_norm": 0.6909281726507206, "learning_rate": 4.999313195002774e-06, "loss": 0.3007, "step": 1590 }, { "epoch": 0.07, "grad_norm": 0.7022354236752141, "learning_rate": 4.9993087426750805e-06, "loss": 0.3131, "step": 1591 }, { "epoch": 0.07, "grad_norm": 0.6834875964544871, "learning_rate": 4.999304275964494e-06, "loss": 0.3145, "step": 1592 }, { "epoch": 0.07, "grad_norm": 0.722314853238451, "learning_rate": 4.999299794871039e-06, "loss": 0.33, "step": 1593 }, { "epoch": 0.07, "grad_norm": 0.6815492027121889, "learning_rate": 4.9992952993947425e-06, "loss": 0.3054, "step": 1594 }, { "epoch": 0.07, "grad_norm": 0.7161421756622602, "learning_rate": 4.999290789535629e-06, "loss": 0.3058, "step": 1595 }, { "epoch": 0.07, "grad_norm": 0.7253518530301497, "learning_rate": 4.999286265293726e-06, "loss": 0.315, "step": 1596 }, { "epoch": 0.07, "grad_norm": 0.7502641728986101, "learning_rate": 4.9992817266690575e-06, "loss": 0.3177, "step": 1597 }, { "epoch": 0.07, "grad_norm": 0.7478340437518278, "learning_rate": 4.999277173661652e-06, "loss": 0.3201, "step": 1598 }, { "epoch": 0.07, "grad_norm": 0.7435914801876982, "learning_rate": 4.999272606271534e-06, "loss": 0.3479, "step": 1599 }, { "epoch": 0.07, "grad_norm": 0.7401637723219691, "learning_rate": 4.99926802449873e-06, "loss": 0.3264, "step": 1600 }, { "epoch": 0.07, "grad_norm": 0.762264834843614, "learning_rate": 4.999263428343267e-06, "loss": 0.3176, "step": 1601 }, { "epoch": 0.08, "grad_norm": 0.7422250110527431, "learning_rate": 4.999258817805171e-06, "loss": 0.3246, "step": 1602 }, { "epoch": 0.08, "grad_norm": 0.7618582712375128, "learning_rate": 4.999254192884469e-06, "loss": 0.3052, "step": 1603 }, { "epoch": 0.08, "grad_norm": 0.7339292728809115, "learning_rate": 4.9992495535811866e-06, "loss": 0.3336, "step": 1604 }, { "epoch": 0.08, "grad_norm": 0.6609183575223918, "learning_rate": 4.999244899895352e-06, "loss": 0.2909, "step": 1605 }, { "epoch": 0.08, "grad_norm": 0.7717855551799784, "learning_rate": 4.999240231826991e-06, "loss": 0.3104, "step": 1606 }, { "epoch": 0.08, "grad_norm": 0.770074935019334, "learning_rate": 4.9992355493761305e-06, "loss": 0.3089, "step": 1607 }, { "epoch": 0.08, "grad_norm": 0.7138351605496025, "learning_rate": 4.999230852542797e-06, "loss": 0.3028, "step": 1608 }, { "epoch": 0.08, "grad_norm": 0.6836608666602769, "learning_rate": 4.999226141327019e-06, "loss": 0.3027, "step": 1609 }, { "epoch": 0.08, "grad_norm": 0.76264224464993, "learning_rate": 4.9992214157288225e-06, "loss": 0.3292, "step": 1610 }, { "epoch": 0.08, "grad_norm": 0.718124507624867, "learning_rate": 4.999216675748235e-06, "loss": 0.3268, "step": 1611 }, { "epoch": 0.08, "grad_norm": 0.7966639628941184, "learning_rate": 4.999211921385283e-06, "loss": 0.2875, "step": 1612 }, { "epoch": 0.08, "grad_norm": 0.7374922881004943, "learning_rate": 4.999207152639995e-06, "loss": 0.3247, "step": 1613 }, { "epoch": 0.08, "grad_norm": 0.7982808240602456, "learning_rate": 4.999202369512398e-06, "loss": 0.3594, "step": 1614 }, { "epoch": 0.08, "grad_norm": 0.6767326630254633, "learning_rate": 4.9991975720025195e-06, "loss": 0.3101, "step": 1615 }, { "epoch": 0.08, "grad_norm": 0.7523780266203851, "learning_rate": 4.999192760110387e-06, "loss": 0.357, "step": 1616 }, { "epoch": 0.08, "grad_norm": 0.747332102240822, "learning_rate": 4.999187933836028e-06, "loss": 0.3115, "step": 1617 }, { "epoch": 0.08, "grad_norm": 0.7421595288962545, "learning_rate": 4.999183093179472e-06, "loss": 0.3332, "step": 1618 }, { "epoch": 0.08, "grad_norm": 0.7152668717206908, "learning_rate": 4.999178238140744e-06, "loss": 0.322, "step": 1619 }, { "epoch": 0.08, "grad_norm": 0.7610367437194858, "learning_rate": 4.999173368719874e-06, "loss": 0.3222, "step": 1620 }, { "epoch": 0.08, "grad_norm": 0.7477361680979185, "learning_rate": 4.999168484916889e-06, "loss": 0.3118, "step": 1621 }, { "epoch": 0.08, "grad_norm": 0.7850032291318698, "learning_rate": 4.999163586731818e-06, "loss": 0.3441, "step": 1622 }, { "epoch": 0.08, "grad_norm": 0.7735174823279416, "learning_rate": 4.999158674164688e-06, "loss": 0.3193, "step": 1623 }, { "epoch": 0.08, "grad_norm": 0.7013413381568773, "learning_rate": 4.999153747215529e-06, "loss": 0.3101, "step": 1624 }, { "epoch": 0.08, "grad_norm": 0.7214332890281085, "learning_rate": 4.999148805884368e-06, "loss": 0.3121, "step": 1625 }, { "epoch": 0.08, "grad_norm": 0.8038184289519242, "learning_rate": 4.999143850171233e-06, "loss": 0.3484, "step": 1626 }, { "epoch": 0.08, "grad_norm": 0.701630024366284, "learning_rate": 4.999138880076154e-06, "loss": 0.3094, "step": 1627 }, { "epoch": 0.08, "grad_norm": 0.7600781660106994, "learning_rate": 4.999133895599159e-06, "loss": 0.3072, "step": 1628 }, { "epoch": 0.08, "grad_norm": 0.6825130713542163, "learning_rate": 4.999128896740275e-06, "loss": 0.3222, "step": 1629 }, { "epoch": 0.08, "grad_norm": 0.8048960700229559, "learning_rate": 4.999123883499534e-06, "loss": 0.3541, "step": 1630 }, { "epoch": 0.08, "grad_norm": 0.7438402680391148, "learning_rate": 4.999118855876963e-06, "loss": 0.3298, "step": 1631 }, { "epoch": 0.08, "grad_norm": 0.7423778919481935, "learning_rate": 4.999113813872591e-06, "loss": 0.3108, "step": 1632 }, { "epoch": 0.08, "grad_norm": 0.6614086120713557, "learning_rate": 4.999108757486447e-06, "loss": 0.2924, "step": 1633 }, { "epoch": 0.08, "grad_norm": 0.7610039119180538, "learning_rate": 4.99910368671856e-06, "loss": 0.323, "step": 1634 }, { "epoch": 0.08, "grad_norm": 0.7972689346898589, "learning_rate": 4.99909860156896e-06, "loss": 0.3461, "step": 1635 }, { "epoch": 0.08, "grad_norm": 0.778524022555661, "learning_rate": 4.999093502037675e-06, "loss": 0.331, "step": 1636 }, { "epoch": 0.08, "grad_norm": 0.7563034880823084, "learning_rate": 4.999088388124736e-06, "loss": 0.3155, "step": 1637 }, { "epoch": 0.08, "grad_norm": 0.7375071135183848, "learning_rate": 4.999083259830171e-06, "loss": 0.3363, "step": 1638 }, { "epoch": 0.08, "grad_norm": 0.7178744669189682, "learning_rate": 4.9990781171540095e-06, "loss": 0.312, "step": 1639 }, { "epoch": 0.08, "grad_norm": 0.7452885061238755, "learning_rate": 4.999072960096281e-06, "loss": 0.299, "step": 1640 }, { "epoch": 0.08, "grad_norm": 0.8048486837416327, "learning_rate": 4.999067788657017e-06, "loss": 0.3126, "step": 1641 }, { "epoch": 0.08, "grad_norm": 0.7078401627495173, "learning_rate": 4.999062602836246e-06, "loss": 0.2944, "step": 1642 }, { "epoch": 0.08, "grad_norm": 0.7133066584199885, "learning_rate": 4.999057402633997e-06, "loss": 0.3225, "step": 1643 }, { "epoch": 0.08, "grad_norm": 0.7982168545728273, "learning_rate": 4.999052188050301e-06, "loss": 0.3328, "step": 1644 }, { "epoch": 0.08, "grad_norm": 0.7828734967931195, "learning_rate": 4.9990469590851875e-06, "loss": 0.3507, "step": 1645 }, { "epoch": 0.08, "grad_norm": 0.7487712520083271, "learning_rate": 4.999041715738687e-06, "loss": 0.3117, "step": 1646 }, { "epoch": 0.08, "grad_norm": 0.7634019631197171, "learning_rate": 4.99903645801083e-06, "loss": 0.308, "step": 1647 }, { "epoch": 0.08, "grad_norm": 0.7465538390064974, "learning_rate": 4.999031185901646e-06, "loss": 0.3188, "step": 1648 }, { "epoch": 0.08, "grad_norm": 0.7519035176230956, "learning_rate": 4.999025899411166e-06, "loss": 0.3359, "step": 1649 }, { "epoch": 0.08, "grad_norm": 0.7284348981755598, "learning_rate": 4.99902059853942e-06, "loss": 0.3101, "step": 1650 }, { "epoch": 0.08, "grad_norm": 0.7997777546980201, "learning_rate": 4.999015283286438e-06, "loss": 0.3253, "step": 1651 }, { "epoch": 0.08, "grad_norm": 0.7823712572386659, "learning_rate": 4.999009953652252e-06, "loss": 0.3314, "step": 1652 }, { "epoch": 0.08, "grad_norm": 0.7064302203073027, "learning_rate": 4.999004609636891e-06, "loss": 0.3163, "step": 1653 }, { "epoch": 0.08, "grad_norm": 0.7166746561933975, "learning_rate": 4.998999251240387e-06, "loss": 0.3065, "step": 1654 }, { "epoch": 0.08, "grad_norm": 0.6986848236222497, "learning_rate": 4.9989938784627705e-06, "loss": 0.3219, "step": 1655 }, { "epoch": 0.08, "grad_norm": 0.7686442518064631, "learning_rate": 4.998988491304072e-06, "loss": 0.3027, "step": 1656 }, { "epoch": 0.08, "grad_norm": 0.755893272738977, "learning_rate": 4.998983089764323e-06, "loss": 0.2995, "step": 1657 }, { "epoch": 0.08, "grad_norm": 0.7018002227971664, "learning_rate": 4.998977673843554e-06, "loss": 0.3252, "step": 1658 }, { "epoch": 0.08, "grad_norm": 0.7473303608512185, "learning_rate": 4.998972243541797e-06, "loss": 0.3098, "step": 1659 }, { "epoch": 0.08, "grad_norm": 0.7269801935768679, "learning_rate": 4.998966798859083e-06, "loss": 0.3143, "step": 1660 }, { "epoch": 0.08, "grad_norm": 0.6651439520555769, "learning_rate": 4.998961339795443e-06, "loss": 0.3113, "step": 1661 }, { "epoch": 0.08, "grad_norm": 0.7279211377369268, "learning_rate": 4.998955866350908e-06, "loss": 0.3282, "step": 1662 }, { "epoch": 0.08, "grad_norm": 0.8173911671620676, "learning_rate": 4.99895037852551e-06, "loss": 0.3609, "step": 1663 }, { "epoch": 0.08, "grad_norm": 0.7593340599501268, "learning_rate": 4.998944876319282e-06, "loss": 0.3186, "step": 1664 }, { "epoch": 0.08, "grad_norm": 0.7068837359645022, "learning_rate": 4.998939359732253e-06, "loss": 0.3196, "step": 1665 }, { "epoch": 0.08, "grad_norm": 0.682231439675626, "learning_rate": 4.998933828764457e-06, "loss": 0.3101, "step": 1666 }, { "epoch": 0.08, "grad_norm": 0.7521152033836275, "learning_rate": 4.998928283415924e-06, "loss": 0.3275, "step": 1667 }, { "epoch": 0.08, "grad_norm": 0.7693914390148405, "learning_rate": 4.998922723686688e-06, "loss": 0.3314, "step": 1668 }, { "epoch": 0.08, "grad_norm": 0.7366946871456527, "learning_rate": 4.998917149576779e-06, "loss": 0.3171, "step": 1669 }, { "epoch": 0.08, "grad_norm": 0.845667812934547, "learning_rate": 4.99891156108623e-06, "loss": 0.3451, "step": 1670 }, { "epoch": 0.08, "grad_norm": 0.7178654067965274, "learning_rate": 4.998905958215073e-06, "loss": 0.2903, "step": 1671 }, { "epoch": 0.08, "grad_norm": 0.7066536466637067, "learning_rate": 4.99890034096334e-06, "loss": 0.2995, "step": 1672 }, { "epoch": 0.08, "grad_norm": 0.7456870650257225, "learning_rate": 4.998894709331064e-06, "loss": 0.3086, "step": 1673 }, { "epoch": 0.08, "grad_norm": 0.7772952969313207, "learning_rate": 4.998889063318277e-06, "loss": 0.3154, "step": 1674 }, { "epoch": 0.08, "grad_norm": 0.7819113104871929, "learning_rate": 4.998883402925012e-06, "loss": 0.3223, "step": 1675 }, { "epoch": 0.08, "grad_norm": 0.7385555654934371, "learning_rate": 4.998877728151301e-06, "loss": 0.3289, "step": 1676 }, { "epoch": 0.08, "grad_norm": 0.812804597323901, "learning_rate": 4.9988720389971764e-06, "loss": 0.3267, "step": 1677 }, { "epoch": 0.08, "grad_norm": 0.6830459663416636, "learning_rate": 4.998866335462671e-06, "loss": 0.3006, "step": 1678 }, { "epoch": 0.08, "grad_norm": 0.7057038157621082, "learning_rate": 4.998860617547818e-06, "loss": 0.3065, "step": 1679 }, { "epoch": 0.08, "grad_norm": 0.7517042964753557, "learning_rate": 4.998854885252651e-06, "loss": 0.342, "step": 1680 }, { "epoch": 0.08, "grad_norm": 0.7995839884466274, "learning_rate": 4.998849138577201e-06, "loss": 0.3209, "step": 1681 }, { "epoch": 0.08, "grad_norm": 0.8667777013902617, "learning_rate": 4.9988433775215025e-06, "loss": 0.3242, "step": 1682 }, { "epoch": 0.08, "grad_norm": 0.718172060955515, "learning_rate": 4.998837602085589e-06, "loss": 0.3124, "step": 1683 }, { "epoch": 0.08, "grad_norm": 0.6412282870607331, "learning_rate": 4.998831812269493e-06, "loss": 0.2947, "step": 1684 }, { "epoch": 0.08, "grad_norm": 0.7388983029115189, "learning_rate": 4.998826008073248e-06, "loss": 0.3257, "step": 1685 }, { "epoch": 0.08, "grad_norm": 0.7843335896066234, "learning_rate": 4.998820189496887e-06, "loss": 0.3213, "step": 1686 }, { "epoch": 0.08, "grad_norm": 0.7473124221595492, "learning_rate": 4.998814356540445e-06, "loss": 0.3196, "step": 1687 }, { "epoch": 0.08, "grad_norm": 0.698651836867668, "learning_rate": 4.9988085092039526e-06, "loss": 0.2936, "step": 1688 }, { "epoch": 0.08, "grad_norm": 0.6950129573704931, "learning_rate": 4.9988026474874466e-06, "loss": 0.3177, "step": 1689 }, { "epoch": 0.08, "grad_norm": 0.7187400923327444, "learning_rate": 4.998796771390959e-06, "loss": 0.3354, "step": 1690 }, { "epoch": 0.08, "grad_norm": 0.7866352982702991, "learning_rate": 4.998790880914524e-06, "loss": 0.3214, "step": 1691 }, { "epoch": 0.08, "grad_norm": 0.7717076429244231, "learning_rate": 4.998784976058175e-06, "loss": 0.3509, "step": 1692 }, { "epoch": 0.08, "grad_norm": 0.7367159944515886, "learning_rate": 4.998779056821948e-06, "loss": 0.3127, "step": 1693 }, { "epoch": 0.08, "grad_norm": 0.7898077593407784, "learning_rate": 4.998773123205874e-06, "loss": 0.3282, "step": 1694 }, { "epoch": 0.08, "grad_norm": 0.702623986236606, "learning_rate": 4.99876717520999e-06, "loss": 0.3236, "step": 1695 }, { "epoch": 0.08, "grad_norm": 0.7459675042006468, "learning_rate": 4.998761212834328e-06, "loss": 0.3, "step": 1696 }, { "epoch": 0.08, "grad_norm": 0.8257675007262679, "learning_rate": 4.998755236078924e-06, "loss": 0.3285, "step": 1697 }, { "epoch": 0.08, "grad_norm": 0.7583060387154643, "learning_rate": 4.998749244943811e-06, "loss": 0.3237, "step": 1698 }, { "epoch": 0.08, "grad_norm": 0.7081598458468251, "learning_rate": 4.998743239429024e-06, "loss": 0.3392, "step": 1699 }, { "epoch": 0.08, "grad_norm": 0.7348255641070603, "learning_rate": 4.998737219534599e-06, "loss": 0.3175, "step": 1700 }, { "epoch": 0.08, "grad_norm": 0.671216471334538, "learning_rate": 4.998731185260568e-06, "loss": 0.313, "step": 1701 }, { "epoch": 0.08, "grad_norm": 0.7277886541867677, "learning_rate": 4.9987251366069675e-06, "loss": 0.306, "step": 1702 }, { "epoch": 0.08, "grad_norm": 0.6416974430697506, "learning_rate": 4.998719073573832e-06, "loss": 0.286, "step": 1703 }, { "epoch": 0.08, "grad_norm": 0.7179044539112954, "learning_rate": 4.998712996161196e-06, "loss": 0.3231, "step": 1704 }, { "epoch": 0.08, "grad_norm": 0.681990052502104, "learning_rate": 4.998706904369095e-06, "loss": 0.3222, "step": 1705 }, { "epoch": 0.08, "grad_norm": 0.7266064387011337, "learning_rate": 4.9987007981975635e-06, "loss": 0.3098, "step": 1706 }, { "epoch": 0.08, "grad_norm": 0.7001457463251953, "learning_rate": 4.998694677646637e-06, "loss": 0.284, "step": 1707 }, { "epoch": 0.08, "grad_norm": 0.7248509122251477, "learning_rate": 4.99868854271635e-06, "loss": 0.3158, "step": 1708 }, { "epoch": 0.08, "grad_norm": 0.799168287325932, "learning_rate": 4.998682393406739e-06, "loss": 0.3545, "step": 1709 }, { "epoch": 0.08, "grad_norm": 0.7620824972386876, "learning_rate": 4.99867622971784e-06, "loss": 0.3214, "step": 1710 }, { "epoch": 0.08, "grad_norm": 0.7007858773476957, "learning_rate": 4.998670051649686e-06, "loss": 0.3282, "step": 1711 }, { "epoch": 0.08, "grad_norm": 0.7624661827813749, "learning_rate": 4.998663859202314e-06, "loss": 0.345, "step": 1712 }, { "epoch": 0.08, "grad_norm": 0.698826217843581, "learning_rate": 4.99865765237576e-06, "loss": 0.3119, "step": 1713 }, { "epoch": 0.08, "grad_norm": 0.7414525308834549, "learning_rate": 4.9986514311700594e-06, "loss": 0.3161, "step": 1714 }, { "epoch": 0.08, "grad_norm": 0.7546880433207858, "learning_rate": 4.998645195585247e-06, "loss": 0.3189, "step": 1715 }, { "epoch": 0.08, "grad_norm": 0.7087605039097468, "learning_rate": 4.9986389456213605e-06, "loss": 0.3293, "step": 1716 }, { "epoch": 0.08, "grad_norm": 0.6806453547338477, "learning_rate": 4.9986326812784334e-06, "loss": 0.301, "step": 1717 }, { "epoch": 0.08, "grad_norm": 0.8185632363733483, "learning_rate": 4.998626402556505e-06, "loss": 0.3331, "step": 1718 }, { "epoch": 0.08, "grad_norm": 0.7521392200791931, "learning_rate": 4.998620109455608e-06, "loss": 0.3104, "step": 1719 }, { "epoch": 0.08, "grad_norm": 0.7303717397137349, "learning_rate": 4.998613801975781e-06, "loss": 0.3243, "step": 1720 }, { "epoch": 0.08, "grad_norm": 0.6730159394452321, "learning_rate": 4.99860748011706e-06, "loss": 0.2941, "step": 1721 }, { "epoch": 0.08, "grad_norm": 0.7419018851209224, "learning_rate": 4.9986011438794806e-06, "loss": 0.3058, "step": 1722 }, { "epoch": 0.08, "grad_norm": 0.8771733096100495, "learning_rate": 4.99859479326308e-06, "loss": 0.3333, "step": 1723 }, { "epoch": 0.08, "grad_norm": 0.7110613093535557, "learning_rate": 4.9985884282678935e-06, "loss": 0.3292, "step": 1724 }, { "epoch": 0.08, "grad_norm": 0.7620087127352067, "learning_rate": 4.9985820488939586e-06, "loss": 0.3137, "step": 1725 }, { "epoch": 0.08, "grad_norm": 0.8161361739283891, "learning_rate": 4.998575655141312e-06, "loss": 0.3184, "step": 1726 }, { "epoch": 0.08, "grad_norm": 0.8586064027191836, "learning_rate": 4.998569247009991e-06, "loss": 0.3545, "step": 1727 }, { "epoch": 0.08, "grad_norm": 0.8102411244562121, "learning_rate": 4.998562824500032e-06, "loss": 0.3515, "step": 1728 }, { "epoch": 0.08, "grad_norm": 0.7897331926965182, "learning_rate": 4.998556387611472e-06, "loss": 0.3262, "step": 1729 }, { "epoch": 0.08, "grad_norm": 0.7827400563794741, "learning_rate": 4.998549936344348e-06, "loss": 0.3297, "step": 1730 }, { "epoch": 0.08, "grad_norm": 0.7545982491087264, "learning_rate": 4.998543470698697e-06, "loss": 0.3266, "step": 1731 }, { "epoch": 0.08, "grad_norm": 0.7003527032758037, "learning_rate": 4.998536990674556e-06, "loss": 0.3189, "step": 1732 }, { "epoch": 0.08, "grad_norm": 0.697372252464276, "learning_rate": 4.998530496271963e-06, "loss": 0.3155, "step": 1733 }, { "epoch": 0.08, "grad_norm": 0.6960140155835915, "learning_rate": 4.998523987490955e-06, "loss": 0.3237, "step": 1734 }, { "epoch": 0.08, "grad_norm": 0.6686493589477254, "learning_rate": 4.998517464331569e-06, "loss": 0.289, "step": 1735 }, { "epoch": 0.08, "grad_norm": 0.7414978661150851, "learning_rate": 4.998510926793844e-06, "loss": 0.3329, "step": 1736 }, { "epoch": 0.08, "grad_norm": 0.7673726921119707, "learning_rate": 4.998504374877815e-06, "loss": 0.3223, "step": 1737 }, { "epoch": 0.08, "grad_norm": 0.6799599923184064, "learning_rate": 4.998497808583522e-06, "loss": 0.2776, "step": 1738 }, { "epoch": 0.08, "grad_norm": 0.6879388426320046, "learning_rate": 4.998491227911002e-06, "loss": 0.3033, "step": 1739 }, { "epoch": 0.08, "grad_norm": 0.7548048546962131, "learning_rate": 4.998484632860293e-06, "loss": 0.3172, "step": 1740 }, { "epoch": 0.08, "grad_norm": 0.713617913934807, "learning_rate": 4.998478023431434e-06, "loss": 0.3085, "step": 1741 }, { "epoch": 0.08, "grad_norm": 0.7690454012480247, "learning_rate": 4.99847139962446e-06, "loss": 0.298, "step": 1742 }, { "epoch": 0.08, "grad_norm": 0.6950314898943085, "learning_rate": 4.998464761439413e-06, "loss": 0.3043, "step": 1743 }, { "epoch": 0.08, "grad_norm": 0.7075000971046799, "learning_rate": 4.998458108876328e-06, "loss": 0.3103, "step": 1744 }, { "epoch": 0.08, "grad_norm": 0.7798423173993134, "learning_rate": 4.998451441935245e-06, "loss": 0.344, "step": 1745 }, { "epoch": 0.08, "grad_norm": 0.7452003721000116, "learning_rate": 4.998444760616201e-06, "loss": 0.3252, "step": 1746 }, { "epoch": 0.08, "grad_norm": 0.6679886897551953, "learning_rate": 4.998438064919237e-06, "loss": 0.2998, "step": 1747 }, { "epoch": 0.08, "grad_norm": 0.7206542598810673, "learning_rate": 4.998431354844389e-06, "loss": 0.3031, "step": 1748 }, { "epoch": 0.08, "grad_norm": 0.7509049192178332, "learning_rate": 4.998424630391696e-06, "loss": 0.3183, "step": 1749 }, { "epoch": 0.08, "grad_norm": 0.7546458022981962, "learning_rate": 4.998417891561198e-06, "loss": 0.3222, "step": 1750 }, { "epoch": 0.08, "grad_norm": 0.6855404395700321, "learning_rate": 4.998411138352933e-06, "loss": 0.3101, "step": 1751 }, { "epoch": 0.08, "grad_norm": 0.7238018142407787, "learning_rate": 4.998404370766939e-06, "loss": 0.3308, "step": 1752 }, { "epoch": 0.08, "grad_norm": 0.7056564201596913, "learning_rate": 4.998397588803256e-06, "loss": 0.3107, "step": 1753 }, { "epoch": 0.08, "grad_norm": 0.7632406240763014, "learning_rate": 4.998390792461924e-06, "loss": 0.3327, "step": 1754 }, { "epoch": 0.08, "grad_norm": 0.7295120925574667, "learning_rate": 4.9983839817429806e-06, "loss": 0.3368, "step": 1755 }, { "epoch": 0.08, "grad_norm": 0.7802718964125281, "learning_rate": 4.998377156646465e-06, "loss": 0.3168, "step": 1756 }, { "epoch": 0.08, "grad_norm": 0.7057602056148283, "learning_rate": 4.998370317172416e-06, "loss": 0.3361, "step": 1757 }, { "epoch": 0.08, "grad_norm": 0.7434180035006607, "learning_rate": 4.998363463320874e-06, "loss": 0.3106, "step": 1758 }, { "epoch": 0.08, "grad_norm": 0.8954340677926301, "learning_rate": 4.99835659509188e-06, "loss": 0.3356, "step": 1759 }, { "epoch": 0.08, "grad_norm": 0.7154793462938862, "learning_rate": 4.998349712485469e-06, "loss": 0.309, "step": 1760 }, { "epoch": 0.08, "grad_norm": 0.7480020898829135, "learning_rate": 4.998342815501686e-06, "loss": 0.3204, "step": 1761 }, { "epoch": 0.08, "grad_norm": 0.7163655607771257, "learning_rate": 4.998335904140567e-06, "loss": 0.3272, "step": 1762 }, { "epoch": 0.08, "grad_norm": 0.7759190883810828, "learning_rate": 4.998328978402152e-06, "loss": 0.337, "step": 1763 }, { "epoch": 0.08, "grad_norm": 0.7267166675328236, "learning_rate": 4.9983220382864815e-06, "loss": 0.2968, "step": 1764 }, { "epoch": 0.08, "grad_norm": 0.7808124316361885, "learning_rate": 4.998315083793597e-06, "loss": 0.3239, "step": 1765 }, { "epoch": 0.08, "grad_norm": 0.7935108783025467, "learning_rate": 4.998308114923537e-06, "loss": 0.3259, "step": 1766 }, { "epoch": 0.08, "grad_norm": 0.7987650415729342, "learning_rate": 4.998301131676341e-06, "loss": 0.3263, "step": 1767 }, { "epoch": 0.08, "grad_norm": 0.72807329199965, "learning_rate": 4.9982941340520495e-06, "loss": 0.3333, "step": 1768 }, { "epoch": 0.08, "grad_norm": 0.7323147289935289, "learning_rate": 4.998287122050704e-06, "loss": 0.3261, "step": 1769 }, { "epoch": 0.08, "grad_norm": 0.7242895678071853, "learning_rate": 4.998280095672344e-06, "loss": 0.3193, "step": 1770 }, { "epoch": 0.08, "grad_norm": 0.7098091601813346, "learning_rate": 4.99827305491701e-06, "loss": 0.3403, "step": 1771 }, { "epoch": 0.08, "grad_norm": 0.7367894092366094, "learning_rate": 4.998265999784741e-06, "loss": 0.3252, "step": 1772 }, { "epoch": 0.08, "grad_norm": 0.7291537995218673, "learning_rate": 4.998258930275581e-06, "loss": 0.3172, "step": 1773 }, { "epoch": 0.08, "grad_norm": 0.6870567079778375, "learning_rate": 4.998251846389568e-06, "loss": 0.3322, "step": 1774 }, { "epoch": 0.08, "grad_norm": 0.7474369591725489, "learning_rate": 4.998244748126744e-06, "loss": 0.298, "step": 1775 }, { "epoch": 0.08, "grad_norm": 0.7830366462232454, "learning_rate": 4.998237635487149e-06, "loss": 0.3519, "step": 1776 }, { "epoch": 0.08, "grad_norm": 0.76818945844856, "learning_rate": 4.998230508470824e-06, "loss": 0.3322, "step": 1777 }, { "epoch": 0.08, "grad_norm": 0.7109054411232639, "learning_rate": 4.9982233670778114e-06, "loss": 0.3274, "step": 1778 }, { "epoch": 0.08, "grad_norm": 0.6986536477620593, "learning_rate": 4.99821621130815e-06, "loss": 0.3236, "step": 1779 }, { "epoch": 0.08, "grad_norm": 0.7155236486233788, "learning_rate": 4.998209041161883e-06, "loss": 0.3102, "step": 1780 }, { "epoch": 0.08, "grad_norm": 0.6801665064335161, "learning_rate": 4.998201856639051e-06, "loss": 0.3179, "step": 1781 }, { "epoch": 0.08, "grad_norm": 0.7699799523838893, "learning_rate": 4.998194657739695e-06, "loss": 0.3387, "step": 1782 }, { "epoch": 0.08, "grad_norm": 0.7606229931649146, "learning_rate": 4.998187444463856e-06, "loss": 0.328, "step": 1783 }, { "epoch": 0.08, "grad_norm": 0.7139241231626434, "learning_rate": 4.998180216811576e-06, "loss": 0.3249, "step": 1784 }, { "epoch": 0.08, "grad_norm": 0.7159153957303732, "learning_rate": 4.998172974782898e-06, "loss": 0.32, "step": 1785 }, { "epoch": 0.08, "grad_norm": 0.7088580650004593, "learning_rate": 4.998165718377862e-06, "loss": 0.3247, "step": 1786 }, { "epoch": 0.08, "grad_norm": 0.7077902237615663, "learning_rate": 4.99815844759651e-06, "loss": 0.2995, "step": 1787 }, { "epoch": 0.08, "grad_norm": 0.7324174537367453, "learning_rate": 4.998151162438884e-06, "loss": 0.3106, "step": 1788 }, { "epoch": 0.08, "grad_norm": 0.7263458637467279, "learning_rate": 4.998143862905025e-06, "loss": 0.3213, "step": 1789 }, { "epoch": 0.08, "grad_norm": 0.7718814672642494, "learning_rate": 4.998136548994977e-06, "loss": 0.3161, "step": 1790 }, { "epoch": 0.08, "grad_norm": 0.6959745622776874, "learning_rate": 4.998129220708781e-06, "loss": 0.318, "step": 1791 }, { "epoch": 0.08, "grad_norm": 0.698710157275822, "learning_rate": 4.998121878046479e-06, "loss": 0.3104, "step": 1792 }, { "epoch": 0.08, "grad_norm": 0.7351381021236111, "learning_rate": 4.998114521008114e-06, "loss": 0.3315, "step": 1793 }, { "epoch": 0.08, "grad_norm": 0.7466049061178303, "learning_rate": 4.998107149593727e-06, "loss": 0.3333, "step": 1794 }, { "epoch": 0.08, "grad_norm": 0.6928736768465505, "learning_rate": 4.998099763803362e-06, "loss": 0.3115, "step": 1795 }, { "epoch": 0.08, "grad_norm": 0.6958381477494567, "learning_rate": 4.99809236363706e-06, "loss": 0.3035, "step": 1796 }, { "epoch": 0.08, "grad_norm": 0.7602021574577486, "learning_rate": 4.998084949094864e-06, "loss": 0.3259, "step": 1797 }, { "epoch": 0.08, "grad_norm": 0.6927924823999699, "learning_rate": 4.998077520176818e-06, "loss": 0.3103, "step": 1798 }, { "epoch": 0.08, "grad_norm": 0.7498158760171405, "learning_rate": 4.998070076882964e-06, "loss": 0.336, "step": 1799 }, { "epoch": 0.08, "grad_norm": 0.7453694620699397, "learning_rate": 4.998062619213344e-06, "loss": 0.326, "step": 1800 }, { "epoch": 0.08, "grad_norm": 0.8068477086252633, "learning_rate": 4.998055147168002e-06, "loss": 0.3258, "step": 1801 }, { "epoch": 0.08, "grad_norm": 0.6955121679811483, "learning_rate": 4.99804766074698e-06, "loss": 0.3047, "step": 1802 }, { "epoch": 0.08, "grad_norm": 0.7262856926709291, "learning_rate": 4.998040159950322e-06, "loss": 0.3329, "step": 1803 }, { "epoch": 0.08, "grad_norm": 0.6959419974806004, "learning_rate": 4.998032644778069e-06, "loss": 0.3158, "step": 1804 }, { "epoch": 0.08, "grad_norm": 0.7187913266130985, "learning_rate": 4.998025115230268e-06, "loss": 0.315, "step": 1805 }, { "epoch": 0.08, "grad_norm": 0.7366374624629235, "learning_rate": 4.998017571306959e-06, "loss": 0.2924, "step": 1806 }, { "epoch": 0.08, "grad_norm": 0.7780051659615007, "learning_rate": 4.9980100130081875e-06, "loss": 0.322, "step": 1807 }, { "epoch": 0.08, "grad_norm": 0.735962223319754, "learning_rate": 4.998002440333996e-06, "loss": 0.3041, "step": 1808 }, { "epoch": 0.08, "grad_norm": 0.6795134987475824, "learning_rate": 4.997994853284429e-06, "loss": 0.3063, "step": 1809 }, { "epoch": 0.08, "grad_norm": 0.6826684435710839, "learning_rate": 4.997987251859528e-06, "loss": 0.3128, "step": 1810 }, { "epoch": 0.08, "grad_norm": 0.6823867138381928, "learning_rate": 4.997979636059339e-06, "loss": 0.3259, "step": 1811 }, { "epoch": 0.08, "grad_norm": 0.7187396126467649, "learning_rate": 4.997972005883905e-06, "loss": 0.3303, "step": 1812 }, { "epoch": 0.08, "grad_norm": 0.7243086563802056, "learning_rate": 4.997964361333271e-06, "loss": 0.3233, "step": 1813 }, { "epoch": 0.08, "grad_norm": 0.7026059949246819, "learning_rate": 4.997956702407478e-06, "loss": 0.3241, "step": 1814 }, { "epoch": 0.09, "grad_norm": 0.7479737242754531, "learning_rate": 4.997949029106574e-06, "loss": 0.3425, "step": 1815 }, { "epoch": 0.09, "grad_norm": 0.8071354633995583, "learning_rate": 4.997941341430599e-06, "loss": 0.3341, "step": 1816 }, { "epoch": 0.09, "grad_norm": 0.7533007935571685, "learning_rate": 4.997933639379601e-06, "loss": 0.3236, "step": 1817 }, { "epoch": 0.09, "grad_norm": 0.7471213322037267, "learning_rate": 4.9979259229536226e-06, "loss": 0.3031, "step": 1818 }, { "epoch": 0.09, "grad_norm": 0.7469831232903273, "learning_rate": 4.997918192152707e-06, "loss": 0.3242, "step": 1819 }, { "epoch": 0.09, "grad_norm": 0.6691807061984908, "learning_rate": 4.997910446976902e-06, "loss": 0.3039, "step": 1820 }, { "epoch": 0.09, "grad_norm": 0.6715047763051905, "learning_rate": 4.997902687426249e-06, "loss": 0.2996, "step": 1821 }, { "epoch": 0.09, "grad_norm": 0.7139486870455246, "learning_rate": 4.997894913500794e-06, "loss": 0.3095, "step": 1822 }, { "epoch": 0.09, "grad_norm": 0.7963261280517057, "learning_rate": 4.997887125200582e-06, "loss": 0.3223, "step": 1823 }, { "epoch": 0.09, "grad_norm": 0.7668077363746179, "learning_rate": 4.997879322525657e-06, "loss": 0.3142, "step": 1824 }, { "epoch": 0.09, "grad_norm": 0.7043956208805083, "learning_rate": 4.997871505476064e-06, "loss": 0.3076, "step": 1825 }, { "epoch": 0.09, "grad_norm": 0.6809975385236566, "learning_rate": 4.99786367405185e-06, "loss": 0.3101, "step": 1826 }, { "epoch": 0.09, "grad_norm": 0.778689259579579, "learning_rate": 4.9978558282530565e-06, "loss": 0.3115, "step": 1827 }, { "epoch": 0.09, "grad_norm": 0.6838314546241825, "learning_rate": 4.9978479680797305e-06, "loss": 0.3072, "step": 1828 }, { "epoch": 0.09, "grad_norm": 0.7190708957130343, "learning_rate": 4.997840093531918e-06, "loss": 0.3159, "step": 1829 }, { "epoch": 0.09, "grad_norm": 0.7296557020481241, "learning_rate": 4.997832204609664e-06, "loss": 0.3293, "step": 1830 }, { "epoch": 0.09, "grad_norm": 0.7415202359639985, "learning_rate": 4.997824301313013e-06, "loss": 0.3177, "step": 1831 }, { "epoch": 0.09, "grad_norm": 0.6715278100003642, "learning_rate": 4.997816383642012e-06, "loss": 0.2917, "step": 1832 }, { "epoch": 0.09, "grad_norm": 0.7219307230041185, "learning_rate": 4.9978084515967035e-06, "loss": 0.3166, "step": 1833 }, { "epoch": 0.09, "grad_norm": 0.7188266363425205, "learning_rate": 4.997800505177136e-06, "loss": 0.3166, "step": 1834 }, { "epoch": 0.09, "grad_norm": 0.7775991769574809, "learning_rate": 4.997792544383354e-06, "loss": 0.3458, "step": 1835 }, { "epoch": 0.09, "grad_norm": 0.7098277819200154, "learning_rate": 4.997784569215405e-06, "loss": 0.3208, "step": 1836 }, { "epoch": 0.09, "grad_norm": 0.7039087094141674, "learning_rate": 4.997776579673332e-06, "loss": 0.3056, "step": 1837 }, { "epoch": 0.09, "grad_norm": 0.6895476967375671, "learning_rate": 4.997768575757184e-06, "loss": 0.3229, "step": 1838 }, { "epoch": 0.09, "grad_norm": 0.7282488955405598, "learning_rate": 4.997760557467005e-06, "loss": 0.3411, "step": 1839 }, { "epoch": 0.09, "grad_norm": 0.7111674495871648, "learning_rate": 4.9977525248028415e-06, "loss": 0.293, "step": 1840 }, { "epoch": 0.09, "grad_norm": 0.7001478017670746, "learning_rate": 4.99774447776474e-06, "loss": 0.3185, "step": 1841 }, { "epoch": 0.09, "grad_norm": 0.7386876078647513, "learning_rate": 4.997736416352747e-06, "loss": 0.3294, "step": 1842 }, { "epoch": 0.09, "grad_norm": 0.7131410596494243, "learning_rate": 4.997728340566909e-06, "loss": 0.3088, "step": 1843 }, { "epoch": 0.09, "grad_norm": 0.7554464345794174, "learning_rate": 4.997720250407272e-06, "loss": 0.3232, "step": 1844 }, { "epoch": 0.09, "grad_norm": 0.7118736026163497, "learning_rate": 4.997712145873883e-06, "loss": 0.3355, "step": 1845 }, { "epoch": 0.09, "grad_norm": 0.6931751775850544, "learning_rate": 4.997704026966788e-06, "loss": 0.3041, "step": 1846 }, { "epoch": 0.09, "grad_norm": 0.7076553211120602, "learning_rate": 4.997695893686034e-06, "loss": 0.3203, "step": 1847 }, { "epoch": 0.09, "grad_norm": 0.743248104282541, "learning_rate": 4.997687746031669e-06, "loss": 0.3065, "step": 1848 }, { "epoch": 0.09, "grad_norm": 0.6857642940206522, "learning_rate": 4.997679584003737e-06, "loss": 0.2877, "step": 1849 }, { "epoch": 0.09, "grad_norm": 0.7267861614460097, "learning_rate": 4.9976714076022885e-06, "loss": 0.3027, "step": 1850 }, { "epoch": 0.09, "grad_norm": 0.6699607370263757, "learning_rate": 4.997663216827367e-06, "loss": 0.3041, "step": 1851 }, { "epoch": 0.09, "grad_norm": 0.6911567970506886, "learning_rate": 4.997655011679022e-06, "loss": 0.3187, "step": 1852 }, { "epoch": 0.09, "grad_norm": 0.7088719665933794, "learning_rate": 4.997646792157301e-06, "loss": 0.3051, "step": 1853 }, { "epoch": 0.09, "grad_norm": 0.6639722550318254, "learning_rate": 4.9976385582622496e-06, "loss": 0.3168, "step": 1854 }, { "epoch": 0.09, "grad_norm": 0.6526392683521721, "learning_rate": 4.997630309993917e-06, "loss": 0.2916, "step": 1855 }, { "epoch": 0.09, "grad_norm": 0.6822839071237815, "learning_rate": 4.99762204735235e-06, "loss": 0.3091, "step": 1856 }, { "epoch": 0.09, "grad_norm": 0.7366819568090066, "learning_rate": 4.997613770337595e-06, "loss": 0.3192, "step": 1857 }, { "epoch": 0.09, "grad_norm": 0.7033342995649723, "learning_rate": 4.9976054789497e-06, "loss": 0.3006, "step": 1858 }, { "epoch": 0.09, "grad_norm": 0.7056612060017199, "learning_rate": 4.997597173188714e-06, "loss": 0.3359, "step": 1859 }, { "epoch": 0.09, "grad_norm": 0.7401241781969261, "learning_rate": 4.997588853054683e-06, "loss": 0.3362, "step": 1860 }, { "epoch": 0.09, "grad_norm": 0.7269733388695065, "learning_rate": 4.997580518547658e-06, "loss": 0.3323, "step": 1861 }, { "epoch": 0.09, "grad_norm": 0.7413527685377155, "learning_rate": 4.9975721696676834e-06, "loss": 0.3459, "step": 1862 }, { "epoch": 0.09, "grad_norm": 0.681558143450639, "learning_rate": 4.9975638064148086e-06, "loss": 0.3082, "step": 1863 }, { "epoch": 0.09, "grad_norm": 0.6930896963455039, "learning_rate": 4.997555428789082e-06, "loss": 0.3294, "step": 1864 }, { "epoch": 0.09, "grad_norm": 0.6606396190758275, "learning_rate": 4.997547036790552e-06, "loss": 0.3255, "step": 1865 }, { "epoch": 0.09, "grad_norm": 0.7297307947405649, "learning_rate": 4.997538630419267e-06, "loss": 0.333, "step": 1866 }, { "epoch": 0.09, "grad_norm": 0.7329292581732605, "learning_rate": 4.997530209675273e-06, "loss": 0.3385, "step": 1867 }, { "epoch": 0.09, "grad_norm": 0.7424093867254915, "learning_rate": 4.997521774558622e-06, "loss": 0.3108, "step": 1868 }, { "epoch": 0.09, "grad_norm": 0.7188352838783755, "learning_rate": 4.997513325069361e-06, "loss": 0.3055, "step": 1869 }, { "epoch": 0.09, "grad_norm": 0.666567765815909, "learning_rate": 4.997504861207538e-06, "loss": 0.3111, "step": 1870 }, { "epoch": 0.09, "grad_norm": 0.8232753568530948, "learning_rate": 4.997496382973202e-06, "loss": 0.3287, "step": 1871 }, { "epoch": 0.09, "grad_norm": 0.751013891356403, "learning_rate": 4.997487890366403e-06, "loss": 0.3426, "step": 1872 }, { "epoch": 0.09, "grad_norm": 0.719654054311617, "learning_rate": 4.9974793833871875e-06, "loss": 0.3327, "step": 1873 }, { "epoch": 0.09, "grad_norm": 0.7548392395678372, "learning_rate": 4.997470862035606e-06, "loss": 0.3453, "step": 1874 }, { "epoch": 0.09, "grad_norm": 0.7068259307911579, "learning_rate": 4.997462326311708e-06, "loss": 0.309, "step": 1875 }, { "epoch": 0.09, "grad_norm": 0.6890734110093145, "learning_rate": 4.997453776215543e-06, "loss": 0.3003, "step": 1876 }, { "epoch": 0.09, "grad_norm": 0.7353684379308423, "learning_rate": 4.997445211747158e-06, "loss": 0.3294, "step": 1877 }, { "epoch": 0.09, "grad_norm": 0.75396330749522, "learning_rate": 4.997436632906604e-06, "loss": 0.3164, "step": 1878 }, { "epoch": 0.09, "grad_norm": 0.7146925613272879, "learning_rate": 4.99742803969393e-06, "loss": 0.359, "step": 1879 }, { "epoch": 0.09, "grad_norm": 0.6837506254526113, "learning_rate": 4.997419432109184e-06, "loss": 0.3171, "step": 1880 }, { "epoch": 0.09, "grad_norm": 0.7036093907929248, "learning_rate": 4.997410810152419e-06, "loss": 0.3332, "step": 1881 }, { "epoch": 0.09, "grad_norm": 0.6909854235356081, "learning_rate": 4.997402173823681e-06, "loss": 0.3078, "step": 1882 }, { "epoch": 0.09, "grad_norm": 0.7164450520194366, "learning_rate": 4.9973935231230215e-06, "loss": 0.3209, "step": 1883 }, { "epoch": 0.09, "grad_norm": 0.6915842235057327, "learning_rate": 4.9973848580504905e-06, "loss": 0.3357, "step": 1884 }, { "epoch": 0.09, "grad_norm": 0.6767201626593479, "learning_rate": 4.9973761786061355e-06, "loss": 0.3137, "step": 1885 }, { "epoch": 0.09, "grad_norm": 0.6826660752961203, "learning_rate": 4.997367484790011e-06, "loss": 0.2857, "step": 1886 }, { "epoch": 0.09, "grad_norm": 0.6783039996557115, "learning_rate": 4.997358776602163e-06, "loss": 0.3163, "step": 1887 }, { "epoch": 0.09, "grad_norm": 0.7687018581489734, "learning_rate": 4.997350054042642e-06, "loss": 0.3214, "step": 1888 }, { "epoch": 0.09, "grad_norm": 0.7212583900936821, "learning_rate": 4.9973413171115e-06, "loss": 0.3275, "step": 1889 }, { "epoch": 0.09, "grad_norm": 0.7184324707098966, "learning_rate": 4.997332565808787e-06, "loss": 0.3223, "step": 1890 }, { "epoch": 0.09, "grad_norm": 0.7663605858286447, "learning_rate": 4.997323800134552e-06, "loss": 0.3443, "step": 1891 }, { "epoch": 0.09, "grad_norm": 0.7635449157288412, "learning_rate": 4.997315020088846e-06, "loss": 0.3441, "step": 1892 }, { "epoch": 0.09, "grad_norm": 0.6837677725488568, "learning_rate": 4.9973062256717204e-06, "loss": 0.3307, "step": 1893 }, { "epoch": 0.09, "grad_norm": 0.6599091374386534, "learning_rate": 4.997297416883225e-06, "loss": 0.31, "step": 1894 }, { "epoch": 0.09, "grad_norm": 0.6688961670672441, "learning_rate": 4.99728859372341e-06, "loss": 0.3165, "step": 1895 }, { "epoch": 0.09, "grad_norm": 0.7408658442788287, "learning_rate": 4.997279756192328e-06, "loss": 0.3239, "step": 1896 }, { "epoch": 0.09, "grad_norm": 0.7298599595339883, "learning_rate": 4.997270904290028e-06, "loss": 0.3266, "step": 1897 }, { "epoch": 0.09, "grad_norm": 0.7288308767288255, "learning_rate": 4.997262038016561e-06, "loss": 0.3129, "step": 1898 }, { "epoch": 0.09, "grad_norm": 0.6771704459219822, "learning_rate": 4.997253157371979e-06, "loss": 0.2913, "step": 1899 }, { "epoch": 0.09, "grad_norm": 0.6963374331826315, "learning_rate": 4.997244262356333e-06, "loss": 0.3022, "step": 1900 }, { "epoch": 0.09, "grad_norm": 0.7836837735572619, "learning_rate": 4.9972353529696745e-06, "loss": 0.3424, "step": 1901 }, { "epoch": 0.09, "grad_norm": 0.6990091642582053, "learning_rate": 4.997226429212054e-06, "loss": 0.3152, "step": 1902 }, { "epoch": 0.09, "grad_norm": 0.7831222265900695, "learning_rate": 4.997217491083523e-06, "loss": 0.3257, "step": 1903 }, { "epoch": 0.09, "grad_norm": 0.8036304138051275, "learning_rate": 4.997208538584132e-06, "loss": 0.3144, "step": 1904 }, { "epoch": 0.09, "grad_norm": 0.7251270047858688, "learning_rate": 4.997199571713934e-06, "loss": 0.3186, "step": 1905 }, { "epoch": 0.09, "grad_norm": 0.7116158139412175, "learning_rate": 4.997190590472981e-06, "loss": 0.3196, "step": 1906 }, { "epoch": 0.09, "grad_norm": 0.7201846583418852, "learning_rate": 4.9971815948613225e-06, "loss": 0.3311, "step": 1907 }, { "epoch": 0.09, "grad_norm": 0.7997702696957747, "learning_rate": 4.997172584879012e-06, "loss": 0.3325, "step": 1908 }, { "epoch": 0.09, "grad_norm": 0.7668203809437942, "learning_rate": 4.997163560526102e-06, "loss": 0.3431, "step": 1909 }, { "epoch": 0.09, "grad_norm": 0.7158109589347311, "learning_rate": 4.9971545218026415e-06, "loss": 0.3228, "step": 1910 }, { "epoch": 0.09, "grad_norm": 0.7932227280705801, "learning_rate": 4.9971454687086855e-06, "loss": 0.3225, "step": 1911 }, { "epoch": 0.09, "grad_norm": 0.735527862306773, "learning_rate": 4.9971364012442845e-06, "loss": 0.3296, "step": 1912 }, { "epoch": 0.09, "grad_norm": 0.7158975375635732, "learning_rate": 4.997127319409492e-06, "loss": 0.3217, "step": 1913 }, { "epoch": 0.09, "grad_norm": 0.7323482712540782, "learning_rate": 4.997118223204358e-06, "loss": 0.3233, "step": 1914 }, { "epoch": 0.09, "grad_norm": 0.6880820913560425, "learning_rate": 4.997109112628937e-06, "loss": 0.2948, "step": 1915 }, { "epoch": 0.09, "grad_norm": 0.7837134603822978, "learning_rate": 4.997099987683281e-06, "loss": 0.3248, "step": 1916 }, { "epoch": 0.09, "grad_norm": 0.7071113985732169, "learning_rate": 4.997090848367442e-06, "loss": 0.3076, "step": 1917 }, { "epoch": 0.09, "grad_norm": 0.6593355548184715, "learning_rate": 4.997081694681473e-06, "loss": 0.3171, "step": 1918 }, { "epoch": 0.09, "grad_norm": 0.6876502834785655, "learning_rate": 4.997072526625426e-06, "loss": 0.2911, "step": 1919 }, { "epoch": 0.09, "grad_norm": 0.8331073934271395, "learning_rate": 4.997063344199354e-06, "loss": 0.3339, "step": 1920 }, { "epoch": 0.09, "grad_norm": 0.7133722159310094, "learning_rate": 4.997054147403311e-06, "loss": 0.3172, "step": 1921 }, { "epoch": 0.09, "grad_norm": 0.6488035182272132, "learning_rate": 4.997044936237349e-06, "loss": 0.2892, "step": 1922 }, { "epoch": 0.09, "grad_norm": 0.7490176843677582, "learning_rate": 4.99703571070152e-06, "loss": 0.3035, "step": 1923 }, { "epoch": 0.09, "grad_norm": 0.7353834138289501, "learning_rate": 4.997026470795878e-06, "loss": 0.3317, "step": 1924 }, { "epoch": 0.09, "grad_norm": 0.7761396775437668, "learning_rate": 4.997017216520477e-06, "loss": 0.3157, "step": 1925 }, { "epoch": 0.09, "grad_norm": 0.6953386038853723, "learning_rate": 4.9970079478753695e-06, "loss": 0.3104, "step": 1926 }, { "epoch": 0.09, "grad_norm": 0.6747279018109393, "learning_rate": 4.996998664860608e-06, "loss": 0.3084, "step": 1927 }, { "epoch": 0.09, "grad_norm": 0.7223050296761307, "learning_rate": 4.996989367476248e-06, "loss": 0.3129, "step": 1928 }, { "epoch": 0.09, "grad_norm": 0.7037064353240281, "learning_rate": 4.996980055722341e-06, "loss": 0.3059, "step": 1929 }, { "epoch": 0.09, "grad_norm": 0.7104164777301738, "learning_rate": 4.996970729598942e-06, "loss": 0.3166, "step": 1930 }, { "epoch": 0.09, "grad_norm": 0.6909175558052765, "learning_rate": 4.996961389106104e-06, "loss": 0.3187, "step": 1931 }, { "epoch": 0.09, "grad_norm": 0.7254390781246494, "learning_rate": 4.99695203424388e-06, "loss": 0.3037, "step": 1932 }, { "epoch": 0.09, "grad_norm": 0.7279276591343629, "learning_rate": 4.996942665012326e-06, "loss": 0.3166, "step": 1933 }, { "epoch": 0.09, "grad_norm": 0.7594721863395164, "learning_rate": 4.996933281411492e-06, "loss": 0.3153, "step": 1934 }, { "epoch": 0.09, "grad_norm": 0.6780087296646461, "learning_rate": 4.996923883441437e-06, "loss": 0.3045, "step": 1935 }, { "epoch": 0.09, "grad_norm": 0.6821646650173121, "learning_rate": 4.9969144711022115e-06, "loss": 0.3107, "step": 1936 }, { "epoch": 0.09, "grad_norm": 0.8667170497564675, "learning_rate": 4.996905044393872e-06, "loss": 0.3208, "step": 1937 }, { "epoch": 0.09, "grad_norm": 0.7752206510634256, "learning_rate": 4.99689560331647e-06, "loss": 0.2991, "step": 1938 }, { "epoch": 0.09, "grad_norm": 0.6663394309335908, "learning_rate": 4.996886147870062e-06, "loss": 0.2809, "step": 1939 }, { "epoch": 0.09, "grad_norm": 0.68910794070722, "learning_rate": 4.996876678054702e-06, "loss": 0.316, "step": 1940 }, { "epoch": 0.09, "grad_norm": 0.7448187981262238, "learning_rate": 4.9968671938704435e-06, "loss": 0.3179, "step": 1941 }, { "epoch": 0.09, "grad_norm": 0.7997261717133721, "learning_rate": 4.996857695317343e-06, "loss": 0.3318, "step": 1942 }, { "epoch": 0.09, "grad_norm": 0.7277065429822372, "learning_rate": 4.996848182395453e-06, "loss": 0.3313, "step": 1943 }, { "epoch": 0.09, "grad_norm": 0.6995087059659084, "learning_rate": 4.99683865510483e-06, "loss": 0.2915, "step": 1944 }, { "epoch": 0.09, "grad_norm": 0.7500194247164272, "learning_rate": 4.9968291134455285e-06, "loss": 0.3382, "step": 1945 }, { "epoch": 0.09, "grad_norm": 0.8043447545238351, "learning_rate": 4.9968195574176015e-06, "loss": 0.3301, "step": 1946 }, { "epoch": 0.09, "grad_norm": 0.7773128590339266, "learning_rate": 4.9968099870211064e-06, "loss": 0.3403, "step": 1947 }, { "epoch": 0.09, "grad_norm": 0.7636779095414591, "learning_rate": 4.996800402256098e-06, "loss": 0.3302, "step": 1948 }, { "epoch": 0.09, "grad_norm": 0.6776972292481492, "learning_rate": 4.99679080312263e-06, "loss": 0.3296, "step": 1949 }, { "epoch": 0.09, "grad_norm": 0.7871746604257311, "learning_rate": 4.996781189620759e-06, "loss": 0.3243, "step": 1950 }, { "epoch": 0.09, "grad_norm": 0.7381429331745297, "learning_rate": 4.996771561750539e-06, "loss": 0.3137, "step": 1951 }, { "epoch": 0.09, "grad_norm": 0.6991042128244905, "learning_rate": 4.996761919512026e-06, "loss": 0.3176, "step": 1952 }, { "epoch": 0.09, "grad_norm": 0.734969840078329, "learning_rate": 4.996752262905277e-06, "loss": 0.3088, "step": 1953 }, { "epoch": 0.09, "grad_norm": 0.6579275817182733, "learning_rate": 4.996742591930345e-06, "loss": 0.3091, "step": 1954 }, { "epoch": 0.09, "grad_norm": 0.7993589249036315, "learning_rate": 4.9967329065872874e-06, "loss": 0.3377, "step": 1955 }, { "epoch": 0.09, "grad_norm": 0.7689596540842992, "learning_rate": 4.99672320687616e-06, "loss": 0.3274, "step": 1956 }, { "epoch": 0.09, "grad_norm": 0.7900213165159051, "learning_rate": 4.9967134927970175e-06, "loss": 0.3239, "step": 1957 }, { "epoch": 0.09, "grad_norm": 0.7159964306244458, "learning_rate": 4.9967037643499166e-06, "loss": 0.324, "step": 1958 }, { "epoch": 0.09, "grad_norm": 0.7474630611415196, "learning_rate": 4.996694021534913e-06, "loss": 0.3453, "step": 1959 }, { "epoch": 0.09, "grad_norm": 0.7966518428507672, "learning_rate": 4.996684264352063e-06, "loss": 0.3489, "step": 1960 }, { "epoch": 0.09, "grad_norm": 0.7031007878620311, "learning_rate": 4.996674492801422e-06, "loss": 0.3055, "step": 1961 }, { "epoch": 0.09, "grad_norm": 0.7913475176910331, "learning_rate": 4.9966647068830476e-06, "loss": 0.3318, "step": 1962 }, { "epoch": 0.09, "grad_norm": 0.73957207620478, "learning_rate": 4.996654906596995e-06, "loss": 0.3295, "step": 1963 }, { "epoch": 0.09, "grad_norm": 0.6773020245710241, "learning_rate": 4.996645091943321e-06, "loss": 0.2944, "step": 1964 }, { "epoch": 0.09, "grad_norm": 0.6806928340542534, "learning_rate": 4.996635262922082e-06, "loss": 0.2932, "step": 1965 }, { "epoch": 0.09, "grad_norm": 0.7585239426799351, "learning_rate": 4.996625419533333e-06, "loss": 0.3374, "step": 1966 }, { "epoch": 0.09, "grad_norm": 0.7074513198076765, "learning_rate": 4.996615561777134e-06, "loss": 0.3207, "step": 1967 }, { "epoch": 0.09, "grad_norm": 0.6914585274096812, "learning_rate": 4.99660568965354e-06, "loss": 0.33, "step": 1968 }, { "epoch": 0.09, "grad_norm": 0.661062052777361, "learning_rate": 4.996595803162607e-06, "loss": 0.3086, "step": 1969 }, { "epoch": 0.09, "grad_norm": 0.7115858274680142, "learning_rate": 4.996585902304393e-06, "loss": 0.3176, "step": 1970 }, { "epoch": 0.09, "grad_norm": 0.6807019705232173, "learning_rate": 4.9965759870789545e-06, "loss": 0.3192, "step": 1971 }, { "epoch": 0.09, "grad_norm": 0.7248476486564042, "learning_rate": 4.996566057486348e-06, "loss": 0.3337, "step": 1972 }, { "epoch": 0.09, "grad_norm": 0.6904311121359333, "learning_rate": 4.996556113526633e-06, "loss": 0.3173, "step": 1973 }, { "epoch": 0.09, "grad_norm": 0.7306640582016146, "learning_rate": 4.996546155199864e-06, "loss": 0.3212, "step": 1974 }, { "epoch": 0.09, "grad_norm": 0.7356398855193282, "learning_rate": 4.9965361825061e-06, "loss": 0.3147, "step": 1975 }, { "epoch": 0.09, "grad_norm": 0.6890633431582758, "learning_rate": 4.996526195445397e-06, "loss": 0.3178, "step": 1976 }, { "epoch": 0.09, "grad_norm": 0.6618299416200297, "learning_rate": 4.996516194017813e-06, "loss": 0.3192, "step": 1977 }, { "epoch": 0.09, "grad_norm": 0.7081314021779908, "learning_rate": 4.996506178223406e-06, "loss": 0.3064, "step": 1978 }, { "epoch": 0.09, "grad_norm": 0.7051453717902891, "learning_rate": 4.996496148062234e-06, "loss": 0.3128, "step": 1979 }, { "epoch": 0.09, "grad_norm": 0.7160342677676603, "learning_rate": 4.996486103534354e-06, "loss": 0.3328, "step": 1980 }, { "epoch": 0.09, "grad_norm": 0.6961733703657984, "learning_rate": 4.9964760446398236e-06, "loss": 0.3247, "step": 1981 }, { "epoch": 0.09, "grad_norm": 0.7030255026396931, "learning_rate": 4.9964659713787015e-06, "loss": 0.3086, "step": 1982 }, { "epoch": 0.09, "grad_norm": 0.6797773801964319, "learning_rate": 4.996455883751044e-06, "loss": 0.3325, "step": 1983 }, { "epoch": 0.09, "grad_norm": 0.7218973006376445, "learning_rate": 4.996445781756912e-06, "loss": 0.312, "step": 1984 }, { "epoch": 0.09, "grad_norm": 0.7327849542558678, "learning_rate": 4.996435665396361e-06, "loss": 0.3184, "step": 1985 }, { "epoch": 0.09, "grad_norm": 0.7751262674490096, "learning_rate": 4.99642553466945e-06, "loss": 0.3178, "step": 1986 }, { "epoch": 0.09, "grad_norm": 0.6938787389743843, "learning_rate": 4.996415389576238e-06, "loss": 0.3096, "step": 1987 }, { "epoch": 0.09, "grad_norm": 0.6902095563237467, "learning_rate": 4.996405230116783e-06, "loss": 0.3176, "step": 1988 }, { "epoch": 0.09, "grad_norm": 0.732727021725934, "learning_rate": 4.996395056291143e-06, "loss": 0.3011, "step": 1989 }, { "epoch": 0.09, "grad_norm": 0.6776226488820124, "learning_rate": 4.996384868099378e-06, "loss": 0.3317, "step": 1990 }, { "epoch": 0.09, "grad_norm": 0.7337498477441999, "learning_rate": 4.996374665541544e-06, "loss": 0.3258, "step": 1991 }, { "epoch": 0.09, "grad_norm": 0.7513878336937183, "learning_rate": 4.996364448617702e-06, "loss": 0.3062, "step": 1992 }, { "epoch": 0.09, "grad_norm": 0.7546091481185564, "learning_rate": 4.99635421732791e-06, "loss": 0.3281, "step": 1993 }, { "epoch": 0.09, "grad_norm": 0.749783182598125, "learning_rate": 4.996343971672226e-06, "loss": 0.3371, "step": 1994 }, { "epoch": 0.09, "grad_norm": 0.6810520212312787, "learning_rate": 4.996333711650711e-06, "loss": 0.3087, "step": 1995 }, { "epoch": 0.09, "grad_norm": 0.7519857883680776, "learning_rate": 4.996323437263423e-06, "loss": 0.3273, "step": 1996 }, { "epoch": 0.09, "grad_norm": 0.7178022665080589, "learning_rate": 4.99631314851042e-06, "loss": 0.3119, "step": 1997 }, { "epoch": 0.09, "grad_norm": 0.69700242113943, "learning_rate": 4.996302845391763e-06, "loss": 0.3036, "step": 1998 }, { "epoch": 0.09, "grad_norm": 0.6275142874244114, "learning_rate": 4.996292527907511e-06, "loss": 0.3001, "step": 1999 }, { "epoch": 0.09, "grad_norm": 0.6806431371033609, "learning_rate": 4.996282196057722e-06, "loss": 0.291, "step": 2000 }, { "epoch": 0.09, "grad_norm": 0.7162968451659444, "learning_rate": 4.996271849842457e-06, "loss": 0.3276, "step": 2001 }, { "epoch": 0.09, "grad_norm": 0.6793452324954328, "learning_rate": 4.996261489261774e-06, "loss": 0.3165, "step": 2002 }, { "epoch": 0.09, "grad_norm": 0.7265893043004891, "learning_rate": 4.996251114315734e-06, "loss": 0.3072, "step": 2003 }, { "epoch": 0.09, "grad_norm": 0.6727822894849818, "learning_rate": 4.996240725004397e-06, "loss": 0.3152, "step": 2004 }, { "epoch": 0.09, "grad_norm": 0.6626612597708353, "learning_rate": 4.996230321327821e-06, "loss": 0.3148, "step": 2005 }, { "epoch": 0.09, "grad_norm": 0.6930216608453353, "learning_rate": 4.996219903286067e-06, "loss": 0.3304, "step": 2006 }, { "epoch": 0.09, "grad_norm": 0.6991611440649248, "learning_rate": 4.996209470879195e-06, "loss": 0.3182, "step": 2007 }, { "epoch": 0.09, "grad_norm": 0.6943444922924938, "learning_rate": 4.996199024107265e-06, "loss": 0.3379, "step": 2008 }, { "epoch": 0.09, "grad_norm": 0.7185752172120992, "learning_rate": 4.9961885629703376e-06, "loss": 0.3241, "step": 2009 }, { "epoch": 0.09, "grad_norm": 0.6818338953878365, "learning_rate": 4.996178087468471e-06, "loss": 0.3252, "step": 2010 }, { "epoch": 0.09, "grad_norm": 0.73129274465354, "learning_rate": 4.996167597601729e-06, "loss": 0.3165, "step": 2011 }, { "epoch": 0.09, "grad_norm": 0.6668892045953531, "learning_rate": 4.9961570933701684e-06, "loss": 0.3105, "step": 2012 }, { "epoch": 0.09, "grad_norm": 0.8048888466377019, "learning_rate": 4.996146574773851e-06, "loss": 0.3319, "step": 2013 }, { "epoch": 0.09, "grad_norm": 0.7022578850602126, "learning_rate": 4.996136041812838e-06, "loss": 0.3033, "step": 2014 }, { "epoch": 0.09, "grad_norm": 0.7214059308725413, "learning_rate": 4.9961254944871895e-06, "loss": 0.3067, "step": 2015 }, { "epoch": 0.09, "grad_norm": 0.6004132207874183, "learning_rate": 4.996114932796966e-06, "loss": 0.2768, "step": 2016 }, { "epoch": 0.09, "grad_norm": 0.7204367294706567, "learning_rate": 4.996104356742228e-06, "loss": 0.3145, "step": 2017 }, { "epoch": 0.09, "grad_norm": 0.7532391253395982, "learning_rate": 4.996093766323038e-06, "loss": 0.326, "step": 2018 }, { "epoch": 0.09, "grad_norm": 0.72512529105649, "learning_rate": 4.996083161539455e-06, "loss": 0.3114, "step": 2019 }, { "epoch": 0.09, "grad_norm": 0.7229045385144908, "learning_rate": 4.99607254239154e-06, "loss": 0.3216, "step": 2020 }, { "epoch": 0.09, "grad_norm": 0.704978826789479, "learning_rate": 4.996061908879356e-06, "loss": 0.3115, "step": 2021 }, { "epoch": 0.09, "grad_norm": 0.69803821366478, "learning_rate": 4.9960512610029625e-06, "loss": 0.2988, "step": 2022 }, { "epoch": 0.09, "grad_norm": 0.8070362772972249, "learning_rate": 4.996040598762421e-06, "loss": 0.3396, "step": 2023 }, { "epoch": 0.09, "grad_norm": 0.6905061687146364, "learning_rate": 4.996029922157795e-06, "loss": 0.3064, "step": 2024 }, { "epoch": 0.09, "grad_norm": 0.7330085432984124, "learning_rate": 4.996019231189142e-06, "loss": 0.3224, "step": 2025 }, { "epoch": 0.09, "grad_norm": 0.7202361827496362, "learning_rate": 4.996008525856527e-06, "loss": 0.3285, "step": 2026 }, { "epoch": 0.09, "grad_norm": 0.6806881546586806, "learning_rate": 4.9959978061600104e-06, "loss": 0.3034, "step": 2027 }, { "epoch": 0.1, "grad_norm": 0.699354028503221, "learning_rate": 4.995987072099653e-06, "loss": 0.3048, "step": 2028 }, { "epoch": 0.1, "grad_norm": 0.7528084838502914, "learning_rate": 4.995976323675517e-06, "loss": 0.3217, "step": 2029 }, { "epoch": 0.1, "grad_norm": 0.7025373475943397, "learning_rate": 4.995965560887666e-06, "loss": 0.314, "step": 2030 }, { "epoch": 0.1, "grad_norm": 0.7784791234767059, "learning_rate": 4.995954783736159e-06, "loss": 0.3566, "step": 2031 }, { "epoch": 0.1, "grad_norm": 0.7263136612599541, "learning_rate": 4.995943992221061e-06, "loss": 0.3108, "step": 2032 }, { "epoch": 0.1, "grad_norm": 0.7131279360509972, "learning_rate": 4.995933186342431e-06, "loss": 0.3237, "step": 2033 }, { "epoch": 0.1, "grad_norm": 0.7419875917380949, "learning_rate": 4.995922366100334e-06, "loss": 0.3244, "step": 2034 }, { "epoch": 0.1, "grad_norm": 0.7337273470801657, "learning_rate": 4.99591153149483e-06, "loss": 0.2912, "step": 2035 }, { "epoch": 0.1, "grad_norm": 0.7601069349492571, "learning_rate": 4.995900682525983e-06, "loss": 0.3236, "step": 2036 }, { "epoch": 0.1, "grad_norm": 0.7889723649976328, "learning_rate": 4.9958898191938565e-06, "loss": 0.3322, "step": 2037 }, { "epoch": 0.1, "grad_norm": 0.6655029627480791, "learning_rate": 4.99587894149851e-06, "loss": 0.3122, "step": 2038 }, { "epoch": 0.1, "grad_norm": 0.689591537280031, "learning_rate": 4.995868049440008e-06, "loss": 0.3221, "step": 2039 }, { "epoch": 0.1, "grad_norm": 0.7386857331320142, "learning_rate": 4.995857143018412e-06, "loss": 0.3122, "step": 2040 }, { "epoch": 0.1, "grad_norm": 0.698494224717507, "learning_rate": 4.995846222233785e-06, "loss": 0.3192, "step": 2041 }, { "epoch": 0.1, "grad_norm": 0.7663458341407203, "learning_rate": 4.995835287086192e-06, "loss": 0.329, "step": 2042 }, { "epoch": 0.1, "grad_norm": 0.6675677203996234, "learning_rate": 4.995824337575693e-06, "loss": 0.304, "step": 2043 }, { "epoch": 0.1, "grad_norm": 0.7045633541467177, "learning_rate": 4.995813373702352e-06, "loss": 0.3349, "step": 2044 }, { "epoch": 0.1, "grad_norm": 0.8001088806098321, "learning_rate": 4.995802395466233e-06, "loss": 0.3269, "step": 2045 }, { "epoch": 0.1, "grad_norm": 0.6684643537490988, "learning_rate": 4.995791402867399e-06, "loss": 0.3306, "step": 2046 }, { "epoch": 0.1, "grad_norm": 0.7017845606723002, "learning_rate": 4.995780395905912e-06, "loss": 0.3066, "step": 2047 }, { "epoch": 0.1, "grad_norm": 0.7219907046506884, "learning_rate": 4.995769374581836e-06, "loss": 0.3286, "step": 2048 }, { "epoch": 0.1, "grad_norm": 0.7611232678657989, "learning_rate": 4.995758338895235e-06, "loss": 0.322, "step": 2049 }, { "epoch": 0.1, "grad_norm": 0.7297936592281694, "learning_rate": 4.995747288846171e-06, "loss": 0.3252, "step": 2050 }, { "epoch": 0.1, "grad_norm": 0.662164079528115, "learning_rate": 4.9957362244347104e-06, "loss": 0.3119, "step": 2051 }, { "epoch": 0.1, "grad_norm": 0.7512124248027041, "learning_rate": 4.995725145660914e-06, "loss": 0.329, "step": 2052 }, { "epoch": 0.1, "grad_norm": 0.7364984112961053, "learning_rate": 4.995714052524847e-06, "loss": 0.3389, "step": 2053 }, { "epoch": 0.1, "grad_norm": 0.7518378719297122, "learning_rate": 4.9957029450265726e-06, "loss": 0.3289, "step": 2054 }, { "epoch": 0.1, "grad_norm": 0.7008184167259965, "learning_rate": 4.995691823166155e-06, "loss": 0.3247, "step": 2055 }, { "epoch": 0.1, "grad_norm": 0.7318672838819901, "learning_rate": 4.995680686943658e-06, "loss": 0.3234, "step": 2056 }, { "epoch": 0.1, "grad_norm": 0.748409492884662, "learning_rate": 4.995669536359147e-06, "loss": 0.3257, "step": 2057 }, { "epoch": 0.1, "grad_norm": 0.7630862730616792, "learning_rate": 4.995658371412684e-06, "loss": 0.3105, "step": 2058 }, { "epoch": 0.1, "grad_norm": 0.8310588706591965, "learning_rate": 4.995647192104335e-06, "loss": 0.3158, "step": 2059 }, { "epoch": 0.1, "grad_norm": 0.6856091369346331, "learning_rate": 4.995635998434163e-06, "loss": 0.3159, "step": 2060 }, { "epoch": 0.1, "grad_norm": 0.672522372694842, "learning_rate": 4.9956247904022335e-06, "loss": 0.3195, "step": 2061 }, { "epoch": 0.1, "grad_norm": 0.7242321473645752, "learning_rate": 4.99561356800861e-06, "loss": 0.3331, "step": 2062 }, { "epoch": 0.1, "grad_norm": 0.695451217035721, "learning_rate": 4.995602331253359e-06, "loss": 0.2921, "step": 2063 }, { "epoch": 0.1, "grad_norm": 0.6699566919656599, "learning_rate": 4.9955910801365425e-06, "loss": 0.2949, "step": 2064 }, { "epoch": 0.1, "grad_norm": 0.6948181594873838, "learning_rate": 4.995579814658228e-06, "loss": 0.3123, "step": 2065 }, { "epoch": 0.1, "grad_norm": 0.6920526269980979, "learning_rate": 4.995568534818478e-06, "loss": 0.3286, "step": 2066 }, { "epoch": 0.1, "grad_norm": 0.6859101146805546, "learning_rate": 4.995557240617358e-06, "loss": 0.3116, "step": 2067 }, { "epoch": 0.1, "grad_norm": 0.7199519623103068, "learning_rate": 4.9955459320549335e-06, "loss": 0.3114, "step": 2068 }, { "epoch": 0.1, "grad_norm": 0.6624799208975389, "learning_rate": 4.995534609131269e-06, "loss": 0.3002, "step": 2069 }, { "epoch": 0.1, "grad_norm": 0.7345684193622452, "learning_rate": 4.995523271846432e-06, "loss": 0.3236, "step": 2070 }, { "epoch": 0.1, "grad_norm": 0.7277376794213963, "learning_rate": 4.995511920200483e-06, "loss": 0.3196, "step": 2071 }, { "epoch": 0.1, "grad_norm": 0.7966042320944818, "learning_rate": 4.995500554193492e-06, "loss": 0.3389, "step": 2072 }, { "epoch": 0.1, "grad_norm": 0.7288973169737949, "learning_rate": 4.995489173825522e-06, "loss": 0.3176, "step": 2073 }, { "epoch": 0.1, "grad_norm": 0.6599058328676276, "learning_rate": 4.9954777790966395e-06, "loss": 0.3114, "step": 2074 }, { "epoch": 0.1, "grad_norm": 0.7456654896908892, "learning_rate": 4.995466370006909e-06, "loss": 0.3069, "step": 2075 }, { "epoch": 0.1, "grad_norm": 0.7100040579208217, "learning_rate": 4.995454946556397e-06, "loss": 0.2897, "step": 2076 }, { "epoch": 0.1, "grad_norm": 0.6940114034808494, "learning_rate": 4.995443508745169e-06, "loss": 0.2791, "step": 2077 }, { "epoch": 0.1, "grad_norm": 0.7534207824449729, "learning_rate": 4.99543205657329e-06, "loss": 0.3247, "step": 2078 }, { "epoch": 0.1, "grad_norm": 0.7053005832363709, "learning_rate": 4.995420590040828e-06, "loss": 0.3161, "step": 2079 }, { "epoch": 0.1, "grad_norm": 0.7360936652114926, "learning_rate": 4.9954091091478474e-06, "loss": 0.3371, "step": 2080 }, { "epoch": 0.1, "grad_norm": 0.7361277447061083, "learning_rate": 4.995397613894414e-06, "loss": 0.2998, "step": 2081 }, { "epoch": 0.1, "grad_norm": 0.7578695238762561, "learning_rate": 4.995386104280595e-06, "loss": 0.3243, "step": 2082 }, { "epoch": 0.1, "grad_norm": 0.6887447683194639, "learning_rate": 4.995374580306456e-06, "loss": 0.3052, "step": 2083 }, { "epoch": 0.1, "grad_norm": 0.7351082814470324, "learning_rate": 4.995363041972063e-06, "loss": 0.3297, "step": 2084 }, { "epoch": 0.1, "grad_norm": 0.8379569815257168, "learning_rate": 4.995351489277484e-06, "loss": 0.343, "step": 2085 }, { "epoch": 0.1, "grad_norm": 0.6746011528273849, "learning_rate": 4.995339922222784e-06, "loss": 0.307, "step": 2086 }, { "epoch": 0.1, "grad_norm": 0.7536579685652788, "learning_rate": 4.99532834080803e-06, "loss": 0.3046, "step": 2087 }, { "epoch": 0.1, "grad_norm": 0.6048047823207747, "learning_rate": 4.995316745033287e-06, "loss": 0.2821, "step": 2088 }, { "epoch": 0.1, "grad_norm": 0.7698779951598945, "learning_rate": 4.9953051348986245e-06, "loss": 0.3138, "step": 2089 }, { "epoch": 0.1, "grad_norm": 0.7246611717852909, "learning_rate": 4.995293510404109e-06, "loss": 0.3161, "step": 2090 }, { "epoch": 0.1, "grad_norm": 0.6797194895971466, "learning_rate": 4.995281871549805e-06, "loss": 0.3083, "step": 2091 }, { "epoch": 0.1, "grad_norm": 0.7718555193688023, "learning_rate": 4.995270218335782e-06, "loss": 0.3206, "step": 2092 }, { "epoch": 0.1, "grad_norm": 0.688538715738008, "learning_rate": 4.995258550762106e-06, "loss": 0.3081, "step": 2093 }, { "epoch": 0.1, "grad_norm": 0.7251399736219075, "learning_rate": 4.995246868828844e-06, "loss": 0.3094, "step": 2094 }, { "epoch": 0.1, "grad_norm": 0.7593002180282846, "learning_rate": 4.995235172536063e-06, "loss": 0.3035, "step": 2095 }, { "epoch": 0.1, "grad_norm": 0.7568472233922776, "learning_rate": 4.995223461883831e-06, "loss": 0.2847, "step": 2096 }, { "epoch": 0.1, "grad_norm": 0.690897163187103, "learning_rate": 4.9952117368722155e-06, "loss": 0.327, "step": 2097 }, { "epoch": 0.1, "grad_norm": 0.7380133555465154, "learning_rate": 4.995199997501283e-06, "loss": 0.322, "step": 2098 }, { "epoch": 0.1, "grad_norm": 0.7599174665252005, "learning_rate": 4.995188243771102e-06, "loss": 0.2968, "step": 2099 }, { "epoch": 0.1, "grad_norm": 0.7543462461951145, "learning_rate": 4.99517647568174e-06, "loss": 0.3064, "step": 2100 }, { "epoch": 0.1, "grad_norm": 0.6785276673999853, "learning_rate": 4.995164693233264e-06, "loss": 0.3072, "step": 2101 }, { "epoch": 0.1, "grad_norm": 0.6801999540643456, "learning_rate": 4.995152896425744e-06, "loss": 0.3182, "step": 2102 }, { "epoch": 0.1, "grad_norm": 0.7157938353120792, "learning_rate": 4.995141085259244e-06, "loss": 0.3507, "step": 2103 }, { "epoch": 0.1, "grad_norm": 0.6634684405372138, "learning_rate": 4.995129259733835e-06, "loss": 0.303, "step": 2104 }, { "epoch": 0.1, "grad_norm": 0.6948258480655293, "learning_rate": 4.995117419849585e-06, "loss": 0.3267, "step": 2105 }, { "epoch": 0.1, "grad_norm": 0.7349833299145839, "learning_rate": 4.99510556560656e-06, "loss": 0.302, "step": 2106 }, { "epoch": 0.1, "grad_norm": 0.6513771738931029, "learning_rate": 4.99509369700483e-06, "loss": 0.3055, "step": 2107 }, { "epoch": 0.1, "grad_norm": 0.6573655422742545, "learning_rate": 4.9950818140444634e-06, "loss": 0.3086, "step": 2108 }, { "epoch": 0.1, "grad_norm": 0.7298904017917056, "learning_rate": 4.995069916725528e-06, "loss": 0.3096, "step": 2109 }, { "epoch": 0.1, "grad_norm": 0.6592191877525462, "learning_rate": 4.995058005048092e-06, "loss": 0.3017, "step": 2110 }, { "epoch": 0.1, "grad_norm": 0.658047161169274, "learning_rate": 4.995046079012225e-06, "loss": 0.3229, "step": 2111 }, { "epoch": 0.1, "grad_norm": 0.6641755663570664, "learning_rate": 4.995034138617993e-06, "loss": 0.3117, "step": 2112 }, { "epoch": 0.1, "grad_norm": 0.7168422958712559, "learning_rate": 4.995022183865469e-06, "loss": 0.3104, "step": 2113 }, { "epoch": 0.1, "grad_norm": 0.6412624758581381, "learning_rate": 4.995010214754718e-06, "loss": 0.2953, "step": 2114 }, { "epoch": 0.1, "grad_norm": 0.707773277638653, "learning_rate": 4.994998231285811e-06, "loss": 0.2983, "step": 2115 }, { "epoch": 0.1, "grad_norm": 0.6654860997231012, "learning_rate": 4.9949862334588165e-06, "loss": 0.2987, "step": 2116 }, { "epoch": 0.1, "grad_norm": 0.6920216999662693, "learning_rate": 4.994974221273802e-06, "loss": 0.3077, "step": 2117 }, { "epoch": 0.1, "grad_norm": 0.7043248811209329, "learning_rate": 4.994962194730839e-06, "loss": 0.3039, "step": 2118 }, { "epoch": 0.1, "grad_norm": 0.6414661717906337, "learning_rate": 4.994950153829995e-06, "loss": 0.3026, "step": 2119 }, { "epoch": 0.1, "grad_norm": 0.7656162583343783, "learning_rate": 4.994938098571341e-06, "loss": 0.3229, "step": 2120 }, { "epoch": 0.1, "grad_norm": 0.7329448545541555, "learning_rate": 4.994926028954944e-06, "loss": 0.3036, "step": 2121 }, { "epoch": 0.1, "grad_norm": 0.7504873821177167, "learning_rate": 4.994913944980876e-06, "loss": 0.3135, "step": 2122 }, { "epoch": 0.1, "grad_norm": 0.6525584091372416, "learning_rate": 4.994901846649205e-06, "loss": 0.3079, "step": 2123 }, { "epoch": 0.1, "grad_norm": 0.6677683010828573, "learning_rate": 4.99488973396e-06, "loss": 0.2989, "step": 2124 }, { "epoch": 0.1, "grad_norm": 0.6383993767237515, "learning_rate": 4.994877606913333e-06, "loss": 0.3136, "step": 2125 }, { "epoch": 0.1, "grad_norm": 0.7672218869300704, "learning_rate": 4.994865465509272e-06, "loss": 0.298, "step": 2126 }, { "epoch": 0.1, "grad_norm": 0.7898948123791807, "learning_rate": 4.994853309747887e-06, "loss": 0.3217, "step": 2127 }, { "epoch": 0.1, "grad_norm": 0.6512444266777496, "learning_rate": 4.994841139629248e-06, "loss": 0.3235, "step": 2128 }, { "epoch": 0.1, "grad_norm": 0.6630778062645278, "learning_rate": 4.994828955153426e-06, "loss": 0.3049, "step": 2129 }, { "epoch": 0.1, "grad_norm": 0.6674226556798483, "learning_rate": 4.9948167563204896e-06, "loss": 0.2838, "step": 2130 }, { "epoch": 0.1, "grad_norm": 0.7183317086908528, "learning_rate": 4.99480454313051e-06, "loss": 0.3114, "step": 2131 }, { "epoch": 0.1, "grad_norm": 0.7285979668034909, "learning_rate": 4.9947923155835585e-06, "loss": 0.2913, "step": 2132 }, { "epoch": 0.1, "grad_norm": 0.7575939692668359, "learning_rate": 4.994780073679703e-06, "loss": 0.3207, "step": 2133 }, { "epoch": 0.1, "grad_norm": 0.7961760710929432, "learning_rate": 4.994767817419016e-06, "loss": 0.3549, "step": 2134 }, { "epoch": 0.1, "grad_norm": 0.6732189726251565, "learning_rate": 4.994755546801566e-06, "loss": 0.3404, "step": 2135 }, { "epoch": 0.1, "grad_norm": 0.6589464539118469, "learning_rate": 4.994743261827426e-06, "loss": 0.3188, "step": 2136 }, { "epoch": 0.1, "grad_norm": 0.7146730473388665, "learning_rate": 4.9947309624966655e-06, "loss": 0.2938, "step": 2137 }, { "epoch": 0.1, "grad_norm": 0.7908326285223344, "learning_rate": 4.994718648809355e-06, "loss": 0.3134, "step": 2138 }, { "epoch": 0.1, "grad_norm": 0.6470129845540165, "learning_rate": 4.994706320765566e-06, "loss": 0.3094, "step": 2139 }, { "epoch": 0.1, "grad_norm": 0.7535951360308738, "learning_rate": 4.994693978365369e-06, "loss": 0.3014, "step": 2140 }, { "epoch": 0.1, "grad_norm": 0.659742105053161, "learning_rate": 4.994681621608835e-06, "loss": 0.2986, "step": 2141 }, { "epoch": 0.1, "grad_norm": 0.7234559568834276, "learning_rate": 4.994669250496035e-06, "loss": 0.3117, "step": 2142 }, { "epoch": 0.1, "grad_norm": 0.7420546242600153, "learning_rate": 4.994656865027041e-06, "loss": 0.3263, "step": 2143 }, { "epoch": 0.1, "grad_norm": 0.6878873848680856, "learning_rate": 4.994644465201924e-06, "loss": 0.316, "step": 2144 }, { "epoch": 0.1, "grad_norm": 0.7130448330807805, "learning_rate": 4.994632051020755e-06, "loss": 0.316, "step": 2145 }, { "epoch": 0.1, "grad_norm": 0.6985326407386208, "learning_rate": 4.994619622483605e-06, "loss": 0.3176, "step": 2146 }, { "epoch": 0.1, "grad_norm": 0.6954824915563703, "learning_rate": 4.994607179590546e-06, "loss": 0.3202, "step": 2147 }, { "epoch": 0.1, "grad_norm": 0.701608322872022, "learning_rate": 4.9945947223416504e-06, "loss": 0.3279, "step": 2148 }, { "epoch": 0.1, "grad_norm": 0.7647580945393683, "learning_rate": 4.994582250736989e-06, "loss": 0.3218, "step": 2149 }, { "epoch": 0.1, "grad_norm": 0.7116139743876687, "learning_rate": 4.994569764776633e-06, "loss": 0.3093, "step": 2150 }, { "epoch": 0.1, "grad_norm": 0.7408421617761687, "learning_rate": 4.994557264460656e-06, "loss": 0.331, "step": 2151 }, { "epoch": 0.1, "grad_norm": 0.7621433107967988, "learning_rate": 4.994544749789129e-06, "loss": 0.3443, "step": 2152 }, { "epoch": 0.1, "grad_norm": 0.6856200874398263, "learning_rate": 4.994532220762123e-06, "loss": 0.3129, "step": 2153 }, { "epoch": 0.1, "grad_norm": 0.6602667227673957, "learning_rate": 4.994519677379711e-06, "loss": 0.3036, "step": 2154 }, { "epoch": 0.1, "grad_norm": 0.6575252306991487, "learning_rate": 4.994507119641966e-06, "loss": 0.3065, "step": 2155 }, { "epoch": 0.1, "grad_norm": 0.6462179327425783, "learning_rate": 4.9944945475489595e-06, "loss": 0.3046, "step": 2156 }, { "epoch": 0.1, "grad_norm": 0.8124030652977513, "learning_rate": 4.9944819611007625e-06, "loss": 0.328, "step": 2157 }, { "epoch": 0.1, "grad_norm": 0.7396438437035795, "learning_rate": 4.99446936029745e-06, "loss": 0.3162, "step": 2158 }, { "epoch": 0.1, "grad_norm": 0.7019138256179622, "learning_rate": 4.994456745139093e-06, "loss": 0.3488, "step": 2159 }, { "epoch": 0.1, "grad_norm": 0.6706820652537383, "learning_rate": 4.994444115625765e-06, "loss": 0.3183, "step": 2160 }, { "epoch": 0.1, "grad_norm": 0.6478576608051356, "learning_rate": 4.9944314717575375e-06, "loss": 0.2835, "step": 2161 }, { "epoch": 0.1, "grad_norm": 0.760462593606468, "learning_rate": 4.994418813534484e-06, "loss": 0.3254, "step": 2162 }, { "epoch": 0.1, "grad_norm": 0.6833273975277072, "learning_rate": 4.994406140956677e-06, "loss": 0.3048, "step": 2163 }, { "epoch": 0.1, "grad_norm": 0.6962533105832606, "learning_rate": 4.99439345402419e-06, "loss": 0.3041, "step": 2164 }, { "epoch": 0.1, "grad_norm": 0.641555512002947, "learning_rate": 4.9943807527370945e-06, "loss": 0.303, "step": 2165 }, { "epoch": 0.1, "grad_norm": 0.7115814791514812, "learning_rate": 4.994368037095466e-06, "loss": 0.3173, "step": 2166 }, { "epoch": 0.1, "grad_norm": 0.68661756878424, "learning_rate": 4.994355307099375e-06, "loss": 0.2896, "step": 2167 }, { "epoch": 0.1, "grad_norm": 0.7005308344940274, "learning_rate": 4.994342562748897e-06, "loss": 0.3178, "step": 2168 }, { "epoch": 0.1, "grad_norm": 0.7598988318670659, "learning_rate": 4.994329804044105e-06, "loss": 0.3271, "step": 2169 }, { "epoch": 0.1, "grad_norm": 0.7648446254554273, "learning_rate": 4.994317030985071e-06, "loss": 0.2999, "step": 2170 }, { "epoch": 0.1, "grad_norm": 0.7334109877095383, "learning_rate": 4.99430424357187e-06, "loss": 0.3137, "step": 2171 }, { "epoch": 0.1, "grad_norm": 0.747867947590961, "learning_rate": 4.994291441804575e-06, "loss": 0.3125, "step": 2172 }, { "epoch": 0.1, "grad_norm": 0.7601727862740293, "learning_rate": 4.99427862568326e-06, "loss": 0.3288, "step": 2173 }, { "epoch": 0.1, "grad_norm": 0.6362727188523271, "learning_rate": 4.994265795207998e-06, "loss": 0.2884, "step": 2174 }, { "epoch": 0.1, "grad_norm": 0.6996284726742361, "learning_rate": 4.994252950378863e-06, "loss": 0.2881, "step": 2175 }, { "epoch": 0.1, "grad_norm": 0.7385240875157256, "learning_rate": 4.994240091195929e-06, "loss": 0.318, "step": 2176 }, { "epoch": 0.1, "grad_norm": 0.669260171942932, "learning_rate": 4.994227217659271e-06, "loss": 0.2902, "step": 2177 }, { "epoch": 0.1, "grad_norm": 1.0025843335596905, "learning_rate": 4.994214329768961e-06, "loss": 0.3365, "step": 2178 }, { "epoch": 0.1, "grad_norm": 0.7006552906041222, "learning_rate": 4.994201427525075e-06, "loss": 0.3127, "step": 2179 }, { "epoch": 0.1, "grad_norm": 0.7039834708528693, "learning_rate": 4.994188510927687e-06, "loss": 0.3154, "step": 2180 }, { "epoch": 0.1, "grad_norm": 0.7259054005192246, "learning_rate": 4.994175579976871e-06, "loss": 0.3283, "step": 2181 }, { "epoch": 0.1, "grad_norm": 0.6973919891396498, "learning_rate": 4.994162634672701e-06, "loss": 0.3392, "step": 2182 }, { "epoch": 0.1, "grad_norm": 0.6791034522532114, "learning_rate": 4.994149675015253e-06, "loss": 0.3203, "step": 2183 }, { "epoch": 0.1, "grad_norm": 0.6965934656143554, "learning_rate": 4.9941367010046e-06, "loss": 0.3107, "step": 2184 }, { "epoch": 0.1, "grad_norm": 0.6675039819467468, "learning_rate": 4.994123712640816e-06, "loss": 0.32, "step": 2185 }, { "epoch": 0.1, "grad_norm": 0.7316109438004123, "learning_rate": 4.994110709923978e-06, "loss": 0.3202, "step": 2186 }, { "epoch": 0.1, "grad_norm": 0.6983565797571897, "learning_rate": 4.99409769285416e-06, "loss": 0.3081, "step": 2187 }, { "epoch": 0.1, "grad_norm": 0.7412088874437971, "learning_rate": 4.994084661431436e-06, "loss": 0.3325, "step": 2188 }, { "epoch": 0.1, "grad_norm": 0.6724119608299791, "learning_rate": 4.9940716156558816e-06, "loss": 0.3021, "step": 2189 }, { "epoch": 0.1, "grad_norm": 0.695833221887371, "learning_rate": 4.994058555527573e-06, "loss": 0.3057, "step": 2190 }, { "epoch": 0.1, "grad_norm": 0.6749668305608559, "learning_rate": 4.994045481046582e-06, "loss": 0.3174, "step": 2191 }, { "epoch": 0.1, "grad_norm": 0.7420199556874306, "learning_rate": 4.994032392212988e-06, "loss": 0.3233, "step": 2192 }, { "epoch": 0.1, "grad_norm": 0.6988257975609354, "learning_rate": 4.9940192890268644e-06, "loss": 0.3333, "step": 2193 }, { "epoch": 0.1, "grad_norm": 0.6665813867609155, "learning_rate": 4.994006171488286e-06, "loss": 0.3095, "step": 2194 }, { "epoch": 0.1, "grad_norm": 0.6703943285313823, "learning_rate": 4.993993039597329e-06, "loss": 0.3133, "step": 2195 }, { "epoch": 0.1, "grad_norm": 0.692437597938566, "learning_rate": 4.993979893354069e-06, "loss": 0.333, "step": 2196 }, { "epoch": 0.1, "grad_norm": 0.6874342900592044, "learning_rate": 4.9939667327585815e-06, "loss": 0.3311, "step": 2197 }, { "epoch": 0.1, "grad_norm": 0.6833858980788423, "learning_rate": 4.9939535578109425e-06, "loss": 0.3231, "step": 2198 }, { "epoch": 0.1, "grad_norm": 0.7434103746505378, "learning_rate": 4.993940368511227e-06, "loss": 0.2983, "step": 2199 }, { "epoch": 0.1, "grad_norm": 0.7739676984897637, "learning_rate": 4.993927164859512e-06, "loss": 0.2952, "step": 2200 }, { "epoch": 0.1, "grad_norm": 0.6969256187177068, "learning_rate": 4.9939139468558736e-06, "loss": 0.316, "step": 2201 }, { "epoch": 0.1, "grad_norm": 0.6859521621412211, "learning_rate": 4.993900714500386e-06, "loss": 0.3172, "step": 2202 }, { "epoch": 0.1, "grad_norm": 0.7804255329858026, "learning_rate": 4.993887467793128e-06, "loss": 0.3146, "step": 2203 }, { "epoch": 0.1, "grad_norm": 0.6985589275901173, "learning_rate": 4.993874206734173e-06, "loss": 0.2983, "step": 2204 }, { "epoch": 0.1, "grad_norm": 0.7170707568384548, "learning_rate": 4.9938609313236e-06, "loss": 0.308, "step": 2205 }, { "epoch": 0.1, "grad_norm": 0.7964921598317342, "learning_rate": 4.993847641561484e-06, "loss": 0.3166, "step": 2206 }, { "epoch": 0.1, "grad_norm": 0.7050736722252827, "learning_rate": 4.993834337447901e-06, "loss": 0.33, "step": 2207 }, { "epoch": 0.1, "grad_norm": 0.7610705110272393, "learning_rate": 4.993821018982928e-06, "loss": 0.3261, "step": 2208 }, { "epoch": 0.1, "grad_norm": 0.734991062645969, "learning_rate": 4.9938076861666415e-06, "loss": 0.33, "step": 2209 }, { "epoch": 0.1, "grad_norm": 0.6732122236954349, "learning_rate": 4.993794338999119e-06, "loss": 0.3074, "step": 2210 }, { "epoch": 0.1, "grad_norm": 0.7071262311859321, "learning_rate": 4.993780977480438e-06, "loss": 0.3189, "step": 2211 }, { "epoch": 0.1, "grad_norm": 0.6950009398343167, "learning_rate": 4.9937676016106735e-06, "loss": 0.2956, "step": 2212 }, { "epoch": 0.1, "grad_norm": 0.6155565621668154, "learning_rate": 4.993754211389903e-06, "loss": 0.2993, "step": 2213 }, { "epoch": 0.1, "grad_norm": 0.7305084235499043, "learning_rate": 4.9937408068182035e-06, "loss": 0.3242, "step": 2214 }, { "epoch": 0.1, "grad_norm": 0.7397057538069514, "learning_rate": 4.993727387895653e-06, "loss": 0.3198, "step": 2215 }, { "epoch": 0.1, "grad_norm": 0.6357282792159281, "learning_rate": 4.993713954622328e-06, "loss": 0.296, "step": 2216 }, { "epoch": 0.1, "grad_norm": 0.6824129953292024, "learning_rate": 4.993700506998306e-06, "loss": 0.3029, "step": 2217 }, { "epoch": 0.1, "grad_norm": 0.7788433004460239, "learning_rate": 4.993687045023665e-06, "loss": 0.3392, "step": 2218 }, { "epoch": 0.1, "grad_norm": 0.6742656407116071, "learning_rate": 4.99367356869848e-06, "loss": 0.2862, "step": 2219 }, { "epoch": 0.1, "grad_norm": 0.7113659462078031, "learning_rate": 4.993660078022833e-06, "loss": 0.2817, "step": 2220 }, { "epoch": 0.1, "grad_norm": 0.6844971535747861, "learning_rate": 4.993646572996797e-06, "loss": 0.3331, "step": 2221 }, { "epoch": 0.1, "grad_norm": 0.6945151599053865, "learning_rate": 4.993633053620453e-06, "loss": 0.3085, "step": 2222 }, { "epoch": 0.1, "grad_norm": 0.7406367580760873, "learning_rate": 4.9936195198938765e-06, "loss": 0.319, "step": 2223 }, { "epoch": 0.1, "grad_norm": 0.6724021638834367, "learning_rate": 4.993605971817146e-06, "loss": 0.3046, "step": 2224 }, { "epoch": 0.1, "grad_norm": 0.6627687184810518, "learning_rate": 4.9935924093903405e-06, "loss": 0.2992, "step": 2225 }, { "epoch": 0.1, "grad_norm": 0.6929860023223073, "learning_rate": 4.993578832613538e-06, "loss": 0.2915, "step": 2226 }, { "epoch": 0.1, "grad_norm": 0.7100248967999525, "learning_rate": 4.993565241486816e-06, "loss": 0.3165, "step": 2227 }, { "epoch": 0.1, "grad_norm": 0.7277900674031972, "learning_rate": 4.993551636010252e-06, "loss": 0.312, "step": 2228 }, { "epoch": 0.1, "grad_norm": 0.7366157787775478, "learning_rate": 4.993538016183925e-06, "loss": 0.3296, "step": 2229 }, { "epoch": 0.1, "grad_norm": 0.6765847070967917, "learning_rate": 4.993524382007914e-06, "loss": 0.3103, "step": 2230 }, { "epoch": 0.1, "grad_norm": 0.7146617125777828, "learning_rate": 4.993510733482297e-06, "loss": 0.3308, "step": 2231 }, { "epoch": 0.1, "grad_norm": 0.7869348245946277, "learning_rate": 4.993497070607152e-06, "loss": 0.335, "step": 2232 }, { "epoch": 0.1, "grad_norm": 0.7265530052096573, "learning_rate": 4.993483393382558e-06, "loss": 0.3313, "step": 2233 }, { "epoch": 0.1, "grad_norm": 0.6971658373676896, "learning_rate": 4.993469701808594e-06, "loss": 0.3188, "step": 2234 }, { "epoch": 0.1, "grad_norm": 0.7306551726369791, "learning_rate": 4.9934559958853394e-06, "loss": 0.3301, "step": 2235 }, { "epoch": 0.1, "grad_norm": 0.7740126060600911, "learning_rate": 4.993442275612871e-06, "loss": 0.3308, "step": 2236 }, { "epoch": 0.1, "grad_norm": 0.7221425559266359, "learning_rate": 4.99342854099127e-06, "loss": 0.3268, "step": 2237 }, { "epoch": 0.1, "grad_norm": 0.7245485401590184, "learning_rate": 4.993414792020613e-06, "loss": 0.3093, "step": 2238 }, { "epoch": 0.1, "grad_norm": 0.7273764206174397, "learning_rate": 4.993401028700982e-06, "loss": 0.3177, "step": 2239 }, { "epoch": 0.1, "grad_norm": 0.7053459713265139, "learning_rate": 4.993387251032454e-06, "loss": 0.3182, "step": 2240 }, { "epoch": 0.1, "grad_norm": 0.7234082567388366, "learning_rate": 4.993373459015109e-06, "loss": 0.3091, "step": 2241 }, { "epoch": 0.11, "grad_norm": 0.7495635265639762, "learning_rate": 4.993359652649027e-06, "loss": 0.3108, "step": 2242 }, { "epoch": 0.11, "grad_norm": 0.7166320595590765, "learning_rate": 4.9933458319342875e-06, "loss": 0.3283, "step": 2243 }, { "epoch": 0.11, "grad_norm": 0.8850850695062106, "learning_rate": 4.993331996870968e-06, "loss": 0.3351, "step": 2244 }, { "epoch": 0.11, "grad_norm": 0.7917277597578298, "learning_rate": 4.993318147459151e-06, "loss": 0.3252, "step": 2245 }, { "epoch": 0.11, "grad_norm": 0.7150360920753059, "learning_rate": 4.993304283698913e-06, "loss": 0.2958, "step": 2246 }, { "epoch": 0.11, "grad_norm": 0.6657136868905353, "learning_rate": 4.993290405590336e-06, "loss": 0.3073, "step": 2247 }, { "epoch": 0.11, "grad_norm": 0.8374345387737329, "learning_rate": 4.9932765131335e-06, "loss": 0.3318, "step": 2248 }, { "epoch": 0.11, "grad_norm": 0.8109352436324987, "learning_rate": 4.993262606328485e-06, "loss": 0.3372, "step": 2249 }, { "epoch": 0.11, "grad_norm": 0.7110167483930909, "learning_rate": 4.993248685175369e-06, "loss": 0.3015, "step": 2250 }, { "epoch": 0.11, "grad_norm": 0.6705980218774601, "learning_rate": 4.993234749674234e-06, "loss": 0.3021, "step": 2251 }, { "epoch": 0.11, "grad_norm": 0.7649668845937174, "learning_rate": 4.993220799825159e-06, "loss": 0.3133, "step": 2252 }, { "epoch": 0.11, "grad_norm": 0.7063203103546492, "learning_rate": 4.993206835628226e-06, "loss": 0.3021, "step": 2253 }, { "epoch": 0.11, "grad_norm": 0.7032101296035645, "learning_rate": 4.993192857083514e-06, "loss": 0.2989, "step": 2254 }, { "epoch": 0.11, "grad_norm": 0.7492453099052793, "learning_rate": 4.993178864191104e-06, "loss": 0.2988, "step": 2255 }, { "epoch": 0.11, "grad_norm": 0.7574169509547419, "learning_rate": 4.993164856951075e-06, "loss": 0.3201, "step": 2256 }, { "epoch": 0.11, "grad_norm": 0.6378415681054531, "learning_rate": 4.99315083536351e-06, "loss": 0.2817, "step": 2257 }, { "epoch": 0.11, "grad_norm": 0.6757547787540222, "learning_rate": 4.9931367994284876e-06, "loss": 0.3119, "step": 2258 }, { "epoch": 0.11, "grad_norm": 0.7560837298522554, "learning_rate": 4.993122749146091e-06, "loss": 0.3295, "step": 2259 }, { "epoch": 0.11, "grad_norm": 0.692972514988318, "learning_rate": 4.993108684516398e-06, "loss": 0.2929, "step": 2260 }, { "epoch": 0.11, "grad_norm": 0.6936460484408653, "learning_rate": 4.9930946055394926e-06, "loss": 0.3305, "step": 2261 }, { "epoch": 0.11, "grad_norm": 0.6481165917210637, "learning_rate": 4.993080512215453e-06, "loss": 0.2981, "step": 2262 }, { "epoch": 0.11, "grad_norm": 0.6809514223546914, "learning_rate": 4.993066404544364e-06, "loss": 0.3086, "step": 2263 }, { "epoch": 0.11, "grad_norm": 0.7204052503572076, "learning_rate": 4.993052282526301e-06, "loss": 0.3258, "step": 2264 }, { "epoch": 0.11, "grad_norm": 0.6991208263004631, "learning_rate": 4.993038146161352e-06, "loss": 0.3158, "step": 2265 }, { "epoch": 0.11, "grad_norm": 0.7100982069896482, "learning_rate": 4.993023995449593e-06, "loss": 0.3113, "step": 2266 }, { "epoch": 0.11, "grad_norm": 0.6365664930994619, "learning_rate": 4.993009830391108e-06, "loss": 0.2961, "step": 2267 }, { "epoch": 0.11, "grad_norm": 0.6580170277111368, "learning_rate": 4.992995650985978e-06, "loss": 0.3346, "step": 2268 }, { "epoch": 0.11, "grad_norm": 0.7343050502685629, "learning_rate": 4.992981457234285e-06, "loss": 0.3427, "step": 2269 }, { "epoch": 0.11, "grad_norm": 0.691930159512099, "learning_rate": 4.99296724913611e-06, "loss": 0.3059, "step": 2270 }, { "epoch": 0.11, "grad_norm": 0.6525299694798655, "learning_rate": 4.9929530266915354e-06, "loss": 0.3015, "step": 2271 }, { "epoch": 0.11, "grad_norm": 0.7730659388975933, "learning_rate": 4.992938789900643e-06, "loss": 0.3085, "step": 2272 }, { "epoch": 0.11, "grad_norm": 0.6940638256706138, "learning_rate": 4.992924538763514e-06, "loss": 0.3132, "step": 2273 }, { "epoch": 0.11, "grad_norm": 0.6960520388387565, "learning_rate": 4.99291027328023e-06, "loss": 0.3076, "step": 2274 }, { "epoch": 0.11, "grad_norm": 0.6665392473283404, "learning_rate": 4.992895993450875e-06, "loss": 0.312, "step": 2275 }, { "epoch": 0.11, "grad_norm": 0.7194099922016544, "learning_rate": 4.9928816992755295e-06, "loss": 0.3063, "step": 2276 }, { "epoch": 0.11, "grad_norm": 0.6765340393796643, "learning_rate": 4.992867390754277e-06, "loss": 0.301, "step": 2277 }, { "epoch": 0.11, "grad_norm": 0.7062733478059953, "learning_rate": 4.992853067887199e-06, "loss": 0.3285, "step": 2278 }, { "epoch": 0.11, "grad_norm": 0.6897313106103454, "learning_rate": 4.992838730674378e-06, "loss": 0.3201, "step": 2279 }, { "epoch": 0.11, "grad_norm": 0.6723503332630821, "learning_rate": 4.992824379115897e-06, "loss": 0.3056, "step": 2280 }, { "epoch": 0.11, "grad_norm": 0.7431548392919325, "learning_rate": 4.9928100132118375e-06, "loss": 0.3375, "step": 2281 }, { "epoch": 0.11, "grad_norm": 0.677762901903657, "learning_rate": 4.992795632962284e-06, "loss": 0.2836, "step": 2282 }, { "epoch": 0.11, "grad_norm": 0.6503899565774751, "learning_rate": 4.9927812383673165e-06, "loss": 0.298, "step": 2283 }, { "epoch": 0.11, "grad_norm": 0.7665934127613793, "learning_rate": 4.992766829427021e-06, "loss": 0.3436, "step": 2284 }, { "epoch": 0.11, "grad_norm": 0.6748924869440155, "learning_rate": 4.992752406141479e-06, "loss": 0.2985, "step": 2285 }, { "epoch": 0.11, "grad_norm": 0.6877861607090004, "learning_rate": 4.992737968510772e-06, "loss": 0.3149, "step": 2286 }, { "epoch": 0.11, "grad_norm": 0.6688569464827442, "learning_rate": 4.992723516534987e-06, "loss": 0.302, "step": 2287 }, { "epoch": 0.11, "grad_norm": 0.696719472079597, "learning_rate": 4.992709050214202e-06, "loss": 0.3065, "step": 2288 }, { "epoch": 0.11, "grad_norm": 0.6944017823222013, "learning_rate": 4.992694569548504e-06, "loss": 0.3169, "step": 2289 }, { "epoch": 0.11, "grad_norm": 0.7315030760475426, "learning_rate": 4.992680074537975e-06, "loss": 0.3165, "step": 2290 }, { "epoch": 0.11, "grad_norm": 0.610032267113296, "learning_rate": 4.9926655651827e-06, "loss": 0.2777, "step": 2291 }, { "epoch": 0.11, "grad_norm": 0.7708883070201884, "learning_rate": 4.99265104148276e-06, "loss": 0.337, "step": 2292 }, { "epoch": 0.11, "grad_norm": 0.6818760285280581, "learning_rate": 4.99263650343824e-06, "loss": 0.3499, "step": 2293 }, { "epoch": 0.11, "grad_norm": 0.6986964743350761, "learning_rate": 4.992621951049224e-06, "loss": 0.3076, "step": 2294 }, { "epoch": 0.11, "grad_norm": 0.7481203506009354, "learning_rate": 4.992607384315794e-06, "loss": 0.3286, "step": 2295 }, { "epoch": 0.11, "grad_norm": 0.7720347470718703, "learning_rate": 4.9925928032380354e-06, "loss": 0.3321, "step": 2296 }, { "epoch": 0.11, "grad_norm": 0.757292016494418, "learning_rate": 4.992578207816032e-06, "loss": 0.3163, "step": 2297 }, { "epoch": 0.11, "grad_norm": 0.7110073644752205, "learning_rate": 4.992563598049868e-06, "loss": 0.3288, "step": 2298 }, { "epoch": 0.11, "grad_norm": 0.7152599403741762, "learning_rate": 4.992548973939626e-06, "loss": 0.3102, "step": 2299 }, { "epoch": 0.11, "grad_norm": 0.692028815189754, "learning_rate": 4.992534335485392e-06, "loss": 0.3073, "step": 2300 }, { "epoch": 0.11, "grad_norm": 0.8033099931155931, "learning_rate": 4.992519682687248e-06, "loss": 0.3448, "step": 2301 }, { "epoch": 0.11, "grad_norm": 0.6797939137603848, "learning_rate": 4.99250501554528e-06, "loss": 0.2985, "step": 2302 }, { "epoch": 0.11, "grad_norm": 0.6742560224412237, "learning_rate": 4.9924903340595735e-06, "loss": 0.302, "step": 2303 }, { "epoch": 0.11, "grad_norm": 0.7328918223619636, "learning_rate": 4.992475638230211e-06, "loss": 0.3243, "step": 2304 }, { "epoch": 0.11, "grad_norm": 0.7505924700252627, "learning_rate": 4.992460928057277e-06, "loss": 0.329, "step": 2305 }, { "epoch": 0.11, "grad_norm": 0.6587972791702824, "learning_rate": 4.9924462035408575e-06, "loss": 0.3018, "step": 2306 }, { "epoch": 0.11, "grad_norm": 0.6555947232028648, "learning_rate": 4.992431464681035e-06, "loss": 0.3158, "step": 2307 }, { "epoch": 0.11, "grad_norm": 0.7151405241381659, "learning_rate": 4.9924167114778985e-06, "loss": 0.3308, "step": 2308 }, { "epoch": 0.11, "grad_norm": 0.6691702834061449, "learning_rate": 4.9924019439315285e-06, "loss": 0.3127, "step": 2309 }, { "epoch": 0.11, "grad_norm": 0.6449015253995048, "learning_rate": 4.9923871620420125e-06, "loss": 0.2978, "step": 2310 }, { "epoch": 0.11, "grad_norm": 0.7050535223033867, "learning_rate": 4.992372365809434e-06, "loss": 0.3112, "step": 2311 }, { "epoch": 0.11, "grad_norm": 0.6868926430359059, "learning_rate": 4.9923575552338795e-06, "loss": 0.3113, "step": 2312 }, { "epoch": 0.11, "grad_norm": 0.7387197743426941, "learning_rate": 4.992342730315434e-06, "loss": 0.321, "step": 2313 }, { "epoch": 0.11, "grad_norm": 0.7705993541354828, "learning_rate": 4.992327891054181e-06, "loss": 0.329, "step": 2314 }, { "epoch": 0.11, "grad_norm": 0.6720501330547278, "learning_rate": 4.992313037450209e-06, "loss": 0.3246, "step": 2315 }, { "epoch": 0.11, "grad_norm": 0.7381405181936465, "learning_rate": 4.992298169503602e-06, "loss": 0.3207, "step": 2316 }, { "epoch": 0.11, "grad_norm": 0.7057673304839985, "learning_rate": 4.992283287214445e-06, "loss": 0.3029, "step": 2317 }, { "epoch": 0.11, "grad_norm": 0.6719595884223667, "learning_rate": 4.992268390582824e-06, "loss": 0.3154, "step": 2318 }, { "epoch": 0.11, "grad_norm": 0.6597055091367027, "learning_rate": 4.992253479608825e-06, "loss": 0.308, "step": 2319 }, { "epoch": 0.11, "grad_norm": 0.6546958248426524, "learning_rate": 4.992238554292533e-06, "loss": 0.2761, "step": 2320 }, { "epoch": 0.11, "grad_norm": 0.6239507980727053, "learning_rate": 4.992223614634035e-06, "loss": 0.3043, "step": 2321 }, { "epoch": 0.11, "grad_norm": 0.7106290758642707, "learning_rate": 4.992208660633417e-06, "loss": 0.3164, "step": 2322 }, { "epoch": 0.11, "grad_norm": 0.7051291775465305, "learning_rate": 4.992193692290764e-06, "loss": 0.3225, "step": 2323 }, { "epoch": 0.11, "grad_norm": 0.6713774641709707, "learning_rate": 4.992178709606162e-06, "loss": 0.3031, "step": 2324 }, { "epoch": 0.11, "grad_norm": 0.6693995757714735, "learning_rate": 4.9921637125797e-06, "loss": 0.3324, "step": 2325 }, { "epoch": 0.11, "grad_norm": 0.6524567316975368, "learning_rate": 4.99214870121146e-06, "loss": 0.3047, "step": 2326 }, { "epoch": 0.11, "grad_norm": 0.698078899714943, "learning_rate": 4.992133675501532e-06, "loss": 0.3164, "step": 2327 }, { "epoch": 0.11, "grad_norm": 0.888539611807526, "learning_rate": 4.99211863545e-06, "loss": 0.3295, "step": 2328 }, { "epoch": 0.11, "grad_norm": 0.7000506449486106, "learning_rate": 4.992103581056952e-06, "loss": 0.2866, "step": 2329 }, { "epoch": 0.11, "grad_norm": 0.7444297794929415, "learning_rate": 4.992088512322475e-06, "loss": 0.3238, "step": 2330 }, { "epoch": 0.11, "grad_norm": 0.7458274475650195, "learning_rate": 4.992073429246654e-06, "loss": 0.3181, "step": 2331 }, { "epoch": 0.11, "grad_norm": 0.6323066664941309, "learning_rate": 4.992058331829577e-06, "loss": 0.2798, "step": 2332 }, { "epoch": 0.11, "grad_norm": 0.7505909304564012, "learning_rate": 4.992043220071331e-06, "loss": 0.3358, "step": 2333 }, { "epoch": 0.11, "grad_norm": 0.6617940752007274, "learning_rate": 4.9920280939720025e-06, "loss": 0.3167, "step": 2334 }, { "epoch": 0.11, "grad_norm": 0.7255140134913691, "learning_rate": 4.992012953531679e-06, "loss": 0.3356, "step": 2335 }, { "epoch": 0.11, "grad_norm": 0.7313608026172641, "learning_rate": 4.991997798750446e-06, "loss": 0.331, "step": 2336 }, { "epoch": 0.11, "grad_norm": 0.7197995545695549, "learning_rate": 4.991982629628394e-06, "loss": 0.3127, "step": 2337 }, { "epoch": 0.11, "grad_norm": 0.7944081669770238, "learning_rate": 4.9919674461656065e-06, "loss": 0.3154, "step": 2338 }, { "epoch": 0.11, "grad_norm": 0.6452866484250859, "learning_rate": 4.991952248362174e-06, "loss": 0.3018, "step": 2339 }, { "epoch": 0.11, "grad_norm": 0.7016532431612533, "learning_rate": 4.991937036218182e-06, "loss": 0.3129, "step": 2340 }, { "epoch": 0.11, "grad_norm": 0.7253054939790068, "learning_rate": 4.991921809733719e-06, "loss": 0.3238, "step": 2341 }, { "epoch": 0.11, "grad_norm": 0.671797961635785, "learning_rate": 4.991906568908871e-06, "loss": 0.3099, "step": 2342 }, { "epoch": 0.11, "grad_norm": 0.6971669627966228, "learning_rate": 4.9918913137437285e-06, "loss": 0.3167, "step": 2343 }, { "epoch": 0.11, "grad_norm": 0.6996956752385861, "learning_rate": 4.9918760442383775e-06, "loss": 0.3039, "step": 2344 }, { "epoch": 0.11, "grad_norm": 0.7637775735780707, "learning_rate": 4.991860760392906e-06, "loss": 0.3285, "step": 2345 }, { "epoch": 0.11, "grad_norm": 0.6695367151225856, "learning_rate": 4.991845462207402e-06, "loss": 0.3146, "step": 2346 }, { "epoch": 0.11, "grad_norm": 0.6701911452495309, "learning_rate": 4.991830149681953e-06, "loss": 0.2892, "step": 2347 }, { "epoch": 0.11, "grad_norm": 0.7298845264268727, "learning_rate": 4.991814822816649e-06, "loss": 0.3006, "step": 2348 }, { "epoch": 0.11, "grad_norm": 0.6813068191005663, "learning_rate": 4.991799481611577e-06, "loss": 0.3033, "step": 2349 }, { "epoch": 0.11, "grad_norm": 0.6357524187198214, "learning_rate": 4.9917841260668246e-06, "loss": 0.271, "step": 2350 }, { "epoch": 0.11, "grad_norm": 0.7229655725652769, "learning_rate": 4.991768756182481e-06, "loss": 0.3288, "step": 2351 }, { "epoch": 0.11, "grad_norm": 0.7084478929282095, "learning_rate": 4.991753371958634e-06, "loss": 0.3203, "step": 2352 }, { "epoch": 0.11, "grad_norm": 0.7148579284806731, "learning_rate": 4.991737973395374e-06, "loss": 0.2991, "step": 2353 }, { "epoch": 0.11, "grad_norm": 0.7359865217126507, "learning_rate": 4.991722560492787e-06, "loss": 0.3189, "step": 2354 }, { "epoch": 0.11, "grad_norm": 0.7909060039806096, "learning_rate": 4.9917071332509635e-06, "loss": 0.336, "step": 2355 }, { "epoch": 0.11, "grad_norm": 0.6897327670455706, "learning_rate": 4.991691691669992e-06, "loss": 0.2932, "step": 2356 }, { "epoch": 0.11, "grad_norm": 0.6820187845612918, "learning_rate": 4.991676235749961e-06, "loss": 0.3201, "step": 2357 }, { "epoch": 0.11, "grad_norm": 0.7454837276729831, "learning_rate": 4.991660765490959e-06, "loss": 0.3247, "step": 2358 }, { "epoch": 0.11, "grad_norm": 0.6688288760793715, "learning_rate": 4.991645280893076e-06, "loss": 0.2631, "step": 2359 }, { "epoch": 0.11, "grad_norm": 0.6768849086601592, "learning_rate": 4.9916297819563994e-06, "loss": 0.3148, "step": 2360 }, { "epoch": 0.11, "grad_norm": 0.677232258857392, "learning_rate": 4.991614268681021e-06, "loss": 0.2973, "step": 2361 }, { "epoch": 0.11, "grad_norm": 0.6601725402809256, "learning_rate": 4.9915987410670284e-06, "loss": 0.2872, "step": 2362 }, { "epoch": 0.11, "grad_norm": 0.6361593284239909, "learning_rate": 4.991583199114512e-06, "loss": 0.2928, "step": 2363 }, { "epoch": 0.11, "grad_norm": 0.6741617474324861, "learning_rate": 4.991567642823559e-06, "loss": 0.3089, "step": 2364 }, { "epoch": 0.11, "grad_norm": 0.7099619029348974, "learning_rate": 4.9915520721942615e-06, "loss": 0.3156, "step": 2365 }, { "epoch": 0.11, "grad_norm": 0.6789990699107289, "learning_rate": 4.991536487226708e-06, "loss": 0.3071, "step": 2366 }, { "epoch": 0.11, "grad_norm": 0.721690675395533, "learning_rate": 4.991520887920988e-06, "loss": 0.3246, "step": 2367 }, { "epoch": 0.11, "grad_norm": 0.6574185880587174, "learning_rate": 4.991505274277191e-06, "loss": 0.3122, "step": 2368 }, { "epoch": 0.11, "grad_norm": 0.6621068011224228, "learning_rate": 4.991489646295408e-06, "loss": 0.3115, "step": 2369 }, { "epoch": 0.11, "grad_norm": 0.6493372574013051, "learning_rate": 4.991474003975728e-06, "loss": 0.3088, "step": 2370 }, { "epoch": 0.11, "grad_norm": 0.7120702484689082, "learning_rate": 4.991458347318242e-06, "loss": 0.3081, "step": 2371 }, { "epoch": 0.11, "grad_norm": 0.6750962596502782, "learning_rate": 4.991442676323039e-06, "loss": 0.3004, "step": 2372 }, { "epoch": 0.11, "grad_norm": 0.7008837077440461, "learning_rate": 4.9914269909902095e-06, "loss": 0.3133, "step": 2373 }, { "epoch": 0.11, "grad_norm": 0.6528575590803208, "learning_rate": 4.991411291319844e-06, "loss": 0.2891, "step": 2374 }, { "epoch": 0.11, "grad_norm": 0.7705996595733008, "learning_rate": 4.991395577312032e-06, "loss": 0.3327, "step": 2375 }, { "epoch": 0.11, "grad_norm": 0.7269728917693805, "learning_rate": 4.991379848966865e-06, "loss": 0.3004, "step": 2376 }, { "epoch": 0.11, "grad_norm": 0.7448706850415588, "learning_rate": 4.991364106284434e-06, "loss": 0.3092, "step": 2377 }, { "epoch": 0.11, "grad_norm": 0.8165393491509028, "learning_rate": 4.991348349264828e-06, "loss": 0.3212, "step": 2378 }, { "epoch": 0.11, "grad_norm": 0.7428354847957275, "learning_rate": 4.991332577908139e-06, "loss": 0.3054, "step": 2379 }, { "epoch": 0.11, "grad_norm": 0.7438945231178475, "learning_rate": 4.991316792214457e-06, "loss": 0.3209, "step": 2380 }, { "epoch": 0.11, "grad_norm": 0.8520781927858059, "learning_rate": 4.991300992183872e-06, "loss": 0.3188, "step": 2381 }, { "epoch": 0.11, "grad_norm": 0.7124170150301321, "learning_rate": 4.991285177816477e-06, "loss": 0.3234, "step": 2382 }, { "epoch": 0.11, "grad_norm": 0.6964835284325164, "learning_rate": 4.991269349112362e-06, "loss": 0.3167, "step": 2383 }, { "epoch": 0.11, "grad_norm": 0.7461094957710867, "learning_rate": 4.991253506071617e-06, "loss": 0.3055, "step": 2384 }, { "epoch": 0.11, "grad_norm": 0.6907255095777912, "learning_rate": 4.991237648694336e-06, "loss": 0.3187, "step": 2385 }, { "epoch": 0.11, "grad_norm": 0.6932633286159118, "learning_rate": 4.9912217769806075e-06, "loss": 0.3202, "step": 2386 }, { "epoch": 0.11, "grad_norm": 0.7039605703267855, "learning_rate": 4.991205890930523e-06, "loss": 0.3281, "step": 2387 }, { "epoch": 0.11, "grad_norm": 0.6807791819537211, "learning_rate": 4.9911899905441755e-06, "loss": 0.3191, "step": 2388 }, { "epoch": 0.11, "grad_norm": 0.786009677736922, "learning_rate": 4.9911740758216565e-06, "loss": 0.3141, "step": 2389 }, { "epoch": 0.11, "grad_norm": 0.6797531174513047, "learning_rate": 4.991158146763056e-06, "loss": 0.2922, "step": 2390 }, { "epoch": 0.11, "grad_norm": 0.6751093304894836, "learning_rate": 4.991142203368466e-06, "loss": 0.3117, "step": 2391 }, { "epoch": 0.11, "grad_norm": 0.6337425430019834, "learning_rate": 4.991126245637979e-06, "loss": 0.3005, "step": 2392 }, { "epoch": 0.11, "grad_norm": 0.6929595558708915, "learning_rate": 4.991110273571688e-06, "loss": 0.323, "step": 2393 }, { "epoch": 0.11, "grad_norm": 0.6865234828010763, "learning_rate": 4.991094287169682e-06, "loss": 0.2925, "step": 2394 }, { "epoch": 0.11, "grad_norm": 0.6780430995504249, "learning_rate": 4.991078286432055e-06, "loss": 0.3196, "step": 2395 }, { "epoch": 0.11, "grad_norm": 0.6942918987026917, "learning_rate": 4.9910622713588984e-06, "loss": 0.3369, "step": 2396 }, { "epoch": 0.11, "grad_norm": 0.7310620079411794, "learning_rate": 4.9910462419503046e-06, "loss": 0.3288, "step": 2397 }, { "epoch": 0.11, "grad_norm": 0.6888203463351311, "learning_rate": 4.991030198206366e-06, "loss": 0.2976, "step": 2398 }, { "epoch": 0.11, "grad_norm": 0.7842437161730333, "learning_rate": 4.991014140127174e-06, "loss": 0.3274, "step": 2399 }, { "epoch": 0.11, "grad_norm": 0.749123270010661, "learning_rate": 4.990998067712822e-06, "loss": 0.3327, "step": 2400 }, { "epoch": 0.11, "grad_norm": 0.719409896295562, "learning_rate": 4.990981980963403e-06, "loss": 0.2994, "step": 2401 }, { "epoch": 0.11, "grad_norm": 0.7951220034953331, "learning_rate": 4.990965879879008e-06, "loss": 0.3445, "step": 2402 }, { "epoch": 0.11, "grad_norm": 0.721836173722458, "learning_rate": 4.99094976445973e-06, "loss": 0.3172, "step": 2403 }, { "epoch": 0.11, "grad_norm": 0.7555709951764337, "learning_rate": 4.990933634705664e-06, "loss": 0.3343, "step": 2404 }, { "epoch": 0.11, "grad_norm": 0.6905557378179727, "learning_rate": 4.9909174906169e-06, "loss": 0.3202, "step": 2405 }, { "epoch": 0.11, "grad_norm": 0.7117924331328038, "learning_rate": 4.990901332193531e-06, "loss": 0.3047, "step": 2406 }, { "epoch": 0.11, "grad_norm": 0.6883345771222072, "learning_rate": 4.990885159435652e-06, "loss": 0.3213, "step": 2407 }, { "epoch": 0.11, "grad_norm": 0.6652981572634223, "learning_rate": 4.990868972343355e-06, "loss": 0.3221, "step": 2408 }, { "epoch": 0.11, "grad_norm": 0.6757712346511, "learning_rate": 4.990852770916732e-06, "loss": 0.2932, "step": 2409 }, { "epoch": 0.11, "grad_norm": 0.6772034831748057, "learning_rate": 4.990836555155878e-06, "loss": 0.3129, "step": 2410 }, { "epoch": 0.11, "grad_norm": 0.7280216899954342, "learning_rate": 4.990820325060885e-06, "loss": 0.3253, "step": 2411 }, { "epoch": 0.11, "grad_norm": 0.6773673022420167, "learning_rate": 4.990804080631849e-06, "loss": 0.3126, "step": 2412 }, { "epoch": 0.11, "grad_norm": 0.6760054225962787, "learning_rate": 4.99078782186886e-06, "loss": 0.296, "step": 2413 }, { "epoch": 0.11, "grad_norm": 0.6720381043238625, "learning_rate": 4.990771548772013e-06, "loss": 0.3028, "step": 2414 }, { "epoch": 0.11, "grad_norm": 0.7723097984809802, "learning_rate": 4.9907552613414025e-06, "loss": 0.3259, "step": 2415 }, { "epoch": 0.11, "grad_norm": 0.7394311932072927, "learning_rate": 4.9907389595771216e-06, "loss": 0.3173, "step": 2416 }, { "epoch": 0.11, "grad_norm": 0.6352030267340605, "learning_rate": 4.9907226434792635e-06, "loss": 0.2863, "step": 2417 }, { "epoch": 0.11, "grad_norm": 0.7096466164207457, "learning_rate": 4.9907063130479224e-06, "loss": 0.3123, "step": 2418 }, { "epoch": 0.11, "grad_norm": 0.7416039343171802, "learning_rate": 4.990689968283193e-06, "loss": 0.2807, "step": 2419 }, { "epoch": 0.11, "grad_norm": 0.7327699937476152, "learning_rate": 4.99067360918517e-06, "loss": 0.3228, "step": 2420 }, { "epoch": 0.11, "grad_norm": 0.7519312929380826, "learning_rate": 4.990657235753944e-06, "loss": 0.3178, "step": 2421 }, { "epoch": 0.11, "grad_norm": 0.7024147819767798, "learning_rate": 4.990640847989613e-06, "loss": 0.3038, "step": 2422 }, { "epoch": 0.11, "grad_norm": 0.7246759675238348, "learning_rate": 4.990624445892269e-06, "loss": 0.3307, "step": 2423 }, { "epoch": 0.11, "grad_norm": 0.6954254577988576, "learning_rate": 4.9906080294620085e-06, "loss": 0.3305, "step": 2424 }, { "epoch": 0.11, "grad_norm": 0.6963221991779899, "learning_rate": 4.990591598698924e-06, "loss": 0.3174, "step": 2425 }, { "epoch": 0.11, "grad_norm": 0.6771619604968555, "learning_rate": 4.990575153603111e-06, "loss": 0.32, "step": 2426 }, { "epoch": 0.11, "grad_norm": 0.7430805252803147, "learning_rate": 4.9905586941746645e-06, "loss": 0.3154, "step": 2427 }, { "epoch": 0.11, "grad_norm": 0.716425058116847, "learning_rate": 4.9905422204136775e-06, "loss": 0.3029, "step": 2428 }, { "epoch": 0.11, "grad_norm": 0.7118726953540812, "learning_rate": 4.9905257323202476e-06, "loss": 0.3301, "step": 2429 }, { "epoch": 0.11, "grad_norm": 0.681058723206773, "learning_rate": 4.990509229894467e-06, "loss": 0.3096, "step": 2430 }, { "epoch": 0.11, "grad_norm": 0.7500763523758301, "learning_rate": 4.9904927131364325e-06, "loss": 0.3294, "step": 2431 }, { "epoch": 0.11, "grad_norm": 0.7290818084187604, "learning_rate": 4.990476182046237e-06, "loss": 0.2924, "step": 2432 }, { "epoch": 0.11, "grad_norm": 0.6623373402616922, "learning_rate": 4.990459636623978e-06, "loss": 0.2876, "step": 2433 }, { "epoch": 0.11, "grad_norm": 0.6542355413879511, "learning_rate": 4.99044307686975e-06, "loss": 0.3432, "step": 2434 }, { "epoch": 0.11, "grad_norm": 0.6821804382543978, "learning_rate": 4.990426502783647e-06, "loss": 0.3191, "step": 2435 }, { "epoch": 0.11, "grad_norm": 0.6869901225718229, "learning_rate": 4.9904099143657665e-06, "loss": 0.3014, "step": 2436 }, { "epoch": 0.11, "grad_norm": 0.7050173502934065, "learning_rate": 4.990393311616203e-06, "loss": 0.3337, "step": 2437 }, { "epoch": 0.11, "grad_norm": 0.7412111033474408, "learning_rate": 4.9903766945350504e-06, "loss": 0.3161, "step": 2438 }, { "epoch": 0.11, "grad_norm": 0.6839166372751112, "learning_rate": 4.990360063122407e-06, "loss": 0.2977, "step": 2439 }, { "epoch": 0.11, "grad_norm": 0.6152042439889196, "learning_rate": 4.990343417378367e-06, "loss": 0.3046, "step": 2440 }, { "epoch": 0.11, "grad_norm": 0.6776908385258404, "learning_rate": 4.990326757303028e-06, "loss": 0.3283, "step": 2441 }, { "epoch": 0.11, "grad_norm": 0.7193089418854844, "learning_rate": 4.990310082896482e-06, "loss": 0.3276, "step": 2442 }, { "epoch": 0.11, "grad_norm": 0.7311686006014344, "learning_rate": 4.9902933941588295e-06, "loss": 0.324, "step": 2443 }, { "epoch": 0.11, "grad_norm": 0.7206160441567077, "learning_rate": 4.990276691090164e-06, "loss": 0.3212, "step": 2444 }, { "epoch": 0.11, "grad_norm": 0.6370820867058148, "learning_rate": 4.990259973690581e-06, "loss": 0.3024, "step": 2445 }, { "epoch": 0.11, "grad_norm": 0.6868634370742973, "learning_rate": 4.9902432419601785e-06, "loss": 0.3342, "step": 2446 }, { "epoch": 0.11, "grad_norm": 0.6850334429575595, "learning_rate": 4.990226495899051e-06, "loss": 0.2956, "step": 2447 }, { "epoch": 0.11, "grad_norm": 0.7445223705994136, "learning_rate": 4.990209735507298e-06, "loss": 0.3242, "step": 2448 }, { "epoch": 0.11, "grad_norm": 0.645769400070969, "learning_rate": 4.990192960785012e-06, "loss": 0.3117, "step": 2449 }, { "epoch": 0.11, "grad_norm": 0.7079208960635979, "learning_rate": 4.9901761717322915e-06, "loss": 0.3344, "step": 2450 }, { "epoch": 0.11, "grad_norm": 0.7062945305080015, "learning_rate": 4.9901593683492335e-06, "loss": 0.3081, "step": 2451 }, { "epoch": 0.11, "grad_norm": 0.6626775420637546, "learning_rate": 4.990142550635935e-06, "loss": 0.2944, "step": 2452 }, { "epoch": 0.11, "grad_norm": 0.7352959493387669, "learning_rate": 4.990125718592491e-06, "loss": 0.328, "step": 2453 }, { "epoch": 0.11, "grad_norm": 0.6362086369080926, "learning_rate": 4.990108872218999e-06, "loss": 0.2856, "step": 2454 }, { "epoch": 0.12, "grad_norm": 0.6396669218227405, "learning_rate": 4.990092011515557e-06, "loss": 0.3113, "step": 2455 }, { "epoch": 0.12, "grad_norm": 0.6733489005459731, "learning_rate": 4.990075136482262e-06, "loss": 0.3129, "step": 2456 }, { "epoch": 0.12, "grad_norm": 0.6257268699810145, "learning_rate": 4.9900582471192094e-06, "loss": 0.2913, "step": 2457 }, { "epoch": 0.12, "grad_norm": 0.6371716171989381, "learning_rate": 4.990041343426498e-06, "loss": 0.3092, "step": 2458 }, { "epoch": 0.12, "grad_norm": 0.696602631101336, "learning_rate": 4.990024425404224e-06, "loss": 0.3331, "step": 2459 }, { "epoch": 0.12, "grad_norm": 0.6779242016529771, "learning_rate": 4.990007493052485e-06, "loss": 0.2964, "step": 2460 }, { "epoch": 0.12, "grad_norm": 0.6954440157597109, "learning_rate": 4.98999054637138e-06, "loss": 0.3144, "step": 2461 }, { "epoch": 0.12, "grad_norm": 0.6498928432875791, "learning_rate": 4.9899735853610045e-06, "loss": 0.291, "step": 2462 }, { "epoch": 0.12, "grad_norm": 0.6528936410616636, "learning_rate": 4.989956610021457e-06, "loss": 0.2993, "step": 2463 }, { "epoch": 0.12, "grad_norm": 0.7000846016375903, "learning_rate": 4.9899396203528345e-06, "loss": 0.3127, "step": 2464 }, { "epoch": 0.12, "grad_norm": 0.6875031672939942, "learning_rate": 4.9899226163552365e-06, "loss": 0.3134, "step": 2465 }, { "epoch": 0.12, "grad_norm": 0.727083379518094, "learning_rate": 4.989905598028758e-06, "loss": 0.339, "step": 2466 }, { "epoch": 0.12, "grad_norm": 0.7000746056193832, "learning_rate": 4.989888565373499e-06, "loss": 0.3213, "step": 2467 }, { "epoch": 0.12, "grad_norm": 0.7793588631412453, "learning_rate": 4.989871518389559e-06, "loss": 0.3441, "step": 2468 }, { "epoch": 0.12, "grad_norm": 0.8011989311858084, "learning_rate": 4.989854457077032e-06, "loss": 0.3102, "step": 2469 }, { "epoch": 0.12, "grad_norm": 0.6605872717000557, "learning_rate": 4.989837381436019e-06, "loss": 0.2928, "step": 2470 }, { "epoch": 0.12, "grad_norm": 0.6786356791759671, "learning_rate": 4.989820291466619e-06, "loss": 0.3187, "step": 2471 }, { "epoch": 0.12, "grad_norm": 0.7766365549897377, "learning_rate": 4.989803187168927e-06, "loss": 0.3265, "step": 2472 }, { "epoch": 0.12, "grad_norm": 0.7334446172750475, "learning_rate": 4.989786068543045e-06, "loss": 0.3256, "step": 2473 }, { "epoch": 0.12, "grad_norm": 0.6696447409154154, "learning_rate": 4.989768935589069e-06, "loss": 0.3238, "step": 2474 }, { "epoch": 0.12, "grad_norm": 0.6429021467295299, "learning_rate": 4.989751788307099e-06, "loss": 0.3032, "step": 2475 }, { "epoch": 0.12, "grad_norm": 0.705010196258185, "learning_rate": 4.9897346266972325e-06, "loss": 0.2985, "step": 2476 }, { "epoch": 0.12, "grad_norm": 0.6947774202436862, "learning_rate": 4.9897174507595694e-06, "loss": 0.2982, "step": 2477 }, { "epoch": 0.12, "grad_norm": 0.7060260420794753, "learning_rate": 4.9897002604942085e-06, "loss": 0.2935, "step": 2478 }, { "epoch": 0.12, "grad_norm": 0.6922017923641826, "learning_rate": 4.989683055901248e-06, "loss": 0.3047, "step": 2479 }, { "epoch": 0.12, "grad_norm": 0.6887069945753617, "learning_rate": 4.989665836980788e-06, "loss": 0.3147, "step": 2480 }, { "epoch": 0.12, "grad_norm": 0.7266362808214661, "learning_rate": 4.9896486037329265e-06, "loss": 0.3064, "step": 2481 }, { "epoch": 0.12, "grad_norm": 0.6806720502535538, "learning_rate": 4.989631356157763e-06, "loss": 0.2995, "step": 2482 }, { "epoch": 0.12, "grad_norm": 0.6357604900431841, "learning_rate": 4.989614094255396e-06, "loss": 0.3005, "step": 2483 }, { "epoch": 0.12, "grad_norm": 0.7220973922029662, "learning_rate": 4.989596818025926e-06, "loss": 0.3113, "step": 2484 }, { "epoch": 0.12, "grad_norm": 0.8159896570184343, "learning_rate": 4.989579527469452e-06, "loss": 0.3342, "step": 2485 }, { "epoch": 0.12, "grad_norm": 0.6963448076079461, "learning_rate": 4.989562222586074e-06, "loss": 0.2898, "step": 2486 }, { "epoch": 0.12, "grad_norm": 0.7240607355878613, "learning_rate": 4.989544903375891e-06, "loss": 0.3201, "step": 2487 }, { "epoch": 0.12, "grad_norm": 0.7005062049090963, "learning_rate": 4.989527569839004e-06, "loss": 0.3255, "step": 2488 }, { "epoch": 0.12, "grad_norm": 0.6637184193402471, "learning_rate": 4.989510221975509e-06, "loss": 0.2948, "step": 2489 }, { "epoch": 0.12, "grad_norm": 0.6199790250294851, "learning_rate": 4.9894928597855094e-06, "loss": 0.2963, "step": 2490 }, { "epoch": 0.12, "grad_norm": 0.6781961297577891, "learning_rate": 4.989475483269105e-06, "loss": 0.3148, "step": 2491 }, { "epoch": 0.12, "grad_norm": 0.7177021818931334, "learning_rate": 4.989458092426395e-06, "loss": 0.3278, "step": 2492 }, { "epoch": 0.12, "grad_norm": 0.7349709876228968, "learning_rate": 4.9894406872574785e-06, "loss": 0.313, "step": 2493 }, { "epoch": 0.12, "grad_norm": 0.7485843933252829, "learning_rate": 4.9894232677624565e-06, "loss": 0.3271, "step": 2494 }, { "epoch": 0.12, "grad_norm": 0.6808032525270925, "learning_rate": 4.98940583394143e-06, "loss": 0.2976, "step": 2495 }, { "epoch": 0.12, "grad_norm": 0.7322569309205689, "learning_rate": 4.989388385794498e-06, "loss": 0.3071, "step": 2496 }, { "epoch": 0.12, "grad_norm": 0.7241434145214645, "learning_rate": 4.989370923321761e-06, "loss": 0.3036, "step": 2497 }, { "epoch": 0.12, "grad_norm": 0.7285213863259554, "learning_rate": 4.989353446523321e-06, "loss": 0.3078, "step": 2498 }, { "epoch": 0.12, "grad_norm": 0.7034274665603591, "learning_rate": 4.989335955399277e-06, "loss": 0.318, "step": 2499 }, { "epoch": 0.12, "grad_norm": 0.7176095544281921, "learning_rate": 4.989318449949731e-06, "loss": 0.3164, "step": 2500 }, { "epoch": 0.12, "grad_norm": 0.7146056992606665, "learning_rate": 4.989300930174782e-06, "loss": 0.3191, "step": 2501 }, { "epoch": 0.12, "grad_norm": 0.7558163196617218, "learning_rate": 4.989283396074532e-06, "loss": 0.2839, "step": 2502 }, { "epoch": 0.12, "grad_norm": 0.6640074803033309, "learning_rate": 4.989265847649083e-06, "loss": 0.3045, "step": 2503 }, { "epoch": 0.12, "grad_norm": 0.6621415936153033, "learning_rate": 4.989248284898533e-06, "loss": 0.2922, "step": 2504 }, { "epoch": 0.12, "grad_norm": 0.7461213389579124, "learning_rate": 4.989230707822987e-06, "loss": 0.3059, "step": 2505 }, { "epoch": 0.12, "grad_norm": 0.6492646422311312, "learning_rate": 4.989213116422542e-06, "loss": 0.2889, "step": 2506 }, { "epoch": 0.12, "grad_norm": 0.7654323211498955, "learning_rate": 4.989195510697301e-06, "loss": 0.3273, "step": 2507 }, { "epoch": 0.12, "grad_norm": 0.7315414843790528, "learning_rate": 4.989177890647367e-06, "loss": 0.3342, "step": 2508 }, { "epoch": 0.12, "grad_norm": 0.7239423683488871, "learning_rate": 4.989160256272838e-06, "loss": 0.3165, "step": 2509 }, { "epoch": 0.12, "grad_norm": 0.6768216599654596, "learning_rate": 4.989142607573818e-06, "loss": 0.2881, "step": 2510 }, { "epoch": 0.12, "grad_norm": 0.7280642272290861, "learning_rate": 4.989124944550409e-06, "loss": 0.3023, "step": 2511 }, { "epoch": 0.12, "grad_norm": 0.7725633416736322, "learning_rate": 4.989107267202711e-06, "loss": 0.312, "step": 2512 }, { "epoch": 0.12, "grad_norm": 0.7439169526134014, "learning_rate": 4.989089575530826e-06, "loss": 0.3211, "step": 2513 }, { "epoch": 0.12, "grad_norm": 0.6591187481624474, "learning_rate": 4.989071869534855e-06, "loss": 0.3018, "step": 2514 }, { "epoch": 0.12, "grad_norm": 0.7126026215264374, "learning_rate": 4.989054149214902e-06, "loss": 0.3204, "step": 2515 }, { "epoch": 0.12, "grad_norm": 0.7058506917968352, "learning_rate": 4.989036414571069e-06, "loss": 0.3182, "step": 2516 }, { "epoch": 0.12, "grad_norm": 0.7294414778703925, "learning_rate": 4.989018665603456e-06, "loss": 0.3225, "step": 2517 }, { "epoch": 0.12, "grad_norm": 0.6736178655906577, "learning_rate": 4.9890009023121665e-06, "loss": 0.2912, "step": 2518 }, { "epoch": 0.12, "grad_norm": 0.6414317057357551, "learning_rate": 4.988983124697302e-06, "loss": 0.3108, "step": 2519 }, { "epoch": 0.12, "grad_norm": 0.7158724428672455, "learning_rate": 4.988965332758965e-06, "loss": 0.3212, "step": 2520 }, { "epoch": 0.12, "grad_norm": 0.7780529193125982, "learning_rate": 4.988947526497259e-06, "loss": 0.3455, "step": 2521 }, { "epoch": 0.12, "grad_norm": 0.6800331029981759, "learning_rate": 4.988929705912285e-06, "loss": 0.288, "step": 2522 }, { "epoch": 0.12, "grad_norm": 0.7240972275684893, "learning_rate": 4.988911871004146e-06, "loss": 0.3254, "step": 2523 }, { "epoch": 0.12, "grad_norm": 0.667094931125728, "learning_rate": 4.9888940217729455e-06, "loss": 0.313, "step": 2524 }, { "epoch": 0.12, "grad_norm": 0.6296732876094374, "learning_rate": 4.988876158218784e-06, "loss": 0.2945, "step": 2525 }, { "epoch": 0.12, "grad_norm": 0.6972116115013175, "learning_rate": 4.988858280341768e-06, "loss": 0.3064, "step": 2526 }, { "epoch": 0.12, "grad_norm": 0.7600692948760438, "learning_rate": 4.988840388141996e-06, "loss": 0.3174, "step": 2527 }, { "epoch": 0.12, "grad_norm": 0.6363500903798379, "learning_rate": 4.988822481619574e-06, "loss": 0.3144, "step": 2528 }, { "epoch": 0.12, "grad_norm": 0.682879179160212, "learning_rate": 4.988804560774604e-06, "loss": 0.2901, "step": 2529 }, { "epoch": 0.12, "grad_norm": 0.7394581661341556, "learning_rate": 4.988786625607189e-06, "loss": 0.3323, "step": 2530 }, { "epoch": 0.12, "grad_norm": 0.6629766631595073, "learning_rate": 4.988768676117433e-06, "loss": 0.3172, "step": 2531 }, { "epoch": 0.12, "grad_norm": 0.7045164593225907, "learning_rate": 4.988750712305439e-06, "loss": 0.3285, "step": 2532 }, { "epoch": 0.12, "grad_norm": 0.7254338123438372, "learning_rate": 4.98873273417131e-06, "loss": 0.309, "step": 2533 }, { "epoch": 0.12, "grad_norm": 0.6428356914388451, "learning_rate": 4.988714741715149e-06, "loss": 0.3067, "step": 2534 }, { "epoch": 0.12, "grad_norm": 0.664309588564329, "learning_rate": 4.988696734937061e-06, "loss": 0.2961, "step": 2535 }, { "epoch": 0.12, "grad_norm": 0.6669117127094563, "learning_rate": 4.9886787138371484e-06, "loss": 0.3035, "step": 2536 }, { "epoch": 0.12, "grad_norm": 0.7511399777127872, "learning_rate": 4.9886606784155155e-06, "loss": 0.3293, "step": 2537 }, { "epoch": 0.12, "grad_norm": 0.7288270316485492, "learning_rate": 4.9886426286722655e-06, "loss": 0.3221, "step": 2538 }, { "epoch": 0.12, "grad_norm": 0.6509824731651909, "learning_rate": 4.988624564607503e-06, "loss": 0.289, "step": 2539 }, { "epoch": 0.12, "grad_norm": 0.7240302030652209, "learning_rate": 4.988606486221331e-06, "loss": 0.331, "step": 2540 }, { "epoch": 0.12, "grad_norm": 0.6613089707463681, "learning_rate": 4.988588393513855e-06, "loss": 0.2856, "step": 2541 }, { "epoch": 0.12, "grad_norm": 0.6839890284636726, "learning_rate": 4.988570286485178e-06, "loss": 0.2994, "step": 2542 }, { "epoch": 0.12, "grad_norm": 0.7634488727069186, "learning_rate": 4.988552165135405e-06, "loss": 0.3138, "step": 2543 }, { "epoch": 0.12, "grad_norm": 0.6823041377065954, "learning_rate": 4.988534029464639e-06, "loss": 0.3203, "step": 2544 }, { "epoch": 0.12, "grad_norm": 0.7169827143959852, "learning_rate": 4.988515879472985e-06, "loss": 0.3059, "step": 2545 }, { "epoch": 0.12, "grad_norm": 0.7882821539969701, "learning_rate": 4.988497715160548e-06, "loss": 0.309, "step": 2546 }, { "epoch": 0.12, "grad_norm": 0.6611751633794805, "learning_rate": 4.988479536527432e-06, "loss": 0.3001, "step": 2547 }, { "epoch": 0.12, "grad_norm": 0.7058031400985368, "learning_rate": 4.988461343573742e-06, "loss": 0.3263, "step": 2548 }, { "epoch": 0.12, "grad_norm": 0.6739460038628883, "learning_rate": 4.988443136299582e-06, "loss": 0.2982, "step": 2549 }, { "epoch": 0.12, "grad_norm": 0.6810796585611433, "learning_rate": 4.9884249147050574e-06, "loss": 0.3063, "step": 2550 }, { "epoch": 0.12, "grad_norm": 0.8453776935210562, "learning_rate": 4.988406678790273e-06, "loss": 0.332, "step": 2551 }, { "epoch": 0.12, "grad_norm": 0.7104137697601194, "learning_rate": 4.988388428555334e-06, "loss": 0.3184, "step": 2552 }, { "epoch": 0.12, "grad_norm": 0.6695897818381483, "learning_rate": 4.988370164000344e-06, "loss": 0.2942, "step": 2553 }, { "epoch": 0.12, "grad_norm": 0.7037197053675502, "learning_rate": 4.988351885125409e-06, "loss": 0.3234, "step": 2554 }, { "epoch": 0.12, "grad_norm": 0.6175476162818516, "learning_rate": 4.988333591930636e-06, "loss": 0.2797, "step": 2555 }, { "epoch": 0.12, "grad_norm": 0.6657174964668005, "learning_rate": 4.988315284416127e-06, "loss": 0.3066, "step": 2556 }, { "epoch": 0.12, "grad_norm": 0.6854663531833657, "learning_rate": 4.9882969625819895e-06, "loss": 0.2863, "step": 2557 }, { "epoch": 0.12, "grad_norm": 0.6368521711189166, "learning_rate": 4.988278626428328e-06, "loss": 0.299, "step": 2558 }, { "epoch": 0.12, "grad_norm": 0.6756578010332885, "learning_rate": 4.988260275955249e-06, "loss": 0.297, "step": 2559 }, { "epoch": 0.12, "grad_norm": 0.6973198181629864, "learning_rate": 4.988241911162857e-06, "loss": 0.322, "step": 2560 }, { "epoch": 0.12, "grad_norm": 0.8070162723751143, "learning_rate": 4.988223532051259e-06, "loss": 0.329, "step": 2561 }, { "epoch": 0.12, "grad_norm": 0.6869338541529664, "learning_rate": 4.98820513862056e-06, "loss": 0.333, "step": 2562 }, { "epoch": 0.12, "grad_norm": 0.6734769524076113, "learning_rate": 4.988186730870865e-06, "loss": 0.2913, "step": 2563 }, { "epoch": 0.12, "grad_norm": 0.7254299429037464, "learning_rate": 4.9881683088022806e-06, "loss": 0.3385, "step": 2564 }, { "epoch": 0.12, "grad_norm": 0.6928674158872561, "learning_rate": 4.988149872414913e-06, "loss": 0.2941, "step": 2565 }, { "epoch": 0.12, "grad_norm": 0.7142949103382032, "learning_rate": 4.988131421708869e-06, "loss": 0.314, "step": 2566 }, { "epoch": 0.12, "grad_norm": 0.6936719970270498, "learning_rate": 4.988112956684254e-06, "loss": 0.302, "step": 2567 }, { "epoch": 0.12, "grad_norm": 0.7064986790286651, "learning_rate": 4.988094477341174e-06, "loss": 0.2937, "step": 2568 }, { "epoch": 0.12, "grad_norm": 0.6970369257487562, "learning_rate": 4.988075983679736e-06, "loss": 0.3041, "step": 2569 }, { "epoch": 0.12, "grad_norm": 0.6736884227310062, "learning_rate": 4.988057475700045e-06, "loss": 0.3067, "step": 2570 }, { "epoch": 0.12, "grad_norm": 0.7462941351635272, "learning_rate": 4.988038953402209e-06, "loss": 0.3149, "step": 2571 }, { "epoch": 0.12, "grad_norm": 0.7112155117608525, "learning_rate": 4.988020416786335e-06, "loss": 0.3244, "step": 2572 }, { "epoch": 0.12, "grad_norm": 0.6692027943550227, "learning_rate": 4.988001865852528e-06, "loss": 0.3098, "step": 2573 }, { "epoch": 0.12, "grad_norm": 0.7273025266069253, "learning_rate": 4.987983300600896e-06, "loss": 0.312, "step": 2574 }, { "epoch": 0.12, "grad_norm": 0.694223205081879, "learning_rate": 4.987964721031545e-06, "loss": 0.3188, "step": 2575 }, { "epoch": 0.12, "grad_norm": 0.7545567873431591, "learning_rate": 4.987946127144583e-06, "loss": 0.3314, "step": 2576 }, { "epoch": 0.12, "grad_norm": 0.6845139031587791, "learning_rate": 4.987927518940116e-06, "loss": 0.3098, "step": 2577 }, { "epoch": 0.12, "grad_norm": 0.6575433088786312, "learning_rate": 4.9879088964182515e-06, "loss": 0.302, "step": 2578 }, { "epoch": 0.12, "grad_norm": 0.7557713276706663, "learning_rate": 4.987890259579097e-06, "loss": 0.3072, "step": 2579 }, { "epoch": 0.12, "grad_norm": 0.6874230511393818, "learning_rate": 4.987871608422759e-06, "loss": 0.3165, "step": 2580 }, { "epoch": 0.12, "grad_norm": 0.6692888443635425, "learning_rate": 4.987852942949345e-06, "loss": 0.3084, "step": 2581 }, { "epoch": 0.12, "grad_norm": 0.6937320127701615, "learning_rate": 4.987834263158963e-06, "loss": 0.3099, "step": 2582 }, { "epoch": 0.12, "grad_norm": 0.6595792724236078, "learning_rate": 4.98781556905172e-06, "loss": 0.3067, "step": 2583 }, { "epoch": 0.12, "grad_norm": 0.6770108983271594, "learning_rate": 4.9877968606277225e-06, "loss": 0.3106, "step": 2584 }, { "epoch": 0.12, "grad_norm": 0.6324561944814804, "learning_rate": 4.98777813788708e-06, "loss": 0.3212, "step": 2585 }, { "epoch": 0.12, "grad_norm": 0.7610670379321801, "learning_rate": 4.987759400829901e-06, "loss": 0.3289, "step": 2586 }, { "epoch": 0.12, "grad_norm": 0.6634657838495456, "learning_rate": 4.987740649456291e-06, "loss": 0.3192, "step": 2587 }, { "epoch": 0.12, "grad_norm": 0.7638620722030118, "learning_rate": 4.9877218837663586e-06, "loss": 0.3201, "step": 2588 }, { "epoch": 0.12, "grad_norm": 0.7005446848472232, "learning_rate": 4.987703103760212e-06, "loss": 0.3246, "step": 2589 }, { "epoch": 0.12, "grad_norm": 0.7310635979756521, "learning_rate": 4.9876843094379595e-06, "loss": 0.3253, "step": 2590 }, { "epoch": 0.12, "grad_norm": 0.7119317737158616, "learning_rate": 4.987665500799709e-06, "loss": 0.3149, "step": 2591 }, { "epoch": 0.12, "grad_norm": 0.6851559139957113, "learning_rate": 4.987646677845569e-06, "loss": 0.3029, "step": 2592 }, { "epoch": 0.12, "grad_norm": 0.6894126208398874, "learning_rate": 4.9876278405756475e-06, "loss": 0.2995, "step": 2593 }, { "epoch": 0.12, "grad_norm": 0.7018071706679382, "learning_rate": 4.987608988990052e-06, "loss": 0.3051, "step": 2594 }, { "epoch": 0.12, "grad_norm": 0.7260262976740165, "learning_rate": 4.987590123088894e-06, "loss": 0.33, "step": 2595 }, { "epoch": 0.12, "grad_norm": 0.6564120482116753, "learning_rate": 4.987571242872279e-06, "loss": 0.3037, "step": 2596 }, { "epoch": 0.12, "grad_norm": 0.6682019777288305, "learning_rate": 4.9875523483403165e-06, "loss": 0.3146, "step": 2597 }, { "epoch": 0.12, "grad_norm": 0.6254509209005744, "learning_rate": 4.9875334394931154e-06, "loss": 0.2958, "step": 2598 }, { "epoch": 0.12, "grad_norm": 0.7413136746016084, "learning_rate": 4.987514516330785e-06, "loss": 0.3235, "step": 2599 }, { "epoch": 0.12, "grad_norm": 0.811641416833189, "learning_rate": 4.987495578853434e-06, "loss": 0.3158, "step": 2600 }, { "epoch": 0.12, "grad_norm": 0.7216203057367442, "learning_rate": 4.987476627061171e-06, "loss": 0.3352, "step": 2601 }, { "epoch": 0.12, "grad_norm": 0.630697231383496, "learning_rate": 4.987457660954105e-06, "loss": 0.2784, "step": 2602 }, { "epoch": 0.12, "grad_norm": 0.6707172033875742, "learning_rate": 4.9874386805323446e-06, "loss": 0.3026, "step": 2603 }, { "epoch": 0.12, "grad_norm": 0.660686710171611, "learning_rate": 4.987419685796001e-06, "loss": 0.3139, "step": 2604 }, { "epoch": 0.12, "grad_norm": 0.701390283082083, "learning_rate": 4.9874006767451825e-06, "loss": 0.3234, "step": 2605 }, { "epoch": 0.12, "grad_norm": 0.7351729136497146, "learning_rate": 4.987381653379997e-06, "loss": 0.3128, "step": 2606 }, { "epoch": 0.12, "grad_norm": 0.632900122826865, "learning_rate": 4.987362615700556e-06, "loss": 0.2909, "step": 2607 }, { "epoch": 0.12, "grad_norm": 0.6639497078801305, "learning_rate": 4.987343563706969e-06, "loss": 0.2938, "step": 2608 }, { "epoch": 0.12, "grad_norm": 0.725088790221481, "learning_rate": 4.987324497399344e-06, "loss": 0.3106, "step": 2609 }, { "epoch": 0.12, "grad_norm": 0.6838652848002703, "learning_rate": 4.987305416777791e-06, "loss": 0.3029, "step": 2610 }, { "epoch": 0.12, "grad_norm": 0.6851788722665033, "learning_rate": 4.987286321842422e-06, "loss": 0.3102, "step": 2611 }, { "epoch": 0.12, "grad_norm": 0.6089600785746957, "learning_rate": 4.987267212593345e-06, "loss": 0.2951, "step": 2612 }, { "epoch": 0.12, "grad_norm": 0.6664775011857139, "learning_rate": 4.98724808903067e-06, "loss": 0.2915, "step": 2613 }, { "epoch": 0.12, "grad_norm": 0.6794336760894079, "learning_rate": 4.987228951154507e-06, "loss": 0.3115, "step": 2614 }, { "epoch": 0.12, "grad_norm": 0.6867784635686116, "learning_rate": 4.987209798964967e-06, "loss": 0.3108, "step": 2615 }, { "epoch": 0.12, "grad_norm": 0.6660254419400036, "learning_rate": 4.98719063246216e-06, "loss": 0.3051, "step": 2616 }, { "epoch": 0.12, "grad_norm": 0.6248847852332967, "learning_rate": 4.987171451646196e-06, "loss": 0.3053, "step": 2617 }, { "epoch": 0.12, "grad_norm": 0.7193580985790888, "learning_rate": 4.987152256517185e-06, "loss": 0.3403, "step": 2618 }, { "epoch": 0.12, "grad_norm": 0.6588089872937163, "learning_rate": 4.987133047075238e-06, "loss": 0.3104, "step": 2619 }, { "epoch": 0.12, "grad_norm": 0.699702252575233, "learning_rate": 4.9871138233204655e-06, "loss": 0.3025, "step": 2620 }, { "epoch": 0.12, "grad_norm": 0.6805096997856174, "learning_rate": 4.9870945852529785e-06, "loss": 0.3103, "step": 2621 }, { "epoch": 0.12, "grad_norm": 0.718934550555825, "learning_rate": 4.987075332872887e-06, "loss": 0.3259, "step": 2622 }, { "epoch": 0.12, "grad_norm": 0.7320695804255767, "learning_rate": 4.987056066180302e-06, "loss": 0.3111, "step": 2623 }, { "epoch": 0.12, "grad_norm": 0.6676872614684224, "learning_rate": 4.987036785175334e-06, "loss": 0.3033, "step": 2624 }, { "epoch": 0.12, "grad_norm": 0.6622712824087398, "learning_rate": 4.9870174898580944e-06, "loss": 0.2855, "step": 2625 }, { "epoch": 0.12, "grad_norm": 0.6133836946103636, "learning_rate": 4.986998180228695e-06, "loss": 0.3008, "step": 2626 }, { "epoch": 0.12, "grad_norm": 0.6519633431305055, "learning_rate": 4.986978856287246e-06, "loss": 0.3158, "step": 2627 }, { "epoch": 0.12, "grad_norm": 0.7060064155205813, "learning_rate": 4.986959518033858e-06, "loss": 0.2881, "step": 2628 }, { "epoch": 0.12, "grad_norm": 0.6617304589126916, "learning_rate": 4.986940165468644e-06, "loss": 0.3014, "step": 2629 }, { "epoch": 0.12, "grad_norm": 0.6916815324984398, "learning_rate": 4.986920798591714e-06, "loss": 0.3213, "step": 2630 }, { "epoch": 0.12, "grad_norm": 0.665660291578906, "learning_rate": 4.986901417403179e-06, "loss": 0.3157, "step": 2631 }, { "epoch": 0.12, "grad_norm": 0.7103949930813817, "learning_rate": 4.9868820219031535e-06, "loss": 0.3101, "step": 2632 }, { "epoch": 0.12, "grad_norm": 0.6689162450817072, "learning_rate": 4.9868626120917455e-06, "loss": 0.2917, "step": 2633 }, { "epoch": 0.12, "grad_norm": 0.7704283383296351, "learning_rate": 4.986843187969068e-06, "loss": 0.325, "step": 2634 }, { "epoch": 0.12, "grad_norm": 0.6685232619102731, "learning_rate": 4.986823749535233e-06, "loss": 0.314, "step": 2635 }, { "epoch": 0.12, "grad_norm": 0.6126743017518813, "learning_rate": 4.9868042967903535e-06, "loss": 0.2905, "step": 2636 }, { "epoch": 0.12, "grad_norm": 0.6692530758276065, "learning_rate": 4.98678482973454e-06, "loss": 0.3101, "step": 2637 }, { "epoch": 0.12, "grad_norm": 0.712457785501523, "learning_rate": 4.986765348367905e-06, "loss": 0.3338, "step": 2638 }, { "epoch": 0.12, "grad_norm": 0.7037399050766334, "learning_rate": 4.98674585269056e-06, "loss": 0.3194, "step": 2639 }, { "epoch": 0.12, "grad_norm": 0.6693581510158645, "learning_rate": 4.986726342702617e-06, "loss": 0.3234, "step": 2640 }, { "epoch": 0.12, "grad_norm": 0.7507621429877559, "learning_rate": 4.986706818404189e-06, "loss": 0.3397, "step": 2641 }, { "epoch": 0.12, "grad_norm": 0.668076676065211, "learning_rate": 4.9866872797953895e-06, "loss": 0.3189, "step": 2642 }, { "epoch": 0.12, "grad_norm": 0.6987145119459186, "learning_rate": 4.9866677268763285e-06, "loss": 0.3196, "step": 2643 }, { "epoch": 0.12, "grad_norm": 0.7000197675953901, "learning_rate": 4.98664815964712e-06, "loss": 0.3155, "step": 2644 }, { "epoch": 0.12, "grad_norm": 0.7189732826578218, "learning_rate": 4.986628578107877e-06, "loss": 0.3092, "step": 2645 }, { "epoch": 0.12, "grad_norm": 0.630866291703158, "learning_rate": 4.9866089822587106e-06, "loss": 0.2913, "step": 2646 }, { "epoch": 0.12, "grad_norm": 0.681962276329209, "learning_rate": 4.9865893720997346e-06, "loss": 0.3025, "step": 2647 }, { "epoch": 0.12, "grad_norm": 0.6607323379408168, "learning_rate": 4.986569747631063e-06, "loss": 0.3114, "step": 2648 }, { "epoch": 0.12, "grad_norm": 0.6367293245300596, "learning_rate": 4.986550108852806e-06, "loss": 0.3008, "step": 2649 }, { "epoch": 0.12, "grad_norm": 0.6618355919698719, "learning_rate": 4.986530455765079e-06, "loss": 0.3086, "step": 2650 }, { "epoch": 0.12, "grad_norm": 0.6528035571834094, "learning_rate": 4.9865107883679945e-06, "loss": 0.3071, "step": 2651 }, { "epoch": 0.12, "grad_norm": 0.6927147001599878, "learning_rate": 4.986491106661664e-06, "loss": 0.3143, "step": 2652 }, { "epoch": 0.12, "grad_norm": 0.6227649443774443, "learning_rate": 4.986471410646203e-06, "loss": 0.3064, "step": 2653 }, { "epoch": 0.12, "grad_norm": 0.6572889269228513, "learning_rate": 4.986451700321724e-06, "loss": 0.3114, "step": 2654 }, { "epoch": 0.12, "grad_norm": 0.6545727574431969, "learning_rate": 4.98643197568834e-06, "loss": 0.3119, "step": 2655 }, { "epoch": 0.12, "grad_norm": 0.736161714148256, "learning_rate": 4.986412236746165e-06, "loss": 0.2989, "step": 2656 }, { "epoch": 0.12, "grad_norm": 0.6759289587889629, "learning_rate": 4.986392483495313e-06, "loss": 0.3038, "step": 2657 }, { "epoch": 0.12, "grad_norm": 0.6584244338564449, "learning_rate": 4.9863727159358965e-06, "loss": 0.3139, "step": 2658 }, { "epoch": 0.12, "grad_norm": 0.7187017475937235, "learning_rate": 4.98635293406803e-06, "loss": 0.3097, "step": 2659 }, { "epoch": 0.12, "grad_norm": 0.7983517242355249, "learning_rate": 4.9863331378918285e-06, "loss": 0.3429, "step": 2660 }, { "epoch": 0.12, "grad_norm": 0.6955413166701154, "learning_rate": 4.986313327407404e-06, "loss": 0.2814, "step": 2661 }, { "epoch": 0.12, "grad_norm": 0.6515538574843641, "learning_rate": 4.986293502614871e-06, "loss": 0.293, "step": 2662 }, { "epoch": 0.12, "grad_norm": 0.6810986611289185, "learning_rate": 4.986273663514344e-06, "loss": 0.3023, "step": 2663 }, { "epoch": 0.12, "grad_norm": 0.6761492909575139, "learning_rate": 4.986253810105937e-06, "loss": 0.2937, "step": 2664 }, { "epoch": 0.12, "grad_norm": 0.6520333660034692, "learning_rate": 4.986233942389763e-06, "loss": 0.307, "step": 2665 }, { "epoch": 0.12, "grad_norm": 0.7277968418688002, "learning_rate": 4.986214060365939e-06, "loss": 0.3153, "step": 2666 }, { "epoch": 0.12, "grad_norm": 0.6710197040581062, "learning_rate": 4.986194164034578e-06, "loss": 0.3011, "step": 2667 }, { "epoch": 0.12, "grad_norm": 0.6600039994246751, "learning_rate": 4.986174253395793e-06, "loss": 0.2942, "step": 2668 }, { "epoch": 0.13, "grad_norm": 0.7094010219884277, "learning_rate": 4.986154328449702e-06, "loss": 0.3153, "step": 2669 }, { "epoch": 0.13, "grad_norm": 0.6532478226652974, "learning_rate": 4.9861343891964165e-06, "loss": 0.2989, "step": 2670 }, { "epoch": 0.13, "grad_norm": 0.7062806229715204, "learning_rate": 4.986114435636053e-06, "loss": 0.311, "step": 2671 }, { "epoch": 0.13, "grad_norm": 0.7076051433581456, "learning_rate": 4.986094467768725e-06, "loss": 0.291, "step": 2672 }, { "epoch": 0.13, "grad_norm": 0.697563174021034, "learning_rate": 4.986074485594549e-06, "loss": 0.3042, "step": 2673 }, { "epoch": 0.13, "grad_norm": 0.6400876504248478, "learning_rate": 4.986054489113639e-06, "loss": 0.3046, "step": 2674 }, { "epoch": 0.13, "grad_norm": 0.7165167553772435, "learning_rate": 4.98603447832611e-06, "loss": 0.3217, "step": 2675 }, { "epoch": 0.13, "grad_norm": 0.6736576853854938, "learning_rate": 4.986014453232078e-06, "loss": 0.311, "step": 2676 }, { "epoch": 0.13, "grad_norm": 0.644960838160625, "learning_rate": 4.985994413831657e-06, "loss": 0.325, "step": 2677 }, { "epoch": 0.13, "grad_norm": 0.6601854396248257, "learning_rate": 4.985974360124963e-06, "loss": 0.3162, "step": 2678 }, { "epoch": 0.13, "grad_norm": 0.6322511503933199, "learning_rate": 4.985954292112112e-06, "loss": 0.2938, "step": 2679 }, { "epoch": 0.13, "grad_norm": 0.6214562297562297, "learning_rate": 4.985934209793218e-06, "loss": 0.3091, "step": 2680 }, { "epoch": 0.13, "grad_norm": 0.6081989063698556, "learning_rate": 4.985914113168398e-06, "loss": 0.2889, "step": 2681 }, { "epoch": 0.13, "grad_norm": 0.6533508080523244, "learning_rate": 4.985894002237766e-06, "loss": 0.3021, "step": 2682 }, { "epoch": 0.13, "grad_norm": 0.7101776776849726, "learning_rate": 4.985873877001441e-06, "loss": 0.3212, "step": 2683 }, { "epoch": 0.13, "grad_norm": 0.6369378266417303, "learning_rate": 4.985853737459535e-06, "loss": 0.2893, "step": 2684 }, { "epoch": 0.13, "grad_norm": 0.6903457446356897, "learning_rate": 4.985833583612166e-06, "loss": 0.3125, "step": 2685 }, { "epoch": 0.13, "grad_norm": 0.7010224592812112, "learning_rate": 4.985813415459449e-06, "loss": 0.3344, "step": 2686 }, { "epoch": 0.13, "grad_norm": 0.7342719201749003, "learning_rate": 4.985793233001502e-06, "loss": 0.3221, "step": 2687 }, { "epoch": 0.13, "grad_norm": 0.7028221455548029, "learning_rate": 4.985773036238439e-06, "loss": 0.3223, "step": 2688 }, { "epoch": 0.13, "grad_norm": 0.6164646874752331, "learning_rate": 4.9857528251703765e-06, "loss": 0.2935, "step": 2689 }, { "epoch": 0.13, "grad_norm": 0.6760327975478746, "learning_rate": 4.985732599797431e-06, "loss": 0.3157, "step": 2690 }, { "epoch": 0.13, "grad_norm": 0.7444907145941833, "learning_rate": 4.98571236011972e-06, "loss": 0.3373, "step": 2691 }, { "epoch": 0.13, "grad_norm": 0.7411865065000474, "learning_rate": 4.985692106137359e-06, "loss": 0.3169, "step": 2692 }, { "epoch": 0.13, "grad_norm": 0.61333862942202, "learning_rate": 4.985671837850464e-06, "loss": 0.2922, "step": 2693 }, { "epoch": 0.13, "grad_norm": 0.6430786045175184, "learning_rate": 4.985651555259153e-06, "loss": 0.2939, "step": 2694 }, { "epoch": 0.13, "grad_norm": 0.6772348813406737, "learning_rate": 4.985631258363542e-06, "loss": 0.3187, "step": 2695 }, { "epoch": 0.13, "grad_norm": 0.6755026375058879, "learning_rate": 4.985610947163748e-06, "loss": 0.2807, "step": 2696 }, { "epoch": 0.13, "grad_norm": 0.9090991632888398, "learning_rate": 4.985590621659887e-06, "loss": 0.3064, "step": 2697 }, { "epoch": 0.13, "grad_norm": 0.6957715683178097, "learning_rate": 4.985570281852078e-06, "loss": 0.3078, "step": 2698 }, { "epoch": 0.13, "grad_norm": 0.7019579065310796, "learning_rate": 4.985549927740435e-06, "loss": 0.3085, "step": 2699 }, { "epoch": 0.13, "grad_norm": 0.6597866613102564, "learning_rate": 4.985529559325079e-06, "loss": 0.2775, "step": 2700 }, { "epoch": 0.13, "grad_norm": 0.6822166102400778, "learning_rate": 4.9855091766061234e-06, "loss": 0.3068, "step": 2701 }, { "epoch": 0.13, "grad_norm": 0.7908361802917686, "learning_rate": 4.985488779583688e-06, "loss": 0.3221, "step": 2702 }, { "epoch": 0.13, "grad_norm": 0.7134557695557282, "learning_rate": 4.985468368257889e-06, "loss": 0.3128, "step": 2703 }, { "epoch": 0.13, "grad_norm": 0.6974508521813934, "learning_rate": 4.985447942628843e-06, "loss": 0.3141, "step": 2704 }, { "epoch": 0.13, "grad_norm": 0.7007772311582975, "learning_rate": 4.985427502696671e-06, "loss": 0.3206, "step": 2705 }, { "epoch": 0.13, "grad_norm": 0.7069290585840607, "learning_rate": 4.985407048461487e-06, "loss": 0.3265, "step": 2706 }, { "epoch": 0.13, "grad_norm": 0.6765347919744771, "learning_rate": 4.985386579923411e-06, "loss": 0.3086, "step": 2707 }, { "epoch": 0.13, "grad_norm": 0.6926054643301713, "learning_rate": 4.985366097082559e-06, "loss": 0.3091, "step": 2708 }, { "epoch": 0.13, "grad_norm": 0.6283202261350292, "learning_rate": 4.985345599939051e-06, "loss": 0.292, "step": 2709 }, { "epoch": 0.13, "grad_norm": 0.6683509889241858, "learning_rate": 4.985325088493003e-06, "loss": 0.3024, "step": 2710 }, { "epoch": 0.13, "grad_norm": 0.8351471303574877, "learning_rate": 4.985304562744534e-06, "loss": 0.2952, "step": 2711 }, { "epoch": 0.13, "grad_norm": 0.6596935611411917, "learning_rate": 4.985284022693762e-06, "loss": 0.2874, "step": 2712 }, { "epoch": 0.13, "grad_norm": 0.7303208467400482, "learning_rate": 4.985263468340804e-06, "loss": 0.2931, "step": 2713 }, { "epoch": 0.13, "grad_norm": 0.8137347367384566, "learning_rate": 4.985242899685781e-06, "loss": 0.305, "step": 2714 }, { "epoch": 0.13, "grad_norm": 0.6947314106271173, "learning_rate": 4.9852223167288085e-06, "loss": 0.317, "step": 2715 }, { "epoch": 0.13, "grad_norm": 0.6945574216801129, "learning_rate": 4.985201719470006e-06, "loss": 0.3072, "step": 2716 }, { "epoch": 0.13, "grad_norm": 0.6579427315207388, "learning_rate": 4.9851811079094935e-06, "loss": 0.2755, "step": 2717 }, { "epoch": 0.13, "grad_norm": 0.7456472616927775, "learning_rate": 4.985160482047388e-06, "loss": 0.3103, "step": 2718 }, { "epoch": 0.13, "grad_norm": 0.7678285572959176, "learning_rate": 4.9851398418838085e-06, "loss": 0.3182, "step": 2719 }, { "epoch": 0.13, "grad_norm": 0.6580565803883347, "learning_rate": 4.985119187418873e-06, "loss": 0.2832, "step": 2720 }, { "epoch": 0.13, "grad_norm": 0.7109711539865722, "learning_rate": 4.985098518652702e-06, "loss": 0.3117, "step": 2721 }, { "epoch": 0.13, "grad_norm": 0.7444870868667772, "learning_rate": 4.985077835585413e-06, "loss": 0.3094, "step": 2722 }, { "epoch": 0.13, "grad_norm": 0.7256281532638628, "learning_rate": 4.985057138217127e-06, "loss": 0.3077, "step": 2723 }, { "epoch": 0.13, "grad_norm": 0.6696714659519352, "learning_rate": 4.985036426547961e-06, "loss": 0.3207, "step": 2724 }, { "epoch": 0.13, "grad_norm": 0.6648891063990117, "learning_rate": 4.985015700578035e-06, "loss": 0.312, "step": 2725 }, { "epoch": 0.13, "grad_norm": 0.6547806568377581, "learning_rate": 4.984994960307468e-06, "loss": 0.2905, "step": 2726 }, { "epoch": 0.13, "grad_norm": 0.7111021810788002, "learning_rate": 4.984974205736379e-06, "loss": 0.3045, "step": 2727 }, { "epoch": 0.13, "grad_norm": 0.6743766919776626, "learning_rate": 4.984953436864889e-06, "loss": 0.3079, "step": 2728 }, { "epoch": 0.13, "grad_norm": 0.6427703055799978, "learning_rate": 4.984932653693116e-06, "loss": 0.3094, "step": 2729 }, { "epoch": 0.13, "grad_norm": 0.6441565854284128, "learning_rate": 4.984911856221181e-06, "loss": 0.3109, "step": 2730 }, { "epoch": 0.13, "grad_norm": 0.7320153583036513, "learning_rate": 4.9848910444492015e-06, "loss": 0.3149, "step": 2731 }, { "epoch": 0.13, "grad_norm": 0.7532002057103185, "learning_rate": 4.9848702183772995e-06, "loss": 0.3261, "step": 2732 }, { "epoch": 0.13, "grad_norm": 0.7253309226033688, "learning_rate": 4.984849378005594e-06, "loss": 0.3031, "step": 2733 }, { "epoch": 0.13, "grad_norm": 0.674185262665623, "learning_rate": 4.984828523334204e-06, "loss": 0.295, "step": 2734 }, { "epoch": 0.13, "grad_norm": 0.6303052988295248, "learning_rate": 4.984807654363252e-06, "loss": 0.2802, "step": 2735 }, { "epoch": 0.13, "grad_norm": 0.7147151759368848, "learning_rate": 4.984786771092855e-06, "loss": 0.3229, "step": 2736 }, { "epoch": 0.13, "grad_norm": 0.6934056778835322, "learning_rate": 4.9847658735231355e-06, "loss": 0.3324, "step": 2737 }, { "epoch": 0.13, "grad_norm": 0.7448749171519544, "learning_rate": 4.984744961654212e-06, "loss": 0.3116, "step": 2738 }, { "epoch": 0.13, "grad_norm": 0.7000226333628277, "learning_rate": 4.984724035486206e-06, "loss": 0.3193, "step": 2739 }, { "epoch": 0.13, "grad_norm": 0.7051558937344505, "learning_rate": 4.9847030950192385e-06, "loss": 0.322, "step": 2740 }, { "epoch": 0.13, "grad_norm": 0.7586060912158282, "learning_rate": 4.984682140253429e-06, "loss": 0.3182, "step": 2741 }, { "epoch": 0.13, "grad_norm": 0.6519683793905946, "learning_rate": 4.984661171188897e-06, "loss": 0.3256, "step": 2742 }, { "epoch": 0.13, "grad_norm": 0.6786415676868995, "learning_rate": 4.9846401878257655e-06, "loss": 0.3079, "step": 2743 }, { "epoch": 0.13, "grad_norm": 0.7731899948827805, "learning_rate": 4.984619190164154e-06, "loss": 0.3182, "step": 2744 }, { "epoch": 0.13, "grad_norm": 0.6928221953875281, "learning_rate": 4.9845981782041845e-06, "loss": 0.3018, "step": 2745 }, { "epoch": 0.13, "grad_norm": 0.6857909223638868, "learning_rate": 4.984577151945976e-06, "loss": 0.3013, "step": 2746 }, { "epoch": 0.13, "grad_norm": 0.7031269057653435, "learning_rate": 4.98455611138965e-06, "loss": 0.3069, "step": 2747 }, { "epoch": 0.13, "grad_norm": 0.6912560690440933, "learning_rate": 4.984535056535329e-06, "loss": 0.3067, "step": 2748 }, { "epoch": 0.13, "grad_norm": 0.6870875565938638, "learning_rate": 4.984513987383133e-06, "loss": 0.3174, "step": 2749 }, { "epoch": 0.13, "grad_norm": 0.658515487727245, "learning_rate": 4.9844929039331835e-06, "loss": 0.3093, "step": 2750 }, { "epoch": 0.13, "grad_norm": 0.7184811700318372, "learning_rate": 4.984471806185601e-06, "loss": 0.3169, "step": 2751 }, { "epoch": 0.13, "grad_norm": 0.6881563610371892, "learning_rate": 4.984450694140509e-06, "loss": 0.2953, "step": 2752 }, { "epoch": 0.13, "grad_norm": 0.7691889466723798, "learning_rate": 4.984429567798027e-06, "loss": 0.3263, "step": 2753 }, { "epoch": 0.13, "grad_norm": 0.7441412872427494, "learning_rate": 4.984408427158277e-06, "loss": 0.3041, "step": 2754 }, { "epoch": 0.13, "grad_norm": 0.6911123891719066, "learning_rate": 4.984387272221382e-06, "loss": 0.3018, "step": 2755 }, { "epoch": 0.13, "grad_norm": 0.6492130931784367, "learning_rate": 4.984366102987461e-06, "loss": 0.305, "step": 2756 }, { "epoch": 0.13, "grad_norm": 0.6985000123980021, "learning_rate": 4.984344919456639e-06, "loss": 0.3154, "step": 2757 }, { "epoch": 0.13, "grad_norm": 0.6548082257709306, "learning_rate": 4.984323721629035e-06, "loss": 0.3118, "step": 2758 }, { "epoch": 0.13, "grad_norm": 0.6525785465514359, "learning_rate": 4.984302509504774e-06, "loss": 0.3042, "step": 2759 }, { "epoch": 0.13, "grad_norm": 0.6929632583692081, "learning_rate": 4.9842812830839755e-06, "loss": 0.3244, "step": 2760 }, { "epoch": 0.13, "grad_norm": 0.6898471411789359, "learning_rate": 4.984260042366763e-06, "loss": 0.3184, "step": 2761 }, { "epoch": 0.13, "grad_norm": 0.7268552340206348, "learning_rate": 4.9842387873532575e-06, "loss": 0.3175, "step": 2762 }, { "epoch": 0.13, "grad_norm": 0.6359196393190956, "learning_rate": 4.984217518043583e-06, "loss": 0.2935, "step": 2763 }, { "epoch": 0.13, "grad_norm": 0.7992437473503239, "learning_rate": 4.984196234437861e-06, "loss": 0.3019, "step": 2764 }, { "epoch": 0.13, "grad_norm": 0.6634220442348884, "learning_rate": 4.984174936536214e-06, "loss": 0.3046, "step": 2765 }, { "epoch": 0.13, "grad_norm": 0.667438225169493, "learning_rate": 4.984153624338765e-06, "loss": 0.3036, "step": 2766 }, { "epoch": 0.13, "grad_norm": 0.7228466311353479, "learning_rate": 4.984132297845635e-06, "loss": 0.328, "step": 2767 }, { "epoch": 0.13, "grad_norm": 0.7713256112160293, "learning_rate": 4.984110957056949e-06, "loss": 0.315, "step": 2768 }, { "epoch": 0.13, "grad_norm": 0.7475936915866939, "learning_rate": 4.984089601972829e-06, "loss": 0.3439, "step": 2769 }, { "epoch": 0.13, "grad_norm": 0.6750979395922051, "learning_rate": 4.984068232593397e-06, "loss": 0.306, "step": 2770 }, { "epoch": 0.13, "grad_norm": 0.6571707202164975, "learning_rate": 4.984046848918777e-06, "loss": 0.2991, "step": 2771 }, { "epoch": 0.13, "grad_norm": 0.7520984596707564, "learning_rate": 4.984025450949092e-06, "loss": 0.3272, "step": 2772 }, { "epoch": 0.13, "grad_norm": 0.679103633991206, "learning_rate": 4.984004038684465e-06, "loss": 0.3047, "step": 2773 }, { "epoch": 0.13, "grad_norm": 0.6839823220681328, "learning_rate": 4.983982612125018e-06, "loss": 0.3067, "step": 2774 }, { "epoch": 0.13, "grad_norm": 0.8158368923471491, "learning_rate": 4.983961171270876e-06, "loss": 0.3328, "step": 2775 }, { "epoch": 0.13, "grad_norm": 0.7221932817611965, "learning_rate": 4.983939716122161e-06, "loss": 0.3086, "step": 2776 }, { "epoch": 0.13, "grad_norm": 0.655282927345717, "learning_rate": 4.983918246678999e-06, "loss": 0.2855, "step": 2777 }, { "epoch": 0.13, "grad_norm": 0.6759510806318528, "learning_rate": 4.98389676294151e-06, "loss": 0.3046, "step": 2778 }, { "epoch": 0.13, "grad_norm": 0.6530992386166202, "learning_rate": 4.983875264909821e-06, "loss": 0.3043, "step": 2779 }, { "epoch": 0.13, "grad_norm": 0.6250381415692378, "learning_rate": 4.9838537525840536e-06, "loss": 0.2966, "step": 2780 }, { "epoch": 0.13, "grad_norm": 0.7010338407777167, "learning_rate": 4.983832225964332e-06, "loss": 0.3106, "step": 2781 }, { "epoch": 0.13, "grad_norm": 0.7060499950345298, "learning_rate": 4.98381068505078e-06, "loss": 0.3158, "step": 2782 }, { "epoch": 0.13, "grad_norm": 0.7248655129044128, "learning_rate": 4.983789129843522e-06, "loss": 0.35, "step": 2783 }, { "epoch": 0.13, "grad_norm": 0.6621012157054731, "learning_rate": 4.983767560342681e-06, "loss": 0.3078, "step": 2784 }, { "epoch": 0.13, "grad_norm": 0.6763996262839521, "learning_rate": 4.983745976548384e-06, "loss": 0.3006, "step": 2785 }, { "epoch": 0.13, "grad_norm": 0.6711684433762368, "learning_rate": 4.9837243784607516e-06, "loss": 0.3448, "step": 2786 }, { "epoch": 0.13, "grad_norm": 0.6943865319022802, "learning_rate": 4.983702766079909e-06, "loss": 0.3028, "step": 2787 }, { "epoch": 0.13, "grad_norm": 0.6578762387718345, "learning_rate": 4.9836811394059834e-06, "loss": 0.2962, "step": 2788 }, { "epoch": 0.13, "grad_norm": 0.6087569692381789, "learning_rate": 4.9836594984390964e-06, "loss": 0.2984, "step": 2789 }, { "epoch": 0.13, "grad_norm": 0.7285972063914142, "learning_rate": 4.983637843179372e-06, "loss": 0.317, "step": 2790 }, { "epoch": 0.13, "grad_norm": 0.7440575152415403, "learning_rate": 4.9836161736269375e-06, "loss": 0.3338, "step": 2791 }, { "epoch": 0.13, "grad_norm": 0.6912001109022753, "learning_rate": 4.9835944897819154e-06, "loss": 0.3049, "step": 2792 }, { "epoch": 0.13, "grad_norm": 0.6605540483398785, "learning_rate": 4.983572791644431e-06, "loss": 0.3053, "step": 2793 }, { "epoch": 0.13, "grad_norm": 0.6965579074954796, "learning_rate": 4.9835510792146095e-06, "loss": 0.3276, "step": 2794 }, { "epoch": 0.13, "grad_norm": 0.7225302429423766, "learning_rate": 4.983529352492576e-06, "loss": 0.3175, "step": 2795 }, { "epoch": 0.13, "grad_norm": 0.6962279177448402, "learning_rate": 4.9835076114784554e-06, "loss": 0.3144, "step": 2796 }, { "epoch": 0.13, "grad_norm": 0.7106303875621603, "learning_rate": 4.983485856172373e-06, "loss": 0.3204, "step": 2797 }, { "epoch": 0.13, "grad_norm": 0.7290723337458733, "learning_rate": 4.983464086574453e-06, "loss": 0.3035, "step": 2798 }, { "epoch": 0.13, "grad_norm": 0.7140696379610183, "learning_rate": 4.983442302684822e-06, "loss": 0.3055, "step": 2799 }, { "epoch": 0.13, "grad_norm": 0.6597762392226483, "learning_rate": 4.9834205045036046e-06, "loss": 0.3086, "step": 2800 }, { "epoch": 0.13, "grad_norm": 0.7536662851161271, "learning_rate": 4.9833986920309255e-06, "loss": 0.3245, "step": 2801 }, { "epoch": 0.13, "grad_norm": 0.6334236723335062, "learning_rate": 4.983376865266913e-06, "loss": 0.2839, "step": 2802 }, { "epoch": 0.13, "grad_norm": 0.6949281459424254, "learning_rate": 4.983355024211689e-06, "loss": 0.3068, "step": 2803 }, { "epoch": 0.13, "grad_norm": 0.7176638293605295, "learning_rate": 4.983333168865382e-06, "loss": 0.3092, "step": 2804 }, { "epoch": 0.13, "grad_norm": 0.714749291652377, "learning_rate": 4.983311299228116e-06, "loss": 0.3089, "step": 2805 }, { "epoch": 0.13, "grad_norm": 0.7293723349558504, "learning_rate": 4.983289415300018e-06, "loss": 0.3041, "step": 2806 }, { "epoch": 0.13, "grad_norm": 0.629780551454618, "learning_rate": 4.9832675170812135e-06, "loss": 0.2973, "step": 2807 }, { "epoch": 0.13, "grad_norm": 0.6604929985095718, "learning_rate": 4.983245604571828e-06, "loss": 0.2971, "step": 2808 }, { "epoch": 0.13, "grad_norm": 0.6982112701741372, "learning_rate": 4.983223677771989e-06, "loss": 0.3157, "step": 2809 }, { "epoch": 0.13, "grad_norm": 0.6723908356939835, "learning_rate": 4.983201736681822e-06, "loss": 0.3047, "step": 2810 }, { "epoch": 0.13, "grad_norm": 0.7164283199887137, "learning_rate": 4.9831797813014525e-06, "loss": 0.3132, "step": 2811 }, { "epoch": 0.13, "grad_norm": 0.6677694875067862, "learning_rate": 4.983157811631008e-06, "loss": 0.2963, "step": 2812 }, { "epoch": 0.13, "grad_norm": 0.6950116692682682, "learning_rate": 4.983135827670614e-06, "loss": 0.3276, "step": 2813 }, { "epoch": 0.13, "grad_norm": 0.6610663174149385, "learning_rate": 4.983113829420398e-06, "loss": 0.3163, "step": 2814 }, { "epoch": 0.13, "grad_norm": 0.6425238595623931, "learning_rate": 4.983091816880486e-06, "loss": 0.3042, "step": 2815 }, { "epoch": 0.13, "grad_norm": 0.681548974266561, "learning_rate": 4.983069790051004e-06, "loss": 0.3013, "step": 2816 }, { "epoch": 0.13, "grad_norm": 0.7077324345625784, "learning_rate": 4.98304774893208e-06, "loss": 0.3073, "step": 2817 }, { "epoch": 0.13, "grad_norm": 0.7155162898900909, "learning_rate": 4.98302569352384e-06, "loss": 0.3166, "step": 2818 }, { "epoch": 0.13, "grad_norm": 0.7344490960209316, "learning_rate": 4.983003623826412e-06, "loss": 0.3414, "step": 2819 }, { "epoch": 0.13, "grad_norm": 0.6496983495425597, "learning_rate": 4.982981539839921e-06, "loss": 0.3061, "step": 2820 }, { "epoch": 0.13, "grad_norm": 0.7000524568281578, "learning_rate": 4.982959441564496e-06, "loss": 0.2972, "step": 2821 }, { "epoch": 0.13, "grad_norm": 0.6765823843370966, "learning_rate": 4.982937329000264e-06, "loss": 0.2997, "step": 2822 }, { "epoch": 0.13, "grad_norm": 0.7047448234344201, "learning_rate": 4.982915202147351e-06, "loss": 0.3177, "step": 2823 }, { "epoch": 0.13, "grad_norm": 0.732822270185039, "learning_rate": 4.982893061005885e-06, "loss": 0.314, "step": 2824 }, { "epoch": 0.13, "grad_norm": 0.6311974074850603, "learning_rate": 4.982870905575993e-06, "loss": 0.2946, "step": 2825 }, { "epoch": 0.13, "grad_norm": 0.6395363008214913, "learning_rate": 4.982848735857804e-06, "loss": 0.2979, "step": 2826 }, { "epoch": 0.13, "grad_norm": 0.6397298801766539, "learning_rate": 4.982826551851444e-06, "loss": 0.3031, "step": 2827 }, { "epoch": 0.13, "grad_norm": 0.709853810218756, "learning_rate": 4.982804353557041e-06, "loss": 0.3428, "step": 2828 }, { "epoch": 0.13, "grad_norm": 0.7198002082034077, "learning_rate": 4.982782140974724e-06, "loss": 0.3477, "step": 2829 }, { "epoch": 0.13, "grad_norm": 0.6543329822480511, "learning_rate": 4.982759914104619e-06, "loss": 0.296, "step": 2830 }, { "epoch": 0.13, "grad_norm": 0.6661750939046363, "learning_rate": 4.982737672946855e-06, "loss": 0.3188, "step": 2831 }, { "epoch": 0.13, "grad_norm": 0.6627322078205536, "learning_rate": 4.9827154175015605e-06, "loss": 0.3179, "step": 2832 }, { "epoch": 0.13, "grad_norm": 0.7694101688457367, "learning_rate": 4.9826931477688615e-06, "loss": 0.3099, "step": 2833 }, { "epoch": 0.13, "grad_norm": 0.7358998479286714, "learning_rate": 4.982670863748888e-06, "loss": 0.3152, "step": 2834 }, { "epoch": 0.13, "grad_norm": 0.6375817449277336, "learning_rate": 4.982648565441767e-06, "loss": 0.2969, "step": 2835 }, { "epoch": 0.13, "grad_norm": 0.674129629871453, "learning_rate": 4.982626252847628e-06, "loss": 0.3088, "step": 2836 }, { "epoch": 0.13, "grad_norm": 0.7793755233341537, "learning_rate": 4.982603925966599e-06, "loss": 0.3391, "step": 2837 }, { "epoch": 0.13, "grad_norm": 0.7082102739267263, "learning_rate": 4.9825815847988085e-06, "loss": 0.3304, "step": 2838 }, { "epoch": 0.13, "grad_norm": 0.7134271204625757, "learning_rate": 4.982559229344385e-06, "loss": 0.2895, "step": 2839 }, { "epoch": 0.13, "grad_norm": 0.6925814383692093, "learning_rate": 4.982536859603457e-06, "loss": 0.3191, "step": 2840 }, { "epoch": 0.13, "grad_norm": 0.6982690124585428, "learning_rate": 4.982514475576153e-06, "loss": 0.3169, "step": 2841 }, { "epoch": 0.13, "grad_norm": 0.7035195762409365, "learning_rate": 4.982492077262602e-06, "loss": 0.3162, "step": 2842 }, { "epoch": 0.13, "grad_norm": 0.6650524734619729, "learning_rate": 4.982469664662933e-06, "loss": 0.311, "step": 2843 }, { "epoch": 0.13, "grad_norm": 0.696647482799808, "learning_rate": 4.982447237777276e-06, "loss": 0.3121, "step": 2844 }, { "epoch": 0.13, "grad_norm": 0.6646094832620065, "learning_rate": 4.982424796605758e-06, "loss": 0.3023, "step": 2845 }, { "epoch": 0.13, "grad_norm": 0.7159763060832582, "learning_rate": 4.98240234114851e-06, "loss": 0.3257, "step": 2846 }, { "epoch": 0.13, "grad_norm": 0.7880057636605249, "learning_rate": 4.9823798714056605e-06, "loss": 0.3217, "step": 2847 }, { "epoch": 0.13, "grad_norm": 0.6360308025891539, "learning_rate": 4.982357387377338e-06, "loss": 0.3148, "step": 2848 }, { "epoch": 0.13, "grad_norm": 0.7108321676991639, "learning_rate": 4.9823348890636735e-06, "loss": 0.3101, "step": 2849 }, { "epoch": 0.13, "grad_norm": 0.6845068028960403, "learning_rate": 4.9823123764647955e-06, "loss": 0.2923, "step": 2850 }, { "epoch": 0.13, "grad_norm": 0.6638311499320386, "learning_rate": 4.982289849580834e-06, "loss": 0.2941, "step": 2851 }, { "epoch": 0.13, "grad_norm": 0.6837366424962289, "learning_rate": 4.9822673084119165e-06, "loss": 0.3197, "step": 2852 }, { "epoch": 0.13, "grad_norm": 0.657037315797205, "learning_rate": 4.9822447529581764e-06, "loss": 0.2906, "step": 2853 }, { "epoch": 0.13, "grad_norm": 0.791812413015299, "learning_rate": 4.982222183219741e-06, "loss": 0.3356, "step": 2854 }, { "epoch": 0.13, "grad_norm": 0.6485466473834178, "learning_rate": 4.982199599196741e-06, "loss": 0.282, "step": 2855 }, { "epoch": 0.13, "grad_norm": 0.6826499048004787, "learning_rate": 4.982177000889306e-06, "loss": 0.3146, "step": 2856 }, { "epoch": 0.13, "grad_norm": 0.6991965577972341, "learning_rate": 4.982154388297566e-06, "loss": 0.31, "step": 2857 }, { "epoch": 0.13, "grad_norm": 0.668112343847632, "learning_rate": 4.982131761421651e-06, "loss": 0.2979, "step": 2858 }, { "epoch": 0.13, "grad_norm": 0.7288607151377567, "learning_rate": 4.982109120261692e-06, "loss": 0.3206, "step": 2859 }, { "epoch": 0.13, "grad_norm": 0.6847481447737396, "learning_rate": 4.98208646481782e-06, "loss": 0.3129, "step": 2860 }, { "epoch": 0.13, "grad_norm": 0.7377267725355848, "learning_rate": 4.982063795090163e-06, "loss": 0.3274, "step": 2861 }, { "epoch": 0.13, "grad_norm": 0.6491162467654843, "learning_rate": 4.982041111078853e-06, "loss": 0.309, "step": 2862 }, { "epoch": 0.13, "grad_norm": 0.6453518032601665, "learning_rate": 4.98201841278402e-06, "loss": 0.3039, "step": 2863 }, { "epoch": 0.13, "grad_norm": 0.6828987724932518, "learning_rate": 4.981995700205795e-06, "loss": 0.3347, "step": 2864 }, { "epoch": 0.13, "grad_norm": 0.7003639795352645, "learning_rate": 4.981972973344309e-06, "loss": 0.3132, "step": 2865 }, { "epoch": 0.13, "grad_norm": 0.724494407580755, "learning_rate": 4.981950232199692e-06, "loss": 0.3202, "step": 2866 }, { "epoch": 0.13, "grad_norm": 0.731821540541618, "learning_rate": 4.981927476772075e-06, "loss": 0.303, "step": 2867 }, { "epoch": 0.13, "grad_norm": 0.7014098666340283, "learning_rate": 4.98190470706159e-06, "loss": 0.3177, "step": 2868 }, { "epoch": 0.13, "grad_norm": 0.7545191597038859, "learning_rate": 4.981881923068366e-06, "loss": 0.306, "step": 2869 }, { "epoch": 0.13, "grad_norm": 0.6839357771433319, "learning_rate": 4.981859124792537e-06, "loss": 0.3097, "step": 2870 }, { "epoch": 0.13, "grad_norm": 0.7217831152840676, "learning_rate": 4.981836312234231e-06, "loss": 0.303, "step": 2871 }, { "epoch": 0.13, "grad_norm": 1.1871330973832201, "learning_rate": 4.9818134853935815e-06, "loss": 0.3228, "step": 2872 }, { "epoch": 0.13, "grad_norm": 0.646778177394147, "learning_rate": 4.981790644270718e-06, "loss": 0.2924, "step": 2873 }, { "epoch": 0.13, "grad_norm": 0.7230647438279234, "learning_rate": 4.981767788865774e-06, "loss": 0.3205, "step": 2874 }, { "epoch": 0.13, "grad_norm": 0.7043246824695116, "learning_rate": 4.981744919178882e-06, "loss": 0.3067, "step": 2875 }, { "epoch": 0.13, "grad_norm": 0.7176838087439297, "learning_rate": 4.981722035210169e-06, "loss": 0.3216, "step": 2876 }, { "epoch": 0.13, "grad_norm": 0.709893211199756, "learning_rate": 4.98169913695977e-06, "loss": 0.2996, "step": 2877 }, { "epoch": 0.13, "grad_norm": 0.7427818259703104, "learning_rate": 4.981676224427817e-06, "loss": 0.3031, "step": 2878 }, { "epoch": 0.13, "grad_norm": 0.66562009639244, "learning_rate": 4.981653297614441e-06, "loss": 0.2998, "step": 2879 }, { "epoch": 0.13, "grad_norm": 0.6805524426430655, "learning_rate": 4.981630356519774e-06, "loss": 0.3116, "step": 2880 }, { "epoch": 0.13, "grad_norm": 0.6723182415120426, "learning_rate": 4.981607401143947e-06, "loss": 0.3244, "step": 2881 }, { "epoch": 0.14, "grad_norm": 0.690456166475555, "learning_rate": 4.981584431487095e-06, "loss": 0.3033, "step": 2882 }, { "epoch": 0.14, "grad_norm": 0.6708977014050947, "learning_rate": 4.981561447549347e-06, "loss": 0.3077, "step": 2883 }, { "epoch": 0.14, "grad_norm": 0.6428508974188366, "learning_rate": 4.981538449330836e-06, "loss": 0.304, "step": 2884 }, { "epoch": 0.14, "grad_norm": 0.7012445174673048, "learning_rate": 4.981515436831697e-06, "loss": 0.3372, "step": 2885 }, { "epoch": 0.14, "grad_norm": 0.6837771078707225, "learning_rate": 4.981492410052058e-06, "loss": 0.2955, "step": 2886 }, { "epoch": 0.14, "grad_norm": 0.6929453666562796, "learning_rate": 4.9814693689920555e-06, "loss": 0.2995, "step": 2887 }, { "epoch": 0.14, "grad_norm": 0.6555601393630603, "learning_rate": 4.98144631365182e-06, "loss": 0.2915, "step": 2888 }, { "epoch": 0.14, "grad_norm": 0.6811265712013894, "learning_rate": 4.9814232440314845e-06, "loss": 0.2999, "step": 2889 }, { "epoch": 0.14, "grad_norm": 0.689158718564099, "learning_rate": 4.981400160131182e-06, "loss": 0.3004, "step": 2890 }, { "epoch": 0.14, "grad_norm": 0.6543872446108063, "learning_rate": 4.981377061951045e-06, "loss": 0.3015, "step": 2891 }, { "epoch": 0.14, "grad_norm": 0.6006401616845479, "learning_rate": 4.981353949491207e-06, "loss": 0.2874, "step": 2892 }, { "epoch": 0.14, "grad_norm": 0.6576207961810799, "learning_rate": 4.981330822751801e-06, "loss": 0.3031, "step": 2893 }, { "epoch": 0.14, "grad_norm": 0.8243971037356641, "learning_rate": 4.981307681732959e-06, "loss": 0.3409, "step": 2894 }, { "epoch": 0.14, "grad_norm": 0.6545386436449976, "learning_rate": 4.981284526434815e-06, "loss": 0.3141, "step": 2895 }, { "epoch": 0.14, "grad_norm": 0.6562476165631536, "learning_rate": 4.981261356857503e-06, "loss": 0.3173, "step": 2896 }, { "epoch": 0.14, "grad_norm": 0.6260657907545268, "learning_rate": 4.981238173001155e-06, "loss": 0.3108, "step": 2897 }, { "epoch": 0.14, "grad_norm": 0.6730050253990131, "learning_rate": 4.981214974865906e-06, "loss": 0.3017, "step": 2898 }, { "epoch": 0.14, "grad_norm": 0.6820846936593642, "learning_rate": 4.981191762451887e-06, "loss": 0.3156, "step": 2899 }, { "epoch": 0.14, "grad_norm": 0.7504873277675628, "learning_rate": 4.9811685357592335e-06, "loss": 0.3103, "step": 2900 }, { "epoch": 0.14, "grad_norm": 0.6653596274127646, "learning_rate": 4.981145294788079e-06, "loss": 0.3062, "step": 2901 }, { "epoch": 0.14, "grad_norm": 0.7279057756640062, "learning_rate": 4.981122039538556e-06, "loss": 0.3189, "step": 2902 }, { "epoch": 0.14, "grad_norm": 0.6327185360536858, "learning_rate": 4.9810987700108006e-06, "loss": 0.3018, "step": 2903 }, { "epoch": 0.14, "grad_norm": 0.6954776076308335, "learning_rate": 4.9810754862049446e-06, "loss": 0.3154, "step": 2904 }, { "epoch": 0.14, "grad_norm": 0.7244939845220488, "learning_rate": 4.9810521881211226e-06, "loss": 0.3074, "step": 2905 }, { "epoch": 0.14, "grad_norm": 0.7377403305845263, "learning_rate": 4.981028875759469e-06, "loss": 0.3162, "step": 2906 }, { "epoch": 0.14, "grad_norm": 0.7475041173073599, "learning_rate": 4.9810055491201185e-06, "loss": 0.3209, "step": 2907 }, { "epoch": 0.14, "grad_norm": 0.692710183360766, "learning_rate": 4.980982208203204e-06, "loss": 0.3422, "step": 2908 }, { "epoch": 0.14, "grad_norm": 0.6353637703353041, "learning_rate": 4.980958853008861e-06, "loss": 0.3126, "step": 2909 }, { "epoch": 0.14, "grad_norm": 0.7103667782654531, "learning_rate": 4.980935483537222e-06, "loss": 0.3081, "step": 2910 }, { "epoch": 0.14, "grad_norm": 0.6597696845158647, "learning_rate": 4.980912099788424e-06, "loss": 0.3257, "step": 2911 }, { "epoch": 0.14, "grad_norm": 0.693068448034552, "learning_rate": 4.9808887017626e-06, "loss": 0.3365, "step": 2912 }, { "epoch": 0.14, "grad_norm": 0.6226520476194174, "learning_rate": 4.980865289459886e-06, "loss": 0.294, "step": 2913 }, { "epoch": 0.14, "grad_norm": 0.6311503892793299, "learning_rate": 4.980841862880415e-06, "loss": 0.3013, "step": 2914 }, { "epoch": 0.14, "grad_norm": 0.6758086419529246, "learning_rate": 4.9808184220243225e-06, "loss": 0.2875, "step": 2915 }, { "epoch": 0.14, "grad_norm": 0.6625359038819385, "learning_rate": 4.980794966891743e-06, "loss": 0.2951, "step": 2916 }, { "epoch": 0.14, "grad_norm": 0.6102301935387882, "learning_rate": 4.980771497482814e-06, "loss": 0.2799, "step": 2917 }, { "epoch": 0.14, "grad_norm": 0.6721505530441206, "learning_rate": 4.980748013797666e-06, "loss": 0.3071, "step": 2918 }, { "epoch": 0.14, "grad_norm": 0.6634152139480582, "learning_rate": 4.980724515836438e-06, "loss": 0.3177, "step": 2919 }, { "epoch": 0.14, "grad_norm": 0.596814400520631, "learning_rate": 4.980701003599264e-06, "loss": 0.2765, "step": 2920 }, { "epoch": 0.14, "grad_norm": 0.6856935465961926, "learning_rate": 4.9806774770862796e-06, "loss": 0.324, "step": 2921 }, { "epoch": 0.14, "grad_norm": 0.6646243037478532, "learning_rate": 4.980653936297619e-06, "loss": 0.3263, "step": 2922 }, { "epoch": 0.14, "grad_norm": 0.6663506690397718, "learning_rate": 4.980630381233419e-06, "loss": 0.3068, "step": 2923 }, { "epoch": 0.14, "grad_norm": 0.6503113947276866, "learning_rate": 4.980606811893814e-06, "loss": 0.3124, "step": 2924 }, { "epoch": 0.14, "grad_norm": 0.6368664318600611, "learning_rate": 4.9805832282789414e-06, "loss": 0.2921, "step": 2925 }, { "epoch": 0.14, "grad_norm": 0.6173056132687096, "learning_rate": 4.980559630388934e-06, "loss": 0.3111, "step": 2926 }, { "epoch": 0.14, "grad_norm": 0.677583707526189, "learning_rate": 4.980536018223931e-06, "loss": 0.2944, "step": 2927 }, { "epoch": 0.14, "grad_norm": 0.6945186139024356, "learning_rate": 4.980512391784066e-06, "loss": 0.3113, "step": 2928 }, { "epoch": 0.14, "grad_norm": 0.6896329094718376, "learning_rate": 4.980488751069476e-06, "loss": 0.3228, "step": 2929 }, { "epoch": 0.14, "grad_norm": 0.6320093698648173, "learning_rate": 4.980465096080297e-06, "loss": 0.3052, "step": 2930 }, { "epoch": 0.14, "grad_norm": 0.7207370374153755, "learning_rate": 4.980441426816663e-06, "loss": 0.309, "step": 2931 }, { "epoch": 0.14, "grad_norm": 0.6543812826604832, "learning_rate": 4.980417743278714e-06, "loss": 0.3037, "step": 2932 }, { "epoch": 0.14, "grad_norm": 0.6450865696432099, "learning_rate": 4.980394045466583e-06, "loss": 0.2972, "step": 2933 }, { "epoch": 0.14, "grad_norm": 0.6469346670915633, "learning_rate": 4.980370333380409e-06, "loss": 0.2994, "step": 2934 }, { "epoch": 0.14, "grad_norm": 0.7223415693301952, "learning_rate": 4.980346607020327e-06, "loss": 0.3226, "step": 2935 }, { "epoch": 0.14, "grad_norm": 0.6462239435407594, "learning_rate": 4.9803228663864725e-06, "loss": 0.3018, "step": 2936 }, { "epoch": 0.14, "grad_norm": 0.6990031747789872, "learning_rate": 4.980299111478985e-06, "loss": 0.2998, "step": 2937 }, { "epoch": 0.14, "grad_norm": 0.6438034322924944, "learning_rate": 4.980275342297997e-06, "loss": 0.2771, "step": 2938 }, { "epoch": 0.14, "grad_norm": 0.6361989791759148, "learning_rate": 4.980251558843649e-06, "loss": 0.31, "step": 2939 }, { "epoch": 0.14, "grad_norm": 0.6643454305947235, "learning_rate": 4.980227761116078e-06, "loss": 0.3037, "step": 2940 }, { "epoch": 0.14, "grad_norm": 0.6722055240805427, "learning_rate": 4.980203949115418e-06, "loss": 0.298, "step": 2941 }, { "epoch": 0.14, "grad_norm": 0.6797437284176747, "learning_rate": 4.980180122841808e-06, "loss": 0.3071, "step": 2942 }, { "epoch": 0.14, "grad_norm": 0.6715402653575973, "learning_rate": 4.980156282295386e-06, "loss": 0.2802, "step": 2943 }, { "epoch": 0.14, "grad_norm": 0.7020600515856688, "learning_rate": 4.980132427476287e-06, "loss": 0.3074, "step": 2944 }, { "epoch": 0.14, "grad_norm": 0.6501000099641986, "learning_rate": 4.9801085583846486e-06, "loss": 0.2907, "step": 2945 }, { "epoch": 0.14, "grad_norm": 0.6016038238452592, "learning_rate": 4.9800846750206096e-06, "loss": 0.2884, "step": 2946 }, { "epoch": 0.14, "grad_norm": 0.6777874886480868, "learning_rate": 4.980060777384306e-06, "loss": 0.3421, "step": 2947 }, { "epoch": 0.14, "grad_norm": 0.6854544792588891, "learning_rate": 4.980036865475877e-06, "loss": 0.3143, "step": 2948 }, { "epoch": 0.14, "grad_norm": 0.6460514400112426, "learning_rate": 4.980012939295458e-06, "loss": 0.2924, "step": 2949 }, { "epoch": 0.14, "grad_norm": 0.6762125447562104, "learning_rate": 4.979988998843188e-06, "loss": 0.3373, "step": 2950 }, { "epoch": 0.14, "grad_norm": 0.6862152717141914, "learning_rate": 4.979965044119205e-06, "loss": 0.3007, "step": 2951 }, { "epoch": 0.14, "grad_norm": 0.6386565818701556, "learning_rate": 4.979941075123647e-06, "loss": 0.3205, "step": 2952 }, { "epoch": 0.14, "grad_norm": 0.721385968067536, "learning_rate": 4.979917091856651e-06, "loss": 0.3118, "step": 2953 }, { "epoch": 0.14, "grad_norm": 0.6735103946117452, "learning_rate": 4.979893094318356e-06, "loss": 0.309, "step": 2954 }, { "epoch": 0.14, "grad_norm": 0.764800614249626, "learning_rate": 4.979869082508898e-06, "loss": 0.3092, "step": 2955 }, { "epoch": 0.14, "grad_norm": 0.6528481089296237, "learning_rate": 4.9798450564284175e-06, "loss": 0.2967, "step": 2956 }, { "epoch": 0.14, "grad_norm": 0.6728524102762574, "learning_rate": 4.979821016077053e-06, "loss": 0.3181, "step": 2957 }, { "epoch": 0.14, "grad_norm": 0.6044784656182688, "learning_rate": 4.97979696145494e-06, "loss": 0.298, "step": 2958 }, { "epoch": 0.14, "grad_norm": 0.7306609732975013, "learning_rate": 4.97977289256222e-06, "loss": 0.3319, "step": 2959 }, { "epoch": 0.14, "grad_norm": 0.6671464336216066, "learning_rate": 4.979748809399031e-06, "loss": 0.2968, "step": 2960 }, { "epoch": 0.14, "grad_norm": 0.6099403086699732, "learning_rate": 4.979724711965509e-06, "loss": 0.2892, "step": 2961 }, { "epoch": 0.14, "grad_norm": 0.6458244143398582, "learning_rate": 4.9797006002617954e-06, "loss": 0.2936, "step": 2962 }, { "epoch": 0.14, "grad_norm": 0.6775753502716788, "learning_rate": 4.979676474288028e-06, "loss": 0.2988, "step": 2963 }, { "epoch": 0.14, "grad_norm": 0.6182678502409639, "learning_rate": 4.979652334044346e-06, "loss": 0.2888, "step": 2964 }, { "epoch": 0.14, "grad_norm": 0.6344210560201493, "learning_rate": 4.979628179530888e-06, "loss": 0.2991, "step": 2965 }, { "epoch": 0.14, "grad_norm": 0.6291479810410957, "learning_rate": 4.9796040107477925e-06, "loss": 0.2928, "step": 2966 }, { "epoch": 0.14, "grad_norm": 0.6903576794065232, "learning_rate": 4.9795798276952e-06, "loss": 0.3161, "step": 2967 }, { "epoch": 0.14, "grad_norm": 0.6522985875409987, "learning_rate": 4.979555630373248e-06, "loss": 0.3066, "step": 2968 }, { "epoch": 0.14, "grad_norm": 0.6382018333837904, "learning_rate": 4.979531418782078e-06, "loss": 0.3056, "step": 2969 }, { "epoch": 0.14, "grad_norm": 0.6902451955745146, "learning_rate": 4.979507192921826e-06, "loss": 0.325, "step": 2970 }, { "epoch": 0.14, "grad_norm": 0.6445419909191075, "learning_rate": 4.979482952792634e-06, "loss": 0.3101, "step": 2971 }, { "epoch": 0.14, "grad_norm": 0.6989777346115954, "learning_rate": 4.979458698394641e-06, "loss": 0.3064, "step": 2972 }, { "epoch": 0.14, "grad_norm": 0.7232908736221767, "learning_rate": 4.979434429727987e-06, "loss": 0.3369, "step": 2973 }, { "epoch": 0.14, "grad_norm": 0.7817332338698583, "learning_rate": 4.9794101467928104e-06, "loss": 0.3193, "step": 2974 }, { "epoch": 0.14, "grad_norm": 0.661952808351297, "learning_rate": 4.979385849589251e-06, "loss": 0.3139, "step": 2975 }, { "epoch": 0.14, "grad_norm": 0.6981358705928171, "learning_rate": 4.97936153811745e-06, "loss": 0.3228, "step": 2976 }, { "epoch": 0.14, "grad_norm": 0.7135109189079495, "learning_rate": 4.979337212377546e-06, "loss": 0.3184, "step": 2977 }, { "epoch": 0.14, "grad_norm": 0.7046319823416547, "learning_rate": 4.97931287236968e-06, "loss": 0.3022, "step": 2978 }, { "epoch": 0.14, "grad_norm": 0.7204466112201489, "learning_rate": 4.979288518093991e-06, "loss": 0.2989, "step": 2979 }, { "epoch": 0.14, "grad_norm": 0.6693175944195726, "learning_rate": 4.97926414955062e-06, "loss": 0.2977, "step": 2980 }, { "epoch": 0.14, "grad_norm": 0.6613990487426928, "learning_rate": 4.979239766739707e-06, "loss": 0.2886, "step": 2981 }, { "epoch": 0.14, "grad_norm": 0.7921219815326375, "learning_rate": 4.979215369661393e-06, "loss": 0.3244, "step": 2982 }, { "epoch": 0.14, "grad_norm": 0.6753521767575321, "learning_rate": 4.979190958315816e-06, "loss": 0.2929, "step": 2983 }, { "epoch": 0.14, "grad_norm": 0.7423708659973618, "learning_rate": 4.97916653270312e-06, "loss": 0.3178, "step": 2984 }, { "epoch": 0.14, "grad_norm": 0.7462640914270692, "learning_rate": 4.979142092823442e-06, "loss": 0.3153, "step": 2985 }, { "epoch": 0.14, "grad_norm": 0.6457459548717508, "learning_rate": 4.979117638676926e-06, "loss": 0.2797, "step": 2986 }, { "epoch": 0.14, "grad_norm": 0.6957773386482031, "learning_rate": 4.97909317026371e-06, "loss": 0.3067, "step": 2987 }, { "epoch": 0.14, "grad_norm": 0.6775644142391595, "learning_rate": 4.979068687583937e-06, "loss": 0.3105, "step": 2988 }, { "epoch": 0.14, "grad_norm": 0.6756564100726558, "learning_rate": 4.979044190637745e-06, "loss": 0.2991, "step": 2989 }, { "epoch": 0.14, "grad_norm": 0.5896990067852704, "learning_rate": 4.979019679425278e-06, "loss": 0.2944, "step": 2990 }, { "epoch": 0.14, "grad_norm": 0.7314540587292746, "learning_rate": 4.978995153946676e-06, "loss": 0.3115, "step": 2991 }, { "epoch": 0.14, "grad_norm": 0.7354830329367372, "learning_rate": 4.97897061420208e-06, "loss": 0.3209, "step": 2992 }, { "epoch": 0.14, "grad_norm": 0.6616748450455244, "learning_rate": 4.97894606019163e-06, "loss": 0.3253, "step": 2993 }, { "epoch": 0.14, "grad_norm": 0.652247639834857, "learning_rate": 4.978921491915469e-06, "loss": 0.3133, "step": 2994 }, { "epoch": 0.14, "grad_norm": 0.7279338870534404, "learning_rate": 4.9788969093737385e-06, "loss": 0.3148, "step": 2995 }, { "epoch": 0.14, "grad_norm": 0.6691524022923612, "learning_rate": 4.978872312566579e-06, "loss": 0.2884, "step": 2996 }, { "epoch": 0.14, "grad_norm": 0.745761840934808, "learning_rate": 4.9788477014941326e-06, "loss": 0.3219, "step": 2997 }, { "epoch": 0.14, "grad_norm": 0.6411169814884573, "learning_rate": 4.97882307615654e-06, "loss": 0.3113, "step": 2998 }, { "epoch": 0.14, "grad_norm": 0.6624491571760536, "learning_rate": 4.978798436553945e-06, "loss": 0.3179, "step": 2999 }, { "epoch": 0.14, "grad_norm": 0.6992997293496878, "learning_rate": 4.978773782686486e-06, "loss": 0.301, "step": 3000 }, { "epoch": 0.14, "grad_norm": 0.6244983208797048, "learning_rate": 4.978749114554308e-06, "loss": 0.2965, "step": 3001 }, { "epoch": 0.14, "grad_norm": 0.6923493090226205, "learning_rate": 4.9787244321575515e-06, "loss": 0.3119, "step": 3002 }, { "epoch": 0.14, "grad_norm": 0.6909966090497235, "learning_rate": 4.9786997354963595e-06, "loss": 0.3045, "step": 3003 }, { "epoch": 0.14, "grad_norm": 0.6298801087073285, "learning_rate": 4.9786750245708734e-06, "loss": 0.3014, "step": 3004 }, { "epoch": 0.14, "grad_norm": 0.6619380558750827, "learning_rate": 4.978650299381235e-06, "loss": 0.2917, "step": 3005 }, { "epoch": 0.14, "grad_norm": 0.7161252014275274, "learning_rate": 4.978625559927588e-06, "loss": 0.3235, "step": 3006 }, { "epoch": 0.14, "grad_norm": 0.6678550041040788, "learning_rate": 4.978600806210073e-06, "loss": 0.3225, "step": 3007 }, { "epoch": 0.14, "grad_norm": 0.6354690742997318, "learning_rate": 4.978576038228834e-06, "loss": 0.3226, "step": 3008 }, { "epoch": 0.14, "grad_norm": 0.6470425806355691, "learning_rate": 4.9785512559840125e-06, "loss": 0.3128, "step": 3009 }, { "epoch": 0.14, "grad_norm": 0.6410848096955444, "learning_rate": 4.978526459475751e-06, "loss": 0.3339, "step": 3010 }, { "epoch": 0.14, "grad_norm": 0.6096615379385852, "learning_rate": 4.9785016487041934e-06, "loss": 0.2879, "step": 3011 }, { "epoch": 0.14, "grad_norm": 0.6504932590580387, "learning_rate": 4.9784768236694815e-06, "loss": 0.2643, "step": 3012 }, { "epoch": 0.14, "grad_norm": 0.6385635157863873, "learning_rate": 4.978451984371759e-06, "loss": 0.306, "step": 3013 }, { "epoch": 0.14, "grad_norm": 0.6678151212218691, "learning_rate": 4.9784271308111675e-06, "loss": 0.3329, "step": 3014 }, { "epoch": 0.14, "grad_norm": 0.7208581008401224, "learning_rate": 4.978402262987852e-06, "loss": 0.31, "step": 3015 }, { "epoch": 0.14, "grad_norm": 0.7101623161800386, "learning_rate": 4.978377380901952e-06, "loss": 0.3294, "step": 3016 }, { "epoch": 0.14, "grad_norm": 0.6783794576507698, "learning_rate": 4.9783524845536144e-06, "loss": 0.3192, "step": 3017 }, { "epoch": 0.14, "grad_norm": 0.6633502667902396, "learning_rate": 4.978327573942982e-06, "loss": 0.3032, "step": 3018 }, { "epoch": 0.14, "grad_norm": 0.7005732042927495, "learning_rate": 4.978302649070197e-06, "loss": 0.3022, "step": 3019 }, { "epoch": 0.14, "grad_norm": 0.7008389334858319, "learning_rate": 4.978277709935402e-06, "loss": 0.3078, "step": 3020 }, { "epoch": 0.14, "grad_norm": 0.6528585596450588, "learning_rate": 4.978252756538743e-06, "loss": 0.3055, "step": 3021 }, { "epoch": 0.14, "grad_norm": 0.6694982893837929, "learning_rate": 4.978227788880362e-06, "loss": 0.3086, "step": 3022 }, { "epoch": 0.14, "grad_norm": 0.690861255151805, "learning_rate": 4.978202806960402e-06, "loss": 0.3345, "step": 3023 }, { "epoch": 0.14, "grad_norm": 0.6891817472005949, "learning_rate": 4.978177810779008e-06, "loss": 0.3115, "step": 3024 }, { "epoch": 0.14, "grad_norm": 0.7206084767120006, "learning_rate": 4.9781528003363245e-06, "loss": 0.3114, "step": 3025 }, { "epoch": 0.14, "grad_norm": 0.6935705620070478, "learning_rate": 4.978127775632494e-06, "loss": 0.3088, "step": 3026 }, { "epoch": 0.14, "grad_norm": 0.7076794345110523, "learning_rate": 4.978102736667661e-06, "loss": 0.3286, "step": 3027 }, { "epoch": 0.14, "grad_norm": 0.6840717997459809, "learning_rate": 4.978077683441969e-06, "loss": 0.3163, "step": 3028 }, { "epoch": 0.14, "grad_norm": 0.6747999606369702, "learning_rate": 4.9780526159555645e-06, "loss": 0.312, "step": 3029 }, { "epoch": 0.14, "grad_norm": 0.6261720412219386, "learning_rate": 4.978027534208588e-06, "loss": 0.2835, "step": 3030 }, { "epoch": 0.14, "grad_norm": 0.6510148244028909, "learning_rate": 4.9780024382011875e-06, "loss": 0.2993, "step": 3031 }, { "epoch": 0.14, "grad_norm": 0.6726888768388725, "learning_rate": 4.977977327933504e-06, "loss": 0.2835, "step": 3032 }, { "epoch": 0.14, "grad_norm": 0.8376852172219259, "learning_rate": 4.977952203405685e-06, "loss": 0.3404, "step": 3033 }, { "epoch": 0.14, "grad_norm": 0.6959316724365567, "learning_rate": 4.977927064617874e-06, "loss": 0.3123, "step": 3034 }, { "epoch": 0.14, "grad_norm": 0.6887316229204565, "learning_rate": 4.977901911570215e-06, "loss": 0.3155, "step": 3035 }, { "epoch": 0.14, "grad_norm": 0.6611732219203684, "learning_rate": 4.9778767442628535e-06, "loss": 0.3036, "step": 3036 }, { "epoch": 0.14, "grad_norm": 0.68247544841761, "learning_rate": 4.977851562695935e-06, "loss": 0.3108, "step": 3037 }, { "epoch": 0.14, "grad_norm": 0.7261559239957988, "learning_rate": 4.977826366869602e-06, "loss": 0.2967, "step": 3038 }, { "epoch": 0.14, "grad_norm": 0.7027897936077918, "learning_rate": 4.977801156784001e-06, "loss": 0.3223, "step": 3039 }, { "epoch": 0.14, "grad_norm": 0.6555789315962111, "learning_rate": 4.9777759324392784e-06, "loss": 0.3085, "step": 3040 }, { "epoch": 0.14, "grad_norm": 0.6391099759326939, "learning_rate": 4.977750693835578e-06, "loss": 0.2902, "step": 3041 }, { "epoch": 0.14, "grad_norm": 0.6570298094997383, "learning_rate": 4.977725440973045e-06, "loss": 0.3036, "step": 3042 }, { "epoch": 0.14, "grad_norm": 0.7401828677595584, "learning_rate": 4.977700173851824e-06, "loss": 0.3331, "step": 3043 }, { "epoch": 0.14, "grad_norm": 0.7186324951959578, "learning_rate": 4.977674892472062e-06, "loss": 0.3267, "step": 3044 }, { "epoch": 0.14, "grad_norm": 0.7692751639023974, "learning_rate": 4.9776495968339034e-06, "loss": 0.2961, "step": 3045 }, { "epoch": 0.14, "grad_norm": 0.6456655656957209, "learning_rate": 4.977624286937493e-06, "loss": 0.2988, "step": 3046 }, { "epoch": 0.14, "grad_norm": 0.7076467406817543, "learning_rate": 4.977598962782979e-06, "loss": 0.3136, "step": 3047 }, { "epoch": 0.14, "grad_norm": 0.740956035798324, "learning_rate": 4.977573624370506e-06, "loss": 0.3029, "step": 3048 }, { "epoch": 0.14, "grad_norm": 0.6865426154203156, "learning_rate": 4.977548271700219e-06, "loss": 0.2898, "step": 3049 }, { "epoch": 0.14, "grad_norm": 0.7394295404879839, "learning_rate": 4.977522904772264e-06, "loss": 0.307, "step": 3050 }, { "epoch": 0.14, "grad_norm": 0.7394559927989817, "learning_rate": 4.977497523586788e-06, "loss": 0.3107, "step": 3051 }, { "epoch": 0.14, "grad_norm": 0.7407073512811426, "learning_rate": 4.977472128143936e-06, "loss": 0.3418, "step": 3052 }, { "epoch": 0.14, "grad_norm": 0.7180181495679662, "learning_rate": 4.977446718443855e-06, "loss": 0.3085, "step": 3053 }, { "epoch": 0.14, "grad_norm": 0.6894796969358534, "learning_rate": 4.97742129448669e-06, "loss": 0.3051, "step": 3054 }, { "epoch": 0.14, "grad_norm": 0.7234824624871982, "learning_rate": 4.977395856272589e-06, "loss": 0.3113, "step": 3055 }, { "epoch": 0.14, "grad_norm": 0.6270742500621832, "learning_rate": 4.9773704038016975e-06, "loss": 0.3206, "step": 3056 }, { "epoch": 0.14, "grad_norm": 0.7069721883347555, "learning_rate": 4.977344937074161e-06, "loss": 0.3018, "step": 3057 }, { "epoch": 0.14, "grad_norm": 0.6503340923135553, "learning_rate": 4.9773194560901286e-06, "loss": 0.2987, "step": 3058 }, { "epoch": 0.14, "grad_norm": 0.6629051795233837, "learning_rate": 4.977293960849744e-06, "loss": 0.2895, "step": 3059 }, { "epoch": 0.14, "grad_norm": 0.6612723248569033, "learning_rate": 4.977268451353156e-06, "loss": 0.3235, "step": 3060 }, { "epoch": 0.14, "grad_norm": 0.609699547833597, "learning_rate": 4.977242927600511e-06, "loss": 0.2756, "step": 3061 }, { "epoch": 0.14, "grad_norm": 0.755214279262774, "learning_rate": 4.977217389591955e-06, "loss": 0.3111, "step": 3062 }, { "epoch": 0.14, "grad_norm": 0.6979055842149194, "learning_rate": 4.977191837327635e-06, "loss": 0.2968, "step": 3063 }, { "epoch": 0.14, "grad_norm": 0.6657437457967155, "learning_rate": 4.9771662708076995e-06, "loss": 0.3074, "step": 3064 }, { "epoch": 0.14, "grad_norm": 0.7137108000855773, "learning_rate": 4.977140690032294e-06, "loss": 0.3311, "step": 3065 }, { "epoch": 0.14, "grad_norm": 0.6735692584981807, "learning_rate": 4.977115095001567e-06, "loss": 0.3236, "step": 3066 }, { "epoch": 0.14, "grad_norm": 0.729487882310804, "learning_rate": 4.977089485715666e-06, "loss": 0.3017, "step": 3067 }, { "epoch": 0.14, "grad_norm": 0.716925798313675, "learning_rate": 4.977063862174737e-06, "loss": 0.3063, "step": 3068 }, { "epoch": 0.14, "grad_norm": 0.7256174172848481, "learning_rate": 4.9770382243789275e-06, "loss": 0.3158, "step": 3069 }, { "epoch": 0.14, "grad_norm": 0.7488148296912267, "learning_rate": 4.977012572328386e-06, "loss": 0.3076, "step": 3070 }, { "epoch": 0.14, "grad_norm": 0.6604583972378147, "learning_rate": 4.976986906023259e-06, "loss": 0.2966, "step": 3071 }, { "epoch": 0.14, "grad_norm": 0.6686488603021048, "learning_rate": 4.976961225463696e-06, "loss": 0.3158, "step": 3072 }, { "epoch": 0.14, "grad_norm": 0.708206412455035, "learning_rate": 4.976935530649843e-06, "loss": 0.307, "step": 3073 }, { "epoch": 0.14, "grad_norm": 0.6578891485357465, "learning_rate": 4.976909821581849e-06, "loss": 0.281, "step": 3074 }, { "epoch": 0.14, "grad_norm": 0.8671617143365045, "learning_rate": 4.976884098259861e-06, "loss": 0.3423, "step": 3075 }, { "epoch": 0.14, "grad_norm": 0.6195952949585365, "learning_rate": 4.9768583606840285e-06, "loss": 0.2921, "step": 3076 }, { "epoch": 0.14, "grad_norm": 0.638914278904986, "learning_rate": 4.976832608854498e-06, "loss": 0.3126, "step": 3077 }, { "epoch": 0.14, "grad_norm": 0.694387664536154, "learning_rate": 4.976806842771418e-06, "loss": 0.3063, "step": 3078 }, { "epoch": 0.14, "grad_norm": 0.6785767612788691, "learning_rate": 4.9767810624349375e-06, "loss": 0.3126, "step": 3079 }, { "epoch": 0.14, "grad_norm": 0.8200625592534825, "learning_rate": 4.976755267845205e-06, "loss": 0.3459, "step": 3080 }, { "epoch": 0.14, "grad_norm": 0.7203309083234158, "learning_rate": 4.976729459002367e-06, "loss": 0.3096, "step": 3081 }, { "epoch": 0.14, "grad_norm": 0.6921058820039908, "learning_rate": 4.976703635906575e-06, "loss": 0.2986, "step": 3082 }, { "epoch": 0.14, "grad_norm": 0.628955308655588, "learning_rate": 4.9766777985579765e-06, "loss": 0.3008, "step": 3083 }, { "epoch": 0.14, "grad_norm": 0.6221078000545739, "learning_rate": 4.976651946956718e-06, "loss": 0.3061, "step": 3084 }, { "epoch": 0.14, "grad_norm": 0.7382106719661001, "learning_rate": 4.976626081102951e-06, "loss": 0.2948, "step": 3085 }, { "epoch": 0.14, "grad_norm": 0.7243779386065178, "learning_rate": 4.976600200996823e-06, "loss": 0.3022, "step": 3086 }, { "epoch": 0.14, "grad_norm": 0.665546589473621, "learning_rate": 4.976574306638484e-06, "loss": 0.3108, "step": 3087 }, { "epoch": 0.14, "grad_norm": 0.6415137429274448, "learning_rate": 4.976548398028082e-06, "loss": 0.2958, "step": 3088 }, { "epoch": 0.14, "grad_norm": 0.6739518463027209, "learning_rate": 4.976522475165766e-06, "loss": 0.3061, "step": 3089 }, { "epoch": 0.14, "grad_norm": 0.6164618134446558, "learning_rate": 4.9764965380516864e-06, "loss": 0.3068, "step": 3090 }, { "epoch": 0.14, "grad_norm": 0.7218544022876243, "learning_rate": 4.976470586685991e-06, "loss": 0.3151, "step": 3091 }, { "epoch": 0.14, "grad_norm": 0.6766367188049776, "learning_rate": 4.97644462106883e-06, "loss": 0.3094, "step": 3092 }, { "epoch": 0.14, "grad_norm": 0.6208634560796986, "learning_rate": 4.976418641200353e-06, "loss": 0.3034, "step": 3093 }, { "epoch": 0.14, "grad_norm": 0.659168679220853, "learning_rate": 4.9763926470807074e-06, "loss": 0.2948, "step": 3094 }, { "epoch": 0.14, "grad_norm": 0.9367996685692764, "learning_rate": 4.976366638710046e-06, "loss": 0.298, "step": 3095 }, { "epoch": 0.15, "grad_norm": 0.6428541583162125, "learning_rate": 4.9763406160885175e-06, "loss": 0.3025, "step": 3096 }, { "epoch": 0.15, "grad_norm": 0.6273492199057696, "learning_rate": 4.97631457921627e-06, "loss": 0.3055, "step": 3097 }, { "epoch": 0.15, "grad_norm": 0.6923268576699759, "learning_rate": 4.976288528093456e-06, "loss": 0.3288, "step": 3098 }, { "epoch": 0.15, "grad_norm": 0.8303570656861711, "learning_rate": 4.9762624627202225e-06, "loss": 0.3266, "step": 3099 }, { "epoch": 0.15, "grad_norm": 0.717726766794788, "learning_rate": 4.976236383096721e-06, "loss": 0.3042, "step": 3100 }, { "epoch": 0.15, "grad_norm": 0.659236084027623, "learning_rate": 4.976210289223102e-06, "loss": 0.3211, "step": 3101 }, { "epoch": 0.15, "grad_norm": 0.6696689533180805, "learning_rate": 4.9761841810995145e-06, "loss": 0.2993, "step": 3102 }, { "epoch": 0.15, "grad_norm": 0.6621274703839616, "learning_rate": 4.9761580587261105e-06, "loss": 0.3213, "step": 3103 }, { "epoch": 0.15, "grad_norm": 0.7188460034644938, "learning_rate": 4.976131922103039e-06, "loss": 0.3243, "step": 3104 }, { "epoch": 0.15, "grad_norm": 0.6704185981218383, "learning_rate": 4.976105771230451e-06, "loss": 0.2785, "step": 3105 }, { "epoch": 0.15, "grad_norm": 0.6720114795441703, "learning_rate": 4.976079606108495e-06, "loss": 0.3225, "step": 3106 }, { "epoch": 0.15, "grad_norm": 0.7305089074067107, "learning_rate": 4.976053426737324e-06, "loss": 0.3288, "step": 3107 }, { "epoch": 0.15, "grad_norm": 0.7325645473915474, "learning_rate": 4.976027233117088e-06, "loss": 0.3236, "step": 3108 }, { "epoch": 0.15, "grad_norm": 0.6711939187665613, "learning_rate": 4.976001025247938e-06, "loss": 0.2988, "step": 3109 }, { "epoch": 0.15, "grad_norm": 0.6616594624720675, "learning_rate": 4.9759748031300234e-06, "loss": 0.3243, "step": 3110 }, { "epoch": 0.15, "grad_norm": 0.6705657500263612, "learning_rate": 4.975948566763497e-06, "loss": 0.312, "step": 3111 }, { "epoch": 0.15, "grad_norm": 0.8152402605616357, "learning_rate": 4.97592231614851e-06, "loss": 0.322, "step": 3112 }, { "epoch": 0.15, "grad_norm": 0.7870166650594661, "learning_rate": 4.97589605128521e-06, "loss": 0.3215, "step": 3113 }, { "epoch": 0.15, "grad_norm": 0.705697243570219, "learning_rate": 4.975869772173751e-06, "loss": 0.3113, "step": 3114 }, { "epoch": 0.15, "grad_norm": 0.6984176328406172, "learning_rate": 4.975843478814285e-06, "loss": 0.3117, "step": 3115 }, { "epoch": 0.15, "grad_norm": 0.7880094972004186, "learning_rate": 4.975817171206961e-06, "loss": 0.3093, "step": 3116 }, { "epoch": 0.15, "grad_norm": 0.6455665309875842, "learning_rate": 4.975790849351932e-06, "loss": 0.2705, "step": 3117 }, { "epoch": 0.15, "grad_norm": 0.6764762750986725, "learning_rate": 4.975764513249349e-06, "loss": 0.3185, "step": 3118 }, { "epoch": 0.15, "grad_norm": 0.6528838312380406, "learning_rate": 4.9757381628993624e-06, "loss": 0.2896, "step": 3119 }, { "epoch": 0.15, "grad_norm": 0.7283336679659015, "learning_rate": 4.975711798302126e-06, "loss": 0.3017, "step": 3120 }, { "epoch": 0.15, "grad_norm": 0.6794291577917159, "learning_rate": 4.975685419457791e-06, "loss": 0.3039, "step": 3121 }, { "epoch": 0.15, "grad_norm": 0.6791916690701325, "learning_rate": 4.975659026366507e-06, "loss": 0.2968, "step": 3122 }, { "epoch": 0.15, "grad_norm": 0.6084119756179807, "learning_rate": 4.975632619028429e-06, "loss": 0.263, "step": 3123 }, { "epoch": 0.15, "grad_norm": 0.6710764791245993, "learning_rate": 4.975606197443706e-06, "loss": 0.3086, "step": 3124 }, { "epoch": 0.15, "grad_norm": 0.6531607732622622, "learning_rate": 4.975579761612493e-06, "loss": 0.3052, "step": 3125 }, { "epoch": 0.15, "grad_norm": 0.6695536880710568, "learning_rate": 4.975553311534939e-06, "loss": 0.294, "step": 3126 }, { "epoch": 0.15, "grad_norm": 0.7544117269853265, "learning_rate": 4.9755268472112e-06, "loss": 0.319, "step": 3127 }, { "epoch": 0.15, "grad_norm": 0.6952246214258846, "learning_rate": 4.975500368641425e-06, "loss": 0.3142, "step": 3128 }, { "epoch": 0.15, "grad_norm": 0.7010424926257115, "learning_rate": 4.9754738758257684e-06, "loss": 0.3151, "step": 3129 }, { "epoch": 0.15, "grad_norm": 0.6619685247339846, "learning_rate": 4.975447368764381e-06, "loss": 0.3012, "step": 3130 }, { "epoch": 0.15, "grad_norm": 0.6637314051767675, "learning_rate": 4.975420847457416e-06, "loss": 0.3124, "step": 3131 }, { "epoch": 0.15, "grad_norm": 0.6919394231930645, "learning_rate": 4.975394311905027e-06, "loss": 0.3237, "step": 3132 }, { "epoch": 0.15, "grad_norm": 0.7982328602161759, "learning_rate": 4.975367762107365e-06, "loss": 0.3354, "step": 3133 }, { "epoch": 0.15, "grad_norm": 0.6642336199585558, "learning_rate": 4.975341198064585e-06, "loss": 0.3229, "step": 3134 }, { "epoch": 0.15, "grad_norm": 0.6684035996817128, "learning_rate": 4.975314619776838e-06, "loss": 0.3314, "step": 3135 }, { "epoch": 0.15, "grad_norm": 0.7543808796818245, "learning_rate": 4.975288027244277e-06, "loss": 0.3233, "step": 3136 }, { "epoch": 0.15, "grad_norm": 0.7054572704231693, "learning_rate": 4.9752614204670555e-06, "loss": 0.3255, "step": 3137 }, { "epoch": 0.15, "grad_norm": 0.7537798302054556, "learning_rate": 4.975234799445327e-06, "loss": 0.332, "step": 3138 }, { "epoch": 0.15, "grad_norm": 0.656112374298139, "learning_rate": 4.975208164179244e-06, "loss": 0.2896, "step": 3139 }, { "epoch": 0.15, "grad_norm": 0.6457532209227682, "learning_rate": 4.975181514668961e-06, "loss": 0.2911, "step": 3140 }, { "epoch": 0.15, "grad_norm": 0.672448944359245, "learning_rate": 4.975154850914629e-06, "loss": 0.3033, "step": 3141 }, { "epoch": 0.15, "grad_norm": 0.6752970726213818, "learning_rate": 4.975128172916405e-06, "loss": 0.3111, "step": 3142 }, { "epoch": 0.15, "grad_norm": 0.6840753877270026, "learning_rate": 4.975101480674439e-06, "loss": 0.3034, "step": 3143 }, { "epoch": 0.15, "grad_norm": 0.7380859926474068, "learning_rate": 4.975074774188886e-06, "loss": 0.3408, "step": 3144 }, { "epoch": 0.15, "grad_norm": 0.6304242976016087, "learning_rate": 4.9750480534599e-06, "loss": 0.2842, "step": 3145 }, { "epoch": 0.15, "grad_norm": 0.6423747623027455, "learning_rate": 4.9750213184876354e-06, "loss": 0.3071, "step": 3146 }, { "epoch": 0.15, "grad_norm": 0.6805170692153831, "learning_rate": 4.974994569272244e-06, "loss": 0.3195, "step": 3147 }, { "epoch": 0.15, "grad_norm": 0.6692316051109336, "learning_rate": 4.9749678058138816e-06, "loss": 0.3033, "step": 3148 }, { "epoch": 0.15, "grad_norm": 0.654576939940573, "learning_rate": 4.974941028112702e-06, "loss": 0.3134, "step": 3149 }, { "epoch": 0.15, "grad_norm": 0.6961540724218068, "learning_rate": 4.974914236168858e-06, "loss": 0.3104, "step": 3150 }, { "epoch": 0.15, "grad_norm": 0.622297854264455, "learning_rate": 4.9748874299825045e-06, "loss": 0.3071, "step": 3151 }, { "epoch": 0.15, "grad_norm": 0.6697808917817195, "learning_rate": 4.974860609553796e-06, "loss": 0.3228, "step": 3152 }, { "epoch": 0.15, "grad_norm": 0.6920971545703286, "learning_rate": 4.974833774882887e-06, "loss": 0.305, "step": 3153 }, { "epoch": 0.15, "grad_norm": 0.6046296047485855, "learning_rate": 4.974806925969931e-06, "loss": 0.2848, "step": 3154 }, { "epoch": 0.15, "grad_norm": 0.6606302760731616, "learning_rate": 4.974780062815085e-06, "loss": 0.3135, "step": 3155 }, { "epoch": 0.15, "grad_norm": 0.6972632122745962, "learning_rate": 4.9747531854185e-06, "loss": 0.3244, "step": 3156 }, { "epoch": 0.15, "grad_norm": 0.695936728554159, "learning_rate": 4.974726293780333e-06, "loss": 0.2912, "step": 3157 }, { "epoch": 0.15, "grad_norm": 0.7017550416893562, "learning_rate": 4.974699387900738e-06, "loss": 0.304, "step": 3158 }, { "epoch": 0.15, "grad_norm": 0.6594925411274708, "learning_rate": 4.974672467779869e-06, "loss": 0.3008, "step": 3159 }, { "epoch": 0.15, "grad_norm": 0.7085897180656224, "learning_rate": 4.974645533417883e-06, "loss": 0.3057, "step": 3160 }, { "epoch": 0.15, "grad_norm": 0.6897204654110072, "learning_rate": 4.974618584814935e-06, "loss": 0.3123, "step": 3161 }, { "epoch": 0.15, "grad_norm": 0.671585114621687, "learning_rate": 4.974591621971177e-06, "loss": 0.304, "step": 3162 }, { "epoch": 0.15, "grad_norm": 0.6795417964857438, "learning_rate": 4.974564644886768e-06, "loss": 0.2965, "step": 3163 }, { "epoch": 0.15, "grad_norm": 0.6445443631743831, "learning_rate": 4.97453765356186e-06, "loss": 0.2921, "step": 3164 }, { "epoch": 0.15, "grad_norm": 0.7218426945036918, "learning_rate": 4.974510647996611e-06, "loss": 0.3053, "step": 3165 }, { "epoch": 0.15, "grad_norm": 0.6808986919904879, "learning_rate": 4.974483628191174e-06, "loss": 0.2982, "step": 3166 }, { "epoch": 0.15, "grad_norm": 0.7480942061197398, "learning_rate": 4.974456594145707e-06, "loss": 0.3075, "step": 3167 }, { "epoch": 0.15, "grad_norm": 0.726518183358616, "learning_rate": 4.974429545860363e-06, "loss": 0.3087, "step": 3168 }, { "epoch": 0.15, "grad_norm": 0.7232853883993895, "learning_rate": 4.974402483335299e-06, "loss": 0.287, "step": 3169 }, { "epoch": 0.15, "grad_norm": 0.6746819789892432, "learning_rate": 4.974375406570671e-06, "loss": 0.3002, "step": 3170 }, { "epoch": 0.15, "grad_norm": 0.7138916236518111, "learning_rate": 4.9743483155666345e-06, "loss": 0.3288, "step": 3171 }, { "epoch": 0.15, "grad_norm": 0.6477171911935731, "learning_rate": 4.974321210323345e-06, "loss": 0.3036, "step": 3172 }, { "epoch": 0.15, "grad_norm": 0.7091375567075604, "learning_rate": 4.974294090840958e-06, "loss": 0.3069, "step": 3173 }, { "epoch": 0.15, "grad_norm": 0.6769956082494489, "learning_rate": 4.974266957119633e-06, "loss": 0.2937, "step": 3174 }, { "epoch": 0.15, "grad_norm": 0.6639327662957786, "learning_rate": 4.974239809159521e-06, "loss": 0.3096, "step": 3175 }, { "epoch": 0.15, "grad_norm": 0.6700964330934187, "learning_rate": 4.974212646960782e-06, "loss": 0.2804, "step": 3176 }, { "epoch": 0.15, "grad_norm": 0.7264355985357308, "learning_rate": 4.974185470523571e-06, "loss": 0.3081, "step": 3177 }, { "epoch": 0.15, "grad_norm": 0.6813583805024244, "learning_rate": 4.974158279848045e-06, "loss": 0.3194, "step": 3178 }, { "epoch": 0.15, "grad_norm": 0.6943989058385741, "learning_rate": 4.974131074934359e-06, "loss": 0.3271, "step": 3179 }, { "epoch": 0.15, "grad_norm": 0.635711069345906, "learning_rate": 4.974103855782671e-06, "loss": 0.297, "step": 3180 }, { "epoch": 0.15, "grad_norm": 0.6993869200879856, "learning_rate": 4.974076622393136e-06, "loss": 0.2825, "step": 3181 }, { "epoch": 0.15, "grad_norm": 0.7550720852175578, "learning_rate": 4.974049374765913e-06, "loss": 0.3311, "step": 3182 }, { "epoch": 0.15, "grad_norm": 0.6824198393035944, "learning_rate": 4.974022112901158e-06, "loss": 0.3164, "step": 3183 }, { "epoch": 0.15, "grad_norm": 0.6813363220179817, "learning_rate": 4.973994836799026e-06, "loss": 0.2996, "step": 3184 }, { "epoch": 0.15, "grad_norm": 0.691485828508293, "learning_rate": 4.973967546459677e-06, "loss": 0.3102, "step": 3185 }, { "epoch": 0.15, "grad_norm": 0.697356370445739, "learning_rate": 4.973940241883267e-06, "loss": 0.3124, "step": 3186 }, { "epoch": 0.15, "grad_norm": 0.7051388938852045, "learning_rate": 4.973912923069951e-06, "loss": 0.2948, "step": 3187 }, { "epoch": 0.15, "grad_norm": 0.6902009554302355, "learning_rate": 4.973885590019889e-06, "loss": 0.301, "step": 3188 }, { "epoch": 0.15, "grad_norm": 0.7206157428726775, "learning_rate": 4.973858242733237e-06, "loss": 0.3314, "step": 3189 }, { "epoch": 0.15, "grad_norm": 0.6604961322980587, "learning_rate": 4.973830881210153e-06, "loss": 0.3101, "step": 3190 }, { "epoch": 0.15, "grad_norm": 0.6498596402598587, "learning_rate": 4.9738035054507935e-06, "loss": 0.2745, "step": 3191 }, { "epoch": 0.15, "grad_norm": 0.6938854197123395, "learning_rate": 4.973776115455316e-06, "loss": 0.3257, "step": 3192 }, { "epoch": 0.15, "grad_norm": 0.745915708349064, "learning_rate": 4.973748711223881e-06, "loss": 0.3188, "step": 3193 }, { "epoch": 0.15, "grad_norm": 0.7163575143021312, "learning_rate": 4.973721292756641e-06, "loss": 0.3254, "step": 3194 }, { "epoch": 0.15, "grad_norm": 0.690615064784393, "learning_rate": 4.973693860053759e-06, "loss": 0.3065, "step": 3195 }, { "epoch": 0.15, "grad_norm": 0.6818639478735496, "learning_rate": 4.973666413115389e-06, "loss": 0.305, "step": 3196 }, { "epoch": 0.15, "grad_norm": 0.6864054994074072, "learning_rate": 4.973638951941692e-06, "loss": 0.31, "step": 3197 }, { "epoch": 0.15, "grad_norm": 0.6475451055701894, "learning_rate": 4.973611476532823e-06, "loss": 0.2772, "step": 3198 }, { "epoch": 0.15, "grad_norm": 0.6690707404480594, "learning_rate": 4.973583986888943e-06, "loss": 0.3038, "step": 3199 }, { "epoch": 0.15, "grad_norm": 0.7199230454984236, "learning_rate": 4.9735564830102075e-06, "loss": 0.3086, "step": 3200 }, { "epoch": 0.15, "grad_norm": 0.6893082299271855, "learning_rate": 4.973528964896778e-06, "loss": 0.2983, "step": 3201 }, { "epoch": 0.15, "grad_norm": 0.7317071041377692, "learning_rate": 4.97350143254881e-06, "loss": 0.3264, "step": 3202 }, { "epoch": 0.15, "grad_norm": 0.6663805979339916, "learning_rate": 4.973473885966462e-06, "loss": 0.3165, "step": 3203 }, { "epoch": 0.15, "grad_norm": 0.6959752648647526, "learning_rate": 4.973446325149894e-06, "loss": 0.3218, "step": 3204 }, { "epoch": 0.15, "grad_norm": 0.6799809970035542, "learning_rate": 4.973418750099265e-06, "loss": 0.3136, "step": 3205 }, { "epoch": 0.15, "grad_norm": 0.6123841004400639, "learning_rate": 4.973391160814732e-06, "loss": 0.2899, "step": 3206 }, { "epoch": 0.15, "grad_norm": 0.6832703495798172, "learning_rate": 4.973363557296455e-06, "loss": 0.2997, "step": 3207 }, { "epoch": 0.15, "grad_norm": 0.801310664905761, "learning_rate": 4.9733359395445926e-06, "loss": 0.3143, "step": 3208 }, { "epoch": 0.15, "grad_norm": 0.6695630947261402, "learning_rate": 4.973308307559303e-06, "loss": 0.2979, "step": 3209 }, { "epoch": 0.15, "grad_norm": 0.7857698137071673, "learning_rate": 4.973280661340746e-06, "loss": 0.3077, "step": 3210 }, { "epoch": 0.15, "grad_norm": 0.6513445103632505, "learning_rate": 4.97325300088908e-06, "loss": 0.3168, "step": 3211 }, { "epoch": 0.15, "grad_norm": 0.6813598154396838, "learning_rate": 4.973225326204464e-06, "loss": 0.3085, "step": 3212 }, { "epoch": 0.15, "grad_norm": 0.6068009110426618, "learning_rate": 4.9731976372870585e-06, "loss": 0.3119, "step": 3213 }, { "epoch": 0.15, "grad_norm": 0.6538504332170891, "learning_rate": 4.973169934137023e-06, "loss": 0.2931, "step": 3214 }, { "epoch": 0.15, "grad_norm": 0.6885837807483988, "learning_rate": 4.9731422167545155e-06, "loss": 0.2989, "step": 3215 }, { "epoch": 0.15, "grad_norm": 0.7342853376288654, "learning_rate": 4.973114485139696e-06, "loss": 0.3206, "step": 3216 }, { "epoch": 0.15, "grad_norm": 0.670430508481302, "learning_rate": 4.9730867392927246e-06, "loss": 0.3075, "step": 3217 }, { "epoch": 0.15, "grad_norm": 0.6107219670781068, "learning_rate": 4.97305897921376e-06, "loss": 0.2987, "step": 3218 }, { "epoch": 0.15, "grad_norm": 0.6502409859534586, "learning_rate": 4.973031204902963e-06, "loss": 0.3069, "step": 3219 }, { "epoch": 0.15, "grad_norm": 0.6421993940609388, "learning_rate": 4.973003416360493e-06, "loss": 0.2843, "step": 3220 }, { "epoch": 0.15, "grad_norm": 0.6929324689937736, "learning_rate": 4.97297561358651e-06, "loss": 0.3058, "step": 3221 }, { "epoch": 0.15, "grad_norm": 0.6932002241104093, "learning_rate": 4.9729477965811735e-06, "loss": 0.3126, "step": 3222 }, { "epoch": 0.15, "grad_norm": 0.6424278416982133, "learning_rate": 4.972919965344645e-06, "loss": 0.3048, "step": 3223 }, { "epoch": 0.15, "grad_norm": 0.6606594325856375, "learning_rate": 4.9728921198770825e-06, "loss": 0.2909, "step": 3224 }, { "epoch": 0.15, "grad_norm": 0.6793210175464343, "learning_rate": 4.9728642601786475e-06, "loss": 0.3054, "step": 3225 }, { "epoch": 0.15, "grad_norm": 0.6546105199497717, "learning_rate": 4.972836386249501e-06, "loss": 0.299, "step": 3226 }, { "epoch": 0.15, "grad_norm": 0.7604839946317288, "learning_rate": 4.972808498089802e-06, "loss": 0.3396, "step": 3227 }, { "epoch": 0.15, "grad_norm": 0.6589708514039305, "learning_rate": 4.972780595699711e-06, "loss": 0.3161, "step": 3228 }, { "epoch": 0.15, "grad_norm": 0.695067713435095, "learning_rate": 4.97275267907939e-06, "loss": 0.3134, "step": 3229 }, { "epoch": 0.15, "grad_norm": 0.7263044047881404, "learning_rate": 4.972724748228999e-06, "loss": 0.3291, "step": 3230 }, { "epoch": 0.15, "grad_norm": 0.6753208063827687, "learning_rate": 4.9726968031486985e-06, "loss": 0.2832, "step": 3231 }, { "epoch": 0.15, "grad_norm": 0.640899741701776, "learning_rate": 4.9726688438386494e-06, "loss": 0.3168, "step": 3232 }, { "epoch": 0.15, "grad_norm": 0.6275847959277713, "learning_rate": 4.972640870299012e-06, "loss": 0.305, "step": 3233 }, { "epoch": 0.15, "grad_norm": 0.6510014774654888, "learning_rate": 4.972612882529948e-06, "loss": 0.3005, "step": 3234 }, { "epoch": 0.15, "grad_norm": 0.6904122101910353, "learning_rate": 4.972584880531619e-06, "loss": 0.3206, "step": 3235 }, { "epoch": 0.15, "grad_norm": 0.6961497043002625, "learning_rate": 4.972556864304185e-06, "loss": 0.3047, "step": 3236 }, { "epoch": 0.15, "grad_norm": 0.6940923543323565, "learning_rate": 4.972528833847807e-06, "loss": 0.3383, "step": 3237 }, { "epoch": 0.15, "grad_norm": 0.6729081009568226, "learning_rate": 4.972500789162649e-06, "loss": 0.3322, "step": 3238 }, { "epoch": 0.15, "grad_norm": 0.6218373912943967, "learning_rate": 4.972472730248869e-06, "loss": 0.2916, "step": 3239 }, { "epoch": 0.15, "grad_norm": 0.6631285420019067, "learning_rate": 4.97244465710663e-06, "loss": 0.3064, "step": 3240 }, { "epoch": 0.15, "grad_norm": 0.6864319118201903, "learning_rate": 4.972416569736092e-06, "loss": 0.2827, "step": 3241 }, { "epoch": 0.15, "grad_norm": 0.7811036193862453, "learning_rate": 4.97238846813742e-06, "loss": 0.3225, "step": 3242 }, { "epoch": 0.15, "grad_norm": 0.7057271344024982, "learning_rate": 4.972360352310774e-06, "loss": 0.3003, "step": 3243 }, { "epoch": 0.15, "grad_norm": 0.6990904048534994, "learning_rate": 4.972332222256314e-06, "loss": 0.31, "step": 3244 }, { "epoch": 0.15, "grad_norm": 0.6566429532776057, "learning_rate": 4.972304077974205e-06, "loss": 0.3168, "step": 3245 }, { "epoch": 0.15, "grad_norm": 0.668835273540177, "learning_rate": 4.972275919464606e-06, "loss": 0.3, "step": 3246 }, { "epoch": 0.15, "grad_norm": 0.7302395974438441, "learning_rate": 4.9722477467276816e-06, "loss": 0.3089, "step": 3247 }, { "epoch": 0.15, "grad_norm": 0.6719920537903835, "learning_rate": 4.9722195597635925e-06, "loss": 0.3143, "step": 3248 }, { "epoch": 0.15, "grad_norm": 0.7381200598191917, "learning_rate": 4.972191358572501e-06, "loss": 0.3161, "step": 3249 }, { "epoch": 0.15, "grad_norm": 0.6396777286212424, "learning_rate": 4.97216314315457e-06, "loss": 0.2975, "step": 3250 }, { "epoch": 0.15, "grad_norm": 0.7004473496213856, "learning_rate": 4.972134913509961e-06, "loss": 0.3309, "step": 3251 }, { "epoch": 0.15, "grad_norm": 0.6617968513629569, "learning_rate": 4.972106669638837e-06, "loss": 0.3177, "step": 3252 }, { "epoch": 0.15, "grad_norm": 0.6687817478973818, "learning_rate": 4.972078411541361e-06, "loss": 0.3234, "step": 3253 }, { "epoch": 0.15, "grad_norm": 0.6654715686909081, "learning_rate": 4.972050139217694e-06, "loss": 0.297, "step": 3254 }, { "epoch": 0.15, "grad_norm": 0.6595061337200192, "learning_rate": 4.972021852668001e-06, "loss": 0.315, "step": 3255 }, { "epoch": 0.15, "grad_norm": 0.7032699590942766, "learning_rate": 4.971993551892442e-06, "loss": 0.3143, "step": 3256 }, { "epoch": 0.15, "grad_norm": 0.7042432699471265, "learning_rate": 4.971965236891183e-06, "loss": 0.3111, "step": 3257 }, { "epoch": 0.15, "grad_norm": 0.6775354191720786, "learning_rate": 4.971936907664385e-06, "loss": 0.3202, "step": 3258 }, { "epoch": 0.15, "grad_norm": 0.6554453959453443, "learning_rate": 4.971908564212211e-06, "loss": 0.3124, "step": 3259 }, { "epoch": 0.15, "grad_norm": 0.6585729433039264, "learning_rate": 4.971880206534825e-06, "loss": 0.3074, "step": 3260 }, { "epoch": 0.15, "grad_norm": 0.7312889742422728, "learning_rate": 4.971851834632388e-06, "loss": 0.3341, "step": 3261 }, { "epoch": 0.15, "grad_norm": 0.6836606214856308, "learning_rate": 4.971823448505067e-06, "loss": 0.3151, "step": 3262 }, { "epoch": 0.15, "grad_norm": 0.6361463517146924, "learning_rate": 4.971795048153023e-06, "loss": 0.313, "step": 3263 }, { "epoch": 0.15, "grad_norm": 0.6709666848284852, "learning_rate": 4.9717666335764194e-06, "loss": 0.2896, "step": 3264 }, { "epoch": 0.15, "grad_norm": 0.6347077088603799, "learning_rate": 4.97173820477542e-06, "loss": 0.2947, "step": 3265 }, { "epoch": 0.15, "grad_norm": 0.6057793551841573, "learning_rate": 4.971709761750189e-06, "loss": 0.312, "step": 3266 }, { "epoch": 0.15, "grad_norm": 0.6090311570890187, "learning_rate": 4.971681304500888e-06, "loss": 0.3009, "step": 3267 }, { "epoch": 0.15, "grad_norm": 0.6252680637738341, "learning_rate": 4.971652833027683e-06, "loss": 0.2951, "step": 3268 }, { "epoch": 0.15, "grad_norm": 0.6632583844371368, "learning_rate": 4.971624347330739e-06, "loss": 0.3086, "step": 3269 }, { "epoch": 0.15, "grad_norm": 0.7107632564484516, "learning_rate": 4.971595847410216e-06, "loss": 0.3189, "step": 3270 }, { "epoch": 0.15, "grad_norm": 0.6505439970215822, "learning_rate": 4.971567333266281e-06, "loss": 0.3007, "step": 3271 }, { "epoch": 0.15, "grad_norm": 0.7036311741936294, "learning_rate": 4.971538804899097e-06, "loss": 0.3127, "step": 3272 }, { "epoch": 0.15, "grad_norm": 0.6680023806872709, "learning_rate": 4.971510262308828e-06, "loss": 0.3166, "step": 3273 }, { "epoch": 0.15, "grad_norm": 0.6392994370935939, "learning_rate": 4.971481705495639e-06, "loss": 0.2968, "step": 3274 }, { "epoch": 0.15, "grad_norm": 0.657004448811326, "learning_rate": 4.971453134459694e-06, "loss": 0.2952, "step": 3275 }, { "epoch": 0.15, "grad_norm": 0.662430244373718, "learning_rate": 4.971424549201157e-06, "loss": 0.2944, "step": 3276 }, { "epoch": 0.15, "grad_norm": 0.6230697454962578, "learning_rate": 4.971395949720194e-06, "loss": 0.2834, "step": 3277 }, { "epoch": 0.15, "grad_norm": 0.6411402937281718, "learning_rate": 4.971367336016968e-06, "loss": 0.3066, "step": 3278 }, { "epoch": 0.15, "grad_norm": 0.6730133867988157, "learning_rate": 4.971338708091643e-06, "loss": 0.3187, "step": 3279 }, { "epoch": 0.15, "grad_norm": 0.621560870909766, "learning_rate": 4.971310065944386e-06, "loss": 0.2908, "step": 3280 }, { "epoch": 0.15, "grad_norm": 0.6802221461018401, "learning_rate": 4.971281409575361e-06, "loss": 0.3368, "step": 3281 }, { "epoch": 0.15, "grad_norm": 0.6567291624661407, "learning_rate": 4.971252738984732e-06, "loss": 0.3044, "step": 3282 }, { "epoch": 0.15, "grad_norm": 0.6679575673474563, "learning_rate": 4.9712240541726644e-06, "loss": 0.3003, "step": 3283 }, { "epoch": 0.15, "grad_norm": 0.7454792218012393, "learning_rate": 4.9711953551393235e-06, "loss": 0.3283, "step": 3284 }, { "epoch": 0.15, "grad_norm": 0.6578488468983894, "learning_rate": 4.9711666418848745e-06, "loss": 0.3105, "step": 3285 }, { "epoch": 0.15, "grad_norm": 0.751076368615146, "learning_rate": 4.9711379144094835e-06, "loss": 0.3089, "step": 3286 }, { "epoch": 0.15, "grad_norm": 0.6480938838764851, "learning_rate": 4.971109172713314e-06, "loss": 0.3018, "step": 3287 }, { "epoch": 0.15, "grad_norm": 0.6694505039927621, "learning_rate": 4.971080416796533e-06, "loss": 0.2924, "step": 3288 }, { "epoch": 0.15, "grad_norm": 0.7032137882978832, "learning_rate": 4.971051646659304e-06, "loss": 0.3133, "step": 3289 }, { "epoch": 0.15, "grad_norm": 0.6701017159791918, "learning_rate": 4.971022862301795e-06, "loss": 0.3102, "step": 3290 }, { "epoch": 0.15, "grad_norm": 0.6825408266202231, "learning_rate": 4.9709940637241705e-06, "loss": 0.3329, "step": 3291 }, { "epoch": 0.15, "grad_norm": 0.6862233963306584, "learning_rate": 4.970965250926595e-06, "loss": 0.3287, "step": 3292 }, { "epoch": 0.15, "grad_norm": 0.7056046598348609, "learning_rate": 4.970936423909237e-06, "loss": 0.2987, "step": 3293 }, { "epoch": 0.15, "grad_norm": 0.7087302744096199, "learning_rate": 4.97090758267226e-06, "loss": 0.3199, "step": 3294 }, { "epoch": 0.15, "grad_norm": 0.6904075216270879, "learning_rate": 4.970878727215831e-06, "loss": 0.3197, "step": 3295 }, { "epoch": 0.15, "grad_norm": 0.7385070144921659, "learning_rate": 4.970849857540116e-06, "loss": 0.3248, "step": 3296 }, { "epoch": 0.15, "grad_norm": 0.6374716238093078, "learning_rate": 4.970820973645282e-06, "loss": 0.3026, "step": 3297 }, { "epoch": 0.15, "grad_norm": 0.6800409976192051, "learning_rate": 4.970792075531493e-06, "loss": 0.2908, "step": 3298 }, { "epoch": 0.15, "grad_norm": 0.6125050812731417, "learning_rate": 4.9707631631989174e-06, "loss": 0.2937, "step": 3299 }, { "epoch": 0.15, "grad_norm": 0.7443215144935681, "learning_rate": 4.970734236647721e-06, "loss": 0.3203, "step": 3300 }, { "epoch": 0.15, "grad_norm": 0.6598122299910438, "learning_rate": 4.97070529587807e-06, "loss": 0.3045, "step": 3301 }, { "epoch": 0.15, "grad_norm": 0.6988204830817629, "learning_rate": 4.970676340890131e-06, "loss": 0.3049, "step": 3302 }, { "epoch": 0.15, "grad_norm": 0.6951771416110896, "learning_rate": 4.97064737168407e-06, "loss": 0.3351, "step": 3303 }, { "epoch": 0.15, "grad_norm": 0.6353831187457075, "learning_rate": 4.970618388260055e-06, "loss": 0.2882, "step": 3304 }, { "epoch": 0.15, "grad_norm": 0.6384316837681608, "learning_rate": 4.970589390618251e-06, "loss": 0.2914, "step": 3305 }, { "epoch": 0.15, "grad_norm": 0.6785834067701161, "learning_rate": 4.970560378758827e-06, "loss": 0.2976, "step": 3306 }, { "epoch": 0.15, "grad_norm": 0.623426582671745, "learning_rate": 4.970531352681949e-06, "loss": 0.3009, "step": 3307 }, { "epoch": 0.15, "grad_norm": 0.6384777495014755, "learning_rate": 4.9705023123877836e-06, "loss": 0.2796, "step": 3308 }, { "epoch": 0.16, "grad_norm": 0.7302808905749637, "learning_rate": 4.970473257876498e-06, "loss": 0.3107, "step": 3309 }, { "epoch": 0.16, "grad_norm": 0.6862286719098859, "learning_rate": 4.97044418914826e-06, "loss": 0.2939, "step": 3310 }, { "epoch": 0.16, "grad_norm": 0.6646029602970444, "learning_rate": 4.970415106203237e-06, "loss": 0.2997, "step": 3311 }, { "epoch": 0.16, "grad_norm": 0.6479261055492358, "learning_rate": 4.970386009041596e-06, "loss": 0.2986, "step": 3312 }, { "epoch": 0.16, "grad_norm": 0.7036114863013374, "learning_rate": 4.970356897663504e-06, "loss": 0.3111, "step": 3313 }, { "epoch": 0.16, "grad_norm": 0.7591278061764847, "learning_rate": 4.97032777206913e-06, "loss": 0.3062, "step": 3314 }, { "epoch": 0.16, "grad_norm": 0.6389615160161469, "learning_rate": 4.97029863225864e-06, "loss": 0.3043, "step": 3315 }, { "epoch": 0.16, "grad_norm": 0.6951655666077042, "learning_rate": 4.9702694782322015e-06, "loss": 0.3209, "step": 3316 }, { "epoch": 0.16, "grad_norm": 0.6852194699372498, "learning_rate": 4.970240309989984e-06, "loss": 0.3102, "step": 3317 }, { "epoch": 0.16, "grad_norm": 0.7127718829451639, "learning_rate": 4.970211127532154e-06, "loss": 0.3138, "step": 3318 }, { "epoch": 0.16, "grad_norm": 0.7003066715613847, "learning_rate": 4.970181930858879e-06, "loss": 0.2878, "step": 3319 }, { "epoch": 0.16, "grad_norm": 0.7179457272533443, "learning_rate": 4.970152719970329e-06, "loss": 0.3199, "step": 3320 }, { "epoch": 0.16, "grad_norm": 0.6050291033978676, "learning_rate": 4.970123494866671e-06, "loss": 0.2801, "step": 3321 }, { "epoch": 0.16, "grad_norm": 0.7079089525130537, "learning_rate": 4.970094255548073e-06, "loss": 0.3093, "step": 3322 }, { "epoch": 0.16, "grad_norm": 0.6606038891439958, "learning_rate": 4.970065002014702e-06, "loss": 0.301, "step": 3323 }, { "epoch": 0.16, "grad_norm": 0.6958499950602322, "learning_rate": 4.970035734266729e-06, "loss": 0.3159, "step": 3324 }, { "epoch": 0.16, "grad_norm": 0.6496889799060828, "learning_rate": 4.970006452304322e-06, "loss": 0.3, "step": 3325 }, { "epoch": 0.16, "grad_norm": 0.7199394542601798, "learning_rate": 4.9699771561276474e-06, "loss": 0.3094, "step": 3326 }, { "epoch": 0.16, "grad_norm": 0.6624095174430409, "learning_rate": 4.969947845736876e-06, "loss": 0.3098, "step": 3327 }, { "epoch": 0.16, "grad_norm": 0.6188827797616617, "learning_rate": 4.969918521132175e-06, "loss": 0.3104, "step": 3328 }, { "epoch": 0.16, "grad_norm": 0.6485475557282167, "learning_rate": 4.969889182313713e-06, "loss": 0.2968, "step": 3329 }, { "epoch": 0.16, "grad_norm": 0.6820803344689503, "learning_rate": 4.96985982928166e-06, "loss": 0.3014, "step": 3330 }, { "epoch": 0.16, "grad_norm": 0.6761631707698712, "learning_rate": 4.969830462036184e-06, "loss": 0.3184, "step": 3331 }, { "epoch": 0.16, "grad_norm": 0.6583961709060958, "learning_rate": 4.969801080577455e-06, "loss": 0.2976, "step": 3332 }, { "epoch": 0.16, "grad_norm": 0.7486106853588954, "learning_rate": 4.969771684905642e-06, "loss": 0.3245, "step": 3333 }, { "epoch": 0.16, "grad_norm": 0.6284940079521152, "learning_rate": 4.9697422750209134e-06, "loss": 0.2929, "step": 3334 }, { "epoch": 0.16, "grad_norm": 0.6252421108805043, "learning_rate": 4.969712850923439e-06, "loss": 0.3021, "step": 3335 }, { "epoch": 0.16, "grad_norm": 0.6702389228550659, "learning_rate": 4.969683412613388e-06, "loss": 0.3134, "step": 3336 }, { "epoch": 0.16, "grad_norm": 0.6286365771788543, "learning_rate": 4.969653960090929e-06, "loss": 0.3034, "step": 3337 }, { "epoch": 0.16, "grad_norm": 0.6765726142152317, "learning_rate": 4.969624493356232e-06, "loss": 0.3154, "step": 3338 }, { "epoch": 0.16, "grad_norm": 0.6652907916491871, "learning_rate": 4.9695950124094675e-06, "loss": 0.3355, "step": 3339 }, { "epoch": 0.16, "grad_norm": 0.6305610253898746, "learning_rate": 4.969565517250804e-06, "loss": 0.2985, "step": 3340 }, { "epoch": 0.16, "grad_norm": 0.6387595133547307, "learning_rate": 4.969536007880412e-06, "loss": 0.3167, "step": 3341 }, { "epoch": 0.16, "grad_norm": 0.6188959190218792, "learning_rate": 4.969506484298461e-06, "loss": 0.3147, "step": 3342 }, { "epoch": 0.16, "grad_norm": 0.6378675476502227, "learning_rate": 4.96947694650512e-06, "loss": 0.2847, "step": 3343 }, { "epoch": 0.16, "grad_norm": 0.6628555517682636, "learning_rate": 4.969447394500561e-06, "loss": 0.3219, "step": 3344 }, { "epoch": 0.16, "grad_norm": 0.6878191939648686, "learning_rate": 4.969417828284952e-06, "loss": 0.3247, "step": 3345 }, { "epoch": 0.16, "grad_norm": 0.6359650663910151, "learning_rate": 4.969388247858464e-06, "loss": 0.3025, "step": 3346 }, { "epoch": 0.16, "grad_norm": 0.63751393187577, "learning_rate": 4.969358653221268e-06, "loss": 0.302, "step": 3347 }, { "epoch": 0.16, "grad_norm": 0.6445679040157073, "learning_rate": 4.969329044373534e-06, "loss": 0.2998, "step": 3348 }, { "epoch": 0.16, "grad_norm": 0.6759104003915639, "learning_rate": 4.969299421315431e-06, "loss": 0.3182, "step": 3349 }, { "epoch": 0.16, "grad_norm": 0.6439350004854352, "learning_rate": 4.96926978404713e-06, "loss": 0.2883, "step": 3350 }, { "epoch": 0.16, "grad_norm": 0.6817589107434322, "learning_rate": 4.969240132568803e-06, "loss": 0.2883, "step": 3351 }, { "epoch": 0.16, "grad_norm": 0.647441175718101, "learning_rate": 4.96921046688062e-06, "loss": 0.2976, "step": 3352 }, { "epoch": 0.16, "grad_norm": 0.6029983072523059, "learning_rate": 4.969180786982751e-06, "loss": 0.2835, "step": 3353 }, { "epoch": 0.16, "grad_norm": 0.646510423671779, "learning_rate": 4.969151092875367e-06, "loss": 0.2763, "step": 3354 }, { "epoch": 0.16, "grad_norm": 0.6561194948619423, "learning_rate": 4.969121384558639e-06, "loss": 0.3122, "step": 3355 }, { "epoch": 0.16, "grad_norm": 0.6925329734671374, "learning_rate": 4.969091662032738e-06, "loss": 0.3185, "step": 3356 }, { "epoch": 0.16, "grad_norm": 0.6906957041655972, "learning_rate": 4.969061925297836e-06, "loss": 0.2982, "step": 3357 }, { "epoch": 0.16, "grad_norm": 0.6859502926492226, "learning_rate": 4.9690321743541015e-06, "loss": 0.3019, "step": 3358 }, { "epoch": 0.16, "grad_norm": 0.6949606050582727, "learning_rate": 4.969002409201709e-06, "loss": 0.3246, "step": 3359 }, { "epoch": 0.16, "grad_norm": 0.6531260883111191, "learning_rate": 4.968972629840827e-06, "loss": 0.2916, "step": 3360 }, { "epoch": 0.16, "grad_norm": 0.6344962838416676, "learning_rate": 4.968942836271628e-06, "loss": 0.3073, "step": 3361 }, { "epoch": 0.16, "grad_norm": 0.7120058259733749, "learning_rate": 4.968913028494285e-06, "loss": 0.3069, "step": 3362 }, { "epoch": 0.16, "grad_norm": 0.6977198762251349, "learning_rate": 4.968883206508966e-06, "loss": 0.3135, "step": 3363 }, { "epoch": 0.16, "grad_norm": 0.7231799345398543, "learning_rate": 4.968853370315846e-06, "loss": 0.3163, "step": 3364 }, { "epoch": 0.16, "grad_norm": 0.723014747181904, "learning_rate": 4.9688235199150955e-06, "loss": 0.305, "step": 3365 }, { "epoch": 0.16, "grad_norm": 0.6340815634519422, "learning_rate": 4.968793655306886e-06, "loss": 0.291, "step": 3366 }, { "epoch": 0.16, "grad_norm": 0.6176266773247702, "learning_rate": 4.968763776491389e-06, "loss": 0.3029, "step": 3367 }, { "epoch": 0.16, "grad_norm": 0.6353369707349245, "learning_rate": 4.968733883468777e-06, "loss": 0.3086, "step": 3368 }, { "epoch": 0.16, "grad_norm": 0.6177964789281678, "learning_rate": 4.968703976239223e-06, "loss": 0.2936, "step": 3369 }, { "epoch": 0.16, "grad_norm": 0.6880193612330779, "learning_rate": 4.968674054802897e-06, "loss": 0.3268, "step": 3370 }, { "epoch": 0.16, "grad_norm": 0.650977505340899, "learning_rate": 4.968644119159973e-06, "loss": 0.3138, "step": 3371 }, { "epoch": 0.16, "grad_norm": 0.6472720852630887, "learning_rate": 4.9686141693106224e-06, "loss": 0.3091, "step": 3372 }, { "epoch": 0.16, "grad_norm": 0.6617614411558547, "learning_rate": 4.968584205255017e-06, "loss": 0.3155, "step": 3373 }, { "epoch": 0.16, "grad_norm": 0.7244315504943746, "learning_rate": 4.968554226993331e-06, "loss": 0.3127, "step": 3374 }, { "epoch": 0.16, "grad_norm": 0.6845135645723774, "learning_rate": 4.968524234525736e-06, "loss": 0.3164, "step": 3375 }, { "epoch": 0.16, "grad_norm": 0.6510824030401108, "learning_rate": 4.968494227852403e-06, "loss": 0.3105, "step": 3376 }, { "epoch": 0.16, "grad_norm": 0.6802329629364813, "learning_rate": 4.968464206973508e-06, "loss": 0.3048, "step": 3377 }, { "epoch": 0.16, "grad_norm": 0.6845863790624869, "learning_rate": 4.96843417188922e-06, "loss": 0.3068, "step": 3378 }, { "epoch": 0.16, "grad_norm": 0.6416408036993533, "learning_rate": 4.968404122599715e-06, "loss": 0.3224, "step": 3379 }, { "epoch": 0.16, "grad_norm": 0.6686613102127152, "learning_rate": 4.968374059105164e-06, "loss": 0.3246, "step": 3380 }, { "epoch": 0.16, "grad_norm": 0.6553388916819008, "learning_rate": 4.968343981405741e-06, "loss": 0.2805, "step": 3381 }, { "epoch": 0.16, "grad_norm": 0.6669352369189859, "learning_rate": 4.968313889501619e-06, "loss": 0.2976, "step": 3382 }, { "epoch": 0.16, "grad_norm": 0.7205675719405906, "learning_rate": 4.968283783392971e-06, "loss": 0.3014, "step": 3383 }, { "epoch": 0.16, "grad_norm": 0.6549969814529539, "learning_rate": 4.96825366307997e-06, "loss": 0.3075, "step": 3384 }, { "epoch": 0.16, "grad_norm": 0.6285099559225316, "learning_rate": 4.96822352856279e-06, "loss": 0.3188, "step": 3385 }, { "epoch": 0.16, "grad_norm": 0.6884543672259118, "learning_rate": 4.968193379841603e-06, "loss": 0.308, "step": 3386 }, { "epoch": 0.16, "grad_norm": 0.6401037408266127, "learning_rate": 4.968163216916584e-06, "loss": 0.2952, "step": 3387 }, { "epoch": 0.16, "grad_norm": 0.6903057038508503, "learning_rate": 4.968133039787906e-06, "loss": 0.3403, "step": 3388 }, { "epoch": 0.16, "grad_norm": 0.6774388244341045, "learning_rate": 4.968102848455743e-06, "loss": 0.3104, "step": 3389 }, { "epoch": 0.16, "grad_norm": 0.5729777024306506, "learning_rate": 4.968072642920268e-06, "loss": 0.2817, "step": 3390 }, { "epoch": 0.16, "grad_norm": 0.6699944709989725, "learning_rate": 4.9680424231816555e-06, "loss": 0.3179, "step": 3391 }, { "epoch": 0.16, "grad_norm": 0.6650488020432269, "learning_rate": 4.968012189240079e-06, "loss": 0.3065, "step": 3392 }, { "epoch": 0.16, "grad_norm": 0.6288524438435954, "learning_rate": 4.967981941095713e-06, "loss": 0.2967, "step": 3393 }, { "epoch": 0.16, "grad_norm": 0.6633726456895555, "learning_rate": 4.9679516787487305e-06, "loss": 0.2972, "step": 3394 }, { "epoch": 0.16, "grad_norm": 0.6300012254897606, "learning_rate": 4.9679214021993075e-06, "loss": 0.2761, "step": 3395 }, { "epoch": 0.16, "grad_norm": 0.6774475885255693, "learning_rate": 4.967891111447616e-06, "loss": 0.3097, "step": 3396 }, { "epoch": 0.16, "grad_norm": 0.6709929183972987, "learning_rate": 4.967860806493832e-06, "loss": 0.314, "step": 3397 }, { "epoch": 0.16, "grad_norm": 0.6267328677791072, "learning_rate": 4.967830487338129e-06, "loss": 0.3069, "step": 3398 }, { "epoch": 0.16, "grad_norm": 0.6648103795714004, "learning_rate": 4.967800153980682e-06, "loss": 0.2893, "step": 3399 }, { "epoch": 0.16, "grad_norm": 0.6830730706831809, "learning_rate": 4.967769806421666e-06, "loss": 0.3144, "step": 3400 }, { "epoch": 0.16, "grad_norm": 0.6754415027479043, "learning_rate": 4.967739444661254e-06, "loss": 0.303, "step": 3401 }, { "epoch": 0.16, "grad_norm": 0.6693280028784325, "learning_rate": 4.967709068699622e-06, "loss": 0.3081, "step": 3402 }, { "epoch": 0.16, "grad_norm": 0.7103880361664633, "learning_rate": 4.967678678536945e-06, "loss": 0.3007, "step": 3403 }, { "epoch": 0.16, "grad_norm": 0.7257640773630752, "learning_rate": 4.967648274173398e-06, "loss": 0.3181, "step": 3404 }, { "epoch": 0.16, "grad_norm": 0.6434222379059561, "learning_rate": 4.967617855609154e-06, "loss": 0.2972, "step": 3405 }, { "epoch": 0.16, "grad_norm": 0.6067760790291001, "learning_rate": 4.96758742284439e-06, "loss": 0.3014, "step": 3406 }, { "epoch": 0.16, "grad_norm": 0.621598034752574, "learning_rate": 4.967556975879281e-06, "loss": 0.3085, "step": 3407 }, { "epoch": 0.16, "grad_norm": 0.6600944444364549, "learning_rate": 4.967526514714001e-06, "loss": 0.3026, "step": 3408 }, { "epoch": 0.16, "grad_norm": 0.6630119127748713, "learning_rate": 4.967496039348727e-06, "loss": 0.3108, "step": 3409 }, { "epoch": 0.16, "grad_norm": 0.648938503159328, "learning_rate": 4.967465549783633e-06, "loss": 0.302, "step": 3410 }, { "epoch": 0.16, "grad_norm": 0.6118861486401367, "learning_rate": 4.967435046018894e-06, "loss": 0.3005, "step": 3411 }, { "epoch": 0.16, "grad_norm": 0.6382761496917141, "learning_rate": 4.967404528054688e-06, "loss": 0.306, "step": 3412 }, { "epoch": 0.16, "grad_norm": 0.6358586480175769, "learning_rate": 4.967373995891188e-06, "loss": 0.3109, "step": 3413 }, { "epoch": 0.16, "grad_norm": 0.6555684992154505, "learning_rate": 4.967343449528572e-06, "loss": 0.2981, "step": 3414 }, { "epoch": 0.16, "grad_norm": 0.704395812558234, "learning_rate": 4.9673128889670135e-06, "loss": 0.2932, "step": 3415 }, { "epoch": 0.16, "grad_norm": 0.6195908692051428, "learning_rate": 4.96728231420669e-06, "loss": 0.2998, "step": 3416 }, { "epoch": 0.16, "grad_norm": 0.6531819372349852, "learning_rate": 4.967251725247777e-06, "loss": 0.2985, "step": 3417 }, { "epoch": 0.16, "grad_norm": 0.682820875094087, "learning_rate": 4.96722112209045e-06, "loss": 0.3131, "step": 3418 }, { "epoch": 0.16, "grad_norm": 0.6745645837628049, "learning_rate": 4.967190504734886e-06, "loss": 0.2804, "step": 3419 }, { "epoch": 0.16, "grad_norm": 0.5955324893478968, "learning_rate": 4.96715987318126e-06, "loss": 0.2982, "step": 3420 }, { "epoch": 0.16, "grad_norm": 0.6563292488745064, "learning_rate": 4.96712922742975e-06, "loss": 0.2829, "step": 3421 }, { "epoch": 0.16, "grad_norm": 0.6843042217558536, "learning_rate": 4.967098567480531e-06, "loss": 0.2886, "step": 3422 }, { "epoch": 0.16, "grad_norm": 0.6842703207178971, "learning_rate": 4.96706789333378e-06, "loss": 0.3227, "step": 3423 }, { "epoch": 0.16, "grad_norm": 0.7275329383502238, "learning_rate": 4.967037204989672e-06, "loss": 0.3233, "step": 3424 }, { "epoch": 0.16, "grad_norm": 0.6634477011988483, "learning_rate": 4.967006502448386e-06, "loss": 0.2941, "step": 3425 }, { "epoch": 0.16, "grad_norm": 0.6852638278588341, "learning_rate": 4.966975785710099e-06, "loss": 0.3009, "step": 3426 }, { "epoch": 0.16, "grad_norm": 0.6781271715216793, "learning_rate": 4.966945054774984e-06, "loss": 0.3135, "step": 3427 }, { "epoch": 0.16, "grad_norm": 0.66181466904244, "learning_rate": 4.9669143096432215e-06, "loss": 0.2995, "step": 3428 }, { "epoch": 0.16, "grad_norm": 0.6500337396156831, "learning_rate": 4.966883550314987e-06, "loss": 0.3149, "step": 3429 }, { "epoch": 0.16, "grad_norm": 0.6637636667164385, "learning_rate": 4.966852776790458e-06, "loss": 0.3075, "step": 3430 }, { "epoch": 0.16, "grad_norm": 0.6451816884030155, "learning_rate": 4.966821989069811e-06, "loss": 0.3114, "step": 3431 }, { "epoch": 0.16, "grad_norm": 0.6838828086296613, "learning_rate": 4.966791187153224e-06, "loss": 0.2937, "step": 3432 }, { "epoch": 0.16, "grad_norm": 0.6656970745928591, "learning_rate": 4.966760371040873e-06, "loss": 0.3139, "step": 3433 }, { "epoch": 0.16, "grad_norm": 0.7068237075254736, "learning_rate": 4.966729540732936e-06, "loss": 0.3318, "step": 3434 }, { "epoch": 0.16, "grad_norm": 0.7045755734229551, "learning_rate": 4.9666986962295906e-06, "loss": 0.3407, "step": 3435 }, { "epoch": 0.16, "grad_norm": 0.6549504752472107, "learning_rate": 4.966667837531015e-06, "loss": 0.2863, "step": 3436 }, { "epoch": 0.16, "grad_norm": 0.6615649082627856, "learning_rate": 4.9666369646373845e-06, "loss": 0.3041, "step": 3437 }, { "epoch": 0.16, "grad_norm": 0.6727879612093063, "learning_rate": 4.96660607754888e-06, "loss": 0.3037, "step": 3438 }, { "epoch": 0.16, "grad_norm": 0.6652027925827134, "learning_rate": 4.966575176265676e-06, "loss": 0.2923, "step": 3439 }, { "epoch": 0.16, "grad_norm": 0.6038235244178337, "learning_rate": 4.966544260787952e-06, "loss": 0.2956, "step": 3440 }, { "epoch": 0.16, "grad_norm": 0.6456453690164063, "learning_rate": 4.966513331115887e-06, "loss": 0.2838, "step": 3441 }, { "epoch": 0.16, "grad_norm": 0.6564052958298291, "learning_rate": 4.966482387249656e-06, "loss": 0.3139, "step": 3442 }, { "epoch": 0.16, "grad_norm": 0.6506569823854772, "learning_rate": 4.9664514291894394e-06, "loss": 0.2983, "step": 3443 }, { "epoch": 0.16, "grad_norm": 0.703771675717859, "learning_rate": 4.966420456935415e-06, "loss": 0.289, "step": 3444 }, { "epoch": 0.16, "grad_norm": 0.6469152662976023, "learning_rate": 4.966389470487761e-06, "loss": 0.3144, "step": 3445 }, { "epoch": 0.16, "grad_norm": 0.6298428215889007, "learning_rate": 4.966358469846655e-06, "loss": 0.2993, "step": 3446 }, { "epoch": 0.16, "grad_norm": 0.6574713776203185, "learning_rate": 4.9663274550122764e-06, "loss": 0.3059, "step": 3447 }, { "epoch": 0.16, "grad_norm": 0.6059680398302292, "learning_rate": 4.966296425984802e-06, "loss": 0.2912, "step": 3448 }, { "epoch": 0.16, "grad_norm": 0.6213514532003285, "learning_rate": 4.966265382764413e-06, "loss": 0.3086, "step": 3449 }, { "epoch": 0.16, "grad_norm": 0.6963776729639058, "learning_rate": 4.966234325351286e-06, "loss": 0.3206, "step": 3450 }, { "epoch": 0.16, "grad_norm": 0.6350283112103439, "learning_rate": 4.9662032537456006e-06, "loss": 0.2908, "step": 3451 }, { "epoch": 0.16, "grad_norm": 0.7103076616056372, "learning_rate": 4.966172167947535e-06, "loss": 0.317, "step": 3452 }, { "epoch": 0.16, "grad_norm": 0.6594769221775657, "learning_rate": 4.966141067957269e-06, "loss": 0.2958, "step": 3453 }, { "epoch": 0.16, "grad_norm": 0.6481970050201922, "learning_rate": 4.96610995377498e-06, "loss": 0.3139, "step": 3454 }, { "epoch": 0.16, "grad_norm": 0.7278052788750831, "learning_rate": 4.9660788254008485e-06, "loss": 0.3406, "step": 3455 }, { "epoch": 0.16, "grad_norm": 0.685301388434883, "learning_rate": 4.966047682835053e-06, "loss": 0.299, "step": 3456 }, { "epoch": 0.16, "grad_norm": 0.657010170933295, "learning_rate": 4.966016526077773e-06, "loss": 0.3159, "step": 3457 }, { "epoch": 0.16, "grad_norm": 0.6508935123189824, "learning_rate": 4.9659853551291885e-06, "loss": 0.3049, "step": 3458 }, { "epoch": 0.16, "grad_norm": 0.6688875258129111, "learning_rate": 4.965954169989476e-06, "loss": 0.3076, "step": 3459 }, { "epoch": 0.16, "grad_norm": 0.6536497894490383, "learning_rate": 4.96592297065882e-06, "loss": 0.3024, "step": 3460 }, { "epoch": 0.16, "grad_norm": 0.6905456177976459, "learning_rate": 4.9658917571373945e-06, "loss": 0.3177, "step": 3461 }, { "epoch": 0.16, "grad_norm": 0.6398894954828066, "learning_rate": 4.965860529425383e-06, "loss": 0.2962, "step": 3462 }, { "epoch": 0.16, "grad_norm": 0.6372954028820373, "learning_rate": 4.965829287522964e-06, "loss": 0.3154, "step": 3463 }, { "epoch": 0.16, "grad_norm": 0.5978607372765657, "learning_rate": 4.9657980314303166e-06, "loss": 0.2726, "step": 3464 }, { "epoch": 0.16, "grad_norm": 0.6648941661610799, "learning_rate": 4.965766761147621e-06, "loss": 0.2845, "step": 3465 }, { "epoch": 0.16, "grad_norm": 0.6997139929113699, "learning_rate": 4.965735476675059e-06, "loss": 0.3258, "step": 3466 }, { "epoch": 0.16, "grad_norm": 0.6466623839404014, "learning_rate": 4.965704178012808e-06, "loss": 0.2878, "step": 3467 }, { "epoch": 0.16, "grad_norm": 0.6813198944620333, "learning_rate": 4.965672865161049e-06, "loss": 0.3143, "step": 3468 }, { "epoch": 0.16, "grad_norm": 0.6693205947360903, "learning_rate": 4.965641538119963e-06, "loss": 0.3061, "step": 3469 }, { "epoch": 0.16, "grad_norm": 0.6219803083054185, "learning_rate": 4.965610196889729e-06, "loss": 0.2886, "step": 3470 }, { "epoch": 0.16, "grad_norm": 0.6278605971033077, "learning_rate": 4.965578841470529e-06, "loss": 0.3122, "step": 3471 }, { "epoch": 0.16, "grad_norm": 0.6732808783804621, "learning_rate": 4.965547471862541e-06, "loss": 0.3096, "step": 3472 }, { "epoch": 0.16, "grad_norm": 0.6866491641147656, "learning_rate": 4.965516088065948e-06, "loss": 0.3209, "step": 3473 }, { "epoch": 0.16, "grad_norm": 0.6137829100011645, "learning_rate": 4.965484690080929e-06, "loss": 0.3076, "step": 3474 }, { "epoch": 0.16, "grad_norm": 0.7094071400824788, "learning_rate": 4.965453277907666e-06, "loss": 0.3413, "step": 3475 }, { "epoch": 0.16, "grad_norm": 0.6869512442444557, "learning_rate": 4.9654218515463385e-06, "loss": 0.2955, "step": 3476 }, { "epoch": 0.16, "grad_norm": 0.7054184432889758, "learning_rate": 4.9653904109971285e-06, "loss": 0.3043, "step": 3477 }, { "epoch": 0.16, "grad_norm": 0.6415036167592748, "learning_rate": 4.965358956260216e-06, "loss": 0.3039, "step": 3478 }, { "epoch": 0.16, "grad_norm": 0.6952426902019585, "learning_rate": 4.9653274873357825e-06, "loss": 0.3219, "step": 3479 }, { "epoch": 0.16, "grad_norm": 0.6281315674671674, "learning_rate": 4.965296004224008e-06, "loss": 0.2926, "step": 3480 }, { "epoch": 0.16, "grad_norm": 0.6804275176737136, "learning_rate": 4.965264506925076e-06, "loss": 0.3206, "step": 3481 }, { "epoch": 0.16, "grad_norm": 0.7289364080465224, "learning_rate": 4.965232995439166e-06, "loss": 0.3182, "step": 3482 }, { "epoch": 0.16, "grad_norm": 0.6209742528788837, "learning_rate": 4.965201469766459e-06, "loss": 0.3055, "step": 3483 }, { "epoch": 0.16, "grad_norm": 0.64234574988949, "learning_rate": 4.9651699299071375e-06, "loss": 0.3173, "step": 3484 }, { "epoch": 0.16, "grad_norm": 0.6468490354222002, "learning_rate": 4.9651383758613835e-06, "loss": 0.3202, "step": 3485 }, { "epoch": 0.16, "grad_norm": 0.6566754272816404, "learning_rate": 4.965106807629377e-06, "loss": 0.2833, "step": 3486 }, { "epoch": 0.16, "grad_norm": 0.6926410359252726, "learning_rate": 4.9650752252113e-06, "loss": 0.3161, "step": 3487 }, { "epoch": 0.16, "grad_norm": 0.7095942151644057, "learning_rate": 4.9650436286073355e-06, "loss": 0.3083, "step": 3488 }, { "epoch": 0.16, "grad_norm": 0.6512690860030629, "learning_rate": 4.965012017817664e-06, "loss": 0.2962, "step": 3489 }, { "epoch": 0.16, "grad_norm": 0.6220295966077791, "learning_rate": 4.964980392842468e-06, "loss": 0.3063, "step": 3490 }, { "epoch": 0.16, "grad_norm": 0.656846838078545, "learning_rate": 4.96494875368193e-06, "loss": 0.3004, "step": 3491 }, { "epoch": 0.16, "grad_norm": 0.6543881059734019, "learning_rate": 4.96491710033623e-06, "loss": 0.3246, "step": 3492 }, { "epoch": 0.16, "grad_norm": 0.6345036850492254, "learning_rate": 4.964885432805553e-06, "loss": 0.2908, "step": 3493 }, { "epoch": 0.16, "grad_norm": 0.7619510485048341, "learning_rate": 4.964853751090079e-06, "loss": 0.3177, "step": 3494 }, { "epoch": 0.16, "grad_norm": 0.6521140665562739, "learning_rate": 4.9648220551899916e-06, "loss": 0.3049, "step": 3495 }, { "epoch": 0.16, "grad_norm": 0.7858164762938912, "learning_rate": 4.964790345105472e-06, "loss": 0.3165, "step": 3496 }, { "epoch": 0.16, "grad_norm": 0.6771058771707748, "learning_rate": 4.964758620836705e-06, "loss": 0.3108, "step": 3497 }, { "epoch": 0.16, "grad_norm": 0.6512734999816213, "learning_rate": 4.96472688238387e-06, "loss": 0.2781, "step": 3498 }, { "epoch": 0.16, "grad_norm": 0.6893515310667888, "learning_rate": 4.964695129747152e-06, "loss": 0.3326, "step": 3499 }, { "epoch": 0.16, "grad_norm": 0.7467598270391363, "learning_rate": 4.964663362926734e-06, "loss": 0.3039, "step": 3500 }, { "epoch": 0.16, "grad_norm": 0.657100199939505, "learning_rate": 4.964631581922797e-06, "loss": 0.3041, "step": 3501 }, { "epoch": 0.16, "grad_norm": 0.6743086678904111, "learning_rate": 4.964599786735524e-06, "loss": 0.3073, "step": 3502 }, { "epoch": 0.16, "grad_norm": 0.7237611514147487, "learning_rate": 4.964567977365099e-06, "loss": 0.3384, "step": 3503 }, { "epoch": 0.16, "grad_norm": 0.6417333922832205, "learning_rate": 4.9645361538117056e-06, "loss": 0.3115, "step": 3504 }, { "epoch": 0.16, "grad_norm": 0.6769890734590144, "learning_rate": 4.964504316075525e-06, "loss": 0.3128, "step": 3505 }, { "epoch": 0.16, "grad_norm": 0.6402040914500164, "learning_rate": 4.964472464156742e-06, "loss": 0.3045, "step": 3506 }, { "epoch": 0.16, "grad_norm": 0.6541453979646806, "learning_rate": 4.96444059805554e-06, "loss": 0.3211, "step": 3507 }, { "epoch": 0.16, "grad_norm": 0.715586458337534, "learning_rate": 4.964408717772101e-06, "loss": 0.3207, "step": 3508 }, { "epoch": 0.16, "grad_norm": 0.6949103998180217, "learning_rate": 4.9643768233066096e-06, "loss": 0.3147, "step": 3509 }, { "epoch": 0.16, "grad_norm": 0.7222136741583585, "learning_rate": 4.964344914659248e-06, "loss": 0.3314, "step": 3510 }, { "epoch": 0.16, "grad_norm": 0.6298666986757088, "learning_rate": 4.964312991830201e-06, "loss": 0.296, "step": 3511 }, { "epoch": 0.16, "grad_norm": 0.6410751080789302, "learning_rate": 4.964281054819654e-06, "loss": 0.2867, "step": 3512 }, { "epoch": 0.16, "grad_norm": 0.6807929819689099, "learning_rate": 4.9642491036277875e-06, "loss": 0.2936, "step": 3513 }, { "epoch": 0.16, "grad_norm": 0.683353620077659, "learning_rate": 4.964217138254787e-06, "loss": 0.3017, "step": 3514 }, { "epoch": 0.16, "grad_norm": 0.7093493036260424, "learning_rate": 4.964185158700835e-06, "loss": 0.3208, "step": 3515 }, { "epoch": 0.16, "grad_norm": 0.7073342880162289, "learning_rate": 4.964153164966118e-06, "loss": 0.3253, "step": 3516 }, { "epoch": 0.16, "grad_norm": 0.6192177439689824, "learning_rate": 4.964121157050819e-06, "loss": 0.2921, "step": 3517 }, { "epoch": 0.16, "grad_norm": 0.671832008530866, "learning_rate": 4.964089134955122e-06, "loss": 0.3041, "step": 3518 }, { "epoch": 0.16, "grad_norm": 0.6732264617576768, "learning_rate": 4.964057098679211e-06, "loss": 0.286, "step": 3519 }, { "epoch": 0.16, "grad_norm": 0.7865057374677676, "learning_rate": 4.964025048223271e-06, "loss": 0.3252, "step": 3520 }, { "epoch": 0.16, "grad_norm": 0.6890363936232602, "learning_rate": 4.963992983587486e-06, "loss": 0.3103, "step": 3521 }, { "epoch": 0.16, "grad_norm": 0.6661900278775075, "learning_rate": 4.963960904772041e-06, "loss": 0.2883, "step": 3522 }, { "epoch": 0.17, "grad_norm": 0.6514894103345559, "learning_rate": 4.963928811777119e-06, "loss": 0.3093, "step": 3523 }, { "epoch": 0.17, "grad_norm": 0.681729852410718, "learning_rate": 4.963896704602908e-06, "loss": 0.3156, "step": 3524 }, { "epoch": 0.17, "grad_norm": 0.7197989981243194, "learning_rate": 4.963864583249589e-06, "loss": 0.3074, "step": 3525 }, { "epoch": 0.17, "grad_norm": 0.6643351440961649, "learning_rate": 4.963832447717349e-06, "loss": 0.2973, "step": 3526 }, { "epoch": 0.17, "grad_norm": 0.71840660919547, "learning_rate": 4.963800298006373e-06, "loss": 0.3218, "step": 3527 }, { "epoch": 0.17, "grad_norm": 0.6334602772469382, "learning_rate": 4.963768134116845e-06, "loss": 0.3139, "step": 3528 }, { "epoch": 0.17, "grad_norm": 0.6062706901463952, "learning_rate": 4.963735956048952e-06, "loss": 0.2686, "step": 3529 }, { "epoch": 0.17, "grad_norm": 0.6515429163332376, "learning_rate": 4.963703763802876e-06, "loss": 0.3062, "step": 3530 }, { "epoch": 0.17, "grad_norm": 0.6706683761032628, "learning_rate": 4.9636715573788055e-06, "loss": 0.299, "step": 3531 }, { "epoch": 0.17, "grad_norm": 0.7130062284968551, "learning_rate": 4.963639336776923e-06, "loss": 0.308, "step": 3532 }, { "epoch": 0.17, "grad_norm": 0.7946464792609433, "learning_rate": 4.9636071019974165e-06, "loss": 0.333, "step": 3533 }, { "epoch": 0.17, "grad_norm": 0.6469401787903721, "learning_rate": 4.96357485304047e-06, "loss": 0.3013, "step": 3534 }, { "epoch": 0.17, "grad_norm": 0.6580396482068978, "learning_rate": 4.9635425899062696e-06, "loss": 0.2966, "step": 3535 }, { "epoch": 0.17, "grad_norm": 0.6387622541006915, "learning_rate": 4.963510312595e-06, "loss": 0.2955, "step": 3536 }, { "epoch": 0.17, "grad_norm": 0.6897209306587815, "learning_rate": 4.963478021106849e-06, "loss": 0.3273, "step": 3537 }, { "epoch": 0.17, "grad_norm": 0.7096638318170363, "learning_rate": 4.963445715442e-06, "loss": 0.3055, "step": 3538 }, { "epoch": 0.17, "grad_norm": 0.7354994698676673, "learning_rate": 4.9634133956006406e-06, "loss": 0.3192, "step": 3539 }, { "epoch": 0.17, "grad_norm": 0.6845497869836895, "learning_rate": 4.963381061582956e-06, "loss": 0.2978, "step": 3540 }, { "epoch": 0.17, "grad_norm": 0.6623434717231161, "learning_rate": 4.963348713389132e-06, "loss": 0.2985, "step": 3541 }, { "epoch": 0.17, "grad_norm": 0.6947265888826017, "learning_rate": 4.963316351019356e-06, "loss": 0.3287, "step": 3542 }, { "epoch": 0.17, "grad_norm": 0.6739264354291511, "learning_rate": 4.963283974473813e-06, "loss": 0.3099, "step": 3543 }, { "epoch": 0.17, "grad_norm": 0.7128587668207332, "learning_rate": 4.963251583752691e-06, "loss": 0.3201, "step": 3544 }, { "epoch": 0.17, "grad_norm": 0.6641100887239867, "learning_rate": 4.963219178856174e-06, "loss": 0.3001, "step": 3545 }, { "epoch": 0.17, "grad_norm": 0.7733087935901235, "learning_rate": 4.963186759784451e-06, "loss": 0.3181, "step": 3546 }, { "epoch": 0.17, "grad_norm": 0.6468692012239414, "learning_rate": 4.9631543265377066e-06, "loss": 0.3028, "step": 3547 }, { "epoch": 0.17, "grad_norm": 0.6975780025139846, "learning_rate": 4.9631218791161285e-06, "loss": 0.3163, "step": 3548 }, { "epoch": 0.17, "grad_norm": 0.6842848488144475, "learning_rate": 4.963089417519903e-06, "loss": 0.2928, "step": 3549 }, { "epoch": 0.17, "grad_norm": 0.7029328431950528, "learning_rate": 4.963056941749217e-06, "loss": 0.288, "step": 3550 }, { "epoch": 0.17, "grad_norm": 0.6765444172550752, "learning_rate": 4.9630244518042565e-06, "loss": 0.3025, "step": 3551 }, { "epoch": 0.17, "grad_norm": 0.6206148415013623, "learning_rate": 4.96299194768521e-06, "loss": 0.2807, "step": 3552 }, { "epoch": 0.17, "grad_norm": 0.6892972543318243, "learning_rate": 4.962959429392264e-06, "loss": 0.3141, "step": 3553 }, { "epoch": 0.17, "grad_norm": 0.6847197122158525, "learning_rate": 4.962926896925605e-06, "loss": 0.3049, "step": 3554 }, { "epoch": 0.17, "grad_norm": 0.6436412849498535, "learning_rate": 4.962894350285421e-06, "loss": 0.3133, "step": 3555 }, { "epoch": 0.17, "grad_norm": 0.6645170974808872, "learning_rate": 4.9628617894718996e-06, "loss": 0.3037, "step": 3556 }, { "epoch": 0.17, "grad_norm": 0.6658552040386531, "learning_rate": 4.962829214485227e-06, "loss": 0.3129, "step": 3557 }, { "epoch": 0.17, "grad_norm": 0.6091939618095806, "learning_rate": 4.962796625325591e-06, "loss": 0.2902, "step": 3558 }, { "epoch": 0.17, "grad_norm": 0.6953535859565844, "learning_rate": 4.962764021993181e-06, "loss": 0.3057, "step": 3559 }, { "epoch": 0.17, "grad_norm": 0.6710173586123958, "learning_rate": 4.9627314044881814e-06, "loss": 0.321, "step": 3560 }, { "epoch": 0.17, "grad_norm": 0.730202043789514, "learning_rate": 4.962698772810782e-06, "loss": 0.3216, "step": 3561 }, { "epoch": 0.17, "grad_norm": 0.6535486042934673, "learning_rate": 4.96266612696117e-06, "loss": 0.2992, "step": 3562 }, { "epoch": 0.17, "grad_norm": 0.6914198828750368, "learning_rate": 4.962633466939534e-06, "loss": 0.3216, "step": 3563 }, { "epoch": 0.17, "grad_norm": 0.6765318912103537, "learning_rate": 4.962600792746061e-06, "loss": 0.3056, "step": 3564 }, { "epoch": 0.17, "grad_norm": 0.6935162508022299, "learning_rate": 4.9625681043809396e-06, "loss": 0.3042, "step": 3565 }, { "epoch": 0.17, "grad_norm": 0.6246050269387451, "learning_rate": 4.962535401844357e-06, "loss": 0.2879, "step": 3566 }, { "epoch": 0.17, "grad_norm": 0.6420749320734751, "learning_rate": 4.962502685136502e-06, "loss": 0.2911, "step": 3567 }, { "epoch": 0.17, "grad_norm": 0.7142526117727966, "learning_rate": 4.962469954257564e-06, "loss": 0.3238, "step": 3568 }, { "epoch": 0.17, "grad_norm": 0.6370648017773498, "learning_rate": 4.96243720920773e-06, "loss": 0.314, "step": 3569 }, { "epoch": 0.17, "grad_norm": 0.6152203171964065, "learning_rate": 4.962404449987189e-06, "loss": 0.2913, "step": 3570 }, { "epoch": 0.17, "grad_norm": 0.6812656360068707, "learning_rate": 4.9623716765961285e-06, "loss": 0.3269, "step": 3571 }, { "epoch": 0.17, "grad_norm": 0.6504201833598272, "learning_rate": 4.9623388890347375e-06, "loss": 0.3142, "step": 3572 }, { "epoch": 0.17, "grad_norm": 0.7045688022144254, "learning_rate": 4.962306087303206e-06, "loss": 0.3114, "step": 3573 }, { "epoch": 0.17, "grad_norm": 0.6638831420976437, "learning_rate": 4.9622732714017215e-06, "loss": 0.3051, "step": 3574 }, { "epoch": 0.17, "grad_norm": 0.6563157253313547, "learning_rate": 4.9622404413304735e-06, "loss": 0.2988, "step": 3575 }, { "epoch": 0.17, "grad_norm": 0.6429937841221067, "learning_rate": 4.962207597089651e-06, "loss": 0.3071, "step": 3576 }, { "epoch": 0.17, "grad_norm": 0.719770410679172, "learning_rate": 4.9621747386794426e-06, "loss": 0.325, "step": 3577 }, { "epoch": 0.17, "grad_norm": 0.7012940904093565, "learning_rate": 4.962141866100037e-06, "loss": 0.3276, "step": 3578 }, { "epoch": 0.17, "grad_norm": 0.6807569945980584, "learning_rate": 4.962108979351623e-06, "loss": 0.298, "step": 3579 }, { "epoch": 0.17, "grad_norm": 0.6735356669564657, "learning_rate": 4.962076078434392e-06, "loss": 0.3114, "step": 3580 }, { "epoch": 0.17, "grad_norm": 0.6575112182911408, "learning_rate": 4.962043163348531e-06, "loss": 0.3037, "step": 3581 }, { "epoch": 0.17, "grad_norm": 0.6514534881391238, "learning_rate": 4.962010234094231e-06, "loss": 0.3192, "step": 3582 }, { "epoch": 0.17, "grad_norm": 0.6208648653765999, "learning_rate": 4.9619772906716815e-06, "loss": 0.302, "step": 3583 }, { "epoch": 0.17, "grad_norm": 0.6804587260010118, "learning_rate": 4.96194433308107e-06, "loss": 0.3185, "step": 3584 }, { "epoch": 0.17, "grad_norm": 0.7720768095342098, "learning_rate": 4.961911361322589e-06, "loss": 0.3254, "step": 3585 }, { "epoch": 0.17, "grad_norm": 0.635341049059702, "learning_rate": 4.9618783753964266e-06, "loss": 0.285, "step": 3586 }, { "epoch": 0.17, "grad_norm": 0.635633583053057, "learning_rate": 4.9618453753027715e-06, "loss": 0.3171, "step": 3587 }, { "epoch": 0.17, "grad_norm": 0.7077991974138533, "learning_rate": 4.961812361041817e-06, "loss": 0.3249, "step": 3588 }, { "epoch": 0.17, "grad_norm": 0.6724140499532713, "learning_rate": 4.9617793326137496e-06, "loss": 0.3155, "step": 3589 }, { "epoch": 0.17, "grad_norm": 0.6928883594346019, "learning_rate": 4.961746290018762e-06, "loss": 0.3136, "step": 3590 }, { "epoch": 0.17, "grad_norm": 0.6863743823373195, "learning_rate": 4.961713233257043e-06, "loss": 0.291, "step": 3591 }, { "epoch": 0.17, "grad_norm": 0.7160848845369857, "learning_rate": 4.961680162328783e-06, "loss": 0.3049, "step": 3592 }, { "epoch": 0.17, "grad_norm": 0.6845989655656619, "learning_rate": 4.961647077234171e-06, "loss": 0.294, "step": 3593 }, { "epoch": 0.17, "grad_norm": 0.70502779647904, "learning_rate": 4.9616139779734e-06, "loss": 0.3114, "step": 3594 }, { "epoch": 0.17, "grad_norm": 0.6810998446641695, "learning_rate": 4.96158086454666e-06, "loss": 0.3043, "step": 3595 }, { "epoch": 0.17, "grad_norm": 0.670911604386175, "learning_rate": 4.96154773695414e-06, "loss": 0.2941, "step": 3596 }, { "epoch": 0.17, "grad_norm": 0.6840998611285642, "learning_rate": 4.961514595196032e-06, "loss": 0.3141, "step": 3597 }, { "epoch": 0.17, "grad_norm": 0.7095380508638588, "learning_rate": 4.961481439272525e-06, "loss": 0.2865, "step": 3598 }, { "epoch": 0.17, "grad_norm": 0.7280081350808516, "learning_rate": 4.961448269183811e-06, "loss": 0.3261, "step": 3599 }, { "epoch": 0.17, "grad_norm": 0.6675120328792702, "learning_rate": 4.9614150849300825e-06, "loss": 0.2989, "step": 3600 }, { "epoch": 0.17, "grad_norm": 0.6796084679866232, "learning_rate": 4.961381886511528e-06, "loss": 0.2895, "step": 3601 }, { "epoch": 0.17, "grad_norm": 0.6702380610220419, "learning_rate": 4.961348673928339e-06, "loss": 0.3174, "step": 3602 }, { "epoch": 0.17, "grad_norm": 0.7038959781985313, "learning_rate": 4.961315447180707e-06, "loss": 0.3183, "step": 3603 }, { "epoch": 0.17, "grad_norm": 0.6420811640957474, "learning_rate": 4.961282206268824e-06, "loss": 0.2833, "step": 3604 }, { "epoch": 0.17, "grad_norm": 0.6478802986858357, "learning_rate": 4.96124895119288e-06, "loss": 0.3081, "step": 3605 }, { "epoch": 0.17, "grad_norm": 0.6721660500095951, "learning_rate": 4.961215681953067e-06, "loss": 0.3072, "step": 3606 }, { "epoch": 0.17, "grad_norm": 0.6313759019568335, "learning_rate": 4.961182398549577e-06, "loss": 0.303, "step": 3607 }, { "epoch": 0.17, "grad_norm": 0.7220317254566834, "learning_rate": 4.961149100982599e-06, "loss": 0.2956, "step": 3608 }, { "epoch": 0.17, "grad_norm": 0.662830830931359, "learning_rate": 4.961115789252328e-06, "loss": 0.2995, "step": 3609 }, { "epoch": 0.17, "grad_norm": 0.6826029866213171, "learning_rate": 4.961082463358954e-06, "loss": 0.3014, "step": 3610 }, { "epoch": 0.17, "grad_norm": 0.6404842359159343, "learning_rate": 4.9610491233026695e-06, "loss": 0.3029, "step": 3611 }, { "epoch": 0.17, "grad_norm": 0.7056438658897478, "learning_rate": 4.961015769083664e-06, "loss": 0.3241, "step": 3612 }, { "epoch": 0.17, "grad_norm": 0.6686724826158416, "learning_rate": 4.960982400702134e-06, "loss": 0.3147, "step": 3613 }, { "epoch": 0.17, "grad_norm": 0.6537047985229776, "learning_rate": 4.960949018158267e-06, "loss": 0.3101, "step": 3614 }, { "epoch": 0.17, "grad_norm": 0.7207228429606153, "learning_rate": 4.960915621452257e-06, "loss": 0.3225, "step": 3615 }, { "epoch": 0.17, "grad_norm": 0.6381934286318042, "learning_rate": 4.960882210584297e-06, "loss": 0.3167, "step": 3616 }, { "epoch": 0.17, "grad_norm": 0.6381980423246569, "learning_rate": 4.960848785554578e-06, "loss": 0.2929, "step": 3617 }, { "epoch": 0.17, "grad_norm": 0.6706254900617865, "learning_rate": 4.960815346363293e-06, "loss": 0.2872, "step": 3618 }, { "epoch": 0.17, "grad_norm": 0.6875119651964064, "learning_rate": 4.960781893010633e-06, "loss": 0.301, "step": 3619 }, { "epoch": 0.17, "grad_norm": 0.7140321043680492, "learning_rate": 4.960748425496793e-06, "loss": 0.3236, "step": 3620 }, { "epoch": 0.17, "grad_norm": 0.7086306592871755, "learning_rate": 4.960714943821964e-06, "loss": 0.3066, "step": 3621 }, { "epoch": 0.17, "grad_norm": 0.7252609472878898, "learning_rate": 4.9606814479863385e-06, "loss": 0.3024, "step": 3622 }, { "epoch": 0.17, "grad_norm": 0.6883218699569074, "learning_rate": 4.96064793799011e-06, "loss": 0.2906, "step": 3623 }, { "epoch": 0.17, "grad_norm": 0.6968112950502138, "learning_rate": 4.960614413833471e-06, "loss": 0.3152, "step": 3624 }, { "epoch": 0.17, "grad_norm": 0.720250322679075, "learning_rate": 4.9605808755166145e-06, "loss": 0.3126, "step": 3625 }, { "epoch": 0.17, "grad_norm": 0.693077645492975, "learning_rate": 4.9605473230397335e-06, "loss": 0.3349, "step": 3626 }, { "epoch": 0.17, "grad_norm": 0.6220212320823005, "learning_rate": 4.960513756403021e-06, "loss": 0.2948, "step": 3627 }, { "epoch": 0.17, "grad_norm": 0.6832789388626215, "learning_rate": 4.960480175606671e-06, "loss": 0.2993, "step": 3628 }, { "epoch": 0.17, "grad_norm": 0.7108707198268306, "learning_rate": 4.960446580650875e-06, "loss": 0.3233, "step": 3629 }, { "epoch": 0.17, "grad_norm": 0.7049607794328268, "learning_rate": 4.9604129715358285e-06, "loss": 0.3189, "step": 3630 }, { "epoch": 0.17, "grad_norm": 0.6603367511391072, "learning_rate": 4.960379348261722e-06, "loss": 0.315, "step": 3631 }, { "epoch": 0.17, "grad_norm": 0.7018650110678603, "learning_rate": 4.960345710828752e-06, "loss": 0.3062, "step": 3632 }, { "epoch": 0.17, "grad_norm": 0.7018540516253927, "learning_rate": 4.960312059237111e-06, "loss": 0.3115, "step": 3633 }, { "epoch": 0.17, "grad_norm": 0.6909172691712056, "learning_rate": 4.960278393486993e-06, "loss": 0.3192, "step": 3634 }, { "epoch": 0.17, "grad_norm": 0.6658668443785584, "learning_rate": 4.960244713578589e-06, "loss": 0.3377, "step": 3635 }, { "epoch": 0.17, "grad_norm": 0.6851416003198981, "learning_rate": 4.9602110195120964e-06, "loss": 0.2977, "step": 3636 }, { "epoch": 0.17, "grad_norm": 0.6904795837876903, "learning_rate": 4.960177311287708e-06, "loss": 0.3203, "step": 3637 }, { "epoch": 0.17, "grad_norm": 0.6508494822720842, "learning_rate": 4.9601435889056174e-06, "loss": 0.2908, "step": 3638 }, { "epoch": 0.17, "grad_norm": 0.6409518378830136, "learning_rate": 4.960109852366018e-06, "loss": 0.2863, "step": 3639 }, { "epoch": 0.17, "grad_norm": 0.610947995879343, "learning_rate": 4.960076101669106e-06, "loss": 0.2968, "step": 3640 }, { "epoch": 0.17, "grad_norm": 0.6604392648968497, "learning_rate": 4.960042336815074e-06, "loss": 0.2927, "step": 3641 }, { "epoch": 0.17, "grad_norm": 0.7026052080107118, "learning_rate": 4.960008557804116e-06, "loss": 0.2943, "step": 3642 }, { "epoch": 0.17, "grad_norm": 0.665664344425831, "learning_rate": 4.959974764636427e-06, "loss": 0.298, "step": 3643 }, { "epoch": 0.17, "grad_norm": 0.707237452659747, "learning_rate": 4.959940957312202e-06, "loss": 0.3243, "step": 3644 }, { "epoch": 0.17, "grad_norm": 0.7019064541264489, "learning_rate": 4.9599071358316355e-06, "loss": 0.3184, "step": 3645 }, { "epoch": 0.17, "grad_norm": 0.6633977663240037, "learning_rate": 4.959873300194921e-06, "loss": 0.3043, "step": 3646 }, { "epoch": 0.17, "grad_norm": 0.6952643306088327, "learning_rate": 4.959839450402254e-06, "loss": 0.3054, "step": 3647 }, { "epoch": 0.17, "grad_norm": 0.742025612333006, "learning_rate": 4.959805586453829e-06, "loss": 0.3061, "step": 3648 }, { "epoch": 0.17, "grad_norm": 0.7284382928623251, "learning_rate": 4.959771708349841e-06, "loss": 0.3213, "step": 3649 }, { "epoch": 0.17, "grad_norm": 0.7055789475122335, "learning_rate": 4.959737816090486e-06, "loss": 0.343, "step": 3650 }, { "epoch": 0.17, "grad_norm": 0.6673120558234535, "learning_rate": 4.9597039096759575e-06, "loss": 0.304, "step": 3651 }, { "epoch": 0.17, "grad_norm": 0.8114339245497142, "learning_rate": 4.959669989106451e-06, "loss": 0.3077, "step": 3652 }, { "epoch": 0.17, "grad_norm": 0.6888479545803418, "learning_rate": 4.959636054382162e-06, "loss": 0.3018, "step": 3653 }, { "epoch": 0.17, "grad_norm": 0.6807178154338008, "learning_rate": 4.959602105503286e-06, "loss": 0.2985, "step": 3654 }, { "epoch": 0.17, "grad_norm": 0.6464234700243526, "learning_rate": 4.959568142470018e-06, "loss": 0.3069, "step": 3655 }, { "epoch": 0.17, "grad_norm": 0.6941780639411497, "learning_rate": 4.959534165282554e-06, "loss": 0.3018, "step": 3656 }, { "epoch": 0.17, "grad_norm": 0.6969351308849965, "learning_rate": 4.9595001739410886e-06, "loss": 0.3048, "step": 3657 }, { "epoch": 0.17, "grad_norm": 0.7183685609645173, "learning_rate": 4.959466168445818e-06, "loss": 0.3, "step": 3658 }, { "epoch": 0.17, "grad_norm": 0.6664472121656249, "learning_rate": 4.959432148796937e-06, "loss": 0.3175, "step": 3659 }, { "epoch": 0.17, "grad_norm": 0.7033911006843407, "learning_rate": 4.959398114994644e-06, "loss": 0.2983, "step": 3660 }, { "epoch": 0.17, "grad_norm": 0.7082482876124213, "learning_rate": 4.959364067039131e-06, "loss": 0.3118, "step": 3661 }, { "epoch": 0.17, "grad_norm": 0.784308483093203, "learning_rate": 4.959330004930597e-06, "loss": 0.3245, "step": 3662 }, { "epoch": 0.17, "grad_norm": 0.6755079264414691, "learning_rate": 4.959295928669236e-06, "loss": 0.2951, "step": 3663 }, { "epoch": 0.17, "grad_norm": 0.681731577686721, "learning_rate": 4.959261838255246e-06, "loss": 0.3029, "step": 3664 }, { "epoch": 0.17, "grad_norm": 0.6686290560676441, "learning_rate": 4.959227733688822e-06, "loss": 0.3126, "step": 3665 }, { "epoch": 0.17, "grad_norm": 0.638685840179734, "learning_rate": 4.95919361497016e-06, "loss": 0.3077, "step": 3666 }, { "epoch": 0.17, "grad_norm": 0.5768047760641626, "learning_rate": 4.959159482099458e-06, "loss": 0.2764, "step": 3667 }, { "epoch": 0.17, "grad_norm": 0.6190840429924739, "learning_rate": 4.95912533507691e-06, "loss": 0.2917, "step": 3668 }, { "epoch": 0.17, "grad_norm": 0.6964080333244289, "learning_rate": 4.959091173902715e-06, "loss": 0.3166, "step": 3669 }, { "epoch": 0.17, "grad_norm": 0.665113046304739, "learning_rate": 4.959056998577067e-06, "loss": 0.2954, "step": 3670 }, { "epoch": 0.17, "grad_norm": 0.6947779401145283, "learning_rate": 4.9590228091001645e-06, "loss": 0.3031, "step": 3671 }, { "epoch": 0.17, "grad_norm": 0.6648751316640864, "learning_rate": 4.958988605472205e-06, "loss": 0.3098, "step": 3672 }, { "epoch": 0.17, "grad_norm": 0.6425125862371125, "learning_rate": 4.958954387693383e-06, "loss": 0.3041, "step": 3673 }, { "epoch": 0.17, "grad_norm": 0.6353442959835225, "learning_rate": 4.958920155763896e-06, "loss": 0.2913, "step": 3674 }, { "epoch": 0.17, "grad_norm": 0.679852076711212, "learning_rate": 4.958885909683942e-06, "loss": 0.3175, "step": 3675 }, { "epoch": 0.17, "grad_norm": 0.6706808336601867, "learning_rate": 4.958851649453718e-06, "loss": 0.2883, "step": 3676 }, { "epoch": 0.17, "grad_norm": 0.651919005670173, "learning_rate": 4.958817375073421e-06, "loss": 0.3256, "step": 3677 }, { "epoch": 0.17, "grad_norm": 0.6193481219791098, "learning_rate": 4.958783086543248e-06, "loss": 0.3081, "step": 3678 }, { "epoch": 0.17, "grad_norm": 0.6992567177910312, "learning_rate": 4.958748783863395e-06, "loss": 0.3187, "step": 3679 }, { "epoch": 0.17, "grad_norm": 0.6447904345172644, "learning_rate": 4.9587144670340614e-06, "loss": 0.3166, "step": 3680 }, { "epoch": 0.17, "grad_norm": 0.7301840868795018, "learning_rate": 4.958680136055445e-06, "loss": 0.3529, "step": 3681 }, { "epoch": 0.17, "grad_norm": 0.60775496305513, "learning_rate": 4.958645790927741e-06, "loss": 0.3147, "step": 3682 }, { "epoch": 0.17, "grad_norm": 0.6942170373313539, "learning_rate": 4.958611431651149e-06, "loss": 0.3066, "step": 3683 }, { "epoch": 0.17, "grad_norm": 0.6862911880663698, "learning_rate": 4.958577058225866e-06, "loss": 0.2939, "step": 3684 }, { "epoch": 0.17, "grad_norm": 0.6544833420713664, "learning_rate": 4.958542670652091e-06, "loss": 0.3155, "step": 3685 }, { "epoch": 0.17, "grad_norm": 0.6787080031368596, "learning_rate": 4.95850826893002e-06, "loss": 0.3217, "step": 3686 }, { "epoch": 0.17, "grad_norm": 0.6616982185138659, "learning_rate": 4.958473853059852e-06, "loss": 0.3106, "step": 3687 }, { "epoch": 0.17, "grad_norm": 0.5890148371855489, "learning_rate": 4.958439423041784e-06, "loss": 0.2943, "step": 3688 }, { "epoch": 0.17, "grad_norm": 0.664629344545347, "learning_rate": 4.958404978876016e-06, "loss": 0.33, "step": 3689 }, { "epoch": 0.17, "grad_norm": 0.6664437730099032, "learning_rate": 4.9583705205627454e-06, "loss": 0.3029, "step": 3690 }, { "epoch": 0.17, "grad_norm": 0.6623965324322186, "learning_rate": 4.95833604810217e-06, "loss": 0.3124, "step": 3691 }, { "epoch": 0.17, "grad_norm": 0.6409962504697018, "learning_rate": 4.958301561494488e-06, "loss": 0.3219, "step": 3692 }, { "epoch": 0.17, "grad_norm": 0.6822816657999984, "learning_rate": 4.958267060739899e-06, "loss": 0.33, "step": 3693 }, { "epoch": 0.17, "grad_norm": 0.7071815698586355, "learning_rate": 4.958232545838601e-06, "loss": 0.2993, "step": 3694 }, { "epoch": 0.17, "grad_norm": 0.6532139931150612, "learning_rate": 4.958198016790792e-06, "loss": 0.3058, "step": 3695 }, { "epoch": 0.17, "grad_norm": 0.6563199729194366, "learning_rate": 4.958163473596672e-06, "loss": 0.3101, "step": 3696 }, { "epoch": 0.17, "grad_norm": 0.7310221187615719, "learning_rate": 4.958128916256439e-06, "loss": 0.3316, "step": 3697 }, { "epoch": 0.17, "grad_norm": 0.6280325438562813, "learning_rate": 4.958094344770292e-06, "loss": 0.3178, "step": 3698 }, { "epoch": 0.17, "grad_norm": 0.6430240171852373, "learning_rate": 4.958059759138428e-06, "loss": 0.294, "step": 3699 }, { "epoch": 0.17, "grad_norm": 0.6785595451222093, "learning_rate": 4.9580251593610504e-06, "loss": 0.3068, "step": 3700 }, { "epoch": 0.17, "grad_norm": 0.6750961568485454, "learning_rate": 4.9579905454383545e-06, "loss": 0.3155, "step": 3701 }, { "epoch": 0.17, "grad_norm": 0.6491864164470899, "learning_rate": 4.957955917370541e-06, "loss": 0.3305, "step": 3702 }, { "epoch": 0.17, "grad_norm": 0.6800552472540977, "learning_rate": 4.957921275157809e-06, "loss": 0.3133, "step": 3703 }, { "epoch": 0.17, "grad_norm": 0.6817041112931528, "learning_rate": 4.957886618800358e-06, "loss": 0.3289, "step": 3704 }, { "epoch": 0.17, "grad_norm": 0.6760957953308356, "learning_rate": 4.957851948298387e-06, "loss": 0.2908, "step": 3705 }, { "epoch": 0.17, "grad_norm": 0.6774619082035219, "learning_rate": 4.957817263652096e-06, "loss": 0.315, "step": 3706 }, { "epoch": 0.17, "grad_norm": 0.5970244573152493, "learning_rate": 4.957782564861684e-06, "loss": 0.298, "step": 3707 }, { "epoch": 0.17, "grad_norm": 0.6717837092049953, "learning_rate": 4.957747851927351e-06, "loss": 0.3184, "step": 3708 }, { "epoch": 0.17, "grad_norm": 0.7268784575643834, "learning_rate": 4.9577131248492985e-06, "loss": 0.3135, "step": 3709 }, { "epoch": 0.17, "grad_norm": 0.7304469260189919, "learning_rate": 4.9576783836277234e-06, "loss": 0.3272, "step": 3710 }, { "epoch": 0.17, "grad_norm": 0.6113296781816023, "learning_rate": 4.957643628262827e-06, "loss": 0.3057, "step": 3711 }, { "epoch": 0.17, "grad_norm": 0.6241517336538518, "learning_rate": 4.957608858754809e-06, "loss": 0.3043, "step": 3712 }, { "epoch": 0.17, "grad_norm": 0.6275230667967374, "learning_rate": 4.957574075103869e-06, "loss": 0.2882, "step": 3713 }, { "epoch": 0.17, "grad_norm": 0.6210297247174288, "learning_rate": 4.957539277310209e-06, "loss": 0.306, "step": 3714 }, { "epoch": 0.17, "grad_norm": 0.6816674481308945, "learning_rate": 4.9575044653740285e-06, "loss": 0.3122, "step": 3715 }, { "epoch": 0.17, "grad_norm": 0.6350436945414795, "learning_rate": 4.9574696392955265e-06, "loss": 0.2977, "step": 3716 }, { "epoch": 0.17, "grad_norm": 0.6054069446517701, "learning_rate": 4.957434799074905e-06, "loss": 0.299, "step": 3717 }, { "epoch": 0.17, "grad_norm": 0.6875574169845918, "learning_rate": 4.957399944712364e-06, "loss": 0.3223, "step": 3718 }, { "epoch": 0.17, "grad_norm": 0.6791505074944582, "learning_rate": 4.957365076208103e-06, "loss": 0.3134, "step": 3719 }, { "epoch": 0.17, "grad_norm": 0.6294003820867861, "learning_rate": 4.9573301935623245e-06, "loss": 0.29, "step": 3720 }, { "epoch": 0.17, "grad_norm": 0.7183245239740692, "learning_rate": 4.957295296775229e-06, "loss": 0.3188, "step": 3721 }, { "epoch": 0.17, "grad_norm": 0.6777497374193041, "learning_rate": 4.957260385847015e-06, "loss": 0.303, "step": 3722 }, { "epoch": 0.17, "grad_norm": 0.7143594284980629, "learning_rate": 4.957225460777886e-06, "loss": 0.3287, "step": 3723 }, { "epoch": 0.17, "grad_norm": 0.7898862218553862, "learning_rate": 4.957190521568042e-06, "loss": 0.3339, "step": 3724 }, { "epoch": 0.17, "grad_norm": 0.6744815917616712, "learning_rate": 4.957155568217684e-06, "loss": 0.3221, "step": 3725 }, { "epoch": 0.17, "grad_norm": 0.6879346868861232, "learning_rate": 4.957120600727013e-06, "loss": 0.3124, "step": 3726 }, { "epoch": 0.17, "grad_norm": 0.7067395740907835, "learning_rate": 4.957085619096231e-06, "loss": 0.3027, "step": 3727 }, { "epoch": 0.17, "grad_norm": 0.6393021878956638, "learning_rate": 4.957050623325539e-06, "loss": 0.2949, "step": 3728 }, { "epoch": 0.17, "grad_norm": 0.6621160612548815, "learning_rate": 4.957015613415138e-06, "loss": 0.3036, "step": 3729 }, { "epoch": 0.17, "grad_norm": 0.6819931160458901, "learning_rate": 4.95698058936523e-06, "loss": 0.3185, "step": 3730 }, { "epoch": 0.17, "grad_norm": 0.6662592399709985, "learning_rate": 4.956945551176016e-06, "loss": 0.3118, "step": 3731 }, { "epoch": 0.17, "grad_norm": 0.6520972489753759, "learning_rate": 4.9569104988476975e-06, "loss": 0.285, "step": 3732 }, { "epoch": 0.17, "grad_norm": 0.6368010059298652, "learning_rate": 4.956875432380477e-06, "loss": 0.297, "step": 3733 }, { "epoch": 0.17, "grad_norm": 0.6215177486318567, "learning_rate": 4.956840351774556e-06, "loss": 0.2751, "step": 3734 }, { "epoch": 0.17, "grad_norm": 0.6241567577270442, "learning_rate": 4.956805257030135e-06, "loss": 0.2893, "step": 3735 }, { "epoch": 0.18, "grad_norm": 0.6626082601383543, "learning_rate": 4.956770148147419e-06, "loss": 0.3255, "step": 3736 }, { "epoch": 0.18, "grad_norm": 0.7155675690205063, "learning_rate": 4.9567350251266075e-06, "loss": 0.3228, "step": 3737 }, { "epoch": 0.18, "grad_norm": 0.6729949327916983, "learning_rate": 4.956699887967904e-06, "loss": 0.3126, "step": 3738 }, { "epoch": 0.18, "grad_norm": 0.6602157184808664, "learning_rate": 4.956664736671509e-06, "loss": 0.3046, "step": 3739 }, { "epoch": 0.18, "grad_norm": 0.644056936896296, "learning_rate": 4.9566295712376265e-06, "loss": 0.2968, "step": 3740 }, { "epoch": 0.18, "grad_norm": 0.6617695881918844, "learning_rate": 4.956594391666458e-06, "loss": 0.2968, "step": 3741 }, { "epoch": 0.18, "grad_norm": 0.6995125862064424, "learning_rate": 4.956559197958207e-06, "loss": 0.3131, "step": 3742 }, { "epoch": 0.18, "grad_norm": 0.6548908043386106, "learning_rate": 4.9565239901130745e-06, "loss": 0.3143, "step": 3743 }, { "epoch": 0.18, "grad_norm": 0.6243218334941747, "learning_rate": 4.9564887681312645e-06, "loss": 0.3002, "step": 3744 }, { "epoch": 0.18, "grad_norm": 0.6498999872807134, "learning_rate": 4.9564535320129786e-06, "loss": 0.3114, "step": 3745 }, { "epoch": 0.18, "grad_norm": 0.664604477546171, "learning_rate": 4.95641828175842e-06, "loss": 0.3005, "step": 3746 }, { "epoch": 0.18, "grad_norm": 0.6493955292665344, "learning_rate": 4.956383017367792e-06, "loss": 0.2973, "step": 3747 }, { "epoch": 0.18, "grad_norm": 0.6422665697315185, "learning_rate": 4.9563477388412965e-06, "loss": 0.2987, "step": 3748 }, { "epoch": 0.18, "grad_norm": 0.6763771003135712, "learning_rate": 4.956312446179137e-06, "loss": 0.3064, "step": 3749 }, { "epoch": 0.18, "grad_norm": 0.6742419875589394, "learning_rate": 4.956277139381518e-06, "loss": 0.32, "step": 3750 }, { "epoch": 0.18, "grad_norm": 0.5935021996172818, "learning_rate": 4.95624181844864e-06, "loss": 0.3058, "step": 3751 }, { "epoch": 0.18, "grad_norm": 0.7057018238156969, "learning_rate": 4.956206483380709e-06, "loss": 0.3168, "step": 3752 }, { "epoch": 0.18, "grad_norm": 0.6860303192704905, "learning_rate": 4.956171134177926e-06, "loss": 0.3059, "step": 3753 }, { "epoch": 0.18, "grad_norm": 0.6728828827117264, "learning_rate": 4.956135770840495e-06, "loss": 0.296, "step": 3754 }, { "epoch": 0.18, "grad_norm": 0.6613974649503525, "learning_rate": 4.956100393368621e-06, "loss": 0.2825, "step": 3755 }, { "epoch": 0.18, "grad_norm": 0.6412137225763401, "learning_rate": 4.956065001762507e-06, "loss": 0.3027, "step": 3756 }, { "epoch": 0.18, "grad_norm": 0.7250902840376898, "learning_rate": 4.956029596022356e-06, "loss": 0.3177, "step": 3757 }, { "epoch": 0.18, "grad_norm": 0.6650248966164864, "learning_rate": 4.955994176148371e-06, "loss": 0.3196, "step": 3758 }, { "epoch": 0.18, "grad_norm": 0.6228279909549999, "learning_rate": 4.9559587421407575e-06, "loss": 0.3073, "step": 3759 }, { "epoch": 0.18, "grad_norm": 0.632061418833055, "learning_rate": 4.955923293999718e-06, "loss": 0.3082, "step": 3760 }, { "epoch": 0.18, "grad_norm": 0.6507916429641172, "learning_rate": 4.955887831725457e-06, "loss": 0.3214, "step": 3761 }, { "epoch": 0.18, "grad_norm": 0.6825254317195222, "learning_rate": 4.955852355318181e-06, "loss": 0.3114, "step": 3762 }, { "epoch": 0.18, "grad_norm": 0.63244234928474, "learning_rate": 4.95581686477809e-06, "loss": 0.3022, "step": 3763 }, { "epoch": 0.18, "grad_norm": 0.6548802885230385, "learning_rate": 4.955781360105391e-06, "loss": 0.3065, "step": 3764 }, { "epoch": 0.18, "grad_norm": 0.6767204516857956, "learning_rate": 4.955745841300287e-06, "loss": 0.311, "step": 3765 }, { "epoch": 0.18, "grad_norm": 0.7032105475813829, "learning_rate": 4.955710308362983e-06, "loss": 0.3177, "step": 3766 }, { "epoch": 0.18, "grad_norm": 0.6617955675952766, "learning_rate": 4.955674761293683e-06, "loss": 0.3483, "step": 3767 }, { "epoch": 0.18, "grad_norm": 0.6478688499966516, "learning_rate": 4.955639200092593e-06, "loss": 0.2874, "step": 3768 }, { "epoch": 0.18, "grad_norm": 0.6137204201262089, "learning_rate": 4.955603624759916e-06, "loss": 0.3014, "step": 3769 }, { "epoch": 0.18, "grad_norm": 0.5705660754687994, "learning_rate": 4.955568035295857e-06, "loss": 0.2824, "step": 3770 }, { "epoch": 0.18, "grad_norm": 0.6169477138285894, "learning_rate": 4.9555324317006214e-06, "loss": 0.2969, "step": 3771 }, { "epoch": 0.18, "grad_norm": 0.6865789343493441, "learning_rate": 4.9554968139744144e-06, "loss": 0.3128, "step": 3772 }, { "epoch": 0.18, "grad_norm": 0.6913963123608945, "learning_rate": 4.9554611821174405e-06, "loss": 0.3197, "step": 3773 }, { "epoch": 0.18, "grad_norm": 0.6723390929578422, "learning_rate": 4.955425536129904e-06, "loss": 0.3138, "step": 3774 }, { "epoch": 0.18, "grad_norm": 0.6523485413310461, "learning_rate": 4.95538987601201e-06, "loss": 0.3111, "step": 3775 }, { "epoch": 0.18, "grad_norm": 0.6606768530010558, "learning_rate": 4.9553542017639655e-06, "loss": 0.3372, "step": 3776 }, { "epoch": 0.18, "grad_norm": 0.6646597143035937, "learning_rate": 4.955318513385975e-06, "loss": 0.3118, "step": 3777 }, { "epoch": 0.18, "grad_norm": 0.6571352957100912, "learning_rate": 4.955282810878243e-06, "loss": 0.3227, "step": 3778 }, { "epoch": 0.18, "grad_norm": 0.7954776294886046, "learning_rate": 4.9552470942409745e-06, "loss": 0.3264, "step": 3779 }, { "epoch": 0.18, "grad_norm": 0.7266462253410153, "learning_rate": 4.955211363474378e-06, "loss": 0.3196, "step": 3780 }, { "epoch": 0.18, "grad_norm": 0.7047630091321079, "learning_rate": 4.9551756185786555e-06, "loss": 0.2976, "step": 3781 }, { "epoch": 0.18, "grad_norm": 0.602441511975903, "learning_rate": 4.9551398595540155e-06, "loss": 0.2848, "step": 3782 }, { "epoch": 0.18, "grad_norm": 0.6527056697448925, "learning_rate": 4.955104086400663e-06, "loss": 0.2895, "step": 3783 }, { "epoch": 0.18, "grad_norm": 0.6243537093625596, "learning_rate": 4.955068299118803e-06, "loss": 0.2921, "step": 3784 }, { "epoch": 0.18, "grad_norm": 0.7199693220231292, "learning_rate": 4.955032497708642e-06, "loss": 0.3073, "step": 3785 }, { "epoch": 0.18, "grad_norm": 0.7168950315572221, "learning_rate": 4.954996682170385e-06, "loss": 0.3102, "step": 3786 }, { "epoch": 0.18, "grad_norm": 0.6435264558550668, "learning_rate": 4.95496085250424e-06, "loss": 0.3177, "step": 3787 }, { "epoch": 0.18, "grad_norm": 0.6312465711380998, "learning_rate": 4.954925008710413e-06, "loss": 0.3017, "step": 3788 }, { "epoch": 0.18, "grad_norm": 0.6439144928952655, "learning_rate": 4.954889150789109e-06, "loss": 0.2936, "step": 3789 }, { "epoch": 0.18, "grad_norm": 0.7602679746442265, "learning_rate": 4.9548532787405346e-06, "loss": 0.3108, "step": 3790 }, { "epoch": 0.18, "grad_norm": 0.6237227709823835, "learning_rate": 4.954817392564897e-06, "loss": 0.2904, "step": 3791 }, { "epoch": 0.18, "grad_norm": 0.6431190084249112, "learning_rate": 4.954781492262403e-06, "loss": 0.3019, "step": 3792 }, { "epoch": 0.18, "grad_norm": 0.6447170258837911, "learning_rate": 4.9547455778332575e-06, "loss": 0.3041, "step": 3793 }, { "epoch": 0.18, "grad_norm": 0.682703325270086, "learning_rate": 4.954709649277668e-06, "loss": 0.3274, "step": 3794 }, { "epoch": 0.18, "grad_norm": 0.6492236478099049, "learning_rate": 4.954673706595841e-06, "loss": 0.2977, "step": 3795 }, { "epoch": 0.18, "grad_norm": 0.6149368891920085, "learning_rate": 4.954637749787986e-06, "loss": 0.2942, "step": 3796 }, { "epoch": 0.18, "grad_norm": 0.6485092593350104, "learning_rate": 4.954601778854306e-06, "loss": 0.3004, "step": 3797 }, { "epoch": 0.18, "grad_norm": 0.6680870435914187, "learning_rate": 4.95456579379501e-06, "loss": 0.3, "step": 3798 }, { "epoch": 0.18, "grad_norm": 0.6267675363466689, "learning_rate": 4.954529794610305e-06, "loss": 0.2827, "step": 3799 }, { "epoch": 0.18, "grad_norm": 0.6892091686852478, "learning_rate": 4.954493781300398e-06, "loss": 0.3059, "step": 3800 }, { "epoch": 0.18, "grad_norm": 0.6116838627106528, "learning_rate": 4.954457753865496e-06, "loss": 0.2911, "step": 3801 }, { "epoch": 0.18, "grad_norm": 0.6963826651391127, "learning_rate": 4.9544217123058055e-06, "loss": 0.288, "step": 3802 }, { "epoch": 0.18, "grad_norm": 0.593537320037102, "learning_rate": 4.954385656621537e-06, "loss": 0.2965, "step": 3803 }, { "epoch": 0.18, "grad_norm": 0.6572777015133263, "learning_rate": 4.954349586812894e-06, "loss": 0.3037, "step": 3804 }, { "epoch": 0.18, "grad_norm": 0.6387658616285461, "learning_rate": 4.954313502880087e-06, "loss": 0.3058, "step": 3805 }, { "epoch": 0.18, "grad_norm": 0.6324743276031467, "learning_rate": 4.9542774048233236e-06, "loss": 0.3002, "step": 3806 }, { "epoch": 0.18, "grad_norm": 0.6195394931467697, "learning_rate": 4.9542412926428084e-06, "loss": 0.3104, "step": 3807 }, { "epoch": 0.18, "grad_norm": 0.5844826741821396, "learning_rate": 4.954205166338753e-06, "loss": 0.3186, "step": 3808 }, { "epoch": 0.18, "grad_norm": 0.635015728944429, "learning_rate": 4.954169025911363e-06, "loss": 0.3156, "step": 3809 }, { "epoch": 0.18, "grad_norm": 0.684195361238336, "learning_rate": 4.954132871360848e-06, "loss": 0.2912, "step": 3810 }, { "epoch": 0.18, "grad_norm": 0.6284846928250545, "learning_rate": 4.9540967026874145e-06, "loss": 0.2893, "step": 3811 }, { "epoch": 0.18, "grad_norm": 0.6883236535857055, "learning_rate": 4.954060519891271e-06, "loss": 0.3266, "step": 3812 }, { "epoch": 0.18, "grad_norm": 0.6765674200870546, "learning_rate": 4.954024322972626e-06, "loss": 0.3224, "step": 3813 }, { "epoch": 0.18, "grad_norm": 0.6057113242886419, "learning_rate": 4.953988111931689e-06, "loss": 0.2753, "step": 3814 }, { "epoch": 0.18, "grad_norm": 0.6587593678525668, "learning_rate": 4.953951886768666e-06, "loss": 0.3142, "step": 3815 }, { "epoch": 0.18, "grad_norm": 0.692137498196956, "learning_rate": 4.953915647483767e-06, "loss": 0.3235, "step": 3816 }, { "epoch": 0.18, "grad_norm": 0.6001029610761947, "learning_rate": 4.9538793940772e-06, "loss": 0.3048, "step": 3817 }, { "epoch": 0.18, "grad_norm": 0.6594802752564866, "learning_rate": 4.953843126549174e-06, "loss": 0.3194, "step": 3818 }, { "epoch": 0.18, "grad_norm": 0.6452811267300098, "learning_rate": 4.953806844899898e-06, "loss": 0.3026, "step": 3819 }, { "epoch": 0.18, "grad_norm": 0.6196256421776781, "learning_rate": 4.95377054912958e-06, "loss": 0.2945, "step": 3820 }, { "epoch": 0.18, "grad_norm": 0.6580476604433857, "learning_rate": 4.9537342392384295e-06, "loss": 0.3057, "step": 3821 }, { "epoch": 0.18, "grad_norm": 0.6857565196950721, "learning_rate": 4.953697915226654e-06, "loss": 0.3043, "step": 3822 }, { "epoch": 0.18, "grad_norm": 0.7456004007337436, "learning_rate": 4.953661577094465e-06, "loss": 0.31, "step": 3823 }, { "epoch": 0.18, "grad_norm": 0.664651823946127, "learning_rate": 4.95362522484207e-06, "loss": 0.3057, "step": 3824 }, { "epoch": 0.18, "grad_norm": 0.770812945910472, "learning_rate": 4.9535888584696776e-06, "loss": 0.3018, "step": 3825 }, { "epoch": 0.18, "grad_norm": 0.6244445673765677, "learning_rate": 4.953552477977499e-06, "loss": 0.2962, "step": 3826 }, { "epoch": 0.18, "grad_norm": 0.6329410330292136, "learning_rate": 4.953516083365742e-06, "loss": 0.2991, "step": 3827 }, { "epoch": 0.18, "grad_norm": 0.7253463297866423, "learning_rate": 4.953479674634618e-06, "loss": 0.2877, "step": 3828 }, { "epoch": 0.18, "grad_norm": 0.6235486817364045, "learning_rate": 4.953443251784334e-06, "loss": 0.2885, "step": 3829 }, { "epoch": 0.18, "grad_norm": 0.6157822790229128, "learning_rate": 4.9534068148151e-06, "loss": 0.3003, "step": 3830 }, { "epoch": 0.18, "grad_norm": 0.6382447875285903, "learning_rate": 4.953370363727128e-06, "loss": 0.3007, "step": 3831 }, { "epoch": 0.18, "grad_norm": 0.6930263498092591, "learning_rate": 4.953333898520626e-06, "loss": 0.3224, "step": 3832 }, { "epoch": 0.18, "grad_norm": 0.628986605724357, "learning_rate": 4.953297419195802e-06, "loss": 0.3144, "step": 3833 }, { "epoch": 0.18, "grad_norm": 0.6342918654488737, "learning_rate": 4.953260925752871e-06, "loss": 0.307, "step": 3834 }, { "epoch": 0.18, "grad_norm": 0.6882941050780148, "learning_rate": 4.953224418192038e-06, "loss": 0.3086, "step": 3835 }, { "epoch": 0.18, "grad_norm": 0.6371699153593882, "learning_rate": 4.953187896513516e-06, "loss": 0.301, "step": 3836 }, { "epoch": 0.18, "grad_norm": 0.6372467323526152, "learning_rate": 4.953151360717513e-06, "loss": 0.2997, "step": 3837 }, { "epoch": 0.18, "grad_norm": 0.6730091995910144, "learning_rate": 4.953114810804242e-06, "loss": 0.3, "step": 3838 }, { "epoch": 0.18, "grad_norm": 0.6745486492115671, "learning_rate": 4.95307824677391e-06, "loss": 0.3382, "step": 3839 }, { "epoch": 0.18, "grad_norm": 0.604878769695645, "learning_rate": 4.953041668626731e-06, "loss": 0.2928, "step": 3840 }, { "epoch": 0.18, "grad_norm": 0.6529854397311117, "learning_rate": 4.953005076362913e-06, "loss": 0.3288, "step": 3841 }, { "epoch": 0.18, "grad_norm": 0.6305627767661223, "learning_rate": 4.952968469982667e-06, "loss": 0.2892, "step": 3842 }, { "epoch": 0.18, "grad_norm": 0.6792964777757876, "learning_rate": 4.952931849486206e-06, "loss": 0.308, "step": 3843 }, { "epoch": 0.18, "grad_norm": 0.6378099277796465, "learning_rate": 4.952895214873737e-06, "loss": 0.2978, "step": 3844 }, { "epoch": 0.18, "grad_norm": 0.6801983988543489, "learning_rate": 4.952858566145472e-06, "loss": 0.3204, "step": 3845 }, { "epoch": 0.18, "grad_norm": 0.6575354835718736, "learning_rate": 4.952821903301623e-06, "loss": 0.3205, "step": 3846 }, { "epoch": 0.18, "grad_norm": 0.646068724632923, "learning_rate": 4.9527852263424e-06, "loss": 0.305, "step": 3847 }, { "epoch": 0.18, "grad_norm": 0.6855244940850691, "learning_rate": 4.952748535268016e-06, "loss": 0.296, "step": 3848 }, { "epoch": 0.18, "grad_norm": 0.6530882972738677, "learning_rate": 4.952711830078679e-06, "loss": 0.2974, "step": 3849 }, { "epoch": 0.18, "grad_norm": 0.576533778188705, "learning_rate": 4.952675110774603e-06, "loss": 0.2769, "step": 3850 }, { "epoch": 0.18, "grad_norm": 0.6544812723109635, "learning_rate": 4.952638377355998e-06, "loss": 0.3087, "step": 3851 }, { "epoch": 0.18, "grad_norm": 0.7129633703346069, "learning_rate": 4.952601629823075e-06, "loss": 0.332, "step": 3852 }, { "epoch": 0.18, "grad_norm": 0.6293656567911837, "learning_rate": 4.952564868176047e-06, "loss": 0.3071, "step": 3853 }, { "epoch": 0.18, "grad_norm": 0.6110529028756692, "learning_rate": 4.952528092415124e-06, "loss": 0.2923, "step": 3854 }, { "epoch": 0.18, "grad_norm": 0.6419245653286838, "learning_rate": 4.952491302540518e-06, "loss": 0.2875, "step": 3855 }, { "epoch": 0.18, "grad_norm": 0.6321835246877708, "learning_rate": 4.952454498552441e-06, "loss": 0.29, "step": 3856 }, { "epoch": 0.18, "grad_norm": 0.6753296582083702, "learning_rate": 4.952417680451106e-06, "loss": 0.3023, "step": 3857 }, { "epoch": 0.18, "grad_norm": 0.6346421668837727, "learning_rate": 4.952380848236722e-06, "loss": 0.3211, "step": 3858 }, { "epoch": 0.18, "grad_norm": 0.6483967002291722, "learning_rate": 4.952344001909504e-06, "loss": 0.2944, "step": 3859 }, { "epoch": 0.18, "grad_norm": 0.667891000697319, "learning_rate": 4.9523071414696615e-06, "loss": 0.3081, "step": 3860 }, { "epoch": 0.18, "grad_norm": 0.5929815033573294, "learning_rate": 4.952270266917408e-06, "loss": 0.2988, "step": 3861 }, { "epoch": 0.18, "grad_norm": 0.6656622288991725, "learning_rate": 4.952233378252956e-06, "loss": 0.3185, "step": 3862 }, { "epoch": 0.18, "grad_norm": 0.6145603577636161, "learning_rate": 4.952196475476517e-06, "loss": 0.278, "step": 3863 }, { "epoch": 0.18, "grad_norm": 0.6938460091665332, "learning_rate": 4.952159558588303e-06, "loss": 0.312, "step": 3864 }, { "epoch": 0.18, "grad_norm": 0.6597777073439282, "learning_rate": 4.952122627588528e-06, "loss": 0.3306, "step": 3865 }, { "epoch": 0.18, "grad_norm": 0.6769885036773343, "learning_rate": 4.952085682477403e-06, "loss": 0.2908, "step": 3866 }, { "epoch": 0.18, "grad_norm": 0.615377393086395, "learning_rate": 4.952048723255142e-06, "loss": 0.2787, "step": 3867 }, { "epoch": 0.18, "grad_norm": 0.6823500917538109, "learning_rate": 4.952011749921956e-06, "loss": 0.3222, "step": 3868 }, { "epoch": 0.18, "grad_norm": 0.6354050521313009, "learning_rate": 4.951974762478059e-06, "loss": 0.3077, "step": 3869 }, { "epoch": 0.18, "grad_norm": 0.6806202103644033, "learning_rate": 4.951937760923664e-06, "loss": 0.3166, "step": 3870 }, { "epoch": 0.18, "grad_norm": 0.6367206229738928, "learning_rate": 4.9519007452589825e-06, "loss": 0.3212, "step": 3871 }, { "epoch": 0.18, "grad_norm": 0.6463868385554403, "learning_rate": 4.951863715484229e-06, "loss": 0.2996, "step": 3872 }, { "epoch": 0.18, "grad_norm": 0.6466340225324219, "learning_rate": 4.951826671599615e-06, "loss": 0.2732, "step": 3873 }, { "epoch": 0.18, "grad_norm": 0.7274143424091667, "learning_rate": 4.951789613605357e-06, "loss": 0.3142, "step": 3874 }, { "epoch": 0.18, "grad_norm": 0.728355485917629, "learning_rate": 4.951752541501664e-06, "loss": 0.3056, "step": 3875 }, { "epoch": 0.18, "grad_norm": 0.6674009017384275, "learning_rate": 4.951715455288753e-06, "loss": 0.3112, "step": 3876 }, { "epoch": 0.18, "grad_norm": 0.726311686067271, "learning_rate": 4.951678354966834e-06, "loss": 0.3102, "step": 3877 }, { "epoch": 0.18, "grad_norm": 0.6736735704077752, "learning_rate": 4.951641240536123e-06, "loss": 0.2943, "step": 3878 }, { "epoch": 0.18, "grad_norm": 0.7052039359066079, "learning_rate": 4.951604111996834e-06, "loss": 0.3052, "step": 3879 }, { "epoch": 0.18, "grad_norm": 0.6671803198342157, "learning_rate": 4.951566969349178e-06, "loss": 0.3132, "step": 3880 }, { "epoch": 0.18, "grad_norm": 0.6695452588130605, "learning_rate": 4.951529812593371e-06, "loss": 0.2968, "step": 3881 }, { "epoch": 0.18, "grad_norm": 0.7280653715166338, "learning_rate": 4.951492641729626e-06, "loss": 0.3292, "step": 3882 }, { "epoch": 0.18, "grad_norm": 0.6745466703776889, "learning_rate": 4.9514554567581565e-06, "loss": 0.3029, "step": 3883 }, { "epoch": 0.18, "grad_norm": 0.6522919310521477, "learning_rate": 4.9514182576791775e-06, "loss": 0.3149, "step": 3884 }, { "epoch": 0.18, "grad_norm": 0.6497961472866646, "learning_rate": 4.951381044492902e-06, "loss": 0.3075, "step": 3885 }, { "epoch": 0.18, "grad_norm": 0.6185731186432837, "learning_rate": 4.951343817199545e-06, "loss": 0.3002, "step": 3886 }, { "epoch": 0.18, "grad_norm": 0.6671718493302634, "learning_rate": 4.9513065757993214e-06, "loss": 0.3152, "step": 3887 }, { "epoch": 0.18, "grad_norm": 0.6494815246566065, "learning_rate": 4.951269320292444e-06, "loss": 0.3027, "step": 3888 }, { "epoch": 0.18, "grad_norm": 0.6669338507926097, "learning_rate": 4.9512320506791274e-06, "loss": 0.3089, "step": 3889 }, { "epoch": 0.18, "grad_norm": 0.6351302629814924, "learning_rate": 4.951194766959587e-06, "loss": 0.3015, "step": 3890 }, { "epoch": 0.18, "grad_norm": 0.6700979031512542, "learning_rate": 4.951157469134036e-06, "loss": 0.3162, "step": 3891 }, { "epoch": 0.18, "grad_norm": 0.6305412411511966, "learning_rate": 4.95112015720269e-06, "loss": 0.291, "step": 3892 }, { "epoch": 0.18, "grad_norm": 0.6782890032920235, "learning_rate": 4.951082831165764e-06, "loss": 0.3074, "step": 3893 }, { "epoch": 0.18, "grad_norm": 0.7301534223898111, "learning_rate": 4.951045491023473e-06, "loss": 0.3119, "step": 3894 }, { "epoch": 0.18, "grad_norm": 0.6703207274288979, "learning_rate": 4.95100813677603e-06, "loss": 0.3196, "step": 3895 }, { "epoch": 0.18, "grad_norm": 0.6176043434933147, "learning_rate": 4.9509707684236515e-06, "loss": 0.3018, "step": 3896 }, { "epoch": 0.18, "grad_norm": 0.6904681511935216, "learning_rate": 4.9509333859665525e-06, "loss": 0.3288, "step": 3897 }, { "epoch": 0.18, "grad_norm": 0.6843013261486285, "learning_rate": 4.950895989404948e-06, "loss": 0.2923, "step": 3898 }, { "epoch": 0.18, "grad_norm": 0.6206228135226102, "learning_rate": 4.950858578739053e-06, "loss": 0.3004, "step": 3899 }, { "epoch": 0.18, "grad_norm": 0.6711023439810306, "learning_rate": 4.950821153969082e-06, "loss": 0.3133, "step": 3900 }, { "epoch": 0.18, "grad_norm": 0.6918559695502714, "learning_rate": 4.950783715095252e-06, "loss": 0.3262, "step": 3901 }, { "epoch": 0.18, "grad_norm": 0.6785557586188156, "learning_rate": 4.9507462621177784e-06, "loss": 0.3124, "step": 3902 }, { "epoch": 0.18, "grad_norm": 0.6343779211041368, "learning_rate": 4.9507087950368744e-06, "loss": 0.2847, "step": 3903 }, { "epoch": 0.18, "grad_norm": 0.6201983031855963, "learning_rate": 4.950671313852758e-06, "loss": 0.2955, "step": 3904 }, { "epoch": 0.18, "grad_norm": 0.6579042764251729, "learning_rate": 4.950633818565645e-06, "loss": 0.283, "step": 3905 }, { "epoch": 0.18, "grad_norm": 0.7474216611349931, "learning_rate": 4.95059630917575e-06, "loss": 0.3304, "step": 3906 }, { "epoch": 0.18, "grad_norm": 0.6596008241851985, "learning_rate": 4.950558785683288e-06, "loss": 0.2985, "step": 3907 }, { "epoch": 0.18, "grad_norm": 0.6674766207453474, "learning_rate": 4.950521248088477e-06, "loss": 0.3087, "step": 3908 }, { "epoch": 0.18, "grad_norm": 0.6638535823202008, "learning_rate": 4.950483696391533e-06, "loss": 0.3013, "step": 3909 }, { "epoch": 0.18, "grad_norm": 0.6699366715538422, "learning_rate": 4.95044613059267e-06, "loss": 0.3267, "step": 3910 }, { "epoch": 0.18, "grad_norm": 0.708734058468736, "learning_rate": 4.9504085506921055e-06, "loss": 0.3075, "step": 3911 }, { "epoch": 0.18, "grad_norm": 0.6897765080175224, "learning_rate": 4.950370956690056e-06, "loss": 0.3223, "step": 3912 }, { "epoch": 0.18, "grad_norm": 0.6161319836414754, "learning_rate": 4.950333348586737e-06, "loss": 0.2924, "step": 3913 }, { "epoch": 0.18, "grad_norm": 0.6274886804667097, "learning_rate": 4.950295726382366e-06, "loss": 0.2947, "step": 3914 }, { "epoch": 0.18, "grad_norm": 0.6348230888850753, "learning_rate": 4.950258090077159e-06, "loss": 0.2945, "step": 3915 }, { "epoch": 0.18, "grad_norm": 0.6276787555918011, "learning_rate": 4.950220439671333e-06, "loss": 0.306, "step": 3916 }, { "epoch": 0.18, "grad_norm": 0.6968863726772772, "learning_rate": 4.950182775165103e-06, "loss": 0.3112, "step": 3917 }, { "epoch": 0.18, "grad_norm": 0.6162590271020141, "learning_rate": 4.950145096558687e-06, "loss": 0.3133, "step": 3918 }, { "epoch": 0.18, "grad_norm": 0.654240371819918, "learning_rate": 4.9501074038523025e-06, "loss": 0.2865, "step": 3919 }, { "epoch": 0.18, "grad_norm": 0.6586156446075343, "learning_rate": 4.950069697046166e-06, "loss": 0.3074, "step": 3920 }, { "epoch": 0.18, "grad_norm": 0.673953263719859, "learning_rate": 4.950031976140494e-06, "loss": 0.3289, "step": 3921 }, { "epoch": 0.18, "grad_norm": 0.66617159363869, "learning_rate": 4.949994241135503e-06, "loss": 0.2987, "step": 3922 }, { "epoch": 0.18, "grad_norm": 0.6681442070869108, "learning_rate": 4.9499564920314116e-06, "loss": 0.3042, "step": 3923 }, { "epoch": 0.18, "grad_norm": 0.6802211162063929, "learning_rate": 4.9499187288284355e-06, "loss": 0.3425, "step": 3924 }, { "epoch": 0.18, "grad_norm": 0.6423642298679146, "learning_rate": 4.949880951526794e-06, "loss": 0.2891, "step": 3925 }, { "epoch": 0.18, "grad_norm": 0.7183667521683209, "learning_rate": 4.949843160126703e-06, "loss": 0.3245, "step": 3926 }, { "epoch": 0.18, "grad_norm": 0.6604712370586171, "learning_rate": 4.94980535462838e-06, "loss": 0.286, "step": 3927 }, { "epoch": 0.18, "grad_norm": 0.5628237801749574, "learning_rate": 4.949767535032043e-06, "loss": 0.2696, "step": 3928 }, { "epoch": 0.18, "grad_norm": 0.6403467151595718, "learning_rate": 4.94972970133791e-06, "loss": 0.3128, "step": 3929 }, { "epoch": 0.18, "grad_norm": 0.7843278532005523, "learning_rate": 4.9496918535461976e-06, "loss": 0.3389, "step": 3930 }, { "epoch": 0.18, "grad_norm": 0.6514220323808496, "learning_rate": 4.9496539916571255e-06, "loss": 0.2954, "step": 3931 }, { "epoch": 0.18, "grad_norm": 0.6857185944474273, "learning_rate": 4.9496161156709095e-06, "loss": 0.3384, "step": 3932 }, { "epoch": 0.18, "grad_norm": 0.6132260224363009, "learning_rate": 4.949578225587769e-06, "loss": 0.2814, "step": 3933 }, { "epoch": 0.18, "grad_norm": 0.7417342912923326, "learning_rate": 4.949540321407921e-06, "loss": 0.3129, "step": 3934 }, { "epoch": 0.18, "grad_norm": 0.6048659469081206, "learning_rate": 4.949502403131583e-06, "loss": 0.3023, "step": 3935 }, { "epoch": 0.18, "grad_norm": 0.6472939072464807, "learning_rate": 4.949464470758976e-06, "loss": 0.2764, "step": 3936 }, { "epoch": 0.18, "grad_norm": 0.6876517521224476, "learning_rate": 4.949426524290316e-06, "loss": 0.311, "step": 3937 }, { "epoch": 0.18, "grad_norm": 0.6658593478998952, "learning_rate": 4.949388563725822e-06, "loss": 0.3225, "step": 3938 }, { "epoch": 0.18, "grad_norm": 0.6982193305215572, "learning_rate": 4.949350589065713e-06, "loss": 0.2987, "step": 3939 }, { "epoch": 0.18, "grad_norm": 0.6164802441838267, "learning_rate": 4.9493126003102065e-06, "loss": 0.3107, "step": 3940 }, { "epoch": 0.18, "grad_norm": 0.6636565395530204, "learning_rate": 4.9492745974595216e-06, "loss": 0.291, "step": 3941 }, { "epoch": 0.18, "grad_norm": 0.6606329858744575, "learning_rate": 4.949236580513877e-06, "loss": 0.3109, "step": 3942 }, { "epoch": 0.18, "grad_norm": 0.7042762367231757, "learning_rate": 4.949198549473492e-06, "loss": 0.2978, "step": 3943 }, { "epoch": 0.18, "grad_norm": 0.6610493125381501, "learning_rate": 4.9491605043385835e-06, "loss": 0.3137, "step": 3944 }, { "epoch": 0.18, "grad_norm": 0.6274154639802467, "learning_rate": 4.949122445109374e-06, "loss": 0.3092, "step": 3945 }, { "epoch": 0.18, "grad_norm": 0.660735061613187, "learning_rate": 4.949084371786078e-06, "loss": 0.2957, "step": 3946 }, { "epoch": 0.18, "grad_norm": 0.6505321994083009, "learning_rate": 4.949046284368919e-06, "loss": 0.3215, "step": 3947 }, { "epoch": 0.18, "grad_norm": 0.6437008629830809, "learning_rate": 4.949008182858113e-06, "loss": 0.2769, "step": 3948 }, { "epoch": 0.18, "grad_norm": 0.6733097953009933, "learning_rate": 4.948970067253881e-06, "loss": 0.3036, "step": 3949 }, { "epoch": 0.19, "grad_norm": 0.6278823893478783, "learning_rate": 4.948931937556442e-06, "loss": 0.3067, "step": 3950 }, { "epoch": 0.19, "grad_norm": 0.6196851503418588, "learning_rate": 4.948893793766014e-06, "loss": 0.3097, "step": 3951 }, { "epoch": 0.19, "grad_norm": 0.6648903368310763, "learning_rate": 4.948855635882819e-06, "loss": 0.3031, "step": 3952 }, { "epoch": 0.19, "grad_norm": 0.7269564843448098, "learning_rate": 4.948817463907074e-06, "loss": 0.3294, "step": 3953 }, { "epoch": 0.19, "grad_norm": 0.6385120338989279, "learning_rate": 4.9487792778390014e-06, "loss": 0.3046, "step": 3954 }, { "epoch": 0.19, "grad_norm": 0.6247868509098149, "learning_rate": 4.948741077678819e-06, "loss": 0.2893, "step": 3955 }, { "epoch": 0.19, "grad_norm": 0.6205482615380392, "learning_rate": 4.9487028634267475e-06, "loss": 0.2889, "step": 3956 }, { "epoch": 0.19, "grad_norm": 0.6799878664434692, "learning_rate": 4.948664635083006e-06, "loss": 0.3115, "step": 3957 }, { "epoch": 0.19, "grad_norm": 0.6635373522391816, "learning_rate": 4.948626392647815e-06, "loss": 0.3076, "step": 3958 }, { "epoch": 0.19, "grad_norm": 0.6509507162613966, "learning_rate": 4.948588136121395e-06, "loss": 0.2884, "step": 3959 }, { "epoch": 0.19, "grad_norm": 0.6136343003141659, "learning_rate": 4.948549865503965e-06, "loss": 0.2994, "step": 3960 }, { "epoch": 0.19, "grad_norm": 0.609053921138647, "learning_rate": 4.948511580795746e-06, "loss": 0.293, "step": 3961 }, { "epoch": 0.19, "grad_norm": 0.6538497259839825, "learning_rate": 4.948473281996959e-06, "loss": 0.2834, "step": 3962 }, { "epoch": 0.19, "grad_norm": 0.6728093679517096, "learning_rate": 4.948434969107822e-06, "loss": 0.3063, "step": 3963 }, { "epoch": 0.19, "grad_norm": 0.6528870502854779, "learning_rate": 4.948396642128559e-06, "loss": 0.3232, "step": 3964 }, { "epoch": 0.19, "grad_norm": 0.6669238214821401, "learning_rate": 4.948358301059388e-06, "loss": 0.308, "step": 3965 }, { "epoch": 0.19, "grad_norm": 0.6514497087307065, "learning_rate": 4.94831994590053e-06, "loss": 0.3024, "step": 3966 }, { "epoch": 0.19, "grad_norm": 0.6107176695038865, "learning_rate": 4.9482815766522075e-06, "loss": 0.2855, "step": 3967 }, { "epoch": 0.19, "grad_norm": 0.6427523272484069, "learning_rate": 4.948243193314639e-06, "loss": 0.3127, "step": 3968 }, { "epoch": 0.19, "grad_norm": 0.6300986836126059, "learning_rate": 4.9482047958880455e-06, "loss": 0.305, "step": 3969 }, { "epoch": 0.19, "grad_norm": 0.6741435910380809, "learning_rate": 4.94816638437265e-06, "loss": 0.3192, "step": 3970 }, { "epoch": 0.19, "grad_norm": 0.7235803508441121, "learning_rate": 4.9481279587686715e-06, "loss": 0.3143, "step": 3971 }, { "epoch": 0.19, "grad_norm": 0.6826236383347479, "learning_rate": 4.948089519076332e-06, "loss": 0.3004, "step": 3972 }, { "epoch": 0.19, "grad_norm": 0.6606006308986659, "learning_rate": 4.9480510652958525e-06, "loss": 0.2941, "step": 3973 }, { "epoch": 0.19, "grad_norm": 0.6927407078450962, "learning_rate": 4.948012597427455e-06, "loss": 0.2944, "step": 3974 }, { "epoch": 0.19, "grad_norm": 0.6861844239786287, "learning_rate": 4.947974115471359e-06, "loss": 0.3216, "step": 3975 }, { "epoch": 0.19, "grad_norm": 0.7011065258090498, "learning_rate": 4.947935619427788e-06, "loss": 0.2998, "step": 3976 }, { "epoch": 0.19, "grad_norm": 0.680250596621615, "learning_rate": 4.947897109296963e-06, "loss": 0.3195, "step": 3977 }, { "epoch": 0.19, "grad_norm": 0.66128836062599, "learning_rate": 4.947858585079106e-06, "loss": 0.3305, "step": 3978 }, { "epoch": 0.19, "grad_norm": 0.6818930733972258, "learning_rate": 4.947820046774437e-06, "loss": 0.3241, "step": 3979 }, { "epoch": 0.19, "grad_norm": 0.6642742919001419, "learning_rate": 4.947781494383179e-06, "loss": 0.3009, "step": 3980 }, { "epoch": 0.19, "grad_norm": 0.6345695918561203, "learning_rate": 4.947742927905554e-06, "loss": 0.2976, "step": 3981 }, { "epoch": 0.19, "grad_norm": 0.6656821151431106, "learning_rate": 4.9477043473417844e-06, "loss": 0.3099, "step": 3982 }, { "epoch": 0.19, "grad_norm": 0.6930919262497441, "learning_rate": 4.94766575269209e-06, "loss": 0.3037, "step": 3983 }, { "epoch": 0.19, "grad_norm": 0.666885965327191, "learning_rate": 4.9476271439566955e-06, "loss": 0.2847, "step": 3984 }, { "epoch": 0.19, "grad_norm": 0.679000538353933, "learning_rate": 4.947588521135821e-06, "loss": 0.3216, "step": 3985 }, { "epoch": 0.19, "grad_norm": 0.6463745145614769, "learning_rate": 4.947549884229691e-06, "loss": 0.3003, "step": 3986 }, { "epoch": 0.19, "grad_norm": 0.7438162171474378, "learning_rate": 4.947511233238525e-06, "loss": 0.3105, "step": 3987 }, { "epoch": 0.19, "grad_norm": 0.6591293845092312, "learning_rate": 4.947472568162548e-06, "loss": 0.3197, "step": 3988 }, { "epoch": 0.19, "grad_norm": 0.5979033318288396, "learning_rate": 4.947433889001982e-06, "loss": 0.2837, "step": 3989 }, { "epoch": 0.19, "grad_norm": 0.647753163181197, "learning_rate": 4.947395195757049e-06, "loss": 0.291, "step": 3990 }, { "epoch": 0.19, "grad_norm": 0.6909971849241102, "learning_rate": 4.947356488427971e-06, "loss": 0.3181, "step": 3991 }, { "epoch": 0.19, "grad_norm": 0.6340257628027509, "learning_rate": 4.947317767014972e-06, "loss": 0.3032, "step": 3992 }, { "epoch": 0.19, "grad_norm": 0.6279732045125143, "learning_rate": 4.947279031518274e-06, "loss": 0.2908, "step": 3993 }, { "epoch": 0.19, "grad_norm": 0.6781615031972513, "learning_rate": 4.947240281938101e-06, "loss": 0.3104, "step": 3994 }, { "epoch": 0.19, "grad_norm": 0.6874031210042395, "learning_rate": 4.947201518274674e-06, "loss": 0.3058, "step": 3995 }, { "epoch": 0.19, "grad_norm": 0.6202072521330736, "learning_rate": 4.947162740528219e-06, "loss": 0.2956, "step": 3996 }, { "epoch": 0.19, "grad_norm": 0.736186472666844, "learning_rate": 4.947123948698956e-06, "loss": 0.3043, "step": 3997 }, { "epoch": 0.19, "grad_norm": 0.6804924012149006, "learning_rate": 4.947085142787111e-06, "loss": 0.3092, "step": 3998 }, { "epoch": 0.19, "grad_norm": 0.7002335267262974, "learning_rate": 4.9470463227929045e-06, "loss": 0.3125, "step": 3999 }, { "epoch": 0.19, "grad_norm": 0.6948965760608342, "learning_rate": 4.947007488716562e-06, "loss": 0.3003, "step": 4000 }, { "epoch": 0.19, "grad_norm": 0.6463040354037872, "learning_rate": 4.946968640558307e-06, "loss": 0.3056, "step": 4001 }, { "epoch": 0.19, "grad_norm": 0.6696435940729352, "learning_rate": 4.946929778318363e-06, "loss": 0.3103, "step": 4002 }, { "epoch": 0.19, "grad_norm": 0.6123970071735837, "learning_rate": 4.946890901996952e-06, "loss": 0.2952, "step": 4003 }, { "epoch": 0.19, "grad_norm": 0.6731440222283925, "learning_rate": 4.946852011594299e-06, "loss": 0.286, "step": 4004 }, { "epoch": 0.19, "grad_norm": 0.7086137582180966, "learning_rate": 4.9468131071106285e-06, "loss": 0.2967, "step": 4005 }, { "epoch": 0.19, "grad_norm": 0.6299162955026192, "learning_rate": 4.946774188546163e-06, "loss": 0.3012, "step": 4006 }, { "epoch": 0.19, "grad_norm": 0.5828398978130501, "learning_rate": 4.946735255901127e-06, "loss": 0.2745, "step": 4007 }, { "epoch": 0.19, "grad_norm": 0.6690716612590937, "learning_rate": 4.9466963091757446e-06, "loss": 0.3345, "step": 4008 }, { "epoch": 0.19, "grad_norm": 0.6685616092090618, "learning_rate": 4.946657348370239e-06, "loss": 0.3056, "step": 4009 }, { "epoch": 0.19, "grad_norm": 0.6270102849636614, "learning_rate": 4.946618373484836e-06, "loss": 0.2965, "step": 4010 }, { "epoch": 0.19, "grad_norm": 0.6203807049692027, "learning_rate": 4.9465793845197606e-06, "loss": 0.2975, "step": 4011 }, { "epoch": 0.19, "grad_norm": 0.6492791792665498, "learning_rate": 4.946540381475234e-06, "loss": 0.2884, "step": 4012 }, { "epoch": 0.19, "grad_norm": 0.631858564022216, "learning_rate": 4.9465013643514825e-06, "loss": 0.3111, "step": 4013 }, { "epoch": 0.19, "grad_norm": 0.6241410734656001, "learning_rate": 4.946462333148732e-06, "loss": 0.3025, "step": 4014 }, { "epoch": 0.19, "grad_norm": 0.647680396361955, "learning_rate": 4.946423287867204e-06, "loss": 0.3065, "step": 4015 }, { "epoch": 0.19, "grad_norm": 0.6958073893635972, "learning_rate": 4.946384228507126e-06, "loss": 0.2937, "step": 4016 }, { "epoch": 0.19, "grad_norm": 1.056434764257981, "learning_rate": 4.946345155068721e-06, "loss": 0.306, "step": 4017 }, { "epoch": 0.19, "grad_norm": 0.6399977984100117, "learning_rate": 4.946306067552214e-06, "loss": 0.3038, "step": 4018 }, { "epoch": 0.19, "grad_norm": 0.6407805815432042, "learning_rate": 4.9462669659578315e-06, "loss": 0.2923, "step": 4019 }, { "epoch": 0.19, "grad_norm": 0.617666967227774, "learning_rate": 4.946227850285798e-06, "loss": 0.286, "step": 4020 }, { "epoch": 0.19, "grad_norm": 0.7114699989071811, "learning_rate": 4.946188720536337e-06, "loss": 0.3251, "step": 4021 }, { "epoch": 0.19, "grad_norm": 0.6719297303330776, "learning_rate": 4.946149576709675e-06, "loss": 0.2959, "step": 4022 }, { "epoch": 0.19, "grad_norm": 0.7231490451272335, "learning_rate": 4.946110418806036e-06, "loss": 0.31, "step": 4023 }, { "epoch": 0.19, "grad_norm": 0.6795599188492125, "learning_rate": 4.946071246825648e-06, "loss": 0.3053, "step": 4024 }, { "epoch": 0.19, "grad_norm": 0.6371725011861904, "learning_rate": 4.946032060768734e-06, "loss": 0.3239, "step": 4025 }, { "epoch": 0.19, "grad_norm": 0.6505441789227641, "learning_rate": 4.945992860635519e-06, "loss": 0.3104, "step": 4026 }, { "epoch": 0.19, "grad_norm": 0.6883772884317736, "learning_rate": 4.945953646426232e-06, "loss": 0.2994, "step": 4027 }, { "epoch": 0.19, "grad_norm": 0.7463740351335906, "learning_rate": 4.945914418141095e-06, "loss": 0.2942, "step": 4028 }, { "epoch": 0.19, "grad_norm": 0.6876262772221503, "learning_rate": 4.9458751757803365e-06, "loss": 0.2927, "step": 4029 }, { "epoch": 0.19, "grad_norm": 0.6565757172426933, "learning_rate": 4.9458359193441805e-06, "loss": 0.3229, "step": 4030 }, { "epoch": 0.19, "grad_norm": 0.6764059421219064, "learning_rate": 4.9457966488328535e-06, "loss": 0.3279, "step": 4031 }, { "epoch": 0.19, "grad_norm": 0.609052778819772, "learning_rate": 4.9457573642465815e-06, "loss": 0.2966, "step": 4032 }, { "epoch": 0.19, "grad_norm": 0.6849800503278759, "learning_rate": 4.945718065585591e-06, "loss": 0.3193, "step": 4033 }, { "epoch": 0.19, "grad_norm": 0.7085945242467979, "learning_rate": 4.945678752850107e-06, "loss": 0.3001, "step": 4034 }, { "epoch": 0.19, "grad_norm": 0.6711784788386089, "learning_rate": 4.945639426040357e-06, "loss": 0.2923, "step": 4035 }, { "epoch": 0.19, "grad_norm": 0.6762028364639064, "learning_rate": 4.945600085156566e-06, "loss": 0.3163, "step": 4036 }, { "epoch": 0.19, "grad_norm": 0.615892860267046, "learning_rate": 4.945560730198963e-06, "loss": 0.2957, "step": 4037 }, { "epoch": 0.19, "grad_norm": 0.6615497537142055, "learning_rate": 4.945521361167771e-06, "loss": 0.3179, "step": 4038 }, { "epoch": 0.19, "grad_norm": 0.6779255772696079, "learning_rate": 4.945481978063219e-06, "loss": 0.3119, "step": 4039 }, { "epoch": 0.19, "grad_norm": 0.642064766588064, "learning_rate": 4.945442580885533e-06, "loss": 0.2996, "step": 4040 }, { "epoch": 0.19, "grad_norm": 0.6242205474600128, "learning_rate": 4.945403169634939e-06, "loss": 0.2994, "step": 4041 }, { "epoch": 0.19, "grad_norm": 0.6770770228249772, "learning_rate": 4.945363744311664e-06, "loss": 0.2996, "step": 4042 }, { "epoch": 0.19, "grad_norm": 0.6015218119097001, "learning_rate": 4.945324304915936e-06, "loss": 0.2864, "step": 4043 }, { "epoch": 0.19, "grad_norm": 0.6162962689049805, "learning_rate": 4.9452848514479814e-06, "loss": 0.2846, "step": 4044 }, { "epoch": 0.19, "grad_norm": 0.6238779990436835, "learning_rate": 4.9452453839080275e-06, "loss": 0.2842, "step": 4045 }, { "epoch": 0.19, "grad_norm": 0.6500167559235598, "learning_rate": 4.9452059022963e-06, "loss": 0.2909, "step": 4046 }, { "epoch": 0.19, "grad_norm": 0.6809930227127255, "learning_rate": 4.945166406613027e-06, "loss": 0.3184, "step": 4047 }, { "epoch": 0.19, "grad_norm": 0.5817051882442985, "learning_rate": 4.945126896858436e-06, "loss": 0.2917, "step": 4048 }, { "epoch": 0.19, "grad_norm": 0.619619665319572, "learning_rate": 4.945087373032755e-06, "loss": 0.2975, "step": 4049 }, { "epoch": 0.19, "grad_norm": 0.6715203322094637, "learning_rate": 4.945047835136211e-06, "loss": 0.323, "step": 4050 }, { "epoch": 0.19, "grad_norm": 0.6907206935431491, "learning_rate": 4.94500828316903e-06, "loss": 0.2982, "step": 4051 }, { "epoch": 0.19, "grad_norm": 0.6313029643350748, "learning_rate": 4.944968717131441e-06, "loss": 0.3026, "step": 4052 }, { "epoch": 0.19, "grad_norm": 0.7186612696828485, "learning_rate": 4.944929137023672e-06, "loss": 0.2854, "step": 4053 }, { "epoch": 0.19, "grad_norm": 0.719452369747068, "learning_rate": 4.944889542845951e-06, "loss": 0.3223, "step": 4054 }, { "epoch": 0.19, "grad_norm": 0.7138271316447846, "learning_rate": 4.944849934598504e-06, "loss": 0.2986, "step": 4055 }, { "epoch": 0.19, "grad_norm": 0.6695386538215767, "learning_rate": 4.94481031228156e-06, "loss": 0.2857, "step": 4056 }, { "epoch": 0.19, "grad_norm": 0.6309283869072212, "learning_rate": 4.944770675895349e-06, "loss": 0.2808, "step": 4057 }, { "epoch": 0.19, "grad_norm": 0.6566257093179256, "learning_rate": 4.944731025440095e-06, "loss": 0.3099, "step": 4058 }, { "epoch": 0.19, "grad_norm": 0.5895292717749633, "learning_rate": 4.94469136091603e-06, "loss": 0.2954, "step": 4059 }, { "epoch": 0.19, "grad_norm": 0.6968774191750897, "learning_rate": 4.9446516823233795e-06, "loss": 0.3106, "step": 4060 }, { "epoch": 0.19, "grad_norm": 0.6660930437586527, "learning_rate": 4.944611989662373e-06, "loss": 0.3127, "step": 4061 }, { "epoch": 0.19, "grad_norm": 0.6777853319064056, "learning_rate": 4.94457228293324e-06, "loss": 0.2962, "step": 4062 }, { "epoch": 0.19, "grad_norm": 0.6647785041910057, "learning_rate": 4.944532562136207e-06, "loss": 0.3226, "step": 4063 }, { "epoch": 0.19, "grad_norm": 0.6649997435454427, "learning_rate": 4.944492827271504e-06, "loss": 0.2905, "step": 4064 }, { "epoch": 0.19, "grad_norm": 0.650592516958424, "learning_rate": 4.94445307833936e-06, "loss": 0.2987, "step": 4065 }, { "epoch": 0.19, "grad_norm": 0.6873429099878308, "learning_rate": 4.944413315340001e-06, "loss": 0.2991, "step": 4066 }, { "epoch": 0.19, "grad_norm": 0.7129572857755725, "learning_rate": 4.944373538273659e-06, "loss": 0.3228, "step": 4067 }, { "epoch": 0.19, "grad_norm": 0.720066914269198, "learning_rate": 4.944333747140562e-06, "loss": 0.3169, "step": 4068 }, { "epoch": 0.19, "grad_norm": 0.7173104338697783, "learning_rate": 4.944293941940938e-06, "loss": 0.3262, "step": 4069 }, { "epoch": 0.19, "grad_norm": 0.6245944918113665, "learning_rate": 4.944254122675016e-06, "loss": 0.3258, "step": 4070 }, { "epoch": 0.19, "grad_norm": 0.6506344222223426, "learning_rate": 4.944214289343027e-06, "loss": 0.3063, "step": 4071 }, { "epoch": 0.19, "grad_norm": 0.6766487589683423, "learning_rate": 4.944174441945199e-06, "loss": 0.2819, "step": 4072 }, { "epoch": 0.19, "grad_norm": 0.6477755603289902, "learning_rate": 4.9441345804817605e-06, "loss": 0.3015, "step": 4073 }, { "epoch": 0.19, "grad_norm": 0.6782299151274053, "learning_rate": 4.9440947049529435e-06, "loss": 0.3003, "step": 4074 }, { "epoch": 0.19, "grad_norm": 0.6478883310019935, "learning_rate": 4.944054815358974e-06, "loss": 0.3004, "step": 4075 }, { "epoch": 0.19, "grad_norm": 0.6388899429772594, "learning_rate": 4.944014911700085e-06, "loss": 0.2902, "step": 4076 }, { "epoch": 0.19, "grad_norm": 0.6508246997724356, "learning_rate": 4.943974993976503e-06, "loss": 0.3207, "step": 4077 }, { "epoch": 0.19, "grad_norm": 0.7100455383878176, "learning_rate": 4.9439350621884595e-06, "loss": 0.3021, "step": 4078 }, { "epoch": 0.19, "grad_norm": 0.6261266849415391, "learning_rate": 4.943895116336184e-06, "loss": 0.2764, "step": 4079 }, { "epoch": 0.19, "grad_norm": 0.6528802091460985, "learning_rate": 4.943855156419907e-06, "loss": 0.3208, "step": 4080 }, { "epoch": 0.19, "grad_norm": 0.6255832837263752, "learning_rate": 4.943815182439858e-06, "loss": 0.2945, "step": 4081 }, { "epoch": 0.19, "grad_norm": 0.6254182695441898, "learning_rate": 4.943775194396265e-06, "loss": 0.2979, "step": 4082 }, { "epoch": 0.19, "grad_norm": 0.6206138023161015, "learning_rate": 4.943735192289361e-06, "loss": 0.3112, "step": 4083 }, { "epoch": 0.19, "grad_norm": 0.644868334251421, "learning_rate": 4.943695176119376e-06, "loss": 0.3137, "step": 4084 }, { "epoch": 0.19, "grad_norm": 0.717473932110518, "learning_rate": 4.943655145886539e-06, "loss": 0.3074, "step": 4085 }, { "epoch": 0.19, "grad_norm": 0.6050138296431044, "learning_rate": 4.94361510159108e-06, "loss": 0.2992, "step": 4086 }, { "epoch": 0.19, "grad_norm": 0.6148089838385821, "learning_rate": 4.943575043233231e-06, "loss": 0.2825, "step": 4087 }, { "epoch": 0.19, "grad_norm": 0.6769806335652231, "learning_rate": 4.943534970813222e-06, "loss": 0.2996, "step": 4088 }, { "epoch": 0.19, "grad_norm": 0.6581465905534803, "learning_rate": 4.943494884331282e-06, "loss": 0.2951, "step": 4089 }, { "epoch": 0.19, "grad_norm": 0.6394452451495053, "learning_rate": 4.943454783787644e-06, "loss": 0.3119, "step": 4090 }, { "epoch": 0.19, "grad_norm": 0.637693307439015, "learning_rate": 4.943414669182539e-06, "loss": 0.2985, "step": 4091 }, { "epoch": 0.19, "grad_norm": 0.6185736486519982, "learning_rate": 4.943374540516196e-06, "loss": 0.2951, "step": 4092 }, { "epoch": 0.19, "grad_norm": 0.7137331448637823, "learning_rate": 4.943334397788846e-06, "loss": 0.3214, "step": 4093 }, { "epoch": 0.19, "grad_norm": 0.6729389933093118, "learning_rate": 4.943294241000721e-06, "loss": 0.2974, "step": 4094 }, { "epoch": 0.19, "grad_norm": 0.6544911240689266, "learning_rate": 4.943254070152052e-06, "loss": 0.323, "step": 4095 }, { "epoch": 0.19, "grad_norm": 0.6307486287723785, "learning_rate": 4.94321388524307e-06, "loss": 0.2934, "step": 4096 }, { "epoch": 0.19, "grad_norm": 0.6826314206008418, "learning_rate": 4.943173686274005e-06, "loss": 0.2958, "step": 4097 }, { "epoch": 0.19, "grad_norm": 0.7167698642439724, "learning_rate": 4.943133473245091e-06, "loss": 0.3129, "step": 4098 }, { "epoch": 0.19, "grad_norm": 0.7264539515881033, "learning_rate": 4.9430932461565575e-06, "loss": 0.3059, "step": 4099 }, { "epoch": 0.19, "grad_norm": 0.61644830990953, "learning_rate": 4.943053005008635e-06, "loss": 0.3075, "step": 4100 }, { "epoch": 0.19, "grad_norm": 0.6347567861706847, "learning_rate": 4.943012749801559e-06, "loss": 0.2985, "step": 4101 }, { "epoch": 0.19, "grad_norm": 0.665240038940821, "learning_rate": 4.942972480535557e-06, "loss": 0.2739, "step": 4102 }, { "epoch": 0.19, "grad_norm": 0.6340101727567423, "learning_rate": 4.9429321972108624e-06, "loss": 0.2863, "step": 4103 }, { "epoch": 0.19, "grad_norm": 0.6747082834588, "learning_rate": 4.942891899827708e-06, "loss": 0.3038, "step": 4104 }, { "epoch": 0.19, "grad_norm": 0.6503186116087462, "learning_rate": 4.942851588386324e-06, "loss": 0.3108, "step": 4105 }, { "epoch": 0.19, "grad_norm": 0.6415081072712722, "learning_rate": 4.9428112628869425e-06, "loss": 0.2858, "step": 4106 }, { "epoch": 0.19, "grad_norm": 0.7003380004804693, "learning_rate": 4.942770923329797e-06, "loss": 0.3224, "step": 4107 }, { "epoch": 0.19, "grad_norm": 0.6326384474411696, "learning_rate": 4.942730569715119e-06, "loss": 0.2943, "step": 4108 }, { "epoch": 0.19, "grad_norm": 0.6198777569007651, "learning_rate": 4.94269020204314e-06, "loss": 0.2945, "step": 4109 }, { "epoch": 0.19, "grad_norm": 0.6284866463330124, "learning_rate": 4.942649820314092e-06, "loss": 0.3222, "step": 4110 }, { "epoch": 0.19, "grad_norm": 0.6614469782428811, "learning_rate": 4.94260942452821e-06, "loss": 0.3133, "step": 4111 }, { "epoch": 0.19, "grad_norm": 0.6977629948004583, "learning_rate": 4.942569014685724e-06, "loss": 0.3088, "step": 4112 }, { "epoch": 0.19, "grad_norm": 0.6673090849165911, "learning_rate": 4.942528590786867e-06, "loss": 0.2892, "step": 4113 }, { "epoch": 0.19, "grad_norm": 0.6365463660019918, "learning_rate": 4.942488152831873e-06, "loss": 0.2811, "step": 4114 }, { "epoch": 0.19, "grad_norm": 0.6797860676624364, "learning_rate": 4.942447700820972e-06, "loss": 0.3105, "step": 4115 }, { "epoch": 0.19, "grad_norm": 0.6266442883566442, "learning_rate": 4.942407234754399e-06, "loss": 0.2916, "step": 4116 }, { "epoch": 0.19, "grad_norm": 0.6588311076644136, "learning_rate": 4.942366754632386e-06, "loss": 0.3035, "step": 4117 }, { "epoch": 0.19, "grad_norm": 0.6836797319363849, "learning_rate": 4.942326260455167e-06, "loss": 0.2843, "step": 4118 }, { "epoch": 0.19, "grad_norm": 0.6372033311516753, "learning_rate": 4.942285752222973e-06, "loss": 0.3115, "step": 4119 }, { "epoch": 0.19, "grad_norm": 0.6079992250321575, "learning_rate": 4.942245229936039e-06, "loss": 0.2982, "step": 4120 }, { "epoch": 0.19, "grad_norm": 0.6532451587724025, "learning_rate": 4.9422046935945975e-06, "loss": 0.306, "step": 4121 }, { "epoch": 0.19, "grad_norm": 0.6485491975665721, "learning_rate": 4.942164143198882e-06, "loss": 0.3029, "step": 4122 }, { "epoch": 0.19, "grad_norm": 0.6207974525798321, "learning_rate": 4.942123578749125e-06, "loss": 0.3092, "step": 4123 }, { "epoch": 0.19, "grad_norm": 0.6202883485558583, "learning_rate": 4.9420830002455615e-06, "loss": 0.279, "step": 4124 }, { "epoch": 0.19, "grad_norm": 0.6517647535691435, "learning_rate": 4.942042407688423e-06, "loss": 0.306, "step": 4125 }, { "epoch": 0.19, "grad_norm": 0.6752871402619917, "learning_rate": 4.942001801077946e-06, "loss": 0.3121, "step": 4126 }, { "epoch": 0.19, "grad_norm": 0.6187110045047849, "learning_rate": 4.9419611804143605e-06, "loss": 0.308, "step": 4127 }, { "epoch": 0.19, "grad_norm": 0.6815090129746666, "learning_rate": 4.941920545697904e-06, "loss": 0.3138, "step": 4128 }, { "epoch": 0.19, "grad_norm": 0.6686087539565762, "learning_rate": 4.941879896928807e-06, "loss": 0.3061, "step": 4129 }, { "epoch": 0.19, "grad_norm": 0.660355084307782, "learning_rate": 4.941839234107305e-06, "loss": 0.3259, "step": 4130 }, { "epoch": 0.19, "grad_norm": 0.6200241907585098, "learning_rate": 4.941798557233633e-06, "loss": 0.3101, "step": 4131 }, { "epoch": 0.19, "grad_norm": 0.6398798407568975, "learning_rate": 4.941757866308024e-06, "loss": 0.2946, "step": 4132 }, { "epoch": 0.19, "grad_norm": 0.6895995411503758, "learning_rate": 4.941717161330712e-06, "loss": 0.2974, "step": 4133 }, { "epoch": 0.19, "grad_norm": 0.6511794848651576, "learning_rate": 4.94167644230193e-06, "loss": 0.3122, "step": 4134 }, { "epoch": 0.19, "grad_norm": 0.6322237345767258, "learning_rate": 4.941635709221915e-06, "loss": 0.3064, "step": 4135 }, { "epoch": 0.19, "grad_norm": 0.6728204013705917, "learning_rate": 4.9415949620909e-06, "loss": 0.3142, "step": 4136 }, { "epoch": 0.19, "grad_norm": 0.6343797521622473, "learning_rate": 4.94155420090912e-06, "loss": 0.3017, "step": 4137 }, { "epoch": 0.19, "grad_norm": 0.6402992914631411, "learning_rate": 4.941513425676808e-06, "loss": 0.2826, "step": 4138 }, { "epoch": 0.19, "grad_norm": 0.7328090466642322, "learning_rate": 4.941472636394201e-06, "loss": 0.323, "step": 4139 }, { "epoch": 0.19, "grad_norm": 0.7305690229067504, "learning_rate": 4.941431833061533e-06, "loss": 0.3111, "step": 4140 }, { "epoch": 0.19, "grad_norm": 0.632051322063656, "learning_rate": 4.941391015679038e-06, "loss": 0.3171, "step": 4141 }, { "epoch": 0.19, "grad_norm": 0.631831517818316, "learning_rate": 4.941350184246951e-06, "loss": 0.3097, "step": 4142 }, { "epoch": 0.19, "grad_norm": 0.6842786157734789, "learning_rate": 4.941309338765508e-06, "loss": 0.2954, "step": 4143 }, { "epoch": 0.19, "grad_norm": 0.604713092602542, "learning_rate": 4.941268479234942e-06, "loss": 0.2933, "step": 4144 }, { "epoch": 0.19, "grad_norm": 0.6510344189426738, "learning_rate": 4.94122760565549e-06, "loss": 0.3076, "step": 4145 }, { "epoch": 0.19, "grad_norm": 0.608393534720883, "learning_rate": 4.941186718027388e-06, "loss": 0.2987, "step": 4146 }, { "epoch": 0.19, "grad_norm": 0.6590722500234707, "learning_rate": 4.941145816350868e-06, "loss": 0.3047, "step": 4147 }, { "epoch": 0.19, "grad_norm": 0.6935630475154986, "learning_rate": 4.941104900626169e-06, "loss": 0.292, "step": 4148 }, { "epoch": 0.19, "grad_norm": 0.6673052518764858, "learning_rate": 4.941063970853524e-06, "loss": 0.3162, "step": 4149 }, { "epoch": 0.19, "grad_norm": 0.6835645274666473, "learning_rate": 4.94102302703317e-06, "loss": 0.3222, "step": 4150 }, { "epoch": 0.19, "grad_norm": 0.6086800216153062, "learning_rate": 4.9409820691653415e-06, "loss": 0.3002, "step": 4151 }, { "epoch": 0.19, "grad_norm": 0.5897163008580615, "learning_rate": 4.940941097250274e-06, "loss": 0.2792, "step": 4152 }, { "epoch": 0.19, "grad_norm": 0.6830898621621222, "learning_rate": 4.940900111288206e-06, "loss": 0.2904, "step": 4153 }, { "epoch": 0.19, "grad_norm": 0.6441274599804213, "learning_rate": 4.94085911127937e-06, "loss": 0.3083, "step": 4154 }, { "epoch": 0.19, "grad_norm": 0.6786433431709745, "learning_rate": 4.940818097224004e-06, "loss": 0.3203, "step": 4155 }, { "epoch": 0.19, "grad_norm": 0.687524983564155, "learning_rate": 4.940777069122342e-06, "loss": 0.3092, "step": 4156 }, { "epoch": 0.19, "grad_norm": 0.643311954373158, "learning_rate": 4.940736026974623e-06, "loss": 0.3085, "step": 4157 }, { "epoch": 0.19, "grad_norm": 0.6607644930624998, "learning_rate": 4.9406949707810806e-06, "loss": 0.2957, "step": 4158 }, { "epoch": 0.19, "grad_norm": 0.684191828013537, "learning_rate": 4.940653900541952e-06, "loss": 0.3009, "step": 4159 }, { "epoch": 0.19, "grad_norm": 0.6653989104858317, "learning_rate": 4.940612816257474e-06, "loss": 0.2911, "step": 4160 }, { "epoch": 0.19, "grad_norm": 0.6308129916413564, "learning_rate": 4.9405717179278835e-06, "loss": 0.308, "step": 4161 }, { "epoch": 0.19, "grad_norm": 0.6482944033951525, "learning_rate": 4.940530605553415e-06, "loss": 0.2844, "step": 4162 }, { "epoch": 0.2, "grad_norm": 0.6418853123557304, "learning_rate": 4.940489479134306e-06, "loss": 0.3134, "step": 4163 }, { "epoch": 0.2, "grad_norm": 0.6764277085371114, "learning_rate": 4.940448338670795e-06, "loss": 0.3012, "step": 4164 }, { "epoch": 0.2, "grad_norm": 0.6243043328637422, "learning_rate": 4.9404071841631165e-06, "loss": 0.2717, "step": 4165 }, { "epoch": 0.2, "grad_norm": 0.6654355120627087, "learning_rate": 4.940366015611507e-06, "loss": 0.2936, "step": 4166 }, { "epoch": 0.2, "grad_norm": 0.6611542917848251, "learning_rate": 4.940324833016206e-06, "loss": 0.3127, "step": 4167 }, { "epoch": 0.2, "grad_norm": 0.6534500527204972, "learning_rate": 4.9402836363774475e-06, "loss": 0.3079, "step": 4168 }, { "epoch": 0.2, "grad_norm": 0.6683199140497549, "learning_rate": 4.940242425695471e-06, "loss": 0.3021, "step": 4169 }, { "epoch": 0.2, "grad_norm": 0.6556015954203259, "learning_rate": 4.940201200970512e-06, "loss": 0.3242, "step": 4170 }, { "epoch": 0.2, "grad_norm": 0.6884579252000916, "learning_rate": 4.940159962202809e-06, "loss": 0.3051, "step": 4171 }, { "epoch": 0.2, "grad_norm": 0.6048085254247872, "learning_rate": 4.9401187093925984e-06, "loss": 0.3022, "step": 4172 }, { "epoch": 0.2, "grad_norm": 0.6245738594493908, "learning_rate": 4.940077442540118e-06, "loss": 0.2927, "step": 4173 }, { "epoch": 0.2, "grad_norm": 0.5928165431992007, "learning_rate": 4.9400361616456055e-06, "loss": 0.2805, "step": 4174 }, { "epoch": 0.2, "grad_norm": 0.6732288968905489, "learning_rate": 4.939994866709298e-06, "loss": 0.2958, "step": 4175 }, { "epoch": 0.2, "grad_norm": 0.660680666019072, "learning_rate": 4.9399535577314326e-06, "loss": 0.2927, "step": 4176 }, { "epoch": 0.2, "grad_norm": 0.6833967372070826, "learning_rate": 4.939912234712249e-06, "loss": 0.3023, "step": 4177 }, { "epoch": 0.2, "grad_norm": 0.6969214720327701, "learning_rate": 4.939870897651983e-06, "loss": 0.3225, "step": 4178 }, { "epoch": 0.2, "grad_norm": 0.650058137568874, "learning_rate": 4.939829546550874e-06, "loss": 0.3081, "step": 4179 }, { "epoch": 0.2, "grad_norm": 0.6825402409039648, "learning_rate": 4.9397881814091575e-06, "loss": 0.3165, "step": 4180 }, { "epoch": 0.2, "grad_norm": 0.713363912574367, "learning_rate": 4.939746802227075e-06, "loss": 0.3216, "step": 4181 }, { "epoch": 0.2, "grad_norm": 0.6441712111062389, "learning_rate": 4.939705409004862e-06, "loss": 0.2874, "step": 4182 }, { "epoch": 0.2, "grad_norm": 0.656644525181994, "learning_rate": 4.939664001742758e-06, "loss": 0.3043, "step": 4183 }, { "epoch": 0.2, "grad_norm": 0.7259575831196572, "learning_rate": 4.939622580441e-06, "loss": 0.3098, "step": 4184 }, { "epoch": 0.2, "grad_norm": 0.7222619020004604, "learning_rate": 4.939581145099828e-06, "loss": 0.315, "step": 4185 }, { "epoch": 0.2, "grad_norm": 0.6688624061953945, "learning_rate": 4.9395396957194795e-06, "loss": 0.296, "step": 4186 }, { "epoch": 0.2, "grad_norm": 0.6366066553999672, "learning_rate": 4.939498232300193e-06, "loss": 0.2951, "step": 4187 }, { "epoch": 0.2, "grad_norm": 0.61628158469505, "learning_rate": 4.939456754842207e-06, "loss": 0.2743, "step": 4188 }, { "epoch": 0.2, "grad_norm": 0.6783985237227866, "learning_rate": 4.939415263345762e-06, "loss": 0.2852, "step": 4189 }, { "epoch": 0.2, "grad_norm": 0.6881088665115965, "learning_rate": 4.939373757811093e-06, "loss": 0.2976, "step": 4190 }, { "epoch": 0.2, "grad_norm": 0.6124903044668188, "learning_rate": 4.939332238238443e-06, "loss": 0.2945, "step": 4191 }, { "epoch": 0.2, "grad_norm": 0.6689417949168224, "learning_rate": 4.939290704628048e-06, "loss": 0.3106, "step": 4192 }, { "epoch": 0.2, "grad_norm": 0.6949882354109607, "learning_rate": 4.939249156980149e-06, "loss": 0.319, "step": 4193 }, { "epoch": 0.2, "grad_norm": 0.6299243986474148, "learning_rate": 4.939207595294983e-06, "loss": 0.2871, "step": 4194 }, { "epoch": 0.2, "grad_norm": 0.665970845729932, "learning_rate": 4.939166019572792e-06, "loss": 0.3281, "step": 4195 }, { "epoch": 0.2, "grad_norm": 0.6553341837629645, "learning_rate": 4.939124429813813e-06, "loss": 0.3103, "step": 4196 }, { "epoch": 0.2, "grad_norm": 0.7377901068912297, "learning_rate": 4.939082826018286e-06, "loss": 0.3147, "step": 4197 }, { "epoch": 0.2, "grad_norm": 0.6069548623849501, "learning_rate": 4.939041208186449e-06, "loss": 0.2875, "step": 4198 }, { "epoch": 0.2, "grad_norm": 0.6872257178605964, "learning_rate": 4.9389995763185435e-06, "loss": 0.3011, "step": 4199 }, { "epoch": 0.2, "grad_norm": 0.681010674379607, "learning_rate": 4.938957930414809e-06, "loss": 0.294, "step": 4200 }, { "epoch": 0.2, "grad_norm": 0.6793703284645963, "learning_rate": 4.938916270475485e-06, "loss": 0.3106, "step": 4201 }, { "epoch": 0.2, "grad_norm": 0.601478697820927, "learning_rate": 4.938874596500811e-06, "loss": 0.2943, "step": 4202 }, { "epoch": 0.2, "grad_norm": 0.6309138087541395, "learning_rate": 4.938832908491025e-06, "loss": 0.2996, "step": 4203 }, { "epoch": 0.2, "grad_norm": 0.714511239571989, "learning_rate": 4.938791206446371e-06, "loss": 0.2895, "step": 4204 }, { "epoch": 0.2, "grad_norm": 0.6463083360692565, "learning_rate": 4.938749490367084e-06, "loss": 0.2961, "step": 4205 }, { "epoch": 0.2, "grad_norm": 0.6390446162738385, "learning_rate": 4.9387077602534086e-06, "loss": 0.2809, "step": 4206 }, { "epoch": 0.2, "grad_norm": 0.640700325661885, "learning_rate": 4.938666016105582e-06, "loss": 0.3241, "step": 4207 }, { "epoch": 0.2, "grad_norm": 0.6811396571583231, "learning_rate": 4.938624257923845e-06, "loss": 0.3386, "step": 4208 }, { "epoch": 0.2, "grad_norm": 0.6525280485529813, "learning_rate": 4.93858248570844e-06, "loss": 0.3091, "step": 4209 }, { "epoch": 0.2, "grad_norm": 0.7522069788351038, "learning_rate": 4.938540699459604e-06, "loss": 0.3208, "step": 4210 }, { "epoch": 0.2, "grad_norm": 0.6900292450306414, "learning_rate": 4.93849889917758e-06, "loss": 0.2915, "step": 4211 }, { "epoch": 0.2, "grad_norm": 0.6018812986047284, "learning_rate": 4.938457084862608e-06, "loss": 0.2837, "step": 4212 }, { "epoch": 0.2, "grad_norm": 0.6301841708731635, "learning_rate": 4.938415256514928e-06, "loss": 0.2985, "step": 4213 }, { "epoch": 0.2, "grad_norm": 0.6514208801883623, "learning_rate": 4.93837341413478e-06, "loss": 0.2923, "step": 4214 }, { "epoch": 0.2, "grad_norm": 0.7115874138527223, "learning_rate": 4.938331557722408e-06, "loss": 0.2956, "step": 4215 }, { "epoch": 0.2, "grad_norm": 0.648007640600105, "learning_rate": 4.93828968727805e-06, "loss": 0.2988, "step": 4216 }, { "epoch": 0.2, "grad_norm": 0.6693044262206057, "learning_rate": 4.938247802801946e-06, "loss": 0.3098, "step": 4217 }, { "epoch": 0.2, "grad_norm": 0.6236046702322466, "learning_rate": 4.938205904294341e-06, "loss": 0.3007, "step": 4218 }, { "epoch": 0.2, "grad_norm": 0.6052909277904271, "learning_rate": 4.938163991755473e-06, "loss": 0.308, "step": 4219 }, { "epoch": 0.2, "grad_norm": 0.6464404675538252, "learning_rate": 4.938122065185583e-06, "loss": 0.3045, "step": 4220 }, { "epoch": 0.2, "grad_norm": 0.7700065777683092, "learning_rate": 4.938080124584915e-06, "loss": 0.3202, "step": 4221 }, { "epoch": 0.2, "grad_norm": 0.6481808862391903, "learning_rate": 4.938038169953707e-06, "loss": 0.3224, "step": 4222 }, { "epoch": 0.2, "grad_norm": 0.6862569620187376, "learning_rate": 4.9379962012922036e-06, "loss": 0.3138, "step": 4223 }, { "epoch": 0.2, "grad_norm": 0.6717599938904384, "learning_rate": 4.937954218600644e-06, "loss": 0.3255, "step": 4224 }, { "epoch": 0.2, "grad_norm": 0.648705793150037, "learning_rate": 4.937912221879271e-06, "loss": 0.3125, "step": 4225 }, { "epoch": 0.2, "grad_norm": 0.6691683273749116, "learning_rate": 4.937870211128326e-06, "loss": 0.3204, "step": 4226 }, { "epoch": 0.2, "grad_norm": 0.6965100193691286, "learning_rate": 4.93782818634805e-06, "loss": 0.3097, "step": 4227 }, { "epoch": 0.2, "grad_norm": 0.6718281773873149, "learning_rate": 4.937786147538686e-06, "loss": 0.3192, "step": 4228 }, { "epoch": 0.2, "grad_norm": 0.7033105134331614, "learning_rate": 4.937744094700475e-06, "loss": 0.2934, "step": 4229 }, { "epoch": 0.2, "grad_norm": 0.6154848805721683, "learning_rate": 4.937702027833661e-06, "loss": 0.3115, "step": 4230 }, { "epoch": 0.2, "grad_norm": 0.6627520364900018, "learning_rate": 4.937659946938483e-06, "loss": 0.303, "step": 4231 }, { "epoch": 0.2, "grad_norm": 0.6502166802747167, "learning_rate": 4.9376178520151855e-06, "loss": 0.2972, "step": 4232 }, { "epoch": 0.2, "grad_norm": 0.6471718549238404, "learning_rate": 4.937575743064009e-06, "loss": 0.3057, "step": 4233 }, { "epoch": 0.2, "grad_norm": 0.6774626212357046, "learning_rate": 4.937533620085197e-06, "loss": 0.3181, "step": 4234 }, { "epoch": 0.2, "grad_norm": 0.6097926128515423, "learning_rate": 4.937491483078992e-06, "loss": 0.288, "step": 4235 }, { "epoch": 0.2, "grad_norm": 0.6899936373527461, "learning_rate": 4.937449332045637e-06, "loss": 0.2985, "step": 4236 }, { "epoch": 0.2, "grad_norm": 0.6289807159596459, "learning_rate": 4.9374071669853715e-06, "loss": 0.3074, "step": 4237 }, { "epoch": 0.2, "grad_norm": 0.6646847546994074, "learning_rate": 4.937364987898442e-06, "loss": 0.2917, "step": 4238 }, { "epoch": 0.2, "grad_norm": 0.6520855789095588, "learning_rate": 4.937322794785089e-06, "loss": 0.3139, "step": 4239 }, { "epoch": 0.2, "grad_norm": 0.6964020707383516, "learning_rate": 4.937280587645556e-06, "loss": 0.3015, "step": 4240 }, { "epoch": 0.2, "grad_norm": 0.6145329909973687, "learning_rate": 4.937238366480087e-06, "loss": 0.2692, "step": 4241 }, { "epoch": 0.2, "grad_norm": 0.7335010393881785, "learning_rate": 4.9371961312889225e-06, "loss": 0.3077, "step": 4242 }, { "epoch": 0.2, "grad_norm": 0.648134349938646, "learning_rate": 4.937153882072306e-06, "loss": 0.316, "step": 4243 }, { "epoch": 0.2, "grad_norm": 0.6096587452435731, "learning_rate": 4.937111618830484e-06, "loss": 0.2828, "step": 4244 }, { "epoch": 0.2, "grad_norm": 0.7224199714549407, "learning_rate": 4.937069341563695e-06, "loss": 0.3202, "step": 4245 }, { "epoch": 0.2, "grad_norm": 0.6005261797467649, "learning_rate": 4.937027050272185e-06, "loss": 0.2929, "step": 4246 }, { "epoch": 0.2, "grad_norm": 0.6520958092043545, "learning_rate": 4.936984744956198e-06, "loss": 0.3124, "step": 4247 }, { "epoch": 0.2, "grad_norm": 0.6321068145886485, "learning_rate": 4.936942425615974e-06, "loss": 0.3049, "step": 4248 }, { "epoch": 0.2, "grad_norm": 0.7070940965186946, "learning_rate": 4.936900092251761e-06, "loss": 0.3218, "step": 4249 }, { "epoch": 0.2, "grad_norm": 0.6182150667429792, "learning_rate": 4.9368577448638e-06, "loss": 0.3126, "step": 4250 }, { "epoch": 0.2, "grad_norm": 0.6210876028949713, "learning_rate": 4.9368153834523346e-06, "loss": 0.2846, "step": 4251 }, { "epoch": 0.2, "grad_norm": 0.6268510638145587, "learning_rate": 4.936773008017609e-06, "loss": 0.3017, "step": 4252 }, { "epoch": 0.2, "grad_norm": 0.6400644576262882, "learning_rate": 4.936730618559868e-06, "loss": 0.2867, "step": 4253 }, { "epoch": 0.2, "grad_norm": 0.6210965123361114, "learning_rate": 4.936688215079354e-06, "loss": 0.2871, "step": 4254 }, { "epoch": 0.2, "grad_norm": 0.6215456460570467, "learning_rate": 4.936645797576312e-06, "loss": 0.3095, "step": 4255 }, { "epoch": 0.2, "grad_norm": 0.6140142101668649, "learning_rate": 4.936603366050986e-06, "loss": 0.3075, "step": 4256 }, { "epoch": 0.2, "grad_norm": 0.676079888182186, "learning_rate": 4.93656092050362e-06, "loss": 0.3163, "step": 4257 }, { "epoch": 0.2, "grad_norm": 0.6185334776853224, "learning_rate": 4.936518460934458e-06, "loss": 0.2873, "step": 4258 }, { "epoch": 0.2, "grad_norm": 0.6476066929761994, "learning_rate": 4.936475987343745e-06, "loss": 0.2939, "step": 4259 }, { "epoch": 0.2, "grad_norm": 0.5765860063937199, "learning_rate": 4.936433499731725e-06, "loss": 0.2717, "step": 4260 }, { "epoch": 0.2, "grad_norm": 0.6573638339564868, "learning_rate": 4.936390998098643e-06, "loss": 0.3157, "step": 4261 }, { "epoch": 0.2, "grad_norm": 0.6583512255495342, "learning_rate": 4.936348482444743e-06, "loss": 0.3182, "step": 4262 }, { "epoch": 0.2, "grad_norm": 0.6852786605137863, "learning_rate": 4.93630595277027e-06, "loss": 0.3271, "step": 4263 }, { "epoch": 0.2, "grad_norm": 0.7097016904409551, "learning_rate": 4.9362634090754675e-06, "loss": 0.3294, "step": 4264 }, { "epoch": 0.2, "grad_norm": 0.6544302629921404, "learning_rate": 4.9362208513605826e-06, "loss": 0.3115, "step": 4265 }, { "epoch": 0.2, "grad_norm": 0.6341540771294116, "learning_rate": 4.936178279625858e-06, "loss": 0.3021, "step": 4266 }, { "epoch": 0.2, "grad_norm": 0.665930934217816, "learning_rate": 4.93613569387154e-06, "loss": 0.3001, "step": 4267 }, { "epoch": 0.2, "grad_norm": 0.6363986860447337, "learning_rate": 4.936093094097874e-06, "loss": 0.3019, "step": 4268 }, { "epoch": 0.2, "grad_norm": 0.6709033363841166, "learning_rate": 4.936050480305104e-06, "loss": 0.3182, "step": 4269 }, { "epoch": 0.2, "grad_norm": 0.6679906433208914, "learning_rate": 4.936007852493476e-06, "loss": 0.3154, "step": 4270 }, { "epoch": 0.2, "grad_norm": 0.6509844453559771, "learning_rate": 4.935965210663235e-06, "loss": 0.2903, "step": 4271 }, { "epoch": 0.2, "grad_norm": 0.6340379520104161, "learning_rate": 4.935922554814626e-06, "loss": 0.3168, "step": 4272 }, { "epoch": 0.2, "grad_norm": 0.6336042962469038, "learning_rate": 4.935879884947896e-06, "loss": 0.3091, "step": 4273 }, { "epoch": 0.2, "grad_norm": 0.6650087733608286, "learning_rate": 4.935837201063289e-06, "loss": 0.3041, "step": 4274 }, { "epoch": 0.2, "grad_norm": 0.601998793217193, "learning_rate": 4.935794503161051e-06, "loss": 0.3105, "step": 4275 }, { "epoch": 0.2, "grad_norm": 0.6712022286108309, "learning_rate": 4.935751791241428e-06, "loss": 0.3073, "step": 4276 }, { "epoch": 0.2, "grad_norm": 0.7362383427964769, "learning_rate": 4.935709065304665e-06, "loss": 0.3033, "step": 4277 }, { "epoch": 0.2, "grad_norm": 0.7280973867956524, "learning_rate": 4.935666325351009e-06, "loss": 0.3063, "step": 4278 }, { "epoch": 0.2, "grad_norm": 0.6847463226263703, "learning_rate": 4.935623571380706e-06, "loss": 0.2978, "step": 4279 }, { "epoch": 0.2, "grad_norm": 0.6314115674331612, "learning_rate": 4.935580803394001e-06, "loss": 0.3029, "step": 4280 }, { "epoch": 0.2, "grad_norm": 0.6555497992104744, "learning_rate": 4.9355380213911405e-06, "loss": 0.3104, "step": 4281 }, { "epoch": 0.2, "grad_norm": 0.6691274414674875, "learning_rate": 4.935495225372371e-06, "loss": 0.308, "step": 4282 }, { "epoch": 0.2, "grad_norm": 0.6299097070089956, "learning_rate": 4.935452415337939e-06, "loss": 0.2733, "step": 4283 }, { "epoch": 0.2, "grad_norm": 0.6631587851003573, "learning_rate": 4.935409591288089e-06, "loss": 0.3091, "step": 4284 }, { "epoch": 0.2, "grad_norm": 0.641360597843259, "learning_rate": 4.9353667532230706e-06, "loss": 0.3071, "step": 4285 }, { "epoch": 0.2, "grad_norm": 0.6666902231627266, "learning_rate": 4.9353239011431284e-06, "loss": 0.3198, "step": 4286 }, { "epoch": 0.2, "grad_norm": 0.649669625049403, "learning_rate": 4.9352810350485095e-06, "loss": 0.3106, "step": 4287 }, { "epoch": 0.2, "grad_norm": 0.7043698642390054, "learning_rate": 4.935238154939459e-06, "loss": 0.2923, "step": 4288 }, { "epoch": 0.2, "grad_norm": 0.6352589719774874, "learning_rate": 4.9351952608162255e-06, "loss": 0.2977, "step": 4289 }, { "epoch": 0.2, "grad_norm": 0.6398878044084061, "learning_rate": 4.935152352679056e-06, "loss": 0.2974, "step": 4290 }, { "epoch": 0.2, "grad_norm": 0.6477792016933916, "learning_rate": 4.935109430528196e-06, "loss": 0.3075, "step": 4291 }, { "epoch": 0.2, "grad_norm": 0.6410940089209384, "learning_rate": 4.935066494363894e-06, "loss": 0.2888, "step": 4292 }, { "epoch": 0.2, "grad_norm": 0.7148361094975859, "learning_rate": 4.9350235441863956e-06, "loss": 0.318, "step": 4293 }, { "epoch": 0.2, "grad_norm": 0.6347225727643427, "learning_rate": 4.934980579995949e-06, "loss": 0.2926, "step": 4294 }, { "epoch": 0.2, "grad_norm": 0.6892951968695588, "learning_rate": 4.934937601792802e-06, "loss": 0.3238, "step": 4295 }, { "epoch": 0.2, "grad_norm": 0.6527091990778614, "learning_rate": 4.9348946095772e-06, "loss": 0.3065, "step": 4296 }, { "epoch": 0.2, "grad_norm": 0.7012321540965558, "learning_rate": 4.9348516033493925e-06, "loss": 0.3057, "step": 4297 }, { "epoch": 0.2, "grad_norm": 0.6894088031386522, "learning_rate": 4.934808583109625e-06, "loss": 0.3153, "step": 4298 }, { "epoch": 0.2, "grad_norm": 0.6693994931869798, "learning_rate": 4.934765548858146e-06, "loss": 0.3047, "step": 4299 }, { "epoch": 0.2, "grad_norm": 0.6618855580090117, "learning_rate": 4.9347225005952035e-06, "loss": 0.3049, "step": 4300 }, { "epoch": 0.2, "grad_norm": 0.6578674867507429, "learning_rate": 4.934679438321045e-06, "loss": 0.3034, "step": 4301 }, { "epoch": 0.2, "grad_norm": 0.6897468086786607, "learning_rate": 4.934636362035918e-06, "loss": 0.2852, "step": 4302 }, { "epoch": 0.2, "grad_norm": 0.6352331414215208, "learning_rate": 4.934593271740072e-06, "loss": 0.303, "step": 4303 }, { "epoch": 0.2, "grad_norm": 0.6742238291468476, "learning_rate": 4.934550167433752e-06, "loss": 0.2927, "step": 4304 }, { "epoch": 0.2, "grad_norm": 0.6446578312710344, "learning_rate": 4.934507049117209e-06, "loss": 0.3101, "step": 4305 }, { "epoch": 0.2, "grad_norm": 0.6922774281213533, "learning_rate": 4.934463916790689e-06, "loss": 0.3208, "step": 4306 }, { "epoch": 0.2, "grad_norm": 0.6321329509569132, "learning_rate": 4.934420770454441e-06, "loss": 0.3063, "step": 4307 }, { "epoch": 0.2, "grad_norm": 0.6611684478892351, "learning_rate": 4.934377610108714e-06, "loss": 0.2923, "step": 4308 }, { "epoch": 0.2, "grad_norm": 0.6671874437356626, "learning_rate": 4.934334435753755e-06, "loss": 0.292, "step": 4309 }, { "epoch": 0.2, "grad_norm": 0.6998096488071737, "learning_rate": 4.9342912473898135e-06, "loss": 0.3273, "step": 4310 }, { "epoch": 0.2, "grad_norm": 0.6016346782623055, "learning_rate": 4.934248045017138e-06, "loss": 0.2787, "step": 4311 }, { "epoch": 0.2, "grad_norm": 0.6447519245731846, "learning_rate": 4.934204828635976e-06, "loss": 0.2849, "step": 4312 }, { "epoch": 0.2, "grad_norm": 0.5528309302916804, "learning_rate": 4.934161598246577e-06, "loss": 0.2837, "step": 4313 }, { "epoch": 0.2, "grad_norm": 0.6686181747594405, "learning_rate": 4.934118353849191e-06, "loss": 0.2933, "step": 4314 }, { "epoch": 0.2, "grad_norm": 0.7262005217155555, "learning_rate": 4.934075095444065e-06, "loss": 0.319, "step": 4315 }, { "epoch": 0.2, "grad_norm": 0.6937718567223268, "learning_rate": 4.934031823031449e-06, "loss": 0.3086, "step": 4316 }, { "epoch": 0.2, "grad_norm": 0.703581078261803, "learning_rate": 4.9339885366115904e-06, "loss": 0.3079, "step": 4317 }, { "epoch": 0.2, "grad_norm": 0.6588978152159801, "learning_rate": 4.933945236184741e-06, "loss": 0.2843, "step": 4318 }, { "epoch": 0.2, "grad_norm": 0.6607709533235281, "learning_rate": 4.933901921751147e-06, "loss": 0.2931, "step": 4319 }, { "epoch": 0.2, "grad_norm": 0.5822221331362756, "learning_rate": 4.9338585933110605e-06, "loss": 0.2778, "step": 4320 }, { "epoch": 0.2, "grad_norm": 0.6651306742319352, "learning_rate": 4.933815250864729e-06, "loss": 0.2921, "step": 4321 }, { "epoch": 0.2, "grad_norm": 0.6675527310614991, "learning_rate": 4.9337718944124025e-06, "loss": 0.3205, "step": 4322 }, { "epoch": 0.2, "grad_norm": 0.6011851120159789, "learning_rate": 4.933728523954331e-06, "loss": 0.2845, "step": 4323 }, { "epoch": 0.2, "grad_norm": 0.6778153777785397, "learning_rate": 4.933685139490763e-06, "loss": 0.2989, "step": 4324 }, { "epoch": 0.2, "grad_norm": 0.6768453511173526, "learning_rate": 4.9336417410219485e-06, "loss": 0.3053, "step": 4325 }, { "epoch": 0.2, "grad_norm": 0.6175548281535788, "learning_rate": 4.933598328548137e-06, "loss": 0.2949, "step": 4326 }, { "epoch": 0.2, "grad_norm": 0.6846240497408438, "learning_rate": 4.933554902069579e-06, "loss": 0.3117, "step": 4327 }, { "epoch": 0.2, "grad_norm": 0.6576300512447794, "learning_rate": 4.933511461586526e-06, "loss": 0.2986, "step": 4328 }, { "epoch": 0.2, "grad_norm": 0.6509301857799977, "learning_rate": 4.933468007099224e-06, "loss": 0.2964, "step": 4329 }, { "epoch": 0.2, "grad_norm": 0.5964385112414027, "learning_rate": 4.933424538607926e-06, "loss": 0.2794, "step": 4330 }, { "epoch": 0.2, "grad_norm": 0.6630108831114617, "learning_rate": 4.9333810561128815e-06, "loss": 0.3104, "step": 4331 }, { "epoch": 0.2, "grad_norm": 0.669131980613362, "learning_rate": 4.9333375596143405e-06, "loss": 0.3018, "step": 4332 }, { "epoch": 0.2, "grad_norm": 0.6795069126437588, "learning_rate": 4.9332940491125535e-06, "loss": 0.31, "step": 4333 }, { "epoch": 0.2, "grad_norm": 0.6711854735388263, "learning_rate": 4.933250524607771e-06, "loss": 0.299, "step": 4334 }, { "epoch": 0.2, "grad_norm": 0.6017992648874781, "learning_rate": 4.933206986100243e-06, "loss": 0.2868, "step": 4335 }, { "epoch": 0.2, "grad_norm": 0.6382601199330635, "learning_rate": 4.933163433590221e-06, "loss": 0.3012, "step": 4336 }, { "epoch": 0.2, "grad_norm": 0.6378885907446955, "learning_rate": 4.9331198670779546e-06, "loss": 0.2973, "step": 4337 }, { "epoch": 0.2, "grad_norm": 0.6874540018486377, "learning_rate": 4.9330762865636945e-06, "loss": 0.3182, "step": 4338 }, { "epoch": 0.2, "grad_norm": 0.6357871285267934, "learning_rate": 4.933032692047693e-06, "loss": 0.2887, "step": 4339 }, { "epoch": 0.2, "grad_norm": 0.6581891271192593, "learning_rate": 4.932989083530199e-06, "loss": 0.3005, "step": 4340 }, { "epoch": 0.2, "grad_norm": 0.683041419882167, "learning_rate": 4.932945461011463e-06, "loss": 0.3121, "step": 4341 }, { "epoch": 0.2, "grad_norm": 0.615158416267059, "learning_rate": 4.9329018244917396e-06, "loss": 0.3006, "step": 4342 }, { "epoch": 0.2, "grad_norm": 0.6597761242894131, "learning_rate": 4.932858173971277e-06, "loss": 0.2962, "step": 4343 }, { "epoch": 0.2, "grad_norm": 0.6604315642992126, "learning_rate": 4.932814509450326e-06, "loss": 0.3142, "step": 4344 }, { "epoch": 0.2, "grad_norm": 0.6295785761068077, "learning_rate": 4.932770830929141e-06, "loss": 0.317, "step": 4345 }, { "epoch": 0.2, "grad_norm": 0.660444979914469, "learning_rate": 4.93272713840797e-06, "loss": 0.3044, "step": 4346 }, { "epoch": 0.2, "grad_norm": 0.6613703074673201, "learning_rate": 4.932683431887066e-06, "loss": 0.3222, "step": 4347 }, { "epoch": 0.2, "grad_norm": 0.6178765630912544, "learning_rate": 4.93263971136668e-06, "loss": 0.3111, "step": 4348 }, { "epoch": 0.2, "grad_norm": 0.6573796331288045, "learning_rate": 4.932595976847064e-06, "loss": 0.3013, "step": 4349 }, { "epoch": 0.2, "grad_norm": 0.6368525809269094, "learning_rate": 4.93255222832847e-06, "loss": 0.3234, "step": 4350 }, { "epoch": 0.2, "grad_norm": 0.6468961769732541, "learning_rate": 4.9325084658111496e-06, "loss": 0.3307, "step": 4351 }, { "epoch": 0.2, "grad_norm": 0.612039689583627, "learning_rate": 4.9324646892953535e-06, "loss": 0.3144, "step": 4352 }, { "epoch": 0.2, "grad_norm": 0.6795752143181633, "learning_rate": 4.932420898781335e-06, "loss": 0.3299, "step": 4353 }, { "epoch": 0.2, "grad_norm": 0.6806070020175328, "learning_rate": 4.932377094269345e-06, "loss": 0.3034, "step": 4354 }, { "epoch": 0.2, "grad_norm": 0.643790415820373, "learning_rate": 4.932333275759637e-06, "loss": 0.3298, "step": 4355 }, { "epoch": 0.2, "grad_norm": 0.5825798412382219, "learning_rate": 4.932289443252462e-06, "loss": 0.2803, "step": 4356 }, { "epoch": 0.2, "grad_norm": 0.6556234037568299, "learning_rate": 4.932245596748072e-06, "loss": 0.3223, "step": 4357 }, { "epoch": 0.2, "grad_norm": 0.6268513723800507, "learning_rate": 4.9322017362467216e-06, "loss": 0.3184, "step": 4358 }, { "epoch": 0.2, "grad_norm": 0.6915362514299522, "learning_rate": 4.93215786174866e-06, "loss": 0.3161, "step": 4359 }, { "epoch": 0.2, "grad_norm": 0.6568896258805925, "learning_rate": 4.932113973254142e-06, "loss": 0.3178, "step": 4360 }, { "epoch": 0.2, "grad_norm": 0.6594634073413123, "learning_rate": 4.932070070763419e-06, "loss": 0.3203, "step": 4361 }, { "epoch": 0.2, "grad_norm": 0.6406446047773646, "learning_rate": 4.932026154276744e-06, "loss": 0.2986, "step": 4362 }, { "epoch": 0.2, "grad_norm": 0.7115189435938546, "learning_rate": 4.931982223794369e-06, "loss": 0.3101, "step": 4363 }, { "epoch": 0.2, "grad_norm": 0.6122516716044054, "learning_rate": 4.931938279316548e-06, "loss": 0.2872, "step": 4364 }, { "epoch": 0.2, "grad_norm": 0.5998832480046006, "learning_rate": 4.931894320843534e-06, "loss": 0.2868, "step": 4365 }, { "epoch": 0.2, "grad_norm": 0.7039044957513444, "learning_rate": 4.931850348375579e-06, "loss": 0.3251, "step": 4366 }, { "epoch": 0.2, "grad_norm": 0.6528245114186524, "learning_rate": 4.931806361912936e-06, "loss": 0.2968, "step": 4367 }, { "epoch": 0.2, "grad_norm": 0.7193364783647047, "learning_rate": 4.93176236145586e-06, "loss": 0.311, "step": 4368 }, { "epoch": 0.2, "grad_norm": 0.6753859640023959, "learning_rate": 4.931718347004601e-06, "loss": 0.3144, "step": 4369 }, { "epoch": 0.2, "grad_norm": 0.6390304658764332, "learning_rate": 4.931674318559416e-06, "loss": 0.3069, "step": 4370 }, { "epoch": 0.2, "grad_norm": 0.6280134191099735, "learning_rate": 4.931630276120555e-06, "loss": 0.3093, "step": 4371 }, { "epoch": 0.2, "grad_norm": 0.6128861235023645, "learning_rate": 4.931586219688273e-06, "loss": 0.2884, "step": 4372 }, { "epoch": 0.2, "grad_norm": 0.6143911844724484, "learning_rate": 4.931542149262825e-06, "loss": 0.3052, "step": 4373 }, { "epoch": 0.2, "grad_norm": 0.5682666496388501, "learning_rate": 4.931498064844462e-06, "loss": 0.277, "step": 4374 }, { "epoch": 0.2, "grad_norm": 0.6468193802584029, "learning_rate": 4.931453966433439e-06, "loss": 0.2822, "step": 4375 }, { "epoch": 0.2, "grad_norm": 0.6563894498557792, "learning_rate": 4.931409854030009e-06, "loss": 0.2879, "step": 4376 }, { "epoch": 0.21, "grad_norm": 0.6480233117043074, "learning_rate": 4.931365727634427e-06, "loss": 0.2934, "step": 4377 }, { "epoch": 0.21, "grad_norm": 0.6823230781342008, "learning_rate": 4.931321587246946e-06, "loss": 0.3112, "step": 4378 }, { "epoch": 0.21, "grad_norm": 0.6021990811435083, "learning_rate": 4.93127743286782e-06, "loss": 0.2743, "step": 4379 }, { "epoch": 0.21, "grad_norm": 0.6912471384950183, "learning_rate": 4.931233264497304e-06, "loss": 0.3332, "step": 4380 }, { "epoch": 0.21, "grad_norm": 0.6257025316921443, "learning_rate": 4.931189082135652e-06, "loss": 0.3076, "step": 4381 }, { "epoch": 0.21, "grad_norm": 0.6474523081666915, "learning_rate": 4.931144885783118e-06, "loss": 0.3125, "step": 4382 }, { "epoch": 0.21, "grad_norm": 0.6828415420776818, "learning_rate": 4.931100675439955e-06, "loss": 0.3158, "step": 4383 }, { "epoch": 0.21, "grad_norm": 0.6372818457864952, "learning_rate": 4.931056451106419e-06, "loss": 0.3106, "step": 4384 }, { "epoch": 0.21, "grad_norm": 0.6331789867427792, "learning_rate": 4.931012212782765e-06, "loss": 0.2957, "step": 4385 }, { "epoch": 0.21, "grad_norm": 0.6697899950834043, "learning_rate": 4.930967960469246e-06, "loss": 0.2998, "step": 4386 }, { "epoch": 0.21, "grad_norm": 0.6025667506528898, "learning_rate": 4.930923694166118e-06, "loss": 0.3073, "step": 4387 }, { "epoch": 0.21, "grad_norm": 0.6742174389564954, "learning_rate": 4.9308794138736334e-06, "loss": 0.3122, "step": 4388 }, { "epoch": 0.21, "grad_norm": 0.6576707909338951, "learning_rate": 4.930835119592051e-06, "loss": 0.305, "step": 4389 }, { "epoch": 0.21, "grad_norm": 0.6324061788113203, "learning_rate": 4.930790811321622e-06, "loss": 0.3126, "step": 4390 }, { "epoch": 0.21, "grad_norm": 0.634493264222439, "learning_rate": 4.930746489062603e-06, "loss": 0.3026, "step": 4391 }, { "epoch": 0.21, "grad_norm": 0.712460107557727, "learning_rate": 4.93070215281525e-06, "loss": 0.2991, "step": 4392 }, { "epoch": 0.21, "grad_norm": 0.6785092656318267, "learning_rate": 4.930657802579815e-06, "loss": 0.3203, "step": 4393 }, { "epoch": 0.21, "grad_norm": 0.6541306207848187, "learning_rate": 4.930613438356557e-06, "loss": 0.3151, "step": 4394 }, { "epoch": 0.21, "grad_norm": 0.7098397373937927, "learning_rate": 4.93056906014573e-06, "loss": 0.3038, "step": 4395 }, { "epoch": 0.21, "grad_norm": 0.6920522490799274, "learning_rate": 4.930524667947588e-06, "loss": 0.2783, "step": 4396 }, { "epoch": 0.21, "grad_norm": 0.5741431704404987, "learning_rate": 4.930480261762387e-06, "loss": 0.282, "step": 4397 }, { "epoch": 0.21, "grad_norm": 0.7079597413364511, "learning_rate": 4.930435841590384e-06, "loss": 0.3228, "step": 4398 }, { "epoch": 0.21, "grad_norm": 0.6402726030647493, "learning_rate": 4.930391407431833e-06, "loss": 0.3026, "step": 4399 }, { "epoch": 0.21, "grad_norm": 0.8252825746403726, "learning_rate": 4.93034695928699e-06, "loss": 0.2685, "step": 4400 }, { "epoch": 0.21, "grad_norm": 0.6295060935234387, "learning_rate": 4.930302497156112e-06, "loss": 0.3005, "step": 4401 }, { "epoch": 0.21, "grad_norm": 0.7254474397868759, "learning_rate": 4.930258021039453e-06, "loss": 0.3122, "step": 4402 }, { "epoch": 0.21, "grad_norm": 0.7268561303730519, "learning_rate": 4.93021353093727e-06, "loss": 0.2984, "step": 4403 }, { "epoch": 0.21, "grad_norm": 0.6628869509709043, "learning_rate": 4.9301690268498204e-06, "loss": 0.3071, "step": 4404 }, { "epoch": 0.21, "grad_norm": 0.6601639569747484, "learning_rate": 4.930124508777358e-06, "loss": 0.3022, "step": 4405 }, { "epoch": 0.21, "grad_norm": 0.6759740294031514, "learning_rate": 4.930079976720139e-06, "loss": 0.3074, "step": 4406 }, { "epoch": 0.21, "grad_norm": 0.626459115859232, "learning_rate": 4.930035430678421e-06, "loss": 0.2946, "step": 4407 }, { "epoch": 0.21, "grad_norm": 0.6304846915344932, "learning_rate": 4.9299908706524605e-06, "loss": 0.2978, "step": 4408 }, { "epoch": 0.21, "grad_norm": 0.6705556251737627, "learning_rate": 4.929946296642512e-06, "loss": 0.2987, "step": 4409 }, { "epoch": 0.21, "grad_norm": 0.7431844741784033, "learning_rate": 4.929901708648835e-06, "loss": 0.3362, "step": 4410 }, { "epoch": 0.21, "grad_norm": 0.6830608007459501, "learning_rate": 4.929857106671683e-06, "loss": 0.2812, "step": 4411 }, { "epoch": 0.21, "grad_norm": 0.6668876276981174, "learning_rate": 4.9298124907113145e-06, "loss": 0.2931, "step": 4412 }, { "epoch": 0.21, "grad_norm": 0.6561357460471147, "learning_rate": 4.929767860767986e-06, "loss": 0.3066, "step": 4413 }, { "epoch": 0.21, "grad_norm": 0.6449691766738963, "learning_rate": 4.929723216841954e-06, "loss": 0.3068, "step": 4414 }, { "epoch": 0.21, "grad_norm": 0.6919868661632343, "learning_rate": 4.929678558933475e-06, "loss": 0.3409, "step": 4415 }, { "epoch": 0.21, "grad_norm": 0.6773628143060986, "learning_rate": 4.929633887042807e-06, "loss": 0.3266, "step": 4416 }, { "epoch": 0.21, "grad_norm": 0.6841370073597205, "learning_rate": 4.929589201170207e-06, "loss": 0.3138, "step": 4417 }, { "epoch": 0.21, "grad_norm": 0.6202297390210262, "learning_rate": 4.929544501315932e-06, "loss": 0.2953, "step": 4418 }, { "epoch": 0.21, "grad_norm": 0.6078126282703337, "learning_rate": 4.929499787480238e-06, "loss": 0.3, "step": 4419 }, { "epoch": 0.21, "grad_norm": 0.6851338317533511, "learning_rate": 4.929455059663384e-06, "loss": 0.3036, "step": 4420 }, { "epoch": 0.21, "grad_norm": 0.6948121370102972, "learning_rate": 4.929410317865627e-06, "loss": 0.2921, "step": 4421 }, { "epoch": 0.21, "grad_norm": 0.6868579488580818, "learning_rate": 4.929365562087224e-06, "loss": 0.3249, "step": 4422 }, { "epoch": 0.21, "grad_norm": 0.6568429875314463, "learning_rate": 4.929320792328433e-06, "loss": 0.3092, "step": 4423 }, { "epoch": 0.21, "grad_norm": 0.6253498682143068, "learning_rate": 4.929276008589511e-06, "loss": 0.3026, "step": 4424 }, { "epoch": 0.21, "grad_norm": 0.6272973777818328, "learning_rate": 4.9292312108707165e-06, "loss": 0.2947, "step": 4425 }, { "epoch": 0.21, "grad_norm": 0.6207120772508524, "learning_rate": 4.9291863991723065e-06, "loss": 0.2741, "step": 4426 }, { "epoch": 0.21, "grad_norm": 0.6874198374016196, "learning_rate": 4.92914157349454e-06, "loss": 0.3076, "step": 4427 }, { "epoch": 0.21, "grad_norm": 0.655256644773478, "learning_rate": 4.929096733837674e-06, "loss": 0.2934, "step": 4428 }, { "epoch": 0.21, "grad_norm": 0.6572135392311848, "learning_rate": 4.929051880201967e-06, "loss": 0.279, "step": 4429 }, { "epoch": 0.21, "grad_norm": 0.7129297937691496, "learning_rate": 4.929007012587677e-06, "loss": 0.2851, "step": 4430 }, { "epoch": 0.21, "grad_norm": 0.6917709469123174, "learning_rate": 4.928962130995061e-06, "loss": 0.3244, "step": 4431 }, { "epoch": 0.21, "grad_norm": 0.6269496782451589, "learning_rate": 4.92891723542438e-06, "loss": 0.2949, "step": 4432 }, { "epoch": 0.21, "grad_norm": 0.6574525932087604, "learning_rate": 4.9288723258758895e-06, "loss": 0.3112, "step": 4433 }, { "epoch": 0.21, "grad_norm": 0.6461060323214031, "learning_rate": 4.92882740234985e-06, "loss": 0.2973, "step": 4434 }, { "epoch": 0.21, "grad_norm": 0.6345106910766878, "learning_rate": 4.928782464846519e-06, "loss": 0.3017, "step": 4435 }, { "epoch": 0.21, "grad_norm": 0.654442776190022, "learning_rate": 4.928737513366155e-06, "loss": 0.3112, "step": 4436 }, { "epoch": 0.21, "grad_norm": 0.7041201841494911, "learning_rate": 4.928692547909017e-06, "loss": 0.3305, "step": 4437 }, { "epoch": 0.21, "grad_norm": 0.677295288836208, "learning_rate": 4.928647568475365e-06, "loss": 0.3134, "step": 4438 }, { "epoch": 0.21, "grad_norm": 0.6756816478974931, "learning_rate": 4.928602575065456e-06, "loss": 0.3247, "step": 4439 }, { "epoch": 0.21, "grad_norm": 0.5750927571006492, "learning_rate": 4.92855756767955e-06, "loss": 0.2922, "step": 4440 }, { "epoch": 0.21, "grad_norm": 0.6680857082370398, "learning_rate": 4.928512546317905e-06, "loss": 0.2862, "step": 4441 }, { "epoch": 0.21, "grad_norm": 0.6631457489663308, "learning_rate": 4.928467510980781e-06, "loss": 0.3053, "step": 4442 }, { "epoch": 0.21, "grad_norm": 0.6295343254273811, "learning_rate": 4.928422461668436e-06, "loss": 0.2911, "step": 4443 }, { "epoch": 0.21, "grad_norm": 0.6898474578139467, "learning_rate": 4.9283773983811314e-06, "loss": 0.327, "step": 4444 }, { "epoch": 0.21, "grad_norm": 0.5922350089255672, "learning_rate": 4.928332321119124e-06, "loss": 0.3205, "step": 4445 }, { "epoch": 0.21, "grad_norm": 0.6687926857005492, "learning_rate": 4.928287229882675e-06, "loss": 0.3296, "step": 4446 }, { "epoch": 0.21, "grad_norm": 0.6624646143427426, "learning_rate": 4.928242124672043e-06, "loss": 0.3025, "step": 4447 }, { "epoch": 0.21, "grad_norm": 0.6194349800932485, "learning_rate": 4.928197005487489e-06, "loss": 0.2947, "step": 4448 }, { "epoch": 0.21, "grad_norm": 0.640037110265729, "learning_rate": 4.928151872329271e-06, "loss": 0.2852, "step": 4449 }, { "epoch": 0.21, "grad_norm": 0.6950066445005875, "learning_rate": 4.928106725197649e-06, "loss": 0.3252, "step": 4450 }, { "epoch": 0.21, "grad_norm": 0.6571754894137806, "learning_rate": 4.928061564092883e-06, "loss": 0.3081, "step": 4451 }, { "epoch": 0.21, "grad_norm": 0.6539331765954127, "learning_rate": 4.928016389015232e-06, "loss": 0.3103, "step": 4452 }, { "epoch": 0.21, "grad_norm": 0.6910927340459722, "learning_rate": 4.927971199964959e-06, "loss": 0.319, "step": 4453 }, { "epoch": 0.21, "grad_norm": 0.7113065225121853, "learning_rate": 4.927925996942321e-06, "loss": 0.3222, "step": 4454 }, { "epoch": 0.21, "grad_norm": 0.7013536565245575, "learning_rate": 4.927880779947579e-06, "loss": 0.3338, "step": 4455 }, { "epoch": 0.21, "grad_norm": 0.6880328643141408, "learning_rate": 4.927835548980994e-06, "loss": 0.3276, "step": 4456 }, { "epoch": 0.21, "grad_norm": 0.6370444429649221, "learning_rate": 4.927790304042824e-06, "loss": 0.3032, "step": 4457 }, { "epoch": 0.21, "grad_norm": 0.6309204922905849, "learning_rate": 4.927745045133332e-06, "loss": 0.3005, "step": 4458 }, { "epoch": 0.21, "grad_norm": 0.6654617228772759, "learning_rate": 4.927699772252778e-06, "loss": 0.3114, "step": 4459 }, { "epoch": 0.21, "grad_norm": 0.6607557902570279, "learning_rate": 4.927654485401422e-06, "loss": 0.2692, "step": 4460 }, { "epoch": 0.21, "grad_norm": 0.6185907336730514, "learning_rate": 4.927609184579523e-06, "loss": 0.2901, "step": 4461 }, { "epoch": 0.21, "grad_norm": 0.633708036941045, "learning_rate": 4.927563869787345e-06, "loss": 0.3129, "step": 4462 }, { "epoch": 0.21, "grad_norm": 0.6472463406987137, "learning_rate": 4.927518541025147e-06, "loss": 0.3132, "step": 4463 }, { "epoch": 0.21, "grad_norm": 0.7138312820839836, "learning_rate": 4.927473198293189e-06, "loss": 0.332, "step": 4464 }, { "epoch": 0.21, "grad_norm": 0.667618595288727, "learning_rate": 4.927427841591734e-06, "loss": 0.3116, "step": 4465 }, { "epoch": 0.21, "grad_norm": 0.719104575802436, "learning_rate": 4.9273824709210405e-06, "loss": 0.2935, "step": 4466 }, { "epoch": 0.21, "grad_norm": 0.6244483217224905, "learning_rate": 4.927337086281372e-06, "loss": 0.2993, "step": 4467 }, { "epoch": 0.21, "grad_norm": 0.6439622988822955, "learning_rate": 4.927291687672988e-06, "loss": 0.3275, "step": 4468 }, { "epoch": 0.21, "grad_norm": 0.636133959934106, "learning_rate": 4.927246275096151e-06, "loss": 0.2748, "step": 4469 }, { "epoch": 0.21, "grad_norm": 0.6475157737027254, "learning_rate": 4.927200848551122e-06, "loss": 0.3154, "step": 4470 }, { "epoch": 0.21, "grad_norm": 0.6989385465904451, "learning_rate": 4.927155408038161e-06, "loss": 0.3166, "step": 4471 }, { "epoch": 0.21, "grad_norm": 0.7245177077101411, "learning_rate": 4.927109953557532e-06, "loss": 0.3173, "step": 4472 }, { "epoch": 0.21, "grad_norm": 0.5764487353828057, "learning_rate": 4.927064485109494e-06, "loss": 0.2742, "step": 4473 }, { "epoch": 0.21, "grad_norm": 0.6106967927749954, "learning_rate": 4.927019002694311e-06, "loss": 0.2988, "step": 4474 }, { "epoch": 0.21, "grad_norm": 0.649039895063909, "learning_rate": 4.9269735063122424e-06, "loss": 0.3172, "step": 4475 }, { "epoch": 0.21, "grad_norm": 0.6976299101070069, "learning_rate": 4.926927995963553e-06, "loss": 0.3104, "step": 4476 }, { "epoch": 0.21, "grad_norm": 0.6419245298869971, "learning_rate": 4.926882471648502e-06, "loss": 0.305, "step": 4477 }, { "epoch": 0.21, "grad_norm": 0.6525870031717338, "learning_rate": 4.9268369333673514e-06, "loss": 0.3049, "step": 4478 }, { "epoch": 0.21, "grad_norm": 0.741861053350525, "learning_rate": 4.926791381120366e-06, "loss": 0.3145, "step": 4479 }, { "epoch": 0.21, "grad_norm": 0.6298175088796799, "learning_rate": 4.926745814907805e-06, "loss": 0.3031, "step": 4480 }, { "epoch": 0.21, "grad_norm": 0.6686664424020168, "learning_rate": 4.926700234729932e-06, "loss": 0.3053, "step": 4481 }, { "epoch": 0.21, "grad_norm": 0.6371966199996749, "learning_rate": 4.9266546405870095e-06, "loss": 0.2921, "step": 4482 }, { "epoch": 0.21, "grad_norm": 0.6169112654346999, "learning_rate": 4.926609032479299e-06, "loss": 0.2942, "step": 4483 }, { "epoch": 0.21, "grad_norm": 0.6755194845196444, "learning_rate": 4.926563410407063e-06, "loss": 0.3029, "step": 4484 }, { "epoch": 0.21, "grad_norm": 0.6670996507767957, "learning_rate": 4.926517774370565e-06, "loss": 0.3153, "step": 4485 }, { "epoch": 0.21, "grad_norm": 0.644714534756908, "learning_rate": 4.926472124370067e-06, "loss": 0.3043, "step": 4486 }, { "epoch": 0.21, "grad_norm": 0.6174622591437556, "learning_rate": 4.926426460405832e-06, "loss": 0.2932, "step": 4487 }, { "epoch": 0.21, "grad_norm": 0.664226740207951, "learning_rate": 4.926380782478123e-06, "loss": 0.3077, "step": 4488 }, { "epoch": 0.21, "grad_norm": 0.5872178983495393, "learning_rate": 4.926335090587201e-06, "loss": 0.287, "step": 4489 }, { "epoch": 0.21, "grad_norm": 0.6278919032826058, "learning_rate": 4.926289384733332e-06, "loss": 0.3131, "step": 4490 }, { "epoch": 0.21, "grad_norm": 0.612181625831464, "learning_rate": 4.926243664916776e-06, "loss": 0.2716, "step": 4491 }, { "epoch": 0.21, "grad_norm": 0.6568876097302112, "learning_rate": 4.9261979311377985e-06, "loss": 0.3091, "step": 4492 }, { "epoch": 0.21, "grad_norm": 0.6209786601956355, "learning_rate": 4.9261521833966615e-06, "loss": 0.2994, "step": 4493 }, { "epoch": 0.21, "grad_norm": 0.700131476620929, "learning_rate": 4.926106421693629e-06, "loss": 0.3164, "step": 4494 }, { "epoch": 0.21, "grad_norm": 0.6507324315655947, "learning_rate": 4.9260606460289625e-06, "loss": 0.2978, "step": 4495 }, { "epoch": 0.21, "grad_norm": 0.6266453639325512, "learning_rate": 4.926014856402928e-06, "loss": 0.2854, "step": 4496 }, { "epoch": 0.21, "grad_norm": 0.6678585157378778, "learning_rate": 4.9259690528157875e-06, "loss": 0.3037, "step": 4497 }, { "epoch": 0.21, "grad_norm": 0.6154491255192899, "learning_rate": 4.925923235267804e-06, "loss": 0.2848, "step": 4498 }, { "epoch": 0.21, "grad_norm": 0.7001360377920539, "learning_rate": 4.925877403759243e-06, "loss": 0.3274, "step": 4499 }, { "epoch": 0.21, "grad_norm": 0.6528627369828501, "learning_rate": 4.925831558290367e-06, "loss": 0.2871, "step": 4500 }, { "epoch": 0.21, "grad_norm": 0.6832028414189534, "learning_rate": 4.925785698861439e-06, "loss": 0.309, "step": 4501 }, { "epoch": 0.21, "grad_norm": 0.6404512594002214, "learning_rate": 4.925739825472726e-06, "loss": 0.3064, "step": 4502 }, { "epoch": 0.21, "grad_norm": 0.577023496299867, "learning_rate": 4.925693938124488e-06, "loss": 0.2992, "step": 4503 }, { "epoch": 0.21, "grad_norm": 0.6550802850196321, "learning_rate": 4.925648036816992e-06, "loss": 0.2969, "step": 4504 }, { "epoch": 0.21, "grad_norm": 0.6288605491403765, "learning_rate": 4.925602121550502e-06, "loss": 0.2953, "step": 4505 }, { "epoch": 0.21, "grad_norm": 0.6377067402695463, "learning_rate": 4.92555619232528e-06, "loss": 0.3012, "step": 4506 }, { "epoch": 0.21, "grad_norm": 0.7147660379493258, "learning_rate": 4.925510249141593e-06, "loss": 0.306, "step": 4507 }, { "epoch": 0.21, "grad_norm": 0.714553628020211, "learning_rate": 4.925464291999704e-06, "loss": 0.3199, "step": 4508 }, { "epoch": 0.21, "grad_norm": 0.678363100573355, "learning_rate": 4.925418320899877e-06, "loss": 0.3149, "step": 4509 }, { "epoch": 0.21, "grad_norm": 0.6795559379776098, "learning_rate": 4.925372335842377e-06, "loss": 0.2837, "step": 4510 }, { "epoch": 0.21, "grad_norm": 0.5974344819315637, "learning_rate": 4.92532633682747e-06, "loss": 0.2756, "step": 4511 }, { "epoch": 0.21, "grad_norm": 0.6350555453988977, "learning_rate": 4.925280323855418e-06, "loss": 0.2812, "step": 4512 }, { "epoch": 0.21, "grad_norm": 0.7320977354612084, "learning_rate": 4.925234296926488e-06, "loss": 0.3128, "step": 4513 }, { "epoch": 0.21, "grad_norm": 0.6561027098534131, "learning_rate": 4.925188256040944e-06, "loss": 0.2927, "step": 4514 }, { "epoch": 0.21, "grad_norm": 0.6274795660916729, "learning_rate": 4.925142201199052e-06, "loss": 0.2864, "step": 4515 }, { "epoch": 0.21, "grad_norm": 0.6852541994848502, "learning_rate": 4.925096132401075e-06, "loss": 0.2927, "step": 4516 }, { "epoch": 0.21, "grad_norm": 0.6960059251766032, "learning_rate": 4.92505004964728e-06, "loss": 0.3089, "step": 4517 }, { "epoch": 0.21, "grad_norm": 0.6536896155229162, "learning_rate": 4.925003952937931e-06, "loss": 0.2998, "step": 4518 }, { "epoch": 0.21, "grad_norm": 0.6741570974625475, "learning_rate": 4.924957842273294e-06, "loss": 0.305, "step": 4519 }, { "epoch": 0.21, "grad_norm": 0.6264843069402526, "learning_rate": 4.924911717653634e-06, "loss": 0.3137, "step": 4520 }, { "epoch": 0.21, "grad_norm": 0.6626169519839777, "learning_rate": 4.924865579079217e-06, "loss": 0.3147, "step": 4521 }, { "epoch": 0.21, "grad_norm": 0.6486421472692034, "learning_rate": 4.924819426550308e-06, "loss": 0.3005, "step": 4522 }, { "epoch": 0.21, "grad_norm": 0.6062814478089581, "learning_rate": 4.924773260067171e-06, "loss": 0.295, "step": 4523 }, { "epoch": 0.21, "grad_norm": 0.6153609058195016, "learning_rate": 4.924727079630075e-06, "loss": 0.2908, "step": 4524 }, { "epoch": 0.21, "grad_norm": 0.6719003771471443, "learning_rate": 4.924680885239284e-06, "loss": 0.3051, "step": 4525 }, { "epoch": 0.21, "grad_norm": 0.7022460875437452, "learning_rate": 4.924634676895063e-06, "loss": 0.3033, "step": 4526 }, { "epoch": 0.21, "grad_norm": 0.6131103933530557, "learning_rate": 4.92458845459768e-06, "loss": 0.2977, "step": 4527 }, { "epoch": 0.21, "grad_norm": 0.5980915270000645, "learning_rate": 4.9245422183474e-06, "loss": 0.2894, "step": 4528 }, { "epoch": 0.21, "grad_norm": 0.6641319677711318, "learning_rate": 4.924495968144488e-06, "loss": 0.3028, "step": 4529 }, { "epoch": 0.21, "grad_norm": 0.6298510004200859, "learning_rate": 4.924449703989211e-06, "loss": 0.299, "step": 4530 }, { "epoch": 0.21, "grad_norm": 0.6564397180602507, "learning_rate": 4.9244034258818365e-06, "loss": 0.3146, "step": 4531 }, { "epoch": 0.21, "grad_norm": 0.6605534758508583, "learning_rate": 4.924357133822628e-06, "loss": 0.2926, "step": 4532 }, { "epoch": 0.21, "grad_norm": 0.7007670800020913, "learning_rate": 4.924310827811855e-06, "loss": 0.2923, "step": 4533 }, { "epoch": 0.21, "grad_norm": 0.6480840672206956, "learning_rate": 4.924264507849782e-06, "loss": 0.3105, "step": 4534 }, { "epoch": 0.21, "grad_norm": 0.6560304308901874, "learning_rate": 4.924218173936675e-06, "loss": 0.3105, "step": 4535 }, { "epoch": 0.21, "grad_norm": 0.6528242205841888, "learning_rate": 4.924171826072804e-06, "loss": 0.2966, "step": 4536 }, { "epoch": 0.21, "grad_norm": 0.7271470788097215, "learning_rate": 4.9241254642584315e-06, "loss": 0.3224, "step": 4537 }, { "epoch": 0.21, "grad_norm": 0.6882800094392971, "learning_rate": 4.9240790884938265e-06, "loss": 0.3146, "step": 4538 }, { "epoch": 0.21, "grad_norm": 0.6633956247920071, "learning_rate": 4.924032698779256e-06, "loss": 0.3077, "step": 4539 }, { "epoch": 0.21, "grad_norm": 0.6578209551460945, "learning_rate": 4.923986295114986e-06, "loss": 0.3127, "step": 4540 }, { "epoch": 0.21, "grad_norm": 0.6539348220735014, "learning_rate": 4.923939877501285e-06, "loss": 0.3048, "step": 4541 }, { "epoch": 0.21, "grad_norm": 0.6595726927937876, "learning_rate": 4.923893445938419e-06, "loss": 0.3028, "step": 4542 }, { "epoch": 0.21, "grad_norm": 0.6673780761384033, "learning_rate": 4.923847000426656e-06, "loss": 0.325, "step": 4543 }, { "epoch": 0.21, "grad_norm": 0.644534642193851, "learning_rate": 4.923800540966261e-06, "loss": 0.3233, "step": 4544 }, { "epoch": 0.21, "grad_norm": 0.6209787190180615, "learning_rate": 4.923754067557505e-06, "loss": 0.2825, "step": 4545 }, { "epoch": 0.21, "grad_norm": 0.6332285769470354, "learning_rate": 4.923707580200653e-06, "loss": 0.2877, "step": 4546 }, { "epoch": 0.21, "grad_norm": 0.6649358552263912, "learning_rate": 4.923661078895972e-06, "loss": 0.2923, "step": 4547 }, { "epoch": 0.21, "grad_norm": 0.6867038334468127, "learning_rate": 4.923614563643732e-06, "loss": 0.3051, "step": 4548 }, { "epoch": 0.21, "grad_norm": 0.6630853412467658, "learning_rate": 4.923568034444198e-06, "loss": 0.3203, "step": 4549 }, { "epoch": 0.21, "grad_norm": 0.6446207851667675, "learning_rate": 4.9235214912976394e-06, "loss": 0.297, "step": 4550 }, { "epoch": 0.21, "grad_norm": 0.5861806614430553, "learning_rate": 4.923474934204324e-06, "loss": 0.285, "step": 4551 }, { "epoch": 0.21, "grad_norm": 0.6543573655749679, "learning_rate": 4.92342836316452e-06, "loss": 0.3165, "step": 4552 }, { "epoch": 0.21, "grad_norm": 0.5999907445875508, "learning_rate": 4.923381778178495e-06, "loss": 0.2818, "step": 4553 }, { "epoch": 0.21, "grad_norm": 0.6578521135484489, "learning_rate": 4.923335179246516e-06, "loss": 0.329, "step": 4554 }, { "epoch": 0.21, "grad_norm": 0.5875618727801298, "learning_rate": 4.9232885663688525e-06, "loss": 0.2902, "step": 4555 }, { "epoch": 0.21, "grad_norm": 0.6614291459534196, "learning_rate": 4.923241939545773e-06, "loss": 0.2947, "step": 4556 }, { "epoch": 0.21, "grad_norm": 0.6940160697121641, "learning_rate": 4.923195298777544e-06, "loss": 0.318, "step": 4557 }, { "epoch": 0.21, "grad_norm": 0.6512688040630048, "learning_rate": 4.923148644064436e-06, "loss": 0.3172, "step": 4558 }, { "epoch": 0.21, "grad_norm": 0.6057247338026178, "learning_rate": 4.923101975406717e-06, "loss": 0.282, "step": 4559 }, { "epoch": 0.21, "grad_norm": 0.6570229675353221, "learning_rate": 4.923055292804654e-06, "loss": 0.3085, "step": 4560 }, { "epoch": 0.21, "grad_norm": 0.5973202785756488, "learning_rate": 4.923008596258517e-06, "loss": 0.2684, "step": 4561 }, { "epoch": 0.21, "grad_norm": 0.6421583628550253, "learning_rate": 4.9229618857685755e-06, "loss": 0.3094, "step": 4562 }, { "epoch": 0.21, "grad_norm": 0.6310489195655733, "learning_rate": 4.9229151613350966e-06, "loss": 0.2747, "step": 4563 }, { "epoch": 0.21, "grad_norm": 0.6642183772958913, "learning_rate": 4.92286842295835e-06, "loss": 0.2995, "step": 4564 }, { "epoch": 0.21, "grad_norm": 0.702239809439023, "learning_rate": 4.922821670638604e-06, "loss": 0.3191, "step": 4565 }, { "epoch": 0.21, "grad_norm": 0.6262345004068487, "learning_rate": 4.922774904376129e-06, "loss": 0.3157, "step": 4566 }, { "epoch": 0.21, "grad_norm": 0.6348053149222856, "learning_rate": 4.922728124171193e-06, "loss": 0.3155, "step": 4567 }, { "epoch": 0.21, "grad_norm": 0.6421756559848522, "learning_rate": 4.922681330024066e-06, "loss": 0.309, "step": 4568 }, { "epoch": 0.21, "grad_norm": 0.6123948448943998, "learning_rate": 4.922634521935017e-06, "loss": 0.2734, "step": 4569 }, { "epoch": 0.21, "grad_norm": 0.6159196475225955, "learning_rate": 4.922587699904314e-06, "loss": 0.2946, "step": 4570 }, { "epoch": 0.21, "grad_norm": 0.645104020176425, "learning_rate": 4.922540863932229e-06, "loss": 0.2873, "step": 4571 }, { "epoch": 0.21, "grad_norm": 0.6149053254637892, "learning_rate": 4.92249401401903e-06, "loss": 0.2825, "step": 4572 }, { "epoch": 0.21, "grad_norm": 0.6716015151893557, "learning_rate": 4.922447150164987e-06, "loss": 0.3222, "step": 4573 }, { "epoch": 0.21, "grad_norm": 0.6657359871315198, "learning_rate": 4.922400272370368e-06, "loss": 0.3111, "step": 4574 }, { "epoch": 0.21, "grad_norm": 0.5995591289743971, "learning_rate": 4.922353380635446e-06, "loss": 0.2943, "step": 4575 }, { "epoch": 0.21, "grad_norm": 0.638348553758986, "learning_rate": 4.922306474960489e-06, "loss": 0.3067, "step": 4576 }, { "epoch": 0.21, "grad_norm": 0.6537637701425527, "learning_rate": 4.922259555345766e-06, "loss": 0.3045, "step": 4577 }, { "epoch": 0.21, "grad_norm": 0.6586391651778001, "learning_rate": 4.922212621791549e-06, "loss": 0.3315, "step": 4578 }, { "epoch": 0.21, "grad_norm": 0.7427415993980501, "learning_rate": 4.922165674298106e-06, "loss": 0.3037, "step": 4579 }, { "epoch": 0.21, "grad_norm": 0.6423213271810424, "learning_rate": 4.922118712865709e-06, "loss": 0.3099, "step": 4580 }, { "epoch": 0.21, "grad_norm": 0.5945349292317494, "learning_rate": 4.922071737494628e-06, "loss": 0.3105, "step": 4581 }, { "epoch": 0.21, "grad_norm": 0.7062897813312039, "learning_rate": 4.922024748185133e-06, "loss": 0.3272, "step": 4582 }, { "epoch": 0.21, "grad_norm": 0.6503170917128496, "learning_rate": 4.921977744937494e-06, "loss": 0.2828, "step": 4583 }, { "epoch": 0.21, "grad_norm": 0.7005702860217456, "learning_rate": 4.921930727751981e-06, "loss": 0.3059, "step": 4584 }, { "epoch": 0.21, "grad_norm": 0.7008044048065711, "learning_rate": 4.9218836966288665e-06, "loss": 0.3183, "step": 4585 }, { "epoch": 0.21, "grad_norm": 0.6281764712298719, "learning_rate": 4.92183665156842e-06, "loss": 0.3138, "step": 4586 }, { "epoch": 0.21, "grad_norm": 0.6279173393981322, "learning_rate": 4.921789592570912e-06, "loss": 0.3133, "step": 4587 }, { "epoch": 0.21, "grad_norm": 0.6079860822578063, "learning_rate": 4.9217425196366134e-06, "loss": 0.2945, "step": 4588 }, { "epoch": 0.21, "grad_norm": 0.664788877370541, "learning_rate": 4.921695432765795e-06, "loss": 0.3079, "step": 4589 }, { "epoch": 0.22, "grad_norm": 0.6927913751162266, "learning_rate": 4.921648331958729e-06, "loss": 0.305, "step": 4590 }, { "epoch": 0.22, "grad_norm": 0.6469791250167011, "learning_rate": 4.921601217215684e-06, "loss": 0.3053, "step": 4591 }, { "epoch": 0.22, "grad_norm": 0.6453360303035577, "learning_rate": 4.921554088536934e-06, "loss": 0.3039, "step": 4592 }, { "epoch": 0.22, "grad_norm": 0.7185349827859864, "learning_rate": 4.9215069459227496e-06, "loss": 0.317, "step": 4593 }, { "epoch": 0.22, "grad_norm": 0.6884824893750112, "learning_rate": 4.921459789373399e-06, "loss": 0.3134, "step": 4594 }, { "epoch": 0.22, "grad_norm": 0.7496360705693766, "learning_rate": 4.921412618889157e-06, "loss": 0.3242, "step": 4595 }, { "epoch": 0.22, "grad_norm": 0.6009729658102253, "learning_rate": 4.921365434470295e-06, "loss": 0.2839, "step": 4596 }, { "epoch": 0.22, "grad_norm": 0.6557826174916532, "learning_rate": 4.921318236117083e-06, "loss": 0.3144, "step": 4597 }, { "epoch": 0.22, "grad_norm": 0.6506468659960883, "learning_rate": 4.921271023829793e-06, "loss": 0.2856, "step": 4598 }, { "epoch": 0.22, "grad_norm": 0.6427303867331313, "learning_rate": 4.921223797608696e-06, "loss": 0.3054, "step": 4599 }, { "epoch": 0.22, "grad_norm": 0.6556700591448696, "learning_rate": 4.9211765574540655e-06, "loss": 0.2728, "step": 4600 }, { "epoch": 0.22, "grad_norm": 0.7103110925917483, "learning_rate": 4.921129303366173e-06, "loss": 0.3246, "step": 4601 }, { "epoch": 0.22, "grad_norm": 0.6220887645465705, "learning_rate": 4.921082035345288e-06, "loss": 0.3188, "step": 4602 }, { "epoch": 0.22, "grad_norm": 0.6156503700081077, "learning_rate": 4.921034753391686e-06, "loss": 0.3003, "step": 4603 }, { "epoch": 0.22, "grad_norm": 0.5701603476575733, "learning_rate": 4.920987457505637e-06, "loss": 0.266, "step": 4604 }, { "epoch": 0.22, "grad_norm": 0.6354815720553649, "learning_rate": 4.920940147687413e-06, "loss": 0.2969, "step": 4605 }, { "epoch": 0.22, "grad_norm": 0.599612763674905, "learning_rate": 4.920892823937287e-06, "loss": 0.2905, "step": 4606 }, { "epoch": 0.22, "grad_norm": 0.6084768617215734, "learning_rate": 4.9208454862555325e-06, "loss": 0.2828, "step": 4607 }, { "epoch": 0.22, "grad_norm": 0.6823048297721882, "learning_rate": 4.92079813464242e-06, "loss": 0.3255, "step": 4608 }, { "epoch": 0.22, "grad_norm": 0.6387043057512019, "learning_rate": 4.920750769098223e-06, "loss": 0.3028, "step": 4609 }, { "epoch": 0.22, "grad_norm": 0.6543725158657582, "learning_rate": 4.9207033896232135e-06, "loss": 0.3126, "step": 4610 }, { "epoch": 0.22, "grad_norm": 0.640712480029257, "learning_rate": 4.920655996217664e-06, "loss": 0.2866, "step": 4611 }, { "epoch": 0.22, "grad_norm": 0.60714234303834, "learning_rate": 4.920608588881848e-06, "loss": 0.305, "step": 4612 }, { "epoch": 0.22, "grad_norm": 0.6570117438573411, "learning_rate": 4.920561167616038e-06, "loss": 0.3138, "step": 4613 }, { "epoch": 0.22, "grad_norm": 0.6485681273634838, "learning_rate": 4.920513732420507e-06, "loss": 0.3122, "step": 4614 }, { "epoch": 0.22, "grad_norm": 0.6388042200197137, "learning_rate": 4.920466283295528e-06, "loss": 0.3097, "step": 4615 }, { "epoch": 0.22, "grad_norm": 0.6717327692376269, "learning_rate": 4.920418820241373e-06, "loss": 0.3112, "step": 4616 }, { "epoch": 0.22, "grad_norm": 0.6432346609588325, "learning_rate": 4.9203713432583165e-06, "loss": 0.3019, "step": 4617 }, { "epoch": 0.22, "grad_norm": 0.6178035182987959, "learning_rate": 4.920323852346631e-06, "loss": 0.3091, "step": 4618 }, { "epoch": 0.22, "grad_norm": 0.6657805969017181, "learning_rate": 4.920276347506591e-06, "loss": 0.2897, "step": 4619 }, { "epoch": 0.22, "grad_norm": 0.6543817206592588, "learning_rate": 4.920228828738468e-06, "loss": 0.2795, "step": 4620 }, { "epoch": 0.22, "grad_norm": 0.6999257252292103, "learning_rate": 4.920181296042537e-06, "loss": 0.315, "step": 4621 }, { "epoch": 0.22, "grad_norm": 0.5808673482702043, "learning_rate": 4.920133749419071e-06, "loss": 0.2906, "step": 4622 }, { "epoch": 0.22, "grad_norm": 0.6390209789031241, "learning_rate": 4.9200861888683434e-06, "loss": 0.3018, "step": 4623 }, { "epoch": 0.22, "grad_norm": 0.5979078894770896, "learning_rate": 4.920038614390628e-06, "loss": 0.2806, "step": 4624 }, { "epoch": 0.22, "grad_norm": 0.626944798838219, "learning_rate": 4.919991025986198e-06, "loss": 0.2954, "step": 4625 }, { "epoch": 0.22, "grad_norm": 0.630938519525427, "learning_rate": 4.919943423655329e-06, "loss": 0.3172, "step": 4626 }, { "epoch": 0.22, "grad_norm": 0.6809965706813415, "learning_rate": 4.9198958073982925e-06, "loss": 0.3044, "step": 4627 }, { "epoch": 0.22, "grad_norm": 0.6759085002033419, "learning_rate": 4.919848177215365e-06, "loss": 0.3013, "step": 4628 }, { "epoch": 0.22, "grad_norm": 0.6391825468235061, "learning_rate": 4.9198005331068185e-06, "loss": 0.2828, "step": 4629 }, { "epoch": 0.22, "grad_norm": 0.6244587054301652, "learning_rate": 4.919752875072929e-06, "loss": 0.3115, "step": 4630 }, { "epoch": 0.22, "grad_norm": 0.6481823440003496, "learning_rate": 4.919705203113969e-06, "loss": 0.3023, "step": 4631 }, { "epoch": 0.22, "grad_norm": 0.6967002542481644, "learning_rate": 4.9196575172302155e-06, "loss": 0.3104, "step": 4632 }, { "epoch": 0.22, "grad_norm": 0.5879820639737037, "learning_rate": 4.919609817421939e-06, "loss": 0.263, "step": 4633 }, { "epoch": 0.22, "grad_norm": 0.603987326766066, "learning_rate": 4.919562103689417e-06, "loss": 0.2994, "step": 4634 }, { "epoch": 0.22, "grad_norm": 0.6901537958101457, "learning_rate": 4.919514376032923e-06, "loss": 0.2972, "step": 4635 }, { "epoch": 0.22, "grad_norm": 0.6086650076590844, "learning_rate": 4.919466634452733e-06, "loss": 0.2756, "step": 4636 }, { "epoch": 0.22, "grad_norm": 0.6799650508865167, "learning_rate": 4.919418878949119e-06, "loss": 0.311, "step": 4637 }, { "epoch": 0.22, "grad_norm": 0.7073206175425172, "learning_rate": 4.9193711095223584e-06, "loss": 0.3285, "step": 4638 }, { "epoch": 0.22, "grad_norm": 0.6468175021904542, "learning_rate": 4.919323326172726e-06, "loss": 0.2792, "step": 4639 }, { "epoch": 0.22, "grad_norm": 0.6156131354870613, "learning_rate": 4.919275528900495e-06, "loss": 0.3216, "step": 4640 }, { "epoch": 0.22, "grad_norm": 0.6450150574617112, "learning_rate": 4.919227717705941e-06, "loss": 0.3154, "step": 4641 }, { "epoch": 0.22, "grad_norm": 0.6574026284220841, "learning_rate": 4.91917989258934e-06, "loss": 0.3127, "step": 4642 }, { "epoch": 0.22, "grad_norm": 0.6643203209704499, "learning_rate": 4.919132053550966e-06, "loss": 0.3187, "step": 4643 }, { "epoch": 0.22, "grad_norm": 0.6201182700542179, "learning_rate": 4.9190842005910955e-06, "loss": 0.293, "step": 4644 }, { "epoch": 0.22, "grad_norm": 0.6297163142211779, "learning_rate": 4.9190363337100036e-06, "loss": 0.2964, "step": 4645 }, { "epoch": 0.22, "grad_norm": 0.5824649502876073, "learning_rate": 4.918988452907966e-06, "loss": 0.2979, "step": 4646 }, { "epoch": 0.22, "grad_norm": 0.6420578963808022, "learning_rate": 4.9189405581852575e-06, "loss": 0.3104, "step": 4647 }, { "epoch": 0.22, "grad_norm": 0.7046269192986725, "learning_rate": 4.918892649542154e-06, "loss": 0.3043, "step": 4648 }, { "epoch": 0.22, "grad_norm": 0.6791513501339436, "learning_rate": 4.918844726978931e-06, "loss": 0.2999, "step": 4649 }, { "epoch": 0.22, "grad_norm": 0.6746384159249698, "learning_rate": 4.918796790495865e-06, "loss": 0.3119, "step": 4650 }, { "epoch": 0.22, "grad_norm": 0.6617211177233525, "learning_rate": 4.918748840093231e-06, "loss": 0.2866, "step": 4651 }, { "epoch": 0.22, "grad_norm": 0.6177055528507687, "learning_rate": 4.918700875771306e-06, "loss": 0.2861, "step": 4652 }, { "epoch": 0.22, "grad_norm": 0.6668373501500584, "learning_rate": 4.918652897530365e-06, "loss": 0.3024, "step": 4653 }, { "epoch": 0.22, "grad_norm": 0.6674510634037614, "learning_rate": 4.918604905370684e-06, "loss": 0.3088, "step": 4654 }, { "epoch": 0.22, "grad_norm": 0.6445155937708686, "learning_rate": 4.91855689929254e-06, "loss": 0.3084, "step": 4655 }, { "epoch": 0.22, "grad_norm": 0.6629805535073241, "learning_rate": 4.91850887929621e-06, "loss": 0.3075, "step": 4656 }, { "epoch": 0.22, "grad_norm": 0.6495964359526777, "learning_rate": 4.918460845381968e-06, "loss": 0.3052, "step": 4657 }, { "epoch": 0.22, "grad_norm": 0.6617750527618876, "learning_rate": 4.918412797550092e-06, "loss": 0.3144, "step": 4658 }, { "epoch": 0.22, "grad_norm": 0.6695189746931611, "learning_rate": 4.918364735800858e-06, "loss": 0.3146, "step": 4659 }, { "epoch": 0.22, "grad_norm": 0.6719135330381957, "learning_rate": 4.918316660134543e-06, "loss": 0.3113, "step": 4660 }, { "epoch": 0.22, "grad_norm": 0.6184385001897809, "learning_rate": 4.918268570551424e-06, "loss": 0.3169, "step": 4661 }, { "epoch": 0.22, "grad_norm": 0.6890683857936853, "learning_rate": 4.918220467051776e-06, "loss": 0.3068, "step": 4662 }, { "epoch": 0.22, "grad_norm": 0.630646049059073, "learning_rate": 4.918172349635878e-06, "loss": 0.2915, "step": 4663 }, { "epoch": 0.22, "grad_norm": 0.6310240924226657, "learning_rate": 4.918124218304006e-06, "loss": 0.3189, "step": 4664 }, { "epoch": 0.22, "grad_norm": 0.6564143549987344, "learning_rate": 4.918076073056436e-06, "loss": 0.2998, "step": 4665 }, { "epoch": 0.22, "grad_norm": 0.6527132457344403, "learning_rate": 4.918027913893446e-06, "loss": 0.2978, "step": 4666 }, { "epoch": 0.22, "grad_norm": 0.6677866198449403, "learning_rate": 4.917979740815314e-06, "loss": 0.3081, "step": 4667 }, { "epoch": 0.22, "grad_norm": 0.6534822281942936, "learning_rate": 4.917931553822315e-06, "loss": 0.3105, "step": 4668 }, { "epoch": 0.22, "grad_norm": 0.6349484386068629, "learning_rate": 4.917883352914729e-06, "loss": 0.3048, "step": 4669 }, { "epoch": 0.22, "grad_norm": 0.6210774281738634, "learning_rate": 4.917835138092831e-06, "loss": 0.2894, "step": 4670 }, { "epoch": 0.22, "grad_norm": 0.7007914477404632, "learning_rate": 4.917786909356901e-06, "loss": 0.3066, "step": 4671 }, { "epoch": 0.22, "grad_norm": 0.6112344660918914, "learning_rate": 4.917738666707214e-06, "loss": 0.2681, "step": 4672 }, { "epoch": 0.22, "grad_norm": 0.7008251145812795, "learning_rate": 4.917690410144048e-06, "loss": 0.3083, "step": 4673 }, { "epoch": 0.22, "grad_norm": 0.6768041505470049, "learning_rate": 4.917642139667682e-06, "loss": 0.2986, "step": 4674 }, { "epoch": 0.22, "grad_norm": 0.6635934327876942, "learning_rate": 4.917593855278393e-06, "loss": 0.3064, "step": 4675 }, { "epoch": 0.22, "grad_norm": 0.5858481648444755, "learning_rate": 4.91754555697646e-06, "loss": 0.2905, "step": 4676 }, { "epoch": 0.22, "grad_norm": 0.6625282252307694, "learning_rate": 4.917497244762158e-06, "loss": 0.2924, "step": 4677 }, { "epoch": 0.22, "grad_norm": 0.7128714995246705, "learning_rate": 4.917448918635769e-06, "loss": 0.3198, "step": 4678 }, { "epoch": 0.22, "grad_norm": 0.7224386270465862, "learning_rate": 4.917400578597567e-06, "loss": 0.309, "step": 4679 }, { "epoch": 0.22, "grad_norm": 0.6592394075255144, "learning_rate": 4.917352224647834e-06, "loss": 0.2899, "step": 4680 }, { "epoch": 0.22, "grad_norm": 0.606227597473977, "learning_rate": 4.917303856786846e-06, "loss": 0.292, "step": 4681 }, { "epoch": 0.22, "grad_norm": 0.5791680096607743, "learning_rate": 4.917255475014881e-06, "loss": 0.2986, "step": 4682 }, { "epoch": 0.22, "grad_norm": 0.6659621719757085, "learning_rate": 4.91720707933222e-06, "loss": 0.3233, "step": 4683 }, { "epoch": 0.22, "grad_norm": 0.6291129982458655, "learning_rate": 4.917158669739138e-06, "loss": 0.2979, "step": 4684 }, { "epoch": 0.22, "grad_norm": 0.607602871046246, "learning_rate": 4.9171102462359165e-06, "loss": 0.3003, "step": 4685 }, { "epoch": 0.22, "grad_norm": 0.6859218085052302, "learning_rate": 4.917061808822833e-06, "loss": 0.3161, "step": 4686 }, { "epoch": 0.22, "grad_norm": 0.653683471512791, "learning_rate": 4.917013357500167e-06, "loss": 0.3152, "step": 4687 }, { "epoch": 0.22, "grad_norm": 0.6961064183102886, "learning_rate": 4.916964892268195e-06, "loss": 0.3124, "step": 4688 }, { "epoch": 0.22, "grad_norm": 0.7537404650545658, "learning_rate": 4.9169164131271985e-06, "loss": 0.3325, "step": 4689 }, { "epoch": 0.22, "grad_norm": 0.5853206064987486, "learning_rate": 4.916867920077455e-06, "loss": 0.2988, "step": 4690 }, { "epoch": 0.22, "grad_norm": 0.6728288811717145, "learning_rate": 4.9168194131192445e-06, "loss": 0.3205, "step": 4691 }, { "epoch": 0.22, "grad_norm": 0.6622787370966358, "learning_rate": 4.916770892252846e-06, "loss": 0.3061, "step": 4692 }, { "epoch": 0.22, "grad_norm": 0.6271057785174585, "learning_rate": 4.916722357478538e-06, "loss": 0.2852, "step": 4693 }, { "epoch": 0.22, "grad_norm": 0.6814522770043056, "learning_rate": 4.9166738087965995e-06, "loss": 0.3322, "step": 4694 }, { "epoch": 0.22, "grad_norm": 0.5769116111916841, "learning_rate": 4.916625246207311e-06, "loss": 0.2948, "step": 4695 }, { "epoch": 0.22, "grad_norm": 0.6173755919230178, "learning_rate": 4.916576669710953e-06, "loss": 0.3021, "step": 4696 }, { "epoch": 0.22, "grad_norm": 0.6433309971632742, "learning_rate": 4.916528079307803e-06, "loss": 0.2952, "step": 4697 }, { "epoch": 0.22, "grad_norm": 0.6309635130243462, "learning_rate": 4.91647947499814e-06, "loss": 0.2835, "step": 4698 }, { "epoch": 0.22, "grad_norm": 0.7197522225186105, "learning_rate": 4.916430856782246e-06, "loss": 0.3011, "step": 4699 }, { "epoch": 0.22, "grad_norm": 0.6373660149554168, "learning_rate": 4.9163822246604e-06, "loss": 0.2948, "step": 4700 }, { "epoch": 0.22, "grad_norm": 0.6802173036414846, "learning_rate": 4.916333578632881e-06, "loss": 0.298, "step": 4701 }, { "epoch": 0.22, "grad_norm": 0.6006803775422077, "learning_rate": 4.91628491869997e-06, "loss": 0.3023, "step": 4702 }, { "epoch": 0.22, "grad_norm": 0.6655665491399155, "learning_rate": 4.916236244861946e-06, "loss": 0.3049, "step": 4703 }, { "epoch": 0.22, "grad_norm": 0.6360615788886047, "learning_rate": 4.916187557119091e-06, "loss": 0.2985, "step": 4704 }, { "epoch": 0.22, "grad_norm": 0.6456331970587739, "learning_rate": 4.916138855471682e-06, "loss": 0.3131, "step": 4705 }, { "epoch": 0.22, "grad_norm": 0.6961865377329493, "learning_rate": 4.916090139920003e-06, "loss": 0.2869, "step": 4706 }, { "epoch": 0.22, "grad_norm": 0.6515189424258969, "learning_rate": 4.916041410464332e-06, "loss": 0.2935, "step": 4707 }, { "epoch": 0.22, "grad_norm": 0.6910779575097085, "learning_rate": 4.91599266710495e-06, "loss": 0.3168, "step": 4708 }, { "epoch": 0.22, "grad_norm": 0.6489348205875948, "learning_rate": 4.915943909842137e-06, "loss": 0.2848, "step": 4709 }, { "epoch": 0.22, "grad_norm": 0.6203679736965066, "learning_rate": 4.9158951386761734e-06, "loss": 0.2882, "step": 4710 }, { "epoch": 0.22, "grad_norm": 0.6139099487759837, "learning_rate": 4.915846353607342e-06, "loss": 0.2905, "step": 4711 }, { "epoch": 0.22, "grad_norm": 0.6750876617505711, "learning_rate": 4.915797554635921e-06, "loss": 0.2963, "step": 4712 }, { "epoch": 0.22, "grad_norm": 0.6551796351653764, "learning_rate": 4.915748741762192e-06, "loss": 0.2962, "step": 4713 }, { "epoch": 0.22, "grad_norm": 0.6365898135785991, "learning_rate": 4.915699914986437e-06, "loss": 0.3121, "step": 4714 }, { "epoch": 0.22, "grad_norm": 0.6857564295920738, "learning_rate": 4.915651074308936e-06, "loss": 0.317, "step": 4715 }, { "epoch": 0.22, "grad_norm": 0.6356573193455815, "learning_rate": 4.91560221972997e-06, "loss": 0.3039, "step": 4716 }, { "epoch": 0.22, "grad_norm": 0.6117571888691118, "learning_rate": 4.91555335124982e-06, "loss": 0.2763, "step": 4717 }, { "epoch": 0.22, "grad_norm": 0.6488750320494839, "learning_rate": 4.915504468868769e-06, "loss": 0.2832, "step": 4718 }, { "epoch": 0.22, "grad_norm": 0.6763170303065337, "learning_rate": 4.9154555725870955e-06, "loss": 0.2997, "step": 4719 }, { "epoch": 0.22, "grad_norm": 0.6315320915095025, "learning_rate": 4.915406662405083e-06, "loss": 0.3125, "step": 4720 }, { "epoch": 0.22, "grad_norm": 0.6090096852546022, "learning_rate": 4.915357738323012e-06, "loss": 0.2991, "step": 4721 }, { "epoch": 0.22, "grad_norm": 0.6336041889046143, "learning_rate": 4.9153088003411645e-06, "loss": 0.3044, "step": 4722 }, { "epoch": 0.22, "grad_norm": 0.651515135620815, "learning_rate": 4.915259848459821e-06, "loss": 0.3055, "step": 4723 }, { "epoch": 0.22, "grad_norm": 0.6170029962752971, "learning_rate": 4.915210882679265e-06, "loss": 0.3104, "step": 4724 }, { "epoch": 0.22, "grad_norm": 0.6087494038881424, "learning_rate": 4.915161902999777e-06, "loss": 0.3222, "step": 4725 }, { "epoch": 0.22, "grad_norm": 0.6171286174107941, "learning_rate": 4.91511290942164e-06, "loss": 0.3026, "step": 4726 }, { "epoch": 0.22, "grad_norm": 0.6255939634086025, "learning_rate": 4.915063901945134e-06, "loss": 0.3051, "step": 4727 }, { "epoch": 0.22, "grad_norm": 0.7179481943821372, "learning_rate": 4.915014880570543e-06, "loss": 0.3165, "step": 4728 }, { "epoch": 0.22, "grad_norm": 0.6976159014169668, "learning_rate": 4.914965845298149e-06, "loss": 0.3306, "step": 4729 }, { "epoch": 0.22, "grad_norm": 0.6642358565160235, "learning_rate": 4.914916796128232e-06, "loss": 0.3043, "step": 4730 }, { "epoch": 0.22, "grad_norm": 0.6265041876135546, "learning_rate": 4.914867733061077e-06, "loss": 0.3016, "step": 4731 }, { "epoch": 0.22, "grad_norm": 0.7131013942976969, "learning_rate": 4.914818656096965e-06, "loss": 0.3169, "step": 4732 }, { "epoch": 0.22, "grad_norm": 0.6027952566819451, "learning_rate": 4.914769565236179e-06, "loss": 0.2902, "step": 4733 }, { "epoch": 0.22, "grad_norm": 0.6654590590017119, "learning_rate": 4.914720460479e-06, "loss": 0.311, "step": 4734 }, { "epoch": 0.22, "grad_norm": 0.7132180235710306, "learning_rate": 4.914671341825712e-06, "loss": 0.3118, "step": 4735 }, { "epoch": 0.22, "grad_norm": 0.6760275391581045, "learning_rate": 4.914622209276597e-06, "loss": 0.2909, "step": 4736 }, { "epoch": 0.22, "grad_norm": 0.592870474535652, "learning_rate": 4.914573062831939e-06, "loss": 0.2888, "step": 4737 }, { "epoch": 0.22, "grad_norm": 0.7011658621184228, "learning_rate": 4.914523902492019e-06, "loss": 0.2976, "step": 4738 }, { "epoch": 0.22, "grad_norm": 0.6620072147997381, "learning_rate": 4.914474728257122e-06, "loss": 0.3037, "step": 4739 }, { "epoch": 0.22, "grad_norm": 0.6387296873112804, "learning_rate": 4.914425540127529e-06, "loss": 0.3156, "step": 4740 }, { "epoch": 0.22, "grad_norm": 0.6967781939836509, "learning_rate": 4.914376338103524e-06, "loss": 0.2974, "step": 4741 }, { "epoch": 0.22, "grad_norm": 0.6521148575111595, "learning_rate": 4.91432712218539e-06, "loss": 0.3234, "step": 4742 }, { "epoch": 0.22, "grad_norm": 0.6079013779847154, "learning_rate": 4.914277892373409e-06, "loss": 0.2909, "step": 4743 }, { "epoch": 0.22, "grad_norm": 0.6231922869327325, "learning_rate": 4.914228648667867e-06, "loss": 0.3034, "step": 4744 }, { "epoch": 0.22, "grad_norm": 0.6375730365593395, "learning_rate": 4.914179391069046e-06, "loss": 0.2917, "step": 4745 }, { "epoch": 0.22, "grad_norm": 0.6088903532408108, "learning_rate": 4.914130119577228e-06, "loss": 0.3012, "step": 4746 }, { "epoch": 0.22, "grad_norm": 0.6907133538459269, "learning_rate": 4.914080834192699e-06, "loss": 0.3239, "step": 4747 }, { "epoch": 0.22, "grad_norm": 0.6270659782489301, "learning_rate": 4.914031534915742e-06, "loss": 0.3065, "step": 4748 }, { "epoch": 0.22, "grad_norm": 0.623169906413525, "learning_rate": 4.91398222174664e-06, "loss": 0.3088, "step": 4749 }, { "epoch": 0.22, "grad_norm": 0.6722532095468662, "learning_rate": 4.913932894685677e-06, "loss": 0.2954, "step": 4750 }, { "epoch": 0.22, "grad_norm": 0.660990246757122, "learning_rate": 4.913883553733136e-06, "loss": 0.3088, "step": 4751 }, { "epoch": 0.22, "grad_norm": 0.7581312410412591, "learning_rate": 4.9138341988893025e-06, "loss": 0.3125, "step": 4752 }, { "epoch": 0.22, "grad_norm": 0.6970144465062915, "learning_rate": 4.91378483015446e-06, "loss": 0.3057, "step": 4753 }, { "epoch": 0.22, "grad_norm": 0.6120704666417933, "learning_rate": 4.913735447528892e-06, "loss": 0.2985, "step": 4754 }, { "epoch": 0.22, "grad_norm": 0.6673864679025044, "learning_rate": 4.913686051012885e-06, "loss": 0.3179, "step": 4755 }, { "epoch": 0.22, "grad_norm": 0.7011064653409748, "learning_rate": 4.913636640606719e-06, "loss": 0.293, "step": 4756 }, { "epoch": 0.22, "grad_norm": 0.6521746657212177, "learning_rate": 4.9135872163106824e-06, "loss": 0.3029, "step": 4757 }, { "epoch": 0.22, "grad_norm": 0.6433443473679628, "learning_rate": 4.913537778125057e-06, "loss": 0.3076, "step": 4758 }, { "epoch": 0.22, "grad_norm": 0.6830641431980108, "learning_rate": 4.913488326050129e-06, "loss": 0.3141, "step": 4759 }, { "epoch": 0.22, "grad_norm": 0.7122407625985115, "learning_rate": 4.9134388600861816e-06, "loss": 0.3177, "step": 4760 }, { "epoch": 0.22, "grad_norm": 0.6569837674586347, "learning_rate": 4.913389380233501e-06, "loss": 0.305, "step": 4761 }, { "epoch": 0.22, "grad_norm": 0.6709719237427477, "learning_rate": 4.91333988649237e-06, "loss": 0.3134, "step": 4762 }, { "epoch": 0.22, "grad_norm": 0.6663308311228373, "learning_rate": 4.913290378863075e-06, "loss": 0.3159, "step": 4763 }, { "epoch": 0.22, "grad_norm": 0.6008918589545276, "learning_rate": 4.913240857345901e-06, "loss": 0.2741, "step": 4764 }, { "epoch": 0.22, "grad_norm": 0.6746189764886209, "learning_rate": 4.913191321941132e-06, "loss": 0.3033, "step": 4765 }, { "epoch": 0.22, "grad_norm": 0.6221114971349353, "learning_rate": 4.913141772649054e-06, "loss": 0.3198, "step": 4766 }, { "epoch": 0.22, "grad_norm": 0.6312017441697149, "learning_rate": 4.9130922094699504e-06, "loss": 0.3102, "step": 4767 }, { "epoch": 0.22, "grad_norm": 0.6320719174165474, "learning_rate": 4.913042632404108e-06, "loss": 0.2939, "step": 4768 }, { "epoch": 0.22, "grad_norm": 0.6205332750657601, "learning_rate": 4.912993041451812e-06, "loss": 0.3135, "step": 4769 }, { "epoch": 0.22, "grad_norm": 0.6606488551842469, "learning_rate": 4.912943436613348e-06, "loss": 0.3129, "step": 4770 }, { "epoch": 0.22, "grad_norm": 0.5822606775198145, "learning_rate": 4.912893817889001e-06, "loss": 0.2837, "step": 4771 }, { "epoch": 0.22, "grad_norm": 0.6125300204515292, "learning_rate": 4.912844185279056e-06, "loss": 0.2947, "step": 4772 }, { "epoch": 0.22, "grad_norm": 0.6400377222072344, "learning_rate": 4.9127945387837995e-06, "loss": 0.278, "step": 4773 }, { "epoch": 0.22, "grad_norm": 0.6101530767245031, "learning_rate": 4.912744878403516e-06, "loss": 0.3103, "step": 4774 }, { "epoch": 0.22, "grad_norm": 0.6500718756248658, "learning_rate": 4.912695204138494e-06, "loss": 0.3079, "step": 4775 }, { "epoch": 0.22, "grad_norm": 0.6506468603044561, "learning_rate": 4.912645515989015e-06, "loss": 0.2927, "step": 4776 }, { "epoch": 0.22, "grad_norm": 0.6671685697284879, "learning_rate": 4.9125958139553695e-06, "loss": 0.3202, "step": 4777 }, { "epoch": 0.22, "grad_norm": 0.7275468654753984, "learning_rate": 4.9125460980378405e-06, "loss": 0.3302, "step": 4778 }, { "epoch": 0.22, "grad_norm": 0.649310085309838, "learning_rate": 4.9124963682367156e-06, "loss": 0.3197, "step": 4779 }, { "epoch": 0.22, "grad_norm": 0.6210169510835925, "learning_rate": 4.912446624552279e-06, "loss": 0.3122, "step": 4780 }, { "epoch": 0.22, "grad_norm": 0.6388581659460779, "learning_rate": 4.912396866984821e-06, "loss": 0.3131, "step": 4781 }, { "epoch": 0.22, "grad_norm": 0.5951640332139397, "learning_rate": 4.912347095534623e-06, "loss": 0.2985, "step": 4782 }, { "epoch": 0.22, "grad_norm": 0.6132136510147432, "learning_rate": 4.912297310201975e-06, "loss": 0.2897, "step": 4783 }, { "epoch": 0.22, "grad_norm": 0.6705719398048903, "learning_rate": 4.912247510987162e-06, "loss": 0.2853, "step": 4784 }, { "epoch": 0.22, "grad_norm": 0.6614987417434678, "learning_rate": 4.912197697890471e-06, "loss": 0.3149, "step": 4785 }, { "epoch": 0.22, "grad_norm": 0.6884136549922983, "learning_rate": 4.912147870912189e-06, "loss": 0.301, "step": 4786 }, { "epoch": 0.22, "grad_norm": 0.6301676607548392, "learning_rate": 4.912098030052601e-06, "loss": 0.3023, "step": 4787 }, { "epoch": 0.22, "grad_norm": 0.6454295745466244, "learning_rate": 4.912048175311997e-06, "loss": 0.3103, "step": 4788 }, { "epoch": 0.22, "grad_norm": 0.6715279368831083, "learning_rate": 4.91199830669066e-06, "loss": 0.3091, "step": 4789 }, { "epoch": 0.22, "grad_norm": 0.5896436171567991, "learning_rate": 4.91194842418888e-06, "loss": 0.292, "step": 4790 }, { "epoch": 0.22, "grad_norm": 0.6391932006411144, "learning_rate": 4.911898527806942e-06, "loss": 0.2983, "step": 4791 }, { "epoch": 0.22, "grad_norm": 0.6609214878928429, "learning_rate": 4.911848617545135e-06, "loss": 0.3056, "step": 4792 }, { "epoch": 0.22, "grad_norm": 0.6953903781334551, "learning_rate": 4.911798693403746e-06, "loss": 0.3095, "step": 4793 }, { "epoch": 0.22, "grad_norm": 0.6639802787616015, "learning_rate": 4.911748755383061e-06, "loss": 0.2943, "step": 4794 }, { "epoch": 0.22, "grad_norm": 0.6439897146760032, "learning_rate": 4.911698803483368e-06, "loss": 0.3056, "step": 4795 }, { "epoch": 0.22, "grad_norm": 0.6443574454649527, "learning_rate": 4.911648837704955e-06, "loss": 0.3035, "step": 4796 }, { "epoch": 0.22, "grad_norm": 0.6943493026825515, "learning_rate": 4.9115988580481085e-06, "loss": 0.3147, "step": 4797 }, { "epoch": 0.22, "grad_norm": 0.6819683974679126, "learning_rate": 4.911548864513117e-06, "loss": 0.3054, "step": 4798 }, { "epoch": 0.22, "grad_norm": 0.6636920125907301, "learning_rate": 4.911498857100268e-06, "loss": 0.3227, "step": 4799 }, { "epoch": 0.22, "grad_norm": 0.6416917845660871, "learning_rate": 4.9114488358098485e-06, "loss": 0.2979, "step": 4800 }, { "epoch": 0.22, "grad_norm": 0.6223655169948561, "learning_rate": 4.911398800642148e-06, "loss": 0.3088, "step": 4801 }, { "epoch": 0.22, "grad_norm": 0.6436763809286503, "learning_rate": 4.911348751597453e-06, "loss": 0.3087, "step": 4802 }, { "epoch": 0.22, "grad_norm": 0.6064018967210065, "learning_rate": 4.911298688676053e-06, "loss": 0.3006, "step": 4803 }, { "epoch": 0.23, "grad_norm": 0.6521560035936225, "learning_rate": 4.911248611878234e-06, "loss": 0.3123, "step": 4804 }, { "epoch": 0.23, "grad_norm": 0.6430431823198839, "learning_rate": 4.911198521204285e-06, "loss": 0.3052, "step": 4805 }, { "epoch": 0.23, "grad_norm": 0.6650993777482658, "learning_rate": 4.911148416654495e-06, "loss": 0.3057, "step": 4806 }, { "epoch": 0.23, "grad_norm": 0.6152977203872838, "learning_rate": 4.911098298229152e-06, "loss": 0.2909, "step": 4807 }, { "epoch": 0.23, "grad_norm": 0.6653000466128979, "learning_rate": 4.911048165928545e-06, "loss": 0.2968, "step": 4808 }, { "epoch": 0.23, "grad_norm": 0.6240499687780728, "learning_rate": 4.91099801975296e-06, "loss": 0.3002, "step": 4809 }, { "epoch": 0.23, "grad_norm": 0.6419034231021984, "learning_rate": 4.910947859702689e-06, "loss": 0.2886, "step": 4810 }, { "epoch": 0.23, "grad_norm": 0.6352071404476337, "learning_rate": 4.910897685778019e-06, "loss": 0.3049, "step": 4811 }, { "epoch": 0.23, "grad_norm": 0.6654963225546116, "learning_rate": 4.910847497979237e-06, "loss": 0.2992, "step": 4812 }, { "epoch": 0.23, "grad_norm": 0.7034061496022909, "learning_rate": 4.910797296306635e-06, "loss": 0.328, "step": 4813 }, { "epoch": 0.23, "grad_norm": 0.6117552309326021, "learning_rate": 4.910747080760501e-06, "loss": 0.2778, "step": 4814 }, { "epoch": 0.23, "grad_norm": 0.6459208234840148, "learning_rate": 4.910696851341122e-06, "loss": 0.3276, "step": 4815 }, { "epoch": 0.23, "grad_norm": 0.6874451318619211, "learning_rate": 4.9106466080487905e-06, "loss": 0.3116, "step": 4816 }, { "epoch": 0.23, "grad_norm": 0.7020771673465714, "learning_rate": 4.910596350883791e-06, "loss": 0.3099, "step": 4817 }, { "epoch": 0.23, "grad_norm": 0.6249874372362013, "learning_rate": 4.910546079846418e-06, "loss": 0.3153, "step": 4818 }, { "epoch": 0.23, "grad_norm": 0.6983392550248901, "learning_rate": 4.910495794936957e-06, "loss": 0.2999, "step": 4819 }, { "epoch": 0.23, "grad_norm": 0.6439048302259689, "learning_rate": 4.9104454961556985e-06, "loss": 0.3075, "step": 4820 }, { "epoch": 0.23, "grad_norm": 0.6574370361340581, "learning_rate": 4.910395183502932e-06, "loss": 0.2971, "step": 4821 }, { "epoch": 0.23, "grad_norm": 0.6497468661338608, "learning_rate": 4.910344856978948e-06, "loss": 0.2847, "step": 4822 }, { "epoch": 0.23, "grad_norm": 0.6433533281438316, "learning_rate": 4.910294516584034e-06, "loss": 0.2844, "step": 4823 }, { "epoch": 0.23, "grad_norm": 0.6161945443053013, "learning_rate": 4.910244162318481e-06, "loss": 0.2977, "step": 4824 }, { "epoch": 0.23, "grad_norm": 0.6040710688377966, "learning_rate": 4.910193794182578e-06, "loss": 0.2841, "step": 4825 }, { "epoch": 0.23, "grad_norm": 0.7110892350155563, "learning_rate": 4.910143412176617e-06, "loss": 0.3092, "step": 4826 }, { "epoch": 0.23, "grad_norm": 0.652376180519213, "learning_rate": 4.9100930163008855e-06, "loss": 0.2959, "step": 4827 }, { "epoch": 0.23, "grad_norm": 0.6614280822700732, "learning_rate": 4.910042606555675e-06, "loss": 0.3197, "step": 4828 }, { "epoch": 0.23, "grad_norm": 0.7168524655156807, "learning_rate": 4.909992182941274e-06, "loss": 0.3105, "step": 4829 }, { "epoch": 0.23, "grad_norm": 0.6512715409899743, "learning_rate": 4.909941745457975e-06, "loss": 0.3114, "step": 4830 }, { "epoch": 0.23, "grad_norm": 0.5949673012338795, "learning_rate": 4.9098912941060665e-06, "loss": 0.2887, "step": 4831 }, { "epoch": 0.23, "grad_norm": 0.5780640144635302, "learning_rate": 4.909840828885839e-06, "loss": 0.2836, "step": 4832 }, { "epoch": 0.23, "grad_norm": 0.6421750022771274, "learning_rate": 4.9097903497975845e-06, "loss": 0.3083, "step": 4833 }, { "epoch": 0.23, "grad_norm": 0.6320726212915466, "learning_rate": 4.90973985684159e-06, "loss": 0.2815, "step": 4834 }, { "epoch": 0.23, "grad_norm": 0.6644345881019117, "learning_rate": 4.90968935001815e-06, "loss": 0.2983, "step": 4835 }, { "epoch": 0.23, "grad_norm": 0.6368023136094144, "learning_rate": 4.909638829327552e-06, "loss": 0.2934, "step": 4836 }, { "epoch": 0.23, "grad_norm": 0.6750158463189109, "learning_rate": 4.90958829477009e-06, "loss": 0.3002, "step": 4837 }, { "epoch": 0.23, "grad_norm": 0.6930472767261608, "learning_rate": 4.909537746346052e-06, "loss": 0.2888, "step": 4838 }, { "epoch": 0.23, "grad_norm": 0.7136754738476725, "learning_rate": 4.90948718405573e-06, "loss": 0.2904, "step": 4839 }, { "epoch": 0.23, "grad_norm": 0.6025820503901883, "learning_rate": 4.909436607899415e-06, "loss": 0.2862, "step": 4840 }, { "epoch": 0.23, "grad_norm": 0.6109274112281793, "learning_rate": 4.909386017877397e-06, "loss": 0.292, "step": 4841 }, { "epoch": 0.23, "grad_norm": 0.6755141566854054, "learning_rate": 4.909335413989969e-06, "loss": 0.2856, "step": 4842 }, { "epoch": 0.23, "grad_norm": 0.6369691768331908, "learning_rate": 4.909284796237421e-06, "loss": 0.3217, "step": 4843 }, { "epoch": 0.23, "grad_norm": 0.6679579443925197, "learning_rate": 4.909234164620044e-06, "loss": 0.3037, "step": 4844 }, { "epoch": 0.23, "grad_norm": 0.6497818717781451, "learning_rate": 4.909183519138131e-06, "loss": 0.3102, "step": 4845 }, { "epoch": 0.23, "grad_norm": 0.6512673209950056, "learning_rate": 4.909132859791972e-06, "loss": 0.3066, "step": 4846 }, { "epoch": 0.23, "grad_norm": 0.6679247316362044, "learning_rate": 4.909082186581859e-06, "loss": 0.279, "step": 4847 }, { "epoch": 0.23, "grad_norm": 0.6732151005084462, "learning_rate": 4.909031499508083e-06, "loss": 0.3047, "step": 4848 }, { "epoch": 0.23, "grad_norm": 0.6267215666470008, "learning_rate": 4.9089807985709366e-06, "loss": 0.3088, "step": 4849 }, { "epoch": 0.23, "grad_norm": 0.5863037035413868, "learning_rate": 4.908930083770711e-06, "loss": 0.2799, "step": 4850 }, { "epoch": 0.23, "grad_norm": 0.6906973552404329, "learning_rate": 4.908879355107699e-06, "loss": 0.3077, "step": 4851 }, { "epoch": 0.23, "grad_norm": 0.622261695709231, "learning_rate": 4.908828612582191e-06, "loss": 0.2885, "step": 4852 }, { "epoch": 0.23, "grad_norm": 0.7595284884806521, "learning_rate": 4.908777856194479e-06, "loss": 0.2922, "step": 4853 }, { "epoch": 0.23, "grad_norm": 0.6821054003896908, "learning_rate": 4.908727085944857e-06, "loss": 0.3027, "step": 4854 }, { "epoch": 0.23, "grad_norm": 0.6604920314450847, "learning_rate": 4.9086763018336155e-06, "loss": 0.3266, "step": 4855 }, { "epoch": 0.23, "grad_norm": 0.6609995687252148, "learning_rate": 4.908625503861048e-06, "loss": 0.2943, "step": 4856 }, { "epoch": 0.23, "grad_norm": 0.65425731707093, "learning_rate": 4.9085746920274456e-06, "loss": 0.307, "step": 4857 }, { "epoch": 0.23, "grad_norm": 0.6544080871762498, "learning_rate": 4.9085238663331004e-06, "loss": 0.3169, "step": 4858 }, { "epoch": 0.23, "grad_norm": 0.6070773300364595, "learning_rate": 4.908473026778307e-06, "loss": 0.2863, "step": 4859 }, { "epoch": 0.23, "grad_norm": 0.6501103135633721, "learning_rate": 4.908422173363356e-06, "loss": 0.283, "step": 4860 }, { "epoch": 0.23, "grad_norm": 0.6315129111410207, "learning_rate": 4.9083713060885405e-06, "loss": 0.3052, "step": 4861 }, { "epoch": 0.23, "grad_norm": 0.534609796315131, "learning_rate": 4.908320424954155e-06, "loss": 0.2698, "step": 4862 }, { "epoch": 0.23, "grad_norm": 0.6469459348921285, "learning_rate": 4.90826952996049e-06, "loss": 0.2967, "step": 4863 }, { "epoch": 0.23, "grad_norm": 0.663613253085424, "learning_rate": 4.908218621107838e-06, "loss": 0.3121, "step": 4864 }, { "epoch": 0.23, "grad_norm": 0.67962055573337, "learning_rate": 4.908167698396495e-06, "loss": 0.3096, "step": 4865 }, { "epoch": 0.23, "grad_norm": 0.7240851463311618, "learning_rate": 4.90811676182675e-06, "loss": 0.3026, "step": 4866 }, { "epoch": 0.23, "grad_norm": 0.6778080831277881, "learning_rate": 4.908065811398899e-06, "loss": 0.2934, "step": 4867 }, { "epoch": 0.23, "grad_norm": 0.6856196434213608, "learning_rate": 4.908014847113235e-06, "loss": 0.2902, "step": 4868 }, { "epoch": 0.23, "grad_norm": 0.6747542271888527, "learning_rate": 4.907963868970051e-06, "loss": 0.2809, "step": 4869 }, { "epoch": 0.23, "grad_norm": 0.6266583516857662, "learning_rate": 4.90791287696964e-06, "loss": 0.2951, "step": 4870 }, { "epoch": 0.23, "grad_norm": 0.671345054746889, "learning_rate": 4.907861871112295e-06, "loss": 0.3197, "step": 4871 }, { "epoch": 0.23, "grad_norm": 0.7035757608429964, "learning_rate": 4.907810851398311e-06, "loss": 0.3135, "step": 4872 }, { "epoch": 0.23, "grad_norm": 0.6779459580902061, "learning_rate": 4.90775981782798e-06, "loss": 0.3243, "step": 4873 }, { "epoch": 0.23, "grad_norm": 0.6890178834972497, "learning_rate": 4.907708770401597e-06, "loss": 0.2996, "step": 4874 }, { "epoch": 0.23, "grad_norm": 0.6118210917618375, "learning_rate": 4.907657709119455e-06, "loss": 0.2918, "step": 4875 }, { "epoch": 0.23, "grad_norm": 0.616453334551133, "learning_rate": 4.907606633981848e-06, "loss": 0.2897, "step": 4876 }, { "epoch": 0.23, "grad_norm": 0.7054927348193678, "learning_rate": 4.907555544989069e-06, "loss": 0.319, "step": 4877 }, { "epoch": 0.23, "grad_norm": 0.5996712216829164, "learning_rate": 4.9075044421414145e-06, "loss": 0.2902, "step": 4878 }, { "epoch": 0.23, "grad_norm": 0.6543873590107286, "learning_rate": 4.9074533254391764e-06, "loss": 0.3246, "step": 4879 }, { "epoch": 0.23, "grad_norm": 0.6828527830777072, "learning_rate": 4.907402194882649e-06, "loss": 0.3078, "step": 4880 }, { "epoch": 0.23, "grad_norm": 0.6533621658017833, "learning_rate": 4.907351050472128e-06, "loss": 0.3011, "step": 4881 }, { "epoch": 0.23, "grad_norm": 0.5982085851090544, "learning_rate": 4.907299892207906e-06, "loss": 0.2942, "step": 4882 }, { "epoch": 0.23, "grad_norm": 0.6265723766748602, "learning_rate": 4.907248720090278e-06, "loss": 0.314, "step": 4883 }, { "epoch": 0.23, "grad_norm": 0.574609694091718, "learning_rate": 4.907197534119539e-06, "loss": 0.2818, "step": 4884 }, { "epoch": 0.23, "grad_norm": 0.617863682248405, "learning_rate": 4.9071463342959835e-06, "loss": 0.2891, "step": 4885 }, { "epoch": 0.23, "grad_norm": 0.5950476453847877, "learning_rate": 4.907095120619905e-06, "loss": 0.2871, "step": 4886 }, { "epoch": 0.23, "grad_norm": 0.627277419835945, "learning_rate": 4.907043893091601e-06, "loss": 0.2963, "step": 4887 }, { "epoch": 0.23, "grad_norm": 0.6550714264872411, "learning_rate": 4.906992651711363e-06, "loss": 0.283, "step": 4888 }, { "epoch": 0.23, "grad_norm": 0.6635935802467845, "learning_rate": 4.906941396479488e-06, "loss": 0.3161, "step": 4889 }, { "epoch": 0.23, "grad_norm": 0.6271205334477905, "learning_rate": 4.906890127396269e-06, "loss": 0.314, "step": 4890 }, { "epoch": 0.23, "grad_norm": 0.6195452854549665, "learning_rate": 4.906838844462003e-06, "loss": 0.2773, "step": 4891 }, { "epoch": 0.23, "grad_norm": 0.6345104815552405, "learning_rate": 4.906787547676984e-06, "loss": 0.2781, "step": 4892 }, { "epoch": 0.23, "grad_norm": 0.6907474517794432, "learning_rate": 4.906736237041508e-06, "loss": 0.2924, "step": 4893 }, { "epoch": 0.23, "grad_norm": 0.6654935894967462, "learning_rate": 4.90668491255587e-06, "loss": 0.3131, "step": 4894 }, { "epoch": 0.23, "grad_norm": 0.6334102843700652, "learning_rate": 4.906633574220365e-06, "loss": 0.2855, "step": 4895 }, { "epoch": 0.23, "grad_norm": 0.61775119995487, "learning_rate": 4.906582222035288e-06, "loss": 0.304, "step": 4896 }, { "epoch": 0.23, "grad_norm": 0.6503583097836383, "learning_rate": 4.9065308560009365e-06, "loss": 0.294, "step": 4897 }, { "epoch": 0.23, "grad_norm": 0.657156643323677, "learning_rate": 4.906479476117604e-06, "loss": 0.2975, "step": 4898 }, { "epoch": 0.23, "grad_norm": 0.6199280428136705, "learning_rate": 4.906428082385587e-06, "loss": 0.2884, "step": 4899 }, { "epoch": 0.23, "grad_norm": 0.6434841996455111, "learning_rate": 4.906376674805181e-06, "loss": 0.2838, "step": 4900 }, { "epoch": 0.23, "grad_norm": 0.6407687282176534, "learning_rate": 4.906325253376682e-06, "loss": 0.2999, "step": 4901 }, { "epoch": 0.23, "grad_norm": 0.66879751156267, "learning_rate": 4.9062738181003866e-06, "loss": 0.3086, "step": 4902 }, { "epoch": 0.23, "grad_norm": 0.614540368198526, "learning_rate": 4.9062223689765896e-06, "loss": 0.3021, "step": 4903 }, { "epoch": 0.23, "grad_norm": 0.6720625186961431, "learning_rate": 4.9061709060055886e-06, "loss": 0.3118, "step": 4904 }, { "epoch": 0.23, "grad_norm": 0.5978687375504986, "learning_rate": 4.9061194291876775e-06, "loss": 0.2881, "step": 4905 }, { "epoch": 0.23, "grad_norm": 0.6498978079042762, "learning_rate": 4.9060679385231545e-06, "loss": 0.2923, "step": 4906 }, { "epoch": 0.23, "grad_norm": 0.6796366754956933, "learning_rate": 4.906016434012315e-06, "loss": 0.2941, "step": 4907 }, { "epoch": 0.23, "grad_norm": 0.6183174637014014, "learning_rate": 4.905964915655456e-06, "loss": 0.2732, "step": 4908 }, { "epoch": 0.23, "grad_norm": 0.6160357316656014, "learning_rate": 4.905913383452874e-06, "loss": 0.3053, "step": 4909 }, { "epoch": 0.23, "grad_norm": 0.6523158377167521, "learning_rate": 4.905861837404864e-06, "loss": 0.3297, "step": 4910 }, { "epoch": 0.23, "grad_norm": 0.6682087776728468, "learning_rate": 4.905810277511725e-06, "loss": 0.3152, "step": 4911 }, { "epoch": 0.23, "grad_norm": 0.7331986853017163, "learning_rate": 4.905758703773752e-06, "loss": 0.282, "step": 4912 }, { "epoch": 0.23, "grad_norm": 0.6765746790390098, "learning_rate": 4.9057071161912425e-06, "loss": 0.3126, "step": 4913 }, { "epoch": 0.23, "grad_norm": 0.6443584350112708, "learning_rate": 4.905655514764493e-06, "loss": 0.2991, "step": 4914 }, { "epoch": 0.23, "grad_norm": 0.6823666450111981, "learning_rate": 4.905603899493801e-06, "loss": 0.3019, "step": 4915 }, { "epoch": 0.23, "grad_norm": 0.6153116198798328, "learning_rate": 4.905552270379462e-06, "loss": 0.2947, "step": 4916 }, { "epoch": 0.23, "grad_norm": 0.6329226678937637, "learning_rate": 4.9055006274217755e-06, "loss": 0.3019, "step": 4917 }, { "epoch": 0.23, "grad_norm": 0.6467214247162777, "learning_rate": 4.905448970621037e-06, "loss": 0.3085, "step": 4918 }, { "epoch": 0.23, "grad_norm": 0.5946386707298917, "learning_rate": 4.905397299977545e-06, "loss": 0.3005, "step": 4919 }, { "epoch": 0.23, "grad_norm": 0.6699898316854893, "learning_rate": 4.905345615491595e-06, "loss": 0.3155, "step": 4920 }, { "epoch": 0.23, "grad_norm": 0.6286380594403421, "learning_rate": 4.905293917163486e-06, "loss": 0.3043, "step": 4921 }, { "epoch": 0.23, "grad_norm": 0.6018419748041278, "learning_rate": 4.905242204993516e-06, "loss": 0.3042, "step": 4922 }, { "epoch": 0.23, "grad_norm": 0.6034428759661863, "learning_rate": 4.90519047898198e-06, "loss": 0.295, "step": 4923 }, { "epoch": 0.23, "grad_norm": 0.635653311080953, "learning_rate": 4.905138739129178e-06, "loss": 0.287, "step": 4924 }, { "epoch": 0.23, "grad_norm": 0.6737531008454107, "learning_rate": 4.9050869854354075e-06, "loss": 0.2921, "step": 4925 }, { "epoch": 0.23, "grad_norm": 0.6876826104438359, "learning_rate": 4.905035217900965e-06, "loss": 0.3085, "step": 4926 }, { "epoch": 0.23, "grad_norm": 0.6847280395128166, "learning_rate": 4.904983436526151e-06, "loss": 0.2888, "step": 4927 }, { "epoch": 0.23, "grad_norm": 0.6315752840420158, "learning_rate": 4.90493164131126e-06, "loss": 0.324, "step": 4928 }, { "epoch": 0.23, "grad_norm": 0.6532502408405532, "learning_rate": 4.9048798322565925e-06, "loss": 0.2957, "step": 4929 }, { "epoch": 0.23, "grad_norm": 0.6239013952141966, "learning_rate": 4.9048280093624466e-06, "loss": 0.3007, "step": 4930 }, { "epoch": 0.23, "grad_norm": 0.7165808716882065, "learning_rate": 4.904776172629119e-06, "loss": 0.3369, "step": 4931 }, { "epoch": 0.23, "grad_norm": 0.6366320356528586, "learning_rate": 4.904724322056909e-06, "loss": 0.2945, "step": 4932 }, { "epoch": 0.23, "grad_norm": 0.6631681269187037, "learning_rate": 4.904672457646116e-06, "loss": 0.3221, "step": 4933 }, { "epoch": 0.23, "grad_norm": 0.6787864552412481, "learning_rate": 4.9046205793970355e-06, "loss": 0.3157, "step": 4934 }, { "epoch": 0.23, "grad_norm": 0.7208554699287946, "learning_rate": 4.904568687309969e-06, "loss": 0.2954, "step": 4935 }, { "epoch": 0.23, "grad_norm": 0.682701514896217, "learning_rate": 4.9045167813852145e-06, "loss": 0.304, "step": 4936 }, { "epoch": 0.23, "grad_norm": 0.602419484051279, "learning_rate": 4.90446486162307e-06, "loss": 0.2878, "step": 4937 }, { "epoch": 0.23, "grad_norm": 0.6825477132903969, "learning_rate": 4.904412928023835e-06, "loss": 0.3334, "step": 4938 }, { "epoch": 0.23, "grad_norm": 0.6365745382722405, "learning_rate": 4.904360980587807e-06, "loss": 0.2971, "step": 4939 }, { "epoch": 0.23, "grad_norm": 0.6506271447643448, "learning_rate": 4.904309019315286e-06, "loss": 0.3082, "step": 4940 }, { "epoch": 0.23, "grad_norm": 0.6947917779895205, "learning_rate": 4.904257044206572e-06, "loss": 0.3116, "step": 4941 }, { "epoch": 0.23, "grad_norm": 0.6807123099394795, "learning_rate": 4.904205055261962e-06, "loss": 0.3019, "step": 4942 }, { "epoch": 0.23, "grad_norm": 0.6411209733894886, "learning_rate": 4.904153052481756e-06, "loss": 0.3028, "step": 4943 }, { "epoch": 0.23, "grad_norm": 0.6115265553967283, "learning_rate": 4.9041010358662545e-06, "loss": 0.2928, "step": 4944 }, { "epoch": 0.23, "grad_norm": 0.6698581242016535, "learning_rate": 4.904049005415755e-06, "loss": 0.2873, "step": 4945 }, { "epoch": 0.23, "grad_norm": 0.6684272024942013, "learning_rate": 4.903996961130557e-06, "loss": 0.2996, "step": 4946 }, { "epoch": 0.23, "grad_norm": 0.6840028480986319, "learning_rate": 4.903944903010962e-06, "loss": 0.294, "step": 4947 }, { "epoch": 0.23, "grad_norm": 0.6430540160946681, "learning_rate": 4.903892831057268e-06, "loss": 0.3088, "step": 4948 }, { "epoch": 0.23, "grad_norm": 0.641759232360012, "learning_rate": 4.903840745269774e-06, "loss": 0.2956, "step": 4949 }, { "epoch": 0.23, "grad_norm": 0.632233711355445, "learning_rate": 4.903788645648782e-06, "loss": 0.3197, "step": 4950 }, { "epoch": 0.23, "grad_norm": 0.6118032670296409, "learning_rate": 4.90373653219459e-06, "loss": 0.2959, "step": 4951 }, { "epoch": 0.23, "grad_norm": 0.6305667105262824, "learning_rate": 4.903684404907498e-06, "loss": 0.3032, "step": 4952 }, { "epoch": 0.23, "grad_norm": 0.6803193001572024, "learning_rate": 4.903632263787807e-06, "loss": 0.3243, "step": 4953 }, { "epoch": 0.23, "grad_norm": 0.6576279667458154, "learning_rate": 4.903580108835817e-06, "loss": 0.3013, "step": 4954 }, { "epoch": 0.23, "grad_norm": 0.6705879696065558, "learning_rate": 4.903527940051826e-06, "loss": 0.2948, "step": 4955 }, { "epoch": 0.23, "grad_norm": 0.6319978909154639, "learning_rate": 4.903475757436137e-06, "loss": 0.2876, "step": 4956 }, { "epoch": 0.23, "grad_norm": 0.7106066826496841, "learning_rate": 4.9034235609890485e-06, "loss": 0.2943, "step": 4957 }, { "epoch": 0.23, "grad_norm": 0.6730321934778193, "learning_rate": 4.903371350710861e-06, "loss": 0.2897, "step": 4958 }, { "epoch": 0.23, "grad_norm": 0.604610458467186, "learning_rate": 4.903319126601877e-06, "loss": 0.2734, "step": 4959 }, { "epoch": 0.23, "grad_norm": 0.6071265484995634, "learning_rate": 4.9032668886623945e-06, "loss": 0.2898, "step": 4960 }, { "epoch": 0.23, "grad_norm": 0.6557748567721903, "learning_rate": 4.903214636892715e-06, "loss": 0.2754, "step": 4961 }, { "epoch": 0.23, "grad_norm": 0.6492404998562967, "learning_rate": 4.903162371293139e-06, "loss": 0.2876, "step": 4962 }, { "epoch": 0.23, "grad_norm": 0.691500734399718, "learning_rate": 4.903110091863969e-06, "loss": 0.3072, "step": 4963 }, { "epoch": 0.23, "grad_norm": 0.6364267864879378, "learning_rate": 4.903057798605503e-06, "loss": 0.288, "step": 4964 }, { "epoch": 0.23, "grad_norm": 0.6936809480227777, "learning_rate": 4.9030054915180445e-06, "loss": 0.329, "step": 4965 }, { "epoch": 0.23, "grad_norm": 0.6550619927769101, "learning_rate": 4.902953170601892e-06, "loss": 0.3045, "step": 4966 }, { "epoch": 0.23, "grad_norm": 0.6546523207334249, "learning_rate": 4.90290083585735e-06, "loss": 0.3103, "step": 4967 }, { "epoch": 0.23, "grad_norm": 0.6482961424213652, "learning_rate": 4.902848487284715e-06, "loss": 0.2964, "step": 4968 }, { "epoch": 0.23, "grad_norm": 0.6945049921682679, "learning_rate": 4.902796124884293e-06, "loss": 0.283, "step": 4969 }, { "epoch": 0.23, "grad_norm": 0.6110971295060152, "learning_rate": 4.902743748656382e-06, "loss": 0.2865, "step": 4970 }, { "epoch": 0.23, "grad_norm": 0.6550979517535978, "learning_rate": 4.902691358601286e-06, "loss": 0.3147, "step": 4971 }, { "epoch": 0.23, "grad_norm": 0.6553435506023665, "learning_rate": 4.902638954719303e-06, "loss": 0.3014, "step": 4972 }, { "epoch": 0.23, "grad_norm": 0.6833079265416042, "learning_rate": 4.902586537010739e-06, "loss": 0.3014, "step": 4973 }, { "epoch": 0.23, "grad_norm": 0.645348717933374, "learning_rate": 4.9025341054758915e-06, "loss": 0.3023, "step": 4974 }, { "epoch": 0.23, "grad_norm": 0.6363469114675112, "learning_rate": 4.902481660115065e-06, "loss": 0.2999, "step": 4975 }, { "epoch": 0.23, "grad_norm": 0.6533553175453575, "learning_rate": 4.90242920092856e-06, "loss": 0.2786, "step": 4976 }, { "epoch": 0.23, "grad_norm": 0.7670751450995019, "learning_rate": 4.902376727916679e-06, "loss": 0.3299, "step": 4977 }, { "epoch": 0.23, "grad_norm": 0.5952736142517793, "learning_rate": 4.902324241079723e-06, "loss": 0.27, "step": 4978 }, { "epoch": 0.23, "grad_norm": 0.6301599718253795, "learning_rate": 4.902271740417996e-06, "loss": 0.3065, "step": 4979 }, { "epoch": 0.23, "grad_norm": 0.6825556815549928, "learning_rate": 4.902219225931799e-06, "loss": 0.3182, "step": 4980 }, { "epoch": 0.23, "grad_norm": 0.6376179609658398, "learning_rate": 4.902166697621433e-06, "loss": 0.2941, "step": 4981 }, { "epoch": 0.23, "grad_norm": 0.6099538745436441, "learning_rate": 4.902114155487202e-06, "loss": 0.2867, "step": 4982 }, { "epoch": 0.23, "grad_norm": 0.7110249674427886, "learning_rate": 4.902061599529408e-06, "loss": 0.2961, "step": 4983 }, { "epoch": 0.23, "grad_norm": 0.7373717537059779, "learning_rate": 4.902009029748353e-06, "loss": 0.3092, "step": 4984 }, { "epoch": 0.23, "grad_norm": 0.6617767417897664, "learning_rate": 4.90195644614434e-06, "loss": 0.2967, "step": 4985 }, { "epoch": 0.23, "grad_norm": 0.6383853917764183, "learning_rate": 4.901903848717671e-06, "loss": 0.2945, "step": 4986 }, { "epoch": 0.23, "grad_norm": 0.6506638957988167, "learning_rate": 4.9018512374686486e-06, "loss": 0.2948, "step": 4987 }, { "epoch": 0.23, "grad_norm": 0.6728743102275163, "learning_rate": 4.901798612397577e-06, "loss": 0.3129, "step": 4988 }, { "epoch": 0.23, "grad_norm": 0.6007081501152104, "learning_rate": 4.901745973504758e-06, "loss": 0.2897, "step": 4989 }, { "epoch": 0.23, "grad_norm": 0.6838524114058869, "learning_rate": 4.901693320790494e-06, "loss": 0.3161, "step": 4990 }, { "epoch": 0.23, "grad_norm": 0.6342481171513263, "learning_rate": 4.901640654255089e-06, "loss": 0.3056, "step": 4991 }, { "epoch": 0.23, "grad_norm": 0.7005797443223993, "learning_rate": 4.901587973898844e-06, "loss": 0.306, "step": 4992 }, { "epoch": 0.23, "grad_norm": 0.6430557473331605, "learning_rate": 4.901535279722066e-06, "loss": 0.2981, "step": 4993 }, { "epoch": 0.23, "grad_norm": 0.6559113253359407, "learning_rate": 4.901482571725056e-06, "loss": 0.2867, "step": 4994 }, { "epoch": 0.23, "grad_norm": 0.6954748326733052, "learning_rate": 4.901429849908116e-06, "loss": 0.3126, "step": 4995 }, { "epoch": 0.23, "grad_norm": 0.6704939468084763, "learning_rate": 4.901377114271552e-06, "loss": 0.3097, "step": 4996 }, { "epoch": 0.23, "grad_norm": 0.6419537643245828, "learning_rate": 4.901324364815666e-06, "loss": 0.2841, "step": 4997 }, { "epoch": 0.23, "grad_norm": 0.6288262678339315, "learning_rate": 4.901271601540762e-06, "loss": 0.3132, "step": 4998 }, { "epoch": 0.23, "grad_norm": 0.6808341824161397, "learning_rate": 4.901218824447142e-06, "loss": 0.2999, "step": 4999 }, { "epoch": 0.23, "grad_norm": 0.6145904174504441, "learning_rate": 4.901166033535113e-06, "loss": 0.2985, "step": 5000 }, { "epoch": 0.23, "grad_norm": 0.6293468999609461, "learning_rate": 4.901113228804977e-06, "loss": 0.3097, "step": 5001 }, { "epoch": 0.23, "grad_norm": 0.6389519062263136, "learning_rate": 4.901060410257036e-06, "loss": 0.3094, "step": 5002 }, { "epoch": 0.23, "grad_norm": 0.6157233979751966, "learning_rate": 4.901007577891597e-06, "loss": 0.3094, "step": 5003 }, { "epoch": 0.23, "grad_norm": 0.6628503116883913, "learning_rate": 4.900954731708964e-06, "loss": 0.3343, "step": 5004 }, { "epoch": 0.23, "grad_norm": 0.6634512922969606, "learning_rate": 4.900901871709438e-06, "loss": 0.3029, "step": 5005 }, { "epoch": 0.23, "grad_norm": 0.609997523114147, "learning_rate": 4.900848997893326e-06, "loss": 0.3109, "step": 5006 }, { "epoch": 0.23, "grad_norm": 0.6243270486227518, "learning_rate": 4.900796110260931e-06, "loss": 0.3142, "step": 5007 }, { "epoch": 0.23, "grad_norm": 0.6451699321834804, "learning_rate": 4.900743208812558e-06, "loss": 0.3065, "step": 5008 }, { "epoch": 0.23, "grad_norm": 0.6209424146721033, "learning_rate": 4.900690293548512e-06, "loss": 0.2829, "step": 5009 }, { "epoch": 0.23, "grad_norm": 0.6121348114867212, "learning_rate": 4.900637364469097e-06, "loss": 0.282, "step": 5010 }, { "epoch": 0.23, "grad_norm": 0.6875729053306006, "learning_rate": 4.9005844215746156e-06, "loss": 0.3345, "step": 5011 }, { "epoch": 0.23, "grad_norm": 0.6611482077531915, "learning_rate": 4.900531464865376e-06, "loss": 0.3008, "step": 5012 }, { "epoch": 0.23, "grad_norm": 0.5899454798307215, "learning_rate": 4.90047849434168e-06, "loss": 0.3004, "step": 5013 }, { "epoch": 0.23, "grad_norm": 0.6753380310678905, "learning_rate": 4.900425510003834e-06, "loss": 0.3172, "step": 5014 }, { "epoch": 0.23, "grad_norm": 0.6565254704037873, "learning_rate": 4.900372511852142e-06, "loss": 0.3057, "step": 5015 }, { "epoch": 0.23, "grad_norm": 0.6585116275852085, "learning_rate": 4.9003194998869104e-06, "loss": 0.3068, "step": 5016 }, { "epoch": 0.24, "grad_norm": 0.5903584230611052, "learning_rate": 4.900266474108443e-06, "loss": 0.3095, "step": 5017 }, { "epoch": 0.24, "grad_norm": 0.626977819829023, "learning_rate": 4.9002134345170445e-06, "loss": 0.3127, "step": 5018 }, { "epoch": 0.24, "grad_norm": 0.6529848903675087, "learning_rate": 4.9001603811130224e-06, "loss": 0.3034, "step": 5019 }, { "epoch": 0.24, "grad_norm": 0.6600931826751415, "learning_rate": 4.90010731389668e-06, "loss": 0.3112, "step": 5020 }, { "epoch": 0.24, "grad_norm": 0.6411039340008952, "learning_rate": 4.900054232868323e-06, "loss": 0.3091, "step": 5021 }, { "epoch": 0.24, "grad_norm": 0.6533594051661354, "learning_rate": 4.900001138028257e-06, "loss": 0.3126, "step": 5022 }, { "epoch": 0.24, "grad_norm": 0.6978846165983246, "learning_rate": 4.899948029376788e-06, "loss": 0.324, "step": 5023 }, { "epoch": 0.24, "grad_norm": 0.6283365975005338, "learning_rate": 4.899894906914221e-06, "loss": 0.3041, "step": 5024 }, { "epoch": 0.24, "grad_norm": 0.5935587413171053, "learning_rate": 4.899841770640862e-06, "loss": 0.2945, "step": 5025 }, { "epoch": 0.24, "grad_norm": 0.5798825021033409, "learning_rate": 4.899788620557018e-06, "loss": 0.292, "step": 5026 }, { "epoch": 0.24, "grad_norm": 0.7172553208992074, "learning_rate": 4.899735456662993e-06, "loss": 0.301, "step": 5027 }, { "epoch": 0.24, "grad_norm": 0.6174985065177876, "learning_rate": 4.899682278959092e-06, "loss": 0.2903, "step": 5028 }, { "epoch": 0.24, "grad_norm": 0.5842424920886079, "learning_rate": 4.899629087445625e-06, "loss": 0.2753, "step": 5029 }, { "epoch": 0.24, "grad_norm": 0.610890611841172, "learning_rate": 4.8995758821228935e-06, "loss": 0.2845, "step": 5030 }, { "epoch": 0.24, "grad_norm": 1.056201648715382, "learning_rate": 4.899522662991208e-06, "loss": 0.3353, "step": 5031 }, { "epoch": 0.24, "grad_norm": 0.6212476288687496, "learning_rate": 4.899469430050872e-06, "loss": 0.2881, "step": 5032 }, { "epoch": 0.24, "grad_norm": 0.5912445196343996, "learning_rate": 4.899416183302192e-06, "loss": 0.2913, "step": 5033 }, { "epoch": 0.24, "grad_norm": 0.6332028487584183, "learning_rate": 4.8993629227454746e-06, "loss": 0.3057, "step": 5034 }, { "epoch": 0.24, "grad_norm": 0.6952294344699778, "learning_rate": 4.899309648381027e-06, "loss": 0.3153, "step": 5035 }, { "epoch": 0.24, "grad_norm": 0.6189002749467335, "learning_rate": 4.899256360209156e-06, "loss": 0.3067, "step": 5036 }, { "epoch": 0.24, "grad_norm": 0.6032352696624658, "learning_rate": 4.899203058230167e-06, "loss": 0.2873, "step": 5037 }, { "epoch": 0.24, "grad_norm": 0.6294222831470365, "learning_rate": 4.8991497424443675e-06, "loss": 0.3122, "step": 5038 }, { "epoch": 0.24, "grad_norm": 0.6216140638159572, "learning_rate": 4.899096412852065e-06, "loss": 0.2929, "step": 5039 }, { "epoch": 0.24, "grad_norm": 0.5963809413950203, "learning_rate": 4.899043069453565e-06, "loss": 0.2959, "step": 5040 }, { "epoch": 0.24, "grad_norm": 0.6156776772382877, "learning_rate": 4.898989712249175e-06, "loss": 0.2976, "step": 5041 }, { "epoch": 0.24, "grad_norm": 0.6209268844449913, "learning_rate": 4.898936341239202e-06, "loss": 0.3063, "step": 5042 }, { "epoch": 0.24, "grad_norm": 0.6271656596031232, "learning_rate": 4.898882956423954e-06, "loss": 0.2915, "step": 5043 }, { "epoch": 0.24, "grad_norm": 0.6313451639721498, "learning_rate": 4.898829557803737e-06, "loss": 0.2966, "step": 5044 }, { "epoch": 0.24, "grad_norm": 0.6409121664081797, "learning_rate": 4.898776145378859e-06, "loss": 0.3215, "step": 5045 }, { "epoch": 0.24, "grad_norm": 0.6107663973264055, "learning_rate": 4.898722719149628e-06, "loss": 0.2875, "step": 5046 }, { "epoch": 0.24, "grad_norm": 0.6513899425599715, "learning_rate": 4.8986692791163496e-06, "loss": 0.2883, "step": 5047 }, { "epoch": 0.24, "grad_norm": 0.6753283388247416, "learning_rate": 4.898615825279333e-06, "loss": 0.2954, "step": 5048 }, { "epoch": 0.24, "grad_norm": 0.7175896333677413, "learning_rate": 4.8985623576388845e-06, "loss": 0.2763, "step": 5049 }, { "epoch": 0.24, "grad_norm": 0.7035515144489924, "learning_rate": 4.8985088761953125e-06, "loss": 0.2982, "step": 5050 }, { "epoch": 0.24, "grad_norm": 0.5912188436815574, "learning_rate": 4.898455380948925e-06, "loss": 0.2935, "step": 5051 }, { "epoch": 0.24, "grad_norm": 0.6309470618689158, "learning_rate": 4.898401871900029e-06, "loss": 0.312, "step": 5052 }, { "epoch": 0.24, "grad_norm": 0.677097514735883, "learning_rate": 4.898348349048934e-06, "loss": 0.3092, "step": 5053 }, { "epoch": 0.24, "grad_norm": 0.6368769503177941, "learning_rate": 4.898294812395948e-06, "loss": 0.3105, "step": 5054 }, { "epoch": 0.24, "grad_norm": 0.6603168148127505, "learning_rate": 4.898241261941375e-06, "loss": 0.2824, "step": 5055 }, { "epoch": 0.24, "grad_norm": 0.6595891631188412, "learning_rate": 4.898187697685529e-06, "loss": 0.3231, "step": 5056 }, { "epoch": 0.24, "grad_norm": 0.6345040134248771, "learning_rate": 4.898134119628715e-06, "loss": 0.2867, "step": 5057 }, { "epoch": 0.24, "grad_norm": 0.6189662937414572, "learning_rate": 4.898080527771242e-06, "loss": 0.2932, "step": 5058 }, { "epoch": 0.24, "grad_norm": 0.6511008141253832, "learning_rate": 4.898026922113417e-06, "loss": 0.2866, "step": 5059 }, { "epoch": 0.24, "grad_norm": 0.653905534332155, "learning_rate": 4.897973302655551e-06, "loss": 0.3027, "step": 5060 }, { "epoch": 0.24, "grad_norm": 0.6401300604207992, "learning_rate": 4.8979196693979516e-06, "loss": 0.323, "step": 5061 }, { "epoch": 0.24, "grad_norm": 0.6008358003560047, "learning_rate": 4.897866022340927e-06, "loss": 0.2819, "step": 5062 }, { "epoch": 0.24, "grad_norm": 0.6433789032981104, "learning_rate": 4.897812361484785e-06, "loss": 0.3142, "step": 5063 }, { "epoch": 0.24, "grad_norm": 0.6973915632070986, "learning_rate": 4.897758686829837e-06, "loss": 0.3275, "step": 5064 }, { "epoch": 0.24, "grad_norm": 0.617742613858665, "learning_rate": 4.8977049983763895e-06, "loss": 0.3057, "step": 5065 }, { "epoch": 0.24, "grad_norm": 0.6622018311798163, "learning_rate": 4.897651296124753e-06, "loss": 0.2871, "step": 5066 }, { "epoch": 0.24, "grad_norm": 0.6652622238184539, "learning_rate": 4.897597580075235e-06, "loss": 0.2868, "step": 5067 }, { "epoch": 0.24, "grad_norm": 0.662603781450254, "learning_rate": 4.897543850228146e-06, "loss": 0.3032, "step": 5068 }, { "epoch": 0.24, "grad_norm": 0.6210686640257527, "learning_rate": 4.897490106583795e-06, "loss": 0.2954, "step": 5069 }, { "epoch": 0.24, "grad_norm": 0.6486448718651182, "learning_rate": 4.897436349142491e-06, "loss": 0.2969, "step": 5070 }, { "epoch": 0.24, "grad_norm": 0.640897905210256, "learning_rate": 4.897382577904544e-06, "loss": 0.2859, "step": 5071 }, { "epoch": 0.24, "grad_norm": 0.7584300601717701, "learning_rate": 4.897328792870261e-06, "loss": 0.3152, "step": 5072 }, { "epoch": 0.24, "grad_norm": 0.6813967595585443, "learning_rate": 4.897274994039955e-06, "loss": 0.3102, "step": 5073 }, { "epoch": 0.24, "grad_norm": 0.6242204379919696, "learning_rate": 4.897221181413933e-06, "loss": 0.3021, "step": 5074 }, { "epoch": 0.24, "grad_norm": 0.6785368830888221, "learning_rate": 4.897167354992506e-06, "loss": 0.2909, "step": 5075 }, { "epoch": 0.24, "grad_norm": 0.6555137677173287, "learning_rate": 4.897113514775984e-06, "loss": 0.3163, "step": 5076 }, { "epoch": 0.24, "grad_norm": 0.6570980650289568, "learning_rate": 4.897059660764675e-06, "loss": 0.2948, "step": 5077 }, { "epoch": 0.24, "grad_norm": 0.6339619680291116, "learning_rate": 4.897005792958891e-06, "loss": 0.3048, "step": 5078 }, { "epoch": 0.24, "grad_norm": 0.6479833814513009, "learning_rate": 4.89695191135894e-06, "loss": 0.289, "step": 5079 }, { "epoch": 0.24, "grad_norm": 0.6680960886843692, "learning_rate": 4.8968980159651345e-06, "loss": 0.3064, "step": 5080 }, { "epoch": 0.24, "grad_norm": 0.6188987829331495, "learning_rate": 4.896844106777783e-06, "loss": 0.2893, "step": 5081 }, { "epoch": 0.24, "grad_norm": 0.7000668073661758, "learning_rate": 4.896790183797196e-06, "loss": 0.3028, "step": 5082 }, { "epoch": 0.24, "grad_norm": 0.6830898333725892, "learning_rate": 4.896736247023684e-06, "loss": 0.2917, "step": 5083 }, { "epoch": 0.24, "grad_norm": 0.6143374874280696, "learning_rate": 4.896682296457556e-06, "loss": 0.2957, "step": 5084 }, { "epoch": 0.24, "grad_norm": 0.6417410729411349, "learning_rate": 4.896628332099126e-06, "loss": 0.287, "step": 5085 }, { "epoch": 0.24, "grad_norm": 0.6586577962995818, "learning_rate": 4.896574353948701e-06, "loss": 0.3094, "step": 5086 }, { "epoch": 0.24, "grad_norm": 0.6972699846971214, "learning_rate": 4.896520362006593e-06, "loss": 0.3121, "step": 5087 }, { "epoch": 0.24, "grad_norm": 0.6442602917631615, "learning_rate": 4.896466356273113e-06, "loss": 0.2852, "step": 5088 }, { "epoch": 0.24, "grad_norm": 0.6276814004169056, "learning_rate": 4.896412336748571e-06, "loss": 0.3071, "step": 5089 }, { "epoch": 0.24, "grad_norm": 0.6663995435918733, "learning_rate": 4.89635830343328e-06, "loss": 0.3187, "step": 5090 }, { "epoch": 0.24, "grad_norm": 0.6536103027357174, "learning_rate": 4.896304256327547e-06, "loss": 0.3133, "step": 5091 }, { "epoch": 0.24, "grad_norm": 0.6401392248647356, "learning_rate": 4.896250195431687e-06, "loss": 0.3014, "step": 5092 }, { "epoch": 0.24, "grad_norm": 0.6797762432694613, "learning_rate": 4.896196120746008e-06, "loss": 0.3258, "step": 5093 }, { "epoch": 0.24, "grad_norm": 0.6875681118065124, "learning_rate": 4.896142032270823e-06, "loss": 0.3162, "step": 5094 }, { "epoch": 0.24, "grad_norm": 0.6854812814927574, "learning_rate": 4.896087930006444e-06, "loss": 0.3033, "step": 5095 }, { "epoch": 0.24, "grad_norm": 0.6953842647435576, "learning_rate": 4.89603381395318e-06, "loss": 0.3152, "step": 5096 }, { "epoch": 0.24, "grad_norm": 0.705910224727134, "learning_rate": 4.895979684111343e-06, "loss": 0.3101, "step": 5097 }, { "epoch": 0.24, "grad_norm": 0.6606591755971951, "learning_rate": 4.895925540481246e-06, "loss": 0.3038, "step": 5098 }, { "epoch": 0.24, "grad_norm": 0.6595134309465683, "learning_rate": 4.8958713830632e-06, "loss": 0.3117, "step": 5099 }, { "epoch": 0.24, "grad_norm": 0.6389769203013871, "learning_rate": 4.895817211857516e-06, "loss": 0.2816, "step": 5100 }, { "epoch": 0.24, "grad_norm": 0.64211785302623, "learning_rate": 4.8957630268645065e-06, "loss": 0.3115, "step": 5101 }, { "epoch": 0.24, "grad_norm": 0.697420598792953, "learning_rate": 4.895708828084482e-06, "loss": 0.2881, "step": 5102 }, { "epoch": 0.24, "grad_norm": 0.6222271371457464, "learning_rate": 4.895654615517756e-06, "loss": 0.2841, "step": 5103 }, { "epoch": 0.24, "grad_norm": 0.641484685903683, "learning_rate": 4.89560038916464e-06, "loss": 0.3098, "step": 5104 }, { "epoch": 0.24, "grad_norm": 0.6554669889172927, "learning_rate": 4.895546149025445e-06, "loss": 0.2992, "step": 5105 }, { "epoch": 0.24, "grad_norm": 0.6748180724818486, "learning_rate": 4.895491895100485e-06, "loss": 0.3018, "step": 5106 }, { "epoch": 0.24, "grad_norm": 0.5965964914341664, "learning_rate": 4.8954376273900705e-06, "loss": 0.2943, "step": 5107 }, { "epoch": 0.24, "grad_norm": 0.6385450806258893, "learning_rate": 4.895383345894516e-06, "loss": 0.3019, "step": 5108 }, { "epoch": 0.24, "grad_norm": 0.6413599108263112, "learning_rate": 4.895329050614131e-06, "loss": 0.2901, "step": 5109 }, { "epoch": 0.24, "grad_norm": 0.6444634566107219, "learning_rate": 4.895274741549229e-06, "loss": 0.3166, "step": 5110 }, { "epoch": 0.24, "grad_norm": 0.6291651549917138, "learning_rate": 4.895220418700124e-06, "loss": 0.3029, "step": 5111 }, { "epoch": 0.24, "grad_norm": 0.6586349362773792, "learning_rate": 4.895166082067126e-06, "loss": 0.3166, "step": 5112 }, { "epoch": 0.24, "grad_norm": 0.6904606813745607, "learning_rate": 4.895111731650551e-06, "loss": 0.3237, "step": 5113 }, { "epoch": 0.24, "grad_norm": 0.7036319932909176, "learning_rate": 4.895057367450709e-06, "loss": 0.3259, "step": 5114 }, { "epoch": 0.24, "grad_norm": 0.728216992328819, "learning_rate": 4.895002989467914e-06, "loss": 0.3034, "step": 5115 }, { "epoch": 0.24, "grad_norm": 0.6167570258375489, "learning_rate": 4.8949485977024795e-06, "loss": 0.2958, "step": 5116 }, { "epoch": 0.24, "grad_norm": 0.681800630928544, "learning_rate": 4.894894192154717e-06, "loss": 0.3007, "step": 5117 }, { "epoch": 0.24, "grad_norm": 0.5745245752153533, "learning_rate": 4.89483977282494e-06, "loss": 0.2876, "step": 5118 }, { "epoch": 0.24, "grad_norm": 0.6402130086786343, "learning_rate": 4.894785339713462e-06, "loss": 0.3212, "step": 5119 }, { "epoch": 0.24, "grad_norm": 0.6152997286819488, "learning_rate": 4.894730892820598e-06, "loss": 0.3152, "step": 5120 }, { "epoch": 0.24, "grad_norm": 0.6240856471736022, "learning_rate": 4.894676432146658e-06, "loss": 0.284, "step": 5121 }, { "epoch": 0.24, "grad_norm": 0.6073781365456631, "learning_rate": 4.894621957691957e-06, "loss": 0.3002, "step": 5122 }, { "epoch": 0.24, "grad_norm": 0.6403564755760239, "learning_rate": 4.894567469456808e-06, "loss": 0.3194, "step": 5123 }, { "epoch": 0.24, "grad_norm": 0.6198252121375961, "learning_rate": 4.8945129674415265e-06, "loss": 0.2938, "step": 5124 }, { "epoch": 0.24, "grad_norm": 0.6414121668757555, "learning_rate": 4.894458451646425e-06, "loss": 0.3068, "step": 5125 }, { "epoch": 0.24, "grad_norm": 0.6453894347971341, "learning_rate": 4.894403922071815e-06, "loss": 0.3145, "step": 5126 }, { "epoch": 0.24, "grad_norm": 0.6161989806369667, "learning_rate": 4.894349378718014e-06, "loss": 0.2801, "step": 5127 }, { "epoch": 0.24, "grad_norm": 0.6664894846642954, "learning_rate": 4.894294821585332e-06, "loss": 0.3057, "step": 5128 }, { "epoch": 0.24, "grad_norm": 0.6130143585968736, "learning_rate": 4.894240250674087e-06, "loss": 0.2986, "step": 5129 }, { "epoch": 0.24, "grad_norm": 0.647745629566583, "learning_rate": 4.894185665984591e-06, "loss": 0.2836, "step": 5130 }, { "epoch": 0.24, "grad_norm": 0.6112524840342127, "learning_rate": 4.894131067517158e-06, "loss": 0.3064, "step": 5131 }, { "epoch": 0.24, "grad_norm": 0.6711784442228633, "learning_rate": 4.894076455272102e-06, "loss": 0.3073, "step": 5132 }, { "epoch": 0.24, "grad_norm": 0.6118411996228785, "learning_rate": 4.894021829249738e-06, "loss": 0.3083, "step": 5133 }, { "epoch": 0.24, "grad_norm": 0.6147087193432235, "learning_rate": 4.8939671894503805e-06, "loss": 0.2786, "step": 5134 }, { "epoch": 0.24, "grad_norm": 0.5827966224275695, "learning_rate": 4.893912535874343e-06, "loss": 0.2841, "step": 5135 }, { "epoch": 0.24, "grad_norm": 0.5950949299634859, "learning_rate": 4.893857868521941e-06, "loss": 0.2804, "step": 5136 }, { "epoch": 0.24, "grad_norm": 0.6186959003163575, "learning_rate": 4.893803187393488e-06, "loss": 0.2951, "step": 5137 }, { "epoch": 0.24, "grad_norm": 0.6426675747399225, "learning_rate": 4.8937484924893e-06, "loss": 0.3331, "step": 5138 }, { "epoch": 0.24, "grad_norm": 0.6436393907130512, "learning_rate": 4.89369378380969e-06, "loss": 0.2947, "step": 5139 }, { "epoch": 0.24, "grad_norm": 0.5945389920347681, "learning_rate": 4.893639061354975e-06, "loss": 0.2961, "step": 5140 }, { "epoch": 0.24, "grad_norm": 0.6434502227240558, "learning_rate": 4.893584325125468e-06, "loss": 0.313, "step": 5141 }, { "epoch": 0.24, "grad_norm": 0.6162757005088488, "learning_rate": 4.893529575121486e-06, "loss": 0.2886, "step": 5142 }, { "epoch": 0.24, "grad_norm": 0.6231214547095865, "learning_rate": 4.8934748113433414e-06, "loss": 0.2935, "step": 5143 }, { "epoch": 0.24, "grad_norm": 0.6229151735926917, "learning_rate": 4.893420033791352e-06, "loss": 0.3006, "step": 5144 }, { "epoch": 0.24, "grad_norm": 0.6958062784679158, "learning_rate": 4.893365242465832e-06, "loss": 0.3084, "step": 5145 }, { "epoch": 0.24, "grad_norm": 0.6508933110939545, "learning_rate": 4.8933104373670955e-06, "loss": 0.3112, "step": 5146 }, { "epoch": 0.24, "grad_norm": 0.6090311322164359, "learning_rate": 4.893255618495459e-06, "loss": 0.2878, "step": 5147 }, { "epoch": 0.24, "grad_norm": 0.6086594655293158, "learning_rate": 4.893200785851239e-06, "loss": 0.3037, "step": 5148 }, { "epoch": 0.24, "grad_norm": 0.6158079345780221, "learning_rate": 4.8931459394347495e-06, "loss": 0.2955, "step": 5149 }, { "epoch": 0.24, "grad_norm": 0.6749731783040199, "learning_rate": 4.893091079246306e-06, "loss": 0.3023, "step": 5150 }, { "epoch": 0.24, "grad_norm": 0.6166394134748452, "learning_rate": 4.8930362052862255e-06, "loss": 0.2841, "step": 5151 }, { "epoch": 0.24, "grad_norm": 0.6655417054818155, "learning_rate": 4.892981317554824e-06, "loss": 0.3037, "step": 5152 }, { "epoch": 0.24, "grad_norm": 0.6383945343518598, "learning_rate": 4.892926416052415e-06, "loss": 0.3031, "step": 5153 }, { "epoch": 0.24, "grad_norm": 0.6184040929894001, "learning_rate": 4.892871500779316e-06, "loss": 0.2943, "step": 5154 }, { "epoch": 0.24, "grad_norm": 0.6198516708048311, "learning_rate": 4.892816571735843e-06, "loss": 0.2957, "step": 5155 }, { "epoch": 0.24, "grad_norm": 0.6338569360051909, "learning_rate": 4.892761628922313e-06, "loss": 0.2957, "step": 5156 }, { "epoch": 0.24, "grad_norm": 0.6348738356990268, "learning_rate": 4.8927066723390404e-06, "loss": 0.301, "step": 5157 }, { "epoch": 0.24, "grad_norm": 0.5875314179739225, "learning_rate": 4.8926517019863425e-06, "loss": 0.2839, "step": 5158 }, { "epoch": 0.24, "grad_norm": 0.6452005341122407, "learning_rate": 4.892596717864535e-06, "loss": 0.3249, "step": 5159 }, { "epoch": 0.24, "grad_norm": 0.6825236817305996, "learning_rate": 4.892541719973936e-06, "loss": 0.3154, "step": 5160 }, { "epoch": 0.24, "grad_norm": 0.6492412226822253, "learning_rate": 4.89248670831486e-06, "loss": 0.3072, "step": 5161 }, { "epoch": 0.24, "grad_norm": 0.6272659917703546, "learning_rate": 4.892431682887623e-06, "loss": 0.3107, "step": 5162 }, { "epoch": 0.24, "grad_norm": 0.6718179709256632, "learning_rate": 4.892376643692544e-06, "loss": 0.315, "step": 5163 }, { "epoch": 0.24, "grad_norm": 0.6149836751939087, "learning_rate": 4.8923215907299394e-06, "loss": 0.2912, "step": 5164 }, { "epoch": 0.24, "grad_norm": 0.5840121639603745, "learning_rate": 4.892266524000125e-06, "loss": 0.2981, "step": 5165 }, { "epoch": 0.24, "grad_norm": 0.6874572265965615, "learning_rate": 4.8922114435034176e-06, "loss": 0.314, "step": 5166 }, { "epoch": 0.24, "grad_norm": 0.6434328509510293, "learning_rate": 4.892156349240135e-06, "loss": 0.2881, "step": 5167 }, { "epoch": 0.24, "grad_norm": 0.6513267615638119, "learning_rate": 4.892101241210594e-06, "loss": 0.2834, "step": 5168 }, { "epoch": 0.24, "grad_norm": 0.6701606953433286, "learning_rate": 4.892046119415111e-06, "loss": 0.3002, "step": 5169 }, { "epoch": 0.24, "grad_norm": 0.6050093867821545, "learning_rate": 4.891990983854004e-06, "loss": 0.2866, "step": 5170 }, { "epoch": 0.24, "grad_norm": 0.643949809900528, "learning_rate": 4.8919358345275904e-06, "loss": 0.3082, "step": 5171 }, { "epoch": 0.24, "grad_norm": 0.5662862388569373, "learning_rate": 4.891880671436187e-06, "loss": 0.2839, "step": 5172 }, { "epoch": 0.24, "grad_norm": 0.6169224021716748, "learning_rate": 4.8918254945801115e-06, "loss": 0.2891, "step": 5173 }, { "epoch": 0.24, "grad_norm": 0.6837994786713489, "learning_rate": 4.891770303959681e-06, "loss": 0.3019, "step": 5174 }, { "epoch": 0.24, "grad_norm": 0.5736452495853722, "learning_rate": 4.891715099575215e-06, "loss": 0.265, "step": 5175 }, { "epoch": 0.24, "grad_norm": 0.620283345887727, "learning_rate": 4.891659881427028e-06, "loss": 0.2872, "step": 5176 }, { "epoch": 0.24, "grad_norm": 0.6597608762003112, "learning_rate": 4.891604649515441e-06, "loss": 0.3035, "step": 5177 }, { "epoch": 0.24, "grad_norm": 0.635700588354394, "learning_rate": 4.891549403840769e-06, "loss": 0.2781, "step": 5178 }, { "epoch": 0.24, "grad_norm": 0.5958686572091743, "learning_rate": 4.891494144403333e-06, "loss": 0.2858, "step": 5179 }, { "epoch": 0.24, "grad_norm": 0.623559168860812, "learning_rate": 4.8914388712034475e-06, "loss": 0.2926, "step": 5180 }, { "epoch": 0.24, "grad_norm": 0.7036834816619151, "learning_rate": 4.891383584241433e-06, "loss": 0.3208, "step": 5181 }, { "epoch": 0.24, "grad_norm": 0.6577748096273982, "learning_rate": 4.891328283517607e-06, "loss": 0.2919, "step": 5182 }, { "epoch": 0.24, "grad_norm": 0.6686081430065548, "learning_rate": 4.891272969032288e-06, "loss": 0.3062, "step": 5183 }, { "epoch": 0.24, "grad_norm": 0.6789512008074047, "learning_rate": 4.891217640785794e-06, "loss": 0.3339, "step": 5184 }, { "epoch": 0.24, "grad_norm": 0.6174946193730139, "learning_rate": 4.891162298778444e-06, "loss": 0.2927, "step": 5185 }, { "epoch": 0.24, "grad_norm": 0.6647465844595235, "learning_rate": 4.891106943010555e-06, "loss": 0.3056, "step": 5186 }, { "epoch": 0.24, "grad_norm": 0.6274109789726052, "learning_rate": 4.891051573482446e-06, "loss": 0.2834, "step": 5187 }, { "epoch": 0.24, "grad_norm": 0.6359566988437559, "learning_rate": 4.8909961901944375e-06, "loss": 0.2928, "step": 5188 }, { "epoch": 0.24, "grad_norm": 0.5975628766504468, "learning_rate": 4.890940793146847e-06, "loss": 0.3053, "step": 5189 }, { "epoch": 0.24, "grad_norm": 0.6249447777471269, "learning_rate": 4.890885382339992e-06, "loss": 0.272, "step": 5190 }, { "epoch": 0.24, "grad_norm": 0.6573086309737979, "learning_rate": 4.890829957774193e-06, "loss": 0.324, "step": 5191 }, { "epoch": 0.24, "grad_norm": 0.6859583697943874, "learning_rate": 4.890774519449769e-06, "loss": 0.3124, "step": 5192 }, { "epoch": 0.24, "grad_norm": 0.65674448319712, "learning_rate": 4.890719067367038e-06, "loss": 0.3157, "step": 5193 }, { "epoch": 0.24, "grad_norm": 0.6076501074392219, "learning_rate": 4.89066360152632e-06, "loss": 0.2954, "step": 5194 }, { "epoch": 0.24, "grad_norm": 0.6159284477790362, "learning_rate": 4.890608121927934e-06, "loss": 0.2894, "step": 5195 }, { "epoch": 0.24, "grad_norm": 0.6858716533885446, "learning_rate": 4.890552628572199e-06, "loss": 0.3306, "step": 5196 }, { "epoch": 0.24, "grad_norm": 0.6981071652972534, "learning_rate": 4.890497121459434e-06, "loss": 0.299, "step": 5197 }, { "epoch": 0.24, "grad_norm": 0.6612112874530105, "learning_rate": 4.890441600589959e-06, "loss": 0.2918, "step": 5198 }, { "epoch": 0.24, "grad_norm": 0.6274839715689987, "learning_rate": 4.890386065964094e-06, "loss": 0.2925, "step": 5199 }, { "epoch": 0.24, "grad_norm": 0.6285452747146265, "learning_rate": 4.890330517582157e-06, "loss": 0.2991, "step": 5200 }, { "epoch": 0.24, "grad_norm": 0.6276451686951531, "learning_rate": 4.89027495544447e-06, "loss": 0.2643, "step": 5201 }, { "epoch": 0.24, "grad_norm": 0.6016138360047912, "learning_rate": 4.89021937955135e-06, "loss": 0.3017, "step": 5202 }, { "epoch": 0.24, "grad_norm": 0.6412917803010806, "learning_rate": 4.890163789903119e-06, "loss": 0.3153, "step": 5203 }, { "epoch": 0.24, "grad_norm": 0.6350377171921265, "learning_rate": 4.8901081865000965e-06, "loss": 0.3067, "step": 5204 }, { "epoch": 0.24, "grad_norm": 0.6503203962168622, "learning_rate": 4.890052569342601e-06, "loss": 0.3167, "step": 5205 }, { "epoch": 0.24, "grad_norm": 0.6337113407239329, "learning_rate": 4.889996938430955e-06, "loss": 0.2999, "step": 5206 }, { "epoch": 0.24, "grad_norm": 0.6108623088253532, "learning_rate": 4.8899412937654765e-06, "loss": 0.3122, "step": 5207 }, { "epoch": 0.24, "grad_norm": 0.6545560796700736, "learning_rate": 4.8898856353464865e-06, "loss": 0.2874, "step": 5208 }, { "epoch": 0.24, "grad_norm": 0.6151821444131818, "learning_rate": 4.8898299631743055e-06, "loss": 0.3156, "step": 5209 }, { "epoch": 0.24, "grad_norm": 0.6704974877788645, "learning_rate": 4.889774277249254e-06, "loss": 0.3125, "step": 5210 }, { "epoch": 0.24, "grad_norm": 0.6262538174531066, "learning_rate": 4.8897185775716514e-06, "loss": 0.3152, "step": 5211 }, { "epoch": 0.24, "grad_norm": 0.5841833507179075, "learning_rate": 4.8896628641418195e-06, "loss": 0.2856, "step": 5212 }, { "epoch": 0.24, "grad_norm": 0.6187609480734129, "learning_rate": 4.889607136960079e-06, "loss": 0.2994, "step": 5213 }, { "epoch": 0.24, "grad_norm": 0.669415339517152, "learning_rate": 4.889551396026749e-06, "loss": 0.2954, "step": 5214 }, { "epoch": 0.24, "grad_norm": 0.6473317685627805, "learning_rate": 4.889495641342152e-06, "loss": 0.3173, "step": 5215 }, { "epoch": 0.24, "grad_norm": 0.6381391843445409, "learning_rate": 4.889439872906608e-06, "loss": 0.3241, "step": 5216 }, { "epoch": 0.24, "grad_norm": 0.6239271153187887, "learning_rate": 4.889384090720438e-06, "loss": 0.2891, "step": 5217 }, { "epoch": 0.24, "grad_norm": 0.6769334339098761, "learning_rate": 4.889328294783964e-06, "loss": 0.3067, "step": 5218 }, { "epoch": 0.24, "grad_norm": 0.6181728119556958, "learning_rate": 4.8892724850975045e-06, "loss": 0.3098, "step": 5219 }, { "epoch": 0.24, "grad_norm": 0.6602706240393948, "learning_rate": 4.8892166616613836e-06, "loss": 0.3135, "step": 5220 }, { "epoch": 0.24, "grad_norm": 0.6386969755551418, "learning_rate": 4.889160824475921e-06, "loss": 0.2843, "step": 5221 }, { "epoch": 0.24, "grad_norm": 0.6598875842131721, "learning_rate": 4.889104973541438e-06, "loss": 0.2869, "step": 5222 }, { "epoch": 0.24, "grad_norm": 0.6992082215850818, "learning_rate": 4.889049108858257e-06, "loss": 0.3178, "step": 5223 }, { "epoch": 0.24, "grad_norm": 0.703927427371972, "learning_rate": 4.888993230426698e-06, "loss": 0.3003, "step": 5224 }, { "epoch": 0.24, "grad_norm": 0.6334654281696623, "learning_rate": 4.888937338247084e-06, "loss": 0.2725, "step": 5225 }, { "epoch": 0.24, "grad_norm": 0.6065220222016624, "learning_rate": 4.888881432319737e-06, "loss": 0.2801, "step": 5226 }, { "epoch": 0.24, "grad_norm": 0.6039612118231689, "learning_rate": 4.8888255126449765e-06, "loss": 0.2953, "step": 5227 }, { "epoch": 0.24, "grad_norm": 0.5675205500148867, "learning_rate": 4.888769579223126e-06, "loss": 0.2776, "step": 5228 }, { "epoch": 0.24, "grad_norm": 0.6687707211770961, "learning_rate": 4.888713632054506e-06, "loss": 0.2938, "step": 5229 }, { "epoch": 0.24, "grad_norm": 0.6412091521694604, "learning_rate": 4.888657671139441e-06, "loss": 0.2985, "step": 5230 }, { "epoch": 0.25, "grad_norm": 0.5875540145944776, "learning_rate": 4.88860169647825e-06, "loss": 0.2962, "step": 5231 }, { "epoch": 0.25, "grad_norm": 0.6295363426830813, "learning_rate": 4.8885457080712576e-06, "loss": 0.3125, "step": 5232 }, { "epoch": 0.25, "grad_norm": 0.6251185574448718, "learning_rate": 4.888489705918784e-06, "loss": 0.3142, "step": 5233 }, { "epoch": 0.25, "grad_norm": 0.6051264958411696, "learning_rate": 4.8884336900211535e-06, "loss": 0.2764, "step": 5234 }, { "epoch": 0.25, "grad_norm": 0.6549589022979034, "learning_rate": 4.888377660378688e-06, "loss": 0.3006, "step": 5235 }, { "epoch": 0.25, "grad_norm": 0.6509933571388234, "learning_rate": 4.888321616991708e-06, "loss": 0.3273, "step": 5236 }, { "epoch": 0.25, "grad_norm": 0.6199960632884824, "learning_rate": 4.888265559860538e-06, "loss": 0.3022, "step": 5237 }, { "epoch": 0.25, "grad_norm": 0.6318904625588456, "learning_rate": 4.888209488985499e-06, "loss": 0.3235, "step": 5238 }, { "epoch": 0.25, "grad_norm": 0.613043601148947, "learning_rate": 4.888153404366916e-06, "loss": 0.3057, "step": 5239 }, { "epoch": 0.25, "grad_norm": 0.6285381997512166, "learning_rate": 4.888097306005109e-06, "loss": 0.3008, "step": 5240 }, { "epoch": 0.25, "grad_norm": 0.6429958539000407, "learning_rate": 4.888041193900404e-06, "loss": 0.2898, "step": 5241 }, { "epoch": 0.25, "grad_norm": 0.6875889901826256, "learning_rate": 4.887985068053121e-06, "loss": 0.303, "step": 5242 }, { "epoch": 0.25, "grad_norm": 0.6502945532718232, "learning_rate": 4.887928928463585e-06, "loss": 0.3116, "step": 5243 }, { "epoch": 0.25, "grad_norm": 0.6256617347326331, "learning_rate": 4.887872775132117e-06, "loss": 0.3021, "step": 5244 }, { "epoch": 0.25, "grad_norm": 0.6242550065365551, "learning_rate": 4.887816608059042e-06, "loss": 0.2945, "step": 5245 }, { "epoch": 0.25, "grad_norm": 0.6068100260562845, "learning_rate": 4.887760427244682e-06, "loss": 0.2861, "step": 5246 }, { "epoch": 0.25, "grad_norm": 0.6486251738807932, "learning_rate": 4.887704232689362e-06, "loss": 0.3002, "step": 5247 }, { "epoch": 0.25, "grad_norm": 0.6833190584983988, "learning_rate": 4.887648024393403e-06, "loss": 0.3042, "step": 5248 }, { "epoch": 0.25, "grad_norm": 0.6579209693809229, "learning_rate": 4.88759180235713e-06, "loss": 0.3058, "step": 5249 }, { "epoch": 0.25, "grad_norm": 0.7026482161759655, "learning_rate": 4.887535566580867e-06, "loss": 0.3354, "step": 5250 }, { "epoch": 0.25, "grad_norm": 0.6131119530105485, "learning_rate": 4.887479317064937e-06, "loss": 0.301, "step": 5251 }, { "epoch": 0.25, "grad_norm": 0.5989683241676299, "learning_rate": 4.887423053809663e-06, "loss": 0.2997, "step": 5252 }, { "epoch": 0.25, "grad_norm": 0.6599198292357555, "learning_rate": 4.887366776815369e-06, "loss": 0.3089, "step": 5253 }, { "epoch": 0.25, "grad_norm": 0.6393456101852203, "learning_rate": 4.88731048608238e-06, "loss": 0.301, "step": 5254 }, { "epoch": 0.25, "grad_norm": 0.6580373191848946, "learning_rate": 4.8872541816110186e-06, "loss": 0.2845, "step": 5255 }, { "epoch": 0.25, "grad_norm": 0.6582772892483606, "learning_rate": 4.8871978634016105e-06, "loss": 0.3075, "step": 5256 }, { "epoch": 0.25, "grad_norm": 0.6091510259404116, "learning_rate": 4.887141531454478e-06, "loss": 0.3083, "step": 5257 }, { "epoch": 0.25, "grad_norm": 0.6309187943417661, "learning_rate": 4.8870851857699455e-06, "loss": 0.31, "step": 5258 }, { "epoch": 0.25, "grad_norm": 0.6166836675348699, "learning_rate": 4.887028826348338e-06, "loss": 0.3002, "step": 5259 }, { "epoch": 0.25, "grad_norm": 0.678738299044307, "learning_rate": 4.88697245318998e-06, "loss": 0.3107, "step": 5260 }, { "epoch": 0.25, "grad_norm": 0.6940895156829381, "learning_rate": 4.886916066295195e-06, "loss": 0.315, "step": 5261 }, { "epoch": 0.25, "grad_norm": 0.6483045202605507, "learning_rate": 4.886859665664308e-06, "loss": 0.3066, "step": 5262 }, { "epoch": 0.25, "grad_norm": 0.6796791561243735, "learning_rate": 4.8868032512976436e-06, "loss": 0.3152, "step": 5263 }, { "epoch": 0.25, "grad_norm": 0.6157331332165094, "learning_rate": 4.886746823195526e-06, "loss": 0.3065, "step": 5264 }, { "epoch": 0.25, "grad_norm": 0.5996085420748275, "learning_rate": 4.886690381358281e-06, "loss": 0.2906, "step": 5265 }, { "epoch": 0.25, "grad_norm": 0.6685346938785186, "learning_rate": 4.886633925786233e-06, "loss": 0.3113, "step": 5266 }, { "epoch": 0.25, "grad_norm": 0.6441215452641088, "learning_rate": 4.8865774564797056e-06, "loss": 0.287, "step": 5267 }, { "epoch": 0.25, "grad_norm": 0.655750808945052, "learning_rate": 4.886520973439026e-06, "loss": 0.3081, "step": 5268 }, { "epoch": 0.25, "grad_norm": 0.6456927645863532, "learning_rate": 4.886464476664517e-06, "loss": 0.3052, "step": 5269 }, { "epoch": 0.25, "grad_norm": 0.6064782565940878, "learning_rate": 4.886407966156505e-06, "loss": 0.2922, "step": 5270 }, { "epoch": 0.25, "grad_norm": 0.6056188836621018, "learning_rate": 4.886351441915315e-06, "loss": 0.2784, "step": 5271 }, { "epoch": 0.25, "grad_norm": 0.6698074036966454, "learning_rate": 4.886294903941272e-06, "loss": 0.3313, "step": 5272 }, { "epoch": 0.25, "grad_norm": 0.6221551738880189, "learning_rate": 4.886238352234702e-06, "loss": 0.2793, "step": 5273 }, { "epoch": 0.25, "grad_norm": 0.6312730802322105, "learning_rate": 4.88618178679593e-06, "loss": 0.2905, "step": 5274 }, { "epoch": 0.25, "grad_norm": 0.6351562010324319, "learning_rate": 4.886125207625282e-06, "loss": 0.3113, "step": 5275 }, { "epoch": 0.25, "grad_norm": 0.5818939872478521, "learning_rate": 4.886068614723082e-06, "loss": 0.287, "step": 5276 }, { "epoch": 0.25, "grad_norm": 0.6011985683382307, "learning_rate": 4.886012008089657e-06, "loss": 0.2734, "step": 5277 }, { "epoch": 0.25, "grad_norm": 0.65688309568886, "learning_rate": 4.885955387725333e-06, "loss": 0.3026, "step": 5278 }, { "epoch": 0.25, "grad_norm": 0.6671735551105278, "learning_rate": 4.885898753630436e-06, "loss": 0.3098, "step": 5279 }, { "epoch": 0.25, "grad_norm": 0.6425018045054312, "learning_rate": 4.885842105805291e-06, "loss": 0.2867, "step": 5280 }, { "epoch": 0.25, "grad_norm": 0.6429122844665603, "learning_rate": 4.885785444250224e-06, "loss": 0.3075, "step": 5281 }, { "epoch": 0.25, "grad_norm": 0.636492683046552, "learning_rate": 4.885728768965562e-06, "loss": 0.294, "step": 5282 }, { "epoch": 0.25, "grad_norm": 0.6109221330102181, "learning_rate": 4.88567207995163e-06, "loss": 0.3122, "step": 5283 }, { "epoch": 0.25, "grad_norm": 0.6770918338385654, "learning_rate": 4.885615377208754e-06, "loss": 0.296, "step": 5284 }, { "epoch": 0.25, "grad_norm": 0.6467448938621218, "learning_rate": 4.885558660737263e-06, "loss": 0.2948, "step": 5285 }, { "epoch": 0.25, "grad_norm": 0.6073956466814938, "learning_rate": 4.885501930537481e-06, "loss": 0.2756, "step": 5286 }, { "epoch": 0.25, "grad_norm": 0.6135494266081651, "learning_rate": 4.885445186609734e-06, "loss": 0.2975, "step": 5287 }, { "epoch": 0.25, "grad_norm": 0.5814888309897918, "learning_rate": 4.885388428954349e-06, "loss": 0.2767, "step": 5288 }, { "epoch": 0.25, "grad_norm": 0.6447873947824557, "learning_rate": 4.8853316575716545e-06, "loss": 0.3015, "step": 5289 }, { "epoch": 0.25, "grad_norm": 0.6479450247731127, "learning_rate": 4.885274872461976e-06, "loss": 0.3215, "step": 5290 }, { "epoch": 0.25, "grad_norm": 0.6147573645172083, "learning_rate": 4.88521807362564e-06, "loss": 0.3013, "step": 5291 }, { "epoch": 0.25, "grad_norm": 0.6805380435330799, "learning_rate": 4.885161261062973e-06, "loss": 0.3052, "step": 5292 }, { "epoch": 0.25, "grad_norm": 0.6160523758056062, "learning_rate": 4.885104434774302e-06, "loss": 0.291, "step": 5293 }, { "epoch": 0.25, "grad_norm": 0.6706590985480252, "learning_rate": 4.885047594759955e-06, "loss": 0.3023, "step": 5294 }, { "epoch": 0.25, "grad_norm": 0.6262465999275699, "learning_rate": 4.884990741020259e-06, "loss": 0.2967, "step": 5295 }, { "epoch": 0.25, "grad_norm": 0.6145977350166221, "learning_rate": 4.88493387355554e-06, "loss": 0.2829, "step": 5296 }, { "epoch": 0.25, "grad_norm": 0.6785701322145553, "learning_rate": 4.884876992366127e-06, "loss": 0.3018, "step": 5297 }, { "epoch": 0.25, "grad_norm": 0.636353476588475, "learning_rate": 4.884820097452345e-06, "loss": 0.2805, "step": 5298 }, { "epoch": 0.25, "grad_norm": 0.6191327684860397, "learning_rate": 4.884763188814523e-06, "loss": 0.2836, "step": 5299 }, { "epoch": 0.25, "grad_norm": 0.6115692107214359, "learning_rate": 4.884706266452989e-06, "loss": 0.2737, "step": 5300 }, { "epoch": 0.25, "grad_norm": 0.6714558854724313, "learning_rate": 4.8846493303680695e-06, "loss": 0.3288, "step": 5301 }, { "epoch": 0.25, "grad_norm": 0.637147308574395, "learning_rate": 4.884592380560093e-06, "loss": 0.3164, "step": 5302 }, { "epoch": 0.25, "grad_norm": 0.6191618170807413, "learning_rate": 4.884535417029385e-06, "loss": 0.288, "step": 5303 }, { "epoch": 0.25, "grad_norm": 0.6230905976517038, "learning_rate": 4.884478439776276e-06, "loss": 0.2869, "step": 5304 }, { "epoch": 0.25, "grad_norm": 0.6580709126462738, "learning_rate": 4.884421448801093e-06, "loss": 0.2969, "step": 5305 }, { "epoch": 0.25, "grad_norm": 0.602372907924338, "learning_rate": 4.884364444104163e-06, "loss": 0.2803, "step": 5306 }, { "epoch": 0.25, "grad_norm": 0.6266681906448633, "learning_rate": 4.884307425685817e-06, "loss": 0.2995, "step": 5307 }, { "epoch": 0.25, "grad_norm": 0.6646680584561576, "learning_rate": 4.884250393546379e-06, "loss": 0.294, "step": 5308 }, { "epoch": 0.25, "grad_norm": 0.5956256169655395, "learning_rate": 4.88419334768618e-06, "loss": 0.2961, "step": 5309 }, { "epoch": 0.25, "grad_norm": 0.6176442644846631, "learning_rate": 4.884136288105548e-06, "loss": 0.3045, "step": 5310 }, { "epoch": 0.25, "grad_norm": 0.6573641654618249, "learning_rate": 4.884079214804811e-06, "loss": 0.2747, "step": 5311 }, { "epoch": 0.25, "grad_norm": 0.6656696442490575, "learning_rate": 4.884022127784296e-06, "loss": 0.3216, "step": 5312 }, { "epoch": 0.25, "grad_norm": 0.6415780250285189, "learning_rate": 4.883965027044334e-06, "loss": 0.2919, "step": 5313 }, { "epoch": 0.25, "grad_norm": 0.5947516911952767, "learning_rate": 4.883907912585252e-06, "loss": 0.2888, "step": 5314 }, { "epoch": 0.25, "grad_norm": 0.6316358912850435, "learning_rate": 4.883850784407379e-06, "loss": 0.2994, "step": 5315 }, { "epoch": 0.25, "grad_norm": 0.6402680620396806, "learning_rate": 4.883793642511045e-06, "loss": 0.304, "step": 5316 }, { "epoch": 0.25, "grad_norm": 0.6119352669147557, "learning_rate": 4.883736486896576e-06, "loss": 0.278, "step": 5317 }, { "epoch": 0.25, "grad_norm": 0.6487446281662871, "learning_rate": 4.883679317564304e-06, "loss": 0.3248, "step": 5318 }, { "epoch": 0.25, "grad_norm": 0.6208988965524863, "learning_rate": 4.883622134514556e-06, "loss": 0.2824, "step": 5319 }, { "epoch": 0.25, "grad_norm": 0.6581250769222275, "learning_rate": 4.883564937747661e-06, "loss": 0.3313, "step": 5320 }, { "epoch": 0.25, "grad_norm": 0.6764493754992806, "learning_rate": 4.8835077272639495e-06, "loss": 0.3101, "step": 5321 }, { "epoch": 0.25, "grad_norm": 0.6190533055414508, "learning_rate": 4.883450503063751e-06, "loss": 0.296, "step": 5322 }, { "epoch": 0.25, "grad_norm": 0.6336743072776989, "learning_rate": 4.883393265147393e-06, "loss": 0.2933, "step": 5323 }, { "epoch": 0.25, "grad_norm": 0.6251373323528577, "learning_rate": 4.8833360135152065e-06, "loss": 0.2996, "step": 5324 }, { "epoch": 0.25, "grad_norm": 0.656540867914268, "learning_rate": 4.883278748167519e-06, "loss": 0.3027, "step": 5325 }, { "epoch": 0.25, "grad_norm": 0.6155764457373823, "learning_rate": 4.883221469104663e-06, "loss": 0.2951, "step": 5326 }, { "epoch": 0.25, "grad_norm": 0.6335328030193575, "learning_rate": 4.8831641763269655e-06, "loss": 0.2899, "step": 5327 }, { "epoch": 0.25, "grad_norm": 0.6964914385543854, "learning_rate": 4.883106869834757e-06, "loss": 0.2991, "step": 5328 }, { "epoch": 0.25, "grad_norm": 0.5885984666412658, "learning_rate": 4.883049549628368e-06, "loss": 0.278, "step": 5329 }, { "epoch": 0.25, "grad_norm": 0.6801394954329106, "learning_rate": 4.882992215708126e-06, "loss": 0.3033, "step": 5330 }, { "epoch": 0.25, "grad_norm": 0.751632568889787, "learning_rate": 4.8829348680743646e-06, "loss": 0.3176, "step": 5331 }, { "epoch": 0.25, "grad_norm": 0.7202380245798502, "learning_rate": 4.882877506727412e-06, "loss": 0.2878, "step": 5332 }, { "epoch": 0.25, "grad_norm": 0.6385300874047006, "learning_rate": 4.882820131667598e-06, "loss": 0.286, "step": 5333 }, { "epoch": 0.25, "grad_norm": 0.6245977018765678, "learning_rate": 4.882762742895253e-06, "loss": 0.3156, "step": 5334 }, { "epoch": 0.25, "grad_norm": 0.6506711510176735, "learning_rate": 4.882705340410707e-06, "loss": 0.3092, "step": 5335 }, { "epoch": 0.25, "grad_norm": 0.6601509990072474, "learning_rate": 4.88264792421429e-06, "loss": 0.2912, "step": 5336 }, { "epoch": 0.25, "grad_norm": 0.7123595456848982, "learning_rate": 4.882590494306334e-06, "loss": 0.3119, "step": 5337 }, { "epoch": 0.25, "grad_norm": 0.6802503816914424, "learning_rate": 4.882533050687169e-06, "loss": 0.2841, "step": 5338 }, { "epoch": 0.25, "grad_norm": 0.6676760020968481, "learning_rate": 4.8824755933571245e-06, "loss": 0.3159, "step": 5339 }, { "epoch": 0.25, "grad_norm": 0.5793452056982231, "learning_rate": 4.8824181223165315e-06, "loss": 0.2863, "step": 5340 }, { "epoch": 0.25, "grad_norm": 0.6523599974808035, "learning_rate": 4.882360637565722e-06, "loss": 0.2847, "step": 5341 }, { "epoch": 0.25, "grad_norm": 0.6530847180945774, "learning_rate": 4.882303139105025e-06, "loss": 0.2995, "step": 5342 }, { "epoch": 0.25, "grad_norm": 0.6611390678448507, "learning_rate": 4.882245626934772e-06, "loss": 0.3145, "step": 5343 }, { "epoch": 0.25, "grad_norm": 0.6041994195204708, "learning_rate": 4.882188101055294e-06, "loss": 0.2971, "step": 5344 }, { "epoch": 0.25, "grad_norm": 0.6273784140265756, "learning_rate": 4.882130561466923e-06, "loss": 0.2798, "step": 5345 }, { "epoch": 0.25, "grad_norm": 0.6929706782931321, "learning_rate": 4.882073008169988e-06, "loss": 0.312, "step": 5346 }, { "epoch": 0.25, "grad_norm": 0.6351430131732594, "learning_rate": 4.882015441164824e-06, "loss": 0.2951, "step": 5347 }, { "epoch": 0.25, "grad_norm": 0.615931745347138, "learning_rate": 4.881957860451758e-06, "loss": 0.2976, "step": 5348 }, { "epoch": 0.25, "grad_norm": 0.6303805940425748, "learning_rate": 4.881900266031123e-06, "loss": 0.2959, "step": 5349 }, { "epoch": 0.25, "grad_norm": 0.6744877076089193, "learning_rate": 4.881842657903252e-06, "loss": 0.307, "step": 5350 }, { "epoch": 0.25, "grad_norm": 0.6597655461028271, "learning_rate": 4.881785036068474e-06, "loss": 0.2931, "step": 5351 }, { "epoch": 0.25, "grad_norm": 0.6040964847792819, "learning_rate": 4.881727400527122e-06, "loss": 0.2842, "step": 5352 }, { "epoch": 0.25, "grad_norm": 0.6450471071945794, "learning_rate": 4.881669751279528e-06, "loss": 0.2984, "step": 5353 }, { "epoch": 0.25, "grad_norm": 0.5669965140183295, "learning_rate": 4.881612088326023e-06, "loss": 0.2707, "step": 5354 }, { "epoch": 0.25, "grad_norm": 0.6253841505245376, "learning_rate": 4.881554411666939e-06, "loss": 0.318, "step": 5355 }, { "epoch": 0.25, "grad_norm": 0.6666246441489277, "learning_rate": 4.881496721302608e-06, "loss": 0.3125, "step": 5356 }, { "epoch": 0.25, "grad_norm": 0.7497169682689956, "learning_rate": 4.881439017233362e-06, "loss": 0.3239, "step": 5357 }, { "epoch": 0.25, "grad_norm": 0.6292632007738315, "learning_rate": 4.881381299459532e-06, "loss": 0.2864, "step": 5358 }, { "epoch": 0.25, "grad_norm": 0.600569547835337, "learning_rate": 4.881323567981452e-06, "loss": 0.2802, "step": 5359 }, { "epoch": 0.25, "grad_norm": 0.6646873188329938, "learning_rate": 4.881265822799453e-06, "loss": 0.3104, "step": 5360 }, { "epoch": 0.25, "grad_norm": 0.6904459108310202, "learning_rate": 4.881208063913868e-06, "loss": 0.3138, "step": 5361 }, { "epoch": 0.25, "grad_norm": 0.6398285457460082, "learning_rate": 4.881150291325029e-06, "loss": 0.2831, "step": 5362 }, { "epoch": 0.25, "grad_norm": 0.7222022091946274, "learning_rate": 4.881092505033268e-06, "loss": 0.3189, "step": 5363 }, { "epoch": 0.25, "grad_norm": 0.6813360167890108, "learning_rate": 4.88103470503892e-06, "loss": 0.316, "step": 5364 }, { "epoch": 0.25, "grad_norm": 0.6200508227355667, "learning_rate": 4.880976891342314e-06, "loss": 0.2754, "step": 5365 }, { "epoch": 0.25, "grad_norm": 0.5730716786354303, "learning_rate": 4.880919063943784e-06, "loss": 0.269, "step": 5366 }, { "epoch": 0.25, "grad_norm": 0.6870096233766069, "learning_rate": 4.880861222843665e-06, "loss": 0.312, "step": 5367 }, { "epoch": 0.25, "grad_norm": 0.6991962473061362, "learning_rate": 4.880803368042286e-06, "loss": 0.2834, "step": 5368 }, { "epoch": 0.25, "grad_norm": 0.7397000083236708, "learning_rate": 4.880745499539984e-06, "loss": 0.3103, "step": 5369 }, { "epoch": 0.25, "grad_norm": 0.7018616838054869, "learning_rate": 4.880687617337089e-06, "loss": 0.2862, "step": 5370 }, { "epoch": 0.25, "grad_norm": 0.641858620608206, "learning_rate": 4.880629721433935e-06, "loss": 0.2991, "step": 5371 }, { "epoch": 0.25, "grad_norm": 0.7185555795772554, "learning_rate": 4.880571811830855e-06, "loss": 0.316, "step": 5372 }, { "epoch": 0.25, "grad_norm": 0.6644213196544215, "learning_rate": 4.880513888528184e-06, "loss": 0.2852, "step": 5373 }, { "epoch": 0.25, "grad_norm": 0.6689240372855696, "learning_rate": 4.880455951526253e-06, "loss": 0.2961, "step": 5374 }, { "epoch": 0.25, "grad_norm": 0.684090071401504, "learning_rate": 4.880398000825396e-06, "loss": 0.3197, "step": 5375 }, { "epoch": 0.25, "grad_norm": 0.6614664340715655, "learning_rate": 4.880340036425947e-06, "loss": 0.2917, "step": 5376 }, { "epoch": 0.25, "grad_norm": 0.6073679691903908, "learning_rate": 4.880282058328239e-06, "loss": 0.3017, "step": 5377 }, { "epoch": 0.25, "grad_norm": 0.6383062647777124, "learning_rate": 4.880224066532608e-06, "loss": 0.3154, "step": 5378 }, { "epoch": 0.25, "grad_norm": 0.6160292866678484, "learning_rate": 4.8801660610393835e-06, "loss": 0.2935, "step": 5379 }, { "epoch": 0.25, "grad_norm": 0.682178296146495, "learning_rate": 4.880108041848902e-06, "loss": 0.2962, "step": 5380 }, { "epoch": 0.25, "grad_norm": 0.6896805872521841, "learning_rate": 4.880050008961498e-06, "loss": 0.3147, "step": 5381 }, { "epoch": 0.25, "grad_norm": 0.652713441426731, "learning_rate": 4.879991962377503e-06, "loss": 0.3013, "step": 5382 }, { "epoch": 0.25, "grad_norm": 0.6526297820184668, "learning_rate": 4.879933902097254e-06, "loss": 0.3, "step": 5383 }, { "epoch": 0.25, "grad_norm": 0.6865510736237388, "learning_rate": 4.879875828121082e-06, "loss": 0.3107, "step": 5384 }, { "epoch": 0.25, "grad_norm": 0.6243362803674456, "learning_rate": 4.879817740449324e-06, "loss": 0.2922, "step": 5385 }, { "epoch": 0.25, "grad_norm": 0.6531656010669502, "learning_rate": 4.879759639082312e-06, "loss": 0.2952, "step": 5386 }, { "epoch": 0.25, "grad_norm": 0.7347146687337638, "learning_rate": 4.879701524020381e-06, "loss": 0.3242, "step": 5387 }, { "epoch": 0.25, "grad_norm": 0.6494395060067831, "learning_rate": 4.8796433952638675e-06, "loss": 0.3014, "step": 5388 }, { "epoch": 0.25, "grad_norm": 0.6662312509995179, "learning_rate": 4.8795852528131035e-06, "loss": 0.3005, "step": 5389 }, { "epoch": 0.25, "grad_norm": 0.6018480054310483, "learning_rate": 4.879527096668425e-06, "loss": 0.2891, "step": 5390 }, { "epoch": 0.25, "grad_norm": 0.6455740357510124, "learning_rate": 4.879468926830166e-06, "loss": 0.2982, "step": 5391 }, { "epoch": 0.25, "grad_norm": 0.613654190867462, "learning_rate": 4.87941074329866e-06, "loss": 0.303, "step": 5392 }, { "epoch": 0.25, "grad_norm": 0.6659548343037033, "learning_rate": 4.879352546074245e-06, "loss": 0.3041, "step": 5393 }, { "epoch": 0.25, "grad_norm": 0.6516343320971907, "learning_rate": 4.879294335157253e-06, "loss": 0.297, "step": 5394 }, { "epoch": 0.25, "grad_norm": 0.6734032997797581, "learning_rate": 4.87923611054802e-06, "loss": 0.2826, "step": 5395 }, { "epoch": 0.25, "grad_norm": 0.5830542665504428, "learning_rate": 4.879177872246882e-06, "loss": 0.2847, "step": 5396 }, { "epoch": 0.25, "grad_norm": 0.696255009741275, "learning_rate": 4.8791196202541734e-06, "loss": 0.3144, "step": 5397 }, { "epoch": 0.25, "grad_norm": 0.6030249921816235, "learning_rate": 4.879061354570229e-06, "loss": 0.2943, "step": 5398 }, { "epoch": 0.25, "grad_norm": 0.7111731192760863, "learning_rate": 4.879003075195385e-06, "loss": 0.3234, "step": 5399 }, { "epoch": 0.25, "grad_norm": 0.6511049181851356, "learning_rate": 4.8789447821299755e-06, "loss": 0.3164, "step": 5400 }, { "epoch": 0.25, "grad_norm": 0.6358417031831467, "learning_rate": 4.878886475374336e-06, "loss": 0.293, "step": 5401 }, { "epoch": 0.25, "grad_norm": 0.6525781823378872, "learning_rate": 4.878828154928804e-06, "loss": 0.3022, "step": 5402 }, { "epoch": 0.25, "grad_norm": 0.630813315568195, "learning_rate": 4.878769820793713e-06, "loss": 0.3263, "step": 5403 }, { "epoch": 0.25, "grad_norm": 0.603694751124343, "learning_rate": 4.878711472969401e-06, "loss": 0.3028, "step": 5404 }, { "epoch": 0.25, "grad_norm": 0.6652780346565992, "learning_rate": 4.878653111456201e-06, "loss": 0.3006, "step": 5405 }, { "epoch": 0.25, "grad_norm": 0.6078488456120821, "learning_rate": 4.878594736254451e-06, "loss": 0.2821, "step": 5406 }, { "epoch": 0.25, "grad_norm": 0.6303028366398514, "learning_rate": 4.878536347364486e-06, "loss": 0.2974, "step": 5407 }, { "epoch": 0.25, "grad_norm": 0.6099722855637346, "learning_rate": 4.878477944786641e-06, "loss": 0.259, "step": 5408 }, { "epoch": 0.25, "grad_norm": 0.6290885300173407, "learning_rate": 4.878419528521254e-06, "loss": 0.2907, "step": 5409 }, { "epoch": 0.25, "grad_norm": 0.6386543649626717, "learning_rate": 4.8783610985686605e-06, "loss": 0.3148, "step": 5410 }, { "epoch": 0.25, "grad_norm": 0.5809114111412179, "learning_rate": 4.878302654929197e-06, "loss": 0.2949, "step": 5411 }, { "epoch": 0.25, "grad_norm": 0.6362736398492193, "learning_rate": 4.878244197603199e-06, "loss": 0.3068, "step": 5412 }, { "epoch": 0.25, "grad_norm": 0.6877009989755354, "learning_rate": 4.878185726591004e-06, "loss": 0.3293, "step": 5413 }, { "epoch": 0.25, "grad_norm": 0.6823498341747958, "learning_rate": 4.878127241892947e-06, "loss": 0.2951, "step": 5414 }, { "epoch": 0.25, "grad_norm": 0.6427874251611962, "learning_rate": 4.8780687435093655e-06, "loss": 0.2817, "step": 5415 }, { "epoch": 0.25, "grad_norm": 0.7249973688138142, "learning_rate": 4.878010231440596e-06, "loss": 0.3001, "step": 5416 }, { "epoch": 0.25, "grad_norm": 0.6804582851287344, "learning_rate": 4.877951705686976e-06, "loss": 0.3108, "step": 5417 }, { "epoch": 0.25, "grad_norm": 0.685579206473282, "learning_rate": 4.877893166248842e-06, "loss": 0.306, "step": 5418 }, { "epoch": 0.25, "grad_norm": 0.6880004165605332, "learning_rate": 4.87783461312653e-06, "loss": 0.3223, "step": 5419 }, { "epoch": 0.25, "grad_norm": 0.6582125326930732, "learning_rate": 4.877776046320377e-06, "loss": 0.3053, "step": 5420 }, { "epoch": 0.25, "grad_norm": 0.604052370122469, "learning_rate": 4.877717465830721e-06, "loss": 0.3042, "step": 5421 }, { "epoch": 0.25, "grad_norm": 0.6076375902544965, "learning_rate": 4.8776588716578984e-06, "loss": 0.2809, "step": 5422 }, { "epoch": 0.25, "grad_norm": 0.7206180908696247, "learning_rate": 4.877600263802246e-06, "loss": 0.3127, "step": 5423 }, { "epoch": 0.25, "grad_norm": 0.7202711203348171, "learning_rate": 4.877541642264103e-06, "loss": 0.3136, "step": 5424 }, { "epoch": 0.25, "grad_norm": 0.6042036075892007, "learning_rate": 4.877483007043804e-06, "loss": 0.3013, "step": 5425 }, { "epoch": 0.25, "grad_norm": 0.6632807696016199, "learning_rate": 4.877424358141689e-06, "loss": 0.2893, "step": 5426 }, { "epoch": 0.25, "grad_norm": 0.6307108059369505, "learning_rate": 4.877365695558094e-06, "loss": 0.2894, "step": 5427 }, { "epoch": 0.25, "grad_norm": 0.6823438608492999, "learning_rate": 4.877307019293356e-06, "loss": 0.2984, "step": 5428 }, { "epoch": 0.25, "grad_norm": 0.6648576516915208, "learning_rate": 4.877248329347815e-06, "loss": 0.3024, "step": 5429 }, { "epoch": 0.25, "grad_norm": 0.695268147067455, "learning_rate": 4.877189625721806e-06, "loss": 0.3249, "step": 5430 }, { "epoch": 0.25, "grad_norm": 0.6535111438009465, "learning_rate": 4.87713090841567e-06, "loss": 0.3086, "step": 5431 }, { "epoch": 0.25, "grad_norm": 0.6506234379060687, "learning_rate": 4.877072177429742e-06, "loss": 0.3185, "step": 5432 }, { "epoch": 0.25, "grad_norm": 0.6307723067488354, "learning_rate": 4.877013432764361e-06, "loss": 0.2909, "step": 5433 }, { "epoch": 0.25, "grad_norm": 0.609097424961254, "learning_rate": 4.8769546744198655e-06, "loss": 0.3021, "step": 5434 }, { "epoch": 0.25, "grad_norm": 0.6110833068102284, "learning_rate": 4.876895902396593e-06, "loss": 0.2862, "step": 5435 }, { "epoch": 0.25, "grad_norm": 0.6690522070357867, "learning_rate": 4.876837116694882e-06, "loss": 0.2939, "step": 5436 }, { "epoch": 0.25, "grad_norm": 0.6556181438924621, "learning_rate": 4.8767783173150705e-06, "loss": 0.3048, "step": 5437 }, { "epoch": 0.25, "grad_norm": 0.6402125035986953, "learning_rate": 4.876719504257498e-06, "loss": 0.2811, "step": 5438 }, { "epoch": 0.25, "grad_norm": 0.6330584592173703, "learning_rate": 4.876660677522501e-06, "loss": 0.2947, "step": 5439 }, { "epoch": 0.25, "grad_norm": 0.6343141263078419, "learning_rate": 4.876601837110421e-06, "loss": 0.3252, "step": 5440 }, { "epoch": 0.25, "grad_norm": 0.6312949494391614, "learning_rate": 4.876542983021593e-06, "loss": 0.2847, "step": 5441 }, { "epoch": 0.25, "grad_norm": 0.698920065098512, "learning_rate": 4.876484115256358e-06, "loss": 0.3057, "step": 5442 }, { "epoch": 0.25, "grad_norm": 0.5686288205101425, "learning_rate": 4.876425233815054e-06, "loss": 0.2631, "step": 5443 }, { "epoch": 0.26, "grad_norm": 0.6413837866212989, "learning_rate": 4.8763663386980205e-06, "loss": 0.3238, "step": 5444 }, { "epoch": 0.26, "grad_norm": 0.6370916495835779, "learning_rate": 4.876307429905596e-06, "loss": 0.308, "step": 5445 }, { "epoch": 0.26, "grad_norm": 0.6294877431508796, "learning_rate": 4.876248507438119e-06, "loss": 0.2921, "step": 5446 }, { "epoch": 0.26, "grad_norm": 0.6529558058379951, "learning_rate": 4.876189571295929e-06, "loss": 0.3066, "step": 5447 }, { "epoch": 0.26, "grad_norm": 0.6097933029952716, "learning_rate": 4.876130621479365e-06, "loss": 0.2993, "step": 5448 }, { "epoch": 0.26, "grad_norm": 0.6094841105652009, "learning_rate": 4.876071657988768e-06, "loss": 0.2928, "step": 5449 }, { "epoch": 0.26, "grad_norm": 0.61358834398206, "learning_rate": 4.8760126808244754e-06, "loss": 0.3043, "step": 5450 }, { "epoch": 0.26, "grad_norm": 0.6162648705526884, "learning_rate": 4.875953689986827e-06, "loss": 0.3044, "step": 5451 }, { "epoch": 0.26, "grad_norm": 0.6321703951217535, "learning_rate": 4.8758946854761616e-06, "loss": 0.3142, "step": 5452 }, { "epoch": 0.26, "grad_norm": 0.6002224111906579, "learning_rate": 4.875835667292819e-06, "loss": 0.2794, "step": 5453 }, { "epoch": 0.26, "grad_norm": 0.631950305737267, "learning_rate": 4.87577663543714e-06, "loss": 0.288, "step": 5454 }, { "epoch": 0.26, "grad_norm": 0.5808630174164485, "learning_rate": 4.875717589909464e-06, "loss": 0.2743, "step": 5455 }, { "epoch": 0.26, "grad_norm": 0.6526910499557632, "learning_rate": 4.87565853071013e-06, "loss": 0.2938, "step": 5456 }, { "epoch": 0.26, "grad_norm": 0.6609335167014255, "learning_rate": 4.875599457839479e-06, "loss": 0.3118, "step": 5457 }, { "epoch": 0.26, "grad_norm": 0.5969801277769665, "learning_rate": 4.875540371297848e-06, "loss": 0.2885, "step": 5458 }, { "epoch": 0.26, "grad_norm": 0.6299313640461759, "learning_rate": 4.875481271085581e-06, "loss": 0.3101, "step": 5459 }, { "epoch": 0.26, "grad_norm": 0.6001954793876108, "learning_rate": 4.875422157203016e-06, "loss": 0.2807, "step": 5460 }, { "epoch": 0.26, "grad_norm": 0.6733646077269637, "learning_rate": 4.875363029650494e-06, "loss": 0.2959, "step": 5461 }, { "epoch": 0.26, "grad_norm": 0.6015113620938866, "learning_rate": 4.875303888428354e-06, "loss": 0.2936, "step": 5462 }, { "epoch": 0.26, "grad_norm": 0.6243177632161654, "learning_rate": 4.875244733536937e-06, "loss": 0.3032, "step": 5463 }, { "epoch": 0.26, "grad_norm": 0.6280284362160226, "learning_rate": 4.875185564976584e-06, "loss": 0.2977, "step": 5464 }, { "epoch": 0.26, "grad_norm": 0.6370598795309392, "learning_rate": 4.875126382747636e-06, "loss": 0.3172, "step": 5465 }, { "epoch": 0.26, "grad_norm": 0.6599173543062189, "learning_rate": 4.875067186850431e-06, "loss": 0.3157, "step": 5466 }, { "epoch": 0.26, "grad_norm": 0.6403471447984175, "learning_rate": 4.875007977285312e-06, "loss": 0.2918, "step": 5467 }, { "epoch": 0.26, "grad_norm": 0.6640829222949213, "learning_rate": 4.874948754052619e-06, "loss": 0.3028, "step": 5468 }, { "epoch": 0.26, "grad_norm": 0.5963614578944388, "learning_rate": 4.874889517152693e-06, "loss": 0.2982, "step": 5469 }, { "epoch": 0.26, "grad_norm": 0.6706388096733958, "learning_rate": 4.874830266585875e-06, "loss": 0.3094, "step": 5470 }, { "epoch": 0.26, "grad_norm": 0.603775169096324, "learning_rate": 4.8747710023525046e-06, "loss": 0.2995, "step": 5471 }, { "epoch": 0.26, "grad_norm": 0.6399394552629114, "learning_rate": 4.8747117244529245e-06, "loss": 0.3054, "step": 5472 }, { "epoch": 0.26, "grad_norm": 0.6292620648564033, "learning_rate": 4.874652432887476e-06, "loss": 0.3027, "step": 5473 }, { "epoch": 0.26, "grad_norm": 0.6083472080148612, "learning_rate": 4.874593127656498e-06, "loss": 0.2892, "step": 5474 }, { "epoch": 0.26, "grad_norm": 0.6419606233320876, "learning_rate": 4.874533808760335e-06, "loss": 0.2994, "step": 5475 }, { "epoch": 0.26, "grad_norm": 0.6872228908690925, "learning_rate": 4.874474476199325e-06, "loss": 0.3153, "step": 5476 }, { "epoch": 0.26, "grad_norm": 0.6493392576103416, "learning_rate": 4.874415129973813e-06, "loss": 0.3068, "step": 5477 }, { "epoch": 0.26, "grad_norm": 0.6101056374466358, "learning_rate": 4.874355770084138e-06, "loss": 0.3015, "step": 5478 }, { "epoch": 0.26, "grad_norm": 0.6551991143325158, "learning_rate": 4.8742963965306426e-06, "loss": 0.2957, "step": 5479 }, { "epoch": 0.26, "grad_norm": 0.5803099986022079, "learning_rate": 4.874237009313668e-06, "loss": 0.2885, "step": 5480 }, { "epoch": 0.26, "grad_norm": 0.6467640325415318, "learning_rate": 4.874177608433556e-06, "loss": 0.2994, "step": 5481 }, { "epoch": 0.26, "grad_norm": 0.6253849125792986, "learning_rate": 4.874118193890649e-06, "loss": 0.3076, "step": 5482 }, { "epoch": 0.26, "grad_norm": 0.609781070525776, "learning_rate": 4.874058765685288e-06, "loss": 0.2801, "step": 5483 }, { "epoch": 0.26, "grad_norm": 0.6284781867900737, "learning_rate": 4.873999323817816e-06, "loss": 0.3077, "step": 5484 }, { "epoch": 0.26, "grad_norm": 0.628800735683364, "learning_rate": 4.873939868288574e-06, "loss": 0.2897, "step": 5485 }, { "epoch": 0.26, "grad_norm": 0.6787889061767544, "learning_rate": 4.873880399097905e-06, "loss": 0.2962, "step": 5486 }, { "epoch": 0.26, "grad_norm": 0.6459128474869755, "learning_rate": 4.873820916246151e-06, "loss": 0.3186, "step": 5487 }, { "epoch": 0.26, "grad_norm": 0.5965923847447279, "learning_rate": 4.873761419733654e-06, "loss": 0.2761, "step": 5488 }, { "epoch": 0.26, "grad_norm": 0.6641256332456922, "learning_rate": 4.8737019095607575e-06, "loss": 0.3089, "step": 5489 }, { "epoch": 0.26, "grad_norm": 0.586565849811383, "learning_rate": 4.873642385727802e-06, "loss": 0.2889, "step": 5490 }, { "epoch": 0.26, "grad_norm": 0.6320893159724281, "learning_rate": 4.873582848235132e-06, "loss": 0.2629, "step": 5491 }, { "epoch": 0.26, "grad_norm": 0.6420192966281518, "learning_rate": 4.873523297083089e-06, "loss": 0.2803, "step": 5492 }, { "epoch": 0.26, "grad_norm": 0.6233607906983637, "learning_rate": 4.8734637322720155e-06, "loss": 0.2958, "step": 5493 }, { "epoch": 0.26, "grad_norm": 0.616508309455779, "learning_rate": 4.873404153802255e-06, "loss": 0.3005, "step": 5494 }, { "epoch": 0.26, "grad_norm": 0.6169112541776156, "learning_rate": 4.873344561674151e-06, "loss": 0.3026, "step": 5495 }, { "epoch": 0.26, "grad_norm": 0.6332342564802367, "learning_rate": 4.873284955888045e-06, "loss": 0.2912, "step": 5496 }, { "epoch": 0.26, "grad_norm": 0.6336853046660206, "learning_rate": 4.8732253364442796e-06, "loss": 0.3011, "step": 5497 }, { "epoch": 0.26, "grad_norm": 0.6419490587362787, "learning_rate": 4.8731657033432e-06, "loss": 0.3144, "step": 5498 }, { "epoch": 0.26, "grad_norm": 0.6481239651519504, "learning_rate": 4.873106056585148e-06, "loss": 0.3084, "step": 5499 }, { "epoch": 0.26, "grad_norm": 0.6218098489321372, "learning_rate": 4.873046396170467e-06, "loss": 0.2766, "step": 5500 }, { "epoch": 0.26, "grad_norm": 0.6926512370936382, "learning_rate": 4.872986722099501e-06, "loss": 0.2957, "step": 5501 }, { "epoch": 0.26, "grad_norm": 0.6270337697455974, "learning_rate": 4.872927034372592e-06, "loss": 0.2957, "step": 5502 }, { "epoch": 0.26, "grad_norm": 0.6788969179817623, "learning_rate": 4.872867332990085e-06, "loss": 0.3078, "step": 5503 }, { "epoch": 0.26, "grad_norm": 0.5829582940646449, "learning_rate": 4.872807617952323e-06, "loss": 0.2785, "step": 5504 }, { "epoch": 0.26, "grad_norm": 0.6190183451435706, "learning_rate": 4.872747889259649e-06, "loss": 0.2874, "step": 5505 }, { "epoch": 0.26, "grad_norm": 0.649338327793758, "learning_rate": 4.872688146912407e-06, "loss": 0.2816, "step": 5506 }, { "epoch": 0.26, "grad_norm": 0.6615760397533403, "learning_rate": 4.872628390910942e-06, "loss": 0.3038, "step": 5507 }, { "epoch": 0.26, "grad_norm": 0.6388625491962739, "learning_rate": 4.872568621255596e-06, "loss": 0.3009, "step": 5508 }, { "epoch": 0.26, "grad_norm": 0.6562057129341733, "learning_rate": 4.8725088379467144e-06, "loss": 0.2989, "step": 5509 }, { "epoch": 0.26, "grad_norm": 0.6762868070539108, "learning_rate": 4.87244904098464e-06, "loss": 0.3073, "step": 5510 }, { "epoch": 0.26, "grad_norm": 0.605503946553365, "learning_rate": 4.872389230369718e-06, "loss": 0.291, "step": 5511 }, { "epoch": 0.26, "grad_norm": 0.6758534964377807, "learning_rate": 4.872329406102294e-06, "loss": 0.3003, "step": 5512 }, { "epoch": 0.26, "grad_norm": 0.6772201076926235, "learning_rate": 4.872269568182709e-06, "loss": 0.316, "step": 5513 }, { "epoch": 0.26, "grad_norm": 0.6330167767476145, "learning_rate": 4.8722097166113086e-06, "loss": 0.2866, "step": 5514 }, { "epoch": 0.26, "grad_norm": 0.669055116480727, "learning_rate": 4.872149851388438e-06, "loss": 0.3102, "step": 5515 }, { "epoch": 0.26, "grad_norm": 0.6679768126394003, "learning_rate": 4.872089972514441e-06, "loss": 0.2943, "step": 5516 }, { "epoch": 0.26, "grad_norm": 0.7085625989839022, "learning_rate": 4.872030079989663e-06, "loss": 0.3175, "step": 5517 }, { "epoch": 0.26, "grad_norm": 0.6653350374194613, "learning_rate": 4.871970173814448e-06, "loss": 0.3125, "step": 5518 }, { "epoch": 0.26, "grad_norm": 0.690445055212461, "learning_rate": 4.871910253989139e-06, "loss": 0.3041, "step": 5519 }, { "epoch": 0.26, "grad_norm": 0.6346021006740935, "learning_rate": 4.871850320514085e-06, "loss": 0.286, "step": 5520 }, { "epoch": 0.26, "grad_norm": 0.6419131932931257, "learning_rate": 4.871790373389628e-06, "loss": 0.2952, "step": 5521 }, { "epoch": 0.26, "grad_norm": 0.6359181839205211, "learning_rate": 4.871730412616112e-06, "loss": 0.298, "step": 5522 }, { "epoch": 0.26, "grad_norm": 0.6071503023477985, "learning_rate": 4.871670438193885e-06, "loss": 0.284, "step": 5523 }, { "epoch": 0.26, "grad_norm": 0.6477801701274801, "learning_rate": 4.871610450123291e-06, "loss": 0.2915, "step": 5524 }, { "epoch": 0.26, "grad_norm": 0.5938788314980942, "learning_rate": 4.871550448404674e-06, "loss": 0.292, "step": 5525 }, { "epoch": 0.26, "grad_norm": 0.639076011398981, "learning_rate": 4.871490433038381e-06, "loss": 0.3176, "step": 5526 }, { "epoch": 0.26, "grad_norm": 0.6258181790468927, "learning_rate": 4.8714304040247565e-06, "loss": 0.2996, "step": 5527 }, { "epoch": 0.26, "grad_norm": 0.6547755864107838, "learning_rate": 4.871370361364145e-06, "loss": 0.3168, "step": 5528 }, { "epoch": 0.26, "grad_norm": 0.680265296899351, "learning_rate": 4.871310305056894e-06, "loss": 0.3049, "step": 5529 }, { "epoch": 0.26, "grad_norm": 0.6193942355548423, "learning_rate": 4.8712502351033485e-06, "loss": 0.2929, "step": 5530 }, { "epoch": 0.26, "grad_norm": 0.6273104166138279, "learning_rate": 4.871190151503854e-06, "loss": 0.3081, "step": 5531 }, { "epoch": 0.26, "grad_norm": 0.6455214572993384, "learning_rate": 4.871130054258756e-06, "loss": 0.2943, "step": 5532 }, { "epoch": 0.26, "grad_norm": 0.6251117570517374, "learning_rate": 4.8710699433684e-06, "loss": 0.3121, "step": 5533 }, { "epoch": 0.26, "grad_norm": 0.6231678571288064, "learning_rate": 4.8710098188331326e-06, "loss": 0.2987, "step": 5534 }, { "epoch": 0.26, "grad_norm": 0.6685615638543873, "learning_rate": 4.870949680653299e-06, "loss": 0.3145, "step": 5535 }, { "epoch": 0.26, "grad_norm": 0.6097711340144087, "learning_rate": 4.8708895288292465e-06, "loss": 0.3132, "step": 5536 }, { "epoch": 0.26, "grad_norm": 0.6015290601536549, "learning_rate": 4.8708293633613205e-06, "loss": 0.3028, "step": 5537 }, { "epoch": 0.26, "grad_norm": 0.6195926503788461, "learning_rate": 4.870769184249868e-06, "loss": 0.288, "step": 5538 }, { "epoch": 0.26, "grad_norm": 0.6176723940223823, "learning_rate": 4.870708991495235e-06, "loss": 0.2965, "step": 5539 }, { "epoch": 0.26, "grad_norm": 0.7223938954409113, "learning_rate": 4.870648785097766e-06, "loss": 0.3134, "step": 5540 }, { "epoch": 0.26, "grad_norm": 0.6218003028484267, "learning_rate": 4.87058856505781e-06, "loss": 0.2805, "step": 5541 }, { "epoch": 0.26, "grad_norm": 0.5991157406729479, "learning_rate": 4.870528331375712e-06, "loss": 0.2842, "step": 5542 }, { "epoch": 0.26, "grad_norm": 0.6306487397995817, "learning_rate": 4.870468084051821e-06, "loss": 0.2886, "step": 5543 }, { "epoch": 0.26, "grad_norm": 0.6102227729769676, "learning_rate": 4.8704078230864805e-06, "loss": 0.3037, "step": 5544 }, { "epoch": 0.26, "grad_norm": 0.5954305539057411, "learning_rate": 4.870347548480039e-06, "loss": 0.3106, "step": 5545 }, { "epoch": 0.26, "grad_norm": 0.6201076631079506, "learning_rate": 4.870287260232843e-06, "loss": 0.3013, "step": 5546 }, { "epoch": 0.26, "grad_norm": 0.5999232910685395, "learning_rate": 4.87022695834524e-06, "loss": 0.2921, "step": 5547 }, { "epoch": 0.26, "grad_norm": 0.5986631057158287, "learning_rate": 4.870166642817577e-06, "loss": 0.3061, "step": 5548 }, { "epoch": 0.26, "grad_norm": 0.5897401954666623, "learning_rate": 4.870106313650199e-06, "loss": 0.2703, "step": 5549 }, { "epoch": 0.26, "grad_norm": 0.5983582149959937, "learning_rate": 4.870045970843456e-06, "loss": 0.2784, "step": 5550 }, { "epoch": 0.26, "grad_norm": 0.6085293882659889, "learning_rate": 4.869985614397694e-06, "loss": 0.2994, "step": 5551 }, { "epoch": 0.26, "grad_norm": 0.6276306429015254, "learning_rate": 4.869925244313261e-06, "loss": 0.3218, "step": 5552 }, { "epoch": 0.26, "grad_norm": 0.5644202928861917, "learning_rate": 4.869864860590504e-06, "loss": 0.2911, "step": 5553 }, { "epoch": 0.26, "grad_norm": 0.6702681786708262, "learning_rate": 4.869804463229769e-06, "loss": 0.3058, "step": 5554 }, { "epoch": 0.26, "grad_norm": 0.609952151343464, "learning_rate": 4.869744052231406e-06, "loss": 0.2948, "step": 5555 }, { "epoch": 0.26, "grad_norm": 0.6196779498622399, "learning_rate": 4.869683627595762e-06, "loss": 0.3047, "step": 5556 }, { "epoch": 0.26, "grad_norm": 0.6107545913499757, "learning_rate": 4.869623189323183e-06, "loss": 0.274, "step": 5557 }, { "epoch": 0.26, "grad_norm": 0.6229171906974388, "learning_rate": 4.8695627374140184e-06, "loss": 0.2956, "step": 5558 }, { "epoch": 0.26, "grad_norm": 0.6081209838075439, "learning_rate": 4.869502271868617e-06, "loss": 0.2925, "step": 5559 }, { "epoch": 0.26, "grad_norm": 0.5948772869132961, "learning_rate": 4.869441792687324e-06, "loss": 0.3015, "step": 5560 }, { "epoch": 0.26, "grad_norm": 0.6825785583314096, "learning_rate": 4.86938129987049e-06, "loss": 0.3099, "step": 5561 }, { "epoch": 0.26, "grad_norm": 0.6432629284019618, "learning_rate": 4.869320793418462e-06, "loss": 0.2983, "step": 5562 }, { "epoch": 0.26, "grad_norm": 0.5772858477198827, "learning_rate": 4.869260273331588e-06, "loss": 0.2934, "step": 5563 }, { "epoch": 0.26, "grad_norm": 0.6229193224474038, "learning_rate": 4.869199739610218e-06, "loss": 0.3034, "step": 5564 }, { "epoch": 0.26, "grad_norm": 0.6406907277945431, "learning_rate": 4.869139192254697e-06, "loss": 0.3076, "step": 5565 }, { "epoch": 0.26, "grad_norm": 0.6440147880325805, "learning_rate": 4.869078631265376e-06, "loss": 0.3199, "step": 5566 }, { "epoch": 0.26, "grad_norm": 0.6199684001403363, "learning_rate": 4.869018056642604e-06, "loss": 0.307, "step": 5567 }, { "epoch": 0.26, "grad_norm": 0.6414696304477919, "learning_rate": 4.868957468386727e-06, "loss": 0.2867, "step": 5568 }, { "epoch": 0.26, "grad_norm": 0.6680808057897228, "learning_rate": 4.868896866498096e-06, "loss": 0.3, "step": 5569 }, { "epoch": 0.26, "grad_norm": 0.6034837734837039, "learning_rate": 4.868836250977058e-06, "loss": 0.3089, "step": 5570 }, { "epoch": 0.26, "grad_norm": 0.6426036130444991, "learning_rate": 4.868775621823964e-06, "loss": 0.3229, "step": 5571 }, { "epoch": 0.26, "grad_norm": 0.6537738526443143, "learning_rate": 4.868714979039162e-06, "loss": 0.2977, "step": 5572 }, { "epoch": 0.26, "grad_norm": 0.6416243204921291, "learning_rate": 4.868654322623e-06, "loss": 0.3009, "step": 5573 }, { "epoch": 0.26, "grad_norm": 0.6160012359452957, "learning_rate": 4.868593652575827e-06, "loss": 0.2947, "step": 5574 }, { "epoch": 0.26, "grad_norm": 0.6635506842784712, "learning_rate": 4.868532968897993e-06, "loss": 0.2947, "step": 5575 }, { "epoch": 0.26, "grad_norm": 0.6177955376953078, "learning_rate": 4.868472271589848e-06, "loss": 0.2935, "step": 5576 }, { "epoch": 0.26, "grad_norm": 0.5837793008422817, "learning_rate": 4.8684115606517405e-06, "loss": 0.2754, "step": 5577 }, { "epoch": 0.26, "grad_norm": 0.6474397491980489, "learning_rate": 4.868350836084019e-06, "loss": 0.3074, "step": 5578 }, { "epoch": 0.26, "grad_norm": 0.618898323734983, "learning_rate": 4.868290097887034e-06, "loss": 0.2621, "step": 5579 }, { "epoch": 0.26, "grad_norm": 0.6213106294557246, "learning_rate": 4.868229346061134e-06, "loss": 0.3031, "step": 5580 }, { "epoch": 0.26, "grad_norm": 0.5962724740784611, "learning_rate": 4.868168580606671e-06, "loss": 0.2954, "step": 5581 }, { "epoch": 0.26, "grad_norm": 0.6658056607205938, "learning_rate": 4.868107801523991e-06, "loss": 0.2886, "step": 5582 }, { "epoch": 0.26, "grad_norm": 0.6410772727933716, "learning_rate": 4.8680470088134465e-06, "loss": 0.2882, "step": 5583 }, { "epoch": 0.26, "grad_norm": 0.6514897315781664, "learning_rate": 4.8679862024753874e-06, "loss": 0.2936, "step": 5584 }, { "epoch": 0.26, "grad_norm": 0.6920407246698469, "learning_rate": 4.867925382510162e-06, "loss": 0.3184, "step": 5585 }, { "epoch": 0.26, "grad_norm": 0.6132581463493805, "learning_rate": 4.867864548918122e-06, "loss": 0.3085, "step": 5586 }, { "epoch": 0.26, "grad_norm": 0.6620486455070685, "learning_rate": 4.867803701699616e-06, "loss": 0.3194, "step": 5587 }, { "epoch": 0.26, "grad_norm": 0.6324377930450368, "learning_rate": 4.8677428408549955e-06, "loss": 0.2972, "step": 5588 }, { "epoch": 0.26, "grad_norm": 0.6345117879483085, "learning_rate": 4.867681966384609e-06, "loss": 0.2993, "step": 5589 }, { "epoch": 0.26, "grad_norm": 0.6258047200764223, "learning_rate": 4.867621078288809e-06, "loss": 0.2908, "step": 5590 }, { "epoch": 0.26, "grad_norm": 0.6766838348007644, "learning_rate": 4.867560176567943e-06, "loss": 0.3001, "step": 5591 }, { "epoch": 0.26, "grad_norm": 0.6360190040600452, "learning_rate": 4.867499261222366e-06, "loss": 0.2978, "step": 5592 }, { "epoch": 0.26, "grad_norm": 0.6212437493482201, "learning_rate": 4.867438332252424e-06, "loss": 0.2849, "step": 5593 }, { "epoch": 0.26, "grad_norm": 0.5734963741304379, "learning_rate": 4.86737738965847e-06, "loss": 0.2975, "step": 5594 }, { "epoch": 0.26, "grad_norm": 0.6237467731308053, "learning_rate": 4.867316433440853e-06, "loss": 0.3186, "step": 5595 }, { "epoch": 0.26, "grad_norm": 0.6313301908289631, "learning_rate": 4.867255463599926e-06, "loss": 0.283, "step": 5596 }, { "epoch": 0.26, "grad_norm": 0.6523707933926233, "learning_rate": 4.867194480136039e-06, "loss": 0.3018, "step": 5597 }, { "epoch": 0.26, "grad_norm": 0.6080162631740829, "learning_rate": 4.867133483049542e-06, "loss": 0.2679, "step": 5598 }, { "epoch": 0.26, "grad_norm": 0.568857451136617, "learning_rate": 4.867072472340787e-06, "loss": 0.2901, "step": 5599 }, { "epoch": 0.26, "grad_norm": 0.6260331346412203, "learning_rate": 4.867011448010125e-06, "loss": 0.3023, "step": 5600 }, { "epoch": 0.26, "grad_norm": 0.6258974594664936, "learning_rate": 4.866950410057907e-06, "loss": 0.2722, "step": 5601 }, { "epoch": 0.26, "grad_norm": 0.6864903459948013, "learning_rate": 4.866889358484485e-06, "loss": 0.3256, "step": 5602 }, { "epoch": 0.26, "grad_norm": 0.6061678615103058, "learning_rate": 4.8668282932902085e-06, "loss": 0.2882, "step": 5603 }, { "epoch": 0.26, "grad_norm": 0.6156425758411151, "learning_rate": 4.866767214475431e-06, "loss": 0.2848, "step": 5604 }, { "epoch": 0.26, "grad_norm": 0.587673100503274, "learning_rate": 4.866706122040502e-06, "loss": 0.276, "step": 5605 }, { "epoch": 0.26, "grad_norm": 0.6140328156431575, "learning_rate": 4.866645015985775e-06, "loss": 0.2862, "step": 5606 }, { "epoch": 0.26, "grad_norm": 0.6478679748656404, "learning_rate": 4.8665838963116e-06, "loss": 0.288, "step": 5607 }, { "epoch": 0.26, "grad_norm": 0.6467711619857247, "learning_rate": 4.86652276301833e-06, "loss": 0.295, "step": 5608 }, { "epoch": 0.26, "grad_norm": 0.6291520924975681, "learning_rate": 4.866461616106316e-06, "loss": 0.3032, "step": 5609 }, { "epoch": 0.26, "grad_norm": 0.6676257838786461, "learning_rate": 4.866400455575911e-06, "loss": 0.323, "step": 5610 }, { "epoch": 0.26, "grad_norm": 0.5846013296268753, "learning_rate": 4.866339281427465e-06, "loss": 0.2763, "step": 5611 }, { "epoch": 0.26, "grad_norm": 0.6203054876279339, "learning_rate": 4.8662780936613315e-06, "loss": 0.2874, "step": 5612 }, { "epoch": 0.26, "grad_norm": 0.60870758578037, "learning_rate": 4.866216892277863e-06, "loss": 0.2999, "step": 5613 }, { "epoch": 0.26, "grad_norm": 0.6720994888711843, "learning_rate": 4.866155677277411e-06, "loss": 0.2948, "step": 5614 }, { "epoch": 0.26, "grad_norm": 0.6844185470388031, "learning_rate": 4.866094448660327e-06, "loss": 0.3077, "step": 5615 }, { "epoch": 0.26, "grad_norm": 0.6174049161346777, "learning_rate": 4.866033206426965e-06, "loss": 0.315, "step": 5616 }, { "epoch": 0.26, "grad_norm": 0.5851921536531463, "learning_rate": 4.865971950577676e-06, "loss": 0.2881, "step": 5617 }, { "epoch": 0.26, "grad_norm": 0.6657462160047708, "learning_rate": 4.865910681112813e-06, "loss": 0.3229, "step": 5618 }, { "epoch": 0.26, "grad_norm": 0.6259857049159009, "learning_rate": 4.865849398032728e-06, "loss": 0.3113, "step": 5619 }, { "epoch": 0.26, "grad_norm": 0.6028837623502489, "learning_rate": 4.865788101337776e-06, "loss": 0.2968, "step": 5620 }, { "epoch": 0.26, "grad_norm": 0.6613134856307777, "learning_rate": 4.865726791028307e-06, "loss": 0.3334, "step": 5621 }, { "epoch": 0.26, "grad_norm": 0.5714656350450178, "learning_rate": 4.8656654671046755e-06, "loss": 0.3038, "step": 5622 }, { "epoch": 0.26, "grad_norm": 0.6258851033194253, "learning_rate": 4.865604129567233e-06, "loss": 0.3098, "step": 5623 }, { "epoch": 0.26, "grad_norm": 0.6220959779621282, "learning_rate": 4.865542778416335e-06, "loss": 0.298, "step": 5624 }, { "epoch": 0.26, "grad_norm": 0.6650737079154327, "learning_rate": 4.865481413652331e-06, "loss": 0.2781, "step": 5625 }, { "epoch": 0.26, "grad_norm": 0.6098947479449187, "learning_rate": 4.865420035275576e-06, "loss": 0.3009, "step": 5626 }, { "epoch": 0.26, "grad_norm": 0.6472862095202022, "learning_rate": 4.865358643286425e-06, "loss": 0.3276, "step": 5627 }, { "epoch": 0.26, "grad_norm": 0.6111498716195589, "learning_rate": 4.865297237685228e-06, "loss": 0.2869, "step": 5628 }, { "epoch": 0.26, "grad_norm": 0.6297264015590932, "learning_rate": 4.86523581847234e-06, "loss": 0.3148, "step": 5629 }, { "epoch": 0.26, "grad_norm": 0.5936215776185949, "learning_rate": 4.865174385648115e-06, "loss": 0.3047, "step": 5630 }, { "epoch": 0.26, "grad_norm": 0.6341147328043187, "learning_rate": 4.865112939212906e-06, "loss": 0.2984, "step": 5631 }, { "epoch": 0.26, "grad_norm": 0.6568388357514422, "learning_rate": 4.865051479167066e-06, "loss": 0.3071, "step": 5632 }, { "epoch": 0.26, "grad_norm": 0.6204303385165802, "learning_rate": 4.864990005510949e-06, "loss": 0.3005, "step": 5633 }, { "epoch": 0.26, "grad_norm": 0.6112547160833431, "learning_rate": 4.864928518244909e-06, "loss": 0.3012, "step": 5634 }, { "epoch": 0.26, "grad_norm": 0.6677567659204444, "learning_rate": 4.8648670173693e-06, "loss": 0.2953, "step": 5635 }, { "epoch": 0.26, "grad_norm": 0.6393532107198567, "learning_rate": 4.864805502884476e-06, "loss": 0.3044, "step": 5636 }, { "epoch": 0.26, "grad_norm": 0.629690378629245, "learning_rate": 4.864743974790791e-06, "loss": 0.2989, "step": 5637 }, { "epoch": 0.26, "grad_norm": 0.6380897457743413, "learning_rate": 4.864682433088597e-06, "loss": 0.3252, "step": 5638 }, { "epoch": 0.26, "grad_norm": 0.6144114511539374, "learning_rate": 4.8646208777782515e-06, "loss": 0.3024, "step": 5639 }, { "epoch": 0.26, "grad_norm": 0.6683888110337018, "learning_rate": 4.864559308860107e-06, "loss": 0.3146, "step": 5640 }, { "epoch": 0.26, "grad_norm": 0.6795694986782569, "learning_rate": 4.864497726334518e-06, "loss": 0.3098, "step": 5641 }, { "epoch": 0.26, "grad_norm": 0.6310221045080122, "learning_rate": 4.864436130201839e-06, "loss": 0.3129, "step": 5642 }, { "epoch": 0.26, "grad_norm": 0.6326839338686994, "learning_rate": 4.8643745204624235e-06, "loss": 0.2931, "step": 5643 }, { "epoch": 0.26, "grad_norm": 0.6432587482134959, "learning_rate": 4.864312897116627e-06, "loss": 0.3323, "step": 5644 }, { "epoch": 0.26, "grad_norm": 0.5969074297900521, "learning_rate": 4.864251260164805e-06, "loss": 0.2904, "step": 5645 }, { "epoch": 0.26, "grad_norm": 0.6001552003368578, "learning_rate": 4.864189609607311e-06, "loss": 0.2935, "step": 5646 }, { "epoch": 0.26, "grad_norm": 0.6280020332022784, "learning_rate": 4.864127945444499e-06, "loss": 0.2996, "step": 5647 }, { "epoch": 0.26, "grad_norm": 0.5789812046006781, "learning_rate": 4.864066267676725e-06, "loss": 0.2961, "step": 5648 }, { "epoch": 0.26, "grad_norm": 0.6366122130823676, "learning_rate": 4.864004576304344e-06, "loss": 0.2774, "step": 5649 }, { "epoch": 0.26, "grad_norm": 0.6361032128160325, "learning_rate": 4.8639428713277114e-06, "loss": 0.3048, "step": 5650 }, { "epoch": 0.26, "grad_norm": 0.6784319736340535, "learning_rate": 4.863881152747182e-06, "loss": 0.3225, "step": 5651 }, { "epoch": 0.26, "grad_norm": 0.6278982537017205, "learning_rate": 4.86381942056311e-06, "loss": 0.3024, "step": 5652 }, { "epoch": 0.26, "grad_norm": 0.6770081642651079, "learning_rate": 4.863757674775852e-06, "loss": 0.3253, "step": 5653 }, { "epoch": 0.26, "grad_norm": 0.6514485134964588, "learning_rate": 4.863695915385762e-06, "loss": 0.3032, "step": 5654 }, { "epoch": 0.26, "grad_norm": 0.6117533840799099, "learning_rate": 4.863634142393197e-06, "loss": 0.2961, "step": 5655 }, { "epoch": 0.26, "grad_norm": 0.6488491779877001, "learning_rate": 4.863572355798511e-06, "loss": 0.301, "step": 5656 }, { "epoch": 0.27, "grad_norm": 0.6609818143663311, "learning_rate": 4.863510555602059e-06, "loss": 0.3078, "step": 5657 }, { "epoch": 0.27, "grad_norm": 0.6324844052359626, "learning_rate": 4.863448741804199e-06, "loss": 0.3047, "step": 5658 }, { "epoch": 0.27, "grad_norm": 0.6404325811178234, "learning_rate": 4.863386914405285e-06, "loss": 0.2773, "step": 5659 }, { "epoch": 0.27, "grad_norm": 0.622255471742013, "learning_rate": 4.863325073405674e-06, "loss": 0.296, "step": 5660 }, { "epoch": 0.27, "grad_norm": 0.6052828523352918, "learning_rate": 4.863263218805721e-06, "loss": 0.3111, "step": 5661 }, { "epoch": 0.27, "grad_norm": 0.6569998654619116, "learning_rate": 4.863201350605782e-06, "loss": 0.2998, "step": 5662 }, { "epoch": 0.27, "grad_norm": 0.6540104870105156, "learning_rate": 4.863139468806213e-06, "loss": 0.2969, "step": 5663 }, { "epoch": 0.27, "grad_norm": 0.6424111210317865, "learning_rate": 4.863077573407371e-06, "loss": 0.2962, "step": 5664 }, { "epoch": 0.27, "grad_norm": 0.6745726812401147, "learning_rate": 4.863015664409611e-06, "loss": 0.3011, "step": 5665 }, { "epoch": 0.27, "grad_norm": 0.6172091086730888, "learning_rate": 4.86295374181329e-06, "loss": 0.2906, "step": 5666 }, { "epoch": 0.27, "grad_norm": 0.6260738035884014, "learning_rate": 4.862891805618764e-06, "loss": 0.3042, "step": 5667 }, { "epoch": 0.27, "grad_norm": 0.6960388943889589, "learning_rate": 4.86282985582639e-06, "loss": 0.3052, "step": 5668 }, { "epoch": 0.27, "grad_norm": 0.6626039191956902, "learning_rate": 4.862767892436523e-06, "loss": 0.3107, "step": 5669 }, { "epoch": 0.27, "grad_norm": 0.6229167415920124, "learning_rate": 4.862705915449522e-06, "loss": 0.2918, "step": 5670 }, { "epoch": 0.27, "grad_norm": 0.63656125516992, "learning_rate": 4.862643924865741e-06, "loss": 0.2921, "step": 5671 }, { "epoch": 0.27, "grad_norm": 0.6048725669484984, "learning_rate": 4.862581920685539e-06, "loss": 0.2877, "step": 5672 }, { "epoch": 0.27, "grad_norm": 0.5533869612763046, "learning_rate": 4.86251990290927e-06, "loss": 0.2777, "step": 5673 }, { "epoch": 0.27, "grad_norm": 0.6287138796456239, "learning_rate": 4.862457871537295e-06, "loss": 0.2988, "step": 5674 }, { "epoch": 0.27, "grad_norm": 0.6580879719152559, "learning_rate": 4.862395826569968e-06, "loss": 0.3161, "step": 5675 }, { "epoch": 0.27, "grad_norm": 0.6765270553092236, "learning_rate": 4.862333768007647e-06, "loss": 0.3073, "step": 5676 }, { "epoch": 0.27, "grad_norm": 0.6007968371582593, "learning_rate": 4.862271695850688e-06, "loss": 0.2839, "step": 5677 }, { "epoch": 0.27, "grad_norm": 0.6394944434652058, "learning_rate": 4.86220961009945e-06, "loss": 0.2824, "step": 5678 }, { "epoch": 0.27, "grad_norm": 0.6025725662421099, "learning_rate": 4.862147510754289e-06, "loss": 0.2886, "step": 5679 }, { "epoch": 0.27, "grad_norm": 0.6150796847717903, "learning_rate": 4.862085397815563e-06, "loss": 0.2935, "step": 5680 }, { "epoch": 0.27, "grad_norm": 0.6359034560175085, "learning_rate": 4.8620232712836294e-06, "loss": 0.2899, "step": 5681 }, { "epoch": 0.27, "grad_norm": 0.6264786599857474, "learning_rate": 4.861961131158846e-06, "loss": 0.2981, "step": 5682 }, { "epoch": 0.27, "grad_norm": 0.6254284797651553, "learning_rate": 4.861898977441568e-06, "loss": 0.3037, "step": 5683 }, { "epoch": 0.27, "grad_norm": 0.6497646426975437, "learning_rate": 4.861836810132157e-06, "loss": 0.3048, "step": 5684 }, { "epoch": 0.27, "grad_norm": 0.6560214896162078, "learning_rate": 4.861774629230967e-06, "loss": 0.3123, "step": 5685 }, { "epoch": 0.27, "grad_norm": 0.6655393379496667, "learning_rate": 4.861712434738359e-06, "loss": 0.3111, "step": 5686 }, { "epoch": 0.27, "grad_norm": 0.5912394869152607, "learning_rate": 4.86165022665469e-06, "loss": 0.2895, "step": 5687 }, { "epoch": 0.27, "grad_norm": 0.6173312675765351, "learning_rate": 4.861588004980316e-06, "loss": 0.2889, "step": 5688 }, { "epoch": 0.27, "grad_norm": 0.6066643687842386, "learning_rate": 4.861525769715598e-06, "loss": 0.3169, "step": 5689 }, { "epoch": 0.27, "grad_norm": 0.7028522466922931, "learning_rate": 4.861463520860892e-06, "loss": 0.3114, "step": 5690 }, { "epoch": 0.27, "grad_norm": 0.6424453279063231, "learning_rate": 4.861401258416557e-06, "loss": 0.2916, "step": 5691 }, { "epoch": 0.27, "grad_norm": 0.6404276799559668, "learning_rate": 4.861338982382952e-06, "loss": 0.3102, "step": 5692 }, { "epoch": 0.27, "grad_norm": 0.6386579551104258, "learning_rate": 4.861276692760433e-06, "loss": 0.2979, "step": 5693 }, { "epoch": 0.27, "grad_norm": 0.6083561176121749, "learning_rate": 4.8612143895493616e-06, "loss": 0.299, "step": 5694 }, { "epoch": 0.27, "grad_norm": 0.6304776688250935, "learning_rate": 4.861152072750094e-06, "loss": 0.3208, "step": 5695 }, { "epoch": 0.27, "grad_norm": 0.6872161001705999, "learning_rate": 4.86108974236299e-06, "loss": 0.2897, "step": 5696 }, { "epoch": 0.27, "grad_norm": 0.6009197588579976, "learning_rate": 4.861027398388408e-06, "loss": 0.2867, "step": 5697 }, { "epoch": 0.27, "grad_norm": 0.6088051777875035, "learning_rate": 4.860965040826707e-06, "loss": 0.2901, "step": 5698 }, { "epoch": 0.27, "grad_norm": 0.6241972271545848, "learning_rate": 4.8609026696782444e-06, "loss": 0.2946, "step": 5699 }, { "epoch": 0.27, "grad_norm": 0.6118763436286392, "learning_rate": 4.860840284943382e-06, "loss": 0.3056, "step": 5700 }, { "epoch": 0.27, "grad_norm": 0.6132569371335402, "learning_rate": 4.860777886622477e-06, "loss": 0.2852, "step": 5701 }, { "epoch": 0.27, "grad_norm": 0.6454487458788597, "learning_rate": 4.860715474715887e-06, "loss": 0.3231, "step": 5702 }, { "epoch": 0.27, "grad_norm": 0.5700756219332328, "learning_rate": 4.860653049223974e-06, "loss": 0.2899, "step": 5703 }, { "epoch": 0.27, "grad_norm": 0.6513170068090945, "learning_rate": 4.860590610147096e-06, "loss": 0.3053, "step": 5704 }, { "epoch": 0.27, "grad_norm": 0.6721537294751206, "learning_rate": 4.860528157485612e-06, "loss": 0.2894, "step": 5705 }, { "epoch": 0.27, "grad_norm": 0.6215148099777349, "learning_rate": 4.860465691239882e-06, "loss": 0.2882, "step": 5706 }, { "epoch": 0.27, "grad_norm": 0.57965808026227, "learning_rate": 4.8604032114102655e-06, "loss": 0.2834, "step": 5707 }, { "epoch": 0.27, "grad_norm": 0.6508347744629961, "learning_rate": 4.860340717997121e-06, "loss": 0.3088, "step": 5708 }, { "epoch": 0.27, "grad_norm": 0.6376078172077033, "learning_rate": 4.8602782110008095e-06, "loss": 0.3012, "step": 5709 }, { "epoch": 0.27, "grad_norm": 0.6597670850488246, "learning_rate": 4.86021569042169e-06, "loss": 0.3142, "step": 5710 }, { "epoch": 0.27, "grad_norm": 0.6226308440745011, "learning_rate": 4.8601531562601225e-06, "loss": 0.2998, "step": 5711 }, { "epoch": 0.27, "grad_norm": 0.6071209326711048, "learning_rate": 4.8600906085164666e-06, "loss": 0.299, "step": 5712 }, { "epoch": 0.27, "grad_norm": 0.6464824543295244, "learning_rate": 4.860028047191083e-06, "loss": 0.3192, "step": 5713 }, { "epoch": 0.27, "grad_norm": 0.5863099341677137, "learning_rate": 4.8599654722843305e-06, "loss": 0.2889, "step": 5714 }, { "epoch": 0.27, "grad_norm": 0.6587756120682531, "learning_rate": 4.85990288379657e-06, "loss": 0.2958, "step": 5715 }, { "epoch": 0.27, "grad_norm": 0.6537443223065056, "learning_rate": 4.8598402817281616e-06, "loss": 0.2919, "step": 5716 }, { "epoch": 0.27, "grad_norm": 0.6350523978180356, "learning_rate": 4.859777666079465e-06, "loss": 0.3087, "step": 5717 }, { "epoch": 0.27, "grad_norm": 0.6634460651419954, "learning_rate": 4.859715036850841e-06, "loss": 0.291, "step": 5718 }, { "epoch": 0.27, "grad_norm": 0.5826953651663683, "learning_rate": 4.859652394042652e-06, "loss": 0.2913, "step": 5719 }, { "epoch": 0.27, "grad_norm": 0.6722687145523714, "learning_rate": 4.859589737655255e-06, "loss": 0.3109, "step": 5720 }, { "epoch": 0.27, "grad_norm": 0.6076754876934568, "learning_rate": 4.859527067689012e-06, "loss": 0.3106, "step": 5721 }, { "epoch": 0.27, "grad_norm": 0.695852781107032, "learning_rate": 4.859464384144284e-06, "loss": 0.3156, "step": 5722 }, { "epoch": 0.27, "grad_norm": 0.7007449996585747, "learning_rate": 4.859401687021431e-06, "loss": 0.3025, "step": 5723 }, { "epoch": 0.27, "grad_norm": 0.7554107677900934, "learning_rate": 4.8593389763208156e-06, "loss": 0.3361, "step": 5724 }, { "epoch": 0.27, "grad_norm": 0.5888083559303612, "learning_rate": 4.859276252042796e-06, "loss": 0.2994, "step": 5725 }, { "epoch": 0.27, "grad_norm": 0.5872021220709797, "learning_rate": 4.859213514187735e-06, "loss": 0.2828, "step": 5726 }, { "epoch": 0.27, "grad_norm": 0.7181925555639597, "learning_rate": 4.859150762755994e-06, "loss": 0.312, "step": 5727 }, { "epoch": 0.27, "grad_norm": 0.6544912818227964, "learning_rate": 4.859087997747932e-06, "loss": 0.3161, "step": 5728 }, { "epoch": 0.27, "grad_norm": 0.6404589382442597, "learning_rate": 4.859025219163912e-06, "loss": 0.2915, "step": 5729 }, { "epoch": 0.27, "grad_norm": 0.6784242873130013, "learning_rate": 4.858962427004296e-06, "loss": 0.3224, "step": 5730 }, { "epoch": 0.27, "grad_norm": 0.677190320684809, "learning_rate": 4.8588996212694424e-06, "loss": 0.3024, "step": 5731 }, { "epoch": 0.27, "grad_norm": 0.6988147320493001, "learning_rate": 4.858836801959715e-06, "loss": 0.3305, "step": 5732 }, { "epoch": 0.27, "grad_norm": 0.6524536089966898, "learning_rate": 4.858773969075476e-06, "loss": 0.2821, "step": 5733 }, { "epoch": 0.27, "grad_norm": 0.5744538897519142, "learning_rate": 4.858711122617084e-06, "loss": 0.278, "step": 5734 }, { "epoch": 0.27, "grad_norm": 0.6687729004363799, "learning_rate": 4.858648262584902e-06, "loss": 0.3189, "step": 5735 }, { "epoch": 0.27, "grad_norm": 0.6289876263987286, "learning_rate": 4.858585388979293e-06, "loss": 0.2986, "step": 5736 }, { "epoch": 0.27, "grad_norm": 0.6107666738550822, "learning_rate": 4.858522501800618e-06, "loss": 0.2903, "step": 5737 }, { "epoch": 0.27, "grad_norm": 0.6574944616233749, "learning_rate": 4.858459601049238e-06, "loss": 0.297, "step": 5738 }, { "epoch": 0.27, "grad_norm": 0.5947047572670836, "learning_rate": 4.858396686725516e-06, "loss": 0.3057, "step": 5739 }, { "epoch": 0.27, "grad_norm": 0.6750385010817668, "learning_rate": 4.858333758829815e-06, "loss": 0.2934, "step": 5740 }, { "epoch": 0.27, "grad_norm": 0.6094401022943908, "learning_rate": 4.858270817362495e-06, "loss": 0.2703, "step": 5741 }, { "epoch": 0.27, "grad_norm": 0.6222618487269143, "learning_rate": 4.858207862323919e-06, "loss": 0.3091, "step": 5742 }, { "epoch": 0.27, "grad_norm": 0.6315129369010182, "learning_rate": 4.85814489371445e-06, "loss": 0.299, "step": 5743 }, { "epoch": 0.27, "grad_norm": 0.6532368606266921, "learning_rate": 4.858081911534449e-06, "loss": 0.2933, "step": 5744 }, { "epoch": 0.27, "grad_norm": 0.6085304953443407, "learning_rate": 4.85801891578428e-06, "loss": 0.2919, "step": 5745 }, { "epoch": 0.27, "grad_norm": 0.6338764875554642, "learning_rate": 4.857955906464304e-06, "loss": 0.2985, "step": 5746 }, { "epoch": 0.27, "grad_norm": 0.6608631255049184, "learning_rate": 4.857892883574886e-06, "loss": 0.2929, "step": 5747 }, { "epoch": 0.27, "grad_norm": 0.6292529934108186, "learning_rate": 4.8578298471163856e-06, "loss": 0.2943, "step": 5748 }, { "epoch": 0.27, "grad_norm": 0.6767439972899137, "learning_rate": 4.857766797089167e-06, "loss": 0.3137, "step": 5749 }, { "epoch": 0.27, "grad_norm": 0.6136188225106958, "learning_rate": 4.857703733493595e-06, "loss": 0.2931, "step": 5750 }, { "epoch": 0.27, "grad_norm": 0.6675454410537499, "learning_rate": 4.8576406563300286e-06, "loss": 0.2975, "step": 5751 }, { "epoch": 0.27, "grad_norm": 0.6579596765022365, "learning_rate": 4.857577565598833e-06, "loss": 0.2899, "step": 5752 }, { "epoch": 0.27, "grad_norm": 0.6892268739315478, "learning_rate": 4.857514461300372e-06, "loss": 0.3067, "step": 5753 }, { "epoch": 0.27, "grad_norm": 0.6284870190170241, "learning_rate": 4.857451343435007e-06, "loss": 0.2968, "step": 5754 }, { "epoch": 0.27, "grad_norm": 0.6739230735922291, "learning_rate": 4.857388212003102e-06, "loss": 0.3157, "step": 5755 }, { "epoch": 0.27, "grad_norm": 0.6089838136120663, "learning_rate": 4.857325067005022e-06, "loss": 0.2853, "step": 5756 }, { "epoch": 0.27, "grad_norm": 0.6179447796296674, "learning_rate": 4.857261908441127e-06, "loss": 0.292, "step": 5757 }, { "epoch": 0.27, "grad_norm": 0.6468542770456118, "learning_rate": 4.857198736311782e-06, "loss": 0.3176, "step": 5758 }, { "epoch": 0.27, "grad_norm": 0.6431219493976394, "learning_rate": 4.8571355506173515e-06, "loss": 0.2924, "step": 5759 }, { "epoch": 0.27, "grad_norm": 0.6080027795434348, "learning_rate": 4.857072351358198e-06, "loss": 0.3035, "step": 5760 }, { "epoch": 0.27, "grad_norm": 0.6270607521129332, "learning_rate": 4.857009138534686e-06, "loss": 0.2817, "step": 5761 }, { "epoch": 0.27, "grad_norm": 0.6650815796210631, "learning_rate": 4.856945912147179e-06, "loss": 0.3243, "step": 5762 }, { "epoch": 0.27, "grad_norm": 0.5924873917058061, "learning_rate": 4.8568826721960406e-06, "loss": 0.291, "step": 5763 }, { "epoch": 0.27, "grad_norm": 0.6244163097997162, "learning_rate": 4.856819418681634e-06, "loss": 0.3078, "step": 5764 }, { "epoch": 0.27, "grad_norm": 0.6305711087913461, "learning_rate": 4.856756151604325e-06, "loss": 0.2828, "step": 5765 }, { "epoch": 0.27, "grad_norm": 0.6166955469515798, "learning_rate": 4.856692870964476e-06, "loss": 0.2851, "step": 5766 }, { "epoch": 0.27, "grad_norm": 0.6100095505749571, "learning_rate": 4.8566295767624516e-06, "loss": 0.3059, "step": 5767 }, { "epoch": 0.27, "grad_norm": 0.5633605778729681, "learning_rate": 4.856566268998617e-06, "loss": 0.2979, "step": 5768 }, { "epoch": 0.27, "grad_norm": 0.6061348352511793, "learning_rate": 4.856502947673335e-06, "loss": 0.2801, "step": 5769 }, { "epoch": 0.27, "grad_norm": 0.7509358901192117, "learning_rate": 4.8564396127869714e-06, "loss": 0.3152, "step": 5770 }, { "epoch": 0.27, "grad_norm": 0.617674214911693, "learning_rate": 4.856376264339891e-06, "loss": 0.295, "step": 5771 }, { "epoch": 0.27, "grad_norm": 0.6037976383787206, "learning_rate": 4.856312902332456e-06, "loss": 0.295, "step": 5772 }, { "epoch": 0.27, "grad_norm": 0.6314304072519684, "learning_rate": 4.856249526765033e-06, "loss": 0.3089, "step": 5773 }, { "epoch": 0.27, "grad_norm": 0.6173613082741368, "learning_rate": 4.856186137637986e-06, "loss": 0.3, "step": 5774 }, { "epoch": 0.27, "grad_norm": 0.6814274791646417, "learning_rate": 4.85612273495168e-06, "loss": 0.3266, "step": 5775 }, { "epoch": 0.27, "grad_norm": 0.6895019741552595, "learning_rate": 4.8560593187064805e-06, "loss": 0.3091, "step": 5776 }, { "epoch": 0.27, "grad_norm": 0.6458984279341645, "learning_rate": 4.855995888902752e-06, "loss": 0.2786, "step": 5777 }, { "epoch": 0.27, "grad_norm": 0.6177518685189285, "learning_rate": 4.855932445540857e-06, "loss": 0.2905, "step": 5778 }, { "epoch": 0.27, "grad_norm": 0.5475410125294297, "learning_rate": 4.855868988621165e-06, "loss": 0.2587, "step": 5779 }, { "epoch": 0.27, "grad_norm": 0.6460383820111196, "learning_rate": 4.855805518144039e-06, "loss": 0.3011, "step": 5780 }, { "epoch": 0.27, "grad_norm": 0.6690534976008322, "learning_rate": 4.855742034109844e-06, "loss": 0.2972, "step": 5781 }, { "epoch": 0.27, "grad_norm": 0.6136544079502261, "learning_rate": 4.855678536518946e-06, "loss": 0.3022, "step": 5782 }, { "epoch": 0.27, "grad_norm": 0.6093455100178301, "learning_rate": 4.85561502537171e-06, "loss": 0.3013, "step": 5783 }, { "epoch": 0.27, "grad_norm": 0.6348469744104498, "learning_rate": 4.855551500668501e-06, "loss": 0.2704, "step": 5784 }, { "epoch": 0.27, "grad_norm": 0.6074464319899573, "learning_rate": 4.855487962409685e-06, "loss": 0.2942, "step": 5785 }, { "epoch": 0.27, "grad_norm": 0.6227751736556532, "learning_rate": 4.855424410595629e-06, "loss": 0.2812, "step": 5786 }, { "epoch": 0.27, "grad_norm": 0.6133003083017416, "learning_rate": 4.855360845226697e-06, "loss": 0.2869, "step": 5787 }, { "epoch": 0.27, "grad_norm": 0.698346179152879, "learning_rate": 4.855297266303255e-06, "loss": 0.3236, "step": 5788 }, { "epoch": 0.27, "grad_norm": 0.6650785803054893, "learning_rate": 4.855233673825669e-06, "loss": 0.3362, "step": 5789 }, { "epoch": 0.27, "grad_norm": 0.6001206815147938, "learning_rate": 4.855170067794306e-06, "loss": 0.2884, "step": 5790 }, { "epoch": 0.27, "grad_norm": 0.6187939840472475, "learning_rate": 4.855106448209531e-06, "loss": 0.2944, "step": 5791 }, { "epoch": 0.27, "grad_norm": 0.5862125855669837, "learning_rate": 4.85504281507171e-06, "loss": 0.2782, "step": 5792 }, { "epoch": 0.27, "grad_norm": 0.6260557564328433, "learning_rate": 4.85497916838121e-06, "loss": 0.2898, "step": 5793 }, { "epoch": 0.27, "grad_norm": 0.6325124747498747, "learning_rate": 4.854915508138396e-06, "loss": 0.3029, "step": 5794 }, { "epoch": 0.27, "grad_norm": 0.6448222250410853, "learning_rate": 4.854851834343636e-06, "loss": 0.3161, "step": 5795 }, { "epoch": 0.27, "grad_norm": 0.6135731039170583, "learning_rate": 4.8547881469972945e-06, "loss": 0.2801, "step": 5796 }, { "epoch": 0.27, "grad_norm": 0.6488779805579212, "learning_rate": 4.85472444609974e-06, "loss": 0.2982, "step": 5797 }, { "epoch": 0.27, "grad_norm": 0.5880940033611937, "learning_rate": 4.854660731651338e-06, "loss": 0.2696, "step": 5798 }, { "epoch": 0.27, "grad_norm": 0.6176965789759894, "learning_rate": 4.8545970036524546e-06, "loss": 0.289, "step": 5799 }, { "epoch": 0.27, "grad_norm": 0.6675968372692602, "learning_rate": 4.854533262103458e-06, "loss": 0.3238, "step": 5800 }, { "epoch": 0.27, "grad_norm": 0.66998009238095, "learning_rate": 4.854469507004714e-06, "loss": 0.326, "step": 5801 }, { "epoch": 0.27, "grad_norm": 0.6204021682776897, "learning_rate": 4.85440573835659e-06, "loss": 0.3183, "step": 5802 }, { "epoch": 0.27, "grad_norm": 0.694906769509233, "learning_rate": 4.854341956159453e-06, "loss": 0.2936, "step": 5803 }, { "epoch": 0.27, "grad_norm": 0.6733156660265672, "learning_rate": 4.854278160413669e-06, "loss": 0.3051, "step": 5804 }, { "epoch": 0.27, "grad_norm": 0.6363149609894377, "learning_rate": 4.854214351119606e-06, "loss": 0.2963, "step": 5805 }, { "epoch": 0.27, "grad_norm": 0.5977539113829826, "learning_rate": 4.854150528277631e-06, "loss": 0.2734, "step": 5806 }, { "epoch": 0.27, "grad_norm": 0.6913076769615131, "learning_rate": 4.854086691888112e-06, "loss": 0.3134, "step": 5807 }, { "epoch": 0.27, "grad_norm": 0.5925711056015109, "learning_rate": 4.854022841951416e-06, "loss": 0.2898, "step": 5808 }, { "epoch": 0.27, "grad_norm": 0.6246486652878591, "learning_rate": 4.85395897846791e-06, "loss": 0.283, "step": 5809 }, { "epoch": 0.27, "grad_norm": 0.6372090014744488, "learning_rate": 4.853895101437961e-06, "loss": 0.2935, "step": 5810 }, { "epoch": 0.27, "grad_norm": 0.621120064515527, "learning_rate": 4.853831210861938e-06, "loss": 0.3088, "step": 5811 }, { "epoch": 0.27, "grad_norm": 0.6182161134673724, "learning_rate": 4.853767306740208e-06, "loss": 0.2992, "step": 5812 }, { "epoch": 0.27, "grad_norm": 0.6324182388234784, "learning_rate": 4.853703389073139e-06, "loss": 0.2965, "step": 5813 }, { "epoch": 0.27, "grad_norm": 0.5895923286283091, "learning_rate": 4.853639457861098e-06, "loss": 0.2833, "step": 5814 }, { "epoch": 0.27, "grad_norm": 0.5929819615300933, "learning_rate": 4.8535755131044536e-06, "loss": 0.294, "step": 5815 }, { "epoch": 0.27, "grad_norm": 0.6082356835887578, "learning_rate": 4.853511554803573e-06, "loss": 0.2786, "step": 5816 }, { "epoch": 0.27, "grad_norm": 0.6416568783352559, "learning_rate": 4.853447582958826e-06, "loss": 0.2846, "step": 5817 }, { "epoch": 0.27, "grad_norm": 0.594386213779637, "learning_rate": 4.853383597570579e-06, "loss": 0.2693, "step": 5818 }, { "epoch": 0.27, "grad_norm": 0.6831387233972898, "learning_rate": 4.8533195986392015e-06, "loss": 0.2849, "step": 5819 }, { "epoch": 0.27, "grad_norm": 0.6144064073312462, "learning_rate": 4.853255586165061e-06, "loss": 0.2766, "step": 5820 }, { "epoch": 0.27, "grad_norm": 0.628008881882958, "learning_rate": 4.853191560148526e-06, "loss": 0.2975, "step": 5821 }, { "epoch": 0.27, "grad_norm": 0.7051758188144693, "learning_rate": 4.8531275205899644e-06, "loss": 0.2845, "step": 5822 }, { "epoch": 0.27, "grad_norm": 0.7042663074860837, "learning_rate": 4.853063467489747e-06, "loss": 0.2752, "step": 5823 }, { "epoch": 0.27, "grad_norm": 0.6039300967897374, "learning_rate": 4.8529994008482394e-06, "loss": 0.3037, "step": 5824 }, { "epoch": 0.27, "grad_norm": 0.6856267994754284, "learning_rate": 4.852935320665811e-06, "loss": 0.3083, "step": 5825 }, { "epoch": 0.27, "grad_norm": 0.5793282251850433, "learning_rate": 4.8528712269428326e-06, "loss": 0.2879, "step": 5826 }, { "epoch": 0.27, "grad_norm": 0.634898837696702, "learning_rate": 4.852807119679671e-06, "loss": 0.2917, "step": 5827 }, { "epoch": 0.27, "grad_norm": 0.6894999375677663, "learning_rate": 4.852742998876696e-06, "loss": 0.305, "step": 5828 }, { "epoch": 0.27, "grad_norm": 0.6461931051655723, "learning_rate": 4.852678864534277e-06, "loss": 0.3113, "step": 5829 }, { "epoch": 0.27, "grad_norm": 0.6978249948601705, "learning_rate": 4.8526147166527816e-06, "loss": 0.2932, "step": 5830 }, { "epoch": 0.27, "grad_norm": 0.7054346414025633, "learning_rate": 4.85255055523258e-06, "loss": 0.3045, "step": 5831 }, { "epoch": 0.27, "grad_norm": 0.6628561387658907, "learning_rate": 4.852486380274042e-06, "loss": 0.291, "step": 5832 }, { "epoch": 0.27, "grad_norm": 0.6406560217160556, "learning_rate": 4.852422191777535e-06, "loss": 0.2884, "step": 5833 }, { "epoch": 0.27, "grad_norm": 0.638903166603644, "learning_rate": 4.85235798974343e-06, "loss": 0.2964, "step": 5834 }, { "epoch": 0.27, "grad_norm": 0.6032785229199948, "learning_rate": 4.852293774172096e-06, "loss": 0.2716, "step": 5835 }, { "epoch": 0.27, "grad_norm": 0.7062279819599317, "learning_rate": 4.8522295450639025e-06, "loss": 0.3088, "step": 5836 }, { "epoch": 0.27, "grad_norm": 0.632586098970034, "learning_rate": 4.852165302419219e-06, "loss": 0.2883, "step": 5837 }, { "epoch": 0.27, "grad_norm": 0.5877975799830472, "learning_rate": 4.852101046238416e-06, "loss": 0.2826, "step": 5838 }, { "epoch": 0.27, "grad_norm": 0.6370965271678429, "learning_rate": 4.852036776521862e-06, "loss": 0.2906, "step": 5839 }, { "epoch": 0.27, "grad_norm": 0.620379588558801, "learning_rate": 4.851972493269929e-06, "loss": 0.2859, "step": 5840 }, { "epoch": 0.27, "grad_norm": 0.6313282261988249, "learning_rate": 4.851908196482983e-06, "loss": 0.2793, "step": 5841 }, { "epoch": 0.27, "grad_norm": 0.6793021781812733, "learning_rate": 4.851843886161398e-06, "loss": 0.3097, "step": 5842 }, { "epoch": 0.27, "grad_norm": 0.6500384089182413, "learning_rate": 4.851779562305543e-06, "loss": 0.3129, "step": 5843 }, { "epoch": 0.27, "grad_norm": 0.6687044758413895, "learning_rate": 4.851715224915786e-06, "loss": 0.2991, "step": 5844 }, { "epoch": 0.27, "grad_norm": 0.5911762724073949, "learning_rate": 4.8516508739925e-06, "loss": 0.2759, "step": 5845 }, { "epoch": 0.27, "grad_norm": 0.6316507573027358, "learning_rate": 4.8515865095360545e-06, "loss": 0.2694, "step": 5846 }, { "epoch": 0.27, "grad_norm": 0.618905481975788, "learning_rate": 4.85152213154682e-06, "loss": 0.29, "step": 5847 }, { "epoch": 0.27, "grad_norm": 0.6173212633890243, "learning_rate": 4.851457740025166e-06, "loss": 0.2762, "step": 5848 }, { "epoch": 0.27, "grad_norm": 0.5932773222503025, "learning_rate": 4.851393334971463e-06, "loss": 0.2721, "step": 5849 }, { "epoch": 0.27, "grad_norm": 0.6197877033662196, "learning_rate": 4.8513289163860834e-06, "loss": 0.2961, "step": 5850 }, { "epoch": 0.27, "grad_norm": 0.6427857617945204, "learning_rate": 4.851264484269397e-06, "loss": 0.2953, "step": 5851 }, { "epoch": 0.27, "grad_norm": 0.610570963602475, "learning_rate": 4.851200038621774e-06, "loss": 0.2938, "step": 5852 }, { "epoch": 0.27, "grad_norm": 0.605532281530976, "learning_rate": 4.851135579443586e-06, "loss": 0.3079, "step": 5853 }, { "epoch": 0.27, "grad_norm": 0.7044554080188318, "learning_rate": 4.8510711067352035e-06, "loss": 0.3199, "step": 5854 }, { "epoch": 0.27, "grad_norm": 0.6403875068606936, "learning_rate": 4.851006620496997e-06, "loss": 0.3025, "step": 5855 }, { "epoch": 0.27, "grad_norm": 0.6343243814149251, "learning_rate": 4.850942120729339e-06, "loss": 0.3071, "step": 5856 }, { "epoch": 0.27, "grad_norm": 0.6290649642846211, "learning_rate": 4.8508776074326e-06, "loss": 0.3044, "step": 5857 }, { "epoch": 0.27, "grad_norm": 0.6115364371393391, "learning_rate": 4.850813080607152e-06, "loss": 0.279, "step": 5858 }, { "epoch": 0.27, "grad_norm": 0.6268258517916696, "learning_rate": 4.850748540253365e-06, "loss": 0.3131, "step": 5859 }, { "epoch": 0.27, "grad_norm": 0.6496006472110328, "learning_rate": 4.8506839863716104e-06, "loss": 0.3004, "step": 5860 }, { "epoch": 0.27, "grad_norm": 0.6116349222516312, "learning_rate": 4.8506194189622605e-06, "loss": 0.292, "step": 5861 }, { "epoch": 0.27, "grad_norm": 0.6439521586585903, "learning_rate": 4.850554838025688e-06, "loss": 0.3082, "step": 5862 }, { "epoch": 0.27, "grad_norm": 0.6418928220898055, "learning_rate": 4.850490243562261e-06, "loss": 0.3003, "step": 5863 }, { "epoch": 0.27, "grad_norm": 0.6544842994944913, "learning_rate": 4.850425635572355e-06, "loss": 0.3038, "step": 5864 }, { "epoch": 0.27, "grad_norm": 0.6056185693902946, "learning_rate": 4.850361014056339e-06, "loss": 0.2758, "step": 5865 }, { "epoch": 0.27, "grad_norm": 0.6098071592499972, "learning_rate": 4.850296379014587e-06, "loss": 0.3035, "step": 5866 }, { "epoch": 0.27, "grad_norm": 0.6035518330459689, "learning_rate": 4.85023173044747e-06, "loss": 0.293, "step": 5867 }, { "epoch": 0.27, "grad_norm": 0.5972453649443912, "learning_rate": 4.850167068355359e-06, "loss": 0.309, "step": 5868 }, { "epoch": 0.27, "grad_norm": 0.6050425155689564, "learning_rate": 4.8501023927386286e-06, "loss": 0.2982, "step": 5869 }, { "epoch": 0.27, "grad_norm": 0.6242664042109345, "learning_rate": 4.850037703597649e-06, "loss": 0.2954, "step": 5870 }, { "epoch": 0.28, "grad_norm": 0.6693099522143127, "learning_rate": 4.849973000932794e-06, "loss": 0.3163, "step": 5871 }, { "epoch": 0.28, "grad_norm": 0.6096340903140555, "learning_rate": 4.849908284744434e-06, "loss": 0.2982, "step": 5872 }, { "epoch": 0.28, "grad_norm": 0.5606104137552246, "learning_rate": 4.849843555032942e-06, "loss": 0.2928, "step": 5873 }, { "epoch": 0.28, "grad_norm": 0.6263681090965362, "learning_rate": 4.849778811798692e-06, "loss": 0.3067, "step": 5874 }, { "epoch": 0.28, "grad_norm": 0.594963296253528, "learning_rate": 4.849714055042055e-06, "loss": 0.2779, "step": 5875 }, { "epoch": 0.28, "grad_norm": 0.6102791873041256, "learning_rate": 4.849649284763404e-06, "loss": 0.3066, "step": 5876 }, { "epoch": 0.28, "grad_norm": 0.6176621813544668, "learning_rate": 4.849584500963113e-06, "loss": 0.293, "step": 5877 }, { "epoch": 0.28, "grad_norm": 0.6497742240914988, "learning_rate": 4.849519703641553e-06, "loss": 0.2716, "step": 5878 }, { "epoch": 0.28, "grad_norm": 0.6371694557510448, "learning_rate": 4.849454892799098e-06, "loss": 0.3062, "step": 5879 }, { "epoch": 0.28, "grad_norm": 0.6208289094386819, "learning_rate": 4.84939006843612e-06, "loss": 0.2997, "step": 5880 }, { "epoch": 0.28, "grad_norm": 0.6085426802534977, "learning_rate": 4.849325230552993e-06, "loss": 0.3031, "step": 5881 }, { "epoch": 0.28, "grad_norm": 0.638562803317257, "learning_rate": 4.84926037915009e-06, "loss": 0.3074, "step": 5882 }, { "epoch": 0.28, "grad_norm": 0.6620331908082419, "learning_rate": 4.849195514227784e-06, "loss": 0.3036, "step": 5883 }, { "epoch": 0.28, "grad_norm": 0.6448685155976454, "learning_rate": 4.849130635786447e-06, "loss": 0.2996, "step": 5884 }, { "epoch": 0.28, "grad_norm": 0.5747798768744754, "learning_rate": 4.849065743826456e-06, "loss": 0.291, "step": 5885 }, { "epoch": 0.28, "grad_norm": 0.6333961062822125, "learning_rate": 4.84900083834818e-06, "loss": 0.3107, "step": 5886 }, { "epoch": 0.28, "grad_norm": 0.6312741454426328, "learning_rate": 4.8489359193519945e-06, "loss": 0.2901, "step": 5887 }, { "epoch": 0.28, "grad_norm": 0.621627608650699, "learning_rate": 4.8488709868382745e-06, "loss": 0.3225, "step": 5888 }, { "epoch": 0.28, "grad_norm": 0.6159992229570009, "learning_rate": 4.848806040807392e-06, "loss": 0.2874, "step": 5889 }, { "epoch": 0.28, "grad_norm": 0.6380281211453016, "learning_rate": 4.84874108125972e-06, "loss": 0.3003, "step": 5890 }, { "epoch": 0.28, "grad_norm": 0.6183726357190389, "learning_rate": 4.848676108195635e-06, "loss": 0.284, "step": 5891 }, { "epoch": 0.28, "grad_norm": 0.6951219980572763, "learning_rate": 4.848611121615509e-06, "loss": 0.3183, "step": 5892 }, { "epoch": 0.28, "grad_norm": 0.6958615683016878, "learning_rate": 4.848546121519715e-06, "loss": 0.3201, "step": 5893 }, { "epoch": 0.28, "grad_norm": 0.6081209006957491, "learning_rate": 4.848481107908629e-06, "loss": 0.2858, "step": 5894 }, { "epoch": 0.28, "grad_norm": 0.6076769412318445, "learning_rate": 4.848416080782625e-06, "loss": 0.288, "step": 5895 }, { "epoch": 0.28, "grad_norm": 0.6799500258313079, "learning_rate": 4.8483510401420765e-06, "loss": 0.2999, "step": 5896 }, { "epoch": 0.28, "grad_norm": 0.6633481890853292, "learning_rate": 4.848285985987358e-06, "loss": 0.3018, "step": 5897 }, { "epoch": 0.28, "grad_norm": 0.6354521014889968, "learning_rate": 4.848220918318843e-06, "loss": 0.2833, "step": 5898 }, { "epoch": 0.28, "grad_norm": 0.6139128039356401, "learning_rate": 4.848155837136908e-06, "loss": 0.2765, "step": 5899 }, { "epoch": 0.28, "grad_norm": 0.5791987978929211, "learning_rate": 4.848090742441926e-06, "loss": 0.2889, "step": 5900 }, { "epoch": 0.28, "grad_norm": 0.678685228087802, "learning_rate": 4.848025634234272e-06, "loss": 0.3133, "step": 5901 }, { "epoch": 0.28, "grad_norm": 0.5966013942749029, "learning_rate": 4.84796051251432e-06, "loss": 0.287, "step": 5902 }, { "epoch": 0.28, "grad_norm": 0.6650508893459215, "learning_rate": 4.847895377282447e-06, "loss": 0.3021, "step": 5903 }, { "epoch": 0.28, "grad_norm": 0.6251614526092083, "learning_rate": 4.847830228539024e-06, "loss": 0.3056, "step": 5904 }, { "epoch": 0.28, "grad_norm": 0.6273426166671646, "learning_rate": 4.84776506628443e-06, "loss": 0.3185, "step": 5905 }, { "epoch": 0.28, "grad_norm": 0.6377784734167955, "learning_rate": 4.847699890519037e-06, "loss": 0.3076, "step": 5906 }, { "epoch": 0.28, "grad_norm": 0.6565241379028509, "learning_rate": 4.847634701243222e-06, "loss": 0.3098, "step": 5907 }, { "epoch": 0.28, "grad_norm": 0.6151020839682104, "learning_rate": 4.847569498457358e-06, "loss": 0.306, "step": 5908 }, { "epoch": 0.28, "grad_norm": 0.6167299334757728, "learning_rate": 4.847504282161824e-06, "loss": 0.3053, "step": 5909 }, { "epoch": 0.28, "grad_norm": 0.6454190510609554, "learning_rate": 4.847439052356991e-06, "loss": 0.2967, "step": 5910 }, { "epoch": 0.28, "grad_norm": 0.6440534166608831, "learning_rate": 4.8473738090432365e-06, "loss": 0.2972, "step": 5911 }, { "epoch": 0.28, "grad_norm": 0.7068583987729024, "learning_rate": 4.847308552220935e-06, "loss": 0.319, "step": 5912 }, { "epoch": 0.28, "grad_norm": 0.6530281892376595, "learning_rate": 4.847243281890464e-06, "loss": 0.3091, "step": 5913 }, { "epoch": 0.28, "grad_norm": 0.7260545248528352, "learning_rate": 4.8471779980521975e-06, "loss": 0.3003, "step": 5914 }, { "epoch": 0.28, "grad_norm": 0.6349126597657393, "learning_rate": 4.8471127007065115e-06, "loss": 0.3024, "step": 5915 }, { "epoch": 0.28, "grad_norm": 0.6373691519885881, "learning_rate": 4.8470473898537815e-06, "loss": 0.3204, "step": 5916 }, { "epoch": 0.28, "grad_norm": 0.6254245858436283, "learning_rate": 4.846982065494384e-06, "loss": 0.2985, "step": 5917 }, { "epoch": 0.28, "grad_norm": 0.6061617149422404, "learning_rate": 4.846916727628695e-06, "loss": 0.2976, "step": 5918 }, { "epoch": 0.28, "grad_norm": 0.6489866453512925, "learning_rate": 4.8468513762570905e-06, "loss": 0.3029, "step": 5919 }, { "epoch": 0.28, "grad_norm": 0.6860126525667131, "learning_rate": 4.846786011379944e-06, "loss": 0.317, "step": 5920 }, { "epoch": 0.28, "grad_norm": 0.6118825880053985, "learning_rate": 4.846720632997637e-06, "loss": 0.3027, "step": 5921 }, { "epoch": 0.28, "grad_norm": 0.5914275707976868, "learning_rate": 4.846655241110541e-06, "loss": 0.2697, "step": 5922 }, { "epoch": 0.28, "grad_norm": 0.6311641048435361, "learning_rate": 4.8465898357190335e-06, "loss": 0.2874, "step": 5923 }, { "epoch": 0.28, "grad_norm": 0.6048375200876132, "learning_rate": 4.846524416823491e-06, "loss": 0.2948, "step": 5924 }, { "epoch": 0.28, "grad_norm": 0.6161870496382548, "learning_rate": 4.846458984424292e-06, "loss": 0.3018, "step": 5925 }, { "epoch": 0.28, "grad_norm": 0.6443447119839462, "learning_rate": 4.8463935385218095e-06, "loss": 0.2865, "step": 5926 }, { "epoch": 0.28, "grad_norm": 0.6710353137588849, "learning_rate": 4.8463280791164225e-06, "loss": 0.2994, "step": 5927 }, { "epoch": 0.28, "grad_norm": 0.6276940469333931, "learning_rate": 4.846262606208507e-06, "loss": 0.3114, "step": 5928 }, { "epoch": 0.28, "grad_norm": 0.7024162231853419, "learning_rate": 4.84619711979844e-06, "loss": 0.3109, "step": 5929 }, { "epoch": 0.28, "grad_norm": 0.5841348807591599, "learning_rate": 4.846131619886598e-06, "loss": 0.3032, "step": 5930 }, { "epoch": 0.28, "grad_norm": 0.6534226747158569, "learning_rate": 4.846066106473359e-06, "loss": 0.291, "step": 5931 }, { "epoch": 0.28, "grad_norm": 0.6296017561828468, "learning_rate": 4.846000579559099e-06, "loss": 0.2887, "step": 5932 }, { "epoch": 0.28, "grad_norm": 0.6428616157651743, "learning_rate": 4.845935039144195e-06, "loss": 0.2824, "step": 5933 }, { "epoch": 0.28, "grad_norm": 0.6108766280129645, "learning_rate": 4.845869485229025e-06, "loss": 0.2967, "step": 5934 }, { "epoch": 0.28, "grad_norm": 0.6429252319502311, "learning_rate": 4.845803917813965e-06, "loss": 0.331, "step": 5935 }, { "epoch": 0.28, "grad_norm": 0.6183421984716312, "learning_rate": 4.845738336899394e-06, "loss": 0.2887, "step": 5936 }, { "epoch": 0.28, "grad_norm": 0.5840255958631215, "learning_rate": 4.8456727424856875e-06, "loss": 0.2919, "step": 5937 }, { "epoch": 0.28, "grad_norm": 0.648885359014251, "learning_rate": 4.845607134573224e-06, "loss": 0.2907, "step": 5938 }, { "epoch": 0.28, "grad_norm": 0.6029531549161292, "learning_rate": 4.845541513162382e-06, "loss": 0.3214, "step": 5939 }, { "epoch": 0.28, "grad_norm": 0.6298162700000636, "learning_rate": 4.845475878253537e-06, "loss": 0.3206, "step": 5940 }, { "epoch": 0.28, "grad_norm": 0.6299881425469819, "learning_rate": 4.845410229847068e-06, "loss": 0.3042, "step": 5941 }, { "epoch": 0.28, "grad_norm": 0.615959553777756, "learning_rate": 4.845344567943353e-06, "loss": 0.3159, "step": 5942 }, { "epoch": 0.28, "grad_norm": 0.6184852570022595, "learning_rate": 4.845278892542769e-06, "loss": 0.289, "step": 5943 }, { "epoch": 0.28, "grad_norm": 0.5925212684799458, "learning_rate": 4.845213203645695e-06, "loss": 0.2851, "step": 5944 }, { "epoch": 0.28, "grad_norm": 0.6753765888952429, "learning_rate": 4.845147501252509e-06, "loss": 0.2846, "step": 5945 }, { "epoch": 0.28, "grad_norm": 0.6442704659932181, "learning_rate": 4.845081785363587e-06, "loss": 0.2997, "step": 5946 }, { "epoch": 0.28, "grad_norm": 0.6236244274867494, "learning_rate": 4.8450160559793095e-06, "loss": 0.3096, "step": 5947 }, { "epoch": 0.28, "grad_norm": 0.621666997266756, "learning_rate": 4.844950313100054e-06, "loss": 0.2919, "step": 5948 }, { "epoch": 0.28, "grad_norm": 0.6437807857714328, "learning_rate": 4.844884556726199e-06, "loss": 0.3096, "step": 5949 }, { "epoch": 0.28, "grad_norm": 0.6488932464428778, "learning_rate": 4.844818786858122e-06, "loss": 0.2835, "step": 5950 }, { "epoch": 0.28, "grad_norm": 0.7147478356101504, "learning_rate": 4.844753003496203e-06, "loss": 0.313, "step": 5951 }, { "epoch": 0.28, "grad_norm": 0.6135327988615731, "learning_rate": 4.84468720664082e-06, "loss": 0.2947, "step": 5952 }, { "epoch": 0.28, "grad_norm": 0.6043083853414978, "learning_rate": 4.84462139629235e-06, "loss": 0.3089, "step": 5953 }, { "epoch": 0.28, "grad_norm": 0.6343764797353192, "learning_rate": 4.844555572451175e-06, "loss": 0.3028, "step": 5954 }, { "epoch": 0.28, "grad_norm": 0.7282141008965419, "learning_rate": 4.844489735117671e-06, "loss": 0.3152, "step": 5955 }, { "epoch": 0.28, "grad_norm": 0.6496386145510998, "learning_rate": 4.844423884292218e-06, "loss": 0.3157, "step": 5956 }, { "epoch": 0.28, "grad_norm": 0.7009042985606619, "learning_rate": 4.844358019975195e-06, "loss": 0.3224, "step": 5957 }, { "epoch": 0.28, "grad_norm": 0.7297610091659886, "learning_rate": 4.844292142166981e-06, "loss": 0.3308, "step": 5958 }, { "epoch": 0.28, "grad_norm": 0.5992932417287048, "learning_rate": 4.844226250867954e-06, "loss": 0.2911, "step": 5959 }, { "epoch": 0.28, "grad_norm": 0.6660495117129963, "learning_rate": 4.844160346078495e-06, "loss": 0.3154, "step": 5960 }, { "epoch": 0.28, "grad_norm": 0.5978414514688847, "learning_rate": 4.844094427798982e-06, "loss": 0.2933, "step": 5961 }, { "epoch": 0.28, "grad_norm": 0.6449413389470685, "learning_rate": 4.844028496029794e-06, "loss": 0.2984, "step": 5962 }, { "epoch": 0.28, "grad_norm": 0.6527732327039397, "learning_rate": 4.843962550771313e-06, "loss": 0.302, "step": 5963 }, { "epoch": 0.28, "grad_norm": 0.7074628963529147, "learning_rate": 4.843896592023916e-06, "loss": 0.3272, "step": 5964 }, { "epoch": 0.28, "grad_norm": 0.6407628107564335, "learning_rate": 4.843830619787982e-06, "loss": 0.3156, "step": 5965 }, { "epoch": 0.28, "grad_norm": 0.573015389404475, "learning_rate": 4.843764634063893e-06, "loss": 0.2892, "step": 5966 }, { "epoch": 0.28, "grad_norm": 0.6321261285959616, "learning_rate": 4.843698634852028e-06, "loss": 0.3247, "step": 5967 }, { "epoch": 0.28, "grad_norm": 0.6702751216847195, "learning_rate": 4.843632622152765e-06, "loss": 0.3047, "step": 5968 }, { "epoch": 0.28, "grad_norm": 0.5834801703686463, "learning_rate": 4.843566595966487e-06, "loss": 0.2836, "step": 5969 }, { "epoch": 0.28, "grad_norm": 0.6722072930320362, "learning_rate": 4.843500556293571e-06, "loss": 0.2825, "step": 5970 }, { "epoch": 0.28, "grad_norm": 0.6281727559992022, "learning_rate": 4.843434503134399e-06, "loss": 0.2994, "step": 5971 }, { "epoch": 0.28, "grad_norm": 0.5570192607239071, "learning_rate": 4.84336843648935e-06, "loss": 0.2815, "step": 5972 }, { "epoch": 0.28, "grad_norm": 0.5940634351938798, "learning_rate": 4.843302356358804e-06, "loss": 0.2892, "step": 5973 }, { "epoch": 0.28, "grad_norm": 0.598629300970294, "learning_rate": 4.843236262743143e-06, "loss": 0.2999, "step": 5974 }, { "epoch": 0.28, "grad_norm": 0.676091004075311, "learning_rate": 4.8431701556427454e-06, "loss": 0.2905, "step": 5975 }, { "epoch": 0.28, "grad_norm": 0.6777386095081095, "learning_rate": 4.8431040350579936e-06, "loss": 0.2918, "step": 5976 }, { "epoch": 0.28, "grad_norm": 0.5958910839647249, "learning_rate": 4.843037900989267e-06, "loss": 0.2861, "step": 5977 }, { "epoch": 0.28, "grad_norm": 0.63366033663219, "learning_rate": 4.842971753436945e-06, "loss": 0.3071, "step": 5978 }, { "epoch": 0.28, "grad_norm": 0.6052643315613083, "learning_rate": 4.84290559240141e-06, "loss": 0.2616, "step": 5979 }, { "epoch": 0.28, "grad_norm": 0.6187568763258997, "learning_rate": 4.842839417883042e-06, "loss": 0.3049, "step": 5980 }, { "epoch": 0.28, "grad_norm": 0.6627525371555457, "learning_rate": 4.842773229882222e-06, "loss": 0.2957, "step": 5981 }, { "epoch": 0.28, "grad_norm": 0.6407090003950746, "learning_rate": 4.842707028399332e-06, "loss": 0.2911, "step": 5982 }, { "epoch": 0.28, "grad_norm": 0.6492213028953588, "learning_rate": 4.84264081343475e-06, "loss": 0.2818, "step": 5983 }, { "epoch": 0.28, "grad_norm": 0.7075078234778968, "learning_rate": 4.84257458498886e-06, "loss": 0.3234, "step": 5984 }, { "epoch": 0.28, "grad_norm": 0.6932538218382338, "learning_rate": 4.842508343062043e-06, "loss": 0.2926, "step": 5985 }, { "epoch": 0.28, "grad_norm": 0.6406411936179812, "learning_rate": 4.842442087654677e-06, "loss": 0.2905, "step": 5986 }, { "epoch": 0.28, "grad_norm": 0.6632189519062031, "learning_rate": 4.842375818767147e-06, "loss": 0.2846, "step": 5987 }, { "epoch": 0.28, "grad_norm": 0.6257200988872497, "learning_rate": 4.842309536399833e-06, "loss": 0.2967, "step": 5988 }, { "epoch": 0.28, "grad_norm": 0.6498463538201537, "learning_rate": 4.842243240553115e-06, "loss": 0.3026, "step": 5989 }, { "epoch": 0.28, "grad_norm": 0.6349059446433909, "learning_rate": 4.842176931227378e-06, "loss": 0.315, "step": 5990 }, { "epoch": 0.28, "grad_norm": 0.6393397482455201, "learning_rate": 4.842110608423e-06, "loss": 0.3111, "step": 5991 }, { "epoch": 0.28, "grad_norm": 0.6087994312109701, "learning_rate": 4.842044272140364e-06, "loss": 0.2903, "step": 5992 }, { "epoch": 0.28, "grad_norm": 0.5884212507921193, "learning_rate": 4.8419779223798525e-06, "loss": 0.2862, "step": 5993 }, { "epoch": 0.28, "grad_norm": 0.6005678718692495, "learning_rate": 4.841911559141846e-06, "loss": 0.3063, "step": 5994 }, { "epoch": 0.28, "grad_norm": 0.5919188893564139, "learning_rate": 4.841845182426727e-06, "loss": 0.2862, "step": 5995 }, { "epoch": 0.28, "grad_norm": 0.5704842534528899, "learning_rate": 4.841778792234879e-06, "loss": 0.2938, "step": 5996 }, { "epoch": 0.28, "grad_norm": 0.6468889948709475, "learning_rate": 4.841712388566681e-06, "loss": 0.3278, "step": 5997 }, { "epoch": 0.28, "grad_norm": 0.665208112223028, "learning_rate": 4.841645971422518e-06, "loss": 0.311, "step": 5998 }, { "epoch": 0.28, "grad_norm": 0.681009090506375, "learning_rate": 4.841579540802771e-06, "loss": 0.301, "step": 5999 }, { "epoch": 0.28, "grad_norm": 0.6594225981331265, "learning_rate": 4.841513096707822e-06, "loss": 0.3021, "step": 6000 }, { "epoch": 0.28, "grad_norm": 0.6415210380952129, "learning_rate": 4.841446639138053e-06, "loss": 0.2888, "step": 6001 }, { "epoch": 0.28, "grad_norm": 0.647564768831147, "learning_rate": 4.841380168093848e-06, "loss": 0.3164, "step": 6002 }, { "epoch": 0.28, "grad_norm": 0.6494893897783077, "learning_rate": 4.841313683575587e-06, "loss": 0.3087, "step": 6003 }, { "epoch": 0.28, "grad_norm": 0.6133433892760255, "learning_rate": 4.8412471855836555e-06, "loss": 0.286, "step": 6004 }, { "epoch": 0.28, "grad_norm": 0.6254624572566048, "learning_rate": 4.841180674118435e-06, "loss": 0.3152, "step": 6005 }, { "epoch": 0.28, "grad_norm": 0.5877895194608139, "learning_rate": 4.841114149180308e-06, "loss": 0.286, "step": 6006 }, { "epoch": 0.28, "grad_norm": 0.6166443065200344, "learning_rate": 4.841047610769657e-06, "loss": 0.2848, "step": 6007 }, { "epoch": 0.28, "grad_norm": 0.7108079930860258, "learning_rate": 4.840981058886866e-06, "loss": 0.3176, "step": 6008 }, { "epoch": 0.28, "grad_norm": 0.589781330953379, "learning_rate": 4.8409144935323175e-06, "loss": 0.2952, "step": 6009 }, { "epoch": 0.28, "grad_norm": 0.6492457250228623, "learning_rate": 4.8408479147063936e-06, "loss": 0.2867, "step": 6010 }, { "epoch": 0.28, "grad_norm": 0.668868037633402, "learning_rate": 4.840781322409479e-06, "loss": 0.2998, "step": 6011 }, { "epoch": 0.28, "grad_norm": 0.6543681766228843, "learning_rate": 4.840714716641956e-06, "loss": 0.3138, "step": 6012 }, { "epoch": 0.28, "grad_norm": 0.5733036875877406, "learning_rate": 4.840648097404208e-06, "loss": 0.2763, "step": 6013 }, { "epoch": 0.28, "grad_norm": 0.6522553630932679, "learning_rate": 4.840581464696619e-06, "loss": 0.2768, "step": 6014 }, { "epoch": 0.28, "grad_norm": 0.6620364403205398, "learning_rate": 4.8405148185195715e-06, "loss": 0.3192, "step": 6015 }, { "epoch": 0.28, "grad_norm": 0.6129089968965832, "learning_rate": 4.84044815887345e-06, "loss": 0.2896, "step": 6016 }, { "epoch": 0.28, "grad_norm": 0.6451039776328191, "learning_rate": 4.8403814857586365e-06, "loss": 0.2776, "step": 6017 }, { "epoch": 0.28, "grad_norm": 0.6480821462219595, "learning_rate": 4.840314799175516e-06, "loss": 0.289, "step": 6018 }, { "epoch": 0.28, "grad_norm": 0.6346161225677094, "learning_rate": 4.840248099124474e-06, "loss": 0.2768, "step": 6019 }, { "epoch": 0.28, "grad_norm": 0.6483488194449233, "learning_rate": 4.84018138560589e-06, "loss": 0.2962, "step": 6020 }, { "epoch": 0.28, "grad_norm": 0.6885072716438164, "learning_rate": 4.840114658620151e-06, "loss": 0.3118, "step": 6021 }, { "epoch": 0.28, "grad_norm": 0.6011158148906872, "learning_rate": 4.840047918167641e-06, "loss": 0.2837, "step": 6022 }, { "epoch": 0.28, "grad_norm": 0.6985708252229311, "learning_rate": 4.839981164248742e-06, "loss": 0.2979, "step": 6023 }, { "epoch": 0.28, "grad_norm": 0.6270059424726496, "learning_rate": 4.839914396863842e-06, "loss": 0.2888, "step": 6024 }, { "epoch": 0.28, "grad_norm": 0.6087609611663648, "learning_rate": 4.839847616013321e-06, "loss": 0.2868, "step": 6025 }, { "epoch": 0.28, "grad_norm": 0.6314168992550299, "learning_rate": 4.839780821697565e-06, "loss": 0.2924, "step": 6026 }, { "epoch": 0.28, "grad_norm": 0.6594596874420995, "learning_rate": 4.839714013916958e-06, "loss": 0.3135, "step": 6027 }, { "epoch": 0.28, "grad_norm": 0.597266336465702, "learning_rate": 4.839647192671886e-06, "loss": 0.2931, "step": 6028 }, { "epoch": 0.28, "grad_norm": 0.6248692643681809, "learning_rate": 4.839580357962732e-06, "loss": 0.3043, "step": 6029 }, { "epoch": 0.28, "grad_norm": 0.6254921285047045, "learning_rate": 4.839513509789882e-06, "loss": 0.2966, "step": 6030 }, { "epoch": 0.28, "grad_norm": 0.6233430227291853, "learning_rate": 4.839446648153718e-06, "loss": 0.2984, "step": 6031 }, { "epoch": 0.28, "grad_norm": 0.6421253951698503, "learning_rate": 4.839379773054628e-06, "loss": 0.3146, "step": 6032 }, { "epoch": 0.28, "grad_norm": 0.6425075285075394, "learning_rate": 4.8393128844929945e-06, "loss": 0.3056, "step": 6033 }, { "epoch": 0.28, "grad_norm": 0.5942111337164186, "learning_rate": 4.839245982469204e-06, "loss": 0.305, "step": 6034 }, { "epoch": 0.28, "grad_norm": 0.6429502914578735, "learning_rate": 4.83917906698364e-06, "loss": 0.3025, "step": 6035 }, { "epoch": 0.28, "grad_norm": 0.6715303495527629, "learning_rate": 4.839112138036689e-06, "loss": 0.3062, "step": 6036 }, { "epoch": 0.28, "grad_norm": 0.6717183658414492, "learning_rate": 4.839045195628735e-06, "loss": 0.2842, "step": 6037 }, { "epoch": 0.28, "grad_norm": 0.6662632180020349, "learning_rate": 4.838978239760165e-06, "loss": 0.2953, "step": 6038 }, { "epoch": 0.28, "grad_norm": 0.6734085100650231, "learning_rate": 4.8389112704313625e-06, "loss": 0.3027, "step": 6039 }, { "epoch": 0.28, "grad_norm": 0.6183295724629325, "learning_rate": 4.838844287642713e-06, "loss": 0.3226, "step": 6040 }, { "epoch": 0.28, "grad_norm": 0.6411325665901693, "learning_rate": 4.838777291394603e-06, "loss": 0.3001, "step": 6041 }, { "epoch": 0.28, "grad_norm": 0.6112278347531354, "learning_rate": 4.8387102816874175e-06, "loss": 0.3014, "step": 6042 }, { "epoch": 0.28, "grad_norm": 0.6611220850655, "learning_rate": 4.838643258521542e-06, "loss": 0.3091, "step": 6043 }, { "epoch": 0.28, "grad_norm": 0.5936041703050642, "learning_rate": 4.838576221897362e-06, "loss": 0.2923, "step": 6044 }, { "epoch": 0.28, "grad_norm": 0.5805467938838257, "learning_rate": 4.838509171815264e-06, "loss": 0.2925, "step": 6045 }, { "epoch": 0.28, "grad_norm": 0.638496511588802, "learning_rate": 4.838442108275634e-06, "loss": 0.2897, "step": 6046 }, { "epoch": 0.28, "grad_norm": 0.6583981333077271, "learning_rate": 4.838375031278857e-06, "loss": 0.3109, "step": 6047 }, { "epoch": 0.28, "grad_norm": 0.672307871166089, "learning_rate": 4.838307940825319e-06, "loss": 0.3298, "step": 6048 }, { "epoch": 0.28, "grad_norm": 0.6286993189607331, "learning_rate": 4.838240836915406e-06, "loss": 0.2862, "step": 6049 }, { "epoch": 0.28, "grad_norm": 0.6835105995084039, "learning_rate": 4.838173719549506e-06, "loss": 0.3353, "step": 6050 }, { "epoch": 0.28, "grad_norm": 0.6714711075136224, "learning_rate": 4.838106588728003e-06, "loss": 0.3104, "step": 6051 }, { "epoch": 0.28, "grad_norm": 0.5685838840548277, "learning_rate": 4.838039444451284e-06, "loss": 0.2757, "step": 6052 }, { "epoch": 0.28, "grad_norm": 0.6623029517172029, "learning_rate": 4.837972286719738e-06, "loss": 0.2758, "step": 6053 }, { "epoch": 0.28, "grad_norm": 0.6440980345704533, "learning_rate": 4.837905115533747e-06, "loss": 0.3099, "step": 6054 }, { "epoch": 0.28, "grad_norm": 0.7067552725960652, "learning_rate": 4.837837930893699e-06, "loss": 0.3192, "step": 6055 }, { "epoch": 0.28, "grad_norm": 0.6411045753223592, "learning_rate": 4.837770732799983e-06, "loss": 0.2984, "step": 6056 }, { "epoch": 0.28, "grad_norm": 0.6308507393043443, "learning_rate": 4.837703521252983e-06, "loss": 0.3114, "step": 6057 }, { "epoch": 0.28, "grad_norm": 0.5915193399436118, "learning_rate": 4.837636296253088e-06, "loss": 0.288, "step": 6058 }, { "epoch": 0.28, "grad_norm": 0.6398371406008709, "learning_rate": 4.837569057800682e-06, "loss": 0.305, "step": 6059 }, { "epoch": 0.28, "grad_norm": 0.6896049416619332, "learning_rate": 4.8375018058961544e-06, "loss": 0.3156, "step": 6060 }, { "epoch": 0.28, "grad_norm": 0.6354591332295186, "learning_rate": 4.837434540539891e-06, "loss": 0.3267, "step": 6061 }, { "epoch": 0.28, "grad_norm": 0.5986843754799889, "learning_rate": 4.8373672617322805e-06, "loss": 0.3009, "step": 6062 }, { "epoch": 0.28, "grad_norm": 0.621101368708306, "learning_rate": 4.837299969473708e-06, "loss": 0.3114, "step": 6063 }, { "epoch": 0.28, "grad_norm": 0.6182569022300022, "learning_rate": 4.837232663764562e-06, "loss": 0.3035, "step": 6064 }, { "epoch": 0.28, "grad_norm": 0.5841591890598196, "learning_rate": 4.837165344605229e-06, "loss": 0.287, "step": 6065 }, { "epoch": 0.28, "grad_norm": 0.5927110735075333, "learning_rate": 4.8370980119960975e-06, "loss": 0.3047, "step": 6066 }, { "epoch": 0.28, "grad_norm": 0.5845960920386993, "learning_rate": 4.837030665937554e-06, "loss": 0.2799, "step": 6067 }, { "epoch": 0.28, "grad_norm": 0.6407592130989415, "learning_rate": 4.836963306429986e-06, "loss": 0.3246, "step": 6068 }, { "epoch": 0.28, "grad_norm": 0.6522987955020332, "learning_rate": 4.836895933473782e-06, "loss": 0.3156, "step": 6069 }, { "epoch": 0.28, "grad_norm": 0.6612106040304513, "learning_rate": 4.836828547069329e-06, "loss": 0.2864, "step": 6070 }, { "epoch": 0.28, "grad_norm": 0.6403054229247401, "learning_rate": 4.836761147217015e-06, "loss": 0.2727, "step": 6071 }, { "epoch": 0.28, "grad_norm": 0.6514679582182634, "learning_rate": 4.836693733917228e-06, "loss": 0.3063, "step": 6072 }, { "epoch": 0.28, "grad_norm": 0.6668351612818895, "learning_rate": 4.836626307170356e-06, "loss": 0.3133, "step": 6073 }, { "epoch": 0.28, "grad_norm": 0.589988857709807, "learning_rate": 4.8365588669767875e-06, "loss": 0.2933, "step": 6074 }, { "epoch": 0.28, "grad_norm": 0.6015470009211403, "learning_rate": 4.836491413336909e-06, "loss": 0.2839, "step": 6075 }, { "epoch": 0.28, "grad_norm": 0.6071497010615169, "learning_rate": 4.83642394625111e-06, "loss": 0.2949, "step": 6076 }, { "epoch": 0.28, "grad_norm": 0.6875401359293061, "learning_rate": 4.836356465719779e-06, "loss": 0.3078, "step": 6077 }, { "epoch": 0.28, "grad_norm": 0.6936024438191227, "learning_rate": 4.836288971743302e-06, "loss": 0.3198, "step": 6078 }, { "epoch": 0.28, "grad_norm": 0.5820892322199928, "learning_rate": 4.836221464322071e-06, "loss": 0.2805, "step": 6079 }, { "epoch": 0.28, "grad_norm": 0.611905587201151, "learning_rate": 4.836153943456472e-06, "loss": 0.3039, "step": 6080 }, { "epoch": 0.28, "grad_norm": 0.6312654863658659, "learning_rate": 4.8360864091468945e-06, "loss": 0.3093, "step": 6081 }, { "epoch": 0.28, "grad_norm": 0.6593349747280555, "learning_rate": 4.836018861393727e-06, "loss": 0.2835, "step": 6082 }, { "epoch": 0.28, "grad_norm": 0.5917654356346687, "learning_rate": 4.835951300197358e-06, "loss": 0.2992, "step": 6083 }, { "epoch": 0.29, "grad_norm": 0.5822493744426046, "learning_rate": 4.835883725558176e-06, "loss": 0.301, "step": 6084 }, { "epoch": 0.29, "grad_norm": 0.6366000970896043, "learning_rate": 4.83581613747657e-06, "loss": 0.2779, "step": 6085 }, { "epoch": 0.29, "grad_norm": 0.6055288708576175, "learning_rate": 4.8357485359529295e-06, "loss": 0.2708, "step": 6086 }, { "epoch": 0.29, "grad_norm": 0.6359981855450109, "learning_rate": 4.835680920987643e-06, "loss": 0.2886, "step": 6087 }, { "epoch": 0.29, "grad_norm": 0.6021867205938356, "learning_rate": 4.8356132925811005e-06, "loss": 0.3128, "step": 6088 }, { "epoch": 0.29, "grad_norm": 0.5623478180645314, "learning_rate": 4.8355456507336905e-06, "loss": 0.2725, "step": 6089 }, { "epoch": 0.29, "grad_norm": 0.5933588984513485, "learning_rate": 4.835477995445802e-06, "loss": 0.3014, "step": 6090 }, { "epoch": 0.29, "grad_norm": 0.626574338029129, "learning_rate": 4.835410326717824e-06, "loss": 0.29, "step": 6091 }, { "epoch": 0.29, "grad_norm": 0.6427367922510132, "learning_rate": 4.835342644550148e-06, "loss": 0.2821, "step": 6092 }, { "epoch": 0.29, "grad_norm": 0.6844135188127077, "learning_rate": 4.835274948943161e-06, "loss": 0.317, "step": 6093 }, { "epoch": 0.29, "grad_norm": 0.6248553683952823, "learning_rate": 4.835207239897254e-06, "loss": 0.312, "step": 6094 }, { "epoch": 0.29, "grad_norm": 0.6482519186252742, "learning_rate": 4.835139517412816e-06, "loss": 0.3015, "step": 6095 }, { "epoch": 0.29, "grad_norm": 0.6555595322431483, "learning_rate": 4.835071781490237e-06, "loss": 0.2881, "step": 6096 }, { "epoch": 0.29, "grad_norm": 0.6161368073613261, "learning_rate": 4.835004032129907e-06, "loss": 0.2656, "step": 6097 }, { "epoch": 0.29, "grad_norm": 0.6489072937901694, "learning_rate": 4.834936269332216e-06, "loss": 0.3217, "step": 6098 }, { "epoch": 0.29, "grad_norm": 0.6340565652621348, "learning_rate": 4.834868493097553e-06, "loss": 0.2963, "step": 6099 }, { "epoch": 0.29, "grad_norm": 0.6620323586581536, "learning_rate": 4.834800703426309e-06, "loss": 0.3138, "step": 6100 }, { "epoch": 0.29, "grad_norm": 0.669412107084942, "learning_rate": 4.834732900318874e-06, "loss": 0.3032, "step": 6101 }, { "epoch": 0.29, "grad_norm": 0.6172699911233478, "learning_rate": 4.834665083775637e-06, "loss": 0.3069, "step": 6102 }, { "epoch": 0.29, "grad_norm": 0.6402761899613656, "learning_rate": 4.834597253796991e-06, "loss": 0.2873, "step": 6103 }, { "epoch": 0.29, "grad_norm": 0.6211924729806145, "learning_rate": 4.834529410383323e-06, "loss": 0.3056, "step": 6104 }, { "epoch": 0.29, "grad_norm": 0.5994686166558912, "learning_rate": 4.834461553535026e-06, "loss": 0.2854, "step": 6105 }, { "epoch": 0.29, "grad_norm": 0.614437868838758, "learning_rate": 4.8343936832524886e-06, "loss": 0.2989, "step": 6106 }, { "epoch": 0.29, "grad_norm": 0.6837533211492401, "learning_rate": 4.834325799536103e-06, "loss": 0.3119, "step": 6107 }, { "epoch": 0.29, "grad_norm": 0.6528006121795278, "learning_rate": 4.834257902386258e-06, "loss": 0.3011, "step": 6108 }, { "epoch": 0.29, "grad_norm": 0.6570276846179137, "learning_rate": 4.834189991803346e-06, "loss": 0.304, "step": 6109 }, { "epoch": 0.29, "grad_norm": 0.5534265579431968, "learning_rate": 4.834122067787758e-06, "loss": 0.2681, "step": 6110 }, { "epoch": 0.29, "grad_norm": 0.6370350337305527, "learning_rate": 4.834054130339883e-06, "loss": 0.2983, "step": 6111 }, { "epoch": 0.29, "grad_norm": 0.572656736708997, "learning_rate": 4.833986179460115e-06, "loss": 0.2657, "step": 6112 }, { "epoch": 0.29, "grad_norm": 0.6514759959371614, "learning_rate": 4.833918215148842e-06, "loss": 0.2906, "step": 6113 }, { "epoch": 0.29, "grad_norm": 0.6227198870633748, "learning_rate": 4.833850237406456e-06, "loss": 0.3006, "step": 6114 }, { "epoch": 0.29, "grad_norm": 0.6644774485763411, "learning_rate": 4.833782246233349e-06, "loss": 0.3194, "step": 6115 }, { "epoch": 0.29, "grad_norm": 0.6115921356479154, "learning_rate": 4.833714241629911e-06, "loss": 0.2932, "step": 6116 }, { "epoch": 0.29, "grad_norm": 0.5945341376399713, "learning_rate": 4.833646223596535e-06, "loss": 0.2838, "step": 6117 }, { "epoch": 0.29, "grad_norm": 0.637783777465528, "learning_rate": 4.833578192133611e-06, "loss": 0.3122, "step": 6118 }, { "epoch": 0.29, "grad_norm": 0.5992910703038457, "learning_rate": 4.8335101472415315e-06, "loss": 0.3107, "step": 6119 }, { "epoch": 0.29, "grad_norm": 0.6139078417388202, "learning_rate": 4.833442088920687e-06, "loss": 0.3051, "step": 6120 }, { "epoch": 0.29, "grad_norm": 0.6650532384465274, "learning_rate": 4.83337401717147e-06, "loss": 0.3013, "step": 6121 }, { "epoch": 0.29, "grad_norm": 0.6775317467842543, "learning_rate": 4.833305931994272e-06, "loss": 0.2948, "step": 6122 }, { "epoch": 0.29, "grad_norm": 0.6728369278225129, "learning_rate": 4.8332378333894845e-06, "loss": 0.3196, "step": 6123 }, { "epoch": 0.29, "grad_norm": 0.5967386511142584, "learning_rate": 4.8331697213575e-06, "loss": 0.2869, "step": 6124 }, { "epoch": 0.29, "grad_norm": 0.6142876650372826, "learning_rate": 4.833101595898711e-06, "loss": 0.2878, "step": 6125 }, { "epoch": 0.29, "grad_norm": 0.6022768204600467, "learning_rate": 4.8330334570135075e-06, "loss": 0.2912, "step": 6126 }, { "epoch": 0.29, "grad_norm": 0.6368535352624674, "learning_rate": 4.832965304702283e-06, "loss": 0.2973, "step": 6127 }, { "epoch": 0.29, "grad_norm": 0.6467868380238526, "learning_rate": 4.83289713896543e-06, "loss": 0.3314, "step": 6128 }, { "epoch": 0.29, "grad_norm": 0.7090568208728026, "learning_rate": 4.832828959803341e-06, "loss": 0.3092, "step": 6129 }, { "epoch": 0.29, "grad_norm": 0.6079219067863283, "learning_rate": 4.832760767216406e-06, "loss": 0.2784, "step": 6130 }, { "epoch": 0.29, "grad_norm": 0.63223182564173, "learning_rate": 4.83269256120502e-06, "loss": 0.3038, "step": 6131 }, { "epoch": 0.29, "grad_norm": 0.6434725459124763, "learning_rate": 4.8326243417695745e-06, "loss": 0.296, "step": 6132 }, { "epoch": 0.29, "grad_norm": 0.6820529138655161, "learning_rate": 4.832556108910462e-06, "loss": 0.3137, "step": 6133 }, { "epoch": 0.29, "grad_norm": 0.6057806537064832, "learning_rate": 4.832487862628076e-06, "loss": 0.293, "step": 6134 }, { "epoch": 0.29, "grad_norm": 0.6645310157882716, "learning_rate": 4.832419602922808e-06, "loss": 0.3294, "step": 6135 }, { "epoch": 0.29, "grad_norm": 0.6466489702068363, "learning_rate": 4.832351329795052e-06, "loss": 0.2886, "step": 6136 }, { "epoch": 0.29, "grad_norm": 0.6576037291064445, "learning_rate": 4.8322830432452e-06, "loss": 0.2898, "step": 6137 }, { "epoch": 0.29, "grad_norm": 0.6096492616969632, "learning_rate": 4.832214743273645e-06, "loss": 0.2928, "step": 6138 }, { "epoch": 0.29, "grad_norm": 0.6201326708403206, "learning_rate": 4.832146429880781e-06, "loss": 0.2853, "step": 6139 }, { "epoch": 0.29, "grad_norm": 0.5785325167318988, "learning_rate": 4.832078103066999e-06, "loss": 0.2872, "step": 6140 }, { "epoch": 0.29, "grad_norm": 0.6498795478121165, "learning_rate": 4.832009762832695e-06, "loss": 0.295, "step": 6141 }, { "epoch": 0.29, "grad_norm": 0.5982552464424951, "learning_rate": 4.83194140917826e-06, "loss": 0.2916, "step": 6142 }, { "epoch": 0.29, "grad_norm": 0.6422558920275796, "learning_rate": 4.831873042104089e-06, "loss": 0.2983, "step": 6143 }, { "epoch": 0.29, "grad_norm": 0.5882375907541039, "learning_rate": 4.831804661610575e-06, "loss": 0.2945, "step": 6144 }, { "epoch": 0.29, "grad_norm": 0.6096071254124906, "learning_rate": 4.831736267698111e-06, "loss": 0.2913, "step": 6145 }, { "epoch": 0.29, "grad_norm": 0.6049377784985884, "learning_rate": 4.831667860367091e-06, "loss": 0.2903, "step": 6146 }, { "epoch": 0.29, "grad_norm": 0.5886565624064719, "learning_rate": 4.831599439617908e-06, "loss": 0.2903, "step": 6147 }, { "epoch": 0.29, "grad_norm": 0.6398377444155218, "learning_rate": 4.831531005450957e-06, "loss": 0.2891, "step": 6148 }, { "epoch": 0.29, "grad_norm": 0.6181813686115628, "learning_rate": 4.83146255786663e-06, "loss": 0.281, "step": 6149 }, { "epoch": 0.29, "grad_norm": 0.6678307382050109, "learning_rate": 4.831394096865323e-06, "loss": 0.2909, "step": 6150 }, { "epoch": 0.29, "grad_norm": 0.597301641306217, "learning_rate": 4.831325622447428e-06, "loss": 0.2697, "step": 6151 }, { "epoch": 0.29, "grad_norm": 0.6529362896774769, "learning_rate": 4.831257134613341e-06, "loss": 0.3074, "step": 6152 }, { "epoch": 0.29, "grad_norm": 0.6521808785017011, "learning_rate": 4.8311886333634535e-06, "loss": 0.3236, "step": 6153 }, { "epoch": 0.29, "grad_norm": 0.6460471686357455, "learning_rate": 4.831120118698162e-06, "loss": 0.2916, "step": 6154 }, { "epoch": 0.29, "grad_norm": 0.5891747211924871, "learning_rate": 4.8310515906178616e-06, "loss": 0.278, "step": 6155 }, { "epoch": 0.29, "grad_norm": 0.5976530378727991, "learning_rate": 4.830983049122944e-06, "loss": 0.2861, "step": 6156 }, { "epoch": 0.29, "grad_norm": 0.6279070832339828, "learning_rate": 4.8309144942138056e-06, "loss": 0.2875, "step": 6157 }, { "epoch": 0.29, "grad_norm": 0.6598691784122568, "learning_rate": 4.830845925890839e-06, "loss": 0.2943, "step": 6158 }, { "epoch": 0.29, "grad_norm": 0.676235546638165, "learning_rate": 4.830777344154441e-06, "loss": 0.3123, "step": 6159 }, { "epoch": 0.29, "grad_norm": 0.6314937752879032, "learning_rate": 4.830708749005004e-06, "loss": 0.2861, "step": 6160 }, { "epoch": 0.29, "grad_norm": 0.6894203032175898, "learning_rate": 4.830640140442925e-06, "loss": 0.2973, "step": 6161 }, { "epoch": 0.29, "grad_norm": 0.5973585543599856, "learning_rate": 4.830571518468597e-06, "loss": 0.3008, "step": 6162 }, { "epoch": 0.29, "grad_norm": 0.6413765248096862, "learning_rate": 4.8305028830824165e-06, "loss": 0.325, "step": 6163 }, { "epoch": 0.29, "grad_norm": 0.6286168214435779, "learning_rate": 4.8304342342847765e-06, "loss": 0.3248, "step": 6164 }, { "epoch": 0.29, "grad_norm": 0.6954494914628044, "learning_rate": 4.830365572076074e-06, "loss": 0.3305, "step": 6165 }, { "epoch": 0.29, "grad_norm": 0.6763239059768207, "learning_rate": 4.830296896456703e-06, "loss": 0.3182, "step": 6166 }, { "epoch": 0.29, "grad_norm": 0.6789381225887489, "learning_rate": 4.83022820742706e-06, "loss": 0.3037, "step": 6167 }, { "epoch": 0.29, "grad_norm": 0.6051282701072567, "learning_rate": 4.830159504987538e-06, "loss": 0.283, "step": 6168 }, { "epoch": 0.29, "grad_norm": 0.6048225510758175, "learning_rate": 4.830090789138535e-06, "loss": 0.2955, "step": 6169 }, { "epoch": 0.29, "grad_norm": 0.61838660935659, "learning_rate": 4.830022059880444e-06, "loss": 0.3049, "step": 6170 }, { "epoch": 0.29, "grad_norm": 0.6170324376194507, "learning_rate": 4.829953317213663e-06, "loss": 0.2873, "step": 6171 }, { "epoch": 0.29, "grad_norm": 0.7009121609658738, "learning_rate": 4.829884561138585e-06, "loss": 0.3057, "step": 6172 }, { "epoch": 0.29, "grad_norm": 0.6706309967351847, "learning_rate": 4.829815791655608e-06, "loss": 0.3121, "step": 6173 }, { "epoch": 0.29, "grad_norm": 0.60741202655159, "learning_rate": 4.829747008765126e-06, "loss": 0.2896, "step": 6174 }, { "epoch": 0.29, "grad_norm": 0.6254078222265819, "learning_rate": 4.829678212467535e-06, "loss": 0.3003, "step": 6175 }, { "epoch": 0.29, "grad_norm": 0.5771814705796915, "learning_rate": 4.829609402763232e-06, "loss": 0.2958, "step": 6176 }, { "epoch": 0.29, "grad_norm": 0.6706467295394561, "learning_rate": 4.829540579652613e-06, "loss": 0.308, "step": 6177 }, { "epoch": 0.29, "grad_norm": 0.650753194087434, "learning_rate": 4.829471743136073e-06, "loss": 0.2982, "step": 6178 }, { "epoch": 0.29, "grad_norm": 0.6080503665808172, "learning_rate": 4.829402893214008e-06, "loss": 0.3045, "step": 6179 }, { "epoch": 0.29, "grad_norm": 0.6296529914749694, "learning_rate": 4.829334029886816e-06, "loss": 0.3237, "step": 6180 }, { "epoch": 0.29, "grad_norm": 0.6033902460577066, "learning_rate": 4.829265153154892e-06, "loss": 0.2864, "step": 6181 }, { "epoch": 0.29, "grad_norm": 0.5912592308095502, "learning_rate": 4.829196263018631e-06, "loss": 0.2908, "step": 6182 }, { "epoch": 0.29, "grad_norm": 0.6064464711070964, "learning_rate": 4.829127359478432e-06, "loss": 0.3053, "step": 6183 }, { "epoch": 0.29, "grad_norm": 0.6412770277734395, "learning_rate": 4.82905844253469e-06, "loss": 0.2997, "step": 6184 }, { "epoch": 0.29, "grad_norm": 0.6159717403169443, "learning_rate": 4.828989512187802e-06, "loss": 0.3046, "step": 6185 }, { "epoch": 0.29, "grad_norm": 0.6792644277316943, "learning_rate": 4.828920568438166e-06, "loss": 0.2938, "step": 6186 }, { "epoch": 0.29, "grad_norm": 0.704502102991183, "learning_rate": 4.828851611286176e-06, "loss": 0.3359, "step": 6187 }, { "epoch": 0.29, "grad_norm": 0.6454318771975012, "learning_rate": 4.828782640732231e-06, "loss": 0.2948, "step": 6188 }, { "epoch": 0.29, "grad_norm": 0.5825006833439049, "learning_rate": 4.828713656776728e-06, "loss": 0.2785, "step": 6189 }, { "epoch": 0.29, "grad_norm": 0.7236492350002763, "learning_rate": 4.828644659420062e-06, "loss": 0.3131, "step": 6190 }, { "epoch": 0.29, "grad_norm": 0.6697252498241998, "learning_rate": 4.828575648662632e-06, "loss": 0.2845, "step": 6191 }, { "epoch": 0.29, "grad_norm": 0.6208464829709375, "learning_rate": 4.828506624504834e-06, "loss": 0.2828, "step": 6192 }, { "epoch": 0.29, "grad_norm": 0.5916982339616055, "learning_rate": 4.828437586947066e-06, "loss": 0.2919, "step": 6193 }, { "epoch": 0.29, "grad_norm": 0.5901090355619375, "learning_rate": 4.828368535989724e-06, "loss": 0.2934, "step": 6194 }, { "epoch": 0.29, "grad_norm": 0.6269521630022687, "learning_rate": 4.8282994716332066e-06, "loss": 0.3163, "step": 6195 }, { "epoch": 0.29, "grad_norm": 0.6168646626339218, "learning_rate": 4.828230393877912e-06, "loss": 0.296, "step": 6196 }, { "epoch": 0.29, "grad_norm": 0.6474631635649819, "learning_rate": 4.828161302724235e-06, "loss": 0.2977, "step": 6197 }, { "epoch": 0.29, "grad_norm": 0.633074779458406, "learning_rate": 4.828092198172576e-06, "loss": 0.3125, "step": 6198 }, { "epoch": 0.29, "grad_norm": 0.6734513639230094, "learning_rate": 4.828023080223331e-06, "loss": 0.3119, "step": 6199 }, { "epoch": 0.29, "grad_norm": 0.641222043044516, "learning_rate": 4.827953948876899e-06, "loss": 0.3054, "step": 6200 }, { "epoch": 0.29, "grad_norm": 0.6120933927239239, "learning_rate": 4.8278848041336765e-06, "loss": 0.2947, "step": 6201 }, { "epoch": 0.29, "grad_norm": 0.651858636680289, "learning_rate": 4.827815645994063e-06, "loss": 0.3184, "step": 6202 }, { "epoch": 0.29, "grad_norm": 0.6541921478678899, "learning_rate": 4.827746474458454e-06, "loss": 0.3141, "step": 6203 }, { "epoch": 0.29, "grad_norm": 0.6362140138901661, "learning_rate": 4.82767728952725e-06, "loss": 0.2816, "step": 6204 }, { "epoch": 0.29, "grad_norm": 0.5612376954588207, "learning_rate": 4.8276080912008484e-06, "loss": 0.2919, "step": 6205 }, { "epoch": 0.29, "grad_norm": 0.6396547392887985, "learning_rate": 4.827538879479647e-06, "loss": 0.3158, "step": 6206 }, { "epoch": 0.29, "grad_norm": 0.5865604286023868, "learning_rate": 4.827469654364044e-06, "loss": 0.2854, "step": 6207 }, { "epoch": 0.29, "grad_norm": 0.5938362860402169, "learning_rate": 4.827400415854439e-06, "loss": 0.2924, "step": 6208 }, { "epoch": 0.29, "grad_norm": 0.6134516504065471, "learning_rate": 4.827331163951229e-06, "loss": 0.2847, "step": 6209 }, { "epoch": 0.29, "grad_norm": 0.6081738293021037, "learning_rate": 4.827261898654812e-06, "loss": 0.2759, "step": 6210 }, { "epoch": 0.29, "grad_norm": 0.6027121772861329, "learning_rate": 4.827192619965589e-06, "loss": 0.3017, "step": 6211 }, { "epoch": 0.29, "grad_norm": 0.630974561705865, "learning_rate": 4.827123327883958e-06, "loss": 0.3135, "step": 6212 }, { "epoch": 0.29, "grad_norm": 0.6326911609121806, "learning_rate": 4.8270540224103165e-06, "loss": 0.3011, "step": 6213 }, { "epoch": 0.29, "grad_norm": 0.6590548382741164, "learning_rate": 4.826984703545064e-06, "loss": 0.3417, "step": 6214 }, { "epoch": 0.29, "grad_norm": 0.5728792832178264, "learning_rate": 4.826915371288599e-06, "loss": 0.2858, "step": 6215 }, { "epoch": 0.29, "grad_norm": 0.6404974736456153, "learning_rate": 4.8268460256413205e-06, "loss": 0.3137, "step": 6216 }, { "epoch": 0.29, "grad_norm": 0.6503251717981495, "learning_rate": 4.8267766666036295e-06, "loss": 0.3004, "step": 6217 }, { "epoch": 0.29, "grad_norm": 0.6524337358572297, "learning_rate": 4.826707294175922e-06, "loss": 0.3224, "step": 6218 }, { "epoch": 0.29, "grad_norm": 0.7004827445272358, "learning_rate": 4.8266379083586e-06, "loss": 0.3418, "step": 6219 }, { "epoch": 0.29, "grad_norm": 0.5908097073587754, "learning_rate": 4.826568509152061e-06, "loss": 0.2831, "step": 6220 }, { "epoch": 0.29, "grad_norm": 0.5982626412902401, "learning_rate": 4.826499096556705e-06, "loss": 0.2822, "step": 6221 }, { "epoch": 0.29, "grad_norm": 0.6187232839989909, "learning_rate": 4.826429670572932e-06, "loss": 0.3103, "step": 6222 }, { "epoch": 0.29, "grad_norm": 0.6242442852854508, "learning_rate": 4.82636023120114e-06, "loss": 0.3018, "step": 6223 }, { "epoch": 0.29, "grad_norm": 0.6086009876073766, "learning_rate": 4.82629077844173e-06, "loss": 0.2901, "step": 6224 }, { "epoch": 0.29, "grad_norm": 0.6170700033010145, "learning_rate": 4.826221312295102e-06, "loss": 0.3147, "step": 6225 }, { "epoch": 0.29, "grad_norm": 0.5820348572619357, "learning_rate": 4.826151832761654e-06, "loss": 0.3059, "step": 6226 }, { "epoch": 0.29, "grad_norm": 0.6331252224948062, "learning_rate": 4.826082339841788e-06, "loss": 0.2825, "step": 6227 }, { "epoch": 0.29, "grad_norm": 0.6204015862275749, "learning_rate": 4.826012833535901e-06, "loss": 0.2954, "step": 6228 }, { "epoch": 0.29, "grad_norm": 0.6991658640526733, "learning_rate": 4.825943313844396e-06, "loss": 0.331, "step": 6229 }, { "epoch": 0.29, "grad_norm": 0.6348837396177787, "learning_rate": 4.825873780767672e-06, "loss": 0.2845, "step": 6230 }, { "epoch": 0.29, "grad_norm": 0.6496128958363722, "learning_rate": 4.825804234306128e-06, "loss": 0.2999, "step": 6231 }, { "epoch": 0.29, "grad_norm": 0.583214809642661, "learning_rate": 4.825734674460166e-06, "loss": 0.2814, "step": 6232 }, { "epoch": 0.29, "grad_norm": 0.6716636893626531, "learning_rate": 4.825665101230186e-06, "loss": 0.2969, "step": 6233 }, { "epoch": 0.29, "grad_norm": 0.6073187392325825, "learning_rate": 4.825595514616587e-06, "loss": 0.2864, "step": 6234 }, { "epoch": 0.29, "grad_norm": 0.6050366744698725, "learning_rate": 4.82552591461977e-06, "loss": 0.2759, "step": 6235 }, { "epoch": 0.29, "grad_norm": 0.6166834005617311, "learning_rate": 4.825456301240137e-06, "loss": 0.3055, "step": 6236 }, { "epoch": 0.29, "grad_norm": 0.6448745295715907, "learning_rate": 4.825386674478087e-06, "loss": 0.3155, "step": 6237 }, { "epoch": 0.29, "grad_norm": 0.6333298280665817, "learning_rate": 4.825317034334021e-06, "loss": 0.3019, "step": 6238 }, { "epoch": 0.29, "grad_norm": 0.6128801458695848, "learning_rate": 4.825247380808339e-06, "loss": 0.284, "step": 6239 }, { "epoch": 0.29, "grad_norm": 0.6018249359682017, "learning_rate": 4.8251777139014446e-06, "loss": 0.2953, "step": 6240 }, { "epoch": 0.29, "grad_norm": 0.6097062938721395, "learning_rate": 4.825108033613736e-06, "loss": 0.3029, "step": 6241 }, { "epoch": 0.29, "grad_norm": 0.6480676947375177, "learning_rate": 4.825038339945615e-06, "loss": 0.3012, "step": 6242 }, { "epoch": 0.29, "grad_norm": 0.6080002268223272, "learning_rate": 4.824968632897482e-06, "loss": 0.3053, "step": 6243 }, { "epoch": 0.29, "grad_norm": 0.601383805836042, "learning_rate": 4.8248989124697395e-06, "loss": 0.2919, "step": 6244 }, { "epoch": 0.29, "grad_norm": 0.6388275344916537, "learning_rate": 4.824829178662789e-06, "loss": 0.2953, "step": 6245 }, { "epoch": 0.29, "grad_norm": 0.6089038689160932, "learning_rate": 4.824759431477029e-06, "loss": 0.2815, "step": 6246 }, { "epoch": 0.29, "grad_norm": 0.593185595824597, "learning_rate": 4.824689670912864e-06, "loss": 0.2925, "step": 6247 }, { "epoch": 0.29, "grad_norm": 0.6416295180858544, "learning_rate": 4.824619896970694e-06, "loss": 0.3101, "step": 6248 }, { "epoch": 0.29, "grad_norm": 0.6216879117077058, "learning_rate": 4.824550109650922e-06, "loss": 0.3115, "step": 6249 }, { "epoch": 0.29, "grad_norm": 0.6068488287014918, "learning_rate": 4.824480308953947e-06, "loss": 0.2865, "step": 6250 }, { "epoch": 0.29, "grad_norm": 0.6283863450383598, "learning_rate": 4.8244104948801715e-06, "loss": 0.2848, "step": 6251 }, { "epoch": 0.29, "grad_norm": 0.6222193812955549, "learning_rate": 4.824340667429999e-06, "loss": 0.2922, "step": 6252 }, { "epoch": 0.29, "grad_norm": 0.667240762147269, "learning_rate": 4.82427082660383e-06, "loss": 0.3245, "step": 6253 }, { "epoch": 0.29, "grad_norm": 0.6989825463117072, "learning_rate": 4.824200972402066e-06, "loss": 0.2998, "step": 6254 }, { "epoch": 0.29, "grad_norm": 0.6125829307319174, "learning_rate": 4.82413110482511e-06, "loss": 0.3021, "step": 6255 }, { "epoch": 0.29, "grad_norm": 0.6541049535929004, "learning_rate": 4.824061223873364e-06, "loss": 0.3063, "step": 6256 }, { "epoch": 0.29, "grad_norm": 0.5912084419052653, "learning_rate": 4.823991329547229e-06, "loss": 0.2825, "step": 6257 }, { "epoch": 0.29, "grad_norm": 0.5940284268866959, "learning_rate": 4.8239214218471085e-06, "loss": 0.2997, "step": 6258 }, { "epoch": 0.29, "grad_norm": 0.6377009525479451, "learning_rate": 4.823851500773404e-06, "loss": 0.2983, "step": 6259 }, { "epoch": 0.29, "grad_norm": 0.622773327934764, "learning_rate": 4.823781566326518e-06, "loss": 0.2978, "step": 6260 }, { "epoch": 0.29, "grad_norm": 0.6059864876233799, "learning_rate": 4.823711618506854e-06, "loss": 0.2854, "step": 6261 }, { "epoch": 0.29, "grad_norm": 0.640723701040838, "learning_rate": 4.823641657314814e-06, "loss": 0.2965, "step": 6262 }, { "epoch": 0.29, "grad_norm": 0.6753156265423013, "learning_rate": 4.823571682750799e-06, "loss": 0.2898, "step": 6263 }, { "epoch": 0.29, "grad_norm": 0.6161735674979459, "learning_rate": 4.823501694815213e-06, "loss": 0.2901, "step": 6264 }, { "epoch": 0.29, "grad_norm": 0.6125230546562012, "learning_rate": 4.823431693508459e-06, "loss": 0.3054, "step": 6265 }, { "epoch": 0.29, "grad_norm": 0.6190175007193714, "learning_rate": 4.82336167883094e-06, "loss": 0.293, "step": 6266 }, { "epoch": 0.29, "grad_norm": 0.6622429062263344, "learning_rate": 4.823291650783058e-06, "loss": 0.313, "step": 6267 }, { "epoch": 0.29, "grad_norm": 0.6551202127690198, "learning_rate": 4.8232216093652175e-06, "loss": 0.2842, "step": 6268 }, { "epoch": 0.29, "grad_norm": 0.6479131190333739, "learning_rate": 4.82315155457782e-06, "loss": 0.3046, "step": 6269 }, { "epoch": 0.29, "grad_norm": 0.6501957695487285, "learning_rate": 4.823081486421268e-06, "loss": 0.2915, "step": 6270 }, { "epoch": 0.29, "grad_norm": 0.5894734634415305, "learning_rate": 4.823011404895967e-06, "loss": 0.2701, "step": 6271 }, { "epoch": 0.29, "grad_norm": 0.6872826614220638, "learning_rate": 4.82294131000232e-06, "loss": 0.2861, "step": 6272 }, { "epoch": 0.29, "grad_norm": 0.6042059707282164, "learning_rate": 4.822871201740729e-06, "loss": 0.2892, "step": 6273 }, { "epoch": 0.29, "grad_norm": 0.6458100040970107, "learning_rate": 4.822801080111598e-06, "loss": 0.2831, "step": 6274 }, { "epoch": 0.29, "grad_norm": 0.6368445320513748, "learning_rate": 4.82273094511533e-06, "loss": 0.2994, "step": 6275 }, { "epoch": 0.29, "grad_norm": 0.6827989749286195, "learning_rate": 4.82266079675233e-06, "loss": 0.3332, "step": 6276 }, { "epoch": 0.29, "grad_norm": 0.5863885666635745, "learning_rate": 4.822590635023e-06, "loss": 0.2915, "step": 6277 }, { "epoch": 0.29, "grad_norm": 0.6465857853220457, "learning_rate": 4.822520459927746e-06, "loss": 0.2908, "step": 6278 }, { "epoch": 0.29, "grad_norm": 0.7982706344112456, "learning_rate": 4.822450271466969e-06, "loss": 0.3014, "step": 6279 }, { "epoch": 0.29, "grad_norm": 0.6626623155642585, "learning_rate": 4.8223800696410746e-06, "loss": 0.282, "step": 6280 }, { "epoch": 0.29, "grad_norm": 0.7242156119607864, "learning_rate": 4.822309854450467e-06, "loss": 0.3153, "step": 6281 }, { "epoch": 0.29, "grad_norm": 0.5886483399012389, "learning_rate": 4.82223962589555e-06, "loss": 0.2958, "step": 6282 }, { "epoch": 0.29, "grad_norm": 0.6082227233721815, "learning_rate": 4.822169383976728e-06, "loss": 0.2988, "step": 6283 }, { "epoch": 0.29, "grad_norm": 0.6232157721654364, "learning_rate": 4.822099128694405e-06, "loss": 0.3083, "step": 6284 }, { "epoch": 0.29, "grad_norm": 0.5829230382358075, "learning_rate": 4.822028860048985e-06, "loss": 0.2769, "step": 6285 }, { "epoch": 0.29, "grad_norm": 0.6816487252675236, "learning_rate": 4.821958578040872e-06, "loss": 0.3122, "step": 6286 }, { "epoch": 0.29, "grad_norm": 0.750931258675068, "learning_rate": 4.821888282670471e-06, "loss": 0.3215, "step": 6287 }, { "epoch": 0.29, "grad_norm": 0.6835623464081387, "learning_rate": 4.821817973938186e-06, "loss": 0.3148, "step": 6288 }, { "epoch": 0.29, "grad_norm": 0.5763163490588055, "learning_rate": 4.8217476518444225e-06, "loss": 0.2769, "step": 6289 }, { "epoch": 0.29, "grad_norm": 0.6086600061804546, "learning_rate": 4.821677316389585e-06, "loss": 0.2879, "step": 6290 }, { "epoch": 0.29, "grad_norm": 0.5981848882621146, "learning_rate": 4.821606967574079e-06, "loss": 0.2974, "step": 6291 }, { "epoch": 0.29, "grad_norm": 0.6169587255884997, "learning_rate": 4.821536605398308e-06, "loss": 0.293, "step": 6292 }, { "epoch": 0.29, "grad_norm": 0.6216270350604298, "learning_rate": 4.8214662298626765e-06, "loss": 0.2998, "step": 6293 }, { "epoch": 0.29, "grad_norm": 0.6246563521904208, "learning_rate": 4.8213958409675906e-06, "loss": 0.2925, "step": 6294 }, { "epoch": 0.29, "grad_norm": 0.6579877408952429, "learning_rate": 4.821325438713456e-06, "loss": 0.2855, "step": 6295 }, { "epoch": 0.29, "grad_norm": 0.587035946586122, "learning_rate": 4.821255023100676e-06, "loss": 0.2721, "step": 6296 }, { "epoch": 0.29, "grad_norm": 0.6505611089431194, "learning_rate": 4.8211845941296565e-06, "loss": 0.2979, "step": 6297 }, { "epoch": 0.3, "grad_norm": 0.6438933185777362, "learning_rate": 4.821114151800804e-06, "loss": 0.3057, "step": 6298 }, { "epoch": 0.3, "grad_norm": 0.5812430208732796, "learning_rate": 4.8210436961145224e-06, "loss": 0.2804, "step": 6299 }, { "epoch": 0.3, "grad_norm": 0.6031075333795619, "learning_rate": 4.820973227071217e-06, "loss": 0.3046, "step": 6300 }, { "epoch": 0.3, "grad_norm": 0.6178707812542462, "learning_rate": 4.820902744671295e-06, "loss": 0.2949, "step": 6301 }, { "epoch": 0.3, "grad_norm": 0.5920259938798449, "learning_rate": 4.820832248915161e-06, "loss": 0.2697, "step": 6302 }, { "epoch": 0.3, "grad_norm": 0.5760494635213464, "learning_rate": 4.820761739803221e-06, "loss": 0.285, "step": 6303 }, { "epoch": 0.3, "grad_norm": 0.5979070079412995, "learning_rate": 4.82069121733588e-06, "loss": 0.303, "step": 6304 }, { "epoch": 0.3, "grad_norm": 0.5539268686764561, "learning_rate": 4.820620681513544e-06, "loss": 0.2623, "step": 6305 }, { "epoch": 0.3, "grad_norm": 0.6519233307814334, "learning_rate": 4.82055013233662e-06, "loss": 0.3128, "step": 6306 }, { "epoch": 0.3, "grad_norm": 0.6471732910565137, "learning_rate": 4.820479569805513e-06, "loss": 0.3056, "step": 6307 }, { "epoch": 0.3, "grad_norm": 0.62758591815652, "learning_rate": 4.820408993920629e-06, "loss": 0.3004, "step": 6308 }, { "epoch": 0.3, "grad_norm": 0.627947736618736, "learning_rate": 4.820338404682375e-06, "loss": 0.2986, "step": 6309 }, { "epoch": 0.3, "grad_norm": 0.6611571179163412, "learning_rate": 4.820267802091156e-06, "loss": 0.3183, "step": 6310 }, { "epoch": 0.3, "grad_norm": 0.6165864063792296, "learning_rate": 4.820197186147379e-06, "loss": 0.2988, "step": 6311 }, { "epoch": 0.3, "grad_norm": 0.5994265720705076, "learning_rate": 4.820126556851451e-06, "loss": 0.2724, "step": 6312 }, { "epoch": 0.3, "grad_norm": 0.6204074898483616, "learning_rate": 4.820055914203777e-06, "loss": 0.2846, "step": 6313 }, { "epoch": 0.3, "grad_norm": 0.6388746296685581, "learning_rate": 4.819985258204765e-06, "loss": 0.3078, "step": 6314 }, { "epoch": 0.3, "grad_norm": 0.6539440410111488, "learning_rate": 4.819914588854821e-06, "loss": 0.3081, "step": 6315 }, { "epoch": 0.3, "grad_norm": 0.6186710528026385, "learning_rate": 4.819843906154351e-06, "loss": 0.2929, "step": 6316 }, { "epoch": 0.3, "grad_norm": 0.6108606320922768, "learning_rate": 4.819773210103763e-06, "loss": 0.3073, "step": 6317 }, { "epoch": 0.3, "grad_norm": 0.6356061904770577, "learning_rate": 4.819702500703463e-06, "loss": 0.2705, "step": 6318 }, { "epoch": 0.3, "grad_norm": 0.609572435083713, "learning_rate": 4.819631777953858e-06, "loss": 0.313, "step": 6319 }, { "epoch": 0.3, "grad_norm": 0.5961475313247442, "learning_rate": 4.819561041855355e-06, "loss": 0.2969, "step": 6320 }, { "epoch": 0.3, "grad_norm": 0.5963696167051685, "learning_rate": 4.819490292408362e-06, "loss": 0.2989, "step": 6321 }, { "epoch": 0.3, "grad_norm": 0.6106750857664064, "learning_rate": 4.819419529613285e-06, "loss": 0.2699, "step": 6322 }, { "epoch": 0.3, "grad_norm": 0.5971103064735996, "learning_rate": 4.819348753470531e-06, "loss": 0.2845, "step": 6323 }, { "epoch": 0.3, "grad_norm": 0.5917183999655773, "learning_rate": 4.819277963980509e-06, "loss": 0.2742, "step": 6324 }, { "epoch": 0.3, "grad_norm": 0.5969677410471294, "learning_rate": 4.819207161143624e-06, "loss": 0.2892, "step": 6325 }, { "epoch": 0.3, "grad_norm": 0.6827897923135162, "learning_rate": 4.819136344960285e-06, "loss": 0.3107, "step": 6326 }, { "epoch": 0.3, "grad_norm": 0.5809833434576422, "learning_rate": 4.8190655154309e-06, "loss": 0.2897, "step": 6327 }, { "epoch": 0.3, "grad_norm": 0.5968272258114613, "learning_rate": 4.818994672555875e-06, "loss": 0.2897, "step": 6328 }, { "epoch": 0.3, "grad_norm": 0.6288655330939714, "learning_rate": 4.818923816335619e-06, "loss": 0.3025, "step": 6329 }, { "epoch": 0.3, "grad_norm": 0.648185438005464, "learning_rate": 4.818852946770539e-06, "loss": 0.329, "step": 6330 }, { "epoch": 0.3, "grad_norm": 0.5985358080700368, "learning_rate": 4.818782063861043e-06, "loss": 0.2804, "step": 6331 }, { "epoch": 0.3, "grad_norm": 0.5912461788932037, "learning_rate": 4.818711167607539e-06, "loss": 0.2926, "step": 6332 }, { "epoch": 0.3, "grad_norm": 0.663832054646061, "learning_rate": 4.8186402580104355e-06, "loss": 0.3138, "step": 6333 }, { "epoch": 0.3, "grad_norm": 0.6045605637277787, "learning_rate": 4.8185693350701396e-06, "loss": 0.299, "step": 6334 }, { "epoch": 0.3, "grad_norm": 0.5857215052808619, "learning_rate": 4.8184983987870605e-06, "loss": 0.2869, "step": 6335 }, { "epoch": 0.3, "grad_norm": 0.6267587365356722, "learning_rate": 4.818427449161605e-06, "loss": 0.2855, "step": 6336 }, { "epoch": 0.3, "grad_norm": 0.5967560493004298, "learning_rate": 4.8183564861941825e-06, "loss": 0.2893, "step": 6337 }, { "epoch": 0.3, "grad_norm": 0.6086176953248059, "learning_rate": 4.818285509885202e-06, "loss": 0.3018, "step": 6338 }, { "epoch": 0.3, "grad_norm": 0.6409207853994683, "learning_rate": 4.81821452023507e-06, "loss": 0.3047, "step": 6339 }, { "epoch": 0.3, "grad_norm": 0.6392105616347321, "learning_rate": 4.818143517244196e-06, "loss": 0.3012, "step": 6340 }, { "epoch": 0.3, "grad_norm": 0.7304961244534058, "learning_rate": 4.81807250091299e-06, "loss": 0.3062, "step": 6341 }, { "epoch": 0.3, "grad_norm": 0.6441649775753865, "learning_rate": 4.818001471241859e-06, "loss": 0.2931, "step": 6342 }, { "epoch": 0.3, "grad_norm": 0.6056882514340324, "learning_rate": 4.817930428231211e-06, "loss": 0.2856, "step": 6343 }, { "epoch": 0.3, "grad_norm": 0.5913566189648113, "learning_rate": 4.817859371881457e-06, "loss": 0.2818, "step": 6344 }, { "epoch": 0.3, "grad_norm": 0.6213227473747966, "learning_rate": 4.817788302193004e-06, "loss": 0.2877, "step": 6345 }, { "epoch": 0.3, "grad_norm": 0.6170797414231646, "learning_rate": 4.817717219166263e-06, "loss": 0.3147, "step": 6346 }, { "epoch": 0.3, "grad_norm": 0.5880426736361009, "learning_rate": 4.817646122801641e-06, "loss": 0.2853, "step": 6347 }, { "epoch": 0.3, "grad_norm": 0.6111334449275346, "learning_rate": 4.817575013099549e-06, "loss": 0.3076, "step": 6348 }, { "epoch": 0.3, "grad_norm": 0.6344948585621523, "learning_rate": 4.817503890060395e-06, "loss": 0.3185, "step": 6349 }, { "epoch": 0.3, "grad_norm": 0.6356013303636913, "learning_rate": 4.817432753684588e-06, "loss": 0.2941, "step": 6350 }, { "epoch": 0.3, "grad_norm": 0.6175166875532276, "learning_rate": 4.817361603972538e-06, "loss": 0.2976, "step": 6351 }, { "epoch": 0.3, "grad_norm": 0.590597464869481, "learning_rate": 4.8172904409246555e-06, "loss": 0.3041, "step": 6352 }, { "epoch": 0.3, "grad_norm": 0.5723256144337745, "learning_rate": 4.817219264541348e-06, "loss": 0.2846, "step": 6353 }, { "epoch": 0.3, "grad_norm": 0.5949285100837298, "learning_rate": 4.817148074823026e-06, "loss": 0.2688, "step": 6354 }, { "epoch": 0.3, "grad_norm": 0.6352154640476165, "learning_rate": 4.817076871770099e-06, "loss": 0.2904, "step": 6355 }, { "epoch": 0.3, "grad_norm": 0.6104435870124612, "learning_rate": 4.817005655382978e-06, "loss": 0.3099, "step": 6356 }, { "epoch": 0.3, "grad_norm": 0.6099345103087882, "learning_rate": 4.816934425662071e-06, "loss": 0.2881, "step": 6357 }, { "epoch": 0.3, "grad_norm": 0.6343563761838131, "learning_rate": 4.816863182607789e-06, "loss": 0.306, "step": 6358 }, { "epoch": 0.3, "grad_norm": 0.6167417767150614, "learning_rate": 4.816791926220542e-06, "loss": 0.2779, "step": 6359 }, { "epoch": 0.3, "grad_norm": 0.5773057888628518, "learning_rate": 4.81672065650074e-06, "loss": 0.2924, "step": 6360 }, { "epoch": 0.3, "grad_norm": 0.6162397898423463, "learning_rate": 4.816649373448792e-06, "loss": 0.2945, "step": 6361 }, { "epoch": 0.3, "grad_norm": 0.6272748298499355, "learning_rate": 4.8165780770651095e-06, "loss": 0.2897, "step": 6362 }, { "epoch": 0.3, "grad_norm": 0.6546946148503158, "learning_rate": 4.816506767350102e-06, "loss": 0.2913, "step": 6363 }, { "epoch": 0.3, "grad_norm": 0.6106342271389288, "learning_rate": 4.81643544430418e-06, "loss": 0.2962, "step": 6364 }, { "epoch": 0.3, "grad_norm": 0.6079612509346274, "learning_rate": 4.816364107927756e-06, "loss": 0.2959, "step": 6365 }, { "epoch": 0.3, "grad_norm": 0.6321869055360821, "learning_rate": 4.816292758221237e-06, "loss": 0.2943, "step": 6366 }, { "epoch": 0.3, "grad_norm": 0.6261667561894849, "learning_rate": 4.816221395185036e-06, "loss": 0.2938, "step": 6367 }, { "epoch": 0.3, "grad_norm": 0.6195850874488738, "learning_rate": 4.816150018819564e-06, "loss": 0.2959, "step": 6368 }, { "epoch": 0.3, "grad_norm": 0.722763876297791, "learning_rate": 4.816078629125229e-06, "loss": 0.3063, "step": 6369 }, { "epoch": 0.3, "grad_norm": 0.5808285395814748, "learning_rate": 4.816007226102445e-06, "loss": 0.2963, "step": 6370 }, { "epoch": 0.3, "grad_norm": 0.6222818431202006, "learning_rate": 4.81593580975162e-06, "loss": 0.3284, "step": 6371 }, { "epoch": 0.3, "grad_norm": 0.6030374243441949, "learning_rate": 4.815864380073168e-06, "loss": 0.2786, "step": 6372 }, { "epoch": 0.3, "grad_norm": 0.5708308411593986, "learning_rate": 4.815792937067498e-06, "loss": 0.2694, "step": 6373 }, { "epoch": 0.3, "grad_norm": 0.6158695235814774, "learning_rate": 4.8157214807350225e-06, "loss": 0.2715, "step": 6374 }, { "epoch": 0.3, "grad_norm": 0.6240512210148889, "learning_rate": 4.81565001107615e-06, "loss": 0.2865, "step": 6375 }, { "epoch": 0.3, "grad_norm": 0.6118311827830286, "learning_rate": 4.815578528091296e-06, "loss": 0.286, "step": 6376 }, { "epoch": 0.3, "grad_norm": 0.6610445479413224, "learning_rate": 4.815507031780868e-06, "loss": 0.3005, "step": 6377 }, { "epoch": 0.3, "grad_norm": 0.5855354217463637, "learning_rate": 4.81543552214528e-06, "loss": 0.2691, "step": 6378 }, { "epoch": 0.3, "grad_norm": 0.6621557529623174, "learning_rate": 4.815363999184941e-06, "loss": 0.3029, "step": 6379 }, { "epoch": 0.3, "grad_norm": 0.6562866931922059, "learning_rate": 4.815292462900266e-06, "loss": 0.2818, "step": 6380 }, { "epoch": 0.3, "grad_norm": 0.6190382818577949, "learning_rate": 4.815220913291664e-06, "loss": 0.2898, "step": 6381 }, { "epoch": 0.3, "grad_norm": 0.6280111942357042, "learning_rate": 4.815149350359547e-06, "loss": 0.2964, "step": 6382 }, { "epoch": 0.3, "grad_norm": 0.6771049495735094, "learning_rate": 4.815077774104328e-06, "loss": 0.3041, "step": 6383 }, { "epoch": 0.3, "grad_norm": 0.7225541557884111, "learning_rate": 4.815006184526418e-06, "loss": 0.314, "step": 6384 }, { "epoch": 0.3, "grad_norm": 0.6556620252178403, "learning_rate": 4.8149345816262295e-06, "loss": 0.3025, "step": 6385 }, { "epoch": 0.3, "grad_norm": 0.622969505052788, "learning_rate": 4.814862965404174e-06, "loss": 0.2804, "step": 6386 }, { "epoch": 0.3, "grad_norm": 0.5985764049006888, "learning_rate": 4.814791335860665e-06, "loss": 0.299, "step": 6387 }, { "epoch": 0.3, "grad_norm": 0.6430875985775593, "learning_rate": 4.814719692996112e-06, "loss": 0.2948, "step": 6388 }, { "epoch": 0.3, "grad_norm": 0.6201936259036896, "learning_rate": 4.81464803681093e-06, "loss": 0.302, "step": 6389 }, { "epoch": 0.3, "grad_norm": 0.6103685187327702, "learning_rate": 4.81457636730553e-06, "loss": 0.3041, "step": 6390 }, { "epoch": 0.3, "grad_norm": 0.6210428512942501, "learning_rate": 4.814504684480325e-06, "loss": 0.2949, "step": 6391 }, { "epoch": 0.3, "grad_norm": 0.700819643727102, "learning_rate": 4.814432988335727e-06, "loss": 0.2991, "step": 6392 }, { "epoch": 0.3, "grad_norm": 0.7608995322394798, "learning_rate": 4.814361278872149e-06, "loss": 0.2812, "step": 6393 }, { "epoch": 0.3, "grad_norm": 0.6201020257131432, "learning_rate": 4.814289556090004e-06, "loss": 0.2931, "step": 6394 }, { "epoch": 0.3, "grad_norm": 0.6037201673739065, "learning_rate": 4.814217819989703e-06, "loss": 0.2678, "step": 6395 }, { "epoch": 0.3, "grad_norm": 0.6205761054644319, "learning_rate": 4.8141460705716615e-06, "loss": 0.3039, "step": 6396 }, { "epoch": 0.3, "grad_norm": 0.6329816181508058, "learning_rate": 4.81407430783629e-06, "loss": 0.2913, "step": 6397 }, { "epoch": 0.3, "grad_norm": 0.6117809984799216, "learning_rate": 4.8140025317840035e-06, "loss": 0.2861, "step": 6398 }, { "epoch": 0.3, "grad_norm": 0.6297320357799048, "learning_rate": 4.813930742415213e-06, "loss": 0.2981, "step": 6399 }, { "epoch": 0.3, "grad_norm": 0.6225415465609447, "learning_rate": 4.813858939730333e-06, "loss": 0.2968, "step": 6400 }, { "epoch": 0.3, "grad_norm": 0.6353694837768441, "learning_rate": 4.813787123729776e-06, "loss": 0.2825, "step": 6401 }, { "epoch": 0.3, "grad_norm": 0.6414678855319967, "learning_rate": 4.813715294413957e-06, "loss": 0.3017, "step": 6402 }, { "epoch": 0.3, "grad_norm": 0.6684713367316452, "learning_rate": 4.813643451783286e-06, "loss": 0.3222, "step": 6403 }, { "epoch": 0.3, "grad_norm": 0.6882574412986505, "learning_rate": 4.81357159583818e-06, "loss": 0.3032, "step": 6404 }, { "epoch": 0.3, "grad_norm": 0.6092845972739893, "learning_rate": 4.813499726579051e-06, "loss": 0.277, "step": 6405 }, { "epoch": 0.3, "grad_norm": 0.6095180590819789, "learning_rate": 4.813427844006312e-06, "loss": 0.2941, "step": 6406 }, { "epoch": 0.3, "grad_norm": 0.588486032852792, "learning_rate": 4.813355948120377e-06, "loss": 0.2828, "step": 6407 }, { "epoch": 0.3, "grad_norm": 0.6730638836748449, "learning_rate": 4.813284038921661e-06, "loss": 0.304, "step": 6408 }, { "epoch": 0.3, "grad_norm": 0.5600752128915978, "learning_rate": 4.813212116410575e-06, "loss": 0.2829, "step": 6409 }, { "epoch": 0.3, "grad_norm": 0.624219892222901, "learning_rate": 4.813140180587536e-06, "loss": 0.3065, "step": 6410 }, { "epoch": 0.3, "grad_norm": 0.6446660043434537, "learning_rate": 4.813068231452956e-06, "loss": 0.2897, "step": 6411 }, { "epoch": 0.3, "grad_norm": 0.6148963260695935, "learning_rate": 4.812996269007251e-06, "loss": 0.3058, "step": 6412 }, { "epoch": 0.3, "grad_norm": 0.6246086838554485, "learning_rate": 4.812924293250833e-06, "loss": 0.3141, "step": 6413 }, { "epoch": 0.3, "grad_norm": 0.622416353900839, "learning_rate": 4.812852304184116e-06, "loss": 0.2941, "step": 6414 }, { "epoch": 0.3, "grad_norm": 0.7284111655756387, "learning_rate": 4.812780301807516e-06, "loss": 0.325, "step": 6415 }, { "epoch": 0.3, "grad_norm": 0.5920657883713981, "learning_rate": 4.812708286121447e-06, "loss": 0.2986, "step": 6416 }, { "epoch": 0.3, "grad_norm": 0.6166957484495198, "learning_rate": 4.812636257126323e-06, "loss": 0.2777, "step": 6417 }, { "epoch": 0.3, "grad_norm": 0.6261745414961125, "learning_rate": 4.812564214822558e-06, "loss": 0.2873, "step": 6418 }, { "epoch": 0.3, "grad_norm": 0.6302808250048847, "learning_rate": 4.812492159210568e-06, "loss": 0.2833, "step": 6419 }, { "epoch": 0.3, "grad_norm": 0.6307072442021344, "learning_rate": 4.812420090290767e-06, "loss": 0.3112, "step": 6420 }, { "epoch": 0.3, "grad_norm": 0.6301700550277375, "learning_rate": 4.812348008063569e-06, "loss": 0.2963, "step": 6421 }, { "epoch": 0.3, "grad_norm": 0.6007420139015295, "learning_rate": 4.812275912529389e-06, "loss": 0.3217, "step": 6422 }, { "epoch": 0.3, "grad_norm": 0.6127970255175152, "learning_rate": 4.8122038036886435e-06, "loss": 0.2869, "step": 6423 }, { "epoch": 0.3, "grad_norm": 0.5975960974127681, "learning_rate": 4.8121316815417454e-06, "loss": 0.2861, "step": 6424 }, { "epoch": 0.3, "grad_norm": 0.6167037350228425, "learning_rate": 4.812059546089111e-06, "loss": 0.2845, "step": 6425 }, { "epoch": 0.3, "grad_norm": 0.6591712816399506, "learning_rate": 4.811987397331155e-06, "loss": 0.2965, "step": 6426 }, { "epoch": 0.3, "grad_norm": 0.6128541100314858, "learning_rate": 4.811915235268292e-06, "loss": 0.2875, "step": 6427 }, { "epoch": 0.3, "grad_norm": 0.6246851493811437, "learning_rate": 4.811843059900939e-06, "loss": 0.3012, "step": 6428 }, { "epoch": 0.3, "grad_norm": 0.6417661298502622, "learning_rate": 4.811770871229509e-06, "loss": 0.3229, "step": 6429 }, { "epoch": 0.3, "grad_norm": 0.6246264861541114, "learning_rate": 4.811698669254419e-06, "loss": 0.2948, "step": 6430 }, { "epoch": 0.3, "grad_norm": 0.5748540892061558, "learning_rate": 4.811626453976085e-06, "loss": 0.2812, "step": 6431 }, { "epoch": 0.3, "grad_norm": 0.6382665423334578, "learning_rate": 4.811554225394921e-06, "loss": 0.2994, "step": 6432 }, { "epoch": 0.3, "grad_norm": 0.6725340544611266, "learning_rate": 4.811481983511344e-06, "loss": 0.3125, "step": 6433 }, { "epoch": 0.3, "grad_norm": 0.6393712380364579, "learning_rate": 4.811409728325769e-06, "loss": 0.3114, "step": 6434 }, { "epoch": 0.3, "grad_norm": 0.5768839723766772, "learning_rate": 4.8113374598386105e-06, "loss": 0.3052, "step": 6435 }, { "epoch": 0.3, "grad_norm": 0.7224761235150189, "learning_rate": 4.811265178050288e-06, "loss": 0.3054, "step": 6436 }, { "epoch": 0.3, "grad_norm": 0.6266439347096646, "learning_rate": 4.811192882961214e-06, "loss": 0.292, "step": 6437 }, { "epoch": 0.3, "grad_norm": 0.6170806104676169, "learning_rate": 4.811120574571806e-06, "loss": 0.3051, "step": 6438 }, { "epoch": 0.3, "grad_norm": 0.646316333416476, "learning_rate": 4.811048252882481e-06, "loss": 0.3269, "step": 6439 }, { "epoch": 0.3, "grad_norm": 0.6366213707715748, "learning_rate": 4.810975917893654e-06, "loss": 0.3038, "step": 6440 }, { "epoch": 0.3, "grad_norm": 0.6401185791311386, "learning_rate": 4.81090356960574e-06, "loss": 0.292, "step": 6441 }, { "epoch": 0.3, "grad_norm": 0.5813696649356882, "learning_rate": 4.810831208019158e-06, "loss": 0.2744, "step": 6442 }, { "epoch": 0.3, "grad_norm": 0.6737471087427953, "learning_rate": 4.810758833134322e-06, "loss": 0.2924, "step": 6443 }, { "epoch": 0.3, "grad_norm": 0.6572562049475186, "learning_rate": 4.810686444951651e-06, "loss": 0.3117, "step": 6444 }, { "epoch": 0.3, "grad_norm": 0.6267209434519709, "learning_rate": 4.81061404347156e-06, "loss": 0.2912, "step": 6445 }, { "epoch": 0.3, "grad_norm": 0.627615535696452, "learning_rate": 4.810541628694466e-06, "loss": 0.2897, "step": 6446 }, { "epoch": 0.3, "grad_norm": 0.6435156853210229, "learning_rate": 4.810469200620785e-06, "loss": 0.3142, "step": 6447 }, { "epoch": 0.3, "grad_norm": 0.6443176155037775, "learning_rate": 4.810396759250935e-06, "loss": 0.3288, "step": 6448 }, { "epoch": 0.3, "grad_norm": 0.6178581713659251, "learning_rate": 4.810324304585332e-06, "loss": 0.2962, "step": 6449 }, { "epoch": 0.3, "grad_norm": 0.6493621646001294, "learning_rate": 4.810251836624394e-06, "loss": 0.3053, "step": 6450 }, { "epoch": 0.3, "grad_norm": 0.6526336551414509, "learning_rate": 4.810179355368536e-06, "loss": 0.3075, "step": 6451 }, { "epoch": 0.3, "grad_norm": 0.5914525629236534, "learning_rate": 4.810106860818178e-06, "loss": 0.2867, "step": 6452 }, { "epoch": 0.3, "grad_norm": 0.6455080376669529, "learning_rate": 4.8100343529737345e-06, "loss": 0.3104, "step": 6453 }, { "epoch": 0.3, "grad_norm": 0.6571377485982773, "learning_rate": 4.809961831835624e-06, "loss": 0.2889, "step": 6454 }, { "epoch": 0.3, "grad_norm": 0.624768001404484, "learning_rate": 4.809889297404265e-06, "loss": 0.2994, "step": 6455 }, { "epoch": 0.3, "grad_norm": 0.5849137201154148, "learning_rate": 4.809816749680073e-06, "loss": 0.2809, "step": 6456 }, { "epoch": 0.3, "grad_norm": 0.6392349253029025, "learning_rate": 4.809744188663465e-06, "loss": 0.2805, "step": 6457 }, { "epoch": 0.3, "grad_norm": 0.6550707193595322, "learning_rate": 4.8096716143548615e-06, "loss": 0.2957, "step": 6458 }, { "epoch": 0.3, "grad_norm": 0.6402508659767708, "learning_rate": 4.809599026754677e-06, "loss": 0.2742, "step": 6459 }, { "epoch": 0.3, "grad_norm": 0.7121755108019602, "learning_rate": 4.8095264258633315e-06, "loss": 0.306, "step": 6460 }, { "epoch": 0.3, "grad_norm": 0.5918562983608701, "learning_rate": 4.809453811681242e-06, "loss": 0.2808, "step": 6461 }, { "epoch": 0.3, "grad_norm": 0.6383077379420466, "learning_rate": 4.809381184208825e-06, "loss": 0.3083, "step": 6462 }, { "epoch": 0.3, "grad_norm": 0.572664315399171, "learning_rate": 4.809308543446501e-06, "loss": 0.2781, "step": 6463 }, { "epoch": 0.3, "grad_norm": 0.6390357808564294, "learning_rate": 4.809235889394686e-06, "loss": 0.2957, "step": 6464 }, { "epoch": 0.3, "grad_norm": 0.6129225894179203, "learning_rate": 4.8091632220537996e-06, "loss": 0.2891, "step": 6465 }, { "epoch": 0.3, "grad_norm": 0.6241001037312398, "learning_rate": 4.809090541424258e-06, "loss": 0.3221, "step": 6466 }, { "epoch": 0.3, "grad_norm": 0.6847994936636033, "learning_rate": 4.809017847506482e-06, "loss": 0.2988, "step": 6467 }, { "epoch": 0.3, "grad_norm": 0.6376000029855408, "learning_rate": 4.808945140300888e-06, "loss": 0.303, "step": 6468 }, { "epoch": 0.3, "grad_norm": 0.6382894691770082, "learning_rate": 4.808872419807895e-06, "loss": 0.2943, "step": 6469 }, { "epoch": 0.3, "grad_norm": 0.5989442388908199, "learning_rate": 4.808799686027922e-06, "loss": 0.3077, "step": 6470 }, { "epoch": 0.3, "grad_norm": 0.6130464788548811, "learning_rate": 4.808726938961387e-06, "loss": 0.3055, "step": 6471 }, { "epoch": 0.3, "grad_norm": 0.6745267842600877, "learning_rate": 4.808654178608708e-06, "loss": 0.2971, "step": 6472 }, { "epoch": 0.3, "grad_norm": 0.6803910461364004, "learning_rate": 4.808581404970305e-06, "loss": 0.308, "step": 6473 }, { "epoch": 0.3, "grad_norm": 0.5894326509128436, "learning_rate": 4.8085086180465965e-06, "loss": 0.2975, "step": 6474 }, { "epoch": 0.3, "grad_norm": 0.6016019147455104, "learning_rate": 4.808435817838001e-06, "loss": 0.3015, "step": 6475 }, { "epoch": 0.3, "grad_norm": 0.620189861686934, "learning_rate": 4.808363004344937e-06, "loss": 0.317, "step": 6476 }, { "epoch": 0.3, "grad_norm": 0.6486698098974619, "learning_rate": 4.808290177567825e-06, "loss": 0.3312, "step": 6477 }, { "epoch": 0.3, "grad_norm": 0.6262559938244873, "learning_rate": 4.808217337507083e-06, "loss": 0.2807, "step": 6478 }, { "epoch": 0.3, "grad_norm": 0.5747359469399156, "learning_rate": 4.80814448416313e-06, "loss": 0.2809, "step": 6479 }, { "epoch": 0.3, "grad_norm": 0.6309126136832485, "learning_rate": 4.808071617536386e-06, "loss": 0.3031, "step": 6480 }, { "epoch": 0.3, "grad_norm": 0.6389349167354913, "learning_rate": 4.80799873762727e-06, "loss": 0.3011, "step": 6481 }, { "epoch": 0.3, "grad_norm": 0.6108958947710788, "learning_rate": 4.8079258444362e-06, "loss": 0.2793, "step": 6482 }, { "epoch": 0.3, "grad_norm": 0.6738419122858498, "learning_rate": 4.807852937963598e-06, "loss": 0.3068, "step": 6483 }, { "epoch": 0.3, "grad_norm": 0.6147211747523066, "learning_rate": 4.807780018209882e-06, "loss": 0.2897, "step": 6484 }, { "epoch": 0.3, "grad_norm": 0.6405070183380235, "learning_rate": 4.807707085175473e-06, "loss": 0.3178, "step": 6485 }, { "epoch": 0.3, "grad_norm": 0.6085285584759855, "learning_rate": 4.807634138860789e-06, "loss": 0.2944, "step": 6486 }, { "epoch": 0.3, "grad_norm": 0.6374980779580163, "learning_rate": 4.80756117926625e-06, "loss": 0.2963, "step": 6487 }, { "epoch": 0.3, "grad_norm": 0.6302689393726897, "learning_rate": 4.807488206392278e-06, "loss": 0.3073, "step": 6488 }, { "epoch": 0.3, "grad_norm": 0.6246124331670857, "learning_rate": 4.80741522023929e-06, "loss": 0.2913, "step": 6489 }, { "epoch": 0.3, "grad_norm": 0.6130412843848921, "learning_rate": 4.807342220807708e-06, "loss": 0.2994, "step": 6490 }, { "epoch": 0.3, "grad_norm": 0.5820482813421187, "learning_rate": 4.807269208097951e-06, "loss": 0.2803, "step": 6491 }, { "epoch": 0.3, "grad_norm": 0.6468755214674841, "learning_rate": 4.80719618211044e-06, "loss": 0.2933, "step": 6492 }, { "epoch": 0.3, "grad_norm": 0.6065954967605646, "learning_rate": 4.807123142845594e-06, "loss": 0.303, "step": 6493 }, { "epoch": 0.3, "grad_norm": 0.6236460548276084, "learning_rate": 4.807050090303835e-06, "loss": 0.2942, "step": 6494 }, { "epoch": 0.3, "grad_norm": 0.5749239612680118, "learning_rate": 4.806977024485582e-06, "loss": 0.2944, "step": 6495 }, { "epoch": 0.3, "grad_norm": 0.6449756413693455, "learning_rate": 4.806903945391257e-06, "loss": 0.3004, "step": 6496 }, { "epoch": 0.3, "grad_norm": 0.5893072892141427, "learning_rate": 4.806830853021279e-06, "loss": 0.2824, "step": 6497 }, { "epoch": 0.3, "grad_norm": 0.5786528647214324, "learning_rate": 4.8067577473760695e-06, "loss": 0.2896, "step": 6498 }, { "epoch": 0.3, "grad_norm": 0.671734458091093, "learning_rate": 4.806684628456048e-06, "loss": 0.3053, "step": 6499 }, { "epoch": 0.3, "grad_norm": 0.5990191772527703, "learning_rate": 4.8066114962616375e-06, "loss": 0.2845, "step": 6500 }, { "epoch": 0.3, "grad_norm": 0.6399192145257537, "learning_rate": 4.806538350793257e-06, "loss": 0.2691, "step": 6501 }, { "epoch": 0.3, "grad_norm": 0.6334864989984511, "learning_rate": 4.806465192051328e-06, "loss": 0.3055, "step": 6502 }, { "epoch": 0.3, "grad_norm": 0.7094927566553819, "learning_rate": 4.806392020036272e-06, "loss": 0.3105, "step": 6503 }, { "epoch": 0.3, "grad_norm": 0.6203260654697483, "learning_rate": 4.806318834748509e-06, "loss": 0.2909, "step": 6504 }, { "epoch": 0.3, "grad_norm": 0.6166059363047092, "learning_rate": 4.806245636188461e-06, "loss": 0.3054, "step": 6505 }, { "epoch": 0.3, "grad_norm": 0.6191022713388296, "learning_rate": 4.806172424356549e-06, "loss": 0.3026, "step": 6506 }, { "epoch": 0.3, "grad_norm": 0.6542721153217897, "learning_rate": 4.806099199253195e-06, "loss": 0.2877, "step": 6507 }, { "epoch": 0.3, "grad_norm": 0.6417005880224027, "learning_rate": 4.806025960878818e-06, "loss": 0.2996, "step": 6508 }, { "epoch": 0.3, "grad_norm": 0.5832763155452052, "learning_rate": 4.8059527092338435e-06, "loss": 0.2799, "step": 6509 }, { "epoch": 0.3, "grad_norm": 0.6075451502720174, "learning_rate": 4.805879444318688e-06, "loss": 0.2998, "step": 6510 }, { "epoch": 0.31, "grad_norm": 0.6457267297344885, "learning_rate": 4.805806166133778e-06, "loss": 0.3072, "step": 6511 }, { "epoch": 0.31, "grad_norm": 0.6187314240406843, "learning_rate": 4.805732874679533e-06, "loss": 0.3033, "step": 6512 }, { "epoch": 0.31, "grad_norm": 0.5903937919013081, "learning_rate": 4.805659569956374e-06, "loss": 0.288, "step": 6513 }, { "epoch": 0.31, "grad_norm": 0.6135563377921627, "learning_rate": 4.805586251964724e-06, "loss": 0.29, "step": 6514 }, { "epoch": 0.31, "grad_norm": 0.6331425363836822, "learning_rate": 4.805512920705004e-06, "loss": 0.3096, "step": 6515 }, { "epoch": 0.31, "grad_norm": 0.6084329386651036, "learning_rate": 4.805439576177637e-06, "loss": 0.2908, "step": 6516 }, { "epoch": 0.31, "grad_norm": 0.6415808491224002, "learning_rate": 4.805366218383045e-06, "loss": 0.3048, "step": 6517 }, { "epoch": 0.31, "grad_norm": 0.5701720898373069, "learning_rate": 4.80529284732165e-06, "loss": 0.2819, "step": 6518 }, { "epoch": 0.31, "grad_norm": 0.5658624478029088, "learning_rate": 4.805219462993874e-06, "loss": 0.2748, "step": 6519 }, { "epoch": 0.31, "grad_norm": 0.6956166150815454, "learning_rate": 4.805146065400139e-06, "loss": 0.312, "step": 6520 }, { "epoch": 0.31, "grad_norm": 0.6198089317217974, "learning_rate": 4.805072654540868e-06, "loss": 0.2817, "step": 6521 }, { "epoch": 0.31, "grad_norm": 0.6355267637224158, "learning_rate": 4.804999230416482e-06, "loss": 0.3033, "step": 6522 }, { "epoch": 0.31, "grad_norm": 0.6367611650587656, "learning_rate": 4.804925793027408e-06, "loss": 0.2847, "step": 6523 }, { "epoch": 0.31, "grad_norm": 0.663301738871258, "learning_rate": 4.804852342374062e-06, "loss": 0.3038, "step": 6524 }, { "epoch": 0.31, "grad_norm": 0.6834817323753362, "learning_rate": 4.804778878456872e-06, "loss": 0.3093, "step": 6525 }, { "epoch": 0.31, "grad_norm": 0.5840099176461229, "learning_rate": 4.804705401276258e-06, "loss": 0.2971, "step": 6526 }, { "epoch": 0.31, "grad_norm": 0.5949594743232001, "learning_rate": 4.804631910832644e-06, "loss": 0.2841, "step": 6527 }, { "epoch": 0.31, "grad_norm": 0.65395359383188, "learning_rate": 4.804558407126452e-06, "loss": 0.3127, "step": 6528 }, { "epoch": 0.31, "grad_norm": 0.6588590376918197, "learning_rate": 4.804484890158107e-06, "loss": 0.3008, "step": 6529 }, { "epoch": 0.31, "grad_norm": 0.5841838721618343, "learning_rate": 4.804411359928029e-06, "loss": 0.2936, "step": 6530 }, { "epoch": 0.31, "grad_norm": 0.6108634817202248, "learning_rate": 4.804337816436644e-06, "loss": 0.3, "step": 6531 }, { "epoch": 0.31, "grad_norm": 0.6286268180700968, "learning_rate": 4.804264259684374e-06, "loss": 0.2856, "step": 6532 }, { "epoch": 0.31, "grad_norm": 0.6078917732309338, "learning_rate": 4.804190689671641e-06, "loss": 0.2815, "step": 6533 }, { "epoch": 0.31, "grad_norm": 0.5634832065752128, "learning_rate": 4.8041171063988715e-06, "loss": 0.2816, "step": 6534 }, { "epoch": 0.31, "grad_norm": 0.6410445398076255, "learning_rate": 4.804043509866486e-06, "loss": 0.3052, "step": 6535 }, { "epoch": 0.31, "grad_norm": 0.6194473501247195, "learning_rate": 4.80396990007491e-06, "loss": 0.2903, "step": 6536 }, { "epoch": 0.31, "grad_norm": 0.643311093545187, "learning_rate": 4.8038962770245655e-06, "loss": 0.3056, "step": 6537 }, { "epoch": 0.31, "grad_norm": 0.6133287250965888, "learning_rate": 4.803822640715877e-06, "loss": 0.2857, "step": 6538 }, { "epoch": 0.31, "grad_norm": 0.6680585518507811, "learning_rate": 4.803748991149268e-06, "loss": 0.3192, "step": 6539 }, { "epoch": 0.31, "grad_norm": 0.6080445679122998, "learning_rate": 4.803675328325164e-06, "loss": 0.2989, "step": 6540 }, { "epoch": 0.31, "grad_norm": 0.6072412865214252, "learning_rate": 4.803601652243987e-06, "loss": 0.2856, "step": 6541 }, { "epoch": 0.31, "grad_norm": 0.6265345246644659, "learning_rate": 4.80352796290616e-06, "loss": 0.2901, "step": 6542 }, { "epoch": 0.31, "grad_norm": 0.5782739637526352, "learning_rate": 4.80345426031211e-06, "loss": 0.2824, "step": 6543 }, { "epoch": 0.31, "grad_norm": 0.6759411794340711, "learning_rate": 4.803380544462259e-06, "loss": 0.2935, "step": 6544 }, { "epoch": 0.31, "grad_norm": 0.6521158845799649, "learning_rate": 4.803306815357033e-06, "loss": 0.3064, "step": 6545 }, { "epoch": 0.31, "grad_norm": 0.641987882917084, "learning_rate": 4.803233072996855e-06, "loss": 0.2901, "step": 6546 }, { "epoch": 0.31, "grad_norm": 0.6462769051127518, "learning_rate": 4.803159317382149e-06, "loss": 0.3061, "step": 6547 }, { "epoch": 0.31, "grad_norm": 0.5797282255058513, "learning_rate": 4.8030855485133396e-06, "loss": 0.2612, "step": 6548 }, { "epoch": 0.31, "grad_norm": 0.5868847590570614, "learning_rate": 4.8030117663908525e-06, "loss": 0.3, "step": 6549 }, { "epoch": 0.31, "grad_norm": 0.6097508790505088, "learning_rate": 4.802937971015111e-06, "loss": 0.2993, "step": 6550 }, { "epoch": 0.31, "grad_norm": 0.6195315693796465, "learning_rate": 4.8028641623865405e-06, "loss": 0.2783, "step": 6551 }, { "epoch": 0.31, "grad_norm": 0.6731406245280781, "learning_rate": 4.802790340505565e-06, "loss": 0.2942, "step": 6552 }, { "epoch": 0.31, "grad_norm": 0.6389087361476891, "learning_rate": 4.802716505372611e-06, "loss": 0.3158, "step": 6553 }, { "epoch": 0.31, "grad_norm": 0.576947424586191, "learning_rate": 4.802642656988101e-06, "loss": 0.2801, "step": 6554 }, { "epoch": 0.31, "grad_norm": 0.6118709183749439, "learning_rate": 4.802568795352462e-06, "loss": 0.2992, "step": 6555 }, { "epoch": 0.31, "grad_norm": 0.6376459177834826, "learning_rate": 4.802494920466118e-06, "loss": 0.3026, "step": 6556 }, { "epoch": 0.31, "grad_norm": 0.659826189696323, "learning_rate": 4.802421032329495e-06, "loss": 0.3029, "step": 6557 }, { "epoch": 0.31, "grad_norm": 0.6466212524639132, "learning_rate": 4.802347130943018e-06, "loss": 0.2924, "step": 6558 }, { "epoch": 0.31, "grad_norm": 0.6811639048324153, "learning_rate": 4.8022732163071104e-06, "loss": 0.3203, "step": 6559 }, { "epoch": 0.31, "grad_norm": 0.6366532117447259, "learning_rate": 4.8021992884222005e-06, "loss": 0.2778, "step": 6560 }, { "epoch": 0.31, "grad_norm": 0.6568987182274207, "learning_rate": 4.802125347288712e-06, "loss": 0.2949, "step": 6561 }, { "epoch": 0.31, "grad_norm": 0.6541603905711327, "learning_rate": 4.802051392907071e-06, "loss": 0.291, "step": 6562 }, { "epoch": 0.31, "grad_norm": 0.6249046643188265, "learning_rate": 4.801977425277702e-06, "loss": 0.3016, "step": 6563 }, { "epoch": 0.31, "grad_norm": 0.6877936972503381, "learning_rate": 4.801903444401032e-06, "loss": 0.3199, "step": 6564 }, { "epoch": 0.31, "grad_norm": 0.6836810468038866, "learning_rate": 4.801829450277486e-06, "loss": 0.2857, "step": 6565 }, { "epoch": 0.31, "grad_norm": 0.5751963311902337, "learning_rate": 4.801755442907489e-06, "loss": 0.2862, "step": 6566 }, { "epoch": 0.31, "grad_norm": 0.7054452852169807, "learning_rate": 4.80168142229147e-06, "loss": 0.2979, "step": 6567 }, { "epoch": 0.31, "grad_norm": 0.6146910837851159, "learning_rate": 4.801607388429852e-06, "loss": 0.287, "step": 6568 }, { "epoch": 0.31, "grad_norm": 0.6025778864675411, "learning_rate": 4.801533341323063e-06, "loss": 0.2946, "step": 6569 }, { "epoch": 0.31, "grad_norm": 0.5987218994882247, "learning_rate": 4.801459280971527e-06, "loss": 0.2927, "step": 6570 }, { "epoch": 0.31, "grad_norm": 0.6420377053659762, "learning_rate": 4.801385207375671e-06, "loss": 0.2964, "step": 6571 }, { "epoch": 0.31, "grad_norm": 0.6038172583776691, "learning_rate": 4.801311120535922e-06, "loss": 0.2778, "step": 6572 }, { "epoch": 0.31, "grad_norm": 0.6129574535153148, "learning_rate": 4.801237020452706e-06, "loss": 0.2887, "step": 6573 }, { "epoch": 0.31, "grad_norm": 0.6098410078015327, "learning_rate": 4.8011629071264486e-06, "loss": 0.2824, "step": 6574 }, { "epoch": 0.31, "grad_norm": 0.5533143007576484, "learning_rate": 4.801088780557578e-06, "loss": 0.2895, "step": 6575 }, { "epoch": 0.31, "grad_norm": 0.6405879907226196, "learning_rate": 4.801014640746519e-06, "loss": 0.2909, "step": 6576 }, { "epoch": 0.31, "grad_norm": 0.6361072674039717, "learning_rate": 4.8009404876937e-06, "loss": 0.2958, "step": 6577 }, { "epoch": 0.31, "grad_norm": 0.5642640354540319, "learning_rate": 4.8008663213995465e-06, "loss": 0.2852, "step": 6578 }, { "epoch": 0.31, "grad_norm": 0.625812147293599, "learning_rate": 4.800792141864485e-06, "loss": 0.2997, "step": 6579 }, { "epoch": 0.31, "grad_norm": 0.6404500514717565, "learning_rate": 4.800717949088944e-06, "loss": 0.3233, "step": 6580 }, { "epoch": 0.31, "grad_norm": 0.6143100780623463, "learning_rate": 4.800643743073349e-06, "loss": 0.3018, "step": 6581 }, { "epoch": 0.31, "grad_norm": 0.6081778637819225, "learning_rate": 4.800569523818127e-06, "loss": 0.3087, "step": 6582 }, { "epoch": 0.31, "grad_norm": 0.6424163590274653, "learning_rate": 4.800495291323706e-06, "loss": 0.2996, "step": 6583 }, { "epoch": 0.31, "grad_norm": 0.6450175997142548, "learning_rate": 4.800421045590513e-06, "loss": 0.2886, "step": 6584 }, { "epoch": 0.31, "grad_norm": 0.6863062514724276, "learning_rate": 4.800346786618975e-06, "loss": 0.2955, "step": 6585 }, { "epoch": 0.31, "grad_norm": 0.6362325349961918, "learning_rate": 4.80027251440952e-06, "loss": 0.2957, "step": 6586 }, { "epoch": 0.31, "grad_norm": 0.5767325855907771, "learning_rate": 4.8001982289625735e-06, "loss": 0.2767, "step": 6587 }, { "epoch": 0.31, "grad_norm": 0.6178176211762827, "learning_rate": 4.800123930278565e-06, "loss": 0.2941, "step": 6588 }, { "epoch": 0.31, "grad_norm": 0.5916952709350408, "learning_rate": 4.800049618357921e-06, "loss": 0.284, "step": 6589 }, { "epoch": 0.31, "grad_norm": 0.6328067747835833, "learning_rate": 4.799975293201071e-06, "loss": 0.2898, "step": 6590 }, { "epoch": 0.31, "grad_norm": 0.6147006386493905, "learning_rate": 4.79990095480844e-06, "loss": 0.3077, "step": 6591 }, { "epoch": 0.31, "grad_norm": 0.6195132405117054, "learning_rate": 4.799826603180457e-06, "loss": 0.2993, "step": 6592 }, { "epoch": 0.31, "grad_norm": 0.6370957039515163, "learning_rate": 4.79975223831755e-06, "loss": 0.2999, "step": 6593 }, { "epoch": 0.31, "grad_norm": 0.5902115560315078, "learning_rate": 4.799677860220148e-06, "loss": 0.2943, "step": 6594 }, { "epoch": 0.31, "grad_norm": 0.5835544224182313, "learning_rate": 4.7996034688886765e-06, "loss": 0.2933, "step": 6595 }, { "epoch": 0.31, "grad_norm": 0.5957694952830577, "learning_rate": 4.799529064323566e-06, "loss": 0.2896, "step": 6596 }, { "epoch": 0.31, "grad_norm": 0.6149179804873621, "learning_rate": 4.799454646525243e-06, "loss": 0.2993, "step": 6597 }, { "epoch": 0.31, "grad_norm": 0.5886503714088258, "learning_rate": 4.799380215494137e-06, "loss": 0.2836, "step": 6598 }, { "epoch": 0.31, "grad_norm": 0.6024844718527667, "learning_rate": 4.799305771230675e-06, "loss": 0.2937, "step": 6599 }, { "epoch": 0.31, "grad_norm": 0.6449029501578568, "learning_rate": 4.7992313137352866e-06, "loss": 0.3055, "step": 6600 }, { "epoch": 0.31, "grad_norm": 0.6820149952429254, "learning_rate": 4.7991568430084e-06, "loss": 0.3212, "step": 6601 }, { "epoch": 0.31, "grad_norm": 0.6137704328795461, "learning_rate": 4.799082359050445e-06, "loss": 0.2899, "step": 6602 }, { "epoch": 0.31, "grad_norm": 0.6109930833210385, "learning_rate": 4.7990078618618464e-06, "loss": 0.2846, "step": 6603 }, { "epoch": 0.31, "grad_norm": 0.5825907093467428, "learning_rate": 4.798933351443037e-06, "loss": 0.2825, "step": 6604 }, { "epoch": 0.31, "grad_norm": 0.6131612941251774, "learning_rate": 4.798858827794443e-06, "loss": 0.3093, "step": 6605 }, { "epoch": 0.31, "grad_norm": 0.6000372948305601, "learning_rate": 4.798784290916495e-06, "loss": 0.3014, "step": 6606 }, { "epoch": 0.31, "grad_norm": 0.6378393865748176, "learning_rate": 4.798709740809621e-06, "loss": 0.277, "step": 6607 }, { "epoch": 0.31, "grad_norm": 0.6526871549516099, "learning_rate": 4.79863517747425e-06, "loss": 0.2934, "step": 6608 }, { "epoch": 0.31, "grad_norm": 0.5894638489566595, "learning_rate": 4.798560600910811e-06, "loss": 0.2845, "step": 6609 }, { "epoch": 0.31, "grad_norm": 0.6191671254326977, "learning_rate": 4.798486011119734e-06, "loss": 0.2775, "step": 6610 }, { "epoch": 0.31, "grad_norm": 0.5973676958914304, "learning_rate": 4.798411408101448e-06, "loss": 0.2765, "step": 6611 }, { "epoch": 0.31, "grad_norm": 0.5951338601337697, "learning_rate": 4.798336791856381e-06, "loss": 0.3061, "step": 6612 }, { "epoch": 0.31, "grad_norm": 0.6412733831626449, "learning_rate": 4.798262162384964e-06, "loss": 0.2956, "step": 6613 }, { "epoch": 0.31, "grad_norm": 0.6194380977700076, "learning_rate": 4.798187519687626e-06, "loss": 0.2863, "step": 6614 }, { "epoch": 0.31, "grad_norm": 0.6277245890795148, "learning_rate": 4.798112863764797e-06, "loss": 0.2827, "step": 6615 }, { "epoch": 0.31, "grad_norm": 0.5455141748231288, "learning_rate": 4.798038194616905e-06, "loss": 0.2815, "step": 6616 }, { "epoch": 0.31, "grad_norm": 0.5858496722263657, "learning_rate": 4.797963512244381e-06, "loss": 0.2959, "step": 6617 }, { "epoch": 0.31, "grad_norm": 0.5964263615214668, "learning_rate": 4.7978888166476544e-06, "loss": 0.2917, "step": 6618 }, { "epoch": 0.31, "grad_norm": 0.5976644035144325, "learning_rate": 4.797814107827156e-06, "loss": 0.2873, "step": 6619 }, { "epoch": 0.31, "grad_norm": 0.6627735000460602, "learning_rate": 4.797739385783314e-06, "loss": 0.2963, "step": 6620 }, { "epoch": 0.31, "grad_norm": 0.6064239643775109, "learning_rate": 4.797664650516561e-06, "loss": 0.2958, "step": 6621 }, { "epoch": 0.31, "grad_norm": 0.5877642827506953, "learning_rate": 4.797589902027324e-06, "loss": 0.2868, "step": 6622 }, { "epoch": 0.31, "grad_norm": 0.6146017303146296, "learning_rate": 4.7975151403160344e-06, "loss": 0.2909, "step": 6623 }, { "epoch": 0.31, "grad_norm": 0.6591928764514204, "learning_rate": 4.797440365383124e-06, "loss": 0.3067, "step": 6624 }, { "epoch": 0.31, "grad_norm": 0.5882978253448711, "learning_rate": 4.797365577229021e-06, "loss": 0.2996, "step": 6625 }, { "epoch": 0.31, "grad_norm": 0.6140878191830048, "learning_rate": 4.7972907758541555e-06, "loss": 0.3024, "step": 6626 }, { "epoch": 0.31, "grad_norm": 0.6533384749330697, "learning_rate": 4.79721596125896e-06, "loss": 0.3034, "step": 6627 }, { "epoch": 0.31, "grad_norm": 0.6639343987972169, "learning_rate": 4.797141133443864e-06, "loss": 0.3116, "step": 6628 }, { "epoch": 0.31, "grad_norm": 0.6790637082836986, "learning_rate": 4.7970662924092985e-06, "loss": 0.3233, "step": 6629 }, { "epoch": 0.31, "grad_norm": 0.615488789433508, "learning_rate": 4.796991438155693e-06, "loss": 0.3017, "step": 6630 }, { "epoch": 0.31, "grad_norm": 0.5927135461788344, "learning_rate": 4.79691657068348e-06, "loss": 0.2733, "step": 6631 }, { "epoch": 0.31, "grad_norm": 0.665492664354327, "learning_rate": 4.796841689993089e-06, "loss": 0.3151, "step": 6632 }, { "epoch": 0.31, "grad_norm": 0.6370342427568181, "learning_rate": 4.7967667960849504e-06, "loss": 0.2937, "step": 6633 }, { "epoch": 0.31, "grad_norm": 0.5935732237053637, "learning_rate": 4.796691888959497e-06, "loss": 0.2584, "step": 6634 }, { "epoch": 0.31, "grad_norm": 0.5828079407223811, "learning_rate": 4.796616968617159e-06, "loss": 0.2627, "step": 6635 }, { "epoch": 0.31, "grad_norm": 0.6050323626042696, "learning_rate": 4.796542035058368e-06, "loss": 0.3042, "step": 6636 }, { "epoch": 0.31, "grad_norm": 0.5805932566780997, "learning_rate": 4.796467088283555e-06, "loss": 0.2842, "step": 6637 }, { "epoch": 0.31, "grad_norm": 0.7347956572561547, "learning_rate": 4.79639212829315e-06, "loss": 0.3122, "step": 6638 }, { "epoch": 0.31, "grad_norm": 0.6433448044985199, "learning_rate": 4.796317155087586e-06, "loss": 0.2789, "step": 6639 }, { "epoch": 0.31, "grad_norm": 0.5989634090721363, "learning_rate": 4.796242168667295e-06, "loss": 0.2971, "step": 6640 }, { "epoch": 0.31, "grad_norm": 0.6296040837749279, "learning_rate": 4.796167169032706e-06, "loss": 0.3004, "step": 6641 }, { "epoch": 0.31, "grad_norm": 0.6326675329726228, "learning_rate": 4.796092156184252e-06, "loss": 0.3031, "step": 6642 }, { "epoch": 0.31, "grad_norm": 0.6634038745963424, "learning_rate": 4.796017130122365e-06, "loss": 0.3056, "step": 6643 }, { "epoch": 0.31, "grad_norm": 0.6770647404640497, "learning_rate": 4.795942090847478e-06, "loss": 0.32, "step": 6644 }, { "epoch": 0.31, "grad_norm": 0.6706711749994808, "learning_rate": 4.795867038360019e-06, "loss": 0.298, "step": 6645 }, { "epoch": 0.31, "grad_norm": 0.5880985630812148, "learning_rate": 4.795791972660424e-06, "loss": 0.2764, "step": 6646 }, { "epoch": 0.31, "grad_norm": 0.6219002141490056, "learning_rate": 4.7957168937491226e-06, "loss": 0.3042, "step": 6647 }, { "epoch": 0.31, "grad_norm": 0.639652122693227, "learning_rate": 4.7956418016265475e-06, "loss": 0.2896, "step": 6648 }, { "epoch": 0.31, "grad_norm": 0.6260988215901895, "learning_rate": 4.79556669629313e-06, "loss": 0.3027, "step": 6649 }, { "epoch": 0.31, "grad_norm": 0.617941509346728, "learning_rate": 4.7954915777493035e-06, "loss": 0.2802, "step": 6650 }, { "epoch": 0.31, "grad_norm": 0.561340436822796, "learning_rate": 4.795416445995501e-06, "loss": 0.2729, "step": 6651 }, { "epoch": 0.31, "grad_norm": 0.6148074760243509, "learning_rate": 4.795341301032153e-06, "loss": 0.2835, "step": 6652 }, { "epoch": 0.31, "grad_norm": 0.692770100251545, "learning_rate": 4.7952661428596926e-06, "loss": 0.3161, "step": 6653 }, { "epoch": 0.31, "grad_norm": 0.7126980473857287, "learning_rate": 4.795190971478553e-06, "loss": 0.2852, "step": 6654 }, { "epoch": 0.31, "grad_norm": 0.6301328318728442, "learning_rate": 4.7951157868891656e-06, "loss": 0.2754, "step": 6655 }, { "epoch": 0.31, "grad_norm": 0.5726366043469484, "learning_rate": 4.795040589091964e-06, "loss": 0.2885, "step": 6656 }, { "epoch": 0.31, "grad_norm": 0.6240730520662572, "learning_rate": 4.794965378087381e-06, "loss": 0.2836, "step": 6657 }, { "epoch": 0.31, "grad_norm": 0.6222533822226746, "learning_rate": 4.794890153875849e-06, "loss": 0.291, "step": 6658 }, { "epoch": 0.31, "grad_norm": 0.640442329211622, "learning_rate": 4.7948149164578e-06, "loss": 0.3115, "step": 6659 }, { "epoch": 0.31, "grad_norm": 0.6831851121508192, "learning_rate": 4.794739665833669e-06, "loss": 0.3306, "step": 6660 }, { "epoch": 0.31, "grad_norm": 0.592965251231151, "learning_rate": 4.794664402003887e-06, "loss": 0.3015, "step": 6661 }, { "epoch": 0.31, "grad_norm": 0.7254873960449615, "learning_rate": 4.794589124968889e-06, "loss": 0.3029, "step": 6662 }, { "epoch": 0.31, "grad_norm": 0.5602976291405168, "learning_rate": 4.794513834729107e-06, "loss": 0.2644, "step": 6663 }, { "epoch": 0.31, "grad_norm": 0.6338927777242748, "learning_rate": 4.794438531284974e-06, "loss": 0.2888, "step": 6664 }, { "epoch": 0.31, "grad_norm": 0.6731663862700482, "learning_rate": 4.794363214636925e-06, "loss": 0.313, "step": 6665 }, { "epoch": 0.31, "grad_norm": 0.6840316531301414, "learning_rate": 4.7942878847853915e-06, "loss": 0.2977, "step": 6666 }, { "epoch": 0.31, "grad_norm": 0.645505778809843, "learning_rate": 4.7942125417308084e-06, "loss": 0.3052, "step": 6667 }, { "epoch": 0.31, "grad_norm": 0.5938865889826843, "learning_rate": 4.794137185473609e-06, "loss": 0.2973, "step": 6668 }, { "epoch": 0.31, "grad_norm": 0.7047186940203763, "learning_rate": 4.794061816014226e-06, "loss": 0.2946, "step": 6669 }, { "epoch": 0.31, "grad_norm": 0.6191222357865546, "learning_rate": 4.7939864333530946e-06, "loss": 0.2994, "step": 6670 }, { "epoch": 0.31, "grad_norm": 0.7636567356982716, "learning_rate": 4.793911037490647e-06, "loss": 0.312, "step": 6671 }, { "epoch": 0.31, "grad_norm": 0.6326594578554282, "learning_rate": 4.793835628427319e-06, "loss": 0.2879, "step": 6672 }, { "epoch": 0.31, "grad_norm": 0.6750034061379222, "learning_rate": 4.793760206163542e-06, "loss": 0.2915, "step": 6673 }, { "epoch": 0.31, "grad_norm": 0.649350508536125, "learning_rate": 4.7936847706997525e-06, "loss": 0.3092, "step": 6674 }, { "epoch": 0.31, "grad_norm": 0.622716440238515, "learning_rate": 4.793609322036384e-06, "loss": 0.3041, "step": 6675 }, { "epoch": 0.31, "grad_norm": 0.599786176397505, "learning_rate": 4.79353386017387e-06, "loss": 0.2826, "step": 6676 }, { "epoch": 0.31, "grad_norm": 0.6361712216892169, "learning_rate": 4.7934583851126444e-06, "loss": 0.2714, "step": 6677 }, { "epoch": 0.31, "grad_norm": 0.6163925266254576, "learning_rate": 4.793382896853143e-06, "loss": 0.3202, "step": 6678 }, { "epoch": 0.31, "grad_norm": 0.6288188081285712, "learning_rate": 4.793307395395798e-06, "loss": 0.2925, "step": 6679 }, { "epoch": 0.31, "grad_norm": 0.606253653005436, "learning_rate": 4.793231880741048e-06, "loss": 0.2952, "step": 6680 }, { "epoch": 0.31, "grad_norm": 0.6487424763013017, "learning_rate": 4.793156352889323e-06, "loss": 0.3022, "step": 6681 }, { "epoch": 0.31, "grad_norm": 0.7052177169863901, "learning_rate": 4.7930808118410595e-06, "loss": 0.3201, "step": 6682 }, { "epoch": 0.31, "grad_norm": 0.6296155390665952, "learning_rate": 4.793005257596694e-06, "loss": 0.298, "step": 6683 }, { "epoch": 0.31, "grad_norm": 0.6643801924919703, "learning_rate": 4.792929690156658e-06, "loss": 0.3135, "step": 6684 }, { "epoch": 0.31, "grad_norm": 0.590317934174981, "learning_rate": 4.7928541095213875e-06, "loss": 0.2875, "step": 6685 }, { "epoch": 0.31, "grad_norm": 0.6435750115115787, "learning_rate": 4.792778515691319e-06, "loss": 0.2865, "step": 6686 }, { "epoch": 0.31, "grad_norm": 0.7090336334555117, "learning_rate": 4.792702908666887e-06, "loss": 0.2971, "step": 6687 }, { "epoch": 0.31, "grad_norm": 0.5966235143625357, "learning_rate": 4.792627288448524e-06, "loss": 0.2819, "step": 6688 }, { "epoch": 0.31, "grad_norm": 0.670573516584797, "learning_rate": 4.792551655036668e-06, "loss": 0.3169, "step": 6689 }, { "epoch": 0.31, "grad_norm": 0.6757905061308881, "learning_rate": 4.792476008431754e-06, "loss": 0.308, "step": 6690 }, { "epoch": 0.31, "grad_norm": 0.6312286669035938, "learning_rate": 4.792400348634216e-06, "loss": 0.3163, "step": 6691 }, { "epoch": 0.31, "grad_norm": 0.6145417416624962, "learning_rate": 4.792324675644491e-06, "loss": 0.2934, "step": 6692 }, { "epoch": 0.31, "grad_norm": 0.6457028597708877, "learning_rate": 4.792248989463014e-06, "loss": 0.2887, "step": 6693 }, { "epoch": 0.31, "grad_norm": 0.6188506195661526, "learning_rate": 4.792173290090219e-06, "loss": 0.2897, "step": 6694 }, { "epoch": 0.31, "grad_norm": 0.6181428217394258, "learning_rate": 4.792097577526543e-06, "loss": 0.2823, "step": 6695 }, { "epoch": 0.31, "grad_norm": 0.7456178214363496, "learning_rate": 4.792021851772423e-06, "loss": 0.3188, "step": 6696 }, { "epoch": 0.31, "grad_norm": 0.6468176827607621, "learning_rate": 4.791946112828292e-06, "loss": 0.3098, "step": 6697 }, { "epoch": 0.31, "grad_norm": 0.5850055121991444, "learning_rate": 4.791870360694587e-06, "loss": 0.2785, "step": 6698 }, { "epoch": 0.31, "grad_norm": 0.626951965872251, "learning_rate": 4.7917945953717445e-06, "loss": 0.2992, "step": 6699 }, { "epoch": 0.31, "grad_norm": 0.6263571845629468, "learning_rate": 4.7917188168602e-06, "loss": 0.2914, "step": 6700 }, { "epoch": 0.31, "grad_norm": 0.6442215765003929, "learning_rate": 4.79164302516039e-06, "loss": 0.3013, "step": 6701 }, { "epoch": 0.31, "grad_norm": 0.6471642858025701, "learning_rate": 4.79156722027275e-06, "loss": 0.3057, "step": 6702 }, { "epoch": 0.31, "grad_norm": 0.6113110561010493, "learning_rate": 4.791491402197717e-06, "loss": 0.3119, "step": 6703 }, { "epoch": 0.31, "grad_norm": 0.6492154125568546, "learning_rate": 4.7914155709357265e-06, "loss": 0.3111, "step": 6704 }, { "epoch": 0.31, "grad_norm": 0.6148472322261296, "learning_rate": 4.7913397264872156e-06, "loss": 0.2762, "step": 6705 }, { "epoch": 0.31, "grad_norm": 0.6290295236359765, "learning_rate": 4.79126386885262e-06, "loss": 0.2936, "step": 6706 }, { "epoch": 0.31, "grad_norm": 0.6246486760697918, "learning_rate": 4.791187998032377e-06, "loss": 0.2897, "step": 6707 }, { "epoch": 0.31, "grad_norm": 0.6078149633000904, "learning_rate": 4.791112114026923e-06, "loss": 0.2829, "step": 6708 }, { "epoch": 0.31, "grad_norm": 0.5966608104265818, "learning_rate": 4.791036216836695e-06, "loss": 0.2745, "step": 6709 }, { "epoch": 0.31, "grad_norm": 0.6099565135123983, "learning_rate": 4.790960306462129e-06, "loss": 0.293, "step": 6710 }, { "epoch": 0.31, "grad_norm": 0.7501994783935046, "learning_rate": 4.790884382903662e-06, "loss": 0.3082, "step": 6711 }, { "epoch": 0.31, "grad_norm": 0.7014386149646997, "learning_rate": 4.790808446161732e-06, "loss": 0.2935, "step": 6712 }, { "epoch": 0.31, "grad_norm": 0.6769457018049438, "learning_rate": 4.790732496236774e-06, "loss": 0.3135, "step": 6713 }, { "epoch": 0.31, "grad_norm": 0.6380809458960379, "learning_rate": 4.790656533129228e-06, "loss": 0.3157, "step": 6714 }, { "epoch": 0.31, "grad_norm": 0.6099779918092356, "learning_rate": 4.790580556839528e-06, "loss": 0.2919, "step": 6715 }, { "epoch": 0.31, "grad_norm": 0.6347304812905186, "learning_rate": 4.790504567368113e-06, "loss": 0.3311, "step": 6716 }, { "epoch": 0.31, "grad_norm": 0.6247916632375933, "learning_rate": 4.79042856471542e-06, "loss": 0.3189, "step": 6717 }, { "epoch": 0.31, "grad_norm": 0.664923934262849, "learning_rate": 4.790352548881886e-06, "loss": 0.3273, "step": 6718 }, { "epoch": 0.31, "grad_norm": 0.6252472590699943, "learning_rate": 4.790276519867949e-06, "loss": 0.3133, "step": 6719 }, { "epoch": 0.31, "grad_norm": 0.6855472675526801, "learning_rate": 4.790200477674046e-06, "loss": 0.2857, "step": 6720 }, { "epoch": 0.31, "grad_norm": 0.6029401150361539, "learning_rate": 4.7901244223006145e-06, "loss": 0.2899, "step": 6721 }, { "epoch": 0.31, "grad_norm": 0.6133143757620835, "learning_rate": 4.790048353748094e-06, "loss": 0.2922, "step": 6722 }, { "epoch": 0.31, "grad_norm": 0.6318162007734442, "learning_rate": 4.7899722720169196e-06, "loss": 0.2866, "step": 6723 }, { "epoch": 0.31, "grad_norm": 0.6223759182590276, "learning_rate": 4.7898961771075305e-06, "loss": 0.2874, "step": 6724 }, { "epoch": 0.32, "grad_norm": 0.6224980366319585, "learning_rate": 4.7898200690203645e-06, "loss": 0.3132, "step": 6725 }, { "epoch": 0.32, "grad_norm": 0.6109897212781759, "learning_rate": 4.7897439477558595e-06, "loss": 0.2983, "step": 6726 }, { "epoch": 0.32, "grad_norm": 0.5697920692909192, "learning_rate": 4.7896678133144535e-06, "loss": 0.3053, "step": 6727 }, { "epoch": 0.32, "grad_norm": 0.684233963301494, "learning_rate": 4.789591665696584e-06, "loss": 0.3065, "step": 6728 }, { "epoch": 0.32, "grad_norm": 0.6072741429535305, "learning_rate": 4.789515504902692e-06, "loss": 0.2968, "step": 6729 }, { "epoch": 0.32, "grad_norm": 0.6390916857327231, "learning_rate": 4.789439330933212e-06, "loss": 0.2882, "step": 6730 }, { "epoch": 0.32, "grad_norm": 0.6129609736875438, "learning_rate": 4.789363143788584e-06, "loss": 0.2821, "step": 6731 }, { "epoch": 0.32, "grad_norm": 0.6625924122098164, "learning_rate": 4.7892869434692476e-06, "loss": 0.3113, "step": 6732 }, { "epoch": 0.32, "grad_norm": 0.5878670812373302, "learning_rate": 4.789210729975641e-06, "loss": 0.2896, "step": 6733 }, { "epoch": 0.32, "grad_norm": 0.6574339929157921, "learning_rate": 4.7891345033082e-06, "loss": 0.3054, "step": 6734 }, { "epoch": 0.32, "grad_norm": 0.623326047502507, "learning_rate": 4.789058263467366e-06, "loss": 0.2739, "step": 6735 }, { "epoch": 0.32, "grad_norm": 0.6757525783755244, "learning_rate": 4.788982010453577e-06, "loss": 0.291, "step": 6736 }, { "epoch": 0.32, "grad_norm": 0.6911255559800534, "learning_rate": 4.7889057442672724e-06, "loss": 0.319, "step": 6737 }, { "epoch": 0.32, "grad_norm": 0.6299679778536131, "learning_rate": 4.788829464908889e-06, "loss": 0.2734, "step": 6738 }, { "epoch": 0.32, "grad_norm": 0.6241080874558047, "learning_rate": 4.788753172378869e-06, "loss": 0.2875, "step": 6739 }, { "epoch": 0.32, "grad_norm": 0.6455873562561697, "learning_rate": 4.788676866677649e-06, "loss": 0.2907, "step": 6740 }, { "epoch": 0.32, "grad_norm": 0.6682973621926189, "learning_rate": 4.788600547805669e-06, "loss": 0.2946, "step": 6741 }, { "epoch": 0.32, "grad_norm": 0.5931462191412469, "learning_rate": 4.788524215763368e-06, "loss": 0.2965, "step": 6742 }, { "epoch": 0.32, "grad_norm": 0.6648557098311126, "learning_rate": 4.7884478705511855e-06, "loss": 0.2954, "step": 6743 }, { "epoch": 0.32, "grad_norm": 0.6388154819469779, "learning_rate": 4.7883715121695605e-06, "loss": 0.2938, "step": 6744 }, { "epoch": 0.32, "grad_norm": 0.558961828845344, "learning_rate": 4.788295140618933e-06, "loss": 0.2888, "step": 6745 }, { "epoch": 0.32, "grad_norm": 0.6629056615736998, "learning_rate": 4.788218755899742e-06, "loss": 0.3032, "step": 6746 }, { "epoch": 0.32, "grad_norm": 0.6695878639321495, "learning_rate": 4.7881423580124265e-06, "loss": 0.3151, "step": 6747 }, { "epoch": 0.32, "grad_norm": 0.6575427836994759, "learning_rate": 4.7880659469574275e-06, "loss": 0.3082, "step": 6748 }, { "epoch": 0.32, "grad_norm": 0.5852660603030773, "learning_rate": 4.7879895227351836e-06, "loss": 0.2845, "step": 6749 }, { "epoch": 0.32, "grad_norm": 0.6518253289274784, "learning_rate": 4.787913085346135e-06, "loss": 0.304, "step": 6750 }, { "epoch": 0.32, "grad_norm": 0.6148065539955525, "learning_rate": 4.787836634790722e-06, "loss": 0.307, "step": 6751 }, { "epoch": 0.32, "grad_norm": 0.5351692415082352, "learning_rate": 4.7877601710693845e-06, "loss": 0.2795, "step": 6752 }, { "epoch": 0.32, "grad_norm": 0.667388200448351, "learning_rate": 4.787683694182562e-06, "loss": 0.2918, "step": 6753 }, { "epoch": 0.32, "grad_norm": 0.6023435623156916, "learning_rate": 4.787607204130695e-06, "loss": 0.2765, "step": 6754 }, { "epoch": 0.32, "grad_norm": 0.6450969166164892, "learning_rate": 4.787530700914223e-06, "loss": 0.2993, "step": 6755 }, { "epoch": 0.32, "grad_norm": 0.5834895216242275, "learning_rate": 4.787454184533587e-06, "loss": 0.2703, "step": 6756 }, { "epoch": 0.32, "grad_norm": 0.6064004174333638, "learning_rate": 4.787377654989227e-06, "loss": 0.2794, "step": 6757 }, { "epoch": 0.32, "grad_norm": 0.6969585252257512, "learning_rate": 4.787301112281584e-06, "loss": 0.2971, "step": 6758 }, { "epoch": 0.32, "grad_norm": 0.6992205772874777, "learning_rate": 4.7872245564110975e-06, "loss": 0.3316, "step": 6759 }, { "epoch": 0.32, "grad_norm": 0.6401331802430191, "learning_rate": 4.787147987378209e-06, "loss": 0.3179, "step": 6760 }, { "epoch": 0.32, "grad_norm": 0.6211937766733787, "learning_rate": 4.787071405183358e-06, "loss": 0.299, "step": 6761 }, { "epoch": 0.32, "grad_norm": 0.5824347457752344, "learning_rate": 4.7869948098269856e-06, "loss": 0.2783, "step": 6762 }, { "epoch": 0.32, "grad_norm": 0.6195950381905977, "learning_rate": 4.786918201309534e-06, "loss": 0.2909, "step": 6763 }, { "epoch": 0.32, "grad_norm": 0.6477807892730595, "learning_rate": 4.7868415796314425e-06, "loss": 0.2887, "step": 6764 }, { "epoch": 0.32, "grad_norm": 0.6670174025589377, "learning_rate": 4.786764944793152e-06, "loss": 0.3356, "step": 6765 }, { "epoch": 0.32, "grad_norm": 0.6188279223658039, "learning_rate": 4.786688296795105e-06, "loss": 0.3047, "step": 6766 }, { "epoch": 0.32, "grad_norm": 0.6741267148210651, "learning_rate": 4.7866116356377404e-06, "loss": 0.3004, "step": 6767 }, { "epoch": 0.32, "grad_norm": 0.5994487934448157, "learning_rate": 4.7865349613215014e-06, "loss": 0.3002, "step": 6768 }, { "epoch": 0.32, "grad_norm": 0.6704127956627155, "learning_rate": 4.7864582738468275e-06, "loss": 0.2983, "step": 6769 }, { "epoch": 0.32, "grad_norm": 0.6662407922567143, "learning_rate": 4.7863815732141625e-06, "loss": 0.2935, "step": 6770 }, { "epoch": 0.32, "grad_norm": 0.6173779776452293, "learning_rate": 4.786304859423944e-06, "loss": 0.3273, "step": 6771 }, { "epoch": 0.32, "grad_norm": 0.6301128113779848, "learning_rate": 4.786228132476618e-06, "loss": 0.2948, "step": 6772 }, { "epoch": 0.32, "grad_norm": 0.5947473090481369, "learning_rate": 4.786151392372622e-06, "loss": 0.2893, "step": 6773 }, { "epoch": 0.32, "grad_norm": 0.6101037281559053, "learning_rate": 4.7860746391124e-06, "loss": 0.3146, "step": 6774 }, { "epoch": 0.32, "grad_norm": 0.6273713081466549, "learning_rate": 4.785997872696394e-06, "loss": 0.2863, "step": 6775 }, { "epoch": 0.32, "grad_norm": 0.5748642267924943, "learning_rate": 4.785921093125044e-06, "loss": 0.2772, "step": 6776 }, { "epoch": 0.32, "grad_norm": 0.6502310238182333, "learning_rate": 4.785844300398792e-06, "loss": 0.2861, "step": 6777 }, { "epoch": 0.32, "grad_norm": 0.570011096977999, "learning_rate": 4.785767494518081e-06, "loss": 0.2944, "step": 6778 }, { "epoch": 0.32, "grad_norm": 0.5689021417692347, "learning_rate": 4.785690675483353e-06, "loss": 0.275, "step": 6779 }, { "epoch": 0.32, "grad_norm": 0.6712474267699603, "learning_rate": 4.785613843295049e-06, "loss": 0.2845, "step": 6780 }, { "epoch": 0.32, "grad_norm": 0.591727580554672, "learning_rate": 4.785536997953613e-06, "loss": 0.2919, "step": 6781 }, { "epoch": 0.32, "grad_norm": 0.5909458118353709, "learning_rate": 4.7854601394594846e-06, "loss": 0.3032, "step": 6782 }, { "epoch": 0.32, "grad_norm": 0.5637877725991514, "learning_rate": 4.785383267813108e-06, "loss": 0.2737, "step": 6783 }, { "epoch": 0.32, "grad_norm": 0.6155577960558054, "learning_rate": 4.785306383014925e-06, "loss": 0.2983, "step": 6784 }, { "epoch": 0.32, "grad_norm": 0.6042147794478928, "learning_rate": 4.785229485065378e-06, "loss": 0.2831, "step": 6785 }, { "epoch": 0.32, "grad_norm": 0.5887734778505844, "learning_rate": 4.7851525739649105e-06, "loss": 0.2909, "step": 6786 }, { "epoch": 0.32, "grad_norm": 0.6461546542553256, "learning_rate": 4.785075649713964e-06, "loss": 0.301, "step": 6787 }, { "epoch": 0.32, "grad_norm": 0.6139417171760021, "learning_rate": 4.784998712312981e-06, "loss": 0.3116, "step": 6788 }, { "epoch": 0.32, "grad_norm": 0.6284704004926761, "learning_rate": 4.784921761762405e-06, "loss": 0.2988, "step": 6789 }, { "epoch": 0.32, "grad_norm": 0.5917915263096103, "learning_rate": 4.784844798062679e-06, "loss": 0.2924, "step": 6790 }, { "epoch": 0.32, "grad_norm": 0.6188436580567324, "learning_rate": 4.784767821214245e-06, "loss": 0.2962, "step": 6791 }, { "epoch": 0.32, "grad_norm": 0.6113449721497909, "learning_rate": 4.784690831217546e-06, "loss": 0.3009, "step": 6792 }, { "epoch": 0.32, "grad_norm": 0.583631993181195, "learning_rate": 4.784613828073026e-06, "loss": 0.2956, "step": 6793 }, { "epoch": 0.32, "grad_norm": 0.6221818201698222, "learning_rate": 4.784536811781127e-06, "loss": 0.3108, "step": 6794 }, { "epoch": 0.32, "grad_norm": 0.5725422472647465, "learning_rate": 4.784459782342294e-06, "loss": 0.2807, "step": 6795 }, { "epoch": 0.32, "grad_norm": 0.7008295801822253, "learning_rate": 4.784382739756968e-06, "loss": 0.3206, "step": 6796 }, { "epoch": 0.32, "grad_norm": 0.6538713999691355, "learning_rate": 4.784305684025594e-06, "loss": 0.3088, "step": 6797 }, { "epoch": 0.32, "grad_norm": 0.6212150216608995, "learning_rate": 4.784228615148615e-06, "loss": 0.2906, "step": 6798 }, { "epoch": 0.32, "grad_norm": 0.5639768447016785, "learning_rate": 4.784151533126475e-06, "loss": 0.2778, "step": 6799 }, { "epoch": 0.32, "grad_norm": 0.6030192855797651, "learning_rate": 4.784074437959616e-06, "loss": 0.2986, "step": 6800 }, { "epoch": 0.32, "grad_norm": 0.6350657816984873, "learning_rate": 4.7839973296484836e-06, "loss": 0.2827, "step": 6801 }, { "epoch": 0.32, "grad_norm": 0.6364443889090742, "learning_rate": 4.78392020819352e-06, "loss": 0.2998, "step": 6802 }, { "epoch": 0.32, "grad_norm": 0.7045160286300739, "learning_rate": 4.78384307359517e-06, "loss": 0.3018, "step": 6803 }, { "epoch": 0.32, "grad_norm": 0.6815115230781608, "learning_rate": 4.783765925853877e-06, "loss": 0.3092, "step": 6804 }, { "epoch": 0.32, "grad_norm": 0.6092682137549182, "learning_rate": 4.783688764970085e-06, "loss": 0.2827, "step": 6805 }, { "epoch": 0.32, "grad_norm": 0.6680930208324195, "learning_rate": 4.783611590944239e-06, "loss": 0.3288, "step": 6806 }, { "epoch": 0.32, "grad_norm": 0.6351258940397989, "learning_rate": 4.7835344037767804e-06, "loss": 0.3104, "step": 6807 }, { "epoch": 0.32, "grad_norm": 0.6309457376466625, "learning_rate": 4.783457203468157e-06, "loss": 0.2931, "step": 6808 }, { "epoch": 0.32, "grad_norm": 0.6020773426007577, "learning_rate": 4.78337999001881e-06, "loss": 0.3037, "step": 6809 }, { "epoch": 0.32, "grad_norm": 0.6309135856639192, "learning_rate": 4.783302763429186e-06, "loss": 0.2856, "step": 6810 }, { "epoch": 0.32, "grad_norm": 0.5831483316474185, "learning_rate": 4.7832255236997286e-06, "loss": 0.3, "step": 6811 }, { "epoch": 0.32, "grad_norm": 0.609270118259804, "learning_rate": 4.783148270830881e-06, "loss": 0.2897, "step": 6812 }, { "epoch": 0.32, "grad_norm": 0.6224170702397583, "learning_rate": 4.78307100482309e-06, "loss": 0.302, "step": 6813 }, { "epoch": 0.32, "grad_norm": 0.6443703099207507, "learning_rate": 4.782993725676799e-06, "loss": 0.3047, "step": 6814 }, { "epoch": 0.32, "grad_norm": 0.6092279435220377, "learning_rate": 4.782916433392453e-06, "loss": 0.306, "step": 6815 }, { "epoch": 0.32, "grad_norm": 0.6127376993298204, "learning_rate": 4.782839127970495e-06, "loss": 0.2914, "step": 6816 }, { "epoch": 0.32, "grad_norm": 0.649063912770158, "learning_rate": 4.7827618094113734e-06, "loss": 0.291, "step": 6817 }, { "epoch": 0.32, "grad_norm": 0.6204403538097648, "learning_rate": 4.782684477715531e-06, "loss": 0.2976, "step": 6818 }, { "epoch": 0.32, "grad_norm": 0.5888514833942345, "learning_rate": 4.782607132883414e-06, "loss": 0.2811, "step": 6819 }, { "epoch": 0.32, "grad_norm": 0.6313710589567815, "learning_rate": 4.782529774915465e-06, "loss": 0.2914, "step": 6820 }, { "epoch": 0.32, "grad_norm": 0.5932390718057042, "learning_rate": 4.782452403812132e-06, "loss": 0.289, "step": 6821 }, { "epoch": 0.32, "grad_norm": 0.5618396015009922, "learning_rate": 4.782375019573859e-06, "loss": 0.2765, "step": 6822 }, { "epoch": 0.32, "grad_norm": 0.6204794475531198, "learning_rate": 4.7822976222010915e-06, "loss": 0.2902, "step": 6823 }, { "epoch": 0.32, "grad_norm": 0.6262322112419082, "learning_rate": 4.782220211694274e-06, "loss": 0.299, "step": 6824 }, { "epoch": 0.32, "grad_norm": 0.6281256440368811, "learning_rate": 4.782142788053854e-06, "loss": 0.3035, "step": 6825 }, { "epoch": 0.32, "grad_norm": 0.6383922273714296, "learning_rate": 4.782065351280275e-06, "loss": 0.2973, "step": 6826 }, { "epoch": 0.32, "grad_norm": 0.5656904484296362, "learning_rate": 4.781987901373983e-06, "loss": 0.2881, "step": 6827 }, { "epoch": 0.32, "grad_norm": 0.6715884724787493, "learning_rate": 4.781910438335426e-06, "loss": 0.2985, "step": 6828 }, { "epoch": 0.32, "grad_norm": 0.6291035567595853, "learning_rate": 4.7818329621650465e-06, "loss": 0.2804, "step": 6829 }, { "epoch": 0.32, "grad_norm": 0.620758583063766, "learning_rate": 4.781755472863292e-06, "loss": 0.2939, "step": 6830 }, { "epoch": 0.32, "grad_norm": 0.650924754590003, "learning_rate": 4.7816779704306085e-06, "loss": 0.2986, "step": 6831 }, { "epoch": 0.32, "grad_norm": 0.6085321084575459, "learning_rate": 4.781600454867441e-06, "loss": 0.2988, "step": 6832 }, { "epoch": 0.32, "grad_norm": 0.5994576972819505, "learning_rate": 4.781522926174237e-06, "loss": 0.2938, "step": 6833 }, { "epoch": 0.32, "grad_norm": 0.6758918399641763, "learning_rate": 4.7814453843514416e-06, "loss": 0.3038, "step": 6834 }, { "epoch": 0.32, "grad_norm": 0.5937991193945171, "learning_rate": 4.781367829399503e-06, "loss": 0.2839, "step": 6835 }, { "epoch": 0.32, "grad_norm": 0.6641726634887735, "learning_rate": 4.781290261318864e-06, "loss": 0.2914, "step": 6836 }, { "epoch": 0.32, "grad_norm": 0.6384837249315415, "learning_rate": 4.781212680109974e-06, "loss": 0.3036, "step": 6837 }, { "epoch": 0.32, "grad_norm": 0.5954759030320778, "learning_rate": 4.781135085773278e-06, "loss": 0.2988, "step": 6838 }, { "epoch": 0.32, "grad_norm": 0.6220518496427733, "learning_rate": 4.781057478309223e-06, "loss": 0.2975, "step": 6839 }, { "epoch": 0.32, "grad_norm": 0.5806033915142267, "learning_rate": 4.780979857718256e-06, "loss": 0.2836, "step": 6840 }, { "epoch": 0.32, "grad_norm": 0.6253846226355699, "learning_rate": 4.7809022240008215e-06, "loss": 0.3071, "step": 6841 }, { "epoch": 0.32, "grad_norm": 0.6603912190931942, "learning_rate": 4.78082457715737e-06, "loss": 0.293, "step": 6842 }, { "epoch": 0.32, "grad_norm": 0.6400882675389638, "learning_rate": 4.780746917188345e-06, "loss": 0.3064, "step": 6843 }, { "epoch": 0.32, "grad_norm": 0.622664185538138, "learning_rate": 4.780669244094196e-06, "loss": 0.2912, "step": 6844 }, { "epoch": 0.32, "grad_norm": 0.6118787506645604, "learning_rate": 4.780591557875368e-06, "loss": 0.2939, "step": 6845 }, { "epoch": 0.32, "grad_norm": 0.9743457454672286, "learning_rate": 4.780513858532309e-06, "loss": 0.2995, "step": 6846 }, { "epoch": 0.32, "grad_norm": 0.6059604560769997, "learning_rate": 4.780436146065465e-06, "loss": 0.2686, "step": 6847 }, { "epoch": 0.32, "grad_norm": 0.6139895860433021, "learning_rate": 4.780358420475285e-06, "loss": 0.2788, "step": 6848 }, { "epoch": 0.32, "grad_norm": 0.6670356703390972, "learning_rate": 4.780280681762216e-06, "loss": 0.3223, "step": 6849 }, { "epoch": 0.32, "grad_norm": 0.6605972631815106, "learning_rate": 4.7802029299267035e-06, "loss": 0.2863, "step": 6850 }, { "epoch": 0.32, "grad_norm": 0.7331128327862084, "learning_rate": 4.780125164969197e-06, "loss": 0.3073, "step": 6851 }, { "epoch": 0.32, "grad_norm": 0.6259920651525873, "learning_rate": 4.780047386890143e-06, "loss": 0.2922, "step": 6852 }, { "epoch": 0.32, "grad_norm": 0.620904717971174, "learning_rate": 4.7799695956899896e-06, "loss": 0.2882, "step": 6853 }, { "epoch": 0.32, "grad_norm": 0.6356842972001107, "learning_rate": 4.779891791369184e-06, "loss": 0.2963, "step": 6854 }, { "epoch": 0.32, "grad_norm": 0.6237533951426569, "learning_rate": 4.779813973928175e-06, "loss": 0.3028, "step": 6855 }, { "epoch": 0.32, "grad_norm": 0.5782287708169944, "learning_rate": 4.779736143367409e-06, "loss": 0.2766, "step": 6856 }, { "epoch": 0.32, "grad_norm": 0.7079453739370345, "learning_rate": 4.779658299687333e-06, "loss": 0.325, "step": 6857 }, { "epoch": 0.32, "grad_norm": 0.704575800524028, "learning_rate": 4.779580442888399e-06, "loss": 0.2976, "step": 6858 }, { "epoch": 0.32, "grad_norm": 0.6571145612738398, "learning_rate": 4.779502572971051e-06, "loss": 0.3009, "step": 6859 }, { "epoch": 0.32, "grad_norm": 0.565900801317214, "learning_rate": 4.779424689935739e-06, "loss": 0.2845, "step": 6860 }, { "epoch": 0.32, "grad_norm": 0.6735709729251539, "learning_rate": 4.77934679378291e-06, "loss": 0.3171, "step": 6861 }, { "epoch": 0.32, "grad_norm": 0.6311796225196313, "learning_rate": 4.779268884513014e-06, "loss": 0.317, "step": 6862 }, { "epoch": 0.32, "grad_norm": 0.6636718053656234, "learning_rate": 4.779190962126498e-06, "loss": 0.3034, "step": 6863 }, { "epoch": 0.32, "grad_norm": 0.63731013150209, "learning_rate": 4.779113026623812e-06, "loss": 0.2917, "step": 6864 }, { "epoch": 0.32, "grad_norm": 0.5904525035346447, "learning_rate": 4.779035078005401e-06, "loss": 0.2987, "step": 6865 }, { "epoch": 0.32, "grad_norm": 0.6158919631873302, "learning_rate": 4.778957116271718e-06, "loss": 0.3084, "step": 6866 }, { "epoch": 0.32, "grad_norm": 0.7041754626773252, "learning_rate": 4.7788791414232096e-06, "loss": 0.2924, "step": 6867 }, { "epoch": 0.32, "grad_norm": 0.6322086340769173, "learning_rate": 4.7788011534603244e-06, "loss": 0.2983, "step": 6868 }, { "epoch": 0.32, "grad_norm": 0.6279541614720138, "learning_rate": 4.77872315238351e-06, "loss": 0.2944, "step": 6869 }, { "epoch": 0.32, "grad_norm": 0.5928544613913441, "learning_rate": 4.778645138193218e-06, "loss": 0.2635, "step": 6870 }, { "epoch": 0.32, "grad_norm": 0.6525388696479292, "learning_rate": 4.778567110889895e-06, "loss": 0.3064, "step": 6871 }, { "epoch": 0.32, "grad_norm": 0.6277433671258675, "learning_rate": 4.778489070473992e-06, "loss": 0.3171, "step": 6872 }, { "epoch": 0.32, "grad_norm": 0.6811379906196401, "learning_rate": 4.778411016945956e-06, "loss": 0.2993, "step": 6873 }, { "epoch": 0.32, "grad_norm": 0.6145624104052538, "learning_rate": 4.778332950306238e-06, "loss": 0.3072, "step": 6874 }, { "epoch": 0.32, "grad_norm": 0.6285731811832048, "learning_rate": 4.7782548705552865e-06, "loss": 0.2848, "step": 6875 }, { "epoch": 0.32, "grad_norm": 0.6531905605339305, "learning_rate": 4.77817677769355e-06, "loss": 0.2848, "step": 6876 }, { "epoch": 0.32, "grad_norm": 0.6310477424804443, "learning_rate": 4.77809867172148e-06, "loss": 0.288, "step": 6877 }, { "epoch": 0.32, "grad_norm": 0.637799097474085, "learning_rate": 4.778020552639523e-06, "loss": 0.3053, "step": 6878 }, { "epoch": 0.32, "grad_norm": 0.6569581661851782, "learning_rate": 4.777942420448132e-06, "loss": 0.3031, "step": 6879 }, { "epoch": 0.32, "grad_norm": 0.6407593536890467, "learning_rate": 4.777864275147754e-06, "loss": 0.299, "step": 6880 }, { "epoch": 0.32, "grad_norm": 0.6285656788411782, "learning_rate": 4.777786116738839e-06, "loss": 0.2972, "step": 6881 }, { "epoch": 0.32, "grad_norm": 0.6043201643682852, "learning_rate": 4.777707945221839e-06, "loss": 0.2729, "step": 6882 }, { "epoch": 0.32, "grad_norm": 0.619901347439824, "learning_rate": 4.777629760597202e-06, "loss": 0.3115, "step": 6883 }, { "epoch": 0.32, "grad_norm": 0.6152715708123422, "learning_rate": 4.777551562865377e-06, "loss": 0.2911, "step": 6884 }, { "epoch": 0.32, "grad_norm": 0.6467148258292518, "learning_rate": 4.777473352026816e-06, "loss": 0.2899, "step": 6885 }, { "epoch": 0.32, "grad_norm": 0.6361383191414287, "learning_rate": 4.7773951280819685e-06, "loss": 0.321, "step": 6886 }, { "epoch": 0.32, "grad_norm": 0.5505632951954131, "learning_rate": 4.777316891031284e-06, "loss": 0.2795, "step": 6887 }, { "epoch": 0.32, "grad_norm": 0.6391451964099006, "learning_rate": 4.777238640875213e-06, "loss": 0.3061, "step": 6888 }, { "epoch": 0.32, "grad_norm": 0.6078904886118004, "learning_rate": 4.777160377614206e-06, "loss": 0.2875, "step": 6889 }, { "epoch": 0.32, "grad_norm": 0.6162767007101196, "learning_rate": 4.777082101248714e-06, "loss": 0.2911, "step": 6890 }, { "epoch": 0.32, "grad_norm": 0.5779215106587001, "learning_rate": 4.777003811779186e-06, "loss": 0.2996, "step": 6891 }, { "epoch": 0.32, "grad_norm": 0.6264385371847897, "learning_rate": 4.776925509206074e-06, "loss": 0.3058, "step": 6892 }, { "epoch": 0.32, "grad_norm": 0.608093785313454, "learning_rate": 4.776847193529828e-06, "loss": 0.2893, "step": 6893 }, { "epoch": 0.32, "grad_norm": 0.5542114547614223, "learning_rate": 4.776768864750898e-06, "loss": 0.2864, "step": 6894 }, { "epoch": 0.32, "grad_norm": 0.6061762314918059, "learning_rate": 4.7766905228697365e-06, "loss": 0.2782, "step": 6895 }, { "epoch": 0.32, "grad_norm": 0.6205152439645388, "learning_rate": 4.776612167886793e-06, "loss": 0.2944, "step": 6896 }, { "epoch": 0.32, "grad_norm": 0.5983424462705927, "learning_rate": 4.776533799802518e-06, "loss": 0.2946, "step": 6897 }, { "epoch": 0.32, "grad_norm": 0.6422914127873295, "learning_rate": 4.776455418617363e-06, "loss": 0.2864, "step": 6898 }, { "epoch": 0.32, "grad_norm": 0.5907634534736821, "learning_rate": 4.7763770243317805e-06, "loss": 0.2979, "step": 6899 }, { "epoch": 0.32, "grad_norm": 0.6070560161529902, "learning_rate": 4.776298616946219e-06, "loss": 0.2854, "step": 6900 }, { "epoch": 0.32, "grad_norm": 0.5817809193081751, "learning_rate": 4.7762201964611325e-06, "loss": 0.2801, "step": 6901 }, { "epoch": 0.32, "grad_norm": 0.6603072347985666, "learning_rate": 4.77614176287697e-06, "loss": 0.2897, "step": 6902 }, { "epoch": 0.32, "grad_norm": 0.6584245181106828, "learning_rate": 4.776063316194183e-06, "loss": 0.3105, "step": 6903 }, { "epoch": 0.32, "grad_norm": 0.6561539665677425, "learning_rate": 4.775984856413225e-06, "loss": 0.3229, "step": 6904 }, { "epoch": 0.32, "grad_norm": 0.6321164135619515, "learning_rate": 4.775906383534545e-06, "loss": 0.3033, "step": 6905 }, { "epoch": 0.32, "grad_norm": 0.6333743763636945, "learning_rate": 4.775827897558597e-06, "loss": 0.2998, "step": 6906 }, { "epoch": 0.32, "grad_norm": 0.6493859072896295, "learning_rate": 4.775749398485831e-06, "loss": 0.298, "step": 6907 }, { "epoch": 0.32, "grad_norm": 0.6814098663175101, "learning_rate": 4.775670886316699e-06, "loss": 0.3083, "step": 6908 }, { "epoch": 0.32, "grad_norm": 0.647873421730969, "learning_rate": 4.775592361051653e-06, "loss": 0.3043, "step": 6909 }, { "epoch": 0.32, "grad_norm": 0.6328999790137633, "learning_rate": 4.775513822691146e-06, "loss": 0.3006, "step": 6910 }, { "epoch": 0.32, "grad_norm": 0.7329129211810387, "learning_rate": 4.775435271235627e-06, "loss": 0.3189, "step": 6911 }, { "epoch": 0.32, "grad_norm": 0.6232436760873628, "learning_rate": 4.775356706685551e-06, "loss": 0.3084, "step": 6912 }, { "epoch": 0.32, "grad_norm": 0.6931477265604515, "learning_rate": 4.7752781290413695e-06, "loss": 0.3075, "step": 6913 }, { "epoch": 0.32, "grad_norm": 0.5966481330824122, "learning_rate": 4.775199538303533e-06, "loss": 0.2948, "step": 6914 }, { "epoch": 0.32, "grad_norm": 0.6705587099620299, "learning_rate": 4.7751209344724955e-06, "loss": 0.3013, "step": 6915 }, { "epoch": 0.32, "grad_norm": 0.6695072284883249, "learning_rate": 4.775042317548709e-06, "loss": 0.2839, "step": 6916 }, { "epoch": 0.32, "grad_norm": 0.6531158081212579, "learning_rate": 4.774963687532626e-06, "loss": 0.2787, "step": 6917 }, { "epoch": 0.32, "grad_norm": 0.7297831235384425, "learning_rate": 4.774885044424698e-06, "loss": 0.3125, "step": 6918 }, { "epoch": 0.32, "grad_norm": 0.627417999704011, "learning_rate": 4.774806388225379e-06, "loss": 0.2893, "step": 6919 }, { "epoch": 0.32, "grad_norm": 0.5874691761166103, "learning_rate": 4.774727718935121e-06, "loss": 0.2888, "step": 6920 }, { "epoch": 0.32, "grad_norm": 0.7275796870045356, "learning_rate": 4.7746490365543776e-06, "loss": 0.3165, "step": 6921 }, { "epoch": 0.32, "grad_norm": 0.6703825573659111, "learning_rate": 4.7745703410835995e-06, "loss": 0.2883, "step": 6922 }, { "epoch": 0.32, "grad_norm": 0.6292320655562654, "learning_rate": 4.77449163252324e-06, "loss": 0.3219, "step": 6923 }, { "epoch": 0.32, "grad_norm": 0.5806085421339808, "learning_rate": 4.774412910873754e-06, "loss": 0.2832, "step": 6924 }, { "epoch": 0.32, "grad_norm": 0.6303914859735954, "learning_rate": 4.7743341761355935e-06, "loss": 0.3043, "step": 6925 }, { "epoch": 0.32, "grad_norm": 0.6279011942444226, "learning_rate": 4.774255428309211e-06, "loss": 0.2827, "step": 6926 }, { "epoch": 0.32, "grad_norm": 0.5705021919480717, "learning_rate": 4.7741766673950605e-06, "loss": 0.2904, "step": 6927 }, { "epoch": 0.32, "grad_norm": 0.6394144880740725, "learning_rate": 4.774097893393595e-06, "loss": 0.2915, "step": 6928 }, { "epoch": 0.32, "grad_norm": 0.6329032688867489, "learning_rate": 4.7740191063052664e-06, "loss": 0.3116, "step": 6929 }, { "epoch": 0.32, "grad_norm": 0.6383253102850106, "learning_rate": 4.773940306130531e-06, "loss": 0.3151, "step": 6930 }, { "epoch": 0.32, "grad_norm": 0.6335809914617321, "learning_rate": 4.77386149286984e-06, "loss": 0.2873, "step": 6931 }, { "epoch": 0.32, "grad_norm": 0.7207473374775368, "learning_rate": 4.773782666523647e-06, "loss": 0.3037, "step": 6932 }, { "epoch": 0.32, "grad_norm": 0.5601801987713836, "learning_rate": 4.773703827092407e-06, "loss": 0.2756, "step": 6933 }, { "epoch": 0.32, "grad_norm": 0.5889195029285713, "learning_rate": 4.7736249745765725e-06, "loss": 0.2804, "step": 6934 }, { "epoch": 0.32, "grad_norm": 0.6103029852845783, "learning_rate": 4.773546108976599e-06, "loss": 0.299, "step": 6935 }, { "epoch": 0.32, "grad_norm": 0.6331213635367405, "learning_rate": 4.773467230292937e-06, "loss": 0.3007, "step": 6936 }, { "epoch": 0.32, "grad_norm": 0.6333781729832997, "learning_rate": 4.773388338526044e-06, "loss": 0.3046, "step": 6937 }, { "epoch": 0.33, "grad_norm": 0.689127575854476, "learning_rate": 4.773309433676372e-06, "loss": 0.3185, "step": 6938 }, { "epoch": 0.33, "grad_norm": 0.6285071409110452, "learning_rate": 4.773230515744376e-06, "loss": 0.2891, "step": 6939 }, { "epoch": 0.33, "grad_norm": 0.5430985266581835, "learning_rate": 4.773151584730509e-06, "loss": 0.2765, "step": 6940 }, { "epoch": 0.33, "grad_norm": 0.6146998309589403, "learning_rate": 4.773072640635226e-06, "loss": 0.3027, "step": 6941 }, { "epoch": 0.33, "grad_norm": 0.6547561497840626, "learning_rate": 4.772993683458982e-06, "loss": 0.3077, "step": 6942 }, { "epoch": 0.33, "grad_norm": 0.6614887298609512, "learning_rate": 4.77291471320223e-06, "loss": 0.3233, "step": 6943 }, { "epoch": 0.33, "grad_norm": 0.6349677893471269, "learning_rate": 4.772835729865426e-06, "loss": 0.3152, "step": 6944 }, { "epoch": 0.33, "grad_norm": 0.6414363714539312, "learning_rate": 4.772756733449023e-06, "loss": 0.2815, "step": 6945 }, { "epoch": 0.33, "grad_norm": 0.6393520009670259, "learning_rate": 4.772677723953476e-06, "loss": 0.2948, "step": 6946 }, { "epoch": 0.33, "grad_norm": 0.6400241212965291, "learning_rate": 4.7725987013792405e-06, "loss": 0.2983, "step": 6947 }, { "epoch": 0.33, "grad_norm": 0.617667135703893, "learning_rate": 4.7725196657267705e-06, "loss": 0.2918, "step": 6948 }, { "epoch": 0.33, "grad_norm": 0.6106337430683656, "learning_rate": 4.77244061699652e-06, "loss": 0.278, "step": 6949 }, { "epoch": 0.33, "grad_norm": 0.6179652106143176, "learning_rate": 4.772361555188947e-06, "loss": 0.3213, "step": 6950 }, { "epoch": 0.33, "grad_norm": 0.628504829009979, "learning_rate": 4.772282480304502e-06, "loss": 0.2942, "step": 6951 }, { "epoch": 0.33, "grad_norm": 0.6094614363109827, "learning_rate": 4.772203392343644e-06, "loss": 0.2898, "step": 6952 }, { "epoch": 0.33, "grad_norm": 0.6165117983329105, "learning_rate": 4.772124291306826e-06, "loss": 0.2935, "step": 6953 }, { "epoch": 0.33, "grad_norm": 0.5977317405396716, "learning_rate": 4.772045177194504e-06, "loss": 0.3003, "step": 6954 }, { "epoch": 0.33, "grad_norm": 0.6206767751193423, "learning_rate": 4.771966050007133e-06, "loss": 0.296, "step": 6955 }, { "epoch": 0.33, "grad_norm": 0.5829626062331945, "learning_rate": 4.771886909745168e-06, "loss": 0.2804, "step": 6956 }, { "epoch": 0.33, "grad_norm": 0.626698729970327, "learning_rate": 4.771807756409066e-06, "loss": 0.2963, "step": 6957 }, { "epoch": 0.33, "grad_norm": 0.6273496339539957, "learning_rate": 4.77172858999928e-06, "loss": 0.2951, "step": 6958 }, { "epoch": 0.33, "grad_norm": 0.5584716502760156, "learning_rate": 4.771649410516268e-06, "loss": 0.2731, "step": 6959 }, { "epoch": 0.33, "grad_norm": 0.5975724842683257, "learning_rate": 4.771570217960484e-06, "loss": 0.2839, "step": 6960 }, { "epoch": 0.33, "grad_norm": 0.646160584812177, "learning_rate": 4.771491012332384e-06, "loss": 0.3042, "step": 6961 }, { "epoch": 0.33, "grad_norm": 0.625503959381413, "learning_rate": 4.771411793632425e-06, "loss": 0.2954, "step": 6962 }, { "epoch": 0.33, "grad_norm": 0.6620967667237448, "learning_rate": 4.771332561861062e-06, "loss": 0.2989, "step": 6963 }, { "epoch": 0.33, "grad_norm": 0.6247014197879976, "learning_rate": 4.77125331701875e-06, "loss": 0.3011, "step": 6964 }, { "epoch": 0.33, "grad_norm": 0.630884954104319, "learning_rate": 4.771174059105947e-06, "loss": 0.3058, "step": 6965 }, { "epoch": 0.33, "grad_norm": 0.6639847541607893, "learning_rate": 4.771094788123108e-06, "loss": 0.3212, "step": 6966 }, { "epoch": 0.33, "grad_norm": 0.5863160705535437, "learning_rate": 4.771015504070689e-06, "loss": 0.2874, "step": 6967 }, { "epoch": 0.33, "grad_norm": 0.6517866284635684, "learning_rate": 4.770936206949147e-06, "loss": 0.3089, "step": 6968 }, { "epoch": 0.33, "grad_norm": 0.5823336246306774, "learning_rate": 4.770856896758937e-06, "loss": 0.2988, "step": 6969 }, { "epoch": 0.33, "grad_norm": 0.631190755788091, "learning_rate": 4.770777573500517e-06, "loss": 0.274, "step": 6970 }, { "epoch": 0.33, "grad_norm": 0.6372886385746989, "learning_rate": 4.7706982371743424e-06, "loss": 0.29, "step": 6971 }, { "epoch": 0.33, "grad_norm": 0.610338425093599, "learning_rate": 4.770618887780871e-06, "loss": 0.2991, "step": 6972 }, { "epoch": 0.33, "grad_norm": 0.570774317194474, "learning_rate": 4.7705395253205575e-06, "loss": 0.2548, "step": 6973 }, { "epoch": 0.33, "grad_norm": 0.6829429987651153, "learning_rate": 4.770460149793861e-06, "loss": 0.3034, "step": 6974 }, { "epoch": 0.33, "grad_norm": 0.6350591559232837, "learning_rate": 4.7703807612012365e-06, "loss": 0.2844, "step": 6975 }, { "epoch": 0.33, "grad_norm": 0.6375107914960665, "learning_rate": 4.770301359543141e-06, "loss": 0.2982, "step": 6976 }, { "epoch": 0.33, "grad_norm": 0.6000690135121659, "learning_rate": 4.770221944820032e-06, "loss": 0.2791, "step": 6977 }, { "epoch": 0.33, "grad_norm": 0.6110313827078676, "learning_rate": 4.770142517032365e-06, "loss": 0.3248, "step": 6978 }, { "epoch": 0.33, "grad_norm": 0.5923127986847405, "learning_rate": 4.770063076180601e-06, "loss": 0.2777, "step": 6979 }, { "epoch": 0.33, "grad_norm": 0.637572786575788, "learning_rate": 4.769983622265193e-06, "loss": 0.2966, "step": 6980 }, { "epoch": 0.33, "grad_norm": 0.5628244087563559, "learning_rate": 4.7699041552866e-06, "loss": 0.2739, "step": 6981 }, { "epoch": 0.33, "grad_norm": 0.6102094880790113, "learning_rate": 4.769824675245279e-06, "loss": 0.2847, "step": 6982 }, { "epoch": 0.33, "grad_norm": 0.6267471792814153, "learning_rate": 4.7697451821416875e-06, "loss": 0.3052, "step": 6983 }, { "epoch": 0.33, "grad_norm": 0.6639817912069355, "learning_rate": 4.769665675976283e-06, "loss": 0.3216, "step": 6984 }, { "epoch": 0.33, "grad_norm": 0.631743765829883, "learning_rate": 4.769586156749523e-06, "loss": 0.2957, "step": 6985 }, { "epoch": 0.33, "grad_norm": 0.5802457443106747, "learning_rate": 4.769506624461865e-06, "loss": 0.2862, "step": 6986 }, { "epoch": 0.33, "grad_norm": 0.6292770534141418, "learning_rate": 4.769427079113767e-06, "loss": 0.2942, "step": 6987 }, { "epoch": 0.33, "grad_norm": 0.6228761372200878, "learning_rate": 4.769347520705687e-06, "loss": 0.3051, "step": 6988 }, { "epoch": 0.33, "grad_norm": 0.6858461165556816, "learning_rate": 4.769267949238081e-06, "loss": 0.3061, "step": 6989 }, { "epoch": 0.33, "grad_norm": 0.64110446593106, "learning_rate": 4.769188364711409e-06, "loss": 0.2947, "step": 6990 }, { "epoch": 0.33, "grad_norm": 0.6521783161525703, "learning_rate": 4.769108767126129e-06, "loss": 0.2964, "step": 6991 }, { "epoch": 0.33, "grad_norm": 0.5711400321563834, "learning_rate": 4.769029156482698e-06, "loss": 0.2909, "step": 6992 }, { "epoch": 0.33, "grad_norm": 0.624535116429599, "learning_rate": 4.768949532781574e-06, "loss": 0.3259, "step": 6993 }, { "epoch": 0.33, "grad_norm": 0.6391456828455228, "learning_rate": 4.768869896023217e-06, "loss": 0.324, "step": 6994 }, { "epoch": 0.33, "grad_norm": 0.7088403673153436, "learning_rate": 4.7687902462080825e-06, "loss": 0.3029, "step": 6995 }, { "epoch": 0.33, "grad_norm": 0.5925343119926854, "learning_rate": 4.768710583336631e-06, "loss": 0.319, "step": 6996 }, { "epoch": 0.33, "grad_norm": 0.5741227441861887, "learning_rate": 4.768630907409321e-06, "loss": 0.2783, "step": 6997 }, { "epoch": 0.33, "grad_norm": 0.6211042834938812, "learning_rate": 4.768551218426609e-06, "loss": 0.3071, "step": 6998 }, { "epoch": 0.33, "grad_norm": 0.5857725115224344, "learning_rate": 4.768471516388955e-06, "loss": 0.2897, "step": 6999 }, { "epoch": 0.33, "grad_norm": 0.6507531252397484, "learning_rate": 4.768391801296819e-06, "loss": 0.295, "step": 7000 }, { "epoch": 0.33, "grad_norm": 0.6486294894327876, "learning_rate": 4.768312073150657e-06, "loss": 0.3161, "step": 7001 }, { "epoch": 0.33, "grad_norm": 0.5655086992257424, "learning_rate": 4.768232331950929e-06, "loss": 0.2799, "step": 7002 }, { "epoch": 0.33, "grad_norm": 0.6138823370519436, "learning_rate": 4.7681525776980955e-06, "loss": 0.2979, "step": 7003 }, { "epoch": 0.33, "grad_norm": 0.6248059422985991, "learning_rate": 4.768072810392613e-06, "loss": 0.2997, "step": 7004 }, { "epoch": 0.33, "grad_norm": 0.5929748459874332, "learning_rate": 4.767993030034941e-06, "loss": 0.3053, "step": 7005 }, { "epoch": 0.33, "grad_norm": 0.6144842918219847, "learning_rate": 4.76791323662554e-06, "loss": 0.2916, "step": 7006 }, { "epoch": 0.33, "grad_norm": 0.5730485678665298, "learning_rate": 4.767833430164868e-06, "loss": 0.2793, "step": 7007 }, { "epoch": 0.33, "grad_norm": 0.6006790526520538, "learning_rate": 4.767753610653385e-06, "loss": 0.2979, "step": 7008 }, { "epoch": 0.33, "grad_norm": 0.5871091306347785, "learning_rate": 4.7676737780915495e-06, "loss": 0.2639, "step": 7009 }, { "epoch": 0.33, "grad_norm": 0.6289315774664356, "learning_rate": 4.767593932479822e-06, "loss": 0.2678, "step": 7010 }, { "epoch": 0.33, "grad_norm": 0.5911125269627426, "learning_rate": 4.76751407381866e-06, "loss": 0.2872, "step": 7011 }, { "epoch": 0.33, "grad_norm": 0.5454318481432804, "learning_rate": 4.767434202108527e-06, "loss": 0.2828, "step": 7012 }, { "epoch": 0.33, "grad_norm": 0.6251924110309678, "learning_rate": 4.767354317349877e-06, "loss": 0.2927, "step": 7013 }, { "epoch": 0.33, "grad_norm": 0.6185054906863586, "learning_rate": 4.767274419543174e-06, "loss": 0.2877, "step": 7014 }, { "epoch": 0.33, "grad_norm": 0.6310532806394057, "learning_rate": 4.767194508688877e-06, "loss": 0.308, "step": 7015 }, { "epoch": 0.33, "grad_norm": 0.5983710625465795, "learning_rate": 4.767114584787446e-06, "loss": 0.2961, "step": 7016 }, { "epoch": 0.33, "grad_norm": 0.5979639018269131, "learning_rate": 4.767034647839339e-06, "loss": 0.299, "step": 7017 }, { "epoch": 0.33, "grad_norm": 0.642577902080583, "learning_rate": 4.766954697845018e-06, "loss": 0.2896, "step": 7018 }, { "epoch": 0.33, "grad_norm": 0.587816445646305, "learning_rate": 4.766874734804942e-06, "loss": 0.3052, "step": 7019 }, { "epoch": 0.33, "grad_norm": 0.578909654006269, "learning_rate": 4.766794758719572e-06, "loss": 0.277, "step": 7020 }, { "epoch": 0.33, "grad_norm": 0.5638104704251016, "learning_rate": 4.766714769589368e-06, "loss": 0.2835, "step": 7021 }, { "epoch": 0.33, "grad_norm": 0.6432167448510111, "learning_rate": 4.76663476741479e-06, "loss": 0.291, "step": 7022 }, { "epoch": 0.33, "grad_norm": 0.6129462367405053, "learning_rate": 4.7665547521962995e-06, "loss": 0.3091, "step": 7023 }, { "epoch": 0.33, "grad_norm": 0.5744364677373599, "learning_rate": 4.766474723934356e-06, "loss": 0.28, "step": 7024 }, { "epoch": 0.33, "grad_norm": 0.6194023652930074, "learning_rate": 4.766394682629419e-06, "loss": 0.2869, "step": 7025 }, { "epoch": 0.33, "grad_norm": 0.6599919618100214, "learning_rate": 4.766314628281951e-06, "loss": 0.3135, "step": 7026 }, { "epoch": 0.33, "grad_norm": 0.7109787413686554, "learning_rate": 4.766234560892411e-06, "loss": 0.3167, "step": 7027 }, { "epoch": 0.33, "grad_norm": 0.6156930881914601, "learning_rate": 4.766154480461261e-06, "loss": 0.2838, "step": 7028 }, { "epoch": 0.33, "grad_norm": 0.6341648880815232, "learning_rate": 4.766074386988963e-06, "loss": 0.311, "step": 7029 }, { "epoch": 0.33, "grad_norm": 0.5839632671065301, "learning_rate": 4.765994280475975e-06, "loss": 0.2805, "step": 7030 }, { "epoch": 0.33, "grad_norm": 0.5748894857111683, "learning_rate": 4.76591416092276e-06, "loss": 0.2998, "step": 7031 }, { "epoch": 0.33, "grad_norm": 0.6109018363901091, "learning_rate": 4.765834028329778e-06, "loss": 0.2777, "step": 7032 }, { "epoch": 0.33, "grad_norm": 0.7030136764840285, "learning_rate": 4.765753882697491e-06, "loss": 0.2975, "step": 7033 }, { "epoch": 0.33, "grad_norm": 0.6016574760807887, "learning_rate": 4.7656737240263604e-06, "loss": 0.2906, "step": 7034 }, { "epoch": 0.33, "grad_norm": 0.5727721953281582, "learning_rate": 4.765593552316846e-06, "loss": 0.2931, "step": 7035 }, { "epoch": 0.33, "grad_norm": 0.6503213741479242, "learning_rate": 4.765513367569411e-06, "loss": 0.2901, "step": 7036 }, { "epoch": 0.33, "grad_norm": 0.6563638381041748, "learning_rate": 4.765433169784516e-06, "loss": 0.2963, "step": 7037 }, { "epoch": 0.33, "grad_norm": 0.6142654420285537, "learning_rate": 4.7653529589626216e-06, "loss": 0.273, "step": 7038 }, { "epoch": 0.33, "grad_norm": 0.6426377536804755, "learning_rate": 4.765272735104191e-06, "loss": 0.2791, "step": 7039 }, { "epoch": 0.33, "grad_norm": 0.6158822635235748, "learning_rate": 4.765192498209685e-06, "loss": 0.2846, "step": 7040 }, { "epoch": 0.33, "grad_norm": 0.6211366358409897, "learning_rate": 4.765112248279566e-06, "loss": 0.286, "step": 7041 }, { "epoch": 0.33, "grad_norm": 0.5906989032423492, "learning_rate": 4.7650319853142945e-06, "loss": 0.2931, "step": 7042 }, { "epoch": 0.33, "grad_norm": 0.6158503703020657, "learning_rate": 4.764951709314333e-06, "loss": 0.2974, "step": 7043 }, { "epoch": 0.33, "grad_norm": 0.55617524643091, "learning_rate": 4.764871420280145e-06, "loss": 0.2808, "step": 7044 }, { "epoch": 0.33, "grad_norm": 0.5861922644095505, "learning_rate": 4.76479111821219e-06, "loss": 0.2835, "step": 7045 }, { "epoch": 0.33, "grad_norm": 0.6668030924333569, "learning_rate": 4.7647108031109315e-06, "loss": 0.3008, "step": 7046 }, { "epoch": 0.33, "grad_norm": 0.6188127217332317, "learning_rate": 4.764630474976833e-06, "loss": 0.2805, "step": 7047 }, { "epoch": 0.33, "grad_norm": 0.6243068378833373, "learning_rate": 4.764550133810353e-06, "loss": 0.2945, "step": 7048 }, { "epoch": 0.33, "grad_norm": 0.6343223404640562, "learning_rate": 4.764469779611958e-06, "loss": 0.3059, "step": 7049 }, { "epoch": 0.33, "grad_norm": 0.6018067375605, "learning_rate": 4.764389412382107e-06, "loss": 0.2851, "step": 7050 }, { "epoch": 0.33, "grad_norm": 0.5614419054979537, "learning_rate": 4.7643090321212655e-06, "loss": 0.2743, "step": 7051 }, { "epoch": 0.33, "grad_norm": 0.5832417097764869, "learning_rate": 4.764228638829894e-06, "loss": 0.2984, "step": 7052 }, { "epoch": 0.33, "grad_norm": 0.6122644028428802, "learning_rate": 4.7641482325084556e-06, "loss": 0.3008, "step": 7053 }, { "epoch": 0.33, "grad_norm": 0.5858916995376349, "learning_rate": 4.764067813157413e-06, "loss": 0.3002, "step": 7054 }, { "epoch": 0.33, "grad_norm": 0.61990618770254, "learning_rate": 4.76398738077723e-06, "loss": 0.2848, "step": 7055 }, { "epoch": 0.33, "grad_norm": 0.6697728170249563, "learning_rate": 4.763906935368368e-06, "loss": 0.3159, "step": 7056 }, { "epoch": 0.33, "grad_norm": 0.6018178530278753, "learning_rate": 4.7638264769312915e-06, "loss": 0.2968, "step": 7057 }, { "epoch": 0.33, "grad_norm": 0.6262265844622066, "learning_rate": 4.763746005466462e-06, "loss": 0.2981, "step": 7058 }, { "epoch": 0.33, "grad_norm": 0.6111233825369398, "learning_rate": 4.763665520974343e-06, "loss": 0.2703, "step": 7059 }, { "epoch": 0.33, "grad_norm": 0.6579616224506106, "learning_rate": 4.763585023455398e-06, "loss": 0.2874, "step": 7060 }, { "epoch": 0.33, "grad_norm": 0.662005296592419, "learning_rate": 4.763504512910091e-06, "loss": 0.3076, "step": 7061 }, { "epoch": 0.33, "grad_norm": 0.6322256836776048, "learning_rate": 4.763423989338883e-06, "loss": 0.3143, "step": 7062 }, { "epoch": 0.33, "grad_norm": 0.5362249080614689, "learning_rate": 4.763343452742239e-06, "loss": 0.2672, "step": 7063 }, { "epoch": 0.33, "grad_norm": 0.6448306375904549, "learning_rate": 4.763262903120624e-06, "loss": 0.3125, "step": 7064 }, { "epoch": 0.33, "grad_norm": 0.5758701164409384, "learning_rate": 4.763182340474498e-06, "loss": 0.2769, "step": 7065 }, { "epoch": 0.33, "grad_norm": 0.5672727706618069, "learning_rate": 4.7631017648043275e-06, "loss": 0.299, "step": 7066 }, { "epoch": 0.33, "grad_norm": 0.6067629765679432, "learning_rate": 4.763021176110575e-06, "loss": 0.2867, "step": 7067 }, { "epoch": 0.33, "grad_norm": 0.6161864975384377, "learning_rate": 4.762940574393703e-06, "loss": 0.2978, "step": 7068 }, { "epoch": 0.33, "grad_norm": 0.610901313389123, "learning_rate": 4.762859959654179e-06, "loss": 0.2689, "step": 7069 }, { "epoch": 0.33, "grad_norm": 0.7553162863863713, "learning_rate": 4.762779331892463e-06, "loss": 0.3071, "step": 7070 }, { "epoch": 0.33, "grad_norm": 0.5885878935552987, "learning_rate": 4.762698691109021e-06, "loss": 0.2817, "step": 7071 }, { "epoch": 0.33, "grad_norm": 0.6078252916828982, "learning_rate": 4.762618037304317e-06, "loss": 0.2979, "step": 7072 }, { "epoch": 0.33, "grad_norm": 0.6153763724860114, "learning_rate": 4.762537370478815e-06, "loss": 0.3025, "step": 7073 }, { "epoch": 0.33, "grad_norm": 0.6238580014374452, "learning_rate": 4.762456690632979e-06, "loss": 0.3015, "step": 7074 }, { "epoch": 0.33, "grad_norm": 0.687016877673287, "learning_rate": 4.762375997767273e-06, "loss": 0.308, "step": 7075 }, { "epoch": 0.33, "grad_norm": 0.6803545451005307, "learning_rate": 4.762295291882163e-06, "loss": 0.2978, "step": 7076 }, { "epoch": 0.33, "grad_norm": 0.6476542692974647, "learning_rate": 4.762214572978111e-06, "loss": 0.2823, "step": 7077 }, { "epoch": 0.33, "grad_norm": 0.6502421929911508, "learning_rate": 4.762133841055583e-06, "loss": 0.3002, "step": 7078 }, { "epoch": 0.33, "grad_norm": 0.6795767773985185, "learning_rate": 4.762053096115044e-06, "loss": 0.3252, "step": 7079 }, { "epoch": 0.33, "grad_norm": 0.6711201217165709, "learning_rate": 4.7619723381569575e-06, "loss": 0.3054, "step": 7080 }, { "epoch": 0.33, "grad_norm": 0.6849784779701654, "learning_rate": 4.761891567181788e-06, "loss": 0.318, "step": 7081 }, { "epoch": 0.33, "grad_norm": 0.6644916818637296, "learning_rate": 4.761810783190002e-06, "loss": 0.3126, "step": 7082 }, { "epoch": 0.33, "grad_norm": 0.6290289381771458, "learning_rate": 4.761729986182063e-06, "loss": 0.2805, "step": 7083 }, { "epoch": 0.33, "grad_norm": 0.6780456774596239, "learning_rate": 4.761649176158436e-06, "loss": 0.3273, "step": 7084 }, { "epoch": 0.33, "grad_norm": 0.5840186386416225, "learning_rate": 4.761568353119587e-06, "loss": 0.2822, "step": 7085 }, { "epoch": 0.33, "grad_norm": 0.5863102408046682, "learning_rate": 4.76148751706598e-06, "loss": 0.2846, "step": 7086 }, { "epoch": 0.33, "grad_norm": 0.627777346174055, "learning_rate": 4.761406667998082e-06, "loss": 0.304, "step": 7087 }, { "epoch": 0.33, "grad_norm": 0.60491242365142, "learning_rate": 4.761325805916356e-06, "loss": 0.2707, "step": 7088 }, { "epoch": 0.33, "grad_norm": 0.5840242808683851, "learning_rate": 4.761244930821268e-06, "loss": 0.2873, "step": 7089 }, { "epoch": 0.33, "grad_norm": 0.6546114673676194, "learning_rate": 4.761164042713284e-06, "loss": 0.3264, "step": 7090 }, { "epoch": 0.33, "grad_norm": 0.599051051001699, "learning_rate": 4.76108314159287e-06, "loss": 0.3099, "step": 7091 }, { "epoch": 0.33, "grad_norm": 0.5886445748978977, "learning_rate": 4.761002227460491e-06, "loss": 0.2875, "step": 7092 }, { "epoch": 0.33, "grad_norm": 0.6305027965813983, "learning_rate": 4.760921300316611e-06, "loss": 0.2901, "step": 7093 }, { "epoch": 0.33, "grad_norm": 0.6285455947455879, "learning_rate": 4.760840360161698e-06, "loss": 0.2763, "step": 7094 }, { "epoch": 0.33, "grad_norm": 0.6129587651114137, "learning_rate": 4.760759406996217e-06, "loss": 0.2788, "step": 7095 }, { "epoch": 0.33, "grad_norm": 0.5584969309480788, "learning_rate": 4.760678440820634e-06, "loss": 0.2725, "step": 7096 }, { "epoch": 0.33, "grad_norm": 0.5800539720831643, "learning_rate": 4.760597461635414e-06, "loss": 0.2644, "step": 7097 }, { "epoch": 0.33, "grad_norm": 0.6237134845370319, "learning_rate": 4.760516469441025e-06, "loss": 0.2868, "step": 7098 }, { "epoch": 0.33, "grad_norm": 0.6773897850985493, "learning_rate": 4.760435464237932e-06, "loss": 0.2907, "step": 7099 }, { "epoch": 0.33, "grad_norm": 0.5868304635383637, "learning_rate": 4.7603544460266e-06, "loss": 0.2873, "step": 7100 }, { "epoch": 0.33, "grad_norm": 0.6124479613264049, "learning_rate": 4.7602734148074955e-06, "loss": 0.2814, "step": 7101 }, { "epoch": 0.33, "grad_norm": 0.5999626190576688, "learning_rate": 4.760192370581087e-06, "loss": 0.3066, "step": 7102 }, { "epoch": 0.33, "grad_norm": 1.1239927609798785, "learning_rate": 4.760111313347839e-06, "loss": 0.2916, "step": 7103 }, { "epoch": 0.33, "grad_norm": 0.6014176527776756, "learning_rate": 4.760030243108219e-06, "loss": 0.2865, "step": 7104 }, { "epoch": 0.33, "grad_norm": 0.6285056515279009, "learning_rate": 4.759949159862693e-06, "loss": 0.3044, "step": 7105 }, { "epoch": 0.33, "grad_norm": 0.6201244189609648, "learning_rate": 4.759868063611727e-06, "loss": 0.2648, "step": 7106 }, { "epoch": 0.33, "grad_norm": 0.6864507874251349, "learning_rate": 4.759786954355788e-06, "loss": 0.3174, "step": 7107 }, { "epoch": 0.33, "grad_norm": 0.5998908108846531, "learning_rate": 4.759705832095344e-06, "loss": 0.2889, "step": 7108 }, { "epoch": 0.33, "grad_norm": 0.6296268639018545, "learning_rate": 4.7596246968308605e-06, "loss": 0.2873, "step": 7109 }, { "epoch": 0.33, "grad_norm": 0.6853723651532513, "learning_rate": 4.759543548562805e-06, "loss": 0.3075, "step": 7110 }, { "epoch": 0.33, "grad_norm": 0.649138451319389, "learning_rate": 4.7594623872916436e-06, "loss": 0.2996, "step": 7111 }, { "epoch": 0.33, "grad_norm": 0.595214217727851, "learning_rate": 4.7593812130178445e-06, "loss": 0.2857, "step": 7112 }, { "epoch": 0.33, "grad_norm": 0.6815890342315153, "learning_rate": 4.7593000257418745e-06, "loss": 0.3168, "step": 7113 }, { "epoch": 0.33, "grad_norm": 0.6441090180352892, "learning_rate": 4.7592188254642e-06, "loss": 0.2875, "step": 7114 }, { "epoch": 0.33, "grad_norm": 0.6478353901669629, "learning_rate": 4.75913761218529e-06, "loss": 0.2762, "step": 7115 }, { "epoch": 0.33, "grad_norm": 0.5794174267287522, "learning_rate": 4.759056385905611e-06, "loss": 0.2807, "step": 7116 }, { "epoch": 0.33, "grad_norm": 0.6422204047817497, "learning_rate": 4.7589751466256295e-06, "loss": 0.2963, "step": 7117 }, { "epoch": 0.33, "grad_norm": 0.5869346405260522, "learning_rate": 4.758893894345814e-06, "loss": 0.298, "step": 7118 }, { "epoch": 0.33, "grad_norm": 0.6520441119038152, "learning_rate": 4.758812629066631e-06, "loss": 0.3044, "step": 7119 }, { "epoch": 0.33, "grad_norm": 0.5590673698603426, "learning_rate": 4.758731350788551e-06, "loss": 0.281, "step": 7120 }, { "epoch": 0.33, "grad_norm": 0.6592953679144379, "learning_rate": 4.758650059512038e-06, "loss": 0.2989, "step": 7121 }, { "epoch": 0.33, "grad_norm": 0.6057467424398495, "learning_rate": 4.758568755237562e-06, "loss": 0.3233, "step": 7122 }, { "epoch": 0.33, "grad_norm": 0.5733130789744583, "learning_rate": 4.7584874379655925e-06, "loss": 0.287, "step": 7123 }, { "epoch": 0.33, "grad_norm": 0.6376742635815434, "learning_rate": 4.7584061076965926e-06, "loss": 0.3147, "step": 7124 }, { "epoch": 0.33, "grad_norm": 0.5898143422396902, "learning_rate": 4.758324764431035e-06, "loss": 0.2991, "step": 7125 }, { "epoch": 0.33, "grad_norm": 0.6240396045206196, "learning_rate": 4.758243408169385e-06, "loss": 0.2913, "step": 7126 }, { "epoch": 0.33, "grad_norm": 0.5672514428013556, "learning_rate": 4.7581620389121115e-06, "loss": 0.269, "step": 7127 }, { "epoch": 0.33, "grad_norm": 0.5991488735740395, "learning_rate": 4.758080656659684e-06, "loss": 0.3017, "step": 7128 }, { "epoch": 0.33, "grad_norm": 0.6123608971168918, "learning_rate": 4.75799926141257e-06, "loss": 0.2883, "step": 7129 }, { "epoch": 0.33, "grad_norm": 0.5919607754715226, "learning_rate": 4.757917853171237e-06, "loss": 0.2911, "step": 7130 }, { "epoch": 0.33, "grad_norm": 0.6535304728547953, "learning_rate": 4.7578364319361545e-06, "loss": 0.306, "step": 7131 }, { "epoch": 0.33, "grad_norm": 0.5999261838463317, "learning_rate": 4.757754997707791e-06, "loss": 0.2855, "step": 7132 }, { "epoch": 0.33, "grad_norm": 0.6513761886079614, "learning_rate": 4.757673550486615e-06, "loss": 0.2963, "step": 7133 }, { "epoch": 0.33, "grad_norm": 0.5968129367525258, "learning_rate": 4.757592090273095e-06, "loss": 0.2948, "step": 7134 }, { "epoch": 0.33, "grad_norm": 0.6143774367846461, "learning_rate": 4.7575106170677e-06, "loss": 0.2822, "step": 7135 }, { "epoch": 0.33, "grad_norm": 0.6183053844600567, "learning_rate": 4.757429130870899e-06, "loss": 0.2718, "step": 7136 }, { "epoch": 0.33, "grad_norm": 0.6116022424362277, "learning_rate": 4.75734763168316e-06, "loss": 0.2825, "step": 7137 }, { "epoch": 0.33, "grad_norm": 0.6698052039229963, "learning_rate": 4.757266119504953e-06, "loss": 0.2974, "step": 7138 }, { "epoch": 0.33, "grad_norm": 0.6351806501995143, "learning_rate": 4.757184594336747e-06, "loss": 0.2871, "step": 7139 }, { "epoch": 0.33, "grad_norm": 0.6544855267118413, "learning_rate": 4.757103056179012e-06, "loss": 0.292, "step": 7140 }, { "epoch": 0.33, "grad_norm": 0.6039144638427417, "learning_rate": 4.757021505032214e-06, "loss": 0.2968, "step": 7141 }, { "epoch": 0.33, "grad_norm": 0.6973730468712719, "learning_rate": 4.756939940896826e-06, "loss": 0.3176, "step": 7142 }, { "epoch": 0.33, "grad_norm": 0.6379533935627639, "learning_rate": 4.7568583637733165e-06, "loss": 0.3126, "step": 7143 }, { "epoch": 0.33, "grad_norm": 0.6184769182239683, "learning_rate": 4.756776773662153e-06, "loss": 0.2837, "step": 7144 }, { "epoch": 0.33, "grad_norm": 0.6718833529217535, "learning_rate": 4.756695170563807e-06, "loss": 0.3178, "step": 7145 }, { "epoch": 0.33, "grad_norm": 0.6291734176122026, "learning_rate": 4.756613554478747e-06, "loss": 0.2995, "step": 7146 }, { "epoch": 0.33, "grad_norm": 0.6618080495411175, "learning_rate": 4.756531925407444e-06, "loss": 0.3046, "step": 7147 }, { "epoch": 0.33, "grad_norm": 0.6815547370501628, "learning_rate": 4.756450283350367e-06, "loss": 0.2908, "step": 7148 }, { "epoch": 0.33, "grad_norm": 0.612940795444423, "learning_rate": 4.756368628307985e-06, "loss": 0.282, "step": 7149 }, { "epoch": 0.33, "grad_norm": 0.5574265508507142, "learning_rate": 4.756286960280768e-06, "loss": 0.2563, "step": 7150 }, { "epoch": 0.33, "grad_norm": 0.6149979258408274, "learning_rate": 4.756205279269188e-06, "loss": 0.2849, "step": 7151 }, { "epoch": 0.34, "grad_norm": 0.6664096281497885, "learning_rate": 4.756123585273714e-06, "loss": 0.3201, "step": 7152 }, { "epoch": 0.34, "grad_norm": 0.6928019915590308, "learning_rate": 4.756041878294814e-06, "loss": 0.3321, "step": 7153 }, { "epoch": 0.34, "grad_norm": 0.6493029801826877, "learning_rate": 4.755960158332961e-06, "loss": 0.2884, "step": 7154 }, { "epoch": 0.34, "grad_norm": 0.6495034204744425, "learning_rate": 4.755878425388625e-06, "loss": 0.3054, "step": 7155 }, { "epoch": 0.34, "grad_norm": 0.642903029429679, "learning_rate": 4.755796679462275e-06, "loss": 0.3101, "step": 7156 }, { "epoch": 0.34, "grad_norm": 0.6640352843556018, "learning_rate": 4.755714920554382e-06, "loss": 0.3151, "step": 7157 }, { "epoch": 0.34, "grad_norm": 0.7013541090844082, "learning_rate": 4.7556331486654174e-06, "loss": 0.3063, "step": 7158 }, { "epoch": 0.34, "grad_norm": 0.57649035797841, "learning_rate": 4.75555136379585e-06, "loss": 0.2551, "step": 7159 }, { "epoch": 0.34, "grad_norm": 0.6701092215947037, "learning_rate": 4.755469565946151e-06, "loss": 0.3034, "step": 7160 }, { "epoch": 0.34, "grad_norm": 0.6362213242018914, "learning_rate": 4.755387755116792e-06, "loss": 0.2897, "step": 7161 }, { "epoch": 0.34, "grad_norm": 0.6003342195316453, "learning_rate": 4.755305931308244e-06, "loss": 0.2966, "step": 7162 }, { "epoch": 0.34, "grad_norm": 0.6511564571837568, "learning_rate": 4.755224094520977e-06, "loss": 0.2888, "step": 7163 }, { "epoch": 0.34, "grad_norm": 0.6844268079345391, "learning_rate": 4.755142244755462e-06, "loss": 0.2881, "step": 7164 }, { "epoch": 0.34, "grad_norm": 0.6242163341723366, "learning_rate": 4.75506038201217e-06, "loss": 0.2617, "step": 7165 }, { "epoch": 0.34, "grad_norm": 0.5803786079067867, "learning_rate": 4.754978506291572e-06, "loss": 0.2828, "step": 7166 }, { "epoch": 0.34, "grad_norm": 0.6499127725502019, "learning_rate": 4.75489661759414e-06, "loss": 0.3223, "step": 7167 }, { "epoch": 0.34, "grad_norm": 0.5704410708115618, "learning_rate": 4.754814715920345e-06, "loss": 0.2731, "step": 7168 }, { "epoch": 0.34, "grad_norm": 0.6215680515310124, "learning_rate": 4.754732801270658e-06, "loss": 0.2836, "step": 7169 }, { "epoch": 0.34, "grad_norm": 0.7028119085972645, "learning_rate": 4.75465087364555e-06, "loss": 0.2949, "step": 7170 }, { "epoch": 0.34, "grad_norm": 0.598436104604755, "learning_rate": 4.754568933045493e-06, "loss": 0.2773, "step": 7171 }, { "epoch": 0.34, "grad_norm": 0.6212215532451201, "learning_rate": 4.754486979470958e-06, "loss": 0.2933, "step": 7172 }, { "epoch": 0.34, "grad_norm": 0.6066040855129653, "learning_rate": 4.754405012922418e-06, "loss": 0.2595, "step": 7173 }, { "epoch": 0.34, "grad_norm": 0.6520723340971312, "learning_rate": 4.754323033400343e-06, "loss": 0.2921, "step": 7174 }, { "epoch": 0.34, "grad_norm": 0.9436646207160776, "learning_rate": 4.754241040905206e-06, "loss": 0.2701, "step": 7175 }, { "epoch": 0.34, "grad_norm": 0.6724493974287007, "learning_rate": 4.754159035437478e-06, "loss": 0.314, "step": 7176 }, { "epoch": 0.34, "grad_norm": 0.6925249721582137, "learning_rate": 4.754077016997632e-06, "loss": 0.3259, "step": 7177 }, { "epoch": 0.34, "grad_norm": 0.5956027048879808, "learning_rate": 4.753994985586139e-06, "loss": 0.2848, "step": 7178 }, { "epoch": 0.34, "grad_norm": 0.6144552683655146, "learning_rate": 4.7539129412034715e-06, "loss": 0.2868, "step": 7179 }, { "epoch": 0.34, "grad_norm": 0.6063352447249405, "learning_rate": 4.7538308838501005e-06, "loss": 0.2947, "step": 7180 }, { "epoch": 0.34, "grad_norm": 0.6282360180999127, "learning_rate": 4.7537488135265e-06, "loss": 0.2951, "step": 7181 }, { "epoch": 0.34, "grad_norm": 0.6556366530914399, "learning_rate": 4.753666730233142e-06, "loss": 0.3088, "step": 7182 }, { "epoch": 0.34, "grad_norm": 0.7000066455994356, "learning_rate": 4.7535846339704975e-06, "loss": 0.2951, "step": 7183 }, { "epoch": 0.34, "grad_norm": 0.6661372763780156, "learning_rate": 4.753502524739041e-06, "loss": 0.3025, "step": 7184 }, { "epoch": 0.34, "grad_norm": 0.6206338570694383, "learning_rate": 4.7534204025392425e-06, "loss": 0.29, "step": 7185 }, { "epoch": 0.34, "grad_norm": 0.6519517848126124, "learning_rate": 4.753338267371576e-06, "loss": 0.2731, "step": 7186 }, { "epoch": 0.34, "grad_norm": 0.6114125280146399, "learning_rate": 4.753256119236516e-06, "loss": 0.2821, "step": 7187 }, { "epoch": 0.34, "grad_norm": 0.6716911641615539, "learning_rate": 4.753173958134531e-06, "loss": 0.2899, "step": 7188 }, { "epoch": 0.34, "grad_norm": 0.6163981951262699, "learning_rate": 4.7530917840660975e-06, "loss": 0.2978, "step": 7189 }, { "epoch": 0.34, "grad_norm": 0.6289637870857777, "learning_rate": 4.753009597031687e-06, "loss": 0.3005, "step": 7190 }, { "epoch": 0.34, "grad_norm": 0.6265897709094497, "learning_rate": 4.752927397031772e-06, "loss": 0.2981, "step": 7191 }, { "epoch": 0.34, "grad_norm": 0.6211587537157097, "learning_rate": 4.7528451840668276e-06, "loss": 0.2921, "step": 7192 }, { "epoch": 0.34, "grad_norm": 0.6835521895294715, "learning_rate": 4.752762958137324e-06, "loss": 0.3025, "step": 7193 }, { "epoch": 0.34, "grad_norm": 0.7018145112595019, "learning_rate": 4.7526807192437366e-06, "loss": 0.3158, "step": 7194 }, { "epoch": 0.34, "grad_norm": 0.6459319459377256, "learning_rate": 4.752598467386536e-06, "loss": 0.2993, "step": 7195 }, { "epoch": 0.34, "grad_norm": 0.6011960559177745, "learning_rate": 4.7525162025662e-06, "loss": 0.3045, "step": 7196 }, { "epoch": 0.34, "grad_norm": 0.6478754058558188, "learning_rate": 4.752433924783197e-06, "loss": 0.2853, "step": 7197 }, { "epoch": 0.34, "grad_norm": 0.5696136515436422, "learning_rate": 4.752351634038005e-06, "loss": 0.2883, "step": 7198 }, { "epoch": 0.34, "grad_norm": 0.6559624220890086, "learning_rate": 4.752269330331094e-06, "loss": 0.2833, "step": 7199 }, { "epoch": 0.34, "grad_norm": 0.6421252809314947, "learning_rate": 4.75218701366294e-06, "loss": 0.3032, "step": 7200 }, { "epoch": 0.34, "grad_norm": 0.5741540694593871, "learning_rate": 4.752104684034015e-06, "loss": 0.292, "step": 7201 }, { "epoch": 0.34, "grad_norm": 0.6151840367300169, "learning_rate": 4.752022341444794e-06, "loss": 0.2933, "step": 7202 }, { "epoch": 0.34, "grad_norm": 0.584992616863506, "learning_rate": 4.751939985895751e-06, "loss": 0.2728, "step": 7203 }, { "epoch": 0.34, "grad_norm": 0.6066708434766975, "learning_rate": 4.751857617387358e-06, "loss": 0.2899, "step": 7204 }, { "epoch": 0.34, "grad_norm": 0.6183984545895418, "learning_rate": 4.75177523592009e-06, "loss": 0.2875, "step": 7205 }, { "epoch": 0.34, "grad_norm": 0.6671255705247428, "learning_rate": 4.7516928414944245e-06, "loss": 0.2999, "step": 7206 }, { "epoch": 0.34, "grad_norm": 0.6417201542744615, "learning_rate": 4.751610434110831e-06, "loss": 0.3127, "step": 7207 }, { "epoch": 0.34, "grad_norm": 0.6157300606505982, "learning_rate": 4.751528013769784e-06, "loss": 0.285, "step": 7208 }, { "epoch": 0.34, "grad_norm": 0.6240382363134814, "learning_rate": 4.751445580471761e-06, "loss": 0.3009, "step": 7209 }, { "epoch": 0.34, "grad_norm": 0.7321035344611591, "learning_rate": 4.751363134217234e-06, "loss": 0.2829, "step": 7210 }, { "epoch": 0.34, "grad_norm": 0.5790923856747446, "learning_rate": 4.751280675006677e-06, "loss": 0.2929, "step": 7211 }, { "epoch": 0.34, "grad_norm": 0.6464711010787253, "learning_rate": 4.751198202840567e-06, "loss": 0.3137, "step": 7212 }, { "epoch": 0.34, "grad_norm": 0.6034490448056324, "learning_rate": 4.751115717719377e-06, "loss": 0.2938, "step": 7213 }, { "epoch": 0.34, "grad_norm": 0.6497595745281062, "learning_rate": 4.751033219643582e-06, "loss": 0.2878, "step": 7214 }, { "epoch": 0.34, "grad_norm": 0.7058118644604822, "learning_rate": 4.750950708613655e-06, "loss": 0.3024, "step": 7215 }, { "epoch": 0.34, "grad_norm": 0.6642341024960456, "learning_rate": 4.750868184630074e-06, "loss": 0.3167, "step": 7216 }, { "epoch": 0.34, "grad_norm": 0.6011815557794722, "learning_rate": 4.750785647693311e-06, "loss": 0.2898, "step": 7217 }, { "epoch": 0.34, "grad_norm": 0.5647072691772882, "learning_rate": 4.750703097803843e-06, "loss": 0.2861, "step": 7218 }, { "epoch": 0.34, "grad_norm": 0.5669805642152219, "learning_rate": 4.750620534962146e-06, "loss": 0.2818, "step": 7219 }, { "epoch": 0.34, "grad_norm": 0.7022284012736645, "learning_rate": 4.7505379591686915e-06, "loss": 0.3009, "step": 7220 }, { "epoch": 0.34, "grad_norm": 0.6724331357004378, "learning_rate": 4.750455370423958e-06, "loss": 0.3055, "step": 7221 }, { "epoch": 0.34, "grad_norm": 0.7630029830306165, "learning_rate": 4.750372768728418e-06, "loss": 0.2773, "step": 7222 }, { "epoch": 0.34, "grad_norm": 0.6803883431745448, "learning_rate": 4.750290154082548e-06, "loss": 0.3175, "step": 7223 }, { "epoch": 0.34, "grad_norm": 0.565525159233524, "learning_rate": 4.750207526486826e-06, "loss": 0.2968, "step": 7224 }, { "epoch": 0.34, "grad_norm": 0.6817550338660445, "learning_rate": 4.750124885941724e-06, "loss": 0.2814, "step": 7225 }, { "epoch": 0.34, "grad_norm": 0.6560683061070527, "learning_rate": 4.750042232447719e-06, "loss": 0.3171, "step": 7226 }, { "epoch": 0.34, "grad_norm": 0.6411275834394, "learning_rate": 4.749959566005286e-06, "loss": 0.2739, "step": 7227 }, { "epoch": 0.34, "grad_norm": 0.6088689138710041, "learning_rate": 4.749876886614901e-06, "loss": 0.2883, "step": 7228 }, { "epoch": 0.34, "grad_norm": 0.620780929599305, "learning_rate": 4.749794194277041e-06, "loss": 0.3023, "step": 7229 }, { "epoch": 0.34, "grad_norm": 0.6275011543426623, "learning_rate": 4.74971148899218e-06, "loss": 0.2919, "step": 7230 }, { "epoch": 0.34, "grad_norm": 0.5980059753818008, "learning_rate": 4.749628770760796e-06, "loss": 0.307, "step": 7231 }, { "epoch": 0.34, "grad_norm": 0.5873871123742035, "learning_rate": 4.7495460395833624e-06, "loss": 0.2808, "step": 7232 }, { "epoch": 0.34, "grad_norm": 0.6010368825560967, "learning_rate": 4.749463295460358e-06, "loss": 0.2785, "step": 7233 }, { "epoch": 0.34, "grad_norm": 0.5994259188840438, "learning_rate": 4.749380538392257e-06, "loss": 0.2742, "step": 7234 }, { "epoch": 0.34, "grad_norm": 0.6563591848237115, "learning_rate": 4.749297768379536e-06, "loss": 0.2857, "step": 7235 }, { "epoch": 0.34, "grad_norm": 0.6227398398514761, "learning_rate": 4.749214985422672e-06, "loss": 0.2999, "step": 7236 }, { "epoch": 0.34, "grad_norm": 0.6587135998837244, "learning_rate": 4.749132189522142e-06, "loss": 0.2932, "step": 7237 }, { "epoch": 0.34, "grad_norm": 0.6205032790420216, "learning_rate": 4.749049380678421e-06, "loss": 0.2857, "step": 7238 }, { "epoch": 0.34, "grad_norm": 0.6430971854151044, "learning_rate": 4.748966558891986e-06, "loss": 0.3186, "step": 7239 }, { "epoch": 0.34, "grad_norm": 1.037721280947973, "learning_rate": 4.748883724163313e-06, "loss": 0.288, "step": 7240 }, { "epoch": 0.34, "grad_norm": 0.6531118764574526, "learning_rate": 4.74880087649288e-06, "loss": 0.3047, "step": 7241 }, { "epoch": 0.34, "grad_norm": 0.5766651167927717, "learning_rate": 4.748718015881163e-06, "loss": 0.2883, "step": 7242 }, { "epoch": 0.34, "grad_norm": 0.6229171022549717, "learning_rate": 4.748635142328639e-06, "loss": 0.2923, "step": 7243 }, { "epoch": 0.34, "grad_norm": 0.6125456606219442, "learning_rate": 4.748552255835786e-06, "loss": 0.2801, "step": 7244 }, { "epoch": 0.34, "grad_norm": 0.6067925197477961, "learning_rate": 4.748469356403079e-06, "loss": 0.2808, "step": 7245 }, { "epoch": 0.34, "grad_norm": 0.6533486958804539, "learning_rate": 4.748386444030996e-06, "loss": 0.2746, "step": 7246 }, { "epoch": 0.34, "grad_norm": 0.6164721217163126, "learning_rate": 4.748303518720014e-06, "loss": 0.2862, "step": 7247 }, { "epoch": 0.34, "grad_norm": 0.6387748752707747, "learning_rate": 4.748220580470611e-06, "loss": 0.3091, "step": 7248 }, { "epoch": 0.34, "grad_norm": 0.6193593572303298, "learning_rate": 4.7481376292832626e-06, "loss": 0.2861, "step": 7249 }, { "epoch": 0.34, "grad_norm": 0.6345327636321084, "learning_rate": 4.748054665158448e-06, "loss": 0.2859, "step": 7250 }, { "epoch": 0.34, "grad_norm": 0.6165447919929902, "learning_rate": 4.747971688096643e-06, "loss": 0.3005, "step": 7251 }, { "epoch": 0.34, "grad_norm": 0.536563125887946, "learning_rate": 4.747888698098326e-06, "loss": 0.2732, "step": 7252 }, { "epoch": 0.34, "grad_norm": 0.6108605044877217, "learning_rate": 4.747805695163976e-06, "loss": 0.3145, "step": 7253 }, { "epoch": 0.34, "grad_norm": 0.6645285284732863, "learning_rate": 4.747722679294068e-06, "loss": 0.3006, "step": 7254 }, { "epoch": 0.34, "grad_norm": 0.6753541037276439, "learning_rate": 4.747639650489081e-06, "loss": 0.3175, "step": 7255 }, { "epoch": 0.34, "grad_norm": 0.5921240621141299, "learning_rate": 4.747556608749493e-06, "loss": 0.2834, "step": 7256 }, { "epoch": 0.34, "grad_norm": 0.6354183641464077, "learning_rate": 4.747473554075782e-06, "loss": 0.3078, "step": 7257 }, { "epoch": 0.34, "grad_norm": 0.5854180347063513, "learning_rate": 4.7473904864684245e-06, "loss": 0.2908, "step": 7258 }, { "epoch": 0.34, "grad_norm": 0.577481352757495, "learning_rate": 4.7473074059279e-06, "loss": 0.2732, "step": 7259 }, { "epoch": 0.34, "grad_norm": 0.6338031435227918, "learning_rate": 4.747224312454687e-06, "loss": 0.2908, "step": 7260 }, { "epoch": 0.34, "grad_norm": 0.6823575777342201, "learning_rate": 4.747141206049261e-06, "loss": 0.2971, "step": 7261 }, { "epoch": 0.34, "grad_norm": 0.6103492480374147, "learning_rate": 4.7470580867121045e-06, "loss": 0.311, "step": 7262 }, { "epoch": 0.34, "grad_norm": 0.6144019581245761, "learning_rate": 4.746974954443692e-06, "loss": 0.3024, "step": 7263 }, { "epoch": 0.34, "grad_norm": 0.6390827220942267, "learning_rate": 4.746891809244504e-06, "loss": 0.2795, "step": 7264 }, { "epoch": 0.34, "grad_norm": 0.5759696294591825, "learning_rate": 4.746808651115018e-06, "loss": 0.2844, "step": 7265 }, { "epoch": 0.34, "grad_norm": 0.6208031736955902, "learning_rate": 4.7467254800557135e-06, "loss": 0.2939, "step": 7266 }, { "epoch": 0.34, "grad_norm": 0.6500562393228494, "learning_rate": 4.746642296067068e-06, "loss": 0.3027, "step": 7267 }, { "epoch": 0.34, "grad_norm": 0.6514534898337022, "learning_rate": 4.746559099149561e-06, "loss": 0.321, "step": 7268 }, { "epoch": 0.34, "grad_norm": 0.6119673334615678, "learning_rate": 4.746475889303671e-06, "loss": 0.2987, "step": 7269 }, { "epoch": 0.34, "grad_norm": 0.6075930204295305, "learning_rate": 4.746392666529876e-06, "loss": 0.2857, "step": 7270 }, { "epoch": 0.34, "grad_norm": 0.6428827554912468, "learning_rate": 4.746309430828657e-06, "loss": 0.3145, "step": 7271 }, { "epoch": 0.34, "grad_norm": 0.6393512348089222, "learning_rate": 4.746226182200492e-06, "loss": 0.3014, "step": 7272 }, { "epoch": 0.34, "grad_norm": 0.6486437558920397, "learning_rate": 4.74614292064586e-06, "loss": 0.3055, "step": 7273 }, { "epoch": 0.34, "grad_norm": 0.6439634639385253, "learning_rate": 4.746059646165239e-06, "loss": 0.2965, "step": 7274 }, { "epoch": 0.34, "grad_norm": 0.6346952117684235, "learning_rate": 4.745976358759109e-06, "loss": 0.2862, "step": 7275 }, { "epoch": 0.34, "grad_norm": 0.6816563489945843, "learning_rate": 4.7458930584279504e-06, "loss": 0.3019, "step": 7276 }, { "epoch": 0.34, "grad_norm": 0.5846872169921644, "learning_rate": 4.745809745172242e-06, "loss": 0.2947, "step": 7277 }, { "epoch": 0.34, "grad_norm": 0.6603056737979823, "learning_rate": 4.745726418992463e-06, "loss": 0.2954, "step": 7278 }, { "epoch": 0.34, "grad_norm": 0.6040385037589162, "learning_rate": 4.7456430798890915e-06, "loss": 0.2729, "step": 7279 }, { "epoch": 0.34, "grad_norm": 0.586916076483442, "learning_rate": 4.74555972786261e-06, "loss": 0.2911, "step": 7280 }, { "epoch": 0.34, "grad_norm": 0.592004202749219, "learning_rate": 4.7454763629134955e-06, "loss": 0.3107, "step": 7281 }, { "epoch": 0.34, "grad_norm": 0.6756336056878958, "learning_rate": 4.7453929850422294e-06, "loss": 0.2947, "step": 7282 }, { "epoch": 0.34, "grad_norm": 0.6452581113801746, "learning_rate": 4.745309594249292e-06, "loss": 0.3091, "step": 7283 }, { "epoch": 0.34, "grad_norm": 0.6546642368696872, "learning_rate": 4.745226190535162e-06, "loss": 0.3096, "step": 7284 }, { "epoch": 0.34, "grad_norm": 0.5952705508463214, "learning_rate": 4.745142773900319e-06, "loss": 0.2891, "step": 7285 }, { "epoch": 0.34, "grad_norm": 0.6115257389775568, "learning_rate": 4.7450593443452434e-06, "loss": 0.2931, "step": 7286 }, { "epoch": 0.34, "grad_norm": 0.6992856888908054, "learning_rate": 4.744975901870415e-06, "loss": 0.3039, "step": 7287 }, { "epoch": 0.34, "grad_norm": 0.5989050792247038, "learning_rate": 4.7448924464763154e-06, "loss": 0.2656, "step": 7288 }, { "epoch": 0.34, "grad_norm": 0.6326903950967547, "learning_rate": 4.744808978163424e-06, "loss": 0.2817, "step": 7289 }, { "epoch": 0.34, "grad_norm": 0.6395124101818844, "learning_rate": 4.7447254969322206e-06, "loss": 0.3105, "step": 7290 }, { "epoch": 0.34, "grad_norm": 0.5964310899220346, "learning_rate": 4.744642002783188e-06, "loss": 0.2925, "step": 7291 }, { "epoch": 0.34, "grad_norm": 0.582901456469109, "learning_rate": 4.744558495716803e-06, "loss": 0.298, "step": 7292 }, { "epoch": 0.34, "grad_norm": 0.6302688841412526, "learning_rate": 4.744474975733548e-06, "loss": 0.2901, "step": 7293 }, { "epoch": 0.34, "grad_norm": 0.5744716183567613, "learning_rate": 4.744391442833905e-06, "loss": 0.2735, "step": 7294 }, { "epoch": 0.34, "grad_norm": 0.6772165128398817, "learning_rate": 4.744307897018352e-06, "loss": 0.2811, "step": 7295 }, { "epoch": 0.34, "grad_norm": 0.6964087099344451, "learning_rate": 4.7442243382873706e-06, "loss": 0.3188, "step": 7296 }, { "epoch": 0.34, "grad_norm": 0.605181789833805, "learning_rate": 4.7441407666414435e-06, "loss": 0.2793, "step": 7297 }, { "epoch": 0.34, "grad_norm": 0.6018178009101748, "learning_rate": 4.74405718208105e-06, "loss": 0.2911, "step": 7298 }, { "epoch": 0.34, "grad_norm": 0.6659122227295353, "learning_rate": 4.743973584606672e-06, "loss": 0.2895, "step": 7299 }, { "epoch": 0.34, "grad_norm": 0.6564832433080829, "learning_rate": 4.743889974218789e-06, "loss": 0.2902, "step": 7300 }, { "epoch": 0.34, "grad_norm": 0.5941989772755056, "learning_rate": 4.743806350917884e-06, "loss": 0.309, "step": 7301 }, { "epoch": 0.34, "grad_norm": 0.6058243476313172, "learning_rate": 4.743722714704437e-06, "loss": 0.2879, "step": 7302 }, { "epoch": 0.34, "grad_norm": 0.671909462532767, "learning_rate": 4.74363906557893e-06, "loss": 0.3086, "step": 7303 }, { "epoch": 0.34, "grad_norm": 0.6346524364097609, "learning_rate": 4.743555403541843e-06, "loss": 0.2903, "step": 7304 }, { "epoch": 0.34, "grad_norm": 0.644375589367703, "learning_rate": 4.7434717285936595e-06, "loss": 0.309, "step": 7305 }, { "epoch": 0.34, "grad_norm": 0.5851089747792914, "learning_rate": 4.74338804073486e-06, "loss": 0.2638, "step": 7306 }, { "epoch": 0.34, "grad_norm": 0.7112938537736015, "learning_rate": 4.7433043399659264e-06, "loss": 0.2887, "step": 7307 }, { "epoch": 0.34, "grad_norm": 0.692016985591383, "learning_rate": 4.74322062628734e-06, "loss": 0.297, "step": 7308 }, { "epoch": 0.34, "grad_norm": 0.6741592246797493, "learning_rate": 4.743136899699583e-06, "loss": 0.3117, "step": 7309 }, { "epoch": 0.34, "grad_norm": 0.5793987718866819, "learning_rate": 4.743053160203136e-06, "loss": 0.2741, "step": 7310 }, { "epoch": 0.34, "grad_norm": 0.6473480876090695, "learning_rate": 4.7429694077984825e-06, "loss": 0.3281, "step": 7311 }, { "epoch": 0.34, "grad_norm": 0.6029878278063002, "learning_rate": 4.742885642486104e-06, "loss": 0.2816, "step": 7312 }, { "epoch": 0.34, "grad_norm": 0.6735315370079036, "learning_rate": 4.742801864266482e-06, "loss": 0.301, "step": 7313 }, { "epoch": 0.34, "grad_norm": 0.7513493591627997, "learning_rate": 4.742718073140099e-06, "loss": 0.3146, "step": 7314 }, { "epoch": 0.34, "grad_norm": 0.5925501855273473, "learning_rate": 4.742634269107437e-06, "loss": 0.2871, "step": 7315 }, { "epoch": 0.34, "grad_norm": 0.6138075265372565, "learning_rate": 4.742550452168979e-06, "loss": 0.3187, "step": 7316 }, { "epoch": 0.34, "grad_norm": 0.6584708680553479, "learning_rate": 4.742466622325206e-06, "loss": 0.2951, "step": 7317 }, { "epoch": 0.34, "grad_norm": 0.6481195101772236, "learning_rate": 4.742382779576602e-06, "loss": 0.2777, "step": 7318 }, { "epoch": 0.34, "grad_norm": 0.6374152730363315, "learning_rate": 4.742298923923649e-06, "loss": 0.2961, "step": 7319 }, { "epoch": 0.34, "grad_norm": 0.591357548865933, "learning_rate": 4.742215055366828e-06, "loss": 0.2932, "step": 7320 }, { "epoch": 0.34, "grad_norm": 0.6390704593521823, "learning_rate": 4.742131173906624e-06, "loss": 0.2883, "step": 7321 }, { "epoch": 0.34, "grad_norm": 0.5705413200807159, "learning_rate": 4.742047279543518e-06, "loss": 0.2854, "step": 7322 }, { "epoch": 0.34, "grad_norm": 0.5725275725908526, "learning_rate": 4.741963372277993e-06, "loss": 0.277, "step": 7323 }, { "epoch": 0.34, "grad_norm": 0.5722491920345919, "learning_rate": 4.7418794521105334e-06, "loss": 0.27, "step": 7324 }, { "epoch": 0.34, "grad_norm": 0.5930035205914467, "learning_rate": 4.74179551904162e-06, "loss": 0.2975, "step": 7325 }, { "epoch": 0.34, "grad_norm": 0.6388100525548736, "learning_rate": 4.741711573071738e-06, "loss": 0.2697, "step": 7326 }, { "epoch": 0.34, "grad_norm": 0.5912016103435015, "learning_rate": 4.741627614201369e-06, "loss": 0.288, "step": 7327 }, { "epoch": 0.34, "grad_norm": 0.6299023025944666, "learning_rate": 4.741543642430996e-06, "loss": 0.2778, "step": 7328 }, { "epoch": 0.34, "grad_norm": 0.6272349487779347, "learning_rate": 4.741459657761103e-06, "loss": 0.2902, "step": 7329 }, { "epoch": 0.34, "grad_norm": 0.5930204947647809, "learning_rate": 4.7413756601921726e-06, "loss": 0.2952, "step": 7330 }, { "epoch": 0.34, "grad_norm": 0.5840098390184031, "learning_rate": 4.741291649724689e-06, "loss": 0.2975, "step": 7331 }, { "epoch": 0.34, "grad_norm": 0.621320391219713, "learning_rate": 4.741207626359135e-06, "loss": 0.3059, "step": 7332 }, { "epoch": 0.34, "grad_norm": 0.6190211806248461, "learning_rate": 4.741123590095995e-06, "loss": 0.3166, "step": 7333 }, { "epoch": 0.34, "grad_norm": 0.5858636904134076, "learning_rate": 4.741039540935751e-06, "loss": 0.2875, "step": 7334 }, { "epoch": 0.34, "grad_norm": 0.6282168322032932, "learning_rate": 4.7409554788788895e-06, "loss": 0.3084, "step": 7335 }, { "epoch": 0.34, "grad_norm": 0.6502642240435842, "learning_rate": 4.74087140392589e-06, "loss": 0.3081, "step": 7336 }, { "epoch": 0.34, "grad_norm": 0.6312953675013228, "learning_rate": 4.740787316077241e-06, "loss": 0.2857, "step": 7337 }, { "epoch": 0.34, "grad_norm": 0.6600686224691668, "learning_rate": 4.740703215333423e-06, "loss": 0.3078, "step": 7338 }, { "epoch": 0.34, "grad_norm": 0.6244942102047918, "learning_rate": 4.7406191016949225e-06, "loss": 0.3028, "step": 7339 }, { "epoch": 0.34, "grad_norm": 0.5738620547231426, "learning_rate": 4.740534975162221e-06, "loss": 0.2778, "step": 7340 }, { "epoch": 0.34, "grad_norm": 0.6492707877616962, "learning_rate": 4.740450835735803e-06, "loss": 0.2846, "step": 7341 }, { "epoch": 0.34, "grad_norm": 0.5950681994140015, "learning_rate": 4.740366683416156e-06, "loss": 0.2879, "step": 7342 }, { "epoch": 0.34, "grad_norm": 0.5850684411135006, "learning_rate": 4.74028251820376e-06, "loss": 0.2795, "step": 7343 }, { "epoch": 0.34, "grad_norm": 0.6436434335458912, "learning_rate": 4.740198340099102e-06, "loss": 0.2908, "step": 7344 }, { "epoch": 0.34, "grad_norm": 0.631836985204286, "learning_rate": 4.740114149102665e-06, "loss": 0.3141, "step": 7345 }, { "epoch": 0.34, "grad_norm": 0.6060944748557834, "learning_rate": 4.740029945214935e-06, "loss": 0.2925, "step": 7346 }, { "epoch": 0.34, "grad_norm": 0.6394034279146307, "learning_rate": 4.739945728436395e-06, "loss": 0.2965, "step": 7347 }, { "epoch": 0.34, "grad_norm": 0.6528097293370982, "learning_rate": 4.7398614987675305e-06, "loss": 0.2941, "step": 7348 }, { "epoch": 0.34, "grad_norm": 0.5896056261260759, "learning_rate": 4.739777256208825e-06, "loss": 0.2869, "step": 7349 }, { "epoch": 0.34, "grad_norm": 0.6174283851300725, "learning_rate": 4.739693000760766e-06, "loss": 0.2901, "step": 7350 }, { "epoch": 0.34, "grad_norm": 0.6166363651363348, "learning_rate": 4.739608732423836e-06, "loss": 0.2945, "step": 7351 }, { "epoch": 0.34, "grad_norm": 0.5952609453493468, "learning_rate": 4.739524451198521e-06, "loss": 0.2777, "step": 7352 }, { "epoch": 0.34, "grad_norm": 0.650210194146987, "learning_rate": 4.739440157085305e-06, "loss": 0.3284, "step": 7353 }, { "epoch": 0.34, "grad_norm": 0.6215740334366563, "learning_rate": 4.739355850084674e-06, "loss": 0.2911, "step": 7354 }, { "epoch": 0.34, "grad_norm": 0.6154241092777475, "learning_rate": 4.7392715301971126e-06, "loss": 0.2849, "step": 7355 }, { "epoch": 0.34, "grad_norm": 0.6176849627648645, "learning_rate": 4.739187197423108e-06, "loss": 0.2985, "step": 7356 }, { "epoch": 0.34, "grad_norm": 0.6621833506542315, "learning_rate": 4.7391028517631415e-06, "loss": 0.3009, "step": 7357 }, { "epoch": 0.34, "grad_norm": 0.5804767651161813, "learning_rate": 4.739018493217702e-06, "loss": 0.2883, "step": 7358 }, { "epoch": 0.34, "grad_norm": 0.603801173512278, "learning_rate": 4.738934121787274e-06, "loss": 0.2817, "step": 7359 }, { "epoch": 0.34, "grad_norm": 0.590537351883969, "learning_rate": 4.738849737472343e-06, "loss": 0.296, "step": 7360 }, { "epoch": 0.34, "grad_norm": 0.6258293721207157, "learning_rate": 4.738765340273394e-06, "loss": 0.2951, "step": 7361 }, { "epoch": 0.34, "grad_norm": 0.5868596098093799, "learning_rate": 4.738680930190913e-06, "loss": 0.2928, "step": 7362 }, { "epoch": 0.34, "grad_norm": 0.6260452178874372, "learning_rate": 4.738596507225386e-06, "loss": 0.2861, "step": 7363 }, { "epoch": 0.34, "grad_norm": 0.6649140824525693, "learning_rate": 4.738512071377299e-06, "loss": 0.325, "step": 7364 }, { "epoch": 0.35, "grad_norm": 0.6266757287709587, "learning_rate": 4.738427622647138e-06, "loss": 0.3031, "step": 7365 }, { "epoch": 0.35, "grad_norm": 0.5345159366453675, "learning_rate": 4.738343161035388e-06, "loss": 0.282, "step": 7366 }, { "epoch": 0.35, "grad_norm": 0.5481607497363084, "learning_rate": 4.738258686542536e-06, "loss": 0.2708, "step": 7367 }, { "epoch": 0.35, "grad_norm": 0.6360087467604868, "learning_rate": 4.7381741991690685e-06, "loss": 0.3035, "step": 7368 }, { "epoch": 0.35, "grad_norm": 0.6324355259218096, "learning_rate": 4.73808969891547e-06, "loss": 0.3125, "step": 7369 }, { "epoch": 0.35, "grad_norm": 0.6321335008061877, "learning_rate": 4.738005185782229e-06, "loss": 0.2779, "step": 7370 }, { "epoch": 0.35, "grad_norm": 0.6251594750474004, "learning_rate": 4.737920659769829e-06, "loss": 0.3048, "step": 7371 }, { "epoch": 0.35, "grad_norm": 0.6140199173014192, "learning_rate": 4.737836120878759e-06, "loss": 0.2972, "step": 7372 }, { "epoch": 0.35, "grad_norm": 0.5971546801333115, "learning_rate": 4.737751569109504e-06, "loss": 0.2974, "step": 7373 }, { "epoch": 0.35, "grad_norm": 0.6388456609927984, "learning_rate": 4.737667004462552e-06, "loss": 0.3025, "step": 7374 }, { "epoch": 0.35, "grad_norm": 0.6935715012262686, "learning_rate": 4.737582426938389e-06, "loss": 0.3172, "step": 7375 }, { "epoch": 0.35, "grad_norm": 0.5956967082467002, "learning_rate": 4.7374978365375e-06, "loss": 0.2783, "step": 7376 }, { "epoch": 0.35, "grad_norm": 0.6081299390098551, "learning_rate": 4.737413233260374e-06, "loss": 0.2819, "step": 7377 }, { "epoch": 0.35, "grad_norm": 0.6348300303470137, "learning_rate": 4.737328617107498e-06, "loss": 0.3079, "step": 7378 }, { "epoch": 0.35, "grad_norm": 0.6477136287287921, "learning_rate": 4.737243988079358e-06, "loss": 0.2895, "step": 7379 }, { "epoch": 0.35, "grad_norm": 0.6169424809116343, "learning_rate": 4.737159346176441e-06, "loss": 0.2966, "step": 7380 }, { "epoch": 0.35, "grad_norm": 0.6165398354415678, "learning_rate": 4.7370746913992334e-06, "loss": 0.2989, "step": 7381 }, { "epoch": 0.35, "grad_norm": 0.5701986551524167, "learning_rate": 4.7369900237482245e-06, "loss": 0.289, "step": 7382 }, { "epoch": 0.35, "grad_norm": 0.6491765472909399, "learning_rate": 4.736905343223899e-06, "loss": 0.2965, "step": 7383 }, { "epoch": 0.35, "grad_norm": 0.6005806478073736, "learning_rate": 4.7368206498267465e-06, "loss": 0.2949, "step": 7384 }, { "epoch": 0.35, "grad_norm": 0.7159725170550657, "learning_rate": 4.736735943557252e-06, "loss": 0.3336, "step": 7385 }, { "epoch": 0.35, "grad_norm": 0.6522987900860786, "learning_rate": 4.736651224415906e-06, "loss": 0.2987, "step": 7386 }, { "epoch": 0.35, "grad_norm": 0.6628886753835984, "learning_rate": 4.736566492403194e-06, "loss": 0.311, "step": 7387 }, { "epoch": 0.35, "grad_norm": 0.7321082161546635, "learning_rate": 4.736481747519603e-06, "loss": 0.3126, "step": 7388 }, { "epoch": 0.35, "grad_norm": 0.6397600962396963, "learning_rate": 4.736396989765623e-06, "loss": 0.3062, "step": 7389 }, { "epoch": 0.35, "grad_norm": 0.631157227208804, "learning_rate": 4.736312219141739e-06, "loss": 0.2908, "step": 7390 }, { "epoch": 0.35, "grad_norm": 0.6736829220157787, "learning_rate": 4.736227435648441e-06, "loss": 0.2949, "step": 7391 }, { "epoch": 0.35, "grad_norm": 0.6263150095181184, "learning_rate": 4.736142639286216e-06, "loss": 0.2901, "step": 7392 }, { "epoch": 0.35, "grad_norm": 0.6659169256377541, "learning_rate": 4.736057830055553e-06, "loss": 0.3172, "step": 7393 }, { "epoch": 0.35, "grad_norm": 0.5988931080348462, "learning_rate": 4.735973007956938e-06, "loss": 0.2881, "step": 7394 }, { "epoch": 0.35, "grad_norm": 0.6262826443254675, "learning_rate": 4.7358881729908605e-06, "loss": 0.293, "step": 7395 }, { "epoch": 0.35, "grad_norm": 0.6078260190667134, "learning_rate": 4.73580332515781e-06, "loss": 0.2786, "step": 7396 }, { "epoch": 0.35, "grad_norm": 0.6797955791828301, "learning_rate": 4.735718464458271e-06, "loss": 0.3006, "step": 7397 }, { "epoch": 0.35, "grad_norm": 0.5794601435475575, "learning_rate": 4.7356335908927356e-06, "loss": 0.2882, "step": 7398 }, { "epoch": 0.35, "grad_norm": 0.6503263414943407, "learning_rate": 4.735548704461691e-06, "loss": 0.2841, "step": 7399 }, { "epoch": 0.35, "grad_norm": 0.655828408580984, "learning_rate": 4.735463805165624e-06, "loss": 0.3111, "step": 7400 }, { "epoch": 0.35, "grad_norm": 0.6627015518892322, "learning_rate": 4.735378893005026e-06, "loss": 0.2946, "step": 7401 }, { "epoch": 0.35, "grad_norm": 0.6073223709838094, "learning_rate": 4.735293967980384e-06, "loss": 0.2746, "step": 7402 }, { "epoch": 0.35, "grad_norm": 0.6326471708168707, "learning_rate": 4.735209030092187e-06, "loss": 0.3032, "step": 7403 }, { "epoch": 0.35, "grad_norm": 0.5985955881145648, "learning_rate": 4.7351240793409235e-06, "loss": 0.2933, "step": 7404 }, { "epoch": 0.35, "grad_norm": 0.5989023406066852, "learning_rate": 4.735039115727084e-06, "loss": 0.2881, "step": 7405 }, { "epoch": 0.35, "grad_norm": 0.5818935896832561, "learning_rate": 4.734954139251155e-06, "loss": 0.2958, "step": 7406 }, { "epoch": 0.35, "grad_norm": 0.6448965807240151, "learning_rate": 4.734869149913626e-06, "loss": 0.2807, "step": 7407 }, { "epoch": 0.35, "grad_norm": 0.6444005248660022, "learning_rate": 4.734784147714988e-06, "loss": 0.3082, "step": 7408 }, { "epoch": 0.35, "grad_norm": 0.6285551887118892, "learning_rate": 4.7346991326557284e-06, "loss": 0.3032, "step": 7409 }, { "epoch": 0.35, "grad_norm": 0.6224437582886647, "learning_rate": 4.734614104736337e-06, "loss": 0.2833, "step": 7410 }, { "epoch": 0.35, "grad_norm": 0.6212496158753902, "learning_rate": 4.734529063957303e-06, "loss": 0.285, "step": 7411 }, { "epoch": 0.35, "grad_norm": 0.615455025205431, "learning_rate": 4.7344440103191156e-06, "loss": 0.3129, "step": 7412 }, { "epoch": 0.35, "grad_norm": 0.5875086416223626, "learning_rate": 4.734358943822266e-06, "loss": 0.2689, "step": 7413 }, { "epoch": 0.35, "grad_norm": 0.6514004846903191, "learning_rate": 4.734273864467241e-06, "loss": 0.3145, "step": 7414 }, { "epoch": 0.35, "grad_norm": 0.6077008926328643, "learning_rate": 4.734188772254531e-06, "loss": 0.2733, "step": 7415 }, { "epoch": 0.35, "grad_norm": 0.6012491593832805, "learning_rate": 4.734103667184628e-06, "loss": 0.2651, "step": 7416 }, { "epoch": 0.35, "grad_norm": 0.5809444589782784, "learning_rate": 4.734018549258018e-06, "loss": 0.2956, "step": 7417 }, { "epoch": 0.35, "grad_norm": 0.600624314396837, "learning_rate": 4.7339334184751935e-06, "loss": 0.2906, "step": 7418 }, { "epoch": 0.35, "grad_norm": 0.5994129671455996, "learning_rate": 4.733848274836644e-06, "loss": 0.3067, "step": 7419 }, { "epoch": 0.35, "grad_norm": 0.5854944762981685, "learning_rate": 4.733763118342858e-06, "loss": 0.2835, "step": 7420 }, { "epoch": 0.35, "grad_norm": 0.6456209010474646, "learning_rate": 4.733677948994328e-06, "loss": 0.2718, "step": 7421 }, { "epoch": 0.35, "grad_norm": 0.6464684116198124, "learning_rate": 4.733592766791542e-06, "loss": 0.3112, "step": 7422 }, { "epoch": 0.35, "grad_norm": 0.5675560103391237, "learning_rate": 4.733507571734992e-06, "loss": 0.2853, "step": 7423 }, { "epoch": 0.35, "grad_norm": 0.6176334372189565, "learning_rate": 4.733422363825166e-06, "loss": 0.2886, "step": 7424 }, { "epoch": 0.35, "grad_norm": 0.561613505272589, "learning_rate": 4.733337143062557e-06, "loss": 0.2774, "step": 7425 }, { "epoch": 0.35, "grad_norm": 0.6276048036368561, "learning_rate": 4.733251909447653e-06, "loss": 0.3032, "step": 7426 }, { "epoch": 0.35, "grad_norm": 0.6692028494169617, "learning_rate": 4.733166662980946e-06, "loss": 0.3253, "step": 7427 }, { "epoch": 0.35, "grad_norm": 0.679613332127996, "learning_rate": 4.733081403662926e-06, "loss": 0.3186, "step": 7428 }, { "epoch": 0.35, "grad_norm": 0.6194360895032495, "learning_rate": 4.7329961314940835e-06, "loss": 0.2827, "step": 7429 }, { "epoch": 0.35, "grad_norm": 0.6407381044022489, "learning_rate": 4.73291084647491e-06, "loss": 0.2975, "step": 7430 }, { "epoch": 0.35, "grad_norm": 0.610279486402482, "learning_rate": 4.732825548605895e-06, "loss": 0.3303, "step": 7431 }, { "epoch": 0.35, "grad_norm": 0.5954216818558042, "learning_rate": 4.732740237887531e-06, "loss": 0.2796, "step": 7432 }, { "epoch": 0.35, "grad_norm": 0.6279963821766901, "learning_rate": 4.732654914320308e-06, "loss": 0.314, "step": 7433 }, { "epoch": 0.35, "grad_norm": 0.6169344205823378, "learning_rate": 4.732569577904717e-06, "loss": 0.2911, "step": 7434 }, { "epoch": 0.35, "grad_norm": 0.6264548238440034, "learning_rate": 4.73248422864125e-06, "loss": 0.3106, "step": 7435 }, { "epoch": 0.35, "grad_norm": 0.6451225890079316, "learning_rate": 4.732398866530396e-06, "loss": 0.2921, "step": 7436 }, { "epoch": 0.35, "grad_norm": 0.6588151940246139, "learning_rate": 4.732313491572648e-06, "loss": 0.3205, "step": 7437 }, { "epoch": 0.35, "grad_norm": 0.6279328348065062, "learning_rate": 4.732228103768498e-06, "loss": 0.2993, "step": 7438 }, { "epoch": 0.35, "grad_norm": 0.6287789972668989, "learning_rate": 4.732142703118435e-06, "loss": 0.2806, "step": 7439 }, { "epoch": 0.35, "grad_norm": 0.5604337369195165, "learning_rate": 4.7320572896229524e-06, "loss": 0.2771, "step": 7440 }, { "epoch": 0.35, "grad_norm": 0.6270497831012652, "learning_rate": 4.73197186328254e-06, "loss": 0.3035, "step": 7441 }, { "epoch": 0.35, "grad_norm": 0.6377913726438902, "learning_rate": 4.731886424097693e-06, "loss": 0.278, "step": 7442 }, { "epoch": 0.35, "grad_norm": 0.6497297198243744, "learning_rate": 4.731800972068898e-06, "loss": 0.3169, "step": 7443 }, { "epoch": 0.35, "grad_norm": 0.6240134702281946, "learning_rate": 4.73171550719665e-06, "loss": 0.2956, "step": 7444 }, { "epoch": 0.35, "grad_norm": 0.6055118778405407, "learning_rate": 4.731630029481441e-06, "loss": 0.2924, "step": 7445 }, { "epoch": 0.35, "grad_norm": 0.640720756127532, "learning_rate": 4.731544538923762e-06, "loss": 0.318, "step": 7446 }, { "epoch": 0.35, "grad_norm": 0.5917864390824095, "learning_rate": 4.731459035524104e-06, "loss": 0.3069, "step": 7447 }, { "epoch": 0.35, "grad_norm": 0.6336800433180175, "learning_rate": 4.731373519282961e-06, "loss": 0.2883, "step": 7448 }, { "epoch": 0.35, "grad_norm": 0.5994243761930602, "learning_rate": 4.7312879902008245e-06, "loss": 0.3055, "step": 7449 }, { "epoch": 0.35, "grad_norm": 0.6482704855697231, "learning_rate": 4.731202448278186e-06, "loss": 0.3097, "step": 7450 }, { "epoch": 0.35, "grad_norm": 0.5977640667427278, "learning_rate": 4.731116893515539e-06, "loss": 0.2834, "step": 7451 }, { "epoch": 0.35, "grad_norm": 0.59242595937296, "learning_rate": 4.7310313259133735e-06, "loss": 0.2743, "step": 7452 }, { "epoch": 0.35, "grad_norm": 0.5896172512577673, "learning_rate": 4.730945745472184e-06, "loss": 0.2981, "step": 7453 }, { "epoch": 0.35, "grad_norm": 0.6374576613009049, "learning_rate": 4.730860152192462e-06, "loss": 0.2743, "step": 7454 }, { "epoch": 0.35, "grad_norm": 0.6559596238982796, "learning_rate": 4.730774546074702e-06, "loss": 0.3015, "step": 7455 }, { "epoch": 0.35, "grad_norm": 0.6223390885182779, "learning_rate": 4.730688927119395e-06, "loss": 0.293, "step": 7456 }, { "epoch": 0.35, "grad_norm": 0.5954302092423079, "learning_rate": 4.730603295327032e-06, "loss": 0.2989, "step": 7457 }, { "epoch": 0.35, "grad_norm": 0.664769764369213, "learning_rate": 4.7305176506981094e-06, "loss": 0.29, "step": 7458 }, { "epoch": 0.35, "grad_norm": 0.6301515579461444, "learning_rate": 4.730431993233118e-06, "loss": 0.3085, "step": 7459 }, { "epoch": 0.35, "grad_norm": 0.6304641649603712, "learning_rate": 4.730346322932551e-06, "loss": 0.3113, "step": 7460 }, { "epoch": 0.35, "grad_norm": 0.6028899819516099, "learning_rate": 4.730260639796901e-06, "loss": 0.2785, "step": 7461 }, { "epoch": 0.35, "grad_norm": 0.5683950018814918, "learning_rate": 4.730174943826662e-06, "loss": 0.2735, "step": 7462 }, { "epoch": 0.35, "grad_norm": 0.5849956048103411, "learning_rate": 4.730089235022327e-06, "loss": 0.277, "step": 7463 }, { "epoch": 0.35, "grad_norm": 0.6403640234330732, "learning_rate": 4.730003513384389e-06, "loss": 0.3133, "step": 7464 }, { "epoch": 0.35, "grad_norm": 0.6247419405645814, "learning_rate": 4.7299177789133405e-06, "loss": 0.2884, "step": 7465 }, { "epoch": 0.35, "grad_norm": 0.6219422291851344, "learning_rate": 4.729832031609676e-06, "loss": 0.3117, "step": 7466 }, { "epoch": 0.35, "grad_norm": 0.6107915881342317, "learning_rate": 4.729746271473889e-06, "loss": 0.3023, "step": 7467 }, { "epoch": 0.35, "grad_norm": 0.5693716092712382, "learning_rate": 4.729660498506472e-06, "loss": 0.2908, "step": 7468 }, { "epoch": 0.35, "grad_norm": 0.5570921217538, "learning_rate": 4.72957471270792e-06, "loss": 0.2762, "step": 7469 }, { "epoch": 0.35, "grad_norm": 0.5732256071775237, "learning_rate": 4.729488914078725e-06, "loss": 0.2997, "step": 7470 }, { "epoch": 0.35, "grad_norm": 0.6038654987373043, "learning_rate": 4.729403102619382e-06, "loss": 0.3013, "step": 7471 }, { "epoch": 0.35, "grad_norm": 0.5991633453442625, "learning_rate": 4.729317278330385e-06, "loss": 0.2875, "step": 7472 }, { "epoch": 0.35, "grad_norm": 0.6316182936179064, "learning_rate": 4.729231441212228e-06, "loss": 0.3003, "step": 7473 }, { "epoch": 0.35, "grad_norm": 0.5614949615726194, "learning_rate": 4.729145591265403e-06, "loss": 0.2935, "step": 7474 }, { "epoch": 0.35, "grad_norm": 0.5435288618165629, "learning_rate": 4.729059728490406e-06, "loss": 0.2702, "step": 7475 }, { "epoch": 0.35, "grad_norm": 0.5766025454822886, "learning_rate": 4.728973852887729e-06, "loss": 0.2898, "step": 7476 }, { "epoch": 0.35, "grad_norm": 0.620404499496171, "learning_rate": 4.72888796445787e-06, "loss": 0.3057, "step": 7477 }, { "epoch": 0.35, "grad_norm": 0.6097016930205563, "learning_rate": 4.72880206320132e-06, "loss": 0.2887, "step": 7478 }, { "epoch": 0.35, "grad_norm": 0.6096539343054278, "learning_rate": 4.728716149118574e-06, "loss": 0.2964, "step": 7479 }, { "epoch": 0.35, "grad_norm": 0.5831839484791332, "learning_rate": 4.7286302222101265e-06, "loss": 0.289, "step": 7480 }, { "epoch": 0.35, "grad_norm": 0.6251878680306576, "learning_rate": 4.728544282476473e-06, "loss": 0.3104, "step": 7481 }, { "epoch": 0.35, "grad_norm": 0.5781146199693133, "learning_rate": 4.728458329918107e-06, "loss": 0.2876, "step": 7482 }, { "epoch": 0.35, "grad_norm": 0.6643873741105488, "learning_rate": 4.728372364535524e-06, "loss": 0.2866, "step": 7483 }, { "epoch": 0.35, "grad_norm": 0.668929581002265, "learning_rate": 4.728286386329218e-06, "loss": 0.2903, "step": 7484 }, { "epoch": 0.35, "grad_norm": 0.6124011756200206, "learning_rate": 4.7282003952996825e-06, "loss": 0.3112, "step": 7485 }, { "epoch": 0.35, "grad_norm": 0.618319255081924, "learning_rate": 4.7281143914474146e-06, "loss": 0.2968, "step": 7486 }, { "epoch": 0.35, "grad_norm": 0.6509246162686734, "learning_rate": 4.728028374772909e-06, "loss": 0.2978, "step": 7487 }, { "epoch": 0.35, "grad_norm": 0.6226062295453259, "learning_rate": 4.7279423452766594e-06, "loss": 0.2805, "step": 7488 }, { "epoch": 0.35, "grad_norm": 0.6710359748104449, "learning_rate": 4.727856302959162e-06, "loss": 0.3087, "step": 7489 }, { "epoch": 0.35, "grad_norm": 0.6169973317292393, "learning_rate": 4.7277702478209105e-06, "loss": 0.2824, "step": 7490 }, { "epoch": 0.35, "grad_norm": 0.5902322668956256, "learning_rate": 4.727684179862403e-06, "loss": 0.2806, "step": 7491 }, { "epoch": 0.35, "grad_norm": 0.6106625117743126, "learning_rate": 4.727598099084131e-06, "loss": 0.2778, "step": 7492 }, { "epoch": 0.35, "grad_norm": 0.6558958591199224, "learning_rate": 4.727512005486593e-06, "loss": 0.3017, "step": 7493 }, { "epoch": 0.35, "grad_norm": 0.6162474535652104, "learning_rate": 4.727425899070283e-06, "loss": 0.3022, "step": 7494 }, { "epoch": 0.35, "grad_norm": 0.6649691875047167, "learning_rate": 4.727339779835697e-06, "loss": 0.3026, "step": 7495 }, { "epoch": 0.35, "grad_norm": 0.6127363375398361, "learning_rate": 4.727253647783331e-06, "loss": 0.3039, "step": 7496 }, { "epoch": 0.35, "grad_norm": 0.6350355436000771, "learning_rate": 4.727167502913679e-06, "loss": 0.3113, "step": 7497 }, { "epoch": 0.35, "grad_norm": 0.6158602924381316, "learning_rate": 4.727081345227237e-06, "loss": 0.2924, "step": 7498 }, { "epoch": 0.35, "grad_norm": 0.6260763733082504, "learning_rate": 4.726995174724503e-06, "loss": 0.2929, "step": 7499 }, { "epoch": 0.35, "grad_norm": 0.6176365559141517, "learning_rate": 4.7269089914059716e-06, "loss": 0.3094, "step": 7500 }, { "epoch": 0.35, "grad_norm": 0.6214822044041161, "learning_rate": 4.726822795272138e-06, "loss": 0.32, "step": 7501 }, { "epoch": 0.35, "grad_norm": 0.6696192782076573, "learning_rate": 4.726736586323499e-06, "loss": 0.3172, "step": 7502 }, { "epoch": 0.35, "grad_norm": 0.5859629309254354, "learning_rate": 4.726650364560551e-06, "loss": 0.2861, "step": 7503 }, { "epoch": 0.35, "grad_norm": 0.5998867285924453, "learning_rate": 4.726564129983789e-06, "loss": 0.2844, "step": 7504 }, { "epoch": 0.35, "grad_norm": 0.6312821677540199, "learning_rate": 4.72647788259371e-06, "loss": 0.2859, "step": 7505 }, { "epoch": 0.35, "grad_norm": 0.5668166030551222, "learning_rate": 4.726391622390812e-06, "loss": 0.2777, "step": 7506 }, { "epoch": 0.35, "grad_norm": 0.6279469148025668, "learning_rate": 4.726305349375589e-06, "loss": 0.303, "step": 7507 }, { "epoch": 0.35, "grad_norm": 0.6614207409748224, "learning_rate": 4.726219063548538e-06, "loss": 0.3037, "step": 7508 }, { "epoch": 0.35, "grad_norm": 0.5718739304593271, "learning_rate": 4.726132764910156e-06, "loss": 0.269, "step": 7509 }, { "epoch": 0.35, "grad_norm": 0.6068684750317885, "learning_rate": 4.726046453460939e-06, "loss": 0.3032, "step": 7510 }, { "epoch": 0.35, "grad_norm": 0.6463288532780276, "learning_rate": 4.7259601292013845e-06, "loss": 0.285, "step": 7511 }, { "epoch": 0.35, "grad_norm": 0.6360224919109698, "learning_rate": 4.7258737921319895e-06, "loss": 0.2789, "step": 7512 }, { "epoch": 0.35, "grad_norm": 0.5733324334010266, "learning_rate": 4.725787442253249e-06, "loss": 0.2725, "step": 7513 }, { "epoch": 0.35, "grad_norm": 0.61712398758651, "learning_rate": 4.725701079565662e-06, "loss": 0.2879, "step": 7514 }, { "epoch": 0.35, "grad_norm": 0.5874861642199224, "learning_rate": 4.7256147040697245e-06, "loss": 0.2962, "step": 7515 }, { "epoch": 0.35, "grad_norm": 0.6530584743450764, "learning_rate": 4.725528315765934e-06, "loss": 0.3183, "step": 7516 }, { "epoch": 0.35, "grad_norm": 0.6881849244458905, "learning_rate": 4.725441914654788e-06, "loss": 0.3202, "step": 7517 }, { "epoch": 0.35, "grad_norm": 0.6128281079075308, "learning_rate": 4.725355500736782e-06, "loss": 0.2823, "step": 7518 }, { "epoch": 0.35, "grad_norm": 0.6711245994580595, "learning_rate": 4.7252690740124155e-06, "loss": 0.2927, "step": 7519 }, { "epoch": 0.35, "grad_norm": 0.6416757962027441, "learning_rate": 4.725182634482183e-06, "loss": 0.3102, "step": 7520 }, { "epoch": 0.35, "grad_norm": 0.6392493053953677, "learning_rate": 4.725096182146585e-06, "loss": 0.3065, "step": 7521 }, { "epoch": 0.35, "grad_norm": 0.6875526812045405, "learning_rate": 4.725009717006117e-06, "loss": 0.2959, "step": 7522 }, { "epoch": 0.35, "grad_norm": 0.5593352747536343, "learning_rate": 4.724923239061279e-06, "loss": 0.2913, "step": 7523 }, { "epoch": 0.35, "grad_norm": 0.6085440847343042, "learning_rate": 4.724836748312565e-06, "loss": 0.2805, "step": 7524 }, { "epoch": 0.35, "grad_norm": 0.6391363068697624, "learning_rate": 4.724750244760476e-06, "loss": 0.3058, "step": 7525 }, { "epoch": 0.35, "grad_norm": 0.631808366446407, "learning_rate": 4.724663728405508e-06, "loss": 0.2833, "step": 7526 }, { "epoch": 0.35, "grad_norm": 0.5876430406885658, "learning_rate": 4.724577199248159e-06, "loss": 0.2906, "step": 7527 }, { "epoch": 0.35, "grad_norm": 0.619317133349142, "learning_rate": 4.724490657288929e-06, "loss": 0.2979, "step": 7528 }, { "epoch": 0.35, "grad_norm": 0.6870275163089027, "learning_rate": 4.724404102528313e-06, "loss": 0.3104, "step": 7529 }, { "epoch": 0.35, "grad_norm": 0.6181060884643654, "learning_rate": 4.72431753496681e-06, "loss": 0.2944, "step": 7530 }, { "epoch": 0.35, "grad_norm": 0.6468077974924066, "learning_rate": 4.7242309546049194e-06, "loss": 0.3039, "step": 7531 }, { "epoch": 0.35, "grad_norm": 0.587675781448775, "learning_rate": 4.7241443614431385e-06, "loss": 0.2621, "step": 7532 }, { "epoch": 0.35, "grad_norm": 0.6399413243756152, "learning_rate": 4.724057755481966e-06, "loss": 0.3152, "step": 7533 }, { "epoch": 0.35, "grad_norm": 0.5773423834263149, "learning_rate": 4.7239711367219e-06, "loss": 0.2821, "step": 7534 }, { "epoch": 0.35, "grad_norm": 0.6629664901429864, "learning_rate": 4.723884505163439e-06, "loss": 0.3, "step": 7535 }, { "epoch": 0.35, "grad_norm": 0.6401802821679173, "learning_rate": 4.7237978608070825e-06, "loss": 0.2889, "step": 7536 }, { "epoch": 0.35, "grad_norm": 0.6358942655489674, "learning_rate": 4.723711203653327e-06, "loss": 0.2958, "step": 7537 }, { "epoch": 0.35, "grad_norm": 0.6065120012340971, "learning_rate": 4.723624533702672e-06, "loss": 0.3044, "step": 7538 }, { "epoch": 0.35, "grad_norm": 0.5540282581024263, "learning_rate": 4.723537850955619e-06, "loss": 0.2737, "step": 7539 }, { "epoch": 0.35, "grad_norm": 0.5478965679205589, "learning_rate": 4.723451155412663e-06, "loss": 0.2854, "step": 7540 }, { "epoch": 0.35, "grad_norm": 0.6271327475755252, "learning_rate": 4.723364447074304e-06, "loss": 0.2848, "step": 7541 }, { "epoch": 0.35, "grad_norm": 0.5976693852346072, "learning_rate": 4.723277725941042e-06, "loss": 0.303, "step": 7542 }, { "epoch": 0.35, "grad_norm": 0.625259640198638, "learning_rate": 4.723190992013376e-06, "loss": 0.2822, "step": 7543 }, { "epoch": 0.35, "grad_norm": 0.5835791749978609, "learning_rate": 4.7231042452918035e-06, "loss": 0.2882, "step": 7544 }, { "epoch": 0.35, "grad_norm": 0.6010395182774803, "learning_rate": 4.723017485776825e-06, "loss": 0.2971, "step": 7545 }, { "epoch": 0.35, "grad_norm": 0.6303934477515845, "learning_rate": 4.72293071346894e-06, "loss": 0.2984, "step": 7546 }, { "epoch": 0.35, "grad_norm": 0.6302654307637993, "learning_rate": 4.722843928368647e-06, "loss": 0.27, "step": 7547 }, { "epoch": 0.35, "grad_norm": 0.6049048386361194, "learning_rate": 4.722757130476448e-06, "loss": 0.3002, "step": 7548 }, { "epoch": 0.35, "grad_norm": 0.8885709523964564, "learning_rate": 4.722670319792838e-06, "loss": 0.3203, "step": 7549 }, { "epoch": 0.35, "grad_norm": 0.5694413572977337, "learning_rate": 4.72258349631832e-06, "loss": 0.2938, "step": 7550 }, { "epoch": 0.35, "grad_norm": 0.6496669324544125, "learning_rate": 4.722496660053392e-06, "loss": 0.2715, "step": 7551 }, { "epoch": 0.35, "grad_norm": 0.6858458472280337, "learning_rate": 4.722409810998555e-06, "loss": 0.2859, "step": 7552 }, { "epoch": 0.35, "grad_norm": 0.6335148913465047, "learning_rate": 4.722322949154308e-06, "loss": 0.3249, "step": 7553 }, { "epoch": 0.35, "grad_norm": 0.6514706677738663, "learning_rate": 4.72223607452115e-06, "loss": 0.3101, "step": 7554 }, { "epoch": 0.35, "grad_norm": 0.5629434839175803, "learning_rate": 4.722149187099583e-06, "loss": 0.2629, "step": 7555 }, { "epoch": 0.35, "grad_norm": 0.6721532873375645, "learning_rate": 4.722062286890105e-06, "loss": 0.2855, "step": 7556 }, { "epoch": 0.35, "grad_norm": 0.664431673068973, "learning_rate": 4.7219753738932185e-06, "loss": 0.3086, "step": 7557 }, { "epoch": 0.35, "grad_norm": 0.6610185976312628, "learning_rate": 4.721888448109421e-06, "loss": 0.2976, "step": 7558 }, { "epoch": 0.35, "grad_norm": 0.5931165328255824, "learning_rate": 4.721801509539214e-06, "loss": 0.3044, "step": 7559 }, { "epoch": 0.35, "grad_norm": 0.6372847722016072, "learning_rate": 4.721714558183098e-06, "loss": 0.2857, "step": 7560 }, { "epoch": 0.35, "grad_norm": 0.6211667324317501, "learning_rate": 4.721627594041574e-06, "loss": 0.3068, "step": 7561 }, { "epoch": 0.35, "grad_norm": 0.623989812970508, "learning_rate": 4.7215406171151405e-06, "loss": 0.3018, "step": 7562 }, { "epoch": 0.35, "grad_norm": 0.5985927283057672, "learning_rate": 4.721453627404299e-06, "loss": 0.2923, "step": 7563 }, { "epoch": 0.35, "grad_norm": 0.6676974193541655, "learning_rate": 4.72136662490955e-06, "loss": 0.3144, "step": 7564 }, { "epoch": 0.35, "grad_norm": 0.6573285584240216, "learning_rate": 4.721279609631395e-06, "loss": 0.3074, "step": 7565 }, { "epoch": 0.35, "grad_norm": 0.6353788774365766, "learning_rate": 4.721192581570334e-06, "loss": 0.3037, "step": 7566 }, { "epoch": 0.35, "grad_norm": 0.6177850922692162, "learning_rate": 4.721105540726868e-06, "loss": 0.3025, "step": 7567 }, { "epoch": 0.35, "grad_norm": 0.6521441260261635, "learning_rate": 4.721018487101498e-06, "loss": 0.3018, "step": 7568 }, { "epoch": 0.35, "grad_norm": 0.6486926059054905, "learning_rate": 4.7209314206947254e-06, "loss": 0.2937, "step": 7569 }, { "epoch": 0.35, "grad_norm": 0.6052050517850074, "learning_rate": 4.7208443415070504e-06, "loss": 0.2852, "step": 7570 }, { "epoch": 0.35, "grad_norm": 0.6292136304949348, "learning_rate": 4.720757249538974e-06, "loss": 0.2594, "step": 7571 }, { "epoch": 0.35, "grad_norm": 0.6245541372948232, "learning_rate": 4.720670144790997e-06, "loss": 0.299, "step": 7572 }, { "epoch": 0.35, "grad_norm": 0.6040506979237273, "learning_rate": 4.720583027263623e-06, "loss": 0.2945, "step": 7573 }, { "epoch": 0.35, "grad_norm": 0.5953025668194678, "learning_rate": 4.720495896957351e-06, "loss": 0.2968, "step": 7574 }, { "epoch": 0.35, "grad_norm": 0.6812801411282351, "learning_rate": 4.720408753872682e-06, "loss": 0.2975, "step": 7575 }, { "epoch": 0.35, "grad_norm": 0.6117373375411794, "learning_rate": 4.720321598010121e-06, "loss": 0.3001, "step": 7576 }, { "epoch": 0.35, "grad_norm": 0.6132609766553516, "learning_rate": 4.7202344293701665e-06, "loss": 0.2941, "step": 7577 }, { "epoch": 0.35, "grad_norm": 0.6333390407828674, "learning_rate": 4.72014724795332e-06, "loss": 0.2858, "step": 7578 }, { "epoch": 0.36, "grad_norm": 0.6217367131036128, "learning_rate": 4.720060053760086e-06, "loss": 0.3026, "step": 7579 }, { "epoch": 0.36, "grad_norm": 0.6450579287941302, "learning_rate": 4.719972846790962e-06, "loss": 0.2911, "step": 7580 }, { "epoch": 0.36, "grad_norm": 0.6261720610309989, "learning_rate": 4.719885627046455e-06, "loss": 0.293, "step": 7581 }, { "epoch": 0.36, "grad_norm": 0.6193894004175268, "learning_rate": 4.719798394527062e-06, "loss": 0.2983, "step": 7582 }, { "epoch": 0.36, "grad_norm": 0.6090331890654185, "learning_rate": 4.719711149233287e-06, "loss": 0.3112, "step": 7583 }, { "epoch": 0.36, "grad_norm": 0.5575110474651018, "learning_rate": 4.719623891165633e-06, "loss": 0.2718, "step": 7584 }, { "epoch": 0.36, "grad_norm": 0.627813205988202, "learning_rate": 4.719536620324601e-06, "loss": 0.2918, "step": 7585 }, { "epoch": 0.36, "grad_norm": 0.6382041545059722, "learning_rate": 4.719449336710695e-06, "loss": 0.3016, "step": 7586 }, { "epoch": 0.36, "grad_norm": 0.6405954884363028, "learning_rate": 4.719362040324414e-06, "loss": 0.2906, "step": 7587 }, { "epoch": 0.36, "grad_norm": 0.6263698728309975, "learning_rate": 4.719274731166263e-06, "loss": 0.2908, "step": 7588 }, { "epoch": 0.36, "grad_norm": 0.5939546157225213, "learning_rate": 4.719187409236745e-06, "loss": 0.2936, "step": 7589 }, { "epoch": 0.36, "grad_norm": 0.6129855089106118, "learning_rate": 4.719100074536359e-06, "loss": 0.279, "step": 7590 }, { "epoch": 0.36, "grad_norm": 0.6370631110338236, "learning_rate": 4.719012727065611e-06, "loss": 0.3125, "step": 7591 }, { "epoch": 0.36, "grad_norm": 0.6481796761424748, "learning_rate": 4.718925366825003e-06, "loss": 0.2955, "step": 7592 }, { "epoch": 0.36, "grad_norm": 0.5580259164975809, "learning_rate": 4.718837993815036e-06, "loss": 0.2684, "step": 7593 }, { "epoch": 0.36, "grad_norm": 0.6550065310348321, "learning_rate": 4.718750608036216e-06, "loss": 0.3086, "step": 7594 }, { "epoch": 0.36, "grad_norm": 0.6541172653735561, "learning_rate": 4.718663209489041e-06, "loss": 0.2977, "step": 7595 }, { "epoch": 0.36, "grad_norm": 0.6233136930884109, "learning_rate": 4.718575798174018e-06, "loss": 0.3003, "step": 7596 }, { "epoch": 0.36, "grad_norm": 0.6546440801208433, "learning_rate": 4.71848837409165e-06, "loss": 0.3051, "step": 7597 }, { "epoch": 0.36, "grad_norm": 0.6333495287497994, "learning_rate": 4.7184009372424385e-06, "loss": 0.3041, "step": 7598 }, { "epoch": 0.36, "grad_norm": 0.6756722220039494, "learning_rate": 4.718313487626888e-06, "loss": 0.3155, "step": 7599 }, { "epoch": 0.36, "grad_norm": 0.562736751957822, "learning_rate": 4.7182260252455e-06, "loss": 0.2634, "step": 7600 }, { "epoch": 0.36, "grad_norm": 0.6069866620730213, "learning_rate": 4.7181385500987785e-06, "loss": 0.3051, "step": 7601 }, { "epoch": 0.36, "grad_norm": 0.63026989584359, "learning_rate": 4.718051062187227e-06, "loss": 0.2791, "step": 7602 }, { "epoch": 0.36, "grad_norm": 0.6338602853463451, "learning_rate": 4.717963561511349e-06, "loss": 0.2846, "step": 7603 }, { "epoch": 0.36, "grad_norm": 0.5927971100011602, "learning_rate": 4.717876048071649e-06, "loss": 0.2716, "step": 7604 }, { "epoch": 0.36, "grad_norm": 0.6539302477681378, "learning_rate": 4.717788521868629e-06, "loss": 0.298, "step": 7605 }, { "epoch": 0.36, "grad_norm": 0.6175938964266003, "learning_rate": 4.717700982902794e-06, "loss": 0.2825, "step": 7606 }, { "epoch": 0.36, "grad_norm": 0.6163988455976963, "learning_rate": 4.717613431174648e-06, "loss": 0.298, "step": 7607 }, { "epoch": 0.36, "grad_norm": 0.6430943713709129, "learning_rate": 4.717525866684692e-06, "loss": 0.3044, "step": 7608 }, { "epoch": 0.36, "grad_norm": 0.6392720016316104, "learning_rate": 4.717438289433434e-06, "loss": 0.2847, "step": 7609 }, { "epoch": 0.36, "grad_norm": 0.6763512683761648, "learning_rate": 4.717350699421375e-06, "loss": 0.3013, "step": 7610 }, { "epoch": 0.36, "grad_norm": 0.6170105389833304, "learning_rate": 4.71726309664902e-06, "loss": 0.2971, "step": 7611 }, { "epoch": 0.36, "grad_norm": 0.6074988372628519, "learning_rate": 4.717175481116873e-06, "loss": 0.2789, "step": 7612 }, { "epoch": 0.36, "grad_norm": 0.6083990892671407, "learning_rate": 4.717087852825439e-06, "loss": 0.2899, "step": 7613 }, { "epoch": 0.36, "grad_norm": 0.6295528540628662, "learning_rate": 4.717000211775221e-06, "loss": 0.2982, "step": 7614 }, { "epoch": 0.36, "grad_norm": 0.6378900737007075, "learning_rate": 4.716912557966725e-06, "loss": 0.3166, "step": 7615 }, { "epoch": 0.36, "grad_norm": 0.6500678304505181, "learning_rate": 4.7168248914004535e-06, "loss": 0.2918, "step": 7616 }, { "epoch": 0.36, "grad_norm": 0.638334313612095, "learning_rate": 4.716737212076913e-06, "loss": 0.3183, "step": 7617 }, { "epoch": 0.36, "grad_norm": 0.6336672539158332, "learning_rate": 4.716649519996606e-06, "loss": 0.2889, "step": 7618 }, { "epoch": 0.36, "grad_norm": 0.5860119282809304, "learning_rate": 4.716561815160038e-06, "loss": 0.2863, "step": 7619 }, { "epoch": 0.36, "grad_norm": 0.5888617480754655, "learning_rate": 4.7164740975677145e-06, "loss": 0.281, "step": 7620 }, { "epoch": 0.36, "grad_norm": 0.5837013469891027, "learning_rate": 4.71638636722014e-06, "loss": 0.2868, "step": 7621 }, { "epoch": 0.36, "grad_norm": 0.5770954158603129, "learning_rate": 4.716298624117818e-06, "loss": 0.2731, "step": 7622 }, { "epoch": 0.36, "grad_norm": 0.5728094754425737, "learning_rate": 4.716210868261255e-06, "loss": 0.2505, "step": 7623 }, { "epoch": 0.36, "grad_norm": 0.559550718578162, "learning_rate": 4.7161230996509555e-06, "loss": 0.2873, "step": 7624 }, { "epoch": 0.36, "grad_norm": 0.6259382222858794, "learning_rate": 4.716035318287424e-06, "loss": 0.299, "step": 7625 }, { "epoch": 0.36, "grad_norm": 0.5836627972351173, "learning_rate": 4.715947524171167e-06, "loss": 0.3106, "step": 7626 }, { "epoch": 0.36, "grad_norm": 0.615038968841859, "learning_rate": 4.715859717302688e-06, "loss": 0.305, "step": 7627 }, { "epoch": 0.36, "grad_norm": 0.5658285970306189, "learning_rate": 4.715771897682495e-06, "loss": 0.2764, "step": 7628 }, { "epoch": 0.36, "grad_norm": 0.594814688661201, "learning_rate": 4.71568406531109e-06, "loss": 0.2615, "step": 7629 }, { "epoch": 0.36, "grad_norm": 0.6402957635480249, "learning_rate": 4.715596220188981e-06, "loss": 0.304, "step": 7630 }, { "epoch": 0.36, "grad_norm": 0.6343098622346086, "learning_rate": 4.715508362316672e-06, "loss": 0.2986, "step": 7631 }, { "epoch": 0.36, "grad_norm": 0.6108475217412314, "learning_rate": 4.71542049169467e-06, "loss": 0.3089, "step": 7632 }, { "epoch": 0.36, "grad_norm": 0.5814589048434232, "learning_rate": 4.7153326083234794e-06, "loss": 0.2868, "step": 7633 }, { "epoch": 0.36, "grad_norm": 0.5844255532930799, "learning_rate": 4.715244712203606e-06, "loss": 0.2768, "step": 7634 }, { "epoch": 0.36, "grad_norm": 0.6425127362299846, "learning_rate": 4.715156803335557e-06, "loss": 0.3026, "step": 7635 }, { "epoch": 0.36, "grad_norm": 0.6411820506041295, "learning_rate": 4.715068881719837e-06, "loss": 0.2809, "step": 7636 }, { "epoch": 0.36, "grad_norm": 0.6023047935386585, "learning_rate": 4.714980947356952e-06, "loss": 0.3037, "step": 7637 }, { "epoch": 0.36, "grad_norm": 0.6279026202215866, "learning_rate": 4.714893000247408e-06, "loss": 0.3031, "step": 7638 }, { "epoch": 0.36, "grad_norm": 0.744299684510718, "learning_rate": 4.714805040391712e-06, "loss": 0.2992, "step": 7639 }, { "epoch": 0.36, "grad_norm": 0.6139101689333187, "learning_rate": 4.71471706779037e-06, "loss": 0.2796, "step": 7640 }, { "epoch": 0.36, "grad_norm": 0.6619204139039121, "learning_rate": 4.714629082443888e-06, "loss": 0.3091, "step": 7641 }, { "epoch": 0.36, "grad_norm": 0.5809399156462101, "learning_rate": 4.714541084352771e-06, "loss": 0.2966, "step": 7642 }, { "epoch": 0.36, "grad_norm": 0.6171099722550906, "learning_rate": 4.714453073517528e-06, "loss": 0.2861, "step": 7643 }, { "epoch": 0.36, "grad_norm": 0.6590209120920258, "learning_rate": 4.714365049938664e-06, "loss": 0.3024, "step": 7644 }, { "epoch": 0.36, "grad_norm": 0.5773738703520496, "learning_rate": 4.714277013616685e-06, "loss": 0.2847, "step": 7645 }, { "epoch": 0.36, "grad_norm": 0.5814154297394668, "learning_rate": 4.7141889645520985e-06, "loss": 0.2884, "step": 7646 }, { "epoch": 0.36, "grad_norm": 0.6343875988508536, "learning_rate": 4.714100902745411e-06, "loss": 0.3036, "step": 7647 }, { "epoch": 0.36, "grad_norm": 0.6473872494436865, "learning_rate": 4.71401282819713e-06, "loss": 0.3024, "step": 7648 }, { "epoch": 0.36, "grad_norm": 0.6039895368967144, "learning_rate": 4.713924740907761e-06, "loss": 0.2807, "step": 7649 }, { "epoch": 0.36, "grad_norm": 0.6454968290831432, "learning_rate": 4.713836640877811e-06, "loss": 0.3043, "step": 7650 }, { "epoch": 0.36, "grad_norm": 0.6736199139683001, "learning_rate": 4.7137485281077885e-06, "loss": 0.3159, "step": 7651 }, { "epoch": 0.36, "grad_norm": 0.6607998353405472, "learning_rate": 4.7136604025982e-06, "loss": 0.3016, "step": 7652 }, { "epoch": 0.36, "grad_norm": 0.5819396157806805, "learning_rate": 4.713572264349552e-06, "loss": 0.2864, "step": 7653 }, { "epoch": 0.36, "grad_norm": 0.6783293845854382, "learning_rate": 4.713484113362351e-06, "loss": 0.3027, "step": 7654 }, { "epoch": 0.36, "grad_norm": 0.6533068063469948, "learning_rate": 4.713395949637106e-06, "loss": 0.3014, "step": 7655 }, { "epoch": 0.36, "grad_norm": 0.6264152180435957, "learning_rate": 4.713307773174324e-06, "loss": 0.2912, "step": 7656 }, { "epoch": 0.36, "grad_norm": 0.6254233442311393, "learning_rate": 4.713219583974511e-06, "loss": 0.2837, "step": 7657 }, { "epoch": 0.36, "grad_norm": 0.588277012646326, "learning_rate": 4.713131382038176e-06, "loss": 0.2948, "step": 7658 }, { "epoch": 0.36, "grad_norm": 0.6273749529543962, "learning_rate": 4.713043167365827e-06, "loss": 0.3093, "step": 7659 }, { "epoch": 0.36, "grad_norm": 0.5761078595557682, "learning_rate": 4.71295493995797e-06, "loss": 0.3059, "step": 7660 }, { "epoch": 0.36, "grad_norm": 0.5925716654096674, "learning_rate": 4.712866699815113e-06, "loss": 0.2976, "step": 7661 }, { "epoch": 0.36, "grad_norm": 0.6089878600326886, "learning_rate": 4.712778446937765e-06, "loss": 0.2815, "step": 7662 }, { "epoch": 0.36, "grad_norm": 0.6249525130201824, "learning_rate": 4.7126901813264334e-06, "loss": 0.283, "step": 7663 }, { "epoch": 0.36, "grad_norm": 0.5970128875264514, "learning_rate": 4.712601902981626e-06, "loss": 0.2894, "step": 7664 }, { "epoch": 0.36, "grad_norm": 0.6249013385193074, "learning_rate": 4.712513611903851e-06, "loss": 0.2836, "step": 7665 }, { "epoch": 0.36, "grad_norm": 0.5797493655091198, "learning_rate": 4.712425308093615e-06, "loss": 0.2887, "step": 7666 }, { "epoch": 0.36, "grad_norm": 0.5957286052172068, "learning_rate": 4.712336991551428e-06, "loss": 0.2906, "step": 7667 }, { "epoch": 0.36, "grad_norm": 0.6186746630699107, "learning_rate": 4.712248662277798e-06, "loss": 0.2903, "step": 7668 }, { "epoch": 0.36, "grad_norm": 0.62479550841855, "learning_rate": 4.712160320273232e-06, "loss": 0.2934, "step": 7669 }, { "epoch": 0.36, "grad_norm": 0.6345634959849168, "learning_rate": 4.712071965538241e-06, "loss": 0.2933, "step": 7670 }, { "epoch": 0.36, "grad_norm": 0.6073926158003639, "learning_rate": 4.711983598073331e-06, "loss": 0.2725, "step": 7671 }, { "epoch": 0.36, "grad_norm": 0.6142752796345038, "learning_rate": 4.7118952178790115e-06, "loss": 0.294, "step": 7672 }, { "epoch": 0.36, "grad_norm": 0.6267566776054355, "learning_rate": 4.71180682495579e-06, "loss": 0.2805, "step": 7673 }, { "epoch": 0.36, "grad_norm": 0.6066844694705806, "learning_rate": 4.711718419304177e-06, "loss": 0.292, "step": 7674 }, { "epoch": 0.36, "grad_norm": 0.635886831730509, "learning_rate": 4.711630000924681e-06, "loss": 0.2894, "step": 7675 }, { "epoch": 0.36, "grad_norm": 0.6038439854257852, "learning_rate": 4.7115415698178095e-06, "loss": 0.2811, "step": 7676 }, { "epoch": 0.36, "grad_norm": 0.5860829999355569, "learning_rate": 4.7114531259840725e-06, "loss": 0.2758, "step": 7677 }, { "epoch": 0.36, "grad_norm": 0.6165728342862139, "learning_rate": 4.711364669423978e-06, "loss": 0.2973, "step": 7678 }, { "epoch": 0.36, "grad_norm": 0.6153905609034794, "learning_rate": 4.711276200138035e-06, "loss": 0.3103, "step": 7679 }, { "epoch": 0.36, "grad_norm": 0.65877652975603, "learning_rate": 4.711187718126755e-06, "loss": 0.2905, "step": 7680 }, { "epoch": 0.36, "grad_norm": 0.5890482803176528, "learning_rate": 4.711099223390644e-06, "loss": 0.29, "step": 7681 }, { "epoch": 0.36, "grad_norm": 0.6063779782482279, "learning_rate": 4.711010715930214e-06, "loss": 0.3124, "step": 7682 }, { "epoch": 0.36, "grad_norm": 0.6787702059358063, "learning_rate": 4.710922195745972e-06, "loss": 0.3082, "step": 7683 }, { "epoch": 0.36, "grad_norm": 0.5955479270783727, "learning_rate": 4.710833662838429e-06, "loss": 0.2917, "step": 7684 }, { "epoch": 0.36, "grad_norm": 0.6324742579767287, "learning_rate": 4.710745117208093e-06, "loss": 0.2941, "step": 7685 }, { "epoch": 0.36, "grad_norm": 0.5756042226453724, "learning_rate": 4.710656558855475e-06, "loss": 0.2769, "step": 7686 }, { "epoch": 0.36, "grad_norm": 0.6373858315091397, "learning_rate": 4.710567987781085e-06, "loss": 0.3169, "step": 7687 }, { "epoch": 0.36, "grad_norm": 0.6011754822021567, "learning_rate": 4.7104794039854305e-06, "loss": 0.2927, "step": 7688 }, { "epoch": 0.36, "grad_norm": 0.6584700579970695, "learning_rate": 4.710390807469024e-06, "loss": 0.2921, "step": 7689 }, { "epoch": 0.36, "grad_norm": 0.5847739678065091, "learning_rate": 4.7103021982323735e-06, "loss": 0.2954, "step": 7690 }, { "epoch": 0.36, "grad_norm": 0.5826365536243069, "learning_rate": 4.710213576275989e-06, "loss": 0.2748, "step": 7691 }, { "epoch": 0.36, "grad_norm": 0.6404922326446054, "learning_rate": 4.710124941600381e-06, "loss": 0.2985, "step": 7692 }, { "epoch": 0.36, "grad_norm": 0.5643039937920155, "learning_rate": 4.71003629420606e-06, "loss": 0.3018, "step": 7693 }, { "epoch": 0.36, "grad_norm": 0.602117149415449, "learning_rate": 4.709947634093535e-06, "loss": 0.3001, "step": 7694 }, { "epoch": 0.36, "grad_norm": 0.603357743633819, "learning_rate": 4.709858961263316e-06, "loss": 0.2999, "step": 7695 }, { "epoch": 0.36, "grad_norm": 0.7314053553433765, "learning_rate": 4.709770275715916e-06, "loss": 0.3043, "step": 7696 }, { "epoch": 0.36, "grad_norm": 0.5945130836952959, "learning_rate": 4.709681577451842e-06, "loss": 0.2949, "step": 7697 }, { "epoch": 0.36, "grad_norm": 0.6885876535883964, "learning_rate": 4.709592866471606e-06, "loss": 0.3221, "step": 7698 }, { "epoch": 0.36, "grad_norm": 0.565981104663937, "learning_rate": 4.709504142775719e-06, "loss": 0.2797, "step": 7699 }, { "epoch": 0.36, "grad_norm": 0.571623142846503, "learning_rate": 4.70941540636469e-06, "loss": 0.2743, "step": 7700 }, { "epoch": 0.36, "grad_norm": 0.5999409223553965, "learning_rate": 4.709326657239032e-06, "loss": 0.2939, "step": 7701 }, { "epoch": 0.36, "grad_norm": 0.6131370400149947, "learning_rate": 4.709237895399254e-06, "loss": 0.2893, "step": 7702 }, { "epoch": 0.36, "grad_norm": 0.5899259678918449, "learning_rate": 4.709149120845867e-06, "loss": 0.2895, "step": 7703 }, { "epoch": 0.36, "grad_norm": 0.6337650592457487, "learning_rate": 4.709060333579382e-06, "loss": 0.3013, "step": 7704 }, { "epoch": 0.36, "grad_norm": 0.6027923912659624, "learning_rate": 4.70897153360031e-06, "loss": 0.277, "step": 7705 }, { "epoch": 0.36, "grad_norm": 0.5841463178086898, "learning_rate": 4.708882720909163e-06, "loss": 0.284, "step": 7706 }, { "epoch": 0.36, "grad_norm": 0.6106127702576534, "learning_rate": 4.70879389550645e-06, "loss": 0.3038, "step": 7707 }, { "epoch": 0.36, "grad_norm": 0.6146607015154779, "learning_rate": 4.708705057392683e-06, "loss": 0.2781, "step": 7708 }, { "epoch": 0.36, "grad_norm": 0.5662140695845386, "learning_rate": 4.708616206568374e-06, "loss": 0.279, "step": 7709 }, { "epoch": 0.36, "grad_norm": 0.6046564823618386, "learning_rate": 4.708527343034034e-06, "loss": 0.2809, "step": 7710 }, { "epoch": 0.36, "grad_norm": 0.6251207749968273, "learning_rate": 4.708438466790174e-06, "loss": 0.3014, "step": 7711 }, { "epoch": 0.36, "grad_norm": 0.5939254949860868, "learning_rate": 4.708349577837306e-06, "loss": 0.2898, "step": 7712 }, { "epoch": 0.36, "grad_norm": 0.552071578596804, "learning_rate": 4.708260676175941e-06, "loss": 0.2764, "step": 7713 }, { "epoch": 0.36, "grad_norm": 0.6181599890695686, "learning_rate": 4.708171761806591e-06, "loss": 0.2958, "step": 7714 }, { "epoch": 0.36, "grad_norm": 0.6299156618375744, "learning_rate": 4.708082834729767e-06, "loss": 0.3293, "step": 7715 }, { "epoch": 0.36, "grad_norm": 0.5766372782661704, "learning_rate": 4.707993894945982e-06, "loss": 0.286, "step": 7716 }, { "epoch": 0.36, "grad_norm": 0.5797713057451661, "learning_rate": 4.707904942455747e-06, "loss": 0.2911, "step": 7717 }, { "epoch": 0.36, "grad_norm": 0.6039346489283011, "learning_rate": 4.707815977259573e-06, "loss": 0.2972, "step": 7718 }, { "epoch": 0.36, "grad_norm": 0.5756458330613189, "learning_rate": 4.707726999357975e-06, "loss": 0.2899, "step": 7719 }, { "epoch": 0.36, "grad_norm": 0.6150503417856578, "learning_rate": 4.707638008751461e-06, "loss": 0.2898, "step": 7720 }, { "epoch": 0.36, "grad_norm": 0.6505912914717936, "learning_rate": 4.707549005440546e-06, "loss": 0.3234, "step": 7721 }, { "epoch": 0.36, "grad_norm": 0.6248302419807579, "learning_rate": 4.7074599894257415e-06, "loss": 0.297, "step": 7722 }, { "epoch": 0.36, "grad_norm": 0.6542749459832027, "learning_rate": 4.707370960707559e-06, "loss": 0.2851, "step": 7723 }, { "epoch": 0.36, "grad_norm": 0.5662868036543015, "learning_rate": 4.707281919286511e-06, "loss": 0.2824, "step": 7724 }, { "epoch": 0.36, "grad_norm": 0.6184303941044846, "learning_rate": 4.707192865163112e-06, "loss": 0.2809, "step": 7725 }, { "epoch": 0.36, "grad_norm": 0.6634289347533826, "learning_rate": 4.707103798337871e-06, "loss": 0.3096, "step": 7726 }, { "epoch": 0.36, "grad_norm": 0.6022627118259771, "learning_rate": 4.707014718811304e-06, "loss": 0.3015, "step": 7727 }, { "epoch": 0.36, "grad_norm": 0.7717181381129723, "learning_rate": 4.70692562658392e-06, "loss": 0.3247, "step": 7728 }, { "epoch": 0.36, "grad_norm": 0.6169275065536336, "learning_rate": 4.706836521656236e-06, "loss": 0.2918, "step": 7729 }, { "epoch": 0.36, "grad_norm": 0.6858094305834624, "learning_rate": 4.706747404028761e-06, "loss": 0.2893, "step": 7730 }, { "epoch": 0.36, "grad_norm": 0.5978676002836691, "learning_rate": 4.70665827370201e-06, "loss": 0.2836, "step": 7731 }, { "epoch": 0.36, "grad_norm": 0.5702610878558729, "learning_rate": 4.706569130676495e-06, "loss": 0.2598, "step": 7732 }, { "epoch": 0.36, "grad_norm": 0.6220604434922183, "learning_rate": 4.706479974952729e-06, "loss": 0.2993, "step": 7733 }, { "epoch": 0.36, "grad_norm": 0.587836808188556, "learning_rate": 4.7063908065312255e-06, "loss": 0.2952, "step": 7734 }, { "epoch": 0.36, "grad_norm": 0.5853351583038789, "learning_rate": 4.706301625412498e-06, "loss": 0.3004, "step": 7735 }, { "epoch": 0.36, "grad_norm": 0.6168432088294373, "learning_rate": 4.706212431597058e-06, "loss": 0.3191, "step": 7736 }, { "epoch": 0.36, "grad_norm": 0.6206391034139919, "learning_rate": 4.706123225085421e-06, "loss": 0.3019, "step": 7737 }, { "epoch": 0.36, "grad_norm": 0.5956190774826864, "learning_rate": 4.706034005878099e-06, "loss": 0.2912, "step": 7738 }, { "epoch": 0.36, "grad_norm": 0.5517584672555077, "learning_rate": 4.7059447739756056e-06, "loss": 0.2751, "step": 7739 }, { "epoch": 0.36, "grad_norm": 0.5863880998155926, "learning_rate": 4.705855529378454e-06, "loss": 0.279, "step": 7740 }, { "epoch": 0.36, "grad_norm": 0.655563998298645, "learning_rate": 4.70576627208716e-06, "loss": 0.3141, "step": 7741 }, { "epoch": 0.36, "grad_norm": 0.6013319720966585, "learning_rate": 4.705677002102234e-06, "loss": 0.2937, "step": 7742 }, { "epoch": 0.36, "grad_norm": 0.6075717664594433, "learning_rate": 4.705587719424192e-06, "loss": 0.2785, "step": 7743 }, { "epoch": 0.36, "grad_norm": 0.6417583069137612, "learning_rate": 4.705498424053546e-06, "loss": 0.2979, "step": 7744 }, { "epoch": 0.36, "grad_norm": 0.5919097519741037, "learning_rate": 4.7054091159908126e-06, "loss": 0.2848, "step": 7745 }, { "epoch": 0.36, "grad_norm": 0.6137911436753837, "learning_rate": 4.705319795236503e-06, "loss": 0.2862, "step": 7746 }, { "epoch": 0.36, "grad_norm": 0.5855078334881106, "learning_rate": 4.705230461791132e-06, "loss": 0.2828, "step": 7747 }, { "epoch": 0.36, "grad_norm": 0.6639086847299912, "learning_rate": 4.705141115655214e-06, "loss": 0.2978, "step": 7748 }, { "epoch": 0.36, "grad_norm": 0.639024370575996, "learning_rate": 4.705051756829263e-06, "loss": 0.3025, "step": 7749 }, { "epoch": 0.36, "grad_norm": 0.5897655780011504, "learning_rate": 4.704962385313794e-06, "loss": 0.2828, "step": 7750 }, { "epoch": 0.36, "grad_norm": 0.6630604030961043, "learning_rate": 4.70487300110932e-06, "loss": 0.3176, "step": 7751 }, { "epoch": 0.36, "grad_norm": 0.6712253614771628, "learning_rate": 4.7047836042163564e-06, "loss": 0.288, "step": 7752 }, { "epoch": 0.36, "grad_norm": 0.5804144502492644, "learning_rate": 4.704694194635418e-06, "loss": 0.2655, "step": 7753 }, { "epoch": 0.36, "grad_norm": 0.5888726888420472, "learning_rate": 4.7046047723670174e-06, "loss": 0.2786, "step": 7754 }, { "epoch": 0.36, "grad_norm": 0.653423040876757, "learning_rate": 4.704515337411671e-06, "loss": 0.2985, "step": 7755 }, { "epoch": 0.36, "grad_norm": 0.6150888880892509, "learning_rate": 4.704425889769893e-06, "loss": 0.3139, "step": 7756 }, { "epoch": 0.36, "grad_norm": 0.5683746583235633, "learning_rate": 4.704336429442198e-06, "loss": 0.2749, "step": 7757 }, { "epoch": 0.36, "grad_norm": 0.6293840902190744, "learning_rate": 4.704246956429101e-06, "loss": 0.3103, "step": 7758 }, { "epoch": 0.36, "grad_norm": 0.6550641829618967, "learning_rate": 4.704157470731116e-06, "loss": 0.2989, "step": 7759 }, { "epoch": 0.36, "grad_norm": 0.6492266589482649, "learning_rate": 4.704067972348761e-06, "loss": 0.3117, "step": 7760 }, { "epoch": 0.36, "grad_norm": 0.5646136214683609, "learning_rate": 4.703978461282546e-06, "loss": 0.2716, "step": 7761 }, { "epoch": 0.36, "grad_norm": 0.5622641506902338, "learning_rate": 4.70388893753299e-06, "loss": 0.3037, "step": 7762 }, { "epoch": 0.36, "grad_norm": 0.648517749337197, "learning_rate": 4.703799401100608e-06, "loss": 0.2751, "step": 7763 }, { "epoch": 0.36, "grad_norm": 0.6268121855485749, "learning_rate": 4.703709851985914e-06, "loss": 0.298, "step": 7764 }, { "epoch": 0.36, "grad_norm": 0.6321276126942336, "learning_rate": 4.703620290189423e-06, "loss": 0.2782, "step": 7765 }, { "epoch": 0.36, "grad_norm": 0.6482348397238785, "learning_rate": 4.7035307157116505e-06, "loss": 0.3032, "step": 7766 }, { "epoch": 0.36, "grad_norm": 0.6458809570108384, "learning_rate": 4.703441128553113e-06, "loss": 0.3088, "step": 7767 }, { "epoch": 0.36, "grad_norm": 0.6281751295064122, "learning_rate": 4.703351528714327e-06, "loss": 0.288, "step": 7768 }, { "epoch": 0.36, "grad_norm": 0.619410653952691, "learning_rate": 4.703261916195805e-06, "loss": 0.2893, "step": 7769 }, { "epoch": 0.36, "grad_norm": 0.6359723446561245, "learning_rate": 4.703172290998066e-06, "loss": 0.3102, "step": 7770 }, { "epoch": 0.36, "grad_norm": 0.6150625417693381, "learning_rate": 4.703082653121623e-06, "loss": 0.2925, "step": 7771 }, { "epoch": 0.36, "grad_norm": 0.6011282775158721, "learning_rate": 4.702993002566993e-06, "loss": 0.2917, "step": 7772 }, { "epoch": 0.36, "grad_norm": 0.609009448439883, "learning_rate": 4.702903339334693e-06, "loss": 0.2812, "step": 7773 }, { "epoch": 0.36, "grad_norm": 0.5977876786420239, "learning_rate": 4.702813663425238e-06, "loss": 0.2929, "step": 7774 }, { "epoch": 0.36, "grad_norm": 0.5453600622996616, "learning_rate": 4.702723974839143e-06, "loss": 0.2731, "step": 7775 }, { "epoch": 0.36, "grad_norm": 0.6011333035435652, "learning_rate": 4.702634273576925e-06, "loss": 0.2743, "step": 7776 }, { "epoch": 0.36, "grad_norm": 0.669601306524421, "learning_rate": 4.7025445596391014e-06, "loss": 0.2969, "step": 7777 }, { "epoch": 0.36, "grad_norm": 0.7442781668459115, "learning_rate": 4.702454833026186e-06, "loss": 0.2731, "step": 7778 }, { "epoch": 0.36, "grad_norm": 0.6524023057154816, "learning_rate": 4.702365093738699e-06, "loss": 0.2947, "step": 7779 }, { "epoch": 0.36, "grad_norm": 0.6263024768008335, "learning_rate": 4.702275341777153e-06, "loss": 0.3062, "step": 7780 }, { "epoch": 0.36, "grad_norm": 0.6258235656249616, "learning_rate": 4.702185577142065e-06, "loss": 0.2973, "step": 7781 }, { "epoch": 0.36, "grad_norm": 0.647550196717022, "learning_rate": 4.702095799833954e-06, "loss": 0.2648, "step": 7782 }, { "epoch": 0.36, "grad_norm": 0.6049143794875999, "learning_rate": 4.702006009853335e-06, "loss": 0.286, "step": 7783 }, { "epoch": 0.36, "grad_norm": 0.5815601318743924, "learning_rate": 4.701916207200724e-06, "loss": 0.2822, "step": 7784 }, { "epoch": 0.36, "grad_norm": 0.632674338831161, "learning_rate": 4.7018263918766394e-06, "loss": 0.3143, "step": 7785 }, { "epoch": 0.36, "grad_norm": 0.6124112815378865, "learning_rate": 4.701736563881597e-06, "loss": 0.2998, "step": 7786 }, { "epoch": 0.36, "grad_norm": 0.6010986786680103, "learning_rate": 4.701646723216114e-06, "loss": 0.2889, "step": 7787 }, { "epoch": 0.36, "grad_norm": 0.594019071905626, "learning_rate": 4.701556869880708e-06, "loss": 0.2826, "step": 7788 }, { "epoch": 0.36, "grad_norm": 0.6015369284988177, "learning_rate": 4.701467003875894e-06, "loss": 0.2877, "step": 7789 }, { "epoch": 0.36, "grad_norm": 0.5840689679920329, "learning_rate": 4.701377125202192e-06, "loss": 0.2875, "step": 7790 }, { "epoch": 0.36, "grad_norm": 0.6308108563187159, "learning_rate": 4.701287233860118e-06, "loss": 0.2953, "step": 7791 }, { "epoch": 0.37, "grad_norm": 0.6273721691738067, "learning_rate": 4.701197329850189e-06, "loss": 0.2918, "step": 7792 }, { "epoch": 0.37, "grad_norm": 0.6401380429237857, "learning_rate": 4.701107413172923e-06, "loss": 0.3004, "step": 7793 }, { "epoch": 0.37, "grad_norm": 0.5860859314690561, "learning_rate": 4.7010174838288365e-06, "loss": 0.2937, "step": 7794 }, { "epoch": 0.37, "grad_norm": 0.6608888331142511, "learning_rate": 4.700927541818448e-06, "loss": 0.2963, "step": 7795 }, { "epoch": 0.37, "grad_norm": 0.6286239924951222, "learning_rate": 4.7008375871422745e-06, "loss": 0.2819, "step": 7796 }, { "epoch": 0.37, "grad_norm": 0.6270010129386842, "learning_rate": 4.700747619800834e-06, "loss": 0.2999, "step": 7797 }, { "epoch": 0.37, "grad_norm": 0.6363846647537886, "learning_rate": 4.700657639794644e-06, "loss": 0.3003, "step": 7798 }, { "epoch": 0.37, "grad_norm": 0.573856885534457, "learning_rate": 4.700567647124222e-06, "loss": 0.2711, "step": 7799 }, { "epoch": 0.37, "grad_norm": 0.6116044597397478, "learning_rate": 4.700477641790087e-06, "loss": 0.3041, "step": 7800 }, { "epoch": 0.37, "grad_norm": 0.615355112765537, "learning_rate": 4.7003876237927555e-06, "loss": 0.294, "step": 7801 }, { "epoch": 0.37, "grad_norm": 0.7004832878958295, "learning_rate": 4.700297593132747e-06, "loss": 0.3123, "step": 7802 }, { "epoch": 0.37, "grad_norm": 0.5751271359628699, "learning_rate": 4.700207549810578e-06, "loss": 0.2801, "step": 7803 }, { "epoch": 0.37, "grad_norm": 0.6398836996220582, "learning_rate": 4.700117493826768e-06, "loss": 0.2839, "step": 7804 }, { "epoch": 0.37, "grad_norm": 0.6128082125465235, "learning_rate": 4.700027425181835e-06, "loss": 0.2729, "step": 7805 }, { "epoch": 0.37, "grad_norm": 0.5859305612496025, "learning_rate": 4.699937343876297e-06, "loss": 0.2852, "step": 7806 }, { "epoch": 0.37, "grad_norm": 0.6109377769560826, "learning_rate": 4.699847249910672e-06, "loss": 0.3155, "step": 7807 }, { "epoch": 0.37, "grad_norm": 0.6208716571623698, "learning_rate": 4.69975714328548e-06, "loss": 0.2796, "step": 7808 }, { "epoch": 0.37, "grad_norm": 0.6452845239425294, "learning_rate": 4.699667024001237e-06, "loss": 0.3152, "step": 7809 }, { "epoch": 0.37, "grad_norm": 0.6849235602401481, "learning_rate": 4.699576892058465e-06, "loss": 0.2848, "step": 7810 }, { "epoch": 0.37, "grad_norm": 0.6840591699565356, "learning_rate": 4.69948674745768e-06, "loss": 0.3076, "step": 7811 }, { "epoch": 0.37, "grad_norm": 0.604180109905256, "learning_rate": 4.699396590199402e-06, "loss": 0.2841, "step": 7812 }, { "epoch": 0.37, "grad_norm": 0.6538269350988732, "learning_rate": 4.699306420284149e-06, "loss": 0.2926, "step": 7813 }, { "epoch": 0.37, "grad_norm": 0.6056035917099287, "learning_rate": 4.69921623771244e-06, "loss": 0.313, "step": 7814 }, { "epoch": 0.37, "grad_norm": 0.6069961101513964, "learning_rate": 4.699126042484794e-06, "loss": 0.2848, "step": 7815 }, { "epoch": 0.37, "grad_norm": 0.5927312139316416, "learning_rate": 4.699035834601732e-06, "loss": 0.2927, "step": 7816 }, { "epoch": 0.37, "grad_norm": 0.605858841035002, "learning_rate": 4.698945614063769e-06, "loss": 0.2969, "step": 7817 }, { "epoch": 0.37, "grad_norm": 0.6251246225679867, "learning_rate": 4.698855380871429e-06, "loss": 0.3043, "step": 7818 }, { "epoch": 0.37, "grad_norm": 0.5786180199329934, "learning_rate": 4.698765135025228e-06, "loss": 0.2662, "step": 7819 }, { "epoch": 0.37, "grad_norm": 0.6288919933935104, "learning_rate": 4.698674876525686e-06, "loss": 0.291, "step": 7820 }, { "epoch": 0.37, "grad_norm": 0.6602428804209477, "learning_rate": 4.698584605373323e-06, "loss": 0.311, "step": 7821 }, { "epoch": 0.37, "grad_norm": 0.6176657793446733, "learning_rate": 4.698494321568658e-06, "loss": 0.3119, "step": 7822 }, { "epoch": 0.37, "grad_norm": 0.6386774253716098, "learning_rate": 4.698404025112212e-06, "loss": 0.303, "step": 7823 }, { "epoch": 0.37, "grad_norm": 0.5909407270522986, "learning_rate": 4.698313716004503e-06, "loss": 0.2944, "step": 7824 }, { "epoch": 0.37, "grad_norm": 0.6671346489315099, "learning_rate": 4.69822339424605e-06, "loss": 0.2838, "step": 7825 }, { "epoch": 0.37, "grad_norm": 0.6316159026017192, "learning_rate": 4.698133059837374e-06, "loss": 0.2911, "step": 7826 }, { "epoch": 0.37, "grad_norm": 0.634940163000047, "learning_rate": 4.698042712778995e-06, "loss": 0.297, "step": 7827 }, { "epoch": 0.37, "grad_norm": 0.5511790679351893, "learning_rate": 4.697952353071432e-06, "loss": 0.2613, "step": 7828 }, { "epoch": 0.37, "grad_norm": 0.6108276069447744, "learning_rate": 4.697861980715207e-06, "loss": 0.3032, "step": 7829 }, { "epoch": 0.37, "grad_norm": 0.5935954171021093, "learning_rate": 4.697771595710837e-06, "loss": 0.2774, "step": 7830 }, { "epoch": 0.37, "grad_norm": 0.625904762435695, "learning_rate": 4.697681198058846e-06, "loss": 0.2886, "step": 7831 }, { "epoch": 0.37, "grad_norm": 0.6584199564940985, "learning_rate": 4.697590787759751e-06, "loss": 0.3272, "step": 7832 }, { "epoch": 0.37, "grad_norm": 0.7156994537924352, "learning_rate": 4.697500364814073e-06, "loss": 0.3059, "step": 7833 }, { "epoch": 0.37, "grad_norm": 0.5946332422736031, "learning_rate": 4.697409929222333e-06, "loss": 0.303, "step": 7834 }, { "epoch": 0.37, "grad_norm": 0.6070995371831206, "learning_rate": 4.6973194809850505e-06, "loss": 0.2891, "step": 7835 }, { "epoch": 0.37, "grad_norm": 0.6041899180838869, "learning_rate": 4.697229020102748e-06, "loss": 0.2772, "step": 7836 }, { "epoch": 0.37, "grad_norm": 0.6430585549778006, "learning_rate": 4.6971385465759445e-06, "loss": 0.2926, "step": 7837 }, { "epoch": 0.37, "grad_norm": 0.5729353137300652, "learning_rate": 4.697048060405161e-06, "loss": 0.2839, "step": 7838 }, { "epoch": 0.37, "grad_norm": 0.5665440188474059, "learning_rate": 4.696957561590917e-06, "loss": 0.2865, "step": 7839 }, { "epoch": 0.37, "grad_norm": 0.6260632210324839, "learning_rate": 4.696867050133735e-06, "loss": 0.2989, "step": 7840 }, { "epoch": 0.37, "grad_norm": 0.6486620310600872, "learning_rate": 4.696776526034135e-06, "loss": 0.3106, "step": 7841 }, { "epoch": 0.37, "grad_norm": 0.6124733737836113, "learning_rate": 4.696685989292639e-06, "loss": 0.2895, "step": 7842 }, { "epoch": 0.37, "grad_norm": 0.6303911704298711, "learning_rate": 4.696595439909767e-06, "loss": 0.2969, "step": 7843 }, { "epoch": 0.37, "grad_norm": 0.63630855738645, "learning_rate": 4.69650487788604e-06, "loss": 0.2942, "step": 7844 }, { "epoch": 0.37, "grad_norm": 0.597990290629458, "learning_rate": 4.69641430322198e-06, "loss": 0.308, "step": 7845 }, { "epoch": 0.37, "grad_norm": 0.6029450981527762, "learning_rate": 4.696323715918107e-06, "loss": 0.2964, "step": 7846 }, { "epoch": 0.37, "grad_norm": 0.5838569997757805, "learning_rate": 4.696233115974943e-06, "loss": 0.2772, "step": 7847 }, { "epoch": 0.37, "grad_norm": 0.6166855554473893, "learning_rate": 4.69614250339301e-06, "loss": 0.3058, "step": 7848 }, { "epoch": 0.37, "grad_norm": 0.6006553089002855, "learning_rate": 4.696051878172829e-06, "loss": 0.3074, "step": 7849 }, { "epoch": 0.37, "grad_norm": 0.6053268867965773, "learning_rate": 4.695961240314921e-06, "loss": 0.2972, "step": 7850 }, { "epoch": 0.37, "grad_norm": 0.5817281498761105, "learning_rate": 4.695870589819808e-06, "loss": 0.2733, "step": 7851 }, { "epoch": 0.37, "grad_norm": 0.6059821443606532, "learning_rate": 4.695779926688012e-06, "loss": 0.3053, "step": 7852 }, { "epoch": 0.37, "grad_norm": 0.6314729864315841, "learning_rate": 4.695689250920054e-06, "loss": 0.292, "step": 7853 }, { "epoch": 0.37, "grad_norm": 0.5913746801140407, "learning_rate": 4.695598562516457e-06, "loss": 0.298, "step": 7854 }, { "epoch": 0.37, "grad_norm": 0.6985200779256864, "learning_rate": 4.695507861477741e-06, "loss": 0.3059, "step": 7855 }, { "epoch": 0.37, "grad_norm": 0.5740995777787801, "learning_rate": 4.695417147804429e-06, "loss": 0.2913, "step": 7856 }, { "epoch": 0.37, "grad_norm": 0.5794710840209394, "learning_rate": 4.695326421497044e-06, "loss": 0.287, "step": 7857 }, { "epoch": 0.37, "grad_norm": 0.6257917764815948, "learning_rate": 4.695235682556108e-06, "loss": 0.303, "step": 7858 }, { "epoch": 0.37, "grad_norm": 0.6492307099245958, "learning_rate": 4.695144930982141e-06, "loss": 0.3106, "step": 7859 }, { "epoch": 0.37, "grad_norm": 0.6791783416004068, "learning_rate": 4.695054166775666e-06, "loss": 0.3071, "step": 7860 }, { "epoch": 0.37, "grad_norm": 0.5857193987293203, "learning_rate": 4.694963389937208e-06, "loss": 0.3091, "step": 7861 }, { "epoch": 0.37, "grad_norm": 0.6081483002774918, "learning_rate": 4.694872600467286e-06, "loss": 0.2905, "step": 7862 }, { "epoch": 0.37, "grad_norm": 0.5734491832189816, "learning_rate": 4.6947817983664245e-06, "loss": 0.2627, "step": 7863 }, { "epoch": 0.37, "grad_norm": 0.6437225760975692, "learning_rate": 4.694690983635145e-06, "loss": 0.3086, "step": 7864 }, { "epoch": 0.37, "grad_norm": 0.661101810216683, "learning_rate": 4.69460015627397e-06, "loss": 0.2826, "step": 7865 }, { "epoch": 0.37, "grad_norm": 0.6944968622468727, "learning_rate": 4.694509316283423e-06, "loss": 0.2752, "step": 7866 }, { "epoch": 0.37, "grad_norm": 0.6363918001402822, "learning_rate": 4.694418463664027e-06, "loss": 0.308, "step": 7867 }, { "epoch": 0.37, "grad_norm": 0.5719947298191866, "learning_rate": 4.694327598416304e-06, "loss": 0.2961, "step": 7868 }, { "epoch": 0.37, "grad_norm": 0.6231911986842524, "learning_rate": 4.694236720540777e-06, "loss": 0.3023, "step": 7869 }, { "epoch": 0.37, "grad_norm": 0.6749938793618656, "learning_rate": 4.694145830037969e-06, "loss": 0.3072, "step": 7870 }, { "epoch": 0.37, "grad_norm": 0.6799043999783516, "learning_rate": 4.6940549269084046e-06, "loss": 0.3062, "step": 7871 }, { "epoch": 0.37, "grad_norm": 0.5935561317097681, "learning_rate": 4.693964011152604e-06, "loss": 0.2854, "step": 7872 }, { "epoch": 0.37, "grad_norm": 0.6379504361708588, "learning_rate": 4.6938730827710935e-06, "loss": 0.2979, "step": 7873 }, { "epoch": 0.37, "grad_norm": 0.615424207751934, "learning_rate": 4.693782141764393e-06, "loss": 0.2945, "step": 7874 }, { "epoch": 0.37, "grad_norm": 0.6077488279839168, "learning_rate": 4.6936911881330285e-06, "loss": 0.292, "step": 7875 }, { "epoch": 0.37, "grad_norm": 0.6916244680321015, "learning_rate": 4.693600221877523e-06, "loss": 0.3066, "step": 7876 }, { "epoch": 0.37, "grad_norm": 0.6622515668257103, "learning_rate": 4.693509242998399e-06, "loss": 0.3047, "step": 7877 }, { "epoch": 0.37, "grad_norm": 0.6467474194811859, "learning_rate": 4.693418251496181e-06, "loss": 0.3059, "step": 7878 }, { "epoch": 0.37, "grad_norm": 0.6279547879889716, "learning_rate": 4.693327247371392e-06, "loss": 0.2855, "step": 7879 }, { "epoch": 0.37, "grad_norm": 0.6005590308169272, "learning_rate": 4.693236230624556e-06, "loss": 0.2878, "step": 7880 }, { "epoch": 0.37, "grad_norm": 0.6787204892930947, "learning_rate": 4.693145201256196e-06, "loss": 0.3053, "step": 7881 }, { "epoch": 0.37, "grad_norm": 0.6203571776207674, "learning_rate": 4.693054159266838e-06, "loss": 0.3054, "step": 7882 }, { "epoch": 0.37, "grad_norm": 0.6355079344612886, "learning_rate": 4.6929631046570034e-06, "loss": 0.3119, "step": 7883 }, { "epoch": 0.37, "grad_norm": 0.6132265062304851, "learning_rate": 4.692872037427218e-06, "loss": 0.3027, "step": 7884 }, { "epoch": 0.37, "grad_norm": 0.5585077623064606, "learning_rate": 4.692780957578005e-06, "loss": 0.2899, "step": 7885 }, { "epoch": 0.37, "grad_norm": 0.5972324722435262, "learning_rate": 4.692689865109888e-06, "loss": 0.2892, "step": 7886 }, { "epoch": 0.37, "grad_norm": 0.6332322419425506, "learning_rate": 4.692598760023393e-06, "loss": 0.3048, "step": 7887 }, { "epoch": 0.37, "grad_norm": 0.6524495041847572, "learning_rate": 4.692507642319043e-06, "loss": 0.3017, "step": 7888 }, { "epoch": 0.37, "grad_norm": 0.6321603272156315, "learning_rate": 4.692416511997362e-06, "loss": 0.2829, "step": 7889 }, { "epoch": 0.37, "grad_norm": 0.5717118604895699, "learning_rate": 4.6923253690588755e-06, "loss": 0.3066, "step": 7890 }, { "epoch": 0.37, "grad_norm": 0.643042059518752, "learning_rate": 4.6922342135041075e-06, "loss": 0.3001, "step": 7891 }, { "epoch": 0.37, "grad_norm": 0.5929885818661151, "learning_rate": 4.6921430453335824e-06, "loss": 0.282, "step": 7892 }, { "epoch": 0.37, "grad_norm": 0.6601233880363554, "learning_rate": 4.6920518645478256e-06, "loss": 0.3096, "step": 7893 }, { "epoch": 0.37, "grad_norm": 0.5635374309689458, "learning_rate": 4.691960671147361e-06, "loss": 0.2789, "step": 7894 }, { "epoch": 0.37, "grad_norm": 0.720953496166522, "learning_rate": 4.6918694651327136e-06, "loss": 0.3431, "step": 7895 }, { "epoch": 0.37, "grad_norm": 0.6012231979256338, "learning_rate": 4.691778246504408e-06, "loss": 0.2943, "step": 7896 }, { "epoch": 0.37, "grad_norm": 0.6512992295597774, "learning_rate": 4.691687015262969e-06, "loss": 0.2854, "step": 7897 }, { "epoch": 0.37, "grad_norm": 0.6781749090589807, "learning_rate": 4.691595771408923e-06, "loss": 0.2943, "step": 7898 }, { "epoch": 0.37, "grad_norm": 0.6078290950139383, "learning_rate": 4.691504514942794e-06, "loss": 0.2814, "step": 7899 }, { "epoch": 0.37, "grad_norm": 0.6216789655325204, "learning_rate": 4.691413245865107e-06, "loss": 0.3061, "step": 7900 }, { "epoch": 0.37, "grad_norm": 0.575291126903269, "learning_rate": 4.691321964176389e-06, "loss": 0.2908, "step": 7901 }, { "epoch": 0.37, "grad_norm": 0.5657274823652346, "learning_rate": 4.691230669877162e-06, "loss": 0.2743, "step": 7902 }, { "epoch": 0.37, "grad_norm": 0.6264810807634674, "learning_rate": 4.691139362967954e-06, "loss": 0.3063, "step": 7903 }, { "epoch": 0.37, "grad_norm": 0.651992212417819, "learning_rate": 4.69104804344929e-06, "loss": 0.2979, "step": 7904 }, { "epoch": 0.37, "grad_norm": 0.6542666465850167, "learning_rate": 4.6909567113216945e-06, "loss": 0.3051, "step": 7905 }, { "epoch": 0.37, "grad_norm": 0.6720424772643706, "learning_rate": 4.690865366585694e-06, "loss": 0.3176, "step": 7906 }, { "epoch": 0.37, "grad_norm": 0.6317638576649125, "learning_rate": 4.6907740092418145e-06, "loss": 0.2981, "step": 7907 }, { "epoch": 0.37, "grad_norm": 0.5753259441088354, "learning_rate": 4.690682639290581e-06, "loss": 0.3019, "step": 7908 }, { "epoch": 0.37, "grad_norm": 0.6218965504729793, "learning_rate": 4.690591256732519e-06, "loss": 0.2868, "step": 7909 }, { "epoch": 0.37, "grad_norm": 0.6892311425121, "learning_rate": 4.6904998615681554e-06, "loss": 0.2945, "step": 7910 }, { "epoch": 0.37, "grad_norm": 0.6081871100863288, "learning_rate": 4.690408453798015e-06, "loss": 0.3006, "step": 7911 }, { "epoch": 0.37, "grad_norm": 0.569800410464724, "learning_rate": 4.6903170334226255e-06, "loss": 0.2753, "step": 7912 }, { "epoch": 0.37, "grad_norm": 0.6205941766493258, "learning_rate": 4.690225600442512e-06, "loss": 0.2905, "step": 7913 }, { "epoch": 0.37, "grad_norm": 0.5845025297809048, "learning_rate": 4.6901341548582e-06, "loss": 0.2851, "step": 7914 }, { "epoch": 0.37, "grad_norm": 0.6036743013240284, "learning_rate": 4.690042696670216e-06, "loss": 0.2951, "step": 7915 }, { "epoch": 0.37, "grad_norm": 0.6583903545901154, "learning_rate": 4.689951225879088e-06, "loss": 0.2923, "step": 7916 }, { "epoch": 0.37, "grad_norm": 0.6429232917513239, "learning_rate": 4.68985974248534e-06, "loss": 0.285, "step": 7917 }, { "epoch": 0.37, "grad_norm": 0.5943804667019489, "learning_rate": 4.6897682464895e-06, "loss": 0.2757, "step": 7918 }, { "epoch": 0.37, "grad_norm": 0.6078507339610559, "learning_rate": 4.689676737892093e-06, "loss": 0.294, "step": 7919 }, { "epoch": 0.37, "grad_norm": 0.639831716676586, "learning_rate": 4.689585216693649e-06, "loss": 0.3052, "step": 7920 }, { "epoch": 0.37, "grad_norm": 0.6929878337051812, "learning_rate": 4.689493682894692e-06, "loss": 0.3029, "step": 7921 }, { "epoch": 0.37, "grad_norm": 0.6275262220402575, "learning_rate": 4.689402136495748e-06, "loss": 0.2993, "step": 7922 }, { "epoch": 0.37, "grad_norm": 0.6722310661006557, "learning_rate": 4.689310577497345e-06, "loss": 0.3024, "step": 7923 }, { "epoch": 0.37, "grad_norm": 0.5867828842936603, "learning_rate": 4.68921900590001e-06, "loss": 0.3053, "step": 7924 }, { "epoch": 0.37, "grad_norm": 0.5768903573481121, "learning_rate": 4.689127421704271e-06, "loss": 0.2853, "step": 7925 }, { "epoch": 0.37, "grad_norm": 0.5726539936665879, "learning_rate": 4.689035824910653e-06, "loss": 0.3072, "step": 7926 }, { "epoch": 0.37, "grad_norm": 0.5937197169212595, "learning_rate": 4.6889442155196845e-06, "loss": 0.3094, "step": 7927 }, { "epoch": 0.37, "grad_norm": 0.6516048567508587, "learning_rate": 4.6888525935318905e-06, "loss": 0.2887, "step": 7928 }, { "epoch": 0.37, "grad_norm": 0.6153479868204964, "learning_rate": 4.688760958947802e-06, "loss": 0.2801, "step": 7929 }, { "epoch": 0.37, "grad_norm": 0.6509777772266402, "learning_rate": 4.688669311767944e-06, "loss": 0.3186, "step": 7930 }, { "epoch": 0.37, "grad_norm": 0.6312133741418839, "learning_rate": 4.688577651992843e-06, "loss": 0.2976, "step": 7931 }, { "epoch": 0.37, "grad_norm": 0.5687321915545803, "learning_rate": 4.6884859796230285e-06, "loss": 0.2908, "step": 7932 }, { "epoch": 0.37, "grad_norm": 0.590137485509356, "learning_rate": 4.688394294659028e-06, "loss": 0.2783, "step": 7933 }, { "epoch": 0.37, "grad_norm": 0.5948476502978023, "learning_rate": 4.688302597101367e-06, "loss": 0.2832, "step": 7934 }, { "epoch": 0.37, "grad_norm": 0.6627626803183245, "learning_rate": 4.688210886950575e-06, "loss": 0.2894, "step": 7935 }, { "epoch": 0.37, "grad_norm": 0.6095217417131086, "learning_rate": 4.68811916420718e-06, "loss": 0.2817, "step": 7936 }, { "epoch": 0.37, "grad_norm": 0.6284214578704649, "learning_rate": 4.6880274288717085e-06, "loss": 0.2733, "step": 7937 }, { "epoch": 0.37, "grad_norm": 0.5796613242321557, "learning_rate": 4.687935680944689e-06, "loss": 0.2825, "step": 7938 }, { "epoch": 0.37, "grad_norm": 0.5660070870881792, "learning_rate": 4.68784392042665e-06, "loss": 0.2863, "step": 7939 }, { "epoch": 0.37, "grad_norm": 0.6237347280302903, "learning_rate": 4.687752147318119e-06, "loss": 0.2936, "step": 7940 }, { "epoch": 0.37, "grad_norm": 0.6682777829035075, "learning_rate": 4.687660361619624e-06, "loss": 0.3131, "step": 7941 }, { "epoch": 0.37, "grad_norm": 0.5869609147511474, "learning_rate": 4.687568563331693e-06, "loss": 0.2701, "step": 7942 }, { "epoch": 0.37, "grad_norm": 0.5746774788783204, "learning_rate": 4.687476752454856e-06, "loss": 0.2947, "step": 7943 }, { "epoch": 0.37, "grad_norm": 0.624179286103026, "learning_rate": 4.687384928989639e-06, "loss": 0.3031, "step": 7944 }, { "epoch": 0.37, "grad_norm": 0.6208554428012625, "learning_rate": 4.687293092936573e-06, "loss": 0.2779, "step": 7945 }, { "epoch": 0.37, "grad_norm": 0.6833052021084399, "learning_rate": 4.687201244296183e-06, "loss": 0.2892, "step": 7946 }, { "epoch": 0.37, "grad_norm": 0.6212049422120416, "learning_rate": 4.687109383069001e-06, "loss": 0.2948, "step": 7947 }, { "epoch": 0.37, "grad_norm": 0.688273355863363, "learning_rate": 4.687017509255553e-06, "loss": 0.3147, "step": 7948 }, { "epoch": 0.37, "grad_norm": 0.6281846683618091, "learning_rate": 4.686925622856371e-06, "loss": 0.2984, "step": 7949 }, { "epoch": 0.37, "grad_norm": 0.572884923833013, "learning_rate": 4.68683372387198e-06, "loss": 0.2898, "step": 7950 }, { "epoch": 0.37, "grad_norm": 0.6157266589364276, "learning_rate": 4.686741812302911e-06, "loss": 0.2787, "step": 7951 }, { "epoch": 0.37, "grad_norm": 0.6216130056563692, "learning_rate": 4.686649888149693e-06, "loss": 0.294, "step": 7952 }, { "epoch": 0.37, "grad_norm": 0.6634517729338609, "learning_rate": 4.686557951412854e-06, "loss": 0.3055, "step": 7953 }, { "epoch": 0.37, "grad_norm": 0.6136170654224669, "learning_rate": 4.686466002092923e-06, "loss": 0.3045, "step": 7954 }, { "epoch": 0.37, "grad_norm": 0.6474016669937301, "learning_rate": 4.68637404019043e-06, "loss": 0.2953, "step": 7955 }, { "epoch": 0.37, "grad_norm": 0.6203977841865035, "learning_rate": 4.6862820657059045e-06, "loss": 0.2821, "step": 7956 }, { "epoch": 0.37, "grad_norm": 0.613431251292297, "learning_rate": 4.686190078639875e-06, "loss": 0.3069, "step": 7957 }, { "epoch": 0.37, "grad_norm": 0.6855564818614842, "learning_rate": 4.686098078992871e-06, "loss": 0.3112, "step": 7958 }, { "epoch": 0.37, "grad_norm": 0.5764435360893988, "learning_rate": 4.686006066765422e-06, "loss": 0.2811, "step": 7959 }, { "epoch": 0.37, "grad_norm": 0.5870935158006586, "learning_rate": 4.685914041958058e-06, "loss": 0.2884, "step": 7960 }, { "epoch": 0.37, "grad_norm": 0.6516230700092052, "learning_rate": 4.685822004571307e-06, "loss": 0.3032, "step": 7961 }, { "epoch": 0.37, "grad_norm": 0.6155887088613136, "learning_rate": 4.6857299546057e-06, "loss": 0.3029, "step": 7962 }, { "epoch": 0.37, "grad_norm": 0.6609782138489936, "learning_rate": 4.685637892061767e-06, "loss": 0.3112, "step": 7963 }, { "epoch": 0.37, "grad_norm": 0.6408093657878937, "learning_rate": 4.685545816940037e-06, "loss": 0.2915, "step": 7964 }, { "epoch": 0.37, "grad_norm": 0.5970190583005369, "learning_rate": 4.68545372924104e-06, "loss": 0.2981, "step": 7965 }, { "epoch": 0.37, "grad_norm": 0.6179500883715455, "learning_rate": 4.685361628965306e-06, "loss": 0.2941, "step": 7966 }, { "epoch": 0.37, "grad_norm": 0.6257809854280456, "learning_rate": 4.685269516113366e-06, "loss": 0.3018, "step": 7967 }, { "epoch": 0.37, "grad_norm": 0.6846005046072022, "learning_rate": 4.6851773906857485e-06, "loss": 0.3166, "step": 7968 }, { "epoch": 0.37, "grad_norm": 0.6346152834380082, "learning_rate": 4.685085252682984e-06, "loss": 0.2946, "step": 7969 }, { "epoch": 0.37, "grad_norm": 0.6739294623529666, "learning_rate": 4.684993102105604e-06, "loss": 0.3178, "step": 7970 }, { "epoch": 0.37, "grad_norm": 0.5861388738384115, "learning_rate": 4.6849009389541365e-06, "loss": 0.2933, "step": 7971 }, { "epoch": 0.37, "grad_norm": 0.6189933249314236, "learning_rate": 4.684808763229115e-06, "loss": 0.2947, "step": 7972 }, { "epoch": 0.37, "grad_norm": 0.6141738187460444, "learning_rate": 4.6847165749310675e-06, "loss": 0.3135, "step": 7973 }, { "epoch": 0.37, "grad_norm": 0.5966132873018127, "learning_rate": 4.6846243740605244e-06, "loss": 0.2903, "step": 7974 }, { "epoch": 0.37, "grad_norm": 0.621243605511704, "learning_rate": 4.684532160618018e-06, "loss": 0.2734, "step": 7975 }, { "epoch": 0.37, "grad_norm": 0.6049013166512276, "learning_rate": 4.6844399346040774e-06, "loss": 0.2888, "step": 7976 }, { "epoch": 0.37, "grad_norm": 0.613502719068337, "learning_rate": 4.684347696019235e-06, "loss": 0.2988, "step": 7977 }, { "epoch": 0.37, "grad_norm": 0.5865265883372957, "learning_rate": 4.68425544486402e-06, "loss": 0.2777, "step": 7978 }, { "epoch": 0.37, "grad_norm": 0.628894371719093, "learning_rate": 4.6841631811389635e-06, "loss": 0.2881, "step": 7979 }, { "epoch": 0.37, "grad_norm": 0.6801390125509238, "learning_rate": 4.684070904844598e-06, "loss": 0.2897, "step": 7980 }, { "epoch": 0.37, "grad_norm": 0.5820315046100675, "learning_rate": 4.683978615981452e-06, "loss": 0.2833, "step": 7981 }, { "epoch": 0.37, "grad_norm": 0.5816991144016991, "learning_rate": 4.683886314550059e-06, "loss": 0.2844, "step": 7982 }, { "epoch": 0.37, "grad_norm": 0.6112152039533928, "learning_rate": 4.6837940005509485e-06, "loss": 0.2936, "step": 7983 }, { "epoch": 0.37, "grad_norm": 0.5939161167786976, "learning_rate": 4.683701673984653e-06, "loss": 0.2782, "step": 7984 }, { "epoch": 0.37, "grad_norm": 0.6568649371891433, "learning_rate": 4.683609334851703e-06, "loss": 0.2858, "step": 7985 }, { "epoch": 0.37, "grad_norm": 0.5806508974629652, "learning_rate": 4.68351698315263e-06, "loss": 0.26, "step": 7986 }, { "epoch": 0.37, "grad_norm": 0.590169205038156, "learning_rate": 4.683424618887966e-06, "loss": 0.3033, "step": 7987 }, { "epoch": 0.37, "grad_norm": 0.60682592113871, "learning_rate": 4.6833322420582415e-06, "loss": 0.2978, "step": 7988 }, { "epoch": 0.37, "grad_norm": 0.666443324233266, "learning_rate": 4.683239852663989e-06, "loss": 0.2918, "step": 7989 }, { "epoch": 0.37, "grad_norm": 0.6473825002143476, "learning_rate": 4.6831474507057395e-06, "loss": 0.2711, "step": 7990 }, { "epoch": 0.37, "grad_norm": 0.6325327374541804, "learning_rate": 4.683055036184026e-06, "loss": 0.2944, "step": 7991 }, { "epoch": 0.37, "grad_norm": 0.6243652240082793, "learning_rate": 4.682962609099378e-06, "loss": 0.3012, "step": 7992 }, { "epoch": 0.37, "grad_norm": 0.5747657854810039, "learning_rate": 4.68287016945233e-06, "loss": 0.2874, "step": 7993 }, { "epoch": 0.37, "grad_norm": 0.5933691217974291, "learning_rate": 4.682777717243413e-06, "loss": 0.288, "step": 7994 }, { "epoch": 0.37, "grad_norm": 0.5937830235526678, "learning_rate": 4.682685252473158e-06, "loss": 0.2923, "step": 7995 }, { "epoch": 0.37, "grad_norm": 0.6791152174923264, "learning_rate": 4.682592775142099e-06, "loss": 0.3168, "step": 7996 }, { "epoch": 0.37, "grad_norm": 0.6414898302203019, "learning_rate": 4.682500285250766e-06, "loss": 0.2763, "step": 7997 }, { "epoch": 0.37, "grad_norm": 0.607928035352787, "learning_rate": 4.682407782799693e-06, "loss": 0.2761, "step": 7998 }, { "epoch": 0.37, "grad_norm": 0.5933606596376021, "learning_rate": 4.682315267789412e-06, "loss": 0.2864, "step": 7999 }, { "epoch": 0.37, "grad_norm": 0.6487509460848228, "learning_rate": 4.682222740220455e-06, "loss": 0.3304, "step": 8000 }, { "epoch": 0.37480676441654565, "grad_norm": 0.6151063431301349, "learning_rate": 4.682130200093355e-06, "loss": 0.2935, "step": 8001 }, { "epoch": 0.374853609406474, "grad_norm": 0.6436945007434538, "learning_rate": 4.6820376474086435e-06, "loss": 0.2905, "step": 8002 }, { "epoch": 0.3749004543964023, "grad_norm": 0.5886203579645443, "learning_rate": 4.6819450821668535e-06, "loss": 0.2807, "step": 8003 }, { "epoch": 0.37494729938633065, "grad_norm": 0.5862431720198265, "learning_rate": 4.681852504368518e-06, "loss": 0.2892, "step": 8004 }, { "epoch": 0.37499414437625894, "grad_norm": 0.5628448980471719, "learning_rate": 4.681759914014171e-06, "loss": 0.268, "step": 8005 }, { "epoch": 0.3750409893661873, "grad_norm": 0.5820621192016705, "learning_rate": 4.681667311104343e-06, "loss": 0.2951, "step": 8006 }, { "epoch": 0.3750878343561156, "grad_norm": 0.6499490525350226, "learning_rate": 4.681574695639568e-06, "loss": 0.3184, "step": 8007 }, { "epoch": 0.37513467934604394, "grad_norm": 0.5981756762917774, "learning_rate": 4.681482067620379e-06, "loss": 0.2881, "step": 8008 }, { "epoch": 0.3751815243359723, "grad_norm": 0.6715450635655325, "learning_rate": 4.681389427047309e-06, "loss": 0.3281, "step": 8009 }, { "epoch": 0.3752283693259006, "grad_norm": 0.6264050016555577, "learning_rate": 4.681296773920891e-06, "loss": 0.2957, "step": 8010 }, { "epoch": 0.37527521431582894, "grad_norm": 0.5962531619351673, "learning_rate": 4.681204108241658e-06, "loss": 0.2999, "step": 8011 }, { "epoch": 0.37532205930575724, "grad_norm": 0.6317879255777394, "learning_rate": 4.681111430010144e-06, "loss": 0.2767, "step": 8012 }, { "epoch": 0.3753689042956856, "grad_norm": 0.56376408122957, "learning_rate": 4.681018739226882e-06, "loss": 0.2808, "step": 8013 }, { "epoch": 0.3754157492856139, "grad_norm": 0.6297467017983789, "learning_rate": 4.680926035892406e-06, "loss": 0.2892, "step": 8014 }, { "epoch": 0.37546259427554224, "grad_norm": 0.6413861478759617, "learning_rate": 4.680833320007248e-06, "loss": 0.2964, "step": 8015 }, { "epoch": 0.37550943926547053, "grad_norm": 0.6022461625479768, "learning_rate": 4.680740591571943e-06, "loss": 0.3108, "step": 8016 }, { "epoch": 0.3755562842553989, "grad_norm": 0.5581277876094017, "learning_rate": 4.680647850587024e-06, "loss": 0.2889, "step": 8017 }, { "epoch": 0.37560312924532724, "grad_norm": 0.5996485950516065, "learning_rate": 4.680555097053023e-06, "loss": 0.2829, "step": 8018 }, { "epoch": 0.37564997423525553, "grad_norm": 0.6409235855527884, "learning_rate": 4.680462330970477e-06, "loss": 0.291, "step": 8019 }, { "epoch": 0.3756968192251839, "grad_norm": 0.6690411347180362, "learning_rate": 4.680369552339918e-06, "loss": 0.2989, "step": 8020 }, { "epoch": 0.3757436642151122, "grad_norm": 0.6357444610882675, "learning_rate": 4.6802767611618805e-06, "loss": 0.28, "step": 8021 }, { "epoch": 0.37579050920504053, "grad_norm": 0.6591185916845879, "learning_rate": 4.680183957436898e-06, "loss": 0.3009, "step": 8022 }, { "epoch": 0.37583735419496883, "grad_norm": 0.5940102148879115, "learning_rate": 4.6800911411655046e-06, "loss": 0.2884, "step": 8023 }, { "epoch": 0.3758841991848972, "grad_norm": 0.6059534662969357, "learning_rate": 4.6799983123482355e-06, "loss": 0.273, "step": 8024 }, { "epoch": 0.3759310441748255, "grad_norm": 0.6264521910666999, "learning_rate": 4.679905470985623e-06, "loss": 0.2826, "step": 8025 }, { "epoch": 0.37597788916475383, "grad_norm": 0.6441548243668929, "learning_rate": 4.679812617078202e-06, "loss": 0.3033, "step": 8026 }, { "epoch": 0.3760247341546822, "grad_norm": 0.6629318601974437, "learning_rate": 4.679719750626509e-06, "loss": 0.3031, "step": 8027 }, { "epoch": 0.3760715791446105, "grad_norm": 0.6417206463880644, "learning_rate": 4.679626871631076e-06, "loss": 0.2987, "step": 8028 }, { "epoch": 0.37611842413453883, "grad_norm": 0.6182209328469348, "learning_rate": 4.679533980092437e-06, "loss": 0.2944, "step": 8029 }, { "epoch": 0.3761652691244671, "grad_norm": 0.6693236624001603, "learning_rate": 4.679441076011128e-06, "loss": 0.3018, "step": 8030 }, { "epoch": 0.3762121141143955, "grad_norm": 0.6027858884995437, "learning_rate": 4.6793481593876846e-06, "loss": 0.277, "step": 8031 }, { "epoch": 0.3762589591043238, "grad_norm": 0.5953412346907294, "learning_rate": 4.67925523022264e-06, "loss": 0.2835, "step": 8032 }, { "epoch": 0.3763058040942521, "grad_norm": 0.6553277090857541, "learning_rate": 4.679162288516529e-06, "loss": 0.2648, "step": 8033 }, { "epoch": 0.3763526490841804, "grad_norm": 0.6361707983347428, "learning_rate": 4.6790693342698865e-06, "loss": 0.3216, "step": 8034 }, { "epoch": 0.3763994940741088, "grad_norm": 0.6307830755797301, "learning_rate": 4.678976367483249e-06, "loss": 0.3022, "step": 8035 }, { "epoch": 0.3764463390640371, "grad_norm": 0.6865414718459619, "learning_rate": 4.678883388157148e-06, "loss": 0.3178, "step": 8036 }, { "epoch": 0.3764931840539654, "grad_norm": 0.6107757341788135, "learning_rate": 4.6787903962921225e-06, "loss": 0.2956, "step": 8037 }, { "epoch": 0.3765400290438938, "grad_norm": 0.5702402055043431, "learning_rate": 4.6786973918887054e-06, "loss": 0.2895, "step": 8038 }, { "epoch": 0.37658687403382207, "grad_norm": 0.5418478858696318, "learning_rate": 4.678604374947432e-06, "loss": 0.2868, "step": 8039 }, { "epoch": 0.3766337190237504, "grad_norm": 0.6567027537965887, "learning_rate": 4.678511345468838e-06, "loss": 0.2936, "step": 8040 }, { "epoch": 0.3766805640136787, "grad_norm": 0.5833882254505282, "learning_rate": 4.678418303453459e-06, "loss": 0.2761, "step": 8041 }, { "epoch": 0.37672740900360707, "grad_norm": 0.6263517962488567, "learning_rate": 4.678325248901831e-06, "loss": 0.301, "step": 8042 }, { "epoch": 0.37677425399353537, "grad_norm": 0.6159259372724848, "learning_rate": 4.6782321818144875e-06, "loss": 0.2853, "step": 8043 }, { "epoch": 0.3768210989834637, "grad_norm": 0.5951914251547327, "learning_rate": 4.6781391021919655e-06, "loss": 0.2947, "step": 8044 }, { "epoch": 0.37686794397339207, "grad_norm": 0.6083901863148247, "learning_rate": 4.678046010034801e-06, "loss": 0.2995, "step": 8045 }, { "epoch": 0.37691478896332037, "grad_norm": 0.5998393077388384, "learning_rate": 4.677952905343528e-06, "loss": 0.285, "step": 8046 }, { "epoch": 0.3769616339532487, "grad_norm": 0.5785821500633166, "learning_rate": 4.6778597881186845e-06, "loss": 0.2721, "step": 8047 }, { "epoch": 0.377008478943177, "grad_norm": 0.6268475588925311, "learning_rate": 4.677766658360805e-06, "loss": 0.2996, "step": 8048 }, { "epoch": 0.37705532393310537, "grad_norm": 0.5753097133171355, "learning_rate": 4.677673516070426e-06, "loss": 0.3026, "step": 8049 }, { "epoch": 0.37710216892303366, "grad_norm": 0.5661139951163775, "learning_rate": 4.677580361248083e-06, "loss": 0.2788, "step": 8050 }, { "epoch": 0.377149013912962, "grad_norm": 0.618216284854778, "learning_rate": 4.677487193894312e-06, "loss": 0.2866, "step": 8051 }, { "epoch": 0.3771958589028903, "grad_norm": 0.6381819126458851, "learning_rate": 4.677394014009649e-06, "loss": 0.2854, "step": 8052 }, { "epoch": 0.37724270389281866, "grad_norm": 0.6322872711320495, "learning_rate": 4.677300821594633e-06, "loss": 0.3032, "step": 8053 }, { "epoch": 0.377289548882747, "grad_norm": 0.6120217620891641, "learning_rate": 4.677207616649796e-06, "loss": 0.3017, "step": 8054 }, { "epoch": 0.3773363938726753, "grad_norm": 0.5934745390590933, "learning_rate": 4.677114399175677e-06, "loss": 0.2882, "step": 8055 }, { "epoch": 0.37738323886260366, "grad_norm": 0.6246761373794376, "learning_rate": 4.677021169172811e-06, "loss": 0.2787, "step": 8056 }, { "epoch": 0.37743008385253196, "grad_norm": 0.5832254837480733, "learning_rate": 4.676927926641737e-06, "loss": 0.279, "step": 8057 }, { "epoch": 0.3774769288424603, "grad_norm": 0.6478737412041985, "learning_rate": 4.676834671582988e-06, "loss": 0.3062, "step": 8058 }, { "epoch": 0.3775237738323886, "grad_norm": 0.6159356277808763, "learning_rate": 4.6767414039971035e-06, "loss": 0.3007, "step": 8059 }, { "epoch": 0.37757061882231696, "grad_norm": 0.6279312440665882, "learning_rate": 4.6766481238846205e-06, "loss": 0.3165, "step": 8060 }, { "epoch": 0.37761746381224526, "grad_norm": 0.6148811397789046, "learning_rate": 4.676554831246073e-06, "loss": 0.2858, "step": 8061 }, { "epoch": 0.3776643088021736, "grad_norm": 0.5956499951723667, "learning_rate": 4.676461526082e-06, "loss": 0.3073, "step": 8062 }, { "epoch": 0.37771115379210196, "grad_norm": 0.5956808229293836, "learning_rate": 4.676368208392938e-06, "loss": 0.2829, "step": 8063 }, { "epoch": 0.37775799878203026, "grad_norm": 0.5851617640590717, "learning_rate": 4.676274878179425e-06, "loss": 0.2673, "step": 8064 }, { "epoch": 0.3778048437719586, "grad_norm": 0.6404800079542692, "learning_rate": 4.676181535441995e-06, "loss": 0.2947, "step": 8065 }, { "epoch": 0.3778516887618869, "grad_norm": 0.5749735280059926, "learning_rate": 4.676088180181189e-06, "loss": 0.2734, "step": 8066 }, { "epoch": 0.37789853375181526, "grad_norm": 0.5844815901477007, "learning_rate": 4.675994812397541e-06, "loss": 0.3035, "step": 8067 }, { "epoch": 0.37794537874174355, "grad_norm": 0.6667301339524921, "learning_rate": 4.675901432091591e-06, "loss": 0.2993, "step": 8068 }, { "epoch": 0.3779922237316719, "grad_norm": 0.6261356895244029, "learning_rate": 4.675808039263875e-06, "loss": 0.3227, "step": 8069 }, { "epoch": 0.3780390687216002, "grad_norm": 0.5553591560454437, "learning_rate": 4.675714633914929e-06, "loss": 0.2771, "step": 8070 }, { "epoch": 0.37808591371152855, "grad_norm": 0.5974443554605229, "learning_rate": 4.675621216045294e-06, "loss": 0.294, "step": 8071 }, { "epoch": 0.3781327587014569, "grad_norm": 0.6048721872354011, "learning_rate": 4.675527785655505e-06, "loss": 0.2929, "step": 8072 }, { "epoch": 0.3781796036913852, "grad_norm": 0.5758633898898832, "learning_rate": 4.675434342746101e-06, "loss": 0.267, "step": 8073 }, { "epoch": 0.37822644868131355, "grad_norm": 0.6052849632782316, "learning_rate": 4.6753408873176184e-06, "loss": 0.2955, "step": 8074 }, { "epoch": 0.37827329367124185, "grad_norm": 0.6146624289032809, "learning_rate": 4.6752474193705954e-06, "loss": 0.2986, "step": 8075 }, { "epoch": 0.3783201386611702, "grad_norm": 0.6325666315136316, "learning_rate": 4.6751539389055715e-06, "loss": 0.2856, "step": 8076 }, { "epoch": 0.3783669836510985, "grad_norm": 0.5693842246982558, "learning_rate": 4.675060445923082e-06, "loss": 0.2587, "step": 8077 }, { "epoch": 0.37841382864102685, "grad_norm": 0.6120321668235477, "learning_rate": 4.674966940423667e-06, "loss": 0.2862, "step": 8078 }, { "epoch": 0.37846067363095515, "grad_norm": 0.5996179882692736, "learning_rate": 4.6748734224078644e-06, "loss": 0.2955, "step": 8079 }, { "epoch": 0.3785075186208835, "grad_norm": 0.6326427101535815, "learning_rate": 4.674779891876211e-06, "loss": 0.303, "step": 8080 }, { "epoch": 0.37855436361081185, "grad_norm": 0.6192269453196256, "learning_rate": 4.6746863488292465e-06, "loss": 0.2991, "step": 8081 }, { "epoch": 0.37860120860074015, "grad_norm": 0.6003285051089137, "learning_rate": 4.674592793267509e-06, "loss": 0.292, "step": 8082 }, { "epoch": 0.3786480535906685, "grad_norm": 0.5482876027901626, "learning_rate": 4.674499225191535e-06, "loss": 0.2878, "step": 8083 }, { "epoch": 0.3786948985805968, "grad_norm": 0.6927493373538258, "learning_rate": 4.674405644601866e-06, "loss": 0.317, "step": 8084 }, { "epoch": 0.37874174357052515, "grad_norm": 0.6247674858351956, "learning_rate": 4.674312051499039e-06, "loss": 0.3157, "step": 8085 }, { "epoch": 0.37878858856045344, "grad_norm": 0.6216383389156286, "learning_rate": 4.674218445883592e-06, "loss": 0.2953, "step": 8086 }, { "epoch": 0.3788354335503818, "grad_norm": 0.6440832486773679, "learning_rate": 4.674124827756064e-06, "loss": 0.3117, "step": 8087 }, { "epoch": 0.3788822785403101, "grad_norm": 0.6142160703978009, "learning_rate": 4.674031197116995e-06, "loss": 0.2946, "step": 8088 }, { "epoch": 0.37892912353023844, "grad_norm": 0.6136556376936015, "learning_rate": 4.673937553966922e-06, "loss": 0.2883, "step": 8089 }, { "epoch": 0.3789759685201668, "grad_norm": 0.5798498119720688, "learning_rate": 4.673843898306385e-06, "loss": 0.2899, "step": 8090 }, { "epoch": 0.3790228135100951, "grad_norm": 0.626593609524154, "learning_rate": 4.6737502301359235e-06, "loss": 0.3018, "step": 8091 }, { "epoch": 0.37906965850002344, "grad_norm": 0.6835866591313815, "learning_rate": 4.673656549456075e-06, "loss": 0.2984, "step": 8092 }, { "epoch": 0.37911650348995174, "grad_norm": 0.6376714545107013, "learning_rate": 4.67356285626738e-06, "loss": 0.2807, "step": 8093 }, { "epoch": 0.3791633484798801, "grad_norm": 0.6701129004507743, "learning_rate": 4.673469150570376e-06, "loss": 0.2945, "step": 8094 }, { "epoch": 0.3792101934698084, "grad_norm": 0.6384600927709628, "learning_rate": 4.673375432365605e-06, "loss": 0.2909, "step": 8095 }, { "epoch": 0.37925703845973674, "grad_norm": 0.6022099703713238, "learning_rate": 4.673281701653604e-06, "loss": 0.2875, "step": 8096 }, { "epoch": 0.37930388344966504, "grad_norm": 0.5898228036986238, "learning_rate": 4.673187958434912e-06, "loss": 0.2852, "step": 8097 }, { "epoch": 0.3793507284395934, "grad_norm": 0.5843362040010518, "learning_rate": 4.67309420271007e-06, "loss": 0.2853, "step": 8098 }, { "epoch": 0.37939757342952174, "grad_norm": 0.6293888793769303, "learning_rate": 4.673000434479618e-06, "loss": 0.301, "step": 8099 }, { "epoch": 0.37944441841945004, "grad_norm": 0.6426458236611117, "learning_rate": 4.672906653744094e-06, "loss": 0.3068, "step": 8100 }, { "epoch": 0.3794912634093784, "grad_norm": 0.6777935683213647, "learning_rate": 4.672812860504038e-06, "loss": 0.3196, "step": 8101 }, { "epoch": 0.3795381083993067, "grad_norm": 0.6186776078693277, "learning_rate": 4.67271905475999e-06, "loss": 0.2996, "step": 8102 }, { "epoch": 0.37958495338923504, "grad_norm": 0.6087388867537217, "learning_rate": 4.67262523651249e-06, "loss": 0.3106, "step": 8103 }, { "epoch": 0.37963179837916333, "grad_norm": 0.5982271109788057, "learning_rate": 4.672531405762078e-06, "loss": 0.2931, "step": 8104 }, { "epoch": 0.3796786433690917, "grad_norm": 0.6479593169227886, "learning_rate": 4.672437562509295e-06, "loss": 0.3215, "step": 8105 }, { "epoch": 0.37972548835902, "grad_norm": 0.5918960883598913, "learning_rate": 4.6723437067546785e-06, "loss": 0.2831, "step": 8106 }, { "epoch": 0.37977233334894833, "grad_norm": 0.5916483614730957, "learning_rate": 4.67224983849877e-06, "loss": 0.2974, "step": 8107 }, { "epoch": 0.3798191783388767, "grad_norm": 0.6312527730160756, "learning_rate": 4.67215595774211e-06, "loss": 0.2945, "step": 8108 }, { "epoch": 0.379866023328805, "grad_norm": 0.6043771941114381, "learning_rate": 4.672062064485238e-06, "loss": 0.3032, "step": 8109 }, { "epoch": 0.37991286831873333, "grad_norm": 0.589515567562007, "learning_rate": 4.671968158728696e-06, "loss": 0.2984, "step": 8110 }, { "epoch": 0.37995971330866163, "grad_norm": 0.5917486989215637, "learning_rate": 4.671874240473022e-06, "loss": 0.2964, "step": 8111 }, { "epoch": 0.38000655829859, "grad_norm": 0.5981278641738449, "learning_rate": 4.671780309718758e-06, "loss": 0.2861, "step": 8112 }, { "epoch": 0.3800534032885183, "grad_norm": 0.5893178851960308, "learning_rate": 4.671686366466444e-06, "loss": 0.2933, "step": 8113 }, { "epoch": 0.38010024827844663, "grad_norm": 0.5684929942308025, "learning_rate": 4.67159241071662e-06, "loss": 0.2854, "step": 8114 }, { "epoch": 0.3801470932683749, "grad_norm": 0.655873216547238, "learning_rate": 4.671498442469828e-06, "loss": 0.3063, "step": 8115 }, { "epoch": 0.3801939382583033, "grad_norm": 0.624177722366291, "learning_rate": 4.671404461726609e-06, "loss": 0.2891, "step": 8116 }, { "epoch": 0.38024078324823163, "grad_norm": 0.6777886417317127, "learning_rate": 4.671310468487503e-06, "loss": 0.3019, "step": 8117 }, { "epoch": 0.3802876282381599, "grad_norm": 0.6086804296567769, "learning_rate": 4.6712164627530505e-06, "loss": 0.2945, "step": 8118 }, { "epoch": 0.3803344732280883, "grad_norm": 0.6000958039043615, "learning_rate": 4.671122444523793e-06, "loss": 0.2941, "step": 8119 }, { "epoch": 0.3803813182180166, "grad_norm": 0.6357709273610624, "learning_rate": 4.671028413800271e-06, "loss": 0.2983, "step": 8120 }, { "epoch": 0.3804281632079449, "grad_norm": 0.6262986562759141, "learning_rate": 4.670934370583028e-06, "loss": 0.2895, "step": 8121 }, { "epoch": 0.3804750081978732, "grad_norm": 0.5904963314162102, "learning_rate": 4.670840314872602e-06, "loss": 0.2923, "step": 8122 }, { "epoch": 0.3805218531878016, "grad_norm": 0.589226356719282, "learning_rate": 4.670746246669535e-06, "loss": 0.304, "step": 8123 }, { "epoch": 0.38056869817772987, "grad_norm": 0.6252311109148858, "learning_rate": 4.67065216597437e-06, "loss": 0.3033, "step": 8124 }, { "epoch": 0.3806155431676582, "grad_norm": 0.6013758989053954, "learning_rate": 4.670558072787647e-06, "loss": 0.3051, "step": 8125 }, { "epoch": 0.3806623881575866, "grad_norm": 0.5845058690223393, "learning_rate": 4.6704639671099075e-06, "loss": 0.3021, "step": 8126 }, { "epoch": 0.38070923314751487, "grad_norm": 0.6207870805920859, "learning_rate": 4.670369848941694e-06, "loss": 0.2821, "step": 8127 }, { "epoch": 0.3807560781374432, "grad_norm": 0.5905465585443967, "learning_rate": 4.670275718283547e-06, "loss": 0.2916, "step": 8128 }, { "epoch": 0.3808029231273715, "grad_norm": 0.6026409153231279, "learning_rate": 4.670181575136009e-06, "loss": 0.2958, "step": 8129 }, { "epoch": 0.38084976811729987, "grad_norm": 0.6346933588503281, "learning_rate": 4.670087419499621e-06, "loss": 0.3016, "step": 8130 }, { "epoch": 0.38089661310722817, "grad_norm": 0.581111603793017, "learning_rate": 4.669993251374927e-06, "loss": 0.2832, "step": 8131 }, { "epoch": 0.3809434580971565, "grad_norm": 0.6425726206616624, "learning_rate": 4.669899070762466e-06, "loss": 0.3105, "step": 8132 }, { "epoch": 0.3809903030870848, "grad_norm": 0.6464819655760733, "learning_rate": 4.669804877662782e-06, "loss": 0.3152, "step": 8133 }, { "epoch": 0.38103714807701317, "grad_norm": 0.5737831137196656, "learning_rate": 4.669710672076414e-06, "loss": 0.2902, "step": 8134 }, { "epoch": 0.3810839930669415, "grad_norm": 0.6154128567825004, "learning_rate": 4.669616454003908e-06, "loss": 0.3043, "step": 8135 }, { "epoch": 0.3811308380568698, "grad_norm": 0.5494989987056252, "learning_rate": 4.669522223445805e-06, "loss": 0.283, "step": 8136 }, { "epoch": 0.38117768304679817, "grad_norm": 0.6098234251002066, "learning_rate": 4.669427980402646e-06, "loss": 0.2943, "step": 8137 }, { "epoch": 0.38122452803672646, "grad_norm": 0.6257662942118829, "learning_rate": 4.669333724874975e-06, "loss": 0.2902, "step": 8138 }, { "epoch": 0.3812713730266548, "grad_norm": 0.6008463900376514, "learning_rate": 4.669239456863333e-06, "loss": 0.2922, "step": 8139 }, { "epoch": 0.3813182180165831, "grad_norm": 0.6863361411794141, "learning_rate": 4.669145176368265e-06, "loss": 0.3242, "step": 8140 }, { "epoch": 0.38136506300651146, "grad_norm": 0.5541730119515982, "learning_rate": 4.66905088339031e-06, "loss": 0.2746, "step": 8141 }, { "epoch": 0.38141190799643976, "grad_norm": 0.6258694673845171, "learning_rate": 4.6689565779300126e-06, "loss": 0.2914, "step": 8142 }, { "epoch": 0.3814587529863681, "grad_norm": 0.5957849293212659, "learning_rate": 4.668862259987916e-06, "loss": 0.29, "step": 8143 }, { "epoch": 0.38150559797629646, "grad_norm": 0.6943850509013298, "learning_rate": 4.66876792956456e-06, "loss": 0.2687, "step": 8144 }, { "epoch": 0.38155244296622476, "grad_norm": 0.6458641565662584, "learning_rate": 4.668673586660491e-06, "loss": 0.2976, "step": 8145 }, { "epoch": 0.3815992879561531, "grad_norm": 0.5678024724641344, "learning_rate": 4.6685792312762516e-06, "loss": 0.2895, "step": 8146 }, { "epoch": 0.3816461329460814, "grad_norm": 0.6393775260942035, "learning_rate": 4.668484863412382e-06, "loss": 0.3033, "step": 8147 }, { "epoch": 0.38169297793600976, "grad_norm": 0.5936451497843426, "learning_rate": 4.668390483069428e-06, "loss": 0.3082, "step": 8148 }, { "epoch": 0.38173982292593805, "grad_norm": 0.6286851765819296, "learning_rate": 4.668296090247932e-06, "loss": 0.3131, "step": 8149 }, { "epoch": 0.3817866679158664, "grad_norm": 0.5962376418501033, "learning_rate": 4.6682016849484366e-06, "loss": 0.2758, "step": 8150 }, { "epoch": 0.3818335129057947, "grad_norm": 0.5764117898647125, "learning_rate": 4.668107267171486e-06, "loss": 0.2788, "step": 8151 }, { "epoch": 0.38188035789572305, "grad_norm": 0.5785843515154998, "learning_rate": 4.6680128369176226e-06, "loss": 0.266, "step": 8152 }, { "epoch": 0.3819272028856514, "grad_norm": 0.5956529908948814, "learning_rate": 4.667918394187389e-06, "loss": 0.2786, "step": 8153 }, { "epoch": 0.3819740478755797, "grad_norm": 0.6126670609019486, "learning_rate": 4.6678239389813315e-06, "loss": 0.2864, "step": 8154 }, { "epoch": 0.38202089286550805, "grad_norm": 0.619970599406927, "learning_rate": 4.667729471299992e-06, "loss": 0.3046, "step": 8155 }, { "epoch": 0.38206773785543635, "grad_norm": 0.609767127949157, "learning_rate": 4.667634991143914e-06, "loss": 0.2986, "step": 8156 }, { "epoch": 0.3821145828453647, "grad_norm": 0.5960648223430725, "learning_rate": 4.6675404985136416e-06, "loss": 0.2906, "step": 8157 }, { "epoch": 0.382161427835293, "grad_norm": 0.5780156925006499, "learning_rate": 4.667445993409718e-06, "loss": 0.2922, "step": 8158 }, { "epoch": 0.38220827282522135, "grad_norm": 0.5951840209325112, "learning_rate": 4.667351475832687e-06, "loss": 0.2903, "step": 8159 }, { "epoch": 0.38225511781514965, "grad_norm": 0.7574599064014302, "learning_rate": 4.667256945783094e-06, "loss": 0.2968, "step": 8160 }, { "epoch": 0.382301962805078, "grad_norm": 0.6040588804351511, "learning_rate": 4.667162403261482e-06, "loss": 0.2989, "step": 8161 }, { "epoch": 0.38234880779500635, "grad_norm": 0.6164992883882928, "learning_rate": 4.667067848268395e-06, "loss": 0.2867, "step": 8162 }, { "epoch": 0.38239565278493465, "grad_norm": 0.6457167773669297, "learning_rate": 4.666973280804376e-06, "loss": 0.3056, "step": 8163 }, { "epoch": 0.382442497774863, "grad_norm": 0.6181798905616177, "learning_rate": 4.666878700869973e-06, "loss": 0.2832, "step": 8164 }, { "epoch": 0.3824893427647913, "grad_norm": 0.6027141446474434, "learning_rate": 4.666784108465725e-06, "loss": 0.3148, "step": 8165 }, { "epoch": 0.38253618775471965, "grad_norm": 0.6477298495160416, "learning_rate": 4.66668950359218e-06, "loss": 0.3097, "step": 8166 }, { "epoch": 0.38258303274464794, "grad_norm": 0.5449262993204882, "learning_rate": 4.666594886249882e-06, "loss": 0.2733, "step": 8167 }, { "epoch": 0.3826298777345763, "grad_norm": 0.5855318650708923, "learning_rate": 4.666500256439375e-06, "loss": 0.2872, "step": 8168 }, { "epoch": 0.3826767227245046, "grad_norm": 0.58916767066723, "learning_rate": 4.666405614161202e-06, "loss": 0.2755, "step": 8169 }, { "epoch": 0.38272356771443294, "grad_norm": 0.6192021164728868, "learning_rate": 4.66631095941591e-06, "loss": 0.2988, "step": 8170 }, { "epoch": 0.3827704127043613, "grad_norm": 0.6349629091181175, "learning_rate": 4.666216292204044e-06, "loss": 0.2893, "step": 8171 }, { "epoch": 0.3828172576942896, "grad_norm": 0.6175216977180994, "learning_rate": 4.6661216125261465e-06, "loss": 0.2865, "step": 8172 }, { "epoch": 0.38286410268421794, "grad_norm": 0.6097952840210082, "learning_rate": 4.666026920382765e-06, "loss": 0.3074, "step": 8173 }, { "epoch": 0.38291094767414624, "grad_norm": 0.656951498056689, "learning_rate": 4.6659322157744415e-06, "loss": 0.3088, "step": 8174 }, { "epoch": 0.3829577926640746, "grad_norm": 0.622667799908864, "learning_rate": 4.665837498701723e-06, "loss": 0.2861, "step": 8175 }, { "epoch": 0.3830046376540029, "grad_norm": 0.5812185410311721, "learning_rate": 4.665742769165153e-06, "loss": 0.2858, "step": 8176 }, { "epoch": 0.38305148264393124, "grad_norm": 0.5994057853630027, "learning_rate": 4.6656480271652795e-06, "loss": 0.2654, "step": 8177 }, { "epoch": 0.38309832763385954, "grad_norm": 0.6225492383278777, "learning_rate": 4.665553272702646e-06, "loss": 0.3046, "step": 8178 }, { "epoch": 0.3831451726237879, "grad_norm": 0.5903836256878368, "learning_rate": 4.665458505777796e-06, "loss": 0.2964, "step": 8179 }, { "epoch": 0.38319201761371624, "grad_norm": 0.5806459313447908, "learning_rate": 4.665363726391278e-06, "loss": 0.268, "step": 8180 }, { "epoch": 0.38323886260364454, "grad_norm": 0.6120897680123123, "learning_rate": 4.665268934543635e-06, "loss": 0.2837, "step": 8181 }, { "epoch": 0.3832857075935729, "grad_norm": 0.6047680163240364, "learning_rate": 4.665174130235414e-06, "loss": 0.289, "step": 8182 }, { "epoch": 0.3833325525835012, "grad_norm": 0.6025308528342528, "learning_rate": 4.66507931346716e-06, "loss": 0.3166, "step": 8183 }, { "epoch": 0.38337939757342954, "grad_norm": 0.6110473866823808, "learning_rate": 4.664984484239418e-06, "loss": 0.2998, "step": 8184 }, { "epoch": 0.38342624256335783, "grad_norm": 0.6128884612082263, "learning_rate": 4.664889642552736e-06, "loss": 0.287, "step": 8185 }, { "epoch": 0.3834730875532862, "grad_norm": 0.5681211491509454, "learning_rate": 4.664794788407657e-06, "loss": 0.2879, "step": 8186 }, { "epoch": 0.3835199325432145, "grad_norm": 0.6096231178868724, "learning_rate": 4.664699921804728e-06, "loss": 0.2914, "step": 8187 }, { "epoch": 0.38356677753314283, "grad_norm": 0.6520301335258848, "learning_rate": 4.664605042744496e-06, "loss": 0.2747, "step": 8188 }, { "epoch": 0.3836136225230712, "grad_norm": 0.6277578964558441, "learning_rate": 4.664510151227506e-06, "loss": 0.2956, "step": 8189 }, { "epoch": 0.3836604675129995, "grad_norm": 0.6043204598357554, "learning_rate": 4.664415247254303e-06, "loss": 0.2964, "step": 8190 }, { "epoch": 0.38370731250292783, "grad_norm": 0.5692349717114435, "learning_rate": 4.664320330825435e-06, "loss": 0.287, "step": 8191 }, { "epoch": 0.38375415749285613, "grad_norm": 0.621130409513103, "learning_rate": 4.664225401941448e-06, "loss": 0.2863, "step": 8192 }, { "epoch": 0.3838010024827845, "grad_norm": 0.559272887287353, "learning_rate": 4.664130460602887e-06, "loss": 0.2932, "step": 8193 }, { "epoch": 0.3838478474727128, "grad_norm": 0.628673415641359, "learning_rate": 4.664035506810299e-06, "loss": 0.2966, "step": 8194 }, { "epoch": 0.38389469246264113, "grad_norm": 0.6122155871085467, "learning_rate": 4.663940540564231e-06, "loss": 0.2946, "step": 8195 }, { "epoch": 0.3839415374525694, "grad_norm": 0.6064451340763274, "learning_rate": 4.663845561865229e-06, "loss": 0.3058, "step": 8196 }, { "epoch": 0.3839883824424978, "grad_norm": 0.6005221508075357, "learning_rate": 4.663750570713839e-06, "loss": 0.3095, "step": 8197 }, { "epoch": 0.38403522743242613, "grad_norm": 0.6696510549769334, "learning_rate": 4.663655567110609e-06, "loss": 0.3022, "step": 8198 }, { "epoch": 0.3840820724223544, "grad_norm": 0.6142065678802524, "learning_rate": 4.663560551056085e-06, "loss": 0.2814, "step": 8199 }, { "epoch": 0.3841289174122828, "grad_norm": 0.5973943581938601, "learning_rate": 4.663465522550814e-06, "loss": 0.2853, "step": 8200 }, { "epoch": 0.3841757624022111, "grad_norm": 0.6078238189299897, "learning_rate": 4.663370481595341e-06, "loss": 0.308, "step": 8201 }, { "epoch": 0.3842226073921394, "grad_norm": 0.5902964836243902, "learning_rate": 4.6632754281902175e-06, "loss": 0.2964, "step": 8202 }, { "epoch": 0.3842694523820677, "grad_norm": 0.6205753165007762, "learning_rate": 4.663180362335986e-06, "loss": 0.2913, "step": 8203 }, { "epoch": 0.3843162973719961, "grad_norm": 0.626100598466684, "learning_rate": 4.663085284033195e-06, "loss": 0.2964, "step": 8204 }, { "epoch": 0.38436314236192437, "grad_norm": 0.6327381021456107, "learning_rate": 4.662990193282393e-06, "loss": 0.2953, "step": 8205 }, { "epoch": 0.3844099873518527, "grad_norm": 0.571262464753857, "learning_rate": 4.662895090084124e-06, "loss": 0.2811, "step": 8206 }, { "epoch": 0.3844568323417811, "grad_norm": 0.6322882320951126, "learning_rate": 4.66279997443894e-06, "loss": 0.3058, "step": 8207 }, { "epoch": 0.38450367733170937, "grad_norm": 0.5773259296517064, "learning_rate": 4.662704846347384e-06, "loss": 0.2797, "step": 8208 }, { "epoch": 0.3845505223216377, "grad_norm": 0.6421140426632032, "learning_rate": 4.662609705810005e-06, "loss": 0.298, "step": 8209 }, { "epoch": 0.384597367311566, "grad_norm": 0.6566637105356957, "learning_rate": 4.662514552827352e-06, "loss": 0.3088, "step": 8210 }, { "epoch": 0.38464421230149437, "grad_norm": 0.5758301081397769, "learning_rate": 4.66241938739997e-06, "loss": 0.3161, "step": 8211 }, { "epoch": 0.38469105729142267, "grad_norm": 0.6234383621052008, "learning_rate": 4.662324209528409e-06, "loss": 0.298, "step": 8212 }, { "epoch": 0.384737902281351, "grad_norm": 0.6699571741029555, "learning_rate": 4.6622290192132155e-06, "loss": 0.2999, "step": 8213 }, { "epoch": 0.3847847472712793, "grad_norm": 0.5895500744984259, "learning_rate": 4.662133816454937e-06, "loss": 0.2852, "step": 8214 }, { "epoch": 0.38483159226120767, "grad_norm": 0.6287599708687974, "learning_rate": 4.662038601254122e-06, "loss": 0.2779, "step": 8215 }, { "epoch": 0.384878437251136, "grad_norm": 0.5902444442628162, "learning_rate": 4.661943373611318e-06, "loss": 0.2769, "step": 8216 }, { "epoch": 0.3849252822410643, "grad_norm": 0.5985416033373315, "learning_rate": 4.661848133527073e-06, "loss": 0.2928, "step": 8217 }, { "epoch": 0.38497212723099267, "grad_norm": 0.6183054652780026, "learning_rate": 4.661752881001936e-06, "loss": 0.2857, "step": 8218 }, { "epoch": 0.38501897222092096, "grad_norm": 0.5403014546977989, "learning_rate": 4.661657616036455e-06, "loss": 0.2624, "step": 8219 }, { "epoch": 0.3850658172108493, "grad_norm": 0.6223166239260621, "learning_rate": 4.661562338631177e-06, "loss": 0.2658, "step": 8220 }, { "epoch": 0.3851126622007776, "grad_norm": 0.6006144160114317, "learning_rate": 4.661467048786651e-06, "loss": 0.2624, "step": 8221 }, { "epoch": 0.38515950719070596, "grad_norm": 0.6415453708884973, "learning_rate": 4.661371746503425e-06, "loss": 0.3116, "step": 8222 }, { "epoch": 0.38520635218063426, "grad_norm": 0.5769351070022724, "learning_rate": 4.661276431782048e-06, "loss": 0.2665, "step": 8223 }, { "epoch": 0.3852531971705626, "grad_norm": 0.6472533829899499, "learning_rate": 4.661181104623069e-06, "loss": 0.2843, "step": 8224 }, { "epoch": 0.38530004216049096, "grad_norm": 0.5962367660993023, "learning_rate": 4.661085765027036e-06, "loss": 0.2663, "step": 8225 }, { "epoch": 0.38534688715041926, "grad_norm": 0.6867887834161461, "learning_rate": 4.6609904129944974e-06, "loss": 0.3106, "step": 8226 }, { "epoch": 0.3853937321403476, "grad_norm": 0.6390223473462247, "learning_rate": 4.660895048526002e-06, "loss": 0.2815, "step": 8227 }, { "epoch": 0.3854405771302759, "grad_norm": 0.5735754836292525, "learning_rate": 4.660799671622098e-06, "loss": 0.2964, "step": 8228 }, { "epoch": 0.38548742212020426, "grad_norm": 0.5826599825469667, "learning_rate": 4.660704282283336e-06, "loss": 0.2755, "step": 8229 }, { "epoch": 0.38553426711013256, "grad_norm": 0.6460346095623752, "learning_rate": 4.660608880510264e-06, "loss": 0.2746, "step": 8230 }, { "epoch": 0.3855811121000609, "grad_norm": 0.6140490157245685, "learning_rate": 4.66051346630343e-06, "loss": 0.2763, "step": 8231 }, { "epoch": 0.3856279570899892, "grad_norm": 0.6999769843395394, "learning_rate": 4.6604180396633845e-06, "loss": 0.2918, "step": 8232 }, { "epoch": 0.38567480207991756, "grad_norm": 0.5922878366906232, "learning_rate": 4.660322600590676e-06, "loss": 0.2961, "step": 8233 }, { "epoch": 0.3857216470698459, "grad_norm": 0.5894458015279491, "learning_rate": 4.660227149085854e-06, "loss": 0.2871, "step": 8234 }, { "epoch": 0.3857684920597742, "grad_norm": 0.5613246367945897, "learning_rate": 4.660131685149468e-06, "loss": 0.2631, "step": 8235 }, { "epoch": 0.38581533704970256, "grad_norm": 0.6314715583621936, "learning_rate": 4.6600362087820675e-06, "loss": 0.2954, "step": 8236 }, { "epoch": 0.38586218203963085, "grad_norm": 0.5975369008015327, "learning_rate": 4.659940719984201e-06, "loss": 0.2601, "step": 8237 }, { "epoch": 0.3859090270295592, "grad_norm": 0.6229138382024227, "learning_rate": 4.659845218756418e-06, "loss": 0.2858, "step": 8238 }, { "epoch": 0.3859558720194875, "grad_norm": 0.6160393492692421, "learning_rate": 4.659749705099269e-06, "loss": 0.3026, "step": 8239 }, { "epoch": 0.38600271700941585, "grad_norm": 0.6219577316429737, "learning_rate": 4.659654179013303e-06, "loss": 0.2997, "step": 8240 }, { "epoch": 0.38604956199934415, "grad_norm": 0.6113958267141307, "learning_rate": 4.659558640499071e-06, "loss": 0.271, "step": 8241 }, { "epoch": 0.3860964069892725, "grad_norm": 0.6494364871267216, "learning_rate": 4.659463089557121e-06, "loss": 0.3002, "step": 8242 }, { "epoch": 0.38614325197920085, "grad_norm": 0.6349023494208589, "learning_rate": 4.659367526188003e-06, "loss": 0.2962, "step": 8243 }, { "epoch": 0.38619009696912915, "grad_norm": 0.5717004134840422, "learning_rate": 4.659271950392269e-06, "loss": 0.2744, "step": 8244 }, { "epoch": 0.3862369419590575, "grad_norm": 0.6128996843209176, "learning_rate": 4.659176362170468e-06, "loss": 0.2746, "step": 8245 }, { "epoch": 0.3862837869489858, "grad_norm": 0.5832777576385152, "learning_rate": 4.659080761523148e-06, "loss": 0.2867, "step": 8246 }, { "epoch": 0.38633063193891415, "grad_norm": 0.7095777513830323, "learning_rate": 4.658985148450862e-06, "loss": 0.3245, "step": 8247 }, { "epoch": 0.38637747692884244, "grad_norm": 0.6291738828337329, "learning_rate": 4.658889522954158e-06, "loss": 0.28, "step": 8248 }, { "epoch": 0.3864243219187708, "grad_norm": 0.6612845567302673, "learning_rate": 4.658793885033589e-06, "loss": 0.3038, "step": 8249 }, { "epoch": 0.3864711669086991, "grad_norm": 0.6100278423903371, "learning_rate": 4.6586982346897035e-06, "loss": 0.2887, "step": 8250 }, { "epoch": 0.38651801189862744, "grad_norm": 0.648235696734137, "learning_rate": 4.658602571923051e-06, "loss": 0.3041, "step": 8251 }, { "epoch": 0.38656485688855574, "grad_norm": 0.6548828269836641, "learning_rate": 4.6585068967341836e-06, "loss": 0.2929, "step": 8252 }, { "epoch": 0.3866117018784841, "grad_norm": 0.6503346398167132, "learning_rate": 4.658411209123653e-06, "loss": 0.3098, "step": 8253 }, { "epoch": 0.38665854686841244, "grad_norm": 0.6194032194549836, "learning_rate": 4.6583155090920074e-06, "loss": 0.3142, "step": 8254 }, { "epoch": 0.38670539185834074, "grad_norm": 0.5811311724175463, "learning_rate": 4.658219796639798e-06, "loss": 0.2801, "step": 8255 }, { "epoch": 0.3867522368482691, "grad_norm": 0.6944613728852179, "learning_rate": 4.658124071767577e-06, "loss": 0.3177, "step": 8256 }, { "epoch": 0.3867990818381974, "grad_norm": 0.5644531665451177, "learning_rate": 4.658028334475893e-06, "loss": 0.2993, "step": 8257 }, { "epoch": 0.38684592682812574, "grad_norm": 0.6076699917281467, "learning_rate": 4.6579325847653e-06, "loss": 0.2979, "step": 8258 }, { "epoch": 0.38689277181805404, "grad_norm": 0.6058134292093285, "learning_rate": 4.657836822636347e-06, "loss": 0.2949, "step": 8259 }, { "epoch": 0.3869396168079824, "grad_norm": 0.577219295346553, "learning_rate": 4.657741048089585e-06, "loss": 0.2915, "step": 8260 }, { "epoch": 0.3869864617979107, "grad_norm": 0.6135608521917661, "learning_rate": 4.657645261125565e-06, "loss": 0.3041, "step": 8261 }, { "epoch": 0.38703330678783904, "grad_norm": 0.6230311767509685, "learning_rate": 4.657549461744841e-06, "loss": 0.2976, "step": 8262 }, { "epoch": 0.3870801517777674, "grad_norm": 0.6227916811587138, "learning_rate": 4.657453649947959e-06, "loss": 0.2738, "step": 8263 }, { "epoch": 0.3871269967676957, "grad_norm": 0.6108799008662636, "learning_rate": 4.657357825735476e-06, "loss": 0.2902, "step": 8264 }, { "epoch": 0.38717384175762404, "grad_norm": 0.6272789891119681, "learning_rate": 4.6572619891079395e-06, "loss": 0.2843, "step": 8265 }, { "epoch": 0.38722068674755233, "grad_norm": 0.6337436718501643, "learning_rate": 4.657166140065903e-06, "loss": 0.309, "step": 8266 }, { "epoch": 0.3872675317374807, "grad_norm": 0.6442213567089193, "learning_rate": 4.657070278609918e-06, "loss": 0.2922, "step": 8267 }, { "epoch": 0.387314376727409, "grad_norm": 0.6016188067955175, "learning_rate": 4.656974404740535e-06, "loss": 0.2878, "step": 8268 }, { "epoch": 0.38736122171733733, "grad_norm": 0.5834893051200145, "learning_rate": 4.656878518458306e-06, "loss": 0.2809, "step": 8269 }, { "epoch": 0.38740806670726563, "grad_norm": 0.5565982220511946, "learning_rate": 4.656782619763785e-06, "loss": 0.2923, "step": 8270 }, { "epoch": 0.387454911697194, "grad_norm": 0.5484564181484827, "learning_rate": 4.656686708657521e-06, "loss": 0.2638, "step": 8271 }, { "epoch": 0.38750175668712233, "grad_norm": 0.5645344656556361, "learning_rate": 4.6565907851400674e-06, "loss": 0.2856, "step": 8272 }, { "epoch": 0.38754860167705063, "grad_norm": 0.6719831245296622, "learning_rate": 4.6564948492119756e-06, "loss": 0.3075, "step": 8273 }, { "epoch": 0.387595446666979, "grad_norm": 0.6283242959213429, "learning_rate": 4.656398900873797e-06, "loss": 0.2796, "step": 8274 }, { "epoch": 0.3876422916569073, "grad_norm": 0.6042504656414799, "learning_rate": 4.656302940126086e-06, "loss": 0.2944, "step": 8275 }, { "epoch": 0.38768913664683563, "grad_norm": 0.6264687313167429, "learning_rate": 4.656206966969393e-06, "loss": 0.2925, "step": 8276 }, { "epoch": 0.3877359816367639, "grad_norm": 0.5735062884349297, "learning_rate": 4.656110981404271e-06, "loss": 0.2782, "step": 8277 }, { "epoch": 0.3877828266266923, "grad_norm": 0.6060552629455733, "learning_rate": 4.6560149834312726e-06, "loss": 0.3101, "step": 8278 }, { "epoch": 0.3878296716166206, "grad_norm": 0.5970122049170239, "learning_rate": 4.65591897305095e-06, "loss": 0.2833, "step": 8279 }, { "epoch": 0.3878765166065489, "grad_norm": 0.6266909615825043, "learning_rate": 4.655822950263855e-06, "loss": 0.2996, "step": 8280 }, { "epoch": 0.3879233615964773, "grad_norm": 0.6712553380467635, "learning_rate": 4.6557269150705406e-06, "loss": 0.3227, "step": 8281 }, { "epoch": 0.3879702065864056, "grad_norm": 0.6522492639339192, "learning_rate": 4.65563086747156e-06, "loss": 0.2979, "step": 8282 }, { "epoch": 0.3880170515763339, "grad_norm": 0.6597470827617113, "learning_rate": 4.655534807467466e-06, "loss": 0.296, "step": 8283 }, { "epoch": 0.3880638965662622, "grad_norm": 0.6048006286723869, "learning_rate": 4.6554387350588094e-06, "loss": 0.3063, "step": 8284 }, { "epoch": 0.3881107415561906, "grad_norm": 0.589428280268448, "learning_rate": 4.6553426502461466e-06, "loss": 0.3006, "step": 8285 }, { "epoch": 0.38815758654611887, "grad_norm": 0.6094486593361762, "learning_rate": 4.655246553030027e-06, "loss": 0.2841, "step": 8286 }, { "epoch": 0.3882044315360472, "grad_norm": 0.6059426570243359, "learning_rate": 4.6551504434110065e-06, "loss": 0.2888, "step": 8287 }, { "epoch": 0.3882512765259755, "grad_norm": 0.6558019261660635, "learning_rate": 4.655054321389636e-06, "loss": 0.2847, "step": 8288 }, { "epoch": 0.38829812151590387, "grad_norm": 0.6168079469211606, "learning_rate": 4.65495818696647e-06, "loss": 0.2967, "step": 8289 }, { "epoch": 0.3883449665058322, "grad_norm": 0.5769119140263914, "learning_rate": 4.654862040142061e-06, "loss": 0.2751, "step": 8290 }, { "epoch": 0.3883918114957605, "grad_norm": 0.6020081559023102, "learning_rate": 4.654765880916962e-06, "loss": 0.2908, "step": 8291 }, { "epoch": 0.38843865648568887, "grad_norm": 0.6032416472719793, "learning_rate": 4.654669709291727e-06, "loss": 0.2912, "step": 8292 }, { "epoch": 0.38848550147561717, "grad_norm": 0.6092281576684184, "learning_rate": 4.65457352526691e-06, "loss": 0.306, "step": 8293 }, { "epoch": 0.3885323464655455, "grad_norm": 0.6639399207388638, "learning_rate": 4.654477328843065e-06, "loss": 0.3166, "step": 8294 }, { "epoch": 0.3885791914554738, "grad_norm": 0.6511275251641953, "learning_rate": 4.654381120020742e-06, "loss": 0.3198, "step": 8295 }, { "epoch": 0.38862603644540217, "grad_norm": 0.6447411137998238, "learning_rate": 4.654284898800498e-06, "loss": 0.3023, "step": 8296 }, { "epoch": 0.38867288143533046, "grad_norm": 0.5626689352298129, "learning_rate": 4.654188665182887e-06, "loss": 0.2841, "step": 8297 }, { "epoch": 0.3887197264252588, "grad_norm": 0.5823158370970296, "learning_rate": 4.654092419168461e-06, "loss": 0.2804, "step": 8298 }, { "epoch": 0.38876657141518717, "grad_norm": 0.6097815830780323, "learning_rate": 4.653996160757775e-06, "loss": 0.2884, "step": 8299 }, { "epoch": 0.38881341640511546, "grad_norm": 0.5636817954714332, "learning_rate": 4.653899889951381e-06, "loss": 0.2884, "step": 8300 }, { "epoch": 0.3888602613950438, "grad_norm": 0.5953058539041898, "learning_rate": 4.653803606749836e-06, "loss": 0.2848, "step": 8301 }, { "epoch": 0.3889071063849721, "grad_norm": 0.6062772688504012, "learning_rate": 4.6537073111536915e-06, "loss": 0.2827, "step": 8302 }, { "epoch": 0.38895395137490046, "grad_norm": 0.6538271735003733, "learning_rate": 4.653611003163504e-06, "loss": 0.3023, "step": 8303 }, { "epoch": 0.38900079636482876, "grad_norm": 0.630890561081098, "learning_rate": 4.653514682779825e-06, "loss": 0.3077, "step": 8304 }, { "epoch": 0.3890476413547571, "grad_norm": 0.5453998465266212, "learning_rate": 4.653418350003211e-06, "loss": 0.2865, "step": 8305 }, { "epoch": 0.3890944863446854, "grad_norm": 0.6085862741489728, "learning_rate": 4.653322004834216e-06, "loss": 0.2809, "step": 8306 }, { "epoch": 0.38914133133461376, "grad_norm": 0.6008943895098112, "learning_rate": 4.653225647273394e-06, "loss": 0.2918, "step": 8307 }, { "epoch": 0.3891881763245421, "grad_norm": 0.5576885880611392, "learning_rate": 4.653129277321299e-06, "loss": 0.2666, "step": 8308 }, { "epoch": 0.3892350213144704, "grad_norm": 0.6570819204042411, "learning_rate": 4.653032894978487e-06, "loss": 0.308, "step": 8309 }, { "epoch": 0.38928186630439876, "grad_norm": 0.639530692812391, "learning_rate": 4.652936500245511e-06, "loss": 0.3002, "step": 8310 }, { "epoch": 0.38932871129432706, "grad_norm": 0.5855875509147103, "learning_rate": 4.652840093122927e-06, "loss": 0.2758, "step": 8311 }, { "epoch": 0.3893755562842554, "grad_norm": 0.6046737036290917, "learning_rate": 4.652743673611288e-06, "loss": 0.2935, "step": 8312 }, { "epoch": 0.3894224012741837, "grad_norm": 0.5735936333128606, "learning_rate": 4.652647241711153e-06, "loss": 0.301, "step": 8313 }, { "epoch": 0.38946924626411206, "grad_norm": 0.5305963778345104, "learning_rate": 4.652550797423071e-06, "loss": 0.2846, "step": 8314 }, { "epoch": 0.38951609125404035, "grad_norm": 0.6169748870204033, "learning_rate": 4.652454340747602e-06, "loss": 0.2951, "step": 8315 }, { "epoch": 0.3895629362439687, "grad_norm": 0.601371240809353, "learning_rate": 4.6523578716852995e-06, "loss": 0.2754, "step": 8316 }, { "epoch": 0.38960978123389706, "grad_norm": 0.6725740597838952, "learning_rate": 4.652261390236718e-06, "loss": 0.3069, "step": 8317 }, { "epoch": 0.38965662622382535, "grad_norm": 0.6210222912236725, "learning_rate": 4.652164896402413e-06, "loss": 0.3096, "step": 8318 }, { "epoch": 0.3897034712137537, "grad_norm": 0.6087596105505217, "learning_rate": 4.65206839018294e-06, "loss": 0.2797, "step": 8319 }, { "epoch": 0.389750316203682, "grad_norm": 0.5915713995697638, "learning_rate": 4.6519718715788545e-06, "loss": 0.2821, "step": 8320 }, { "epoch": 0.38979716119361035, "grad_norm": 0.5967249624391171, "learning_rate": 4.651875340590712e-06, "loss": 0.2879, "step": 8321 }, { "epoch": 0.38984400618353865, "grad_norm": 0.6013273339511359, "learning_rate": 4.6517787972190674e-06, "loss": 0.2921, "step": 8322 }, { "epoch": 0.389890851173467, "grad_norm": 0.5819339728966771, "learning_rate": 4.651682241464477e-06, "loss": 0.2927, "step": 8323 }, { "epoch": 0.3899376961633953, "grad_norm": 0.6519157011370083, "learning_rate": 4.651585673327496e-06, "loss": 0.308, "step": 8324 }, { "epoch": 0.38998454115332365, "grad_norm": 0.5822415952166837, "learning_rate": 4.651489092808679e-06, "loss": 0.2869, "step": 8325 }, { "epoch": 0.390031386143252, "grad_norm": 0.5819823913221206, "learning_rate": 4.651392499908584e-06, "loss": 0.2967, "step": 8326 }, { "epoch": 0.3900782311331803, "grad_norm": 0.6201926934377635, "learning_rate": 4.651295894627766e-06, "loss": 0.3052, "step": 8327 }, { "epoch": 0.39012507612310865, "grad_norm": 0.6575752966822874, "learning_rate": 4.651199276966781e-06, "loss": 0.2825, "step": 8328 }, { "epoch": 0.39017192111303695, "grad_norm": 0.5942518104759139, "learning_rate": 4.651102646926185e-06, "loss": 0.2566, "step": 8329 }, { "epoch": 0.3902187661029653, "grad_norm": 0.5934290397665086, "learning_rate": 4.6510060045065335e-06, "loss": 0.2873, "step": 8330 }, { "epoch": 0.3902656110928936, "grad_norm": 0.6195132596789302, "learning_rate": 4.6509093497083835e-06, "loss": 0.2896, "step": 8331 }, { "epoch": 0.39031245608282195, "grad_norm": 0.5989307924973631, "learning_rate": 4.65081268253229e-06, "loss": 0.2774, "step": 8332 }, { "epoch": 0.39035930107275024, "grad_norm": 0.6240638712650672, "learning_rate": 4.6507160029788105e-06, "loss": 0.3006, "step": 8333 }, { "epoch": 0.3904061460626786, "grad_norm": 0.6793783622528202, "learning_rate": 4.6506193110485005e-06, "loss": 0.3096, "step": 8334 }, { "epoch": 0.39045299105260695, "grad_norm": 0.6393050283160352, "learning_rate": 4.650522606741918e-06, "loss": 0.2695, "step": 8335 }, { "epoch": 0.39049983604253524, "grad_norm": 0.6007851446837887, "learning_rate": 4.650425890059618e-06, "loss": 0.2882, "step": 8336 }, { "epoch": 0.3905466810324636, "grad_norm": 0.6547393258604237, "learning_rate": 4.650329161002157e-06, "loss": 0.3056, "step": 8337 }, { "epoch": 0.3905935260223919, "grad_norm": 0.6804197612616003, "learning_rate": 4.650232419570093e-06, "loss": 0.2913, "step": 8338 }, { "epoch": 0.39064037101232024, "grad_norm": 0.6122685827061997, "learning_rate": 4.65013566576398e-06, "loss": 0.299, "step": 8339 }, { "epoch": 0.39068721600224854, "grad_norm": 0.6599530944098451, "learning_rate": 4.650038899584378e-06, "loss": 0.2918, "step": 8340 }, { "epoch": 0.3907340609921769, "grad_norm": 0.6217702171438021, "learning_rate": 4.649942121031843e-06, "loss": 0.2943, "step": 8341 }, { "epoch": 0.3907809059821052, "grad_norm": 0.6057534522847227, "learning_rate": 4.649845330106931e-06, "loss": 0.2685, "step": 8342 }, { "epoch": 0.39082775097203354, "grad_norm": 0.6307915021153768, "learning_rate": 4.6497485268101995e-06, "loss": 0.2628, "step": 8343 }, { "epoch": 0.3908745959619619, "grad_norm": 0.5521545055066627, "learning_rate": 4.649651711142205e-06, "loss": 0.2692, "step": 8344 }, { "epoch": 0.3909214409518902, "grad_norm": 0.6210530524011972, "learning_rate": 4.649554883103507e-06, "loss": 0.2948, "step": 8345 }, { "epoch": 0.39096828594181854, "grad_norm": 0.5933287736693179, "learning_rate": 4.649458042694659e-06, "loss": 0.2668, "step": 8346 }, { "epoch": 0.39101513093174683, "grad_norm": 0.6115893397514462, "learning_rate": 4.649361189916221e-06, "loss": 0.3057, "step": 8347 }, { "epoch": 0.3910619759216752, "grad_norm": 0.6298034052810149, "learning_rate": 4.649264324768749e-06, "loss": 0.289, "step": 8348 }, { "epoch": 0.3911088209116035, "grad_norm": 0.6130359515201685, "learning_rate": 4.649167447252802e-06, "loss": 0.2968, "step": 8349 }, { "epoch": 0.39115566590153183, "grad_norm": 0.5675449959592381, "learning_rate": 4.649070557368935e-06, "loss": 0.2705, "step": 8350 }, { "epoch": 0.39120251089146013, "grad_norm": 0.5855970492251571, "learning_rate": 4.648973655117709e-06, "loss": 0.277, "step": 8351 }, { "epoch": 0.3912493558813885, "grad_norm": 0.6415126754556666, "learning_rate": 4.648876740499678e-06, "loss": 0.3122, "step": 8352 }, { "epoch": 0.39129620087131683, "grad_norm": 0.6977000676738492, "learning_rate": 4.648779813515402e-06, "loss": 0.2916, "step": 8353 }, { "epoch": 0.39134304586124513, "grad_norm": 0.626378984146021, "learning_rate": 4.648682874165439e-06, "loss": 0.2922, "step": 8354 }, { "epoch": 0.3913898908511735, "grad_norm": 0.5873081635876475, "learning_rate": 4.648585922450346e-06, "loss": 0.2907, "step": 8355 }, { "epoch": 0.3914367358411018, "grad_norm": 0.6106660844592497, "learning_rate": 4.64848895837068e-06, "loss": 0.2815, "step": 8356 }, { "epoch": 0.39148358083103013, "grad_norm": 0.6054321804046892, "learning_rate": 4.648391981927e-06, "loss": 0.291, "step": 8357 }, { "epoch": 0.3915304258209584, "grad_norm": 0.6141919101039808, "learning_rate": 4.648294993119865e-06, "loss": 0.2822, "step": 8358 }, { "epoch": 0.3915772708108868, "grad_norm": 0.6034680798741897, "learning_rate": 4.648197991949832e-06, "loss": 0.2862, "step": 8359 }, { "epoch": 0.3916241158008151, "grad_norm": 0.580408212646096, "learning_rate": 4.648100978417461e-06, "loss": 0.2752, "step": 8360 }, { "epoch": 0.3916709607907434, "grad_norm": 0.5726065386014855, "learning_rate": 4.6480039525233064e-06, "loss": 0.2794, "step": 8361 }, { "epoch": 0.3917178057806718, "grad_norm": 0.5689529221156914, "learning_rate": 4.6479069142679295e-06, "loss": 0.285, "step": 8362 }, { "epoch": 0.3917646507706001, "grad_norm": 0.6220595575784731, "learning_rate": 4.647809863651889e-06, "loss": 0.2887, "step": 8363 }, { "epoch": 0.3918114957605284, "grad_norm": 0.6032975905322381, "learning_rate": 4.647712800675742e-06, "loss": 0.3002, "step": 8364 }, { "epoch": 0.3918583407504567, "grad_norm": 0.6584846237991409, "learning_rate": 4.647615725340047e-06, "loss": 0.2953, "step": 8365 }, { "epoch": 0.3919051857403851, "grad_norm": 0.6031705782908392, "learning_rate": 4.647518637645364e-06, "loss": 0.2932, "step": 8366 }, { "epoch": 0.39195203073031337, "grad_norm": 0.6193285852121444, "learning_rate": 4.647421537592252e-06, "loss": 0.2829, "step": 8367 }, { "epoch": 0.3919988757202417, "grad_norm": 0.6038469512564261, "learning_rate": 4.647324425181266e-06, "loss": 0.2482, "step": 8368 }, { "epoch": 0.39204572071017, "grad_norm": 0.6166382234909087, "learning_rate": 4.647227300412971e-06, "loss": 0.3105, "step": 8369 }, { "epoch": 0.39209256570009837, "grad_norm": 0.6723365466761426, "learning_rate": 4.64713016328792e-06, "loss": 0.2952, "step": 8370 }, { "epoch": 0.3921394106900267, "grad_norm": 0.6261556837888183, "learning_rate": 4.647033013806676e-06, "loss": 0.2988, "step": 8371 }, { "epoch": 0.392186255679955, "grad_norm": 0.6143507951995844, "learning_rate": 4.646935851969796e-06, "loss": 0.294, "step": 8372 }, { "epoch": 0.39223310066988337, "grad_norm": 0.653923331369186, "learning_rate": 4.64683867777784e-06, "loss": 0.3045, "step": 8373 }, { "epoch": 0.39227994565981167, "grad_norm": 0.6231910972340183, "learning_rate": 4.646741491231367e-06, "loss": 0.2832, "step": 8374 }, { "epoch": 0.39232679064974, "grad_norm": 0.6064223981055197, "learning_rate": 4.6466442923309365e-06, "loss": 0.2886, "step": 8375 }, { "epoch": 0.3923736356396683, "grad_norm": 0.6118492972844108, "learning_rate": 4.6465470810771074e-06, "loss": 0.2901, "step": 8376 }, { "epoch": 0.39242048062959667, "grad_norm": 0.6047952224024875, "learning_rate": 4.6464498574704396e-06, "loss": 0.292, "step": 8377 }, { "epoch": 0.39246732561952496, "grad_norm": 0.6257247898693807, "learning_rate": 4.646352621511492e-06, "loss": 0.2932, "step": 8378 }, { "epoch": 0.3925141706094533, "grad_norm": 0.6523842969521585, "learning_rate": 4.646255373200824e-06, "loss": 0.2987, "step": 8379 }, { "epoch": 0.39256101559938167, "grad_norm": 0.6312658457505721, "learning_rate": 4.646158112538997e-06, "loss": 0.3108, "step": 8380 }, { "epoch": 0.39260786058930996, "grad_norm": 0.5884538238268158, "learning_rate": 4.646060839526568e-06, "loss": 0.3112, "step": 8381 }, { "epoch": 0.3926547055792383, "grad_norm": 0.5732744823603169, "learning_rate": 4.6459635541641006e-06, "loss": 0.2933, "step": 8382 }, { "epoch": 0.3927015505691666, "grad_norm": 0.58699110614516, "learning_rate": 4.64586625645215e-06, "loss": 0.2849, "step": 8383 }, { "epoch": 0.39274839555909496, "grad_norm": 0.6346165148013496, "learning_rate": 4.6457689463912795e-06, "loss": 0.2793, "step": 8384 }, { "epoch": 0.39279524054902326, "grad_norm": 0.6756361329313699, "learning_rate": 4.645671623982048e-06, "loss": 0.2902, "step": 8385 }, { "epoch": 0.3928420855389516, "grad_norm": 0.6100999188242285, "learning_rate": 4.6455742892250146e-06, "loss": 0.2967, "step": 8386 }, { "epoch": 0.3928889305288799, "grad_norm": 0.5856317320791825, "learning_rate": 4.645476942120742e-06, "loss": 0.2638, "step": 8387 }, { "epoch": 0.39293577551880826, "grad_norm": 0.6031529577109979, "learning_rate": 4.645379582669788e-06, "loss": 0.3053, "step": 8388 }, { "epoch": 0.3929826205087366, "grad_norm": 0.6387480212003025, "learning_rate": 4.645282210872714e-06, "loss": 0.3121, "step": 8389 }, { "epoch": 0.3930294654986649, "grad_norm": 0.636782790458431, "learning_rate": 4.6451848267300795e-06, "loss": 0.2885, "step": 8390 }, { "epoch": 0.39307631048859326, "grad_norm": 0.6162119597306828, "learning_rate": 4.6450874302424455e-06, "loss": 0.3015, "step": 8391 }, { "epoch": 0.39312315547852156, "grad_norm": 0.5877947243859933, "learning_rate": 4.644990021410374e-06, "loss": 0.282, "step": 8392 }, { "epoch": 0.3931700004684499, "grad_norm": 0.5963215463733444, "learning_rate": 4.644892600234423e-06, "loss": 0.2918, "step": 8393 }, { "epoch": 0.3932168454583782, "grad_norm": 0.625451473466171, "learning_rate": 4.644795166715154e-06, "loss": 0.301, "step": 8394 }, { "epoch": 0.39326369044830656, "grad_norm": 0.5859257419149542, "learning_rate": 4.644697720853127e-06, "loss": 0.2798, "step": 8395 }, { "epoch": 0.39331053543823485, "grad_norm": 0.6681944929409046, "learning_rate": 4.644600262648905e-06, "loss": 0.31, "step": 8396 }, { "epoch": 0.3933573804281632, "grad_norm": 0.6505507400032916, "learning_rate": 4.644502792103048e-06, "loss": 0.2836, "step": 8397 }, { "epoch": 0.39340422541809156, "grad_norm": 0.6424300000872878, "learning_rate": 4.644405309216114e-06, "loss": 0.3006, "step": 8398 }, { "epoch": 0.39345107040801985, "grad_norm": 0.5744938251956845, "learning_rate": 4.644307813988669e-06, "loss": 0.3015, "step": 8399 }, { "epoch": 0.3934979153979482, "grad_norm": 0.6245747135402859, "learning_rate": 4.64421030642127e-06, "loss": 0.3092, "step": 8400 }, { "epoch": 0.3935447603878765, "grad_norm": 0.591313735440372, "learning_rate": 4.644112786514481e-06, "loss": 0.2939, "step": 8401 }, { "epoch": 0.39359160537780485, "grad_norm": 0.5766422899268779, "learning_rate": 4.6440152542688605e-06, "loss": 0.2754, "step": 8402 }, { "epoch": 0.39363845036773315, "grad_norm": 0.5752795265907396, "learning_rate": 4.643917709684971e-06, "loss": 0.2984, "step": 8403 }, { "epoch": 0.3936852953576615, "grad_norm": 0.5836804869172539, "learning_rate": 4.6438201527633755e-06, "loss": 0.2774, "step": 8404 }, { "epoch": 0.3937321403475898, "grad_norm": 0.6231614079322763, "learning_rate": 4.643722583504632e-06, "loss": 0.3098, "step": 8405 }, { "epoch": 0.39377898533751815, "grad_norm": 0.6522555836737851, "learning_rate": 4.6436250019093045e-06, "loss": 0.3218, "step": 8406 }, { "epoch": 0.3938258303274465, "grad_norm": 0.602705355615152, "learning_rate": 4.643527407977954e-06, "loss": 0.2783, "step": 8407 }, { "epoch": 0.3938726753173748, "grad_norm": 0.5909211035676495, "learning_rate": 4.643429801711142e-06, "loss": 0.2769, "step": 8408 }, { "epoch": 0.39391952030730315, "grad_norm": 0.580944028196557, "learning_rate": 4.643332183109431e-06, "loss": 0.3011, "step": 8409 }, { "epoch": 0.39396636529723145, "grad_norm": 0.6436048009285193, "learning_rate": 4.6432345521733816e-06, "loss": 0.2991, "step": 8410 }, { "epoch": 0.3940132102871598, "grad_norm": 0.6215578813914117, "learning_rate": 4.6431369089035556e-06, "loss": 0.2765, "step": 8411 }, { "epoch": 0.3940600552770881, "grad_norm": 0.6427196648136666, "learning_rate": 4.643039253300516e-06, "loss": 0.2856, "step": 8412 }, { "epoch": 0.39410690026701645, "grad_norm": 0.6058177336967702, "learning_rate": 4.642941585364823e-06, "loss": 0.2698, "step": 8413 }, { "epoch": 0.39415374525694474, "grad_norm": 0.57204110546751, "learning_rate": 4.6428439050970405e-06, "loss": 0.2538, "step": 8414 }, { "epoch": 0.3942005902468731, "grad_norm": 0.602227638901857, "learning_rate": 4.642746212497729e-06, "loss": 0.3025, "step": 8415 }, { "epoch": 0.39424743523680145, "grad_norm": 0.6331433625817466, "learning_rate": 4.642648507567453e-06, "loss": 0.2882, "step": 8416 }, { "epoch": 0.39429428022672974, "grad_norm": 0.6178798437366548, "learning_rate": 4.642550790306772e-06, "loss": 0.3018, "step": 8417 }, { "epoch": 0.3943411252166581, "grad_norm": 0.5972721499598933, "learning_rate": 4.642453060716251e-06, "loss": 0.2704, "step": 8418 }, { "epoch": 0.3943879702065864, "grad_norm": 0.6537571122356087, "learning_rate": 4.642355318796451e-06, "loss": 0.3082, "step": 8419 }, { "epoch": 0.39443481519651474, "grad_norm": 0.5705690719912716, "learning_rate": 4.642257564547934e-06, "loss": 0.2632, "step": 8420 }, { "epoch": 0.39448166018644304, "grad_norm": 0.5710948263363763, "learning_rate": 4.642159797971263e-06, "loss": 0.2743, "step": 8421 }, { "epoch": 0.3945285051763714, "grad_norm": 0.6064399580449704, "learning_rate": 4.6420620190670015e-06, "loss": 0.2871, "step": 8422 }, { "epoch": 0.3945753501662997, "grad_norm": 0.6029550136874854, "learning_rate": 4.641964227835711e-06, "loss": 0.2869, "step": 8423 }, { "epoch": 0.39462219515622804, "grad_norm": 0.643856375184798, "learning_rate": 4.641866424277955e-06, "loss": 0.2939, "step": 8424 }, { "epoch": 0.3946690401461564, "grad_norm": 0.6073472764664151, "learning_rate": 4.641768608394296e-06, "loss": 0.2974, "step": 8425 }, { "epoch": 0.3947158851360847, "grad_norm": 0.6511050169785941, "learning_rate": 4.641670780185296e-06, "loss": 0.302, "step": 8426 }, { "epoch": 0.39476273012601304, "grad_norm": 0.571445097596282, "learning_rate": 4.641572939651521e-06, "loss": 0.2749, "step": 8427 }, { "epoch": 0.39480957511594134, "grad_norm": 0.5882542665645545, "learning_rate": 4.64147508679353e-06, "loss": 0.2712, "step": 8428 }, { "epoch": 0.3948564201058697, "grad_norm": 0.6944444559507394, "learning_rate": 4.641377221611889e-06, "loss": 0.2968, "step": 8429 }, { "epoch": 0.394903265095798, "grad_norm": 0.6291186789814771, "learning_rate": 4.64127934410716e-06, "loss": 0.2937, "step": 8430 }, { "epoch": 0.39495011008572634, "grad_norm": 0.660253719988038, "learning_rate": 4.6411814542799075e-06, "loss": 0.3039, "step": 8431 }, { "epoch": 0.39499695507565463, "grad_norm": 0.6051759186074926, "learning_rate": 4.641083552130693e-06, "loss": 0.2674, "step": 8432 }, { "epoch": 0.395043800065583, "grad_norm": 0.6387922120899999, "learning_rate": 4.640985637660081e-06, "loss": 0.3039, "step": 8433 }, { "epoch": 0.39509064505551134, "grad_norm": 0.6079395516439433, "learning_rate": 4.640887710868634e-06, "loss": 0.2863, "step": 8434 }, { "epoch": 0.39513749004543963, "grad_norm": 0.5931177037687306, "learning_rate": 4.640789771756918e-06, "loss": 0.2744, "step": 8435 }, { "epoch": 0.395184335035368, "grad_norm": 0.5275537344182858, "learning_rate": 4.6406918203254934e-06, "loss": 0.2558, "step": 8436 }, { "epoch": 0.3952311800252963, "grad_norm": 0.6620334493458856, "learning_rate": 4.640593856574927e-06, "loss": 0.3067, "step": 8437 }, { "epoch": 0.39527802501522463, "grad_norm": 0.5359536653988121, "learning_rate": 4.64049588050578e-06, "loss": 0.2598, "step": 8438 }, { "epoch": 0.39532487000515293, "grad_norm": 0.5318209539800548, "learning_rate": 4.640397892118617e-06, "loss": 0.2787, "step": 8439 }, { "epoch": 0.3953717149950813, "grad_norm": 0.6279540714511265, "learning_rate": 4.640299891414002e-06, "loss": 0.2843, "step": 8440 }, { "epoch": 0.3954185599850096, "grad_norm": 0.6439458004914475, "learning_rate": 4.640201878392499e-06, "loss": 0.2711, "step": 8441 }, { "epoch": 0.39546540497493793, "grad_norm": 0.6503093495615582, "learning_rate": 4.640103853054673e-06, "loss": 0.3042, "step": 8442 }, { "epoch": 0.3955122499648663, "grad_norm": 0.6517623852298757, "learning_rate": 4.640005815401086e-06, "loss": 0.3038, "step": 8443 }, { "epoch": 0.3955590949547946, "grad_norm": 0.6058768378222285, "learning_rate": 4.639907765432304e-06, "loss": 0.2841, "step": 8444 }, { "epoch": 0.39560593994472293, "grad_norm": 0.6132581417650567, "learning_rate": 4.639809703148891e-06, "loss": 0.2846, "step": 8445 }, { "epoch": 0.3956527849346512, "grad_norm": 0.603869588228309, "learning_rate": 4.639711628551411e-06, "loss": 0.2995, "step": 8446 }, { "epoch": 0.3956996299245796, "grad_norm": 0.6049544405419497, "learning_rate": 4.639613541640428e-06, "loss": 0.2862, "step": 8447 }, { "epoch": 0.3957464749145079, "grad_norm": 0.6185132361747553, "learning_rate": 4.6395154424165065e-06, "loss": 0.282, "step": 8448 }, { "epoch": 0.3957933199044362, "grad_norm": 0.5955666962382412, "learning_rate": 4.639417330880213e-06, "loss": 0.2858, "step": 8449 }, { "epoch": 0.3958401648943645, "grad_norm": 0.567407030886221, "learning_rate": 4.639319207032109e-06, "loss": 0.2939, "step": 8450 }, { "epoch": 0.3958870098842929, "grad_norm": 0.5903153256057972, "learning_rate": 4.639221070872761e-06, "loss": 0.2813, "step": 8451 }, { "epoch": 0.3959338548742212, "grad_norm": 0.5741582159996932, "learning_rate": 4.639122922402734e-06, "loss": 0.2819, "step": 8452 }, { "epoch": 0.3959806998641495, "grad_norm": 0.5690689988640152, "learning_rate": 4.639024761622591e-06, "loss": 0.2894, "step": 8453 }, { "epoch": 0.3960275448540779, "grad_norm": 0.7024077633482259, "learning_rate": 4.638926588532898e-06, "loss": 0.2951, "step": 8454 }, { "epoch": 0.39607438984400617, "grad_norm": 0.5638148819003919, "learning_rate": 4.638828403134221e-06, "loss": 0.2746, "step": 8455 }, { "epoch": 0.3961212348339345, "grad_norm": 0.5806273774580201, "learning_rate": 4.638730205427124e-06, "loss": 0.2757, "step": 8456 }, { "epoch": 0.3961680798238628, "grad_norm": 0.5674796855685887, "learning_rate": 4.638631995412173e-06, "loss": 0.2771, "step": 8457 }, { "epoch": 0.39621492481379117, "grad_norm": 0.6480878954938778, "learning_rate": 4.638533773089931e-06, "loss": 0.3066, "step": 8458 }, { "epoch": 0.39626176980371947, "grad_norm": 0.5792735164900108, "learning_rate": 4.638435538460965e-06, "loss": 0.2826, "step": 8459 }, { "epoch": 0.3963086147936478, "grad_norm": 0.6170143574802713, "learning_rate": 4.6383372915258406e-06, "loss": 0.3214, "step": 8460 }, { "epoch": 0.39635545978357617, "grad_norm": 0.6000515032976755, "learning_rate": 4.6382390322851215e-06, "loss": 0.2921, "step": 8461 }, { "epoch": 0.39640230477350447, "grad_norm": 0.5740432226493524, "learning_rate": 4.638140760739374e-06, "loss": 0.273, "step": 8462 }, { "epoch": 0.3964491497634328, "grad_norm": 0.6312124919276101, "learning_rate": 4.638042476889166e-06, "loss": 0.3073, "step": 8463 }, { "epoch": 0.3964959947533611, "grad_norm": 0.6159196383376261, "learning_rate": 4.637944180735059e-06, "loss": 0.3012, "step": 8464 }, { "epoch": 0.39654283974328947, "grad_norm": 0.5800722467474344, "learning_rate": 4.637845872277621e-06, "loss": 0.297, "step": 8465 }, { "epoch": 0.39658968473321776, "grad_norm": 0.5767918723993213, "learning_rate": 4.637747551517418e-06, "loss": 0.2738, "step": 8466 }, { "epoch": 0.3966365297231461, "grad_norm": 0.5890923535742847, "learning_rate": 4.637649218455013e-06, "loss": 0.2875, "step": 8467 }, { "epoch": 0.3966833747130744, "grad_norm": 0.6010263219310967, "learning_rate": 4.637550873090977e-06, "loss": 0.2815, "step": 8468 }, { "epoch": 0.39673021970300276, "grad_norm": 0.5867435557023579, "learning_rate": 4.637452515425871e-06, "loss": 0.2791, "step": 8469 }, { "epoch": 0.3967770646929311, "grad_norm": 0.6048055497102511, "learning_rate": 4.637354145460264e-06, "loss": 0.2939, "step": 8470 }, { "epoch": 0.3968239096828594, "grad_norm": 0.5926728118531254, "learning_rate": 4.63725576319472e-06, "loss": 0.2851, "step": 8471 }, { "epoch": 0.39687075467278776, "grad_norm": 0.5783217234400929, "learning_rate": 4.637157368629808e-06, "loss": 0.2852, "step": 8472 }, { "epoch": 0.39691759966271606, "grad_norm": 0.6135255277886561, "learning_rate": 4.637058961766091e-06, "loss": 0.2906, "step": 8473 }, { "epoch": 0.3969644446526444, "grad_norm": 0.6007876883324231, "learning_rate": 4.636960542604138e-06, "loss": 0.3012, "step": 8474 }, { "epoch": 0.3970112896425727, "grad_norm": 0.6401542127532298, "learning_rate": 4.636862111144512e-06, "loss": 0.3108, "step": 8475 }, { "epoch": 0.39705813463250106, "grad_norm": 0.6135085387236441, "learning_rate": 4.636763667387783e-06, "loss": 0.2973, "step": 8476 }, { "epoch": 0.39710497962242935, "grad_norm": 0.6392372871754302, "learning_rate": 4.636665211334517e-06, "loss": 0.3031, "step": 8477 }, { "epoch": 0.3971518246123577, "grad_norm": 0.612613408403346, "learning_rate": 4.636566742985279e-06, "loss": 0.2899, "step": 8478 }, { "epoch": 0.39719866960228606, "grad_norm": 0.5915708156092375, "learning_rate": 4.636468262340637e-06, "loss": 0.2973, "step": 8479 }, { "epoch": 0.39724551459221435, "grad_norm": 0.6206669349645433, "learning_rate": 4.636369769401156e-06, "loss": 0.2712, "step": 8480 }, { "epoch": 0.3972923595821427, "grad_norm": 0.6573043824716803, "learning_rate": 4.636271264167404e-06, "loss": 0.2995, "step": 8481 }, { "epoch": 0.397339204572071, "grad_norm": 0.6499296730494152, "learning_rate": 4.6361727466399484e-06, "loss": 0.309, "step": 8482 }, { "epoch": 0.39738604956199935, "grad_norm": 0.6394828162423198, "learning_rate": 4.636074216819355e-06, "loss": 0.2709, "step": 8483 }, { "epoch": 0.39743289455192765, "grad_norm": 0.5794129090721195, "learning_rate": 4.635975674706192e-06, "loss": 0.2727, "step": 8484 }, { "epoch": 0.397479739541856, "grad_norm": 0.6231476173296142, "learning_rate": 4.635877120301025e-06, "loss": 0.2786, "step": 8485 }, { "epoch": 0.3975265845317843, "grad_norm": 0.5757717068695223, "learning_rate": 4.635778553604423e-06, "loss": 0.2888, "step": 8486 }, { "epoch": 0.39757342952171265, "grad_norm": 0.6664877239355463, "learning_rate": 4.6356799746169525e-06, "loss": 0.3012, "step": 8487 }, { "epoch": 0.397620274511641, "grad_norm": 0.6296138678782801, "learning_rate": 4.635581383339179e-06, "loss": 0.307, "step": 8488 }, { "epoch": 0.3976671195015693, "grad_norm": 0.5821646930690898, "learning_rate": 4.635482779771673e-06, "loss": 0.2709, "step": 8489 }, { "epoch": 0.39771396449149765, "grad_norm": 0.6045404446439399, "learning_rate": 4.635384163914999e-06, "loss": 0.2825, "step": 8490 }, { "epoch": 0.39776080948142595, "grad_norm": 0.6163580948332301, "learning_rate": 4.635285535769727e-06, "loss": 0.2949, "step": 8491 }, { "epoch": 0.3978076544713543, "grad_norm": 0.6654675167924248, "learning_rate": 4.635186895336422e-06, "loss": 0.3067, "step": 8492 }, { "epoch": 0.3978544994612826, "grad_norm": 0.603438942607899, "learning_rate": 4.635088242615654e-06, "loss": 0.277, "step": 8493 }, { "epoch": 0.39790134445121095, "grad_norm": 0.6281153603137672, "learning_rate": 4.63498957760799e-06, "loss": 0.2938, "step": 8494 }, { "epoch": 0.39794818944113924, "grad_norm": 0.6712627980753875, "learning_rate": 4.6348909003139976e-06, "loss": 0.3094, "step": 8495 }, { "epoch": 0.3979950344310676, "grad_norm": 0.6189573608005012, "learning_rate": 4.634792210734244e-06, "loss": 0.279, "step": 8496 }, { "epoch": 0.39804187942099595, "grad_norm": 0.6293091758446266, "learning_rate": 4.634693508869298e-06, "loss": 0.2991, "step": 8497 }, { "epoch": 0.39808872441092424, "grad_norm": 0.6034078023107531, "learning_rate": 4.634594794719728e-06, "loss": 0.2926, "step": 8498 }, { "epoch": 0.3981355694008526, "grad_norm": 0.6822742948386064, "learning_rate": 4.634496068286101e-06, "loss": 0.2888, "step": 8499 }, { "epoch": 0.3981824143907809, "grad_norm": 0.6539744023274111, "learning_rate": 4.634397329568985e-06, "loss": 0.299, "step": 8500 }, { "epoch": 0.39822925938070924, "grad_norm": 0.587679782492564, "learning_rate": 4.63429857856895e-06, "loss": 0.2938, "step": 8501 }, { "epoch": 0.39827610437063754, "grad_norm": 0.6039019462953538, "learning_rate": 4.6341998152865626e-06, "loss": 0.2751, "step": 8502 }, { "epoch": 0.3983229493605659, "grad_norm": 0.6534960403806025, "learning_rate": 4.6341010397223915e-06, "loss": 0.2936, "step": 8503 }, { "epoch": 0.3983697943504942, "grad_norm": 0.6441772307698309, "learning_rate": 4.634002251877006e-06, "loss": 0.2845, "step": 8504 }, { "epoch": 0.39841663934042254, "grad_norm": 0.6519317837778967, "learning_rate": 4.633903451750973e-06, "loss": 0.2933, "step": 8505 }, { "epoch": 0.3984634843303509, "grad_norm": 0.6681592968881009, "learning_rate": 4.633804639344862e-06, "loss": 0.3048, "step": 8506 }, { "epoch": 0.3985103293202792, "grad_norm": 0.6600299608926891, "learning_rate": 4.633705814659242e-06, "loss": 0.306, "step": 8507 }, { "epoch": 0.39855717431020754, "grad_norm": 0.6283688550923369, "learning_rate": 4.6336069776946816e-06, "loss": 0.3028, "step": 8508 }, { "epoch": 0.39860401930013584, "grad_norm": 0.591066358800634, "learning_rate": 4.6335081284517485e-06, "loss": 0.292, "step": 8509 }, { "epoch": 0.3986508642900642, "grad_norm": 0.5369968831773321, "learning_rate": 4.633409266931013e-06, "loss": 0.2778, "step": 8510 }, { "epoch": 0.3986977092799925, "grad_norm": 0.6078349242226785, "learning_rate": 4.633310393133043e-06, "loss": 0.2912, "step": 8511 }, { "epoch": 0.39874455426992084, "grad_norm": 0.6144980888904664, "learning_rate": 4.633211507058408e-06, "loss": 0.2898, "step": 8512 }, { "epoch": 0.39879139925984913, "grad_norm": 0.6073299877077346, "learning_rate": 4.633112608707677e-06, "loss": 0.3016, "step": 8513 }, { "epoch": 0.3988382442497775, "grad_norm": 0.6105453424819847, "learning_rate": 4.633013698081419e-06, "loss": 0.2916, "step": 8514 }, { "epoch": 0.39888508923970584, "grad_norm": 0.5802154939072832, "learning_rate": 4.632914775180204e-06, "loss": 0.281, "step": 8515 }, { "epoch": 0.39893193422963413, "grad_norm": 0.5961924893318525, "learning_rate": 4.6328158400046e-06, "loss": 0.2982, "step": 8516 }, { "epoch": 0.3989787792195625, "grad_norm": 0.6020330036077153, "learning_rate": 4.632716892555177e-06, "loss": 0.2933, "step": 8517 }, { "epoch": 0.3990256242094908, "grad_norm": 0.6790727458043493, "learning_rate": 4.6326179328325035e-06, "loss": 0.2976, "step": 8518 }, { "epoch": 0.39907246919941913, "grad_norm": 0.6020639967209559, "learning_rate": 4.632518960837151e-06, "loss": 0.2909, "step": 8519 }, { "epoch": 0.39911931418934743, "grad_norm": 0.6001874032747049, "learning_rate": 4.632419976569687e-06, "loss": 0.2848, "step": 8520 }, { "epoch": 0.3991661591792758, "grad_norm": 0.5992942656787249, "learning_rate": 4.632320980030682e-06, "loss": 0.274, "step": 8521 }, { "epoch": 0.3992130041692041, "grad_norm": 0.5954244059671997, "learning_rate": 4.632221971220706e-06, "loss": 0.2912, "step": 8522 }, { "epoch": 0.39925984915913243, "grad_norm": 0.558734629398423, "learning_rate": 4.6321229501403285e-06, "loss": 0.2777, "step": 8523 }, { "epoch": 0.3993066941490608, "grad_norm": 0.6659226454767755, "learning_rate": 4.632023916790119e-06, "loss": 0.3164, "step": 8524 }, { "epoch": 0.3993535391389891, "grad_norm": 0.586600050101798, "learning_rate": 4.631924871170649e-06, "loss": 0.2869, "step": 8525 }, { "epoch": 0.39940038412891743, "grad_norm": 0.598948867691713, "learning_rate": 4.631825813282485e-06, "loss": 0.3017, "step": 8526 }, { "epoch": 0.3994472291188457, "grad_norm": 0.5871280633577078, "learning_rate": 4.631726743126201e-06, "loss": 0.288, "step": 8527 }, { "epoch": 0.3994940741087741, "grad_norm": 0.6029733456769034, "learning_rate": 4.6316276607023654e-06, "loss": 0.2882, "step": 8528 }, { "epoch": 0.3995409190987024, "grad_norm": 0.5715597189421711, "learning_rate": 4.631528566011547e-06, "loss": 0.2777, "step": 8529 }, { "epoch": 0.3995877640886307, "grad_norm": 0.5602401815155361, "learning_rate": 4.631429459054319e-06, "loss": 0.2577, "step": 8530 }, { "epoch": 0.399634609078559, "grad_norm": 0.638171133384287, "learning_rate": 4.631330339831249e-06, "loss": 0.3086, "step": 8531 }, { "epoch": 0.3996814540684874, "grad_norm": 0.6311066396812712, "learning_rate": 4.63123120834291e-06, "loss": 0.2995, "step": 8532 }, { "epoch": 0.3997282990584157, "grad_norm": 0.5404591037468374, "learning_rate": 4.631132064589869e-06, "loss": 0.2818, "step": 8533 }, { "epoch": 0.399775144048344, "grad_norm": 0.586249845925964, "learning_rate": 4.6310329085727e-06, "loss": 0.2898, "step": 8534 }, { "epoch": 0.3998219890382724, "grad_norm": 0.6091414579109318, "learning_rate": 4.630933740291972e-06, "loss": 0.2893, "step": 8535 }, { "epoch": 0.39986883402820067, "grad_norm": 0.5897069627900386, "learning_rate": 4.6308345597482565e-06, "loss": 0.2728, "step": 8536 }, { "epoch": 0.399915679018129, "grad_norm": 0.5788620796923593, "learning_rate": 4.630735366942123e-06, "loss": 0.2902, "step": 8537 }, { "epoch": 0.3999625240080573, "grad_norm": 0.6085937901332714, "learning_rate": 4.630636161874143e-06, "loss": 0.3018, "step": 8538 }, { "epoch": 0.40000936899798567, "grad_norm": 0.5898815596247531, "learning_rate": 4.630536944544887e-06, "loss": 0.2901, "step": 8539 }, { "epoch": 0.40005621398791397, "grad_norm": 0.6046997650110302, "learning_rate": 4.630437714954927e-06, "loss": 0.2998, "step": 8540 }, { "epoch": 0.4001030589778423, "grad_norm": 0.6396788828021214, "learning_rate": 4.630338473104833e-06, "loss": 0.3034, "step": 8541 }, { "epoch": 0.40014990396777067, "grad_norm": 0.6156951510077852, "learning_rate": 4.630239218995177e-06, "loss": 0.2895, "step": 8542 }, { "epoch": 0.40019674895769897, "grad_norm": 0.5755990150883756, "learning_rate": 4.630139952626529e-06, "loss": 0.2872, "step": 8543 }, { "epoch": 0.4002435939476273, "grad_norm": 0.5686701604653406, "learning_rate": 4.630040673999462e-06, "loss": 0.2819, "step": 8544 }, { "epoch": 0.4002904389375556, "grad_norm": 0.6521513763949693, "learning_rate": 4.629941383114545e-06, "loss": 0.318, "step": 8545 }, { "epoch": 0.40033728392748397, "grad_norm": 0.6044595226001906, "learning_rate": 4.629842079972352e-06, "loss": 0.2678, "step": 8546 }, { "epoch": 0.40038412891741226, "grad_norm": 0.6242506019969059, "learning_rate": 4.629742764573453e-06, "loss": 0.3116, "step": 8547 }, { "epoch": 0.4004309739073406, "grad_norm": 0.6293240479095663, "learning_rate": 4.629643436918419e-06, "loss": 0.2978, "step": 8548 }, { "epoch": 0.4004778188972689, "grad_norm": 0.6449527795435078, "learning_rate": 4.629544097007822e-06, "loss": 0.3156, "step": 8549 }, { "epoch": 0.40052466388719726, "grad_norm": 0.6096831861210518, "learning_rate": 4.629444744842235e-06, "loss": 0.2839, "step": 8550 }, { "epoch": 0.4005715088771256, "grad_norm": 0.6341565591339748, "learning_rate": 4.629345380422228e-06, "loss": 0.2878, "step": 8551 }, { "epoch": 0.4006183538670539, "grad_norm": 0.6132709885441723, "learning_rate": 4.629246003748374e-06, "loss": 0.2813, "step": 8552 }, { "epoch": 0.40066519885698226, "grad_norm": 0.6085042305145638, "learning_rate": 4.6291466148212435e-06, "loss": 0.2901, "step": 8553 }, { "epoch": 0.40071204384691056, "grad_norm": 0.6414524390945582, "learning_rate": 4.629047213641411e-06, "loss": 0.283, "step": 8554 }, { "epoch": 0.4007588888368389, "grad_norm": 0.6304948601451681, "learning_rate": 4.6289478002094454e-06, "loss": 0.2871, "step": 8555 }, { "epoch": 0.4008057338267672, "grad_norm": 0.6837113924058461, "learning_rate": 4.62884837452592e-06, "loss": 0.2878, "step": 8556 }, { "epoch": 0.40085257881669556, "grad_norm": 0.5886805762866953, "learning_rate": 4.6287489365914085e-06, "loss": 0.2909, "step": 8557 }, { "epoch": 0.40089942380662386, "grad_norm": 0.5973080250080914, "learning_rate": 4.628649486406482e-06, "loss": 0.2845, "step": 8558 }, { "epoch": 0.4009462687965522, "grad_norm": 0.616371102013071, "learning_rate": 4.628550023971712e-06, "loss": 0.2938, "step": 8559 }, { "epoch": 0.40099311378648056, "grad_norm": 0.6408694353069422, "learning_rate": 4.628450549287672e-06, "loss": 0.3018, "step": 8560 }, { "epoch": 0.40103995877640886, "grad_norm": 0.5698756446842308, "learning_rate": 4.628351062354934e-06, "loss": 0.2806, "step": 8561 }, { "epoch": 0.4010868037663372, "grad_norm": 0.6287180167171269, "learning_rate": 4.6282515631740695e-06, "loss": 0.2812, "step": 8562 }, { "epoch": 0.4011336487562655, "grad_norm": 0.62866841611808, "learning_rate": 4.628152051745654e-06, "loss": 0.2944, "step": 8563 }, { "epoch": 0.40118049374619386, "grad_norm": 0.6666504094751692, "learning_rate": 4.628052528070257e-06, "loss": 0.2924, "step": 8564 }, { "epoch": 0.40122733873612215, "grad_norm": 0.5558404515545976, "learning_rate": 4.627952992148454e-06, "loss": 0.2809, "step": 8565 }, { "epoch": 0.4012741837260505, "grad_norm": 0.6413707827282725, "learning_rate": 4.627853443980814e-06, "loss": 0.2955, "step": 8566 }, { "epoch": 0.4013210287159788, "grad_norm": 0.6658446277414343, "learning_rate": 4.627753883567914e-06, "loss": 0.2925, "step": 8567 }, { "epoch": 0.40136787370590715, "grad_norm": 0.5616222711626142, "learning_rate": 4.627654310910325e-06, "loss": 0.2808, "step": 8568 }, { "epoch": 0.4014147186958355, "grad_norm": 0.5762690796636502, "learning_rate": 4.62755472600862e-06, "loss": 0.323, "step": 8569 }, { "epoch": 0.4014615636857638, "grad_norm": 0.5700645160925895, "learning_rate": 4.627455128863372e-06, "loss": 0.2916, "step": 8570 }, { "epoch": 0.40150840867569215, "grad_norm": 0.6776535615447676, "learning_rate": 4.627355519475155e-06, "loss": 0.3165, "step": 8571 }, { "epoch": 0.40155525366562045, "grad_norm": 0.6326608228466142, "learning_rate": 4.627255897844541e-06, "loss": 0.3129, "step": 8572 }, { "epoch": 0.4016020986555488, "grad_norm": 0.5935670762611837, "learning_rate": 4.627156263972105e-06, "loss": 0.2825, "step": 8573 }, { "epoch": 0.4016489436454771, "grad_norm": 0.5934278640657438, "learning_rate": 4.6270566178584185e-06, "loss": 0.2817, "step": 8574 }, { "epoch": 0.40169578863540545, "grad_norm": 0.5838907725576887, "learning_rate": 4.626956959504057e-06, "loss": 0.2971, "step": 8575 }, { "epoch": 0.40174263362533374, "grad_norm": 0.6267235470393918, "learning_rate": 4.626857288909591e-06, "loss": 0.2993, "step": 8576 }, { "epoch": 0.4017894786152621, "grad_norm": 0.6319986461810286, "learning_rate": 4.626757606075597e-06, "loss": 0.282, "step": 8577 }, { "epoch": 0.40183632360519045, "grad_norm": 0.5757399174191892, "learning_rate": 4.6266579110026466e-06, "loss": 0.2927, "step": 8578 }, { "epoch": 0.40188316859511874, "grad_norm": 0.5809353276120217, "learning_rate": 4.626558203691316e-06, "loss": 0.2864, "step": 8579 }, { "epoch": 0.4019300135850471, "grad_norm": 0.6104900128917722, "learning_rate": 4.6264584841421764e-06, "loss": 0.3112, "step": 8580 }, { "epoch": 0.4019768585749754, "grad_norm": 0.599140720239169, "learning_rate": 4.626358752355803e-06, "loss": 0.291, "step": 8581 }, { "epoch": 0.40202370356490374, "grad_norm": 0.6044983837373795, "learning_rate": 4.626259008332768e-06, "loss": 0.289, "step": 8582 }, { "epoch": 0.40207054855483204, "grad_norm": 0.6038516489365906, "learning_rate": 4.6261592520736485e-06, "loss": 0.2811, "step": 8583 }, { "epoch": 0.4021173935447604, "grad_norm": 0.5660690047819037, "learning_rate": 4.626059483579017e-06, "loss": 0.2838, "step": 8584 }, { "epoch": 0.4021642385346887, "grad_norm": 0.5863025379102196, "learning_rate": 4.625959702849446e-06, "loss": 0.3042, "step": 8585 }, { "epoch": 0.40221108352461704, "grad_norm": 0.6413694608762806, "learning_rate": 4.625859909885513e-06, "loss": 0.2928, "step": 8586 }, { "epoch": 0.4022579285145454, "grad_norm": 0.6098641734479988, "learning_rate": 4.62576010468779e-06, "loss": 0.2698, "step": 8587 }, { "epoch": 0.4023047735044737, "grad_norm": 0.6284089278923584, "learning_rate": 4.625660287256851e-06, "loss": 0.2961, "step": 8588 }, { "epoch": 0.40235161849440204, "grad_norm": 0.6718956896398367, "learning_rate": 4.625560457593272e-06, "loss": 0.2811, "step": 8589 }, { "epoch": 0.40239846348433034, "grad_norm": 0.6386896427972538, "learning_rate": 4.6254606156976265e-06, "loss": 0.3, "step": 8590 }, { "epoch": 0.4024453084742587, "grad_norm": 0.5835213073675696, "learning_rate": 4.6253607615704895e-06, "loss": 0.2815, "step": 8591 }, { "epoch": 0.402492153464187, "grad_norm": 0.5698394868446863, "learning_rate": 4.6252608952124356e-06, "loss": 0.2735, "step": 8592 }, { "epoch": 0.40253899845411534, "grad_norm": 0.5814868621310145, "learning_rate": 4.62516101662404e-06, "loss": 0.2908, "step": 8593 }, { "epoch": 0.40258584344404363, "grad_norm": 0.6593114687508782, "learning_rate": 4.625061125805876e-06, "loss": 0.2886, "step": 8594 }, { "epoch": 0.402632688433972, "grad_norm": 0.6252230794348574, "learning_rate": 4.62496122275852e-06, "loss": 0.303, "step": 8595 }, { "epoch": 0.40267953342390034, "grad_norm": 0.6012349139709574, "learning_rate": 4.624861307482545e-06, "loss": 0.302, "step": 8596 }, { "epoch": 0.40272637841382863, "grad_norm": 0.6016499327777097, "learning_rate": 4.624761379978529e-06, "loss": 0.302, "step": 8597 }, { "epoch": 0.402773223403757, "grad_norm": 0.5845788279742324, "learning_rate": 4.624661440247045e-06, "loss": 0.2799, "step": 8598 }, { "epoch": 0.4028200683936853, "grad_norm": 0.6461262978623794, "learning_rate": 4.624561488288667e-06, "loss": 0.2831, "step": 8599 }, { "epoch": 0.40286691338361363, "grad_norm": 0.5995997355612589, "learning_rate": 4.6244615241039726e-06, "loss": 0.2986, "step": 8600 }, { "epoch": 0.40291375837354193, "grad_norm": 0.6379957625157874, "learning_rate": 4.624361547693536e-06, "loss": 0.3043, "step": 8601 }, { "epoch": 0.4029606033634703, "grad_norm": 0.5330443648723713, "learning_rate": 4.624261559057932e-06, "loss": 0.2899, "step": 8602 }, { "epoch": 0.4030074483533986, "grad_norm": 0.6591725514740655, "learning_rate": 4.6241615581977375e-06, "loss": 0.3006, "step": 8603 }, { "epoch": 0.40305429334332693, "grad_norm": 0.6025533817629103, "learning_rate": 4.624061545113527e-06, "loss": 0.2969, "step": 8604 }, { "epoch": 0.4031011383332553, "grad_norm": 0.621087723819579, "learning_rate": 4.6239615198058764e-06, "loss": 0.3037, "step": 8605 }, { "epoch": 0.4031479833231836, "grad_norm": 0.5852944631435459, "learning_rate": 4.62386148227536e-06, "loss": 0.2933, "step": 8606 }, { "epoch": 0.40319482831311193, "grad_norm": 0.5957696864271578, "learning_rate": 4.623761432522555e-06, "loss": 0.2871, "step": 8607 }, { "epoch": 0.4032416733030402, "grad_norm": 0.6961905648357694, "learning_rate": 4.623661370548038e-06, "loss": 0.3002, "step": 8608 }, { "epoch": 0.4032885182929686, "grad_norm": 0.7022502333552141, "learning_rate": 4.623561296352382e-06, "loss": 0.3062, "step": 8609 }, { "epoch": 0.4033353632828969, "grad_norm": 0.6162487637711235, "learning_rate": 4.6234612099361655e-06, "loss": 0.3069, "step": 8610 }, { "epoch": 0.4033822082728252, "grad_norm": 0.6298181271328221, "learning_rate": 4.623361111299963e-06, "loss": 0.3081, "step": 8611 }, { "epoch": 0.4034290532627535, "grad_norm": 0.6240175237742414, "learning_rate": 4.623261000444351e-06, "loss": 0.2976, "step": 8612 }, { "epoch": 0.4034758982526819, "grad_norm": 0.5728167345032632, "learning_rate": 4.6231608773699055e-06, "loss": 0.2781, "step": 8613 }, { "epoch": 0.4035227432426102, "grad_norm": 0.616956679192847, "learning_rate": 4.623060742077204e-06, "loss": 0.2923, "step": 8614 }, { "epoch": 0.4035695882325385, "grad_norm": 0.6866788682306535, "learning_rate": 4.62296059456682e-06, "loss": 0.3003, "step": 8615 }, { "epoch": 0.4036164332224669, "grad_norm": 0.6261258756251793, "learning_rate": 4.622860434839331e-06, "loss": 0.2621, "step": 8616 }, { "epoch": 0.40366327821239517, "grad_norm": 0.5636020703170888, "learning_rate": 4.622760262895315e-06, "loss": 0.2854, "step": 8617 }, { "epoch": 0.4037101232023235, "grad_norm": 0.6111755939842697, "learning_rate": 4.6226600787353475e-06, "loss": 0.2896, "step": 8618 }, { "epoch": 0.4037569681922518, "grad_norm": 0.5814695360354663, "learning_rate": 4.622559882360004e-06, "loss": 0.2799, "step": 8619 }, { "epoch": 0.40380381318218017, "grad_norm": 0.6419358420205452, "learning_rate": 4.622459673769861e-06, "loss": 0.2916, "step": 8620 }, { "epoch": 0.40385065817210847, "grad_norm": 0.6264759652435784, "learning_rate": 4.622359452965497e-06, "loss": 0.2886, "step": 8621 }, { "epoch": 0.4038975031620368, "grad_norm": 0.6694052125972203, "learning_rate": 4.622259219947488e-06, "loss": 0.3005, "step": 8622 }, { "epoch": 0.40394434815196517, "grad_norm": 0.6623994358756243, "learning_rate": 4.622158974716411e-06, "loss": 0.3087, "step": 8623 }, { "epoch": 0.40399119314189347, "grad_norm": 0.6518706968436938, "learning_rate": 4.622058717272841e-06, "loss": 0.3103, "step": 8624 }, { "epoch": 0.4040380381318218, "grad_norm": 0.6824021129816926, "learning_rate": 4.621958447617357e-06, "loss": 0.305, "step": 8625 }, { "epoch": 0.4040848831217501, "grad_norm": 0.6228368943410892, "learning_rate": 4.621858165750537e-06, "loss": 0.2918, "step": 8626 }, { "epoch": 0.40413172811167847, "grad_norm": 0.6055476658714016, "learning_rate": 4.621757871672955e-06, "loss": 0.2839, "step": 8627 }, { "epoch": 0.40417857310160676, "grad_norm": 0.6304794036649758, "learning_rate": 4.621657565385189e-06, "loss": 0.2802, "step": 8628 }, { "epoch": 0.4042254180915351, "grad_norm": 0.6228320465959053, "learning_rate": 4.621557246887819e-06, "loss": 0.3089, "step": 8629 }, { "epoch": 0.4042722630814634, "grad_norm": 0.6059289907059473, "learning_rate": 4.62145691618142e-06, "loss": 0.2812, "step": 8630 }, { "epoch": 0.40431910807139176, "grad_norm": 0.5715437924002671, "learning_rate": 4.621356573266568e-06, "loss": 0.2773, "step": 8631 }, { "epoch": 0.4043659530613201, "grad_norm": 0.5887095323539946, "learning_rate": 4.6212562181438435e-06, "loss": 0.3015, "step": 8632 }, { "epoch": 0.4044127980512484, "grad_norm": 0.6472036057355334, "learning_rate": 4.621155850813822e-06, "loss": 0.3231, "step": 8633 }, { "epoch": 0.40445964304117676, "grad_norm": 0.6100243934826486, "learning_rate": 4.621055471277082e-06, "loss": 0.2886, "step": 8634 }, { "epoch": 0.40450648803110506, "grad_norm": 0.6197731109323414, "learning_rate": 4.6209550795342005e-06, "loss": 0.3034, "step": 8635 }, { "epoch": 0.4045533330210334, "grad_norm": 0.5720432740297311, "learning_rate": 4.6208546755857556e-06, "loss": 0.2929, "step": 8636 }, { "epoch": 0.4046001780109617, "grad_norm": 0.6663414416961846, "learning_rate": 4.620754259432326e-06, "loss": 0.2858, "step": 8637 }, { "epoch": 0.40464702300089006, "grad_norm": 0.6274516249234605, "learning_rate": 4.620653831074488e-06, "loss": 0.3093, "step": 8638 }, { "epoch": 0.40469386799081836, "grad_norm": 0.6546045914650813, "learning_rate": 4.62055339051282e-06, "loss": 0.2878, "step": 8639 }, { "epoch": 0.4047407129807467, "grad_norm": 0.6031585615088212, "learning_rate": 4.6204529377479e-06, "loss": 0.2889, "step": 8640 }, { "epoch": 0.40478755797067506, "grad_norm": 0.6539261523048918, "learning_rate": 4.620352472780307e-06, "loss": 0.2962, "step": 8641 }, { "epoch": 0.40483440296060336, "grad_norm": 0.6346823961959533, "learning_rate": 4.6202519956106185e-06, "loss": 0.3095, "step": 8642 }, { "epoch": 0.4048812479505317, "grad_norm": 0.6628770059190068, "learning_rate": 4.620151506239412e-06, "loss": 0.3284, "step": 8643 }, { "epoch": 0.40492809294046, "grad_norm": 0.6187281180221658, "learning_rate": 4.620051004667268e-06, "loss": 0.3241, "step": 8644 }, { "epoch": 0.40497493793038836, "grad_norm": 0.6250251120445671, "learning_rate": 4.619950490894761e-06, "loss": 0.2859, "step": 8645 }, { "epoch": 0.40502178292031665, "grad_norm": 0.5637791640660027, "learning_rate": 4.619849964922473e-06, "loss": 0.2631, "step": 8646 }, { "epoch": 0.405068627910245, "grad_norm": 0.598384625016336, "learning_rate": 4.6197494267509815e-06, "loss": 0.2799, "step": 8647 }, { "epoch": 0.4051154729001733, "grad_norm": 0.6148451192746766, "learning_rate": 4.619648876380865e-06, "loss": 0.2895, "step": 8648 }, { "epoch": 0.40516231789010165, "grad_norm": 0.6036715087106567, "learning_rate": 4.619548313812701e-06, "loss": 0.2719, "step": 8649 }, { "epoch": 0.40520916288003, "grad_norm": 0.6703598157242887, "learning_rate": 4.6194477390470694e-06, "loss": 0.2879, "step": 8650 }, { "epoch": 0.4052560078699583, "grad_norm": 0.5955133107890987, "learning_rate": 4.619347152084549e-06, "loss": 0.2937, "step": 8651 }, { "epoch": 0.40530285285988665, "grad_norm": 0.5874560248657017, "learning_rate": 4.619246552925718e-06, "loss": 0.2916, "step": 8652 }, { "epoch": 0.40534969784981495, "grad_norm": 0.5833676198476183, "learning_rate": 4.619145941571157e-06, "loss": 0.2971, "step": 8653 }, { "epoch": 0.4053965428397433, "grad_norm": 0.647687775643518, "learning_rate": 4.619045318021442e-06, "loss": 0.311, "step": 8654 }, { "epoch": 0.4054433878296716, "grad_norm": 0.6529814777816152, "learning_rate": 4.618944682277155e-06, "loss": 0.2979, "step": 8655 }, { "epoch": 0.40549023281959995, "grad_norm": 0.6409125388227094, "learning_rate": 4.618844034338874e-06, "loss": 0.303, "step": 8656 }, { "epoch": 0.40553707780952825, "grad_norm": 0.5909003858406173, "learning_rate": 4.618743374207178e-06, "loss": 0.3011, "step": 8657 }, { "epoch": 0.4055839227994566, "grad_norm": 0.5781934106780838, "learning_rate": 4.618642701882646e-06, "loss": 0.295, "step": 8658 }, { "epoch": 0.40563076778938495, "grad_norm": 0.5989432744480301, "learning_rate": 4.618542017365858e-06, "loss": 0.2873, "step": 8659 }, { "epoch": 0.40567761277931325, "grad_norm": 0.5760944323673816, "learning_rate": 4.618441320657393e-06, "loss": 0.2848, "step": 8660 }, { "epoch": 0.4057244577692416, "grad_norm": 0.6255093945260068, "learning_rate": 4.618340611757831e-06, "loss": 0.2932, "step": 8661 }, { "epoch": 0.4057713027591699, "grad_norm": 0.6049394455613514, "learning_rate": 4.6182398906677505e-06, "loss": 0.2954, "step": 8662 }, { "epoch": 0.40581814774909825, "grad_norm": 0.597708546277453, "learning_rate": 4.618139157387732e-06, "loss": 0.2935, "step": 8663 }, { "epoch": 0.40586499273902654, "grad_norm": 0.6379800919709265, "learning_rate": 4.618038411918356e-06, "loss": 0.3028, "step": 8664 }, { "epoch": 0.4059118377289549, "grad_norm": 0.5883855660662012, "learning_rate": 4.617937654260201e-06, "loss": 0.2779, "step": 8665 }, { "epoch": 0.4059586827188832, "grad_norm": 0.6036445838182213, "learning_rate": 4.617836884413846e-06, "loss": 0.2819, "step": 8666 }, { "epoch": 0.40600552770881154, "grad_norm": 0.6086687034889314, "learning_rate": 4.617736102379873e-06, "loss": 0.2937, "step": 8667 }, { "epoch": 0.4060523726987399, "grad_norm": 0.5710152566775081, "learning_rate": 4.61763530815886e-06, "loss": 0.2728, "step": 8668 }, { "epoch": 0.4060992176886682, "grad_norm": 0.6009013190409853, "learning_rate": 4.617534501751389e-06, "loss": 0.2824, "step": 8669 }, { "epoch": 0.40614606267859654, "grad_norm": 0.5630879140324582, "learning_rate": 4.617433683158039e-06, "loss": 0.2859, "step": 8670 }, { "epoch": 0.40619290766852484, "grad_norm": 0.7828476206215812, "learning_rate": 4.61733285237939e-06, "loss": 0.3098, "step": 8671 }, { "epoch": 0.4062397526584532, "grad_norm": 0.5798174625569532, "learning_rate": 4.617232009416024e-06, "loss": 0.2939, "step": 8672 }, { "epoch": 0.4062865976483815, "grad_norm": 0.5769100396593089, "learning_rate": 4.617131154268518e-06, "loss": 0.2937, "step": 8673 }, { "epoch": 0.40633344263830984, "grad_norm": 0.6316812100748027, "learning_rate": 4.617030286937455e-06, "loss": 0.3332, "step": 8674 }, { "epoch": 0.40638028762823813, "grad_norm": 0.5871353721180803, "learning_rate": 4.616929407423416e-06, "loss": 0.2882, "step": 8675 }, { "epoch": 0.4064271326181665, "grad_norm": 0.6284412508667113, "learning_rate": 4.6168285157269785e-06, "loss": 0.2903, "step": 8676 }, { "epoch": 0.40647397760809484, "grad_norm": 0.5863136416099259, "learning_rate": 4.616727611848726e-06, "loss": 0.2815, "step": 8677 }, { "epoch": 0.40652082259802313, "grad_norm": 0.6750663284792869, "learning_rate": 4.616626695789238e-06, "loss": 0.3012, "step": 8678 }, { "epoch": 0.4065676675879515, "grad_norm": 0.648334764839544, "learning_rate": 4.616525767549095e-06, "loss": 0.3046, "step": 8679 }, { "epoch": 0.4066145125778798, "grad_norm": 0.6002649752707524, "learning_rate": 4.616424827128878e-06, "loss": 0.3013, "step": 8680 }, { "epoch": 0.40666135756780813, "grad_norm": 0.5860059922881815, "learning_rate": 4.616323874529169e-06, "loss": 0.2701, "step": 8681 }, { "epoch": 0.40670820255773643, "grad_norm": 0.6160929412065946, "learning_rate": 4.616222909750547e-06, "loss": 0.2852, "step": 8682 }, { "epoch": 0.4067550475476648, "grad_norm": 0.5678739404753828, "learning_rate": 4.616121932793595e-06, "loss": 0.2641, "step": 8683 }, { "epoch": 0.4068018925375931, "grad_norm": 0.6010789286402058, "learning_rate": 4.616020943658892e-06, "loss": 0.2872, "step": 8684 }, { "epoch": 0.40684873752752143, "grad_norm": 0.5911122803676647, "learning_rate": 4.615919942347022e-06, "loss": 0.3039, "step": 8685 }, { "epoch": 0.4068955825174498, "grad_norm": 0.5856159746167968, "learning_rate": 4.615818928858563e-06, "loss": 0.2901, "step": 8686 }, { "epoch": 0.4069424275073781, "grad_norm": 0.6201680344436782, "learning_rate": 4.615717903194098e-06, "loss": 0.2886, "step": 8687 }, { "epoch": 0.40698927249730643, "grad_norm": 0.5885513929544374, "learning_rate": 4.615616865354209e-06, "loss": 0.2742, "step": 8688 }, { "epoch": 0.4070361174872347, "grad_norm": 0.6099949698784651, "learning_rate": 4.615515815339476e-06, "loss": 0.3035, "step": 8689 }, { "epoch": 0.4070829624771631, "grad_norm": 0.5979953738125338, "learning_rate": 4.615414753150482e-06, "loss": 0.2883, "step": 8690 }, { "epoch": 0.4071298074670914, "grad_norm": 0.5869884506287417, "learning_rate": 4.615313678787807e-06, "loss": 0.2806, "step": 8691 }, { "epoch": 0.4071766524570197, "grad_norm": 0.5977232684447903, "learning_rate": 4.615212592252034e-06, "loss": 0.3271, "step": 8692 }, { "epoch": 0.407223497446948, "grad_norm": 0.6033129252080153, "learning_rate": 4.615111493543744e-06, "loss": 0.3098, "step": 8693 }, { "epoch": 0.4072703424368764, "grad_norm": 0.7124140546078874, "learning_rate": 4.615010382663519e-06, "loss": 0.3035, "step": 8694 }, { "epoch": 0.4073171874268047, "grad_norm": 0.5911942959593482, "learning_rate": 4.61490925961194e-06, "loss": 0.2817, "step": 8695 }, { "epoch": 0.407364032416733, "grad_norm": 0.6578838165417463, "learning_rate": 4.614808124389591e-06, "loss": 0.3081, "step": 8696 }, { "epoch": 0.4074108774066614, "grad_norm": 0.6089854802776196, "learning_rate": 4.614706976997052e-06, "loss": 0.2987, "step": 8697 }, { "epoch": 0.40745772239658967, "grad_norm": 0.5482237216325192, "learning_rate": 4.614605817434907e-06, "loss": 0.273, "step": 8698 }, { "epoch": 0.407504567386518, "grad_norm": 0.6568619829469103, "learning_rate": 4.614504645703735e-06, "loss": 0.3014, "step": 8699 }, { "epoch": 0.4075514123764463, "grad_norm": 0.6115506009813719, "learning_rate": 4.614403461804121e-06, "loss": 0.2894, "step": 8700 }, { "epoch": 0.40759825736637467, "grad_norm": 0.6286544277227188, "learning_rate": 4.614302265736648e-06, "loss": 0.3076, "step": 8701 }, { "epoch": 0.40764510235630297, "grad_norm": 0.5775114478334397, "learning_rate": 4.614201057501895e-06, "loss": 0.2952, "step": 8702 }, { "epoch": 0.4076919473462313, "grad_norm": 0.6077176254923677, "learning_rate": 4.614099837100447e-06, "loss": 0.2828, "step": 8703 }, { "epoch": 0.40773879233615967, "grad_norm": 0.6155094205474976, "learning_rate": 4.613998604532885e-06, "loss": 0.2888, "step": 8704 }, { "epoch": 0.40778563732608797, "grad_norm": 0.6409560128990501, "learning_rate": 4.613897359799794e-06, "loss": 0.284, "step": 8705 }, { "epoch": 0.4078324823160163, "grad_norm": 0.5864176778066603, "learning_rate": 4.613796102901754e-06, "loss": 0.2993, "step": 8706 }, { "epoch": 0.4078793273059446, "grad_norm": 0.5541243500843176, "learning_rate": 4.613694833839349e-06, "loss": 0.2586, "step": 8707 }, { "epoch": 0.40792617229587297, "grad_norm": 0.6352780332227893, "learning_rate": 4.613593552613162e-06, "loss": 0.2976, "step": 8708 }, { "epoch": 0.40797301728580126, "grad_norm": 0.6987571027569817, "learning_rate": 4.613492259223774e-06, "loss": 0.286, "step": 8709 }, { "epoch": 0.4080198622757296, "grad_norm": 0.5734043454983905, "learning_rate": 4.61339095367177e-06, "loss": 0.2811, "step": 8710 }, { "epoch": 0.4080667072656579, "grad_norm": 0.6473029828616277, "learning_rate": 4.613289635957733e-06, "loss": 0.3084, "step": 8711 }, { "epoch": 0.40811355225558626, "grad_norm": 0.6632740984383084, "learning_rate": 4.613188306082243e-06, "loss": 0.2968, "step": 8712 }, { "epoch": 0.4081603972455146, "grad_norm": 0.6764585765498883, "learning_rate": 4.613086964045888e-06, "loss": 0.3036, "step": 8713 }, { "epoch": 0.4082072422354429, "grad_norm": 0.6261551388126996, "learning_rate": 4.6129856098492474e-06, "loss": 0.278, "step": 8714 }, { "epoch": 0.40825408722537126, "grad_norm": 0.6174921104130826, "learning_rate": 4.6128842434929054e-06, "loss": 0.2981, "step": 8715 }, { "epoch": 0.40830093221529956, "grad_norm": 0.6016703780659484, "learning_rate": 4.612782864977446e-06, "loss": 0.2808, "step": 8716 }, { "epoch": 0.4083477772052279, "grad_norm": 0.6482959862576428, "learning_rate": 4.612681474303453e-06, "loss": 0.3039, "step": 8717 }, { "epoch": 0.4083946221951562, "grad_norm": 0.5822968300251846, "learning_rate": 4.6125800714715084e-06, "loss": 0.2911, "step": 8718 }, { "epoch": 0.40844146718508456, "grad_norm": 0.5858734096847206, "learning_rate": 4.612478656482196e-06, "loss": 0.2846, "step": 8719 }, { "epoch": 0.40848831217501286, "grad_norm": 0.610824666857837, "learning_rate": 4.6123772293361005e-06, "loss": 0.2531, "step": 8720 }, { "epoch": 0.4085351571649412, "grad_norm": 0.6086265782484644, "learning_rate": 4.6122757900338054e-06, "loss": 0.2931, "step": 8721 }, { "epoch": 0.40858200215486956, "grad_norm": 0.6016427000737679, "learning_rate": 4.612174338575893e-06, "loss": 0.2885, "step": 8722 }, { "epoch": 0.40862884714479786, "grad_norm": 0.5675323785132387, "learning_rate": 4.612072874962949e-06, "loss": 0.2694, "step": 8723 }, { "epoch": 0.4086756921347262, "grad_norm": 0.6704311058190693, "learning_rate": 4.611971399195556e-06, "loss": 0.2968, "step": 8724 }, { "epoch": 0.4087225371246545, "grad_norm": 0.6027875262815126, "learning_rate": 4.6118699112742986e-06, "loss": 0.3059, "step": 8725 }, { "epoch": 0.40876938211458286, "grad_norm": 0.6232257312482397, "learning_rate": 4.61176841119976e-06, "loss": 0.2852, "step": 8726 }, { "epoch": 0.40881622710451115, "grad_norm": 0.605841325966805, "learning_rate": 4.611666898972526e-06, "loss": 0.2923, "step": 8727 }, { "epoch": 0.4088630720944395, "grad_norm": 0.6016132546975586, "learning_rate": 4.61156537459318e-06, "loss": 0.3029, "step": 8728 }, { "epoch": 0.4089099170843678, "grad_norm": 0.6603647467854514, "learning_rate": 4.611463838062305e-06, "loss": 0.302, "step": 8729 }, { "epoch": 0.40895676207429615, "grad_norm": 0.5729983474103172, "learning_rate": 4.611362289380487e-06, "loss": 0.2905, "step": 8730 }, { "epoch": 0.4090036070642245, "grad_norm": 0.6483397257721644, "learning_rate": 4.611260728548309e-06, "loss": 0.3142, "step": 8731 }, { "epoch": 0.4090504520541528, "grad_norm": 0.6113887063799347, "learning_rate": 4.611159155566356e-06, "loss": 0.2959, "step": 8732 }, { "epoch": 0.40909729704408115, "grad_norm": 0.6368592750187374, "learning_rate": 4.611057570435214e-06, "loss": 0.305, "step": 8733 }, { "epoch": 0.40914414203400945, "grad_norm": 0.5852951240798075, "learning_rate": 4.610955973155464e-06, "loss": 0.2736, "step": 8734 }, { "epoch": 0.4091909870239378, "grad_norm": 0.5616919563831995, "learning_rate": 4.610854363727694e-06, "loss": 0.2749, "step": 8735 }, { "epoch": 0.4092378320138661, "grad_norm": 0.5697504232763844, "learning_rate": 4.610752742152489e-06, "loss": 0.2579, "step": 8736 }, { "epoch": 0.40928467700379445, "grad_norm": 0.5741662948129758, "learning_rate": 4.6106511084304315e-06, "loss": 0.2774, "step": 8737 }, { "epoch": 0.40933152199372275, "grad_norm": 0.5574766935957883, "learning_rate": 4.610549462562107e-06, "loss": 0.2734, "step": 8738 }, { "epoch": 0.4093783669836511, "grad_norm": 0.5839772064501909, "learning_rate": 4.610447804548102e-06, "loss": 0.27, "step": 8739 }, { "epoch": 0.40942521197357945, "grad_norm": 0.655559374284203, "learning_rate": 4.6103461343889994e-06, "loss": 0.3117, "step": 8740 }, { "epoch": 0.40947205696350775, "grad_norm": 0.6477245487929768, "learning_rate": 4.610244452085385e-06, "loss": 0.3068, "step": 8741 }, { "epoch": 0.4095189019534361, "grad_norm": 0.629018942155701, "learning_rate": 4.610142757637845e-06, "loss": 0.3002, "step": 8742 }, { "epoch": 0.4095657469433644, "grad_norm": 0.6170915664296767, "learning_rate": 4.610041051046963e-06, "loss": 0.2977, "step": 8743 }, { "epoch": 0.40961259193329275, "grad_norm": 0.6343599175279094, "learning_rate": 4.609939332313325e-06, "loss": 0.2931, "step": 8744 }, { "epoch": 0.40965943692322104, "grad_norm": 0.6430207265570429, "learning_rate": 4.609837601437517e-06, "loss": 0.3092, "step": 8745 }, { "epoch": 0.4097062819131494, "grad_norm": 0.5786127939764387, "learning_rate": 4.609735858420124e-06, "loss": 0.2931, "step": 8746 }, { "epoch": 0.4097531269030777, "grad_norm": 0.5486500945886934, "learning_rate": 4.609634103261731e-06, "loss": 0.2744, "step": 8747 }, { "epoch": 0.40979997189300604, "grad_norm": 0.6166831406437296, "learning_rate": 4.609532335962924e-06, "loss": 0.2962, "step": 8748 }, { "epoch": 0.4098468168829344, "grad_norm": 0.6570769808795228, "learning_rate": 4.609430556524289e-06, "loss": 0.2945, "step": 8749 }, { "epoch": 0.4098936618728627, "grad_norm": 0.6779795118289781, "learning_rate": 4.609328764946411e-06, "loss": 0.2914, "step": 8750 }, { "epoch": 0.40994050686279104, "grad_norm": 0.5738659811578843, "learning_rate": 4.609226961229876e-06, "loss": 0.2945, "step": 8751 }, { "epoch": 0.40998735185271934, "grad_norm": 0.6075196568946366, "learning_rate": 4.609125145375271e-06, "loss": 0.2979, "step": 8752 }, { "epoch": 0.4100341968426477, "grad_norm": 0.6024115865460041, "learning_rate": 4.609023317383179e-06, "loss": 0.2891, "step": 8753 }, { "epoch": 0.410081041832576, "grad_norm": 0.5876865373108472, "learning_rate": 4.608921477254189e-06, "loss": 0.2875, "step": 8754 }, { "epoch": 0.41012788682250434, "grad_norm": 0.5887755629998075, "learning_rate": 4.608819624988886e-06, "loss": 0.276, "step": 8755 }, { "epoch": 0.41017473181243264, "grad_norm": 0.6017573409120713, "learning_rate": 4.608717760587856e-06, "loss": 0.283, "step": 8756 }, { "epoch": 0.410221576802361, "grad_norm": 0.5717545590036358, "learning_rate": 4.608615884051686e-06, "loss": 0.2832, "step": 8757 }, { "epoch": 0.41026842179228934, "grad_norm": 0.6010593569618756, "learning_rate": 4.60851399538096e-06, "loss": 0.2714, "step": 8758 }, { "epoch": 0.41031526678221764, "grad_norm": 0.5962415134388862, "learning_rate": 4.608412094576267e-06, "loss": 0.2941, "step": 8759 }, { "epoch": 0.410362111772146, "grad_norm": 0.6335470548204533, "learning_rate": 4.608310181638192e-06, "loss": 0.2828, "step": 8760 }, { "epoch": 0.4104089567620743, "grad_norm": 0.6032254378676172, "learning_rate": 4.608208256567322e-06, "loss": 0.2935, "step": 8761 }, { "epoch": 0.41045580175200264, "grad_norm": 0.5900454106788189, "learning_rate": 4.6081063193642425e-06, "loss": 0.3039, "step": 8762 }, { "epoch": 0.41050264674193093, "grad_norm": 0.5727159227680806, "learning_rate": 4.608004370029542e-06, "loss": 0.2821, "step": 8763 }, { "epoch": 0.4105494917318593, "grad_norm": 0.6623887186928964, "learning_rate": 4.607902408563806e-06, "loss": 0.3076, "step": 8764 }, { "epoch": 0.4105963367217876, "grad_norm": 0.6234190450232915, "learning_rate": 4.6078004349676215e-06, "loss": 0.2948, "step": 8765 }, { "epoch": 0.41064318171171593, "grad_norm": 0.5841698112576406, "learning_rate": 4.607698449241575e-06, "loss": 0.2679, "step": 8766 }, { "epoch": 0.4106900267016443, "grad_norm": 0.5901953880621695, "learning_rate": 4.6075964513862535e-06, "loss": 0.2951, "step": 8767 }, { "epoch": 0.4107368716915726, "grad_norm": 0.5926626556977563, "learning_rate": 4.607494441402245e-06, "loss": 0.2911, "step": 8768 }, { "epoch": 0.41078371668150093, "grad_norm": 0.6061419606292714, "learning_rate": 4.607392419290135e-06, "loss": 0.2818, "step": 8769 }, { "epoch": 0.41083056167142923, "grad_norm": 0.6497408329153112, "learning_rate": 4.607290385050511e-06, "loss": 0.295, "step": 8770 }, { "epoch": 0.4108774066613576, "grad_norm": 0.6287288902901734, "learning_rate": 4.607188338683961e-06, "loss": 0.3214, "step": 8771 }, { "epoch": 0.4109242516512859, "grad_norm": 0.6204227377561234, "learning_rate": 4.607086280191072e-06, "loss": 0.2755, "step": 8772 }, { "epoch": 0.41097109664121423, "grad_norm": 0.5993568441859738, "learning_rate": 4.606984209572431e-06, "loss": 0.2994, "step": 8773 }, { "epoch": 0.4110179416311425, "grad_norm": 0.5636492415779619, "learning_rate": 4.606882126828625e-06, "loss": 0.285, "step": 8774 }, { "epoch": 0.4110647866210709, "grad_norm": 0.5958139920889239, "learning_rate": 4.606780031960242e-06, "loss": 0.2683, "step": 8775 }, { "epoch": 0.41111163161099923, "grad_norm": 0.6463583994807994, "learning_rate": 4.606677924967869e-06, "loss": 0.3187, "step": 8776 }, { "epoch": 0.4111584766009275, "grad_norm": 0.6592335358491249, "learning_rate": 4.606575805852095e-06, "loss": 0.3066, "step": 8777 }, { "epoch": 0.4112053215908559, "grad_norm": 0.5986463475225497, "learning_rate": 4.606473674613507e-06, "loss": 0.2759, "step": 8778 }, { "epoch": 0.4112521665807842, "grad_norm": 0.5603771565428856, "learning_rate": 4.60637153125269e-06, "loss": 0.2846, "step": 8779 }, { "epoch": 0.4112990115707125, "grad_norm": 0.5643772396606281, "learning_rate": 4.606269375770237e-06, "loss": 0.2915, "step": 8780 }, { "epoch": 0.4113458565606408, "grad_norm": 0.6000580637776152, "learning_rate": 4.606167208166732e-06, "loss": 0.3071, "step": 8781 }, { "epoch": 0.4113927015505692, "grad_norm": 0.6187873036754209, "learning_rate": 4.606065028442764e-06, "loss": 0.2995, "step": 8782 }, { "epoch": 0.41143954654049747, "grad_norm": 0.568539273533983, "learning_rate": 4.605962836598921e-06, "loss": 0.2948, "step": 8783 }, { "epoch": 0.4114863915304258, "grad_norm": 0.6435394813952348, "learning_rate": 4.605860632635791e-06, "loss": 0.2953, "step": 8784 }, { "epoch": 0.4115332365203542, "grad_norm": 0.5972458318369681, "learning_rate": 4.605758416553963e-06, "loss": 0.2884, "step": 8785 }, { "epoch": 0.41158008151028247, "grad_norm": 0.6711625037971767, "learning_rate": 4.605656188354025e-06, "loss": 0.3119, "step": 8786 }, { "epoch": 0.4116269265002108, "grad_norm": 0.6091356283855369, "learning_rate": 4.605553948036564e-06, "loss": 0.2849, "step": 8787 }, { "epoch": 0.4116737714901391, "grad_norm": 0.6057388048758423, "learning_rate": 4.605451695602169e-06, "loss": 0.2707, "step": 8788 }, { "epoch": 0.41172061648006747, "grad_norm": 0.5330382463328583, "learning_rate": 4.605349431051429e-06, "loss": 0.2726, "step": 8789 }, { "epoch": 0.41176746146999577, "grad_norm": 0.5761556881965529, "learning_rate": 4.605247154384933e-06, "loss": 0.2743, "step": 8790 }, { "epoch": 0.4118143064599241, "grad_norm": 0.5758117378836316, "learning_rate": 4.605144865603268e-06, "loss": 0.258, "step": 8791 }, { "epoch": 0.4118611514498524, "grad_norm": 0.6617273057500944, "learning_rate": 4.605042564707023e-06, "loss": 0.3059, "step": 8792 }, { "epoch": 0.41190799643978077, "grad_norm": 0.535744972122774, "learning_rate": 4.604940251696788e-06, "loss": 0.2577, "step": 8793 }, { "epoch": 0.4119548414297091, "grad_norm": 0.6089192242665002, "learning_rate": 4.60483792657315e-06, "loss": 0.3083, "step": 8794 }, { "epoch": 0.4120016864196374, "grad_norm": 0.6081422064745111, "learning_rate": 4.604735589336699e-06, "loss": 0.2993, "step": 8795 }, { "epoch": 0.41204853140956577, "grad_norm": 0.5764958034473602, "learning_rate": 4.604633239988025e-06, "loss": 0.2778, "step": 8796 }, { "epoch": 0.41209537639949406, "grad_norm": 0.6428706023534821, "learning_rate": 4.604530878527714e-06, "loss": 0.2987, "step": 8797 }, { "epoch": 0.4121422213894224, "grad_norm": 0.5899739805935983, "learning_rate": 4.604428504956357e-06, "loss": 0.2918, "step": 8798 }, { "epoch": 0.4121890663793507, "grad_norm": 0.5682196867872318, "learning_rate": 4.604326119274544e-06, "loss": 0.2908, "step": 8799 }, { "epoch": 0.41223591136927906, "grad_norm": 0.6463863787327956, "learning_rate": 4.604223721482862e-06, "loss": 0.3046, "step": 8800 }, { "epoch": 0.41228275635920736, "grad_norm": 0.6288753852965446, "learning_rate": 4.604121311581902e-06, "loss": 0.3121, "step": 8801 }, { "epoch": 0.4123296013491357, "grad_norm": 0.631540125837496, "learning_rate": 4.604018889572253e-06, "loss": 0.3111, "step": 8802 }, { "epoch": 0.41237644633906406, "grad_norm": 0.591927642998439, "learning_rate": 4.603916455454504e-06, "loss": 0.2887, "step": 8803 }, { "epoch": 0.41242329132899236, "grad_norm": 0.6250935017717906, "learning_rate": 4.603814009229243e-06, "loss": 0.2714, "step": 8804 }, { "epoch": 0.4124701363189207, "grad_norm": 0.5944279040639083, "learning_rate": 4.603711550897062e-06, "loss": 0.2793, "step": 8805 }, { "epoch": 0.412516981308849, "grad_norm": 0.621144039970112, "learning_rate": 4.603609080458551e-06, "loss": 0.3084, "step": 8806 }, { "epoch": 0.41256382629877736, "grad_norm": 0.5607852402210868, "learning_rate": 4.603506597914297e-06, "loss": 0.2932, "step": 8807 }, { "epoch": 0.41261067128870565, "grad_norm": 0.6092239810391066, "learning_rate": 4.603404103264892e-06, "loss": 0.3008, "step": 8808 }, { "epoch": 0.412657516278634, "grad_norm": 0.6404144842602949, "learning_rate": 4.603301596510924e-06, "loss": 0.2874, "step": 8809 }, { "epoch": 0.4127043612685623, "grad_norm": 0.5686193248871748, "learning_rate": 4.6031990776529855e-06, "loss": 0.3039, "step": 8810 }, { "epoch": 0.41275120625849065, "grad_norm": 0.6563422215225814, "learning_rate": 4.603096546691664e-06, "loss": 0.3128, "step": 8811 }, { "epoch": 0.412798051248419, "grad_norm": 0.5574392561590708, "learning_rate": 4.602994003627551e-06, "loss": 0.286, "step": 8812 }, { "epoch": 0.4128448962383473, "grad_norm": 0.5518941168185385, "learning_rate": 4.602891448461236e-06, "loss": 0.2689, "step": 8813 }, { "epoch": 0.41289174122827565, "grad_norm": 0.6059601478511546, "learning_rate": 4.602788881193308e-06, "loss": 0.292, "step": 8814 }, { "epoch": 0.41293858621820395, "grad_norm": 0.6392778381298259, "learning_rate": 4.602686301824361e-06, "loss": 0.3113, "step": 8815 }, { "epoch": 0.4129854312081323, "grad_norm": 0.617144665957547, "learning_rate": 4.602583710354981e-06, "loss": 0.266, "step": 8816 }, { "epoch": 0.4130322761980606, "grad_norm": 0.6062360669971044, "learning_rate": 4.60248110678576e-06, "loss": 0.2818, "step": 8817 }, { "epoch": 0.41307912118798895, "grad_norm": 0.623118479872317, "learning_rate": 4.602378491117289e-06, "loss": 0.3175, "step": 8818 }, { "epoch": 0.41312596617791725, "grad_norm": 0.5836987114015386, "learning_rate": 4.6022758633501585e-06, "loss": 0.2971, "step": 8819 }, { "epoch": 0.4131728111678456, "grad_norm": 0.5912525754852438, "learning_rate": 4.602173223484959e-06, "loss": 0.2934, "step": 8820 }, { "epoch": 0.41321965615777395, "grad_norm": 0.6412196738465701, "learning_rate": 4.60207057152228e-06, "loss": 0.2942, "step": 8821 }, { "epoch": 0.41326650114770225, "grad_norm": 0.6359985483944358, "learning_rate": 4.6019679074627145e-06, "loss": 0.2977, "step": 8822 }, { "epoch": 0.4133133461376306, "grad_norm": 0.578061454013568, "learning_rate": 4.601865231306851e-06, "loss": 0.2875, "step": 8823 }, { "epoch": 0.4133601911275589, "grad_norm": 0.6127392758145647, "learning_rate": 4.601762543055282e-06, "loss": 0.285, "step": 8824 }, { "epoch": 0.41340703611748725, "grad_norm": 0.6203319161532945, "learning_rate": 4.601659842708598e-06, "loss": 0.294, "step": 8825 }, { "epoch": 0.41345388110741554, "grad_norm": 0.6437173337129869, "learning_rate": 4.601557130267389e-06, "loss": 0.3129, "step": 8826 }, { "epoch": 0.4135007260973439, "grad_norm": 0.6634639880076189, "learning_rate": 4.601454405732248e-06, "loss": 0.2959, "step": 8827 }, { "epoch": 0.4135475710872722, "grad_norm": 0.5996644894543111, "learning_rate": 4.601351669103764e-06, "loss": 0.2745, "step": 8828 }, { "epoch": 0.41359441607720054, "grad_norm": 0.6444110020585889, "learning_rate": 4.601248920382529e-06, "loss": 0.2984, "step": 8829 }, { "epoch": 0.4136412610671289, "grad_norm": 0.5834606584909904, "learning_rate": 4.601146159569135e-06, "loss": 0.2871, "step": 8830 }, { "epoch": 0.4136881060570572, "grad_norm": 0.604622915302667, "learning_rate": 4.601043386664174e-06, "loss": 0.2825, "step": 8831 }, { "epoch": 0.41373495104698554, "grad_norm": 0.5751427086734134, "learning_rate": 4.600940601668236e-06, "loss": 0.2711, "step": 8832 }, { "epoch": 0.41378179603691384, "grad_norm": 0.6157190009542108, "learning_rate": 4.600837804581912e-06, "loss": 0.2816, "step": 8833 }, { "epoch": 0.4138286410268422, "grad_norm": 0.6164873470099238, "learning_rate": 4.600734995405795e-06, "loss": 0.2887, "step": 8834 }, { "epoch": 0.4138754860167705, "grad_norm": 0.6541549477345983, "learning_rate": 4.600632174140476e-06, "loss": 0.3017, "step": 8835 }, { "epoch": 0.41392233100669884, "grad_norm": 0.606790561828752, "learning_rate": 4.600529340786547e-06, "loss": 0.2832, "step": 8836 }, { "epoch": 0.41396917599662714, "grad_norm": 0.5859863405360527, "learning_rate": 4.600426495344599e-06, "loss": 0.2792, "step": 8837 }, { "epoch": 0.4140160209865555, "grad_norm": 0.6445255725693138, "learning_rate": 4.600323637815226e-06, "loss": 0.2888, "step": 8838 }, { "epoch": 0.41406286597648384, "grad_norm": 0.5926742669286823, "learning_rate": 4.600220768199017e-06, "loss": 0.2824, "step": 8839 }, { "epoch": 0.41410971096641214, "grad_norm": 0.5940479892037973, "learning_rate": 4.600117886496565e-06, "loss": 0.2911, "step": 8840 }, { "epoch": 0.4141565559563405, "grad_norm": 0.6261881487271513, "learning_rate": 4.600014992708464e-06, "loss": 0.3213, "step": 8841 }, { "epoch": 0.4142034009462688, "grad_norm": 0.6298237506464963, "learning_rate": 4.599912086835303e-06, "loss": 0.2867, "step": 8842 }, { "epoch": 0.41425024593619714, "grad_norm": 0.5723704311237036, "learning_rate": 4.599809168877676e-06, "loss": 0.2689, "step": 8843 }, { "epoch": 0.41429709092612543, "grad_norm": 0.6282835268149312, "learning_rate": 4.599706238836176e-06, "loss": 0.2847, "step": 8844 }, { "epoch": 0.4143439359160538, "grad_norm": 0.6270941751102574, "learning_rate": 4.599603296711393e-06, "loss": 0.2879, "step": 8845 }, { "epoch": 0.4143907809059821, "grad_norm": 0.6193160657063644, "learning_rate": 4.599500342503922e-06, "loss": 0.2877, "step": 8846 }, { "epoch": 0.41443762589591043, "grad_norm": 0.6266805970290261, "learning_rate": 4.599397376214353e-06, "loss": 0.2933, "step": 8847 }, { "epoch": 0.4144844708858388, "grad_norm": 0.6596164832222476, "learning_rate": 4.599294397843281e-06, "loss": 0.3064, "step": 8848 }, { "epoch": 0.4145313158757671, "grad_norm": 0.6348743076864957, "learning_rate": 4.599191407391296e-06, "loss": 0.2853, "step": 8849 }, { "epoch": 0.41457816086569543, "grad_norm": 0.6122612572424079, "learning_rate": 4.5990884048589935e-06, "loss": 0.3074, "step": 8850 }, { "epoch": 0.41462500585562373, "grad_norm": 0.5813923466817439, "learning_rate": 4.5989853902469635e-06, "loss": 0.28, "step": 8851 }, { "epoch": 0.4146718508455521, "grad_norm": 0.5687330190671989, "learning_rate": 4.598882363555801e-06, "loss": 0.2717, "step": 8852 }, { "epoch": 0.4147186958354804, "grad_norm": 0.6298692310200782, "learning_rate": 4.598779324786098e-06, "loss": 0.3158, "step": 8853 }, { "epoch": 0.41476554082540873, "grad_norm": 0.6293803951353678, "learning_rate": 4.598676273938447e-06, "loss": 0.2873, "step": 8854 }, { "epoch": 0.414812385815337, "grad_norm": 0.5877304746618169, "learning_rate": 4.598573211013441e-06, "loss": 0.284, "step": 8855 }, { "epoch": 0.4148592308052654, "grad_norm": 0.6090768748539188, "learning_rate": 4.598470136011676e-06, "loss": 0.2888, "step": 8856 }, { "epoch": 0.41490607579519373, "grad_norm": 0.6373405625557448, "learning_rate": 4.598367048933741e-06, "loss": 0.329, "step": 8857 }, { "epoch": 0.414952920785122, "grad_norm": 0.6237825017909133, "learning_rate": 4.598263949780231e-06, "loss": 0.2896, "step": 8858 }, { "epoch": 0.4149997657750504, "grad_norm": 0.5898458739310094, "learning_rate": 4.598160838551739e-06, "loss": 0.2904, "step": 8859 }, { "epoch": 0.4150466107649787, "grad_norm": 0.6197624100557818, "learning_rate": 4.59805771524886e-06, "loss": 0.3084, "step": 8860 }, { "epoch": 0.415093455754907, "grad_norm": 0.6325903020068809, "learning_rate": 4.597954579872186e-06, "loss": 0.3004, "step": 8861 }, { "epoch": 0.4151403007448353, "grad_norm": 0.5401233464887465, "learning_rate": 4.59785143242231e-06, "loss": 0.2615, "step": 8862 }, { "epoch": 0.4151871457347637, "grad_norm": 0.6326154464917867, "learning_rate": 4.597748272899827e-06, "loss": 0.2868, "step": 8863 }, { "epoch": 0.41523399072469197, "grad_norm": 0.5990555137969267, "learning_rate": 4.597645101305329e-06, "loss": 0.2851, "step": 8864 }, { "epoch": 0.4152808357146203, "grad_norm": 0.5759044520815833, "learning_rate": 4.597541917639411e-06, "loss": 0.2629, "step": 8865 }, { "epoch": 0.4153276807045487, "grad_norm": 0.659900482181257, "learning_rate": 4.5974387219026665e-06, "loss": 0.2825, "step": 8866 }, { "epoch": 0.41537452569447697, "grad_norm": 0.6165295250171617, "learning_rate": 4.597335514095689e-06, "loss": 0.2793, "step": 8867 }, { "epoch": 0.4154213706844053, "grad_norm": 0.5808019729398056, "learning_rate": 4.597232294219074e-06, "loss": 0.302, "step": 8868 }, { "epoch": 0.4154682156743336, "grad_norm": 0.5825867875171319, "learning_rate": 4.597129062273413e-06, "loss": 0.2857, "step": 8869 }, { "epoch": 0.41551506066426197, "grad_norm": 0.615557111076171, "learning_rate": 4.597025818259302e-06, "loss": 0.2984, "step": 8870 }, { "epoch": 0.41556190565419027, "grad_norm": 0.5733383009158812, "learning_rate": 4.596922562177334e-06, "loss": 0.2886, "step": 8871 }, { "epoch": 0.4156087506441186, "grad_norm": 0.5871832206430398, "learning_rate": 4.596819294028103e-06, "loss": 0.2892, "step": 8872 }, { "epoch": 0.4156555956340469, "grad_norm": 0.6707156027500023, "learning_rate": 4.596716013812205e-06, "loss": 0.2876, "step": 8873 }, { "epoch": 0.41570244062397527, "grad_norm": 0.621869121761059, "learning_rate": 4.596612721530234e-06, "loss": 0.2935, "step": 8874 }, { "epoch": 0.4157492856139036, "grad_norm": 0.671984815770959, "learning_rate": 4.596509417182783e-06, "loss": 0.3035, "step": 8875 }, { "epoch": 0.4157961306038319, "grad_norm": 0.592453267879297, "learning_rate": 4.596406100770448e-06, "loss": 0.2908, "step": 8876 }, { "epoch": 0.41584297559376027, "grad_norm": 0.5700702890737865, "learning_rate": 4.596302772293821e-06, "loss": 0.288, "step": 8877 }, { "epoch": 0.41588982058368856, "grad_norm": 0.5513382635463704, "learning_rate": 4.5961994317535e-06, "loss": 0.2724, "step": 8878 }, { "epoch": 0.4159366655736169, "grad_norm": 0.5546844171719169, "learning_rate": 4.596096079150077e-06, "loss": 0.278, "step": 8879 }, { "epoch": 0.4159835105635452, "grad_norm": 0.6652694121186157, "learning_rate": 4.595992714484149e-06, "loss": 0.3077, "step": 8880 }, { "epoch": 0.41603035555347356, "grad_norm": 0.5580558278217757, "learning_rate": 4.59588933775631e-06, "loss": 0.2914, "step": 8881 }, { "epoch": 0.41607720054340186, "grad_norm": 0.6209547821343978, "learning_rate": 4.595785948967153e-06, "loss": 0.2992, "step": 8882 }, { "epoch": 0.4161240455333302, "grad_norm": 0.6378582294185541, "learning_rate": 4.5956825481172765e-06, "loss": 0.2816, "step": 8883 }, { "epoch": 0.41617089052325856, "grad_norm": 0.5763411853709415, "learning_rate": 4.595579135207272e-06, "loss": 0.2696, "step": 8884 }, { "epoch": 0.41621773551318686, "grad_norm": 0.5900480048866749, "learning_rate": 4.595475710237736e-06, "loss": 0.2737, "step": 8885 }, { "epoch": 0.4162645805031152, "grad_norm": 0.5953284748570105, "learning_rate": 4.595372273209265e-06, "loss": 0.2945, "step": 8886 }, { "epoch": 0.4163114254930435, "grad_norm": 0.5931501488428844, "learning_rate": 4.595268824122454e-06, "loss": 0.297, "step": 8887 }, { "epoch": 0.41635827048297186, "grad_norm": 0.6205126541661667, "learning_rate": 4.595165362977897e-06, "loss": 0.3061, "step": 8888 }, { "epoch": 0.41640511547290016, "grad_norm": 0.5769282545218178, "learning_rate": 4.595061889776189e-06, "loss": 0.2723, "step": 8889 }, { "epoch": 0.4164519604628285, "grad_norm": 0.6382377783710449, "learning_rate": 4.594958404517927e-06, "loss": 0.2977, "step": 8890 }, { "epoch": 0.4164988054527568, "grad_norm": 0.5707753275016447, "learning_rate": 4.594854907203706e-06, "loss": 0.2895, "step": 8891 }, { "epoch": 0.41654565044268516, "grad_norm": 0.5949219530110861, "learning_rate": 4.594751397834122e-06, "loss": 0.2769, "step": 8892 }, { "epoch": 0.4165924954326135, "grad_norm": 0.6356226136996511, "learning_rate": 4.594647876409769e-06, "loss": 0.2873, "step": 8893 }, { "epoch": 0.4166393404225418, "grad_norm": 0.6556403868791764, "learning_rate": 4.594544342931245e-06, "loss": 0.2773, "step": 8894 }, { "epoch": 0.41668618541247016, "grad_norm": 0.5788126573435223, "learning_rate": 4.594440797399145e-06, "loss": 0.2713, "step": 8895 }, { "epoch": 0.41673303040239845, "grad_norm": 0.62925013354837, "learning_rate": 4.594337239814063e-06, "loss": 0.2821, "step": 8896 }, { "epoch": 0.4167798753923268, "grad_norm": 0.5762757765123517, "learning_rate": 4.594233670176597e-06, "loss": 0.2644, "step": 8897 }, { "epoch": 0.4168267203822551, "grad_norm": 0.6298928077347891, "learning_rate": 4.594130088487344e-06, "loss": 0.3096, "step": 8898 }, { "epoch": 0.41687356537218345, "grad_norm": 0.6088996775924495, "learning_rate": 4.5940264947468986e-06, "loss": 0.3035, "step": 8899 }, { "epoch": 0.41692041036211175, "grad_norm": 0.650640900070595, "learning_rate": 4.593922888955856e-06, "loss": 0.2798, "step": 8900 }, { "epoch": 0.4169672553520401, "grad_norm": 0.605056482219107, "learning_rate": 4.593819271114814e-06, "loss": 0.2825, "step": 8901 }, { "epoch": 0.41701410034196845, "grad_norm": 0.5905013938458983, "learning_rate": 4.593715641224368e-06, "loss": 0.3057, "step": 8902 }, { "epoch": 0.41706094533189675, "grad_norm": 0.5858275627827837, "learning_rate": 4.593611999285115e-06, "loss": 0.2729, "step": 8903 }, { "epoch": 0.4171077903218251, "grad_norm": 0.5825002645168708, "learning_rate": 4.59350834529765e-06, "loss": 0.3097, "step": 8904 }, { "epoch": 0.4171546353117534, "grad_norm": 0.5327796517245014, "learning_rate": 4.593404679262572e-06, "loss": 0.2964, "step": 8905 }, { "epoch": 0.41720148030168175, "grad_norm": 0.6001376794497125, "learning_rate": 4.593301001180476e-06, "loss": 0.3055, "step": 8906 }, { "epoch": 0.41724832529161004, "grad_norm": 0.6285910837797577, "learning_rate": 4.593197311051959e-06, "loss": 0.2868, "step": 8907 }, { "epoch": 0.4172951702815384, "grad_norm": 0.6005220915351154, "learning_rate": 4.5930936088776166e-06, "loss": 0.2927, "step": 8908 }, { "epoch": 0.4173420152714667, "grad_norm": 0.6188338453853557, "learning_rate": 4.592989894658048e-06, "loss": 0.3052, "step": 8909 }, { "epoch": 0.41738886026139504, "grad_norm": 0.5864431664998095, "learning_rate": 4.592886168393848e-06, "loss": 0.2763, "step": 8910 }, { "epoch": 0.4174357052513234, "grad_norm": 0.59093682419857, "learning_rate": 4.592782430085614e-06, "loss": 0.2949, "step": 8911 }, { "epoch": 0.4174825502412517, "grad_norm": 0.5625699109448082, "learning_rate": 4.5926786797339425e-06, "loss": 0.2683, "step": 8912 }, { "epoch": 0.41752939523118004, "grad_norm": 0.6693132935268062, "learning_rate": 4.592574917339432e-06, "loss": 0.3032, "step": 8913 }, { "epoch": 0.41757624022110834, "grad_norm": 0.670530529015677, "learning_rate": 4.592471142902679e-06, "loss": 0.2972, "step": 8914 }, { "epoch": 0.4176230852110367, "grad_norm": 0.5625816466990538, "learning_rate": 4.59236735642428e-06, "loss": 0.2858, "step": 8915 }, { "epoch": 0.417669930200965, "grad_norm": 0.5676061442163095, "learning_rate": 4.592263557904833e-06, "loss": 0.2771, "step": 8916 }, { "epoch": 0.41771677519089334, "grad_norm": 0.5805303602307419, "learning_rate": 4.592159747344935e-06, "loss": 0.3021, "step": 8917 }, { "epoch": 0.41776362018082164, "grad_norm": 0.5818573910609278, "learning_rate": 4.592055924745183e-06, "loss": 0.2881, "step": 8918 }, { "epoch": 0.41781046517075, "grad_norm": 0.583624582567928, "learning_rate": 4.5919520901061755e-06, "loss": 0.2702, "step": 8919 }, { "epoch": 0.41785731016067834, "grad_norm": 0.5740943620059696, "learning_rate": 4.591848243428509e-06, "loss": 0.2802, "step": 8920 }, { "epoch": 0.41790415515060664, "grad_norm": 0.6548996481806261, "learning_rate": 4.591744384712783e-06, "loss": 0.2834, "step": 8921 }, { "epoch": 0.417951000140535, "grad_norm": 0.5907966078839836, "learning_rate": 4.591640513959592e-06, "loss": 0.2742, "step": 8922 }, { "epoch": 0.4179978451304633, "grad_norm": 0.6271581180193766, "learning_rate": 4.591536631169537e-06, "loss": 0.3081, "step": 8923 }, { "epoch": 0.41804469012039164, "grad_norm": 0.6263178826043996, "learning_rate": 4.5914327363432135e-06, "loss": 0.3059, "step": 8924 }, { "epoch": 0.41809153511031993, "grad_norm": 0.6250314881951518, "learning_rate": 4.5913288294812204e-06, "loss": 0.3025, "step": 8925 }, { "epoch": 0.4181383801002483, "grad_norm": 0.6477505327844706, "learning_rate": 4.591224910584156e-06, "loss": 0.3264, "step": 8926 }, { "epoch": 0.4181852250901766, "grad_norm": 0.6111727216919538, "learning_rate": 4.591120979652618e-06, "loss": 0.3018, "step": 8927 }, { "epoch": 0.41823207008010493, "grad_norm": 0.6221548652086919, "learning_rate": 4.591017036687203e-06, "loss": 0.2956, "step": 8928 }, { "epoch": 0.4182789150700333, "grad_norm": 0.6321144715748002, "learning_rate": 4.5909130816885114e-06, "loss": 0.2998, "step": 8929 }, { "epoch": 0.4183257600599616, "grad_norm": 0.6466967750784809, "learning_rate": 4.590809114657141e-06, "loss": 0.2855, "step": 8930 }, { "epoch": 0.41837260504988993, "grad_norm": 0.6399230228213202, "learning_rate": 4.590705135593689e-06, "loss": 0.2989, "step": 8931 }, { "epoch": 0.41841945003981823, "grad_norm": 0.6128635322034025, "learning_rate": 4.5906011444987555e-06, "loss": 0.2767, "step": 8932 }, { "epoch": 0.4184662950297466, "grad_norm": 0.6137885701672573, "learning_rate": 4.590497141372936e-06, "loss": 0.2883, "step": 8933 }, { "epoch": 0.4185131400196749, "grad_norm": 0.6114442325975394, "learning_rate": 4.590393126216832e-06, "loss": 0.2851, "step": 8934 }, { "epoch": 0.41855998500960323, "grad_norm": 0.6155662710717387, "learning_rate": 4.5902890990310415e-06, "loss": 0.2699, "step": 8935 }, { "epoch": 0.4186068299995315, "grad_norm": 0.5647580375400837, "learning_rate": 4.590185059816161e-06, "loss": 0.2866, "step": 8936 }, { "epoch": 0.4186536749894599, "grad_norm": 0.6782122677108805, "learning_rate": 4.590081008572792e-06, "loss": 0.3031, "step": 8937 }, { "epoch": 0.41870051997938823, "grad_norm": 0.5783485902181256, "learning_rate": 4.589976945301532e-06, "loss": 0.2997, "step": 8938 }, { "epoch": 0.4187473649693165, "grad_norm": 0.635423976111719, "learning_rate": 4.589872870002981e-06, "loss": 0.3112, "step": 8939 }, { "epoch": 0.4187942099592449, "grad_norm": 0.6198998806334087, "learning_rate": 4.589768782677735e-06, "loss": 0.2952, "step": 8940 }, { "epoch": 0.4188410549491732, "grad_norm": 0.5849078966665331, "learning_rate": 4.589664683326396e-06, "loss": 0.2599, "step": 8941 }, { "epoch": 0.4188878999391015, "grad_norm": 0.5596497018795142, "learning_rate": 4.589560571949561e-06, "loss": 0.2846, "step": 8942 }, { "epoch": 0.4189347449290298, "grad_norm": 0.6600412006153294, "learning_rate": 4.589456448547831e-06, "loss": 0.3007, "step": 8943 }, { "epoch": 0.4189815899189582, "grad_norm": 0.6330668288109949, "learning_rate": 4.589352313121804e-06, "loss": 0.2962, "step": 8944 }, { "epoch": 0.41902843490888647, "grad_norm": 0.5965498299129439, "learning_rate": 4.5892481656720785e-06, "loss": 0.2813, "step": 8945 }, { "epoch": 0.4190752798988148, "grad_norm": 0.5051064265830665, "learning_rate": 4.5891440061992565e-06, "loss": 0.2413, "step": 8946 }, { "epoch": 0.4191221248887432, "grad_norm": 0.5892095270387068, "learning_rate": 4.589039834703936e-06, "loss": 0.3118, "step": 8947 }, { "epoch": 0.41916896987867147, "grad_norm": 0.5805869088374296, "learning_rate": 4.588935651186716e-06, "loss": 0.2617, "step": 8948 }, { "epoch": 0.4192158148685998, "grad_norm": 0.6084503182827439, "learning_rate": 4.588831455648195e-06, "loss": 0.3009, "step": 8949 }, { "epoch": 0.4192626598585281, "grad_norm": 0.5456243513556537, "learning_rate": 4.588727248088975e-06, "loss": 0.2578, "step": 8950 }, { "epoch": 0.41930950484845647, "grad_norm": 0.5760195701168013, "learning_rate": 4.588623028509655e-06, "loss": 0.2962, "step": 8951 }, { "epoch": 0.41935634983838477, "grad_norm": 0.5742063873615674, "learning_rate": 4.588518796910834e-06, "loss": 0.2987, "step": 8952 }, { "epoch": 0.4194031948283131, "grad_norm": 0.5459089690038788, "learning_rate": 4.588414553293112e-06, "loss": 0.2812, "step": 8953 }, { "epoch": 0.4194500398182414, "grad_norm": 0.6025602208551842, "learning_rate": 4.58831029765709e-06, "loss": 0.2804, "step": 8954 }, { "epoch": 0.41949688480816977, "grad_norm": 0.651815807753746, "learning_rate": 4.588206030003367e-06, "loss": 0.2992, "step": 8955 }, { "epoch": 0.4195437297980981, "grad_norm": 0.5814205607627231, "learning_rate": 4.5881017503325425e-06, "loss": 0.2887, "step": 8956 }, { "epoch": 0.4195905747880264, "grad_norm": 0.6589823868209643, "learning_rate": 4.587997458645218e-06, "loss": 0.3051, "step": 8957 }, { "epoch": 0.41963741977795477, "grad_norm": 0.6253023784805336, "learning_rate": 4.587893154941992e-06, "loss": 0.2918, "step": 8958 }, { "epoch": 0.41968426476788306, "grad_norm": 0.576869327554698, "learning_rate": 4.587788839223466e-06, "loss": 0.2836, "step": 8959 }, { "epoch": 0.4197311097578114, "grad_norm": 0.6943372532302392, "learning_rate": 4.58768451149024e-06, "loss": 0.3077, "step": 8960 }, { "epoch": 0.4197779547477397, "grad_norm": 0.5818127232310272, "learning_rate": 4.587580171742915e-06, "loss": 0.271, "step": 8961 }, { "epoch": 0.41982479973766806, "grad_norm": 0.5689520922873543, "learning_rate": 4.58747581998209e-06, "loss": 0.2716, "step": 8962 }, { "epoch": 0.41987164472759636, "grad_norm": 0.6150870918485615, "learning_rate": 4.587371456208366e-06, "loss": 0.2973, "step": 8963 }, { "epoch": 0.4199184897175247, "grad_norm": 0.6086884546188855, "learning_rate": 4.587267080422345e-06, "loss": 0.3155, "step": 8964 }, { "epoch": 0.41996533470745306, "grad_norm": 0.5720154404486028, "learning_rate": 4.587162692624626e-06, "loss": 0.2818, "step": 8965 }, { "epoch": 0.42001217969738136, "grad_norm": 0.6182308619534707, "learning_rate": 4.58705829281581e-06, "loss": 0.2898, "step": 8966 }, { "epoch": 0.4200590246873097, "grad_norm": 0.5971152441808417, "learning_rate": 4.5869538809964986e-06, "loss": 0.2687, "step": 8967 }, { "epoch": 0.420105869677238, "grad_norm": 0.6476891551813444, "learning_rate": 4.5868494571672916e-06, "loss": 0.3117, "step": 8968 }, { "epoch": 0.42015271466716636, "grad_norm": 0.6048751507269783, "learning_rate": 4.586745021328791e-06, "loss": 0.2972, "step": 8969 }, { "epoch": 0.42019955965709466, "grad_norm": 0.5957023448467401, "learning_rate": 4.5866405734815975e-06, "loss": 0.2999, "step": 8970 }, { "epoch": 0.420246404647023, "grad_norm": 0.5882290588022151, "learning_rate": 4.586536113626312e-06, "loss": 0.2566, "step": 8971 }, { "epoch": 0.4202932496369513, "grad_norm": 0.5855245853594782, "learning_rate": 4.586431641763535e-06, "loss": 0.2787, "step": 8972 }, { "epoch": 0.42034009462687966, "grad_norm": 0.6030751498086425, "learning_rate": 4.5863271578938685e-06, "loss": 0.296, "step": 8973 }, { "epoch": 0.420386939616808, "grad_norm": 0.6051589549602626, "learning_rate": 4.5862226620179135e-06, "loss": 0.2732, "step": 8974 }, { "epoch": 0.4204337846067363, "grad_norm": 0.5884567826590751, "learning_rate": 4.586118154136271e-06, "loss": 0.2776, "step": 8975 }, { "epoch": 0.42048062959666466, "grad_norm": 0.5884592626253511, "learning_rate": 4.5860136342495434e-06, "loss": 0.2846, "step": 8976 }, { "epoch": 0.42052747458659295, "grad_norm": 0.5803072691350356, "learning_rate": 4.585909102358332e-06, "loss": 0.2806, "step": 8977 }, { "epoch": 0.4205743195765213, "grad_norm": 0.6344166873415253, "learning_rate": 4.585804558463238e-06, "loss": 0.2797, "step": 8978 }, { "epoch": 0.4206211645664496, "grad_norm": 0.6063152847122046, "learning_rate": 4.585700002564862e-06, "loss": 0.2658, "step": 8979 }, { "epoch": 0.42066800955637795, "grad_norm": 0.6948414128302612, "learning_rate": 4.585595434663808e-06, "loss": 0.3382, "step": 8980 }, { "epoch": 0.42071485454630625, "grad_norm": 0.5487014816955788, "learning_rate": 4.585490854760675e-06, "loss": 0.2665, "step": 8981 }, { "epoch": 0.4207616995362346, "grad_norm": 0.5801532403089379, "learning_rate": 4.585386262856067e-06, "loss": 0.2792, "step": 8982 }, { "epoch": 0.42080854452616295, "grad_norm": 0.5965449246375123, "learning_rate": 4.585281658950585e-06, "loss": 0.273, "step": 8983 }, { "epoch": 0.42085538951609125, "grad_norm": 0.6270032061854568, "learning_rate": 4.585177043044831e-06, "loss": 0.278, "step": 8984 }, { "epoch": 0.4209022345060196, "grad_norm": 0.611716160478262, "learning_rate": 4.585072415139409e-06, "loss": 0.2819, "step": 8985 }, { "epoch": 0.4209490794959479, "grad_norm": 0.6203765080528363, "learning_rate": 4.584967775234918e-06, "loss": 0.2933, "step": 8986 }, { "epoch": 0.42099592448587625, "grad_norm": 0.6141272525312675, "learning_rate": 4.584863123331962e-06, "loss": 0.298, "step": 8987 }, { "epoch": 0.42104276947580455, "grad_norm": 0.6444647255243179, "learning_rate": 4.584758459431141e-06, "loss": 0.2952, "step": 8988 }, { "epoch": 0.4210896144657329, "grad_norm": 0.6349718178999652, "learning_rate": 4.584653783533061e-06, "loss": 0.2959, "step": 8989 }, { "epoch": 0.4211364594556612, "grad_norm": 0.6304213650289728, "learning_rate": 4.584549095638321e-06, "loss": 0.3091, "step": 8990 }, { "epoch": 0.42118330444558955, "grad_norm": 0.6170697814320156, "learning_rate": 4.584444395747526e-06, "loss": 0.3043, "step": 8991 }, { "epoch": 0.4212301494355179, "grad_norm": 0.5932010360667674, "learning_rate": 4.584339683861277e-06, "loss": 0.2711, "step": 8992 }, { "epoch": 0.4212769944254462, "grad_norm": 0.6481284301149566, "learning_rate": 4.584234959980178e-06, "loss": 0.2993, "step": 8993 }, { "epoch": 0.42132383941537455, "grad_norm": 0.6225187386083681, "learning_rate": 4.584130224104828e-06, "loss": 0.3006, "step": 8994 }, { "epoch": 0.42137068440530284, "grad_norm": 0.596874680035242, "learning_rate": 4.584025476235835e-06, "loss": 0.3131, "step": 8995 }, { "epoch": 0.4214175293952312, "grad_norm": 0.6436350788957423, "learning_rate": 4.583920716373798e-06, "loss": 0.2949, "step": 8996 }, { "epoch": 0.4214643743851595, "grad_norm": 0.6653437213720061, "learning_rate": 4.583815944519321e-06, "loss": 0.299, "step": 8997 }, { "epoch": 0.42151121937508784, "grad_norm": 0.5890565858803467, "learning_rate": 4.583711160673007e-06, "loss": 0.2949, "step": 8998 }, { "epoch": 0.42155806436501614, "grad_norm": 0.66133171424962, "learning_rate": 4.583606364835459e-06, "loss": 0.2983, "step": 8999 }, { "epoch": 0.4216049093549445, "grad_norm": 0.643438222774161, "learning_rate": 4.58350155700728e-06, "loss": 0.2895, "step": 9000 }, { "epoch": 0.42165175434487284, "grad_norm": 0.5907154727990711, "learning_rate": 4.583396737189074e-06, "loss": 0.2891, "step": 9001 }, { "epoch": 0.42169859933480114, "grad_norm": 0.616188222168616, "learning_rate": 4.583291905381443e-06, "loss": 0.289, "step": 9002 }, { "epoch": 0.4217454443247295, "grad_norm": 0.619467455611846, "learning_rate": 4.5831870615849914e-06, "loss": 0.2732, "step": 9003 }, { "epoch": 0.4217922893146578, "grad_norm": 0.5902240994785306, "learning_rate": 4.583082205800321e-06, "loss": 0.2726, "step": 9004 }, { "epoch": 0.42183913430458614, "grad_norm": 0.6157143392744506, "learning_rate": 4.582977338028036e-06, "loss": 0.3021, "step": 9005 }, { "epoch": 0.42188597929451443, "grad_norm": 0.5747523130014752, "learning_rate": 4.58287245826874e-06, "loss": 0.2626, "step": 9006 }, { "epoch": 0.4219328242844428, "grad_norm": 0.6628394597537147, "learning_rate": 4.582767566523037e-06, "loss": 0.2903, "step": 9007 }, { "epoch": 0.4219796692743711, "grad_norm": 0.5715251548382873, "learning_rate": 4.582662662791529e-06, "loss": 0.2765, "step": 9008 }, { "epoch": 0.42202651426429943, "grad_norm": 0.6478079955816443, "learning_rate": 4.582557747074822e-06, "loss": 0.3086, "step": 9009 }, { "epoch": 0.4220733592542278, "grad_norm": 0.5962437069161897, "learning_rate": 4.582452819373518e-06, "loss": 0.3002, "step": 9010 }, { "epoch": 0.4221202042441561, "grad_norm": 0.6047700286823006, "learning_rate": 4.582347879688222e-06, "loss": 0.2864, "step": 9011 }, { "epoch": 0.42216704923408443, "grad_norm": 0.6262785802913869, "learning_rate": 4.582242928019537e-06, "loss": 0.3038, "step": 9012 }, { "epoch": 0.42221389422401273, "grad_norm": 0.6053182287077127, "learning_rate": 4.582137964368067e-06, "loss": 0.289, "step": 9013 }, { "epoch": 0.4222607392139411, "grad_norm": 0.6047818520029639, "learning_rate": 4.582032988734416e-06, "loss": 0.2903, "step": 9014 }, { "epoch": 0.4223075842038694, "grad_norm": 0.5795191665386149, "learning_rate": 4.581928001119189e-06, "loss": 0.2706, "step": 9015 }, { "epoch": 0.42235442919379773, "grad_norm": 0.6647222144405562, "learning_rate": 4.58182300152299e-06, "loss": 0.3017, "step": 9016 }, { "epoch": 0.422401274183726, "grad_norm": 0.6402983524626673, "learning_rate": 4.5817179899464226e-06, "loss": 0.2844, "step": 9017 }, { "epoch": 0.4224481191736544, "grad_norm": 0.6326170566219013, "learning_rate": 4.581612966390091e-06, "loss": 0.283, "step": 9018 }, { "epoch": 0.42249496416358273, "grad_norm": 0.5700805091095109, "learning_rate": 4.581507930854601e-06, "loss": 0.2803, "step": 9019 }, { "epoch": 0.422541809153511, "grad_norm": 0.5972107126067004, "learning_rate": 4.581402883340555e-06, "loss": 0.3035, "step": 9020 }, { "epoch": 0.4225886541434394, "grad_norm": 0.6572518633042298, "learning_rate": 4.581297823848559e-06, "loss": 0.2833, "step": 9021 }, { "epoch": 0.4226354991333677, "grad_norm": 0.6021518661029803, "learning_rate": 4.581192752379217e-06, "loss": 0.3048, "step": 9022 }, { "epoch": 0.422682344123296, "grad_norm": 0.6140763226470541, "learning_rate": 4.581087668933134e-06, "loss": 0.2785, "step": 9023 }, { "epoch": 0.4227291891132243, "grad_norm": 0.5750901122014328, "learning_rate": 4.580982573510913e-06, "loss": 0.2821, "step": 9024 }, { "epoch": 0.4227760341031527, "grad_norm": 0.5929869708496421, "learning_rate": 4.580877466113163e-06, "loss": 0.2858, "step": 9025 }, { "epoch": 0.42282287909308097, "grad_norm": 0.6179760010045708, "learning_rate": 4.580772346740484e-06, "loss": 0.3008, "step": 9026 }, { "epoch": 0.4228697240830093, "grad_norm": 0.6286320566702132, "learning_rate": 4.580667215393484e-06, "loss": 0.307, "step": 9027 }, { "epoch": 0.4229165690729377, "grad_norm": 0.6293134123524163, "learning_rate": 4.580562072072767e-06, "loss": 0.2966, "step": 9028 }, { "epoch": 0.42296341406286597, "grad_norm": 0.607931191734931, "learning_rate": 4.5804569167789396e-06, "loss": 0.2821, "step": 9029 }, { "epoch": 0.4230102590527943, "grad_norm": 0.5729767244161446, "learning_rate": 4.580351749512604e-06, "loss": 0.2901, "step": 9030 }, { "epoch": 0.4230571040427226, "grad_norm": 0.5521417272278155, "learning_rate": 4.580246570274367e-06, "loss": 0.2848, "step": 9031 }, { "epoch": 0.42310394903265097, "grad_norm": 0.5564283097956889, "learning_rate": 4.580141379064834e-06, "loss": 0.2674, "step": 9032 }, { "epoch": 0.42315079402257927, "grad_norm": 0.590800814310942, "learning_rate": 4.5800361758846105e-06, "loss": 0.2863, "step": 9033 }, { "epoch": 0.4231976390125076, "grad_norm": 0.5625147171918211, "learning_rate": 4.579930960734301e-06, "loss": 0.291, "step": 9034 }, { "epoch": 0.4232444840024359, "grad_norm": 0.5846061531876323, "learning_rate": 4.579825733614512e-06, "loss": 0.2913, "step": 9035 }, { "epoch": 0.42329132899236427, "grad_norm": 0.6314813935314837, "learning_rate": 4.579720494525849e-06, "loss": 0.2912, "step": 9036 }, { "epoch": 0.4233381739822926, "grad_norm": 0.5817349278697461, "learning_rate": 4.579615243468917e-06, "loss": 0.2956, "step": 9037 }, { "epoch": 0.4233850189722209, "grad_norm": 0.6594589328205174, "learning_rate": 4.5795099804443214e-06, "loss": 0.292, "step": 9038 }, { "epoch": 0.42343186396214927, "grad_norm": 0.6000462298218384, "learning_rate": 4.579404705452669e-06, "loss": 0.2713, "step": 9039 }, { "epoch": 0.42347870895207756, "grad_norm": 0.6303674828544102, "learning_rate": 4.579299418494565e-06, "loss": 0.3051, "step": 9040 }, { "epoch": 0.4235255539420059, "grad_norm": 0.5813512265352443, "learning_rate": 4.579194119570616e-06, "loss": 0.2991, "step": 9041 }, { "epoch": 0.4235723989319342, "grad_norm": 0.5739130148513841, "learning_rate": 4.579088808681427e-06, "loss": 0.2764, "step": 9042 }, { "epoch": 0.42361924392186256, "grad_norm": 0.6472594984859726, "learning_rate": 4.5789834858276035e-06, "loss": 0.317, "step": 9043 }, { "epoch": 0.42366608891179086, "grad_norm": 0.6131837019484176, "learning_rate": 4.578878151009753e-06, "loss": 0.3112, "step": 9044 }, { "epoch": 0.4237129339017192, "grad_norm": 0.6376390205620563, "learning_rate": 4.578772804228483e-06, "loss": 0.2787, "step": 9045 }, { "epoch": 0.42375977889164756, "grad_norm": 0.5536430487615116, "learning_rate": 4.578667445484396e-06, "loss": 0.2656, "step": 9046 }, { "epoch": 0.42380662388157586, "grad_norm": 0.6726023158759408, "learning_rate": 4.578562074778101e-06, "loss": 0.2874, "step": 9047 }, { "epoch": 0.4238534688715042, "grad_norm": 0.6281172999979413, "learning_rate": 4.578456692110204e-06, "loss": 0.3055, "step": 9048 }, { "epoch": 0.4239003138614325, "grad_norm": 0.542130793381497, "learning_rate": 4.578351297481312e-06, "loss": 0.2706, "step": 9049 }, { "epoch": 0.42394715885136086, "grad_norm": 0.5999403366351842, "learning_rate": 4.578245890892029e-06, "loss": 0.2936, "step": 9050 }, { "epoch": 0.42399400384128916, "grad_norm": 0.6275466848646188, "learning_rate": 4.578140472342963e-06, "loss": 0.301, "step": 9051 }, { "epoch": 0.4240408488312175, "grad_norm": 0.6073003966473015, "learning_rate": 4.578035041834722e-06, "loss": 0.2962, "step": 9052 }, { "epoch": 0.4240876938211458, "grad_norm": 0.6138666040191146, "learning_rate": 4.5779295993679115e-06, "loss": 0.2724, "step": 9053 }, { "epoch": 0.42413453881107416, "grad_norm": 0.6204466498441165, "learning_rate": 4.577824144943138e-06, "loss": 0.2794, "step": 9054 }, { "epoch": 0.4241813838010025, "grad_norm": 0.5944450496617689, "learning_rate": 4.57771867856101e-06, "loss": 0.2924, "step": 9055 }, { "epoch": 0.4242282287909308, "grad_norm": 0.6274459075729195, "learning_rate": 4.5776132002221315e-06, "loss": 0.3032, "step": 9056 }, { "epoch": 0.42427507378085916, "grad_norm": 0.6109649158400863, "learning_rate": 4.577507709927113e-06, "loss": 0.3017, "step": 9057 }, { "epoch": 0.42432191877078745, "grad_norm": 0.6414867843687596, "learning_rate": 4.577402207676558e-06, "loss": 0.3091, "step": 9058 }, { "epoch": 0.4243687637607158, "grad_norm": 0.7146767030183019, "learning_rate": 4.5772966934710765e-06, "loss": 0.3172, "step": 9059 }, { "epoch": 0.4244156087506441, "grad_norm": 0.7357146795852432, "learning_rate": 4.577191167311274e-06, "loss": 0.3083, "step": 9060 }, { "epoch": 0.42446245374057245, "grad_norm": 0.6033505828324294, "learning_rate": 4.5770856291977585e-06, "loss": 0.2806, "step": 9061 }, { "epoch": 0.42450929873050075, "grad_norm": 0.6175030890753617, "learning_rate": 4.576980079131138e-06, "loss": 0.2929, "step": 9062 }, { "epoch": 0.4245561437204291, "grad_norm": 0.6113894898946485, "learning_rate": 4.576874517112019e-06, "loss": 0.2769, "step": 9063 }, { "epoch": 0.42460298871035745, "grad_norm": 0.6262734046008132, "learning_rate": 4.576768943141009e-06, "loss": 0.2795, "step": 9064 }, { "epoch": 0.42464983370028575, "grad_norm": 0.6245253317364766, "learning_rate": 4.576663357218715e-06, "loss": 0.3042, "step": 9065 }, { "epoch": 0.4246966786902141, "grad_norm": 0.5949312941034254, "learning_rate": 4.5765577593457454e-06, "loss": 0.2794, "step": 9066 }, { "epoch": 0.4247435236801424, "grad_norm": 0.6236171299679503, "learning_rate": 4.576452149522708e-06, "loss": 0.2907, "step": 9067 }, { "epoch": 0.42479036867007075, "grad_norm": 0.6063357233809304, "learning_rate": 4.576346527750212e-06, "loss": 0.3029, "step": 9068 }, { "epoch": 0.42483721365999905, "grad_norm": 0.5876066274699503, "learning_rate": 4.576240894028862e-06, "loss": 0.282, "step": 9069 }, { "epoch": 0.4248840586499274, "grad_norm": 0.5888825658576031, "learning_rate": 4.576135248359267e-06, "loss": 0.2997, "step": 9070 }, { "epoch": 0.4249309036398557, "grad_norm": 0.673823599225713, "learning_rate": 4.576029590742037e-06, "loss": 0.2929, "step": 9071 }, { "epoch": 0.42497774862978405, "grad_norm": 0.6349789058653957, "learning_rate": 4.575923921177777e-06, "loss": 0.2819, "step": 9072 }, { "epoch": 0.4250245936197124, "grad_norm": 0.6384819991125943, "learning_rate": 4.575818239667098e-06, "loss": 0.2815, "step": 9073 }, { "epoch": 0.4250714386096407, "grad_norm": 0.5676425628861448, "learning_rate": 4.575712546210606e-06, "loss": 0.2938, "step": 9074 }, { "epoch": 0.42511828359956905, "grad_norm": 0.5926271024811262, "learning_rate": 4.575606840808911e-06, "loss": 0.2851, "step": 9075 }, { "epoch": 0.42516512858949734, "grad_norm": 0.6072085646087027, "learning_rate": 4.5755011234626185e-06, "loss": 0.2786, "step": 9076 }, { "epoch": 0.4252119735794257, "grad_norm": 0.610102564111489, "learning_rate": 4.575395394172339e-06, "loss": 0.268, "step": 9077 }, { "epoch": 0.425258818569354, "grad_norm": 0.65245620908248, "learning_rate": 4.575289652938682e-06, "loss": 0.2938, "step": 9078 }, { "epoch": 0.42530566355928234, "grad_norm": 0.5612655619111657, "learning_rate": 4.575183899762254e-06, "loss": 0.2829, "step": 9079 }, { "epoch": 0.42535250854921064, "grad_norm": 0.6568803818597201, "learning_rate": 4.575078134643664e-06, "loss": 0.2945, "step": 9080 }, { "epoch": 0.425399353539139, "grad_norm": 0.6030337432879049, "learning_rate": 4.574972357583522e-06, "loss": 0.2945, "step": 9081 }, { "epoch": 0.42544619852906734, "grad_norm": 0.5833867010009081, "learning_rate": 4.574866568582433e-06, "loss": 0.2925, "step": 9082 }, { "epoch": 0.42549304351899564, "grad_norm": 0.6510415076959625, "learning_rate": 4.574760767641011e-06, "loss": 0.3146, "step": 9083 }, { "epoch": 0.425539888508924, "grad_norm": 0.6494093784291418, "learning_rate": 4.574654954759861e-06, "loss": 0.2886, "step": 9084 }, { "epoch": 0.4255867334988523, "grad_norm": 0.5706350832382303, "learning_rate": 4.574549129939593e-06, "loss": 0.284, "step": 9085 }, { "epoch": 0.42563357848878064, "grad_norm": 0.5499701052615085, "learning_rate": 4.574443293180817e-06, "loss": 0.2766, "step": 9086 }, { "epoch": 0.42568042347870894, "grad_norm": 0.5911625695628033, "learning_rate": 4.574337444484141e-06, "loss": 0.2641, "step": 9087 }, { "epoch": 0.4257272684686373, "grad_norm": 0.5834822992445216, "learning_rate": 4.574231583850175e-06, "loss": 0.2877, "step": 9088 }, { "epoch": 0.4257741134585656, "grad_norm": 0.6221021325644458, "learning_rate": 4.5741257112795265e-06, "loss": 0.294, "step": 9089 }, { "epoch": 0.42582095844849394, "grad_norm": 0.6125659211470665, "learning_rate": 4.574019826772806e-06, "loss": 0.2774, "step": 9090 }, { "epoch": 0.4258678034384223, "grad_norm": 0.5513927027930496, "learning_rate": 4.573913930330622e-06, "loss": 0.2766, "step": 9091 }, { "epoch": 0.4259146484283506, "grad_norm": 0.6650261427728926, "learning_rate": 4.573808021953587e-06, "loss": 0.3053, "step": 9092 }, { "epoch": 0.42596149341827894, "grad_norm": 0.5966652743973263, "learning_rate": 4.573702101642306e-06, "loss": 0.2764, "step": 9093 }, { "epoch": 0.42600833840820723, "grad_norm": 0.6498662103088267, "learning_rate": 4.573596169397391e-06, "loss": 0.2764, "step": 9094 }, { "epoch": 0.4260551833981356, "grad_norm": 0.5968021182361923, "learning_rate": 4.573490225219451e-06, "loss": 0.2915, "step": 9095 }, { "epoch": 0.4261020283880639, "grad_norm": 0.6459758599444537, "learning_rate": 4.573384269109096e-06, "loss": 0.3144, "step": 9096 }, { "epoch": 0.42614887337799223, "grad_norm": 0.6141613039184637, "learning_rate": 4.573278301066937e-06, "loss": 0.2967, "step": 9097 }, { "epoch": 0.42619571836792053, "grad_norm": 0.648195036845452, "learning_rate": 4.573172321093581e-06, "loss": 0.2682, "step": 9098 }, { "epoch": 0.4262425633578489, "grad_norm": 0.5525271710335709, "learning_rate": 4.573066329189639e-06, "loss": 0.2738, "step": 9099 }, { "epoch": 0.42628940834777723, "grad_norm": 0.6431054790353623, "learning_rate": 4.5729603253557226e-06, "loss": 0.3026, "step": 9100 }, { "epoch": 0.42633625333770553, "grad_norm": 0.625659735004161, "learning_rate": 4.57285430959244e-06, "loss": 0.2934, "step": 9101 }, { "epoch": 0.4263830983276339, "grad_norm": 0.6769453133389642, "learning_rate": 4.572748281900402e-06, "loss": 0.2994, "step": 9102 }, { "epoch": 0.4264299433175622, "grad_norm": 0.6446952280244533, "learning_rate": 4.572642242280219e-06, "loss": 0.2833, "step": 9103 }, { "epoch": 0.42647678830749053, "grad_norm": 0.5703412326812198, "learning_rate": 4.5725361907325e-06, "loss": 0.2802, "step": 9104 }, { "epoch": 0.4265236332974188, "grad_norm": 0.6151796552337906, "learning_rate": 4.5724301272578565e-06, "loss": 0.2723, "step": 9105 }, { "epoch": 0.4265704782873472, "grad_norm": 0.6887554254971766, "learning_rate": 4.572324051856898e-06, "loss": 0.3258, "step": 9106 }, { "epoch": 0.4266173232772755, "grad_norm": 0.5871403728892995, "learning_rate": 4.572217964530235e-06, "loss": 0.292, "step": 9107 }, { "epoch": 0.4266641682672038, "grad_norm": 0.6588544792497426, "learning_rate": 4.57211186527848e-06, "loss": 0.2829, "step": 9108 }, { "epoch": 0.4267110132571322, "grad_norm": 0.6161644219769637, "learning_rate": 4.572005754102241e-06, "loss": 0.2798, "step": 9109 }, { "epoch": 0.4267578582470605, "grad_norm": 0.6012697976135856, "learning_rate": 4.5718996310021305e-06, "loss": 0.2812, "step": 9110 }, { "epoch": 0.4268047032369888, "grad_norm": 0.5982989768850525, "learning_rate": 4.571793495978758e-06, "loss": 0.2794, "step": 9111 }, { "epoch": 0.4268515482269171, "grad_norm": 0.6532857075110509, "learning_rate": 4.571687349032735e-06, "loss": 0.3032, "step": 9112 }, { "epoch": 0.4268983932168455, "grad_norm": 0.6845015139819952, "learning_rate": 4.571581190164671e-06, "loss": 0.3286, "step": 9113 }, { "epoch": 0.42694523820677377, "grad_norm": 0.6111335356498412, "learning_rate": 4.571475019375178e-06, "loss": 0.2823, "step": 9114 }, { "epoch": 0.4269920831967021, "grad_norm": 0.6312110936081228, "learning_rate": 4.5713688366648675e-06, "loss": 0.3138, "step": 9115 }, { "epoch": 0.4270389281866304, "grad_norm": 0.6257386114902181, "learning_rate": 4.57126264203435e-06, "loss": 0.3021, "step": 9116 }, { "epoch": 0.42708577317655877, "grad_norm": 0.5958337874244711, "learning_rate": 4.571156435484236e-06, "loss": 0.2769, "step": 9117 }, { "epoch": 0.4271326181664871, "grad_norm": 0.5956210050070108, "learning_rate": 4.5710502170151385e-06, "loss": 0.3018, "step": 9118 }, { "epoch": 0.4271794631564154, "grad_norm": 0.5741448746022542, "learning_rate": 4.5709439866276665e-06, "loss": 0.288, "step": 9119 }, { "epoch": 0.42722630814634377, "grad_norm": 0.6151639063306976, "learning_rate": 4.5708377443224325e-06, "loss": 0.3053, "step": 9120 }, { "epoch": 0.42727315313627207, "grad_norm": 0.6286629121215159, "learning_rate": 4.570731490100048e-06, "loss": 0.3084, "step": 9121 }, { "epoch": 0.4273199981262004, "grad_norm": 0.6040377780995928, "learning_rate": 4.5706252239611245e-06, "loss": 0.261, "step": 9122 }, { "epoch": 0.4273668431161287, "grad_norm": 0.5860400364526792, "learning_rate": 4.570518945906273e-06, "loss": 0.2842, "step": 9123 }, { "epoch": 0.42741368810605707, "grad_norm": 0.6244817443648508, "learning_rate": 4.570412655936105e-06, "loss": 0.3023, "step": 9124 }, { "epoch": 0.42746053309598536, "grad_norm": 0.5992847619499481, "learning_rate": 4.570306354051233e-06, "loss": 0.2767, "step": 9125 }, { "epoch": 0.4275073780859137, "grad_norm": 0.6053746930260723, "learning_rate": 4.570200040252268e-06, "loss": 0.2916, "step": 9126 }, { "epoch": 0.42755422307584207, "grad_norm": 0.5708250790557494, "learning_rate": 4.570093714539821e-06, "loss": 0.2779, "step": 9127 }, { "epoch": 0.42760106806577036, "grad_norm": 0.6268138916181561, "learning_rate": 4.569987376914508e-06, "loss": 0.2959, "step": 9128 }, { "epoch": 0.4276479130556987, "grad_norm": 0.6167535436609839, "learning_rate": 4.569881027376935e-06, "loss": 0.2762, "step": 9129 }, { "epoch": 0.427694758045627, "grad_norm": 0.5705649193410756, "learning_rate": 4.5697746659277185e-06, "loss": 0.3044, "step": 9130 }, { "epoch": 0.42774160303555536, "grad_norm": 0.6018144836305834, "learning_rate": 4.569668292567468e-06, "loss": 0.3084, "step": 9131 }, { "epoch": 0.42778844802548366, "grad_norm": 0.6117196121874803, "learning_rate": 4.569561907296797e-06, "loss": 0.2833, "step": 9132 }, { "epoch": 0.427835293015412, "grad_norm": 0.6005338938819283, "learning_rate": 4.569455510116318e-06, "loss": 0.2748, "step": 9133 }, { "epoch": 0.4278821380053403, "grad_norm": 0.6275983405978537, "learning_rate": 4.569349101026642e-06, "loss": 0.2991, "step": 9134 }, { "epoch": 0.42792898299526866, "grad_norm": 0.6230784385275393, "learning_rate": 4.569242680028383e-06, "loss": 0.2985, "step": 9135 }, { "epoch": 0.427975827985197, "grad_norm": 0.6232365475737063, "learning_rate": 4.569136247122151e-06, "loss": 0.2843, "step": 9136 }, { "epoch": 0.4280226729751253, "grad_norm": 0.6178073138559655, "learning_rate": 4.569029802308561e-06, "loss": 0.2904, "step": 9137 }, { "epoch": 0.42806951796505366, "grad_norm": 0.5831185963389354, "learning_rate": 4.5689233455882244e-06, "loss": 0.2826, "step": 9138 }, { "epoch": 0.42811636295498195, "grad_norm": 0.583549107524166, "learning_rate": 4.568816876961753e-06, "loss": 0.2769, "step": 9139 }, { "epoch": 0.4281632079449103, "grad_norm": 0.6071906234743701, "learning_rate": 4.568710396429762e-06, "loss": 0.3074, "step": 9140 }, { "epoch": 0.4282100529348386, "grad_norm": 0.5626992987203271, "learning_rate": 4.568603903992862e-06, "loss": 0.2817, "step": 9141 }, { "epoch": 0.42825689792476695, "grad_norm": 0.6155226044225205, "learning_rate": 4.568497399651666e-06, "loss": 0.2782, "step": 9142 }, { "epoch": 0.42830374291469525, "grad_norm": 0.5687499817798072, "learning_rate": 4.568390883406788e-06, "loss": 0.2812, "step": 9143 }, { "epoch": 0.4283505879046236, "grad_norm": 0.663088207092625, "learning_rate": 4.56828435525884e-06, "loss": 0.307, "step": 9144 }, { "epoch": 0.42839743289455195, "grad_norm": 0.6175697973292901, "learning_rate": 4.568177815208435e-06, "loss": 0.2921, "step": 9145 }, { "epoch": 0.42844427788448025, "grad_norm": 0.6125939732962166, "learning_rate": 4.568071263256186e-06, "loss": 0.2942, "step": 9146 }, { "epoch": 0.4284911228744086, "grad_norm": 0.6101143883343012, "learning_rate": 4.567964699402708e-06, "loss": 0.2994, "step": 9147 }, { "epoch": 0.4285379678643369, "grad_norm": 0.624027547457458, "learning_rate": 4.567858123648613e-06, "loss": 0.2954, "step": 9148 }, { "epoch": 0.42858481285426525, "grad_norm": 0.6380369711913352, "learning_rate": 4.567751535994514e-06, "loss": 0.2903, "step": 9149 }, { "epoch": 0.42863165784419355, "grad_norm": 0.606984301042552, "learning_rate": 4.5676449364410234e-06, "loss": 0.2707, "step": 9150 }, { "epoch": 0.4286785028341219, "grad_norm": 0.5997547555942512, "learning_rate": 4.567538324988757e-06, "loss": 0.3023, "step": 9151 }, { "epoch": 0.4287253478240502, "grad_norm": 0.6293242922264298, "learning_rate": 4.567431701638328e-06, "loss": 0.2977, "step": 9152 }, { "epoch": 0.42877219281397855, "grad_norm": 0.5742389974488344, "learning_rate": 4.567325066390347e-06, "loss": 0.2731, "step": 9153 }, { "epoch": 0.4288190378039069, "grad_norm": 0.6346321082287143, "learning_rate": 4.567218419245431e-06, "loss": 0.2961, "step": 9154 }, { "epoch": 0.4288658827938352, "grad_norm": 0.6115188463305006, "learning_rate": 4.567111760204193e-06, "loss": 0.3132, "step": 9155 }, { "epoch": 0.42891272778376355, "grad_norm": 0.604591417025153, "learning_rate": 4.5670050892672455e-06, "loss": 0.2982, "step": 9156 }, { "epoch": 0.42895957277369184, "grad_norm": 0.6310606180155464, "learning_rate": 4.566898406435204e-06, "loss": 0.2957, "step": 9157 }, { "epoch": 0.4290064177636202, "grad_norm": 0.6343343137653805, "learning_rate": 4.566791711708681e-06, "loss": 0.2985, "step": 9158 }, { "epoch": 0.4290532627535485, "grad_norm": 0.6071768651226812, "learning_rate": 4.566685005088292e-06, "loss": 0.3062, "step": 9159 }, { "epoch": 0.42910010774347684, "grad_norm": 0.564976567043917, "learning_rate": 4.56657828657465e-06, "loss": 0.2638, "step": 9160 }, { "epoch": 0.42914695273340514, "grad_norm": 0.5743524317054479, "learning_rate": 4.566471556168369e-06, "loss": 0.3009, "step": 9161 }, { "epoch": 0.4291937977233335, "grad_norm": 0.6239832723626015, "learning_rate": 4.566364813870063e-06, "loss": 0.2938, "step": 9162 }, { "epoch": 0.42924064271326184, "grad_norm": 0.6465884468356471, "learning_rate": 4.566258059680348e-06, "loss": 0.3195, "step": 9163 }, { "epoch": 0.42928748770319014, "grad_norm": 0.55499270408732, "learning_rate": 4.566151293599836e-06, "loss": 0.269, "step": 9164 }, { "epoch": 0.4293343326931185, "grad_norm": 0.6040992155491597, "learning_rate": 4.5660445156291445e-06, "loss": 0.2698, "step": 9165 }, { "epoch": 0.4293811776830468, "grad_norm": 0.622864271856502, "learning_rate": 4.565937725768885e-06, "loss": 0.3188, "step": 9166 }, { "epoch": 0.42942802267297514, "grad_norm": 0.6597376384604032, "learning_rate": 4.565830924019673e-06, "loss": 0.3057, "step": 9167 }, { "epoch": 0.42947486766290344, "grad_norm": 0.6086077513450048, "learning_rate": 4.5657241103821234e-06, "loss": 0.285, "step": 9168 }, { "epoch": 0.4295217126528318, "grad_norm": 0.5832441082757736, "learning_rate": 4.5656172848568505e-06, "loss": 0.3034, "step": 9169 }, { "epoch": 0.4295685576427601, "grad_norm": 0.656876014183591, "learning_rate": 4.5655104474444704e-06, "loss": 0.3144, "step": 9170 }, { "epoch": 0.42961540263268844, "grad_norm": 0.6272083080526841, "learning_rate": 4.565403598145595e-06, "loss": 0.2953, "step": 9171 }, { "epoch": 0.4296622476226168, "grad_norm": 0.619579220786761, "learning_rate": 4.565296736960842e-06, "loss": 0.2953, "step": 9172 }, { "epoch": 0.4297090926125451, "grad_norm": 0.5757134685338497, "learning_rate": 4.5651898638908255e-06, "loss": 0.2955, "step": 9173 }, { "epoch": 0.42975593760247344, "grad_norm": 0.6238209493592507, "learning_rate": 4.565082978936161e-06, "loss": 0.2992, "step": 9174 }, { "epoch": 0.42980278259240173, "grad_norm": 0.5492582889373864, "learning_rate": 4.564976082097461e-06, "loss": 0.2817, "step": 9175 }, { "epoch": 0.4298496275823301, "grad_norm": 0.6406883803926138, "learning_rate": 4.564869173375345e-06, "loss": 0.3008, "step": 9176 }, { "epoch": 0.4298964725722584, "grad_norm": 0.5623588696989613, "learning_rate": 4.564762252770424e-06, "loss": 0.2772, "step": 9177 }, { "epoch": 0.42994331756218673, "grad_norm": 0.6243671816217871, "learning_rate": 4.564655320283317e-06, "loss": 0.2883, "step": 9178 }, { "epoch": 0.42999016255211503, "grad_norm": 0.6602823401751143, "learning_rate": 4.564548375914636e-06, "loss": 0.3067, "step": 9179 }, { "epoch": 0.4300370075420434, "grad_norm": 0.6088994052818909, "learning_rate": 4.564441419664998e-06, "loss": 0.2896, "step": 9180 }, { "epoch": 0.43008385253197173, "grad_norm": 0.6174816986757614, "learning_rate": 4.564334451535019e-06, "loss": 0.2878, "step": 9181 }, { "epoch": 0.43013069752190003, "grad_norm": 0.5708763722805129, "learning_rate": 4.564227471525314e-06, "loss": 0.2948, "step": 9182 }, { "epoch": 0.4301775425118284, "grad_norm": 0.5912919454131471, "learning_rate": 4.564120479636499e-06, "loss": 0.2916, "step": 9183 }, { "epoch": 0.4302243875017567, "grad_norm": 0.6193055591340876, "learning_rate": 4.564013475869189e-06, "loss": 0.2814, "step": 9184 }, { "epoch": 0.43027123249168503, "grad_norm": 0.5388010931410214, "learning_rate": 4.563906460224001e-06, "loss": 0.2632, "step": 9185 }, { "epoch": 0.4303180774816133, "grad_norm": 0.60341285281852, "learning_rate": 4.56379943270155e-06, "loss": 0.2843, "step": 9186 }, { "epoch": 0.4303649224715417, "grad_norm": 0.5744885557125559, "learning_rate": 4.563692393302452e-06, "loss": 0.2755, "step": 9187 }, { "epoch": 0.43041176746147, "grad_norm": 0.5964365080864651, "learning_rate": 4.563585342027322e-06, "loss": 0.2725, "step": 9188 }, { "epoch": 0.4304586124513983, "grad_norm": 0.6362601018493415, "learning_rate": 4.563478278876777e-06, "loss": 0.2881, "step": 9189 }, { "epoch": 0.4305054574413267, "grad_norm": 0.6420016504147313, "learning_rate": 4.563371203851434e-06, "loss": 0.3121, "step": 9190 }, { "epoch": 0.430552302431255, "grad_norm": 0.5662327339615825, "learning_rate": 4.563264116951907e-06, "loss": 0.2675, "step": 9191 }, { "epoch": 0.4305991474211833, "grad_norm": 0.6164796110038906, "learning_rate": 4.563157018178816e-06, "loss": 0.2855, "step": 9192 }, { "epoch": 0.4306459924111116, "grad_norm": 0.5937940587976083, "learning_rate": 4.563049907532773e-06, "loss": 0.2717, "step": 9193 }, { "epoch": 0.43069283740104, "grad_norm": 0.5911836115694756, "learning_rate": 4.562942785014397e-06, "loss": 0.2896, "step": 9194 }, { "epoch": 0.43073968239096827, "grad_norm": 0.5903638100844543, "learning_rate": 4.562835650624303e-06, "loss": 0.2896, "step": 9195 }, { "epoch": 0.4307865273808966, "grad_norm": 0.6052866293535533, "learning_rate": 4.56272850436311e-06, "loss": 0.2808, "step": 9196 }, { "epoch": 0.4308333723708249, "grad_norm": 0.5872401478646371, "learning_rate": 4.562621346231431e-06, "loss": 0.2899, "step": 9197 }, { "epoch": 0.43088021736075327, "grad_norm": 0.6222059981216791, "learning_rate": 4.562514176229886e-06, "loss": 0.3193, "step": 9198 }, { "epoch": 0.4309270623506816, "grad_norm": 0.5879590997808111, "learning_rate": 4.562406994359089e-06, "loss": 0.2692, "step": 9199 }, { "epoch": 0.4309739073406099, "grad_norm": 0.6190107733902995, "learning_rate": 4.562299800619659e-06, "loss": 0.3086, "step": 9200 }, { "epoch": 0.43102075233053827, "grad_norm": 0.5844366367212136, "learning_rate": 4.562192595012212e-06, "loss": 0.2975, "step": 9201 }, { "epoch": 0.43106759732046657, "grad_norm": 0.5998363136741206, "learning_rate": 4.562085377537365e-06, "loss": 0.284, "step": 9202 }, { "epoch": 0.4311144423103949, "grad_norm": 0.5677078766207213, "learning_rate": 4.5619781481957345e-06, "loss": 0.2872, "step": 9203 }, { "epoch": 0.4311612873003232, "grad_norm": 0.6029494585271123, "learning_rate": 4.561870906987938e-06, "loss": 0.2962, "step": 9204 }, { "epoch": 0.43120813229025157, "grad_norm": 0.6490853116926083, "learning_rate": 4.5617636539145926e-06, "loss": 0.3011, "step": 9205 }, { "epoch": 0.43125497728017986, "grad_norm": 0.6427580641952997, "learning_rate": 4.561656388976316e-06, "loss": 0.2911, "step": 9206 }, { "epoch": 0.4313018222701082, "grad_norm": 0.6131946827233713, "learning_rate": 4.5615491121737245e-06, "loss": 0.2854, "step": 9207 }, { "epoch": 0.43134866726003657, "grad_norm": 0.6331252612937815, "learning_rate": 4.561441823507436e-06, "loss": 0.2935, "step": 9208 }, { "epoch": 0.43139551224996486, "grad_norm": 0.6100237373324523, "learning_rate": 4.561334522978069e-06, "loss": 0.276, "step": 9209 }, { "epoch": 0.4314423572398932, "grad_norm": 0.5760851891790276, "learning_rate": 4.561227210586239e-06, "loss": 0.2886, "step": 9210 }, { "epoch": 0.4314892022298215, "grad_norm": 0.5656323111213498, "learning_rate": 4.561119886332565e-06, "loss": 0.2819, "step": 9211 }, { "epoch": 0.43153604721974986, "grad_norm": 0.6741528651323819, "learning_rate": 4.561012550217664e-06, "loss": 0.2751, "step": 9212 }, { "epoch": 0.43158289220967816, "grad_norm": 0.5872214921011225, "learning_rate": 4.560905202242153e-06, "loss": 0.2773, "step": 9213 }, { "epoch": 0.4316297371996065, "grad_norm": 0.5690753064432492, "learning_rate": 4.560797842406651e-06, "loss": 0.2674, "step": 9214 }, { "epoch": 0.4316765821895348, "grad_norm": 0.6092781584536318, "learning_rate": 4.560690470711776e-06, "loss": 0.282, "step": 9215 }, { "epoch": 0.43172342717946316, "grad_norm": 0.6529556414995478, "learning_rate": 4.560583087158145e-06, "loss": 0.3172, "step": 9216 }, { "epoch": 0.4317702721693915, "grad_norm": 0.568180393235941, "learning_rate": 4.5604756917463764e-06, "loss": 0.2644, "step": 9217 }, { "epoch": 0.4318171171593198, "grad_norm": 0.5812924652171365, "learning_rate": 4.560368284477087e-06, "loss": 0.2799, "step": 9218 }, { "epoch": 0.43186396214924816, "grad_norm": 0.5949432741750247, "learning_rate": 4.560260865350897e-06, "loss": 0.2845, "step": 9219 }, { "epoch": 0.43191080713917646, "grad_norm": 0.6376842740169248, "learning_rate": 4.560153434368423e-06, "loss": 0.3021, "step": 9220 }, { "epoch": 0.4319576521291048, "grad_norm": 0.6092317661924138, "learning_rate": 4.560045991530284e-06, "loss": 0.3088, "step": 9221 }, { "epoch": 0.4320044971190331, "grad_norm": 0.6075594167011112, "learning_rate": 4.559938536837098e-06, "loss": 0.2781, "step": 9222 }, { "epoch": 0.43205134210896146, "grad_norm": 0.6339225861075876, "learning_rate": 4.559831070289484e-06, "loss": 0.2878, "step": 9223 }, { "epoch": 0.43209818709888975, "grad_norm": 0.5736494909157599, "learning_rate": 4.559723591888059e-06, "loss": 0.2788, "step": 9224 }, { "epoch": 0.4321450320888181, "grad_norm": 0.636599121564416, "learning_rate": 4.559616101633443e-06, "loss": 0.2994, "step": 9225 }, { "epoch": 0.4321918770787464, "grad_norm": 0.6093227293298564, "learning_rate": 4.5595085995262545e-06, "loss": 0.2843, "step": 9226 }, { "epoch": 0.43223872206867475, "grad_norm": 0.6365111667398141, "learning_rate": 4.559401085567111e-06, "loss": 0.3058, "step": 9227 }, { "epoch": 0.4322855670586031, "grad_norm": 0.6307579623095745, "learning_rate": 4.559293559756631e-06, "loss": 0.2849, "step": 9228 }, { "epoch": 0.4323324120485314, "grad_norm": 0.620501956615436, "learning_rate": 4.559186022095435e-06, "loss": 0.302, "step": 9229 }, { "epoch": 0.43237925703845975, "grad_norm": 0.5807742655887319, "learning_rate": 4.559078472584142e-06, "loss": 0.2905, "step": 9230 }, { "epoch": 0.43242610202838805, "grad_norm": 0.6004842904157849, "learning_rate": 4.5589709112233684e-06, "loss": 0.2937, "step": 9231 }, { "epoch": 0.4324729470183164, "grad_norm": 0.5534082272222637, "learning_rate": 4.558863338013736e-06, "loss": 0.2777, "step": 9232 }, { "epoch": 0.4325197920082447, "grad_norm": 0.6492004373970247, "learning_rate": 4.558755752955861e-06, "loss": 0.303, "step": 9233 }, { "epoch": 0.43256663699817305, "grad_norm": 0.5690554155268348, "learning_rate": 4.558648156050365e-06, "loss": 0.2782, "step": 9234 }, { "epoch": 0.43261348198810134, "grad_norm": 0.5537859370852455, "learning_rate": 4.558540547297867e-06, "loss": 0.2683, "step": 9235 }, { "epoch": 0.4326603269780297, "grad_norm": 0.7006493234925909, "learning_rate": 4.558432926698985e-06, "loss": 0.3338, "step": 9236 }, { "epoch": 0.43270717196795805, "grad_norm": 0.5694862696646684, "learning_rate": 4.558325294254338e-06, "loss": 0.2786, "step": 9237 }, { "epoch": 0.43275401695788634, "grad_norm": 0.5931291443265352, "learning_rate": 4.558217649964547e-06, "loss": 0.2951, "step": 9238 }, { "epoch": 0.4328008619478147, "grad_norm": 0.5843770903573181, "learning_rate": 4.558109993830231e-06, "loss": 0.2805, "step": 9239 }, { "epoch": 0.432847706937743, "grad_norm": 0.6108289801038492, "learning_rate": 4.558002325852009e-06, "loss": 0.3135, "step": 9240 }, { "epoch": 0.43289455192767134, "grad_norm": 0.6499008398905805, "learning_rate": 4.557894646030501e-06, "loss": 0.301, "step": 9241 }, { "epoch": 0.43294139691759964, "grad_norm": 0.6090959344338257, "learning_rate": 4.557786954366325e-06, "loss": 0.2899, "step": 9242 }, { "epoch": 0.432988241907528, "grad_norm": 0.6082589869104642, "learning_rate": 4.557679250860104e-06, "loss": 0.2952, "step": 9243 }, { "epoch": 0.4330350868974563, "grad_norm": 0.5871577349544592, "learning_rate": 4.557571535512456e-06, "loss": 0.267, "step": 9244 }, { "epoch": 0.43308193188738464, "grad_norm": 0.6081669372177206, "learning_rate": 4.557463808324001e-06, "loss": 0.2815, "step": 9245 }, { "epoch": 0.433128776877313, "grad_norm": 0.5361610551668403, "learning_rate": 4.557356069295359e-06, "loss": 0.2571, "step": 9246 }, { "epoch": 0.4331756218672413, "grad_norm": 0.6278185679548549, "learning_rate": 4.557248318427149e-06, "loss": 0.2872, "step": 9247 }, { "epoch": 0.43322246685716964, "grad_norm": 0.6684552030741275, "learning_rate": 4.557140555719992e-06, "loss": 0.3024, "step": 9248 }, { "epoch": 0.43326931184709794, "grad_norm": 0.5809345518279103, "learning_rate": 4.55703278117451e-06, "loss": 0.2834, "step": 9249 }, { "epoch": 0.4333161568370263, "grad_norm": 0.6062387328126018, "learning_rate": 4.556924994791319e-06, "loss": 0.278, "step": 9250 }, { "epoch": 0.4333630018269546, "grad_norm": 0.6740213150167043, "learning_rate": 4.556817196571043e-06, "loss": 0.3072, "step": 9251 }, { "epoch": 0.43340984681688294, "grad_norm": 0.6256322953005489, "learning_rate": 4.5567093865143005e-06, "loss": 0.2984, "step": 9252 }, { "epoch": 0.43345669180681123, "grad_norm": 0.5973129134323689, "learning_rate": 4.556601564621713e-06, "loss": 0.2935, "step": 9253 }, { "epoch": 0.4335035367967396, "grad_norm": 0.5883356787103908, "learning_rate": 4.5564937308938995e-06, "loss": 0.2678, "step": 9254 }, { "epoch": 0.43355038178666794, "grad_norm": 0.5951792272985706, "learning_rate": 4.556385885331483e-06, "loss": 0.292, "step": 9255 }, { "epoch": 0.43359722677659623, "grad_norm": 0.5970526198417524, "learning_rate": 4.55627802793508e-06, "loss": 0.2869, "step": 9256 }, { "epoch": 0.4336440717665246, "grad_norm": 0.6475257989864979, "learning_rate": 4.556170158705316e-06, "loss": 0.2915, "step": 9257 }, { "epoch": 0.4336909167564529, "grad_norm": 0.5529184568734667, "learning_rate": 4.556062277642809e-06, "loss": 0.2892, "step": 9258 }, { "epoch": 0.43373776174638123, "grad_norm": 0.5950972151243014, "learning_rate": 4.555954384748181e-06, "loss": 0.2878, "step": 9259 }, { "epoch": 0.43378460673630953, "grad_norm": 0.5894770629373538, "learning_rate": 4.5558464800220505e-06, "loss": 0.2899, "step": 9260 }, { "epoch": 0.4338314517262379, "grad_norm": 0.6082350858272866, "learning_rate": 4.555738563465041e-06, "loss": 0.2966, "step": 9261 }, { "epoch": 0.4338782967161662, "grad_norm": 0.6437725475131386, "learning_rate": 4.555630635077773e-06, "loss": 0.3047, "step": 9262 }, { "epoch": 0.43392514170609453, "grad_norm": 0.5675291744276737, "learning_rate": 4.555522694860867e-06, "loss": 0.2956, "step": 9263 }, { "epoch": 0.4339719866960229, "grad_norm": 0.5557789435134339, "learning_rate": 4.555414742814946e-06, "loss": 0.2989, "step": 9264 }, { "epoch": 0.4340188316859512, "grad_norm": 0.8586038308122453, "learning_rate": 4.555306778940628e-06, "loss": 0.2993, "step": 9265 }, { "epoch": 0.43406567667587953, "grad_norm": 0.6030498466670531, "learning_rate": 4.555198803238536e-06, "loss": 0.3, "step": 9266 }, { "epoch": 0.4341125216658078, "grad_norm": 0.6127660490540042, "learning_rate": 4.555090815709292e-06, "loss": 0.2804, "step": 9267 }, { "epoch": 0.4341593666557362, "grad_norm": 0.673077208153458, "learning_rate": 4.554982816353518e-06, "loss": 0.3017, "step": 9268 }, { "epoch": 0.4342062116456645, "grad_norm": 0.6242771971625818, "learning_rate": 4.554874805171833e-06, "loss": 0.2933, "step": 9269 }, { "epoch": 0.4342530566355928, "grad_norm": 0.5859005904344182, "learning_rate": 4.55476678216486e-06, "loss": 0.3035, "step": 9270 }, { "epoch": 0.4342999016255211, "grad_norm": 0.5923794593246604, "learning_rate": 4.554658747333221e-06, "loss": 0.3091, "step": 9271 }, { "epoch": 0.4343467466154495, "grad_norm": 0.5954765184474548, "learning_rate": 4.554550700677537e-06, "loss": 0.29, "step": 9272 }, { "epoch": 0.4343935916053778, "grad_norm": 0.6043969384143315, "learning_rate": 4.554442642198431e-06, "loss": 0.3049, "step": 9273 }, { "epoch": 0.4344404365953061, "grad_norm": 0.6064564296186133, "learning_rate": 4.554334571896523e-06, "loss": 0.2759, "step": 9274 }, { "epoch": 0.4344872815852345, "grad_norm": 0.6315395431233468, "learning_rate": 4.554226489772437e-06, "loss": 0.3004, "step": 9275 }, { "epoch": 0.43453412657516277, "grad_norm": 0.5740497063133485, "learning_rate": 4.554118395826792e-06, "loss": 0.2713, "step": 9276 }, { "epoch": 0.4345809715650911, "grad_norm": 0.6314356933223013, "learning_rate": 4.554010290060214e-06, "loss": 0.2861, "step": 9277 }, { "epoch": 0.4346278165550194, "grad_norm": 0.5974368714820999, "learning_rate": 4.5539021724733216e-06, "loss": 0.3007, "step": 9278 }, { "epoch": 0.43467466154494777, "grad_norm": 0.5512338042552772, "learning_rate": 4.553794043066739e-06, "loss": 0.2766, "step": 9279 }, { "epoch": 0.43472150653487607, "grad_norm": 0.5784642631730783, "learning_rate": 4.553685901841087e-06, "loss": 0.2823, "step": 9280 }, { "epoch": 0.4347683515248044, "grad_norm": 0.6550321082740598, "learning_rate": 4.55357774879699e-06, "loss": 0.2936, "step": 9281 }, { "epoch": 0.43481519651473277, "grad_norm": 0.591304304645967, "learning_rate": 4.553469583935069e-06, "loss": 0.2676, "step": 9282 }, { "epoch": 0.43486204150466107, "grad_norm": 0.5956002034593325, "learning_rate": 4.553361407255946e-06, "loss": 0.2822, "step": 9283 }, { "epoch": 0.4349088864945894, "grad_norm": 0.5987313765931175, "learning_rate": 4.5532532187602455e-06, "loss": 0.288, "step": 9284 }, { "epoch": 0.4349557314845177, "grad_norm": 0.5323302792993754, "learning_rate": 4.553145018448587e-06, "loss": 0.2712, "step": 9285 }, { "epoch": 0.43500257647444607, "grad_norm": 0.6041709703167041, "learning_rate": 4.5530368063215966e-06, "loss": 0.2715, "step": 9286 }, { "epoch": 0.43504942146437436, "grad_norm": 0.6533177496971195, "learning_rate": 4.552928582379894e-06, "loss": 0.2869, "step": 9287 }, { "epoch": 0.4350962664543027, "grad_norm": 0.6571358969625524, "learning_rate": 4.552820346624105e-06, "loss": 0.2891, "step": 9288 }, { "epoch": 0.435143111444231, "grad_norm": 0.6116501244053085, "learning_rate": 4.552712099054849e-06, "loss": 0.3008, "step": 9289 }, { "epoch": 0.43518995643415936, "grad_norm": 0.5989992672378389, "learning_rate": 4.552603839672752e-06, "loss": 0.2935, "step": 9290 }, { "epoch": 0.4352368014240877, "grad_norm": 0.5946146874709934, "learning_rate": 4.552495568478436e-06, "loss": 0.2998, "step": 9291 }, { "epoch": 0.435283646414016, "grad_norm": 0.5452235077188149, "learning_rate": 4.552387285472523e-06, "loss": 0.2769, "step": 9292 }, { "epoch": 0.43533049140394436, "grad_norm": 0.6440149322602404, "learning_rate": 4.552278990655637e-06, "loss": 0.2998, "step": 9293 }, { "epoch": 0.43537733639387266, "grad_norm": 0.6510307321980953, "learning_rate": 4.552170684028402e-06, "loss": 0.309, "step": 9294 }, { "epoch": 0.435424181383801, "grad_norm": 0.5887957683399998, "learning_rate": 4.5520623655914405e-06, "loss": 0.2807, "step": 9295 }, { "epoch": 0.4354710263737293, "grad_norm": 0.6115104573465706, "learning_rate": 4.551954035345375e-06, "loss": 0.3001, "step": 9296 }, { "epoch": 0.43551787136365766, "grad_norm": 0.5988086133590937, "learning_rate": 4.55184569329083e-06, "loss": 0.2935, "step": 9297 }, { "epoch": 0.43556471635358596, "grad_norm": 0.6590892153151622, "learning_rate": 4.551737339428429e-06, "loss": 0.3083, "step": 9298 }, { "epoch": 0.4356115613435143, "grad_norm": 0.5186327239948626, "learning_rate": 4.551628973758796e-06, "loss": 0.2669, "step": 9299 }, { "epoch": 0.43565840633344266, "grad_norm": 0.6322664069196791, "learning_rate": 4.551520596282553e-06, "loss": 0.2832, "step": 9300 }, { "epoch": 0.43570525132337096, "grad_norm": 0.6468519843624938, "learning_rate": 4.551412207000325e-06, "loss": 0.2984, "step": 9301 }, { "epoch": 0.4357520963132993, "grad_norm": 0.5718654829363622, "learning_rate": 4.551303805912735e-06, "loss": 0.2846, "step": 9302 }, { "epoch": 0.4357989413032276, "grad_norm": 0.6611890489328248, "learning_rate": 4.551195393020408e-06, "loss": 0.3026, "step": 9303 }, { "epoch": 0.43584578629315596, "grad_norm": 0.5649501910411417, "learning_rate": 4.551086968323965e-06, "loss": 0.2967, "step": 9304 }, { "epoch": 0.43589263128308425, "grad_norm": 0.5945763987062138, "learning_rate": 4.550978531824035e-06, "loss": 0.284, "step": 9305 }, { "epoch": 0.4359394762730126, "grad_norm": 0.5952970648128799, "learning_rate": 4.550870083521237e-06, "loss": 0.2925, "step": 9306 }, { "epoch": 0.4359863212629409, "grad_norm": 0.5893196839346273, "learning_rate": 4.550761623416198e-06, "loss": 0.2814, "step": 9307 }, { "epoch": 0.43603316625286925, "grad_norm": 0.6023594033498152, "learning_rate": 4.550653151509541e-06, "loss": 0.3024, "step": 9308 }, { "epoch": 0.4360800112427976, "grad_norm": 0.6642143075221709, "learning_rate": 4.55054466780189e-06, "loss": 0.2783, "step": 9309 }, { "epoch": 0.4361268562327259, "grad_norm": 0.655373834985085, "learning_rate": 4.5504361722938705e-06, "loss": 0.298, "step": 9310 }, { "epoch": 0.43617370122265425, "grad_norm": 0.5950677154727367, "learning_rate": 4.5503276649861054e-06, "loss": 0.2844, "step": 9311 }, { "epoch": 0.43622054621258255, "grad_norm": 0.6153983129442624, "learning_rate": 4.5502191458792214e-06, "loss": 0.3094, "step": 9312 }, { "epoch": 0.4362673912025109, "grad_norm": 0.5849519355413463, "learning_rate": 4.550110614973839e-06, "loss": 0.2734, "step": 9313 }, { "epoch": 0.4363142361924392, "grad_norm": 0.5639569936748935, "learning_rate": 4.550002072270588e-06, "loss": 0.288, "step": 9314 }, { "epoch": 0.43636108118236755, "grad_norm": 0.5911375627244932, "learning_rate": 4.549893517770089e-06, "loss": 0.2892, "step": 9315 }, { "epoch": 0.43640792617229585, "grad_norm": 0.6219708965150834, "learning_rate": 4.549784951472968e-06, "loss": 0.3062, "step": 9316 }, { "epoch": 0.4364547711622242, "grad_norm": 0.6084928467610956, "learning_rate": 4.549676373379851e-06, "loss": 0.2754, "step": 9317 }, { "epoch": 0.43650161615215255, "grad_norm": 0.5908457244420238, "learning_rate": 4.54956778349136e-06, "loss": 0.2841, "step": 9318 }, { "epoch": 0.43654846114208085, "grad_norm": 0.607841097525629, "learning_rate": 4.549459181808122e-06, "loss": 0.291, "step": 9319 }, { "epoch": 0.4365953061320092, "grad_norm": 0.5967720672365016, "learning_rate": 4.5493505683307616e-06, "loss": 0.2836, "step": 9320 }, { "epoch": 0.4366421511219375, "grad_norm": 0.6067353742345539, "learning_rate": 4.5492419430599045e-06, "loss": 0.2966, "step": 9321 }, { "epoch": 0.43668899611186585, "grad_norm": 0.5615832132422391, "learning_rate": 4.549133305996174e-06, "loss": 0.2844, "step": 9322 }, { "epoch": 0.43673584110179414, "grad_norm": 0.6205688404828391, "learning_rate": 4.549024657140197e-06, "loss": 0.281, "step": 9323 }, { "epoch": 0.4367826860917225, "grad_norm": 0.5737330791930265, "learning_rate": 4.548915996492598e-06, "loss": 0.2768, "step": 9324 }, { "epoch": 0.4368295310816508, "grad_norm": 0.6723713007436397, "learning_rate": 4.548807324054002e-06, "loss": 0.2839, "step": 9325 }, { "epoch": 0.43687637607157914, "grad_norm": 0.6633046536355478, "learning_rate": 4.5486986398250355e-06, "loss": 0.2928, "step": 9326 }, { "epoch": 0.4369232210615075, "grad_norm": 0.6161866649201698, "learning_rate": 4.548589943806323e-06, "loss": 0.274, "step": 9327 }, { "epoch": 0.4369700660514358, "grad_norm": 0.5992411715414568, "learning_rate": 4.54848123599849e-06, "loss": 0.2888, "step": 9328 }, { "epoch": 0.43701691104136414, "grad_norm": 0.6008641040820683, "learning_rate": 4.548372516402162e-06, "loss": 0.2886, "step": 9329 }, { "epoch": 0.43706375603129244, "grad_norm": 0.6481602970219275, "learning_rate": 4.548263785017966e-06, "loss": 0.3031, "step": 9330 }, { "epoch": 0.4371106010212208, "grad_norm": 0.5998066992398418, "learning_rate": 4.548155041846526e-06, "loss": 0.3069, "step": 9331 }, { "epoch": 0.4371574460111491, "grad_norm": 0.6079023584631082, "learning_rate": 4.548046286888469e-06, "loss": 0.2862, "step": 9332 }, { "epoch": 0.43720429100107744, "grad_norm": 0.6676416563907838, "learning_rate": 4.54793752014442e-06, "loss": 0.2901, "step": 9333 }, { "epoch": 0.43725113599100573, "grad_norm": 0.570366345248749, "learning_rate": 4.547828741615006e-06, "loss": 0.2706, "step": 9334 }, { "epoch": 0.4372979809809341, "grad_norm": 0.6353108203703303, "learning_rate": 4.5477199513008514e-06, "loss": 0.3102, "step": 9335 }, { "epoch": 0.43734482597086244, "grad_norm": 0.6399103617202018, "learning_rate": 4.547611149202583e-06, "loss": 0.3085, "step": 9336 }, { "epoch": 0.43739167096079073, "grad_norm": 0.63802080408167, "learning_rate": 4.547502335320828e-06, "loss": 0.2935, "step": 9337 }, { "epoch": 0.4374385159507191, "grad_norm": 0.5816606387984045, "learning_rate": 4.547393509656212e-06, "loss": 0.2774, "step": 9338 }, { "epoch": 0.4374853609406474, "grad_norm": 0.6288465160042661, "learning_rate": 4.5472846722093585e-06, "loss": 0.2908, "step": 9339 }, { "epoch": 0.43753220593057573, "grad_norm": 0.6568364271511951, "learning_rate": 4.547175822980899e-06, "loss": 0.316, "step": 9340 }, { "epoch": 0.43757905092050403, "grad_norm": 0.5840901406095407, "learning_rate": 4.547066961971456e-06, "loss": 0.2712, "step": 9341 }, { "epoch": 0.4376258959104324, "grad_norm": 0.610632718178045, "learning_rate": 4.546958089181657e-06, "loss": 0.2964, "step": 9342 }, { "epoch": 0.4376727409003607, "grad_norm": 0.6329038972175036, "learning_rate": 4.546849204612129e-06, "loss": 0.3042, "step": 9343 }, { "epoch": 0.43771958589028903, "grad_norm": 0.6019392125641141, "learning_rate": 4.5467403082634985e-06, "loss": 0.3018, "step": 9344 }, { "epoch": 0.4377664308802174, "grad_norm": 0.6817532690941581, "learning_rate": 4.546631400136392e-06, "loss": 0.3107, "step": 9345 }, { "epoch": 0.4378132758701457, "grad_norm": 0.6395607001939451, "learning_rate": 4.5465224802314355e-06, "loss": 0.3122, "step": 9346 }, { "epoch": 0.43786012086007403, "grad_norm": 0.6414116164521196, "learning_rate": 4.5464135485492575e-06, "loss": 0.2912, "step": 9347 }, { "epoch": 0.4379069658500023, "grad_norm": 0.6156375602594851, "learning_rate": 4.546304605090482e-06, "loss": 0.293, "step": 9348 }, { "epoch": 0.4379538108399307, "grad_norm": 0.5405944726190204, "learning_rate": 4.54619564985574e-06, "loss": 0.2608, "step": 9349 }, { "epoch": 0.438000655829859, "grad_norm": 0.6378245395460393, "learning_rate": 4.546086682845655e-06, "loss": 0.2923, "step": 9350 }, { "epoch": 0.4380475008197873, "grad_norm": 0.5750459364473008, "learning_rate": 4.5459777040608565e-06, "loss": 0.2787, "step": 9351 }, { "epoch": 0.4380943458097156, "grad_norm": 0.6569439370597667, "learning_rate": 4.54586871350197e-06, "loss": 0.2923, "step": 9352 }, { "epoch": 0.438141190799644, "grad_norm": 0.6063248580639987, "learning_rate": 4.545759711169622e-06, "loss": 0.2858, "step": 9353 }, { "epoch": 0.4381880357895723, "grad_norm": 0.6055886773478619, "learning_rate": 4.545650697064443e-06, "loss": 0.299, "step": 9354 }, { "epoch": 0.4382348807795006, "grad_norm": 0.5587208439624445, "learning_rate": 4.545541671187058e-06, "loss": 0.2693, "step": 9355 }, { "epoch": 0.438281725769429, "grad_norm": 0.5732639251224061, "learning_rate": 4.545432633538094e-06, "loss": 0.3045, "step": 9356 }, { "epoch": 0.43832857075935727, "grad_norm": 0.6060518338751361, "learning_rate": 4.54532358411818e-06, "loss": 0.2912, "step": 9357 }, { "epoch": 0.4383754157492856, "grad_norm": 0.6391594612640443, "learning_rate": 4.5452145229279425e-06, "loss": 0.3127, "step": 9358 }, { "epoch": 0.4384222607392139, "grad_norm": 0.6667356325617008, "learning_rate": 4.545105449968009e-06, "loss": 0.2965, "step": 9359 }, { "epoch": 0.43846910572914227, "grad_norm": 0.5936707385325988, "learning_rate": 4.544996365239008e-06, "loss": 0.2735, "step": 9360 }, { "epoch": 0.43851595071907057, "grad_norm": 0.6466208206593598, "learning_rate": 4.5448872687415675e-06, "loss": 0.2998, "step": 9361 }, { "epoch": 0.4385627957089989, "grad_norm": 0.5677234113958435, "learning_rate": 4.544778160476314e-06, "loss": 0.2945, "step": 9362 }, { "epoch": 0.43860964069892727, "grad_norm": 0.6654148678067348, "learning_rate": 4.544669040443877e-06, "loss": 0.3136, "step": 9363 }, { "epoch": 0.43865648568885557, "grad_norm": 0.5884446850313448, "learning_rate": 4.544559908644883e-06, "loss": 0.2791, "step": 9364 }, { "epoch": 0.4387033306787839, "grad_norm": 0.6246548321268984, "learning_rate": 4.54445076507996e-06, "loss": 0.3073, "step": 9365 }, { "epoch": 0.4387501756687122, "grad_norm": 0.552985047261081, "learning_rate": 4.544341609749737e-06, "loss": 0.2722, "step": 9366 }, { "epoch": 0.43879702065864057, "grad_norm": 0.6236673141960866, "learning_rate": 4.544232442654842e-06, "loss": 0.2905, "step": 9367 }, { "epoch": 0.43884386564856886, "grad_norm": 0.5905899199039967, "learning_rate": 4.544123263795904e-06, "loss": 0.273, "step": 9368 }, { "epoch": 0.4388907106384972, "grad_norm": 0.5781935717953592, "learning_rate": 4.544014073173549e-06, "loss": 0.2736, "step": 9369 }, { "epoch": 0.4389375556284255, "grad_norm": 0.5657403255817173, "learning_rate": 4.543904870788408e-06, "loss": 0.2805, "step": 9370 }, { "epoch": 0.43898440061835386, "grad_norm": 0.7006803123957143, "learning_rate": 4.543795656641108e-06, "loss": 0.3171, "step": 9371 }, { "epoch": 0.4390312456082822, "grad_norm": 0.5958391110548665, "learning_rate": 4.543686430732277e-06, "loss": 0.2907, "step": 9372 }, { "epoch": 0.4390780905982105, "grad_norm": 0.5641766973069423, "learning_rate": 4.543577193062545e-06, "loss": 0.2988, "step": 9373 }, { "epoch": 0.43912493558813886, "grad_norm": 0.6195868459641068, "learning_rate": 4.543467943632539e-06, "loss": 0.2889, "step": 9374 }, { "epoch": 0.43917178057806716, "grad_norm": 0.5920223153475089, "learning_rate": 4.543358682442889e-06, "loss": 0.2934, "step": 9375 }, { "epoch": 0.4392186255679955, "grad_norm": 0.568949782333206, "learning_rate": 4.543249409494223e-06, "loss": 0.2796, "step": 9376 }, { "epoch": 0.4392654705579238, "grad_norm": 0.5659095495174649, "learning_rate": 4.543140124787171e-06, "loss": 0.2816, "step": 9377 }, { "epoch": 0.43931231554785216, "grad_norm": 0.6144303408180588, "learning_rate": 4.54303082832236e-06, "loss": 0.3147, "step": 9378 }, { "epoch": 0.43935916053778046, "grad_norm": 0.5751053111743802, "learning_rate": 4.542921520100421e-06, "loss": 0.2721, "step": 9379 }, { "epoch": 0.4394060055277088, "grad_norm": 0.63077962577165, "learning_rate": 4.542812200121982e-06, "loss": 0.2805, "step": 9380 }, { "epoch": 0.43945285051763716, "grad_norm": 0.6526880135455067, "learning_rate": 4.542702868387672e-06, "loss": 0.293, "step": 9381 }, { "epoch": 0.43949969550756546, "grad_norm": 0.6228816357829994, "learning_rate": 4.542593524898121e-06, "loss": 0.303, "step": 9382 }, { "epoch": 0.4395465404974938, "grad_norm": 0.6020153237534078, "learning_rate": 4.542484169653957e-06, "loss": 0.3, "step": 9383 }, { "epoch": 0.4395933854874221, "grad_norm": 0.5766608676996181, "learning_rate": 4.54237480265581e-06, "loss": 0.2886, "step": 9384 }, { "epoch": 0.43964023047735046, "grad_norm": 0.563771498716549, "learning_rate": 4.5422654239043095e-06, "loss": 0.2802, "step": 9385 }, { "epoch": 0.43968707546727875, "grad_norm": 0.5777016966399956, "learning_rate": 4.5421560334000845e-06, "loss": 0.2794, "step": 9386 }, { "epoch": 0.4397339204572071, "grad_norm": 0.6168225766754095, "learning_rate": 4.542046631143765e-06, "loss": 0.2879, "step": 9387 }, { "epoch": 0.4397807654471354, "grad_norm": 0.6267045593837297, "learning_rate": 4.5419372171359804e-06, "loss": 0.2977, "step": 9388 }, { "epoch": 0.43982761043706375, "grad_norm": 0.63393120330197, "learning_rate": 4.541827791377361e-06, "loss": 0.3053, "step": 9389 }, { "epoch": 0.4398744554269921, "grad_norm": 0.6248613702943838, "learning_rate": 4.541718353868535e-06, "loss": 0.2916, "step": 9390 }, { "epoch": 0.4399213004169204, "grad_norm": 0.6196515951408856, "learning_rate": 4.541608904610133e-06, "loss": 0.3092, "step": 9391 }, { "epoch": 0.43996814540684875, "grad_norm": 0.6244071985510593, "learning_rate": 4.541499443602785e-06, "loss": 0.3012, "step": 9392 }, { "epoch": 0.44001499039677705, "grad_norm": 0.6143733135008766, "learning_rate": 4.541389970847121e-06, "loss": 0.305, "step": 9393 }, { "epoch": 0.4400618353867054, "grad_norm": 0.6194096404214705, "learning_rate": 4.541280486343771e-06, "loss": 0.3038, "step": 9394 }, { "epoch": 0.4401086803766337, "grad_norm": 0.6122779260817552, "learning_rate": 4.541170990093364e-06, "loss": 0.295, "step": 9395 }, { "epoch": 0.44015552536656205, "grad_norm": 0.6072873368677716, "learning_rate": 4.541061482096532e-06, "loss": 0.2948, "step": 9396 }, { "epoch": 0.44020237035649035, "grad_norm": 0.6342008268817747, "learning_rate": 4.540951962353904e-06, "loss": 0.3018, "step": 9397 }, { "epoch": 0.4402492153464187, "grad_norm": 0.6209685728536581, "learning_rate": 4.5408424308661104e-06, "loss": 0.3152, "step": 9398 }, { "epoch": 0.44029606033634705, "grad_norm": 0.579612417688979, "learning_rate": 4.5407328876337805e-06, "loss": 0.2986, "step": 9399 }, { "epoch": 0.44034290532627535, "grad_norm": 0.64404763026545, "learning_rate": 4.540623332657547e-06, "loss": 0.3018, "step": 9400 }, { "epoch": 0.4403897503162037, "grad_norm": 0.5692647087773425, "learning_rate": 4.54051376593804e-06, "loss": 0.2951, "step": 9401 }, { "epoch": 0.440436595306132, "grad_norm": 0.6020072559958531, "learning_rate": 4.540404187475887e-06, "loss": 0.2805, "step": 9402 }, { "epoch": 0.44048344029606035, "grad_norm": 0.5761196930561395, "learning_rate": 4.5402945972717216e-06, "loss": 0.2597, "step": 9403 }, { "epoch": 0.44053028528598864, "grad_norm": 0.5871863925634113, "learning_rate": 4.540184995326174e-06, "loss": 0.2873, "step": 9404 }, { "epoch": 0.440577130275917, "grad_norm": 0.5870289901985517, "learning_rate": 4.540075381639874e-06, "loss": 0.2752, "step": 9405 }, { "epoch": 0.4406239752658453, "grad_norm": 0.5842085942472465, "learning_rate": 4.539965756213453e-06, "loss": 0.2745, "step": 9406 }, { "epoch": 0.44067082025577364, "grad_norm": 0.5957318580651969, "learning_rate": 4.539856119047542e-06, "loss": 0.2786, "step": 9407 }, { "epoch": 0.440717665245702, "grad_norm": 0.5647034354115262, "learning_rate": 4.539746470142772e-06, "loss": 0.2923, "step": 9408 }, { "epoch": 0.4407645102356303, "grad_norm": 0.595789586639055, "learning_rate": 4.539636809499772e-06, "loss": 0.3026, "step": 9409 }, { "epoch": 0.44081135522555864, "grad_norm": 0.6147303024933349, "learning_rate": 4.539527137119177e-06, "loss": 0.3096, "step": 9410 }, { "epoch": 0.44085820021548694, "grad_norm": 0.5997515252121832, "learning_rate": 4.539417453001615e-06, "loss": 0.3033, "step": 9411 }, { "epoch": 0.4409050452054153, "grad_norm": 0.6402559903589173, "learning_rate": 4.539307757147718e-06, "loss": 0.2997, "step": 9412 }, { "epoch": 0.4409518901953436, "grad_norm": 0.6026403401213944, "learning_rate": 4.539198049558118e-06, "loss": 0.2824, "step": 9413 }, { "epoch": 0.44099873518527194, "grad_norm": 0.5592111433394948, "learning_rate": 4.539088330233446e-06, "loss": 0.2748, "step": 9414 }, { "epoch": 0.44104558017520024, "grad_norm": 0.6123592703149627, "learning_rate": 4.538978599174332e-06, "loss": 0.3071, "step": 9415 }, { "epoch": 0.4410924251651286, "grad_norm": 0.5769731264479584, "learning_rate": 4.538868856381408e-06, "loss": 0.2915, "step": 9416 }, { "epoch": 0.44113927015505694, "grad_norm": 0.5689988172746797, "learning_rate": 4.538759101855309e-06, "loss": 0.2756, "step": 9417 }, { "epoch": 0.44118611514498524, "grad_norm": 0.5980415253807763, "learning_rate": 4.538649335596662e-06, "loss": 0.2803, "step": 9418 }, { "epoch": 0.4412329601349136, "grad_norm": 0.5795293088575483, "learning_rate": 4.538539557606101e-06, "loss": 0.2802, "step": 9419 }, { "epoch": 0.4412798051248419, "grad_norm": 0.633970949037154, "learning_rate": 4.5384297678842555e-06, "loss": 0.2797, "step": 9420 }, { "epoch": 0.44132665011477024, "grad_norm": 0.6486492247164224, "learning_rate": 4.5383199664317615e-06, "loss": 0.2856, "step": 9421 }, { "epoch": 0.44137349510469853, "grad_norm": 0.6050645828290053, "learning_rate": 4.538210153249247e-06, "loss": 0.282, "step": 9422 }, { "epoch": 0.4414203400946269, "grad_norm": 0.5601149712268889, "learning_rate": 4.538100328337346e-06, "loss": 0.2593, "step": 9423 }, { "epoch": 0.4414671850845552, "grad_norm": 0.6621805676842736, "learning_rate": 4.53799049169669e-06, "loss": 0.2824, "step": 9424 }, { "epoch": 0.44151403007448353, "grad_norm": 0.6644643905444123, "learning_rate": 4.53788064332791e-06, "loss": 0.3108, "step": 9425 }, { "epoch": 0.4415608750644119, "grad_norm": 0.6714698526986808, "learning_rate": 4.537770783231639e-06, "loss": 0.3004, "step": 9426 }, { "epoch": 0.4416077200543402, "grad_norm": 0.5884691289075693, "learning_rate": 4.53766091140851e-06, "loss": 0.2675, "step": 9427 }, { "epoch": 0.44165456504426853, "grad_norm": 0.5525264311087901, "learning_rate": 4.537551027859155e-06, "loss": 0.2543, "step": 9428 }, { "epoch": 0.44170141003419683, "grad_norm": 0.6147821324580715, "learning_rate": 4.537441132584205e-06, "loss": 0.2737, "step": 9429 }, { "epoch": 0.4417482550241252, "grad_norm": 0.6130174784436152, "learning_rate": 4.537331225584294e-06, "loss": 0.285, "step": 9430 }, { "epoch": 0.4417951000140535, "grad_norm": 0.6060181994227921, "learning_rate": 4.537221306860053e-06, "loss": 0.2872, "step": 9431 }, { "epoch": 0.44184194500398183, "grad_norm": 0.6744733483131375, "learning_rate": 4.537111376412117e-06, "loss": 0.2935, "step": 9432 }, { "epoch": 0.4418887899939101, "grad_norm": 0.6898774226935142, "learning_rate": 4.537001434241116e-06, "loss": 0.3099, "step": 9433 }, { "epoch": 0.4419356349838385, "grad_norm": 0.5867713267287009, "learning_rate": 4.5368914803476835e-06, "loss": 0.2877, "step": 9434 }, { "epoch": 0.44198247997376683, "grad_norm": 0.5993689766203883, "learning_rate": 4.536781514732453e-06, "loss": 0.2984, "step": 9435 }, { "epoch": 0.4420293249636951, "grad_norm": 0.5914722402763616, "learning_rate": 4.536671537396057e-06, "loss": 0.2702, "step": 9436 }, { "epoch": 0.4420761699536235, "grad_norm": 0.6962042116883265, "learning_rate": 4.536561548339128e-06, "loss": 0.3357, "step": 9437 }, { "epoch": 0.4421230149435518, "grad_norm": 0.6171702907344379, "learning_rate": 4.536451547562299e-06, "loss": 0.2844, "step": 9438 }, { "epoch": 0.4421698599334801, "grad_norm": 0.5823095099980226, "learning_rate": 4.536341535066203e-06, "loss": 0.2682, "step": 9439 }, { "epoch": 0.4422167049234084, "grad_norm": 0.6410457540996066, "learning_rate": 4.536231510851473e-06, "loss": 0.3056, "step": 9440 }, { "epoch": 0.4422635499133368, "grad_norm": 0.5953425083018281, "learning_rate": 4.536121474918743e-06, "loss": 0.2964, "step": 9441 }, { "epoch": 0.44231039490326507, "grad_norm": 0.6481201827374317, "learning_rate": 4.536011427268646e-06, "loss": 0.2942, "step": 9442 }, { "epoch": 0.4423572398931934, "grad_norm": 0.6096262914191414, "learning_rate": 4.535901367901815e-06, "loss": 0.3008, "step": 9443 }, { "epoch": 0.4424040848831218, "grad_norm": 0.6284015094529971, "learning_rate": 4.535791296818882e-06, "loss": 0.2897, "step": 9444 }, { "epoch": 0.44245092987305007, "grad_norm": 0.6271844699865382, "learning_rate": 4.535681214020483e-06, "loss": 0.3008, "step": 9445 }, { "epoch": 0.4424977748629784, "grad_norm": 0.6663981545294174, "learning_rate": 4.53557111950725e-06, "loss": 0.2928, "step": 9446 }, { "epoch": 0.4425446198529067, "grad_norm": 0.6426184121859768, "learning_rate": 4.535461013279817e-06, "loss": 0.2873, "step": 9447 }, { "epoch": 0.44259146484283507, "grad_norm": 0.6268864835967052, "learning_rate": 4.535350895338817e-06, "loss": 0.2937, "step": 9448 }, { "epoch": 0.44263830983276337, "grad_norm": 0.6190771553679678, "learning_rate": 4.535240765684884e-06, "loss": 0.3047, "step": 9449 }, { "epoch": 0.4426851548226917, "grad_norm": 0.6077490055271559, "learning_rate": 4.535130624318653e-06, "loss": 0.29, "step": 9450 }, { "epoch": 0.44273199981262, "grad_norm": 0.5638491551831574, "learning_rate": 4.535020471240756e-06, "loss": 0.2842, "step": 9451 }, { "epoch": 0.44277884480254837, "grad_norm": 0.604531994302314, "learning_rate": 4.534910306451827e-06, "loss": 0.2899, "step": 9452 }, { "epoch": 0.4428256897924767, "grad_norm": 0.6210488081870936, "learning_rate": 4.534800129952502e-06, "loss": 0.2662, "step": 9453 }, { "epoch": 0.442872534782405, "grad_norm": 0.6663672010686691, "learning_rate": 4.5346899417434135e-06, "loss": 0.2923, "step": 9454 }, { "epoch": 0.44291937977233337, "grad_norm": 0.6012486893836281, "learning_rate": 4.534579741825195e-06, "loss": 0.2952, "step": 9455 }, { "epoch": 0.44296622476226166, "grad_norm": 0.6001356393795939, "learning_rate": 4.5344695301984824e-06, "loss": 0.3, "step": 9456 }, { "epoch": 0.44301306975219, "grad_norm": 0.6019378376216635, "learning_rate": 4.534359306863908e-06, "loss": 0.3009, "step": 9457 }, { "epoch": 0.4430599147421183, "grad_norm": 0.5629618815881618, "learning_rate": 4.534249071822108e-06, "loss": 0.2905, "step": 9458 }, { "epoch": 0.44310675973204666, "grad_norm": 0.6209248004261066, "learning_rate": 4.534138825073715e-06, "loss": 0.2991, "step": 9459 }, { "epoch": 0.44315360472197496, "grad_norm": 0.6503190905649749, "learning_rate": 4.534028566619365e-06, "loss": 0.2927, "step": 9460 }, { "epoch": 0.4432004497119033, "grad_norm": 0.620861194776798, "learning_rate": 4.533918296459691e-06, "loss": 0.2953, "step": 9461 }, { "epoch": 0.44324729470183166, "grad_norm": 0.6344731710549285, "learning_rate": 4.533808014595329e-06, "loss": 0.3051, "step": 9462 }, { "epoch": 0.44329413969175996, "grad_norm": 0.6249487851899547, "learning_rate": 4.533697721026913e-06, "loss": 0.2903, "step": 9463 }, { "epoch": 0.4433409846816883, "grad_norm": 0.595739778978344, "learning_rate": 4.533587415755078e-06, "loss": 0.297, "step": 9464 }, { "epoch": 0.4433878296716166, "grad_norm": 0.6416516182600785, "learning_rate": 4.533477098780458e-06, "loss": 0.3101, "step": 9465 }, { "epoch": 0.44343467466154496, "grad_norm": 0.6737312724477099, "learning_rate": 4.533366770103689e-06, "loss": 0.302, "step": 9466 }, { "epoch": 0.44348151965147325, "grad_norm": 0.5928640354046686, "learning_rate": 4.533256429725405e-06, "loss": 0.2824, "step": 9467 }, { "epoch": 0.4435283646414016, "grad_norm": 0.6067648816604144, "learning_rate": 4.533146077646241e-06, "loss": 0.2884, "step": 9468 }, { "epoch": 0.4435752096313299, "grad_norm": 0.563497436419075, "learning_rate": 4.533035713866833e-06, "loss": 0.2896, "step": 9469 }, { "epoch": 0.44362205462125825, "grad_norm": 0.5881128684012878, "learning_rate": 4.5329253383878145e-06, "loss": 0.2715, "step": 9470 }, { "epoch": 0.4436688996111866, "grad_norm": 0.5674584884872781, "learning_rate": 4.532814951209823e-06, "loss": 0.2794, "step": 9471 }, { "epoch": 0.4437157446011149, "grad_norm": 0.5636236261691013, "learning_rate": 4.5327045523334915e-06, "loss": 0.2722, "step": 9472 }, { "epoch": 0.44376258959104325, "grad_norm": 0.5774220012466392, "learning_rate": 4.532594141759457e-06, "loss": 0.269, "step": 9473 }, { "epoch": 0.44380943458097155, "grad_norm": 0.6196950607220103, "learning_rate": 4.532483719488353e-06, "loss": 0.2757, "step": 9474 }, { "epoch": 0.4438562795708999, "grad_norm": 0.5652294955677389, "learning_rate": 4.532373285520816e-06, "loss": 0.2947, "step": 9475 }, { "epoch": 0.4439031245608282, "grad_norm": 0.6372239636583749, "learning_rate": 4.532262839857482e-06, "loss": 0.2744, "step": 9476 }, { "epoch": 0.44394996955075655, "grad_norm": 0.611560974696411, "learning_rate": 4.532152382498987e-06, "loss": 0.2846, "step": 9477 }, { "epoch": 0.44399681454068485, "grad_norm": 0.5875869563460056, "learning_rate": 4.532041913445964e-06, "loss": 0.2939, "step": 9478 }, { "epoch": 0.4440436595306132, "grad_norm": 0.630108026974609, "learning_rate": 4.5319314326990515e-06, "loss": 0.3002, "step": 9479 }, { "epoch": 0.44409050452054155, "grad_norm": 0.5975515784058472, "learning_rate": 4.531820940258885e-06, "loss": 0.2759, "step": 9480 }, { "epoch": 0.44413734951046985, "grad_norm": 0.5632413348156288, "learning_rate": 4.531710436126099e-06, "loss": 0.2892, "step": 9481 }, { "epoch": 0.4441841945003982, "grad_norm": 0.6173147242265828, "learning_rate": 4.531599920301329e-06, "loss": 0.2929, "step": 9482 }, { "epoch": 0.4442310394903265, "grad_norm": 0.5758769164216413, "learning_rate": 4.531489392785214e-06, "loss": 0.2932, "step": 9483 }, { "epoch": 0.44427788448025485, "grad_norm": 0.6134054381979789, "learning_rate": 4.531378853578388e-06, "loss": 0.3047, "step": 9484 }, { "epoch": 0.44432472947018314, "grad_norm": 0.592524710534006, "learning_rate": 4.531268302681486e-06, "loss": 0.2949, "step": 9485 }, { "epoch": 0.4443715744601115, "grad_norm": 0.619149103958697, "learning_rate": 4.531157740095146e-06, "loss": 0.2892, "step": 9486 }, { "epoch": 0.4444184194500398, "grad_norm": 0.5738730566672744, "learning_rate": 4.5310471658200035e-06, "loss": 0.301, "step": 9487 }, { "epoch": 0.44446526443996814, "grad_norm": 0.5633223188666737, "learning_rate": 4.530936579856695e-06, "loss": 0.2746, "step": 9488 }, { "epoch": 0.4445121094298965, "grad_norm": 0.6188319901536856, "learning_rate": 4.530825982205857e-06, "loss": 0.3001, "step": 9489 }, { "epoch": 0.4445589544198248, "grad_norm": 0.6460933510630835, "learning_rate": 4.5307153728681266e-06, "loss": 0.3239, "step": 9490 }, { "epoch": 0.44460579940975314, "grad_norm": 0.6547019270974116, "learning_rate": 4.530604751844139e-06, "loss": 0.3013, "step": 9491 }, { "epoch": 0.44465264439968144, "grad_norm": 0.6512963284774879, "learning_rate": 4.530494119134532e-06, "loss": 0.2962, "step": 9492 }, { "epoch": 0.4446994893896098, "grad_norm": 0.6204997847417038, "learning_rate": 4.530383474739941e-06, "loss": 0.2936, "step": 9493 }, { "epoch": 0.4447463343795381, "grad_norm": 0.5918726817055359, "learning_rate": 4.530272818661003e-06, "loss": 0.2935, "step": 9494 }, { "epoch": 0.44479317936946644, "grad_norm": 0.6104755361232311, "learning_rate": 4.530162150898356e-06, "loss": 0.2934, "step": 9495 }, { "epoch": 0.44484002435939474, "grad_norm": 0.5734206989302305, "learning_rate": 4.5300514714526365e-06, "loss": 0.2788, "step": 9496 }, { "epoch": 0.4448868693493231, "grad_norm": 0.5778145704043074, "learning_rate": 4.52994078032448e-06, "loss": 0.2756, "step": 9497 }, { "epoch": 0.44493371433925144, "grad_norm": 0.624201022928397, "learning_rate": 4.529830077514525e-06, "loss": 0.299, "step": 9498 }, { "epoch": 0.44498055932917974, "grad_norm": 0.6221947963312174, "learning_rate": 4.529719363023409e-06, "loss": 0.2943, "step": 9499 }, { "epoch": 0.4450274043191081, "grad_norm": 0.6265331621384819, "learning_rate": 4.529608636851766e-06, "loss": 0.2999, "step": 9500 }, { "epoch": 0.4450742493090364, "grad_norm": 0.6360490679616181, "learning_rate": 4.529497899000238e-06, "loss": 0.3174, "step": 9501 }, { "epoch": 0.44512109429896474, "grad_norm": 0.5862344435522403, "learning_rate": 4.529387149469458e-06, "loss": 0.2856, "step": 9502 }, { "epoch": 0.44516793928889303, "grad_norm": 0.6054656449080777, "learning_rate": 4.529276388260066e-06, "loss": 0.2739, "step": 9503 }, { "epoch": 0.4452147842788214, "grad_norm": 0.5494218249396977, "learning_rate": 4.529165615372697e-06, "loss": 0.2845, "step": 9504 }, { "epoch": 0.4452616292687497, "grad_norm": 0.6198506427357584, "learning_rate": 4.529054830807991e-06, "loss": 0.2945, "step": 9505 }, { "epoch": 0.44530847425867803, "grad_norm": 0.57884333970234, "learning_rate": 4.528944034566585e-06, "loss": 0.307, "step": 9506 }, { "epoch": 0.4453553192486064, "grad_norm": 0.5881495733357699, "learning_rate": 4.528833226649115e-06, "loss": 0.2829, "step": 9507 }, { "epoch": 0.4454021642385347, "grad_norm": 0.5753839726636603, "learning_rate": 4.528722407056219e-06, "loss": 0.2938, "step": 9508 }, { "epoch": 0.44544900922846303, "grad_norm": 0.6342967646606078, "learning_rate": 4.528611575788537e-06, "loss": 0.3102, "step": 9509 }, { "epoch": 0.44549585421839133, "grad_norm": 0.597108607412318, "learning_rate": 4.528500732846705e-06, "loss": 0.2861, "step": 9510 }, { "epoch": 0.4455426992083197, "grad_norm": 0.537723289292712, "learning_rate": 4.528389878231361e-06, "loss": 0.2711, "step": 9511 }, { "epoch": 0.445589544198248, "grad_norm": 0.6088940335963915, "learning_rate": 4.528279011943142e-06, "loss": 0.3094, "step": 9512 }, { "epoch": 0.44563638918817633, "grad_norm": 0.627151018123468, "learning_rate": 4.528168133982688e-06, "loss": 0.2833, "step": 9513 }, { "epoch": 0.4456832341781046, "grad_norm": 0.58877521659489, "learning_rate": 4.528057244350636e-06, "loss": 0.2894, "step": 9514 }, { "epoch": 0.445730079168033, "grad_norm": 0.6578462576942999, "learning_rate": 4.527946343047625e-06, "loss": 0.2974, "step": 9515 }, { "epoch": 0.44577692415796133, "grad_norm": 0.5953360357937445, "learning_rate": 4.527835430074291e-06, "loss": 0.2943, "step": 9516 }, { "epoch": 0.4458237691478896, "grad_norm": 0.6537420839824416, "learning_rate": 4.527724505431275e-06, "loss": 0.3037, "step": 9517 }, { "epoch": 0.445870614137818, "grad_norm": 0.5981350665526789, "learning_rate": 4.527613569119213e-06, "loss": 0.2993, "step": 9518 }, { "epoch": 0.4459174591277463, "grad_norm": 0.52741200295637, "learning_rate": 4.527502621138746e-06, "loss": 0.2571, "step": 9519 }, { "epoch": 0.4459643041176746, "grad_norm": 0.6369835885490587, "learning_rate": 4.52739166149051e-06, "loss": 0.2863, "step": 9520 }, { "epoch": 0.4460111491076029, "grad_norm": 0.5792577618527626, "learning_rate": 4.527280690175145e-06, "loss": 0.2614, "step": 9521 }, { "epoch": 0.4460579940975313, "grad_norm": 0.6213661096655889, "learning_rate": 4.5271697071932895e-06, "loss": 0.2736, "step": 9522 }, { "epoch": 0.44610483908745957, "grad_norm": 0.5533115773740684, "learning_rate": 4.527058712545581e-06, "loss": 0.2764, "step": 9523 }, { "epoch": 0.4461516840773879, "grad_norm": 0.6182977870809199, "learning_rate": 4.5269477062326595e-06, "loss": 0.2939, "step": 9524 }, { "epoch": 0.4461985290673163, "grad_norm": 0.549635913428749, "learning_rate": 4.526836688255163e-06, "loss": 0.2558, "step": 9525 }, { "epoch": 0.44624537405724457, "grad_norm": 0.6278422263973577, "learning_rate": 4.526725658613731e-06, "loss": 0.2809, "step": 9526 }, { "epoch": 0.4462922190471729, "grad_norm": 0.6103919004715236, "learning_rate": 4.526614617309003e-06, "loss": 0.2851, "step": 9527 }, { "epoch": 0.4463390640371012, "grad_norm": 0.6448300660961481, "learning_rate": 4.5265035643416164e-06, "loss": 0.3029, "step": 9528 }, { "epoch": 0.44638590902702957, "grad_norm": 0.6012533887766403, "learning_rate": 4.526392499712211e-06, "loss": 0.2987, "step": 9529 }, { "epoch": 0.44643275401695787, "grad_norm": 0.6458881623253457, "learning_rate": 4.526281423421427e-06, "loss": 0.2875, "step": 9530 }, { "epoch": 0.4464795990068862, "grad_norm": 0.6346951470174488, "learning_rate": 4.526170335469901e-06, "loss": 0.3015, "step": 9531 }, { "epoch": 0.4465264439968145, "grad_norm": 0.6015001683048397, "learning_rate": 4.526059235858276e-06, "loss": 0.286, "step": 9532 }, { "epoch": 0.44657328898674287, "grad_norm": 0.6548906402427375, "learning_rate": 4.5259481245871885e-06, "loss": 0.301, "step": 9533 }, { "epoch": 0.4466201339766712, "grad_norm": 0.6291067540278896, "learning_rate": 4.525837001657279e-06, "loss": 0.2983, "step": 9534 }, { "epoch": 0.4466669789665995, "grad_norm": 0.6141790822282214, "learning_rate": 4.525725867069186e-06, "loss": 0.2915, "step": 9535 }, { "epoch": 0.44671382395652787, "grad_norm": 0.6281714471417609, "learning_rate": 4.52561472082355e-06, "loss": 0.2826, "step": 9536 }, { "epoch": 0.44676066894645616, "grad_norm": 0.6427570864907378, "learning_rate": 4.525503562921011e-06, "loss": 0.304, "step": 9537 }, { "epoch": 0.4468075139363845, "grad_norm": 0.5936889397609365, "learning_rate": 4.525392393362207e-06, "loss": 0.2878, "step": 9538 }, { "epoch": 0.4468543589263128, "grad_norm": 0.5628269602011119, "learning_rate": 4.52528121214778e-06, "loss": 0.2868, "step": 9539 }, { "epoch": 0.44690120391624116, "grad_norm": 0.562010092159554, "learning_rate": 4.525170019278368e-06, "loss": 0.2748, "step": 9540 }, { "epoch": 0.44694804890616946, "grad_norm": 0.6527070239744619, "learning_rate": 4.5250588147546115e-06, "loss": 0.3035, "step": 9541 }, { "epoch": 0.4469948938960978, "grad_norm": 0.6329449983216956, "learning_rate": 4.524947598577151e-06, "loss": 0.2994, "step": 9542 }, { "epoch": 0.44704173888602616, "grad_norm": 0.559736297476402, "learning_rate": 4.5248363707466255e-06, "loss": 0.2766, "step": 9543 }, { "epoch": 0.44708858387595446, "grad_norm": 0.6068773766302515, "learning_rate": 4.524725131263676e-06, "loss": 0.2922, "step": 9544 }, { "epoch": 0.4471354288658828, "grad_norm": 0.6324450376875683, "learning_rate": 4.524613880128942e-06, "loss": 0.3001, "step": 9545 }, { "epoch": 0.4471822738558111, "grad_norm": 0.586749890766047, "learning_rate": 4.5245026173430646e-06, "loss": 0.2986, "step": 9546 }, { "epoch": 0.44722911884573946, "grad_norm": 0.6513971513746809, "learning_rate": 4.524391342906683e-06, "loss": 0.3121, "step": 9547 }, { "epoch": 0.44727596383566776, "grad_norm": 0.5703383282337076, "learning_rate": 4.524280056820438e-06, "loss": 0.275, "step": 9548 }, { "epoch": 0.4473228088255961, "grad_norm": 0.5973382759003248, "learning_rate": 4.52416875908497e-06, "loss": 0.301, "step": 9549 }, { "epoch": 0.4473696538155244, "grad_norm": 0.6166778327899648, "learning_rate": 4.5240574497009195e-06, "loss": 0.2836, "step": 9550 }, { "epoch": 0.44741649880545276, "grad_norm": 0.5546759378634603, "learning_rate": 4.523946128668927e-06, "loss": 0.2801, "step": 9551 }, { "epoch": 0.4474633437953811, "grad_norm": 0.599852052616321, "learning_rate": 4.523834795989633e-06, "loss": 0.2816, "step": 9552 }, { "epoch": 0.4475101887853094, "grad_norm": 0.6021526078067265, "learning_rate": 4.52372345166368e-06, "loss": 0.2857, "step": 9553 }, { "epoch": 0.44755703377523776, "grad_norm": 0.6389609040019344, "learning_rate": 4.5236120956917054e-06, "loss": 0.2988, "step": 9554 }, { "epoch": 0.44760387876516605, "grad_norm": 0.6055422711066196, "learning_rate": 4.523500728074353e-06, "loss": 0.2919, "step": 9555 }, { "epoch": 0.4476507237550944, "grad_norm": 0.6337256721973981, "learning_rate": 4.523389348812262e-06, "loss": 0.316, "step": 9556 }, { "epoch": 0.4476975687450227, "grad_norm": 0.6511727482416685, "learning_rate": 4.523277957906074e-06, "loss": 0.2983, "step": 9557 }, { "epoch": 0.44774441373495105, "grad_norm": 0.6043134315306427, "learning_rate": 4.523166555356428e-06, "loss": 0.3103, "step": 9558 }, { "epoch": 0.44779125872487935, "grad_norm": 0.5901426512813643, "learning_rate": 4.52305514116397e-06, "loss": 0.2926, "step": 9559 }, { "epoch": 0.4478381037148077, "grad_norm": 0.5771669496150272, "learning_rate": 4.522943715329336e-06, "loss": 0.2696, "step": 9560 }, { "epoch": 0.44788494870473605, "grad_norm": 0.6104877582931553, "learning_rate": 4.522832277853171e-06, "loss": 0.2738, "step": 9561 }, { "epoch": 0.44793179369466435, "grad_norm": 0.6526216600987311, "learning_rate": 4.522720828736114e-06, "loss": 0.3142, "step": 9562 }, { "epoch": 0.4479786386845927, "grad_norm": 0.5555846040908528, "learning_rate": 4.522609367978806e-06, "loss": 0.27, "step": 9563 }, { "epoch": 0.448025483674521, "grad_norm": 0.5915434740059343, "learning_rate": 4.522497895581891e-06, "loss": 0.2743, "step": 9564 }, { "epoch": 0.44807232866444935, "grad_norm": 0.5913479921180789, "learning_rate": 4.5223864115460085e-06, "loss": 0.2832, "step": 9565 }, { "epoch": 0.44811917365437764, "grad_norm": 0.6353932682876884, "learning_rate": 4.522274915871799e-06, "loss": 0.3096, "step": 9566 }, { "epoch": 0.448166018644306, "grad_norm": 0.6304193083238063, "learning_rate": 4.522163408559907e-06, "loss": 0.2701, "step": 9567 }, { "epoch": 0.4482128636342343, "grad_norm": 0.6291497238969764, "learning_rate": 4.522051889610973e-06, "loss": 0.2989, "step": 9568 }, { "epoch": 0.44825970862416264, "grad_norm": 0.5378666041474424, "learning_rate": 4.521940359025638e-06, "loss": 0.2937, "step": 9569 }, { "epoch": 0.448306553614091, "grad_norm": 0.5931774995542544, "learning_rate": 4.521828816804546e-06, "loss": 0.2921, "step": 9570 }, { "epoch": 0.4483533986040193, "grad_norm": 0.5694865083777723, "learning_rate": 4.521717262948336e-06, "loss": 0.3048, "step": 9571 }, { "epoch": 0.44840024359394764, "grad_norm": 0.5980086463339606, "learning_rate": 4.521605697457651e-06, "loss": 0.2796, "step": 9572 }, { "epoch": 0.44844708858387594, "grad_norm": 0.6186243157774948, "learning_rate": 4.5214941203331335e-06, "loss": 0.3027, "step": 9573 }, { "epoch": 0.4484939335738043, "grad_norm": 0.6293511403970001, "learning_rate": 4.521382531575426e-06, "loss": 0.2903, "step": 9574 }, { "epoch": 0.4485407785637326, "grad_norm": 0.6096989099004726, "learning_rate": 4.521270931185169e-06, "loss": 0.2814, "step": 9575 }, { "epoch": 0.44858762355366094, "grad_norm": 0.5972642080286227, "learning_rate": 4.521159319163007e-06, "loss": 0.2917, "step": 9576 }, { "epoch": 0.44863446854358924, "grad_norm": 0.6421587874722295, "learning_rate": 4.5210476955095804e-06, "loss": 0.3159, "step": 9577 }, { "epoch": 0.4486813135335176, "grad_norm": 0.5986889182407964, "learning_rate": 4.520936060225533e-06, "loss": 0.2904, "step": 9578 }, { "epoch": 0.44872815852344594, "grad_norm": 0.604887861654326, "learning_rate": 4.520824413311506e-06, "loss": 0.2998, "step": 9579 }, { "epoch": 0.44877500351337424, "grad_norm": 0.6165855737074736, "learning_rate": 4.520712754768143e-06, "loss": 0.2983, "step": 9580 }, { "epoch": 0.4488218485033026, "grad_norm": 0.5462896100481005, "learning_rate": 4.520601084596085e-06, "loss": 0.2615, "step": 9581 }, { "epoch": 0.4488686934932309, "grad_norm": 0.6242543635325998, "learning_rate": 4.520489402795976e-06, "loss": 0.2754, "step": 9582 }, { "epoch": 0.44891553848315924, "grad_norm": 0.6210771747050209, "learning_rate": 4.520377709368459e-06, "loss": 0.2861, "step": 9583 }, { "epoch": 0.44896238347308753, "grad_norm": 0.5926182971380921, "learning_rate": 4.520266004314174e-06, "loss": 0.2909, "step": 9584 }, { "epoch": 0.4490092284630159, "grad_norm": 0.6002857961721327, "learning_rate": 4.520154287633768e-06, "loss": 0.3055, "step": 9585 }, { "epoch": 0.4490560734529442, "grad_norm": 0.6017881148606296, "learning_rate": 4.520042559327881e-06, "loss": 0.2834, "step": 9586 }, { "epoch": 0.44910291844287253, "grad_norm": 0.6447439379698738, "learning_rate": 4.5199308193971575e-06, "loss": 0.2852, "step": 9587 }, { "epoch": 0.4491497634328009, "grad_norm": 0.5819424092627619, "learning_rate": 4.51981906784224e-06, "loss": 0.2783, "step": 9588 }, { "epoch": 0.4491966084227292, "grad_norm": 0.613425730112215, "learning_rate": 4.519707304663769e-06, "loss": 0.304, "step": 9589 }, { "epoch": 0.44924345341265753, "grad_norm": 0.6146337383882439, "learning_rate": 4.519595529862393e-06, "loss": 0.2872, "step": 9590 }, { "epoch": 0.44929029840258583, "grad_norm": 0.5751644220414146, "learning_rate": 4.519483743438751e-06, "loss": 0.3039, "step": 9591 }, { "epoch": 0.4493371433925142, "grad_norm": 0.583337082287852, "learning_rate": 4.519371945393488e-06, "loss": 0.2901, "step": 9592 }, { "epoch": 0.4493839883824425, "grad_norm": 0.6408745103138708, "learning_rate": 4.519260135727247e-06, "loss": 0.2919, "step": 9593 }, { "epoch": 0.44943083337237083, "grad_norm": 0.5944808352248772, "learning_rate": 4.519148314440671e-06, "loss": 0.2827, "step": 9594 }, { "epoch": 0.4494776783622991, "grad_norm": 0.5738381350100982, "learning_rate": 4.519036481534405e-06, "loss": 0.2696, "step": 9595 }, { "epoch": 0.4495245233522275, "grad_norm": 0.5995669172731882, "learning_rate": 4.51892463700909e-06, "loss": 0.2728, "step": 9596 }, { "epoch": 0.44957136834215583, "grad_norm": 0.6265008973822763, "learning_rate": 4.518812780865371e-06, "loss": 0.2819, "step": 9597 }, { "epoch": 0.4496182133320841, "grad_norm": 0.5729263225971102, "learning_rate": 4.5187009131038935e-06, "loss": 0.268, "step": 9598 }, { "epoch": 0.4496650583220125, "grad_norm": 0.5635861599032379, "learning_rate": 4.518589033725299e-06, "loss": 0.2654, "step": 9599 }, { "epoch": 0.4497119033119408, "grad_norm": 0.6380746489778761, "learning_rate": 4.518477142730232e-06, "loss": 0.2826, "step": 9600 }, { "epoch": 0.4497587483018691, "grad_norm": 0.572118535226733, "learning_rate": 4.518365240119336e-06, "loss": 0.267, "step": 9601 }, { "epoch": 0.4498055932917974, "grad_norm": 0.5821771418340612, "learning_rate": 4.518253325893255e-06, "loss": 0.2735, "step": 9602 }, { "epoch": 0.4498524382817258, "grad_norm": 0.6551051414257538, "learning_rate": 4.518141400052635e-06, "loss": 0.2949, "step": 9603 }, { "epoch": 0.44989928327165407, "grad_norm": 0.610888744065006, "learning_rate": 4.518029462598117e-06, "loss": 0.2808, "step": 9604 }, { "epoch": 0.4499461282615824, "grad_norm": 0.66478953242056, "learning_rate": 4.517917513530347e-06, "loss": 0.3199, "step": 9605 }, { "epoch": 0.4499929732515108, "grad_norm": 0.6011436287106693, "learning_rate": 4.517805552849969e-06, "loss": 0.2994, "step": 9606 }, { "epoch": 0.45003981824143907, "grad_norm": 0.6060782806074002, "learning_rate": 4.517693580557626e-06, "loss": 0.2711, "step": 9607 }, { "epoch": 0.4500866632313674, "grad_norm": 0.5982232046036212, "learning_rate": 4.517581596653966e-06, "loss": 0.2877, "step": 9608 }, { "epoch": 0.4501335082212957, "grad_norm": 0.6436906658413317, "learning_rate": 4.517469601139629e-06, "loss": 0.2904, "step": 9609 }, { "epoch": 0.45018035321122407, "grad_norm": 0.5638735854868067, "learning_rate": 4.517357594015262e-06, "loss": 0.2717, "step": 9610 }, { "epoch": 0.45022719820115237, "grad_norm": 0.6056994980014117, "learning_rate": 4.51724557528151e-06, "loss": 0.288, "step": 9611 }, { "epoch": 0.4502740431910807, "grad_norm": 0.5708986548332758, "learning_rate": 4.517133544939016e-06, "loss": 0.2705, "step": 9612 }, { "epoch": 0.450320888181009, "grad_norm": 0.6435847031106972, "learning_rate": 4.517021502988426e-06, "loss": 0.3145, "step": 9613 }, { "epoch": 0.45036773317093737, "grad_norm": 0.6183918489238485, "learning_rate": 4.516909449430383e-06, "loss": 0.293, "step": 9614 }, { "epoch": 0.4504145781608657, "grad_norm": 0.6549288945617616, "learning_rate": 4.516797384265534e-06, "loss": 0.3248, "step": 9615 }, { "epoch": 0.450461423150794, "grad_norm": 0.6054353624979363, "learning_rate": 4.516685307494523e-06, "loss": 0.2976, "step": 9616 }, { "epoch": 0.45050826814072237, "grad_norm": 0.6041446737997058, "learning_rate": 4.516573219117995e-06, "loss": 0.3005, "step": 9617 }, { "epoch": 0.45055511313065066, "grad_norm": 0.600136346061137, "learning_rate": 4.516461119136593e-06, "loss": 0.2793, "step": 9618 }, { "epoch": 0.450601958120579, "grad_norm": 0.5553364587552834, "learning_rate": 4.516349007550966e-06, "loss": 0.2824, "step": 9619 }, { "epoch": 0.4506488031105073, "grad_norm": 0.5700317846520051, "learning_rate": 4.516236884361758e-06, "loss": 0.2984, "step": 9620 }, { "epoch": 0.45069564810043566, "grad_norm": 0.6371108823491317, "learning_rate": 4.516124749569612e-06, "loss": 0.3098, "step": 9621 }, { "epoch": 0.45074249309036396, "grad_norm": 0.5969394662791384, "learning_rate": 4.516012603175175e-06, "loss": 0.3025, "step": 9622 }, { "epoch": 0.4507893380802923, "grad_norm": 0.6027858650999857, "learning_rate": 4.5159004451790925e-06, "loss": 0.2898, "step": 9623 }, { "epoch": 0.45083618307022066, "grad_norm": 0.5850997346038387, "learning_rate": 4.515788275582009e-06, "loss": 0.2854, "step": 9624 }, { "epoch": 0.45088302806014896, "grad_norm": 0.5751451118200366, "learning_rate": 4.515676094384572e-06, "loss": 0.2859, "step": 9625 }, { "epoch": 0.4509298730500773, "grad_norm": 0.6353052330350184, "learning_rate": 4.515563901587424e-06, "loss": 0.3055, "step": 9626 }, { "epoch": 0.4509767180400056, "grad_norm": 0.5665952141185507, "learning_rate": 4.515451697191214e-06, "loss": 0.2806, "step": 9627 }, { "epoch": 0.45102356302993396, "grad_norm": 0.6163977223172434, "learning_rate": 4.515339481196585e-06, "loss": 0.3044, "step": 9628 }, { "epoch": 0.45107040801986226, "grad_norm": 0.627669350001631, "learning_rate": 4.5152272536041844e-06, "loss": 0.3004, "step": 9629 }, { "epoch": 0.4511172530097906, "grad_norm": 0.5990893607675968, "learning_rate": 4.515115014414657e-06, "loss": 0.285, "step": 9630 }, { "epoch": 0.4511640979997189, "grad_norm": 0.5829987607915249, "learning_rate": 4.515002763628648e-06, "loss": 0.2773, "step": 9631 }, { "epoch": 0.45121094298964726, "grad_norm": 0.5901602047840601, "learning_rate": 4.514890501246807e-06, "loss": 0.2779, "step": 9632 }, { "epoch": 0.4512577879795756, "grad_norm": 0.6491004532914175, "learning_rate": 4.514778227269776e-06, "loss": 0.299, "step": 9633 }, { "epoch": 0.4513046329695039, "grad_norm": 0.5732643405883969, "learning_rate": 4.5146659416982035e-06, "loss": 0.2811, "step": 9634 }, { "epoch": 0.45135147795943226, "grad_norm": 0.582984451500609, "learning_rate": 4.5145536445327345e-06, "loss": 0.2916, "step": 9635 }, { "epoch": 0.45139832294936055, "grad_norm": 0.6112728565210781, "learning_rate": 4.514441335774015e-06, "loss": 0.2753, "step": 9636 }, { "epoch": 0.4514451679392889, "grad_norm": 0.618831378220304, "learning_rate": 4.514329015422693e-06, "loss": 0.2873, "step": 9637 }, { "epoch": 0.4514920129292172, "grad_norm": 0.6062297429642151, "learning_rate": 4.514216683479413e-06, "loss": 0.2799, "step": 9638 }, { "epoch": 0.45153885791914555, "grad_norm": 0.5757994043260244, "learning_rate": 4.514104339944823e-06, "loss": 0.297, "step": 9639 }, { "epoch": 0.45158570290907385, "grad_norm": 0.6193010065912986, "learning_rate": 4.513991984819568e-06, "loss": 0.2877, "step": 9640 }, { "epoch": 0.4516325478990022, "grad_norm": 0.6108712450069865, "learning_rate": 4.513879618104296e-06, "loss": 0.297, "step": 9641 }, { "epoch": 0.45167939288893055, "grad_norm": 0.5719230854300209, "learning_rate": 4.513767239799653e-06, "loss": 0.2675, "step": 9642 }, { "epoch": 0.45172623787885885, "grad_norm": 0.6047289942007416, "learning_rate": 4.513654849906285e-06, "loss": 0.303, "step": 9643 }, { "epoch": 0.4517730828687872, "grad_norm": 0.6361316272854707, "learning_rate": 4.5135424484248394e-06, "loss": 0.3295, "step": 9644 }, { "epoch": 0.4518199278587155, "grad_norm": 0.569253979249529, "learning_rate": 4.5134300353559635e-06, "loss": 0.287, "step": 9645 }, { "epoch": 0.45186677284864385, "grad_norm": 0.6234791638371582, "learning_rate": 4.513317610700304e-06, "loss": 0.2904, "step": 9646 }, { "epoch": 0.45191361783857215, "grad_norm": 0.5704311664715489, "learning_rate": 4.513205174458507e-06, "loss": 0.2825, "step": 9647 }, { "epoch": 0.4519604628285005, "grad_norm": 0.5972636201218078, "learning_rate": 4.513092726631221e-06, "loss": 0.3041, "step": 9648 }, { "epoch": 0.4520073078184288, "grad_norm": 0.61441977099996, "learning_rate": 4.512980267219092e-06, "loss": 0.2913, "step": 9649 }, { "epoch": 0.45205415280835715, "grad_norm": 0.6208373393173017, "learning_rate": 4.512867796222767e-06, "loss": 0.2802, "step": 9650 }, { "epoch": 0.4521009977982855, "grad_norm": 0.5930498596488138, "learning_rate": 4.512755313642895e-06, "loss": 0.2931, "step": 9651 }, { "epoch": 0.4521478427882138, "grad_norm": 0.6100059097746048, "learning_rate": 4.5126428194801205e-06, "loss": 0.303, "step": 9652 }, { "epoch": 0.45219468777814215, "grad_norm": 0.6035347438224943, "learning_rate": 4.512530313735094e-06, "loss": 0.2762, "step": 9653 }, { "epoch": 0.45224153276807044, "grad_norm": 0.6034851685648088, "learning_rate": 4.51241779640846e-06, "loss": 0.3134, "step": 9654 }, { "epoch": 0.4522883777579988, "grad_norm": 0.5580022623115172, "learning_rate": 4.512305267500868e-06, "loss": 0.2784, "step": 9655 }, { "epoch": 0.4523352227479271, "grad_norm": 0.5893411355373063, "learning_rate": 4.512192727012965e-06, "loss": 0.2777, "step": 9656 }, { "epoch": 0.45238206773785544, "grad_norm": 0.5651301231915469, "learning_rate": 4.512080174945399e-06, "loss": 0.2683, "step": 9657 }, { "epoch": 0.45242891272778374, "grad_norm": 0.6348131092255221, "learning_rate": 4.511967611298817e-06, "loss": 0.283, "step": 9658 }, { "epoch": 0.4524757577177121, "grad_norm": 0.5613979636703845, "learning_rate": 4.511855036073866e-06, "loss": 0.284, "step": 9659 }, { "epoch": 0.45252260270764044, "grad_norm": 0.6174969375091333, "learning_rate": 4.511742449271197e-06, "loss": 0.3057, "step": 9660 }, { "epoch": 0.45256944769756874, "grad_norm": 0.5948935927896455, "learning_rate": 4.5116298508914545e-06, "loss": 0.2868, "step": 9661 }, { "epoch": 0.4526162926874971, "grad_norm": 0.547916294968488, "learning_rate": 4.511517240935288e-06, "loss": 0.2728, "step": 9662 }, { "epoch": 0.4526631376774254, "grad_norm": 0.6114375800963737, "learning_rate": 4.511404619403345e-06, "loss": 0.2696, "step": 9663 }, { "epoch": 0.45270998266735374, "grad_norm": 0.6084647632487292, "learning_rate": 4.511291986296274e-06, "loss": 0.2838, "step": 9664 }, { "epoch": 0.45275682765728203, "grad_norm": 0.5973159986606097, "learning_rate": 4.511179341614724e-06, "loss": 0.2817, "step": 9665 }, { "epoch": 0.4528036726472104, "grad_norm": 0.6143523097705462, "learning_rate": 4.511066685359342e-06, "loss": 0.2895, "step": 9666 }, { "epoch": 0.4528505176371387, "grad_norm": 0.5668631510786984, "learning_rate": 4.510954017530776e-06, "loss": 0.269, "step": 9667 }, { "epoch": 0.45289736262706703, "grad_norm": 0.6242503394439323, "learning_rate": 4.5108413381296756e-06, "loss": 0.3147, "step": 9668 }, { "epoch": 0.4529442076169954, "grad_norm": 0.5692657769723413, "learning_rate": 4.510728647156689e-06, "loss": 0.292, "step": 9669 }, { "epoch": 0.4529910526069237, "grad_norm": 0.5924765630570229, "learning_rate": 4.510615944612464e-06, "loss": 0.2858, "step": 9670 }, { "epoch": 0.45303789759685203, "grad_norm": 0.5771610351003139, "learning_rate": 4.510503230497649e-06, "loss": 0.3149, "step": 9671 }, { "epoch": 0.45308474258678033, "grad_norm": 0.6011544792670175, "learning_rate": 4.5103905048128945e-06, "loss": 0.2885, "step": 9672 }, { "epoch": 0.4531315875767087, "grad_norm": 0.5705561714530325, "learning_rate": 4.510277767558847e-06, "loss": 0.283, "step": 9673 }, { "epoch": 0.453178432566637, "grad_norm": 0.6331581321667616, "learning_rate": 4.510165018736157e-06, "loss": 0.2908, "step": 9674 }, { "epoch": 0.45322527755656533, "grad_norm": 0.5874867734762319, "learning_rate": 4.510052258345472e-06, "loss": 0.2859, "step": 9675 }, { "epoch": 0.4532721225464936, "grad_norm": 0.5781848370364345, "learning_rate": 4.5099394863874414e-06, "loss": 0.2791, "step": 9676 }, { "epoch": 0.453318967536422, "grad_norm": 0.5925909464211897, "learning_rate": 4.509826702862715e-06, "loss": 0.2746, "step": 9677 }, { "epoch": 0.45336581252635033, "grad_norm": 0.6291882415208565, "learning_rate": 4.50971390777194e-06, "loss": 0.2989, "step": 9678 }, { "epoch": 0.4534126575162786, "grad_norm": 0.6148093412893011, "learning_rate": 4.509601101115768e-06, "loss": 0.2881, "step": 9679 }, { "epoch": 0.453459502506207, "grad_norm": 0.5843225433736228, "learning_rate": 4.509488282894845e-06, "loss": 0.291, "step": 9680 }, { "epoch": 0.4535063474961353, "grad_norm": 0.5805253181278061, "learning_rate": 4.509375453109823e-06, "loss": 0.3002, "step": 9681 }, { "epoch": 0.4535531924860636, "grad_norm": 0.5948374860984084, "learning_rate": 4.509262611761351e-06, "loss": 0.2911, "step": 9682 }, { "epoch": 0.4536000374759919, "grad_norm": 0.6485636031542104, "learning_rate": 4.509149758850076e-06, "loss": 0.3227, "step": 9683 }, { "epoch": 0.4536468824659203, "grad_norm": 0.5945752089490708, "learning_rate": 4.509036894376651e-06, "loss": 0.2798, "step": 9684 }, { "epoch": 0.45369372745584857, "grad_norm": 0.6211285400941501, "learning_rate": 4.508924018341722e-06, "loss": 0.2946, "step": 9685 }, { "epoch": 0.4537405724457769, "grad_norm": 0.5726416306206742, "learning_rate": 4.508811130745941e-06, "loss": 0.2737, "step": 9686 }, { "epoch": 0.4537874174357053, "grad_norm": 0.6457137349066, "learning_rate": 4.5086982315899565e-06, "loss": 0.2871, "step": 9687 }, { "epoch": 0.45383426242563357, "grad_norm": 0.5751603956272184, "learning_rate": 4.508585320874419e-06, "loss": 0.3003, "step": 9688 }, { "epoch": 0.4538811074155619, "grad_norm": 0.584740161351357, "learning_rate": 4.5084723985999765e-06, "loss": 0.2735, "step": 9689 }, { "epoch": 0.4539279524054902, "grad_norm": 0.6863586181825903, "learning_rate": 4.508359464767282e-06, "loss": 0.3014, "step": 9690 }, { "epoch": 0.45397479739541857, "grad_norm": 0.5523080511366344, "learning_rate": 4.508246519376982e-06, "loss": 0.2787, "step": 9691 }, { "epoch": 0.45402164238534687, "grad_norm": 0.6028309508243932, "learning_rate": 4.508133562429728e-06, "loss": 0.3014, "step": 9692 }, { "epoch": 0.4540684873752752, "grad_norm": 0.5853039116553532, "learning_rate": 4.508020593926172e-06, "loss": 0.2834, "step": 9693 }, { "epoch": 0.4541153323652035, "grad_norm": 0.6631830696686883, "learning_rate": 4.507907613866961e-06, "loss": 0.3067, "step": 9694 }, { "epoch": 0.45416217735513187, "grad_norm": 0.6090282354899487, "learning_rate": 4.507794622252746e-06, "loss": 0.2979, "step": 9695 }, { "epoch": 0.4542090223450602, "grad_norm": 0.5752312918582709, "learning_rate": 4.507681619084178e-06, "loss": 0.2673, "step": 9696 }, { "epoch": 0.4542558673349885, "grad_norm": 0.6145632439275779, "learning_rate": 4.5075686043619066e-06, "loss": 0.2912, "step": 9697 }, { "epoch": 0.45430271232491687, "grad_norm": 0.6259414862822944, "learning_rate": 4.507455578086583e-06, "loss": 0.2942, "step": 9698 }, { "epoch": 0.45434955731484516, "grad_norm": 0.6321044073820306, "learning_rate": 4.507342540258856e-06, "loss": 0.3069, "step": 9699 }, { "epoch": 0.4543964023047735, "grad_norm": 0.6013313764415522, "learning_rate": 4.5072294908793784e-06, "loss": 0.2778, "step": 9700 }, { "epoch": 0.4544432472947018, "grad_norm": 0.5974319154937193, "learning_rate": 4.507116429948799e-06, "loss": 0.296, "step": 9701 }, { "epoch": 0.45449009228463016, "grad_norm": 0.5551314266843687, "learning_rate": 4.507003357467769e-06, "loss": 0.2931, "step": 9702 }, { "epoch": 0.45453693727455846, "grad_norm": 0.6215198357371209, "learning_rate": 4.50689027343694e-06, "loss": 0.3037, "step": 9703 }, { "epoch": 0.4545837822644868, "grad_norm": 0.5562402793606644, "learning_rate": 4.506777177856961e-06, "loss": 0.2712, "step": 9704 }, { "epoch": 0.45463062725441516, "grad_norm": 0.5996961351435004, "learning_rate": 4.506664070728484e-06, "loss": 0.2871, "step": 9705 }, { "epoch": 0.45467747224434346, "grad_norm": 0.6454499559541117, "learning_rate": 4.50655095205216e-06, "loss": 0.3279, "step": 9706 }, { "epoch": 0.4547243172342718, "grad_norm": 0.582237157666163, "learning_rate": 4.506437821828638e-06, "loss": 0.2823, "step": 9707 }, { "epoch": 0.4547711622242001, "grad_norm": 0.5937744893643905, "learning_rate": 4.5063246800585724e-06, "loss": 0.2709, "step": 9708 }, { "epoch": 0.45481800721412846, "grad_norm": 0.6419215894968824, "learning_rate": 4.506211526742612e-06, "loss": 0.2988, "step": 9709 }, { "epoch": 0.45486485220405676, "grad_norm": 0.6147008317003402, "learning_rate": 4.506098361881408e-06, "loss": 0.2791, "step": 9710 }, { "epoch": 0.4549116971939851, "grad_norm": 0.6354845601203691, "learning_rate": 4.505985185475613e-06, "loss": 0.2901, "step": 9711 }, { "epoch": 0.4549585421839134, "grad_norm": 0.6500213518874287, "learning_rate": 4.5058719975258765e-06, "loss": 0.3157, "step": 9712 }, { "epoch": 0.45500538717384176, "grad_norm": 0.6084279282524836, "learning_rate": 4.505758798032852e-06, "loss": 0.3041, "step": 9713 }, { "epoch": 0.4550522321637701, "grad_norm": 0.6158721881325162, "learning_rate": 4.505645586997189e-06, "loss": 0.304, "step": 9714 }, { "epoch": 0.4550990771536984, "grad_norm": 0.5838215494630022, "learning_rate": 4.505532364419539e-06, "loss": 0.2886, "step": 9715 }, { "epoch": 0.45514592214362676, "grad_norm": 0.5781668960248624, "learning_rate": 4.505419130300556e-06, "loss": 0.2819, "step": 9716 }, { "epoch": 0.45519276713355505, "grad_norm": 0.5850153133095763, "learning_rate": 4.5053058846408885e-06, "loss": 0.2923, "step": 9717 }, { "epoch": 0.4552396121234834, "grad_norm": 0.5886803878314222, "learning_rate": 4.505192627441191e-06, "loss": 0.2863, "step": 9718 }, { "epoch": 0.4552864571134117, "grad_norm": 0.638233643578569, "learning_rate": 4.5050793587021125e-06, "loss": 0.303, "step": 9719 }, { "epoch": 0.45533330210334005, "grad_norm": 0.6366592506547499, "learning_rate": 4.504966078424307e-06, "loss": 0.2934, "step": 9720 }, { "epoch": 0.45538014709326835, "grad_norm": 0.570647925083397, "learning_rate": 4.504852786608426e-06, "loss": 0.3, "step": 9721 }, { "epoch": 0.4554269920831967, "grad_norm": 0.5611947309687542, "learning_rate": 4.504739483255121e-06, "loss": 0.2823, "step": 9722 }, { "epoch": 0.45547383707312505, "grad_norm": 0.6065819016210895, "learning_rate": 4.5046261683650425e-06, "loss": 0.2933, "step": 9723 }, { "epoch": 0.45552068206305335, "grad_norm": 0.6251311760303898, "learning_rate": 4.504512841938846e-06, "loss": 0.2901, "step": 9724 }, { "epoch": 0.4555675270529817, "grad_norm": 0.630477938474995, "learning_rate": 4.504399503977181e-06, "loss": 0.2856, "step": 9725 }, { "epoch": 0.45561437204291, "grad_norm": 0.5833989592152403, "learning_rate": 4.504286154480701e-06, "loss": 0.2791, "step": 9726 }, { "epoch": 0.45566121703283835, "grad_norm": 0.5881425072817442, "learning_rate": 4.504172793450059e-06, "loss": 0.2743, "step": 9727 }, { "epoch": 0.45570806202276665, "grad_norm": 0.6440246854200239, "learning_rate": 4.504059420885905e-06, "loss": 0.3249, "step": 9728 }, { "epoch": 0.455754907012695, "grad_norm": 0.5793456291997447, "learning_rate": 4.503946036788893e-06, "loss": 0.2897, "step": 9729 }, { "epoch": 0.4558017520026233, "grad_norm": 0.5683792420051393, "learning_rate": 4.503832641159675e-06, "loss": 0.2636, "step": 9730 }, { "epoch": 0.45584859699255165, "grad_norm": 0.5888080717493961, "learning_rate": 4.503719233998905e-06, "loss": 0.2955, "step": 9731 }, { "epoch": 0.45589544198248, "grad_norm": 0.6248830289363112, "learning_rate": 4.503605815307232e-06, "loss": 0.2982, "step": 9732 }, { "epoch": 0.4559422869724083, "grad_norm": 0.6147092214626727, "learning_rate": 4.503492385085313e-06, "loss": 0.2932, "step": 9733 }, { "epoch": 0.45598913196233665, "grad_norm": 0.6246079627727532, "learning_rate": 4.503378943333799e-06, "loss": 0.3182, "step": 9734 }, { "epoch": 0.45603597695226494, "grad_norm": 0.5870078169383961, "learning_rate": 4.5032654900533424e-06, "loss": 0.2824, "step": 9735 }, { "epoch": 0.4560828219421933, "grad_norm": 0.5526848413953626, "learning_rate": 4.503152025244596e-06, "loss": 0.2583, "step": 9736 }, { "epoch": 0.4561296669321216, "grad_norm": 0.5764626214196487, "learning_rate": 4.503038548908214e-06, "loss": 0.2768, "step": 9737 }, { "epoch": 0.45617651192204994, "grad_norm": 0.5992449034145204, "learning_rate": 4.502925061044847e-06, "loss": 0.2843, "step": 9738 }, { "epoch": 0.45622335691197824, "grad_norm": 0.5640269214095478, "learning_rate": 4.502811561655151e-06, "loss": 0.2865, "step": 9739 }, { "epoch": 0.4562702019019066, "grad_norm": 0.5950367749869959, "learning_rate": 4.502698050739778e-06, "loss": 0.3053, "step": 9740 }, { "epoch": 0.45631704689183494, "grad_norm": 0.5778692750322821, "learning_rate": 4.502584528299381e-06, "loss": 0.2863, "step": 9741 }, { "epoch": 0.45636389188176324, "grad_norm": 0.6478089932766207, "learning_rate": 4.502470994334612e-06, "loss": 0.3034, "step": 9742 }, { "epoch": 0.4564107368716916, "grad_norm": 0.6317196352725545, "learning_rate": 4.5023574488461276e-06, "loss": 0.2783, "step": 9743 }, { "epoch": 0.4564575818616199, "grad_norm": 0.633004998966678, "learning_rate": 4.502243891834578e-06, "loss": 0.2874, "step": 9744 }, { "epoch": 0.45650442685154824, "grad_norm": 0.6088704330545817, "learning_rate": 4.502130323300618e-06, "loss": 0.27, "step": 9745 }, { "epoch": 0.45655127184147654, "grad_norm": 0.5817735305896031, "learning_rate": 4.502016743244902e-06, "loss": 0.2829, "step": 9746 }, { "epoch": 0.4565981168314049, "grad_norm": 0.5654661402002522, "learning_rate": 4.501903151668083e-06, "loss": 0.2717, "step": 9747 }, { "epoch": 0.4566449618213332, "grad_norm": 0.6263667110371018, "learning_rate": 4.501789548570814e-06, "loss": 0.3009, "step": 9748 }, { "epoch": 0.45669180681126154, "grad_norm": 0.5489609647847205, "learning_rate": 4.50167593395375e-06, "loss": 0.2826, "step": 9749 }, { "epoch": 0.4567386518011899, "grad_norm": 0.6285235834054099, "learning_rate": 4.501562307817543e-06, "loss": 0.3044, "step": 9750 }, { "epoch": 0.4567854967911182, "grad_norm": 0.6147345842925699, "learning_rate": 4.5014486701628475e-06, "loss": 0.2779, "step": 9751 }, { "epoch": 0.45683234178104654, "grad_norm": 0.5940769124472767, "learning_rate": 4.5013350209903196e-06, "loss": 0.284, "step": 9752 }, { "epoch": 0.45687918677097483, "grad_norm": 0.6324226729025392, "learning_rate": 4.501221360300611e-06, "loss": 0.3168, "step": 9753 }, { "epoch": 0.4569260317609032, "grad_norm": 0.5636355180958712, "learning_rate": 4.501107688094376e-06, "loss": 0.2813, "step": 9754 }, { "epoch": 0.4569728767508315, "grad_norm": 0.5618567921706209, "learning_rate": 4.50099400437227e-06, "loss": 0.278, "step": 9755 }, { "epoch": 0.45701972174075983, "grad_norm": 0.670841088977289, "learning_rate": 4.500880309134946e-06, "loss": 0.2727, "step": 9756 }, { "epoch": 0.45706656673068813, "grad_norm": 0.593423493350436, "learning_rate": 4.500766602383059e-06, "loss": 0.2982, "step": 9757 }, { "epoch": 0.4571134117206165, "grad_norm": 0.5898685682025946, "learning_rate": 4.500652884117263e-06, "loss": 0.2899, "step": 9758 }, { "epoch": 0.45716025671054483, "grad_norm": 0.5900276609453198, "learning_rate": 4.5005391543382125e-06, "loss": 0.2794, "step": 9759 }, { "epoch": 0.45720710170047313, "grad_norm": 0.6105483318542234, "learning_rate": 4.500425413046562e-06, "loss": 0.2717, "step": 9760 }, { "epoch": 0.4572539466904015, "grad_norm": 0.5854687474942891, "learning_rate": 4.500311660242965e-06, "loss": 0.29, "step": 9761 }, { "epoch": 0.4573007916803298, "grad_norm": 0.5673469750712514, "learning_rate": 4.500197895928078e-06, "loss": 0.2701, "step": 9762 }, { "epoch": 0.45734763667025813, "grad_norm": 0.5792956775229693, "learning_rate": 4.500084120102556e-06, "loss": 0.2737, "step": 9763 }, { "epoch": 0.4573944816601864, "grad_norm": 0.5575277048816188, "learning_rate": 4.499970332767051e-06, "loss": 0.2811, "step": 9764 }, { "epoch": 0.4574413266501148, "grad_norm": 0.6441688452704488, "learning_rate": 4.49985653392222e-06, "loss": 0.2987, "step": 9765 }, { "epoch": 0.4574881716400431, "grad_norm": 0.7457885794696099, "learning_rate": 4.499742723568718e-06, "loss": 0.2952, "step": 9766 }, { "epoch": 0.4575350166299714, "grad_norm": 0.6378301365466512, "learning_rate": 4.499628901707198e-06, "loss": 0.3124, "step": 9767 }, { "epoch": 0.4575818616198998, "grad_norm": 0.5678324636498266, "learning_rate": 4.499515068338316e-06, "loss": 0.2855, "step": 9768 }, { "epoch": 0.4576287066098281, "grad_norm": 0.6180320332799043, "learning_rate": 4.4994012234627285e-06, "loss": 0.2701, "step": 9769 }, { "epoch": 0.4576755515997564, "grad_norm": 0.607761954855303, "learning_rate": 4.499287367081089e-06, "loss": 0.2896, "step": 9770 }, { "epoch": 0.4577223965896847, "grad_norm": 0.6525646902031769, "learning_rate": 4.4991734991940524e-06, "loss": 0.2997, "step": 9771 }, { "epoch": 0.4577692415796131, "grad_norm": 0.6193069116221311, "learning_rate": 4.4990596198022766e-06, "loss": 0.269, "step": 9772 }, { "epoch": 0.45781608656954137, "grad_norm": 0.6403234876028518, "learning_rate": 4.4989457289064134e-06, "loss": 0.2853, "step": 9773 }, { "epoch": 0.4578629315594697, "grad_norm": 0.5714910598331261, "learning_rate": 4.49883182650712e-06, "loss": 0.28, "step": 9774 }, { "epoch": 0.457909776549398, "grad_norm": 0.6171548065164888, "learning_rate": 4.498717912605054e-06, "loss": 0.2887, "step": 9775 }, { "epoch": 0.45795662153932637, "grad_norm": 0.6158616371266961, "learning_rate": 4.498603987200867e-06, "loss": 0.3024, "step": 9776 }, { "epoch": 0.4580034665292547, "grad_norm": 0.6161338146446803, "learning_rate": 4.498490050295217e-06, "loss": 0.3099, "step": 9777 }, { "epoch": 0.458050311519183, "grad_norm": 0.6080345842568802, "learning_rate": 4.498376101888758e-06, "loss": 0.2769, "step": 9778 }, { "epoch": 0.45809715650911137, "grad_norm": 0.6659002202440111, "learning_rate": 4.498262141982148e-06, "loss": 0.3219, "step": 9779 }, { "epoch": 0.45814400149903967, "grad_norm": 0.6262974922040315, "learning_rate": 4.498148170576041e-06, "loss": 0.2816, "step": 9780 }, { "epoch": 0.458190846488968, "grad_norm": 0.6569596144526745, "learning_rate": 4.498034187671094e-06, "loss": 0.3047, "step": 9781 }, { "epoch": 0.4582376914788963, "grad_norm": 0.5686382984493563, "learning_rate": 4.497920193267962e-06, "loss": 0.2717, "step": 9782 }, { "epoch": 0.45828453646882467, "grad_norm": 0.5715685239345808, "learning_rate": 4.4978061873673015e-06, "loss": 0.273, "step": 9783 }, { "epoch": 0.45833138145875296, "grad_norm": 0.6443151328322702, "learning_rate": 4.497692169969768e-06, "loss": 0.2987, "step": 9784 }, { "epoch": 0.4583782264486813, "grad_norm": 0.638558500123935, "learning_rate": 4.497578141076019e-06, "loss": 0.2796, "step": 9785 }, { "epoch": 0.45842507143860967, "grad_norm": 0.6261387775817862, "learning_rate": 4.4974641006867094e-06, "loss": 0.2924, "step": 9786 }, { "epoch": 0.45847191642853796, "grad_norm": 0.6012786554207269, "learning_rate": 4.497350048802496e-06, "loss": 0.3015, "step": 9787 }, { "epoch": 0.4585187614184663, "grad_norm": 0.5854509827300363, "learning_rate": 4.497235985424035e-06, "loss": 0.3036, "step": 9788 }, { "epoch": 0.4585656064083946, "grad_norm": 0.6344826276599372, "learning_rate": 4.497121910551984e-06, "loss": 0.2833, "step": 9789 }, { "epoch": 0.45861245139832296, "grad_norm": 0.5854872210981593, "learning_rate": 4.497007824186998e-06, "loss": 0.263, "step": 9790 }, { "epoch": 0.45865929638825126, "grad_norm": 0.5743581486116123, "learning_rate": 4.4968937263297326e-06, "loss": 0.2836, "step": 9791 }, { "epoch": 0.4587061413781796, "grad_norm": 0.6002594445889424, "learning_rate": 4.496779616980847e-06, "loss": 0.2952, "step": 9792 }, { "epoch": 0.4587529863681079, "grad_norm": 0.6492711046382786, "learning_rate": 4.496665496140995e-06, "loss": 0.3213, "step": 9793 }, { "epoch": 0.45879983135803626, "grad_norm": 0.6299640811556146, "learning_rate": 4.496551363810836e-06, "loss": 0.3082, "step": 9794 }, { "epoch": 0.4588466763479646, "grad_norm": 0.6041846653882386, "learning_rate": 4.496437219991026e-06, "loss": 0.2731, "step": 9795 }, { "epoch": 0.4588935213378929, "grad_norm": 0.6469468367689897, "learning_rate": 4.496323064682221e-06, "loss": 0.2892, "step": 9796 }, { "epoch": 0.45894036632782126, "grad_norm": 0.6043928123431438, "learning_rate": 4.496208897885079e-06, "loss": 0.2877, "step": 9797 }, { "epoch": 0.45898721131774955, "grad_norm": 0.7065594066995592, "learning_rate": 4.496094719600257e-06, "loss": 0.3029, "step": 9798 }, { "epoch": 0.4590340563076779, "grad_norm": 0.6324317912407916, "learning_rate": 4.49598052982841e-06, "loss": 0.2965, "step": 9799 }, { "epoch": 0.4590809012976062, "grad_norm": 0.6176730321541518, "learning_rate": 4.4958663285701974e-06, "loss": 0.3046, "step": 9800 }, { "epoch": 0.45912774628753455, "grad_norm": 0.6135459605127606, "learning_rate": 4.495752115826276e-06, "loss": 0.2979, "step": 9801 }, { "epoch": 0.45917459127746285, "grad_norm": 0.6154737338772284, "learning_rate": 4.495637891597303e-06, "loss": 0.2667, "step": 9802 }, { "epoch": 0.4592214362673912, "grad_norm": 0.6289722002830685, "learning_rate": 4.495523655883935e-06, "loss": 0.3044, "step": 9803 }, { "epoch": 0.45926828125731955, "grad_norm": 0.6596095422319229, "learning_rate": 4.495409408686831e-06, "loss": 0.2727, "step": 9804 }, { "epoch": 0.45931512624724785, "grad_norm": 0.5793835476545777, "learning_rate": 4.495295150006646e-06, "loss": 0.2739, "step": 9805 }, { "epoch": 0.4593619712371762, "grad_norm": 0.5736127080276042, "learning_rate": 4.495180879844039e-06, "loss": 0.2851, "step": 9806 }, { "epoch": 0.4594088162271045, "grad_norm": 0.565877907417802, "learning_rate": 4.495066598199669e-06, "loss": 0.2574, "step": 9807 }, { "epoch": 0.45945566121703285, "grad_norm": 0.6113181611979821, "learning_rate": 4.49495230507419e-06, "loss": 0.2705, "step": 9808 }, { "epoch": 0.45950250620696115, "grad_norm": 0.571996851536526, "learning_rate": 4.494838000468264e-06, "loss": 0.2804, "step": 9809 }, { "epoch": 0.4595493511968895, "grad_norm": 0.583608121024647, "learning_rate": 4.494723684382546e-06, "loss": 0.2766, "step": 9810 }, { "epoch": 0.4595961961868178, "grad_norm": 0.7212707023123434, "learning_rate": 4.494609356817693e-06, "loss": 0.2826, "step": 9811 }, { "epoch": 0.45964304117674615, "grad_norm": 0.6847405619728271, "learning_rate": 4.494495017774366e-06, "loss": 0.3224, "step": 9812 }, { "epoch": 0.4596898861666745, "grad_norm": 0.6208705570888741, "learning_rate": 4.494380667253222e-06, "loss": 0.2761, "step": 9813 }, { "epoch": 0.4597367311566028, "grad_norm": 0.6006759366128955, "learning_rate": 4.4942663052549175e-06, "loss": 0.2879, "step": 9814 }, { "epoch": 0.45978357614653115, "grad_norm": 0.6266364793691499, "learning_rate": 4.494151931780112e-06, "loss": 0.2929, "step": 9815 }, { "epoch": 0.45983042113645944, "grad_norm": 0.6953955179768375, "learning_rate": 4.494037546829463e-06, "loss": 0.2797, "step": 9816 }, { "epoch": 0.4598772661263878, "grad_norm": 0.6061298094870524, "learning_rate": 4.493923150403629e-06, "loss": 0.2876, "step": 9817 }, { "epoch": 0.4599241111163161, "grad_norm": 0.6051457662970197, "learning_rate": 4.493808742503269e-06, "loss": 0.2846, "step": 9818 }, { "epoch": 0.45997095610624444, "grad_norm": 0.6578524169190091, "learning_rate": 4.493694323129041e-06, "loss": 0.3106, "step": 9819 }, { "epoch": 0.46001780109617274, "grad_norm": 0.5964857805551301, "learning_rate": 4.493579892281602e-06, "loss": 0.2783, "step": 9820 }, { "epoch": 0.4600646460861011, "grad_norm": 0.6204451068430223, "learning_rate": 4.493465449961613e-06, "loss": 0.2737, "step": 9821 }, { "epoch": 0.46011149107602944, "grad_norm": 0.5728338754389233, "learning_rate": 4.493350996169731e-06, "loss": 0.2698, "step": 9822 }, { "epoch": 0.46015833606595774, "grad_norm": 0.592676980757148, "learning_rate": 4.493236530906615e-06, "loss": 0.3015, "step": 9823 }, { "epoch": 0.4602051810558861, "grad_norm": 0.6950872653838744, "learning_rate": 4.4931220541729234e-06, "loss": 0.2973, "step": 9824 }, { "epoch": 0.4602520260458144, "grad_norm": 0.5979460860280904, "learning_rate": 4.493007565969316e-06, "loss": 0.2617, "step": 9825 }, { "epoch": 0.46029887103574274, "grad_norm": 0.6287661005242443, "learning_rate": 4.492893066296451e-06, "loss": 0.3099, "step": 9826 }, { "epoch": 0.46034571602567104, "grad_norm": 0.585732875597475, "learning_rate": 4.4927785551549865e-06, "loss": 0.2742, "step": 9827 }, { "epoch": 0.4603925610155994, "grad_norm": 0.5865537755032347, "learning_rate": 4.4926640325455826e-06, "loss": 0.3046, "step": 9828 }, { "epoch": 0.4604394060055277, "grad_norm": 0.5980470130321374, "learning_rate": 4.492549498468899e-06, "loss": 0.2753, "step": 9829 }, { "epoch": 0.46048625099545604, "grad_norm": 0.6456242294060619, "learning_rate": 4.492434952925593e-06, "loss": 0.2919, "step": 9830 }, { "epoch": 0.4605330959853844, "grad_norm": 0.6071500806972171, "learning_rate": 4.492320395916324e-06, "loss": 0.2865, "step": 9831 }, { "epoch": 0.4605799409753127, "grad_norm": 0.6334246693404576, "learning_rate": 4.492205827441753e-06, "loss": 0.2862, "step": 9832 }, { "epoch": 0.46062678596524104, "grad_norm": 0.524619532456226, "learning_rate": 4.492091247502538e-06, "loss": 0.2605, "step": 9833 }, { "epoch": 0.46067363095516933, "grad_norm": 0.5876425810672105, "learning_rate": 4.491976656099338e-06, "loss": 0.2807, "step": 9834 }, { "epoch": 0.4607204759450977, "grad_norm": 0.6054044653477488, "learning_rate": 4.491862053232813e-06, "loss": 0.2921, "step": 9835 }, { "epoch": 0.460767320935026, "grad_norm": 0.5761982602219218, "learning_rate": 4.491747438903623e-06, "loss": 0.2858, "step": 9836 }, { "epoch": 0.46081416592495433, "grad_norm": 0.5715389473595264, "learning_rate": 4.491632813112427e-06, "loss": 0.2833, "step": 9837 }, { "epoch": 0.46086101091488263, "grad_norm": 0.6247632544782937, "learning_rate": 4.491518175859885e-06, "loss": 0.3034, "step": 9838 }, { "epoch": 0.460907855904811, "grad_norm": 0.6322605934057435, "learning_rate": 4.491403527146656e-06, "loss": 0.2874, "step": 9839 }, { "epoch": 0.46095470089473933, "grad_norm": 0.6548724895593366, "learning_rate": 4.4912888669734e-06, "loss": 0.3123, "step": 9840 }, { "epoch": 0.46100154588466763, "grad_norm": 0.6257186244375859, "learning_rate": 4.491174195340777e-06, "loss": 0.2904, "step": 9841 }, { "epoch": 0.461048390874596, "grad_norm": 0.6013871381623184, "learning_rate": 4.491059512249448e-06, "loss": 0.3016, "step": 9842 }, { "epoch": 0.4610952358645243, "grad_norm": 0.6680127028579222, "learning_rate": 4.490944817700071e-06, "loss": 0.3082, "step": 9843 }, { "epoch": 0.46114208085445263, "grad_norm": 0.5751425727427909, "learning_rate": 4.490830111693307e-06, "loss": 0.2898, "step": 9844 }, { "epoch": 0.4611889258443809, "grad_norm": 0.5953150131741198, "learning_rate": 4.490715394229817e-06, "loss": 0.2853, "step": 9845 }, { "epoch": 0.4612357708343093, "grad_norm": 0.5905734384823506, "learning_rate": 4.49060066531026e-06, "loss": 0.257, "step": 9846 }, { "epoch": 0.4612826158242376, "grad_norm": 0.6188598307756374, "learning_rate": 4.490485924935295e-06, "loss": 0.2705, "step": 9847 }, { "epoch": 0.4613294608141659, "grad_norm": 0.5693222154804748, "learning_rate": 4.490371173105586e-06, "loss": 0.2879, "step": 9848 }, { "epoch": 0.4613763058040943, "grad_norm": 0.6556834124356301, "learning_rate": 4.4902564098217894e-06, "loss": 0.285, "step": 9849 }, { "epoch": 0.4614231507940226, "grad_norm": 0.5897621709145767, "learning_rate": 4.490141635084568e-06, "loss": 0.2908, "step": 9850 }, { "epoch": 0.4614699957839509, "grad_norm": 0.6428429265929588, "learning_rate": 4.490026848894582e-06, "loss": 0.2844, "step": 9851 }, { "epoch": 0.4615168407738792, "grad_norm": 0.6395362792333221, "learning_rate": 4.489912051252491e-06, "loss": 0.2955, "step": 9852 }, { "epoch": 0.4615636857638076, "grad_norm": 0.6047022087137364, "learning_rate": 4.4897972421589565e-06, "loss": 0.2775, "step": 9853 }, { "epoch": 0.46161053075373587, "grad_norm": 0.6098190161474499, "learning_rate": 4.489682421614639e-06, "loss": 0.2771, "step": 9854 }, { "epoch": 0.4616573757436642, "grad_norm": 0.5795043007376117, "learning_rate": 4.4895675896201995e-06, "loss": 0.2811, "step": 9855 }, { "epoch": 0.4617042207335925, "grad_norm": 0.6172083920123657, "learning_rate": 4.489452746176299e-06, "loss": 0.3069, "step": 9856 }, { "epoch": 0.46175106572352087, "grad_norm": 0.6423002486016549, "learning_rate": 4.4893378912835975e-06, "loss": 0.3112, "step": 9857 }, { "epoch": 0.4617979107134492, "grad_norm": 0.5937654531241355, "learning_rate": 4.489223024942756e-06, "loss": 0.2748, "step": 9858 }, { "epoch": 0.4618447557033775, "grad_norm": 0.5570756767082712, "learning_rate": 4.489108147154436e-06, "loss": 0.2568, "step": 9859 }, { "epoch": 0.46189160069330587, "grad_norm": 0.5730296808774741, "learning_rate": 4.488993257919299e-06, "loss": 0.2998, "step": 9860 }, { "epoch": 0.46193844568323417, "grad_norm": 0.5417140539644731, "learning_rate": 4.488878357238007e-06, "loss": 0.2906, "step": 9861 }, { "epoch": 0.4619852906731625, "grad_norm": 0.604078774240237, "learning_rate": 4.4887634451112175e-06, "loss": 0.2902, "step": 9862 }, { "epoch": 0.4620321356630908, "grad_norm": 0.5754278475429199, "learning_rate": 4.488648521539596e-06, "loss": 0.2809, "step": 9863 }, { "epoch": 0.46207898065301917, "grad_norm": 0.607149174107249, "learning_rate": 4.488533586523801e-06, "loss": 0.2698, "step": 9864 }, { "epoch": 0.46212582564294746, "grad_norm": 0.6113965074432147, "learning_rate": 4.4884186400644956e-06, "loss": 0.2907, "step": 9865 }, { "epoch": 0.4621726706328758, "grad_norm": 0.5844703412143262, "learning_rate": 4.48830368216234e-06, "loss": 0.2933, "step": 9866 }, { "epoch": 0.46221951562280417, "grad_norm": 0.6030879951851275, "learning_rate": 4.488188712817998e-06, "loss": 0.2883, "step": 9867 }, { "epoch": 0.46226636061273246, "grad_norm": 0.5951789948985251, "learning_rate": 4.488073732032128e-06, "loss": 0.2808, "step": 9868 }, { "epoch": 0.4623132056026608, "grad_norm": 0.6236760811380746, "learning_rate": 4.487958739805394e-06, "loss": 0.2983, "step": 9869 }, { "epoch": 0.4623600505925891, "grad_norm": 0.5740583836605945, "learning_rate": 4.487843736138458e-06, "loss": 0.2797, "step": 9870 }, { "epoch": 0.46240689558251746, "grad_norm": 0.5569117737211525, "learning_rate": 4.4877287210319794e-06, "loss": 0.2822, "step": 9871 }, { "epoch": 0.46245374057244576, "grad_norm": 0.5929612416954891, "learning_rate": 4.487613694486622e-06, "loss": 0.2921, "step": 9872 }, { "epoch": 0.4625005855623741, "grad_norm": 0.5976562386914761, "learning_rate": 4.487498656503048e-06, "loss": 0.2874, "step": 9873 }, { "epoch": 0.4625474305523024, "grad_norm": 0.5866991080827422, "learning_rate": 4.487383607081917e-06, "loss": 0.2623, "step": 9874 }, { "epoch": 0.46259427554223076, "grad_norm": 0.5447183784168168, "learning_rate": 4.487268546223895e-06, "loss": 0.2734, "step": 9875 }, { "epoch": 0.4626411205321591, "grad_norm": 0.5774853772236911, "learning_rate": 4.487153473929642e-06, "loss": 0.2707, "step": 9876 }, { "epoch": 0.4626879655220874, "grad_norm": 0.5974487942106527, "learning_rate": 4.487038390199819e-06, "loss": 0.2782, "step": 9877 }, { "epoch": 0.46273481051201576, "grad_norm": 0.6610155457679212, "learning_rate": 4.48692329503509e-06, "loss": 0.2837, "step": 9878 }, { "epoch": 0.46278165550194406, "grad_norm": 0.6209366271352756, "learning_rate": 4.4868081884361165e-06, "loss": 0.279, "step": 9879 }, { "epoch": 0.4628285004918724, "grad_norm": 0.5865760907708643, "learning_rate": 4.4866930704035615e-06, "loss": 0.2623, "step": 9880 }, { "epoch": 0.4628753454818007, "grad_norm": 0.5659345254931112, "learning_rate": 4.486577940938087e-06, "loss": 0.2707, "step": 9881 }, { "epoch": 0.46292219047172906, "grad_norm": 0.6090832472999586, "learning_rate": 4.486462800040357e-06, "loss": 0.2884, "step": 9882 }, { "epoch": 0.46296903546165735, "grad_norm": 0.6019878361421664, "learning_rate": 4.486347647711031e-06, "loss": 0.281, "step": 9883 }, { "epoch": 0.4630158804515857, "grad_norm": 0.5721516564684038, "learning_rate": 4.486232483950774e-06, "loss": 0.2668, "step": 9884 }, { "epoch": 0.46306272544151406, "grad_norm": 0.5843997503757148, "learning_rate": 4.486117308760249e-06, "loss": 0.3165, "step": 9885 }, { "epoch": 0.46310957043144235, "grad_norm": 0.5597122319313441, "learning_rate": 4.486002122140118e-06, "loss": 0.2849, "step": 9886 }, { "epoch": 0.4631564154213707, "grad_norm": 0.6683888526368346, "learning_rate": 4.485886924091043e-06, "loss": 0.2957, "step": 9887 }, { "epoch": 0.463203260411299, "grad_norm": 0.7040337934438687, "learning_rate": 4.485771714613689e-06, "loss": 0.3194, "step": 9888 }, { "epoch": 0.46325010540122735, "grad_norm": 0.6024291169801356, "learning_rate": 4.485656493708717e-06, "loss": 0.3106, "step": 9889 }, { "epoch": 0.46329695039115565, "grad_norm": 0.6440313094402347, "learning_rate": 4.485541261376791e-06, "loss": 0.3044, "step": 9890 }, { "epoch": 0.463343795381084, "grad_norm": 0.5502293701278557, "learning_rate": 4.485426017618575e-06, "loss": 0.2732, "step": 9891 }, { "epoch": 0.4633906403710123, "grad_norm": 0.5637653196692205, "learning_rate": 4.485310762434731e-06, "loss": 0.2812, "step": 9892 }, { "epoch": 0.46343748536094065, "grad_norm": 0.5821793112226178, "learning_rate": 4.485195495825922e-06, "loss": 0.2884, "step": 9893 }, { "epoch": 0.463484330350869, "grad_norm": 0.6478312482929383, "learning_rate": 4.485080217792812e-06, "loss": 0.2967, "step": 9894 }, { "epoch": 0.4635311753407973, "grad_norm": 0.5671855594843432, "learning_rate": 4.484964928336065e-06, "loss": 0.278, "step": 9895 }, { "epoch": 0.46357802033072565, "grad_norm": 0.5903941770684059, "learning_rate": 4.484849627456343e-06, "loss": 0.2828, "step": 9896 }, { "epoch": 0.46362486532065394, "grad_norm": 0.5453191272178507, "learning_rate": 4.4847343151543104e-06, "loss": 0.2856, "step": 9897 }, { "epoch": 0.4636717103105823, "grad_norm": 0.5551696136717379, "learning_rate": 4.48461899143063e-06, "loss": 0.2886, "step": 9898 }, { "epoch": 0.4637185553005106, "grad_norm": 0.5557990184005301, "learning_rate": 4.484503656285968e-06, "loss": 0.2439, "step": 9899 }, { "epoch": 0.46376540029043894, "grad_norm": 0.5954822371545339, "learning_rate": 4.484388309720985e-06, "loss": 0.2939, "step": 9900 }, { "epoch": 0.46381224528036724, "grad_norm": 0.5272778882409271, "learning_rate": 4.484272951736346e-06, "loss": 0.2701, "step": 9901 }, { "epoch": 0.4638590902702956, "grad_norm": 0.6526217404838364, "learning_rate": 4.484157582332715e-06, "loss": 0.3252, "step": 9902 }, { "epoch": 0.46390593526022394, "grad_norm": 0.5840830981309135, "learning_rate": 4.484042201510756e-06, "loss": 0.2904, "step": 9903 }, { "epoch": 0.46395278025015224, "grad_norm": 0.6278704494668911, "learning_rate": 4.4839268092711316e-06, "loss": 0.2979, "step": 9904 }, { "epoch": 0.4639996252400806, "grad_norm": 0.563359686571619, "learning_rate": 4.483811405614509e-06, "loss": 0.2821, "step": 9905 }, { "epoch": 0.4640464702300089, "grad_norm": 0.613445864304411, "learning_rate": 4.48369599054155e-06, "loss": 0.3101, "step": 9906 }, { "epoch": 0.46409331521993724, "grad_norm": 0.5785818480238716, "learning_rate": 4.4835805640529175e-06, "loss": 0.2871, "step": 9907 }, { "epoch": 0.46414016020986554, "grad_norm": 0.5588448514065941, "learning_rate": 4.483465126149279e-06, "loss": 0.2603, "step": 9908 }, { "epoch": 0.4641870051997939, "grad_norm": 0.5520742364967758, "learning_rate": 4.483349676831297e-06, "loss": 0.2794, "step": 9909 }, { "epoch": 0.4642338501897222, "grad_norm": 0.5946483307757331, "learning_rate": 4.483234216099636e-06, "loss": 0.304, "step": 9910 }, { "epoch": 0.46428069517965054, "grad_norm": 0.5796686675133059, "learning_rate": 4.4831187439549604e-06, "loss": 0.3068, "step": 9911 }, { "epoch": 0.4643275401695789, "grad_norm": 0.6301484857477158, "learning_rate": 4.483003260397935e-06, "loss": 0.2827, "step": 9912 }, { "epoch": 0.4643743851595072, "grad_norm": 0.6568333508345626, "learning_rate": 4.482887765429223e-06, "loss": 0.2898, "step": 9913 }, { "epoch": 0.46442123014943554, "grad_norm": 0.5940084889634127, "learning_rate": 4.482772259049492e-06, "loss": 0.2774, "step": 9914 }, { "epoch": 0.46446807513936383, "grad_norm": 0.6551568536106269, "learning_rate": 4.482656741259405e-06, "loss": 0.2785, "step": 9915 }, { "epoch": 0.4645149201292922, "grad_norm": 0.605771700818105, "learning_rate": 4.482541212059626e-06, "loss": 0.2847, "step": 9916 }, { "epoch": 0.4645617651192205, "grad_norm": 0.5701596261633678, "learning_rate": 4.482425671450821e-06, "loss": 0.2951, "step": 9917 }, { "epoch": 0.46460861010914883, "grad_norm": 0.6194457517347679, "learning_rate": 4.482310119433654e-06, "loss": 0.3006, "step": 9918 }, { "epoch": 0.46465545509907713, "grad_norm": 0.6145080503146569, "learning_rate": 4.482194556008791e-06, "loss": 0.307, "step": 9919 }, { "epoch": 0.4647023000890055, "grad_norm": 0.5621852429345444, "learning_rate": 4.482078981176896e-06, "loss": 0.2797, "step": 9920 }, { "epoch": 0.46474914507893383, "grad_norm": 0.5979684564936323, "learning_rate": 4.481963394938636e-06, "loss": 0.3014, "step": 9921 }, { "epoch": 0.46479599006886213, "grad_norm": 0.621893583116383, "learning_rate": 4.481847797294673e-06, "loss": 0.2989, "step": 9922 }, { "epoch": 0.4648428350587905, "grad_norm": 0.5770340378389381, "learning_rate": 4.481732188245675e-06, "loss": 0.2717, "step": 9923 }, { "epoch": 0.4648896800487188, "grad_norm": 0.6487570577371196, "learning_rate": 4.481616567792306e-06, "loss": 0.3113, "step": 9924 }, { "epoch": 0.46493652503864713, "grad_norm": 0.5964423300559654, "learning_rate": 4.481500935935232e-06, "loss": 0.2883, "step": 9925 }, { "epoch": 0.4649833700285754, "grad_norm": 0.6142144326025804, "learning_rate": 4.481385292675118e-06, "loss": 0.3002, "step": 9926 }, { "epoch": 0.4650302150185038, "grad_norm": 0.5889231136878037, "learning_rate": 4.48126963801263e-06, "loss": 0.2786, "step": 9927 }, { "epoch": 0.4650770600084321, "grad_norm": 0.5634614977979021, "learning_rate": 4.4811539719484325e-06, "loss": 0.2775, "step": 9928 }, { "epoch": 0.4651239049983604, "grad_norm": 0.5874700326730417, "learning_rate": 4.481038294483192e-06, "loss": 0.2955, "step": 9929 }, { "epoch": 0.4651707499882888, "grad_norm": 0.6219730210238898, "learning_rate": 4.480922605617575e-06, "loss": 0.3096, "step": 9930 }, { "epoch": 0.4652175949782171, "grad_norm": 0.5949268885455244, "learning_rate": 4.480806905352245e-06, "loss": 0.2976, "step": 9931 }, { "epoch": 0.4652644399681454, "grad_norm": 0.6242972555190136, "learning_rate": 4.48069119368787e-06, "loss": 0.2813, "step": 9932 }, { "epoch": 0.4653112849580737, "grad_norm": 0.5517608868510971, "learning_rate": 4.480575470625115e-06, "loss": 0.2862, "step": 9933 }, { "epoch": 0.4653581299480021, "grad_norm": 0.618543148702215, "learning_rate": 4.480459736164645e-06, "loss": 0.312, "step": 9934 }, { "epoch": 0.46540497493793037, "grad_norm": 0.6435864932672064, "learning_rate": 4.480343990307128e-06, "loss": 0.3008, "step": 9935 }, { "epoch": 0.4654518199278587, "grad_norm": 0.623357047332773, "learning_rate": 4.4802282330532285e-06, "loss": 0.2988, "step": 9936 }, { "epoch": 0.465498664917787, "grad_norm": 0.642656500317996, "learning_rate": 4.480112464403614e-06, "loss": 0.3089, "step": 9937 }, { "epoch": 0.46554550990771537, "grad_norm": 0.6837285585628486, "learning_rate": 4.479996684358949e-06, "loss": 0.3027, "step": 9938 }, { "epoch": 0.4655923548976437, "grad_norm": 0.6003282039661413, "learning_rate": 4.479880892919901e-06, "loss": 0.2753, "step": 9939 }, { "epoch": 0.465639199887572, "grad_norm": 0.5797828744142962, "learning_rate": 4.479765090087136e-06, "loss": 0.2783, "step": 9940 }, { "epoch": 0.46568604487750037, "grad_norm": 0.6554920287176954, "learning_rate": 4.479649275861321e-06, "loss": 0.2944, "step": 9941 }, { "epoch": 0.46573288986742867, "grad_norm": 0.5788068584031911, "learning_rate": 4.479533450243122e-06, "loss": 0.299, "step": 9942 }, { "epoch": 0.465779734857357, "grad_norm": 0.5928714845931936, "learning_rate": 4.479417613233205e-06, "loss": 0.2738, "step": 9943 }, { "epoch": 0.4658265798472853, "grad_norm": 0.5717916815953822, "learning_rate": 4.4793017648322375e-06, "loss": 0.2891, "step": 9944 }, { "epoch": 0.46587342483721367, "grad_norm": 0.6086051104909284, "learning_rate": 4.479185905040886e-06, "loss": 0.2908, "step": 9945 }, { "epoch": 0.46592026982714196, "grad_norm": 0.6594738680821062, "learning_rate": 4.479070033859817e-06, "loss": 0.3152, "step": 9946 }, { "epoch": 0.4659671148170703, "grad_norm": 0.6433991764091077, "learning_rate": 4.478954151289697e-06, "loss": 0.3196, "step": 9947 }, { "epoch": 0.46601395980699867, "grad_norm": 0.5975260762902878, "learning_rate": 4.478838257331193e-06, "loss": 0.294, "step": 9948 }, { "epoch": 0.46606080479692696, "grad_norm": 0.7354351970075191, "learning_rate": 4.478722351984973e-06, "loss": 0.2961, "step": 9949 }, { "epoch": 0.4661076497868553, "grad_norm": 0.5595403749960216, "learning_rate": 4.478606435251702e-06, "loss": 0.2719, "step": 9950 }, { "epoch": 0.4661544947767836, "grad_norm": 0.6109100494081812, "learning_rate": 4.47849050713205e-06, "loss": 0.2894, "step": 9951 }, { "epoch": 0.46620133976671196, "grad_norm": 0.6495901915377411, "learning_rate": 4.478374567626681e-06, "loss": 0.3115, "step": 9952 }, { "epoch": 0.46624818475664026, "grad_norm": 0.5958137260837356, "learning_rate": 4.478258616736264e-06, "loss": 0.2891, "step": 9953 }, { "epoch": 0.4662950297465686, "grad_norm": 0.6066321875720345, "learning_rate": 4.4781426544614656e-06, "loss": 0.2861, "step": 9954 }, { "epoch": 0.4663418747364969, "grad_norm": 0.5938779095627695, "learning_rate": 4.4780266808029546e-06, "loss": 0.2911, "step": 9955 }, { "epoch": 0.46638871972642526, "grad_norm": 0.5986618123242292, "learning_rate": 4.477910695761396e-06, "loss": 0.2821, "step": 9956 }, { "epoch": 0.4664355647163536, "grad_norm": 0.6594455520776724, "learning_rate": 4.477794699337459e-06, "loss": 0.2714, "step": 9957 }, { "epoch": 0.4664824097062819, "grad_norm": 0.6088922662612455, "learning_rate": 4.477678691531811e-06, "loss": 0.3018, "step": 9958 }, { "epoch": 0.46652925469621026, "grad_norm": 0.6513986287112926, "learning_rate": 4.477562672345118e-06, "loss": 0.294, "step": 9959 }, { "epoch": 0.46657609968613856, "grad_norm": 0.556470606483092, "learning_rate": 4.4774466417780495e-06, "loss": 0.2792, "step": 9960 }, { "epoch": 0.4666229446760669, "grad_norm": 0.5549774440900717, "learning_rate": 4.477330599831273e-06, "loss": 0.2886, "step": 9961 }, { "epoch": 0.4666697896659952, "grad_norm": 0.5900212401815718, "learning_rate": 4.477214546505455e-06, "loss": 0.2847, "step": 9962 }, { "epoch": 0.46671663465592356, "grad_norm": 0.6174451653876495, "learning_rate": 4.477098481801265e-06, "loss": 0.2895, "step": 9963 }, { "epoch": 0.46676347964585185, "grad_norm": 0.5962493198243363, "learning_rate": 4.47698240571937e-06, "loss": 0.2806, "step": 9964 }, { "epoch": 0.4668103246357802, "grad_norm": 0.5925605099024367, "learning_rate": 4.4768663182604375e-06, "loss": 0.2899, "step": 9965 }, { "epoch": 0.46685716962570856, "grad_norm": 0.5971769343405965, "learning_rate": 4.476750219425137e-06, "loss": 0.2815, "step": 9966 }, { "epoch": 0.46690401461563685, "grad_norm": 0.5866138699787343, "learning_rate": 4.4766341092141354e-06, "loss": 0.2828, "step": 9967 }, { "epoch": 0.4669508596055652, "grad_norm": 0.5901096266191356, "learning_rate": 4.4765179876281016e-06, "loss": 0.2916, "step": 9968 }, { "epoch": 0.4669977045954935, "grad_norm": 0.5973894140311794, "learning_rate": 4.4764018546677036e-06, "loss": 0.298, "step": 9969 }, { "epoch": 0.46704454958542185, "grad_norm": 0.557508620327156, "learning_rate": 4.47628571033361e-06, "loss": 0.2719, "step": 9970 }, { "epoch": 0.46709139457535015, "grad_norm": 0.5877398604093397, "learning_rate": 4.476169554626488e-06, "loss": 0.2763, "step": 9971 }, { "epoch": 0.4671382395652785, "grad_norm": 0.5983454786701395, "learning_rate": 4.476053387547007e-06, "loss": 0.2842, "step": 9972 }, { "epoch": 0.4671850845552068, "grad_norm": 0.6358488318081298, "learning_rate": 4.475937209095836e-06, "loss": 0.2969, "step": 9973 }, { "epoch": 0.46723192954513515, "grad_norm": 0.6086526978864839, "learning_rate": 4.475821019273643e-06, "loss": 0.2799, "step": 9974 }, { "epoch": 0.4672787745350635, "grad_norm": 0.5708230201770427, "learning_rate": 4.4757048180810955e-06, "loss": 0.2894, "step": 9975 }, { "epoch": 0.4673256195249918, "grad_norm": 0.5768486880024495, "learning_rate": 4.4755886055188645e-06, "loss": 0.2871, "step": 9976 }, { "epoch": 0.46737246451492015, "grad_norm": 0.5803344727328237, "learning_rate": 4.4754723815876175e-06, "loss": 0.281, "step": 9977 }, { "epoch": 0.46741930950484845, "grad_norm": 0.5795793509319618, "learning_rate": 4.475356146288024e-06, "loss": 0.2818, "step": 9978 }, { "epoch": 0.4674661544947768, "grad_norm": 0.5714874030619884, "learning_rate": 4.4752398996207514e-06, "loss": 0.2739, "step": 9979 }, { "epoch": 0.4675129994847051, "grad_norm": 0.6783180246742124, "learning_rate": 4.47512364158647e-06, "loss": 0.2817, "step": 9980 }, { "epoch": 0.46755984447463345, "grad_norm": 0.6175294578456527, "learning_rate": 4.475007372185848e-06, "loss": 0.3002, "step": 9981 }, { "epoch": 0.46760668946456174, "grad_norm": 0.5580996342035534, "learning_rate": 4.474891091419555e-06, "loss": 0.2811, "step": 9982 }, { "epoch": 0.4676535344544901, "grad_norm": 0.5466993580977497, "learning_rate": 4.47477479928826e-06, "loss": 0.282, "step": 9983 }, { "epoch": 0.46770037944441845, "grad_norm": 0.5595826045647448, "learning_rate": 4.474658495792633e-06, "loss": 0.2919, "step": 9984 }, { "epoch": 0.46774722443434674, "grad_norm": 0.6281707267165335, "learning_rate": 4.4745421809333424e-06, "loss": 0.2972, "step": 9985 }, { "epoch": 0.4677940694242751, "grad_norm": 0.5319419605141187, "learning_rate": 4.474425854711059e-06, "loss": 0.2731, "step": 9986 }, { "epoch": 0.4678409144142034, "grad_norm": 0.6421395829996586, "learning_rate": 4.4743095171264495e-06, "loss": 0.3181, "step": 9987 }, { "epoch": 0.46788775940413174, "grad_norm": 0.6025293405502004, "learning_rate": 4.474193168180185e-06, "loss": 0.3043, "step": 9988 }, { "epoch": 0.46793460439406004, "grad_norm": 0.5683719681805611, "learning_rate": 4.4740768078729355e-06, "loss": 0.2733, "step": 9989 }, { "epoch": 0.4679814493839884, "grad_norm": 0.6271308933149244, "learning_rate": 4.47396043620537e-06, "loss": 0.2864, "step": 9990 }, { "epoch": 0.4680282943739167, "grad_norm": 0.6333864849972753, "learning_rate": 4.473844053178159e-06, "loss": 0.3041, "step": 9991 }, { "epoch": 0.46807513936384504, "grad_norm": 0.6405186142940908, "learning_rate": 4.473727658791971e-06, "loss": 0.2922, "step": 9992 }, { "epoch": 0.4681219843537734, "grad_norm": 0.5764088789252599, "learning_rate": 4.473611253047476e-06, "loss": 0.2834, "step": 9993 }, { "epoch": 0.4681688293437017, "grad_norm": 0.6247354413589774, "learning_rate": 4.473494835945344e-06, "loss": 0.3048, "step": 9994 }, { "epoch": 0.46821567433363004, "grad_norm": 0.6053594971123629, "learning_rate": 4.473378407486246e-06, "loss": 0.3127, "step": 9995 }, { "epoch": 0.46826251932355833, "grad_norm": 0.581019890805408, "learning_rate": 4.4732619676708524e-06, "loss": 0.2807, "step": 9996 }, { "epoch": 0.4683093643134867, "grad_norm": 0.6350234217488573, "learning_rate": 4.473145516499831e-06, "loss": 0.2948, "step": 9997 }, { "epoch": 0.468356209303415, "grad_norm": 0.6139063876994338, "learning_rate": 4.473029053973852e-06, "loss": 0.3166, "step": 9998 }, { "epoch": 0.46840305429334333, "grad_norm": 0.5925668876586535, "learning_rate": 4.472912580093588e-06, "loss": 0.306, "step": 9999 }, { "epoch": 0.46844989928327163, "grad_norm": 0.5754499459374639, "learning_rate": 4.472796094859707e-06, "loss": 0.2756, "step": 10000 }, { "epoch": 0.4684967442732, "grad_norm": 0.5747768450887732, "learning_rate": 4.472679598272881e-06, "loss": 0.2781, "step": 10001 }, { "epoch": 0.46854358926312833, "grad_norm": 0.5576488230624447, "learning_rate": 4.47256309033378e-06, "loss": 0.2806, "step": 10002 }, { "epoch": 0.46859043425305663, "grad_norm": 0.576306796437993, "learning_rate": 4.472446571043074e-06, "loss": 0.2826, "step": 10003 }, { "epoch": 0.468637279242985, "grad_norm": 0.5605668955642369, "learning_rate": 4.4723300404014335e-06, "loss": 0.2816, "step": 10004 }, { "epoch": 0.4686841242329133, "grad_norm": 0.6227659148485529, "learning_rate": 4.47221349840953e-06, "loss": 0.2822, "step": 10005 }, { "epoch": 0.46873096922284163, "grad_norm": 0.6684889196054119, "learning_rate": 4.472096945068033e-06, "loss": 0.2936, "step": 10006 }, { "epoch": 0.4687778142127699, "grad_norm": 0.6035797310498127, "learning_rate": 4.471980380377613e-06, "loss": 0.2717, "step": 10007 }, { "epoch": 0.4688246592026983, "grad_norm": 0.5960278858586311, "learning_rate": 4.471863804338943e-06, "loss": 0.2899, "step": 10008 }, { "epoch": 0.4688715041926266, "grad_norm": 0.5794547012564639, "learning_rate": 4.471747216952692e-06, "loss": 0.3081, "step": 10009 }, { "epoch": 0.4689183491825549, "grad_norm": 0.5260952879294899, "learning_rate": 4.471630618219531e-06, "loss": 0.2585, "step": 10010 }, { "epoch": 0.4689651941724833, "grad_norm": 0.6005142509605415, "learning_rate": 4.471514008140131e-06, "loss": 0.2896, "step": 10011 }, { "epoch": 0.4690120391624116, "grad_norm": 0.605382241402844, "learning_rate": 4.471397386715164e-06, "loss": 0.3044, "step": 10012 }, { "epoch": 0.4690588841523399, "grad_norm": 0.5783600614038443, "learning_rate": 4.4712807539453004e-06, "loss": 0.2862, "step": 10013 }, { "epoch": 0.4691057291422682, "grad_norm": 0.6767369555932453, "learning_rate": 4.471164109831211e-06, "loss": 0.2847, "step": 10014 }, { "epoch": 0.4691525741321966, "grad_norm": 0.6878758384366906, "learning_rate": 4.471047454373568e-06, "loss": 0.2821, "step": 10015 }, { "epoch": 0.46919941912212487, "grad_norm": 0.5758281550098973, "learning_rate": 4.470930787573042e-06, "loss": 0.2785, "step": 10016 }, { "epoch": 0.4692462641120532, "grad_norm": 0.5681411516598498, "learning_rate": 4.470814109430306e-06, "loss": 0.2856, "step": 10017 }, { "epoch": 0.4692931091019815, "grad_norm": 0.5709379516403466, "learning_rate": 4.4706974199460285e-06, "loss": 0.2771, "step": 10018 }, { "epoch": 0.4693399540919099, "grad_norm": 0.6496513656123256, "learning_rate": 4.4705807191208835e-06, "loss": 0.291, "step": 10019 }, { "epoch": 0.4693867990818382, "grad_norm": 0.6646576134492549, "learning_rate": 4.470464006955542e-06, "loss": 0.2936, "step": 10020 }, { "epoch": 0.4694336440717665, "grad_norm": 0.6146258920120939, "learning_rate": 4.470347283450675e-06, "loss": 0.2968, "step": 10021 }, { "epoch": 0.4694804890616949, "grad_norm": 0.6123120931707648, "learning_rate": 4.4702305486069544e-06, "loss": 0.2911, "step": 10022 }, { "epoch": 0.46952733405162317, "grad_norm": 0.6031816880156043, "learning_rate": 4.470113802425053e-06, "loss": 0.2781, "step": 10023 }, { "epoch": 0.4695741790415515, "grad_norm": 0.5951742668700652, "learning_rate": 4.46999704490564e-06, "loss": 0.2786, "step": 10024 }, { "epoch": 0.4696210240314798, "grad_norm": 0.5897531427377855, "learning_rate": 4.469880276049391e-06, "loss": 0.284, "step": 10025 }, { "epoch": 0.46966786902140817, "grad_norm": 0.6109755921773926, "learning_rate": 4.469763495856976e-06, "loss": 0.2833, "step": 10026 }, { "epoch": 0.46971471401133646, "grad_norm": 0.6200351801473512, "learning_rate": 4.469646704329066e-06, "loss": 0.2963, "step": 10027 }, { "epoch": 0.4697615590012648, "grad_norm": 0.598103924214478, "learning_rate": 4.469529901466335e-06, "loss": 0.2845, "step": 10028 }, { "epoch": 0.46980840399119317, "grad_norm": 0.5831398996715301, "learning_rate": 4.469413087269454e-06, "loss": 0.2802, "step": 10029 }, { "epoch": 0.46985524898112146, "grad_norm": 0.5832844845385788, "learning_rate": 4.469296261739097e-06, "loss": 0.2828, "step": 10030 }, { "epoch": 0.4699020939710498, "grad_norm": 0.5680125215133927, "learning_rate": 4.469179424875933e-06, "loss": 0.2779, "step": 10031 }, { "epoch": 0.4699489389609781, "grad_norm": 0.6204274948506032, "learning_rate": 4.469062576680638e-06, "loss": 0.3106, "step": 10032 }, { "epoch": 0.46999578395090646, "grad_norm": 0.5921114674553006, "learning_rate": 4.4689457171538816e-06, "loss": 0.2626, "step": 10033 }, { "epoch": 0.47004262894083476, "grad_norm": 0.6014819036358873, "learning_rate": 4.468828846296339e-06, "loss": 0.2842, "step": 10034 }, { "epoch": 0.4700894739307631, "grad_norm": 0.6174389955355921, "learning_rate": 4.468711964108679e-06, "loss": 0.2925, "step": 10035 }, { "epoch": 0.4701363189206914, "grad_norm": 0.6058809532527851, "learning_rate": 4.468595070591579e-06, "loss": 0.2979, "step": 10036 }, { "epoch": 0.47018316391061976, "grad_norm": 0.620881484338299, "learning_rate": 4.4684781657457074e-06, "loss": 0.2794, "step": 10037 }, { "epoch": 0.4702300089005481, "grad_norm": 0.6267283861077745, "learning_rate": 4.468361249571739e-06, "loss": 0.2871, "step": 10038 }, { "epoch": 0.4702768538904764, "grad_norm": 0.6660435153963283, "learning_rate": 4.468244322070347e-06, "loss": 0.283, "step": 10039 }, { "epoch": 0.47032369888040476, "grad_norm": 0.6015721205092446, "learning_rate": 4.4681273832422035e-06, "loss": 0.299, "step": 10040 }, { "epoch": 0.47037054387033306, "grad_norm": 0.5846334987996283, "learning_rate": 4.468010433087981e-06, "loss": 0.2711, "step": 10041 }, { "epoch": 0.4704173888602614, "grad_norm": 0.6272659670751697, "learning_rate": 4.467893471608353e-06, "loss": 0.2874, "step": 10042 }, { "epoch": 0.4704642338501897, "grad_norm": 0.6336999271497756, "learning_rate": 4.467776498803993e-06, "loss": 0.2853, "step": 10043 }, { "epoch": 0.47051107884011806, "grad_norm": 0.6601734970093801, "learning_rate": 4.467659514675574e-06, "loss": 0.3141, "step": 10044 }, { "epoch": 0.47055792383004635, "grad_norm": 0.5968966526365903, "learning_rate": 4.467542519223769e-06, "loss": 0.3078, "step": 10045 }, { "epoch": 0.4706047688199747, "grad_norm": 0.6155058248714216, "learning_rate": 4.467425512449252e-06, "loss": 0.2951, "step": 10046 }, { "epoch": 0.47065161380990306, "grad_norm": 0.5989315540912072, "learning_rate": 4.467308494352694e-06, "loss": 0.2689, "step": 10047 }, { "epoch": 0.47069845879983135, "grad_norm": 0.5760495045031275, "learning_rate": 4.467191464934772e-06, "loss": 0.2847, "step": 10048 }, { "epoch": 0.4707453037897597, "grad_norm": 0.6727756993263567, "learning_rate": 4.467074424196155e-06, "loss": 0.3063, "step": 10049 }, { "epoch": 0.470792148779688, "grad_norm": 0.5674061371190405, "learning_rate": 4.4669573721375214e-06, "loss": 0.2698, "step": 10050 }, { "epoch": 0.47083899376961635, "grad_norm": 0.6513094072368525, "learning_rate": 4.4668403087595415e-06, "loss": 0.3028, "step": 10051 }, { "epoch": 0.47088583875954465, "grad_norm": 0.5729849802341241, "learning_rate": 4.466723234062891e-06, "loss": 0.2655, "step": 10052 }, { "epoch": 0.470932683749473, "grad_norm": 0.5842023472962642, "learning_rate": 4.466606148048241e-06, "loss": 0.2893, "step": 10053 }, { "epoch": 0.4709795287394013, "grad_norm": 0.6395228689667967, "learning_rate": 4.466489050716268e-06, "loss": 0.2992, "step": 10054 }, { "epoch": 0.47102637372932965, "grad_norm": 0.632713635273358, "learning_rate": 4.466371942067644e-06, "loss": 0.2814, "step": 10055 }, { "epoch": 0.471073218719258, "grad_norm": 0.615578081266522, "learning_rate": 4.466254822103045e-06, "loss": 0.3112, "step": 10056 }, { "epoch": 0.4711200637091863, "grad_norm": 0.5461623107886433, "learning_rate": 4.466137690823142e-06, "loss": 0.2599, "step": 10057 }, { "epoch": 0.47116690869911465, "grad_norm": 0.6295308123184475, "learning_rate": 4.466020548228612e-06, "loss": 0.2826, "step": 10058 }, { "epoch": 0.47121375368904295, "grad_norm": 0.6046624995799929, "learning_rate": 4.465903394320128e-06, "loss": 0.2806, "step": 10059 }, { "epoch": 0.4712605986789713, "grad_norm": 0.5866529303281521, "learning_rate": 4.465786229098363e-06, "loss": 0.292, "step": 10060 }, { "epoch": 0.4713074436688996, "grad_norm": 0.6266848073923104, "learning_rate": 4.465669052563994e-06, "loss": 0.2963, "step": 10061 }, { "epoch": 0.47135428865882795, "grad_norm": 0.6418532347808489, "learning_rate": 4.465551864717692e-06, "loss": 0.3078, "step": 10062 }, { "epoch": 0.47140113364875624, "grad_norm": 0.6066545381612557, "learning_rate": 4.465434665560133e-06, "loss": 0.2931, "step": 10063 }, { "epoch": 0.4714479786386846, "grad_norm": 0.6573202203613208, "learning_rate": 4.465317455091992e-06, "loss": 0.2518, "step": 10064 }, { "epoch": 0.47149482362861295, "grad_norm": 0.5941103558744459, "learning_rate": 4.465200233313943e-06, "loss": 0.2877, "step": 10065 }, { "epoch": 0.47154166861854124, "grad_norm": 0.5666458725742679, "learning_rate": 4.465083000226661e-06, "loss": 0.2881, "step": 10066 }, { "epoch": 0.4715885136084696, "grad_norm": 0.5962250927027322, "learning_rate": 4.46496575583082e-06, "loss": 0.2681, "step": 10067 }, { "epoch": 0.4716353585983979, "grad_norm": 0.5761279557920082, "learning_rate": 4.464848500127095e-06, "loss": 0.2828, "step": 10068 }, { "epoch": 0.47168220358832624, "grad_norm": 0.5645197786231875, "learning_rate": 4.464731233116161e-06, "loss": 0.2855, "step": 10069 }, { "epoch": 0.47172904857825454, "grad_norm": 0.6579793823972099, "learning_rate": 4.464613954798692e-06, "loss": 0.2931, "step": 10070 }, { "epoch": 0.4717758935681829, "grad_norm": 0.6059670333790216, "learning_rate": 4.464496665175363e-06, "loss": 0.2744, "step": 10071 }, { "epoch": 0.4718227385581112, "grad_norm": 0.5715193862814307, "learning_rate": 4.464379364246851e-06, "loss": 0.2673, "step": 10072 }, { "epoch": 0.47186958354803954, "grad_norm": 0.5741572783896196, "learning_rate": 4.464262052013828e-06, "loss": 0.2926, "step": 10073 }, { "epoch": 0.4719164285379679, "grad_norm": 0.6649384302023867, "learning_rate": 4.464144728476971e-06, "loss": 0.3144, "step": 10074 }, { "epoch": 0.4719632735278962, "grad_norm": 0.5938914490035172, "learning_rate": 4.464027393636956e-06, "loss": 0.2771, "step": 10075 }, { "epoch": 0.47201011851782454, "grad_norm": 0.5972330185738819, "learning_rate": 4.463910047494455e-06, "loss": 0.2975, "step": 10076 }, { "epoch": 0.47205696350775284, "grad_norm": 0.6012246470574742, "learning_rate": 4.463792690050147e-06, "loss": 0.28, "step": 10077 }, { "epoch": 0.4721038084976812, "grad_norm": 0.569165722726075, "learning_rate": 4.463675321304705e-06, "loss": 0.2959, "step": 10078 }, { "epoch": 0.4721506534876095, "grad_norm": 0.5752193907205789, "learning_rate": 4.463557941258805e-06, "loss": 0.2978, "step": 10079 }, { "epoch": 0.47219749847753784, "grad_norm": 0.5449419776033853, "learning_rate": 4.463440549913123e-06, "loss": 0.2509, "step": 10080 }, { "epoch": 0.47224434346746613, "grad_norm": 0.6082009845156267, "learning_rate": 4.463323147268333e-06, "loss": 0.2919, "step": 10081 }, { "epoch": 0.4722911884573945, "grad_norm": 0.6261055313733933, "learning_rate": 4.463205733325112e-06, "loss": 0.2897, "step": 10082 }, { "epoch": 0.47233803344732284, "grad_norm": 0.5882828453403555, "learning_rate": 4.463088308084136e-06, "loss": 0.2736, "step": 10083 }, { "epoch": 0.47238487843725113, "grad_norm": 0.5267759454159758, "learning_rate": 4.46297087154608e-06, "loss": 0.2684, "step": 10084 }, { "epoch": 0.4724317234271795, "grad_norm": 0.5856739572128303, "learning_rate": 4.4628534237116205e-06, "loss": 0.2828, "step": 10085 }, { "epoch": 0.4724785684171078, "grad_norm": 0.6272026348786263, "learning_rate": 4.462735964581432e-06, "loss": 0.2858, "step": 10086 }, { "epoch": 0.47252541340703613, "grad_norm": 0.547381625564227, "learning_rate": 4.462618494156193e-06, "loss": 0.2665, "step": 10087 }, { "epoch": 0.47257225839696443, "grad_norm": 0.6559578560002726, "learning_rate": 4.462501012436575e-06, "loss": 0.3092, "step": 10088 }, { "epoch": 0.4726191033868928, "grad_norm": 0.580873303821446, "learning_rate": 4.462383519423259e-06, "loss": 0.3003, "step": 10089 }, { "epoch": 0.4726659483768211, "grad_norm": 0.6780324923974057, "learning_rate": 4.462266015116918e-06, "loss": 0.3241, "step": 10090 }, { "epoch": 0.47271279336674943, "grad_norm": 0.6285654198149678, "learning_rate": 4.462148499518231e-06, "loss": 0.297, "step": 10091 }, { "epoch": 0.4727596383566778, "grad_norm": 0.5720746278744915, "learning_rate": 4.462030972627871e-06, "loss": 0.2576, "step": 10092 }, { "epoch": 0.4728064833466061, "grad_norm": 0.6832617849342189, "learning_rate": 4.461913434446517e-06, "loss": 0.2986, "step": 10093 }, { "epoch": 0.47285332833653443, "grad_norm": 0.6229314199440901, "learning_rate": 4.4617958849748435e-06, "loss": 0.3113, "step": 10094 }, { "epoch": 0.4729001733264627, "grad_norm": 0.6305310993076149, "learning_rate": 4.461678324213527e-06, "loss": 0.2951, "step": 10095 }, { "epoch": 0.4729470183163911, "grad_norm": 0.5965713465264006, "learning_rate": 4.461560752163247e-06, "loss": 0.2866, "step": 10096 }, { "epoch": 0.4729938633063194, "grad_norm": 0.6189890813682262, "learning_rate": 4.4614431688246764e-06, "loss": 0.279, "step": 10097 }, { "epoch": 0.4730407082962477, "grad_norm": 0.5694331540589717, "learning_rate": 4.461325574198493e-06, "loss": 0.2747, "step": 10098 }, { "epoch": 0.473087553286176, "grad_norm": 0.5957679197993401, "learning_rate": 4.4612079682853746e-06, "loss": 0.2812, "step": 10099 }, { "epoch": 0.4731343982761044, "grad_norm": 0.6188963035560959, "learning_rate": 4.461090351085997e-06, "loss": 0.2892, "step": 10100 }, { "epoch": 0.4731812432660327, "grad_norm": 0.6149102754634475, "learning_rate": 4.460972722601038e-06, "loss": 0.2965, "step": 10101 }, { "epoch": 0.473228088255961, "grad_norm": 0.6123785125448672, "learning_rate": 4.4608550828311725e-06, "loss": 0.2962, "step": 10102 }, { "epoch": 0.4732749332458894, "grad_norm": 0.5900890847399101, "learning_rate": 4.46073743177708e-06, "loss": 0.2916, "step": 10103 }, { "epoch": 0.47332177823581767, "grad_norm": 0.703000477476826, "learning_rate": 4.460619769439436e-06, "loss": 0.3014, "step": 10104 }, { "epoch": 0.473368623225746, "grad_norm": 0.6514433860976241, "learning_rate": 4.460502095818919e-06, "loss": 0.2916, "step": 10105 }, { "epoch": 0.4734154682156743, "grad_norm": 0.6105127507842908, "learning_rate": 4.460384410916203e-06, "loss": 0.291, "step": 10106 }, { "epoch": 0.47346231320560267, "grad_norm": 0.6226222532433554, "learning_rate": 4.460266714731969e-06, "loss": 0.2866, "step": 10107 }, { "epoch": 0.47350915819553097, "grad_norm": 0.6453340588621718, "learning_rate": 4.460149007266893e-06, "loss": 0.3049, "step": 10108 }, { "epoch": 0.4735560031854593, "grad_norm": 0.6785211595040312, "learning_rate": 4.460031288521651e-06, "loss": 0.3171, "step": 10109 }, { "epoch": 0.47360284817538767, "grad_norm": 0.635532020629571, "learning_rate": 4.459913558496923e-06, "loss": 0.2924, "step": 10110 }, { "epoch": 0.47364969316531597, "grad_norm": 0.6392664747841503, "learning_rate": 4.459795817193383e-06, "loss": 0.2995, "step": 10111 }, { "epoch": 0.4736965381552443, "grad_norm": 0.6242900015851515, "learning_rate": 4.459678064611712e-06, "loss": 0.2822, "step": 10112 }, { "epoch": 0.4737433831451726, "grad_norm": 0.6243715252718982, "learning_rate": 4.459560300752586e-06, "loss": 0.2819, "step": 10113 }, { "epoch": 0.47379022813510097, "grad_norm": 0.640404343060409, "learning_rate": 4.459442525616683e-06, "loss": 0.3045, "step": 10114 }, { "epoch": 0.47383707312502926, "grad_norm": 0.627007771339414, "learning_rate": 4.459324739204681e-06, "loss": 0.284, "step": 10115 }, { "epoch": 0.4738839181149576, "grad_norm": 0.6063319338604503, "learning_rate": 4.459206941517258e-06, "loss": 0.2825, "step": 10116 }, { "epoch": 0.4739307631048859, "grad_norm": 0.6506500677057911, "learning_rate": 4.4590891325550905e-06, "loss": 0.2985, "step": 10117 }, { "epoch": 0.47397760809481426, "grad_norm": 0.6115637874389095, "learning_rate": 4.458971312318858e-06, "loss": 0.3022, "step": 10118 }, { "epoch": 0.4740244530847426, "grad_norm": 0.562081387333904, "learning_rate": 4.458853480809238e-06, "loss": 0.264, "step": 10119 }, { "epoch": 0.4740712980746709, "grad_norm": 0.6214751872332828, "learning_rate": 4.458735638026908e-06, "loss": 0.2895, "step": 10120 }, { "epoch": 0.47411814306459926, "grad_norm": 0.605700061587971, "learning_rate": 4.458617783972548e-06, "loss": 0.2751, "step": 10121 }, { "epoch": 0.47416498805452756, "grad_norm": 0.6433753182601296, "learning_rate": 4.458499918646834e-06, "loss": 0.3205, "step": 10122 }, { "epoch": 0.4742118330444559, "grad_norm": 0.6420737773784949, "learning_rate": 4.4583820420504444e-06, "loss": 0.3026, "step": 10123 }, { "epoch": 0.4742586780343842, "grad_norm": 0.6061960132976972, "learning_rate": 4.458264154184059e-06, "loss": 0.3058, "step": 10124 }, { "epoch": 0.47430552302431256, "grad_norm": 0.5596529503766344, "learning_rate": 4.458146255048357e-06, "loss": 0.2904, "step": 10125 }, { "epoch": 0.47435236801424085, "grad_norm": 0.5895446912915351, "learning_rate": 4.458028344644013e-06, "loss": 0.2914, "step": 10126 }, { "epoch": 0.4743992130041692, "grad_norm": 0.6152159907381956, "learning_rate": 4.4579104229717094e-06, "loss": 0.2895, "step": 10127 }, { "epoch": 0.47444605799409756, "grad_norm": 0.666079071710392, "learning_rate": 4.457792490032123e-06, "loss": 0.2841, "step": 10128 }, { "epoch": 0.47449290298402585, "grad_norm": 0.6452027224088824, "learning_rate": 4.457674545825934e-06, "loss": 0.2981, "step": 10129 }, { "epoch": 0.4745397479739542, "grad_norm": 0.5795360695760182, "learning_rate": 4.457556590353819e-06, "loss": 0.2942, "step": 10130 }, { "epoch": 0.4745865929638825, "grad_norm": 0.548850786728285, "learning_rate": 4.457438623616458e-06, "loss": 0.271, "step": 10131 }, { "epoch": 0.47463343795381085, "grad_norm": 0.58150921397782, "learning_rate": 4.45732064561453e-06, "loss": 0.2938, "step": 10132 }, { "epoch": 0.47468028294373915, "grad_norm": 0.6329450497336861, "learning_rate": 4.457202656348714e-06, "loss": 0.2982, "step": 10133 }, { "epoch": 0.4747271279336675, "grad_norm": 0.6027882105681782, "learning_rate": 4.457084655819687e-06, "loss": 0.3051, "step": 10134 }, { "epoch": 0.4747739729235958, "grad_norm": 0.576000987316435, "learning_rate": 4.456966644028131e-06, "loss": 0.2945, "step": 10135 }, { "epoch": 0.47482081791352415, "grad_norm": 0.674037450607678, "learning_rate": 4.456848620974724e-06, "loss": 0.3066, "step": 10136 }, { "epoch": 0.4748676629034525, "grad_norm": 0.6017140186672761, "learning_rate": 4.456730586660144e-06, "loss": 0.2734, "step": 10137 }, { "epoch": 0.4749145078933808, "grad_norm": 0.611617480205596, "learning_rate": 4.456612541085072e-06, "loss": 0.3046, "step": 10138 }, { "epoch": 0.47496135288330915, "grad_norm": 0.6111242040693126, "learning_rate": 4.456494484250187e-06, "loss": 0.2971, "step": 10139 }, { "epoch": 0.47500819787323745, "grad_norm": 0.6007590046062454, "learning_rate": 4.456376416156168e-06, "loss": 0.2898, "step": 10140 }, { "epoch": 0.4750550428631658, "grad_norm": 0.6102392659434246, "learning_rate": 4.456258336803693e-06, "loss": 0.3065, "step": 10141 }, { "epoch": 0.4751018878530941, "grad_norm": 0.604462654417268, "learning_rate": 4.456140246193444e-06, "loss": 0.2894, "step": 10142 }, { "epoch": 0.47514873284302245, "grad_norm": 0.5779758505744677, "learning_rate": 4.456022144326099e-06, "loss": 0.2771, "step": 10143 }, { "epoch": 0.47519557783295074, "grad_norm": 0.645750336094592, "learning_rate": 4.455904031202339e-06, "loss": 0.2946, "step": 10144 }, { "epoch": 0.4752424228228791, "grad_norm": 0.612053544963038, "learning_rate": 4.455785906822843e-06, "loss": 0.2839, "step": 10145 }, { "epoch": 0.47528926781280745, "grad_norm": 0.6592753911589371, "learning_rate": 4.455667771188289e-06, "loss": 0.3222, "step": 10146 }, { "epoch": 0.47533611280273574, "grad_norm": 0.5589462428422582, "learning_rate": 4.4555496242993605e-06, "loss": 0.2995, "step": 10147 }, { "epoch": 0.4753829577926641, "grad_norm": 0.5896191158102227, "learning_rate": 4.455431466156734e-06, "loss": 0.2959, "step": 10148 }, { "epoch": 0.4754298027825924, "grad_norm": 0.623114104344042, "learning_rate": 4.455313296761092e-06, "loss": 0.2731, "step": 10149 }, { "epoch": 0.47547664777252074, "grad_norm": 0.6052037683803521, "learning_rate": 4.4551951161131126e-06, "loss": 0.2908, "step": 10150 }, { "epoch": 0.47552349276244904, "grad_norm": 0.7089389890851443, "learning_rate": 4.455076924213477e-06, "loss": 0.3185, "step": 10151 }, { "epoch": 0.4755703377523774, "grad_norm": 0.6761983588088383, "learning_rate": 4.454958721062865e-06, "loss": 0.295, "step": 10152 }, { "epoch": 0.4756171827423057, "grad_norm": 0.5756989667138034, "learning_rate": 4.454840506661957e-06, "loss": 0.2645, "step": 10153 }, { "epoch": 0.47566402773223404, "grad_norm": 0.6368424007068979, "learning_rate": 4.454722281011434e-06, "loss": 0.3246, "step": 10154 }, { "epoch": 0.4757108727221624, "grad_norm": 0.6852248559085438, "learning_rate": 4.454604044111974e-06, "loss": 0.293, "step": 10155 }, { "epoch": 0.4757577177120907, "grad_norm": 0.5809090452530071, "learning_rate": 4.45448579596426e-06, "loss": 0.2847, "step": 10156 }, { "epoch": 0.47580456270201904, "grad_norm": 0.6265284794733995, "learning_rate": 4.454367536568972e-06, "loss": 0.3173, "step": 10157 }, { "epoch": 0.47585140769194734, "grad_norm": 0.5971039558588696, "learning_rate": 4.454249265926789e-06, "loss": 0.2699, "step": 10158 }, { "epoch": 0.4758982526818757, "grad_norm": 0.5541085144603646, "learning_rate": 4.454130984038393e-06, "loss": 0.2863, "step": 10159 }, { "epoch": 0.475945097671804, "grad_norm": 0.5945165931158037, "learning_rate": 4.4540126909044644e-06, "loss": 0.2624, "step": 10160 }, { "epoch": 0.47599194266173234, "grad_norm": 0.6081412866727784, "learning_rate": 4.453894386525684e-06, "loss": 0.2731, "step": 10161 }, { "epoch": 0.47603878765166063, "grad_norm": 0.5908475104970051, "learning_rate": 4.453776070902732e-06, "loss": 0.2857, "step": 10162 }, { "epoch": 0.476085632641589, "grad_norm": 0.6107277448326827, "learning_rate": 4.4536577440362905e-06, "loss": 0.2898, "step": 10163 }, { "epoch": 0.47613247763151734, "grad_norm": 0.6604862559049434, "learning_rate": 4.453539405927039e-06, "loss": 0.2952, "step": 10164 }, { "epoch": 0.47617932262144563, "grad_norm": 0.5898551121204664, "learning_rate": 4.453421056575659e-06, "loss": 0.3032, "step": 10165 }, { "epoch": 0.476226167611374, "grad_norm": 0.5638720612104449, "learning_rate": 4.453302695982833e-06, "loss": 0.2991, "step": 10166 }, { "epoch": 0.4762730126013023, "grad_norm": 0.58483341602883, "learning_rate": 4.45318432414924e-06, "loss": 0.2956, "step": 10167 }, { "epoch": 0.47631985759123063, "grad_norm": 0.6140504536764948, "learning_rate": 4.453065941075563e-06, "loss": 0.2963, "step": 10168 }, { "epoch": 0.47636670258115893, "grad_norm": 0.568819233643343, "learning_rate": 4.452947546762482e-06, "loss": 0.2755, "step": 10169 }, { "epoch": 0.4764135475710873, "grad_norm": 0.6160131769342697, "learning_rate": 4.452829141210678e-06, "loss": 0.2838, "step": 10170 }, { "epoch": 0.4764603925610156, "grad_norm": 0.5832869083499105, "learning_rate": 4.452710724420834e-06, "loss": 0.2914, "step": 10171 }, { "epoch": 0.47650723755094393, "grad_norm": 0.5668902743133455, "learning_rate": 4.452592296393629e-06, "loss": 0.2907, "step": 10172 }, { "epoch": 0.4765540825408723, "grad_norm": 0.5977584632528851, "learning_rate": 4.452473857129749e-06, "loss": 0.2975, "step": 10173 }, { "epoch": 0.4766009275308006, "grad_norm": 0.5937316977604746, "learning_rate": 4.45235540662987e-06, "loss": 0.265, "step": 10174 }, { "epoch": 0.47664777252072893, "grad_norm": 0.5874419407257919, "learning_rate": 4.452236944894678e-06, "loss": 0.2811, "step": 10175 }, { "epoch": 0.4766946175106572, "grad_norm": 0.6175161424535958, "learning_rate": 4.452118471924852e-06, "loss": 0.2921, "step": 10176 }, { "epoch": 0.4767414625005856, "grad_norm": 0.5691734907990732, "learning_rate": 4.451999987721076e-06, "loss": 0.2636, "step": 10177 }, { "epoch": 0.4767883074905139, "grad_norm": 0.6059719663330049, "learning_rate": 4.4518814922840295e-06, "loss": 0.2867, "step": 10178 }, { "epoch": 0.4768351524804422, "grad_norm": 0.5977050410388356, "learning_rate": 4.451762985614396e-06, "loss": 0.3051, "step": 10179 }, { "epoch": 0.4768819974703705, "grad_norm": 0.6181537742054168, "learning_rate": 4.451644467712857e-06, "loss": 0.2805, "step": 10180 }, { "epoch": 0.4769288424602989, "grad_norm": 0.6107593293232838, "learning_rate": 4.451525938580094e-06, "loss": 0.2725, "step": 10181 }, { "epoch": 0.4769756874502272, "grad_norm": 0.5997974206219624, "learning_rate": 4.4514073982167915e-06, "loss": 0.2767, "step": 10182 }, { "epoch": 0.4770225324401555, "grad_norm": 0.6496566731913539, "learning_rate": 4.4512888466236285e-06, "loss": 0.3269, "step": 10183 }, { "epoch": 0.4770693774300839, "grad_norm": 0.5908162802931156, "learning_rate": 4.4511702838012895e-06, "loss": 0.2849, "step": 10184 }, { "epoch": 0.47711622242001217, "grad_norm": 0.5967934732168362, "learning_rate": 4.451051709750456e-06, "loss": 0.3168, "step": 10185 }, { "epoch": 0.4771630674099405, "grad_norm": 0.5858033077969385, "learning_rate": 4.450933124471809e-06, "loss": 0.2691, "step": 10186 }, { "epoch": 0.4772099123998688, "grad_norm": 0.628166269093498, "learning_rate": 4.450814527966032e-06, "loss": 0.2947, "step": 10187 }, { "epoch": 0.47725675738979717, "grad_norm": 0.6060546461468882, "learning_rate": 4.45069592023381e-06, "loss": 0.2908, "step": 10188 }, { "epoch": 0.47730360237972547, "grad_norm": 0.6330949338831582, "learning_rate": 4.450577301275821e-06, "loss": 0.3144, "step": 10189 }, { "epoch": 0.4773504473696538, "grad_norm": 0.5996359805739752, "learning_rate": 4.4504586710927515e-06, "loss": 0.3134, "step": 10190 }, { "epoch": 0.4773972923595821, "grad_norm": 0.6368506478074599, "learning_rate": 4.450340029685282e-06, "loss": 0.2909, "step": 10191 }, { "epoch": 0.47744413734951047, "grad_norm": 0.6145970735343056, "learning_rate": 4.450221377054095e-06, "loss": 0.2878, "step": 10192 }, { "epoch": 0.4774909823394388, "grad_norm": 0.5980294607645056, "learning_rate": 4.450102713199874e-06, "loss": 0.2919, "step": 10193 }, { "epoch": 0.4775378273293671, "grad_norm": 0.5742059031867851, "learning_rate": 4.449984038123304e-06, "loss": 0.2815, "step": 10194 }, { "epoch": 0.47758467231929547, "grad_norm": 0.6051680505095309, "learning_rate": 4.449865351825065e-06, "loss": 0.3061, "step": 10195 }, { "epoch": 0.47763151730922376, "grad_norm": 0.6284179768846159, "learning_rate": 4.44974665430584e-06, "loss": 0.2947, "step": 10196 }, { "epoch": 0.4776783622991521, "grad_norm": 0.6099068425790448, "learning_rate": 4.449627945566314e-06, "loss": 0.2976, "step": 10197 }, { "epoch": 0.4777252072890804, "grad_norm": 0.5563686206529984, "learning_rate": 4.4495092256071696e-06, "loss": 0.2825, "step": 10198 }, { "epoch": 0.47777205227900876, "grad_norm": 0.6239760500213724, "learning_rate": 4.449390494429089e-06, "loss": 0.2774, "step": 10199 }, { "epoch": 0.47781889726893706, "grad_norm": 0.6373479591148744, "learning_rate": 4.4492717520327555e-06, "loss": 0.3056, "step": 10200 }, { "epoch": 0.4778657422588654, "grad_norm": 0.577063665323822, "learning_rate": 4.449152998418854e-06, "loss": 0.2931, "step": 10201 }, { "epoch": 0.47791258724879376, "grad_norm": 0.6938423463425779, "learning_rate": 4.449034233588066e-06, "loss": 0.284, "step": 10202 }, { "epoch": 0.47795943223872206, "grad_norm": 0.5898931692067264, "learning_rate": 4.4489154575410765e-06, "loss": 0.2831, "step": 10203 }, { "epoch": 0.4780062772286504, "grad_norm": 0.5871835769260453, "learning_rate": 4.4487966702785685e-06, "loss": 0.2931, "step": 10204 }, { "epoch": 0.4780531222185787, "grad_norm": 0.6057644276953237, "learning_rate": 4.448677871801225e-06, "loss": 0.3012, "step": 10205 }, { "epoch": 0.47809996720850706, "grad_norm": 0.6134284218797618, "learning_rate": 4.448559062109731e-06, "loss": 0.2988, "step": 10206 }, { "epoch": 0.47814681219843536, "grad_norm": 0.6568738722144137, "learning_rate": 4.448440241204768e-06, "loss": 0.3273, "step": 10207 }, { "epoch": 0.4781936571883637, "grad_norm": 0.6004389989673021, "learning_rate": 4.448321409087022e-06, "loss": 0.2706, "step": 10208 }, { "epoch": 0.478240502178292, "grad_norm": 0.6565412104371956, "learning_rate": 4.448202565757176e-06, "loss": 0.2919, "step": 10209 }, { "epoch": 0.47828734716822036, "grad_norm": 0.6108634433007244, "learning_rate": 4.4480837112159135e-06, "loss": 0.2765, "step": 10210 }, { "epoch": 0.4783341921581487, "grad_norm": 0.6041471415373115, "learning_rate": 4.447964845463919e-06, "loss": 0.2876, "step": 10211 }, { "epoch": 0.478381037148077, "grad_norm": 0.5440959881266396, "learning_rate": 4.447845968501876e-06, "loss": 0.2657, "step": 10212 }, { "epoch": 0.47842788213800536, "grad_norm": 0.5994826469100312, "learning_rate": 4.44772708033047e-06, "loss": 0.2869, "step": 10213 }, { "epoch": 0.47847472712793365, "grad_norm": 0.6021992882128473, "learning_rate": 4.447608180950384e-06, "loss": 0.2929, "step": 10214 }, { "epoch": 0.478521572117862, "grad_norm": 0.6314164477365841, "learning_rate": 4.447489270362302e-06, "loss": 0.2971, "step": 10215 }, { "epoch": 0.4785684171077903, "grad_norm": 0.5957588176357447, "learning_rate": 4.4473703485669085e-06, "loss": 0.2714, "step": 10216 }, { "epoch": 0.47861526209771865, "grad_norm": 0.5858398221999493, "learning_rate": 4.447251415564889e-06, "loss": 0.2741, "step": 10217 }, { "epoch": 0.47866210708764695, "grad_norm": 0.5631364230136019, "learning_rate": 4.447132471356926e-06, "loss": 0.2633, "step": 10218 }, { "epoch": 0.4787089520775753, "grad_norm": 0.5661443500299053, "learning_rate": 4.447013515943706e-06, "loss": 0.2688, "step": 10219 }, { "epoch": 0.47875579706750365, "grad_norm": 0.5188978948208441, "learning_rate": 4.446894549325912e-06, "loss": 0.2843, "step": 10220 }, { "epoch": 0.47880264205743195, "grad_norm": 0.5281145418666033, "learning_rate": 4.446775571504229e-06, "loss": 0.2565, "step": 10221 }, { "epoch": 0.4788494870473603, "grad_norm": 0.6656581500875038, "learning_rate": 4.446656582479343e-06, "loss": 0.2878, "step": 10222 }, { "epoch": 0.4788963320372886, "grad_norm": 0.6143490092243592, "learning_rate": 4.446537582251936e-06, "loss": 0.3026, "step": 10223 }, { "epoch": 0.47894317702721695, "grad_norm": 0.6285605988909991, "learning_rate": 4.446418570822696e-06, "loss": 0.3154, "step": 10224 }, { "epoch": 0.47899002201714524, "grad_norm": 0.5862005986019284, "learning_rate": 4.446299548192306e-06, "loss": 0.2857, "step": 10225 }, { "epoch": 0.4790368670070736, "grad_norm": 0.6153955717547043, "learning_rate": 4.44618051436145e-06, "loss": 0.2881, "step": 10226 }, { "epoch": 0.4790837119970019, "grad_norm": 0.5750066080071546, "learning_rate": 4.446061469330816e-06, "loss": 0.2783, "step": 10227 }, { "epoch": 0.47913055698693024, "grad_norm": 0.546495627360414, "learning_rate": 4.445942413101086e-06, "loss": 0.2711, "step": 10228 }, { "epoch": 0.4791774019768586, "grad_norm": 0.6375247381357154, "learning_rate": 4.4458233456729475e-06, "loss": 0.3184, "step": 10229 }, { "epoch": 0.4792242469667869, "grad_norm": 0.5638126352497609, "learning_rate": 4.445704267047084e-06, "loss": 0.2814, "step": 10230 }, { "epoch": 0.47927109195671524, "grad_norm": 0.5577383457237703, "learning_rate": 4.445585177224182e-06, "loss": 0.2778, "step": 10231 }, { "epoch": 0.47931793694664354, "grad_norm": 0.5993246599923617, "learning_rate": 4.445466076204926e-06, "loss": 0.2974, "step": 10232 }, { "epoch": 0.4793647819365719, "grad_norm": 0.6412663359353473, "learning_rate": 4.445346963990002e-06, "loss": 0.3133, "step": 10233 }, { "epoch": 0.4794116269265002, "grad_norm": 0.5689233693467379, "learning_rate": 4.445227840580094e-06, "loss": 0.2852, "step": 10234 }, { "epoch": 0.47945847191642854, "grad_norm": 0.5997754188948878, "learning_rate": 4.44510870597589e-06, "loss": 0.3049, "step": 10235 }, { "epoch": 0.47950531690635684, "grad_norm": 0.5960826400647661, "learning_rate": 4.444989560178074e-06, "loss": 0.265, "step": 10236 }, { "epoch": 0.4795521618962852, "grad_norm": 0.5799943358855366, "learning_rate": 4.444870403187332e-06, "loss": 0.2858, "step": 10237 }, { "epoch": 0.47959900688621354, "grad_norm": 0.5926978410659465, "learning_rate": 4.44475123500435e-06, "loss": 0.2959, "step": 10238 }, { "epoch": 0.47964585187614184, "grad_norm": 0.6289479646209962, "learning_rate": 4.444632055629813e-06, "loss": 0.3101, "step": 10239 }, { "epoch": 0.4796926968660702, "grad_norm": 0.6188704088063598, "learning_rate": 4.444512865064407e-06, "loss": 0.3001, "step": 10240 }, { "epoch": 0.4797395418559985, "grad_norm": 0.6372948787256663, "learning_rate": 4.444393663308818e-06, "loss": 0.3028, "step": 10241 }, { "epoch": 0.47978638684592684, "grad_norm": 0.5901459834689636, "learning_rate": 4.444274450363732e-06, "loss": 0.2739, "step": 10242 }, { "epoch": 0.47983323183585513, "grad_norm": 0.6422291165511158, "learning_rate": 4.444155226229837e-06, "loss": 0.2796, "step": 10243 }, { "epoch": 0.4798800768257835, "grad_norm": 0.6321942099094676, "learning_rate": 4.444035990907816e-06, "loss": 0.3149, "step": 10244 }, { "epoch": 0.4799269218157118, "grad_norm": 0.6021204297099311, "learning_rate": 4.443916744398357e-06, "loss": 0.2904, "step": 10245 }, { "epoch": 0.47997376680564013, "grad_norm": 0.6225916027625685, "learning_rate": 4.443797486702145e-06, "loss": 0.2896, "step": 10246 }, { "epoch": 0.4800206117955685, "grad_norm": 0.5711683922819453, "learning_rate": 4.443678217819867e-06, "loss": 0.2926, "step": 10247 }, { "epoch": 0.4800674567854968, "grad_norm": 0.6311923382917409, "learning_rate": 4.44355893775221e-06, "loss": 0.2915, "step": 10248 }, { "epoch": 0.48011430177542513, "grad_norm": 0.5931353553188531, "learning_rate": 4.44343964649986e-06, "loss": 0.287, "step": 10249 }, { "epoch": 0.48016114676535343, "grad_norm": 0.56224665566381, "learning_rate": 4.4433203440635025e-06, "loss": 0.2638, "step": 10250 }, { "epoch": 0.4802079917552818, "grad_norm": 0.5835286899158911, "learning_rate": 4.443201030443826e-06, "loss": 0.2796, "step": 10251 }, { "epoch": 0.4802548367452101, "grad_norm": 0.5762693107116247, "learning_rate": 4.443081705641515e-06, "loss": 0.2933, "step": 10252 }, { "epoch": 0.48030168173513843, "grad_norm": 0.6370501788034229, "learning_rate": 4.442962369657257e-06, "loss": 0.2902, "step": 10253 }, { "epoch": 0.4803485267250667, "grad_norm": 0.6551945910958891, "learning_rate": 4.442843022491739e-06, "loss": 0.3079, "step": 10254 }, { "epoch": 0.4803953717149951, "grad_norm": 0.5677966552893909, "learning_rate": 4.442723664145648e-06, "loss": 0.2828, "step": 10255 }, { "epoch": 0.48044221670492343, "grad_norm": 0.5732478805427698, "learning_rate": 4.44260429461967e-06, "loss": 0.2757, "step": 10256 }, { "epoch": 0.4804890616948517, "grad_norm": 0.6168318756251305, "learning_rate": 4.442484913914493e-06, "loss": 0.2668, "step": 10257 }, { "epoch": 0.4805359066847801, "grad_norm": 0.5955287435516651, "learning_rate": 4.442365522030804e-06, "loss": 0.2979, "step": 10258 }, { "epoch": 0.4805827516747084, "grad_norm": 0.5591869402973076, "learning_rate": 4.44224611896929e-06, "loss": 0.2801, "step": 10259 }, { "epoch": 0.4806295966646367, "grad_norm": 0.5929891842042478, "learning_rate": 4.442126704730637e-06, "loss": 0.2894, "step": 10260 }, { "epoch": 0.480676441654565, "grad_norm": 0.6219723765060039, "learning_rate": 4.442007279315532e-06, "loss": 0.2872, "step": 10261 }, { "epoch": 0.4807232866444934, "grad_norm": 0.6649403219530651, "learning_rate": 4.441887842724665e-06, "loss": 0.3028, "step": 10262 }, { "epoch": 0.48077013163442167, "grad_norm": 0.5828086394424266, "learning_rate": 4.44176839495872e-06, "loss": 0.291, "step": 10263 }, { "epoch": 0.48081697662435, "grad_norm": 0.6176241725863678, "learning_rate": 4.441648936018387e-06, "loss": 0.303, "step": 10264 }, { "epoch": 0.4808638216142784, "grad_norm": 0.578514946263962, "learning_rate": 4.441529465904352e-06, "loss": 0.2853, "step": 10265 }, { "epoch": 0.48091066660420667, "grad_norm": 0.5331193043168444, "learning_rate": 4.441409984617303e-06, "loss": 0.2631, "step": 10266 }, { "epoch": 0.480957511594135, "grad_norm": 0.5803878985243388, "learning_rate": 4.441290492157928e-06, "loss": 0.2849, "step": 10267 }, { "epoch": 0.4810043565840633, "grad_norm": 0.605759292717855, "learning_rate": 4.441170988526913e-06, "loss": 0.2957, "step": 10268 }, { "epoch": 0.48105120157399167, "grad_norm": 0.630727059046045, "learning_rate": 4.441051473724948e-06, "loss": 0.2951, "step": 10269 }, { "epoch": 0.48109804656391997, "grad_norm": 0.6197577645254866, "learning_rate": 4.440931947752719e-06, "loss": 0.304, "step": 10270 }, { "epoch": 0.4811448915538483, "grad_norm": 0.5714643304655255, "learning_rate": 4.440812410610914e-06, "loss": 0.2837, "step": 10271 }, { "epoch": 0.4811917365437766, "grad_norm": 0.6180088025987516, "learning_rate": 4.440692862300222e-06, "loss": 0.2862, "step": 10272 }, { "epoch": 0.48123858153370497, "grad_norm": 0.6076434480061395, "learning_rate": 4.440573302821331e-06, "loss": 0.2776, "step": 10273 }, { "epoch": 0.4812854265236333, "grad_norm": 0.5812322903079357, "learning_rate": 4.440453732174927e-06, "loss": 0.2749, "step": 10274 }, { "epoch": 0.4813322715135616, "grad_norm": 0.594944295046824, "learning_rate": 4.4403341503617e-06, "loss": 0.2765, "step": 10275 }, { "epoch": 0.48137911650348997, "grad_norm": 0.6011526985146388, "learning_rate": 4.4402145573823384e-06, "loss": 0.296, "step": 10276 }, { "epoch": 0.48142596149341826, "grad_norm": 0.5933361770542074, "learning_rate": 4.440094953237529e-06, "loss": 0.2876, "step": 10277 }, { "epoch": 0.4814728064833466, "grad_norm": 0.5497427929793038, "learning_rate": 4.439975337927961e-06, "loss": 0.2663, "step": 10278 }, { "epoch": 0.4815196514732749, "grad_norm": 0.5647243412164494, "learning_rate": 4.439855711454323e-06, "loss": 0.2814, "step": 10279 }, { "epoch": 0.48156649646320326, "grad_norm": 0.5489953760030856, "learning_rate": 4.439736073817301e-06, "loss": 0.2498, "step": 10280 }, { "epoch": 0.48161334145313156, "grad_norm": 0.600583359725135, "learning_rate": 4.439616425017587e-06, "loss": 0.292, "step": 10281 }, { "epoch": 0.4816601864430599, "grad_norm": 0.6254552937683334, "learning_rate": 4.439496765055868e-06, "loss": 0.3038, "step": 10282 }, { "epoch": 0.48170703143298826, "grad_norm": 0.577360817993271, "learning_rate": 4.439377093932832e-06, "loss": 0.269, "step": 10283 }, { "epoch": 0.48175387642291656, "grad_norm": 0.59847550194786, "learning_rate": 4.439257411649169e-06, "loss": 0.293, "step": 10284 }, { "epoch": 0.4818007214128449, "grad_norm": 0.6068603304326747, "learning_rate": 4.439137718205567e-06, "loss": 0.3045, "step": 10285 }, { "epoch": 0.4818475664027732, "grad_norm": 0.556111459064891, "learning_rate": 4.439018013602714e-06, "loss": 0.2753, "step": 10286 }, { "epoch": 0.48189441139270156, "grad_norm": 0.5969401112115278, "learning_rate": 4.4388982978413e-06, "loss": 0.2943, "step": 10287 }, { "epoch": 0.48194125638262986, "grad_norm": 0.5992268029904018, "learning_rate": 4.4387785709220146e-06, "loss": 0.2729, "step": 10288 }, { "epoch": 0.4819881013725582, "grad_norm": 0.6047292170487601, "learning_rate": 4.438658832845544e-06, "loss": 0.3026, "step": 10289 }, { "epoch": 0.4820349463624865, "grad_norm": 0.57779653052187, "learning_rate": 4.4385390836125805e-06, "loss": 0.3025, "step": 10290 }, { "epoch": 0.48208179135241486, "grad_norm": 0.5771687420311788, "learning_rate": 4.438419323223811e-06, "loss": 0.2809, "step": 10291 }, { "epoch": 0.4821286363423432, "grad_norm": 0.6036176768805308, "learning_rate": 4.438299551679926e-06, "loss": 0.3022, "step": 10292 }, { "epoch": 0.4821754813322715, "grad_norm": 0.5289376566815455, "learning_rate": 4.438179768981614e-06, "loss": 0.2731, "step": 10293 }, { "epoch": 0.48222232632219986, "grad_norm": 0.5901920850581492, "learning_rate": 4.438059975129565e-06, "loss": 0.3047, "step": 10294 }, { "epoch": 0.48226917131212815, "grad_norm": 0.5673717297380784, "learning_rate": 4.437940170124467e-06, "loss": 0.2755, "step": 10295 }, { "epoch": 0.4823160163020565, "grad_norm": 0.576063575912949, "learning_rate": 4.4378203539670115e-06, "loss": 0.3054, "step": 10296 }, { "epoch": 0.4823628612919848, "grad_norm": 0.6047754764701726, "learning_rate": 4.4377005266578866e-06, "loss": 0.2973, "step": 10297 }, { "epoch": 0.48240970628191315, "grad_norm": 0.5496826089399183, "learning_rate": 4.437580688197782e-06, "loss": 0.2824, "step": 10298 }, { "epoch": 0.48245655127184145, "grad_norm": 0.6060846693143631, "learning_rate": 4.437460838587388e-06, "loss": 0.2861, "step": 10299 }, { "epoch": 0.4825033962617698, "grad_norm": 0.5947761969248732, "learning_rate": 4.4373409778273925e-06, "loss": 0.29, "step": 10300 }, { "epoch": 0.48255024125169815, "grad_norm": 0.6052242160963133, "learning_rate": 4.437221105918488e-06, "loss": 0.2881, "step": 10301 }, { "epoch": 0.48259708624162645, "grad_norm": 0.6148209636326217, "learning_rate": 4.437101222861363e-06, "loss": 0.2906, "step": 10302 }, { "epoch": 0.4826439312315548, "grad_norm": 0.6143747218709243, "learning_rate": 4.436981328656706e-06, "loss": 0.3014, "step": 10303 }, { "epoch": 0.4826907762214831, "grad_norm": 0.5871403409360166, "learning_rate": 4.436861423305209e-06, "loss": 0.298, "step": 10304 }, { "epoch": 0.48273762121141145, "grad_norm": 0.5752288764917449, "learning_rate": 4.436741506807561e-06, "loss": 0.2663, "step": 10305 }, { "epoch": 0.48278446620133975, "grad_norm": 0.5765822493289211, "learning_rate": 4.436621579164453e-06, "loss": 0.2831, "step": 10306 }, { "epoch": 0.4828313111912681, "grad_norm": 0.5846056555599507, "learning_rate": 4.4365016403765746e-06, "loss": 0.3177, "step": 10307 }, { "epoch": 0.4828781561811964, "grad_norm": 0.5526747958703915, "learning_rate": 4.436381690444616e-06, "loss": 0.2696, "step": 10308 }, { "epoch": 0.48292500117112475, "grad_norm": 0.6252989509969329, "learning_rate": 4.436261729369266e-06, "loss": 0.3099, "step": 10309 }, { "epoch": 0.4829718461610531, "grad_norm": 0.6364875428631683, "learning_rate": 4.436141757151219e-06, "loss": 0.2708, "step": 10310 }, { "epoch": 0.4830186911509814, "grad_norm": 0.5451813398157686, "learning_rate": 4.4360217737911614e-06, "loss": 0.2907, "step": 10311 }, { "epoch": 0.48306553614090975, "grad_norm": 0.61252368796526, "learning_rate": 4.435901779289785e-06, "loss": 0.2993, "step": 10312 }, { "epoch": 0.48311238113083804, "grad_norm": 0.5898182902159902, "learning_rate": 4.43578177364778e-06, "loss": 0.2755, "step": 10313 }, { "epoch": 0.4831592261207664, "grad_norm": 0.5887081008829324, "learning_rate": 4.435661756865838e-06, "loss": 0.2955, "step": 10314 }, { "epoch": 0.4832060711106947, "grad_norm": 0.6102931503608756, "learning_rate": 4.435541728944649e-06, "loss": 0.2975, "step": 10315 }, { "epoch": 0.48325291610062304, "grad_norm": 0.5789416685826021, "learning_rate": 4.435421689884905e-06, "loss": 0.3068, "step": 10316 }, { "epoch": 0.48329976109055134, "grad_norm": 0.5916706999392626, "learning_rate": 4.435301639687294e-06, "loss": 0.2792, "step": 10317 }, { "epoch": 0.4833466060804797, "grad_norm": 0.6249302845433883, "learning_rate": 4.435181578352509e-06, "loss": 0.2896, "step": 10318 }, { "epoch": 0.48339345107040804, "grad_norm": 0.6248446179105608, "learning_rate": 4.435061505881241e-06, "loss": 0.3072, "step": 10319 }, { "epoch": 0.48344029606033634, "grad_norm": 0.6006263218063508, "learning_rate": 4.43494142227418e-06, "loss": 0.2821, "step": 10320 }, { "epoch": 0.4834871410502647, "grad_norm": 0.6918177242198102, "learning_rate": 4.4348213275320166e-06, "loss": 0.3306, "step": 10321 }, { "epoch": 0.483533986040193, "grad_norm": 0.6653597096689339, "learning_rate": 4.434701221655444e-06, "loss": 0.3013, "step": 10322 }, { "epoch": 0.48358083103012134, "grad_norm": 0.6039770434261189, "learning_rate": 4.434581104645151e-06, "loss": 0.286, "step": 10323 }, { "epoch": 0.48362767602004963, "grad_norm": 0.6201580816430466, "learning_rate": 4.434460976501831e-06, "loss": 0.2853, "step": 10324 }, { "epoch": 0.483674521009978, "grad_norm": 0.5538130797602507, "learning_rate": 4.434340837226173e-06, "loss": 0.2883, "step": 10325 }, { "epoch": 0.4837213659999063, "grad_norm": 0.5842853625473642, "learning_rate": 4.4342206868188705e-06, "loss": 0.2765, "step": 10326 }, { "epoch": 0.48376821098983463, "grad_norm": 0.6096716175767452, "learning_rate": 4.434100525280615e-06, "loss": 0.2939, "step": 10327 }, { "epoch": 0.483815055979763, "grad_norm": 0.6007739527447223, "learning_rate": 4.433980352612096e-06, "loss": 0.2845, "step": 10328 }, { "epoch": 0.4838619009696913, "grad_norm": 0.6530032583605504, "learning_rate": 4.433860168814006e-06, "loss": 0.3181, "step": 10329 }, { "epoch": 0.48390874595961964, "grad_norm": 0.6440131235920231, "learning_rate": 4.433739973887037e-06, "loss": 0.3028, "step": 10330 }, { "epoch": 0.48395559094954793, "grad_norm": 0.5844936614531437, "learning_rate": 4.433619767831881e-06, "loss": 0.266, "step": 10331 }, { "epoch": 0.4840024359394763, "grad_norm": 0.6026228902725801, "learning_rate": 4.433499550649229e-06, "loss": 0.2928, "step": 10332 }, { "epoch": 0.4840492809294046, "grad_norm": 0.6133177047555928, "learning_rate": 4.433379322339773e-06, "loss": 0.3066, "step": 10333 }, { "epoch": 0.48409612591933293, "grad_norm": 0.5996346212608568, "learning_rate": 4.433259082904204e-06, "loss": 0.2919, "step": 10334 }, { "epoch": 0.4841429709092612, "grad_norm": 0.6121496929542382, "learning_rate": 4.433138832343216e-06, "loss": 0.2946, "step": 10335 }, { "epoch": 0.4841898158991896, "grad_norm": 0.6268974479243129, "learning_rate": 4.4330185706575e-06, "loss": 0.3061, "step": 10336 }, { "epoch": 0.48423666088911793, "grad_norm": 0.5828244895016983, "learning_rate": 4.4328982978477474e-06, "loss": 0.3044, "step": 10337 }, { "epoch": 0.4842835058790462, "grad_norm": 0.5649098053828636, "learning_rate": 4.432778013914651e-06, "loss": 0.2776, "step": 10338 }, { "epoch": 0.4843303508689746, "grad_norm": 0.5575094513193153, "learning_rate": 4.432657718858903e-06, "loss": 0.2898, "step": 10339 }, { "epoch": 0.4843771958589029, "grad_norm": 0.5769082001025057, "learning_rate": 4.432537412681196e-06, "loss": 0.2765, "step": 10340 }, { "epoch": 0.4844240408488312, "grad_norm": 0.5921217004977362, "learning_rate": 4.432417095382221e-06, "loss": 0.2911, "step": 10341 }, { "epoch": 0.4844708858387595, "grad_norm": 0.6290618325285926, "learning_rate": 4.432296766962672e-06, "loss": 0.2933, "step": 10342 }, { "epoch": 0.4845177308286879, "grad_norm": 0.5834230949460812, "learning_rate": 4.43217642742324e-06, "loss": 0.2742, "step": 10343 }, { "epoch": 0.48456457581861617, "grad_norm": 0.6126354978671925, "learning_rate": 4.432056076764619e-06, "loss": 0.2958, "step": 10344 }, { "epoch": 0.4846114208085445, "grad_norm": 0.5819082316013742, "learning_rate": 4.431935714987501e-06, "loss": 0.2615, "step": 10345 }, { "epoch": 0.4846582657984729, "grad_norm": 0.5850523188967638, "learning_rate": 4.4318153420925795e-06, "loss": 0.2827, "step": 10346 }, { "epoch": 0.4847051107884012, "grad_norm": 0.6142022550772077, "learning_rate": 4.431694958080544e-06, "loss": 0.286, "step": 10347 }, { "epoch": 0.4847519557783295, "grad_norm": 0.5528758030110389, "learning_rate": 4.431574562952091e-06, "loss": 0.2769, "step": 10348 }, { "epoch": 0.4847988007682578, "grad_norm": 0.5736034918466059, "learning_rate": 4.431454156707912e-06, "loss": 0.2776, "step": 10349 }, { "epoch": 0.4848456457581862, "grad_norm": 0.6814476904871992, "learning_rate": 4.431333739348699e-06, "loss": 0.3, "step": 10350 }, { "epoch": 0.48489249074811447, "grad_norm": 0.6413522482379174, "learning_rate": 4.431213310875145e-06, "loss": 0.3003, "step": 10351 }, { "epoch": 0.4849393357380428, "grad_norm": 0.5778376674206273, "learning_rate": 4.431092871287945e-06, "loss": 0.2849, "step": 10352 }, { "epoch": 0.4849861807279711, "grad_norm": 0.600530535918328, "learning_rate": 4.430972420587792e-06, "loss": 0.2852, "step": 10353 }, { "epoch": 0.48503302571789947, "grad_norm": 0.620568032148611, "learning_rate": 4.4308519587753755e-06, "loss": 0.2862, "step": 10354 }, { "epoch": 0.4850798707078278, "grad_norm": 0.7043990352061655, "learning_rate": 4.430731485851393e-06, "loss": 0.3201, "step": 10355 }, { "epoch": 0.4851267156977561, "grad_norm": 0.6725394856381176, "learning_rate": 4.4306110018165355e-06, "loss": 0.3003, "step": 10356 }, { "epoch": 0.48517356068768447, "grad_norm": 0.5989884281105177, "learning_rate": 4.430490506671497e-06, "loss": 0.2878, "step": 10357 }, { "epoch": 0.48522040567761276, "grad_norm": 0.5976476925518227, "learning_rate": 4.430370000416971e-06, "loss": 0.2896, "step": 10358 }, { "epoch": 0.4852672506675411, "grad_norm": 0.6232550155988821, "learning_rate": 4.430249483053651e-06, "loss": 0.289, "step": 10359 }, { "epoch": 0.4853140956574694, "grad_norm": 0.5866458933410513, "learning_rate": 4.430128954582229e-06, "loss": 0.2841, "step": 10360 }, { "epoch": 0.48536094064739776, "grad_norm": 0.5585883380487613, "learning_rate": 4.430008415003401e-06, "loss": 0.2879, "step": 10361 }, { "epoch": 0.48540778563732606, "grad_norm": 0.6010342975103407, "learning_rate": 4.42988786431786e-06, "loss": 0.2774, "step": 10362 }, { "epoch": 0.4854546306272544, "grad_norm": 0.605285326285586, "learning_rate": 4.4297673025262985e-06, "loss": 0.2914, "step": 10363 }, { "epoch": 0.48550147561718277, "grad_norm": 0.5344034581759779, "learning_rate": 4.429646729629413e-06, "loss": 0.2552, "step": 10364 }, { "epoch": 0.48554832060711106, "grad_norm": 0.6286565097112926, "learning_rate": 4.429526145627894e-06, "loss": 0.3106, "step": 10365 }, { "epoch": 0.4855951655970394, "grad_norm": 0.5303353519424945, "learning_rate": 4.4294055505224375e-06, "loss": 0.2739, "step": 10366 }, { "epoch": 0.4856420105869677, "grad_norm": 0.5746117941171908, "learning_rate": 4.4292849443137365e-06, "loss": 0.2962, "step": 10367 }, { "epoch": 0.48568885557689606, "grad_norm": 0.544601391000232, "learning_rate": 4.429164327002486e-06, "loss": 0.2744, "step": 10368 }, { "epoch": 0.48573570056682436, "grad_norm": 0.6060077275065509, "learning_rate": 4.429043698589379e-06, "loss": 0.301, "step": 10369 }, { "epoch": 0.4857825455567527, "grad_norm": 0.5995471166183923, "learning_rate": 4.4289230590751115e-06, "loss": 0.3035, "step": 10370 }, { "epoch": 0.485829390546681, "grad_norm": 0.5808524508739403, "learning_rate": 4.428802408460376e-06, "loss": 0.2736, "step": 10371 }, { "epoch": 0.48587623553660936, "grad_norm": 0.7385531328779612, "learning_rate": 4.428681746745868e-06, "loss": 0.2879, "step": 10372 }, { "epoch": 0.4859230805265377, "grad_norm": 0.5991781109905202, "learning_rate": 4.42856107393228e-06, "loss": 0.2715, "step": 10373 }, { "epoch": 0.485969925516466, "grad_norm": 0.5943436211646232, "learning_rate": 4.4284403900203085e-06, "loss": 0.2921, "step": 10374 }, { "epoch": 0.48601677050639436, "grad_norm": 0.6246918368353026, "learning_rate": 4.428319695010648e-06, "loss": 0.3055, "step": 10375 }, { "epoch": 0.48606361549632265, "grad_norm": 0.5655234933657235, "learning_rate": 4.428198988903991e-06, "loss": 0.2903, "step": 10376 }, { "epoch": 0.486110460486251, "grad_norm": 0.5711134353384023, "learning_rate": 4.428078271701034e-06, "loss": 0.2745, "step": 10377 }, { "epoch": 0.4861573054761793, "grad_norm": 0.6063317295893532, "learning_rate": 4.4279575434024716e-06, "loss": 0.3006, "step": 10378 }, { "epoch": 0.48620415046610765, "grad_norm": 0.6087263736534477, "learning_rate": 4.427836804008998e-06, "loss": 0.2953, "step": 10379 }, { "epoch": 0.48625099545603595, "grad_norm": 0.6498818314952829, "learning_rate": 4.427716053521308e-06, "loss": 0.2926, "step": 10380 }, { "epoch": 0.4862978404459643, "grad_norm": 0.584637440930463, "learning_rate": 4.427595291940096e-06, "loss": 0.263, "step": 10381 }, { "epoch": 0.48634468543589265, "grad_norm": 0.6253413279580748, "learning_rate": 4.427474519266058e-06, "loss": 0.2972, "step": 10382 }, { "epoch": 0.48639153042582095, "grad_norm": 0.6178465169045343, "learning_rate": 4.427353735499889e-06, "loss": 0.2903, "step": 10383 }, { "epoch": 0.4864383754157493, "grad_norm": 0.544150993472323, "learning_rate": 4.427232940642283e-06, "loss": 0.2817, "step": 10384 }, { "epoch": 0.4864852204056776, "grad_norm": 0.6407540590776748, "learning_rate": 4.427112134693936e-06, "loss": 0.2809, "step": 10385 }, { "epoch": 0.48653206539560595, "grad_norm": 0.6268458971247446, "learning_rate": 4.426991317655543e-06, "loss": 0.2862, "step": 10386 }, { "epoch": 0.48657891038553425, "grad_norm": 0.610377586615275, "learning_rate": 4.4268704895278005e-06, "loss": 0.2785, "step": 10387 }, { "epoch": 0.4866257553754626, "grad_norm": 0.6455477728872282, "learning_rate": 4.426749650311401e-06, "loss": 0.2833, "step": 10388 }, { "epoch": 0.4866726003653909, "grad_norm": 0.591207231455331, "learning_rate": 4.426628800007043e-06, "loss": 0.2992, "step": 10389 }, { "epoch": 0.48671944535531925, "grad_norm": 0.5595833227043733, "learning_rate": 4.426507938615418e-06, "loss": 0.2737, "step": 10390 }, { "epoch": 0.4867662903452476, "grad_norm": 0.6202481722336198, "learning_rate": 4.426387066137227e-06, "loss": 0.3027, "step": 10391 }, { "epoch": 0.4868131353351759, "grad_norm": 0.5706301508316101, "learning_rate": 4.426266182573161e-06, "loss": 0.2705, "step": 10392 }, { "epoch": 0.48685998032510425, "grad_norm": 0.6132841820769069, "learning_rate": 4.426145287923918e-06, "loss": 0.2733, "step": 10393 }, { "epoch": 0.48690682531503254, "grad_norm": 0.6437334263172074, "learning_rate": 4.426024382190191e-06, "loss": 0.3069, "step": 10394 }, { "epoch": 0.4869536703049609, "grad_norm": 0.6166732135494062, "learning_rate": 4.42590346537268e-06, "loss": 0.2931, "step": 10395 }, { "epoch": 0.4870005152948892, "grad_norm": 0.5749390234643316, "learning_rate": 4.425782537472077e-06, "loss": 0.292, "step": 10396 }, { "epoch": 0.48704736028481754, "grad_norm": 0.585194993482895, "learning_rate": 4.4256615984890816e-06, "loss": 0.2913, "step": 10397 }, { "epoch": 0.48709420527474584, "grad_norm": 0.6103608671687979, "learning_rate": 4.425540648424386e-06, "loss": 0.3032, "step": 10398 }, { "epoch": 0.4871410502646742, "grad_norm": 0.6202801039086613, "learning_rate": 4.425419687278689e-06, "loss": 0.2979, "step": 10399 }, { "epoch": 0.48718789525460254, "grad_norm": 0.5752609533473378, "learning_rate": 4.425298715052685e-06, "loss": 0.2743, "step": 10400 }, { "epoch": 0.48723474024453084, "grad_norm": 0.6079251384142244, "learning_rate": 4.4251777317470706e-06, "loss": 0.294, "step": 10401 }, { "epoch": 0.4872815852344592, "grad_norm": 0.5823479250149112, "learning_rate": 4.425056737362543e-06, "loss": 0.2918, "step": 10402 }, { "epoch": 0.4873284302243875, "grad_norm": 0.6150277052965428, "learning_rate": 4.424935731899796e-06, "loss": 0.2859, "step": 10403 }, { "epoch": 0.48737527521431584, "grad_norm": 0.5989037443607084, "learning_rate": 4.42481471535953e-06, "loss": 0.2825, "step": 10404 }, { "epoch": 0.48742212020424414, "grad_norm": 0.6045576816029559, "learning_rate": 4.424693687742438e-06, "loss": 0.29, "step": 10405 }, { "epoch": 0.4874689651941725, "grad_norm": 0.5946449587445526, "learning_rate": 4.424572649049218e-06, "loss": 0.286, "step": 10406 }, { "epoch": 0.4875158101841008, "grad_norm": 0.5964752877624383, "learning_rate": 4.424451599280565e-06, "loss": 0.2795, "step": 10407 }, { "epoch": 0.48756265517402914, "grad_norm": 0.5724991091571576, "learning_rate": 4.424330538437178e-06, "loss": 0.2627, "step": 10408 }, { "epoch": 0.4876095001639575, "grad_norm": 0.5540899222877983, "learning_rate": 4.4242094665197514e-06, "loss": 0.2955, "step": 10409 }, { "epoch": 0.4876563451538858, "grad_norm": 0.5722788960772155, "learning_rate": 4.4240883835289835e-06, "loss": 0.266, "step": 10410 }, { "epoch": 0.48770319014381414, "grad_norm": 0.645853777018566, "learning_rate": 4.4239672894655705e-06, "loss": 0.296, "step": 10411 }, { "epoch": 0.48775003513374243, "grad_norm": 0.5568609389456952, "learning_rate": 4.42384618433021e-06, "loss": 0.2948, "step": 10412 }, { "epoch": 0.4877968801236708, "grad_norm": 0.606719799310657, "learning_rate": 4.423725068123597e-06, "loss": 0.298, "step": 10413 }, { "epoch": 0.4878437251135991, "grad_norm": 0.5868520516128116, "learning_rate": 4.4236039408464305e-06, "loss": 0.2765, "step": 10414 }, { "epoch": 0.48789057010352743, "grad_norm": 0.6183329266004381, "learning_rate": 4.4234828024994066e-06, "loss": 0.2964, "step": 10415 }, { "epoch": 0.48793741509345573, "grad_norm": 0.5692129285175067, "learning_rate": 4.423361653083222e-06, "loss": 0.2737, "step": 10416 }, { "epoch": 0.4879842600833841, "grad_norm": 0.5636393789917656, "learning_rate": 4.4232404925985755e-06, "loss": 0.2848, "step": 10417 }, { "epoch": 0.48803110507331243, "grad_norm": 0.5672798136752242, "learning_rate": 4.4231193210461635e-06, "loss": 0.2783, "step": 10418 }, { "epoch": 0.48807795006324073, "grad_norm": 0.5802964596019543, "learning_rate": 4.4229981384266815e-06, "loss": 0.2757, "step": 10419 }, { "epoch": 0.4881247950531691, "grad_norm": 0.607664606281963, "learning_rate": 4.42287694474083e-06, "loss": 0.2828, "step": 10420 }, { "epoch": 0.4881716400430974, "grad_norm": 0.5699826403200053, "learning_rate": 4.422755739989305e-06, "loss": 0.2842, "step": 10421 }, { "epoch": 0.48821848503302573, "grad_norm": 0.6336330142527618, "learning_rate": 4.422634524172804e-06, "loss": 0.2871, "step": 10422 }, { "epoch": 0.488265330022954, "grad_norm": 0.5960199556225758, "learning_rate": 4.422513297292024e-06, "loss": 0.2735, "step": 10423 }, { "epoch": 0.4883121750128824, "grad_norm": 0.5485459374990069, "learning_rate": 4.422392059347663e-06, "loss": 0.2708, "step": 10424 }, { "epoch": 0.4883590200028107, "grad_norm": 0.6074921853217913, "learning_rate": 4.42227081034042e-06, "loss": 0.2964, "step": 10425 }, { "epoch": 0.488405864992739, "grad_norm": 0.5770983356332113, "learning_rate": 4.42214955027099e-06, "loss": 0.2827, "step": 10426 }, { "epoch": 0.4884527099826674, "grad_norm": 0.5575348581398857, "learning_rate": 4.422028279140074e-06, "loss": 0.2791, "step": 10427 }, { "epoch": 0.4884995549725957, "grad_norm": 0.5679323026810541, "learning_rate": 4.421906996948367e-06, "loss": 0.2935, "step": 10428 }, { "epoch": 0.488546399962524, "grad_norm": 0.6401415809142403, "learning_rate": 4.4217857036965695e-06, "loss": 0.2959, "step": 10429 }, { "epoch": 0.4885932449524523, "grad_norm": 0.5582010523567484, "learning_rate": 4.4216643993853785e-06, "loss": 0.2779, "step": 10430 }, { "epoch": 0.4886400899423807, "grad_norm": 0.6047292574134094, "learning_rate": 4.4215430840154905e-06, "loss": 0.288, "step": 10431 }, { "epoch": 0.48868693493230897, "grad_norm": 0.5687243172257095, "learning_rate": 4.421421757587606e-06, "loss": 0.2663, "step": 10432 }, { "epoch": 0.4887337799222373, "grad_norm": 0.5892508451391588, "learning_rate": 4.421300420102421e-06, "loss": 0.2977, "step": 10433 }, { "epoch": 0.4887806249121656, "grad_norm": 0.5717639575569738, "learning_rate": 4.421179071560636e-06, "loss": 0.2741, "step": 10434 }, { "epoch": 0.48882746990209397, "grad_norm": 0.6123225718263984, "learning_rate": 4.421057711962948e-06, "loss": 0.2742, "step": 10435 }, { "epoch": 0.4888743148920223, "grad_norm": 0.6046266881557898, "learning_rate": 4.420936341310056e-06, "loss": 0.3051, "step": 10436 }, { "epoch": 0.4889211598819506, "grad_norm": 0.5692840321435114, "learning_rate": 4.420814959602659e-06, "loss": 0.2708, "step": 10437 }, { "epoch": 0.48896800487187897, "grad_norm": 0.5892860572825174, "learning_rate": 4.420693566841453e-06, "loss": 0.2819, "step": 10438 }, { "epoch": 0.48901484986180727, "grad_norm": 0.6213515179599924, "learning_rate": 4.420572163027139e-06, "loss": 0.2745, "step": 10439 }, { "epoch": 0.4890616948517356, "grad_norm": 0.5754144320714389, "learning_rate": 4.420450748160415e-06, "loss": 0.2934, "step": 10440 }, { "epoch": 0.4891085398416639, "grad_norm": 0.5563729446193508, "learning_rate": 4.42032932224198e-06, "loss": 0.2893, "step": 10441 }, { "epoch": 0.48915538483159227, "grad_norm": 0.582026465596137, "learning_rate": 4.420207885272531e-06, "loss": 0.285, "step": 10442 }, { "epoch": 0.48920222982152056, "grad_norm": 0.5513990860790332, "learning_rate": 4.42008643725277e-06, "loss": 0.2862, "step": 10443 }, { "epoch": 0.4892490748114489, "grad_norm": 0.5386914855135684, "learning_rate": 4.4199649781833935e-06, "loss": 0.2812, "step": 10444 }, { "epoch": 0.48929591980137727, "grad_norm": 0.5838190256810184, "learning_rate": 4.419843508065101e-06, "loss": 0.2834, "step": 10445 }, { "epoch": 0.48934276479130556, "grad_norm": 0.585618505920112, "learning_rate": 4.4197220268985904e-06, "loss": 0.2777, "step": 10446 }, { "epoch": 0.4893896097812339, "grad_norm": 0.5870112213444473, "learning_rate": 4.419600534684564e-06, "loss": 0.291, "step": 10447 }, { "epoch": 0.4894364547711622, "grad_norm": 0.5755331906879138, "learning_rate": 4.419479031423718e-06, "loss": 0.2812, "step": 10448 }, { "epoch": 0.48948329976109056, "grad_norm": 0.5867862249987748, "learning_rate": 4.419357517116753e-06, "loss": 0.2977, "step": 10449 }, { "epoch": 0.48953014475101886, "grad_norm": 0.624189386308373, "learning_rate": 4.419235991764368e-06, "loss": 0.3151, "step": 10450 }, { "epoch": 0.4895769897409472, "grad_norm": 0.5798368953621742, "learning_rate": 4.419114455367262e-06, "loss": 0.2888, "step": 10451 }, { "epoch": 0.4896238347308755, "grad_norm": 0.5706824468947832, "learning_rate": 4.418992907926134e-06, "loss": 0.2657, "step": 10452 }, { "epoch": 0.48967067972080386, "grad_norm": 0.6099553564708066, "learning_rate": 4.418871349441685e-06, "loss": 0.2707, "step": 10453 }, { "epoch": 0.4897175247107322, "grad_norm": 0.5453146635314848, "learning_rate": 4.418749779914614e-06, "loss": 0.287, "step": 10454 }, { "epoch": 0.4897643697006605, "grad_norm": 0.6110191317755138, "learning_rate": 4.418628199345619e-06, "loss": 0.3157, "step": 10455 }, { "epoch": 0.48981121469058886, "grad_norm": 0.6158869286619569, "learning_rate": 4.4185066077354025e-06, "loss": 0.2973, "step": 10456 }, { "epoch": 0.48985805968051715, "grad_norm": 0.6329415019753023, "learning_rate": 4.418385005084661e-06, "loss": 0.2884, "step": 10457 }, { "epoch": 0.4899049046704455, "grad_norm": 0.641421352289711, "learning_rate": 4.418263391394098e-06, "loss": 0.3019, "step": 10458 }, { "epoch": 0.4899517496603738, "grad_norm": 0.570308003344298, "learning_rate": 4.4181417666644095e-06, "loss": 0.2997, "step": 10459 }, { "epoch": 0.48999859465030215, "grad_norm": 0.5561493185609575, "learning_rate": 4.418020130896298e-06, "loss": 0.2732, "step": 10460 }, { "epoch": 0.49004543964023045, "grad_norm": 0.6720761440402737, "learning_rate": 4.417898484090463e-06, "loss": 0.3166, "step": 10461 }, { "epoch": 0.4900922846301588, "grad_norm": 0.6092261906765385, "learning_rate": 4.417776826247604e-06, "loss": 0.2959, "step": 10462 }, { "epoch": 0.49013912962008716, "grad_norm": 0.579039650999089, "learning_rate": 4.41765515736842e-06, "loss": 0.2918, "step": 10463 }, { "epoch": 0.49018597461001545, "grad_norm": 0.6202614453336461, "learning_rate": 4.417533477453615e-06, "loss": 0.2742, "step": 10464 }, { "epoch": 0.4902328195999438, "grad_norm": 0.6175422029916995, "learning_rate": 4.417411786503885e-06, "loss": 0.3064, "step": 10465 }, { "epoch": 0.4902796645898721, "grad_norm": 0.5873198692031549, "learning_rate": 4.417290084519933e-06, "loss": 0.2897, "step": 10466 }, { "epoch": 0.49032650957980045, "grad_norm": 0.6031666276813993, "learning_rate": 4.417168371502459e-06, "loss": 0.276, "step": 10467 }, { "epoch": 0.49037335456972875, "grad_norm": 0.6567535274200524, "learning_rate": 4.417046647452161e-06, "loss": 0.3129, "step": 10468 }, { "epoch": 0.4904201995596571, "grad_norm": 0.5786896945047805, "learning_rate": 4.416924912369742e-06, "loss": 0.2954, "step": 10469 }, { "epoch": 0.4904670445495854, "grad_norm": 0.6118913968546027, "learning_rate": 4.416803166255903e-06, "loss": 0.2688, "step": 10470 }, { "epoch": 0.49051388953951375, "grad_norm": 0.6822486614607723, "learning_rate": 4.416681409111343e-06, "loss": 0.2923, "step": 10471 }, { "epoch": 0.4905607345294421, "grad_norm": 0.6032655680326172, "learning_rate": 4.416559640936763e-06, "loss": 0.2732, "step": 10472 }, { "epoch": 0.4906075795193704, "grad_norm": 0.5605447099776373, "learning_rate": 4.416437861732864e-06, "loss": 0.2866, "step": 10473 }, { "epoch": 0.49065442450929875, "grad_norm": 0.6199802957874482, "learning_rate": 4.4163160715003465e-06, "loss": 0.3029, "step": 10474 }, { "epoch": 0.49070126949922704, "grad_norm": 0.6042667627851634, "learning_rate": 4.416194270239912e-06, "loss": 0.2749, "step": 10475 }, { "epoch": 0.4907481144891554, "grad_norm": 0.5840702317604788, "learning_rate": 4.416072457952261e-06, "loss": 0.2698, "step": 10476 }, { "epoch": 0.4907949594790837, "grad_norm": 0.5878892627982361, "learning_rate": 4.415950634638095e-06, "loss": 0.2797, "step": 10477 }, { "epoch": 0.49084180446901204, "grad_norm": 0.594260379233208, "learning_rate": 4.4158288002981145e-06, "loss": 0.2755, "step": 10478 }, { "epoch": 0.49088864945894034, "grad_norm": 0.5659490069168559, "learning_rate": 4.415706954933021e-06, "loss": 0.2788, "step": 10479 }, { "epoch": 0.4909354944488687, "grad_norm": 0.580744388239348, "learning_rate": 4.4155850985435144e-06, "loss": 0.2768, "step": 10480 }, { "epoch": 0.49098233943879704, "grad_norm": 0.5649121262873212, "learning_rate": 4.415463231130298e-06, "loss": 0.2823, "step": 10481 }, { "epoch": 0.49102918442872534, "grad_norm": 0.6424952441161341, "learning_rate": 4.415341352694072e-06, "loss": 0.2904, "step": 10482 }, { "epoch": 0.4910760294186537, "grad_norm": 0.6385501471776006, "learning_rate": 4.4152194632355375e-06, "loss": 0.2939, "step": 10483 }, { "epoch": 0.491122874408582, "grad_norm": 0.5896593515226731, "learning_rate": 4.415097562755397e-06, "loss": 0.2807, "step": 10484 }, { "epoch": 0.49116971939851034, "grad_norm": 0.566197652871045, "learning_rate": 4.41497565125435e-06, "loss": 0.277, "step": 10485 }, { "epoch": 0.49121656438843864, "grad_norm": 0.636068074661671, "learning_rate": 4.414853728733102e-06, "loss": 0.2745, "step": 10486 }, { "epoch": 0.491263409378367, "grad_norm": 0.5924301514155413, "learning_rate": 4.41473179519235e-06, "loss": 0.287, "step": 10487 }, { "epoch": 0.4913102543682953, "grad_norm": 0.635749076938315, "learning_rate": 4.414609850632798e-06, "loss": 0.3063, "step": 10488 }, { "epoch": 0.49135709935822364, "grad_norm": 0.6670142279158497, "learning_rate": 4.414487895055148e-06, "loss": 0.2923, "step": 10489 }, { "epoch": 0.491403944348152, "grad_norm": 0.5642175118680686, "learning_rate": 4.414365928460101e-06, "loss": 0.2529, "step": 10490 }, { "epoch": 0.4914507893380803, "grad_norm": 0.5737000245195836, "learning_rate": 4.414243950848361e-06, "loss": 0.2929, "step": 10491 }, { "epoch": 0.49149763432800864, "grad_norm": 0.6054492805862389, "learning_rate": 4.4141219622206255e-06, "loss": 0.2877, "step": 10492 }, { "epoch": 0.49154447931793693, "grad_norm": 0.641865280138882, "learning_rate": 4.4139999625776e-06, "loss": 0.3157, "step": 10493 }, { "epoch": 0.4915913243078653, "grad_norm": 0.6122049351519252, "learning_rate": 4.413877951919987e-06, "loss": 0.2965, "step": 10494 }, { "epoch": 0.4916381692977936, "grad_norm": 0.6392985957891135, "learning_rate": 4.4137559302484864e-06, "loss": 0.3091, "step": 10495 }, { "epoch": 0.49168501428772193, "grad_norm": 0.5753087849085486, "learning_rate": 4.413633897563801e-06, "loss": 0.2966, "step": 10496 }, { "epoch": 0.49173185927765023, "grad_norm": 0.5678860790844992, "learning_rate": 4.413511853866634e-06, "loss": 0.3068, "step": 10497 }, { "epoch": 0.4917787042675786, "grad_norm": 0.5803431104598553, "learning_rate": 4.413389799157688e-06, "loss": 0.2946, "step": 10498 }, { "epoch": 0.49182554925750693, "grad_norm": 0.6305987635706701, "learning_rate": 4.413267733437663e-06, "loss": 0.2921, "step": 10499 }, { "epoch": 0.49187239424743523, "grad_norm": 0.5909917380776674, "learning_rate": 4.413145656707265e-06, "loss": 0.2906, "step": 10500 }, { "epoch": 0.4919192392373636, "grad_norm": 0.6511964545807791, "learning_rate": 4.413023568967193e-06, "loss": 0.3103, "step": 10501 }, { "epoch": 0.4919660842272919, "grad_norm": 0.6075529901839171, "learning_rate": 4.412901470218152e-06, "loss": 0.3121, "step": 10502 }, { "epoch": 0.49201292921722023, "grad_norm": 0.6212806747080051, "learning_rate": 4.4127793604608435e-06, "loss": 0.3053, "step": 10503 }, { "epoch": 0.4920597742071485, "grad_norm": 0.5663967748054411, "learning_rate": 4.41265723969597e-06, "loss": 0.299, "step": 10504 }, { "epoch": 0.4921066191970769, "grad_norm": 0.5784447704709126, "learning_rate": 4.412535107924236e-06, "loss": 0.2901, "step": 10505 }, { "epoch": 0.4921534641870052, "grad_norm": 0.5634491230908305, "learning_rate": 4.412412965146342e-06, "loss": 0.2846, "step": 10506 }, { "epoch": 0.4922003091769335, "grad_norm": 0.5614605443061346, "learning_rate": 4.412290811362993e-06, "loss": 0.2718, "step": 10507 }, { "epoch": 0.4922471541668619, "grad_norm": 0.5713945875987849, "learning_rate": 4.4121686465748904e-06, "loss": 0.2944, "step": 10508 }, { "epoch": 0.4922939991567902, "grad_norm": 0.5312934895744084, "learning_rate": 4.4120464707827386e-06, "loss": 0.2723, "step": 10509 }, { "epoch": 0.4923408441467185, "grad_norm": 0.6115345824055527, "learning_rate": 4.411924283987239e-06, "loss": 0.2778, "step": 10510 }, { "epoch": 0.4923876891366468, "grad_norm": 0.6050448135721561, "learning_rate": 4.411802086189097e-06, "loss": 0.2776, "step": 10511 }, { "epoch": 0.4924345341265752, "grad_norm": 0.5851661709545082, "learning_rate": 4.411679877389013e-06, "loss": 0.279, "step": 10512 }, { "epoch": 0.49248137911650347, "grad_norm": 0.641445279258362, "learning_rate": 4.411557657587692e-06, "loss": 0.2825, "step": 10513 }, { "epoch": 0.4925282241064318, "grad_norm": 0.5476381259039529, "learning_rate": 4.4114354267858376e-06, "loss": 0.2755, "step": 10514 }, { "epoch": 0.4925750690963601, "grad_norm": 0.5520589753746513, "learning_rate": 4.411313184984153e-06, "loss": 0.2785, "step": 10515 }, { "epoch": 0.49262191408628847, "grad_norm": 0.6046854596617082, "learning_rate": 4.411190932183341e-06, "loss": 0.2879, "step": 10516 }, { "epoch": 0.4926687590762168, "grad_norm": 0.6155570542682839, "learning_rate": 4.411068668384105e-06, "loss": 0.2894, "step": 10517 }, { "epoch": 0.4927156040661451, "grad_norm": 0.5741654840875479, "learning_rate": 4.41094639358715e-06, "loss": 0.2844, "step": 10518 }, { "epoch": 0.49276244905607347, "grad_norm": 0.5757911508816093, "learning_rate": 4.410824107793179e-06, "loss": 0.2763, "step": 10519 }, { "epoch": 0.49280929404600177, "grad_norm": 0.6200851857342646, "learning_rate": 4.410701811002895e-06, "loss": 0.2897, "step": 10520 }, { "epoch": 0.4928561390359301, "grad_norm": 0.558149636452917, "learning_rate": 4.410579503217003e-06, "loss": 0.2709, "step": 10521 }, { "epoch": 0.4929029840258584, "grad_norm": 0.6377779031752692, "learning_rate": 4.410457184436205e-06, "loss": 0.3087, "step": 10522 }, { "epoch": 0.49294982901578677, "grad_norm": 0.6431040183734157, "learning_rate": 4.410334854661207e-06, "loss": 0.2978, "step": 10523 }, { "epoch": 0.49299667400571506, "grad_norm": 0.6248068174779507, "learning_rate": 4.4102125138927114e-06, "loss": 0.2885, "step": 10524 }, { "epoch": 0.4930435189956434, "grad_norm": 0.6203552986274288, "learning_rate": 4.410090162131423e-06, "loss": 0.297, "step": 10525 }, { "epoch": 0.49309036398557177, "grad_norm": 0.621673750725323, "learning_rate": 4.409967799378047e-06, "loss": 0.3007, "step": 10526 }, { "epoch": 0.49313720897550006, "grad_norm": 0.6634986108085691, "learning_rate": 4.409845425633285e-06, "loss": 0.2855, "step": 10527 }, { "epoch": 0.4931840539654284, "grad_norm": 0.5916356654732703, "learning_rate": 4.409723040897843e-06, "loss": 0.3055, "step": 10528 }, { "epoch": 0.4932308989553567, "grad_norm": 0.6473992919974981, "learning_rate": 4.409600645172425e-06, "loss": 0.2907, "step": 10529 }, { "epoch": 0.49327774394528506, "grad_norm": 0.6608162082049959, "learning_rate": 4.409478238457735e-06, "loss": 0.3019, "step": 10530 }, { "epoch": 0.49332458893521336, "grad_norm": 0.6934292264148306, "learning_rate": 4.4093558207544776e-06, "loss": 0.2841, "step": 10531 }, { "epoch": 0.4933714339251417, "grad_norm": 0.6324348805553001, "learning_rate": 4.409233392063358e-06, "loss": 0.2817, "step": 10532 }, { "epoch": 0.49341827891507, "grad_norm": 0.6066078828091412, "learning_rate": 4.40911095238508e-06, "loss": 0.2884, "step": 10533 }, { "epoch": 0.49346512390499836, "grad_norm": 0.6624826413674888, "learning_rate": 4.4089885017203475e-06, "loss": 0.2879, "step": 10534 }, { "epoch": 0.4935119688949267, "grad_norm": 0.6321379249580279, "learning_rate": 4.4088660400698666e-06, "loss": 0.2881, "step": 10535 }, { "epoch": 0.493558813884855, "grad_norm": 0.5878437257195565, "learning_rate": 4.408743567434341e-06, "loss": 0.2711, "step": 10536 }, { "epoch": 0.49360565887478336, "grad_norm": 0.5735946735405013, "learning_rate": 4.408621083814476e-06, "loss": 0.2856, "step": 10537 }, { "epoch": 0.49365250386471166, "grad_norm": 0.6326751560123721, "learning_rate": 4.4084985892109765e-06, "loss": 0.2919, "step": 10538 }, { "epoch": 0.49369934885464, "grad_norm": 0.5722134005813506, "learning_rate": 4.408376083624547e-06, "loss": 0.2748, "step": 10539 }, { "epoch": 0.4937461938445683, "grad_norm": 0.6192442531837061, "learning_rate": 4.408253567055894e-06, "loss": 0.2947, "step": 10540 }, { "epoch": 0.49379303883449666, "grad_norm": 0.6213412974700844, "learning_rate": 4.40813103950572e-06, "loss": 0.2939, "step": 10541 }, { "epoch": 0.49383988382442495, "grad_norm": 0.6047109822787208, "learning_rate": 4.408008500974732e-06, "loss": 0.2884, "step": 10542 }, { "epoch": 0.4938867288143533, "grad_norm": 0.5997955194319233, "learning_rate": 4.407885951463634e-06, "loss": 0.297, "step": 10543 }, { "epoch": 0.49393357380428166, "grad_norm": 0.5930102508710956, "learning_rate": 4.407763390973133e-06, "loss": 0.269, "step": 10544 }, { "epoch": 0.49398041879420995, "grad_norm": 0.6149866012096963, "learning_rate": 4.407640819503932e-06, "loss": 0.3046, "step": 10545 }, { "epoch": 0.4940272637841383, "grad_norm": 0.6022997602369314, "learning_rate": 4.407518237056738e-06, "loss": 0.3105, "step": 10546 }, { "epoch": 0.4940741087740666, "grad_norm": 0.5580879192533168, "learning_rate": 4.407395643632257e-06, "loss": 0.2835, "step": 10547 }, { "epoch": 0.49412095376399495, "grad_norm": 0.6382861586557532, "learning_rate": 4.407273039231193e-06, "loss": 0.2993, "step": 10548 }, { "epoch": 0.49416779875392325, "grad_norm": 0.613811423743024, "learning_rate": 4.4071504238542515e-06, "loss": 0.2922, "step": 10549 }, { "epoch": 0.4942146437438516, "grad_norm": 0.608166092940092, "learning_rate": 4.407027797502138e-06, "loss": 0.3081, "step": 10550 }, { "epoch": 0.4942614887337799, "grad_norm": 0.5480360537150866, "learning_rate": 4.40690516017556e-06, "loss": 0.2785, "step": 10551 }, { "epoch": 0.49430833372370825, "grad_norm": 0.64519237791046, "learning_rate": 4.406782511875222e-06, "loss": 0.2951, "step": 10552 }, { "epoch": 0.4943551787136366, "grad_norm": 0.6063499357011846, "learning_rate": 4.40665985260183e-06, "loss": 0.2787, "step": 10553 }, { "epoch": 0.4944020237035649, "grad_norm": 0.6027732058900204, "learning_rate": 4.406537182356089e-06, "loss": 0.2927, "step": 10554 }, { "epoch": 0.49444886869349325, "grad_norm": 0.5971949154437068, "learning_rate": 4.406414501138706e-06, "loss": 0.2771, "step": 10555 }, { "epoch": 0.49449571368342154, "grad_norm": 0.5722342051860939, "learning_rate": 4.406291808950387e-06, "loss": 0.2829, "step": 10556 }, { "epoch": 0.4945425586733499, "grad_norm": 0.5685702458673944, "learning_rate": 4.406169105791838e-06, "loss": 0.2736, "step": 10557 }, { "epoch": 0.4945894036632782, "grad_norm": 0.5881985564638542, "learning_rate": 4.406046391663764e-06, "loss": 0.2916, "step": 10558 }, { "epoch": 0.49463624865320655, "grad_norm": 0.5391549880981249, "learning_rate": 4.405923666566872e-06, "loss": 0.2767, "step": 10559 }, { "epoch": 0.49468309364313484, "grad_norm": 0.5523106354450035, "learning_rate": 4.405800930501869e-06, "loss": 0.2703, "step": 10560 }, { "epoch": 0.4947299386330632, "grad_norm": 0.5760478353963118, "learning_rate": 4.405678183469461e-06, "loss": 0.2753, "step": 10561 }, { "epoch": 0.49477678362299155, "grad_norm": 0.6111376046395004, "learning_rate": 4.405555425470353e-06, "loss": 0.2983, "step": 10562 }, { "epoch": 0.49482362861291984, "grad_norm": 0.6843770125805442, "learning_rate": 4.4054326565052535e-06, "loss": 0.2909, "step": 10563 }, { "epoch": 0.4948704736028482, "grad_norm": 0.6325187415207723, "learning_rate": 4.4053098765748675e-06, "loss": 0.3029, "step": 10564 }, { "epoch": 0.4949173185927765, "grad_norm": 0.6345998000508571, "learning_rate": 4.405187085679903e-06, "loss": 0.3115, "step": 10565 }, { "epoch": 0.49496416358270484, "grad_norm": 0.6836151298851737, "learning_rate": 4.405064283821064e-06, "loss": 0.309, "step": 10566 }, { "epoch": 0.49501100857263314, "grad_norm": 0.5633842331598332, "learning_rate": 4.40494147099906e-06, "loss": 0.2741, "step": 10567 }, { "epoch": 0.4950578535625615, "grad_norm": 0.5913178969358985, "learning_rate": 4.404818647214596e-06, "loss": 0.2873, "step": 10568 }, { "epoch": 0.4951046985524898, "grad_norm": 0.5811764104983114, "learning_rate": 4.4046958124683796e-06, "loss": 0.2835, "step": 10569 }, { "epoch": 0.49515154354241814, "grad_norm": 0.5566964822761922, "learning_rate": 4.404572966761117e-06, "loss": 0.2706, "step": 10570 }, { "epoch": 0.4951983885323465, "grad_norm": 0.5804921164061914, "learning_rate": 4.404450110093516e-06, "loss": 0.273, "step": 10571 }, { "epoch": 0.4952452335222748, "grad_norm": 0.6374863316757939, "learning_rate": 4.404327242466284e-06, "loss": 0.279, "step": 10572 }, { "epoch": 0.49529207851220314, "grad_norm": 0.5788761082248961, "learning_rate": 4.4042043638801265e-06, "loss": 0.2967, "step": 10573 }, { "epoch": 0.49533892350213143, "grad_norm": 0.6071899208153001, "learning_rate": 4.404081474335753e-06, "loss": 0.2777, "step": 10574 }, { "epoch": 0.4953857684920598, "grad_norm": 0.6072244689663729, "learning_rate": 4.403958573833868e-06, "loss": 0.2799, "step": 10575 }, { "epoch": 0.4954326134819881, "grad_norm": 0.6374991086665828, "learning_rate": 4.40383566237518e-06, "loss": 0.2833, "step": 10576 }, { "epoch": 0.49547945847191643, "grad_norm": 0.584639084027702, "learning_rate": 4.403712739960396e-06, "loss": 0.2883, "step": 10577 }, { "epoch": 0.49552630346184473, "grad_norm": 0.6188530075533366, "learning_rate": 4.403589806590224e-06, "loss": 0.3038, "step": 10578 }, { "epoch": 0.4955731484517731, "grad_norm": 0.603843196100808, "learning_rate": 4.403466862265371e-06, "loss": 0.2864, "step": 10579 }, { "epoch": 0.49561999344170143, "grad_norm": 0.5603291625625365, "learning_rate": 4.403343906986545e-06, "loss": 0.2839, "step": 10580 }, { "epoch": 0.49566683843162973, "grad_norm": 0.6129232550790172, "learning_rate": 4.403220940754452e-06, "loss": 0.2885, "step": 10581 }, { "epoch": 0.4957136834215581, "grad_norm": 0.5899632129400249, "learning_rate": 4.403097963569802e-06, "loss": 0.2721, "step": 10582 }, { "epoch": 0.4957605284114864, "grad_norm": 0.6146284314702909, "learning_rate": 4.402974975433302e-06, "loss": 0.2762, "step": 10583 }, { "epoch": 0.49580737340141473, "grad_norm": 0.6171188227411355, "learning_rate": 4.402851976345658e-06, "loss": 0.2847, "step": 10584 }, { "epoch": 0.495854218391343, "grad_norm": 0.6123556340114893, "learning_rate": 4.40272896630758e-06, "loss": 0.2939, "step": 10585 }, { "epoch": 0.4959010633812714, "grad_norm": 0.6337057043325847, "learning_rate": 4.402605945319775e-06, "loss": 0.3108, "step": 10586 }, { "epoch": 0.4959479083711997, "grad_norm": 0.5943113700773797, "learning_rate": 4.40248291338295e-06, "loss": 0.2844, "step": 10587 }, { "epoch": 0.495994753361128, "grad_norm": 0.6034583399766547, "learning_rate": 4.4023598704978156e-06, "loss": 0.2926, "step": 10588 }, { "epoch": 0.4960415983510564, "grad_norm": 0.5982389587505523, "learning_rate": 4.402236816665077e-06, "loss": 0.2795, "step": 10589 }, { "epoch": 0.4960884433409847, "grad_norm": 0.6234612623022586, "learning_rate": 4.402113751885444e-06, "loss": 0.3082, "step": 10590 }, { "epoch": 0.496135288330913, "grad_norm": 0.6248581292986394, "learning_rate": 4.401990676159625e-06, "loss": 0.296, "step": 10591 }, { "epoch": 0.4961821333208413, "grad_norm": 0.5895942928816023, "learning_rate": 4.401867589488327e-06, "loss": 0.2904, "step": 10592 }, { "epoch": 0.4962289783107697, "grad_norm": 0.6017869912867273, "learning_rate": 4.4017444918722596e-06, "loss": 0.2903, "step": 10593 }, { "epoch": 0.49627582330069797, "grad_norm": 0.5670507859243595, "learning_rate": 4.4016213833121305e-06, "loss": 0.2888, "step": 10594 }, { "epoch": 0.4963226682906263, "grad_norm": 0.5790458357425043, "learning_rate": 4.401498263808648e-06, "loss": 0.2811, "step": 10595 }, { "epoch": 0.4963695132805546, "grad_norm": 0.6210578766639236, "learning_rate": 4.401375133362521e-06, "loss": 0.2833, "step": 10596 }, { "epoch": 0.49641635827048297, "grad_norm": 0.605859180532853, "learning_rate": 4.401251991974458e-06, "loss": 0.2966, "step": 10597 }, { "epoch": 0.4964632032604113, "grad_norm": 0.6163380905307992, "learning_rate": 4.401128839645168e-06, "loss": 0.3058, "step": 10598 }, { "epoch": 0.4965100482503396, "grad_norm": 0.6515385301646791, "learning_rate": 4.401005676375358e-06, "loss": 0.3062, "step": 10599 }, { "epoch": 0.49655689324026797, "grad_norm": 0.5322002972186407, "learning_rate": 4.40088250216574e-06, "loss": 0.2657, "step": 10600 }, { "epoch": 0.49660373823019627, "grad_norm": 0.5712380279894632, "learning_rate": 4.40075931701702e-06, "loss": 0.2762, "step": 10601 }, { "epoch": 0.4966505832201246, "grad_norm": 0.5469798698309883, "learning_rate": 4.400636120929908e-06, "loss": 0.2856, "step": 10602 }, { "epoch": 0.4966974282100529, "grad_norm": 0.6066394315320169, "learning_rate": 4.4005129139051125e-06, "loss": 0.303, "step": 10603 }, { "epoch": 0.49674427319998127, "grad_norm": 0.6473267603786294, "learning_rate": 4.400389695943344e-06, "loss": 0.28, "step": 10604 }, { "epoch": 0.49679111818990956, "grad_norm": 0.5984845185327091, "learning_rate": 4.400266467045309e-06, "loss": 0.2995, "step": 10605 }, { "epoch": 0.4968379631798379, "grad_norm": 0.5690880050757992, "learning_rate": 4.400143227211718e-06, "loss": 0.2892, "step": 10606 }, { "epoch": 0.49688480816976627, "grad_norm": 0.6152542588043767, "learning_rate": 4.400019976443282e-06, "loss": 0.3109, "step": 10607 }, { "epoch": 0.49693165315969456, "grad_norm": 0.5882438562269933, "learning_rate": 4.399896714740707e-06, "loss": 0.2894, "step": 10608 }, { "epoch": 0.4969784981496229, "grad_norm": 0.6099690164673115, "learning_rate": 4.399773442104705e-06, "loss": 0.2773, "step": 10609 }, { "epoch": 0.4970253431395512, "grad_norm": 0.6024840818126723, "learning_rate": 4.399650158535984e-06, "loss": 0.2855, "step": 10610 }, { "epoch": 0.49707218812947956, "grad_norm": 0.6532373030799519, "learning_rate": 4.3995268640352535e-06, "loss": 0.3051, "step": 10611 }, { "epoch": 0.49711903311940786, "grad_norm": 0.5981036516645261, "learning_rate": 4.399403558603223e-06, "loss": 0.2931, "step": 10612 }, { "epoch": 0.4971658781093362, "grad_norm": 0.6143225601684834, "learning_rate": 4.399280242240604e-06, "loss": 0.2782, "step": 10613 }, { "epoch": 0.4972127230992645, "grad_norm": 0.6508813998190248, "learning_rate": 4.399156914948103e-06, "loss": 0.2897, "step": 10614 }, { "epoch": 0.49725956808919286, "grad_norm": 0.6118316318222369, "learning_rate": 4.399033576726431e-06, "loss": 0.297, "step": 10615 }, { "epoch": 0.4973064130791212, "grad_norm": 0.5852841151887541, "learning_rate": 4.3989102275762995e-06, "loss": 0.2914, "step": 10616 }, { "epoch": 0.4973532580690495, "grad_norm": 0.6650730824770402, "learning_rate": 4.398786867498416e-06, "loss": 0.2963, "step": 10617 }, { "epoch": 0.49740010305897786, "grad_norm": 0.5574372972274032, "learning_rate": 4.398663496493491e-06, "loss": 0.2763, "step": 10618 }, { "epoch": 0.49744694804890616, "grad_norm": 0.6025047905470281, "learning_rate": 4.398540114562237e-06, "loss": 0.3068, "step": 10619 }, { "epoch": 0.4974937930388345, "grad_norm": 0.5890207960827839, "learning_rate": 4.39841672170536e-06, "loss": 0.2896, "step": 10620 }, { "epoch": 0.4975406380287628, "grad_norm": 0.6116193991764604, "learning_rate": 4.398293317923572e-06, "loss": 0.2827, "step": 10621 }, { "epoch": 0.49758748301869116, "grad_norm": 0.6610744942671487, "learning_rate": 4.398169903217583e-06, "loss": 0.2802, "step": 10622 }, { "epoch": 0.49763432800861945, "grad_norm": 0.5433361747566672, "learning_rate": 4.3980464775881034e-06, "loss": 0.2628, "step": 10623 }, { "epoch": 0.4976811729985478, "grad_norm": 0.6224199373734496, "learning_rate": 4.397923041035843e-06, "loss": 0.2907, "step": 10624 }, { "epoch": 0.49772801798847616, "grad_norm": 0.5344248931694383, "learning_rate": 4.3977995935615136e-06, "loss": 0.272, "step": 10625 }, { "epoch": 0.49777486297840445, "grad_norm": 0.5674482230858046, "learning_rate": 4.397676135165823e-06, "loss": 0.291, "step": 10626 }, { "epoch": 0.4978217079683328, "grad_norm": 0.6038164082809035, "learning_rate": 4.397552665849485e-06, "loss": 0.277, "step": 10627 }, { "epoch": 0.4978685529582611, "grad_norm": 0.6098164269959905, "learning_rate": 4.397429185613208e-06, "loss": 0.308, "step": 10628 }, { "epoch": 0.49791539794818945, "grad_norm": 0.6345315644454577, "learning_rate": 4.397305694457702e-06, "loss": 0.2526, "step": 10629 }, { "epoch": 0.49796224293811775, "grad_norm": 0.5833536039890457, "learning_rate": 4.397182192383679e-06, "loss": 0.2686, "step": 10630 }, { "epoch": 0.4980090879280461, "grad_norm": 0.6215454265542117, "learning_rate": 4.397058679391849e-06, "loss": 0.2843, "step": 10631 }, { "epoch": 0.4980559329179744, "grad_norm": 0.6238497647681341, "learning_rate": 4.396935155482923e-06, "loss": 0.2974, "step": 10632 }, { "epoch": 0.49810277790790275, "grad_norm": 0.5766728892673229, "learning_rate": 4.396811620657613e-06, "loss": 0.261, "step": 10633 }, { "epoch": 0.4981496228978311, "grad_norm": 0.5987981067625008, "learning_rate": 4.396688074916628e-06, "loss": 0.2834, "step": 10634 }, { "epoch": 0.4981964678877594, "grad_norm": 0.6053087340397262, "learning_rate": 4.39656451826068e-06, "loss": 0.2909, "step": 10635 }, { "epoch": 0.49824331287768775, "grad_norm": 0.5825885807721856, "learning_rate": 4.3964409506904806e-06, "loss": 0.2761, "step": 10636 }, { "epoch": 0.49829015786761605, "grad_norm": 0.6104389496436652, "learning_rate": 4.396317372206738e-06, "loss": 0.2916, "step": 10637 }, { "epoch": 0.4983370028575444, "grad_norm": 0.5969937963534767, "learning_rate": 4.396193782810168e-06, "loss": 0.2838, "step": 10638 }, { "epoch": 0.4983838478474727, "grad_norm": 0.5889856801858276, "learning_rate": 4.396070182501479e-06, "loss": 0.2876, "step": 10639 }, { "epoch": 0.49843069283740105, "grad_norm": 0.6075365660753523, "learning_rate": 4.395946571281382e-06, "loss": 0.2785, "step": 10640 }, { "epoch": 0.49847753782732934, "grad_norm": 0.6026456758033156, "learning_rate": 4.3958229491505885e-06, "loss": 0.2908, "step": 10641 }, { "epoch": 0.4985243828172577, "grad_norm": 0.6019332215143998, "learning_rate": 4.395699316109812e-06, "loss": 0.2944, "step": 10642 }, { "epoch": 0.49857122780718605, "grad_norm": 0.6139558043239176, "learning_rate": 4.395575672159761e-06, "loss": 0.2963, "step": 10643 }, { "epoch": 0.49861807279711434, "grad_norm": 0.6347491797592717, "learning_rate": 4.395452017301149e-06, "loss": 0.3014, "step": 10644 }, { "epoch": 0.4986649177870427, "grad_norm": 0.5497913786312655, "learning_rate": 4.395328351534687e-06, "loss": 0.2559, "step": 10645 }, { "epoch": 0.498711762776971, "grad_norm": 0.5771445981969324, "learning_rate": 4.395204674861087e-06, "loss": 0.2866, "step": 10646 }, { "epoch": 0.49875860776689934, "grad_norm": 0.5599205219512469, "learning_rate": 4.395080987281059e-06, "loss": 0.2692, "step": 10647 }, { "epoch": 0.49880545275682764, "grad_norm": 0.5671905758937511, "learning_rate": 4.394957288795318e-06, "loss": 0.2729, "step": 10648 }, { "epoch": 0.498852297746756, "grad_norm": 0.5684820991763594, "learning_rate": 4.394833579404573e-06, "loss": 0.3052, "step": 10649 }, { "epoch": 0.4988991427366843, "grad_norm": 0.563693269333779, "learning_rate": 4.394709859109537e-06, "loss": 0.2734, "step": 10650 }, { "epoch": 0.49894598772661264, "grad_norm": 0.5762100159216111, "learning_rate": 4.3945861279109225e-06, "loss": 0.2719, "step": 10651 }, { "epoch": 0.498992832716541, "grad_norm": 0.6642051520563651, "learning_rate": 4.394462385809442e-06, "loss": 0.2832, "step": 10652 }, { "epoch": 0.4990396777064693, "grad_norm": 0.6024822022911245, "learning_rate": 4.3943386328058044e-06, "loss": 0.3021, "step": 10653 }, { "epoch": 0.49908652269639764, "grad_norm": 0.610714702331871, "learning_rate": 4.394214868900726e-06, "loss": 0.3074, "step": 10654 }, { "epoch": 0.49913336768632593, "grad_norm": 0.5285315157144246, "learning_rate": 4.394091094094916e-06, "loss": 0.2676, "step": 10655 }, { "epoch": 0.4991802126762543, "grad_norm": 0.5892492000816001, "learning_rate": 4.393967308389088e-06, "loss": 0.305, "step": 10656 }, { "epoch": 0.4992270576661826, "grad_norm": 0.5869921652178035, "learning_rate": 4.393843511783955e-06, "loss": 0.2855, "step": 10657 }, { "epoch": 0.49927390265611094, "grad_norm": 0.5643454150937809, "learning_rate": 4.393719704280228e-06, "loss": 0.2925, "step": 10658 }, { "epoch": 0.49932074764603923, "grad_norm": 0.5835470210616824, "learning_rate": 4.39359588587862e-06, "loss": 0.285, "step": 10659 }, { "epoch": 0.4993675926359676, "grad_norm": 0.6296627248589441, "learning_rate": 4.393472056579843e-06, "loss": 0.2813, "step": 10660 }, { "epoch": 0.49941443762589594, "grad_norm": 0.6396315247661989, "learning_rate": 4.393348216384611e-06, "loss": 0.3132, "step": 10661 }, { "epoch": 0.49946128261582423, "grad_norm": 0.6390230273694176, "learning_rate": 4.393224365293636e-06, "loss": 0.2726, "step": 10662 }, { "epoch": 0.4995081276057526, "grad_norm": 0.5920914260435434, "learning_rate": 4.39310050330763e-06, "loss": 0.2855, "step": 10663 }, { "epoch": 0.4995549725956809, "grad_norm": 0.5903964372596113, "learning_rate": 4.392976630427307e-06, "loss": 0.2487, "step": 10664 }, { "epoch": 0.49960181758560923, "grad_norm": 0.5802322814679552, "learning_rate": 4.3928527466533786e-06, "loss": 0.3045, "step": 10665 }, { "epoch": 0.4996486625755375, "grad_norm": 0.5773175085649668, "learning_rate": 4.392728851986559e-06, "loss": 0.2776, "step": 10666 }, { "epoch": 0.4996955075654659, "grad_norm": 0.6367651292416884, "learning_rate": 4.39260494642756e-06, "loss": 0.2935, "step": 10667 }, { "epoch": 0.4997423525553942, "grad_norm": 0.6187348063573072, "learning_rate": 4.392481029977096e-06, "loss": 0.3033, "step": 10668 }, { "epoch": 0.4997891975453225, "grad_norm": 0.5424084741868621, "learning_rate": 4.392357102635879e-06, "loss": 0.2623, "step": 10669 }, { "epoch": 0.4998360425352509, "grad_norm": 0.5630399394305639, "learning_rate": 4.392233164404622e-06, "loss": 0.2741, "step": 10670 }, { "epoch": 0.4998828875251792, "grad_norm": 0.6264281910043926, "learning_rate": 4.39210921528404e-06, "loss": 0.2917, "step": 10671 }, { "epoch": 0.4999297325151075, "grad_norm": 0.6098961599771104, "learning_rate": 4.391985255274844e-06, "loss": 0.3002, "step": 10672 }, { "epoch": 0.4999765775050358, "grad_norm": 0.5632739476871446, "learning_rate": 4.391861284377749e-06, "loss": 0.3075, "step": 10673 }, { "epoch": 0.5000234224949641, "grad_norm": 0.536336675250781, "learning_rate": 4.391737302593467e-06, "loss": 0.2723, "step": 10674 }, { "epoch": 0.5000702674848925, "grad_norm": 0.5567463384452543, "learning_rate": 4.391613309922712e-06, "loss": 0.2639, "step": 10675 }, { "epoch": 0.5001171124748208, "grad_norm": 0.5850333937326637, "learning_rate": 4.391489306366199e-06, "loss": 0.2811, "step": 10676 }, { "epoch": 0.5001639574647492, "grad_norm": 0.5779644500528679, "learning_rate": 4.39136529192464e-06, "loss": 0.2768, "step": 10677 }, { "epoch": 0.5002108024546774, "grad_norm": 0.5710018175786533, "learning_rate": 4.391241266598749e-06, "loss": 0.2897, "step": 10678 }, { "epoch": 0.5002576474446058, "grad_norm": 0.6466787078355192, "learning_rate": 4.39111723038924e-06, "loss": 0.2995, "step": 10679 }, { "epoch": 0.5003044924345341, "grad_norm": 0.677256649524843, "learning_rate": 4.390993183296827e-06, "loss": 0.2998, "step": 10680 }, { "epoch": 0.5003513374244625, "grad_norm": 0.5579001205707733, "learning_rate": 4.390869125322223e-06, "loss": 0.2749, "step": 10681 }, { "epoch": 0.5003981824143908, "grad_norm": 0.5691202550637986, "learning_rate": 4.390745056466143e-06, "loss": 0.2888, "step": 10682 }, { "epoch": 0.5004450274043191, "grad_norm": 0.5844602215965454, "learning_rate": 4.390620976729299e-06, "loss": 0.3101, "step": 10683 }, { "epoch": 0.5004918723942474, "grad_norm": 0.5840611221822517, "learning_rate": 4.390496886112408e-06, "loss": 0.28, "step": 10684 }, { "epoch": 0.5005387173841758, "grad_norm": 0.5926399077175061, "learning_rate": 4.390372784616182e-06, "loss": 0.2857, "step": 10685 }, { "epoch": 0.5005855623741041, "grad_norm": 0.5753497449530179, "learning_rate": 4.390248672241336e-06, "loss": 0.2825, "step": 10686 }, { "epoch": 0.5006324073640324, "grad_norm": 0.6084711379653757, "learning_rate": 4.390124548988583e-06, "loss": 0.2896, "step": 10687 }, { "epoch": 0.5006792523539607, "grad_norm": 0.5924282252750391, "learning_rate": 4.390000414858639e-06, "loss": 0.2912, "step": 10688 }, { "epoch": 0.5007260973438891, "grad_norm": 0.6424315638089945, "learning_rate": 4.389876269852219e-06, "loss": 0.3101, "step": 10689 }, { "epoch": 0.5007729423338174, "grad_norm": 0.5632363832366378, "learning_rate": 4.389752113970034e-06, "loss": 0.2808, "step": 10690 }, { "epoch": 0.5008197873237458, "grad_norm": 0.5910022323097303, "learning_rate": 4.389627947212801e-06, "loss": 0.2938, "step": 10691 }, { "epoch": 0.500866632313674, "grad_norm": 0.5893446345819374, "learning_rate": 4.3895037695812345e-06, "loss": 0.2766, "step": 10692 }, { "epoch": 0.5009134773036024, "grad_norm": 0.6192241269776861, "learning_rate": 4.389379581076049e-06, "loss": 0.2726, "step": 10693 }, { "epoch": 0.5009603222935307, "grad_norm": 0.5591707124880501, "learning_rate": 4.389255381697959e-06, "loss": 0.2918, "step": 10694 }, { "epoch": 0.5010071672834591, "grad_norm": 0.6275806767657902, "learning_rate": 4.389131171447678e-06, "loss": 0.2848, "step": 10695 }, { "epoch": 0.5010540122733873, "grad_norm": 0.5752329121441041, "learning_rate": 4.389006950325923e-06, "loss": 0.2745, "step": 10696 }, { "epoch": 0.5011008572633157, "grad_norm": 0.6027824207715465, "learning_rate": 4.388882718333407e-06, "loss": 0.2592, "step": 10697 }, { "epoch": 0.501147702253244, "grad_norm": 0.5982995050524794, "learning_rate": 4.388758475470847e-06, "loss": 0.3032, "step": 10698 }, { "epoch": 0.5011945472431724, "grad_norm": 0.652687678130228, "learning_rate": 4.388634221738955e-06, "loss": 0.3024, "step": 10699 }, { "epoch": 0.5012413922331007, "grad_norm": 0.6466341865903922, "learning_rate": 4.388509957138448e-06, "loss": 0.295, "step": 10700 }, { "epoch": 0.501288237223029, "grad_norm": 0.6425360728819707, "learning_rate": 4.388385681670042e-06, "loss": 0.2825, "step": 10701 }, { "epoch": 0.5013350822129573, "grad_norm": 0.6073445285379795, "learning_rate": 4.388261395334451e-06, "loss": 0.2753, "step": 10702 }, { "epoch": 0.5013819272028857, "grad_norm": 0.5860953244662801, "learning_rate": 4.388137098132389e-06, "loss": 0.276, "step": 10703 }, { "epoch": 0.501428772192814, "grad_norm": 0.6125994102404749, "learning_rate": 4.388012790064574e-06, "loss": 0.2829, "step": 10704 }, { "epoch": 0.5014756171827423, "grad_norm": 0.6266485424046636, "learning_rate": 4.387888471131719e-06, "loss": 0.2888, "step": 10705 }, { "epoch": 0.5015224621726706, "grad_norm": 0.5522980469682852, "learning_rate": 4.387764141334541e-06, "loss": 0.2711, "step": 10706 }, { "epoch": 0.501569307162599, "grad_norm": 0.5948388371363859, "learning_rate": 4.387639800673753e-06, "loss": 0.3032, "step": 10707 }, { "epoch": 0.5016161521525273, "grad_norm": 0.6230355643848601, "learning_rate": 4.387515449150075e-06, "loss": 0.298, "step": 10708 }, { "epoch": 0.5016629971424557, "grad_norm": 0.5942107468611812, "learning_rate": 4.3873910867642175e-06, "loss": 0.2963, "step": 10709 }, { "epoch": 0.5017098421323839, "grad_norm": 0.6217662171599312, "learning_rate": 4.3872667135169e-06, "loss": 0.2804, "step": 10710 }, { "epoch": 0.5017566871223123, "grad_norm": 0.5991270252751395, "learning_rate": 4.387142329408838e-06, "loss": 0.2878, "step": 10711 }, { "epoch": 0.5018035321122406, "grad_norm": 0.6328229686337538, "learning_rate": 4.387017934440745e-06, "loss": 0.2804, "step": 10712 }, { "epoch": 0.501850377102169, "grad_norm": 0.6113556654534655, "learning_rate": 4.386893528613337e-06, "loss": 0.2903, "step": 10713 }, { "epoch": 0.5018972220920972, "grad_norm": 0.6007536795777548, "learning_rate": 4.386769111927333e-06, "loss": 0.2997, "step": 10714 }, { "epoch": 0.5019440670820255, "grad_norm": 0.5426511474893553, "learning_rate": 4.386644684383446e-06, "loss": 0.2768, "step": 10715 }, { "epoch": 0.5019909120719539, "grad_norm": 0.5581014728394624, "learning_rate": 4.386520245982393e-06, "loss": 0.2736, "step": 10716 }, { "epoch": 0.5020377570618823, "grad_norm": 0.5782036538969856, "learning_rate": 4.38639579672489e-06, "loss": 0.2985, "step": 10717 }, { "epoch": 0.5020846020518106, "grad_norm": 0.6056851388483283, "learning_rate": 4.3862713366116535e-06, "loss": 0.2827, "step": 10718 }, { "epoch": 0.5021314470417388, "grad_norm": 0.6064927975136039, "learning_rate": 4.386146865643401e-06, "loss": 0.2946, "step": 10719 }, { "epoch": 0.5021782920316672, "grad_norm": 0.6050960726587021, "learning_rate": 4.386022383820845e-06, "loss": 0.295, "step": 10720 }, { "epoch": 0.5022251370215955, "grad_norm": 0.6232336541580361, "learning_rate": 4.3858978911447044e-06, "loss": 0.3181, "step": 10721 }, { "epoch": 0.5022719820115239, "grad_norm": 0.5833706760710589, "learning_rate": 4.385773387615697e-06, "loss": 0.2744, "step": 10722 }, { "epoch": 0.5023188270014521, "grad_norm": 0.6546346947351562, "learning_rate": 4.385648873234537e-06, "loss": 0.3068, "step": 10723 }, { "epoch": 0.5023656719913805, "grad_norm": 0.6547526895471433, "learning_rate": 4.385524348001942e-06, "loss": 0.2883, "step": 10724 }, { "epoch": 0.5024125169813088, "grad_norm": 0.5902960814083054, "learning_rate": 4.385399811918627e-06, "loss": 0.2797, "step": 10725 }, { "epoch": 0.5024593619712372, "grad_norm": 0.542415482045684, "learning_rate": 4.385275264985311e-06, "loss": 0.2746, "step": 10726 }, { "epoch": 0.5025062069611655, "grad_norm": 0.6077514984881778, "learning_rate": 4.38515070720271e-06, "loss": 0.2973, "step": 10727 }, { "epoch": 0.5025530519510938, "grad_norm": 0.5490855154868265, "learning_rate": 4.385026138571541e-06, "loss": 0.2736, "step": 10728 }, { "epoch": 0.5025998969410221, "grad_norm": 0.5841157062787226, "learning_rate": 4.3849015590925184e-06, "loss": 0.2957, "step": 10729 }, { "epoch": 0.5026467419309505, "grad_norm": 0.6130253238727682, "learning_rate": 4.384776968766362e-06, "loss": 0.271, "step": 10730 }, { "epoch": 0.5026935869208788, "grad_norm": 0.5786167247507337, "learning_rate": 4.384652367593788e-06, "loss": 0.2776, "step": 10731 }, { "epoch": 0.5027404319108071, "grad_norm": 0.5906072416809108, "learning_rate": 4.384527755575514e-06, "loss": 0.2964, "step": 10732 }, { "epoch": 0.5027872769007354, "grad_norm": 0.5811584973750189, "learning_rate": 4.3844031327122554e-06, "loss": 0.3003, "step": 10733 }, { "epoch": 0.5028341218906638, "grad_norm": 0.5710111594301884, "learning_rate": 4.38427849900473e-06, "loss": 0.2789, "step": 10734 }, { "epoch": 0.5028809668805921, "grad_norm": 0.5577708107946294, "learning_rate": 4.3841538544536564e-06, "loss": 0.2838, "step": 10735 }, { "epoch": 0.5029278118705205, "grad_norm": 0.6295421305204274, "learning_rate": 4.384029199059752e-06, "loss": 0.2993, "step": 10736 }, { "epoch": 0.5029746568604487, "grad_norm": 0.5861741846670446, "learning_rate": 4.383904532823731e-06, "loss": 0.2921, "step": 10737 }, { "epoch": 0.5030215018503771, "grad_norm": 0.6226167511894756, "learning_rate": 4.383779855746314e-06, "loss": 0.2934, "step": 10738 }, { "epoch": 0.5030683468403054, "grad_norm": 0.5623152537378444, "learning_rate": 4.383655167828217e-06, "loss": 0.294, "step": 10739 }, { "epoch": 0.5031151918302338, "grad_norm": 0.5939850315556633, "learning_rate": 4.383530469070158e-06, "loss": 0.3046, "step": 10740 }, { "epoch": 0.503162036820162, "grad_norm": 0.5691092909944567, "learning_rate": 4.383405759472855e-06, "loss": 0.2763, "step": 10741 }, { "epoch": 0.5032088818100904, "grad_norm": 0.571675695244153, "learning_rate": 4.383281039037024e-06, "loss": 0.2902, "step": 10742 }, { "epoch": 0.5032557268000187, "grad_norm": 0.6321450014700746, "learning_rate": 4.383156307763386e-06, "loss": 0.2839, "step": 10743 }, { "epoch": 0.5033025717899471, "grad_norm": 0.5913140554725556, "learning_rate": 4.383031565652654e-06, "loss": 0.2998, "step": 10744 }, { "epoch": 0.5033494167798754, "grad_norm": 0.6170714433010881, "learning_rate": 4.382906812705551e-06, "loss": 0.2829, "step": 10745 }, { "epoch": 0.5033962617698037, "grad_norm": 0.6582408964477904, "learning_rate": 4.3827820489227915e-06, "loss": 0.2853, "step": 10746 }, { "epoch": 0.503443106759732, "grad_norm": 0.7064659385321725, "learning_rate": 4.382657274305095e-06, "loss": 0.311, "step": 10747 }, { "epoch": 0.5034899517496604, "grad_norm": 0.6032337250591607, "learning_rate": 4.382532488853178e-06, "loss": 0.2915, "step": 10748 }, { "epoch": 0.5035367967395887, "grad_norm": 0.6379385426171162, "learning_rate": 4.382407692567761e-06, "loss": 0.2905, "step": 10749 }, { "epoch": 0.503583641729517, "grad_norm": 0.5956269299808049, "learning_rate": 4.38228288544956e-06, "loss": 0.2868, "step": 10750 }, { "epoch": 0.5036304867194453, "grad_norm": 0.6759354997081367, "learning_rate": 4.382158067499294e-06, "loss": 0.3039, "step": 10751 }, { "epoch": 0.5036773317093737, "grad_norm": 0.6259095468211945, "learning_rate": 4.382033238717683e-06, "loss": 0.2921, "step": 10752 }, { "epoch": 0.503724176699302, "grad_norm": 0.6223371567958846, "learning_rate": 4.381908399105442e-06, "loss": 0.31, "step": 10753 }, { "epoch": 0.5037710216892304, "grad_norm": 0.596317414114125, "learning_rate": 4.381783548663292e-06, "loss": 0.3145, "step": 10754 }, { "epoch": 0.5038178666791586, "grad_norm": 0.5855188800398702, "learning_rate": 4.381658687391951e-06, "loss": 0.297, "step": 10755 }, { "epoch": 0.503864711669087, "grad_norm": 0.6574566877089931, "learning_rate": 4.3815338152921364e-06, "loss": 0.3138, "step": 10756 }, { "epoch": 0.5039115566590153, "grad_norm": 0.5683141357441224, "learning_rate": 4.381408932364568e-06, "loss": 0.2753, "step": 10757 }, { "epoch": 0.5039584016489437, "grad_norm": 0.6093800780396905, "learning_rate": 4.3812840386099635e-06, "loss": 0.2868, "step": 10758 }, { "epoch": 0.5040052466388719, "grad_norm": 0.5925140823603094, "learning_rate": 4.381159134029043e-06, "loss": 0.3212, "step": 10759 }, { "epoch": 0.5040520916288003, "grad_norm": 0.5683614013860613, "learning_rate": 4.381034218622524e-06, "loss": 0.2886, "step": 10760 }, { "epoch": 0.5040989366187286, "grad_norm": 0.5900855353932115, "learning_rate": 4.380909292391126e-06, "loss": 0.2876, "step": 10761 }, { "epoch": 0.504145781608657, "grad_norm": 0.6080226326665873, "learning_rate": 4.380784355335567e-06, "loss": 0.2778, "step": 10762 }, { "epoch": 0.5041926265985853, "grad_norm": 0.602075472298293, "learning_rate": 4.380659407456568e-06, "loss": 0.2846, "step": 10763 }, { "epoch": 0.5042394715885136, "grad_norm": 0.6430619485504037, "learning_rate": 4.3805344487548455e-06, "loss": 0.2861, "step": 10764 }, { "epoch": 0.5042863165784419, "grad_norm": 0.6082295440144866, "learning_rate": 4.380409479231121e-06, "loss": 0.2988, "step": 10765 }, { "epoch": 0.5043331615683703, "grad_norm": 0.5954983858541951, "learning_rate": 4.380284498886112e-06, "loss": 0.2767, "step": 10766 }, { "epoch": 0.5043800065582986, "grad_norm": 0.6104111438206544, "learning_rate": 4.3801595077205385e-06, "loss": 0.2773, "step": 10767 }, { "epoch": 0.5044268515482269, "grad_norm": 0.558385174468706, "learning_rate": 4.380034505735119e-06, "loss": 0.2749, "step": 10768 }, { "epoch": 0.5044736965381552, "grad_norm": 0.6314710867618026, "learning_rate": 4.3799094929305744e-06, "loss": 0.3066, "step": 10769 }, { "epoch": 0.5045205415280836, "grad_norm": 0.7004291486966566, "learning_rate": 4.379784469307623e-06, "loss": 0.3028, "step": 10770 }, { "epoch": 0.5045673865180119, "grad_norm": 0.6626207976638818, "learning_rate": 4.379659434866984e-06, "loss": 0.2631, "step": 10771 }, { "epoch": 0.5046142315079403, "grad_norm": 0.681939350402645, "learning_rate": 4.379534389609378e-06, "loss": 0.3022, "step": 10772 }, { "epoch": 0.5046610764978685, "grad_norm": 0.6036842179129124, "learning_rate": 4.379409333535524e-06, "loss": 0.2784, "step": 10773 }, { "epoch": 0.5047079214877969, "grad_norm": 0.6224488474940424, "learning_rate": 4.379284266646141e-06, "loss": 0.3145, "step": 10774 }, { "epoch": 0.5047547664777252, "grad_norm": 0.6074366040085506, "learning_rate": 4.379159188941949e-06, "loss": 0.2808, "step": 10775 }, { "epoch": 0.5048016114676536, "grad_norm": 0.559165820274422, "learning_rate": 4.379034100423669e-06, "loss": 0.2675, "step": 10776 }, { "epoch": 0.5048484564575818, "grad_norm": 0.6300976351747426, "learning_rate": 4.3789090010920204e-06, "loss": 0.2969, "step": 10777 }, { "epoch": 0.5048953014475102, "grad_norm": 0.5565052184367485, "learning_rate": 4.378783890947722e-06, "loss": 0.2662, "step": 10778 }, { "epoch": 0.5049421464374385, "grad_norm": 0.5764431365793331, "learning_rate": 4.378658769991495e-06, "loss": 0.2783, "step": 10779 }, { "epoch": 0.5049889914273669, "grad_norm": 0.6266879991397843, "learning_rate": 4.378533638224059e-06, "loss": 0.2846, "step": 10780 }, { "epoch": 0.5050358364172952, "grad_norm": 0.5708971954483889, "learning_rate": 4.378408495646134e-06, "loss": 0.2613, "step": 10781 }, { "epoch": 0.5050826814072235, "grad_norm": 0.5968032707178422, "learning_rate": 4.378283342258439e-06, "loss": 0.2802, "step": 10782 }, { "epoch": 0.5051295263971518, "grad_norm": 0.5957425421798036, "learning_rate": 4.378158178061697e-06, "loss": 0.2854, "step": 10783 }, { "epoch": 0.5051763713870802, "grad_norm": 0.5905588604152398, "learning_rate": 4.378033003056626e-06, "loss": 0.2985, "step": 10784 }, { "epoch": 0.5052232163770085, "grad_norm": 0.6303952628532212, "learning_rate": 4.377907817243947e-06, "loss": 0.3047, "step": 10785 }, { "epoch": 0.5052700613669368, "grad_norm": 0.6092206350330243, "learning_rate": 4.377782620624381e-06, "loss": 0.283, "step": 10786 }, { "epoch": 0.5053169063568651, "grad_norm": 0.6228764328017171, "learning_rate": 4.377657413198648e-06, "loss": 0.2881, "step": 10787 }, { "epoch": 0.5053637513467935, "grad_norm": 0.6602411764454648, "learning_rate": 4.377532194967468e-06, "loss": 0.2948, "step": 10788 }, { "epoch": 0.5054105963367218, "grad_norm": 0.5736029150698888, "learning_rate": 4.377406965931563e-06, "loss": 0.3011, "step": 10789 }, { "epoch": 0.5054574413266502, "grad_norm": 0.6028684170646093, "learning_rate": 4.377281726091652e-06, "loss": 0.2738, "step": 10790 }, { "epoch": 0.5055042863165784, "grad_norm": 0.6595238542205525, "learning_rate": 4.3771564754484565e-06, "loss": 0.3057, "step": 10791 }, { "epoch": 0.5055511313065068, "grad_norm": 0.6348554374470501, "learning_rate": 4.3770312140026975e-06, "loss": 0.3012, "step": 10792 }, { "epoch": 0.5055979762964351, "grad_norm": 0.6412558648349361, "learning_rate": 4.376905941755095e-06, "loss": 0.3051, "step": 10793 }, { "epoch": 0.5056448212863635, "grad_norm": 0.5863940590128883, "learning_rate": 4.376780658706371e-06, "loss": 0.2749, "step": 10794 }, { "epoch": 0.5056916662762917, "grad_norm": 0.622610561027951, "learning_rate": 4.376655364857247e-06, "loss": 0.3069, "step": 10795 }, { "epoch": 0.50573851126622, "grad_norm": 0.6224153305154216, "learning_rate": 4.376530060208442e-06, "loss": 0.2878, "step": 10796 }, { "epoch": 0.5057853562561484, "grad_norm": 0.6211958214419402, "learning_rate": 4.376404744760678e-06, "loss": 0.2964, "step": 10797 }, { "epoch": 0.5058322012460768, "grad_norm": 0.6212948389521105, "learning_rate": 4.376279418514677e-06, "loss": 0.296, "step": 10798 }, { "epoch": 0.5058790462360051, "grad_norm": 0.6124903603111945, "learning_rate": 4.37615408147116e-06, "loss": 0.3139, "step": 10799 }, { "epoch": 0.5059258912259333, "grad_norm": 0.6100274803433494, "learning_rate": 4.3760287336308465e-06, "loss": 0.2792, "step": 10800 }, { "epoch": 0.5059727362158617, "grad_norm": 0.566297581985701, "learning_rate": 4.37590337499446e-06, "loss": 0.2847, "step": 10801 }, { "epoch": 0.50601958120579, "grad_norm": 0.5686749636014263, "learning_rate": 4.375778005562721e-06, "loss": 0.282, "step": 10802 }, { "epoch": 0.5060664261957184, "grad_norm": 0.5867317298745042, "learning_rate": 4.375652625336351e-06, "loss": 0.2766, "step": 10803 }, { "epoch": 0.5061132711856466, "grad_norm": 0.6023503801817168, "learning_rate": 4.3755272343160705e-06, "loss": 0.3018, "step": 10804 }, { "epoch": 0.506160116175575, "grad_norm": 0.635360030310758, "learning_rate": 4.3754018325026035e-06, "loss": 0.3247, "step": 10805 }, { "epoch": 0.5062069611655033, "grad_norm": 0.5951406186797622, "learning_rate": 4.3752764198966695e-06, "loss": 0.3019, "step": 10806 }, { "epoch": 0.5062538061554317, "grad_norm": 0.5992517287798798, "learning_rate": 4.375150996498991e-06, "loss": 0.2812, "step": 10807 }, { "epoch": 0.50630065114536, "grad_norm": 0.5895334691959453, "learning_rate": 4.375025562310291e-06, "loss": 0.284, "step": 10808 }, { "epoch": 0.5063474961352883, "grad_norm": 0.5911917237134738, "learning_rate": 4.374900117331289e-06, "loss": 0.2835, "step": 10809 }, { "epoch": 0.5063943411252166, "grad_norm": 0.5846922407657591, "learning_rate": 4.374774661562709e-06, "loss": 0.291, "step": 10810 }, { "epoch": 0.506441186115145, "grad_norm": 0.6222315261226526, "learning_rate": 4.374649195005271e-06, "loss": 0.3062, "step": 10811 }, { "epoch": 0.5064880311050733, "grad_norm": 0.573251021119514, "learning_rate": 4.374523717659699e-06, "loss": 0.2747, "step": 10812 }, { "epoch": 0.5065348760950016, "grad_norm": 0.6008588358963926, "learning_rate": 4.374398229526713e-06, "loss": 0.2671, "step": 10813 }, { "epoch": 0.5065817210849299, "grad_norm": 0.6140094432557286, "learning_rate": 4.374272730607037e-06, "loss": 0.2891, "step": 10814 }, { "epoch": 0.5066285660748583, "grad_norm": 0.615306737497435, "learning_rate": 4.374147220901392e-06, "loss": 0.294, "step": 10815 }, { "epoch": 0.5066754110647866, "grad_norm": 0.6359691220511875, "learning_rate": 4.374021700410502e-06, "loss": 0.2974, "step": 10816 }, { "epoch": 0.506722256054715, "grad_norm": 0.6138676752174841, "learning_rate": 4.373896169135087e-06, "loss": 0.288, "step": 10817 }, { "epoch": 0.5067691010446432, "grad_norm": 0.6212026196676708, "learning_rate": 4.373770627075871e-06, "loss": 0.2951, "step": 10818 }, { "epoch": 0.5068159460345716, "grad_norm": 0.712878659623635, "learning_rate": 4.373645074233576e-06, "loss": 0.3088, "step": 10819 }, { "epoch": 0.5068627910244999, "grad_norm": 0.5978224072840111, "learning_rate": 4.373519510608925e-06, "loss": 0.31, "step": 10820 }, { "epoch": 0.5069096360144283, "grad_norm": 0.6045564783831148, "learning_rate": 4.373393936202639e-06, "loss": 0.2922, "step": 10821 }, { "epoch": 0.5069564810043565, "grad_norm": 0.612429304756673, "learning_rate": 4.3732683510154425e-06, "loss": 0.2982, "step": 10822 }, { "epoch": 0.5070033259942849, "grad_norm": 0.6831008970138881, "learning_rate": 4.373142755048058e-06, "loss": 0.3085, "step": 10823 }, { "epoch": 0.5070501709842132, "grad_norm": 0.6427488074947759, "learning_rate": 4.373017148301206e-06, "loss": 0.2708, "step": 10824 }, { "epoch": 0.5070970159741416, "grad_norm": 0.5899617719672324, "learning_rate": 4.3728915307756125e-06, "loss": 0.2954, "step": 10825 }, { "epoch": 0.5071438609640699, "grad_norm": 0.6059109547533169, "learning_rate": 4.372765902471999e-06, "loss": 0.2963, "step": 10826 }, { "epoch": 0.5071907059539982, "grad_norm": 0.6137615869534286, "learning_rate": 4.372640263391088e-06, "loss": 0.2943, "step": 10827 }, { "epoch": 0.5072375509439265, "grad_norm": 0.6432094695833764, "learning_rate": 4.372514613533602e-06, "loss": 0.2965, "step": 10828 }, { "epoch": 0.5072843959338549, "grad_norm": 0.6253783830062826, "learning_rate": 4.372388952900267e-06, "loss": 0.2878, "step": 10829 }, { "epoch": 0.5073312409237832, "grad_norm": 0.5832170355625556, "learning_rate": 4.372263281491803e-06, "loss": 0.2946, "step": 10830 }, { "epoch": 0.5073780859137115, "grad_norm": 0.5701875361091671, "learning_rate": 4.372137599308935e-06, "loss": 0.2802, "step": 10831 }, { "epoch": 0.5074249309036398, "grad_norm": 0.6475472538733678, "learning_rate": 4.372011906352385e-06, "loss": 0.3008, "step": 10832 }, { "epoch": 0.5074717758935682, "grad_norm": 0.5594302420085703, "learning_rate": 4.371886202622877e-06, "loss": 0.2971, "step": 10833 }, { "epoch": 0.5075186208834965, "grad_norm": 0.5336082352507528, "learning_rate": 4.371760488121134e-06, "loss": 0.2701, "step": 10834 }, { "epoch": 0.5075654658734249, "grad_norm": 0.6218814675314804, "learning_rate": 4.371634762847881e-06, "loss": 0.3018, "step": 10835 }, { "epoch": 0.5076123108633531, "grad_norm": 0.5702592622284004, "learning_rate": 4.37150902680384e-06, "loss": 0.2776, "step": 10836 }, { "epoch": 0.5076591558532815, "grad_norm": 0.6065937533989485, "learning_rate": 4.3713832799897345e-06, "loss": 0.2861, "step": 10837 }, { "epoch": 0.5077060008432098, "grad_norm": 0.5943661026767255, "learning_rate": 4.371257522406289e-06, "loss": 0.3023, "step": 10838 }, { "epoch": 0.5077528458331382, "grad_norm": 0.5532297398993009, "learning_rate": 4.371131754054226e-06, "loss": 0.2691, "step": 10839 }, { "epoch": 0.5077996908230664, "grad_norm": 0.5551306809463611, "learning_rate": 4.3710059749342714e-06, "loss": 0.2692, "step": 10840 }, { "epoch": 0.5078465358129948, "grad_norm": 0.693242448078701, "learning_rate": 4.370880185047147e-06, "loss": 0.2951, "step": 10841 }, { "epoch": 0.5078933808029231, "grad_norm": 0.5980214471546337, "learning_rate": 4.3707543843935775e-06, "loss": 0.2905, "step": 10842 }, { "epoch": 0.5079402257928515, "grad_norm": 0.647201761811614, "learning_rate": 4.370628572974286e-06, "loss": 0.3181, "step": 10843 }, { "epoch": 0.5079870707827798, "grad_norm": 0.638192582862921, "learning_rate": 4.370502750789998e-06, "loss": 0.3019, "step": 10844 }, { "epoch": 0.5080339157727081, "grad_norm": 0.6511212796531295, "learning_rate": 4.370376917841437e-06, "loss": 0.2835, "step": 10845 }, { "epoch": 0.5080807607626364, "grad_norm": 0.5637422158445544, "learning_rate": 4.3702510741293255e-06, "loss": 0.2818, "step": 10846 }, { "epoch": 0.5081276057525648, "grad_norm": 0.6274491028456378, "learning_rate": 4.3701252196543905e-06, "loss": 0.2976, "step": 10847 }, { "epoch": 0.5081744507424931, "grad_norm": 0.568377868204566, "learning_rate": 4.369999354417355e-06, "loss": 0.2809, "step": 10848 }, { "epoch": 0.5082212957324214, "grad_norm": 0.6057275269078596, "learning_rate": 4.3698734784189425e-06, "loss": 0.2792, "step": 10849 }, { "epoch": 0.5082681407223497, "grad_norm": 0.600027496432478, "learning_rate": 4.369747591659879e-06, "loss": 0.2791, "step": 10850 }, { "epoch": 0.5083149857122781, "grad_norm": 0.6636402359759861, "learning_rate": 4.369621694140887e-06, "loss": 0.297, "step": 10851 }, { "epoch": 0.5083618307022064, "grad_norm": 0.5856928118254461, "learning_rate": 4.369495785862692e-06, "loss": 0.2782, "step": 10852 }, { "epoch": 0.5084086756921348, "grad_norm": 0.615385313058964, "learning_rate": 4.3693698668260195e-06, "loss": 0.2933, "step": 10853 }, { "epoch": 0.508455520682063, "grad_norm": 0.5977328259556339, "learning_rate": 4.369243937031593e-06, "loss": 0.2872, "step": 10854 }, { "epoch": 0.5085023656719914, "grad_norm": 0.5809247096306257, "learning_rate": 4.369117996480136e-06, "loss": 0.2649, "step": 10855 }, { "epoch": 0.5085492106619197, "grad_norm": 0.599862733641651, "learning_rate": 4.368992045172377e-06, "loss": 0.286, "step": 10856 }, { "epoch": 0.5085960556518481, "grad_norm": 0.6091280701121821, "learning_rate": 4.368866083109038e-06, "loss": 0.2979, "step": 10857 }, { "epoch": 0.5086429006417763, "grad_norm": 0.615539635981425, "learning_rate": 4.368740110290843e-06, "loss": 0.2827, "step": 10858 }, { "epoch": 0.5086897456317047, "grad_norm": 0.5842691238222787, "learning_rate": 4.36861412671852e-06, "loss": 0.3018, "step": 10859 }, { "epoch": 0.508736590621633, "grad_norm": 0.6288645846116823, "learning_rate": 4.3684881323927915e-06, "loss": 0.3018, "step": 10860 }, { "epoch": 0.5087834356115614, "grad_norm": 0.5895016278758751, "learning_rate": 4.368362127314384e-06, "loss": 0.2802, "step": 10861 }, { "epoch": 0.5088302806014897, "grad_norm": 0.7134576229313263, "learning_rate": 4.368236111484022e-06, "loss": 0.2948, "step": 10862 }, { "epoch": 0.508877125591418, "grad_norm": 0.6292054785327147, "learning_rate": 4.368110084902429e-06, "loss": 0.2869, "step": 10863 }, { "epoch": 0.5089239705813463, "grad_norm": 0.6421274992070274, "learning_rate": 4.367984047570334e-06, "loss": 0.2748, "step": 10864 }, { "epoch": 0.5089708155712747, "grad_norm": 0.594969857089847, "learning_rate": 4.36785799948846e-06, "loss": 0.286, "step": 10865 }, { "epoch": 0.509017660561203, "grad_norm": 0.6059485011420628, "learning_rate": 4.367731940657532e-06, "loss": 0.2867, "step": 10866 }, { "epoch": 0.5090645055511313, "grad_norm": 0.609500828366851, "learning_rate": 4.367605871078277e-06, "loss": 0.2835, "step": 10867 }, { "epoch": 0.5091113505410596, "grad_norm": 0.5916102003503755, "learning_rate": 4.367479790751419e-06, "loss": 0.2929, "step": 10868 }, { "epoch": 0.509158195530988, "grad_norm": 0.5314815703651073, "learning_rate": 4.3673536996776846e-06, "loss": 0.2636, "step": 10869 }, { "epoch": 0.5092050405209163, "grad_norm": 0.5744000123716474, "learning_rate": 4.367227597857798e-06, "loss": 0.2856, "step": 10870 }, { "epoch": 0.5092518855108447, "grad_norm": 0.6232693087243759, "learning_rate": 4.3671014852924875e-06, "loss": 0.2649, "step": 10871 }, { "epoch": 0.5092987305007729, "grad_norm": 0.626522208562942, "learning_rate": 4.366975361982476e-06, "loss": 0.2771, "step": 10872 }, { "epoch": 0.5093455754907013, "grad_norm": 0.5435737816578535, "learning_rate": 4.366849227928491e-06, "loss": 0.281, "step": 10873 }, { "epoch": 0.5093924204806296, "grad_norm": 0.6063190522805165, "learning_rate": 4.366723083131258e-06, "loss": 0.2797, "step": 10874 }, { "epoch": 0.509439265470558, "grad_norm": 0.5647069034738688, "learning_rate": 4.366596927591502e-06, "loss": 0.2908, "step": 10875 }, { "epoch": 0.5094861104604862, "grad_norm": 0.6717548405954846, "learning_rate": 4.366470761309951e-06, "loss": 0.2817, "step": 10876 }, { "epoch": 0.5095329554504145, "grad_norm": 0.6517109377488439, "learning_rate": 4.366344584287329e-06, "loss": 0.2925, "step": 10877 }, { "epoch": 0.5095798004403429, "grad_norm": 0.5887986268864666, "learning_rate": 4.366218396524363e-06, "loss": 0.2888, "step": 10878 }, { "epoch": 0.5096266454302713, "grad_norm": 0.6415183857111919, "learning_rate": 4.36609219802178e-06, "loss": 0.2915, "step": 10879 }, { "epoch": 0.5096734904201996, "grad_norm": 0.681079484497841, "learning_rate": 4.3659659887803055e-06, "loss": 0.3231, "step": 10880 }, { "epoch": 0.5097203354101278, "grad_norm": 0.5860397570167261, "learning_rate": 4.365839768800665e-06, "loss": 0.2636, "step": 10881 }, { "epoch": 0.5097671804000562, "grad_norm": 0.6007410957539492, "learning_rate": 4.365713538083585e-06, "loss": 0.2919, "step": 10882 }, { "epoch": 0.5098140253899845, "grad_norm": 0.5593450816276071, "learning_rate": 4.365587296629794e-06, "loss": 0.294, "step": 10883 }, { "epoch": 0.5098608703799129, "grad_norm": 0.6030268044279107, "learning_rate": 4.3654610444400155e-06, "loss": 0.2864, "step": 10884 }, { "epoch": 0.5099077153698411, "grad_norm": 0.6074715821360485, "learning_rate": 4.3653347815149785e-06, "loss": 0.2886, "step": 10885 }, { "epoch": 0.5099545603597695, "grad_norm": 0.5920414762659657, "learning_rate": 4.365208507855408e-06, "loss": 0.3112, "step": 10886 }, { "epoch": 0.5100014053496978, "grad_norm": 0.5933199889346827, "learning_rate": 4.365082223462033e-06, "loss": 0.2783, "step": 10887 }, { "epoch": 0.5100482503396262, "grad_norm": 0.5346553292501096, "learning_rate": 4.364955928335577e-06, "loss": 0.2491, "step": 10888 }, { "epoch": 0.5100950953295545, "grad_norm": 0.6450928094190488, "learning_rate": 4.364829622476769e-06, "loss": 0.2932, "step": 10889 }, { "epoch": 0.5101419403194828, "grad_norm": 0.5716103847260949, "learning_rate": 4.364703305886336e-06, "loss": 0.2945, "step": 10890 }, { "epoch": 0.5101887853094111, "grad_norm": 0.6573438598684109, "learning_rate": 4.364576978565002e-06, "loss": 0.2964, "step": 10891 }, { "epoch": 0.5102356302993395, "grad_norm": 0.5427946817734562, "learning_rate": 4.364450640513498e-06, "loss": 0.2713, "step": 10892 }, { "epoch": 0.5102824752892678, "grad_norm": 0.6123984081647678, "learning_rate": 4.364324291732548e-06, "loss": 0.2973, "step": 10893 }, { "epoch": 0.5103293202791961, "grad_norm": 0.5892363753774525, "learning_rate": 4.364197932222881e-06, "loss": 0.2971, "step": 10894 }, { "epoch": 0.5103761652691244, "grad_norm": 0.6635280565833397, "learning_rate": 4.364071561985223e-06, "loss": 0.2921, "step": 10895 }, { "epoch": 0.5104230102590528, "grad_norm": 0.5947864294136348, "learning_rate": 4.363945181020302e-06, "loss": 0.3022, "step": 10896 }, { "epoch": 0.5104698552489811, "grad_norm": 0.5417606339567944, "learning_rate": 4.363818789328844e-06, "loss": 0.2614, "step": 10897 }, { "epoch": 0.5105167002389095, "grad_norm": 0.5743963455427283, "learning_rate": 4.363692386911579e-06, "loss": 0.2665, "step": 10898 }, { "epoch": 0.5105635452288377, "grad_norm": 0.6251943957815442, "learning_rate": 4.3635659737692314e-06, "loss": 0.2891, "step": 10899 }, { "epoch": 0.5106103902187661, "grad_norm": 0.6056779953047704, "learning_rate": 4.36343954990253e-06, "loss": 0.294, "step": 10900 }, { "epoch": 0.5106572352086944, "grad_norm": 0.5850690652738985, "learning_rate": 4.363313115312204e-06, "loss": 0.299, "step": 10901 }, { "epoch": 0.5107040801986228, "grad_norm": 0.5729962809492793, "learning_rate": 4.363186669998978e-06, "loss": 0.2947, "step": 10902 }, { "epoch": 0.510750925188551, "grad_norm": 0.6620572342768816, "learning_rate": 4.3630602139635815e-06, "loss": 0.292, "step": 10903 }, { "epoch": 0.5107977701784794, "grad_norm": 0.5928407872333019, "learning_rate": 4.362933747206741e-06, "loss": 0.3005, "step": 10904 }, { "epoch": 0.5108446151684077, "grad_norm": 0.6239937397357609, "learning_rate": 4.362807269729186e-06, "loss": 0.2976, "step": 10905 }, { "epoch": 0.5108914601583361, "grad_norm": 0.6247381757852981, "learning_rate": 4.362680781531643e-06, "loss": 0.2802, "step": 10906 }, { "epoch": 0.5109383051482644, "grad_norm": 0.5782163858851751, "learning_rate": 4.3625542826148395e-06, "loss": 0.2824, "step": 10907 }, { "epoch": 0.5109851501381927, "grad_norm": 0.6367281085933494, "learning_rate": 4.362427772979505e-06, "loss": 0.3124, "step": 10908 }, { "epoch": 0.511031995128121, "grad_norm": 0.5449279785112618, "learning_rate": 4.362301252626366e-06, "loss": 0.2859, "step": 10909 }, { "epoch": 0.5110788401180494, "grad_norm": 0.5905921599462509, "learning_rate": 4.362174721556153e-06, "loss": 0.2851, "step": 10910 }, { "epoch": 0.5111256851079777, "grad_norm": 0.6225192416516391, "learning_rate": 4.3620481797695916e-06, "loss": 0.2931, "step": 10911 }, { "epoch": 0.511172530097906, "grad_norm": 0.5944863779583155, "learning_rate": 4.36192162726741e-06, "loss": 0.2905, "step": 10912 }, { "epoch": 0.5112193750878343, "grad_norm": 0.6013244688800703, "learning_rate": 4.3617950640503385e-06, "loss": 0.2762, "step": 10913 }, { "epoch": 0.5112662200777627, "grad_norm": 0.6064049168021676, "learning_rate": 4.361668490119105e-06, "loss": 0.2983, "step": 10914 }, { "epoch": 0.511313065067691, "grad_norm": 0.58146417794149, "learning_rate": 4.361541905474435e-06, "loss": 0.2891, "step": 10915 }, { "epoch": 0.5113599100576194, "grad_norm": 0.6670627203473183, "learning_rate": 4.361415310117062e-06, "loss": 0.2825, "step": 10916 }, { "epoch": 0.5114067550475476, "grad_norm": 0.608111138786237, "learning_rate": 4.361288704047709e-06, "loss": 0.2711, "step": 10917 }, { "epoch": 0.511453600037476, "grad_norm": 0.5629151119946032, "learning_rate": 4.361162087267109e-06, "loss": 0.2929, "step": 10918 }, { "epoch": 0.5115004450274043, "grad_norm": 0.6066905689987779, "learning_rate": 4.361035459775988e-06, "loss": 0.2787, "step": 10919 }, { "epoch": 0.5115472900173327, "grad_norm": 0.5845125401045368, "learning_rate": 4.3609088215750775e-06, "loss": 0.2929, "step": 10920 }, { "epoch": 0.5115941350072609, "grad_norm": 0.6380456367435742, "learning_rate": 4.360782172665103e-06, "loss": 0.3034, "step": 10921 }, { "epoch": 0.5116409799971893, "grad_norm": 0.6386282175280821, "learning_rate": 4.360655513046795e-06, "loss": 0.2952, "step": 10922 }, { "epoch": 0.5116878249871176, "grad_norm": 0.552245415844012, "learning_rate": 4.360528842720882e-06, "loss": 0.2803, "step": 10923 }, { "epoch": 0.511734669977046, "grad_norm": 0.6078303904093414, "learning_rate": 4.3604021616880935e-06, "loss": 0.2837, "step": 10924 }, { "epoch": 0.5117815149669743, "grad_norm": 0.5890836216677383, "learning_rate": 4.3602754699491574e-06, "loss": 0.2981, "step": 10925 }, { "epoch": 0.5118283599569026, "grad_norm": 0.5886095040970194, "learning_rate": 4.360148767504805e-06, "loss": 0.2661, "step": 10926 }, { "epoch": 0.5118752049468309, "grad_norm": 0.607561020688491, "learning_rate": 4.360022054355763e-06, "loss": 0.2792, "step": 10927 }, { "epoch": 0.5119220499367593, "grad_norm": 0.586151193166057, "learning_rate": 4.359895330502762e-06, "loss": 0.2892, "step": 10928 }, { "epoch": 0.5119688949266876, "grad_norm": 0.6085742490300448, "learning_rate": 4.35976859594653e-06, "loss": 0.283, "step": 10929 }, { "epoch": 0.5120157399166159, "grad_norm": 0.5918603868257238, "learning_rate": 4.359641850687798e-06, "loss": 0.287, "step": 10930 }, { "epoch": 0.5120625849065442, "grad_norm": 0.6403681776496993, "learning_rate": 4.359515094727294e-06, "loss": 0.3092, "step": 10931 }, { "epoch": 0.5121094298964726, "grad_norm": 0.6338117747909188, "learning_rate": 4.3593883280657485e-06, "loss": 0.303, "step": 10932 }, { "epoch": 0.5121562748864009, "grad_norm": 0.5728585459039284, "learning_rate": 4.35926155070389e-06, "loss": 0.2857, "step": 10933 }, { "epoch": 0.5122031198763293, "grad_norm": 0.5412835865592368, "learning_rate": 4.359134762642448e-06, "loss": 0.268, "step": 10934 }, { "epoch": 0.5122499648662575, "grad_norm": 0.6250932275251829, "learning_rate": 4.359007963882155e-06, "loss": 0.2867, "step": 10935 }, { "epoch": 0.5122968098561859, "grad_norm": 0.5833694587648298, "learning_rate": 4.358881154423737e-06, "loss": 0.2976, "step": 10936 }, { "epoch": 0.5123436548461142, "grad_norm": 0.5842196864446895, "learning_rate": 4.358754334267924e-06, "loss": 0.2858, "step": 10937 }, { "epoch": 0.5123904998360426, "grad_norm": 0.6363711520921457, "learning_rate": 4.358627503415449e-06, "loss": 0.2856, "step": 10938 }, { "epoch": 0.5124373448259708, "grad_norm": 0.5882446258772631, "learning_rate": 4.358500661867039e-06, "loss": 0.2853, "step": 10939 }, { "epoch": 0.5124841898158992, "grad_norm": 0.6106785391710393, "learning_rate": 4.358373809623424e-06, "loss": 0.2794, "step": 10940 }, { "epoch": 0.5125310348058275, "grad_norm": 0.5783978192579202, "learning_rate": 4.358246946685336e-06, "loss": 0.2919, "step": 10941 }, { "epoch": 0.5125778797957559, "grad_norm": 0.5995502753759056, "learning_rate": 4.358120073053503e-06, "loss": 0.3021, "step": 10942 }, { "epoch": 0.5126247247856842, "grad_norm": 0.5465857582900612, "learning_rate": 4.357993188728657e-06, "loss": 0.2826, "step": 10943 }, { "epoch": 0.5126715697756125, "grad_norm": 0.5809416061384174, "learning_rate": 4.357866293711527e-06, "loss": 0.2955, "step": 10944 }, { "epoch": 0.5127184147655408, "grad_norm": 0.5926711275219101, "learning_rate": 4.357739388002843e-06, "loss": 0.2832, "step": 10945 }, { "epoch": 0.5127652597554692, "grad_norm": 0.5849063469133279, "learning_rate": 4.357612471603336e-06, "loss": 0.3007, "step": 10946 }, { "epoch": 0.5128121047453975, "grad_norm": 0.5768954416976567, "learning_rate": 4.357485544513735e-06, "loss": 0.288, "step": 10947 }, { "epoch": 0.5128589497353258, "grad_norm": 0.5576920960543003, "learning_rate": 4.357358606734773e-06, "loss": 0.2761, "step": 10948 }, { "epoch": 0.5129057947252541, "grad_norm": 0.6114051958704301, "learning_rate": 4.357231658267179e-06, "loss": 0.2858, "step": 10949 }, { "epoch": 0.5129526397151825, "grad_norm": 0.6097657053638755, "learning_rate": 4.3571046991116825e-06, "loss": 0.2834, "step": 10950 }, { "epoch": 0.5129994847051108, "grad_norm": 0.6373980005676643, "learning_rate": 4.356977729269016e-06, "loss": 0.2905, "step": 10951 }, { "epoch": 0.5130463296950392, "grad_norm": 0.5922924229853713, "learning_rate": 4.35685074873991e-06, "loss": 0.2947, "step": 10952 }, { "epoch": 0.5130931746849674, "grad_norm": 0.5788626105694363, "learning_rate": 4.356723757525093e-06, "loss": 0.2853, "step": 10953 }, { "epoch": 0.5131400196748958, "grad_norm": 0.5934831826921472, "learning_rate": 4.3565967556252985e-06, "loss": 0.2702, "step": 10954 }, { "epoch": 0.5131868646648241, "grad_norm": 0.6301133856951998, "learning_rate": 4.356469743041257e-06, "loss": 0.3111, "step": 10955 }, { "epoch": 0.5132337096547525, "grad_norm": 0.5744319387453717, "learning_rate": 4.3563427197736974e-06, "loss": 0.2815, "step": 10956 }, { "epoch": 0.5132805546446807, "grad_norm": 0.6484008107529765, "learning_rate": 4.356215685823353e-06, "loss": 0.2988, "step": 10957 }, { "epoch": 0.513327399634609, "grad_norm": 0.6143220193645244, "learning_rate": 4.356088641190953e-06, "loss": 0.2994, "step": 10958 }, { "epoch": 0.5133742446245374, "grad_norm": 0.6086160160505258, "learning_rate": 4.35596158587723e-06, "loss": 0.2867, "step": 10959 }, { "epoch": 0.5134210896144658, "grad_norm": 0.5592530211039295, "learning_rate": 4.355834519882914e-06, "loss": 0.2691, "step": 10960 }, { "epoch": 0.5134679346043941, "grad_norm": 0.6307151175068796, "learning_rate": 4.355707443208737e-06, "loss": 0.2929, "step": 10961 }, { "epoch": 0.5135147795943223, "grad_norm": 0.5843080384297433, "learning_rate": 4.355580355855431e-06, "loss": 0.2861, "step": 10962 }, { "epoch": 0.5135616245842507, "grad_norm": 0.5612821389786762, "learning_rate": 4.355453257823725e-06, "loss": 0.2817, "step": 10963 }, { "epoch": 0.513608469574179, "grad_norm": 0.6656790969377437, "learning_rate": 4.355326149114353e-06, "loss": 0.3049, "step": 10964 }, { "epoch": 0.5136553145641074, "grad_norm": 0.5990240379586759, "learning_rate": 4.355199029728044e-06, "loss": 0.2915, "step": 10965 }, { "epoch": 0.5137021595540356, "grad_norm": 0.6042689863427488, "learning_rate": 4.355071899665533e-06, "loss": 0.283, "step": 10966 }, { "epoch": 0.513749004543964, "grad_norm": 0.5851652515020958, "learning_rate": 4.354944758927548e-06, "loss": 0.2931, "step": 10967 }, { "epoch": 0.5137958495338923, "grad_norm": 0.6254266332206565, "learning_rate": 4.354817607514822e-06, "loss": 0.2974, "step": 10968 }, { "epoch": 0.5138426945238207, "grad_norm": 0.6182095899919435, "learning_rate": 4.354690445428087e-06, "loss": 0.2791, "step": 10969 }, { "epoch": 0.513889539513749, "grad_norm": 0.5870859676614121, "learning_rate": 4.354563272668076e-06, "loss": 0.2718, "step": 10970 }, { "epoch": 0.5139363845036773, "grad_norm": 0.591257064721887, "learning_rate": 4.354436089235518e-06, "loss": 0.2945, "step": 10971 }, { "epoch": 0.5139832294936056, "grad_norm": 0.6547385483308447, "learning_rate": 4.354308895131147e-06, "loss": 0.2883, "step": 10972 }, { "epoch": 0.514030074483534, "grad_norm": 0.5739288045154834, "learning_rate": 4.354181690355693e-06, "loss": 0.2758, "step": 10973 }, { "epoch": 0.5140769194734623, "grad_norm": 0.6198010601650219, "learning_rate": 4.354054474909891e-06, "loss": 0.2712, "step": 10974 }, { "epoch": 0.5141237644633906, "grad_norm": 0.6068636670167341, "learning_rate": 4.353927248794471e-06, "loss": 0.2909, "step": 10975 }, { "epoch": 0.5141706094533189, "grad_norm": 0.5534999496060814, "learning_rate": 4.3538000120101654e-06, "loss": 0.2733, "step": 10976 }, { "epoch": 0.5142174544432473, "grad_norm": 0.5772088371375789, "learning_rate": 4.353672764557707e-06, "loss": 0.2846, "step": 10977 }, { "epoch": 0.5142642994331756, "grad_norm": 0.5370010584142416, "learning_rate": 4.3535455064378275e-06, "loss": 0.2577, "step": 10978 }, { "epoch": 0.514311144423104, "grad_norm": 0.6240570626257836, "learning_rate": 4.35341823765126e-06, "loss": 0.2959, "step": 10979 }, { "epoch": 0.5143579894130322, "grad_norm": 0.5867470984947015, "learning_rate": 4.353290958198736e-06, "loss": 0.2877, "step": 10980 }, { "epoch": 0.5144048344029606, "grad_norm": 0.5695363187230797, "learning_rate": 4.353163668080988e-06, "loss": 0.2623, "step": 10981 }, { "epoch": 0.5144516793928889, "grad_norm": 0.6454276058577122, "learning_rate": 4.353036367298749e-06, "loss": 0.3003, "step": 10982 }, { "epoch": 0.5144985243828173, "grad_norm": 0.6767868449265192, "learning_rate": 4.352909055852752e-06, "loss": 0.2947, "step": 10983 }, { "epoch": 0.5145453693727455, "grad_norm": 0.5416581821138132, "learning_rate": 4.3527817337437286e-06, "loss": 0.2789, "step": 10984 }, { "epoch": 0.5145922143626739, "grad_norm": 0.5862165205154818, "learning_rate": 4.352654400972413e-06, "loss": 0.2838, "step": 10985 }, { "epoch": 0.5146390593526022, "grad_norm": 0.6398292809227069, "learning_rate": 4.352527057539535e-06, "loss": 0.2853, "step": 10986 }, { "epoch": 0.5146859043425306, "grad_norm": 0.6201580011569658, "learning_rate": 4.352399703445831e-06, "loss": 0.2813, "step": 10987 }, { "epoch": 0.5147327493324589, "grad_norm": 0.641302388292579, "learning_rate": 4.352272338692032e-06, "loss": 0.2958, "step": 10988 }, { "epoch": 0.5147795943223872, "grad_norm": 0.6212965976903155, "learning_rate": 4.35214496327887e-06, "loss": 0.2865, "step": 10989 }, { "epoch": 0.5148264393123155, "grad_norm": 0.6645670804844257, "learning_rate": 4.3520175772070815e-06, "loss": 0.2944, "step": 10990 }, { "epoch": 0.5148732843022439, "grad_norm": 0.6407531838506201, "learning_rate": 4.3518901804773956e-06, "loss": 0.295, "step": 10991 }, { "epoch": 0.5149201292921722, "grad_norm": 0.6699213854301312, "learning_rate": 4.351762773090548e-06, "loss": 0.3181, "step": 10992 }, { "epoch": 0.5149669742821005, "grad_norm": 0.6117399599331638, "learning_rate": 4.351635355047271e-06, "loss": 0.2714, "step": 10993 }, { "epoch": 0.5150138192720288, "grad_norm": 0.5579120074821137, "learning_rate": 4.351507926348297e-06, "loss": 0.2804, "step": 10994 }, { "epoch": 0.5150606642619572, "grad_norm": 0.5848823794541352, "learning_rate": 4.351380486994361e-06, "loss": 0.273, "step": 10995 }, { "epoch": 0.5151075092518855, "grad_norm": 0.6502564144771003, "learning_rate": 4.351253036986196e-06, "loss": 0.2739, "step": 10996 }, { "epoch": 0.5151543542418139, "grad_norm": 0.6010382415563921, "learning_rate": 4.351125576324534e-06, "loss": 0.3051, "step": 10997 }, { "epoch": 0.5152011992317421, "grad_norm": 0.6363110748440205, "learning_rate": 4.350998105010111e-06, "loss": 0.3062, "step": 10998 }, { "epoch": 0.5152480442216705, "grad_norm": 0.619525914392577, "learning_rate": 4.350870623043658e-06, "loss": 0.313, "step": 10999 }, { "epoch": 0.5152948892115988, "grad_norm": 0.6083752844572994, "learning_rate": 4.35074313042591e-06, "loss": 0.2864, "step": 11000 }, { "epoch": 0.5153417342015272, "grad_norm": 0.5472840212187731, "learning_rate": 4.3506156271576e-06, "loss": 0.2739, "step": 11001 }, { "epoch": 0.5153885791914554, "grad_norm": 0.6097517623887676, "learning_rate": 4.3504881132394635e-06, "loss": 0.29, "step": 11002 }, { "epoch": 0.5154354241813838, "grad_norm": 0.5035491114009069, "learning_rate": 4.350360588672232e-06, "loss": 0.244, "step": 11003 }, { "epoch": 0.5154822691713121, "grad_norm": 0.5676850184631465, "learning_rate": 4.35023305345664e-06, "loss": 0.2828, "step": 11004 }, { "epoch": 0.5155291141612405, "grad_norm": 0.5500082119837534, "learning_rate": 4.3501055075934235e-06, "loss": 0.2854, "step": 11005 }, { "epoch": 0.5155759591511688, "grad_norm": 0.6169947907546166, "learning_rate": 4.349977951083314e-06, "loss": 0.3022, "step": 11006 }, { "epoch": 0.5156228041410971, "grad_norm": 0.5974461483385453, "learning_rate": 4.349850383927046e-06, "loss": 0.269, "step": 11007 }, { "epoch": 0.5156696491310254, "grad_norm": 0.6041703799533459, "learning_rate": 4.349722806125354e-06, "loss": 0.2941, "step": 11008 }, { "epoch": 0.5157164941209538, "grad_norm": 0.6142984152213213, "learning_rate": 4.349595217678972e-06, "loss": 0.2892, "step": 11009 }, { "epoch": 0.5157633391108821, "grad_norm": 0.5809878353414932, "learning_rate": 4.349467618588635e-06, "loss": 0.2892, "step": 11010 }, { "epoch": 0.5158101841008104, "grad_norm": 0.5826923830397778, "learning_rate": 4.349340008855077e-06, "loss": 0.3019, "step": 11011 }, { "epoch": 0.5158570290907387, "grad_norm": 0.6504833368151896, "learning_rate": 4.3492123884790304e-06, "loss": 0.3053, "step": 11012 }, { "epoch": 0.5159038740806671, "grad_norm": 0.6527297536246299, "learning_rate": 4.349084757461232e-06, "loss": 0.2863, "step": 11013 }, { "epoch": 0.5159507190705954, "grad_norm": 0.6477121662514643, "learning_rate": 4.348957115802417e-06, "loss": 0.3159, "step": 11014 }, { "epoch": 0.5159975640605238, "grad_norm": 0.6363976134473628, "learning_rate": 4.348829463503317e-06, "loss": 0.2897, "step": 11015 }, { "epoch": 0.516044409050452, "grad_norm": 0.619238293889244, "learning_rate": 4.348701800564669e-06, "loss": 0.3007, "step": 11016 }, { "epoch": 0.5160912540403804, "grad_norm": 0.6334827911551695, "learning_rate": 4.3485741269872064e-06, "loss": 0.283, "step": 11017 }, { "epoch": 0.5161380990303087, "grad_norm": 0.5626615439214172, "learning_rate": 4.348446442771664e-06, "loss": 0.2559, "step": 11018 }, { "epoch": 0.5161849440202371, "grad_norm": 0.5723950736503084, "learning_rate": 4.348318747918777e-06, "loss": 0.2722, "step": 11019 }, { "epoch": 0.5162317890101653, "grad_norm": 0.590279551766315, "learning_rate": 4.348191042429281e-06, "loss": 0.3034, "step": 11020 }, { "epoch": 0.5162786340000937, "grad_norm": 0.691093258683552, "learning_rate": 4.348063326303909e-06, "loss": 0.2999, "step": 11021 }, { "epoch": 0.516325478990022, "grad_norm": 0.5922206775444705, "learning_rate": 4.347935599543397e-06, "loss": 0.3043, "step": 11022 }, { "epoch": 0.5163723239799504, "grad_norm": 0.5655936046577124, "learning_rate": 4.347807862148481e-06, "loss": 0.2737, "step": 11023 }, { "epoch": 0.5164191689698787, "grad_norm": 0.6103555186440048, "learning_rate": 4.3476801141198945e-06, "loss": 0.2781, "step": 11024 }, { "epoch": 0.516466013959807, "grad_norm": 0.6197777506051857, "learning_rate": 4.347552355458373e-06, "loss": 0.3191, "step": 11025 }, { "epoch": 0.5165128589497353, "grad_norm": 0.6464222186915843, "learning_rate": 4.347424586164653e-06, "loss": 0.2957, "step": 11026 }, { "epoch": 0.5165597039396637, "grad_norm": 0.570614128724917, "learning_rate": 4.347296806239468e-06, "loss": 0.2669, "step": 11027 }, { "epoch": 0.516606548929592, "grad_norm": 0.5808819425874109, "learning_rate": 4.347169015683555e-06, "loss": 0.2713, "step": 11028 }, { "epoch": 0.5166533939195203, "grad_norm": 0.6367110387389219, "learning_rate": 4.347041214497649e-06, "loss": 0.3094, "step": 11029 }, { "epoch": 0.5167002389094486, "grad_norm": 0.6164573819719218, "learning_rate": 4.346913402682484e-06, "loss": 0.2974, "step": 11030 }, { "epoch": 0.516747083899377, "grad_norm": 0.5715632525033899, "learning_rate": 4.346785580238797e-06, "loss": 0.269, "step": 11031 }, { "epoch": 0.5167939288893053, "grad_norm": 0.6006135260873297, "learning_rate": 4.346657747167323e-06, "loss": 0.2943, "step": 11032 }, { "epoch": 0.5168407738792337, "grad_norm": 0.5919619549251229, "learning_rate": 4.346529903468798e-06, "loss": 0.2755, "step": 11033 }, { "epoch": 0.5168876188691619, "grad_norm": 0.6262577229342742, "learning_rate": 4.346402049143957e-06, "loss": 0.2961, "step": 11034 }, { "epoch": 0.5169344638590903, "grad_norm": 0.597155993569627, "learning_rate": 4.346274184193537e-06, "loss": 0.2941, "step": 11035 }, { "epoch": 0.5169813088490186, "grad_norm": 0.6329025129874728, "learning_rate": 4.346146308618272e-06, "loss": 0.2878, "step": 11036 }, { "epoch": 0.517028153838947, "grad_norm": 0.5886180411452757, "learning_rate": 4.346018422418901e-06, "loss": 0.2841, "step": 11037 }, { "epoch": 0.5170749988288752, "grad_norm": 0.6423790043572538, "learning_rate": 4.345890525596156e-06, "loss": 0.3154, "step": 11038 }, { "epoch": 0.5171218438188036, "grad_norm": 0.6025440455036738, "learning_rate": 4.345762618150776e-06, "loss": 0.2892, "step": 11039 }, { "epoch": 0.5171686888087319, "grad_norm": 0.636145860183095, "learning_rate": 4.345634700083495e-06, "loss": 0.2784, "step": 11040 }, { "epoch": 0.5172155337986603, "grad_norm": 0.6232945330334855, "learning_rate": 4.345506771395051e-06, "loss": 0.2921, "step": 11041 }, { "epoch": 0.5172623787885886, "grad_norm": 0.5759200052556299, "learning_rate": 4.34537883208618e-06, "loss": 0.2772, "step": 11042 }, { "epoch": 0.5173092237785168, "grad_norm": 0.6601667394032723, "learning_rate": 4.345250882157617e-06, "loss": 0.2907, "step": 11043 }, { "epoch": 0.5173560687684452, "grad_norm": 0.5751770512092533, "learning_rate": 4.345122921610098e-06, "loss": 0.2818, "step": 11044 }, { "epoch": 0.5174029137583736, "grad_norm": 0.6145733251806964, "learning_rate": 4.3449949504443615e-06, "loss": 0.289, "step": 11045 }, { "epoch": 0.5174497587483019, "grad_norm": 0.6114288813587997, "learning_rate": 4.344866968661142e-06, "loss": 0.3069, "step": 11046 }, { "epoch": 0.5174966037382301, "grad_norm": 0.6654717956966402, "learning_rate": 4.3447389762611776e-06, "loss": 0.291, "step": 11047 }, { "epoch": 0.5175434487281585, "grad_norm": 0.6393737303657416, "learning_rate": 4.344610973245204e-06, "loss": 0.311, "step": 11048 }, { "epoch": 0.5175902937180868, "grad_norm": 0.5938972628526403, "learning_rate": 4.344482959613957e-06, "loss": 0.2931, "step": 11049 }, { "epoch": 0.5176371387080152, "grad_norm": 0.5674770338535831, "learning_rate": 4.3443549353681755e-06, "loss": 0.296, "step": 11050 }, { "epoch": 0.5176839836979436, "grad_norm": 0.624320952342484, "learning_rate": 4.344226900508594e-06, "loss": 0.2822, "step": 11051 }, { "epoch": 0.5177308286878718, "grad_norm": 0.5900038964458734, "learning_rate": 4.34409885503595e-06, "loss": 0.2843, "step": 11052 }, { "epoch": 0.5177776736778001, "grad_norm": 0.6146983667857396, "learning_rate": 4.343970798950981e-06, "loss": 0.3005, "step": 11053 }, { "epoch": 0.5178245186677285, "grad_norm": 0.6122804050879163, "learning_rate": 4.343842732254424e-06, "loss": 0.2777, "step": 11054 }, { "epoch": 0.5178713636576568, "grad_norm": 0.5744214755268583, "learning_rate": 4.343714654947015e-06, "loss": 0.2739, "step": 11055 }, { "epoch": 0.5179182086475851, "grad_norm": 0.633736541180878, "learning_rate": 4.3435865670294915e-06, "loss": 0.2844, "step": 11056 }, { "epoch": 0.5179650536375134, "grad_norm": 0.6225508648066066, "learning_rate": 4.34345846850259e-06, "loss": 0.2842, "step": 11057 }, { "epoch": 0.5180118986274418, "grad_norm": 0.6669966614419691, "learning_rate": 4.343330359367049e-06, "loss": 0.286, "step": 11058 }, { "epoch": 0.5180587436173701, "grad_norm": 0.568765790672564, "learning_rate": 4.343202239623606e-06, "loss": 0.2579, "step": 11059 }, { "epoch": 0.5181055886072985, "grad_norm": 0.5817054191869325, "learning_rate": 4.343074109272997e-06, "loss": 0.2888, "step": 11060 }, { "epoch": 0.5181524335972267, "grad_norm": 0.6169551127856375, "learning_rate": 4.342945968315959e-06, "loss": 0.2996, "step": 11061 }, { "epoch": 0.5181992785871551, "grad_norm": 0.5685703692317077, "learning_rate": 4.34281781675323e-06, "loss": 0.2799, "step": 11062 }, { "epoch": 0.5182461235770834, "grad_norm": 0.5978874780514013, "learning_rate": 4.342689654585548e-06, "loss": 0.3048, "step": 11063 }, { "epoch": 0.5182929685670118, "grad_norm": 0.6344792258754772, "learning_rate": 4.342561481813651e-06, "loss": 0.3104, "step": 11064 }, { "epoch": 0.51833981355694, "grad_norm": 0.6032304816459773, "learning_rate": 4.342433298438275e-06, "loss": 0.2959, "step": 11065 }, { "epoch": 0.5183866585468684, "grad_norm": 0.6008341647273985, "learning_rate": 4.342305104460158e-06, "loss": 0.2806, "step": 11066 }, { "epoch": 0.5184335035367967, "grad_norm": 0.5970968870092914, "learning_rate": 4.342176899880038e-06, "loss": 0.3051, "step": 11067 }, { "epoch": 0.5184803485267251, "grad_norm": 0.5613420794218114, "learning_rate": 4.342048684698654e-06, "loss": 0.283, "step": 11068 }, { "epoch": 0.5185271935166534, "grad_norm": 0.5825841468761527, "learning_rate": 4.341920458916743e-06, "loss": 0.2796, "step": 11069 }, { "epoch": 0.5185740385065817, "grad_norm": 0.7039616710432619, "learning_rate": 4.3417922225350415e-06, "loss": 0.3089, "step": 11070 }, { "epoch": 0.51862088349651, "grad_norm": 0.5917791608727438, "learning_rate": 4.34166397555429e-06, "loss": 0.2713, "step": 11071 }, { "epoch": 0.5186677284864384, "grad_norm": 0.7302619500399576, "learning_rate": 4.341535717975225e-06, "loss": 0.2945, "step": 11072 }, { "epoch": 0.5187145734763667, "grad_norm": 0.6535464067134279, "learning_rate": 4.341407449798583e-06, "loss": 0.2939, "step": 11073 }, { "epoch": 0.518761418466295, "grad_norm": 0.6326136312494309, "learning_rate": 4.341279171025105e-06, "loss": 0.2901, "step": 11074 }, { "epoch": 0.5188082634562233, "grad_norm": 0.6033623811026761, "learning_rate": 4.341150881655529e-06, "loss": 0.2805, "step": 11075 }, { "epoch": 0.5188551084461517, "grad_norm": 0.5871201270671512, "learning_rate": 4.341022581690592e-06, "loss": 0.2844, "step": 11076 }, { "epoch": 0.51890195343608, "grad_norm": 0.620956423437137, "learning_rate": 4.340894271131033e-06, "loss": 0.3123, "step": 11077 }, { "epoch": 0.5189487984260084, "grad_norm": 0.6027917836785641, "learning_rate": 4.3407659499775904e-06, "loss": 0.309, "step": 11078 }, { "epoch": 0.5189956434159366, "grad_norm": 0.590278243429245, "learning_rate": 4.3406376182310014e-06, "loss": 0.2847, "step": 11079 }, { "epoch": 0.519042488405865, "grad_norm": 0.5594569680442453, "learning_rate": 4.340509275892005e-06, "loss": 0.2813, "step": 11080 }, { "epoch": 0.5190893333957933, "grad_norm": 0.6062031854442725, "learning_rate": 4.340380922961342e-06, "loss": 0.2696, "step": 11081 }, { "epoch": 0.5191361783857217, "grad_norm": 0.6408621466306025, "learning_rate": 4.340252559439748e-06, "loss": 0.2954, "step": 11082 }, { "epoch": 0.5191830233756499, "grad_norm": 0.6342495993082392, "learning_rate": 4.3401241853279645e-06, "loss": 0.2955, "step": 11083 }, { "epoch": 0.5192298683655783, "grad_norm": 0.6360197732215863, "learning_rate": 4.339995800626727e-06, "loss": 0.2917, "step": 11084 }, { "epoch": 0.5192767133555066, "grad_norm": 0.5789009518631654, "learning_rate": 4.339867405336777e-06, "loss": 0.2821, "step": 11085 }, { "epoch": 0.519323558345435, "grad_norm": 0.6285802610942233, "learning_rate": 4.339738999458854e-06, "loss": 0.2938, "step": 11086 }, { "epoch": 0.5193704033353633, "grad_norm": 0.6317635853958837, "learning_rate": 4.339610582993694e-06, "loss": 0.2991, "step": 11087 }, { "epoch": 0.5194172483252916, "grad_norm": 0.6483314956008173, "learning_rate": 4.339482155942037e-06, "loss": 0.3068, "step": 11088 }, { "epoch": 0.5194640933152199, "grad_norm": 0.633187680261084, "learning_rate": 4.339353718304623e-06, "loss": 0.2793, "step": 11089 }, { "epoch": 0.5195109383051483, "grad_norm": 0.5636564515737358, "learning_rate": 4.3392252700821904e-06, "loss": 0.2643, "step": 11090 }, { "epoch": 0.5195577832950766, "grad_norm": 0.6513021162777123, "learning_rate": 4.33909681127548e-06, "loss": 0.3021, "step": 11091 }, { "epoch": 0.5196046282850049, "grad_norm": 0.699842856298427, "learning_rate": 4.338968341885228e-06, "loss": 0.3294, "step": 11092 }, { "epoch": 0.5196514732749332, "grad_norm": 0.5662589727297924, "learning_rate": 4.338839861912176e-06, "loss": 0.2841, "step": 11093 }, { "epoch": 0.5196983182648616, "grad_norm": 0.5738133169179584, "learning_rate": 4.338711371357063e-06, "loss": 0.2925, "step": 11094 }, { "epoch": 0.5197451632547899, "grad_norm": 0.5844963321795227, "learning_rate": 4.338582870220628e-06, "loss": 0.3012, "step": 11095 }, { "epoch": 0.5197920082447183, "grad_norm": 0.6548315634505245, "learning_rate": 4.33845435850361e-06, "loss": 0.3097, "step": 11096 }, { "epoch": 0.5198388532346465, "grad_norm": 0.6139986550474338, "learning_rate": 4.33832583620675e-06, "loss": 0.2922, "step": 11097 }, { "epoch": 0.5198856982245749, "grad_norm": 0.5934690572835432, "learning_rate": 4.338197303330787e-06, "loss": 0.3079, "step": 11098 }, { "epoch": 0.5199325432145032, "grad_norm": 0.6317736405990328, "learning_rate": 4.3380687598764604e-06, "loss": 0.2945, "step": 11099 }, { "epoch": 0.5199793882044316, "grad_norm": 0.5793709234237141, "learning_rate": 4.33794020584451e-06, "loss": 0.2861, "step": 11100 }, { "epoch": 0.5200262331943598, "grad_norm": 0.5994727388931396, "learning_rate": 4.337811641235676e-06, "loss": 0.2811, "step": 11101 }, { "epoch": 0.5200730781842882, "grad_norm": 0.5929356795445042, "learning_rate": 4.337683066050698e-06, "loss": 0.2667, "step": 11102 }, { "epoch": 0.5201199231742165, "grad_norm": 0.6208713708966874, "learning_rate": 4.3375544802903144e-06, "loss": 0.3135, "step": 11103 }, { "epoch": 0.5201667681641449, "grad_norm": 0.5569919628106705, "learning_rate": 4.337425883955269e-06, "loss": 0.2669, "step": 11104 }, { "epoch": 0.5202136131540732, "grad_norm": 0.6021219756154674, "learning_rate": 4.337297277046297e-06, "loss": 0.2669, "step": 11105 }, { "epoch": 0.5202604581440015, "grad_norm": 0.5766657142445404, "learning_rate": 4.337168659564143e-06, "loss": 0.283, "step": 11106 }, { "epoch": 0.5203073031339298, "grad_norm": 0.5647432356616247, "learning_rate": 4.337040031509544e-06, "loss": 0.2645, "step": 11107 }, { "epoch": 0.5203541481238582, "grad_norm": 0.659986018937969, "learning_rate": 4.336911392883241e-06, "loss": 0.2988, "step": 11108 }, { "epoch": 0.5204009931137865, "grad_norm": 0.6149099324654016, "learning_rate": 4.336782743685976e-06, "loss": 0.2743, "step": 11109 }, { "epoch": 0.5204478381037148, "grad_norm": 0.6841760566749358, "learning_rate": 4.336654083918487e-06, "loss": 0.2852, "step": 11110 }, { "epoch": 0.5204946830936431, "grad_norm": 0.5895099649577563, "learning_rate": 4.336525413581515e-06, "loss": 0.2955, "step": 11111 }, { "epoch": 0.5205415280835715, "grad_norm": 0.6344601766683874, "learning_rate": 4.336396732675802e-06, "loss": 0.3242, "step": 11112 }, { "epoch": 0.5205883730734998, "grad_norm": 0.6149456180147616, "learning_rate": 4.336268041202087e-06, "loss": 0.3023, "step": 11113 }, { "epoch": 0.5206352180634282, "grad_norm": 0.6047002062721029, "learning_rate": 4.3361393391611094e-06, "loss": 0.2825, "step": 11114 }, { "epoch": 0.5206820630533564, "grad_norm": 0.6460515518810478, "learning_rate": 4.336010626553613e-06, "loss": 0.2976, "step": 11115 }, { "epoch": 0.5207289080432848, "grad_norm": 0.6176623187139234, "learning_rate": 4.335881903380337e-06, "loss": 0.2897, "step": 11116 }, { "epoch": 0.5207757530332131, "grad_norm": 0.63678259941562, "learning_rate": 4.33575316964202e-06, "loss": 0.2615, "step": 11117 }, { "epoch": 0.5208225980231415, "grad_norm": 0.5677566821748921, "learning_rate": 4.335624425339407e-06, "loss": 0.2785, "step": 11118 }, { "epoch": 0.5208694430130697, "grad_norm": 0.5685376968261605, "learning_rate": 4.335495670473237e-06, "loss": 0.2648, "step": 11119 }, { "epoch": 0.520916288002998, "grad_norm": 0.6202865540668495, "learning_rate": 4.33536690504425e-06, "loss": 0.3089, "step": 11120 }, { "epoch": 0.5209631329929264, "grad_norm": 0.6162822809217452, "learning_rate": 4.335238129053188e-06, "loss": 0.3108, "step": 11121 }, { "epoch": 0.5210099779828548, "grad_norm": 0.5645379154517977, "learning_rate": 4.335109342500792e-06, "loss": 0.2753, "step": 11122 }, { "epoch": 0.5210568229727831, "grad_norm": 0.5967602918593906, "learning_rate": 4.334980545387802e-06, "loss": 0.2851, "step": 11123 }, { "epoch": 0.5211036679627113, "grad_norm": 0.6024150218691483, "learning_rate": 4.334851737714961e-06, "loss": 0.2673, "step": 11124 }, { "epoch": 0.5211505129526397, "grad_norm": 0.5747780440967651, "learning_rate": 4.3347229194830095e-06, "loss": 0.282, "step": 11125 }, { "epoch": 0.521197357942568, "grad_norm": 0.5671842510761486, "learning_rate": 4.334594090692689e-06, "loss": 0.2696, "step": 11126 }, { "epoch": 0.5212442029324964, "grad_norm": 0.524457122620761, "learning_rate": 4.334465251344739e-06, "loss": 0.2618, "step": 11127 }, { "epoch": 0.5212910479224246, "grad_norm": 0.6524386854318016, "learning_rate": 4.334336401439904e-06, "loss": 0.2835, "step": 11128 }, { "epoch": 0.521337892912353, "grad_norm": 0.6264405421877305, "learning_rate": 4.334207540978924e-06, "loss": 0.2744, "step": 11129 }, { "epoch": 0.5213847379022813, "grad_norm": 0.6367494847602089, "learning_rate": 4.3340786699625405e-06, "loss": 0.3038, "step": 11130 }, { "epoch": 0.5214315828922097, "grad_norm": 0.5803937159678048, "learning_rate": 4.333949788391495e-06, "loss": 0.2842, "step": 11131 }, { "epoch": 0.521478427882138, "grad_norm": 0.5614792791519654, "learning_rate": 4.333820896266529e-06, "loss": 0.2773, "step": 11132 }, { "epoch": 0.5215252728720663, "grad_norm": 0.5772995621941769, "learning_rate": 4.333691993588385e-06, "loss": 0.2811, "step": 11133 }, { "epoch": 0.5215721178619946, "grad_norm": 0.6366040226092267, "learning_rate": 4.333563080357805e-06, "loss": 0.3106, "step": 11134 }, { "epoch": 0.521618962851923, "grad_norm": 0.6924293035604718, "learning_rate": 4.33343415657553e-06, "loss": 0.3136, "step": 11135 }, { "epoch": 0.5216658078418513, "grad_norm": 0.6400077720137565, "learning_rate": 4.3333052222423024e-06, "loss": 0.2912, "step": 11136 }, { "epoch": 0.5217126528317796, "grad_norm": 0.5955845693348228, "learning_rate": 4.333176277358864e-06, "loss": 0.3114, "step": 11137 }, { "epoch": 0.5217594978217079, "grad_norm": 0.5831059829029404, "learning_rate": 4.333047321925956e-06, "loss": 0.3026, "step": 11138 }, { "epoch": 0.5218063428116363, "grad_norm": 0.6402354132736229, "learning_rate": 4.3329183559443226e-06, "loss": 0.2958, "step": 11139 }, { "epoch": 0.5218531878015646, "grad_norm": 0.6162048751344211, "learning_rate": 4.332789379414704e-06, "loss": 0.3124, "step": 11140 }, { "epoch": 0.521900032791493, "grad_norm": 0.6355054005550503, "learning_rate": 4.332660392337843e-06, "loss": 0.2696, "step": 11141 }, { "epoch": 0.5219468777814212, "grad_norm": 0.6106197946493224, "learning_rate": 4.332531394714482e-06, "loss": 0.2798, "step": 11142 }, { "epoch": 0.5219937227713496, "grad_norm": 0.6336691850131995, "learning_rate": 4.332402386545365e-06, "loss": 0.3026, "step": 11143 }, { "epoch": 0.5220405677612779, "grad_norm": 0.5690508362141015, "learning_rate": 4.332273367831231e-06, "loss": 0.2693, "step": 11144 }, { "epoch": 0.5220874127512063, "grad_norm": 0.5675484351628478, "learning_rate": 4.332144338572825e-06, "loss": 0.2983, "step": 11145 }, { "epoch": 0.5221342577411345, "grad_norm": 0.552801376765233, "learning_rate": 4.3320152987708885e-06, "loss": 0.2707, "step": 11146 }, { "epoch": 0.5221811027310629, "grad_norm": 0.5970721522825074, "learning_rate": 4.331886248426164e-06, "loss": 0.2941, "step": 11147 }, { "epoch": 0.5222279477209912, "grad_norm": 0.6085513994281997, "learning_rate": 4.331757187539396e-06, "loss": 0.2826, "step": 11148 }, { "epoch": 0.5222747927109196, "grad_norm": 0.5889752523138961, "learning_rate": 4.331628116111326e-06, "loss": 0.2936, "step": 11149 }, { "epoch": 0.5223216377008479, "grad_norm": 0.5590676828071188, "learning_rate": 4.331499034142695e-06, "loss": 0.266, "step": 11150 }, { "epoch": 0.5223684826907762, "grad_norm": 0.6137356720870348, "learning_rate": 4.331369941634248e-06, "loss": 0.2876, "step": 11151 }, { "epoch": 0.5224153276807045, "grad_norm": 0.6025004788892071, "learning_rate": 4.331240838586727e-06, "loss": 0.3014, "step": 11152 }, { "epoch": 0.5224621726706329, "grad_norm": 0.5544764127775614, "learning_rate": 4.331111725000876e-06, "loss": 0.2875, "step": 11153 }, { "epoch": 0.5225090176605612, "grad_norm": 0.6241733339922937, "learning_rate": 4.3309826008774365e-06, "loss": 0.3045, "step": 11154 }, { "epoch": 0.5225558626504895, "grad_norm": 0.6096403688117996, "learning_rate": 4.330853466217153e-06, "loss": 0.3041, "step": 11155 }, { "epoch": 0.5226027076404178, "grad_norm": 0.5812958940905015, "learning_rate": 4.330724321020767e-06, "loss": 0.2846, "step": 11156 }, { "epoch": 0.5226495526303462, "grad_norm": 0.615778521783335, "learning_rate": 4.3305951652890235e-06, "loss": 0.2852, "step": 11157 }, { "epoch": 0.5226963976202745, "grad_norm": 0.5949445472510068, "learning_rate": 4.330465999022665e-06, "loss": 0.2785, "step": 11158 }, { "epoch": 0.5227432426102029, "grad_norm": 0.6578038247842574, "learning_rate": 4.330336822222434e-06, "loss": 0.3067, "step": 11159 }, { "epoch": 0.5227900876001311, "grad_norm": 0.587853906354717, "learning_rate": 4.330207634889075e-06, "loss": 0.2954, "step": 11160 }, { "epoch": 0.5228369325900595, "grad_norm": 0.6113799066145682, "learning_rate": 4.330078437023331e-06, "loss": 0.291, "step": 11161 }, { "epoch": 0.5228837775799878, "grad_norm": 0.6143376785590579, "learning_rate": 4.329949228625946e-06, "loss": 0.3006, "step": 11162 }, { "epoch": 0.5229306225699162, "grad_norm": 0.606968898696699, "learning_rate": 4.329820009697663e-06, "loss": 0.2741, "step": 11163 }, { "epoch": 0.5229774675598444, "grad_norm": 0.6153974264382114, "learning_rate": 4.329690780239225e-06, "loss": 0.2973, "step": 11164 }, { "epoch": 0.5230243125497728, "grad_norm": 0.6202157566990727, "learning_rate": 4.329561540251377e-06, "loss": 0.2903, "step": 11165 }, { "epoch": 0.5230711575397011, "grad_norm": 0.5852564271139712, "learning_rate": 4.329432289734862e-06, "loss": 0.2856, "step": 11166 }, { "epoch": 0.5231180025296295, "grad_norm": 0.607489401794436, "learning_rate": 4.329303028690425e-06, "loss": 0.2979, "step": 11167 }, { "epoch": 0.5231648475195578, "grad_norm": 0.6350340317039198, "learning_rate": 4.329173757118808e-06, "loss": 0.3144, "step": 11168 }, { "epoch": 0.5232116925094861, "grad_norm": 0.5825203744031735, "learning_rate": 4.329044475020755e-06, "loss": 0.3021, "step": 11169 }, { "epoch": 0.5232585374994144, "grad_norm": 0.6340621915312294, "learning_rate": 4.328915182397012e-06, "loss": 0.2989, "step": 11170 }, { "epoch": 0.5233053824893428, "grad_norm": 0.5631226444299061, "learning_rate": 4.328785879248321e-06, "loss": 0.2706, "step": 11171 }, { "epoch": 0.5233522274792711, "grad_norm": 0.597621318908221, "learning_rate": 4.328656565575427e-06, "loss": 0.2778, "step": 11172 }, { "epoch": 0.5233990724691994, "grad_norm": 0.6181459003720743, "learning_rate": 4.3285272413790745e-06, "loss": 0.2761, "step": 11173 }, { "epoch": 0.5234459174591277, "grad_norm": 0.645492135616877, "learning_rate": 4.328397906660007e-06, "loss": 0.2863, "step": 11174 }, { "epoch": 0.5234927624490561, "grad_norm": 0.5386895381524486, "learning_rate": 4.32826856141897e-06, "loss": 0.2785, "step": 11175 }, { "epoch": 0.5235396074389844, "grad_norm": 0.5452722545270224, "learning_rate": 4.328139205656705e-06, "loss": 0.2648, "step": 11176 }, { "epoch": 0.5235864524289128, "grad_norm": 0.6245906043137692, "learning_rate": 4.32800983937396e-06, "loss": 0.2831, "step": 11177 }, { "epoch": 0.523633297418841, "grad_norm": 0.5559201576271079, "learning_rate": 4.327880462571478e-06, "loss": 0.2737, "step": 11178 }, { "epoch": 0.5236801424087694, "grad_norm": 0.6023751567550435, "learning_rate": 4.327751075250003e-06, "loss": 0.2893, "step": 11179 }, { "epoch": 0.5237269873986977, "grad_norm": 0.619190679001321, "learning_rate": 4.3276216774102795e-06, "loss": 0.2908, "step": 11180 }, { "epoch": 0.5237738323886261, "grad_norm": 0.6514077501103835, "learning_rate": 4.3274922690530525e-06, "loss": 0.312, "step": 11181 }, { "epoch": 0.5238206773785543, "grad_norm": 0.5612996652147356, "learning_rate": 4.327362850179067e-06, "loss": 0.2821, "step": 11182 }, { "epoch": 0.5238675223684827, "grad_norm": 0.6005174829110926, "learning_rate": 4.3272334207890675e-06, "loss": 0.2894, "step": 11183 }, { "epoch": 0.523914367358411, "grad_norm": 0.642668833973062, "learning_rate": 4.3271039808837985e-06, "loss": 0.3099, "step": 11184 }, { "epoch": 0.5239612123483394, "grad_norm": 0.5848491544343796, "learning_rate": 4.326974530464006e-06, "loss": 0.3009, "step": 11185 }, { "epoch": 0.5240080573382677, "grad_norm": 0.6020490665146434, "learning_rate": 4.326845069530435e-06, "loss": 0.2962, "step": 11186 }, { "epoch": 0.524054902328196, "grad_norm": 0.5661507929371233, "learning_rate": 4.326715598083829e-06, "loss": 0.286, "step": 11187 }, { "epoch": 0.5241017473181243, "grad_norm": 0.5634578536447936, "learning_rate": 4.326586116124933e-06, "loss": 0.2827, "step": 11188 }, { "epoch": 0.5241485923080527, "grad_norm": 0.5443235810413771, "learning_rate": 4.326456623654495e-06, "loss": 0.2628, "step": 11189 }, { "epoch": 0.524195437297981, "grad_norm": 0.6183546922197048, "learning_rate": 4.326327120673256e-06, "loss": 0.2744, "step": 11190 }, { "epoch": 0.5242422822879093, "grad_norm": 0.5813307024929032, "learning_rate": 4.326197607181965e-06, "loss": 0.2997, "step": 11191 }, { "epoch": 0.5242891272778376, "grad_norm": 0.5558420561555407, "learning_rate": 4.326068083181365e-06, "loss": 0.2871, "step": 11192 }, { "epoch": 0.524335972267766, "grad_norm": 0.519323759634061, "learning_rate": 4.325938548672203e-06, "loss": 0.2637, "step": 11193 }, { "epoch": 0.5243828172576943, "grad_norm": 0.5737252428737483, "learning_rate": 4.325809003655222e-06, "loss": 0.2774, "step": 11194 }, { "epoch": 0.5244296622476227, "grad_norm": 0.5998531869625489, "learning_rate": 4.325679448131171e-06, "loss": 0.3023, "step": 11195 }, { "epoch": 0.5244765072375509, "grad_norm": 0.6210627059957736, "learning_rate": 4.3255498821007925e-06, "loss": 0.3017, "step": 11196 }, { "epoch": 0.5245233522274793, "grad_norm": 0.545605169114864, "learning_rate": 4.325420305564833e-06, "loss": 0.263, "step": 11197 }, { "epoch": 0.5245701972174076, "grad_norm": 0.5641712878033072, "learning_rate": 4.325290718524039e-06, "loss": 0.2665, "step": 11198 }, { "epoch": 0.524617042207336, "grad_norm": 0.5834024627465492, "learning_rate": 4.325161120979156e-06, "loss": 0.2855, "step": 11199 }, { "epoch": 0.5246638871972642, "grad_norm": 0.5442000997349271, "learning_rate": 4.325031512930929e-06, "loss": 0.278, "step": 11200 }, { "epoch": 0.5247107321871926, "grad_norm": 0.6681653540052613, "learning_rate": 4.3249018943801045e-06, "loss": 0.3031, "step": 11201 }, { "epoch": 0.5247575771771209, "grad_norm": 0.6647178855276488, "learning_rate": 4.324772265327428e-06, "loss": 0.2989, "step": 11202 }, { "epoch": 0.5248044221670493, "grad_norm": 0.624018524224535, "learning_rate": 4.324642625773646e-06, "loss": 0.2775, "step": 11203 }, { "epoch": 0.5248512671569776, "grad_norm": 0.6173240671450925, "learning_rate": 4.324512975719504e-06, "loss": 0.2762, "step": 11204 }, { "epoch": 0.5248981121469058, "grad_norm": 0.5926707779905205, "learning_rate": 4.3243833151657484e-06, "loss": 0.2917, "step": 11205 }, { "epoch": 0.5249449571368342, "grad_norm": 0.604008956707494, "learning_rate": 4.324253644113126e-06, "loss": 0.2873, "step": 11206 }, { "epoch": 0.5249918021267626, "grad_norm": 0.6123615366961542, "learning_rate": 4.324123962562382e-06, "loss": 0.2864, "step": 11207 }, { "epoch": 0.5250386471166909, "grad_norm": 0.6749602090468068, "learning_rate": 4.323994270514263e-06, "loss": 0.3044, "step": 11208 }, { "epoch": 0.5250854921066191, "grad_norm": 0.6440117524476826, "learning_rate": 4.323864567969516e-06, "loss": 0.2992, "step": 11209 }, { "epoch": 0.5251323370965475, "grad_norm": 0.6238580169359921, "learning_rate": 4.323734854928886e-06, "loss": 0.3072, "step": 11210 }, { "epoch": 0.5251791820864758, "grad_norm": 0.5552410064971803, "learning_rate": 4.32360513139312e-06, "loss": 0.2678, "step": 11211 }, { "epoch": 0.5252260270764042, "grad_norm": 0.6035837450348757, "learning_rate": 4.323475397362966e-06, "loss": 0.2817, "step": 11212 }, { "epoch": 0.5252728720663326, "grad_norm": 0.6089050511406808, "learning_rate": 4.323345652839168e-06, "loss": 0.3047, "step": 11213 }, { "epoch": 0.5253197170562608, "grad_norm": 0.6198678103858211, "learning_rate": 4.323215897822476e-06, "loss": 0.2923, "step": 11214 }, { "epoch": 0.5253665620461891, "grad_norm": 0.651090756923342, "learning_rate": 4.323086132313633e-06, "loss": 0.304, "step": 11215 }, { "epoch": 0.5254134070361175, "grad_norm": 0.5944503201931962, "learning_rate": 4.322956356313388e-06, "loss": 0.2879, "step": 11216 }, { "epoch": 0.5254602520260458, "grad_norm": 0.5795637817937612, "learning_rate": 4.322826569822488e-06, "loss": 0.287, "step": 11217 }, { "epoch": 0.5255070970159741, "grad_norm": 0.6333825875486857, "learning_rate": 4.322696772841679e-06, "loss": 0.2859, "step": 11218 }, { "epoch": 0.5255539420059024, "grad_norm": 0.6428318312589405, "learning_rate": 4.322566965371707e-06, "loss": 0.32, "step": 11219 }, { "epoch": 0.5256007869958308, "grad_norm": 0.6197250107079925, "learning_rate": 4.322437147413321e-06, "loss": 0.303, "step": 11220 }, { "epoch": 0.5256476319857591, "grad_norm": 0.6443247206071037, "learning_rate": 4.322307318967268e-06, "loss": 0.305, "step": 11221 }, { "epoch": 0.5256944769756875, "grad_norm": 0.6301589678890631, "learning_rate": 4.322177480034293e-06, "loss": 0.3122, "step": 11222 }, { "epoch": 0.5257413219656157, "grad_norm": 0.5809818536070755, "learning_rate": 4.322047630615146e-06, "loss": 0.2925, "step": 11223 }, { "epoch": 0.5257881669555441, "grad_norm": 0.5905678573177042, "learning_rate": 4.321917770710572e-06, "loss": 0.2983, "step": 11224 }, { "epoch": 0.5258350119454724, "grad_norm": 0.552183848470397, "learning_rate": 4.321787900321319e-06, "loss": 0.2857, "step": 11225 }, { "epoch": 0.5258818569354008, "grad_norm": 0.6355753007616421, "learning_rate": 4.321658019448135e-06, "loss": 0.2899, "step": 11226 }, { "epoch": 0.525928701925329, "grad_norm": 0.6040471430997754, "learning_rate": 4.321528128091768e-06, "loss": 0.2962, "step": 11227 }, { "epoch": 0.5259755469152574, "grad_norm": 0.5879236961651527, "learning_rate": 4.321398226252963e-06, "loss": 0.2816, "step": 11228 }, { "epoch": 0.5260223919051857, "grad_norm": 0.5960948399304394, "learning_rate": 4.321268313932469e-06, "loss": 0.2676, "step": 11229 }, { "epoch": 0.5260692368951141, "grad_norm": 0.6201553339298324, "learning_rate": 4.3211383911310335e-06, "loss": 0.2687, "step": 11230 }, { "epoch": 0.5261160818850424, "grad_norm": 0.5594483864799087, "learning_rate": 4.321008457849405e-06, "loss": 0.2685, "step": 11231 }, { "epoch": 0.5261629268749707, "grad_norm": 0.6383183110138034, "learning_rate": 4.3208785140883305e-06, "loss": 0.2985, "step": 11232 }, { "epoch": 0.526209771864899, "grad_norm": 0.5857403563191396, "learning_rate": 4.320748559848558e-06, "loss": 0.2759, "step": 11233 }, { "epoch": 0.5262566168548274, "grad_norm": 0.5675172091341794, "learning_rate": 4.320618595130834e-06, "loss": 0.2752, "step": 11234 }, { "epoch": 0.5263034618447557, "grad_norm": 0.6036301509435799, "learning_rate": 4.320488619935909e-06, "loss": 0.2898, "step": 11235 }, { "epoch": 0.526350306834684, "grad_norm": 0.6216328004540036, "learning_rate": 4.320358634264527e-06, "loss": 0.3174, "step": 11236 }, { "epoch": 0.5263971518246123, "grad_norm": 0.5869446151371548, "learning_rate": 4.320228638117442e-06, "loss": 0.2699, "step": 11237 }, { "epoch": 0.5264439968145407, "grad_norm": 0.6147176771643923, "learning_rate": 4.320098631495397e-06, "loss": 0.2904, "step": 11238 }, { "epoch": 0.526490841804469, "grad_norm": 0.6007825143172656, "learning_rate": 4.319968614399141e-06, "loss": 0.2782, "step": 11239 }, { "epoch": 0.5265376867943974, "grad_norm": 0.6142162129144018, "learning_rate": 4.3198385868294245e-06, "loss": 0.2869, "step": 11240 }, { "epoch": 0.5265845317843256, "grad_norm": 0.5920078283446599, "learning_rate": 4.319708548786994e-06, "loss": 0.2793, "step": 11241 }, { "epoch": 0.526631376774254, "grad_norm": 0.631644813626642, "learning_rate": 4.319578500272598e-06, "loss": 0.297, "step": 11242 }, { "epoch": 0.5266782217641823, "grad_norm": 0.5863564964508207, "learning_rate": 4.3194484412869845e-06, "loss": 0.2681, "step": 11243 }, { "epoch": 0.5267250667541107, "grad_norm": 0.5881293989218185, "learning_rate": 4.319318371830903e-06, "loss": 0.2806, "step": 11244 }, { "epoch": 0.5267719117440389, "grad_norm": 0.6334908918928498, "learning_rate": 4.319188291905102e-06, "loss": 0.301, "step": 11245 }, { "epoch": 0.5268187567339673, "grad_norm": 0.5798310257190257, "learning_rate": 4.319058201510329e-06, "loss": 0.278, "step": 11246 }, { "epoch": 0.5268656017238956, "grad_norm": 0.6168057187331119, "learning_rate": 4.318928100647334e-06, "loss": 0.3083, "step": 11247 }, { "epoch": 0.526912446713824, "grad_norm": 0.6030897978828261, "learning_rate": 4.318797989316864e-06, "loss": 0.3022, "step": 11248 }, { "epoch": 0.5269592917037523, "grad_norm": 0.5954481073038266, "learning_rate": 4.318667867519669e-06, "loss": 0.2607, "step": 11249 }, { "epoch": 0.5270061366936806, "grad_norm": 0.6878411164337394, "learning_rate": 4.318537735256498e-06, "loss": 0.305, "step": 11250 }, { "epoch": 0.5270529816836089, "grad_norm": 0.5799120147129967, "learning_rate": 4.318407592528099e-06, "loss": 0.2881, "step": 11251 }, { "epoch": 0.5270998266735373, "grad_norm": 0.57759969070608, "learning_rate": 4.3182774393352216e-06, "loss": 0.288, "step": 11252 }, { "epoch": 0.5271466716634656, "grad_norm": 0.6182600571846496, "learning_rate": 4.318147275678615e-06, "loss": 0.2957, "step": 11253 }, { "epoch": 0.5271935166533939, "grad_norm": 0.5664742971585459, "learning_rate": 4.318017101559027e-06, "loss": 0.291, "step": 11254 }, { "epoch": 0.5272403616433222, "grad_norm": 0.6149664202969206, "learning_rate": 4.317886916977208e-06, "loss": 0.2702, "step": 11255 }, { "epoch": 0.5272872066332506, "grad_norm": 0.5798044227280318, "learning_rate": 4.317756721933906e-06, "loss": 0.2899, "step": 11256 }, { "epoch": 0.5273340516231789, "grad_norm": 0.5809952440872196, "learning_rate": 4.317626516429871e-06, "loss": 0.2753, "step": 11257 }, { "epoch": 0.5273808966131073, "grad_norm": 0.6605740348512601, "learning_rate": 4.317496300465852e-06, "loss": 0.3084, "step": 11258 }, { "epoch": 0.5274277416030355, "grad_norm": 0.5905848890735769, "learning_rate": 4.3173660740426e-06, "loss": 0.286, "step": 11259 }, { "epoch": 0.5274745865929639, "grad_norm": 0.6168534964938415, "learning_rate": 4.317235837160861e-06, "loss": 0.2679, "step": 11260 }, { "epoch": 0.5275214315828922, "grad_norm": 0.5833688190595254, "learning_rate": 4.317105589821389e-06, "loss": 0.2857, "step": 11261 }, { "epoch": 0.5275682765728206, "grad_norm": 0.5750734849596388, "learning_rate": 4.316975332024929e-06, "loss": 0.2975, "step": 11262 }, { "epoch": 0.5276151215627488, "grad_norm": 0.5863298911873385, "learning_rate": 4.316845063772234e-06, "loss": 0.2799, "step": 11263 }, { "epoch": 0.5276619665526772, "grad_norm": 0.5739099167250009, "learning_rate": 4.316714785064051e-06, "loss": 0.2825, "step": 11264 }, { "epoch": 0.5277088115426055, "grad_norm": 0.6024671289068025, "learning_rate": 4.316584495901132e-06, "loss": 0.2952, "step": 11265 }, { "epoch": 0.5277556565325339, "grad_norm": 0.6502946608526564, "learning_rate": 4.316454196284226e-06, "loss": 0.3096, "step": 11266 }, { "epoch": 0.5278025015224622, "grad_norm": 0.5932284549092569, "learning_rate": 4.316323886214082e-06, "loss": 0.2903, "step": 11267 }, { "epoch": 0.5278493465123905, "grad_norm": 0.6058733365878423, "learning_rate": 4.3161935656914505e-06, "loss": 0.2969, "step": 11268 }, { "epoch": 0.5278961915023188, "grad_norm": 0.5693324184715548, "learning_rate": 4.3160632347170815e-06, "loss": 0.276, "step": 11269 }, { "epoch": 0.5279430364922472, "grad_norm": 0.5863394062353212, "learning_rate": 4.315932893291725e-06, "loss": 0.2758, "step": 11270 }, { "epoch": 0.5279898814821755, "grad_norm": 0.5796132386413186, "learning_rate": 4.315802541416132e-06, "loss": 0.287, "step": 11271 }, { "epoch": 0.5280367264721038, "grad_norm": 0.59047276855687, "learning_rate": 4.315672179091051e-06, "loss": 0.3062, "step": 11272 }, { "epoch": 0.5280835714620321, "grad_norm": 0.5561638733427162, "learning_rate": 4.315541806317233e-06, "loss": 0.2733, "step": 11273 }, { "epoch": 0.5281304164519605, "grad_norm": 0.5674715611833585, "learning_rate": 4.315411423095428e-06, "loss": 0.282, "step": 11274 }, { "epoch": 0.5281772614418888, "grad_norm": 0.6164861356255212, "learning_rate": 4.315281029426388e-06, "loss": 0.2909, "step": 11275 }, { "epoch": 0.5282241064318172, "grad_norm": 0.5950736059873349, "learning_rate": 4.31515062531086e-06, "loss": 0.2708, "step": 11276 }, { "epoch": 0.5282709514217454, "grad_norm": 0.630554853225131, "learning_rate": 4.3150202107495975e-06, "loss": 0.3097, "step": 11277 }, { "epoch": 0.5283177964116738, "grad_norm": 0.6079355272073889, "learning_rate": 4.314889785743349e-06, "loss": 0.2692, "step": 11278 }, { "epoch": 0.5283646414016021, "grad_norm": 0.595154144836531, "learning_rate": 4.314759350292867e-06, "loss": 0.2831, "step": 11279 }, { "epoch": 0.5284114863915305, "grad_norm": 0.5678957248874362, "learning_rate": 4.3146289043989e-06, "loss": 0.2952, "step": 11280 }, { "epoch": 0.5284583313814587, "grad_norm": 0.5519357331360858, "learning_rate": 4.314498448062201e-06, "loss": 0.2664, "step": 11281 }, { "epoch": 0.528505176371387, "grad_norm": 0.5926193374083015, "learning_rate": 4.314367981283518e-06, "loss": 0.2945, "step": 11282 }, { "epoch": 0.5285520213613154, "grad_norm": 0.591284237567691, "learning_rate": 4.314237504063604e-06, "loss": 0.2841, "step": 11283 }, { "epoch": 0.5285988663512438, "grad_norm": 0.6374228409006919, "learning_rate": 4.31410701640321e-06, "loss": 0.2905, "step": 11284 }, { "epoch": 0.5286457113411721, "grad_norm": 0.5871945538309367, "learning_rate": 4.313976518303085e-06, "loss": 0.299, "step": 11285 }, { "epoch": 0.5286925563311003, "grad_norm": 0.598894796201752, "learning_rate": 4.313846009763981e-06, "loss": 0.2783, "step": 11286 }, { "epoch": 0.5287394013210287, "grad_norm": 0.5834824504076689, "learning_rate": 4.3137154907866505e-06, "loss": 0.2841, "step": 11287 }, { "epoch": 0.528786246310957, "grad_norm": 0.5273800142393499, "learning_rate": 4.313584961371842e-06, "loss": 0.2706, "step": 11288 }, { "epoch": 0.5288330913008854, "grad_norm": 0.6813690460009445, "learning_rate": 4.313454421520308e-06, "loss": 0.3243, "step": 11289 }, { "epoch": 0.5288799362908136, "grad_norm": 0.5679117948897489, "learning_rate": 4.3133238712328e-06, "loss": 0.3041, "step": 11290 }, { "epoch": 0.528926781280742, "grad_norm": 0.5991442388206435, "learning_rate": 4.313193310510068e-06, "loss": 0.3045, "step": 11291 }, { "epoch": 0.5289736262706703, "grad_norm": 0.597682301375767, "learning_rate": 4.3130627393528645e-06, "loss": 0.2885, "step": 11292 }, { "epoch": 0.5290204712605987, "grad_norm": 0.681208511137408, "learning_rate": 4.312932157761941e-06, "loss": 0.284, "step": 11293 }, { "epoch": 0.529067316250527, "grad_norm": 0.6559497544659332, "learning_rate": 4.312801565738049e-06, "loss": 0.3062, "step": 11294 }, { "epoch": 0.5291141612404553, "grad_norm": 0.6173524428851636, "learning_rate": 4.312670963281938e-06, "loss": 0.2968, "step": 11295 }, { "epoch": 0.5291610062303836, "grad_norm": 0.6421693507982457, "learning_rate": 4.312540350394363e-06, "loss": 0.3255, "step": 11296 }, { "epoch": 0.529207851220312, "grad_norm": 0.5859463821578861, "learning_rate": 4.3124097270760735e-06, "loss": 0.2735, "step": 11297 }, { "epoch": 0.5292546962102403, "grad_norm": 0.6294564699898239, "learning_rate": 4.312279093327822e-06, "loss": 0.2977, "step": 11298 }, { "epoch": 0.5293015412001686, "grad_norm": 0.5152388770414839, "learning_rate": 4.312148449150358e-06, "loss": 0.2449, "step": 11299 }, { "epoch": 0.5293483861900969, "grad_norm": 0.544923304051618, "learning_rate": 4.312017794544437e-06, "loss": 0.2632, "step": 11300 }, { "epoch": 0.5293952311800253, "grad_norm": 0.5929483532282955, "learning_rate": 4.311887129510808e-06, "loss": 0.273, "step": 11301 }, { "epoch": 0.5294420761699536, "grad_norm": 0.5769037464221285, "learning_rate": 4.311756454050224e-06, "loss": 0.2851, "step": 11302 }, { "epoch": 0.529488921159882, "grad_norm": 0.6265418888505405, "learning_rate": 4.3116257681634365e-06, "loss": 0.3065, "step": 11303 }, { "epoch": 0.5295357661498102, "grad_norm": 0.5616621223125908, "learning_rate": 4.311495071851199e-06, "loss": 0.2754, "step": 11304 }, { "epoch": 0.5295826111397386, "grad_norm": 0.5452020167515149, "learning_rate": 4.311364365114262e-06, "loss": 0.29, "step": 11305 }, { "epoch": 0.5296294561296669, "grad_norm": 0.6489677081350045, "learning_rate": 4.311233647953379e-06, "loss": 0.2887, "step": 11306 }, { "epoch": 0.5296763011195953, "grad_norm": 0.6286663034973345, "learning_rate": 4.311102920369301e-06, "loss": 0.2895, "step": 11307 }, { "epoch": 0.5297231461095235, "grad_norm": 0.5997464492256149, "learning_rate": 4.310972182362782e-06, "loss": 0.2732, "step": 11308 }, { "epoch": 0.5297699910994519, "grad_norm": 0.590705892782721, "learning_rate": 4.310841433934572e-06, "loss": 0.2807, "step": 11309 }, { "epoch": 0.5298168360893802, "grad_norm": 0.6138295205364634, "learning_rate": 4.310710675085426e-06, "loss": 0.2987, "step": 11310 }, { "epoch": 0.5298636810793086, "grad_norm": 0.651364695086753, "learning_rate": 4.3105799058160935e-06, "loss": 0.2844, "step": 11311 }, { "epoch": 0.5299105260692369, "grad_norm": 0.6698509901924303, "learning_rate": 4.310449126127329e-06, "loss": 0.2973, "step": 11312 }, { "epoch": 0.5299573710591652, "grad_norm": 0.6399057561211476, "learning_rate": 4.310318336019886e-06, "loss": 0.2929, "step": 11313 }, { "epoch": 0.5300042160490935, "grad_norm": 0.6529761209348478, "learning_rate": 4.310187535494516e-06, "loss": 0.3297, "step": 11314 }, { "epoch": 0.5300510610390219, "grad_norm": 0.617429270526789, "learning_rate": 4.3100567245519705e-06, "loss": 0.2932, "step": 11315 }, { "epoch": 0.5300979060289502, "grad_norm": 0.6242095200479084, "learning_rate": 4.309925903193004e-06, "loss": 0.3067, "step": 11316 }, { "epoch": 0.5301447510188785, "grad_norm": 0.6125739640662836, "learning_rate": 4.309795071418369e-06, "loss": 0.3273, "step": 11317 }, { "epoch": 0.5301915960088068, "grad_norm": 0.5629428787014248, "learning_rate": 4.3096642292288185e-06, "loss": 0.2637, "step": 11318 }, { "epoch": 0.5302384409987352, "grad_norm": 0.6172769463068702, "learning_rate": 4.309533376625104e-06, "loss": 0.2803, "step": 11319 }, { "epoch": 0.5302852859886635, "grad_norm": 0.5969420061384545, "learning_rate": 4.309402513607981e-06, "loss": 0.2748, "step": 11320 }, { "epoch": 0.5303321309785919, "grad_norm": 0.5508704447143699, "learning_rate": 4.3092716401782005e-06, "loss": 0.2704, "step": 11321 }, { "epoch": 0.5303789759685201, "grad_norm": 0.5886030200484023, "learning_rate": 4.309140756336518e-06, "loss": 0.2703, "step": 11322 }, { "epoch": 0.5304258209584485, "grad_norm": 0.6064156060448119, "learning_rate": 4.309009862083683e-06, "loss": 0.2899, "step": 11323 }, { "epoch": 0.5304726659483768, "grad_norm": 0.6384749924393588, "learning_rate": 4.308878957420451e-06, "loss": 0.2819, "step": 11324 }, { "epoch": 0.5305195109383052, "grad_norm": 0.5842682405137487, "learning_rate": 4.308748042347577e-06, "loss": 0.2753, "step": 11325 }, { "epoch": 0.5305663559282334, "grad_norm": 0.5809090353012908, "learning_rate": 4.308617116865812e-06, "loss": 0.2847, "step": 11326 }, { "epoch": 0.5306132009181618, "grad_norm": 0.6387561197445051, "learning_rate": 4.30848618097591e-06, "loss": 0.3133, "step": 11327 }, { "epoch": 0.5306600459080901, "grad_norm": 0.5906275526272372, "learning_rate": 4.308355234678624e-06, "loss": 0.2677, "step": 11328 }, { "epoch": 0.5307068908980185, "grad_norm": 0.598615985664105, "learning_rate": 4.308224277974708e-06, "loss": 0.2854, "step": 11329 }, { "epoch": 0.5307537358879468, "grad_norm": 0.557484838110234, "learning_rate": 4.308093310864917e-06, "loss": 0.2742, "step": 11330 }, { "epoch": 0.5308005808778751, "grad_norm": 0.6424123296001039, "learning_rate": 4.307962333350002e-06, "loss": 0.2794, "step": 11331 }, { "epoch": 0.5308474258678034, "grad_norm": 0.5802314721915315, "learning_rate": 4.307831345430719e-06, "loss": 0.284, "step": 11332 }, { "epoch": 0.5308942708577318, "grad_norm": 0.5907501667427894, "learning_rate": 4.307700347107821e-06, "loss": 0.2879, "step": 11333 }, { "epoch": 0.5309411158476601, "grad_norm": 0.6365903717184603, "learning_rate": 4.307569338382061e-06, "loss": 0.3036, "step": 11334 }, { "epoch": 0.5309879608375884, "grad_norm": 0.5895898880103324, "learning_rate": 4.307438319254196e-06, "loss": 0.2851, "step": 11335 }, { "epoch": 0.5310348058275167, "grad_norm": 0.545559824246059, "learning_rate": 4.307307289724975e-06, "loss": 0.2673, "step": 11336 }, { "epoch": 0.5310816508174451, "grad_norm": 0.5711252520976919, "learning_rate": 4.307176249795157e-06, "loss": 0.2937, "step": 11337 }, { "epoch": 0.5311284958073734, "grad_norm": 0.5825139148466763, "learning_rate": 4.307045199465493e-06, "loss": 0.2854, "step": 11338 }, { "epoch": 0.5311753407973018, "grad_norm": 0.6422298178226802, "learning_rate": 4.306914138736738e-06, "loss": 0.311, "step": 11339 }, { "epoch": 0.53122218578723, "grad_norm": 0.5941619283233918, "learning_rate": 4.3067830676096465e-06, "loss": 0.3009, "step": 11340 }, { "epoch": 0.5312690307771584, "grad_norm": 0.6119972817080275, "learning_rate": 4.306651986084972e-06, "loss": 0.3009, "step": 11341 }, { "epoch": 0.5313158757670867, "grad_norm": 0.5952035176236423, "learning_rate": 4.30652089416347e-06, "loss": 0.2796, "step": 11342 }, { "epoch": 0.5313627207570151, "grad_norm": 0.5942250182666166, "learning_rate": 4.306389791845894e-06, "loss": 0.2807, "step": 11343 }, { "epoch": 0.5314095657469433, "grad_norm": 0.6226474085791369, "learning_rate": 4.306258679132999e-06, "loss": 0.3149, "step": 11344 }, { "epoch": 0.5314564107368717, "grad_norm": 0.6313319787149545, "learning_rate": 4.306127556025539e-06, "loss": 0.3284, "step": 11345 }, { "epoch": 0.5315032557268, "grad_norm": 0.6048092066473686, "learning_rate": 4.305996422524269e-06, "loss": 0.2934, "step": 11346 }, { "epoch": 0.5315501007167284, "grad_norm": 0.5794844100581438, "learning_rate": 4.305865278629944e-06, "loss": 0.2873, "step": 11347 }, { "epoch": 0.5315969457066567, "grad_norm": 0.5960257328663404, "learning_rate": 4.305734124343317e-06, "loss": 0.2882, "step": 11348 }, { "epoch": 0.531643790696585, "grad_norm": 0.647168890944848, "learning_rate": 4.305602959665145e-06, "loss": 0.2977, "step": 11349 }, { "epoch": 0.5316906356865133, "grad_norm": 0.5960008705362227, "learning_rate": 4.3054717845961814e-06, "loss": 0.2945, "step": 11350 }, { "epoch": 0.5317374806764417, "grad_norm": 0.5955119136425254, "learning_rate": 4.305340599137183e-06, "loss": 0.2996, "step": 11351 }, { "epoch": 0.53178432566637, "grad_norm": 0.6328018669373507, "learning_rate": 4.305209403288901e-06, "loss": 0.2926, "step": 11352 }, { "epoch": 0.5318311706562983, "grad_norm": 0.579061497139973, "learning_rate": 4.305078197052093e-06, "loss": 0.2854, "step": 11353 }, { "epoch": 0.5318780156462266, "grad_norm": 0.6414871302962672, "learning_rate": 4.304946980427514e-06, "loss": 0.2941, "step": 11354 }, { "epoch": 0.531924860636155, "grad_norm": 0.5841405817926082, "learning_rate": 4.304815753415918e-06, "loss": 0.299, "step": 11355 }, { "epoch": 0.5319717056260833, "grad_norm": 0.5717091134018313, "learning_rate": 4.304684516018063e-06, "loss": 0.2649, "step": 11356 }, { "epoch": 0.5320185506160117, "grad_norm": 0.6679978284192308, "learning_rate": 4.304553268234701e-06, "loss": 0.2904, "step": 11357 }, { "epoch": 0.5320653956059399, "grad_norm": 0.6165100263585362, "learning_rate": 4.304422010066588e-06, "loss": 0.2834, "step": 11358 }, { "epoch": 0.5321122405958683, "grad_norm": 0.6390544283089918, "learning_rate": 4.304290741514481e-06, "loss": 0.2868, "step": 11359 }, { "epoch": 0.5321590855857966, "grad_norm": 0.5575798229983181, "learning_rate": 4.3041594625791324e-06, "loss": 0.2789, "step": 11360 }, { "epoch": 0.532205930575725, "grad_norm": 0.5619187225031966, "learning_rate": 4.3040281732613014e-06, "loss": 0.2704, "step": 11361 }, { "epoch": 0.5322527755656532, "grad_norm": 0.6686157859514064, "learning_rate": 4.303896873561742e-06, "loss": 0.3192, "step": 11362 }, { "epoch": 0.5322996205555816, "grad_norm": 0.5738412651372552, "learning_rate": 4.303765563481208e-06, "loss": 0.2714, "step": 11363 }, { "epoch": 0.5323464655455099, "grad_norm": 0.6262998120145054, "learning_rate": 4.303634243020457e-06, "loss": 0.2751, "step": 11364 }, { "epoch": 0.5323933105354383, "grad_norm": 0.6471801620716114, "learning_rate": 4.303502912180244e-06, "loss": 0.2997, "step": 11365 }, { "epoch": 0.5324401555253666, "grad_norm": 0.5369364033447693, "learning_rate": 4.303371570961326e-06, "loss": 0.2615, "step": 11366 }, { "epoch": 0.5324870005152949, "grad_norm": 0.6262140235778382, "learning_rate": 4.303240219364457e-06, "loss": 0.3104, "step": 11367 }, { "epoch": 0.5325338455052232, "grad_norm": 0.6349285573170685, "learning_rate": 4.303108857390394e-06, "loss": 0.2793, "step": 11368 }, { "epoch": 0.5325806904951516, "grad_norm": 0.5746929341024775, "learning_rate": 4.302977485039892e-06, "loss": 0.2753, "step": 11369 }, { "epoch": 0.5326275354850799, "grad_norm": 0.6054583322694195, "learning_rate": 4.302846102313709e-06, "loss": 0.2875, "step": 11370 }, { "epoch": 0.5326743804750081, "grad_norm": 0.586565608167303, "learning_rate": 4.3027147092125996e-06, "loss": 0.2754, "step": 11371 }, { "epoch": 0.5327212254649365, "grad_norm": 0.6174172461843961, "learning_rate": 4.302583305737319e-06, "loss": 0.2781, "step": 11372 }, { "epoch": 0.5327680704548649, "grad_norm": 0.6080426490652256, "learning_rate": 4.302451891888626e-06, "loss": 0.2786, "step": 11373 }, { "epoch": 0.5328149154447932, "grad_norm": 0.5838824203510917, "learning_rate": 4.302320467667275e-06, "loss": 0.2708, "step": 11374 }, { "epoch": 0.5328617604347216, "grad_norm": 0.6071208466532764, "learning_rate": 4.302189033074023e-06, "loss": 0.2641, "step": 11375 }, { "epoch": 0.5329086054246498, "grad_norm": 0.5756736186899585, "learning_rate": 4.302057588109625e-06, "loss": 0.2911, "step": 11376 }, { "epoch": 0.5329554504145781, "grad_norm": 0.6064188407641148, "learning_rate": 4.301926132774838e-06, "loss": 0.2807, "step": 11377 }, { "epoch": 0.5330022954045065, "grad_norm": 0.6035302053756196, "learning_rate": 4.301794667070421e-06, "loss": 0.2818, "step": 11378 }, { "epoch": 0.5330491403944349, "grad_norm": 0.5979021196651829, "learning_rate": 4.3016631909971275e-06, "loss": 0.2939, "step": 11379 }, { "epoch": 0.5330959853843631, "grad_norm": 0.6358903423730443, "learning_rate": 4.3015317045557155e-06, "loss": 0.284, "step": 11380 }, { "epoch": 0.5331428303742914, "grad_norm": 0.5946920293253163, "learning_rate": 4.30140020774694e-06, "loss": 0.3123, "step": 11381 }, { "epoch": 0.5331896753642198, "grad_norm": 0.6693066333608908, "learning_rate": 4.301268700571561e-06, "loss": 0.2853, "step": 11382 }, { "epoch": 0.5332365203541481, "grad_norm": 0.6147190590456739, "learning_rate": 4.301137183030333e-06, "loss": 0.2943, "step": 11383 }, { "epoch": 0.5332833653440765, "grad_norm": 0.6255615472506166, "learning_rate": 4.301005655124012e-06, "loss": 0.2932, "step": 11384 }, { "epoch": 0.5333302103340047, "grad_norm": 0.5203075726302592, "learning_rate": 4.300874116853357e-06, "loss": 0.2727, "step": 11385 }, { "epoch": 0.5333770553239331, "grad_norm": 0.5917447285733448, "learning_rate": 4.300742568219124e-06, "loss": 0.2915, "step": 11386 }, { "epoch": 0.5334239003138614, "grad_norm": 0.5896488406822133, "learning_rate": 4.30061100922207e-06, "loss": 0.2872, "step": 11387 }, { "epoch": 0.5334707453037898, "grad_norm": 0.5963711262638348, "learning_rate": 4.300479439862952e-06, "loss": 0.2769, "step": 11388 }, { "epoch": 0.533517590293718, "grad_norm": 0.6094926614787939, "learning_rate": 4.300347860142528e-06, "loss": 0.2948, "step": 11389 }, { "epoch": 0.5335644352836464, "grad_norm": 0.6528320373098491, "learning_rate": 4.300216270061555e-06, "loss": 0.3162, "step": 11390 }, { "epoch": 0.5336112802735747, "grad_norm": 0.642943681659117, "learning_rate": 4.300084669620789e-06, "loss": 0.2954, "step": 11391 }, { "epoch": 0.5336581252635031, "grad_norm": 0.5808185198922908, "learning_rate": 4.299953058820988e-06, "loss": 0.263, "step": 11392 }, { "epoch": 0.5337049702534314, "grad_norm": 0.615431619049176, "learning_rate": 4.29982143766291e-06, "loss": 0.2939, "step": 11393 }, { "epoch": 0.5337518152433597, "grad_norm": 0.5927510188542722, "learning_rate": 4.299689806147311e-06, "loss": 0.2672, "step": 11394 }, { "epoch": 0.533798660233288, "grad_norm": 0.664898973359716, "learning_rate": 4.299558164274951e-06, "loss": 0.3019, "step": 11395 }, { "epoch": 0.5338455052232164, "grad_norm": 0.5527678024915794, "learning_rate": 4.299426512046585e-06, "loss": 0.2734, "step": 11396 }, { "epoch": 0.5338923502131447, "grad_norm": 0.5754837461450508, "learning_rate": 4.299294849462971e-06, "loss": 0.316, "step": 11397 }, { "epoch": 0.533939195203073, "grad_norm": 0.5991640153352094, "learning_rate": 4.299163176524869e-06, "loss": 0.3017, "step": 11398 }, { "epoch": 0.5339860401930013, "grad_norm": 0.5812850698900138, "learning_rate": 4.2990314932330345e-06, "loss": 0.3057, "step": 11399 }, { "epoch": 0.5340328851829297, "grad_norm": 0.6103429303564384, "learning_rate": 4.298899799588226e-06, "loss": 0.2992, "step": 11400 }, { "epoch": 0.534079730172858, "grad_norm": 0.590840119339859, "learning_rate": 4.298768095591201e-06, "loss": 0.2815, "step": 11401 }, { "epoch": 0.5341265751627864, "grad_norm": 0.6314740868810684, "learning_rate": 4.298636381242718e-06, "loss": 0.2792, "step": 11402 }, { "epoch": 0.5341734201527146, "grad_norm": 0.6311704389315523, "learning_rate": 4.2985046565435355e-06, "loss": 0.2732, "step": 11403 }, { "epoch": 0.534220265142643, "grad_norm": 0.6081662694162839, "learning_rate": 4.2983729214944095e-06, "loss": 0.2876, "step": 11404 }, { "epoch": 0.5342671101325713, "grad_norm": 0.6277221496897568, "learning_rate": 4.298241176096099e-06, "loss": 0.2982, "step": 11405 }, { "epoch": 0.5343139551224997, "grad_norm": 0.6026631451208251, "learning_rate": 4.298109420349363e-06, "loss": 0.3, "step": 11406 }, { "epoch": 0.5343608001124279, "grad_norm": 0.5683708294884219, "learning_rate": 4.29797765425496e-06, "loss": 0.2865, "step": 11407 }, { "epoch": 0.5344076451023563, "grad_norm": 0.5905948486901923, "learning_rate": 4.297845877813646e-06, "loss": 0.2985, "step": 11408 }, { "epoch": 0.5344544900922846, "grad_norm": 0.5762373688665717, "learning_rate": 4.2977140910261826e-06, "loss": 0.2745, "step": 11409 }, { "epoch": 0.534501335082213, "grad_norm": 0.5722237515607155, "learning_rate": 4.297582293893326e-06, "loss": 0.2756, "step": 11410 }, { "epoch": 0.5345481800721413, "grad_norm": 0.6526841617303434, "learning_rate": 4.297450486415834e-06, "loss": 0.2967, "step": 11411 }, { "epoch": 0.5345950250620696, "grad_norm": 0.5427804162082527, "learning_rate": 4.297318668594468e-06, "loss": 0.2949, "step": 11412 }, { "epoch": 0.5346418700519979, "grad_norm": 0.5722303990959109, "learning_rate": 4.297186840429983e-06, "loss": 0.2919, "step": 11413 }, { "epoch": 0.5346887150419263, "grad_norm": 0.5773523162133947, "learning_rate": 4.29705500192314e-06, "loss": 0.285, "step": 11414 }, { "epoch": 0.5347355600318546, "grad_norm": 0.6070491529117119, "learning_rate": 4.296923153074698e-06, "loss": 0.302, "step": 11415 }, { "epoch": 0.5347824050217829, "grad_norm": 0.5588203498181832, "learning_rate": 4.296791293885414e-06, "loss": 0.2784, "step": 11416 }, { "epoch": 0.5348292500117112, "grad_norm": 0.5839921050221059, "learning_rate": 4.296659424356048e-06, "loss": 0.2854, "step": 11417 }, { "epoch": 0.5348760950016396, "grad_norm": 0.6168012311534857, "learning_rate": 4.2965275444873584e-06, "loss": 0.2863, "step": 11418 }, { "epoch": 0.5349229399915679, "grad_norm": 0.5801850543649936, "learning_rate": 4.296395654280104e-06, "loss": 0.2793, "step": 11419 }, { "epoch": 0.5349697849814963, "grad_norm": 0.6182499600707058, "learning_rate": 4.296263753735045e-06, "loss": 0.2718, "step": 11420 }, { "epoch": 0.5350166299714245, "grad_norm": 0.6243879446811565, "learning_rate": 4.296131842852939e-06, "loss": 0.2909, "step": 11421 }, { "epoch": 0.5350634749613529, "grad_norm": 0.5757987869142331, "learning_rate": 4.295999921634546e-06, "loss": 0.2801, "step": 11422 }, { "epoch": 0.5351103199512812, "grad_norm": 0.5692357928830372, "learning_rate": 4.295867990080625e-06, "loss": 0.2939, "step": 11423 }, { "epoch": 0.5351571649412096, "grad_norm": 0.5674700297555985, "learning_rate": 4.295736048191935e-06, "loss": 0.285, "step": 11424 }, { "epoch": 0.5352040099311378, "grad_norm": 0.591316285463412, "learning_rate": 4.2956040959692345e-06, "loss": 0.2883, "step": 11425 }, { "epoch": 0.5352508549210662, "grad_norm": 0.6454652648590282, "learning_rate": 4.2954721334132846e-06, "loss": 0.3077, "step": 11426 }, { "epoch": 0.5352976999109945, "grad_norm": 0.5417235844340065, "learning_rate": 4.295340160524844e-06, "loss": 0.2654, "step": 11427 }, { "epoch": 0.5353445449009229, "grad_norm": 0.5915079356474181, "learning_rate": 4.295208177304671e-06, "loss": 0.3001, "step": 11428 }, { "epoch": 0.5353913898908512, "grad_norm": 0.6403852993567286, "learning_rate": 4.295076183753527e-06, "loss": 0.3031, "step": 11429 }, { "epoch": 0.5354382348807795, "grad_norm": 0.6552660025924434, "learning_rate": 4.29494417987217e-06, "loss": 0.3022, "step": 11430 }, { "epoch": 0.5354850798707078, "grad_norm": 0.5988164511890456, "learning_rate": 4.2948121656613616e-06, "loss": 0.2849, "step": 11431 }, { "epoch": 0.5355319248606362, "grad_norm": 0.6161937299309677, "learning_rate": 4.29468014112186e-06, "loss": 0.2871, "step": 11432 }, { "epoch": 0.5355787698505645, "grad_norm": 0.5807248283113529, "learning_rate": 4.294548106254425e-06, "loss": 0.2719, "step": 11433 }, { "epoch": 0.5356256148404928, "grad_norm": 0.5693811701152013, "learning_rate": 4.294416061059816e-06, "loss": 0.2705, "step": 11434 }, { "epoch": 0.5356724598304211, "grad_norm": 0.556881839419766, "learning_rate": 4.294284005538794e-06, "loss": 0.2556, "step": 11435 }, { "epoch": 0.5357193048203495, "grad_norm": 0.6079962257934488, "learning_rate": 4.294151939692119e-06, "loss": 0.3036, "step": 11436 }, { "epoch": 0.5357661498102778, "grad_norm": 0.5723042194991153, "learning_rate": 4.29401986352055e-06, "loss": 0.259, "step": 11437 }, { "epoch": 0.5358129948002062, "grad_norm": 0.6024039693985693, "learning_rate": 4.293887777024847e-06, "loss": 0.2766, "step": 11438 }, { "epoch": 0.5358598397901344, "grad_norm": 0.6769600845545584, "learning_rate": 4.293755680205772e-06, "loss": 0.2815, "step": 11439 }, { "epoch": 0.5359066847800628, "grad_norm": 0.5739217586999655, "learning_rate": 4.293623573064084e-06, "loss": 0.2958, "step": 11440 }, { "epoch": 0.5359535297699911, "grad_norm": 0.5631734412148228, "learning_rate": 4.293491455600543e-06, "loss": 0.2926, "step": 11441 }, { "epoch": 0.5360003747599195, "grad_norm": 0.606045062940084, "learning_rate": 4.2933593278159085e-06, "loss": 0.2996, "step": 11442 }, { "epoch": 0.5360472197498477, "grad_norm": 0.6007225520432381, "learning_rate": 4.293227189710942e-06, "loss": 0.2615, "step": 11443 }, { "epoch": 0.536094064739776, "grad_norm": 0.6133004338370007, "learning_rate": 4.2930950412864044e-06, "loss": 0.3138, "step": 11444 }, { "epoch": 0.5361409097297044, "grad_norm": 0.6033737614202144, "learning_rate": 4.292962882543055e-06, "loss": 0.3063, "step": 11445 }, { "epoch": 0.5361877547196328, "grad_norm": 0.5092415196230977, "learning_rate": 4.292830713481655e-06, "loss": 0.2798, "step": 11446 }, { "epoch": 0.5362345997095611, "grad_norm": 0.5510971936804995, "learning_rate": 4.292698534102965e-06, "loss": 0.2896, "step": 11447 }, { "epoch": 0.5362814446994894, "grad_norm": 0.594340268868751, "learning_rate": 4.292566344407746e-06, "loss": 0.3069, "step": 11448 }, { "epoch": 0.5363282896894177, "grad_norm": 0.5757408765324636, "learning_rate": 4.292434144396757e-06, "loss": 0.3017, "step": 11449 }, { "epoch": 0.536375134679346, "grad_norm": 0.6146379764977888, "learning_rate": 4.29230193407076e-06, "loss": 0.2786, "step": 11450 }, { "epoch": 0.5364219796692744, "grad_norm": 0.546200769247726, "learning_rate": 4.292169713430517e-06, "loss": 0.261, "step": 11451 }, { "epoch": 0.5364688246592026, "grad_norm": 0.586574068544976, "learning_rate": 4.292037482476787e-06, "loss": 0.2991, "step": 11452 }, { "epoch": 0.536515669649131, "grad_norm": 0.5614292447746732, "learning_rate": 4.291905241210332e-06, "loss": 0.2663, "step": 11453 }, { "epoch": 0.5365625146390594, "grad_norm": 0.6295039815660636, "learning_rate": 4.291772989631913e-06, "loss": 0.3026, "step": 11454 }, { "epoch": 0.5366093596289877, "grad_norm": 0.5477857819132934, "learning_rate": 4.29164072774229e-06, "loss": 0.2847, "step": 11455 }, { "epoch": 0.536656204618916, "grad_norm": 0.6062640653359281, "learning_rate": 4.291508455542226e-06, "loss": 0.2855, "step": 11456 }, { "epoch": 0.5367030496088443, "grad_norm": 0.5861619877239231, "learning_rate": 4.291376173032481e-06, "loss": 0.2935, "step": 11457 }, { "epoch": 0.5367498945987726, "grad_norm": 0.5536828077342258, "learning_rate": 4.291243880213815e-06, "loss": 0.2825, "step": 11458 }, { "epoch": 0.536796739588701, "grad_norm": 0.6299350405868566, "learning_rate": 4.291111577086992e-06, "loss": 0.2916, "step": 11459 }, { "epoch": 0.5368435845786294, "grad_norm": 0.5655474799057287, "learning_rate": 4.290979263652772e-06, "loss": 0.2637, "step": 11460 }, { "epoch": 0.5368904295685576, "grad_norm": 0.5628761374877658, "learning_rate": 4.2908469399119154e-06, "loss": 0.2827, "step": 11461 }, { "epoch": 0.5369372745584859, "grad_norm": 0.5851804077161303, "learning_rate": 4.290714605865186e-06, "loss": 0.2601, "step": 11462 }, { "epoch": 0.5369841195484143, "grad_norm": 0.5974831649068771, "learning_rate": 4.290582261513343e-06, "loss": 0.3087, "step": 11463 }, { "epoch": 0.5370309645383426, "grad_norm": 0.5985595191373599, "learning_rate": 4.29044990685715e-06, "loss": 0.3054, "step": 11464 }, { "epoch": 0.537077809528271, "grad_norm": 0.6105673015798393, "learning_rate": 4.290317541897367e-06, "loss": 0.2775, "step": 11465 }, { "epoch": 0.5371246545181992, "grad_norm": 0.5649808626608448, "learning_rate": 4.290185166634757e-06, "loss": 0.2855, "step": 11466 }, { "epoch": 0.5371714995081276, "grad_norm": 0.6316254240370165, "learning_rate": 4.290052781070082e-06, "loss": 0.2927, "step": 11467 }, { "epoch": 0.5372183444980559, "grad_norm": 0.6106454600664013, "learning_rate": 4.289920385204103e-06, "loss": 0.2922, "step": 11468 }, { "epoch": 0.5372651894879843, "grad_norm": 0.5786839083182875, "learning_rate": 4.289787979037581e-06, "loss": 0.2824, "step": 11469 }, { "epoch": 0.5373120344779125, "grad_norm": 0.581232652959623, "learning_rate": 4.289655562571279e-06, "loss": 0.291, "step": 11470 }, { "epoch": 0.5373588794678409, "grad_norm": 0.6136813997609598, "learning_rate": 4.28952313580596e-06, "loss": 0.2907, "step": 11471 }, { "epoch": 0.5374057244577692, "grad_norm": 0.5809089249753481, "learning_rate": 4.289390698742385e-06, "loss": 0.2763, "step": 11472 }, { "epoch": 0.5374525694476976, "grad_norm": 0.618053708246491, "learning_rate": 4.2892582513813165e-06, "loss": 0.3057, "step": 11473 }, { "epoch": 0.5374994144376259, "grad_norm": 0.5855249879958725, "learning_rate": 4.289125793723515e-06, "loss": 0.2951, "step": 11474 }, { "epoch": 0.5375462594275542, "grad_norm": 0.5914263412181038, "learning_rate": 4.288993325769746e-06, "loss": 0.2863, "step": 11475 }, { "epoch": 0.5375931044174825, "grad_norm": 0.5767450407570393, "learning_rate": 4.288860847520769e-06, "loss": 0.2675, "step": 11476 }, { "epoch": 0.5376399494074109, "grad_norm": 0.5862724533362904, "learning_rate": 4.288728358977348e-06, "loss": 0.2875, "step": 11477 }, { "epoch": 0.5376867943973392, "grad_norm": 0.5821075129156134, "learning_rate": 4.288595860140245e-06, "loss": 0.2894, "step": 11478 }, { "epoch": 0.5377336393872675, "grad_norm": 0.6202794147169427, "learning_rate": 4.288463351010221e-06, "loss": 0.2981, "step": 11479 }, { "epoch": 0.5377804843771958, "grad_norm": 0.6112564003429687, "learning_rate": 4.288330831588041e-06, "loss": 0.2999, "step": 11480 }, { "epoch": 0.5378273293671242, "grad_norm": 0.564009361911459, "learning_rate": 4.288198301874467e-06, "loss": 0.2734, "step": 11481 }, { "epoch": 0.5378741743570525, "grad_norm": 0.605440395193702, "learning_rate": 4.2880657618702606e-06, "loss": 0.2945, "step": 11482 }, { "epoch": 0.5379210193469809, "grad_norm": 0.5703863192354761, "learning_rate": 4.287933211576185e-06, "loss": 0.2815, "step": 11483 }, { "epoch": 0.5379678643369091, "grad_norm": 0.5383277732750498, "learning_rate": 4.287800650993003e-06, "loss": 0.2616, "step": 11484 }, { "epoch": 0.5380147093268375, "grad_norm": 0.5651702681633196, "learning_rate": 4.287668080121479e-06, "loss": 0.3093, "step": 11485 }, { "epoch": 0.5380615543167658, "grad_norm": 0.5747009614798562, "learning_rate": 4.287535498962373e-06, "loss": 0.2734, "step": 11486 }, { "epoch": 0.5381083993066942, "grad_norm": 0.6259219917393574, "learning_rate": 4.287402907516451e-06, "loss": 0.2977, "step": 11487 }, { "epoch": 0.5381552442966224, "grad_norm": 0.6291945730889474, "learning_rate": 4.2872703057844726e-06, "loss": 0.303, "step": 11488 }, { "epoch": 0.5382020892865508, "grad_norm": 0.5612063089588164, "learning_rate": 4.287137693767204e-06, "loss": 0.2655, "step": 11489 }, { "epoch": 0.5382489342764791, "grad_norm": 0.5907288209118201, "learning_rate": 4.2870050714654066e-06, "loss": 0.2837, "step": 11490 }, { "epoch": 0.5382957792664075, "grad_norm": 0.6164353784726613, "learning_rate": 4.286872438879844e-06, "loss": 0.2839, "step": 11491 }, { "epoch": 0.5383426242563358, "grad_norm": 0.6088708603735482, "learning_rate": 4.286739796011281e-06, "loss": 0.282, "step": 11492 }, { "epoch": 0.5383894692462641, "grad_norm": 0.6663561841481135, "learning_rate": 4.286607142860478e-06, "loss": 0.3101, "step": 11493 }, { "epoch": 0.5384363142361924, "grad_norm": 0.5727227702745593, "learning_rate": 4.286474479428201e-06, "loss": 0.274, "step": 11494 }, { "epoch": 0.5384831592261208, "grad_norm": 0.5915099435358641, "learning_rate": 4.286341805715212e-06, "loss": 0.3036, "step": 11495 }, { "epoch": 0.5385300042160491, "grad_norm": 0.6211592822603361, "learning_rate": 4.286209121722274e-06, "loss": 0.3098, "step": 11496 }, { "epoch": 0.5385768492059774, "grad_norm": 0.5555578291779123, "learning_rate": 4.286076427450153e-06, "loss": 0.2791, "step": 11497 }, { "epoch": 0.5386236941959057, "grad_norm": 0.596811736505642, "learning_rate": 4.285943722899611e-06, "loss": 0.2917, "step": 11498 }, { "epoch": 0.5386705391858341, "grad_norm": 0.5723933101147677, "learning_rate": 4.28581100807141e-06, "loss": 0.2847, "step": 11499 }, { "epoch": 0.5387173841757624, "grad_norm": 0.6112743881832284, "learning_rate": 4.2856782829663165e-06, "loss": 0.2879, "step": 11500 }, { "epoch": 0.5387642291656908, "grad_norm": 0.5675901339598433, "learning_rate": 4.285545547585094e-06, "loss": 0.2744, "step": 11501 }, { "epoch": 0.538811074155619, "grad_norm": 0.6058449969684189, "learning_rate": 4.285412801928505e-06, "loss": 0.2808, "step": 11502 }, { "epoch": 0.5388579191455474, "grad_norm": 0.5789725941397529, "learning_rate": 4.285280045997313e-06, "loss": 0.2785, "step": 11503 }, { "epoch": 0.5389047641354757, "grad_norm": 0.5831947238619302, "learning_rate": 4.285147279792285e-06, "loss": 0.2765, "step": 11504 }, { "epoch": 0.5389516091254041, "grad_norm": 0.5723903179431575, "learning_rate": 4.285014503314182e-06, "loss": 0.2935, "step": 11505 }, { "epoch": 0.5389984541153323, "grad_norm": 0.6029288404172345, "learning_rate": 4.28488171656377e-06, "loss": 0.2798, "step": 11506 }, { "epoch": 0.5390452991052607, "grad_norm": 0.5437342200176383, "learning_rate": 4.284748919541811e-06, "loss": 0.2722, "step": 11507 }, { "epoch": 0.539092144095189, "grad_norm": 0.6091449625516553, "learning_rate": 4.284616112249071e-06, "loss": 0.2999, "step": 11508 }, { "epoch": 0.5391389890851174, "grad_norm": 0.633248468982462, "learning_rate": 4.284483294686314e-06, "loss": 0.294, "step": 11509 }, { "epoch": 0.5391858340750457, "grad_norm": 0.6082563450283217, "learning_rate": 4.284350466854305e-06, "loss": 0.2872, "step": 11510 }, { "epoch": 0.539232679064974, "grad_norm": 0.5962815430793414, "learning_rate": 4.284217628753807e-06, "loss": 0.2979, "step": 11511 }, { "epoch": 0.5392795240549023, "grad_norm": 0.6047945263771167, "learning_rate": 4.284084780385584e-06, "loss": 0.2779, "step": 11512 }, { "epoch": 0.5393263690448307, "grad_norm": 0.583141435947207, "learning_rate": 4.283951921750403e-06, "loss": 0.2745, "step": 11513 }, { "epoch": 0.539373214034759, "grad_norm": 0.5887261841129084, "learning_rate": 4.2838190528490265e-06, "loss": 0.3055, "step": 11514 }, { "epoch": 0.5394200590246873, "grad_norm": 0.5965882760895878, "learning_rate": 4.28368617368222e-06, "loss": 0.2769, "step": 11515 }, { "epoch": 0.5394669040146156, "grad_norm": 0.5760203595187342, "learning_rate": 4.283553284250747e-06, "loss": 0.2835, "step": 11516 }, { "epoch": 0.539513749004544, "grad_norm": 0.5773059515476314, "learning_rate": 4.283420384555374e-06, "loss": 0.2932, "step": 11517 }, { "epoch": 0.5395605939944723, "grad_norm": 0.6625599775793972, "learning_rate": 4.2832874745968645e-06, "loss": 0.3163, "step": 11518 }, { "epoch": 0.5396074389844007, "grad_norm": 0.6630093684553485, "learning_rate": 4.2831545543759835e-06, "loss": 0.2944, "step": 11519 }, { "epoch": 0.5396542839743289, "grad_norm": 0.6813338056136711, "learning_rate": 4.283021623893497e-06, "loss": 0.2787, "step": 11520 }, { "epoch": 0.5397011289642573, "grad_norm": 0.6191838054182739, "learning_rate": 4.282888683150168e-06, "loss": 0.2949, "step": 11521 }, { "epoch": 0.5397479739541856, "grad_norm": 0.6036151295007349, "learning_rate": 4.2827557321467635e-06, "loss": 0.2933, "step": 11522 }, { "epoch": 0.539794818944114, "grad_norm": 0.6097624502406729, "learning_rate": 4.282622770884049e-06, "loss": 0.3009, "step": 11523 }, { "epoch": 0.5398416639340422, "grad_norm": 0.5907084429711538, "learning_rate": 4.282489799362787e-06, "loss": 0.2942, "step": 11524 }, { "epoch": 0.5398885089239706, "grad_norm": 0.6624185994591122, "learning_rate": 4.282356817583744e-06, "loss": 0.2967, "step": 11525 }, { "epoch": 0.5399353539138989, "grad_norm": 0.5835308030635463, "learning_rate": 4.282223825547686e-06, "loss": 0.2731, "step": 11526 }, { "epoch": 0.5399821989038273, "grad_norm": 0.6109747801182461, "learning_rate": 4.282090823255378e-06, "loss": 0.2754, "step": 11527 }, { "epoch": 0.5400290438937556, "grad_norm": 0.6066046978023258, "learning_rate": 4.281957810707585e-06, "loss": 0.297, "step": 11528 }, { "epoch": 0.5400758888836839, "grad_norm": 0.65831415817742, "learning_rate": 4.281824787905073e-06, "loss": 0.305, "step": 11529 }, { "epoch": 0.5401227338736122, "grad_norm": 0.5979639507083694, "learning_rate": 4.281691754848607e-06, "loss": 0.2819, "step": 11530 }, { "epoch": 0.5401695788635406, "grad_norm": 0.6189533841288419, "learning_rate": 4.281558711538952e-06, "loss": 0.284, "step": 11531 }, { "epoch": 0.5402164238534689, "grad_norm": 0.5795128269001542, "learning_rate": 4.281425657976875e-06, "loss": 0.2891, "step": 11532 }, { "epoch": 0.5402632688433971, "grad_norm": 0.6544994442467679, "learning_rate": 4.281292594163142e-06, "loss": 0.3145, "step": 11533 }, { "epoch": 0.5403101138333255, "grad_norm": 0.5870158368190377, "learning_rate": 4.281159520098517e-06, "loss": 0.2692, "step": 11534 }, { "epoch": 0.5403569588232539, "grad_norm": 0.6332751726033764, "learning_rate": 4.281026435783766e-06, "loss": 0.2882, "step": 11535 }, { "epoch": 0.5404038038131822, "grad_norm": 0.5591174916345129, "learning_rate": 4.280893341219656e-06, "loss": 0.2897, "step": 11536 }, { "epoch": 0.5404506488031106, "grad_norm": 0.5814885041333052, "learning_rate": 4.2807602364069535e-06, "loss": 0.3008, "step": 11537 }, { "epoch": 0.5404974937930388, "grad_norm": 0.5956510601251621, "learning_rate": 4.280627121346423e-06, "loss": 0.2855, "step": 11538 }, { "epoch": 0.5405443387829671, "grad_norm": 0.5518597434362029, "learning_rate": 4.2804939960388294e-06, "loss": 0.264, "step": 11539 }, { "epoch": 0.5405911837728955, "grad_norm": 0.589017394634095, "learning_rate": 4.280360860484942e-06, "loss": 0.3011, "step": 11540 }, { "epoch": 0.5406380287628239, "grad_norm": 0.5840477075592758, "learning_rate": 4.280227714685525e-06, "loss": 0.2997, "step": 11541 }, { "epoch": 0.5406848737527521, "grad_norm": 0.5885383272403364, "learning_rate": 4.280094558641344e-06, "loss": 0.2904, "step": 11542 }, { "epoch": 0.5407317187426804, "grad_norm": 0.5825015430544851, "learning_rate": 4.279961392353167e-06, "loss": 0.2807, "step": 11543 }, { "epoch": 0.5407785637326088, "grad_norm": 0.5665802752214036, "learning_rate": 4.27982821582176e-06, "loss": 0.2649, "step": 11544 }, { "epoch": 0.5408254087225371, "grad_norm": 0.6385832751679049, "learning_rate": 4.279695029047888e-06, "loss": 0.2709, "step": 11545 }, { "epoch": 0.5408722537124655, "grad_norm": 0.581271355881992, "learning_rate": 4.279561832032319e-06, "loss": 0.2949, "step": 11546 }, { "epoch": 0.5409190987023937, "grad_norm": 0.5960388986270082, "learning_rate": 4.279428624775818e-06, "loss": 0.2778, "step": 11547 }, { "epoch": 0.5409659436923221, "grad_norm": 0.6755912341950169, "learning_rate": 4.279295407279154e-06, "loss": 0.3076, "step": 11548 }, { "epoch": 0.5410127886822504, "grad_norm": 0.5428703264726867, "learning_rate": 4.279162179543092e-06, "loss": 0.2754, "step": 11549 }, { "epoch": 0.5410596336721788, "grad_norm": 0.6199735641526392, "learning_rate": 4.279028941568398e-06, "loss": 0.2729, "step": 11550 }, { "epoch": 0.541106478662107, "grad_norm": 0.6245840392139496, "learning_rate": 4.2788956933558396e-06, "loss": 0.292, "step": 11551 }, { "epoch": 0.5411533236520354, "grad_norm": 0.5976664721215968, "learning_rate": 4.278762434906184e-06, "loss": 0.288, "step": 11552 }, { "epoch": 0.5412001686419637, "grad_norm": 0.6422062930199113, "learning_rate": 4.278629166220197e-06, "loss": 0.2933, "step": 11553 }, { "epoch": 0.5412470136318921, "grad_norm": 0.5769609777398961, "learning_rate": 4.278495887298647e-06, "loss": 0.2791, "step": 11554 }, { "epoch": 0.5412938586218204, "grad_norm": 0.5746317715536525, "learning_rate": 4.2783625981423e-06, "loss": 0.2819, "step": 11555 }, { "epoch": 0.5413407036117487, "grad_norm": 0.6069000012026053, "learning_rate": 4.278229298751924e-06, "loss": 0.2923, "step": 11556 }, { "epoch": 0.541387548601677, "grad_norm": 0.6206570307670639, "learning_rate": 4.2780959891282835e-06, "loss": 0.2745, "step": 11557 }, { "epoch": 0.5414343935916054, "grad_norm": 0.6541235899340759, "learning_rate": 4.277962669272149e-06, "loss": 0.2902, "step": 11558 }, { "epoch": 0.5414812385815337, "grad_norm": 0.605117096733631, "learning_rate": 4.277829339184285e-06, "loss": 0.274, "step": 11559 }, { "epoch": 0.541528083571462, "grad_norm": 0.5953309222100853, "learning_rate": 4.277695998865461e-06, "loss": 0.2791, "step": 11560 }, { "epoch": 0.5415749285613903, "grad_norm": 0.5823453518046336, "learning_rate": 4.2775626483164426e-06, "loss": 0.2699, "step": 11561 }, { "epoch": 0.5416217735513187, "grad_norm": 0.5448965405492708, "learning_rate": 4.277429287537999e-06, "loss": 0.2623, "step": 11562 }, { "epoch": 0.541668618541247, "grad_norm": 0.602964445188202, "learning_rate": 4.277295916530896e-06, "loss": 0.2862, "step": 11563 }, { "epoch": 0.5417154635311754, "grad_norm": 0.5843024752013797, "learning_rate": 4.277162535295901e-06, "loss": 0.2845, "step": 11564 }, { "epoch": 0.5417623085211036, "grad_norm": 0.578234114759847, "learning_rate": 4.277029143833783e-06, "loss": 0.28, "step": 11565 }, { "epoch": 0.541809153511032, "grad_norm": 0.6147594434507022, "learning_rate": 4.276895742145309e-06, "loss": 0.2786, "step": 11566 }, { "epoch": 0.5418559985009603, "grad_norm": 0.6140878001127577, "learning_rate": 4.2767623302312466e-06, "loss": 0.2883, "step": 11567 }, { "epoch": 0.5419028434908887, "grad_norm": 0.6114346289507196, "learning_rate": 4.276628908092363e-06, "loss": 0.2773, "step": 11568 }, { "epoch": 0.5419496884808169, "grad_norm": 0.5516148242633928, "learning_rate": 4.276495475729428e-06, "loss": 0.2843, "step": 11569 }, { "epoch": 0.5419965334707453, "grad_norm": 0.5932330238233872, "learning_rate": 4.276362033143206e-06, "loss": 0.2985, "step": 11570 }, { "epoch": 0.5420433784606736, "grad_norm": 0.6125926041046568, "learning_rate": 4.2762285803344685e-06, "loss": 0.3159, "step": 11571 }, { "epoch": 0.542090223450602, "grad_norm": 0.5608382865369207, "learning_rate": 4.276095117303981e-06, "loss": 0.2829, "step": 11572 }, { "epoch": 0.5421370684405303, "grad_norm": 0.5718673496364614, "learning_rate": 4.275961644052513e-06, "loss": 0.2857, "step": 11573 }, { "epoch": 0.5421839134304586, "grad_norm": 0.6755506765361178, "learning_rate": 4.275828160580832e-06, "loss": 0.297, "step": 11574 }, { "epoch": 0.5422307584203869, "grad_norm": 0.5838253066090856, "learning_rate": 4.275694666889706e-06, "loss": 0.2821, "step": 11575 }, { "epoch": 0.5422776034103153, "grad_norm": 0.5640948822198908, "learning_rate": 4.275561162979904e-06, "loss": 0.2806, "step": 11576 }, { "epoch": 0.5423244484002436, "grad_norm": 0.6028535146064022, "learning_rate": 4.275427648852193e-06, "loss": 0.2979, "step": 11577 }, { "epoch": 0.5423712933901719, "grad_norm": 0.5793148026203916, "learning_rate": 4.275294124507343e-06, "loss": 0.2961, "step": 11578 }, { "epoch": 0.5424181383801002, "grad_norm": 0.6077337200141707, "learning_rate": 4.275160589946122e-06, "loss": 0.2653, "step": 11579 }, { "epoch": 0.5424649833700286, "grad_norm": 0.6352923304300738, "learning_rate": 4.275027045169297e-06, "loss": 0.3098, "step": 11580 }, { "epoch": 0.5425118283599569, "grad_norm": 0.6025349704989984, "learning_rate": 4.2748934901776375e-06, "loss": 0.2931, "step": 11581 }, { "epoch": 0.5425586733498853, "grad_norm": 0.5444109148118126, "learning_rate": 4.274759924971912e-06, "loss": 0.2906, "step": 11582 }, { "epoch": 0.5426055183398135, "grad_norm": 0.6470639251457571, "learning_rate": 4.274626349552889e-06, "loss": 0.2944, "step": 11583 }, { "epoch": 0.5426523633297419, "grad_norm": 0.5909086303056535, "learning_rate": 4.2744927639213385e-06, "loss": 0.2734, "step": 11584 }, { "epoch": 0.5426992083196702, "grad_norm": 0.5853362778412303, "learning_rate": 4.274359168078027e-06, "loss": 0.282, "step": 11585 }, { "epoch": 0.5427460533095986, "grad_norm": 0.576366858361252, "learning_rate": 4.274225562023725e-06, "loss": 0.296, "step": 11586 }, { "epoch": 0.5427928982995268, "grad_norm": 0.6193538852882906, "learning_rate": 4.274091945759201e-06, "loss": 0.282, "step": 11587 }, { "epoch": 0.5428397432894552, "grad_norm": 0.6264691486183169, "learning_rate": 4.273958319285224e-06, "loss": 0.3048, "step": 11588 }, { "epoch": 0.5428865882793835, "grad_norm": 0.6148202292197176, "learning_rate": 4.273824682602562e-06, "loss": 0.2719, "step": 11589 }, { "epoch": 0.5429334332693119, "grad_norm": 0.6043075688477632, "learning_rate": 4.2736910357119854e-06, "loss": 0.3152, "step": 11590 }, { "epoch": 0.5429802782592402, "grad_norm": 0.5748659840872277, "learning_rate": 4.273557378614262e-06, "loss": 0.2725, "step": 11591 }, { "epoch": 0.5430271232491685, "grad_norm": 0.6145759832518181, "learning_rate": 4.273423711310162e-06, "loss": 0.2804, "step": 11592 }, { "epoch": 0.5430739682390968, "grad_norm": 0.5881800966141698, "learning_rate": 4.273290033800455e-06, "loss": 0.2731, "step": 11593 }, { "epoch": 0.5431208132290252, "grad_norm": 0.6159662364689613, "learning_rate": 4.2731563460859085e-06, "loss": 0.2649, "step": 11594 }, { "epoch": 0.5431676582189535, "grad_norm": 0.5771887270654179, "learning_rate": 4.273022648167293e-06, "loss": 0.2905, "step": 11595 }, { "epoch": 0.5432145032088818, "grad_norm": 0.6516654065130414, "learning_rate": 4.2728889400453776e-06, "loss": 0.2995, "step": 11596 }, { "epoch": 0.5432613481988101, "grad_norm": 0.5728379918750586, "learning_rate": 4.272755221720933e-06, "loss": 0.2854, "step": 11597 }, { "epoch": 0.5433081931887385, "grad_norm": 0.5690457919126981, "learning_rate": 4.2726214931947264e-06, "loss": 0.2906, "step": 11598 }, { "epoch": 0.5433550381786668, "grad_norm": 0.5906656571350063, "learning_rate": 4.27248775446753e-06, "loss": 0.291, "step": 11599 }, { "epoch": 0.5434018831685952, "grad_norm": 0.6144203561942135, "learning_rate": 4.2723540055401104e-06, "loss": 0.288, "step": 11600 }, { "epoch": 0.5434487281585234, "grad_norm": 0.5536383431969372, "learning_rate": 4.27222024641324e-06, "loss": 0.2715, "step": 11601 }, { "epoch": 0.5434955731484518, "grad_norm": 0.6519863798899487, "learning_rate": 4.272086477087688e-06, "loss": 0.2869, "step": 11602 }, { "epoch": 0.5435424181383801, "grad_norm": 0.557142787666856, "learning_rate": 4.271952697564223e-06, "loss": 0.2746, "step": 11603 }, { "epoch": 0.5435892631283085, "grad_norm": 0.6758789078023365, "learning_rate": 4.271818907843616e-06, "loss": 0.3, "step": 11604 }, { "epoch": 0.5436361081182367, "grad_norm": 0.6336719247642899, "learning_rate": 4.271685107926636e-06, "loss": 0.2934, "step": 11605 }, { "epoch": 0.5436829531081651, "grad_norm": 0.6217875966560271, "learning_rate": 4.2715512978140534e-06, "loss": 0.2864, "step": 11606 }, { "epoch": 0.5437297980980934, "grad_norm": 0.599798294903596, "learning_rate": 4.2714174775066384e-06, "loss": 0.3037, "step": 11607 }, { "epoch": 0.5437766430880218, "grad_norm": 0.5826207490235479, "learning_rate": 4.271283647005162e-06, "loss": 0.267, "step": 11608 }, { "epoch": 0.5438234880779501, "grad_norm": 0.5759452945627495, "learning_rate": 4.271149806310393e-06, "loss": 0.3107, "step": 11609 }, { "epoch": 0.5438703330678784, "grad_norm": 0.5498119848006019, "learning_rate": 4.271015955423101e-06, "loss": 0.2774, "step": 11610 }, { "epoch": 0.5439171780578067, "grad_norm": 0.6368249820496539, "learning_rate": 4.2708820943440585e-06, "loss": 0.2889, "step": 11611 }, { "epoch": 0.5439640230477351, "grad_norm": 0.6461751700967882, "learning_rate": 4.270748223074034e-06, "loss": 0.314, "step": 11612 }, { "epoch": 0.5440108680376634, "grad_norm": 0.5745696045612224, "learning_rate": 4.270614341613799e-06, "loss": 0.2755, "step": 11613 }, { "epoch": 0.5440577130275916, "grad_norm": 0.5636462704741713, "learning_rate": 4.270480449964123e-06, "loss": 0.2796, "step": 11614 }, { "epoch": 0.54410455801752, "grad_norm": 0.6057681647613908, "learning_rate": 4.270346548125777e-06, "loss": 0.3163, "step": 11615 }, { "epoch": 0.5441514030074484, "grad_norm": 0.6357905267916465, "learning_rate": 4.270212636099532e-06, "loss": 0.2974, "step": 11616 }, { "epoch": 0.5441982479973767, "grad_norm": 0.6184384812687669, "learning_rate": 4.270078713886158e-06, "loss": 0.3084, "step": 11617 }, { "epoch": 0.5442450929873051, "grad_norm": 0.5490107154419791, "learning_rate": 4.269944781486426e-06, "loss": 0.2688, "step": 11618 }, { "epoch": 0.5442919379772333, "grad_norm": 0.5724937345573515, "learning_rate": 4.269810838901106e-06, "loss": 0.2887, "step": 11619 }, { "epoch": 0.5443387829671616, "grad_norm": 0.6001464762263421, "learning_rate": 4.269676886130971e-06, "loss": 0.3007, "step": 11620 }, { "epoch": 0.54438562795709, "grad_norm": 0.5515379356620531, "learning_rate": 4.269542923176789e-06, "loss": 0.2792, "step": 11621 }, { "epoch": 0.5444324729470184, "grad_norm": 0.6001249232082724, "learning_rate": 4.269408950039332e-06, "loss": 0.2643, "step": 11622 }, { "epoch": 0.5444793179369466, "grad_norm": 0.6252981630493384, "learning_rate": 4.269274966719372e-06, "loss": 0.2828, "step": 11623 }, { "epoch": 0.544526162926875, "grad_norm": 0.6332370802787766, "learning_rate": 4.269140973217679e-06, "loss": 0.29, "step": 11624 }, { "epoch": 0.5445730079168033, "grad_norm": 0.6005691003120657, "learning_rate": 4.2690069695350244e-06, "loss": 0.2962, "step": 11625 }, { "epoch": 0.5446198529067316, "grad_norm": 0.5618581464510803, "learning_rate": 4.26887295567218e-06, "loss": 0.2795, "step": 11626 }, { "epoch": 0.54466669789666, "grad_norm": 0.6073945369987817, "learning_rate": 4.268738931629916e-06, "loss": 0.2718, "step": 11627 }, { "epoch": 0.5447135428865882, "grad_norm": 0.591725305129573, "learning_rate": 4.268604897409003e-06, "loss": 0.29, "step": 11628 }, { "epoch": 0.5447603878765166, "grad_norm": 0.5462715533180225, "learning_rate": 4.2684708530102134e-06, "loss": 0.2903, "step": 11629 }, { "epoch": 0.544807232866445, "grad_norm": 0.6065028172455551, "learning_rate": 4.26833679843432e-06, "loss": 0.2752, "step": 11630 }, { "epoch": 0.5448540778563733, "grad_norm": 0.6430642561888551, "learning_rate": 4.268202733682092e-06, "loss": 0.3148, "step": 11631 }, { "epoch": 0.5449009228463015, "grad_norm": 0.6064330422765397, "learning_rate": 4.268068658754301e-06, "loss": 0.3105, "step": 11632 }, { "epoch": 0.5449477678362299, "grad_norm": 0.5820178657507462, "learning_rate": 4.26793457365172e-06, "loss": 0.2887, "step": 11633 }, { "epoch": 0.5449946128261582, "grad_norm": 0.5969032028909151, "learning_rate": 4.267800478375119e-06, "loss": 0.2889, "step": 11634 }, { "epoch": 0.5450414578160866, "grad_norm": 0.5624186035142743, "learning_rate": 4.267666372925272e-06, "loss": 0.2947, "step": 11635 }, { "epoch": 0.545088302806015, "grad_norm": 0.6092509286642934, "learning_rate": 4.267532257302948e-06, "loss": 0.2841, "step": 11636 }, { "epoch": 0.5451351477959432, "grad_norm": 0.5743060010094679, "learning_rate": 4.267398131508921e-06, "loss": 0.2728, "step": 11637 }, { "epoch": 0.5451819927858715, "grad_norm": 0.5700005990386416, "learning_rate": 4.267263995543962e-06, "loss": 0.2751, "step": 11638 }, { "epoch": 0.5452288377757999, "grad_norm": 0.587309156574532, "learning_rate": 4.267129849408842e-06, "loss": 0.2905, "step": 11639 }, { "epoch": 0.5452756827657282, "grad_norm": 0.6181260795703413, "learning_rate": 4.2669956931043345e-06, "loss": 0.2808, "step": 11640 }, { "epoch": 0.5453225277556565, "grad_norm": 0.6242796314058631, "learning_rate": 4.266861526631211e-06, "loss": 0.2765, "step": 11641 }, { "epoch": 0.5453693727455848, "grad_norm": 0.5213529277760317, "learning_rate": 4.266727349990243e-06, "loss": 0.2772, "step": 11642 }, { "epoch": 0.5454162177355132, "grad_norm": 0.580626566668986, "learning_rate": 4.266593163182203e-06, "loss": 0.2836, "step": 11643 }, { "epoch": 0.5454630627254415, "grad_norm": 0.5848598713959071, "learning_rate": 4.266458966207865e-06, "loss": 0.2875, "step": 11644 }, { "epoch": 0.5455099077153699, "grad_norm": 0.6193599925356992, "learning_rate": 4.266324759067998e-06, "loss": 0.299, "step": 11645 }, { "epoch": 0.5455567527052981, "grad_norm": 0.6249066207922455, "learning_rate": 4.266190541763377e-06, "loss": 0.2997, "step": 11646 }, { "epoch": 0.5456035976952265, "grad_norm": 0.5771230695399084, "learning_rate": 4.266056314294772e-06, "loss": 0.2695, "step": 11647 }, { "epoch": 0.5456504426851548, "grad_norm": 0.5727178531130666, "learning_rate": 4.265922076662958e-06, "loss": 0.277, "step": 11648 }, { "epoch": 0.5456972876750832, "grad_norm": 0.5678057344051536, "learning_rate": 4.265787828868705e-06, "loss": 0.28, "step": 11649 }, { "epoch": 0.5457441326650114, "grad_norm": 0.5591373633722366, "learning_rate": 4.2656535709127885e-06, "loss": 0.2828, "step": 11650 }, { "epoch": 0.5457909776549398, "grad_norm": 0.6553789591942714, "learning_rate": 4.265519302795979e-06, "loss": 0.3134, "step": 11651 }, { "epoch": 0.5458378226448681, "grad_norm": 0.5672524533905338, "learning_rate": 4.265385024519049e-06, "loss": 0.2725, "step": 11652 }, { "epoch": 0.5458846676347965, "grad_norm": 0.5734199999100186, "learning_rate": 4.2652507360827726e-06, "loss": 0.2971, "step": 11653 }, { "epoch": 0.5459315126247248, "grad_norm": 0.5972547442718568, "learning_rate": 4.265116437487921e-06, "loss": 0.2858, "step": 11654 }, { "epoch": 0.5459783576146531, "grad_norm": 0.5992078010950813, "learning_rate": 4.264982128735269e-06, "loss": 0.2902, "step": 11655 }, { "epoch": 0.5460252026045814, "grad_norm": 0.6393364901453896, "learning_rate": 4.264847809825587e-06, "loss": 0.3047, "step": 11656 }, { "epoch": 0.5460720475945098, "grad_norm": 0.6184506599944446, "learning_rate": 4.264713480759651e-06, "loss": 0.2914, "step": 11657 }, { "epoch": 0.5461188925844381, "grad_norm": 0.5977944662577737, "learning_rate": 4.264579141538232e-06, "loss": 0.2967, "step": 11658 }, { "epoch": 0.5461657375743664, "grad_norm": 0.6504437044236967, "learning_rate": 4.2644447921621035e-06, "loss": 0.2764, "step": 11659 }, { "epoch": 0.5462125825642947, "grad_norm": 0.6306389474926565, "learning_rate": 4.264310432632039e-06, "loss": 0.2951, "step": 11660 }, { "epoch": 0.5462594275542231, "grad_norm": 0.5922354507249947, "learning_rate": 4.26417606294881e-06, "loss": 0.2766, "step": 11661 }, { "epoch": 0.5463062725441514, "grad_norm": 0.6141821381013552, "learning_rate": 4.264041683113192e-06, "loss": 0.2877, "step": 11662 }, { "epoch": 0.5463531175340798, "grad_norm": 0.5947279003400403, "learning_rate": 4.263907293125958e-06, "loss": 0.2811, "step": 11663 }, { "epoch": 0.546399962524008, "grad_norm": 0.6800759619318053, "learning_rate": 4.263772892987881e-06, "loss": 0.2913, "step": 11664 }, { "epoch": 0.5464468075139364, "grad_norm": 0.6222817590520275, "learning_rate": 4.263638482699734e-06, "loss": 0.2715, "step": 11665 }, { "epoch": 0.5464936525038647, "grad_norm": 0.5933360085779813, "learning_rate": 4.26350406226229e-06, "loss": 0.2755, "step": 11666 }, { "epoch": 0.5465404974937931, "grad_norm": 0.606253522497514, "learning_rate": 4.263369631676324e-06, "loss": 0.2803, "step": 11667 }, { "epoch": 0.5465873424837213, "grad_norm": 0.6125557358858806, "learning_rate": 4.26323519094261e-06, "loss": 0.2983, "step": 11668 }, { "epoch": 0.5466341874736497, "grad_norm": 0.5641750714623375, "learning_rate": 4.2631007400619186e-06, "loss": 0.293, "step": 11669 }, { "epoch": 0.546681032463578, "grad_norm": 0.6615335642136089, "learning_rate": 4.262966279035027e-06, "loss": 0.2956, "step": 11670 }, { "epoch": 0.5467278774535064, "grad_norm": 0.5912227464304934, "learning_rate": 4.262831807862707e-06, "loss": 0.295, "step": 11671 }, { "epoch": 0.5467747224434347, "grad_norm": 0.5809669505544514, "learning_rate": 4.262697326545734e-06, "loss": 0.2938, "step": 11672 }, { "epoch": 0.546821567433363, "grad_norm": 0.5692933155324608, "learning_rate": 4.262562835084879e-06, "loss": 0.2732, "step": 11673 }, { "epoch": 0.5468684124232913, "grad_norm": 0.6341450146366681, "learning_rate": 4.262428333480919e-06, "loss": 0.2851, "step": 11674 }, { "epoch": 0.5469152574132197, "grad_norm": 0.6122379414773198, "learning_rate": 4.262293821734627e-06, "loss": 0.2693, "step": 11675 }, { "epoch": 0.546962102403148, "grad_norm": 0.6039949543809647, "learning_rate": 4.262159299846777e-06, "loss": 0.2827, "step": 11676 }, { "epoch": 0.5470089473930763, "grad_norm": 0.5678420525693642, "learning_rate": 4.262024767818144e-06, "loss": 0.291, "step": 11677 }, { "epoch": 0.5470557923830046, "grad_norm": 0.6149000827284502, "learning_rate": 4.2618902256495e-06, "loss": 0.2855, "step": 11678 }, { "epoch": 0.547102637372933, "grad_norm": 0.5855439777533558, "learning_rate": 4.26175567334162e-06, "loss": 0.2613, "step": 11679 }, { "epoch": 0.5471494823628613, "grad_norm": 0.6028474002179341, "learning_rate": 4.261621110895281e-06, "loss": 0.2802, "step": 11680 }, { "epoch": 0.5471963273527897, "grad_norm": 0.5855165650557654, "learning_rate": 4.261486538311254e-06, "loss": 0.2694, "step": 11681 }, { "epoch": 0.5472431723427179, "grad_norm": 0.6440614947553257, "learning_rate": 4.261351955590314e-06, "loss": 0.2961, "step": 11682 }, { "epoch": 0.5472900173326463, "grad_norm": 0.589861366360887, "learning_rate": 4.2612173627332375e-06, "loss": 0.2867, "step": 11683 }, { "epoch": 0.5473368623225746, "grad_norm": 0.6193181541986297, "learning_rate": 4.261082759740798e-06, "loss": 0.2824, "step": 11684 }, { "epoch": 0.547383707312503, "grad_norm": 0.6138037000667512, "learning_rate": 4.260948146613768e-06, "loss": 0.2863, "step": 11685 }, { "epoch": 0.5474305523024312, "grad_norm": 0.5642223373298108, "learning_rate": 4.2608135233529256e-06, "loss": 0.2975, "step": 11686 }, { "epoch": 0.5474773972923596, "grad_norm": 0.5368677588832058, "learning_rate": 4.260678889959043e-06, "loss": 0.2737, "step": 11687 }, { "epoch": 0.5475242422822879, "grad_norm": 0.631464992873002, "learning_rate": 4.260544246432896e-06, "loss": 0.2889, "step": 11688 }, { "epoch": 0.5475710872722163, "grad_norm": 0.627579599585543, "learning_rate": 4.2604095927752604e-06, "loss": 0.2728, "step": 11689 }, { "epoch": 0.5476179322621446, "grad_norm": 0.6198939947291822, "learning_rate": 4.26027492898691e-06, "loss": 0.2974, "step": 11690 }, { "epoch": 0.5476647772520729, "grad_norm": 0.6079518606650044, "learning_rate": 4.260140255068619e-06, "loss": 0.2914, "step": 11691 }, { "epoch": 0.5477116222420012, "grad_norm": 0.6308283223293144, "learning_rate": 4.260005571021163e-06, "loss": 0.2913, "step": 11692 }, { "epoch": 0.5477584672319296, "grad_norm": 0.5640781063571396, "learning_rate": 4.259870876845318e-06, "loss": 0.2871, "step": 11693 }, { "epoch": 0.5478053122218579, "grad_norm": 0.5689899195061381, "learning_rate": 4.2597361725418575e-06, "loss": 0.2911, "step": 11694 }, { "epoch": 0.5478521572117862, "grad_norm": 0.6559960465713536, "learning_rate": 4.259601458111558e-06, "loss": 0.3118, "step": 11695 }, { "epoch": 0.5478990022017145, "grad_norm": 0.5990796837001603, "learning_rate": 4.259466733555195e-06, "loss": 0.2884, "step": 11696 }, { "epoch": 0.5479458471916429, "grad_norm": 0.5596211176254055, "learning_rate": 4.259331998873544e-06, "loss": 0.2795, "step": 11697 }, { "epoch": 0.5479926921815712, "grad_norm": 0.6254837082260651, "learning_rate": 4.2591972540673775e-06, "loss": 0.2845, "step": 11698 }, { "epoch": 0.5480395371714996, "grad_norm": 0.5931101737689322, "learning_rate": 4.259062499137474e-06, "loss": 0.2878, "step": 11699 }, { "epoch": 0.5480863821614278, "grad_norm": 0.5162396711225367, "learning_rate": 4.258927734084608e-06, "loss": 0.2618, "step": 11700 }, { "epoch": 0.5481332271513562, "grad_norm": 0.5796501237283245, "learning_rate": 4.258792958909555e-06, "loss": 0.2627, "step": 11701 }, { "epoch": 0.5481800721412845, "grad_norm": 0.5633618379114402, "learning_rate": 4.25865817361309e-06, "loss": 0.2993, "step": 11702 }, { "epoch": 0.5482269171312129, "grad_norm": 0.6073598003936899, "learning_rate": 4.25852337819599e-06, "loss": 0.2837, "step": 11703 }, { "epoch": 0.5482737621211411, "grad_norm": 0.5274837988009657, "learning_rate": 4.258388572659029e-06, "loss": 0.2829, "step": 11704 }, { "epoch": 0.5483206071110694, "grad_norm": 0.5878939155483457, "learning_rate": 4.258253757002984e-06, "loss": 0.2751, "step": 11705 }, { "epoch": 0.5483674521009978, "grad_norm": 0.5911360365758741, "learning_rate": 4.258118931228631e-06, "loss": 0.2808, "step": 11706 }, { "epoch": 0.5484142970909262, "grad_norm": 0.670895685962835, "learning_rate": 4.257984095336745e-06, "loss": 0.3168, "step": 11707 }, { "epoch": 0.5484611420808545, "grad_norm": 0.5975031574458909, "learning_rate": 4.257849249328103e-06, "loss": 0.2802, "step": 11708 }, { "epoch": 0.5485079870707827, "grad_norm": 0.6101853420407087, "learning_rate": 4.25771439320348e-06, "loss": 0.2987, "step": 11709 }, { "epoch": 0.5485548320607111, "grad_norm": 0.6004972588442331, "learning_rate": 4.257579526963652e-06, "loss": 0.272, "step": 11710 }, { "epoch": 0.5486016770506394, "grad_norm": 0.5251627012902812, "learning_rate": 4.257444650609396e-06, "loss": 0.2562, "step": 11711 }, { "epoch": 0.5486485220405678, "grad_norm": 0.5916286132742631, "learning_rate": 4.257309764141488e-06, "loss": 0.29, "step": 11712 }, { "epoch": 0.548695367030496, "grad_norm": 0.607672405831568, "learning_rate": 4.257174867560704e-06, "loss": 0.2982, "step": 11713 }, { "epoch": 0.5487422120204244, "grad_norm": 0.5732870631023869, "learning_rate": 4.25703996086782e-06, "loss": 0.2748, "step": 11714 }, { "epoch": 0.5487890570103527, "grad_norm": 0.5868662070985664, "learning_rate": 4.256905044063613e-06, "loss": 0.2807, "step": 11715 }, { "epoch": 0.5488359020002811, "grad_norm": 0.6668020634033125, "learning_rate": 4.256770117148859e-06, "loss": 0.296, "step": 11716 }, { "epoch": 0.5488827469902094, "grad_norm": 0.5885818837219615, "learning_rate": 4.256635180124334e-06, "loss": 0.2821, "step": 11717 }, { "epoch": 0.5489295919801377, "grad_norm": 0.6084829289387562, "learning_rate": 4.256500232990815e-06, "loss": 0.2913, "step": 11718 }, { "epoch": 0.548976436970066, "grad_norm": 0.6226274438573223, "learning_rate": 4.2563652757490795e-06, "loss": 0.2989, "step": 11719 }, { "epoch": 0.5490232819599944, "grad_norm": 0.6022630474162024, "learning_rate": 4.256230308399902e-06, "loss": 0.3018, "step": 11720 }, { "epoch": 0.5490701269499227, "grad_norm": 0.558326934010503, "learning_rate": 4.256095330944061e-06, "loss": 0.2587, "step": 11721 }, { "epoch": 0.549116971939851, "grad_norm": 0.6276237873604641, "learning_rate": 4.255960343382333e-06, "loss": 0.2977, "step": 11722 }, { "epoch": 0.5491638169297793, "grad_norm": 0.5543726834241873, "learning_rate": 4.255825345715495e-06, "loss": 0.2739, "step": 11723 }, { "epoch": 0.5492106619197077, "grad_norm": 0.5918033295619207, "learning_rate": 4.255690337944323e-06, "loss": 0.2866, "step": 11724 }, { "epoch": 0.549257506909636, "grad_norm": 0.5843406468980031, "learning_rate": 4.255555320069594e-06, "loss": 0.2821, "step": 11725 }, { "epoch": 0.5493043518995644, "grad_norm": 0.6217486450818129, "learning_rate": 4.255420292092086e-06, "loss": 0.263, "step": 11726 }, { "epoch": 0.5493511968894926, "grad_norm": 0.6152874514037783, "learning_rate": 4.255285254012576e-06, "loss": 0.2848, "step": 11727 }, { "epoch": 0.549398041879421, "grad_norm": 0.6102393004648204, "learning_rate": 4.255150205831839e-06, "loss": 0.2961, "step": 11728 }, { "epoch": 0.5494448868693493, "grad_norm": 0.5877051466296037, "learning_rate": 4.255015147550654e-06, "loss": 0.2936, "step": 11729 }, { "epoch": 0.5494917318592777, "grad_norm": 0.5764164536309418, "learning_rate": 4.254880079169799e-06, "loss": 0.2929, "step": 11730 }, { "epoch": 0.5495385768492059, "grad_norm": 0.6329223335843147, "learning_rate": 4.25474500069005e-06, "loss": 0.2921, "step": 11731 }, { "epoch": 0.5495854218391343, "grad_norm": 0.6067960018468528, "learning_rate": 4.254609912112183e-06, "loss": 0.2935, "step": 11732 }, { "epoch": 0.5496322668290626, "grad_norm": 0.5594019230036501, "learning_rate": 4.254474813436978e-06, "loss": 0.2657, "step": 11733 }, { "epoch": 0.549679111818991, "grad_norm": 0.5620215597923394, "learning_rate": 4.2543397046652115e-06, "loss": 0.2706, "step": 11734 }, { "epoch": 0.5497259568089193, "grad_norm": 0.6033273106977365, "learning_rate": 4.254204585797661e-06, "loss": 0.2938, "step": 11735 }, { "epoch": 0.5497728017988476, "grad_norm": 0.5882094048781974, "learning_rate": 4.254069456835104e-06, "loss": 0.2764, "step": 11736 }, { "epoch": 0.5498196467887759, "grad_norm": 0.5808183791907818, "learning_rate": 4.253934317778318e-06, "loss": 0.3002, "step": 11737 }, { "epoch": 0.5498664917787043, "grad_norm": 0.5813258353928653, "learning_rate": 4.253799168628081e-06, "loss": 0.2693, "step": 11738 }, { "epoch": 0.5499133367686326, "grad_norm": 0.6508312904854089, "learning_rate": 4.253664009385171e-06, "loss": 0.3028, "step": 11739 }, { "epoch": 0.5499601817585609, "grad_norm": 0.6062937619126075, "learning_rate": 4.253528840050365e-06, "loss": 0.2882, "step": 11740 }, { "epoch": 0.5500070267484892, "grad_norm": 0.612967220535609, "learning_rate": 4.25339366062444e-06, "loss": 0.2853, "step": 11741 }, { "epoch": 0.5500538717384176, "grad_norm": 0.5845423902267031, "learning_rate": 4.253258471108177e-06, "loss": 0.292, "step": 11742 }, { "epoch": 0.5501007167283459, "grad_norm": 0.641285451643179, "learning_rate": 4.253123271502352e-06, "loss": 0.2986, "step": 11743 }, { "epoch": 0.5501475617182743, "grad_norm": 0.5915961482166875, "learning_rate": 4.252988061807742e-06, "loss": 0.2952, "step": 11744 }, { "epoch": 0.5501944067082025, "grad_norm": 0.5905202558756267, "learning_rate": 4.2528528420251275e-06, "loss": 0.2852, "step": 11745 }, { "epoch": 0.5502412516981309, "grad_norm": 0.5889987992461758, "learning_rate": 4.252717612155285e-06, "loss": 0.272, "step": 11746 }, { "epoch": 0.5502880966880592, "grad_norm": 0.6075589319179485, "learning_rate": 4.252582372198993e-06, "loss": 0.3, "step": 11747 }, { "epoch": 0.5503349416779876, "grad_norm": 0.5909466745915504, "learning_rate": 4.25244712215703e-06, "loss": 0.2764, "step": 11748 }, { "epoch": 0.5503817866679158, "grad_norm": 0.6052358785310732, "learning_rate": 4.252311862030175e-06, "loss": 0.2772, "step": 11749 }, { "epoch": 0.5504286316578442, "grad_norm": 0.5958512556015368, "learning_rate": 4.2521765918192046e-06, "loss": 0.2747, "step": 11750 }, { "epoch": 0.5504754766477725, "grad_norm": 0.592303449884679, "learning_rate": 4.252041311524899e-06, "loss": 0.2829, "step": 11751 }, { "epoch": 0.5505223216377009, "grad_norm": 0.6171470932616911, "learning_rate": 4.2519060211480356e-06, "loss": 0.3009, "step": 11752 }, { "epoch": 0.5505691666276292, "grad_norm": 0.6060070465885754, "learning_rate": 4.2517707206893935e-06, "loss": 0.2786, "step": 11753 }, { "epoch": 0.5506160116175575, "grad_norm": 0.557552928504734, "learning_rate": 4.251635410149752e-06, "loss": 0.2786, "step": 11754 }, { "epoch": 0.5506628566074858, "grad_norm": 0.6219817695604295, "learning_rate": 4.251500089529887e-06, "loss": 0.3027, "step": 11755 }, { "epoch": 0.5507097015974142, "grad_norm": 0.6165941671208828, "learning_rate": 4.251364758830581e-06, "loss": 0.2768, "step": 11756 }, { "epoch": 0.5507565465873425, "grad_norm": 0.5885079316541417, "learning_rate": 4.251229418052611e-06, "loss": 0.2616, "step": 11757 }, { "epoch": 0.5508033915772708, "grad_norm": 0.6135189278330992, "learning_rate": 4.251094067196754e-06, "loss": 0.2742, "step": 11758 }, { "epoch": 0.5508502365671991, "grad_norm": 0.6696294471329368, "learning_rate": 4.250958706263793e-06, "loss": 0.3017, "step": 11759 }, { "epoch": 0.5508970815571275, "grad_norm": 0.6190074964273172, "learning_rate": 4.250823335254503e-06, "loss": 0.2694, "step": 11760 }, { "epoch": 0.5509439265470558, "grad_norm": 0.6078691482177216, "learning_rate": 4.250687954169666e-06, "loss": 0.2876, "step": 11761 }, { "epoch": 0.5509907715369842, "grad_norm": 0.6153220464442731, "learning_rate": 4.250552563010059e-06, "loss": 0.2859, "step": 11762 }, { "epoch": 0.5510376165269124, "grad_norm": 0.5727735996484913, "learning_rate": 4.250417161776462e-06, "loss": 0.284, "step": 11763 }, { "epoch": 0.5510844615168408, "grad_norm": 0.5719086127289769, "learning_rate": 4.250281750469655e-06, "loss": 0.2706, "step": 11764 }, { "epoch": 0.5511313065067691, "grad_norm": 0.6056102767901219, "learning_rate": 4.250146329090416e-06, "loss": 0.2986, "step": 11765 }, { "epoch": 0.5511781514966975, "grad_norm": 0.620913909539941, "learning_rate": 4.250010897639525e-06, "loss": 0.2869, "step": 11766 }, { "epoch": 0.5512249964866257, "grad_norm": 0.6040457708090654, "learning_rate": 4.24987545611776e-06, "loss": 0.2943, "step": 11767 }, { "epoch": 0.5512718414765541, "grad_norm": 0.56816259285063, "learning_rate": 4.249740004525903e-06, "loss": 0.2924, "step": 11768 }, { "epoch": 0.5513186864664824, "grad_norm": 0.6200778758755036, "learning_rate": 4.249604542864731e-06, "loss": 0.2975, "step": 11769 }, { "epoch": 0.5513655314564108, "grad_norm": 0.6399841310629862, "learning_rate": 4.249469071135025e-06, "loss": 0.2813, "step": 11770 }, { "epoch": 0.5514123764463391, "grad_norm": 0.5780305676937828, "learning_rate": 4.2493335893375645e-06, "loss": 0.2805, "step": 11771 }, { "epoch": 0.5514592214362674, "grad_norm": 0.5836427559590776, "learning_rate": 4.249198097473129e-06, "loss": 0.2922, "step": 11772 }, { "epoch": 0.5515060664261957, "grad_norm": 0.6532395960597535, "learning_rate": 4.249062595542497e-06, "loss": 0.2935, "step": 11773 }, { "epoch": 0.5515529114161241, "grad_norm": 0.5667896690467924, "learning_rate": 4.24892708354645e-06, "loss": 0.2799, "step": 11774 }, { "epoch": 0.5515997564060524, "grad_norm": 0.6037572483720367, "learning_rate": 4.2487915614857675e-06, "loss": 0.2613, "step": 11775 }, { "epoch": 0.5516466013959807, "grad_norm": 0.6226284135585737, "learning_rate": 4.248656029361229e-06, "loss": 0.3103, "step": 11776 }, { "epoch": 0.551693446385909, "grad_norm": 0.6576998335509239, "learning_rate": 4.248520487173615e-06, "loss": 0.2877, "step": 11777 }, { "epoch": 0.5517402913758374, "grad_norm": 0.588993568956143, "learning_rate": 4.248384934923704e-06, "loss": 0.2645, "step": 11778 }, { "epoch": 0.5517871363657657, "grad_norm": 0.624701201641651, "learning_rate": 4.248249372612278e-06, "loss": 0.2931, "step": 11779 }, { "epoch": 0.5518339813556941, "grad_norm": 0.571162827823553, "learning_rate": 4.248113800240115e-06, "loss": 0.2675, "step": 11780 }, { "epoch": 0.5518808263456223, "grad_norm": 0.6091362664051984, "learning_rate": 4.247978217807999e-06, "loss": 0.2741, "step": 11781 }, { "epoch": 0.5519276713355507, "grad_norm": 0.6010341953302663, "learning_rate": 4.247842625316706e-06, "loss": 0.2923, "step": 11782 }, { "epoch": 0.551974516325479, "grad_norm": 0.5724759836330141, "learning_rate": 4.247707022767017e-06, "loss": 0.28, "step": 11783 }, { "epoch": 0.5520213613154074, "grad_norm": 0.600648676904622, "learning_rate": 4.247571410159716e-06, "loss": 0.2965, "step": 11784 }, { "epoch": 0.5520682063053356, "grad_norm": 0.5601146603308288, "learning_rate": 4.247435787495578e-06, "loss": 0.2862, "step": 11785 }, { "epoch": 0.552115051295264, "grad_norm": 0.6008096529623764, "learning_rate": 4.247300154775388e-06, "loss": 0.3066, "step": 11786 }, { "epoch": 0.5521618962851923, "grad_norm": 0.5181780530917934, "learning_rate": 4.2471645119999235e-06, "loss": 0.2547, "step": 11787 }, { "epoch": 0.5522087412751207, "grad_norm": 0.6600236501383026, "learning_rate": 4.247028859169967e-06, "loss": 0.2974, "step": 11788 }, { "epoch": 0.552255586265049, "grad_norm": 0.6259914183176974, "learning_rate": 4.246893196286299e-06, "loss": 0.3018, "step": 11789 }, { "epoch": 0.5523024312549772, "grad_norm": 0.5864564535125284, "learning_rate": 4.2467575233497e-06, "loss": 0.2736, "step": 11790 }, { "epoch": 0.5523492762449056, "grad_norm": 0.5483634774965214, "learning_rate": 4.246621840360949e-06, "loss": 0.2773, "step": 11791 }, { "epoch": 0.552396121234834, "grad_norm": 0.5994606267025662, "learning_rate": 4.2464861473208296e-06, "loss": 0.279, "step": 11792 }, { "epoch": 0.5524429662247623, "grad_norm": 0.6054001360675931, "learning_rate": 4.246350444230121e-06, "loss": 0.3121, "step": 11793 }, { "epoch": 0.5524898112146905, "grad_norm": 0.5843353625968613, "learning_rate": 4.2462147310896045e-06, "loss": 0.284, "step": 11794 }, { "epoch": 0.5525366562046189, "grad_norm": 0.5836027853130115, "learning_rate": 4.246079007900061e-06, "loss": 0.2886, "step": 11795 }, { "epoch": 0.5525835011945472, "grad_norm": 0.6272019711082212, "learning_rate": 4.245943274662272e-06, "loss": 0.2926, "step": 11796 }, { "epoch": 0.5526303461844756, "grad_norm": 0.5355178718903468, "learning_rate": 4.245807531377017e-06, "loss": 0.2774, "step": 11797 }, { "epoch": 0.552677191174404, "grad_norm": 0.6023823699149768, "learning_rate": 4.2456717780450805e-06, "loss": 0.29, "step": 11798 }, { "epoch": 0.5527240361643322, "grad_norm": 0.5850043195148666, "learning_rate": 4.245536014667241e-06, "loss": 0.283, "step": 11799 }, { "epoch": 0.5527708811542605, "grad_norm": 0.6361631846871126, "learning_rate": 4.24540024124428e-06, "loss": 0.3026, "step": 11800 }, { "epoch": 0.5528177261441889, "grad_norm": 0.6974080004164418, "learning_rate": 4.245264457776978e-06, "loss": 0.2911, "step": 11801 }, { "epoch": 0.5528645711341172, "grad_norm": 0.564059272056094, "learning_rate": 4.245128664266121e-06, "loss": 0.2719, "step": 11802 }, { "epoch": 0.5529114161240455, "grad_norm": 0.5565920610144767, "learning_rate": 4.244992860712484e-06, "loss": 0.2791, "step": 11803 }, { "epoch": 0.5529582611139738, "grad_norm": 0.5999622668184771, "learning_rate": 4.244857047116853e-06, "loss": 0.2752, "step": 11804 }, { "epoch": 0.5530051061039022, "grad_norm": 0.5932771952117023, "learning_rate": 4.244721223480008e-06, "loss": 0.2924, "step": 11805 }, { "epoch": 0.5530519510938305, "grad_norm": 0.6275284289150753, "learning_rate": 4.244585389802731e-06, "loss": 0.2701, "step": 11806 }, { "epoch": 0.5530987960837589, "grad_norm": 0.6046393913748757, "learning_rate": 4.244449546085804e-06, "loss": 0.2885, "step": 11807 }, { "epoch": 0.5531456410736871, "grad_norm": 0.5970674858353576, "learning_rate": 4.2443136923300086e-06, "loss": 0.2809, "step": 11808 }, { "epoch": 0.5531924860636155, "grad_norm": 0.5634629095169982, "learning_rate": 4.244177828536125e-06, "loss": 0.2786, "step": 11809 }, { "epoch": 0.5532393310535438, "grad_norm": 0.6125051968776348, "learning_rate": 4.244041954704937e-06, "loss": 0.2925, "step": 11810 }, { "epoch": 0.5532861760434722, "grad_norm": 0.6047040745561364, "learning_rate": 4.243906070837226e-06, "loss": 0.2785, "step": 11811 }, { "epoch": 0.5533330210334004, "grad_norm": 0.6087456938192309, "learning_rate": 4.243770176933774e-06, "loss": 0.2681, "step": 11812 }, { "epoch": 0.5533798660233288, "grad_norm": 0.5984127200612529, "learning_rate": 4.243634272995363e-06, "loss": 0.2769, "step": 11813 }, { "epoch": 0.5534267110132571, "grad_norm": 0.6255961924378528, "learning_rate": 4.243498359022775e-06, "loss": 0.2778, "step": 11814 }, { "epoch": 0.5534735560031855, "grad_norm": 0.5769278000525402, "learning_rate": 4.243362435016791e-06, "loss": 0.2798, "step": 11815 }, { "epoch": 0.5535204009931138, "grad_norm": 0.5714665873429701, "learning_rate": 4.243226500978196e-06, "loss": 0.2734, "step": 11816 }, { "epoch": 0.5535672459830421, "grad_norm": 0.6306635999205226, "learning_rate": 4.2430905569077696e-06, "loss": 0.2938, "step": 11817 }, { "epoch": 0.5536140909729704, "grad_norm": 0.6490339607474889, "learning_rate": 4.242954602806295e-06, "loss": 0.2918, "step": 11818 }, { "epoch": 0.5536609359628988, "grad_norm": 0.6095488674216258, "learning_rate": 4.242818638674555e-06, "loss": 0.2875, "step": 11819 }, { "epoch": 0.5537077809528271, "grad_norm": 0.6212807660165001, "learning_rate": 4.242682664513332e-06, "loss": 0.2738, "step": 11820 }, { "epoch": 0.5537546259427554, "grad_norm": 0.5737675918582451, "learning_rate": 4.242546680323409e-06, "loss": 0.2608, "step": 11821 }, { "epoch": 0.5538014709326837, "grad_norm": 0.6232203401820334, "learning_rate": 4.242410686105567e-06, "loss": 0.2797, "step": 11822 }, { "epoch": 0.5538483159226121, "grad_norm": 0.5869916615683644, "learning_rate": 4.242274681860589e-06, "loss": 0.2756, "step": 11823 }, { "epoch": 0.5538951609125404, "grad_norm": 0.6352482033028841, "learning_rate": 4.242138667589258e-06, "loss": 0.2915, "step": 11824 }, { "epoch": 0.5539420059024688, "grad_norm": 0.6518840173061804, "learning_rate": 4.242002643292357e-06, "loss": 0.3071, "step": 11825 }, { "epoch": 0.553988850892397, "grad_norm": 0.5813735336988625, "learning_rate": 4.241866608970671e-06, "loss": 0.268, "step": 11826 }, { "epoch": 0.5540356958823254, "grad_norm": 0.6200251547440749, "learning_rate": 4.241730564624978e-06, "loss": 0.2748, "step": 11827 }, { "epoch": 0.5540825408722537, "grad_norm": 0.6157052749387547, "learning_rate": 4.241594510256064e-06, "loss": 0.29, "step": 11828 }, { "epoch": 0.5541293858621821, "grad_norm": 0.6067393285521747, "learning_rate": 4.241458445864711e-06, "loss": 0.2951, "step": 11829 }, { "epoch": 0.5541762308521103, "grad_norm": 0.5907472263205231, "learning_rate": 4.241322371451703e-06, "loss": 0.2756, "step": 11830 }, { "epoch": 0.5542230758420387, "grad_norm": 0.5637197742857973, "learning_rate": 4.241186287017821e-06, "loss": 0.2703, "step": 11831 }, { "epoch": 0.554269920831967, "grad_norm": 0.5966623957915175, "learning_rate": 4.241050192563851e-06, "loss": 0.283, "step": 11832 }, { "epoch": 0.5543167658218954, "grad_norm": 0.5896727781451361, "learning_rate": 4.240914088090574e-06, "loss": 0.2852, "step": 11833 }, { "epoch": 0.5543636108118237, "grad_norm": 0.6090647084167041, "learning_rate": 4.240777973598774e-06, "loss": 0.2863, "step": 11834 }, { "epoch": 0.554410455801752, "grad_norm": 0.6231513260961341, "learning_rate": 4.240641849089235e-06, "loss": 0.2743, "step": 11835 }, { "epoch": 0.5544573007916803, "grad_norm": 0.617245386289185, "learning_rate": 4.2405057145627395e-06, "loss": 0.2767, "step": 11836 }, { "epoch": 0.5545041457816087, "grad_norm": 0.6775694275402887, "learning_rate": 4.240369570020071e-06, "loss": 0.3304, "step": 11837 }, { "epoch": 0.554550990771537, "grad_norm": 0.6118023178336666, "learning_rate": 4.240233415462013e-06, "loss": 0.3017, "step": 11838 }, { "epoch": 0.5545978357614653, "grad_norm": 0.6013716508835897, "learning_rate": 4.2400972508893484e-06, "loss": 0.2853, "step": 11839 }, { "epoch": 0.5546446807513936, "grad_norm": 0.5882764569600843, "learning_rate": 4.239961076302862e-06, "loss": 0.3047, "step": 11840 }, { "epoch": 0.554691525741322, "grad_norm": 0.6098336286070313, "learning_rate": 4.239824891703337e-06, "loss": 0.2984, "step": 11841 }, { "epoch": 0.5547383707312503, "grad_norm": 0.5899339472487407, "learning_rate": 4.239688697091557e-06, "loss": 0.2941, "step": 11842 }, { "epoch": 0.5547852157211787, "grad_norm": 0.5957124533906044, "learning_rate": 4.239552492468305e-06, "loss": 0.2761, "step": 11843 }, { "epoch": 0.5548320607111069, "grad_norm": 0.5923834181382134, "learning_rate": 4.2394162778343665e-06, "loss": 0.2676, "step": 11844 }, { "epoch": 0.5548789057010353, "grad_norm": 0.5744958533414678, "learning_rate": 4.2392800531905246e-06, "loss": 0.2801, "step": 11845 }, { "epoch": 0.5549257506909636, "grad_norm": 0.5833510323221107, "learning_rate": 4.239143818537564e-06, "loss": 0.2798, "step": 11846 }, { "epoch": 0.554972595680892, "grad_norm": 0.5732038373399011, "learning_rate": 4.2390075738762656e-06, "loss": 0.282, "step": 11847 }, { "epoch": 0.5550194406708202, "grad_norm": 0.6454729069450401, "learning_rate": 4.238871319207417e-06, "loss": 0.2997, "step": 11848 }, { "epoch": 0.5550662856607486, "grad_norm": 0.6011018350513138, "learning_rate": 4.238735054531801e-06, "loss": 0.267, "step": 11849 }, { "epoch": 0.5551131306506769, "grad_norm": 0.5303059404491207, "learning_rate": 4.238598779850201e-06, "loss": 0.2601, "step": 11850 }, { "epoch": 0.5551599756406053, "grad_norm": 0.5448569495564154, "learning_rate": 4.238462495163404e-06, "loss": 0.2634, "step": 11851 }, { "epoch": 0.5552068206305336, "grad_norm": 0.6197126586651076, "learning_rate": 4.23832620047219e-06, "loss": 0.2823, "step": 11852 }, { "epoch": 0.5552536656204619, "grad_norm": 0.5659396130211484, "learning_rate": 4.238189895777347e-06, "loss": 0.2665, "step": 11853 }, { "epoch": 0.5553005106103902, "grad_norm": 0.584763944931214, "learning_rate": 4.238053581079658e-06, "loss": 0.2775, "step": 11854 }, { "epoch": 0.5553473556003186, "grad_norm": 0.5452569332175357, "learning_rate": 4.237917256379907e-06, "loss": 0.2597, "step": 11855 }, { "epoch": 0.5553942005902469, "grad_norm": 0.6020759840433922, "learning_rate": 4.237780921678878e-06, "loss": 0.2995, "step": 11856 }, { "epoch": 0.5554410455801752, "grad_norm": 0.5666215402572602, "learning_rate": 4.237644576977358e-06, "loss": 0.2794, "step": 11857 }, { "epoch": 0.5554878905701035, "grad_norm": 0.6218922607983042, "learning_rate": 4.23750822227613e-06, "loss": 0.2887, "step": 11858 }, { "epoch": 0.5555347355600319, "grad_norm": 0.6214767297191897, "learning_rate": 4.237371857575979e-06, "loss": 0.3033, "step": 11859 }, { "epoch": 0.5555815805499602, "grad_norm": 0.5952050041999836, "learning_rate": 4.237235482877689e-06, "loss": 0.3011, "step": 11860 }, { "epoch": 0.5556284255398886, "grad_norm": 0.5908992269528512, "learning_rate": 4.2370990981820456e-06, "loss": 0.289, "step": 11861 }, { "epoch": 0.5556752705298168, "grad_norm": 0.5818476884701473, "learning_rate": 4.236962703489834e-06, "loss": 0.2963, "step": 11862 }, { "epoch": 0.5557221155197452, "grad_norm": 0.5442320252770414, "learning_rate": 4.236826298801838e-06, "loss": 0.2776, "step": 11863 }, { "epoch": 0.5557689605096735, "grad_norm": 0.6051555927699375, "learning_rate": 4.236689884118844e-06, "loss": 0.2754, "step": 11864 }, { "epoch": 0.5558158054996019, "grad_norm": 0.6404687814039729, "learning_rate": 4.2365534594416354e-06, "loss": 0.2981, "step": 11865 }, { "epoch": 0.5558626504895301, "grad_norm": 0.6077079790044163, "learning_rate": 4.236417024770999e-06, "loss": 0.2872, "step": 11866 }, { "epoch": 0.5559094954794584, "grad_norm": 0.5599597472254959, "learning_rate": 4.236280580107718e-06, "loss": 0.2736, "step": 11867 }, { "epoch": 0.5559563404693868, "grad_norm": 0.6656348570099866, "learning_rate": 4.236144125452579e-06, "loss": 0.2905, "step": 11868 }, { "epoch": 0.5560031854593152, "grad_norm": 0.5563384013665786, "learning_rate": 4.236007660806367e-06, "loss": 0.277, "step": 11869 }, { "epoch": 0.5560500304492435, "grad_norm": 0.5730636520093183, "learning_rate": 4.235871186169867e-06, "loss": 0.273, "step": 11870 }, { "epoch": 0.5560968754391717, "grad_norm": 0.5843537082888417, "learning_rate": 4.235734701543866e-06, "loss": 0.2924, "step": 11871 }, { "epoch": 0.5561437204291001, "grad_norm": 0.5612740841745125, "learning_rate": 4.235598206929147e-06, "loss": 0.2923, "step": 11872 }, { "epoch": 0.5561905654190284, "grad_norm": 0.5796424073811602, "learning_rate": 4.235461702326496e-06, "loss": 0.3023, "step": 11873 }, { "epoch": 0.5562374104089568, "grad_norm": 0.5727949316187883, "learning_rate": 4.2353251877367e-06, "loss": 0.2788, "step": 11874 }, { "epoch": 0.556284255398885, "grad_norm": 0.6446261242977457, "learning_rate": 4.235188663160543e-06, "loss": 0.2895, "step": 11875 }, { "epoch": 0.5563311003888134, "grad_norm": 0.5810142125667082, "learning_rate": 4.235052128598812e-06, "loss": 0.3053, "step": 11876 }, { "epoch": 0.5563779453787417, "grad_norm": 0.5542947834061638, "learning_rate": 4.234915584052291e-06, "loss": 0.2657, "step": 11877 }, { "epoch": 0.5564247903686701, "grad_norm": 0.6101616498173441, "learning_rate": 4.234779029521769e-06, "loss": 0.2987, "step": 11878 }, { "epoch": 0.5564716353585984, "grad_norm": 0.6149517968696664, "learning_rate": 4.234642465008028e-06, "loss": 0.2858, "step": 11879 }, { "epoch": 0.5565184803485267, "grad_norm": 0.552184103845372, "learning_rate": 4.234505890511855e-06, "loss": 0.2741, "step": 11880 }, { "epoch": 0.556565325338455, "grad_norm": 0.6007937754425978, "learning_rate": 4.234369306034038e-06, "loss": 0.3015, "step": 11881 }, { "epoch": 0.5566121703283834, "grad_norm": 0.5971799428798477, "learning_rate": 4.234232711575361e-06, "loss": 0.285, "step": 11882 }, { "epoch": 0.5566590153183117, "grad_norm": 0.6518969190463558, "learning_rate": 4.2340961071366105e-06, "loss": 0.2894, "step": 11883 }, { "epoch": 0.55670586030824, "grad_norm": 0.6053536793136195, "learning_rate": 4.233959492718573e-06, "loss": 0.2702, "step": 11884 }, { "epoch": 0.5567527052981683, "grad_norm": 0.6170268031170519, "learning_rate": 4.233822868322034e-06, "loss": 0.2907, "step": 11885 }, { "epoch": 0.5567995502880967, "grad_norm": 0.593822283549883, "learning_rate": 4.233686233947781e-06, "loss": 0.284, "step": 11886 }, { "epoch": 0.556846395278025, "grad_norm": 0.5769392576515358, "learning_rate": 4.233549589596599e-06, "loss": 0.2814, "step": 11887 }, { "epoch": 0.5568932402679534, "grad_norm": 0.6180446243638209, "learning_rate": 4.233412935269274e-06, "loss": 0.286, "step": 11888 }, { "epoch": 0.5569400852578816, "grad_norm": 0.7128756741566508, "learning_rate": 4.2332762709665945e-06, "loss": 0.2884, "step": 11889 }, { "epoch": 0.55698693024781, "grad_norm": 0.6172152976192902, "learning_rate": 4.233139596689345e-06, "loss": 0.3007, "step": 11890 }, { "epoch": 0.5570337752377383, "grad_norm": 0.5626913836390857, "learning_rate": 4.233002912438313e-06, "loss": 0.2579, "step": 11891 }, { "epoch": 0.5570806202276667, "grad_norm": 0.5559901619947264, "learning_rate": 4.232866218214286e-06, "loss": 0.2699, "step": 11892 }, { "epoch": 0.5571274652175949, "grad_norm": 0.6398087060066285, "learning_rate": 4.232729514018047e-06, "loss": 0.2937, "step": 11893 }, { "epoch": 0.5571743102075233, "grad_norm": 0.6720125617490268, "learning_rate": 4.232592799850387e-06, "loss": 0.3067, "step": 11894 }, { "epoch": 0.5572211551974516, "grad_norm": 0.6141517130340443, "learning_rate": 4.23245607571209e-06, "loss": 0.2762, "step": 11895 }, { "epoch": 0.55726800018738, "grad_norm": 0.6307361408101106, "learning_rate": 4.2323193416039445e-06, "loss": 0.3015, "step": 11896 }, { "epoch": 0.5573148451773083, "grad_norm": 0.5762740609783026, "learning_rate": 4.232182597526736e-06, "loss": 0.2792, "step": 11897 }, { "epoch": 0.5573616901672366, "grad_norm": 0.5613491854072882, "learning_rate": 4.232045843481252e-06, "loss": 0.2613, "step": 11898 }, { "epoch": 0.5574085351571649, "grad_norm": 0.5955385423510069, "learning_rate": 4.231909079468279e-06, "loss": 0.2947, "step": 11899 }, { "epoch": 0.5574553801470933, "grad_norm": 0.6997260402080057, "learning_rate": 4.231772305488606e-06, "loss": 0.3164, "step": 11900 }, { "epoch": 0.5575022251370216, "grad_norm": 0.6366000362849479, "learning_rate": 4.2316355215430174e-06, "loss": 0.2789, "step": 11901 }, { "epoch": 0.5575490701269499, "grad_norm": 0.6078872412508052, "learning_rate": 4.231498727632302e-06, "loss": 0.2833, "step": 11902 }, { "epoch": 0.5575959151168782, "grad_norm": 0.6049540885986665, "learning_rate": 4.2313619237572465e-06, "loss": 0.2792, "step": 11903 }, { "epoch": 0.5576427601068066, "grad_norm": 0.5983697309013865, "learning_rate": 4.231225109918639e-06, "loss": 0.2907, "step": 11904 }, { "epoch": 0.5576896050967349, "grad_norm": 0.5779116433950667, "learning_rate": 4.231088286117265e-06, "loss": 0.2699, "step": 11905 }, { "epoch": 0.5577364500866633, "grad_norm": 0.5818237132990997, "learning_rate": 4.230951452353914e-06, "loss": 0.2892, "step": 11906 }, { "epoch": 0.5577832950765915, "grad_norm": 0.59462160776867, "learning_rate": 4.2308146086293724e-06, "loss": 0.2889, "step": 11907 }, { "epoch": 0.5578301400665199, "grad_norm": 0.5711443178896521, "learning_rate": 4.230677754944427e-06, "loss": 0.2652, "step": 11908 }, { "epoch": 0.5578769850564482, "grad_norm": 0.554338268219613, "learning_rate": 4.230540891299867e-06, "loss": 0.2871, "step": 11909 }, { "epoch": 0.5579238300463766, "grad_norm": 0.6387357101681456, "learning_rate": 4.230404017696479e-06, "loss": 0.3027, "step": 11910 }, { "epoch": 0.5579706750363048, "grad_norm": 0.6310467171737274, "learning_rate": 4.23026713413505e-06, "loss": 0.3081, "step": 11911 }, { "epoch": 0.5580175200262332, "grad_norm": 0.5934151163653232, "learning_rate": 4.230130240616369e-06, "loss": 0.2874, "step": 11912 }, { "epoch": 0.5580643650161615, "grad_norm": 0.609967133669874, "learning_rate": 4.2299933371412224e-06, "loss": 0.2829, "step": 11913 }, { "epoch": 0.5581112100060899, "grad_norm": 0.5642491512513048, "learning_rate": 4.2298564237104e-06, "loss": 0.2641, "step": 11914 }, { "epoch": 0.5581580549960182, "grad_norm": 0.5995452188102686, "learning_rate": 4.229719500324689e-06, "loss": 0.3181, "step": 11915 }, { "epoch": 0.5582048999859465, "grad_norm": 0.6152505399715511, "learning_rate": 4.229582566984876e-06, "loss": 0.3032, "step": 11916 }, { "epoch": 0.5582517449758748, "grad_norm": 0.6246044961336177, "learning_rate": 4.229445623691751e-06, "loss": 0.2937, "step": 11917 }, { "epoch": 0.5582985899658032, "grad_norm": 0.6459942244894948, "learning_rate": 4.229308670446101e-06, "loss": 0.2776, "step": 11918 }, { "epoch": 0.5583454349557315, "grad_norm": 0.7229316391511911, "learning_rate": 4.2291717072487135e-06, "loss": 0.2818, "step": 11919 }, { "epoch": 0.5583922799456598, "grad_norm": 0.633935001701916, "learning_rate": 4.229034734100378e-06, "loss": 0.3135, "step": 11920 }, { "epoch": 0.5584391249355881, "grad_norm": 0.6230699546467635, "learning_rate": 4.228897751001882e-06, "loss": 0.2918, "step": 11921 }, { "epoch": 0.5584859699255165, "grad_norm": 0.5701113555027454, "learning_rate": 4.228760757954014e-06, "loss": 0.2912, "step": 11922 }, { "epoch": 0.5585328149154448, "grad_norm": 0.6574145085633566, "learning_rate": 4.228623754957562e-06, "loss": 0.3109, "step": 11923 }, { "epoch": 0.5585796599053732, "grad_norm": 0.6255858638766344, "learning_rate": 4.228486742013316e-06, "loss": 0.2955, "step": 11924 }, { "epoch": 0.5586265048953014, "grad_norm": 0.5753747467692971, "learning_rate": 4.228349719122062e-06, "loss": 0.254, "step": 11925 }, { "epoch": 0.5586733498852298, "grad_norm": 0.6235507449425839, "learning_rate": 4.2282126862845905e-06, "loss": 0.2956, "step": 11926 }, { "epoch": 0.5587201948751581, "grad_norm": 0.617513647934462, "learning_rate": 4.228075643501689e-06, "loss": 0.2757, "step": 11927 }, { "epoch": 0.5587670398650865, "grad_norm": 0.642010690157537, "learning_rate": 4.227938590774146e-06, "loss": 0.307, "step": 11928 }, { "epoch": 0.5588138848550147, "grad_norm": 0.5847883486152392, "learning_rate": 4.227801528102751e-06, "loss": 0.3168, "step": 11929 }, { "epoch": 0.5588607298449431, "grad_norm": 0.593658930357267, "learning_rate": 4.227664455488293e-06, "loss": 0.3013, "step": 11930 }, { "epoch": 0.5589075748348714, "grad_norm": 0.5972001450220603, "learning_rate": 4.2275273729315604e-06, "loss": 0.2811, "step": 11931 }, { "epoch": 0.5589544198247998, "grad_norm": 0.668366827982659, "learning_rate": 4.227390280433341e-06, "loss": 0.3036, "step": 11932 }, { "epoch": 0.5590012648147281, "grad_norm": 0.7011445335976491, "learning_rate": 4.227253177994426e-06, "loss": 0.3386, "step": 11933 }, { "epoch": 0.5590481098046564, "grad_norm": 0.6523018935124035, "learning_rate": 4.227116065615602e-06, "loss": 0.2924, "step": 11934 }, { "epoch": 0.5590949547945847, "grad_norm": 0.5686669331822641, "learning_rate": 4.226978943297659e-06, "loss": 0.2595, "step": 11935 }, { "epoch": 0.5591417997845131, "grad_norm": 0.5970614855955912, "learning_rate": 4.226841811041387e-06, "loss": 0.2676, "step": 11936 }, { "epoch": 0.5591886447744414, "grad_norm": 0.5626296315135386, "learning_rate": 4.226704668847574e-06, "loss": 0.2836, "step": 11937 }, { "epoch": 0.5592354897643697, "grad_norm": 0.6080493461880928, "learning_rate": 4.22656751671701e-06, "loss": 0.2877, "step": 11938 }, { "epoch": 0.559282334754298, "grad_norm": 0.6397511820299551, "learning_rate": 4.226430354650484e-06, "loss": 0.2973, "step": 11939 }, { "epoch": 0.5593291797442264, "grad_norm": 0.6134845971063406, "learning_rate": 4.226293182648784e-06, "loss": 0.2828, "step": 11940 }, { "epoch": 0.5593760247341547, "grad_norm": 0.5972515201664699, "learning_rate": 4.226156000712702e-06, "loss": 0.2959, "step": 11941 }, { "epoch": 0.5594228697240831, "grad_norm": 0.5881076595184361, "learning_rate": 4.226018808843025e-06, "loss": 0.2843, "step": 11942 }, { "epoch": 0.5594697147140113, "grad_norm": 0.6103456021427123, "learning_rate": 4.2258816070405444e-06, "loss": 0.2907, "step": 11943 }, { "epoch": 0.5595165597039397, "grad_norm": 0.6341326435186856, "learning_rate": 4.2257443953060485e-06, "loss": 0.2739, "step": 11944 }, { "epoch": 0.559563404693868, "grad_norm": 0.6273866919652814, "learning_rate": 4.2256071736403284e-06, "loss": 0.2872, "step": 11945 }, { "epoch": 0.5596102496837964, "grad_norm": 0.6401646796462434, "learning_rate": 4.225469942044171e-06, "loss": 0.2797, "step": 11946 }, { "epoch": 0.5596570946737246, "grad_norm": 0.6024969873716852, "learning_rate": 4.2253327005183685e-06, "loss": 0.3039, "step": 11947 }, { "epoch": 0.559703939663653, "grad_norm": 0.6077746604406404, "learning_rate": 4.22519544906371e-06, "loss": 0.2934, "step": 11948 }, { "epoch": 0.5597507846535813, "grad_norm": 0.598254464668365, "learning_rate": 4.225058187680986e-06, "loss": 0.2984, "step": 11949 }, { "epoch": 0.5597976296435097, "grad_norm": 0.5706401696968798, "learning_rate": 4.224920916370984e-06, "loss": 0.2811, "step": 11950 }, { "epoch": 0.559844474633438, "grad_norm": 0.6179322539611192, "learning_rate": 4.224783635134498e-06, "loss": 0.3033, "step": 11951 }, { "epoch": 0.5598913196233662, "grad_norm": 0.6199838900924823, "learning_rate": 4.224646343972314e-06, "loss": 0.2952, "step": 11952 }, { "epoch": 0.5599381646132946, "grad_norm": 0.609437082082529, "learning_rate": 4.224509042885224e-06, "loss": 0.2846, "step": 11953 }, { "epoch": 0.559985009603223, "grad_norm": 0.5786079296191826, "learning_rate": 4.224371731874018e-06, "loss": 0.3055, "step": 11954 }, { "epoch": 0.5600318545931513, "grad_norm": 0.5610781947168174, "learning_rate": 4.224234410939487e-06, "loss": 0.2922, "step": 11955 }, { "epoch": 0.5600786995830795, "grad_norm": 0.5934409996204052, "learning_rate": 4.224097080082419e-06, "loss": 0.2776, "step": 11956 }, { "epoch": 0.5601255445730079, "grad_norm": 0.5833921558604808, "learning_rate": 4.2239597393036066e-06, "loss": 0.2915, "step": 11957 }, { "epoch": 0.5601723895629362, "grad_norm": 0.5973170584376389, "learning_rate": 4.223822388603838e-06, "loss": 0.2692, "step": 11958 }, { "epoch": 0.5602192345528646, "grad_norm": 0.6069300017266644, "learning_rate": 4.223685027983906e-06, "loss": 0.2821, "step": 11959 }, { "epoch": 0.560266079542793, "grad_norm": 0.6021627520118875, "learning_rate": 4.223547657444599e-06, "loss": 0.297, "step": 11960 }, { "epoch": 0.5603129245327212, "grad_norm": 0.57766788359473, "learning_rate": 4.22341027698671e-06, "loss": 0.2891, "step": 11961 }, { "epoch": 0.5603597695226495, "grad_norm": 0.5412855825725342, "learning_rate": 4.223272886611026e-06, "loss": 0.2639, "step": 11962 }, { "epoch": 0.5604066145125779, "grad_norm": 0.6531981041387999, "learning_rate": 4.223135486318341e-06, "loss": 0.284, "step": 11963 }, { "epoch": 0.5604534595025062, "grad_norm": 0.6146815312582291, "learning_rate": 4.222998076109444e-06, "loss": 0.3009, "step": 11964 }, { "epoch": 0.5605003044924345, "grad_norm": 0.5224972298740419, "learning_rate": 4.222860655985126e-06, "loss": 0.2744, "step": 11965 }, { "epoch": 0.5605471494823628, "grad_norm": 0.6525463709530108, "learning_rate": 4.222723225946178e-06, "loss": 0.3017, "step": 11966 }, { "epoch": 0.5605939944722912, "grad_norm": 0.6945171298347609, "learning_rate": 4.222585785993391e-06, "loss": 0.2997, "step": 11967 }, { "epoch": 0.5606408394622195, "grad_norm": 0.6006273631640078, "learning_rate": 4.222448336127556e-06, "loss": 0.2797, "step": 11968 }, { "epoch": 0.5606876844521479, "grad_norm": 0.6500840900600944, "learning_rate": 4.222310876349463e-06, "loss": 0.2954, "step": 11969 }, { "epoch": 0.5607345294420761, "grad_norm": 0.567164596586867, "learning_rate": 4.222173406659904e-06, "loss": 0.2935, "step": 11970 }, { "epoch": 0.5607813744320045, "grad_norm": 0.5994245844846093, "learning_rate": 4.22203592705967e-06, "loss": 0.2905, "step": 11971 }, { "epoch": 0.5608282194219328, "grad_norm": 0.6265229673910119, "learning_rate": 4.221898437549552e-06, "loss": 0.2967, "step": 11972 }, { "epoch": 0.5608750644118612, "grad_norm": 0.5661779091329345, "learning_rate": 4.221760938130342e-06, "loss": 0.2667, "step": 11973 }, { "epoch": 0.5609219094017894, "grad_norm": 0.6392210108103556, "learning_rate": 4.22162342880283e-06, "loss": 0.3008, "step": 11974 }, { "epoch": 0.5609687543917178, "grad_norm": 0.6266675179649811, "learning_rate": 4.2214859095678074e-06, "loss": 0.2863, "step": 11975 }, { "epoch": 0.5610155993816461, "grad_norm": 0.6411461137145228, "learning_rate": 4.221348380426067e-06, "loss": 0.3061, "step": 11976 }, { "epoch": 0.5610624443715745, "grad_norm": 0.5920166191616966, "learning_rate": 4.221210841378398e-06, "loss": 0.2881, "step": 11977 }, { "epoch": 0.5611092893615028, "grad_norm": 0.641989323700067, "learning_rate": 4.2210732924255945e-06, "loss": 0.3057, "step": 11978 }, { "epoch": 0.5611561343514311, "grad_norm": 0.5559755452719346, "learning_rate": 4.220935733568447e-06, "loss": 0.2594, "step": 11979 }, { "epoch": 0.5612029793413594, "grad_norm": 0.6105402913565718, "learning_rate": 4.220798164807746e-06, "loss": 0.2676, "step": 11980 }, { "epoch": 0.5612498243312878, "grad_norm": 0.6135333991583743, "learning_rate": 4.220660586144284e-06, "loss": 0.2869, "step": 11981 }, { "epoch": 0.5612966693212161, "grad_norm": 0.5836143513057199, "learning_rate": 4.220522997578853e-06, "loss": 0.2989, "step": 11982 }, { "epoch": 0.5613435143111444, "grad_norm": 0.5845103153427172, "learning_rate": 4.220385399112245e-06, "loss": 0.29, "step": 11983 }, { "epoch": 0.5613903593010727, "grad_norm": 0.5997586819459877, "learning_rate": 4.220247790745251e-06, "loss": 0.2947, "step": 11984 }, { "epoch": 0.5614372042910011, "grad_norm": 0.5861750581147823, "learning_rate": 4.220110172478663e-06, "loss": 0.2976, "step": 11985 }, { "epoch": 0.5614840492809294, "grad_norm": 0.5946481436702559, "learning_rate": 4.219972544313274e-06, "loss": 0.2824, "step": 11986 }, { "epoch": 0.5615308942708578, "grad_norm": 0.6050138720829259, "learning_rate": 4.219834906249875e-06, "loss": 0.2797, "step": 11987 }, { "epoch": 0.561577739260786, "grad_norm": 0.5687210814001565, "learning_rate": 4.219697258289258e-06, "loss": 0.28, "step": 11988 }, { "epoch": 0.5616245842507144, "grad_norm": 0.5683110812533422, "learning_rate": 4.219559600432216e-06, "loss": 0.2777, "step": 11989 }, { "epoch": 0.5616714292406427, "grad_norm": 0.5732518607824266, "learning_rate": 4.219421932679541e-06, "loss": 0.2865, "step": 11990 }, { "epoch": 0.5617182742305711, "grad_norm": 0.6320656802604289, "learning_rate": 4.219284255032024e-06, "loss": 0.2989, "step": 11991 }, { "epoch": 0.5617651192204993, "grad_norm": 0.6091686872046504, "learning_rate": 4.219146567490458e-06, "loss": 0.281, "step": 11992 }, { "epoch": 0.5618119642104277, "grad_norm": 0.5841590329805864, "learning_rate": 4.219008870055637e-06, "loss": 0.2773, "step": 11993 }, { "epoch": 0.561858809200356, "grad_norm": 0.5803339174708252, "learning_rate": 4.2188711627283515e-06, "loss": 0.2934, "step": 11994 }, { "epoch": 0.5619056541902844, "grad_norm": 0.5971576293553191, "learning_rate": 4.218733445509395e-06, "loss": 0.3023, "step": 11995 }, { "epoch": 0.5619524991802127, "grad_norm": 0.577488167880642, "learning_rate": 4.218595718399559e-06, "loss": 0.2727, "step": 11996 }, { "epoch": 0.561999344170141, "grad_norm": 0.6114455478561973, "learning_rate": 4.218457981399636e-06, "loss": 0.3078, "step": 11997 }, { "epoch": 0.5620461891600693, "grad_norm": 0.5847902898350618, "learning_rate": 4.21832023451042e-06, "loss": 0.282, "step": 11998 }, { "epoch": 0.5620930341499977, "grad_norm": 0.5616782271182397, "learning_rate": 4.218182477732702e-06, "loss": 0.2743, "step": 11999 }, { "epoch": 0.562139879139926, "grad_norm": 0.5901228580108312, "learning_rate": 4.218044711067277e-06, "loss": 0.2672, "step": 12000 }, { "epoch": 0.5621867241298543, "grad_norm": 0.564402255897131, "learning_rate": 4.217906934514936e-06, "loss": 0.2796, "step": 12001 }, { "epoch": 0.5622335691197826, "grad_norm": 0.592724074020255, "learning_rate": 4.217769148076472e-06, "loss": 0.2915, "step": 12002 }, { "epoch": 0.562280414109711, "grad_norm": 0.5909220361111236, "learning_rate": 4.217631351752678e-06, "loss": 0.2802, "step": 12003 }, { "epoch": 0.5623272590996393, "grad_norm": 0.5387945048973379, "learning_rate": 4.217493545544348e-06, "loss": 0.2804, "step": 12004 }, { "epoch": 0.5623741040895677, "grad_norm": 0.602725910387869, "learning_rate": 4.2173557294522735e-06, "loss": 0.2742, "step": 12005 }, { "epoch": 0.5624209490794959, "grad_norm": 0.6723925644328083, "learning_rate": 4.21721790347725e-06, "loss": 0.3083, "step": 12006 }, { "epoch": 0.5624677940694243, "grad_norm": 0.5492712370109497, "learning_rate": 4.2170800676200675e-06, "loss": 0.2737, "step": 12007 }, { "epoch": 0.5625146390593526, "grad_norm": 0.6384228856056583, "learning_rate": 4.2169422218815216e-06, "loss": 0.3071, "step": 12008 }, { "epoch": 0.562561484049281, "grad_norm": 0.6425689828125777, "learning_rate": 4.216804366262405e-06, "loss": 0.2882, "step": 12009 }, { "epoch": 0.5626083290392092, "grad_norm": 0.6416508261760712, "learning_rate": 4.21666650076351e-06, "loss": 0.2889, "step": 12010 }, { "epoch": 0.5626551740291376, "grad_norm": 0.595805397623262, "learning_rate": 4.2165286253856306e-06, "loss": 0.27, "step": 12011 }, { "epoch": 0.5627020190190659, "grad_norm": 0.5991620158014055, "learning_rate": 4.216390740129561e-06, "loss": 0.2814, "step": 12012 }, { "epoch": 0.5627488640089943, "grad_norm": 0.6216009424468765, "learning_rate": 4.216252844996094e-06, "loss": 0.2916, "step": 12013 }, { "epoch": 0.5627957089989226, "grad_norm": 0.6440905300348823, "learning_rate": 4.216114939986023e-06, "loss": 0.2986, "step": 12014 }, { "epoch": 0.5628425539888509, "grad_norm": 0.5830935994575152, "learning_rate": 4.215977025100142e-06, "loss": 0.2831, "step": 12015 }, { "epoch": 0.5628893989787792, "grad_norm": 0.5736729004234672, "learning_rate": 4.215839100339245e-06, "loss": 0.2903, "step": 12016 }, { "epoch": 0.5629362439687076, "grad_norm": 0.5946189791136816, "learning_rate": 4.215701165704125e-06, "loss": 0.2934, "step": 12017 }, { "epoch": 0.5629830889586359, "grad_norm": 0.582258919701526, "learning_rate": 4.215563221195576e-06, "loss": 0.2723, "step": 12018 }, { "epoch": 0.5630299339485642, "grad_norm": 0.6117541903423513, "learning_rate": 4.215425266814391e-06, "loss": 0.2825, "step": 12019 }, { "epoch": 0.5630767789384925, "grad_norm": 0.5998199135157658, "learning_rate": 4.215287302561365e-06, "loss": 0.2665, "step": 12020 }, { "epoch": 0.5631236239284209, "grad_norm": 0.609119193842258, "learning_rate": 4.2151493284372925e-06, "loss": 0.2954, "step": 12021 }, { "epoch": 0.5631704689183492, "grad_norm": 0.5655761016473982, "learning_rate": 4.215011344442966e-06, "loss": 0.2835, "step": 12022 }, { "epoch": 0.5632173139082776, "grad_norm": 0.583889690324393, "learning_rate": 4.214873350579181e-06, "loss": 0.2757, "step": 12023 }, { "epoch": 0.5632641588982058, "grad_norm": 0.5838301787869944, "learning_rate": 4.21473534684673e-06, "loss": 0.2752, "step": 12024 }, { "epoch": 0.5633110038881342, "grad_norm": 0.5937730734219502, "learning_rate": 4.2145973332464085e-06, "loss": 0.2968, "step": 12025 }, { "epoch": 0.5633578488780625, "grad_norm": 0.554454643449964, "learning_rate": 4.21445930977901e-06, "loss": 0.2765, "step": 12026 }, { "epoch": 0.5634046938679909, "grad_norm": 0.6035034591655462, "learning_rate": 4.214321276445329e-06, "loss": 0.2897, "step": 12027 }, { "epoch": 0.5634515388579191, "grad_norm": 0.6248830584132856, "learning_rate": 4.21418323324616e-06, "loss": 0.3039, "step": 12028 }, { "epoch": 0.5634983838478475, "grad_norm": 0.6095982056959829, "learning_rate": 4.214045180182299e-06, "loss": 0.2906, "step": 12029 }, { "epoch": 0.5635452288377758, "grad_norm": 0.6503106640591034, "learning_rate": 4.213907117254537e-06, "loss": 0.2957, "step": 12030 }, { "epoch": 0.5635920738277042, "grad_norm": 0.5910404909729781, "learning_rate": 4.213769044463671e-06, "loss": 0.271, "step": 12031 }, { "epoch": 0.5636389188176325, "grad_norm": 0.6078195237143245, "learning_rate": 4.213630961810494e-06, "loss": 0.3039, "step": 12032 }, { "epoch": 0.5636857638075607, "grad_norm": 0.6441318450077093, "learning_rate": 4.213492869295802e-06, "loss": 0.3043, "step": 12033 }, { "epoch": 0.5637326087974891, "grad_norm": 0.5461900857365841, "learning_rate": 4.21335476692039e-06, "loss": 0.2804, "step": 12034 }, { "epoch": 0.5637794537874175, "grad_norm": 0.6859747978276001, "learning_rate": 4.213216654685052e-06, "loss": 0.2956, "step": 12035 }, { "epoch": 0.5638262987773458, "grad_norm": 0.5698243367715301, "learning_rate": 4.213078532590581e-06, "loss": 0.2636, "step": 12036 }, { "epoch": 0.563873143767274, "grad_norm": 0.6264863139609081, "learning_rate": 4.212940400637775e-06, "loss": 0.2907, "step": 12037 }, { "epoch": 0.5639199887572024, "grad_norm": 0.6163809236753535, "learning_rate": 4.212802258827428e-06, "loss": 0.2868, "step": 12038 }, { "epoch": 0.5639668337471307, "grad_norm": 0.5788000562412272, "learning_rate": 4.212664107160333e-06, "loss": 0.2952, "step": 12039 }, { "epoch": 0.5640136787370591, "grad_norm": 0.5836887250911216, "learning_rate": 4.2125259456372876e-06, "loss": 0.2764, "step": 12040 }, { "epoch": 0.5640605237269875, "grad_norm": 0.6090861951822553, "learning_rate": 4.212387774259085e-06, "loss": 0.2803, "step": 12041 }, { "epoch": 0.5641073687169157, "grad_norm": 0.626574043370508, "learning_rate": 4.212249593026521e-06, "loss": 0.2766, "step": 12042 }, { "epoch": 0.564154213706844, "grad_norm": 0.5997053810450047, "learning_rate": 4.212111401940392e-06, "loss": 0.2732, "step": 12043 }, { "epoch": 0.5642010586967724, "grad_norm": 0.6273023501059816, "learning_rate": 4.211973201001492e-06, "loss": 0.2859, "step": 12044 }, { "epoch": 0.5642479036867007, "grad_norm": 0.5825599116849476, "learning_rate": 4.211834990210616e-06, "loss": 0.2864, "step": 12045 }, { "epoch": 0.564294748676629, "grad_norm": 0.6643339162991189, "learning_rate": 4.211696769568561e-06, "loss": 0.2832, "step": 12046 }, { "epoch": 0.5643415936665573, "grad_norm": 0.5565085836949232, "learning_rate": 4.2115585390761196e-06, "loss": 0.2726, "step": 12047 }, { "epoch": 0.5643884386564857, "grad_norm": 0.6041571696034077, "learning_rate": 4.21142029873409e-06, "loss": 0.2932, "step": 12048 }, { "epoch": 0.564435283646414, "grad_norm": 0.5621747621357598, "learning_rate": 4.211282048543267e-06, "loss": 0.2919, "step": 12049 }, { "epoch": 0.5644821286363424, "grad_norm": 0.6320428220943003, "learning_rate": 4.211143788504446e-06, "loss": 0.2887, "step": 12050 }, { "epoch": 0.5645289736262706, "grad_norm": 0.5593120629309108, "learning_rate": 4.211005518618421e-06, "loss": 0.2865, "step": 12051 }, { "epoch": 0.564575818616199, "grad_norm": 0.6510647318633547, "learning_rate": 4.21086723888599e-06, "loss": 0.3179, "step": 12052 }, { "epoch": 0.5646226636061273, "grad_norm": 0.572541117731483, "learning_rate": 4.21072894930795e-06, "loss": 0.2787, "step": 12053 }, { "epoch": 0.5646695085960557, "grad_norm": 0.6116875344739365, "learning_rate": 4.210590649885092e-06, "loss": 0.2962, "step": 12054 }, { "epoch": 0.5647163535859839, "grad_norm": 0.639259473475011, "learning_rate": 4.210452340618216e-06, "loss": 0.2951, "step": 12055 }, { "epoch": 0.5647631985759123, "grad_norm": 0.5626816333500627, "learning_rate": 4.210314021508117e-06, "loss": 0.2718, "step": 12056 }, { "epoch": 0.5648100435658406, "grad_norm": 0.6260886495098912, "learning_rate": 4.21017569255559e-06, "loss": 0.2876, "step": 12057 }, { "epoch": 0.564856888555769, "grad_norm": 0.6128127251454728, "learning_rate": 4.2100373537614324e-06, "loss": 0.2753, "step": 12058 }, { "epoch": 0.5649037335456973, "grad_norm": 0.5995574886604305, "learning_rate": 4.20989900512644e-06, "loss": 0.2842, "step": 12059 }, { "epoch": 0.5649505785356256, "grad_norm": 0.5758584357289974, "learning_rate": 4.209760646651407e-06, "loss": 0.2775, "step": 12060 }, { "epoch": 0.5649974235255539, "grad_norm": 0.5978764671741394, "learning_rate": 4.2096222783371335e-06, "loss": 0.2939, "step": 12061 }, { "epoch": 0.5650442685154823, "grad_norm": 0.6221609965196179, "learning_rate": 4.2094839001844115e-06, "loss": 0.2925, "step": 12062 }, { "epoch": 0.5650911135054106, "grad_norm": 0.6176767388946308, "learning_rate": 4.20934551219404e-06, "loss": 0.2971, "step": 12063 }, { "epoch": 0.5651379584953389, "grad_norm": 0.6265445771802897, "learning_rate": 4.209207114366815e-06, "loss": 0.2962, "step": 12064 }, { "epoch": 0.5651848034852672, "grad_norm": 0.5802077530910655, "learning_rate": 4.209068706703532e-06, "loss": 0.2894, "step": 12065 }, { "epoch": 0.5652316484751956, "grad_norm": 0.6362313719411967, "learning_rate": 4.2089302892049885e-06, "loss": 0.2969, "step": 12066 }, { "epoch": 0.5652784934651239, "grad_norm": 0.6174720044158343, "learning_rate": 4.208791861871981e-06, "loss": 0.2927, "step": 12067 }, { "epoch": 0.5653253384550523, "grad_norm": 0.6194491284899802, "learning_rate": 4.208653424705305e-06, "loss": 0.3014, "step": 12068 }, { "epoch": 0.5653721834449805, "grad_norm": 0.5677639709222443, "learning_rate": 4.208514977705759e-06, "loss": 0.2782, "step": 12069 }, { "epoch": 0.5654190284349089, "grad_norm": 0.5600458816228026, "learning_rate": 4.208376520874138e-06, "loss": 0.2652, "step": 12070 }, { "epoch": 0.5654658734248372, "grad_norm": 0.6331038038135578, "learning_rate": 4.20823805421124e-06, "loss": 0.3056, "step": 12071 }, { "epoch": 0.5655127184147656, "grad_norm": 0.5388502407858813, "learning_rate": 4.2080995777178605e-06, "loss": 0.274, "step": 12072 }, { "epoch": 0.5655595634046938, "grad_norm": 0.5623159038530586, "learning_rate": 4.2079610913947984e-06, "loss": 0.2772, "step": 12073 }, { "epoch": 0.5656064083946222, "grad_norm": 0.5957882022742068, "learning_rate": 4.207822595242848e-06, "loss": 0.2951, "step": 12074 }, { "epoch": 0.5656532533845505, "grad_norm": 0.581627769231528, "learning_rate": 4.207684089262809e-06, "loss": 0.2734, "step": 12075 }, { "epoch": 0.5657000983744789, "grad_norm": 0.5821564404408845, "learning_rate": 4.207545573455477e-06, "loss": 0.298, "step": 12076 }, { "epoch": 0.5657469433644072, "grad_norm": 0.6153507168799582, "learning_rate": 4.20740704782165e-06, "loss": 0.2918, "step": 12077 }, { "epoch": 0.5657937883543355, "grad_norm": 0.5850510343750432, "learning_rate": 4.207268512362124e-06, "loss": 0.2838, "step": 12078 }, { "epoch": 0.5658406333442638, "grad_norm": 0.5826269871240513, "learning_rate": 4.2071299670776965e-06, "loss": 0.293, "step": 12079 }, { "epoch": 0.5658874783341922, "grad_norm": 0.5834323594187943, "learning_rate": 4.206991411969166e-06, "loss": 0.2757, "step": 12080 }, { "epoch": 0.5659343233241205, "grad_norm": 0.5740187040661489, "learning_rate": 4.206852847037328e-06, "loss": 0.2765, "step": 12081 }, { "epoch": 0.5659811683140488, "grad_norm": 0.6072846334714361, "learning_rate": 4.206714272282981e-06, "loss": 0.2505, "step": 12082 }, { "epoch": 0.5660280133039771, "grad_norm": 0.6379793261064401, "learning_rate": 4.206575687706923e-06, "loss": 0.2915, "step": 12083 }, { "epoch": 0.5660748582939055, "grad_norm": 0.5453809912746578, "learning_rate": 4.20643709330995e-06, "loss": 0.2742, "step": 12084 }, { "epoch": 0.5661217032838338, "grad_norm": 0.5746227302845585, "learning_rate": 4.206298489092861e-06, "loss": 0.2877, "step": 12085 }, { "epoch": 0.5661685482737622, "grad_norm": 0.5747933964044107, "learning_rate": 4.206159875056453e-06, "loss": 0.277, "step": 12086 }, { "epoch": 0.5662153932636904, "grad_norm": 0.612149297312845, "learning_rate": 4.206021251201524e-06, "loss": 0.2878, "step": 12087 }, { "epoch": 0.5662622382536188, "grad_norm": 0.6168909128008423, "learning_rate": 4.205882617528871e-06, "loss": 0.2558, "step": 12088 }, { "epoch": 0.5663090832435471, "grad_norm": 0.5873451552402406, "learning_rate": 4.205743974039293e-06, "loss": 0.2917, "step": 12089 }, { "epoch": 0.5663559282334755, "grad_norm": 0.558237087148972, "learning_rate": 4.2056053207335854e-06, "loss": 0.2595, "step": 12090 }, { "epoch": 0.5664027732234037, "grad_norm": 0.6593196577640975, "learning_rate": 4.20546665761255e-06, "loss": 0.2928, "step": 12091 }, { "epoch": 0.5664496182133321, "grad_norm": 0.5817910008144227, "learning_rate": 4.205327984676981e-06, "loss": 0.263, "step": 12092 }, { "epoch": 0.5664964632032604, "grad_norm": 0.5690418797536279, "learning_rate": 4.205189301927679e-06, "loss": 0.2745, "step": 12093 }, { "epoch": 0.5665433081931888, "grad_norm": 0.623837726233399, "learning_rate": 4.205050609365441e-06, "loss": 0.2996, "step": 12094 }, { "epoch": 0.5665901531831171, "grad_norm": 0.5730523803019897, "learning_rate": 4.204911906991065e-06, "loss": 0.2699, "step": 12095 }, { "epoch": 0.5666369981730454, "grad_norm": 0.5706847091460241, "learning_rate": 4.2047731948053495e-06, "loss": 0.2796, "step": 12096 }, { "epoch": 0.5666838431629737, "grad_norm": 0.5560643191214965, "learning_rate": 4.2046344728090935e-06, "loss": 0.2742, "step": 12097 }, { "epoch": 0.5667306881529021, "grad_norm": 0.5799613507555297, "learning_rate": 4.204495741003094e-06, "loss": 0.259, "step": 12098 }, { "epoch": 0.5667775331428304, "grad_norm": 0.6527792297863908, "learning_rate": 4.20435699938815e-06, "loss": 0.314, "step": 12099 }, { "epoch": 0.5668243781327587, "grad_norm": 0.5560854106488304, "learning_rate": 4.2042182479650595e-06, "loss": 0.2669, "step": 12100 }, { "epoch": 0.566871223122687, "grad_norm": 0.5799297498708154, "learning_rate": 4.204079486734621e-06, "loss": 0.294, "step": 12101 }, { "epoch": 0.5669180681126154, "grad_norm": 0.5847948511530854, "learning_rate": 4.203940715697634e-06, "loss": 0.28, "step": 12102 }, { "epoch": 0.5669649131025437, "grad_norm": 0.6310572346329509, "learning_rate": 4.203801934854897e-06, "loss": 0.3047, "step": 12103 }, { "epoch": 0.5670117580924721, "grad_norm": 0.5718615645437191, "learning_rate": 4.203663144207207e-06, "loss": 0.2847, "step": 12104 }, { "epoch": 0.5670586030824003, "grad_norm": 0.6271245848801077, "learning_rate": 4.203524343755364e-06, "loss": 0.3068, "step": 12105 }, { "epoch": 0.5671054480723287, "grad_norm": 0.5246337667356075, "learning_rate": 4.203385533500167e-06, "loss": 0.2557, "step": 12106 }, { "epoch": 0.567152293062257, "grad_norm": 0.5476727405673213, "learning_rate": 4.203246713442415e-06, "loss": 0.2683, "step": 12107 }, { "epoch": 0.5671991380521854, "grad_norm": 0.6613307597611414, "learning_rate": 4.2031078835829056e-06, "loss": 0.2776, "step": 12108 }, { "epoch": 0.5672459830421136, "grad_norm": 0.5916556005191546, "learning_rate": 4.202969043922438e-06, "loss": 0.2758, "step": 12109 }, { "epoch": 0.567292828032042, "grad_norm": 0.638086405587034, "learning_rate": 4.2028301944618115e-06, "loss": 0.2915, "step": 12110 }, { "epoch": 0.5673396730219703, "grad_norm": 0.6368331521469022, "learning_rate": 4.202691335201826e-06, "loss": 0.3028, "step": 12111 }, { "epoch": 0.5673865180118987, "grad_norm": 0.6431741653699571, "learning_rate": 4.202552466143279e-06, "loss": 0.2857, "step": 12112 }, { "epoch": 0.567433363001827, "grad_norm": 0.5717658666554929, "learning_rate": 4.2024135872869706e-06, "loss": 0.2863, "step": 12113 }, { "epoch": 0.5674802079917552, "grad_norm": 0.6670990475386579, "learning_rate": 4.202274698633701e-06, "loss": 0.3126, "step": 12114 }, { "epoch": 0.5675270529816836, "grad_norm": 0.5507198584015284, "learning_rate": 4.202135800184267e-06, "loss": 0.2929, "step": 12115 }, { "epoch": 0.567573897971612, "grad_norm": 0.6166406842294039, "learning_rate": 4.20199689193947e-06, "loss": 0.2765, "step": 12116 }, { "epoch": 0.5676207429615403, "grad_norm": 0.5897944392320444, "learning_rate": 4.201857973900108e-06, "loss": 0.3109, "step": 12117 }, { "epoch": 0.5676675879514685, "grad_norm": 0.5728666066750548, "learning_rate": 4.201719046066982e-06, "loss": 0.2865, "step": 12118 }, { "epoch": 0.5677144329413969, "grad_norm": 0.5908223311833627, "learning_rate": 4.20158010844089e-06, "loss": 0.2989, "step": 12119 }, { "epoch": 0.5677612779313252, "grad_norm": 0.620501452025624, "learning_rate": 4.2014411610226326e-06, "loss": 0.2856, "step": 12120 }, { "epoch": 0.5678081229212536, "grad_norm": 0.5719897801568399, "learning_rate": 4.201302203813009e-06, "loss": 0.2706, "step": 12121 }, { "epoch": 0.567854967911182, "grad_norm": 0.6347882399907402, "learning_rate": 4.2011632368128185e-06, "loss": 0.2833, "step": 12122 }, { "epoch": 0.5679018129011102, "grad_norm": 0.6067138699526179, "learning_rate": 4.201024260022861e-06, "loss": 0.2774, "step": 12123 }, { "epoch": 0.5679486578910385, "grad_norm": 0.5392985298470457, "learning_rate": 4.200885273443937e-06, "loss": 0.2677, "step": 12124 }, { "epoch": 0.5679955028809669, "grad_norm": 0.6491893848253489, "learning_rate": 4.200746277076845e-06, "loss": 0.3127, "step": 12125 }, { "epoch": 0.5680423478708952, "grad_norm": 0.6085659461509111, "learning_rate": 4.200607270922385e-06, "loss": 0.293, "step": 12126 }, { "epoch": 0.5680891928608235, "grad_norm": 0.6926070668117367, "learning_rate": 4.200468254981359e-06, "loss": 0.2763, "step": 12127 }, { "epoch": 0.5681360378507518, "grad_norm": 0.5746813119867202, "learning_rate": 4.200329229254566e-06, "loss": 0.2793, "step": 12128 }, { "epoch": 0.5681828828406802, "grad_norm": 0.5820991838060257, "learning_rate": 4.200190193742805e-06, "loss": 0.2681, "step": 12129 }, { "epoch": 0.5682297278306085, "grad_norm": 0.5781211164491721, "learning_rate": 4.200051148446876e-06, "loss": 0.2663, "step": 12130 }, { "epoch": 0.5682765728205369, "grad_norm": 0.6452971354349627, "learning_rate": 4.199912093367581e-06, "loss": 0.3055, "step": 12131 }, { "epoch": 0.5683234178104651, "grad_norm": 0.6327011640204372, "learning_rate": 4.1997730285057185e-06, "loss": 0.2803, "step": 12132 }, { "epoch": 0.5683702628003935, "grad_norm": 0.5658256426589708, "learning_rate": 4.199633953862089e-06, "loss": 0.2878, "step": 12133 }, { "epoch": 0.5684171077903218, "grad_norm": 0.6644343622596185, "learning_rate": 4.199494869437494e-06, "loss": 0.2939, "step": 12134 }, { "epoch": 0.5684639527802502, "grad_norm": 0.6099692227104113, "learning_rate": 4.199355775232733e-06, "loss": 0.293, "step": 12135 }, { "epoch": 0.5685107977701784, "grad_norm": 0.5808041954046552, "learning_rate": 4.199216671248607e-06, "loss": 0.2773, "step": 12136 }, { "epoch": 0.5685576427601068, "grad_norm": 0.6210681938044145, "learning_rate": 4.199077557485916e-06, "loss": 0.305, "step": 12137 }, { "epoch": 0.5686044877500351, "grad_norm": 0.5795095011970964, "learning_rate": 4.19893843394546e-06, "loss": 0.2738, "step": 12138 }, { "epoch": 0.5686513327399635, "grad_norm": 0.6033940208384041, "learning_rate": 4.198799300628042e-06, "loss": 0.2882, "step": 12139 }, { "epoch": 0.5686981777298918, "grad_norm": 0.6002073321092848, "learning_rate": 4.1986601575344595e-06, "loss": 0.2872, "step": 12140 }, { "epoch": 0.5687450227198201, "grad_norm": 0.6172136805216718, "learning_rate": 4.198521004665515e-06, "loss": 0.306, "step": 12141 }, { "epoch": 0.5687918677097484, "grad_norm": 0.6153446703688619, "learning_rate": 4.198381842022009e-06, "loss": 0.2955, "step": 12142 }, { "epoch": 0.5688387126996768, "grad_norm": 0.6333709978285221, "learning_rate": 4.198242669604743e-06, "loss": 0.3147, "step": 12143 }, { "epoch": 0.5688855576896051, "grad_norm": 0.6056950804416076, "learning_rate": 4.198103487414516e-06, "loss": 0.2823, "step": 12144 }, { "epoch": 0.5689324026795334, "grad_norm": 0.6089008939859392, "learning_rate": 4.197964295452131e-06, "loss": 0.3077, "step": 12145 }, { "epoch": 0.5689792476694617, "grad_norm": 0.5627640141649807, "learning_rate": 4.197825093718388e-06, "loss": 0.2683, "step": 12146 }, { "epoch": 0.5690260926593901, "grad_norm": 0.5615359848802792, "learning_rate": 4.197685882214088e-06, "loss": 0.2737, "step": 12147 }, { "epoch": 0.5690729376493184, "grad_norm": 0.5867507960046912, "learning_rate": 4.197546660940034e-06, "loss": 0.2789, "step": 12148 }, { "epoch": 0.5691197826392468, "grad_norm": 0.5724677161594606, "learning_rate": 4.197407429897024e-06, "loss": 0.2834, "step": 12149 }, { "epoch": 0.569166627629175, "grad_norm": 0.6485147803737575, "learning_rate": 4.197268189085862e-06, "loss": 0.2969, "step": 12150 }, { "epoch": 0.5692134726191034, "grad_norm": 0.5875419590770491, "learning_rate": 4.197128938507348e-06, "loss": 0.2942, "step": 12151 }, { "epoch": 0.5692603176090317, "grad_norm": 0.5382604404535002, "learning_rate": 4.196989678162283e-06, "loss": 0.2812, "step": 12152 }, { "epoch": 0.5693071625989601, "grad_norm": 0.5971339279795764, "learning_rate": 4.196850408051469e-06, "loss": 0.2856, "step": 12153 }, { "epoch": 0.5693540075888883, "grad_norm": 0.6108759136193537, "learning_rate": 4.196711128175708e-06, "loss": 0.29, "step": 12154 }, { "epoch": 0.5694008525788167, "grad_norm": 0.6728590283569094, "learning_rate": 4.1965718385358e-06, "loss": 0.2861, "step": 12155 }, { "epoch": 0.569447697568745, "grad_norm": 0.5719825197980631, "learning_rate": 4.196432539132548e-06, "loss": 0.2806, "step": 12156 }, { "epoch": 0.5694945425586734, "grad_norm": 0.5226705654689578, "learning_rate": 4.196293229966753e-06, "loss": 0.2559, "step": 12157 }, { "epoch": 0.5695413875486017, "grad_norm": 0.6103049781592903, "learning_rate": 4.196153911039217e-06, "loss": 0.2958, "step": 12158 }, { "epoch": 0.56958823253853, "grad_norm": 0.5857178782182143, "learning_rate": 4.196014582350742e-06, "loss": 0.2672, "step": 12159 }, { "epoch": 0.5696350775284583, "grad_norm": 0.6490730095154984, "learning_rate": 4.195875243902127e-06, "loss": 0.3098, "step": 12160 }, { "epoch": 0.5696819225183867, "grad_norm": 0.5667423112117971, "learning_rate": 4.195735895694179e-06, "loss": 0.2773, "step": 12161 }, { "epoch": 0.569728767508315, "grad_norm": 0.5574926885109937, "learning_rate": 4.195596537727697e-06, "loss": 0.261, "step": 12162 }, { "epoch": 0.5697756124982433, "grad_norm": 0.5903004441896195, "learning_rate": 4.195457170003481e-06, "loss": 0.2919, "step": 12163 }, { "epoch": 0.5698224574881716, "grad_norm": 0.5698564916909928, "learning_rate": 4.195317792522337e-06, "loss": 0.2515, "step": 12164 }, { "epoch": 0.5698693024781, "grad_norm": 0.5821450793156527, "learning_rate": 4.195178405285064e-06, "loss": 0.311, "step": 12165 }, { "epoch": 0.5699161474680283, "grad_norm": 0.6044171903415401, "learning_rate": 4.195039008292466e-06, "loss": 0.28, "step": 12166 }, { "epoch": 0.5699629924579567, "grad_norm": 0.5777712501604273, "learning_rate": 4.194899601545344e-06, "loss": 0.2768, "step": 12167 }, { "epoch": 0.5700098374478849, "grad_norm": 0.6546379206052473, "learning_rate": 4.194760185044502e-06, "loss": 0.2808, "step": 12168 }, { "epoch": 0.5700566824378133, "grad_norm": 0.5956205142355476, "learning_rate": 4.19462075879074e-06, "loss": 0.2651, "step": 12169 }, { "epoch": 0.5701035274277416, "grad_norm": 0.6144184943799337, "learning_rate": 4.194481322784862e-06, "loss": 0.3059, "step": 12170 }, { "epoch": 0.57015037241767, "grad_norm": 0.5779297594548857, "learning_rate": 4.194341877027669e-06, "loss": 0.2887, "step": 12171 }, { "epoch": 0.5701972174075982, "grad_norm": 0.6259740415644143, "learning_rate": 4.194202421519965e-06, "loss": 0.2923, "step": 12172 }, { "epoch": 0.5702440623975266, "grad_norm": 0.5813722809918637, "learning_rate": 4.1940629562625515e-06, "loss": 0.2917, "step": 12173 }, { "epoch": 0.5702909073874549, "grad_norm": 0.622196223329422, "learning_rate": 4.193923481256232e-06, "loss": 0.3065, "step": 12174 }, { "epoch": 0.5703377523773833, "grad_norm": 0.5391291801907695, "learning_rate": 4.193783996501809e-06, "loss": 0.291, "step": 12175 }, { "epoch": 0.5703845973673116, "grad_norm": 0.6530568807670522, "learning_rate": 4.193644502000084e-06, "loss": 0.3, "step": 12176 }, { "epoch": 0.5704314423572399, "grad_norm": 0.5948422796688337, "learning_rate": 4.19350499775186e-06, "loss": 0.2991, "step": 12177 }, { "epoch": 0.5704782873471682, "grad_norm": 0.6185615563798083, "learning_rate": 4.193365483757942e-06, "loss": 0.2932, "step": 12178 }, { "epoch": 0.5705251323370966, "grad_norm": 0.6216766833344571, "learning_rate": 4.19322596001913e-06, "loss": 0.2988, "step": 12179 }, { "epoch": 0.5705719773270249, "grad_norm": 0.5936377126281882, "learning_rate": 4.193086426536229e-06, "loss": 0.2765, "step": 12180 }, { "epoch": 0.5706188223169532, "grad_norm": 0.581862521199703, "learning_rate": 4.192946883310041e-06, "loss": 0.284, "step": 12181 }, { "epoch": 0.5706656673068815, "grad_norm": 0.5650053203103256, "learning_rate": 4.192807330341369e-06, "loss": 0.2778, "step": 12182 }, { "epoch": 0.5707125122968099, "grad_norm": 0.5784955985632835, "learning_rate": 4.192667767631017e-06, "loss": 0.2918, "step": 12183 }, { "epoch": 0.5707593572867382, "grad_norm": 0.6233968276129108, "learning_rate": 4.192528195179786e-06, "loss": 0.2912, "step": 12184 }, { "epoch": 0.5708062022766666, "grad_norm": 0.6633468414042425, "learning_rate": 4.192388612988482e-06, "loss": 0.2968, "step": 12185 }, { "epoch": 0.5708530472665948, "grad_norm": 0.5947576723465584, "learning_rate": 4.192249021057906e-06, "loss": 0.2704, "step": 12186 }, { "epoch": 0.5708998922565232, "grad_norm": 0.6279459352138358, "learning_rate": 4.192109419388862e-06, "loss": 0.2814, "step": 12187 }, { "epoch": 0.5709467372464515, "grad_norm": 0.607982844313163, "learning_rate": 4.191969807982154e-06, "loss": 0.293, "step": 12188 }, { "epoch": 0.5709935822363799, "grad_norm": 0.6162708394805896, "learning_rate": 4.191830186838586e-06, "loss": 0.3073, "step": 12189 }, { "epoch": 0.5710404272263081, "grad_norm": 0.5901029662398803, "learning_rate": 4.191690555958959e-06, "loss": 0.3, "step": 12190 }, { "epoch": 0.5710872722162365, "grad_norm": 0.5380410123722498, "learning_rate": 4.191550915344078e-06, "loss": 0.2686, "step": 12191 }, { "epoch": 0.5711341172061648, "grad_norm": 0.5615954190644821, "learning_rate": 4.191411264994748e-06, "loss": 0.2725, "step": 12192 }, { "epoch": 0.5711809621960932, "grad_norm": 0.5941103459484233, "learning_rate": 4.19127160491177e-06, "loss": 0.2926, "step": 12193 }, { "epoch": 0.5712278071860215, "grad_norm": 0.6031919216735016, "learning_rate": 4.19113193509595e-06, "loss": 0.2906, "step": 12194 }, { "epoch": 0.5712746521759497, "grad_norm": 0.5728734965400968, "learning_rate": 4.19099225554809e-06, "loss": 0.2741, "step": 12195 }, { "epoch": 0.5713214971658781, "grad_norm": 0.5590253624039329, "learning_rate": 4.190852566268995e-06, "loss": 0.2838, "step": 12196 }, { "epoch": 0.5713683421558065, "grad_norm": 0.5717877750248231, "learning_rate": 4.190712867259468e-06, "loss": 0.2804, "step": 12197 }, { "epoch": 0.5714151871457348, "grad_norm": 0.5631205796110533, "learning_rate": 4.190573158520314e-06, "loss": 0.2711, "step": 12198 }, { "epoch": 0.571462032135663, "grad_norm": 0.6468311734122253, "learning_rate": 4.190433440052336e-06, "loss": 0.2865, "step": 12199 }, { "epoch": 0.5715088771255914, "grad_norm": 0.6146440637251733, "learning_rate": 4.1902937118563385e-06, "loss": 0.2844, "step": 12200 }, { "epoch": 0.5715557221155197, "grad_norm": 0.5742165790048261, "learning_rate": 4.190153973933126e-06, "loss": 0.2919, "step": 12201 }, { "epoch": 0.5716025671054481, "grad_norm": 0.637948332738444, "learning_rate": 4.190014226283502e-06, "loss": 0.3039, "step": 12202 }, { "epoch": 0.5716494120953765, "grad_norm": 0.6127269354055496, "learning_rate": 4.1898744689082705e-06, "loss": 0.2763, "step": 12203 }, { "epoch": 0.5716962570853047, "grad_norm": 0.6052249014713297, "learning_rate": 4.189734701808237e-06, "loss": 0.3072, "step": 12204 }, { "epoch": 0.571743102075233, "grad_norm": 0.5942748019330836, "learning_rate": 4.1895949249842035e-06, "loss": 0.282, "step": 12205 }, { "epoch": 0.5717899470651614, "grad_norm": 0.5972859315957532, "learning_rate": 4.189455138436977e-06, "loss": 0.2858, "step": 12206 }, { "epoch": 0.5718367920550897, "grad_norm": 0.5365629685542559, "learning_rate": 4.189315342167361e-06, "loss": 0.2663, "step": 12207 }, { "epoch": 0.571883637045018, "grad_norm": 0.6178888197428276, "learning_rate": 4.189175536176159e-06, "loss": 0.2886, "step": 12208 }, { "epoch": 0.5719304820349463, "grad_norm": 0.6165166555496249, "learning_rate": 4.189035720464177e-06, "loss": 0.2953, "step": 12209 }, { "epoch": 0.5719773270248747, "grad_norm": 0.5996324050150575, "learning_rate": 4.188895895032218e-06, "loss": 0.2836, "step": 12210 }, { "epoch": 0.572024172014803, "grad_norm": 0.6504350997298192, "learning_rate": 4.188756059881088e-06, "loss": 0.279, "step": 12211 }, { "epoch": 0.5720710170047314, "grad_norm": 0.6003314892302861, "learning_rate": 4.188616215011592e-06, "loss": 0.2793, "step": 12212 }, { "epoch": 0.5721178619946596, "grad_norm": 0.5883616612789203, "learning_rate": 4.188476360424533e-06, "loss": 0.283, "step": 12213 }, { "epoch": 0.572164706984588, "grad_norm": 0.6063906725559665, "learning_rate": 4.188336496120717e-06, "loss": 0.2758, "step": 12214 }, { "epoch": 0.5722115519745163, "grad_norm": 0.6097615612429876, "learning_rate": 4.188196622100949e-06, "loss": 0.2752, "step": 12215 }, { "epoch": 0.5722583969644447, "grad_norm": 0.6183142846587015, "learning_rate": 4.188056738366035e-06, "loss": 0.3003, "step": 12216 }, { "epoch": 0.5723052419543729, "grad_norm": 0.5749828045494952, "learning_rate": 4.187916844916778e-06, "loss": 0.2943, "step": 12217 }, { "epoch": 0.5723520869443013, "grad_norm": 0.5782881364916825, "learning_rate": 4.187776941753984e-06, "loss": 0.2747, "step": 12218 }, { "epoch": 0.5723989319342296, "grad_norm": 0.5865090072096604, "learning_rate": 4.1876370288784574e-06, "loss": 0.2826, "step": 12219 }, { "epoch": 0.572445776924158, "grad_norm": 0.5635599332161264, "learning_rate": 4.1874971062910045e-06, "loss": 0.2618, "step": 12220 }, { "epoch": 0.5724926219140863, "grad_norm": 0.5545133795022235, "learning_rate": 4.187357173992429e-06, "loss": 0.273, "step": 12221 }, { "epoch": 0.5725394669040146, "grad_norm": 0.5662408679443668, "learning_rate": 4.187217231983538e-06, "loss": 0.2404, "step": 12222 }, { "epoch": 0.5725863118939429, "grad_norm": 0.6198018223823079, "learning_rate": 4.187077280265135e-06, "loss": 0.299, "step": 12223 }, { "epoch": 0.5726331568838713, "grad_norm": 0.5871604422535976, "learning_rate": 4.186937318838026e-06, "loss": 0.2895, "step": 12224 }, { "epoch": 0.5726800018737996, "grad_norm": 0.5396784467507603, "learning_rate": 4.186797347703018e-06, "loss": 0.2564, "step": 12225 }, { "epoch": 0.5727268468637279, "grad_norm": 0.5840836962497429, "learning_rate": 4.186657366860915e-06, "loss": 0.2771, "step": 12226 }, { "epoch": 0.5727736918536562, "grad_norm": 0.6399443965791362, "learning_rate": 4.186517376312522e-06, "loss": 0.2966, "step": 12227 }, { "epoch": 0.5728205368435846, "grad_norm": 0.5773379382484377, "learning_rate": 4.1863773760586455e-06, "loss": 0.2878, "step": 12228 }, { "epoch": 0.5728673818335129, "grad_norm": 0.6219565153451319, "learning_rate": 4.186237366100092e-06, "loss": 0.2772, "step": 12229 }, { "epoch": 0.5729142268234413, "grad_norm": 0.553572422941865, "learning_rate": 4.186097346437665e-06, "loss": 0.2661, "step": 12230 }, { "epoch": 0.5729610718133695, "grad_norm": 0.6342442220844593, "learning_rate": 4.185957317072173e-06, "loss": 0.3076, "step": 12231 }, { "epoch": 0.5730079168032979, "grad_norm": 0.6191964283803596, "learning_rate": 4.185817278004419e-06, "loss": 0.288, "step": 12232 }, { "epoch": 0.5730547617932262, "grad_norm": 0.6064308727245915, "learning_rate": 4.185677229235211e-06, "loss": 0.2848, "step": 12233 }, { "epoch": 0.5731016067831546, "grad_norm": 0.5657239349732894, "learning_rate": 4.1855371707653535e-06, "loss": 0.2757, "step": 12234 }, { "epoch": 0.5731484517730828, "grad_norm": 0.628234491034497, "learning_rate": 4.185397102595654e-06, "loss": 0.309, "step": 12235 }, { "epoch": 0.5731952967630112, "grad_norm": 0.6162354077844263, "learning_rate": 4.185257024726918e-06, "loss": 0.2897, "step": 12236 }, { "epoch": 0.5732421417529395, "grad_norm": 0.7119880737667041, "learning_rate": 4.185116937159951e-06, "loss": 0.2966, "step": 12237 }, { "epoch": 0.5732889867428679, "grad_norm": 0.6307019696924661, "learning_rate": 4.1849768398955596e-06, "loss": 0.2644, "step": 12238 }, { "epoch": 0.5733358317327962, "grad_norm": 0.5925407247010843, "learning_rate": 4.184836732934549e-06, "loss": 0.2817, "step": 12239 }, { "epoch": 0.5733826767227245, "grad_norm": 0.5817738143299794, "learning_rate": 4.184696616277728e-06, "loss": 0.2795, "step": 12240 }, { "epoch": 0.5734295217126528, "grad_norm": 0.5904563843088738, "learning_rate": 4.184556489925902e-06, "loss": 0.2861, "step": 12241 }, { "epoch": 0.5734763667025812, "grad_norm": 0.6153017289580228, "learning_rate": 4.1844163538798746e-06, "loss": 0.2773, "step": 12242 }, { "epoch": 0.5735232116925095, "grad_norm": 0.5587675815365946, "learning_rate": 4.184276208140455e-06, "loss": 0.2735, "step": 12243 }, { "epoch": 0.5735700566824378, "grad_norm": 0.6245744773336842, "learning_rate": 4.184136052708451e-06, "loss": 0.2864, "step": 12244 }, { "epoch": 0.5736169016723661, "grad_norm": 0.626100618391699, "learning_rate": 4.1839958875846654e-06, "loss": 0.3035, "step": 12245 }, { "epoch": 0.5736637466622945, "grad_norm": 0.6279934160175925, "learning_rate": 4.183855712769907e-06, "loss": 0.2984, "step": 12246 }, { "epoch": 0.5737105916522228, "grad_norm": 0.6096371487471072, "learning_rate": 4.183715528264982e-06, "loss": 0.2897, "step": 12247 }, { "epoch": 0.5737574366421512, "grad_norm": 0.5810886046961292, "learning_rate": 4.183575334070698e-06, "loss": 0.2726, "step": 12248 }, { "epoch": 0.5738042816320794, "grad_norm": 0.6050795387688868, "learning_rate": 4.1834351301878615e-06, "loss": 0.2595, "step": 12249 }, { "epoch": 0.5738511266220078, "grad_norm": 0.6208204621412341, "learning_rate": 4.183294916617278e-06, "loss": 0.3017, "step": 12250 }, { "epoch": 0.5738979716119361, "grad_norm": 0.5912881557338631, "learning_rate": 4.183154693359756e-06, "loss": 0.2913, "step": 12251 }, { "epoch": 0.5739448166018645, "grad_norm": 0.618098824281691, "learning_rate": 4.183014460416101e-06, "loss": 0.2906, "step": 12252 }, { "epoch": 0.5739916615917927, "grad_norm": 0.5917444989090563, "learning_rate": 4.182874217787122e-06, "loss": 0.2922, "step": 12253 }, { "epoch": 0.5740385065817211, "grad_norm": 0.5787710399853668, "learning_rate": 4.182733965473624e-06, "loss": 0.2844, "step": 12254 }, { "epoch": 0.5740853515716494, "grad_norm": 0.5640334480095379, "learning_rate": 4.182593703476414e-06, "loss": 0.2725, "step": 12255 }, { "epoch": 0.5741321965615778, "grad_norm": 0.6069620131100794, "learning_rate": 4.182453431796302e-06, "loss": 0.2929, "step": 12256 }, { "epoch": 0.5741790415515061, "grad_norm": 0.6377524886686081, "learning_rate": 4.182313150434092e-06, "loss": 0.2807, "step": 12257 }, { "epoch": 0.5742258865414344, "grad_norm": 0.6972733510346613, "learning_rate": 4.1821728593905934e-06, "loss": 0.2817, "step": 12258 }, { "epoch": 0.5742727315313627, "grad_norm": 0.5452950573934034, "learning_rate": 4.182032558666613e-06, "loss": 0.2702, "step": 12259 }, { "epoch": 0.5743195765212911, "grad_norm": 0.5527189235986654, "learning_rate": 4.181892248262957e-06, "loss": 0.262, "step": 12260 }, { "epoch": 0.5743664215112194, "grad_norm": 0.6094650326349361, "learning_rate": 4.1817519281804346e-06, "loss": 0.3032, "step": 12261 }, { "epoch": 0.5744132665011477, "grad_norm": 0.5487838758109893, "learning_rate": 4.181611598419852e-06, "loss": 0.2683, "step": 12262 }, { "epoch": 0.574460111491076, "grad_norm": 0.589361213639348, "learning_rate": 4.181471258982018e-06, "loss": 0.2866, "step": 12263 }, { "epoch": 0.5745069564810044, "grad_norm": 0.6007974408943987, "learning_rate": 4.181330909867739e-06, "loss": 0.279, "step": 12264 }, { "epoch": 0.5745538014709327, "grad_norm": 0.5731023036323541, "learning_rate": 4.181190551077824e-06, "loss": 0.2896, "step": 12265 }, { "epoch": 0.5746006464608611, "grad_norm": 0.6255539980368898, "learning_rate": 4.181050182613079e-06, "loss": 0.2829, "step": 12266 }, { "epoch": 0.5746474914507893, "grad_norm": 0.5863408344898912, "learning_rate": 4.180909804474313e-06, "loss": 0.2931, "step": 12267 }, { "epoch": 0.5746943364407177, "grad_norm": 0.5883240115665168, "learning_rate": 4.180769416662334e-06, "loss": 0.2883, "step": 12268 }, { "epoch": 0.574741181430646, "grad_norm": 0.6263679100373963, "learning_rate": 4.180629019177949e-06, "loss": 0.3009, "step": 12269 }, { "epoch": 0.5747880264205744, "grad_norm": 0.5775508914989803, "learning_rate": 4.180488612021966e-06, "loss": 0.2833, "step": 12270 }, { "epoch": 0.5748348714105026, "grad_norm": 0.6146391397493594, "learning_rate": 4.180348195195194e-06, "loss": 0.2828, "step": 12271 }, { "epoch": 0.574881716400431, "grad_norm": 0.6109605474698702, "learning_rate": 4.18020776869844e-06, "loss": 0.2868, "step": 12272 }, { "epoch": 0.5749285613903593, "grad_norm": 0.6162904525857034, "learning_rate": 4.180067332532513e-06, "loss": 0.2761, "step": 12273 }, { "epoch": 0.5749754063802877, "grad_norm": 0.6157929755713064, "learning_rate": 4.179926886698221e-06, "loss": 0.2995, "step": 12274 }, { "epoch": 0.575022251370216, "grad_norm": 0.6146702125510074, "learning_rate": 4.179786431196372e-06, "loss": 0.294, "step": 12275 }, { "epoch": 0.5750690963601442, "grad_norm": 0.5907756836507757, "learning_rate": 4.179645966027773e-06, "loss": 0.2742, "step": 12276 }, { "epoch": 0.5751159413500726, "grad_norm": 0.5804015801271445, "learning_rate": 4.179505491193235e-06, "loss": 0.2939, "step": 12277 }, { "epoch": 0.575162786340001, "grad_norm": 0.6065470110543195, "learning_rate": 4.1793650066935655e-06, "loss": 0.2824, "step": 12278 }, { "epoch": 0.5752096313299293, "grad_norm": 0.6378994502111778, "learning_rate": 4.1792245125295715e-06, "loss": 0.2855, "step": 12279 }, { "epoch": 0.5752564763198575, "grad_norm": 0.6241861338003233, "learning_rate": 4.179084008702062e-06, "loss": 0.2968, "step": 12280 }, { "epoch": 0.5753033213097859, "grad_norm": 0.6567706738772788, "learning_rate": 4.178943495211847e-06, "loss": 0.2974, "step": 12281 }, { "epoch": 0.5753501662997142, "grad_norm": 0.598860337246414, "learning_rate": 4.1788029720597335e-06, "loss": 0.3029, "step": 12282 }, { "epoch": 0.5753970112896426, "grad_norm": 0.5709333144785899, "learning_rate": 4.178662439246532e-06, "loss": 0.2846, "step": 12283 }, { "epoch": 0.575443856279571, "grad_norm": 0.5548273167155654, "learning_rate": 4.178521896773049e-06, "loss": 0.2569, "step": 12284 }, { "epoch": 0.5754907012694992, "grad_norm": 0.6405813797482173, "learning_rate": 4.178381344640094e-06, "loss": 0.2851, "step": 12285 }, { "epoch": 0.5755375462594275, "grad_norm": 0.5610407271439173, "learning_rate": 4.178240782848477e-06, "loss": 0.2689, "step": 12286 }, { "epoch": 0.5755843912493559, "grad_norm": 0.6147960460104103, "learning_rate": 4.178100211399007e-06, "loss": 0.2904, "step": 12287 }, { "epoch": 0.5756312362392842, "grad_norm": 0.5554098463147895, "learning_rate": 4.177959630292491e-06, "loss": 0.2798, "step": 12288 }, { "epoch": 0.5756780812292125, "grad_norm": 0.6037432582130671, "learning_rate": 4.177819039529738e-06, "loss": 0.2867, "step": 12289 }, { "epoch": 0.5757249262191408, "grad_norm": 0.5949037732735888, "learning_rate": 4.17767843911156e-06, "loss": 0.2877, "step": 12290 }, { "epoch": 0.5757717712090692, "grad_norm": 0.6309458732673968, "learning_rate": 4.177537829038763e-06, "loss": 0.2959, "step": 12291 }, { "epoch": 0.5758186161989975, "grad_norm": 0.5593780624780685, "learning_rate": 4.177397209312158e-06, "loss": 0.2789, "step": 12292 }, { "epoch": 0.5758654611889259, "grad_norm": 0.6073861086439554, "learning_rate": 4.1772565799325536e-06, "loss": 0.2975, "step": 12293 }, { "epoch": 0.5759123061788541, "grad_norm": 0.5972637681631505, "learning_rate": 4.1771159409007586e-06, "loss": 0.2985, "step": 12294 }, { "epoch": 0.5759591511687825, "grad_norm": 0.5765663264184197, "learning_rate": 4.176975292217584e-06, "loss": 0.274, "step": 12295 }, { "epoch": 0.5760059961587108, "grad_norm": 0.5831994753560252, "learning_rate": 4.1768346338838375e-06, "loss": 0.2915, "step": 12296 }, { "epoch": 0.5760528411486392, "grad_norm": 0.6300291763314971, "learning_rate": 4.1766939659003285e-06, "loss": 0.2826, "step": 12297 }, { "epoch": 0.5760996861385674, "grad_norm": 0.6113518787173907, "learning_rate": 4.176553288267868e-06, "loss": 0.2817, "step": 12298 }, { "epoch": 0.5761465311284958, "grad_norm": 0.552603641076066, "learning_rate": 4.176412600987264e-06, "loss": 0.2854, "step": 12299 }, { "epoch": 0.5761933761184241, "grad_norm": 0.5469179225274038, "learning_rate": 4.176271904059328e-06, "loss": 0.2828, "step": 12300 }, { "epoch": 0.5762402211083525, "grad_norm": 0.6013316489375216, "learning_rate": 4.176131197484867e-06, "loss": 0.2858, "step": 12301 }, { "epoch": 0.5762870660982808, "grad_norm": 0.631805404872598, "learning_rate": 4.1759904812646936e-06, "loss": 0.2826, "step": 12302 }, { "epoch": 0.5763339110882091, "grad_norm": 0.5784003219337817, "learning_rate": 4.175849755399616e-06, "loss": 0.2661, "step": 12303 }, { "epoch": 0.5763807560781374, "grad_norm": 0.5604798836808339, "learning_rate": 4.175709019890443e-06, "loss": 0.2605, "step": 12304 }, { "epoch": 0.5764276010680658, "grad_norm": 0.5663021013201622, "learning_rate": 4.175568274737987e-06, "loss": 0.2776, "step": 12305 }, { "epoch": 0.5764744460579941, "grad_norm": 0.6756246159358157, "learning_rate": 4.175427519943056e-06, "loss": 0.2869, "step": 12306 }, { "epoch": 0.5765212910479224, "grad_norm": 0.5779877472469536, "learning_rate": 4.175286755506461e-06, "loss": 0.2912, "step": 12307 }, { "epoch": 0.5765681360378507, "grad_norm": 0.633851573335314, "learning_rate": 4.175145981429013e-06, "loss": 0.2955, "step": 12308 }, { "epoch": 0.5766149810277791, "grad_norm": 0.617806968294691, "learning_rate": 4.1750051977115195e-06, "loss": 0.2911, "step": 12309 }, { "epoch": 0.5766618260177074, "grad_norm": 0.6044908227263164, "learning_rate": 4.174864404354793e-06, "loss": 0.3083, "step": 12310 }, { "epoch": 0.5767086710076358, "grad_norm": 0.5853660838007981, "learning_rate": 4.174723601359641e-06, "loss": 0.294, "step": 12311 }, { "epoch": 0.576755515997564, "grad_norm": 0.6148431478124623, "learning_rate": 4.1745827887268776e-06, "loss": 0.2955, "step": 12312 }, { "epoch": 0.5768023609874924, "grad_norm": 0.6098561206695322, "learning_rate": 4.17444196645731e-06, "loss": 0.2899, "step": 12313 }, { "epoch": 0.5768492059774207, "grad_norm": 0.6180046661020762, "learning_rate": 4.1743011345517506e-06, "loss": 0.2857, "step": 12314 }, { "epoch": 0.5768960509673491, "grad_norm": 0.6133444676530397, "learning_rate": 4.1741602930110085e-06, "loss": 0.2876, "step": 12315 }, { "epoch": 0.5769428959572773, "grad_norm": 0.5982410100439891, "learning_rate": 4.174019441835894e-06, "loss": 0.2737, "step": 12316 }, { "epoch": 0.5769897409472057, "grad_norm": 0.6147661713506877, "learning_rate": 4.1738785810272195e-06, "loss": 0.3005, "step": 12317 }, { "epoch": 0.577036585937134, "grad_norm": 0.5666500096437758, "learning_rate": 4.173737710585794e-06, "loss": 0.2769, "step": 12318 }, { "epoch": 0.5770834309270624, "grad_norm": 0.6199387948226316, "learning_rate": 4.1735968305124286e-06, "loss": 0.2899, "step": 12319 }, { "epoch": 0.5771302759169907, "grad_norm": 0.6667151967967978, "learning_rate": 4.1734559408079345e-06, "loss": 0.2974, "step": 12320 }, { "epoch": 0.577177120906919, "grad_norm": 0.6743448157839084, "learning_rate": 4.1733150414731214e-06, "loss": 0.3106, "step": 12321 }, { "epoch": 0.5772239658968473, "grad_norm": 0.5691187103153459, "learning_rate": 4.1731741325088016e-06, "loss": 0.2791, "step": 12322 }, { "epoch": 0.5772708108867757, "grad_norm": 0.5966418851037193, "learning_rate": 4.173033213915785e-06, "loss": 0.3061, "step": 12323 }, { "epoch": 0.577317655876704, "grad_norm": 0.5743478898239822, "learning_rate": 4.1728922856948814e-06, "loss": 0.2826, "step": 12324 }, { "epoch": 0.5773645008666323, "grad_norm": 0.5755790902065464, "learning_rate": 4.172751347846905e-06, "loss": 0.2816, "step": 12325 }, { "epoch": 0.5774113458565606, "grad_norm": 0.6320334383797599, "learning_rate": 4.172610400372664e-06, "loss": 0.2955, "step": 12326 }, { "epoch": 0.577458190846489, "grad_norm": 0.6123325517928007, "learning_rate": 4.1724694432729704e-06, "loss": 0.2668, "step": 12327 }, { "epoch": 0.5775050358364173, "grad_norm": 0.588336507847417, "learning_rate": 4.172328476548636e-06, "loss": 0.2813, "step": 12328 }, { "epoch": 0.5775518808263457, "grad_norm": 0.5952047556867308, "learning_rate": 4.172187500200472e-06, "loss": 0.2789, "step": 12329 }, { "epoch": 0.5775987258162739, "grad_norm": 0.552072571013452, "learning_rate": 4.1720465142292884e-06, "loss": 0.2659, "step": 12330 }, { "epoch": 0.5776455708062023, "grad_norm": 0.5307792710141264, "learning_rate": 4.171905518635898e-06, "loss": 0.254, "step": 12331 }, { "epoch": 0.5776924157961306, "grad_norm": 0.6054189247964148, "learning_rate": 4.171764513421112e-06, "loss": 0.2844, "step": 12332 }, { "epoch": 0.577739260786059, "grad_norm": 0.555583296774657, "learning_rate": 4.17162349858574e-06, "loss": 0.2788, "step": 12333 }, { "epoch": 0.5777861057759872, "grad_norm": 0.6098986493537518, "learning_rate": 4.171482474130595e-06, "loss": 0.2753, "step": 12334 }, { "epoch": 0.5778329507659156, "grad_norm": 0.639104604118574, "learning_rate": 4.171341440056489e-06, "loss": 0.3093, "step": 12335 }, { "epoch": 0.5778797957558439, "grad_norm": 0.5966781237622923, "learning_rate": 4.171200396364234e-06, "loss": 0.2636, "step": 12336 }, { "epoch": 0.5779266407457723, "grad_norm": 0.6381128681835615, "learning_rate": 4.17105934305464e-06, "loss": 0.3154, "step": 12337 }, { "epoch": 0.5779734857357006, "grad_norm": 0.6098747673758518, "learning_rate": 4.170918280128519e-06, "loss": 0.296, "step": 12338 }, { "epoch": 0.5780203307256289, "grad_norm": 0.6151179906096298, "learning_rate": 4.170777207586684e-06, "loss": 0.2874, "step": 12339 }, { "epoch": 0.5780671757155572, "grad_norm": 0.6131451485573498, "learning_rate": 4.1706361254299455e-06, "loss": 0.2957, "step": 12340 }, { "epoch": 0.5781140207054856, "grad_norm": 0.5738731121717446, "learning_rate": 4.170495033659116e-06, "loss": 0.2674, "step": 12341 }, { "epoch": 0.5781608656954139, "grad_norm": 0.606051683208936, "learning_rate": 4.170353932275007e-06, "loss": 0.2905, "step": 12342 }, { "epoch": 0.5782077106853422, "grad_norm": 0.6125025331280677, "learning_rate": 4.170212821278432e-06, "loss": 0.2814, "step": 12343 }, { "epoch": 0.5782545556752705, "grad_norm": 0.6073286654747417, "learning_rate": 4.170071700670202e-06, "loss": 0.2619, "step": 12344 }, { "epoch": 0.5783014006651989, "grad_norm": 0.6687548452641581, "learning_rate": 4.169930570451128e-06, "loss": 0.2848, "step": 12345 }, { "epoch": 0.5783482456551272, "grad_norm": 0.6107060674497802, "learning_rate": 4.169789430622024e-06, "loss": 0.2758, "step": 12346 }, { "epoch": 0.5783950906450556, "grad_norm": 0.5871766145816605, "learning_rate": 4.169648281183703e-06, "loss": 0.2696, "step": 12347 }, { "epoch": 0.5784419356349838, "grad_norm": 0.5913448395196182, "learning_rate": 4.1695071221369735e-06, "loss": 0.2976, "step": 12348 }, { "epoch": 0.5784887806249122, "grad_norm": 0.5733539028753603, "learning_rate": 4.169365953482651e-06, "loss": 0.2798, "step": 12349 }, { "epoch": 0.5785356256148405, "grad_norm": 0.6075738939545099, "learning_rate": 4.169224775221548e-06, "loss": 0.287, "step": 12350 }, { "epoch": 0.5785824706047689, "grad_norm": 0.5522115092754935, "learning_rate": 4.169083587354474e-06, "loss": 0.2869, "step": 12351 }, { "epoch": 0.5786293155946971, "grad_norm": 0.6533370084129598, "learning_rate": 4.1689423898822445e-06, "loss": 0.3096, "step": 12352 }, { "epoch": 0.5786761605846255, "grad_norm": 0.6059614302059321, "learning_rate": 4.168801182805671e-06, "loss": 0.2742, "step": 12353 }, { "epoch": 0.5787230055745538, "grad_norm": 0.6350471085831453, "learning_rate": 4.168659966125565e-06, "loss": 0.3054, "step": 12354 }, { "epoch": 0.5787698505644822, "grad_norm": 0.5825817917573884, "learning_rate": 4.168518739842742e-06, "loss": 0.3042, "step": 12355 }, { "epoch": 0.5788166955544105, "grad_norm": 0.6107118015335238, "learning_rate": 4.168377503958013e-06, "loss": 0.28, "step": 12356 }, { "epoch": 0.5788635405443388, "grad_norm": 0.6755693373608461, "learning_rate": 4.1682362584721906e-06, "loss": 0.3054, "step": 12357 }, { "epoch": 0.5789103855342671, "grad_norm": 0.7065772691388208, "learning_rate": 4.168095003386087e-06, "loss": 0.3156, "step": 12358 }, { "epoch": 0.5789572305241955, "grad_norm": 0.5663421814831262, "learning_rate": 4.167953738700517e-06, "loss": 0.2886, "step": 12359 }, { "epoch": 0.5790040755141238, "grad_norm": 0.6161932144869936, "learning_rate": 4.167812464416291e-06, "loss": 0.2876, "step": 12360 }, { "epoch": 0.579050920504052, "grad_norm": 0.6157651565957282, "learning_rate": 4.167671180534224e-06, "loss": 0.3024, "step": 12361 }, { "epoch": 0.5790977654939804, "grad_norm": 0.6153385488923188, "learning_rate": 4.167529887055129e-06, "loss": 0.2733, "step": 12362 }, { "epoch": 0.5791446104839088, "grad_norm": 0.5586086635018425, "learning_rate": 4.167388583979818e-06, "loss": 0.2621, "step": 12363 }, { "epoch": 0.5791914554738371, "grad_norm": 0.5974374919353579, "learning_rate": 4.167247271309105e-06, "loss": 0.2803, "step": 12364 }, { "epoch": 0.5792383004637655, "grad_norm": 0.6213269022647265, "learning_rate": 4.167105949043804e-06, "loss": 0.2784, "step": 12365 }, { "epoch": 0.5792851454536937, "grad_norm": 0.6245876140923287, "learning_rate": 4.166964617184726e-06, "loss": 0.2952, "step": 12366 }, { "epoch": 0.579331990443622, "grad_norm": 0.562457046388376, "learning_rate": 4.1668232757326855e-06, "loss": 0.2933, "step": 12367 }, { "epoch": 0.5793788354335504, "grad_norm": 0.6088488775531226, "learning_rate": 4.166681924688497e-06, "loss": 0.2914, "step": 12368 }, { "epoch": 0.5794256804234788, "grad_norm": 0.636068894334873, "learning_rate": 4.166540564052972e-06, "loss": 0.2822, "step": 12369 }, { "epoch": 0.579472525413407, "grad_norm": 0.6165493351031119, "learning_rate": 4.166399193826926e-06, "loss": 0.2918, "step": 12370 }, { "epoch": 0.5795193704033353, "grad_norm": 0.5622543961219905, "learning_rate": 4.1662578140111706e-06, "loss": 0.2675, "step": 12371 }, { "epoch": 0.5795662153932637, "grad_norm": 0.6109569825093482, "learning_rate": 4.1661164246065206e-06, "loss": 0.2912, "step": 12372 }, { "epoch": 0.579613060383192, "grad_norm": 0.6294336040871231, "learning_rate": 4.165975025613789e-06, "loss": 0.2813, "step": 12373 }, { "epoch": 0.5796599053731204, "grad_norm": 0.5509347324549708, "learning_rate": 4.16583361703379e-06, "loss": 0.257, "step": 12374 }, { "epoch": 0.5797067503630486, "grad_norm": 0.5632629574269333, "learning_rate": 4.165692198867337e-06, "loss": 0.2668, "step": 12375 }, { "epoch": 0.579753595352977, "grad_norm": 0.6277111889443939, "learning_rate": 4.1655507711152446e-06, "loss": 0.2995, "step": 12376 }, { "epoch": 0.5798004403429053, "grad_norm": 0.5791354803766662, "learning_rate": 4.1654093337783265e-06, "loss": 0.2745, "step": 12377 }, { "epoch": 0.5798472853328337, "grad_norm": 0.63056670574803, "learning_rate": 4.165267886857395e-06, "loss": 0.293, "step": 12378 }, { "epoch": 0.5798941303227619, "grad_norm": 0.5845944041511819, "learning_rate": 4.165126430353267e-06, "loss": 0.2814, "step": 12379 }, { "epoch": 0.5799409753126903, "grad_norm": 0.5814307193398793, "learning_rate": 4.164984964266753e-06, "loss": 0.2793, "step": 12380 }, { "epoch": 0.5799878203026186, "grad_norm": 0.9231553660283898, "learning_rate": 4.164843488598671e-06, "loss": 0.2826, "step": 12381 }, { "epoch": 0.580034665292547, "grad_norm": 0.5819719462569142, "learning_rate": 4.164702003349831e-06, "loss": 0.3082, "step": 12382 }, { "epoch": 0.5800815102824753, "grad_norm": 0.5533561601995426, "learning_rate": 4.164560508521051e-06, "loss": 0.2795, "step": 12383 }, { "epoch": 0.5801283552724036, "grad_norm": 0.6097971370699783, "learning_rate": 4.164419004113143e-06, "loss": 0.2928, "step": 12384 }, { "epoch": 0.5801752002623319, "grad_norm": 0.5833479227331971, "learning_rate": 4.164277490126922e-06, "loss": 0.2817, "step": 12385 }, { "epoch": 0.5802220452522603, "grad_norm": 0.560378447138068, "learning_rate": 4.164135966563202e-06, "loss": 0.2645, "step": 12386 }, { "epoch": 0.5802688902421886, "grad_norm": 0.600777315533433, "learning_rate": 4.163994433422799e-06, "loss": 0.2878, "step": 12387 }, { "epoch": 0.5803157352321169, "grad_norm": 0.5188280196652434, "learning_rate": 4.1638528907065255e-06, "loss": 0.2497, "step": 12388 }, { "epoch": 0.5803625802220452, "grad_norm": 0.6356212864748079, "learning_rate": 4.163711338415197e-06, "loss": 0.292, "step": 12389 }, { "epoch": 0.5804094252119736, "grad_norm": 0.5658256513557236, "learning_rate": 4.163569776549628e-06, "loss": 0.2767, "step": 12390 }, { "epoch": 0.5804562702019019, "grad_norm": 0.5849090177090761, "learning_rate": 4.163428205110632e-06, "loss": 0.2888, "step": 12391 }, { "epoch": 0.5805031151918303, "grad_norm": 0.6018546343711697, "learning_rate": 4.163286624099025e-06, "loss": 0.3034, "step": 12392 }, { "epoch": 0.5805499601817585, "grad_norm": 0.5752286533378115, "learning_rate": 4.163145033515623e-06, "loss": 0.2969, "step": 12393 }, { "epoch": 0.5805968051716869, "grad_norm": 0.5859915741166817, "learning_rate": 4.163003433361238e-06, "loss": 0.2929, "step": 12394 }, { "epoch": 0.5806436501616152, "grad_norm": 0.5669164469922068, "learning_rate": 4.162861823636687e-06, "loss": 0.2639, "step": 12395 }, { "epoch": 0.5806904951515436, "grad_norm": 0.6339772986818818, "learning_rate": 4.162720204342784e-06, "loss": 0.2992, "step": 12396 }, { "epoch": 0.5807373401414718, "grad_norm": 0.6224391608300426, "learning_rate": 4.1625785754803436e-06, "loss": 0.2812, "step": 12397 }, { "epoch": 0.5807841851314002, "grad_norm": 0.5888833475545262, "learning_rate": 4.162436937050181e-06, "loss": 0.3023, "step": 12398 }, { "epoch": 0.5808310301213285, "grad_norm": 0.6196093170849664, "learning_rate": 4.162295289053113e-06, "loss": 0.2872, "step": 12399 }, { "epoch": 0.5808778751112569, "grad_norm": 0.5751805221845423, "learning_rate": 4.162153631489952e-06, "loss": 0.2732, "step": 12400 }, { "epoch": 0.5809247201011852, "grad_norm": 0.596804207668192, "learning_rate": 4.162011964361516e-06, "loss": 0.3021, "step": 12401 }, { "epoch": 0.5809715650911135, "grad_norm": 0.5947331821316276, "learning_rate": 4.161870287668619e-06, "loss": 0.2958, "step": 12402 }, { "epoch": 0.5810184100810418, "grad_norm": 0.7169805118438015, "learning_rate": 4.161728601412075e-06, "loss": 0.3063, "step": 12403 }, { "epoch": 0.5810652550709702, "grad_norm": 0.6318722970685232, "learning_rate": 4.161586905592701e-06, "loss": 0.2925, "step": 12404 }, { "epoch": 0.5811121000608985, "grad_norm": 0.5764168886565574, "learning_rate": 4.161445200211312e-06, "loss": 0.269, "step": 12405 }, { "epoch": 0.5811589450508268, "grad_norm": 0.6027565282517282, "learning_rate": 4.161303485268723e-06, "loss": 0.3096, "step": 12406 }, { "epoch": 0.5812057900407551, "grad_norm": 0.6401308725773962, "learning_rate": 4.16116176076575e-06, "loss": 0.2814, "step": 12407 }, { "epoch": 0.5812526350306835, "grad_norm": 0.573670057596585, "learning_rate": 4.161020026703209e-06, "loss": 0.3065, "step": 12408 }, { "epoch": 0.5812994800206118, "grad_norm": 0.5978872038489367, "learning_rate": 4.160878283081916e-06, "loss": 0.2886, "step": 12409 }, { "epoch": 0.5813463250105402, "grad_norm": 0.5990444462028325, "learning_rate": 4.160736529902684e-06, "loss": 0.2725, "step": 12410 }, { "epoch": 0.5813931700004684, "grad_norm": 0.5774621631048432, "learning_rate": 4.160594767166333e-06, "loss": 0.2756, "step": 12411 }, { "epoch": 0.5814400149903968, "grad_norm": 0.6095892622303991, "learning_rate": 4.160452994873675e-06, "loss": 0.2889, "step": 12412 }, { "epoch": 0.5814868599803251, "grad_norm": 0.581160949109297, "learning_rate": 4.1603112130255284e-06, "loss": 0.2811, "step": 12413 }, { "epoch": 0.5815337049702535, "grad_norm": 0.6039751091636198, "learning_rate": 4.160169421622708e-06, "loss": 0.2871, "step": 12414 }, { "epoch": 0.5815805499601817, "grad_norm": 0.6154287656538548, "learning_rate": 4.160027620666029e-06, "loss": 0.2853, "step": 12415 }, { "epoch": 0.5816273949501101, "grad_norm": 0.5424546502679178, "learning_rate": 4.159885810156308e-06, "loss": 0.2382, "step": 12416 }, { "epoch": 0.5816742399400384, "grad_norm": 0.6377258644063023, "learning_rate": 4.159743990094362e-06, "loss": 0.301, "step": 12417 }, { "epoch": 0.5817210849299668, "grad_norm": 0.6164106499337235, "learning_rate": 4.1596021604810065e-06, "loss": 0.2862, "step": 12418 }, { "epoch": 0.5817679299198951, "grad_norm": 0.6656804793799586, "learning_rate": 4.159460321317057e-06, "loss": 0.2955, "step": 12419 }, { "epoch": 0.5818147749098234, "grad_norm": 0.571721725827776, "learning_rate": 4.159318472603332e-06, "loss": 0.2782, "step": 12420 }, { "epoch": 0.5818616198997517, "grad_norm": 0.5690858602533706, "learning_rate": 4.1591766143406445e-06, "loss": 0.2862, "step": 12421 }, { "epoch": 0.5819084648896801, "grad_norm": 0.6062922117470593, "learning_rate": 4.159034746529813e-06, "loss": 0.293, "step": 12422 }, { "epoch": 0.5819553098796084, "grad_norm": 0.5502949821925363, "learning_rate": 4.158892869171654e-06, "loss": 0.2703, "step": 12423 }, { "epoch": 0.5820021548695367, "grad_norm": 0.5847464605167095, "learning_rate": 4.158750982266983e-06, "loss": 0.2708, "step": 12424 }, { "epoch": 0.582048999859465, "grad_norm": 0.6432644959071138, "learning_rate": 4.158609085816618e-06, "loss": 0.3012, "step": 12425 }, { "epoch": 0.5820958448493934, "grad_norm": 0.6752568734666029, "learning_rate": 4.1584671798213735e-06, "loss": 0.3139, "step": 12426 }, { "epoch": 0.5821426898393217, "grad_norm": 0.6237697214656538, "learning_rate": 4.1583252642820686e-06, "loss": 0.2727, "step": 12427 }, { "epoch": 0.5821895348292501, "grad_norm": 0.6127300734702179, "learning_rate": 4.158183339199518e-06, "loss": 0.2832, "step": 12428 }, { "epoch": 0.5822363798191783, "grad_norm": 0.6013403036126307, "learning_rate": 4.158041404574538e-06, "loss": 0.2764, "step": 12429 }, { "epoch": 0.5822832248091067, "grad_norm": 0.5601896039113262, "learning_rate": 4.157899460407947e-06, "loss": 0.2633, "step": 12430 }, { "epoch": 0.582330069799035, "grad_norm": 0.5726379349135332, "learning_rate": 4.1577575067005615e-06, "loss": 0.2803, "step": 12431 }, { "epoch": 0.5823769147889634, "grad_norm": 0.5726730697444059, "learning_rate": 4.157615543453198e-06, "loss": 0.2853, "step": 12432 }, { "epoch": 0.5824237597788916, "grad_norm": 0.6249810458682621, "learning_rate": 4.157473570666674e-06, "loss": 0.2785, "step": 12433 }, { "epoch": 0.58247060476882, "grad_norm": 0.6036130146203623, "learning_rate": 4.157331588341806e-06, "loss": 0.2882, "step": 12434 }, { "epoch": 0.5825174497587483, "grad_norm": 0.5576184173264614, "learning_rate": 4.157189596479412e-06, "loss": 0.2538, "step": 12435 }, { "epoch": 0.5825642947486767, "grad_norm": 0.617488035091027, "learning_rate": 4.1570475950803065e-06, "loss": 0.2777, "step": 12436 }, { "epoch": 0.582611139738605, "grad_norm": 0.5889299727732878, "learning_rate": 4.15690558414531e-06, "loss": 0.2759, "step": 12437 }, { "epoch": 0.5826579847285333, "grad_norm": 0.5832600727616046, "learning_rate": 4.156763563675238e-06, "loss": 0.2789, "step": 12438 }, { "epoch": 0.5827048297184616, "grad_norm": 0.6264558371686272, "learning_rate": 4.156621533670909e-06, "loss": 0.2875, "step": 12439 }, { "epoch": 0.58275167470839, "grad_norm": 0.6290647904412556, "learning_rate": 4.156479494133139e-06, "loss": 0.2937, "step": 12440 }, { "epoch": 0.5827985196983183, "grad_norm": 0.6040981571760295, "learning_rate": 4.1563374450627445e-06, "loss": 0.2982, "step": 12441 }, { "epoch": 0.5828453646882465, "grad_norm": 0.6306900562395055, "learning_rate": 4.1561953864605455e-06, "loss": 0.2887, "step": 12442 }, { "epoch": 0.5828922096781749, "grad_norm": 0.6627274100479484, "learning_rate": 4.156053318327358e-06, "loss": 0.2993, "step": 12443 }, { "epoch": 0.5829390546681033, "grad_norm": 0.569121470254923, "learning_rate": 4.155911240664e-06, "loss": 0.2942, "step": 12444 }, { "epoch": 0.5829858996580316, "grad_norm": 0.6306382118272449, "learning_rate": 4.1557691534712894e-06, "loss": 0.2829, "step": 12445 }, { "epoch": 0.58303274464796, "grad_norm": 0.5736500064540353, "learning_rate": 4.155627056750044e-06, "loss": 0.2809, "step": 12446 }, { "epoch": 0.5830795896378882, "grad_norm": 0.6299912486341056, "learning_rate": 4.155484950501079e-06, "loss": 0.2864, "step": 12447 }, { "epoch": 0.5831264346278165, "grad_norm": 0.6430247072495462, "learning_rate": 4.155342834725215e-06, "loss": 0.2957, "step": 12448 }, { "epoch": 0.5831732796177449, "grad_norm": 0.640543076190279, "learning_rate": 4.1552007094232695e-06, "loss": 0.2751, "step": 12449 }, { "epoch": 0.5832201246076733, "grad_norm": 0.6519891350016472, "learning_rate": 4.155058574596061e-06, "loss": 0.2833, "step": 12450 }, { "epoch": 0.5832669695976015, "grad_norm": 0.5928997297411147, "learning_rate": 4.154916430244405e-06, "loss": 0.2643, "step": 12451 }, { "epoch": 0.5833138145875298, "grad_norm": 0.6244760911367382, "learning_rate": 4.15477427636912e-06, "loss": 0.2896, "step": 12452 }, { "epoch": 0.5833606595774582, "grad_norm": 0.5925861829800728, "learning_rate": 4.154632112971026e-06, "loss": 0.2994, "step": 12453 }, { "epoch": 0.5834075045673865, "grad_norm": 0.5933603184125322, "learning_rate": 4.154489940050941e-06, "loss": 0.2818, "step": 12454 }, { "epoch": 0.5834543495573149, "grad_norm": 0.5900714765532579, "learning_rate": 4.1543477576096804e-06, "loss": 0.2871, "step": 12455 }, { "epoch": 0.5835011945472431, "grad_norm": 0.6275764007814905, "learning_rate": 4.1542055656480655e-06, "loss": 0.2739, "step": 12456 }, { "epoch": 0.5835480395371715, "grad_norm": 0.5553660429063182, "learning_rate": 4.154063364166913e-06, "loss": 0.2917, "step": 12457 }, { "epoch": 0.5835948845270998, "grad_norm": 0.5797619144186328, "learning_rate": 4.153921153167042e-06, "loss": 0.2786, "step": 12458 }, { "epoch": 0.5836417295170282, "grad_norm": 0.6000277536498045, "learning_rate": 4.1537789326492696e-06, "loss": 0.283, "step": 12459 }, { "epoch": 0.5836885745069564, "grad_norm": 0.598192737932619, "learning_rate": 4.1536367026144155e-06, "loss": 0.2852, "step": 12460 }, { "epoch": 0.5837354194968848, "grad_norm": 0.6425538022445332, "learning_rate": 4.153494463063298e-06, "loss": 0.3044, "step": 12461 }, { "epoch": 0.5837822644868131, "grad_norm": 0.5573338440862236, "learning_rate": 4.153352213996735e-06, "loss": 0.2726, "step": 12462 }, { "epoch": 0.5838291094767415, "grad_norm": 0.6179211449855435, "learning_rate": 4.153209955415547e-06, "loss": 0.2903, "step": 12463 }, { "epoch": 0.5838759544666698, "grad_norm": 0.6031663073977314, "learning_rate": 4.15306768732055e-06, "loss": 0.2793, "step": 12464 }, { "epoch": 0.5839227994565981, "grad_norm": 0.6248497061453393, "learning_rate": 4.152925409712564e-06, "loss": 0.2781, "step": 12465 }, { "epoch": 0.5839696444465264, "grad_norm": 0.5519833704333253, "learning_rate": 4.152783122592408e-06, "loss": 0.2679, "step": 12466 }, { "epoch": 0.5840164894364548, "grad_norm": 0.584949703365863, "learning_rate": 4.1526408259609e-06, "loss": 0.2964, "step": 12467 }, { "epoch": 0.5840633344263831, "grad_norm": 0.5909267974135665, "learning_rate": 4.15249851981886e-06, "loss": 0.3074, "step": 12468 }, { "epoch": 0.5841101794163114, "grad_norm": 0.564793635948752, "learning_rate": 4.152356204167105e-06, "loss": 0.2811, "step": 12469 }, { "epoch": 0.5841570244062397, "grad_norm": 0.5654654115055922, "learning_rate": 4.152213879006457e-06, "loss": 0.2738, "step": 12470 }, { "epoch": 0.5842038693961681, "grad_norm": 0.5977102681730362, "learning_rate": 4.152071544337732e-06, "loss": 0.2798, "step": 12471 }, { "epoch": 0.5842507143860964, "grad_norm": 0.5974262669265236, "learning_rate": 4.151929200161752e-06, "loss": 0.2811, "step": 12472 }, { "epoch": 0.5842975593760248, "grad_norm": 0.6103147699946933, "learning_rate": 4.151786846479334e-06, "loss": 0.2992, "step": 12473 }, { "epoch": 0.584344404365953, "grad_norm": 0.613278959656116, "learning_rate": 4.151644483291298e-06, "loss": 0.2684, "step": 12474 }, { "epoch": 0.5843912493558814, "grad_norm": 0.6225712140962956, "learning_rate": 4.151502110598463e-06, "loss": 0.277, "step": 12475 }, { "epoch": 0.5844380943458097, "grad_norm": 0.6147211874202505, "learning_rate": 4.151359728401648e-06, "loss": 0.2843, "step": 12476 }, { "epoch": 0.5844849393357381, "grad_norm": 0.5609710891174154, "learning_rate": 4.151217336701673e-06, "loss": 0.2756, "step": 12477 }, { "epoch": 0.5845317843256663, "grad_norm": 0.5958618605660561, "learning_rate": 4.151074935499358e-06, "loss": 0.29, "step": 12478 }, { "epoch": 0.5845786293155947, "grad_norm": 0.5430699124931638, "learning_rate": 4.150932524795521e-06, "loss": 0.293, "step": 12479 }, { "epoch": 0.584625474305523, "grad_norm": 0.601385110396626, "learning_rate": 4.150790104590982e-06, "loss": 0.2728, "step": 12480 }, { "epoch": 0.5846723192954514, "grad_norm": 0.5508952559446499, "learning_rate": 4.150647674886562e-06, "loss": 0.2701, "step": 12481 }, { "epoch": 0.5847191642853797, "grad_norm": 0.5551224302756989, "learning_rate": 4.150505235683079e-06, "loss": 0.2735, "step": 12482 }, { "epoch": 0.584766009275308, "grad_norm": 0.6256779740709546, "learning_rate": 4.1503627869813525e-06, "loss": 0.2911, "step": 12483 }, { "epoch": 0.5848128542652363, "grad_norm": 0.6050285852345285, "learning_rate": 4.150220328782204e-06, "loss": 0.3085, "step": 12484 }, { "epoch": 0.5848596992551647, "grad_norm": 0.6016547390111949, "learning_rate": 4.150077861086451e-06, "loss": 0.3126, "step": 12485 }, { "epoch": 0.584906544245093, "grad_norm": 0.5984562225660578, "learning_rate": 4.149935383894916e-06, "loss": 0.2844, "step": 12486 }, { "epoch": 0.5849533892350213, "grad_norm": 0.5819508926001669, "learning_rate": 4.149792897208416e-06, "loss": 0.2792, "step": 12487 }, { "epoch": 0.5850002342249496, "grad_norm": 0.5984935244585068, "learning_rate": 4.1496504010277744e-06, "loss": 0.2867, "step": 12488 }, { "epoch": 0.585047079214878, "grad_norm": 0.6422858253728045, "learning_rate": 4.149507895353808e-06, "loss": 0.2937, "step": 12489 }, { "epoch": 0.5850939242048063, "grad_norm": 0.5867522780133256, "learning_rate": 4.149365380187339e-06, "loss": 0.2778, "step": 12490 }, { "epoch": 0.5851407691947347, "grad_norm": 0.6466224590324979, "learning_rate": 4.149222855529187e-06, "loss": 0.2747, "step": 12491 }, { "epoch": 0.5851876141846629, "grad_norm": 0.6174424104132623, "learning_rate": 4.149080321380171e-06, "loss": 0.2858, "step": 12492 }, { "epoch": 0.5852344591745913, "grad_norm": 0.5820130002095287, "learning_rate": 4.1489377777411135e-06, "loss": 0.2772, "step": 12493 }, { "epoch": 0.5852813041645196, "grad_norm": 0.569220102166059, "learning_rate": 4.148795224612832e-06, "loss": 0.2941, "step": 12494 }, { "epoch": 0.585328149154448, "grad_norm": 0.5778591157666397, "learning_rate": 4.14865266199615e-06, "loss": 0.275, "step": 12495 }, { "epoch": 0.5853749941443762, "grad_norm": 0.6069274932958506, "learning_rate": 4.148510089891885e-06, "loss": 0.2904, "step": 12496 }, { "epoch": 0.5854218391343046, "grad_norm": 0.547275274041275, "learning_rate": 4.148367508300859e-06, "loss": 0.2948, "step": 12497 }, { "epoch": 0.5854686841242329, "grad_norm": 0.6565776553304653, "learning_rate": 4.148224917223893e-06, "loss": 0.2944, "step": 12498 }, { "epoch": 0.5855155291141613, "grad_norm": 0.622364061817516, "learning_rate": 4.1480823166618054e-06, "loss": 0.2866, "step": 12499 }, { "epoch": 0.5855623741040896, "grad_norm": 0.5807632808882444, "learning_rate": 4.14793970661542e-06, "loss": 0.2745, "step": 12500 }, { "epoch": 0.5856092190940179, "grad_norm": 0.6526087311082196, "learning_rate": 4.147797087085554e-06, "loss": 0.2984, "step": 12501 }, { "epoch": 0.5856560640839462, "grad_norm": 0.5784627366457461, "learning_rate": 4.147654458073032e-06, "loss": 0.2875, "step": 12502 }, { "epoch": 0.5857029090738746, "grad_norm": 0.5896801720062251, "learning_rate": 4.147511819578671e-06, "loss": 0.2884, "step": 12503 }, { "epoch": 0.5857497540638029, "grad_norm": 0.5693707059480692, "learning_rate": 4.1473691716032945e-06, "loss": 0.2649, "step": 12504 }, { "epoch": 0.5857965990537312, "grad_norm": 0.5937291237712599, "learning_rate": 4.147226514147722e-06, "loss": 0.2977, "step": 12505 }, { "epoch": 0.5858434440436595, "grad_norm": 0.6221799957846572, "learning_rate": 4.1470838472127756e-06, "loss": 0.3006, "step": 12506 }, { "epoch": 0.5858902890335879, "grad_norm": 0.5732710998416141, "learning_rate": 4.146941170799275e-06, "loss": 0.2788, "step": 12507 }, { "epoch": 0.5859371340235162, "grad_norm": 0.6067702166575744, "learning_rate": 4.146798484908042e-06, "loss": 0.2842, "step": 12508 }, { "epoch": 0.5859839790134446, "grad_norm": 0.598549676794907, "learning_rate": 4.146655789539898e-06, "loss": 0.2903, "step": 12509 }, { "epoch": 0.5860308240033728, "grad_norm": 0.5881214545314015, "learning_rate": 4.1465130846956636e-06, "loss": 0.2733, "step": 12510 }, { "epoch": 0.5860776689933012, "grad_norm": 0.5948655567595635, "learning_rate": 4.14637037037616e-06, "loss": 0.3024, "step": 12511 }, { "epoch": 0.5861245139832295, "grad_norm": 0.6033958561315622, "learning_rate": 4.14622764658221e-06, "loss": 0.2991, "step": 12512 }, { "epoch": 0.5861713589731579, "grad_norm": 0.6590164978387074, "learning_rate": 4.146084913314631e-06, "loss": 0.3138, "step": 12513 }, { "epoch": 0.5862182039630861, "grad_norm": 0.5960801855909442, "learning_rate": 4.145942170574248e-06, "loss": 0.2789, "step": 12514 }, { "epoch": 0.5862650489530145, "grad_norm": 0.6100050374204097, "learning_rate": 4.145799418361883e-06, "loss": 0.2896, "step": 12515 }, { "epoch": 0.5863118939429428, "grad_norm": 0.5792125405905547, "learning_rate": 4.145656656678355e-06, "loss": 0.269, "step": 12516 }, { "epoch": 0.5863587389328712, "grad_norm": 0.5665936866627245, "learning_rate": 4.145513885524487e-06, "loss": 0.2764, "step": 12517 }, { "epoch": 0.5864055839227995, "grad_norm": 0.5649982658250104, "learning_rate": 4.1453711049011e-06, "loss": 0.2843, "step": 12518 }, { "epoch": 0.5864524289127278, "grad_norm": 0.5833434007802183, "learning_rate": 4.145228314809015e-06, "loss": 0.2851, "step": 12519 }, { "epoch": 0.5864992739026561, "grad_norm": 0.6099599412061981, "learning_rate": 4.145085515249055e-06, "loss": 0.2814, "step": 12520 }, { "epoch": 0.5865461188925845, "grad_norm": 0.6326661903266554, "learning_rate": 4.1449427062220425e-06, "loss": 0.2946, "step": 12521 }, { "epoch": 0.5865929638825128, "grad_norm": 0.5497180591001688, "learning_rate": 4.144799887728797e-06, "loss": 0.2796, "step": 12522 }, { "epoch": 0.586639808872441, "grad_norm": 0.6019399756606406, "learning_rate": 4.1446570597701415e-06, "loss": 0.2794, "step": 12523 }, { "epoch": 0.5866866538623694, "grad_norm": 0.5870610734590836, "learning_rate": 4.144514222346899e-06, "loss": 0.3047, "step": 12524 }, { "epoch": 0.5867334988522978, "grad_norm": 0.5920534187413525, "learning_rate": 4.1443713754598894e-06, "loss": 0.2856, "step": 12525 }, { "epoch": 0.5867803438422261, "grad_norm": 0.6218749635298277, "learning_rate": 4.144228519109936e-06, "loss": 0.2942, "step": 12526 }, { "epoch": 0.5868271888321545, "grad_norm": 0.6451224729752457, "learning_rate": 4.144085653297861e-06, "loss": 0.3235, "step": 12527 }, { "epoch": 0.5868740338220827, "grad_norm": 0.5316013990909454, "learning_rate": 4.143942778024487e-06, "loss": 0.28, "step": 12528 }, { "epoch": 0.586920878812011, "grad_norm": 0.6161301260330735, "learning_rate": 4.143799893290634e-06, "loss": 0.3123, "step": 12529 }, { "epoch": 0.5869677238019394, "grad_norm": 0.5930673724380936, "learning_rate": 4.143656999097126e-06, "loss": 0.2816, "step": 12530 }, { "epoch": 0.5870145687918678, "grad_norm": 0.5287236016386263, "learning_rate": 4.1435140954447865e-06, "loss": 0.2585, "step": 12531 }, { "epoch": 0.587061413781796, "grad_norm": 0.5839616093052564, "learning_rate": 4.143371182334435e-06, "loss": 0.2888, "step": 12532 }, { "epoch": 0.5871082587717243, "grad_norm": 0.6026228150390792, "learning_rate": 4.143228259766896e-06, "loss": 0.2765, "step": 12533 }, { "epoch": 0.5871551037616527, "grad_norm": 0.6007804286074976, "learning_rate": 4.143085327742992e-06, "loss": 0.295, "step": 12534 }, { "epoch": 0.587201948751581, "grad_norm": 0.6122659041223836, "learning_rate": 4.142942386263543e-06, "loss": 0.2917, "step": 12535 }, { "epoch": 0.5872487937415094, "grad_norm": 0.578750455132207, "learning_rate": 4.142799435329376e-06, "loss": 0.2716, "step": 12536 }, { "epoch": 0.5872956387314376, "grad_norm": 0.5309079746068756, "learning_rate": 4.142656474941309e-06, "loss": 0.2527, "step": 12537 }, { "epoch": 0.587342483721366, "grad_norm": 0.574689355273957, "learning_rate": 4.142513505100168e-06, "loss": 0.2779, "step": 12538 }, { "epoch": 0.5873893287112943, "grad_norm": 0.5626075746586698, "learning_rate": 4.142370525806774e-06, "loss": 0.2781, "step": 12539 }, { "epoch": 0.5874361737012227, "grad_norm": 0.5861615499603062, "learning_rate": 4.142227537061951e-06, "loss": 0.2779, "step": 12540 }, { "epoch": 0.5874830186911509, "grad_norm": 0.592706583252793, "learning_rate": 4.142084538866521e-06, "loss": 0.2619, "step": 12541 }, { "epoch": 0.5875298636810793, "grad_norm": 0.5929871429423778, "learning_rate": 4.141941531221308e-06, "loss": 0.3078, "step": 12542 }, { "epoch": 0.5875767086710076, "grad_norm": 0.5496900275594004, "learning_rate": 4.141798514127133e-06, "loss": 0.2749, "step": 12543 }, { "epoch": 0.587623553660936, "grad_norm": 0.5909169053018627, "learning_rate": 4.14165548758482e-06, "loss": 0.2917, "step": 12544 }, { "epoch": 0.5876703986508643, "grad_norm": 0.6134282307825507, "learning_rate": 4.1415124515951936e-06, "loss": 0.2876, "step": 12545 }, { "epoch": 0.5877172436407926, "grad_norm": 0.578372276215589, "learning_rate": 4.1413694061590745e-06, "loss": 0.2867, "step": 12546 }, { "epoch": 0.5877640886307209, "grad_norm": 0.5777839899041158, "learning_rate": 4.141226351277288e-06, "loss": 0.2838, "step": 12547 }, { "epoch": 0.5878109336206493, "grad_norm": 0.6482298805885945, "learning_rate": 4.141083286950655e-06, "loss": 0.3003, "step": 12548 }, { "epoch": 0.5878577786105776, "grad_norm": 0.5862772978550169, "learning_rate": 4.140940213180002e-06, "loss": 0.2698, "step": 12549 }, { "epoch": 0.5879046236005059, "grad_norm": 0.5861003569205299, "learning_rate": 4.140797129966149e-06, "loss": 0.2769, "step": 12550 }, { "epoch": 0.5879514685904342, "grad_norm": 0.6195395825233964, "learning_rate": 4.140654037309921e-06, "loss": 0.3086, "step": 12551 }, { "epoch": 0.5879983135803626, "grad_norm": 0.5627919624521959, "learning_rate": 4.140510935212142e-06, "loss": 0.3024, "step": 12552 }, { "epoch": 0.5880451585702909, "grad_norm": 0.6019997450129395, "learning_rate": 4.140367823673634e-06, "loss": 0.2627, "step": 12553 }, { "epoch": 0.5880920035602193, "grad_norm": 0.5319566548769928, "learning_rate": 4.140224702695222e-06, "loss": 0.2731, "step": 12554 }, { "epoch": 0.5881388485501475, "grad_norm": 0.5760931752695475, "learning_rate": 4.14008157227773e-06, "loss": 0.2914, "step": 12555 }, { "epoch": 0.5881856935400759, "grad_norm": 0.639535874798972, "learning_rate": 4.1399384324219796e-06, "loss": 0.2657, "step": 12556 }, { "epoch": 0.5882325385300042, "grad_norm": 0.632511777083672, "learning_rate": 4.139795283128796e-06, "loss": 0.2905, "step": 12557 }, { "epoch": 0.5882793835199326, "grad_norm": 0.6705308131749241, "learning_rate": 4.139652124399003e-06, "loss": 0.2944, "step": 12558 }, { "epoch": 0.5883262285098608, "grad_norm": 0.5941406339082971, "learning_rate": 4.1395089562334234e-06, "loss": 0.2945, "step": 12559 }, { "epoch": 0.5883730734997892, "grad_norm": 0.5845926361097544, "learning_rate": 4.1393657786328826e-06, "loss": 0.2788, "step": 12560 }, { "epoch": 0.5884199184897175, "grad_norm": 0.5415334218217581, "learning_rate": 4.139222591598204e-06, "loss": 0.2728, "step": 12561 }, { "epoch": 0.5884667634796459, "grad_norm": 0.6481939948831918, "learning_rate": 4.13907939513021e-06, "loss": 0.2777, "step": 12562 }, { "epoch": 0.5885136084695742, "grad_norm": 0.5704607598400219, "learning_rate": 4.138936189229727e-06, "loss": 0.2716, "step": 12563 }, { "epoch": 0.5885604534595025, "grad_norm": 0.6719425597036748, "learning_rate": 4.138792973897579e-06, "loss": 0.288, "step": 12564 }, { "epoch": 0.5886072984494308, "grad_norm": 0.5866467063299365, "learning_rate": 4.138649749134588e-06, "loss": 0.272, "step": 12565 }, { "epoch": 0.5886541434393592, "grad_norm": 0.5999040843455448, "learning_rate": 4.1385065149415804e-06, "loss": 0.2751, "step": 12566 }, { "epoch": 0.5887009884292875, "grad_norm": 0.562990005681485, "learning_rate": 4.138363271319379e-06, "loss": 0.2927, "step": 12567 }, { "epoch": 0.5887478334192158, "grad_norm": 0.5732232814906831, "learning_rate": 4.1382200182688085e-06, "loss": 0.2699, "step": 12568 }, { "epoch": 0.5887946784091441, "grad_norm": 0.6182729975813869, "learning_rate": 4.138076755790694e-06, "loss": 0.2971, "step": 12569 }, { "epoch": 0.5888415233990725, "grad_norm": 0.5492076497198268, "learning_rate": 4.1379334838858595e-06, "loss": 0.2646, "step": 12570 }, { "epoch": 0.5888883683890008, "grad_norm": 0.66745307962436, "learning_rate": 4.1377902025551296e-06, "loss": 0.2922, "step": 12571 }, { "epoch": 0.5889352133789292, "grad_norm": 0.6096680316727818, "learning_rate": 4.137646911799329e-06, "loss": 0.2909, "step": 12572 }, { "epoch": 0.5889820583688574, "grad_norm": 0.5537287709241577, "learning_rate": 4.137503611619281e-06, "loss": 0.2705, "step": 12573 }, { "epoch": 0.5890289033587858, "grad_norm": 0.5492859943523078, "learning_rate": 4.137360302015813e-06, "loss": 0.2801, "step": 12574 }, { "epoch": 0.5890757483487141, "grad_norm": 0.5735420700767094, "learning_rate": 4.137216982989746e-06, "loss": 0.2883, "step": 12575 }, { "epoch": 0.5891225933386425, "grad_norm": 0.5867226282100378, "learning_rate": 4.137073654541908e-06, "loss": 0.2758, "step": 12576 }, { "epoch": 0.5891694383285707, "grad_norm": 0.6039663187273412, "learning_rate": 4.136930316673122e-06, "loss": 0.2857, "step": 12577 }, { "epoch": 0.5892162833184991, "grad_norm": 0.637553958590401, "learning_rate": 4.136786969384214e-06, "loss": 0.2861, "step": 12578 }, { "epoch": 0.5892631283084274, "grad_norm": 0.5908089867392979, "learning_rate": 4.136643612676008e-06, "loss": 0.2876, "step": 12579 }, { "epoch": 0.5893099732983558, "grad_norm": 0.6202904255906175, "learning_rate": 4.13650024654933e-06, "loss": 0.2898, "step": 12580 }, { "epoch": 0.5893568182882841, "grad_norm": 0.5618362858247677, "learning_rate": 4.136356871005003e-06, "loss": 0.2869, "step": 12581 }, { "epoch": 0.5894036632782124, "grad_norm": 0.6103578122252806, "learning_rate": 4.1362134860438554e-06, "loss": 0.28, "step": 12582 }, { "epoch": 0.5894505082681407, "grad_norm": 0.6189457382538965, "learning_rate": 4.136070091666708e-06, "loss": 0.2914, "step": 12583 }, { "epoch": 0.5894973532580691, "grad_norm": 0.5840623398067141, "learning_rate": 4.135926687874391e-06, "loss": 0.2929, "step": 12584 }, { "epoch": 0.5895441982479974, "grad_norm": 0.5818953420633541, "learning_rate": 4.135783274667726e-06, "loss": 0.2857, "step": 12585 }, { "epoch": 0.5895910432379257, "grad_norm": 0.5500320242928995, "learning_rate": 4.135639852047539e-06, "loss": 0.2808, "step": 12586 }, { "epoch": 0.589637888227854, "grad_norm": 0.6375067045993942, "learning_rate": 4.135496420014656e-06, "loss": 0.2992, "step": 12587 }, { "epoch": 0.5896847332177824, "grad_norm": 0.6207116325015802, "learning_rate": 4.135352978569902e-06, "loss": 0.2875, "step": 12588 }, { "epoch": 0.5897315782077107, "grad_norm": 0.5504298161732917, "learning_rate": 4.135209527714103e-06, "loss": 0.2676, "step": 12589 }, { "epoch": 0.5897784231976391, "grad_norm": 0.6551970530979965, "learning_rate": 4.135066067448083e-06, "loss": 0.3048, "step": 12590 }, { "epoch": 0.5898252681875673, "grad_norm": 0.6235847119503912, "learning_rate": 4.134922597772671e-06, "loss": 0.2843, "step": 12591 }, { "epoch": 0.5898721131774957, "grad_norm": 0.5747362208883464, "learning_rate": 4.134779118688689e-06, "loss": 0.2792, "step": 12592 }, { "epoch": 0.589918958167424, "grad_norm": 0.6014469233756595, "learning_rate": 4.134635630196964e-06, "loss": 0.2733, "step": 12593 }, { "epoch": 0.5899658031573524, "grad_norm": 0.5914727373160398, "learning_rate": 4.134492132298322e-06, "loss": 0.2833, "step": 12594 }, { "epoch": 0.5900126481472806, "grad_norm": 0.5502791604714518, "learning_rate": 4.134348624993589e-06, "loss": 0.282, "step": 12595 }, { "epoch": 0.590059493137209, "grad_norm": 0.6488728485126377, "learning_rate": 4.13420510828359e-06, "loss": 0.3163, "step": 12596 }, { "epoch": 0.5901063381271373, "grad_norm": 0.6076221775748148, "learning_rate": 4.134061582169151e-06, "loss": 0.2812, "step": 12597 }, { "epoch": 0.5901531831170657, "grad_norm": 0.5986792327391065, "learning_rate": 4.133918046651099e-06, "loss": 0.274, "step": 12598 }, { "epoch": 0.590200028106994, "grad_norm": 0.6341403928962324, "learning_rate": 4.133774501730259e-06, "loss": 0.305, "step": 12599 }, { "epoch": 0.5902468730969223, "grad_norm": 0.6351833824275993, "learning_rate": 4.133630947407458e-06, "loss": 0.3033, "step": 12600 }, { "epoch": 0.5902937180868506, "grad_norm": 0.6226196640071587, "learning_rate": 4.13348738368352e-06, "loss": 0.2829, "step": 12601 }, { "epoch": 0.590340563076779, "grad_norm": 0.6223097836660537, "learning_rate": 4.133343810559274e-06, "loss": 0.2837, "step": 12602 }, { "epoch": 0.5903874080667073, "grad_norm": 0.6000775391620109, "learning_rate": 4.133200228035544e-06, "loss": 0.2826, "step": 12603 }, { "epoch": 0.5904342530566355, "grad_norm": 0.581129097962562, "learning_rate": 4.133056636113158e-06, "loss": 0.2912, "step": 12604 }, { "epoch": 0.5904810980465639, "grad_norm": 0.5529604444562399, "learning_rate": 4.132913034792941e-06, "loss": 0.2678, "step": 12605 }, { "epoch": 0.5905279430364923, "grad_norm": 0.6608487896863867, "learning_rate": 4.13276942407572e-06, "loss": 0.3101, "step": 12606 }, { "epoch": 0.5905747880264206, "grad_norm": 0.5941366307930259, "learning_rate": 4.1326258039623215e-06, "loss": 0.2937, "step": 12607 }, { "epoch": 0.590621633016349, "grad_norm": 0.5540906371997696, "learning_rate": 4.1324821744535715e-06, "loss": 0.2896, "step": 12608 }, { "epoch": 0.5906684780062772, "grad_norm": 0.6056449404978738, "learning_rate": 4.132338535550297e-06, "loss": 0.2737, "step": 12609 }, { "epoch": 0.5907153229962055, "grad_norm": 0.6081693919751877, "learning_rate": 4.1321948872533245e-06, "loss": 0.2793, "step": 12610 }, { "epoch": 0.5907621679861339, "grad_norm": 0.6005980797041617, "learning_rate": 4.132051229563481e-06, "loss": 0.2869, "step": 12611 }, { "epoch": 0.5908090129760623, "grad_norm": 0.6010144151582693, "learning_rate": 4.131907562481593e-06, "loss": 0.2641, "step": 12612 }, { "epoch": 0.5908558579659905, "grad_norm": 0.5750647122140353, "learning_rate": 4.131763886008486e-06, "loss": 0.2605, "step": 12613 }, { "epoch": 0.5909027029559188, "grad_norm": 0.5233792310416541, "learning_rate": 4.131620200144989e-06, "loss": 0.2567, "step": 12614 }, { "epoch": 0.5909495479458472, "grad_norm": 0.6511834413471361, "learning_rate": 4.131476504891928e-06, "loss": 0.3034, "step": 12615 }, { "epoch": 0.5909963929357755, "grad_norm": 0.6466668212136275, "learning_rate": 4.131332800250129e-06, "loss": 0.2711, "step": 12616 }, { "epoch": 0.5910432379257038, "grad_norm": 0.5876367354921634, "learning_rate": 4.13118908622042e-06, "loss": 0.2767, "step": 12617 }, { "epoch": 0.5910900829156321, "grad_norm": 0.6081976619790364, "learning_rate": 4.131045362803628e-06, "loss": 0.2738, "step": 12618 }, { "epoch": 0.5911369279055605, "grad_norm": 0.618511352585323, "learning_rate": 4.13090163000058e-06, "loss": 0.2786, "step": 12619 }, { "epoch": 0.5911837728954888, "grad_norm": 0.5757944586337282, "learning_rate": 4.130757887812103e-06, "loss": 0.2813, "step": 12620 }, { "epoch": 0.5912306178854172, "grad_norm": 0.5897103801080115, "learning_rate": 4.130614136239024e-06, "loss": 0.2712, "step": 12621 }, { "epoch": 0.5912774628753454, "grad_norm": 0.5963799983711088, "learning_rate": 4.130470375282171e-06, "loss": 0.3247, "step": 12622 }, { "epoch": 0.5913243078652738, "grad_norm": 0.6186856442939076, "learning_rate": 4.1303266049423695e-06, "loss": 0.2823, "step": 12623 }, { "epoch": 0.5913711528552021, "grad_norm": 0.6139593872413239, "learning_rate": 4.130182825220449e-06, "loss": 0.296, "step": 12624 }, { "epoch": 0.5914179978451305, "grad_norm": 0.6570770814127295, "learning_rate": 4.130039036117236e-06, "loss": 0.2938, "step": 12625 }, { "epoch": 0.5914648428350587, "grad_norm": 0.5637151751462626, "learning_rate": 4.129895237633558e-06, "loss": 0.2705, "step": 12626 }, { "epoch": 0.5915116878249871, "grad_norm": 0.6364788177595793, "learning_rate": 4.129751429770243e-06, "loss": 0.3092, "step": 12627 }, { "epoch": 0.5915585328149154, "grad_norm": 0.6288464962407742, "learning_rate": 4.129607612528118e-06, "loss": 0.2946, "step": 12628 }, { "epoch": 0.5916053778048438, "grad_norm": 0.575706125378947, "learning_rate": 4.12946378590801e-06, "loss": 0.2925, "step": 12629 }, { "epoch": 0.5916522227947721, "grad_norm": 0.6158645362201753, "learning_rate": 4.129319949910748e-06, "loss": 0.3034, "step": 12630 }, { "epoch": 0.5916990677847004, "grad_norm": 0.6209636027568872, "learning_rate": 4.129176104537159e-06, "loss": 0.2977, "step": 12631 }, { "epoch": 0.5917459127746287, "grad_norm": 0.5859314475542413, "learning_rate": 4.129032249788072e-06, "loss": 0.2745, "step": 12632 }, { "epoch": 0.5917927577645571, "grad_norm": 0.6356162208820919, "learning_rate": 4.128888385664314e-06, "loss": 0.3147, "step": 12633 }, { "epoch": 0.5918396027544854, "grad_norm": 0.5880380721181002, "learning_rate": 4.128744512166711e-06, "loss": 0.2796, "step": 12634 }, { "epoch": 0.5918864477444137, "grad_norm": 0.5555628227750726, "learning_rate": 4.128600629296093e-06, "loss": 0.2889, "step": 12635 }, { "epoch": 0.591933292734342, "grad_norm": 0.5780170265230037, "learning_rate": 4.128456737053289e-06, "loss": 0.2699, "step": 12636 }, { "epoch": 0.5919801377242704, "grad_norm": 0.609689744098702, "learning_rate": 4.128312835439125e-06, "loss": 0.299, "step": 12637 }, { "epoch": 0.5920269827141987, "grad_norm": 0.5998522699432405, "learning_rate": 4.12816892445443e-06, "loss": 0.2742, "step": 12638 }, { "epoch": 0.5920738277041271, "grad_norm": 0.5902176529355144, "learning_rate": 4.128025004100031e-06, "loss": 0.2814, "step": 12639 }, { "epoch": 0.5921206726940553, "grad_norm": 0.5524974966222097, "learning_rate": 4.127881074376759e-06, "loss": 0.262, "step": 12640 }, { "epoch": 0.5921675176839837, "grad_norm": 0.5857100126663686, "learning_rate": 4.127737135285439e-06, "loss": 0.2862, "step": 12641 }, { "epoch": 0.592214362673912, "grad_norm": 0.6087308448733529, "learning_rate": 4.127593186826903e-06, "loss": 0.2853, "step": 12642 }, { "epoch": 0.5922612076638404, "grad_norm": 0.5683385587467263, "learning_rate": 4.1274492290019755e-06, "loss": 0.2738, "step": 12643 }, { "epoch": 0.5923080526537686, "grad_norm": 0.5273974976569524, "learning_rate": 4.127305261811487e-06, "loss": 0.2481, "step": 12644 }, { "epoch": 0.592354897643697, "grad_norm": 0.5886808359613276, "learning_rate": 4.127161285256266e-06, "loss": 0.305, "step": 12645 }, { "epoch": 0.5924017426336253, "grad_norm": 0.6118853418587391, "learning_rate": 4.127017299337141e-06, "loss": 0.27, "step": 12646 }, { "epoch": 0.5924485876235537, "grad_norm": 0.5729037576540367, "learning_rate": 4.12687330405494e-06, "loss": 0.2734, "step": 12647 }, { "epoch": 0.592495432613482, "grad_norm": 0.6142241120809372, "learning_rate": 4.126729299410492e-06, "loss": 0.2786, "step": 12648 }, { "epoch": 0.5925422776034103, "grad_norm": 0.6207972890191258, "learning_rate": 4.126585285404626e-06, "loss": 0.3037, "step": 12649 }, { "epoch": 0.5925891225933386, "grad_norm": 0.5926317776561053, "learning_rate": 4.1264412620381715e-06, "loss": 0.2954, "step": 12650 }, { "epoch": 0.592635967583267, "grad_norm": 0.6141418367237248, "learning_rate": 4.126297229311954e-06, "loss": 0.3013, "step": 12651 }, { "epoch": 0.5926828125731953, "grad_norm": 0.5804345482247845, "learning_rate": 4.126153187226807e-06, "loss": 0.2627, "step": 12652 }, { "epoch": 0.5927296575631236, "grad_norm": 0.6105047746726925, "learning_rate": 4.126009135783555e-06, "loss": 0.2793, "step": 12653 }, { "epoch": 0.5927765025530519, "grad_norm": 0.656745159832816, "learning_rate": 4.125865074983031e-06, "loss": 0.3187, "step": 12654 }, { "epoch": 0.5928233475429803, "grad_norm": 0.6330577139969803, "learning_rate": 4.125721004826061e-06, "loss": 0.3013, "step": 12655 }, { "epoch": 0.5928701925329086, "grad_norm": 0.6340964263479555, "learning_rate": 4.125576925313476e-06, "loss": 0.335, "step": 12656 }, { "epoch": 0.592917037522837, "grad_norm": 0.550311118771174, "learning_rate": 4.125432836446104e-06, "loss": 0.2797, "step": 12657 }, { "epoch": 0.5929638825127652, "grad_norm": 0.620480607586871, "learning_rate": 4.125288738224774e-06, "loss": 0.2926, "step": 12658 }, { "epoch": 0.5930107275026936, "grad_norm": 0.6491402015929432, "learning_rate": 4.125144630650316e-06, "loss": 0.2762, "step": 12659 }, { "epoch": 0.5930575724926219, "grad_norm": 0.6494012615239878, "learning_rate": 4.125000513723559e-06, "loss": 0.3046, "step": 12660 }, { "epoch": 0.5931044174825503, "grad_norm": 0.6464838308229025, "learning_rate": 4.124856387445334e-06, "loss": 0.301, "step": 12661 }, { "epoch": 0.5931512624724785, "grad_norm": 0.6385345598965084, "learning_rate": 4.124712251816467e-06, "loss": 0.2792, "step": 12662 }, { "epoch": 0.5931981074624069, "grad_norm": 0.6220996187326707, "learning_rate": 4.1245681068377905e-06, "loss": 0.2917, "step": 12663 }, { "epoch": 0.5932449524523352, "grad_norm": 0.6079644545592001, "learning_rate": 4.124423952510133e-06, "loss": 0.3086, "step": 12664 }, { "epoch": 0.5932917974422636, "grad_norm": 0.6716789320689165, "learning_rate": 4.124279788834324e-06, "loss": 0.3165, "step": 12665 }, { "epoch": 0.5933386424321919, "grad_norm": 0.6328415039456582, "learning_rate": 4.124135615811191e-06, "loss": 0.2872, "step": 12666 }, { "epoch": 0.5933854874221202, "grad_norm": 0.5484947883896242, "learning_rate": 4.123991433441568e-06, "loss": 0.2543, "step": 12667 }, { "epoch": 0.5934323324120485, "grad_norm": 0.5926072877170673, "learning_rate": 4.123847241726282e-06, "loss": 0.2941, "step": 12668 }, { "epoch": 0.5934791774019769, "grad_norm": 0.6291541817808763, "learning_rate": 4.123703040666163e-06, "loss": 0.2885, "step": 12669 }, { "epoch": 0.5935260223919052, "grad_norm": 0.5915104859180074, "learning_rate": 4.123558830262041e-06, "loss": 0.2969, "step": 12670 }, { "epoch": 0.5935728673818335, "grad_norm": 0.580380099808745, "learning_rate": 4.123414610514746e-06, "loss": 0.2837, "step": 12671 }, { "epoch": 0.5936197123717618, "grad_norm": 0.5523173190522106, "learning_rate": 4.123270381425107e-06, "loss": 0.2777, "step": 12672 }, { "epoch": 0.5936665573616902, "grad_norm": 0.5465509774918538, "learning_rate": 4.1231261429939565e-06, "loss": 0.2665, "step": 12673 }, { "epoch": 0.5937134023516185, "grad_norm": 0.5751544759572175, "learning_rate": 4.122981895222122e-06, "loss": 0.276, "step": 12674 }, { "epoch": 0.5937602473415469, "grad_norm": 0.563746450424061, "learning_rate": 4.122837638110435e-06, "loss": 0.2671, "step": 12675 }, { "epoch": 0.5938070923314751, "grad_norm": 0.5598695802466229, "learning_rate": 4.122693371659726e-06, "loss": 0.3041, "step": 12676 }, { "epoch": 0.5938539373214035, "grad_norm": 0.5769161745767598, "learning_rate": 4.122549095870823e-06, "loss": 0.2827, "step": 12677 }, { "epoch": 0.5939007823113318, "grad_norm": 0.6013686949298015, "learning_rate": 4.12240481074456e-06, "loss": 0.2849, "step": 12678 }, { "epoch": 0.5939476273012602, "grad_norm": 0.5451976442565828, "learning_rate": 4.122260516281763e-06, "loss": 0.2583, "step": 12679 }, { "epoch": 0.5939944722911884, "grad_norm": 0.555422006570731, "learning_rate": 4.122116212483266e-06, "loss": 0.2543, "step": 12680 }, { "epoch": 0.5940413172811168, "grad_norm": 0.6414892531342027, "learning_rate": 4.121971899349896e-06, "loss": 0.3173, "step": 12681 }, { "epoch": 0.5940881622710451, "grad_norm": 0.6291400739408926, "learning_rate": 4.121827576882487e-06, "loss": 0.2973, "step": 12682 }, { "epoch": 0.5941350072609735, "grad_norm": 0.6066041965578455, "learning_rate": 4.121683245081867e-06, "loss": 0.2664, "step": 12683 }, { "epoch": 0.5941818522509018, "grad_norm": 0.6001642883096857, "learning_rate": 4.121538903948869e-06, "loss": 0.2917, "step": 12684 }, { "epoch": 0.59422869724083, "grad_norm": 0.6138938058186543, "learning_rate": 4.12139455348432e-06, "loss": 0.2882, "step": 12685 }, { "epoch": 0.5942755422307584, "grad_norm": 0.5768451367154478, "learning_rate": 4.121250193689054e-06, "loss": 0.2895, "step": 12686 }, { "epoch": 0.5943223872206868, "grad_norm": 0.5954406579254196, "learning_rate": 4.121105824563901e-06, "loss": 0.308, "step": 12687 }, { "epoch": 0.5943692322106151, "grad_norm": 0.5838920832527832, "learning_rate": 4.120961446109692e-06, "loss": 0.299, "step": 12688 }, { "epoch": 0.5944160772005433, "grad_norm": 0.5583123392302242, "learning_rate": 4.120817058327256e-06, "loss": 0.258, "step": 12689 }, { "epoch": 0.5944629221904717, "grad_norm": 0.6376408772527324, "learning_rate": 4.120672661217424e-06, "loss": 0.3152, "step": 12690 }, { "epoch": 0.5945097671804, "grad_norm": 0.6739070099960806, "learning_rate": 4.1205282547810304e-06, "loss": 0.2799, "step": 12691 }, { "epoch": 0.5945566121703284, "grad_norm": 0.6036158703842773, "learning_rate": 4.120383839018903e-06, "loss": 0.2788, "step": 12692 }, { "epoch": 0.5946034571602568, "grad_norm": 0.6073899056039118, "learning_rate": 4.120239413931875e-06, "loss": 0.2806, "step": 12693 }, { "epoch": 0.594650302150185, "grad_norm": 0.5781356631274512, "learning_rate": 4.120094979520775e-06, "loss": 0.3071, "step": 12694 }, { "epoch": 0.5946971471401133, "grad_norm": 0.5287162589764598, "learning_rate": 4.119950535786436e-06, "loss": 0.2649, "step": 12695 }, { "epoch": 0.5947439921300417, "grad_norm": 0.560234149203671, "learning_rate": 4.119806082729689e-06, "loss": 0.2702, "step": 12696 }, { "epoch": 0.59479083711997, "grad_norm": 0.6452314723303211, "learning_rate": 4.119661620351365e-06, "loss": 0.3044, "step": 12697 }, { "epoch": 0.5948376821098983, "grad_norm": 0.6467620174339536, "learning_rate": 4.119517148652296e-06, "loss": 0.2958, "step": 12698 }, { "epoch": 0.5948845270998266, "grad_norm": 0.6334632422055094, "learning_rate": 4.119372667633312e-06, "loss": 0.303, "step": 12699 }, { "epoch": 0.594931372089755, "grad_norm": 0.6210346077624657, "learning_rate": 4.1192281772952455e-06, "loss": 0.2952, "step": 12700 }, { "epoch": 0.5949782170796833, "grad_norm": 0.5677261156622364, "learning_rate": 4.119083677638929e-06, "loss": 0.2827, "step": 12701 }, { "epoch": 0.5950250620696117, "grad_norm": 0.5433538930502809, "learning_rate": 4.118939168665191e-06, "loss": 0.2747, "step": 12702 }, { "epoch": 0.5950719070595399, "grad_norm": 0.5764840809988517, "learning_rate": 4.1187946503748664e-06, "loss": 0.2971, "step": 12703 }, { "epoch": 0.5951187520494683, "grad_norm": 0.5861232060377319, "learning_rate": 4.118650122768785e-06, "loss": 0.2725, "step": 12704 }, { "epoch": 0.5951655970393966, "grad_norm": 0.6199686326130563, "learning_rate": 4.118505585847778e-06, "loss": 0.3178, "step": 12705 }, { "epoch": 0.595212442029325, "grad_norm": 0.5524273207458773, "learning_rate": 4.118361039612679e-06, "loss": 0.2717, "step": 12706 }, { "epoch": 0.5952592870192532, "grad_norm": 0.5327480389336641, "learning_rate": 4.11821648406432e-06, "loss": 0.2693, "step": 12707 }, { "epoch": 0.5953061320091816, "grad_norm": 0.5999603274880099, "learning_rate": 4.118071919203531e-06, "loss": 0.2886, "step": 12708 }, { "epoch": 0.5953529769991099, "grad_norm": 0.6045292806415565, "learning_rate": 4.117927345031144e-06, "loss": 0.2952, "step": 12709 }, { "epoch": 0.5953998219890383, "grad_norm": 0.5945725607450417, "learning_rate": 4.1177827615479924e-06, "loss": 0.2976, "step": 12710 }, { "epoch": 0.5954466669789666, "grad_norm": 0.5842732839234936, "learning_rate": 4.1176381687549085e-06, "loss": 0.2883, "step": 12711 }, { "epoch": 0.5954935119688949, "grad_norm": 0.5905518705613915, "learning_rate": 4.1174935666527224e-06, "loss": 0.2754, "step": 12712 }, { "epoch": 0.5955403569588232, "grad_norm": 0.5523857211268335, "learning_rate": 4.117348955242268e-06, "loss": 0.2787, "step": 12713 }, { "epoch": 0.5955872019487516, "grad_norm": 0.5652760890841373, "learning_rate": 4.117204334524376e-06, "loss": 0.2716, "step": 12714 }, { "epoch": 0.5956340469386799, "grad_norm": 0.6048221210681779, "learning_rate": 4.11705970449988e-06, "loss": 0.2944, "step": 12715 }, { "epoch": 0.5956808919286082, "grad_norm": 0.6427065666240657, "learning_rate": 4.116915065169612e-06, "loss": 0.2999, "step": 12716 }, { "epoch": 0.5957277369185365, "grad_norm": 0.5713216265252554, "learning_rate": 4.1167704165344045e-06, "loss": 0.3017, "step": 12717 }, { "epoch": 0.5957745819084649, "grad_norm": 0.5858582878506062, "learning_rate": 4.116625758595088e-06, "loss": 0.2698, "step": 12718 }, { "epoch": 0.5958214268983932, "grad_norm": 0.5651647480585338, "learning_rate": 4.116481091352499e-06, "loss": 0.2959, "step": 12719 }, { "epoch": 0.5958682718883216, "grad_norm": 0.5665587256965287, "learning_rate": 4.116336414807466e-06, "loss": 0.2891, "step": 12720 }, { "epoch": 0.5959151168782498, "grad_norm": 0.5673907078767373, "learning_rate": 4.1161917289608235e-06, "loss": 0.2777, "step": 12721 }, { "epoch": 0.5959619618681782, "grad_norm": 0.630668375939894, "learning_rate": 4.116047033813405e-06, "loss": 0.2871, "step": 12722 }, { "epoch": 0.5960088068581065, "grad_norm": 0.6000535132905467, "learning_rate": 4.11590232936604e-06, "loss": 0.3162, "step": 12723 }, { "epoch": 0.5960556518480349, "grad_norm": 0.5678888630039708, "learning_rate": 4.1157576156195645e-06, "loss": 0.261, "step": 12724 }, { "epoch": 0.5961024968379631, "grad_norm": 0.6243923071089302, "learning_rate": 4.11561289257481e-06, "loss": 0.292, "step": 12725 }, { "epoch": 0.5961493418278915, "grad_norm": 0.5630312783133065, "learning_rate": 4.115468160232608e-06, "loss": 0.267, "step": 12726 }, { "epoch": 0.5961961868178198, "grad_norm": 0.5950046413995218, "learning_rate": 4.115323418593794e-06, "loss": 0.2945, "step": 12727 }, { "epoch": 0.5962430318077482, "grad_norm": 0.573742009586318, "learning_rate": 4.1151786676592e-06, "loss": 0.2986, "step": 12728 }, { "epoch": 0.5962898767976765, "grad_norm": 0.6849819206150719, "learning_rate": 4.115033907429658e-06, "loss": 0.3297, "step": 12729 }, { "epoch": 0.5963367217876048, "grad_norm": 0.6028869237285194, "learning_rate": 4.114889137906002e-06, "loss": 0.2644, "step": 12730 }, { "epoch": 0.5963835667775331, "grad_norm": 0.6177422415208479, "learning_rate": 4.114744359089066e-06, "loss": 0.2646, "step": 12731 }, { "epoch": 0.5964304117674615, "grad_norm": 0.6340526156806371, "learning_rate": 4.11459957097968e-06, "loss": 0.2982, "step": 12732 }, { "epoch": 0.5964772567573898, "grad_norm": 0.5890213854049106, "learning_rate": 4.11445477357868e-06, "loss": 0.3041, "step": 12733 }, { "epoch": 0.5965241017473181, "grad_norm": 0.6213955397816205, "learning_rate": 4.114309966886899e-06, "loss": 0.2657, "step": 12734 }, { "epoch": 0.5965709467372464, "grad_norm": 0.5875563070867484, "learning_rate": 4.11416515090517e-06, "loss": 0.278, "step": 12735 }, { "epoch": 0.5966177917271748, "grad_norm": 0.6176299101023848, "learning_rate": 4.114020325634326e-06, "loss": 0.3032, "step": 12736 }, { "epoch": 0.5966646367171031, "grad_norm": 0.6055928918503325, "learning_rate": 4.113875491075201e-06, "loss": 0.2973, "step": 12737 }, { "epoch": 0.5967114817070315, "grad_norm": 0.6515127141419388, "learning_rate": 4.113730647228628e-06, "loss": 0.2729, "step": 12738 }, { "epoch": 0.5967583266969597, "grad_norm": 0.6334022327490897, "learning_rate": 4.11358579409544e-06, "loss": 0.2976, "step": 12739 }, { "epoch": 0.5968051716868881, "grad_norm": 0.5885656729138485, "learning_rate": 4.1134409316764725e-06, "loss": 0.2833, "step": 12740 }, { "epoch": 0.5968520166768164, "grad_norm": 0.6770383030512379, "learning_rate": 4.113296059972557e-06, "loss": 0.29, "step": 12741 }, { "epoch": 0.5968988616667448, "grad_norm": 0.659640113890844, "learning_rate": 4.113151178984528e-06, "loss": 0.2791, "step": 12742 }, { "epoch": 0.596945706656673, "grad_norm": 0.5947950824147332, "learning_rate": 4.113006288713221e-06, "loss": 0.3084, "step": 12743 }, { "epoch": 0.5969925516466014, "grad_norm": 0.635649767747002, "learning_rate": 4.112861389159466e-06, "loss": 0.265, "step": 12744 }, { "epoch": 0.5970393966365297, "grad_norm": 0.6136502390633156, "learning_rate": 4.1127164803241e-06, "loss": 0.2912, "step": 12745 }, { "epoch": 0.5970862416264581, "grad_norm": 0.6359711350712508, "learning_rate": 4.112571562207958e-06, "loss": 0.2758, "step": 12746 }, { "epoch": 0.5971330866163864, "grad_norm": 0.569408525762244, "learning_rate": 4.112426634811869e-06, "loss": 0.2762, "step": 12747 }, { "epoch": 0.5971799316063147, "grad_norm": 0.6025239007425245, "learning_rate": 4.112281698136671e-06, "loss": 0.2945, "step": 12748 }, { "epoch": 0.597226776596243, "grad_norm": 0.6104603673675323, "learning_rate": 4.1121367521831974e-06, "loss": 0.2943, "step": 12749 }, { "epoch": 0.5972736215861714, "grad_norm": 0.6751374812063743, "learning_rate": 4.111991796952283e-06, "loss": 0.3222, "step": 12750 }, { "epoch": 0.5973204665760997, "grad_norm": 0.5694904276789262, "learning_rate": 4.111846832444759e-06, "loss": 0.277, "step": 12751 }, { "epoch": 0.597367311566028, "grad_norm": 0.5981364859881382, "learning_rate": 4.111701858661463e-06, "loss": 0.278, "step": 12752 }, { "epoch": 0.5974141565559563, "grad_norm": 0.6072597634039185, "learning_rate": 4.1115568756032265e-06, "loss": 0.2919, "step": 12753 }, { "epoch": 0.5974610015458847, "grad_norm": 0.6157122866729089, "learning_rate": 4.111411883270886e-06, "loss": 0.2881, "step": 12754 }, { "epoch": 0.597507846535813, "grad_norm": 0.560663968015161, "learning_rate": 4.111266881665275e-06, "loss": 0.2699, "step": 12755 }, { "epoch": 0.5975546915257414, "grad_norm": 0.5583113799468631, "learning_rate": 4.111121870787229e-06, "loss": 0.2796, "step": 12756 }, { "epoch": 0.5976015365156696, "grad_norm": 0.5566496451055276, "learning_rate": 4.11097685063758e-06, "loss": 0.2773, "step": 12757 }, { "epoch": 0.597648381505598, "grad_norm": 0.6258761075137608, "learning_rate": 4.1108318212171655e-06, "loss": 0.2832, "step": 12758 }, { "epoch": 0.5976952264955263, "grad_norm": 0.6091001879555319, "learning_rate": 4.110686782526818e-06, "loss": 0.2548, "step": 12759 }, { "epoch": 0.5977420714854547, "grad_norm": 0.6481099674062016, "learning_rate": 4.110541734567373e-06, "loss": 0.2897, "step": 12760 }, { "epoch": 0.5977889164753829, "grad_norm": 0.6098545919342991, "learning_rate": 4.1103966773396655e-06, "loss": 0.3075, "step": 12761 }, { "epoch": 0.5978357614653113, "grad_norm": 0.6167687549697934, "learning_rate": 4.11025161084453e-06, "loss": 0.2882, "step": 12762 }, { "epoch": 0.5978826064552396, "grad_norm": 0.5954806112917701, "learning_rate": 4.1101065350828e-06, "loss": 0.2656, "step": 12763 }, { "epoch": 0.597929451445168, "grad_norm": 0.5835847682890996, "learning_rate": 4.109961450055312e-06, "loss": 0.253, "step": 12764 }, { "epoch": 0.5979762964350963, "grad_norm": 0.6671276167218978, "learning_rate": 4.1098163557629016e-06, "loss": 0.2913, "step": 12765 }, { "epoch": 0.5980231414250246, "grad_norm": 0.613609078941796, "learning_rate": 4.109671252206402e-06, "loss": 0.2866, "step": 12766 }, { "epoch": 0.5980699864149529, "grad_norm": 0.6067507835010154, "learning_rate": 4.109526139386649e-06, "loss": 0.2787, "step": 12767 }, { "epoch": 0.5981168314048813, "grad_norm": 0.6793744772183123, "learning_rate": 4.109381017304477e-06, "loss": 0.2997, "step": 12768 }, { "epoch": 0.5981636763948096, "grad_norm": 0.603952693700679, "learning_rate": 4.109235885960723e-06, "loss": 0.2922, "step": 12769 }, { "epoch": 0.5982105213847378, "grad_norm": 0.5868709117335696, "learning_rate": 4.10909074535622e-06, "loss": 0.2716, "step": 12770 }, { "epoch": 0.5982573663746662, "grad_norm": 0.6385496521937669, "learning_rate": 4.108945595491804e-06, "loss": 0.3058, "step": 12771 }, { "epoch": 0.5983042113645946, "grad_norm": 0.6376466785931955, "learning_rate": 4.10880043636831e-06, "loss": 0.3327, "step": 12772 }, { "epoch": 0.5983510563545229, "grad_norm": 0.6956546819799273, "learning_rate": 4.1086552679865755e-06, "loss": 0.3076, "step": 12773 }, { "epoch": 0.5983979013444513, "grad_norm": 0.6731406058184036, "learning_rate": 4.108510090347433e-06, "loss": 0.3157, "step": 12774 }, { "epoch": 0.5984447463343795, "grad_norm": 0.7294463667088378, "learning_rate": 4.10836490345172e-06, "loss": 0.3051, "step": 12775 }, { "epoch": 0.5984915913243078, "grad_norm": 0.676945788293545, "learning_rate": 4.10821970730027e-06, "loss": 0.2604, "step": 12776 }, { "epoch": 0.5985384363142362, "grad_norm": 0.5979122830431235, "learning_rate": 4.1080745018939216e-06, "loss": 0.2929, "step": 12777 }, { "epoch": 0.5985852813041646, "grad_norm": 0.577302172010393, "learning_rate": 4.107929287233508e-06, "loss": 0.2782, "step": 12778 }, { "epoch": 0.5986321262940928, "grad_norm": 0.5694278596575025, "learning_rate": 4.107784063319864e-06, "loss": 0.2845, "step": 12779 }, { "epoch": 0.5986789712840211, "grad_norm": 0.5751942055470052, "learning_rate": 4.107638830153829e-06, "loss": 0.2709, "step": 12780 }, { "epoch": 0.5987258162739495, "grad_norm": 0.5956895328154934, "learning_rate": 4.107493587736235e-06, "loss": 0.2771, "step": 12781 }, { "epoch": 0.5987726612638778, "grad_norm": 0.6206891816773277, "learning_rate": 4.10734833606792e-06, "loss": 0.2875, "step": 12782 }, { "epoch": 0.5988195062538062, "grad_norm": 0.6253342329993355, "learning_rate": 4.1072030751497195e-06, "loss": 0.2792, "step": 12783 }, { "epoch": 0.5988663512437344, "grad_norm": 0.5626303418017922, "learning_rate": 4.107057804982469e-06, "loss": 0.2562, "step": 12784 }, { "epoch": 0.5989131962336628, "grad_norm": 0.6413427922689983, "learning_rate": 4.106912525567005e-06, "loss": 0.2791, "step": 12785 }, { "epoch": 0.5989600412235911, "grad_norm": 0.627819727442218, "learning_rate": 4.106767236904163e-06, "loss": 0.287, "step": 12786 }, { "epoch": 0.5990068862135195, "grad_norm": 0.5796299636954235, "learning_rate": 4.10662193899478e-06, "loss": 0.2556, "step": 12787 }, { "epoch": 0.5990537312034477, "grad_norm": 0.6270371900335271, "learning_rate": 4.10647663183969e-06, "loss": 0.2896, "step": 12788 }, { "epoch": 0.5991005761933761, "grad_norm": 0.655250231568074, "learning_rate": 4.106331315439732e-06, "loss": 0.3175, "step": 12789 }, { "epoch": 0.5991474211833044, "grad_norm": 0.5713807617534722, "learning_rate": 4.106185989795741e-06, "loss": 0.2899, "step": 12790 }, { "epoch": 0.5991942661732328, "grad_norm": 0.5676394514397, "learning_rate": 4.1060406549085526e-06, "loss": 0.2842, "step": 12791 }, { "epoch": 0.5992411111631611, "grad_norm": 0.5817837217017146, "learning_rate": 4.105895310779005e-06, "loss": 0.2684, "step": 12792 }, { "epoch": 0.5992879561530894, "grad_norm": 0.6127213342826312, "learning_rate": 4.105749957407933e-06, "loss": 0.2831, "step": 12793 }, { "epoch": 0.5993348011430177, "grad_norm": 0.6635925818982712, "learning_rate": 4.105604594796173e-06, "loss": 0.2989, "step": 12794 }, { "epoch": 0.5993816461329461, "grad_norm": 0.6167478943895996, "learning_rate": 4.105459222944563e-06, "loss": 0.2785, "step": 12795 }, { "epoch": 0.5994284911228744, "grad_norm": 0.5519808021720041, "learning_rate": 4.105313841853939e-06, "loss": 0.2637, "step": 12796 }, { "epoch": 0.5994753361128027, "grad_norm": 0.5724960040455674, "learning_rate": 4.105168451525137e-06, "loss": 0.2886, "step": 12797 }, { "epoch": 0.599522181102731, "grad_norm": 0.6201818839785387, "learning_rate": 4.105023051958993e-06, "loss": 0.2961, "step": 12798 }, { "epoch": 0.5995690260926594, "grad_norm": 0.6151208999711663, "learning_rate": 4.104877643156346e-06, "loss": 0.2807, "step": 12799 }, { "epoch": 0.5996158710825877, "grad_norm": 0.625599945486676, "learning_rate": 4.104732225118031e-06, "loss": 0.2824, "step": 12800 }, { "epoch": 0.5996627160725161, "grad_norm": 0.6192003490042203, "learning_rate": 4.104586797844885e-06, "loss": 0.3105, "step": 12801 }, { "epoch": 0.5997095610624443, "grad_norm": 0.5563486164679218, "learning_rate": 4.1044413613377455e-06, "loss": 0.2582, "step": 12802 }, { "epoch": 0.5997564060523727, "grad_norm": 0.6133107148463485, "learning_rate": 4.104295915597449e-06, "loss": 0.2987, "step": 12803 }, { "epoch": 0.599803251042301, "grad_norm": 0.5803923539105992, "learning_rate": 4.104150460624834e-06, "loss": 0.2828, "step": 12804 }, { "epoch": 0.5998500960322294, "grad_norm": 0.5754128147336552, "learning_rate": 4.104004996420735e-06, "loss": 0.2882, "step": 12805 }, { "epoch": 0.5998969410221576, "grad_norm": 0.5636205709080614, "learning_rate": 4.1038595229859905e-06, "loss": 0.2954, "step": 12806 }, { "epoch": 0.599943786012086, "grad_norm": 0.5842750006087125, "learning_rate": 4.103714040321438e-06, "loss": 0.3011, "step": 12807 }, { "epoch": 0.5999906310020143, "grad_norm": 0.5940032752949752, "learning_rate": 4.103568548427914e-06, "loss": 0.2593, "step": 12808 }, { "epoch": 0.6000374759919427, "grad_norm": 0.6123111708433826, "learning_rate": 4.1034230473062564e-06, "loss": 0.3157, "step": 12809 }, { "epoch": 0.600084320981871, "grad_norm": 0.5882820108525233, "learning_rate": 4.103277536957302e-06, "loss": 0.2841, "step": 12810 }, { "epoch": 0.6001311659717993, "grad_norm": 0.6169466521701462, "learning_rate": 4.103132017381888e-06, "loss": 0.2956, "step": 12811 }, { "epoch": 0.6001780109617276, "grad_norm": 0.5659573424396264, "learning_rate": 4.1029864885808525e-06, "loss": 0.2783, "step": 12812 }, { "epoch": 0.600224855951656, "grad_norm": 0.6005833753295761, "learning_rate": 4.102840950555032e-06, "loss": 0.2975, "step": 12813 }, { "epoch": 0.6002717009415843, "grad_norm": 0.5827540430290125, "learning_rate": 4.102695403305266e-06, "loss": 0.3036, "step": 12814 }, { "epoch": 0.6003185459315126, "grad_norm": 0.5792144094053401, "learning_rate": 4.10254984683239e-06, "loss": 0.2958, "step": 12815 }, { "epoch": 0.6003653909214409, "grad_norm": 0.581601615577912, "learning_rate": 4.102404281137243e-06, "loss": 0.2835, "step": 12816 }, { "epoch": 0.6004122359113693, "grad_norm": 0.5982106000764381, "learning_rate": 4.102258706220661e-06, "loss": 0.2776, "step": 12817 }, { "epoch": 0.6004590809012976, "grad_norm": 0.5867829966886617, "learning_rate": 4.102113122083484e-06, "loss": 0.2843, "step": 12818 }, { "epoch": 0.600505925891226, "grad_norm": 0.5760375231158262, "learning_rate": 4.101967528726548e-06, "loss": 0.2803, "step": 12819 }, { "epoch": 0.6005527708811542, "grad_norm": 0.5774981957117489, "learning_rate": 4.101821926150692e-06, "loss": 0.285, "step": 12820 }, { "epoch": 0.6005996158710826, "grad_norm": 0.5666439207477085, "learning_rate": 4.101676314356752e-06, "loss": 0.271, "step": 12821 }, { "epoch": 0.6006464608610109, "grad_norm": 0.6050675595298911, "learning_rate": 4.101530693345569e-06, "loss": 0.2666, "step": 12822 }, { "epoch": 0.6006933058509393, "grad_norm": 0.6184545118233513, "learning_rate": 4.101385063117979e-06, "loss": 0.2961, "step": 12823 }, { "epoch": 0.6007401508408675, "grad_norm": 0.6493926700290644, "learning_rate": 4.1012394236748195e-06, "loss": 0.3022, "step": 12824 }, { "epoch": 0.6007869958307959, "grad_norm": 0.597894618690812, "learning_rate": 4.101093775016931e-06, "loss": 0.2785, "step": 12825 }, { "epoch": 0.6008338408207242, "grad_norm": 0.57434510174103, "learning_rate": 4.100948117145149e-06, "loss": 0.2594, "step": 12826 }, { "epoch": 0.6008806858106526, "grad_norm": 0.6156184685101475, "learning_rate": 4.100802450060314e-06, "loss": 0.256, "step": 12827 }, { "epoch": 0.6009275308005809, "grad_norm": 0.6059164629582374, "learning_rate": 4.100656773763263e-06, "loss": 0.2725, "step": 12828 }, { "epoch": 0.6009743757905092, "grad_norm": 0.5530125222366458, "learning_rate": 4.100511088254834e-06, "loss": 0.2591, "step": 12829 }, { "epoch": 0.6010212207804375, "grad_norm": 0.6458497792448256, "learning_rate": 4.100365393535866e-06, "loss": 0.3031, "step": 12830 }, { "epoch": 0.6010680657703659, "grad_norm": 0.59553685014254, "learning_rate": 4.100219689607198e-06, "loss": 0.2853, "step": 12831 }, { "epoch": 0.6011149107602942, "grad_norm": 0.5604804291748907, "learning_rate": 4.1000739764696675e-06, "loss": 0.2666, "step": 12832 }, { "epoch": 0.6011617557502225, "grad_norm": 0.6266355122185252, "learning_rate": 4.099928254124114e-06, "loss": 0.2871, "step": 12833 }, { "epoch": 0.6012086007401508, "grad_norm": 0.6113716735738562, "learning_rate": 4.0997825225713746e-06, "loss": 0.2794, "step": 12834 }, { "epoch": 0.6012554457300792, "grad_norm": 0.5755925884849292, "learning_rate": 4.099636781812289e-06, "loss": 0.2915, "step": 12835 }, { "epoch": 0.6013022907200075, "grad_norm": 0.5531972645106042, "learning_rate": 4.099491031847697e-06, "loss": 0.2804, "step": 12836 }, { "epoch": 0.6013491357099359, "grad_norm": 0.5975565796885632, "learning_rate": 4.099345272678435e-06, "loss": 0.2769, "step": 12837 }, { "epoch": 0.6013959806998641, "grad_norm": 0.5482139466695711, "learning_rate": 4.099199504305343e-06, "loss": 0.2722, "step": 12838 }, { "epoch": 0.6014428256897925, "grad_norm": 0.6855087044781755, "learning_rate": 4.099053726729259e-06, "loss": 0.3199, "step": 12839 }, { "epoch": 0.6014896706797208, "grad_norm": 0.6294002958648104, "learning_rate": 4.098907939951025e-06, "loss": 0.3, "step": 12840 }, { "epoch": 0.6015365156696492, "grad_norm": 0.5741696868247383, "learning_rate": 4.0987621439714765e-06, "loss": 0.2628, "step": 12841 }, { "epoch": 0.6015833606595774, "grad_norm": 0.6028726192358038, "learning_rate": 4.098616338791453e-06, "loss": 0.2987, "step": 12842 }, { "epoch": 0.6016302056495058, "grad_norm": 0.5750315653675037, "learning_rate": 4.098470524411795e-06, "loss": 0.2676, "step": 12843 }, { "epoch": 0.6016770506394341, "grad_norm": 0.6242030224856466, "learning_rate": 4.09832470083334e-06, "loss": 0.2809, "step": 12844 }, { "epoch": 0.6017238956293625, "grad_norm": 0.6003289211497157, "learning_rate": 4.098178868056929e-06, "loss": 0.282, "step": 12845 }, { "epoch": 0.6017707406192908, "grad_norm": 0.572145287735151, "learning_rate": 4.0980330260834e-06, "loss": 0.2959, "step": 12846 }, { "epoch": 0.601817585609219, "grad_norm": 0.5831932669274147, "learning_rate": 4.097887174913593e-06, "loss": 0.2863, "step": 12847 }, { "epoch": 0.6018644305991474, "grad_norm": 0.5574857577461154, "learning_rate": 4.097741314548347e-06, "loss": 0.2932, "step": 12848 }, { "epoch": 0.6019112755890758, "grad_norm": 0.587812583715547, "learning_rate": 4.0975954449885005e-06, "loss": 0.2829, "step": 12849 }, { "epoch": 0.6019581205790041, "grad_norm": 0.6050047703834209, "learning_rate": 4.0974495662348945e-06, "loss": 0.2743, "step": 12850 }, { "epoch": 0.6020049655689323, "grad_norm": 0.5941128559899588, "learning_rate": 4.097303678288367e-06, "loss": 0.2929, "step": 12851 }, { "epoch": 0.6020518105588607, "grad_norm": 0.5849150517234348, "learning_rate": 4.097157781149758e-06, "loss": 0.2867, "step": 12852 }, { "epoch": 0.602098655548789, "grad_norm": 0.5658676934794058, "learning_rate": 4.097011874819909e-06, "loss": 0.2704, "step": 12853 }, { "epoch": 0.6021455005387174, "grad_norm": 0.5539808733723246, "learning_rate": 4.096865959299657e-06, "loss": 0.2749, "step": 12854 }, { "epoch": 0.6021923455286458, "grad_norm": 0.6007479380957276, "learning_rate": 4.096720034589843e-06, "loss": 0.305, "step": 12855 }, { "epoch": 0.602239190518574, "grad_norm": 0.573625584308433, "learning_rate": 4.096574100691306e-06, "loss": 0.2927, "step": 12856 }, { "epoch": 0.6022860355085023, "grad_norm": 0.5932249295365647, "learning_rate": 4.096428157604887e-06, "loss": 0.3223, "step": 12857 }, { "epoch": 0.6023328804984307, "grad_norm": 0.601534540366976, "learning_rate": 4.096282205331425e-06, "loss": 0.2845, "step": 12858 }, { "epoch": 0.602379725488359, "grad_norm": 0.5591850032499498, "learning_rate": 4.09613624387176e-06, "loss": 0.2951, "step": 12859 }, { "epoch": 0.6024265704782873, "grad_norm": 0.5860899516849625, "learning_rate": 4.095990273226733e-06, "loss": 0.2809, "step": 12860 }, { "epoch": 0.6024734154682156, "grad_norm": 0.6005891611093122, "learning_rate": 4.095844293397182e-06, "loss": 0.2811, "step": 12861 }, { "epoch": 0.602520260458144, "grad_norm": 0.5739317537755153, "learning_rate": 4.095698304383948e-06, "loss": 0.2913, "step": 12862 }, { "epoch": 0.6025671054480723, "grad_norm": 0.5744187067649692, "learning_rate": 4.0955523061878726e-06, "loss": 0.2673, "step": 12863 }, { "epoch": 0.6026139504380007, "grad_norm": 0.6341423063224779, "learning_rate": 4.0954062988097935e-06, "loss": 0.2932, "step": 12864 }, { "epoch": 0.6026607954279289, "grad_norm": 0.5568104333003462, "learning_rate": 4.0952602822505525e-06, "loss": 0.2717, "step": 12865 }, { "epoch": 0.6027076404178573, "grad_norm": 0.6002986058359369, "learning_rate": 4.095114256510989e-06, "loss": 0.2879, "step": 12866 }, { "epoch": 0.6027544854077856, "grad_norm": 0.5894202524226898, "learning_rate": 4.094968221591945e-06, "loss": 0.2943, "step": 12867 }, { "epoch": 0.602801330397714, "grad_norm": 0.5691151298682824, "learning_rate": 4.0948221774942594e-06, "loss": 0.2848, "step": 12868 }, { "epoch": 0.6028481753876422, "grad_norm": 0.5327484993803543, "learning_rate": 4.094676124218773e-06, "loss": 0.2684, "step": 12869 }, { "epoch": 0.6028950203775706, "grad_norm": 0.6188675977090363, "learning_rate": 4.094530061766325e-06, "loss": 0.2898, "step": 12870 }, { "epoch": 0.6029418653674989, "grad_norm": 0.6069577508810126, "learning_rate": 4.094383990137759e-06, "loss": 0.2917, "step": 12871 }, { "epoch": 0.6029887103574273, "grad_norm": 0.5447756312375006, "learning_rate": 4.094237909333914e-06, "loss": 0.2664, "step": 12872 }, { "epoch": 0.6030355553473556, "grad_norm": 0.5763540670747767, "learning_rate": 4.094091819355629e-06, "loss": 0.2728, "step": 12873 }, { "epoch": 0.6030824003372839, "grad_norm": 0.6357257079905648, "learning_rate": 4.093945720203747e-06, "loss": 0.303, "step": 12874 }, { "epoch": 0.6031292453272122, "grad_norm": 0.6196445170644319, "learning_rate": 4.0937996118791084e-06, "loss": 0.2856, "step": 12875 }, { "epoch": 0.6031760903171406, "grad_norm": 0.6331835678312738, "learning_rate": 4.0936534943825535e-06, "loss": 0.28, "step": 12876 }, { "epoch": 0.6032229353070689, "grad_norm": 0.6117425664822096, "learning_rate": 4.093507367714923e-06, "loss": 0.2822, "step": 12877 }, { "epoch": 0.6032697802969972, "grad_norm": 0.5982661379098242, "learning_rate": 4.093361231877058e-06, "loss": 0.3065, "step": 12878 }, { "epoch": 0.6033166252869255, "grad_norm": 0.580972486706783, "learning_rate": 4.093215086869799e-06, "loss": 0.274, "step": 12879 }, { "epoch": 0.6033634702768539, "grad_norm": 0.607137309312099, "learning_rate": 4.0930689326939895e-06, "loss": 0.2895, "step": 12880 }, { "epoch": 0.6034103152667822, "grad_norm": 0.6038139597213893, "learning_rate": 4.092922769350467e-06, "loss": 0.2947, "step": 12881 }, { "epoch": 0.6034571602567106, "grad_norm": 0.6523026109852249, "learning_rate": 4.092776596840075e-06, "loss": 0.2957, "step": 12882 }, { "epoch": 0.6035040052466388, "grad_norm": 0.5688346141384064, "learning_rate": 4.092630415163654e-06, "loss": 0.2895, "step": 12883 }, { "epoch": 0.6035508502365672, "grad_norm": 0.6057879356320992, "learning_rate": 4.092484224322045e-06, "loss": 0.2931, "step": 12884 }, { "epoch": 0.6035976952264955, "grad_norm": 0.63385552003771, "learning_rate": 4.0923380243160895e-06, "loss": 0.3014, "step": 12885 }, { "epoch": 0.6036445402164239, "grad_norm": 0.610113266168926, "learning_rate": 4.092191815146629e-06, "loss": 0.2884, "step": 12886 }, { "epoch": 0.6036913852063521, "grad_norm": 0.6239452063001591, "learning_rate": 4.092045596814506e-06, "loss": 0.2804, "step": 12887 }, { "epoch": 0.6037382301962805, "grad_norm": 0.5934292044678128, "learning_rate": 4.091899369320559e-06, "loss": 0.2968, "step": 12888 }, { "epoch": 0.6037850751862088, "grad_norm": 0.6274817413075499, "learning_rate": 4.0917531326656326e-06, "loss": 0.2715, "step": 12889 }, { "epoch": 0.6038319201761372, "grad_norm": 0.6053563684675379, "learning_rate": 4.091606886850566e-06, "loss": 0.2836, "step": 12890 }, { "epoch": 0.6038787651660655, "grad_norm": 0.6353786396069077, "learning_rate": 4.091460631876202e-06, "loss": 0.2823, "step": 12891 }, { "epoch": 0.6039256101559938, "grad_norm": 0.5599784048324444, "learning_rate": 4.091314367743382e-06, "loss": 0.2712, "step": 12892 }, { "epoch": 0.6039724551459221, "grad_norm": 0.6175760880792729, "learning_rate": 4.091168094452948e-06, "loss": 0.2996, "step": 12893 }, { "epoch": 0.6040193001358505, "grad_norm": 0.5352879157086721, "learning_rate": 4.091021812005741e-06, "loss": 0.2585, "step": 12894 }, { "epoch": 0.6040661451257788, "grad_norm": 0.6018723571554019, "learning_rate": 4.090875520402604e-06, "loss": 0.2894, "step": 12895 }, { "epoch": 0.6041129901157071, "grad_norm": 0.6538027902803832, "learning_rate": 4.0907292196443785e-06, "loss": 0.2823, "step": 12896 }, { "epoch": 0.6041598351056354, "grad_norm": 0.6338195325408845, "learning_rate": 4.090582909731905e-06, "loss": 0.2989, "step": 12897 }, { "epoch": 0.6042066800955638, "grad_norm": 0.6100158739158535, "learning_rate": 4.090436590666028e-06, "loss": 0.2939, "step": 12898 }, { "epoch": 0.6042535250854921, "grad_norm": 0.5649949644059353, "learning_rate": 4.090290262447587e-06, "loss": 0.2724, "step": 12899 }, { "epoch": 0.6043003700754205, "grad_norm": 0.5864124528163107, "learning_rate": 4.090143925077426e-06, "loss": 0.2747, "step": 12900 }, { "epoch": 0.6043472150653487, "grad_norm": 0.5483011623835272, "learning_rate": 4.089997578556386e-06, "loss": 0.2598, "step": 12901 }, { "epoch": 0.6043940600552771, "grad_norm": 0.6549561265486077, "learning_rate": 4.089851222885309e-06, "loss": 0.2968, "step": 12902 }, { "epoch": 0.6044409050452054, "grad_norm": 0.639468648048857, "learning_rate": 4.089704858065039e-06, "loss": 0.2909, "step": 12903 }, { "epoch": 0.6044877500351338, "grad_norm": 0.5753456391932866, "learning_rate": 4.0895584840964164e-06, "loss": 0.278, "step": 12904 }, { "epoch": 0.604534595025062, "grad_norm": 0.5606450586384827, "learning_rate": 4.089412100980284e-06, "loss": 0.2789, "step": 12905 }, { "epoch": 0.6045814400149904, "grad_norm": 0.616630812920757, "learning_rate": 4.089265708717486e-06, "loss": 0.2985, "step": 12906 }, { "epoch": 0.6046282850049187, "grad_norm": 0.6042917278778243, "learning_rate": 4.0891193073088615e-06, "loss": 0.2909, "step": 12907 }, { "epoch": 0.6046751299948471, "grad_norm": 0.6032289086914693, "learning_rate": 4.088972896755255e-06, "loss": 0.27, "step": 12908 }, { "epoch": 0.6047219749847754, "grad_norm": 0.6043265296041939, "learning_rate": 4.08882647705751e-06, "loss": 0.2717, "step": 12909 }, { "epoch": 0.6047688199747037, "grad_norm": 0.6220033877928116, "learning_rate": 4.088680048216467e-06, "loss": 0.3007, "step": 12910 }, { "epoch": 0.604815664964632, "grad_norm": 0.5780204116772186, "learning_rate": 4.08853361023297e-06, "loss": 0.2972, "step": 12911 }, { "epoch": 0.6048625099545604, "grad_norm": 0.668348170068088, "learning_rate": 4.088387163107861e-06, "loss": 0.296, "step": 12912 }, { "epoch": 0.6049093549444887, "grad_norm": 0.607319187476503, "learning_rate": 4.088240706841984e-06, "loss": 0.2907, "step": 12913 }, { "epoch": 0.604956199934417, "grad_norm": 0.6374310016769136, "learning_rate": 4.088094241436179e-06, "loss": 0.2901, "step": 12914 }, { "epoch": 0.6050030449243453, "grad_norm": 0.6328232591935424, "learning_rate": 4.087947766891292e-06, "loss": 0.2813, "step": 12915 }, { "epoch": 0.6050498899142737, "grad_norm": 0.6400911290010276, "learning_rate": 4.087801283208166e-06, "loss": 0.304, "step": 12916 }, { "epoch": 0.605096734904202, "grad_norm": 0.641570581316346, "learning_rate": 4.087654790387641e-06, "loss": 0.2988, "step": 12917 }, { "epoch": 0.6051435798941304, "grad_norm": 0.6224883003437341, "learning_rate": 4.087508288430562e-06, "loss": 0.3073, "step": 12918 }, { "epoch": 0.6051904248840586, "grad_norm": 0.6158794301827925, "learning_rate": 4.087361777337772e-06, "loss": 0.2822, "step": 12919 }, { "epoch": 0.605237269873987, "grad_norm": 0.6247474290170001, "learning_rate": 4.0872152571101145e-06, "loss": 0.306, "step": 12920 }, { "epoch": 0.6052841148639153, "grad_norm": 0.6621883077639766, "learning_rate": 4.0870687277484315e-06, "loss": 0.3037, "step": 12921 }, { "epoch": 0.6053309598538437, "grad_norm": 0.5934815546104981, "learning_rate": 4.086922189253566e-06, "loss": 0.2761, "step": 12922 }, { "epoch": 0.6053778048437719, "grad_norm": 0.5505680566892146, "learning_rate": 4.0867756416263635e-06, "loss": 0.294, "step": 12923 }, { "epoch": 0.6054246498337003, "grad_norm": 0.5774993715032448, "learning_rate": 4.086629084867665e-06, "loss": 0.2852, "step": 12924 }, { "epoch": 0.6054714948236286, "grad_norm": 0.6066315173274744, "learning_rate": 4.0864825189783155e-06, "loss": 0.3012, "step": 12925 }, { "epoch": 0.605518339813557, "grad_norm": 0.6422134326641279, "learning_rate": 4.086335943959159e-06, "loss": 0.2711, "step": 12926 }, { "epoch": 0.6055651848034853, "grad_norm": 0.6094937608586966, "learning_rate": 4.086189359811036e-06, "loss": 0.2809, "step": 12927 }, { "epoch": 0.6056120297934136, "grad_norm": 0.5838236093082971, "learning_rate": 4.086042766534793e-06, "loss": 0.2932, "step": 12928 }, { "epoch": 0.6056588747833419, "grad_norm": 0.5972953092933455, "learning_rate": 4.0858961641312725e-06, "loss": 0.2907, "step": 12929 }, { "epoch": 0.6057057197732703, "grad_norm": 0.6117389333552359, "learning_rate": 4.085749552601317e-06, "loss": 0.2969, "step": 12930 }, { "epoch": 0.6057525647631986, "grad_norm": 0.5571425113097479, "learning_rate": 4.085602931945772e-06, "loss": 0.284, "step": 12931 }, { "epoch": 0.6057994097531268, "grad_norm": 0.5702173459609045, "learning_rate": 4.085456302165482e-06, "loss": 0.2933, "step": 12932 }, { "epoch": 0.6058462547430552, "grad_norm": 0.6361970954436563, "learning_rate": 4.085309663261287e-06, "loss": 0.2859, "step": 12933 }, { "epoch": 0.6058930997329836, "grad_norm": 0.6012042992902162, "learning_rate": 4.085163015234035e-06, "loss": 0.2708, "step": 12934 }, { "epoch": 0.6059399447229119, "grad_norm": 0.5917833586895256, "learning_rate": 4.085016358084568e-06, "loss": 0.2626, "step": 12935 }, { "epoch": 0.6059867897128403, "grad_norm": 0.5916181390583236, "learning_rate": 4.08486969181373e-06, "loss": 0.2712, "step": 12936 }, { "epoch": 0.6060336347027685, "grad_norm": 0.6336987927884081, "learning_rate": 4.084723016422365e-06, "loss": 0.3031, "step": 12937 }, { "epoch": 0.6060804796926968, "grad_norm": 0.5687933043733778, "learning_rate": 4.084576331911318e-06, "loss": 0.2682, "step": 12938 }, { "epoch": 0.6061273246826252, "grad_norm": 0.5739443015577823, "learning_rate": 4.084429638281432e-06, "loss": 0.2783, "step": 12939 }, { "epoch": 0.6061741696725536, "grad_norm": 0.6600702388206591, "learning_rate": 4.0842829355335525e-06, "loss": 0.3144, "step": 12940 }, { "epoch": 0.6062210146624818, "grad_norm": 0.6173398550080282, "learning_rate": 4.084136223668523e-06, "loss": 0.2718, "step": 12941 }, { "epoch": 0.6062678596524101, "grad_norm": 0.6105173558203396, "learning_rate": 4.0839895026871856e-06, "loss": 0.313, "step": 12942 }, { "epoch": 0.6063147046423385, "grad_norm": 0.5677860841384872, "learning_rate": 4.083842772590388e-06, "loss": 0.2979, "step": 12943 }, { "epoch": 0.6063615496322668, "grad_norm": 0.6059770661387719, "learning_rate": 4.083696033378973e-06, "loss": 0.2702, "step": 12944 }, { "epoch": 0.6064083946221952, "grad_norm": 0.743183359318538, "learning_rate": 4.0835492850537865e-06, "loss": 0.3174, "step": 12945 }, { "epoch": 0.6064552396121234, "grad_norm": 0.5865675339569728, "learning_rate": 4.083402527615671e-06, "loss": 0.2674, "step": 12946 }, { "epoch": 0.6065020846020518, "grad_norm": 0.6497224263383506, "learning_rate": 4.0832557610654725e-06, "loss": 0.304, "step": 12947 }, { "epoch": 0.6065489295919801, "grad_norm": 0.6417335149263977, "learning_rate": 4.0831089854040345e-06, "loss": 0.2929, "step": 12948 }, { "epoch": 0.6065957745819085, "grad_norm": 0.5833220771921268, "learning_rate": 4.082962200632202e-06, "loss": 0.2719, "step": 12949 }, { "epoch": 0.6066426195718367, "grad_norm": 0.6480344562397246, "learning_rate": 4.0828154067508205e-06, "loss": 0.2934, "step": 12950 }, { "epoch": 0.6066894645617651, "grad_norm": 0.6567881948194618, "learning_rate": 4.082668603760734e-06, "loss": 0.2985, "step": 12951 }, { "epoch": 0.6067363095516934, "grad_norm": 0.5724046834007623, "learning_rate": 4.0825217916627874e-06, "loss": 0.2844, "step": 12952 }, { "epoch": 0.6067831545416218, "grad_norm": 0.5604744839346678, "learning_rate": 4.082374970457826e-06, "loss": 0.2987, "step": 12953 }, { "epoch": 0.6068299995315501, "grad_norm": 0.607460281233767, "learning_rate": 4.082228140146693e-06, "loss": 0.2835, "step": 12954 }, { "epoch": 0.6068768445214784, "grad_norm": 0.5620778185853276, "learning_rate": 4.082081300730236e-06, "loss": 0.2735, "step": 12955 }, { "epoch": 0.6069236895114067, "grad_norm": 0.6046768241108899, "learning_rate": 4.081934452209298e-06, "loss": 0.264, "step": 12956 }, { "epoch": 0.6069705345013351, "grad_norm": 0.5873560799980875, "learning_rate": 4.0817875945847265e-06, "loss": 0.2789, "step": 12957 }, { "epoch": 0.6070173794912634, "grad_norm": 0.6245081553574853, "learning_rate": 4.081640727857363e-06, "loss": 0.2707, "step": 12958 }, { "epoch": 0.6070642244811917, "grad_norm": 0.5721280581959625, "learning_rate": 4.081493852028055e-06, "loss": 0.2843, "step": 12959 }, { "epoch": 0.60711106947112, "grad_norm": 0.6421485358746113, "learning_rate": 4.0813469670976485e-06, "loss": 0.3014, "step": 12960 }, { "epoch": 0.6071579144610484, "grad_norm": 0.5710080584415818, "learning_rate": 4.0812000730669865e-06, "loss": 0.2751, "step": 12961 }, { "epoch": 0.6072047594509767, "grad_norm": 0.6182355205150114, "learning_rate": 4.081053169936916e-06, "loss": 0.2894, "step": 12962 }, { "epoch": 0.6072516044409051, "grad_norm": 0.5751800146439953, "learning_rate": 4.080906257708282e-06, "loss": 0.277, "step": 12963 }, { "epoch": 0.6072984494308333, "grad_norm": 0.6156780481884284, "learning_rate": 4.0807593363819295e-06, "loss": 0.2977, "step": 12964 }, { "epoch": 0.6073452944207617, "grad_norm": 0.5807877256310756, "learning_rate": 4.080612405958705e-06, "loss": 0.3, "step": 12965 }, { "epoch": 0.60739213941069, "grad_norm": 0.616353018158756, "learning_rate": 4.080465466439453e-06, "loss": 0.2952, "step": 12966 }, { "epoch": 0.6074389844006184, "grad_norm": 0.6093097023486995, "learning_rate": 4.080318517825018e-06, "loss": 0.2884, "step": 12967 }, { "epoch": 0.6074858293905466, "grad_norm": 0.5748520774266005, "learning_rate": 4.08017156011625e-06, "loss": 0.2659, "step": 12968 }, { "epoch": 0.607532674380475, "grad_norm": 0.5619110644435759, "learning_rate": 4.08002459331399e-06, "loss": 0.2781, "step": 12969 }, { "epoch": 0.6075795193704033, "grad_norm": 0.6399216651422902, "learning_rate": 4.079877617419086e-06, "loss": 0.3195, "step": 12970 }, { "epoch": 0.6076263643603317, "grad_norm": 0.559885009922311, "learning_rate": 4.079730632432383e-06, "loss": 0.3078, "step": 12971 }, { "epoch": 0.60767320935026, "grad_norm": 0.6012425018504023, "learning_rate": 4.079583638354727e-06, "loss": 0.301, "step": 12972 }, { "epoch": 0.6077200543401883, "grad_norm": 0.5412871169103112, "learning_rate": 4.079436635186965e-06, "loss": 0.2768, "step": 12973 }, { "epoch": 0.6077668993301166, "grad_norm": 0.5987169458259035, "learning_rate": 4.079289622929941e-06, "loss": 0.2708, "step": 12974 }, { "epoch": 0.607813744320045, "grad_norm": 0.6050590018698475, "learning_rate": 4.079142601584504e-06, "loss": 0.2946, "step": 12975 }, { "epoch": 0.6078605893099733, "grad_norm": 0.56900370627852, "learning_rate": 4.078995571151497e-06, "loss": 0.2747, "step": 12976 }, { "epoch": 0.6079074342999016, "grad_norm": 0.5248024348640699, "learning_rate": 4.078848531631768e-06, "loss": 0.2574, "step": 12977 }, { "epoch": 0.6079542792898299, "grad_norm": 0.5750339422240732, "learning_rate": 4.0787014830261615e-06, "loss": 0.2806, "step": 12978 }, { "epoch": 0.6080011242797583, "grad_norm": 0.5677105851657018, "learning_rate": 4.078554425335526e-06, "loss": 0.2754, "step": 12979 }, { "epoch": 0.6080479692696866, "grad_norm": 0.5547010501451496, "learning_rate": 4.078407358560706e-06, "loss": 0.2657, "step": 12980 }, { "epoch": 0.608094814259615, "grad_norm": 0.6059540747262341, "learning_rate": 4.078260282702548e-06, "loss": 0.3092, "step": 12981 }, { "epoch": 0.6081416592495432, "grad_norm": 0.5768995261488193, "learning_rate": 4.078113197761899e-06, "loss": 0.2807, "step": 12982 }, { "epoch": 0.6081885042394716, "grad_norm": 0.615328739114309, "learning_rate": 4.077966103739605e-06, "loss": 0.2993, "step": 12983 }, { "epoch": 0.6082353492293999, "grad_norm": 0.5734211771177945, "learning_rate": 4.0778190006365135e-06, "loss": 0.2852, "step": 12984 }, { "epoch": 0.6082821942193283, "grad_norm": 0.6023365036510383, "learning_rate": 4.07767188845347e-06, "loss": 0.2911, "step": 12985 }, { "epoch": 0.6083290392092565, "grad_norm": 0.5770425587006951, "learning_rate": 4.07752476719132e-06, "loss": 0.2877, "step": 12986 }, { "epoch": 0.6083758841991849, "grad_norm": 0.5804685233997233, "learning_rate": 4.077377636850913e-06, "loss": 0.2877, "step": 12987 }, { "epoch": 0.6084227291891132, "grad_norm": 0.5681846260524889, "learning_rate": 4.0772304974330935e-06, "loss": 0.2762, "step": 12988 }, { "epoch": 0.6084695741790416, "grad_norm": 0.6274408075814599, "learning_rate": 4.077083348938709e-06, "loss": 0.2865, "step": 12989 }, { "epoch": 0.6085164191689699, "grad_norm": 0.5997565328745902, "learning_rate": 4.076936191368605e-06, "loss": 0.2843, "step": 12990 }, { "epoch": 0.6085632641588982, "grad_norm": 0.5842889265367074, "learning_rate": 4.076789024723632e-06, "loss": 0.2926, "step": 12991 }, { "epoch": 0.6086101091488265, "grad_norm": 0.6518515745548422, "learning_rate": 4.076641849004632e-06, "loss": 0.281, "step": 12992 }, { "epoch": 0.6086569541387549, "grad_norm": 0.537924980510903, "learning_rate": 4.076494664212456e-06, "loss": 0.2924, "step": 12993 }, { "epoch": 0.6087037991286832, "grad_norm": 0.5614307040868325, "learning_rate": 4.076347470347949e-06, "loss": 0.2714, "step": 12994 }, { "epoch": 0.6087506441186115, "grad_norm": 0.5865902836471789, "learning_rate": 4.07620026741196e-06, "loss": 0.2662, "step": 12995 }, { "epoch": 0.6087974891085398, "grad_norm": 0.5610924477155826, "learning_rate": 4.076053055405333e-06, "loss": 0.2742, "step": 12996 }, { "epoch": 0.6088443340984682, "grad_norm": 0.6080251848315971, "learning_rate": 4.0759058343289164e-06, "loss": 0.2886, "step": 12997 }, { "epoch": 0.6088911790883965, "grad_norm": 0.5839258686150237, "learning_rate": 4.075758604183558e-06, "loss": 0.2854, "step": 12998 }, { "epoch": 0.6089380240783249, "grad_norm": 0.6242713344216563, "learning_rate": 4.075611364970106e-06, "loss": 0.3031, "step": 12999 }, { "epoch": 0.6089848690682531, "grad_norm": 0.6695359687864348, "learning_rate": 4.075464116689406e-06, "loss": 0.2993, "step": 13000 }, { "epoch": 0.6090317140581815, "grad_norm": 0.5796102185977996, "learning_rate": 4.0753168593423055e-06, "loss": 0.2785, "step": 13001 }, { "epoch": 0.6090785590481098, "grad_norm": 0.6036653959677455, "learning_rate": 4.075169592929653e-06, "loss": 0.278, "step": 13002 }, { "epoch": 0.6091254040380382, "grad_norm": 0.5746704450878308, "learning_rate": 4.075022317452295e-06, "loss": 0.2685, "step": 13003 }, { "epoch": 0.6091722490279664, "grad_norm": 0.5998947410151416, "learning_rate": 4.0748750329110794e-06, "loss": 0.2897, "step": 13004 }, { "epoch": 0.6092190940178948, "grad_norm": 0.5545083946711923, "learning_rate": 4.074727739306855e-06, "loss": 0.294, "step": 13005 }, { "epoch": 0.6092659390078231, "grad_norm": 0.563577456482818, "learning_rate": 4.074580436640467e-06, "loss": 0.2736, "step": 13006 }, { "epoch": 0.6093127839977515, "grad_norm": 0.6187351483056831, "learning_rate": 4.074433124912764e-06, "loss": 0.2669, "step": 13007 }, { "epoch": 0.6093596289876798, "grad_norm": 0.5670836542561564, "learning_rate": 4.074285804124596e-06, "loss": 0.27, "step": 13008 }, { "epoch": 0.609406473977608, "grad_norm": 0.56554318417496, "learning_rate": 4.074138474276807e-06, "loss": 0.2829, "step": 13009 }, { "epoch": 0.6094533189675364, "grad_norm": 0.5693070312701989, "learning_rate": 4.0739911353702474e-06, "loss": 0.2788, "step": 13010 }, { "epoch": 0.6095001639574648, "grad_norm": 0.6300338257584143, "learning_rate": 4.073843787405765e-06, "loss": 0.2905, "step": 13011 }, { "epoch": 0.6095470089473931, "grad_norm": 0.5823669605236848, "learning_rate": 4.073696430384206e-06, "loss": 0.2935, "step": 13012 }, { "epoch": 0.6095938539373214, "grad_norm": 0.5738736022599816, "learning_rate": 4.07354906430642e-06, "loss": 0.2909, "step": 13013 }, { "epoch": 0.6096406989272497, "grad_norm": 0.6048691505976317, "learning_rate": 4.073401689173255e-06, "loss": 0.2748, "step": 13014 }, { "epoch": 0.609687543917178, "grad_norm": 0.5941871914619556, "learning_rate": 4.073254304985558e-06, "loss": 0.2822, "step": 13015 }, { "epoch": 0.6097343889071064, "grad_norm": 0.6486948177923882, "learning_rate": 4.073106911744179e-06, "loss": 0.2888, "step": 13016 }, { "epoch": 0.6097812338970348, "grad_norm": 0.5730062734500464, "learning_rate": 4.0729595094499644e-06, "loss": 0.2627, "step": 13017 }, { "epoch": 0.609828078886963, "grad_norm": 0.6352650076492883, "learning_rate": 4.072812098103763e-06, "loss": 0.2972, "step": 13018 }, { "epoch": 0.6098749238768914, "grad_norm": 0.6679215941677684, "learning_rate": 4.0726646777064234e-06, "loss": 0.3023, "step": 13019 }, { "epoch": 0.6099217688668197, "grad_norm": 0.6250570454916825, "learning_rate": 4.072517248258795e-06, "loss": 0.3059, "step": 13020 }, { "epoch": 0.609968613856748, "grad_norm": 0.5626537542468033, "learning_rate": 4.072369809761723e-06, "loss": 0.2886, "step": 13021 }, { "epoch": 0.6100154588466763, "grad_norm": 0.5834590984554014, "learning_rate": 4.07222236221606e-06, "loss": 0.2784, "step": 13022 }, { "epoch": 0.6100623038366046, "grad_norm": 0.5956792142711329, "learning_rate": 4.072074905622652e-06, "loss": 0.2922, "step": 13023 }, { "epoch": 0.610109148826533, "grad_norm": 0.6073502699757497, "learning_rate": 4.0719274399823465e-06, "loss": 0.2764, "step": 13024 }, { "epoch": 0.6101559938164614, "grad_norm": 0.6244638794234905, "learning_rate": 4.071779965295995e-06, "loss": 0.2972, "step": 13025 }, { "epoch": 0.6102028388063897, "grad_norm": 0.5630724748603567, "learning_rate": 4.071632481564445e-06, "loss": 0.2723, "step": 13026 }, { "epoch": 0.6102496837963179, "grad_norm": 0.6232603106664465, "learning_rate": 4.0714849887885445e-06, "loss": 0.2783, "step": 13027 }, { "epoch": 0.6102965287862463, "grad_norm": 0.6368517596825224, "learning_rate": 4.071337486969142e-06, "loss": 0.2842, "step": 13028 }, { "epoch": 0.6103433737761746, "grad_norm": 0.6162715617408302, "learning_rate": 4.071189976107089e-06, "loss": 0.2662, "step": 13029 }, { "epoch": 0.610390218766103, "grad_norm": 0.6289064019200066, "learning_rate": 4.0710424562032315e-06, "loss": 0.2695, "step": 13030 }, { "epoch": 0.6104370637560312, "grad_norm": 0.5514916342448294, "learning_rate": 4.07089492725842e-06, "loss": 0.2709, "step": 13031 }, { "epoch": 0.6104839087459596, "grad_norm": 0.5759183298946358, "learning_rate": 4.070747389273502e-06, "loss": 0.3109, "step": 13032 }, { "epoch": 0.6105307537358879, "grad_norm": 0.5686679741950664, "learning_rate": 4.070599842249329e-06, "loss": 0.2672, "step": 13033 }, { "epoch": 0.6105775987258163, "grad_norm": 0.6470238103204379, "learning_rate": 4.070452286186748e-06, "loss": 0.3189, "step": 13034 }, { "epoch": 0.6106244437157446, "grad_norm": 0.5968221934371792, "learning_rate": 4.0703047210866094e-06, "loss": 0.2838, "step": 13035 }, { "epoch": 0.6106712887056729, "grad_norm": 0.5668023343780747, "learning_rate": 4.070157146949762e-06, "loss": 0.2775, "step": 13036 }, { "epoch": 0.6107181336956012, "grad_norm": 0.6045133662585169, "learning_rate": 4.070009563777053e-06, "loss": 0.2922, "step": 13037 }, { "epoch": 0.6107649786855296, "grad_norm": 0.5813285010659786, "learning_rate": 4.069861971569336e-06, "loss": 0.29, "step": 13038 }, { "epoch": 0.6108118236754579, "grad_norm": 0.5969256949970728, "learning_rate": 4.069714370327456e-06, "loss": 0.2957, "step": 13039 }, { "epoch": 0.6108586686653862, "grad_norm": 0.5679936449489577, "learning_rate": 4.069566760052265e-06, "loss": 0.2611, "step": 13040 }, { "epoch": 0.6109055136553145, "grad_norm": 0.5396844180589166, "learning_rate": 4.069419140744612e-06, "loss": 0.2731, "step": 13041 }, { "epoch": 0.6109523586452429, "grad_norm": 0.6277253516475008, "learning_rate": 4.069271512405346e-06, "loss": 0.3062, "step": 13042 }, { "epoch": 0.6109992036351712, "grad_norm": 0.5670048174701521, "learning_rate": 4.069123875035317e-06, "loss": 0.2797, "step": 13043 }, { "epoch": 0.6110460486250996, "grad_norm": 0.5634108931990408, "learning_rate": 4.068976228635375e-06, "loss": 0.269, "step": 13044 }, { "epoch": 0.6110928936150278, "grad_norm": 0.5912296343376583, "learning_rate": 4.068828573206368e-06, "loss": 0.2874, "step": 13045 }, { "epoch": 0.6111397386049562, "grad_norm": 0.646624202378189, "learning_rate": 4.068680908749148e-06, "loss": 0.2737, "step": 13046 }, { "epoch": 0.6111865835948845, "grad_norm": 0.6337192790663752, "learning_rate": 4.068533235264563e-06, "loss": 0.2721, "step": 13047 }, { "epoch": 0.6112334285848129, "grad_norm": 0.6325906901379404, "learning_rate": 4.068385552753463e-06, "loss": 0.3033, "step": 13048 }, { "epoch": 0.6112802735747411, "grad_norm": 0.5899366980045646, "learning_rate": 4.068237861216699e-06, "loss": 0.2763, "step": 13049 }, { "epoch": 0.6113271185646695, "grad_norm": 0.605922444492718, "learning_rate": 4.068090160655121e-06, "loss": 0.2619, "step": 13050 }, { "epoch": 0.6113739635545978, "grad_norm": 0.6098839931803979, "learning_rate": 4.067942451069577e-06, "loss": 0.2574, "step": 13051 }, { "epoch": 0.6114208085445262, "grad_norm": 0.5827988587759503, "learning_rate": 4.067794732460918e-06, "loss": 0.2739, "step": 13052 }, { "epoch": 0.6114676535344545, "grad_norm": 0.6069814735937454, "learning_rate": 4.067647004829995e-06, "loss": 0.2896, "step": 13053 }, { "epoch": 0.6115144985243828, "grad_norm": 0.5991879759978469, "learning_rate": 4.067499268177658e-06, "loss": 0.2834, "step": 13054 }, { "epoch": 0.6115613435143111, "grad_norm": 0.5611900836485624, "learning_rate": 4.067351522504757e-06, "loss": 0.2515, "step": 13055 }, { "epoch": 0.6116081885042395, "grad_norm": 0.6602555560476052, "learning_rate": 4.0672037678121415e-06, "loss": 0.3202, "step": 13056 }, { "epoch": 0.6116550334941678, "grad_norm": 0.5748776775909406, "learning_rate": 4.067056004100661e-06, "loss": 0.2912, "step": 13057 }, { "epoch": 0.6117018784840961, "grad_norm": 0.5773318822742524, "learning_rate": 4.066908231371168e-06, "loss": 0.2931, "step": 13058 }, { "epoch": 0.6117487234740244, "grad_norm": 0.5634194942782159, "learning_rate": 4.066760449624512e-06, "loss": 0.2865, "step": 13059 }, { "epoch": 0.6117955684639528, "grad_norm": 0.5863607960945341, "learning_rate": 4.066612658861544e-06, "loss": 0.2842, "step": 13060 }, { "epoch": 0.6118424134538811, "grad_norm": 0.5698903135566927, "learning_rate": 4.066464859083113e-06, "loss": 0.2757, "step": 13061 }, { "epoch": 0.6118892584438095, "grad_norm": 0.6023290965840501, "learning_rate": 4.066317050290071e-06, "loss": 0.2918, "step": 13062 }, { "epoch": 0.6119361034337377, "grad_norm": 0.5791562928570316, "learning_rate": 4.066169232483267e-06, "loss": 0.2641, "step": 13063 }, { "epoch": 0.6119829484236661, "grad_norm": 0.579432949209508, "learning_rate": 4.066021405663554e-06, "loss": 0.2529, "step": 13064 }, { "epoch": 0.6120297934135944, "grad_norm": 0.602780699712448, "learning_rate": 4.065873569831781e-06, "loss": 0.2813, "step": 13065 }, { "epoch": 0.6120766384035228, "grad_norm": 0.6045773225665275, "learning_rate": 4.0657257249888e-06, "loss": 0.2987, "step": 13066 }, { "epoch": 0.612123483393451, "grad_norm": 0.5908870032449085, "learning_rate": 4.065577871135459e-06, "loss": 0.2757, "step": 13067 }, { "epoch": 0.6121703283833794, "grad_norm": 0.5998276485754491, "learning_rate": 4.065430008272611e-06, "loss": 0.2854, "step": 13068 }, { "epoch": 0.6122171733733077, "grad_norm": 0.5585640155300937, "learning_rate": 4.0652821364011085e-06, "loss": 0.2828, "step": 13069 }, { "epoch": 0.6122640183632361, "grad_norm": 0.5967207607730767, "learning_rate": 4.0651342555218e-06, "loss": 0.3002, "step": 13070 }, { "epoch": 0.6123108633531644, "grad_norm": 0.553304313686214, "learning_rate": 4.064986365635537e-06, "loss": 0.2832, "step": 13071 }, { "epoch": 0.6123577083430927, "grad_norm": 0.5952523356769596, "learning_rate": 4.0648384667431715e-06, "loss": 0.2883, "step": 13072 }, { "epoch": 0.612404553333021, "grad_norm": 0.5844685007100127, "learning_rate": 4.064690558845553e-06, "loss": 0.2943, "step": 13073 }, { "epoch": 0.6124513983229494, "grad_norm": 0.5917482839126091, "learning_rate": 4.064542641943534e-06, "loss": 0.2633, "step": 13074 }, { "epoch": 0.6124982433128777, "grad_norm": 0.5923914295004695, "learning_rate": 4.064394716037965e-06, "loss": 0.2821, "step": 13075 }, { "epoch": 0.612545088302806, "grad_norm": 0.6074581372476252, "learning_rate": 4.064246781129698e-06, "loss": 0.2656, "step": 13076 }, { "epoch": 0.6125919332927343, "grad_norm": 0.6338595353264059, "learning_rate": 4.064098837219584e-06, "loss": 0.2932, "step": 13077 }, { "epoch": 0.6126387782826627, "grad_norm": 0.6484692722616779, "learning_rate": 4.063950884308474e-06, "loss": 0.2676, "step": 13078 }, { "epoch": 0.612685623272591, "grad_norm": 0.55345298809484, "learning_rate": 4.06380292239722e-06, "loss": 0.2692, "step": 13079 }, { "epoch": 0.6127324682625194, "grad_norm": 0.5885720086652988, "learning_rate": 4.063654951486673e-06, "loss": 0.2803, "step": 13080 }, { "epoch": 0.6127793132524476, "grad_norm": 0.6729004150877516, "learning_rate": 4.063506971577685e-06, "loss": 0.2761, "step": 13081 }, { "epoch": 0.612826158242376, "grad_norm": 0.5901382849988308, "learning_rate": 4.063358982671107e-06, "loss": 0.3081, "step": 13082 }, { "epoch": 0.6128730032323043, "grad_norm": 0.5603498695143312, "learning_rate": 4.063210984767792e-06, "loss": 0.2776, "step": 13083 }, { "epoch": 0.6129198482222327, "grad_norm": 0.6235407042268135, "learning_rate": 4.063062977868589e-06, "loss": 0.2989, "step": 13084 }, { "epoch": 0.6129666932121609, "grad_norm": 0.6229136469533053, "learning_rate": 4.062914961974352e-06, "loss": 0.2886, "step": 13085 }, { "epoch": 0.6130135382020893, "grad_norm": 0.626437476811794, "learning_rate": 4.062766937085933e-06, "loss": 0.3032, "step": 13086 }, { "epoch": 0.6130603831920176, "grad_norm": 0.5852539237741115, "learning_rate": 4.062618903204183e-06, "loss": 0.2774, "step": 13087 }, { "epoch": 0.613107228181946, "grad_norm": 0.568408110212546, "learning_rate": 4.0624708603299535e-06, "loss": 0.2805, "step": 13088 }, { "epoch": 0.6131540731718743, "grad_norm": 0.6073580150751007, "learning_rate": 4.062322808464097e-06, "loss": 0.2734, "step": 13089 }, { "epoch": 0.6132009181618026, "grad_norm": 0.6050606864147969, "learning_rate": 4.062174747607466e-06, "loss": 0.2543, "step": 13090 }, { "epoch": 0.6132477631517309, "grad_norm": 0.5593023471185745, "learning_rate": 4.06202667776091e-06, "loss": 0.2626, "step": 13091 }, { "epoch": 0.6132946081416593, "grad_norm": 0.6388158489560269, "learning_rate": 4.061878598925285e-06, "loss": 0.304, "step": 13092 }, { "epoch": 0.6133414531315876, "grad_norm": 0.6308588616695481, "learning_rate": 4.061730511101441e-06, "loss": 0.2909, "step": 13093 }, { "epoch": 0.6133882981215159, "grad_norm": 0.6111757265189935, "learning_rate": 4.06158241429023e-06, "loss": 0.2815, "step": 13094 }, { "epoch": 0.6134351431114442, "grad_norm": 0.6380613764758886, "learning_rate": 4.061434308492505e-06, "loss": 0.2967, "step": 13095 }, { "epoch": 0.6134819881013726, "grad_norm": 0.5905518025118454, "learning_rate": 4.061286193709117e-06, "loss": 0.2852, "step": 13096 }, { "epoch": 0.6135288330913009, "grad_norm": 0.5961969970664257, "learning_rate": 4.0611380699409205e-06, "loss": 0.2774, "step": 13097 }, { "epoch": 0.6135756780812293, "grad_norm": 0.6214116454211642, "learning_rate": 4.060989937188766e-06, "loss": 0.3009, "step": 13098 }, { "epoch": 0.6136225230711575, "grad_norm": 0.58130205929219, "learning_rate": 4.060841795453508e-06, "loss": 0.2889, "step": 13099 }, { "epoch": 0.6136693680610859, "grad_norm": 0.6028993854456686, "learning_rate": 4.060693644735996e-06, "loss": 0.2933, "step": 13100 }, { "epoch": 0.6137162130510142, "grad_norm": 0.5889910686470107, "learning_rate": 4.0605454850370866e-06, "loss": 0.2667, "step": 13101 }, { "epoch": 0.6137630580409426, "grad_norm": 0.6065342760247908, "learning_rate": 4.0603973163576284e-06, "loss": 0.2786, "step": 13102 }, { "epoch": 0.6138099030308708, "grad_norm": 0.6584988160293884, "learning_rate": 4.060249138698477e-06, "loss": 0.3042, "step": 13103 }, { "epoch": 0.6138567480207991, "grad_norm": 0.6315906918101415, "learning_rate": 4.060100952060483e-06, "loss": 0.3069, "step": 13104 }, { "epoch": 0.6139035930107275, "grad_norm": 0.573242952936321, "learning_rate": 4.059952756444499e-06, "loss": 0.275, "step": 13105 }, { "epoch": 0.6139504380006559, "grad_norm": 0.5935512268918307, "learning_rate": 4.059804551851381e-06, "loss": 0.2658, "step": 13106 }, { "epoch": 0.6139972829905842, "grad_norm": 0.5378821552136505, "learning_rate": 4.05965633828198e-06, "loss": 0.2654, "step": 13107 }, { "epoch": 0.6140441279805124, "grad_norm": 0.5910841998772566, "learning_rate": 4.059508115737147e-06, "loss": 0.2881, "step": 13108 }, { "epoch": 0.6140909729704408, "grad_norm": 0.6062342952609346, "learning_rate": 4.0593598842177385e-06, "loss": 0.2709, "step": 13109 }, { "epoch": 0.6141378179603691, "grad_norm": 0.6539345684112634, "learning_rate": 4.0592116437246045e-06, "loss": 0.2986, "step": 13110 }, { "epoch": 0.6141846629502975, "grad_norm": 0.5760095821817329, "learning_rate": 4.059063394258601e-06, "loss": 0.2852, "step": 13111 }, { "epoch": 0.6142315079402257, "grad_norm": 0.6225202698913437, "learning_rate": 4.058915135820578e-06, "loss": 0.3099, "step": 13112 }, { "epoch": 0.6142783529301541, "grad_norm": 0.6326705691559416, "learning_rate": 4.05876686841139e-06, "loss": 0.2886, "step": 13113 }, { "epoch": 0.6143251979200824, "grad_norm": 0.5804247023473059, "learning_rate": 4.058618592031892e-06, "loss": 0.2697, "step": 13114 }, { "epoch": 0.6143720429100108, "grad_norm": 0.5894059835483142, "learning_rate": 4.0584703066829346e-06, "loss": 0.2958, "step": 13115 }, { "epoch": 0.6144188878999391, "grad_norm": 0.550754110974534, "learning_rate": 4.058322012365372e-06, "loss": 0.2685, "step": 13116 }, { "epoch": 0.6144657328898674, "grad_norm": 0.6508224495857076, "learning_rate": 4.058173709080059e-06, "loss": 0.3023, "step": 13117 }, { "epoch": 0.6145125778797957, "grad_norm": 0.5654641088094539, "learning_rate": 4.058025396827847e-06, "loss": 0.2726, "step": 13118 }, { "epoch": 0.6145594228697241, "grad_norm": 0.5452426774422408, "learning_rate": 4.057877075609591e-06, "loss": 0.292, "step": 13119 }, { "epoch": 0.6146062678596524, "grad_norm": 0.6489149810279923, "learning_rate": 4.057728745426144e-06, "loss": 0.2745, "step": 13120 }, { "epoch": 0.6146531128495807, "grad_norm": 0.5860512268099445, "learning_rate": 4.05758040627836e-06, "loss": 0.3075, "step": 13121 }, { "epoch": 0.614699957839509, "grad_norm": 0.5808367119170821, "learning_rate": 4.057432058167091e-06, "loss": 0.2872, "step": 13122 }, { "epoch": 0.6147468028294374, "grad_norm": 0.612463850742571, "learning_rate": 4.057283701093192e-06, "loss": 0.2743, "step": 13123 }, { "epoch": 0.6147936478193657, "grad_norm": 0.5634818903882499, "learning_rate": 4.057135335057518e-06, "loss": 0.2873, "step": 13124 }, { "epoch": 0.6148404928092941, "grad_norm": 0.5585003477481413, "learning_rate": 4.056986960060921e-06, "loss": 0.2727, "step": 13125 }, { "epoch": 0.6148873377992223, "grad_norm": 0.5465259154061176, "learning_rate": 4.056838576104255e-06, "loss": 0.2843, "step": 13126 }, { "epoch": 0.6149341827891507, "grad_norm": 0.5811166855448198, "learning_rate": 4.056690183188374e-06, "loss": 0.2673, "step": 13127 }, { "epoch": 0.614981027779079, "grad_norm": 0.5731599781545649, "learning_rate": 4.056541781314133e-06, "loss": 0.2734, "step": 13128 }, { "epoch": 0.6150278727690074, "grad_norm": 0.6293458740201123, "learning_rate": 4.0563933704823845e-06, "loss": 0.2967, "step": 13129 }, { "epoch": 0.6150747177589356, "grad_norm": 0.5784125922665166, "learning_rate": 4.056244950693985e-06, "loss": 0.2745, "step": 13130 }, { "epoch": 0.615121562748864, "grad_norm": 0.5422976607305449, "learning_rate": 4.056096521949785e-06, "loss": 0.2766, "step": 13131 }, { "epoch": 0.6151684077387923, "grad_norm": 0.5669101832758573, "learning_rate": 4.055948084250641e-06, "loss": 0.2704, "step": 13132 }, { "epoch": 0.6152152527287207, "grad_norm": 0.6113726770586912, "learning_rate": 4.0557996375974086e-06, "loss": 0.2819, "step": 13133 }, { "epoch": 0.615262097718649, "grad_norm": 0.5855649187768116, "learning_rate": 4.0556511819909386e-06, "loss": 0.2791, "step": 13134 }, { "epoch": 0.6153089427085773, "grad_norm": 0.6127305217511161, "learning_rate": 4.055502717432087e-06, "loss": 0.2933, "step": 13135 }, { "epoch": 0.6153557876985056, "grad_norm": 0.5520643688338251, "learning_rate": 4.055354243921709e-06, "loss": 0.2667, "step": 13136 }, { "epoch": 0.615402632688434, "grad_norm": 0.5500480720470187, "learning_rate": 4.055205761460658e-06, "loss": 0.2462, "step": 13137 }, { "epoch": 0.6154494776783623, "grad_norm": 0.5564454542522564, "learning_rate": 4.055057270049788e-06, "loss": 0.2741, "step": 13138 }, { "epoch": 0.6154963226682906, "grad_norm": 0.6084066388429227, "learning_rate": 4.054908769689956e-06, "loss": 0.2989, "step": 13139 }, { "epoch": 0.6155431676582189, "grad_norm": 0.5949193462762835, "learning_rate": 4.0547602603820135e-06, "loss": 0.2985, "step": 13140 }, { "epoch": 0.6155900126481473, "grad_norm": 0.5935442927709771, "learning_rate": 4.054611742126816e-06, "loss": 0.2682, "step": 13141 }, { "epoch": 0.6156368576380756, "grad_norm": 0.6353531413145915, "learning_rate": 4.0544632149252205e-06, "loss": 0.2877, "step": 13142 }, { "epoch": 0.615683702628004, "grad_norm": 0.5268753621962375, "learning_rate": 4.054314678778078e-06, "loss": 0.2798, "step": 13143 }, { "epoch": 0.6157305476179322, "grad_norm": 0.5723275293082721, "learning_rate": 4.054166133686246e-06, "loss": 0.2683, "step": 13144 }, { "epoch": 0.6157773926078606, "grad_norm": 0.6175508937269673, "learning_rate": 4.054017579650579e-06, "loss": 0.2779, "step": 13145 }, { "epoch": 0.6158242375977889, "grad_norm": 0.6694984207333802, "learning_rate": 4.053869016671931e-06, "loss": 0.2784, "step": 13146 }, { "epoch": 0.6158710825877173, "grad_norm": 0.6262916390196217, "learning_rate": 4.053720444751157e-06, "loss": 0.2973, "step": 13147 }, { "epoch": 0.6159179275776455, "grad_norm": 0.5651945089193916, "learning_rate": 4.053571863889113e-06, "loss": 0.2696, "step": 13148 }, { "epoch": 0.6159647725675739, "grad_norm": 0.6139454084906069, "learning_rate": 4.053423274086653e-06, "loss": 0.2772, "step": 13149 }, { "epoch": 0.6160116175575022, "grad_norm": 0.610118783154947, "learning_rate": 4.053274675344632e-06, "loss": 0.2749, "step": 13150 }, { "epoch": 0.6160584625474306, "grad_norm": 0.6251685183717997, "learning_rate": 4.053126067663907e-06, "loss": 0.2943, "step": 13151 }, { "epoch": 0.6161053075373589, "grad_norm": 0.5667668357745084, "learning_rate": 4.05297745104533e-06, "loss": 0.2691, "step": 13152 }, { "epoch": 0.6161521525272872, "grad_norm": 0.6416077341190849, "learning_rate": 4.05282882548976e-06, "loss": 0.2843, "step": 13153 }, { "epoch": 0.6161989975172155, "grad_norm": 0.6114419227565597, "learning_rate": 4.052680190998049e-06, "loss": 0.2984, "step": 13154 }, { "epoch": 0.6162458425071439, "grad_norm": 0.654523383627141, "learning_rate": 4.052531547571054e-06, "loss": 0.2889, "step": 13155 }, { "epoch": 0.6162926874970722, "grad_norm": 0.610829354934864, "learning_rate": 4.052382895209631e-06, "loss": 0.295, "step": 13156 }, { "epoch": 0.6163395324870005, "grad_norm": 0.5703427818366418, "learning_rate": 4.052234233914634e-06, "loss": 0.2813, "step": 13157 }, { "epoch": 0.6163863774769288, "grad_norm": 0.5905639097506717, "learning_rate": 4.052085563686919e-06, "loss": 0.2919, "step": 13158 }, { "epoch": 0.6164332224668572, "grad_norm": 0.6136097638969532, "learning_rate": 4.051936884527342e-06, "loss": 0.2902, "step": 13159 }, { "epoch": 0.6164800674567855, "grad_norm": 0.6259669224917656, "learning_rate": 4.051788196436758e-06, "loss": 0.2842, "step": 13160 }, { "epoch": 0.6165269124467139, "grad_norm": 0.5573812754629027, "learning_rate": 4.051639499416023e-06, "loss": 0.2795, "step": 13161 }, { "epoch": 0.6165737574366421, "grad_norm": 0.6005473321514582, "learning_rate": 4.051490793465993e-06, "loss": 0.2869, "step": 13162 }, { "epoch": 0.6166206024265705, "grad_norm": 0.6126198177553541, "learning_rate": 4.051342078587523e-06, "loss": 0.2831, "step": 13163 }, { "epoch": 0.6166674474164988, "grad_norm": 0.625599106182029, "learning_rate": 4.05119335478147e-06, "loss": 0.275, "step": 13164 }, { "epoch": 0.6167142924064272, "grad_norm": 0.516749269236419, "learning_rate": 4.051044622048688e-06, "loss": 0.2367, "step": 13165 }, { "epoch": 0.6167611373963554, "grad_norm": 0.5778202817386666, "learning_rate": 4.050895880390034e-06, "loss": 0.2717, "step": 13166 }, { "epoch": 0.6168079823862838, "grad_norm": 0.5874041410148149, "learning_rate": 4.050747129806365e-06, "loss": 0.2824, "step": 13167 }, { "epoch": 0.6168548273762121, "grad_norm": 0.6907967479162668, "learning_rate": 4.050598370298536e-06, "loss": 0.293, "step": 13168 }, { "epoch": 0.6169016723661405, "grad_norm": 0.628596545159779, "learning_rate": 4.050449601867403e-06, "loss": 0.2597, "step": 13169 }, { "epoch": 0.6169485173560688, "grad_norm": 0.623034331448415, "learning_rate": 4.050300824513822e-06, "loss": 0.2798, "step": 13170 }, { "epoch": 0.6169953623459971, "grad_norm": 0.6070886684161432, "learning_rate": 4.0501520382386495e-06, "loss": 0.2946, "step": 13171 }, { "epoch": 0.6170422073359254, "grad_norm": 0.6226265221873097, "learning_rate": 4.0500032430427415e-06, "loss": 0.2557, "step": 13172 }, { "epoch": 0.6170890523258538, "grad_norm": 0.5917257677687215, "learning_rate": 4.049854438926955e-06, "loss": 0.2733, "step": 13173 }, { "epoch": 0.6171358973157821, "grad_norm": 0.6057651281799741, "learning_rate": 4.049705625892144e-06, "loss": 0.2909, "step": 13174 }, { "epoch": 0.6171827423057104, "grad_norm": 0.5779699319492485, "learning_rate": 4.049556803939168e-06, "loss": 0.2727, "step": 13175 }, { "epoch": 0.6172295872956387, "grad_norm": 0.5677759218065007, "learning_rate": 4.049407973068882e-06, "loss": 0.2683, "step": 13176 }, { "epoch": 0.6172764322855671, "grad_norm": 0.5516423204079359, "learning_rate": 4.049259133282143e-06, "loss": 0.2849, "step": 13177 }, { "epoch": 0.6173232772754954, "grad_norm": 0.66091453741401, "learning_rate": 4.049110284579806e-06, "loss": 0.2834, "step": 13178 }, { "epoch": 0.6173701222654238, "grad_norm": 0.5730817838967276, "learning_rate": 4.048961426962729e-06, "loss": 0.2481, "step": 13179 }, { "epoch": 0.617416967255352, "grad_norm": 0.6070705466706768, "learning_rate": 4.048812560431768e-06, "loss": 0.2757, "step": 13180 }, { "epoch": 0.6174638122452804, "grad_norm": 0.6267361114538579, "learning_rate": 4.048663684987782e-06, "loss": 0.2777, "step": 13181 }, { "epoch": 0.6175106572352087, "grad_norm": 0.5659589141226591, "learning_rate": 4.048514800631623e-06, "loss": 0.2664, "step": 13182 }, { "epoch": 0.6175575022251371, "grad_norm": 0.5914610968299775, "learning_rate": 4.048365907364152e-06, "loss": 0.2903, "step": 13183 }, { "epoch": 0.6176043472150653, "grad_norm": 0.6150076732226972, "learning_rate": 4.048217005186225e-06, "loss": 0.284, "step": 13184 }, { "epoch": 0.6176511922049936, "grad_norm": 0.5774474619779092, "learning_rate": 4.048068094098697e-06, "loss": 0.2784, "step": 13185 }, { "epoch": 0.617698037194922, "grad_norm": 0.544632883998311, "learning_rate": 4.047919174102426e-06, "loss": 0.2616, "step": 13186 }, { "epoch": 0.6177448821848504, "grad_norm": 0.6389931726004094, "learning_rate": 4.04777024519827e-06, "loss": 0.2819, "step": 13187 }, { "epoch": 0.6177917271747787, "grad_norm": 0.5782132116966588, "learning_rate": 4.047621307387085e-06, "loss": 0.293, "step": 13188 }, { "epoch": 0.617838572164707, "grad_norm": 0.5645695866037247, "learning_rate": 4.047472360669729e-06, "loss": 0.2713, "step": 13189 }, { "epoch": 0.6178854171546353, "grad_norm": 0.5928392988137012, "learning_rate": 4.047323405047057e-06, "loss": 0.2908, "step": 13190 }, { "epoch": 0.6179322621445636, "grad_norm": 0.5782844215823466, "learning_rate": 4.04717444051993e-06, "loss": 0.2985, "step": 13191 }, { "epoch": 0.617979107134492, "grad_norm": 0.555847763365611, "learning_rate": 4.047025467089201e-06, "loss": 0.2729, "step": 13192 }, { "epoch": 0.6180259521244202, "grad_norm": 0.5660544349709572, "learning_rate": 4.0468764847557296e-06, "loss": 0.2849, "step": 13193 }, { "epoch": 0.6180727971143486, "grad_norm": 0.5835328569908291, "learning_rate": 4.046727493520373e-06, "loss": 0.2752, "step": 13194 }, { "epoch": 0.618119642104277, "grad_norm": 0.5694454997310537, "learning_rate": 4.046578493383988e-06, "loss": 0.2756, "step": 13195 }, { "epoch": 0.6181664870942053, "grad_norm": 0.5904368728727739, "learning_rate": 4.046429484347434e-06, "loss": 0.2831, "step": 13196 }, { "epoch": 0.6182133320841336, "grad_norm": 0.5598642739341495, "learning_rate": 4.0462804664115655e-06, "loss": 0.2742, "step": 13197 }, { "epoch": 0.6182601770740619, "grad_norm": 0.6069543220676024, "learning_rate": 4.046131439577242e-06, "loss": 0.2982, "step": 13198 }, { "epoch": 0.6183070220639902, "grad_norm": 0.6365011220409182, "learning_rate": 4.045982403845321e-06, "loss": 0.2925, "step": 13199 }, { "epoch": 0.6183538670539186, "grad_norm": 0.6199844600113881, "learning_rate": 4.04583335921666e-06, "loss": 0.2872, "step": 13200 }, { "epoch": 0.618400712043847, "grad_norm": 0.6378146145571534, "learning_rate": 4.045684305692116e-06, "loss": 0.2873, "step": 13201 }, { "epoch": 0.6184475570337752, "grad_norm": 0.6170289803411838, "learning_rate": 4.045535243272547e-06, "loss": 0.2889, "step": 13202 }, { "epoch": 0.6184944020237035, "grad_norm": 0.5654421320954804, "learning_rate": 4.045386171958812e-06, "loss": 0.2808, "step": 13203 }, { "epoch": 0.6185412470136319, "grad_norm": 0.5431531405803762, "learning_rate": 4.045237091751767e-06, "loss": 0.2777, "step": 13204 }, { "epoch": 0.6185880920035602, "grad_norm": 0.6095110059088817, "learning_rate": 4.045088002652271e-06, "loss": 0.3049, "step": 13205 }, { "epoch": 0.6186349369934886, "grad_norm": 0.5876581611190079, "learning_rate": 4.0449389046611834e-06, "loss": 0.2892, "step": 13206 }, { "epoch": 0.6186817819834168, "grad_norm": 0.6014360756459092, "learning_rate": 4.044789797779359e-06, "loss": 0.2853, "step": 13207 }, { "epoch": 0.6187286269733452, "grad_norm": 0.5996393764249405, "learning_rate": 4.044640682007658e-06, "loss": 0.3007, "step": 13208 }, { "epoch": 0.6187754719632735, "grad_norm": 0.6100171228145032, "learning_rate": 4.044491557346939e-06, "loss": 0.2978, "step": 13209 }, { "epoch": 0.6188223169532019, "grad_norm": 0.6215272258507588, "learning_rate": 4.044342423798059e-06, "loss": 0.2814, "step": 13210 }, { "epoch": 0.6188691619431301, "grad_norm": 0.6399610155116849, "learning_rate": 4.044193281361876e-06, "loss": 0.2771, "step": 13211 }, { "epoch": 0.6189160069330585, "grad_norm": 0.6311160589692192, "learning_rate": 4.0440441300392485e-06, "loss": 0.3019, "step": 13212 }, { "epoch": 0.6189628519229868, "grad_norm": 0.5992835616643115, "learning_rate": 4.043894969831036e-06, "loss": 0.2873, "step": 13213 }, { "epoch": 0.6190096969129152, "grad_norm": 0.5995786962732271, "learning_rate": 4.043745800738094e-06, "loss": 0.2859, "step": 13214 }, { "epoch": 0.6190565419028435, "grad_norm": 0.6101477822569827, "learning_rate": 4.043596622761286e-06, "loss": 0.2931, "step": 13215 }, { "epoch": 0.6191033868927718, "grad_norm": 0.5499091476809774, "learning_rate": 4.043447435901465e-06, "loss": 0.2745, "step": 13216 }, { "epoch": 0.6191502318827001, "grad_norm": 0.5970409982207417, "learning_rate": 4.043298240159493e-06, "loss": 0.2703, "step": 13217 }, { "epoch": 0.6191970768726285, "grad_norm": 0.586092858636032, "learning_rate": 4.043149035536228e-06, "loss": 0.2616, "step": 13218 }, { "epoch": 0.6192439218625568, "grad_norm": 0.5702608248598806, "learning_rate": 4.042999822032527e-06, "loss": 0.2494, "step": 13219 }, { "epoch": 0.6192907668524851, "grad_norm": 0.5791605052148523, "learning_rate": 4.04285059964925e-06, "loss": 0.3002, "step": 13220 }, { "epoch": 0.6193376118424134, "grad_norm": 0.5911741496706963, "learning_rate": 4.042701368387256e-06, "loss": 0.2701, "step": 13221 }, { "epoch": 0.6193844568323418, "grad_norm": 0.6521935066773877, "learning_rate": 4.042552128247403e-06, "loss": 0.3066, "step": 13222 }, { "epoch": 0.6194313018222701, "grad_norm": 0.5731883487793089, "learning_rate": 4.042402879230551e-06, "loss": 0.2904, "step": 13223 }, { "epoch": 0.6194781468121985, "grad_norm": 0.5512355200219353, "learning_rate": 4.0422536213375575e-06, "loss": 0.2518, "step": 13224 }, { "epoch": 0.6195249918021267, "grad_norm": 0.646275688916999, "learning_rate": 4.0421043545692825e-06, "loss": 0.3137, "step": 13225 }, { "epoch": 0.6195718367920551, "grad_norm": 0.6063952933595378, "learning_rate": 4.041955078926584e-06, "loss": 0.2762, "step": 13226 }, { "epoch": 0.6196186817819834, "grad_norm": 0.5868032850550389, "learning_rate": 4.041805794410322e-06, "loss": 0.3001, "step": 13227 }, { "epoch": 0.6196655267719118, "grad_norm": 0.6268318695288476, "learning_rate": 4.041656501021355e-06, "loss": 0.2913, "step": 13228 }, { "epoch": 0.61971237176184, "grad_norm": 0.591814494178162, "learning_rate": 4.041507198760542e-06, "loss": 0.2864, "step": 13229 }, { "epoch": 0.6197592167517684, "grad_norm": 0.623757370529589, "learning_rate": 4.041357887628743e-06, "loss": 0.261, "step": 13230 }, { "epoch": 0.6198060617416967, "grad_norm": 0.5719728443631423, "learning_rate": 4.041208567626816e-06, "loss": 0.2736, "step": 13231 }, { "epoch": 0.6198529067316251, "grad_norm": 0.6414792935682343, "learning_rate": 4.041059238755621e-06, "loss": 0.2906, "step": 13232 }, { "epoch": 0.6198997517215534, "grad_norm": 0.6177007727741435, "learning_rate": 4.040909901016018e-06, "loss": 0.2891, "step": 13233 }, { "epoch": 0.6199465967114817, "grad_norm": 0.6150763052523521, "learning_rate": 4.0407605544088655e-06, "loss": 0.3194, "step": 13234 }, { "epoch": 0.61999344170141, "grad_norm": 0.5681171406873387, "learning_rate": 4.040611198935024e-06, "loss": 0.2893, "step": 13235 }, { "epoch": 0.6200402866913384, "grad_norm": 0.5647701935329178, "learning_rate": 4.0404618345953514e-06, "loss": 0.2828, "step": 13236 }, { "epoch": 0.6200871316812667, "grad_norm": 0.6476755996304028, "learning_rate": 4.0403124613907095e-06, "loss": 0.3047, "step": 13237 }, { "epoch": 0.620133976671195, "grad_norm": 0.6059742611181734, "learning_rate": 4.040163079321955e-06, "loss": 0.2732, "step": 13238 }, { "epoch": 0.6201808216611233, "grad_norm": 0.5493829876490997, "learning_rate": 4.04001368838995e-06, "loss": 0.2666, "step": 13239 }, { "epoch": 0.6202276666510517, "grad_norm": 0.5863847396784103, "learning_rate": 4.039864288595553e-06, "loss": 0.2853, "step": 13240 }, { "epoch": 0.62027451164098, "grad_norm": 0.5811468927710494, "learning_rate": 4.039714879939624e-06, "loss": 0.2692, "step": 13241 }, { "epoch": 0.6203213566309084, "grad_norm": 0.6501472519323865, "learning_rate": 4.039565462423023e-06, "loss": 0.2997, "step": 13242 }, { "epoch": 0.6203682016208366, "grad_norm": 0.6048248436128041, "learning_rate": 4.03941603604661e-06, "loss": 0.2964, "step": 13243 }, { "epoch": 0.620415046610765, "grad_norm": 0.5884526947516, "learning_rate": 4.039266600811244e-06, "loss": 0.2846, "step": 13244 }, { "epoch": 0.6204618916006933, "grad_norm": 0.5962222010538933, "learning_rate": 4.039117156717786e-06, "loss": 0.287, "step": 13245 }, { "epoch": 0.6205087365906217, "grad_norm": 0.5694716512018283, "learning_rate": 4.038967703767095e-06, "loss": 0.2744, "step": 13246 }, { "epoch": 0.6205555815805499, "grad_norm": 0.6629633895015735, "learning_rate": 4.038818241960032e-06, "loss": 0.3086, "step": 13247 }, { "epoch": 0.6206024265704783, "grad_norm": 0.5798062461650462, "learning_rate": 4.038668771297456e-06, "loss": 0.2709, "step": 13248 }, { "epoch": 0.6206492715604066, "grad_norm": 0.5824406894424182, "learning_rate": 4.038519291780229e-06, "loss": 0.2682, "step": 13249 }, { "epoch": 0.620696116550335, "grad_norm": 0.5801160033500203, "learning_rate": 4.03836980340921e-06, "loss": 0.2827, "step": 13250 }, { "epoch": 0.6207429615402633, "grad_norm": 0.6361905614452694, "learning_rate": 4.03822030618526e-06, "loss": 0.2942, "step": 13251 }, { "epoch": 0.6207898065301916, "grad_norm": 0.6206368705895141, "learning_rate": 4.038070800109237e-06, "loss": 0.2815, "step": 13252 }, { "epoch": 0.6208366515201199, "grad_norm": 0.6398354724061395, "learning_rate": 4.037921285182005e-06, "loss": 0.2862, "step": 13253 }, { "epoch": 0.6208834965100483, "grad_norm": 0.6768901880268648, "learning_rate": 4.037771761404422e-06, "loss": 0.3062, "step": 13254 }, { "epoch": 0.6209303414999766, "grad_norm": 0.5713956057764803, "learning_rate": 4.037622228777349e-06, "loss": 0.2849, "step": 13255 }, { "epoch": 0.6209771864899049, "grad_norm": 0.643130948614081, "learning_rate": 4.037472687301646e-06, "loss": 0.3138, "step": 13256 }, { "epoch": 0.6210240314798332, "grad_norm": 0.6259390308546399, "learning_rate": 4.037323136978174e-06, "loss": 0.3019, "step": 13257 }, { "epoch": 0.6210708764697616, "grad_norm": 0.5668583925545715, "learning_rate": 4.037173577807794e-06, "loss": 0.2739, "step": 13258 }, { "epoch": 0.6211177214596899, "grad_norm": 0.6020141573439189, "learning_rate": 4.0370240097913674e-06, "loss": 0.2792, "step": 13259 }, { "epoch": 0.6211645664496183, "grad_norm": 0.5925612971776925, "learning_rate": 4.036874432929752e-06, "loss": 0.274, "step": 13260 }, { "epoch": 0.6212114114395465, "grad_norm": 0.6651124460381679, "learning_rate": 4.036724847223813e-06, "loss": 0.2947, "step": 13261 }, { "epoch": 0.6212582564294749, "grad_norm": 0.6022630314670033, "learning_rate": 4.036575252674408e-06, "loss": 0.2889, "step": 13262 }, { "epoch": 0.6213051014194032, "grad_norm": 0.6282341404721953, "learning_rate": 4.036425649282398e-06, "loss": 0.2933, "step": 13263 }, { "epoch": 0.6213519464093316, "grad_norm": 0.566132630811011, "learning_rate": 4.036276037048644e-06, "loss": 0.296, "step": 13264 }, { "epoch": 0.6213987913992598, "grad_norm": 0.613006137454145, "learning_rate": 4.036126415974008e-06, "loss": 0.302, "step": 13265 }, { "epoch": 0.6214456363891881, "grad_norm": 0.6175046745190216, "learning_rate": 4.035976786059351e-06, "loss": 0.2704, "step": 13266 }, { "epoch": 0.6214924813791165, "grad_norm": 0.5419081003196949, "learning_rate": 4.035827147305533e-06, "loss": 0.259, "step": 13267 }, { "epoch": 0.6215393263690449, "grad_norm": 0.6721563641051311, "learning_rate": 4.035677499713416e-06, "loss": 0.3105, "step": 13268 }, { "epoch": 0.6215861713589732, "grad_norm": 0.6191072739688703, "learning_rate": 4.035527843283861e-06, "loss": 0.2929, "step": 13269 }, { "epoch": 0.6216330163489014, "grad_norm": 0.5624959461437895, "learning_rate": 4.035378178017731e-06, "loss": 0.2904, "step": 13270 }, { "epoch": 0.6216798613388298, "grad_norm": 0.6412288668608954, "learning_rate": 4.035228503915882e-06, "loss": 0.2871, "step": 13271 }, { "epoch": 0.6217267063287581, "grad_norm": 0.5279634526334848, "learning_rate": 4.0350788209791815e-06, "loss": 0.2638, "step": 13272 }, { "epoch": 0.6217735513186865, "grad_norm": 0.5688997156961703, "learning_rate": 4.034929129208487e-06, "loss": 0.2843, "step": 13273 }, { "epoch": 0.6218203963086147, "grad_norm": 0.6164980853263795, "learning_rate": 4.0347794286046615e-06, "loss": 0.2997, "step": 13274 }, { "epoch": 0.6218672412985431, "grad_norm": 0.568439509310745, "learning_rate": 4.0346297191685665e-06, "loss": 0.2892, "step": 13275 }, { "epoch": 0.6219140862884714, "grad_norm": 0.5634063676790544, "learning_rate": 4.034480000901063e-06, "loss": 0.2702, "step": 13276 }, { "epoch": 0.6219609312783998, "grad_norm": 0.5505184596820392, "learning_rate": 4.034330273803012e-06, "loss": 0.2612, "step": 13277 }, { "epoch": 0.6220077762683281, "grad_norm": 0.6005706749546472, "learning_rate": 4.034180537875277e-06, "loss": 0.2812, "step": 13278 }, { "epoch": 0.6220546212582564, "grad_norm": 0.5492809317827659, "learning_rate": 4.0340307931187185e-06, "loss": 0.2552, "step": 13279 }, { "epoch": 0.6221014662481847, "grad_norm": 0.6504173091700931, "learning_rate": 4.033881039534197e-06, "loss": 0.2826, "step": 13280 }, { "epoch": 0.6221483112381131, "grad_norm": 0.5741158435122712, "learning_rate": 4.0337312771225765e-06, "loss": 0.2957, "step": 13281 }, { "epoch": 0.6221951562280414, "grad_norm": 0.6246315535910725, "learning_rate": 4.033581505884719e-06, "loss": 0.2978, "step": 13282 }, { "epoch": 0.6222420012179697, "grad_norm": 0.5519750765805076, "learning_rate": 4.033431725821484e-06, "loss": 0.2695, "step": 13283 }, { "epoch": 0.622288846207898, "grad_norm": 0.5773507176995251, "learning_rate": 4.033281936933735e-06, "loss": 0.2797, "step": 13284 }, { "epoch": 0.6223356911978264, "grad_norm": 0.5666961928475952, "learning_rate": 4.0331321392223345e-06, "loss": 0.2694, "step": 13285 }, { "epoch": 0.6223825361877547, "grad_norm": 0.5297534764553697, "learning_rate": 4.032982332688143e-06, "loss": 0.2725, "step": 13286 }, { "epoch": 0.6224293811776831, "grad_norm": 0.5598290989637442, "learning_rate": 4.032832517332025e-06, "loss": 0.2795, "step": 13287 }, { "epoch": 0.6224762261676113, "grad_norm": 0.5608158261624988, "learning_rate": 4.03268269315484e-06, "loss": 0.2855, "step": 13288 }, { "epoch": 0.6225230711575397, "grad_norm": 0.6345026399432997, "learning_rate": 4.032532860157452e-06, "loss": 0.2761, "step": 13289 }, { "epoch": 0.622569916147468, "grad_norm": 0.6130272821682468, "learning_rate": 4.032383018340721e-06, "loss": 0.2954, "step": 13290 }, { "epoch": 0.6226167611373964, "grad_norm": 0.5766194187037184, "learning_rate": 4.032233167705513e-06, "loss": 0.2668, "step": 13291 }, { "epoch": 0.6226636061273246, "grad_norm": 0.5613422676147886, "learning_rate": 4.032083308252687e-06, "loss": 0.2823, "step": 13292 }, { "epoch": 0.622710451117253, "grad_norm": 0.64812797162174, "learning_rate": 4.031933439983106e-06, "loss": 0.301, "step": 13293 }, { "epoch": 0.6227572961071813, "grad_norm": 0.576112497914643, "learning_rate": 4.031783562897634e-06, "loss": 0.2879, "step": 13294 }, { "epoch": 0.6228041410971097, "grad_norm": 0.5990644942505313, "learning_rate": 4.0316336769971325e-06, "loss": 0.2696, "step": 13295 }, { "epoch": 0.622850986087038, "grad_norm": 0.6108309478641751, "learning_rate": 4.0314837822824636e-06, "loss": 0.2842, "step": 13296 }, { "epoch": 0.6228978310769663, "grad_norm": 0.5742872926230252, "learning_rate": 4.031333878754492e-06, "loss": 0.2769, "step": 13297 }, { "epoch": 0.6229446760668946, "grad_norm": 0.6427017236547228, "learning_rate": 4.031183966414077e-06, "loss": 0.304, "step": 13298 }, { "epoch": 0.622991521056823, "grad_norm": 0.6441400771663387, "learning_rate": 4.0310340452620845e-06, "loss": 0.2987, "step": 13299 }, { "epoch": 0.6230383660467513, "grad_norm": 0.6136633314941818, "learning_rate": 4.030884115299375e-06, "loss": 0.2714, "step": 13300 }, { "epoch": 0.6230852110366796, "grad_norm": 0.6383268604731301, "learning_rate": 4.030734176526813e-06, "loss": 0.298, "step": 13301 }, { "epoch": 0.6231320560266079, "grad_norm": 0.592658744249545, "learning_rate": 4.030584228945259e-06, "loss": 0.3023, "step": 13302 }, { "epoch": 0.6231789010165363, "grad_norm": 0.5389137625581257, "learning_rate": 4.030434272555579e-06, "loss": 0.2709, "step": 13303 }, { "epoch": 0.6232257460064646, "grad_norm": 0.6182308760419436, "learning_rate": 4.030284307358634e-06, "loss": 0.2943, "step": 13304 }, { "epoch": 0.623272590996393, "grad_norm": 0.564991183679247, "learning_rate": 4.030134333355288e-06, "loss": 0.291, "step": 13305 }, { "epoch": 0.6233194359863212, "grad_norm": 0.5330077084850356, "learning_rate": 4.029984350546402e-06, "loss": 0.2674, "step": 13306 }, { "epoch": 0.6233662809762496, "grad_norm": 0.5716781451063515, "learning_rate": 4.029834358932842e-06, "loss": 0.2619, "step": 13307 }, { "epoch": 0.6234131259661779, "grad_norm": 0.5644832699728477, "learning_rate": 4.029684358515469e-06, "loss": 0.2573, "step": 13308 }, { "epoch": 0.6234599709561063, "grad_norm": 0.6019209899133962, "learning_rate": 4.029534349295147e-06, "loss": 0.2854, "step": 13309 }, { "epoch": 0.6235068159460345, "grad_norm": 0.5647645912245574, "learning_rate": 4.0293843312727395e-06, "loss": 0.2699, "step": 13310 }, { "epoch": 0.6235536609359629, "grad_norm": 0.5867522174187286, "learning_rate": 4.02923430444911e-06, "loss": 0.2667, "step": 13311 }, { "epoch": 0.6236005059258912, "grad_norm": 0.6024838018902539, "learning_rate": 4.02908426882512e-06, "loss": 0.3071, "step": 13312 }, { "epoch": 0.6236473509158196, "grad_norm": 0.6065894487594331, "learning_rate": 4.028934224401636e-06, "loss": 0.2764, "step": 13313 }, { "epoch": 0.6236941959057479, "grad_norm": 0.5931758793124116, "learning_rate": 4.028784171179519e-06, "loss": 0.286, "step": 13314 }, { "epoch": 0.6237410408956762, "grad_norm": 0.542495596590036, "learning_rate": 4.028634109159633e-06, "loss": 0.2593, "step": 13315 }, { "epoch": 0.6237878858856045, "grad_norm": 0.61187124623432, "learning_rate": 4.028484038342843e-06, "loss": 0.2722, "step": 13316 }, { "epoch": 0.6238347308755329, "grad_norm": 0.5820741964374426, "learning_rate": 4.028333958730011e-06, "loss": 0.2963, "step": 13317 }, { "epoch": 0.6238815758654612, "grad_norm": 0.5613431100845045, "learning_rate": 4.028183870322001e-06, "loss": 0.2652, "step": 13318 }, { "epoch": 0.6239284208553895, "grad_norm": 0.6002042903964807, "learning_rate": 4.028033773119677e-06, "loss": 0.2764, "step": 13319 }, { "epoch": 0.6239752658453178, "grad_norm": 0.5748301600825814, "learning_rate": 4.027883667123903e-06, "loss": 0.2775, "step": 13320 }, { "epoch": 0.6240221108352462, "grad_norm": 0.5850055978145978, "learning_rate": 4.027733552335542e-06, "loss": 0.2949, "step": 13321 }, { "epoch": 0.6240689558251745, "grad_norm": 0.6051213240865809, "learning_rate": 4.027583428755458e-06, "loss": 0.2694, "step": 13322 }, { "epoch": 0.6241158008151029, "grad_norm": 0.6093789250593458, "learning_rate": 4.027433296384517e-06, "loss": 0.2806, "step": 13323 }, { "epoch": 0.6241626458050311, "grad_norm": 0.5800228010290809, "learning_rate": 4.02728315522358e-06, "loss": 0.2746, "step": 13324 }, { "epoch": 0.6242094907949595, "grad_norm": 0.6043944062088609, "learning_rate": 4.027133005273512e-06, "loss": 0.3018, "step": 13325 }, { "epoch": 0.6242563357848878, "grad_norm": 0.5904318070136518, "learning_rate": 4.026982846535178e-06, "loss": 0.2651, "step": 13326 }, { "epoch": 0.6243031807748162, "grad_norm": 0.5744055952059945, "learning_rate": 4.026832679009442e-06, "loss": 0.2735, "step": 13327 }, { "epoch": 0.6243500257647444, "grad_norm": 0.554910773653985, "learning_rate": 4.026682502697167e-06, "loss": 0.2711, "step": 13328 }, { "epoch": 0.6243968707546728, "grad_norm": 0.6121726111945962, "learning_rate": 4.026532317599218e-06, "loss": 0.2784, "step": 13329 }, { "epoch": 0.6244437157446011, "grad_norm": 0.5757277077119078, "learning_rate": 4.026382123716459e-06, "loss": 0.2673, "step": 13330 }, { "epoch": 0.6244905607345295, "grad_norm": 0.5458266936477346, "learning_rate": 4.0262319210497545e-06, "loss": 0.262, "step": 13331 }, { "epoch": 0.6245374057244578, "grad_norm": 0.5661451102489227, "learning_rate": 4.0260817095999694e-06, "loss": 0.26, "step": 13332 }, { "epoch": 0.6245842507143861, "grad_norm": 0.595905919395124, "learning_rate": 4.025931489367967e-06, "loss": 0.2813, "step": 13333 }, { "epoch": 0.6246310957043144, "grad_norm": 0.6196333899411591, "learning_rate": 4.025781260354613e-06, "loss": 0.2923, "step": 13334 }, { "epoch": 0.6246779406942428, "grad_norm": 0.6124466182194819, "learning_rate": 4.0256310225607715e-06, "loss": 0.2904, "step": 13335 }, { "epoch": 0.6247247856841711, "grad_norm": 0.6058082528564256, "learning_rate": 4.025480775987306e-06, "loss": 0.2918, "step": 13336 }, { "epoch": 0.6247716306740994, "grad_norm": 0.5942124087367546, "learning_rate": 4.025330520635083e-06, "loss": 0.2831, "step": 13337 }, { "epoch": 0.6248184756640277, "grad_norm": 0.5883885898905995, "learning_rate": 4.025180256504966e-06, "loss": 0.2792, "step": 13338 }, { "epoch": 0.6248653206539561, "grad_norm": 0.6653722244410049, "learning_rate": 4.025029983597819e-06, "loss": 0.2914, "step": 13339 }, { "epoch": 0.6249121656438844, "grad_norm": 0.612160388748058, "learning_rate": 4.0248797019145094e-06, "loss": 0.2834, "step": 13340 }, { "epoch": 0.6249590106338128, "grad_norm": 0.6198564720794346, "learning_rate": 4.024729411455899e-06, "loss": 0.2985, "step": 13341 }, { "epoch": 0.625005855623741, "grad_norm": 0.5331203364877484, "learning_rate": 4.024579112222855e-06, "loss": 0.2635, "step": 13342 }, { "epoch": 0.6250527006136694, "grad_norm": 0.6015409665126286, "learning_rate": 4.024428804216241e-06, "loss": 0.3, "step": 13343 }, { "epoch": 0.6250995456035977, "grad_norm": 0.5558176737129606, "learning_rate": 4.024278487436923e-06, "loss": 0.2563, "step": 13344 }, { "epoch": 0.6251463905935261, "grad_norm": 0.6374094490075596, "learning_rate": 4.0241281618857645e-06, "loss": 0.3008, "step": 13345 }, { "epoch": 0.6251932355834543, "grad_norm": 0.6243158018562961, "learning_rate": 4.023977827563632e-06, "loss": 0.2981, "step": 13346 }, { "epoch": 0.6252400805733827, "grad_norm": 0.6180683243939403, "learning_rate": 4.02382748447139e-06, "loss": 0.2825, "step": 13347 }, { "epoch": 0.625286925563311, "grad_norm": 0.6079043984155801, "learning_rate": 4.023677132609903e-06, "loss": 0.2895, "step": 13348 }, { "epoch": 0.6253337705532394, "grad_norm": 0.633026165886864, "learning_rate": 4.023526771980038e-06, "loss": 0.2983, "step": 13349 }, { "epoch": 0.6253806155431677, "grad_norm": 0.5831790347353599, "learning_rate": 4.02337640258266e-06, "loss": 0.2903, "step": 13350 }, { "epoch": 0.625427460533096, "grad_norm": 0.5727173236613776, "learning_rate": 4.023226024418632e-06, "loss": 0.2926, "step": 13351 }, { "epoch": 0.6254743055230243, "grad_norm": 0.5837290559631985, "learning_rate": 4.023075637488822e-06, "loss": 0.2954, "step": 13352 }, { "epoch": 0.6255211505129527, "grad_norm": 0.5905511303655862, "learning_rate": 4.022925241794095e-06, "loss": 0.2837, "step": 13353 }, { "epoch": 0.625567995502881, "grad_norm": 0.5918550882070289, "learning_rate": 4.022774837335315e-06, "loss": 0.282, "step": 13354 }, { "epoch": 0.6256148404928092, "grad_norm": 0.664569637867571, "learning_rate": 4.022624424113349e-06, "loss": 0.2819, "step": 13355 }, { "epoch": 0.6256616854827376, "grad_norm": 0.5938406183043105, "learning_rate": 4.022474002129062e-06, "loss": 0.2702, "step": 13356 }, { "epoch": 0.625708530472666, "grad_norm": 0.6397240717996313, "learning_rate": 4.022323571383319e-06, "loss": 0.2982, "step": 13357 }, { "epoch": 0.6257553754625943, "grad_norm": 0.5438808168271856, "learning_rate": 4.0221731318769885e-06, "loss": 0.2669, "step": 13358 }, { "epoch": 0.6258022204525227, "grad_norm": 0.5645472940961265, "learning_rate": 4.022022683610933e-06, "loss": 0.2962, "step": 13359 }, { "epoch": 0.6258490654424509, "grad_norm": 0.5636484835057843, "learning_rate": 4.021872226586019e-06, "loss": 0.2738, "step": 13360 }, { "epoch": 0.6258959104323792, "grad_norm": 0.5883372270365413, "learning_rate": 4.0217217608031136e-06, "loss": 0.2802, "step": 13361 }, { "epoch": 0.6259427554223076, "grad_norm": 0.6410256418456799, "learning_rate": 4.021571286263082e-06, "loss": 0.2864, "step": 13362 }, { "epoch": 0.625989600412236, "grad_norm": 0.5776715241527063, "learning_rate": 4.02142080296679e-06, "loss": 0.2686, "step": 13363 }, { "epoch": 0.6260364454021642, "grad_norm": 0.5885439959511644, "learning_rate": 4.021270310915103e-06, "loss": 0.2913, "step": 13364 }, { "epoch": 0.6260832903920925, "grad_norm": 0.5816562017318406, "learning_rate": 4.021119810108889e-06, "loss": 0.2981, "step": 13365 }, { "epoch": 0.6261301353820209, "grad_norm": 0.547818999412411, "learning_rate": 4.020969300549011e-06, "loss": 0.2641, "step": 13366 }, { "epoch": 0.6261769803719492, "grad_norm": 0.6092746224525943, "learning_rate": 4.0208187822363386e-06, "loss": 0.2745, "step": 13367 }, { "epoch": 0.6262238253618776, "grad_norm": 0.5746480143957737, "learning_rate": 4.020668255171736e-06, "loss": 0.2922, "step": 13368 }, { "epoch": 0.6262706703518058, "grad_norm": 0.6317140707371712, "learning_rate": 4.020517719356069e-06, "loss": 0.2624, "step": 13369 }, { "epoch": 0.6263175153417342, "grad_norm": 0.5371579863429701, "learning_rate": 4.020367174790207e-06, "loss": 0.2609, "step": 13370 }, { "epoch": 0.6263643603316625, "grad_norm": 0.5916685180857764, "learning_rate": 4.020216621475011e-06, "loss": 0.2915, "step": 13371 }, { "epoch": 0.6264112053215909, "grad_norm": 0.5882291954375266, "learning_rate": 4.020066059411352e-06, "loss": 0.2915, "step": 13372 }, { "epoch": 0.6264580503115191, "grad_norm": 0.5853160730335802, "learning_rate": 4.019915488600095e-06, "loss": 0.279, "step": 13373 }, { "epoch": 0.6265048953014475, "grad_norm": 0.6089308183119325, "learning_rate": 4.019764909042106e-06, "loss": 0.2946, "step": 13374 }, { "epoch": 0.6265517402913758, "grad_norm": 0.616250050376528, "learning_rate": 4.019614320738251e-06, "loss": 0.2929, "step": 13375 }, { "epoch": 0.6265985852813042, "grad_norm": 0.6007237519244517, "learning_rate": 4.0194637236893984e-06, "loss": 0.2972, "step": 13376 }, { "epoch": 0.6266454302712325, "grad_norm": 0.5831626944676784, "learning_rate": 4.019313117896414e-06, "loss": 0.2759, "step": 13377 }, { "epoch": 0.6266922752611608, "grad_norm": 0.5967845032242236, "learning_rate": 4.019162503360165e-06, "loss": 0.2663, "step": 13378 }, { "epoch": 0.6267391202510891, "grad_norm": 0.6350376886415156, "learning_rate": 4.019011880081517e-06, "loss": 0.2965, "step": 13379 }, { "epoch": 0.6267859652410175, "grad_norm": 0.6108880857292627, "learning_rate": 4.018861248061338e-06, "loss": 0.3111, "step": 13380 }, { "epoch": 0.6268328102309458, "grad_norm": 0.6467318830217871, "learning_rate": 4.018710607300494e-06, "loss": 0.2893, "step": 13381 }, { "epoch": 0.6268796552208741, "grad_norm": 0.5921160375712868, "learning_rate": 4.018559957799851e-06, "loss": 0.2803, "step": 13382 }, { "epoch": 0.6269265002108024, "grad_norm": 0.5930329967460541, "learning_rate": 4.018409299560279e-06, "loss": 0.2841, "step": 13383 }, { "epoch": 0.6269733452007308, "grad_norm": 0.5913240032308705, "learning_rate": 4.018258632582641e-06, "loss": 0.2626, "step": 13384 }, { "epoch": 0.6270201901906591, "grad_norm": 0.6212670240665388, "learning_rate": 4.018107956867808e-06, "loss": 0.2772, "step": 13385 }, { "epoch": 0.6270670351805875, "grad_norm": 0.6029098339712363, "learning_rate": 4.017957272416645e-06, "loss": 0.2729, "step": 13386 }, { "epoch": 0.6271138801705157, "grad_norm": 0.6547058524904555, "learning_rate": 4.017806579230019e-06, "loss": 0.2947, "step": 13387 }, { "epoch": 0.6271607251604441, "grad_norm": 0.5827044360341661, "learning_rate": 4.017655877308797e-06, "loss": 0.2814, "step": 13388 }, { "epoch": 0.6272075701503724, "grad_norm": 0.5967776832107701, "learning_rate": 4.017505166653848e-06, "loss": 0.28, "step": 13389 }, { "epoch": 0.6272544151403008, "grad_norm": 0.5682619906788402, "learning_rate": 4.017354447266037e-06, "loss": 0.2804, "step": 13390 }, { "epoch": 0.627301260130229, "grad_norm": 0.5718545616231673, "learning_rate": 4.017203719146234e-06, "loss": 0.282, "step": 13391 }, { "epoch": 0.6273481051201574, "grad_norm": 0.616515803126268, "learning_rate": 4.0170529822953044e-06, "loss": 0.2935, "step": 13392 }, { "epoch": 0.6273949501100857, "grad_norm": 0.5955151333921938, "learning_rate": 4.016902236714115e-06, "loss": 0.2813, "step": 13393 }, { "epoch": 0.6274417951000141, "grad_norm": 0.700337490148713, "learning_rate": 4.016751482403536e-06, "loss": 0.2859, "step": 13394 }, { "epoch": 0.6274886400899424, "grad_norm": 0.5727998112777919, "learning_rate": 4.016600719364433e-06, "loss": 0.2953, "step": 13395 }, { "epoch": 0.6275354850798707, "grad_norm": 0.62565346025595, "learning_rate": 4.0164499475976735e-06, "loss": 0.2797, "step": 13396 }, { "epoch": 0.627582330069799, "grad_norm": 0.5820906251218072, "learning_rate": 4.016299167104127e-06, "loss": 0.2812, "step": 13397 }, { "epoch": 0.6276291750597274, "grad_norm": 0.6595592584066262, "learning_rate": 4.016148377884659e-06, "loss": 0.3085, "step": 13398 }, { "epoch": 0.6276760200496557, "grad_norm": 0.5751463332684742, "learning_rate": 4.015997579940138e-06, "loss": 0.2604, "step": 13399 }, { "epoch": 0.627722865039584, "grad_norm": 0.5846286582867157, "learning_rate": 4.015846773271432e-06, "loss": 0.3012, "step": 13400 }, { "epoch": 0.6277697100295123, "grad_norm": 0.592751501798231, "learning_rate": 4.015695957879409e-06, "loss": 0.2751, "step": 13401 }, { "epoch": 0.6278165550194407, "grad_norm": 0.6754506638440773, "learning_rate": 4.015545133764937e-06, "loss": 0.2924, "step": 13402 }, { "epoch": 0.627863400009369, "grad_norm": 0.5861875109504777, "learning_rate": 4.015394300928883e-06, "loss": 0.2734, "step": 13403 }, { "epoch": 0.6279102449992974, "grad_norm": 0.5908292853325734, "learning_rate": 4.015243459372116e-06, "loss": 0.2923, "step": 13404 }, { "epoch": 0.6279570899892256, "grad_norm": 0.5965575218804902, "learning_rate": 4.015092609095504e-06, "loss": 0.2806, "step": 13405 }, { "epoch": 0.628003934979154, "grad_norm": 0.6026995783750434, "learning_rate": 4.014941750099915e-06, "loss": 0.3046, "step": 13406 }, { "epoch": 0.6280507799690823, "grad_norm": 0.5922587290380575, "learning_rate": 4.0147908823862155e-06, "loss": 0.2671, "step": 13407 }, { "epoch": 0.6280976249590107, "grad_norm": 0.555945783607279, "learning_rate": 4.014640005955277e-06, "loss": 0.2706, "step": 13408 }, { "epoch": 0.6281444699489389, "grad_norm": 0.6653421288657361, "learning_rate": 4.014489120807965e-06, "loss": 0.3108, "step": 13409 }, { "epoch": 0.6281913149388673, "grad_norm": 0.6405175760162262, "learning_rate": 4.014338226945149e-06, "loss": 0.2836, "step": 13410 }, { "epoch": 0.6282381599287956, "grad_norm": 0.5555732123877516, "learning_rate": 4.014187324367698e-06, "loss": 0.2765, "step": 13411 }, { "epoch": 0.628285004918724, "grad_norm": 0.5611347560314626, "learning_rate": 4.014036413076478e-06, "loss": 0.2701, "step": 13412 }, { "epoch": 0.6283318499086523, "grad_norm": 0.5584476226511447, "learning_rate": 4.01388549307236e-06, "loss": 0.2643, "step": 13413 }, { "epoch": 0.6283786948985806, "grad_norm": 0.558799767140567, "learning_rate": 4.013734564356211e-06, "loss": 0.2814, "step": 13414 }, { "epoch": 0.6284255398885089, "grad_norm": 0.5577944353718758, "learning_rate": 4.0135836269289e-06, "loss": 0.2707, "step": 13415 }, { "epoch": 0.6284723848784373, "grad_norm": 0.56434817867159, "learning_rate": 4.013432680791296e-06, "loss": 0.2855, "step": 13416 }, { "epoch": 0.6285192298683656, "grad_norm": 0.6104211931554142, "learning_rate": 4.013281725944267e-06, "loss": 0.2807, "step": 13417 }, { "epoch": 0.6285660748582939, "grad_norm": 0.6072082741446629, "learning_rate": 4.013130762388682e-06, "loss": 0.2805, "step": 13418 }, { "epoch": 0.6286129198482222, "grad_norm": 0.6138185807626451, "learning_rate": 4.01297979012541e-06, "loss": 0.2927, "step": 13419 }, { "epoch": 0.6286597648381506, "grad_norm": 0.6110203821702926, "learning_rate": 4.012828809155319e-06, "loss": 0.2952, "step": 13420 }, { "epoch": 0.6287066098280789, "grad_norm": 0.5778103224674105, "learning_rate": 4.012677819479279e-06, "loss": 0.2742, "step": 13421 }, { "epoch": 0.6287534548180073, "grad_norm": 0.5762713322593542, "learning_rate": 4.012526821098159e-06, "loss": 0.2801, "step": 13422 }, { "epoch": 0.6288002998079355, "grad_norm": 0.5917224195358715, "learning_rate": 4.012375814012826e-06, "loss": 0.2854, "step": 13423 }, { "epoch": 0.6288471447978639, "grad_norm": 0.5525129547573797, "learning_rate": 4.012224798224151e-06, "loss": 0.2593, "step": 13424 }, { "epoch": 0.6288939897877922, "grad_norm": 0.6714369656516362, "learning_rate": 4.0120737737330025e-06, "loss": 0.2947, "step": 13425 }, { "epoch": 0.6289408347777206, "grad_norm": 0.5499358727113299, "learning_rate": 4.011922740540249e-06, "loss": 0.2821, "step": 13426 }, { "epoch": 0.6289876797676488, "grad_norm": 0.5998997879651853, "learning_rate": 4.011771698646761e-06, "loss": 0.2769, "step": 13427 }, { "epoch": 0.6290345247575772, "grad_norm": 0.5346831236812327, "learning_rate": 4.0116206480534065e-06, "loss": 0.2708, "step": 13428 }, { "epoch": 0.6290813697475055, "grad_norm": 0.5958754534723898, "learning_rate": 4.011469588761055e-06, "loss": 0.2852, "step": 13429 }, { "epoch": 0.6291282147374339, "grad_norm": 0.6090362701817845, "learning_rate": 4.011318520770575e-06, "loss": 0.3081, "step": 13430 }, { "epoch": 0.6291750597273622, "grad_norm": 0.6031617409842273, "learning_rate": 4.011167444082838e-06, "loss": 0.2624, "step": 13431 }, { "epoch": 0.6292219047172904, "grad_norm": 0.5914781746131558, "learning_rate": 4.011016358698712e-06, "loss": 0.2984, "step": 13432 }, { "epoch": 0.6292687497072188, "grad_norm": 0.574215447288072, "learning_rate": 4.010865264619066e-06, "loss": 0.267, "step": 13433 }, { "epoch": 0.6293155946971472, "grad_norm": 0.644722105679829, "learning_rate": 4.01071416184477e-06, "loss": 0.3015, "step": 13434 }, { "epoch": 0.6293624396870755, "grad_norm": 0.5639435364524175, "learning_rate": 4.010563050376695e-06, "loss": 0.2617, "step": 13435 }, { "epoch": 0.6294092846770037, "grad_norm": 0.6175174636243964, "learning_rate": 4.0104119302157076e-06, "loss": 0.2932, "step": 13436 }, { "epoch": 0.6294561296669321, "grad_norm": 0.5799331766592039, "learning_rate": 4.010260801362681e-06, "loss": 0.2851, "step": 13437 }, { "epoch": 0.6295029746568604, "grad_norm": 0.5639789527366849, "learning_rate": 4.010109663818482e-06, "loss": 0.2646, "step": 13438 }, { "epoch": 0.6295498196467888, "grad_norm": 0.6436501496857111, "learning_rate": 4.009958517583982e-06, "loss": 0.2974, "step": 13439 }, { "epoch": 0.6295966646367172, "grad_norm": 0.5941812523963078, "learning_rate": 4.009807362660049e-06, "loss": 0.3012, "step": 13440 }, { "epoch": 0.6296435096266454, "grad_norm": 0.5620824428036286, "learning_rate": 4.009656199047555e-06, "loss": 0.2766, "step": 13441 }, { "epoch": 0.6296903546165737, "grad_norm": 0.5363511585116527, "learning_rate": 4.009505026747369e-06, "loss": 0.2746, "step": 13442 }, { "epoch": 0.6297371996065021, "grad_norm": 0.5769076245073632, "learning_rate": 4.009353845760361e-06, "loss": 0.2843, "step": 13443 }, { "epoch": 0.6297840445964304, "grad_norm": 0.5637890827741661, "learning_rate": 4.009202656087401e-06, "loss": 0.2774, "step": 13444 }, { "epoch": 0.6298308895863587, "grad_norm": 0.702623373720875, "learning_rate": 4.009051457729359e-06, "loss": 0.2731, "step": 13445 }, { "epoch": 0.629877734576287, "grad_norm": 0.5377547152895874, "learning_rate": 4.008900250687105e-06, "loss": 0.2748, "step": 13446 }, { "epoch": 0.6299245795662154, "grad_norm": 0.631667198092289, "learning_rate": 4.0087490349615095e-06, "loss": 0.2894, "step": 13447 }, { "epoch": 0.6299714245561437, "grad_norm": 0.6276669719963137, "learning_rate": 4.0085978105534415e-06, "loss": 0.2967, "step": 13448 }, { "epoch": 0.6300182695460721, "grad_norm": 0.6417431219908301, "learning_rate": 4.008446577463774e-06, "loss": 0.2965, "step": 13449 }, { "epoch": 0.6300651145360003, "grad_norm": 0.6070392980618646, "learning_rate": 4.008295335693374e-06, "loss": 0.2946, "step": 13450 }, { "epoch": 0.6301119595259287, "grad_norm": 0.5905343646207566, "learning_rate": 4.008144085243115e-06, "loss": 0.2832, "step": 13451 }, { "epoch": 0.630158804515857, "grad_norm": 0.6049353658192176, "learning_rate": 4.007992826113865e-06, "loss": 0.2765, "step": 13452 }, { "epoch": 0.6302056495057854, "grad_norm": 0.6314673874301299, "learning_rate": 4.007841558306495e-06, "loss": 0.3039, "step": 13453 }, { "epoch": 0.6302524944957136, "grad_norm": 0.6266966869895335, "learning_rate": 4.0076902818218765e-06, "loss": 0.2784, "step": 13454 }, { "epoch": 0.630299339485642, "grad_norm": 0.58895449215932, "learning_rate": 4.007538996660879e-06, "loss": 0.2593, "step": 13455 }, { "epoch": 0.6303461844755703, "grad_norm": 0.5861918626502244, "learning_rate": 4.0073877028243736e-06, "loss": 0.2948, "step": 13456 }, { "epoch": 0.6303930294654987, "grad_norm": 0.5581139779107799, "learning_rate": 4.0072364003132305e-06, "loss": 0.2667, "step": 13457 }, { "epoch": 0.630439874455427, "grad_norm": 0.5967124816346581, "learning_rate": 4.007085089128321e-06, "loss": 0.2994, "step": 13458 }, { "epoch": 0.6304867194453553, "grad_norm": 0.5609532325477534, "learning_rate": 4.006933769270516e-06, "loss": 0.2724, "step": 13459 }, { "epoch": 0.6305335644352836, "grad_norm": 0.5883169952341772, "learning_rate": 4.006782440740686e-06, "loss": 0.2917, "step": 13460 }, { "epoch": 0.630580409425212, "grad_norm": 0.6052695121679714, "learning_rate": 4.0066311035396995e-06, "loss": 0.2973, "step": 13461 }, { "epoch": 0.6306272544151403, "grad_norm": 0.6144474532022537, "learning_rate": 4.0064797576684325e-06, "loss": 0.296, "step": 13462 }, { "epoch": 0.6306740994050686, "grad_norm": 0.6662859626492847, "learning_rate": 4.006328403127752e-06, "loss": 0.311, "step": 13463 }, { "epoch": 0.6307209443949969, "grad_norm": 0.6087945463064695, "learning_rate": 4.00617703991853e-06, "loss": 0.2671, "step": 13464 }, { "epoch": 0.6307677893849253, "grad_norm": 0.6472807260033346, "learning_rate": 4.006025668041638e-06, "loss": 0.2786, "step": 13465 }, { "epoch": 0.6308146343748536, "grad_norm": 0.5656837528188728, "learning_rate": 4.005874287497947e-06, "loss": 0.2784, "step": 13466 }, { "epoch": 0.630861479364782, "grad_norm": 0.5609509035864307, "learning_rate": 4.005722898288328e-06, "loss": 0.2945, "step": 13467 }, { "epoch": 0.6309083243547102, "grad_norm": 0.5887713967300103, "learning_rate": 4.005571500413652e-06, "loss": 0.2981, "step": 13468 }, { "epoch": 0.6309551693446386, "grad_norm": 0.5767550377695851, "learning_rate": 4.00542009387479e-06, "loss": 0.2903, "step": 13469 }, { "epoch": 0.6310020143345669, "grad_norm": 0.6921117881514277, "learning_rate": 4.005268678672613e-06, "loss": 0.299, "step": 13470 }, { "epoch": 0.6310488593244953, "grad_norm": 0.5812314345251801, "learning_rate": 4.005117254807995e-06, "loss": 0.2628, "step": 13471 }, { "epoch": 0.6310957043144235, "grad_norm": 0.593269403414059, "learning_rate": 4.004965822281804e-06, "loss": 0.2802, "step": 13472 }, { "epoch": 0.6311425493043519, "grad_norm": 0.6495264173189186, "learning_rate": 4.004814381094914e-06, "loss": 0.3047, "step": 13473 }, { "epoch": 0.6311893942942802, "grad_norm": 0.6218579208158038, "learning_rate": 4.004662931248195e-06, "loss": 0.285, "step": 13474 }, { "epoch": 0.6312362392842086, "grad_norm": 0.5288811386720272, "learning_rate": 4.00451147274252e-06, "loss": 0.2642, "step": 13475 }, { "epoch": 0.6312830842741369, "grad_norm": 0.5994426568197762, "learning_rate": 4.004360005578758e-06, "loss": 0.3002, "step": 13476 }, { "epoch": 0.6313299292640652, "grad_norm": 0.619739043459857, "learning_rate": 4.004208529757782e-06, "loss": 0.2779, "step": 13477 }, { "epoch": 0.6313767742539935, "grad_norm": 0.6049252987629801, "learning_rate": 4.004057045280466e-06, "loss": 0.2683, "step": 13478 }, { "epoch": 0.6314236192439219, "grad_norm": 0.6145223671982163, "learning_rate": 4.003905552147677e-06, "loss": 0.2673, "step": 13479 }, { "epoch": 0.6314704642338502, "grad_norm": 0.6557106182885144, "learning_rate": 4.003754050360292e-06, "loss": 0.2865, "step": 13480 }, { "epoch": 0.6315173092237785, "grad_norm": 0.7651876923156373, "learning_rate": 4.00360253991918e-06, "loss": 0.3309, "step": 13481 }, { "epoch": 0.6315641542137068, "grad_norm": 0.5382834846275567, "learning_rate": 4.003451020825212e-06, "loss": 0.2767, "step": 13482 }, { "epoch": 0.6316109992036352, "grad_norm": 0.5910135476807667, "learning_rate": 4.003299493079263e-06, "loss": 0.2755, "step": 13483 }, { "epoch": 0.6316578441935635, "grad_norm": 0.6016467167885721, "learning_rate": 4.003147956682203e-06, "loss": 0.2977, "step": 13484 }, { "epoch": 0.6317046891834919, "grad_norm": 0.6282632915224092, "learning_rate": 4.002996411634903e-06, "loss": 0.2752, "step": 13485 }, { "epoch": 0.6317515341734201, "grad_norm": 0.6262806727910842, "learning_rate": 4.002844857938238e-06, "loss": 0.2801, "step": 13486 }, { "epoch": 0.6317983791633485, "grad_norm": 0.6160040918183494, "learning_rate": 4.002693295593077e-06, "loss": 0.282, "step": 13487 }, { "epoch": 0.6318452241532768, "grad_norm": 0.5819747326590022, "learning_rate": 4.002541724600294e-06, "loss": 0.2739, "step": 13488 }, { "epoch": 0.6318920691432052, "grad_norm": 0.5805584587130513, "learning_rate": 4.002390144960761e-06, "loss": 0.2739, "step": 13489 }, { "epoch": 0.6319389141331334, "grad_norm": 0.6501929753131633, "learning_rate": 4.002238556675352e-06, "loss": 0.3072, "step": 13490 }, { "epoch": 0.6319857591230618, "grad_norm": 0.6140044121412301, "learning_rate": 4.002086959744936e-06, "loss": 0.2697, "step": 13491 }, { "epoch": 0.6320326041129901, "grad_norm": 0.5755297930802975, "learning_rate": 4.001935354170387e-06, "loss": 0.2704, "step": 13492 }, { "epoch": 0.6320794491029185, "grad_norm": 0.591494286938117, "learning_rate": 4.001783739952578e-06, "loss": 0.3035, "step": 13493 }, { "epoch": 0.6321262940928468, "grad_norm": 0.6396492816670062, "learning_rate": 4.001632117092382e-06, "loss": 0.2951, "step": 13494 }, { "epoch": 0.6321731390827751, "grad_norm": 0.6034903788153965, "learning_rate": 4.00148048559067e-06, "loss": 0.2921, "step": 13495 }, { "epoch": 0.6322199840727034, "grad_norm": 0.6692015766663686, "learning_rate": 4.001328845448315e-06, "loss": 0.2774, "step": 13496 }, { "epoch": 0.6322668290626318, "grad_norm": 0.6062334792523063, "learning_rate": 4.0011771966661895e-06, "loss": 0.2708, "step": 13497 }, { "epoch": 0.6323136740525601, "grad_norm": 0.65313229183533, "learning_rate": 4.001025539245167e-06, "loss": 0.3151, "step": 13498 }, { "epoch": 0.6323605190424884, "grad_norm": 0.5882059363841519, "learning_rate": 4.000873873186119e-06, "loss": 0.2829, "step": 13499 }, { "epoch": 0.6324073640324167, "grad_norm": 0.5706511095231241, "learning_rate": 4.0007221984899195e-06, "loss": 0.2852, "step": 13500 }, { "epoch": 0.6324542090223451, "grad_norm": 0.6014758619776526, "learning_rate": 4.0005705151574416e-06, "loss": 0.2809, "step": 13501 }, { "epoch": 0.6325010540122734, "grad_norm": 0.56756271714323, "learning_rate": 4.000418823189557e-06, "loss": 0.2519, "step": 13502 }, { "epoch": 0.6325478990022018, "grad_norm": 0.6191295488398842, "learning_rate": 4.000267122587139e-06, "loss": 0.2841, "step": 13503 }, { "epoch": 0.63259474399213, "grad_norm": 0.636666098168621, "learning_rate": 4.000115413351061e-06, "loss": 0.2815, "step": 13504 }, { "epoch": 0.6326415889820584, "grad_norm": 0.5781063281048648, "learning_rate": 3.999963695482196e-06, "loss": 0.2827, "step": 13505 }, { "epoch": 0.6326884339719867, "grad_norm": 0.6022296438040456, "learning_rate": 3.999811968981416e-06, "loss": 0.2921, "step": 13506 }, { "epoch": 0.6327352789619151, "grad_norm": 0.543754659453636, "learning_rate": 3.999660233849597e-06, "loss": 0.2618, "step": 13507 }, { "epoch": 0.6327821239518433, "grad_norm": 0.576135839708898, "learning_rate": 3.999508490087609e-06, "loss": 0.302, "step": 13508 }, { "epoch": 0.6328289689417717, "grad_norm": 0.5920010721083568, "learning_rate": 3.999356737696327e-06, "loss": 0.2785, "step": 13509 }, { "epoch": 0.6328758139317, "grad_norm": 0.5848178259920711, "learning_rate": 3.999204976676623e-06, "loss": 0.2807, "step": 13510 }, { "epoch": 0.6329226589216284, "grad_norm": 0.5738941650220426, "learning_rate": 3.999053207029372e-06, "loss": 0.2827, "step": 13511 }, { "epoch": 0.6329695039115567, "grad_norm": 0.6127696757281833, "learning_rate": 3.9989014287554454e-06, "loss": 0.2749, "step": 13512 }, { "epoch": 0.633016348901485, "grad_norm": 0.6606518039813009, "learning_rate": 3.99874964185572e-06, "loss": 0.3002, "step": 13513 }, { "epoch": 0.6330631938914133, "grad_norm": 0.605210942916301, "learning_rate": 3.998597846331065e-06, "loss": 0.2862, "step": 13514 }, { "epoch": 0.6331100388813417, "grad_norm": 0.5406980756496359, "learning_rate": 3.9984460421823565e-06, "loss": 0.2515, "step": 13515 }, { "epoch": 0.63315688387127, "grad_norm": 0.6366887388404707, "learning_rate": 3.998294229410468e-06, "loss": 0.2809, "step": 13516 }, { "epoch": 0.6332037288611982, "grad_norm": 0.6353427344427809, "learning_rate": 3.998142408016274e-06, "loss": 0.2804, "step": 13517 }, { "epoch": 0.6332505738511266, "grad_norm": 0.5369749244423937, "learning_rate": 3.997990578000645e-06, "loss": 0.2726, "step": 13518 }, { "epoch": 0.633297418841055, "grad_norm": 0.5824430802941932, "learning_rate": 3.997838739364457e-06, "loss": 0.2943, "step": 13519 }, { "epoch": 0.6333442638309833, "grad_norm": 0.5567743568610243, "learning_rate": 3.997686892108585e-06, "loss": 0.2696, "step": 13520 }, { "epoch": 0.6333911088209117, "grad_norm": 0.6296529169000633, "learning_rate": 3.9975350362338996e-06, "loss": 0.2868, "step": 13521 }, { "epoch": 0.6334379538108399, "grad_norm": 0.5842509694897335, "learning_rate": 3.997383171741277e-06, "loss": 0.269, "step": 13522 }, { "epoch": 0.6334847988007682, "grad_norm": 0.5978432188182831, "learning_rate": 3.997231298631591e-06, "loss": 0.2709, "step": 13523 }, { "epoch": 0.6335316437906966, "grad_norm": 0.6197767270988975, "learning_rate": 3.997079416905715e-06, "loss": 0.3135, "step": 13524 }, { "epoch": 0.633578488780625, "grad_norm": 0.5745882829159692, "learning_rate": 3.996927526564524e-06, "loss": 0.2572, "step": 13525 }, { "epoch": 0.6336253337705532, "grad_norm": 0.6364022881551684, "learning_rate": 3.99677562760889e-06, "loss": 0.2857, "step": 13526 }, { "epoch": 0.6336721787604815, "grad_norm": 0.6278842717826054, "learning_rate": 3.996623720039688e-06, "loss": 0.2921, "step": 13527 }, { "epoch": 0.6337190237504099, "grad_norm": 0.609554452689521, "learning_rate": 3.996471803857795e-06, "loss": 0.2928, "step": 13528 }, { "epoch": 0.6337658687403382, "grad_norm": 0.6115148535139294, "learning_rate": 3.996319879064081e-06, "loss": 0.2843, "step": 13529 }, { "epoch": 0.6338127137302666, "grad_norm": 0.6257028028782068, "learning_rate": 3.996167945659423e-06, "loss": 0.2828, "step": 13530 }, { "epoch": 0.6338595587201948, "grad_norm": 0.5939651911837528, "learning_rate": 3.996016003644694e-06, "loss": 0.2674, "step": 13531 }, { "epoch": 0.6339064037101232, "grad_norm": 0.6137104433390635, "learning_rate": 3.995864053020769e-06, "loss": 0.2844, "step": 13532 }, { "epoch": 0.6339532487000515, "grad_norm": 0.5568560556566087, "learning_rate": 3.995712093788523e-06, "loss": 0.2792, "step": 13533 }, { "epoch": 0.6340000936899799, "grad_norm": 0.5985845686783818, "learning_rate": 3.995560125948829e-06, "loss": 0.2883, "step": 13534 }, { "epoch": 0.6340469386799081, "grad_norm": 0.6381041940180352, "learning_rate": 3.995408149502563e-06, "loss": 0.2988, "step": 13535 }, { "epoch": 0.6340937836698365, "grad_norm": 0.5455653659954997, "learning_rate": 3.995256164450598e-06, "loss": 0.2811, "step": 13536 }, { "epoch": 0.6341406286597648, "grad_norm": 0.6031598529906301, "learning_rate": 3.995104170793811e-06, "loss": 0.2768, "step": 13537 }, { "epoch": 0.6341874736496932, "grad_norm": 0.6611464821491059, "learning_rate": 3.994952168533074e-06, "loss": 0.302, "step": 13538 }, { "epoch": 0.6342343186396215, "grad_norm": 0.6072411716175559, "learning_rate": 3.994800157669264e-06, "loss": 0.2833, "step": 13539 }, { "epoch": 0.6342811636295498, "grad_norm": 0.6331759998189366, "learning_rate": 3.994648138203255e-06, "loss": 0.2878, "step": 13540 }, { "epoch": 0.6343280086194781, "grad_norm": 0.6030567101613081, "learning_rate": 3.99449611013592e-06, "loss": 0.2975, "step": 13541 }, { "epoch": 0.6343748536094065, "grad_norm": 0.6443621987990763, "learning_rate": 3.9943440734681375e-06, "loss": 0.3029, "step": 13542 }, { "epoch": 0.6344216985993348, "grad_norm": 0.5486182323356932, "learning_rate": 3.994192028200778e-06, "loss": 0.279, "step": 13543 }, { "epoch": 0.6344685435892631, "grad_norm": 0.5813270012987176, "learning_rate": 3.994039974334722e-06, "loss": 0.2753, "step": 13544 }, { "epoch": 0.6345153885791914, "grad_norm": 0.5665300614046657, "learning_rate": 3.993887911870839e-06, "loss": 0.2849, "step": 13545 }, { "epoch": 0.6345622335691198, "grad_norm": 0.6119445485141667, "learning_rate": 3.993735840810007e-06, "loss": 0.2863, "step": 13546 }, { "epoch": 0.6346090785590481, "grad_norm": 0.5707755878019761, "learning_rate": 3.993583761153101e-06, "loss": 0.2733, "step": 13547 }, { "epoch": 0.6346559235489765, "grad_norm": 0.5922130276071528, "learning_rate": 3.993431672900996e-06, "loss": 0.2807, "step": 13548 }, { "epoch": 0.6347027685389047, "grad_norm": 0.6307811670652228, "learning_rate": 3.9932795760545675e-06, "loss": 0.3154, "step": 13549 }, { "epoch": 0.6347496135288331, "grad_norm": 0.638807858073789, "learning_rate": 3.99312747061469e-06, "loss": 0.2957, "step": 13550 }, { "epoch": 0.6347964585187614, "grad_norm": 0.5738049307506546, "learning_rate": 3.992975356582239e-06, "loss": 0.2815, "step": 13551 }, { "epoch": 0.6348433035086898, "grad_norm": 0.6264068414762141, "learning_rate": 3.99282323395809e-06, "loss": 0.2956, "step": 13552 }, { "epoch": 0.634890148498618, "grad_norm": 0.5761108786898174, "learning_rate": 3.992671102743118e-06, "loss": 0.294, "step": 13553 }, { "epoch": 0.6349369934885464, "grad_norm": 0.6785170625919764, "learning_rate": 3.9925189629382e-06, "loss": 0.2996, "step": 13554 }, { "epoch": 0.6349838384784747, "grad_norm": 0.5757293301610611, "learning_rate": 3.99236681454421e-06, "loss": 0.2762, "step": 13555 }, { "epoch": 0.6350306834684031, "grad_norm": 0.6170884775272842, "learning_rate": 3.992214657562025e-06, "loss": 0.277, "step": 13556 }, { "epoch": 0.6350775284583314, "grad_norm": 0.5585421223219208, "learning_rate": 3.992062491992518e-06, "loss": 0.2858, "step": 13557 }, { "epoch": 0.6351243734482597, "grad_norm": 0.6005658111626759, "learning_rate": 3.991910317836568e-06, "loss": 0.2782, "step": 13558 }, { "epoch": 0.635171218438188, "grad_norm": 0.6648126639535975, "learning_rate": 3.991758135095048e-06, "loss": 0.302, "step": 13559 }, { "epoch": 0.6352180634281164, "grad_norm": 0.5769718867102429, "learning_rate": 3.991605943768835e-06, "loss": 0.2664, "step": 13560 }, { "epoch": 0.6352649084180447, "grad_norm": 0.6351432825322214, "learning_rate": 3.9914537438588044e-06, "loss": 0.2774, "step": 13561 }, { "epoch": 0.635311753407973, "grad_norm": 0.6292124468603228, "learning_rate": 3.991301535365834e-06, "loss": 0.3015, "step": 13562 }, { "epoch": 0.6353585983979013, "grad_norm": 0.6055895844561452, "learning_rate": 3.991149318290797e-06, "loss": 0.2921, "step": 13563 }, { "epoch": 0.6354054433878297, "grad_norm": 0.5832691290184688, "learning_rate": 3.99099709263457e-06, "loss": 0.2882, "step": 13564 }, { "epoch": 0.635452288377758, "grad_norm": 0.704301681106654, "learning_rate": 3.99084485839803e-06, "loss": 0.2941, "step": 13565 }, { "epoch": 0.6354991333676864, "grad_norm": 0.5815159935180889, "learning_rate": 3.990692615582053e-06, "loss": 0.2872, "step": 13566 }, { "epoch": 0.6355459783576146, "grad_norm": 0.5853209590967245, "learning_rate": 3.990540364187513e-06, "loss": 0.2862, "step": 13567 }, { "epoch": 0.635592823347543, "grad_norm": 0.5665036536131398, "learning_rate": 3.9903881042152895e-06, "loss": 0.2785, "step": 13568 }, { "epoch": 0.6356396683374713, "grad_norm": 0.6141837217152616, "learning_rate": 3.990235835666257e-06, "loss": 0.2898, "step": 13569 }, { "epoch": 0.6356865133273997, "grad_norm": 0.615822722756833, "learning_rate": 3.99008355854129e-06, "loss": 0.2748, "step": 13570 }, { "epoch": 0.6357333583173279, "grad_norm": 0.5345078563995632, "learning_rate": 3.9899312728412685e-06, "loss": 0.2602, "step": 13571 }, { "epoch": 0.6357802033072563, "grad_norm": 0.5816726871344752, "learning_rate": 3.989778978567067e-06, "loss": 0.2886, "step": 13572 }, { "epoch": 0.6358270482971846, "grad_norm": 0.6187811211002611, "learning_rate": 3.9896266757195615e-06, "loss": 0.3019, "step": 13573 }, { "epoch": 0.635873893287113, "grad_norm": 0.6396998844826765, "learning_rate": 3.989474364299628e-06, "loss": 0.3132, "step": 13574 }, { "epoch": 0.6359207382770413, "grad_norm": 0.5505776372040209, "learning_rate": 3.989322044308145e-06, "loss": 0.2757, "step": 13575 }, { "epoch": 0.6359675832669696, "grad_norm": 0.613226114642687, "learning_rate": 3.989169715745987e-06, "loss": 0.2848, "step": 13576 }, { "epoch": 0.6360144282568979, "grad_norm": 0.6115770720502782, "learning_rate": 3.989017378614033e-06, "loss": 0.2877, "step": 13577 }, { "epoch": 0.6360612732468263, "grad_norm": 0.6053576172644188, "learning_rate": 3.988865032913158e-06, "loss": 0.2948, "step": 13578 }, { "epoch": 0.6361081182367546, "grad_norm": 0.561562172844347, "learning_rate": 3.988712678644237e-06, "loss": 0.2747, "step": 13579 }, { "epoch": 0.6361549632266829, "grad_norm": 0.6065668059148476, "learning_rate": 3.988560315808151e-06, "loss": 0.2821, "step": 13580 }, { "epoch": 0.6362018082166112, "grad_norm": 0.5884083726246417, "learning_rate": 3.9884079444057735e-06, "loss": 0.298, "step": 13581 }, { "epoch": 0.6362486532065396, "grad_norm": 0.5687490825624991, "learning_rate": 3.988255564437982e-06, "loss": 0.2764, "step": 13582 }, { "epoch": 0.6362954981964679, "grad_norm": 0.6095161081145508, "learning_rate": 3.9881031759056546e-06, "loss": 0.2976, "step": 13583 }, { "epoch": 0.6363423431863963, "grad_norm": 0.6501318349567687, "learning_rate": 3.9879507788096675e-06, "loss": 0.3048, "step": 13584 }, { "epoch": 0.6363891881763245, "grad_norm": 0.618312718153338, "learning_rate": 3.9877983731508965e-06, "loss": 0.2763, "step": 13585 }, { "epoch": 0.6364360331662529, "grad_norm": 0.5715652699178854, "learning_rate": 3.987645958930221e-06, "loss": 0.2787, "step": 13586 }, { "epoch": 0.6364828781561812, "grad_norm": 0.5882242142947945, "learning_rate": 3.987493536148517e-06, "loss": 0.2662, "step": 13587 }, { "epoch": 0.6365297231461096, "grad_norm": 0.6539191643991138, "learning_rate": 3.98734110480666e-06, "loss": 0.2844, "step": 13588 }, { "epoch": 0.6365765681360378, "grad_norm": 0.6391356542244098, "learning_rate": 3.98718866490553e-06, "loss": 0.2915, "step": 13589 }, { "epoch": 0.6366234131259662, "grad_norm": 0.5825142163373332, "learning_rate": 3.987036216446003e-06, "loss": 0.2899, "step": 13590 }, { "epoch": 0.6366702581158945, "grad_norm": 0.6190776544111217, "learning_rate": 3.986883759428957e-06, "loss": 0.3075, "step": 13591 }, { "epoch": 0.6367171031058229, "grad_norm": 0.6335015528257926, "learning_rate": 3.986731293855267e-06, "loss": 0.3007, "step": 13592 }, { "epoch": 0.6367639480957512, "grad_norm": 0.5633017248782368, "learning_rate": 3.986578819725813e-06, "loss": 0.2738, "step": 13593 }, { "epoch": 0.6368107930856794, "grad_norm": 0.6296944776261448, "learning_rate": 3.986426337041471e-06, "loss": 0.298, "step": 13594 }, { "epoch": 0.6368576380756078, "grad_norm": 0.5717624567109663, "learning_rate": 3.98627384580312e-06, "loss": 0.278, "step": 13595 }, { "epoch": 0.6369044830655362, "grad_norm": 0.6042967603637942, "learning_rate": 3.9861213460116365e-06, "loss": 0.2791, "step": 13596 }, { "epoch": 0.6369513280554645, "grad_norm": 0.6032032268104573, "learning_rate": 3.985968837667897e-06, "loss": 0.3045, "step": 13597 }, { "epoch": 0.6369981730453927, "grad_norm": 0.5326502992442262, "learning_rate": 3.985816320772782e-06, "loss": 0.2656, "step": 13598 }, { "epoch": 0.6370450180353211, "grad_norm": 0.5854694275641019, "learning_rate": 3.985663795327166e-06, "loss": 0.2745, "step": 13599 }, { "epoch": 0.6370918630252494, "grad_norm": 0.6247614720255713, "learning_rate": 3.985511261331929e-06, "loss": 0.2827, "step": 13600 }, { "epoch": 0.6371387080151778, "grad_norm": 0.5846005885721878, "learning_rate": 3.985358718787948e-06, "loss": 0.2888, "step": 13601 }, { "epoch": 0.6371855530051062, "grad_norm": 0.6460815775028379, "learning_rate": 3.985206167696101e-06, "loss": 0.2925, "step": 13602 }, { "epoch": 0.6372323979950344, "grad_norm": 0.5385764994893824, "learning_rate": 3.985053608057266e-06, "loss": 0.2646, "step": 13603 }, { "epoch": 0.6372792429849627, "grad_norm": 0.580543730882628, "learning_rate": 3.98490103987232e-06, "loss": 0.2752, "step": 13604 }, { "epoch": 0.6373260879748911, "grad_norm": 0.6312413586671831, "learning_rate": 3.984748463142142e-06, "loss": 0.2601, "step": 13605 }, { "epoch": 0.6373729329648194, "grad_norm": 0.5815167436524775, "learning_rate": 3.98459587786761e-06, "loss": 0.2813, "step": 13606 }, { "epoch": 0.6374197779547477, "grad_norm": 0.6208479383064052, "learning_rate": 3.984443284049602e-06, "loss": 0.3056, "step": 13607 }, { "epoch": 0.637466622944676, "grad_norm": 0.6420602153652144, "learning_rate": 3.984290681688996e-06, "loss": 0.2786, "step": 13608 }, { "epoch": 0.6375134679346044, "grad_norm": 0.5603479666263114, "learning_rate": 3.984138070786669e-06, "loss": 0.2669, "step": 13609 }, { "epoch": 0.6375603129245327, "grad_norm": 0.5905519573045559, "learning_rate": 3.983985451343502e-06, "loss": 0.2846, "step": 13610 }, { "epoch": 0.6376071579144611, "grad_norm": 0.6519628089295708, "learning_rate": 3.983832823360371e-06, "loss": 0.2886, "step": 13611 }, { "epoch": 0.6376540029043893, "grad_norm": 0.6005739998353733, "learning_rate": 3.983680186838155e-06, "loss": 0.2951, "step": 13612 }, { "epoch": 0.6377008478943177, "grad_norm": 0.5541778239079382, "learning_rate": 3.983527541777732e-06, "loss": 0.2743, "step": 13613 }, { "epoch": 0.637747692884246, "grad_norm": 0.5924870680947313, "learning_rate": 3.983374888179982e-06, "loss": 0.2817, "step": 13614 }, { "epoch": 0.6377945378741744, "grad_norm": 0.5966453170581588, "learning_rate": 3.9832222260457815e-06, "loss": 0.2969, "step": 13615 }, { "epoch": 0.6378413828641026, "grad_norm": 0.5710685627049447, "learning_rate": 3.9830695553760104e-06, "loss": 0.2754, "step": 13616 }, { "epoch": 0.637888227854031, "grad_norm": 0.6206273850935746, "learning_rate": 3.9829168761715464e-06, "loss": 0.2897, "step": 13617 }, { "epoch": 0.6379350728439593, "grad_norm": 0.630201475480145, "learning_rate": 3.982764188433269e-06, "loss": 0.3074, "step": 13618 }, { "epoch": 0.6379819178338877, "grad_norm": 0.6261807829332343, "learning_rate": 3.982611492162055e-06, "loss": 0.3018, "step": 13619 }, { "epoch": 0.638028762823816, "grad_norm": 0.5899178200069201, "learning_rate": 3.982458787358786e-06, "loss": 0.2681, "step": 13620 }, { "epoch": 0.6380756078137443, "grad_norm": 0.6745903205916834, "learning_rate": 3.982306074024338e-06, "loss": 0.2949, "step": 13621 }, { "epoch": 0.6381224528036726, "grad_norm": 0.5690724313495984, "learning_rate": 3.982153352159592e-06, "loss": 0.2819, "step": 13622 }, { "epoch": 0.638169297793601, "grad_norm": 0.5747438351250539, "learning_rate": 3.982000621765426e-06, "loss": 0.288, "step": 13623 }, { "epoch": 0.6382161427835293, "grad_norm": 0.657726904453845, "learning_rate": 3.981847882842719e-06, "loss": 0.2841, "step": 13624 }, { "epoch": 0.6382629877734576, "grad_norm": 0.6077501709268471, "learning_rate": 3.981695135392349e-06, "loss": 0.2889, "step": 13625 }, { "epoch": 0.6383098327633859, "grad_norm": 0.5837134630181094, "learning_rate": 3.981542379415197e-06, "loss": 0.2864, "step": 13626 }, { "epoch": 0.6383566777533143, "grad_norm": 0.589621002854239, "learning_rate": 3.981389614912141e-06, "loss": 0.2674, "step": 13627 }, { "epoch": 0.6384035227432426, "grad_norm": 0.5700641592631721, "learning_rate": 3.9812368418840595e-06, "loss": 0.2766, "step": 13628 }, { "epoch": 0.638450367733171, "grad_norm": 0.5802733882208887, "learning_rate": 3.981084060331832e-06, "loss": 0.2755, "step": 13629 }, { "epoch": 0.6384972127230992, "grad_norm": 0.5858901385564104, "learning_rate": 3.980931270256339e-06, "loss": 0.281, "step": 13630 }, { "epoch": 0.6385440577130276, "grad_norm": 0.5709524403618045, "learning_rate": 3.980778471658458e-06, "loss": 0.2916, "step": 13631 }, { "epoch": 0.6385909027029559, "grad_norm": 0.5842238508282528, "learning_rate": 3.980625664539069e-06, "loss": 0.2906, "step": 13632 }, { "epoch": 0.6386377476928843, "grad_norm": 0.5797049067854314, "learning_rate": 3.980472848899052e-06, "loss": 0.2882, "step": 13633 }, { "epoch": 0.6386845926828125, "grad_norm": 0.6287872855464957, "learning_rate": 3.980320024739286e-06, "loss": 0.3055, "step": 13634 }, { "epoch": 0.6387314376727409, "grad_norm": 0.6466342703266006, "learning_rate": 3.980167192060649e-06, "loss": 0.303, "step": 13635 }, { "epoch": 0.6387782826626692, "grad_norm": 0.594339631124179, "learning_rate": 3.980014350864023e-06, "loss": 0.2861, "step": 13636 }, { "epoch": 0.6388251276525976, "grad_norm": 0.5743789526412927, "learning_rate": 3.979861501150286e-06, "loss": 0.2896, "step": 13637 }, { "epoch": 0.6388719726425259, "grad_norm": 0.6048711678760611, "learning_rate": 3.979708642920318e-06, "loss": 0.2758, "step": 13638 }, { "epoch": 0.6389188176324542, "grad_norm": 0.6093949585548386, "learning_rate": 3.979555776174999e-06, "loss": 0.2917, "step": 13639 }, { "epoch": 0.6389656626223825, "grad_norm": 0.5849740151740979, "learning_rate": 3.9794029009152076e-06, "loss": 0.2851, "step": 13640 }, { "epoch": 0.6390125076123109, "grad_norm": 0.578698075641313, "learning_rate": 3.979250017141826e-06, "loss": 0.2734, "step": 13641 }, { "epoch": 0.6390593526022392, "grad_norm": 0.5625413554231692, "learning_rate": 3.979097124855731e-06, "loss": 0.2821, "step": 13642 }, { "epoch": 0.6391061975921675, "grad_norm": 0.6163962992562717, "learning_rate": 3.978944224057804e-06, "loss": 0.2786, "step": 13643 }, { "epoch": 0.6391530425820958, "grad_norm": 0.6657007966780579, "learning_rate": 3.978791314748924e-06, "loss": 0.2954, "step": 13644 }, { "epoch": 0.6391998875720242, "grad_norm": 0.6528007092019686, "learning_rate": 3.978638396929972e-06, "loss": 0.2956, "step": 13645 }, { "epoch": 0.6392467325619525, "grad_norm": 0.5501880437251718, "learning_rate": 3.978485470601829e-06, "loss": 0.2715, "step": 13646 }, { "epoch": 0.6392935775518809, "grad_norm": 0.6117232701810529, "learning_rate": 3.978332535765373e-06, "loss": 0.2909, "step": 13647 }, { "epoch": 0.6393404225418091, "grad_norm": 0.5610408594500108, "learning_rate": 3.978179592421484e-06, "loss": 0.2932, "step": 13648 }, { "epoch": 0.6393872675317375, "grad_norm": 0.6719499429226643, "learning_rate": 3.978026640571044e-06, "loss": 0.2972, "step": 13649 }, { "epoch": 0.6394341125216658, "grad_norm": 0.5831635808560472, "learning_rate": 3.977873680214932e-06, "loss": 0.2779, "step": 13650 }, { "epoch": 0.6394809575115942, "grad_norm": 0.598715700121829, "learning_rate": 3.977720711354028e-06, "loss": 0.2905, "step": 13651 }, { "epoch": 0.6395278025015224, "grad_norm": 0.5946495618338751, "learning_rate": 3.977567733989213e-06, "loss": 0.2679, "step": 13652 }, { "epoch": 0.6395746474914508, "grad_norm": 0.5815021836691635, "learning_rate": 3.977414748121366e-06, "loss": 0.2931, "step": 13653 }, { "epoch": 0.6396214924813791, "grad_norm": 0.6575442336557421, "learning_rate": 3.97726175375137e-06, "loss": 0.296, "step": 13654 }, { "epoch": 0.6396683374713075, "grad_norm": 0.5757815350729231, "learning_rate": 3.977108750880103e-06, "loss": 0.3053, "step": 13655 }, { "epoch": 0.6397151824612358, "grad_norm": 0.542822242762406, "learning_rate": 3.976955739508447e-06, "loss": 0.2862, "step": 13656 }, { "epoch": 0.6397620274511641, "grad_norm": 0.5834916216115447, "learning_rate": 3.976802719637281e-06, "loss": 0.2717, "step": 13657 }, { "epoch": 0.6398088724410924, "grad_norm": 0.5851475451818385, "learning_rate": 3.976649691267488e-06, "loss": 0.2821, "step": 13658 }, { "epoch": 0.6398557174310208, "grad_norm": 0.5698762841171761, "learning_rate": 3.976496654399946e-06, "loss": 0.2588, "step": 13659 }, { "epoch": 0.6399025624209491, "grad_norm": 0.5554976934799786, "learning_rate": 3.976343609035538e-06, "loss": 0.2808, "step": 13660 }, { "epoch": 0.6399494074108774, "grad_norm": 0.6245752781155823, "learning_rate": 3.9761905551751426e-06, "loss": 0.301, "step": 13661 }, { "epoch": 0.6399962524008057, "grad_norm": 0.5583419543300091, "learning_rate": 3.976037492819641e-06, "loss": 0.2846, "step": 13662 }, { "epoch": 0.6400430973907341, "grad_norm": 0.5618262558730209, "learning_rate": 3.975884421969916e-06, "loss": 0.2885, "step": 13663 }, { "epoch": 0.6400899423806624, "grad_norm": 0.6287890089687284, "learning_rate": 3.975731342626846e-06, "loss": 0.2814, "step": 13664 }, { "epoch": 0.6401367873705908, "grad_norm": 0.6272716831307515, "learning_rate": 3.975578254791313e-06, "loss": 0.2871, "step": 13665 }, { "epoch": 0.640183632360519, "grad_norm": 0.5809951813068265, "learning_rate": 3.9754251584641995e-06, "loss": 0.2914, "step": 13666 }, { "epoch": 0.6402304773504474, "grad_norm": 0.59014507525766, "learning_rate": 3.9752720536463844e-06, "loss": 0.2857, "step": 13667 }, { "epoch": 0.6402773223403757, "grad_norm": 0.6030166678780612, "learning_rate": 3.975118940338749e-06, "loss": 0.3054, "step": 13668 }, { "epoch": 0.6403241673303041, "grad_norm": 0.5858172485617921, "learning_rate": 3.974965818542174e-06, "loss": 0.2934, "step": 13669 }, { "epoch": 0.6403710123202323, "grad_norm": 0.641068149664545, "learning_rate": 3.974812688257544e-06, "loss": 0.2818, "step": 13670 }, { "epoch": 0.6404178573101607, "grad_norm": 0.622363262948413, "learning_rate": 3.974659549485735e-06, "loss": 0.296, "step": 13671 }, { "epoch": 0.640464702300089, "grad_norm": 0.6546039685849948, "learning_rate": 3.9745064022276324e-06, "loss": 0.2776, "step": 13672 }, { "epoch": 0.6405115472900174, "grad_norm": 0.6534865250438423, "learning_rate": 3.974353246484115e-06, "loss": 0.2764, "step": 13673 }, { "epoch": 0.6405583922799457, "grad_norm": 0.6168987313181397, "learning_rate": 3.974200082256067e-06, "loss": 0.2866, "step": 13674 }, { "epoch": 0.640605237269874, "grad_norm": 0.6106053061193233, "learning_rate": 3.974046909544367e-06, "loss": 0.2797, "step": 13675 }, { "epoch": 0.6406520822598023, "grad_norm": 0.5847175650671136, "learning_rate": 3.973893728349897e-06, "loss": 0.2766, "step": 13676 }, { "epoch": 0.6406989272497307, "grad_norm": 0.5695889560964699, "learning_rate": 3.9737405386735395e-06, "loss": 0.2608, "step": 13677 }, { "epoch": 0.640745772239659, "grad_norm": 0.6136368593204978, "learning_rate": 3.973587340516176e-06, "loss": 0.3061, "step": 13678 }, { "epoch": 0.6407926172295872, "grad_norm": 0.5611614803477947, "learning_rate": 3.9734341338786866e-06, "loss": 0.284, "step": 13679 }, { "epoch": 0.6408394622195156, "grad_norm": 0.5764324750186617, "learning_rate": 3.973280918761955e-06, "loss": 0.2842, "step": 13680 }, { "epoch": 0.640886307209444, "grad_norm": 0.5685151567238342, "learning_rate": 3.973127695166862e-06, "loss": 0.2769, "step": 13681 }, { "epoch": 0.6409331521993723, "grad_norm": 0.8105748156084511, "learning_rate": 3.972974463094289e-06, "loss": 0.2871, "step": 13682 }, { "epoch": 0.6409799971893007, "grad_norm": 0.5643489460906184, "learning_rate": 3.972821222545119e-06, "loss": 0.2727, "step": 13683 }, { "epoch": 0.6410268421792289, "grad_norm": 0.6458639266730776, "learning_rate": 3.972667973520232e-06, "loss": 0.3171, "step": 13684 }, { "epoch": 0.6410736871691572, "grad_norm": 0.5661335283559711, "learning_rate": 3.972514716020511e-06, "loss": 0.2754, "step": 13685 }, { "epoch": 0.6411205321590856, "grad_norm": 0.5497925122841858, "learning_rate": 3.972361450046839e-06, "loss": 0.2789, "step": 13686 }, { "epoch": 0.641167377149014, "grad_norm": 0.6207108880871169, "learning_rate": 3.972208175600096e-06, "loss": 0.3034, "step": 13687 }, { "epoch": 0.6412142221389422, "grad_norm": 0.6178671543251691, "learning_rate": 3.972054892681165e-06, "loss": 0.2875, "step": 13688 }, { "epoch": 0.6412610671288705, "grad_norm": 0.6044026323256386, "learning_rate": 3.971901601290928e-06, "loss": 0.281, "step": 13689 }, { "epoch": 0.6413079121187989, "grad_norm": 0.6458624071925886, "learning_rate": 3.971748301430268e-06, "loss": 0.3049, "step": 13690 }, { "epoch": 0.6413547571087272, "grad_norm": 0.6207426623212942, "learning_rate": 3.971594993100066e-06, "loss": 0.2943, "step": 13691 }, { "epoch": 0.6414016020986556, "grad_norm": 0.5928521823225321, "learning_rate": 3.971441676301204e-06, "loss": 0.2701, "step": 13692 }, { "epoch": 0.6414484470885838, "grad_norm": 0.5835489927267791, "learning_rate": 3.971288351034567e-06, "loss": 0.2726, "step": 13693 }, { "epoch": 0.6414952920785122, "grad_norm": 0.5860900269980874, "learning_rate": 3.971135017301033e-06, "loss": 0.2705, "step": 13694 }, { "epoch": 0.6415421370684405, "grad_norm": 0.5773113827869021, "learning_rate": 3.9709816751014876e-06, "loss": 0.2699, "step": 13695 }, { "epoch": 0.6415889820583689, "grad_norm": 0.5742850615489169, "learning_rate": 3.970828324436813e-06, "loss": 0.2753, "step": 13696 }, { "epoch": 0.6416358270482971, "grad_norm": 0.5307134930795973, "learning_rate": 3.97067496530789e-06, "loss": 0.2609, "step": 13697 }, { "epoch": 0.6416826720382255, "grad_norm": 0.6156614350853614, "learning_rate": 3.970521597715603e-06, "loss": 0.2878, "step": 13698 }, { "epoch": 0.6417295170281538, "grad_norm": 0.6418675452495778, "learning_rate": 3.970368221660834e-06, "loss": 0.272, "step": 13699 }, { "epoch": 0.6417763620180822, "grad_norm": 0.5542390990114148, "learning_rate": 3.970214837144465e-06, "loss": 0.2849, "step": 13700 }, { "epoch": 0.6418232070080105, "grad_norm": 0.5646795867885114, "learning_rate": 3.9700614441673794e-06, "loss": 0.2754, "step": 13701 }, { "epoch": 0.6418700519979388, "grad_norm": 0.5976681439893242, "learning_rate": 3.969908042730459e-06, "loss": 0.2972, "step": 13702 }, { "epoch": 0.6419168969878671, "grad_norm": 0.5867010473994497, "learning_rate": 3.969754632834588e-06, "loss": 0.2834, "step": 13703 }, { "epoch": 0.6419637419777955, "grad_norm": 0.5551952538504826, "learning_rate": 3.969601214480648e-06, "loss": 0.2649, "step": 13704 }, { "epoch": 0.6420105869677238, "grad_norm": 0.5771265859893533, "learning_rate": 3.969447787669522e-06, "loss": 0.2999, "step": 13705 }, { "epoch": 0.6420574319576521, "grad_norm": 0.6074502777102534, "learning_rate": 3.969294352402094e-06, "loss": 0.2764, "step": 13706 }, { "epoch": 0.6421042769475804, "grad_norm": 0.6009835748108796, "learning_rate": 3.969140908679246e-06, "loss": 0.2628, "step": 13707 }, { "epoch": 0.6421511219375088, "grad_norm": 0.6037522807106274, "learning_rate": 3.968987456501862e-06, "loss": 0.2789, "step": 13708 }, { "epoch": 0.6421979669274371, "grad_norm": 0.5660408397497716, "learning_rate": 3.968833995870824e-06, "loss": 0.2699, "step": 13709 }, { "epoch": 0.6422448119173655, "grad_norm": 0.5922390288603402, "learning_rate": 3.968680526787015e-06, "loss": 0.3032, "step": 13710 }, { "epoch": 0.6422916569072937, "grad_norm": 0.5815974037287462, "learning_rate": 3.968527049251319e-06, "loss": 0.2739, "step": 13711 }, { "epoch": 0.6423385018972221, "grad_norm": 0.6257597539841684, "learning_rate": 3.968373563264619e-06, "loss": 0.2803, "step": 13712 }, { "epoch": 0.6423853468871504, "grad_norm": 0.6525816731483384, "learning_rate": 3.9682200688277984e-06, "loss": 0.288, "step": 13713 }, { "epoch": 0.6424321918770788, "grad_norm": 0.5353119739223096, "learning_rate": 3.9680665659417405e-06, "loss": 0.2696, "step": 13714 }, { "epoch": 0.642479036867007, "grad_norm": 0.574572098865155, "learning_rate": 3.967913054607328e-06, "loss": 0.2818, "step": 13715 }, { "epoch": 0.6425258818569354, "grad_norm": 0.6058766919191022, "learning_rate": 3.967759534825446e-06, "loss": 0.2742, "step": 13716 }, { "epoch": 0.6425727268468637, "grad_norm": 0.6058316372378244, "learning_rate": 3.967606006596976e-06, "loss": 0.2963, "step": 13717 }, { "epoch": 0.6426195718367921, "grad_norm": 0.5933065365667295, "learning_rate": 3.967452469922801e-06, "loss": 0.2881, "step": 13718 }, { "epoch": 0.6426664168267204, "grad_norm": 0.6273534783878277, "learning_rate": 3.967298924803807e-06, "loss": 0.306, "step": 13719 }, { "epoch": 0.6427132618166487, "grad_norm": 0.5465646080433685, "learning_rate": 3.967145371240877e-06, "loss": 0.2729, "step": 13720 }, { "epoch": 0.642760106806577, "grad_norm": 0.6542001586595054, "learning_rate": 3.966991809234894e-06, "loss": 0.3074, "step": 13721 }, { "epoch": 0.6428069517965054, "grad_norm": 0.5775143520849287, "learning_rate": 3.966838238786741e-06, "loss": 0.2846, "step": 13722 }, { "epoch": 0.6428537967864337, "grad_norm": 0.607256579415405, "learning_rate": 3.966684659897303e-06, "loss": 0.2888, "step": 13723 }, { "epoch": 0.642900641776362, "grad_norm": 0.6008881298589833, "learning_rate": 3.966531072567463e-06, "loss": 0.2884, "step": 13724 }, { "epoch": 0.6429474867662903, "grad_norm": 0.6085082042403537, "learning_rate": 3.9663774767981065e-06, "loss": 0.3007, "step": 13725 }, { "epoch": 0.6429943317562187, "grad_norm": 0.5782062119628953, "learning_rate": 3.966223872590116e-06, "loss": 0.2634, "step": 13726 }, { "epoch": 0.643041176746147, "grad_norm": 0.59834194603078, "learning_rate": 3.966070259944375e-06, "loss": 0.2885, "step": 13727 }, { "epoch": 0.6430880217360754, "grad_norm": 0.6085148599578977, "learning_rate": 3.965916638861768e-06, "loss": 0.2919, "step": 13728 }, { "epoch": 0.6431348667260036, "grad_norm": 0.5768789941581058, "learning_rate": 3.965763009343179e-06, "loss": 0.2909, "step": 13729 }, { "epoch": 0.643181711715932, "grad_norm": 0.5607862437697082, "learning_rate": 3.965609371389493e-06, "loss": 0.2871, "step": 13730 }, { "epoch": 0.6432285567058603, "grad_norm": 0.5370197039878193, "learning_rate": 3.965455725001592e-06, "loss": 0.2471, "step": 13731 }, { "epoch": 0.6432754016957887, "grad_norm": 0.5894725975199426, "learning_rate": 3.965302070180364e-06, "loss": 0.2866, "step": 13732 }, { "epoch": 0.6433222466857169, "grad_norm": 0.5877153918087462, "learning_rate": 3.965148406926689e-06, "loss": 0.2932, "step": 13733 }, { "epoch": 0.6433690916756453, "grad_norm": 0.5756621861265867, "learning_rate": 3.964994735241454e-06, "loss": 0.2845, "step": 13734 }, { "epoch": 0.6434159366655736, "grad_norm": 0.7164766460069718, "learning_rate": 3.964841055125542e-06, "loss": 0.3057, "step": 13735 }, { "epoch": 0.643462781655502, "grad_norm": 0.5949122159646004, "learning_rate": 3.964687366579838e-06, "loss": 0.2755, "step": 13736 }, { "epoch": 0.6435096266454303, "grad_norm": 0.6100296283885394, "learning_rate": 3.964533669605227e-06, "loss": 0.2817, "step": 13737 }, { "epoch": 0.6435564716353586, "grad_norm": 0.5672892383195073, "learning_rate": 3.964379964202593e-06, "loss": 0.2714, "step": 13738 }, { "epoch": 0.6436033166252869, "grad_norm": 0.5997496603339375, "learning_rate": 3.964226250372819e-06, "loss": 0.2978, "step": 13739 }, { "epoch": 0.6436501616152153, "grad_norm": 0.5866646089577395, "learning_rate": 3.964072528116792e-06, "loss": 0.3096, "step": 13740 }, { "epoch": 0.6436970066051436, "grad_norm": 0.5635622236017213, "learning_rate": 3.963918797435395e-06, "loss": 0.2673, "step": 13741 }, { "epoch": 0.6437438515950719, "grad_norm": 0.5893750525226691, "learning_rate": 3.963765058329514e-06, "loss": 0.2636, "step": 13742 }, { "epoch": 0.6437906965850002, "grad_norm": 0.5493523175999735, "learning_rate": 3.963611310800032e-06, "loss": 0.2738, "step": 13743 }, { "epoch": 0.6438375415749286, "grad_norm": 0.6269128157276967, "learning_rate": 3.963457554847835e-06, "loss": 0.3039, "step": 13744 }, { "epoch": 0.6438843865648569, "grad_norm": 0.5523566770920441, "learning_rate": 3.9633037904738084e-06, "loss": 0.2837, "step": 13745 }, { "epoch": 0.6439312315547853, "grad_norm": 0.5859052635782604, "learning_rate": 3.963150017678836e-06, "loss": 0.2843, "step": 13746 }, { "epoch": 0.6439780765447135, "grad_norm": 0.5786724295928382, "learning_rate": 3.962996236463802e-06, "loss": 0.2952, "step": 13747 }, { "epoch": 0.6440249215346419, "grad_norm": 0.5341394858192114, "learning_rate": 3.962842446829594e-06, "loss": 0.2782, "step": 13748 }, { "epoch": 0.6440717665245702, "grad_norm": 0.6430054229624946, "learning_rate": 3.962688648777094e-06, "loss": 0.2978, "step": 13749 }, { "epoch": 0.6441186115144986, "grad_norm": 0.6051679965972674, "learning_rate": 3.962534842307189e-06, "loss": 0.2916, "step": 13750 }, { "epoch": 0.6441654565044268, "grad_norm": 0.5617997922499699, "learning_rate": 3.962381027420764e-06, "loss": 0.2776, "step": 13751 }, { "epoch": 0.6442123014943552, "grad_norm": 0.5690950436376916, "learning_rate": 3.962227204118702e-06, "loss": 0.2772, "step": 13752 }, { "epoch": 0.6442591464842835, "grad_norm": 0.5796588255797347, "learning_rate": 3.962073372401893e-06, "loss": 0.2584, "step": 13753 }, { "epoch": 0.6443059914742119, "grad_norm": 0.5620998287060566, "learning_rate": 3.961919532271217e-06, "loss": 0.2854, "step": 13754 }, { "epoch": 0.6443528364641402, "grad_norm": 0.5754451505411388, "learning_rate": 3.961765683727562e-06, "loss": 0.2718, "step": 13755 }, { "epoch": 0.6443996814540685, "grad_norm": 0.5827675460368266, "learning_rate": 3.961611826771813e-06, "loss": 0.2778, "step": 13756 }, { "epoch": 0.6444465264439968, "grad_norm": 0.5754851337435177, "learning_rate": 3.961457961404855e-06, "loss": 0.2775, "step": 13757 }, { "epoch": 0.6444933714339252, "grad_norm": 0.6270085804006551, "learning_rate": 3.961304087627574e-06, "loss": 0.3017, "step": 13758 }, { "epoch": 0.6445402164238535, "grad_norm": 0.6174408142801037, "learning_rate": 3.961150205440854e-06, "loss": 0.2836, "step": 13759 }, { "epoch": 0.6445870614137817, "grad_norm": 0.6103657498947168, "learning_rate": 3.960996314845584e-06, "loss": 0.2843, "step": 13760 }, { "epoch": 0.6446339064037101, "grad_norm": 0.6074051891140889, "learning_rate": 3.960842415842646e-06, "loss": 0.2705, "step": 13761 }, { "epoch": 0.6446807513936385, "grad_norm": 0.5733857383925117, "learning_rate": 3.960688508432928e-06, "loss": 0.2526, "step": 13762 }, { "epoch": 0.6447275963835668, "grad_norm": 0.6929095504214725, "learning_rate": 3.960534592617315e-06, "loss": 0.3253, "step": 13763 }, { "epoch": 0.6447744413734952, "grad_norm": 0.6442462885240633, "learning_rate": 3.960380668396691e-06, "loss": 0.2794, "step": 13764 }, { "epoch": 0.6448212863634234, "grad_norm": 0.6443074913588004, "learning_rate": 3.960226735771943e-06, "loss": 0.3028, "step": 13765 }, { "epoch": 0.6448681313533517, "grad_norm": 0.5912375313167865, "learning_rate": 3.96007279474396e-06, "loss": 0.3079, "step": 13766 }, { "epoch": 0.6449149763432801, "grad_norm": 0.5864877124155982, "learning_rate": 3.959918845313623e-06, "loss": 0.2649, "step": 13767 }, { "epoch": 0.6449618213332085, "grad_norm": 0.5621023675309644, "learning_rate": 3.95976488748182e-06, "loss": 0.2553, "step": 13768 }, { "epoch": 0.6450086663231367, "grad_norm": 0.619494231384424, "learning_rate": 3.959610921249437e-06, "loss": 0.2979, "step": 13769 }, { "epoch": 0.645055511313065, "grad_norm": 0.6234437715856117, "learning_rate": 3.95945694661736e-06, "loss": 0.2815, "step": 13770 }, { "epoch": 0.6451023563029934, "grad_norm": 0.6173283185728367, "learning_rate": 3.959302963586476e-06, "loss": 0.2976, "step": 13771 }, { "epoch": 0.6451492012929217, "grad_norm": 0.6031360304070778, "learning_rate": 3.959148972157669e-06, "loss": 0.2604, "step": 13772 }, { "epoch": 0.6451960462828501, "grad_norm": 0.5640911188802378, "learning_rate": 3.958994972331827e-06, "loss": 0.2792, "step": 13773 }, { "epoch": 0.6452428912727783, "grad_norm": 0.5680570976134236, "learning_rate": 3.958840964109836e-06, "loss": 0.2969, "step": 13774 }, { "epoch": 0.6452897362627067, "grad_norm": 0.6216966095053764, "learning_rate": 3.958686947492582e-06, "loss": 0.2973, "step": 13775 }, { "epoch": 0.645336581252635, "grad_norm": 0.5725927734796615, "learning_rate": 3.9585329224809505e-06, "loss": 0.2774, "step": 13776 }, { "epoch": 0.6453834262425634, "grad_norm": 0.5612656179701131, "learning_rate": 3.958378889075829e-06, "loss": 0.2806, "step": 13777 }, { "epoch": 0.6454302712324916, "grad_norm": 0.5713121975181924, "learning_rate": 3.9582248472781035e-06, "loss": 0.2631, "step": 13778 }, { "epoch": 0.64547711622242, "grad_norm": 0.5323746874462213, "learning_rate": 3.95807079708866e-06, "loss": 0.2714, "step": 13779 }, { "epoch": 0.6455239612123483, "grad_norm": 0.6083174459174214, "learning_rate": 3.957916738508386e-06, "loss": 0.2912, "step": 13780 }, { "epoch": 0.6455708062022767, "grad_norm": 0.6194986538880283, "learning_rate": 3.957762671538168e-06, "loss": 0.2972, "step": 13781 }, { "epoch": 0.645617651192205, "grad_norm": 0.6145198931725171, "learning_rate": 3.957608596178891e-06, "loss": 0.2887, "step": 13782 }, { "epoch": 0.6456644961821333, "grad_norm": 0.5688237216490135, "learning_rate": 3.957454512431445e-06, "loss": 0.3027, "step": 13783 }, { "epoch": 0.6457113411720616, "grad_norm": 0.5805704911918916, "learning_rate": 3.957300420296713e-06, "loss": 0.2847, "step": 13784 }, { "epoch": 0.64575818616199, "grad_norm": 0.5444000658836886, "learning_rate": 3.957146319775583e-06, "loss": 0.2792, "step": 13785 }, { "epoch": 0.6458050311519183, "grad_norm": 0.5740580072661847, "learning_rate": 3.956992210868943e-06, "loss": 0.2976, "step": 13786 }, { "epoch": 0.6458518761418466, "grad_norm": 0.6341919302585713, "learning_rate": 3.956838093577679e-06, "loss": 0.28, "step": 13787 }, { "epoch": 0.6458987211317749, "grad_norm": 0.5589173061097568, "learning_rate": 3.956683967902678e-06, "loss": 0.2737, "step": 13788 }, { "epoch": 0.6459455661217033, "grad_norm": 0.602788728526704, "learning_rate": 3.956529833844827e-06, "loss": 0.2914, "step": 13789 }, { "epoch": 0.6459924111116316, "grad_norm": 0.5924080548006032, "learning_rate": 3.956375691405012e-06, "loss": 0.2944, "step": 13790 }, { "epoch": 0.64603925610156, "grad_norm": 0.5446391707154924, "learning_rate": 3.956221540584122e-06, "loss": 0.2725, "step": 13791 }, { "epoch": 0.6460861010914882, "grad_norm": 0.6031563935422198, "learning_rate": 3.956067381383042e-06, "loss": 0.2867, "step": 13792 }, { "epoch": 0.6461329460814166, "grad_norm": 0.5545743496179194, "learning_rate": 3.955913213802661e-06, "loss": 0.278, "step": 13793 }, { "epoch": 0.6461797910713449, "grad_norm": 0.6169668176072926, "learning_rate": 3.955759037843865e-06, "loss": 0.305, "step": 13794 }, { "epoch": 0.6462266360612733, "grad_norm": 0.5797026651429625, "learning_rate": 3.955604853507542e-06, "loss": 0.2761, "step": 13795 }, { "epoch": 0.6462734810512015, "grad_norm": 0.605502880611484, "learning_rate": 3.9554506607945786e-06, "loss": 0.2851, "step": 13796 }, { "epoch": 0.6463203260411299, "grad_norm": 0.619264425966528, "learning_rate": 3.955296459705862e-06, "loss": 0.2759, "step": 13797 }, { "epoch": 0.6463671710310582, "grad_norm": 0.6611457248952445, "learning_rate": 3.955142250242281e-06, "loss": 0.2946, "step": 13798 }, { "epoch": 0.6464140160209866, "grad_norm": 0.5519781728414852, "learning_rate": 3.954988032404722e-06, "loss": 0.2843, "step": 13799 }, { "epoch": 0.6464608610109149, "grad_norm": 0.5710857100457343, "learning_rate": 3.954833806194072e-06, "loss": 0.275, "step": 13800 }, { "epoch": 0.6465077060008432, "grad_norm": 0.5661111343380697, "learning_rate": 3.954679571611219e-06, "loss": 0.2738, "step": 13801 }, { "epoch": 0.6465545509907715, "grad_norm": 0.5946106682722774, "learning_rate": 3.954525328657051e-06, "loss": 0.285, "step": 13802 }, { "epoch": 0.6466013959806999, "grad_norm": 0.5381024894209583, "learning_rate": 3.954371077332456e-06, "loss": 0.2686, "step": 13803 }, { "epoch": 0.6466482409706282, "grad_norm": 0.610225141966667, "learning_rate": 3.95421681763832e-06, "loss": 0.2827, "step": 13804 }, { "epoch": 0.6466950859605565, "grad_norm": 0.5778104665373857, "learning_rate": 3.954062549575532e-06, "loss": 0.2855, "step": 13805 }, { "epoch": 0.6467419309504848, "grad_norm": 0.5915715138342753, "learning_rate": 3.953908273144979e-06, "loss": 0.2789, "step": 13806 }, { "epoch": 0.6467887759404132, "grad_norm": 0.5496467259212017, "learning_rate": 3.953753988347551e-06, "loss": 0.2872, "step": 13807 }, { "epoch": 0.6468356209303415, "grad_norm": 0.609360831812123, "learning_rate": 3.953599695184132e-06, "loss": 0.2752, "step": 13808 }, { "epoch": 0.6468824659202699, "grad_norm": 0.5974218151398752, "learning_rate": 3.953445393655614e-06, "loss": 0.2823, "step": 13809 }, { "epoch": 0.6469293109101981, "grad_norm": 0.59358574496529, "learning_rate": 3.953291083762882e-06, "loss": 0.2895, "step": 13810 }, { "epoch": 0.6469761559001265, "grad_norm": 0.580060381701432, "learning_rate": 3.9531367655068245e-06, "loss": 0.2705, "step": 13811 }, { "epoch": 0.6470230008900548, "grad_norm": 0.5746360863558982, "learning_rate": 3.952982438888332e-06, "loss": 0.2604, "step": 13812 }, { "epoch": 0.6470698458799832, "grad_norm": 0.6355650951494118, "learning_rate": 3.952828103908289e-06, "loss": 0.2892, "step": 13813 }, { "epoch": 0.6471166908699114, "grad_norm": 0.545555353183869, "learning_rate": 3.952673760567587e-06, "loss": 0.2834, "step": 13814 }, { "epoch": 0.6471635358598398, "grad_norm": 0.6197554106697788, "learning_rate": 3.952519408867112e-06, "loss": 0.2759, "step": 13815 }, { "epoch": 0.6472103808497681, "grad_norm": 0.5956228110661409, "learning_rate": 3.952365048807752e-06, "loss": 0.2891, "step": 13816 }, { "epoch": 0.6472572258396965, "grad_norm": 0.5993919440705116, "learning_rate": 3.9522106803903965e-06, "loss": 0.2926, "step": 13817 }, { "epoch": 0.6473040708296248, "grad_norm": 0.5693660348544072, "learning_rate": 3.952056303615935e-06, "loss": 0.2833, "step": 13818 }, { "epoch": 0.6473509158195531, "grad_norm": 0.5788233227912862, "learning_rate": 3.951901918485252e-06, "loss": 0.2734, "step": 13819 }, { "epoch": 0.6473977608094814, "grad_norm": 0.6044575820835194, "learning_rate": 3.9517475249992395e-06, "loss": 0.2805, "step": 13820 }, { "epoch": 0.6474446057994098, "grad_norm": 0.5878736637712263, "learning_rate": 3.951593123158786e-06, "loss": 0.282, "step": 13821 }, { "epoch": 0.6474914507893381, "grad_norm": 0.5606420822962983, "learning_rate": 3.951438712964777e-06, "loss": 0.2783, "step": 13822 }, { "epoch": 0.6475382957792664, "grad_norm": 0.6080182442141487, "learning_rate": 3.951284294418104e-06, "loss": 0.2734, "step": 13823 }, { "epoch": 0.6475851407691947, "grad_norm": 0.6340813673620468, "learning_rate": 3.951129867519655e-06, "loss": 0.2869, "step": 13824 }, { "epoch": 0.6476319857591231, "grad_norm": 0.618066649405146, "learning_rate": 3.950975432270318e-06, "loss": 0.2979, "step": 13825 }, { "epoch": 0.6476788307490514, "grad_norm": 0.6371859060121773, "learning_rate": 3.950820988670982e-06, "loss": 0.2909, "step": 13826 }, { "epoch": 0.6477256757389798, "grad_norm": 0.5327673140810265, "learning_rate": 3.950666536722536e-06, "loss": 0.2499, "step": 13827 }, { "epoch": 0.647772520728908, "grad_norm": 0.5761664738191195, "learning_rate": 3.9505120764258686e-06, "loss": 0.2677, "step": 13828 }, { "epoch": 0.6478193657188364, "grad_norm": 0.6626952104848968, "learning_rate": 3.9503576077818686e-06, "loss": 0.2649, "step": 13829 }, { "epoch": 0.6478662107087647, "grad_norm": 0.615856477692264, "learning_rate": 3.950203130791426e-06, "loss": 0.2738, "step": 13830 }, { "epoch": 0.6479130556986931, "grad_norm": 0.5775009258855265, "learning_rate": 3.950048645455428e-06, "loss": 0.2641, "step": 13831 }, { "epoch": 0.6479599006886213, "grad_norm": 0.5789102840160334, "learning_rate": 3.949894151774764e-06, "loss": 0.2825, "step": 13832 }, { "epoch": 0.6480067456785497, "grad_norm": 0.5783706244231325, "learning_rate": 3.9497396497503244e-06, "loss": 0.2743, "step": 13833 }, { "epoch": 0.648053590668478, "grad_norm": 0.6202923145946374, "learning_rate": 3.949585139382998e-06, "loss": 0.2873, "step": 13834 }, { "epoch": 0.6481004356584064, "grad_norm": 0.6273032636174083, "learning_rate": 3.949430620673672e-06, "loss": 0.2976, "step": 13835 }, { "epoch": 0.6481472806483347, "grad_norm": 0.6016265244166351, "learning_rate": 3.9492760936232376e-06, "loss": 0.2931, "step": 13836 }, { "epoch": 0.648194125638263, "grad_norm": 0.6025643068051739, "learning_rate": 3.949121558232584e-06, "loss": 0.2719, "step": 13837 }, { "epoch": 0.6482409706281913, "grad_norm": 0.5921950577542816, "learning_rate": 3.9489670145025995e-06, "loss": 0.2856, "step": 13838 }, { "epoch": 0.6482878156181197, "grad_norm": 0.6703403021733246, "learning_rate": 3.948812462434175e-06, "loss": 0.2785, "step": 13839 }, { "epoch": 0.648334660608048, "grad_norm": 0.6601222869199941, "learning_rate": 3.948657902028198e-06, "loss": 0.3026, "step": 13840 }, { "epoch": 0.6483815055979762, "grad_norm": 0.6011193195098893, "learning_rate": 3.948503333285559e-06, "loss": 0.3028, "step": 13841 }, { "epoch": 0.6484283505879046, "grad_norm": 0.5194612111999413, "learning_rate": 3.948348756207148e-06, "loss": 0.2456, "step": 13842 }, { "epoch": 0.648475195577833, "grad_norm": 0.5920330893160678, "learning_rate": 3.948194170793853e-06, "loss": 0.2625, "step": 13843 }, { "epoch": 0.6485220405677613, "grad_norm": 0.6198970157744007, "learning_rate": 3.948039577046565e-06, "loss": 0.2904, "step": 13844 }, { "epoch": 0.6485688855576897, "grad_norm": 0.6188371651281801, "learning_rate": 3.947884974966174e-06, "loss": 0.2652, "step": 13845 }, { "epoch": 0.6486157305476179, "grad_norm": 0.5749126104753848, "learning_rate": 3.947730364553568e-06, "loss": 0.2754, "step": 13846 }, { "epoch": 0.6486625755375462, "grad_norm": 0.6331723321085001, "learning_rate": 3.947575745809638e-06, "loss": 0.2725, "step": 13847 }, { "epoch": 0.6487094205274746, "grad_norm": 0.5897070965757362, "learning_rate": 3.947421118735273e-06, "loss": 0.2857, "step": 13848 }, { "epoch": 0.648756265517403, "grad_norm": 0.5291870077420833, "learning_rate": 3.947266483331363e-06, "loss": 0.2723, "step": 13849 }, { "epoch": 0.6488031105073312, "grad_norm": 0.5940999229832034, "learning_rate": 3.947111839598798e-06, "loss": 0.2762, "step": 13850 }, { "epoch": 0.6488499554972595, "grad_norm": 0.5781864620464741, "learning_rate": 3.946957187538469e-06, "loss": 0.2779, "step": 13851 }, { "epoch": 0.6488968004871879, "grad_norm": 0.5917747252243066, "learning_rate": 3.9468025271512644e-06, "loss": 0.2912, "step": 13852 }, { "epoch": 0.6489436454771162, "grad_norm": 0.5805813573377855, "learning_rate": 3.9466478584380755e-06, "loss": 0.3011, "step": 13853 }, { "epoch": 0.6489904904670446, "grad_norm": 0.5551323525325463, "learning_rate": 3.946493181399791e-06, "loss": 0.2839, "step": 13854 }, { "epoch": 0.6490373354569728, "grad_norm": 0.5988700491950811, "learning_rate": 3.946338496037303e-06, "loss": 0.2817, "step": 13855 }, { "epoch": 0.6490841804469012, "grad_norm": 0.5706057684484818, "learning_rate": 3.9461838023515e-06, "loss": 0.285, "step": 13856 }, { "epoch": 0.6491310254368295, "grad_norm": 0.6075532569812192, "learning_rate": 3.946029100343272e-06, "loss": 0.2825, "step": 13857 }, { "epoch": 0.6491778704267579, "grad_norm": 0.5789658218651134, "learning_rate": 3.9458743900135104e-06, "loss": 0.2781, "step": 13858 }, { "epoch": 0.6492247154166861, "grad_norm": 0.608365611654585, "learning_rate": 3.945719671363105e-06, "loss": 0.2845, "step": 13859 }, { "epoch": 0.6492715604066145, "grad_norm": 0.6733793543726716, "learning_rate": 3.945564944392947e-06, "loss": 0.2986, "step": 13860 }, { "epoch": 0.6493184053965428, "grad_norm": 0.5479854755073608, "learning_rate": 3.945410209103925e-06, "loss": 0.2772, "step": 13861 }, { "epoch": 0.6493652503864712, "grad_norm": 0.6347946182786129, "learning_rate": 3.945255465496931e-06, "loss": 0.3163, "step": 13862 }, { "epoch": 0.6494120953763995, "grad_norm": 0.5993720698527395, "learning_rate": 3.945100713572855e-06, "loss": 0.2938, "step": 13863 }, { "epoch": 0.6494589403663278, "grad_norm": 0.6112633488460721, "learning_rate": 3.944945953332588e-06, "loss": 0.2875, "step": 13864 }, { "epoch": 0.6495057853562561, "grad_norm": 0.55048670764339, "learning_rate": 3.944791184777019e-06, "loss": 0.2606, "step": 13865 }, { "epoch": 0.6495526303461845, "grad_norm": 0.6252617733305688, "learning_rate": 3.944636407907041e-06, "loss": 0.2844, "step": 13866 }, { "epoch": 0.6495994753361128, "grad_norm": 0.573434517443023, "learning_rate": 3.944481622723544e-06, "loss": 0.2607, "step": 13867 }, { "epoch": 0.6496463203260411, "grad_norm": 0.6351262962440395, "learning_rate": 3.944326829227418e-06, "loss": 0.2775, "step": 13868 }, { "epoch": 0.6496931653159694, "grad_norm": 0.5860218883778255, "learning_rate": 3.944172027419553e-06, "loss": 0.2852, "step": 13869 }, { "epoch": 0.6497400103058978, "grad_norm": 0.5977060232389916, "learning_rate": 3.9440172173008415e-06, "loss": 0.2616, "step": 13870 }, { "epoch": 0.6497868552958261, "grad_norm": 0.5584509379663354, "learning_rate": 3.9438623988721735e-06, "loss": 0.287, "step": 13871 }, { "epoch": 0.6498337002857545, "grad_norm": 0.5763194069760168, "learning_rate": 3.943707572134441e-06, "loss": 0.2915, "step": 13872 }, { "epoch": 0.6498805452756827, "grad_norm": 0.6082787098283935, "learning_rate": 3.943552737088534e-06, "loss": 0.2746, "step": 13873 }, { "epoch": 0.6499273902656111, "grad_norm": 0.6516206253975741, "learning_rate": 3.943397893735344e-06, "loss": 0.2907, "step": 13874 }, { "epoch": 0.6499742352555394, "grad_norm": 0.6395394279423103, "learning_rate": 3.943243042075762e-06, "loss": 0.2887, "step": 13875 }, { "epoch": 0.6500210802454678, "grad_norm": 0.6172480715252022, "learning_rate": 3.943088182110679e-06, "loss": 0.2923, "step": 13876 }, { "epoch": 0.650067925235396, "grad_norm": 0.5761866022360699, "learning_rate": 3.942933313840987e-06, "loss": 0.3016, "step": 13877 }, { "epoch": 0.6501147702253244, "grad_norm": 0.6692486120384648, "learning_rate": 3.9427784372675745e-06, "loss": 0.2715, "step": 13878 }, { "epoch": 0.6501616152152527, "grad_norm": 0.5925420138404952, "learning_rate": 3.942623552391336e-06, "loss": 0.2804, "step": 13879 }, { "epoch": 0.6502084602051811, "grad_norm": 0.5739461841522875, "learning_rate": 3.942468659213161e-06, "loss": 0.2858, "step": 13880 }, { "epoch": 0.6502553051951094, "grad_norm": 0.6337630899419755, "learning_rate": 3.942313757733942e-06, "loss": 0.2868, "step": 13881 }, { "epoch": 0.6503021501850377, "grad_norm": 0.6002262428773345, "learning_rate": 3.942158847954569e-06, "loss": 0.2779, "step": 13882 }, { "epoch": 0.650348995174966, "grad_norm": 0.5160338845067495, "learning_rate": 3.942003929875935e-06, "loss": 0.259, "step": 13883 }, { "epoch": 0.6503958401648944, "grad_norm": 0.5518795034437538, "learning_rate": 3.941849003498931e-06, "loss": 0.2879, "step": 13884 }, { "epoch": 0.6504426851548227, "grad_norm": 0.5813311763806408, "learning_rate": 3.9416940688244485e-06, "loss": 0.2917, "step": 13885 }, { "epoch": 0.650489530144751, "grad_norm": 0.5749701886008046, "learning_rate": 3.941539125853378e-06, "loss": 0.2867, "step": 13886 }, { "epoch": 0.6505363751346793, "grad_norm": 0.5886809327261588, "learning_rate": 3.941384174586612e-06, "loss": 0.2807, "step": 13887 }, { "epoch": 0.6505832201246077, "grad_norm": 0.6106859677910915, "learning_rate": 3.9412292150250445e-06, "loss": 0.2735, "step": 13888 }, { "epoch": 0.650630065114536, "grad_norm": 0.5932916370632259, "learning_rate": 3.941074247169563e-06, "loss": 0.2864, "step": 13889 }, { "epoch": 0.6506769101044644, "grad_norm": 0.6398921383667474, "learning_rate": 3.940919271021062e-06, "loss": 0.2879, "step": 13890 }, { "epoch": 0.6507237550943926, "grad_norm": 0.6399573847960348, "learning_rate": 3.940764286580433e-06, "loss": 0.3065, "step": 13891 }, { "epoch": 0.650770600084321, "grad_norm": 0.6495636789967867, "learning_rate": 3.9406092938485675e-06, "loss": 0.2977, "step": 13892 }, { "epoch": 0.6508174450742493, "grad_norm": 0.5849408603693889, "learning_rate": 3.940454292826357e-06, "loss": 0.2878, "step": 13893 }, { "epoch": 0.6508642900641777, "grad_norm": 0.6056535065756267, "learning_rate": 3.940299283514696e-06, "loss": 0.2886, "step": 13894 }, { "epoch": 0.6509111350541059, "grad_norm": 0.5898438709400929, "learning_rate": 3.940144265914473e-06, "loss": 0.2753, "step": 13895 }, { "epoch": 0.6509579800440343, "grad_norm": 0.5814380719421468, "learning_rate": 3.939989240026582e-06, "loss": 0.2693, "step": 13896 }, { "epoch": 0.6510048250339626, "grad_norm": 0.577408790688595, "learning_rate": 3.939834205851915e-06, "loss": 0.2923, "step": 13897 }, { "epoch": 0.651051670023891, "grad_norm": 0.6102909421120642, "learning_rate": 3.939679163391364e-06, "loss": 0.2964, "step": 13898 }, { "epoch": 0.6510985150138193, "grad_norm": 0.5642518305124018, "learning_rate": 3.93952411264582e-06, "loss": 0.2959, "step": 13899 }, { "epoch": 0.6511453600037476, "grad_norm": 0.5554406820238044, "learning_rate": 3.9393690536161786e-06, "loss": 0.2776, "step": 13900 }, { "epoch": 0.6511922049936759, "grad_norm": 0.5624021966776941, "learning_rate": 3.9392139863033285e-06, "loss": 0.2769, "step": 13901 }, { "epoch": 0.6512390499836043, "grad_norm": 0.5527520463313538, "learning_rate": 3.939058910708165e-06, "loss": 0.2604, "step": 13902 }, { "epoch": 0.6512858949735326, "grad_norm": 0.5774095590399998, "learning_rate": 3.938903826831579e-06, "loss": 0.2762, "step": 13903 }, { "epoch": 0.6513327399634609, "grad_norm": 0.6493113149190471, "learning_rate": 3.938748734674461e-06, "loss": 0.2953, "step": 13904 }, { "epoch": 0.6513795849533892, "grad_norm": 0.5829146019325488, "learning_rate": 3.938593634237707e-06, "loss": 0.2927, "step": 13905 }, { "epoch": 0.6514264299433176, "grad_norm": 0.6057820738001265, "learning_rate": 3.938438525522209e-06, "loss": 0.2881, "step": 13906 }, { "epoch": 0.6514732749332459, "grad_norm": 0.5462359484067486, "learning_rate": 3.938283408528859e-06, "loss": 0.2833, "step": 13907 }, { "epoch": 0.6515201199231743, "grad_norm": 0.6076336954187659, "learning_rate": 3.938128283258548e-06, "loss": 0.2667, "step": 13908 }, { "epoch": 0.6515669649131025, "grad_norm": 0.5849392125159955, "learning_rate": 3.937973149712172e-06, "loss": 0.2867, "step": 13909 }, { "epoch": 0.6516138099030309, "grad_norm": 0.6314946386716926, "learning_rate": 3.93781800789062e-06, "loss": 0.2904, "step": 13910 }, { "epoch": 0.6516606548929592, "grad_norm": 0.607959570862781, "learning_rate": 3.937662857794787e-06, "loss": 0.2808, "step": 13911 }, { "epoch": 0.6517074998828876, "grad_norm": 0.596972626056457, "learning_rate": 3.937507699425567e-06, "loss": 0.2886, "step": 13912 }, { "epoch": 0.6517543448728158, "grad_norm": 0.5504379838376392, "learning_rate": 3.937352532783851e-06, "loss": 0.2688, "step": 13913 }, { "epoch": 0.6518011898627442, "grad_norm": 0.5606953552075613, "learning_rate": 3.937197357870532e-06, "loss": 0.2721, "step": 13914 }, { "epoch": 0.6518480348526725, "grad_norm": 0.5727292876195028, "learning_rate": 3.937042174686504e-06, "loss": 0.2837, "step": 13915 }, { "epoch": 0.6518948798426009, "grad_norm": 0.6123639740366673, "learning_rate": 3.936886983232658e-06, "loss": 0.2764, "step": 13916 }, { "epoch": 0.6519417248325292, "grad_norm": 0.5890341098483695, "learning_rate": 3.936731783509891e-06, "loss": 0.2906, "step": 13917 }, { "epoch": 0.6519885698224575, "grad_norm": 0.5744226080032089, "learning_rate": 3.936576575519092e-06, "loss": 0.2882, "step": 13918 }, { "epoch": 0.6520354148123858, "grad_norm": 0.6475334524105384, "learning_rate": 3.936421359261156e-06, "loss": 0.2738, "step": 13919 }, { "epoch": 0.6520822598023142, "grad_norm": 0.5343084590202888, "learning_rate": 3.9362661347369755e-06, "loss": 0.2639, "step": 13920 }, { "epoch": 0.6521291047922425, "grad_norm": 0.6120581755243704, "learning_rate": 3.936110901947446e-06, "loss": 0.2924, "step": 13921 }, { "epoch": 0.6521759497821707, "grad_norm": 0.5744263361257064, "learning_rate": 3.935955660893459e-06, "loss": 0.2931, "step": 13922 }, { "epoch": 0.6522227947720991, "grad_norm": 0.6095770237412752, "learning_rate": 3.9358004115759066e-06, "loss": 0.2795, "step": 13923 }, { "epoch": 0.6522696397620275, "grad_norm": 0.6184029799395301, "learning_rate": 3.935645153995685e-06, "loss": 0.2921, "step": 13924 }, { "epoch": 0.6523164847519558, "grad_norm": 0.6648156967544391, "learning_rate": 3.935489888153686e-06, "loss": 0.3061, "step": 13925 }, { "epoch": 0.6523633297418842, "grad_norm": 0.6471248882235473, "learning_rate": 3.935334614050803e-06, "loss": 0.282, "step": 13926 }, { "epoch": 0.6524101747318124, "grad_norm": 0.6672394363623576, "learning_rate": 3.935179331687931e-06, "loss": 0.3175, "step": 13927 }, { "epoch": 0.6524570197217407, "grad_norm": 0.5888479356618675, "learning_rate": 3.9350240410659626e-06, "loss": 0.2831, "step": 13928 }, { "epoch": 0.6525038647116691, "grad_norm": 0.6285088244834919, "learning_rate": 3.9348687421857904e-06, "loss": 0.2895, "step": 13929 }, { "epoch": 0.6525507097015975, "grad_norm": 0.6124408025137853, "learning_rate": 3.934713435048311e-06, "loss": 0.2925, "step": 13930 }, { "epoch": 0.6525975546915257, "grad_norm": 0.5880138993280806, "learning_rate": 3.934558119654415e-06, "loss": 0.3084, "step": 13931 }, { "epoch": 0.652644399681454, "grad_norm": 0.5926896624502003, "learning_rate": 3.934402796004997e-06, "loss": 0.2729, "step": 13932 }, { "epoch": 0.6526912446713824, "grad_norm": 0.6218634390943515, "learning_rate": 3.934247464100953e-06, "loss": 0.2824, "step": 13933 }, { "epoch": 0.6527380896613107, "grad_norm": 0.6268849308750235, "learning_rate": 3.934092123943174e-06, "loss": 0.2907, "step": 13934 }, { "epoch": 0.6527849346512391, "grad_norm": 0.5600898206915437, "learning_rate": 3.9339367755325565e-06, "loss": 0.2554, "step": 13935 }, { "epoch": 0.6528317796411673, "grad_norm": 0.6382837158359481, "learning_rate": 3.933781418869993e-06, "loss": 0.2993, "step": 13936 }, { "epoch": 0.6528786246310957, "grad_norm": 0.6198402513939675, "learning_rate": 3.9336260539563776e-06, "loss": 0.2847, "step": 13937 }, { "epoch": 0.652925469621024, "grad_norm": 0.5871851008210367, "learning_rate": 3.9334706807926035e-06, "loss": 0.2767, "step": 13938 }, { "epoch": 0.6529723146109524, "grad_norm": 0.5500522044451678, "learning_rate": 3.933315299379568e-06, "loss": 0.27, "step": 13939 }, { "epoch": 0.6530191596008806, "grad_norm": 0.5550480045554457, "learning_rate": 3.933159909718162e-06, "loss": 0.263, "step": 13940 }, { "epoch": 0.653066004590809, "grad_norm": 0.6129968243899626, "learning_rate": 3.933004511809281e-06, "loss": 0.2851, "step": 13941 }, { "epoch": 0.6531128495807373, "grad_norm": 0.6143491334141584, "learning_rate": 3.932849105653818e-06, "loss": 0.2849, "step": 13942 }, { "epoch": 0.6531596945706657, "grad_norm": 0.6029324654982322, "learning_rate": 3.9326936912526705e-06, "loss": 0.2818, "step": 13943 }, { "epoch": 0.653206539560594, "grad_norm": 0.552450676826493, "learning_rate": 3.93253826860673e-06, "loss": 0.2635, "step": 13944 }, { "epoch": 0.6532533845505223, "grad_norm": 0.5611549840507781, "learning_rate": 3.932382837716892e-06, "loss": 0.2682, "step": 13945 }, { "epoch": 0.6533002295404506, "grad_norm": 0.6371102724932354, "learning_rate": 3.932227398584051e-06, "loss": 0.3067, "step": 13946 }, { "epoch": 0.653347074530379, "grad_norm": 0.5924663854851897, "learning_rate": 3.9320719512091015e-06, "loss": 0.2934, "step": 13947 }, { "epoch": 0.6533939195203073, "grad_norm": 0.5843942477582144, "learning_rate": 3.931916495592937e-06, "loss": 0.2909, "step": 13948 }, { "epoch": 0.6534407645102356, "grad_norm": 0.5368003089669977, "learning_rate": 3.9317610317364545e-06, "loss": 0.2645, "step": 13949 }, { "epoch": 0.6534876095001639, "grad_norm": 0.6130702130156913, "learning_rate": 3.931605559640546e-06, "loss": 0.2939, "step": 13950 }, { "epoch": 0.6535344544900923, "grad_norm": 0.566019059773071, "learning_rate": 3.931450079306107e-06, "loss": 0.2787, "step": 13951 }, { "epoch": 0.6535812994800206, "grad_norm": 0.6518541002515991, "learning_rate": 3.931294590734033e-06, "loss": 0.2961, "step": 13952 }, { "epoch": 0.653628144469949, "grad_norm": 0.5887766197202356, "learning_rate": 3.931139093925218e-06, "loss": 0.2814, "step": 13953 }, { "epoch": 0.6536749894598772, "grad_norm": 0.5560104480148776, "learning_rate": 3.930983588880557e-06, "loss": 0.2633, "step": 13954 }, { "epoch": 0.6537218344498056, "grad_norm": 0.5792092763457788, "learning_rate": 3.930828075600946e-06, "loss": 0.2575, "step": 13955 }, { "epoch": 0.6537686794397339, "grad_norm": 0.5321376112818479, "learning_rate": 3.93067255408728e-06, "loss": 0.2608, "step": 13956 }, { "epoch": 0.6538155244296623, "grad_norm": 0.6722242247699078, "learning_rate": 3.93051702434045e-06, "loss": 0.3011, "step": 13957 }, { "epoch": 0.6538623694195905, "grad_norm": 0.571137811112114, "learning_rate": 3.930361486361357e-06, "loss": 0.2861, "step": 13958 }, { "epoch": 0.6539092144095189, "grad_norm": 0.5583755819796345, "learning_rate": 3.930205940150892e-06, "loss": 0.2791, "step": 13959 }, { "epoch": 0.6539560593994472, "grad_norm": 0.5780639767494009, "learning_rate": 3.930050385709951e-06, "loss": 0.2919, "step": 13960 }, { "epoch": 0.6540029043893756, "grad_norm": 0.6113807083325472, "learning_rate": 3.929894823039429e-06, "loss": 0.2814, "step": 13961 }, { "epoch": 0.6540497493793039, "grad_norm": 0.6090573489271162, "learning_rate": 3.929739252140222e-06, "loss": 0.2973, "step": 13962 }, { "epoch": 0.6540965943692322, "grad_norm": 0.589276303042313, "learning_rate": 3.9295836730132256e-06, "loss": 0.2869, "step": 13963 }, { "epoch": 0.6541434393591605, "grad_norm": 0.5677390301850728, "learning_rate": 3.929428085659335e-06, "loss": 0.2573, "step": 13964 }, { "epoch": 0.6541902843490889, "grad_norm": 0.6635886714472546, "learning_rate": 3.929272490079445e-06, "loss": 0.2846, "step": 13965 }, { "epoch": 0.6542371293390172, "grad_norm": 0.5998249182473453, "learning_rate": 3.929116886274449e-06, "loss": 0.2829, "step": 13966 }, { "epoch": 0.6542839743289455, "grad_norm": 0.5416718767638348, "learning_rate": 3.9289612742452464e-06, "loss": 0.2804, "step": 13967 }, { "epoch": 0.6543308193188738, "grad_norm": 0.6209598669331534, "learning_rate": 3.92880565399273e-06, "loss": 0.3003, "step": 13968 }, { "epoch": 0.6543776643088022, "grad_norm": 0.6247094456875152, "learning_rate": 3.928650025517796e-06, "loss": 0.2957, "step": 13969 }, { "epoch": 0.6544245092987305, "grad_norm": 0.5198925052380673, "learning_rate": 3.928494388821341e-06, "loss": 0.2615, "step": 13970 }, { "epoch": 0.6544713542886589, "grad_norm": 0.5892686564718328, "learning_rate": 3.92833874390426e-06, "loss": 0.2825, "step": 13971 }, { "epoch": 0.6545181992785871, "grad_norm": 0.6137904339679672, "learning_rate": 3.928183090767448e-06, "loss": 0.2818, "step": 13972 }, { "epoch": 0.6545650442685155, "grad_norm": 0.6981163651871714, "learning_rate": 3.9280274294118e-06, "loss": 0.311, "step": 13973 }, { "epoch": 0.6546118892584438, "grad_norm": 0.6106048832589119, "learning_rate": 3.927871759838215e-06, "loss": 0.2958, "step": 13974 }, { "epoch": 0.6546587342483722, "grad_norm": 0.5806150946297866, "learning_rate": 3.927716082047586e-06, "loss": 0.295, "step": 13975 }, { "epoch": 0.6547055792383004, "grad_norm": 0.619047401506748, "learning_rate": 3.92756039604081e-06, "loss": 0.3017, "step": 13976 }, { "epoch": 0.6547524242282288, "grad_norm": 0.6112622830849266, "learning_rate": 3.9274047018187834e-06, "loss": 0.2802, "step": 13977 }, { "epoch": 0.6547992692181571, "grad_norm": 0.5442830902542837, "learning_rate": 3.9272489993824005e-06, "loss": 0.2718, "step": 13978 }, { "epoch": 0.6548461142080855, "grad_norm": 0.6594762749220444, "learning_rate": 3.927093288732558e-06, "loss": 0.2642, "step": 13979 }, { "epoch": 0.6548929591980138, "grad_norm": 0.5695242513580763, "learning_rate": 3.926937569870154e-06, "loss": 0.2884, "step": 13980 }, { "epoch": 0.6549398041879421, "grad_norm": 0.6064530246556012, "learning_rate": 3.926781842796082e-06, "loss": 0.303, "step": 13981 }, { "epoch": 0.6549866491778704, "grad_norm": 0.6055919398575921, "learning_rate": 3.926626107511239e-06, "loss": 0.2772, "step": 13982 }, { "epoch": 0.6550334941677988, "grad_norm": 0.537440715218361, "learning_rate": 3.926470364016521e-06, "loss": 0.2434, "step": 13983 }, { "epoch": 0.6550803391577271, "grad_norm": 0.6117422599325979, "learning_rate": 3.926314612312825e-06, "loss": 0.2896, "step": 13984 }, { "epoch": 0.6551271841476554, "grad_norm": 0.6142227397160092, "learning_rate": 3.926158852401046e-06, "loss": 0.2987, "step": 13985 }, { "epoch": 0.6551740291375837, "grad_norm": 0.5619759678343161, "learning_rate": 3.926003084282083e-06, "loss": 0.284, "step": 13986 }, { "epoch": 0.6552208741275121, "grad_norm": 0.6437705154430073, "learning_rate": 3.925847307956829e-06, "loss": 0.2749, "step": 13987 }, { "epoch": 0.6552677191174404, "grad_norm": 0.5633079336509897, "learning_rate": 3.925691523426183e-06, "loss": 0.2818, "step": 13988 }, { "epoch": 0.6553145641073688, "grad_norm": 0.5615742294473638, "learning_rate": 3.9255357306910406e-06, "loss": 0.2859, "step": 13989 }, { "epoch": 0.655361409097297, "grad_norm": 0.5741163831654491, "learning_rate": 3.9253799297522975e-06, "loss": 0.2846, "step": 13990 }, { "epoch": 0.6554082540872254, "grad_norm": 0.6125187858598028, "learning_rate": 3.925224120610853e-06, "loss": 0.313, "step": 13991 }, { "epoch": 0.6554550990771537, "grad_norm": 0.5642792117680856, "learning_rate": 3.9250683032676e-06, "loss": 0.2873, "step": 13992 }, { "epoch": 0.6555019440670821, "grad_norm": 0.5439960823826883, "learning_rate": 3.924912477723438e-06, "loss": 0.2663, "step": 13993 }, { "epoch": 0.6555487890570103, "grad_norm": 0.6472762977958093, "learning_rate": 3.924756643979263e-06, "loss": 0.2813, "step": 13994 }, { "epoch": 0.6555956340469387, "grad_norm": 0.5673381153170958, "learning_rate": 3.924600802035972e-06, "loss": 0.275, "step": 13995 }, { "epoch": 0.655642479036867, "grad_norm": 0.5696560184650036, "learning_rate": 3.92444495189446e-06, "loss": 0.2621, "step": 13996 }, { "epoch": 0.6556893240267954, "grad_norm": 0.6274737291442514, "learning_rate": 3.9242890935556265e-06, "loss": 0.2745, "step": 13997 }, { "epoch": 0.6557361690167237, "grad_norm": 0.5985207944263096, "learning_rate": 3.924133227020367e-06, "loss": 0.2855, "step": 13998 }, { "epoch": 0.655783014006652, "grad_norm": 0.5947213128778716, "learning_rate": 3.923977352289578e-06, "loss": 0.2955, "step": 13999 }, { "epoch": 0.6558298589965803, "grad_norm": 0.5848490752711988, "learning_rate": 3.923821469364158e-06, "loss": 0.283, "step": 14000 }, { "epoch": 0.6558767039865087, "grad_norm": 0.605055042553698, "learning_rate": 3.923665578245003e-06, "loss": 0.2928, "step": 14001 }, { "epoch": 0.655923548976437, "grad_norm": 0.5899117537973563, "learning_rate": 3.9235096789330106e-06, "loss": 0.2843, "step": 14002 }, { "epoch": 0.6559703939663653, "grad_norm": 0.6093247491435807, "learning_rate": 3.923353771429078e-06, "loss": 0.2899, "step": 14003 }, { "epoch": 0.6560172389562936, "grad_norm": 0.6062750064999788, "learning_rate": 3.923197855734102e-06, "loss": 0.2844, "step": 14004 }, { "epoch": 0.656064083946222, "grad_norm": 0.612309752360483, "learning_rate": 3.9230419318489796e-06, "loss": 0.2986, "step": 14005 }, { "epoch": 0.6561109289361503, "grad_norm": 0.5611697041901319, "learning_rate": 3.922885999774608e-06, "loss": 0.2685, "step": 14006 }, { "epoch": 0.6561577739260787, "grad_norm": 0.5567042530331947, "learning_rate": 3.922730059511887e-06, "loss": 0.2737, "step": 14007 }, { "epoch": 0.6562046189160069, "grad_norm": 0.6264469166520117, "learning_rate": 3.922574111061711e-06, "loss": 0.2851, "step": 14008 }, { "epoch": 0.6562514639059353, "grad_norm": 0.5580113395524092, "learning_rate": 3.922418154424979e-06, "loss": 0.2781, "step": 14009 }, { "epoch": 0.6562983088958636, "grad_norm": 0.6356902473593184, "learning_rate": 3.922262189602587e-06, "loss": 0.3027, "step": 14010 }, { "epoch": 0.656345153885792, "grad_norm": 0.610922048369862, "learning_rate": 3.922106216595434e-06, "loss": 0.2854, "step": 14011 }, { "epoch": 0.6563919988757202, "grad_norm": 0.5970149054156749, "learning_rate": 3.9219502354044175e-06, "loss": 0.2681, "step": 14012 }, { "epoch": 0.6564388438656485, "grad_norm": 0.5984585213164686, "learning_rate": 3.921794246030435e-06, "loss": 0.3015, "step": 14013 }, { "epoch": 0.6564856888555769, "grad_norm": 0.5850452244968836, "learning_rate": 3.921638248474384e-06, "loss": 0.2824, "step": 14014 }, { "epoch": 0.6565325338455053, "grad_norm": 0.5739447895907583, "learning_rate": 3.921482242737161e-06, "loss": 0.2814, "step": 14015 }, { "epoch": 0.6565793788354336, "grad_norm": 0.5880605419543828, "learning_rate": 3.921326228819666e-06, "loss": 0.2788, "step": 14016 }, { "epoch": 0.6566262238253618, "grad_norm": 0.5642470180913884, "learning_rate": 3.9211702067227965e-06, "loss": 0.2926, "step": 14017 }, { "epoch": 0.6566730688152902, "grad_norm": 0.5682184782197293, "learning_rate": 3.9210141764474485e-06, "loss": 0.2688, "step": 14018 }, { "epoch": 0.6567199138052185, "grad_norm": 0.566048666915977, "learning_rate": 3.920858137994521e-06, "loss": 0.2849, "step": 14019 }, { "epoch": 0.6567667587951469, "grad_norm": 0.6729474582856297, "learning_rate": 3.920702091364913e-06, "loss": 0.2864, "step": 14020 }, { "epoch": 0.6568136037850751, "grad_norm": 0.5756812200651987, "learning_rate": 3.920546036559521e-06, "loss": 0.2858, "step": 14021 }, { "epoch": 0.6568604487750035, "grad_norm": 0.5803595396390439, "learning_rate": 3.920389973579243e-06, "loss": 0.272, "step": 14022 }, { "epoch": 0.6569072937649318, "grad_norm": 0.5928606808503761, "learning_rate": 3.920233902424978e-06, "loss": 0.2801, "step": 14023 }, { "epoch": 0.6569541387548602, "grad_norm": 0.6029239598683622, "learning_rate": 3.9200778230976244e-06, "loss": 0.2795, "step": 14024 }, { "epoch": 0.6570009837447885, "grad_norm": 0.6186676192201166, "learning_rate": 3.91992173559808e-06, "loss": 0.2918, "step": 14025 }, { "epoch": 0.6570478287347168, "grad_norm": 0.6262930549033001, "learning_rate": 3.9197656399272424e-06, "loss": 0.292, "step": 14026 }, { "epoch": 0.6570946737246451, "grad_norm": 0.6140565361410617, "learning_rate": 3.91960953608601e-06, "loss": 0.3134, "step": 14027 }, { "epoch": 0.6571415187145735, "grad_norm": 0.6232142680756925, "learning_rate": 3.919453424075282e-06, "loss": 0.2968, "step": 14028 }, { "epoch": 0.6571883637045018, "grad_norm": 0.6016922405337564, "learning_rate": 3.919297303895956e-06, "loss": 0.2925, "step": 14029 }, { "epoch": 0.6572352086944301, "grad_norm": 0.5794096940407449, "learning_rate": 3.919141175548931e-06, "loss": 0.2963, "step": 14030 }, { "epoch": 0.6572820536843584, "grad_norm": 0.5786829265731723, "learning_rate": 3.918985039035106e-06, "loss": 0.2864, "step": 14031 }, { "epoch": 0.6573288986742868, "grad_norm": 0.5883765851955337, "learning_rate": 3.918828894355377e-06, "loss": 0.2829, "step": 14032 }, { "epoch": 0.6573757436642151, "grad_norm": 0.5868693305435437, "learning_rate": 3.918672741510645e-06, "loss": 0.3002, "step": 14033 }, { "epoch": 0.6574225886541435, "grad_norm": 0.5289824853839172, "learning_rate": 3.918516580501809e-06, "loss": 0.2545, "step": 14034 }, { "epoch": 0.6574694336440717, "grad_norm": 0.5941081070888575, "learning_rate": 3.918360411329766e-06, "loss": 0.2852, "step": 14035 }, { "epoch": 0.6575162786340001, "grad_norm": 0.5951258649418519, "learning_rate": 3.918204233995415e-06, "loss": 0.2892, "step": 14036 }, { "epoch": 0.6575631236239284, "grad_norm": 0.589027549777887, "learning_rate": 3.918048048499656e-06, "loss": 0.309, "step": 14037 }, { "epoch": 0.6576099686138568, "grad_norm": 0.6548184579641746, "learning_rate": 3.917891854843385e-06, "loss": 0.3028, "step": 14038 }, { "epoch": 0.657656813603785, "grad_norm": 0.588082590131201, "learning_rate": 3.917735653027503e-06, "loss": 0.2696, "step": 14039 }, { "epoch": 0.6577036585937134, "grad_norm": 0.5923923090547313, "learning_rate": 3.91757944305291e-06, "loss": 0.2935, "step": 14040 }, { "epoch": 0.6577505035836417, "grad_norm": 0.5590593609233473, "learning_rate": 3.917423224920503e-06, "loss": 0.2673, "step": 14041 }, { "epoch": 0.6577973485735701, "grad_norm": 0.5661372401115805, "learning_rate": 3.917266998631182e-06, "loss": 0.2799, "step": 14042 }, { "epoch": 0.6578441935634984, "grad_norm": 0.5739423543187341, "learning_rate": 3.9171107641858465e-06, "loss": 0.2772, "step": 14043 }, { "epoch": 0.6578910385534267, "grad_norm": 0.5964922227765695, "learning_rate": 3.916954521585393e-06, "loss": 0.2865, "step": 14044 }, { "epoch": 0.657937883543355, "grad_norm": 0.5320727481609755, "learning_rate": 3.916798270830723e-06, "loss": 0.2737, "step": 14045 }, { "epoch": 0.6579847285332834, "grad_norm": 0.5983059840492903, "learning_rate": 3.916642011922736e-06, "loss": 0.28, "step": 14046 }, { "epoch": 0.6580315735232117, "grad_norm": 0.5844144176753105, "learning_rate": 3.916485744862328e-06, "loss": 0.2971, "step": 14047 }, { "epoch": 0.65807841851314, "grad_norm": 0.5849605374265348, "learning_rate": 3.916329469650402e-06, "loss": 0.2983, "step": 14048 }, { "epoch": 0.6581252635030683, "grad_norm": 0.6192004616572052, "learning_rate": 3.916173186287856e-06, "loss": 0.2903, "step": 14049 }, { "epoch": 0.6581721084929967, "grad_norm": 0.5843266190967396, "learning_rate": 3.916016894775589e-06, "loss": 0.2859, "step": 14050 }, { "epoch": 0.658218953482925, "grad_norm": 0.5634344147919448, "learning_rate": 3.9158605951145e-06, "loss": 0.2655, "step": 14051 }, { "epoch": 0.6582657984728534, "grad_norm": 0.6295585463586322, "learning_rate": 3.9157042873054905e-06, "loss": 0.2836, "step": 14052 }, { "epoch": 0.6583126434627816, "grad_norm": 0.5938838061249135, "learning_rate": 3.915547971349458e-06, "loss": 0.2712, "step": 14053 }, { "epoch": 0.65835948845271, "grad_norm": 0.5390892829169223, "learning_rate": 3.915391647247303e-06, "loss": 0.2667, "step": 14054 }, { "epoch": 0.6584063334426383, "grad_norm": 0.5536726564557587, "learning_rate": 3.915235314999924e-06, "loss": 0.2788, "step": 14055 }, { "epoch": 0.6584531784325667, "grad_norm": 0.5687766830445463, "learning_rate": 3.9150789746082215e-06, "loss": 0.2831, "step": 14056 }, { "epoch": 0.6585000234224949, "grad_norm": 0.5663546226497004, "learning_rate": 3.9149226260730965e-06, "loss": 0.2742, "step": 14057 }, { "epoch": 0.6585468684124233, "grad_norm": 0.6774376451078687, "learning_rate": 3.914766269395446e-06, "loss": 0.3033, "step": 14058 }, { "epoch": 0.6585937134023516, "grad_norm": 0.6060699856888542, "learning_rate": 3.914609904576172e-06, "loss": 0.2742, "step": 14059 }, { "epoch": 0.65864055839228, "grad_norm": 0.5685402432995371, "learning_rate": 3.914453531616173e-06, "loss": 0.2758, "step": 14060 }, { "epoch": 0.6586874033822083, "grad_norm": 0.5542792088103725, "learning_rate": 3.91429715051635e-06, "loss": 0.2787, "step": 14061 }, { "epoch": 0.6587342483721366, "grad_norm": 0.5701991839590503, "learning_rate": 3.914140761277603e-06, "loss": 0.2691, "step": 14062 }, { "epoch": 0.6587810933620649, "grad_norm": 0.611598151996131, "learning_rate": 3.91398436390083e-06, "loss": 0.2924, "step": 14063 }, { "epoch": 0.6588279383519933, "grad_norm": 0.642520094061918, "learning_rate": 3.913827958386933e-06, "loss": 0.2874, "step": 14064 }, { "epoch": 0.6588747833419216, "grad_norm": 0.5915639394684162, "learning_rate": 3.913671544736811e-06, "loss": 0.276, "step": 14065 }, { "epoch": 0.6589216283318499, "grad_norm": 0.5768552742954225, "learning_rate": 3.913515122951365e-06, "loss": 0.2705, "step": 14066 }, { "epoch": 0.6589684733217782, "grad_norm": 0.6309660902465963, "learning_rate": 3.913358693031494e-06, "loss": 0.2948, "step": 14067 }, { "epoch": 0.6590153183117066, "grad_norm": 0.6034534915042664, "learning_rate": 3.9132022549781e-06, "loss": 0.285, "step": 14068 }, { "epoch": 0.6590621633016349, "grad_norm": 0.5702351219917215, "learning_rate": 3.913045808792082e-06, "loss": 0.2941, "step": 14069 }, { "epoch": 0.6591090082915633, "grad_norm": 0.5583889237858464, "learning_rate": 3.9128893544743405e-06, "loss": 0.2517, "step": 14070 }, { "epoch": 0.6591558532814915, "grad_norm": 0.5807640622517479, "learning_rate": 3.912732892025775e-06, "loss": 0.2708, "step": 14071 }, { "epoch": 0.6592026982714199, "grad_norm": 0.6171648982002026, "learning_rate": 3.912576421447287e-06, "loss": 0.2802, "step": 14072 }, { "epoch": 0.6592495432613482, "grad_norm": 0.6416756286493258, "learning_rate": 3.912419942739778e-06, "loss": 0.2694, "step": 14073 }, { "epoch": 0.6592963882512766, "grad_norm": 0.6387518997001614, "learning_rate": 3.912263455904146e-06, "loss": 0.3026, "step": 14074 }, { "epoch": 0.6593432332412048, "grad_norm": 0.6322096447889847, "learning_rate": 3.912106960941293e-06, "loss": 0.2703, "step": 14075 }, { "epoch": 0.6593900782311332, "grad_norm": 0.644451827475426, "learning_rate": 3.9119504578521195e-06, "loss": 0.292, "step": 14076 }, { "epoch": 0.6594369232210615, "grad_norm": 0.6575966253833213, "learning_rate": 3.911793946637526e-06, "loss": 0.3007, "step": 14077 }, { "epoch": 0.6594837682109899, "grad_norm": 0.5994360615059143, "learning_rate": 3.911637427298413e-06, "loss": 0.2772, "step": 14078 }, { "epoch": 0.6595306132009182, "grad_norm": 0.5571694237344315, "learning_rate": 3.911480899835683e-06, "loss": 0.2716, "step": 14079 }, { "epoch": 0.6595774581908465, "grad_norm": 0.6130390168382159, "learning_rate": 3.911324364250233e-06, "loss": 0.2795, "step": 14080 }, { "epoch": 0.6596243031807748, "grad_norm": 0.576954215851852, "learning_rate": 3.911167820542967e-06, "loss": 0.2854, "step": 14081 }, { "epoch": 0.6596711481707032, "grad_norm": 0.5586698918869536, "learning_rate": 3.911011268714784e-06, "loss": 0.2794, "step": 14082 }, { "epoch": 0.6597179931606315, "grad_norm": 0.5745114656340491, "learning_rate": 3.910854708766587e-06, "loss": 0.273, "step": 14083 }, { "epoch": 0.6597648381505598, "grad_norm": 0.5386573885977446, "learning_rate": 3.910698140699275e-06, "loss": 0.2712, "step": 14084 }, { "epoch": 0.6598116831404881, "grad_norm": 0.6118185637036756, "learning_rate": 3.91054156451375e-06, "loss": 0.2942, "step": 14085 }, { "epoch": 0.6598585281304165, "grad_norm": 0.5900382436437674, "learning_rate": 3.910384980210913e-06, "loss": 0.2771, "step": 14086 }, { "epoch": 0.6599053731203448, "grad_norm": 0.5700740154128305, "learning_rate": 3.910228387791665e-06, "loss": 0.2805, "step": 14087 }, { "epoch": 0.6599522181102732, "grad_norm": 0.5681472936761012, "learning_rate": 3.910071787256906e-06, "loss": 0.2914, "step": 14088 }, { "epoch": 0.6599990631002014, "grad_norm": 0.5984774256076721, "learning_rate": 3.90991517860754e-06, "loss": 0.2767, "step": 14089 }, { "epoch": 0.6600459080901298, "grad_norm": 0.5963018630716577, "learning_rate": 3.909758561844466e-06, "loss": 0.272, "step": 14090 }, { "epoch": 0.6600927530800581, "grad_norm": 0.5872222712518347, "learning_rate": 3.909601936968585e-06, "loss": 0.3023, "step": 14091 }, { "epoch": 0.6601395980699865, "grad_norm": 0.6795404764776775, "learning_rate": 3.9094453039808e-06, "loss": 0.3025, "step": 14092 }, { "epoch": 0.6601864430599147, "grad_norm": 0.5882416742992336, "learning_rate": 3.90928866288201e-06, "loss": 0.2916, "step": 14093 }, { "epoch": 0.660233288049843, "grad_norm": 0.628084006496596, "learning_rate": 3.909132013673119e-06, "loss": 0.3064, "step": 14094 }, { "epoch": 0.6602801330397714, "grad_norm": 0.6310838634319286, "learning_rate": 3.908975356355028e-06, "loss": 0.2835, "step": 14095 }, { "epoch": 0.6603269780296998, "grad_norm": 0.6582779637868054, "learning_rate": 3.908818690928637e-06, "loss": 0.2863, "step": 14096 }, { "epoch": 0.6603738230196281, "grad_norm": 0.6334022946249758, "learning_rate": 3.90866201739485e-06, "loss": 0.3019, "step": 14097 }, { "epoch": 0.6604206680095563, "grad_norm": 0.5887483938871122, "learning_rate": 3.908505335754565e-06, "loss": 0.284, "step": 14098 }, { "epoch": 0.6604675129994847, "grad_norm": 0.5810135817843369, "learning_rate": 3.908348646008688e-06, "loss": 0.2955, "step": 14099 }, { "epoch": 0.660514357989413, "grad_norm": 0.5769394878752389, "learning_rate": 3.908191948158117e-06, "loss": 0.2743, "step": 14100 }, { "epoch": 0.6605612029793414, "grad_norm": 0.6391627356924849, "learning_rate": 3.908035242203756e-06, "loss": 0.3102, "step": 14101 }, { "epoch": 0.6606080479692696, "grad_norm": 0.5760471318328745, "learning_rate": 3.907878528146506e-06, "loss": 0.2784, "step": 14102 }, { "epoch": 0.660654892959198, "grad_norm": 0.5810203789687907, "learning_rate": 3.907721805987268e-06, "loss": 0.2766, "step": 14103 }, { "epoch": 0.6607017379491263, "grad_norm": 0.653252099206587, "learning_rate": 3.907565075726946e-06, "loss": 0.2836, "step": 14104 }, { "epoch": 0.6607485829390547, "grad_norm": 0.5996134264826878, "learning_rate": 3.90740833736644e-06, "loss": 0.282, "step": 14105 }, { "epoch": 0.660795427928983, "grad_norm": 0.5816611208777801, "learning_rate": 3.907251590906654e-06, "loss": 0.2852, "step": 14106 }, { "epoch": 0.6608422729189113, "grad_norm": 0.6163131147541024, "learning_rate": 3.907094836348488e-06, "loss": 0.2746, "step": 14107 }, { "epoch": 0.6608891179088396, "grad_norm": 0.5732860004082639, "learning_rate": 3.906938073692844e-06, "loss": 0.2656, "step": 14108 }, { "epoch": 0.660935962898768, "grad_norm": 0.5891948060312577, "learning_rate": 3.906781302940626e-06, "loss": 0.2784, "step": 14109 }, { "epoch": 0.6609828078886963, "grad_norm": 0.6001046889576509, "learning_rate": 3.9066245240927345e-06, "loss": 0.2801, "step": 14110 }, { "epoch": 0.6610296528786246, "grad_norm": 0.6544887900290527, "learning_rate": 3.906467737150072e-06, "loss": 0.3025, "step": 14111 }, { "epoch": 0.6610764978685529, "grad_norm": 0.5775461649995681, "learning_rate": 3.9063109421135425e-06, "loss": 0.2433, "step": 14112 }, { "epoch": 0.6611233428584813, "grad_norm": 0.5660675416179616, "learning_rate": 3.906154138984047e-06, "loss": 0.2764, "step": 14113 }, { "epoch": 0.6611701878484096, "grad_norm": 0.5962718748553801, "learning_rate": 3.905997327762488e-06, "loss": 0.279, "step": 14114 }, { "epoch": 0.661217032838338, "grad_norm": 0.5800475649452856, "learning_rate": 3.905840508449766e-06, "loss": 0.2689, "step": 14115 }, { "epoch": 0.6612638778282662, "grad_norm": 0.6068712052765529, "learning_rate": 3.905683681046787e-06, "loss": 0.2879, "step": 14116 }, { "epoch": 0.6613107228181946, "grad_norm": 0.5940793772308243, "learning_rate": 3.905526845554451e-06, "loss": 0.2874, "step": 14117 }, { "epoch": 0.6613575678081229, "grad_norm": 0.5691365546789333, "learning_rate": 3.905370001973661e-06, "loss": 0.2826, "step": 14118 }, { "epoch": 0.6614044127980513, "grad_norm": 0.6106047287027948, "learning_rate": 3.905213150305321e-06, "loss": 0.3084, "step": 14119 }, { "epoch": 0.6614512577879795, "grad_norm": 0.5480992959234281, "learning_rate": 3.905056290550331e-06, "loss": 0.2734, "step": 14120 }, { "epoch": 0.6614981027779079, "grad_norm": 0.6387588384845988, "learning_rate": 3.904899422709596e-06, "loss": 0.2802, "step": 14121 }, { "epoch": 0.6615449477678362, "grad_norm": 0.6111713984308286, "learning_rate": 3.904742546784018e-06, "loss": 0.2744, "step": 14122 }, { "epoch": 0.6615917927577646, "grad_norm": 0.5458081949739988, "learning_rate": 3.9045856627744995e-06, "loss": 0.2609, "step": 14123 }, { "epoch": 0.6616386377476929, "grad_norm": 0.6355732457757863, "learning_rate": 3.904428770681943e-06, "loss": 0.3004, "step": 14124 }, { "epoch": 0.6616854827376212, "grad_norm": 0.5644327491140788, "learning_rate": 3.904271870507253e-06, "loss": 0.2804, "step": 14125 }, { "epoch": 0.6617323277275495, "grad_norm": 0.5826513754918556, "learning_rate": 3.904114962251331e-06, "loss": 0.2693, "step": 14126 }, { "epoch": 0.6617791727174779, "grad_norm": 0.5969689027018293, "learning_rate": 3.90395804591508e-06, "loss": 0.2728, "step": 14127 }, { "epoch": 0.6618260177074062, "grad_norm": 0.6531592880285442, "learning_rate": 3.903801121499403e-06, "loss": 0.2947, "step": 14128 }, { "epoch": 0.6618728626973345, "grad_norm": 0.6559966125530919, "learning_rate": 3.9036441890052045e-06, "loss": 0.3006, "step": 14129 }, { "epoch": 0.6619197076872628, "grad_norm": 0.6029242093235346, "learning_rate": 3.9034872484333856e-06, "loss": 0.2785, "step": 14130 }, { "epoch": 0.6619665526771912, "grad_norm": 0.5688122674577583, "learning_rate": 3.903330299784851e-06, "loss": 0.2525, "step": 14131 }, { "epoch": 0.6620133976671195, "grad_norm": 0.6349932998476678, "learning_rate": 3.9031733430605024e-06, "loss": 0.2878, "step": 14132 }, { "epoch": 0.6620602426570479, "grad_norm": 0.6068326412859278, "learning_rate": 3.903016378261244e-06, "loss": 0.2613, "step": 14133 }, { "epoch": 0.6621070876469761, "grad_norm": 0.568467520566826, "learning_rate": 3.90285940538798e-06, "loss": 0.2859, "step": 14134 }, { "epoch": 0.6621539326369045, "grad_norm": 0.5804384614130841, "learning_rate": 3.902702424441611e-06, "loss": 0.2761, "step": 14135 }, { "epoch": 0.6622007776268328, "grad_norm": 0.6061214320327103, "learning_rate": 3.902545435423043e-06, "loss": 0.2836, "step": 14136 }, { "epoch": 0.6622476226167612, "grad_norm": 0.5969807663836106, "learning_rate": 3.902388438333179e-06, "loss": 0.2888, "step": 14137 }, { "epoch": 0.6622944676066894, "grad_norm": 0.5364942357228659, "learning_rate": 3.9022314331729214e-06, "loss": 0.2549, "step": 14138 }, { "epoch": 0.6623413125966178, "grad_norm": 0.5924946527573571, "learning_rate": 3.9020744199431745e-06, "loss": 0.279, "step": 14139 }, { "epoch": 0.6623881575865461, "grad_norm": 0.5561099424168445, "learning_rate": 3.9019173986448425e-06, "loss": 0.268, "step": 14140 }, { "epoch": 0.6624350025764745, "grad_norm": 0.582995530605209, "learning_rate": 3.901760369278827e-06, "loss": 0.2747, "step": 14141 }, { "epoch": 0.6624818475664028, "grad_norm": 0.5922886165420888, "learning_rate": 3.901603331846033e-06, "loss": 0.2857, "step": 14142 }, { "epoch": 0.6625286925563311, "grad_norm": 0.6060051801711552, "learning_rate": 3.901446286347365e-06, "loss": 0.2897, "step": 14143 }, { "epoch": 0.6625755375462594, "grad_norm": 0.5441481212706017, "learning_rate": 3.901289232783725e-06, "loss": 0.2686, "step": 14144 }, { "epoch": 0.6626223825361878, "grad_norm": 0.6140560346266752, "learning_rate": 3.901132171156018e-06, "loss": 0.289, "step": 14145 }, { "epoch": 0.6626692275261161, "grad_norm": 0.6294684327293256, "learning_rate": 3.900975101465148e-06, "loss": 0.2914, "step": 14146 }, { "epoch": 0.6627160725160444, "grad_norm": 0.5909182244865427, "learning_rate": 3.900818023712018e-06, "loss": 0.3, "step": 14147 }, { "epoch": 0.6627629175059727, "grad_norm": 0.5407344974792009, "learning_rate": 3.900660937897532e-06, "loss": 0.2793, "step": 14148 }, { "epoch": 0.6628097624959011, "grad_norm": 0.5762861525032739, "learning_rate": 3.900503844022595e-06, "loss": 0.2898, "step": 14149 }, { "epoch": 0.6628566074858294, "grad_norm": 0.5804110102511786, "learning_rate": 3.90034674208811e-06, "loss": 0.2793, "step": 14150 }, { "epoch": 0.6629034524757578, "grad_norm": 0.593856960813908, "learning_rate": 3.900189632094982e-06, "loss": 0.291, "step": 14151 }, { "epoch": 0.662950297465686, "grad_norm": 0.5749498620198145, "learning_rate": 3.900032514044113e-06, "loss": 0.2823, "step": 14152 }, { "epoch": 0.6629971424556144, "grad_norm": 0.6406255292113933, "learning_rate": 3.8998753879364105e-06, "loss": 0.2839, "step": 14153 }, { "epoch": 0.6630439874455427, "grad_norm": 0.6177571742034292, "learning_rate": 3.899718253772776e-06, "loss": 0.2921, "step": 14154 }, { "epoch": 0.6630908324354711, "grad_norm": 0.6222462302247224, "learning_rate": 3.899561111554115e-06, "loss": 0.3086, "step": 14155 }, { "epoch": 0.6631376774253993, "grad_norm": 0.5947045868819015, "learning_rate": 3.899403961281332e-06, "loss": 0.2768, "step": 14156 }, { "epoch": 0.6631845224153277, "grad_norm": 0.6177804656146599, "learning_rate": 3.899246802955331e-06, "loss": 0.281, "step": 14157 }, { "epoch": 0.663231367405256, "grad_norm": 0.5888530230810475, "learning_rate": 3.8990896365770155e-06, "loss": 0.2808, "step": 14158 }, { "epoch": 0.6632782123951844, "grad_norm": 0.561659018326354, "learning_rate": 3.898932462147291e-06, "loss": 0.2649, "step": 14159 }, { "epoch": 0.6633250573851127, "grad_norm": 0.5759083817957871, "learning_rate": 3.898775279667063e-06, "loss": 0.2917, "step": 14160 }, { "epoch": 0.663371902375041, "grad_norm": 0.624418661540489, "learning_rate": 3.898618089137233e-06, "loss": 0.2876, "step": 14161 }, { "epoch": 0.6634187473649693, "grad_norm": 0.6351455595768837, "learning_rate": 3.898460890558708e-06, "loss": 0.3169, "step": 14162 }, { "epoch": 0.6634655923548977, "grad_norm": 0.563481708170388, "learning_rate": 3.898303683932393e-06, "loss": 0.2876, "step": 14163 }, { "epoch": 0.663512437344826, "grad_norm": 0.6208821707069666, "learning_rate": 3.898146469259191e-06, "loss": 0.2996, "step": 14164 }, { "epoch": 0.6635592823347543, "grad_norm": 0.5688794844959837, "learning_rate": 3.897989246540008e-06, "loss": 0.2691, "step": 14165 }, { "epoch": 0.6636061273246826, "grad_norm": 0.6554533750897794, "learning_rate": 3.8978320157757475e-06, "loss": 0.2771, "step": 14166 }, { "epoch": 0.663652972314611, "grad_norm": 0.5365040993991299, "learning_rate": 3.897674776967315e-06, "loss": 0.2477, "step": 14167 }, { "epoch": 0.6636998173045393, "grad_norm": 0.5774058864886685, "learning_rate": 3.897517530115617e-06, "loss": 0.2647, "step": 14168 }, { "epoch": 0.6637466622944677, "grad_norm": 0.6020723681461948, "learning_rate": 3.897360275221555e-06, "loss": 0.2889, "step": 14169 }, { "epoch": 0.6637935072843959, "grad_norm": 0.5672170885663885, "learning_rate": 3.897203012286036e-06, "loss": 0.2762, "step": 14170 }, { "epoch": 0.6638403522743243, "grad_norm": 0.5504213489445472, "learning_rate": 3.897045741309966e-06, "loss": 0.2801, "step": 14171 }, { "epoch": 0.6638871972642526, "grad_norm": 0.6304584420145325, "learning_rate": 3.896888462294248e-06, "loss": 0.2838, "step": 14172 }, { "epoch": 0.663934042254181, "grad_norm": 0.550380528137928, "learning_rate": 3.896731175239789e-06, "loss": 0.2639, "step": 14173 }, { "epoch": 0.6639808872441092, "grad_norm": 0.597470247284432, "learning_rate": 3.8965738801474915e-06, "loss": 0.2817, "step": 14174 }, { "epoch": 0.6640277322340375, "grad_norm": 0.6296940693339473, "learning_rate": 3.896416577018264e-06, "loss": 0.2896, "step": 14175 }, { "epoch": 0.6640745772239659, "grad_norm": 0.6107398992370384, "learning_rate": 3.896259265853009e-06, "loss": 0.2865, "step": 14176 }, { "epoch": 0.6641214222138943, "grad_norm": 0.5835113241934919, "learning_rate": 3.896101946652634e-06, "loss": 0.2554, "step": 14177 }, { "epoch": 0.6641682672038226, "grad_norm": 0.5683854596981323, "learning_rate": 3.895944619418042e-06, "loss": 0.2651, "step": 14178 }, { "epoch": 0.6642151121937508, "grad_norm": 0.5876590022371984, "learning_rate": 3.895787284150139e-06, "loss": 0.2905, "step": 14179 }, { "epoch": 0.6642619571836792, "grad_norm": 0.5780573755500663, "learning_rate": 3.895629940849833e-06, "loss": 0.2745, "step": 14180 }, { "epoch": 0.6643088021736075, "grad_norm": 0.6263602131549398, "learning_rate": 3.895472589518026e-06, "loss": 0.2921, "step": 14181 }, { "epoch": 0.6643556471635359, "grad_norm": 0.5923919461303689, "learning_rate": 3.8953152301556256e-06, "loss": 0.2728, "step": 14182 }, { "epoch": 0.6644024921534641, "grad_norm": 0.6320244218682313, "learning_rate": 3.895157862763537e-06, "loss": 0.2916, "step": 14183 }, { "epoch": 0.6644493371433925, "grad_norm": 0.5885339690340821, "learning_rate": 3.895000487342664e-06, "loss": 0.2937, "step": 14184 }, { "epoch": 0.6644961821333208, "grad_norm": 0.59981102660286, "learning_rate": 3.894843103893915e-06, "loss": 0.2948, "step": 14185 }, { "epoch": 0.6645430271232492, "grad_norm": 0.5820993404186098, "learning_rate": 3.8946857124181946e-06, "loss": 0.2743, "step": 14186 }, { "epoch": 0.6645898721131775, "grad_norm": 0.5977011196283613, "learning_rate": 3.894528312916409e-06, "loss": 0.2877, "step": 14187 }, { "epoch": 0.6646367171031058, "grad_norm": 0.6331931489055743, "learning_rate": 3.8943709053894625e-06, "loss": 0.2842, "step": 14188 }, { "epoch": 0.6646835620930341, "grad_norm": 0.5425915325917569, "learning_rate": 3.894213489838262e-06, "loss": 0.2667, "step": 14189 }, { "epoch": 0.6647304070829625, "grad_norm": 0.6087718496388875, "learning_rate": 3.894056066263714e-06, "loss": 0.2659, "step": 14190 }, { "epoch": 0.6647772520728908, "grad_norm": 0.5922542115728782, "learning_rate": 3.8938986346667225e-06, "loss": 0.2658, "step": 14191 }, { "epoch": 0.6648240970628191, "grad_norm": 0.6473361322678931, "learning_rate": 3.893741195048196e-06, "loss": 0.2958, "step": 14192 }, { "epoch": 0.6648709420527474, "grad_norm": 0.5825591868593831, "learning_rate": 3.893583747409039e-06, "loss": 0.2843, "step": 14193 }, { "epoch": 0.6649177870426758, "grad_norm": 0.615908517100428, "learning_rate": 3.8934262917501566e-06, "loss": 0.302, "step": 14194 }, { "epoch": 0.6649646320326041, "grad_norm": 0.6117882467654446, "learning_rate": 3.893268828072457e-06, "loss": 0.2838, "step": 14195 }, { "epoch": 0.6650114770225325, "grad_norm": 0.6016760036913346, "learning_rate": 3.893111356376845e-06, "loss": 0.2813, "step": 14196 }, { "epoch": 0.6650583220124607, "grad_norm": 0.5881332289955896, "learning_rate": 3.892953876664229e-06, "loss": 0.3081, "step": 14197 }, { "epoch": 0.6651051670023891, "grad_norm": 0.585685662284055, "learning_rate": 3.892796388935512e-06, "loss": 0.2874, "step": 14198 }, { "epoch": 0.6651520119923174, "grad_norm": 0.587435289042469, "learning_rate": 3.8926388931916016e-06, "loss": 0.2768, "step": 14199 }, { "epoch": 0.6651988569822458, "grad_norm": 0.5858032437817062, "learning_rate": 3.8924813894334045e-06, "loss": 0.2829, "step": 14200 }, { "epoch": 0.665245701972174, "grad_norm": 0.5899676129242584, "learning_rate": 3.892323877661828e-06, "loss": 0.2954, "step": 14201 }, { "epoch": 0.6652925469621024, "grad_norm": 0.6271963447159812, "learning_rate": 3.8921663578777764e-06, "loss": 0.2981, "step": 14202 }, { "epoch": 0.6653393919520307, "grad_norm": 0.580683704476539, "learning_rate": 3.892008830082158e-06, "loss": 0.2792, "step": 14203 }, { "epoch": 0.6653862369419591, "grad_norm": 0.6078029112958337, "learning_rate": 3.891851294275879e-06, "loss": 0.2802, "step": 14204 }, { "epoch": 0.6654330819318874, "grad_norm": 0.6220042335006922, "learning_rate": 3.891693750459845e-06, "loss": 0.3076, "step": 14205 }, { "epoch": 0.6654799269218157, "grad_norm": 0.5949692788573926, "learning_rate": 3.891536198634963e-06, "loss": 0.2844, "step": 14206 }, { "epoch": 0.665526771911744, "grad_norm": 0.6045523707151099, "learning_rate": 3.8913786388021406e-06, "loss": 0.303, "step": 14207 }, { "epoch": 0.6655736169016724, "grad_norm": 0.5753770390850098, "learning_rate": 3.891221070962283e-06, "loss": 0.2757, "step": 14208 }, { "epoch": 0.6656204618916007, "grad_norm": 0.5380035244233045, "learning_rate": 3.891063495116299e-06, "loss": 0.2728, "step": 14209 }, { "epoch": 0.665667306881529, "grad_norm": 0.6077607610084063, "learning_rate": 3.890905911265094e-06, "loss": 0.2778, "step": 14210 }, { "epoch": 0.6657141518714573, "grad_norm": 0.5859866080986361, "learning_rate": 3.890748319409574e-06, "loss": 0.2635, "step": 14211 }, { "epoch": 0.6657609968613857, "grad_norm": 0.5741615853253712, "learning_rate": 3.890590719550647e-06, "loss": 0.2791, "step": 14212 }, { "epoch": 0.665807841851314, "grad_norm": 0.6601582047338166, "learning_rate": 3.890433111689221e-06, "loss": 0.3108, "step": 14213 }, { "epoch": 0.6658546868412424, "grad_norm": 0.6092837155195104, "learning_rate": 3.8902754958262014e-06, "loss": 0.2974, "step": 14214 }, { "epoch": 0.6659015318311706, "grad_norm": 0.5950995180925538, "learning_rate": 3.890117871962496e-06, "loss": 0.2729, "step": 14215 }, { "epoch": 0.665948376821099, "grad_norm": 0.6053839801567807, "learning_rate": 3.889960240099011e-06, "loss": 0.3094, "step": 14216 }, { "epoch": 0.6659952218110273, "grad_norm": 0.5684053371267318, "learning_rate": 3.889802600236655e-06, "loss": 0.2836, "step": 14217 }, { "epoch": 0.6660420668009557, "grad_norm": 0.6391407975305136, "learning_rate": 3.889644952376334e-06, "loss": 0.3119, "step": 14218 }, { "epoch": 0.6660889117908839, "grad_norm": 0.5513007820113615, "learning_rate": 3.889487296518955e-06, "loss": 0.2657, "step": 14219 }, { "epoch": 0.6661357567808123, "grad_norm": 0.6144482156648169, "learning_rate": 3.8893296326654275e-06, "loss": 0.3068, "step": 14220 }, { "epoch": 0.6661826017707406, "grad_norm": 0.5059805315788595, "learning_rate": 3.889171960816656e-06, "loss": 0.2475, "step": 14221 }, { "epoch": 0.666229446760669, "grad_norm": 0.5759378993672962, "learning_rate": 3.889014280973549e-06, "loss": 0.2727, "step": 14222 }, { "epoch": 0.6662762917505973, "grad_norm": 0.6587055919314984, "learning_rate": 3.888856593137014e-06, "loss": 0.2987, "step": 14223 }, { "epoch": 0.6663231367405256, "grad_norm": 0.6897034383761862, "learning_rate": 3.888698897307958e-06, "loss": 0.3057, "step": 14224 }, { "epoch": 0.6663699817304539, "grad_norm": 0.5790811630758006, "learning_rate": 3.888541193487289e-06, "loss": 0.293, "step": 14225 }, { "epoch": 0.6664168267203823, "grad_norm": 0.6586627447904186, "learning_rate": 3.888383481675915e-06, "loss": 0.2976, "step": 14226 }, { "epoch": 0.6664636717103106, "grad_norm": 0.6266245892757124, "learning_rate": 3.888225761874743e-06, "loss": 0.2687, "step": 14227 }, { "epoch": 0.6665105167002389, "grad_norm": 0.5948171874825768, "learning_rate": 3.8880680340846795e-06, "loss": 0.3047, "step": 14228 }, { "epoch": 0.6665573616901672, "grad_norm": 0.6120616054914926, "learning_rate": 3.887910298306636e-06, "loss": 0.2718, "step": 14229 }, { "epoch": 0.6666042066800956, "grad_norm": 0.6242350366370916, "learning_rate": 3.887752554541515e-06, "loss": 0.2908, "step": 14230 }, { "epoch": 0.6666510516700239, "grad_norm": 0.5772411395066191, "learning_rate": 3.887594802790227e-06, "loss": 0.2679, "step": 14231 }, { "epoch": 0.6666978966599523, "grad_norm": 0.61556044244119, "learning_rate": 3.887437043053681e-06, "loss": 0.2887, "step": 14232 }, { "epoch": 0.6667447416498805, "grad_norm": 0.6484235623879444, "learning_rate": 3.887279275332783e-06, "loss": 0.3119, "step": 14233 }, { "epoch": 0.6667915866398089, "grad_norm": 0.5601221907118831, "learning_rate": 3.88712149962844e-06, "loss": 0.2689, "step": 14234 }, { "epoch": 0.6668384316297372, "grad_norm": 0.5857115345608231, "learning_rate": 3.886963715941563e-06, "loss": 0.2626, "step": 14235 }, { "epoch": 0.6668852766196656, "grad_norm": 0.5753839377749641, "learning_rate": 3.8868059242730585e-06, "loss": 0.2876, "step": 14236 }, { "epoch": 0.6669321216095938, "grad_norm": 0.5759880029515414, "learning_rate": 3.8866481246238345e-06, "loss": 0.2881, "step": 14237 }, { "epoch": 0.6669789665995222, "grad_norm": 0.6039203777592201, "learning_rate": 3.886490316994798e-06, "loss": 0.2776, "step": 14238 }, { "epoch": 0.6670258115894505, "grad_norm": 0.6669100044709235, "learning_rate": 3.886332501386859e-06, "loss": 0.3008, "step": 14239 }, { "epoch": 0.6670726565793789, "grad_norm": 0.5887905762862758, "learning_rate": 3.886174677800924e-06, "loss": 0.2669, "step": 14240 }, { "epoch": 0.6671195015693072, "grad_norm": 0.5769860832986029, "learning_rate": 3.886016846237904e-06, "loss": 0.2815, "step": 14241 }, { "epoch": 0.6671663465592355, "grad_norm": 0.573895470301137, "learning_rate": 3.885859006698703e-06, "loss": 0.295, "step": 14242 }, { "epoch": 0.6672131915491638, "grad_norm": 0.5785872320419778, "learning_rate": 3.8857011591842335e-06, "loss": 0.2873, "step": 14243 }, { "epoch": 0.6672600365390922, "grad_norm": 0.6129879007935716, "learning_rate": 3.885543303695402e-06, "loss": 0.2805, "step": 14244 }, { "epoch": 0.6673068815290205, "grad_norm": 0.5605000263314596, "learning_rate": 3.885385440233117e-06, "loss": 0.266, "step": 14245 }, { "epoch": 0.6673537265189488, "grad_norm": 0.5924164144897874, "learning_rate": 3.885227568798287e-06, "loss": 0.2637, "step": 14246 }, { "epoch": 0.6674005715088771, "grad_norm": 0.6187402318887191, "learning_rate": 3.88506968939182e-06, "loss": 0.2935, "step": 14247 }, { "epoch": 0.6674474164988055, "grad_norm": 0.5888023954103839, "learning_rate": 3.884911802014625e-06, "loss": 0.2876, "step": 14248 }, { "epoch": 0.6674942614887338, "grad_norm": 0.6822322508038832, "learning_rate": 3.88475390666761e-06, "loss": 0.3048, "step": 14249 }, { "epoch": 0.6675411064786622, "grad_norm": 0.5766084567740378, "learning_rate": 3.8845960033516864e-06, "loss": 0.263, "step": 14250 }, { "epoch": 0.6675879514685904, "grad_norm": 0.6049575457107492, "learning_rate": 3.8844380920677585e-06, "loss": 0.2914, "step": 14251 }, { "epoch": 0.6676347964585188, "grad_norm": 0.5583288197547249, "learning_rate": 3.884280172816739e-06, "loss": 0.2766, "step": 14252 }, { "epoch": 0.6676816414484471, "grad_norm": 0.6191536598939176, "learning_rate": 3.884122245599534e-06, "loss": 0.2808, "step": 14253 }, { "epoch": 0.6677284864383755, "grad_norm": 0.6089697575044982, "learning_rate": 3.883964310417054e-06, "loss": 0.2844, "step": 14254 }, { "epoch": 0.6677753314283037, "grad_norm": 0.5927915416949366, "learning_rate": 3.883806367270208e-06, "loss": 0.3049, "step": 14255 }, { "epoch": 0.667822176418232, "grad_norm": 0.585310030287585, "learning_rate": 3.883648416159903e-06, "loss": 0.2875, "step": 14256 }, { "epoch": 0.6678690214081604, "grad_norm": 0.5484717935286976, "learning_rate": 3.883490457087049e-06, "loss": 0.2746, "step": 14257 }, { "epoch": 0.6679158663980888, "grad_norm": 0.5794136167374774, "learning_rate": 3.8833324900525555e-06, "loss": 0.2876, "step": 14258 }, { "epoch": 0.6679627113880171, "grad_norm": 0.5368694581293, "learning_rate": 3.883174515057331e-06, "loss": 0.2816, "step": 14259 }, { "epoch": 0.6680095563779453, "grad_norm": 0.623101996413954, "learning_rate": 3.883016532102285e-06, "loss": 0.2927, "step": 14260 }, { "epoch": 0.6680564013678737, "grad_norm": 0.5703538538802049, "learning_rate": 3.882858541188327e-06, "loss": 0.2817, "step": 14261 }, { "epoch": 0.668103246357802, "grad_norm": 0.599313413243638, "learning_rate": 3.882700542316366e-06, "loss": 0.2835, "step": 14262 }, { "epoch": 0.6681500913477304, "grad_norm": 0.5563649795656205, "learning_rate": 3.882542535487309e-06, "loss": 0.277, "step": 14263 }, { "epoch": 0.6681969363376586, "grad_norm": 0.5801660601380624, "learning_rate": 3.882384520702068e-06, "loss": 0.2843, "step": 14264 }, { "epoch": 0.668243781327587, "grad_norm": 0.5734764722792742, "learning_rate": 3.882226497961552e-06, "loss": 0.2869, "step": 14265 }, { "epoch": 0.6682906263175153, "grad_norm": 0.6478209013553473, "learning_rate": 3.882068467266669e-06, "loss": 0.2823, "step": 14266 }, { "epoch": 0.6683374713074437, "grad_norm": 0.578351348265855, "learning_rate": 3.881910428618331e-06, "loss": 0.2794, "step": 14267 }, { "epoch": 0.668384316297372, "grad_norm": 0.6257431743410079, "learning_rate": 3.881752382017446e-06, "loss": 0.2741, "step": 14268 }, { "epoch": 0.6684311612873003, "grad_norm": 0.5765542207563559, "learning_rate": 3.881594327464922e-06, "loss": 0.2812, "step": 14269 }, { "epoch": 0.6684780062772286, "grad_norm": 0.5755439886766217, "learning_rate": 3.881436264961669e-06, "loss": 0.2657, "step": 14270 }, { "epoch": 0.668524851267157, "grad_norm": 0.607452793108574, "learning_rate": 3.881278194508598e-06, "loss": 0.2757, "step": 14271 }, { "epoch": 0.6685716962570853, "grad_norm": 0.6020717704966173, "learning_rate": 3.881120116106619e-06, "loss": 0.286, "step": 14272 }, { "epoch": 0.6686185412470136, "grad_norm": 0.5741344883929754, "learning_rate": 3.880962029756641e-06, "loss": 0.2849, "step": 14273 }, { "epoch": 0.6686653862369419, "grad_norm": 0.587577382512349, "learning_rate": 3.8808039354595736e-06, "loss": 0.2869, "step": 14274 }, { "epoch": 0.6687122312268703, "grad_norm": 0.5603802725725627, "learning_rate": 3.880645833216326e-06, "loss": 0.27, "step": 14275 }, { "epoch": 0.6687590762167986, "grad_norm": 0.5634027335350626, "learning_rate": 3.880487723027809e-06, "loss": 0.2753, "step": 14276 }, { "epoch": 0.668805921206727, "grad_norm": 0.6225947899222632, "learning_rate": 3.880329604894932e-06, "loss": 0.2969, "step": 14277 }, { "epoch": 0.6688527661966552, "grad_norm": 0.5965130341012961, "learning_rate": 3.8801714788186055e-06, "loss": 0.2605, "step": 14278 }, { "epoch": 0.6688996111865836, "grad_norm": 0.5898276522299242, "learning_rate": 3.880013344799738e-06, "loss": 0.2874, "step": 14279 }, { "epoch": 0.6689464561765119, "grad_norm": 0.6195958596365183, "learning_rate": 3.879855202839241e-06, "loss": 0.2815, "step": 14280 }, { "epoch": 0.6689933011664403, "grad_norm": 0.7092281229675972, "learning_rate": 3.879697052938025e-06, "loss": 0.2875, "step": 14281 }, { "epoch": 0.6690401461563685, "grad_norm": 0.553984084351429, "learning_rate": 3.879538895096998e-06, "loss": 0.2727, "step": 14282 }, { "epoch": 0.6690869911462969, "grad_norm": 0.5585009048336105, "learning_rate": 3.879380729317072e-06, "loss": 0.2645, "step": 14283 }, { "epoch": 0.6691338361362252, "grad_norm": 0.5972377804067451, "learning_rate": 3.879222555599157e-06, "loss": 0.2925, "step": 14284 }, { "epoch": 0.6691806811261536, "grad_norm": 0.6050603633851697, "learning_rate": 3.879064373944162e-06, "loss": 0.28, "step": 14285 }, { "epoch": 0.6692275261160819, "grad_norm": 0.6679054965421799, "learning_rate": 3.878906184352998e-06, "loss": 0.2838, "step": 14286 }, { "epoch": 0.6692743711060102, "grad_norm": 0.5953696936638455, "learning_rate": 3.878747986826576e-06, "loss": 0.2698, "step": 14287 }, { "epoch": 0.6693212160959385, "grad_norm": 0.6083884349086207, "learning_rate": 3.878589781365806e-06, "loss": 0.2739, "step": 14288 }, { "epoch": 0.6693680610858669, "grad_norm": 0.6148109223295015, "learning_rate": 3.878431567971598e-06, "loss": 0.2766, "step": 14289 }, { "epoch": 0.6694149060757952, "grad_norm": 0.5808657290563908, "learning_rate": 3.878273346644863e-06, "loss": 0.2623, "step": 14290 }, { "epoch": 0.6694617510657235, "grad_norm": 0.5718662352313122, "learning_rate": 3.878115117386512e-06, "loss": 0.2765, "step": 14291 }, { "epoch": 0.6695085960556518, "grad_norm": 0.6548560621902862, "learning_rate": 3.877956880197454e-06, "loss": 0.3005, "step": 14292 }, { "epoch": 0.6695554410455802, "grad_norm": 0.5451274923608451, "learning_rate": 3.8777986350786e-06, "loss": 0.2733, "step": 14293 }, { "epoch": 0.6696022860355085, "grad_norm": 0.5632353787141019, "learning_rate": 3.877640382030863e-06, "loss": 0.2688, "step": 14294 }, { "epoch": 0.6696491310254369, "grad_norm": 0.5823910271276898, "learning_rate": 3.877482121055149e-06, "loss": 0.2719, "step": 14295 }, { "epoch": 0.6696959760153651, "grad_norm": 0.5797105229725101, "learning_rate": 3.877323852152374e-06, "loss": 0.2883, "step": 14296 }, { "epoch": 0.6697428210052935, "grad_norm": 0.604768886315764, "learning_rate": 3.877165575323446e-06, "loss": 0.2957, "step": 14297 }, { "epoch": 0.6697896659952218, "grad_norm": 0.6134141986938907, "learning_rate": 3.877007290569276e-06, "loss": 0.2847, "step": 14298 }, { "epoch": 0.6698365109851502, "grad_norm": 0.6105975147633705, "learning_rate": 3.876848997890775e-06, "loss": 0.2639, "step": 14299 }, { "epoch": 0.6698833559750784, "grad_norm": 0.5611728120966571, "learning_rate": 3.8766906972888544e-06, "loss": 0.2718, "step": 14300 }, { "epoch": 0.6699302009650068, "grad_norm": 0.6327271839825894, "learning_rate": 3.876532388764424e-06, "loss": 0.2727, "step": 14301 }, { "epoch": 0.6699770459549351, "grad_norm": 0.683616532907205, "learning_rate": 3.876374072318396e-06, "loss": 0.3045, "step": 14302 }, { "epoch": 0.6700238909448635, "grad_norm": 0.5766355477261241, "learning_rate": 3.876215747951681e-06, "loss": 0.2727, "step": 14303 }, { "epoch": 0.6700707359347918, "grad_norm": 0.5516652806627004, "learning_rate": 3.8760574156651905e-06, "loss": 0.2607, "step": 14304 }, { "epoch": 0.6701175809247201, "grad_norm": 0.6038861227557347, "learning_rate": 3.875899075459836e-06, "loss": 0.2869, "step": 14305 }, { "epoch": 0.6701644259146484, "grad_norm": 0.6702954965278755, "learning_rate": 3.875740727336528e-06, "loss": 0.3047, "step": 14306 }, { "epoch": 0.6702112709045768, "grad_norm": 0.5585049799031593, "learning_rate": 3.875582371296177e-06, "loss": 0.29, "step": 14307 }, { "epoch": 0.6702581158945051, "grad_norm": 0.6347956090388276, "learning_rate": 3.875424007339696e-06, "loss": 0.279, "step": 14308 }, { "epoch": 0.6703049608844334, "grad_norm": 0.6252085907603531, "learning_rate": 3.875265635467997e-06, "loss": 0.2983, "step": 14309 }, { "epoch": 0.6703518058743617, "grad_norm": 0.5309054953013526, "learning_rate": 3.875107255681987e-06, "loss": 0.2874, "step": 14310 }, { "epoch": 0.6703986508642901, "grad_norm": 0.6069830208887708, "learning_rate": 3.874948867982582e-06, "loss": 0.3216, "step": 14311 }, { "epoch": 0.6704454958542184, "grad_norm": 0.539494174998389, "learning_rate": 3.874790472370691e-06, "loss": 0.2753, "step": 14312 }, { "epoch": 0.6704923408441468, "grad_norm": 0.5605472251436083, "learning_rate": 3.874632068847227e-06, "loss": 0.2836, "step": 14313 }, { "epoch": 0.670539185834075, "grad_norm": 0.5881729715649732, "learning_rate": 3.874473657413102e-06, "loss": 0.2875, "step": 14314 }, { "epoch": 0.6705860308240034, "grad_norm": 0.5993483377458715, "learning_rate": 3.8743152380692245e-06, "loss": 0.2989, "step": 14315 }, { "epoch": 0.6706328758139317, "grad_norm": 0.595785479526614, "learning_rate": 3.874156810816509e-06, "loss": 0.2902, "step": 14316 }, { "epoch": 0.6706797208038601, "grad_norm": 0.5423785547542996, "learning_rate": 3.873998375655867e-06, "loss": 0.2664, "step": 14317 }, { "epoch": 0.6707265657937883, "grad_norm": 0.5514216280867401, "learning_rate": 3.873839932588209e-06, "loss": 0.2508, "step": 14318 }, { "epoch": 0.6707734107837167, "grad_norm": 0.5605170741899373, "learning_rate": 3.873681481614448e-06, "loss": 0.2741, "step": 14319 }, { "epoch": 0.670820255773645, "grad_norm": 0.5661371983598462, "learning_rate": 3.873523022735495e-06, "loss": 0.2749, "step": 14320 }, { "epoch": 0.6708671007635734, "grad_norm": 0.5614065510391267, "learning_rate": 3.873364555952262e-06, "loss": 0.2712, "step": 14321 }, { "epoch": 0.6709139457535017, "grad_norm": 0.5567243615953195, "learning_rate": 3.873206081265662e-06, "loss": 0.2791, "step": 14322 }, { "epoch": 0.67096079074343, "grad_norm": 0.5703750458862021, "learning_rate": 3.873047598676605e-06, "loss": 0.2825, "step": 14323 }, { "epoch": 0.6710076357333583, "grad_norm": 0.5549524662412358, "learning_rate": 3.872889108186005e-06, "loss": 0.2731, "step": 14324 }, { "epoch": 0.6710544807232867, "grad_norm": 0.6070348321039469, "learning_rate": 3.8727306097947725e-06, "loss": 0.2894, "step": 14325 }, { "epoch": 0.671101325713215, "grad_norm": 0.6228674632212857, "learning_rate": 3.872572103503821e-06, "loss": 0.2876, "step": 14326 }, { "epoch": 0.6711481707031433, "grad_norm": 0.6208225249200736, "learning_rate": 3.8724135893140625e-06, "loss": 0.2867, "step": 14327 }, { "epoch": 0.6711950156930716, "grad_norm": 0.5510850423508428, "learning_rate": 3.8722550672264084e-06, "loss": 0.2578, "step": 14328 }, { "epoch": 0.671241860683, "grad_norm": 0.5985176499229691, "learning_rate": 3.8720965372417705e-06, "loss": 0.2872, "step": 14329 }, { "epoch": 0.6712887056729283, "grad_norm": 0.54874285219302, "learning_rate": 3.871937999361063e-06, "loss": 0.26, "step": 14330 }, { "epoch": 0.6713355506628567, "grad_norm": 0.5835108693716738, "learning_rate": 3.871779453585196e-06, "loss": 0.2539, "step": 14331 }, { "epoch": 0.6713823956527849, "grad_norm": 0.654338102071438, "learning_rate": 3.871620899915084e-06, "loss": 0.2825, "step": 14332 }, { "epoch": 0.6714292406427133, "grad_norm": 0.6417250137353715, "learning_rate": 3.871462338351638e-06, "loss": 0.2868, "step": 14333 }, { "epoch": 0.6714760856326416, "grad_norm": 0.6352177302521155, "learning_rate": 3.87130376889577e-06, "loss": 0.2734, "step": 14334 }, { "epoch": 0.67152293062257, "grad_norm": 0.5613142024196234, "learning_rate": 3.871145191548395e-06, "loss": 0.2744, "step": 14335 }, { "epoch": 0.6715697756124982, "grad_norm": 0.6414542654507814, "learning_rate": 3.870986606310424e-06, "loss": 0.3013, "step": 14336 }, { "epoch": 0.6716166206024266, "grad_norm": 0.5611568860470113, "learning_rate": 3.870828013182769e-06, "loss": 0.2699, "step": 14337 }, { "epoch": 0.6716634655923549, "grad_norm": 0.6075582895519718, "learning_rate": 3.870669412166343e-06, "loss": 0.2889, "step": 14338 }, { "epoch": 0.6717103105822833, "grad_norm": 0.6080476372983578, "learning_rate": 3.87051080326206e-06, "loss": 0.2728, "step": 14339 }, { "epoch": 0.6717571555722116, "grad_norm": 0.6055587010171718, "learning_rate": 3.870352186470831e-06, "loss": 0.262, "step": 14340 }, { "epoch": 0.6718040005621398, "grad_norm": 0.5983861520824401, "learning_rate": 3.87019356179357e-06, "loss": 0.293, "step": 14341 }, { "epoch": 0.6718508455520682, "grad_norm": 0.5988582688467495, "learning_rate": 3.870034929231189e-06, "loss": 0.2855, "step": 14342 }, { "epoch": 0.6718976905419966, "grad_norm": 0.5892491788224553, "learning_rate": 3.869876288784602e-06, "loss": 0.2795, "step": 14343 }, { "epoch": 0.6719445355319249, "grad_norm": 0.5703997272899887, "learning_rate": 3.869717640454721e-06, "loss": 0.2887, "step": 14344 }, { "epoch": 0.6719913805218531, "grad_norm": 0.6088461038189749, "learning_rate": 3.8695589842424594e-06, "loss": 0.2984, "step": 14345 }, { "epoch": 0.6720382255117815, "grad_norm": 0.5576640247474787, "learning_rate": 3.86940032014873e-06, "loss": 0.2687, "step": 14346 }, { "epoch": 0.6720850705017098, "grad_norm": 0.5379521477350717, "learning_rate": 3.869241648174445e-06, "loss": 0.2552, "step": 14347 }, { "epoch": 0.6721319154916382, "grad_norm": 0.5543334985560963, "learning_rate": 3.86908296832052e-06, "loss": 0.2884, "step": 14348 }, { "epoch": 0.6721787604815666, "grad_norm": 0.6072218524755142, "learning_rate": 3.868924280587866e-06, "loss": 0.2619, "step": 14349 }, { "epoch": 0.6722256054714948, "grad_norm": 0.5835681641176276, "learning_rate": 3.868765584977396e-06, "loss": 0.2941, "step": 14350 }, { "epoch": 0.6722724504614231, "grad_norm": 0.5848880121612453, "learning_rate": 3.868606881490026e-06, "loss": 0.2942, "step": 14351 }, { "epoch": 0.6723192954513515, "grad_norm": 0.6103589573173077, "learning_rate": 3.8684481701266654e-06, "loss": 0.2864, "step": 14352 }, { "epoch": 0.6723661404412798, "grad_norm": 0.5956786867963454, "learning_rate": 3.8682894508882304e-06, "loss": 0.262, "step": 14353 }, { "epoch": 0.6724129854312081, "grad_norm": 0.6110842401342156, "learning_rate": 3.868130723775634e-06, "loss": 0.2795, "step": 14354 }, { "epoch": 0.6724598304211364, "grad_norm": 0.6916737829240169, "learning_rate": 3.8679719887897884e-06, "loss": 0.3176, "step": 14355 }, { "epoch": 0.6725066754110648, "grad_norm": 0.6086167320190539, "learning_rate": 3.867813245931608e-06, "loss": 0.2924, "step": 14356 }, { "epoch": 0.6725535204009931, "grad_norm": 0.6624421147751238, "learning_rate": 3.8676544952020055e-06, "loss": 0.2919, "step": 14357 }, { "epoch": 0.6726003653909215, "grad_norm": 0.5784568542400702, "learning_rate": 3.867495736601896e-06, "loss": 0.2754, "step": 14358 }, { "epoch": 0.6726472103808497, "grad_norm": 0.5793641093987658, "learning_rate": 3.867336970132192e-06, "loss": 0.2775, "step": 14359 }, { "epoch": 0.6726940553707781, "grad_norm": 0.6097283389308615, "learning_rate": 3.8671781957938086e-06, "loss": 0.3009, "step": 14360 }, { "epoch": 0.6727409003607064, "grad_norm": 0.5745705975737941, "learning_rate": 3.867019413587656e-06, "loss": 0.2917, "step": 14361 }, { "epoch": 0.6727877453506348, "grad_norm": 0.6114398120889638, "learning_rate": 3.8668606235146515e-06, "loss": 0.2776, "step": 14362 }, { "epoch": 0.672834590340563, "grad_norm": 0.6557846638129692, "learning_rate": 3.866701825575708e-06, "loss": 0.2814, "step": 14363 }, { "epoch": 0.6728814353304914, "grad_norm": 0.6224105147349853, "learning_rate": 3.866543019771738e-06, "loss": 0.3167, "step": 14364 }, { "epoch": 0.6729282803204197, "grad_norm": 0.6897319119466027, "learning_rate": 3.866384206103657e-06, "loss": 0.3145, "step": 14365 }, { "epoch": 0.6729751253103481, "grad_norm": 0.6153342231229678, "learning_rate": 3.866225384572378e-06, "loss": 0.3041, "step": 14366 }, { "epoch": 0.6730219703002764, "grad_norm": 0.5444932808678878, "learning_rate": 3.866066555178817e-06, "loss": 0.2594, "step": 14367 }, { "epoch": 0.6730688152902047, "grad_norm": 0.5617231611650289, "learning_rate": 3.865907717923885e-06, "loss": 0.2814, "step": 14368 }, { "epoch": 0.673115660280133, "grad_norm": 0.6103181734381319, "learning_rate": 3.865748872808497e-06, "loss": 0.29, "step": 14369 }, { "epoch": 0.6731625052700614, "grad_norm": 0.5869312350812759, "learning_rate": 3.865590019833567e-06, "loss": 0.2818, "step": 14370 }, { "epoch": 0.6732093502599897, "grad_norm": 0.6331686133511646, "learning_rate": 3.8654311590000105e-06, "loss": 0.2716, "step": 14371 }, { "epoch": 0.673256195249918, "grad_norm": 0.5914492356091583, "learning_rate": 3.865272290308741e-06, "loss": 0.2662, "step": 14372 }, { "epoch": 0.6733030402398463, "grad_norm": 0.5551194727074189, "learning_rate": 3.865113413760673e-06, "loss": 0.274, "step": 14373 }, { "epoch": 0.6733498852297747, "grad_norm": 0.625985550606182, "learning_rate": 3.864954529356719e-06, "loss": 0.2823, "step": 14374 }, { "epoch": 0.673396730219703, "grad_norm": 0.6441430692915908, "learning_rate": 3.864795637097796e-06, "loss": 0.2864, "step": 14375 }, { "epoch": 0.6734435752096314, "grad_norm": 0.6093723198362878, "learning_rate": 3.864636736984817e-06, "loss": 0.2821, "step": 14376 }, { "epoch": 0.6734904201995596, "grad_norm": 0.5798361476390935, "learning_rate": 3.8644778290186965e-06, "loss": 0.2754, "step": 14377 }, { "epoch": 0.673537265189488, "grad_norm": 0.642340589693481, "learning_rate": 3.864318913200349e-06, "loss": 0.294, "step": 14378 }, { "epoch": 0.6735841101794163, "grad_norm": 0.5761090575361038, "learning_rate": 3.864159989530689e-06, "loss": 0.2624, "step": 14379 }, { "epoch": 0.6736309551693447, "grad_norm": 0.6161149847546649, "learning_rate": 3.864001058010631e-06, "loss": 0.3011, "step": 14380 }, { "epoch": 0.6736778001592729, "grad_norm": 0.5710150026186057, "learning_rate": 3.86384211864109e-06, "loss": 0.2743, "step": 14381 }, { "epoch": 0.6737246451492013, "grad_norm": 0.6075589153740568, "learning_rate": 3.8636831714229795e-06, "loss": 0.2754, "step": 14382 }, { "epoch": 0.6737714901391296, "grad_norm": 0.5750884339182426, "learning_rate": 3.863524216357216e-06, "loss": 0.2821, "step": 14383 }, { "epoch": 0.673818335129058, "grad_norm": 0.6025558030864017, "learning_rate": 3.863365253444713e-06, "loss": 0.3203, "step": 14384 }, { "epoch": 0.6738651801189863, "grad_norm": 0.6353414971039537, "learning_rate": 3.863206282686386e-06, "loss": 0.2948, "step": 14385 }, { "epoch": 0.6739120251089146, "grad_norm": 0.6955329328264972, "learning_rate": 3.86304730408315e-06, "loss": 0.304, "step": 14386 }, { "epoch": 0.6739588700988429, "grad_norm": 0.5860810019120053, "learning_rate": 3.862888317635917e-06, "loss": 0.2907, "step": 14387 }, { "epoch": 0.6740057150887713, "grad_norm": 0.6093217720520683, "learning_rate": 3.862729323345607e-06, "loss": 0.2751, "step": 14388 }, { "epoch": 0.6740525600786996, "grad_norm": 0.6404385311250937, "learning_rate": 3.862570321213131e-06, "loss": 0.2726, "step": 14389 }, { "epoch": 0.6740994050686279, "grad_norm": 0.5351598240005176, "learning_rate": 3.862411311239406e-06, "loss": 0.2575, "step": 14390 }, { "epoch": 0.6741462500585562, "grad_norm": 0.579210413694586, "learning_rate": 3.862252293425345e-06, "loss": 0.2825, "step": 14391 }, { "epoch": 0.6741930950484846, "grad_norm": 0.6043476237844438, "learning_rate": 3.862093267771866e-06, "loss": 0.2696, "step": 14392 }, { "epoch": 0.6742399400384129, "grad_norm": 0.5624378518823954, "learning_rate": 3.861934234279881e-06, "loss": 0.2934, "step": 14393 }, { "epoch": 0.6742867850283413, "grad_norm": 0.5570371796186955, "learning_rate": 3.861775192950308e-06, "loss": 0.2606, "step": 14394 }, { "epoch": 0.6743336300182695, "grad_norm": 0.589524945277054, "learning_rate": 3.8616161437840605e-06, "loss": 0.2791, "step": 14395 }, { "epoch": 0.6743804750081979, "grad_norm": 0.5830256452315585, "learning_rate": 3.861457086782054e-06, "loss": 0.2784, "step": 14396 }, { "epoch": 0.6744273199981262, "grad_norm": 0.5866692323939303, "learning_rate": 3.861298021945205e-06, "loss": 0.2949, "step": 14397 }, { "epoch": 0.6744741649880546, "grad_norm": 0.6635211677475165, "learning_rate": 3.8611389492744276e-06, "loss": 0.3317, "step": 14398 }, { "epoch": 0.6745210099779828, "grad_norm": 0.608786204396792, "learning_rate": 3.8609798687706376e-06, "loss": 0.297, "step": 14399 }, { "epoch": 0.6745678549679112, "grad_norm": 0.5773570491052296, "learning_rate": 3.860820780434751e-06, "loss": 0.2742, "step": 14400 }, { "epoch": 0.6746146999578395, "grad_norm": 0.6495344452755175, "learning_rate": 3.8606616842676815e-06, "loss": 0.2686, "step": 14401 }, { "epoch": 0.6746615449477679, "grad_norm": 0.5765381917267377, "learning_rate": 3.860502580270348e-06, "loss": 0.2863, "step": 14402 }, { "epoch": 0.6747083899376962, "grad_norm": 0.5690895844491778, "learning_rate": 3.860343468443662e-06, "loss": 0.2695, "step": 14403 }, { "epoch": 0.6747552349276245, "grad_norm": 0.6651969356199406, "learning_rate": 3.860184348788542e-06, "loss": 0.2959, "step": 14404 }, { "epoch": 0.6748020799175528, "grad_norm": 0.5946536855000796, "learning_rate": 3.8600252213059045e-06, "loss": 0.2888, "step": 14405 }, { "epoch": 0.6748489249074812, "grad_norm": 0.5396356890911563, "learning_rate": 3.859866085996662e-06, "loss": 0.271, "step": 14406 }, { "epoch": 0.6748957698974095, "grad_norm": 0.5909383251706142, "learning_rate": 3.859706942861732e-06, "loss": 0.3124, "step": 14407 }, { "epoch": 0.6749426148873378, "grad_norm": 0.5582151253874209, "learning_rate": 3.859547791902031e-06, "loss": 0.2856, "step": 14408 }, { "epoch": 0.6749894598772661, "grad_norm": 0.6181587972739526, "learning_rate": 3.859388633118474e-06, "loss": 0.2865, "step": 14409 }, { "epoch": 0.6750363048671945, "grad_norm": 0.6405715435209771, "learning_rate": 3.8592294665119766e-06, "loss": 0.3056, "step": 14410 }, { "epoch": 0.6750831498571228, "grad_norm": 0.6269614515023295, "learning_rate": 3.859070292083456e-06, "loss": 0.3107, "step": 14411 }, { "epoch": 0.6751299948470512, "grad_norm": 0.5977641305259309, "learning_rate": 3.858911109833827e-06, "loss": 0.288, "step": 14412 }, { "epoch": 0.6751768398369794, "grad_norm": 0.6244955134991632, "learning_rate": 3.858751919764006e-06, "loss": 0.3097, "step": 14413 }, { "epoch": 0.6752236848269078, "grad_norm": 0.6574920641354667, "learning_rate": 3.85859272187491e-06, "loss": 0.2955, "step": 14414 }, { "epoch": 0.6752705298168361, "grad_norm": 0.5733658304217036, "learning_rate": 3.8584335161674536e-06, "loss": 0.2723, "step": 14415 }, { "epoch": 0.6753173748067645, "grad_norm": 0.6425375000491595, "learning_rate": 3.858274302642553e-06, "loss": 0.2916, "step": 14416 }, { "epoch": 0.6753642197966927, "grad_norm": 0.6057598726413083, "learning_rate": 3.8581150813011265e-06, "loss": 0.2852, "step": 14417 }, { "epoch": 0.675411064786621, "grad_norm": 0.6068378278269106, "learning_rate": 3.8579558521440885e-06, "loss": 0.3065, "step": 14418 }, { "epoch": 0.6754579097765494, "grad_norm": 0.6246697067990582, "learning_rate": 3.857796615172356e-06, "loss": 0.3068, "step": 14419 }, { "epoch": 0.6755047547664778, "grad_norm": 0.5861676285638466, "learning_rate": 3.857637370386845e-06, "loss": 0.2635, "step": 14420 }, { "epoch": 0.6755515997564061, "grad_norm": 0.577904626109325, "learning_rate": 3.8574781177884725e-06, "loss": 0.2891, "step": 14421 }, { "epoch": 0.6755984447463343, "grad_norm": 0.6465314171651141, "learning_rate": 3.857318857378154e-06, "loss": 0.3124, "step": 14422 }, { "epoch": 0.6756452897362627, "grad_norm": 0.5915563531650051, "learning_rate": 3.857159589156808e-06, "loss": 0.2832, "step": 14423 }, { "epoch": 0.675692134726191, "grad_norm": 0.6423349546201005, "learning_rate": 3.8570003131253484e-06, "loss": 0.307, "step": 14424 }, { "epoch": 0.6757389797161194, "grad_norm": 0.5558452232516781, "learning_rate": 3.8568410292846934e-06, "loss": 0.2843, "step": 14425 }, { "epoch": 0.6757858247060476, "grad_norm": 0.586121659244117, "learning_rate": 3.856681737635759e-06, "loss": 0.295, "step": 14426 }, { "epoch": 0.675832669695976, "grad_norm": 0.5640573861717452, "learning_rate": 3.856522438179463e-06, "loss": 0.2827, "step": 14427 }, { "epoch": 0.6758795146859043, "grad_norm": 0.5975864946302821, "learning_rate": 3.856363130916721e-06, "loss": 0.2882, "step": 14428 }, { "epoch": 0.6759263596758327, "grad_norm": 0.621729962840586, "learning_rate": 3.85620381584845e-06, "loss": 0.2971, "step": 14429 }, { "epoch": 0.675973204665761, "grad_norm": 0.5530607147636222, "learning_rate": 3.856044492975567e-06, "loss": 0.2673, "step": 14430 }, { "epoch": 0.6760200496556893, "grad_norm": 0.578140332849237, "learning_rate": 3.855885162298988e-06, "loss": 0.2961, "step": 14431 }, { "epoch": 0.6760668946456176, "grad_norm": 0.5727403070884566, "learning_rate": 3.855725823819632e-06, "loss": 0.2826, "step": 14432 }, { "epoch": 0.676113739635546, "grad_norm": 0.6083184727568278, "learning_rate": 3.855566477538414e-06, "loss": 0.2965, "step": 14433 }, { "epoch": 0.6761605846254743, "grad_norm": 0.5840985500611202, "learning_rate": 3.855407123456251e-06, "loss": 0.2821, "step": 14434 }, { "epoch": 0.6762074296154026, "grad_norm": 0.6164444112471777, "learning_rate": 3.855247761574061e-06, "loss": 0.2992, "step": 14435 }, { "epoch": 0.6762542746053309, "grad_norm": 0.5927371278324707, "learning_rate": 3.855088391892761e-06, "loss": 0.2803, "step": 14436 }, { "epoch": 0.6763011195952593, "grad_norm": 0.6181207218533669, "learning_rate": 3.854929014413269e-06, "loss": 0.2761, "step": 14437 }, { "epoch": 0.6763479645851876, "grad_norm": 0.6903783408749659, "learning_rate": 3.854769629136499e-06, "loss": 0.3097, "step": 14438 }, { "epoch": 0.676394809575116, "grad_norm": 0.6327821223733923, "learning_rate": 3.854610236063372e-06, "loss": 0.2754, "step": 14439 }, { "epoch": 0.6764416545650442, "grad_norm": 0.644709120415438, "learning_rate": 3.854450835194803e-06, "loss": 0.3025, "step": 14440 }, { "epoch": 0.6764884995549726, "grad_norm": 0.5767251248437197, "learning_rate": 3.8542914265317085e-06, "loss": 0.2902, "step": 14441 }, { "epoch": 0.6765353445449009, "grad_norm": 0.6564817158368479, "learning_rate": 3.85413201007501e-06, "loss": 0.3187, "step": 14442 }, { "epoch": 0.6765821895348293, "grad_norm": 0.563054286685035, "learning_rate": 3.85397258582562e-06, "loss": 0.271, "step": 14443 }, { "epoch": 0.6766290345247575, "grad_norm": 0.6121799864877948, "learning_rate": 3.853813153784458e-06, "loss": 0.2726, "step": 14444 }, { "epoch": 0.6766758795146859, "grad_norm": 0.5199364771490876, "learning_rate": 3.853653713952443e-06, "loss": 0.2538, "step": 14445 }, { "epoch": 0.6767227245046142, "grad_norm": 0.5627346120588342, "learning_rate": 3.85349426633049e-06, "loss": 0.2587, "step": 14446 }, { "epoch": 0.6767695694945426, "grad_norm": 0.651649730179807, "learning_rate": 3.853334810919518e-06, "loss": 0.3013, "step": 14447 }, { "epoch": 0.6768164144844709, "grad_norm": 0.579718678591752, "learning_rate": 3.853175347720445e-06, "loss": 0.2755, "step": 14448 }, { "epoch": 0.6768632594743992, "grad_norm": 0.5570677822041017, "learning_rate": 3.853015876734187e-06, "loss": 0.2615, "step": 14449 }, { "epoch": 0.6769101044643275, "grad_norm": 0.6183562498223565, "learning_rate": 3.852856397961663e-06, "loss": 0.2768, "step": 14450 }, { "epoch": 0.6769569494542559, "grad_norm": 0.6067119432290606, "learning_rate": 3.852696911403791e-06, "loss": 0.2714, "step": 14451 }, { "epoch": 0.6770037944441842, "grad_norm": 0.5809452224557126, "learning_rate": 3.8525374170614875e-06, "loss": 0.2952, "step": 14452 }, { "epoch": 0.6770506394341125, "grad_norm": 0.5481492321638481, "learning_rate": 3.8523779149356714e-06, "loss": 0.2798, "step": 14453 }, { "epoch": 0.6770974844240408, "grad_norm": 0.6116638373576584, "learning_rate": 3.852218405027259e-06, "loss": 0.294, "step": 14454 }, { "epoch": 0.6771443294139692, "grad_norm": 0.677278800150084, "learning_rate": 3.852058887337171e-06, "loss": 0.2928, "step": 14455 }, { "epoch": 0.6771911744038975, "grad_norm": 0.5639961706485968, "learning_rate": 3.851899361866324e-06, "loss": 0.2658, "step": 14456 }, { "epoch": 0.6772380193938259, "grad_norm": 0.5776128731833425, "learning_rate": 3.851739828615636e-06, "loss": 0.2815, "step": 14457 }, { "epoch": 0.6772848643837541, "grad_norm": 0.5858904562603383, "learning_rate": 3.851580287586024e-06, "loss": 0.2684, "step": 14458 }, { "epoch": 0.6773317093736825, "grad_norm": 0.5366490049770974, "learning_rate": 3.851420738778408e-06, "loss": 0.2637, "step": 14459 }, { "epoch": 0.6773785543636108, "grad_norm": 0.6181902012722473, "learning_rate": 3.851261182193705e-06, "loss": 0.2726, "step": 14460 }, { "epoch": 0.6774253993535392, "grad_norm": 0.6039383336331607, "learning_rate": 3.851101617832834e-06, "loss": 0.3003, "step": 14461 }, { "epoch": 0.6774722443434674, "grad_norm": 0.61799337135355, "learning_rate": 3.850942045696713e-06, "loss": 0.2867, "step": 14462 }, { "epoch": 0.6775190893333958, "grad_norm": 0.5450971091179927, "learning_rate": 3.850782465786259e-06, "loss": 0.2831, "step": 14463 }, { "epoch": 0.6775659343233241, "grad_norm": 0.5931244024607858, "learning_rate": 3.850622878102392e-06, "loss": 0.2902, "step": 14464 }, { "epoch": 0.6776127793132525, "grad_norm": 0.6126929345505654, "learning_rate": 3.85046328264603e-06, "loss": 0.2819, "step": 14465 }, { "epoch": 0.6776596243031808, "grad_norm": 0.5947669238394723, "learning_rate": 3.850303679418092e-06, "loss": 0.301, "step": 14466 }, { "epoch": 0.6777064692931091, "grad_norm": 0.6140961048527476, "learning_rate": 3.850144068419494e-06, "loss": 0.2959, "step": 14467 }, { "epoch": 0.6777533142830374, "grad_norm": 0.6016219689194837, "learning_rate": 3.849984449651156e-06, "loss": 0.2779, "step": 14468 }, { "epoch": 0.6778001592729658, "grad_norm": 0.6177196674208643, "learning_rate": 3.849824823113999e-06, "loss": 0.2936, "step": 14469 }, { "epoch": 0.6778470042628941, "grad_norm": 0.6003634545044776, "learning_rate": 3.849665188808938e-06, "loss": 0.2868, "step": 14470 }, { "epoch": 0.6778938492528224, "grad_norm": 0.5729799310122903, "learning_rate": 3.849505546736892e-06, "loss": 0.2702, "step": 14471 }, { "epoch": 0.6779406942427507, "grad_norm": 0.598265815623702, "learning_rate": 3.849345896898783e-06, "loss": 0.287, "step": 14472 }, { "epoch": 0.6779875392326791, "grad_norm": 0.5735100705611419, "learning_rate": 3.8491862392955265e-06, "loss": 0.2646, "step": 14473 }, { "epoch": 0.6780343842226074, "grad_norm": 0.5659604669664705, "learning_rate": 3.849026573928042e-06, "loss": 0.2707, "step": 14474 }, { "epoch": 0.6780812292125358, "grad_norm": 0.6089358447154722, "learning_rate": 3.848866900797249e-06, "loss": 0.2871, "step": 14475 }, { "epoch": 0.678128074202464, "grad_norm": 0.5904163273943162, "learning_rate": 3.848707219904066e-06, "loss": 0.2919, "step": 14476 }, { "epoch": 0.6781749191923924, "grad_norm": 0.585629862143514, "learning_rate": 3.848547531249412e-06, "loss": 0.2845, "step": 14477 }, { "epoch": 0.6782217641823207, "grad_norm": 0.5657290032863136, "learning_rate": 3.8483878348342055e-06, "loss": 0.2746, "step": 14478 }, { "epoch": 0.6782686091722491, "grad_norm": 0.6114266289662286, "learning_rate": 3.848228130659366e-06, "loss": 0.2896, "step": 14479 }, { "epoch": 0.6783154541621773, "grad_norm": 0.6026868513569545, "learning_rate": 3.848068418725813e-06, "loss": 0.2855, "step": 14480 }, { "epoch": 0.6783622991521057, "grad_norm": 0.5555641776861274, "learning_rate": 3.847908699034465e-06, "loss": 0.2783, "step": 14481 }, { "epoch": 0.678409144142034, "grad_norm": 0.645692901083203, "learning_rate": 3.84774897158624e-06, "loss": 0.2984, "step": 14482 }, { "epoch": 0.6784559891319624, "grad_norm": 0.5708192337495571, "learning_rate": 3.84758923638206e-06, "loss": 0.2705, "step": 14483 }, { "epoch": 0.6785028341218907, "grad_norm": 0.598757941590671, "learning_rate": 3.847429493422842e-06, "loss": 0.3016, "step": 14484 }, { "epoch": 0.678549679111819, "grad_norm": 0.5843672877151961, "learning_rate": 3.847269742709506e-06, "loss": 0.3047, "step": 14485 }, { "epoch": 0.6785965241017473, "grad_norm": 0.5424523949908603, "learning_rate": 3.84710998424297e-06, "loss": 0.2816, "step": 14486 }, { "epoch": 0.6786433690916757, "grad_norm": 0.5254031917548422, "learning_rate": 3.846950218024157e-06, "loss": 0.2715, "step": 14487 }, { "epoch": 0.678690214081604, "grad_norm": 0.6155771377866711, "learning_rate": 3.846790444053983e-06, "loss": 0.2996, "step": 14488 }, { "epoch": 0.6787370590715323, "grad_norm": 0.5487882665863776, "learning_rate": 3.846630662333369e-06, "loss": 0.2657, "step": 14489 }, { "epoch": 0.6787839040614606, "grad_norm": 0.6246657972065722, "learning_rate": 3.846470872863233e-06, "loss": 0.2861, "step": 14490 }, { "epoch": 0.678830749051389, "grad_norm": 0.5388975538088625, "learning_rate": 3.846311075644495e-06, "loss": 0.2619, "step": 14491 }, { "epoch": 0.6788775940413173, "grad_norm": 0.6474907010642634, "learning_rate": 3.846151270678076e-06, "loss": 0.3002, "step": 14492 }, { "epoch": 0.6789244390312457, "grad_norm": 0.582954291663068, "learning_rate": 3.845991457964895e-06, "loss": 0.2624, "step": 14493 }, { "epoch": 0.6789712840211739, "grad_norm": 0.5975328470859367, "learning_rate": 3.845831637505872e-06, "loss": 0.2807, "step": 14494 }, { "epoch": 0.6790181290111023, "grad_norm": 0.5610885276236607, "learning_rate": 3.845671809301925e-06, "loss": 0.2854, "step": 14495 }, { "epoch": 0.6790649740010306, "grad_norm": 0.5603919755849555, "learning_rate": 3.845511973353975e-06, "loss": 0.2642, "step": 14496 }, { "epoch": 0.679111818990959, "grad_norm": 0.5977139481072254, "learning_rate": 3.845352129662943e-06, "loss": 0.2724, "step": 14497 }, { "epoch": 0.6791586639808872, "grad_norm": 0.5833507479233907, "learning_rate": 3.845192278229747e-06, "loss": 0.2729, "step": 14498 }, { "epoch": 0.6792055089708156, "grad_norm": 0.5236222818422631, "learning_rate": 3.845032419055307e-06, "loss": 0.2808, "step": 14499 }, { "epoch": 0.6792523539607439, "grad_norm": 0.585930680602558, "learning_rate": 3.844872552140544e-06, "loss": 0.2878, "step": 14500 }, { "epoch": 0.6792991989506723, "grad_norm": 0.5673437835241609, "learning_rate": 3.844712677486377e-06, "loss": 0.2709, "step": 14501 }, { "epoch": 0.6793460439406006, "grad_norm": 0.5951440684030598, "learning_rate": 3.844552795093726e-06, "loss": 0.2903, "step": 14502 }, { "epoch": 0.6793928889305288, "grad_norm": 0.5795805277298238, "learning_rate": 3.844392904963513e-06, "loss": 0.2716, "step": 14503 }, { "epoch": 0.6794397339204572, "grad_norm": 0.5688842697357728, "learning_rate": 3.844233007096657e-06, "loss": 0.2707, "step": 14504 }, { "epoch": 0.6794865789103856, "grad_norm": 0.6128616071333776, "learning_rate": 3.844073101494077e-06, "loss": 0.3105, "step": 14505 }, { "epoch": 0.6795334239003139, "grad_norm": 0.603412381146089, "learning_rate": 3.843913188156694e-06, "loss": 0.2873, "step": 14506 }, { "epoch": 0.6795802688902421, "grad_norm": 0.5791385991712585, "learning_rate": 3.843753267085428e-06, "loss": 0.2761, "step": 14507 }, { "epoch": 0.6796271138801705, "grad_norm": 0.5492107341773403, "learning_rate": 3.843593338281201e-06, "loss": 0.2722, "step": 14508 }, { "epoch": 0.6796739588700988, "grad_norm": 0.5730039168493215, "learning_rate": 3.8434334017449314e-06, "loss": 0.2814, "step": 14509 }, { "epoch": 0.6797208038600272, "grad_norm": 0.5730445161913422, "learning_rate": 3.84327345747754e-06, "loss": 0.275, "step": 14510 }, { "epoch": 0.6797676488499556, "grad_norm": 0.5772194223907933, "learning_rate": 3.843113505479948e-06, "loss": 0.2736, "step": 14511 }, { "epoch": 0.6798144938398838, "grad_norm": 0.6082675996306007, "learning_rate": 3.842953545753076e-06, "loss": 0.2868, "step": 14512 }, { "epoch": 0.6798613388298121, "grad_norm": 0.6418250000634249, "learning_rate": 3.842793578297842e-06, "loss": 0.2937, "step": 14513 }, { "epoch": 0.6799081838197405, "grad_norm": 0.5636783134636544, "learning_rate": 3.842633603115169e-06, "loss": 0.2786, "step": 14514 }, { "epoch": 0.6799550288096688, "grad_norm": 0.6070644532571625, "learning_rate": 3.842473620205978e-06, "loss": 0.2882, "step": 14515 }, { "epoch": 0.6800018737995971, "grad_norm": 0.5614284686110182, "learning_rate": 3.842313629571188e-06, "loss": 0.2787, "step": 14516 }, { "epoch": 0.6800487187895254, "grad_norm": 0.6250235686103871, "learning_rate": 3.84215363121172e-06, "loss": 0.3084, "step": 14517 }, { "epoch": 0.6800955637794538, "grad_norm": 0.6402789238629492, "learning_rate": 3.8419936251284965e-06, "loss": 0.2911, "step": 14518 }, { "epoch": 0.6801424087693821, "grad_norm": 0.6003292992381796, "learning_rate": 3.841833611322436e-06, "loss": 0.2875, "step": 14519 }, { "epoch": 0.6801892537593105, "grad_norm": 0.53400388019951, "learning_rate": 3.84167358979446e-06, "loss": 0.2761, "step": 14520 }, { "epoch": 0.6802360987492387, "grad_norm": 0.5993044428033723, "learning_rate": 3.8415135605454905e-06, "loss": 0.3023, "step": 14521 }, { "epoch": 0.6802829437391671, "grad_norm": 0.5723028677394311, "learning_rate": 3.841353523576448e-06, "loss": 0.276, "step": 14522 }, { "epoch": 0.6803297887290954, "grad_norm": 0.5911888016890824, "learning_rate": 3.841193478888252e-06, "loss": 0.286, "step": 14523 }, { "epoch": 0.6803766337190238, "grad_norm": 0.5777848047090793, "learning_rate": 3.8410334264818254e-06, "loss": 0.2777, "step": 14524 }, { "epoch": 0.680423478708952, "grad_norm": 0.615154368644923, "learning_rate": 3.840873366358089e-06, "loss": 0.2678, "step": 14525 }, { "epoch": 0.6804703236988804, "grad_norm": 0.5687962711188413, "learning_rate": 3.840713298517962e-06, "loss": 0.2846, "step": 14526 }, { "epoch": 0.6805171686888087, "grad_norm": 0.648775625595139, "learning_rate": 3.8405532229623676e-06, "loss": 0.2785, "step": 14527 }, { "epoch": 0.6805640136787371, "grad_norm": 0.5896047483923816, "learning_rate": 3.840393139692225e-06, "loss": 0.2793, "step": 14528 }, { "epoch": 0.6806108586686654, "grad_norm": 0.6199635134884384, "learning_rate": 3.84023304870846e-06, "loss": 0.2644, "step": 14529 }, { "epoch": 0.6806577036585937, "grad_norm": 0.62986708869857, "learning_rate": 3.840072950011988e-06, "loss": 0.3001, "step": 14530 }, { "epoch": 0.680704548648522, "grad_norm": 0.5705957641767971, "learning_rate": 3.839912843603733e-06, "loss": 0.2643, "step": 14531 }, { "epoch": 0.6807513936384504, "grad_norm": 0.6456908822441481, "learning_rate": 3.839752729484618e-06, "loss": 0.2829, "step": 14532 }, { "epoch": 0.6807982386283787, "grad_norm": 0.6306221743416583, "learning_rate": 3.8395926076555624e-06, "loss": 0.2775, "step": 14533 }, { "epoch": 0.680845083618307, "grad_norm": 0.5831438840417194, "learning_rate": 3.839432478117487e-06, "loss": 0.2876, "step": 14534 }, { "epoch": 0.6808919286082353, "grad_norm": 0.6068993091740116, "learning_rate": 3.839272340871315e-06, "loss": 0.2992, "step": 14535 }, { "epoch": 0.6809387735981637, "grad_norm": 0.6780720635742509, "learning_rate": 3.839112195917967e-06, "loss": 0.2979, "step": 14536 }, { "epoch": 0.680985618588092, "grad_norm": 0.606182243552605, "learning_rate": 3.838952043258365e-06, "loss": 0.2659, "step": 14537 }, { "epoch": 0.6810324635780204, "grad_norm": 0.5916952531298044, "learning_rate": 3.8387918828934304e-06, "loss": 0.2872, "step": 14538 }, { "epoch": 0.6810793085679486, "grad_norm": 0.5742560535150477, "learning_rate": 3.838631714824086e-06, "loss": 0.2803, "step": 14539 }, { "epoch": 0.681126153557877, "grad_norm": 0.6369163826251083, "learning_rate": 3.838471539051251e-06, "loss": 0.316, "step": 14540 }, { "epoch": 0.6811729985478053, "grad_norm": 0.5858823016698518, "learning_rate": 3.838311355575849e-06, "loss": 0.2818, "step": 14541 }, { "epoch": 0.6812198435377337, "grad_norm": 0.6386656675133757, "learning_rate": 3.838151164398802e-06, "loss": 0.2897, "step": 14542 }, { "epoch": 0.6812666885276619, "grad_norm": 0.5622746450069819, "learning_rate": 3.837990965521032e-06, "loss": 0.2663, "step": 14543 }, { "epoch": 0.6813135335175903, "grad_norm": 0.5724902962329792, "learning_rate": 3.837830758943459e-06, "loss": 0.2776, "step": 14544 }, { "epoch": 0.6813603785075186, "grad_norm": 0.572636686836756, "learning_rate": 3.837670544667007e-06, "loss": 0.2673, "step": 14545 }, { "epoch": 0.681407223497447, "grad_norm": 0.6018460418754786, "learning_rate": 3.837510322692598e-06, "loss": 0.2814, "step": 14546 }, { "epoch": 0.6814540684873753, "grad_norm": 0.5617856107149133, "learning_rate": 3.837350093021151e-06, "loss": 0.2767, "step": 14547 }, { "epoch": 0.6815009134773036, "grad_norm": 0.5705433775519071, "learning_rate": 3.837189855653592e-06, "loss": 0.2976, "step": 14548 }, { "epoch": 0.6815477584672319, "grad_norm": 0.5952184132036594, "learning_rate": 3.837029610590841e-06, "loss": 0.2893, "step": 14549 }, { "epoch": 0.6815946034571603, "grad_norm": 0.6029061724847248, "learning_rate": 3.836869357833821e-06, "loss": 0.2775, "step": 14550 }, { "epoch": 0.6816414484470886, "grad_norm": 0.5808007488036154, "learning_rate": 3.836709097383454e-06, "loss": 0.2809, "step": 14551 }, { "epoch": 0.6816882934370169, "grad_norm": 0.6224006927355907, "learning_rate": 3.836548829240662e-06, "loss": 0.3051, "step": 14552 }, { "epoch": 0.6817351384269452, "grad_norm": 0.6330035946885912, "learning_rate": 3.836388553406367e-06, "loss": 0.3026, "step": 14553 }, { "epoch": 0.6817819834168736, "grad_norm": 0.5568549103258166, "learning_rate": 3.836228269881491e-06, "loss": 0.2825, "step": 14554 }, { "epoch": 0.6818288284068019, "grad_norm": 0.637130558805172, "learning_rate": 3.836067978666959e-06, "loss": 0.3008, "step": 14555 }, { "epoch": 0.6818756733967302, "grad_norm": 0.5975234055836375, "learning_rate": 3.83590767976369e-06, "loss": 0.2843, "step": 14556 }, { "epoch": 0.6819225183866585, "grad_norm": 0.614777056287611, "learning_rate": 3.835747373172609e-06, "loss": 0.2934, "step": 14557 }, { "epoch": 0.6819693633765869, "grad_norm": 0.5949747438104663, "learning_rate": 3.8355870588946375e-06, "loss": 0.2643, "step": 14558 }, { "epoch": 0.6820162083665152, "grad_norm": 0.583517490201397, "learning_rate": 3.835426736930697e-06, "loss": 0.2859, "step": 14559 }, { "epoch": 0.6820630533564436, "grad_norm": 0.629265673705006, "learning_rate": 3.835266407281714e-06, "loss": 0.277, "step": 14560 }, { "epoch": 0.6821098983463718, "grad_norm": 0.6026638036228686, "learning_rate": 3.835106069948606e-06, "loss": 0.2669, "step": 14561 }, { "epoch": 0.6821567433363002, "grad_norm": 0.5290013947709872, "learning_rate": 3.8349457249322984e-06, "loss": 0.2468, "step": 14562 }, { "epoch": 0.6822035883262285, "grad_norm": 0.5979684841671671, "learning_rate": 3.834785372233715e-06, "loss": 0.2875, "step": 14563 }, { "epoch": 0.6822504333161569, "grad_norm": 0.5776247471228525, "learning_rate": 3.834625011853776e-06, "loss": 0.2754, "step": 14564 }, { "epoch": 0.6822972783060851, "grad_norm": 0.5928218227749518, "learning_rate": 3.834464643793406e-06, "loss": 0.2833, "step": 14565 }, { "epoch": 0.6823441232960135, "grad_norm": 0.5836102551767071, "learning_rate": 3.834304268053527e-06, "loss": 0.2725, "step": 14566 }, { "epoch": 0.6823909682859418, "grad_norm": 0.6128318316563979, "learning_rate": 3.834143884635063e-06, "loss": 0.2872, "step": 14567 }, { "epoch": 0.6824378132758702, "grad_norm": 0.5880581028437357, "learning_rate": 3.833983493538935e-06, "loss": 0.3015, "step": 14568 }, { "epoch": 0.6824846582657985, "grad_norm": 0.5694567839936732, "learning_rate": 3.833823094766068e-06, "loss": 0.2728, "step": 14569 }, { "epoch": 0.6825315032557268, "grad_norm": 0.5422024989049324, "learning_rate": 3.833662688317385e-06, "loss": 0.2599, "step": 14570 }, { "epoch": 0.6825783482456551, "grad_norm": 0.5726498387420191, "learning_rate": 3.833502274193808e-06, "loss": 0.2691, "step": 14571 }, { "epoch": 0.6826251932355835, "grad_norm": 0.5858986045669637, "learning_rate": 3.833341852396261e-06, "loss": 0.2746, "step": 14572 }, { "epoch": 0.6826720382255118, "grad_norm": 0.5686000894969261, "learning_rate": 3.833181422925666e-06, "loss": 0.2841, "step": 14573 }, { "epoch": 0.68271888321544, "grad_norm": 0.5884771892698174, "learning_rate": 3.833020985782947e-06, "loss": 0.2746, "step": 14574 }, { "epoch": 0.6827657282053684, "grad_norm": 0.5047096415580842, "learning_rate": 3.8328605409690275e-06, "loss": 0.2504, "step": 14575 }, { "epoch": 0.6828125731952968, "grad_norm": 0.6728205035944983, "learning_rate": 3.832700088484831e-06, "loss": 0.3039, "step": 14576 }, { "epoch": 0.6828594181852251, "grad_norm": 0.5616321747953925, "learning_rate": 3.832539628331279e-06, "loss": 0.286, "step": 14577 }, { "epoch": 0.6829062631751535, "grad_norm": 0.6026977028653411, "learning_rate": 3.832379160509298e-06, "loss": 0.2953, "step": 14578 }, { "epoch": 0.6829531081650817, "grad_norm": 0.5915755401298161, "learning_rate": 3.83221868501981e-06, "loss": 0.2772, "step": 14579 }, { "epoch": 0.68299995315501, "grad_norm": 0.5776096345866439, "learning_rate": 3.832058201863737e-06, "loss": 0.2745, "step": 14580 }, { "epoch": 0.6830467981449384, "grad_norm": 0.5552185442718925, "learning_rate": 3.831897711042004e-06, "loss": 0.2765, "step": 14581 }, { "epoch": 0.6830936431348668, "grad_norm": 0.5926420922577275, "learning_rate": 3.831737212555535e-06, "loss": 0.2573, "step": 14582 }, { "epoch": 0.683140488124795, "grad_norm": 0.6054695838220913, "learning_rate": 3.831576706405253e-06, "loss": 0.2864, "step": 14583 }, { "epoch": 0.6831873331147233, "grad_norm": 0.6014768303581828, "learning_rate": 3.831416192592081e-06, "loss": 0.2748, "step": 14584 }, { "epoch": 0.6832341781046517, "grad_norm": 0.5828882121940716, "learning_rate": 3.831255671116945e-06, "loss": 0.2966, "step": 14585 }, { "epoch": 0.68328102309458, "grad_norm": 0.6072251716157933, "learning_rate": 3.8310951419807656e-06, "loss": 0.2753, "step": 14586 }, { "epoch": 0.6833278680845084, "grad_norm": 0.5989736414443602, "learning_rate": 3.830934605184469e-06, "loss": 0.279, "step": 14587 }, { "epoch": 0.6833747130744366, "grad_norm": 0.621303813411414, "learning_rate": 3.830774060728978e-06, "loss": 0.2873, "step": 14588 }, { "epoch": 0.683421558064365, "grad_norm": 0.5903322788502225, "learning_rate": 3.830613508615217e-06, "loss": 0.3003, "step": 14589 }, { "epoch": 0.6834684030542933, "grad_norm": 0.6026528892172478, "learning_rate": 3.830452948844109e-06, "loss": 0.2927, "step": 14590 }, { "epoch": 0.6835152480442217, "grad_norm": 0.5833307225803852, "learning_rate": 3.830292381416579e-06, "loss": 0.279, "step": 14591 }, { "epoch": 0.6835620930341499, "grad_norm": 0.6380900784171882, "learning_rate": 3.830131806333552e-06, "loss": 0.2986, "step": 14592 }, { "epoch": 0.6836089380240783, "grad_norm": 0.5680357294322169, "learning_rate": 3.829971223595949e-06, "loss": 0.2874, "step": 14593 }, { "epoch": 0.6836557830140066, "grad_norm": 0.5870477503201971, "learning_rate": 3.829810633204697e-06, "loss": 0.2656, "step": 14594 }, { "epoch": 0.683702628003935, "grad_norm": 0.5747381900988324, "learning_rate": 3.8296500351607195e-06, "loss": 0.26, "step": 14595 }, { "epoch": 0.6837494729938633, "grad_norm": 0.5656780726821262, "learning_rate": 3.829489429464939e-06, "loss": 0.2682, "step": 14596 }, { "epoch": 0.6837963179837916, "grad_norm": 0.5291822512028657, "learning_rate": 3.829328816118281e-06, "loss": 0.2718, "step": 14597 }, { "epoch": 0.6838431629737199, "grad_norm": 0.59318575391777, "learning_rate": 3.82916819512167e-06, "loss": 0.2775, "step": 14598 }, { "epoch": 0.6838900079636483, "grad_norm": 0.5333114894967792, "learning_rate": 3.829007566476031e-06, "loss": 0.2639, "step": 14599 }, { "epoch": 0.6839368529535766, "grad_norm": 0.5513851412951457, "learning_rate": 3.828846930182286e-06, "loss": 0.2763, "step": 14600 }, { "epoch": 0.6839836979435049, "grad_norm": 0.6039703749738897, "learning_rate": 3.828686286241362e-06, "loss": 0.2708, "step": 14601 }, { "epoch": 0.6840305429334332, "grad_norm": 0.57331627580514, "learning_rate": 3.8285256346541825e-06, "loss": 0.2657, "step": 14602 }, { "epoch": 0.6840773879233616, "grad_norm": 0.6757365337561324, "learning_rate": 3.828364975421671e-06, "loss": 0.3114, "step": 14603 }, { "epoch": 0.6841242329132899, "grad_norm": 0.6119036654631727, "learning_rate": 3.828204308544753e-06, "loss": 0.2908, "step": 14604 }, { "epoch": 0.6841710779032183, "grad_norm": 0.601896454815759, "learning_rate": 3.828043634024353e-06, "loss": 0.2872, "step": 14605 }, { "epoch": 0.6842179228931465, "grad_norm": 0.5638529970277397, "learning_rate": 3.827882951861397e-06, "loss": 0.2829, "step": 14606 }, { "epoch": 0.6842647678830749, "grad_norm": 0.5481609268061005, "learning_rate": 3.827722262056807e-06, "loss": 0.2703, "step": 14607 }, { "epoch": 0.6843116128730032, "grad_norm": 0.7082391003571159, "learning_rate": 3.827561564611509e-06, "loss": 0.2874, "step": 14608 }, { "epoch": 0.6843584578629316, "grad_norm": 0.629230360956409, "learning_rate": 3.827400859526429e-06, "loss": 0.2884, "step": 14609 }, { "epoch": 0.6844053028528598, "grad_norm": 0.5781617558372537, "learning_rate": 3.82724014680249e-06, "loss": 0.2827, "step": 14610 }, { "epoch": 0.6844521478427882, "grad_norm": 0.6392720236436393, "learning_rate": 3.827079426440617e-06, "loss": 0.2996, "step": 14611 }, { "epoch": 0.6844989928327165, "grad_norm": 0.583620321274968, "learning_rate": 3.8269186984417374e-06, "loss": 0.2892, "step": 14612 }, { "epoch": 0.6845458378226449, "grad_norm": 0.5953137275574888, "learning_rate": 3.826757962806773e-06, "loss": 0.2758, "step": 14613 }, { "epoch": 0.6845926828125732, "grad_norm": 0.6593687513497364, "learning_rate": 3.826597219536649e-06, "loss": 0.305, "step": 14614 }, { "epoch": 0.6846395278025015, "grad_norm": 0.5954554950737129, "learning_rate": 3.826436468632293e-06, "loss": 0.2739, "step": 14615 }, { "epoch": 0.6846863727924298, "grad_norm": 0.5653459806751964, "learning_rate": 3.826275710094628e-06, "loss": 0.2674, "step": 14616 }, { "epoch": 0.6847332177823582, "grad_norm": 0.6192840780240773, "learning_rate": 3.826114943924579e-06, "loss": 0.2568, "step": 14617 }, { "epoch": 0.6847800627722865, "grad_norm": 0.5398756314435368, "learning_rate": 3.825954170123072e-06, "loss": 0.2799, "step": 14618 }, { "epoch": 0.6848269077622148, "grad_norm": 0.6263796554341281, "learning_rate": 3.825793388691032e-06, "loss": 0.2944, "step": 14619 }, { "epoch": 0.6848737527521431, "grad_norm": 0.5457328766234528, "learning_rate": 3.825632599629385e-06, "loss": 0.276, "step": 14620 }, { "epoch": 0.6849205977420715, "grad_norm": 0.5998398233923568, "learning_rate": 3.825471802939055e-06, "loss": 0.2836, "step": 14621 }, { "epoch": 0.6849674427319998, "grad_norm": 0.5408575084309638, "learning_rate": 3.825310998620968e-06, "loss": 0.28, "step": 14622 }, { "epoch": 0.6850142877219282, "grad_norm": 0.6014660792580594, "learning_rate": 3.82515018667605e-06, "loss": 0.3058, "step": 14623 }, { "epoch": 0.6850611327118564, "grad_norm": 0.6119644410060339, "learning_rate": 3.824989367105224e-06, "loss": 0.2814, "step": 14624 }, { "epoch": 0.6851079777017848, "grad_norm": 0.6158946494033298, "learning_rate": 3.824828539909419e-06, "loss": 0.3009, "step": 14625 }, { "epoch": 0.6851548226917131, "grad_norm": 0.5644245973398376, "learning_rate": 3.824667705089558e-06, "loss": 0.2744, "step": 14626 }, { "epoch": 0.6852016676816415, "grad_norm": 0.5719868929260833, "learning_rate": 3.824506862646567e-06, "loss": 0.2854, "step": 14627 }, { "epoch": 0.6852485126715697, "grad_norm": 0.5953076334844946, "learning_rate": 3.8243460125813725e-06, "loss": 0.2779, "step": 14628 }, { "epoch": 0.6852953576614981, "grad_norm": 0.5849790363294876, "learning_rate": 3.8241851548948995e-06, "loss": 0.2987, "step": 14629 }, { "epoch": 0.6853422026514264, "grad_norm": 0.5989711416720942, "learning_rate": 3.8240242895880734e-06, "loss": 0.286, "step": 14630 }, { "epoch": 0.6853890476413548, "grad_norm": 0.5650221640870442, "learning_rate": 3.82386341666182e-06, "loss": 0.2743, "step": 14631 }, { "epoch": 0.6854358926312831, "grad_norm": 0.5723182815200023, "learning_rate": 3.823702536117066e-06, "loss": 0.2783, "step": 14632 }, { "epoch": 0.6854827376212114, "grad_norm": 0.5520034955411595, "learning_rate": 3.8235416479547365e-06, "loss": 0.2785, "step": 14633 }, { "epoch": 0.6855295826111397, "grad_norm": 0.6169580290002409, "learning_rate": 3.823380752175758e-06, "loss": 0.2905, "step": 14634 }, { "epoch": 0.6855764276010681, "grad_norm": 0.5549380159911796, "learning_rate": 3.823219848781054e-06, "loss": 0.2616, "step": 14635 }, { "epoch": 0.6856232725909964, "grad_norm": 0.6118482278605231, "learning_rate": 3.823058937771554e-06, "loss": 0.294, "step": 14636 }, { "epoch": 0.6856701175809247, "grad_norm": 0.6509395676315438, "learning_rate": 3.8228980191481825e-06, "loss": 0.2933, "step": 14637 }, { "epoch": 0.685716962570853, "grad_norm": 0.6316330728692795, "learning_rate": 3.822737092911865e-06, "loss": 0.2905, "step": 14638 }, { "epoch": 0.6857638075607814, "grad_norm": 0.6199246309255869, "learning_rate": 3.8225761590635275e-06, "loss": 0.2879, "step": 14639 }, { "epoch": 0.6858106525507097, "grad_norm": 0.6517785115185287, "learning_rate": 3.822415217604098e-06, "loss": 0.289, "step": 14640 }, { "epoch": 0.6858574975406381, "grad_norm": 0.6243598999060285, "learning_rate": 3.8222542685345e-06, "loss": 0.2958, "step": 14641 }, { "epoch": 0.6859043425305663, "grad_norm": 0.6129066678131758, "learning_rate": 3.822093311855661e-06, "loss": 0.2719, "step": 14642 }, { "epoch": 0.6859511875204947, "grad_norm": 0.6156115788079173, "learning_rate": 3.821932347568508e-06, "loss": 0.2893, "step": 14643 }, { "epoch": 0.685998032510423, "grad_norm": 0.5965340042528132, "learning_rate": 3.821771375673966e-06, "loss": 0.269, "step": 14644 }, { "epoch": 0.6860448775003514, "grad_norm": 0.6434558887732443, "learning_rate": 3.8216103961729616e-06, "loss": 0.2748, "step": 14645 }, { "epoch": 0.6860917224902796, "grad_norm": 0.62169051203493, "learning_rate": 3.821449409066422e-06, "loss": 0.2692, "step": 14646 }, { "epoch": 0.686138567480208, "grad_norm": 0.5838017498140244, "learning_rate": 3.821288414355273e-06, "loss": 0.2718, "step": 14647 }, { "epoch": 0.6861854124701363, "grad_norm": 0.6251900066506468, "learning_rate": 3.821127412040442e-06, "loss": 0.3049, "step": 14648 }, { "epoch": 0.6862322574600647, "grad_norm": 0.6419313722163428, "learning_rate": 3.820966402122855e-06, "loss": 0.2809, "step": 14649 }, { "epoch": 0.686279102449993, "grad_norm": 0.599063634654441, "learning_rate": 3.820805384603437e-06, "loss": 0.2654, "step": 14650 }, { "epoch": 0.6863259474399213, "grad_norm": 0.6222188249457977, "learning_rate": 3.820644359483117e-06, "loss": 0.2953, "step": 14651 }, { "epoch": 0.6863727924298496, "grad_norm": 0.6768881720016404, "learning_rate": 3.820483326762821e-06, "loss": 0.3012, "step": 14652 }, { "epoch": 0.686419637419778, "grad_norm": 0.6228057644257303, "learning_rate": 3.8203222864434745e-06, "loss": 0.3068, "step": 14653 }, { "epoch": 0.6864664824097063, "grad_norm": 0.6294170467875332, "learning_rate": 3.8201612385260046e-06, "loss": 0.2994, "step": 14654 }, { "epoch": 0.6865133273996346, "grad_norm": 0.5420604542399233, "learning_rate": 3.8200001830113394e-06, "loss": 0.2691, "step": 14655 }, { "epoch": 0.6865601723895629, "grad_norm": 0.5975934342948437, "learning_rate": 3.819839119900405e-06, "loss": 0.2835, "step": 14656 }, { "epoch": 0.6866070173794913, "grad_norm": 0.7164513766536368, "learning_rate": 3.819678049194128e-06, "loss": 0.2877, "step": 14657 }, { "epoch": 0.6866538623694196, "grad_norm": 0.6368332339626123, "learning_rate": 3.819516970893435e-06, "loss": 0.2803, "step": 14658 }, { "epoch": 0.686700707359348, "grad_norm": 0.6467590673140937, "learning_rate": 3.819355884999255e-06, "loss": 0.2904, "step": 14659 }, { "epoch": 0.6867475523492762, "grad_norm": 0.593812281650282, "learning_rate": 3.819194791512512e-06, "loss": 0.2878, "step": 14660 }, { "epoch": 0.6867943973392046, "grad_norm": 0.6342473170130937, "learning_rate": 3.819033690434135e-06, "loss": 0.2796, "step": 14661 }, { "epoch": 0.6868412423291329, "grad_norm": 0.5852081384813177, "learning_rate": 3.818872581765051e-06, "loss": 0.2557, "step": 14662 }, { "epoch": 0.6868880873190613, "grad_norm": 0.6599275581389228, "learning_rate": 3.818711465506186e-06, "loss": 0.3191, "step": 14663 }, { "epoch": 0.6869349323089895, "grad_norm": 0.554410432572325, "learning_rate": 3.8185503416584685e-06, "loss": 0.27, "step": 14664 }, { "epoch": 0.6869817772989179, "grad_norm": 0.5915553155230474, "learning_rate": 3.818389210222825e-06, "loss": 0.2834, "step": 14665 }, { "epoch": 0.6870286222888462, "grad_norm": 0.5780739346899837, "learning_rate": 3.818228071200183e-06, "loss": 0.274, "step": 14666 }, { "epoch": 0.6870754672787746, "grad_norm": 0.593351596145944, "learning_rate": 3.81806692459147e-06, "loss": 0.293, "step": 14667 }, { "epoch": 0.6871223122687029, "grad_norm": 0.6178200045814768, "learning_rate": 3.817905770397612e-06, "loss": 0.2949, "step": 14668 }, { "epoch": 0.6871691572586311, "grad_norm": 0.6022459671941527, "learning_rate": 3.817744608619539e-06, "loss": 0.2897, "step": 14669 }, { "epoch": 0.6872160022485595, "grad_norm": 0.5719447192545584, "learning_rate": 3.817583439258177e-06, "loss": 0.2674, "step": 14670 }, { "epoch": 0.6872628472384879, "grad_norm": 0.5447842108795481, "learning_rate": 3.817422262314452e-06, "loss": 0.2716, "step": 14671 }, { "epoch": 0.6873096922284162, "grad_norm": 0.6143085156332411, "learning_rate": 3.817261077789295e-06, "loss": 0.2913, "step": 14672 }, { "epoch": 0.6873565372183444, "grad_norm": 0.5714600173142229, "learning_rate": 3.817099885683631e-06, "loss": 0.2727, "step": 14673 }, { "epoch": 0.6874033822082728, "grad_norm": 0.5933878525263064, "learning_rate": 3.816938685998387e-06, "loss": 0.2866, "step": 14674 }, { "epoch": 0.6874502271982011, "grad_norm": 0.6172395071436367, "learning_rate": 3.816777478734492e-06, "loss": 0.2921, "step": 14675 }, { "epoch": 0.6874970721881295, "grad_norm": 0.5962304886142856, "learning_rate": 3.816616263892875e-06, "loss": 0.278, "step": 14676 }, { "epoch": 0.6875439171780579, "grad_norm": 0.6642549184894224, "learning_rate": 3.816455041474461e-06, "loss": 0.2841, "step": 14677 }, { "epoch": 0.6875907621679861, "grad_norm": 0.5480757994384365, "learning_rate": 3.8162938114801795e-06, "loss": 0.2807, "step": 14678 }, { "epoch": 0.6876376071579144, "grad_norm": 0.5553164989111719, "learning_rate": 3.816132573910958e-06, "loss": 0.2667, "step": 14679 }, { "epoch": 0.6876844521478428, "grad_norm": 0.5526681216392653, "learning_rate": 3.815971328767725e-06, "loss": 0.2729, "step": 14680 }, { "epoch": 0.6877312971377711, "grad_norm": 0.5705293563002232, "learning_rate": 3.815810076051406e-06, "loss": 0.2706, "step": 14681 }, { "epoch": 0.6877781421276994, "grad_norm": 0.5765283306377414, "learning_rate": 3.815648815762932e-06, "loss": 0.2643, "step": 14682 }, { "epoch": 0.6878249871176277, "grad_norm": 0.666543574234102, "learning_rate": 3.81548754790323e-06, "loss": 0.2909, "step": 14683 }, { "epoch": 0.6878718321075561, "grad_norm": 0.6073202862907845, "learning_rate": 3.815326272473227e-06, "loss": 0.2689, "step": 14684 }, { "epoch": 0.6879186770974844, "grad_norm": 0.588230567670807, "learning_rate": 3.8151649894738525e-06, "loss": 0.2877, "step": 14685 }, { "epoch": 0.6879655220874128, "grad_norm": 0.5696962290428983, "learning_rate": 3.815003698906034e-06, "loss": 0.2864, "step": 14686 }, { "epoch": 0.688012367077341, "grad_norm": 0.6044565656364707, "learning_rate": 3.8148424007707e-06, "loss": 0.2776, "step": 14687 }, { "epoch": 0.6880592120672694, "grad_norm": 0.6471494906885559, "learning_rate": 3.814681095068777e-06, "loss": 0.3083, "step": 14688 }, { "epoch": 0.6881060570571977, "grad_norm": 0.5103862363652848, "learning_rate": 3.814519781801196e-06, "loss": 0.2559, "step": 14689 }, { "epoch": 0.6881529020471261, "grad_norm": 0.562274446880649, "learning_rate": 3.8143584609688837e-06, "loss": 0.2786, "step": 14690 }, { "epoch": 0.6881997470370543, "grad_norm": 0.5616520267108126, "learning_rate": 3.8141971325727688e-06, "loss": 0.2775, "step": 14691 }, { "epoch": 0.6882465920269827, "grad_norm": 0.5440464266877508, "learning_rate": 3.8140357966137796e-06, "loss": 0.272, "step": 14692 }, { "epoch": 0.688293437016911, "grad_norm": 0.6406806435623106, "learning_rate": 3.813874453092845e-06, "loss": 0.3029, "step": 14693 }, { "epoch": 0.6883402820068394, "grad_norm": 0.6282309695691701, "learning_rate": 3.813713102010893e-06, "loss": 0.3134, "step": 14694 }, { "epoch": 0.6883871269967677, "grad_norm": 0.5975442025770912, "learning_rate": 3.813551743368853e-06, "loss": 0.2778, "step": 14695 }, { "epoch": 0.688433971986696, "grad_norm": 0.6350325561279758, "learning_rate": 3.813390377167652e-06, "loss": 0.2803, "step": 14696 }, { "epoch": 0.6884808169766243, "grad_norm": 0.5758528925795752, "learning_rate": 3.81322900340822e-06, "loss": 0.2732, "step": 14697 }, { "epoch": 0.6885276619665527, "grad_norm": 0.5856793465206195, "learning_rate": 3.8130676220914843e-06, "loss": 0.2496, "step": 14698 }, { "epoch": 0.688574506956481, "grad_norm": 0.5784772048269443, "learning_rate": 3.812906233218375e-06, "loss": 0.2788, "step": 14699 }, { "epoch": 0.6886213519464093, "grad_norm": 0.5440788355082733, "learning_rate": 3.81274483678982e-06, "loss": 0.2704, "step": 14700 }, { "epoch": 0.6886681969363376, "grad_norm": 0.610963407277692, "learning_rate": 3.812583432806749e-06, "loss": 0.2807, "step": 14701 }, { "epoch": 0.688715041926266, "grad_norm": 0.5680697648762345, "learning_rate": 3.8124220212700903e-06, "loss": 0.2433, "step": 14702 }, { "epoch": 0.6887618869161943, "grad_norm": 0.6024900879919279, "learning_rate": 3.8122606021807716e-06, "loss": 0.2934, "step": 14703 }, { "epoch": 0.6888087319061227, "grad_norm": 0.5701894861210526, "learning_rate": 3.8120991755397243e-06, "loss": 0.2691, "step": 14704 }, { "epoch": 0.6888555768960509, "grad_norm": 0.5740831891376227, "learning_rate": 3.8119377413478755e-06, "loss": 0.2921, "step": 14705 }, { "epoch": 0.6889024218859793, "grad_norm": 0.6697308276139605, "learning_rate": 3.8117762996061543e-06, "loss": 0.2898, "step": 14706 }, { "epoch": 0.6889492668759076, "grad_norm": 0.650212655940473, "learning_rate": 3.8116148503154905e-06, "loss": 0.2805, "step": 14707 }, { "epoch": 0.688996111865836, "grad_norm": 0.5868975302038891, "learning_rate": 3.811453393476813e-06, "loss": 0.2788, "step": 14708 }, { "epoch": 0.6890429568557642, "grad_norm": 0.5822409771904692, "learning_rate": 3.811291929091051e-06, "loss": 0.281, "step": 14709 }, { "epoch": 0.6890898018456926, "grad_norm": 0.5762493273953923, "learning_rate": 3.8111304571591334e-06, "loss": 0.2767, "step": 14710 }, { "epoch": 0.6891366468356209, "grad_norm": 0.5605503688835002, "learning_rate": 3.81096897768199e-06, "loss": 0.2593, "step": 14711 }, { "epoch": 0.6891834918255493, "grad_norm": 0.5793782149147945, "learning_rate": 3.8108074906605488e-06, "loss": 0.2689, "step": 14712 }, { "epoch": 0.6892303368154776, "grad_norm": 0.590111803484475, "learning_rate": 3.8106459960957405e-06, "loss": 0.301, "step": 14713 }, { "epoch": 0.6892771818054059, "grad_norm": 0.5369329197537727, "learning_rate": 3.810484493988494e-06, "loss": 0.269, "step": 14714 }, { "epoch": 0.6893240267953342, "grad_norm": 0.5988531639702097, "learning_rate": 3.8103229843397383e-06, "loss": 0.2865, "step": 14715 }, { "epoch": 0.6893708717852626, "grad_norm": 0.6031197383379582, "learning_rate": 3.8101614671504026e-06, "loss": 0.3077, "step": 14716 }, { "epoch": 0.6894177167751909, "grad_norm": 0.5731797912119354, "learning_rate": 3.8099999424214175e-06, "loss": 0.2801, "step": 14717 }, { "epoch": 0.6894645617651192, "grad_norm": 0.6034638658537619, "learning_rate": 3.809838410153712e-06, "loss": 0.2798, "step": 14718 }, { "epoch": 0.6895114067550475, "grad_norm": 0.5634282797029719, "learning_rate": 3.8096768703482164e-06, "loss": 0.2804, "step": 14719 }, { "epoch": 0.6895582517449759, "grad_norm": 0.643153998978431, "learning_rate": 3.8095153230058584e-06, "loss": 0.3042, "step": 14720 }, { "epoch": 0.6896050967349042, "grad_norm": 0.5939375994493185, "learning_rate": 3.8093537681275694e-06, "loss": 0.29, "step": 14721 }, { "epoch": 0.6896519417248326, "grad_norm": 0.6166888366454685, "learning_rate": 3.809192205714279e-06, "loss": 0.2878, "step": 14722 }, { "epoch": 0.6896987867147608, "grad_norm": 0.586130978454405, "learning_rate": 3.809030635766916e-06, "loss": 0.2735, "step": 14723 }, { "epoch": 0.6897456317046892, "grad_norm": 0.6157839739680433, "learning_rate": 3.80886905828641e-06, "loss": 0.2989, "step": 14724 }, { "epoch": 0.6897924766946175, "grad_norm": 0.63302644307388, "learning_rate": 3.808707473273693e-06, "loss": 0.2909, "step": 14725 }, { "epoch": 0.6898393216845459, "grad_norm": 0.5793476938141695, "learning_rate": 3.8085458807296927e-06, "loss": 0.2691, "step": 14726 }, { "epoch": 0.6898861666744741, "grad_norm": 0.5923605098185295, "learning_rate": 3.8083842806553396e-06, "loss": 0.266, "step": 14727 }, { "epoch": 0.6899330116644025, "grad_norm": 0.5785344843574197, "learning_rate": 3.8082226730515638e-06, "loss": 0.2735, "step": 14728 }, { "epoch": 0.6899798566543308, "grad_norm": 0.5960916810835506, "learning_rate": 3.8080610579192955e-06, "loss": 0.2833, "step": 14729 }, { "epoch": 0.6900267016442592, "grad_norm": 0.6274166586472185, "learning_rate": 3.8078994352594646e-06, "loss": 0.2966, "step": 14730 }, { "epoch": 0.6900735466341875, "grad_norm": 0.5644902458058086, "learning_rate": 3.807737805073001e-06, "loss": 0.2623, "step": 14731 }, { "epoch": 0.6901203916241158, "grad_norm": 0.6076088407877627, "learning_rate": 3.8075761673608354e-06, "loss": 0.2812, "step": 14732 }, { "epoch": 0.6901672366140441, "grad_norm": 0.5457755335610862, "learning_rate": 3.807414522123897e-06, "loss": 0.2635, "step": 14733 }, { "epoch": 0.6902140816039725, "grad_norm": 0.6042634908280597, "learning_rate": 3.807252869363117e-06, "loss": 0.2739, "step": 14734 }, { "epoch": 0.6902609265939008, "grad_norm": 0.6045202963446574, "learning_rate": 3.8070912090794256e-06, "loss": 0.2819, "step": 14735 }, { "epoch": 0.690307771583829, "grad_norm": 0.5936563821501412, "learning_rate": 3.8069295412737524e-06, "loss": 0.2938, "step": 14736 }, { "epoch": 0.6903546165737574, "grad_norm": 0.5349615742275329, "learning_rate": 3.8067678659470285e-06, "loss": 0.2644, "step": 14737 }, { "epoch": 0.6904014615636858, "grad_norm": 0.5423599765727861, "learning_rate": 3.8066061831001834e-06, "loss": 0.259, "step": 14738 }, { "epoch": 0.6904483065536141, "grad_norm": 0.5995287223331077, "learning_rate": 3.806444492734148e-06, "loss": 0.2679, "step": 14739 }, { "epoch": 0.6904951515435425, "grad_norm": 0.5961496924315449, "learning_rate": 3.806282794849854e-06, "loss": 0.2899, "step": 14740 }, { "epoch": 0.6905419965334707, "grad_norm": 0.5814744347474667, "learning_rate": 3.8061210894482304e-06, "loss": 0.2861, "step": 14741 }, { "epoch": 0.690588841523399, "grad_norm": 0.5897491108333907, "learning_rate": 3.8059593765302076e-06, "loss": 0.2648, "step": 14742 }, { "epoch": 0.6906356865133274, "grad_norm": 0.5840398491005099, "learning_rate": 3.805797656096718e-06, "loss": 0.3106, "step": 14743 }, { "epoch": 0.6906825315032558, "grad_norm": 0.5847734931237435, "learning_rate": 3.8056359281486898e-06, "loss": 0.3006, "step": 14744 }, { "epoch": 0.690729376493184, "grad_norm": 0.5961037965609259, "learning_rate": 3.8054741926870553e-06, "loss": 0.2806, "step": 14745 }, { "epoch": 0.6907762214831124, "grad_norm": 0.5816055123416366, "learning_rate": 3.805312449712746e-06, "loss": 0.2802, "step": 14746 }, { "epoch": 0.6908230664730407, "grad_norm": 0.5939778116865173, "learning_rate": 3.8051506992266906e-06, "loss": 0.2965, "step": 14747 }, { "epoch": 0.690869911462969, "grad_norm": 0.590695278047719, "learning_rate": 3.8049889412298204e-06, "loss": 0.2551, "step": 14748 }, { "epoch": 0.6909167564528974, "grad_norm": 0.5794886181480305, "learning_rate": 3.804827175723068e-06, "loss": 0.2929, "step": 14749 }, { "epoch": 0.6909636014428256, "grad_norm": 0.6040432102570763, "learning_rate": 3.8046654027073625e-06, "loss": 0.2884, "step": 14750 }, { "epoch": 0.691010446432754, "grad_norm": 0.6095359193534879, "learning_rate": 3.8045036221836363e-06, "loss": 0.2888, "step": 14751 }, { "epoch": 0.6910572914226824, "grad_norm": 0.5768435661603026, "learning_rate": 3.8043418341528192e-06, "loss": 0.2719, "step": 14752 }, { "epoch": 0.6911041364126107, "grad_norm": 0.5704668313067932, "learning_rate": 3.8041800386158422e-06, "loss": 0.2928, "step": 14753 }, { "epoch": 0.6911509814025389, "grad_norm": 0.5764347855303616, "learning_rate": 3.8040182355736375e-06, "loss": 0.2793, "step": 14754 }, { "epoch": 0.6911978263924673, "grad_norm": 0.5366601430113269, "learning_rate": 3.8038564250271355e-06, "loss": 0.2861, "step": 14755 }, { "epoch": 0.6912446713823956, "grad_norm": 0.5906837647894928, "learning_rate": 3.8036946069772675e-06, "loss": 0.2994, "step": 14756 }, { "epoch": 0.691291516372324, "grad_norm": 0.5925322856251873, "learning_rate": 3.8035327814249644e-06, "loss": 0.2767, "step": 14757 }, { "epoch": 0.6913383613622524, "grad_norm": 0.5707687394583574, "learning_rate": 3.803370948371158e-06, "loss": 0.2847, "step": 14758 }, { "epoch": 0.6913852063521806, "grad_norm": 0.6232123482510072, "learning_rate": 3.8032091078167786e-06, "loss": 0.2905, "step": 14759 }, { "epoch": 0.6914320513421089, "grad_norm": 0.562939070947084, "learning_rate": 3.80304725976276e-06, "loss": 0.2707, "step": 14760 }, { "epoch": 0.6914788963320373, "grad_norm": 0.5943386155836771, "learning_rate": 3.8028854042100303e-06, "loss": 0.2613, "step": 14761 }, { "epoch": 0.6915257413219656, "grad_norm": 0.6105968130771292, "learning_rate": 3.802723541159523e-06, "loss": 0.2866, "step": 14762 }, { "epoch": 0.6915725863118939, "grad_norm": 0.545575124904537, "learning_rate": 3.8025616706121705e-06, "loss": 0.2683, "step": 14763 }, { "epoch": 0.6916194313018222, "grad_norm": 0.5769863998004175, "learning_rate": 3.802399792568901e-06, "loss": 0.2774, "step": 14764 }, { "epoch": 0.6916662762917506, "grad_norm": 0.5674868870928927, "learning_rate": 3.8022379070306492e-06, "loss": 0.2868, "step": 14765 }, { "epoch": 0.6917131212816789, "grad_norm": 0.6269786231060309, "learning_rate": 3.802076013998345e-06, "loss": 0.2926, "step": 14766 }, { "epoch": 0.6917599662716073, "grad_norm": 0.6244624503193763, "learning_rate": 3.8019141134729208e-06, "loss": 0.2923, "step": 14767 }, { "epoch": 0.6918068112615355, "grad_norm": 0.5526953067083329, "learning_rate": 3.8017522054553085e-06, "loss": 0.2732, "step": 14768 }, { "epoch": 0.6918536562514639, "grad_norm": 0.5630936977891237, "learning_rate": 3.801590289946439e-06, "loss": 0.2584, "step": 14769 }, { "epoch": 0.6919005012413922, "grad_norm": 0.5563585934302541, "learning_rate": 3.8014283669472453e-06, "loss": 0.262, "step": 14770 }, { "epoch": 0.6919473462313206, "grad_norm": 0.5673656562297711, "learning_rate": 3.801266436458657e-06, "loss": 0.271, "step": 14771 }, { "epoch": 0.6919941912212488, "grad_norm": 0.5700248924040994, "learning_rate": 3.801104498481608e-06, "loss": 0.2719, "step": 14772 }, { "epoch": 0.6920410362111772, "grad_norm": 0.6148090916760125, "learning_rate": 3.800942553017031e-06, "loss": 0.2688, "step": 14773 }, { "epoch": 0.6920878812011055, "grad_norm": 0.6184883403035749, "learning_rate": 3.8007806000658556e-06, "loss": 0.2879, "step": 14774 }, { "epoch": 0.6921347261910339, "grad_norm": 0.5767152616421638, "learning_rate": 3.8006186396290135e-06, "loss": 0.2695, "step": 14775 }, { "epoch": 0.6921815711809622, "grad_norm": 0.5908291713170243, "learning_rate": 3.800456671707439e-06, "loss": 0.2768, "step": 14776 }, { "epoch": 0.6922284161708905, "grad_norm": 0.5955788144487374, "learning_rate": 3.800294696302064e-06, "loss": 0.2776, "step": 14777 }, { "epoch": 0.6922752611608188, "grad_norm": 0.5867979478325044, "learning_rate": 3.800132713413819e-06, "loss": 0.2732, "step": 14778 }, { "epoch": 0.6923221061507472, "grad_norm": 0.5444860284455597, "learning_rate": 3.799970723043637e-06, "loss": 0.2683, "step": 14779 }, { "epoch": 0.6923689511406755, "grad_norm": 0.603797691671667, "learning_rate": 3.7998087251924508e-06, "loss": 0.2845, "step": 14780 }, { "epoch": 0.6924157961306038, "grad_norm": 0.5895306195293829, "learning_rate": 3.799646719861192e-06, "loss": 0.275, "step": 14781 }, { "epoch": 0.6924626411205321, "grad_norm": 0.6206535206736368, "learning_rate": 3.7994847070507922e-06, "loss": 0.2983, "step": 14782 }, { "epoch": 0.6925094861104605, "grad_norm": 0.6054417871837481, "learning_rate": 3.7993226867621845e-06, "loss": 0.2944, "step": 14783 }, { "epoch": 0.6925563311003888, "grad_norm": 0.5363975615550055, "learning_rate": 3.799160658996303e-06, "loss": 0.274, "step": 14784 }, { "epoch": 0.6926031760903172, "grad_norm": 0.6242761129247045, "learning_rate": 3.7989986237540767e-06, "loss": 0.2766, "step": 14785 }, { "epoch": 0.6926500210802454, "grad_norm": 0.579465379584607, "learning_rate": 3.79883658103644e-06, "loss": 0.289, "step": 14786 }, { "epoch": 0.6926968660701738, "grad_norm": 0.5660776523331809, "learning_rate": 3.7986745308443257e-06, "loss": 0.2889, "step": 14787 }, { "epoch": 0.6927437110601021, "grad_norm": 0.5850821732676966, "learning_rate": 3.798512473178666e-06, "loss": 0.3029, "step": 14788 }, { "epoch": 0.6927905560500305, "grad_norm": 0.5762797950644084, "learning_rate": 3.7983504080403933e-06, "loss": 0.287, "step": 14789 }, { "epoch": 0.6928374010399587, "grad_norm": 0.5419354000688976, "learning_rate": 3.79818833543044e-06, "loss": 0.2607, "step": 14790 }, { "epoch": 0.6928842460298871, "grad_norm": 0.5927417328198635, "learning_rate": 3.7980262553497394e-06, "loss": 0.2831, "step": 14791 }, { "epoch": 0.6929310910198154, "grad_norm": 0.6029656804606303, "learning_rate": 3.797864167799224e-06, "loss": 0.2972, "step": 14792 }, { "epoch": 0.6929779360097438, "grad_norm": 0.5912231367888016, "learning_rate": 3.7977020727798264e-06, "loss": 0.3019, "step": 14793 }, { "epoch": 0.6930247809996721, "grad_norm": 0.5847138660132772, "learning_rate": 3.7975399702924788e-06, "loss": 0.281, "step": 14794 }, { "epoch": 0.6930716259896004, "grad_norm": 0.6403245006073143, "learning_rate": 3.797377860338116e-06, "loss": 0.3097, "step": 14795 }, { "epoch": 0.6931184709795287, "grad_norm": 0.5842372355829544, "learning_rate": 3.797215742917669e-06, "loss": 0.2952, "step": 14796 }, { "epoch": 0.6931653159694571, "grad_norm": 0.5540693172778699, "learning_rate": 3.7970536180320718e-06, "loss": 0.2859, "step": 14797 }, { "epoch": 0.6932121609593854, "grad_norm": 0.6236618863207765, "learning_rate": 3.796891485682257e-06, "loss": 0.2802, "step": 14798 }, { "epoch": 0.6932590059493137, "grad_norm": 0.5767403214457645, "learning_rate": 3.796729345869158e-06, "loss": 0.2956, "step": 14799 }, { "epoch": 0.693305850939242, "grad_norm": 0.5684652163982404, "learning_rate": 3.796567198593706e-06, "loss": 0.277, "step": 14800 }, { "epoch": 0.6933526959291704, "grad_norm": 0.6066705922119524, "learning_rate": 3.7964050438568373e-06, "loss": 0.2705, "step": 14801 }, { "epoch": 0.6933995409190987, "grad_norm": 0.6148547997180955, "learning_rate": 3.796242881659483e-06, "loss": 0.2784, "step": 14802 }, { "epoch": 0.6934463859090271, "grad_norm": 0.5644143229500553, "learning_rate": 3.7960807120025765e-06, "loss": 0.2852, "step": 14803 }, { "epoch": 0.6934932308989553, "grad_norm": 0.5395825552579264, "learning_rate": 3.7959185348870515e-06, "loss": 0.2568, "step": 14804 }, { "epoch": 0.6935400758888837, "grad_norm": 0.6112241474984879, "learning_rate": 3.7957563503138405e-06, "loss": 0.2846, "step": 14805 }, { "epoch": 0.693586920878812, "grad_norm": 0.6209427034960122, "learning_rate": 3.795594158283878e-06, "loss": 0.2769, "step": 14806 }, { "epoch": 0.6936337658687404, "grad_norm": 0.5740145720789106, "learning_rate": 3.7954319587980963e-06, "loss": 0.2705, "step": 14807 }, { "epoch": 0.6936806108586686, "grad_norm": 0.6099239931153955, "learning_rate": 3.7952697518574294e-06, "loss": 0.2681, "step": 14808 }, { "epoch": 0.693727455848597, "grad_norm": 0.6068461009956838, "learning_rate": 3.795107537462811e-06, "loss": 0.2934, "step": 14809 }, { "epoch": 0.6937743008385253, "grad_norm": 0.550125956161878, "learning_rate": 3.794945315615174e-06, "loss": 0.2713, "step": 14810 }, { "epoch": 0.6938211458284537, "grad_norm": 0.5598767087201518, "learning_rate": 3.7947830863154523e-06, "loss": 0.2778, "step": 14811 }, { "epoch": 0.693867990818382, "grad_norm": 0.6607359758604641, "learning_rate": 3.7946208495645786e-06, "loss": 0.2801, "step": 14812 }, { "epoch": 0.6939148358083103, "grad_norm": 0.5530135340576592, "learning_rate": 3.7944586053634884e-06, "loss": 0.2714, "step": 14813 }, { "epoch": 0.6939616807982386, "grad_norm": 0.6070845706503154, "learning_rate": 3.7942963537131137e-06, "loss": 0.2828, "step": 14814 }, { "epoch": 0.694008525788167, "grad_norm": 0.5897185129033802, "learning_rate": 3.7941340946143886e-06, "loss": 0.3058, "step": 14815 }, { "epoch": 0.6940553707780953, "grad_norm": 0.5872338088680336, "learning_rate": 3.793971828068248e-06, "loss": 0.2642, "step": 14816 }, { "epoch": 0.6941022157680236, "grad_norm": 0.535831472942768, "learning_rate": 3.7938095540756237e-06, "loss": 0.2543, "step": 14817 }, { "epoch": 0.6941490607579519, "grad_norm": 0.6088549630468083, "learning_rate": 3.7936472726374507e-06, "loss": 0.2865, "step": 14818 }, { "epoch": 0.6941959057478803, "grad_norm": 0.5662082149913003, "learning_rate": 3.7934849837546628e-06, "loss": 0.2881, "step": 14819 }, { "epoch": 0.6942427507378086, "grad_norm": 0.5953985914404492, "learning_rate": 3.7933226874281946e-06, "loss": 0.2899, "step": 14820 }, { "epoch": 0.694289595727737, "grad_norm": 0.7105744770846236, "learning_rate": 3.793160383658979e-06, "loss": 0.2933, "step": 14821 }, { "epoch": 0.6943364407176652, "grad_norm": 0.5940086219902284, "learning_rate": 3.7929980724479506e-06, "loss": 0.2756, "step": 14822 }, { "epoch": 0.6943832857075936, "grad_norm": 0.5782585552857381, "learning_rate": 3.792835753796043e-06, "loss": 0.2689, "step": 14823 }, { "epoch": 0.6944301306975219, "grad_norm": 0.6234808693136076, "learning_rate": 3.7926734277041906e-06, "loss": 0.2766, "step": 14824 }, { "epoch": 0.6944769756874503, "grad_norm": 0.5637970690423099, "learning_rate": 3.7925110941733266e-06, "loss": 0.2727, "step": 14825 }, { "epoch": 0.6945238206773785, "grad_norm": 0.5760930644092889, "learning_rate": 3.792348753204388e-06, "loss": 0.275, "step": 14826 }, { "epoch": 0.6945706656673069, "grad_norm": 0.5635819857639321, "learning_rate": 3.792186404798306e-06, "loss": 0.277, "step": 14827 }, { "epoch": 0.6946175106572352, "grad_norm": 0.5751785343183831, "learning_rate": 3.7920240489560163e-06, "loss": 0.2858, "step": 14828 }, { "epoch": 0.6946643556471636, "grad_norm": 0.5626954551035253, "learning_rate": 3.791861685678453e-06, "loss": 0.2779, "step": 14829 }, { "epoch": 0.6947112006370919, "grad_norm": 0.574409725288256, "learning_rate": 3.7916993149665505e-06, "loss": 0.2715, "step": 14830 }, { "epoch": 0.6947580456270201, "grad_norm": 0.5963126052101837, "learning_rate": 3.791536936821243e-06, "loss": 0.2791, "step": 14831 }, { "epoch": 0.6948048906169485, "grad_norm": 0.5396645400885763, "learning_rate": 3.7913745512434647e-06, "loss": 0.2566, "step": 14832 }, { "epoch": 0.6948517356068769, "grad_norm": 0.6136592844597932, "learning_rate": 3.791212158234151e-06, "loss": 0.306, "step": 14833 }, { "epoch": 0.6948985805968052, "grad_norm": 0.661119864020262, "learning_rate": 3.791049757794235e-06, "loss": 0.2844, "step": 14834 }, { "epoch": 0.6949454255867334, "grad_norm": 0.6035805241391257, "learning_rate": 3.790887349924653e-06, "loss": 0.2797, "step": 14835 }, { "epoch": 0.6949922705766618, "grad_norm": 0.5581957043029663, "learning_rate": 3.790724934626338e-06, "loss": 0.3033, "step": 14836 }, { "epoch": 0.6950391155665901, "grad_norm": 0.6311302731529932, "learning_rate": 3.7905625119002264e-06, "loss": 0.2918, "step": 14837 }, { "epoch": 0.6950859605565185, "grad_norm": 0.6160436023063277, "learning_rate": 3.7904000817472507e-06, "loss": 0.2916, "step": 14838 }, { "epoch": 0.6951328055464469, "grad_norm": 0.5734862420096148, "learning_rate": 3.7902376441683477e-06, "loss": 0.274, "step": 14839 }, { "epoch": 0.6951796505363751, "grad_norm": 0.5328232941718816, "learning_rate": 3.7900751991644513e-06, "loss": 0.2701, "step": 14840 }, { "epoch": 0.6952264955263034, "grad_norm": 0.6492506963131284, "learning_rate": 3.7899127467364966e-06, "loss": 0.2853, "step": 14841 }, { "epoch": 0.6952733405162318, "grad_norm": 0.6077580783938547, "learning_rate": 3.7897502868854175e-06, "loss": 0.2871, "step": 14842 }, { "epoch": 0.6953201855061601, "grad_norm": 0.5434791498040236, "learning_rate": 3.7895878196121504e-06, "loss": 0.2998, "step": 14843 }, { "epoch": 0.6953670304960884, "grad_norm": 0.6465030978227514, "learning_rate": 3.7894253449176286e-06, "loss": 0.2856, "step": 14844 }, { "epoch": 0.6954138754860167, "grad_norm": 0.6897943923531082, "learning_rate": 3.789262862802789e-06, "loss": 0.286, "step": 14845 }, { "epoch": 0.6954607204759451, "grad_norm": 0.6322661996179786, "learning_rate": 3.7891003732685648e-06, "loss": 0.2791, "step": 14846 }, { "epoch": 0.6955075654658734, "grad_norm": 0.6612814918640593, "learning_rate": 3.7889378763158933e-06, "loss": 0.3146, "step": 14847 }, { "epoch": 0.6955544104558018, "grad_norm": 0.5880791753524454, "learning_rate": 3.788775371945707e-06, "loss": 0.2851, "step": 14848 }, { "epoch": 0.69560125544573, "grad_norm": 0.6191286171774482, "learning_rate": 3.7886128601589424e-06, "loss": 0.284, "step": 14849 }, { "epoch": 0.6956481004356584, "grad_norm": 0.5286346150996211, "learning_rate": 3.7884503409565353e-06, "loss": 0.2618, "step": 14850 }, { "epoch": 0.6956949454255867, "grad_norm": 0.5793037844082941, "learning_rate": 3.7882878143394204e-06, "loss": 0.2588, "step": 14851 }, { "epoch": 0.6957417904155151, "grad_norm": 0.5523295704100643, "learning_rate": 3.788125280308532e-06, "loss": 0.275, "step": 14852 }, { "epoch": 0.6957886354054433, "grad_norm": 0.6614930540113052, "learning_rate": 3.7879627388648076e-06, "loss": 0.3202, "step": 14853 }, { "epoch": 0.6958354803953717, "grad_norm": 0.6091175985726417, "learning_rate": 3.787800190009181e-06, "loss": 0.2871, "step": 14854 }, { "epoch": 0.6958823253853, "grad_norm": 0.6333181762824183, "learning_rate": 3.787637633742587e-06, "loss": 0.2813, "step": 14855 }, { "epoch": 0.6959291703752284, "grad_norm": 0.6277926324257082, "learning_rate": 3.787475070065963e-06, "loss": 0.2891, "step": 14856 }, { "epoch": 0.6959760153651567, "grad_norm": 0.6329706381375477, "learning_rate": 3.787312498980243e-06, "loss": 0.2935, "step": 14857 }, { "epoch": 0.696022860355085, "grad_norm": 0.6244834914690548, "learning_rate": 3.7871499204863637e-06, "loss": 0.3013, "step": 14858 }, { "epoch": 0.6960697053450133, "grad_norm": 0.5980897089038119, "learning_rate": 3.7869873345852603e-06, "loss": 0.2921, "step": 14859 }, { "epoch": 0.6961165503349417, "grad_norm": 0.6110447607590825, "learning_rate": 3.786824741277867e-06, "loss": 0.2784, "step": 14860 }, { "epoch": 0.69616339532487, "grad_norm": 0.6145639162808861, "learning_rate": 3.7866621405651217e-06, "loss": 0.2661, "step": 14861 }, { "epoch": 0.6962102403147983, "grad_norm": 0.5552659699107249, "learning_rate": 3.7864995324479594e-06, "loss": 0.2663, "step": 14862 }, { "epoch": 0.6962570853047266, "grad_norm": 0.5849802955316723, "learning_rate": 3.786336916927315e-06, "loss": 0.3026, "step": 14863 }, { "epoch": 0.696303930294655, "grad_norm": 0.6131048623118197, "learning_rate": 3.7861742940041246e-06, "loss": 0.2756, "step": 14864 }, { "epoch": 0.6963507752845833, "grad_norm": 0.5900946735487118, "learning_rate": 3.7860116636793267e-06, "loss": 0.2953, "step": 14865 }, { "epoch": 0.6963976202745117, "grad_norm": 0.633128615080831, "learning_rate": 3.785849025953853e-06, "loss": 0.2937, "step": 14866 }, { "epoch": 0.6964444652644399, "grad_norm": 0.6554069210791824, "learning_rate": 3.7856863808286402e-06, "loss": 0.2867, "step": 14867 }, { "epoch": 0.6964913102543683, "grad_norm": 0.5851197895467618, "learning_rate": 3.7855237283046275e-06, "loss": 0.2901, "step": 14868 }, { "epoch": 0.6965381552442966, "grad_norm": 0.6130831990568567, "learning_rate": 3.785361068382748e-06, "loss": 0.2701, "step": 14869 }, { "epoch": 0.696585000234225, "grad_norm": 0.6341004116082113, "learning_rate": 3.7851984010639385e-06, "loss": 0.2953, "step": 14870 }, { "epoch": 0.6966318452241532, "grad_norm": 0.5906819604560636, "learning_rate": 3.7850357263491355e-06, "loss": 0.2827, "step": 14871 }, { "epoch": 0.6966786902140816, "grad_norm": 0.6226499702026783, "learning_rate": 3.7848730442392754e-06, "loss": 0.3085, "step": 14872 }, { "epoch": 0.6967255352040099, "grad_norm": 0.5631571177481028, "learning_rate": 3.7847103547352932e-06, "loss": 0.2718, "step": 14873 }, { "epoch": 0.6967723801939383, "grad_norm": 0.5860087249159416, "learning_rate": 3.784547657838126e-06, "loss": 0.2591, "step": 14874 }, { "epoch": 0.6968192251838666, "grad_norm": 0.6458065623247269, "learning_rate": 3.7843849535487097e-06, "loss": 0.2741, "step": 14875 }, { "epoch": 0.6968660701737949, "grad_norm": 0.5740700146992848, "learning_rate": 3.7842222418679807e-06, "loss": 0.2729, "step": 14876 }, { "epoch": 0.6969129151637232, "grad_norm": 0.6042703799396303, "learning_rate": 3.784059522796876e-06, "loss": 0.2902, "step": 14877 }, { "epoch": 0.6969597601536516, "grad_norm": 0.5887037382961089, "learning_rate": 3.783896796336331e-06, "loss": 0.2647, "step": 14878 }, { "epoch": 0.6970066051435799, "grad_norm": 0.5618068029232949, "learning_rate": 3.783734062487283e-06, "loss": 0.2651, "step": 14879 }, { "epoch": 0.6970534501335082, "grad_norm": 0.5583418568151411, "learning_rate": 3.7835713212506685e-06, "loss": 0.2627, "step": 14880 }, { "epoch": 0.6971002951234365, "grad_norm": 0.623667299138535, "learning_rate": 3.783408572627423e-06, "loss": 0.2947, "step": 14881 }, { "epoch": 0.6971471401133649, "grad_norm": 0.6517160582839474, "learning_rate": 3.7832458166184838e-06, "loss": 0.2636, "step": 14882 }, { "epoch": 0.6971939851032932, "grad_norm": 0.6391840635784217, "learning_rate": 3.7830830532247874e-06, "loss": 0.2697, "step": 14883 }, { "epoch": 0.6972408300932216, "grad_norm": 0.5771711363827798, "learning_rate": 3.7829202824472706e-06, "loss": 0.293, "step": 14884 }, { "epoch": 0.6972876750831498, "grad_norm": 0.5980493247579522, "learning_rate": 3.7827575042868698e-06, "loss": 0.2648, "step": 14885 }, { "epoch": 0.6973345200730782, "grad_norm": 0.6198601119272766, "learning_rate": 3.7825947187445226e-06, "loss": 0.2857, "step": 14886 }, { "epoch": 0.6973813650630065, "grad_norm": 0.597920682970382, "learning_rate": 3.782431925821165e-06, "loss": 0.2714, "step": 14887 }, { "epoch": 0.6974282100529349, "grad_norm": 0.5661617573964386, "learning_rate": 3.7822691255177335e-06, "loss": 0.2789, "step": 14888 }, { "epoch": 0.6974750550428631, "grad_norm": 0.5992309725736211, "learning_rate": 3.7821063178351657e-06, "loss": 0.2694, "step": 14889 }, { "epoch": 0.6975219000327915, "grad_norm": 0.6479156890122233, "learning_rate": 3.7819435027743984e-06, "loss": 0.284, "step": 14890 }, { "epoch": 0.6975687450227198, "grad_norm": 0.5747354822917411, "learning_rate": 3.7817806803363684e-06, "loss": 0.2621, "step": 14891 }, { "epoch": 0.6976155900126482, "grad_norm": 0.5397710117912657, "learning_rate": 3.7816178505220125e-06, "loss": 0.2531, "step": 14892 }, { "epoch": 0.6976624350025765, "grad_norm": 0.5501272737771495, "learning_rate": 3.7814550133322685e-06, "loss": 0.2668, "step": 14893 }, { "epoch": 0.6977092799925048, "grad_norm": 0.5368667338815217, "learning_rate": 3.7812921687680726e-06, "loss": 0.2722, "step": 14894 }, { "epoch": 0.6977561249824331, "grad_norm": 0.6123894088475016, "learning_rate": 3.7811293168303618e-06, "loss": 0.292, "step": 14895 }, { "epoch": 0.6978029699723615, "grad_norm": 0.5484125314228341, "learning_rate": 3.7809664575200753e-06, "loss": 0.2661, "step": 14896 }, { "epoch": 0.6978498149622898, "grad_norm": 0.5830560571781784, "learning_rate": 3.780803590838148e-06, "loss": 0.2746, "step": 14897 }, { "epoch": 0.6978966599522181, "grad_norm": 0.5843555290694002, "learning_rate": 3.7806407167855173e-06, "loss": 0.2797, "step": 14898 }, { "epoch": 0.6979435049421464, "grad_norm": 0.585317061906005, "learning_rate": 3.7804778353631216e-06, "loss": 0.2864, "step": 14899 }, { "epoch": 0.6979903499320748, "grad_norm": 0.639599014502365, "learning_rate": 3.780314946571898e-06, "loss": 0.2838, "step": 14900 }, { "epoch": 0.6980371949220031, "grad_norm": 0.5748334286825847, "learning_rate": 3.780152050412783e-06, "loss": 0.2573, "step": 14901 }, { "epoch": 0.6980840399119315, "grad_norm": 0.5394219949127885, "learning_rate": 3.7799891468867156e-06, "loss": 0.2722, "step": 14902 }, { "epoch": 0.6981308849018597, "grad_norm": 0.599424347533483, "learning_rate": 3.7798262359946324e-06, "loss": 0.2888, "step": 14903 }, { "epoch": 0.6981777298917881, "grad_norm": 0.5395573405889671, "learning_rate": 3.7796633177374703e-06, "loss": 0.2813, "step": 14904 }, { "epoch": 0.6982245748817164, "grad_norm": 0.6207658030482973, "learning_rate": 3.7795003921161677e-06, "loss": 0.2916, "step": 14905 }, { "epoch": 0.6982714198716448, "grad_norm": 0.5413984694532248, "learning_rate": 3.7793374591316624e-06, "loss": 0.2753, "step": 14906 }, { "epoch": 0.698318264861573, "grad_norm": 0.552882505455665, "learning_rate": 3.7791745187848906e-06, "loss": 0.2857, "step": 14907 }, { "epoch": 0.6983651098515014, "grad_norm": 0.5796433692083175, "learning_rate": 3.7790115710767915e-06, "loss": 0.2887, "step": 14908 }, { "epoch": 0.6984119548414297, "grad_norm": 0.5774950843119953, "learning_rate": 3.778848616008302e-06, "loss": 0.2688, "step": 14909 }, { "epoch": 0.6984587998313581, "grad_norm": 0.5883856925204081, "learning_rate": 3.7786856535803606e-06, "loss": 0.2858, "step": 14910 }, { "epoch": 0.6985056448212864, "grad_norm": 0.5846356421765339, "learning_rate": 3.778522683793905e-06, "loss": 0.274, "step": 14911 }, { "epoch": 0.6985524898112146, "grad_norm": 0.5584686694393407, "learning_rate": 3.778359706649871e-06, "loss": 0.2661, "step": 14912 }, { "epoch": 0.698599334801143, "grad_norm": 0.6183997314843954, "learning_rate": 3.7781967221492e-06, "loss": 0.2835, "step": 14913 }, { "epoch": 0.6986461797910714, "grad_norm": 0.5900373403957896, "learning_rate": 3.778033730292827e-06, "loss": 0.2853, "step": 14914 }, { "epoch": 0.6986930247809997, "grad_norm": 0.5112420085630541, "learning_rate": 3.777870731081692e-06, "loss": 0.2675, "step": 14915 }, { "epoch": 0.698739869770928, "grad_norm": 0.5758763567720606, "learning_rate": 3.7777077245167315e-06, "loss": 0.2719, "step": 14916 }, { "epoch": 0.6987867147608563, "grad_norm": 0.5656290964419334, "learning_rate": 3.777544710598885e-06, "loss": 0.2839, "step": 14917 }, { "epoch": 0.6988335597507846, "grad_norm": 0.5915390838732845, "learning_rate": 3.7773816893290884e-06, "loss": 0.2901, "step": 14918 }, { "epoch": 0.698880404740713, "grad_norm": 0.60947287868944, "learning_rate": 3.7772186607082817e-06, "loss": 0.304, "step": 14919 }, { "epoch": 0.6989272497306414, "grad_norm": 0.6260673445928332, "learning_rate": 3.777055624737403e-06, "loss": 0.286, "step": 14920 }, { "epoch": 0.6989740947205696, "grad_norm": 0.5326666502414854, "learning_rate": 3.77689258141739e-06, "loss": 0.2728, "step": 14921 }, { "epoch": 0.699020939710498, "grad_norm": 0.5711968439501355, "learning_rate": 3.7767295307491815e-06, "loss": 0.2821, "step": 14922 }, { "epoch": 0.6990677847004263, "grad_norm": 0.6088382173863685, "learning_rate": 3.7765664727337147e-06, "loss": 0.2726, "step": 14923 }, { "epoch": 0.6991146296903546, "grad_norm": 0.5771962709327012, "learning_rate": 3.776403407371929e-06, "loss": 0.2756, "step": 14924 }, { "epoch": 0.6991614746802829, "grad_norm": 0.586708576728897, "learning_rate": 3.7762403346647624e-06, "loss": 0.2754, "step": 14925 }, { "epoch": 0.6992083196702112, "grad_norm": 0.5782223816712335, "learning_rate": 3.776077254613154e-06, "loss": 0.2626, "step": 14926 }, { "epoch": 0.6992551646601396, "grad_norm": 0.5594995209051151, "learning_rate": 3.775914167218041e-06, "loss": 0.2757, "step": 14927 }, { "epoch": 0.699302009650068, "grad_norm": 0.5622235142038698, "learning_rate": 3.7757510724803637e-06, "loss": 0.2775, "step": 14928 }, { "epoch": 0.6993488546399963, "grad_norm": 0.5768566217905116, "learning_rate": 3.7755879704010588e-06, "loss": 0.2637, "step": 14929 }, { "epoch": 0.6993956996299245, "grad_norm": 0.5776122280840101, "learning_rate": 3.775424860981066e-06, "loss": 0.2783, "step": 14930 }, { "epoch": 0.6994425446198529, "grad_norm": 0.5437190206886998, "learning_rate": 3.7752617442213236e-06, "loss": 0.2603, "step": 14931 }, { "epoch": 0.6994893896097812, "grad_norm": 0.5805109135740104, "learning_rate": 3.7750986201227704e-06, "loss": 0.2655, "step": 14932 }, { "epoch": 0.6995362345997096, "grad_norm": 0.6139932214562611, "learning_rate": 3.774935488686345e-06, "loss": 0.2871, "step": 14933 }, { "epoch": 0.6995830795896378, "grad_norm": 0.6476006369699978, "learning_rate": 3.774772349912986e-06, "loss": 0.2806, "step": 14934 }, { "epoch": 0.6996299245795662, "grad_norm": 0.6778786248425731, "learning_rate": 3.774609203803634e-06, "loss": 0.2867, "step": 14935 }, { "epoch": 0.6996767695694945, "grad_norm": 0.5966736434717927, "learning_rate": 3.774446050359225e-06, "loss": 0.3052, "step": 14936 }, { "epoch": 0.6997236145594229, "grad_norm": 0.5824074354213554, "learning_rate": 3.774282889580699e-06, "loss": 0.2761, "step": 14937 }, { "epoch": 0.6997704595493512, "grad_norm": 0.5895132715034392, "learning_rate": 3.774119721468996e-06, "loss": 0.2787, "step": 14938 }, { "epoch": 0.6998173045392795, "grad_norm": 0.5877793485289602, "learning_rate": 3.7739565460250543e-06, "loss": 0.2975, "step": 14939 }, { "epoch": 0.6998641495292078, "grad_norm": 0.591025317371045, "learning_rate": 3.7737933632498124e-06, "loss": 0.2714, "step": 14940 }, { "epoch": 0.6999109945191362, "grad_norm": 0.5956960036431799, "learning_rate": 3.7736301731442106e-06, "loss": 0.2845, "step": 14941 }, { "epoch": 0.6999578395090645, "grad_norm": 0.5485661630450488, "learning_rate": 3.773466975709187e-06, "loss": 0.2693, "step": 14942 }, { "epoch": 0.7000046844989928, "grad_norm": 0.5745346771424469, "learning_rate": 3.7733037709456804e-06, "loss": 0.29, "step": 14943 }, { "epoch": 0.7000515294889211, "grad_norm": 0.5879677645317154, "learning_rate": 3.773140558854631e-06, "loss": 0.2812, "step": 14944 }, { "epoch": 0.7000983744788495, "grad_norm": 0.5715754068477505, "learning_rate": 3.7729773394369786e-06, "loss": 0.2771, "step": 14945 }, { "epoch": 0.7001452194687778, "grad_norm": 0.5987444691771083, "learning_rate": 3.7728141126936603e-06, "loss": 0.2716, "step": 14946 }, { "epoch": 0.7001920644587062, "grad_norm": 0.5450155020240142, "learning_rate": 3.772650878625617e-06, "loss": 0.2816, "step": 14947 }, { "epoch": 0.7002389094486344, "grad_norm": 0.5876814351633161, "learning_rate": 3.7724876372337877e-06, "loss": 0.2841, "step": 14948 }, { "epoch": 0.7002857544385628, "grad_norm": 0.6018396617716991, "learning_rate": 3.772324388519112e-06, "loss": 0.3139, "step": 14949 }, { "epoch": 0.7003325994284911, "grad_norm": 0.5448703012541644, "learning_rate": 3.77216113248253e-06, "loss": 0.2769, "step": 14950 }, { "epoch": 0.7003794444184195, "grad_norm": 0.6327948112422526, "learning_rate": 3.771997869124979e-06, "loss": 0.2957, "step": 14951 }, { "epoch": 0.7004262894083477, "grad_norm": 0.639808095106606, "learning_rate": 3.7718345984474007e-06, "loss": 0.2952, "step": 14952 }, { "epoch": 0.7004731343982761, "grad_norm": 0.5684732163806246, "learning_rate": 3.7716713204507346e-06, "loss": 0.2855, "step": 14953 }, { "epoch": 0.7005199793882044, "grad_norm": 0.5551487803395292, "learning_rate": 3.7715080351359186e-06, "loss": 0.2786, "step": 14954 }, { "epoch": 0.7005668243781328, "grad_norm": 0.5925083857451366, "learning_rate": 3.771344742503893e-06, "loss": 0.3093, "step": 14955 }, { "epoch": 0.7006136693680611, "grad_norm": 0.5783113125343429, "learning_rate": 3.7711814425555996e-06, "loss": 0.2953, "step": 14956 }, { "epoch": 0.7006605143579894, "grad_norm": 0.5251033596303264, "learning_rate": 3.7710181352919754e-06, "loss": 0.2591, "step": 14957 }, { "epoch": 0.7007073593479177, "grad_norm": 0.596195454806211, "learning_rate": 3.7708548207139617e-06, "loss": 0.2897, "step": 14958 }, { "epoch": 0.7007542043378461, "grad_norm": 0.5635833826997075, "learning_rate": 3.7706914988224974e-06, "loss": 0.2852, "step": 14959 }, { "epoch": 0.7008010493277744, "grad_norm": 0.5435371943789961, "learning_rate": 3.7705281696185235e-06, "loss": 0.2715, "step": 14960 }, { "epoch": 0.7008478943177027, "grad_norm": 0.6134530578369526, "learning_rate": 3.770364833102979e-06, "loss": 0.2936, "step": 14961 }, { "epoch": 0.700894739307631, "grad_norm": 0.5829159986865601, "learning_rate": 3.7702014892768046e-06, "loss": 0.2697, "step": 14962 }, { "epoch": 0.7009415842975594, "grad_norm": 0.6482749723669153, "learning_rate": 3.77003813814094e-06, "loss": 0.3084, "step": 14963 }, { "epoch": 0.7009884292874877, "grad_norm": 0.5693891763387208, "learning_rate": 3.7698747796963244e-06, "loss": 0.2744, "step": 14964 }, { "epoch": 0.7010352742774161, "grad_norm": 0.664200078440164, "learning_rate": 3.7697114139438982e-06, "loss": 0.3234, "step": 14965 }, { "epoch": 0.7010821192673443, "grad_norm": 0.625379535712288, "learning_rate": 3.7695480408846034e-06, "loss": 0.2822, "step": 14966 }, { "epoch": 0.7011289642572727, "grad_norm": 0.5589016435011008, "learning_rate": 3.769384660519378e-06, "loss": 0.2673, "step": 14967 }, { "epoch": 0.701175809247201, "grad_norm": 0.599464067710604, "learning_rate": 3.7692212728491623e-06, "loss": 0.2852, "step": 14968 }, { "epoch": 0.7012226542371294, "grad_norm": 0.5951478296915542, "learning_rate": 3.769057877874898e-06, "loss": 0.2898, "step": 14969 }, { "epoch": 0.7012694992270576, "grad_norm": 0.5498388055683116, "learning_rate": 3.768894475597524e-06, "loss": 0.2733, "step": 14970 }, { "epoch": 0.701316344216986, "grad_norm": 0.554496770700727, "learning_rate": 3.768731066017982e-06, "loss": 0.2796, "step": 14971 }, { "epoch": 0.7013631892069143, "grad_norm": 0.5897720228905361, "learning_rate": 3.7685676491372114e-06, "loss": 0.3019, "step": 14972 }, { "epoch": 0.7014100341968427, "grad_norm": 0.6044788446491407, "learning_rate": 3.7684042249561527e-06, "loss": 0.2874, "step": 14973 }, { "epoch": 0.701456879186771, "grad_norm": 0.5866751772223024, "learning_rate": 3.768240793475746e-06, "loss": 0.2797, "step": 14974 }, { "epoch": 0.7015037241766993, "grad_norm": 0.6138651771800405, "learning_rate": 3.768077354696933e-06, "loss": 0.2921, "step": 14975 }, { "epoch": 0.7015505691666276, "grad_norm": 0.5927762085141598, "learning_rate": 3.767913908620653e-06, "loss": 0.2985, "step": 14976 }, { "epoch": 0.701597414156556, "grad_norm": 0.5840644495645828, "learning_rate": 3.767750455247847e-06, "loss": 0.2761, "step": 14977 }, { "epoch": 0.7016442591464843, "grad_norm": 0.6015086401641211, "learning_rate": 3.7675869945794563e-06, "loss": 0.2836, "step": 14978 }, { "epoch": 0.7016911041364126, "grad_norm": 0.5794807022676287, "learning_rate": 3.7674235266164203e-06, "loss": 0.2679, "step": 14979 }, { "epoch": 0.7017379491263409, "grad_norm": 0.597724295343655, "learning_rate": 3.767260051359682e-06, "loss": 0.2793, "step": 14980 }, { "epoch": 0.7017847941162693, "grad_norm": 0.6302038186058748, "learning_rate": 3.7670965688101795e-06, "loss": 0.3017, "step": 14981 }, { "epoch": 0.7018316391061976, "grad_norm": 0.5718288061838609, "learning_rate": 3.7669330789688547e-06, "loss": 0.2817, "step": 14982 }, { "epoch": 0.701878484096126, "grad_norm": 0.6070500677741356, "learning_rate": 3.766769581836649e-06, "loss": 0.2725, "step": 14983 }, { "epoch": 0.7019253290860542, "grad_norm": 0.5846297906208153, "learning_rate": 3.7666060774145023e-06, "loss": 0.2765, "step": 14984 }, { "epoch": 0.7019721740759826, "grad_norm": 0.6144702243330138, "learning_rate": 3.7664425657033556e-06, "loss": 0.2793, "step": 14985 }, { "epoch": 0.7020190190659109, "grad_norm": 0.5845443686255034, "learning_rate": 3.7662790467041506e-06, "loss": 0.2869, "step": 14986 }, { "epoch": 0.7020658640558393, "grad_norm": 0.542825664556767, "learning_rate": 3.766115520417829e-06, "loss": 0.2784, "step": 14987 }, { "epoch": 0.7021127090457675, "grad_norm": 0.6074444630553755, "learning_rate": 3.7659519868453293e-06, "loss": 0.2916, "step": 14988 }, { "epoch": 0.7021595540356959, "grad_norm": 0.5781122220063327, "learning_rate": 3.765788445987594e-06, "loss": 0.2809, "step": 14989 }, { "epoch": 0.7022063990256242, "grad_norm": 0.5613531910202141, "learning_rate": 3.7656248978455655e-06, "loss": 0.2859, "step": 14990 }, { "epoch": 0.7022532440155526, "grad_norm": 0.6172363375033877, "learning_rate": 3.7654613424201838e-06, "loss": 0.3119, "step": 14991 }, { "epoch": 0.7023000890054809, "grad_norm": 0.5663580225811132, "learning_rate": 3.765297779712389e-06, "loss": 0.2835, "step": 14992 }, { "epoch": 0.7023469339954092, "grad_norm": 0.6114593010709695, "learning_rate": 3.765134209723125e-06, "loss": 0.3072, "step": 14993 }, { "epoch": 0.7023937789853375, "grad_norm": 0.5961030192723527, "learning_rate": 3.7649706324533305e-06, "loss": 0.2667, "step": 14994 }, { "epoch": 0.7024406239752659, "grad_norm": 0.5639286569237186, "learning_rate": 3.764807047903948e-06, "loss": 0.273, "step": 14995 }, { "epoch": 0.7024874689651942, "grad_norm": 0.5717555827356996, "learning_rate": 3.7646434560759187e-06, "loss": 0.2961, "step": 14996 }, { "epoch": 0.7025343139551224, "grad_norm": 0.5781767914921212, "learning_rate": 3.7644798569701847e-06, "loss": 0.2864, "step": 14997 }, { "epoch": 0.7025811589450508, "grad_norm": 0.592849360305199, "learning_rate": 3.7643162505876863e-06, "loss": 0.272, "step": 14998 }, { "epoch": 0.7026280039349792, "grad_norm": 0.5651778859543273, "learning_rate": 3.7641526369293667e-06, "loss": 0.2728, "step": 14999 }, { "epoch": 0.7026748489249075, "grad_norm": 0.5717060732282593, "learning_rate": 3.7639890159961657e-06, "loss": 0.2642, "step": 15000 }, { "epoch": 0.7027216939148359, "grad_norm": 0.6781270225593915, "learning_rate": 3.7638253877890253e-06, "loss": 0.3078, "step": 15001 }, { "epoch": 0.7027685389047641, "grad_norm": 0.6142818286208918, "learning_rate": 3.763661752308888e-06, "loss": 0.2795, "step": 15002 }, { "epoch": 0.7028153838946924, "grad_norm": 0.5611548965795929, "learning_rate": 3.763498109556695e-06, "loss": 0.275, "step": 15003 }, { "epoch": 0.7028622288846208, "grad_norm": 0.6278158823479946, "learning_rate": 3.763334459533387e-06, "loss": 0.3118, "step": 15004 }, { "epoch": 0.7029090738745492, "grad_norm": 0.5890206411727745, "learning_rate": 3.763170802239908e-06, "loss": 0.2661, "step": 15005 }, { "epoch": 0.7029559188644774, "grad_norm": 0.5413514420287932, "learning_rate": 3.7630071376771984e-06, "loss": 0.251, "step": 15006 }, { "epoch": 0.7030027638544057, "grad_norm": 0.6091414906380983, "learning_rate": 3.762843465846199e-06, "loss": 0.3035, "step": 15007 }, { "epoch": 0.7030496088443341, "grad_norm": 0.5806110305718103, "learning_rate": 3.7626797867478536e-06, "loss": 0.2897, "step": 15008 }, { "epoch": 0.7030964538342624, "grad_norm": 0.6036027825258623, "learning_rate": 3.7625161003831036e-06, "loss": 0.299, "step": 15009 }, { "epoch": 0.7031432988241908, "grad_norm": 0.5916642835290143, "learning_rate": 3.7623524067528904e-06, "loss": 0.2963, "step": 15010 }, { "epoch": 0.703190143814119, "grad_norm": 0.5836840088962437, "learning_rate": 3.7621887058581564e-06, "loss": 0.268, "step": 15011 }, { "epoch": 0.7032369888040474, "grad_norm": 0.5615708818517605, "learning_rate": 3.7620249976998437e-06, "loss": 0.2791, "step": 15012 }, { "epoch": 0.7032838337939757, "grad_norm": 0.6296155140061145, "learning_rate": 3.761861282278894e-06, "loss": 0.3044, "step": 15013 }, { "epoch": 0.7033306787839041, "grad_norm": 0.6233879608157942, "learning_rate": 3.7616975595962507e-06, "loss": 0.2631, "step": 15014 }, { "epoch": 0.7033775237738323, "grad_norm": 0.6419515680787227, "learning_rate": 3.7615338296528546e-06, "loss": 0.2992, "step": 15015 }, { "epoch": 0.7034243687637607, "grad_norm": 0.6402545003706871, "learning_rate": 3.7613700924496475e-06, "loss": 0.3196, "step": 15016 }, { "epoch": 0.703471213753689, "grad_norm": 0.6147110753509235, "learning_rate": 3.761206347987574e-06, "loss": 0.2846, "step": 15017 }, { "epoch": 0.7035180587436174, "grad_norm": 0.5776089417300776, "learning_rate": 3.761042596267574e-06, "loss": 0.2975, "step": 15018 }, { "epoch": 0.7035649037335457, "grad_norm": 0.5524436817432516, "learning_rate": 3.7608788372905912e-06, "loss": 0.2693, "step": 15019 }, { "epoch": 0.703611748723474, "grad_norm": 0.5989346692089016, "learning_rate": 3.760715071057568e-06, "loss": 0.2778, "step": 15020 }, { "epoch": 0.7036585937134023, "grad_norm": 0.6231488305207288, "learning_rate": 3.7605512975694457e-06, "loss": 0.3154, "step": 15021 }, { "epoch": 0.7037054387033307, "grad_norm": 0.6201768870489494, "learning_rate": 3.7603875168271675e-06, "loss": 0.2837, "step": 15022 }, { "epoch": 0.703752283693259, "grad_norm": 0.7207334980628111, "learning_rate": 3.7602237288316768e-06, "loss": 0.2876, "step": 15023 }, { "epoch": 0.7037991286831873, "grad_norm": 0.6033345080966376, "learning_rate": 3.760059933583914e-06, "loss": 0.2807, "step": 15024 }, { "epoch": 0.7038459736731156, "grad_norm": 0.5606193481026547, "learning_rate": 3.759896131084824e-06, "loss": 0.2618, "step": 15025 }, { "epoch": 0.703892818663044, "grad_norm": 0.5343563092255046, "learning_rate": 3.759732321335348e-06, "loss": 0.2692, "step": 15026 }, { "epoch": 0.7039396636529723, "grad_norm": 0.6243264437626439, "learning_rate": 3.7595685043364293e-06, "loss": 0.281, "step": 15027 }, { "epoch": 0.7039865086429007, "grad_norm": 0.6114243232162938, "learning_rate": 3.75940468008901e-06, "loss": 0.2881, "step": 15028 }, { "epoch": 0.7040333536328289, "grad_norm": 0.6535878065398575, "learning_rate": 3.7592408485940334e-06, "loss": 0.3169, "step": 15029 }, { "epoch": 0.7040801986227573, "grad_norm": 0.577791030199981, "learning_rate": 3.759077009852443e-06, "loss": 0.2861, "step": 15030 }, { "epoch": 0.7041270436126856, "grad_norm": 0.5722046424712949, "learning_rate": 3.75891316386518e-06, "loss": 0.2755, "step": 15031 }, { "epoch": 0.704173888602614, "grad_norm": 0.5727048872602003, "learning_rate": 3.7587493106331885e-06, "loss": 0.2863, "step": 15032 }, { "epoch": 0.7042207335925422, "grad_norm": 0.6181003001733418, "learning_rate": 3.7585854501574116e-06, "loss": 0.2889, "step": 15033 }, { "epoch": 0.7042675785824706, "grad_norm": 0.5503494789259397, "learning_rate": 3.75842158243879e-06, "loss": 0.2731, "step": 15034 }, { "epoch": 0.7043144235723989, "grad_norm": 0.610330184138104, "learning_rate": 3.75825770747827e-06, "loss": 0.2825, "step": 15035 }, { "epoch": 0.7043612685623273, "grad_norm": 0.5824215206716142, "learning_rate": 3.758093825276794e-06, "loss": 0.271, "step": 15036 }, { "epoch": 0.7044081135522556, "grad_norm": 0.5726507479780989, "learning_rate": 3.7579299358353027e-06, "loss": 0.2775, "step": 15037 }, { "epoch": 0.7044549585421839, "grad_norm": 0.5504071835738883, "learning_rate": 3.757766039154741e-06, "loss": 0.2673, "step": 15038 }, { "epoch": 0.7045018035321122, "grad_norm": 0.57308137531439, "learning_rate": 3.757602135236052e-06, "loss": 0.2763, "step": 15039 }, { "epoch": 0.7045486485220406, "grad_norm": 0.5836269270374358, "learning_rate": 3.757438224080179e-06, "loss": 0.2915, "step": 15040 }, { "epoch": 0.7045954935119689, "grad_norm": 0.5682710223204519, "learning_rate": 3.7572743056880646e-06, "loss": 0.2625, "step": 15041 }, { "epoch": 0.7046423385018972, "grad_norm": 0.6162423162819004, "learning_rate": 3.7571103800606534e-06, "loss": 0.2835, "step": 15042 }, { "epoch": 0.7046891834918255, "grad_norm": 0.5862709085167112, "learning_rate": 3.7569464471988876e-06, "loss": 0.286, "step": 15043 }, { "epoch": 0.7047360284817539, "grad_norm": 0.5712666501806102, "learning_rate": 3.7567825071037102e-06, "loss": 0.2717, "step": 15044 }, { "epoch": 0.7047828734716822, "grad_norm": 0.6064806337374696, "learning_rate": 3.7566185597760662e-06, "loss": 0.2944, "step": 15045 }, { "epoch": 0.7048297184616106, "grad_norm": 0.5398691809131115, "learning_rate": 3.7564546052168975e-06, "loss": 0.2805, "step": 15046 }, { "epoch": 0.7048765634515388, "grad_norm": 0.5847454376141382, "learning_rate": 3.7562906434271494e-06, "loss": 0.2636, "step": 15047 }, { "epoch": 0.7049234084414672, "grad_norm": 0.6263535048593274, "learning_rate": 3.756126674407764e-06, "loss": 0.282, "step": 15048 }, { "epoch": 0.7049702534313955, "grad_norm": 0.5723628700412469, "learning_rate": 3.755962698159684e-06, "loss": 0.3022, "step": 15049 }, { "epoch": 0.7050170984213239, "grad_norm": 0.6860420477417115, "learning_rate": 3.7557987146838558e-06, "loss": 0.3075, "step": 15050 }, { "epoch": 0.7050639434112521, "grad_norm": 0.6151292381636055, "learning_rate": 3.755634723981222e-06, "loss": 0.2956, "step": 15051 }, { "epoch": 0.7051107884011805, "grad_norm": 0.5706109785569136, "learning_rate": 3.7554707260527246e-06, "loss": 0.2708, "step": 15052 }, { "epoch": 0.7051576333911088, "grad_norm": 0.6294938450250481, "learning_rate": 3.75530672089931e-06, "loss": 0.2829, "step": 15053 }, { "epoch": 0.7052044783810372, "grad_norm": 0.5561603192505084, "learning_rate": 3.75514270852192e-06, "loss": 0.2766, "step": 15054 }, { "epoch": 0.7052513233709655, "grad_norm": 0.6071957732527604, "learning_rate": 3.754978688921499e-06, "loss": 0.2942, "step": 15055 }, { "epoch": 0.7052981683608938, "grad_norm": 0.592491670574105, "learning_rate": 3.754814662098991e-06, "loss": 0.2894, "step": 15056 }, { "epoch": 0.7053450133508221, "grad_norm": 0.5810457099788886, "learning_rate": 3.7546506280553408e-06, "loss": 0.2837, "step": 15057 }, { "epoch": 0.7053918583407505, "grad_norm": 0.5804682364790298, "learning_rate": 3.754486586791491e-06, "loss": 0.2732, "step": 15058 }, { "epoch": 0.7054387033306788, "grad_norm": 0.6252527330906709, "learning_rate": 3.754322538308386e-06, "loss": 0.289, "step": 15059 }, { "epoch": 0.7054855483206071, "grad_norm": 0.552080309583587, "learning_rate": 3.7541584826069706e-06, "loss": 0.2886, "step": 15060 }, { "epoch": 0.7055323933105354, "grad_norm": 0.5738551498301274, "learning_rate": 3.753994419688188e-06, "loss": 0.2804, "step": 15061 }, { "epoch": 0.7055792383004638, "grad_norm": 0.6153407033520844, "learning_rate": 3.7538303495529827e-06, "loss": 0.2865, "step": 15062 }, { "epoch": 0.7056260832903921, "grad_norm": 0.6441721522456346, "learning_rate": 3.753666272202299e-06, "loss": 0.3021, "step": 15063 }, { "epoch": 0.7056729282803205, "grad_norm": 0.6581160466988345, "learning_rate": 3.7535021876370816e-06, "loss": 0.29, "step": 15064 }, { "epoch": 0.7057197732702487, "grad_norm": 0.5708999273358917, "learning_rate": 3.7533380958582734e-06, "loss": 0.2899, "step": 15065 }, { "epoch": 0.7057666182601771, "grad_norm": 0.557309538684151, "learning_rate": 3.7531739968668197e-06, "loss": 0.2739, "step": 15066 }, { "epoch": 0.7058134632501054, "grad_norm": 0.5453491971243931, "learning_rate": 3.7530098906636643e-06, "loss": 0.2722, "step": 15067 }, { "epoch": 0.7058603082400338, "grad_norm": 0.5765231048689913, "learning_rate": 3.7528457772497517e-06, "loss": 0.2946, "step": 15068 }, { "epoch": 0.705907153229962, "grad_norm": 0.603096274735025, "learning_rate": 3.7526816566260277e-06, "loss": 0.2851, "step": 15069 }, { "epoch": 0.7059539982198904, "grad_norm": 0.6103730620867995, "learning_rate": 3.7525175287934345e-06, "loss": 0.2722, "step": 15070 }, { "epoch": 0.7060008432098187, "grad_norm": 0.5815284909109875, "learning_rate": 3.7523533937529184e-06, "loss": 0.2663, "step": 15071 }, { "epoch": 0.7060476881997471, "grad_norm": 0.627725433936712, "learning_rate": 3.752189251505423e-06, "loss": 0.2981, "step": 15072 }, { "epoch": 0.7060945331896754, "grad_norm": 0.5578911229651686, "learning_rate": 3.7520251020518927e-06, "loss": 0.2765, "step": 15073 }, { "epoch": 0.7061413781796037, "grad_norm": 0.5773400413165329, "learning_rate": 3.751860945393273e-06, "loss": 0.2804, "step": 15074 }, { "epoch": 0.706188223169532, "grad_norm": 0.5657276942131892, "learning_rate": 3.7516967815305095e-06, "loss": 0.267, "step": 15075 }, { "epoch": 0.7062350681594604, "grad_norm": 0.6116859120626069, "learning_rate": 3.7515326104645437e-06, "loss": 0.3004, "step": 15076 }, { "epoch": 0.7062819131493887, "grad_norm": 0.5546104912019554, "learning_rate": 3.751368432196323e-06, "loss": 0.2821, "step": 15077 }, { "epoch": 0.706328758139317, "grad_norm": 0.5437976754729537, "learning_rate": 3.7512042467267917e-06, "loss": 0.2725, "step": 15078 }, { "epoch": 0.7063756031292453, "grad_norm": 0.5745053806955733, "learning_rate": 3.7510400540568948e-06, "loss": 0.2959, "step": 15079 }, { "epoch": 0.7064224481191737, "grad_norm": 0.5821286228183251, "learning_rate": 3.7508758541875757e-06, "loss": 0.2998, "step": 15080 }, { "epoch": 0.706469293109102, "grad_norm": 0.5748238188284359, "learning_rate": 3.7507116471197814e-06, "loss": 0.2989, "step": 15081 }, { "epoch": 0.7065161380990304, "grad_norm": 0.6137686079253649, "learning_rate": 3.7505474328544555e-06, "loss": 0.2974, "step": 15082 }, { "epoch": 0.7065629830889586, "grad_norm": 0.587791788672615, "learning_rate": 3.7503832113925433e-06, "loss": 0.2777, "step": 15083 }, { "epoch": 0.706609828078887, "grad_norm": 0.6146803098789988, "learning_rate": 3.7502189827349905e-06, "loss": 0.2754, "step": 15084 }, { "epoch": 0.7066566730688153, "grad_norm": 0.607224274199998, "learning_rate": 3.750054746882742e-06, "loss": 0.2957, "step": 15085 }, { "epoch": 0.7067035180587437, "grad_norm": 0.6094357178818187, "learning_rate": 3.7498905038367418e-06, "loss": 0.2715, "step": 15086 }, { "epoch": 0.7067503630486719, "grad_norm": 0.5820230582498768, "learning_rate": 3.7497262535979363e-06, "loss": 0.2894, "step": 15087 }, { "epoch": 0.7067972080386002, "grad_norm": 0.5883804395615498, "learning_rate": 3.749561996167269e-06, "loss": 0.284, "step": 15088 }, { "epoch": 0.7068440530285286, "grad_norm": 0.6039356474064245, "learning_rate": 3.7493977315456882e-06, "loss": 0.2913, "step": 15089 }, { "epoch": 0.706890898018457, "grad_norm": 0.634048951933721, "learning_rate": 3.7492334597341374e-06, "loss": 0.2865, "step": 15090 }, { "epoch": 0.7069377430083853, "grad_norm": 0.5460070528801055, "learning_rate": 3.7490691807335613e-06, "loss": 0.2822, "step": 15091 }, { "epoch": 0.7069845879983135, "grad_norm": 0.6171010373282639, "learning_rate": 3.748904894544906e-06, "loss": 0.2692, "step": 15092 }, { "epoch": 0.7070314329882419, "grad_norm": 0.5692574230150265, "learning_rate": 3.7487406011691173e-06, "loss": 0.2904, "step": 15093 }, { "epoch": 0.7070782779781702, "grad_norm": 0.637994706215439, "learning_rate": 3.74857630060714e-06, "loss": 0.2851, "step": 15094 }, { "epoch": 0.7071251229680986, "grad_norm": 0.5709271305262456, "learning_rate": 3.74841199285992e-06, "loss": 0.2879, "step": 15095 }, { "epoch": 0.7071719679580268, "grad_norm": 0.5675636150180942, "learning_rate": 3.748247677928403e-06, "loss": 0.2725, "step": 15096 }, { "epoch": 0.7072188129479552, "grad_norm": 0.5904548771204469, "learning_rate": 3.7480833558135345e-06, "loss": 0.2744, "step": 15097 }, { "epoch": 0.7072656579378835, "grad_norm": 0.6036998032231109, "learning_rate": 3.747919026516259e-06, "loss": 0.2822, "step": 15098 }, { "epoch": 0.7073125029278119, "grad_norm": 0.5186416299643194, "learning_rate": 3.747754690037524e-06, "loss": 0.2529, "step": 15099 }, { "epoch": 0.7073593479177402, "grad_norm": 0.5584912136758289, "learning_rate": 3.747590346378274e-06, "loss": 0.2912, "step": 15100 }, { "epoch": 0.7074061929076685, "grad_norm": 0.5941192944678259, "learning_rate": 3.7474259955394552e-06, "loss": 0.2932, "step": 15101 }, { "epoch": 0.7074530378975968, "grad_norm": 0.5933288446400978, "learning_rate": 3.747261637522014e-06, "loss": 0.2843, "step": 15102 }, { "epoch": 0.7074998828875252, "grad_norm": 0.6529846860275834, "learning_rate": 3.747097272326895e-06, "loss": 0.2653, "step": 15103 }, { "epoch": 0.7075467278774535, "grad_norm": 0.5759638386021327, "learning_rate": 3.7469328999550446e-06, "loss": 0.2687, "step": 15104 }, { "epoch": 0.7075935728673818, "grad_norm": 0.5925704539879854, "learning_rate": 3.7467685204074085e-06, "loss": 0.277, "step": 15105 }, { "epoch": 0.7076404178573101, "grad_norm": 0.6205287383620292, "learning_rate": 3.7466041336849336e-06, "loss": 0.2821, "step": 15106 }, { "epoch": 0.7076872628472385, "grad_norm": 0.61637231343308, "learning_rate": 3.746439739788565e-06, "loss": 0.2685, "step": 15107 }, { "epoch": 0.7077341078371668, "grad_norm": 0.6049575435165319, "learning_rate": 3.7462753387192484e-06, "loss": 0.2846, "step": 15108 }, { "epoch": 0.7077809528270952, "grad_norm": 0.6501098758799247, "learning_rate": 3.7461109304779308e-06, "loss": 0.3087, "step": 15109 }, { "epoch": 0.7078277978170234, "grad_norm": 0.5755525964380455, "learning_rate": 3.745946515065558e-06, "loss": 0.2851, "step": 15110 }, { "epoch": 0.7078746428069518, "grad_norm": 0.6274988174487697, "learning_rate": 3.745782092483076e-06, "loss": 0.2878, "step": 15111 }, { "epoch": 0.7079214877968801, "grad_norm": 0.5683076455377234, "learning_rate": 3.745617662731432e-06, "loss": 0.293, "step": 15112 }, { "epoch": 0.7079683327868085, "grad_norm": 0.5712638805774459, "learning_rate": 3.745453225811571e-06, "loss": 0.2677, "step": 15113 }, { "epoch": 0.7080151777767367, "grad_norm": 0.6141884191819674, "learning_rate": 3.745288781724439e-06, "loss": 0.2867, "step": 15114 }, { "epoch": 0.7080620227666651, "grad_norm": 0.5558468613888965, "learning_rate": 3.745124330470984e-06, "loss": 0.2833, "step": 15115 }, { "epoch": 0.7081088677565934, "grad_norm": 0.5493868320799835, "learning_rate": 3.744959872052151e-06, "loss": 0.2559, "step": 15116 }, { "epoch": 0.7081557127465218, "grad_norm": 0.6089865229493443, "learning_rate": 3.7447954064688876e-06, "loss": 0.2968, "step": 15117 }, { "epoch": 0.7082025577364501, "grad_norm": 0.6391287944446595, "learning_rate": 3.7446309337221388e-06, "loss": 0.2851, "step": 15118 }, { "epoch": 0.7082494027263784, "grad_norm": 0.6124156998296812, "learning_rate": 3.744466453812851e-06, "loss": 0.3014, "step": 15119 }, { "epoch": 0.7082962477163067, "grad_norm": 0.5910738179181844, "learning_rate": 3.744301966741973e-06, "loss": 0.3006, "step": 15120 }, { "epoch": 0.7083430927062351, "grad_norm": 0.5628231189563925, "learning_rate": 3.7441374725104494e-06, "loss": 0.2725, "step": 15121 }, { "epoch": 0.7083899376961634, "grad_norm": 0.5935907796218686, "learning_rate": 3.743972971119227e-06, "loss": 0.2669, "step": 15122 }, { "epoch": 0.7084367826860917, "grad_norm": 0.6540078435369657, "learning_rate": 3.7438084625692528e-06, "loss": 0.3129, "step": 15123 }, { "epoch": 0.70848362767602, "grad_norm": 0.5550705930071157, "learning_rate": 3.7436439468614744e-06, "loss": 0.2772, "step": 15124 }, { "epoch": 0.7085304726659484, "grad_norm": 0.5723696226044215, "learning_rate": 3.7434794239968365e-06, "loss": 0.2701, "step": 15125 }, { "epoch": 0.7085773176558767, "grad_norm": 0.6344400685470198, "learning_rate": 3.7433148939762876e-06, "loss": 0.2899, "step": 15126 }, { "epoch": 0.7086241626458051, "grad_norm": 0.5619962491827803, "learning_rate": 3.7431503568007743e-06, "loss": 0.2805, "step": 15127 }, { "epoch": 0.7086710076357333, "grad_norm": 0.6278639569201554, "learning_rate": 3.7429858124712427e-06, "loss": 0.2827, "step": 15128 }, { "epoch": 0.7087178526256617, "grad_norm": 0.6032919922002119, "learning_rate": 3.74282126098864e-06, "loss": 0.2732, "step": 15129 }, { "epoch": 0.70876469761559, "grad_norm": 0.5839839489668622, "learning_rate": 3.7426567023539133e-06, "loss": 0.2922, "step": 15130 }, { "epoch": 0.7088115426055184, "grad_norm": 0.6066814494040004, "learning_rate": 3.74249213656801e-06, "loss": 0.283, "step": 15131 }, { "epoch": 0.7088583875954466, "grad_norm": 0.6137535537645602, "learning_rate": 3.7423275636318767e-06, "loss": 0.2939, "step": 15132 }, { "epoch": 0.708905232585375, "grad_norm": 0.6037142077273102, "learning_rate": 3.7421629835464608e-06, "loss": 0.2957, "step": 15133 }, { "epoch": 0.7089520775753033, "grad_norm": 0.6529488052136605, "learning_rate": 3.741998396312709e-06, "loss": 0.2948, "step": 15134 }, { "epoch": 0.7089989225652317, "grad_norm": 0.6149209766694321, "learning_rate": 3.741833801931568e-06, "loss": 0.2565, "step": 15135 }, { "epoch": 0.70904576755516, "grad_norm": 0.5820729195563317, "learning_rate": 3.741669200403986e-06, "loss": 0.2915, "step": 15136 }, { "epoch": 0.7090926125450883, "grad_norm": 0.5737473020962497, "learning_rate": 3.7415045917309097e-06, "loss": 0.2482, "step": 15137 }, { "epoch": 0.7091394575350166, "grad_norm": 0.5770795476223686, "learning_rate": 3.741339975913287e-06, "loss": 0.2823, "step": 15138 }, { "epoch": 0.709186302524945, "grad_norm": 0.6314657084684016, "learning_rate": 3.7411753529520644e-06, "loss": 0.2842, "step": 15139 }, { "epoch": 0.7092331475148733, "grad_norm": 0.6006099144426019, "learning_rate": 3.741010722848189e-06, "loss": 0.2862, "step": 15140 }, { "epoch": 0.7092799925048016, "grad_norm": 0.5754647978110944, "learning_rate": 3.7408460856026098e-06, "loss": 0.2784, "step": 15141 }, { "epoch": 0.7093268374947299, "grad_norm": 0.584240688037232, "learning_rate": 3.740681441216273e-06, "loss": 0.2695, "step": 15142 }, { "epoch": 0.7093736824846583, "grad_norm": 0.666455042560467, "learning_rate": 3.740516789690126e-06, "loss": 0.3149, "step": 15143 }, { "epoch": 0.7094205274745866, "grad_norm": 0.6077655166099388, "learning_rate": 3.740352131025116e-06, "loss": 0.2798, "step": 15144 }, { "epoch": 0.709467372464515, "grad_norm": 0.5843733020461063, "learning_rate": 3.740187465222193e-06, "loss": 0.2817, "step": 15145 }, { "epoch": 0.7095142174544432, "grad_norm": 0.5665214570558893, "learning_rate": 3.740022792282302e-06, "loss": 0.2694, "step": 15146 }, { "epoch": 0.7095610624443716, "grad_norm": 0.5725767191158706, "learning_rate": 3.739858112206391e-06, "loss": 0.2751, "step": 15147 }, { "epoch": 0.7096079074342999, "grad_norm": 0.607622168684724, "learning_rate": 3.739693424995409e-06, "loss": 0.2821, "step": 15148 }, { "epoch": 0.7096547524242283, "grad_norm": 0.5788690012225748, "learning_rate": 3.7395287306503025e-06, "loss": 0.2592, "step": 15149 }, { "epoch": 0.7097015974141565, "grad_norm": 0.5816837743716562, "learning_rate": 3.739364029172019e-06, "loss": 0.2696, "step": 15150 }, { "epoch": 0.7097484424040849, "grad_norm": 0.5934247078379001, "learning_rate": 3.7391993205615085e-06, "loss": 0.277, "step": 15151 }, { "epoch": 0.7097952873940132, "grad_norm": 0.6135335202541198, "learning_rate": 3.739034604819717e-06, "loss": 0.2888, "step": 15152 }, { "epoch": 0.7098421323839416, "grad_norm": 0.5430926786580351, "learning_rate": 3.7388698819475917e-06, "loss": 0.2715, "step": 15153 }, { "epoch": 0.7098889773738699, "grad_norm": 0.6756473315020922, "learning_rate": 3.7387051519460825e-06, "loss": 0.2946, "step": 15154 }, { "epoch": 0.7099358223637982, "grad_norm": 0.5682184126019713, "learning_rate": 3.7385404148161363e-06, "loss": 0.2807, "step": 15155 }, { "epoch": 0.7099826673537265, "grad_norm": 0.613103460671005, "learning_rate": 3.738375670558701e-06, "loss": 0.2771, "step": 15156 }, { "epoch": 0.7100295123436549, "grad_norm": 0.5880544347817739, "learning_rate": 3.738210919174725e-06, "loss": 0.2813, "step": 15157 }, { "epoch": 0.7100763573335832, "grad_norm": 0.5591641275579856, "learning_rate": 3.738046160665157e-06, "loss": 0.2829, "step": 15158 }, { "epoch": 0.7101232023235114, "grad_norm": 0.5727275074006619, "learning_rate": 3.737881395030944e-06, "loss": 0.2889, "step": 15159 }, { "epoch": 0.7101700473134398, "grad_norm": 0.5698641338266337, "learning_rate": 3.7377166222730353e-06, "loss": 0.2906, "step": 15160 }, { "epoch": 0.7102168923033682, "grad_norm": 0.5682181326936931, "learning_rate": 3.7375518423923774e-06, "loss": 0.2868, "step": 15161 }, { "epoch": 0.7102637372932965, "grad_norm": 0.5480735121584275, "learning_rate": 3.7373870553899203e-06, "loss": 0.2596, "step": 15162 }, { "epoch": 0.7103105822832249, "grad_norm": 0.5603994351378274, "learning_rate": 3.7372222612666127e-06, "loss": 0.2754, "step": 15163 }, { "epoch": 0.7103574272731531, "grad_norm": 0.6108650391206428, "learning_rate": 3.7370574600234006e-06, "loss": 0.2947, "step": 15164 }, { "epoch": 0.7104042722630814, "grad_norm": 0.58161140059171, "learning_rate": 3.7368926516612336e-06, "loss": 0.2956, "step": 15165 }, { "epoch": 0.7104511172530098, "grad_norm": 0.6152735692380632, "learning_rate": 3.7367278361810612e-06, "loss": 0.2936, "step": 15166 }, { "epoch": 0.7104979622429382, "grad_norm": 0.617201403650658, "learning_rate": 3.7365630135838305e-06, "loss": 0.3063, "step": 15167 }, { "epoch": 0.7105448072328664, "grad_norm": 0.6359715105168806, "learning_rate": 3.7363981838704905e-06, "loss": 0.3022, "step": 15168 }, { "epoch": 0.7105916522227947, "grad_norm": 0.5482921572830625, "learning_rate": 3.73623334704199e-06, "loss": 0.264, "step": 15169 }, { "epoch": 0.7106384972127231, "grad_norm": 0.5876658946876402, "learning_rate": 3.7360685030992772e-06, "loss": 0.2864, "step": 15170 }, { "epoch": 0.7106853422026514, "grad_norm": 0.6050570414540613, "learning_rate": 3.7359036520433e-06, "loss": 0.2971, "step": 15171 }, { "epoch": 0.7107321871925798, "grad_norm": 0.5806333721465322, "learning_rate": 3.735738793875009e-06, "loss": 0.2839, "step": 15172 }, { "epoch": 0.710779032182508, "grad_norm": 0.6006990739506243, "learning_rate": 3.7355739285953517e-06, "loss": 0.2719, "step": 15173 }, { "epoch": 0.7108258771724364, "grad_norm": 0.6307730357293437, "learning_rate": 3.7354090562052764e-06, "loss": 0.2931, "step": 15174 }, { "epoch": 0.7108727221623647, "grad_norm": 0.5771820530004624, "learning_rate": 3.735244176705732e-06, "loss": 0.2675, "step": 15175 }, { "epoch": 0.7109195671522931, "grad_norm": 0.5969830971187282, "learning_rate": 3.7350792900976698e-06, "loss": 0.2848, "step": 15176 }, { "epoch": 0.7109664121422213, "grad_norm": 0.6172217027276088, "learning_rate": 3.7349143963820357e-06, "loss": 0.2864, "step": 15177 }, { "epoch": 0.7110132571321497, "grad_norm": 0.5632618865659793, "learning_rate": 3.7347494955597783e-06, "loss": 0.2818, "step": 15178 }, { "epoch": 0.711060102122078, "grad_norm": 0.6283872330521661, "learning_rate": 3.73458458763185e-06, "loss": 0.2852, "step": 15179 }, { "epoch": 0.7111069471120064, "grad_norm": 0.616558057760467, "learning_rate": 3.7344196725991966e-06, "loss": 0.2953, "step": 15180 }, { "epoch": 0.7111537921019347, "grad_norm": 0.609448693771008, "learning_rate": 3.734254750462768e-06, "loss": 0.2635, "step": 15181 }, { "epoch": 0.711200637091863, "grad_norm": 0.6060274366989943, "learning_rate": 3.7340898212235144e-06, "loss": 0.2991, "step": 15182 }, { "epoch": 0.7112474820817913, "grad_norm": 0.6871707961115558, "learning_rate": 3.733924884882384e-06, "loss": 0.3073, "step": 15183 }, { "epoch": 0.7112943270717197, "grad_norm": 0.5221712107626335, "learning_rate": 3.7337599414403254e-06, "loss": 0.2531, "step": 15184 }, { "epoch": 0.711341172061648, "grad_norm": 0.5585027441313695, "learning_rate": 3.733594990898288e-06, "loss": 0.274, "step": 15185 }, { "epoch": 0.7113880170515763, "grad_norm": 0.6597333503547606, "learning_rate": 3.733430033257223e-06, "loss": 0.2977, "step": 15186 }, { "epoch": 0.7114348620415046, "grad_norm": 0.6051711814038366, "learning_rate": 3.733265068518077e-06, "loss": 0.2786, "step": 15187 }, { "epoch": 0.711481707031433, "grad_norm": 0.6094914502576453, "learning_rate": 3.7331000966818008e-06, "loss": 0.2916, "step": 15188 }, { "epoch": 0.7115285520213613, "grad_norm": 0.5510023189206644, "learning_rate": 3.7329351177493435e-06, "loss": 0.2722, "step": 15189 }, { "epoch": 0.7115753970112897, "grad_norm": 0.6231306662354045, "learning_rate": 3.7327701317216545e-06, "loss": 0.2743, "step": 15190 }, { "epoch": 0.7116222420012179, "grad_norm": 0.5520637906049871, "learning_rate": 3.732605138599683e-06, "loss": 0.2647, "step": 15191 }, { "epoch": 0.7116690869911463, "grad_norm": 0.5944994105249141, "learning_rate": 3.732440138384379e-06, "loss": 0.2769, "step": 15192 }, { "epoch": 0.7117159319810746, "grad_norm": 0.5779226800525643, "learning_rate": 3.732275131076692e-06, "loss": 0.2773, "step": 15193 }, { "epoch": 0.711762776971003, "grad_norm": 0.6312161453630774, "learning_rate": 3.732110116677571e-06, "loss": 0.2935, "step": 15194 }, { "epoch": 0.7118096219609312, "grad_norm": 0.6509709270079128, "learning_rate": 3.7319450951879655e-06, "loss": 0.278, "step": 15195 }, { "epoch": 0.7118564669508596, "grad_norm": 0.5874118329006236, "learning_rate": 3.731780066608826e-06, "loss": 0.2732, "step": 15196 }, { "epoch": 0.7119033119407879, "grad_norm": 0.5846591024591257, "learning_rate": 3.7316150309411024e-06, "loss": 0.2816, "step": 15197 }, { "epoch": 0.7119501569307163, "grad_norm": 0.6368850841537165, "learning_rate": 3.7314499881857433e-06, "loss": 0.2672, "step": 15198 }, { "epoch": 0.7119970019206446, "grad_norm": 0.5470062701808867, "learning_rate": 3.731284938343699e-06, "loss": 0.2679, "step": 15199 }, { "epoch": 0.7120438469105729, "grad_norm": 0.6126248473473651, "learning_rate": 3.7311198814159196e-06, "loss": 0.296, "step": 15200 }, { "epoch": 0.7120906919005012, "grad_norm": 0.6064416108951729, "learning_rate": 3.730954817403355e-06, "loss": 0.2928, "step": 15201 }, { "epoch": 0.7121375368904296, "grad_norm": 0.631213052649654, "learning_rate": 3.7307897463069535e-06, "loss": 0.3079, "step": 15202 }, { "epoch": 0.7121843818803579, "grad_norm": 0.6152543749471434, "learning_rate": 3.7306246681276674e-06, "loss": 0.3063, "step": 15203 }, { "epoch": 0.7122312268702862, "grad_norm": 0.5678398527669689, "learning_rate": 3.730459582866446e-06, "loss": 0.2702, "step": 15204 }, { "epoch": 0.7122780718602145, "grad_norm": 0.6358510883941069, "learning_rate": 3.7302944905242382e-06, "loss": 0.2862, "step": 15205 }, { "epoch": 0.7123249168501429, "grad_norm": 0.5764394001818817, "learning_rate": 3.7301293911019955e-06, "loss": 0.275, "step": 15206 }, { "epoch": 0.7123717618400712, "grad_norm": 0.6186387512919186, "learning_rate": 3.729964284600666e-06, "loss": 0.2692, "step": 15207 }, { "epoch": 0.7124186068299996, "grad_norm": 0.6731510521096653, "learning_rate": 3.729799171021203e-06, "loss": 0.3094, "step": 15208 }, { "epoch": 0.7124654518199278, "grad_norm": 0.6232532784267711, "learning_rate": 3.729634050364554e-06, "loss": 0.2836, "step": 15209 }, { "epoch": 0.7125122968098562, "grad_norm": 0.5896293275915439, "learning_rate": 3.7294689226316695e-06, "loss": 0.2781, "step": 15210 }, { "epoch": 0.7125591417997845, "grad_norm": 0.5778846952115967, "learning_rate": 3.729303787823501e-06, "loss": 0.2655, "step": 15211 }, { "epoch": 0.7126059867897129, "grad_norm": 0.604831753555272, "learning_rate": 3.7291386459409984e-06, "loss": 0.2804, "step": 15212 }, { "epoch": 0.7126528317796411, "grad_norm": 0.5665612069703577, "learning_rate": 3.728973496985111e-06, "loss": 0.269, "step": 15213 }, { "epoch": 0.7126996767695695, "grad_norm": 0.5841522737973858, "learning_rate": 3.7288083409567906e-06, "loss": 0.2941, "step": 15214 }, { "epoch": 0.7127465217594978, "grad_norm": 0.5803403655139578, "learning_rate": 3.7286431778569877e-06, "loss": 0.286, "step": 15215 }, { "epoch": 0.7127933667494262, "grad_norm": 0.5859448491161503, "learning_rate": 3.728478007686651e-06, "loss": 0.2855, "step": 15216 }, { "epoch": 0.7128402117393545, "grad_norm": 0.6254101715004076, "learning_rate": 3.728312830446732e-06, "loss": 0.294, "step": 15217 }, { "epoch": 0.7128870567292828, "grad_norm": 0.581827681765028, "learning_rate": 3.7281476461381826e-06, "loss": 0.2854, "step": 15218 }, { "epoch": 0.7129339017192111, "grad_norm": 0.5724642249869323, "learning_rate": 3.7279824547619513e-06, "loss": 0.272, "step": 15219 }, { "epoch": 0.7129807467091395, "grad_norm": 0.5831917666092684, "learning_rate": 3.7278172563189897e-06, "loss": 0.2893, "step": 15220 }, { "epoch": 0.7130275916990678, "grad_norm": 0.5907776273947669, "learning_rate": 3.7276520508102487e-06, "loss": 0.2762, "step": 15221 }, { "epoch": 0.7130744366889961, "grad_norm": 0.5461551439693195, "learning_rate": 3.7274868382366786e-06, "loss": 0.2527, "step": 15222 }, { "epoch": 0.7131212816789244, "grad_norm": 0.5639004857870373, "learning_rate": 3.7273216185992302e-06, "loss": 0.2797, "step": 15223 }, { "epoch": 0.7131681266688528, "grad_norm": 0.5818399982478688, "learning_rate": 3.7271563918988544e-06, "loss": 0.2681, "step": 15224 }, { "epoch": 0.7132149716587811, "grad_norm": 0.5666979245543446, "learning_rate": 3.726991158136502e-06, "loss": 0.278, "step": 15225 }, { "epoch": 0.7132618166487095, "grad_norm": 0.5856956735350617, "learning_rate": 3.726825917313124e-06, "loss": 0.2552, "step": 15226 }, { "epoch": 0.7133086616386377, "grad_norm": 0.5960360103336997, "learning_rate": 3.7266606694296708e-06, "loss": 0.2932, "step": 15227 }, { "epoch": 0.7133555066285661, "grad_norm": 0.5764953211086147, "learning_rate": 3.7264954144870934e-06, "loss": 0.2719, "step": 15228 }, { "epoch": 0.7134023516184944, "grad_norm": 0.5854924910566631, "learning_rate": 3.726330152486344e-06, "loss": 0.2694, "step": 15229 }, { "epoch": 0.7134491966084228, "grad_norm": 0.6635988919733583, "learning_rate": 3.726164883428373e-06, "loss": 0.2929, "step": 15230 }, { "epoch": 0.713496041598351, "grad_norm": 0.609086094434895, "learning_rate": 3.72599960731413e-06, "loss": 0.2696, "step": 15231 }, { "epoch": 0.7135428865882794, "grad_norm": 0.5715601491025663, "learning_rate": 3.725834324144569e-06, "loss": 0.2857, "step": 15232 }, { "epoch": 0.7135897315782077, "grad_norm": 0.5746228577522312, "learning_rate": 3.7256690339206392e-06, "loss": 0.2887, "step": 15233 }, { "epoch": 0.7136365765681361, "grad_norm": 0.5660432007183827, "learning_rate": 3.725503736643291e-06, "loss": 0.2591, "step": 15234 }, { "epoch": 0.7136834215580644, "grad_norm": 0.6073651313867992, "learning_rate": 3.7253384323134774e-06, "loss": 0.2894, "step": 15235 }, { "epoch": 0.7137302665479927, "grad_norm": 0.6305312329649946, "learning_rate": 3.7251731209321494e-06, "loss": 0.2884, "step": 15236 }, { "epoch": 0.713777111537921, "grad_norm": 0.6387418217797787, "learning_rate": 3.7250078025002577e-06, "loss": 0.2868, "step": 15237 }, { "epoch": 0.7138239565278494, "grad_norm": 0.5954217651149232, "learning_rate": 3.7248424770187543e-06, "loss": 0.2624, "step": 15238 }, { "epoch": 0.7138708015177777, "grad_norm": 0.6195157977094645, "learning_rate": 3.7246771444885904e-06, "loss": 0.2802, "step": 15239 }, { "epoch": 0.713917646507706, "grad_norm": 0.6151728219351328, "learning_rate": 3.7245118049107177e-06, "loss": 0.2696, "step": 15240 }, { "epoch": 0.7139644914976343, "grad_norm": 0.5843535700649521, "learning_rate": 3.724346458286086e-06, "loss": 0.2728, "step": 15241 }, { "epoch": 0.7140113364875627, "grad_norm": 0.6240020406760584, "learning_rate": 3.724181104615649e-06, "loss": 0.2837, "step": 15242 }, { "epoch": 0.714058181477491, "grad_norm": 0.5698479824249654, "learning_rate": 3.7240157439003578e-06, "loss": 0.2968, "step": 15243 }, { "epoch": 0.7141050264674194, "grad_norm": 0.6173914935652336, "learning_rate": 3.7238503761411627e-06, "loss": 0.3093, "step": 15244 }, { "epoch": 0.7141518714573476, "grad_norm": 0.6019380098909238, "learning_rate": 3.723685001339017e-06, "loss": 0.2787, "step": 15245 }, { "epoch": 0.714198716447276, "grad_norm": 0.574028558698804, "learning_rate": 3.723519619494872e-06, "loss": 0.2816, "step": 15246 }, { "epoch": 0.7142455614372043, "grad_norm": 0.5454424978043737, "learning_rate": 3.7233542306096782e-06, "loss": 0.2838, "step": 15247 }, { "epoch": 0.7142924064271327, "grad_norm": 0.6175228680665278, "learning_rate": 3.723188834684388e-06, "loss": 0.29, "step": 15248 }, { "epoch": 0.7143392514170609, "grad_norm": 0.5832153007595325, "learning_rate": 3.7230234317199544e-06, "loss": 0.2716, "step": 15249 }, { "epoch": 0.7143860964069892, "grad_norm": 0.6083633709105399, "learning_rate": 3.722858021717328e-06, "loss": 0.2829, "step": 15250 }, { "epoch": 0.7144329413969176, "grad_norm": 0.6025407297335237, "learning_rate": 3.7226926046774614e-06, "loss": 0.277, "step": 15251 }, { "epoch": 0.714479786386846, "grad_norm": 0.6117755780547309, "learning_rate": 3.722527180601306e-06, "loss": 0.2807, "step": 15252 }, { "epoch": 0.7145266313767743, "grad_norm": 0.6146737792443379, "learning_rate": 3.722361749489814e-06, "loss": 0.3007, "step": 15253 }, { "epoch": 0.7145734763667025, "grad_norm": 0.5968750677722194, "learning_rate": 3.722196311343937e-06, "loss": 0.2835, "step": 15254 }, { "epoch": 0.7146203213566309, "grad_norm": 0.5696515987520115, "learning_rate": 3.7220308661646277e-06, "loss": 0.2704, "step": 15255 }, { "epoch": 0.7146671663465592, "grad_norm": 0.607702086960688, "learning_rate": 3.7218654139528376e-06, "loss": 0.295, "step": 15256 }, { "epoch": 0.7147140113364876, "grad_norm": 0.5702781006943538, "learning_rate": 3.7216999547095196e-06, "loss": 0.2874, "step": 15257 }, { "epoch": 0.7147608563264158, "grad_norm": 0.6393698833694775, "learning_rate": 3.7215344884356253e-06, "loss": 0.288, "step": 15258 }, { "epoch": 0.7148077013163442, "grad_norm": 0.5516996701343002, "learning_rate": 3.7213690151321062e-06, "loss": 0.281, "step": 15259 }, { "epoch": 0.7148545463062725, "grad_norm": 0.5749748015691639, "learning_rate": 3.7212035347999163e-06, "loss": 0.2604, "step": 15260 }, { "epoch": 0.7149013912962009, "grad_norm": 0.58578649781479, "learning_rate": 3.7210380474400074e-06, "loss": 0.2728, "step": 15261 }, { "epoch": 0.7149482362861292, "grad_norm": 0.6031025502206162, "learning_rate": 3.7208725530533306e-06, "loss": 0.2767, "step": 15262 }, { "epoch": 0.7149950812760575, "grad_norm": 0.5629627912794574, "learning_rate": 3.7207070516408394e-06, "loss": 0.2532, "step": 15263 }, { "epoch": 0.7150419262659858, "grad_norm": 0.5853885214472739, "learning_rate": 3.7205415432034864e-06, "loss": 0.2851, "step": 15264 }, { "epoch": 0.7150887712559142, "grad_norm": 0.6121158613273633, "learning_rate": 3.7203760277422223e-06, "loss": 0.2669, "step": 15265 }, { "epoch": 0.7151356162458425, "grad_norm": 0.6142777213041595, "learning_rate": 3.720210505258002e-06, "loss": 0.3057, "step": 15266 }, { "epoch": 0.7151824612357708, "grad_norm": 0.5512415770956316, "learning_rate": 3.720044975751777e-06, "loss": 0.2575, "step": 15267 }, { "epoch": 0.7152293062256991, "grad_norm": 0.58681939394691, "learning_rate": 3.719879439224499e-06, "loss": 0.2748, "step": 15268 }, { "epoch": 0.7152761512156275, "grad_norm": 0.5825182914428778, "learning_rate": 3.7197138956771218e-06, "loss": 0.2822, "step": 15269 }, { "epoch": 0.7153229962055558, "grad_norm": 0.5457026736449566, "learning_rate": 3.719548345110598e-06, "loss": 0.2543, "step": 15270 }, { "epoch": 0.7153698411954842, "grad_norm": 0.5515705968816159, "learning_rate": 3.71938278752588e-06, "loss": 0.2662, "step": 15271 }, { "epoch": 0.7154166861854124, "grad_norm": 0.5904239973632601, "learning_rate": 3.7192172229239197e-06, "loss": 0.293, "step": 15272 }, { "epoch": 0.7154635311753408, "grad_norm": 0.6139577010811136, "learning_rate": 3.719051651305672e-06, "loss": 0.2851, "step": 15273 }, { "epoch": 0.7155103761652691, "grad_norm": 0.6579117344626323, "learning_rate": 3.7188860726720876e-06, "loss": 0.2939, "step": 15274 }, { "epoch": 0.7155572211551975, "grad_norm": 0.6225010391766299, "learning_rate": 3.71872048702412e-06, "loss": 0.2937, "step": 15275 }, { "epoch": 0.7156040661451257, "grad_norm": 0.6212663191342522, "learning_rate": 3.7185548943627227e-06, "loss": 0.3084, "step": 15276 }, { "epoch": 0.7156509111350541, "grad_norm": 0.6015482261092745, "learning_rate": 3.7183892946888476e-06, "loss": 0.2911, "step": 15277 }, { "epoch": 0.7156977561249824, "grad_norm": 0.5784795677747542, "learning_rate": 3.7182236880034493e-06, "loss": 0.2716, "step": 15278 }, { "epoch": 0.7157446011149108, "grad_norm": 0.608212599277012, "learning_rate": 3.71805807430748e-06, "loss": 0.2767, "step": 15279 }, { "epoch": 0.7157914461048391, "grad_norm": 0.5583190204235868, "learning_rate": 3.717892453601892e-06, "loss": 0.2674, "step": 15280 }, { "epoch": 0.7158382910947674, "grad_norm": 0.5905348250650816, "learning_rate": 3.717726825887639e-06, "loss": 0.2782, "step": 15281 }, { "epoch": 0.7158851360846957, "grad_norm": 0.609632145536254, "learning_rate": 3.717561191165675e-06, "loss": 0.2886, "step": 15282 }, { "epoch": 0.7159319810746241, "grad_norm": 0.5897444537260778, "learning_rate": 3.7173955494369513e-06, "loss": 0.2953, "step": 15283 }, { "epoch": 0.7159788260645524, "grad_norm": 0.5811747830208349, "learning_rate": 3.7172299007024226e-06, "loss": 0.2877, "step": 15284 }, { "epoch": 0.7160256710544807, "grad_norm": 0.5725722028979163, "learning_rate": 3.7170642449630424e-06, "loss": 0.2829, "step": 15285 }, { "epoch": 0.716072516044409, "grad_norm": 0.6135015055160729, "learning_rate": 3.7168985822197624e-06, "loss": 0.2823, "step": 15286 }, { "epoch": 0.7161193610343374, "grad_norm": 0.6009251356286561, "learning_rate": 3.716732912473537e-06, "loss": 0.2889, "step": 15287 }, { "epoch": 0.7161662060242657, "grad_norm": 0.5412201776796663, "learning_rate": 3.71656723572532e-06, "loss": 0.2622, "step": 15288 }, { "epoch": 0.7162130510141941, "grad_norm": 0.6123592564889188, "learning_rate": 3.7164015519760644e-06, "loss": 0.2908, "step": 15289 }, { "epoch": 0.7162598960041223, "grad_norm": 0.5742602143102011, "learning_rate": 3.716235861226723e-06, "loss": 0.2718, "step": 15290 }, { "epoch": 0.7163067409940507, "grad_norm": 0.5622623400019428, "learning_rate": 3.7160701634782502e-06, "loss": 0.2814, "step": 15291 }, { "epoch": 0.716353585983979, "grad_norm": 0.5728453207027475, "learning_rate": 3.7159044587315994e-06, "loss": 0.2779, "step": 15292 }, { "epoch": 0.7164004309739074, "grad_norm": 0.5653065908853038, "learning_rate": 3.715738746987724e-06, "loss": 0.2563, "step": 15293 }, { "epoch": 0.7164472759638356, "grad_norm": 0.5646581667721647, "learning_rate": 3.7155730282475776e-06, "loss": 0.2645, "step": 15294 }, { "epoch": 0.716494120953764, "grad_norm": 0.6162586878893949, "learning_rate": 3.715407302512114e-06, "loss": 0.3134, "step": 15295 }, { "epoch": 0.7165409659436923, "grad_norm": 0.5422404413947045, "learning_rate": 3.7152415697822865e-06, "loss": 0.266, "step": 15296 }, { "epoch": 0.7165878109336207, "grad_norm": 0.5499075493935343, "learning_rate": 3.7150758300590496e-06, "loss": 0.2769, "step": 15297 }, { "epoch": 0.716634655923549, "grad_norm": 0.5662866836916932, "learning_rate": 3.7149100833433567e-06, "loss": 0.2736, "step": 15298 }, { "epoch": 0.7166815009134773, "grad_norm": 0.5670011118868932, "learning_rate": 3.714744329636161e-06, "loss": 0.2988, "step": 15299 }, { "epoch": 0.7167283459034056, "grad_norm": 0.6569530299776957, "learning_rate": 3.7145785689384182e-06, "loss": 0.2978, "step": 15300 }, { "epoch": 0.716775190893334, "grad_norm": 0.614203683264948, "learning_rate": 3.7144128012510798e-06, "loss": 0.3088, "step": 15301 }, { "epoch": 0.7168220358832623, "grad_norm": 0.5623982908103978, "learning_rate": 3.714247026575102e-06, "loss": 0.2745, "step": 15302 }, { "epoch": 0.7168688808731906, "grad_norm": 0.6376632211661222, "learning_rate": 3.714081244911437e-06, "loss": 0.3007, "step": 15303 }, { "epoch": 0.7169157258631189, "grad_norm": 0.5792934878654109, "learning_rate": 3.71391545626104e-06, "loss": 0.3078, "step": 15304 }, { "epoch": 0.7169625708530473, "grad_norm": 0.6168104185608442, "learning_rate": 3.713749660624864e-06, "loss": 0.2792, "step": 15305 }, { "epoch": 0.7170094158429756, "grad_norm": 0.5553099125026036, "learning_rate": 3.7135838580038646e-06, "loss": 0.2828, "step": 15306 }, { "epoch": 0.717056260832904, "grad_norm": 0.6031693602752832, "learning_rate": 3.7134180483989946e-06, "loss": 0.2799, "step": 15307 }, { "epoch": 0.7171031058228322, "grad_norm": 0.5743930453345037, "learning_rate": 3.713252231811209e-06, "loss": 0.2972, "step": 15308 }, { "epoch": 0.7171499508127606, "grad_norm": 0.5815883962434744, "learning_rate": 3.713086408241462e-06, "loss": 0.2908, "step": 15309 }, { "epoch": 0.7171967958026889, "grad_norm": 0.6173756335747098, "learning_rate": 3.712920577690707e-06, "loss": 0.2728, "step": 15310 }, { "epoch": 0.7172436407926173, "grad_norm": 0.5888963853481631, "learning_rate": 3.712754740159899e-06, "loss": 0.2876, "step": 15311 }, { "epoch": 0.7172904857825455, "grad_norm": 0.6272404959737837, "learning_rate": 3.7125888956499926e-06, "loss": 0.2861, "step": 15312 }, { "epoch": 0.7173373307724739, "grad_norm": 0.5900019902077318, "learning_rate": 3.712423044161942e-06, "loss": 0.2923, "step": 15313 }, { "epoch": 0.7173841757624022, "grad_norm": 0.6413834404194162, "learning_rate": 3.712257185696701e-06, "loss": 0.2996, "step": 15314 }, { "epoch": 0.7174310207523306, "grad_norm": 0.6223421035559045, "learning_rate": 3.7120913202552244e-06, "loss": 0.3023, "step": 15315 }, { "epoch": 0.7174778657422589, "grad_norm": 0.6144375013760099, "learning_rate": 3.7119254478384682e-06, "loss": 0.3119, "step": 15316 }, { "epoch": 0.7175247107321872, "grad_norm": 0.6294226022055089, "learning_rate": 3.7117595684473847e-06, "loss": 0.2625, "step": 15317 }, { "epoch": 0.7175715557221155, "grad_norm": 0.6148073018636624, "learning_rate": 3.711593682082929e-06, "loss": 0.2962, "step": 15318 }, { "epoch": 0.7176184007120439, "grad_norm": 0.6110002820448809, "learning_rate": 3.711427788746057e-06, "loss": 0.3005, "step": 15319 }, { "epoch": 0.7176652457019722, "grad_norm": 0.6355935811832696, "learning_rate": 3.7112618884377227e-06, "loss": 0.3068, "step": 15320 }, { "epoch": 0.7177120906919005, "grad_norm": 0.5695202741422489, "learning_rate": 3.7110959811588797e-06, "loss": 0.2733, "step": 15321 }, { "epoch": 0.7177589356818288, "grad_norm": 0.5727423643157533, "learning_rate": 3.710930066910484e-06, "loss": 0.2637, "step": 15322 }, { "epoch": 0.7178057806717572, "grad_norm": 0.5914729173803018, "learning_rate": 3.7107641456934912e-06, "loss": 0.3061, "step": 15323 }, { "epoch": 0.7178526256616855, "grad_norm": 0.5761760902361966, "learning_rate": 3.710598217508854e-06, "loss": 0.2779, "step": 15324 }, { "epoch": 0.7178994706516139, "grad_norm": 0.6713226301660185, "learning_rate": 3.7104322823575285e-06, "loss": 0.3053, "step": 15325 }, { "epoch": 0.7179463156415421, "grad_norm": 0.5879732066514499, "learning_rate": 3.7102663402404694e-06, "loss": 0.2817, "step": 15326 }, { "epoch": 0.7179931606314705, "grad_norm": 0.6015522496721672, "learning_rate": 3.710100391158632e-06, "loss": 0.2961, "step": 15327 }, { "epoch": 0.7180400056213988, "grad_norm": 0.653086283810886, "learning_rate": 3.709934435112971e-06, "loss": 0.2875, "step": 15328 }, { "epoch": 0.7180868506113272, "grad_norm": 0.576406023295332, "learning_rate": 3.709768472104441e-06, "loss": 0.2852, "step": 15329 }, { "epoch": 0.7181336956012554, "grad_norm": 0.5940557376176898, "learning_rate": 3.7096025021339976e-06, "loss": 0.2854, "step": 15330 }, { "epoch": 0.7181805405911837, "grad_norm": 0.582170990044218, "learning_rate": 3.7094365252025966e-06, "loss": 0.2775, "step": 15331 }, { "epoch": 0.7182273855811121, "grad_norm": 0.6231028540259179, "learning_rate": 3.7092705413111914e-06, "loss": 0.2949, "step": 15332 }, { "epoch": 0.7182742305710405, "grad_norm": 0.5655400010902708, "learning_rate": 3.709104550460739e-06, "loss": 0.2649, "step": 15333 }, { "epoch": 0.7183210755609688, "grad_norm": 0.5928825320101082, "learning_rate": 3.708938552652193e-06, "loss": 0.2635, "step": 15334 }, { "epoch": 0.718367920550897, "grad_norm": 0.644830598182271, "learning_rate": 3.7087725478865098e-06, "loss": 0.2903, "step": 15335 }, { "epoch": 0.7184147655408254, "grad_norm": 0.5553212965856191, "learning_rate": 3.708606536164644e-06, "loss": 0.2673, "step": 15336 }, { "epoch": 0.7184616105307537, "grad_norm": 0.5572795970970343, "learning_rate": 3.7084405174875524e-06, "loss": 0.2722, "step": 15337 }, { "epoch": 0.7185084555206821, "grad_norm": 0.5658096268912802, "learning_rate": 3.7082744918561886e-06, "loss": 0.2751, "step": 15338 }, { "epoch": 0.7185553005106103, "grad_norm": 0.5797494461350958, "learning_rate": 3.7081084592715088e-06, "loss": 0.281, "step": 15339 }, { "epoch": 0.7186021455005387, "grad_norm": 0.5842939050931116, "learning_rate": 3.707942419734469e-06, "loss": 0.2623, "step": 15340 }, { "epoch": 0.718648990490467, "grad_norm": 0.5708131517226044, "learning_rate": 3.7077763732460237e-06, "loss": 0.2866, "step": 15341 }, { "epoch": 0.7186958354803954, "grad_norm": 0.5823648494171155, "learning_rate": 3.707610319807129e-06, "loss": 0.281, "step": 15342 }, { "epoch": 0.7187426804703237, "grad_norm": 0.5754300365174491, "learning_rate": 3.7074442594187405e-06, "loss": 0.2606, "step": 15343 }, { "epoch": 0.718789525460252, "grad_norm": 0.6301422054660845, "learning_rate": 3.7072781920818134e-06, "loss": 0.2879, "step": 15344 }, { "epoch": 0.7188363704501803, "grad_norm": 0.6211077171832051, "learning_rate": 3.707112117797304e-06, "loss": 0.2874, "step": 15345 }, { "epoch": 0.7188832154401087, "grad_norm": 0.5392007259663062, "learning_rate": 3.706946036566168e-06, "loss": 0.2617, "step": 15346 }, { "epoch": 0.718930060430037, "grad_norm": 0.5408256202623489, "learning_rate": 3.7067799483893606e-06, "loss": 0.2696, "step": 15347 }, { "epoch": 0.7189769054199653, "grad_norm": 0.577157815724061, "learning_rate": 3.706613853267838e-06, "loss": 0.2786, "step": 15348 }, { "epoch": 0.7190237504098936, "grad_norm": 0.5774000536729247, "learning_rate": 3.7064477512025564e-06, "loss": 0.2645, "step": 15349 }, { "epoch": 0.719070595399822, "grad_norm": 0.5802730388813501, "learning_rate": 3.7062816421944704e-06, "loss": 0.2617, "step": 15350 }, { "epoch": 0.7191174403897503, "grad_norm": 0.572337367792002, "learning_rate": 3.706115526244537e-06, "loss": 0.2715, "step": 15351 }, { "epoch": 0.7191642853796787, "grad_norm": 0.6112891403010193, "learning_rate": 3.7059494033537126e-06, "loss": 0.2982, "step": 15352 }, { "epoch": 0.7192111303696069, "grad_norm": 0.5574590018054181, "learning_rate": 3.7057832735229517e-06, "loss": 0.2727, "step": 15353 }, { "epoch": 0.7192579753595353, "grad_norm": 0.574126842073582, "learning_rate": 3.7056171367532114e-06, "loss": 0.2594, "step": 15354 }, { "epoch": 0.7193048203494636, "grad_norm": 0.6288328534738999, "learning_rate": 3.7054509930454475e-06, "loss": 0.2783, "step": 15355 }, { "epoch": 0.719351665339392, "grad_norm": 0.5662888398892039, "learning_rate": 3.705284842400616e-06, "loss": 0.28, "step": 15356 }, { "epoch": 0.7193985103293202, "grad_norm": 0.5420348771325376, "learning_rate": 3.705118684819673e-06, "loss": 0.2759, "step": 15357 }, { "epoch": 0.7194453553192486, "grad_norm": 0.5754800229720456, "learning_rate": 3.704952520303575e-06, "loss": 0.2878, "step": 15358 }, { "epoch": 0.7194922003091769, "grad_norm": 0.600239650661394, "learning_rate": 3.7047863488532787e-06, "loss": 0.2836, "step": 15359 }, { "epoch": 0.7195390452991053, "grad_norm": 0.5400859290605894, "learning_rate": 3.704620170469739e-06, "loss": 0.2726, "step": 15360 }, { "epoch": 0.7195858902890336, "grad_norm": 0.6435977966289937, "learning_rate": 3.7044539851539128e-06, "loss": 0.2834, "step": 15361 }, { "epoch": 0.7196327352789619, "grad_norm": 0.6085126110124677, "learning_rate": 3.7042877929067577e-06, "loss": 0.2917, "step": 15362 }, { "epoch": 0.7196795802688902, "grad_norm": 0.6298012622012793, "learning_rate": 3.704121593729228e-06, "loss": 0.3029, "step": 15363 }, { "epoch": 0.7197264252588186, "grad_norm": 0.6346754535787841, "learning_rate": 3.7039553876222817e-06, "loss": 0.2925, "step": 15364 }, { "epoch": 0.7197732702487469, "grad_norm": 0.5845079771095145, "learning_rate": 3.7037891745868747e-06, "loss": 0.2843, "step": 15365 }, { "epoch": 0.7198201152386752, "grad_norm": 0.6039599702104704, "learning_rate": 3.7036229546239635e-06, "loss": 0.2856, "step": 15366 }, { "epoch": 0.7198669602286035, "grad_norm": 0.6312889780731997, "learning_rate": 3.7034567277345047e-06, "loss": 0.2768, "step": 15367 }, { "epoch": 0.7199138052185319, "grad_norm": 0.5862722072598476, "learning_rate": 3.7032904939194546e-06, "loss": 0.3019, "step": 15368 }, { "epoch": 0.7199606502084602, "grad_norm": 0.6851918373757611, "learning_rate": 3.703124253179771e-06, "loss": 0.2995, "step": 15369 }, { "epoch": 0.7200074951983886, "grad_norm": 0.5294040640952401, "learning_rate": 3.702958005516409e-06, "loss": 0.2575, "step": 15370 }, { "epoch": 0.7200543401883168, "grad_norm": 0.5967545493503759, "learning_rate": 3.7027917509303262e-06, "loss": 0.2881, "step": 15371 }, { "epoch": 0.7201011851782452, "grad_norm": 0.6084580901947626, "learning_rate": 3.7026254894224797e-06, "loss": 0.2753, "step": 15372 }, { "epoch": 0.7201480301681735, "grad_norm": 0.6109590514633675, "learning_rate": 3.702459220993825e-06, "loss": 0.2832, "step": 15373 }, { "epoch": 0.7201948751581019, "grad_norm": 0.5916500899550837, "learning_rate": 3.70229294564532e-06, "loss": 0.2897, "step": 15374 }, { "epoch": 0.7202417201480301, "grad_norm": 0.5614588028643323, "learning_rate": 3.7021266633779206e-06, "loss": 0.275, "step": 15375 }, { "epoch": 0.7202885651379585, "grad_norm": 0.6047377460370114, "learning_rate": 3.701960374192586e-06, "loss": 0.2847, "step": 15376 }, { "epoch": 0.7203354101278868, "grad_norm": 0.6110022165115977, "learning_rate": 3.7017940780902706e-06, "loss": 0.3014, "step": 15377 }, { "epoch": 0.7203822551178152, "grad_norm": 0.596670132362532, "learning_rate": 3.701627775071932e-06, "loss": 0.3025, "step": 15378 }, { "epoch": 0.7204291001077435, "grad_norm": 0.5447288703894789, "learning_rate": 3.701461465138528e-06, "loss": 0.2713, "step": 15379 }, { "epoch": 0.7204759450976718, "grad_norm": 0.5568687489323891, "learning_rate": 3.7012951482910153e-06, "loss": 0.2659, "step": 15380 }, { "epoch": 0.7205227900876001, "grad_norm": 0.5756679037580689, "learning_rate": 3.7011288245303506e-06, "loss": 0.2753, "step": 15381 }, { "epoch": 0.7205696350775285, "grad_norm": 0.6031028645271878, "learning_rate": 3.7009624938574917e-06, "loss": 0.2942, "step": 15382 }, { "epoch": 0.7206164800674568, "grad_norm": 0.5426211098952686, "learning_rate": 3.700796156273396e-06, "loss": 0.2705, "step": 15383 }, { "epoch": 0.7206633250573851, "grad_norm": 0.5456597834172726, "learning_rate": 3.700629811779019e-06, "loss": 0.2856, "step": 15384 }, { "epoch": 0.7207101700473134, "grad_norm": 0.5885366642549392, "learning_rate": 3.70046346037532e-06, "loss": 0.2792, "step": 15385 }, { "epoch": 0.7207570150372418, "grad_norm": 0.606793233312903, "learning_rate": 3.7002971020632554e-06, "loss": 0.2974, "step": 15386 }, { "epoch": 0.7208038600271701, "grad_norm": 0.5922550248343654, "learning_rate": 3.700130736843783e-06, "loss": 0.2936, "step": 15387 }, { "epoch": 0.7208507050170985, "grad_norm": 0.578848167383787, "learning_rate": 3.699964364717859e-06, "loss": 0.2925, "step": 15388 }, { "epoch": 0.7208975500070267, "grad_norm": 0.6054429808607417, "learning_rate": 3.6997979856864426e-06, "loss": 0.2915, "step": 15389 }, { "epoch": 0.7209443949969551, "grad_norm": 0.649274410100821, "learning_rate": 3.6996315997504895e-06, "loss": 0.3038, "step": 15390 }, { "epoch": 0.7209912399868834, "grad_norm": 0.5962317630225336, "learning_rate": 3.699465206910958e-06, "loss": 0.315, "step": 15391 }, { "epoch": 0.7210380849768118, "grad_norm": 0.5860259087492725, "learning_rate": 3.699298807168807e-06, "loss": 0.2845, "step": 15392 }, { "epoch": 0.72108492996674, "grad_norm": 0.5515809608167163, "learning_rate": 3.699132400524992e-06, "loss": 0.2629, "step": 15393 }, { "epoch": 0.7211317749566684, "grad_norm": 0.6566114281877777, "learning_rate": 3.6989659869804713e-06, "loss": 0.2933, "step": 15394 }, { "epoch": 0.7211786199465967, "grad_norm": 0.5463765925152462, "learning_rate": 3.698799566536203e-06, "loss": 0.2623, "step": 15395 }, { "epoch": 0.7212254649365251, "grad_norm": 0.6327981262626687, "learning_rate": 3.698633139193144e-06, "loss": 0.2907, "step": 15396 }, { "epoch": 0.7212723099264534, "grad_norm": 0.55524538825303, "learning_rate": 3.698466704952253e-06, "loss": 0.2719, "step": 15397 }, { "epoch": 0.7213191549163817, "grad_norm": 0.5910717350666775, "learning_rate": 3.698300263814487e-06, "loss": 0.2779, "step": 15398 }, { "epoch": 0.72136599990631, "grad_norm": 0.5882166852698276, "learning_rate": 3.6981338157808045e-06, "loss": 0.2926, "step": 15399 }, { "epoch": 0.7214128448962384, "grad_norm": 0.5915854860502257, "learning_rate": 3.6979673608521628e-06, "loss": 0.2972, "step": 15400 }, { "epoch": 0.7214596898861667, "grad_norm": 0.5795914207903878, "learning_rate": 3.6978008990295208e-06, "loss": 0.2872, "step": 15401 }, { "epoch": 0.721506534876095, "grad_norm": 0.5851176989022797, "learning_rate": 3.6976344303138343e-06, "loss": 0.2677, "step": 15402 }, { "epoch": 0.7215533798660233, "grad_norm": 0.5627479808442204, "learning_rate": 3.697467954706064e-06, "loss": 0.2729, "step": 15403 }, { "epoch": 0.7216002248559517, "grad_norm": 0.5464973257318644, "learning_rate": 3.697301472207166e-06, "loss": 0.2641, "step": 15404 }, { "epoch": 0.72164706984588, "grad_norm": 0.6338766551572667, "learning_rate": 3.6971349828180985e-06, "loss": 0.2874, "step": 15405 }, { "epoch": 0.7216939148358084, "grad_norm": 0.5902586162508304, "learning_rate": 3.69696848653982e-06, "loss": 0.2632, "step": 15406 }, { "epoch": 0.7217407598257366, "grad_norm": 0.6165120706935681, "learning_rate": 3.69680198337329e-06, "loss": 0.2628, "step": 15407 }, { "epoch": 0.721787604815665, "grad_norm": 0.5754195604444279, "learning_rate": 3.696635473319464e-06, "loss": 0.2774, "step": 15408 }, { "epoch": 0.7218344498055933, "grad_norm": 0.59249946198679, "learning_rate": 3.6964689563793023e-06, "loss": 0.2788, "step": 15409 }, { "epoch": 0.7218812947955217, "grad_norm": 0.6259434648749864, "learning_rate": 3.696302432553762e-06, "loss": 0.3045, "step": 15410 }, { "epoch": 0.7219281397854499, "grad_norm": 0.560936693246062, "learning_rate": 3.6961359018438025e-06, "loss": 0.2573, "step": 15411 }, { "epoch": 0.7219749847753782, "grad_norm": 0.5964363582241047, "learning_rate": 3.6959693642503813e-06, "loss": 0.2889, "step": 15412 }, { "epoch": 0.7220218297653066, "grad_norm": 0.5564133588718775, "learning_rate": 3.695802819774457e-06, "loss": 0.2499, "step": 15413 }, { "epoch": 0.722068674755235, "grad_norm": 0.6312857993877196, "learning_rate": 3.6956362684169885e-06, "loss": 0.2955, "step": 15414 }, { "epoch": 0.7221155197451633, "grad_norm": 0.5737147684365841, "learning_rate": 3.695469710178933e-06, "loss": 0.2709, "step": 15415 }, { "epoch": 0.7221623647350915, "grad_norm": 0.642174948870714, "learning_rate": 3.6953031450612505e-06, "loss": 0.3026, "step": 15416 }, { "epoch": 0.7222092097250199, "grad_norm": 0.5617374568190302, "learning_rate": 3.695136573064898e-06, "loss": 0.2646, "step": 15417 }, { "epoch": 0.7222560547149482, "grad_norm": 0.6312463370572156, "learning_rate": 3.6949699941908352e-06, "loss": 0.2928, "step": 15418 }, { "epoch": 0.7223028997048766, "grad_norm": 0.5800998985432761, "learning_rate": 3.6948034084400208e-06, "loss": 0.2685, "step": 15419 }, { "epoch": 0.7223497446948048, "grad_norm": 0.5936287141631749, "learning_rate": 3.6946368158134125e-06, "loss": 0.2686, "step": 15420 }, { "epoch": 0.7223965896847332, "grad_norm": 0.6048147886730878, "learning_rate": 3.69447021631197e-06, "loss": 0.2775, "step": 15421 }, { "epoch": 0.7224434346746615, "grad_norm": 0.5397285854513507, "learning_rate": 3.6943036099366518e-06, "loss": 0.2637, "step": 15422 }, { "epoch": 0.7224902796645899, "grad_norm": 0.5921326934012658, "learning_rate": 3.694136996688416e-06, "loss": 0.2735, "step": 15423 }, { "epoch": 0.7225371246545182, "grad_norm": 0.6431203663236676, "learning_rate": 3.693970376568222e-06, "loss": 0.3058, "step": 15424 }, { "epoch": 0.7225839696444465, "grad_norm": 0.6131278170520594, "learning_rate": 3.6938037495770294e-06, "loss": 0.2963, "step": 15425 }, { "epoch": 0.7226308146343748, "grad_norm": 0.6072528180671879, "learning_rate": 3.693637115715795e-06, "loss": 0.2818, "step": 15426 }, { "epoch": 0.7226776596243032, "grad_norm": 0.5983828216258411, "learning_rate": 3.6934704749854798e-06, "loss": 0.2712, "step": 15427 }, { "epoch": 0.7227245046142315, "grad_norm": 0.5865592165900734, "learning_rate": 3.693303827387042e-06, "loss": 0.2812, "step": 15428 }, { "epoch": 0.7227713496041598, "grad_norm": 0.5462995401329207, "learning_rate": 3.6931371729214406e-06, "loss": 0.2713, "step": 15429 }, { "epoch": 0.7228181945940881, "grad_norm": 0.6330002239107608, "learning_rate": 3.6929705115896344e-06, "loss": 0.3101, "step": 15430 }, { "epoch": 0.7228650395840165, "grad_norm": 0.5506568141559847, "learning_rate": 3.692803843392583e-06, "loss": 0.2627, "step": 15431 }, { "epoch": 0.7229118845739448, "grad_norm": 0.6050653245378704, "learning_rate": 3.6926371683312447e-06, "loss": 0.3013, "step": 15432 }, { "epoch": 0.7229587295638732, "grad_norm": 0.6104145011542117, "learning_rate": 3.6924704864065797e-06, "loss": 0.3198, "step": 15433 }, { "epoch": 0.7230055745538014, "grad_norm": 0.5844610638740128, "learning_rate": 3.6923037976195465e-06, "loss": 0.2765, "step": 15434 }, { "epoch": 0.7230524195437298, "grad_norm": 0.5776888533792494, "learning_rate": 3.692137101971105e-06, "loss": 0.2856, "step": 15435 }, { "epoch": 0.7230992645336581, "grad_norm": 0.5609114699158654, "learning_rate": 3.6919703994622134e-06, "loss": 0.2701, "step": 15436 }, { "epoch": 0.7231461095235865, "grad_norm": 0.5924648359305759, "learning_rate": 3.691803690093833e-06, "loss": 0.2771, "step": 15437 }, { "epoch": 0.7231929545135147, "grad_norm": 0.5369021703155948, "learning_rate": 3.691636973866921e-06, "loss": 0.2632, "step": 15438 }, { "epoch": 0.7232397995034431, "grad_norm": 0.6019035097924836, "learning_rate": 3.691470250782438e-06, "loss": 0.2978, "step": 15439 }, { "epoch": 0.7232866444933714, "grad_norm": 0.5643319369491766, "learning_rate": 3.6913035208413428e-06, "loss": 0.286, "step": 15440 }, { "epoch": 0.7233334894832998, "grad_norm": 0.6301002651098317, "learning_rate": 3.6911367840445956e-06, "loss": 0.288, "step": 15441 }, { "epoch": 0.7233803344732281, "grad_norm": 0.5505720531240174, "learning_rate": 3.690970040393156e-06, "loss": 0.262, "step": 15442 }, { "epoch": 0.7234271794631564, "grad_norm": 0.5512601280340458, "learning_rate": 3.690803289887982e-06, "loss": 0.268, "step": 15443 }, { "epoch": 0.7234740244530847, "grad_norm": 0.58025362257908, "learning_rate": 3.6906365325300354e-06, "loss": 0.2847, "step": 15444 }, { "epoch": 0.7235208694430131, "grad_norm": 0.6122054432380255, "learning_rate": 3.690469768320274e-06, "loss": 0.2667, "step": 15445 }, { "epoch": 0.7235677144329414, "grad_norm": 0.6686853487998603, "learning_rate": 3.6903029972596595e-06, "loss": 0.3107, "step": 15446 }, { "epoch": 0.7236145594228697, "grad_norm": 0.6116951687755973, "learning_rate": 3.6901362193491498e-06, "loss": 0.2936, "step": 15447 }, { "epoch": 0.723661404412798, "grad_norm": 0.6119632958156224, "learning_rate": 3.689969434589705e-06, "loss": 0.2753, "step": 15448 }, { "epoch": 0.7237082494027264, "grad_norm": 0.6152912881125794, "learning_rate": 3.6898026429822853e-06, "loss": 0.2999, "step": 15449 }, { "epoch": 0.7237550943926547, "grad_norm": 0.5804086769221011, "learning_rate": 3.689635844527851e-06, "loss": 0.2819, "step": 15450 }, { "epoch": 0.7238019393825831, "grad_norm": 0.6062598743265623, "learning_rate": 3.68946903922736e-06, "loss": 0.2808, "step": 15451 }, { "epoch": 0.7238487843725113, "grad_norm": 0.5736521454271172, "learning_rate": 3.689302227081775e-06, "loss": 0.2737, "step": 15452 }, { "epoch": 0.7238956293624397, "grad_norm": 0.6334048827503713, "learning_rate": 3.6891354080920544e-06, "loss": 0.2883, "step": 15453 }, { "epoch": 0.723942474352368, "grad_norm": 0.5809559479887063, "learning_rate": 3.6889685822591577e-06, "loss": 0.2792, "step": 15454 }, { "epoch": 0.7239893193422964, "grad_norm": 0.5975026445279441, "learning_rate": 3.6888017495840458e-06, "loss": 0.2732, "step": 15455 }, { "epoch": 0.7240361643322246, "grad_norm": 0.5751098477574011, "learning_rate": 3.68863491006768e-06, "loss": 0.2904, "step": 15456 }, { "epoch": 0.724083009322153, "grad_norm": 0.5814849976211438, "learning_rate": 3.688468063711017e-06, "loss": 0.272, "step": 15457 }, { "epoch": 0.7241298543120813, "grad_norm": 0.5866211419515848, "learning_rate": 3.68830121051502e-06, "loss": 0.2773, "step": 15458 }, { "epoch": 0.7241766993020097, "grad_norm": 0.5882957403935912, "learning_rate": 3.688134350480648e-06, "loss": 0.3032, "step": 15459 }, { "epoch": 0.724223544291938, "grad_norm": 0.5384753929885576, "learning_rate": 3.687967483608862e-06, "loss": 0.2566, "step": 15460 }, { "epoch": 0.7242703892818663, "grad_norm": 0.5767449958869412, "learning_rate": 3.6878006099006214e-06, "loss": 0.2613, "step": 15461 }, { "epoch": 0.7243172342717946, "grad_norm": 0.5976058620620082, "learning_rate": 3.687633729356887e-06, "loss": 0.2789, "step": 15462 }, { "epoch": 0.724364079261723, "grad_norm": 0.607677934158203, "learning_rate": 3.6874668419786184e-06, "loss": 0.3116, "step": 15463 }, { "epoch": 0.7244109242516513, "grad_norm": 0.574373351490963, "learning_rate": 3.687299947766777e-06, "loss": 0.2783, "step": 15464 }, { "epoch": 0.7244577692415796, "grad_norm": 0.5614354288749256, "learning_rate": 3.687133046722323e-06, "loss": 0.2518, "step": 15465 }, { "epoch": 0.7245046142315079, "grad_norm": 0.5718352733400744, "learning_rate": 3.686966138846216e-06, "loss": 0.2727, "step": 15466 }, { "epoch": 0.7245514592214363, "grad_norm": 0.6577304958464057, "learning_rate": 3.686799224139418e-06, "loss": 0.2918, "step": 15467 }, { "epoch": 0.7245983042113646, "grad_norm": 0.6031223403167734, "learning_rate": 3.6866323026028885e-06, "loss": 0.2869, "step": 15468 }, { "epoch": 0.724645149201293, "grad_norm": 0.5999625223897713, "learning_rate": 3.686465374237588e-06, "loss": 0.2747, "step": 15469 }, { "epoch": 0.7246919941912212, "grad_norm": 0.6077057685922337, "learning_rate": 3.686298439044478e-06, "loss": 0.3005, "step": 15470 }, { "epoch": 0.7247388391811496, "grad_norm": 0.595031010517552, "learning_rate": 3.6861314970245186e-06, "loss": 0.2606, "step": 15471 }, { "epoch": 0.7247856841710779, "grad_norm": 0.5922121088527325, "learning_rate": 3.68596454817867e-06, "loss": 0.281, "step": 15472 }, { "epoch": 0.7248325291610063, "grad_norm": 0.6203565712228079, "learning_rate": 3.685797592507894e-06, "loss": 0.3012, "step": 15473 }, { "epoch": 0.7248793741509345, "grad_norm": 0.8832271406834357, "learning_rate": 3.6856306300131513e-06, "loss": 0.2801, "step": 15474 }, { "epoch": 0.7249262191408629, "grad_norm": 0.6066429650316334, "learning_rate": 3.685463660695401e-06, "loss": 0.2843, "step": 15475 }, { "epoch": 0.7249730641307912, "grad_norm": 0.6404691122003132, "learning_rate": 3.685296684555606e-06, "loss": 0.3098, "step": 15476 }, { "epoch": 0.7250199091207196, "grad_norm": 0.5995747498314477, "learning_rate": 3.6851297015947274e-06, "loss": 0.2958, "step": 15477 }, { "epoch": 0.7250667541106479, "grad_norm": 0.5949580485942253, "learning_rate": 3.6849627118137236e-06, "loss": 0.2944, "step": 15478 }, { "epoch": 0.7251135991005762, "grad_norm": 0.5664229057683737, "learning_rate": 3.6847957152135583e-06, "loss": 0.2757, "step": 15479 }, { "epoch": 0.7251604440905045, "grad_norm": 0.5975647776797899, "learning_rate": 3.6846287117951906e-06, "loss": 0.2935, "step": 15480 }, { "epoch": 0.7252072890804329, "grad_norm": 0.5880597690916881, "learning_rate": 3.6844617015595833e-06, "loss": 0.2708, "step": 15481 }, { "epoch": 0.7252541340703612, "grad_norm": 0.5831356459842192, "learning_rate": 3.6842946845076965e-06, "loss": 0.2904, "step": 15482 }, { "epoch": 0.7253009790602895, "grad_norm": 0.611603279925659, "learning_rate": 3.684127660640491e-06, "loss": 0.2855, "step": 15483 }, { "epoch": 0.7253478240502178, "grad_norm": 0.5268587399808539, "learning_rate": 3.6839606299589294e-06, "loss": 0.2601, "step": 15484 }, { "epoch": 0.7253946690401462, "grad_norm": 0.6088974178267176, "learning_rate": 3.6837935924639706e-06, "loss": 0.2821, "step": 15485 }, { "epoch": 0.7254415140300745, "grad_norm": 0.5960782851889784, "learning_rate": 3.683626548156578e-06, "loss": 0.253, "step": 15486 }, { "epoch": 0.7254883590200029, "grad_norm": 0.6027222204762037, "learning_rate": 3.6834594970377115e-06, "loss": 0.2916, "step": 15487 }, { "epoch": 0.7255352040099311, "grad_norm": 0.6094830937628812, "learning_rate": 3.6832924391083336e-06, "loss": 0.291, "step": 15488 }, { "epoch": 0.7255820489998595, "grad_norm": 0.593350362770431, "learning_rate": 3.683125374369405e-06, "loss": 0.2937, "step": 15489 }, { "epoch": 0.7256288939897878, "grad_norm": 0.6116883760988542, "learning_rate": 3.6829583028218874e-06, "loss": 0.2981, "step": 15490 }, { "epoch": 0.7256757389797162, "grad_norm": 0.5715883736530245, "learning_rate": 3.682791224466742e-06, "loss": 0.242, "step": 15491 }, { "epoch": 0.7257225839696444, "grad_norm": 0.5804065597078648, "learning_rate": 3.6826241393049307e-06, "loss": 0.2761, "step": 15492 }, { "epoch": 0.7257694289595727, "grad_norm": 0.5786500076579809, "learning_rate": 3.682457047337414e-06, "loss": 0.262, "step": 15493 }, { "epoch": 0.7258162739495011, "grad_norm": 0.6272954264154369, "learning_rate": 3.682289948565154e-06, "loss": 0.2824, "step": 15494 }, { "epoch": 0.7258631189394295, "grad_norm": 0.5629921477046769, "learning_rate": 3.6821228429891138e-06, "loss": 0.2631, "step": 15495 }, { "epoch": 0.7259099639293578, "grad_norm": 0.6075607073074559, "learning_rate": 3.6819557306102527e-06, "loss": 0.2781, "step": 15496 }, { "epoch": 0.725956808919286, "grad_norm": 0.5871532074482578, "learning_rate": 3.6817886114295333e-06, "loss": 0.2859, "step": 15497 }, { "epoch": 0.7260036539092144, "grad_norm": 0.5615007376424966, "learning_rate": 3.681621485447918e-06, "loss": 0.2734, "step": 15498 }, { "epoch": 0.7260504988991427, "grad_norm": 0.6127718245878704, "learning_rate": 3.6814543526663683e-06, "loss": 0.3018, "step": 15499 }, { "epoch": 0.7260973438890711, "grad_norm": 0.6487004548285662, "learning_rate": 3.681287213085845e-06, "loss": 0.2974, "step": 15500 }, { "epoch": 0.7261441888789993, "grad_norm": 0.5794001547382539, "learning_rate": 3.681120066707311e-06, "loss": 0.2756, "step": 15501 }, { "epoch": 0.7261910338689277, "grad_norm": 0.58902384680302, "learning_rate": 3.6809529135317285e-06, "loss": 0.251, "step": 15502 }, { "epoch": 0.726237878858856, "grad_norm": 0.581292834634589, "learning_rate": 3.6807857535600576e-06, "loss": 0.281, "step": 15503 }, { "epoch": 0.7262847238487844, "grad_norm": 0.6060627199519666, "learning_rate": 3.6806185867932627e-06, "loss": 0.2969, "step": 15504 }, { "epoch": 0.7263315688387127, "grad_norm": 0.602071105137095, "learning_rate": 3.6804514132323043e-06, "loss": 0.3035, "step": 15505 }, { "epoch": 0.726378413828641, "grad_norm": 0.5894114599246739, "learning_rate": 3.680284232878144e-06, "loss": 0.3059, "step": 15506 }, { "epoch": 0.7264252588185693, "grad_norm": 0.6422078858039701, "learning_rate": 3.6801170457317446e-06, "loss": 0.2955, "step": 15507 }, { "epoch": 0.7264721038084977, "grad_norm": 0.5676642732376688, "learning_rate": 3.679949851794069e-06, "loss": 0.2813, "step": 15508 }, { "epoch": 0.726518948798426, "grad_norm": 0.5714081536200214, "learning_rate": 3.6797826510660785e-06, "loss": 0.265, "step": 15509 }, { "epoch": 0.7265657937883543, "grad_norm": 0.592733854994677, "learning_rate": 3.6796154435487347e-06, "loss": 0.2999, "step": 15510 }, { "epoch": 0.7266126387782826, "grad_norm": 0.5588195794195084, "learning_rate": 3.6794482292430013e-06, "loss": 0.2826, "step": 15511 }, { "epoch": 0.726659483768211, "grad_norm": 0.5494140366089202, "learning_rate": 3.679281008149839e-06, "loss": 0.2613, "step": 15512 }, { "epoch": 0.7267063287581393, "grad_norm": 0.5874080433429001, "learning_rate": 3.6791137802702116e-06, "loss": 0.2752, "step": 15513 }, { "epoch": 0.7267531737480677, "grad_norm": 0.5222621647463291, "learning_rate": 3.67894654560508e-06, "loss": 0.2531, "step": 15514 }, { "epoch": 0.7268000187379959, "grad_norm": 0.5796596051850346, "learning_rate": 3.678779304155408e-06, "loss": 0.2987, "step": 15515 }, { "epoch": 0.7268468637279243, "grad_norm": 0.5659597076152028, "learning_rate": 3.678612055922157e-06, "loss": 0.2754, "step": 15516 }, { "epoch": 0.7268937087178526, "grad_norm": 0.5598133532398141, "learning_rate": 3.67844480090629e-06, "loss": 0.2722, "step": 15517 }, { "epoch": 0.726940553707781, "grad_norm": 0.5957699157858799, "learning_rate": 3.6782775391087687e-06, "loss": 0.2512, "step": 15518 }, { "epoch": 0.7269873986977092, "grad_norm": 0.5568818297482953, "learning_rate": 3.6781102705305575e-06, "loss": 0.271, "step": 15519 }, { "epoch": 0.7270342436876376, "grad_norm": 0.5511828724118049, "learning_rate": 3.6779429951726175e-06, "loss": 0.28, "step": 15520 }, { "epoch": 0.7270810886775659, "grad_norm": 0.540980459851557, "learning_rate": 3.677775713035911e-06, "loss": 0.289, "step": 15521 }, { "epoch": 0.7271279336674943, "grad_norm": 0.5671252679095573, "learning_rate": 3.677608424121402e-06, "loss": 0.2987, "step": 15522 }, { "epoch": 0.7271747786574226, "grad_norm": 0.5725673518957445, "learning_rate": 3.6774411284300524e-06, "loss": 0.2599, "step": 15523 }, { "epoch": 0.7272216236473509, "grad_norm": 0.5147999668221954, "learning_rate": 3.677273825962824e-06, "loss": 0.2684, "step": 15524 }, { "epoch": 0.7272684686372792, "grad_norm": 0.6032444461494302, "learning_rate": 3.6771065167206817e-06, "loss": 0.2861, "step": 15525 }, { "epoch": 0.7273153136272076, "grad_norm": 0.5899478251739613, "learning_rate": 3.676939200704588e-06, "loss": 0.274, "step": 15526 }, { "epoch": 0.7273621586171359, "grad_norm": 0.5841488107871555, "learning_rate": 3.6767718779155037e-06, "loss": 0.2767, "step": 15527 }, { "epoch": 0.7274090036070642, "grad_norm": 0.6099056901602884, "learning_rate": 3.6766045483543927e-06, "loss": 0.2735, "step": 15528 }, { "epoch": 0.7274558485969925, "grad_norm": 0.6181190354545081, "learning_rate": 3.6764372120222193e-06, "loss": 0.2662, "step": 15529 }, { "epoch": 0.7275026935869209, "grad_norm": 0.5982245001981042, "learning_rate": 3.676269868919945e-06, "loss": 0.283, "step": 15530 }, { "epoch": 0.7275495385768492, "grad_norm": 0.5591772781364512, "learning_rate": 3.6761025190485335e-06, "loss": 0.2932, "step": 15531 }, { "epoch": 0.7275963835667776, "grad_norm": 0.5838490366596728, "learning_rate": 3.6759351624089468e-06, "loss": 0.2765, "step": 15532 }, { "epoch": 0.7276432285567058, "grad_norm": 0.5897640877250954, "learning_rate": 3.67576779900215e-06, "loss": 0.3038, "step": 15533 }, { "epoch": 0.7276900735466342, "grad_norm": 0.5578925842382623, "learning_rate": 3.6756004288291037e-06, "loss": 0.2745, "step": 15534 }, { "epoch": 0.7277369185365625, "grad_norm": 0.6045365000876075, "learning_rate": 3.6754330518907734e-06, "loss": 0.2722, "step": 15535 }, { "epoch": 0.7277837635264909, "grad_norm": 0.5626134839197056, "learning_rate": 3.675265668188121e-06, "loss": 0.2703, "step": 15536 }, { "epoch": 0.7278306085164191, "grad_norm": 0.576834778825943, "learning_rate": 3.67509827772211e-06, "loss": 0.2802, "step": 15537 }, { "epoch": 0.7278774535063475, "grad_norm": 0.6099282161787613, "learning_rate": 3.6749308804937045e-06, "loss": 0.2696, "step": 15538 }, { "epoch": 0.7279242984962758, "grad_norm": 0.5202615103422928, "learning_rate": 3.674763476503866e-06, "loss": 0.2396, "step": 15539 }, { "epoch": 0.7279711434862042, "grad_norm": 0.5686189365754977, "learning_rate": 3.6745960657535594e-06, "loss": 0.2814, "step": 15540 }, { "epoch": 0.7280179884761325, "grad_norm": 0.5918727020477801, "learning_rate": 3.674428648243748e-06, "loss": 0.2841, "step": 15541 }, { "epoch": 0.7280648334660608, "grad_norm": 0.5872335080279856, "learning_rate": 3.6742612239753946e-06, "loss": 0.2946, "step": 15542 }, { "epoch": 0.7281116784559891, "grad_norm": 0.6439346562110765, "learning_rate": 3.674093792949463e-06, "loss": 0.2832, "step": 15543 }, { "epoch": 0.7281585234459175, "grad_norm": 0.5357413818631085, "learning_rate": 3.673926355166917e-06, "loss": 0.2703, "step": 15544 }, { "epoch": 0.7282053684358458, "grad_norm": 0.5702886227407562, "learning_rate": 3.673758910628719e-06, "loss": 0.286, "step": 15545 }, { "epoch": 0.7282522134257741, "grad_norm": 0.6284501045115863, "learning_rate": 3.6735914593358336e-06, "loss": 0.296, "step": 15546 }, { "epoch": 0.7282990584157024, "grad_norm": 0.5720923933109681, "learning_rate": 3.6734240012892257e-06, "loss": 0.2731, "step": 15547 }, { "epoch": 0.7283459034056308, "grad_norm": 0.57485425846072, "learning_rate": 3.6732565364898565e-06, "loss": 0.2895, "step": 15548 }, { "epoch": 0.7283927483955591, "grad_norm": 0.6057784847320772, "learning_rate": 3.673089064938691e-06, "loss": 0.2773, "step": 15549 }, { "epoch": 0.7284395933854875, "grad_norm": 0.6120616036847143, "learning_rate": 3.6729215866366924e-06, "loss": 0.2614, "step": 15550 }, { "epoch": 0.7284864383754157, "grad_norm": 0.5956118144959318, "learning_rate": 3.6727541015848255e-06, "loss": 0.2716, "step": 15551 }, { "epoch": 0.7285332833653441, "grad_norm": 0.5174430412445725, "learning_rate": 3.672586609784053e-06, "loss": 0.2552, "step": 15552 }, { "epoch": 0.7285801283552724, "grad_norm": 0.5737635262525983, "learning_rate": 3.67241911123534e-06, "loss": 0.277, "step": 15553 }, { "epoch": 0.7286269733452008, "grad_norm": 0.6229380990479704, "learning_rate": 3.6722516059396497e-06, "loss": 0.3021, "step": 15554 }, { "epoch": 0.728673818335129, "grad_norm": 0.581307253956241, "learning_rate": 3.6720840938979445e-06, "loss": 0.2821, "step": 15555 }, { "epoch": 0.7287206633250574, "grad_norm": 0.5654593891864805, "learning_rate": 3.6719165751111917e-06, "loss": 0.2961, "step": 15556 }, { "epoch": 0.7287675083149857, "grad_norm": 0.5999403636093934, "learning_rate": 3.671749049580352e-06, "loss": 0.2731, "step": 15557 }, { "epoch": 0.7288143533049141, "grad_norm": 0.6473346860176401, "learning_rate": 3.6715815173063925e-06, "loss": 0.3186, "step": 15558 }, { "epoch": 0.7288611982948424, "grad_norm": 0.5778582791878055, "learning_rate": 3.6714139782902754e-06, "loss": 0.2773, "step": 15559 }, { "epoch": 0.7289080432847707, "grad_norm": 0.5805135439456574, "learning_rate": 3.671246432532965e-06, "loss": 0.2703, "step": 15560 }, { "epoch": 0.728954888274699, "grad_norm": 0.5675369041870558, "learning_rate": 3.671078880035426e-06, "loss": 0.2624, "step": 15561 }, { "epoch": 0.7290017332646274, "grad_norm": 0.5710426431128868, "learning_rate": 3.6709113207986223e-06, "loss": 0.2589, "step": 15562 }, { "epoch": 0.7290485782545557, "grad_norm": 0.5659200195317393, "learning_rate": 3.670743754823518e-06, "loss": 0.2873, "step": 15563 }, { "epoch": 0.729095423244484, "grad_norm": 0.6166956207234701, "learning_rate": 3.6705761821110776e-06, "loss": 0.2918, "step": 15564 }, { "epoch": 0.7291422682344123, "grad_norm": 0.5629138861658539, "learning_rate": 3.6704086026622664e-06, "loss": 0.2771, "step": 15565 }, { "epoch": 0.7291891132243407, "grad_norm": 0.6213482147388031, "learning_rate": 3.6702410164780465e-06, "loss": 0.2954, "step": 15566 }, { "epoch": 0.729235958214269, "grad_norm": 0.6282611752125535, "learning_rate": 3.6700734235593844e-06, "loss": 0.2818, "step": 15567 }, { "epoch": 0.7292828032041974, "grad_norm": 0.5804103344017182, "learning_rate": 3.6699058239072437e-06, "loss": 0.2818, "step": 15568 }, { "epoch": 0.7293296481941256, "grad_norm": 0.5761602460027283, "learning_rate": 3.66973821752259e-06, "loss": 0.2954, "step": 15569 }, { "epoch": 0.729376493184054, "grad_norm": 0.6310335463560603, "learning_rate": 3.669570604406385e-06, "loss": 0.2744, "step": 15570 }, { "epoch": 0.7294233381739823, "grad_norm": 0.5738843980402693, "learning_rate": 3.6694029845595968e-06, "loss": 0.2746, "step": 15571 }, { "epoch": 0.7294701831639107, "grad_norm": 0.5698594889225729, "learning_rate": 3.6692353579831873e-06, "loss": 0.2857, "step": 15572 }, { "epoch": 0.7295170281538389, "grad_norm": 0.5772642831265431, "learning_rate": 3.6690677246781227e-06, "loss": 0.2709, "step": 15573 }, { "epoch": 0.7295638731437672, "grad_norm": 0.5451168119088307, "learning_rate": 3.668900084645367e-06, "loss": 0.2751, "step": 15574 }, { "epoch": 0.7296107181336956, "grad_norm": 0.5342151382178356, "learning_rate": 3.668732437885886e-06, "loss": 0.2561, "step": 15575 }, { "epoch": 0.729657563123624, "grad_norm": 0.624717784617333, "learning_rate": 3.6685647844006424e-06, "loss": 0.2936, "step": 15576 }, { "epoch": 0.7297044081135523, "grad_norm": 0.6087763095611494, "learning_rate": 3.668397124190602e-06, "loss": 0.286, "step": 15577 }, { "epoch": 0.7297512531034805, "grad_norm": 0.617908214343186, "learning_rate": 3.66822945725673e-06, "loss": 0.2877, "step": 15578 }, { "epoch": 0.7297980980934089, "grad_norm": 0.5658829339419921, "learning_rate": 3.668061783599992e-06, "loss": 0.2676, "step": 15579 }, { "epoch": 0.7298449430833372, "grad_norm": 0.6088247741666504, "learning_rate": 3.6678941032213513e-06, "loss": 0.295, "step": 15580 }, { "epoch": 0.7298917880732656, "grad_norm": 0.6210198608118689, "learning_rate": 3.6677264161217734e-06, "loss": 0.296, "step": 15581 }, { "epoch": 0.7299386330631938, "grad_norm": 0.5525954295765263, "learning_rate": 3.667558722302224e-06, "loss": 0.2745, "step": 15582 }, { "epoch": 0.7299854780531222, "grad_norm": 0.6005188279202314, "learning_rate": 3.6673910217636673e-06, "loss": 0.2727, "step": 15583 }, { "epoch": 0.7300323230430505, "grad_norm": 0.5444553536974548, "learning_rate": 3.667223314507069e-06, "loss": 0.2635, "step": 15584 }, { "epoch": 0.7300791680329789, "grad_norm": 0.6369300958519811, "learning_rate": 3.6670556005333935e-06, "loss": 0.2677, "step": 15585 }, { "epoch": 0.7301260130229072, "grad_norm": 0.6071681313710622, "learning_rate": 3.6668878798436065e-06, "loss": 0.3003, "step": 15586 }, { "epoch": 0.7301728580128355, "grad_norm": 0.5628681401202682, "learning_rate": 3.666720152438673e-06, "loss": 0.2652, "step": 15587 }, { "epoch": 0.7302197030027638, "grad_norm": 0.5962567502798927, "learning_rate": 3.666552418319558e-06, "loss": 0.2729, "step": 15588 }, { "epoch": 0.7302665479926922, "grad_norm": 0.5326223305827046, "learning_rate": 3.666384677487228e-06, "loss": 0.2732, "step": 15589 }, { "epoch": 0.7303133929826205, "grad_norm": 0.5925217073940273, "learning_rate": 3.6662169299426465e-06, "loss": 0.2737, "step": 15590 }, { "epoch": 0.7303602379725488, "grad_norm": 0.618362929689589, "learning_rate": 3.66604917568678e-06, "loss": 0.2878, "step": 15591 }, { "epoch": 0.7304070829624771, "grad_norm": 0.566577836608685, "learning_rate": 3.6658814147205934e-06, "loss": 0.255, "step": 15592 }, { "epoch": 0.7304539279524055, "grad_norm": 0.5406295801776353, "learning_rate": 3.665713647045053e-06, "loss": 0.2553, "step": 15593 }, { "epoch": 0.7305007729423338, "grad_norm": 0.60050302899239, "learning_rate": 3.665545872661122e-06, "loss": 0.2983, "step": 15594 }, { "epoch": 0.7305476179322622, "grad_norm": 0.5664837804343474, "learning_rate": 3.665378091569768e-06, "loss": 0.2835, "step": 15595 }, { "epoch": 0.7305944629221904, "grad_norm": 0.5957985785505693, "learning_rate": 3.6652103037719573e-06, "loss": 0.2827, "step": 15596 }, { "epoch": 0.7306413079121188, "grad_norm": 0.5688110911721682, "learning_rate": 3.6650425092686526e-06, "loss": 0.2799, "step": 15597 }, { "epoch": 0.7306881529020471, "grad_norm": 0.5741547762167069, "learning_rate": 3.6648747080608216e-06, "loss": 0.2686, "step": 15598 }, { "epoch": 0.7307349978919755, "grad_norm": 0.5901113588852239, "learning_rate": 3.6647069001494296e-06, "loss": 0.2899, "step": 15599 }, { "epoch": 0.7307818428819037, "grad_norm": 0.5785797056046011, "learning_rate": 3.664539085535442e-06, "loss": 0.2904, "step": 15600 }, { "epoch": 0.7308286878718321, "grad_norm": 0.6055372010089838, "learning_rate": 3.664371264219825e-06, "loss": 0.2968, "step": 15601 }, { "epoch": 0.7308755328617604, "grad_norm": 0.570582303841337, "learning_rate": 3.6642034362035435e-06, "loss": 0.2876, "step": 15602 }, { "epoch": 0.7309223778516888, "grad_norm": 0.5706412784364797, "learning_rate": 3.6640356014875643e-06, "loss": 0.2902, "step": 15603 }, { "epoch": 0.7309692228416171, "grad_norm": 0.5962258384886584, "learning_rate": 3.663867760072852e-06, "loss": 0.2763, "step": 15604 }, { "epoch": 0.7310160678315454, "grad_norm": 0.5666221446317051, "learning_rate": 3.6636999119603744e-06, "loss": 0.272, "step": 15605 }, { "epoch": 0.7310629128214737, "grad_norm": 0.5284274863446039, "learning_rate": 3.6635320571510953e-06, "loss": 0.2509, "step": 15606 }, { "epoch": 0.7311097578114021, "grad_norm": 0.6173866814842714, "learning_rate": 3.663364195645982e-06, "loss": 0.3099, "step": 15607 }, { "epoch": 0.7311566028013304, "grad_norm": 0.651934878807431, "learning_rate": 3.6631963274460004e-06, "loss": 0.2947, "step": 15608 }, { "epoch": 0.7312034477912587, "grad_norm": 0.5403994210898152, "learning_rate": 3.6630284525521158e-06, "loss": 0.2672, "step": 15609 }, { "epoch": 0.731250292781187, "grad_norm": 0.5467292898345684, "learning_rate": 3.6628605709652953e-06, "loss": 0.2753, "step": 15610 }, { "epoch": 0.7312971377711154, "grad_norm": 0.5631682705273137, "learning_rate": 3.6626926826865046e-06, "loss": 0.2816, "step": 15611 }, { "epoch": 0.7313439827610437, "grad_norm": 0.5718832994817091, "learning_rate": 3.662524787716709e-06, "loss": 0.2786, "step": 15612 }, { "epoch": 0.7313908277509721, "grad_norm": 0.6017803095888766, "learning_rate": 3.6623568860568757e-06, "loss": 0.2553, "step": 15613 }, { "epoch": 0.7314376727409003, "grad_norm": 0.611587081146794, "learning_rate": 3.6621889777079717e-06, "loss": 0.2975, "step": 15614 }, { "epoch": 0.7314845177308287, "grad_norm": 0.5952758736998592, "learning_rate": 3.662021062670961e-06, "loss": 0.2838, "step": 15615 }, { "epoch": 0.731531362720757, "grad_norm": 0.6129572499045582, "learning_rate": 3.661853140946811e-06, "loss": 0.3196, "step": 15616 }, { "epoch": 0.7315782077106854, "grad_norm": 0.5476931949665881, "learning_rate": 3.6616852125364892e-06, "loss": 0.2528, "step": 15617 }, { "epoch": 0.7316250527006136, "grad_norm": 0.5654032198827296, "learning_rate": 3.66151727744096e-06, "loss": 0.289, "step": 15618 }, { "epoch": 0.731671897690542, "grad_norm": 0.5305366181043428, "learning_rate": 3.6613493356611913e-06, "loss": 0.2655, "step": 15619 }, { "epoch": 0.7317187426804703, "grad_norm": 0.5591207328004464, "learning_rate": 3.6611813871981495e-06, "loss": 0.2715, "step": 15620 }, { "epoch": 0.7317655876703987, "grad_norm": 0.5806614246135812, "learning_rate": 3.6610134320528e-06, "loss": 0.2847, "step": 15621 }, { "epoch": 0.731812432660327, "grad_norm": 0.6148009894241196, "learning_rate": 3.6608454702261097e-06, "loss": 0.2735, "step": 15622 }, { "epoch": 0.7318592776502553, "grad_norm": 0.6206094672224386, "learning_rate": 3.6606775017190467e-06, "loss": 0.3036, "step": 15623 }, { "epoch": 0.7319061226401836, "grad_norm": 0.5666954846704794, "learning_rate": 3.6605095265325758e-06, "loss": 0.2819, "step": 15624 }, { "epoch": 0.731952967630112, "grad_norm": 0.5803364283099641, "learning_rate": 3.660341544667664e-06, "loss": 0.2757, "step": 15625 }, { "epoch": 0.7319998126200403, "grad_norm": 0.5548760103593336, "learning_rate": 3.660173556125278e-06, "loss": 0.2798, "step": 15626 }, { "epoch": 0.7320466576099686, "grad_norm": 0.5855455270153235, "learning_rate": 3.6600055609063858e-06, "loss": 0.2631, "step": 15627 }, { "epoch": 0.7320935025998969, "grad_norm": 0.5892242713884744, "learning_rate": 3.659837559011952e-06, "loss": 0.2655, "step": 15628 }, { "epoch": 0.7321403475898253, "grad_norm": 0.5697530757551891, "learning_rate": 3.659669550442946e-06, "loss": 0.2657, "step": 15629 }, { "epoch": 0.7321871925797536, "grad_norm": 0.6150388803314362, "learning_rate": 3.6595015352003314e-06, "loss": 0.2794, "step": 15630 }, { "epoch": 0.732234037569682, "grad_norm": 0.5990074647483127, "learning_rate": 3.6593335132850784e-06, "loss": 0.2803, "step": 15631 }, { "epoch": 0.7322808825596102, "grad_norm": 0.6279473788712869, "learning_rate": 3.659165484698152e-06, "loss": 0.2814, "step": 15632 }, { "epoch": 0.7323277275495386, "grad_norm": 0.5919965256573564, "learning_rate": 3.658997449440519e-06, "loss": 0.2854, "step": 15633 }, { "epoch": 0.7323745725394669, "grad_norm": 0.6003160381290421, "learning_rate": 3.658829407513147e-06, "loss": 0.299, "step": 15634 }, { "epoch": 0.7324214175293953, "grad_norm": 0.5834608689920352, "learning_rate": 3.6586613589170047e-06, "loss": 0.2971, "step": 15635 }, { "epoch": 0.7324682625193235, "grad_norm": 0.6346010249065486, "learning_rate": 3.6584933036530558e-06, "loss": 0.2673, "step": 15636 }, { "epoch": 0.7325151075092519, "grad_norm": 0.6640195489360615, "learning_rate": 3.6583252417222694e-06, "loss": 0.2759, "step": 15637 }, { "epoch": 0.7325619524991802, "grad_norm": 0.6233003201333387, "learning_rate": 3.658157173125612e-06, "loss": 0.292, "step": 15638 }, { "epoch": 0.7326087974891086, "grad_norm": 0.5988250814228646, "learning_rate": 3.657989097864052e-06, "loss": 0.2994, "step": 15639 }, { "epoch": 0.7326556424790369, "grad_norm": 0.5757564892470767, "learning_rate": 3.6578210159385553e-06, "loss": 0.2729, "step": 15640 }, { "epoch": 0.7327024874689652, "grad_norm": 0.5928218291852416, "learning_rate": 3.65765292735009e-06, "loss": 0.27, "step": 15641 }, { "epoch": 0.7327493324588935, "grad_norm": 0.6188305146808903, "learning_rate": 3.657484832099623e-06, "loss": 0.2603, "step": 15642 }, { "epoch": 0.7327961774488219, "grad_norm": 0.6291835527246996, "learning_rate": 3.6573167301881214e-06, "loss": 0.29, "step": 15643 }, { "epoch": 0.7328430224387502, "grad_norm": 0.5827908426966866, "learning_rate": 3.657148621616553e-06, "loss": 0.2893, "step": 15644 }, { "epoch": 0.7328898674286785, "grad_norm": 0.5873443401426143, "learning_rate": 3.6569805063858854e-06, "loss": 0.2786, "step": 15645 }, { "epoch": 0.7329367124186068, "grad_norm": 0.5606420394687924, "learning_rate": 3.6568123844970854e-06, "loss": 0.2684, "step": 15646 }, { "epoch": 0.7329835574085352, "grad_norm": 0.6246456631187524, "learning_rate": 3.656644255951121e-06, "loss": 0.3073, "step": 15647 }, { "epoch": 0.7330304023984635, "grad_norm": 0.6300884080099997, "learning_rate": 3.6564761207489595e-06, "loss": 0.2822, "step": 15648 }, { "epoch": 0.7330772473883919, "grad_norm": 0.5754469221387148, "learning_rate": 3.6563079788915695e-06, "loss": 0.2874, "step": 15649 }, { "epoch": 0.7331240923783201, "grad_norm": 0.67212484009479, "learning_rate": 3.6561398303799166e-06, "loss": 0.2853, "step": 15650 }, { "epoch": 0.7331709373682485, "grad_norm": 0.6277354587922814, "learning_rate": 3.65597167521497e-06, "loss": 0.3091, "step": 15651 }, { "epoch": 0.7332177823581768, "grad_norm": 0.6310527005082966, "learning_rate": 3.655803513397697e-06, "loss": 0.2903, "step": 15652 }, { "epoch": 0.7332646273481052, "grad_norm": 0.584832182811863, "learning_rate": 3.6556353449290648e-06, "loss": 0.2881, "step": 15653 }, { "epoch": 0.7333114723380334, "grad_norm": 0.5973995302758422, "learning_rate": 3.6554671698100424e-06, "loss": 0.2709, "step": 15654 }, { "epoch": 0.7333583173279618, "grad_norm": 0.6394266919164335, "learning_rate": 3.655298988041596e-06, "loss": 0.3007, "step": 15655 }, { "epoch": 0.7334051623178901, "grad_norm": 0.5443137115530112, "learning_rate": 3.6551307996246952e-06, "loss": 0.2618, "step": 15656 }, { "epoch": 0.7334520073078185, "grad_norm": 0.5997380790741801, "learning_rate": 3.6549626045603072e-06, "loss": 0.2817, "step": 15657 }, { "epoch": 0.7334988522977468, "grad_norm": 0.6717500281277564, "learning_rate": 3.6547944028493987e-06, "loss": 0.2846, "step": 15658 }, { "epoch": 0.733545697287675, "grad_norm": 0.6306437654734717, "learning_rate": 3.654626194492939e-06, "loss": 0.2917, "step": 15659 }, { "epoch": 0.7335925422776034, "grad_norm": 0.5396312478692821, "learning_rate": 3.6544579794918962e-06, "loss": 0.2564, "step": 15660 }, { "epoch": 0.7336393872675318, "grad_norm": 0.5710655647534864, "learning_rate": 3.654289757847237e-06, "loss": 0.2856, "step": 15661 }, { "epoch": 0.7336862322574601, "grad_norm": 0.5850176585282526, "learning_rate": 3.6541215295599306e-06, "loss": 0.2806, "step": 15662 }, { "epoch": 0.7337330772473883, "grad_norm": 0.620925007667811, "learning_rate": 3.653953294630946e-06, "loss": 0.2884, "step": 15663 }, { "epoch": 0.7337799222373167, "grad_norm": 0.5537587360946435, "learning_rate": 3.6537850530612485e-06, "loss": 0.2659, "step": 15664 }, { "epoch": 0.733826767227245, "grad_norm": 0.5814680118053103, "learning_rate": 3.653616804851809e-06, "loss": 0.2789, "step": 15665 }, { "epoch": 0.7338736122171734, "grad_norm": 0.5701461765841119, "learning_rate": 3.653448550003595e-06, "loss": 0.2764, "step": 15666 }, { "epoch": 0.7339204572071018, "grad_norm": 0.5887824485185927, "learning_rate": 3.653280288517574e-06, "loss": 0.2887, "step": 15667 }, { "epoch": 0.73396730219703, "grad_norm": 0.554388980576395, "learning_rate": 3.6531120203947146e-06, "loss": 0.2718, "step": 15668 }, { "epoch": 0.7340141471869583, "grad_norm": 0.7001217171914621, "learning_rate": 3.6529437456359858e-06, "loss": 0.2745, "step": 15669 }, { "epoch": 0.7340609921768867, "grad_norm": 0.6024255455173861, "learning_rate": 3.652775464242355e-06, "loss": 0.2708, "step": 15670 }, { "epoch": 0.734107837166815, "grad_norm": 0.6024029441034858, "learning_rate": 3.652607176214792e-06, "loss": 0.2654, "step": 15671 }, { "epoch": 0.7341546821567433, "grad_norm": 0.6096724391390725, "learning_rate": 3.652438881554264e-06, "loss": 0.3006, "step": 15672 }, { "epoch": 0.7342015271466716, "grad_norm": 0.5932189247699415, "learning_rate": 3.6522705802617397e-06, "loss": 0.2816, "step": 15673 }, { "epoch": 0.7342483721366, "grad_norm": 0.627337621807581, "learning_rate": 3.6521022723381876e-06, "loss": 0.288, "step": 15674 }, { "epoch": 0.7342952171265283, "grad_norm": 0.5968658573796344, "learning_rate": 3.651933957784577e-06, "loss": 0.2851, "step": 15675 }, { "epoch": 0.7343420621164567, "grad_norm": 0.5834191419384456, "learning_rate": 3.6517656366018756e-06, "loss": 0.2666, "step": 15676 }, { "epoch": 0.7343889071063849, "grad_norm": 0.6010642666396951, "learning_rate": 3.651597308791053e-06, "loss": 0.282, "step": 15677 }, { "epoch": 0.7344357520963133, "grad_norm": 0.5837467019013617, "learning_rate": 3.6514289743530774e-06, "loss": 0.2794, "step": 15678 }, { "epoch": 0.7344825970862416, "grad_norm": 0.5631916958454699, "learning_rate": 3.6512606332889165e-06, "loss": 0.2613, "step": 15679 }, { "epoch": 0.73452944207617, "grad_norm": 0.6434373349034309, "learning_rate": 3.6510922855995404e-06, "loss": 0.2998, "step": 15680 }, { "epoch": 0.7345762870660982, "grad_norm": 0.5786148099120046, "learning_rate": 3.650923931285918e-06, "loss": 0.2707, "step": 15681 }, { "epoch": 0.7346231320560266, "grad_norm": 0.6413840079043394, "learning_rate": 3.650755570349017e-06, "loss": 0.2963, "step": 15682 }, { "epoch": 0.7346699770459549, "grad_norm": 0.6292248101228549, "learning_rate": 3.6505872027898076e-06, "loss": 0.2876, "step": 15683 }, { "epoch": 0.7347168220358833, "grad_norm": 0.5830909446690449, "learning_rate": 3.650418828609258e-06, "loss": 0.2677, "step": 15684 }, { "epoch": 0.7347636670258116, "grad_norm": 0.5768363738469837, "learning_rate": 3.6502504478083366e-06, "loss": 0.2812, "step": 15685 }, { "epoch": 0.7348105120157399, "grad_norm": 0.5836638854061921, "learning_rate": 3.650082060388013e-06, "loss": 0.2871, "step": 15686 }, { "epoch": 0.7348573570056682, "grad_norm": 0.5641597009905128, "learning_rate": 3.6499136663492573e-06, "loss": 0.2493, "step": 15687 }, { "epoch": 0.7349042019955966, "grad_norm": 0.6322362010697886, "learning_rate": 3.649745265693036e-06, "loss": 0.2912, "step": 15688 }, { "epoch": 0.7349510469855249, "grad_norm": 0.58679090137016, "learning_rate": 3.64957685842032e-06, "loss": 0.283, "step": 15689 }, { "epoch": 0.7349978919754532, "grad_norm": 0.5951174503793277, "learning_rate": 3.649408444532079e-06, "loss": 0.2731, "step": 15690 }, { "epoch": 0.7350447369653815, "grad_norm": 0.5947869611589033, "learning_rate": 3.6492400240292807e-06, "loss": 0.2679, "step": 15691 }, { "epoch": 0.7350915819553099, "grad_norm": 0.5829893420855051, "learning_rate": 3.6490715969128942e-06, "loss": 0.2718, "step": 15692 }, { "epoch": 0.7351384269452382, "grad_norm": 0.6467127892836966, "learning_rate": 3.6489031631838905e-06, "loss": 0.2881, "step": 15693 }, { "epoch": 0.7351852719351666, "grad_norm": 0.5659646423460378, "learning_rate": 3.648734722843238e-06, "loss": 0.2776, "step": 15694 }, { "epoch": 0.7352321169250948, "grad_norm": 0.6243390939471443, "learning_rate": 3.6485662758919048e-06, "loss": 0.304, "step": 15695 }, { "epoch": 0.7352789619150232, "grad_norm": 0.6143641243437279, "learning_rate": 3.6483978223308625e-06, "loss": 0.2974, "step": 15696 }, { "epoch": 0.7353258069049515, "grad_norm": 0.6342802333879178, "learning_rate": 3.6482293621610786e-06, "loss": 0.2824, "step": 15697 }, { "epoch": 0.7353726518948799, "grad_norm": 0.585445893890738, "learning_rate": 3.648060895383524e-06, "loss": 0.2694, "step": 15698 }, { "epoch": 0.7354194968848081, "grad_norm": 0.609621831767907, "learning_rate": 3.6478924219991675e-06, "loss": 0.2886, "step": 15699 }, { "epoch": 0.7354663418747365, "grad_norm": 0.5254058561561737, "learning_rate": 3.6477239420089778e-06, "loss": 0.2711, "step": 15700 }, { "epoch": 0.7355131868646648, "grad_norm": 0.5697577093055725, "learning_rate": 3.647555455413926e-06, "loss": 0.2764, "step": 15701 }, { "epoch": 0.7355600318545932, "grad_norm": 0.5996064886096699, "learning_rate": 3.6473869622149803e-06, "loss": 0.2831, "step": 15702 }, { "epoch": 0.7356068768445215, "grad_norm": 0.5445737279568627, "learning_rate": 3.6472184624131114e-06, "loss": 0.273, "step": 15703 }, { "epoch": 0.7356537218344498, "grad_norm": 0.5737751417840993, "learning_rate": 3.647049956009288e-06, "loss": 0.2628, "step": 15704 }, { "epoch": 0.7357005668243781, "grad_norm": 0.557379619836044, "learning_rate": 3.646881443004482e-06, "loss": 0.2778, "step": 15705 }, { "epoch": 0.7357474118143065, "grad_norm": 0.5901470897701356, "learning_rate": 3.64671292339966e-06, "loss": 0.2498, "step": 15706 }, { "epoch": 0.7357942568042348, "grad_norm": 0.5970222810748973, "learning_rate": 3.6465443971957935e-06, "loss": 0.2978, "step": 15707 }, { "epoch": 0.7358411017941631, "grad_norm": 0.5992475234553359, "learning_rate": 3.646375864393853e-06, "loss": 0.2704, "step": 15708 }, { "epoch": 0.7358879467840914, "grad_norm": 0.5715558426348583, "learning_rate": 3.646207324994807e-06, "loss": 0.2842, "step": 15709 }, { "epoch": 0.7359347917740198, "grad_norm": 0.5927884031856723, "learning_rate": 3.646038778999626e-06, "loss": 0.2878, "step": 15710 }, { "epoch": 0.7359816367639481, "grad_norm": 0.5948240359907355, "learning_rate": 3.6458702264092803e-06, "loss": 0.278, "step": 15711 }, { "epoch": 0.7360284817538765, "grad_norm": 0.5959156431891915, "learning_rate": 3.6457016672247393e-06, "loss": 0.2829, "step": 15712 }, { "epoch": 0.7360753267438047, "grad_norm": 0.5883312405158423, "learning_rate": 3.6455331014469723e-06, "loss": 0.287, "step": 15713 }, { "epoch": 0.7361221717337331, "grad_norm": 0.5633291917693628, "learning_rate": 3.6453645290769507e-06, "loss": 0.2556, "step": 15714 }, { "epoch": 0.7361690167236614, "grad_norm": 0.5682316343244845, "learning_rate": 3.6451959501156447e-06, "loss": 0.2912, "step": 15715 }, { "epoch": 0.7362158617135898, "grad_norm": 0.5118026886387796, "learning_rate": 3.645027364564023e-06, "loss": 0.2523, "step": 15716 }, { "epoch": 0.736262706703518, "grad_norm": 0.5835553723992569, "learning_rate": 3.6448587724230565e-06, "loss": 0.2875, "step": 15717 }, { "epoch": 0.7363095516934464, "grad_norm": 0.6265740643535095, "learning_rate": 3.644690173693717e-06, "loss": 0.2778, "step": 15718 }, { "epoch": 0.7363563966833747, "grad_norm": 0.6237398273942716, "learning_rate": 3.644521568376972e-06, "loss": 0.2733, "step": 15719 }, { "epoch": 0.7364032416733031, "grad_norm": 0.5844306554877461, "learning_rate": 3.644352956473793e-06, "loss": 0.285, "step": 15720 }, { "epoch": 0.7364500866632314, "grad_norm": 0.6991523334287182, "learning_rate": 3.644184337985151e-06, "loss": 0.2861, "step": 15721 }, { "epoch": 0.7364969316531597, "grad_norm": 0.5793672711201071, "learning_rate": 3.6440157129120156e-06, "loss": 0.2858, "step": 15722 }, { "epoch": 0.736543776643088, "grad_norm": 0.5944072896264897, "learning_rate": 3.643847081255357e-06, "loss": 0.2845, "step": 15723 }, { "epoch": 0.7365906216330164, "grad_norm": 0.6214005414394366, "learning_rate": 3.643678443016146e-06, "loss": 0.301, "step": 15724 }, { "epoch": 0.7366374666229447, "grad_norm": 0.6043301313564022, "learning_rate": 3.6435097981953526e-06, "loss": 0.2746, "step": 15725 }, { "epoch": 0.736684311612873, "grad_norm": 0.6548306726470408, "learning_rate": 3.6433411467939484e-06, "loss": 0.2777, "step": 15726 }, { "epoch": 0.7367311566028013, "grad_norm": 0.577681404078513, "learning_rate": 3.6431724888129033e-06, "loss": 0.2616, "step": 15727 }, { "epoch": 0.7367780015927297, "grad_norm": 0.5863019753088822, "learning_rate": 3.6430038242531875e-06, "loss": 0.2974, "step": 15728 }, { "epoch": 0.736824846582658, "grad_norm": 0.5987028654016195, "learning_rate": 3.6428351531157725e-06, "loss": 0.2896, "step": 15729 }, { "epoch": 0.7368716915725864, "grad_norm": 0.6321329816451131, "learning_rate": 3.6426664754016278e-06, "loss": 0.3049, "step": 15730 }, { "epoch": 0.7369185365625146, "grad_norm": 0.6223218314634764, "learning_rate": 3.6424977911117245e-06, "loss": 0.2975, "step": 15731 }, { "epoch": 0.736965381552443, "grad_norm": 0.5629408288977257, "learning_rate": 3.642329100247034e-06, "loss": 0.2721, "step": 15732 }, { "epoch": 0.7370122265423713, "grad_norm": 0.5696204602041612, "learning_rate": 3.6421604028085274e-06, "loss": 0.2598, "step": 15733 }, { "epoch": 0.7370590715322997, "grad_norm": 0.5949317657895942, "learning_rate": 3.641991698797174e-06, "loss": 0.2768, "step": 15734 }, { "epoch": 0.7371059165222279, "grad_norm": 0.5823320903827732, "learning_rate": 3.6418229882139444e-06, "loss": 0.2763, "step": 15735 }, { "epoch": 0.7371527615121563, "grad_norm": 0.5479602350047782, "learning_rate": 3.6416542710598124e-06, "loss": 0.2695, "step": 15736 }, { "epoch": 0.7371996065020846, "grad_norm": 0.5652253307839792, "learning_rate": 3.6414855473357456e-06, "loss": 0.2593, "step": 15737 }, { "epoch": 0.737246451492013, "grad_norm": 0.6061835597702402, "learning_rate": 3.6413168170427164e-06, "loss": 0.2827, "step": 15738 }, { "epoch": 0.7372932964819413, "grad_norm": 0.6218467073886879, "learning_rate": 3.6411480801816967e-06, "loss": 0.2939, "step": 15739 }, { "epoch": 0.7373401414718695, "grad_norm": 0.5673095231731401, "learning_rate": 3.640979336753656e-06, "loss": 0.2725, "step": 15740 }, { "epoch": 0.7373869864617979, "grad_norm": 0.5601902690923948, "learning_rate": 3.6408105867595662e-06, "loss": 0.2831, "step": 15741 }, { "epoch": 0.7374338314517263, "grad_norm": 0.620025354137641, "learning_rate": 3.640641830200399e-06, "loss": 0.2759, "step": 15742 }, { "epoch": 0.7374806764416546, "grad_norm": 0.6012235219613693, "learning_rate": 3.640473067077124e-06, "loss": 0.2842, "step": 15743 }, { "epoch": 0.7375275214315828, "grad_norm": 0.5574063589838889, "learning_rate": 3.640304297390712e-06, "loss": 0.2724, "step": 15744 }, { "epoch": 0.7375743664215112, "grad_norm": 0.5794157890880438, "learning_rate": 3.640135521142137e-06, "loss": 0.281, "step": 15745 }, { "epoch": 0.7376212114114395, "grad_norm": 0.5916647573019009, "learning_rate": 3.639966738332368e-06, "loss": 0.2692, "step": 15746 }, { "epoch": 0.7376680564013679, "grad_norm": 0.6563917234399922, "learning_rate": 3.639797948962377e-06, "loss": 0.301, "step": 15747 }, { "epoch": 0.7377149013912963, "grad_norm": 0.6085411534817267, "learning_rate": 3.639629153033135e-06, "loss": 0.2914, "step": 15748 }, { "epoch": 0.7377617463812245, "grad_norm": 0.601932173075793, "learning_rate": 3.6394603505456137e-06, "loss": 0.2656, "step": 15749 }, { "epoch": 0.7378085913711528, "grad_norm": 0.6134315507340419, "learning_rate": 3.6392915415007855e-06, "loss": 0.2825, "step": 15750 }, { "epoch": 0.7378554363610812, "grad_norm": 0.6471188828992206, "learning_rate": 3.63912272589962e-06, "loss": 0.2767, "step": 15751 }, { "epoch": 0.7379022813510095, "grad_norm": 0.6268498390186484, "learning_rate": 3.6389539037430893e-06, "loss": 0.2981, "step": 15752 }, { "epoch": 0.7379491263409378, "grad_norm": 0.5625351233779755, "learning_rate": 3.6387850750321656e-06, "loss": 0.2711, "step": 15753 }, { "epoch": 0.7379959713308661, "grad_norm": 0.5511800291430211, "learning_rate": 3.6386162397678205e-06, "loss": 0.2545, "step": 15754 }, { "epoch": 0.7380428163207945, "grad_norm": 0.6047022314981456, "learning_rate": 3.6384473979510237e-06, "loss": 0.3009, "step": 15755 }, { "epoch": 0.7380896613107228, "grad_norm": 0.5402892233025132, "learning_rate": 3.638278549582749e-06, "loss": 0.267, "step": 15756 }, { "epoch": 0.7381365063006512, "grad_norm": 0.5598566313858334, "learning_rate": 3.638109694663968e-06, "loss": 0.277, "step": 15757 }, { "epoch": 0.7381833512905794, "grad_norm": 0.6270415010424419, "learning_rate": 3.6379408331956508e-06, "loss": 0.3026, "step": 15758 }, { "epoch": 0.7382301962805078, "grad_norm": 0.6437812039616181, "learning_rate": 3.63777196517877e-06, "loss": 0.2806, "step": 15759 }, { "epoch": 0.7382770412704361, "grad_norm": 0.6167025688453702, "learning_rate": 3.6376030906142985e-06, "loss": 0.2961, "step": 15760 }, { "epoch": 0.7383238862603645, "grad_norm": 0.6278245709248548, "learning_rate": 3.6374342095032066e-06, "loss": 0.2831, "step": 15761 }, { "epoch": 0.7383707312502927, "grad_norm": 0.6098929007437977, "learning_rate": 3.6372653218464665e-06, "loss": 0.2958, "step": 15762 }, { "epoch": 0.7384175762402211, "grad_norm": 0.6355868173574308, "learning_rate": 3.6370964276450505e-06, "loss": 0.2837, "step": 15763 }, { "epoch": 0.7384644212301494, "grad_norm": 0.604531052356421, "learning_rate": 3.6369275268999305e-06, "loss": 0.2825, "step": 15764 }, { "epoch": 0.7385112662200778, "grad_norm": 0.6109902782037056, "learning_rate": 3.636758619612078e-06, "loss": 0.3091, "step": 15765 }, { "epoch": 0.7385581112100061, "grad_norm": 0.5736884886050525, "learning_rate": 3.636589705782466e-06, "loss": 0.2725, "step": 15766 }, { "epoch": 0.7386049561999344, "grad_norm": 0.6128596170269104, "learning_rate": 3.636420785412065e-06, "loss": 0.273, "step": 15767 }, { "epoch": 0.7386518011898627, "grad_norm": 0.5956492755239748, "learning_rate": 3.6362518585018487e-06, "loss": 0.2507, "step": 15768 }, { "epoch": 0.7386986461797911, "grad_norm": 0.571148482428346, "learning_rate": 3.6360829250527884e-06, "loss": 0.2809, "step": 15769 }, { "epoch": 0.7387454911697194, "grad_norm": 0.5618915774835817, "learning_rate": 3.6359139850658564e-06, "loss": 0.2716, "step": 15770 }, { "epoch": 0.7387923361596477, "grad_norm": 0.6221276568067763, "learning_rate": 3.635745038542025e-06, "loss": 0.3036, "step": 15771 }, { "epoch": 0.738839181149576, "grad_norm": 0.6144094084870304, "learning_rate": 3.635576085482266e-06, "loss": 0.2794, "step": 15772 }, { "epoch": 0.7388860261395044, "grad_norm": 0.6513257319733028, "learning_rate": 3.6354071258875523e-06, "loss": 0.2856, "step": 15773 }, { "epoch": 0.7389328711294327, "grad_norm": 0.5978905074553595, "learning_rate": 3.635238159758856e-06, "loss": 0.2733, "step": 15774 }, { "epoch": 0.7389797161193611, "grad_norm": 0.5950057245665776, "learning_rate": 3.63506918709715e-06, "loss": 0.2767, "step": 15775 }, { "epoch": 0.7390265611092893, "grad_norm": 0.6436052727721743, "learning_rate": 3.6349002079034044e-06, "loss": 0.2888, "step": 15776 }, { "epoch": 0.7390734060992177, "grad_norm": 0.5808739322326487, "learning_rate": 3.6347312221785944e-06, "loss": 0.2777, "step": 15777 }, { "epoch": 0.739120251089146, "grad_norm": 0.6186900261964706, "learning_rate": 3.6345622299236915e-06, "loss": 0.3123, "step": 15778 }, { "epoch": 0.7391670960790744, "grad_norm": 0.5968894128574157, "learning_rate": 3.6343932311396685e-06, "loss": 0.293, "step": 15779 }, { "epoch": 0.7392139410690026, "grad_norm": 0.5634439728684769, "learning_rate": 3.634224225827497e-06, "loss": 0.2777, "step": 15780 }, { "epoch": 0.739260786058931, "grad_norm": 0.5876232147679358, "learning_rate": 3.63405521398815e-06, "loss": 0.272, "step": 15781 }, { "epoch": 0.7393076310488593, "grad_norm": 0.5878040098098954, "learning_rate": 3.6338861956226015e-06, "loss": 0.2813, "step": 15782 }, { "epoch": 0.7393544760387877, "grad_norm": 0.6480434500305131, "learning_rate": 3.6337171707318215e-06, "loss": 0.3079, "step": 15783 }, { "epoch": 0.739401321028716, "grad_norm": 0.6232878912232692, "learning_rate": 3.6335481393167842e-06, "loss": 0.2972, "step": 15784 }, { "epoch": 0.7394481660186443, "grad_norm": 0.5738931714882646, "learning_rate": 3.6333791013784638e-06, "loss": 0.262, "step": 15785 }, { "epoch": 0.7394950110085726, "grad_norm": 0.5371619833117471, "learning_rate": 3.6332100569178295e-06, "loss": 0.2553, "step": 15786 }, { "epoch": 0.739541855998501, "grad_norm": 0.58876291804518, "learning_rate": 3.6330410059358572e-06, "loss": 0.2734, "step": 15787 }, { "epoch": 0.7395887009884293, "grad_norm": 0.5913805988661439, "learning_rate": 3.6328719484335185e-06, "loss": 0.2754, "step": 15788 }, { "epoch": 0.7396355459783576, "grad_norm": 0.6465645918764906, "learning_rate": 3.6327028844117874e-06, "loss": 0.2785, "step": 15789 }, { "epoch": 0.7396823909682859, "grad_norm": 0.5251992386799204, "learning_rate": 3.6325338138716343e-06, "loss": 0.2619, "step": 15790 }, { "epoch": 0.7397292359582143, "grad_norm": 0.625199003318972, "learning_rate": 3.632364736814035e-06, "loss": 0.2857, "step": 15791 }, { "epoch": 0.7397760809481426, "grad_norm": 0.5962087017258655, "learning_rate": 3.6321956532399606e-06, "loss": 0.2563, "step": 15792 }, { "epoch": 0.739822925938071, "grad_norm": 0.5844609223208441, "learning_rate": 3.632026563150385e-06, "loss": 0.273, "step": 15793 }, { "epoch": 0.7398697709279992, "grad_norm": 0.577262863135702, "learning_rate": 3.6318574665462808e-06, "loss": 0.2635, "step": 15794 }, { "epoch": 0.7399166159179276, "grad_norm": 0.5716345919491091, "learning_rate": 3.6316883634286217e-06, "loss": 0.2663, "step": 15795 }, { "epoch": 0.7399634609078559, "grad_norm": 0.6384890290607217, "learning_rate": 3.63151925379838e-06, "loss": 0.2994, "step": 15796 }, { "epoch": 0.7400103058977843, "grad_norm": 0.6213801393353435, "learning_rate": 3.63135013765653e-06, "loss": 0.2759, "step": 15797 }, { "epoch": 0.7400571508877125, "grad_norm": 0.5366079381185382, "learning_rate": 3.6311810150040436e-06, "loss": 0.2664, "step": 15798 }, { "epoch": 0.7401039958776409, "grad_norm": 0.6094479264762469, "learning_rate": 3.631011885841896e-06, "loss": 0.262, "step": 15799 }, { "epoch": 0.7401508408675692, "grad_norm": 0.5432278616756869, "learning_rate": 3.6308427501710586e-06, "loss": 0.252, "step": 15800 }, { "epoch": 0.7401976858574976, "grad_norm": 0.6224769498619498, "learning_rate": 3.630673607992505e-06, "loss": 0.2893, "step": 15801 }, { "epoch": 0.7402445308474259, "grad_norm": 0.5820992426510866, "learning_rate": 3.630504459307209e-06, "loss": 0.2711, "step": 15802 }, { "epoch": 0.7402913758373542, "grad_norm": 0.5570935397505711, "learning_rate": 3.6303353041161447e-06, "loss": 0.2587, "step": 15803 }, { "epoch": 0.7403382208272825, "grad_norm": 0.5855572213588777, "learning_rate": 3.630166142420284e-06, "loss": 0.2764, "step": 15804 }, { "epoch": 0.7403850658172109, "grad_norm": 0.6155509714496967, "learning_rate": 3.6299969742206015e-06, "loss": 0.3117, "step": 15805 }, { "epoch": 0.7404319108071392, "grad_norm": 0.5284021879421971, "learning_rate": 3.629827799518071e-06, "loss": 0.2605, "step": 15806 }, { "epoch": 0.7404787557970675, "grad_norm": 0.631889543819735, "learning_rate": 3.629658618313665e-06, "loss": 0.2847, "step": 15807 }, { "epoch": 0.7405256007869958, "grad_norm": 0.6214684864936404, "learning_rate": 3.6294894306083574e-06, "loss": 0.2627, "step": 15808 }, { "epoch": 0.7405724457769242, "grad_norm": 0.5873885228567395, "learning_rate": 3.6293202364031223e-06, "loss": 0.2862, "step": 15809 }, { "epoch": 0.7406192907668525, "grad_norm": 0.5793510614586679, "learning_rate": 3.629151035698933e-06, "loss": 0.2798, "step": 15810 }, { "epoch": 0.7406661357567809, "grad_norm": 0.6084369103325568, "learning_rate": 3.6289818284967624e-06, "loss": 0.2866, "step": 15811 }, { "epoch": 0.7407129807467091, "grad_norm": 0.6294851054859474, "learning_rate": 3.6288126147975867e-06, "loss": 0.303, "step": 15812 }, { "epoch": 0.7407598257366375, "grad_norm": 0.6261754576969272, "learning_rate": 3.6286433946023776e-06, "loss": 0.3029, "step": 15813 }, { "epoch": 0.7408066707265658, "grad_norm": 0.5668271528518269, "learning_rate": 3.628474167912109e-06, "loss": 0.2758, "step": 15814 }, { "epoch": 0.7408535157164942, "grad_norm": 0.6106906621071341, "learning_rate": 3.6283049347277554e-06, "loss": 0.2768, "step": 15815 }, { "epoch": 0.7409003607064224, "grad_norm": 0.5503434168102413, "learning_rate": 3.6281356950502905e-06, "loss": 0.2809, "step": 15816 }, { "epoch": 0.7409472056963508, "grad_norm": 0.5625171255373487, "learning_rate": 3.627966448880688e-06, "loss": 0.2619, "step": 15817 }, { "epoch": 0.7409940506862791, "grad_norm": 0.6056527192475737, "learning_rate": 3.627797196219923e-06, "loss": 0.2812, "step": 15818 }, { "epoch": 0.7410408956762075, "grad_norm": 0.6298019964286917, "learning_rate": 3.6276279370689678e-06, "loss": 0.2938, "step": 15819 }, { "epoch": 0.7410877406661358, "grad_norm": 0.6584138034193612, "learning_rate": 3.6274586714287974e-06, "loss": 0.3, "step": 15820 }, { "epoch": 0.741134585656064, "grad_norm": 0.5480534908025863, "learning_rate": 3.627289399300386e-06, "loss": 0.2713, "step": 15821 }, { "epoch": 0.7411814306459924, "grad_norm": 0.5549634439342856, "learning_rate": 3.627120120684707e-06, "loss": 0.2442, "step": 15822 }, { "epoch": 0.7412282756359208, "grad_norm": 0.5407915528511732, "learning_rate": 3.6269508355827348e-06, "loss": 0.2767, "step": 15823 }, { "epoch": 0.7412751206258491, "grad_norm": 0.5878549634238784, "learning_rate": 3.6267815439954448e-06, "loss": 0.2756, "step": 15824 }, { "epoch": 0.7413219656157773, "grad_norm": 0.58159055502168, "learning_rate": 3.626612245923809e-06, "loss": 0.2782, "step": 15825 }, { "epoch": 0.7413688106057057, "grad_norm": 0.598073101797499, "learning_rate": 3.6264429413688027e-06, "loss": 0.2809, "step": 15826 }, { "epoch": 0.741415655595634, "grad_norm": 0.5200113679947506, "learning_rate": 3.6262736303314022e-06, "loss": 0.2353, "step": 15827 }, { "epoch": 0.7414625005855624, "grad_norm": 0.5679071825079018, "learning_rate": 3.626104312812579e-06, "loss": 0.2832, "step": 15828 }, { "epoch": 0.7415093455754908, "grad_norm": 0.5887436530928739, "learning_rate": 3.6259349888133076e-06, "loss": 0.2731, "step": 15829 }, { "epoch": 0.741556190565419, "grad_norm": 0.5753651632824032, "learning_rate": 3.6257656583345647e-06, "loss": 0.2786, "step": 15830 }, { "epoch": 0.7416030355553473, "grad_norm": 0.6059443915038919, "learning_rate": 3.6255963213773233e-06, "loss": 0.2998, "step": 15831 }, { "epoch": 0.7416498805452757, "grad_norm": 0.6071476229132541, "learning_rate": 3.625426977942557e-06, "loss": 0.2915, "step": 15832 }, { "epoch": 0.741696725535204, "grad_norm": 0.6339084284538111, "learning_rate": 3.6252576280312423e-06, "loss": 0.2909, "step": 15833 }, { "epoch": 0.7417435705251323, "grad_norm": 0.6283851966136064, "learning_rate": 3.6250882716443525e-06, "loss": 0.2742, "step": 15834 }, { "epoch": 0.7417904155150606, "grad_norm": 0.5977742813168105, "learning_rate": 3.624918908782862e-06, "loss": 0.2914, "step": 15835 }, { "epoch": 0.741837260504989, "grad_norm": 0.6332439589523228, "learning_rate": 3.6247495394477467e-06, "loss": 0.3113, "step": 15836 }, { "epoch": 0.7418841054949173, "grad_norm": 0.6316084131552011, "learning_rate": 3.6245801636399797e-06, "loss": 0.3073, "step": 15837 }, { "epoch": 0.7419309504848457, "grad_norm": 0.6057570875752745, "learning_rate": 3.6244107813605366e-06, "loss": 0.2682, "step": 15838 }, { "epoch": 0.7419777954747739, "grad_norm": 0.6091887649712275, "learning_rate": 3.6242413926103925e-06, "loss": 0.2823, "step": 15839 }, { "epoch": 0.7420246404647023, "grad_norm": 0.5660410898613385, "learning_rate": 3.6240719973905213e-06, "loss": 0.2749, "step": 15840 }, { "epoch": 0.7420714854546306, "grad_norm": 0.6596195420925277, "learning_rate": 3.6239025957018993e-06, "loss": 0.2756, "step": 15841 }, { "epoch": 0.742118330444559, "grad_norm": 0.6355532857979507, "learning_rate": 3.6237331875454997e-06, "loss": 0.2876, "step": 15842 }, { "epoch": 0.7421651754344872, "grad_norm": 0.6380381919545743, "learning_rate": 3.623563772922297e-06, "loss": 0.3003, "step": 15843 }, { "epoch": 0.7422120204244156, "grad_norm": 0.5715464292883504, "learning_rate": 3.623394351833268e-06, "loss": 0.2889, "step": 15844 }, { "epoch": 0.7422588654143439, "grad_norm": 0.6145951542231396, "learning_rate": 3.6232249242793884e-06, "loss": 0.2913, "step": 15845 }, { "epoch": 0.7423057104042723, "grad_norm": 0.5958171968876875, "learning_rate": 3.623055490261629e-06, "loss": 0.2821, "step": 15846 }, { "epoch": 0.7423525553942006, "grad_norm": 0.5601407344074597, "learning_rate": 3.6228860497809692e-06, "loss": 0.2857, "step": 15847 }, { "epoch": 0.7423994003841289, "grad_norm": 0.6114834478898835, "learning_rate": 3.622716602838382e-06, "loss": 0.2914, "step": 15848 }, { "epoch": 0.7424462453740572, "grad_norm": 0.5746938554964874, "learning_rate": 3.622547149434843e-06, "loss": 0.282, "step": 15849 }, { "epoch": 0.7424930903639856, "grad_norm": 0.5674534950206084, "learning_rate": 3.622377689571327e-06, "loss": 0.2946, "step": 15850 }, { "epoch": 0.7425399353539139, "grad_norm": 0.595351578192573, "learning_rate": 3.6222082232488097e-06, "loss": 0.282, "step": 15851 }, { "epoch": 0.7425867803438422, "grad_norm": 0.5839663244869814, "learning_rate": 3.622038750468267e-06, "loss": 0.2821, "step": 15852 }, { "epoch": 0.7426336253337705, "grad_norm": 0.6314725315913047, "learning_rate": 3.621869271230672e-06, "loss": 0.2761, "step": 15853 }, { "epoch": 0.7426804703236989, "grad_norm": 0.6258127284511026, "learning_rate": 3.621699785537001e-06, "loss": 0.2928, "step": 15854 }, { "epoch": 0.7427273153136272, "grad_norm": 0.5801205545166235, "learning_rate": 3.6215302933882313e-06, "loss": 0.2747, "step": 15855 }, { "epoch": 0.7427741603035556, "grad_norm": 0.6336093771836834, "learning_rate": 3.621360794785336e-06, "loss": 0.2975, "step": 15856 }, { "epoch": 0.7428210052934838, "grad_norm": 0.6023214734794967, "learning_rate": 3.62119128972929e-06, "loss": 0.2762, "step": 15857 }, { "epoch": 0.7428678502834122, "grad_norm": 0.6089141963798752, "learning_rate": 3.6210217782210715e-06, "loss": 0.2708, "step": 15858 }, { "epoch": 0.7429146952733405, "grad_norm": 0.538771260915168, "learning_rate": 3.620852260261654e-06, "loss": 0.273, "step": 15859 }, { "epoch": 0.7429615402632689, "grad_norm": 0.5886165397715745, "learning_rate": 3.620682735852013e-06, "loss": 0.2739, "step": 15860 }, { "epoch": 0.7430083852531971, "grad_norm": 0.5646697941239089, "learning_rate": 3.6205132049931245e-06, "loss": 0.2577, "step": 15861 }, { "epoch": 0.7430552302431255, "grad_norm": 0.5406331956099296, "learning_rate": 3.620343667685965e-06, "loss": 0.2841, "step": 15862 }, { "epoch": 0.7431020752330538, "grad_norm": 0.5680091722175169, "learning_rate": 3.6201741239315087e-06, "loss": 0.2836, "step": 15863 }, { "epoch": 0.7431489202229822, "grad_norm": 0.6172410338896398, "learning_rate": 3.620004573730732e-06, "loss": 0.2851, "step": 15864 }, { "epoch": 0.7431957652129105, "grad_norm": 0.6079025948543656, "learning_rate": 3.6198350170846096e-06, "loss": 0.2773, "step": 15865 }, { "epoch": 0.7432426102028388, "grad_norm": 0.6610980691444627, "learning_rate": 3.6196654539941196e-06, "loss": 0.3034, "step": 15866 }, { "epoch": 0.7432894551927671, "grad_norm": 0.5874493020307547, "learning_rate": 3.6194958844602355e-06, "loss": 0.3033, "step": 15867 }, { "epoch": 0.7433363001826955, "grad_norm": 0.5914484412604892, "learning_rate": 3.6193263084839336e-06, "loss": 0.2719, "step": 15868 }, { "epoch": 0.7433831451726238, "grad_norm": 0.5739090576669528, "learning_rate": 3.619156726066191e-06, "loss": 0.2936, "step": 15869 }, { "epoch": 0.7434299901625521, "grad_norm": 0.6257003045370777, "learning_rate": 3.618987137207983e-06, "loss": 0.2857, "step": 15870 }, { "epoch": 0.7434768351524804, "grad_norm": 0.6003677055954305, "learning_rate": 3.618817541910284e-06, "loss": 0.2791, "step": 15871 }, { "epoch": 0.7435236801424088, "grad_norm": 0.5701864349389338, "learning_rate": 3.6186479401740723e-06, "loss": 0.2739, "step": 15872 }, { "epoch": 0.7435705251323371, "grad_norm": 0.5982250742482581, "learning_rate": 3.6184783320003226e-06, "loss": 0.2751, "step": 15873 }, { "epoch": 0.7436173701222655, "grad_norm": 0.5346451026164432, "learning_rate": 3.6183087173900107e-06, "loss": 0.2388, "step": 15874 }, { "epoch": 0.7436642151121937, "grad_norm": 0.5500190445509636, "learning_rate": 3.618139096344113e-06, "loss": 0.2843, "step": 15875 }, { "epoch": 0.7437110601021221, "grad_norm": 0.6196137163895874, "learning_rate": 3.617969468863607e-06, "loss": 0.2858, "step": 15876 }, { "epoch": 0.7437579050920504, "grad_norm": 0.5874316745775378, "learning_rate": 3.617799834949467e-06, "loss": 0.2863, "step": 15877 }, { "epoch": 0.7438047500819788, "grad_norm": 0.5824267437043686, "learning_rate": 3.6176301946026694e-06, "loss": 0.279, "step": 15878 }, { "epoch": 0.743851595071907, "grad_norm": 0.5910855872679674, "learning_rate": 3.617460547824192e-06, "loss": 0.2905, "step": 15879 }, { "epoch": 0.7438984400618354, "grad_norm": 0.5552684328155337, "learning_rate": 3.6172908946150097e-06, "loss": 0.2727, "step": 15880 }, { "epoch": 0.7439452850517637, "grad_norm": 0.5553684399794615, "learning_rate": 3.6171212349760985e-06, "loss": 0.2781, "step": 15881 }, { "epoch": 0.7439921300416921, "grad_norm": 0.5584329909559862, "learning_rate": 3.616951568908436e-06, "loss": 0.2773, "step": 15882 }, { "epoch": 0.7440389750316204, "grad_norm": 0.5961518049345331, "learning_rate": 3.6167818964129982e-06, "loss": 0.2793, "step": 15883 }, { "epoch": 0.7440858200215487, "grad_norm": 0.5581730828872447, "learning_rate": 3.6166122174907603e-06, "loss": 0.2859, "step": 15884 }, { "epoch": 0.744132665011477, "grad_norm": 0.6075897190436759, "learning_rate": 3.6164425321427004e-06, "loss": 0.2659, "step": 15885 }, { "epoch": 0.7441795100014054, "grad_norm": 0.6155734894067879, "learning_rate": 3.6162728403697944e-06, "loss": 0.2662, "step": 15886 }, { "epoch": 0.7442263549913337, "grad_norm": 0.5833330274714481, "learning_rate": 3.6161031421730185e-06, "loss": 0.2977, "step": 15887 }, { "epoch": 0.744273199981262, "grad_norm": 0.6209616719447848, "learning_rate": 3.61593343755335e-06, "loss": 0.2979, "step": 15888 }, { "epoch": 0.7443200449711903, "grad_norm": 0.6199887046963318, "learning_rate": 3.615763726511764e-06, "loss": 0.2925, "step": 15889 }, { "epoch": 0.7443668899611187, "grad_norm": 0.5968555377689204, "learning_rate": 3.6155940090492392e-06, "loss": 0.2821, "step": 15890 }, { "epoch": 0.744413734951047, "grad_norm": 0.5690679509782405, "learning_rate": 3.6154242851667516e-06, "loss": 0.2774, "step": 15891 }, { "epoch": 0.7444605799409754, "grad_norm": 0.6502230711979659, "learning_rate": 3.6152545548652764e-06, "loss": 0.2696, "step": 15892 }, { "epoch": 0.7445074249309036, "grad_norm": 0.6072680876713806, "learning_rate": 3.615084818145792e-06, "loss": 0.3013, "step": 15893 }, { "epoch": 0.744554269920832, "grad_norm": 0.5968027495329868, "learning_rate": 3.6149150750092755e-06, "loss": 0.2901, "step": 15894 }, { "epoch": 0.7446011149107603, "grad_norm": 0.6342441852091074, "learning_rate": 3.614745325456702e-06, "loss": 0.3035, "step": 15895 }, { "epoch": 0.7446479599006887, "grad_norm": 0.5480107233237537, "learning_rate": 3.6145755694890487e-06, "loss": 0.2755, "step": 15896 }, { "epoch": 0.7446948048906169, "grad_norm": 0.5900024783394807, "learning_rate": 3.614405807107295e-06, "loss": 0.2901, "step": 15897 }, { "epoch": 0.7447416498805453, "grad_norm": 0.6072815483256637, "learning_rate": 3.6142360383124147e-06, "loss": 0.2769, "step": 15898 }, { "epoch": 0.7447884948704736, "grad_norm": 0.5596500981259126, "learning_rate": 3.6140662631053865e-06, "loss": 0.2717, "step": 15899 }, { "epoch": 0.744835339860402, "grad_norm": 0.5871717935468724, "learning_rate": 3.6138964814871868e-06, "loss": 0.2784, "step": 15900 }, { "epoch": 0.7448821848503303, "grad_norm": 0.5438100684087964, "learning_rate": 3.6137266934587932e-06, "loss": 0.2763, "step": 15901 }, { "epoch": 0.7449290298402585, "grad_norm": 0.6095268273854461, "learning_rate": 3.613556899021182e-06, "loss": 0.2759, "step": 15902 }, { "epoch": 0.7449758748301869, "grad_norm": 0.6119058238005062, "learning_rate": 3.6133870981753303e-06, "loss": 0.2987, "step": 15903 }, { "epoch": 0.7450227198201153, "grad_norm": 0.5795271748065506, "learning_rate": 3.6132172909222167e-06, "loss": 0.2658, "step": 15904 }, { "epoch": 0.7450695648100436, "grad_norm": 0.5937785468970047, "learning_rate": 3.6130474772628167e-06, "loss": 0.2706, "step": 15905 }, { "epoch": 0.7451164097999718, "grad_norm": 0.5938049701848073, "learning_rate": 3.612877657198108e-06, "loss": 0.307, "step": 15906 }, { "epoch": 0.7451632547899002, "grad_norm": 0.6450165550087475, "learning_rate": 3.6127078307290688e-06, "loss": 0.3136, "step": 15907 }, { "epoch": 0.7452100997798285, "grad_norm": 0.5638428859835123, "learning_rate": 3.6125379978566754e-06, "loss": 0.2761, "step": 15908 }, { "epoch": 0.7452569447697569, "grad_norm": 0.573349509686536, "learning_rate": 3.612368158581906e-06, "loss": 0.2705, "step": 15909 }, { "epoch": 0.7453037897596853, "grad_norm": 0.5877841798088771, "learning_rate": 3.6121983129057363e-06, "loss": 0.2848, "step": 15910 }, { "epoch": 0.7453506347496135, "grad_norm": 0.5439572871646042, "learning_rate": 3.6120284608291455e-06, "loss": 0.2542, "step": 15911 }, { "epoch": 0.7453974797395418, "grad_norm": 0.6515773553452974, "learning_rate": 3.6118586023531103e-06, "loss": 0.29, "step": 15912 }, { "epoch": 0.7454443247294702, "grad_norm": 0.5830702515894054, "learning_rate": 3.611688737478608e-06, "loss": 0.2649, "step": 15913 }, { "epoch": 0.7454911697193985, "grad_norm": 0.6030916388736585, "learning_rate": 3.611518866206617e-06, "loss": 0.2754, "step": 15914 }, { "epoch": 0.7455380147093268, "grad_norm": 0.5862903396757249, "learning_rate": 3.611348988538115e-06, "loss": 0.2632, "step": 15915 }, { "epoch": 0.7455848596992551, "grad_norm": 0.5605151805101637, "learning_rate": 3.6111791044740774e-06, "loss": 0.2791, "step": 15916 }, { "epoch": 0.7456317046891835, "grad_norm": 0.6050987034937763, "learning_rate": 3.6110092140154833e-06, "loss": 0.3068, "step": 15917 }, { "epoch": 0.7456785496791118, "grad_norm": 0.5778250953331991, "learning_rate": 3.6108393171633118e-06, "loss": 0.2861, "step": 15918 }, { "epoch": 0.7457253946690402, "grad_norm": 0.5853861540455433, "learning_rate": 3.610669413918538e-06, "loss": 0.2769, "step": 15919 }, { "epoch": 0.7457722396589684, "grad_norm": 0.5799093347076809, "learning_rate": 3.610499504282141e-06, "loss": 0.2793, "step": 15920 }, { "epoch": 0.7458190846488968, "grad_norm": 0.6159721471959898, "learning_rate": 3.6103295882550994e-06, "loss": 0.2833, "step": 15921 }, { "epoch": 0.7458659296388251, "grad_norm": 0.5281640457399555, "learning_rate": 3.6101596658383893e-06, "loss": 0.2623, "step": 15922 }, { "epoch": 0.7459127746287535, "grad_norm": 0.6002758952396732, "learning_rate": 3.6099897370329895e-06, "loss": 0.3097, "step": 15923 }, { "epoch": 0.7459596196186817, "grad_norm": 0.6039526631161953, "learning_rate": 3.609819801839877e-06, "loss": 0.2738, "step": 15924 }, { "epoch": 0.7460064646086101, "grad_norm": 0.662434424769413, "learning_rate": 3.609649860260032e-06, "loss": 0.3242, "step": 15925 }, { "epoch": 0.7460533095985384, "grad_norm": 0.56937269793044, "learning_rate": 3.6094799122944302e-06, "loss": 0.2616, "step": 15926 }, { "epoch": 0.7461001545884668, "grad_norm": 0.5903420705880142, "learning_rate": 3.60930995794405e-06, "loss": 0.2985, "step": 15927 }, { "epoch": 0.7461469995783951, "grad_norm": 0.618099038666555, "learning_rate": 3.6091399972098706e-06, "loss": 0.2815, "step": 15928 }, { "epoch": 0.7461938445683234, "grad_norm": 0.5682663301156587, "learning_rate": 3.6089700300928687e-06, "loss": 0.2666, "step": 15929 }, { "epoch": 0.7462406895582517, "grad_norm": 0.6018059961073468, "learning_rate": 3.608800056594023e-06, "loss": 0.2898, "step": 15930 }, { "epoch": 0.7462875345481801, "grad_norm": 0.5781724952723463, "learning_rate": 3.6086300767143117e-06, "loss": 0.2527, "step": 15931 }, { "epoch": 0.7463343795381084, "grad_norm": 0.6088363056076153, "learning_rate": 3.6084600904547137e-06, "loss": 0.2755, "step": 15932 }, { "epoch": 0.7463812245280367, "grad_norm": 0.5596588187929981, "learning_rate": 3.6082900978162054e-06, "loss": 0.2788, "step": 15933 }, { "epoch": 0.746428069517965, "grad_norm": 0.5732557204023084, "learning_rate": 3.6081200987997665e-06, "loss": 0.2678, "step": 15934 }, { "epoch": 0.7464749145078934, "grad_norm": 0.5778488810116748, "learning_rate": 3.6079500934063748e-06, "loss": 0.2769, "step": 15935 }, { "epoch": 0.7465217594978217, "grad_norm": 0.6017217220553293, "learning_rate": 3.6077800816370095e-06, "loss": 0.2781, "step": 15936 }, { "epoch": 0.7465686044877501, "grad_norm": 0.541791932473845, "learning_rate": 3.607610063492648e-06, "loss": 0.259, "step": 15937 }, { "epoch": 0.7466154494776783, "grad_norm": 0.5622511446742082, "learning_rate": 3.607440038974268e-06, "loss": 0.2653, "step": 15938 }, { "epoch": 0.7466622944676067, "grad_norm": 0.577078655438458, "learning_rate": 3.6072700080828506e-06, "loss": 0.2658, "step": 15939 }, { "epoch": 0.746709139457535, "grad_norm": 0.6078223402177968, "learning_rate": 3.6070999708193717e-06, "loss": 0.2917, "step": 15940 }, { "epoch": 0.7467559844474634, "grad_norm": 0.5766177705019798, "learning_rate": 3.6069299271848103e-06, "loss": 0.276, "step": 15941 }, { "epoch": 0.7468028294373916, "grad_norm": 0.601825699843523, "learning_rate": 3.606759877180146e-06, "loss": 0.2916, "step": 15942 }, { "epoch": 0.74684967442732, "grad_norm": 0.5789927888044755, "learning_rate": 3.6065898208063566e-06, "loss": 0.2679, "step": 15943 }, { "epoch": 0.7468965194172483, "grad_norm": 0.5608776497802855, "learning_rate": 3.6064197580644213e-06, "loss": 0.2814, "step": 15944 }, { "epoch": 0.7469433644071767, "grad_norm": 0.6828164266535663, "learning_rate": 3.6062496889553173e-06, "loss": 0.3041, "step": 15945 }, { "epoch": 0.746990209397105, "grad_norm": 0.6546044037252194, "learning_rate": 3.6060796134800256e-06, "loss": 0.2783, "step": 15946 }, { "epoch": 0.7470370543870333, "grad_norm": 0.5748232452254197, "learning_rate": 3.605909531639523e-06, "loss": 0.2694, "step": 15947 }, { "epoch": 0.7470838993769616, "grad_norm": 0.6198750371492736, "learning_rate": 3.6057394434347885e-06, "loss": 0.3029, "step": 15948 }, { "epoch": 0.74713074436689, "grad_norm": 0.5851882062450381, "learning_rate": 3.6055693488668027e-06, "loss": 0.2484, "step": 15949 }, { "epoch": 0.7471775893568183, "grad_norm": 0.5365833052719673, "learning_rate": 3.605399247936543e-06, "loss": 0.2576, "step": 15950 }, { "epoch": 0.7472244343467466, "grad_norm": 0.6102316105130351, "learning_rate": 3.6052291406449873e-06, "loss": 0.2849, "step": 15951 }, { "epoch": 0.7472712793366749, "grad_norm": 0.6223294245676524, "learning_rate": 3.6050590269931167e-06, "loss": 0.295, "step": 15952 }, { "epoch": 0.7473181243266033, "grad_norm": 0.6122295330582322, "learning_rate": 3.604888906981909e-06, "loss": 0.2914, "step": 15953 }, { "epoch": 0.7473649693165316, "grad_norm": 0.6017397108960677, "learning_rate": 3.604718780612343e-06, "loss": 0.2788, "step": 15954 }, { "epoch": 0.74741181430646, "grad_norm": 0.5720730408427458, "learning_rate": 3.604548647885399e-06, "loss": 0.2654, "step": 15955 }, { "epoch": 0.7474586592963882, "grad_norm": 0.5658169131461032, "learning_rate": 3.604378508802054e-06, "loss": 0.2737, "step": 15956 }, { "epoch": 0.7475055042863166, "grad_norm": 0.6246991496137813, "learning_rate": 3.6042083633632883e-06, "loss": 0.302, "step": 15957 }, { "epoch": 0.7475523492762449, "grad_norm": 0.6258658191172445, "learning_rate": 3.6040382115700823e-06, "loss": 0.2903, "step": 15958 }, { "epoch": 0.7475991942661733, "grad_norm": 0.5943201257378148, "learning_rate": 3.6038680534234127e-06, "loss": 0.2772, "step": 15959 }, { "epoch": 0.7476460392561015, "grad_norm": 0.5988606664300674, "learning_rate": 3.60369788892426e-06, "loss": 0.2916, "step": 15960 }, { "epoch": 0.7476928842460299, "grad_norm": 0.6098359505817147, "learning_rate": 3.603527718073604e-06, "loss": 0.2999, "step": 15961 }, { "epoch": 0.7477397292359582, "grad_norm": 0.6578668699357587, "learning_rate": 3.6033575408724226e-06, "loss": 0.301, "step": 15962 }, { "epoch": 0.7477865742258866, "grad_norm": 0.5540077262996457, "learning_rate": 3.603187357321696e-06, "loss": 0.2735, "step": 15963 }, { "epoch": 0.7478334192158149, "grad_norm": 0.5844677139483296, "learning_rate": 3.603017167422404e-06, "loss": 0.2704, "step": 15964 }, { "epoch": 0.7478802642057432, "grad_norm": 0.6812256480603506, "learning_rate": 3.602846971175525e-06, "loss": 0.3087, "step": 15965 }, { "epoch": 0.7479271091956715, "grad_norm": 0.5638764398773914, "learning_rate": 3.6026767685820385e-06, "loss": 0.2666, "step": 15966 }, { "epoch": 0.7479739541855999, "grad_norm": 0.6000286011742684, "learning_rate": 3.6025065596429255e-06, "loss": 0.2784, "step": 15967 }, { "epoch": 0.7480207991755282, "grad_norm": 0.5709981407812031, "learning_rate": 3.602336344359163e-06, "loss": 0.2623, "step": 15968 }, { "epoch": 0.7480676441654565, "grad_norm": 0.5559479946319147, "learning_rate": 3.6021661227317324e-06, "loss": 0.2761, "step": 15969 }, { "epoch": 0.7481144891553848, "grad_norm": 0.6035412008720866, "learning_rate": 3.6019958947616128e-06, "loss": 0.2865, "step": 15970 }, { "epoch": 0.7481613341453132, "grad_norm": 0.5887494689690085, "learning_rate": 3.601825660449784e-06, "loss": 0.2742, "step": 15971 }, { "epoch": 0.7482081791352415, "grad_norm": 0.6184037736219051, "learning_rate": 3.6016554197972252e-06, "loss": 0.2944, "step": 15972 }, { "epoch": 0.7482550241251699, "grad_norm": 0.5618907824403658, "learning_rate": 3.6014851728049165e-06, "loss": 0.2682, "step": 15973 }, { "epoch": 0.7483018691150981, "grad_norm": 0.5459261110979235, "learning_rate": 3.601314919473838e-06, "loss": 0.2672, "step": 15974 }, { "epoch": 0.7483487141050265, "grad_norm": 0.6059275709546549, "learning_rate": 3.601144659804968e-06, "loss": 0.2773, "step": 15975 }, { "epoch": 0.7483955590949548, "grad_norm": 0.6430488296374149, "learning_rate": 3.600974393799288e-06, "loss": 0.2866, "step": 15976 }, { "epoch": 0.7484424040848832, "grad_norm": 0.633571419860621, "learning_rate": 3.600804121457776e-06, "loss": 0.2888, "step": 15977 }, { "epoch": 0.7484892490748114, "grad_norm": 0.6061731979502191, "learning_rate": 3.600633842781414e-06, "loss": 0.2881, "step": 15978 }, { "epoch": 0.7485360940647398, "grad_norm": 0.5971986172508584, "learning_rate": 3.600463557771181e-06, "loss": 0.2837, "step": 15979 }, { "epoch": 0.7485829390546681, "grad_norm": 0.6090944440907998, "learning_rate": 3.600293266428056e-06, "loss": 0.281, "step": 15980 }, { "epoch": 0.7486297840445965, "grad_norm": 0.6298484386687428, "learning_rate": 3.6001229687530203e-06, "loss": 0.292, "step": 15981 }, { "epoch": 0.7486766290345248, "grad_norm": 0.5742882663216226, "learning_rate": 3.5999526647470535e-06, "loss": 0.2684, "step": 15982 }, { "epoch": 0.748723474024453, "grad_norm": 0.573564182914225, "learning_rate": 3.5997823544111354e-06, "loss": 0.2746, "step": 15983 }, { "epoch": 0.7487703190143814, "grad_norm": 0.5796723621221257, "learning_rate": 3.5996120377462464e-06, "loss": 0.2892, "step": 15984 }, { "epoch": 0.7488171640043098, "grad_norm": 0.5632312739055698, "learning_rate": 3.5994417147533656e-06, "loss": 0.2714, "step": 15985 }, { "epoch": 0.7488640089942381, "grad_norm": 0.6164514972073165, "learning_rate": 3.5992713854334753e-06, "loss": 0.2921, "step": 15986 }, { "epoch": 0.7489108539841663, "grad_norm": 0.5905437545833864, "learning_rate": 3.5991010497875533e-06, "loss": 0.2863, "step": 15987 }, { "epoch": 0.7489576989740947, "grad_norm": 0.6044649166000648, "learning_rate": 3.5989307078165826e-06, "loss": 0.2791, "step": 15988 }, { "epoch": 0.749004543964023, "grad_norm": 0.5854705823516622, "learning_rate": 3.598760359521541e-06, "loss": 0.2789, "step": 15989 }, { "epoch": 0.7490513889539514, "grad_norm": 0.6066116679307572, "learning_rate": 3.5985900049034097e-06, "loss": 0.2797, "step": 15990 }, { "epoch": 0.7490982339438798, "grad_norm": 0.5260278883701384, "learning_rate": 3.5984196439631687e-06, "loss": 0.264, "step": 15991 }, { "epoch": 0.749145078933808, "grad_norm": 0.5510320793084281, "learning_rate": 3.598249276701799e-06, "loss": 0.2802, "step": 15992 }, { "epoch": 0.7491919239237363, "grad_norm": 0.5942333705922556, "learning_rate": 3.5980789031202805e-06, "loss": 0.2869, "step": 15993 }, { "epoch": 0.7492387689136647, "grad_norm": 0.5993336170698286, "learning_rate": 3.597908523219594e-06, "loss": 0.2915, "step": 15994 }, { "epoch": 0.749285613903593, "grad_norm": 0.5739947734231611, "learning_rate": 3.597738137000721e-06, "loss": 0.2702, "step": 15995 }, { "epoch": 0.7493324588935213, "grad_norm": 0.6115424342545079, "learning_rate": 3.5975677444646395e-06, "loss": 0.2945, "step": 15996 }, { "epoch": 0.7493793038834496, "grad_norm": 0.5529665001687117, "learning_rate": 3.5973973456123316e-06, "loss": 0.2889, "step": 15997 }, { "epoch": 0.749426148873378, "grad_norm": 0.5112678332478936, "learning_rate": 3.5972269404447786e-06, "loss": 0.2654, "step": 15998 }, { "epoch": 0.7494729938633063, "grad_norm": 0.6073409728208043, "learning_rate": 3.59705652896296e-06, "loss": 0.3027, "step": 15999 }, { "epoch": 0.7495198388532347, "grad_norm": 0.6010134356832945, "learning_rate": 3.5968861111678565e-06, "loss": 0.2991, "step": 16000 }, { "epoch": 0.7495666838431629, "grad_norm": 0.5929589489137215, "learning_rate": 3.59671568706045e-06, "loss": 0.2706, "step": 16001 }, { "epoch": 0.7496135288330913, "grad_norm": 0.5866122353503963, "learning_rate": 3.59654525664172e-06, "loss": 0.2643, "step": 16002 }, { "epoch": 0.7496603738230196, "grad_norm": 0.5785978432382626, "learning_rate": 3.596374819912647e-06, "loss": 0.2766, "step": 16003 }, { "epoch": 0.749707218812948, "grad_norm": 0.6119953013764452, "learning_rate": 3.5962043768742127e-06, "loss": 0.3009, "step": 16004 }, { "epoch": 0.7497540638028762, "grad_norm": 0.6998634545572455, "learning_rate": 3.5960339275273978e-06, "loss": 0.3254, "step": 16005 }, { "epoch": 0.7498009087928046, "grad_norm": 0.5728440424937433, "learning_rate": 3.5958634718731833e-06, "loss": 0.264, "step": 16006 }, { "epoch": 0.7498477537827329, "grad_norm": 0.6070522436378822, "learning_rate": 3.5956930099125498e-06, "loss": 0.2973, "step": 16007 }, { "epoch": 0.7498945987726613, "grad_norm": 0.5740295559545391, "learning_rate": 3.5955225416464785e-06, "loss": 0.2887, "step": 16008 }, { "epoch": 0.7499414437625896, "grad_norm": 0.5707865305816447, "learning_rate": 3.5953520670759503e-06, "loss": 0.2725, "step": 16009 }, { "epoch": 0.7499882887525179, "grad_norm": 0.5937212014269083, "learning_rate": 3.595181586201947e-06, "loss": 0.2911, "step": 16010 }, { "epoch": 0.7500351337424462, "grad_norm": 0.5684939752801427, "learning_rate": 3.595011099025447e-06, "loss": 0.276, "step": 16011 }, { "epoch": 0.7500819787323746, "grad_norm": 0.5626835419783242, "learning_rate": 3.594840605547435e-06, "loss": 0.2805, "step": 16012 }, { "epoch": 0.7501288237223029, "grad_norm": 0.5389350298172505, "learning_rate": 3.5946701057688903e-06, "loss": 0.2763, "step": 16013 }, { "epoch": 0.7501756687122312, "grad_norm": 0.6823841674635265, "learning_rate": 3.594499599690794e-06, "loss": 0.3034, "step": 16014 }, { "epoch": 0.7502225137021595, "grad_norm": 0.604606854571789, "learning_rate": 3.5943290873141275e-06, "loss": 0.275, "step": 16015 }, { "epoch": 0.7502693586920879, "grad_norm": 0.5492226945852864, "learning_rate": 3.5941585686398727e-06, "loss": 0.2906, "step": 16016 }, { "epoch": 0.7503162036820162, "grad_norm": 0.5927687917345751, "learning_rate": 3.59398804366901e-06, "loss": 0.2749, "step": 16017 }, { "epoch": 0.7503630486719446, "grad_norm": 0.6058760834158952, "learning_rate": 3.5938175124025204e-06, "loss": 0.2719, "step": 16018 }, { "epoch": 0.7504098936618728, "grad_norm": 0.6247395551046944, "learning_rate": 3.5936469748413868e-06, "loss": 0.2961, "step": 16019 }, { "epoch": 0.7504567386518012, "grad_norm": 0.5781697529016837, "learning_rate": 3.5934764309865895e-06, "loss": 0.2756, "step": 16020 }, { "epoch": 0.7505035836417295, "grad_norm": 0.5876842723476974, "learning_rate": 3.5933058808391095e-06, "loss": 0.297, "step": 16021 }, { "epoch": 0.7505504286316579, "grad_norm": 0.7008688690252137, "learning_rate": 3.59313532439993e-06, "loss": 0.2871, "step": 16022 }, { "epoch": 0.7505972736215861, "grad_norm": 0.6541209757276157, "learning_rate": 3.5929647616700316e-06, "loss": 0.2818, "step": 16023 }, { "epoch": 0.7506441186115145, "grad_norm": 0.6421888465197554, "learning_rate": 3.5927941926503945e-06, "loss": 0.3072, "step": 16024 }, { "epoch": 0.7506909636014428, "grad_norm": 0.5663008885085352, "learning_rate": 3.5926236173420024e-06, "loss": 0.2793, "step": 16025 }, { "epoch": 0.7507378085913712, "grad_norm": 0.579885335311753, "learning_rate": 3.5924530357458353e-06, "loss": 0.2973, "step": 16026 }, { "epoch": 0.7507846535812995, "grad_norm": 0.5436366093950835, "learning_rate": 3.5922824478628766e-06, "loss": 0.2712, "step": 16027 }, { "epoch": 0.7508314985712278, "grad_norm": 0.6099758643096468, "learning_rate": 3.5921118536941062e-06, "loss": 0.2847, "step": 16028 }, { "epoch": 0.7508783435611561, "grad_norm": 0.5461461551941732, "learning_rate": 3.591941253240507e-06, "loss": 0.2615, "step": 16029 }, { "epoch": 0.7509251885510845, "grad_norm": 0.5953520878936904, "learning_rate": 3.5917706465030597e-06, "loss": 0.2897, "step": 16030 }, { "epoch": 0.7509720335410128, "grad_norm": 0.5990304045348548, "learning_rate": 3.591600033482747e-06, "loss": 0.2685, "step": 16031 }, { "epoch": 0.7510188785309411, "grad_norm": 0.7101304875808792, "learning_rate": 3.5914294141805504e-06, "loss": 0.2852, "step": 16032 }, { "epoch": 0.7510657235208694, "grad_norm": 0.625040251561844, "learning_rate": 3.5912587885974524e-06, "loss": 0.2911, "step": 16033 }, { "epoch": 0.7511125685107978, "grad_norm": 0.6212689962032798, "learning_rate": 3.591088156734434e-06, "loss": 0.2783, "step": 16034 }, { "epoch": 0.7511594135007261, "grad_norm": 0.6002155393683412, "learning_rate": 3.590917518592477e-06, "loss": 0.2896, "step": 16035 }, { "epoch": 0.7512062584906545, "grad_norm": 0.5721860631812639, "learning_rate": 3.590746874172564e-06, "loss": 0.282, "step": 16036 }, { "epoch": 0.7512531034805827, "grad_norm": 0.5623982575177878, "learning_rate": 3.5905762234756787e-06, "loss": 0.2613, "step": 16037 }, { "epoch": 0.7512999484705111, "grad_norm": 0.5931393850789038, "learning_rate": 3.5904055665027992e-06, "loss": 0.2711, "step": 16038 }, { "epoch": 0.7513467934604394, "grad_norm": 0.6462002930323509, "learning_rate": 3.59023490325491e-06, "loss": 0.2893, "step": 16039 }, { "epoch": 0.7513936384503678, "grad_norm": 0.584204602930029, "learning_rate": 3.590064233732994e-06, "loss": 0.2618, "step": 16040 }, { "epoch": 0.751440483440296, "grad_norm": 0.6244209657230236, "learning_rate": 3.5898935579380313e-06, "loss": 0.2902, "step": 16041 }, { "epoch": 0.7514873284302244, "grad_norm": 0.577063576557631, "learning_rate": 3.5897228758710054e-06, "loss": 0.2774, "step": 16042 }, { "epoch": 0.7515341734201527, "grad_norm": 0.5885304054408923, "learning_rate": 3.5895521875328987e-06, "loss": 0.2989, "step": 16043 }, { "epoch": 0.7515810184100811, "grad_norm": 0.6189445568137123, "learning_rate": 3.589381492924693e-06, "loss": 0.2649, "step": 16044 }, { "epoch": 0.7516278634000094, "grad_norm": 0.5733129374795886, "learning_rate": 3.5892107920473695e-06, "loss": 0.2811, "step": 16045 }, { "epoch": 0.7516747083899377, "grad_norm": 0.6375708241739864, "learning_rate": 3.5890400849019126e-06, "loss": 0.2799, "step": 16046 }, { "epoch": 0.751721553379866, "grad_norm": 0.5539081818788938, "learning_rate": 3.588869371489303e-06, "loss": 0.2702, "step": 16047 }, { "epoch": 0.7517683983697944, "grad_norm": 0.5621113523224675, "learning_rate": 3.588698651810525e-06, "loss": 0.2759, "step": 16048 }, { "epoch": 0.7518152433597227, "grad_norm": 0.5802561172086582, "learning_rate": 3.5885279258665588e-06, "loss": 0.2766, "step": 16049 }, { "epoch": 0.751862088349651, "grad_norm": 0.586702708823113, "learning_rate": 3.5883571936583884e-06, "loss": 0.2998, "step": 16050 }, { "epoch": 0.7519089333395793, "grad_norm": 0.6032297925890134, "learning_rate": 3.588186455186996e-06, "loss": 0.2698, "step": 16051 }, { "epoch": 0.7519557783295077, "grad_norm": 0.6405178312786978, "learning_rate": 3.5880157104533636e-06, "loss": 0.3138, "step": 16052 }, { "epoch": 0.752002623319436, "grad_norm": 0.5861258032939775, "learning_rate": 3.587844959458475e-06, "loss": 0.272, "step": 16053 }, { "epoch": 0.7520494683093644, "grad_norm": 0.5691055173483366, "learning_rate": 3.587674202203312e-06, "loss": 0.2838, "step": 16054 }, { "epoch": 0.7520963132992926, "grad_norm": 0.6625237704913407, "learning_rate": 3.5875034386888563e-06, "loss": 0.2699, "step": 16055 }, { "epoch": 0.752143158289221, "grad_norm": 0.6271991742098939, "learning_rate": 3.5873326689160927e-06, "loss": 0.2766, "step": 16056 }, { "epoch": 0.7521900032791493, "grad_norm": 0.5852711779435331, "learning_rate": 3.5871618928860024e-06, "loss": 0.266, "step": 16057 }, { "epoch": 0.7522368482690777, "grad_norm": 0.5794635772067771, "learning_rate": 3.586991110599569e-06, "loss": 0.2914, "step": 16058 }, { "epoch": 0.7522836932590059, "grad_norm": 0.5680097002233615, "learning_rate": 3.586820322057775e-06, "loss": 0.2795, "step": 16059 }, { "epoch": 0.7523305382489343, "grad_norm": 0.5817553413853207, "learning_rate": 3.5866495272616026e-06, "loss": 0.2693, "step": 16060 }, { "epoch": 0.7523773832388626, "grad_norm": 0.5920675952484206, "learning_rate": 3.586478726212036e-06, "loss": 0.2809, "step": 16061 }, { "epoch": 0.752424228228791, "grad_norm": 0.620707756067755, "learning_rate": 3.586307918910057e-06, "loss": 0.259, "step": 16062 }, { "epoch": 0.7524710732187193, "grad_norm": 0.6214393858527882, "learning_rate": 3.5861371053566492e-06, "loss": 0.2854, "step": 16063 }, { "epoch": 0.7525179182086476, "grad_norm": 0.5532073174287729, "learning_rate": 3.5859662855527945e-06, "loss": 0.2776, "step": 16064 }, { "epoch": 0.7525647631985759, "grad_norm": 0.5688399756927729, "learning_rate": 3.585795459499478e-06, "loss": 0.2591, "step": 16065 }, { "epoch": 0.7526116081885043, "grad_norm": 0.6224023606723, "learning_rate": 3.5856246271976815e-06, "loss": 0.2953, "step": 16066 }, { "epoch": 0.7526584531784326, "grad_norm": 0.6135239974265123, "learning_rate": 3.585453788648387e-06, "loss": 0.2678, "step": 16067 }, { "epoch": 0.7527052981683608, "grad_norm": 0.599323451677609, "learning_rate": 3.585282943852579e-06, "loss": 0.2745, "step": 16068 }, { "epoch": 0.7527521431582892, "grad_norm": 0.5720976203687842, "learning_rate": 3.5851120928112416e-06, "loss": 0.2617, "step": 16069 }, { "epoch": 0.7527989881482176, "grad_norm": 0.6096692963891542, "learning_rate": 3.5849412355253556e-06, "loss": 0.277, "step": 16070 }, { "epoch": 0.7528458331381459, "grad_norm": 0.5446403280352587, "learning_rate": 3.584770371995906e-06, "loss": 0.2476, "step": 16071 }, { "epoch": 0.7528926781280743, "grad_norm": 0.5610732179988296, "learning_rate": 3.584599502223876e-06, "loss": 0.2551, "step": 16072 }, { "epoch": 0.7529395231180025, "grad_norm": 0.6623553334952351, "learning_rate": 3.5844286262102478e-06, "loss": 0.2855, "step": 16073 }, { "epoch": 0.7529863681079308, "grad_norm": 0.600339367954832, "learning_rate": 3.5842577439560057e-06, "loss": 0.2875, "step": 16074 }, { "epoch": 0.7530332130978592, "grad_norm": 0.571048589054025, "learning_rate": 3.5840868554621323e-06, "loss": 0.2746, "step": 16075 }, { "epoch": 0.7530800580877876, "grad_norm": 0.5695259335518615, "learning_rate": 3.583915960729612e-06, "loss": 0.2678, "step": 16076 }, { "epoch": 0.7531269030777158, "grad_norm": 0.6213480385126804, "learning_rate": 3.583745059759428e-06, "loss": 0.2908, "step": 16077 }, { "epoch": 0.7531737480676441, "grad_norm": 0.5861629929160639, "learning_rate": 3.583574152552563e-06, "loss": 0.2638, "step": 16078 }, { "epoch": 0.7532205930575725, "grad_norm": 0.586517745523365, "learning_rate": 3.5834032391100015e-06, "loss": 0.2914, "step": 16079 }, { "epoch": 0.7532674380475008, "grad_norm": 0.631593884348073, "learning_rate": 3.5832323194327266e-06, "loss": 0.2859, "step": 16080 }, { "epoch": 0.7533142830374292, "grad_norm": 0.5898293055258457, "learning_rate": 3.583061393521722e-06, "loss": 0.265, "step": 16081 }, { "epoch": 0.7533611280273574, "grad_norm": 0.5446402619079339, "learning_rate": 3.5828904613779716e-06, "loss": 0.2712, "step": 16082 }, { "epoch": 0.7534079730172858, "grad_norm": 0.5711073256031185, "learning_rate": 3.582719523002458e-06, "loss": 0.2756, "step": 16083 }, { "epoch": 0.7534548180072141, "grad_norm": 0.578234544513984, "learning_rate": 3.5825485783961656e-06, "loss": 0.2737, "step": 16084 }, { "epoch": 0.7535016629971425, "grad_norm": 0.6116083384822064, "learning_rate": 3.5823776275600786e-06, "loss": 0.2673, "step": 16085 }, { "epoch": 0.7535485079870707, "grad_norm": 0.5364266096092927, "learning_rate": 3.5822066704951806e-06, "loss": 0.2487, "step": 16086 }, { "epoch": 0.7535953529769991, "grad_norm": 0.6212809822266534, "learning_rate": 3.5820357072024555e-06, "loss": 0.3196, "step": 16087 }, { "epoch": 0.7536421979669274, "grad_norm": 0.5757327533404641, "learning_rate": 3.581864737682886e-06, "loss": 0.2848, "step": 16088 }, { "epoch": 0.7536890429568558, "grad_norm": 0.5866499167968123, "learning_rate": 3.5816937619374576e-06, "loss": 0.2877, "step": 16089 }, { "epoch": 0.7537358879467841, "grad_norm": 0.6099114777114024, "learning_rate": 3.5815227799671533e-06, "loss": 0.2828, "step": 16090 }, { "epoch": 0.7537827329367124, "grad_norm": 0.5951361060678703, "learning_rate": 3.581351791772957e-06, "loss": 0.2747, "step": 16091 }, { "epoch": 0.7538295779266407, "grad_norm": 0.6375392741328699, "learning_rate": 3.5811807973558528e-06, "loss": 0.2848, "step": 16092 }, { "epoch": 0.7538764229165691, "grad_norm": 0.5892341621603713, "learning_rate": 3.5810097967168254e-06, "loss": 0.2815, "step": 16093 }, { "epoch": 0.7539232679064974, "grad_norm": 0.5909359269223153, "learning_rate": 3.5808387898568573e-06, "loss": 0.2875, "step": 16094 }, { "epoch": 0.7539701128964257, "grad_norm": 0.5858570247857245, "learning_rate": 3.580667776776935e-06, "loss": 0.2766, "step": 16095 }, { "epoch": 0.754016957886354, "grad_norm": 0.5742153801403417, "learning_rate": 3.5804967574780403e-06, "loss": 0.2761, "step": 16096 }, { "epoch": 0.7540638028762824, "grad_norm": 0.6000674637656787, "learning_rate": 3.5803257319611585e-06, "loss": 0.2704, "step": 16097 }, { "epoch": 0.7541106478662107, "grad_norm": 0.5531705933876073, "learning_rate": 3.5801547002272742e-06, "loss": 0.2696, "step": 16098 }, { "epoch": 0.7541574928561391, "grad_norm": 0.648210927227527, "learning_rate": 3.5799836622773697e-06, "loss": 0.279, "step": 16099 }, { "epoch": 0.7542043378460673, "grad_norm": 0.659334944014554, "learning_rate": 3.579812618112432e-06, "loss": 0.2916, "step": 16100 }, { "epoch": 0.7542511828359957, "grad_norm": 0.6300822164089617, "learning_rate": 3.579641567733444e-06, "loss": 0.2883, "step": 16101 }, { "epoch": 0.754298027825924, "grad_norm": 0.6096271396792134, "learning_rate": 3.5794705111413898e-06, "loss": 0.285, "step": 16102 }, { "epoch": 0.7543448728158524, "grad_norm": 0.549068414903634, "learning_rate": 3.579299448337254e-06, "loss": 0.2613, "step": 16103 }, { "epoch": 0.7543917178057806, "grad_norm": 0.5905902463855941, "learning_rate": 3.5791283793220215e-06, "loss": 0.2941, "step": 16104 }, { "epoch": 0.754438562795709, "grad_norm": 0.5791587205346425, "learning_rate": 3.578957304096676e-06, "loss": 0.2865, "step": 16105 }, { "epoch": 0.7544854077856373, "grad_norm": 0.6215213403856941, "learning_rate": 3.578786222662202e-06, "loss": 0.2763, "step": 16106 }, { "epoch": 0.7545322527755657, "grad_norm": 0.6048041761111169, "learning_rate": 3.5786151350195862e-06, "loss": 0.3015, "step": 16107 }, { "epoch": 0.754579097765494, "grad_norm": 0.5863449237186966, "learning_rate": 3.5784440411698097e-06, "loss": 0.2765, "step": 16108 }, { "epoch": 0.7546259427554223, "grad_norm": 0.5353397486413867, "learning_rate": 3.578272941113859e-06, "loss": 0.2739, "step": 16109 }, { "epoch": 0.7546727877453506, "grad_norm": 0.5898186714199573, "learning_rate": 3.5781018348527193e-06, "loss": 0.2799, "step": 16110 }, { "epoch": 0.754719632735279, "grad_norm": 0.6788960011917791, "learning_rate": 3.577930722387374e-06, "loss": 0.3193, "step": 16111 }, { "epoch": 0.7547664777252073, "grad_norm": 0.6018576205815354, "learning_rate": 3.5777596037188082e-06, "loss": 0.2927, "step": 16112 }, { "epoch": 0.7548133227151356, "grad_norm": 0.6077149868471232, "learning_rate": 3.577588478848007e-06, "loss": 0.2829, "step": 16113 }, { "epoch": 0.7548601677050639, "grad_norm": 0.5736507105179476, "learning_rate": 3.5774173477759556e-06, "loss": 0.2955, "step": 16114 }, { "epoch": 0.7549070126949923, "grad_norm": 0.5743401431339382, "learning_rate": 3.5772462105036364e-06, "loss": 0.2712, "step": 16115 }, { "epoch": 0.7549538576849206, "grad_norm": 0.6283119450348913, "learning_rate": 3.5770750670320376e-06, "loss": 0.3021, "step": 16116 }, { "epoch": 0.755000702674849, "grad_norm": 0.5824866420791499, "learning_rate": 3.576903917362142e-06, "loss": 0.2916, "step": 16117 }, { "epoch": 0.7550475476647772, "grad_norm": 0.5656998078134705, "learning_rate": 3.576732761494935e-06, "loss": 0.2703, "step": 16118 }, { "epoch": 0.7550943926547056, "grad_norm": 0.5517498841777978, "learning_rate": 3.576561599431402e-06, "loss": 0.2805, "step": 16119 }, { "epoch": 0.7551412376446339, "grad_norm": 0.564838180555526, "learning_rate": 3.5763904311725265e-06, "loss": 0.2788, "step": 16120 }, { "epoch": 0.7551880826345623, "grad_norm": 0.5963747371129205, "learning_rate": 3.5762192567192954e-06, "loss": 0.2779, "step": 16121 }, { "epoch": 0.7552349276244905, "grad_norm": 0.5766341509733682, "learning_rate": 3.576048076072693e-06, "loss": 0.2855, "step": 16122 }, { "epoch": 0.7552817726144189, "grad_norm": 0.6032674076383179, "learning_rate": 3.5758768892337043e-06, "loss": 0.2798, "step": 16123 }, { "epoch": 0.7553286176043472, "grad_norm": 0.5911166765651422, "learning_rate": 3.5757056962033144e-06, "loss": 0.2844, "step": 16124 }, { "epoch": 0.7553754625942756, "grad_norm": 0.560291758268338, "learning_rate": 3.5755344969825083e-06, "loss": 0.2862, "step": 16125 }, { "epoch": 0.7554223075842039, "grad_norm": 0.5808309613094684, "learning_rate": 3.5753632915722724e-06, "loss": 0.2894, "step": 16126 }, { "epoch": 0.7554691525741322, "grad_norm": 0.6404454870175129, "learning_rate": 3.57519207997359e-06, "loss": 0.313, "step": 16127 }, { "epoch": 0.7555159975640605, "grad_norm": 0.6063358830899029, "learning_rate": 3.575020862187448e-06, "loss": 0.2682, "step": 16128 }, { "epoch": 0.7555628425539889, "grad_norm": 0.6180499697878769, "learning_rate": 3.574849638214831e-06, "loss": 0.2686, "step": 16129 }, { "epoch": 0.7556096875439172, "grad_norm": 0.6122586079280127, "learning_rate": 3.5746784080567244e-06, "loss": 0.2934, "step": 16130 }, { "epoch": 0.7556565325338455, "grad_norm": 0.6525414641791607, "learning_rate": 3.5745071717141137e-06, "loss": 0.2772, "step": 16131 }, { "epoch": 0.7557033775237738, "grad_norm": 0.6182206525130564, "learning_rate": 3.5743359291879846e-06, "loss": 0.2898, "step": 16132 }, { "epoch": 0.7557502225137022, "grad_norm": 0.5913504770844052, "learning_rate": 3.5741646804793218e-06, "loss": 0.2646, "step": 16133 }, { "epoch": 0.7557970675036305, "grad_norm": 0.5575691577819043, "learning_rate": 3.573993425589111e-06, "loss": 0.2691, "step": 16134 }, { "epoch": 0.7558439124935589, "grad_norm": 0.5498665321213688, "learning_rate": 3.5738221645183396e-06, "loss": 0.2713, "step": 16135 }, { "epoch": 0.7558907574834871, "grad_norm": 0.555975482738851, "learning_rate": 3.57365089726799e-06, "loss": 0.2778, "step": 16136 }, { "epoch": 0.7559376024734155, "grad_norm": 0.5659129487700812, "learning_rate": 3.5734796238390497e-06, "loss": 0.2829, "step": 16137 }, { "epoch": 0.7559844474633438, "grad_norm": 0.577232085596838, "learning_rate": 3.573308344232504e-06, "loss": 0.2741, "step": 16138 }, { "epoch": 0.7560312924532722, "grad_norm": 0.6218803705570125, "learning_rate": 3.5731370584493384e-06, "loss": 0.2862, "step": 16139 }, { "epoch": 0.7560781374432004, "grad_norm": 0.5722947688924328, "learning_rate": 3.572965766490539e-06, "loss": 0.2653, "step": 16140 }, { "epoch": 0.7561249824331288, "grad_norm": 0.5818304459785764, "learning_rate": 3.5727944683570915e-06, "loss": 0.2832, "step": 16141 }, { "epoch": 0.7561718274230571, "grad_norm": 0.6190921075671393, "learning_rate": 3.572623164049981e-06, "loss": 0.2816, "step": 16142 }, { "epoch": 0.7562186724129855, "grad_norm": 0.5682110648190759, "learning_rate": 3.5724518535701936e-06, "loss": 0.2582, "step": 16143 }, { "epoch": 0.7562655174029138, "grad_norm": 0.5814448911977756, "learning_rate": 3.572280536918716e-06, "loss": 0.2602, "step": 16144 }, { "epoch": 0.756312362392842, "grad_norm": 0.5772291573219168, "learning_rate": 3.572109214096533e-06, "loss": 0.2805, "step": 16145 }, { "epoch": 0.7563592073827704, "grad_norm": 0.6221247890308774, "learning_rate": 3.571937885104631e-06, "loss": 0.2751, "step": 16146 }, { "epoch": 0.7564060523726988, "grad_norm": 0.5581922169300597, "learning_rate": 3.5717665499439957e-06, "loss": 0.2779, "step": 16147 }, { "epoch": 0.7564528973626271, "grad_norm": 0.5996199987936965, "learning_rate": 3.5715952086156136e-06, "loss": 0.2945, "step": 16148 }, { "epoch": 0.7564997423525553, "grad_norm": 0.6213117079207607, "learning_rate": 3.5714238611204704e-06, "loss": 0.2922, "step": 16149 }, { "epoch": 0.7565465873424837, "grad_norm": 0.5916858178298129, "learning_rate": 3.571252507459552e-06, "loss": 0.2787, "step": 16150 }, { "epoch": 0.756593432332412, "grad_norm": 0.5844901091129492, "learning_rate": 3.571081147633845e-06, "loss": 0.2835, "step": 16151 }, { "epoch": 0.7566402773223404, "grad_norm": 0.5647618104752339, "learning_rate": 3.570909781644335e-06, "loss": 0.2606, "step": 16152 }, { "epoch": 0.7566871223122688, "grad_norm": 0.5592994294491684, "learning_rate": 3.5707384094920083e-06, "loss": 0.2541, "step": 16153 }, { "epoch": 0.756733967302197, "grad_norm": 0.5666678593212424, "learning_rate": 3.570567031177851e-06, "loss": 0.265, "step": 16154 }, { "epoch": 0.7567808122921253, "grad_norm": 0.606670586646136, "learning_rate": 3.5703956467028495e-06, "loss": 0.287, "step": 16155 }, { "epoch": 0.7568276572820537, "grad_norm": 0.6303159840966349, "learning_rate": 3.5702242560679914e-06, "loss": 0.2887, "step": 16156 }, { "epoch": 0.756874502271982, "grad_norm": 0.5415821327756524, "learning_rate": 3.57005285927426e-06, "loss": 0.2616, "step": 16157 }, { "epoch": 0.7569213472619103, "grad_norm": 0.5919767887475827, "learning_rate": 3.5698814563226437e-06, "loss": 0.2979, "step": 16158 }, { "epoch": 0.7569681922518386, "grad_norm": 0.6060856199804494, "learning_rate": 3.5697100472141287e-06, "loss": 0.2851, "step": 16159 }, { "epoch": 0.757015037241767, "grad_norm": 0.5954163602282094, "learning_rate": 3.5695386319497017e-06, "loss": 0.2681, "step": 16160 }, { "epoch": 0.7570618822316953, "grad_norm": 0.5709262638848996, "learning_rate": 3.569367210530348e-06, "loss": 0.2809, "step": 16161 }, { "epoch": 0.7571087272216237, "grad_norm": 0.604828619337463, "learning_rate": 3.569195782957055e-06, "loss": 0.2713, "step": 16162 }, { "epoch": 0.7571555722115519, "grad_norm": 0.6051368643312373, "learning_rate": 3.5690243492308095e-06, "loss": 0.2971, "step": 16163 }, { "epoch": 0.7572024172014803, "grad_norm": 0.5643673500860458, "learning_rate": 3.568852909352597e-06, "loss": 0.27, "step": 16164 }, { "epoch": 0.7572492621914086, "grad_norm": 0.5976441793035419, "learning_rate": 3.5686814633234044e-06, "loss": 0.3018, "step": 16165 }, { "epoch": 0.757296107181337, "grad_norm": 0.596813232933636, "learning_rate": 3.568510011144219e-06, "loss": 0.2994, "step": 16166 }, { "epoch": 0.7573429521712652, "grad_norm": 0.6034525182166526, "learning_rate": 3.5683385528160276e-06, "loss": 0.2921, "step": 16167 }, { "epoch": 0.7573897971611936, "grad_norm": 0.5646746229608317, "learning_rate": 3.5681670883398157e-06, "loss": 0.2752, "step": 16168 }, { "epoch": 0.7574366421511219, "grad_norm": 0.6153353984451497, "learning_rate": 3.5679956177165705e-06, "loss": 0.2829, "step": 16169 }, { "epoch": 0.7574834871410503, "grad_norm": 0.6146826269247286, "learning_rate": 3.567824140947279e-06, "loss": 0.2891, "step": 16170 }, { "epoch": 0.7575303321309786, "grad_norm": 0.5841227870457834, "learning_rate": 3.5676526580329286e-06, "loss": 0.2905, "step": 16171 }, { "epoch": 0.7575771771209069, "grad_norm": 0.6273167154422492, "learning_rate": 3.5674811689745047e-06, "loss": 0.2906, "step": 16172 }, { "epoch": 0.7576240221108352, "grad_norm": 0.5857325435908655, "learning_rate": 3.5673096737729955e-06, "loss": 0.2851, "step": 16173 }, { "epoch": 0.7576708671007636, "grad_norm": 0.6106741954759449, "learning_rate": 3.5671381724293874e-06, "loss": 0.2931, "step": 16174 }, { "epoch": 0.7577177120906919, "grad_norm": 0.5964272902164209, "learning_rate": 3.5669666649446667e-06, "loss": 0.2865, "step": 16175 }, { "epoch": 0.7577645570806202, "grad_norm": 0.5640816082466202, "learning_rate": 3.566795151319821e-06, "loss": 0.275, "step": 16176 }, { "epoch": 0.7578114020705485, "grad_norm": 0.6339648296532054, "learning_rate": 3.566623631555839e-06, "loss": 0.2821, "step": 16177 }, { "epoch": 0.7578582470604769, "grad_norm": 0.6085708468186609, "learning_rate": 3.5664521056537043e-06, "loss": 0.2862, "step": 16178 }, { "epoch": 0.7579050920504052, "grad_norm": 0.5727227486367701, "learning_rate": 3.5662805736144057e-06, "loss": 0.2902, "step": 16179 }, { "epoch": 0.7579519370403336, "grad_norm": 0.5790190237937736, "learning_rate": 3.5661090354389315e-06, "loss": 0.2632, "step": 16180 }, { "epoch": 0.7579987820302618, "grad_norm": 0.5946117087083941, "learning_rate": 3.5659374911282672e-06, "loss": 0.2732, "step": 16181 }, { "epoch": 0.7580456270201902, "grad_norm": 0.5601863167548584, "learning_rate": 3.5657659406834005e-06, "loss": 0.27, "step": 16182 }, { "epoch": 0.7580924720101185, "grad_norm": 0.5697770364965151, "learning_rate": 3.5655943841053194e-06, "loss": 0.2552, "step": 16183 }, { "epoch": 0.7581393170000469, "grad_norm": 0.598057937291885, "learning_rate": 3.56542282139501e-06, "loss": 0.2811, "step": 16184 }, { "epoch": 0.7581861619899751, "grad_norm": 0.5615128120229599, "learning_rate": 3.5652512525534596e-06, "loss": 0.269, "step": 16185 }, { "epoch": 0.7582330069799035, "grad_norm": 0.6599338716338895, "learning_rate": 3.5650796775816565e-06, "loss": 0.2976, "step": 16186 }, { "epoch": 0.7582798519698318, "grad_norm": 0.5917537590585764, "learning_rate": 3.564908096480587e-06, "loss": 0.2782, "step": 16187 }, { "epoch": 0.7583266969597602, "grad_norm": 0.5885125628709659, "learning_rate": 3.56473650925124e-06, "loss": 0.2721, "step": 16188 }, { "epoch": 0.7583735419496885, "grad_norm": 0.6050226886241287, "learning_rate": 3.5645649158946007e-06, "loss": 0.2799, "step": 16189 }, { "epoch": 0.7584203869396168, "grad_norm": 0.6044090371314074, "learning_rate": 3.564393316411659e-06, "loss": 0.2655, "step": 16190 }, { "epoch": 0.7584672319295451, "grad_norm": 0.6289366223529834, "learning_rate": 3.564221710803401e-06, "loss": 0.2849, "step": 16191 }, { "epoch": 0.7585140769194735, "grad_norm": 0.5697902692830694, "learning_rate": 3.564050099070814e-06, "loss": 0.2556, "step": 16192 }, { "epoch": 0.7585609219094018, "grad_norm": 0.6040302793825012, "learning_rate": 3.563878481214887e-06, "loss": 0.2895, "step": 16193 }, { "epoch": 0.7586077668993301, "grad_norm": 0.5846101197023051, "learning_rate": 3.5637068572366064e-06, "loss": 0.301, "step": 16194 }, { "epoch": 0.7586546118892584, "grad_norm": 0.5902301754671518, "learning_rate": 3.5635352271369596e-06, "loss": 0.2731, "step": 16195 }, { "epoch": 0.7587014568791868, "grad_norm": 0.5979664583149993, "learning_rate": 3.5633635909169355e-06, "loss": 0.2846, "step": 16196 }, { "epoch": 0.7587483018691151, "grad_norm": 0.5948536794863565, "learning_rate": 3.5631919485775207e-06, "loss": 0.29, "step": 16197 }, { "epoch": 0.7587951468590435, "grad_norm": 0.6145694899428122, "learning_rate": 3.563020300119704e-06, "loss": 0.2829, "step": 16198 }, { "epoch": 0.7588419918489717, "grad_norm": 0.5631404399858597, "learning_rate": 3.5628486455444725e-06, "loss": 0.276, "step": 16199 }, { "epoch": 0.7588888368389001, "grad_norm": 0.6239989972243263, "learning_rate": 3.562676984852814e-06, "loss": 0.2889, "step": 16200 }, { "epoch": 0.7589356818288284, "grad_norm": 0.5891113899865128, "learning_rate": 3.562505318045717e-06, "loss": 0.2962, "step": 16201 }, { "epoch": 0.7589825268187568, "grad_norm": 0.6254199963335689, "learning_rate": 3.5623336451241684e-06, "loss": 0.2719, "step": 16202 }, { "epoch": 0.759029371808685, "grad_norm": 0.596503208726089, "learning_rate": 3.5621619660891573e-06, "loss": 0.2896, "step": 16203 }, { "epoch": 0.7590762167986134, "grad_norm": 0.6484317200898351, "learning_rate": 3.56199028094167e-06, "loss": 0.2839, "step": 16204 }, { "epoch": 0.7591230617885417, "grad_norm": 0.6153281469013983, "learning_rate": 3.5618185896826966e-06, "loss": 0.2559, "step": 16205 }, { "epoch": 0.7591699067784701, "grad_norm": 0.657286518810751, "learning_rate": 3.5616468923132237e-06, "loss": 0.2857, "step": 16206 }, { "epoch": 0.7592167517683984, "grad_norm": 0.5651461928612688, "learning_rate": 3.5614751888342397e-06, "loss": 0.262, "step": 16207 }, { "epoch": 0.7592635967583267, "grad_norm": 0.5491373876212977, "learning_rate": 3.5613034792467328e-06, "loss": 0.2731, "step": 16208 }, { "epoch": 0.759310441748255, "grad_norm": 0.6268106633115117, "learning_rate": 3.561131763551692e-06, "loss": 0.2793, "step": 16209 }, { "epoch": 0.7593572867381834, "grad_norm": 0.6083279268183015, "learning_rate": 3.560960041750103e-06, "loss": 0.273, "step": 16210 }, { "epoch": 0.7594041317281117, "grad_norm": 0.6337585794122821, "learning_rate": 3.5607883138429567e-06, "loss": 0.2946, "step": 16211 }, { "epoch": 0.75945097671804, "grad_norm": 0.5754349110791074, "learning_rate": 3.56061657983124e-06, "loss": 0.2633, "step": 16212 }, { "epoch": 0.7594978217079683, "grad_norm": 0.6015246146505346, "learning_rate": 3.5604448397159414e-06, "loss": 0.275, "step": 16213 }, { "epoch": 0.7595446666978967, "grad_norm": 0.6294917188058851, "learning_rate": 3.5602730934980496e-06, "loss": 0.2888, "step": 16214 }, { "epoch": 0.759591511687825, "grad_norm": 0.5431668439373978, "learning_rate": 3.5601013411785517e-06, "loss": 0.2707, "step": 16215 }, { "epoch": 0.7596383566777534, "grad_norm": 0.6145891433396458, "learning_rate": 3.559929582758438e-06, "loss": 0.2981, "step": 16216 }, { "epoch": 0.7596852016676816, "grad_norm": 0.5941325186471358, "learning_rate": 3.559757818238696e-06, "loss": 0.281, "step": 16217 }, { "epoch": 0.75973204665761, "grad_norm": 0.5911416297229579, "learning_rate": 3.5595860476203132e-06, "loss": 0.3005, "step": 16218 }, { "epoch": 0.7597788916475383, "grad_norm": 0.615417786047728, "learning_rate": 3.55941427090428e-06, "loss": 0.2788, "step": 16219 }, { "epoch": 0.7598257366374667, "grad_norm": 0.6203472797745686, "learning_rate": 3.5592424880915834e-06, "loss": 0.2782, "step": 16220 }, { "epoch": 0.7598725816273949, "grad_norm": 0.5886002259242469, "learning_rate": 3.5590706991832124e-06, "loss": 0.2999, "step": 16221 }, { "epoch": 0.7599194266173233, "grad_norm": 0.5934665828006828, "learning_rate": 3.5588989041801565e-06, "loss": 0.2843, "step": 16222 }, { "epoch": 0.7599662716072516, "grad_norm": 0.6183801861995202, "learning_rate": 3.558727103083403e-06, "loss": 0.2895, "step": 16223 }, { "epoch": 0.76001311659718, "grad_norm": 0.578392162256575, "learning_rate": 3.5585552958939406e-06, "loss": 0.2808, "step": 16224 }, { "epoch": 0.7600599615871083, "grad_norm": 0.6291884785889544, "learning_rate": 3.558383482612759e-06, "loss": 0.299, "step": 16225 }, { "epoch": 0.7601068065770366, "grad_norm": 0.6639721183603817, "learning_rate": 3.558211663240847e-06, "loss": 0.2899, "step": 16226 }, { "epoch": 0.7601536515669649, "grad_norm": 0.5745660074940874, "learning_rate": 3.5580398377791924e-06, "loss": 0.2724, "step": 16227 }, { "epoch": 0.7602004965568933, "grad_norm": 0.5343277836231608, "learning_rate": 3.557868006228784e-06, "loss": 0.2606, "step": 16228 }, { "epoch": 0.7602473415468216, "grad_norm": 0.5759110077650613, "learning_rate": 3.557696168590612e-06, "loss": 0.2899, "step": 16229 }, { "epoch": 0.7602941865367498, "grad_norm": 0.5771028073329735, "learning_rate": 3.557524324865664e-06, "loss": 0.2732, "step": 16230 }, { "epoch": 0.7603410315266782, "grad_norm": 0.5938775620950735, "learning_rate": 3.557352475054929e-06, "loss": 0.2817, "step": 16231 }, { "epoch": 0.7603878765166066, "grad_norm": 0.5612036387707001, "learning_rate": 3.5571806191593965e-06, "loss": 0.2676, "step": 16232 }, { "epoch": 0.7604347215065349, "grad_norm": 0.6423734952937484, "learning_rate": 3.5570087571800554e-06, "loss": 0.2696, "step": 16233 }, { "epoch": 0.7604815664964633, "grad_norm": 0.6110562776422198, "learning_rate": 3.556836889117894e-06, "loss": 0.2821, "step": 16234 }, { "epoch": 0.7605284114863915, "grad_norm": 0.5952725620495739, "learning_rate": 3.5566650149739025e-06, "loss": 0.2764, "step": 16235 }, { "epoch": 0.7605752564763198, "grad_norm": 0.636840192475055, "learning_rate": 3.5564931347490694e-06, "loss": 0.2675, "step": 16236 }, { "epoch": 0.7606221014662482, "grad_norm": 0.5752737872858359, "learning_rate": 3.5563212484443837e-06, "loss": 0.2661, "step": 16237 }, { "epoch": 0.7606689464561766, "grad_norm": 0.6202163291853909, "learning_rate": 3.556149356060835e-06, "loss": 0.2796, "step": 16238 }, { "epoch": 0.7607157914461048, "grad_norm": 0.5997072725614626, "learning_rate": 3.5559774575994115e-06, "loss": 0.2946, "step": 16239 }, { "epoch": 0.7607626364360331, "grad_norm": 0.6329788300987487, "learning_rate": 3.5558055530611037e-06, "loss": 0.2824, "step": 16240 }, { "epoch": 0.7608094814259615, "grad_norm": 0.5996869094893859, "learning_rate": 3.5556336424469007e-06, "loss": 0.3019, "step": 16241 }, { "epoch": 0.7608563264158898, "grad_norm": 0.5611870454544642, "learning_rate": 3.5554617257577905e-06, "loss": 0.2716, "step": 16242 }, { "epoch": 0.7609031714058182, "grad_norm": 0.5320115619975463, "learning_rate": 3.5552898029947637e-06, "loss": 0.265, "step": 16243 }, { "epoch": 0.7609500163957464, "grad_norm": 0.6537778187746973, "learning_rate": 3.55511787415881e-06, "loss": 0.3002, "step": 16244 }, { "epoch": 0.7609968613856748, "grad_norm": 0.5309243372519022, "learning_rate": 3.5549459392509165e-06, "loss": 0.2635, "step": 16245 }, { "epoch": 0.7610437063756031, "grad_norm": 0.5998378114003343, "learning_rate": 3.554773998272075e-06, "loss": 0.2777, "step": 16246 }, { "epoch": 0.7610905513655315, "grad_norm": 0.6106779896432268, "learning_rate": 3.5546020512232755e-06, "loss": 0.2979, "step": 16247 }, { "epoch": 0.7611373963554597, "grad_norm": 0.5743365230201293, "learning_rate": 3.554430098105505e-06, "loss": 0.27, "step": 16248 }, { "epoch": 0.7611842413453881, "grad_norm": 0.5803097673419392, "learning_rate": 3.5542581389197545e-06, "loss": 0.287, "step": 16249 }, { "epoch": 0.7612310863353164, "grad_norm": 0.5869358922362459, "learning_rate": 3.5540861736670136e-06, "loss": 0.2917, "step": 16250 }, { "epoch": 0.7612779313252448, "grad_norm": 0.5993087699357083, "learning_rate": 3.553914202348272e-06, "loss": 0.2911, "step": 16251 }, { "epoch": 0.7613247763151731, "grad_norm": 0.5465300931770316, "learning_rate": 3.5537422249645183e-06, "loss": 0.2725, "step": 16252 }, { "epoch": 0.7613716213051014, "grad_norm": 0.6485637623046329, "learning_rate": 3.553570241516743e-06, "loss": 0.2897, "step": 16253 }, { "epoch": 0.7614184662950297, "grad_norm": 0.6063584187081511, "learning_rate": 3.553398252005936e-06, "loss": 0.2922, "step": 16254 }, { "epoch": 0.7614653112849581, "grad_norm": 0.6543356984637086, "learning_rate": 3.553226256433087e-06, "loss": 0.2844, "step": 16255 }, { "epoch": 0.7615121562748864, "grad_norm": 0.632953823263304, "learning_rate": 3.553054254799186e-06, "loss": 0.2638, "step": 16256 }, { "epoch": 0.7615590012648147, "grad_norm": 0.586627406747517, "learning_rate": 3.5528822471052214e-06, "loss": 0.2901, "step": 16257 }, { "epoch": 0.761605846254743, "grad_norm": 0.6045438785801738, "learning_rate": 3.5527102333521845e-06, "loss": 0.2886, "step": 16258 }, { "epoch": 0.7616526912446714, "grad_norm": 0.5932600489454549, "learning_rate": 3.5525382135410646e-06, "loss": 0.2721, "step": 16259 }, { "epoch": 0.7616995362345997, "grad_norm": 0.5627799297688959, "learning_rate": 3.552366187672853e-06, "loss": 0.2845, "step": 16260 }, { "epoch": 0.7617463812245281, "grad_norm": 0.5947410374498892, "learning_rate": 3.552194155748537e-06, "loss": 0.2853, "step": 16261 }, { "epoch": 0.7617932262144563, "grad_norm": 0.6098506842020134, "learning_rate": 3.552022117769109e-06, "loss": 0.2887, "step": 16262 }, { "epoch": 0.7618400712043847, "grad_norm": 0.5391588675228401, "learning_rate": 3.5518500737355578e-06, "loss": 0.2825, "step": 16263 }, { "epoch": 0.761886916194313, "grad_norm": 0.5225737778511634, "learning_rate": 3.551678023648874e-06, "loss": 0.2613, "step": 16264 }, { "epoch": 0.7619337611842414, "grad_norm": 0.5857147210787331, "learning_rate": 3.551505967510047e-06, "loss": 0.2931, "step": 16265 }, { "epoch": 0.7619806061741696, "grad_norm": 0.589112620530884, "learning_rate": 3.5513339053200678e-06, "loss": 0.2788, "step": 16266 }, { "epoch": 0.762027451164098, "grad_norm": 0.5875266313402481, "learning_rate": 3.551161837079926e-06, "loss": 0.2997, "step": 16267 }, { "epoch": 0.7620742961540263, "grad_norm": 0.5638012902386333, "learning_rate": 3.5509897627906124e-06, "loss": 0.2875, "step": 16268 }, { "epoch": 0.7621211411439547, "grad_norm": 0.618047152927961, "learning_rate": 3.5508176824531172e-06, "loss": 0.2877, "step": 16269 }, { "epoch": 0.762167986133883, "grad_norm": 0.6393303956777462, "learning_rate": 3.5506455960684294e-06, "loss": 0.2967, "step": 16270 }, { "epoch": 0.7622148311238113, "grad_norm": 0.559557731686686, "learning_rate": 3.550473503637541e-06, "loss": 0.2844, "step": 16271 }, { "epoch": 0.7622616761137396, "grad_norm": 0.6063402156614107, "learning_rate": 3.5503014051614416e-06, "loss": 0.2809, "step": 16272 }, { "epoch": 0.762308521103668, "grad_norm": 0.6172355762346988, "learning_rate": 3.550129300641121e-06, "loss": 0.3059, "step": 16273 }, { "epoch": 0.7623553660935963, "grad_norm": 0.5879359234437209, "learning_rate": 3.5499571900775702e-06, "loss": 0.2805, "step": 16274 }, { "epoch": 0.7624022110835246, "grad_norm": 0.6133770777943257, "learning_rate": 3.549785073471781e-06, "loss": 0.2819, "step": 16275 }, { "epoch": 0.7624490560734529, "grad_norm": 0.5794627012974606, "learning_rate": 3.549612950824741e-06, "loss": 0.277, "step": 16276 }, { "epoch": 0.7624959010633813, "grad_norm": 0.624330573739565, "learning_rate": 3.549440822137443e-06, "loss": 0.288, "step": 16277 }, { "epoch": 0.7625427460533096, "grad_norm": 0.6124258293062235, "learning_rate": 3.5492686874108767e-06, "loss": 0.275, "step": 16278 }, { "epoch": 0.762589591043238, "grad_norm": 0.620363289605904, "learning_rate": 3.5490965466460333e-06, "loss": 0.2962, "step": 16279 }, { "epoch": 0.7626364360331662, "grad_norm": 0.6043955735681573, "learning_rate": 3.5489243998439016e-06, "loss": 0.2828, "step": 16280 }, { "epoch": 0.7626832810230946, "grad_norm": 0.5627831903430527, "learning_rate": 3.5487522470054747e-06, "loss": 0.2757, "step": 16281 }, { "epoch": 0.7627301260130229, "grad_norm": 0.6600148728820255, "learning_rate": 3.5485800881317424e-06, "loss": 0.2965, "step": 16282 }, { "epoch": 0.7627769710029513, "grad_norm": 0.5523793046392632, "learning_rate": 3.5484079232236944e-06, "loss": 0.2696, "step": 16283 }, { "epoch": 0.7628238159928795, "grad_norm": 0.6098165651612741, "learning_rate": 3.548235752282323e-06, "loss": 0.3017, "step": 16284 }, { "epoch": 0.7628706609828079, "grad_norm": 0.6135754986171461, "learning_rate": 3.548063575308618e-06, "loss": 0.2686, "step": 16285 }, { "epoch": 0.7629175059727362, "grad_norm": 0.5861558214978075, "learning_rate": 3.5478913923035707e-06, "loss": 0.2845, "step": 16286 }, { "epoch": 0.7629643509626646, "grad_norm": 0.5871019752160693, "learning_rate": 3.5477192032681717e-06, "loss": 0.2774, "step": 16287 }, { "epoch": 0.7630111959525929, "grad_norm": 0.5562983066374798, "learning_rate": 3.5475470082034118e-06, "loss": 0.2688, "step": 16288 }, { "epoch": 0.7630580409425212, "grad_norm": 0.5987747478210154, "learning_rate": 3.5473748071102827e-06, "loss": 0.3093, "step": 16289 }, { "epoch": 0.7631048859324495, "grad_norm": 0.6105719991452535, "learning_rate": 3.547202599989775e-06, "loss": 0.2826, "step": 16290 }, { "epoch": 0.7631517309223779, "grad_norm": 0.553829741822328, "learning_rate": 3.5470303868428787e-06, "loss": 0.2617, "step": 16291 }, { "epoch": 0.7631985759123062, "grad_norm": 0.5640569781602804, "learning_rate": 3.546858167670586e-06, "loss": 0.2784, "step": 16292 }, { "epoch": 0.7632454209022345, "grad_norm": 0.5589871425423341, "learning_rate": 3.546685942473888e-06, "loss": 0.2632, "step": 16293 }, { "epoch": 0.7632922658921628, "grad_norm": 0.5605658853807014, "learning_rate": 3.546513711253775e-06, "loss": 0.2857, "step": 16294 }, { "epoch": 0.7633391108820912, "grad_norm": 0.6081199176170475, "learning_rate": 3.5463414740112386e-06, "loss": 0.3006, "step": 16295 }, { "epoch": 0.7633859558720195, "grad_norm": 0.6007036065634054, "learning_rate": 3.5461692307472707e-06, "loss": 0.2779, "step": 16296 }, { "epoch": 0.7634328008619479, "grad_norm": 0.5911233666555269, "learning_rate": 3.5459969814628615e-06, "loss": 0.2947, "step": 16297 }, { "epoch": 0.7634796458518761, "grad_norm": 0.5672058328158971, "learning_rate": 3.5458247261590018e-06, "loss": 0.294, "step": 16298 }, { "epoch": 0.7635264908418045, "grad_norm": 0.5680547749656457, "learning_rate": 3.545652464836684e-06, "loss": 0.2634, "step": 16299 }, { "epoch": 0.7635733358317328, "grad_norm": 0.6242354219909723, "learning_rate": 3.5454801974969e-06, "loss": 0.2779, "step": 16300 }, { "epoch": 0.7636201808216612, "grad_norm": 0.5738304710327033, "learning_rate": 3.545307924140639e-06, "loss": 0.2914, "step": 16301 }, { "epoch": 0.7636670258115894, "grad_norm": 0.5867346631888817, "learning_rate": 3.5451356447688944e-06, "loss": 0.2855, "step": 16302 }, { "epoch": 0.7637138708015178, "grad_norm": 0.5493381926820623, "learning_rate": 3.544963359382657e-06, "loss": 0.2883, "step": 16303 }, { "epoch": 0.7637607157914461, "grad_norm": 0.5904436436137779, "learning_rate": 3.544791067982918e-06, "loss": 0.3078, "step": 16304 }, { "epoch": 0.7638075607813745, "grad_norm": 0.5783400524867223, "learning_rate": 3.5446187705706686e-06, "loss": 0.2645, "step": 16305 }, { "epoch": 0.7638544057713028, "grad_norm": 0.6237154422145269, "learning_rate": 3.5444464671469005e-06, "loss": 0.2839, "step": 16306 }, { "epoch": 0.763901250761231, "grad_norm": 0.5697872836698121, "learning_rate": 3.544274157712606e-06, "loss": 0.2529, "step": 16307 }, { "epoch": 0.7639480957511594, "grad_norm": 0.6128433991523149, "learning_rate": 3.544101842268777e-06, "loss": 0.2927, "step": 16308 }, { "epoch": 0.7639949407410878, "grad_norm": 0.586381761761036, "learning_rate": 3.5439295208164025e-06, "loss": 0.2985, "step": 16309 }, { "epoch": 0.7640417857310161, "grad_norm": 0.5944675415974129, "learning_rate": 3.5437571933564775e-06, "loss": 0.2797, "step": 16310 }, { "epoch": 0.7640886307209444, "grad_norm": 0.6242584960049856, "learning_rate": 3.543584859889991e-06, "loss": 0.2921, "step": 16311 }, { "epoch": 0.7641354757108727, "grad_norm": 0.5747064028387694, "learning_rate": 3.543412520417937e-06, "loss": 0.2923, "step": 16312 }, { "epoch": 0.764182320700801, "grad_norm": 0.5736299232499924, "learning_rate": 3.543240174941306e-06, "loss": 0.2905, "step": 16313 }, { "epoch": 0.7642291656907294, "grad_norm": 0.6119964758950482, "learning_rate": 3.5430678234610894e-06, "loss": 0.2908, "step": 16314 }, { "epoch": 0.7642760106806578, "grad_norm": 0.5632548112215724, "learning_rate": 3.54289546597828e-06, "loss": 0.2676, "step": 16315 }, { "epoch": 0.764322855670586, "grad_norm": 0.5936918720300581, "learning_rate": 3.542723102493869e-06, "loss": 0.2713, "step": 16316 }, { "epoch": 0.7643697006605144, "grad_norm": 0.6362005612537538, "learning_rate": 3.5425507330088495e-06, "loss": 0.2839, "step": 16317 }, { "epoch": 0.7644165456504427, "grad_norm": 0.5497519749009726, "learning_rate": 3.5423783575242117e-06, "loss": 0.257, "step": 16318 }, { "epoch": 0.764463390640371, "grad_norm": 0.5597021604753276, "learning_rate": 3.5422059760409483e-06, "loss": 0.2615, "step": 16319 }, { "epoch": 0.7645102356302993, "grad_norm": 0.6250838176200816, "learning_rate": 3.5420335885600523e-06, "loss": 0.3003, "step": 16320 }, { "epoch": 0.7645570806202276, "grad_norm": 0.6081472057021118, "learning_rate": 3.5418611950825144e-06, "loss": 0.2696, "step": 16321 }, { "epoch": 0.764603925610156, "grad_norm": 0.6319001752128238, "learning_rate": 3.541688795609327e-06, "loss": 0.2894, "step": 16322 }, { "epoch": 0.7646507706000844, "grad_norm": 0.5803177904217586, "learning_rate": 3.5415163901414827e-06, "loss": 0.2765, "step": 16323 }, { "epoch": 0.7646976155900127, "grad_norm": 0.5928582092838466, "learning_rate": 3.5413439786799734e-06, "loss": 0.2851, "step": 16324 }, { "epoch": 0.7647444605799409, "grad_norm": 0.6479879850907369, "learning_rate": 3.541171561225791e-06, "loss": 0.2995, "step": 16325 }, { "epoch": 0.7647913055698693, "grad_norm": 0.5883678127806843, "learning_rate": 3.540999137779928e-06, "loss": 0.2815, "step": 16326 }, { "epoch": 0.7648381505597976, "grad_norm": 0.6159884228264778, "learning_rate": 3.5408267083433765e-06, "loss": 0.2762, "step": 16327 }, { "epoch": 0.764884995549726, "grad_norm": 0.6491737338792722, "learning_rate": 3.5406542729171296e-06, "loss": 0.297, "step": 16328 }, { "epoch": 0.7649318405396542, "grad_norm": 0.5898436242494235, "learning_rate": 3.5404818315021784e-06, "loss": 0.2605, "step": 16329 }, { "epoch": 0.7649786855295826, "grad_norm": 0.6105254843647102, "learning_rate": 3.5403093840995157e-06, "loss": 0.2836, "step": 16330 }, { "epoch": 0.7650255305195109, "grad_norm": 0.6038023496768942, "learning_rate": 3.540136930710134e-06, "loss": 0.2813, "step": 16331 }, { "epoch": 0.7650723755094393, "grad_norm": 0.5772305261575474, "learning_rate": 3.5399644713350256e-06, "loss": 0.2723, "step": 16332 }, { "epoch": 0.7651192204993676, "grad_norm": 0.5316112214087082, "learning_rate": 3.539792005975183e-06, "loss": 0.2549, "step": 16333 }, { "epoch": 0.7651660654892959, "grad_norm": 0.5691438547544675, "learning_rate": 3.5396195346315997e-06, "loss": 0.2609, "step": 16334 }, { "epoch": 0.7652129104792242, "grad_norm": 0.5529411799533916, "learning_rate": 3.5394470573052663e-06, "loss": 0.2845, "step": 16335 }, { "epoch": 0.7652597554691526, "grad_norm": 0.6301601272006334, "learning_rate": 3.539274573997177e-06, "loss": 0.2986, "step": 16336 }, { "epoch": 0.7653066004590809, "grad_norm": 0.6477015595019203, "learning_rate": 3.5391020847083225e-06, "loss": 0.3312, "step": 16337 }, { "epoch": 0.7653534454490092, "grad_norm": 0.5718043674144366, "learning_rate": 3.538929589439698e-06, "loss": 0.2822, "step": 16338 }, { "epoch": 0.7654002904389375, "grad_norm": 0.6339496423394838, "learning_rate": 3.5387570881922944e-06, "loss": 0.2877, "step": 16339 }, { "epoch": 0.7654471354288659, "grad_norm": 0.6075686933100902, "learning_rate": 3.5385845809671044e-06, "loss": 0.2862, "step": 16340 }, { "epoch": 0.7654939804187942, "grad_norm": 0.5599961947870339, "learning_rate": 3.5384120677651216e-06, "loss": 0.2745, "step": 16341 }, { "epoch": 0.7655408254087226, "grad_norm": 0.5774933987172984, "learning_rate": 3.5382395485873387e-06, "loss": 0.2601, "step": 16342 }, { "epoch": 0.7655876703986508, "grad_norm": 0.609894573698071, "learning_rate": 3.5380670234347477e-06, "loss": 0.2839, "step": 16343 }, { "epoch": 0.7656345153885792, "grad_norm": 0.5795581470514112, "learning_rate": 3.5378944923083416e-06, "loss": 0.2855, "step": 16344 }, { "epoch": 0.7656813603785075, "grad_norm": 0.5989191444721161, "learning_rate": 3.537721955209115e-06, "loss": 0.2708, "step": 16345 }, { "epoch": 0.7657282053684359, "grad_norm": 0.6780775426736941, "learning_rate": 3.537549412138057e-06, "loss": 0.3017, "step": 16346 }, { "epoch": 0.7657750503583641, "grad_norm": 0.6575115774130982, "learning_rate": 3.5373768630961643e-06, "loss": 0.3098, "step": 16347 }, { "epoch": 0.7658218953482925, "grad_norm": 0.5773044546047053, "learning_rate": 3.537204308084428e-06, "loss": 0.2799, "step": 16348 }, { "epoch": 0.7658687403382208, "grad_norm": 0.6474646455733051, "learning_rate": 3.5370317471038423e-06, "loss": 0.2924, "step": 16349 }, { "epoch": 0.7659155853281492, "grad_norm": 0.6200185940116881, "learning_rate": 3.5368591801553993e-06, "loss": 0.3141, "step": 16350 }, { "epoch": 0.7659624303180775, "grad_norm": 0.5911906736978104, "learning_rate": 3.5366866072400925e-06, "loss": 0.2952, "step": 16351 }, { "epoch": 0.7660092753080058, "grad_norm": 0.5572869295393581, "learning_rate": 3.5365140283589145e-06, "loss": 0.2794, "step": 16352 }, { "epoch": 0.7660561202979341, "grad_norm": 0.6522937316663504, "learning_rate": 3.5363414435128586e-06, "loss": 0.2925, "step": 16353 }, { "epoch": 0.7661029652878625, "grad_norm": 0.5944910647635601, "learning_rate": 3.5361688527029186e-06, "loss": 0.2737, "step": 16354 }, { "epoch": 0.7661498102777908, "grad_norm": 0.6102680796373158, "learning_rate": 3.5359962559300864e-06, "loss": 0.2662, "step": 16355 }, { "epoch": 0.7661966552677191, "grad_norm": 0.5944160885035202, "learning_rate": 3.535823653195357e-06, "loss": 0.2879, "step": 16356 }, { "epoch": 0.7662435002576474, "grad_norm": 0.5643295099208154, "learning_rate": 3.5356510444997233e-06, "loss": 0.259, "step": 16357 }, { "epoch": 0.7662903452475758, "grad_norm": 0.6110510157289689, "learning_rate": 3.535478429844177e-06, "loss": 0.2615, "step": 16358 }, { "epoch": 0.7663371902375041, "grad_norm": 0.6416175123411012, "learning_rate": 3.5353058092297133e-06, "loss": 0.2934, "step": 16359 }, { "epoch": 0.7663840352274325, "grad_norm": 0.6101069732035765, "learning_rate": 3.535133182657325e-06, "loss": 0.2811, "step": 16360 }, { "epoch": 0.7664308802173607, "grad_norm": 0.5568691885254581, "learning_rate": 3.5349605501280048e-06, "loss": 0.2749, "step": 16361 }, { "epoch": 0.7664777252072891, "grad_norm": 0.5682989771460029, "learning_rate": 3.534787911642747e-06, "loss": 0.3022, "step": 16362 }, { "epoch": 0.7665245701972174, "grad_norm": 0.5732029065175174, "learning_rate": 3.534615267202545e-06, "loss": 0.3056, "step": 16363 }, { "epoch": 0.7665714151871458, "grad_norm": 0.6033581963311377, "learning_rate": 3.5344426168083917e-06, "loss": 0.2974, "step": 16364 }, { "epoch": 0.766618260177074, "grad_norm": 0.5862260169545225, "learning_rate": 3.534269960461281e-06, "loss": 0.2494, "step": 16365 }, { "epoch": 0.7666651051670024, "grad_norm": 0.6037666494233458, "learning_rate": 3.534097298162208e-06, "loss": 0.2922, "step": 16366 }, { "epoch": 0.7667119501569307, "grad_norm": 0.629821516486537, "learning_rate": 3.533924629912164e-06, "loss": 0.2929, "step": 16367 }, { "epoch": 0.7667587951468591, "grad_norm": 0.5828510161549448, "learning_rate": 3.5337519557121436e-06, "loss": 0.2944, "step": 16368 }, { "epoch": 0.7668056401367874, "grad_norm": 0.5820910799280328, "learning_rate": 3.5335792755631404e-06, "loss": 0.2658, "step": 16369 }, { "epoch": 0.7668524851267157, "grad_norm": 0.6180705908074284, "learning_rate": 3.5334065894661485e-06, "loss": 0.3078, "step": 16370 }, { "epoch": 0.766899330116644, "grad_norm": 0.569636503340534, "learning_rate": 3.5332338974221616e-06, "loss": 0.2792, "step": 16371 }, { "epoch": 0.7669461751065724, "grad_norm": 0.5931337389448665, "learning_rate": 3.533061199432173e-06, "loss": 0.2564, "step": 16372 }, { "epoch": 0.7669930200965007, "grad_norm": 0.5838133306266896, "learning_rate": 3.532888495497177e-06, "loss": 0.283, "step": 16373 }, { "epoch": 0.767039865086429, "grad_norm": 0.6211118192661524, "learning_rate": 3.5327157856181672e-06, "loss": 0.2787, "step": 16374 }, { "epoch": 0.7670867100763573, "grad_norm": 0.5688418603786515, "learning_rate": 3.5325430697961376e-06, "loss": 0.2721, "step": 16375 }, { "epoch": 0.7671335550662857, "grad_norm": 0.5783424216865715, "learning_rate": 3.532370348032082e-06, "loss": 0.2639, "step": 16376 }, { "epoch": 0.767180400056214, "grad_norm": 0.5755477129451887, "learning_rate": 3.5321976203269955e-06, "loss": 0.2738, "step": 16377 }, { "epoch": 0.7672272450461424, "grad_norm": 0.6416322418892969, "learning_rate": 3.53202488668187e-06, "loss": 0.2855, "step": 16378 }, { "epoch": 0.7672740900360706, "grad_norm": 0.5622927028439825, "learning_rate": 3.5318521470977008e-06, "loss": 0.2826, "step": 16379 }, { "epoch": 0.767320935025999, "grad_norm": 0.5528475470809706, "learning_rate": 3.5316794015754824e-06, "loss": 0.279, "step": 16380 }, { "epoch": 0.7673677800159273, "grad_norm": 0.5827939637918326, "learning_rate": 3.531506650116208e-06, "loss": 0.282, "step": 16381 }, { "epoch": 0.7674146250058557, "grad_norm": 0.5692377067066929, "learning_rate": 3.5313338927208717e-06, "loss": 0.2758, "step": 16382 }, { "epoch": 0.7674614699957839, "grad_norm": 0.6616575312037621, "learning_rate": 3.531161129390469e-06, "loss": 0.2992, "step": 16383 }, { "epoch": 0.7675083149857123, "grad_norm": 0.5977106838628159, "learning_rate": 3.530988360125993e-06, "loss": 0.2821, "step": 16384 }, { "epoch": 0.7675551599756406, "grad_norm": 0.5745826612976213, "learning_rate": 3.5308155849284374e-06, "loss": 0.2827, "step": 16385 }, { "epoch": 0.767602004965569, "grad_norm": 0.569042895106781, "learning_rate": 3.530642803798797e-06, "loss": 0.2791, "step": 16386 }, { "epoch": 0.7676488499554973, "grad_norm": 0.6151704803418504, "learning_rate": 3.530470016738068e-06, "loss": 0.2957, "step": 16387 }, { "epoch": 0.7676956949454256, "grad_norm": 0.5799438142355703, "learning_rate": 3.5302972237472415e-06, "loss": 0.2676, "step": 16388 }, { "epoch": 0.7677425399353539, "grad_norm": 0.5704500372076872, "learning_rate": 3.5301244248273136e-06, "loss": 0.2794, "step": 16389 }, { "epoch": 0.7677893849252823, "grad_norm": 0.6418606957610007, "learning_rate": 3.529951619979279e-06, "loss": 0.2829, "step": 16390 }, { "epoch": 0.7678362299152106, "grad_norm": 0.6012021009807131, "learning_rate": 3.5297788092041317e-06, "loss": 0.2679, "step": 16391 }, { "epoch": 0.7678830749051389, "grad_norm": 0.658677408768006, "learning_rate": 3.529605992502866e-06, "loss": 0.2999, "step": 16392 }, { "epoch": 0.7679299198950672, "grad_norm": 0.6128802419358104, "learning_rate": 3.529433169876476e-06, "loss": 0.2745, "step": 16393 }, { "epoch": 0.7679767648849956, "grad_norm": 0.5681413651446828, "learning_rate": 3.529260341325958e-06, "loss": 0.2579, "step": 16394 }, { "epoch": 0.7680236098749239, "grad_norm": 0.5691442627003197, "learning_rate": 3.5290875068523045e-06, "loss": 0.2844, "step": 16395 }, { "epoch": 0.7680704548648523, "grad_norm": 0.6225855062464124, "learning_rate": 3.528914666456511e-06, "loss": 0.2666, "step": 16396 }, { "epoch": 0.7681172998547805, "grad_norm": 0.6141992224849603, "learning_rate": 3.528741820139573e-06, "loss": 0.2809, "step": 16397 }, { "epoch": 0.7681641448447089, "grad_norm": 0.5758669568792395, "learning_rate": 3.528568967902484e-06, "loss": 0.268, "step": 16398 }, { "epoch": 0.7682109898346372, "grad_norm": 0.6351616264815898, "learning_rate": 3.5283961097462396e-06, "loss": 0.3052, "step": 16399 }, { "epoch": 0.7682578348245656, "grad_norm": 0.5566112863116589, "learning_rate": 3.5282232456718334e-06, "loss": 0.2621, "step": 16400 }, { "epoch": 0.7683046798144938, "grad_norm": 0.5992332959139329, "learning_rate": 3.528050375680261e-06, "loss": 0.2955, "step": 16401 }, { "epoch": 0.7683515248044221, "grad_norm": 0.6795386324599028, "learning_rate": 3.527877499772517e-06, "loss": 0.2929, "step": 16402 }, { "epoch": 0.7683983697943505, "grad_norm": 0.5987616286972344, "learning_rate": 3.5277046179495966e-06, "loss": 0.2733, "step": 16403 }, { "epoch": 0.7684452147842789, "grad_norm": 0.525032726302045, "learning_rate": 3.5275317302124945e-06, "loss": 0.2606, "step": 16404 }, { "epoch": 0.7684920597742072, "grad_norm": 0.5933176989949124, "learning_rate": 3.5273588365622046e-06, "loss": 0.2868, "step": 16405 }, { "epoch": 0.7685389047641354, "grad_norm": 0.599860602991573, "learning_rate": 3.5271859369997243e-06, "loss": 0.2839, "step": 16406 }, { "epoch": 0.7685857497540638, "grad_norm": 0.5966674568268427, "learning_rate": 3.527013031526046e-06, "loss": 0.2805, "step": 16407 }, { "epoch": 0.7686325947439921, "grad_norm": 0.6190993698495129, "learning_rate": 3.5268401201421665e-06, "loss": 0.2994, "step": 16408 }, { "epoch": 0.7686794397339205, "grad_norm": 0.6215438047838941, "learning_rate": 3.52666720284908e-06, "loss": 0.2695, "step": 16409 }, { "epoch": 0.7687262847238487, "grad_norm": 0.5386517514416991, "learning_rate": 3.526494279647781e-06, "loss": 0.2869, "step": 16410 }, { "epoch": 0.7687731297137771, "grad_norm": 0.540540159161526, "learning_rate": 3.5263213505392662e-06, "loss": 0.2696, "step": 16411 }, { "epoch": 0.7688199747037054, "grad_norm": 0.5643534846975701, "learning_rate": 3.52614841552453e-06, "loss": 0.2852, "step": 16412 }, { "epoch": 0.7688668196936338, "grad_norm": 0.5948405716624174, "learning_rate": 3.525975474604567e-06, "loss": 0.2867, "step": 16413 }, { "epoch": 0.7689136646835621, "grad_norm": 0.615235784275659, "learning_rate": 3.5258025277803737e-06, "loss": 0.2753, "step": 16414 }, { "epoch": 0.7689605096734904, "grad_norm": 0.5726044455482401, "learning_rate": 3.525629575052945e-06, "loss": 0.2789, "step": 16415 }, { "epoch": 0.7690073546634187, "grad_norm": 0.6170511465691679, "learning_rate": 3.525456616423275e-06, "loss": 0.2789, "step": 16416 }, { "epoch": 0.7690541996533471, "grad_norm": 0.6174422981526884, "learning_rate": 3.52528365189236e-06, "loss": 0.2803, "step": 16417 }, { "epoch": 0.7691010446432754, "grad_norm": 0.593660020960463, "learning_rate": 3.525110681461196e-06, "loss": 0.2774, "step": 16418 }, { "epoch": 0.7691478896332037, "grad_norm": 0.6232294109697837, "learning_rate": 3.524937705130777e-06, "loss": 0.3132, "step": 16419 }, { "epoch": 0.769194734623132, "grad_norm": 0.6206400931232496, "learning_rate": 3.524764722902099e-06, "loss": 0.279, "step": 16420 }, { "epoch": 0.7692415796130604, "grad_norm": 0.5769573352394491, "learning_rate": 3.5245917347761583e-06, "loss": 0.267, "step": 16421 }, { "epoch": 0.7692884246029887, "grad_norm": 0.6259405809393971, "learning_rate": 3.5244187407539494e-06, "loss": 0.2685, "step": 16422 }, { "epoch": 0.7693352695929171, "grad_norm": 0.594997837416354, "learning_rate": 3.524245740836468e-06, "loss": 0.2709, "step": 16423 }, { "epoch": 0.7693821145828453, "grad_norm": 0.6389897375466269, "learning_rate": 3.5240727350247094e-06, "loss": 0.3012, "step": 16424 }, { "epoch": 0.7694289595727737, "grad_norm": 0.6151748076582054, "learning_rate": 3.5238997233196696e-06, "loss": 0.2744, "step": 16425 }, { "epoch": 0.769475804562702, "grad_norm": 0.5912558861454853, "learning_rate": 3.523726705722345e-06, "loss": 0.2628, "step": 16426 }, { "epoch": 0.7695226495526304, "grad_norm": 0.6297498115022246, "learning_rate": 3.52355368223373e-06, "loss": 0.2737, "step": 16427 }, { "epoch": 0.7695694945425586, "grad_norm": 0.6244592688249363, "learning_rate": 3.5233806528548207e-06, "loss": 0.294, "step": 16428 }, { "epoch": 0.769616339532487, "grad_norm": 0.5752794627324838, "learning_rate": 3.5232076175866135e-06, "loss": 0.2634, "step": 16429 }, { "epoch": 0.7696631845224153, "grad_norm": 0.6069805162347135, "learning_rate": 3.523034576430104e-06, "loss": 0.282, "step": 16430 }, { "epoch": 0.7697100295123437, "grad_norm": 0.6742412397495656, "learning_rate": 3.5228615293862867e-06, "loss": 0.2777, "step": 16431 }, { "epoch": 0.769756874502272, "grad_norm": 0.5752964270540746, "learning_rate": 3.5226884764561585e-06, "loss": 0.2869, "step": 16432 }, { "epoch": 0.7698037194922003, "grad_norm": 0.592035928875969, "learning_rate": 3.522515417640716e-06, "loss": 0.2881, "step": 16433 }, { "epoch": 0.7698505644821286, "grad_norm": 0.5680166057638248, "learning_rate": 3.5223423529409533e-06, "loss": 0.29, "step": 16434 }, { "epoch": 0.769897409472057, "grad_norm": 0.5752523094774872, "learning_rate": 3.5221692823578673e-06, "loss": 0.2709, "step": 16435 }, { "epoch": 0.7699442544619853, "grad_norm": 0.6230582499249202, "learning_rate": 3.521996205892455e-06, "loss": 0.2887, "step": 16436 }, { "epoch": 0.7699910994519136, "grad_norm": 0.586504104511945, "learning_rate": 3.52182312354571e-06, "loss": 0.2798, "step": 16437 }, { "epoch": 0.7700379444418419, "grad_norm": 0.5971872008866026, "learning_rate": 3.5216500353186307e-06, "loss": 0.2986, "step": 16438 }, { "epoch": 0.7700847894317703, "grad_norm": 0.5199272231933836, "learning_rate": 3.521476941212212e-06, "loss": 0.277, "step": 16439 }, { "epoch": 0.7701316344216986, "grad_norm": 0.61042179982888, "learning_rate": 3.5213038412274503e-06, "loss": 0.2965, "step": 16440 }, { "epoch": 0.770178479411627, "grad_norm": 0.6524081646740642, "learning_rate": 3.5211307353653417e-06, "loss": 0.3096, "step": 16441 }, { "epoch": 0.7702253244015552, "grad_norm": 0.5419205213239959, "learning_rate": 3.5209576236268827e-06, "loss": 0.2607, "step": 16442 }, { "epoch": 0.7702721693914836, "grad_norm": 0.571450982865234, "learning_rate": 3.5207845060130686e-06, "loss": 0.2698, "step": 16443 }, { "epoch": 0.7703190143814119, "grad_norm": 0.6186229917170529, "learning_rate": 3.520611382524896e-06, "loss": 0.2826, "step": 16444 }, { "epoch": 0.7703658593713403, "grad_norm": 0.595947387855867, "learning_rate": 3.5204382531633625e-06, "loss": 0.2708, "step": 16445 }, { "epoch": 0.7704127043612685, "grad_norm": 0.5632860730359772, "learning_rate": 3.5202651179294624e-06, "loss": 0.2688, "step": 16446 }, { "epoch": 0.7704595493511969, "grad_norm": 0.5820635411863626, "learning_rate": 3.5200919768241932e-06, "loss": 0.2773, "step": 16447 }, { "epoch": 0.7705063943411252, "grad_norm": 0.5787032756453376, "learning_rate": 3.5199188298485516e-06, "loss": 0.2745, "step": 16448 }, { "epoch": 0.7705532393310536, "grad_norm": 0.6371859443599076, "learning_rate": 3.5197456770035325e-06, "loss": 0.275, "step": 16449 }, { "epoch": 0.7706000843209819, "grad_norm": 0.5556028071957713, "learning_rate": 3.519572518290134e-06, "loss": 0.2744, "step": 16450 }, { "epoch": 0.7706469293109102, "grad_norm": 0.5372628048120438, "learning_rate": 3.5193993537093518e-06, "loss": 0.2709, "step": 16451 }, { "epoch": 0.7706937743008385, "grad_norm": 0.5681890811516791, "learning_rate": 3.519226183262182e-06, "loss": 0.2709, "step": 16452 }, { "epoch": 0.7707406192907669, "grad_norm": 0.5796402139168091, "learning_rate": 3.5190530069496225e-06, "loss": 0.2768, "step": 16453 }, { "epoch": 0.7707874642806952, "grad_norm": 0.6003108529568775, "learning_rate": 3.518879824772669e-06, "loss": 0.2888, "step": 16454 }, { "epoch": 0.7708343092706235, "grad_norm": 0.6370785903440195, "learning_rate": 3.5187066367323174e-06, "loss": 0.2739, "step": 16455 }, { "epoch": 0.7708811542605518, "grad_norm": 0.596165847777639, "learning_rate": 3.5185334428295657e-06, "loss": 0.2868, "step": 16456 }, { "epoch": 0.7709279992504802, "grad_norm": 0.5555255216589526, "learning_rate": 3.518360243065411e-06, "loss": 0.2694, "step": 16457 }, { "epoch": 0.7709748442404085, "grad_norm": 0.6077875903103155, "learning_rate": 3.518187037440848e-06, "loss": 0.2823, "step": 16458 }, { "epoch": 0.7710216892303369, "grad_norm": 0.5660900811822533, "learning_rate": 3.518013825956874e-06, "loss": 0.2756, "step": 16459 }, { "epoch": 0.7710685342202651, "grad_norm": 0.6464626481536929, "learning_rate": 3.5178406086144868e-06, "loss": 0.2803, "step": 16460 }, { "epoch": 0.7711153792101935, "grad_norm": 0.6050678504068145, "learning_rate": 3.517667385414683e-06, "loss": 0.2874, "step": 16461 }, { "epoch": 0.7711622242001218, "grad_norm": 0.5276538300412061, "learning_rate": 3.5174941563584586e-06, "loss": 0.2517, "step": 16462 }, { "epoch": 0.7712090691900502, "grad_norm": 0.5854694329072371, "learning_rate": 3.5173209214468113e-06, "loss": 0.2722, "step": 16463 }, { "epoch": 0.7712559141799784, "grad_norm": 0.6165355291584801, "learning_rate": 3.5171476806807387e-06, "loss": 0.3018, "step": 16464 }, { "epoch": 0.7713027591699068, "grad_norm": 0.596719645987293, "learning_rate": 3.516974434061236e-06, "loss": 0.2836, "step": 16465 }, { "epoch": 0.7713496041598351, "grad_norm": 0.5454860582187647, "learning_rate": 3.516801181589301e-06, "loss": 0.2582, "step": 16466 }, { "epoch": 0.7713964491497635, "grad_norm": 0.6460168100228392, "learning_rate": 3.5166279232659308e-06, "loss": 0.2985, "step": 16467 }, { "epoch": 0.7714432941396918, "grad_norm": 0.6013310159594192, "learning_rate": 3.516454659092123e-06, "loss": 0.2874, "step": 16468 }, { "epoch": 0.7714901391296201, "grad_norm": 0.6169079228390374, "learning_rate": 3.516281389068873e-06, "loss": 0.2837, "step": 16469 }, { "epoch": 0.7715369841195484, "grad_norm": 0.5930521027936645, "learning_rate": 3.5161081131971797e-06, "loss": 0.2611, "step": 16470 }, { "epoch": 0.7715838291094768, "grad_norm": 0.6031046284809486, "learning_rate": 3.51593483147804e-06, "loss": 0.2776, "step": 16471 }, { "epoch": 0.7716306740994051, "grad_norm": 0.6068701926232656, "learning_rate": 3.51576154391245e-06, "loss": 0.2996, "step": 16472 }, { "epoch": 0.7716775190893334, "grad_norm": 0.5620808481504113, "learning_rate": 3.515588250501408e-06, "loss": 0.2701, "step": 16473 }, { "epoch": 0.7717243640792617, "grad_norm": 0.5311173874619576, "learning_rate": 3.5154149512459114e-06, "loss": 0.2574, "step": 16474 }, { "epoch": 0.7717712090691901, "grad_norm": 0.5853638699283786, "learning_rate": 3.515241646146956e-06, "loss": 0.2758, "step": 16475 }, { "epoch": 0.7718180540591184, "grad_norm": 0.5781086025487135, "learning_rate": 3.51506833520554e-06, "loss": 0.2762, "step": 16476 }, { "epoch": 0.7718648990490468, "grad_norm": 0.5810955244524179, "learning_rate": 3.5148950184226618e-06, "loss": 0.2703, "step": 16477 }, { "epoch": 0.771911744038975, "grad_norm": 0.5823454028191644, "learning_rate": 3.514721695799317e-06, "loss": 0.2874, "step": 16478 }, { "epoch": 0.7719585890289034, "grad_norm": 0.5966964359250042, "learning_rate": 3.5145483673365044e-06, "loss": 0.2851, "step": 16479 }, { "epoch": 0.7720054340188317, "grad_norm": 0.5869735131921321, "learning_rate": 3.5143750330352206e-06, "loss": 0.2832, "step": 16480 }, { "epoch": 0.7720522790087601, "grad_norm": 0.5890110983286754, "learning_rate": 3.514201692896464e-06, "loss": 0.2673, "step": 16481 }, { "epoch": 0.7720991239986883, "grad_norm": 0.5902304535325319, "learning_rate": 3.514028346921231e-06, "loss": 0.2604, "step": 16482 }, { "epoch": 0.7721459689886166, "grad_norm": 0.5329825577510137, "learning_rate": 3.5138549951105194e-06, "loss": 0.2604, "step": 16483 }, { "epoch": 0.772192813978545, "grad_norm": 0.6092107343676199, "learning_rate": 3.513681637465328e-06, "loss": 0.2795, "step": 16484 }, { "epoch": 0.7722396589684734, "grad_norm": 0.5985534316100916, "learning_rate": 3.5135082739866534e-06, "loss": 0.2854, "step": 16485 }, { "epoch": 0.7722865039584017, "grad_norm": 0.6121451588340425, "learning_rate": 3.5133349046754926e-06, "loss": 0.2668, "step": 16486 }, { "epoch": 0.77233334894833, "grad_norm": 0.5823865580891349, "learning_rate": 3.513161529532845e-06, "loss": 0.2835, "step": 16487 }, { "epoch": 0.7723801939382583, "grad_norm": 0.6023836045354377, "learning_rate": 3.5129881485597074e-06, "loss": 0.2738, "step": 16488 }, { "epoch": 0.7724270389281866, "grad_norm": 0.5906708013538055, "learning_rate": 3.5128147617570773e-06, "loss": 0.29, "step": 16489 }, { "epoch": 0.772473883918115, "grad_norm": 0.5412145699036905, "learning_rate": 3.5126413691259526e-06, "loss": 0.2765, "step": 16490 }, { "epoch": 0.7725207289080432, "grad_norm": 0.6996882861406447, "learning_rate": 3.5124679706673314e-06, "loss": 0.2692, "step": 16491 }, { "epoch": 0.7725675738979716, "grad_norm": 0.598931488852824, "learning_rate": 3.5122945663822124e-06, "loss": 0.2871, "step": 16492 }, { "epoch": 0.7726144188879, "grad_norm": 0.576010127295319, "learning_rate": 3.5121211562715917e-06, "loss": 0.2633, "step": 16493 }, { "epoch": 0.7726612638778283, "grad_norm": 0.5553825613257886, "learning_rate": 3.5119477403364683e-06, "loss": 0.2614, "step": 16494 }, { "epoch": 0.7727081088677566, "grad_norm": 0.5827692319997153, "learning_rate": 3.5117743185778397e-06, "loss": 0.2701, "step": 16495 }, { "epoch": 0.7727549538576849, "grad_norm": 0.6340220551100486, "learning_rate": 3.511600890996705e-06, "loss": 0.2907, "step": 16496 }, { "epoch": 0.7728017988476132, "grad_norm": 0.5530224158833541, "learning_rate": 3.511427457594061e-06, "loss": 0.2699, "step": 16497 }, { "epoch": 0.7728486438375416, "grad_norm": 0.5612742177811106, "learning_rate": 3.511254018370906e-06, "loss": 0.2673, "step": 16498 }, { "epoch": 0.77289548882747, "grad_norm": 0.6380376640398373, "learning_rate": 3.5110805733282387e-06, "loss": 0.2931, "step": 16499 }, { "epoch": 0.7729423338173982, "grad_norm": 0.6314806583998805, "learning_rate": 3.5109071224670564e-06, "loss": 0.3059, "step": 16500 }, { "epoch": 0.7729891788073265, "grad_norm": 0.6547499132959987, "learning_rate": 3.510733665788358e-06, "loss": 0.2937, "step": 16501 }, { "epoch": 0.7730360237972549, "grad_norm": 0.6226671899703006, "learning_rate": 3.5105602032931412e-06, "loss": 0.3128, "step": 16502 }, { "epoch": 0.7730828687871832, "grad_norm": 0.5778389885082471, "learning_rate": 3.510386734982405e-06, "loss": 0.3017, "step": 16503 }, { "epoch": 0.7731297137771115, "grad_norm": 0.610294497331676, "learning_rate": 3.5102132608571458e-06, "loss": 0.2923, "step": 16504 }, { "epoch": 0.7731765587670398, "grad_norm": 0.6201082700413421, "learning_rate": 3.510039780918364e-06, "loss": 0.277, "step": 16505 }, { "epoch": 0.7732234037569682, "grad_norm": 0.5665332787505509, "learning_rate": 3.509866295167058e-06, "loss": 0.2806, "step": 16506 }, { "epoch": 0.7732702487468965, "grad_norm": 0.6370171172086464, "learning_rate": 3.5096928036042236e-06, "loss": 0.2793, "step": 16507 }, { "epoch": 0.7733170937368249, "grad_norm": 0.5497112525704949, "learning_rate": 3.5095193062308618e-06, "loss": 0.2537, "step": 16508 }, { "epoch": 0.7733639387267531, "grad_norm": 0.6271423600571804, "learning_rate": 3.50934580304797e-06, "loss": 0.3075, "step": 16509 }, { "epoch": 0.7734107837166815, "grad_norm": 0.5681704667246184, "learning_rate": 3.509172294056547e-06, "loss": 0.2607, "step": 16510 }, { "epoch": 0.7734576287066098, "grad_norm": 0.595803616956932, "learning_rate": 3.508998779257591e-06, "loss": 0.2968, "step": 16511 }, { "epoch": 0.7735044736965382, "grad_norm": 0.6092768960825123, "learning_rate": 3.5088252586521003e-06, "loss": 0.2826, "step": 16512 }, { "epoch": 0.7735513186864664, "grad_norm": 0.5383455491729836, "learning_rate": 3.5086517322410747e-06, "loss": 0.2663, "step": 16513 }, { "epoch": 0.7735981636763948, "grad_norm": 0.6183612354426853, "learning_rate": 3.5084782000255106e-06, "loss": 0.3063, "step": 16514 }, { "epoch": 0.7736450086663231, "grad_norm": 0.6860745333149705, "learning_rate": 3.5083046620064088e-06, "loss": 0.2846, "step": 16515 }, { "epoch": 0.7736918536562515, "grad_norm": 0.576809368945971, "learning_rate": 3.5081311181847666e-06, "loss": 0.2708, "step": 16516 }, { "epoch": 0.7737386986461798, "grad_norm": 0.5682342202065814, "learning_rate": 3.5079575685615836e-06, "loss": 0.2791, "step": 16517 }, { "epoch": 0.7737855436361081, "grad_norm": 0.5495864017796284, "learning_rate": 3.507784013137858e-06, "loss": 0.2629, "step": 16518 }, { "epoch": 0.7738323886260364, "grad_norm": 0.6091874506589605, "learning_rate": 3.507610451914589e-06, "loss": 0.2901, "step": 16519 }, { "epoch": 0.7738792336159648, "grad_norm": 0.5486794036569, "learning_rate": 3.507436884892774e-06, "loss": 0.2684, "step": 16520 }, { "epoch": 0.7739260786058931, "grad_norm": 0.6031608900383832, "learning_rate": 3.5072633120734146e-06, "loss": 0.2744, "step": 16521 }, { "epoch": 0.7739729235958214, "grad_norm": 0.6055653673999041, "learning_rate": 3.5070897334575064e-06, "loss": 0.2768, "step": 16522 }, { "epoch": 0.7740197685857497, "grad_norm": 0.5455068192723858, "learning_rate": 3.5069161490460513e-06, "loss": 0.2812, "step": 16523 }, { "epoch": 0.7740666135756781, "grad_norm": 0.6462148255631597, "learning_rate": 3.506742558840046e-06, "loss": 0.2827, "step": 16524 }, { "epoch": 0.7741134585656064, "grad_norm": 0.6315707931824341, "learning_rate": 3.5065689628404903e-06, "loss": 0.2807, "step": 16525 }, { "epoch": 0.7741603035555348, "grad_norm": 0.630390733534809, "learning_rate": 3.506395361048383e-06, "loss": 0.2595, "step": 16526 }, { "epoch": 0.774207148545463, "grad_norm": 0.5338913574118794, "learning_rate": 3.5062217534647246e-06, "loss": 0.261, "step": 16527 }, { "epoch": 0.7742539935353914, "grad_norm": 0.5914174418431123, "learning_rate": 3.5060481400905115e-06, "loss": 0.2893, "step": 16528 }, { "epoch": 0.7743008385253197, "grad_norm": 0.584170328082794, "learning_rate": 3.5058745209267448e-06, "loss": 0.2735, "step": 16529 }, { "epoch": 0.7743476835152481, "grad_norm": 0.616634188395344, "learning_rate": 3.505700895974423e-06, "loss": 0.2831, "step": 16530 }, { "epoch": 0.7743945285051763, "grad_norm": 0.5806344005602668, "learning_rate": 3.505527265234546e-06, "loss": 0.2718, "step": 16531 }, { "epoch": 0.7744413734951047, "grad_norm": 0.6117725493085172, "learning_rate": 3.5053536287081113e-06, "loss": 0.3092, "step": 16532 }, { "epoch": 0.774488218485033, "grad_norm": 0.5477951664217608, "learning_rate": 3.5051799863961187e-06, "loss": 0.2696, "step": 16533 }, { "epoch": 0.7745350634749614, "grad_norm": 0.5370030767914572, "learning_rate": 3.5050063382995695e-06, "loss": 0.2637, "step": 16534 }, { "epoch": 0.7745819084648897, "grad_norm": 0.5761732378942627, "learning_rate": 3.5048326844194603e-06, "loss": 0.281, "step": 16535 }, { "epoch": 0.774628753454818, "grad_norm": 0.605937289960637, "learning_rate": 3.504659024756792e-06, "loss": 0.2918, "step": 16536 }, { "epoch": 0.7746755984447463, "grad_norm": 0.5869385269097884, "learning_rate": 3.5044853593125632e-06, "loss": 0.2895, "step": 16537 }, { "epoch": 0.7747224434346747, "grad_norm": 0.5856123105637057, "learning_rate": 3.5043116880877743e-06, "loss": 0.2859, "step": 16538 }, { "epoch": 0.774769288424603, "grad_norm": 0.5856262063298812, "learning_rate": 3.5041380110834234e-06, "loss": 0.2805, "step": 16539 }, { "epoch": 0.7748161334145313, "grad_norm": 0.5820342848307632, "learning_rate": 3.503964328300511e-06, "loss": 0.2826, "step": 16540 }, { "epoch": 0.7748629784044596, "grad_norm": 0.6454689520790907, "learning_rate": 3.503790639740036e-06, "loss": 0.2845, "step": 16541 }, { "epoch": 0.774909823394388, "grad_norm": 0.5118910048703823, "learning_rate": 3.5036169454029985e-06, "loss": 0.2483, "step": 16542 }, { "epoch": 0.7749566683843163, "grad_norm": 0.5632063792219165, "learning_rate": 3.503443245290397e-06, "loss": 0.2805, "step": 16543 }, { "epoch": 0.7750035133742447, "grad_norm": 0.6144397479058125, "learning_rate": 3.5032695394032327e-06, "loss": 0.2889, "step": 16544 }, { "epoch": 0.7750503583641729, "grad_norm": 0.6070583860495714, "learning_rate": 3.5030958277425038e-06, "loss": 0.2703, "step": 16545 }, { "epoch": 0.7750972033541013, "grad_norm": 0.5622984137700671, "learning_rate": 3.502922110309211e-06, "loss": 0.2667, "step": 16546 }, { "epoch": 0.7751440483440296, "grad_norm": 0.6320633727675214, "learning_rate": 3.5027483871043526e-06, "loss": 0.2977, "step": 16547 }, { "epoch": 0.775190893333958, "grad_norm": 0.641789200306454, "learning_rate": 3.5025746581289298e-06, "loss": 0.306, "step": 16548 }, { "epoch": 0.7752377383238862, "grad_norm": 0.5882145880923696, "learning_rate": 3.502400923383943e-06, "loss": 0.2719, "step": 16549 }, { "epoch": 0.7752845833138146, "grad_norm": 0.6251774589570127, "learning_rate": 3.5022271828703893e-06, "loss": 0.2793, "step": 16550 }, { "epoch": 0.7753314283037429, "grad_norm": 0.5735702046041569, "learning_rate": 3.5020534365892706e-06, "loss": 0.2672, "step": 16551 }, { "epoch": 0.7753782732936713, "grad_norm": 0.5757144085657229, "learning_rate": 3.501879684541586e-06, "loss": 0.2905, "step": 16552 }, { "epoch": 0.7754251182835996, "grad_norm": 0.6039261137639467, "learning_rate": 3.5017059267283352e-06, "loss": 0.2804, "step": 16553 }, { "epoch": 0.7754719632735279, "grad_norm": 0.6533018682485463, "learning_rate": 3.5015321631505196e-06, "loss": 0.3006, "step": 16554 }, { "epoch": 0.7755188082634562, "grad_norm": 0.5908341831266262, "learning_rate": 3.5013583938091378e-06, "loss": 0.2876, "step": 16555 }, { "epoch": 0.7755656532533846, "grad_norm": 0.5699589754757304, "learning_rate": 3.50118461870519e-06, "loss": 0.291, "step": 16556 }, { "epoch": 0.7756124982433129, "grad_norm": 0.6101489447046725, "learning_rate": 3.5010108378396755e-06, "loss": 0.2903, "step": 16557 }, { "epoch": 0.7756593432332411, "grad_norm": 0.6386701676307922, "learning_rate": 3.5008370512135966e-06, "loss": 0.2748, "step": 16558 }, { "epoch": 0.7757061882231695, "grad_norm": 0.647887061360918, "learning_rate": 3.5006632588279515e-06, "loss": 0.3056, "step": 16559 }, { "epoch": 0.7757530332130979, "grad_norm": 0.642347773967654, "learning_rate": 3.5004894606837407e-06, "loss": 0.2982, "step": 16560 }, { "epoch": 0.7757998782030262, "grad_norm": 0.5840254366789117, "learning_rate": 3.5003156567819645e-06, "loss": 0.2784, "step": 16561 }, { "epoch": 0.7758467231929546, "grad_norm": 0.5845710994050226, "learning_rate": 3.5001418471236236e-06, "loss": 0.2648, "step": 16562 }, { "epoch": 0.7758935681828828, "grad_norm": 0.6019344411445761, "learning_rate": 3.499968031709717e-06, "loss": 0.2837, "step": 16563 }, { "epoch": 0.7759404131728111, "grad_norm": 0.5961202606475619, "learning_rate": 3.4997942105412463e-06, "loss": 0.2919, "step": 16564 }, { "epoch": 0.7759872581627395, "grad_norm": 0.5878986182450224, "learning_rate": 3.499620383619211e-06, "loss": 0.2877, "step": 16565 }, { "epoch": 0.7760341031526679, "grad_norm": 0.5665555195139172, "learning_rate": 3.499446550944612e-06, "loss": 0.2569, "step": 16566 }, { "epoch": 0.7760809481425961, "grad_norm": 0.5992317647953181, "learning_rate": 3.4992727125184488e-06, "loss": 0.2785, "step": 16567 }, { "epoch": 0.7761277931325244, "grad_norm": 0.5630768853046781, "learning_rate": 3.4990988683417225e-06, "loss": 0.2708, "step": 16568 }, { "epoch": 0.7761746381224528, "grad_norm": 0.6102387893741985, "learning_rate": 3.4989250184154332e-06, "loss": 0.3159, "step": 16569 }, { "epoch": 0.7762214831123811, "grad_norm": 0.5714615000031549, "learning_rate": 3.4987511627405816e-06, "loss": 0.2835, "step": 16570 }, { "epoch": 0.7762683281023095, "grad_norm": 0.5969855269170447, "learning_rate": 3.498577301318168e-06, "loss": 0.2856, "step": 16571 }, { "epoch": 0.7763151730922377, "grad_norm": 0.5649053022545322, "learning_rate": 3.4984034341491936e-06, "loss": 0.2518, "step": 16572 }, { "epoch": 0.7763620180821661, "grad_norm": 0.6058452796776932, "learning_rate": 3.498229561234658e-06, "loss": 0.2864, "step": 16573 }, { "epoch": 0.7764088630720944, "grad_norm": 0.5550433374730768, "learning_rate": 3.4980556825755614e-06, "loss": 0.2698, "step": 16574 }, { "epoch": 0.7764557080620228, "grad_norm": 0.5664706357350204, "learning_rate": 3.497881798172906e-06, "loss": 0.285, "step": 16575 }, { "epoch": 0.776502553051951, "grad_norm": 0.5626332573776135, "learning_rate": 3.497707908027692e-06, "loss": 0.2874, "step": 16576 }, { "epoch": 0.7765493980418794, "grad_norm": 0.5976855421360264, "learning_rate": 3.4975340121409184e-06, "loss": 0.2867, "step": 16577 }, { "epoch": 0.7765962430318077, "grad_norm": 0.5597900657785451, "learning_rate": 3.497360110513588e-06, "loss": 0.2742, "step": 16578 }, { "epoch": 0.7766430880217361, "grad_norm": 0.6203220987046523, "learning_rate": 3.4971862031467012e-06, "loss": 0.307, "step": 16579 }, { "epoch": 0.7766899330116644, "grad_norm": 0.5888426206473232, "learning_rate": 3.4970122900412586e-06, "loss": 0.2887, "step": 16580 }, { "epoch": 0.7767367780015927, "grad_norm": 0.6309702372196277, "learning_rate": 3.49683837119826e-06, "loss": 0.2744, "step": 16581 }, { "epoch": 0.776783622991521, "grad_norm": 0.5875587554350893, "learning_rate": 3.496664446618708e-06, "loss": 0.265, "step": 16582 }, { "epoch": 0.7768304679814494, "grad_norm": 0.5618601606455808, "learning_rate": 3.4964905163036022e-06, "loss": 0.2599, "step": 16583 }, { "epoch": 0.7768773129713777, "grad_norm": 0.6027769907469195, "learning_rate": 3.4963165802539433e-06, "loss": 0.2618, "step": 16584 }, { "epoch": 0.776924157961306, "grad_norm": 0.6095988118822128, "learning_rate": 3.496142638470734e-06, "loss": 0.2779, "step": 16585 }, { "epoch": 0.7769710029512343, "grad_norm": 0.5974946780089528, "learning_rate": 3.4959686909549735e-06, "loss": 0.31, "step": 16586 }, { "epoch": 0.7770178479411627, "grad_norm": 0.582457728004303, "learning_rate": 3.495794737707664e-06, "loss": 0.2747, "step": 16587 }, { "epoch": 0.777064692931091, "grad_norm": 0.54048189483655, "learning_rate": 3.495620778729806e-06, "loss": 0.2641, "step": 16588 }, { "epoch": 0.7771115379210194, "grad_norm": 0.5699866057363818, "learning_rate": 3.4954468140223996e-06, "loss": 0.2612, "step": 16589 }, { "epoch": 0.7771583829109476, "grad_norm": 0.5808089451446475, "learning_rate": 3.495272843586448e-06, "loss": 0.2738, "step": 16590 }, { "epoch": 0.777205227900876, "grad_norm": 0.6360548577967123, "learning_rate": 3.4950988674229515e-06, "loss": 0.2835, "step": 16591 }, { "epoch": 0.7772520728908043, "grad_norm": 0.5809934306223346, "learning_rate": 3.4949248855329105e-06, "loss": 0.2694, "step": 16592 }, { "epoch": 0.7772989178807327, "grad_norm": 0.57927468455845, "learning_rate": 3.4947508979173274e-06, "loss": 0.3016, "step": 16593 }, { "epoch": 0.7773457628706609, "grad_norm": 0.6187634263809573, "learning_rate": 3.4945769045772026e-06, "loss": 0.2785, "step": 16594 }, { "epoch": 0.7773926078605893, "grad_norm": 0.6113101631228196, "learning_rate": 3.4944029055135377e-06, "loss": 0.2865, "step": 16595 }, { "epoch": 0.7774394528505176, "grad_norm": 0.5427638027805995, "learning_rate": 3.4942289007273334e-06, "loss": 0.2659, "step": 16596 }, { "epoch": 0.777486297840446, "grad_norm": 0.5357434981231721, "learning_rate": 3.494054890219593e-06, "loss": 0.27, "step": 16597 }, { "epoch": 0.7775331428303743, "grad_norm": 0.5304247963751721, "learning_rate": 3.493880873991316e-06, "loss": 0.2665, "step": 16598 }, { "epoch": 0.7775799878203026, "grad_norm": 0.597814896072471, "learning_rate": 3.4937068520435036e-06, "loss": 0.2967, "step": 16599 }, { "epoch": 0.7776268328102309, "grad_norm": 0.5931069289722285, "learning_rate": 3.4935328243771594e-06, "loss": 0.2766, "step": 16600 }, { "epoch": 0.7776736778001593, "grad_norm": 0.5898588647967847, "learning_rate": 3.4933587909932826e-06, "loss": 0.2848, "step": 16601 }, { "epoch": 0.7777205227900876, "grad_norm": 0.556725524501589, "learning_rate": 3.4931847518928753e-06, "loss": 0.2765, "step": 16602 }, { "epoch": 0.7777673677800159, "grad_norm": 0.5839256482580787, "learning_rate": 3.4930107070769396e-06, "loss": 0.2921, "step": 16603 }, { "epoch": 0.7778142127699442, "grad_norm": 0.6107569922926096, "learning_rate": 3.492836656546478e-06, "loss": 0.2851, "step": 16604 }, { "epoch": 0.7778610577598726, "grad_norm": 0.5735380404779781, "learning_rate": 3.4926626003024898e-06, "loss": 0.2832, "step": 16605 }, { "epoch": 0.7779079027498009, "grad_norm": 0.5947775751106327, "learning_rate": 3.492488538345978e-06, "loss": 0.2817, "step": 16606 }, { "epoch": 0.7779547477397293, "grad_norm": 0.5800746911437085, "learning_rate": 3.492314470677944e-06, "loss": 0.2837, "step": 16607 }, { "epoch": 0.7780015927296575, "grad_norm": 0.5677245741278657, "learning_rate": 3.4921403972993905e-06, "loss": 0.2768, "step": 16608 }, { "epoch": 0.7780484377195859, "grad_norm": 0.5885483767158499, "learning_rate": 3.491966318211317e-06, "loss": 0.2976, "step": 16609 }, { "epoch": 0.7780952827095142, "grad_norm": 0.5547638093564632, "learning_rate": 3.491792233414728e-06, "loss": 0.2809, "step": 16610 }, { "epoch": 0.7781421276994426, "grad_norm": 0.5727051438034968, "learning_rate": 3.4916181429106232e-06, "loss": 0.2801, "step": 16611 }, { "epoch": 0.7781889726893708, "grad_norm": 0.6219581327543838, "learning_rate": 3.4914440467000054e-06, "loss": 0.2905, "step": 16612 }, { "epoch": 0.7782358176792992, "grad_norm": 0.5437306381854433, "learning_rate": 3.4912699447838766e-06, "loss": 0.3053, "step": 16613 }, { "epoch": 0.7782826626692275, "grad_norm": 0.6194789816604632, "learning_rate": 3.4910958371632384e-06, "loss": 0.2814, "step": 16614 }, { "epoch": 0.7783295076591559, "grad_norm": 0.654763246519841, "learning_rate": 3.4909217238390925e-06, "loss": 0.3011, "step": 16615 }, { "epoch": 0.7783763526490842, "grad_norm": 0.5416751060857646, "learning_rate": 3.490747604812441e-06, "loss": 0.253, "step": 16616 }, { "epoch": 0.7784231976390125, "grad_norm": 0.567771801604999, "learning_rate": 3.490573480084286e-06, "loss": 0.27, "step": 16617 }, { "epoch": 0.7784700426289408, "grad_norm": 0.5715374185622426, "learning_rate": 3.49039934965563e-06, "loss": 0.2871, "step": 16618 }, { "epoch": 0.7785168876188692, "grad_norm": 0.5589313859948629, "learning_rate": 3.4902252135274745e-06, "loss": 0.2669, "step": 16619 }, { "epoch": 0.7785637326087975, "grad_norm": 0.6013759870625808, "learning_rate": 3.490051071700822e-06, "loss": 0.2641, "step": 16620 }, { "epoch": 0.7786105775987258, "grad_norm": 0.6410313673188072, "learning_rate": 3.489876924176674e-06, "loss": 0.315, "step": 16621 }, { "epoch": 0.7786574225886541, "grad_norm": 0.5687475566603611, "learning_rate": 3.4897027709560333e-06, "loss": 0.2581, "step": 16622 }, { "epoch": 0.7787042675785825, "grad_norm": 0.6046812125234233, "learning_rate": 3.489528612039902e-06, "loss": 0.2776, "step": 16623 }, { "epoch": 0.7787511125685108, "grad_norm": 0.5879706565898778, "learning_rate": 3.489354447429282e-06, "loss": 0.2995, "step": 16624 }, { "epoch": 0.7787979575584392, "grad_norm": 0.568239654031859, "learning_rate": 3.489180277125176e-06, "loss": 0.278, "step": 16625 }, { "epoch": 0.7788448025483674, "grad_norm": 0.6128775863318255, "learning_rate": 3.489006101128586e-06, "loss": 0.2655, "step": 16626 }, { "epoch": 0.7788916475382958, "grad_norm": 0.5160124179243598, "learning_rate": 3.488831919440514e-06, "loss": 0.2657, "step": 16627 }, { "epoch": 0.7789384925282241, "grad_norm": 0.6307537517860972, "learning_rate": 3.4886577320619636e-06, "loss": 0.3042, "step": 16628 }, { "epoch": 0.7789853375181525, "grad_norm": 0.5746369092667861, "learning_rate": 3.4884835389939363e-06, "loss": 0.257, "step": 16629 }, { "epoch": 0.7790321825080807, "grad_norm": 0.5769985701926416, "learning_rate": 3.4883093402374345e-06, "loss": 0.2819, "step": 16630 }, { "epoch": 0.7790790274980091, "grad_norm": 0.600568444278612, "learning_rate": 3.4881351357934613e-06, "loss": 0.2893, "step": 16631 }, { "epoch": 0.7791258724879374, "grad_norm": 0.5851025505749371, "learning_rate": 3.4879609256630183e-06, "loss": 0.2891, "step": 16632 }, { "epoch": 0.7791727174778658, "grad_norm": 0.60767250775044, "learning_rate": 3.4877867098471086e-06, "loss": 0.2764, "step": 16633 }, { "epoch": 0.7792195624677941, "grad_norm": 0.6423124422757627, "learning_rate": 3.4876124883467345e-06, "loss": 0.2814, "step": 16634 }, { "epoch": 0.7792664074577224, "grad_norm": 0.6010157508275259, "learning_rate": 3.487438261162899e-06, "loss": 0.2661, "step": 16635 }, { "epoch": 0.7793132524476507, "grad_norm": 0.6097783445603059, "learning_rate": 3.4872640282966043e-06, "loss": 0.2802, "step": 16636 }, { "epoch": 0.7793600974375791, "grad_norm": 0.5536087436462198, "learning_rate": 3.487089789748853e-06, "loss": 0.2722, "step": 16637 }, { "epoch": 0.7794069424275074, "grad_norm": 0.614279112297843, "learning_rate": 3.4869155455206483e-06, "loss": 0.2928, "step": 16638 }, { "epoch": 0.7794537874174357, "grad_norm": 0.5959458965637671, "learning_rate": 3.486741295612993e-06, "loss": 0.2904, "step": 16639 }, { "epoch": 0.779500632407364, "grad_norm": 0.6109919054930251, "learning_rate": 3.4865670400268896e-06, "loss": 0.2686, "step": 16640 }, { "epoch": 0.7795474773972924, "grad_norm": 0.6007965837393413, "learning_rate": 3.48639277876334e-06, "loss": 0.2983, "step": 16641 }, { "epoch": 0.7795943223872207, "grad_norm": 0.5803419855537095, "learning_rate": 3.4862185118233487e-06, "loss": 0.2956, "step": 16642 }, { "epoch": 0.7796411673771491, "grad_norm": 0.5957825561524024, "learning_rate": 3.486044239207918e-06, "loss": 0.2838, "step": 16643 }, { "epoch": 0.7796880123670773, "grad_norm": 0.566079742869656, "learning_rate": 3.4858699609180497e-06, "loss": 0.297, "step": 16644 }, { "epoch": 0.7797348573570057, "grad_norm": 0.5923166186337331, "learning_rate": 3.4856956769547475e-06, "loss": 0.2926, "step": 16645 }, { "epoch": 0.779781702346934, "grad_norm": 0.6046175771988193, "learning_rate": 3.485521387319015e-06, "loss": 0.2885, "step": 16646 }, { "epoch": 0.7798285473368624, "grad_norm": 0.5543297728359422, "learning_rate": 3.4853470920118547e-06, "loss": 0.2754, "step": 16647 }, { "epoch": 0.7798753923267906, "grad_norm": 0.5926919399049128, "learning_rate": 3.485172791034269e-06, "loss": 0.2678, "step": 16648 }, { "epoch": 0.779922237316719, "grad_norm": 0.5512409124466136, "learning_rate": 3.484998484387262e-06, "loss": 0.2817, "step": 16649 }, { "epoch": 0.7799690823066473, "grad_norm": 0.5912070067705748, "learning_rate": 3.484824172071836e-06, "loss": 0.2874, "step": 16650 }, { "epoch": 0.7800159272965757, "grad_norm": 0.6130108491190648, "learning_rate": 3.4846498540889946e-06, "loss": 0.2778, "step": 16651 }, { "epoch": 0.780062772286504, "grad_norm": 0.5999830081590743, "learning_rate": 3.4844755304397403e-06, "loss": 0.2853, "step": 16652 }, { "epoch": 0.7801096172764322, "grad_norm": 0.5631020873757775, "learning_rate": 3.484301201125078e-06, "loss": 0.2791, "step": 16653 }, { "epoch": 0.7801564622663606, "grad_norm": 0.6120137687970444, "learning_rate": 3.4841268661460082e-06, "loss": 0.2652, "step": 16654 }, { "epoch": 0.780203307256289, "grad_norm": 0.5654051817902483, "learning_rate": 3.4839525255035366e-06, "loss": 0.2627, "step": 16655 }, { "epoch": 0.7802501522462173, "grad_norm": 0.5829233729026302, "learning_rate": 3.4837781791986645e-06, "loss": 0.273, "step": 16656 }, { "epoch": 0.7802969972361455, "grad_norm": 0.5446357629870586, "learning_rate": 3.483603827232397e-06, "loss": 0.2504, "step": 16657 }, { "epoch": 0.7803438422260739, "grad_norm": 0.5690389571069655, "learning_rate": 3.483429469605737e-06, "loss": 0.2773, "step": 16658 }, { "epoch": 0.7803906872160022, "grad_norm": 0.6521180411101707, "learning_rate": 3.483255106319687e-06, "loss": 0.3067, "step": 16659 }, { "epoch": 0.7804375322059306, "grad_norm": 0.6113196774265445, "learning_rate": 3.4830807373752513e-06, "loss": 0.2896, "step": 16660 }, { "epoch": 0.780484377195859, "grad_norm": 0.6107394130378992, "learning_rate": 3.4829063627734327e-06, "loss": 0.2953, "step": 16661 }, { "epoch": 0.7805312221857872, "grad_norm": 0.576598402831347, "learning_rate": 3.482731982515235e-06, "loss": 0.2432, "step": 16662 }, { "epoch": 0.7805780671757155, "grad_norm": 0.5621038408139226, "learning_rate": 3.482557596601662e-06, "loss": 0.2767, "step": 16663 }, { "epoch": 0.7806249121656439, "grad_norm": 0.641257846522262, "learning_rate": 3.4823832050337177e-06, "loss": 0.2979, "step": 16664 }, { "epoch": 0.7806717571555722, "grad_norm": 0.5889968069381972, "learning_rate": 3.482208807812404e-06, "loss": 0.2684, "step": 16665 }, { "epoch": 0.7807186021455005, "grad_norm": 0.5204388742816394, "learning_rate": 3.4820344049387257e-06, "loss": 0.2546, "step": 16666 }, { "epoch": 0.7807654471354288, "grad_norm": 0.5370343544391277, "learning_rate": 3.481859996413686e-06, "loss": 0.2594, "step": 16667 }, { "epoch": 0.7808122921253572, "grad_norm": 0.5990642004508308, "learning_rate": 3.4816855822382895e-06, "loss": 0.2703, "step": 16668 }, { "epoch": 0.7808591371152855, "grad_norm": 0.5272419045818637, "learning_rate": 3.4815111624135385e-06, "loss": 0.2558, "step": 16669 }, { "epoch": 0.7809059821052139, "grad_norm": 0.5558404277459591, "learning_rate": 3.4813367369404377e-06, "loss": 0.2608, "step": 16670 }, { "epoch": 0.7809528270951421, "grad_norm": 0.599279050583374, "learning_rate": 3.4811623058199908e-06, "loss": 0.2812, "step": 16671 }, { "epoch": 0.7809996720850705, "grad_norm": 0.6054365967507511, "learning_rate": 3.4809878690532006e-06, "loss": 0.2856, "step": 16672 }, { "epoch": 0.7810465170749988, "grad_norm": 0.6056902397094496, "learning_rate": 3.4808134266410726e-06, "loss": 0.2688, "step": 16673 }, { "epoch": 0.7810933620649272, "grad_norm": 0.5876093408353246, "learning_rate": 3.48063897858461e-06, "loss": 0.2781, "step": 16674 }, { "epoch": 0.7811402070548554, "grad_norm": 0.5937097774742501, "learning_rate": 3.480464524884816e-06, "loss": 0.2736, "step": 16675 }, { "epoch": 0.7811870520447838, "grad_norm": 0.6267563407119189, "learning_rate": 3.480290065542695e-06, "loss": 0.2893, "step": 16676 }, { "epoch": 0.7812338970347121, "grad_norm": 0.5744656687093154, "learning_rate": 3.480115600559252e-06, "loss": 0.2745, "step": 16677 }, { "epoch": 0.7812807420246405, "grad_norm": 0.5981058318484271, "learning_rate": 3.47994112993549e-06, "loss": 0.2849, "step": 16678 }, { "epoch": 0.7813275870145688, "grad_norm": 0.5753906678275171, "learning_rate": 3.4797666536724118e-06, "loss": 0.2886, "step": 16679 }, { "epoch": 0.7813744320044971, "grad_norm": 0.6351004021605751, "learning_rate": 3.4795921717710234e-06, "loss": 0.2703, "step": 16680 }, { "epoch": 0.7814212769944254, "grad_norm": 0.5753796843253352, "learning_rate": 3.479417684232329e-06, "loss": 0.2795, "step": 16681 }, { "epoch": 0.7814681219843538, "grad_norm": 0.5933707909080194, "learning_rate": 3.479243191057331e-06, "loss": 0.2958, "step": 16682 }, { "epoch": 0.7815149669742821, "grad_norm": 0.5635065895410973, "learning_rate": 3.4790686922470353e-06, "loss": 0.277, "step": 16683 }, { "epoch": 0.7815618119642104, "grad_norm": 0.6097046944280704, "learning_rate": 3.478894187802445e-06, "loss": 0.2966, "step": 16684 }, { "epoch": 0.7816086569541387, "grad_norm": 0.6171803803090741, "learning_rate": 3.4787196777245646e-06, "loss": 0.2779, "step": 16685 }, { "epoch": 0.7816555019440671, "grad_norm": 0.6710151407579308, "learning_rate": 3.4785451620143982e-06, "loss": 0.2962, "step": 16686 }, { "epoch": 0.7817023469339954, "grad_norm": 0.5641199186176757, "learning_rate": 3.4783706406729506e-06, "loss": 0.2736, "step": 16687 }, { "epoch": 0.7817491919239238, "grad_norm": 0.5682367432547027, "learning_rate": 3.478196113701226e-06, "loss": 0.2605, "step": 16688 }, { "epoch": 0.781796036913852, "grad_norm": 0.5392193470315307, "learning_rate": 3.478021581100229e-06, "loss": 0.2725, "step": 16689 }, { "epoch": 0.7818428819037804, "grad_norm": 0.6061441204739509, "learning_rate": 3.4778470428709626e-06, "loss": 0.2754, "step": 16690 }, { "epoch": 0.7818897268937087, "grad_norm": 0.554689619548711, "learning_rate": 3.4776724990144335e-06, "loss": 0.275, "step": 16691 }, { "epoch": 0.7819365718836371, "grad_norm": 0.5909452217215869, "learning_rate": 3.4774979495316443e-06, "loss": 0.28, "step": 16692 }, { "epoch": 0.7819834168735653, "grad_norm": 0.5598309846360451, "learning_rate": 3.4773233944235996e-06, "loss": 0.2483, "step": 16693 }, { "epoch": 0.7820302618634937, "grad_norm": 0.5944546049405219, "learning_rate": 3.4771488336913044e-06, "loss": 0.2792, "step": 16694 }, { "epoch": 0.782077106853422, "grad_norm": 0.5960605629667991, "learning_rate": 3.476974267335764e-06, "loss": 0.2718, "step": 16695 }, { "epoch": 0.7821239518433504, "grad_norm": 0.6664981788785374, "learning_rate": 3.4767996953579817e-06, "loss": 0.2999, "step": 16696 }, { "epoch": 0.7821707968332787, "grad_norm": 0.5753426659303802, "learning_rate": 3.4766251177589625e-06, "loss": 0.2973, "step": 16697 }, { "epoch": 0.782217641823207, "grad_norm": 0.5769642051446198, "learning_rate": 3.476450534539712e-06, "loss": 0.2786, "step": 16698 }, { "epoch": 0.7822644868131353, "grad_norm": 0.586599565646139, "learning_rate": 3.476275945701234e-06, "loss": 0.2936, "step": 16699 }, { "epoch": 0.7823113318030637, "grad_norm": 0.5640171918500366, "learning_rate": 3.476101351244533e-06, "loss": 0.2939, "step": 16700 }, { "epoch": 0.782358176792992, "grad_norm": 0.5892833233294574, "learning_rate": 3.4759267511706142e-06, "loss": 0.2896, "step": 16701 }, { "epoch": 0.7824050217829203, "grad_norm": 0.6164331002288802, "learning_rate": 3.475752145480482e-06, "loss": 0.294, "step": 16702 }, { "epoch": 0.7824518667728486, "grad_norm": 0.6392654048662926, "learning_rate": 3.4755775341751413e-06, "loss": 0.2979, "step": 16703 }, { "epoch": 0.782498711762777, "grad_norm": 0.5935237768662349, "learning_rate": 3.4754029172555974e-06, "loss": 0.2837, "step": 16704 }, { "epoch": 0.7825455567527053, "grad_norm": 0.5784693154123911, "learning_rate": 3.4752282947228542e-06, "loss": 0.2625, "step": 16705 }, { "epoch": 0.7825924017426337, "grad_norm": 0.5902233623163574, "learning_rate": 3.475053666577918e-06, "loss": 0.2905, "step": 16706 }, { "epoch": 0.7826392467325619, "grad_norm": 0.5774317338157302, "learning_rate": 3.4748790328217936e-06, "loss": 0.2936, "step": 16707 }, { "epoch": 0.7826860917224903, "grad_norm": 0.650376622553714, "learning_rate": 3.474704393455484e-06, "loss": 0.312, "step": 16708 }, { "epoch": 0.7827329367124186, "grad_norm": 0.5698727183488268, "learning_rate": 3.4745297484799965e-06, "loss": 0.2769, "step": 16709 }, { "epoch": 0.782779781702347, "grad_norm": 0.5661701419368544, "learning_rate": 3.474355097896336e-06, "loss": 0.2707, "step": 16710 }, { "epoch": 0.7828266266922752, "grad_norm": 0.6078640526288058, "learning_rate": 3.4741804417055046e-06, "loss": 0.2793, "step": 16711 }, { "epoch": 0.7828734716822036, "grad_norm": 0.5978972647245856, "learning_rate": 3.4740057799085115e-06, "loss": 0.2978, "step": 16712 }, { "epoch": 0.7829203166721319, "grad_norm": 0.5972018624899551, "learning_rate": 3.4738311125063596e-06, "loss": 0.298, "step": 16713 }, { "epoch": 0.7829671616620603, "grad_norm": 0.5291368232507112, "learning_rate": 3.473656439500054e-06, "loss": 0.2585, "step": 16714 }, { "epoch": 0.7830140066519886, "grad_norm": 0.5967974637455733, "learning_rate": 3.4734817608905994e-06, "loss": 0.2784, "step": 16715 }, { "epoch": 0.7830608516419169, "grad_norm": 0.600067491038701, "learning_rate": 3.4733070766790037e-06, "loss": 0.3089, "step": 16716 }, { "epoch": 0.7831076966318452, "grad_norm": 0.628184758281777, "learning_rate": 3.4731323868662697e-06, "loss": 0.297, "step": 16717 }, { "epoch": 0.7831545416217736, "grad_norm": 0.5951178506941253, "learning_rate": 3.472957691453403e-06, "loss": 0.2666, "step": 16718 }, { "epoch": 0.7832013866117019, "grad_norm": 0.5888626530742748, "learning_rate": 3.47278299044141e-06, "loss": 0.2903, "step": 16719 }, { "epoch": 0.7832482316016302, "grad_norm": 0.6129703590990754, "learning_rate": 3.472608283831295e-06, "loss": 0.2911, "step": 16720 }, { "epoch": 0.7832950765915585, "grad_norm": 0.5858665503475405, "learning_rate": 3.4724335716240637e-06, "loss": 0.289, "step": 16721 }, { "epoch": 0.7833419215814869, "grad_norm": 0.5586836457211337, "learning_rate": 3.472258853820722e-06, "loss": 0.2702, "step": 16722 }, { "epoch": 0.7833887665714152, "grad_norm": 0.6102183687167162, "learning_rate": 3.4720841304222747e-06, "loss": 0.307, "step": 16723 }, { "epoch": 0.7834356115613436, "grad_norm": 0.5791315945640626, "learning_rate": 3.471909401429727e-06, "loss": 0.2883, "step": 16724 }, { "epoch": 0.7834824565512718, "grad_norm": 0.6182436524833209, "learning_rate": 3.471734666844086e-06, "loss": 0.2773, "step": 16725 }, { "epoch": 0.7835293015412002, "grad_norm": 0.6574261353261107, "learning_rate": 3.4715599266663558e-06, "loss": 0.2952, "step": 16726 }, { "epoch": 0.7835761465311285, "grad_norm": 0.6194090234979176, "learning_rate": 3.4713851808975423e-06, "loss": 0.2842, "step": 16727 }, { "epoch": 0.7836229915210569, "grad_norm": 0.5986194308909013, "learning_rate": 3.471210429538652e-06, "loss": 0.3186, "step": 16728 }, { "epoch": 0.7836698365109851, "grad_norm": 0.570215870549412, "learning_rate": 3.4710356725906887e-06, "loss": 0.2654, "step": 16729 }, { "epoch": 0.7837166815009134, "grad_norm": 0.613698725715801, "learning_rate": 3.47086091005466e-06, "loss": 0.2884, "step": 16730 }, { "epoch": 0.7837635264908418, "grad_norm": 0.6177430850337535, "learning_rate": 3.4706861419315703e-06, "loss": 0.2733, "step": 16731 }, { "epoch": 0.7838103714807702, "grad_norm": 0.5626493420758067, "learning_rate": 3.4705113682224256e-06, "loss": 0.2837, "step": 16732 }, { "epoch": 0.7838572164706985, "grad_norm": 0.5733237907660454, "learning_rate": 3.470336588928232e-06, "loss": 0.2805, "step": 16733 }, { "epoch": 0.7839040614606267, "grad_norm": 0.6233495126797726, "learning_rate": 3.470161804049996e-06, "loss": 0.2847, "step": 16734 }, { "epoch": 0.7839509064505551, "grad_norm": 0.5940119583501486, "learning_rate": 3.4699870135887214e-06, "loss": 0.2936, "step": 16735 }, { "epoch": 0.7839977514404834, "grad_norm": 0.5267538386097521, "learning_rate": 3.469812217545416e-06, "loss": 0.2628, "step": 16736 }, { "epoch": 0.7840445964304118, "grad_norm": 0.6348512325640245, "learning_rate": 3.469637415921085e-06, "loss": 0.298, "step": 16737 }, { "epoch": 0.78409144142034, "grad_norm": 0.5723076705823732, "learning_rate": 3.469462608716735e-06, "loss": 0.2683, "step": 16738 }, { "epoch": 0.7841382864102684, "grad_norm": 0.5444935525601048, "learning_rate": 3.4692877959333704e-06, "loss": 0.2796, "step": 16739 }, { "epoch": 0.7841851314001967, "grad_norm": 0.5900638966399795, "learning_rate": 3.4691129775719983e-06, "loss": 0.2981, "step": 16740 }, { "epoch": 0.7842319763901251, "grad_norm": 0.5890057409938495, "learning_rate": 3.4689381536336253e-06, "loss": 0.2817, "step": 16741 }, { "epoch": 0.7842788213800534, "grad_norm": 0.5729102190831549, "learning_rate": 3.468763324119256e-06, "loss": 0.2682, "step": 16742 }, { "epoch": 0.7843256663699817, "grad_norm": 0.6166289179386714, "learning_rate": 3.468588489029897e-06, "loss": 0.2869, "step": 16743 }, { "epoch": 0.78437251135991, "grad_norm": 0.6040053504212799, "learning_rate": 3.468413648366556e-06, "loss": 0.2748, "step": 16744 }, { "epoch": 0.7844193563498384, "grad_norm": 0.576395414438001, "learning_rate": 3.4682388021302364e-06, "loss": 0.2708, "step": 16745 }, { "epoch": 0.7844662013397667, "grad_norm": 0.6540054986835157, "learning_rate": 3.4680639503219464e-06, "loss": 0.2915, "step": 16746 }, { "epoch": 0.784513046329695, "grad_norm": 0.5766219090739615, "learning_rate": 3.4678890929426923e-06, "loss": 0.2787, "step": 16747 }, { "epoch": 0.7845598913196233, "grad_norm": 0.5963954409024754, "learning_rate": 3.467714229993479e-06, "loss": 0.2588, "step": 16748 }, { "epoch": 0.7846067363095517, "grad_norm": 0.5747415553082762, "learning_rate": 3.467539361475314e-06, "loss": 0.2716, "step": 16749 }, { "epoch": 0.78465358129948, "grad_norm": 0.6092051561424447, "learning_rate": 3.4673644873892032e-06, "loss": 0.2706, "step": 16750 }, { "epoch": 0.7847004262894084, "grad_norm": 0.560239945269961, "learning_rate": 3.4671896077361522e-06, "loss": 0.2951, "step": 16751 }, { "epoch": 0.7847472712793366, "grad_norm": 0.5965333661821235, "learning_rate": 3.4670147225171685e-06, "loss": 0.2716, "step": 16752 }, { "epoch": 0.784794116269265, "grad_norm": 0.6414618834072017, "learning_rate": 3.4668398317332584e-06, "loss": 0.2961, "step": 16753 }, { "epoch": 0.7848409612591933, "grad_norm": 0.5742070419632562, "learning_rate": 3.466664935385428e-06, "loss": 0.2698, "step": 16754 }, { "epoch": 0.7848878062491217, "grad_norm": 0.5808538232256388, "learning_rate": 3.466490033474683e-06, "loss": 0.2746, "step": 16755 }, { "epoch": 0.7849346512390499, "grad_norm": 0.536069523066858, "learning_rate": 3.4663151260020322e-06, "loss": 0.2625, "step": 16756 }, { "epoch": 0.7849814962289783, "grad_norm": 0.5860757905824613, "learning_rate": 3.4661402129684796e-06, "loss": 0.2654, "step": 16757 }, { "epoch": 0.7850283412189066, "grad_norm": 0.6465138752725642, "learning_rate": 3.465965294375033e-06, "loss": 0.297, "step": 16758 }, { "epoch": 0.785075186208835, "grad_norm": 0.6195497653141882, "learning_rate": 3.4657903702227e-06, "loss": 0.2883, "step": 16759 }, { "epoch": 0.7851220311987633, "grad_norm": 0.5701419839690145, "learning_rate": 3.4656154405124854e-06, "loss": 0.2697, "step": 16760 }, { "epoch": 0.7851688761886916, "grad_norm": 0.623833247775768, "learning_rate": 3.4654405052453966e-06, "loss": 0.2676, "step": 16761 }, { "epoch": 0.7852157211786199, "grad_norm": 0.6482862870186255, "learning_rate": 3.4652655644224404e-06, "loss": 0.2937, "step": 16762 }, { "epoch": 0.7852625661685483, "grad_norm": 0.5913144443408656, "learning_rate": 3.465090618044623e-06, "loss": 0.2789, "step": 16763 }, { "epoch": 0.7853094111584766, "grad_norm": 0.5403570515874419, "learning_rate": 3.464915666112952e-06, "loss": 0.2762, "step": 16764 }, { "epoch": 0.7853562561484049, "grad_norm": 0.6249983407284805, "learning_rate": 3.4647407086284344e-06, "loss": 0.291, "step": 16765 }, { "epoch": 0.7854031011383332, "grad_norm": 0.6681841786428167, "learning_rate": 3.464565745592076e-06, "loss": 0.2707, "step": 16766 }, { "epoch": 0.7854499461282616, "grad_norm": 0.675526727367917, "learning_rate": 3.464390777004884e-06, "loss": 0.2942, "step": 16767 }, { "epoch": 0.7854967911181899, "grad_norm": 0.6102049911681777, "learning_rate": 3.4642158028678663e-06, "loss": 0.287, "step": 16768 }, { "epoch": 0.7855436361081183, "grad_norm": 0.6355551378792184, "learning_rate": 3.4640408231820284e-06, "loss": 0.2824, "step": 16769 }, { "epoch": 0.7855904810980465, "grad_norm": 0.6429743978771146, "learning_rate": 3.4638658379483775e-06, "loss": 0.2812, "step": 16770 }, { "epoch": 0.7856373260879749, "grad_norm": 0.5798856004201426, "learning_rate": 3.4636908471679217e-06, "loss": 0.3159, "step": 16771 }, { "epoch": 0.7856841710779032, "grad_norm": 0.610082223082052, "learning_rate": 3.463515850841667e-06, "loss": 0.2632, "step": 16772 }, { "epoch": 0.7857310160678316, "grad_norm": 0.5921259255420763, "learning_rate": 3.4633408489706204e-06, "loss": 0.2794, "step": 16773 }, { "epoch": 0.7857778610577598, "grad_norm": 0.6499311325702996, "learning_rate": 3.46316584155579e-06, "loss": 0.2972, "step": 16774 }, { "epoch": 0.7858247060476882, "grad_norm": 0.564118304951404, "learning_rate": 3.4629908285981818e-06, "loss": 0.2604, "step": 16775 }, { "epoch": 0.7858715510376165, "grad_norm": 0.6062386479811516, "learning_rate": 3.462815810098803e-06, "loss": 0.2671, "step": 16776 }, { "epoch": 0.7859183960275449, "grad_norm": 0.6321957324631877, "learning_rate": 3.462640786058662e-06, "loss": 0.3003, "step": 16777 }, { "epoch": 0.7859652410174732, "grad_norm": 0.6195334452147404, "learning_rate": 3.4624657564787652e-06, "loss": 0.3056, "step": 16778 }, { "epoch": 0.7860120860074015, "grad_norm": 0.6252795992101094, "learning_rate": 3.46229072136012e-06, "loss": 0.3013, "step": 16779 }, { "epoch": 0.7860589309973298, "grad_norm": 0.594868598903056, "learning_rate": 3.4621156807037327e-06, "loss": 0.2644, "step": 16780 }, { "epoch": 0.7861057759872582, "grad_norm": 0.5317283220975227, "learning_rate": 3.4619406345106123e-06, "loss": 0.2549, "step": 16781 }, { "epoch": 0.7861526209771865, "grad_norm": 0.5880627046695774, "learning_rate": 3.4617655827817647e-06, "loss": 0.2756, "step": 16782 }, { "epoch": 0.7861994659671148, "grad_norm": 0.5638291307330723, "learning_rate": 3.4615905255181985e-06, "loss": 0.2704, "step": 16783 }, { "epoch": 0.7862463109570431, "grad_norm": 0.6053540468595855, "learning_rate": 3.4614154627209195e-06, "loss": 0.2896, "step": 16784 }, { "epoch": 0.7862931559469715, "grad_norm": 0.5561604251986613, "learning_rate": 3.461240394390937e-06, "loss": 0.2831, "step": 16785 }, { "epoch": 0.7863400009368998, "grad_norm": 0.6301206937507392, "learning_rate": 3.461065320529258e-06, "loss": 0.2896, "step": 16786 }, { "epoch": 0.7863868459268282, "grad_norm": 0.5736203284572511, "learning_rate": 3.460890241136889e-06, "loss": 0.2723, "step": 16787 }, { "epoch": 0.7864336909167564, "grad_norm": 0.6052222049106802, "learning_rate": 3.4607151562148377e-06, "loss": 0.2849, "step": 16788 }, { "epoch": 0.7864805359066848, "grad_norm": 0.5618706270543703, "learning_rate": 3.460540065764113e-06, "loss": 0.2836, "step": 16789 }, { "epoch": 0.7865273808966131, "grad_norm": 0.5699619759380447, "learning_rate": 3.4603649697857215e-06, "loss": 0.2711, "step": 16790 }, { "epoch": 0.7865742258865415, "grad_norm": 0.5431176215706233, "learning_rate": 3.4601898682806707e-06, "loss": 0.2809, "step": 16791 }, { "epoch": 0.7866210708764697, "grad_norm": 0.5765767857904038, "learning_rate": 3.460014761249969e-06, "loss": 0.2665, "step": 16792 }, { "epoch": 0.7866679158663981, "grad_norm": 0.563145313948342, "learning_rate": 3.4598396486946235e-06, "loss": 0.2756, "step": 16793 }, { "epoch": 0.7867147608563264, "grad_norm": 0.5961051959453749, "learning_rate": 3.4596645306156417e-06, "loss": 0.2765, "step": 16794 }, { "epoch": 0.7867616058462548, "grad_norm": 0.6457129436302962, "learning_rate": 3.459489407014032e-06, "loss": 0.3025, "step": 16795 }, { "epoch": 0.7868084508361831, "grad_norm": 0.544439878976224, "learning_rate": 3.4593142778908018e-06, "loss": 0.2775, "step": 16796 }, { "epoch": 0.7868552958261114, "grad_norm": 0.5687217635615712, "learning_rate": 3.459139143246959e-06, "loss": 0.2598, "step": 16797 }, { "epoch": 0.7869021408160397, "grad_norm": 0.5821535333226707, "learning_rate": 3.458964003083512e-06, "loss": 0.2715, "step": 16798 }, { "epoch": 0.7869489858059681, "grad_norm": 0.572573299359016, "learning_rate": 3.4587888574014673e-06, "loss": 0.2753, "step": 16799 }, { "epoch": 0.7869958307958964, "grad_norm": 0.6559191340572678, "learning_rate": 3.458613706201834e-06, "loss": 0.3052, "step": 16800 }, { "epoch": 0.7870426757858247, "grad_norm": 0.5918390174499145, "learning_rate": 3.4584385494856203e-06, "loss": 0.2817, "step": 16801 }, { "epoch": 0.787089520775753, "grad_norm": 0.6233405160960641, "learning_rate": 3.4582633872538336e-06, "loss": 0.2967, "step": 16802 }, { "epoch": 0.7871363657656814, "grad_norm": 0.5790008246261137, "learning_rate": 3.4580882195074817e-06, "loss": 0.269, "step": 16803 }, { "epoch": 0.7871832107556097, "grad_norm": 0.6198157964238579, "learning_rate": 3.4579130462475725e-06, "loss": 0.2866, "step": 16804 }, { "epoch": 0.7872300557455381, "grad_norm": 0.5395753577337495, "learning_rate": 3.457737867475115e-06, "loss": 0.2762, "step": 16805 }, { "epoch": 0.7872769007354663, "grad_norm": 0.592115548281534, "learning_rate": 3.4575626831911165e-06, "loss": 0.2815, "step": 16806 }, { "epoch": 0.7873237457253947, "grad_norm": 0.6432484019329626, "learning_rate": 3.4573874933965855e-06, "loss": 0.2741, "step": 16807 }, { "epoch": 0.787370590715323, "grad_norm": 0.5754691362006946, "learning_rate": 3.4572122980925304e-06, "loss": 0.2905, "step": 16808 }, { "epoch": 0.7874174357052514, "grad_norm": 0.5702214583126946, "learning_rate": 3.4570370972799583e-06, "loss": 0.2719, "step": 16809 }, { "epoch": 0.7874642806951796, "grad_norm": 0.6653884117277584, "learning_rate": 3.4568618909598793e-06, "loss": 0.2996, "step": 16810 }, { "epoch": 0.787511125685108, "grad_norm": 0.6090209175468323, "learning_rate": 3.4566866791333005e-06, "loss": 0.2841, "step": 16811 }, { "epoch": 0.7875579706750363, "grad_norm": 0.5823984889087371, "learning_rate": 3.4565114618012295e-06, "loss": 0.2705, "step": 16812 }, { "epoch": 0.7876048156649647, "grad_norm": 0.6060776107125581, "learning_rate": 3.456336238964676e-06, "loss": 0.3013, "step": 16813 }, { "epoch": 0.787651660654893, "grad_norm": 0.5955329113383203, "learning_rate": 3.456161010624648e-06, "loss": 0.2862, "step": 16814 }, { "epoch": 0.7876985056448212, "grad_norm": 0.5662016033127271, "learning_rate": 3.4559857767821533e-06, "loss": 0.2986, "step": 16815 }, { "epoch": 0.7877453506347496, "grad_norm": 0.600119345671453, "learning_rate": 3.4558105374382007e-06, "loss": 0.2982, "step": 16816 }, { "epoch": 0.787792195624678, "grad_norm": 0.5458456774411825, "learning_rate": 3.4556352925937986e-06, "loss": 0.2611, "step": 16817 }, { "epoch": 0.7878390406146063, "grad_norm": 0.5904749898669444, "learning_rate": 3.4554600422499563e-06, "loss": 0.2708, "step": 16818 }, { "epoch": 0.7878858856045345, "grad_norm": 0.6127980958748546, "learning_rate": 3.455284786407681e-06, "loss": 0.2925, "step": 16819 }, { "epoch": 0.7879327305944629, "grad_norm": 0.5852498755492193, "learning_rate": 3.4551095250679823e-06, "loss": 0.2775, "step": 16820 }, { "epoch": 0.7879795755843912, "grad_norm": 0.5973365093528673, "learning_rate": 3.4549342582318678e-06, "loss": 0.2925, "step": 16821 }, { "epoch": 0.7880264205743196, "grad_norm": 0.6219663452588435, "learning_rate": 3.4547589859003466e-06, "loss": 0.2821, "step": 16822 }, { "epoch": 0.788073265564248, "grad_norm": 0.5902289658508207, "learning_rate": 3.454583708074428e-06, "loss": 0.2669, "step": 16823 }, { "epoch": 0.7881201105541762, "grad_norm": 0.5493899611843638, "learning_rate": 3.45440842475512e-06, "loss": 0.25, "step": 16824 }, { "epoch": 0.7881669555441045, "grad_norm": 0.5913783800671258, "learning_rate": 3.454233135943431e-06, "loss": 0.2697, "step": 16825 }, { "epoch": 0.7882138005340329, "grad_norm": 0.5640505066719033, "learning_rate": 3.4540578416403704e-06, "loss": 0.2753, "step": 16826 }, { "epoch": 0.7882606455239612, "grad_norm": 0.5788571510514096, "learning_rate": 3.4538825418469463e-06, "loss": 0.2853, "step": 16827 }, { "epoch": 0.7883074905138895, "grad_norm": 0.5915435264227423, "learning_rate": 3.4537072365641685e-06, "loss": 0.2918, "step": 16828 }, { "epoch": 0.7883543355038178, "grad_norm": 0.5378858518210903, "learning_rate": 3.453531925793045e-06, "loss": 0.255, "step": 16829 }, { "epoch": 0.7884011804937462, "grad_norm": 0.6093473668860329, "learning_rate": 3.453356609534585e-06, "loss": 0.2947, "step": 16830 }, { "epoch": 0.7884480254836745, "grad_norm": 0.566786884644955, "learning_rate": 3.4531812877897975e-06, "loss": 0.2894, "step": 16831 }, { "epoch": 0.7884948704736029, "grad_norm": 0.6134857394147328, "learning_rate": 3.4530059605596912e-06, "loss": 0.2784, "step": 16832 }, { "epoch": 0.7885417154635311, "grad_norm": 0.5304340520315907, "learning_rate": 3.4528306278452745e-06, "loss": 0.2954, "step": 16833 }, { "epoch": 0.7885885604534595, "grad_norm": 0.5496359591752396, "learning_rate": 3.4526552896475574e-06, "loss": 0.2734, "step": 16834 }, { "epoch": 0.7886354054433878, "grad_norm": 0.6229793598700015, "learning_rate": 3.452479945967549e-06, "loss": 0.289, "step": 16835 }, { "epoch": 0.7886822504333162, "grad_norm": 0.5265116577845397, "learning_rate": 3.4523045968062573e-06, "loss": 0.2747, "step": 16836 }, { "epoch": 0.7887290954232444, "grad_norm": 0.5942827795270983, "learning_rate": 3.452129242164692e-06, "loss": 0.2806, "step": 16837 }, { "epoch": 0.7887759404131728, "grad_norm": 0.5443529827165288, "learning_rate": 3.4519538820438627e-06, "loss": 0.269, "step": 16838 }, { "epoch": 0.7888227854031011, "grad_norm": 0.6147476646322583, "learning_rate": 3.451778516444778e-06, "loss": 0.2726, "step": 16839 }, { "epoch": 0.7888696303930295, "grad_norm": 0.5745506816360438, "learning_rate": 3.4516031453684466e-06, "loss": 0.273, "step": 16840 }, { "epoch": 0.7889164753829578, "grad_norm": 0.6517149807537658, "learning_rate": 3.4514277688158787e-06, "loss": 0.2736, "step": 16841 }, { "epoch": 0.7889633203728861, "grad_norm": 0.565182148202619, "learning_rate": 3.451252386788083e-06, "loss": 0.2559, "step": 16842 }, { "epoch": 0.7890101653628144, "grad_norm": 0.5411288372310212, "learning_rate": 3.4510769992860693e-06, "loss": 0.2498, "step": 16843 }, { "epoch": 0.7890570103527428, "grad_norm": 0.5574455570503091, "learning_rate": 3.4509016063108462e-06, "loss": 0.2752, "step": 16844 }, { "epoch": 0.7891038553426711, "grad_norm": 0.579775383030774, "learning_rate": 3.4507262078634228e-06, "loss": 0.2772, "step": 16845 }, { "epoch": 0.7891507003325994, "grad_norm": 0.5629358825391029, "learning_rate": 3.4505508039448098e-06, "loss": 0.2604, "step": 16846 }, { "epoch": 0.7891975453225277, "grad_norm": 0.6104852119869443, "learning_rate": 3.450375394556016e-06, "loss": 0.2903, "step": 16847 }, { "epoch": 0.7892443903124561, "grad_norm": 0.6099206149204419, "learning_rate": 3.45019997969805e-06, "loss": 0.2797, "step": 16848 }, { "epoch": 0.7892912353023844, "grad_norm": 0.6392689699987876, "learning_rate": 3.4500245593719223e-06, "loss": 0.2922, "step": 16849 }, { "epoch": 0.7893380802923128, "grad_norm": 0.5899707653406999, "learning_rate": 3.4498491335786423e-06, "loss": 0.2729, "step": 16850 }, { "epoch": 0.789384925282241, "grad_norm": 0.6211763791354641, "learning_rate": 3.4496737023192182e-06, "loss": 0.2916, "step": 16851 }, { "epoch": 0.7894317702721694, "grad_norm": 0.6045552940870533, "learning_rate": 3.4494982655946617e-06, "loss": 0.304, "step": 16852 }, { "epoch": 0.7894786152620977, "grad_norm": 0.6032113704466646, "learning_rate": 3.4493228234059817e-06, "loss": 0.2619, "step": 16853 }, { "epoch": 0.7895254602520261, "grad_norm": 0.6088029029351965, "learning_rate": 3.449147375754186e-06, "loss": 0.2984, "step": 16854 }, { "epoch": 0.7895723052419543, "grad_norm": 0.5928293058389569, "learning_rate": 3.448971922640286e-06, "loss": 0.2857, "step": 16855 }, { "epoch": 0.7896191502318827, "grad_norm": 0.6346335261159565, "learning_rate": 3.4487964640652925e-06, "loss": 0.288, "step": 16856 }, { "epoch": 0.789665995221811, "grad_norm": 0.6176419524666381, "learning_rate": 3.4486210000302127e-06, "loss": 0.2774, "step": 16857 }, { "epoch": 0.7897128402117394, "grad_norm": 0.5677599096990696, "learning_rate": 3.4484455305360576e-06, "loss": 0.2684, "step": 16858 }, { "epoch": 0.7897596852016677, "grad_norm": 0.5318899166472318, "learning_rate": 3.4482700555838374e-06, "loss": 0.2508, "step": 16859 }, { "epoch": 0.789806530191596, "grad_norm": 0.6095385297467426, "learning_rate": 3.4480945751745608e-06, "loss": 0.2943, "step": 16860 }, { "epoch": 0.7898533751815243, "grad_norm": 0.5814973320421573, "learning_rate": 3.447919089309238e-06, "loss": 0.3031, "step": 16861 }, { "epoch": 0.7899002201714527, "grad_norm": 0.5508529789389895, "learning_rate": 3.4477435979888797e-06, "loss": 0.2883, "step": 16862 }, { "epoch": 0.789947065161381, "grad_norm": 0.5688105947851227, "learning_rate": 3.447568101214495e-06, "loss": 0.3133, "step": 16863 }, { "epoch": 0.7899939101513093, "grad_norm": 0.5368201670148465, "learning_rate": 3.447392598987094e-06, "loss": 0.2683, "step": 16864 }, { "epoch": 0.7900407551412376, "grad_norm": 0.6264977994121829, "learning_rate": 3.4472170913076865e-06, "loss": 0.3047, "step": 16865 }, { "epoch": 0.790087600131166, "grad_norm": 0.605021897025703, "learning_rate": 3.4470415781772828e-06, "loss": 0.2726, "step": 16866 }, { "epoch": 0.7901344451210943, "grad_norm": 0.5799880459591096, "learning_rate": 3.446866059596893e-06, "loss": 0.2691, "step": 16867 }, { "epoch": 0.7901812901110227, "grad_norm": 0.6067871634574112, "learning_rate": 3.446690535567527e-06, "loss": 0.284, "step": 16868 }, { "epoch": 0.7902281351009509, "grad_norm": 0.6062189445783285, "learning_rate": 3.446515006090194e-06, "loss": 0.2979, "step": 16869 }, { "epoch": 0.7902749800908793, "grad_norm": 0.5900258751405889, "learning_rate": 3.4463394711659063e-06, "loss": 0.2866, "step": 16870 }, { "epoch": 0.7903218250808076, "grad_norm": 0.5353812919627325, "learning_rate": 3.446163930795672e-06, "loss": 0.263, "step": 16871 }, { "epoch": 0.790368670070736, "grad_norm": 0.6113819036070524, "learning_rate": 3.4459883849805032e-06, "loss": 0.2613, "step": 16872 }, { "epoch": 0.7904155150606642, "grad_norm": 0.5758055701203356, "learning_rate": 3.445812833721408e-06, "loss": 0.263, "step": 16873 }, { "epoch": 0.7904623600505926, "grad_norm": 0.6032135311496948, "learning_rate": 3.445637277019398e-06, "loss": 0.2697, "step": 16874 }, { "epoch": 0.7905092050405209, "grad_norm": 0.6068504466298965, "learning_rate": 3.445461714875483e-06, "loss": 0.278, "step": 16875 }, { "epoch": 0.7905560500304493, "grad_norm": 0.618119338245358, "learning_rate": 3.4452861472906734e-06, "loss": 0.2852, "step": 16876 }, { "epoch": 0.7906028950203776, "grad_norm": 0.5438708043039496, "learning_rate": 3.4451105742659797e-06, "loss": 0.2765, "step": 16877 }, { "epoch": 0.7906497400103059, "grad_norm": 0.6395154466370105, "learning_rate": 3.4449349958024123e-06, "loss": 0.3029, "step": 16878 }, { "epoch": 0.7906965850002342, "grad_norm": 0.5559079364262639, "learning_rate": 3.444759411900981e-06, "loss": 0.2626, "step": 16879 }, { "epoch": 0.7907434299901626, "grad_norm": 0.5893762208754789, "learning_rate": 3.4445838225626975e-06, "loss": 0.2647, "step": 16880 }, { "epoch": 0.7907902749800909, "grad_norm": 0.5942937182912448, "learning_rate": 3.4444082277885715e-06, "loss": 0.2861, "step": 16881 }, { "epoch": 0.7908371199700192, "grad_norm": 0.6225407657375622, "learning_rate": 3.4442326275796135e-06, "loss": 0.2749, "step": 16882 }, { "epoch": 0.7908839649599475, "grad_norm": 0.6492149694210742, "learning_rate": 3.444057021936833e-06, "loss": 0.2834, "step": 16883 }, { "epoch": 0.7909308099498759, "grad_norm": 0.57590365399157, "learning_rate": 3.4438814108612426e-06, "loss": 0.2785, "step": 16884 }, { "epoch": 0.7909776549398042, "grad_norm": 0.6410575821650282, "learning_rate": 3.443705794353852e-06, "loss": 0.2914, "step": 16885 }, { "epoch": 0.7910244999297326, "grad_norm": 0.6593711228005162, "learning_rate": 3.443530172415671e-06, "loss": 0.3063, "step": 16886 }, { "epoch": 0.7910713449196608, "grad_norm": 0.6456721062922034, "learning_rate": 3.4433545450477118e-06, "loss": 0.2871, "step": 16887 }, { "epoch": 0.7911181899095892, "grad_norm": 0.6202752173061128, "learning_rate": 3.443178912250984e-06, "loss": 0.2967, "step": 16888 }, { "epoch": 0.7911650348995175, "grad_norm": 0.5967550721761018, "learning_rate": 3.4430032740264983e-06, "loss": 0.2658, "step": 16889 }, { "epoch": 0.7912118798894459, "grad_norm": 0.5794741556619581, "learning_rate": 3.442827630375266e-06, "loss": 0.2774, "step": 16890 }, { "epoch": 0.7912587248793741, "grad_norm": 0.5987436592655077, "learning_rate": 3.442651981298298e-06, "loss": 0.2839, "step": 16891 }, { "epoch": 0.7913055698693024, "grad_norm": 0.6203721829732918, "learning_rate": 3.4424763267966044e-06, "loss": 0.2734, "step": 16892 }, { "epoch": 0.7913524148592308, "grad_norm": 0.6359653533169377, "learning_rate": 3.442300666871197e-06, "loss": 0.283, "step": 16893 }, { "epoch": 0.7913992598491592, "grad_norm": 0.5777504280459972, "learning_rate": 3.4421250015230856e-06, "loss": 0.2705, "step": 16894 }, { "epoch": 0.7914461048390875, "grad_norm": 0.6344208564489328, "learning_rate": 3.4419493307532813e-06, "loss": 0.2841, "step": 16895 }, { "epoch": 0.7914929498290157, "grad_norm": 0.5500246183566706, "learning_rate": 3.4417736545627955e-06, "loss": 0.2731, "step": 16896 }, { "epoch": 0.7915397948189441, "grad_norm": 0.5877132619093529, "learning_rate": 3.441597972952639e-06, "loss": 0.2792, "step": 16897 }, { "epoch": 0.7915866398088724, "grad_norm": 0.596682353742933, "learning_rate": 3.4414222859238233e-06, "loss": 0.2968, "step": 16898 }, { "epoch": 0.7916334847988008, "grad_norm": 0.624888077374717, "learning_rate": 3.4412465934773587e-06, "loss": 0.3028, "step": 16899 }, { "epoch": 0.791680329788729, "grad_norm": 0.6056284473905884, "learning_rate": 3.4410708956142564e-06, "loss": 0.2892, "step": 16900 }, { "epoch": 0.7917271747786574, "grad_norm": 0.5570685133041936, "learning_rate": 3.440895192335528e-06, "loss": 0.2776, "step": 16901 }, { "epoch": 0.7917740197685857, "grad_norm": 0.5963595718876551, "learning_rate": 3.4407194836421844e-06, "loss": 0.2796, "step": 16902 }, { "epoch": 0.7918208647585141, "grad_norm": 0.5759811333290372, "learning_rate": 3.4405437695352357e-06, "loss": 0.2773, "step": 16903 }, { "epoch": 0.7918677097484424, "grad_norm": 0.6087647342617762, "learning_rate": 3.440368050015694e-06, "loss": 0.2722, "step": 16904 }, { "epoch": 0.7919145547383707, "grad_norm": 0.5871010862231039, "learning_rate": 3.4401923250845713e-06, "loss": 0.2846, "step": 16905 }, { "epoch": 0.791961399728299, "grad_norm": 0.6300083883551836, "learning_rate": 3.4400165947428775e-06, "loss": 0.2743, "step": 16906 }, { "epoch": 0.7920082447182274, "grad_norm": 0.6449598453492187, "learning_rate": 3.4398408589916247e-06, "loss": 0.2825, "step": 16907 }, { "epoch": 0.7920550897081557, "grad_norm": 0.6343397111401722, "learning_rate": 3.439665117831824e-06, "loss": 0.289, "step": 16908 }, { "epoch": 0.792101934698084, "grad_norm": 0.6094405599264603, "learning_rate": 3.4394893712644872e-06, "loss": 0.2808, "step": 16909 }, { "epoch": 0.7921487796880123, "grad_norm": 0.58652918070377, "learning_rate": 3.4393136192906244e-06, "loss": 0.2914, "step": 16910 }, { "epoch": 0.7921956246779407, "grad_norm": 0.5978614063895038, "learning_rate": 3.4391378619112485e-06, "loss": 0.2584, "step": 16911 }, { "epoch": 0.792242469667869, "grad_norm": 0.5760273272826129, "learning_rate": 3.43896209912737e-06, "loss": 0.295, "step": 16912 }, { "epoch": 0.7922893146577974, "grad_norm": 0.6022124816280736, "learning_rate": 3.4387863309400005e-06, "loss": 0.2803, "step": 16913 }, { "epoch": 0.7923361596477256, "grad_norm": 0.6191780417530355, "learning_rate": 3.4386105573501516e-06, "loss": 0.2924, "step": 16914 }, { "epoch": 0.792383004637654, "grad_norm": 0.6362735714185278, "learning_rate": 3.4384347783588346e-06, "loss": 0.291, "step": 16915 }, { "epoch": 0.7924298496275823, "grad_norm": 0.5762175510533819, "learning_rate": 3.4382589939670617e-06, "loss": 0.2919, "step": 16916 }, { "epoch": 0.7924766946175107, "grad_norm": 0.5404323522341972, "learning_rate": 3.4380832041758443e-06, "loss": 0.2553, "step": 16917 }, { "epoch": 0.7925235396074389, "grad_norm": 0.5821960107040002, "learning_rate": 3.4379074089861936e-06, "loss": 0.296, "step": 16918 }, { "epoch": 0.7925703845973673, "grad_norm": 0.5553732850971198, "learning_rate": 3.4377316083991215e-06, "loss": 0.2718, "step": 16919 }, { "epoch": 0.7926172295872956, "grad_norm": 0.5592764786176777, "learning_rate": 3.43755580241564e-06, "loss": 0.2629, "step": 16920 }, { "epoch": 0.792664074577224, "grad_norm": 0.5505120494970699, "learning_rate": 3.4373799910367594e-06, "loss": 0.2665, "step": 16921 }, { "epoch": 0.7927109195671523, "grad_norm": 0.6162918097914714, "learning_rate": 3.4372041742634937e-06, "loss": 0.3036, "step": 16922 }, { "epoch": 0.7927577645570806, "grad_norm": 0.5294094702998076, "learning_rate": 3.4370283520968534e-06, "loss": 0.2533, "step": 16923 }, { "epoch": 0.7928046095470089, "grad_norm": 0.5983655555557293, "learning_rate": 3.4368525245378496e-06, "loss": 0.2723, "step": 16924 }, { "epoch": 0.7928514545369373, "grad_norm": 0.5911100275629216, "learning_rate": 3.4366766915874956e-06, "loss": 0.2756, "step": 16925 }, { "epoch": 0.7928982995268656, "grad_norm": 0.5975724748760592, "learning_rate": 3.4365008532468034e-06, "loss": 0.292, "step": 16926 }, { "epoch": 0.7929451445167939, "grad_norm": 0.594241686211431, "learning_rate": 3.4363250095167833e-06, "loss": 0.284, "step": 16927 }, { "epoch": 0.7929919895067222, "grad_norm": 0.6360248455790504, "learning_rate": 3.4361491603984477e-06, "loss": 0.3019, "step": 16928 }, { "epoch": 0.7930388344966506, "grad_norm": 0.5567943587361458, "learning_rate": 3.43597330589281e-06, "loss": 0.2701, "step": 16929 }, { "epoch": 0.7930856794865789, "grad_norm": 0.5667388246376142, "learning_rate": 3.43579744600088e-06, "loss": 0.267, "step": 16930 }, { "epoch": 0.7931325244765073, "grad_norm": 0.6440012388163591, "learning_rate": 3.4356215807236716e-06, "loss": 0.2912, "step": 16931 }, { "epoch": 0.7931793694664355, "grad_norm": 0.5727560767961952, "learning_rate": 3.435445710062196e-06, "loss": 0.2712, "step": 16932 }, { "epoch": 0.7932262144563639, "grad_norm": 0.5655352667248886, "learning_rate": 3.4352698340174663e-06, "loss": 0.2495, "step": 16933 }, { "epoch": 0.7932730594462922, "grad_norm": 0.5465663287800026, "learning_rate": 3.4350939525904925e-06, "loss": 0.266, "step": 16934 }, { "epoch": 0.7933199044362206, "grad_norm": 0.619193023157657, "learning_rate": 3.434918065782289e-06, "loss": 0.2836, "step": 16935 }, { "epoch": 0.7933667494261488, "grad_norm": 0.5888775498299438, "learning_rate": 3.434742173593866e-06, "loss": 0.2853, "step": 16936 }, { "epoch": 0.7934135944160772, "grad_norm": 0.5762843882696169, "learning_rate": 3.434566276026238e-06, "loss": 0.2751, "step": 16937 }, { "epoch": 0.7934604394060055, "grad_norm": 0.5541375647902739, "learning_rate": 3.434390373080415e-06, "loss": 0.2803, "step": 16938 }, { "epoch": 0.7935072843959339, "grad_norm": 0.5667338804175094, "learning_rate": 3.4342144647574105e-06, "loss": 0.2636, "step": 16939 }, { "epoch": 0.7935541293858622, "grad_norm": 0.5598174175704446, "learning_rate": 3.4340385510582367e-06, "loss": 0.2641, "step": 16940 }, { "epoch": 0.7936009743757905, "grad_norm": 0.579985223813251, "learning_rate": 3.4338626319839058e-06, "loss": 0.2647, "step": 16941 }, { "epoch": 0.7936478193657188, "grad_norm": 0.5837214702278812, "learning_rate": 3.4336867075354303e-06, "loss": 0.2816, "step": 16942 }, { "epoch": 0.7936946643556472, "grad_norm": 0.621547184408836, "learning_rate": 3.433510777713822e-06, "loss": 0.2821, "step": 16943 }, { "epoch": 0.7937415093455755, "grad_norm": 0.6317955155788614, "learning_rate": 3.433334842520094e-06, "loss": 0.2819, "step": 16944 }, { "epoch": 0.7937883543355038, "grad_norm": 0.6189947345137191, "learning_rate": 3.433158901955259e-06, "loss": 0.2806, "step": 16945 }, { "epoch": 0.7938351993254321, "grad_norm": 0.7059803843349683, "learning_rate": 3.4329829560203284e-06, "loss": 0.3118, "step": 16946 }, { "epoch": 0.7938820443153605, "grad_norm": 0.5889940681589374, "learning_rate": 3.432807004716316e-06, "loss": 0.2562, "step": 16947 }, { "epoch": 0.7939288893052888, "grad_norm": 0.5663451403795413, "learning_rate": 3.4326310480442333e-06, "loss": 0.2739, "step": 16948 }, { "epoch": 0.7939757342952172, "grad_norm": 0.6142056299166513, "learning_rate": 3.4324550860050933e-06, "loss": 0.2819, "step": 16949 }, { "epoch": 0.7940225792851454, "grad_norm": 0.572063153357161, "learning_rate": 3.432279118599909e-06, "loss": 0.2884, "step": 16950 }, { "epoch": 0.7940694242750738, "grad_norm": 0.6130379663094029, "learning_rate": 3.432103145829693e-06, "loss": 0.2715, "step": 16951 }, { "epoch": 0.7941162692650021, "grad_norm": 0.618933001150569, "learning_rate": 3.4319271676954565e-06, "loss": 0.2724, "step": 16952 }, { "epoch": 0.7941631142549305, "grad_norm": 0.62943873932764, "learning_rate": 3.4317511841982136e-06, "loss": 0.2805, "step": 16953 }, { "epoch": 0.7942099592448587, "grad_norm": 0.6297251378972543, "learning_rate": 3.431575195338978e-06, "loss": 0.2813, "step": 16954 }, { "epoch": 0.7942568042347871, "grad_norm": 0.5926677790578246, "learning_rate": 3.43139920111876e-06, "loss": 0.2649, "step": 16955 }, { "epoch": 0.7943036492247154, "grad_norm": 0.5648731404871099, "learning_rate": 3.4312232015385745e-06, "loss": 0.2902, "step": 16956 }, { "epoch": 0.7943504942146438, "grad_norm": 0.6110639163081149, "learning_rate": 3.431047196599433e-06, "loss": 0.2816, "step": 16957 }, { "epoch": 0.7943973392045721, "grad_norm": 0.586325464187473, "learning_rate": 3.4308711863023496e-06, "loss": 0.2843, "step": 16958 }, { "epoch": 0.7944441841945004, "grad_norm": 0.5988796453522881, "learning_rate": 3.4306951706483356e-06, "loss": 0.2674, "step": 16959 }, { "epoch": 0.7944910291844287, "grad_norm": 0.6118214821522845, "learning_rate": 3.4305191496384057e-06, "loss": 0.2698, "step": 16960 }, { "epoch": 0.7945378741743571, "grad_norm": 0.5784844154687299, "learning_rate": 3.4303431232735716e-06, "loss": 0.2947, "step": 16961 }, { "epoch": 0.7945847191642854, "grad_norm": 0.5869551052016507, "learning_rate": 3.4301670915548463e-06, "loss": 0.2984, "step": 16962 }, { "epoch": 0.7946315641542137, "grad_norm": 0.5718047752250529, "learning_rate": 3.429991054483244e-06, "loss": 0.272, "step": 16963 }, { "epoch": 0.794678409144142, "grad_norm": 0.5898493250643319, "learning_rate": 3.4298150120597764e-06, "loss": 0.2934, "step": 16964 }, { "epoch": 0.7947252541340704, "grad_norm": 0.5736247213278658, "learning_rate": 3.4296389642854565e-06, "loss": 0.2908, "step": 16965 }, { "epoch": 0.7947720991239987, "grad_norm": 0.5967009117991005, "learning_rate": 3.4294629111612986e-06, "loss": 0.2714, "step": 16966 }, { "epoch": 0.7948189441139271, "grad_norm": 0.5995437339459001, "learning_rate": 3.4292868526883156e-06, "loss": 0.2698, "step": 16967 }, { "epoch": 0.7948657891038553, "grad_norm": 0.5962097926054211, "learning_rate": 3.4291107888675202e-06, "loss": 0.295, "step": 16968 }, { "epoch": 0.7949126340937837, "grad_norm": 0.5983172461620436, "learning_rate": 3.4289347196999255e-06, "loss": 0.265, "step": 16969 }, { "epoch": 0.794959479083712, "grad_norm": 0.5977858275343828, "learning_rate": 3.4287586451865445e-06, "loss": 0.2866, "step": 16970 }, { "epoch": 0.7950063240736404, "grad_norm": 0.6206443766094004, "learning_rate": 3.428582565328392e-06, "loss": 0.2986, "step": 16971 }, { "epoch": 0.7950531690635686, "grad_norm": 0.5846623214119517, "learning_rate": 3.42840648012648e-06, "loss": 0.2954, "step": 16972 }, { "epoch": 0.795100014053497, "grad_norm": 0.5610457469241718, "learning_rate": 3.4282303895818215e-06, "loss": 0.2767, "step": 16973 }, { "epoch": 0.7951468590434253, "grad_norm": 0.5998756082383354, "learning_rate": 3.4280542936954297e-06, "loss": 0.2858, "step": 16974 }, { "epoch": 0.7951937040333537, "grad_norm": 0.6102710813232297, "learning_rate": 3.4278781924683206e-06, "loss": 0.2915, "step": 16975 }, { "epoch": 0.795240549023282, "grad_norm": 0.5646905870618458, "learning_rate": 3.427702085901504e-06, "loss": 0.2738, "step": 16976 }, { "epoch": 0.7952873940132102, "grad_norm": 0.6190996315242214, "learning_rate": 3.427525973995996e-06, "loss": 0.2837, "step": 16977 }, { "epoch": 0.7953342390031386, "grad_norm": 0.6266202729046112, "learning_rate": 3.4273498567528092e-06, "loss": 0.2907, "step": 16978 }, { "epoch": 0.795381083993067, "grad_norm": 0.6668108966916689, "learning_rate": 3.427173734172957e-06, "loss": 0.2883, "step": 16979 }, { "epoch": 0.7954279289829953, "grad_norm": 0.5556052128059928, "learning_rate": 3.4269976062574522e-06, "loss": 0.2625, "step": 16980 }, { "epoch": 0.7954747739729235, "grad_norm": 0.5798377923403928, "learning_rate": 3.42682147300731e-06, "loss": 0.2861, "step": 16981 }, { "epoch": 0.7955216189628519, "grad_norm": 0.5715306823833878, "learning_rate": 3.4266453344235434e-06, "loss": 0.2638, "step": 16982 }, { "epoch": 0.7955684639527802, "grad_norm": 0.5490266951563184, "learning_rate": 3.426469190507165e-06, "loss": 0.2649, "step": 16983 }, { "epoch": 0.7956153089427086, "grad_norm": 0.5679684237283198, "learning_rate": 3.4262930412591897e-06, "loss": 0.2888, "step": 16984 }, { "epoch": 0.795662153932637, "grad_norm": 0.5863566850035903, "learning_rate": 3.4261168866806305e-06, "loss": 0.2867, "step": 16985 }, { "epoch": 0.7957089989225652, "grad_norm": 0.6099102171332281, "learning_rate": 3.425940726772502e-06, "loss": 0.278, "step": 16986 }, { "epoch": 0.7957558439124935, "grad_norm": 0.5642340643770922, "learning_rate": 3.425764561535817e-06, "loss": 0.2663, "step": 16987 }, { "epoch": 0.7958026889024219, "grad_norm": 0.6057747241271455, "learning_rate": 3.425588390971589e-06, "loss": 0.2674, "step": 16988 }, { "epoch": 0.7958495338923502, "grad_norm": 0.5892997030489872, "learning_rate": 3.4254122150808334e-06, "loss": 0.2842, "step": 16989 }, { "epoch": 0.7958963788822785, "grad_norm": 0.7705458466019808, "learning_rate": 3.425236033864563e-06, "loss": 0.271, "step": 16990 }, { "epoch": 0.7959432238722068, "grad_norm": 0.6254385801711411, "learning_rate": 3.4250598473237912e-06, "loss": 0.2934, "step": 16991 }, { "epoch": 0.7959900688621352, "grad_norm": 0.5681313419930918, "learning_rate": 3.424883655459533e-06, "loss": 0.2809, "step": 16992 }, { "epoch": 0.7960369138520635, "grad_norm": 0.544464242216109, "learning_rate": 3.424707458272801e-06, "loss": 0.2597, "step": 16993 }, { "epoch": 0.7960837588419919, "grad_norm": 0.5652159393174688, "learning_rate": 3.4245312557646103e-06, "loss": 0.2835, "step": 16994 }, { "epoch": 0.7961306038319201, "grad_norm": 0.5915599720036986, "learning_rate": 3.424355047935975e-06, "loss": 0.2799, "step": 16995 }, { "epoch": 0.7961774488218485, "grad_norm": 0.566481488429298, "learning_rate": 3.424178834787909e-06, "loss": 0.2608, "step": 16996 }, { "epoch": 0.7962242938117768, "grad_norm": 0.5879203725796339, "learning_rate": 3.4240026163214256e-06, "loss": 0.2637, "step": 16997 }, { "epoch": 0.7962711388017052, "grad_norm": 0.6241396468902912, "learning_rate": 3.423826392537539e-06, "loss": 0.283, "step": 16998 }, { "epoch": 0.7963179837916334, "grad_norm": 0.638328169085319, "learning_rate": 3.4236501634372643e-06, "loss": 0.2771, "step": 16999 }, { "epoch": 0.7963648287815618, "grad_norm": 0.5750748102768662, "learning_rate": 3.4234739290216155e-06, "loss": 0.2664, "step": 17000 }, { "epoch": 0.7964116737714901, "grad_norm": 0.5937504148897625, "learning_rate": 3.4232976892916054e-06, "loss": 0.3282, "step": 17001 }, { "epoch": 0.7964585187614185, "grad_norm": 0.5793960039377044, "learning_rate": 3.42312144424825e-06, "loss": 0.3139, "step": 17002 }, { "epoch": 0.7965053637513468, "grad_norm": 0.6038782452555833, "learning_rate": 3.4229451938925625e-06, "loss": 0.3397, "step": 17003 }, { "epoch": 0.7965522087412751, "grad_norm": 0.5707199068117438, "learning_rate": 3.4227689382255562e-06, "loss": 0.31, "step": 17004 }, { "epoch": 0.7965990537312034, "grad_norm": 0.6221870499301544, "learning_rate": 3.422592677248248e-06, "loss": 0.3198, "step": 17005 }, { "epoch": 0.7966458987211318, "grad_norm": 0.5740460759204797, "learning_rate": 3.4224164109616497e-06, "loss": 0.318, "step": 17006 }, { "epoch": 0.7966927437110601, "grad_norm": 0.6094884016072711, "learning_rate": 3.422240139366778e-06, "loss": 0.3133, "step": 17007 }, { "epoch": 0.7967395887009884, "grad_norm": 0.6096975379808329, "learning_rate": 3.4220638624646452e-06, "loss": 0.3446, "step": 17008 }, { "epoch": 0.7967864336909167, "grad_norm": 0.595099271376989, "learning_rate": 3.421887580256267e-06, "loss": 0.3477, "step": 17009 }, { "epoch": 0.7968332786808451, "grad_norm": 0.6231915302959974, "learning_rate": 3.421711292742658e-06, "loss": 0.3371, "step": 17010 }, { "epoch": 0.7968801236707734, "grad_norm": 0.6451652457213307, "learning_rate": 3.421534999924831e-06, "loss": 0.3228, "step": 17011 }, { "epoch": 0.7969269686607018, "grad_norm": 0.5836554107878859, "learning_rate": 3.4213587018038025e-06, "loss": 0.3145, "step": 17012 }, { "epoch": 0.79697381365063, "grad_norm": 0.6477809180843884, "learning_rate": 3.4211823983805866e-06, "loss": 0.3443, "step": 17013 }, { "epoch": 0.7970206586405584, "grad_norm": 0.5972733178211722, "learning_rate": 3.4210060896561963e-06, "loss": 0.3235, "step": 17014 }, { "epoch": 0.7970675036304867, "grad_norm": 0.5704870710010849, "learning_rate": 3.420829775631648e-06, "loss": 0.3202, "step": 17015 }, { "epoch": 0.7971143486204151, "grad_norm": 0.6213660154503949, "learning_rate": 3.4206534563079565e-06, "loss": 0.3434, "step": 17016 }, { "epoch": 0.7971611936103433, "grad_norm": 0.6116210937606672, "learning_rate": 3.420477131686135e-06, "loss": 0.3148, "step": 17017 }, { "epoch": 0.7972080386002717, "grad_norm": 0.5839222552566401, "learning_rate": 3.420300801767199e-06, "loss": 0.3129, "step": 17018 }, { "epoch": 0.7972548835902, "grad_norm": 0.5701082400994041, "learning_rate": 3.420124466552163e-06, "loss": 0.2957, "step": 17019 }, { "epoch": 0.7973017285801284, "grad_norm": 0.6076317251350336, "learning_rate": 3.4199481260420424e-06, "loss": 0.2998, "step": 17020 }, { "epoch": 0.7973485735700567, "grad_norm": 0.6215005195113917, "learning_rate": 3.4197717802378515e-06, "loss": 0.3354, "step": 17021 }, { "epoch": 0.797395418559985, "grad_norm": 0.5971527797115509, "learning_rate": 3.4195954291406054e-06, "loss": 0.3285, "step": 17022 }, { "epoch": 0.7974422635499133, "grad_norm": 0.579036652239215, "learning_rate": 3.4194190727513178e-06, "loss": 0.3301, "step": 17023 }, { "epoch": 0.7974891085398417, "grad_norm": 0.6631396019253196, "learning_rate": 3.4192427110710058e-06, "loss": 0.3245, "step": 17024 }, { "epoch": 0.79753595352977, "grad_norm": 0.5790779298218766, "learning_rate": 3.419066344100682e-06, "loss": 0.3233, "step": 17025 }, { "epoch": 0.7975827985196983, "grad_norm": 0.587306913739114, "learning_rate": 3.4188899718413626e-06, "loss": 0.3334, "step": 17026 }, { "epoch": 0.7976296435096266, "grad_norm": 0.5549745137940861, "learning_rate": 3.418713594294063e-06, "loss": 0.2982, "step": 17027 }, { "epoch": 0.797676488499555, "grad_norm": 0.5535380190894807, "learning_rate": 3.418537211459797e-06, "loss": 0.3138, "step": 17028 }, { "epoch": 0.7977233334894833, "grad_norm": 0.5767355149935813, "learning_rate": 3.41836082333958e-06, "loss": 0.3164, "step": 17029 }, { "epoch": 0.7977701784794117, "grad_norm": 0.6078804260191407, "learning_rate": 3.418184429934428e-06, "loss": 0.3244, "step": 17030 }, { "epoch": 0.7978170234693399, "grad_norm": 0.6313147926337186, "learning_rate": 3.4180080312453555e-06, "loss": 0.3558, "step": 17031 }, { "epoch": 0.7978638684592683, "grad_norm": 0.5635476475306045, "learning_rate": 3.4178316272733763e-06, "loss": 0.3186, "step": 17032 }, { "epoch": 0.7979107134491966, "grad_norm": 0.5963212642312098, "learning_rate": 3.417655218019508e-06, "loss": 0.307, "step": 17033 }, { "epoch": 0.797957558439125, "grad_norm": 0.5412885790963132, "learning_rate": 3.417478803484764e-06, "loss": 0.2952, "step": 17034 }, { "epoch": 0.7980044034290532, "grad_norm": 0.563336059267598, "learning_rate": 3.4173023836701603e-06, "loss": 0.3242, "step": 17035 }, { "epoch": 0.7980512484189816, "grad_norm": 0.5575423935385628, "learning_rate": 3.417125958576712e-06, "loss": 0.2992, "step": 17036 }, { "epoch": 0.7980980934089099, "grad_norm": 0.5842218027319768, "learning_rate": 3.4169495282054334e-06, "loss": 0.3396, "step": 17037 }, { "epoch": 0.7981449383988383, "grad_norm": 0.5573204113079182, "learning_rate": 3.4167730925573416e-06, "loss": 0.3195, "step": 17038 }, { "epoch": 0.7981917833887666, "grad_norm": 0.5621129971833012, "learning_rate": 3.4165966516334513e-06, "loss": 0.3106, "step": 17039 }, { "epoch": 0.7982386283786949, "grad_norm": 0.6414798333917209, "learning_rate": 3.4164202054347773e-06, "loss": 0.3232, "step": 17040 }, { "epoch": 0.7982854733686232, "grad_norm": 0.5852448384798187, "learning_rate": 3.416243753962335e-06, "loss": 0.3191, "step": 17041 }, { "epoch": 0.7983323183585516, "grad_norm": 0.5999191682242055, "learning_rate": 3.416067297217141e-06, "loss": 0.3147, "step": 17042 }, { "epoch": 0.7983791633484799, "grad_norm": 0.6116597494211732, "learning_rate": 3.415890835200209e-06, "loss": 0.3213, "step": 17043 }, { "epoch": 0.7984260083384082, "grad_norm": 0.5906990730760284, "learning_rate": 3.4157143679125555e-06, "loss": 0.3091, "step": 17044 }, { "epoch": 0.7984728533283365, "grad_norm": 0.5775944258740603, "learning_rate": 3.415537895355197e-06, "loss": 0.3065, "step": 17045 }, { "epoch": 0.7985196983182649, "grad_norm": 0.6717585484160602, "learning_rate": 3.415361417529147e-06, "loss": 0.3464, "step": 17046 }, { "epoch": 0.7985665433081932, "grad_norm": 0.6151308006337446, "learning_rate": 3.4151849344354225e-06, "loss": 0.3328, "step": 17047 }, { "epoch": 0.7986133882981216, "grad_norm": 0.5668276684380555, "learning_rate": 3.415008446075039e-06, "loss": 0.3125, "step": 17048 }, { "epoch": 0.7986602332880498, "grad_norm": 0.6248293099170764, "learning_rate": 3.4148319524490115e-06, "loss": 0.3375, "step": 17049 }, { "epoch": 0.7987070782779782, "grad_norm": 0.5628565597478912, "learning_rate": 3.414655453558356e-06, "loss": 0.3103, "step": 17050 }, { "epoch": 0.7987539232679065, "grad_norm": 0.6077668133624109, "learning_rate": 3.4144789494040888e-06, "loss": 0.3231, "step": 17051 }, { "epoch": 0.7988007682578349, "grad_norm": 0.5460328012434079, "learning_rate": 3.414302439987225e-06, "loss": 0.2953, "step": 17052 }, { "epoch": 0.7988476132477631, "grad_norm": 0.5750620957401873, "learning_rate": 3.4141259253087795e-06, "loss": 0.3225, "step": 17053 }, { "epoch": 0.7988944582376915, "grad_norm": 0.6424345915160088, "learning_rate": 3.4139494053697697e-06, "loss": 0.3235, "step": 17054 }, { "epoch": 0.7989413032276198, "grad_norm": 0.5795960783369621, "learning_rate": 3.4137728801712105e-06, "loss": 0.3265, "step": 17055 }, { "epoch": 0.7989881482175482, "grad_norm": 0.5458249220425789, "learning_rate": 3.413596349714119e-06, "loss": 0.3122, "step": 17056 }, { "epoch": 0.7990349932074765, "grad_norm": 0.5477882291853653, "learning_rate": 3.4134198139995095e-06, "loss": 0.3145, "step": 17057 }, { "epoch": 0.7990818381974047, "grad_norm": 0.5536381005013848, "learning_rate": 3.413243273028398e-06, "loss": 0.3113, "step": 17058 }, { "epoch": 0.7991286831873331, "grad_norm": 0.5829807454046095, "learning_rate": 3.413066726801802e-06, "loss": 0.3066, "step": 17059 }, { "epoch": 0.7991755281772615, "grad_norm": 0.580807955191768, "learning_rate": 3.4128901753207362e-06, "loss": 0.3203, "step": 17060 }, { "epoch": 0.7992223731671898, "grad_norm": 0.6244354863277788, "learning_rate": 3.4127136185862163e-06, "loss": 0.3119, "step": 17061 }, { "epoch": 0.799269218157118, "grad_norm": 0.5948104661494776, "learning_rate": 3.4125370565992594e-06, "loss": 0.3261, "step": 17062 }, { "epoch": 0.7993160631470464, "grad_norm": 0.5538930559875186, "learning_rate": 3.4123604893608808e-06, "loss": 0.291, "step": 17063 }, { "epoch": 0.7993629081369747, "grad_norm": 0.6048464719950233, "learning_rate": 3.4121839168720973e-06, "loss": 0.321, "step": 17064 }, { "epoch": 0.7994097531269031, "grad_norm": 0.5635370097289366, "learning_rate": 3.4120073391339237e-06, "loss": 0.3198, "step": 17065 }, { "epoch": 0.7994565981168315, "grad_norm": 0.5683269191413354, "learning_rate": 3.4118307561473785e-06, "loss": 0.3191, "step": 17066 }, { "epoch": 0.7995034431067597, "grad_norm": 0.5929994633085689, "learning_rate": 3.4116541679134756e-06, "loss": 0.3283, "step": 17067 }, { "epoch": 0.799550288096688, "grad_norm": 0.571113968976484, "learning_rate": 3.411477574433232e-06, "loss": 0.3071, "step": 17068 }, { "epoch": 0.7995971330866164, "grad_norm": 0.6332685082770442, "learning_rate": 3.411300975707665e-06, "loss": 0.3192, "step": 17069 }, { "epoch": 0.7996439780765447, "grad_norm": 0.6118691666356225, "learning_rate": 3.411124371737789e-06, "loss": 0.3451, "step": 17070 }, { "epoch": 0.799690823066473, "grad_norm": 0.6118761783712099, "learning_rate": 3.4109477625246214e-06, "loss": 0.3177, "step": 17071 }, { "epoch": 0.7997376680564013, "grad_norm": 0.6120546695306497, "learning_rate": 3.4107711480691784e-06, "loss": 0.3568, "step": 17072 }, { "epoch": 0.7997845130463297, "grad_norm": 0.6176434431903544, "learning_rate": 3.410594528372477e-06, "loss": 0.3224, "step": 17073 }, { "epoch": 0.799831358036258, "grad_norm": 0.5659048173917777, "learning_rate": 3.4104179034355323e-06, "loss": 0.3242, "step": 17074 }, { "epoch": 0.7998782030261864, "grad_norm": 0.5404757770240457, "learning_rate": 3.4102412732593616e-06, "loss": 0.3052, "step": 17075 }, { "epoch": 0.7999250480161146, "grad_norm": 0.6305029676283133, "learning_rate": 3.4100646378449807e-06, "loss": 0.3091, "step": 17076 }, { "epoch": 0.799971893006043, "grad_norm": 0.5826009896206865, "learning_rate": 3.409887997193407e-06, "loss": 0.3112, "step": 17077 }, { "epoch": 0.8000187379959713, "grad_norm": 0.6237394730994239, "learning_rate": 3.4097113513056564e-06, "loss": 0.3286, "step": 17078 }, { "epoch": 0.8000655829858997, "grad_norm": 0.629925041465942, "learning_rate": 3.4095347001827455e-06, "loss": 0.3024, "step": 17079 }, { "epoch": 0.8001124279758279, "grad_norm": 0.6148995255921418, "learning_rate": 3.4093580438256914e-06, "loss": 0.3277, "step": 17080 }, { "epoch": 0.8001592729657563, "grad_norm": 0.5918698563550401, "learning_rate": 3.4091813822355102e-06, "loss": 0.296, "step": 17081 }, { "epoch": 0.8002061179556846, "grad_norm": 0.5795322826266175, "learning_rate": 3.4090047154132184e-06, "loss": 0.3143, "step": 17082 }, { "epoch": 0.800252962945613, "grad_norm": 0.6209772004854075, "learning_rate": 3.408828043359833e-06, "loss": 0.3171, "step": 17083 }, { "epoch": 0.8002998079355413, "grad_norm": 0.61298707823353, "learning_rate": 3.408651366076371e-06, "loss": 0.3413, "step": 17084 }, { "epoch": 0.8003466529254696, "grad_norm": 0.5384767128066763, "learning_rate": 3.408474683563848e-06, "loss": 0.3052, "step": 17085 }, { "epoch": 0.8003934979153979, "grad_norm": 0.5886222124873897, "learning_rate": 3.4082979958232822e-06, "loss": 0.3049, "step": 17086 }, { "epoch": 0.8004403429053263, "grad_norm": 0.5782701273873166, "learning_rate": 3.4081213028556896e-06, "loss": 0.3219, "step": 17087 }, { "epoch": 0.8004871878952546, "grad_norm": 0.5941203586499313, "learning_rate": 3.407944604662088e-06, "loss": 0.3172, "step": 17088 }, { "epoch": 0.8005340328851829, "grad_norm": 0.5789513931032569, "learning_rate": 3.4077679012434916e-06, "loss": 0.3323, "step": 17089 }, { "epoch": 0.8005808778751112, "grad_norm": 0.5443991364974211, "learning_rate": 3.4075911926009203e-06, "loss": 0.3034, "step": 17090 }, { "epoch": 0.8006277228650396, "grad_norm": 0.5992030808972486, "learning_rate": 3.4074144787353898e-06, "loss": 0.3174, "step": 17091 }, { "epoch": 0.8006745678549679, "grad_norm": 0.6209562710121321, "learning_rate": 3.4072377596479167e-06, "loss": 0.3279, "step": 17092 }, { "epoch": 0.8007214128448963, "grad_norm": 0.569918227201259, "learning_rate": 3.4070610353395177e-06, "loss": 0.3197, "step": 17093 }, { "epoch": 0.8007682578348245, "grad_norm": 0.635414374347227, "learning_rate": 3.406884305811212e-06, "loss": 0.3222, "step": 17094 }, { "epoch": 0.8008151028247529, "grad_norm": 0.6523380032792716, "learning_rate": 3.406707571064014e-06, "loss": 0.3302, "step": 17095 }, { "epoch": 0.8008619478146812, "grad_norm": 0.6549185971620096, "learning_rate": 3.4065308310989415e-06, "loss": 0.365, "step": 17096 }, { "epoch": 0.8009087928046096, "grad_norm": 0.6985110858926904, "learning_rate": 3.4063540859170126e-06, "loss": 0.3272, "step": 17097 }, { "epoch": 0.8009556377945378, "grad_norm": 0.6002954164982518, "learning_rate": 3.4061773355192436e-06, "loss": 0.3052, "step": 17098 }, { "epoch": 0.8010024827844662, "grad_norm": 0.6170841079332453, "learning_rate": 3.4060005799066515e-06, "loss": 0.318, "step": 17099 }, { "epoch": 0.8010493277743945, "grad_norm": 0.61778917828122, "learning_rate": 3.405823819080254e-06, "loss": 0.3506, "step": 17100 }, { "epoch": 0.8010961727643229, "grad_norm": 0.5849372806137031, "learning_rate": 3.4056470530410683e-06, "loss": 0.313, "step": 17101 }, { "epoch": 0.8011430177542512, "grad_norm": 0.589674014433771, "learning_rate": 3.405470281790111e-06, "loss": 0.3178, "step": 17102 }, { "epoch": 0.8011898627441795, "grad_norm": 0.6103067141270772, "learning_rate": 3.4052935053284005e-06, "loss": 0.3471, "step": 17103 }, { "epoch": 0.8012367077341078, "grad_norm": 0.557522695508912, "learning_rate": 3.405116723656953e-06, "loss": 0.3171, "step": 17104 }, { "epoch": 0.8012835527240362, "grad_norm": 0.5716699538942644, "learning_rate": 3.4049399367767854e-06, "loss": 0.3087, "step": 17105 }, { "epoch": 0.8013303977139645, "grad_norm": 0.5808008729104381, "learning_rate": 3.4047631446889174e-06, "loss": 0.3215, "step": 17106 }, { "epoch": 0.8013772427038928, "grad_norm": 0.6174922916686181, "learning_rate": 3.4045863473943643e-06, "loss": 0.3391, "step": 17107 }, { "epoch": 0.8014240876938211, "grad_norm": 0.5548985840911332, "learning_rate": 3.404409544894144e-06, "loss": 0.3138, "step": 17108 }, { "epoch": 0.8014709326837495, "grad_norm": 0.6108491871348751, "learning_rate": 3.404232737189274e-06, "loss": 0.3309, "step": 17109 }, { "epoch": 0.8015177776736778, "grad_norm": 0.5824537307822448, "learning_rate": 3.404055924280772e-06, "loss": 0.3168, "step": 17110 }, { "epoch": 0.8015646226636062, "grad_norm": 0.6050865232695564, "learning_rate": 3.403879106169655e-06, "loss": 0.3218, "step": 17111 }, { "epoch": 0.8016114676535344, "grad_norm": 0.5978546677673441, "learning_rate": 3.403702282856942e-06, "loss": 0.3225, "step": 17112 }, { "epoch": 0.8016583126434628, "grad_norm": 0.6096884129136615, "learning_rate": 3.403525454343648e-06, "loss": 0.3332, "step": 17113 }, { "epoch": 0.8017051576333911, "grad_norm": 0.5875349177076665, "learning_rate": 3.403348620630793e-06, "loss": 0.3111, "step": 17114 }, { "epoch": 0.8017520026233195, "grad_norm": 0.6111134426111992, "learning_rate": 3.4031717817193943e-06, "loss": 0.3269, "step": 17115 }, { "epoch": 0.8017988476132477, "grad_norm": 0.6077597834681182, "learning_rate": 3.402994937610468e-06, "loss": 0.3202, "step": 17116 }, { "epoch": 0.8018456926031761, "grad_norm": 0.5462335073001832, "learning_rate": 3.402818088305033e-06, "loss": 0.2957, "step": 17117 }, { "epoch": 0.8018925375931044, "grad_norm": 0.5747669361190263, "learning_rate": 3.4026412338041072e-06, "loss": 0.2971, "step": 17118 }, { "epoch": 0.8019393825830328, "grad_norm": 0.5908005791246171, "learning_rate": 3.4024643741087075e-06, "loss": 0.333, "step": 17119 }, { "epoch": 0.8019862275729611, "grad_norm": 0.5543877416118305, "learning_rate": 3.402287509219852e-06, "loss": 0.3184, "step": 17120 }, { "epoch": 0.8020330725628894, "grad_norm": 0.6377443588436692, "learning_rate": 3.4021106391385595e-06, "loss": 0.3515, "step": 17121 }, { "epoch": 0.8020799175528177, "grad_norm": 0.6168342135176369, "learning_rate": 3.4019337638658466e-06, "loss": 0.3344, "step": 17122 }, { "epoch": 0.8021267625427461, "grad_norm": 0.6607964595541277, "learning_rate": 3.4017568834027313e-06, "loss": 0.3024, "step": 17123 }, { "epoch": 0.8021736075326744, "grad_norm": 0.5897281685780013, "learning_rate": 3.4015799977502323e-06, "loss": 0.3344, "step": 17124 }, { "epoch": 0.8022204525226027, "grad_norm": 0.577316624942551, "learning_rate": 3.4014031069093667e-06, "loss": 0.3138, "step": 17125 }, { "epoch": 0.802267297512531, "grad_norm": 0.5751960271712041, "learning_rate": 3.401226210881153e-06, "loss": 0.3226, "step": 17126 }, { "epoch": 0.8023141425024594, "grad_norm": 0.6319006074810071, "learning_rate": 3.4010493096666087e-06, "loss": 0.3319, "step": 17127 }, { "epoch": 0.8023609874923877, "grad_norm": 0.6335823514795149, "learning_rate": 3.4008724032667517e-06, "loss": 0.3362, "step": 17128 }, { "epoch": 0.8024078324823161, "grad_norm": 0.5995683892758409, "learning_rate": 3.400695491682601e-06, "loss": 0.3123, "step": 17129 }, { "epoch": 0.8024546774722443, "grad_norm": 0.57472844140816, "learning_rate": 3.4005185749151748e-06, "loss": 0.3166, "step": 17130 }, { "epoch": 0.8025015224621727, "grad_norm": 0.5688655863728593, "learning_rate": 3.4003416529654894e-06, "loss": 0.2815, "step": 17131 }, { "epoch": 0.802548367452101, "grad_norm": 0.6148516385812312, "learning_rate": 3.4001647258345645e-06, "loss": 0.3267, "step": 17132 }, { "epoch": 0.8025952124420294, "grad_norm": 0.6200637252661556, "learning_rate": 3.3999877935234182e-06, "loss": 0.3392, "step": 17133 }, { "epoch": 0.8026420574319576, "grad_norm": 0.6511054323773668, "learning_rate": 3.3998108560330674e-06, "loss": 0.3586, "step": 17134 }, { "epoch": 0.802688902421886, "grad_norm": 0.5461173639001087, "learning_rate": 3.3996339133645318e-06, "loss": 0.2843, "step": 17135 }, { "epoch": 0.8027357474118143, "grad_norm": 0.5790879373853315, "learning_rate": 3.3994569655188296e-06, "loss": 0.2916, "step": 17136 }, { "epoch": 0.8027825924017427, "grad_norm": 0.5864178283546437, "learning_rate": 3.399280012496978e-06, "loss": 0.3105, "step": 17137 }, { "epoch": 0.802829437391671, "grad_norm": 0.6031300687925113, "learning_rate": 3.399103054299996e-06, "loss": 0.3116, "step": 17138 }, { "epoch": 0.8028762823815992, "grad_norm": 0.5614652319735334, "learning_rate": 3.3989260909289022e-06, "loss": 0.3154, "step": 17139 }, { "epoch": 0.8029231273715276, "grad_norm": 0.6270079687962368, "learning_rate": 3.3987491223847146e-06, "loss": 0.3391, "step": 17140 }, { "epoch": 0.802969972361456, "grad_norm": 0.5672580571297232, "learning_rate": 3.3985721486684514e-06, "loss": 0.3097, "step": 17141 }, { "epoch": 0.8030168173513843, "grad_norm": 0.5677833560488097, "learning_rate": 3.3983951697811318e-06, "loss": 0.3009, "step": 17142 }, { "epoch": 0.8030636623413125, "grad_norm": 0.6036300560262292, "learning_rate": 3.398218185723774e-06, "loss": 0.3124, "step": 17143 }, { "epoch": 0.8031105073312409, "grad_norm": 0.6006120868521932, "learning_rate": 3.3980411964973954e-06, "loss": 0.3227, "step": 17144 }, { "epoch": 0.8031573523211692, "grad_norm": 0.6284642521225086, "learning_rate": 3.3978642021030158e-06, "loss": 0.3011, "step": 17145 }, { "epoch": 0.8032041973110976, "grad_norm": 0.5676034577848731, "learning_rate": 3.3976872025416535e-06, "loss": 0.3083, "step": 17146 }, { "epoch": 0.803251042301026, "grad_norm": 0.6140368776284375, "learning_rate": 3.397510197814327e-06, "loss": 0.3355, "step": 17147 }, { "epoch": 0.8032978872909542, "grad_norm": 0.569741919135034, "learning_rate": 3.397333187922055e-06, "loss": 0.3205, "step": 17148 }, { "epoch": 0.8033447322808825, "grad_norm": 0.5675743699821187, "learning_rate": 3.397156172865856e-06, "loss": 0.2983, "step": 17149 }, { "epoch": 0.8033915772708109, "grad_norm": 0.5955787183233984, "learning_rate": 3.3969791526467486e-06, "loss": 0.3338, "step": 17150 }, { "epoch": 0.8034384222607392, "grad_norm": 0.613745625053364, "learning_rate": 3.3968021272657515e-06, "loss": 0.3316, "step": 17151 }, { "epoch": 0.8034852672506675, "grad_norm": 0.5689532664488625, "learning_rate": 3.396625096723884e-06, "loss": 0.3159, "step": 17152 }, { "epoch": 0.8035321122405958, "grad_norm": 0.567935936894977, "learning_rate": 3.396448061022164e-06, "loss": 0.3085, "step": 17153 }, { "epoch": 0.8035789572305242, "grad_norm": 0.560176041428213, "learning_rate": 3.3962710201616104e-06, "loss": 0.3131, "step": 17154 }, { "epoch": 0.8036258022204525, "grad_norm": 0.6377782629296677, "learning_rate": 3.396093974143243e-06, "loss": 0.3084, "step": 17155 }, { "epoch": 0.8036726472103809, "grad_norm": 0.612873624462806, "learning_rate": 3.39591692296808e-06, "loss": 0.315, "step": 17156 }, { "epoch": 0.8037194922003091, "grad_norm": 0.5579720434965428, "learning_rate": 3.39573986663714e-06, "loss": 0.3164, "step": 17157 }, { "epoch": 0.8037663371902375, "grad_norm": 0.5425926101356786, "learning_rate": 3.395562805151443e-06, "loss": 0.3149, "step": 17158 }, { "epoch": 0.8038131821801658, "grad_norm": 0.5898142868639199, "learning_rate": 3.395385738512006e-06, "loss": 0.3201, "step": 17159 }, { "epoch": 0.8038600271700942, "grad_norm": 0.6700072163985606, "learning_rate": 3.3952086667198497e-06, "loss": 0.341, "step": 17160 }, { "epoch": 0.8039068721600224, "grad_norm": 0.579025580733767, "learning_rate": 3.395031589775992e-06, "loss": 0.2982, "step": 17161 }, { "epoch": 0.8039537171499508, "grad_norm": 0.6242812290054713, "learning_rate": 3.3948545076814534e-06, "loss": 0.3228, "step": 17162 }, { "epoch": 0.8040005621398791, "grad_norm": 0.5557561916572806, "learning_rate": 3.394677420437251e-06, "loss": 0.3126, "step": 17163 }, { "epoch": 0.8040474071298075, "grad_norm": 0.6667527821987494, "learning_rate": 3.394500328044406e-06, "loss": 0.3292, "step": 17164 }, { "epoch": 0.8040942521197358, "grad_norm": 0.6174790909785887, "learning_rate": 3.3943232305039355e-06, "loss": 0.341, "step": 17165 }, { "epoch": 0.8041410971096641, "grad_norm": 0.6358206614681229, "learning_rate": 3.39414612781686e-06, "loss": 0.3281, "step": 17166 }, { "epoch": 0.8041879420995924, "grad_norm": 0.6507013725693132, "learning_rate": 3.393969019984198e-06, "loss": 0.3332, "step": 17167 }, { "epoch": 0.8042347870895208, "grad_norm": 0.6011633731633674, "learning_rate": 3.393791907006969e-06, "loss": 0.3337, "step": 17168 }, { "epoch": 0.8042816320794491, "grad_norm": 0.625599085341678, "learning_rate": 3.3936147888861924e-06, "loss": 0.2981, "step": 17169 }, { "epoch": 0.8043284770693774, "grad_norm": 0.5613701730722155, "learning_rate": 3.3934376656228874e-06, "loss": 0.3034, "step": 17170 }, { "epoch": 0.8043753220593057, "grad_norm": 0.5913959966022461, "learning_rate": 3.3932605372180734e-06, "loss": 0.3217, "step": 17171 }, { "epoch": 0.8044221670492341, "grad_norm": 0.5988961648338988, "learning_rate": 3.393083403672769e-06, "loss": 0.3227, "step": 17172 }, { "epoch": 0.8044690120391624, "grad_norm": 0.5916514113594171, "learning_rate": 3.392906264987994e-06, "loss": 0.3216, "step": 17173 }, { "epoch": 0.8045158570290908, "grad_norm": 0.6136405856027396, "learning_rate": 3.3927291211647685e-06, "loss": 0.3311, "step": 17174 }, { "epoch": 0.804562702019019, "grad_norm": 0.6143909263395849, "learning_rate": 3.3925519722041106e-06, "loss": 0.3182, "step": 17175 }, { "epoch": 0.8046095470089474, "grad_norm": 0.5957229007571998, "learning_rate": 3.3923748181070415e-06, "loss": 0.3116, "step": 17176 }, { "epoch": 0.8046563919988757, "grad_norm": 0.6110097712450521, "learning_rate": 3.392197658874578e-06, "loss": 0.3401, "step": 17177 }, { "epoch": 0.8047032369888041, "grad_norm": 0.6174561925877835, "learning_rate": 3.3920204945077428e-06, "loss": 0.326, "step": 17178 }, { "epoch": 0.8047500819787323, "grad_norm": 0.651427097020684, "learning_rate": 3.3918433250075532e-06, "loss": 0.3356, "step": 17179 }, { "epoch": 0.8047969269686607, "grad_norm": 0.6480729429862393, "learning_rate": 3.3916661503750292e-06, "loss": 0.3041, "step": 17180 }, { "epoch": 0.804843771958589, "grad_norm": 0.597590765887233, "learning_rate": 3.3914889706111907e-06, "loss": 0.3342, "step": 17181 }, { "epoch": 0.8048906169485174, "grad_norm": 0.5836382233470232, "learning_rate": 3.3913117857170573e-06, "loss": 0.3334, "step": 17182 }, { "epoch": 0.8049374619384457, "grad_norm": 0.6185721184669699, "learning_rate": 3.3911345956936487e-06, "loss": 0.3279, "step": 17183 }, { "epoch": 0.804984306928374, "grad_norm": 0.6089851150341156, "learning_rate": 3.3909574005419836e-06, "loss": 0.3167, "step": 17184 }, { "epoch": 0.8050311519183023, "grad_norm": 0.6432041904116139, "learning_rate": 3.3907802002630846e-06, "loss": 0.3363, "step": 17185 }, { "epoch": 0.8050779969082307, "grad_norm": 3.1245052973359915, "learning_rate": 3.3906029948579676e-06, "loss": 0.316, "step": 17186 }, { "epoch": 0.805124841898159, "grad_norm": 0.5970224878362047, "learning_rate": 3.3904257843276545e-06, "loss": 0.3108, "step": 17187 }, { "epoch": 0.8051716868880873, "grad_norm": 0.6096929432263598, "learning_rate": 3.3902485686731656e-06, "loss": 0.3175, "step": 17188 }, { "epoch": 0.8052185318780156, "grad_norm": 0.6582486596603243, "learning_rate": 3.3900713478955195e-06, "loss": 0.3378, "step": 17189 }, { "epoch": 0.805265376867944, "grad_norm": 0.6520662525169593, "learning_rate": 3.3898941219957365e-06, "loss": 0.3236, "step": 17190 }, { "epoch": 0.8053122218578723, "grad_norm": 0.5931521362527455, "learning_rate": 3.3897168909748367e-06, "loss": 0.3161, "step": 17191 }, { "epoch": 0.8053590668478007, "grad_norm": 0.5963125452206554, "learning_rate": 3.38953965483384e-06, "loss": 0.3281, "step": 17192 }, { "epoch": 0.8054059118377289, "grad_norm": 0.595245673547831, "learning_rate": 3.3893624135737653e-06, "loss": 0.331, "step": 17193 }, { "epoch": 0.8054527568276573, "grad_norm": 0.5789262611249043, "learning_rate": 3.389185167195634e-06, "loss": 0.3194, "step": 17194 }, { "epoch": 0.8054996018175856, "grad_norm": 0.5679685366639019, "learning_rate": 3.389007915700465e-06, "loss": 0.3143, "step": 17195 }, { "epoch": 0.805546446807514, "grad_norm": 0.5904639397172078, "learning_rate": 3.3888306590892794e-06, "loss": 0.3042, "step": 17196 }, { "epoch": 0.8055932917974422, "grad_norm": 0.603799360470938, "learning_rate": 3.3886533973630977e-06, "loss": 0.315, "step": 17197 }, { "epoch": 0.8056401367873706, "grad_norm": 0.5870592879355719, "learning_rate": 3.3884761305229372e-06, "loss": 0.3224, "step": 17198 }, { "epoch": 0.8056869817772989, "grad_norm": 0.5841675321676248, "learning_rate": 3.3882988585698208e-06, "loss": 0.3174, "step": 17199 }, { "epoch": 0.8057338267672273, "grad_norm": 0.6504686691167048, "learning_rate": 3.388121581504768e-06, "loss": 0.3319, "step": 17200 }, { "epoch": 0.8057806717571556, "grad_norm": 0.595425846233161, "learning_rate": 3.387944299328798e-06, "loss": 0.3204, "step": 17201 }, { "epoch": 0.8058275167470839, "grad_norm": 0.5962847319608012, "learning_rate": 3.3877670120429325e-06, "loss": 0.3254, "step": 17202 }, { "epoch": 0.8058743617370122, "grad_norm": 0.6117703026040238, "learning_rate": 3.387589719648191e-06, "loss": 0.3484, "step": 17203 }, { "epoch": 0.8059212067269406, "grad_norm": 0.6616893135899433, "learning_rate": 3.387412422145593e-06, "loss": 0.3459, "step": 17204 }, { "epoch": 0.8059680517168689, "grad_norm": 0.5343452466500852, "learning_rate": 3.3872351195361595e-06, "loss": 0.3045, "step": 17205 }, { "epoch": 0.8060148967067972, "grad_norm": 0.5814618802165982, "learning_rate": 3.387057811820912e-06, "loss": 0.3055, "step": 17206 }, { "epoch": 0.8060617416967255, "grad_norm": 0.5870552595504981, "learning_rate": 3.3868804990008684e-06, "loss": 0.3254, "step": 17207 }, { "epoch": 0.8061085866866539, "grad_norm": 0.6210017924938322, "learning_rate": 3.3867031810770513e-06, "loss": 0.3266, "step": 17208 }, { "epoch": 0.8061554316765822, "grad_norm": 0.5935620127836826, "learning_rate": 3.38652585805048e-06, "loss": 0.293, "step": 17209 }, { "epoch": 0.8062022766665106, "grad_norm": 0.5214189571124971, "learning_rate": 3.3863485299221756e-06, "loss": 0.3054, "step": 17210 }, { "epoch": 0.8062491216564388, "grad_norm": 0.5923020864515794, "learning_rate": 3.3861711966931575e-06, "loss": 0.3084, "step": 17211 }, { "epoch": 0.8062959666463672, "grad_norm": 0.6301561560915871, "learning_rate": 3.3859938583644467e-06, "loss": 0.3207, "step": 17212 }, { "epoch": 0.8063428116362955, "grad_norm": 0.6067900857303461, "learning_rate": 3.385816514937065e-06, "loss": 0.335, "step": 17213 }, { "epoch": 0.8063896566262239, "grad_norm": 0.5739824132130337, "learning_rate": 3.3856391664120314e-06, "loss": 0.3078, "step": 17214 }, { "epoch": 0.8064365016161521, "grad_norm": 0.5921527525542846, "learning_rate": 3.3854618127903664e-06, "loss": 0.3175, "step": 17215 }, { "epoch": 0.8064833466060805, "grad_norm": 0.6295630230247379, "learning_rate": 3.3852844540730923e-06, "loss": 0.3446, "step": 17216 }, { "epoch": 0.8065301915960088, "grad_norm": 0.6095336520719038, "learning_rate": 3.385107090261228e-06, "loss": 0.3028, "step": 17217 }, { "epoch": 0.8065770365859372, "grad_norm": 0.5897460983993164, "learning_rate": 3.3849297213557946e-06, "loss": 0.3476, "step": 17218 }, { "epoch": 0.8066238815758655, "grad_norm": 0.5811937410320193, "learning_rate": 3.3847523473578136e-06, "loss": 0.3074, "step": 17219 }, { "epoch": 0.8066707265657937, "grad_norm": 0.5666465365356703, "learning_rate": 3.3845749682683053e-06, "loss": 0.3023, "step": 17220 }, { "epoch": 0.8067175715557221, "grad_norm": 0.5750653695942557, "learning_rate": 3.3843975840882903e-06, "loss": 0.3226, "step": 17221 }, { "epoch": 0.8067644165456505, "grad_norm": 0.5727338226647096, "learning_rate": 3.384220194818789e-06, "loss": 0.3129, "step": 17222 }, { "epoch": 0.8068112615355788, "grad_norm": 0.5994742672224694, "learning_rate": 3.384042800460824e-06, "loss": 0.346, "step": 17223 }, { "epoch": 0.806858106525507, "grad_norm": 0.5389393352399154, "learning_rate": 3.3838654010154135e-06, "loss": 0.3228, "step": 17224 }, { "epoch": 0.8069049515154354, "grad_norm": 0.5352749218883986, "learning_rate": 3.38368799648358e-06, "loss": 0.3082, "step": 17225 }, { "epoch": 0.8069517965053637, "grad_norm": 0.5939658989410069, "learning_rate": 3.3835105868663444e-06, "loss": 0.3176, "step": 17226 }, { "epoch": 0.8069986414952921, "grad_norm": 0.5706028927740099, "learning_rate": 3.383333172164728e-06, "loss": 0.3071, "step": 17227 }, { "epoch": 0.8070454864852205, "grad_norm": 0.59090729482902, "learning_rate": 3.3831557523797508e-06, "loss": 0.3192, "step": 17228 }, { "epoch": 0.8070923314751487, "grad_norm": 0.6003923638440625, "learning_rate": 3.3829783275124332e-06, "loss": 0.3259, "step": 17229 }, { "epoch": 0.807139176465077, "grad_norm": 0.6077007961017412, "learning_rate": 3.382800897563799e-06, "loss": 0.3106, "step": 17230 }, { "epoch": 0.8071860214550054, "grad_norm": 0.5683403207665146, "learning_rate": 3.3826234625348664e-06, "loss": 0.3197, "step": 17231 }, { "epoch": 0.8072328664449337, "grad_norm": 0.590698492327469, "learning_rate": 3.3824460224266576e-06, "loss": 0.3123, "step": 17232 }, { "epoch": 0.807279711434862, "grad_norm": 0.6179720739922548, "learning_rate": 3.3822685772401936e-06, "loss": 0.3293, "step": 17233 }, { "epoch": 0.8073265564247903, "grad_norm": 0.5806011009607782, "learning_rate": 3.3820911269764973e-06, "loss": 0.3306, "step": 17234 }, { "epoch": 0.8073734014147187, "grad_norm": 0.5698948203831112, "learning_rate": 3.3819136716365862e-06, "loss": 0.2939, "step": 17235 }, { "epoch": 0.807420246404647, "grad_norm": 0.6335482170608927, "learning_rate": 3.3817362112214846e-06, "loss": 0.3181, "step": 17236 }, { "epoch": 0.8074670913945754, "grad_norm": 0.5787952938322124, "learning_rate": 3.3815587457322122e-06, "loss": 0.3121, "step": 17237 }, { "epoch": 0.8075139363845036, "grad_norm": 0.5793991892385398, "learning_rate": 3.3813812751697914e-06, "loss": 0.3005, "step": 17238 }, { "epoch": 0.807560781374432, "grad_norm": 0.5681229312003576, "learning_rate": 3.3812037995352425e-06, "loss": 0.3151, "step": 17239 }, { "epoch": 0.8076076263643603, "grad_norm": 0.6754082337320759, "learning_rate": 3.3810263188295877e-06, "loss": 0.3429, "step": 17240 }, { "epoch": 0.8076544713542887, "grad_norm": 0.6248877756762603, "learning_rate": 3.380848833053848e-06, "loss": 0.3442, "step": 17241 }, { "epoch": 0.8077013163442169, "grad_norm": 0.6251300822497108, "learning_rate": 3.3806713422090436e-06, "loss": 0.3185, "step": 17242 }, { "epoch": 0.8077481613341453, "grad_norm": 0.5898428947854926, "learning_rate": 3.3804938462961977e-06, "loss": 0.3096, "step": 17243 }, { "epoch": 0.8077950063240736, "grad_norm": 0.5962890222943185, "learning_rate": 3.380316345316331e-06, "loss": 0.3119, "step": 17244 }, { "epoch": 0.807841851314002, "grad_norm": 0.6646807973633322, "learning_rate": 3.380138839270465e-06, "loss": 0.3537, "step": 17245 }, { "epoch": 0.8078886963039303, "grad_norm": 0.6153444272723124, "learning_rate": 3.379961328159621e-06, "loss": 0.3421, "step": 17246 }, { "epoch": 0.8079355412938586, "grad_norm": 0.6145474735538662, "learning_rate": 3.3797838119848203e-06, "loss": 0.3337, "step": 17247 }, { "epoch": 0.8079823862837869, "grad_norm": 0.5653051607212559, "learning_rate": 3.3796062907470856e-06, "loss": 0.3245, "step": 17248 }, { "epoch": 0.8080292312737153, "grad_norm": 0.5960015619613874, "learning_rate": 3.379428764447438e-06, "loss": 0.3192, "step": 17249 }, { "epoch": 0.8080760762636436, "grad_norm": 0.5889691955072082, "learning_rate": 3.379251233086898e-06, "loss": 0.3047, "step": 17250 }, { "epoch": 0.8081229212535719, "grad_norm": 0.6539989509749043, "learning_rate": 3.379073696666489e-06, "loss": 0.3176, "step": 17251 }, { "epoch": 0.8081697662435002, "grad_norm": 0.5883922595681689, "learning_rate": 3.3788961551872312e-06, "loss": 0.3007, "step": 17252 }, { "epoch": 0.8082166112334286, "grad_norm": 0.6025910221309095, "learning_rate": 3.378718608650147e-06, "loss": 0.316, "step": 17253 }, { "epoch": 0.8082634562233569, "grad_norm": 0.6019689920579675, "learning_rate": 3.3785410570562583e-06, "loss": 0.3165, "step": 17254 }, { "epoch": 0.8083103012132853, "grad_norm": 0.6260940466443945, "learning_rate": 3.3783635004065875e-06, "loss": 0.3404, "step": 17255 }, { "epoch": 0.8083571462032135, "grad_norm": 0.6203717168750504, "learning_rate": 3.3781859387021536e-06, "loss": 0.3162, "step": 17256 }, { "epoch": 0.8084039911931419, "grad_norm": 0.5810093380323956, "learning_rate": 3.378008371943981e-06, "loss": 0.2968, "step": 17257 }, { "epoch": 0.8084508361830702, "grad_norm": 0.540222037790054, "learning_rate": 3.3778308001330917e-06, "loss": 0.3107, "step": 17258 }, { "epoch": 0.8084976811729986, "grad_norm": 0.566731381355941, "learning_rate": 3.377653223270506e-06, "loss": 0.3116, "step": 17259 }, { "epoch": 0.8085445261629268, "grad_norm": 0.639262306492617, "learning_rate": 3.3774756413572466e-06, "loss": 0.3305, "step": 17260 }, { "epoch": 0.8085913711528552, "grad_norm": 0.6449378365318849, "learning_rate": 3.3772980543943364e-06, "loss": 0.3158, "step": 17261 }, { "epoch": 0.8086382161427835, "grad_norm": 0.6347401444617093, "learning_rate": 3.377120462382796e-06, "loss": 0.3237, "step": 17262 }, { "epoch": 0.8086850611327119, "grad_norm": 0.5922866448473502, "learning_rate": 3.376942865323647e-06, "loss": 0.3382, "step": 17263 }, { "epoch": 0.8087319061226402, "grad_norm": 0.5547451919051032, "learning_rate": 3.3767652632179127e-06, "loss": 0.2864, "step": 17264 }, { "epoch": 0.8087787511125685, "grad_norm": 0.5620128918768401, "learning_rate": 3.3765876560666146e-06, "loss": 0.3034, "step": 17265 }, { "epoch": 0.8088255961024968, "grad_norm": 0.537545434309204, "learning_rate": 3.3764100438707754e-06, "loss": 0.2979, "step": 17266 }, { "epoch": 0.8088724410924252, "grad_norm": 0.5456057977635068, "learning_rate": 3.376232426631416e-06, "loss": 0.29, "step": 17267 }, { "epoch": 0.8089192860823535, "grad_norm": 0.6255074925796631, "learning_rate": 3.3760548043495596e-06, "loss": 0.3493, "step": 17268 }, { "epoch": 0.8089661310722818, "grad_norm": 0.5597735794418678, "learning_rate": 3.375877177026228e-06, "loss": 0.3235, "step": 17269 }, { "epoch": 0.8090129760622101, "grad_norm": 0.5471648959610291, "learning_rate": 3.375699544662443e-06, "loss": 0.3173, "step": 17270 }, { "epoch": 0.8090598210521385, "grad_norm": 0.5662590771593011, "learning_rate": 3.3755219072592273e-06, "loss": 0.3182, "step": 17271 }, { "epoch": 0.8091066660420668, "grad_norm": 0.543958777210564, "learning_rate": 3.375344264817604e-06, "loss": 0.304, "step": 17272 }, { "epoch": 0.8091535110319952, "grad_norm": 0.5967978060058118, "learning_rate": 3.375166617338593e-06, "loss": 0.2976, "step": 17273 }, { "epoch": 0.8092003560219234, "grad_norm": 0.7732887907065963, "learning_rate": 3.3749889648232187e-06, "loss": 0.3166, "step": 17274 }, { "epoch": 0.8092472010118518, "grad_norm": 0.671492729828172, "learning_rate": 3.374811307272503e-06, "loss": 0.3178, "step": 17275 }, { "epoch": 0.8092940460017801, "grad_norm": 0.5910367779094358, "learning_rate": 3.3746336446874684e-06, "loss": 0.3052, "step": 17276 }, { "epoch": 0.8093408909917085, "grad_norm": 0.5898614098504739, "learning_rate": 3.3744559770691364e-06, "loss": 0.3235, "step": 17277 }, { "epoch": 0.8093877359816367, "grad_norm": 0.6029576264473466, "learning_rate": 3.37427830441853e-06, "loss": 0.3167, "step": 17278 }, { "epoch": 0.8094345809715651, "grad_norm": 0.6051545066530998, "learning_rate": 3.374100626736672e-06, "loss": 0.3454, "step": 17279 }, { "epoch": 0.8094814259614934, "grad_norm": 0.6268843261527943, "learning_rate": 3.3739229440245845e-06, "loss": 0.3085, "step": 17280 }, { "epoch": 0.8095282709514218, "grad_norm": 0.5739689660121615, "learning_rate": 3.37374525628329e-06, "loss": 0.3133, "step": 17281 }, { "epoch": 0.8095751159413501, "grad_norm": 0.5844609462420988, "learning_rate": 3.373567563513811e-06, "loss": 0.3173, "step": 17282 }, { "epoch": 0.8096219609312784, "grad_norm": 0.6019064959892816, "learning_rate": 3.3733898657171715e-06, "loss": 0.3151, "step": 17283 }, { "epoch": 0.8096688059212067, "grad_norm": 0.5742784789554604, "learning_rate": 3.3732121628943914e-06, "loss": 0.3293, "step": 17284 }, { "epoch": 0.8097156509111351, "grad_norm": 0.5920305692647843, "learning_rate": 3.3730344550464948e-06, "loss": 0.3431, "step": 17285 }, { "epoch": 0.8097624959010634, "grad_norm": 0.599351742561868, "learning_rate": 3.3728567421745052e-06, "loss": 0.3207, "step": 17286 }, { "epoch": 0.8098093408909917, "grad_norm": 0.595576429315094, "learning_rate": 3.3726790242794443e-06, "loss": 0.3218, "step": 17287 }, { "epoch": 0.80985618588092, "grad_norm": 0.585857175399497, "learning_rate": 3.3725013013623343e-06, "loss": 0.3121, "step": 17288 }, { "epoch": 0.8099030308708484, "grad_norm": 0.5591993356634475, "learning_rate": 3.3723235734241993e-06, "loss": 0.3086, "step": 17289 }, { "epoch": 0.8099498758607767, "grad_norm": 0.6167296531162157, "learning_rate": 3.3721458404660614e-06, "loss": 0.3298, "step": 17290 }, { "epoch": 0.8099967208507051, "grad_norm": 0.5791258925708104, "learning_rate": 3.3719681024889428e-06, "loss": 0.3021, "step": 17291 }, { "epoch": 0.8100435658406333, "grad_norm": 0.5722037191562466, "learning_rate": 3.371790359493867e-06, "loss": 0.3078, "step": 17292 }, { "epoch": 0.8100904108305617, "grad_norm": 0.5914715753979982, "learning_rate": 3.3716126114818577e-06, "loss": 0.3137, "step": 17293 }, { "epoch": 0.81013725582049, "grad_norm": 0.6316612965793545, "learning_rate": 3.371434858453936e-06, "loss": 0.3101, "step": 17294 }, { "epoch": 0.8101841008104184, "grad_norm": 0.6249151052817585, "learning_rate": 3.371257100411126e-06, "loss": 0.3185, "step": 17295 }, { "epoch": 0.8102309458003466, "grad_norm": 0.6141135432658896, "learning_rate": 3.37107933735445e-06, "loss": 0.3313, "step": 17296 }, { "epoch": 0.810277790790275, "grad_norm": 0.5930665400761956, "learning_rate": 3.3709015692849316e-06, "loss": 0.3329, "step": 17297 }, { "epoch": 0.8103246357802033, "grad_norm": 0.6094646215196644, "learning_rate": 3.370723796203594e-06, "loss": 0.3083, "step": 17298 }, { "epoch": 0.8103714807701317, "grad_norm": 0.6149785318832524, "learning_rate": 3.370546018111459e-06, "loss": 0.324, "step": 17299 }, { "epoch": 0.81041832576006, "grad_norm": 0.5812938285824879, "learning_rate": 3.370368235009551e-06, "loss": 0.3321, "step": 17300 }, { "epoch": 0.8104651707499883, "grad_norm": 0.6080653304268073, "learning_rate": 3.3701904468988928e-06, "loss": 0.3207, "step": 17301 }, { "epoch": 0.8105120157399166, "grad_norm": 0.5904128560579753, "learning_rate": 3.3700126537805065e-06, "loss": 0.3257, "step": 17302 }, { "epoch": 0.810558860729845, "grad_norm": 0.6464214656859432, "learning_rate": 3.369834855655416e-06, "loss": 0.3358, "step": 17303 }, { "epoch": 0.8106057057197733, "grad_norm": 0.5831741095620913, "learning_rate": 3.3696570525246456e-06, "loss": 0.317, "step": 17304 }, { "epoch": 0.8106525507097015, "grad_norm": 0.5759046106120425, "learning_rate": 3.3694792443892165e-06, "loss": 0.3171, "step": 17305 }, { "epoch": 0.8106993956996299, "grad_norm": 0.6310405123733139, "learning_rate": 3.3693014312501533e-06, "loss": 0.3227, "step": 17306 }, { "epoch": 0.8107462406895583, "grad_norm": 0.6380102734936787, "learning_rate": 3.3691236131084787e-06, "loss": 0.3445, "step": 17307 }, { "epoch": 0.8107930856794866, "grad_norm": 0.5604933633769331, "learning_rate": 3.3689457899652165e-06, "loss": 0.3098, "step": 17308 }, { "epoch": 0.810839930669415, "grad_norm": 0.6124819194961583, "learning_rate": 3.3687679618213894e-06, "loss": 0.3436, "step": 17309 }, { "epoch": 0.8108867756593432, "grad_norm": 0.608768425155536, "learning_rate": 3.368590128678021e-06, "loss": 0.3142, "step": 17310 }, { "epoch": 0.8109336206492715, "grad_norm": 0.5734742661887002, "learning_rate": 3.368412290536135e-06, "loss": 0.3052, "step": 17311 }, { "epoch": 0.8109804656391999, "grad_norm": 0.6043693307875865, "learning_rate": 3.3682344473967544e-06, "loss": 0.3401, "step": 17312 }, { "epoch": 0.8110273106291283, "grad_norm": 0.6405143199562469, "learning_rate": 3.368056599260903e-06, "loss": 0.3258, "step": 17313 }, { "epoch": 0.8110741556190565, "grad_norm": 0.6193057877015572, "learning_rate": 3.367878746129604e-06, "loss": 0.3173, "step": 17314 }, { "epoch": 0.8111210006089848, "grad_norm": 0.5819997389554687, "learning_rate": 3.3677008880038796e-06, "loss": 0.3176, "step": 17315 }, { "epoch": 0.8111678455989132, "grad_norm": 0.6019308553220987, "learning_rate": 3.3675230248847564e-06, "loss": 0.3056, "step": 17316 }, { "epoch": 0.8112146905888415, "grad_norm": 0.6777871845198874, "learning_rate": 3.367345156773255e-06, "loss": 0.3246, "step": 17317 }, { "epoch": 0.8112615355787699, "grad_norm": 0.6094761023169945, "learning_rate": 3.3671672836704013e-06, "loss": 0.319, "step": 17318 }, { "epoch": 0.8113083805686981, "grad_norm": 0.5681674399824471, "learning_rate": 3.3669894055772175e-06, "loss": 0.32, "step": 17319 }, { "epoch": 0.8113552255586265, "grad_norm": 0.6166020853782805, "learning_rate": 3.3668115224947267e-06, "loss": 0.3351, "step": 17320 }, { "epoch": 0.8114020705485548, "grad_norm": 0.6401609114634655, "learning_rate": 3.3666336344239547e-06, "loss": 0.3447, "step": 17321 }, { "epoch": 0.8114489155384832, "grad_norm": 0.586229805367424, "learning_rate": 3.366455741365924e-06, "loss": 0.3112, "step": 17322 }, { "epoch": 0.8114957605284114, "grad_norm": 0.5935985594129058, "learning_rate": 3.366277843321657e-06, "loss": 0.3465, "step": 17323 }, { "epoch": 0.8115426055183398, "grad_norm": 0.5790961090312758, "learning_rate": 3.366099940292179e-06, "loss": 0.3096, "step": 17324 }, { "epoch": 0.8115894505082681, "grad_norm": 0.5735363462784066, "learning_rate": 3.3659220322785147e-06, "loss": 0.3091, "step": 17325 }, { "epoch": 0.8116362954981965, "grad_norm": 0.57378175673451, "learning_rate": 3.3657441192816858e-06, "loss": 0.3039, "step": 17326 }, { "epoch": 0.8116831404881248, "grad_norm": 0.6073908564790188, "learning_rate": 3.365566201302717e-06, "loss": 0.3214, "step": 17327 }, { "epoch": 0.8117299854780531, "grad_norm": 0.5907958397066021, "learning_rate": 3.365388278342633e-06, "loss": 0.3221, "step": 17328 }, { "epoch": 0.8117768304679814, "grad_norm": 0.5744582741398543, "learning_rate": 3.3652103504024567e-06, "loss": 0.3059, "step": 17329 }, { "epoch": 0.8118236754579098, "grad_norm": 0.5704605567217023, "learning_rate": 3.3650324174832117e-06, "loss": 0.3149, "step": 17330 }, { "epoch": 0.8118705204478381, "grad_norm": 0.5809811562484064, "learning_rate": 3.3648544795859235e-06, "loss": 0.3165, "step": 17331 }, { "epoch": 0.8119173654377664, "grad_norm": 0.5518998812029946, "learning_rate": 3.3646765367116146e-06, "loss": 0.3116, "step": 17332 }, { "epoch": 0.8119642104276947, "grad_norm": 0.5999895720519807, "learning_rate": 3.36449858886131e-06, "loss": 0.3225, "step": 17333 }, { "epoch": 0.8120110554176231, "grad_norm": 0.5783295107967091, "learning_rate": 3.3643206360360324e-06, "loss": 0.3129, "step": 17334 }, { "epoch": 0.8120579004075514, "grad_norm": 0.5851011114361704, "learning_rate": 3.364142678236807e-06, "loss": 0.3259, "step": 17335 }, { "epoch": 0.8121047453974798, "grad_norm": 0.635891259097742, "learning_rate": 3.363964715464658e-06, "loss": 0.3052, "step": 17336 }, { "epoch": 0.812151590387408, "grad_norm": 0.5842289305719323, "learning_rate": 3.3637867477206097e-06, "loss": 0.3019, "step": 17337 }, { "epoch": 0.8121984353773364, "grad_norm": 0.5888273639079411, "learning_rate": 3.363608775005685e-06, "loss": 0.3138, "step": 17338 }, { "epoch": 0.8122452803672647, "grad_norm": 0.6134539784250301, "learning_rate": 3.363430797320909e-06, "loss": 0.307, "step": 17339 }, { "epoch": 0.8122921253571931, "grad_norm": 0.5882659855846514, "learning_rate": 3.3632528146673067e-06, "loss": 0.2922, "step": 17340 }, { "epoch": 0.8123389703471213, "grad_norm": 0.5891884821383054, "learning_rate": 3.3630748270459e-06, "loss": 0.336, "step": 17341 }, { "epoch": 0.8123858153370497, "grad_norm": 0.5737945002236573, "learning_rate": 3.3628968344577156e-06, "loss": 0.3131, "step": 17342 }, { "epoch": 0.812432660326978, "grad_norm": 0.5427793242486013, "learning_rate": 3.3627188369037767e-06, "loss": 0.2956, "step": 17343 }, { "epoch": 0.8124795053169064, "grad_norm": 0.5586484320935226, "learning_rate": 3.362540834385107e-06, "loss": 0.3177, "step": 17344 }, { "epoch": 0.8125263503068347, "grad_norm": 0.6322376610256671, "learning_rate": 3.362362826902732e-06, "loss": 0.3218, "step": 17345 }, { "epoch": 0.812573195296763, "grad_norm": 0.6507317758588446, "learning_rate": 3.3621848144576764e-06, "loss": 0.3311, "step": 17346 }, { "epoch": 0.8126200402866913, "grad_norm": 0.6192640913399907, "learning_rate": 3.3620067970509627e-06, "loss": 0.333, "step": 17347 }, { "epoch": 0.8126668852766197, "grad_norm": 0.6333800904761785, "learning_rate": 3.3618287746836163e-06, "loss": 0.3379, "step": 17348 }, { "epoch": 0.812713730266548, "grad_norm": 0.6069524687348303, "learning_rate": 3.361650747356663e-06, "loss": 0.3082, "step": 17349 }, { "epoch": 0.8127605752564763, "grad_norm": 0.6135265415362412, "learning_rate": 3.3614727150711264e-06, "loss": 0.3074, "step": 17350 }, { "epoch": 0.8128074202464046, "grad_norm": 0.5505357894775746, "learning_rate": 3.3612946778280297e-06, "loss": 0.3098, "step": 17351 }, { "epoch": 0.812854265236333, "grad_norm": 0.6018103606564187, "learning_rate": 3.361116635628399e-06, "loss": 0.3166, "step": 17352 }, { "epoch": 0.8129011102262613, "grad_norm": 0.5669070709254204, "learning_rate": 3.3609385884732594e-06, "loss": 0.3144, "step": 17353 }, { "epoch": 0.8129479552161897, "grad_norm": 0.5836872335162228, "learning_rate": 3.3607605363636334e-06, "loss": 0.3145, "step": 17354 }, { "epoch": 0.8129948002061179, "grad_norm": 0.6126715091773157, "learning_rate": 3.3605824793005478e-06, "loss": 0.3208, "step": 17355 }, { "epoch": 0.8130416451960463, "grad_norm": 0.5971671348325791, "learning_rate": 3.360404417285026e-06, "loss": 0.3159, "step": 17356 }, { "epoch": 0.8130884901859746, "grad_norm": 0.6089838437590955, "learning_rate": 3.360226350318093e-06, "loss": 0.3435, "step": 17357 }, { "epoch": 0.813135335175903, "grad_norm": 0.5568767379625582, "learning_rate": 3.3600482784007732e-06, "loss": 0.3035, "step": 17358 }, { "epoch": 0.8131821801658312, "grad_norm": 0.5679164547890339, "learning_rate": 3.3598702015340924e-06, "loss": 0.3023, "step": 17359 }, { "epoch": 0.8132290251557596, "grad_norm": 0.568594249502136, "learning_rate": 3.3596921197190747e-06, "loss": 0.3231, "step": 17360 }, { "epoch": 0.8132758701456879, "grad_norm": 0.6471808582307402, "learning_rate": 3.359514032956744e-06, "loss": 0.3322, "step": 17361 }, { "epoch": 0.8133227151356163, "grad_norm": 0.6273185207975756, "learning_rate": 3.359335941248127e-06, "loss": 0.3057, "step": 17362 }, { "epoch": 0.8133695601255446, "grad_norm": 0.5408374264166083, "learning_rate": 3.3591578445942473e-06, "loss": 0.3115, "step": 17363 }, { "epoch": 0.8134164051154729, "grad_norm": 0.583368605822333, "learning_rate": 3.35897974299613e-06, "loss": 0.3177, "step": 17364 }, { "epoch": 0.8134632501054012, "grad_norm": 0.5919867628439838, "learning_rate": 3.3588016364548003e-06, "loss": 0.3209, "step": 17365 }, { "epoch": 0.8135100950953296, "grad_norm": 0.5953463389621394, "learning_rate": 3.358623524971283e-06, "loss": 0.3169, "step": 17366 }, { "epoch": 0.8135569400852579, "grad_norm": 0.5808800831789777, "learning_rate": 3.3584454085466034e-06, "loss": 0.327, "step": 17367 }, { "epoch": 0.8136037850751862, "grad_norm": 0.5758952932753246, "learning_rate": 3.358267287181786e-06, "loss": 0.3117, "step": 17368 }, { "epoch": 0.8136506300651145, "grad_norm": 0.6327930951416041, "learning_rate": 3.3580891608778558e-06, "loss": 0.3425, "step": 17369 }, { "epoch": 0.8136974750550429, "grad_norm": 0.5527646478535945, "learning_rate": 3.3579110296358386e-06, "loss": 0.3265, "step": 17370 }, { "epoch": 0.8137443200449712, "grad_norm": 0.5858190469688596, "learning_rate": 3.3577328934567594e-06, "loss": 0.2974, "step": 17371 }, { "epoch": 0.8137911650348996, "grad_norm": 0.5934032322838556, "learning_rate": 3.357554752341642e-06, "loss": 0.3186, "step": 17372 }, { "epoch": 0.8138380100248278, "grad_norm": 0.5989930159404246, "learning_rate": 3.3573766062915126e-06, "loss": 0.3372, "step": 17373 }, { "epoch": 0.8138848550147562, "grad_norm": 0.5772650399201273, "learning_rate": 3.357198455307398e-06, "loss": 0.3216, "step": 17374 }, { "epoch": 0.8139317000046845, "grad_norm": 0.5443095021440284, "learning_rate": 3.3570202993903202e-06, "loss": 0.3135, "step": 17375 }, { "epoch": 0.8139785449946129, "grad_norm": 0.6161415839666502, "learning_rate": 3.3568421385413053e-06, "loss": 0.3163, "step": 17376 }, { "epoch": 0.8140253899845411, "grad_norm": 0.5671329371662597, "learning_rate": 3.3566639727613803e-06, "loss": 0.3028, "step": 17377 }, { "epoch": 0.8140722349744695, "grad_norm": 0.5767697308470068, "learning_rate": 3.3564858020515703e-06, "loss": 0.3131, "step": 17378 }, { "epoch": 0.8141190799643978, "grad_norm": 0.5904064518226285, "learning_rate": 3.356307626412898e-06, "loss": 0.3183, "step": 17379 }, { "epoch": 0.8141659249543262, "grad_norm": 0.6021949489795295, "learning_rate": 3.3561294458463917e-06, "loss": 0.3134, "step": 17380 }, { "epoch": 0.8142127699442545, "grad_norm": 0.6172727794938433, "learning_rate": 3.3559512603530755e-06, "loss": 0.3296, "step": 17381 }, { "epoch": 0.8142596149341828, "grad_norm": 0.6081429357165508, "learning_rate": 3.3557730699339743e-06, "loss": 0.3335, "step": 17382 }, { "epoch": 0.8143064599241111, "grad_norm": 0.6260899172542252, "learning_rate": 3.355594874590115e-06, "loss": 0.3239, "step": 17383 }, { "epoch": 0.8143533049140395, "grad_norm": 0.5945692426498533, "learning_rate": 3.355416674322522e-06, "loss": 0.3268, "step": 17384 }, { "epoch": 0.8144001499039678, "grad_norm": 0.5678460477201165, "learning_rate": 3.3552384691322203e-06, "loss": 0.2965, "step": 17385 }, { "epoch": 0.814446994893896, "grad_norm": 0.5748504129716365, "learning_rate": 3.3550602590202375e-06, "loss": 0.3008, "step": 17386 }, { "epoch": 0.8144938398838244, "grad_norm": 0.5690658730621982, "learning_rate": 3.3548820439875964e-06, "loss": 0.3084, "step": 17387 }, { "epoch": 0.8145406848737528, "grad_norm": 0.6214955953422556, "learning_rate": 3.354703824035325e-06, "loss": 0.3387, "step": 17388 }, { "epoch": 0.8145875298636811, "grad_norm": 0.6268579278169669, "learning_rate": 3.3545255991644477e-06, "loss": 0.3152, "step": 17389 }, { "epoch": 0.8146343748536095, "grad_norm": 0.62675095706936, "learning_rate": 3.35434736937599e-06, "loss": 0.3288, "step": 17390 }, { "epoch": 0.8146812198435377, "grad_norm": 0.616074802064109, "learning_rate": 3.354169134670978e-06, "loss": 0.3264, "step": 17391 }, { "epoch": 0.814728064833466, "grad_norm": 0.5917293846562439, "learning_rate": 3.353990895050438e-06, "loss": 0.3046, "step": 17392 }, { "epoch": 0.8147749098233944, "grad_norm": 0.5508005392776459, "learning_rate": 3.3538126505153945e-06, "loss": 0.3097, "step": 17393 }, { "epoch": 0.8148217548133228, "grad_norm": 0.5897841450380656, "learning_rate": 3.353634401066873e-06, "loss": 0.3326, "step": 17394 }, { "epoch": 0.814868599803251, "grad_norm": 0.5982687172802085, "learning_rate": 3.3534561467059017e-06, "loss": 0.3288, "step": 17395 }, { "epoch": 0.8149154447931793, "grad_norm": 0.5746808228321002, "learning_rate": 3.3532778874335035e-06, "loss": 0.3074, "step": 17396 }, { "epoch": 0.8149622897831077, "grad_norm": 0.6123777140992778, "learning_rate": 3.3530996232507062e-06, "loss": 0.3311, "step": 17397 }, { "epoch": 0.815009134773036, "grad_norm": 0.5992057394184537, "learning_rate": 3.3529213541585348e-06, "loss": 0.307, "step": 17398 }, { "epoch": 0.8150559797629644, "grad_norm": 0.5882483810787841, "learning_rate": 3.352743080158016e-06, "loss": 0.2953, "step": 17399 }, { "epoch": 0.8151028247528926, "grad_norm": 0.5793100485498981, "learning_rate": 3.3525648012501737e-06, "loss": 0.3215, "step": 17400 }, { "epoch": 0.815149669742821, "grad_norm": 0.5850440516263532, "learning_rate": 3.352386517436036e-06, "loss": 0.309, "step": 17401 }, { "epoch": 0.8151965147327493, "grad_norm": 0.6044800922664849, "learning_rate": 3.3522082287166285e-06, "loss": 0.3207, "step": 17402 }, { "epoch": 0.8152433597226777, "grad_norm": 0.5965809490945424, "learning_rate": 3.3520299350929764e-06, "loss": 0.3246, "step": 17403 }, { "epoch": 0.8152902047126059, "grad_norm": 0.5969990107858973, "learning_rate": 3.3518516365661064e-06, "loss": 0.3139, "step": 17404 }, { "epoch": 0.8153370497025343, "grad_norm": 0.6775948274756187, "learning_rate": 3.351673333137044e-06, "loss": 0.3265, "step": 17405 }, { "epoch": 0.8153838946924626, "grad_norm": 0.5835377688495043, "learning_rate": 3.3514950248068156e-06, "loss": 0.319, "step": 17406 }, { "epoch": 0.815430739682391, "grad_norm": 0.5842047329810716, "learning_rate": 3.3513167115764476e-06, "loss": 0.3361, "step": 17407 }, { "epoch": 0.8154775846723193, "grad_norm": 0.519635639398575, "learning_rate": 3.351138393446966e-06, "loss": 0.2944, "step": 17408 }, { "epoch": 0.8155244296622476, "grad_norm": 0.5445007563611988, "learning_rate": 3.350960070419397e-06, "loss": 0.3309, "step": 17409 }, { "epoch": 0.8155712746521759, "grad_norm": 0.6511906707223373, "learning_rate": 3.3507817424947666e-06, "loss": 0.34, "step": 17410 }, { "epoch": 0.8156181196421043, "grad_norm": 0.6343396994459308, "learning_rate": 3.3506034096741003e-06, "loss": 0.319, "step": 17411 }, { "epoch": 0.8156649646320326, "grad_norm": 0.5963605153675988, "learning_rate": 3.3504250719584264e-06, "loss": 0.3457, "step": 17412 }, { "epoch": 0.8157118096219609, "grad_norm": 0.6028956684392959, "learning_rate": 3.3502467293487693e-06, "loss": 0.3179, "step": 17413 }, { "epoch": 0.8157586546118892, "grad_norm": 0.5433879209035322, "learning_rate": 3.350068381846156e-06, "loss": 0.3076, "step": 17414 }, { "epoch": 0.8158054996018176, "grad_norm": 0.6224145044441942, "learning_rate": 3.349890029451612e-06, "loss": 0.3385, "step": 17415 }, { "epoch": 0.8158523445917459, "grad_norm": 0.6306631412891115, "learning_rate": 3.3497116721661666e-06, "loss": 0.3326, "step": 17416 }, { "epoch": 0.8158991895816743, "grad_norm": 0.5973046494748895, "learning_rate": 3.349533309990842e-06, "loss": 0.3344, "step": 17417 }, { "epoch": 0.8159460345716025, "grad_norm": 0.630042213891708, "learning_rate": 3.3493549429266675e-06, "loss": 0.3181, "step": 17418 }, { "epoch": 0.8159928795615309, "grad_norm": 0.5870987733017412, "learning_rate": 3.3491765709746694e-06, "loss": 0.3377, "step": 17419 }, { "epoch": 0.8160397245514592, "grad_norm": 0.595393803553946, "learning_rate": 3.348998194135873e-06, "loss": 0.3134, "step": 17420 }, { "epoch": 0.8160865695413876, "grad_norm": 0.6065759821325497, "learning_rate": 3.3488198124113047e-06, "loss": 0.3167, "step": 17421 }, { "epoch": 0.8161334145313158, "grad_norm": 0.5977110738857381, "learning_rate": 3.3486414258019922e-06, "loss": 0.3338, "step": 17422 }, { "epoch": 0.8161802595212442, "grad_norm": 0.6205365792158646, "learning_rate": 3.348463034308963e-06, "loss": 0.3303, "step": 17423 }, { "epoch": 0.8162271045111725, "grad_norm": 0.6297526681716666, "learning_rate": 3.348284637933241e-06, "loss": 0.3412, "step": 17424 }, { "epoch": 0.8162739495011009, "grad_norm": 0.6047646131792236, "learning_rate": 3.348106236675853e-06, "loss": 0.3134, "step": 17425 }, { "epoch": 0.8163207944910292, "grad_norm": 0.6178485506788269, "learning_rate": 3.3479278305378288e-06, "loss": 0.3215, "step": 17426 }, { "epoch": 0.8163676394809575, "grad_norm": 0.5585795765227762, "learning_rate": 3.347749419520192e-06, "loss": 0.31, "step": 17427 }, { "epoch": 0.8164144844708858, "grad_norm": 0.5687426997138934, "learning_rate": 3.3475710036239705e-06, "loss": 0.3094, "step": 17428 }, { "epoch": 0.8164613294608142, "grad_norm": 0.6339116977446289, "learning_rate": 3.3473925828501914e-06, "loss": 0.3307, "step": 17429 }, { "epoch": 0.8165081744507425, "grad_norm": 0.6150387635461741, "learning_rate": 3.3472141571998806e-06, "loss": 0.321, "step": 17430 }, { "epoch": 0.8165550194406708, "grad_norm": 0.5991502739619393, "learning_rate": 3.347035726674065e-06, "loss": 0.3269, "step": 17431 }, { "epoch": 0.8166018644305991, "grad_norm": 0.5837370744213563, "learning_rate": 3.346857291273772e-06, "loss": 0.3092, "step": 17432 }, { "epoch": 0.8166487094205275, "grad_norm": 0.5521263128550704, "learning_rate": 3.346678851000028e-06, "loss": 0.3055, "step": 17433 }, { "epoch": 0.8166955544104558, "grad_norm": 0.5443201730111465, "learning_rate": 3.3465004058538598e-06, "loss": 0.3052, "step": 17434 }, { "epoch": 0.8167423994003842, "grad_norm": 0.6179245029883973, "learning_rate": 3.3463219558362953e-06, "loss": 0.3278, "step": 17435 }, { "epoch": 0.8167892443903124, "grad_norm": 0.6198653753296033, "learning_rate": 3.34614350094836e-06, "loss": 0.3338, "step": 17436 }, { "epoch": 0.8168360893802408, "grad_norm": 0.5661641727026251, "learning_rate": 3.3459650411910817e-06, "loss": 0.3205, "step": 17437 }, { "epoch": 0.8168829343701691, "grad_norm": 0.5674358655639358, "learning_rate": 3.3457865765654875e-06, "loss": 0.3097, "step": 17438 }, { "epoch": 0.8169297793600975, "grad_norm": 0.5593639919529116, "learning_rate": 3.345608107072603e-06, "loss": 0.3054, "step": 17439 }, { "epoch": 0.8169766243500257, "grad_norm": 0.5477468765829265, "learning_rate": 3.3454296327134577e-06, "loss": 0.3074, "step": 17440 }, { "epoch": 0.8170234693399541, "grad_norm": 0.5972008795640151, "learning_rate": 3.3452511534890774e-06, "loss": 0.3067, "step": 17441 }, { "epoch": 0.8170703143298824, "grad_norm": 0.5794890037392325, "learning_rate": 3.345072669400488e-06, "loss": 0.3117, "step": 17442 }, { "epoch": 0.8171171593198108, "grad_norm": 0.5538105688079206, "learning_rate": 3.344894180448718e-06, "loss": 0.3215, "step": 17443 }, { "epoch": 0.8171640043097391, "grad_norm": 0.5941033226077905, "learning_rate": 3.3447156866347956e-06, "loss": 0.3209, "step": 17444 }, { "epoch": 0.8172108492996674, "grad_norm": 0.5617607332141579, "learning_rate": 3.3445371879597453e-06, "loss": 0.3126, "step": 17445 }, { "epoch": 0.8172576942895957, "grad_norm": 0.6085019482270363, "learning_rate": 3.344358684424596e-06, "loss": 0.3228, "step": 17446 }, { "epoch": 0.8173045392795241, "grad_norm": 0.5914176200622869, "learning_rate": 3.3441801760303756e-06, "loss": 0.311, "step": 17447 }, { "epoch": 0.8173513842694524, "grad_norm": 0.52344484955273, "learning_rate": 3.3440016627781103e-06, "loss": 0.3134, "step": 17448 }, { "epoch": 0.8173982292593807, "grad_norm": 0.6502312475757518, "learning_rate": 3.3438231446688263e-06, "loss": 0.333, "step": 17449 }, { "epoch": 0.817445074249309, "grad_norm": 0.569509543666145, "learning_rate": 3.3436446217035532e-06, "loss": 0.3238, "step": 17450 }, { "epoch": 0.8174919192392374, "grad_norm": 0.6390593196881976, "learning_rate": 3.3434660938833173e-06, "loss": 0.3331, "step": 17451 }, { "epoch": 0.8175387642291657, "grad_norm": 0.5592576784491977, "learning_rate": 3.343287561209146e-06, "loss": 0.3002, "step": 17452 }, { "epoch": 0.8175856092190941, "grad_norm": 0.5744805184606985, "learning_rate": 3.343109023682067e-06, "loss": 0.3003, "step": 17453 }, { "epoch": 0.8176324542090223, "grad_norm": 0.5468512606327264, "learning_rate": 3.342930481303107e-06, "loss": 0.3087, "step": 17454 }, { "epoch": 0.8176792991989507, "grad_norm": 0.6060857952446609, "learning_rate": 3.342751934073294e-06, "loss": 0.3159, "step": 17455 }, { "epoch": 0.817726144188879, "grad_norm": 0.6351129430342631, "learning_rate": 3.3425733819936555e-06, "loss": 0.3273, "step": 17456 }, { "epoch": 0.8177729891788074, "grad_norm": 0.6262520555932196, "learning_rate": 3.3423948250652187e-06, "loss": 0.3151, "step": 17457 }, { "epoch": 0.8178198341687356, "grad_norm": 0.6546183959148191, "learning_rate": 3.342216263289012e-06, "loss": 0.3304, "step": 17458 }, { "epoch": 0.817866679158664, "grad_norm": 0.5766430491094235, "learning_rate": 3.342037696666062e-06, "loss": 0.3257, "step": 17459 }, { "epoch": 0.8179135241485923, "grad_norm": 0.5927526532051952, "learning_rate": 3.3418591251973968e-06, "loss": 0.3249, "step": 17460 }, { "epoch": 0.8179603691385207, "grad_norm": 0.5986344372187715, "learning_rate": 3.3416805488840443e-06, "loss": 0.3043, "step": 17461 }, { "epoch": 0.818007214128449, "grad_norm": 0.5739332544647373, "learning_rate": 3.341501967727031e-06, "loss": 0.3037, "step": 17462 }, { "epoch": 0.8180540591183773, "grad_norm": 0.6143581367729737, "learning_rate": 3.341323381727386e-06, "loss": 0.3051, "step": 17463 }, { "epoch": 0.8181009041083056, "grad_norm": 0.5745792948966365, "learning_rate": 3.3411447908861355e-06, "loss": 0.3179, "step": 17464 }, { "epoch": 0.818147749098234, "grad_norm": 0.6661786698180115, "learning_rate": 3.34096619520431e-06, "loss": 0.3384, "step": 17465 }, { "epoch": 0.8181945940881623, "grad_norm": 0.5603952932650024, "learning_rate": 3.340787594682934e-06, "loss": 0.3245, "step": 17466 }, { "epoch": 0.8182414390780905, "grad_norm": 0.5702799009205053, "learning_rate": 3.3406089893230365e-06, "loss": 0.3209, "step": 17467 }, { "epoch": 0.8182882840680189, "grad_norm": 0.5938485201134404, "learning_rate": 3.340430379125646e-06, "loss": 0.3375, "step": 17468 }, { "epoch": 0.8183351290579473, "grad_norm": 0.6005868254483577, "learning_rate": 3.34025176409179e-06, "loss": 0.339, "step": 17469 }, { "epoch": 0.8183819740478756, "grad_norm": 0.5617908306660884, "learning_rate": 3.340073144222496e-06, "loss": 0.3021, "step": 17470 }, { "epoch": 0.818428819037804, "grad_norm": 0.6241716433080329, "learning_rate": 3.3398945195187926e-06, "loss": 0.3238, "step": 17471 }, { "epoch": 0.8184756640277322, "grad_norm": 0.5730148985459991, "learning_rate": 3.3397158899817073e-06, "loss": 0.2977, "step": 17472 }, { "epoch": 0.8185225090176605, "grad_norm": 0.5476695915957419, "learning_rate": 3.3395372556122673e-06, "loss": 0.3108, "step": 17473 }, { "epoch": 0.8185693540075889, "grad_norm": 0.5599939972690324, "learning_rate": 3.339358616411502e-06, "loss": 0.3011, "step": 17474 }, { "epoch": 0.8186161989975173, "grad_norm": 0.5616143390367612, "learning_rate": 3.3391799723804397e-06, "loss": 0.3168, "step": 17475 }, { "epoch": 0.8186630439874455, "grad_norm": 0.5994586903677004, "learning_rate": 3.3390013235201057e-06, "loss": 0.3301, "step": 17476 }, { "epoch": 0.8187098889773738, "grad_norm": 0.6057860714465526, "learning_rate": 3.338822669831532e-06, "loss": 0.3332, "step": 17477 }, { "epoch": 0.8187567339673022, "grad_norm": 0.635933198115345, "learning_rate": 3.3386440113157427e-06, "loss": 0.3184, "step": 17478 }, { "epoch": 0.8188035789572305, "grad_norm": 0.6316860528657939, "learning_rate": 3.3384653479737695e-06, "loss": 0.3366, "step": 17479 }, { "epoch": 0.8188504239471589, "grad_norm": 0.6047973758626797, "learning_rate": 3.3382866798066383e-06, "loss": 0.334, "step": 17480 }, { "epoch": 0.8188972689370871, "grad_norm": 0.5658900136181784, "learning_rate": 3.3381080068153775e-06, "loss": 0.3017, "step": 17481 }, { "epoch": 0.8189441139270155, "grad_norm": 0.5555289842187463, "learning_rate": 3.3379293290010167e-06, "loss": 0.3159, "step": 17482 }, { "epoch": 0.8189909589169438, "grad_norm": 0.5680699670010189, "learning_rate": 3.3377506463645824e-06, "loss": 0.3118, "step": 17483 }, { "epoch": 0.8190378039068722, "grad_norm": 0.601285130377314, "learning_rate": 3.3375719589071043e-06, "loss": 0.3412, "step": 17484 }, { "epoch": 0.8190846488968004, "grad_norm": 0.5751061042904545, "learning_rate": 3.3373932666296093e-06, "loss": 0.3189, "step": 17485 }, { "epoch": 0.8191314938867288, "grad_norm": 0.6313894201122658, "learning_rate": 3.337214569533127e-06, "loss": 0.3166, "step": 17486 }, { "epoch": 0.8191783388766571, "grad_norm": 0.6220023840687209, "learning_rate": 3.3370358676186857e-06, "loss": 0.325, "step": 17487 }, { "epoch": 0.8192251838665855, "grad_norm": 0.5821169940541523, "learning_rate": 3.336857160887313e-06, "loss": 0.2967, "step": 17488 }, { "epoch": 0.8192720288565138, "grad_norm": 0.6261495110199853, "learning_rate": 3.336678449340038e-06, "loss": 0.3303, "step": 17489 }, { "epoch": 0.8193188738464421, "grad_norm": 0.6411210532968277, "learning_rate": 3.336499732977888e-06, "loss": 0.3303, "step": 17490 }, { "epoch": 0.8193657188363704, "grad_norm": 0.5727818462190195, "learning_rate": 3.336321011801893e-06, "loss": 0.3324, "step": 17491 }, { "epoch": 0.8194125638262988, "grad_norm": 0.6634509919603728, "learning_rate": 3.3361422858130797e-06, "loss": 0.3321, "step": 17492 }, { "epoch": 0.8194594088162271, "grad_norm": 0.57445713019447, "learning_rate": 3.3359635550124797e-06, "loss": 0.3042, "step": 17493 }, { "epoch": 0.8195062538061554, "grad_norm": 0.574575147047614, "learning_rate": 3.335784819401118e-06, "loss": 0.3108, "step": 17494 }, { "epoch": 0.8195530987960837, "grad_norm": 0.6188862460888332, "learning_rate": 3.335606078980025e-06, "loss": 0.3062, "step": 17495 }, { "epoch": 0.8195999437860121, "grad_norm": 0.6147045316328162, "learning_rate": 3.335427333750229e-06, "loss": 0.3345, "step": 17496 }, { "epoch": 0.8196467887759404, "grad_norm": 0.5855410306329243, "learning_rate": 3.335248583712759e-06, "loss": 0.3199, "step": 17497 }, { "epoch": 0.8196936337658688, "grad_norm": 0.6099956969854046, "learning_rate": 3.3350698288686436e-06, "loss": 0.3329, "step": 17498 }, { "epoch": 0.819740478755797, "grad_norm": 0.5629342958553832, "learning_rate": 3.3348910692189107e-06, "loss": 0.3161, "step": 17499 }, { "epoch": 0.8197873237457254, "grad_norm": 0.6279356863091141, "learning_rate": 3.3347123047645897e-06, "loss": 0.3389, "step": 17500 }, { "epoch": 0.8198341687356537, "grad_norm": 0.5607537199883504, "learning_rate": 3.334533535506709e-06, "loss": 0.3097, "step": 17501 }, { "epoch": 0.8198810137255821, "grad_norm": 0.6862597998202523, "learning_rate": 3.3343547614462978e-06, "loss": 0.3605, "step": 17502 }, { "epoch": 0.8199278587155103, "grad_norm": 0.5744906507742334, "learning_rate": 3.3341759825843847e-06, "loss": 0.2988, "step": 17503 }, { "epoch": 0.8199747037054387, "grad_norm": 0.5230052521916352, "learning_rate": 3.333997198921998e-06, "loss": 0.2953, "step": 17504 }, { "epoch": 0.820021548695367, "grad_norm": 0.577012203028168, "learning_rate": 3.3338184104601674e-06, "loss": 0.2888, "step": 17505 }, { "epoch": 0.8200683936852954, "grad_norm": 0.6444573456786779, "learning_rate": 3.3336396171999207e-06, "loss": 0.3341, "step": 17506 }, { "epoch": 0.8201152386752237, "grad_norm": 0.5845811301886888, "learning_rate": 3.333460819142289e-06, "loss": 0.3446, "step": 17507 }, { "epoch": 0.820162083665152, "grad_norm": 0.5920601927443985, "learning_rate": 3.333282016288299e-06, "loss": 0.3144, "step": 17508 }, { "epoch": 0.8202089286550803, "grad_norm": 0.5724508717510498, "learning_rate": 3.33310320863898e-06, "loss": 0.2993, "step": 17509 }, { "epoch": 0.8202557736450087, "grad_norm": 0.6436363409482708, "learning_rate": 3.332924396195362e-06, "loss": 0.3575, "step": 17510 }, { "epoch": 0.820302618634937, "grad_norm": 0.5882307800696956, "learning_rate": 3.3327455789584735e-06, "loss": 0.3158, "step": 17511 }, { "epoch": 0.8203494636248653, "grad_norm": 0.566801247383836, "learning_rate": 3.332566756929343e-06, "loss": 0.3037, "step": 17512 }, { "epoch": 0.8203963086147936, "grad_norm": 0.5557687740798162, "learning_rate": 3.332387930109e-06, "loss": 0.3167, "step": 17513 }, { "epoch": 0.820443153604722, "grad_norm": 0.574192865712626, "learning_rate": 3.3322090984984745e-06, "loss": 0.3094, "step": 17514 }, { "epoch": 0.8204899985946503, "grad_norm": 0.6035616976879388, "learning_rate": 3.3320302620987944e-06, "loss": 0.3169, "step": 17515 }, { "epoch": 0.8205368435845787, "grad_norm": 0.5907289173180118, "learning_rate": 3.3318514209109888e-06, "loss": 0.3069, "step": 17516 }, { "epoch": 0.8205836885745069, "grad_norm": 0.6134563784929727, "learning_rate": 3.331672574936088e-06, "loss": 0.3363, "step": 17517 }, { "epoch": 0.8206305335644353, "grad_norm": 0.6159680130573844, "learning_rate": 3.3314937241751206e-06, "loss": 0.3264, "step": 17518 }, { "epoch": 0.8206773785543636, "grad_norm": 0.6221882122765423, "learning_rate": 3.3313148686291154e-06, "loss": 0.3441, "step": 17519 }, { "epoch": 0.820724223544292, "grad_norm": 0.6068496927177045, "learning_rate": 3.3311360082991017e-06, "loss": 0.3138, "step": 17520 }, { "epoch": 0.8207710685342202, "grad_norm": 0.6111698953900156, "learning_rate": 3.3309571431861097e-06, "loss": 0.3172, "step": 17521 }, { "epoch": 0.8208179135241486, "grad_norm": 0.5414252905365557, "learning_rate": 3.3307782732911682e-06, "loss": 0.3019, "step": 17522 }, { "epoch": 0.8208647585140769, "grad_norm": 0.5768908945520714, "learning_rate": 3.330599398615307e-06, "loss": 0.3109, "step": 17523 }, { "epoch": 0.8209116035040053, "grad_norm": 0.6129319286050937, "learning_rate": 3.3304205191595547e-06, "loss": 0.3162, "step": 17524 }, { "epoch": 0.8209584484939336, "grad_norm": 0.5909366326142271, "learning_rate": 3.33024163492494e-06, "loss": 0.3317, "step": 17525 }, { "epoch": 0.8210052934838619, "grad_norm": 0.5542012469521788, "learning_rate": 3.3300627459124946e-06, "loss": 0.3167, "step": 17526 }, { "epoch": 0.8210521384737902, "grad_norm": 0.615361605107913, "learning_rate": 3.3298838521232462e-06, "loss": 0.3296, "step": 17527 }, { "epoch": 0.8210989834637186, "grad_norm": 0.5615762033632794, "learning_rate": 3.329704953558225e-06, "loss": 0.2867, "step": 17528 }, { "epoch": 0.8211458284536469, "grad_norm": 0.5938561480220644, "learning_rate": 3.3295260502184607e-06, "loss": 0.307, "step": 17529 }, { "epoch": 0.8211926734435752, "grad_norm": 0.5477045229212418, "learning_rate": 3.329347142104982e-06, "loss": 0.2965, "step": 17530 }, { "epoch": 0.8212395184335035, "grad_norm": 0.5903290075310753, "learning_rate": 3.3291682292188188e-06, "loss": 0.333, "step": 17531 }, { "epoch": 0.8212863634234319, "grad_norm": 0.5937014217412463, "learning_rate": 3.328989311561002e-06, "loss": 0.3036, "step": 17532 }, { "epoch": 0.8213332084133602, "grad_norm": 0.6030273790503479, "learning_rate": 3.328810389132558e-06, "loss": 0.3394, "step": 17533 }, { "epoch": 0.8213800534032886, "grad_norm": 0.6150288873358888, "learning_rate": 3.3286314619345195e-06, "loss": 0.3343, "step": 17534 }, { "epoch": 0.8214268983932168, "grad_norm": 0.5359067431856777, "learning_rate": 3.328452529967916e-06, "loss": 0.2951, "step": 17535 }, { "epoch": 0.8214737433831452, "grad_norm": 0.6186308622480243, "learning_rate": 3.328273593233776e-06, "loss": 0.3312, "step": 17536 }, { "epoch": 0.8215205883730735, "grad_norm": 0.5660967421137169, "learning_rate": 3.3280946517331287e-06, "loss": 0.3303, "step": 17537 }, { "epoch": 0.8215674333630019, "grad_norm": 0.587143986063708, "learning_rate": 3.3279157054670057e-06, "loss": 0.2969, "step": 17538 }, { "epoch": 0.8216142783529301, "grad_norm": 0.6662233450398917, "learning_rate": 3.327736754436436e-06, "loss": 0.3494, "step": 17539 }, { "epoch": 0.8216611233428585, "grad_norm": 0.5679060487303286, "learning_rate": 3.3275577986424484e-06, "loss": 0.3162, "step": 17540 }, { "epoch": 0.8217079683327868, "grad_norm": 0.5936910597534263, "learning_rate": 3.327378838086075e-06, "loss": 0.3167, "step": 17541 }, { "epoch": 0.8217548133227152, "grad_norm": 0.5829501933471992, "learning_rate": 3.3271998727683436e-06, "loss": 0.3145, "step": 17542 }, { "epoch": 0.8218016583126435, "grad_norm": 0.58725410092454, "learning_rate": 3.3270209026902843e-06, "loss": 0.3342, "step": 17543 }, { "epoch": 0.8218485033025718, "grad_norm": 0.6248194248580285, "learning_rate": 3.3268419278529285e-06, "loss": 0.3246, "step": 17544 }, { "epoch": 0.8218953482925001, "grad_norm": 0.5527389741599009, "learning_rate": 3.3266629482573054e-06, "loss": 0.2977, "step": 17545 }, { "epoch": 0.8219421932824285, "grad_norm": 0.5896193579999561, "learning_rate": 3.3264839639044438e-06, "loss": 0.3033, "step": 17546 }, { "epoch": 0.8219890382723568, "grad_norm": 0.6073042987643409, "learning_rate": 3.3263049747953757e-06, "loss": 0.3606, "step": 17547 }, { "epoch": 0.822035883262285, "grad_norm": 0.5596164181196167, "learning_rate": 3.326125980931129e-06, "loss": 0.3092, "step": 17548 }, { "epoch": 0.8220827282522134, "grad_norm": 0.5770008550977348, "learning_rate": 3.325946982312736e-06, "loss": 0.3169, "step": 17549 }, { "epoch": 0.8221295732421418, "grad_norm": 0.518027979750397, "learning_rate": 3.3257679789412256e-06, "loss": 0.301, "step": 17550 }, { "epoch": 0.8221764182320701, "grad_norm": 0.5517856624186109, "learning_rate": 3.325588970817627e-06, "loss": 0.3233, "step": 17551 }, { "epoch": 0.8222232632219985, "grad_norm": 0.5649198023899203, "learning_rate": 3.3254099579429725e-06, "loss": 0.2988, "step": 17552 }, { "epoch": 0.8222701082119267, "grad_norm": 0.6158638741285121, "learning_rate": 3.3252309403182904e-06, "loss": 0.3272, "step": 17553 }, { "epoch": 0.822316953201855, "grad_norm": 0.5937405350468196, "learning_rate": 3.325051917944612e-06, "loss": 0.3136, "step": 17554 }, { "epoch": 0.8223637981917834, "grad_norm": 0.57167847313779, "learning_rate": 3.3248728908229673e-06, "loss": 0.2984, "step": 17555 }, { "epoch": 0.8224106431817118, "grad_norm": 0.5467376766539515, "learning_rate": 3.3246938589543864e-06, "loss": 0.3025, "step": 17556 }, { "epoch": 0.82245748817164, "grad_norm": 0.5741444790210138, "learning_rate": 3.3245148223399e-06, "loss": 0.3121, "step": 17557 }, { "epoch": 0.8225043331615683, "grad_norm": 0.5935284801645242, "learning_rate": 3.324335780980537e-06, "loss": 0.3335, "step": 17558 }, { "epoch": 0.8225511781514967, "grad_norm": 0.5828139137958624, "learning_rate": 3.3241567348773295e-06, "loss": 0.294, "step": 17559 }, { "epoch": 0.822598023141425, "grad_norm": 0.5952561784449629, "learning_rate": 3.323977684031307e-06, "loss": 0.3117, "step": 17560 }, { "epoch": 0.8226448681313534, "grad_norm": 0.5895542273093647, "learning_rate": 3.3237986284434996e-06, "loss": 0.3359, "step": 17561 }, { "epoch": 0.8226917131212816, "grad_norm": 0.6064447188931433, "learning_rate": 3.3236195681149386e-06, "loss": 0.3223, "step": 17562 }, { "epoch": 0.82273855811121, "grad_norm": 0.5700288752934223, "learning_rate": 3.323440503046655e-06, "loss": 0.3177, "step": 17563 }, { "epoch": 0.8227854031011383, "grad_norm": 0.5906751823424409, "learning_rate": 3.3232614332396766e-06, "loss": 0.3107, "step": 17564 }, { "epoch": 0.8228322480910667, "grad_norm": 0.582175574815764, "learning_rate": 3.3230823586950356e-06, "loss": 0.3321, "step": 17565 }, { "epoch": 0.8228790930809949, "grad_norm": 0.5912966721659165, "learning_rate": 3.322903279413764e-06, "loss": 0.3296, "step": 17566 }, { "epoch": 0.8229259380709233, "grad_norm": 0.6244987744076256, "learning_rate": 3.3227241953968897e-06, "loss": 0.3401, "step": 17567 }, { "epoch": 0.8229727830608516, "grad_norm": 0.5808934324948755, "learning_rate": 3.3225451066454445e-06, "loss": 0.3288, "step": 17568 }, { "epoch": 0.82301962805078, "grad_norm": 0.5831341047865738, "learning_rate": 3.322366013160459e-06, "loss": 0.3197, "step": 17569 }, { "epoch": 0.8230664730407083, "grad_norm": 0.5843814704625836, "learning_rate": 3.322186914942964e-06, "loss": 0.3162, "step": 17570 }, { "epoch": 0.8231133180306366, "grad_norm": 0.555458536770558, "learning_rate": 3.3220078119939904e-06, "loss": 0.3107, "step": 17571 }, { "epoch": 0.8231601630205649, "grad_norm": 0.5558242428946407, "learning_rate": 3.3218287043145673e-06, "loss": 0.3112, "step": 17572 }, { "epoch": 0.8232070080104933, "grad_norm": 0.593581098118164, "learning_rate": 3.3216495919057278e-06, "loss": 0.3053, "step": 17573 }, { "epoch": 0.8232538530004216, "grad_norm": 0.6732252880200732, "learning_rate": 3.3214704747685004e-06, "loss": 0.3448, "step": 17574 }, { "epoch": 0.8233006979903499, "grad_norm": 0.5641702664434153, "learning_rate": 3.3212913529039174e-06, "loss": 0.3182, "step": 17575 }, { "epoch": 0.8233475429802782, "grad_norm": 0.6280490240087411, "learning_rate": 3.321112226313009e-06, "loss": 0.3256, "step": 17576 }, { "epoch": 0.8233943879702066, "grad_norm": 0.6017401545201484, "learning_rate": 3.320933094996806e-06, "loss": 0.3269, "step": 17577 }, { "epoch": 0.8234412329601349, "grad_norm": 0.6196838936390873, "learning_rate": 3.3207539589563397e-06, "loss": 0.318, "step": 17578 }, { "epoch": 0.8234880779500633, "grad_norm": 0.5847184113266414, "learning_rate": 3.3205748181926402e-06, "loss": 0.3099, "step": 17579 }, { "epoch": 0.8235349229399915, "grad_norm": 0.5835501192243002, "learning_rate": 3.3203956727067393e-06, "loss": 0.3032, "step": 17580 }, { "epoch": 0.8235817679299199, "grad_norm": 0.598155555422339, "learning_rate": 3.3202165224996673e-06, "loss": 0.321, "step": 17581 }, { "epoch": 0.8236286129198482, "grad_norm": 0.5955149030910344, "learning_rate": 3.320037367572455e-06, "loss": 0.3348, "step": 17582 }, { "epoch": 0.8236754579097766, "grad_norm": 0.7382172042300428, "learning_rate": 3.3198582079261343e-06, "loss": 0.3248, "step": 17583 }, { "epoch": 0.8237223028997048, "grad_norm": 0.5614641395612447, "learning_rate": 3.319679043561736e-06, "loss": 0.3077, "step": 17584 }, { "epoch": 0.8237691478896332, "grad_norm": 0.5574624043047306, "learning_rate": 3.3194998744802896e-06, "loss": 0.3196, "step": 17585 }, { "epoch": 0.8238159928795615, "grad_norm": 0.5904606496788956, "learning_rate": 3.3193207006828282e-06, "loss": 0.3061, "step": 17586 }, { "epoch": 0.8238628378694899, "grad_norm": 0.5820271649569503, "learning_rate": 3.319141522170382e-06, "loss": 0.3258, "step": 17587 }, { "epoch": 0.8239096828594182, "grad_norm": 0.5572782175348606, "learning_rate": 3.318962338943983e-06, "loss": 0.3048, "step": 17588 }, { "epoch": 0.8239565278493465, "grad_norm": 0.5913270961093905, "learning_rate": 3.31878315100466e-06, "loss": 0.301, "step": 17589 }, { "epoch": 0.8240033728392748, "grad_norm": 0.5480354683804856, "learning_rate": 3.318603958353447e-06, "loss": 0.2821, "step": 17590 }, { "epoch": 0.8240502178292032, "grad_norm": 0.6415647258756306, "learning_rate": 3.318424760991373e-06, "loss": 0.3332, "step": 17591 }, { "epoch": 0.8240970628191315, "grad_norm": 0.5717242233160955, "learning_rate": 3.3182455589194713e-06, "loss": 0.3181, "step": 17592 }, { "epoch": 0.8241439078090598, "grad_norm": 0.6431558041089565, "learning_rate": 3.318066352138771e-06, "loss": 0.3259, "step": 17593 }, { "epoch": 0.8241907527989881, "grad_norm": 0.572398545367792, "learning_rate": 3.3178871406503053e-06, "loss": 0.3161, "step": 17594 }, { "epoch": 0.8242375977889165, "grad_norm": 0.5613404728282161, "learning_rate": 3.317707924455104e-06, "loss": 0.3079, "step": 17595 }, { "epoch": 0.8242844427788448, "grad_norm": 0.5838874175302936, "learning_rate": 3.3175287035542e-06, "loss": 0.3277, "step": 17596 }, { "epoch": 0.8243312877687732, "grad_norm": 0.5832860361398874, "learning_rate": 3.3173494779486225e-06, "loss": 0.3032, "step": 17597 }, { "epoch": 0.8243781327587014, "grad_norm": 0.6309761368140906, "learning_rate": 3.317170247639405e-06, "loss": 0.3235, "step": 17598 }, { "epoch": 0.8244249777486298, "grad_norm": 0.5778007709942714, "learning_rate": 3.3169910126275788e-06, "loss": 0.2955, "step": 17599 }, { "epoch": 0.8244718227385581, "grad_norm": 0.6301566481479214, "learning_rate": 3.3168117729141735e-06, "loss": 0.3121, "step": 17600 }, { "epoch": 0.8245186677284865, "grad_norm": 0.6237611895461873, "learning_rate": 3.316632528500222e-06, "loss": 0.329, "step": 17601 }, { "epoch": 0.8245655127184147, "grad_norm": 0.6143391394050948, "learning_rate": 3.3164532793867565e-06, "loss": 0.3521, "step": 17602 }, { "epoch": 0.8246123577083431, "grad_norm": 0.6009161612116154, "learning_rate": 3.316274025574806e-06, "loss": 0.3029, "step": 17603 }, { "epoch": 0.8246592026982714, "grad_norm": 0.585078910214368, "learning_rate": 3.316094767065404e-06, "loss": 0.3181, "step": 17604 }, { "epoch": 0.8247060476881998, "grad_norm": 0.5684676275711127, "learning_rate": 3.3159155038595838e-06, "loss": 0.3088, "step": 17605 }, { "epoch": 0.8247528926781281, "grad_norm": 0.5880017741352852, "learning_rate": 3.315736235958372e-06, "loss": 0.3371, "step": 17606 }, { "epoch": 0.8247997376680564, "grad_norm": 0.6532151200118297, "learning_rate": 3.315556963362805e-06, "loss": 0.2906, "step": 17607 }, { "epoch": 0.8248465826579847, "grad_norm": 0.5773584397099819, "learning_rate": 3.3153776860739123e-06, "loss": 0.2943, "step": 17608 }, { "epoch": 0.8248934276479131, "grad_norm": 0.6092042501151536, "learning_rate": 3.315198404092726e-06, "loss": 0.2992, "step": 17609 }, { "epoch": 0.8249402726378414, "grad_norm": 0.6177420272561889, "learning_rate": 3.3150191174202772e-06, "loss": 0.3309, "step": 17610 }, { "epoch": 0.8249871176277697, "grad_norm": 0.5454628966918403, "learning_rate": 3.3148398260575985e-06, "loss": 0.2974, "step": 17611 }, { "epoch": 0.825033962617698, "grad_norm": 0.5882675329679272, "learning_rate": 3.314660530005722e-06, "loss": 0.3279, "step": 17612 }, { "epoch": 0.8250808076076264, "grad_norm": 0.5801218017689985, "learning_rate": 3.314481229265678e-06, "loss": 0.3115, "step": 17613 }, { "epoch": 0.8251276525975547, "grad_norm": 0.5855229840074865, "learning_rate": 3.3143019238385e-06, "loss": 0.3321, "step": 17614 }, { "epoch": 0.8251744975874831, "grad_norm": 0.56750141449964, "learning_rate": 3.314122613725219e-06, "loss": 0.3057, "step": 17615 }, { "epoch": 0.8252213425774113, "grad_norm": 0.6393893607230117, "learning_rate": 3.313943298926866e-06, "loss": 0.3442, "step": 17616 }, { "epoch": 0.8252681875673397, "grad_norm": 0.5736614605066475, "learning_rate": 3.3137639794444753e-06, "loss": 0.3133, "step": 17617 }, { "epoch": 0.825315032557268, "grad_norm": 0.6731647104300413, "learning_rate": 3.3135846552790764e-06, "loss": 0.3174, "step": 17618 }, { "epoch": 0.8253618775471964, "grad_norm": 0.634768952116213, "learning_rate": 3.313405326431703e-06, "loss": 0.3258, "step": 17619 }, { "epoch": 0.8254087225371246, "grad_norm": 0.611402119831354, "learning_rate": 3.313225992903386e-06, "loss": 0.3318, "step": 17620 }, { "epoch": 0.825455567527053, "grad_norm": 0.6433381279013441, "learning_rate": 3.3130466546951577e-06, "loss": 0.332, "step": 17621 }, { "epoch": 0.8255024125169813, "grad_norm": 0.5591245841261884, "learning_rate": 3.3128673118080506e-06, "loss": 0.3152, "step": 17622 }, { "epoch": 0.8255492575069097, "grad_norm": 0.5643811939504615, "learning_rate": 3.312687964243096e-06, "loss": 0.3138, "step": 17623 }, { "epoch": 0.825596102496838, "grad_norm": 0.5747010148631803, "learning_rate": 3.312508612001327e-06, "loss": 0.3323, "step": 17624 }, { "epoch": 0.8256429474867663, "grad_norm": 0.6255031098212905, "learning_rate": 3.3123292550837745e-06, "loss": 0.321, "step": 17625 }, { "epoch": 0.8256897924766946, "grad_norm": 0.5932604941370331, "learning_rate": 3.3121498934914714e-06, "loss": 0.3095, "step": 17626 }, { "epoch": 0.825736637466623, "grad_norm": 0.6309299159802004, "learning_rate": 3.3119705272254502e-06, "loss": 0.333, "step": 17627 }, { "epoch": 0.8257834824565513, "grad_norm": 0.5722191746453729, "learning_rate": 3.311791156286742e-06, "loss": 0.3362, "step": 17628 }, { "epoch": 0.8258303274464796, "grad_norm": 0.5932791217978206, "learning_rate": 3.311611780676381e-06, "loss": 0.3303, "step": 17629 }, { "epoch": 0.8258771724364079, "grad_norm": 0.5974846051905645, "learning_rate": 3.3114324003953975e-06, "loss": 0.3323, "step": 17630 }, { "epoch": 0.8259240174263363, "grad_norm": 0.5919016002540426, "learning_rate": 3.3112530154448243e-06, "loss": 0.3306, "step": 17631 }, { "epoch": 0.8259708624162646, "grad_norm": 0.6603808912803388, "learning_rate": 3.3110736258256935e-06, "loss": 0.3337, "step": 17632 }, { "epoch": 0.826017707406193, "grad_norm": 0.5979181431078777, "learning_rate": 3.310894231539039e-06, "loss": 0.3431, "step": 17633 }, { "epoch": 0.8260645523961212, "grad_norm": 0.6406817066581425, "learning_rate": 3.3107148325858913e-06, "loss": 0.3226, "step": 17634 }, { "epoch": 0.8261113973860496, "grad_norm": 0.6305263312205458, "learning_rate": 3.310535428967283e-06, "loss": 0.3102, "step": 17635 }, { "epoch": 0.8261582423759779, "grad_norm": 0.5879144715965414, "learning_rate": 3.310356020684248e-06, "loss": 0.3119, "step": 17636 }, { "epoch": 0.8262050873659063, "grad_norm": 0.5948961103980729, "learning_rate": 3.3101766077378174e-06, "loss": 0.308, "step": 17637 }, { "epoch": 0.8262519323558345, "grad_norm": 0.5470801097749868, "learning_rate": 3.309997190129024e-06, "loss": 0.2942, "step": 17638 }, { "epoch": 0.8262987773457628, "grad_norm": 0.6090098349337731, "learning_rate": 3.3098177678589005e-06, "loss": 0.3058, "step": 17639 }, { "epoch": 0.8263456223356912, "grad_norm": 0.6260414125214537, "learning_rate": 3.309638340928479e-06, "loss": 0.3022, "step": 17640 }, { "epoch": 0.8263924673256196, "grad_norm": 0.5711498086210549, "learning_rate": 3.3094589093387926e-06, "loss": 0.3117, "step": 17641 }, { "epoch": 0.8264393123155479, "grad_norm": 0.5753502194834885, "learning_rate": 3.309279473090874e-06, "loss": 0.3149, "step": 17642 }, { "epoch": 0.8264861573054761, "grad_norm": 0.5991595175845431, "learning_rate": 3.3091000321857547e-06, "loss": 0.3259, "step": 17643 }, { "epoch": 0.8265330022954045, "grad_norm": 0.5829421538516976, "learning_rate": 3.308920586624468e-06, "loss": 0.3156, "step": 17644 }, { "epoch": 0.8265798472853328, "grad_norm": 0.6021020730682428, "learning_rate": 3.3087411364080474e-06, "loss": 0.3082, "step": 17645 }, { "epoch": 0.8266266922752612, "grad_norm": 0.623055155634862, "learning_rate": 3.3085616815375243e-06, "loss": 0.3397, "step": 17646 }, { "epoch": 0.8266735372651894, "grad_norm": 0.5553189380624857, "learning_rate": 3.3083822220139316e-06, "loss": 0.3073, "step": 17647 }, { "epoch": 0.8267203822551178, "grad_norm": 0.6242363911975419, "learning_rate": 3.3082027578383035e-06, "loss": 0.338, "step": 17648 }, { "epoch": 0.8267672272450461, "grad_norm": 0.5206284628516132, "learning_rate": 3.3080232890116704e-06, "loss": 0.2835, "step": 17649 }, { "epoch": 0.8268140722349745, "grad_norm": 0.6387871402513338, "learning_rate": 3.3078438155350675e-06, "loss": 0.3367, "step": 17650 }, { "epoch": 0.8268609172249028, "grad_norm": 0.6011738888651377, "learning_rate": 3.3076643374095263e-06, "loss": 0.3353, "step": 17651 }, { "epoch": 0.8269077622148311, "grad_norm": 0.5599160431730219, "learning_rate": 3.307484854636079e-06, "loss": 0.3005, "step": 17652 }, { "epoch": 0.8269546072047594, "grad_norm": 0.6199516696126733, "learning_rate": 3.3073053672157594e-06, "loss": 0.3214, "step": 17653 }, { "epoch": 0.8270014521946878, "grad_norm": 0.608743529136388, "learning_rate": 3.3071258751496017e-06, "loss": 0.3028, "step": 17654 }, { "epoch": 0.8270482971846161, "grad_norm": 0.6266365613757587, "learning_rate": 3.306946378438636e-06, "loss": 0.3169, "step": 17655 }, { "epoch": 0.8270951421745444, "grad_norm": 0.5813417393365905, "learning_rate": 3.3067668770838968e-06, "loss": 0.2896, "step": 17656 }, { "epoch": 0.8271419871644727, "grad_norm": 0.5692153083297197, "learning_rate": 3.3065873710864175e-06, "loss": 0.312, "step": 17657 }, { "epoch": 0.8271888321544011, "grad_norm": 0.6329916556639948, "learning_rate": 3.3064078604472312e-06, "loss": 0.3349, "step": 17658 }, { "epoch": 0.8272356771443294, "grad_norm": 0.539341965247224, "learning_rate": 3.3062283451673695e-06, "loss": 0.311, "step": 17659 }, { "epoch": 0.8272825221342578, "grad_norm": 0.5518856108447768, "learning_rate": 3.306048825247866e-06, "loss": 0.3104, "step": 17660 }, { "epoch": 0.827329367124186, "grad_norm": 0.5951526157744293, "learning_rate": 3.305869300689755e-06, "loss": 0.3216, "step": 17661 }, { "epoch": 0.8273762121141144, "grad_norm": 0.5190961410285427, "learning_rate": 3.3056897714940678e-06, "loss": 0.2858, "step": 17662 }, { "epoch": 0.8274230571040427, "grad_norm": 0.5554890857293348, "learning_rate": 3.305510237661839e-06, "loss": 0.313, "step": 17663 }, { "epoch": 0.8274699020939711, "grad_norm": 0.6319163841870774, "learning_rate": 3.3053306991941014e-06, "loss": 0.3353, "step": 17664 }, { "epoch": 0.8275167470838993, "grad_norm": 0.5981715364480994, "learning_rate": 3.3051511560918882e-06, "loss": 0.3197, "step": 17665 }, { "epoch": 0.8275635920738277, "grad_norm": 0.6230905348605568, "learning_rate": 3.304971608356232e-06, "loss": 0.3368, "step": 17666 }, { "epoch": 0.827610437063756, "grad_norm": 0.5924546182033626, "learning_rate": 3.304792055988166e-06, "loss": 0.3395, "step": 17667 }, { "epoch": 0.8276572820536844, "grad_norm": 0.6298048555176644, "learning_rate": 3.3046124989887254e-06, "loss": 0.32, "step": 17668 }, { "epoch": 0.8277041270436127, "grad_norm": 0.5868498847462519, "learning_rate": 3.3044329373589412e-06, "loss": 0.3288, "step": 17669 }, { "epoch": 0.827750972033541, "grad_norm": 0.639418012614922, "learning_rate": 3.3042533710998476e-06, "loss": 0.3138, "step": 17670 }, { "epoch": 0.8277978170234693, "grad_norm": 0.581719259263548, "learning_rate": 3.3040738002124783e-06, "loss": 0.3132, "step": 17671 }, { "epoch": 0.8278446620133977, "grad_norm": 0.6240270417250395, "learning_rate": 3.3038942246978663e-06, "loss": 0.3172, "step": 17672 }, { "epoch": 0.827891507003326, "grad_norm": 0.5894964342007932, "learning_rate": 3.3037146445570444e-06, "loss": 0.3191, "step": 17673 }, { "epoch": 0.8279383519932543, "grad_norm": 0.5684259197970146, "learning_rate": 3.303535059791047e-06, "loss": 0.3135, "step": 17674 }, { "epoch": 0.8279851969831826, "grad_norm": 0.6574571885198631, "learning_rate": 3.3033554704009084e-06, "loss": 0.3535, "step": 17675 }, { "epoch": 0.828032041973111, "grad_norm": 0.5894247224703749, "learning_rate": 3.3031758763876596e-06, "loss": 0.3028, "step": 17676 }, { "epoch": 0.8280788869630393, "grad_norm": 0.5600769215524842, "learning_rate": 3.3029962777523363e-06, "loss": 0.3058, "step": 17677 }, { "epoch": 0.8281257319529677, "grad_norm": 0.5654310305845329, "learning_rate": 3.302816674495971e-06, "loss": 0.3068, "step": 17678 }, { "epoch": 0.8281725769428959, "grad_norm": 0.5473120115282298, "learning_rate": 3.3026370666195978e-06, "loss": 0.313, "step": 17679 }, { "epoch": 0.8282194219328243, "grad_norm": 0.6124336139658307, "learning_rate": 3.3024574541242493e-06, "loss": 0.3283, "step": 17680 }, { "epoch": 0.8282662669227526, "grad_norm": 0.594016112760748, "learning_rate": 3.302277837010961e-06, "loss": 0.3126, "step": 17681 }, { "epoch": 0.828313111912681, "grad_norm": 0.6074495239220287, "learning_rate": 3.3020982152807644e-06, "loss": 0.3057, "step": 17682 }, { "epoch": 0.8283599569026092, "grad_norm": 0.5965510892889906, "learning_rate": 3.3019185889346943e-06, "loss": 0.3235, "step": 17683 }, { "epoch": 0.8284068018925376, "grad_norm": 0.6102018354181763, "learning_rate": 3.3017389579737845e-06, "loss": 0.3247, "step": 17684 }, { "epoch": 0.8284536468824659, "grad_norm": 0.5716838620846916, "learning_rate": 3.3015593223990693e-06, "loss": 0.3041, "step": 17685 }, { "epoch": 0.8285004918723943, "grad_norm": 0.6234839420252368, "learning_rate": 3.30137968221158e-06, "loss": 0.3232, "step": 17686 }, { "epoch": 0.8285473368623226, "grad_norm": 0.6511995975397757, "learning_rate": 3.3012000374123533e-06, "loss": 0.3255, "step": 17687 }, { "epoch": 0.8285941818522509, "grad_norm": 0.621902501113232, "learning_rate": 3.301020388002421e-06, "loss": 0.3199, "step": 17688 }, { "epoch": 0.8286410268421792, "grad_norm": 0.5389635706292221, "learning_rate": 3.3008407339828188e-06, "loss": 0.3141, "step": 17689 }, { "epoch": 0.8286878718321076, "grad_norm": 0.6047332590864111, "learning_rate": 3.3006610753545777e-06, "loss": 0.3263, "step": 17690 }, { "epoch": 0.8287347168220359, "grad_norm": 0.6884302911159076, "learning_rate": 3.3004814121187344e-06, "loss": 0.3358, "step": 17691 }, { "epoch": 0.8287815618119642, "grad_norm": 0.6279663026110418, "learning_rate": 3.3003017442763224e-06, "loss": 0.3113, "step": 17692 }, { "epoch": 0.8288284068018925, "grad_norm": 0.6004460303311577, "learning_rate": 3.3001220718283737e-06, "loss": 0.3324, "step": 17693 }, { "epoch": 0.8288752517918209, "grad_norm": 0.6080515901782919, "learning_rate": 3.2999423947759247e-06, "loss": 0.3264, "step": 17694 }, { "epoch": 0.8289220967817492, "grad_norm": 0.5165456198852475, "learning_rate": 3.299762713120007e-06, "loss": 0.2939, "step": 17695 }, { "epoch": 0.8289689417716776, "grad_norm": 0.5902669607330218, "learning_rate": 3.2995830268616567e-06, "loss": 0.3086, "step": 17696 }, { "epoch": 0.8290157867616058, "grad_norm": 0.5995976223602507, "learning_rate": 3.299403336001908e-06, "loss": 0.3239, "step": 17697 }, { "epoch": 0.8290626317515342, "grad_norm": 0.6205670568023854, "learning_rate": 3.2992236405417927e-06, "loss": 0.3387, "step": 17698 }, { "epoch": 0.8291094767414625, "grad_norm": 0.5903792009960664, "learning_rate": 3.2990439404823465e-06, "loss": 0.313, "step": 17699 }, { "epoch": 0.8291563217313909, "grad_norm": 0.5983596574900963, "learning_rate": 3.2988642358246038e-06, "loss": 0.327, "step": 17700 }, { "epoch": 0.8292031667213191, "grad_norm": 0.5538438523633438, "learning_rate": 3.2986845265695973e-06, "loss": 0.3106, "step": 17701 }, { "epoch": 0.8292500117112475, "grad_norm": 0.6140383712854378, "learning_rate": 3.2985048127183622e-06, "loss": 0.3191, "step": 17702 }, { "epoch": 0.8292968567011758, "grad_norm": 0.6034945611978368, "learning_rate": 3.2983250942719336e-06, "loss": 0.3382, "step": 17703 }, { "epoch": 0.8293437016911042, "grad_norm": 0.5658689142862464, "learning_rate": 3.298145371231344e-06, "loss": 0.3166, "step": 17704 }, { "epoch": 0.8293905466810325, "grad_norm": 0.5694421152866203, "learning_rate": 3.2979656435976283e-06, "loss": 0.2999, "step": 17705 }, { "epoch": 0.8294373916709608, "grad_norm": 0.6184534773276309, "learning_rate": 3.2977859113718206e-06, "loss": 0.3357, "step": 17706 }, { "epoch": 0.8294842366608891, "grad_norm": 0.5857638106990427, "learning_rate": 3.2976061745549567e-06, "loss": 0.2955, "step": 17707 }, { "epoch": 0.8295310816508175, "grad_norm": 0.6045897425815083, "learning_rate": 3.297426433148068e-06, "loss": 0.327, "step": 17708 }, { "epoch": 0.8295779266407458, "grad_norm": 0.6090427837290415, "learning_rate": 3.2972466871521924e-06, "loss": 0.3108, "step": 17709 }, { "epoch": 0.829624771630674, "grad_norm": 0.6478781457485622, "learning_rate": 3.2970669365683617e-06, "loss": 0.3224, "step": 17710 }, { "epoch": 0.8296716166206024, "grad_norm": 0.5639105733178352, "learning_rate": 3.2968871813976105e-06, "loss": 0.317, "step": 17711 }, { "epoch": 0.8297184616105308, "grad_norm": 0.5645583332162768, "learning_rate": 3.2967074216409747e-06, "loss": 0.3192, "step": 17712 }, { "epoch": 0.8297653066004591, "grad_norm": 0.6034049425230337, "learning_rate": 3.296527657299488e-06, "loss": 0.3253, "step": 17713 }, { "epoch": 0.8298121515903875, "grad_norm": 0.6262828788094132, "learning_rate": 3.2963478883741838e-06, "loss": 0.3228, "step": 17714 }, { "epoch": 0.8298589965803157, "grad_norm": 0.6027515323965867, "learning_rate": 3.296168114866099e-06, "loss": 0.332, "step": 17715 }, { "epoch": 0.829905841570244, "grad_norm": 0.5717372327423317, "learning_rate": 3.295988336776266e-06, "loss": 0.3101, "step": 17716 }, { "epoch": 0.8299526865601724, "grad_norm": 0.5931290670430787, "learning_rate": 3.2958085541057205e-06, "loss": 0.322, "step": 17717 }, { "epoch": 0.8299995315501008, "grad_norm": 0.5955654857722398, "learning_rate": 3.2956287668554963e-06, "loss": 0.3152, "step": 17718 }, { "epoch": 0.830046376540029, "grad_norm": 0.5722707308953927, "learning_rate": 3.2954489750266287e-06, "loss": 0.3272, "step": 17719 }, { "epoch": 0.8300932215299573, "grad_norm": 0.5734885618562031, "learning_rate": 3.295269178620153e-06, "loss": 0.31, "step": 17720 }, { "epoch": 0.8301400665198857, "grad_norm": 0.6175946236039211, "learning_rate": 3.2950893776371025e-06, "loss": 0.297, "step": 17721 }, { "epoch": 0.830186911509814, "grad_norm": 0.681854631343864, "learning_rate": 3.294909572078512e-06, "loss": 0.3324, "step": 17722 }, { "epoch": 0.8302337564997424, "grad_norm": 0.6440566331756629, "learning_rate": 3.2947297619454165e-06, "loss": 0.3258, "step": 17723 }, { "epoch": 0.8302806014896706, "grad_norm": 0.5394036434025563, "learning_rate": 3.2945499472388527e-06, "loss": 0.2997, "step": 17724 }, { "epoch": 0.830327446479599, "grad_norm": 0.576054528298345, "learning_rate": 3.294370127959852e-06, "loss": 0.3216, "step": 17725 }, { "epoch": 0.8303742914695273, "grad_norm": 0.6552283927725088, "learning_rate": 3.294190304109451e-06, "loss": 0.324, "step": 17726 }, { "epoch": 0.8304211364594557, "grad_norm": 0.6078781124618161, "learning_rate": 3.294010475688685e-06, "loss": 0.3361, "step": 17727 }, { "epoch": 0.8304679814493839, "grad_norm": 0.6027908365523132, "learning_rate": 3.2938306426985884e-06, "loss": 0.3007, "step": 17728 }, { "epoch": 0.8305148264393123, "grad_norm": 0.619863286492411, "learning_rate": 3.2936508051401955e-06, "loss": 0.3337, "step": 17729 }, { "epoch": 0.8305616714292406, "grad_norm": 0.595365374684415, "learning_rate": 3.2934709630145416e-06, "loss": 0.3329, "step": 17730 }, { "epoch": 0.830608516419169, "grad_norm": 0.5721186357579049, "learning_rate": 3.2932911163226622e-06, "loss": 0.3138, "step": 17731 }, { "epoch": 0.8306553614090973, "grad_norm": 0.6383058209455609, "learning_rate": 3.2931112650655917e-06, "loss": 0.3252, "step": 17732 }, { "epoch": 0.8307022063990256, "grad_norm": 0.6563538685632992, "learning_rate": 3.2929314092443654e-06, "loss": 0.3332, "step": 17733 }, { "epoch": 0.8307490513889539, "grad_norm": 0.6039224762761162, "learning_rate": 3.292751548860018e-06, "loss": 0.3372, "step": 17734 }, { "epoch": 0.8307958963788823, "grad_norm": 0.5472936527505421, "learning_rate": 3.2925716839135842e-06, "loss": 0.3178, "step": 17735 }, { "epoch": 0.8308427413688106, "grad_norm": 0.5769059279025363, "learning_rate": 3.2923918144060995e-06, "loss": 0.3262, "step": 17736 }, { "epoch": 0.8308895863587389, "grad_norm": 0.5664356302438034, "learning_rate": 3.2922119403385994e-06, "loss": 0.3166, "step": 17737 }, { "epoch": 0.8309364313486672, "grad_norm": 0.6318586980591251, "learning_rate": 3.2920320617121194e-06, "loss": 0.3185, "step": 17738 }, { "epoch": 0.8309832763385956, "grad_norm": 0.6219226858784768, "learning_rate": 3.2918521785276936e-06, "loss": 0.316, "step": 17739 }, { "epoch": 0.8310301213285239, "grad_norm": 0.6169498826668104, "learning_rate": 3.2916722907863564e-06, "loss": 0.3376, "step": 17740 }, { "epoch": 0.8310769663184523, "grad_norm": 0.5858623498555059, "learning_rate": 3.2914923984891458e-06, "loss": 0.3059, "step": 17741 }, { "epoch": 0.8311238113083805, "grad_norm": 0.6502188323050078, "learning_rate": 3.2913125016370946e-06, "loss": 0.3419, "step": 17742 }, { "epoch": 0.8311706562983089, "grad_norm": 0.5669114119993567, "learning_rate": 3.291132600231238e-06, "loss": 0.3274, "step": 17743 }, { "epoch": 0.8312175012882372, "grad_norm": 0.5848446498616559, "learning_rate": 3.290952694272613e-06, "loss": 0.3198, "step": 17744 }, { "epoch": 0.8312643462781656, "grad_norm": 0.5552909712719561, "learning_rate": 3.2907727837622542e-06, "loss": 0.2876, "step": 17745 }, { "epoch": 0.8313111912680938, "grad_norm": 0.5911382243275805, "learning_rate": 3.2905928687011966e-06, "loss": 0.3026, "step": 17746 }, { "epoch": 0.8313580362580222, "grad_norm": 0.5944974398183226, "learning_rate": 3.2904129490904756e-06, "loss": 0.3095, "step": 17747 }, { "epoch": 0.8314048812479505, "grad_norm": 0.6000583891931928, "learning_rate": 3.290233024931127e-06, "loss": 0.3165, "step": 17748 }, { "epoch": 0.8314517262378789, "grad_norm": 0.5773078660610838, "learning_rate": 3.290053096224186e-06, "loss": 0.3239, "step": 17749 }, { "epoch": 0.8314985712278072, "grad_norm": 0.6186183010096417, "learning_rate": 3.2898731629706878e-06, "loss": 0.3117, "step": 17750 }, { "epoch": 0.8315454162177355, "grad_norm": 0.5886706887948175, "learning_rate": 3.289693225171668e-06, "loss": 0.3535, "step": 17751 }, { "epoch": 0.8315922612076638, "grad_norm": 0.6175058067243624, "learning_rate": 3.2895132828281624e-06, "loss": 0.3214, "step": 17752 }, { "epoch": 0.8316391061975922, "grad_norm": 0.563213968331867, "learning_rate": 3.289333335941206e-06, "loss": 0.3107, "step": 17753 }, { "epoch": 0.8316859511875205, "grad_norm": 0.590141014600238, "learning_rate": 3.289153384511835e-06, "loss": 0.3095, "step": 17754 }, { "epoch": 0.8317327961774488, "grad_norm": 0.6003752331093963, "learning_rate": 3.2889734285410846e-06, "loss": 0.3115, "step": 17755 }, { "epoch": 0.8317796411673771, "grad_norm": 0.6121118407164532, "learning_rate": 3.28879346802999e-06, "loss": 0.3107, "step": 17756 }, { "epoch": 0.8318264861573055, "grad_norm": 0.5773411164212475, "learning_rate": 3.2886135029795875e-06, "loss": 0.3092, "step": 17757 }, { "epoch": 0.8318733311472338, "grad_norm": 0.5761077875920549, "learning_rate": 3.288433533390912e-06, "loss": 0.3457, "step": 17758 }, { "epoch": 0.8319201761371622, "grad_norm": 0.5933575918754134, "learning_rate": 3.2882535592650007e-06, "loss": 0.323, "step": 17759 }, { "epoch": 0.8319670211270904, "grad_norm": 0.5618999666305949, "learning_rate": 3.2880735806028873e-06, "loss": 0.3091, "step": 17760 }, { "epoch": 0.8320138661170188, "grad_norm": 0.5962665711471543, "learning_rate": 3.287893597405609e-06, "loss": 0.3457, "step": 17761 }, { "epoch": 0.8320607111069471, "grad_norm": 0.6347483383335661, "learning_rate": 3.2877136096742014e-06, "loss": 0.3356, "step": 17762 }, { "epoch": 0.8321075560968755, "grad_norm": 0.574996292805131, "learning_rate": 3.2875336174096994e-06, "loss": 0.3024, "step": 17763 }, { "epoch": 0.8321544010868037, "grad_norm": 0.594913850650328, "learning_rate": 3.2873536206131396e-06, "loss": 0.3196, "step": 17764 }, { "epoch": 0.8322012460767321, "grad_norm": 0.5698384197161142, "learning_rate": 3.2871736192855574e-06, "loss": 0.3129, "step": 17765 }, { "epoch": 0.8322480910666604, "grad_norm": 0.5631999352645712, "learning_rate": 3.286993613427989e-06, "loss": 0.3219, "step": 17766 }, { "epoch": 0.8322949360565888, "grad_norm": 0.5728381225260668, "learning_rate": 3.286813603041471e-06, "loss": 0.3147, "step": 17767 }, { "epoch": 0.8323417810465171, "grad_norm": 0.5615231152182998, "learning_rate": 3.2866335881270375e-06, "loss": 0.2897, "step": 17768 }, { "epoch": 0.8323886260364454, "grad_norm": 0.5628953295940946, "learning_rate": 3.2864535686857256e-06, "loss": 0.294, "step": 17769 }, { "epoch": 0.8324354710263737, "grad_norm": 0.5706112719557221, "learning_rate": 3.286273544718571e-06, "loss": 0.3148, "step": 17770 }, { "epoch": 0.8324823160163021, "grad_norm": 0.5475446275278891, "learning_rate": 3.2860935162266096e-06, "loss": 0.3061, "step": 17771 }, { "epoch": 0.8325291610062304, "grad_norm": 0.5358482180449894, "learning_rate": 3.2859134832108774e-06, "loss": 0.3127, "step": 17772 }, { "epoch": 0.8325760059961587, "grad_norm": 0.558013760489009, "learning_rate": 3.2857334456724123e-06, "loss": 0.3025, "step": 17773 }, { "epoch": 0.832622850986087, "grad_norm": 0.6329047977537313, "learning_rate": 3.2855534036122473e-06, "loss": 0.3133, "step": 17774 }, { "epoch": 0.8326696959760154, "grad_norm": 0.5884619144103738, "learning_rate": 3.2853733570314196e-06, "loss": 0.3271, "step": 17775 }, { "epoch": 0.8327165409659437, "grad_norm": 0.5830863989710741, "learning_rate": 3.285193305930966e-06, "loss": 0.3307, "step": 17776 }, { "epoch": 0.8327633859558721, "grad_norm": 0.5804446206974224, "learning_rate": 3.2850132503119227e-06, "loss": 0.302, "step": 17777 }, { "epoch": 0.8328102309458003, "grad_norm": 0.606311772117175, "learning_rate": 3.284833190175325e-06, "loss": 0.3149, "step": 17778 }, { "epoch": 0.8328570759357287, "grad_norm": 0.5717936245958454, "learning_rate": 3.2846531255222096e-06, "loss": 0.3233, "step": 17779 }, { "epoch": 0.832903920925657, "grad_norm": 0.5537368579435732, "learning_rate": 3.284473056353613e-06, "loss": 0.2983, "step": 17780 }, { "epoch": 0.8329507659155854, "grad_norm": 0.607340403345124, "learning_rate": 3.2842929826705704e-06, "loss": 0.3371, "step": 17781 }, { "epoch": 0.8329976109055136, "grad_norm": 0.6182667556805758, "learning_rate": 3.2841129044741197e-06, "loss": 0.3491, "step": 17782 }, { "epoch": 0.833044455895442, "grad_norm": 0.6474810286626493, "learning_rate": 3.283932821765296e-06, "loss": 0.3385, "step": 17783 }, { "epoch": 0.8330913008853703, "grad_norm": 0.5354386273774164, "learning_rate": 3.283752734545136e-06, "loss": 0.3008, "step": 17784 }, { "epoch": 0.8331381458752987, "grad_norm": 0.5461743539953471, "learning_rate": 3.2835726428146757e-06, "loss": 0.3065, "step": 17785 }, { "epoch": 0.833184990865227, "grad_norm": 0.7112079172776754, "learning_rate": 3.2833925465749516e-06, "loss": 0.315, "step": 17786 }, { "epoch": 0.8332318358551553, "grad_norm": 0.6448560114438939, "learning_rate": 3.283212445827001e-06, "loss": 0.3421, "step": 17787 }, { "epoch": 0.8332786808450836, "grad_norm": 0.5856563474704728, "learning_rate": 3.2830323405718596e-06, "loss": 0.3064, "step": 17788 }, { "epoch": 0.833325525835012, "grad_norm": 0.588029825358394, "learning_rate": 3.282852230810563e-06, "loss": 0.3146, "step": 17789 }, { "epoch": 0.8333723708249403, "grad_norm": 0.6370513447932377, "learning_rate": 3.282672116544149e-06, "loss": 0.3357, "step": 17790 }, { "epoch": 0.8334192158148686, "grad_norm": 0.6444449087899093, "learning_rate": 3.2824919977736545e-06, "loss": 0.3284, "step": 17791 }, { "epoch": 0.8334660608047969, "grad_norm": 0.6119005942105509, "learning_rate": 3.282311874500114e-06, "loss": 0.3029, "step": 17792 }, { "epoch": 0.8335129057947253, "grad_norm": 0.5946086979960045, "learning_rate": 3.282131746724566e-06, "loss": 0.3083, "step": 17793 }, { "epoch": 0.8335597507846536, "grad_norm": 0.5771779214461903, "learning_rate": 3.2819516144480467e-06, "loss": 0.3111, "step": 17794 }, { "epoch": 0.833606595774582, "grad_norm": 0.6237821684382979, "learning_rate": 3.2817714776715915e-06, "loss": 0.3303, "step": 17795 }, { "epoch": 0.8336534407645102, "grad_norm": 0.5827245154052035, "learning_rate": 3.281591336396238e-06, "loss": 0.3302, "step": 17796 }, { "epoch": 0.8337002857544386, "grad_norm": 0.6123891283555716, "learning_rate": 3.281411190623024e-06, "loss": 0.3224, "step": 17797 }, { "epoch": 0.8337471307443669, "grad_norm": 0.5695770584676885, "learning_rate": 3.281231040352984e-06, "loss": 0.2979, "step": 17798 }, { "epoch": 0.8337939757342953, "grad_norm": 0.5655657625598419, "learning_rate": 3.281050885587156e-06, "loss": 0.3042, "step": 17799 }, { "epoch": 0.8338408207242235, "grad_norm": 0.6031001454322202, "learning_rate": 3.2808707263265758e-06, "loss": 0.3206, "step": 17800 }, { "epoch": 0.8338876657141518, "grad_norm": 0.5840766627806825, "learning_rate": 3.2806905625722818e-06, "loss": 0.3191, "step": 17801 }, { "epoch": 0.8339345107040802, "grad_norm": 0.5778363775476049, "learning_rate": 3.2805103943253093e-06, "loss": 0.3157, "step": 17802 }, { "epoch": 0.8339813556940086, "grad_norm": 0.5276476846874693, "learning_rate": 3.280330221586696e-06, "loss": 0.2955, "step": 17803 }, { "epoch": 0.8340282006839369, "grad_norm": 0.5773056800900974, "learning_rate": 3.2801500443574784e-06, "loss": 0.2981, "step": 17804 }, { "epoch": 0.8340750456738651, "grad_norm": 0.5832669937688117, "learning_rate": 3.2799698626386926e-06, "loss": 0.3233, "step": 17805 }, { "epoch": 0.8341218906637935, "grad_norm": 0.5750867229403691, "learning_rate": 3.279789676431377e-06, "loss": 0.3171, "step": 17806 }, { "epoch": 0.8341687356537218, "grad_norm": 0.630028373669014, "learning_rate": 3.2796094857365675e-06, "loss": 0.3426, "step": 17807 }, { "epoch": 0.8342155806436502, "grad_norm": 0.5977330480836396, "learning_rate": 3.2794292905553017e-06, "loss": 0.3009, "step": 17808 }, { "epoch": 0.8342624256335784, "grad_norm": 0.6306969118891292, "learning_rate": 3.2792490908886158e-06, "loss": 0.3304, "step": 17809 }, { "epoch": 0.8343092706235068, "grad_norm": 0.6563674296170918, "learning_rate": 3.2790688867375477e-06, "loss": 0.3465, "step": 17810 }, { "epoch": 0.8343561156134351, "grad_norm": 0.5944402958610506, "learning_rate": 3.2788886781031337e-06, "loss": 0.337, "step": 17811 }, { "epoch": 0.8344029606033635, "grad_norm": 0.5667454805643557, "learning_rate": 3.2787084649864116e-06, "loss": 0.3063, "step": 17812 }, { "epoch": 0.8344498055932918, "grad_norm": 0.6424089252677126, "learning_rate": 3.2785282473884172e-06, "loss": 0.3294, "step": 17813 }, { "epoch": 0.8344966505832201, "grad_norm": 0.5729764027774534, "learning_rate": 3.2783480253101886e-06, "loss": 0.3115, "step": 17814 }, { "epoch": 0.8345434955731484, "grad_norm": 0.6206893366135716, "learning_rate": 3.2781677987527634e-06, "loss": 0.3273, "step": 17815 }, { "epoch": 0.8345903405630768, "grad_norm": 0.5539794112264438, "learning_rate": 3.277987567717177e-06, "loss": 0.2953, "step": 17816 }, { "epoch": 0.8346371855530051, "grad_norm": 0.5424702865033996, "learning_rate": 3.277807332204468e-06, "loss": 0.3043, "step": 17817 }, { "epoch": 0.8346840305429334, "grad_norm": 0.5864861458463687, "learning_rate": 3.277627092215674e-06, "loss": 0.3185, "step": 17818 }, { "epoch": 0.8347308755328617, "grad_norm": 0.5378529480274921, "learning_rate": 3.277446847751831e-06, "loss": 0.2877, "step": 17819 }, { "epoch": 0.8347777205227901, "grad_norm": 0.6015384221248421, "learning_rate": 3.2772665988139768e-06, "loss": 0.3145, "step": 17820 }, { "epoch": 0.8348245655127184, "grad_norm": 0.585047912794426, "learning_rate": 3.2770863454031486e-06, "loss": 0.3148, "step": 17821 }, { "epoch": 0.8348714105026468, "grad_norm": 0.5741087636703429, "learning_rate": 3.2769060875203845e-06, "loss": 0.3027, "step": 17822 }, { "epoch": 0.834918255492575, "grad_norm": 0.5745856242363564, "learning_rate": 3.2767258251667205e-06, "loss": 0.3, "step": 17823 }, { "epoch": 0.8349651004825034, "grad_norm": 0.5700763235184672, "learning_rate": 3.276545558343195e-06, "loss": 0.2951, "step": 17824 }, { "epoch": 0.8350119454724317, "grad_norm": 0.628265273905923, "learning_rate": 3.276365287050845e-06, "loss": 0.3479, "step": 17825 }, { "epoch": 0.8350587904623601, "grad_norm": 0.6079830045216551, "learning_rate": 3.2761850112907072e-06, "loss": 0.3204, "step": 17826 }, { "epoch": 0.8351056354522883, "grad_norm": 0.5906656707642493, "learning_rate": 3.27600473106382e-06, "loss": 0.3205, "step": 17827 }, { "epoch": 0.8351524804422167, "grad_norm": 0.5404668533407612, "learning_rate": 3.2758244463712207e-06, "loss": 0.3157, "step": 17828 }, { "epoch": 0.835199325432145, "grad_norm": 0.5767346314343473, "learning_rate": 3.2756441572139474e-06, "loss": 0.3329, "step": 17829 }, { "epoch": 0.8352461704220734, "grad_norm": 0.5934608674188248, "learning_rate": 3.275463863593036e-06, "loss": 0.3166, "step": 17830 }, { "epoch": 0.8352930154120017, "grad_norm": 0.572788833363502, "learning_rate": 3.275283565509525e-06, "loss": 0.3271, "step": 17831 }, { "epoch": 0.83533986040193, "grad_norm": 0.5789598148293167, "learning_rate": 3.275103262964453e-06, "loss": 0.3127, "step": 17832 }, { "epoch": 0.8353867053918583, "grad_norm": 0.6096536978419775, "learning_rate": 3.2749229559588554e-06, "loss": 0.3061, "step": 17833 }, { "epoch": 0.8354335503817867, "grad_norm": 0.5685454507895772, "learning_rate": 3.2747426444937714e-06, "loss": 0.3157, "step": 17834 }, { "epoch": 0.835480395371715, "grad_norm": 0.5850980416342738, "learning_rate": 3.2745623285702375e-06, "loss": 0.303, "step": 17835 }, { "epoch": 0.8355272403616433, "grad_norm": 0.5992234644407274, "learning_rate": 3.274382008189293e-06, "loss": 0.3028, "step": 17836 }, { "epoch": 0.8355740853515716, "grad_norm": 0.6700879205260716, "learning_rate": 3.2742016833519746e-06, "loss": 0.3219, "step": 17837 }, { "epoch": 0.8356209303415, "grad_norm": 0.5890541940629529, "learning_rate": 3.2740213540593195e-06, "loss": 0.2971, "step": 17838 }, { "epoch": 0.8356677753314283, "grad_norm": 0.5692125891421507, "learning_rate": 3.273841020312367e-06, "loss": 0.3129, "step": 17839 }, { "epoch": 0.8357146203213567, "grad_norm": 0.6311730733377274, "learning_rate": 3.2736606821121535e-06, "loss": 0.3235, "step": 17840 }, { "epoch": 0.8357614653112849, "grad_norm": 0.5905451971367637, "learning_rate": 3.273480339459717e-06, "loss": 0.3124, "step": 17841 }, { "epoch": 0.8358083103012133, "grad_norm": 0.5858025106848578, "learning_rate": 3.273299992356095e-06, "loss": 0.3115, "step": 17842 }, { "epoch": 0.8358551552911416, "grad_norm": 0.5953576235459124, "learning_rate": 3.2731196408023275e-06, "loss": 0.3125, "step": 17843 }, { "epoch": 0.83590200028107, "grad_norm": 0.6051397748997019, "learning_rate": 3.2729392847994494e-06, "loss": 0.3166, "step": 17844 }, { "epoch": 0.8359488452709982, "grad_norm": 0.6771122930185068, "learning_rate": 3.2727589243485e-06, "loss": 0.3362, "step": 17845 }, { "epoch": 0.8359956902609266, "grad_norm": 0.6915414944379801, "learning_rate": 3.272578559450518e-06, "loss": 0.319, "step": 17846 }, { "epoch": 0.8360425352508549, "grad_norm": 0.6203580801712405, "learning_rate": 3.2723981901065404e-06, "loss": 0.3047, "step": 17847 }, { "epoch": 0.8360893802407833, "grad_norm": 0.7096436940476677, "learning_rate": 3.2722178163176048e-06, "loss": 0.335, "step": 17848 }, { "epoch": 0.8361362252307116, "grad_norm": 0.5812872855948975, "learning_rate": 3.27203743808475e-06, "loss": 0.308, "step": 17849 }, { "epoch": 0.8361830702206399, "grad_norm": 0.5857614047437789, "learning_rate": 3.2718570554090146e-06, "loss": 0.3037, "step": 17850 }, { "epoch": 0.8362299152105682, "grad_norm": 0.5754635629965774, "learning_rate": 3.271676668291435e-06, "loss": 0.3104, "step": 17851 }, { "epoch": 0.8362767602004966, "grad_norm": 0.5882492310977088, "learning_rate": 3.2714962767330507e-06, "loss": 0.2899, "step": 17852 }, { "epoch": 0.8363236051904249, "grad_norm": 0.5801285057210002, "learning_rate": 3.271315880734899e-06, "loss": 0.3079, "step": 17853 }, { "epoch": 0.8363704501803532, "grad_norm": 0.5781612777535866, "learning_rate": 3.2711354802980174e-06, "loss": 0.3041, "step": 17854 }, { "epoch": 0.8364172951702815, "grad_norm": 0.6368460683178253, "learning_rate": 3.270955075423445e-06, "loss": 0.3252, "step": 17855 }, { "epoch": 0.8364641401602099, "grad_norm": 0.6419344344119288, "learning_rate": 3.2707746661122207e-06, "loss": 0.3505, "step": 17856 }, { "epoch": 0.8365109851501382, "grad_norm": 0.5632801716881468, "learning_rate": 3.270594252365382e-06, "loss": 0.3218, "step": 17857 }, { "epoch": 0.8365578301400666, "grad_norm": 0.5734913441403301, "learning_rate": 3.270413834183967e-06, "loss": 0.305, "step": 17858 }, { "epoch": 0.8366046751299948, "grad_norm": 0.5917703940149427, "learning_rate": 3.2702334115690137e-06, "loss": 0.3089, "step": 17859 }, { "epoch": 0.8366515201199232, "grad_norm": 0.554687509043847, "learning_rate": 3.2700529845215613e-06, "loss": 0.3161, "step": 17860 }, { "epoch": 0.8366983651098515, "grad_norm": 0.5810373394157189, "learning_rate": 3.269872553042647e-06, "loss": 0.2987, "step": 17861 }, { "epoch": 0.8367452100997799, "grad_norm": 0.5939314441033166, "learning_rate": 3.2696921171333098e-06, "loss": 0.317, "step": 17862 }, { "epoch": 0.8367920550897081, "grad_norm": 0.6573165134230521, "learning_rate": 3.2695116767945874e-06, "loss": 0.3198, "step": 17863 }, { "epoch": 0.8368389000796365, "grad_norm": 0.5696282367707174, "learning_rate": 3.2693312320275197e-06, "loss": 0.3081, "step": 17864 }, { "epoch": 0.8368857450695648, "grad_norm": 0.564544324568751, "learning_rate": 3.2691507828331432e-06, "loss": 0.3232, "step": 17865 }, { "epoch": 0.8369325900594932, "grad_norm": 0.5949565465006363, "learning_rate": 3.2689703292124975e-06, "loss": 0.3215, "step": 17866 }, { "epoch": 0.8369794350494215, "grad_norm": 0.5864143217521264, "learning_rate": 3.268789871166621e-06, "loss": 0.3252, "step": 17867 }, { "epoch": 0.8370262800393498, "grad_norm": 0.6452241101524553, "learning_rate": 3.2686094086965518e-06, "loss": 0.3408, "step": 17868 }, { "epoch": 0.8370731250292781, "grad_norm": 0.6072296433752834, "learning_rate": 3.2684289418033288e-06, "loss": 0.3339, "step": 17869 }, { "epoch": 0.8371199700192065, "grad_norm": 0.5817601035523315, "learning_rate": 3.2682484704879907e-06, "loss": 0.307, "step": 17870 }, { "epoch": 0.8371668150091348, "grad_norm": 0.6151916191544229, "learning_rate": 3.268067994751575e-06, "loss": 0.3143, "step": 17871 }, { "epoch": 0.837213659999063, "grad_norm": 0.5855321716202982, "learning_rate": 3.267887514595122e-06, "loss": 0.2977, "step": 17872 }, { "epoch": 0.8372605049889914, "grad_norm": 0.6220811322332714, "learning_rate": 3.267707030019669e-06, "loss": 0.318, "step": 17873 }, { "epoch": 0.8373073499789198, "grad_norm": 0.5455931856099694, "learning_rate": 3.2675265410262547e-06, "loss": 0.2877, "step": 17874 }, { "epoch": 0.8373541949688481, "grad_norm": 0.5794444696274608, "learning_rate": 3.267346047615918e-06, "loss": 0.3225, "step": 17875 }, { "epoch": 0.8374010399587765, "grad_norm": 0.6500974051308396, "learning_rate": 3.2671655497896982e-06, "loss": 0.3226, "step": 17876 }, { "epoch": 0.8374478849487047, "grad_norm": 0.6331917017462764, "learning_rate": 3.2669850475486324e-06, "loss": 0.3349, "step": 17877 }, { "epoch": 0.837494729938633, "grad_norm": 0.5993232971895649, "learning_rate": 3.2668045408937614e-06, "loss": 0.3128, "step": 17878 }, { "epoch": 0.8375415749285614, "grad_norm": 0.5704026340303099, "learning_rate": 3.266624029826123e-06, "loss": 0.3137, "step": 17879 }, { "epoch": 0.8375884199184898, "grad_norm": 0.5726947123495398, "learning_rate": 3.2664435143467552e-06, "loss": 0.3205, "step": 17880 }, { "epoch": 0.837635264908418, "grad_norm": 0.5649662104504999, "learning_rate": 3.2662629944566988e-06, "loss": 0.3213, "step": 17881 }, { "epoch": 0.8376821098983463, "grad_norm": 0.576866829177397, "learning_rate": 3.266082470156991e-06, "loss": 0.3156, "step": 17882 }, { "epoch": 0.8377289548882747, "grad_norm": 0.6450315988515675, "learning_rate": 3.2659019414486705e-06, "loss": 0.3285, "step": 17883 }, { "epoch": 0.837775799878203, "grad_norm": 0.5920528938402577, "learning_rate": 3.265721408332777e-06, "loss": 0.315, "step": 17884 }, { "epoch": 0.8378226448681314, "grad_norm": 0.6167473776293582, "learning_rate": 3.26554087081035e-06, "loss": 0.3207, "step": 17885 }, { "epoch": 0.8378694898580596, "grad_norm": 0.5845680223168109, "learning_rate": 3.2653603288824277e-06, "loss": 0.2885, "step": 17886 }, { "epoch": 0.837916334847988, "grad_norm": 0.5888405923950848, "learning_rate": 3.2651797825500473e-06, "loss": 0.3301, "step": 17887 }, { "epoch": 0.8379631798379163, "grad_norm": 0.5544003491584891, "learning_rate": 3.2649992318142514e-06, "loss": 0.3033, "step": 17888 }, { "epoch": 0.8380100248278447, "grad_norm": 0.5863945627285524, "learning_rate": 3.264818676676077e-06, "loss": 0.3214, "step": 17889 }, { "epoch": 0.8380568698177729, "grad_norm": 0.5911575208769058, "learning_rate": 3.2646381171365626e-06, "loss": 0.3138, "step": 17890 }, { "epoch": 0.8381037148077013, "grad_norm": 0.5901056773520867, "learning_rate": 3.2644575531967487e-06, "loss": 0.3082, "step": 17891 }, { "epoch": 0.8381505597976296, "grad_norm": 0.5612386750568102, "learning_rate": 3.264276984857674e-06, "loss": 0.2946, "step": 17892 }, { "epoch": 0.838197404787558, "grad_norm": 0.578764941978358, "learning_rate": 3.264096412120377e-06, "loss": 0.3156, "step": 17893 }, { "epoch": 0.8382442497774863, "grad_norm": 0.5544671331677246, "learning_rate": 3.263915834985896e-06, "loss": 0.3022, "step": 17894 }, { "epoch": 0.8382910947674146, "grad_norm": 0.5657492891642456, "learning_rate": 3.263735253455273e-06, "loss": 0.304, "step": 17895 }, { "epoch": 0.8383379397573429, "grad_norm": 0.5876454565862218, "learning_rate": 3.263554667529545e-06, "loss": 0.306, "step": 17896 }, { "epoch": 0.8383847847472713, "grad_norm": 0.6514550508598274, "learning_rate": 3.263374077209751e-06, "loss": 0.3431, "step": 17897 }, { "epoch": 0.8384316297371996, "grad_norm": 0.5664573389272596, "learning_rate": 3.2631934824969324e-06, "loss": 0.2987, "step": 17898 }, { "epoch": 0.8384784747271279, "grad_norm": 0.6178620170947303, "learning_rate": 3.2630128833921265e-06, "loss": 0.3263, "step": 17899 }, { "epoch": 0.8385253197170562, "grad_norm": 0.6490516380365563, "learning_rate": 3.262832279896373e-06, "loss": 0.3428, "step": 17900 }, { "epoch": 0.8385721647069846, "grad_norm": 0.6343669303452153, "learning_rate": 3.262651672010712e-06, "loss": 0.3298, "step": 17901 }, { "epoch": 0.8386190096969129, "grad_norm": 0.5603672466616503, "learning_rate": 3.262471059736182e-06, "loss": 0.3175, "step": 17902 }, { "epoch": 0.8386658546868413, "grad_norm": 0.6262153389660605, "learning_rate": 3.262290443073823e-06, "loss": 0.3242, "step": 17903 }, { "epoch": 0.8387126996767695, "grad_norm": 0.570803024152548, "learning_rate": 3.262109822024674e-06, "loss": 0.3084, "step": 17904 }, { "epoch": 0.8387595446666979, "grad_norm": 0.6479662078807084, "learning_rate": 3.261929196589774e-06, "loss": 0.3165, "step": 17905 }, { "epoch": 0.8388063896566262, "grad_norm": 0.5993200192730248, "learning_rate": 3.2617485667701633e-06, "loss": 0.3184, "step": 17906 }, { "epoch": 0.8388532346465546, "grad_norm": 0.6017378594994927, "learning_rate": 3.2615679325668814e-06, "loss": 0.309, "step": 17907 }, { "epoch": 0.8389000796364828, "grad_norm": 0.5458366370002365, "learning_rate": 3.2613872939809664e-06, "loss": 0.3096, "step": 17908 }, { "epoch": 0.8389469246264112, "grad_norm": 0.5987106484201543, "learning_rate": 3.26120665101346e-06, "loss": 0.3254, "step": 17909 }, { "epoch": 0.8389937696163395, "grad_norm": 0.6062495648012683, "learning_rate": 3.2610260036654005e-06, "loss": 0.3305, "step": 17910 }, { "epoch": 0.8390406146062679, "grad_norm": 0.5853572607513339, "learning_rate": 3.260845351937827e-06, "loss": 0.3125, "step": 17911 }, { "epoch": 0.8390874595961962, "grad_norm": 0.5835234484952172, "learning_rate": 3.26066469583178e-06, "loss": 0.3216, "step": 17912 }, { "epoch": 0.8391343045861245, "grad_norm": 0.6432594286471296, "learning_rate": 3.2604840353482997e-06, "loss": 0.3463, "step": 17913 }, { "epoch": 0.8391811495760528, "grad_norm": 0.5732891418809507, "learning_rate": 3.260303370488424e-06, "loss": 0.31, "step": 17914 }, { "epoch": 0.8392279945659812, "grad_norm": 0.6102759975162445, "learning_rate": 3.2601227012531934e-06, "loss": 0.3099, "step": 17915 }, { "epoch": 0.8392748395559095, "grad_norm": 0.5490448839126675, "learning_rate": 3.2599420276436485e-06, "loss": 0.3186, "step": 17916 }, { "epoch": 0.8393216845458378, "grad_norm": 0.5939286279533845, "learning_rate": 3.2597613496608276e-06, "loss": 0.3046, "step": 17917 }, { "epoch": 0.8393685295357661, "grad_norm": 0.585267000744627, "learning_rate": 3.259580667305771e-06, "loss": 0.3247, "step": 17918 }, { "epoch": 0.8394153745256945, "grad_norm": 0.5887766678995184, "learning_rate": 3.259399980579519e-06, "loss": 0.3572, "step": 17919 }, { "epoch": 0.8394622195156228, "grad_norm": 0.5911760012787269, "learning_rate": 3.259219289483111e-06, "loss": 0.3277, "step": 17920 }, { "epoch": 0.8395090645055512, "grad_norm": 0.5611234737100684, "learning_rate": 3.259038594017586e-06, "loss": 0.3217, "step": 17921 }, { "epoch": 0.8395559094954794, "grad_norm": 0.6121536578607083, "learning_rate": 3.2588578941839855e-06, "loss": 0.3316, "step": 17922 }, { "epoch": 0.8396027544854078, "grad_norm": 0.6259690285041292, "learning_rate": 3.2586771899833485e-06, "loss": 0.335, "step": 17923 }, { "epoch": 0.8396495994753361, "grad_norm": 0.5502540415484223, "learning_rate": 3.258496481416715e-06, "loss": 0.3046, "step": 17924 }, { "epoch": 0.8396964444652645, "grad_norm": 0.5498206850695965, "learning_rate": 3.258315768485125e-06, "loss": 0.3239, "step": 17925 }, { "epoch": 0.8397432894551927, "grad_norm": 0.5581141501601424, "learning_rate": 3.258135051189617e-06, "loss": 0.3187, "step": 17926 }, { "epoch": 0.8397901344451211, "grad_norm": 0.6024412335545467, "learning_rate": 3.257954329531234e-06, "loss": 0.3395, "step": 17927 }, { "epoch": 0.8398369794350494, "grad_norm": 0.5532389987588706, "learning_rate": 3.257773603511014e-06, "loss": 0.3059, "step": 17928 }, { "epoch": 0.8398838244249778, "grad_norm": 0.5650441589878975, "learning_rate": 3.2575928731299965e-06, "loss": 0.3043, "step": 17929 }, { "epoch": 0.8399306694149061, "grad_norm": 0.5683826771356268, "learning_rate": 3.257412138389223e-06, "loss": 0.3099, "step": 17930 }, { "epoch": 0.8399775144048344, "grad_norm": 0.5568194961198708, "learning_rate": 3.2572313992897335e-06, "loss": 0.3077, "step": 17931 }, { "epoch": 0.8400243593947627, "grad_norm": 0.5827169471297866, "learning_rate": 3.2570506558325664e-06, "loss": 0.301, "step": 17932 }, { "epoch": 0.8400712043846911, "grad_norm": 0.6030854317262843, "learning_rate": 3.2568699080187637e-06, "loss": 0.3114, "step": 17933 }, { "epoch": 0.8401180493746194, "grad_norm": 0.5635626679900977, "learning_rate": 3.256689155849366e-06, "loss": 0.3002, "step": 17934 }, { "epoch": 0.8401648943645477, "grad_norm": 0.5503724263342976, "learning_rate": 3.256508399325411e-06, "loss": 0.3088, "step": 17935 }, { "epoch": 0.840211739354476, "grad_norm": 0.5697204961437649, "learning_rate": 3.2563276384479404e-06, "loss": 0.3199, "step": 17936 }, { "epoch": 0.8402585843444044, "grad_norm": 0.6848897298605731, "learning_rate": 3.2561468732179947e-06, "loss": 0.3383, "step": 17937 }, { "epoch": 0.8403054293343327, "grad_norm": 0.5930545529235073, "learning_rate": 3.255966103636614e-06, "loss": 0.3275, "step": 17938 }, { "epoch": 0.8403522743242611, "grad_norm": 0.5851874880245603, "learning_rate": 3.255785329704838e-06, "loss": 0.3072, "step": 17939 }, { "epoch": 0.8403991193141893, "grad_norm": 0.6529299341304428, "learning_rate": 3.2556045514237074e-06, "loss": 0.3381, "step": 17940 }, { "epoch": 0.8404459643041177, "grad_norm": 0.5807529029832091, "learning_rate": 3.255423768794263e-06, "loss": 0.2897, "step": 17941 }, { "epoch": 0.840492809294046, "grad_norm": 0.6194611070790654, "learning_rate": 3.2552429818175436e-06, "loss": 0.3266, "step": 17942 }, { "epoch": 0.8405396542839744, "grad_norm": 0.5521086321941551, "learning_rate": 3.2550621904945917e-06, "loss": 0.3076, "step": 17943 }, { "epoch": 0.8405864992739026, "grad_norm": 0.5700170213734503, "learning_rate": 3.2548813948264467e-06, "loss": 0.3003, "step": 17944 }, { "epoch": 0.840633344263831, "grad_norm": 0.5959126165741242, "learning_rate": 3.254700594814148e-06, "loss": 0.3218, "step": 17945 }, { "epoch": 0.8406801892537593, "grad_norm": 0.5737091203429496, "learning_rate": 3.2545197904587377e-06, "loss": 0.3054, "step": 17946 }, { "epoch": 0.8407270342436877, "grad_norm": 0.5901298958294517, "learning_rate": 3.254338981761256e-06, "loss": 0.3169, "step": 17947 }, { "epoch": 0.840773879233616, "grad_norm": 0.5678312566044477, "learning_rate": 3.2541581687227423e-06, "loss": 0.3012, "step": 17948 }, { "epoch": 0.8408207242235443, "grad_norm": 0.6332456362511069, "learning_rate": 3.2539773513442386e-06, "loss": 0.326, "step": 17949 }, { "epoch": 0.8408675692134726, "grad_norm": 0.5905834999846326, "learning_rate": 3.253796529626784e-06, "loss": 0.3219, "step": 17950 }, { "epoch": 0.840914414203401, "grad_norm": 0.6559254351048645, "learning_rate": 3.253615703571421e-06, "loss": 0.3288, "step": 17951 }, { "epoch": 0.8409612591933293, "grad_norm": 0.6003401954208818, "learning_rate": 3.2534348731791884e-06, "loss": 0.3335, "step": 17952 }, { "epoch": 0.8410081041832576, "grad_norm": 0.6189344102449441, "learning_rate": 3.253254038451127e-06, "loss": 0.3219, "step": 17953 }, { "epoch": 0.8410549491731859, "grad_norm": 0.5455627525308197, "learning_rate": 3.253073199388278e-06, "loss": 0.2819, "step": 17954 }, { "epoch": 0.8411017941631143, "grad_norm": 0.6010241525610124, "learning_rate": 3.252892355991683e-06, "loss": 0.3289, "step": 17955 }, { "epoch": 0.8411486391530426, "grad_norm": 0.6140317293584775, "learning_rate": 3.2527115082623808e-06, "loss": 0.3241, "step": 17956 }, { "epoch": 0.841195484142971, "grad_norm": 0.5818065526017517, "learning_rate": 3.252530656201413e-06, "loss": 0.3112, "step": 17957 }, { "epoch": 0.8412423291328992, "grad_norm": 0.653811219693046, "learning_rate": 3.2523497998098208e-06, "loss": 0.3227, "step": 17958 }, { "epoch": 0.8412891741228276, "grad_norm": 0.5372748247924892, "learning_rate": 3.2521689390886446e-06, "loss": 0.2924, "step": 17959 }, { "epoch": 0.8413360191127559, "grad_norm": 0.598190363088198, "learning_rate": 3.2519880740389247e-06, "loss": 0.3026, "step": 17960 }, { "epoch": 0.8413828641026843, "grad_norm": 0.614876539971832, "learning_rate": 3.2518072046617032e-06, "loss": 0.3334, "step": 17961 }, { "epoch": 0.8414297090926125, "grad_norm": 0.5820181652032826, "learning_rate": 3.2516263309580205e-06, "loss": 0.3119, "step": 17962 }, { "epoch": 0.8414765540825409, "grad_norm": 0.6143554591229746, "learning_rate": 3.2514454529289157e-06, "loss": 0.3182, "step": 17963 }, { "epoch": 0.8415233990724692, "grad_norm": 0.5724357671025853, "learning_rate": 3.251264570575432e-06, "loss": 0.3075, "step": 17964 }, { "epoch": 0.8415702440623976, "grad_norm": 0.607004445117515, "learning_rate": 3.2510836838986104e-06, "loss": 0.3203, "step": 17965 }, { "epoch": 0.8416170890523259, "grad_norm": 0.5914042299201293, "learning_rate": 3.2509027928994897e-06, "loss": 0.3109, "step": 17966 }, { "epoch": 0.8416639340422541, "grad_norm": 0.5469472902513314, "learning_rate": 3.250721897579112e-06, "loss": 0.3263, "step": 17967 }, { "epoch": 0.8417107790321825, "grad_norm": 0.5952109055846843, "learning_rate": 3.2505409979385193e-06, "loss": 0.3173, "step": 17968 }, { "epoch": 0.8417576240221109, "grad_norm": 0.614850877066332, "learning_rate": 3.2503600939787515e-06, "loss": 0.3221, "step": 17969 }, { "epoch": 0.8418044690120392, "grad_norm": 0.5979440303608118, "learning_rate": 3.2501791857008503e-06, "loss": 0.3303, "step": 17970 }, { "epoch": 0.8418513140019674, "grad_norm": 0.5881333762307658, "learning_rate": 3.249998273105856e-06, "loss": 0.3175, "step": 17971 }, { "epoch": 0.8418981589918958, "grad_norm": 0.6020856303252703, "learning_rate": 3.2498173561948104e-06, "loss": 0.308, "step": 17972 }, { "epoch": 0.8419450039818241, "grad_norm": 0.624633837447461, "learning_rate": 3.249636434968754e-06, "loss": 0.3029, "step": 17973 }, { "epoch": 0.8419918489717525, "grad_norm": 0.6071594697185134, "learning_rate": 3.249455509428729e-06, "loss": 0.3179, "step": 17974 }, { "epoch": 0.8420386939616809, "grad_norm": 0.5880590986743452, "learning_rate": 3.249274579575775e-06, "loss": 0.3198, "step": 17975 }, { "epoch": 0.8420855389516091, "grad_norm": 0.5786129118798365, "learning_rate": 3.249093645410935e-06, "loss": 0.3213, "step": 17976 }, { "epoch": 0.8421323839415374, "grad_norm": 0.6853894939703504, "learning_rate": 3.248912706935249e-06, "loss": 0.335, "step": 17977 }, { "epoch": 0.8421792289314658, "grad_norm": 0.5773913832610453, "learning_rate": 3.2487317641497583e-06, "loss": 0.3188, "step": 17978 }, { "epoch": 0.8422260739213941, "grad_norm": 0.6291529735495363, "learning_rate": 3.2485508170555047e-06, "loss": 0.3521, "step": 17979 }, { "epoch": 0.8422729189113224, "grad_norm": 0.5831664466426382, "learning_rate": 3.24836986565353e-06, "loss": 0.3242, "step": 17980 }, { "epoch": 0.8423197639012507, "grad_norm": 0.6506884347903794, "learning_rate": 3.2481889099448737e-06, "loss": 0.3361, "step": 17981 }, { "epoch": 0.8423666088911791, "grad_norm": 0.6026162776004348, "learning_rate": 3.2480079499305784e-06, "loss": 0.3091, "step": 17982 }, { "epoch": 0.8424134538811074, "grad_norm": 0.6242622947263726, "learning_rate": 3.247826985611686e-06, "loss": 0.323, "step": 17983 }, { "epoch": 0.8424602988710358, "grad_norm": 0.567154309589139, "learning_rate": 3.247646016989237e-06, "loss": 0.3148, "step": 17984 }, { "epoch": 0.842507143860964, "grad_norm": 0.6088631117527312, "learning_rate": 3.247465044064273e-06, "loss": 0.3465, "step": 17985 }, { "epoch": 0.8425539888508924, "grad_norm": 0.5611651339116978, "learning_rate": 3.2472840668378357e-06, "loss": 0.3194, "step": 17986 }, { "epoch": 0.8426008338408207, "grad_norm": 0.6380403310116932, "learning_rate": 3.2471030853109664e-06, "loss": 0.3292, "step": 17987 }, { "epoch": 0.8426476788307491, "grad_norm": 0.5721740151738782, "learning_rate": 3.246922099484706e-06, "loss": 0.304, "step": 17988 }, { "epoch": 0.8426945238206773, "grad_norm": 0.6008150379098888, "learning_rate": 3.2467411093600975e-06, "loss": 0.3145, "step": 17989 }, { "epoch": 0.8427413688106057, "grad_norm": 0.6144340710077881, "learning_rate": 3.2465601149381817e-06, "loss": 0.3354, "step": 17990 }, { "epoch": 0.842788213800534, "grad_norm": 0.6452219138314396, "learning_rate": 3.2463791162199994e-06, "loss": 0.3283, "step": 17991 }, { "epoch": 0.8428350587904624, "grad_norm": 0.5945812926990519, "learning_rate": 3.246198113206593e-06, "loss": 0.3272, "step": 17992 }, { "epoch": 0.8428819037803907, "grad_norm": 0.57583791305225, "learning_rate": 3.2460171058990044e-06, "loss": 0.2974, "step": 17993 }, { "epoch": 0.842928748770319, "grad_norm": 0.6243257131098173, "learning_rate": 3.2458360942982744e-06, "loss": 0.3612, "step": 17994 }, { "epoch": 0.8429755937602473, "grad_norm": 0.5993090600889522, "learning_rate": 3.2456550784054454e-06, "loss": 0.3309, "step": 17995 }, { "epoch": 0.8430224387501757, "grad_norm": 0.548979325583474, "learning_rate": 3.245474058221558e-06, "loss": 0.2879, "step": 17996 }, { "epoch": 0.843069283740104, "grad_norm": 0.6529516299724735, "learning_rate": 3.245293033747656e-06, "loss": 0.3206, "step": 17997 }, { "epoch": 0.8431161287300323, "grad_norm": 0.5923043745537424, "learning_rate": 3.2451120049847796e-06, "loss": 0.3219, "step": 17998 }, { "epoch": 0.8431629737199606, "grad_norm": 0.6445798740530292, "learning_rate": 3.2449309719339706e-06, "loss": 0.3202, "step": 17999 }, { "epoch": 0.843209818709889, "grad_norm": 0.5733704149471924, "learning_rate": 3.2447499345962715e-06, "loss": 0.3281, "step": 18000 }, { "epoch": 0.8432566636998173, "grad_norm": 0.5451688195548295, "learning_rate": 3.244568892972724e-06, "loss": 0.2984, "step": 18001 }, { "epoch": 0.8433035086897457, "grad_norm": 0.6662396468057522, "learning_rate": 3.244387847064368e-06, "loss": 0.3237, "step": 18002 }, { "epoch": 0.8433503536796739, "grad_norm": 0.5855169543316082, "learning_rate": 3.2442067968722477e-06, "loss": 0.3201, "step": 18003 }, { "epoch": 0.8433971986696023, "grad_norm": 0.5773499475096544, "learning_rate": 3.244025742397406e-06, "loss": 0.3196, "step": 18004 }, { "epoch": 0.8434440436595306, "grad_norm": 0.5989499154932639, "learning_rate": 3.2438446836408814e-06, "loss": 0.311, "step": 18005 }, { "epoch": 0.843490888649459, "grad_norm": 0.6011212902496824, "learning_rate": 3.2436636206037174e-06, "loss": 0.3315, "step": 18006 }, { "epoch": 0.8435377336393872, "grad_norm": 0.6446932400176855, "learning_rate": 3.2434825532869575e-06, "loss": 0.3243, "step": 18007 }, { "epoch": 0.8435845786293156, "grad_norm": 0.5928958060791844, "learning_rate": 3.243301481691642e-06, "loss": 0.3206, "step": 18008 }, { "epoch": 0.8436314236192439, "grad_norm": 0.5853130245735529, "learning_rate": 3.2431204058188125e-06, "loss": 0.3096, "step": 18009 }, { "epoch": 0.8436782686091723, "grad_norm": 0.6223146531180189, "learning_rate": 3.2429393256695128e-06, "loss": 0.338, "step": 18010 }, { "epoch": 0.8437251135991006, "grad_norm": 0.6270086817464511, "learning_rate": 3.2427582412447838e-06, "loss": 0.3182, "step": 18011 }, { "epoch": 0.8437719585890289, "grad_norm": 0.5871070899665982, "learning_rate": 3.2425771525456673e-06, "loss": 0.3088, "step": 18012 }, { "epoch": 0.8438188035789572, "grad_norm": 0.6030755194897575, "learning_rate": 3.242396059573206e-06, "loss": 0.3203, "step": 18013 }, { "epoch": 0.8438656485688856, "grad_norm": 0.6156386323667233, "learning_rate": 3.242214962328443e-06, "loss": 0.3101, "step": 18014 }, { "epoch": 0.8439124935588139, "grad_norm": 0.6323648985021094, "learning_rate": 3.242033860812418e-06, "loss": 0.3277, "step": 18015 }, { "epoch": 0.8439593385487422, "grad_norm": 0.6159268735173906, "learning_rate": 3.241852755026176e-06, "loss": 0.3441, "step": 18016 }, { "epoch": 0.8440061835386705, "grad_norm": 0.6041939028934628, "learning_rate": 3.2416716449707564e-06, "loss": 0.3257, "step": 18017 }, { "epoch": 0.8440530285285989, "grad_norm": 0.583573605754065, "learning_rate": 3.241490530647204e-06, "loss": 0.3229, "step": 18018 }, { "epoch": 0.8440998735185272, "grad_norm": 0.5273366765160595, "learning_rate": 3.24130941205656e-06, "loss": 0.3096, "step": 18019 }, { "epoch": 0.8441467185084556, "grad_norm": 0.6434169817952696, "learning_rate": 3.2411282891998657e-06, "loss": 0.355, "step": 18020 }, { "epoch": 0.8441935634983838, "grad_norm": 0.5638752562303275, "learning_rate": 3.240947162078165e-06, "loss": 0.2938, "step": 18021 }, { "epoch": 0.8442404084883122, "grad_norm": 0.5962999066787479, "learning_rate": 3.2407660306925e-06, "loss": 0.3395, "step": 18022 }, { "epoch": 0.8442872534782405, "grad_norm": 0.5888273771614161, "learning_rate": 3.2405848950439118e-06, "loss": 0.3295, "step": 18023 }, { "epoch": 0.8443340984681689, "grad_norm": 0.5693211477262872, "learning_rate": 3.240403755133444e-06, "loss": 0.3057, "step": 18024 }, { "epoch": 0.8443809434580971, "grad_norm": 0.5280167588769825, "learning_rate": 3.240222610962139e-06, "loss": 0.2915, "step": 18025 }, { "epoch": 0.8444277884480255, "grad_norm": 0.6131963375995245, "learning_rate": 3.240041462531039e-06, "loss": 0.3011, "step": 18026 }, { "epoch": 0.8444746334379538, "grad_norm": 0.6103651630694701, "learning_rate": 3.239860309841185e-06, "loss": 0.3091, "step": 18027 }, { "epoch": 0.8445214784278822, "grad_norm": 0.5785178873160645, "learning_rate": 3.239679152893623e-06, "loss": 0.3217, "step": 18028 }, { "epoch": 0.8445683234178105, "grad_norm": 0.6143680050280323, "learning_rate": 3.239497991689392e-06, "loss": 0.3134, "step": 18029 }, { "epoch": 0.8446151684077388, "grad_norm": 0.6777804321347815, "learning_rate": 3.239316826229536e-06, "loss": 0.3144, "step": 18030 }, { "epoch": 0.8446620133976671, "grad_norm": 0.6086101233672485, "learning_rate": 3.239135656515098e-06, "loss": 0.3171, "step": 18031 }, { "epoch": 0.8447088583875955, "grad_norm": 0.5483347438207086, "learning_rate": 3.23895448254712e-06, "loss": 0.3086, "step": 18032 }, { "epoch": 0.8447557033775238, "grad_norm": 0.5657304553456951, "learning_rate": 3.2387733043266447e-06, "loss": 0.318, "step": 18033 }, { "epoch": 0.844802548367452, "grad_norm": 0.5742558582556746, "learning_rate": 3.2385921218547137e-06, "loss": 0.3094, "step": 18034 }, { "epoch": 0.8448493933573804, "grad_norm": 0.5742418524014529, "learning_rate": 3.2384109351323724e-06, "loss": 0.3078, "step": 18035 }, { "epoch": 0.8448962383473088, "grad_norm": 0.5969857463481506, "learning_rate": 3.23822974416066e-06, "loss": 0.3256, "step": 18036 }, { "epoch": 0.8449430833372371, "grad_norm": 0.5542831370878751, "learning_rate": 3.238048548940622e-06, "loss": 0.3271, "step": 18037 }, { "epoch": 0.8449899283271655, "grad_norm": 0.6435334742459932, "learning_rate": 3.2378673494733004e-06, "loss": 0.3338, "step": 18038 }, { "epoch": 0.8450367733170937, "grad_norm": 0.6139912144528432, "learning_rate": 3.237686145759737e-06, "loss": 0.3161, "step": 18039 }, { "epoch": 0.845083618307022, "grad_norm": 0.6558850219009659, "learning_rate": 3.237504937800975e-06, "loss": 0.3181, "step": 18040 }, { "epoch": 0.8451304632969504, "grad_norm": 0.6031879630003343, "learning_rate": 3.237323725598058e-06, "loss": 0.3342, "step": 18041 }, { "epoch": 0.8451773082868788, "grad_norm": 0.5897806971404075, "learning_rate": 3.2371425091520287e-06, "loss": 0.3138, "step": 18042 }, { "epoch": 0.845224153276807, "grad_norm": 0.5957773965812897, "learning_rate": 3.2369612884639283e-06, "loss": 0.3091, "step": 18043 }, { "epoch": 0.8452709982667354, "grad_norm": 0.6023957212455191, "learning_rate": 3.2367800635348025e-06, "loss": 0.3301, "step": 18044 }, { "epoch": 0.8453178432566637, "grad_norm": 0.5670294103043296, "learning_rate": 3.2365988343656907e-06, "loss": 0.318, "step": 18045 }, { "epoch": 0.845364688246592, "grad_norm": 0.5805780681730277, "learning_rate": 3.2364176009576393e-06, "loss": 0.316, "step": 18046 }, { "epoch": 0.8454115332365204, "grad_norm": 0.5775832113343629, "learning_rate": 3.2362363633116894e-06, "loss": 0.2989, "step": 18047 }, { "epoch": 0.8454583782264486, "grad_norm": 0.5668971857925663, "learning_rate": 3.2360551214288837e-06, "loss": 0.302, "step": 18048 }, { "epoch": 0.845505223216377, "grad_norm": 0.6100607627214031, "learning_rate": 3.2358738753102665e-06, "loss": 0.3089, "step": 18049 }, { "epoch": 0.8455520682063054, "grad_norm": 0.5948257914066108, "learning_rate": 3.2356926249568797e-06, "loss": 0.3177, "step": 18050 }, { "epoch": 0.8455989131962337, "grad_norm": 0.5991320070758335, "learning_rate": 3.2355113703697664e-06, "loss": 0.3239, "step": 18051 }, { "epoch": 0.8456457581861619, "grad_norm": 0.5671073198528015, "learning_rate": 3.2353301115499703e-06, "loss": 0.3232, "step": 18052 }, { "epoch": 0.8456926031760903, "grad_norm": 0.5461976538771351, "learning_rate": 3.235148848498535e-06, "loss": 0.3091, "step": 18053 }, { "epoch": 0.8457394481660186, "grad_norm": 0.575454732260166, "learning_rate": 3.2349675812165016e-06, "loss": 0.3045, "step": 18054 }, { "epoch": 0.845786293155947, "grad_norm": 0.5923712315282148, "learning_rate": 3.234786309704915e-06, "loss": 0.3341, "step": 18055 }, { "epoch": 0.8458331381458754, "grad_norm": 0.5825734546148394, "learning_rate": 3.2346050339648182e-06, "loss": 0.3128, "step": 18056 }, { "epoch": 0.8458799831358036, "grad_norm": 0.5765901245801452, "learning_rate": 3.234423753997254e-06, "loss": 0.3201, "step": 18057 }, { "epoch": 0.8459268281257319, "grad_norm": 0.578555607571055, "learning_rate": 3.2342424698032647e-06, "loss": 0.3101, "step": 18058 }, { "epoch": 0.8459736731156603, "grad_norm": 0.6145369645913444, "learning_rate": 3.234061181383896e-06, "loss": 0.3265, "step": 18059 }, { "epoch": 0.8460205181055886, "grad_norm": 0.6342392919767837, "learning_rate": 3.233879888740189e-06, "loss": 0.3414, "step": 18060 }, { "epoch": 0.8460673630955169, "grad_norm": 0.5368935700399764, "learning_rate": 3.2336985918731878e-06, "loss": 0.3282, "step": 18061 }, { "epoch": 0.8461142080854452, "grad_norm": 0.5481250408421476, "learning_rate": 3.2335172907839352e-06, "loss": 0.2953, "step": 18062 }, { "epoch": 0.8461610530753736, "grad_norm": 0.580337200033054, "learning_rate": 3.2333359854734758e-06, "loss": 0.3083, "step": 18063 }, { "epoch": 0.8462078980653019, "grad_norm": 0.6046048720393613, "learning_rate": 3.2331546759428513e-06, "loss": 0.3283, "step": 18064 }, { "epoch": 0.8462547430552303, "grad_norm": 0.6101439324926062, "learning_rate": 3.2329733621931065e-06, "loss": 0.3188, "step": 18065 }, { "epoch": 0.8463015880451585, "grad_norm": 0.5709775271202812, "learning_rate": 3.2327920442252834e-06, "loss": 0.3001, "step": 18066 }, { "epoch": 0.8463484330350869, "grad_norm": 0.6104870677786065, "learning_rate": 3.2326107220404267e-06, "loss": 0.3184, "step": 18067 }, { "epoch": 0.8463952780250152, "grad_norm": 0.6195800217647854, "learning_rate": 3.2324293956395804e-06, "loss": 0.3158, "step": 18068 }, { "epoch": 0.8464421230149436, "grad_norm": 0.5447015758943771, "learning_rate": 3.2322480650237854e-06, "loss": 0.2948, "step": 18069 }, { "epoch": 0.8464889680048718, "grad_norm": 0.5450607127410261, "learning_rate": 3.2320667301940876e-06, "loss": 0.3102, "step": 18070 }, { "epoch": 0.8465358129948002, "grad_norm": 0.5780846064394691, "learning_rate": 3.2318853911515304e-06, "loss": 0.3112, "step": 18071 }, { "epoch": 0.8465826579847285, "grad_norm": 0.5832127666744771, "learning_rate": 3.2317040478971556e-06, "loss": 0.3281, "step": 18072 }, { "epoch": 0.8466295029746569, "grad_norm": 0.6061935445414259, "learning_rate": 3.231522700432008e-06, "loss": 0.3251, "step": 18073 }, { "epoch": 0.8466763479645852, "grad_norm": 0.6292631170334063, "learning_rate": 3.231341348757132e-06, "loss": 0.3235, "step": 18074 }, { "epoch": 0.8467231929545135, "grad_norm": 0.5559708273911165, "learning_rate": 3.23115999287357e-06, "loss": 0.3147, "step": 18075 }, { "epoch": 0.8467700379444418, "grad_norm": 0.5918403327511251, "learning_rate": 3.2309786327823654e-06, "loss": 0.3358, "step": 18076 }, { "epoch": 0.8468168829343702, "grad_norm": 0.5672748876216371, "learning_rate": 3.2307972684845633e-06, "loss": 0.3327, "step": 18077 }, { "epoch": 0.8468637279242985, "grad_norm": 0.569339154750306, "learning_rate": 3.2306158999812066e-06, "loss": 0.3233, "step": 18078 }, { "epoch": 0.8469105729142268, "grad_norm": 0.5944204181909329, "learning_rate": 3.2304345272733384e-06, "loss": 0.32, "step": 18079 }, { "epoch": 0.8469574179041551, "grad_norm": 0.6239153174871254, "learning_rate": 3.230253150362003e-06, "loss": 0.3092, "step": 18080 }, { "epoch": 0.8470042628940835, "grad_norm": 0.5879353551996166, "learning_rate": 3.2300717692482452e-06, "loss": 0.3113, "step": 18081 }, { "epoch": 0.8470511078840118, "grad_norm": 0.6414189950673483, "learning_rate": 3.229890383933107e-06, "loss": 0.3273, "step": 18082 }, { "epoch": 0.8470979528739402, "grad_norm": 0.5840069189847349, "learning_rate": 3.2297089944176334e-06, "loss": 0.3113, "step": 18083 }, { "epoch": 0.8471447978638684, "grad_norm": 0.6262092296636668, "learning_rate": 3.2295276007028683e-06, "loss": 0.3252, "step": 18084 }, { "epoch": 0.8471916428537968, "grad_norm": 0.6478562958968118, "learning_rate": 3.2293462027898547e-06, "loss": 0.3143, "step": 18085 }, { "epoch": 0.8472384878437251, "grad_norm": 0.6164838625805267, "learning_rate": 3.2291648006796374e-06, "loss": 0.3185, "step": 18086 }, { "epoch": 0.8472853328336535, "grad_norm": 0.5902491263794745, "learning_rate": 3.228983394373259e-06, "loss": 0.3196, "step": 18087 }, { "epoch": 0.8473321778235817, "grad_norm": 0.5726052959717349, "learning_rate": 3.2288019838717655e-06, "loss": 0.3184, "step": 18088 }, { "epoch": 0.8473790228135101, "grad_norm": 0.5817795399867477, "learning_rate": 3.2286205691761996e-06, "loss": 0.3134, "step": 18089 }, { "epoch": 0.8474258678034384, "grad_norm": 0.6055902784188792, "learning_rate": 3.228439150287605e-06, "loss": 0.316, "step": 18090 }, { "epoch": 0.8474727127933668, "grad_norm": 0.5641615571884638, "learning_rate": 3.2282577272070264e-06, "loss": 0.3121, "step": 18091 }, { "epoch": 0.8475195577832951, "grad_norm": 0.6101826423922249, "learning_rate": 3.2280762999355074e-06, "loss": 0.3274, "step": 18092 }, { "epoch": 0.8475664027732234, "grad_norm": 0.630576001921109, "learning_rate": 3.227894868474093e-06, "loss": 0.3069, "step": 18093 }, { "epoch": 0.8476132477631517, "grad_norm": 0.5635548396902624, "learning_rate": 3.227713432823825e-06, "loss": 0.3117, "step": 18094 }, { "epoch": 0.8476600927530801, "grad_norm": 0.6623772317673117, "learning_rate": 3.227531992985751e-06, "loss": 0.3095, "step": 18095 }, { "epoch": 0.8477069377430084, "grad_norm": 0.5654311614257979, "learning_rate": 3.2273505489609127e-06, "loss": 0.3253, "step": 18096 }, { "epoch": 0.8477537827329367, "grad_norm": 0.5613060658034392, "learning_rate": 3.227169100750354e-06, "loss": 0.3239, "step": 18097 }, { "epoch": 0.847800627722865, "grad_norm": 0.5693078635637547, "learning_rate": 3.2269876483551205e-06, "loss": 0.3089, "step": 18098 }, { "epoch": 0.8478474727127934, "grad_norm": 0.5570392912865096, "learning_rate": 3.226806191776256e-06, "loss": 0.3088, "step": 18099 }, { "epoch": 0.8478943177027217, "grad_norm": 0.6237999689964646, "learning_rate": 3.226624731014804e-06, "loss": 0.3082, "step": 18100 }, { "epoch": 0.8479411626926501, "grad_norm": 0.5859673374984183, "learning_rate": 3.22644326607181e-06, "loss": 0.3266, "step": 18101 }, { "epoch": 0.8479880076825783, "grad_norm": 0.5393024310899307, "learning_rate": 3.2262617969483167e-06, "loss": 0.3286, "step": 18102 }, { "epoch": 0.8480348526725067, "grad_norm": 0.5833634534373522, "learning_rate": 3.22608032364537e-06, "loss": 0.321, "step": 18103 }, { "epoch": 0.848081697662435, "grad_norm": 0.553953456705643, "learning_rate": 3.225898846164013e-06, "loss": 0.3227, "step": 18104 }, { "epoch": 0.8481285426523634, "grad_norm": 0.5989937052148064, "learning_rate": 3.2257173645052913e-06, "loss": 0.3202, "step": 18105 }, { "epoch": 0.8481753876422916, "grad_norm": 0.6254111844015074, "learning_rate": 3.225535878670248e-06, "loss": 0.323, "step": 18106 }, { "epoch": 0.84822223263222, "grad_norm": 0.5968540335527617, "learning_rate": 3.2253543886599282e-06, "loss": 0.3167, "step": 18107 }, { "epoch": 0.8482690776221483, "grad_norm": 0.619319618138773, "learning_rate": 3.225172894475376e-06, "loss": 0.3179, "step": 18108 }, { "epoch": 0.8483159226120767, "grad_norm": 0.6049150833268241, "learning_rate": 3.2249913961176365e-06, "loss": 0.3224, "step": 18109 }, { "epoch": 0.848362767602005, "grad_norm": 0.5966442091389778, "learning_rate": 3.224809893587753e-06, "loss": 0.313, "step": 18110 }, { "epoch": 0.8484096125919333, "grad_norm": 0.6177682568006293, "learning_rate": 3.2246283868867718e-06, "loss": 0.3252, "step": 18111 }, { "epoch": 0.8484564575818616, "grad_norm": 0.6427233972475839, "learning_rate": 3.224446876015736e-06, "loss": 0.326, "step": 18112 }, { "epoch": 0.84850330257179, "grad_norm": 0.639234907040215, "learning_rate": 3.2242653609756904e-06, "loss": 0.299, "step": 18113 }, { "epoch": 0.8485501475617183, "grad_norm": 0.6062092211277605, "learning_rate": 3.224083841767679e-06, "loss": 0.3012, "step": 18114 }, { "epoch": 0.8485969925516466, "grad_norm": 0.5660993075228742, "learning_rate": 3.2239023183927475e-06, "loss": 0.3142, "step": 18115 }, { "epoch": 0.8486438375415749, "grad_norm": 0.5625861425979175, "learning_rate": 3.2237207908519406e-06, "loss": 0.2984, "step": 18116 }, { "epoch": 0.8486906825315033, "grad_norm": 0.5789270485801318, "learning_rate": 3.223539259146302e-06, "loss": 0.3053, "step": 18117 }, { "epoch": 0.8487375275214316, "grad_norm": 0.6417203343218374, "learning_rate": 3.223357723276877e-06, "loss": 0.3328, "step": 18118 }, { "epoch": 0.84878437251136, "grad_norm": 0.5421032622115851, "learning_rate": 3.223176183244711e-06, "loss": 0.3084, "step": 18119 }, { "epoch": 0.8488312175012882, "grad_norm": 0.5665586553506359, "learning_rate": 3.2229946390508466e-06, "loss": 0.3039, "step": 18120 }, { "epoch": 0.8488780624912166, "grad_norm": 0.5827346814029895, "learning_rate": 3.2228130906963294e-06, "loss": 0.3234, "step": 18121 }, { "epoch": 0.8489249074811449, "grad_norm": 0.5315456420145854, "learning_rate": 3.222631538182205e-06, "loss": 0.2932, "step": 18122 }, { "epoch": 0.8489717524710733, "grad_norm": 0.5518837837698032, "learning_rate": 3.222449981509519e-06, "loss": 0.2888, "step": 18123 }, { "epoch": 0.8490185974610015, "grad_norm": 0.5771789248768127, "learning_rate": 3.222268420679313e-06, "loss": 0.3205, "step": 18124 }, { "epoch": 0.8490654424509299, "grad_norm": 0.5553092517245807, "learning_rate": 3.2220868556926344e-06, "loss": 0.3086, "step": 18125 }, { "epoch": 0.8491122874408582, "grad_norm": 0.6049111735696076, "learning_rate": 3.2219052865505277e-06, "loss": 0.3499, "step": 18126 }, { "epoch": 0.8491591324307866, "grad_norm": 0.6096687080465242, "learning_rate": 3.221723713254038e-06, "loss": 0.3132, "step": 18127 }, { "epoch": 0.8492059774207149, "grad_norm": 0.577279589565259, "learning_rate": 3.2215421358042087e-06, "loss": 0.3311, "step": 18128 }, { "epoch": 0.8492528224106431, "grad_norm": 0.5900822612497845, "learning_rate": 3.221360554202087e-06, "loss": 0.325, "step": 18129 }, { "epoch": 0.8492996674005715, "grad_norm": 0.5525394410302991, "learning_rate": 3.221178968448716e-06, "loss": 0.3202, "step": 18130 }, { "epoch": 0.8493465123904999, "grad_norm": 0.6232048997052858, "learning_rate": 3.220997378545141e-06, "loss": 0.3095, "step": 18131 }, { "epoch": 0.8493933573804282, "grad_norm": 0.6045229541013811, "learning_rate": 3.2208157844924076e-06, "loss": 0.331, "step": 18132 }, { "epoch": 0.8494402023703564, "grad_norm": 0.5540715378271626, "learning_rate": 3.220634186291561e-06, "loss": 0.3117, "step": 18133 }, { "epoch": 0.8494870473602848, "grad_norm": 0.5768693364750563, "learning_rate": 3.2204525839436446e-06, "loss": 0.3183, "step": 18134 }, { "epoch": 0.8495338923502131, "grad_norm": 0.5916459055935498, "learning_rate": 3.2202709774497054e-06, "loss": 0.3369, "step": 18135 }, { "epoch": 0.8495807373401415, "grad_norm": 0.5968571465466261, "learning_rate": 3.2200893668107876e-06, "loss": 0.347, "step": 18136 }, { "epoch": 0.8496275823300699, "grad_norm": 0.5864290580289827, "learning_rate": 3.2199077520279365e-06, "loss": 0.3433, "step": 18137 }, { "epoch": 0.8496744273199981, "grad_norm": 0.5802658608345211, "learning_rate": 3.219726133102197e-06, "loss": 0.3247, "step": 18138 }, { "epoch": 0.8497212723099264, "grad_norm": 0.5832949413987429, "learning_rate": 3.2195445100346146e-06, "loss": 0.3233, "step": 18139 }, { "epoch": 0.8497681172998548, "grad_norm": 0.5777507377456408, "learning_rate": 3.2193628828262348e-06, "loss": 0.2967, "step": 18140 }, { "epoch": 0.8498149622897831, "grad_norm": 0.5794575833164222, "learning_rate": 3.2191812514781025e-06, "loss": 0.3279, "step": 18141 }, { "epoch": 0.8498618072797114, "grad_norm": 0.5906311407792363, "learning_rate": 3.2189996159912623e-06, "loss": 0.3194, "step": 18142 }, { "epoch": 0.8499086522696397, "grad_norm": 0.5788601550880133, "learning_rate": 3.2188179763667597e-06, "loss": 0.3286, "step": 18143 }, { "epoch": 0.8499554972595681, "grad_norm": 0.5507805409750273, "learning_rate": 3.2186363326056417e-06, "loss": 0.3029, "step": 18144 }, { "epoch": 0.8500023422494964, "grad_norm": 0.5723462694464376, "learning_rate": 3.218454684708951e-06, "loss": 0.3044, "step": 18145 }, { "epoch": 0.8500491872394248, "grad_norm": 0.5753829029559, "learning_rate": 3.2182730326777345e-06, "loss": 0.3411, "step": 18146 }, { "epoch": 0.850096032229353, "grad_norm": 0.6153001023922745, "learning_rate": 3.218091376513037e-06, "loss": 0.321, "step": 18147 }, { "epoch": 0.8501428772192814, "grad_norm": 0.632898395928504, "learning_rate": 3.217909716215905e-06, "loss": 0.3273, "step": 18148 }, { "epoch": 0.8501897222092097, "grad_norm": 1.0490580255469613, "learning_rate": 3.2177280517873823e-06, "loss": 0.3288, "step": 18149 }, { "epoch": 0.8502365671991381, "grad_norm": 0.6197528608047552, "learning_rate": 3.2175463832285146e-06, "loss": 0.3391, "step": 18150 }, { "epoch": 0.8502834121890663, "grad_norm": 0.5958282693314687, "learning_rate": 3.2173647105403494e-06, "loss": 0.3129, "step": 18151 }, { "epoch": 0.8503302571789947, "grad_norm": 0.6299562897577514, "learning_rate": 3.217183033723929e-06, "loss": 0.3258, "step": 18152 }, { "epoch": 0.850377102168923, "grad_norm": 0.5354186232714975, "learning_rate": 3.2170013527803013e-06, "loss": 0.2917, "step": 18153 }, { "epoch": 0.8504239471588514, "grad_norm": 0.6621336667195664, "learning_rate": 3.216819667710511e-06, "loss": 0.3181, "step": 18154 }, { "epoch": 0.8504707921487797, "grad_norm": 0.6076101007814995, "learning_rate": 3.216637978515603e-06, "loss": 0.3194, "step": 18155 }, { "epoch": 0.850517637138708, "grad_norm": 0.5923770648043878, "learning_rate": 3.216456285196624e-06, "loss": 0.3268, "step": 18156 }, { "epoch": 0.8505644821286363, "grad_norm": 0.5735733282779999, "learning_rate": 3.2162745877546194e-06, "loss": 0.2988, "step": 18157 }, { "epoch": 0.8506113271185647, "grad_norm": 0.5606486023359973, "learning_rate": 3.2160928861906343e-06, "loss": 0.3161, "step": 18158 }, { "epoch": 0.850658172108493, "grad_norm": 0.5387674371202339, "learning_rate": 3.2159111805057146e-06, "loss": 0.3009, "step": 18159 }, { "epoch": 0.8507050170984213, "grad_norm": 0.5602725094853094, "learning_rate": 3.215729470700906e-06, "loss": 0.3129, "step": 18160 }, { "epoch": 0.8507518620883496, "grad_norm": 0.6043534824709145, "learning_rate": 3.2155477567772548e-06, "loss": 0.3265, "step": 18161 }, { "epoch": 0.850798707078278, "grad_norm": 0.6416553750237936, "learning_rate": 3.2153660387358052e-06, "loss": 0.3395, "step": 18162 }, { "epoch": 0.8508455520682063, "grad_norm": 0.5799960070094848, "learning_rate": 3.215184316577604e-06, "loss": 0.3199, "step": 18163 }, { "epoch": 0.8508923970581347, "grad_norm": 0.5225805630822147, "learning_rate": 3.215002590303697e-06, "loss": 0.2966, "step": 18164 }, { "epoch": 0.8509392420480629, "grad_norm": 0.5776483225914802, "learning_rate": 3.2148208599151302e-06, "loss": 0.3082, "step": 18165 }, { "epoch": 0.8509860870379913, "grad_norm": 0.6353169394423901, "learning_rate": 3.2146391254129485e-06, "loss": 0.3176, "step": 18166 }, { "epoch": 0.8510329320279196, "grad_norm": 0.6290864870405096, "learning_rate": 3.214457386798198e-06, "loss": 0.3259, "step": 18167 }, { "epoch": 0.851079777017848, "grad_norm": 0.5686125159170519, "learning_rate": 3.214275644071926e-06, "loss": 0.3127, "step": 18168 }, { "epoch": 0.8511266220077762, "grad_norm": 0.5951962537945918, "learning_rate": 3.2140938972351766e-06, "loss": 0.3261, "step": 18169 }, { "epoch": 0.8511734669977046, "grad_norm": 0.5323729599532135, "learning_rate": 3.2139121462889954e-06, "loss": 0.2931, "step": 18170 }, { "epoch": 0.8512203119876329, "grad_norm": 0.5598322671516034, "learning_rate": 3.2137303912344304e-06, "loss": 0.3133, "step": 18171 }, { "epoch": 0.8512671569775613, "grad_norm": 0.6105440796808775, "learning_rate": 3.2135486320725264e-06, "loss": 0.3053, "step": 18172 }, { "epoch": 0.8513140019674896, "grad_norm": 0.6186102331677141, "learning_rate": 3.2133668688043278e-06, "loss": 0.32, "step": 18173 }, { "epoch": 0.8513608469574179, "grad_norm": 0.6280411600247016, "learning_rate": 3.2131851014308836e-06, "loss": 0.3513, "step": 18174 }, { "epoch": 0.8514076919473462, "grad_norm": 0.5694737325132879, "learning_rate": 3.2130033299532383e-06, "loss": 0.3288, "step": 18175 }, { "epoch": 0.8514545369372746, "grad_norm": 0.5752219807699364, "learning_rate": 3.2128215543724374e-06, "loss": 0.3067, "step": 18176 }, { "epoch": 0.8515013819272029, "grad_norm": 0.5620467763533775, "learning_rate": 3.2126397746895276e-06, "loss": 0.3085, "step": 18177 }, { "epoch": 0.8515482269171312, "grad_norm": 0.546635428423993, "learning_rate": 3.2124579909055557e-06, "loss": 0.3046, "step": 18178 }, { "epoch": 0.8515950719070595, "grad_norm": 0.5956344544322115, "learning_rate": 3.212276203021567e-06, "loss": 0.3092, "step": 18179 }, { "epoch": 0.8516419168969879, "grad_norm": 0.6034528667062633, "learning_rate": 3.2120944110386076e-06, "loss": 0.3244, "step": 18180 }, { "epoch": 0.8516887618869162, "grad_norm": 0.6247349581054729, "learning_rate": 3.211912614957724e-06, "loss": 0.3218, "step": 18181 }, { "epoch": 0.8517356068768446, "grad_norm": 0.5606695212178487, "learning_rate": 3.2117308147799626e-06, "loss": 0.3144, "step": 18182 }, { "epoch": 0.8517824518667728, "grad_norm": 0.6115329917630695, "learning_rate": 3.2115490105063684e-06, "loss": 0.2981, "step": 18183 }, { "epoch": 0.8518292968567012, "grad_norm": 0.6098925384565907, "learning_rate": 3.211367202137989e-06, "loss": 0.3192, "step": 18184 }, { "epoch": 0.8518761418466295, "grad_norm": 0.541399339474487, "learning_rate": 3.2111853896758693e-06, "loss": 0.3038, "step": 18185 }, { "epoch": 0.8519229868365579, "grad_norm": 0.5900455985083582, "learning_rate": 3.2110035731210575e-06, "loss": 0.3211, "step": 18186 }, { "epoch": 0.8519698318264861, "grad_norm": 0.5733287376084794, "learning_rate": 3.210821752474599e-06, "loss": 0.3193, "step": 18187 }, { "epoch": 0.8520166768164145, "grad_norm": 0.5694846405905502, "learning_rate": 3.210639927737539e-06, "loss": 0.2993, "step": 18188 }, { "epoch": 0.8520635218063428, "grad_norm": 0.6044242584956251, "learning_rate": 3.2104580989109253e-06, "loss": 0.3298, "step": 18189 }, { "epoch": 0.8521103667962712, "grad_norm": 0.5756774237250614, "learning_rate": 3.2102762659958043e-06, "loss": 0.3068, "step": 18190 }, { "epoch": 0.8521572117861995, "grad_norm": 0.5626636352823663, "learning_rate": 3.2100944289932208e-06, "loss": 0.3182, "step": 18191 }, { "epoch": 0.8522040567761278, "grad_norm": 0.5853523689582172, "learning_rate": 3.2099125879042225e-06, "loss": 0.3111, "step": 18192 }, { "epoch": 0.8522509017660561, "grad_norm": 0.6455041117830036, "learning_rate": 3.209730742729857e-06, "loss": 0.318, "step": 18193 }, { "epoch": 0.8522977467559845, "grad_norm": 0.6207491957257179, "learning_rate": 3.209548893471168e-06, "loss": 0.3244, "step": 18194 }, { "epoch": 0.8523445917459128, "grad_norm": 0.613576160697065, "learning_rate": 3.209367040129204e-06, "loss": 0.3263, "step": 18195 }, { "epoch": 0.8523914367358411, "grad_norm": 0.564510895609994, "learning_rate": 3.2091851827050113e-06, "loss": 0.2698, "step": 18196 }, { "epoch": 0.8524382817257694, "grad_norm": 0.606339513714876, "learning_rate": 3.2090033211996357e-06, "loss": 0.3276, "step": 18197 }, { "epoch": 0.8524851267156978, "grad_norm": 0.569926305195752, "learning_rate": 3.208821455614124e-06, "loss": 0.3082, "step": 18198 }, { "epoch": 0.8525319717056261, "grad_norm": 0.642970252761496, "learning_rate": 3.2086395859495235e-06, "loss": 0.3152, "step": 18199 }, { "epoch": 0.8525788166955545, "grad_norm": 0.5364219564953805, "learning_rate": 3.2084577122068804e-06, "loss": 0.3112, "step": 18200 }, { "epoch": 0.8526256616854827, "grad_norm": 0.5683743677257296, "learning_rate": 3.2082758343872407e-06, "loss": 0.3001, "step": 18201 }, { "epoch": 0.8526725066754111, "grad_norm": 0.6191904896403737, "learning_rate": 3.208093952491652e-06, "loss": 0.3109, "step": 18202 }, { "epoch": 0.8527193516653394, "grad_norm": 0.5605776068110098, "learning_rate": 3.2079120665211605e-06, "loss": 0.2974, "step": 18203 }, { "epoch": 0.8527661966552678, "grad_norm": 0.6222462362209603, "learning_rate": 3.2077301764768126e-06, "loss": 0.3426, "step": 18204 }, { "epoch": 0.852813041645196, "grad_norm": 0.6501074415825239, "learning_rate": 3.207548282359656e-06, "loss": 0.3227, "step": 18205 }, { "epoch": 0.8528598866351244, "grad_norm": 0.6084824784409295, "learning_rate": 3.207366384170736e-06, "loss": 0.3118, "step": 18206 }, { "epoch": 0.8529067316250527, "grad_norm": 0.6600608376279531, "learning_rate": 3.2071844819111007e-06, "loss": 0.3554, "step": 18207 }, { "epoch": 0.8529535766149811, "grad_norm": 0.5659833956596237, "learning_rate": 3.207002575581797e-06, "loss": 0.3116, "step": 18208 }, { "epoch": 0.8530004216049094, "grad_norm": 0.5823652485440347, "learning_rate": 3.2068206651838708e-06, "loss": 0.3286, "step": 18209 }, { "epoch": 0.8530472665948376, "grad_norm": 0.6253428373794901, "learning_rate": 3.2066387507183696e-06, "loss": 0.3155, "step": 18210 }, { "epoch": 0.853094111584766, "grad_norm": 0.6923728351753687, "learning_rate": 3.2064568321863394e-06, "loss": 0.3454, "step": 18211 }, { "epoch": 0.8531409565746944, "grad_norm": 0.5995615864954599, "learning_rate": 3.206274909588828e-06, "loss": 0.3164, "step": 18212 }, { "epoch": 0.8531878015646227, "grad_norm": 0.7129549042571129, "learning_rate": 3.206092982926881e-06, "loss": 0.3444, "step": 18213 }, { "epoch": 0.853234646554551, "grad_norm": 0.6212872583497725, "learning_rate": 3.205911052201548e-06, "loss": 0.3412, "step": 18214 }, { "epoch": 0.8532814915444793, "grad_norm": 0.6152201593851072, "learning_rate": 3.205729117413874e-06, "loss": 0.3183, "step": 18215 }, { "epoch": 0.8533283365344076, "grad_norm": 0.6383850503955542, "learning_rate": 3.2055471785649052e-06, "loss": 0.3346, "step": 18216 }, { "epoch": 0.853375181524336, "grad_norm": 0.5828578214370947, "learning_rate": 3.2053652356556908e-06, "loss": 0.3219, "step": 18217 }, { "epoch": 0.8534220265142644, "grad_norm": 0.6042084401857319, "learning_rate": 3.2051832886872764e-06, "loss": 0.3395, "step": 18218 }, { "epoch": 0.8534688715041926, "grad_norm": 0.6374897585262582, "learning_rate": 3.2050013376607093e-06, "loss": 0.3186, "step": 18219 }, { "epoch": 0.853515716494121, "grad_norm": 0.6383632334077909, "learning_rate": 3.204819382577037e-06, "loss": 0.3491, "step": 18220 }, { "epoch": 0.8535625614840493, "grad_norm": 0.5489885601496612, "learning_rate": 3.2046374234373063e-06, "loss": 0.3077, "step": 18221 }, { "epoch": 0.8536094064739776, "grad_norm": 0.5567831557606199, "learning_rate": 3.2044554602425638e-06, "loss": 0.3035, "step": 18222 }, { "epoch": 0.8536562514639059, "grad_norm": 0.6160335678663783, "learning_rate": 3.204273492993858e-06, "loss": 0.3396, "step": 18223 }, { "epoch": 0.8537030964538342, "grad_norm": 0.5724737181152476, "learning_rate": 3.2040915216922347e-06, "loss": 0.3282, "step": 18224 }, { "epoch": 0.8537499414437626, "grad_norm": 0.5883159555825522, "learning_rate": 3.2039095463387417e-06, "loss": 0.321, "step": 18225 }, { "epoch": 0.853796786433691, "grad_norm": 0.616821979107895, "learning_rate": 3.2037275669344258e-06, "loss": 0.2986, "step": 18226 }, { "epoch": 0.8538436314236193, "grad_norm": 0.5835821995067406, "learning_rate": 3.2035455834803352e-06, "loss": 0.2908, "step": 18227 }, { "epoch": 0.8538904764135475, "grad_norm": 0.552248074895123, "learning_rate": 3.2033635959775165e-06, "loss": 0.3062, "step": 18228 }, { "epoch": 0.8539373214034759, "grad_norm": 0.5657964898076966, "learning_rate": 3.203181604427017e-06, "loss": 0.3024, "step": 18229 }, { "epoch": 0.8539841663934042, "grad_norm": 0.5935720696851503, "learning_rate": 3.2029996088298843e-06, "loss": 0.3118, "step": 18230 }, { "epoch": 0.8540310113833326, "grad_norm": 0.5440145110755373, "learning_rate": 3.2028176091871654e-06, "loss": 0.3012, "step": 18231 }, { "epoch": 0.8540778563732608, "grad_norm": 0.566642432799921, "learning_rate": 3.202635605499908e-06, "loss": 0.319, "step": 18232 }, { "epoch": 0.8541247013631892, "grad_norm": 0.5900847035198901, "learning_rate": 3.202453597769159e-06, "loss": 0.3155, "step": 18233 }, { "epoch": 0.8541715463531175, "grad_norm": 0.6209006074433886, "learning_rate": 3.202271585995966e-06, "loss": 0.3336, "step": 18234 }, { "epoch": 0.8542183913430459, "grad_norm": 0.5905337845940121, "learning_rate": 3.202089570181377e-06, "loss": 0.3114, "step": 18235 }, { "epoch": 0.8542652363329742, "grad_norm": 0.5703027686252052, "learning_rate": 3.2019075503264383e-06, "loss": 0.2911, "step": 18236 }, { "epoch": 0.8543120813229025, "grad_norm": 0.6611416541390438, "learning_rate": 3.2017255264321984e-06, "loss": 0.3389, "step": 18237 }, { "epoch": 0.8543589263128308, "grad_norm": 0.5769365854322119, "learning_rate": 3.2015434984997048e-06, "loss": 0.307, "step": 18238 }, { "epoch": 0.8544057713027592, "grad_norm": 0.6114224772123217, "learning_rate": 3.2013614665300048e-06, "loss": 0.32, "step": 18239 }, { "epoch": 0.8544526162926875, "grad_norm": 0.5781140568009954, "learning_rate": 3.201179430524145e-06, "loss": 0.3059, "step": 18240 }, { "epoch": 0.8544994612826158, "grad_norm": 0.6563847286196545, "learning_rate": 3.2009973904831743e-06, "loss": 0.3456, "step": 18241 }, { "epoch": 0.8545463062725441, "grad_norm": 0.6400235636662776, "learning_rate": 3.2008153464081406e-06, "loss": 0.3332, "step": 18242 }, { "epoch": 0.8545931512624725, "grad_norm": 0.5700767424101302, "learning_rate": 3.200633298300089e-06, "loss": 0.3131, "step": 18243 }, { "epoch": 0.8546399962524008, "grad_norm": 0.6349253621946369, "learning_rate": 3.2004512461600694e-06, "loss": 0.3289, "step": 18244 }, { "epoch": 0.8546868412423292, "grad_norm": 0.6032911252284321, "learning_rate": 3.2002691899891304e-06, "loss": 0.3361, "step": 18245 }, { "epoch": 0.8547336862322574, "grad_norm": 0.6276870332263302, "learning_rate": 3.200087129788317e-06, "loss": 0.3526, "step": 18246 }, { "epoch": 0.8547805312221858, "grad_norm": 0.5963436282020935, "learning_rate": 3.1999050655586776e-06, "loss": 0.3022, "step": 18247 }, { "epoch": 0.8548273762121141, "grad_norm": 0.5957017607263368, "learning_rate": 3.199722997301261e-06, "loss": 0.3246, "step": 18248 }, { "epoch": 0.8548742212020425, "grad_norm": 0.5829656773430514, "learning_rate": 3.199540925017115e-06, "loss": 0.3191, "step": 18249 }, { "epoch": 0.8549210661919707, "grad_norm": 0.6158542898881806, "learning_rate": 3.199358848707286e-06, "loss": 0.324, "step": 18250 }, { "epoch": 0.8549679111818991, "grad_norm": 0.6145030136180388, "learning_rate": 3.199176768372823e-06, "loss": 0.3178, "step": 18251 }, { "epoch": 0.8550147561718274, "grad_norm": 0.6235801570852403, "learning_rate": 3.198994684014774e-06, "loss": 0.331, "step": 18252 }, { "epoch": 0.8550616011617558, "grad_norm": 0.5864241934241926, "learning_rate": 3.1988125956341852e-06, "loss": 0.3212, "step": 18253 }, { "epoch": 0.8551084461516841, "grad_norm": 0.5511420974143143, "learning_rate": 3.1986305032321065e-06, "loss": 0.2948, "step": 18254 }, { "epoch": 0.8551552911416124, "grad_norm": 0.5333936524275855, "learning_rate": 3.1984484068095837e-06, "loss": 0.3207, "step": 18255 }, { "epoch": 0.8552021361315407, "grad_norm": 0.6084863496808288, "learning_rate": 3.198266306367667e-06, "loss": 0.308, "step": 18256 }, { "epoch": 0.8552489811214691, "grad_norm": 0.5622576378328182, "learning_rate": 3.1980842019074028e-06, "loss": 0.2972, "step": 18257 }, { "epoch": 0.8552958261113974, "grad_norm": 0.6138819476600957, "learning_rate": 3.197902093429839e-06, "loss": 0.3263, "step": 18258 }, { "epoch": 0.8553426711013257, "grad_norm": 0.5692561948436314, "learning_rate": 3.1977199809360247e-06, "loss": 0.3154, "step": 18259 }, { "epoch": 0.855389516091254, "grad_norm": 0.6127539463492725, "learning_rate": 3.197537864427007e-06, "loss": 0.3108, "step": 18260 }, { "epoch": 0.8554363610811824, "grad_norm": 0.5803685917335152, "learning_rate": 3.1973557439038343e-06, "loss": 0.3131, "step": 18261 }, { "epoch": 0.8554832060711107, "grad_norm": 0.5170297719410473, "learning_rate": 3.197173619367554e-06, "loss": 0.3019, "step": 18262 }, { "epoch": 0.8555300510610391, "grad_norm": 0.5640936202825783, "learning_rate": 3.1969914908192163e-06, "loss": 0.3212, "step": 18263 }, { "epoch": 0.8555768960509673, "grad_norm": 0.5656412297914365, "learning_rate": 3.196809358259866e-06, "loss": 0.3052, "step": 18264 }, { "epoch": 0.8556237410408957, "grad_norm": 0.6200894642579501, "learning_rate": 3.1966272216905538e-06, "loss": 0.3326, "step": 18265 }, { "epoch": 0.855670586030824, "grad_norm": 0.5836674789722818, "learning_rate": 3.196445081112327e-06, "loss": 0.3122, "step": 18266 }, { "epoch": 0.8557174310207524, "grad_norm": 0.5876802803607946, "learning_rate": 3.1962629365262336e-06, "loss": 0.3248, "step": 18267 }, { "epoch": 0.8557642760106806, "grad_norm": 0.6239373911903254, "learning_rate": 3.1960807879333224e-06, "loss": 0.3156, "step": 18268 }, { "epoch": 0.855811121000609, "grad_norm": 0.609314634913316, "learning_rate": 3.195898635334641e-06, "loss": 0.3215, "step": 18269 }, { "epoch": 0.8558579659905373, "grad_norm": 0.6154765614436289, "learning_rate": 3.1957164787312376e-06, "loss": 0.3339, "step": 18270 }, { "epoch": 0.8559048109804657, "grad_norm": 0.579933039904434, "learning_rate": 3.195534318124161e-06, "loss": 0.3077, "step": 18271 }, { "epoch": 0.855951655970394, "grad_norm": 0.58215100932328, "learning_rate": 3.1953521535144593e-06, "loss": 0.3099, "step": 18272 }, { "epoch": 0.8559985009603223, "grad_norm": 0.5938552851506931, "learning_rate": 3.195169984903181e-06, "loss": 0.3327, "step": 18273 }, { "epoch": 0.8560453459502506, "grad_norm": 0.6715175734895943, "learning_rate": 3.1949878122913736e-06, "loss": 0.3483, "step": 18274 }, { "epoch": 0.856092190940179, "grad_norm": 0.5152089713662958, "learning_rate": 3.1948056356800862e-06, "loss": 0.2955, "step": 18275 }, { "epoch": 0.8561390359301073, "grad_norm": 0.5822574911345747, "learning_rate": 3.1946234550703664e-06, "loss": 0.3131, "step": 18276 }, { "epoch": 0.8561858809200356, "grad_norm": 0.5557863525842919, "learning_rate": 3.194441270463264e-06, "loss": 0.3103, "step": 18277 }, { "epoch": 0.8562327259099639, "grad_norm": 0.5787428283468415, "learning_rate": 3.1942590818598267e-06, "loss": 0.29, "step": 18278 }, { "epoch": 0.8562795708998923, "grad_norm": 0.5705735549998175, "learning_rate": 3.1940768892611025e-06, "loss": 0.3249, "step": 18279 }, { "epoch": 0.8563264158898206, "grad_norm": 0.558406689232509, "learning_rate": 3.1938946926681403e-06, "loss": 0.311, "step": 18280 }, { "epoch": 0.856373260879749, "grad_norm": 0.6087152368833164, "learning_rate": 3.1937124920819886e-06, "loss": 0.3275, "step": 18281 }, { "epoch": 0.8564201058696772, "grad_norm": 0.6387612702932935, "learning_rate": 3.1935302875036956e-06, "loss": 0.3166, "step": 18282 }, { "epoch": 0.8564669508596056, "grad_norm": 0.6524204221049987, "learning_rate": 3.19334807893431e-06, "loss": 0.3434, "step": 18283 }, { "epoch": 0.8565137958495339, "grad_norm": 0.6086340790184057, "learning_rate": 3.1931658663748818e-06, "loss": 0.3291, "step": 18284 }, { "epoch": 0.8565606408394623, "grad_norm": 0.6181717955299368, "learning_rate": 3.1929836498264564e-06, "loss": 0.3343, "step": 18285 }, { "epoch": 0.8566074858293905, "grad_norm": 0.574163658027412, "learning_rate": 3.1928014292900856e-06, "loss": 0.3074, "step": 18286 }, { "epoch": 0.8566543308193189, "grad_norm": 0.5902418872861432, "learning_rate": 3.192619204766816e-06, "loss": 0.3204, "step": 18287 }, { "epoch": 0.8567011758092472, "grad_norm": 0.5663965887047829, "learning_rate": 3.1924369762576975e-06, "loss": 0.2993, "step": 18288 }, { "epoch": 0.8567480207991756, "grad_norm": 0.5705870132195241, "learning_rate": 3.1922547437637774e-06, "loss": 0.3036, "step": 18289 }, { "epoch": 0.8567948657891039, "grad_norm": 0.5728132737600924, "learning_rate": 3.1920725072861064e-06, "loss": 0.3008, "step": 18290 }, { "epoch": 0.8568417107790322, "grad_norm": 0.5897099322287654, "learning_rate": 3.1918902668257313e-06, "loss": 0.3058, "step": 18291 }, { "epoch": 0.8568885557689605, "grad_norm": 0.59625305414955, "learning_rate": 3.1917080223837016e-06, "loss": 0.3121, "step": 18292 }, { "epoch": 0.8569354007588889, "grad_norm": 0.5956060633124154, "learning_rate": 3.1915257739610665e-06, "loss": 0.3209, "step": 18293 }, { "epoch": 0.8569822457488172, "grad_norm": 0.5016601878504046, "learning_rate": 3.1913435215588745e-06, "loss": 0.3, "step": 18294 }, { "epoch": 0.8570290907387454, "grad_norm": 0.6231640448211503, "learning_rate": 3.191161265178174e-06, "loss": 0.3065, "step": 18295 }, { "epoch": 0.8570759357286738, "grad_norm": 0.6087496258200822, "learning_rate": 3.190979004820014e-06, "loss": 0.3199, "step": 18296 }, { "epoch": 0.8571227807186022, "grad_norm": 0.5532077737926993, "learning_rate": 3.1907967404854427e-06, "loss": 0.3025, "step": 18297 }, { "epoch": 0.8571696257085305, "grad_norm": 0.617153814361949, "learning_rate": 3.190614472175511e-06, "loss": 0.3286, "step": 18298 }, { "epoch": 0.8572164706984589, "grad_norm": 0.6527347801410635, "learning_rate": 3.1904321998912667e-06, "loss": 0.3136, "step": 18299 }, { "epoch": 0.8572633156883871, "grad_norm": 0.5818440131617881, "learning_rate": 3.190249923633758e-06, "loss": 0.3016, "step": 18300 }, { "epoch": 0.8573101606783154, "grad_norm": 0.5877754046534953, "learning_rate": 3.1900676434040345e-06, "loss": 0.3278, "step": 18301 }, { "epoch": 0.8573570056682438, "grad_norm": 0.6027601792427703, "learning_rate": 3.1898853592031453e-06, "loss": 0.3272, "step": 18302 }, { "epoch": 0.8574038506581722, "grad_norm": 0.6444422907804755, "learning_rate": 3.1897030710321393e-06, "loss": 0.3305, "step": 18303 }, { "epoch": 0.8574506956481004, "grad_norm": 0.5952484332665589, "learning_rate": 3.189520778892065e-06, "loss": 0.2993, "step": 18304 }, { "epoch": 0.8574975406380287, "grad_norm": 0.6093160553588219, "learning_rate": 3.1893384827839723e-06, "loss": 0.3032, "step": 18305 }, { "epoch": 0.8575443856279571, "grad_norm": 0.5818179153485573, "learning_rate": 3.1891561827089106e-06, "loss": 0.3276, "step": 18306 }, { "epoch": 0.8575912306178854, "grad_norm": 0.6036033310238904, "learning_rate": 3.1889738786679268e-06, "loss": 0.3679, "step": 18307 }, { "epoch": 0.8576380756078138, "grad_norm": 0.569538298934343, "learning_rate": 3.188791570662073e-06, "loss": 0.308, "step": 18308 }, { "epoch": 0.857684920597742, "grad_norm": 0.6271208418359441, "learning_rate": 3.1886092586923967e-06, "loss": 0.3249, "step": 18309 }, { "epoch": 0.8577317655876704, "grad_norm": 0.5868951004395283, "learning_rate": 3.1884269427599456e-06, "loss": 0.3053, "step": 18310 }, { "epoch": 0.8577786105775987, "grad_norm": 0.5675427511991296, "learning_rate": 3.188244622865772e-06, "loss": 0.3277, "step": 18311 }, { "epoch": 0.8578254555675271, "grad_norm": 0.6228524050937214, "learning_rate": 3.1880622990109235e-06, "loss": 0.3366, "step": 18312 }, { "epoch": 0.8578723005574553, "grad_norm": 0.6356924500451803, "learning_rate": 3.187879971196449e-06, "loss": 0.3243, "step": 18313 }, { "epoch": 0.8579191455473837, "grad_norm": 0.5714786413079601, "learning_rate": 3.187697639423397e-06, "loss": 0.3203, "step": 18314 }, { "epoch": 0.857965990537312, "grad_norm": 0.5840840625981413, "learning_rate": 3.18751530369282e-06, "loss": 0.3152, "step": 18315 }, { "epoch": 0.8580128355272404, "grad_norm": 0.648247009936575, "learning_rate": 3.187332964005764e-06, "loss": 0.3148, "step": 18316 }, { "epoch": 0.8580596805171687, "grad_norm": 0.542624559555027, "learning_rate": 3.1871506203632795e-06, "loss": 0.3046, "step": 18317 }, { "epoch": 0.858106525507097, "grad_norm": 0.5407431606511082, "learning_rate": 3.186968272766417e-06, "loss": 0.3059, "step": 18318 }, { "epoch": 0.8581533704970253, "grad_norm": 0.593735126165342, "learning_rate": 3.186785921216224e-06, "loss": 0.342, "step": 18319 }, { "epoch": 0.8582002154869537, "grad_norm": 0.6139361127646433, "learning_rate": 3.1866035657137504e-06, "loss": 0.3157, "step": 18320 }, { "epoch": 0.858247060476882, "grad_norm": 0.638101272740244, "learning_rate": 3.1864212062600465e-06, "loss": 0.3385, "step": 18321 }, { "epoch": 0.8582939054668103, "grad_norm": 0.6198698334155018, "learning_rate": 3.1862388428561607e-06, "loss": 0.2993, "step": 18322 }, { "epoch": 0.8583407504567386, "grad_norm": 0.6158226745838379, "learning_rate": 3.1860564755031427e-06, "loss": 0.3175, "step": 18323 }, { "epoch": 0.858387595446667, "grad_norm": 0.5889083741048827, "learning_rate": 3.1858741042020423e-06, "loss": 0.319, "step": 18324 }, { "epoch": 0.8584344404365953, "grad_norm": 0.5802780485190733, "learning_rate": 3.1856917289539085e-06, "loss": 0.3133, "step": 18325 }, { "epoch": 0.8584812854265237, "grad_norm": 0.5941083933904355, "learning_rate": 3.1855093497597917e-06, "loss": 0.3109, "step": 18326 }, { "epoch": 0.8585281304164519, "grad_norm": 0.5568655484122883, "learning_rate": 3.1853269666207405e-06, "loss": 0.296, "step": 18327 }, { "epoch": 0.8585749754063803, "grad_norm": 0.5485186795650613, "learning_rate": 3.1851445795378043e-06, "loss": 0.322, "step": 18328 }, { "epoch": 0.8586218203963086, "grad_norm": 0.5698428255120268, "learning_rate": 3.1849621885120344e-06, "loss": 0.3124, "step": 18329 }, { "epoch": 0.858668665386237, "grad_norm": 0.6056915801288719, "learning_rate": 3.184779793544479e-06, "loss": 0.3106, "step": 18330 }, { "epoch": 0.8587155103761652, "grad_norm": 0.549435999862462, "learning_rate": 3.1845973946361874e-06, "loss": 0.3116, "step": 18331 }, { "epoch": 0.8587623553660936, "grad_norm": 0.5969134379219946, "learning_rate": 3.18441499178821e-06, "loss": 0.298, "step": 18332 }, { "epoch": 0.8588092003560219, "grad_norm": 0.5994876569961339, "learning_rate": 3.184232585001598e-06, "loss": 0.3251, "step": 18333 }, { "epoch": 0.8588560453459503, "grad_norm": 0.5788552066654157, "learning_rate": 3.184050174277397e-06, "loss": 0.3046, "step": 18334 }, { "epoch": 0.8589028903358786, "grad_norm": 0.6207872961363444, "learning_rate": 3.18386775961666e-06, "loss": 0.3106, "step": 18335 }, { "epoch": 0.8589497353258069, "grad_norm": 0.5443020236121241, "learning_rate": 3.183685341020436e-06, "loss": 0.3061, "step": 18336 }, { "epoch": 0.8589965803157352, "grad_norm": 0.5647787263647462, "learning_rate": 3.1835029184897755e-06, "loss": 0.321, "step": 18337 }, { "epoch": 0.8590434253056636, "grad_norm": 0.5758079641866474, "learning_rate": 3.1833204920257264e-06, "loss": 0.3139, "step": 18338 }, { "epoch": 0.8590902702955919, "grad_norm": 0.5689582547219888, "learning_rate": 3.18313806162934e-06, "loss": 0.3318, "step": 18339 }, { "epoch": 0.8591371152855202, "grad_norm": 0.5989199147002319, "learning_rate": 3.182955627301666e-06, "loss": 0.319, "step": 18340 }, { "epoch": 0.8591839602754485, "grad_norm": 0.6637612521439347, "learning_rate": 3.1827731890437534e-06, "loss": 0.3011, "step": 18341 }, { "epoch": 0.8592308052653769, "grad_norm": 0.5895617865935788, "learning_rate": 3.1825907468566535e-06, "loss": 0.3253, "step": 18342 }, { "epoch": 0.8592776502553052, "grad_norm": 0.5976380244220486, "learning_rate": 3.1824083007414154e-06, "loss": 0.3279, "step": 18343 }, { "epoch": 0.8593244952452336, "grad_norm": 0.5800167192947552, "learning_rate": 3.1822258506990882e-06, "loss": 0.3106, "step": 18344 }, { "epoch": 0.8593713402351618, "grad_norm": 0.5570738335315165, "learning_rate": 3.1820433967307235e-06, "loss": 0.3086, "step": 18345 }, { "epoch": 0.8594181852250902, "grad_norm": 0.6153233019226328, "learning_rate": 3.18186093883737e-06, "loss": 0.3337, "step": 18346 }, { "epoch": 0.8594650302150185, "grad_norm": 0.6785486428913492, "learning_rate": 3.181678477020078e-06, "loss": 0.3537, "step": 18347 }, { "epoch": 0.8595118752049469, "grad_norm": 0.6305406997504333, "learning_rate": 3.1814960112798986e-06, "loss": 0.3155, "step": 18348 }, { "epoch": 0.8595587201948751, "grad_norm": 0.5950150205557392, "learning_rate": 3.18131354161788e-06, "loss": 0.338, "step": 18349 }, { "epoch": 0.8596055651848035, "grad_norm": 0.5967118938357807, "learning_rate": 3.181131068035074e-06, "loss": 0.3146, "step": 18350 }, { "epoch": 0.8596524101747318, "grad_norm": 0.6211624866280143, "learning_rate": 3.1809485905325294e-06, "loss": 0.3258, "step": 18351 }, { "epoch": 0.8596992551646602, "grad_norm": 0.5589978308667025, "learning_rate": 3.180766109111296e-06, "loss": 0.3112, "step": 18352 }, { "epoch": 0.8597461001545885, "grad_norm": 0.5873946779347908, "learning_rate": 3.180583623772426e-06, "loss": 0.3113, "step": 18353 }, { "epoch": 0.8597929451445168, "grad_norm": 0.6480334223766729, "learning_rate": 3.180401134516968e-06, "loss": 0.3149, "step": 18354 }, { "epoch": 0.8598397901344451, "grad_norm": 0.6154580626528521, "learning_rate": 3.1802186413459725e-06, "loss": 0.3111, "step": 18355 }, { "epoch": 0.8598866351243735, "grad_norm": 0.5825369068591669, "learning_rate": 3.180036144260489e-06, "loss": 0.3129, "step": 18356 }, { "epoch": 0.8599334801143018, "grad_norm": 0.603693511870361, "learning_rate": 3.1798536432615696e-06, "loss": 0.3158, "step": 18357 }, { "epoch": 0.8599803251042301, "grad_norm": 0.6476389881862485, "learning_rate": 3.1796711383502633e-06, "loss": 0.3221, "step": 18358 }, { "epoch": 0.8600271700941584, "grad_norm": 0.5676472107050078, "learning_rate": 3.1794886295276193e-06, "loss": 0.3199, "step": 18359 }, { "epoch": 0.8600740150840868, "grad_norm": 0.5778877176915537, "learning_rate": 3.1793061167946896e-06, "loss": 0.3251, "step": 18360 }, { "epoch": 0.8601208600740151, "grad_norm": 0.5947726425933464, "learning_rate": 3.1791236001525246e-06, "loss": 0.3088, "step": 18361 }, { "epoch": 0.8601677050639435, "grad_norm": 0.5915414095562661, "learning_rate": 3.178941079602173e-06, "loss": 0.3072, "step": 18362 }, { "epoch": 0.8602145500538717, "grad_norm": 0.583151698746867, "learning_rate": 3.1787585551446864e-06, "loss": 0.3162, "step": 18363 }, { "epoch": 0.8602613950438001, "grad_norm": 0.6205222706081204, "learning_rate": 3.178576026781115e-06, "loss": 0.3114, "step": 18364 }, { "epoch": 0.8603082400337284, "grad_norm": 0.5609235660189644, "learning_rate": 3.178393494512509e-06, "loss": 0.3071, "step": 18365 }, { "epoch": 0.8603550850236568, "grad_norm": 0.5738791990153987, "learning_rate": 3.178210958339919e-06, "loss": 0.3141, "step": 18366 }, { "epoch": 0.860401930013585, "grad_norm": 0.5877053863020042, "learning_rate": 3.1780284182643957e-06, "loss": 0.317, "step": 18367 }, { "epoch": 0.8604487750035134, "grad_norm": 0.5337796299717827, "learning_rate": 3.177845874286989e-06, "loss": 0.3048, "step": 18368 }, { "epoch": 0.8604956199934417, "grad_norm": 0.5722105136984987, "learning_rate": 3.1776633264087495e-06, "loss": 0.3044, "step": 18369 }, { "epoch": 0.8605424649833701, "grad_norm": 0.6276236561613926, "learning_rate": 3.177480774630728e-06, "loss": 0.3195, "step": 18370 }, { "epoch": 0.8605893099732984, "grad_norm": 0.6140137506497713, "learning_rate": 3.1772982189539752e-06, "loss": 0.3182, "step": 18371 }, { "epoch": 0.8606361549632267, "grad_norm": 0.6007395493903024, "learning_rate": 3.177115659379541e-06, "loss": 0.3145, "step": 18372 }, { "epoch": 0.860682999953155, "grad_norm": 0.6505140939163708, "learning_rate": 3.1769330959084766e-06, "loss": 0.3082, "step": 18373 }, { "epoch": 0.8607298449430834, "grad_norm": 0.6280074489522096, "learning_rate": 3.176750528541832e-06, "loss": 0.3292, "step": 18374 }, { "epoch": 0.8607766899330117, "grad_norm": 0.5566380117784451, "learning_rate": 3.1765679572806584e-06, "loss": 0.2996, "step": 18375 }, { "epoch": 0.86082353492294, "grad_norm": 0.5901131508492451, "learning_rate": 3.176385382126007e-06, "loss": 0.3193, "step": 18376 }, { "epoch": 0.8608703799128683, "grad_norm": 0.5957597303931912, "learning_rate": 3.1762028030789264e-06, "loss": 0.3314, "step": 18377 }, { "epoch": 0.8609172249027967, "grad_norm": 0.5945859207755393, "learning_rate": 3.17602022014047e-06, "loss": 0.314, "step": 18378 }, { "epoch": 0.860964069892725, "grad_norm": 0.5706703529476954, "learning_rate": 3.1758376333116863e-06, "loss": 0.3035, "step": 18379 }, { "epoch": 0.8610109148826534, "grad_norm": 0.586000819153911, "learning_rate": 3.1756550425936266e-06, "loss": 0.301, "step": 18380 }, { "epoch": 0.8610577598725816, "grad_norm": 0.5454983540973981, "learning_rate": 3.1754724479873427e-06, "loss": 0.3106, "step": 18381 }, { "epoch": 0.86110460486251, "grad_norm": 0.5726102100434179, "learning_rate": 3.1752898494938844e-06, "loss": 0.3176, "step": 18382 }, { "epoch": 0.8611514498524383, "grad_norm": 0.5886164184279256, "learning_rate": 3.1751072471143025e-06, "loss": 0.3227, "step": 18383 }, { "epoch": 0.8611982948423667, "grad_norm": 0.6230265946028666, "learning_rate": 3.174924640849648e-06, "loss": 0.3248, "step": 18384 }, { "epoch": 0.8612451398322949, "grad_norm": 0.6730408017794698, "learning_rate": 3.1747420307009728e-06, "loss": 0.3424, "step": 18385 }, { "epoch": 0.8612919848222232, "grad_norm": 0.5511184716828513, "learning_rate": 3.174559416669326e-06, "loss": 0.3134, "step": 18386 }, { "epoch": 0.8613388298121516, "grad_norm": 0.573386491098741, "learning_rate": 3.1743767987557587e-06, "loss": 0.3112, "step": 18387 }, { "epoch": 0.86138567480208, "grad_norm": 0.5947266776443302, "learning_rate": 3.174194176961323e-06, "loss": 0.3016, "step": 18388 }, { "epoch": 0.8614325197920083, "grad_norm": 0.6806570594392473, "learning_rate": 3.17401155128707e-06, "loss": 0.3179, "step": 18389 }, { "epoch": 0.8614793647819365, "grad_norm": 0.5978670877991745, "learning_rate": 3.173828921734049e-06, "loss": 0.3395, "step": 18390 }, { "epoch": 0.8615262097718649, "grad_norm": 0.5899913228193706, "learning_rate": 3.1736462883033125e-06, "loss": 0.3201, "step": 18391 }, { "epoch": 0.8615730547617932, "grad_norm": 0.5638080656172262, "learning_rate": 3.1734636509959107e-06, "loss": 0.3079, "step": 18392 }, { "epoch": 0.8616198997517216, "grad_norm": 0.5884525906469865, "learning_rate": 3.1732810098128948e-06, "loss": 0.32, "step": 18393 }, { "epoch": 0.8616667447416498, "grad_norm": 0.6078295594504769, "learning_rate": 3.173098364755316e-06, "loss": 0.2845, "step": 18394 }, { "epoch": 0.8617135897315782, "grad_norm": 0.6312754838113595, "learning_rate": 3.1729157158242246e-06, "loss": 0.2987, "step": 18395 }, { "epoch": 0.8617604347215065, "grad_norm": 0.5695552571751713, "learning_rate": 3.172733063020673e-06, "loss": 0.3044, "step": 18396 }, { "epoch": 0.8618072797114349, "grad_norm": 0.5635735255251094, "learning_rate": 3.1725504063457125e-06, "loss": 0.2888, "step": 18397 }, { "epoch": 0.8618541247013632, "grad_norm": 0.604085504071906, "learning_rate": 3.172367745800392e-06, "loss": 0.3273, "step": 18398 }, { "epoch": 0.8619009696912915, "grad_norm": 0.6654366680490568, "learning_rate": 3.1721850813857645e-06, "loss": 0.3354, "step": 18399 }, { "epoch": 0.8619478146812198, "grad_norm": 0.5501361476370744, "learning_rate": 3.1720024131028815e-06, "loss": 0.314, "step": 18400 }, { "epoch": 0.8619946596711482, "grad_norm": 0.6246448175631673, "learning_rate": 3.171819740952793e-06, "loss": 0.3081, "step": 18401 }, { "epoch": 0.8620415046610765, "grad_norm": 0.5942763337099889, "learning_rate": 3.1716370649365504e-06, "loss": 0.3221, "step": 18402 }, { "epoch": 0.8620883496510048, "grad_norm": 0.5440133735482355, "learning_rate": 3.1714543850552067e-06, "loss": 0.3106, "step": 18403 }, { "epoch": 0.8621351946409331, "grad_norm": 0.5793371822950804, "learning_rate": 3.1712717013098105e-06, "loss": 0.3153, "step": 18404 }, { "epoch": 0.8621820396308615, "grad_norm": 0.6547819837918697, "learning_rate": 3.171089013701414e-06, "loss": 0.328, "step": 18405 }, { "epoch": 0.8622288846207898, "grad_norm": 0.58506883831339, "learning_rate": 3.17090632223107e-06, "loss": 0.3051, "step": 18406 }, { "epoch": 0.8622757296107182, "grad_norm": 0.5800505023509914, "learning_rate": 3.170723626899829e-06, "loss": 0.3104, "step": 18407 }, { "epoch": 0.8623225746006464, "grad_norm": 0.5801913556180492, "learning_rate": 3.1705409277087407e-06, "loss": 0.2957, "step": 18408 }, { "epoch": 0.8623694195905748, "grad_norm": 0.6132003562068653, "learning_rate": 3.170358224658859e-06, "loss": 0.3424, "step": 18409 }, { "epoch": 0.8624162645805031, "grad_norm": 0.6600319002334645, "learning_rate": 3.1701755177512337e-06, "loss": 0.3406, "step": 18410 }, { "epoch": 0.8624631095704315, "grad_norm": 0.5778477654409633, "learning_rate": 3.1699928069869163e-06, "loss": 0.3265, "step": 18411 }, { "epoch": 0.8625099545603597, "grad_norm": 0.5533881234266226, "learning_rate": 3.1698100923669596e-06, "loss": 0.2984, "step": 18412 }, { "epoch": 0.8625567995502881, "grad_norm": 0.604579397435892, "learning_rate": 3.1696273738924138e-06, "loss": 0.3047, "step": 18413 }, { "epoch": 0.8626036445402164, "grad_norm": 0.5900139504972897, "learning_rate": 3.1694446515643306e-06, "loss": 0.3168, "step": 18414 }, { "epoch": 0.8626504895301448, "grad_norm": 0.6048306652241258, "learning_rate": 3.169261925383762e-06, "loss": 0.3194, "step": 18415 }, { "epoch": 0.8626973345200731, "grad_norm": 0.5767368748583239, "learning_rate": 3.169079195351759e-06, "loss": 0.3161, "step": 18416 }, { "epoch": 0.8627441795100014, "grad_norm": 0.606679871230477, "learning_rate": 3.1688964614693736e-06, "loss": 0.3221, "step": 18417 }, { "epoch": 0.8627910244999297, "grad_norm": 0.574366209658049, "learning_rate": 3.1687137237376574e-06, "loss": 0.3128, "step": 18418 }, { "epoch": 0.8628378694898581, "grad_norm": 0.6358483918178955, "learning_rate": 3.168530982157661e-06, "loss": 0.3235, "step": 18419 }, { "epoch": 0.8628847144797864, "grad_norm": 0.5907239715846861, "learning_rate": 3.1683482367304375e-06, "loss": 0.3185, "step": 18420 }, { "epoch": 0.8629315594697147, "grad_norm": 0.6208761701800268, "learning_rate": 3.1681654874570377e-06, "loss": 0.3137, "step": 18421 }, { "epoch": 0.862978404459643, "grad_norm": 0.5686036305143739, "learning_rate": 3.167982734338513e-06, "loss": 0.3198, "step": 18422 }, { "epoch": 0.8630252494495714, "grad_norm": 0.6445199447085525, "learning_rate": 3.167799977375916e-06, "loss": 0.3324, "step": 18423 }, { "epoch": 0.8630720944394997, "grad_norm": 0.6102766255980027, "learning_rate": 3.167617216570299e-06, "loss": 0.3417, "step": 18424 }, { "epoch": 0.8631189394294281, "grad_norm": 0.6664811236452511, "learning_rate": 3.167434451922711e-06, "loss": 0.3407, "step": 18425 }, { "epoch": 0.8631657844193563, "grad_norm": 0.6004174157257299, "learning_rate": 3.167251683434206e-06, "loss": 0.3255, "step": 18426 }, { "epoch": 0.8632126294092847, "grad_norm": 0.6127402679954679, "learning_rate": 3.1670689111058356e-06, "loss": 0.298, "step": 18427 }, { "epoch": 0.863259474399213, "grad_norm": 0.6186257323217018, "learning_rate": 3.1668861349386514e-06, "loss": 0.3322, "step": 18428 }, { "epoch": 0.8633063193891414, "grad_norm": 0.5917391377321277, "learning_rate": 3.1667033549337045e-06, "loss": 0.3299, "step": 18429 }, { "epoch": 0.8633531643790696, "grad_norm": 0.6153202456974975, "learning_rate": 3.1665205710920478e-06, "loss": 0.3055, "step": 18430 }, { "epoch": 0.863400009368998, "grad_norm": 0.5834964685889129, "learning_rate": 3.1663377834147328e-06, "loss": 0.3536, "step": 18431 }, { "epoch": 0.8634468543589263, "grad_norm": 0.5995208027951654, "learning_rate": 3.166154991902811e-06, "loss": 0.3271, "step": 18432 }, { "epoch": 0.8634936993488547, "grad_norm": 0.5879433002879785, "learning_rate": 3.165972196557335e-06, "loss": 0.34, "step": 18433 }, { "epoch": 0.863540544338783, "grad_norm": 0.6324145024923857, "learning_rate": 3.165789397379356e-06, "loss": 0.3515, "step": 18434 }, { "epoch": 0.8635873893287113, "grad_norm": 0.5553825262897094, "learning_rate": 3.1656065943699266e-06, "loss": 0.3333, "step": 18435 }, { "epoch": 0.8636342343186396, "grad_norm": 0.6466501182559339, "learning_rate": 3.1654237875300984e-06, "loss": 0.3241, "step": 18436 }, { "epoch": 0.863681079308568, "grad_norm": 0.5909120090464959, "learning_rate": 3.1652409768609236e-06, "loss": 0.3088, "step": 18437 }, { "epoch": 0.8637279242984963, "grad_norm": 0.6006822623178169, "learning_rate": 3.1650581623634547e-06, "loss": 0.3034, "step": 18438 }, { "epoch": 0.8637747692884246, "grad_norm": 0.6320711206819695, "learning_rate": 3.164875344038743e-06, "loss": 0.331, "step": 18439 }, { "epoch": 0.8638216142783529, "grad_norm": 0.5619620455796296, "learning_rate": 3.16469252188784e-06, "loss": 0.315, "step": 18440 }, { "epoch": 0.8638684592682813, "grad_norm": 0.5794831304879293, "learning_rate": 3.1645096959117993e-06, "loss": 0.3369, "step": 18441 }, { "epoch": 0.8639153042582096, "grad_norm": 0.5742387052001757, "learning_rate": 3.164326866111672e-06, "loss": 0.3066, "step": 18442 }, { "epoch": 0.8639621492481379, "grad_norm": 0.6139904719293116, "learning_rate": 3.164144032488511e-06, "loss": 0.3047, "step": 18443 }, { "epoch": 0.8640089942380662, "grad_norm": 0.6275246833133715, "learning_rate": 3.1639611950433673e-06, "loss": 0.3051, "step": 18444 }, { "epoch": 0.8640558392279946, "grad_norm": 0.5903895885794715, "learning_rate": 3.163778353777295e-06, "loss": 0.3214, "step": 18445 }, { "epoch": 0.8641026842179229, "grad_norm": 0.5891097835826632, "learning_rate": 3.1635955086913444e-06, "loss": 0.3223, "step": 18446 }, { "epoch": 0.8641495292078513, "grad_norm": 0.5914933469399415, "learning_rate": 3.163412659786568e-06, "loss": 0.3047, "step": 18447 }, { "epoch": 0.8641963741977795, "grad_norm": 0.5763966128023194, "learning_rate": 3.163229807064019e-06, "loss": 0.3197, "step": 18448 }, { "epoch": 0.8642432191877079, "grad_norm": 0.6157952506576436, "learning_rate": 3.1630469505247495e-06, "loss": 0.2962, "step": 18449 }, { "epoch": 0.8642900641776362, "grad_norm": 0.6095777316692912, "learning_rate": 3.1628640901698104e-06, "loss": 0.3299, "step": 18450 }, { "epoch": 0.8643369091675646, "grad_norm": 0.5575407806773707, "learning_rate": 3.162681226000256e-06, "loss": 0.2979, "step": 18451 }, { "epoch": 0.8643837541574928, "grad_norm": 0.5768796887498306, "learning_rate": 3.1624983580171376e-06, "loss": 0.3076, "step": 18452 }, { "epoch": 0.8644305991474212, "grad_norm": 0.5569477231890604, "learning_rate": 3.162315486221507e-06, "loss": 0.3144, "step": 18453 }, { "epoch": 0.8644774441373495, "grad_norm": 0.5954786152667495, "learning_rate": 3.162132610614418e-06, "loss": 0.3234, "step": 18454 }, { "epoch": 0.8645242891272779, "grad_norm": 0.5229815391818573, "learning_rate": 3.1619497311969223e-06, "loss": 0.2913, "step": 18455 }, { "epoch": 0.8645711341172062, "grad_norm": 0.5693120475238698, "learning_rate": 3.1617668479700713e-06, "loss": 0.3223, "step": 18456 }, { "epoch": 0.8646179791071344, "grad_norm": 0.5739957894182348, "learning_rate": 3.161583960934919e-06, "loss": 0.3268, "step": 18457 }, { "epoch": 0.8646648240970628, "grad_norm": 0.5935895707213058, "learning_rate": 3.1614010700925174e-06, "loss": 0.3354, "step": 18458 }, { "epoch": 0.8647116690869912, "grad_norm": 0.6157761128474447, "learning_rate": 3.1612181754439193e-06, "loss": 0.3218, "step": 18459 }, { "epoch": 0.8647585140769195, "grad_norm": 0.642810671997819, "learning_rate": 3.161035276990176e-06, "loss": 0.3257, "step": 18460 }, { "epoch": 0.8648053590668477, "grad_norm": 0.5909004579116909, "learning_rate": 3.1608523747323412e-06, "loss": 0.3102, "step": 18461 }, { "epoch": 0.8648522040567761, "grad_norm": 0.6305776898891889, "learning_rate": 3.1606694686714674e-06, "loss": 0.3148, "step": 18462 }, { "epoch": 0.8648990490467044, "grad_norm": 0.5607379573625952, "learning_rate": 3.160486558808606e-06, "loss": 0.3143, "step": 18463 }, { "epoch": 0.8649458940366328, "grad_norm": 0.5262353795881488, "learning_rate": 3.160303645144811e-06, "loss": 0.2988, "step": 18464 }, { "epoch": 0.8649927390265612, "grad_norm": 0.5864692513506679, "learning_rate": 3.1601207276811343e-06, "loss": 0.3366, "step": 18465 }, { "epoch": 0.8650395840164894, "grad_norm": 0.5939164240643808, "learning_rate": 3.159937806418629e-06, "loss": 0.3186, "step": 18466 }, { "epoch": 0.8650864290064177, "grad_norm": 0.656841123446341, "learning_rate": 3.1597548813583474e-06, "loss": 0.3301, "step": 18467 }, { "epoch": 0.8651332739963461, "grad_norm": 0.6063976247425322, "learning_rate": 3.159571952501342e-06, "loss": 0.3317, "step": 18468 }, { "epoch": 0.8651801189862744, "grad_norm": 0.6220991218493286, "learning_rate": 3.159389019848666e-06, "loss": 0.3223, "step": 18469 }, { "epoch": 0.8652269639762027, "grad_norm": 0.5882132470747141, "learning_rate": 3.159206083401372e-06, "loss": 0.2847, "step": 18470 }, { "epoch": 0.865273808966131, "grad_norm": 0.5945480486377703, "learning_rate": 3.1590231431605123e-06, "loss": 0.3015, "step": 18471 }, { "epoch": 0.8653206539560594, "grad_norm": 0.6312018670184016, "learning_rate": 3.15884019912714e-06, "loss": 0.3238, "step": 18472 }, { "epoch": 0.8653674989459877, "grad_norm": 0.6111562706974509, "learning_rate": 3.158657251302309e-06, "loss": 0.3241, "step": 18473 }, { "epoch": 0.8654143439359161, "grad_norm": 0.5859729561831362, "learning_rate": 3.15847429968707e-06, "loss": 0.3184, "step": 18474 }, { "epoch": 0.8654611889258443, "grad_norm": 0.5942688642955068, "learning_rate": 3.158291344282477e-06, "loss": 0.3291, "step": 18475 }, { "epoch": 0.8655080339157727, "grad_norm": 0.5993612469628868, "learning_rate": 3.158108385089583e-06, "loss": 0.318, "step": 18476 }, { "epoch": 0.865554878905701, "grad_norm": 0.6025443045006242, "learning_rate": 3.1579254221094413e-06, "loss": 0.3231, "step": 18477 }, { "epoch": 0.8656017238956294, "grad_norm": 0.6432040053728286, "learning_rate": 3.1577424553431028e-06, "loss": 0.3091, "step": 18478 }, { "epoch": 0.8656485688855576, "grad_norm": 0.6023264485709093, "learning_rate": 3.157559484791623e-06, "loss": 0.3087, "step": 18479 }, { "epoch": 0.865695413875486, "grad_norm": 0.6072249452888417, "learning_rate": 3.1573765104560533e-06, "loss": 0.3311, "step": 18480 }, { "epoch": 0.8657422588654143, "grad_norm": 0.6024925033302426, "learning_rate": 3.1571935323374468e-06, "loss": 0.3276, "step": 18481 }, { "epoch": 0.8657891038553427, "grad_norm": 0.5686532306053337, "learning_rate": 3.157010550436857e-06, "loss": 0.3116, "step": 18482 }, { "epoch": 0.865835948845271, "grad_norm": 0.5870377137805504, "learning_rate": 3.1568275647553366e-06, "loss": 0.3092, "step": 18483 }, { "epoch": 0.8658827938351993, "grad_norm": 0.6497385875879864, "learning_rate": 3.156644575293938e-06, "loss": 0.3311, "step": 18484 }, { "epoch": 0.8659296388251276, "grad_norm": 0.5583383549211944, "learning_rate": 3.156461582053716e-06, "loss": 0.3192, "step": 18485 }, { "epoch": 0.865976483815056, "grad_norm": 0.6031476706158312, "learning_rate": 3.1562785850357214e-06, "loss": 0.3129, "step": 18486 }, { "epoch": 0.8660233288049843, "grad_norm": 0.5674701837701683, "learning_rate": 3.156095584241009e-06, "loss": 0.3072, "step": 18487 }, { "epoch": 0.8660701737949126, "grad_norm": 0.6513942825053826, "learning_rate": 3.1559125796706313e-06, "loss": 0.3291, "step": 18488 }, { "epoch": 0.8661170187848409, "grad_norm": 0.5905340374258607, "learning_rate": 3.1557295713256417e-06, "loss": 0.3131, "step": 18489 }, { "epoch": 0.8661638637747693, "grad_norm": 0.6530020342072429, "learning_rate": 3.1555465592070933e-06, "loss": 0.3265, "step": 18490 }, { "epoch": 0.8662107087646976, "grad_norm": 0.623133675507227, "learning_rate": 3.1553635433160397e-06, "loss": 0.3271, "step": 18491 }, { "epoch": 0.866257553754626, "grad_norm": 0.5923523521696206, "learning_rate": 3.1551805236535326e-06, "loss": 0.3144, "step": 18492 }, { "epoch": 0.8663043987445542, "grad_norm": 0.6010894184762509, "learning_rate": 3.1549975002206268e-06, "loss": 0.3286, "step": 18493 }, { "epoch": 0.8663512437344826, "grad_norm": 0.638358540317387, "learning_rate": 3.1548144730183757e-06, "loss": 0.3445, "step": 18494 }, { "epoch": 0.8663980887244109, "grad_norm": 0.5866071128905312, "learning_rate": 3.154631442047831e-06, "loss": 0.3046, "step": 18495 }, { "epoch": 0.8664449337143393, "grad_norm": 0.6732749263103133, "learning_rate": 3.154448407310046e-06, "loss": 0.3502, "step": 18496 }, { "epoch": 0.8664917787042675, "grad_norm": 0.6062721273378029, "learning_rate": 3.1542653688060765e-06, "loss": 0.2933, "step": 18497 }, { "epoch": 0.8665386236941959, "grad_norm": 0.5846326886711922, "learning_rate": 3.1540823265369736e-06, "loss": 0.306, "step": 18498 }, { "epoch": 0.8665854686841242, "grad_norm": 0.6101718078527392, "learning_rate": 3.1538992805037914e-06, "loss": 0.3007, "step": 18499 }, { "epoch": 0.8666323136740526, "grad_norm": 0.5827694058917692, "learning_rate": 3.153716230707583e-06, "loss": 0.3235, "step": 18500 }, { "epoch": 0.8666791586639809, "grad_norm": 0.6236392529812116, "learning_rate": 3.1535331771494026e-06, "loss": 0.3409, "step": 18501 }, { "epoch": 0.8667260036539092, "grad_norm": 0.5955096350304923, "learning_rate": 3.1533501198303025e-06, "loss": 0.3135, "step": 18502 }, { "epoch": 0.8667728486438375, "grad_norm": 0.5873696983960458, "learning_rate": 3.1531670587513367e-06, "loss": 0.3198, "step": 18503 }, { "epoch": 0.8668196936337659, "grad_norm": 0.6214189656544289, "learning_rate": 3.152983993913559e-06, "loss": 0.314, "step": 18504 }, { "epoch": 0.8668665386236942, "grad_norm": 0.5647316701380731, "learning_rate": 3.152800925318022e-06, "loss": 0.3089, "step": 18505 }, { "epoch": 0.8669133836136225, "grad_norm": 0.5395659248689798, "learning_rate": 3.1526178529657803e-06, "loss": 0.2993, "step": 18506 }, { "epoch": 0.8669602286035508, "grad_norm": 0.6006865723515931, "learning_rate": 3.152434776857886e-06, "loss": 0.3047, "step": 18507 }, { "epoch": 0.8670070735934792, "grad_norm": 0.5399335934788991, "learning_rate": 3.152251696995394e-06, "loss": 0.307, "step": 18508 }, { "epoch": 0.8670539185834075, "grad_norm": 0.6269398601858863, "learning_rate": 3.1520686133793575e-06, "loss": 0.3026, "step": 18509 }, { "epoch": 0.8671007635733359, "grad_norm": 0.599523438171033, "learning_rate": 3.1518855260108307e-06, "loss": 0.3185, "step": 18510 }, { "epoch": 0.8671476085632641, "grad_norm": 0.5650844623731537, "learning_rate": 3.151702434890866e-06, "loss": 0.3029, "step": 18511 }, { "epoch": 0.8671944535531925, "grad_norm": 0.5590071263728755, "learning_rate": 3.151519340020517e-06, "loss": 0.3053, "step": 18512 }, { "epoch": 0.8672412985431208, "grad_norm": 0.5650410738866549, "learning_rate": 3.1513362414008387e-06, "loss": 0.3342, "step": 18513 }, { "epoch": 0.8672881435330492, "grad_norm": 0.5996683284329294, "learning_rate": 3.1511531390328835e-06, "loss": 0.319, "step": 18514 }, { "epoch": 0.8673349885229774, "grad_norm": 0.6046250227477554, "learning_rate": 3.150970032917706e-06, "loss": 0.3307, "step": 18515 }, { "epoch": 0.8673818335129058, "grad_norm": 0.6477096437370796, "learning_rate": 3.1507869230563604e-06, "loss": 0.3317, "step": 18516 }, { "epoch": 0.8674286785028341, "grad_norm": 0.6084713197810953, "learning_rate": 3.1506038094498983e-06, "loss": 0.3333, "step": 18517 }, { "epoch": 0.8674755234927625, "grad_norm": 0.5923377533736575, "learning_rate": 3.1504206920993752e-06, "loss": 0.2999, "step": 18518 }, { "epoch": 0.8675223684826908, "grad_norm": 0.5770393349459324, "learning_rate": 3.150237571005845e-06, "loss": 0.3143, "step": 18519 }, { "epoch": 0.8675692134726191, "grad_norm": 0.5660587899841731, "learning_rate": 3.1500544461703598e-06, "loss": 0.3253, "step": 18520 }, { "epoch": 0.8676160584625474, "grad_norm": 0.6033971740914987, "learning_rate": 3.1498713175939756e-06, "loss": 0.3144, "step": 18521 }, { "epoch": 0.8676629034524758, "grad_norm": 0.5872365985156629, "learning_rate": 3.1496881852777454e-06, "loss": 0.3318, "step": 18522 }, { "epoch": 0.8677097484424041, "grad_norm": 0.5849966281894022, "learning_rate": 3.1495050492227223e-06, "loss": 0.3441, "step": 18523 }, { "epoch": 0.8677565934323324, "grad_norm": 0.5693740109352973, "learning_rate": 3.149321909429961e-06, "loss": 0.3068, "step": 18524 }, { "epoch": 0.8678034384222607, "grad_norm": 0.5926333753746974, "learning_rate": 3.1491387659005167e-06, "loss": 0.3154, "step": 18525 }, { "epoch": 0.8678502834121891, "grad_norm": 0.5610988858430866, "learning_rate": 3.1489556186354403e-06, "loss": 0.3077, "step": 18526 }, { "epoch": 0.8678971284021174, "grad_norm": 0.6300971340750336, "learning_rate": 3.148772467635788e-06, "loss": 0.3433, "step": 18527 }, { "epoch": 0.8679439733920458, "grad_norm": 0.6011132077472758, "learning_rate": 3.148589312902613e-06, "loss": 0.3284, "step": 18528 }, { "epoch": 0.867990818381974, "grad_norm": 0.6010062330619574, "learning_rate": 3.1484061544369698e-06, "loss": 0.3114, "step": 18529 }, { "epoch": 0.8680376633719024, "grad_norm": 0.5263475518414077, "learning_rate": 3.148222992239912e-06, "loss": 0.2913, "step": 18530 }, { "epoch": 0.8680845083618307, "grad_norm": 0.5631684595156058, "learning_rate": 3.1480398263124938e-06, "loss": 0.3184, "step": 18531 }, { "epoch": 0.8681313533517591, "grad_norm": 0.568396131152649, "learning_rate": 3.14785665665577e-06, "loss": 0.2986, "step": 18532 }, { "epoch": 0.8681781983416873, "grad_norm": 0.5800809925473143, "learning_rate": 3.1476734832707926e-06, "loss": 0.3142, "step": 18533 }, { "epoch": 0.8682250433316157, "grad_norm": 0.613608994752134, "learning_rate": 3.147490306158618e-06, "loss": 0.3205, "step": 18534 }, { "epoch": 0.868271888321544, "grad_norm": 0.6350896065987397, "learning_rate": 3.147307125320299e-06, "loss": 0.3223, "step": 18535 }, { "epoch": 0.8683187333114724, "grad_norm": 0.5340867527855151, "learning_rate": 3.1471239407568908e-06, "loss": 0.3144, "step": 18536 }, { "epoch": 0.8683655783014007, "grad_norm": 0.603744078986096, "learning_rate": 3.1469407524694467e-06, "loss": 0.3132, "step": 18537 }, { "epoch": 0.868412423291329, "grad_norm": 0.665289634289852, "learning_rate": 3.1467575604590206e-06, "loss": 0.3255, "step": 18538 }, { "epoch": 0.8684592682812573, "grad_norm": 0.5889959199953213, "learning_rate": 3.146574364726668e-06, "loss": 0.3007, "step": 18539 }, { "epoch": 0.8685061132711857, "grad_norm": 0.6053488041125463, "learning_rate": 3.1463911652734427e-06, "loss": 0.2983, "step": 18540 }, { "epoch": 0.868552958261114, "grad_norm": 0.5942183835409793, "learning_rate": 3.146207962100398e-06, "loss": 0.3304, "step": 18541 }, { "epoch": 0.8685998032510422, "grad_norm": 0.6362438888393751, "learning_rate": 3.146024755208589e-06, "loss": 0.3271, "step": 18542 }, { "epoch": 0.8686466482409706, "grad_norm": 0.5930863238088327, "learning_rate": 3.1458415445990704e-06, "loss": 0.3351, "step": 18543 }, { "epoch": 0.868693493230899, "grad_norm": 0.564777831701196, "learning_rate": 3.145658330272895e-06, "loss": 0.2972, "step": 18544 }, { "epoch": 0.8687403382208273, "grad_norm": 0.5777628389560238, "learning_rate": 3.145475112231119e-06, "loss": 0.3206, "step": 18545 }, { "epoch": 0.8687871832107557, "grad_norm": 0.5590665393264963, "learning_rate": 3.145291890474796e-06, "loss": 0.2928, "step": 18546 }, { "epoch": 0.8688340282006839, "grad_norm": 0.6065674465125579, "learning_rate": 3.14510866500498e-06, "loss": 0.3025, "step": 18547 }, { "epoch": 0.8688808731906122, "grad_norm": 0.56777621236603, "learning_rate": 3.1449254358227254e-06, "loss": 0.3146, "step": 18548 }, { "epoch": 0.8689277181805406, "grad_norm": 0.5771549742807998, "learning_rate": 3.144742202929088e-06, "loss": 0.3273, "step": 18549 }, { "epoch": 0.868974563170469, "grad_norm": 0.548765735614487, "learning_rate": 3.144558966325121e-06, "loss": 0.2988, "step": 18550 }, { "epoch": 0.8690214081603972, "grad_norm": 0.6003787629562809, "learning_rate": 3.144375726011879e-06, "loss": 0.3237, "step": 18551 }, { "epoch": 0.8690682531503255, "grad_norm": 0.5844421442467049, "learning_rate": 3.1441924819904166e-06, "loss": 0.316, "step": 18552 }, { "epoch": 0.8691150981402539, "grad_norm": 0.5932984458541946, "learning_rate": 3.144009234261789e-06, "loss": 0.3251, "step": 18553 }, { "epoch": 0.8691619431301822, "grad_norm": 0.6232638050544628, "learning_rate": 3.1438259828270488e-06, "loss": 0.3113, "step": 18554 }, { "epoch": 0.8692087881201106, "grad_norm": 0.5949244526367656, "learning_rate": 3.143642727687253e-06, "loss": 0.3125, "step": 18555 }, { "epoch": 0.8692556331100388, "grad_norm": 0.6254214237615909, "learning_rate": 3.143459468843454e-06, "loss": 0.3184, "step": 18556 }, { "epoch": 0.8693024780999672, "grad_norm": 0.5853370192397443, "learning_rate": 3.1432762062967088e-06, "loss": 0.3154, "step": 18557 }, { "epoch": 0.8693493230898955, "grad_norm": 0.6224670076863464, "learning_rate": 3.1430929400480702e-06, "loss": 0.3228, "step": 18558 }, { "epoch": 0.8693961680798239, "grad_norm": 0.586043185797866, "learning_rate": 3.1429096700985927e-06, "loss": 0.3131, "step": 18559 }, { "epoch": 0.8694430130697521, "grad_norm": 0.6233603581414726, "learning_rate": 3.142726396449333e-06, "loss": 0.3217, "step": 18560 }, { "epoch": 0.8694898580596805, "grad_norm": 0.5964874419882873, "learning_rate": 3.1425431191013435e-06, "loss": 0.3203, "step": 18561 }, { "epoch": 0.8695367030496088, "grad_norm": 0.5800749315485552, "learning_rate": 3.1423598380556797e-06, "loss": 0.3002, "step": 18562 }, { "epoch": 0.8695835480395372, "grad_norm": 0.5729333543247485, "learning_rate": 3.1421765533133964e-06, "loss": 0.3038, "step": 18563 }, { "epoch": 0.8696303930294655, "grad_norm": 0.6433196342339563, "learning_rate": 3.141993264875549e-06, "loss": 0.318, "step": 18564 }, { "epoch": 0.8696772380193938, "grad_norm": 0.6172346368669576, "learning_rate": 3.1418099727431917e-06, "loss": 0.3217, "step": 18565 }, { "epoch": 0.8697240830093221, "grad_norm": 0.616102168252734, "learning_rate": 3.1416266769173785e-06, "loss": 0.3297, "step": 18566 }, { "epoch": 0.8697709279992505, "grad_norm": 0.6169217495717629, "learning_rate": 3.1414433773991658e-06, "loss": 0.3284, "step": 18567 }, { "epoch": 0.8698177729891788, "grad_norm": 0.5857240528552358, "learning_rate": 3.141260074189608e-06, "loss": 0.3259, "step": 18568 }, { "epoch": 0.8698646179791071, "grad_norm": 0.5765997715831147, "learning_rate": 3.141076767289759e-06, "loss": 0.3186, "step": 18569 }, { "epoch": 0.8699114629690354, "grad_norm": 0.5654289114872828, "learning_rate": 3.1408934567006744e-06, "loss": 0.3208, "step": 18570 }, { "epoch": 0.8699583079589638, "grad_norm": 0.5956444363766186, "learning_rate": 3.1407101424234098e-06, "loss": 0.2986, "step": 18571 }, { "epoch": 0.8700051529488921, "grad_norm": 0.5926531524760349, "learning_rate": 3.1405268244590183e-06, "loss": 0.3214, "step": 18572 }, { "epoch": 0.8700519979388205, "grad_norm": 0.5747966859052642, "learning_rate": 3.1403435028085568e-06, "loss": 0.3266, "step": 18573 }, { "epoch": 0.8700988429287487, "grad_norm": 0.6261856541975058, "learning_rate": 3.140160177473079e-06, "loss": 0.3258, "step": 18574 }, { "epoch": 0.8701456879186771, "grad_norm": 0.6010837678323752, "learning_rate": 3.13997684845364e-06, "loss": 0.3374, "step": 18575 }, { "epoch": 0.8701925329086054, "grad_norm": 0.6153732219516136, "learning_rate": 3.1397935157512952e-06, "loss": 0.3125, "step": 18576 }, { "epoch": 0.8702393778985338, "grad_norm": 0.594031156932767, "learning_rate": 3.1396101793670996e-06, "loss": 0.3031, "step": 18577 }, { "epoch": 0.870286222888462, "grad_norm": 0.6383371970393769, "learning_rate": 3.1394268393021083e-06, "loss": 0.316, "step": 18578 }, { "epoch": 0.8703330678783904, "grad_norm": 0.620332103910736, "learning_rate": 3.139243495557376e-06, "loss": 0.3149, "step": 18579 }, { "epoch": 0.8703799128683187, "grad_norm": 0.5846171111244355, "learning_rate": 3.1390601481339583e-06, "loss": 0.3147, "step": 18580 }, { "epoch": 0.8704267578582471, "grad_norm": 0.6440155328898303, "learning_rate": 3.1388767970329103e-06, "loss": 0.3298, "step": 18581 }, { "epoch": 0.8704736028481754, "grad_norm": 0.6138662721871809, "learning_rate": 3.1386934422552862e-06, "loss": 0.3339, "step": 18582 }, { "epoch": 0.8705204478381037, "grad_norm": 0.5835418120528425, "learning_rate": 3.1385100838021427e-06, "loss": 0.3006, "step": 18583 }, { "epoch": 0.870567292828032, "grad_norm": 0.5308983347837514, "learning_rate": 3.1383267216745333e-06, "loss": 0.3063, "step": 18584 }, { "epoch": 0.8706141378179604, "grad_norm": 0.6352640708846293, "learning_rate": 3.138143355873515e-06, "loss": 0.3273, "step": 18585 }, { "epoch": 0.8706609828078887, "grad_norm": 0.5658557102098316, "learning_rate": 3.1379599864001415e-06, "loss": 0.3055, "step": 18586 }, { "epoch": 0.870707827797817, "grad_norm": 0.6525084056666062, "learning_rate": 3.137776613255468e-06, "loss": 0.3487, "step": 18587 }, { "epoch": 0.8707546727877453, "grad_norm": 0.575773011578956, "learning_rate": 3.1375932364405514e-06, "loss": 0.3234, "step": 18588 }, { "epoch": 0.8708015177776737, "grad_norm": 0.6122212331871377, "learning_rate": 3.137409855956446e-06, "loss": 0.3345, "step": 18589 }, { "epoch": 0.870848362767602, "grad_norm": 0.6288526479655866, "learning_rate": 3.1372264718042063e-06, "loss": 0.3252, "step": 18590 }, { "epoch": 0.8708952077575304, "grad_norm": 0.6115617265272787, "learning_rate": 3.137043083984889e-06, "loss": 0.3204, "step": 18591 }, { "epoch": 0.8709420527474586, "grad_norm": 0.5681843427166151, "learning_rate": 3.1368596924995486e-06, "loss": 0.3107, "step": 18592 }, { "epoch": 0.870988897737387, "grad_norm": 0.5756597702628268, "learning_rate": 3.136676297349241e-06, "loss": 0.3166, "step": 18593 }, { "epoch": 0.8710357427273153, "grad_norm": 0.5413605406527542, "learning_rate": 3.1364928985350206e-06, "loss": 0.2921, "step": 18594 }, { "epoch": 0.8710825877172437, "grad_norm": 0.6332487419240228, "learning_rate": 3.1363094960579448e-06, "loss": 0.3209, "step": 18595 }, { "epoch": 0.8711294327071719, "grad_norm": 0.5525742317030929, "learning_rate": 3.136126089919067e-06, "loss": 0.3317, "step": 18596 }, { "epoch": 0.8711762776971003, "grad_norm": 0.613877502644144, "learning_rate": 3.1359426801194432e-06, "loss": 0.3134, "step": 18597 }, { "epoch": 0.8712231226870286, "grad_norm": 0.6448009308512159, "learning_rate": 3.13575926666013e-06, "loss": 0.3421, "step": 18598 }, { "epoch": 0.871269967676957, "grad_norm": 0.6225314044245586, "learning_rate": 3.135575849542181e-06, "loss": 0.3454, "step": 18599 }, { "epoch": 0.8713168126668853, "grad_norm": 0.5817609603766204, "learning_rate": 3.135392428766653e-06, "loss": 0.3305, "step": 18600 }, { "epoch": 0.8713636576568136, "grad_norm": 0.5747782422239656, "learning_rate": 3.135209004334602e-06, "loss": 0.3261, "step": 18601 }, { "epoch": 0.8714105026467419, "grad_norm": 0.5993782815074875, "learning_rate": 3.1350255762470826e-06, "loss": 0.3127, "step": 18602 }, { "epoch": 0.8714573476366703, "grad_norm": 0.5879075136400422, "learning_rate": 3.1348421445051497e-06, "loss": 0.348, "step": 18603 }, { "epoch": 0.8715041926265986, "grad_norm": 0.7840135333388223, "learning_rate": 3.134658709109861e-06, "loss": 0.3291, "step": 18604 }, { "epoch": 0.8715510376165269, "grad_norm": 0.6111714074190403, "learning_rate": 3.1344752700622697e-06, "loss": 0.3254, "step": 18605 }, { "epoch": 0.8715978826064552, "grad_norm": 0.5945372587424921, "learning_rate": 3.1342918273634338e-06, "loss": 0.2923, "step": 18606 }, { "epoch": 0.8716447275963836, "grad_norm": 0.6073098474862552, "learning_rate": 3.1341083810144076e-06, "loss": 0.3181, "step": 18607 }, { "epoch": 0.8716915725863119, "grad_norm": 0.5682106637758533, "learning_rate": 3.1339249310162466e-06, "loss": 0.3073, "step": 18608 }, { "epoch": 0.8717384175762403, "grad_norm": 0.6110803100591181, "learning_rate": 3.1337414773700075e-06, "loss": 0.3066, "step": 18609 }, { "epoch": 0.8717852625661685, "grad_norm": 0.537929266247186, "learning_rate": 3.133558020076745e-06, "loss": 0.3196, "step": 18610 }, { "epoch": 0.8718321075560969, "grad_norm": 0.5806440011389615, "learning_rate": 3.1333745591375155e-06, "loss": 0.3187, "step": 18611 }, { "epoch": 0.8718789525460252, "grad_norm": 0.6121074417860745, "learning_rate": 3.133191094553375e-06, "loss": 0.313, "step": 18612 }, { "epoch": 0.8719257975359536, "grad_norm": 0.6259202454854884, "learning_rate": 3.1330076263253782e-06, "loss": 0.3184, "step": 18613 }, { "epoch": 0.8719726425258818, "grad_norm": 0.6295922389668598, "learning_rate": 3.1328241544545823e-06, "loss": 0.3351, "step": 18614 }, { "epoch": 0.8720194875158102, "grad_norm": 0.6538794567157967, "learning_rate": 3.1326406789420415e-06, "loss": 0.3376, "step": 18615 }, { "epoch": 0.8720663325057385, "grad_norm": 0.6268893185770271, "learning_rate": 3.1324571997888133e-06, "loss": 0.3368, "step": 18616 }, { "epoch": 0.8721131774956669, "grad_norm": 0.5995735194601483, "learning_rate": 3.132273716995953e-06, "loss": 0.3042, "step": 18617 }, { "epoch": 0.8721600224855952, "grad_norm": 0.5771661964021609, "learning_rate": 3.132090230564516e-06, "loss": 0.3023, "step": 18618 }, { "epoch": 0.8722068674755235, "grad_norm": 0.5624093580167702, "learning_rate": 3.1319067404955587e-06, "loss": 0.2974, "step": 18619 }, { "epoch": 0.8722537124654518, "grad_norm": 0.5814355754977306, "learning_rate": 3.1317232467901376e-06, "loss": 0.3134, "step": 18620 }, { "epoch": 0.8723005574553802, "grad_norm": 0.6325928530320487, "learning_rate": 3.131539749449307e-06, "loss": 0.3226, "step": 18621 }, { "epoch": 0.8723474024453085, "grad_norm": 0.5709236068312534, "learning_rate": 3.1313562484741245e-06, "loss": 0.2868, "step": 18622 }, { "epoch": 0.8723942474352367, "grad_norm": 0.6558296687996823, "learning_rate": 3.1311727438656453e-06, "loss": 0.3582, "step": 18623 }, { "epoch": 0.8724410924251651, "grad_norm": 0.6279800846152634, "learning_rate": 3.130989235624925e-06, "loss": 0.3267, "step": 18624 }, { "epoch": 0.8724879374150935, "grad_norm": 0.5815301862418272, "learning_rate": 3.130805723753021e-06, "loss": 0.3089, "step": 18625 }, { "epoch": 0.8725347824050218, "grad_norm": 0.5388236050409049, "learning_rate": 3.1306222082509884e-06, "loss": 0.2996, "step": 18626 }, { "epoch": 0.8725816273949502, "grad_norm": 0.643417469557864, "learning_rate": 3.1304386891198833e-06, "loss": 0.32, "step": 18627 }, { "epoch": 0.8726284723848784, "grad_norm": 0.6282391766017725, "learning_rate": 3.130255166360763e-06, "loss": 0.3324, "step": 18628 }, { "epoch": 0.8726753173748067, "grad_norm": 0.534438515824576, "learning_rate": 3.1300716399746813e-06, "loss": 0.289, "step": 18629 }, { "epoch": 0.8727221623647351, "grad_norm": 0.5588243918234302, "learning_rate": 3.129888109962696e-06, "loss": 0.3101, "step": 18630 }, { "epoch": 0.8727690073546635, "grad_norm": 0.585774177094519, "learning_rate": 3.1297045763258637e-06, "loss": 0.3098, "step": 18631 }, { "epoch": 0.8728158523445917, "grad_norm": 0.5438872684788216, "learning_rate": 3.1295210390652385e-06, "loss": 0.3138, "step": 18632 }, { "epoch": 0.87286269733452, "grad_norm": 0.5864397729596046, "learning_rate": 3.1293374981818784e-06, "loss": 0.3269, "step": 18633 }, { "epoch": 0.8729095423244484, "grad_norm": 0.5489730602404415, "learning_rate": 3.12915395367684e-06, "loss": 0.3163, "step": 18634 }, { "epoch": 0.8729563873143767, "grad_norm": 0.5713724296283598, "learning_rate": 3.1289704055511785e-06, "loss": 0.3097, "step": 18635 }, { "epoch": 0.8730032323043051, "grad_norm": 0.5593593302937854, "learning_rate": 3.12878685380595e-06, "loss": 0.311, "step": 18636 }, { "epoch": 0.8730500772942333, "grad_norm": 0.6441284461012948, "learning_rate": 3.128603298442211e-06, "loss": 0.3366, "step": 18637 }, { "epoch": 0.8730969222841617, "grad_norm": 0.6390705662929489, "learning_rate": 3.128419739461018e-06, "loss": 0.3217, "step": 18638 }, { "epoch": 0.87314376727409, "grad_norm": 0.6130994033090447, "learning_rate": 3.128236176863428e-06, "loss": 0.3219, "step": 18639 }, { "epoch": 0.8731906122640184, "grad_norm": 0.6142460206812002, "learning_rate": 3.128052610650496e-06, "loss": 0.3231, "step": 18640 }, { "epoch": 0.8732374572539466, "grad_norm": 0.581378611016878, "learning_rate": 3.1278690408232805e-06, "loss": 0.3153, "step": 18641 }, { "epoch": 0.873284302243875, "grad_norm": 0.5643446226050326, "learning_rate": 3.1276854673828344e-06, "loss": 0.3261, "step": 18642 }, { "epoch": 0.8733311472338033, "grad_norm": 0.524938195185724, "learning_rate": 3.127501890330218e-06, "loss": 0.2966, "step": 18643 }, { "epoch": 0.8733779922237317, "grad_norm": 0.5831349176032928, "learning_rate": 3.127318309666485e-06, "loss": 0.3078, "step": 18644 }, { "epoch": 0.87342483721366, "grad_norm": 0.5579527955843708, "learning_rate": 3.1271347253926927e-06, "loss": 0.313, "step": 18645 }, { "epoch": 0.8734716822035883, "grad_norm": 0.5701122178715988, "learning_rate": 3.1269511375098977e-06, "loss": 0.301, "step": 18646 }, { "epoch": 0.8735185271935166, "grad_norm": 0.5627072233585292, "learning_rate": 3.1267675460191566e-06, "loss": 0.3081, "step": 18647 }, { "epoch": 0.873565372183445, "grad_norm": 0.6327322939639818, "learning_rate": 3.1265839509215264e-06, "loss": 0.3591, "step": 18648 }, { "epoch": 0.8736122171733733, "grad_norm": 0.6195748215257342, "learning_rate": 3.126400352218062e-06, "loss": 0.346, "step": 18649 }, { "epoch": 0.8736590621633016, "grad_norm": 0.5975686626692157, "learning_rate": 3.1262167499098217e-06, "loss": 0.3144, "step": 18650 }, { "epoch": 0.8737059071532299, "grad_norm": 0.587155275441969, "learning_rate": 3.126033143997862e-06, "loss": 0.3251, "step": 18651 }, { "epoch": 0.8737527521431583, "grad_norm": 0.5547362981965882, "learning_rate": 3.1258495344832375e-06, "loss": 0.3168, "step": 18652 }, { "epoch": 0.8737995971330866, "grad_norm": 0.5774671851736427, "learning_rate": 3.125665921367007e-06, "loss": 0.3136, "step": 18653 }, { "epoch": 0.873846442123015, "grad_norm": 0.651692746505016, "learning_rate": 3.125482304650226e-06, "loss": 0.3133, "step": 18654 }, { "epoch": 0.8738932871129432, "grad_norm": 0.6009959384834831, "learning_rate": 3.1252986843339523e-06, "loss": 0.3127, "step": 18655 }, { "epoch": 0.8739401321028716, "grad_norm": 0.5645727666575291, "learning_rate": 3.1251150604192414e-06, "loss": 0.3134, "step": 18656 }, { "epoch": 0.8739869770927999, "grad_norm": 0.6161667880680697, "learning_rate": 3.12493143290715e-06, "loss": 0.3213, "step": 18657 }, { "epoch": 0.8740338220827283, "grad_norm": 0.6028321626084905, "learning_rate": 3.124747801798736e-06, "loss": 0.323, "step": 18658 }, { "epoch": 0.8740806670726565, "grad_norm": 0.5963455016965925, "learning_rate": 3.1245641670950556e-06, "loss": 0.303, "step": 18659 }, { "epoch": 0.8741275120625849, "grad_norm": 0.6019584571559751, "learning_rate": 3.124380528797164e-06, "loss": 0.3205, "step": 18660 }, { "epoch": 0.8741743570525132, "grad_norm": 0.6142349430722519, "learning_rate": 3.1241968869061207e-06, "loss": 0.3112, "step": 18661 }, { "epoch": 0.8742212020424416, "grad_norm": 0.5417113994467209, "learning_rate": 3.1240132414229813e-06, "loss": 0.2802, "step": 18662 }, { "epoch": 0.8742680470323699, "grad_norm": 0.6285256623381859, "learning_rate": 3.1238295923488016e-06, "loss": 0.3229, "step": 18663 }, { "epoch": 0.8743148920222982, "grad_norm": 0.6029210989135573, "learning_rate": 3.1236459396846393e-06, "loss": 0.3208, "step": 18664 }, { "epoch": 0.8743617370122265, "grad_norm": 0.6557212954748216, "learning_rate": 3.1234622834315526e-06, "loss": 0.3372, "step": 18665 }, { "epoch": 0.8744085820021549, "grad_norm": 0.5721461451116174, "learning_rate": 3.123278623590596e-06, "loss": 0.3015, "step": 18666 }, { "epoch": 0.8744554269920832, "grad_norm": 0.6087754843788642, "learning_rate": 3.123094960162828e-06, "loss": 0.3269, "step": 18667 }, { "epoch": 0.8745022719820115, "grad_norm": 0.5806320257101912, "learning_rate": 3.1229112931493056e-06, "loss": 0.3131, "step": 18668 }, { "epoch": 0.8745491169719398, "grad_norm": 0.6366208647687355, "learning_rate": 3.1227276225510845e-06, "loss": 0.3314, "step": 18669 }, { "epoch": 0.8745959619618682, "grad_norm": 0.5413464008706891, "learning_rate": 3.122543948369223e-06, "loss": 0.3217, "step": 18670 }, { "epoch": 0.8746428069517965, "grad_norm": 0.5584117141654205, "learning_rate": 3.1223602706047773e-06, "loss": 0.3157, "step": 18671 }, { "epoch": 0.8746896519417249, "grad_norm": 0.5611119904454699, "learning_rate": 3.1221765892588046e-06, "loss": 0.2901, "step": 18672 }, { "epoch": 0.8747364969316531, "grad_norm": 0.5740630625626846, "learning_rate": 3.1219929043323618e-06, "loss": 0.316, "step": 18673 }, { "epoch": 0.8747833419215815, "grad_norm": 0.6340149594112247, "learning_rate": 3.1218092158265068e-06, "loss": 0.3348, "step": 18674 }, { "epoch": 0.8748301869115098, "grad_norm": 0.5820997329091551, "learning_rate": 3.1216255237422955e-06, "loss": 0.3048, "step": 18675 }, { "epoch": 0.8748770319014382, "grad_norm": 0.5921793294826904, "learning_rate": 3.1214418280807858e-06, "loss": 0.3145, "step": 18676 }, { "epoch": 0.8749238768913664, "grad_norm": 0.5735146716094479, "learning_rate": 3.1212581288430343e-06, "loss": 0.3173, "step": 18677 }, { "epoch": 0.8749707218812948, "grad_norm": 0.5923586263561611, "learning_rate": 3.1210744260300983e-06, "loss": 0.3235, "step": 18678 }, { "epoch": 0.8750175668712231, "grad_norm": 0.5683937139921817, "learning_rate": 3.120890719643036e-06, "loss": 0.3202, "step": 18679 }, { "epoch": 0.8750644118611515, "grad_norm": 0.6120256782463913, "learning_rate": 3.1207070096829033e-06, "loss": 0.3189, "step": 18680 }, { "epoch": 0.8751112568510798, "grad_norm": 0.5710158994248136, "learning_rate": 3.120523296150757e-06, "loss": 0.3001, "step": 18681 }, { "epoch": 0.8751581018410081, "grad_norm": 0.5906629247168139, "learning_rate": 3.120339579047656e-06, "loss": 0.3199, "step": 18682 }, { "epoch": 0.8752049468309364, "grad_norm": 0.6080617331049374, "learning_rate": 3.1201558583746566e-06, "loss": 0.3183, "step": 18683 }, { "epoch": 0.8752517918208648, "grad_norm": 0.5674882091852123, "learning_rate": 3.1199721341328153e-06, "loss": 0.3176, "step": 18684 }, { "epoch": 0.8752986368107931, "grad_norm": 0.563861147498252, "learning_rate": 3.1197884063231903e-06, "loss": 0.316, "step": 18685 }, { "epoch": 0.8753454818007214, "grad_norm": 0.6092000342456988, "learning_rate": 3.1196046749468397e-06, "loss": 0.3267, "step": 18686 }, { "epoch": 0.8753923267906497, "grad_norm": 0.5985188662602341, "learning_rate": 3.119420940004819e-06, "loss": 0.3135, "step": 18687 }, { "epoch": 0.8754391717805781, "grad_norm": 0.5315946732918396, "learning_rate": 3.119237201498187e-06, "loss": 0.3027, "step": 18688 }, { "epoch": 0.8754860167705064, "grad_norm": 0.5990517455543634, "learning_rate": 3.1190534594280004e-06, "loss": 0.3344, "step": 18689 }, { "epoch": 0.8755328617604348, "grad_norm": 0.5896372875876377, "learning_rate": 3.1188697137953174e-06, "loss": 0.3334, "step": 18690 }, { "epoch": 0.875579706750363, "grad_norm": 0.538052979865287, "learning_rate": 3.1186859646011937e-06, "loss": 0.3132, "step": 18691 }, { "epoch": 0.8756265517402914, "grad_norm": 0.6118952968609163, "learning_rate": 3.1185022118466877e-06, "loss": 0.3223, "step": 18692 }, { "epoch": 0.8756733967302197, "grad_norm": 0.6113694053542121, "learning_rate": 3.1183184555328583e-06, "loss": 0.3334, "step": 18693 }, { "epoch": 0.8757202417201481, "grad_norm": 0.6309443417435262, "learning_rate": 3.11813469566076e-06, "loss": 0.3311, "step": 18694 }, { "epoch": 0.8757670867100763, "grad_norm": 0.6047038976551777, "learning_rate": 3.1179509322314525e-06, "loss": 0.3349, "step": 18695 }, { "epoch": 0.8758139317000047, "grad_norm": 0.5348347133014066, "learning_rate": 3.117767165245993e-06, "loss": 0.2875, "step": 18696 }, { "epoch": 0.875860776689933, "grad_norm": 0.5912565344354479, "learning_rate": 3.1175833947054384e-06, "loss": 0.3168, "step": 18697 }, { "epoch": 0.8759076216798614, "grad_norm": 0.6614441976327562, "learning_rate": 3.117399620610847e-06, "loss": 0.3307, "step": 18698 }, { "epoch": 0.8759544666697897, "grad_norm": 0.5409186617189079, "learning_rate": 3.1172158429632756e-06, "loss": 0.3058, "step": 18699 }, { "epoch": 0.876001311659718, "grad_norm": 0.6273144517343228, "learning_rate": 3.1170320617637824e-06, "loss": 0.3155, "step": 18700 }, { "epoch": 0.8760481566496463, "grad_norm": 0.5623987856780727, "learning_rate": 3.1168482770134247e-06, "loss": 0.3167, "step": 18701 }, { "epoch": 0.8760950016395747, "grad_norm": 0.5491909192581057, "learning_rate": 3.11666448871326e-06, "loss": 0.317, "step": 18702 }, { "epoch": 0.876141846629503, "grad_norm": 0.642121526721498, "learning_rate": 3.116480696864346e-06, "loss": 0.3451, "step": 18703 }, { "epoch": 0.8761886916194312, "grad_norm": 0.587228327868739, "learning_rate": 3.116296901467741e-06, "loss": 0.3191, "step": 18704 }, { "epoch": 0.8762355366093596, "grad_norm": 0.5568309200065577, "learning_rate": 3.116113102524502e-06, "loss": 0.2912, "step": 18705 }, { "epoch": 0.876282381599288, "grad_norm": 0.6042395134993732, "learning_rate": 3.1159293000356864e-06, "loss": 0.308, "step": 18706 }, { "epoch": 0.8763292265892163, "grad_norm": 0.601585021070634, "learning_rate": 3.1157454940023536e-06, "loss": 0.2946, "step": 18707 }, { "epoch": 0.8763760715791447, "grad_norm": 0.6075793818296603, "learning_rate": 3.1155616844255597e-06, "loss": 0.3262, "step": 18708 }, { "epoch": 0.8764229165690729, "grad_norm": 0.6011092209316032, "learning_rate": 3.1153778713063627e-06, "loss": 0.321, "step": 18709 }, { "epoch": 0.8764697615590012, "grad_norm": 0.6086021681922245, "learning_rate": 3.1151940546458216e-06, "loss": 0.3194, "step": 18710 }, { "epoch": 0.8765166065489296, "grad_norm": 0.5614870222687198, "learning_rate": 3.115010234444993e-06, "loss": 0.3229, "step": 18711 }, { "epoch": 0.876563451538858, "grad_norm": 0.5457101368283778, "learning_rate": 3.1148264107049344e-06, "loss": 0.3164, "step": 18712 }, { "epoch": 0.8766102965287862, "grad_norm": 0.5787961985700576, "learning_rate": 3.114642583426704e-06, "loss": 0.3217, "step": 18713 }, { "epoch": 0.8766571415187145, "grad_norm": 0.5493069944991642, "learning_rate": 3.1144587526113616e-06, "loss": 0.2872, "step": 18714 }, { "epoch": 0.8767039865086429, "grad_norm": 0.5119966229843826, "learning_rate": 3.114274918259963e-06, "loss": 0.287, "step": 18715 }, { "epoch": 0.8767508314985712, "grad_norm": 0.5508099256620247, "learning_rate": 3.114091080373566e-06, "loss": 0.2971, "step": 18716 }, { "epoch": 0.8767976764884996, "grad_norm": 0.5400349108744306, "learning_rate": 3.1139072389532294e-06, "loss": 0.3207, "step": 18717 }, { "epoch": 0.8768445214784278, "grad_norm": 0.5902965669962125, "learning_rate": 3.1137233940000113e-06, "loss": 0.326, "step": 18718 }, { "epoch": 0.8768913664683562, "grad_norm": 0.5669603791824295, "learning_rate": 3.1135395455149686e-06, "loss": 0.2968, "step": 18719 }, { "epoch": 0.8769382114582845, "grad_norm": 0.6071072420587246, "learning_rate": 3.113355693499161e-06, "loss": 0.3214, "step": 18720 }, { "epoch": 0.8769850564482129, "grad_norm": 0.6089751103922364, "learning_rate": 3.113171837953645e-06, "loss": 0.3211, "step": 18721 }, { "epoch": 0.8770319014381411, "grad_norm": 0.6436706337234067, "learning_rate": 3.1129879788794793e-06, "loss": 0.3342, "step": 18722 }, { "epoch": 0.8770787464280695, "grad_norm": 0.6050569207063177, "learning_rate": 3.1128041162777224e-06, "loss": 0.3238, "step": 18723 }, { "epoch": 0.8771255914179978, "grad_norm": 0.5986101767444105, "learning_rate": 3.1126202501494306e-06, "loss": 0.3251, "step": 18724 }, { "epoch": 0.8771724364079262, "grad_norm": 0.6017828675680321, "learning_rate": 3.112436380495664e-06, "loss": 0.321, "step": 18725 }, { "epoch": 0.8772192813978545, "grad_norm": 0.5444391637697443, "learning_rate": 3.1122525073174803e-06, "loss": 0.3167, "step": 18726 }, { "epoch": 0.8772661263877828, "grad_norm": 0.5952855355038961, "learning_rate": 3.1120686306159363e-06, "loss": 0.3128, "step": 18727 }, { "epoch": 0.8773129713777111, "grad_norm": 0.5752051679285594, "learning_rate": 3.1118847503920917e-06, "loss": 0.3166, "step": 18728 }, { "epoch": 0.8773598163676395, "grad_norm": 0.5928901284537201, "learning_rate": 3.1117008666470046e-06, "loss": 0.3321, "step": 18729 }, { "epoch": 0.8774066613575678, "grad_norm": 0.5739556857034193, "learning_rate": 3.111516979381732e-06, "loss": 0.3188, "step": 18730 }, { "epoch": 0.8774535063474961, "grad_norm": 0.5930056938131806, "learning_rate": 3.1113330885973335e-06, "loss": 0.3106, "step": 18731 }, { "epoch": 0.8775003513374244, "grad_norm": 0.6177776332652145, "learning_rate": 3.1111491942948667e-06, "loss": 0.3341, "step": 18732 }, { "epoch": 0.8775471963273528, "grad_norm": 0.6520967923170893, "learning_rate": 3.110965296475389e-06, "loss": 0.3085, "step": 18733 }, { "epoch": 0.8775940413172811, "grad_norm": 0.5814106160735382, "learning_rate": 3.1107813951399602e-06, "loss": 0.3089, "step": 18734 }, { "epoch": 0.8776408863072095, "grad_norm": 0.6335638603579282, "learning_rate": 3.1105974902896386e-06, "loss": 0.324, "step": 18735 }, { "epoch": 0.8776877312971377, "grad_norm": 0.5846268319362877, "learning_rate": 3.110413581925481e-06, "loss": 0.3045, "step": 18736 }, { "epoch": 0.8777345762870661, "grad_norm": 0.6030959022256175, "learning_rate": 3.110229670048547e-06, "loss": 0.3081, "step": 18737 }, { "epoch": 0.8777814212769944, "grad_norm": 0.6199170206065405, "learning_rate": 3.1100457546598946e-06, "loss": 0.3455, "step": 18738 }, { "epoch": 0.8778282662669228, "grad_norm": 0.5816724707503294, "learning_rate": 3.1098618357605825e-06, "loss": 0.3197, "step": 18739 }, { "epoch": 0.877875111256851, "grad_norm": 0.6141531161402654, "learning_rate": 3.109677913351668e-06, "loss": 0.3056, "step": 18740 }, { "epoch": 0.8779219562467794, "grad_norm": 0.6079790859312102, "learning_rate": 3.109493987434211e-06, "loss": 0.3224, "step": 18741 }, { "epoch": 0.8779688012367077, "grad_norm": 0.6067004598239131, "learning_rate": 3.1093100580092694e-06, "loss": 0.3261, "step": 18742 }, { "epoch": 0.8780156462266361, "grad_norm": 0.5499476924569104, "learning_rate": 3.109126125077901e-06, "loss": 0.3092, "step": 18743 }, { "epoch": 0.8780624912165644, "grad_norm": 0.585154735137867, "learning_rate": 3.108942188641165e-06, "loss": 0.312, "step": 18744 }, { "epoch": 0.8781093362064927, "grad_norm": 0.6076555077137026, "learning_rate": 3.10875824870012e-06, "loss": 0.3238, "step": 18745 }, { "epoch": 0.878156181196421, "grad_norm": 0.5738230818478025, "learning_rate": 3.1085743052558243e-06, "loss": 0.2993, "step": 18746 }, { "epoch": 0.8782030261863494, "grad_norm": 0.5690972643471484, "learning_rate": 3.1083903583093366e-06, "loss": 0.3145, "step": 18747 }, { "epoch": 0.8782498711762777, "grad_norm": 0.601784292333255, "learning_rate": 3.1082064078617148e-06, "loss": 0.2987, "step": 18748 }, { "epoch": 0.878296716166206, "grad_norm": 0.5622491398879812, "learning_rate": 3.1080224539140186e-06, "loss": 0.2975, "step": 18749 }, { "epoch": 0.8783435611561343, "grad_norm": 0.5859473608766357, "learning_rate": 3.107838496467306e-06, "loss": 0.309, "step": 18750 }, { "epoch": 0.8783904061460627, "grad_norm": 0.550648366904725, "learning_rate": 3.1076545355226344e-06, "loss": 0.3085, "step": 18751 }, { "epoch": 0.878437251135991, "grad_norm": 0.5797266090372224, "learning_rate": 3.107470571081065e-06, "loss": 0.3309, "step": 18752 }, { "epoch": 0.8784840961259194, "grad_norm": 0.5775246276710482, "learning_rate": 3.1072866031436548e-06, "loss": 0.3074, "step": 18753 }, { "epoch": 0.8785309411158476, "grad_norm": 0.6102457169270187, "learning_rate": 3.1071026317114626e-06, "loss": 0.3165, "step": 18754 }, { "epoch": 0.878577786105776, "grad_norm": 0.5631952432941437, "learning_rate": 3.106918656785547e-06, "loss": 0.2995, "step": 18755 }, { "epoch": 0.8786246310957043, "grad_norm": 0.5590166593986499, "learning_rate": 3.1067346783669678e-06, "loss": 0.3001, "step": 18756 }, { "epoch": 0.8786714760856327, "grad_norm": 0.6090882134057654, "learning_rate": 3.1065506964567828e-06, "loss": 0.3046, "step": 18757 }, { "epoch": 0.8787183210755609, "grad_norm": 0.5856852177050508, "learning_rate": 3.1063667110560508e-06, "loss": 0.3095, "step": 18758 }, { "epoch": 0.8787651660654893, "grad_norm": 0.6455468385209266, "learning_rate": 3.1061827221658306e-06, "loss": 0.3199, "step": 18759 }, { "epoch": 0.8788120110554176, "grad_norm": 0.5647997861156269, "learning_rate": 3.1059987297871824e-06, "loss": 0.3333, "step": 18760 }, { "epoch": 0.878858856045346, "grad_norm": 0.582858176755859, "learning_rate": 3.1058147339211627e-06, "loss": 0.3113, "step": 18761 }, { "epoch": 0.8789057010352743, "grad_norm": 0.6777371731999368, "learning_rate": 3.105630734568832e-06, "loss": 0.3325, "step": 18762 }, { "epoch": 0.8789525460252026, "grad_norm": 0.5916711108698128, "learning_rate": 3.105446731731248e-06, "loss": 0.3244, "step": 18763 }, { "epoch": 0.8789993910151309, "grad_norm": 0.5795829442247566, "learning_rate": 3.10526272540947e-06, "loss": 0.3264, "step": 18764 }, { "epoch": 0.8790462360050593, "grad_norm": 0.596302182734665, "learning_rate": 3.1050787156045584e-06, "loss": 0.3132, "step": 18765 }, { "epoch": 0.8790930809949876, "grad_norm": 0.5532774949798331, "learning_rate": 3.10489470231757e-06, "loss": 0.3133, "step": 18766 }, { "epoch": 0.8791399259849159, "grad_norm": 0.5645576119647061, "learning_rate": 3.104710685549565e-06, "loss": 0.2988, "step": 18767 }, { "epoch": 0.8791867709748442, "grad_norm": 0.5702776361630105, "learning_rate": 3.104526665301602e-06, "loss": 0.3303, "step": 18768 }, { "epoch": 0.8792336159647726, "grad_norm": 0.6218962899637441, "learning_rate": 3.1043426415747395e-06, "loss": 0.3146, "step": 18769 }, { "epoch": 0.8792804609547009, "grad_norm": 0.5872236788314632, "learning_rate": 3.1041586143700376e-06, "loss": 0.3361, "step": 18770 }, { "epoch": 0.8793273059446293, "grad_norm": 0.6152534992021688, "learning_rate": 3.1039745836885544e-06, "loss": 0.3269, "step": 18771 }, { "epoch": 0.8793741509345575, "grad_norm": 0.5695488577736826, "learning_rate": 3.103790549531349e-06, "loss": 0.3164, "step": 18772 }, { "epoch": 0.8794209959244859, "grad_norm": 0.5701635270752785, "learning_rate": 3.1036065118994806e-06, "loss": 0.3208, "step": 18773 }, { "epoch": 0.8794678409144142, "grad_norm": 0.5808897010175167, "learning_rate": 3.1034224707940097e-06, "loss": 0.3199, "step": 18774 }, { "epoch": 0.8795146859043426, "grad_norm": 0.6098323921294034, "learning_rate": 3.103238426215993e-06, "loss": 0.3022, "step": 18775 }, { "epoch": 0.8795615308942708, "grad_norm": 0.5170630400595915, "learning_rate": 3.1030543781664906e-06, "loss": 0.2991, "step": 18776 }, { "epoch": 0.8796083758841992, "grad_norm": 0.5539257160772358, "learning_rate": 3.102870326646563e-06, "loss": 0.3186, "step": 18777 }, { "epoch": 0.8796552208741275, "grad_norm": 0.5612442394538598, "learning_rate": 3.1026862716572677e-06, "loss": 0.3046, "step": 18778 }, { "epoch": 0.8797020658640559, "grad_norm": 0.6109845329316675, "learning_rate": 3.1025022131996637e-06, "loss": 0.3195, "step": 18779 }, { "epoch": 0.8797489108539842, "grad_norm": 0.6089756930487856, "learning_rate": 3.102318151274812e-06, "loss": 0.328, "step": 18780 }, { "epoch": 0.8797957558439125, "grad_norm": 0.5654798371342621, "learning_rate": 3.1021340858837702e-06, "loss": 0.3115, "step": 18781 }, { "epoch": 0.8798426008338408, "grad_norm": 0.5978427269365935, "learning_rate": 3.101950017027597e-06, "loss": 0.3315, "step": 18782 }, { "epoch": 0.8798894458237692, "grad_norm": 0.6072920180209392, "learning_rate": 3.101765944707354e-06, "loss": 0.34, "step": 18783 }, { "epoch": 0.8799362908136975, "grad_norm": 0.5550521278878247, "learning_rate": 3.1015818689240994e-06, "loss": 0.3088, "step": 18784 }, { "epoch": 0.8799831358036257, "grad_norm": 0.5363203866283055, "learning_rate": 3.1013977896788914e-06, "loss": 0.2998, "step": 18785 }, { "epoch": 0.8800299807935541, "grad_norm": 0.5573293475756528, "learning_rate": 3.101213706972791e-06, "loss": 0.307, "step": 18786 }, { "epoch": 0.8800768257834825, "grad_norm": 0.6506181107675543, "learning_rate": 3.101029620806857e-06, "loss": 0.3364, "step": 18787 }, { "epoch": 0.8801236707734108, "grad_norm": 0.5805963804994365, "learning_rate": 3.100845531182148e-06, "loss": 0.3083, "step": 18788 }, { "epoch": 0.8801705157633392, "grad_norm": 0.6204916937028635, "learning_rate": 3.100661438099724e-06, "loss": 0.3055, "step": 18789 }, { "epoch": 0.8802173607532674, "grad_norm": 0.6033941048711015, "learning_rate": 3.100477341560645e-06, "loss": 0.316, "step": 18790 }, { "epoch": 0.8802642057431957, "grad_norm": 0.6138823109082426, "learning_rate": 3.1002932415659693e-06, "loss": 0.3082, "step": 18791 }, { "epoch": 0.8803110507331241, "grad_norm": 0.6261704718074702, "learning_rate": 3.100109138116757e-06, "loss": 0.3101, "step": 18792 }, { "epoch": 0.8803578957230525, "grad_norm": 0.5953570096217856, "learning_rate": 3.0999250312140677e-06, "loss": 0.3372, "step": 18793 }, { "epoch": 0.8804047407129807, "grad_norm": 0.5969868043356297, "learning_rate": 3.09974092085896e-06, "loss": 0.319, "step": 18794 }, { "epoch": 0.880451585702909, "grad_norm": 0.5668169298919508, "learning_rate": 3.0995568070524945e-06, "loss": 0.3235, "step": 18795 }, { "epoch": 0.8804984306928374, "grad_norm": 0.5562849414907266, "learning_rate": 3.0993726897957305e-06, "loss": 0.3104, "step": 18796 }, { "epoch": 0.8805452756827657, "grad_norm": 0.604811286536307, "learning_rate": 3.0991885690897265e-06, "loss": 0.3106, "step": 18797 }, { "epoch": 0.8805921206726941, "grad_norm": 0.5885948712605509, "learning_rate": 3.099004444935544e-06, "loss": 0.325, "step": 18798 }, { "epoch": 0.8806389656626223, "grad_norm": 0.7807980663356942, "learning_rate": 3.0988203173342407e-06, "loss": 0.3313, "step": 18799 }, { "epoch": 0.8806858106525507, "grad_norm": 0.6231615191151483, "learning_rate": 3.098636186286877e-06, "loss": 0.3046, "step": 18800 }, { "epoch": 0.880732655642479, "grad_norm": 0.5866370081439197, "learning_rate": 3.0984520517945125e-06, "loss": 0.2968, "step": 18801 }, { "epoch": 0.8807795006324074, "grad_norm": 0.635527882745114, "learning_rate": 3.0982679138582074e-06, "loss": 0.295, "step": 18802 }, { "epoch": 0.8808263456223356, "grad_norm": 0.6132496201828868, "learning_rate": 3.0980837724790205e-06, "loss": 0.3228, "step": 18803 }, { "epoch": 0.880873190612264, "grad_norm": 0.5963557535231794, "learning_rate": 3.097899627658011e-06, "loss": 0.3086, "step": 18804 }, { "epoch": 0.8809200356021923, "grad_norm": 0.6316886580885944, "learning_rate": 3.097715479396241e-06, "loss": 0.341, "step": 18805 }, { "epoch": 0.8809668805921207, "grad_norm": 0.6189935981719801, "learning_rate": 3.0975313276947676e-06, "loss": 0.3321, "step": 18806 }, { "epoch": 0.881013725582049, "grad_norm": 0.5900653598997104, "learning_rate": 3.097347172554651e-06, "loss": 0.3024, "step": 18807 }, { "epoch": 0.8810605705719773, "grad_norm": 0.6436990371547884, "learning_rate": 3.097163013976953e-06, "loss": 0.3126, "step": 18808 }, { "epoch": 0.8811074155619056, "grad_norm": 0.5794917142661343, "learning_rate": 3.0969788519627315e-06, "loss": 0.3102, "step": 18809 }, { "epoch": 0.881154260551834, "grad_norm": 0.5854100064010249, "learning_rate": 3.096794686513046e-06, "loss": 0.3122, "step": 18810 }, { "epoch": 0.8812011055417623, "grad_norm": 0.6158422777327867, "learning_rate": 3.096610517628958e-06, "loss": 0.308, "step": 18811 }, { "epoch": 0.8812479505316906, "grad_norm": 0.6053805930595437, "learning_rate": 3.0964263453115263e-06, "loss": 0.3102, "step": 18812 }, { "epoch": 0.8812947955216189, "grad_norm": 0.5665820120444429, "learning_rate": 3.0962421695618108e-06, "loss": 0.3224, "step": 18813 }, { "epoch": 0.8813416405115473, "grad_norm": 0.5673749337201413, "learning_rate": 3.0960579903808715e-06, "loss": 0.3044, "step": 18814 }, { "epoch": 0.8813884855014756, "grad_norm": 0.6964297878188311, "learning_rate": 3.0958738077697677e-06, "loss": 0.3091, "step": 18815 }, { "epoch": 0.881435330491404, "grad_norm": 0.6461712350736406, "learning_rate": 3.0956896217295603e-06, "loss": 0.3378, "step": 18816 }, { "epoch": 0.8814821754813322, "grad_norm": 0.609096431174646, "learning_rate": 3.095505432261309e-06, "loss": 0.3231, "step": 18817 }, { "epoch": 0.8815290204712606, "grad_norm": 0.6085294073591101, "learning_rate": 3.095321239366073e-06, "loss": 0.3282, "step": 18818 }, { "epoch": 0.8815758654611889, "grad_norm": 0.5717042296922016, "learning_rate": 3.0951370430449135e-06, "loss": 0.3247, "step": 18819 }, { "epoch": 0.8816227104511173, "grad_norm": 0.5614277807479244, "learning_rate": 3.09495284329889e-06, "loss": 0.2942, "step": 18820 }, { "epoch": 0.8816695554410455, "grad_norm": 0.6164055679798648, "learning_rate": 3.094768640129062e-06, "loss": 0.3088, "step": 18821 }, { "epoch": 0.8817164004309739, "grad_norm": 0.6205826381741623, "learning_rate": 3.0945844335364905e-06, "loss": 0.3187, "step": 18822 }, { "epoch": 0.8817632454209022, "grad_norm": 0.5852130314957824, "learning_rate": 3.094400223522235e-06, "loss": 0.3113, "step": 18823 }, { "epoch": 0.8818100904108306, "grad_norm": 0.5740727307074912, "learning_rate": 3.0942160100873547e-06, "loss": 0.3056, "step": 18824 }, { "epoch": 0.8818569354007589, "grad_norm": 0.5733814131535593, "learning_rate": 3.094031793232911e-06, "loss": 0.3262, "step": 18825 }, { "epoch": 0.8819037803906872, "grad_norm": 0.5762394599066333, "learning_rate": 3.093847572959964e-06, "loss": 0.298, "step": 18826 }, { "epoch": 0.8819506253806155, "grad_norm": 0.5941449486933527, "learning_rate": 3.0936633492695734e-06, "loss": 0.3177, "step": 18827 }, { "epoch": 0.8819974703705439, "grad_norm": 0.5953259055786149, "learning_rate": 3.0934791221627993e-06, "loss": 0.3358, "step": 18828 }, { "epoch": 0.8820443153604722, "grad_norm": 0.5076714524380318, "learning_rate": 3.0932948916407024e-06, "loss": 0.298, "step": 18829 }, { "epoch": 0.8820911603504005, "grad_norm": 0.5776209185102135, "learning_rate": 3.0931106577043423e-06, "loss": 0.3146, "step": 18830 }, { "epoch": 0.8821380053403288, "grad_norm": 0.5789114077184612, "learning_rate": 3.0929264203547787e-06, "loss": 0.3206, "step": 18831 }, { "epoch": 0.8821848503302572, "grad_norm": 0.5457186762207387, "learning_rate": 3.0927421795930733e-06, "loss": 0.2963, "step": 18832 }, { "epoch": 0.8822316953201855, "grad_norm": 0.563818799950678, "learning_rate": 3.092557935420286e-06, "loss": 0.3273, "step": 18833 }, { "epoch": 0.8822785403101139, "grad_norm": 0.6087955705647563, "learning_rate": 3.092373687837476e-06, "loss": 0.3274, "step": 18834 }, { "epoch": 0.8823253853000421, "grad_norm": 0.6176427148763018, "learning_rate": 3.0921894368457044e-06, "loss": 0.3359, "step": 18835 }, { "epoch": 0.8823722302899705, "grad_norm": 0.5441233318192105, "learning_rate": 3.0920051824460316e-06, "loss": 0.3055, "step": 18836 }, { "epoch": 0.8824190752798988, "grad_norm": 0.581291115885075, "learning_rate": 3.091820924639518e-06, "loss": 0.3247, "step": 18837 }, { "epoch": 0.8824659202698272, "grad_norm": 0.5950841540010294, "learning_rate": 3.091636663427224e-06, "loss": 0.33, "step": 18838 }, { "epoch": 0.8825127652597554, "grad_norm": 0.5535633132387912, "learning_rate": 3.0914523988102085e-06, "loss": 0.3013, "step": 18839 }, { "epoch": 0.8825596102496838, "grad_norm": 0.5956738973558182, "learning_rate": 3.0912681307895344e-06, "loss": 0.3111, "step": 18840 }, { "epoch": 0.8826064552396121, "grad_norm": 0.5860209490689856, "learning_rate": 3.09108385936626e-06, "loss": 0.3194, "step": 18841 }, { "epoch": 0.8826533002295405, "grad_norm": 0.5704056544179975, "learning_rate": 3.0908995845414464e-06, "loss": 0.3127, "step": 18842 }, { "epoch": 0.8827001452194688, "grad_norm": 0.5966815205402225, "learning_rate": 3.0907153063161544e-06, "loss": 0.3239, "step": 18843 }, { "epoch": 0.8827469902093971, "grad_norm": 0.5768440893985541, "learning_rate": 3.090531024691445e-06, "loss": 0.2953, "step": 18844 }, { "epoch": 0.8827938351993254, "grad_norm": 0.6125499690792687, "learning_rate": 3.0903467396683773e-06, "loss": 0.3113, "step": 18845 }, { "epoch": 0.8828406801892538, "grad_norm": 0.6844034056154644, "learning_rate": 3.090162451248012e-06, "loss": 0.3068, "step": 18846 }, { "epoch": 0.8828875251791821, "grad_norm": 0.6684060987089552, "learning_rate": 3.089978159431411e-06, "loss": 0.3285, "step": 18847 }, { "epoch": 0.8829343701691104, "grad_norm": 0.5973694053673214, "learning_rate": 3.089793864219634e-06, "loss": 0.313, "step": 18848 }, { "epoch": 0.8829812151590387, "grad_norm": 0.6274234291378511, "learning_rate": 3.089609565613741e-06, "loss": 0.2986, "step": 18849 }, { "epoch": 0.8830280601489671, "grad_norm": 0.5760768706116347, "learning_rate": 3.0894252636147937e-06, "loss": 0.3108, "step": 18850 }, { "epoch": 0.8830749051388954, "grad_norm": 0.593437378345461, "learning_rate": 3.089240958223852e-06, "loss": 0.3094, "step": 18851 }, { "epoch": 0.8831217501288238, "grad_norm": 0.5405158668734763, "learning_rate": 3.0890566494419767e-06, "loss": 0.307, "step": 18852 }, { "epoch": 0.883168595118752, "grad_norm": 0.5817658626695194, "learning_rate": 3.0888723372702278e-06, "loss": 0.2993, "step": 18853 }, { "epoch": 0.8832154401086804, "grad_norm": 0.5450148772306842, "learning_rate": 3.0886880217096677e-06, "loss": 0.3012, "step": 18854 }, { "epoch": 0.8832622850986087, "grad_norm": 0.6260578705309267, "learning_rate": 3.088503702761355e-06, "loss": 0.3254, "step": 18855 }, { "epoch": 0.8833091300885371, "grad_norm": 0.5445700292256274, "learning_rate": 3.0883193804263516e-06, "loss": 0.2929, "step": 18856 }, { "epoch": 0.8833559750784653, "grad_norm": 0.5580700009647619, "learning_rate": 3.0881350547057182e-06, "loss": 0.3085, "step": 18857 }, { "epoch": 0.8834028200683937, "grad_norm": 0.5920511811704946, "learning_rate": 3.087950725600516e-06, "loss": 0.3203, "step": 18858 }, { "epoch": 0.883449665058322, "grad_norm": 0.570174412092438, "learning_rate": 3.087766393111804e-06, "loss": 0.3214, "step": 18859 }, { "epoch": 0.8834965100482504, "grad_norm": 0.6061107542746494, "learning_rate": 3.087582057240645e-06, "loss": 0.3216, "step": 18860 }, { "epoch": 0.8835433550381787, "grad_norm": 0.6235820418871597, "learning_rate": 3.0873977179880986e-06, "loss": 0.3206, "step": 18861 }, { "epoch": 0.883590200028107, "grad_norm": 0.5748146208030255, "learning_rate": 3.087213375355226e-06, "loss": 0.3357, "step": 18862 }, { "epoch": 0.8836370450180353, "grad_norm": 0.5509612932521705, "learning_rate": 3.087029029343088e-06, "loss": 0.3048, "step": 18863 }, { "epoch": 0.8836838900079637, "grad_norm": 0.5914978973510471, "learning_rate": 3.086844679952745e-06, "loss": 0.3214, "step": 18864 }, { "epoch": 0.883730734997892, "grad_norm": 0.6224662890880531, "learning_rate": 3.0866603271852594e-06, "loss": 0.3378, "step": 18865 }, { "epoch": 0.8837775799878202, "grad_norm": 0.6069057308249082, "learning_rate": 3.0864759710416907e-06, "loss": 0.3364, "step": 18866 }, { "epoch": 0.8838244249777486, "grad_norm": 0.5507768581965661, "learning_rate": 3.0862916115231e-06, "loss": 0.2951, "step": 18867 }, { "epoch": 0.883871269967677, "grad_norm": 0.6012075830080202, "learning_rate": 3.0861072486305487e-06, "loss": 0.3274, "step": 18868 }, { "epoch": 0.8839181149576053, "grad_norm": 0.6045843288729784, "learning_rate": 3.085922882365097e-06, "loss": 0.3244, "step": 18869 }, { "epoch": 0.8839649599475337, "grad_norm": 0.5886301421812993, "learning_rate": 3.0857385127278066e-06, "loss": 0.2965, "step": 18870 }, { "epoch": 0.8840118049374619, "grad_norm": 0.6207420493710104, "learning_rate": 3.0855541397197386e-06, "loss": 0.3537, "step": 18871 }, { "epoch": 0.8840586499273902, "grad_norm": 0.6123888030745652, "learning_rate": 3.085369763341954e-06, "loss": 0.3209, "step": 18872 }, { "epoch": 0.8841054949173186, "grad_norm": 0.6358508965609405, "learning_rate": 3.0851853835955126e-06, "loss": 0.3227, "step": 18873 }, { "epoch": 0.884152339907247, "grad_norm": 0.5550813008043757, "learning_rate": 3.0850010004814763e-06, "loss": 0.2899, "step": 18874 }, { "epoch": 0.8841991848971752, "grad_norm": 0.5820793660529576, "learning_rate": 3.084816614000907e-06, "loss": 0.3183, "step": 18875 }, { "epoch": 0.8842460298871035, "grad_norm": 0.5536097889873827, "learning_rate": 3.084632224154865e-06, "loss": 0.2993, "step": 18876 }, { "epoch": 0.8842928748770319, "grad_norm": 0.6711270584429629, "learning_rate": 3.084447830944411e-06, "loss": 0.334, "step": 18877 }, { "epoch": 0.8843397198669602, "grad_norm": 0.5591496499106735, "learning_rate": 3.084263434370607e-06, "loss": 0.31, "step": 18878 }, { "epoch": 0.8843865648568886, "grad_norm": 0.6032765003591835, "learning_rate": 3.084079034434514e-06, "loss": 0.34, "step": 18879 }, { "epoch": 0.8844334098468168, "grad_norm": 0.5765888114208473, "learning_rate": 3.0838946311371926e-06, "loss": 0.3179, "step": 18880 }, { "epoch": 0.8844802548367452, "grad_norm": 0.6381879408672106, "learning_rate": 3.0837102244797044e-06, "loss": 0.3206, "step": 18881 }, { "epoch": 0.8845270998266735, "grad_norm": 0.5776648747549874, "learning_rate": 3.0835258144631106e-06, "loss": 0.3091, "step": 18882 }, { "epoch": 0.8845739448166019, "grad_norm": 0.5274122996395695, "learning_rate": 3.083341401088472e-06, "loss": 0.2951, "step": 18883 }, { "epoch": 0.8846207898065301, "grad_norm": 0.5392894099858813, "learning_rate": 3.083156984356851e-06, "loss": 0.288, "step": 18884 }, { "epoch": 0.8846676347964585, "grad_norm": 0.649037606202141, "learning_rate": 3.0829725642693077e-06, "loss": 0.3459, "step": 18885 }, { "epoch": 0.8847144797863868, "grad_norm": 0.6175542352311523, "learning_rate": 3.0827881408269038e-06, "loss": 0.3133, "step": 18886 }, { "epoch": 0.8847613247763152, "grad_norm": 0.6113400428109279, "learning_rate": 3.082603714030701e-06, "loss": 0.3207, "step": 18887 }, { "epoch": 0.8848081697662435, "grad_norm": 0.6040322074804892, "learning_rate": 3.08241928388176e-06, "loss": 0.3163, "step": 18888 }, { "epoch": 0.8848550147561718, "grad_norm": 0.6055048437746163, "learning_rate": 3.0822348503811427e-06, "loss": 0.3235, "step": 18889 }, { "epoch": 0.8849018597461001, "grad_norm": 0.5853403376842232, "learning_rate": 3.0820504135299103e-06, "loss": 0.34, "step": 18890 }, { "epoch": 0.8849487047360285, "grad_norm": 0.5973707917138484, "learning_rate": 3.081865973329123e-06, "loss": 0.2973, "step": 18891 }, { "epoch": 0.8849955497259568, "grad_norm": 0.5328904827379255, "learning_rate": 3.081681529779844e-06, "loss": 0.2925, "step": 18892 }, { "epoch": 0.8850423947158851, "grad_norm": 0.5882201713658864, "learning_rate": 3.081497082883134e-06, "loss": 0.3313, "step": 18893 }, { "epoch": 0.8850892397058134, "grad_norm": 0.5917563111253772, "learning_rate": 3.0813126326400542e-06, "loss": 0.3232, "step": 18894 }, { "epoch": 0.8851360846957418, "grad_norm": 0.5451550784037488, "learning_rate": 3.0811281790516663e-06, "loss": 0.2979, "step": 18895 }, { "epoch": 0.8851829296856701, "grad_norm": 0.5802664626682229, "learning_rate": 3.0809437221190323e-06, "loss": 0.3057, "step": 18896 }, { "epoch": 0.8852297746755985, "grad_norm": 0.5795825821023268, "learning_rate": 3.0807592618432125e-06, "loss": 0.333, "step": 18897 }, { "epoch": 0.8852766196655267, "grad_norm": 0.5639997827930009, "learning_rate": 3.0805747982252693e-06, "loss": 0.3094, "step": 18898 }, { "epoch": 0.8853234646554551, "grad_norm": 0.5762530956511741, "learning_rate": 3.0803903312662643e-06, "loss": 0.3159, "step": 18899 }, { "epoch": 0.8853703096453834, "grad_norm": 0.6262533067465017, "learning_rate": 3.0802058609672594e-06, "loss": 0.3148, "step": 18900 }, { "epoch": 0.8854171546353118, "grad_norm": 0.6062764496714789, "learning_rate": 3.0800213873293145e-06, "loss": 0.2967, "step": 18901 }, { "epoch": 0.88546399962524, "grad_norm": 0.6061602564327354, "learning_rate": 3.079836910353493e-06, "loss": 0.3255, "step": 18902 }, { "epoch": 0.8855108446151684, "grad_norm": 0.56649917716994, "learning_rate": 3.0796524300408553e-06, "loss": 0.2942, "step": 18903 }, { "epoch": 0.8855576896050967, "grad_norm": 0.5929307505244067, "learning_rate": 3.0794679463924637e-06, "loss": 0.3191, "step": 18904 }, { "epoch": 0.8856045345950251, "grad_norm": 0.6523872543521254, "learning_rate": 3.07928345940938e-06, "loss": 0.3134, "step": 18905 }, { "epoch": 0.8856513795849534, "grad_norm": 0.5949998232065444, "learning_rate": 3.0790989690926652e-06, "loss": 0.3161, "step": 18906 }, { "epoch": 0.8856982245748817, "grad_norm": 0.6024400869064025, "learning_rate": 3.078914475443382e-06, "loss": 0.3348, "step": 18907 }, { "epoch": 0.88574506956481, "grad_norm": 0.5395346703719871, "learning_rate": 3.0787299784625913e-06, "loss": 0.3138, "step": 18908 }, { "epoch": 0.8857919145547384, "grad_norm": 0.5521285632124019, "learning_rate": 3.078545478151354e-06, "loss": 0.3267, "step": 18909 }, { "epoch": 0.8858387595446667, "grad_norm": 0.5562205280015793, "learning_rate": 3.078360974510734e-06, "loss": 0.3182, "step": 18910 }, { "epoch": 0.885885604534595, "grad_norm": 0.5492254707741593, "learning_rate": 3.078176467541792e-06, "loss": 0.3033, "step": 18911 }, { "epoch": 0.8859324495245233, "grad_norm": 0.5913845475694282, "learning_rate": 3.077991957245589e-06, "loss": 0.3203, "step": 18912 }, { "epoch": 0.8859792945144517, "grad_norm": 0.6399736538048383, "learning_rate": 3.0778074436231875e-06, "loss": 0.3228, "step": 18913 }, { "epoch": 0.88602613950438, "grad_norm": 0.6093129587156049, "learning_rate": 3.0776229266756506e-06, "loss": 0.3035, "step": 18914 }, { "epoch": 0.8860729844943084, "grad_norm": 0.5792909683658726, "learning_rate": 3.077438406404038e-06, "loss": 0.3256, "step": 18915 }, { "epoch": 0.8861198294842366, "grad_norm": 0.5849720111882842, "learning_rate": 3.0772538828094124e-06, "loss": 0.3172, "step": 18916 }, { "epoch": 0.886166674474165, "grad_norm": 0.5900740945342381, "learning_rate": 3.0770693558928367e-06, "loss": 0.3425, "step": 18917 }, { "epoch": 0.8862135194640933, "grad_norm": 0.5987912357683188, "learning_rate": 3.0768848256553715e-06, "loss": 0.3042, "step": 18918 }, { "epoch": 0.8862603644540217, "grad_norm": 0.5742823589347508, "learning_rate": 3.0767002920980783e-06, "loss": 0.3349, "step": 18919 }, { "epoch": 0.8863072094439499, "grad_norm": 0.6093299956874663, "learning_rate": 3.0765157552220202e-06, "loss": 0.324, "step": 18920 }, { "epoch": 0.8863540544338783, "grad_norm": 0.5847766569396791, "learning_rate": 3.07633121502826e-06, "loss": 0.3222, "step": 18921 }, { "epoch": 0.8864008994238066, "grad_norm": 0.5765036813911806, "learning_rate": 3.0761466715178574e-06, "loss": 0.3038, "step": 18922 }, { "epoch": 0.886447744413735, "grad_norm": 0.5544201535366574, "learning_rate": 3.0759621246918753e-06, "loss": 0.3015, "step": 18923 }, { "epoch": 0.8864945894036633, "grad_norm": 0.5418226468420996, "learning_rate": 3.075777574551377e-06, "loss": 0.2831, "step": 18924 }, { "epoch": 0.8865414343935916, "grad_norm": 0.5862561893135138, "learning_rate": 3.0755930210974226e-06, "loss": 0.3314, "step": 18925 }, { "epoch": 0.8865882793835199, "grad_norm": 0.5570823040707893, "learning_rate": 3.075408464331075e-06, "loss": 0.3229, "step": 18926 }, { "epoch": 0.8866351243734483, "grad_norm": 0.5731073381806349, "learning_rate": 3.0752239042533964e-06, "loss": 0.2923, "step": 18927 }, { "epoch": 0.8866819693633766, "grad_norm": 0.5367330253743372, "learning_rate": 3.0750393408654493e-06, "loss": 0.2974, "step": 18928 }, { "epoch": 0.8867288143533049, "grad_norm": 0.5800271360901333, "learning_rate": 3.074854774168295e-06, "loss": 0.3042, "step": 18929 }, { "epoch": 0.8867756593432332, "grad_norm": 0.5439895526823542, "learning_rate": 3.074670204162996e-06, "loss": 0.3058, "step": 18930 }, { "epoch": 0.8868225043331616, "grad_norm": 0.5684478247896937, "learning_rate": 3.0744856308506143e-06, "loss": 0.2993, "step": 18931 }, { "epoch": 0.8868693493230899, "grad_norm": 0.5817423650917651, "learning_rate": 3.074301054232212e-06, "loss": 0.303, "step": 18932 }, { "epoch": 0.8869161943130183, "grad_norm": 0.6463386244683819, "learning_rate": 3.0741164743088525e-06, "loss": 0.322, "step": 18933 }, { "epoch": 0.8869630393029465, "grad_norm": 0.6312741402813475, "learning_rate": 3.0739318910815956e-06, "loss": 0.3311, "step": 18934 }, { "epoch": 0.8870098842928749, "grad_norm": 0.6130827296758669, "learning_rate": 3.073747304551506e-06, "loss": 0.3397, "step": 18935 }, { "epoch": 0.8870567292828032, "grad_norm": 0.5930364951440614, "learning_rate": 3.0735627147196444e-06, "loss": 0.3284, "step": 18936 }, { "epoch": 0.8871035742727316, "grad_norm": 0.5560698413103459, "learning_rate": 3.0733781215870733e-06, "loss": 0.3235, "step": 18937 }, { "epoch": 0.8871504192626598, "grad_norm": 0.609619220070655, "learning_rate": 3.0731935251548556e-06, "loss": 0.3195, "step": 18938 }, { "epoch": 0.8871972642525882, "grad_norm": 0.6224965673035364, "learning_rate": 3.0730089254240536e-06, "loss": 0.3149, "step": 18939 }, { "epoch": 0.8872441092425165, "grad_norm": 0.648287402193717, "learning_rate": 3.0728243223957283e-06, "loss": 0.3127, "step": 18940 }, { "epoch": 0.8872909542324449, "grad_norm": 0.5645086543347584, "learning_rate": 3.0726397160709438e-06, "loss": 0.3128, "step": 18941 }, { "epoch": 0.8873377992223732, "grad_norm": 0.651834612328456, "learning_rate": 3.072455106450761e-06, "loss": 0.3136, "step": 18942 }, { "epoch": 0.8873846442123015, "grad_norm": 0.5533974695468397, "learning_rate": 3.0722704935362437e-06, "loss": 0.3138, "step": 18943 }, { "epoch": 0.8874314892022298, "grad_norm": 0.58793465148873, "learning_rate": 3.0720858773284524e-06, "loss": 0.3283, "step": 18944 }, { "epoch": 0.8874783341921582, "grad_norm": 0.5959776799552015, "learning_rate": 3.0719012578284524e-06, "loss": 0.3142, "step": 18945 }, { "epoch": 0.8875251791820865, "grad_norm": 0.6417169811711759, "learning_rate": 3.071716635037303e-06, "loss": 0.3253, "step": 18946 }, { "epoch": 0.8875720241720148, "grad_norm": 0.5836681975398693, "learning_rate": 3.071532008956068e-06, "loss": 0.315, "step": 18947 }, { "epoch": 0.8876188691619431, "grad_norm": 0.5830651225669936, "learning_rate": 3.0713473795858107e-06, "loss": 0.3066, "step": 18948 }, { "epoch": 0.8876657141518715, "grad_norm": 0.5377818263911828, "learning_rate": 3.071162746927593e-06, "loss": 0.298, "step": 18949 }, { "epoch": 0.8877125591417998, "grad_norm": 0.6204829366575451, "learning_rate": 3.070978110982476e-06, "loss": 0.3306, "step": 18950 }, { "epoch": 0.8877594041317282, "grad_norm": 0.5788272423259933, "learning_rate": 3.070793471751525e-06, "loss": 0.3124, "step": 18951 }, { "epoch": 0.8878062491216564, "grad_norm": 0.5851837266063293, "learning_rate": 3.0706088292358e-06, "loss": 0.339, "step": 18952 }, { "epoch": 0.8878530941115848, "grad_norm": 0.5883036045684493, "learning_rate": 3.0704241834363647e-06, "loss": 0.3006, "step": 18953 }, { "epoch": 0.8878999391015131, "grad_norm": 0.6476422929892609, "learning_rate": 3.0702395343542816e-06, "loss": 0.3279, "step": 18954 }, { "epoch": 0.8879467840914415, "grad_norm": 0.5990003028813415, "learning_rate": 3.0700548819906133e-06, "loss": 0.3028, "step": 18955 }, { "epoch": 0.8879936290813697, "grad_norm": 0.6116321553753484, "learning_rate": 3.069870226346423e-06, "loss": 0.3027, "step": 18956 }, { "epoch": 0.888040474071298, "grad_norm": 0.6143757869077238, "learning_rate": 3.0696855674227726e-06, "loss": 0.3141, "step": 18957 }, { "epoch": 0.8880873190612264, "grad_norm": 0.5672871791051409, "learning_rate": 3.0695009052207247e-06, "loss": 0.3234, "step": 18958 }, { "epoch": 0.8881341640511548, "grad_norm": 0.5641485966923374, "learning_rate": 3.0693162397413424e-06, "loss": 0.297, "step": 18959 }, { "epoch": 0.8881810090410831, "grad_norm": 0.5215408396160357, "learning_rate": 3.0691315709856884e-06, "loss": 0.2954, "step": 18960 }, { "epoch": 0.8882278540310113, "grad_norm": 0.589038839156227, "learning_rate": 3.0689468989548243e-06, "loss": 0.3177, "step": 18961 }, { "epoch": 0.8882746990209397, "grad_norm": 0.6489157409416606, "learning_rate": 3.068762223649815e-06, "loss": 0.3343, "step": 18962 }, { "epoch": 0.888321544010868, "grad_norm": 0.5927741292274376, "learning_rate": 3.0685775450717216e-06, "loss": 0.3402, "step": 18963 }, { "epoch": 0.8883683890007964, "grad_norm": 0.5936540123637122, "learning_rate": 3.068392863221607e-06, "loss": 0.3239, "step": 18964 }, { "epoch": 0.8884152339907246, "grad_norm": 0.6038970380452001, "learning_rate": 3.0682081781005343e-06, "loss": 0.323, "step": 18965 }, { "epoch": 0.888462078980653, "grad_norm": 0.6098041582829404, "learning_rate": 3.068023489709567e-06, "loss": 0.3296, "step": 18966 }, { "epoch": 0.8885089239705813, "grad_norm": 0.6154754501880947, "learning_rate": 3.067838798049767e-06, "loss": 0.3411, "step": 18967 }, { "epoch": 0.8885557689605097, "grad_norm": 0.5627255879396862, "learning_rate": 3.0676541031221967e-06, "loss": 0.3023, "step": 18968 }, { "epoch": 0.888602613950438, "grad_norm": 0.6067466338097044, "learning_rate": 3.0674694049279206e-06, "loss": 0.3126, "step": 18969 }, { "epoch": 0.8886494589403663, "grad_norm": 0.5899927176046335, "learning_rate": 3.067284703468001e-06, "loss": 0.3371, "step": 18970 }, { "epoch": 0.8886963039302946, "grad_norm": 0.5846289226797685, "learning_rate": 3.0670999987434997e-06, "loss": 0.3312, "step": 18971 }, { "epoch": 0.888743148920223, "grad_norm": 0.6438165958614129, "learning_rate": 3.0669152907554805e-06, "loss": 0.3208, "step": 18972 }, { "epoch": 0.8887899939101513, "grad_norm": 0.5868364048354259, "learning_rate": 3.0667305795050073e-06, "loss": 0.3181, "step": 18973 }, { "epoch": 0.8888368389000796, "grad_norm": 0.5902990139852149, "learning_rate": 3.0665458649931408e-06, "loss": 0.32, "step": 18974 }, { "epoch": 0.8888836838900079, "grad_norm": 0.5854086319268494, "learning_rate": 3.0663611472209458e-06, "loss": 0.3125, "step": 18975 }, { "epoch": 0.8889305288799363, "grad_norm": 0.650338222941491, "learning_rate": 3.0661764261894844e-06, "loss": 0.32, "step": 18976 }, { "epoch": 0.8889773738698646, "grad_norm": 0.5811714893235762, "learning_rate": 3.0659917018998203e-06, "loss": 0.3038, "step": 18977 }, { "epoch": 0.889024218859793, "grad_norm": 0.5784927372017263, "learning_rate": 3.0658069743530163e-06, "loss": 0.3201, "step": 18978 }, { "epoch": 0.8890710638497212, "grad_norm": 0.5836998487141659, "learning_rate": 3.065622243550135e-06, "loss": 0.3297, "step": 18979 }, { "epoch": 0.8891179088396496, "grad_norm": 0.5874557211672659, "learning_rate": 3.06543750949224e-06, "loss": 0.3075, "step": 18980 }, { "epoch": 0.8891647538295779, "grad_norm": 0.6258446654586566, "learning_rate": 3.065252772180395e-06, "loss": 0.3153, "step": 18981 }, { "epoch": 0.8892115988195063, "grad_norm": 0.614580664275337, "learning_rate": 3.065068031615661e-06, "loss": 0.3324, "step": 18982 }, { "epoch": 0.8892584438094345, "grad_norm": 0.5626952080389915, "learning_rate": 3.064883287799103e-06, "loss": 0.2995, "step": 18983 }, { "epoch": 0.8893052887993629, "grad_norm": 0.6081785940504052, "learning_rate": 3.0646985407317846e-06, "loss": 0.3104, "step": 18984 }, { "epoch": 0.8893521337892912, "grad_norm": 0.6179307264082183, "learning_rate": 3.064513790414767e-06, "loss": 0.321, "step": 18985 }, { "epoch": 0.8893989787792196, "grad_norm": 0.5648504098859104, "learning_rate": 3.064329036849114e-06, "loss": 0.3236, "step": 18986 }, { "epoch": 0.8894458237691479, "grad_norm": 0.5844963793978096, "learning_rate": 3.0641442800358906e-06, "loss": 0.3212, "step": 18987 }, { "epoch": 0.8894926687590762, "grad_norm": 0.6516732265237878, "learning_rate": 3.063959519976158e-06, "loss": 0.2889, "step": 18988 }, { "epoch": 0.8895395137490045, "grad_norm": 0.6427899269811748, "learning_rate": 3.06377475667098e-06, "loss": 0.339, "step": 18989 }, { "epoch": 0.8895863587389329, "grad_norm": 0.6202381585874165, "learning_rate": 3.06358999012142e-06, "loss": 0.3203, "step": 18990 }, { "epoch": 0.8896332037288612, "grad_norm": 0.5729640967783913, "learning_rate": 3.063405220328542e-06, "loss": 0.3087, "step": 18991 }, { "epoch": 0.8896800487187895, "grad_norm": 0.5992459683431203, "learning_rate": 3.0632204472934075e-06, "loss": 0.3348, "step": 18992 }, { "epoch": 0.8897268937087178, "grad_norm": 0.5851328555849165, "learning_rate": 3.0630356710170805e-06, "loss": 0.3152, "step": 18993 }, { "epoch": 0.8897737386986462, "grad_norm": 0.5669238483368676, "learning_rate": 3.0628508915006267e-06, "loss": 0.319, "step": 18994 }, { "epoch": 0.8898205836885745, "grad_norm": 0.591041815544659, "learning_rate": 3.062666108745106e-06, "loss": 0.2899, "step": 18995 }, { "epoch": 0.8898674286785029, "grad_norm": 0.5705920948181608, "learning_rate": 3.062481322751583e-06, "loss": 0.32, "step": 18996 }, { "epoch": 0.8899142736684311, "grad_norm": 0.6087941910660329, "learning_rate": 3.0622965335211217e-06, "loss": 0.3211, "step": 18997 }, { "epoch": 0.8899611186583595, "grad_norm": 0.6205777630459093, "learning_rate": 3.062111741054786e-06, "loss": 0.3242, "step": 18998 }, { "epoch": 0.8900079636482878, "grad_norm": 0.6001680176774166, "learning_rate": 3.0619269453536376e-06, "loss": 0.3269, "step": 18999 }, { "epoch": 0.8900548086382162, "grad_norm": 0.5984915931838406, "learning_rate": 3.0617421464187414e-06, "loss": 0.3174, "step": 19000 }, { "epoch": 0.8901016536281444, "grad_norm": 0.6060191196136034, "learning_rate": 3.0615573442511604e-06, "loss": 0.3109, "step": 19001 }, { "epoch": 0.8901484986180728, "grad_norm": 0.5532341624590864, "learning_rate": 3.0613725388519573e-06, "loss": 0.2987, "step": 19002 }, { "epoch": 0.8901953436080011, "grad_norm": 0.5807438340766251, "learning_rate": 3.0611877302221973e-06, "loss": 0.3243, "step": 19003 }, { "epoch": 0.8902421885979295, "grad_norm": 0.6123425705153631, "learning_rate": 3.061002918362942e-06, "loss": 0.3209, "step": 19004 }, { "epoch": 0.8902890335878578, "grad_norm": 0.6158052091178411, "learning_rate": 3.0608181032752566e-06, "loss": 0.3227, "step": 19005 }, { "epoch": 0.8903358785777861, "grad_norm": 0.5799983920130952, "learning_rate": 3.0606332849602038e-06, "loss": 0.2967, "step": 19006 }, { "epoch": 0.8903827235677144, "grad_norm": 0.5692130673530722, "learning_rate": 3.060448463418847e-06, "loss": 0.3152, "step": 19007 }, { "epoch": 0.8904295685576428, "grad_norm": 0.623071080090104, "learning_rate": 3.0602636386522507e-06, "loss": 0.3326, "step": 19008 }, { "epoch": 0.8904764135475711, "grad_norm": 0.6093338610561704, "learning_rate": 3.0600788106614774e-06, "loss": 0.3312, "step": 19009 }, { "epoch": 0.8905232585374994, "grad_norm": 0.612082019030023, "learning_rate": 3.0598939794475917e-06, "loss": 0.3006, "step": 19010 }, { "epoch": 0.8905701035274277, "grad_norm": 0.5705436963602925, "learning_rate": 3.0597091450116566e-06, "loss": 0.3191, "step": 19011 }, { "epoch": 0.8906169485173561, "grad_norm": 0.6387942744440902, "learning_rate": 3.059524307354737e-06, "loss": 0.3287, "step": 19012 }, { "epoch": 0.8906637935072844, "grad_norm": 0.5570558705040465, "learning_rate": 3.059339466477894e-06, "loss": 0.2961, "step": 19013 }, { "epoch": 0.8907106384972128, "grad_norm": 0.5994386000686251, "learning_rate": 3.0591546223821938e-06, "loss": 0.337, "step": 19014 }, { "epoch": 0.890757483487141, "grad_norm": 0.6033833421015493, "learning_rate": 3.0589697750687003e-06, "loss": 0.3183, "step": 19015 }, { "epoch": 0.8908043284770694, "grad_norm": 0.5530327169425721, "learning_rate": 3.0587849245384747e-06, "loss": 0.3256, "step": 19016 }, { "epoch": 0.8908511734669977, "grad_norm": 0.6101547785770812, "learning_rate": 3.058600070792583e-06, "loss": 0.3184, "step": 19017 }, { "epoch": 0.8908980184569261, "grad_norm": 0.6100483496896575, "learning_rate": 3.0584152138320877e-06, "loss": 0.2961, "step": 19018 }, { "epoch": 0.8909448634468543, "grad_norm": 0.6237485592831308, "learning_rate": 3.0582303536580545e-06, "loss": 0.3323, "step": 19019 }, { "epoch": 0.8909917084367827, "grad_norm": 0.5874241721934271, "learning_rate": 3.0580454902715446e-06, "loss": 0.3004, "step": 19020 }, { "epoch": 0.891038553426711, "grad_norm": 0.6347117075682267, "learning_rate": 3.0578606236736237e-06, "loss": 0.3285, "step": 19021 }, { "epoch": 0.8910853984166394, "grad_norm": 0.5897303293256515, "learning_rate": 3.057675753865355e-06, "loss": 0.3247, "step": 19022 }, { "epoch": 0.8911322434065677, "grad_norm": 0.5986534443996209, "learning_rate": 3.057490880847803e-06, "loss": 0.3252, "step": 19023 }, { "epoch": 0.891179088396496, "grad_norm": 0.5746910079184111, "learning_rate": 3.0573060046220306e-06, "loss": 0.316, "step": 19024 }, { "epoch": 0.8912259333864243, "grad_norm": 0.6690742443969361, "learning_rate": 3.057121125189102e-06, "loss": 0.3343, "step": 19025 }, { "epoch": 0.8912727783763527, "grad_norm": 0.57795787707093, "learning_rate": 3.0569362425500813e-06, "loss": 0.3151, "step": 19026 }, { "epoch": 0.891319623366281, "grad_norm": 0.5970781984436039, "learning_rate": 3.056751356706033e-06, "loss": 0.3113, "step": 19027 }, { "epoch": 0.8913664683562093, "grad_norm": 0.6568686615131029, "learning_rate": 3.05656646765802e-06, "loss": 0.3246, "step": 19028 }, { "epoch": 0.8914133133461376, "grad_norm": 0.5624358357490115, "learning_rate": 3.056381575407108e-06, "loss": 0.2918, "step": 19029 }, { "epoch": 0.891460158336066, "grad_norm": 0.572663411895746, "learning_rate": 3.056196679954359e-06, "loss": 0.3154, "step": 19030 }, { "epoch": 0.8915070033259943, "grad_norm": 0.5216205460460891, "learning_rate": 3.0560117813008376e-06, "loss": 0.3008, "step": 19031 }, { "epoch": 0.8915538483159227, "grad_norm": 0.5494254132698586, "learning_rate": 3.0558268794476093e-06, "loss": 0.3136, "step": 19032 }, { "epoch": 0.8916006933058509, "grad_norm": 0.6196387488687061, "learning_rate": 3.0556419743957368e-06, "loss": 0.3135, "step": 19033 }, { "epoch": 0.8916475382957793, "grad_norm": 0.5449471836740861, "learning_rate": 3.0554570661462836e-06, "loss": 0.2964, "step": 19034 }, { "epoch": 0.8916943832857076, "grad_norm": 0.552928256045457, "learning_rate": 3.055272154700314e-06, "loss": 0.3203, "step": 19035 }, { "epoch": 0.891741228275636, "grad_norm": 0.6039674431547251, "learning_rate": 3.0550872400588943e-06, "loss": 0.339, "step": 19036 }, { "epoch": 0.8917880732655642, "grad_norm": 0.6008668273067682, "learning_rate": 3.054902322223086e-06, "loss": 0.3159, "step": 19037 }, { "epoch": 0.8918349182554925, "grad_norm": 0.5881056376849805, "learning_rate": 3.0547174011939545e-06, "loss": 0.3211, "step": 19038 }, { "epoch": 0.8918817632454209, "grad_norm": 0.5520649857605616, "learning_rate": 3.0545324769725643e-06, "loss": 0.2975, "step": 19039 }, { "epoch": 0.8919286082353493, "grad_norm": 0.6089018674002824, "learning_rate": 3.054347549559979e-06, "loss": 0.3336, "step": 19040 }, { "epoch": 0.8919754532252776, "grad_norm": 0.5898686167830621, "learning_rate": 3.054162618957262e-06, "loss": 0.3335, "step": 19041 }, { "epoch": 0.8920222982152058, "grad_norm": 0.5564771352875411, "learning_rate": 3.0539776851654795e-06, "loss": 0.305, "step": 19042 }, { "epoch": 0.8920691432051342, "grad_norm": 0.584701231938981, "learning_rate": 3.0537927481856944e-06, "loss": 0.3001, "step": 19043 }, { "epoch": 0.8921159881950625, "grad_norm": 0.5708777885961956, "learning_rate": 3.053607808018971e-06, "loss": 0.3295, "step": 19044 }, { "epoch": 0.8921628331849909, "grad_norm": 0.5870313874207185, "learning_rate": 3.0534228646663733e-06, "loss": 0.3267, "step": 19045 }, { "epoch": 0.8922096781749191, "grad_norm": 0.6162233938886059, "learning_rate": 3.0532379181289666e-06, "loss": 0.3462, "step": 19046 }, { "epoch": 0.8922565231648475, "grad_norm": 0.55140544182993, "learning_rate": 3.0530529684078147e-06, "loss": 0.3081, "step": 19047 }, { "epoch": 0.8923033681547758, "grad_norm": 0.6151235038237006, "learning_rate": 3.0528680155039826e-06, "loss": 0.3224, "step": 19048 }, { "epoch": 0.8923502131447042, "grad_norm": 0.6092379459833114, "learning_rate": 3.052683059418533e-06, "loss": 0.3285, "step": 19049 }, { "epoch": 0.8923970581346325, "grad_norm": 0.5950018723696162, "learning_rate": 3.052498100152532e-06, "loss": 0.3269, "step": 19050 }, { "epoch": 0.8924439031245608, "grad_norm": 0.617405063468353, "learning_rate": 3.052313137707043e-06, "loss": 0.3403, "step": 19051 }, { "epoch": 0.8924907481144891, "grad_norm": 0.6072693069365065, "learning_rate": 3.052128172083131e-06, "loss": 0.3368, "step": 19052 }, { "epoch": 0.8925375931044175, "grad_norm": 0.6503335939446606, "learning_rate": 3.051943203281859e-06, "loss": 0.3363, "step": 19053 }, { "epoch": 0.8925844380943458, "grad_norm": 0.5601267404231993, "learning_rate": 3.0517582313042936e-06, "loss": 0.3023, "step": 19054 }, { "epoch": 0.8926312830842741, "grad_norm": 0.5207627220741529, "learning_rate": 3.0515732561514987e-06, "loss": 0.2945, "step": 19055 }, { "epoch": 0.8926781280742024, "grad_norm": 0.5580424359036308, "learning_rate": 3.0513882778245373e-06, "loss": 0.3053, "step": 19056 }, { "epoch": 0.8927249730641308, "grad_norm": 0.5864050190253951, "learning_rate": 3.0512032963244757e-06, "loss": 0.3176, "step": 19057 }, { "epoch": 0.8927718180540591, "grad_norm": 0.580327421513939, "learning_rate": 3.0510183116523777e-06, "loss": 0.3086, "step": 19058 }, { "epoch": 0.8928186630439875, "grad_norm": 0.561388741412369, "learning_rate": 3.0508333238093073e-06, "loss": 0.3071, "step": 19059 }, { "epoch": 0.8928655080339157, "grad_norm": 0.5772429799000683, "learning_rate": 3.05064833279633e-06, "loss": 0.2952, "step": 19060 }, { "epoch": 0.8929123530238441, "grad_norm": 0.6526961362047842, "learning_rate": 3.0504633386145097e-06, "loss": 0.3253, "step": 19061 }, { "epoch": 0.8929591980137724, "grad_norm": 0.6696997546746782, "learning_rate": 3.050278341264911e-06, "loss": 0.3264, "step": 19062 }, { "epoch": 0.8930060430037008, "grad_norm": 0.5638210868172813, "learning_rate": 3.050093340748599e-06, "loss": 0.325, "step": 19063 }, { "epoch": 0.893052887993629, "grad_norm": 0.630839978928046, "learning_rate": 3.049908337066639e-06, "loss": 0.3202, "step": 19064 }, { "epoch": 0.8930997329835574, "grad_norm": 0.5645830976603103, "learning_rate": 3.0497233302200934e-06, "loss": 0.3146, "step": 19065 }, { "epoch": 0.8931465779734857, "grad_norm": 0.6235066602152746, "learning_rate": 3.049538320210028e-06, "loss": 0.3102, "step": 19066 }, { "epoch": 0.8931934229634141, "grad_norm": 0.6055185663376956, "learning_rate": 3.049353307037509e-06, "loss": 0.3319, "step": 19067 }, { "epoch": 0.8932402679533424, "grad_norm": 0.5924980183014805, "learning_rate": 3.0491682907035993e-06, "loss": 0.33, "step": 19068 }, { "epoch": 0.8932871129432707, "grad_norm": 0.5289442069488787, "learning_rate": 3.048983271209363e-06, "loss": 0.3044, "step": 19069 }, { "epoch": 0.893333957933199, "grad_norm": 0.5855794739779823, "learning_rate": 3.0487982485558674e-06, "loss": 0.3257, "step": 19070 }, { "epoch": 0.8933808029231274, "grad_norm": 0.5461384523970644, "learning_rate": 3.0486132227441754e-06, "loss": 0.2987, "step": 19071 }, { "epoch": 0.8934276479130557, "grad_norm": 0.5984509945425553, "learning_rate": 3.048428193775352e-06, "loss": 0.3078, "step": 19072 }, { "epoch": 0.893474492902984, "grad_norm": 0.5805216995326471, "learning_rate": 3.0482431616504625e-06, "loss": 0.324, "step": 19073 }, { "epoch": 0.8935213378929123, "grad_norm": 0.5702994431844959, "learning_rate": 3.048058126370571e-06, "loss": 0.3044, "step": 19074 }, { "epoch": 0.8935681828828407, "grad_norm": 0.6144104201204804, "learning_rate": 3.047873087936743e-06, "loss": 0.3163, "step": 19075 }, { "epoch": 0.893615027872769, "grad_norm": 0.5744594230558756, "learning_rate": 3.047688046350043e-06, "loss": 0.3119, "step": 19076 }, { "epoch": 0.8936618728626974, "grad_norm": 0.575395447580257, "learning_rate": 3.047503001611536e-06, "loss": 0.3185, "step": 19077 }, { "epoch": 0.8937087178526256, "grad_norm": 0.5919917864924213, "learning_rate": 3.047317953722287e-06, "loss": 0.3146, "step": 19078 }, { "epoch": 0.893755562842554, "grad_norm": 0.6024023516580296, "learning_rate": 3.0471329026833605e-06, "loss": 0.3116, "step": 19079 }, { "epoch": 0.8938024078324823, "grad_norm": 0.5679117377582555, "learning_rate": 3.0469478484958217e-06, "loss": 0.3103, "step": 19080 }, { "epoch": 0.8938492528224107, "grad_norm": 0.601525616868127, "learning_rate": 3.0467627911607355e-06, "loss": 0.3138, "step": 19081 }, { "epoch": 0.8938960978123389, "grad_norm": 1.3650552691392939, "learning_rate": 3.046577730679167e-06, "loss": 0.3065, "step": 19082 }, { "epoch": 0.8939429428022673, "grad_norm": 0.5550225539250085, "learning_rate": 3.0463926670521806e-06, "loss": 0.2959, "step": 19083 }, { "epoch": 0.8939897877921956, "grad_norm": 0.597783383122267, "learning_rate": 3.0462076002808416e-06, "loss": 0.3076, "step": 19084 }, { "epoch": 0.894036632782124, "grad_norm": 0.5795029354059796, "learning_rate": 3.0460225303662167e-06, "loss": 0.3129, "step": 19085 }, { "epoch": 0.8940834777720523, "grad_norm": 0.5872064115528337, "learning_rate": 3.0458374573093673e-06, "loss": 0.3181, "step": 19086 }, { "epoch": 0.8941303227619806, "grad_norm": 0.6151105699121475, "learning_rate": 3.0456523811113614e-06, "loss": 0.3259, "step": 19087 }, { "epoch": 0.8941771677519089, "grad_norm": 0.6216968134013314, "learning_rate": 3.0454673017732638e-06, "loss": 0.3279, "step": 19088 }, { "epoch": 0.8942240127418373, "grad_norm": 0.5948559582445078, "learning_rate": 3.0452822192961382e-06, "loss": 0.334, "step": 19089 }, { "epoch": 0.8942708577317656, "grad_norm": 0.5576706947866359, "learning_rate": 3.045097133681051e-06, "loss": 0.3185, "step": 19090 }, { "epoch": 0.8943177027216939, "grad_norm": 0.5841858842273536, "learning_rate": 3.0449120449290663e-06, "loss": 0.3221, "step": 19091 }, { "epoch": 0.8943645477116222, "grad_norm": 0.5843086406714323, "learning_rate": 3.0447269530412506e-06, "loss": 0.3351, "step": 19092 }, { "epoch": 0.8944113927015506, "grad_norm": 0.5553747711977369, "learning_rate": 3.044541858018667e-06, "loss": 0.3198, "step": 19093 }, { "epoch": 0.8944582376914789, "grad_norm": 0.5690525367792828, "learning_rate": 3.0443567598623824e-06, "loss": 0.3152, "step": 19094 }, { "epoch": 0.8945050826814073, "grad_norm": 0.5785416563939147, "learning_rate": 3.0441716585734615e-06, "loss": 0.3212, "step": 19095 }, { "epoch": 0.8945519276713355, "grad_norm": 0.6242556575193051, "learning_rate": 3.0439865541529696e-06, "loss": 0.3132, "step": 19096 }, { "epoch": 0.8945987726612639, "grad_norm": 0.5554930329594961, "learning_rate": 3.0438014466019714e-06, "loss": 0.3122, "step": 19097 }, { "epoch": 0.8946456176511922, "grad_norm": 0.6391137473399994, "learning_rate": 3.0436163359215333e-06, "loss": 0.3538, "step": 19098 }, { "epoch": 0.8946924626411206, "grad_norm": 0.6222070151759437, "learning_rate": 3.0434312221127192e-06, "loss": 0.3434, "step": 19099 }, { "epoch": 0.8947393076310488, "grad_norm": 0.5471966804127215, "learning_rate": 3.0432461051765956e-06, "loss": 0.3024, "step": 19100 }, { "epoch": 0.8947861526209772, "grad_norm": 0.6019240283507394, "learning_rate": 3.043060985114226e-06, "loss": 0.3103, "step": 19101 }, { "epoch": 0.8948329976109055, "grad_norm": 0.6744522917677314, "learning_rate": 3.042875861926678e-06, "loss": 0.336, "step": 19102 }, { "epoch": 0.8948798426008339, "grad_norm": 0.5808248475983562, "learning_rate": 3.042690735615016e-06, "loss": 0.3051, "step": 19103 }, { "epoch": 0.8949266875907622, "grad_norm": 0.5410580447414682, "learning_rate": 3.0425056061803044e-06, "loss": 0.2935, "step": 19104 }, { "epoch": 0.8949735325806905, "grad_norm": 0.5689108316407046, "learning_rate": 3.0423204736236095e-06, "loss": 0.3138, "step": 19105 }, { "epoch": 0.8950203775706188, "grad_norm": 0.6091826790832967, "learning_rate": 3.042135337945997e-06, "loss": 0.33, "step": 19106 }, { "epoch": 0.8950672225605472, "grad_norm": 0.5878157954654465, "learning_rate": 3.0419501991485316e-06, "loss": 0.3219, "step": 19107 }, { "epoch": 0.8951140675504755, "grad_norm": 0.6071407531080983, "learning_rate": 3.041765057232279e-06, "loss": 0.3233, "step": 19108 }, { "epoch": 0.8951609125404038, "grad_norm": 0.5922546427539692, "learning_rate": 3.0415799121983046e-06, "loss": 0.312, "step": 19109 }, { "epoch": 0.8952077575303321, "grad_norm": 0.5673481769954617, "learning_rate": 3.0413947640476747e-06, "loss": 0.3223, "step": 19110 }, { "epoch": 0.8952546025202605, "grad_norm": 0.6543820498588702, "learning_rate": 3.0412096127814535e-06, "loss": 0.3453, "step": 19111 }, { "epoch": 0.8953014475101888, "grad_norm": 0.5982951134693559, "learning_rate": 3.041024458400707e-06, "loss": 0.3147, "step": 19112 }, { "epoch": 0.8953482925001172, "grad_norm": 0.6047861838565142, "learning_rate": 3.0408393009065006e-06, "loss": 0.3361, "step": 19113 }, { "epoch": 0.8953951374900454, "grad_norm": 0.6517121374346477, "learning_rate": 3.0406541402998997e-06, "loss": 0.3429, "step": 19114 }, { "epoch": 0.8954419824799738, "grad_norm": 0.5899538381323578, "learning_rate": 3.0404689765819706e-06, "loss": 0.3107, "step": 19115 }, { "epoch": 0.8954888274699021, "grad_norm": 0.5369401050605103, "learning_rate": 3.040283809753778e-06, "loss": 0.3218, "step": 19116 }, { "epoch": 0.8955356724598305, "grad_norm": 0.5716128603438377, "learning_rate": 3.0400986398163883e-06, "loss": 0.3332, "step": 19117 }, { "epoch": 0.8955825174497587, "grad_norm": 0.5487653731227624, "learning_rate": 3.0399134667708667e-06, "loss": 0.3056, "step": 19118 }, { "epoch": 0.895629362439687, "grad_norm": 0.5473860129045661, "learning_rate": 3.0397282906182784e-06, "loss": 0.2963, "step": 19119 }, { "epoch": 0.8956762074296154, "grad_norm": 0.6331844834716621, "learning_rate": 3.0395431113596897e-06, "loss": 0.3235, "step": 19120 }, { "epoch": 0.8957230524195438, "grad_norm": 0.5719545678951279, "learning_rate": 3.039357928996166e-06, "loss": 0.3196, "step": 19121 }, { "epoch": 0.8957698974094721, "grad_norm": 0.5455433192257042, "learning_rate": 3.0391727435287726e-06, "loss": 0.3159, "step": 19122 }, { "epoch": 0.8958167423994003, "grad_norm": 0.6264111895791729, "learning_rate": 3.0389875549585757e-06, "loss": 0.3316, "step": 19123 }, { "epoch": 0.8958635873893287, "grad_norm": 0.5983325489784109, "learning_rate": 3.0388023632866413e-06, "loss": 0.3367, "step": 19124 }, { "epoch": 0.895910432379257, "grad_norm": 0.6178385316453374, "learning_rate": 3.0386171685140347e-06, "loss": 0.3274, "step": 19125 }, { "epoch": 0.8959572773691854, "grad_norm": 0.576584526576553, "learning_rate": 3.0384319706418207e-06, "loss": 0.3249, "step": 19126 }, { "epoch": 0.8960041223591136, "grad_norm": 0.6661298866758655, "learning_rate": 3.038246769671067e-06, "loss": 0.3323, "step": 19127 }, { "epoch": 0.896050967349042, "grad_norm": 0.5725282861878197, "learning_rate": 3.038061565602839e-06, "loss": 0.327, "step": 19128 }, { "epoch": 0.8960978123389703, "grad_norm": 0.6041882529816075, "learning_rate": 3.0378763584382004e-06, "loss": 0.3135, "step": 19129 }, { "epoch": 0.8961446573288987, "grad_norm": 0.5876688274089376, "learning_rate": 3.0376911481782194e-06, "loss": 0.3254, "step": 19130 }, { "epoch": 0.896191502318827, "grad_norm": 0.5970851801274955, "learning_rate": 3.0375059348239617e-06, "loss": 0.3203, "step": 19131 }, { "epoch": 0.8962383473087553, "grad_norm": 0.6103236929630321, "learning_rate": 3.0373207183764906e-06, "loss": 0.3124, "step": 19132 }, { "epoch": 0.8962851922986836, "grad_norm": 0.5690179973652317, "learning_rate": 3.0371354988368752e-06, "loss": 0.2915, "step": 19133 }, { "epoch": 0.896332037288612, "grad_norm": 0.570085115009912, "learning_rate": 3.0369502762061805e-06, "loss": 0.3136, "step": 19134 }, { "epoch": 0.8963788822785403, "grad_norm": 0.5608077442202652, "learning_rate": 3.03676505048547e-06, "loss": 0.3122, "step": 19135 }, { "epoch": 0.8964257272684686, "grad_norm": 0.6304160823886967, "learning_rate": 3.0365798216758123e-06, "loss": 0.3257, "step": 19136 }, { "epoch": 0.8964725722583969, "grad_norm": 0.6424340584220247, "learning_rate": 3.0363945897782734e-06, "loss": 0.3344, "step": 19137 }, { "epoch": 0.8965194172483253, "grad_norm": 0.5581345773194736, "learning_rate": 3.036209354793918e-06, "loss": 0.3112, "step": 19138 }, { "epoch": 0.8965662622382536, "grad_norm": 0.5646058234287958, "learning_rate": 3.0360241167238126e-06, "loss": 0.311, "step": 19139 }, { "epoch": 0.896613107228182, "grad_norm": 0.5812435127176767, "learning_rate": 3.0358388755690227e-06, "loss": 0.3056, "step": 19140 }, { "epoch": 0.8966599522181102, "grad_norm": 0.5960680372568457, "learning_rate": 3.0356536313306156e-06, "loss": 0.3094, "step": 19141 }, { "epoch": 0.8967067972080386, "grad_norm": 0.5788993440062384, "learning_rate": 3.0354683840096554e-06, "loss": 0.2914, "step": 19142 }, { "epoch": 0.8967536421979669, "grad_norm": 0.5573432933488418, "learning_rate": 3.0352831336072105e-06, "loss": 0.2934, "step": 19143 }, { "epoch": 0.8968004871878953, "grad_norm": 0.61762569012705, "learning_rate": 3.0350978801243443e-06, "loss": 0.3279, "step": 19144 }, { "epoch": 0.8968473321778235, "grad_norm": 0.5835792327766129, "learning_rate": 3.034912623562125e-06, "loss": 0.2997, "step": 19145 }, { "epoch": 0.8968941771677519, "grad_norm": 0.5654414772508722, "learning_rate": 3.0347273639216186e-06, "loss": 0.3113, "step": 19146 }, { "epoch": 0.8969410221576802, "grad_norm": 0.5675689775921278, "learning_rate": 3.0345421012038894e-06, "loss": 0.3111, "step": 19147 }, { "epoch": 0.8969878671476086, "grad_norm": 0.5967502032784534, "learning_rate": 3.034356835410006e-06, "loss": 0.3104, "step": 19148 }, { "epoch": 0.8970347121375369, "grad_norm": 0.582103885214716, "learning_rate": 3.0341715665410326e-06, "loss": 0.3108, "step": 19149 }, { "epoch": 0.8970815571274652, "grad_norm": 0.5800895445051071, "learning_rate": 3.033986294598036e-06, "loss": 0.313, "step": 19150 }, { "epoch": 0.8971284021173935, "grad_norm": 0.5887743375281911, "learning_rate": 3.033801019582083e-06, "loss": 0.3065, "step": 19151 }, { "epoch": 0.8971752471073219, "grad_norm": 0.5817942209180603, "learning_rate": 3.033615741494239e-06, "loss": 0.3242, "step": 19152 }, { "epoch": 0.8972220920972502, "grad_norm": 0.5393886468319938, "learning_rate": 3.0334304603355703e-06, "loss": 0.2796, "step": 19153 }, { "epoch": 0.8972689370871785, "grad_norm": 0.568454213086298, "learning_rate": 3.033245176107143e-06, "loss": 0.3235, "step": 19154 }, { "epoch": 0.8973157820771068, "grad_norm": 0.5785078439673316, "learning_rate": 3.033059888810025e-06, "loss": 0.3002, "step": 19155 }, { "epoch": 0.8973626270670352, "grad_norm": 0.5975801981599558, "learning_rate": 3.03287459844528e-06, "loss": 0.3063, "step": 19156 }, { "epoch": 0.8974094720569635, "grad_norm": 0.5516537654448461, "learning_rate": 3.032689305013976e-06, "loss": 0.3208, "step": 19157 }, { "epoch": 0.8974563170468919, "grad_norm": 0.6198984094293545, "learning_rate": 3.0325040085171796e-06, "loss": 0.3326, "step": 19158 }, { "epoch": 0.8975031620368201, "grad_norm": 0.5994007326502023, "learning_rate": 3.0323187089559566e-06, "loss": 0.3175, "step": 19159 }, { "epoch": 0.8975500070267485, "grad_norm": 0.5976965240395282, "learning_rate": 3.0321334063313716e-06, "loss": 0.3263, "step": 19160 }, { "epoch": 0.8975968520166768, "grad_norm": 0.6500108709953482, "learning_rate": 3.031948100644494e-06, "loss": 0.3207, "step": 19161 }, { "epoch": 0.8976436970066052, "grad_norm": 0.7007004815216035, "learning_rate": 3.031762791896389e-06, "loss": 0.3232, "step": 19162 }, { "epoch": 0.8976905419965334, "grad_norm": 0.593815035954561, "learning_rate": 3.0315774800881215e-06, "loss": 0.3138, "step": 19163 }, { "epoch": 0.8977373869864618, "grad_norm": 0.5586638278051498, "learning_rate": 3.0313921652207607e-06, "loss": 0.3172, "step": 19164 }, { "epoch": 0.8977842319763901, "grad_norm": 0.6020477596294659, "learning_rate": 3.0312068472953704e-06, "loss": 0.3348, "step": 19165 }, { "epoch": 0.8978310769663185, "grad_norm": 0.561875776077522, "learning_rate": 3.0310215263130183e-06, "loss": 0.312, "step": 19166 }, { "epoch": 0.8978779219562468, "grad_norm": 0.5584122092251664, "learning_rate": 3.0308362022747717e-06, "loss": 0.3122, "step": 19167 }, { "epoch": 0.8979247669461751, "grad_norm": 0.6453899635950102, "learning_rate": 3.0306508751816948e-06, "loss": 0.3124, "step": 19168 }, { "epoch": 0.8979716119361034, "grad_norm": 0.6105778354153487, "learning_rate": 3.030465545034857e-06, "loss": 0.2923, "step": 19169 }, { "epoch": 0.8980184569260318, "grad_norm": 0.5615532488847819, "learning_rate": 3.0302802118353226e-06, "loss": 0.3059, "step": 19170 }, { "epoch": 0.8980653019159601, "grad_norm": 0.5444266398011367, "learning_rate": 3.0300948755841585e-06, "loss": 0.3118, "step": 19171 }, { "epoch": 0.8981121469058884, "grad_norm": 0.5181721105654762, "learning_rate": 3.029909536282432e-06, "loss": 0.3027, "step": 19172 }, { "epoch": 0.8981589918958167, "grad_norm": 0.6274376444776699, "learning_rate": 3.0297241939312094e-06, "loss": 0.3106, "step": 19173 }, { "epoch": 0.8982058368857451, "grad_norm": 0.6051980468128628, "learning_rate": 3.029538848531557e-06, "loss": 0.3464, "step": 19174 }, { "epoch": 0.8982526818756734, "grad_norm": 0.6414886954602923, "learning_rate": 3.0293535000845407e-06, "loss": 0.3214, "step": 19175 }, { "epoch": 0.8982995268656018, "grad_norm": 0.5937351224758813, "learning_rate": 3.02916814859123e-06, "loss": 0.3059, "step": 19176 }, { "epoch": 0.89834637185553, "grad_norm": 0.594541405103265, "learning_rate": 3.028982794052688e-06, "loss": 0.3202, "step": 19177 }, { "epoch": 0.8983932168454584, "grad_norm": 0.6189943151042224, "learning_rate": 3.028797436469984e-06, "loss": 0.3257, "step": 19178 }, { "epoch": 0.8984400618353867, "grad_norm": 0.5946363224971098, "learning_rate": 3.0286120758441835e-06, "loss": 0.2978, "step": 19179 }, { "epoch": 0.8984869068253151, "grad_norm": 0.6098936713347483, "learning_rate": 3.028426712176353e-06, "loss": 0.3239, "step": 19180 }, { "epoch": 0.8985337518152433, "grad_norm": 0.6025223771336505, "learning_rate": 3.0282413454675597e-06, "loss": 0.3248, "step": 19181 }, { "epoch": 0.8985805968051717, "grad_norm": 0.5890021627874361, "learning_rate": 3.0280559757188704e-06, "loss": 0.3114, "step": 19182 }, { "epoch": 0.8986274417951, "grad_norm": 0.5911975927996574, "learning_rate": 3.0278706029313514e-06, "loss": 0.3364, "step": 19183 }, { "epoch": 0.8986742867850284, "grad_norm": 0.595005412477383, "learning_rate": 3.02768522710607e-06, "loss": 0.3297, "step": 19184 }, { "epoch": 0.8987211317749567, "grad_norm": 0.6685902760598706, "learning_rate": 3.0274998482440926e-06, "loss": 0.3351, "step": 19185 }, { "epoch": 0.898767976764885, "grad_norm": 0.6602761220356436, "learning_rate": 3.027314466346486e-06, "loss": 0.3196, "step": 19186 }, { "epoch": 0.8988148217548133, "grad_norm": 0.5530987867920718, "learning_rate": 3.0271290814143174e-06, "loss": 0.3047, "step": 19187 }, { "epoch": 0.8988616667447417, "grad_norm": 0.6024369729475377, "learning_rate": 3.0269436934486536e-06, "loss": 0.3165, "step": 19188 }, { "epoch": 0.89890851173467, "grad_norm": 0.5981180513386907, "learning_rate": 3.026758302450561e-06, "loss": 0.3097, "step": 19189 }, { "epoch": 0.8989553567245983, "grad_norm": 0.6097866870525034, "learning_rate": 3.026572908421107e-06, "loss": 0.3191, "step": 19190 }, { "epoch": 0.8990022017145266, "grad_norm": 0.5524661682457414, "learning_rate": 3.026387511361358e-06, "loss": 0.3074, "step": 19191 }, { "epoch": 0.899049046704455, "grad_norm": 0.5582786478591223, "learning_rate": 3.026202111272381e-06, "loss": 0.3128, "step": 19192 }, { "epoch": 0.8990958916943833, "grad_norm": 0.62299928445976, "learning_rate": 3.0260167081552433e-06, "loss": 0.3158, "step": 19193 }, { "epoch": 0.8991427366843117, "grad_norm": 0.5817944040526316, "learning_rate": 3.025831302011012e-06, "loss": 0.331, "step": 19194 }, { "epoch": 0.8991895816742399, "grad_norm": 0.6034153892477646, "learning_rate": 3.0256458928407534e-06, "loss": 0.3102, "step": 19195 }, { "epoch": 0.8992364266641683, "grad_norm": 0.6084422612191301, "learning_rate": 3.0254604806455347e-06, "loss": 0.3173, "step": 19196 }, { "epoch": 0.8992832716540966, "grad_norm": 0.54132363781203, "learning_rate": 3.0252750654264236e-06, "loss": 0.2943, "step": 19197 }, { "epoch": 0.899330116644025, "grad_norm": 0.6623203434155175, "learning_rate": 3.0250896471844858e-06, "loss": 0.3325, "step": 19198 }, { "epoch": 0.8993769616339532, "grad_norm": 0.6372066826482679, "learning_rate": 3.0249042259207896e-06, "loss": 0.3126, "step": 19199 }, { "epoch": 0.8994238066238815, "grad_norm": 0.5936690617291212, "learning_rate": 3.0247188016364008e-06, "loss": 0.31, "step": 19200 }, { "epoch": 0.8994706516138099, "grad_norm": 0.571703894574075, "learning_rate": 3.0245333743323877e-06, "loss": 0.3112, "step": 19201 }, { "epoch": 0.8995174966037383, "grad_norm": 0.551986343018951, "learning_rate": 3.0243479440098165e-06, "loss": 0.3043, "step": 19202 }, { "epoch": 0.8995643415936666, "grad_norm": 0.5741560945647668, "learning_rate": 3.0241625106697552e-06, "loss": 0.3277, "step": 19203 }, { "epoch": 0.8996111865835948, "grad_norm": 0.544572980386039, "learning_rate": 3.0239770743132707e-06, "loss": 0.2988, "step": 19204 }, { "epoch": 0.8996580315735232, "grad_norm": 0.5777075616358064, "learning_rate": 3.0237916349414294e-06, "loss": 0.3162, "step": 19205 }, { "epoch": 0.8997048765634515, "grad_norm": 0.6000647611437979, "learning_rate": 3.0236061925552985e-06, "loss": 0.3054, "step": 19206 }, { "epoch": 0.8997517215533799, "grad_norm": 0.6371524592894376, "learning_rate": 3.0234207471559457e-06, "loss": 0.3345, "step": 19207 }, { "epoch": 0.8997985665433081, "grad_norm": 0.5800827953335215, "learning_rate": 3.0232352987444387e-06, "loss": 0.3111, "step": 19208 }, { "epoch": 0.8998454115332365, "grad_norm": 0.6103016136927603, "learning_rate": 3.0230498473218426e-06, "loss": 0.3011, "step": 19209 }, { "epoch": 0.8998922565231648, "grad_norm": 0.6018990050927079, "learning_rate": 3.022864392889228e-06, "loss": 0.313, "step": 19210 }, { "epoch": 0.8999391015130932, "grad_norm": 0.572983232796292, "learning_rate": 3.0226789354476593e-06, "loss": 0.3039, "step": 19211 }, { "epoch": 0.8999859465030215, "grad_norm": 0.6271287175693661, "learning_rate": 3.0224934749982043e-06, "loss": 0.3132, "step": 19212 }, { "epoch": 0.9000327914929498, "grad_norm": 0.5678329663245998, "learning_rate": 3.022308011541931e-06, "loss": 0.304, "step": 19213 }, { "epoch": 0.9000796364828781, "grad_norm": 0.5607495652522675, "learning_rate": 3.0221225450799063e-06, "loss": 0.302, "step": 19214 }, { "epoch": 0.9001264814728065, "grad_norm": 0.5778983113274402, "learning_rate": 3.0219370756131982e-06, "loss": 0.3107, "step": 19215 }, { "epoch": 0.9001733264627348, "grad_norm": 0.5552068104348887, "learning_rate": 3.0217516031428728e-06, "loss": 0.3054, "step": 19216 }, { "epoch": 0.9002201714526631, "grad_norm": 0.586033702472853, "learning_rate": 3.021566127669998e-06, "loss": 0.2908, "step": 19217 }, { "epoch": 0.9002670164425914, "grad_norm": 0.5693599233405998, "learning_rate": 3.0213806491956417e-06, "loss": 0.3015, "step": 19218 }, { "epoch": 0.9003138614325198, "grad_norm": 0.6349259136559644, "learning_rate": 3.0211951677208707e-06, "loss": 0.328, "step": 19219 }, { "epoch": 0.9003607064224481, "grad_norm": 0.6398953771774473, "learning_rate": 3.021009683246752e-06, "loss": 0.321, "step": 19220 }, { "epoch": 0.9004075514123765, "grad_norm": 0.5971921575567728, "learning_rate": 3.0208241957743543e-06, "loss": 0.3009, "step": 19221 }, { "epoch": 0.9004543964023047, "grad_norm": 0.6044806629842892, "learning_rate": 3.020638705304744e-06, "loss": 0.3282, "step": 19222 }, { "epoch": 0.9005012413922331, "grad_norm": 0.5796744438943768, "learning_rate": 3.0204532118389886e-06, "loss": 0.3166, "step": 19223 }, { "epoch": 0.9005480863821614, "grad_norm": 0.5732971290310093, "learning_rate": 3.020267715378155e-06, "loss": 0.2879, "step": 19224 }, { "epoch": 0.9005949313720898, "grad_norm": 0.6236343650374275, "learning_rate": 3.0200822159233134e-06, "loss": 0.3342, "step": 19225 }, { "epoch": 0.900641776362018, "grad_norm": 0.5421255848543594, "learning_rate": 3.0198967134755276e-06, "loss": 0.304, "step": 19226 }, { "epoch": 0.9006886213519464, "grad_norm": 0.6237993697340667, "learning_rate": 3.019711208035867e-06, "loss": 0.321, "step": 19227 }, { "epoch": 0.9007354663418747, "grad_norm": 0.6163956149734443, "learning_rate": 3.0195256996054e-06, "loss": 0.3078, "step": 19228 }, { "epoch": 0.9007823113318031, "grad_norm": 0.5610478757912358, "learning_rate": 3.019340188185193e-06, "loss": 0.3209, "step": 19229 }, { "epoch": 0.9008291563217314, "grad_norm": 0.6221314860262438, "learning_rate": 3.0191546737763134e-06, "loss": 0.3443, "step": 19230 }, { "epoch": 0.9008760013116597, "grad_norm": 0.6070940771838587, "learning_rate": 3.018969156379829e-06, "loss": 0.312, "step": 19231 }, { "epoch": 0.900922846301588, "grad_norm": 0.6212525750223615, "learning_rate": 3.018783635996808e-06, "loss": 0.3044, "step": 19232 }, { "epoch": 0.9009696912915164, "grad_norm": 0.5920140160659613, "learning_rate": 3.018598112628317e-06, "loss": 0.3171, "step": 19233 }, { "epoch": 0.9010165362814447, "grad_norm": 0.5626649830898518, "learning_rate": 3.018412586275425e-06, "loss": 0.2991, "step": 19234 }, { "epoch": 0.901063381271373, "grad_norm": 0.5910574734179646, "learning_rate": 3.0182270569391982e-06, "loss": 0.3185, "step": 19235 }, { "epoch": 0.9011102262613013, "grad_norm": 0.5565931270663398, "learning_rate": 3.018041524620705e-06, "loss": 0.294, "step": 19236 }, { "epoch": 0.9011570712512297, "grad_norm": 0.6095552458736752, "learning_rate": 3.0178559893210135e-06, "loss": 0.3163, "step": 19237 }, { "epoch": 0.901203916241158, "grad_norm": 0.5794638696108139, "learning_rate": 3.0176704510411904e-06, "loss": 0.3115, "step": 19238 }, { "epoch": 0.9012507612310864, "grad_norm": 0.6034609648826014, "learning_rate": 3.0174849097823045e-06, "loss": 0.3087, "step": 19239 }, { "epoch": 0.9012976062210146, "grad_norm": 0.5946062051640566, "learning_rate": 3.0172993655454223e-06, "loss": 0.3164, "step": 19240 }, { "epoch": 0.901344451210943, "grad_norm": 0.5520394271151988, "learning_rate": 3.017113818331613e-06, "loss": 0.3131, "step": 19241 }, { "epoch": 0.9013912962008713, "grad_norm": 0.5849883802003386, "learning_rate": 3.016928268141943e-06, "loss": 0.3289, "step": 19242 }, { "epoch": 0.9014381411907997, "grad_norm": 0.5364794416630079, "learning_rate": 3.0167427149774815e-06, "loss": 0.3173, "step": 19243 }, { "epoch": 0.9014849861807279, "grad_norm": 0.5425023169221611, "learning_rate": 3.016557158839295e-06, "loss": 0.3143, "step": 19244 }, { "epoch": 0.9015318311706563, "grad_norm": 0.5750722695710829, "learning_rate": 3.0163715997284513e-06, "loss": 0.3279, "step": 19245 }, { "epoch": 0.9015786761605846, "grad_norm": 0.5341778570977685, "learning_rate": 3.0161860376460207e-06, "loss": 0.305, "step": 19246 }, { "epoch": 0.901625521150513, "grad_norm": 0.5988852931005749, "learning_rate": 3.0160004725930675e-06, "loss": 0.3349, "step": 19247 }, { "epoch": 0.9016723661404413, "grad_norm": 0.6113586537984609, "learning_rate": 3.015814904570661e-06, "loss": 0.3275, "step": 19248 }, { "epoch": 0.9017192111303696, "grad_norm": 0.5922879479512351, "learning_rate": 3.0156293335798703e-06, "loss": 0.3173, "step": 19249 }, { "epoch": 0.9017660561202979, "grad_norm": 0.5960264010602643, "learning_rate": 3.015443759621763e-06, "loss": 0.3192, "step": 19250 }, { "epoch": 0.9018129011102263, "grad_norm": 0.5904941395857263, "learning_rate": 3.015258182697405e-06, "loss": 0.3166, "step": 19251 }, { "epoch": 0.9018597461001546, "grad_norm": 0.5935963539572259, "learning_rate": 3.0150726028078662e-06, "loss": 0.3203, "step": 19252 }, { "epoch": 0.9019065910900829, "grad_norm": 0.6208245490405105, "learning_rate": 3.0148870199542145e-06, "loss": 0.3264, "step": 19253 }, { "epoch": 0.9019534360800112, "grad_norm": 0.6152467030647611, "learning_rate": 3.014701434137517e-06, "loss": 0.3378, "step": 19254 }, { "epoch": 0.9020002810699396, "grad_norm": 0.5632431934001271, "learning_rate": 3.0145158453588424e-06, "loss": 0.3052, "step": 19255 }, { "epoch": 0.9020471260598679, "grad_norm": 0.6714964362500148, "learning_rate": 3.0143302536192578e-06, "loss": 0.3551, "step": 19256 }, { "epoch": 0.9020939710497963, "grad_norm": 0.6542514387532267, "learning_rate": 3.0141446589198325e-06, "loss": 0.3314, "step": 19257 }, { "epoch": 0.9021408160397245, "grad_norm": 0.5645086337549652, "learning_rate": 3.0139590612616336e-06, "loss": 0.3156, "step": 19258 }, { "epoch": 0.9021876610296529, "grad_norm": 0.6054817368219941, "learning_rate": 3.0137734606457293e-06, "loss": 0.3423, "step": 19259 }, { "epoch": 0.9022345060195812, "grad_norm": 0.5772530506418321, "learning_rate": 3.0135878570731884e-06, "loss": 0.3167, "step": 19260 }, { "epoch": 0.9022813510095096, "grad_norm": 0.5579026248019605, "learning_rate": 3.0134022505450777e-06, "loss": 0.3062, "step": 19261 }, { "epoch": 0.9023281959994378, "grad_norm": 0.5854788866552866, "learning_rate": 3.013216641062467e-06, "loss": 0.3204, "step": 19262 }, { "epoch": 0.9023750409893662, "grad_norm": 0.605585882599958, "learning_rate": 3.0130310286264225e-06, "loss": 0.3264, "step": 19263 }, { "epoch": 0.9024218859792945, "grad_norm": 0.6287354263276635, "learning_rate": 3.0128454132380142e-06, "loss": 0.3424, "step": 19264 }, { "epoch": 0.9024687309692229, "grad_norm": 0.6519117652878498, "learning_rate": 3.0126597948983094e-06, "loss": 0.3402, "step": 19265 }, { "epoch": 0.9025155759591512, "grad_norm": 0.5791220637188949, "learning_rate": 3.0124741736083757e-06, "loss": 0.3168, "step": 19266 }, { "epoch": 0.9025624209490795, "grad_norm": 0.6181113319572146, "learning_rate": 3.012288549369283e-06, "loss": 0.3063, "step": 19267 }, { "epoch": 0.9026092659390078, "grad_norm": 0.5939366526403539, "learning_rate": 3.012102922182098e-06, "loss": 0.3266, "step": 19268 }, { "epoch": 0.9026561109289362, "grad_norm": 0.5885201703415482, "learning_rate": 3.011917292047889e-06, "loss": 0.3217, "step": 19269 }, { "epoch": 0.9027029559188645, "grad_norm": 0.6237122586834177, "learning_rate": 3.0117316589677247e-06, "loss": 0.322, "step": 19270 }, { "epoch": 0.9027498009087928, "grad_norm": 0.5854638874732716, "learning_rate": 3.011546022942674e-06, "loss": 0.3099, "step": 19271 }, { "epoch": 0.9027966458987211, "grad_norm": 0.601884860744431, "learning_rate": 3.0113603839738038e-06, "loss": 0.3005, "step": 19272 }, { "epoch": 0.9028434908886495, "grad_norm": 0.6138296776757257, "learning_rate": 3.011174742062183e-06, "loss": 0.3393, "step": 19273 }, { "epoch": 0.9028903358785778, "grad_norm": 0.5430012470272529, "learning_rate": 3.0109890972088807e-06, "loss": 0.2993, "step": 19274 }, { "epoch": 0.9029371808685062, "grad_norm": 0.540736348795068, "learning_rate": 3.010803449414964e-06, "loss": 0.2909, "step": 19275 }, { "epoch": 0.9029840258584344, "grad_norm": 0.6124523937087576, "learning_rate": 3.0106177986815015e-06, "loss": 0.3425, "step": 19276 }, { "epoch": 0.9030308708483628, "grad_norm": 0.58270363741053, "learning_rate": 3.010432145009563e-06, "loss": 0.3257, "step": 19277 }, { "epoch": 0.9030777158382911, "grad_norm": 0.6451237638352986, "learning_rate": 3.010246488400215e-06, "loss": 0.3494, "step": 19278 }, { "epoch": 0.9031245608282195, "grad_norm": 0.5855875873664039, "learning_rate": 3.010060828854527e-06, "loss": 0.3483, "step": 19279 }, { "epoch": 0.9031714058181477, "grad_norm": 0.6323891810707388, "learning_rate": 3.0098751663735675e-06, "loss": 0.3201, "step": 19280 }, { "epoch": 0.903218250808076, "grad_norm": 0.6199129748779542, "learning_rate": 3.0096895009584044e-06, "loss": 0.326, "step": 19281 }, { "epoch": 0.9032650957980044, "grad_norm": 0.7492338143309719, "learning_rate": 3.0095038326101056e-06, "loss": 0.3282, "step": 19282 }, { "epoch": 0.9033119407879328, "grad_norm": 0.5886388379500358, "learning_rate": 3.0093181613297413e-06, "loss": 0.3111, "step": 19283 }, { "epoch": 0.9033587857778611, "grad_norm": 0.6139320496765975, "learning_rate": 3.009132487118378e-06, "loss": 0.3214, "step": 19284 }, { "epoch": 0.9034056307677893, "grad_norm": 0.604375851325939, "learning_rate": 3.008946809977086e-06, "loss": 0.3267, "step": 19285 }, { "epoch": 0.9034524757577177, "grad_norm": 0.5361635810160732, "learning_rate": 3.0087611299069332e-06, "loss": 0.3026, "step": 19286 }, { "epoch": 0.903499320747646, "grad_norm": 0.5995336464565276, "learning_rate": 3.0085754469089866e-06, "loss": 0.3067, "step": 19287 }, { "epoch": 0.9035461657375744, "grad_norm": 0.6084914292411047, "learning_rate": 3.0083897609843175e-06, "loss": 0.3324, "step": 19288 }, { "epoch": 0.9035930107275026, "grad_norm": 0.6324003326679982, "learning_rate": 3.0082040721339928e-06, "loss": 0.3207, "step": 19289 }, { "epoch": 0.903639855717431, "grad_norm": 0.6073897359707043, "learning_rate": 3.0080183803590814e-06, "loss": 0.3015, "step": 19290 }, { "epoch": 0.9036867007073593, "grad_norm": 0.5723748999053744, "learning_rate": 3.0078326856606515e-06, "loss": 0.3028, "step": 19291 }, { "epoch": 0.9037335456972877, "grad_norm": 0.5987751573058796, "learning_rate": 3.0076469880397724e-06, "loss": 0.3147, "step": 19292 }, { "epoch": 0.903780390687216, "grad_norm": 0.5698468795189616, "learning_rate": 3.0074612874975123e-06, "loss": 0.2948, "step": 19293 }, { "epoch": 0.9038272356771443, "grad_norm": 0.5853704821352097, "learning_rate": 3.00727558403494e-06, "loss": 0.3091, "step": 19294 }, { "epoch": 0.9038740806670726, "grad_norm": 0.6091976763654328, "learning_rate": 3.007089877653125e-06, "loss": 0.3115, "step": 19295 }, { "epoch": 0.903920925657001, "grad_norm": 0.5489722690008942, "learning_rate": 3.0069041683531342e-06, "loss": 0.295, "step": 19296 }, { "epoch": 0.9039677706469293, "grad_norm": 0.5700655217143309, "learning_rate": 3.0067184561360373e-06, "loss": 0.3243, "step": 19297 }, { "epoch": 0.9040146156368576, "grad_norm": 0.6384064717812447, "learning_rate": 3.0065327410029036e-06, "loss": 0.3385, "step": 19298 }, { "epoch": 0.9040614606267859, "grad_norm": 0.5909647255021677, "learning_rate": 3.006347022954802e-06, "loss": 0.301, "step": 19299 }, { "epoch": 0.9041083056167143, "grad_norm": 0.5482803068330984, "learning_rate": 3.0061613019927995e-06, "loss": 0.2906, "step": 19300 }, { "epoch": 0.9041551506066426, "grad_norm": 0.561848565999287, "learning_rate": 3.0059755781179657e-06, "loss": 0.3168, "step": 19301 }, { "epoch": 0.904201995596571, "grad_norm": 0.5967346575333236, "learning_rate": 3.0057898513313705e-06, "loss": 0.3147, "step": 19302 }, { "epoch": 0.9042488405864992, "grad_norm": 0.621868496045352, "learning_rate": 3.005604121634081e-06, "loss": 0.3258, "step": 19303 }, { "epoch": 0.9042956855764276, "grad_norm": 0.5913043865237276, "learning_rate": 3.0054183890271675e-06, "loss": 0.3203, "step": 19304 }, { "epoch": 0.9043425305663559, "grad_norm": 0.6010408755474613, "learning_rate": 3.0052326535116975e-06, "loss": 0.312, "step": 19305 }, { "epoch": 0.9043893755562843, "grad_norm": 0.5983472035827567, "learning_rate": 3.005046915088741e-06, "loss": 0.302, "step": 19306 }, { "epoch": 0.9044362205462125, "grad_norm": 0.565568262801332, "learning_rate": 3.004861173759367e-06, "loss": 0.3294, "step": 19307 }, { "epoch": 0.9044830655361409, "grad_norm": 0.5977437774302425, "learning_rate": 3.004675429524643e-06, "loss": 0.2985, "step": 19308 }, { "epoch": 0.9045299105260692, "grad_norm": 0.6111254116947673, "learning_rate": 3.004489682385639e-06, "loss": 0.3149, "step": 19309 }, { "epoch": 0.9045767555159976, "grad_norm": 0.5894953446349303, "learning_rate": 3.004303932343424e-06, "loss": 0.3122, "step": 19310 }, { "epoch": 0.9046236005059259, "grad_norm": 0.5820078575380208, "learning_rate": 3.0041181793990658e-06, "loss": 0.3042, "step": 19311 }, { "epoch": 0.9046704454958542, "grad_norm": 0.5273086470887566, "learning_rate": 3.003932423553635e-06, "loss": 0.2956, "step": 19312 }, { "epoch": 0.9047172904857825, "grad_norm": 0.6594219050748653, "learning_rate": 3.0037466648082e-06, "loss": 0.3067, "step": 19313 }, { "epoch": 0.9047641354757109, "grad_norm": 0.5912583180038556, "learning_rate": 3.0035609031638285e-06, "loss": 0.2945, "step": 19314 }, { "epoch": 0.9048109804656392, "grad_norm": 0.6408315054527153, "learning_rate": 3.003375138621591e-06, "loss": 0.325, "step": 19315 }, { "epoch": 0.9048578254555675, "grad_norm": 0.6043867229065188, "learning_rate": 3.0031893711825568e-06, "loss": 0.3297, "step": 19316 }, { "epoch": 0.9049046704454958, "grad_norm": 0.6044276921457054, "learning_rate": 3.003003600847793e-06, "loss": 0.3217, "step": 19317 }, { "epoch": 0.9049515154354242, "grad_norm": 0.6355239871227972, "learning_rate": 3.0028178276183707e-06, "loss": 0.3268, "step": 19318 }, { "epoch": 0.9049983604253525, "grad_norm": 0.6388104568143105, "learning_rate": 3.0026320514953577e-06, "loss": 0.3329, "step": 19319 }, { "epoch": 0.9050452054152809, "grad_norm": 0.6358241700433775, "learning_rate": 3.0024462724798242e-06, "loss": 0.3406, "step": 19320 }, { "epoch": 0.9050920504052091, "grad_norm": 0.605059931678825, "learning_rate": 3.0022604905728383e-06, "loss": 0.323, "step": 19321 }, { "epoch": 0.9051388953951375, "grad_norm": 0.6195076847703403, "learning_rate": 3.0020747057754697e-06, "loss": 0.3086, "step": 19322 }, { "epoch": 0.9051857403850658, "grad_norm": 0.6076163247523316, "learning_rate": 3.001888918088788e-06, "loss": 0.3435, "step": 19323 }, { "epoch": 0.9052325853749942, "grad_norm": 0.622829712302093, "learning_rate": 3.00170312751386e-06, "loss": 0.348, "step": 19324 }, { "epoch": 0.9052794303649224, "grad_norm": 0.605503556540306, "learning_rate": 3.001517334051758e-06, "loss": 0.3196, "step": 19325 }, { "epoch": 0.9053262753548508, "grad_norm": 0.6138941889834051, "learning_rate": 3.001331537703549e-06, "loss": 0.319, "step": 19326 }, { "epoch": 0.9053731203447791, "grad_norm": 0.5850711746706165, "learning_rate": 3.0011457384703037e-06, "loss": 0.3119, "step": 19327 }, { "epoch": 0.9054199653347075, "grad_norm": 0.607234192220424, "learning_rate": 3.00095993635309e-06, "loss": 0.3194, "step": 19328 }, { "epoch": 0.9054668103246358, "grad_norm": 0.5586152092711942, "learning_rate": 3.000774131352978e-06, "loss": 0.3125, "step": 19329 }, { "epoch": 0.9055136553145641, "grad_norm": 0.5523801087423853, "learning_rate": 3.0005883234710374e-06, "loss": 0.3089, "step": 19330 }, { "epoch": 0.9055605003044924, "grad_norm": 0.6017393851367993, "learning_rate": 3.000402512708336e-06, "loss": 0.3232, "step": 19331 }, { "epoch": 0.9056073452944208, "grad_norm": 0.5658124884322692, "learning_rate": 3.0002166990659443e-06, "loss": 0.328, "step": 19332 }, { "epoch": 0.9056541902843491, "grad_norm": 0.583655747361331, "learning_rate": 3.000030882544931e-06, "loss": 0.3135, "step": 19333 }, { "epoch": 0.9057010352742774, "grad_norm": 0.6041983228122433, "learning_rate": 2.9998450631463653e-06, "loss": 0.3315, "step": 19334 }, { "epoch": 0.9057478802642057, "grad_norm": 0.6758165159145032, "learning_rate": 2.9996592408713176e-06, "loss": 0.3196, "step": 19335 }, { "epoch": 0.9057947252541341, "grad_norm": 0.5752260521801885, "learning_rate": 2.999473415720856e-06, "loss": 0.3206, "step": 19336 }, { "epoch": 0.9058415702440624, "grad_norm": 0.5932699653001788, "learning_rate": 2.999287587696051e-06, "loss": 0.3294, "step": 19337 }, { "epoch": 0.9058884152339908, "grad_norm": 0.6040281770883069, "learning_rate": 2.9991017567979713e-06, "loss": 0.3359, "step": 19338 }, { "epoch": 0.905935260223919, "grad_norm": 1.03457486383002, "learning_rate": 2.998915923027686e-06, "loss": 0.3377, "step": 19339 }, { "epoch": 0.9059821052138474, "grad_norm": 0.6025363747838256, "learning_rate": 2.9987300863862655e-06, "loss": 0.2896, "step": 19340 }, { "epoch": 0.9060289502037757, "grad_norm": 0.5879473488167474, "learning_rate": 2.9985442468747783e-06, "loss": 0.3044, "step": 19341 }, { "epoch": 0.9060757951937041, "grad_norm": 0.5677692128498804, "learning_rate": 2.9983584044942944e-06, "loss": 0.3226, "step": 19342 }, { "epoch": 0.9061226401836323, "grad_norm": 0.6035987203946224, "learning_rate": 2.9981725592458827e-06, "loss": 0.3244, "step": 19343 }, { "epoch": 0.9061694851735607, "grad_norm": 0.574613745582974, "learning_rate": 2.9979867111306143e-06, "loss": 0.3401, "step": 19344 }, { "epoch": 0.906216330163489, "grad_norm": 0.5549427546829093, "learning_rate": 2.997800860149557e-06, "loss": 0.3179, "step": 19345 }, { "epoch": 0.9062631751534174, "grad_norm": 0.5890795556542039, "learning_rate": 2.9976150063037802e-06, "loss": 0.2963, "step": 19346 }, { "epoch": 0.9063100201433457, "grad_norm": 0.6331911153452413, "learning_rate": 2.9974291495943546e-06, "loss": 0.3316, "step": 19347 }, { "epoch": 0.906356865133274, "grad_norm": 0.5932912967034455, "learning_rate": 2.997243290022349e-06, "loss": 0.3105, "step": 19348 }, { "epoch": 0.9064037101232023, "grad_norm": 0.6203257250786629, "learning_rate": 2.9970574275888335e-06, "loss": 0.3277, "step": 19349 }, { "epoch": 0.9064505551131307, "grad_norm": 0.6191602914749791, "learning_rate": 2.9968715622948776e-06, "loss": 0.3241, "step": 19350 }, { "epoch": 0.906497400103059, "grad_norm": 0.5738438712759378, "learning_rate": 2.9966856941415507e-06, "loss": 0.3189, "step": 19351 }, { "epoch": 0.9065442450929873, "grad_norm": 0.6352095684499427, "learning_rate": 2.996499823129922e-06, "loss": 0.3206, "step": 19352 }, { "epoch": 0.9065910900829156, "grad_norm": 0.647642742528912, "learning_rate": 2.996313949261062e-06, "loss": 0.3252, "step": 19353 }, { "epoch": 0.906637935072844, "grad_norm": 0.5772750899885413, "learning_rate": 2.99612807253604e-06, "loss": 0.3171, "step": 19354 }, { "epoch": 0.9066847800627723, "grad_norm": 0.5525047877658557, "learning_rate": 2.9959421929559255e-06, "loss": 0.3109, "step": 19355 }, { "epoch": 0.9067316250527007, "grad_norm": 0.561969441280809, "learning_rate": 2.995756310521789e-06, "loss": 0.3075, "step": 19356 }, { "epoch": 0.9067784700426289, "grad_norm": 0.5453788769298207, "learning_rate": 2.9955704252346984e-06, "loss": 0.3168, "step": 19357 }, { "epoch": 0.9068253150325573, "grad_norm": 0.6048572877920316, "learning_rate": 2.995384537095725e-06, "loss": 0.3182, "step": 19358 }, { "epoch": 0.9068721600224856, "grad_norm": 0.5956961017173112, "learning_rate": 2.9951986461059385e-06, "loss": 0.3127, "step": 19359 }, { "epoch": 0.906919005012414, "grad_norm": 0.6399991888473959, "learning_rate": 2.9950127522664073e-06, "loss": 0.3185, "step": 19360 }, { "epoch": 0.9069658500023422, "grad_norm": 0.5923235779844017, "learning_rate": 2.9948268555782035e-06, "loss": 0.3194, "step": 19361 }, { "epoch": 0.9070126949922706, "grad_norm": 0.5798947216438206, "learning_rate": 2.994640956042395e-06, "loss": 0.3109, "step": 19362 }, { "epoch": 0.9070595399821989, "grad_norm": 0.6023876059039626, "learning_rate": 2.9944550536600515e-06, "loss": 0.3256, "step": 19363 }, { "epoch": 0.9071063849721273, "grad_norm": 0.6498848866072194, "learning_rate": 2.994269148432243e-06, "loss": 0.3486, "step": 19364 }, { "epoch": 0.9071532299620556, "grad_norm": 0.56888682632021, "learning_rate": 2.9940832403600416e-06, "loss": 0.3034, "step": 19365 }, { "epoch": 0.9072000749519838, "grad_norm": 0.600121161372846, "learning_rate": 2.9938973294445146e-06, "loss": 0.3122, "step": 19366 }, { "epoch": 0.9072469199419122, "grad_norm": 0.6103248072435611, "learning_rate": 2.9937114156867318e-06, "loss": 0.3153, "step": 19367 }, { "epoch": 0.9072937649318406, "grad_norm": 0.58256377765928, "learning_rate": 2.9935254990877645e-06, "loss": 0.3032, "step": 19368 }, { "epoch": 0.9073406099217689, "grad_norm": 0.6196208908585351, "learning_rate": 2.993339579648682e-06, "loss": 0.3267, "step": 19369 }, { "epoch": 0.9073874549116971, "grad_norm": 0.5788293022970353, "learning_rate": 2.993153657370554e-06, "loss": 0.3203, "step": 19370 }, { "epoch": 0.9074342999016255, "grad_norm": 0.6341830431740048, "learning_rate": 2.992967732254451e-06, "loss": 0.3445, "step": 19371 }, { "epoch": 0.9074811448915538, "grad_norm": 0.5766244534014063, "learning_rate": 2.992781804301443e-06, "loss": 0.2981, "step": 19372 }, { "epoch": 0.9075279898814822, "grad_norm": 0.5535046063975471, "learning_rate": 2.992595873512598e-06, "loss": 0.3217, "step": 19373 }, { "epoch": 0.9075748348714106, "grad_norm": 0.5613821149616591, "learning_rate": 2.9924099398889895e-06, "loss": 0.323, "step": 19374 }, { "epoch": 0.9076216798613388, "grad_norm": 0.6179991969870657, "learning_rate": 2.9922240034316843e-06, "loss": 0.3115, "step": 19375 }, { "epoch": 0.9076685248512671, "grad_norm": 0.565954474925705, "learning_rate": 2.992038064141754e-06, "loss": 0.3014, "step": 19376 }, { "epoch": 0.9077153698411955, "grad_norm": 0.5877524720331125, "learning_rate": 2.9918521220202685e-06, "loss": 0.3055, "step": 19377 }, { "epoch": 0.9077622148311238, "grad_norm": 0.576456312774558, "learning_rate": 2.991666177068297e-06, "loss": 0.3209, "step": 19378 }, { "epoch": 0.9078090598210521, "grad_norm": 0.6337277497445001, "learning_rate": 2.991480229286911e-06, "loss": 0.3322, "step": 19379 }, { "epoch": 0.9078559048109804, "grad_norm": 0.6149073635361182, "learning_rate": 2.9912942786771794e-06, "loss": 0.3104, "step": 19380 }, { "epoch": 0.9079027498009088, "grad_norm": 0.6000163652106008, "learning_rate": 2.991108325240173e-06, "loss": 0.3124, "step": 19381 }, { "epoch": 0.9079495947908371, "grad_norm": 0.5874373186354933, "learning_rate": 2.9909223689769613e-06, "loss": 0.3106, "step": 19382 }, { "epoch": 0.9079964397807655, "grad_norm": 0.5550896125176192, "learning_rate": 2.990736409888615e-06, "loss": 0.3198, "step": 19383 }, { "epoch": 0.9080432847706937, "grad_norm": 0.5635011054673501, "learning_rate": 2.990550447976204e-06, "loss": 0.3033, "step": 19384 }, { "epoch": 0.9080901297606221, "grad_norm": 0.552274786151439, "learning_rate": 2.9903644832407973e-06, "loss": 0.2907, "step": 19385 }, { "epoch": 0.9081369747505504, "grad_norm": 0.6368390227130162, "learning_rate": 2.990178515683468e-06, "loss": 0.3286, "step": 19386 }, { "epoch": 0.9081838197404788, "grad_norm": 0.5853018514836019, "learning_rate": 2.9899925453052835e-06, "loss": 0.3083, "step": 19387 }, { "epoch": 0.908230664730407, "grad_norm": 0.5819107685713543, "learning_rate": 2.9898065721073155e-06, "loss": 0.3234, "step": 19388 }, { "epoch": 0.9082775097203354, "grad_norm": 0.5454128328864769, "learning_rate": 2.9896205960906337e-06, "loss": 0.2933, "step": 19389 }, { "epoch": 0.9083243547102637, "grad_norm": 0.5588300184468272, "learning_rate": 2.9894346172563086e-06, "loss": 0.3245, "step": 19390 }, { "epoch": 0.9083711997001921, "grad_norm": 0.6306697300296751, "learning_rate": 2.98924863560541e-06, "loss": 0.3186, "step": 19391 }, { "epoch": 0.9084180446901204, "grad_norm": 0.5849855867867171, "learning_rate": 2.9890626511390087e-06, "loss": 0.3065, "step": 19392 }, { "epoch": 0.9084648896800487, "grad_norm": 0.592161969434728, "learning_rate": 2.9888766638581753e-06, "loss": 0.323, "step": 19393 }, { "epoch": 0.908511734669977, "grad_norm": 0.5956571074321564, "learning_rate": 2.988690673763978e-06, "loss": 0.312, "step": 19394 }, { "epoch": 0.9085585796599054, "grad_norm": 0.5390531456119874, "learning_rate": 2.988504680857489e-06, "loss": 0.2922, "step": 19395 }, { "epoch": 0.9086054246498337, "grad_norm": 0.5546918796884502, "learning_rate": 2.9883186851397792e-06, "loss": 0.2946, "step": 19396 }, { "epoch": 0.908652269639762, "grad_norm": 0.5826993344022107, "learning_rate": 2.9881326866119186e-06, "loss": 0.3198, "step": 19397 }, { "epoch": 0.9086991146296903, "grad_norm": 0.5663371639620736, "learning_rate": 2.987946685274976e-06, "loss": 0.3237, "step": 19398 }, { "epoch": 0.9087459596196187, "grad_norm": 0.5571449885255393, "learning_rate": 2.987760681130023e-06, "loss": 0.3097, "step": 19399 }, { "epoch": 0.908792804609547, "grad_norm": 0.6284799969580739, "learning_rate": 2.987574674178131e-06, "loss": 0.3418, "step": 19400 }, { "epoch": 0.9088396495994754, "grad_norm": 0.5824617321735592, "learning_rate": 2.9873886644203676e-06, "loss": 0.2981, "step": 19401 }, { "epoch": 0.9088864945894036, "grad_norm": 0.6146096683765845, "learning_rate": 2.987202651857806e-06, "loss": 0.3167, "step": 19402 }, { "epoch": 0.908933339579332, "grad_norm": 0.5680176547677442, "learning_rate": 2.9870166364915147e-06, "loss": 0.303, "step": 19403 }, { "epoch": 0.9089801845692603, "grad_norm": 0.6051595754166901, "learning_rate": 2.986830618322566e-06, "loss": 0.3096, "step": 19404 }, { "epoch": 0.9090270295591887, "grad_norm": 0.5776734871658584, "learning_rate": 2.9866445973520297e-06, "loss": 0.3135, "step": 19405 }, { "epoch": 0.9090738745491169, "grad_norm": 0.5862265355945274, "learning_rate": 2.986458573580975e-06, "loss": 0.2965, "step": 19406 }, { "epoch": 0.9091207195390453, "grad_norm": 0.6138698074739599, "learning_rate": 2.986272547010474e-06, "loss": 0.3218, "step": 19407 }, { "epoch": 0.9091675645289736, "grad_norm": 0.6092854281008863, "learning_rate": 2.986086517641597e-06, "loss": 0.3177, "step": 19408 }, { "epoch": 0.909214409518902, "grad_norm": 0.5740282145006894, "learning_rate": 2.985900485475414e-06, "loss": 0.2983, "step": 19409 }, { "epoch": 0.9092612545088303, "grad_norm": 0.5961863830618686, "learning_rate": 2.9857144505129955e-06, "loss": 0.3201, "step": 19410 }, { "epoch": 0.9093080994987586, "grad_norm": 0.6290939925119566, "learning_rate": 2.985528412755413e-06, "loss": 0.3235, "step": 19411 }, { "epoch": 0.9093549444886869, "grad_norm": 0.535225623706555, "learning_rate": 2.9853423722037356e-06, "loss": 0.3246, "step": 19412 }, { "epoch": 0.9094017894786153, "grad_norm": 0.615637823403909, "learning_rate": 2.985156328859035e-06, "loss": 0.3189, "step": 19413 }, { "epoch": 0.9094486344685436, "grad_norm": 0.5924705126116412, "learning_rate": 2.984970282722383e-06, "loss": 0.339, "step": 19414 }, { "epoch": 0.9094954794584719, "grad_norm": 0.5681145761735537, "learning_rate": 2.9847842337948477e-06, "loss": 0.3337, "step": 19415 }, { "epoch": 0.9095423244484002, "grad_norm": 0.5744146446566474, "learning_rate": 2.984598182077501e-06, "loss": 0.3136, "step": 19416 }, { "epoch": 0.9095891694383286, "grad_norm": 0.5750537412809802, "learning_rate": 2.9844121275714137e-06, "loss": 0.3008, "step": 19417 }, { "epoch": 0.9096360144282569, "grad_norm": 0.5948936196405713, "learning_rate": 2.984226070277657e-06, "loss": 0.3104, "step": 19418 }, { "epoch": 0.9096828594181853, "grad_norm": 0.6072607875939376, "learning_rate": 2.9840400101973e-06, "loss": 0.316, "step": 19419 }, { "epoch": 0.9097297044081135, "grad_norm": 0.5995218417972996, "learning_rate": 2.9838539473314155e-06, "loss": 0.3124, "step": 19420 }, { "epoch": 0.9097765493980419, "grad_norm": 0.5501360721041203, "learning_rate": 2.9836678816810727e-06, "loss": 0.298, "step": 19421 }, { "epoch": 0.9098233943879702, "grad_norm": 0.5969898947659537, "learning_rate": 2.983481813247342e-06, "loss": 0.3218, "step": 19422 }, { "epoch": 0.9098702393778986, "grad_norm": 0.6112610812502828, "learning_rate": 2.9832957420312956e-06, "loss": 0.3087, "step": 19423 }, { "epoch": 0.9099170843678268, "grad_norm": 0.5856136974294844, "learning_rate": 2.983109668034004e-06, "loss": 0.3112, "step": 19424 }, { "epoch": 0.9099639293577552, "grad_norm": 0.5754891927983654, "learning_rate": 2.9829235912565375e-06, "loss": 0.2937, "step": 19425 }, { "epoch": 0.9100107743476835, "grad_norm": 0.6092177661569179, "learning_rate": 2.982737511699967e-06, "loss": 0.3189, "step": 19426 }, { "epoch": 0.9100576193376119, "grad_norm": 0.6457107170678611, "learning_rate": 2.982551429365363e-06, "loss": 0.3343, "step": 19427 }, { "epoch": 0.9101044643275402, "grad_norm": 0.6034876957911091, "learning_rate": 2.9823653442537974e-06, "loss": 0.3273, "step": 19428 }, { "epoch": 0.9101513093174685, "grad_norm": 0.6013291674204087, "learning_rate": 2.982179256366341e-06, "loss": 0.3185, "step": 19429 }, { "epoch": 0.9101981543073968, "grad_norm": 0.6004437925443078, "learning_rate": 2.9819931657040628e-06, "loss": 0.3276, "step": 19430 }, { "epoch": 0.9102449992973252, "grad_norm": 0.6573667763181243, "learning_rate": 2.9818070722680358e-06, "loss": 0.3302, "step": 19431 }, { "epoch": 0.9102918442872535, "grad_norm": 0.6059590136665757, "learning_rate": 2.9816209760593307e-06, "loss": 0.3267, "step": 19432 }, { "epoch": 0.9103386892771818, "grad_norm": 0.5817399091201134, "learning_rate": 2.981434877079017e-06, "loss": 0.304, "step": 19433 }, { "epoch": 0.9103855342671101, "grad_norm": 0.6011902629742948, "learning_rate": 2.9812487753281668e-06, "loss": 0.3274, "step": 19434 }, { "epoch": 0.9104323792570385, "grad_norm": 0.5870135820304536, "learning_rate": 2.9810626708078518e-06, "loss": 0.3312, "step": 19435 }, { "epoch": 0.9104792242469668, "grad_norm": 0.5828547509381574, "learning_rate": 2.9808765635191413e-06, "loss": 0.3208, "step": 19436 }, { "epoch": 0.9105260692368952, "grad_norm": 0.5847412406567062, "learning_rate": 2.9806904534631065e-06, "loss": 0.3086, "step": 19437 }, { "epoch": 0.9105729142268234, "grad_norm": 0.6045655276114951, "learning_rate": 2.9805043406408196e-06, "loss": 0.3292, "step": 19438 }, { "epoch": 0.9106197592167518, "grad_norm": 0.590176550448826, "learning_rate": 2.980318225053351e-06, "loss": 0.3357, "step": 19439 }, { "epoch": 0.9106666042066801, "grad_norm": 0.5788029293769487, "learning_rate": 2.9801321067017713e-06, "loss": 0.32, "step": 19440 }, { "epoch": 0.9107134491966085, "grad_norm": 0.5942517898568015, "learning_rate": 2.9799459855871525e-06, "loss": 0.3259, "step": 19441 }, { "epoch": 0.9107602941865367, "grad_norm": 0.593087892447658, "learning_rate": 2.979759861710565e-06, "loss": 0.3143, "step": 19442 }, { "epoch": 0.910807139176465, "grad_norm": 0.5942504310760007, "learning_rate": 2.9795737350730802e-06, "loss": 0.3256, "step": 19443 }, { "epoch": 0.9108539841663934, "grad_norm": 0.5785998298152698, "learning_rate": 2.979387605675769e-06, "loss": 0.3235, "step": 19444 }, { "epoch": 0.9109008291563218, "grad_norm": 0.5571605924499108, "learning_rate": 2.979201473519702e-06, "loss": 0.3162, "step": 19445 }, { "epoch": 0.9109476741462501, "grad_norm": 0.648032863876365, "learning_rate": 2.979015338605952e-06, "loss": 0.3393, "step": 19446 }, { "epoch": 0.9109945191361783, "grad_norm": 0.5874871707087882, "learning_rate": 2.978829200935589e-06, "loss": 0.3013, "step": 19447 }, { "epoch": 0.9110413641261067, "grad_norm": 0.6293624324415388, "learning_rate": 2.9786430605096835e-06, "loss": 0.3373, "step": 19448 }, { "epoch": 0.911088209116035, "grad_norm": 0.5512281860060101, "learning_rate": 2.978456917329308e-06, "loss": 0.2951, "step": 19449 }, { "epoch": 0.9111350541059634, "grad_norm": 0.6213874527389616, "learning_rate": 2.9782707713955334e-06, "loss": 0.3435, "step": 19450 }, { "epoch": 0.9111818990958916, "grad_norm": 0.5504465448211308, "learning_rate": 2.97808462270943e-06, "loss": 0.3368, "step": 19451 }, { "epoch": 0.91122874408582, "grad_norm": 0.6376143276666316, "learning_rate": 2.9778984712720703e-06, "loss": 0.3334, "step": 19452 }, { "epoch": 0.9112755890757483, "grad_norm": 0.6252346098162537, "learning_rate": 2.977712317084526e-06, "loss": 0.3281, "step": 19453 }, { "epoch": 0.9113224340656767, "grad_norm": 0.6081747794803078, "learning_rate": 2.9775261601478656e-06, "loss": 0.2984, "step": 19454 }, { "epoch": 0.911369279055605, "grad_norm": 0.6282006489012272, "learning_rate": 2.9773400004631624e-06, "loss": 0.3098, "step": 19455 }, { "epoch": 0.9114161240455333, "grad_norm": 0.608955308594348, "learning_rate": 2.977153838031489e-06, "loss": 0.3028, "step": 19456 }, { "epoch": 0.9114629690354616, "grad_norm": 0.5731066834475107, "learning_rate": 2.976967672853914e-06, "loss": 0.329, "step": 19457 }, { "epoch": 0.91150981402539, "grad_norm": 0.6003174838927037, "learning_rate": 2.9767815049315095e-06, "loss": 0.3222, "step": 19458 }, { "epoch": 0.9115566590153183, "grad_norm": 0.586257734776787, "learning_rate": 2.976595334265348e-06, "loss": 0.306, "step": 19459 }, { "epoch": 0.9116035040052466, "grad_norm": 0.5752813432865176, "learning_rate": 2.9764091608565004e-06, "loss": 0.3098, "step": 19460 }, { "epoch": 0.9116503489951749, "grad_norm": 0.6055679902466157, "learning_rate": 2.976222984706037e-06, "loss": 0.3143, "step": 19461 }, { "epoch": 0.9116971939851033, "grad_norm": 0.5872908435667235, "learning_rate": 2.9760368058150307e-06, "loss": 0.3218, "step": 19462 }, { "epoch": 0.9117440389750316, "grad_norm": 0.5336368115366913, "learning_rate": 2.9758506241845517e-06, "loss": 0.3191, "step": 19463 }, { "epoch": 0.91179088396496, "grad_norm": 0.5969215734359257, "learning_rate": 2.975664439815672e-06, "loss": 0.321, "step": 19464 }, { "epoch": 0.9118377289548882, "grad_norm": 0.5794962984601912, "learning_rate": 2.9754782527094634e-06, "loss": 0.3002, "step": 19465 }, { "epoch": 0.9118845739448166, "grad_norm": 0.6112265938421274, "learning_rate": 2.9752920628669966e-06, "loss": 0.3218, "step": 19466 }, { "epoch": 0.9119314189347449, "grad_norm": 0.6157902250733411, "learning_rate": 2.9751058702893444e-06, "loss": 0.3318, "step": 19467 }, { "epoch": 0.9119782639246733, "grad_norm": 0.6033723831068156, "learning_rate": 2.974919674977576e-06, "loss": 0.3278, "step": 19468 }, { "epoch": 0.9120251089146015, "grad_norm": 0.6101500994547474, "learning_rate": 2.9747334769327654e-06, "loss": 0.3198, "step": 19469 }, { "epoch": 0.9120719539045299, "grad_norm": 0.5843359372069896, "learning_rate": 2.974547276155982e-06, "loss": 0.3218, "step": 19470 }, { "epoch": 0.9121187988944582, "grad_norm": 0.5740402796460911, "learning_rate": 2.9743610726482984e-06, "loss": 0.3122, "step": 19471 }, { "epoch": 0.9121656438843866, "grad_norm": 0.5723460996985225, "learning_rate": 2.9741748664107868e-06, "loss": 0.3366, "step": 19472 }, { "epoch": 0.9122124888743149, "grad_norm": 0.5777460351534293, "learning_rate": 2.973988657444517e-06, "loss": 0.3187, "step": 19473 }, { "epoch": 0.9122593338642432, "grad_norm": 0.6249927257923719, "learning_rate": 2.973802445750562e-06, "loss": 0.3192, "step": 19474 }, { "epoch": 0.9123061788541715, "grad_norm": 0.6300800573227198, "learning_rate": 2.9736162313299938e-06, "loss": 0.3108, "step": 19475 }, { "epoch": 0.9123530238440999, "grad_norm": 0.5752802035094075, "learning_rate": 2.9734300141838824e-06, "loss": 0.3061, "step": 19476 }, { "epoch": 0.9123998688340282, "grad_norm": 0.663751070159736, "learning_rate": 2.9732437943133e-06, "loss": 0.343, "step": 19477 }, { "epoch": 0.9124467138239565, "grad_norm": 0.6147355082375179, "learning_rate": 2.9730575717193193e-06, "loss": 0.3185, "step": 19478 }, { "epoch": 0.9124935588138848, "grad_norm": 0.5584496932134206, "learning_rate": 2.9728713464030106e-06, "loss": 0.306, "step": 19479 }, { "epoch": 0.9125404038038132, "grad_norm": 0.5921573177267457, "learning_rate": 2.9726851183654464e-06, "loss": 0.3312, "step": 19480 }, { "epoch": 0.9125872487937415, "grad_norm": 0.5857539682879904, "learning_rate": 2.972498887607699e-06, "loss": 0.3087, "step": 19481 }, { "epoch": 0.9126340937836699, "grad_norm": 0.5651339383898828, "learning_rate": 2.9723126541308376e-06, "loss": 0.3036, "step": 19482 }, { "epoch": 0.9126809387735981, "grad_norm": 0.5206008384587879, "learning_rate": 2.972126417935936e-06, "loss": 0.2903, "step": 19483 }, { "epoch": 0.9127277837635265, "grad_norm": 0.5965264162454962, "learning_rate": 2.9719401790240664e-06, "loss": 0.3309, "step": 19484 }, { "epoch": 0.9127746287534548, "grad_norm": 0.6077420621463349, "learning_rate": 2.971753937396299e-06, "loss": 0.3215, "step": 19485 }, { "epoch": 0.9128214737433832, "grad_norm": 0.5687003090196557, "learning_rate": 2.971567693053706e-06, "loss": 0.3136, "step": 19486 }, { "epoch": 0.9128683187333114, "grad_norm": 0.5973574091726414, "learning_rate": 2.9713814459973605e-06, "loss": 0.3218, "step": 19487 }, { "epoch": 0.9129151637232398, "grad_norm": 0.6150987420913441, "learning_rate": 2.9711951962283325e-06, "loss": 0.3192, "step": 19488 }, { "epoch": 0.9129620087131681, "grad_norm": 0.5974935798935516, "learning_rate": 2.9710089437476946e-06, "loss": 0.326, "step": 19489 }, { "epoch": 0.9130088537030965, "grad_norm": 0.5974725640742922, "learning_rate": 2.970822688556519e-06, "loss": 0.3213, "step": 19490 }, { "epoch": 0.9130556986930248, "grad_norm": 0.5819663973735203, "learning_rate": 2.9706364306558773e-06, "loss": 0.3261, "step": 19491 }, { "epoch": 0.9131025436829531, "grad_norm": 0.6305348383030508, "learning_rate": 2.97045017004684e-06, "loss": 0.3014, "step": 19492 }, { "epoch": 0.9131493886728814, "grad_norm": 0.5756291304236273, "learning_rate": 2.9702639067304815e-06, "loss": 0.3358, "step": 19493 }, { "epoch": 0.9131962336628098, "grad_norm": 0.543629874480038, "learning_rate": 2.9700776407078717e-06, "loss": 0.3176, "step": 19494 }, { "epoch": 0.9132430786527381, "grad_norm": 0.5785654112266446, "learning_rate": 2.969891371980084e-06, "loss": 0.3226, "step": 19495 }, { "epoch": 0.9132899236426664, "grad_norm": 0.5709194552213314, "learning_rate": 2.969705100548189e-06, "loss": 0.3181, "step": 19496 }, { "epoch": 0.9133367686325947, "grad_norm": 0.5771645878776297, "learning_rate": 2.969518826413259e-06, "loss": 0.3266, "step": 19497 }, { "epoch": 0.9133836136225231, "grad_norm": 0.5755852164476571, "learning_rate": 2.9693325495763664e-06, "loss": 0.3174, "step": 19498 }, { "epoch": 0.9134304586124514, "grad_norm": 0.610926948166168, "learning_rate": 2.969146270038583e-06, "loss": 0.3145, "step": 19499 }, { "epoch": 0.9134773036023798, "grad_norm": 0.5976762298251452, "learning_rate": 2.9689599878009807e-06, "loss": 0.3138, "step": 19500 }, { "epoch": 0.913524148592308, "grad_norm": 0.5477555836613633, "learning_rate": 2.9687737028646317e-06, "loss": 0.3168, "step": 19501 }, { "epoch": 0.9135709935822364, "grad_norm": 0.6053936563845209, "learning_rate": 2.968587415230608e-06, "loss": 0.33, "step": 19502 }, { "epoch": 0.9136178385721647, "grad_norm": 0.6225645581185165, "learning_rate": 2.968401124899981e-06, "loss": 0.319, "step": 19503 }, { "epoch": 0.9136646835620931, "grad_norm": 0.6791787407268136, "learning_rate": 2.968214831873823e-06, "loss": 0.3096, "step": 19504 }, { "epoch": 0.9137115285520213, "grad_norm": 0.6128883631174719, "learning_rate": 2.9680285361532075e-06, "loss": 0.3237, "step": 19505 }, { "epoch": 0.9137583735419497, "grad_norm": 0.5689639917256635, "learning_rate": 2.967842237739204e-06, "loss": 0.3034, "step": 19506 }, { "epoch": 0.913805218531878, "grad_norm": 0.6163359651036864, "learning_rate": 2.9676559366328867e-06, "loss": 0.3173, "step": 19507 }, { "epoch": 0.9138520635218064, "grad_norm": 0.5972210270028583, "learning_rate": 2.967469632835327e-06, "loss": 0.3229, "step": 19508 }, { "epoch": 0.9138989085117347, "grad_norm": 0.6681057082710686, "learning_rate": 2.9672833263475976e-06, "loss": 0.3364, "step": 19509 }, { "epoch": 0.913945753501663, "grad_norm": 0.5471909019936133, "learning_rate": 2.967097017170769e-06, "loss": 0.2889, "step": 19510 }, { "epoch": 0.9139925984915913, "grad_norm": 0.6035273488535138, "learning_rate": 2.9669107053059154e-06, "loss": 0.3209, "step": 19511 }, { "epoch": 0.9140394434815197, "grad_norm": 0.5696348933717882, "learning_rate": 2.9667243907541076e-06, "loss": 0.3165, "step": 19512 }, { "epoch": 0.914086288471448, "grad_norm": 0.626778749942676, "learning_rate": 2.9665380735164184e-06, "loss": 0.3335, "step": 19513 }, { "epoch": 0.9141331334613763, "grad_norm": 0.6219569419183983, "learning_rate": 2.9663517535939203e-06, "loss": 0.3267, "step": 19514 }, { "epoch": 0.9141799784513046, "grad_norm": 0.5733640706376016, "learning_rate": 2.9661654309876834e-06, "loss": 0.3258, "step": 19515 }, { "epoch": 0.914226823441233, "grad_norm": 0.5747600731251362, "learning_rate": 2.965979105698783e-06, "loss": 0.3229, "step": 19516 }, { "epoch": 0.9142736684311613, "grad_norm": 0.5877323258471987, "learning_rate": 2.9657927777282904e-06, "loss": 0.3226, "step": 19517 }, { "epoch": 0.9143205134210897, "grad_norm": 0.5640002459742208, "learning_rate": 2.965606447077276e-06, "loss": 0.317, "step": 19518 }, { "epoch": 0.9143673584110179, "grad_norm": 0.6150442826671936, "learning_rate": 2.9654201137468146e-06, "loss": 0.3044, "step": 19519 }, { "epoch": 0.9144142034009463, "grad_norm": 0.5955061683955595, "learning_rate": 2.9652337777379775e-06, "loss": 0.3177, "step": 19520 }, { "epoch": 0.9144610483908746, "grad_norm": 0.5617512107419385, "learning_rate": 2.9650474390518365e-06, "loss": 0.2908, "step": 19521 }, { "epoch": 0.914507893380803, "grad_norm": 0.5850069503142685, "learning_rate": 2.9648610976894645e-06, "loss": 0.3078, "step": 19522 }, { "epoch": 0.9145547383707312, "grad_norm": 0.6137463922408976, "learning_rate": 2.9646747536519337e-06, "loss": 0.3144, "step": 19523 }, { "epoch": 0.9146015833606596, "grad_norm": 0.5831246117931322, "learning_rate": 2.964488406940316e-06, "loss": 0.3008, "step": 19524 }, { "epoch": 0.9146484283505879, "grad_norm": 0.6896666029494286, "learning_rate": 2.964302057555685e-06, "loss": 0.3159, "step": 19525 }, { "epoch": 0.9146952733405163, "grad_norm": 0.5926324761424681, "learning_rate": 2.9641157054991123e-06, "loss": 0.3126, "step": 19526 }, { "epoch": 0.9147421183304446, "grad_norm": 0.616314079352874, "learning_rate": 2.96392935077167e-06, "loss": 0.3165, "step": 19527 }, { "epoch": 0.9147889633203728, "grad_norm": 0.6525578215372867, "learning_rate": 2.9637429933744306e-06, "loss": 0.3223, "step": 19528 }, { "epoch": 0.9148358083103012, "grad_norm": 0.6049975424825959, "learning_rate": 2.963556633308467e-06, "loss": 0.3195, "step": 19529 }, { "epoch": 0.9148826533002296, "grad_norm": 0.6210935179479814, "learning_rate": 2.9633702705748523e-06, "loss": 0.3135, "step": 19530 }, { "epoch": 0.9149294982901579, "grad_norm": 0.5487133892088261, "learning_rate": 2.9631839051746567e-06, "loss": 0.3047, "step": 19531 }, { "epoch": 0.9149763432800861, "grad_norm": 0.56229095246498, "learning_rate": 2.962997537108955e-06, "loss": 0.3137, "step": 19532 }, { "epoch": 0.9150231882700145, "grad_norm": 0.5604781775322871, "learning_rate": 2.9628111663788197e-06, "loss": 0.306, "step": 19533 }, { "epoch": 0.9150700332599428, "grad_norm": 0.6032355348132539, "learning_rate": 2.962624792985321e-06, "loss": 0.3134, "step": 19534 }, { "epoch": 0.9151168782498712, "grad_norm": 0.5778777930932494, "learning_rate": 2.9624384169295333e-06, "loss": 0.3199, "step": 19535 }, { "epoch": 0.9151637232397996, "grad_norm": 0.5804809280704667, "learning_rate": 2.9622520382125293e-06, "loss": 0.3349, "step": 19536 }, { "epoch": 0.9152105682297278, "grad_norm": 0.5656908565352637, "learning_rate": 2.9620656568353807e-06, "loss": 0.2911, "step": 19537 }, { "epoch": 0.9152574132196561, "grad_norm": 0.6150996822005664, "learning_rate": 2.96187927279916e-06, "loss": 0.2975, "step": 19538 }, { "epoch": 0.9153042582095845, "grad_norm": 0.5895968242971683, "learning_rate": 2.961692886104941e-06, "loss": 0.3136, "step": 19539 }, { "epoch": 0.9153511031995128, "grad_norm": 0.5970224362464926, "learning_rate": 2.9615064967537947e-06, "loss": 0.3121, "step": 19540 }, { "epoch": 0.9153979481894411, "grad_norm": 0.5970807190629254, "learning_rate": 2.961320104746795e-06, "loss": 0.3241, "step": 19541 }, { "epoch": 0.9154447931793694, "grad_norm": 0.6072464707530788, "learning_rate": 2.961133710085014e-06, "loss": 0.3198, "step": 19542 }, { "epoch": 0.9154916381692978, "grad_norm": 0.5707270682143278, "learning_rate": 2.960947312769524e-06, "loss": 0.3154, "step": 19543 }, { "epoch": 0.9155384831592261, "grad_norm": 0.6119762579316823, "learning_rate": 2.960760912801398e-06, "loss": 0.322, "step": 19544 }, { "epoch": 0.9155853281491545, "grad_norm": 0.5394462692908271, "learning_rate": 2.9605745101817095e-06, "loss": 0.2951, "step": 19545 }, { "epoch": 0.9156321731390827, "grad_norm": 0.595661538167313, "learning_rate": 2.9603881049115295e-06, "loss": 0.319, "step": 19546 }, { "epoch": 0.9156790181290111, "grad_norm": 0.5654171115456122, "learning_rate": 2.960201696991933e-06, "loss": 0.3173, "step": 19547 }, { "epoch": 0.9157258631189394, "grad_norm": 0.6203878054433953, "learning_rate": 2.9600152864239906e-06, "loss": 0.3282, "step": 19548 }, { "epoch": 0.9157727081088678, "grad_norm": 0.6277102745721371, "learning_rate": 2.9598288732087755e-06, "loss": 0.3073, "step": 19549 }, { "epoch": 0.915819553098796, "grad_norm": 0.6034882341798381, "learning_rate": 2.9596424573473616e-06, "loss": 0.3327, "step": 19550 }, { "epoch": 0.9158663980887244, "grad_norm": 0.5623054860487373, "learning_rate": 2.9594560388408206e-06, "loss": 0.3051, "step": 19551 }, { "epoch": 0.9159132430786527, "grad_norm": 0.6041014439204784, "learning_rate": 2.9592696176902247e-06, "loss": 0.3193, "step": 19552 }, { "epoch": 0.9159600880685811, "grad_norm": 0.5844525959215309, "learning_rate": 2.959083193896648e-06, "loss": 0.3322, "step": 19553 }, { "epoch": 0.9160069330585094, "grad_norm": 0.5405287473421617, "learning_rate": 2.958896767461164e-06, "loss": 0.3061, "step": 19554 }, { "epoch": 0.9160537780484377, "grad_norm": 0.5795675174086554, "learning_rate": 2.9587103383848432e-06, "loss": 0.3193, "step": 19555 }, { "epoch": 0.916100623038366, "grad_norm": 0.647735945862086, "learning_rate": 2.95852390666876e-06, "loss": 0.3249, "step": 19556 }, { "epoch": 0.9161474680282944, "grad_norm": 0.6201186641843827, "learning_rate": 2.9583374723139876e-06, "loss": 0.3415, "step": 19557 }, { "epoch": 0.9161943130182227, "grad_norm": 0.5475504049476478, "learning_rate": 2.9581510353215975e-06, "loss": 0.3089, "step": 19558 }, { "epoch": 0.916241158008151, "grad_norm": 0.5817607341434521, "learning_rate": 2.957964595692663e-06, "loss": 0.3223, "step": 19559 }, { "epoch": 0.9162880029980793, "grad_norm": 0.5806090827785503, "learning_rate": 2.9577781534282583e-06, "loss": 0.2931, "step": 19560 }, { "epoch": 0.9163348479880077, "grad_norm": 0.6293335750167997, "learning_rate": 2.957591708529455e-06, "loss": 0.3379, "step": 19561 }, { "epoch": 0.916381692977936, "grad_norm": 0.6056537614600139, "learning_rate": 2.9574052609973265e-06, "loss": 0.3297, "step": 19562 }, { "epoch": 0.9164285379678644, "grad_norm": 0.5576708369786181, "learning_rate": 2.957218810832946e-06, "loss": 0.303, "step": 19563 }, { "epoch": 0.9164753829577926, "grad_norm": 0.5846451235229486, "learning_rate": 2.9570323580373856e-06, "loss": 0.3204, "step": 19564 }, { "epoch": 0.916522227947721, "grad_norm": 0.5607564762456682, "learning_rate": 2.9568459026117192e-06, "loss": 0.306, "step": 19565 }, { "epoch": 0.9165690729376493, "grad_norm": 0.5608282980719506, "learning_rate": 2.9566594445570197e-06, "loss": 0.2885, "step": 19566 }, { "epoch": 0.9166159179275777, "grad_norm": 0.5954568137402916, "learning_rate": 2.956472983874359e-06, "loss": 0.3103, "step": 19567 }, { "epoch": 0.9166627629175059, "grad_norm": 0.5685451643435381, "learning_rate": 2.9562865205648115e-06, "loss": 0.3029, "step": 19568 }, { "epoch": 0.9167096079074343, "grad_norm": 0.5387380800302906, "learning_rate": 2.95610005462945e-06, "loss": 0.2951, "step": 19569 }, { "epoch": 0.9167564528973626, "grad_norm": 0.6504741978110982, "learning_rate": 2.9559135860693465e-06, "loss": 0.3269, "step": 19570 }, { "epoch": 0.916803297887291, "grad_norm": 0.6015558265171567, "learning_rate": 2.955727114885576e-06, "loss": 0.3351, "step": 19571 }, { "epoch": 0.9168501428772193, "grad_norm": 0.6133267510295145, "learning_rate": 2.9555406410792106e-06, "loss": 0.3289, "step": 19572 }, { "epoch": 0.9168969878671476, "grad_norm": 0.5706896307020362, "learning_rate": 2.9553541646513216e-06, "loss": 0.3029, "step": 19573 }, { "epoch": 0.9169438328570759, "grad_norm": 0.5865118676504097, "learning_rate": 2.9551676856029846e-06, "loss": 0.3273, "step": 19574 }, { "epoch": 0.9169906778470043, "grad_norm": 0.5886874080619421, "learning_rate": 2.9549812039352732e-06, "loss": 0.3119, "step": 19575 }, { "epoch": 0.9170375228369326, "grad_norm": 0.6065364160131563, "learning_rate": 2.954794719649258e-06, "loss": 0.3015, "step": 19576 }, { "epoch": 0.9170843678268609, "grad_norm": 0.5678522377257907, "learning_rate": 2.9546082327460135e-06, "loss": 0.3035, "step": 19577 }, { "epoch": 0.9171312128167892, "grad_norm": 0.6160460677632463, "learning_rate": 2.9544217432266137e-06, "loss": 0.3157, "step": 19578 }, { "epoch": 0.9171780578067176, "grad_norm": 0.5729213254961483, "learning_rate": 2.9542352510921306e-06, "loss": 0.2921, "step": 19579 }, { "epoch": 0.9172249027966459, "grad_norm": 0.5705767960186678, "learning_rate": 2.9540487563436377e-06, "loss": 0.299, "step": 19580 }, { "epoch": 0.9172717477865743, "grad_norm": 0.5865208240728462, "learning_rate": 2.9538622589822087e-06, "loss": 0.3218, "step": 19581 }, { "epoch": 0.9173185927765025, "grad_norm": 0.6055845896431641, "learning_rate": 2.953675759008916e-06, "loss": 0.3241, "step": 19582 }, { "epoch": 0.9173654377664309, "grad_norm": 0.614342227094684, "learning_rate": 2.9534892564248334e-06, "loss": 0.3228, "step": 19583 }, { "epoch": 0.9174122827563592, "grad_norm": 0.569255589965193, "learning_rate": 2.9533027512310347e-06, "loss": 0.3444, "step": 19584 }, { "epoch": 0.9174591277462876, "grad_norm": 0.5646867615766447, "learning_rate": 2.9531162434285914e-06, "loss": 0.2951, "step": 19585 }, { "epoch": 0.9175059727362158, "grad_norm": 0.5867572695840998, "learning_rate": 2.952929733018579e-06, "loss": 0.3016, "step": 19586 }, { "epoch": 0.9175528177261442, "grad_norm": 0.6100598868657269, "learning_rate": 2.95274322000207e-06, "loss": 0.2989, "step": 19587 }, { "epoch": 0.9175996627160725, "grad_norm": 0.5450663402731776, "learning_rate": 2.952556704380137e-06, "loss": 0.3098, "step": 19588 }, { "epoch": 0.9176465077060009, "grad_norm": 0.5954126726418947, "learning_rate": 2.952370186153854e-06, "loss": 0.3408, "step": 19589 }, { "epoch": 0.9176933526959292, "grad_norm": 0.6147900420184467, "learning_rate": 2.952183665324294e-06, "loss": 0.3296, "step": 19590 }, { "epoch": 0.9177401976858575, "grad_norm": 0.5571779001007635, "learning_rate": 2.951997141892531e-06, "loss": 0.3049, "step": 19591 }, { "epoch": 0.9177870426757858, "grad_norm": 0.5854274299241471, "learning_rate": 2.9518106158596384e-06, "loss": 0.3256, "step": 19592 }, { "epoch": 0.9178338876657142, "grad_norm": 0.6195744010163702, "learning_rate": 2.951624087226689e-06, "loss": 0.3024, "step": 19593 }, { "epoch": 0.9178807326556425, "grad_norm": 0.5704068735590312, "learning_rate": 2.951437555994756e-06, "loss": 0.3186, "step": 19594 }, { "epoch": 0.9179275776455708, "grad_norm": 0.6030836703489487, "learning_rate": 2.951251022164913e-06, "loss": 0.326, "step": 19595 }, { "epoch": 0.9179744226354991, "grad_norm": 0.6015647150455199, "learning_rate": 2.951064485738235e-06, "loss": 0.3054, "step": 19596 }, { "epoch": 0.9180212676254275, "grad_norm": 0.5776161043520099, "learning_rate": 2.950877946715794e-06, "loss": 0.3128, "step": 19597 }, { "epoch": 0.9180681126153558, "grad_norm": 0.5599879855525738, "learning_rate": 2.950691405098663e-06, "loss": 0.316, "step": 19598 }, { "epoch": 0.9181149576052842, "grad_norm": 0.641801671204957, "learning_rate": 2.9505048608879173e-06, "loss": 0.3196, "step": 19599 }, { "epoch": 0.9181618025952124, "grad_norm": 0.5654382418195434, "learning_rate": 2.950318314084629e-06, "loss": 0.3032, "step": 19600 }, { "epoch": 0.9182086475851408, "grad_norm": 0.5486885466160606, "learning_rate": 2.950131764689871e-06, "loss": 0.3125, "step": 19601 }, { "epoch": 0.9182554925750691, "grad_norm": 0.6031429409717541, "learning_rate": 2.9499452127047183e-06, "loss": 0.3201, "step": 19602 }, { "epoch": 0.9183023375649975, "grad_norm": 0.5611566652360578, "learning_rate": 2.949758658130245e-06, "loss": 0.3079, "step": 19603 }, { "epoch": 0.9183491825549257, "grad_norm": 0.617872300409092, "learning_rate": 2.9495721009675227e-06, "loss": 0.3258, "step": 19604 }, { "epoch": 0.918396027544854, "grad_norm": 0.5971594850931881, "learning_rate": 2.9493855412176257e-06, "loss": 0.314, "step": 19605 }, { "epoch": 0.9184428725347824, "grad_norm": 0.5584461955139683, "learning_rate": 2.9491989788816287e-06, "loss": 0.3005, "step": 19606 }, { "epoch": 0.9184897175247108, "grad_norm": 0.6097101692482282, "learning_rate": 2.9490124139606045e-06, "loss": 0.3282, "step": 19607 }, { "epoch": 0.9185365625146391, "grad_norm": 0.6184876068866705, "learning_rate": 2.9488258464556258e-06, "loss": 0.3419, "step": 19608 }, { "epoch": 0.9185834075045674, "grad_norm": 0.5849484265539738, "learning_rate": 2.948639276367768e-06, "loss": 0.3169, "step": 19609 }, { "epoch": 0.9186302524944957, "grad_norm": 0.6151607459469496, "learning_rate": 2.9484527036981037e-06, "loss": 0.3245, "step": 19610 }, { "epoch": 0.918677097484424, "grad_norm": 0.5592351554253707, "learning_rate": 2.9482661284477066e-06, "loss": 0.2922, "step": 19611 }, { "epoch": 0.9187239424743524, "grad_norm": 0.5799088823009769, "learning_rate": 2.948079550617651e-06, "loss": 0.2939, "step": 19612 }, { "epoch": 0.9187707874642806, "grad_norm": 0.670013148875168, "learning_rate": 2.94789297020901e-06, "loss": 0.3068, "step": 19613 }, { "epoch": 0.918817632454209, "grad_norm": 0.5607661437396021, "learning_rate": 2.9477063872228575e-06, "loss": 0.2858, "step": 19614 }, { "epoch": 0.9188644774441374, "grad_norm": 0.6178857301314279, "learning_rate": 2.9475198016602667e-06, "loss": 0.3083, "step": 19615 }, { "epoch": 0.9189113224340657, "grad_norm": 0.5608585673833002, "learning_rate": 2.9473332135223125e-06, "loss": 0.3149, "step": 19616 }, { "epoch": 0.918958167423994, "grad_norm": 0.5746321917507367, "learning_rate": 2.947146622810068e-06, "loss": 0.3144, "step": 19617 }, { "epoch": 0.9190050124139223, "grad_norm": 0.5994604887765841, "learning_rate": 2.9469600295246074e-06, "loss": 0.3225, "step": 19618 }, { "epoch": 0.9190518574038506, "grad_norm": 0.534022202988829, "learning_rate": 2.946773433667004e-06, "loss": 0.297, "step": 19619 }, { "epoch": 0.919098702393779, "grad_norm": 0.5799052029475434, "learning_rate": 2.946586835238332e-06, "loss": 0.3151, "step": 19620 }, { "epoch": 0.9191455473837074, "grad_norm": 0.605560423464005, "learning_rate": 2.9464002342396647e-06, "loss": 0.3196, "step": 19621 }, { "epoch": 0.9191923923736356, "grad_norm": 0.6279982653085806, "learning_rate": 2.9462136306720757e-06, "loss": 0.3338, "step": 19622 }, { "epoch": 0.9192392373635639, "grad_norm": 0.6292694157905313, "learning_rate": 2.9460270245366394e-06, "loss": 0.3149, "step": 19623 }, { "epoch": 0.9192860823534923, "grad_norm": 0.5880298494637626, "learning_rate": 2.945840415834431e-06, "loss": 0.303, "step": 19624 }, { "epoch": 0.9193329273434206, "grad_norm": 0.556504976296627, "learning_rate": 2.9456538045665225e-06, "loss": 0.3002, "step": 19625 }, { "epoch": 0.919379772333349, "grad_norm": 0.5959353843562972, "learning_rate": 2.9454671907339877e-06, "loss": 0.3137, "step": 19626 }, { "epoch": 0.9194266173232772, "grad_norm": 0.5378313751315386, "learning_rate": 2.9452805743379014e-06, "loss": 0.3141, "step": 19627 }, { "epoch": 0.9194734623132056, "grad_norm": 0.6065828504553465, "learning_rate": 2.945093955379338e-06, "loss": 0.3551, "step": 19628 }, { "epoch": 0.9195203073031339, "grad_norm": 0.5643259476724395, "learning_rate": 2.94490733385937e-06, "loss": 0.3119, "step": 19629 }, { "epoch": 0.9195671522930623, "grad_norm": 0.5687046556504535, "learning_rate": 2.9447207097790725e-06, "loss": 0.3074, "step": 19630 }, { "epoch": 0.9196139972829905, "grad_norm": 0.5896633971887196, "learning_rate": 2.944534083139519e-06, "loss": 0.3258, "step": 19631 }, { "epoch": 0.9196608422729189, "grad_norm": 0.5920460938457753, "learning_rate": 2.9443474539417837e-06, "loss": 0.3167, "step": 19632 }, { "epoch": 0.9197076872628472, "grad_norm": 0.5922671819202039, "learning_rate": 2.9441608221869407e-06, "loss": 0.3004, "step": 19633 }, { "epoch": 0.9197545322527756, "grad_norm": 0.6637904477052244, "learning_rate": 2.943974187876063e-06, "loss": 0.3452, "step": 19634 }, { "epoch": 0.9198013772427039, "grad_norm": 0.595566110739136, "learning_rate": 2.943787551010226e-06, "loss": 0.3206, "step": 19635 }, { "epoch": 0.9198482222326322, "grad_norm": 0.5939671297804003, "learning_rate": 2.9436009115905036e-06, "loss": 0.3171, "step": 19636 }, { "epoch": 0.9198950672225605, "grad_norm": 0.6503983319346859, "learning_rate": 2.9434142696179686e-06, "loss": 0.3393, "step": 19637 }, { "epoch": 0.9199419122124889, "grad_norm": 0.6219228406955537, "learning_rate": 2.9432276250936964e-06, "loss": 0.3139, "step": 19638 }, { "epoch": 0.9199887572024172, "grad_norm": 0.5837821282265149, "learning_rate": 2.9430409780187607e-06, "loss": 0.3245, "step": 19639 }, { "epoch": 0.9200356021923455, "grad_norm": 0.5812904244949275, "learning_rate": 2.942854328394235e-06, "loss": 0.3014, "step": 19640 }, { "epoch": 0.9200824471822738, "grad_norm": 0.6119715229985395, "learning_rate": 2.942667676221194e-06, "loss": 0.3174, "step": 19641 }, { "epoch": 0.9201292921722022, "grad_norm": 0.6212718059516248, "learning_rate": 2.942481021500713e-06, "loss": 0.3184, "step": 19642 }, { "epoch": 0.9201761371621305, "grad_norm": 0.5528410321239193, "learning_rate": 2.942294364233863e-06, "loss": 0.3158, "step": 19643 }, { "epoch": 0.9202229821520589, "grad_norm": 0.5525277692124708, "learning_rate": 2.942107704421721e-06, "loss": 0.2944, "step": 19644 }, { "epoch": 0.9202698271419871, "grad_norm": 0.5740071153081132, "learning_rate": 2.941921042065361e-06, "loss": 0.3139, "step": 19645 }, { "epoch": 0.9203166721319155, "grad_norm": 0.5982404889112193, "learning_rate": 2.9417343771658556e-06, "loss": 0.3336, "step": 19646 }, { "epoch": 0.9203635171218438, "grad_norm": 0.603542629927689, "learning_rate": 2.94154770972428e-06, "loss": 0.3375, "step": 19647 }, { "epoch": 0.9204103621117722, "grad_norm": 0.5892941909364462, "learning_rate": 2.9413610397417088e-06, "loss": 0.3381, "step": 19648 }, { "epoch": 0.9204572071017004, "grad_norm": 0.6393296250166802, "learning_rate": 2.9411743672192157e-06, "loss": 0.3247, "step": 19649 }, { "epoch": 0.9205040520916288, "grad_norm": 0.576349555635767, "learning_rate": 2.940987692157874e-06, "loss": 0.3055, "step": 19650 }, { "epoch": 0.9205508970815571, "grad_norm": 0.6401009923900529, "learning_rate": 2.9408010145587596e-06, "loss": 0.3291, "step": 19651 }, { "epoch": 0.9205977420714855, "grad_norm": 0.652096790040078, "learning_rate": 2.9406143344229464e-06, "loss": 0.3323, "step": 19652 }, { "epoch": 0.9206445870614138, "grad_norm": 0.6040913795245668, "learning_rate": 2.9404276517515083e-06, "loss": 0.3132, "step": 19653 }, { "epoch": 0.9206914320513421, "grad_norm": 0.5927168607927487, "learning_rate": 2.94024096654552e-06, "loss": 0.3188, "step": 19654 }, { "epoch": 0.9207382770412704, "grad_norm": 0.6128104724077342, "learning_rate": 2.9400542788060547e-06, "loss": 0.3143, "step": 19655 }, { "epoch": 0.9207851220311988, "grad_norm": 0.5801627508759916, "learning_rate": 2.9398675885341887e-06, "loss": 0.3136, "step": 19656 }, { "epoch": 0.9208319670211271, "grad_norm": 0.5704740422950858, "learning_rate": 2.9396808957309948e-06, "loss": 0.3183, "step": 19657 }, { "epoch": 0.9208788120110554, "grad_norm": 0.576165961835408, "learning_rate": 2.9394942003975474e-06, "loss": 0.304, "step": 19658 }, { "epoch": 0.9209256570009837, "grad_norm": 0.5638950663277372, "learning_rate": 2.9393075025349223e-06, "loss": 0.3075, "step": 19659 }, { "epoch": 0.9209725019909121, "grad_norm": 0.5724295637874992, "learning_rate": 2.9391208021441923e-06, "loss": 0.3116, "step": 19660 }, { "epoch": 0.9210193469808404, "grad_norm": 0.6509570265464573, "learning_rate": 2.9389340992264327e-06, "loss": 0.3208, "step": 19661 }, { "epoch": 0.9210661919707688, "grad_norm": 0.5607514326896178, "learning_rate": 2.9387473937827175e-06, "loss": 0.3213, "step": 19662 }, { "epoch": 0.921113036960697, "grad_norm": 0.6292840005266883, "learning_rate": 2.938560685814122e-06, "loss": 0.3306, "step": 19663 }, { "epoch": 0.9211598819506254, "grad_norm": 0.5650185168898929, "learning_rate": 2.938373975321719e-06, "loss": 0.3215, "step": 19664 }, { "epoch": 0.9212067269405537, "grad_norm": 0.640872760983033, "learning_rate": 2.938187262306584e-06, "loss": 0.351, "step": 19665 }, { "epoch": 0.9212535719304821, "grad_norm": 0.5478050877699812, "learning_rate": 2.9380005467697926e-06, "loss": 0.2983, "step": 19666 }, { "epoch": 0.9213004169204103, "grad_norm": 0.6030414900808744, "learning_rate": 2.937813828712417e-06, "loss": 0.3327, "step": 19667 }, { "epoch": 0.9213472619103387, "grad_norm": 0.5645645820317108, "learning_rate": 2.937627108135533e-06, "loss": 0.3084, "step": 19668 }, { "epoch": 0.921394106900267, "grad_norm": 0.5953010648684602, "learning_rate": 2.937440385040215e-06, "loss": 0.3146, "step": 19669 }, { "epoch": 0.9214409518901954, "grad_norm": 0.6624878719481981, "learning_rate": 2.937253659427538e-06, "loss": 0.3033, "step": 19670 }, { "epoch": 0.9214877968801237, "grad_norm": 0.5805439489615122, "learning_rate": 2.9370669312985755e-06, "loss": 0.3225, "step": 19671 }, { "epoch": 0.921534641870052, "grad_norm": 0.573330408219151, "learning_rate": 2.9368802006544028e-06, "loss": 0.319, "step": 19672 }, { "epoch": 0.9215814868599803, "grad_norm": 0.5484448734283204, "learning_rate": 2.9366934674960952e-06, "loss": 0.2982, "step": 19673 }, { "epoch": 0.9216283318499087, "grad_norm": 0.5972679463532682, "learning_rate": 2.936506731824725e-06, "loss": 0.3283, "step": 19674 }, { "epoch": 0.921675176839837, "grad_norm": 0.5749592288745716, "learning_rate": 2.936319993641369e-06, "loss": 0.316, "step": 19675 }, { "epoch": 0.9217220218297653, "grad_norm": 0.5792070886883128, "learning_rate": 2.9361332529471015e-06, "loss": 0.3109, "step": 19676 }, { "epoch": 0.9217688668196936, "grad_norm": 0.5928879695442402, "learning_rate": 2.9359465097429963e-06, "loss": 0.3157, "step": 19677 }, { "epoch": 0.921815711809622, "grad_norm": 0.5949535645446397, "learning_rate": 2.935759764030128e-06, "loss": 0.3092, "step": 19678 }, { "epoch": 0.9218625567995503, "grad_norm": 0.7005501494712774, "learning_rate": 2.935573015809573e-06, "loss": 0.3339, "step": 19679 }, { "epoch": 0.9219094017894787, "grad_norm": 0.6186629423002745, "learning_rate": 2.9353862650824044e-06, "loss": 0.316, "step": 19680 }, { "epoch": 0.9219562467794069, "grad_norm": 0.5945194773273604, "learning_rate": 2.9351995118496968e-06, "loss": 0.3295, "step": 19681 }, { "epoch": 0.9220030917693353, "grad_norm": 0.6316566802415438, "learning_rate": 2.9350127561125253e-06, "loss": 0.3188, "step": 19682 }, { "epoch": 0.9220499367592636, "grad_norm": 0.5735092111094694, "learning_rate": 2.934825997871965e-06, "loss": 0.3136, "step": 19683 }, { "epoch": 0.922096781749192, "grad_norm": 0.5422849646351868, "learning_rate": 2.9346392371290905e-06, "loss": 0.2813, "step": 19684 }, { "epoch": 0.9221436267391202, "grad_norm": 0.5737648144734074, "learning_rate": 2.9344524738849765e-06, "loss": 0.3089, "step": 19685 }, { "epoch": 0.9221904717290486, "grad_norm": 0.5789774688703815, "learning_rate": 2.9342657081406974e-06, "loss": 0.3316, "step": 19686 }, { "epoch": 0.9222373167189769, "grad_norm": 0.6139678934050242, "learning_rate": 2.9340789398973284e-06, "loss": 0.3018, "step": 19687 }, { "epoch": 0.9222841617089053, "grad_norm": 0.5775667385194352, "learning_rate": 2.9338921691559445e-06, "loss": 0.3102, "step": 19688 }, { "epoch": 0.9223310066988336, "grad_norm": 0.5953380438503754, "learning_rate": 2.9337053959176198e-06, "loss": 0.2945, "step": 19689 }, { "epoch": 0.9223778516887619, "grad_norm": 0.6469714510286666, "learning_rate": 2.93351862018343e-06, "loss": 0.3307, "step": 19690 }, { "epoch": 0.9224246966786902, "grad_norm": 0.609264076651461, "learning_rate": 2.933331841954449e-06, "loss": 0.3405, "step": 19691 }, { "epoch": 0.9224715416686186, "grad_norm": 0.6420683613254443, "learning_rate": 2.9331450612317527e-06, "loss": 0.3397, "step": 19692 }, { "epoch": 0.9225183866585469, "grad_norm": 0.5885526883463916, "learning_rate": 2.9329582780164144e-06, "loss": 0.3044, "step": 19693 }, { "epoch": 0.9225652316484751, "grad_norm": 0.6047098410665538, "learning_rate": 2.9327714923095114e-06, "loss": 0.3222, "step": 19694 }, { "epoch": 0.9226120766384035, "grad_norm": 0.5551939341566405, "learning_rate": 2.9325847041121163e-06, "loss": 0.3106, "step": 19695 }, { "epoch": 0.9226589216283319, "grad_norm": 0.5209354844364649, "learning_rate": 2.932397913425305e-06, "loss": 0.288, "step": 19696 }, { "epoch": 0.9227057666182602, "grad_norm": 0.5944098805465903, "learning_rate": 2.932211120250153e-06, "loss": 0.3317, "step": 19697 }, { "epoch": 0.9227526116081886, "grad_norm": 0.5750080144327687, "learning_rate": 2.9320243245877343e-06, "loss": 0.3135, "step": 19698 }, { "epoch": 0.9227994565981168, "grad_norm": 0.571030152347696, "learning_rate": 2.9318375264391243e-06, "loss": 0.3171, "step": 19699 }, { "epoch": 0.9228463015880451, "grad_norm": 0.6325748701894806, "learning_rate": 2.931650725805397e-06, "loss": 0.3118, "step": 19700 }, { "epoch": 0.9228931465779735, "grad_norm": 0.57728526326557, "learning_rate": 2.9314639226876294e-06, "loss": 0.2956, "step": 19701 }, { "epoch": 0.9229399915679019, "grad_norm": 0.5692099913346668, "learning_rate": 2.9312771170868947e-06, "loss": 0.303, "step": 19702 }, { "epoch": 0.9229868365578301, "grad_norm": 0.6083413231193773, "learning_rate": 2.931090309004269e-06, "loss": 0.2981, "step": 19703 }, { "epoch": 0.9230336815477584, "grad_norm": 0.538886141266904, "learning_rate": 2.930903498440827e-06, "loss": 0.3083, "step": 19704 }, { "epoch": 0.9230805265376868, "grad_norm": 0.5870723682451444, "learning_rate": 2.930716685397643e-06, "loss": 0.2873, "step": 19705 }, { "epoch": 0.9231273715276151, "grad_norm": 0.6064191710308252, "learning_rate": 2.930529869875794e-06, "loss": 0.3181, "step": 19706 }, { "epoch": 0.9231742165175435, "grad_norm": 0.552077224477648, "learning_rate": 2.9303430518763527e-06, "loss": 0.2903, "step": 19707 }, { "epoch": 0.9232210615074717, "grad_norm": 0.565981277688748, "learning_rate": 2.9301562314003955e-06, "loss": 0.2872, "step": 19708 }, { "epoch": 0.9232679064974001, "grad_norm": 0.6071290903443235, "learning_rate": 2.9299694084489977e-06, "loss": 0.3186, "step": 19709 }, { "epoch": 0.9233147514873284, "grad_norm": 0.5919098837619056, "learning_rate": 2.9297825830232336e-06, "loss": 0.3223, "step": 19710 }, { "epoch": 0.9233615964772568, "grad_norm": 0.5742704722464951, "learning_rate": 2.9295957551241787e-06, "loss": 0.3154, "step": 19711 }, { "epoch": 0.923408441467185, "grad_norm": 0.5926545327853865, "learning_rate": 2.929408924752909e-06, "loss": 0.3281, "step": 19712 }, { "epoch": 0.9234552864571134, "grad_norm": 0.5683101365206694, "learning_rate": 2.9292220919104973e-06, "loss": 0.3007, "step": 19713 }, { "epoch": 0.9235021314470417, "grad_norm": 0.6004091781358158, "learning_rate": 2.929035256598021e-06, "loss": 0.2973, "step": 19714 }, { "epoch": 0.9235489764369701, "grad_norm": 0.5885385760054314, "learning_rate": 2.928848418816556e-06, "loss": 0.3334, "step": 19715 }, { "epoch": 0.9235958214268984, "grad_norm": 0.5931263533326899, "learning_rate": 2.9286615785671747e-06, "loss": 0.3253, "step": 19716 }, { "epoch": 0.9236426664168267, "grad_norm": 0.5773848333664829, "learning_rate": 2.9284747358509534e-06, "loss": 0.312, "step": 19717 }, { "epoch": 0.923689511406755, "grad_norm": 0.5846850795085055, "learning_rate": 2.9282878906689687e-06, "loss": 0.3323, "step": 19718 }, { "epoch": 0.9237363563966834, "grad_norm": 0.5814832637535696, "learning_rate": 2.9281010430222952e-06, "loss": 0.2958, "step": 19719 }, { "epoch": 0.9237832013866117, "grad_norm": 0.5823024033671779, "learning_rate": 2.9279141929120065e-06, "loss": 0.3168, "step": 19720 }, { "epoch": 0.92383004637654, "grad_norm": 0.577890008956307, "learning_rate": 2.9277273403391804e-06, "loss": 0.3074, "step": 19721 }, { "epoch": 0.9238768913664683, "grad_norm": 0.593649092660891, "learning_rate": 2.9275404853048905e-06, "loss": 0.3231, "step": 19722 }, { "epoch": 0.9239237363563967, "grad_norm": 0.5684845563816657, "learning_rate": 2.927353627810212e-06, "loss": 0.3286, "step": 19723 }, { "epoch": 0.923970581346325, "grad_norm": 0.5520292323524472, "learning_rate": 2.927166767856221e-06, "loss": 0.2957, "step": 19724 }, { "epoch": 0.9240174263362534, "grad_norm": 0.5779610224197882, "learning_rate": 2.926979905443993e-06, "loss": 0.303, "step": 19725 }, { "epoch": 0.9240642713261816, "grad_norm": 0.6299073627082238, "learning_rate": 2.9267930405746024e-06, "loss": 0.3054, "step": 19726 }, { "epoch": 0.92411111631611, "grad_norm": 0.6356759908285078, "learning_rate": 2.9266061732491263e-06, "loss": 0.3271, "step": 19727 }, { "epoch": 0.9241579613060383, "grad_norm": 0.6240216541998137, "learning_rate": 2.926419303468638e-06, "loss": 0.3196, "step": 19728 }, { "epoch": 0.9242048062959667, "grad_norm": 0.6006388650775362, "learning_rate": 2.926232431234214e-06, "loss": 0.3153, "step": 19729 }, { "epoch": 0.9242516512858949, "grad_norm": 0.5617306457828174, "learning_rate": 2.9260455565469293e-06, "loss": 0.3092, "step": 19730 }, { "epoch": 0.9242984962758233, "grad_norm": 0.6470271532283726, "learning_rate": 2.925858679407859e-06, "loss": 0.3287, "step": 19731 }, { "epoch": 0.9243453412657516, "grad_norm": 0.5514826607192653, "learning_rate": 2.92567179981808e-06, "loss": 0.297, "step": 19732 }, { "epoch": 0.92439218625568, "grad_norm": 0.6453881592479223, "learning_rate": 2.9254849177786664e-06, "loss": 0.3168, "step": 19733 }, { "epoch": 0.9244390312456083, "grad_norm": 0.6745588963040716, "learning_rate": 2.925298033290694e-06, "loss": 0.3561, "step": 19734 }, { "epoch": 0.9244858762355366, "grad_norm": 0.5809897061788022, "learning_rate": 2.9251111463552377e-06, "loss": 0.3293, "step": 19735 }, { "epoch": 0.9245327212254649, "grad_norm": 0.6073777912907592, "learning_rate": 2.9249242569733756e-06, "loss": 0.3332, "step": 19736 }, { "epoch": 0.9245795662153933, "grad_norm": 0.6037774099736195, "learning_rate": 2.9247373651461793e-06, "loss": 0.3151, "step": 19737 }, { "epoch": 0.9246264112053216, "grad_norm": 0.5542216138722973, "learning_rate": 2.9245504708747263e-06, "loss": 0.3057, "step": 19738 }, { "epoch": 0.9246732561952499, "grad_norm": 0.5527752861097343, "learning_rate": 2.9243635741600927e-06, "loss": 0.2869, "step": 19739 }, { "epoch": 0.9247201011851782, "grad_norm": 0.5790365460134087, "learning_rate": 2.9241766750033535e-06, "loss": 0.3208, "step": 19740 }, { "epoch": 0.9247669461751066, "grad_norm": 0.5666022070597718, "learning_rate": 2.9239897734055835e-06, "loss": 0.2931, "step": 19741 }, { "epoch": 0.9248137911650349, "grad_norm": 0.5276732678497266, "learning_rate": 2.9238028693678588e-06, "loss": 0.2877, "step": 19742 }, { "epoch": 0.9248606361549633, "grad_norm": 0.60686286495472, "learning_rate": 2.9236159628912565e-06, "loss": 0.3183, "step": 19743 }, { "epoch": 0.9249074811448915, "grad_norm": 0.6190489209320241, "learning_rate": 2.9234290539768497e-06, "loss": 0.3097, "step": 19744 }, { "epoch": 0.9249543261348199, "grad_norm": 0.5986120827906427, "learning_rate": 2.9232421426257147e-06, "loss": 0.3115, "step": 19745 }, { "epoch": 0.9250011711247482, "grad_norm": 0.6020662208752886, "learning_rate": 2.9230552288389283e-06, "loss": 0.3289, "step": 19746 }, { "epoch": 0.9250480161146766, "grad_norm": 0.597416044804714, "learning_rate": 2.9228683126175656e-06, "loss": 0.3155, "step": 19747 }, { "epoch": 0.9250948611046048, "grad_norm": 0.6003997399932766, "learning_rate": 2.9226813939627014e-06, "loss": 0.3137, "step": 19748 }, { "epoch": 0.9251417060945332, "grad_norm": 0.5718996314794286, "learning_rate": 2.922494472875412e-06, "loss": 0.3118, "step": 19749 }, { "epoch": 0.9251885510844615, "grad_norm": 0.5594451700948572, "learning_rate": 2.9223075493567742e-06, "loss": 0.3036, "step": 19750 }, { "epoch": 0.9252353960743899, "grad_norm": 0.630814123759818, "learning_rate": 2.9221206234078615e-06, "loss": 0.3284, "step": 19751 }, { "epoch": 0.9252822410643182, "grad_norm": 0.6352214005610559, "learning_rate": 2.921933695029751e-06, "loss": 0.3328, "step": 19752 }, { "epoch": 0.9253290860542465, "grad_norm": 0.6235451947930978, "learning_rate": 2.921746764223518e-06, "loss": 0.3262, "step": 19753 }, { "epoch": 0.9253759310441748, "grad_norm": 0.5520337226913509, "learning_rate": 2.9215598309902386e-06, "loss": 0.2952, "step": 19754 }, { "epoch": 0.9254227760341032, "grad_norm": 0.6265526160451729, "learning_rate": 2.9213728953309884e-06, "loss": 0.3319, "step": 19755 }, { "epoch": 0.9254696210240315, "grad_norm": 0.5696741459195289, "learning_rate": 2.9211859572468426e-06, "loss": 0.3076, "step": 19756 }, { "epoch": 0.9255164660139598, "grad_norm": 0.6444622703163747, "learning_rate": 2.920999016738878e-06, "loss": 0.3077, "step": 19757 }, { "epoch": 0.9255633110038881, "grad_norm": 0.6004172220721123, "learning_rate": 2.9208120738081704e-06, "loss": 0.3063, "step": 19758 }, { "epoch": 0.9256101559938165, "grad_norm": 0.6387293467659046, "learning_rate": 2.9206251284557936e-06, "loss": 0.3154, "step": 19759 }, { "epoch": 0.9256570009837448, "grad_norm": 0.5713586228279911, "learning_rate": 2.920438180682826e-06, "loss": 0.2998, "step": 19760 }, { "epoch": 0.9257038459736732, "grad_norm": 0.5414642100374095, "learning_rate": 2.920251230490342e-06, "loss": 0.2935, "step": 19761 }, { "epoch": 0.9257506909636014, "grad_norm": 0.5811287171357483, "learning_rate": 2.9200642778794177e-06, "loss": 0.3336, "step": 19762 }, { "epoch": 0.9257975359535298, "grad_norm": 0.5431569038641674, "learning_rate": 2.919877322851129e-06, "loss": 0.2984, "step": 19763 }, { "epoch": 0.9258443809434581, "grad_norm": 0.5850233963701182, "learning_rate": 2.9196903654065524e-06, "loss": 0.3234, "step": 19764 }, { "epoch": 0.9258912259333865, "grad_norm": 0.6046223255368223, "learning_rate": 2.9195034055467624e-06, "loss": 0.3231, "step": 19765 }, { "epoch": 0.9259380709233147, "grad_norm": 0.6439088622914563, "learning_rate": 2.919316443272836e-06, "loss": 0.3375, "step": 19766 }, { "epoch": 0.9259849159132431, "grad_norm": 0.5591298201471466, "learning_rate": 2.919129478585849e-06, "loss": 0.3011, "step": 19767 }, { "epoch": 0.9260317609031714, "grad_norm": 0.6819888953255885, "learning_rate": 2.918942511486878e-06, "loss": 0.3541, "step": 19768 }, { "epoch": 0.9260786058930998, "grad_norm": 0.5772487417990071, "learning_rate": 2.918755541976997e-06, "loss": 0.3164, "step": 19769 }, { "epoch": 0.9261254508830281, "grad_norm": 0.5599424800609109, "learning_rate": 2.9185685700572836e-06, "loss": 0.3176, "step": 19770 }, { "epoch": 0.9261722958729564, "grad_norm": 0.6993628194027989, "learning_rate": 2.918381595728813e-06, "loss": 0.3206, "step": 19771 }, { "epoch": 0.9262191408628847, "grad_norm": 0.5912392868974545, "learning_rate": 2.918194618992662e-06, "loss": 0.3174, "step": 19772 }, { "epoch": 0.9262659858528131, "grad_norm": 0.6018950935617816, "learning_rate": 2.918007639849906e-06, "loss": 0.3182, "step": 19773 }, { "epoch": 0.9263128308427414, "grad_norm": 0.564139297291616, "learning_rate": 2.91782065830162e-06, "loss": 0.3166, "step": 19774 }, { "epoch": 0.9263596758326696, "grad_norm": 0.6426878212563479, "learning_rate": 2.917633674348882e-06, "loss": 0.3221, "step": 19775 }, { "epoch": 0.926406520822598, "grad_norm": 0.5294590989401152, "learning_rate": 2.9174466879927678e-06, "loss": 0.2865, "step": 19776 }, { "epoch": 0.9264533658125264, "grad_norm": 0.6261455095292597, "learning_rate": 2.9172596992343516e-06, "loss": 0.3518, "step": 19777 }, { "epoch": 0.9265002108024547, "grad_norm": 0.5560432575771487, "learning_rate": 2.9170727080747114e-06, "loss": 0.299, "step": 19778 }, { "epoch": 0.9265470557923831, "grad_norm": 0.6140131760540757, "learning_rate": 2.916885714514923e-06, "loss": 0.3346, "step": 19779 }, { "epoch": 0.9265939007823113, "grad_norm": 0.6025955236016985, "learning_rate": 2.916698718556061e-06, "loss": 0.3095, "step": 19780 }, { "epoch": 0.9266407457722396, "grad_norm": 0.5885429548712098, "learning_rate": 2.916511720199203e-06, "loss": 0.3147, "step": 19781 }, { "epoch": 0.926687590762168, "grad_norm": 0.5845205218270955, "learning_rate": 2.9163247194454253e-06, "loss": 0.3147, "step": 19782 }, { "epoch": 0.9267344357520964, "grad_norm": 0.5471133103859878, "learning_rate": 2.9161377162958025e-06, "loss": 0.3118, "step": 19783 }, { "epoch": 0.9267812807420246, "grad_norm": 0.5703104872908096, "learning_rate": 2.9159507107514125e-06, "loss": 0.3024, "step": 19784 }, { "epoch": 0.926828125731953, "grad_norm": 0.5627402240813911, "learning_rate": 2.915763702813331e-06, "loss": 0.3308, "step": 19785 }, { "epoch": 0.9268749707218813, "grad_norm": 0.618983945009236, "learning_rate": 2.915576692482633e-06, "loss": 0.3052, "step": 19786 }, { "epoch": 0.9269218157118096, "grad_norm": 0.6079518238145868, "learning_rate": 2.915389679760396e-06, "loss": 0.3174, "step": 19787 }, { "epoch": 0.926968660701738, "grad_norm": 0.5598891373450938, "learning_rate": 2.9152026646476956e-06, "loss": 0.3237, "step": 19788 }, { "epoch": 0.9270155056916662, "grad_norm": 0.6480051265642073, "learning_rate": 2.9150156471456085e-06, "loss": 0.3344, "step": 19789 }, { "epoch": 0.9270623506815946, "grad_norm": 0.5965563356871673, "learning_rate": 2.914828627255211e-06, "loss": 0.3206, "step": 19790 }, { "epoch": 0.927109195671523, "grad_norm": 0.5307634388894855, "learning_rate": 2.9146416049775782e-06, "loss": 0.2988, "step": 19791 }, { "epoch": 0.9271560406614513, "grad_norm": 0.5729990449626617, "learning_rate": 2.9144545803137882e-06, "loss": 0.2979, "step": 19792 }, { "epoch": 0.9272028856513795, "grad_norm": 0.6335404475750946, "learning_rate": 2.914267553264915e-06, "loss": 0.3302, "step": 19793 }, { "epoch": 0.9272497306413079, "grad_norm": 0.5947607218621422, "learning_rate": 2.914080523832037e-06, "loss": 0.3172, "step": 19794 }, { "epoch": 0.9272965756312362, "grad_norm": 0.635996579371631, "learning_rate": 2.9138934920162286e-06, "loss": 0.3282, "step": 19795 }, { "epoch": 0.9273434206211646, "grad_norm": 0.5624535527866817, "learning_rate": 2.9137064578185686e-06, "loss": 0.3293, "step": 19796 }, { "epoch": 0.927390265611093, "grad_norm": 0.6192969079090246, "learning_rate": 2.9135194212401315e-06, "loss": 0.3329, "step": 19797 }, { "epoch": 0.9274371106010212, "grad_norm": 0.5617596374446694, "learning_rate": 2.913332382281994e-06, "loss": 0.3184, "step": 19798 }, { "epoch": 0.9274839555909495, "grad_norm": 0.5855518001929975, "learning_rate": 2.913145340945232e-06, "loss": 0.3274, "step": 19799 }, { "epoch": 0.9275308005808779, "grad_norm": 0.6038780392824875, "learning_rate": 2.9129582972309233e-06, "loss": 0.3351, "step": 19800 }, { "epoch": 0.9275776455708062, "grad_norm": 0.5826514543975666, "learning_rate": 2.9127712511401423e-06, "loss": 0.3196, "step": 19801 }, { "epoch": 0.9276244905607345, "grad_norm": 0.5666317911285174, "learning_rate": 2.9125842026739675e-06, "loss": 0.3175, "step": 19802 }, { "epoch": 0.9276713355506628, "grad_norm": 0.5606793732044447, "learning_rate": 2.9123971518334743e-06, "loss": 0.2897, "step": 19803 }, { "epoch": 0.9277181805405912, "grad_norm": 0.5627239829642852, "learning_rate": 2.9122100986197383e-06, "loss": 0.3159, "step": 19804 }, { "epoch": 0.9277650255305195, "grad_norm": 0.5698432498212793, "learning_rate": 2.912023043033837e-06, "loss": 0.3164, "step": 19805 }, { "epoch": 0.9278118705204479, "grad_norm": 0.5391105470607306, "learning_rate": 2.911835985076847e-06, "loss": 0.3141, "step": 19806 }, { "epoch": 0.9278587155103761, "grad_norm": 0.5793246419669659, "learning_rate": 2.9116489247498446e-06, "loss": 0.3078, "step": 19807 }, { "epoch": 0.9279055605003045, "grad_norm": 0.6137893124293905, "learning_rate": 2.9114618620539055e-06, "loss": 0.3397, "step": 19808 }, { "epoch": 0.9279524054902328, "grad_norm": 0.6143898773862956, "learning_rate": 2.9112747969901074e-06, "loss": 0.3137, "step": 19809 }, { "epoch": 0.9279992504801612, "grad_norm": 0.5873713321274572, "learning_rate": 2.9110877295595256e-06, "loss": 0.337, "step": 19810 }, { "epoch": 0.9280460954700894, "grad_norm": 0.616056669931253, "learning_rate": 2.9109006597632376e-06, "loss": 0.3196, "step": 19811 }, { "epoch": 0.9280929404600178, "grad_norm": 0.5733314232008462, "learning_rate": 2.910713587602319e-06, "loss": 0.2932, "step": 19812 }, { "epoch": 0.9281397854499461, "grad_norm": 0.6130336194339238, "learning_rate": 2.910526513077848e-06, "loss": 0.3145, "step": 19813 }, { "epoch": 0.9281866304398745, "grad_norm": 0.596334431634364, "learning_rate": 2.9103394361909e-06, "loss": 0.3093, "step": 19814 }, { "epoch": 0.9282334754298028, "grad_norm": 0.5340067352966514, "learning_rate": 2.9101523569425504e-06, "loss": 0.2922, "step": 19815 }, { "epoch": 0.9282803204197311, "grad_norm": 0.612033741971492, "learning_rate": 2.909965275333878e-06, "loss": 0.343, "step": 19816 }, { "epoch": 0.9283271654096594, "grad_norm": 0.586247528349062, "learning_rate": 2.909778191365959e-06, "loss": 0.3218, "step": 19817 }, { "epoch": 0.9283740103995878, "grad_norm": 0.5652681715308748, "learning_rate": 2.9095911050398684e-06, "loss": 0.3053, "step": 19818 }, { "epoch": 0.9284208553895161, "grad_norm": 0.6230020101135688, "learning_rate": 2.909404016356685e-06, "loss": 0.318, "step": 19819 }, { "epoch": 0.9284677003794444, "grad_norm": 0.512362297170457, "learning_rate": 2.909216925317484e-06, "loss": 0.2911, "step": 19820 }, { "epoch": 0.9285145453693727, "grad_norm": 0.6042496034140813, "learning_rate": 2.9090298319233416e-06, "loss": 0.3206, "step": 19821 }, { "epoch": 0.9285613903593011, "grad_norm": 0.6260743376651376, "learning_rate": 2.9088427361753363e-06, "loss": 0.3, "step": 19822 }, { "epoch": 0.9286082353492294, "grad_norm": 0.5891191477954086, "learning_rate": 2.9086556380745436e-06, "loss": 0.317, "step": 19823 }, { "epoch": 0.9286550803391578, "grad_norm": 0.5947170370183491, "learning_rate": 2.90846853762204e-06, "loss": 0.3106, "step": 19824 }, { "epoch": 0.928701925329086, "grad_norm": 0.5782495240117063, "learning_rate": 2.9082814348189038e-06, "loss": 0.3026, "step": 19825 }, { "epoch": 0.9287487703190144, "grad_norm": 0.6182935740848077, "learning_rate": 2.908094329666209e-06, "loss": 0.3166, "step": 19826 }, { "epoch": 0.9287956153089427, "grad_norm": 0.6132197012185006, "learning_rate": 2.907907222165035e-06, "loss": 0.3102, "step": 19827 }, { "epoch": 0.9288424602988711, "grad_norm": 0.5512389160778097, "learning_rate": 2.9077201123164573e-06, "loss": 0.3043, "step": 19828 }, { "epoch": 0.9288893052887993, "grad_norm": 0.5697880232859548, "learning_rate": 2.9075330001215526e-06, "loss": 0.3015, "step": 19829 }, { "epoch": 0.9289361502787277, "grad_norm": 0.6674865245279119, "learning_rate": 2.9073458855813975e-06, "loss": 0.3291, "step": 19830 }, { "epoch": 0.928982995268656, "grad_norm": 0.5623276474450962, "learning_rate": 2.90715876869707e-06, "loss": 0.3174, "step": 19831 }, { "epoch": 0.9290298402585844, "grad_norm": 0.5465929233032816, "learning_rate": 2.9069716494696453e-06, "loss": 0.2994, "step": 19832 }, { "epoch": 0.9290766852485127, "grad_norm": 0.5667281025751321, "learning_rate": 2.9067845279002018e-06, "loss": 0.3177, "step": 19833 }, { "epoch": 0.929123530238441, "grad_norm": 0.5463187106339935, "learning_rate": 2.9065974039898154e-06, "loss": 0.3029, "step": 19834 }, { "epoch": 0.9291703752283693, "grad_norm": 0.5321529601619355, "learning_rate": 2.9064102777395632e-06, "loss": 0.2991, "step": 19835 }, { "epoch": 0.9292172202182977, "grad_norm": 0.5659260403544542, "learning_rate": 2.906223149150521e-06, "loss": 0.301, "step": 19836 }, { "epoch": 0.929264065208226, "grad_norm": 0.5869863203622333, "learning_rate": 2.906036018223768e-06, "loss": 0.3217, "step": 19837 }, { "epoch": 0.9293109101981543, "grad_norm": 0.5958605791338283, "learning_rate": 2.9058488849603796e-06, "loss": 0.3227, "step": 19838 }, { "epoch": 0.9293577551880826, "grad_norm": 0.56823596300694, "learning_rate": 2.905661749361432e-06, "loss": 0.3019, "step": 19839 }, { "epoch": 0.929404600178011, "grad_norm": 0.5941590016715295, "learning_rate": 2.9054746114280035e-06, "loss": 0.3084, "step": 19840 }, { "epoch": 0.9294514451679393, "grad_norm": 0.5944409845183316, "learning_rate": 2.9052874711611712e-06, "loss": 0.3208, "step": 19841 }, { "epoch": 0.9294982901578677, "grad_norm": 0.5820902753194411, "learning_rate": 2.9051003285620104e-06, "loss": 0.3198, "step": 19842 }, { "epoch": 0.9295451351477959, "grad_norm": 0.6542730647652294, "learning_rate": 2.904913183631599e-06, "loss": 0.3328, "step": 19843 }, { "epoch": 0.9295919801377243, "grad_norm": 0.6597324116703978, "learning_rate": 2.9047260363710143e-06, "loss": 0.3424, "step": 19844 }, { "epoch": 0.9296388251276526, "grad_norm": 0.6215676409308294, "learning_rate": 2.9045388867813334e-06, "loss": 0.3305, "step": 19845 }, { "epoch": 0.929685670117581, "grad_norm": 0.5869739678627146, "learning_rate": 2.904351734863633e-06, "loss": 0.3228, "step": 19846 }, { "epoch": 0.9297325151075092, "grad_norm": 0.5937965181563154, "learning_rate": 2.9041645806189887e-06, "loss": 0.3103, "step": 19847 }, { "epoch": 0.9297793600974376, "grad_norm": 0.6193969106966235, "learning_rate": 2.9039774240484803e-06, "loss": 0.3036, "step": 19848 }, { "epoch": 0.9298262050873659, "grad_norm": 0.5837304508028432, "learning_rate": 2.9037902651531823e-06, "loss": 0.313, "step": 19849 }, { "epoch": 0.9298730500772943, "grad_norm": 0.5982455608324562, "learning_rate": 2.9036031039341735e-06, "loss": 0.3228, "step": 19850 }, { "epoch": 0.9299198950672226, "grad_norm": 0.5700033926197609, "learning_rate": 2.90341594039253e-06, "loss": 0.313, "step": 19851 }, { "epoch": 0.9299667400571509, "grad_norm": 0.6134005504016268, "learning_rate": 2.903228774529329e-06, "loss": 0.3326, "step": 19852 }, { "epoch": 0.9300135850470792, "grad_norm": 0.5810220340238569, "learning_rate": 2.903041606345648e-06, "loss": 0.3012, "step": 19853 }, { "epoch": 0.9300604300370076, "grad_norm": 0.5668760285828445, "learning_rate": 2.902854435842563e-06, "loss": 0.3012, "step": 19854 }, { "epoch": 0.9301072750269359, "grad_norm": 0.5942337059687177, "learning_rate": 2.902667263021154e-06, "loss": 0.3017, "step": 19855 }, { "epoch": 0.9301541200168641, "grad_norm": 0.5950804701466926, "learning_rate": 2.9024800878824944e-06, "loss": 0.3211, "step": 19856 }, { "epoch": 0.9302009650067925, "grad_norm": 0.5683496864554974, "learning_rate": 2.902292910427663e-06, "loss": 0.3036, "step": 19857 }, { "epoch": 0.9302478099967209, "grad_norm": 0.5228237917421219, "learning_rate": 2.9021057306577376e-06, "loss": 0.3032, "step": 19858 }, { "epoch": 0.9302946549866492, "grad_norm": 0.5404317327886298, "learning_rate": 2.901918548573795e-06, "loss": 0.2983, "step": 19859 }, { "epoch": 0.9303414999765776, "grad_norm": 0.6381527553138554, "learning_rate": 2.9017313641769117e-06, "loss": 0.3175, "step": 19860 }, { "epoch": 0.9303883449665058, "grad_norm": 0.6000306669039489, "learning_rate": 2.901544177468166e-06, "loss": 0.3148, "step": 19861 }, { "epoch": 0.9304351899564341, "grad_norm": 0.5563025434353966, "learning_rate": 2.9013569884486337e-06, "loss": 0.2933, "step": 19862 }, { "epoch": 0.9304820349463625, "grad_norm": 0.6325057348953398, "learning_rate": 2.901169797119393e-06, "loss": 0.3228, "step": 19863 }, { "epoch": 0.9305288799362909, "grad_norm": 0.606032754818899, "learning_rate": 2.9009826034815212e-06, "loss": 0.32, "step": 19864 }, { "epoch": 0.9305757249262191, "grad_norm": 0.5678620413639413, "learning_rate": 2.900795407536095e-06, "loss": 0.333, "step": 19865 }, { "epoch": 0.9306225699161474, "grad_norm": 0.5503218169802364, "learning_rate": 2.9006082092841925e-06, "loss": 0.2771, "step": 19866 }, { "epoch": 0.9306694149060758, "grad_norm": 0.6160848725417646, "learning_rate": 2.9004210087268904e-06, "loss": 0.3204, "step": 19867 }, { "epoch": 0.9307162598960041, "grad_norm": 0.5694626305601512, "learning_rate": 2.9002338058652652e-06, "loss": 0.3034, "step": 19868 }, { "epoch": 0.9307631048859325, "grad_norm": 0.5747962415358105, "learning_rate": 2.9000466007003957e-06, "loss": 0.3076, "step": 19869 }, { "epoch": 0.9308099498758607, "grad_norm": 0.6002636422945627, "learning_rate": 2.8998593932333584e-06, "loss": 0.3263, "step": 19870 }, { "epoch": 0.9308567948657891, "grad_norm": 0.6285747660795363, "learning_rate": 2.8996721834652307e-06, "loss": 0.3217, "step": 19871 }, { "epoch": 0.9309036398557174, "grad_norm": 0.6005319661418109, "learning_rate": 2.89948497139709e-06, "loss": 0.3161, "step": 19872 }, { "epoch": 0.9309504848456458, "grad_norm": 0.5425504113754619, "learning_rate": 2.899297757030014e-06, "loss": 0.3081, "step": 19873 }, { "epoch": 0.930997329835574, "grad_norm": 0.6366890721198938, "learning_rate": 2.8991105403650797e-06, "loss": 0.3255, "step": 19874 }, { "epoch": 0.9310441748255024, "grad_norm": 0.602810944549172, "learning_rate": 2.898923321403364e-06, "loss": 0.32, "step": 19875 }, { "epoch": 0.9310910198154307, "grad_norm": 0.6192433451675999, "learning_rate": 2.898736100145946e-06, "loss": 0.3265, "step": 19876 }, { "epoch": 0.9311378648053591, "grad_norm": 0.56787929724671, "learning_rate": 2.898548876593901e-06, "loss": 0.3174, "step": 19877 }, { "epoch": 0.9311847097952874, "grad_norm": 0.6249963576245248, "learning_rate": 2.8983616507483075e-06, "loss": 0.3242, "step": 19878 }, { "epoch": 0.9312315547852157, "grad_norm": 0.591550600454406, "learning_rate": 2.898174422610243e-06, "loss": 0.3314, "step": 19879 }, { "epoch": 0.931278399775144, "grad_norm": 0.5667944660511891, "learning_rate": 2.897987192180785e-06, "loss": 0.3078, "step": 19880 }, { "epoch": 0.9313252447650724, "grad_norm": 0.5788103558103265, "learning_rate": 2.897799959461011e-06, "loss": 0.3326, "step": 19881 }, { "epoch": 0.9313720897550007, "grad_norm": 0.5506866313323404, "learning_rate": 2.897612724451997e-06, "loss": 0.2877, "step": 19882 }, { "epoch": 0.931418934744929, "grad_norm": 0.6339631832786732, "learning_rate": 2.897425487154823e-06, "loss": 0.3301, "step": 19883 }, { "epoch": 0.9314657797348573, "grad_norm": 0.5638447097916005, "learning_rate": 2.8972382475705647e-06, "loss": 0.3274, "step": 19884 }, { "epoch": 0.9315126247247857, "grad_norm": 0.5852417009961837, "learning_rate": 2.897051005700299e-06, "loss": 0.3354, "step": 19885 }, { "epoch": 0.931559469714714, "grad_norm": 0.5725193887693767, "learning_rate": 2.8968637615451063e-06, "loss": 0.302, "step": 19886 }, { "epoch": 0.9316063147046424, "grad_norm": 0.5809178844187082, "learning_rate": 2.8966765151060614e-06, "loss": 0.3128, "step": 19887 }, { "epoch": 0.9316531596945706, "grad_norm": 0.6562860551070365, "learning_rate": 2.8964892663842437e-06, "loss": 0.3368, "step": 19888 }, { "epoch": 0.931700004684499, "grad_norm": 0.5988418024012108, "learning_rate": 2.8963020153807294e-06, "loss": 0.3237, "step": 19889 }, { "epoch": 0.9317468496744273, "grad_norm": 0.5893844021910138, "learning_rate": 2.896114762096597e-06, "loss": 0.3038, "step": 19890 }, { "epoch": 0.9317936946643557, "grad_norm": 0.5804925816717089, "learning_rate": 2.8959275065329228e-06, "loss": 0.3311, "step": 19891 }, { "epoch": 0.9318405396542839, "grad_norm": 0.5512707130605862, "learning_rate": 2.895740248690786e-06, "loss": 0.3057, "step": 19892 }, { "epoch": 0.9318873846442123, "grad_norm": 0.5617199198535837, "learning_rate": 2.895552988571263e-06, "loss": 0.3174, "step": 19893 }, { "epoch": 0.9319342296341406, "grad_norm": 0.5588161404857401, "learning_rate": 2.8953657261754326e-06, "loss": 0.31, "step": 19894 }, { "epoch": 0.931981074624069, "grad_norm": 0.6547628371577363, "learning_rate": 2.8951784615043716e-06, "loss": 0.3176, "step": 19895 }, { "epoch": 0.9320279196139973, "grad_norm": 0.6102547302532285, "learning_rate": 2.8949911945591576e-06, "loss": 0.3341, "step": 19896 }, { "epoch": 0.9320747646039256, "grad_norm": 0.5973425008985216, "learning_rate": 2.894803925340869e-06, "loss": 0.3175, "step": 19897 }, { "epoch": 0.9321216095938539, "grad_norm": 0.5566376425077697, "learning_rate": 2.8946166538505832e-06, "loss": 0.3039, "step": 19898 }, { "epoch": 0.9321684545837823, "grad_norm": 0.5862301653029722, "learning_rate": 2.894429380089377e-06, "loss": 0.3246, "step": 19899 }, { "epoch": 0.9322152995737106, "grad_norm": 0.6714007309980813, "learning_rate": 2.8942421040583297e-06, "loss": 0.2968, "step": 19900 }, { "epoch": 0.9322621445636389, "grad_norm": 0.5836075067882358, "learning_rate": 2.894054825758518e-06, "loss": 0.3083, "step": 19901 }, { "epoch": 0.9323089895535672, "grad_norm": 0.5217489523128817, "learning_rate": 2.8938675451910185e-06, "loss": 0.3134, "step": 19902 }, { "epoch": 0.9323558345434956, "grad_norm": 0.5343696261227986, "learning_rate": 2.893680262356911e-06, "loss": 0.2947, "step": 19903 }, { "epoch": 0.9324026795334239, "grad_norm": 0.5960951446758718, "learning_rate": 2.893492977257274e-06, "loss": 0.3074, "step": 19904 }, { "epoch": 0.9324495245233523, "grad_norm": 0.574089997809287, "learning_rate": 2.8933056898931824e-06, "loss": 0.28, "step": 19905 }, { "epoch": 0.9324963695132805, "grad_norm": 0.5700918693741119, "learning_rate": 2.8931184002657152e-06, "loss": 0.3129, "step": 19906 }, { "epoch": 0.9325432145032089, "grad_norm": 0.590653525841209, "learning_rate": 2.892931108375951e-06, "loss": 0.3021, "step": 19907 }, { "epoch": 0.9325900594931372, "grad_norm": 0.6060264012681137, "learning_rate": 2.892743814224967e-06, "loss": 0.3051, "step": 19908 }, { "epoch": 0.9326369044830656, "grad_norm": 0.6601097968762338, "learning_rate": 2.892556517813841e-06, "loss": 0.3595, "step": 19909 }, { "epoch": 0.9326837494729938, "grad_norm": 0.6521749202526288, "learning_rate": 2.8923692191436507e-06, "loss": 0.3183, "step": 19910 }, { "epoch": 0.9327305944629222, "grad_norm": 0.5501043716005406, "learning_rate": 2.892181918215475e-06, "loss": 0.2964, "step": 19911 }, { "epoch": 0.9327774394528505, "grad_norm": 0.6164668523045895, "learning_rate": 2.8919946150303895e-06, "loss": 0.3395, "step": 19912 }, { "epoch": 0.9328242844427789, "grad_norm": 0.6247180255317423, "learning_rate": 2.8918073095894745e-06, "loss": 0.3114, "step": 19913 }, { "epoch": 0.9328711294327072, "grad_norm": 0.5792019569438553, "learning_rate": 2.891620001893806e-06, "loss": 0.3038, "step": 19914 }, { "epoch": 0.9329179744226355, "grad_norm": 0.6349936131117312, "learning_rate": 2.8914326919444643e-06, "loss": 0.3189, "step": 19915 }, { "epoch": 0.9329648194125638, "grad_norm": 0.6617557553236607, "learning_rate": 2.891245379742525e-06, "loss": 0.3295, "step": 19916 }, { "epoch": 0.9330116644024922, "grad_norm": 0.5828866595283931, "learning_rate": 2.891058065289067e-06, "loss": 0.3219, "step": 19917 }, { "epoch": 0.9330585093924205, "grad_norm": 0.5710683990993773, "learning_rate": 2.890870748585168e-06, "loss": 0.3236, "step": 19918 }, { "epoch": 0.9331053543823488, "grad_norm": 0.6298492232436026, "learning_rate": 2.8906834296319065e-06, "loss": 0.3351, "step": 19919 }, { "epoch": 0.9331521993722771, "grad_norm": 0.598064591858883, "learning_rate": 2.890496108430359e-06, "loss": 0.297, "step": 19920 }, { "epoch": 0.9331990443622055, "grad_norm": 0.5865313023925756, "learning_rate": 2.890308784981606e-06, "loss": 0.3232, "step": 19921 }, { "epoch": 0.9332458893521338, "grad_norm": 0.5777988861029304, "learning_rate": 2.8901214592867233e-06, "loss": 0.3051, "step": 19922 }, { "epoch": 0.9332927343420622, "grad_norm": 0.5968901053970573, "learning_rate": 2.889934131346789e-06, "loss": 0.3002, "step": 19923 }, { "epoch": 0.9333395793319904, "grad_norm": 0.5948899969708453, "learning_rate": 2.889746801162882e-06, "loss": 0.3259, "step": 19924 }, { "epoch": 0.9333864243219188, "grad_norm": 0.5288774605844111, "learning_rate": 2.8895594687360816e-06, "loss": 0.2894, "step": 19925 }, { "epoch": 0.9334332693118471, "grad_norm": 0.6886861182861167, "learning_rate": 2.889372134067463e-06, "loss": 0.3339, "step": 19926 }, { "epoch": 0.9334801143017755, "grad_norm": 0.6215683293013631, "learning_rate": 2.8891847971581055e-06, "loss": 0.3453, "step": 19927 }, { "epoch": 0.9335269592917037, "grad_norm": 0.5922164133821577, "learning_rate": 2.888997458009088e-06, "loss": 0.3311, "step": 19928 }, { "epoch": 0.9335738042816321, "grad_norm": 0.5727086715453116, "learning_rate": 2.8888101166214876e-06, "loss": 0.3033, "step": 19929 }, { "epoch": 0.9336206492715604, "grad_norm": 0.655993704713643, "learning_rate": 2.8886227729963824e-06, "loss": 0.3654, "step": 19930 }, { "epoch": 0.9336674942614888, "grad_norm": 0.5915012080919698, "learning_rate": 2.8884354271348515e-06, "loss": 0.3174, "step": 19931 }, { "epoch": 0.9337143392514171, "grad_norm": 0.6018262810529188, "learning_rate": 2.8882480790379722e-06, "loss": 0.3277, "step": 19932 }, { "epoch": 0.9337611842413454, "grad_norm": 0.5745410757357674, "learning_rate": 2.8880607287068225e-06, "loss": 0.3201, "step": 19933 }, { "epoch": 0.9338080292312737, "grad_norm": 0.5479567518734019, "learning_rate": 2.887873376142481e-06, "loss": 0.3009, "step": 19934 }, { "epoch": 0.9338548742212021, "grad_norm": 0.5656350738117308, "learning_rate": 2.8876860213460256e-06, "loss": 0.3189, "step": 19935 }, { "epoch": 0.9339017192111304, "grad_norm": 0.6221606568357289, "learning_rate": 2.8874986643185345e-06, "loss": 0.3094, "step": 19936 }, { "epoch": 0.9339485642010587, "grad_norm": 0.6634496928069595, "learning_rate": 2.8873113050610864e-06, "loss": 0.3408, "step": 19937 }, { "epoch": 0.933995409190987, "grad_norm": 0.633823278773052, "learning_rate": 2.8871239435747584e-06, "loss": 0.3118, "step": 19938 }, { "epoch": 0.9340422541809154, "grad_norm": 0.6250726879895728, "learning_rate": 2.8869365798606304e-06, "loss": 0.3208, "step": 19939 }, { "epoch": 0.9340890991708437, "grad_norm": 0.620288533852069, "learning_rate": 2.886749213919779e-06, "loss": 0.3158, "step": 19940 }, { "epoch": 0.9341359441607721, "grad_norm": 0.6531436664267534, "learning_rate": 2.886561845753283e-06, "loss": 0.3643, "step": 19941 }, { "epoch": 0.9341827891507003, "grad_norm": 0.5636051410428744, "learning_rate": 2.8863744753622208e-06, "loss": 0.2992, "step": 19942 }, { "epoch": 0.9342296341406287, "grad_norm": 0.5956162439278939, "learning_rate": 2.88618710274767e-06, "loss": 0.322, "step": 19943 }, { "epoch": 0.934276479130557, "grad_norm": 0.61414563515264, "learning_rate": 2.8859997279107103e-06, "loss": 0.3434, "step": 19944 }, { "epoch": 0.9343233241204854, "grad_norm": 0.5800455971570562, "learning_rate": 2.8858123508524193e-06, "loss": 0.2884, "step": 19945 }, { "epoch": 0.9343701691104136, "grad_norm": 0.5873972624577373, "learning_rate": 2.885624971573875e-06, "loss": 0.3109, "step": 19946 }, { "epoch": 0.934417014100342, "grad_norm": 0.5757033217386656, "learning_rate": 2.885437590076156e-06, "loss": 0.3078, "step": 19947 }, { "epoch": 0.9344638590902703, "grad_norm": 0.5581088947258733, "learning_rate": 2.88525020636034e-06, "loss": 0.2846, "step": 19948 }, { "epoch": 0.9345107040801987, "grad_norm": 0.6215287373303273, "learning_rate": 2.8850628204275068e-06, "loss": 0.3364, "step": 19949 }, { "epoch": 0.934557549070127, "grad_norm": 0.6560855352852776, "learning_rate": 2.884875432278733e-06, "loss": 0.3214, "step": 19950 }, { "epoch": 0.9346043940600552, "grad_norm": 0.6276990927070281, "learning_rate": 2.8846880419150978e-06, "loss": 0.3336, "step": 19951 }, { "epoch": 0.9346512390499836, "grad_norm": 0.6377562307217668, "learning_rate": 2.8845006493376796e-06, "loss": 0.3309, "step": 19952 }, { "epoch": 0.934698084039912, "grad_norm": 0.5965089590307815, "learning_rate": 2.884313254547558e-06, "loss": 0.2945, "step": 19953 }, { "epoch": 0.9347449290298403, "grad_norm": 0.5936701294648856, "learning_rate": 2.884125857545809e-06, "loss": 0.3071, "step": 19954 }, { "epoch": 0.9347917740197685, "grad_norm": 0.6614808093415958, "learning_rate": 2.883938458333513e-06, "loss": 0.3285, "step": 19955 }, { "epoch": 0.9348386190096969, "grad_norm": 0.5891720836196185, "learning_rate": 2.8837510569117473e-06, "loss": 0.3321, "step": 19956 }, { "epoch": 0.9348854639996252, "grad_norm": 0.57917272906091, "learning_rate": 2.883563653281591e-06, "loss": 0.3154, "step": 19957 }, { "epoch": 0.9349323089895536, "grad_norm": 0.6003179547861031, "learning_rate": 2.883376247444121e-06, "loss": 0.3283, "step": 19958 }, { "epoch": 0.934979153979482, "grad_norm": 0.5737383525889297, "learning_rate": 2.8831888394004188e-06, "loss": 0.3128, "step": 19959 }, { "epoch": 0.9350259989694102, "grad_norm": 0.5836738226920937, "learning_rate": 2.88300142915156e-06, "loss": 0.3119, "step": 19960 }, { "epoch": 0.9350728439593385, "grad_norm": 0.5736992470472718, "learning_rate": 2.8828140166986245e-06, "loss": 0.3118, "step": 19961 }, { "epoch": 0.9351196889492669, "grad_norm": 0.5599821641602772, "learning_rate": 2.8826266020426908e-06, "loss": 0.3125, "step": 19962 }, { "epoch": 0.9351665339391952, "grad_norm": 0.5909072958713982, "learning_rate": 2.8824391851848377e-06, "loss": 0.3056, "step": 19963 }, { "epoch": 0.9352133789291235, "grad_norm": 0.621596044708717, "learning_rate": 2.882251766126142e-06, "loss": 0.3223, "step": 19964 }, { "epoch": 0.9352602239190518, "grad_norm": 0.5636942744049369, "learning_rate": 2.882064344867684e-06, "loss": 0.31, "step": 19965 }, { "epoch": 0.9353070689089802, "grad_norm": 0.593283474261525, "learning_rate": 2.881876921410541e-06, "loss": 0.3481, "step": 19966 }, { "epoch": 0.9353539138989085, "grad_norm": 0.6023090581651117, "learning_rate": 2.881689495755794e-06, "loss": 0.3325, "step": 19967 }, { "epoch": 0.9354007588888369, "grad_norm": 0.604924509433823, "learning_rate": 2.8815020679045192e-06, "loss": 0.3237, "step": 19968 }, { "epoch": 0.9354476038787651, "grad_norm": 0.5690056664260806, "learning_rate": 2.881314637857795e-06, "loss": 0.3082, "step": 19969 }, { "epoch": 0.9354944488686935, "grad_norm": 0.5360156563239339, "learning_rate": 2.8811272056167015e-06, "loss": 0.2945, "step": 19970 }, { "epoch": 0.9355412938586218, "grad_norm": 0.5427171939575929, "learning_rate": 2.8809397711823173e-06, "loss": 0.3033, "step": 19971 }, { "epoch": 0.9355881388485502, "grad_norm": 0.5538284217956132, "learning_rate": 2.88075233455572e-06, "loss": 0.3223, "step": 19972 }, { "epoch": 0.9356349838384784, "grad_norm": 0.5920670236908787, "learning_rate": 2.880564895737988e-06, "loss": 0.3119, "step": 19973 }, { "epoch": 0.9356818288284068, "grad_norm": 0.5583148820596286, "learning_rate": 2.8803774547302014e-06, "loss": 0.3039, "step": 19974 }, { "epoch": 0.9357286738183351, "grad_norm": 0.5676777064921595, "learning_rate": 2.880190011533438e-06, "loss": 0.3087, "step": 19975 }, { "epoch": 0.9357755188082635, "grad_norm": 0.5865061011899878, "learning_rate": 2.8800025661487764e-06, "loss": 0.3281, "step": 19976 }, { "epoch": 0.9358223637981918, "grad_norm": 0.6012237226521785, "learning_rate": 2.879815118577296e-06, "loss": 0.3406, "step": 19977 }, { "epoch": 0.9358692087881201, "grad_norm": 0.6163117801418132, "learning_rate": 2.879627668820075e-06, "loss": 0.3267, "step": 19978 }, { "epoch": 0.9359160537780484, "grad_norm": 0.6152010620408971, "learning_rate": 2.879440216878192e-06, "loss": 0.3038, "step": 19979 }, { "epoch": 0.9359628987679768, "grad_norm": 0.5753678219203726, "learning_rate": 2.879252762752726e-06, "loss": 0.3221, "step": 19980 }, { "epoch": 0.9360097437579051, "grad_norm": 0.5852510131654718, "learning_rate": 2.879065306444756e-06, "loss": 0.3202, "step": 19981 }, { "epoch": 0.9360565887478334, "grad_norm": 0.5898293381513776, "learning_rate": 2.87887784795536e-06, "loss": 0.3351, "step": 19982 }, { "epoch": 0.9361034337377617, "grad_norm": 0.6207380845288912, "learning_rate": 2.878690387285617e-06, "loss": 0.3076, "step": 19983 }, { "epoch": 0.9361502787276901, "grad_norm": 0.5663358659975368, "learning_rate": 2.8785029244366057e-06, "loss": 0.2999, "step": 19984 }, { "epoch": 0.9361971237176184, "grad_norm": 0.6103747327132605, "learning_rate": 2.8783154594094065e-06, "loss": 0.3239, "step": 19985 }, { "epoch": 0.9362439687075468, "grad_norm": 0.583664674374496, "learning_rate": 2.8781279922050962e-06, "loss": 0.3264, "step": 19986 }, { "epoch": 0.936290813697475, "grad_norm": 0.6150183345035082, "learning_rate": 2.877940522824754e-06, "loss": 0.3192, "step": 19987 }, { "epoch": 0.9363376586874034, "grad_norm": 0.6617776812500796, "learning_rate": 2.8777530512694597e-06, "loss": 0.3377, "step": 19988 }, { "epoch": 0.9363845036773317, "grad_norm": 0.6438487930159543, "learning_rate": 2.8775655775402916e-06, "loss": 0.3147, "step": 19989 }, { "epoch": 0.9364313486672601, "grad_norm": 0.5901765752068803, "learning_rate": 2.8773781016383277e-06, "loss": 0.332, "step": 19990 }, { "epoch": 0.9364781936571883, "grad_norm": 0.5743149293046208, "learning_rate": 2.8771906235646484e-06, "loss": 0.319, "step": 19991 }, { "epoch": 0.9365250386471167, "grad_norm": 0.6042412544429628, "learning_rate": 2.877003143320332e-06, "loss": 0.2874, "step": 19992 }, { "epoch": 0.936571883637045, "grad_norm": 0.6252831296149397, "learning_rate": 2.8768156609064567e-06, "loss": 0.3165, "step": 19993 }, { "epoch": 0.9366187286269734, "grad_norm": 0.5643354311298083, "learning_rate": 2.8766281763241018e-06, "loss": 0.3186, "step": 19994 }, { "epoch": 0.9366655736169017, "grad_norm": 0.5841628506635425, "learning_rate": 2.8764406895743474e-06, "loss": 0.3182, "step": 19995 }, { "epoch": 0.93671241860683, "grad_norm": 0.5874729068601923, "learning_rate": 2.8762532006582703e-06, "loss": 0.3086, "step": 19996 }, { "epoch": 0.9367592635967583, "grad_norm": 0.58034479836818, "learning_rate": 2.8760657095769513e-06, "loss": 0.2948, "step": 19997 }, { "epoch": 0.9368061085866867, "grad_norm": 0.5585846675451865, "learning_rate": 2.8758782163314686e-06, "loss": 0.302, "step": 19998 }, { "epoch": 0.936852953576615, "grad_norm": 0.5601221542224304, "learning_rate": 2.8756907209229013e-06, "loss": 0.3008, "step": 19999 }, { "epoch": 0.9368997985665433, "grad_norm": 0.5702457292832357, "learning_rate": 2.8755032233523278e-06, "loss": 0.315, "step": 20000 }, { "epoch": 0.9369466435564716, "grad_norm": 0.5927979707125425, "learning_rate": 2.875315723620828e-06, "loss": 0.314, "step": 20001 }, { "epoch": 0.9369934885464, "grad_norm": 0.6201051616749649, "learning_rate": 2.875128221729481e-06, "loss": 0.3275, "step": 20002 }, { "epoch": 0.9370403335363283, "grad_norm": 0.5710193329166505, "learning_rate": 2.874940717679364e-06, "loss": 0.3136, "step": 20003 }, { "epoch": 0.9370871785262567, "grad_norm": 0.5688382192638936, "learning_rate": 2.8747532114715587e-06, "loss": 0.3321, "step": 20004 }, { "epoch": 0.9371340235161849, "grad_norm": 0.5548399240667163, "learning_rate": 2.8745657031071422e-06, "loss": 0.3109, "step": 20005 }, { "epoch": 0.9371808685061133, "grad_norm": 0.5501376135767637, "learning_rate": 2.874378192587195e-06, "loss": 0.3059, "step": 20006 }, { "epoch": 0.9372277134960416, "grad_norm": 0.596645240041601, "learning_rate": 2.874190679912795e-06, "loss": 0.3225, "step": 20007 }, { "epoch": 0.93727455848597, "grad_norm": 0.6241836788339893, "learning_rate": 2.874003165085021e-06, "loss": 0.3145, "step": 20008 }, { "epoch": 0.9373214034758982, "grad_norm": 0.6185833747833709, "learning_rate": 2.8738156481049532e-06, "loss": 0.3094, "step": 20009 }, { "epoch": 0.9373682484658266, "grad_norm": 0.5501075236370205, "learning_rate": 2.8736281289736706e-06, "loss": 0.301, "step": 20010 }, { "epoch": 0.9374150934557549, "grad_norm": 0.6073932884067695, "learning_rate": 2.8734406076922516e-06, "loss": 0.323, "step": 20011 }, { "epoch": 0.9374619384456833, "grad_norm": 0.6241398577028833, "learning_rate": 2.8732530842617765e-06, "loss": 0.3286, "step": 20012 }, { "epoch": 0.9375087834356116, "grad_norm": 0.5775695665182752, "learning_rate": 2.873065558683323e-06, "loss": 0.3138, "step": 20013 }, { "epoch": 0.9375556284255399, "grad_norm": 0.6155121969048106, "learning_rate": 2.872878030957971e-06, "loss": 0.3084, "step": 20014 }, { "epoch": 0.9376024734154682, "grad_norm": 0.5361870997417671, "learning_rate": 2.8726905010868e-06, "loss": 0.3172, "step": 20015 }, { "epoch": 0.9376493184053966, "grad_norm": 0.5642922741459382, "learning_rate": 2.8725029690708887e-06, "loss": 0.321, "step": 20016 }, { "epoch": 0.9376961633953249, "grad_norm": 0.6310092859867102, "learning_rate": 2.872315434911317e-06, "loss": 0.3245, "step": 20017 }, { "epoch": 0.9377430083852532, "grad_norm": 0.5865455250508604, "learning_rate": 2.8721278986091626e-06, "loss": 0.3131, "step": 20018 }, { "epoch": 0.9377898533751815, "grad_norm": 0.567379930880065, "learning_rate": 2.871940360165506e-06, "loss": 0.3004, "step": 20019 }, { "epoch": 0.9378366983651099, "grad_norm": 0.5353590246444072, "learning_rate": 2.8717528195814265e-06, "loss": 0.318, "step": 20020 }, { "epoch": 0.9378835433550382, "grad_norm": 0.6589370018859013, "learning_rate": 2.871565276858003e-06, "loss": 0.3162, "step": 20021 }, { "epoch": 0.9379303883449666, "grad_norm": 0.5705682207567597, "learning_rate": 2.871377731996313e-06, "loss": 0.3311, "step": 20022 }, { "epoch": 0.9379772333348948, "grad_norm": 0.6353495513852037, "learning_rate": 2.87119018499744e-06, "loss": 0.323, "step": 20023 }, { "epoch": 0.9380240783248232, "grad_norm": 0.5755757253630966, "learning_rate": 2.871002635862459e-06, "loss": 0.2873, "step": 20024 }, { "epoch": 0.9380709233147515, "grad_norm": 0.5685776011016522, "learning_rate": 2.870815084592452e-06, "loss": 0.3017, "step": 20025 }, { "epoch": 0.9381177683046799, "grad_norm": 0.5973375083271154, "learning_rate": 2.8706275311884967e-06, "loss": 0.3126, "step": 20026 }, { "epoch": 0.9381646132946081, "grad_norm": 0.5772525147404752, "learning_rate": 2.8704399756516745e-06, "loss": 0.3184, "step": 20027 }, { "epoch": 0.9382114582845364, "grad_norm": 0.6231063843900646, "learning_rate": 2.8702524179830617e-06, "loss": 0.3304, "step": 20028 }, { "epoch": 0.9382583032744648, "grad_norm": 0.6540149048244921, "learning_rate": 2.8700648581837406e-06, "loss": 0.3405, "step": 20029 }, { "epoch": 0.9383051482643932, "grad_norm": 0.5980470941507895, "learning_rate": 2.8698772962547884e-06, "loss": 0.2977, "step": 20030 }, { "epoch": 0.9383519932543215, "grad_norm": 0.557241572253655, "learning_rate": 2.869689732197285e-06, "loss": 0.2983, "step": 20031 }, { "epoch": 0.9383988382442497, "grad_norm": 0.6121253618891772, "learning_rate": 2.8695021660123114e-06, "loss": 0.3184, "step": 20032 }, { "epoch": 0.9384456832341781, "grad_norm": 0.5431501637956047, "learning_rate": 2.8693145977009453e-06, "loss": 0.3049, "step": 20033 }, { "epoch": 0.9384925282241064, "grad_norm": 0.5930199251363621, "learning_rate": 2.869127027264265e-06, "loss": 0.2998, "step": 20034 }, { "epoch": 0.9385393732140348, "grad_norm": 0.548650216978175, "learning_rate": 2.8689394547033532e-06, "loss": 0.313, "step": 20035 }, { "epoch": 0.938586218203963, "grad_norm": 0.5967713040712745, "learning_rate": 2.8687518800192864e-06, "loss": 0.3279, "step": 20036 }, { "epoch": 0.9386330631938914, "grad_norm": 0.6337795475364337, "learning_rate": 2.8685643032131463e-06, "loss": 0.3218, "step": 20037 }, { "epoch": 0.9386799081838197, "grad_norm": 0.6492278672256535, "learning_rate": 2.868376724286011e-06, "loss": 0.3459, "step": 20038 }, { "epoch": 0.9387267531737481, "grad_norm": 0.5131071363061541, "learning_rate": 2.86818914323896e-06, "loss": 0.2999, "step": 20039 }, { "epoch": 0.9387735981636764, "grad_norm": 0.6093606547079844, "learning_rate": 2.8680015600730727e-06, "loss": 0.327, "step": 20040 }, { "epoch": 0.9388204431536047, "grad_norm": 0.6111746141274373, "learning_rate": 2.8678139747894296e-06, "loss": 0.3008, "step": 20041 }, { "epoch": 0.938867288143533, "grad_norm": 0.6362155260805287, "learning_rate": 2.8676263873891087e-06, "loss": 0.3121, "step": 20042 }, { "epoch": 0.9389141331334614, "grad_norm": 0.6156772541530031, "learning_rate": 2.8674387978731905e-06, "loss": 0.32, "step": 20043 }, { "epoch": 0.9389609781233897, "grad_norm": 0.6563649786761526, "learning_rate": 2.8672512062427553e-06, "loss": 0.3376, "step": 20044 }, { "epoch": 0.939007823113318, "grad_norm": 0.6088149701190712, "learning_rate": 2.867063612498881e-06, "loss": 0.3161, "step": 20045 }, { "epoch": 0.9390546681032463, "grad_norm": 0.5792467832352918, "learning_rate": 2.8668760166426476e-06, "loss": 0.2893, "step": 20046 }, { "epoch": 0.9391015130931747, "grad_norm": 0.6119940117158803, "learning_rate": 2.8666884186751354e-06, "loss": 0.3367, "step": 20047 }, { "epoch": 0.939148358083103, "grad_norm": 0.5597243112169843, "learning_rate": 2.8665008185974236e-06, "loss": 0.3022, "step": 20048 }, { "epoch": 0.9391952030730314, "grad_norm": 0.5766510718335575, "learning_rate": 2.8663132164105912e-06, "loss": 0.3136, "step": 20049 }, { "epoch": 0.9392420480629596, "grad_norm": 0.630989388778704, "learning_rate": 2.866125612115719e-06, "loss": 0.3051, "step": 20050 }, { "epoch": 0.939288893052888, "grad_norm": 0.6247372508458987, "learning_rate": 2.8659380057138854e-06, "loss": 0.3199, "step": 20051 }, { "epoch": 0.9393357380428163, "grad_norm": 0.5660123338877319, "learning_rate": 2.8657503972061705e-06, "loss": 0.314, "step": 20052 }, { "epoch": 0.9393825830327447, "grad_norm": 0.7916447903079554, "learning_rate": 2.865562786593654e-06, "loss": 0.3018, "step": 20053 }, { "epoch": 0.9394294280226729, "grad_norm": 0.6084982618831559, "learning_rate": 2.8653751738774153e-06, "loss": 0.3152, "step": 20054 }, { "epoch": 0.9394762730126013, "grad_norm": 0.6211630689929247, "learning_rate": 2.8651875590585355e-06, "loss": 0.3376, "step": 20055 }, { "epoch": 0.9395231180025296, "grad_norm": 0.615865192155834, "learning_rate": 2.8649999421380923e-06, "loss": 0.3238, "step": 20056 }, { "epoch": 0.939569962992458, "grad_norm": 0.5585387240215373, "learning_rate": 2.864812323117166e-06, "loss": 0.3076, "step": 20057 }, { "epoch": 0.9396168079823863, "grad_norm": 0.6170727385784371, "learning_rate": 2.8646247019968364e-06, "loss": 0.3134, "step": 20058 }, { "epoch": 0.9396636529723146, "grad_norm": 0.5418952523579899, "learning_rate": 2.864437078778184e-06, "loss": 0.2997, "step": 20059 }, { "epoch": 0.9397104979622429, "grad_norm": 0.5776121785099578, "learning_rate": 2.864249453462287e-06, "loss": 0.3159, "step": 20060 }, { "epoch": 0.9397573429521713, "grad_norm": 0.5440060114345021, "learning_rate": 2.8640618260502266e-06, "loss": 0.2949, "step": 20061 }, { "epoch": 0.9398041879420996, "grad_norm": 0.6117290055047463, "learning_rate": 2.8638741965430817e-06, "loss": 0.3196, "step": 20062 }, { "epoch": 0.9398510329320279, "grad_norm": 0.6612764888369406, "learning_rate": 2.863686564941932e-06, "loss": 0.3391, "step": 20063 }, { "epoch": 0.9398978779219562, "grad_norm": 0.6287065350680285, "learning_rate": 2.8634989312478573e-06, "loss": 0.2912, "step": 20064 }, { "epoch": 0.9399447229118846, "grad_norm": 0.5965018159754004, "learning_rate": 2.863311295461939e-06, "loss": 0.3194, "step": 20065 }, { "epoch": 0.9399915679018129, "grad_norm": 0.5941599622076779, "learning_rate": 2.863123657585254e-06, "loss": 0.3021, "step": 20066 }, { "epoch": 0.9400384128917413, "grad_norm": 0.5504394637916524, "learning_rate": 2.8629360176188836e-06, "loss": 0.3156, "step": 20067 }, { "epoch": 0.9400852578816695, "grad_norm": 0.6074671217096564, "learning_rate": 2.8627483755639083e-06, "loss": 0.3239, "step": 20068 }, { "epoch": 0.9401321028715979, "grad_norm": 0.609025124024236, "learning_rate": 2.8625607314214077e-06, "loss": 0.298, "step": 20069 }, { "epoch": 0.9401789478615262, "grad_norm": 0.5677361839826789, "learning_rate": 2.8623730851924603e-06, "loss": 0.3133, "step": 20070 }, { "epoch": 0.9402257928514546, "grad_norm": 0.6323197085532256, "learning_rate": 2.862185436878147e-06, "loss": 0.3267, "step": 20071 }, { "epoch": 0.9402726378413828, "grad_norm": 0.5375528312147131, "learning_rate": 2.8619977864795482e-06, "loss": 0.2872, "step": 20072 }, { "epoch": 0.9403194828313112, "grad_norm": 0.5509637772372339, "learning_rate": 2.8618101339977422e-06, "loss": 0.3257, "step": 20073 }, { "epoch": 0.9403663278212395, "grad_norm": 0.6624885022018144, "learning_rate": 2.861622479433811e-06, "loss": 0.3472, "step": 20074 }, { "epoch": 0.9404131728111679, "grad_norm": 0.6091673999070656, "learning_rate": 2.861434822788832e-06, "loss": 0.3262, "step": 20075 }, { "epoch": 0.9404600178010962, "grad_norm": 0.5843411370997639, "learning_rate": 2.8612471640638873e-06, "loss": 0.3173, "step": 20076 }, { "epoch": 0.9405068627910245, "grad_norm": 0.5918580959744116, "learning_rate": 2.861059503260056e-06, "loss": 0.3172, "step": 20077 }, { "epoch": 0.9405537077809528, "grad_norm": 0.5973051720661823, "learning_rate": 2.8608718403784174e-06, "loss": 0.3084, "step": 20078 }, { "epoch": 0.9406005527708812, "grad_norm": 0.576493050140911, "learning_rate": 2.860684175420053e-06, "loss": 0.3061, "step": 20079 }, { "epoch": 0.9406473977608095, "grad_norm": 0.5706044219030464, "learning_rate": 2.8604965083860404e-06, "loss": 0.3114, "step": 20080 }, { "epoch": 0.9406942427507378, "grad_norm": 0.5917208220911281, "learning_rate": 2.860308839277462e-06, "loss": 0.3206, "step": 20081 }, { "epoch": 0.9407410877406661, "grad_norm": 0.6017129801008194, "learning_rate": 2.8601211680953967e-06, "loss": 0.3082, "step": 20082 }, { "epoch": 0.9407879327305945, "grad_norm": 0.5889395711143992, "learning_rate": 2.8599334948409242e-06, "loss": 0.3191, "step": 20083 }, { "epoch": 0.9408347777205228, "grad_norm": 0.6022968230210856, "learning_rate": 2.859745819515126e-06, "loss": 0.3174, "step": 20084 }, { "epoch": 0.9408816227104512, "grad_norm": 0.6121415372089858, "learning_rate": 2.8595581421190794e-06, "loss": 0.3321, "step": 20085 }, { "epoch": 0.9409284677003794, "grad_norm": 0.5711443283539198, "learning_rate": 2.859370462653867e-06, "loss": 0.318, "step": 20086 }, { "epoch": 0.9409753126903078, "grad_norm": 0.5953150168392728, "learning_rate": 2.859182781120568e-06, "loss": 0.3078, "step": 20087 }, { "epoch": 0.9410221576802361, "grad_norm": 0.5558252363417536, "learning_rate": 2.858995097520262e-06, "loss": 0.2952, "step": 20088 }, { "epoch": 0.9410690026701645, "grad_norm": 0.5823233747067723, "learning_rate": 2.85880741185403e-06, "loss": 0.3244, "step": 20089 }, { "epoch": 0.9411158476600927, "grad_norm": 0.5966144126454747, "learning_rate": 2.8586197241229515e-06, "loss": 0.3185, "step": 20090 }, { "epoch": 0.9411626926500211, "grad_norm": 0.6190324855938387, "learning_rate": 2.858432034328106e-06, "loss": 0.3319, "step": 20091 }, { "epoch": 0.9412095376399494, "grad_norm": 0.6183368740383401, "learning_rate": 2.858244342470574e-06, "loss": 0.3208, "step": 20092 }, { "epoch": 0.9412563826298778, "grad_norm": 0.601031472699299, "learning_rate": 2.858056648551437e-06, "loss": 0.325, "step": 20093 }, { "epoch": 0.9413032276198061, "grad_norm": 0.6126207599120448, "learning_rate": 2.857868952571773e-06, "loss": 0.32, "step": 20094 }, { "epoch": 0.9413500726097344, "grad_norm": 0.6159104257948846, "learning_rate": 2.857681254532663e-06, "loss": 0.3221, "step": 20095 }, { "epoch": 0.9413969175996627, "grad_norm": 0.6149224281617611, "learning_rate": 2.857493554435188e-06, "loss": 0.3236, "step": 20096 }, { "epoch": 0.9414437625895911, "grad_norm": 0.5273160555967117, "learning_rate": 2.8573058522804274e-06, "loss": 0.2888, "step": 20097 }, { "epoch": 0.9414906075795194, "grad_norm": 0.6172174357880235, "learning_rate": 2.857118148069461e-06, "loss": 0.3108, "step": 20098 }, { "epoch": 0.9415374525694477, "grad_norm": 0.646110869055833, "learning_rate": 2.85693044180337e-06, "loss": 0.3288, "step": 20099 }, { "epoch": 0.941584297559376, "grad_norm": 0.6110156879069393, "learning_rate": 2.8567427334832336e-06, "loss": 0.307, "step": 20100 }, { "epoch": 0.9416311425493044, "grad_norm": 0.5821793602779954, "learning_rate": 2.8565550231101324e-06, "loss": 0.3203, "step": 20101 }, { "epoch": 0.9416779875392327, "grad_norm": 0.6073135492510274, "learning_rate": 2.856367310685147e-06, "loss": 0.3304, "step": 20102 }, { "epoch": 0.9417248325291611, "grad_norm": 0.5487525499563041, "learning_rate": 2.856179596209357e-06, "loss": 0.2876, "step": 20103 }, { "epoch": 0.9417716775190893, "grad_norm": 0.5588597126835426, "learning_rate": 2.8559918796838424e-06, "loss": 0.3038, "step": 20104 }, { "epoch": 0.9418185225090177, "grad_norm": 0.6285984621335797, "learning_rate": 2.8558041611096853e-06, "loss": 0.3086, "step": 20105 }, { "epoch": 0.941865367498946, "grad_norm": 0.6299739070760563, "learning_rate": 2.855616440487963e-06, "loss": 0.3117, "step": 20106 }, { "epoch": 0.9419122124888744, "grad_norm": 0.6136364376775109, "learning_rate": 2.8554287178197594e-06, "loss": 0.3166, "step": 20107 }, { "epoch": 0.9419590574788026, "grad_norm": 0.5822291106358196, "learning_rate": 2.855240993106152e-06, "loss": 0.3024, "step": 20108 }, { "epoch": 0.942005902468731, "grad_norm": 0.5834267567248426, "learning_rate": 2.8550532663482213e-06, "loss": 0.3067, "step": 20109 }, { "epoch": 0.9420527474586593, "grad_norm": 0.644072522693559, "learning_rate": 2.8548655375470492e-06, "loss": 0.3211, "step": 20110 }, { "epoch": 0.9420995924485877, "grad_norm": 0.5761472365876182, "learning_rate": 2.854677806703715e-06, "loss": 0.3281, "step": 20111 }, { "epoch": 0.942146437438516, "grad_norm": 0.5635222062088698, "learning_rate": 2.854490073819299e-06, "loss": 0.3055, "step": 20112 }, { "epoch": 0.9421932824284442, "grad_norm": 0.5505252794391108, "learning_rate": 2.854302338894881e-06, "loss": 0.3073, "step": 20113 }, { "epoch": 0.9422401274183726, "grad_norm": 0.6486967113813829, "learning_rate": 2.8541146019315434e-06, "loss": 0.3262, "step": 20114 }, { "epoch": 0.942286972408301, "grad_norm": 0.619049997287001, "learning_rate": 2.853926862930364e-06, "loss": 0.3243, "step": 20115 }, { "epoch": 0.9423338173982293, "grad_norm": 0.5835968225778773, "learning_rate": 2.853739121892425e-06, "loss": 0.3279, "step": 20116 }, { "epoch": 0.9423806623881575, "grad_norm": 0.603216359263515, "learning_rate": 2.853551378818807e-06, "loss": 0.3343, "step": 20117 }, { "epoch": 0.9424275073780859, "grad_norm": 0.5704101199704974, "learning_rate": 2.8533636337105893e-06, "loss": 0.303, "step": 20118 }, { "epoch": 0.9424743523680142, "grad_norm": 0.5784104846238234, "learning_rate": 2.853175886568852e-06, "loss": 0.3015, "step": 20119 }, { "epoch": 0.9425211973579426, "grad_norm": 0.5820879874100043, "learning_rate": 2.852988137394677e-06, "loss": 0.2938, "step": 20120 }, { "epoch": 0.942568042347871, "grad_norm": 0.5642269878277792, "learning_rate": 2.8528003861891433e-06, "loss": 0.281, "step": 20121 }, { "epoch": 0.9426148873377992, "grad_norm": 0.5944461824341688, "learning_rate": 2.8526126329533323e-06, "loss": 0.3297, "step": 20122 }, { "epoch": 0.9426617323277275, "grad_norm": 0.5569290657650854, "learning_rate": 2.8524248776883247e-06, "loss": 0.323, "step": 20123 }, { "epoch": 0.9427085773176559, "grad_norm": 0.550666638955399, "learning_rate": 2.8522371203951997e-06, "loss": 0.3122, "step": 20124 }, { "epoch": 0.9427554223075842, "grad_norm": 0.5777475589440955, "learning_rate": 2.8520493610750393e-06, "loss": 0.3142, "step": 20125 }, { "epoch": 0.9428022672975125, "grad_norm": 0.647526644564679, "learning_rate": 2.8518615997289234e-06, "loss": 0.3363, "step": 20126 }, { "epoch": 0.9428491122874408, "grad_norm": 0.5605250026873696, "learning_rate": 2.8516738363579317e-06, "loss": 0.3062, "step": 20127 }, { "epoch": 0.9428959572773692, "grad_norm": 0.6255150834902131, "learning_rate": 2.8514860709631457e-06, "loss": 0.3356, "step": 20128 }, { "epoch": 0.9429428022672975, "grad_norm": 0.6090215094949176, "learning_rate": 2.8512983035456463e-06, "loss": 0.3096, "step": 20129 }, { "epoch": 0.9429896472572259, "grad_norm": 0.5590204970854135, "learning_rate": 2.8511105341065127e-06, "loss": 0.3134, "step": 20130 }, { "epoch": 0.9430364922471541, "grad_norm": 0.594635107440921, "learning_rate": 2.850922762646827e-06, "loss": 0.3239, "step": 20131 }, { "epoch": 0.9430833372370825, "grad_norm": 0.6011146874881855, "learning_rate": 2.8507349891676683e-06, "loss": 0.3129, "step": 20132 }, { "epoch": 0.9431301822270108, "grad_norm": 0.5561736962448355, "learning_rate": 2.850547213670118e-06, "loss": 0.2992, "step": 20133 }, { "epoch": 0.9431770272169392, "grad_norm": 0.5986166727154026, "learning_rate": 2.8503594361552566e-06, "loss": 0.329, "step": 20134 }, { "epoch": 0.9432238722068674, "grad_norm": 0.6385834255685737, "learning_rate": 2.8501716566241654e-06, "loss": 0.3406, "step": 20135 }, { "epoch": 0.9432707171967958, "grad_norm": 0.6102682896694217, "learning_rate": 2.8499838750779235e-06, "loss": 0.3041, "step": 20136 }, { "epoch": 0.9433175621867241, "grad_norm": 0.5584843280032538, "learning_rate": 2.8497960915176122e-06, "loss": 0.299, "step": 20137 }, { "epoch": 0.9433644071766525, "grad_norm": 0.5467136367941497, "learning_rate": 2.849608305944313e-06, "loss": 0.3169, "step": 20138 }, { "epoch": 0.9434112521665808, "grad_norm": 0.5735325997399034, "learning_rate": 2.8494205183591054e-06, "loss": 0.3191, "step": 20139 }, { "epoch": 0.9434580971565091, "grad_norm": 0.5510913366768729, "learning_rate": 2.8492327287630707e-06, "loss": 0.308, "step": 20140 }, { "epoch": 0.9435049421464374, "grad_norm": 0.631761836786217, "learning_rate": 2.84904493715729e-06, "loss": 0.3096, "step": 20141 }, { "epoch": 0.9435517871363658, "grad_norm": 0.6139548114953913, "learning_rate": 2.848857143542843e-06, "loss": 0.3233, "step": 20142 }, { "epoch": 0.9435986321262941, "grad_norm": 0.618126760561465, "learning_rate": 2.8486693479208106e-06, "loss": 0.3256, "step": 20143 }, { "epoch": 0.9436454771162224, "grad_norm": 0.5819999076154767, "learning_rate": 2.8484815502922735e-06, "loss": 0.3112, "step": 20144 }, { "epoch": 0.9436923221061507, "grad_norm": 0.5528057191878872, "learning_rate": 2.848293750658313e-06, "loss": 0.3, "step": 20145 }, { "epoch": 0.9437391670960791, "grad_norm": 0.5942073033563082, "learning_rate": 2.8481059490200096e-06, "loss": 0.3032, "step": 20146 }, { "epoch": 0.9437860120860074, "grad_norm": 0.5875039177143001, "learning_rate": 2.8479181453784437e-06, "loss": 0.3091, "step": 20147 }, { "epoch": 0.9438328570759358, "grad_norm": 0.5944838639168281, "learning_rate": 2.847730339734697e-06, "loss": 0.3206, "step": 20148 }, { "epoch": 0.943879702065864, "grad_norm": 0.5964062887582473, "learning_rate": 2.8475425320898486e-06, "loss": 0.3297, "step": 20149 }, { "epoch": 0.9439265470557924, "grad_norm": 0.5946019177507703, "learning_rate": 2.8473547224449806e-06, "loss": 0.3309, "step": 20150 }, { "epoch": 0.9439733920457207, "grad_norm": 0.5776449500177099, "learning_rate": 2.8471669108011742e-06, "loss": 0.3171, "step": 20151 }, { "epoch": 0.9440202370356491, "grad_norm": 0.5892583189688008, "learning_rate": 2.846979097159509e-06, "loss": 0.313, "step": 20152 }, { "epoch": 0.9440670820255773, "grad_norm": 0.6036619355062482, "learning_rate": 2.846791281521066e-06, "loss": 0.302, "step": 20153 }, { "epoch": 0.9441139270155057, "grad_norm": 0.5661601406921624, "learning_rate": 2.8466034638869265e-06, "loss": 0.3081, "step": 20154 }, { "epoch": 0.944160772005434, "grad_norm": 0.6071329565714948, "learning_rate": 2.846415644258171e-06, "loss": 0.3208, "step": 20155 }, { "epoch": 0.9442076169953624, "grad_norm": 0.6806148066410017, "learning_rate": 2.8462278226358806e-06, "loss": 0.3575, "step": 20156 }, { "epoch": 0.9442544619852907, "grad_norm": 0.62194123646164, "learning_rate": 2.8460399990211367e-06, "loss": 0.3282, "step": 20157 }, { "epoch": 0.944301306975219, "grad_norm": 0.640131524846857, "learning_rate": 2.8458521734150192e-06, "loss": 0.3542, "step": 20158 }, { "epoch": 0.9443481519651473, "grad_norm": 0.5330740989913894, "learning_rate": 2.8456643458186093e-06, "loss": 0.3147, "step": 20159 }, { "epoch": 0.9443949969550757, "grad_norm": 0.5778509865649286, "learning_rate": 2.8454765162329883e-06, "loss": 0.3301, "step": 20160 }, { "epoch": 0.944441841945004, "grad_norm": 0.6136446480209972, "learning_rate": 2.8452886846592357e-06, "loss": 0.3116, "step": 20161 }, { "epoch": 0.9444886869349323, "grad_norm": 0.5576105709622695, "learning_rate": 2.8451008510984344e-06, "loss": 0.2818, "step": 20162 }, { "epoch": 0.9445355319248606, "grad_norm": 0.5868299690539196, "learning_rate": 2.8449130155516653e-06, "loss": 0.3156, "step": 20163 }, { "epoch": 0.944582376914789, "grad_norm": 0.5461275393836174, "learning_rate": 2.844725178020007e-06, "loss": 0.3092, "step": 20164 }, { "epoch": 0.9446292219047173, "grad_norm": 0.569058586050927, "learning_rate": 2.844537338504542e-06, "loss": 0.3227, "step": 20165 }, { "epoch": 0.9446760668946457, "grad_norm": 0.5673250710473483, "learning_rate": 2.844349497006352e-06, "loss": 0.3067, "step": 20166 }, { "epoch": 0.9447229118845739, "grad_norm": 0.6055622797091659, "learning_rate": 2.844161653526517e-06, "loss": 0.3137, "step": 20167 }, { "epoch": 0.9447697568745023, "grad_norm": 0.6042133244136377, "learning_rate": 2.8439738080661178e-06, "loss": 0.3259, "step": 20168 }, { "epoch": 0.9448166018644306, "grad_norm": 0.5617259687386066, "learning_rate": 2.8437859606262365e-06, "loss": 0.2948, "step": 20169 }, { "epoch": 0.944863446854359, "grad_norm": 0.6140884973744282, "learning_rate": 2.843598111207953e-06, "loss": 0.3351, "step": 20170 }, { "epoch": 0.9449102918442872, "grad_norm": 0.5731669256643313, "learning_rate": 2.843410259812348e-06, "loss": 0.2982, "step": 20171 }, { "epoch": 0.9449571368342156, "grad_norm": 0.5659375392068443, "learning_rate": 2.8432224064405045e-06, "loss": 0.3024, "step": 20172 }, { "epoch": 0.9450039818241439, "grad_norm": 0.5753243289954961, "learning_rate": 2.843034551093502e-06, "loss": 0.3222, "step": 20173 }, { "epoch": 0.9450508268140723, "grad_norm": 0.554900674153035, "learning_rate": 2.842846693772421e-06, "loss": 0.2906, "step": 20174 }, { "epoch": 0.9450976718040006, "grad_norm": 0.6173401570372377, "learning_rate": 2.8426588344783444e-06, "loss": 0.3093, "step": 20175 }, { "epoch": 0.9451445167939289, "grad_norm": 0.6094148361413397, "learning_rate": 2.842470973212352e-06, "loss": 0.3241, "step": 20176 }, { "epoch": 0.9451913617838572, "grad_norm": 0.6577633326710858, "learning_rate": 2.8422831099755256e-06, "loss": 0.3414, "step": 20177 }, { "epoch": 0.9452382067737856, "grad_norm": 0.6132599592679079, "learning_rate": 2.8420952447689454e-06, "loss": 0.3216, "step": 20178 }, { "epoch": 0.9452850517637139, "grad_norm": 0.561408269285325, "learning_rate": 2.841907377593693e-06, "loss": 0.3051, "step": 20179 }, { "epoch": 0.9453318967536422, "grad_norm": 0.5842889572335794, "learning_rate": 2.8417195084508497e-06, "loss": 0.2971, "step": 20180 }, { "epoch": 0.9453787417435705, "grad_norm": 0.5446428934603771, "learning_rate": 2.841531637341497e-06, "loss": 0.298, "step": 20181 }, { "epoch": 0.9454255867334989, "grad_norm": 0.5033869617987048, "learning_rate": 2.841343764266715e-06, "loss": 0.2989, "step": 20182 }, { "epoch": 0.9454724317234272, "grad_norm": 0.5904597981297944, "learning_rate": 2.8411558892275854e-06, "loss": 0.3064, "step": 20183 }, { "epoch": 0.9455192767133556, "grad_norm": 0.5986409089113859, "learning_rate": 2.8409680122251905e-06, "loss": 0.3362, "step": 20184 }, { "epoch": 0.9455661217032838, "grad_norm": 0.6107695067929155, "learning_rate": 2.840780133260609e-06, "loss": 0.3275, "step": 20185 }, { "epoch": 0.9456129666932122, "grad_norm": 0.6017886009838422, "learning_rate": 2.8405922523349234e-06, "loss": 0.315, "step": 20186 }, { "epoch": 0.9456598116831405, "grad_norm": 0.5625770124631113, "learning_rate": 2.8404043694492163e-06, "loss": 0.3062, "step": 20187 }, { "epoch": 0.9457066566730689, "grad_norm": 0.5731758495802943, "learning_rate": 2.8402164846045664e-06, "loss": 0.3187, "step": 20188 }, { "epoch": 0.9457535016629971, "grad_norm": 0.5567543596060569, "learning_rate": 2.8400285978020566e-06, "loss": 0.3133, "step": 20189 }, { "epoch": 0.9458003466529254, "grad_norm": 0.6299490083986374, "learning_rate": 2.839840709042768e-06, "loss": 0.3339, "step": 20190 }, { "epoch": 0.9458471916428538, "grad_norm": 0.5825686111500717, "learning_rate": 2.8396528183277817e-06, "loss": 0.3088, "step": 20191 }, { "epoch": 0.9458940366327822, "grad_norm": 0.6648254713143851, "learning_rate": 2.839464925658178e-06, "loss": 0.317, "step": 20192 }, { "epoch": 0.9459408816227105, "grad_norm": 0.6123655363734201, "learning_rate": 2.839277031035039e-06, "loss": 0.3462, "step": 20193 }, { "epoch": 0.9459877266126387, "grad_norm": 0.5922296459873525, "learning_rate": 2.839089134459446e-06, "loss": 0.3237, "step": 20194 }, { "epoch": 0.9460345716025671, "grad_norm": 0.5775580618334042, "learning_rate": 2.8389012359324808e-06, "loss": 0.2974, "step": 20195 }, { "epoch": 0.9460814165924954, "grad_norm": 0.5941939685897942, "learning_rate": 2.838713335455224e-06, "loss": 0.3266, "step": 20196 }, { "epoch": 0.9461282615824238, "grad_norm": 0.6164182868526, "learning_rate": 2.838525433028757e-06, "loss": 0.3167, "step": 20197 }, { "epoch": 0.946175106572352, "grad_norm": 0.5850032635976402, "learning_rate": 2.8383375286541616e-06, "loss": 0.3219, "step": 20198 }, { "epoch": 0.9462219515622804, "grad_norm": 0.6116252609548366, "learning_rate": 2.8381496223325182e-06, "loss": 0.3194, "step": 20199 }, { "epoch": 0.9462687965522087, "grad_norm": 0.5413735416307711, "learning_rate": 2.837961714064909e-06, "loss": 0.3201, "step": 20200 }, { "epoch": 0.9463156415421371, "grad_norm": 0.5727529694029537, "learning_rate": 2.837773803852415e-06, "loss": 0.3094, "step": 20201 }, { "epoch": 0.9463624865320655, "grad_norm": 0.6669273743520474, "learning_rate": 2.837585891696118e-06, "loss": 0.3429, "step": 20202 }, { "epoch": 0.9464093315219937, "grad_norm": 0.5705497603902225, "learning_rate": 2.8373979775970984e-06, "loss": 0.328, "step": 20203 }, { "epoch": 0.946456176511922, "grad_norm": 0.5963473605535757, "learning_rate": 2.837210061556438e-06, "loss": 0.3058, "step": 20204 }, { "epoch": 0.9465030215018504, "grad_norm": 0.5542860664600384, "learning_rate": 2.8370221435752197e-06, "loss": 0.2933, "step": 20205 }, { "epoch": 0.9465498664917787, "grad_norm": 0.6091729897979639, "learning_rate": 2.8368342236545225e-06, "loss": 0.3104, "step": 20206 }, { "epoch": 0.946596711481707, "grad_norm": 0.5523231849717488, "learning_rate": 2.8366463017954294e-06, "loss": 0.3134, "step": 20207 }, { "epoch": 0.9466435564716353, "grad_norm": 0.5736643848171296, "learning_rate": 2.836458377999022e-06, "loss": 0.3186, "step": 20208 }, { "epoch": 0.9466904014615637, "grad_norm": 0.5883713787791652, "learning_rate": 2.8362704522663802e-06, "loss": 0.3077, "step": 20209 }, { "epoch": 0.946737246451492, "grad_norm": 0.6194851178165793, "learning_rate": 2.8360825245985867e-06, "loss": 0.3261, "step": 20210 }, { "epoch": 0.9467840914414204, "grad_norm": 0.5910799438130928, "learning_rate": 2.835894594996723e-06, "loss": 0.3361, "step": 20211 }, { "epoch": 0.9468309364313486, "grad_norm": 0.6053962028697566, "learning_rate": 2.835706663461871e-06, "loss": 0.3216, "step": 20212 }, { "epoch": 0.946877781421277, "grad_norm": 0.6051495186215822, "learning_rate": 2.8355187299951103e-06, "loss": 0.333, "step": 20213 }, { "epoch": 0.9469246264112053, "grad_norm": 0.6099371868379024, "learning_rate": 2.835330794597524e-06, "loss": 0.3048, "step": 20214 }, { "epoch": 0.9469714714011337, "grad_norm": 0.6081131549378379, "learning_rate": 2.8351428572701935e-06, "loss": 0.3044, "step": 20215 }, { "epoch": 0.9470183163910619, "grad_norm": 0.586347933101336, "learning_rate": 2.8349549180142006e-06, "loss": 0.3139, "step": 20216 }, { "epoch": 0.9470651613809903, "grad_norm": 0.5792696974501761, "learning_rate": 2.834766976830625e-06, "loss": 0.2992, "step": 20217 }, { "epoch": 0.9471120063709186, "grad_norm": 0.5990323257558002, "learning_rate": 2.834579033720551e-06, "loss": 0.3208, "step": 20218 }, { "epoch": 0.947158851360847, "grad_norm": 0.5915796420659429, "learning_rate": 2.834391088685058e-06, "loss": 0.3325, "step": 20219 }, { "epoch": 0.9472056963507753, "grad_norm": 0.5879986723414579, "learning_rate": 2.834203141725228e-06, "loss": 0.3126, "step": 20220 }, { "epoch": 0.9472525413407036, "grad_norm": 0.5980383772065573, "learning_rate": 2.834015192842144e-06, "loss": 0.3265, "step": 20221 }, { "epoch": 0.9472993863306319, "grad_norm": 0.6239921731177519, "learning_rate": 2.8338272420368864e-06, "loss": 0.3215, "step": 20222 }, { "epoch": 0.9473462313205603, "grad_norm": 0.6062306760925313, "learning_rate": 2.833639289310536e-06, "loss": 0.3439, "step": 20223 }, { "epoch": 0.9473930763104886, "grad_norm": 0.6156405345063739, "learning_rate": 2.833451334664176e-06, "loss": 0.3167, "step": 20224 }, { "epoch": 0.9474399213004169, "grad_norm": 0.5567898688720839, "learning_rate": 2.833263378098886e-06, "loss": 0.2823, "step": 20225 }, { "epoch": 0.9474867662903452, "grad_norm": 0.6090510387683341, "learning_rate": 2.8330754196157505e-06, "loss": 0.3168, "step": 20226 }, { "epoch": 0.9475336112802736, "grad_norm": 0.6175721036258857, "learning_rate": 2.8328874592158494e-06, "loss": 0.3193, "step": 20227 }, { "epoch": 0.9475804562702019, "grad_norm": 0.5955975028880782, "learning_rate": 2.8326994969002646e-06, "loss": 0.3126, "step": 20228 }, { "epoch": 0.9476273012601303, "grad_norm": 0.6228781271932657, "learning_rate": 2.832511532670078e-06, "loss": 0.3037, "step": 20229 }, { "epoch": 0.9476741462500585, "grad_norm": 0.5726963100766367, "learning_rate": 2.832323566526371e-06, "loss": 0.3202, "step": 20230 }, { "epoch": 0.9477209912399869, "grad_norm": 0.5275868928816031, "learning_rate": 2.832135598470225e-06, "loss": 0.3039, "step": 20231 }, { "epoch": 0.9477678362299152, "grad_norm": 0.5915558277305075, "learning_rate": 2.831947628502722e-06, "loss": 0.3253, "step": 20232 }, { "epoch": 0.9478146812198436, "grad_norm": 0.6566928099422832, "learning_rate": 2.831759656624945e-06, "loss": 0.3099, "step": 20233 }, { "epoch": 0.9478615262097718, "grad_norm": 0.6038094263155066, "learning_rate": 2.8315716828379736e-06, "loss": 0.3228, "step": 20234 }, { "epoch": 0.9479083711997002, "grad_norm": 0.5998675584231977, "learning_rate": 2.83138370714289e-06, "loss": 0.3226, "step": 20235 }, { "epoch": 0.9479552161896285, "grad_norm": 0.6149041454319512, "learning_rate": 2.8311957295407778e-06, "loss": 0.3044, "step": 20236 }, { "epoch": 0.9480020611795569, "grad_norm": 0.571514650412363, "learning_rate": 2.831007750032717e-06, "loss": 0.3075, "step": 20237 }, { "epoch": 0.9480489061694852, "grad_norm": 0.5554619987678826, "learning_rate": 2.830819768619789e-06, "loss": 0.3026, "step": 20238 }, { "epoch": 0.9480957511594135, "grad_norm": 0.5691182570739949, "learning_rate": 2.8306317853030773e-06, "loss": 0.3108, "step": 20239 }, { "epoch": 0.9481425961493418, "grad_norm": 0.6032366587591576, "learning_rate": 2.8304438000836624e-06, "loss": 0.3152, "step": 20240 }, { "epoch": 0.9481894411392702, "grad_norm": 0.5935628938737094, "learning_rate": 2.8302558129626263e-06, "loss": 0.3149, "step": 20241 }, { "epoch": 0.9482362861291985, "grad_norm": 0.5908857441404409, "learning_rate": 2.8300678239410516e-06, "loss": 0.3203, "step": 20242 }, { "epoch": 0.9482831311191268, "grad_norm": 0.556300813615896, "learning_rate": 2.8298798330200194e-06, "loss": 0.3062, "step": 20243 }, { "epoch": 0.9483299761090551, "grad_norm": 0.5815146682320892, "learning_rate": 2.8296918402006107e-06, "loss": 0.3151, "step": 20244 }, { "epoch": 0.9483768210989835, "grad_norm": 0.5403943840773101, "learning_rate": 2.8295038454839095e-06, "loss": 0.3241, "step": 20245 }, { "epoch": 0.9484236660889118, "grad_norm": 0.6253212940525844, "learning_rate": 2.829315848870996e-06, "loss": 0.3121, "step": 20246 }, { "epoch": 0.9484705110788402, "grad_norm": 0.5383581814962741, "learning_rate": 2.8291278503629525e-06, "loss": 0.3014, "step": 20247 }, { "epoch": 0.9485173560687684, "grad_norm": 0.584484348601955, "learning_rate": 2.8289398499608612e-06, "loss": 0.3101, "step": 20248 }, { "epoch": 0.9485642010586968, "grad_norm": 0.572408715639876, "learning_rate": 2.8287518476658033e-06, "loss": 0.3281, "step": 20249 }, { "epoch": 0.9486110460486251, "grad_norm": 0.5694234101592491, "learning_rate": 2.8285638434788613e-06, "loss": 0.314, "step": 20250 }, { "epoch": 0.9486578910385535, "grad_norm": 0.6329259204633393, "learning_rate": 2.8283758374011178e-06, "loss": 0.335, "step": 20251 }, { "epoch": 0.9487047360284817, "grad_norm": 0.6629664897901941, "learning_rate": 2.828187829433653e-06, "loss": 0.3006, "step": 20252 }, { "epoch": 0.9487515810184101, "grad_norm": 0.5819411380537989, "learning_rate": 2.827999819577549e-06, "loss": 0.307, "step": 20253 }, { "epoch": 0.9487984260083384, "grad_norm": 0.5859314825338026, "learning_rate": 2.82781180783389e-06, "loss": 0.3197, "step": 20254 }, { "epoch": 0.9488452709982668, "grad_norm": 0.5544582817700764, "learning_rate": 2.827623794203756e-06, "loss": 0.3091, "step": 20255 }, { "epoch": 0.9488921159881951, "grad_norm": 0.5702979676909191, "learning_rate": 2.827435778688229e-06, "loss": 0.318, "step": 20256 }, { "epoch": 0.9489389609781234, "grad_norm": 0.6114470322807773, "learning_rate": 2.8272477612883916e-06, "loss": 0.3042, "step": 20257 }, { "epoch": 0.9489858059680517, "grad_norm": 0.6095060055926382, "learning_rate": 2.827059742005326e-06, "loss": 0.3231, "step": 20258 }, { "epoch": 0.9490326509579801, "grad_norm": 0.594287716976732, "learning_rate": 2.826871720840113e-06, "loss": 0.3126, "step": 20259 }, { "epoch": 0.9490794959479084, "grad_norm": 0.6755780766943895, "learning_rate": 2.8266836977938355e-06, "loss": 0.3364, "step": 20260 }, { "epoch": 0.9491263409378367, "grad_norm": 0.6396604537490709, "learning_rate": 2.8264956728675764e-06, "loss": 0.3228, "step": 20261 }, { "epoch": 0.949173185927765, "grad_norm": 0.5690678414576956, "learning_rate": 2.8263076460624157e-06, "loss": 0.3137, "step": 20262 }, { "epoch": 0.9492200309176934, "grad_norm": 0.5736019524917643, "learning_rate": 2.826119617379437e-06, "loss": 0.2977, "step": 20263 }, { "epoch": 0.9492668759076217, "grad_norm": 0.6038496992416011, "learning_rate": 2.825931586819721e-06, "loss": 0.306, "step": 20264 }, { "epoch": 0.9493137208975501, "grad_norm": 0.5583955453987013, "learning_rate": 2.825743554384352e-06, "loss": 0.2992, "step": 20265 }, { "epoch": 0.9493605658874783, "grad_norm": 0.6341411039402205, "learning_rate": 2.82555552007441e-06, "loss": 0.3258, "step": 20266 }, { "epoch": 0.9494074108774067, "grad_norm": 0.6209048308491598, "learning_rate": 2.825367483890978e-06, "loss": 0.2932, "step": 20267 }, { "epoch": 0.949454255867335, "grad_norm": 0.6163678384404472, "learning_rate": 2.825179445835138e-06, "loss": 0.3226, "step": 20268 }, { "epoch": 0.9495011008572634, "grad_norm": 0.639751780338151, "learning_rate": 2.824991405907972e-06, "loss": 0.3378, "step": 20269 }, { "epoch": 0.9495479458471916, "grad_norm": 0.5611440449659887, "learning_rate": 2.8248033641105615e-06, "loss": 0.3019, "step": 20270 }, { "epoch": 0.94959479083712, "grad_norm": 0.595881254225869, "learning_rate": 2.8246153204439902e-06, "loss": 0.3089, "step": 20271 }, { "epoch": 0.9496416358270483, "grad_norm": 0.5833738795561033, "learning_rate": 2.8244272749093395e-06, "loss": 0.3174, "step": 20272 }, { "epoch": 0.9496884808169767, "grad_norm": 0.5827338528108486, "learning_rate": 2.82423922750769e-06, "loss": 0.3183, "step": 20273 }, { "epoch": 0.949735325806905, "grad_norm": 0.5360136350230269, "learning_rate": 2.8240511782401257e-06, "loss": 0.2787, "step": 20274 }, { "epoch": 0.9497821707968332, "grad_norm": 0.5730367740130629, "learning_rate": 2.8238631271077294e-06, "loss": 0.3059, "step": 20275 }, { "epoch": 0.9498290157867616, "grad_norm": 0.5585207877927891, "learning_rate": 2.823675074111581e-06, "loss": 0.323, "step": 20276 }, { "epoch": 0.94987586077669, "grad_norm": 0.577630873829898, "learning_rate": 2.823487019252764e-06, "loss": 0.3191, "step": 20277 }, { "epoch": 0.9499227057666183, "grad_norm": 0.6295182548671, "learning_rate": 2.823298962532361e-06, "loss": 0.3105, "step": 20278 }, { "epoch": 0.9499695507565465, "grad_norm": 0.6512903667440546, "learning_rate": 2.823110903951454e-06, "loss": 0.2963, "step": 20279 }, { "epoch": 0.9500163957464749, "grad_norm": 0.5675097131829198, "learning_rate": 2.822922843511124e-06, "loss": 0.3242, "step": 20280 }, { "epoch": 0.9500632407364032, "grad_norm": 0.6122670976500885, "learning_rate": 2.822734781212455e-06, "loss": 0.3299, "step": 20281 }, { "epoch": 0.9501100857263316, "grad_norm": 0.6175350987066667, "learning_rate": 2.8225467170565285e-06, "loss": 0.305, "step": 20282 }, { "epoch": 0.95015693071626, "grad_norm": 0.5622650657724954, "learning_rate": 2.8223586510444255e-06, "loss": 0.3152, "step": 20283 }, { "epoch": 0.9502037757061882, "grad_norm": 0.6137736363816444, "learning_rate": 2.8221705831772306e-06, "loss": 0.3272, "step": 20284 }, { "epoch": 0.9502506206961165, "grad_norm": 0.5533614517438951, "learning_rate": 2.8219825134560246e-06, "loss": 0.308, "step": 20285 }, { "epoch": 0.9502974656860449, "grad_norm": 0.6304126245052742, "learning_rate": 2.821794441881891e-06, "loss": 0.3226, "step": 20286 }, { "epoch": 0.9503443106759732, "grad_norm": 0.5894811322655524, "learning_rate": 2.8216063684559096e-06, "loss": 0.3117, "step": 20287 }, { "epoch": 0.9503911556659015, "grad_norm": 0.6023590373591969, "learning_rate": 2.821418293179166e-06, "loss": 0.3327, "step": 20288 }, { "epoch": 0.9504380006558298, "grad_norm": 0.6082961933882777, "learning_rate": 2.8212302160527406e-06, "loss": 0.3201, "step": 20289 }, { "epoch": 0.9504848456457582, "grad_norm": 0.5732369727399409, "learning_rate": 2.8210421370777153e-06, "loss": 0.3111, "step": 20290 }, { "epoch": 0.9505316906356865, "grad_norm": 0.6057752351070153, "learning_rate": 2.8208540562551735e-06, "loss": 0.3068, "step": 20291 }, { "epoch": 0.9505785356256149, "grad_norm": 0.6272980516004611, "learning_rate": 2.820665973586198e-06, "loss": 0.3389, "step": 20292 }, { "epoch": 0.9506253806155431, "grad_norm": 0.5335526472969504, "learning_rate": 2.820477889071869e-06, "loss": 0.3061, "step": 20293 }, { "epoch": 0.9506722256054715, "grad_norm": 0.570248444424051, "learning_rate": 2.820289802713271e-06, "loss": 0.3149, "step": 20294 }, { "epoch": 0.9507190705953998, "grad_norm": 0.5686050972086965, "learning_rate": 2.8201017145114855e-06, "loss": 0.3321, "step": 20295 }, { "epoch": 0.9507659155853282, "grad_norm": 0.5810175014911597, "learning_rate": 2.8199136244675962e-06, "loss": 0.3124, "step": 20296 }, { "epoch": 0.9508127605752564, "grad_norm": 0.5811667196192476, "learning_rate": 2.8197255325826834e-06, "loss": 0.3116, "step": 20297 }, { "epoch": 0.9508596055651848, "grad_norm": 0.5912484582849635, "learning_rate": 2.8195374388578304e-06, "loss": 0.2983, "step": 20298 }, { "epoch": 0.9509064505551131, "grad_norm": 0.6126815497174436, "learning_rate": 2.8193493432941197e-06, "loss": 0.3339, "step": 20299 }, { "epoch": 0.9509532955450415, "grad_norm": 0.5816207268418481, "learning_rate": 2.819161245892635e-06, "loss": 0.332, "step": 20300 }, { "epoch": 0.9510001405349698, "grad_norm": 0.6134342011885019, "learning_rate": 2.818973146654456e-06, "loss": 0.3346, "step": 20301 }, { "epoch": 0.9510469855248981, "grad_norm": 0.5834489718416747, "learning_rate": 2.8187850455806672e-06, "loss": 0.3078, "step": 20302 }, { "epoch": 0.9510938305148264, "grad_norm": 0.6720095444779974, "learning_rate": 2.8185969426723516e-06, "loss": 0.3252, "step": 20303 }, { "epoch": 0.9511406755047548, "grad_norm": 0.6083260112061012, "learning_rate": 2.8184088379305895e-06, "loss": 0.3106, "step": 20304 }, { "epoch": 0.9511875204946831, "grad_norm": 0.582031144335339, "learning_rate": 2.818220731356465e-06, "loss": 0.3071, "step": 20305 }, { "epoch": 0.9512343654846114, "grad_norm": 0.5447162640975902, "learning_rate": 2.8180326229510595e-06, "loss": 0.3078, "step": 20306 }, { "epoch": 0.9512812104745397, "grad_norm": 0.5667842975244454, "learning_rate": 2.817844512715457e-06, "loss": 0.2869, "step": 20307 }, { "epoch": 0.9513280554644681, "grad_norm": 0.566354597151354, "learning_rate": 2.817656400650739e-06, "loss": 0.3191, "step": 20308 }, { "epoch": 0.9513749004543964, "grad_norm": 0.6210474481124922, "learning_rate": 2.8174682867579883e-06, "loss": 0.3075, "step": 20309 }, { "epoch": 0.9514217454443248, "grad_norm": 0.6254509313792033, "learning_rate": 2.8172801710382874e-06, "loss": 0.3228, "step": 20310 }, { "epoch": 0.951468590434253, "grad_norm": 0.6064022061442125, "learning_rate": 2.817092053492718e-06, "loss": 0.3363, "step": 20311 }, { "epoch": 0.9515154354241814, "grad_norm": 0.570128534589266, "learning_rate": 2.8169039341223647e-06, "loss": 0.3365, "step": 20312 }, { "epoch": 0.9515622804141097, "grad_norm": 0.6966701018979132, "learning_rate": 2.816715812928309e-06, "loss": 0.327, "step": 20313 }, { "epoch": 0.9516091254040381, "grad_norm": 0.5857857728850219, "learning_rate": 2.8165276899116328e-06, "loss": 0.3147, "step": 20314 }, { "epoch": 0.9516559703939663, "grad_norm": 0.5600681742778661, "learning_rate": 2.8163395650734194e-06, "loss": 0.3067, "step": 20315 }, { "epoch": 0.9517028153838947, "grad_norm": 0.5403359178482863, "learning_rate": 2.816151438414751e-06, "loss": 0.318, "step": 20316 }, { "epoch": 0.951749660373823, "grad_norm": 0.6275908631029986, "learning_rate": 2.8159633099367113e-06, "loss": 0.3085, "step": 20317 }, { "epoch": 0.9517965053637514, "grad_norm": 0.5452881677790888, "learning_rate": 2.815775179640382e-06, "loss": 0.3119, "step": 20318 }, { "epoch": 0.9518433503536797, "grad_norm": 0.6170985498447947, "learning_rate": 2.8155870475268447e-06, "loss": 0.3128, "step": 20319 }, { "epoch": 0.951890195343608, "grad_norm": 0.5941501178893516, "learning_rate": 2.815398913597185e-06, "loss": 0.3058, "step": 20320 }, { "epoch": 0.9519370403335363, "grad_norm": 0.567715316353305, "learning_rate": 2.815210777852483e-06, "loss": 0.314, "step": 20321 }, { "epoch": 0.9519838853234647, "grad_norm": 0.62249380437124, "learning_rate": 2.815022640293822e-06, "loss": 0.3215, "step": 20322 }, { "epoch": 0.952030730313393, "grad_norm": 0.5450320732370681, "learning_rate": 2.814834500922285e-06, "loss": 0.322, "step": 20323 }, { "epoch": 0.9520775753033213, "grad_norm": 0.6105948484869275, "learning_rate": 2.814646359738955e-06, "loss": 0.3234, "step": 20324 }, { "epoch": 0.9521244202932496, "grad_norm": 0.641966051714189, "learning_rate": 2.8144582167449138e-06, "loss": 0.3226, "step": 20325 }, { "epoch": 0.952171265283178, "grad_norm": 0.6074135945378165, "learning_rate": 2.814270071941244e-06, "loss": 0.3415, "step": 20326 }, { "epoch": 0.9522181102731063, "grad_norm": 0.587577150341746, "learning_rate": 2.8140819253290302e-06, "loss": 0.3178, "step": 20327 }, { "epoch": 0.9522649552630347, "grad_norm": 0.5914339585307605, "learning_rate": 2.813893776909354e-06, "loss": 0.3223, "step": 20328 }, { "epoch": 0.9523118002529629, "grad_norm": 0.587193147769487, "learning_rate": 2.813705626683297e-06, "loss": 0.317, "step": 20329 }, { "epoch": 0.9523586452428913, "grad_norm": 0.586046710017186, "learning_rate": 2.8135174746519434e-06, "loss": 0.3164, "step": 20330 }, { "epoch": 0.9524054902328196, "grad_norm": 0.6419715785180045, "learning_rate": 2.8133293208163753e-06, "loss": 0.303, "step": 20331 }, { "epoch": 0.952452335222748, "grad_norm": 0.5984207089139276, "learning_rate": 2.8131411651776757e-06, "loss": 0.3262, "step": 20332 }, { "epoch": 0.9524991802126762, "grad_norm": 0.644551271338419, "learning_rate": 2.812953007736928e-06, "loss": 0.3223, "step": 20333 }, { "epoch": 0.9525460252026046, "grad_norm": 0.6039072328595199, "learning_rate": 2.8127648484952135e-06, "loss": 0.3359, "step": 20334 }, { "epoch": 0.9525928701925329, "grad_norm": 0.5581028235422815, "learning_rate": 2.8125766874536164e-06, "loss": 0.3132, "step": 20335 }, { "epoch": 0.9526397151824613, "grad_norm": 0.5905249367975561, "learning_rate": 2.8123885246132195e-06, "loss": 0.3066, "step": 20336 }, { "epoch": 0.9526865601723896, "grad_norm": 0.5661695727156211, "learning_rate": 2.812200359975104e-06, "loss": 0.3181, "step": 20337 }, { "epoch": 0.9527334051623179, "grad_norm": 0.5397025029281816, "learning_rate": 2.812012193540355e-06, "loss": 0.298, "step": 20338 }, { "epoch": 0.9527802501522462, "grad_norm": 0.5996555671000847, "learning_rate": 2.8118240253100544e-06, "loss": 0.3292, "step": 20339 }, { "epoch": 0.9528270951421746, "grad_norm": 0.5402060283618413, "learning_rate": 2.8116358552852833e-06, "loss": 0.3031, "step": 20340 }, { "epoch": 0.9528739401321029, "grad_norm": 0.5875460428984354, "learning_rate": 2.811447683467128e-06, "loss": 0.3153, "step": 20341 }, { "epoch": 0.9529207851220312, "grad_norm": 0.6432578109287951, "learning_rate": 2.8112595098566684e-06, "loss": 0.3269, "step": 20342 }, { "epoch": 0.9529676301119595, "grad_norm": 0.6476773296060476, "learning_rate": 2.811071334454989e-06, "loss": 0.3265, "step": 20343 }, { "epoch": 0.9530144751018879, "grad_norm": 0.627761424578787, "learning_rate": 2.8108831572631717e-06, "loss": 0.3344, "step": 20344 }, { "epoch": 0.9530613200918162, "grad_norm": 0.5701476459025536, "learning_rate": 2.810694978282301e-06, "loss": 0.3052, "step": 20345 }, { "epoch": 0.9531081650817446, "grad_norm": 0.5505337129141542, "learning_rate": 2.8105067975134583e-06, "loss": 0.3099, "step": 20346 }, { "epoch": 0.9531550100716728, "grad_norm": 0.5868145185912583, "learning_rate": 2.8103186149577266e-06, "loss": 0.3106, "step": 20347 }, { "epoch": 0.9532018550616012, "grad_norm": 0.5985574432825563, "learning_rate": 2.81013043061619e-06, "loss": 0.3347, "step": 20348 }, { "epoch": 0.9532487000515295, "grad_norm": 0.6351129335338312, "learning_rate": 2.80994224448993e-06, "loss": 0.3202, "step": 20349 }, { "epoch": 0.9532955450414579, "grad_norm": 0.5747303307874196, "learning_rate": 2.8097540565800303e-06, "loss": 0.3254, "step": 20350 }, { "epoch": 0.9533423900313861, "grad_norm": 0.6662117660014367, "learning_rate": 2.8095658668875743e-06, "loss": 0.343, "step": 20351 }, { "epoch": 0.9533892350213145, "grad_norm": 0.6323743020040818, "learning_rate": 2.809377675413645e-06, "loss": 0.3353, "step": 20352 }, { "epoch": 0.9534360800112428, "grad_norm": 0.5938156732264948, "learning_rate": 2.8091894821593242e-06, "loss": 0.3439, "step": 20353 }, { "epoch": 0.9534829250011712, "grad_norm": 0.5868431271174505, "learning_rate": 2.809001287125695e-06, "loss": 0.3256, "step": 20354 }, { "epoch": 0.9535297699910995, "grad_norm": 0.5975019912206081, "learning_rate": 2.808813090313842e-06, "loss": 0.3237, "step": 20355 }, { "epoch": 0.9535766149810277, "grad_norm": 0.6047508883531155, "learning_rate": 2.8086248917248475e-06, "loss": 0.307, "step": 20356 }, { "epoch": 0.9536234599709561, "grad_norm": 0.5712708563542788, "learning_rate": 2.8084366913597936e-06, "loss": 0.305, "step": 20357 }, { "epoch": 0.9536703049608845, "grad_norm": 0.5560775893844265, "learning_rate": 2.808248489219765e-06, "loss": 0.2964, "step": 20358 }, { "epoch": 0.9537171499508128, "grad_norm": 0.6177995750708979, "learning_rate": 2.808060285305843e-06, "loss": 0.331, "step": 20359 }, { "epoch": 0.953763994940741, "grad_norm": 0.6327754849454574, "learning_rate": 2.8078720796191115e-06, "loss": 0.3235, "step": 20360 }, { "epoch": 0.9538108399306694, "grad_norm": 0.6125563976291886, "learning_rate": 2.8076838721606544e-06, "loss": 0.32, "step": 20361 }, { "epoch": 0.9538576849205977, "grad_norm": 0.5963921488920821, "learning_rate": 2.8074956629315532e-06, "loss": 0.2925, "step": 20362 }, { "epoch": 0.9539045299105261, "grad_norm": 0.6194815682690211, "learning_rate": 2.8073074519328914e-06, "loss": 0.346, "step": 20363 }, { "epoch": 0.9539513749004545, "grad_norm": 0.5865015778303525, "learning_rate": 2.807119239165753e-06, "loss": 0.3211, "step": 20364 }, { "epoch": 0.9539982198903827, "grad_norm": 0.5752815000888861, "learning_rate": 2.8069310246312205e-06, "loss": 0.3039, "step": 20365 }, { "epoch": 0.954045064880311, "grad_norm": 0.5655402948635776, "learning_rate": 2.806742808330377e-06, "loss": 0.3214, "step": 20366 }, { "epoch": 0.9540919098702394, "grad_norm": 0.6136309116595926, "learning_rate": 2.8065545902643066e-06, "loss": 0.328, "step": 20367 }, { "epoch": 0.9541387548601677, "grad_norm": 0.6050236625481088, "learning_rate": 2.8063663704340905e-06, "loss": 0.3209, "step": 20368 }, { "epoch": 0.954185599850096, "grad_norm": 0.6000565347596218, "learning_rate": 2.806178148840813e-06, "loss": 0.3033, "step": 20369 }, { "epoch": 0.9542324448400243, "grad_norm": 0.5530589689059627, "learning_rate": 2.805989925485558e-06, "loss": 0.291, "step": 20370 }, { "epoch": 0.9542792898299527, "grad_norm": 0.6274067380346493, "learning_rate": 2.8058017003694073e-06, "loss": 0.3199, "step": 20371 }, { "epoch": 0.954326134819881, "grad_norm": 0.6116572878859834, "learning_rate": 2.8056134734934436e-06, "loss": 0.3181, "step": 20372 }, { "epoch": 0.9543729798098094, "grad_norm": 0.5966235884380123, "learning_rate": 2.8054252448587533e-06, "loss": 0.3132, "step": 20373 }, { "epoch": 0.9544198247997376, "grad_norm": 0.6137768945440887, "learning_rate": 2.8052370144664166e-06, "loss": 0.3292, "step": 20374 }, { "epoch": 0.954466669789666, "grad_norm": 0.6565740002430065, "learning_rate": 2.8050487823175166e-06, "loss": 0.3575, "step": 20375 }, { "epoch": 0.9545135147795943, "grad_norm": 0.600913355995813, "learning_rate": 2.8048605484131387e-06, "loss": 0.323, "step": 20376 }, { "epoch": 0.9545603597695227, "grad_norm": 0.5884775354688888, "learning_rate": 2.804672312754365e-06, "loss": 0.3317, "step": 20377 }, { "epoch": 0.9546072047594509, "grad_norm": 0.5632919404581993, "learning_rate": 2.804484075342278e-06, "loss": 0.3134, "step": 20378 }, { "epoch": 0.9546540497493793, "grad_norm": 0.6008255090428316, "learning_rate": 2.8042958361779626e-06, "loss": 0.3287, "step": 20379 }, { "epoch": 0.9547008947393076, "grad_norm": 0.5802220840371883, "learning_rate": 2.8041075952625007e-06, "loss": 0.3097, "step": 20380 }, { "epoch": 0.954747739729236, "grad_norm": 0.6025578310422859, "learning_rate": 2.8039193525969755e-06, "loss": 0.317, "step": 20381 }, { "epoch": 0.9547945847191642, "grad_norm": 0.6081497133939875, "learning_rate": 2.8037311081824714e-06, "loss": 0.2978, "step": 20382 }, { "epoch": 0.9548414297090926, "grad_norm": 0.5744483840080308, "learning_rate": 2.803542862020071e-06, "loss": 0.3029, "step": 20383 }, { "epoch": 0.9548882746990209, "grad_norm": 0.5750459904564109, "learning_rate": 2.8033546141108576e-06, "loss": 0.3348, "step": 20384 }, { "epoch": 0.9549351196889493, "grad_norm": 0.6031201644580215, "learning_rate": 2.8031663644559147e-06, "loss": 0.3119, "step": 20385 }, { "epoch": 0.9549819646788776, "grad_norm": 0.5578471896235313, "learning_rate": 2.8029781130563254e-06, "loss": 0.2967, "step": 20386 }, { "epoch": 0.9550288096688059, "grad_norm": 0.6212987874039269, "learning_rate": 2.8027898599131738e-06, "loss": 0.3173, "step": 20387 }, { "epoch": 0.9550756546587342, "grad_norm": 0.5745812503006842, "learning_rate": 2.8026016050275424e-06, "loss": 0.3245, "step": 20388 }, { "epoch": 0.9551224996486626, "grad_norm": 0.5567696073200122, "learning_rate": 2.8024133484005146e-06, "loss": 0.2988, "step": 20389 }, { "epoch": 0.9551693446385909, "grad_norm": 0.6031391695676361, "learning_rate": 2.8022250900331743e-06, "loss": 0.3131, "step": 20390 }, { "epoch": 0.9552161896285192, "grad_norm": 0.6099566261359689, "learning_rate": 2.802036829926604e-06, "loss": 0.317, "step": 20391 }, { "epoch": 0.9552630346184475, "grad_norm": 0.5817047829050682, "learning_rate": 2.801848568081888e-06, "loss": 0.3287, "step": 20392 }, { "epoch": 0.9553098796083759, "grad_norm": 0.58423063195851, "learning_rate": 2.801660304500109e-06, "loss": 0.3157, "step": 20393 }, { "epoch": 0.9553567245983042, "grad_norm": 0.6054311397162786, "learning_rate": 2.801472039182352e-06, "loss": 0.3315, "step": 20394 }, { "epoch": 0.9554035695882326, "grad_norm": 0.5887740434456098, "learning_rate": 2.8012837721296983e-06, "loss": 0.317, "step": 20395 }, { "epoch": 0.9554504145781608, "grad_norm": 0.578980338526459, "learning_rate": 2.8010955033432314e-06, "loss": 0.3142, "step": 20396 }, { "epoch": 0.9554972595680892, "grad_norm": 0.6156659983984957, "learning_rate": 2.8009072328240366e-06, "loss": 0.3176, "step": 20397 }, { "epoch": 0.9555441045580175, "grad_norm": 0.6348804502356298, "learning_rate": 2.8007189605731964e-06, "loss": 0.3427, "step": 20398 }, { "epoch": 0.9555909495479459, "grad_norm": 0.5504052316505987, "learning_rate": 2.800530686591794e-06, "loss": 0.3071, "step": 20399 }, { "epoch": 0.9556377945378741, "grad_norm": 0.5839040154970246, "learning_rate": 2.8003424108809126e-06, "loss": 0.3188, "step": 20400 }, { "epoch": 0.9556846395278025, "grad_norm": 0.6344114201859203, "learning_rate": 2.8001541334416365e-06, "loss": 0.3038, "step": 20401 }, { "epoch": 0.9557314845177308, "grad_norm": 0.6027050352202822, "learning_rate": 2.7999658542750484e-06, "loss": 0.3147, "step": 20402 }, { "epoch": 0.9557783295076592, "grad_norm": 0.5927544307613258, "learning_rate": 2.799777573382233e-06, "loss": 0.3035, "step": 20403 }, { "epoch": 0.9558251744975875, "grad_norm": 0.5867773798703981, "learning_rate": 2.799589290764272e-06, "loss": 0.3269, "step": 20404 }, { "epoch": 0.9558720194875158, "grad_norm": 0.5717732638991166, "learning_rate": 2.799401006422251e-06, "loss": 0.3011, "step": 20405 }, { "epoch": 0.9559188644774441, "grad_norm": 0.6359247950005865, "learning_rate": 2.7992127203572516e-06, "loss": 0.3355, "step": 20406 }, { "epoch": 0.9559657094673725, "grad_norm": 0.5670029130627625, "learning_rate": 2.7990244325703584e-06, "loss": 0.3117, "step": 20407 }, { "epoch": 0.9560125544573008, "grad_norm": 0.6162272822315789, "learning_rate": 2.7988361430626547e-06, "loss": 0.3288, "step": 20408 }, { "epoch": 0.9560593994472291, "grad_norm": 0.5767008492852795, "learning_rate": 2.798647851835225e-06, "loss": 0.318, "step": 20409 }, { "epoch": 0.9561062444371574, "grad_norm": 0.5650948572943325, "learning_rate": 2.7984595588891504e-06, "loss": 0.3198, "step": 20410 }, { "epoch": 0.9561530894270858, "grad_norm": 0.5860596148341294, "learning_rate": 2.7982712642255173e-06, "loss": 0.2998, "step": 20411 }, { "epoch": 0.9561999344170141, "grad_norm": 0.5665728609758793, "learning_rate": 2.798082967845408e-06, "loss": 0.3086, "step": 20412 }, { "epoch": 0.9562467794069425, "grad_norm": 0.5903818015866926, "learning_rate": 2.7978946697499053e-06, "loss": 0.3065, "step": 20413 }, { "epoch": 0.9562936243968707, "grad_norm": 0.6416145034933346, "learning_rate": 2.797706369940094e-06, "loss": 0.33, "step": 20414 }, { "epoch": 0.9563404693867991, "grad_norm": 0.5821163305732256, "learning_rate": 2.7975180684170585e-06, "loss": 0.3088, "step": 20415 }, { "epoch": 0.9563873143767274, "grad_norm": 0.6395086924780541, "learning_rate": 2.7973297651818797e-06, "loss": 0.3181, "step": 20416 }, { "epoch": 0.9564341593666558, "grad_norm": 0.6239667505525194, "learning_rate": 2.7971414602356428e-06, "loss": 0.3345, "step": 20417 }, { "epoch": 0.956481004356584, "grad_norm": 0.5822999795339259, "learning_rate": 2.7969531535794327e-06, "loss": 0.3311, "step": 20418 }, { "epoch": 0.9565278493465124, "grad_norm": 0.5952848906384104, "learning_rate": 2.7967648452143314e-06, "loss": 0.3163, "step": 20419 }, { "epoch": 0.9565746943364407, "grad_norm": 0.6129879840505865, "learning_rate": 2.7965765351414225e-06, "loss": 0.3232, "step": 20420 }, { "epoch": 0.9566215393263691, "grad_norm": 0.5990930703171519, "learning_rate": 2.79638822336179e-06, "loss": 0.315, "step": 20421 }, { "epoch": 0.9566683843162974, "grad_norm": 0.5861365120348685, "learning_rate": 2.796199909876519e-06, "loss": 0.3258, "step": 20422 }, { "epoch": 0.9567152293062257, "grad_norm": 0.5923963992757721, "learning_rate": 2.7960115946866912e-06, "loss": 0.3216, "step": 20423 }, { "epoch": 0.956762074296154, "grad_norm": 0.5823697308614434, "learning_rate": 2.795823277793391e-06, "loss": 0.2897, "step": 20424 }, { "epoch": 0.9568089192860824, "grad_norm": 0.571885761925729, "learning_rate": 2.7956349591977024e-06, "loss": 0.3082, "step": 20425 }, { "epoch": 0.9568557642760107, "grad_norm": 0.5820549257514387, "learning_rate": 2.795446638900709e-06, "loss": 0.308, "step": 20426 }, { "epoch": 0.956902609265939, "grad_norm": 0.5718187158178933, "learning_rate": 2.7952583169034938e-06, "loss": 0.3041, "step": 20427 }, { "epoch": 0.9569494542558673, "grad_norm": 0.5768305125971247, "learning_rate": 2.795069993207142e-06, "loss": 0.2965, "step": 20428 }, { "epoch": 0.9569962992457957, "grad_norm": 0.5694619047289625, "learning_rate": 2.794881667812736e-06, "loss": 0.3197, "step": 20429 }, { "epoch": 0.957043144235724, "grad_norm": 0.5626037041539929, "learning_rate": 2.7946933407213605e-06, "loss": 0.3201, "step": 20430 }, { "epoch": 0.9570899892256524, "grad_norm": 0.6167709314729151, "learning_rate": 2.7945050119340988e-06, "loss": 0.3253, "step": 20431 }, { "epoch": 0.9571368342155806, "grad_norm": 0.6098608803020354, "learning_rate": 2.794316681452035e-06, "loss": 0.3087, "step": 20432 }, { "epoch": 0.957183679205509, "grad_norm": 0.6054296785835098, "learning_rate": 2.794128349276252e-06, "loss": 0.3297, "step": 20433 }, { "epoch": 0.9572305241954373, "grad_norm": 0.5488394206965209, "learning_rate": 2.793940015407835e-06, "loss": 0.3038, "step": 20434 }, { "epoch": 0.9572773691853657, "grad_norm": 0.6028533009741001, "learning_rate": 2.7937516798478664e-06, "loss": 0.3349, "step": 20435 }, { "epoch": 0.9573242141752939, "grad_norm": 0.610724666694171, "learning_rate": 2.7935633425974314e-06, "loss": 0.3308, "step": 20436 }, { "epoch": 0.9573710591652222, "grad_norm": 0.590449783735045, "learning_rate": 2.793375003657613e-06, "loss": 0.2789, "step": 20437 }, { "epoch": 0.9574179041551506, "grad_norm": 0.5951663957701477, "learning_rate": 2.7931866630294948e-06, "loss": 0.3217, "step": 20438 }, { "epoch": 0.957464749145079, "grad_norm": 0.6135324915954229, "learning_rate": 2.792998320714161e-06, "loss": 0.3258, "step": 20439 }, { "epoch": 0.9575115941350073, "grad_norm": 0.5799371944458795, "learning_rate": 2.792809976712696e-06, "loss": 0.2997, "step": 20440 }, { "epoch": 0.9575584391249355, "grad_norm": 0.6069555042915783, "learning_rate": 2.7926216310261826e-06, "loss": 0.3356, "step": 20441 }, { "epoch": 0.9576052841148639, "grad_norm": 0.5815111906486736, "learning_rate": 2.7924332836557054e-06, "loss": 0.3246, "step": 20442 }, { "epoch": 0.9576521291047922, "grad_norm": 0.579997796595207, "learning_rate": 2.7922449346023487e-06, "loss": 0.3301, "step": 20443 }, { "epoch": 0.9576989740947206, "grad_norm": 0.5929946793635485, "learning_rate": 2.7920565838671954e-06, "loss": 0.3082, "step": 20444 }, { "epoch": 0.9577458190846488, "grad_norm": 0.5995445955598503, "learning_rate": 2.7918682314513296e-06, "loss": 0.3178, "step": 20445 }, { "epoch": 0.9577926640745772, "grad_norm": 0.6001507537536978, "learning_rate": 2.791679877355836e-06, "loss": 0.3154, "step": 20446 }, { "epoch": 0.9578395090645055, "grad_norm": 0.5994329934863588, "learning_rate": 2.7914915215817985e-06, "loss": 0.3124, "step": 20447 }, { "epoch": 0.9578863540544339, "grad_norm": 0.6297542090739513, "learning_rate": 2.7913031641302994e-06, "loss": 0.3245, "step": 20448 }, { "epoch": 0.9579331990443622, "grad_norm": 0.5565388854600587, "learning_rate": 2.7911148050024243e-06, "loss": 0.2984, "step": 20449 }, { "epoch": 0.9579800440342905, "grad_norm": 0.573661141728061, "learning_rate": 2.7909264441992573e-06, "loss": 0.3124, "step": 20450 }, { "epoch": 0.9580268890242188, "grad_norm": 0.5903814883555855, "learning_rate": 2.790738081721881e-06, "loss": 0.3258, "step": 20451 }, { "epoch": 0.9580737340141472, "grad_norm": 0.5878875891887596, "learning_rate": 2.79054971757138e-06, "loss": 0.3229, "step": 20452 }, { "epoch": 0.9581205790040755, "grad_norm": 0.5930788844663343, "learning_rate": 2.7903613517488395e-06, "loss": 0.3487, "step": 20453 }, { "epoch": 0.9581674239940038, "grad_norm": 0.5287600612702402, "learning_rate": 2.790172984255341e-06, "loss": 0.3054, "step": 20454 }, { "epoch": 0.9582142689839321, "grad_norm": 0.6013282382086851, "learning_rate": 2.7899846150919706e-06, "loss": 0.3305, "step": 20455 }, { "epoch": 0.9582611139738605, "grad_norm": 0.5873572234112273, "learning_rate": 2.7897962442598107e-06, "loss": 0.316, "step": 20456 }, { "epoch": 0.9583079589637888, "grad_norm": 0.5626744611852862, "learning_rate": 2.789607871759948e-06, "loss": 0.3176, "step": 20457 }, { "epoch": 0.9583548039537172, "grad_norm": 0.5607519156859915, "learning_rate": 2.7894194975934637e-06, "loss": 0.2879, "step": 20458 }, { "epoch": 0.9584016489436454, "grad_norm": 0.5601472540265119, "learning_rate": 2.7892311217614424e-06, "loss": 0.3081, "step": 20459 }, { "epoch": 0.9584484939335738, "grad_norm": 0.6323686276553091, "learning_rate": 2.7890427442649696e-06, "loss": 0.3215, "step": 20460 }, { "epoch": 0.9584953389235021, "grad_norm": 0.6110386510979323, "learning_rate": 2.7888543651051287e-06, "loss": 0.2977, "step": 20461 }, { "epoch": 0.9585421839134305, "grad_norm": 0.553876620840561, "learning_rate": 2.7886659842830025e-06, "loss": 0.3208, "step": 20462 }, { "epoch": 0.9585890289033587, "grad_norm": 0.6227146004824582, "learning_rate": 2.7884776017996766e-06, "loss": 0.3102, "step": 20463 }, { "epoch": 0.9586358738932871, "grad_norm": 0.5864972797270176, "learning_rate": 2.788289217656235e-06, "loss": 0.325, "step": 20464 }, { "epoch": 0.9586827188832154, "grad_norm": 0.5684780028638814, "learning_rate": 2.788100831853761e-06, "loss": 0.301, "step": 20465 }, { "epoch": 0.9587295638731438, "grad_norm": 0.5535959424029825, "learning_rate": 2.787912444393339e-06, "loss": 0.3009, "step": 20466 }, { "epoch": 0.9587764088630721, "grad_norm": 0.6252635507328645, "learning_rate": 2.7877240552760537e-06, "loss": 0.3139, "step": 20467 }, { "epoch": 0.9588232538530004, "grad_norm": 0.6167941955807242, "learning_rate": 2.7875356645029887e-06, "loss": 0.3155, "step": 20468 }, { "epoch": 0.9588700988429287, "grad_norm": 0.5862682195483495, "learning_rate": 2.7873472720752275e-06, "loss": 0.3312, "step": 20469 }, { "epoch": 0.9589169438328571, "grad_norm": 0.5574369122410794, "learning_rate": 2.787158877993856e-06, "loss": 0.2934, "step": 20470 }, { "epoch": 0.9589637888227854, "grad_norm": 0.6167472328133767, "learning_rate": 2.786970482259957e-06, "loss": 0.3244, "step": 20471 }, { "epoch": 0.9590106338127137, "grad_norm": 0.5940114723984782, "learning_rate": 2.786782084874615e-06, "loss": 0.3087, "step": 20472 }, { "epoch": 0.959057478802642, "grad_norm": 0.5682066373248077, "learning_rate": 2.7865936858389137e-06, "loss": 0.3141, "step": 20473 }, { "epoch": 0.9591043237925704, "grad_norm": 0.5881337549158651, "learning_rate": 2.7864052851539374e-06, "loss": 0.3045, "step": 20474 }, { "epoch": 0.9591511687824987, "grad_norm": 0.5493411964485513, "learning_rate": 2.786216882820772e-06, "loss": 0.3104, "step": 20475 }, { "epoch": 0.9591980137724271, "grad_norm": 0.6214259687870888, "learning_rate": 2.7860284788405e-06, "loss": 0.3168, "step": 20476 }, { "epoch": 0.9592448587623553, "grad_norm": 0.5767810106716018, "learning_rate": 2.7858400732142054e-06, "loss": 0.3073, "step": 20477 }, { "epoch": 0.9592917037522837, "grad_norm": 0.5241432029161892, "learning_rate": 2.785651665942973e-06, "loss": 0.292, "step": 20478 }, { "epoch": 0.959338548742212, "grad_norm": 0.6152991584216057, "learning_rate": 2.7854632570278884e-06, "loss": 0.3039, "step": 20479 }, { "epoch": 0.9593853937321404, "grad_norm": 0.5870515620346936, "learning_rate": 2.785274846470033e-06, "loss": 0.3043, "step": 20480 }, { "epoch": 0.9594322387220686, "grad_norm": 0.577199621170215, "learning_rate": 2.785086434270493e-06, "loss": 0.3129, "step": 20481 }, { "epoch": 0.959479083711997, "grad_norm": 0.6155892990227707, "learning_rate": 2.7848980204303527e-06, "loss": 0.3113, "step": 20482 }, { "epoch": 0.9595259287019253, "grad_norm": 0.5752860369807454, "learning_rate": 2.784709604950695e-06, "loss": 0.3201, "step": 20483 }, { "epoch": 0.9595727736918537, "grad_norm": 0.5864608841086772, "learning_rate": 2.7845211878326055e-06, "loss": 0.2947, "step": 20484 }, { "epoch": 0.959619618681782, "grad_norm": 0.5900685193675651, "learning_rate": 2.784332769077169e-06, "loss": 0.3076, "step": 20485 }, { "epoch": 0.9596664636717103, "grad_norm": 0.5387373200319984, "learning_rate": 2.7841443486854674e-06, "loss": 0.2848, "step": 20486 }, { "epoch": 0.9597133086616386, "grad_norm": 0.6164307737339076, "learning_rate": 2.783955926658587e-06, "loss": 0.3186, "step": 20487 }, { "epoch": 0.959760153651567, "grad_norm": 0.5539946877440326, "learning_rate": 2.783767502997612e-06, "loss": 0.3228, "step": 20488 }, { "epoch": 0.9598069986414953, "grad_norm": 0.620176886035313, "learning_rate": 2.7835790777036266e-06, "loss": 0.3194, "step": 20489 }, { "epoch": 0.9598538436314236, "grad_norm": 0.5901914283287953, "learning_rate": 2.7833906507777135e-06, "loss": 0.3033, "step": 20490 }, { "epoch": 0.9599006886213519, "grad_norm": 0.6072537575733775, "learning_rate": 2.7832022222209592e-06, "loss": 0.3429, "step": 20491 }, { "epoch": 0.9599475336112803, "grad_norm": 0.6249594966808393, "learning_rate": 2.783013792034448e-06, "loss": 0.3075, "step": 20492 }, { "epoch": 0.9599943786012086, "grad_norm": 0.6246816432409925, "learning_rate": 2.782825360219263e-06, "loss": 0.3196, "step": 20493 }, { "epoch": 0.960041223591137, "grad_norm": 0.6147133167039414, "learning_rate": 2.7826369267764884e-06, "loss": 0.343, "step": 20494 }, { "epoch": 0.9600880685810652, "grad_norm": 0.5821144353262648, "learning_rate": 2.7824484917072103e-06, "loss": 0.303, "step": 20495 }, { "epoch": 0.9601349135709936, "grad_norm": 0.5906112388399214, "learning_rate": 2.7822600550125124e-06, "loss": 0.3129, "step": 20496 }, { "epoch": 0.9601817585609219, "grad_norm": 0.586464744056472, "learning_rate": 2.7820716166934773e-06, "loss": 0.3148, "step": 20497 }, { "epoch": 0.9602286035508503, "grad_norm": 0.5664662968470932, "learning_rate": 2.7818831767511924e-06, "loss": 0.3094, "step": 20498 }, { "epoch": 0.9602754485407785, "grad_norm": 0.6097413072191029, "learning_rate": 2.7816947351867403e-06, "loss": 0.3421, "step": 20499 }, { "epoch": 0.9603222935307069, "grad_norm": 0.5586266181567889, "learning_rate": 2.7815062920012052e-06, "loss": 0.3075, "step": 20500 }, { "epoch": 0.9603691385206352, "grad_norm": 0.5711347581849039, "learning_rate": 2.781317847195673e-06, "loss": 0.3008, "step": 20501 }, { "epoch": 0.9604159835105636, "grad_norm": 0.5870341085042645, "learning_rate": 2.7811294007712267e-06, "loss": 0.3176, "step": 20502 }, { "epoch": 0.9604628285004919, "grad_norm": 0.6306964370715075, "learning_rate": 2.7809409527289517e-06, "loss": 0.3341, "step": 20503 }, { "epoch": 0.9605096734904202, "grad_norm": 0.607007164092429, "learning_rate": 2.780752503069932e-06, "loss": 0.3098, "step": 20504 }, { "epoch": 0.9605565184803485, "grad_norm": 0.5917219555633276, "learning_rate": 2.780564051795252e-06, "loss": 0.292, "step": 20505 }, { "epoch": 0.9606033634702769, "grad_norm": 0.6190333099368771, "learning_rate": 2.7803755989059973e-06, "loss": 0.3125, "step": 20506 }, { "epoch": 0.9606502084602052, "grad_norm": 0.6202702557493928, "learning_rate": 2.780187144403251e-06, "loss": 0.3058, "step": 20507 }, { "epoch": 0.9606970534501335, "grad_norm": 0.6148569053044343, "learning_rate": 2.7799986882880975e-06, "loss": 0.3035, "step": 20508 }, { "epoch": 0.9607438984400618, "grad_norm": 0.5800103191543869, "learning_rate": 2.7798102305616228e-06, "loss": 0.2937, "step": 20509 }, { "epoch": 0.9607907434299902, "grad_norm": 0.5852451100416411, "learning_rate": 2.7796217712249096e-06, "loss": 0.3116, "step": 20510 }, { "epoch": 0.9608375884199185, "grad_norm": 0.6254322603513046, "learning_rate": 2.779433310279044e-06, "loss": 0.3428, "step": 20511 }, { "epoch": 0.9608844334098469, "grad_norm": 0.5753523414605342, "learning_rate": 2.7792448477251095e-06, "loss": 0.3142, "step": 20512 }, { "epoch": 0.9609312783997751, "grad_norm": 0.5981306290353542, "learning_rate": 2.779056383564192e-06, "loss": 0.3331, "step": 20513 }, { "epoch": 0.9609781233897035, "grad_norm": 0.5805435544348325, "learning_rate": 2.778867917797374e-06, "loss": 0.3086, "step": 20514 }, { "epoch": 0.9610249683796318, "grad_norm": 0.5845623547022009, "learning_rate": 2.7786794504257416e-06, "loss": 0.3253, "step": 20515 }, { "epoch": 0.9610718133695602, "grad_norm": 0.5745737636409045, "learning_rate": 2.7784909814503792e-06, "loss": 0.3037, "step": 20516 }, { "epoch": 0.9611186583594884, "grad_norm": 0.596878042072873, "learning_rate": 2.7783025108723717e-06, "loss": 0.3097, "step": 20517 }, { "epoch": 0.9611655033494167, "grad_norm": 0.6033648293432422, "learning_rate": 2.778114038692802e-06, "loss": 0.3297, "step": 20518 }, { "epoch": 0.9612123483393451, "grad_norm": 0.5529476913566573, "learning_rate": 2.777925564912757e-06, "loss": 0.3068, "step": 20519 }, { "epoch": 0.9612591933292735, "grad_norm": 0.6197637971455197, "learning_rate": 2.77773708953332e-06, "loss": 0.3321, "step": 20520 }, { "epoch": 0.9613060383192018, "grad_norm": 0.6502032013100856, "learning_rate": 2.777548612555575e-06, "loss": 0.3186, "step": 20521 }, { "epoch": 0.96135288330913, "grad_norm": 0.5421419953036956, "learning_rate": 2.7773601339806088e-06, "loss": 0.2999, "step": 20522 }, { "epoch": 0.9613997282990584, "grad_norm": 0.5771141542340557, "learning_rate": 2.777171653809504e-06, "loss": 0.3128, "step": 20523 }, { "epoch": 0.9614465732889867, "grad_norm": 0.5553687917387188, "learning_rate": 2.776983172043346e-06, "loss": 0.3149, "step": 20524 }, { "epoch": 0.9614934182789151, "grad_norm": 0.5872447903975407, "learning_rate": 2.7767946886832198e-06, "loss": 0.3173, "step": 20525 }, { "epoch": 0.9615402632688433, "grad_norm": 0.5828876570235314, "learning_rate": 2.776606203730209e-06, "loss": 0.3002, "step": 20526 }, { "epoch": 0.9615871082587717, "grad_norm": 0.6151060199256974, "learning_rate": 2.7764177171853994e-06, "loss": 0.3189, "step": 20527 }, { "epoch": 0.9616339532487, "grad_norm": 0.6389200726871365, "learning_rate": 2.776229229049876e-06, "loss": 0.3313, "step": 20528 }, { "epoch": 0.9616807982386284, "grad_norm": 0.601606106206983, "learning_rate": 2.7760407393247218e-06, "loss": 0.3095, "step": 20529 }, { "epoch": 0.9617276432285568, "grad_norm": 0.5771266473608516, "learning_rate": 2.7758522480110233e-06, "loss": 0.309, "step": 20530 }, { "epoch": 0.961774488218485, "grad_norm": 0.6263674670729636, "learning_rate": 2.7756637551098643e-06, "loss": 0.3445, "step": 20531 }, { "epoch": 0.9618213332084133, "grad_norm": 0.5693501007735453, "learning_rate": 2.775475260622329e-06, "loss": 0.3126, "step": 20532 }, { "epoch": 0.9618681781983417, "grad_norm": 0.565009866051459, "learning_rate": 2.775286764549503e-06, "loss": 0.3369, "step": 20533 }, { "epoch": 0.96191502318827, "grad_norm": 0.5846743686303592, "learning_rate": 2.775098266892472e-06, "loss": 0.327, "step": 20534 }, { "epoch": 0.9619618681781983, "grad_norm": 0.5883142335487994, "learning_rate": 2.7749097676523186e-06, "loss": 0.2934, "step": 20535 }, { "epoch": 0.9620087131681266, "grad_norm": 0.5758803426758804, "learning_rate": 2.774721266830128e-06, "loss": 0.3222, "step": 20536 }, { "epoch": 0.962055558158055, "grad_norm": 0.6466104177171264, "learning_rate": 2.774532764426987e-06, "loss": 0.3375, "step": 20537 }, { "epoch": 0.9621024031479833, "grad_norm": 0.6548839080124371, "learning_rate": 2.7743442604439786e-06, "loss": 0.3345, "step": 20538 }, { "epoch": 0.9621492481379117, "grad_norm": 0.5483547748678487, "learning_rate": 2.7741557548821868e-06, "loss": 0.2886, "step": 20539 }, { "epoch": 0.9621960931278399, "grad_norm": 0.6129990823266944, "learning_rate": 2.7739672477426987e-06, "loss": 0.3251, "step": 20540 }, { "epoch": 0.9622429381177683, "grad_norm": 0.5842495718543647, "learning_rate": 2.773778739026598e-06, "loss": 0.2959, "step": 20541 }, { "epoch": 0.9622897831076966, "grad_norm": 0.6197495764149412, "learning_rate": 2.7735902287349685e-06, "loss": 0.343, "step": 20542 }, { "epoch": 0.962336628097625, "grad_norm": 0.627928713951916, "learning_rate": 2.773401716868897e-06, "loss": 0.3065, "step": 20543 }, { "epoch": 0.9623834730875532, "grad_norm": 0.6189411286297083, "learning_rate": 2.773213203429466e-06, "loss": 0.3041, "step": 20544 }, { "epoch": 0.9624303180774816, "grad_norm": 0.5714665766885961, "learning_rate": 2.773024688417763e-06, "loss": 0.2978, "step": 20545 }, { "epoch": 0.9624771630674099, "grad_norm": 0.5951621596992968, "learning_rate": 2.7728361718348716e-06, "loss": 0.3114, "step": 20546 }, { "epoch": 0.9625240080573383, "grad_norm": 0.5738040558602355, "learning_rate": 2.772647653681876e-06, "loss": 0.3049, "step": 20547 }, { "epoch": 0.9625708530472666, "grad_norm": 0.5790159105328395, "learning_rate": 2.7724591339598616e-06, "loss": 0.3199, "step": 20548 }, { "epoch": 0.9626176980371949, "grad_norm": 0.60659893178284, "learning_rate": 2.772270612669914e-06, "loss": 0.3093, "step": 20549 }, { "epoch": 0.9626645430271232, "grad_norm": 0.5870574444595112, "learning_rate": 2.7720820898131163e-06, "loss": 0.3009, "step": 20550 }, { "epoch": 0.9627113880170516, "grad_norm": 0.6347705659450708, "learning_rate": 2.7718935653905554e-06, "loss": 0.3412, "step": 20551 }, { "epoch": 0.9627582330069799, "grad_norm": 0.5519305442465978, "learning_rate": 2.771705039403316e-06, "loss": 0.3139, "step": 20552 }, { "epoch": 0.9628050779969082, "grad_norm": 0.5614849955625159, "learning_rate": 2.771516511852481e-06, "loss": 0.2958, "step": 20553 }, { "epoch": 0.9628519229868365, "grad_norm": 0.5996583732656651, "learning_rate": 2.771327982739137e-06, "loss": 0.3086, "step": 20554 }, { "epoch": 0.9628987679767649, "grad_norm": 0.6119395542802772, "learning_rate": 2.771139452064369e-06, "loss": 0.3376, "step": 20555 }, { "epoch": 0.9629456129666932, "grad_norm": 0.5543951393571697, "learning_rate": 2.7709509198292624e-06, "loss": 0.3012, "step": 20556 }, { "epoch": 0.9629924579566216, "grad_norm": 0.5792393525512949, "learning_rate": 2.7707623860349e-06, "loss": 0.3213, "step": 20557 }, { "epoch": 0.9630393029465498, "grad_norm": 0.6642423539895076, "learning_rate": 2.770573850682369e-06, "loss": 0.3325, "step": 20558 }, { "epoch": 0.9630861479364782, "grad_norm": 0.5524953751840501, "learning_rate": 2.7703853137727534e-06, "loss": 0.3037, "step": 20559 }, { "epoch": 0.9631329929264065, "grad_norm": 0.5627995467092033, "learning_rate": 2.7701967753071376e-06, "loss": 0.3072, "step": 20560 }, { "epoch": 0.9631798379163349, "grad_norm": 0.6014466120670895, "learning_rate": 2.7700082352866076e-06, "loss": 0.3386, "step": 20561 }, { "epoch": 0.9632266829062631, "grad_norm": 0.6011705383119942, "learning_rate": 2.7698196937122483e-06, "loss": 0.3243, "step": 20562 }, { "epoch": 0.9632735278961915, "grad_norm": 0.5757009452066442, "learning_rate": 2.769631150585144e-06, "loss": 0.3021, "step": 20563 }, { "epoch": 0.9633203728861198, "grad_norm": 0.6001137098230663, "learning_rate": 2.7694426059063807e-06, "loss": 0.3485, "step": 20564 }, { "epoch": 0.9633672178760482, "grad_norm": 0.6037891193832066, "learning_rate": 2.769254059677043e-06, "loss": 0.3284, "step": 20565 }, { "epoch": 0.9634140628659765, "grad_norm": 0.6060879240485629, "learning_rate": 2.7690655118982156e-06, "loss": 0.298, "step": 20566 }, { "epoch": 0.9634609078559048, "grad_norm": 0.561325996092399, "learning_rate": 2.768876962570984e-06, "loss": 0.3116, "step": 20567 }, { "epoch": 0.9635077528458331, "grad_norm": 0.6089923265769871, "learning_rate": 2.768688411696433e-06, "loss": 0.3274, "step": 20568 }, { "epoch": 0.9635545978357615, "grad_norm": 0.6105906930944213, "learning_rate": 2.768499859275648e-06, "loss": 0.2944, "step": 20569 }, { "epoch": 0.9636014428256898, "grad_norm": 0.6214260276008996, "learning_rate": 2.7683113053097126e-06, "loss": 0.3328, "step": 20570 }, { "epoch": 0.9636482878156181, "grad_norm": 0.5901969535051074, "learning_rate": 2.7681227497997144e-06, "loss": 0.3256, "step": 20571 }, { "epoch": 0.9636951328055464, "grad_norm": 0.5671960960582029, "learning_rate": 2.7679341927467368e-06, "loss": 0.285, "step": 20572 }, { "epoch": 0.9637419777954748, "grad_norm": 0.6026139085395315, "learning_rate": 2.767745634151865e-06, "loss": 0.3009, "step": 20573 }, { "epoch": 0.9637888227854031, "grad_norm": 0.6526007910467903, "learning_rate": 2.7675570740161846e-06, "loss": 0.3002, "step": 20574 }, { "epoch": 0.9638356677753315, "grad_norm": 0.6067800356107244, "learning_rate": 2.76736851234078e-06, "loss": 0.3259, "step": 20575 }, { "epoch": 0.9638825127652597, "grad_norm": 0.6110282079215955, "learning_rate": 2.7671799491267377e-06, "loss": 0.3257, "step": 20576 }, { "epoch": 0.9639293577551881, "grad_norm": 0.5792593823372647, "learning_rate": 2.7669913843751416e-06, "loss": 0.3156, "step": 20577 }, { "epoch": 0.9639762027451164, "grad_norm": 0.5881356433284661, "learning_rate": 2.7668028180870764e-06, "loss": 0.3071, "step": 20578 }, { "epoch": 0.9640230477350448, "grad_norm": 0.6064589290882368, "learning_rate": 2.766614250263629e-06, "loss": 0.3166, "step": 20579 }, { "epoch": 0.964069892724973, "grad_norm": 0.6018383826831358, "learning_rate": 2.766425680905884e-06, "loss": 0.3249, "step": 20580 }, { "epoch": 0.9641167377149014, "grad_norm": 0.5969735883376259, "learning_rate": 2.7662371100149247e-06, "loss": 0.3148, "step": 20581 }, { "epoch": 0.9641635827048297, "grad_norm": 0.5901644333425512, "learning_rate": 2.7660485375918385e-06, "loss": 0.2968, "step": 20582 }, { "epoch": 0.9642104276947581, "grad_norm": 0.5691444371803653, "learning_rate": 2.7658599636377106e-06, "loss": 0.3077, "step": 20583 }, { "epoch": 0.9642572726846864, "grad_norm": 0.5587957980674466, "learning_rate": 2.765671388153624e-06, "loss": 0.3238, "step": 20584 }, { "epoch": 0.9643041176746147, "grad_norm": 0.5880536168279483, "learning_rate": 2.7654828111406655e-06, "loss": 0.3215, "step": 20585 }, { "epoch": 0.964350962664543, "grad_norm": 0.6491243054354435, "learning_rate": 2.765294232599921e-06, "loss": 0.3356, "step": 20586 }, { "epoch": 0.9643978076544714, "grad_norm": 0.6219839439132604, "learning_rate": 2.7651056525324747e-06, "loss": 0.3214, "step": 20587 }, { "epoch": 0.9644446526443997, "grad_norm": 0.5958000965571055, "learning_rate": 2.764917070939412e-06, "loss": 0.3271, "step": 20588 }, { "epoch": 0.964491497634328, "grad_norm": 0.5963843163737597, "learning_rate": 2.7647284878218177e-06, "loss": 0.3057, "step": 20589 }, { "epoch": 0.9645383426242563, "grad_norm": 0.572651423890803, "learning_rate": 2.7645399031807784e-06, "loss": 0.3033, "step": 20590 }, { "epoch": 0.9645851876141847, "grad_norm": 0.6178359036940304, "learning_rate": 2.7643513170173774e-06, "loss": 0.3294, "step": 20591 }, { "epoch": 0.964632032604113, "grad_norm": 0.5773232867807931, "learning_rate": 2.7641627293327018e-06, "loss": 0.3005, "step": 20592 }, { "epoch": 0.9646788775940414, "grad_norm": 0.5703659837366478, "learning_rate": 2.763974140127836e-06, "loss": 0.3037, "step": 20593 }, { "epoch": 0.9647257225839696, "grad_norm": 0.6381950170566881, "learning_rate": 2.7637855494038646e-06, "loss": 0.3231, "step": 20594 }, { "epoch": 0.964772567573898, "grad_norm": 0.6281183165260764, "learning_rate": 2.7635969571618743e-06, "loss": 0.3155, "step": 20595 }, { "epoch": 0.9648194125638263, "grad_norm": 0.6673603880981784, "learning_rate": 2.763408363402949e-06, "loss": 0.3136, "step": 20596 }, { "epoch": 0.9648662575537547, "grad_norm": 0.609092225012115, "learning_rate": 2.7632197681281758e-06, "loss": 0.3244, "step": 20597 }, { "epoch": 0.9649131025436829, "grad_norm": 0.5911850268255633, "learning_rate": 2.763031171338639e-06, "loss": 0.2944, "step": 20598 }, { "epoch": 0.9649599475336113, "grad_norm": 0.6539090591911909, "learning_rate": 2.762842573035423e-06, "loss": 0.3189, "step": 20599 }, { "epoch": 0.9650067925235396, "grad_norm": 0.5401029805376887, "learning_rate": 2.7626539732196145e-06, "loss": 0.3123, "step": 20600 }, { "epoch": 0.965053637513468, "grad_norm": 0.6294139768499271, "learning_rate": 2.762465371892299e-06, "loss": 0.3233, "step": 20601 }, { "epoch": 0.9651004825033963, "grad_norm": 0.5656551668592862, "learning_rate": 2.7622767690545605e-06, "loss": 0.3143, "step": 20602 }, { "epoch": 0.9651473274933245, "grad_norm": 0.5977061571032873, "learning_rate": 2.7620881647074847e-06, "loss": 0.316, "step": 20603 }, { "epoch": 0.9651941724832529, "grad_norm": 0.6157495279235703, "learning_rate": 2.7618995588521584e-06, "loss": 0.3119, "step": 20604 }, { "epoch": 0.9652410174731813, "grad_norm": 0.5692393122948523, "learning_rate": 2.7617109514896657e-06, "loss": 0.2959, "step": 20605 }, { "epoch": 0.9652878624631096, "grad_norm": 0.5881052746324589, "learning_rate": 2.7615223426210917e-06, "loss": 0.2996, "step": 20606 }, { "epoch": 0.9653347074530378, "grad_norm": 0.6156816684327256, "learning_rate": 2.761333732247523e-06, "loss": 0.3457, "step": 20607 }, { "epoch": 0.9653815524429662, "grad_norm": 0.5728611617664632, "learning_rate": 2.7611451203700445e-06, "loss": 0.3112, "step": 20608 }, { "epoch": 0.9654283974328945, "grad_norm": 0.6288010423810121, "learning_rate": 2.7609565069897405e-06, "loss": 0.332, "step": 20609 }, { "epoch": 0.9654752424228229, "grad_norm": 0.598244917973874, "learning_rate": 2.760767892107698e-06, "loss": 0.3082, "step": 20610 }, { "epoch": 0.9655220874127513, "grad_norm": 0.6150676711110805, "learning_rate": 2.7605792757250024e-06, "loss": 0.3093, "step": 20611 }, { "epoch": 0.9655689324026795, "grad_norm": 0.6338562118669039, "learning_rate": 2.7603906578427374e-06, "loss": 0.3107, "step": 20612 }, { "epoch": 0.9656157773926078, "grad_norm": 0.6179876168891468, "learning_rate": 2.7602020384619902e-06, "loss": 0.3251, "step": 20613 }, { "epoch": 0.9656626223825362, "grad_norm": 0.5922068735886511, "learning_rate": 2.7600134175838455e-06, "loss": 0.3194, "step": 20614 }, { "epoch": 0.9657094673724645, "grad_norm": 0.6183084032955012, "learning_rate": 2.759824795209389e-06, "loss": 0.322, "step": 20615 }, { "epoch": 0.9657563123623928, "grad_norm": 0.596233704172374, "learning_rate": 2.759636171339707e-06, "loss": 0.314, "step": 20616 }, { "epoch": 0.9658031573523211, "grad_norm": 0.6244414126244745, "learning_rate": 2.759447545975883e-06, "loss": 0.3068, "step": 20617 }, { "epoch": 0.9658500023422495, "grad_norm": 0.5840343323166935, "learning_rate": 2.759258919119004e-06, "loss": 0.3133, "step": 20618 }, { "epoch": 0.9658968473321778, "grad_norm": 0.7196635113743598, "learning_rate": 2.759070290770155e-06, "loss": 0.3239, "step": 20619 }, { "epoch": 0.9659436923221062, "grad_norm": 0.5593039152037751, "learning_rate": 2.7588816609304216e-06, "loss": 0.3103, "step": 20620 }, { "epoch": 0.9659905373120344, "grad_norm": 0.6056233491878271, "learning_rate": 2.758693029600889e-06, "loss": 0.3123, "step": 20621 }, { "epoch": 0.9660373823019628, "grad_norm": 0.5639528521555824, "learning_rate": 2.7585043967826434e-06, "loss": 0.3165, "step": 20622 }, { "epoch": 0.9660842272918911, "grad_norm": 0.5673400685140358, "learning_rate": 2.75831576247677e-06, "loss": 0.2872, "step": 20623 }, { "epoch": 0.9661310722818195, "grad_norm": 0.550678440934413, "learning_rate": 2.7581271266843533e-06, "loss": 0.296, "step": 20624 }, { "epoch": 0.9661779172717477, "grad_norm": 0.5738495540409001, "learning_rate": 2.757938489406481e-06, "loss": 0.3156, "step": 20625 }, { "epoch": 0.9662247622616761, "grad_norm": 0.5799633524556808, "learning_rate": 2.7577498506442372e-06, "loss": 0.2965, "step": 20626 }, { "epoch": 0.9662716072516044, "grad_norm": 0.5840069427951241, "learning_rate": 2.7575612103987083e-06, "loss": 0.3143, "step": 20627 }, { "epoch": 0.9663184522415328, "grad_norm": 0.5987008654736473, "learning_rate": 2.7573725686709785e-06, "loss": 0.3147, "step": 20628 }, { "epoch": 0.9663652972314611, "grad_norm": 0.5608799940779486, "learning_rate": 2.7571839254621353e-06, "loss": 0.3126, "step": 20629 }, { "epoch": 0.9664121422213894, "grad_norm": 0.5871472568018087, "learning_rate": 2.756995280773262e-06, "loss": 0.3259, "step": 20630 }, { "epoch": 0.9664589872113177, "grad_norm": 0.5432605074629983, "learning_rate": 2.7568066346054458e-06, "loss": 0.2873, "step": 20631 }, { "epoch": 0.9665058322012461, "grad_norm": 0.5536842375193048, "learning_rate": 2.7566179869597733e-06, "loss": 0.3041, "step": 20632 }, { "epoch": 0.9665526771911744, "grad_norm": 0.5933316968369147, "learning_rate": 2.756429337837327e-06, "loss": 0.3131, "step": 20633 }, { "epoch": 0.9665995221811027, "grad_norm": 0.6178667836757183, "learning_rate": 2.756240687239195e-06, "loss": 0.3364, "step": 20634 }, { "epoch": 0.966646367171031, "grad_norm": 0.589637944271031, "learning_rate": 2.7560520351664622e-06, "loss": 0.3281, "step": 20635 }, { "epoch": 0.9666932121609594, "grad_norm": 0.6511923413409659, "learning_rate": 2.7558633816202147e-06, "loss": 0.3305, "step": 20636 }, { "epoch": 0.9667400571508877, "grad_norm": 0.5810426329359861, "learning_rate": 2.755674726601537e-06, "loss": 0.3086, "step": 20637 }, { "epoch": 0.9667869021408161, "grad_norm": 0.5594126261066479, "learning_rate": 2.755486070111516e-06, "loss": 0.3079, "step": 20638 }, { "epoch": 0.9668337471307443, "grad_norm": 0.5478607272403342, "learning_rate": 2.755297412151237e-06, "loss": 0.3073, "step": 20639 }, { "epoch": 0.9668805921206727, "grad_norm": 0.6050384830311187, "learning_rate": 2.7551087527217852e-06, "loss": 0.3253, "step": 20640 }, { "epoch": 0.966927437110601, "grad_norm": 0.6075852634461452, "learning_rate": 2.7549200918242467e-06, "loss": 0.3144, "step": 20641 }, { "epoch": 0.9669742821005294, "grad_norm": 0.5723950105520881, "learning_rate": 2.7547314294597077e-06, "loss": 0.3115, "step": 20642 }, { "epoch": 0.9670211270904576, "grad_norm": 0.6176145910073162, "learning_rate": 2.754542765629253e-06, "loss": 0.3415, "step": 20643 }, { "epoch": 0.967067972080386, "grad_norm": 0.5906615738300733, "learning_rate": 2.7543541003339683e-06, "loss": 0.3014, "step": 20644 }, { "epoch": 0.9671148170703143, "grad_norm": 0.5788349916127915, "learning_rate": 2.7541654335749394e-06, "loss": 0.3019, "step": 20645 }, { "epoch": 0.9671616620602427, "grad_norm": 0.5879279489752485, "learning_rate": 2.7539767653532535e-06, "loss": 0.3275, "step": 20646 }, { "epoch": 0.967208507050171, "grad_norm": 0.6117209886632133, "learning_rate": 2.753788095669995e-06, "loss": 0.3285, "step": 20647 }, { "epoch": 0.9672553520400993, "grad_norm": 0.6007806555693949, "learning_rate": 2.7535994245262487e-06, "loss": 0.3051, "step": 20648 }, { "epoch": 0.9673021970300276, "grad_norm": 0.6595624487662792, "learning_rate": 2.7534107519231023e-06, "loss": 0.3329, "step": 20649 }, { "epoch": 0.967349042019956, "grad_norm": 0.5743032538839735, "learning_rate": 2.753222077861641e-06, "loss": 0.3283, "step": 20650 }, { "epoch": 0.9673958870098843, "grad_norm": 0.6143309872414768, "learning_rate": 2.753033402342949e-06, "loss": 0.3135, "step": 20651 }, { "epoch": 0.9674427319998126, "grad_norm": 0.632037689165628, "learning_rate": 2.752844725368114e-06, "loss": 0.3333, "step": 20652 }, { "epoch": 0.9674895769897409, "grad_norm": 0.5583702963948629, "learning_rate": 2.752656046938222e-06, "loss": 0.2788, "step": 20653 }, { "epoch": 0.9675364219796693, "grad_norm": 0.6177651510238568, "learning_rate": 2.752467367054357e-06, "loss": 0.3121, "step": 20654 }, { "epoch": 0.9675832669695976, "grad_norm": 0.6098175778252184, "learning_rate": 2.7522786857176054e-06, "loss": 0.3291, "step": 20655 }, { "epoch": 0.967630111959526, "grad_norm": 0.646521788409563, "learning_rate": 2.7520900029290544e-06, "loss": 0.31, "step": 20656 }, { "epoch": 0.9676769569494542, "grad_norm": 0.5804229517259054, "learning_rate": 2.7519013186897887e-06, "loss": 0.3225, "step": 20657 }, { "epoch": 0.9677238019393826, "grad_norm": 0.5759808421789513, "learning_rate": 2.751712633000893e-06, "loss": 0.3169, "step": 20658 }, { "epoch": 0.9677706469293109, "grad_norm": 0.6177309357182346, "learning_rate": 2.751523945863456e-06, "loss": 0.3296, "step": 20659 }, { "epoch": 0.9678174919192393, "grad_norm": 0.6167686696066279, "learning_rate": 2.7513352572785613e-06, "loss": 0.3161, "step": 20660 }, { "epoch": 0.9678643369091675, "grad_norm": 0.6015502194423369, "learning_rate": 2.751146567247295e-06, "loss": 0.3009, "step": 20661 }, { "epoch": 0.9679111818990959, "grad_norm": 0.5664970037482301, "learning_rate": 2.7509578757707434e-06, "loss": 0.3167, "step": 20662 }, { "epoch": 0.9679580268890242, "grad_norm": 0.5318694754962738, "learning_rate": 2.7507691828499927e-06, "loss": 0.2926, "step": 20663 }, { "epoch": 0.9680048718789526, "grad_norm": 0.6365321501492834, "learning_rate": 2.7505804884861282e-06, "loss": 0.3363, "step": 20664 }, { "epoch": 0.9680517168688809, "grad_norm": 0.6133319370565866, "learning_rate": 2.750391792680236e-06, "loss": 0.3144, "step": 20665 }, { "epoch": 0.9680985618588092, "grad_norm": 0.5487682368508436, "learning_rate": 2.750203095433401e-06, "loss": 0.3157, "step": 20666 }, { "epoch": 0.9681454068487375, "grad_norm": 0.5951324163191575, "learning_rate": 2.7500143967467113e-06, "loss": 0.3243, "step": 20667 }, { "epoch": 0.9681922518386659, "grad_norm": 0.614115872100399, "learning_rate": 2.7498256966212516e-06, "loss": 0.3197, "step": 20668 }, { "epoch": 0.9682390968285942, "grad_norm": 0.6128338237909496, "learning_rate": 2.749636995058107e-06, "loss": 0.3128, "step": 20669 }, { "epoch": 0.9682859418185225, "grad_norm": 0.6253897835032346, "learning_rate": 2.749448292058365e-06, "loss": 0.3358, "step": 20670 }, { "epoch": 0.9683327868084508, "grad_norm": 0.6159312767729483, "learning_rate": 2.749259587623111e-06, "loss": 0.3251, "step": 20671 }, { "epoch": 0.9683796317983792, "grad_norm": 0.5677027859345651, "learning_rate": 2.7490708817534297e-06, "loss": 0.3229, "step": 20672 }, { "epoch": 0.9684264767883075, "grad_norm": 0.5809405115191033, "learning_rate": 2.748882174450408e-06, "loss": 0.3317, "step": 20673 }, { "epoch": 0.9684733217782359, "grad_norm": 0.5802931919093188, "learning_rate": 2.7486934657151333e-06, "loss": 0.3169, "step": 20674 }, { "epoch": 0.9685201667681641, "grad_norm": 0.5681780642661678, "learning_rate": 2.748504755548689e-06, "loss": 0.3108, "step": 20675 }, { "epoch": 0.9685670117580925, "grad_norm": 0.5795398084878018, "learning_rate": 2.7483160439521623e-06, "loss": 0.3177, "step": 20676 }, { "epoch": 0.9686138567480208, "grad_norm": 0.6318965323924307, "learning_rate": 2.74812733092664e-06, "loss": 0.3343, "step": 20677 }, { "epoch": 0.9686607017379492, "grad_norm": 0.5564699102968237, "learning_rate": 2.747938616473207e-06, "loss": 0.2762, "step": 20678 }, { "epoch": 0.9687075467278774, "grad_norm": 0.5759948984152035, "learning_rate": 2.747749900592949e-06, "loss": 0.3086, "step": 20679 }, { "epoch": 0.9687543917178058, "grad_norm": 0.6128190683911336, "learning_rate": 2.7475611832869532e-06, "loss": 0.3299, "step": 20680 }, { "epoch": 0.9688012367077341, "grad_norm": 0.575718868969497, "learning_rate": 2.7473724645563048e-06, "loss": 0.314, "step": 20681 }, { "epoch": 0.9688480816976625, "grad_norm": 0.5752543554519476, "learning_rate": 2.74718374440209e-06, "loss": 0.3096, "step": 20682 }, { "epoch": 0.9688949266875908, "grad_norm": 0.6301790906275617, "learning_rate": 2.746995022825395e-06, "loss": 0.3175, "step": 20683 }, { "epoch": 0.968941771677519, "grad_norm": 0.5713448629563589, "learning_rate": 2.7468062998273053e-06, "loss": 0.3046, "step": 20684 }, { "epoch": 0.9689886166674474, "grad_norm": 0.610327800940176, "learning_rate": 2.746617575408908e-06, "loss": 0.3164, "step": 20685 }, { "epoch": 0.9690354616573758, "grad_norm": 0.5891841561478226, "learning_rate": 2.7464288495712886e-06, "loss": 0.3082, "step": 20686 }, { "epoch": 0.9690823066473041, "grad_norm": 0.6112035054047712, "learning_rate": 2.7462401223155326e-06, "loss": 0.3346, "step": 20687 }, { "epoch": 0.9691291516372323, "grad_norm": 0.5895183218708623, "learning_rate": 2.746051393642727e-06, "loss": 0.3165, "step": 20688 }, { "epoch": 0.9691759966271607, "grad_norm": 0.627480247686366, "learning_rate": 2.7458626635539575e-06, "loss": 0.317, "step": 20689 }, { "epoch": 0.969222841617089, "grad_norm": 0.5727923773229089, "learning_rate": 2.7456739320503096e-06, "loss": 0.3227, "step": 20690 }, { "epoch": 0.9692696866070174, "grad_norm": 0.585829134377975, "learning_rate": 2.7454851991328703e-06, "loss": 0.3006, "step": 20691 }, { "epoch": 0.9693165315969458, "grad_norm": 0.6034489844379374, "learning_rate": 2.745296464802725e-06, "loss": 0.3362, "step": 20692 }, { "epoch": 0.969363376586874, "grad_norm": 0.5516169941922173, "learning_rate": 2.7451077290609607e-06, "loss": 0.299, "step": 20693 }, { "epoch": 0.9694102215768023, "grad_norm": 0.623454604104921, "learning_rate": 2.7449189919086628e-06, "loss": 0.3333, "step": 20694 }, { "epoch": 0.9694570665667307, "grad_norm": 0.6122760967686176, "learning_rate": 2.7447302533469177e-06, "loss": 0.3121, "step": 20695 }, { "epoch": 0.969503911556659, "grad_norm": 0.5993030814078326, "learning_rate": 2.744541513376812e-06, "loss": 0.3239, "step": 20696 }, { "epoch": 0.9695507565465873, "grad_norm": 0.646256586534453, "learning_rate": 2.7443527719994305e-06, "loss": 0.3498, "step": 20697 }, { "epoch": 0.9695976015365156, "grad_norm": 0.6012286141265039, "learning_rate": 2.744164029215861e-06, "loss": 0.3162, "step": 20698 }, { "epoch": 0.969644446526444, "grad_norm": 0.5872807115170671, "learning_rate": 2.7439752850271884e-06, "loss": 0.3107, "step": 20699 }, { "epoch": 0.9696912915163723, "grad_norm": 0.583259162183967, "learning_rate": 2.7437865394344994e-06, "loss": 0.3042, "step": 20700 }, { "epoch": 0.9697381365063007, "grad_norm": 0.6390257818084756, "learning_rate": 2.7435977924388794e-06, "loss": 0.3121, "step": 20701 }, { "epoch": 0.9697849814962289, "grad_norm": 0.5878820615685244, "learning_rate": 2.7434090440414174e-06, "loss": 0.3219, "step": 20702 }, { "epoch": 0.9698318264861573, "grad_norm": 0.5954347406664527, "learning_rate": 2.743220294243196e-06, "loss": 0.2987, "step": 20703 }, { "epoch": 0.9698786714760856, "grad_norm": 0.5988350868677664, "learning_rate": 2.7430315430453023e-06, "loss": 0.3029, "step": 20704 }, { "epoch": 0.969925516466014, "grad_norm": 0.5933446939935013, "learning_rate": 2.7428427904488248e-06, "loss": 0.3183, "step": 20705 }, { "epoch": 0.9699723614559422, "grad_norm": 0.5828560702157661, "learning_rate": 2.742654036454847e-06, "loss": 0.3318, "step": 20706 }, { "epoch": 0.9700192064458706, "grad_norm": 0.6350557225884079, "learning_rate": 2.7424652810644564e-06, "loss": 0.3336, "step": 20707 }, { "epoch": 0.9700660514357989, "grad_norm": 0.6099648243473452, "learning_rate": 2.74227652427874e-06, "loss": 0.3386, "step": 20708 }, { "epoch": 0.9701128964257273, "grad_norm": 0.5969732116250611, "learning_rate": 2.742087766098782e-06, "loss": 0.3222, "step": 20709 }, { "epoch": 0.9701597414156556, "grad_norm": 0.5921041800765424, "learning_rate": 2.7418990065256694e-06, "loss": 0.3126, "step": 20710 }, { "epoch": 0.9702065864055839, "grad_norm": 0.597667726827178, "learning_rate": 2.74171024556049e-06, "loss": 0.3084, "step": 20711 }, { "epoch": 0.9702534313955122, "grad_norm": 0.6229120135960471, "learning_rate": 2.7415214832043285e-06, "loss": 0.3452, "step": 20712 }, { "epoch": 0.9703002763854406, "grad_norm": 0.5628340006760935, "learning_rate": 2.741332719458271e-06, "loss": 0.3097, "step": 20713 }, { "epoch": 0.9703471213753689, "grad_norm": 0.5812019691064385, "learning_rate": 2.741143954323405e-06, "loss": 0.3265, "step": 20714 }, { "epoch": 0.9703939663652972, "grad_norm": 0.5886625123121243, "learning_rate": 2.7409551878008157e-06, "loss": 0.3274, "step": 20715 }, { "epoch": 0.9704408113552255, "grad_norm": 0.6010870103048853, "learning_rate": 2.74076641989159e-06, "loss": 0.3285, "step": 20716 }, { "epoch": 0.9704876563451539, "grad_norm": 0.5416163213644655, "learning_rate": 2.7405776505968144e-06, "loss": 0.2913, "step": 20717 }, { "epoch": 0.9705345013350822, "grad_norm": 0.6069107826647894, "learning_rate": 2.7403888799175743e-06, "loss": 0.3149, "step": 20718 }, { "epoch": 0.9705813463250106, "grad_norm": 0.5971358325299697, "learning_rate": 2.7402001078549575e-06, "loss": 0.3082, "step": 20719 }, { "epoch": 0.9706281913149388, "grad_norm": 0.6253186501479028, "learning_rate": 2.740011334410049e-06, "loss": 0.3134, "step": 20720 }, { "epoch": 0.9706750363048672, "grad_norm": 0.6321263451684158, "learning_rate": 2.739822559583935e-06, "loss": 0.3298, "step": 20721 }, { "epoch": 0.9707218812947955, "grad_norm": 0.6315513774485682, "learning_rate": 2.7396337833777024e-06, "loss": 0.3346, "step": 20722 }, { "epoch": 0.9707687262847239, "grad_norm": 0.6704986720602228, "learning_rate": 2.739445005792439e-06, "loss": 0.3187, "step": 20723 }, { "epoch": 0.9708155712746521, "grad_norm": 0.5765698746496388, "learning_rate": 2.7392562268292287e-06, "loss": 0.3097, "step": 20724 }, { "epoch": 0.9708624162645805, "grad_norm": 0.5643718042505342, "learning_rate": 2.7390674464891586e-06, "loss": 0.2979, "step": 20725 }, { "epoch": 0.9709092612545088, "grad_norm": 0.5949794577283744, "learning_rate": 2.7388786647733168e-06, "loss": 0.3273, "step": 20726 }, { "epoch": 0.9709561062444372, "grad_norm": 0.5986510163561836, "learning_rate": 2.738689881682787e-06, "loss": 0.3215, "step": 20727 }, { "epoch": 0.9710029512343655, "grad_norm": 0.617142053255026, "learning_rate": 2.7385010972186575e-06, "loss": 0.3192, "step": 20728 }, { "epoch": 0.9710497962242938, "grad_norm": 0.61323366205026, "learning_rate": 2.7383123113820144e-06, "loss": 0.3414, "step": 20729 }, { "epoch": 0.9710966412142221, "grad_norm": 0.6354407998223844, "learning_rate": 2.7381235241739436e-06, "loss": 0.3407, "step": 20730 }, { "epoch": 0.9711434862041505, "grad_norm": 0.5796143433669722, "learning_rate": 2.7379347355955315e-06, "loss": 0.2987, "step": 20731 }, { "epoch": 0.9711903311940788, "grad_norm": 0.5866176911900705, "learning_rate": 2.737745945647865e-06, "loss": 0.3264, "step": 20732 }, { "epoch": 0.9712371761840071, "grad_norm": 0.6108041950082242, "learning_rate": 2.7375571543320306e-06, "loss": 0.3121, "step": 20733 }, { "epoch": 0.9712840211739354, "grad_norm": 0.6282829392202085, "learning_rate": 2.7373683616491137e-06, "loss": 0.3297, "step": 20734 }, { "epoch": 0.9713308661638638, "grad_norm": 0.6247908499401917, "learning_rate": 2.7371795676002024e-06, "loss": 0.317, "step": 20735 }, { "epoch": 0.9713777111537921, "grad_norm": 0.5929896177541307, "learning_rate": 2.7369907721863813e-06, "loss": 0.3179, "step": 20736 }, { "epoch": 0.9714245561437205, "grad_norm": 0.591741494848544, "learning_rate": 2.7368019754087394e-06, "loss": 0.3331, "step": 20737 }, { "epoch": 0.9714714011336487, "grad_norm": 0.6468421655223034, "learning_rate": 2.736613177268361e-06, "loss": 0.342, "step": 20738 }, { "epoch": 0.9715182461235771, "grad_norm": 0.5936001316384184, "learning_rate": 2.7364243777663323e-06, "loss": 0.3252, "step": 20739 }, { "epoch": 0.9715650911135054, "grad_norm": 0.572411061369773, "learning_rate": 2.736235576903742e-06, "loss": 0.3055, "step": 20740 }, { "epoch": 0.9716119361034338, "grad_norm": 0.5848586733624845, "learning_rate": 2.736046774681675e-06, "loss": 0.3306, "step": 20741 }, { "epoch": 0.971658781093362, "grad_norm": 0.5609911717220628, "learning_rate": 2.7358579711012175e-06, "loss": 0.2866, "step": 20742 }, { "epoch": 0.9717056260832904, "grad_norm": 0.5977866349437393, "learning_rate": 2.7356691661634567e-06, "loss": 0.306, "step": 20743 }, { "epoch": 0.9717524710732187, "grad_norm": 0.6001826732043997, "learning_rate": 2.73548035986948e-06, "loss": 0.2861, "step": 20744 }, { "epoch": 0.9717993160631471, "grad_norm": 0.6184654990307002, "learning_rate": 2.7352915522203723e-06, "loss": 0.3311, "step": 20745 }, { "epoch": 0.9718461610530754, "grad_norm": 0.6113365469324341, "learning_rate": 2.7351027432172205e-06, "loss": 0.335, "step": 20746 }, { "epoch": 0.9718930060430037, "grad_norm": 0.6059281271295501, "learning_rate": 2.7349139328611123e-06, "loss": 0.3183, "step": 20747 }, { "epoch": 0.971939851032932, "grad_norm": 0.6225799670989413, "learning_rate": 2.734725121153133e-06, "loss": 0.3241, "step": 20748 }, { "epoch": 0.9719866960228604, "grad_norm": 0.6320623455823096, "learning_rate": 2.73453630809437e-06, "loss": 0.3224, "step": 20749 }, { "epoch": 0.9720335410127887, "grad_norm": 0.645831056087807, "learning_rate": 2.7343474936859095e-06, "loss": 0.3195, "step": 20750 }, { "epoch": 0.972080386002717, "grad_norm": 0.7282662109054242, "learning_rate": 2.7341586779288376e-06, "loss": 0.3166, "step": 20751 }, { "epoch": 0.9721272309926453, "grad_norm": 0.5847698860803318, "learning_rate": 2.7339698608242413e-06, "loss": 0.3151, "step": 20752 }, { "epoch": 0.9721740759825737, "grad_norm": 0.6187510434354917, "learning_rate": 2.7337810423732083e-06, "loss": 0.3137, "step": 20753 }, { "epoch": 0.972220920972502, "grad_norm": 0.5589334796343722, "learning_rate": 2.733592222576823e-06, "loss": 0.2975, "step": 20754 }, { "epoch": 0.9722677659624304, "grad_norm": 0.5825182454842455, "learning_rate": 2.7334034014361736e-06, "loss": 0.3123, "step": 20755 }, { "epoch": 0.9723146109523586, "grad_norm": 0.6457524481446102, "learning_rate": 2.7332145789523468e-06, "loss": 0.3329, "step": 20756 }, { "epoch": 0.972361455942287, "grad_norm": 0.5506338446725388, "learning_rate": 2.7330257551264276e-06, "loss": 0.3065, "step": 20757 }, { "epoch": 0.9724083009322153, "grad_norm": 0.6055622518753219, "learning_rate": 2.732836929959505e-06, "loss": 0.337, "step": 20758 }, { "epoch": 0.9724551459221437, "grad_norm": 0.6034753717934097, "learning_rate": 2.7326481034526637e-06, "loss": 0.3012, "step": 20759 }, { "epoch": 0.9725019909120719, "grad_norm": 0.590496625006882, "learning_rate": 2.7324592756069904e-06, "loss": 0.3234, "step": 20760 }, { "epoch": 0.9725488359020003, "grad_norm": 0.6143940243476289, "learning_rate": 2.732270446423574e-06, "loss": 0.3367, "step": 20761 }, { "epoch": 0.9725956808919286, "grad_norm": 0.5822739801945362, "learning_rate": 2.732081615903498e-06, "loss": 0.3117, "step": 20762 }, { "epoch": 0.972642525881857, "grad_norm": 0.5949417525832638, "learning_rate": 2.7318927840478516e-06, "loss": 0.3184, "step": 20763 }, { "epoch": 0.9726893708717853, "grad_norm": 0.5751892320617404, "learning_rate": 2.7317039508577197e-06, "loss": 0.3031, "step": 20764 }, { "epoch": 0.9727362158617135, "grad_norm": 0.6435205757782625, "learning_rate": 2.7315151163341907e-06, "loss": 0.3196, "step": 20765 }, { "epoch": 0.9727830608516419, "grad_norm": 0.5829337596101349, "learning_rate": 2.73132628047835e-06, "loss": 0.3147, "step": 20766 }, { "epoch": 0.9728299058415703, "grad_norm": 0.6266568154341852, "learning_rate": 2.7311374432912844e-06, "loss": 0.3333, "step": 20767 }, { "epoch": 0.9728767508314986, "grad_norm": 0.6059507707858014, "learning_rate": 2.7309486047740815e-06, "loss": 0.3073, "step": 20768 }, { "epoch": 0.9729235958214268, "grad_norm": 0.5846936797394537, "learning_rate": 2.730759764927827e-06, "loss": 0.3134, "step": 20769 }, { "epoch": 0.9729704408113552, "grad_norm": 0.6109254484789697, "learning_rate": 2.730570923753608e-06, "loss": 0.3164, "step": 20770 }, { "epoch": 0.9730172858012835, "grad_norm": 0.5990842132843186, "learning_rate": 2.730382081252511e-06, "loss": 0.3387, "step": 20771 }, { "epoch": 0.9730641307912119, "grad_norm": 0.6040930253519252, "learning_rate": 2.730193237425625e-06, "loss": 0.3192, "step": 20772 }, { "epoch": 0.9731109757811403, "grad_norm": 0.5974220699711579, "learning_rate": 2.7300043922740325e-06, "loss": 0.3237, "step": 20773 }, { "epoch": 0.9731578207710685, "grad_norm": 0.5816706388680798, "learning_rate": 2.729815545798823e-06, "loss": 0.3159, "step": 20774 }, { "epoch": 0.9732046657609968, "grad_norm": 0.6621439924986428, "learning_rate": 2.729626698001083e-06, "loss": 0.3241, "step": 20775 }, { "epoch": 0.9732515107509252, "grad_norm": 0.6122212205554456, "learning_rate": 2.7294378488818995e-06, "loss": 0.3308, "step": 20776 }, { "epoch": 0.9732983557408535, "grad_norm": 0.5841920367930917, "learning_rate": 2.729248998442358e-06, "loss": 0.3035, "step": 20777 }, { "epoch": 0.9733452007307818, "grad_norm": 0.5638742528306501, "learning_rate": 2.729060146683547e-06, "loss": 0.32, "step": 20778 }, { "epoch": 0.9733920457207101, "grad_norm": 0.6014203647450355, "learning_rate": 2.7288712936065524e-06, "loss": 0.3147, "step": 20779 }, { "epoch": 0.9734388907106385, "grad_norm": 0.5401323513292839, "learning_rate": 2.7286824392124606e-06, "loss": 0.2767, "step": 20780 }, { "epoch": 0.9734857357005668, "grad_norm": 0.5799142701909185, "learning_rate": 2.728493583502359e-06, "loss": 0.3026, "step": 20781 }, { "epoch": 0.9735325806904952, "grad_norm": 0.6066458720928207, "learning_rate": 2.728304726477334e-06, "loss": 0.306, "step": 20782 }, { "epoch": 0.9735794256804234, "grad_norm": 0.6028731483885621, "learning_rate": 2.7281158681384727e-06, "loss": 0.3156, "step": 20783 }, { "epoch": 0.9736262706703518, "grad_norm": 0.5992398831912656, "learning_rate": 2.7279270084868626e-06, "loss": 0.2938, "step": 20784 }, { "epoch": 0.9736731156602801, "grad_norm": 0.6101703368508449, "learning_rate": 2.7277381475235885e-06, "loss": 0.3221, "step": 20785 }, { "epoch": 0.9737199606502085, "grad_norm": 0.55532707369166, "learning_rate": 2.7275492852497397e-06, "loss": 0.3092, "step": 20786 }, { "epoch": 0.9737668056401367, "grad_norm": 0.5467263255761293, "learning_rate": 2.7273604216664026e-06, "loss": 0.2944, "step": 20787 }, { "epoch": 0.9738136506300651, "grad_norm": 0.6111185819860128, "learning_rate": 2.7271715567746617e-06, "loss": 0.3274, "step": 20788 }, { "epoch": 0.9738604956199934, "grad_norm": 0.6323999892878538, "learning_rate": 2.7269826905756067e-06, "loss": 0.3404, "step": 20789 }, { "epoch": 0.9739073406099218, "grad_norm": 0.6048967328470304, "learning_rate": 2.7267938230703232e-06, "loss": 0.3158, "step": 20790 }, { "epoch": 0.9739541855998501, "grad_norm": 0.6379237509624035, "learning_rate": 2.7266049542598975e-06, "loss": 0.3104, "step": 20791 }, { "epoch": 0.9740010305897784, "grad_norm": 0.5888267008255602, "learning_rate": 2.726416084145418e-06, "loss": 0.3128, "step": 20792 }, { "epoch": 0.9740478755797067, "grad_norm": 0.555049666123393, "learning_rate": 2.7262272127279713e-06, "loss": 0.3171, "step": 20793 }, { "epoch": 0.9740947205696351, "grad_norm": 0.5709193846632791, "learning_rate": 2.726038340008643e-06, "loss": 0.3112, "step": 20794 }, { "epoch": 0.9741415655595634, "grad_norm": 0.5636982197502756, "learning_rate": 2.725849465988521e-06, "loss": 0.2842, "step": 20795 }, { "epoch": 0.9741884105494917, "grad_norm": 0.5827262543173278, "learning_rate": 2.7256605906686923e-06, "loss": 0.3331, "step": 20796 }, { "epoch": 0.97423525553942, "grad_norm": 0.5852947050126138, "learning_rate": 2.7254717140502435e-06, "loss": 0.2992, "step": 20797 }, { "epoch": 0.9742821005293484, "grad_norm": 0.6127100239442709, "learning_rate": 2.7252828361342615e-06, "loss": 0.3379, "step": 20798 }, { "epoch": 0.9743289455192767, "grad_norm": 0.6335474052932003, "learning_rate": 2.7250939569218336e-06, "loss": 0.3251, "step": 20799 }, { "epoch": 0.9743757905092051, "grad_norm": 0.6176861191514972, "learning_rate": 2.7249050764140472e-06, "loss": 0.3213, "step": 20800 }, { "epoch": 0.9744226354991333, "grad_norm": 0.5573102915139913, "learning_rate": 2.7247161946119873e-06, "loss": 0.3219, "step": 20801 }, { "epoch": 0.9744694804890617, "grad_norm": 0.5908113944810547, "learning_rate": 2.724527311516743e-06, "loss": 0.3149, "step": 20802 }, { "epoch": 0.97451632547899, "grad_norm": 0.6388240273474796, "learning_rate": 2.7243384271294004e-06, "loss": 0.3094, "step": 20803 }, { "epoch": 0.9745631704689184, "grad_norm": 0.6045644451533424, "learning_rate": 2.7241495414510462e-06, "loss": 0.3291, "step": 20804 }, { "epoch": 0.9746100154588466, "grad_norm": 0.6099175055778154, "learning_rate": 2.7239606544827684e-06, "loss": 0.3242, "step": 20805 }, { "epoch": 0.974656860448775, "grad_norm": 0.5918690864503329, "learning_rate": 2.7237717662256525e-06, "loss": 0.3223, "step": 20806 }, { "epoch": 0.9747037054387033, "grad_norm": 0.603584087345641, "learning_rate": 2.723582876680787e-06, "loss": 0.3159, "step": 20807 }, { "epoch": 0.9747505504286317, "grad_norm": 0.5697982909080223, "learning_rate": 2.7233939858492582e-06, "loss": 0.3102, "step": 20808 }, { "epoch": 0.97479739541856, "grad_norm": 0.6767518859292452, "learning_rate": 2.7232050937321526e-06, "loss": 0.3305, "step": 20809 }, { "epoch": 0.9748442404084883, "grad_norm": 0.6106281071310724, "learning_rate": 2.7230162003305583e-06, "loss": 0.3212, "step": 20810 }, { "epoch": 0.9748910853984166, "grad_norm": 0.6574931344766276, "learning_rate": 2.7228273056455618e-06, "loss": 0.3435, "step": 20811 }, { "epoch": 0.974937930388345, "grad_norm": 0.5768621431199497, "learning_rate": 2.7226384096782492e-06, "loss": 0.3063, "step": 20812 }, { "epoch": 0.9749847753782733, "grad_norm": 0.5994316044440795, "learning_rate": 2.722449512429709e-06, "loss": 0.3155, "step": 20813 }, { "epoch": 0.9750316203682016, "grad_norm": 0.6361207696335947, "learning_rate": 2.7222606139010287e-06, "loss": 0.3157, "step": 20814 }, { "epoch": 0.9750784653581299, "grad_norm": 0.6034393506669947, "learning_rate": 2.722071714093294e-06, "loss": 0.3285, "step": 20815 }, { "epoch": 0.9751253103480583, "grad_norm": 0.5411050070881772, "learning_rate": 2.7218828130075915e-06, "loss": 0.3047, "step": 20816 }, { "epoch": 0.9751721553379866, "grad_norm": 0.6257135285796117, "learning_rate": 2.7216939106450104e-06, "loss": 0.3453, "step": 20817 }, { "epoch": 0.975219000327915, "grad_norm": 0.6213035093156689, "learning_rate": 2.7215050070066363e-06, "loss": 0.3039, "step": 20818 }, { "epoch": 0.9752658453178432, "grad_norm": 0.5757219924903293, "learning_rate": 2.721316102093556e-06, "loss": 0.3084, "step": 20819 }, { "epoch": 0.9753126903077716, "grad_norm": 0.6166572951456746, "learning_rate": 2.7211271959068574e-06, "loss": 0.3026, "step": 20820 }, { "epoch": 0.9753595352976999, "grad_norm": 0.623811222259514, "learning_rate": 2.7209382884476277e-06, "loss": 0.3144, "step": 20821 }, { "epoch": 0.9754063802876283, "grad_norm": 0.5754462046818337, "learning_rate": 2.7207493797169525e-06, "loss": 0.3251, "step": 20822 }, { "epoch": 0.9754532252775565, "grad_norm": 0.6088874957360474, "learning_rate": 2.7205604697159215e-06, "loss": 0.3151, "step": 20823 }, { "epoch": 0.9755000702674849, "grad_norm": 0.5488356265529648, "learning_rate": 2.7203715584456196e-06, "loss": 0.3084, "step": 20824 }, { "epoch": 0.9755469152574132, "grad_norm": 0.5969541036657772, "learning_rate": 2.7201826459071353e-06, "loss": 0.3201, "step": 20825 }, { "epoch": 0.9755937602473416, "grad_norm": 0.6340908220198767, "learning_rate": 2.719993732101555e-06, "loss": 0.3281, "step": 20826 }, { "epoch": 0.9756406052372699, "grad_norm": 0.585926054482069, "learning_rate": 2.719804817029966e-06, "loss": 0.3036, "step": 20827 }, { "epoch": 0.9756874502271982, "grad_norm": 0.6133298220720815, "learning_rate": 2.719615900693456e-06, "loss": 0.3219, "step": 20828 }, { "epoch": 0.9757342952171265, "grad_norm": 0.5763402416628665, "learning_rate": 2.719426983093111e-06, "loss": 0.3129, "step": 20829 }, { "epoch": 0.9757811402070549, "grad_norm": 0.6255155416336033, "learning_rate": 2.719238064230019e-06, "loss": 0.3302, "step": 20830 }, { "epoch": 0.9758279851969832, "grad_norm": 0.601333703561789, "learning_rate": 2.7190491441052672e-06, "loss": 0.3167, "step": 20831 }, { "epoch": 0.9758748301869115, "grad_norm": 0.6372450049244336, "learning_rate": 2.7188602227199427e-06, "loss": 0.3303, "step": 20832 }, { "epoch": 0.9759216751768398, "grad_norm": 0.5587248539813785, "learning_rate": 2.7186713000751327e-06, "loss": 0.2978, "step": 20833 }, { "epoch": 0.9759685201667682, "grad_norm": 0.6498162495449875, "learning_rate": 2.718482376171924e-06, "loss": 0.3372, "step": 20834 }, { "epoch": 0.9760153651566965, "grad_norm": 0.598360220035277, "learning_rate": 2.7182934510114044e-06, "loss": 0.3331, "step": 20835 }, { "epoch": 0.9760622101466249, "grad_norm": 0.5792945816523616, "learning_rate": 2.718104524594661e-06, "loss": 0.31, "step": 20836 }, { "epoch": 0.9761090551365531, "grad_norm": 0.5625372369058307, "learning_rate": 2.7179155969227807e-06, "loss": 0.3099, "step": 20837 }, { "epoch": 0.9761559001264815, "grad_norm": 0.645378732350229, "learning_rate": 2.7177266679968507e-06, "loss": 0.3562, "step": 20838 }, { "epoch": 0.9762027451164098, "grad_norm": 0.6309809978032306, "learning_rate": 2.717537737817959e-06, "loss": 0.3196, "step": 20839 }, { "epoch": 0.9762495901063382, "grad_norm": 0.5653232914788267, "learning_rate": 2.717348806387191e-06, "loss": 0.3252, "step": 20840 }, { "epoch": 0.9762964350962664, "grad_norm": 0.6199378311974948, "learning_rate": 2.717159873705636e-06, "loss": 0.3367, "step": 20841 }, { "epoch": 0.9763432800861948, "grad_norm": 0.5870296484625972, "learning_rate": 2.7169709397743814e-06, "loss": 0.3218, "step": 20842 }, { "epoch": 0.9763901250761231, "grad_norm": 0.5908998893922962, "learning_rate": 2.7167820045945124e-06, "loss": 0.3101, "step": 20843 }, { "epoch": 0.9764369700660515, "grad_norm": 0.5301736768106331, "learning_rate": 2.7165930681671177e-06, "loss": 0.3094, "step": 20844 }, { "epoch": 0.9764838150559798, "grad_norm": 0.5737274737439911, "learning_rate": 2.7164041304932848e-06, "loss": 0.3171, "step": 20845 }, { "epoch": 0.976530660045908, "grad_norm": 0.567815274168336, "learning_rate": 2.7162151915741004e-06, "loss": 0.3206, "step": 20846 }, { "epoch": 0.9765775050358364, "grad_norm": 0.5809997437386174, "learning_rate": 2.7160262514106517e-06, "loss": 0.3043, "step": 20847 }, { "epoch": 0.9766243500257648, "grad_norm": 0.6088928987767025, "learning_rate": 2.7158373100040263e-06, "loss": 0.3199, "step": 20848 }, { "epoch": 0.9766711950156931, "grad_norm": 0.5631489590788278, "learning_rate": 2.715648367355312e-06, "loss": 0.3009, "step": 20849 }, { "epoch": 0.9767180400056213, "grad_norm": 0.5594836029326989, "learning_rate": 2.715459423465594e-06, "loss": 0.3038, "step": 20850 }, { "epoch": 0.9767648849955497, "grad_norm": 0.5641504349292522, "learning_rate": 2.7152704783359624e-06, "loss": 0.3237, "step": 20851 }, { "epoch": 0.976811729985478, "grad_norm": 0.602221826050301, "learning_rate": 2.715081531967503e-06, "loss": 0.3102, "step": 20852 }, { "epoch": 0.9768585749754064, "grad_norm": 0.6205122849562414, "learning_rate": 2.714892584361304e-06, "loss": 0.3181, "step": 20853 }, { "epoch": 0.9769054199653348, "grad_norm": 0.6537463811199962, "learning_rate": 2.7147036355184513e-06, "loss": 0.3389, "step": 20854 }, { "epoch": 0.976952264955263, "grad_norm": 0.6170354424904951, "learning_rate": 2.7145146854400334e-06, "loss": 0.3123, "step": 20855 }, { "epoch": 0.9769991099451913, "grad_norm": 0.5985109690779733, "learning_rate": 2.714325734127138e-06, "loss": 0.3286, "step": 20856 }, { "epoch": 0.9770459549351197, "grad_norm": 0.5935218162710398, "learning_rate": 2.714136781580851e-06, "loss": 0.3136, "step": 20857 }, { "epoch": 0.977092799925048, "grad_norm": 0.5990466529250674, "learning_rate": 2.713947827802261e-06, "loss": 0.3242, "step": 20858 }, { "epoch": 0.9771396449149763, "grad_norm": 0.5879430143635315, "learning_rate": 2.7137588727924553e-06, "loss": 0.2897, "step": 20859 }, { "epoch": 0.9771864899049046, "grad_norm": 0.5878071455247881, "learning_rate": 2.7135699165525207e-06, "loss": 0.3023, "step": 20860 }, { "epoch": 0.977233334894833, "grad_norm": 0.5173428959553682, "learning_rate": 2.7133809590835448e-06, "loss": 0.2868, "step": 20861 }, { "epoch": 0.9772801798847613, "grad_norm": 0.5897877724931897, "learning_rate": 2.7131920003866145e-06, "loss": 0.3167, "step": 20862 }, { "epoch": 0.9773270248746897, "grad_norm": 0.598830966789461, "learning_rate": 2.7130030404628198e-06, "loss": 0.308, "step": 20863 }, { "epoch": 0.9773738698646179, "grad_norm": 0.5841525878414542, "learning_rate": 2.7128140793132447e-06, "loss": 0.3107, "step": 20864 }, { "epoch": 0.9774207148545463, "grad_norm": 0.6796359922920169, "learning_rate": 2.7126251169389777e-06, "loss": 0.3424, "step": 20865 }, { "epoch": 0.9774675598444746, "grad_norm": 0.6339988469282467, "learning_rate": 2.7124361533411076e-06, "loss": 0.311, "step": 20866 }, { "epoch": 0.977514404834403, "grad_norm": 0.6005985433118096, "learning_rate": 2.7122471885207204e-06, "loss": 0.3079, "step": 20867 }, { "epoch": 0.9775612498243312, "grad_norm": 0.5993242309326425, "learning_rate": 2.7120582224789036e-06, "loss": 0.3185, "step": 20868 }, { "epoch": 0.9776080948142596, "grad_norm": 0.6196522104248978, "learning_rate": 2.7118692552167458e-06, "loss": 0.3164, "step": 20869 }, { "epoch": 0.9776549398041879, "grad_norm": 0.5667505833687555, "learning_rate": 2.7116802867353335e-06, "loss": 0.3139, "step": 20870 }, { "epoch": 0.9777017847941163, "grad_norm": 0.6722264752140886, "learning_rate": 2.7114913170357536e-06, "loss": 0.3464, "step": 20871 }, { "epoch": 0.9777486297840446, "grad_norm": 0.5776533179609746, "learning_rate": 2.711302346119095e-06, "loss": 0.3015, "step": 20872 }, { "epoch": 0.9777954747739729, "grad_norm": 0.6116692262163077, "learning_rate": 2.7111133739864447e-06, "loss": 0.3076, "step": 20873 }, { "epoch": 0.9778423197639012, "grad_norm": 0.5732596754050314, "learning_rate": 2.7109244006388896e-06, "loss": 0.2973, "step": 20874 }, { "epoch": 0.9778891647538296, "grad_norm": 0.5980539071313401, "learning_rate": 2.710735426077518e-06, "loss": 0.3264, "step": 20875 }, { "epoch": 0.9779360097437579, "grad_norm": 0.589534411981248, "learning_rate": 2.7105464503034164e-06, "loss": 0.3185, "step": 20876 }, { "epoch": 0.9779828547336862, "grad_norm": 0.6249019897975129, "learning_rate": 2.710357473317673e-06, "loss": 0.3385, "step": 20877 }, { "epoch": 0.9780296997236145, "grad_norm": 0.5722116036203337, "learning_rate": 2.7101684951213757e-06, "loss": 0.2988, "step": 20878 }, { "epoch": 0.9780765447135429, "grad_norm": 0.5886993212341946, "learning_rate": 2.709979515715611e-06, "loss": 0.3241, "step": 20879 }, { "epoch": 0.9781233897034712, "grad_norm": 0.5350188311907375, "learning_rate": 2.7097905351014673e-06, "loss": 0.2916, "step": 20880 }, { "epoch": 0.9781702346933996, "grad_norm": 0.6963472218905209, "learning_rate": 2.709601553280032e-06, "loss": 0.3379, "step": 20881 }, { "epoch": 0.9782170796833278, "grad_norm": 0.6421205699545739, "learning_rate": 2.7094125702523916e-06, "loss": 0.3274, "step": 20882 }, { "epoch": 0.9782639246732562, "grad_norm": 0.6122437805435152, "learning_rate": 2.7092235860196347e-06, "loss": 0.3199, "step": 20883 }, { "epoch": 0.9783107696631845, "grad_norm": 0.6175707953242624, "learning_rate": 2.70903460058285e-06, "loss": 0.3152, "step": 20884 }, { "epoch": 0.9783576146531129, "grad_norm": 0.5835598590270028, "learning_rate": 2.7088456139431225e-06, "loss": 0.3122, "step": 20885 }, { "epoch": 0.9784044596430411, "grad_norm": 0.5640611266403186, "learning_rate": 2.7086566261015406e-06, "loss": 0.2976, "step": 20886 }, { "epoch": 0.9784513046329695, "grad_norm": 0.62489046697028, "learning_rate": 2.708467637059193e-06, "loss": 0.3075, "step": 20887 }, { "epoch": 0.9784981496228978, "grad_norm": 0.593206751886542, "learning_rate": 2.7082786468171664e-06, "loss": 0.3206, "step": 20888 }, { "epoch": 0.9785449946128262, "grad_norm": 0.6720976978620196, "learning_rate": 2.7080896553765483e-06, "loss": 0.3149, "step": 20889 }, { "epoch": 0.9785918396027545, "grad_norm": 0.5972043917477086, "learning_rate": 2.707900662738427e-06, "loss": 0.3092, "step": 20890 }, { "epoch": 0.9786386845926828, "grad_norm": 0.6384451691780462, "learning_rate": 2.7077116689038895e-06, "loss": 0.3343, "step": 20891 }, { "epoch": 0.9786855295826111, "grad_norm": 0.5639207883915469, "learning_rate": 2.7075226738740227e-06, "loss": 0.3153, "step": 20892 }, { "epoch": 0.9787323745725395, "grad_norm": 0.5695341971121811, "learning_rate": 2.7073336776499165e-06, "loss": 0.321, "step": 20893 }, { "epoch": 0.9787792195624678, "grad_norm": 0.5611584850488524, "learning_rate": 2.7071446802326564e-06, "loss": 0.3032, "step": 20894 }, { "epoch": 0.9788260645523961, "grad_norm": 0.5761069835052913, "learning_rate": 2.7069556816233304e-06, "loss": 0.3267, "step": 20895 }, { "epoch": 0.9788729095423244, "grad_norm": 0.5737837335301429, "learning_rate": 2.7067666818230266e-06, "loss": 0.306, "step": 20896 }, { "epoch": 0.9789197545322528, "grad_norm": 0.5471323613854584, "learning_rate": 2.7065776808328333e-06, "loss": 0.2951, "step": 20897 }, { "epoch": 0.9789665995221811, "grad_norm": 0.6436433573099254, "learning_rate": 2.706388678653837e-06, "loss": 0.3237, "step": 20898 }, { "epoch": 0.9790134445121095, "grad_norm": 0.6155246034199051, "learning_rate": 2.7061996752871245e-06, "loss": 0.3184, "step": 20899 }, { "epoch": 0.9790602895020377, "grad_norm": 0.5819672179054874, "learning_rate": 2.7060106707337863e-06, "loss": 0.3177, "step": 20900 }, { "epoch": 0.9791071344919661, "grad_norm": 0.5962744918563413, "learning_rate": 2.7058216649949078e-06, "loss": 0.3213, "step": 20901 }, { "epoch": 0.9791539794818944, "grad_norm": 0.6154776545083475, "learning_rate": 2.705632658071577e-06, "loss": 0.3122, "step": 20902 }, { "epoch": 0.9792008244718228, "grad_norm": 0.5771173651840378, "learning_rate": 2.705443649964883e-06, "loss": 0.2907, "step": 20903 }, { "epoch": 0.979247669461751, "grad_norm": 0.5981172354132397, "learning_rate": 2.705254640675911e-06, "loss": 0.3187, "step": 20904 }, { "epoch": 0.9792945144516794, "grad_norm": 0.5142420783998927, "learning_rate": 2.7050656302057517e-06, "loss": 0.2999, "step": 20905 }, { "epoch": 0.9793413594416077, "grad_norm": 0.5597674615298818, "learning_rate": 2.7048766185554904e-06, "loss": 0.3052, "step": 20906 }, { "epoch": 0.9793882044315361, "grad_norm": 0.5645006847786705, "learning_rate": 2.704687605726215e-06, "loss": 0.3008, "step": 20907 }, { "epoch": 0.9794350494214644, "grad_norm": 0.6129861534047742, "learning_rate": 2.704498591719015e-06, "loss": 0.324, "step": 20908 }, { "epoch": 0.9794818944113927, "grad_norm": 0.5741078377284975, "learning_rate": 2.704309576534977e-06, "loss": 0.2929, "step": 20909 }, { "epoch": 0.979528739401321, "grad_norm": 0.5897422702072488, "learning_rate": 2.704120560175188e-06, "loss": 0.306, "step": 20910 }, { "epoch": 0.9795755843912494, "grad_norm": 0.6311728697928384, "learning_rate": 2.7039315426407363e-06, "loss": 0.3422, "step": 20911 }, { "epoch": 0.9796224293811777, "grad_norm": 0.6043333562267698, "learning_rate": 2.703742523932711e-06, "loss": 0.3262, "step": 20912 }, { "epoch": 0.979669274371106, "grad_norm": 0.560488938454853, "learning_rate": 2.7035535040521977e-06, "loss": 0.3057, "step": 20913 }, { "epoch": 0.9797161193610343, "grad_norm": 0.6579074661321356, "learning_rate": 2.7033644830002853e-06, "loss": 0.321, "step": 20914 }, { "epoch": 0.9797629643509627, "grad_norm": 0.579321152664734, "learning_rate": 2.7031754607780617e-06, "loss": 0.3111, "step": 20915 }, { "epoch": 0.979809809340891, "grad_norm": 0.5776586183752619, "learning_rate": 2.7029864373866142e-06, "loss": 0.3193, "step": 20916 }, { "epoch": 0.9798566543308194, "grad_norm": 0.612663465446753, "learning_rate": 2.702797412827031e-06, "loss": 0.312, "step": 20917 }, { "epoch": 0.9799034993207476, "grad_norm": 0.5982027871519044, "learning_rate": 2.7026083871003995e-06, "loss": 0.3133, "step": 20918 }, { "epoch": 0.979950344310676, "grad_norm": 0.5793751269264449, "learning_rate": 2.702419360207808e-06, "loss": 0.2983, "step": 20919 }, { "epoch": 0.9799971893006043, "grad_norm": 0.6030218750259247, "learning_rate": 2.702230332150343e-06, "loss": 0.3086, "step": 20920 }, { "epoch": 0.9800440342905327, "grad_norm": 0.623568175722694, "learning_rate": 2.702041302929094e-06, "loss": 0.3159, "step": 20921 }, { "epoch": 0.9800908792804609, "grad_norm": 0.5680268950890422, "learning_rate": 2.7018522725451483e-06, "loss": 0.3233, "step": 20922 }, { "epoch": 0.9801377242703893, "grad_norm": 0.6974010239442582, "learning_rate": 2.701663240999593e-06, "loss": 0.3124, "step": 20923 }, { "epoch": 0.9801845692603176, "grad_norm": 0.6364087872455131, "learning_rate": 2.7014742082935167e-06, "loss": 0.3274, "step": 20924 }, { "epoch": 0.980231414250246, "grad_norm": 0.5910250787721433, "learning_rate": 2.7012851744280067e-06, "loss": 0.321, "step": 20925 }, { "epoch": 0.9802782592401743, "grad_norm": 0.5750738211474261, "learning_rate": 2.701096139404151e-06, "loss": 0.3047, "step": 20926 }, { "epoch": 0.9803251042301026, "grad_norm": 0.5509333775305658, "learning_rate": 2.7009071032230385e-06, "loss": 0.2884, "step": 20927 }, { "epoch": 0.9803719492200309, "grad_norm": 0.6333142045446782, "learning_rate": 2.700718065885755e-06, "loss": 0.3295, "step": 20928 }, { "epoch": 0.9804187942099593, "grad_norm": 0.6140964689169179, "learning_rate": 2.7005290273933902e-06, "loss": 0.3185, "step": 20929 }, { "epoch": 0.9804656391998876, "grad_norm": 0.6355489458530635, "learning_rate": 2.700339987747031e-06, "loss": 0.3105, "step": 20930 }, { "epoch": 0.9805124841898158, "grad_norm": 0.5722597371628388, "learning_rate": 2.7001509469477656e-06, "loss": 0.3119, "step": 20931 }, { "epoch": 0.9805593291797442, "grad_norm": 0.5921500330742941, "learning_rate": 2.6999619049966815e-06, "loss": 0.3241, "step": 20932 }, { "epoch": 0.9806061741696726, "grad_norm": 0.6144993219421607, "learning_rate": 2.6997728618948675e-06, "loss": 0.311, "step": 20933 }, { "epoch": 0.9806530191596009, "grad_norm": 0.5704716875927394, "learning_rate": 2.6995838176434105e-06, "loss": 0.3083, "step": 20934 }, { "epoch": 0.9806998641495293, "grad_norm": 0.6012201936323656, "learning_rate": 2.6993947722433983e-06, "loss": 0.3371, "step": 20935 }, { "epoch": 0.9807467091394575, "grad_norm": 0.5877410610668053, "learning_rate": 2.6992057256959196e-06, "loss": 0.3277, "step": 20936 }, { "epoch": 0.9807935541293858, "grad_norm": 0.6016591925413439, "learning_rate": 2.6990166780020626e-06, "loss": 0.3324, "step": 20937 }, { "epoch": 0.9808403991193142, "grad_norm": 0.598868938683012, "learning_rate": 2.698827629162914e-06, "loss": 0.3121, "step": 20938 }, { "epoch": 0.9808872441092426, "grad_norm": 0.6121547083762491, "learning_rate": 2.6986385791795627e-06, "loss": 0.3305, "step": 20939 }, { "epoch": 0.9809340890991708, "grad_norm": 0.5433947558080975, "learning_rate": 2.6984495280530966e-06, "loss": 0.3169, "step": 20940 }, { "epoch": 0.9809809340890991, "grad_norm": 0.5777965829068158, "learning_rate": 2.6982604757846028e-06, "loss": 0.3075, "step": 20941 }, { "epoch": 0.9810277790790275, "grad_norm": 0.579944987771698, "learning_rate": 2.6980714223751703e-06, "loss": 0.3151, "step": 20942 }, { "epoch": 0.9810746240689558, "grad_norm": 0.6082827068425584, "learning_rate": 2.697882367825886e-06, "loss": 0.327, "step": 20943 }, { "epoch": 0.9811214690588842, "grad_norm": 0.5994105411826349, "learning_rate": 2.6976933121378384e-06, "loss": 0.3319, "step": 20944 }, { "epoch": 0.9811683140488124, "grad_norm": 0.5886034253336366, "learning_rate": 2.6975042553121163e-06, "loss": 0.3189, "step": 20945 }, { "epoch": 0.9812151590387408, "grad_norm": 0.5284252502491297, "learning_rate": 2.697315197349806e-06, "loss": 0.2752, "step": 20946 }, { "epoch": 0.9812620040286691, "grad_norm": 0.579053386879222, "learning_rate": 2.6971261382519963e-06, "loss": 0.3042, "step": 20947 }, { "epoch": 0.9813088490185975, "grad_norm": 0.5766716200652672, "learning_rate": 2.6969370780197757e-06, "loss": 0.3151, "step": 20948 }, { "epoch": 0.9813556940085257, "grad_norm": 0.5875421423926788, "learning_rate": 2.6967480166542314e-06, "loss": 0.3073, "step": 20949 }, { "epoch": 0.9814025389984541, "grad_norm": 0.5932478759530716, "learning_rate": 2.6965589541564518e-06, "loss": 0.312, "step": 20950 }, { "epoch": 0.9814493839883824, "grad_norm": 0.5544075311187445, "learning_rate": 2.696369890527525e-06, "loss": 0.3074, "step": 20951 }, { "epoch": 0.9814962289783108, "grad_norm": 0.6568815832729848, "learning_rate": 2.6961808257685383e-06, "loss": 0.3257, "step": 20952 }, { "epoch": 0.9815430739682391, "grad_norm": 0.5779022104010002, "learning_rate": 2.6959917598805805e-06, "loss": 0.3057, "step": 20953 }, { "epoch": 0.9815899189581674, "grad_norm": 0.6011119990602894, "learning_rate": 2.6958026928647403e-06, "loss": 0.3157, "step": 20954 }, { "epoch": 0.9816367639480957, "grad_norm": 0.6288770766681144, "learning_rate": 2.6956136247221036e-06, "loss": 0.328, "step": 20955 }, { "epoch": 0.9816836089380241, "grad_norm": 0.587396961569824, "learning_rate": 2.69542455545376e-06, "loss": 0.3288, "step": 20956 }, { "epoch": 0.9817304539279524, "grad_norm": 0.6032311894213861, "learning_rate": 2.6952354850607974e-06, "loss": 0.3015, "step": 20957 }, { "epoch": 0.9817772989178807, "grad_norm": 0.5755948300060105, "learning_rate": 2.695046413544304e-06, "loss": 0.3174, "step": 20958 }, { "epoch": 0.981824143907809, "grad_norm": 0.5504238087738633, "learning_rate": 2.6948573409053665e-06, "loss": 0.3271, "step": 20959 }, { "epoch": 0.9818709888977374, "grad_norm": 0.6268271443131052, "learning_rate": 2.6946682671450747e-06, "loss": 0.3052, "step": 20960 }, { "epoch": 0.9819178338876657, "grad_norm": 0.5700102392964504, "learning_rate": 2.694479192264516e-06, "loss": 0.2961, "step": 20961 }, { "epoch": 0.9819646788775941, "grad_norm": 0.61360345797036, "learning_rate": 2.6942901162647778e-06, "loss": 0.3193, "step": 20962 }, { "epoch": 0.9820115238675223, "grad_norm": 0.5887808938341973, "learning_rate": 2.6941010391469494e-06, "loss": 0.2982, "step": 20963 }, { "epoch": 0.9820583688574507, "grad_norm": 0.6169413535352554, "learning_rate": 2.6939119609121176e-06, "loss": 0.3218, "step": 20964 }, { "epoch": 0.982105213847379, "grad_norm": 0.5323654777854434, "learning_rate": 2.6937228815613724e-06, "loss": 0.3143, "step": 20965 }, { "epoch": 0.9821520588373074, "grad_norm": 0.6486122331393135, "learning_rate": 2.6935338010957997e-06, "loss": 0.3294, "step": 20966 }, { "epoch": 0.9821989038272356, "grad_norm": 0.5924753921213886, "learning_rate": 2.693344719516489e-06, "loss": 0.3055, "step": 20967 }, { "epoch": 0.982245748817164, "grad_norm": 0.6185088838407592, "learning_rate": 2.693155636824528e-06, "loss": 0.2987, "step": 20968 }, { "epoch": 0.9822925938070923, "grad_norm": 0.6666458462878316, "learning_rate": 2.6929665530210047e-06, "loss": 0.326, "step": 20969 }, { "epoch": 0.9823394387970207, "grad_norm": 0.5822322178717172, "learning_rate": 2.692777468107008e-06, "loss": 0.3177, "step": 20970 }, { "epoch": 0.982386283786949, "grad_norm": 0.5728179211414834, "learning_rate": 2.6925883820836253e-06, "loss": 0.3028, "step": 20971 }, { "epoch": 0.9824331287768773, "grad_norm": 0.5655146030338977, "learning_rate": 2.692399294951944e-06, "loss": 0.2959, "step": 20972 }, { "epoch": 0.9824799737668056, "grad_norm": 0.6101463664659523, "learning_rate": 2.692210206713054e-06, "loss": 0.3162, "step": 20973 }, { "epoch": 0.982526818756734, "grad_norm": 0.5832125220168038, "learning_rate": 2.692021117368042e-06, "loss": 0.3222, "step": 20974 }, { "epoch": 0.9825736637466623, "grad_norm": 0.5711176032316649, "learning_rate": 2.691832026917997e-06, "loss": 0.3192, "step": 20975 }, { "epoch": 0.9826205087365906, "grad_norm": 0.6172249313742593, "learning_rate": 2.691642935364007e-06, "loss": 0.3242, "step": 20976 }, { "epoch": 0.9826673537265189, "grad_norm": 0.5837232691051338, "learning_rate": 2.6914538427071595e-06, "loss": 0.3114, "step": 20977 }, { "epoch": 0.9827141987164473, "grad_norm": 0.5739883299927645, "learning_rate": 2.691264748948544e-06, "loss": 0.2988, "step": 20978 }, { "epoch": 0.9827610437063756, "grad_norm": 0.5630196919604396, "learning_rate": 2.691075654089248e-06, "loss": 0.3047, "step": 20979 }, { "epoch": 0.982807888696304, "grad_norm": 0.5689047766133555, "learning_rate": 2.690886558130359e-06, "loss": 0.3003, "step": 20980 }, { "epoch": 0.9828547336862322, "grad_norm": 0.5801103491905264, "learning_rate": 2.690697461072966e-06, "loss": 0.3133, "step": 20981 }, { "epoch": 0.9829015786761606, "grad_norm": 0.5672637027441398, "learning_rate": 2.690508362918157e-06, "loss": 0.3183, "step": 20982 }, { "epoch": 0.9829484236660889, "grad_norm": 0.597104198812939, "learning_rate": 2.6903192636670207e-06, "loss": 0.3134, "step": 20983 }, { "epoch": 0.9829952686560173, "grad_norm": 0.5523163332929429, "learning_rate": 2.690130163320644e-06, "loss": 0.3009, "step": 20984 }, { "epoch": 0.9830421136459455, "grad_norm": 0.6250400062182484, "learning_rate": 2.689941061880117e-06, "loss": 0.3001, "step": 20985 }, { "epoch": 0.9830889586358739, "grad_norm": 0.6455298287134722, "learning_rate": 2.6897519593465267e-06, "loss": 0.3247, "step": 20986 }, { "epoch": 0.9831358036258022, "grad_norm": 0.6112509332376417, "learning_rate": 2.6895628557209607e-06, "loss": 0.2987, "step": 20987 }, { "epoch": 0.9831826486157306, "grad_norm": 0.5766895883258111, "learning_rate": 2.6893737510045094e-06, "loss": 0.2988, "step": 20988 }, { "epoch": 0.9832294936056589, "grad_norm": 0.6218021997981966, "learning_rate": 2.6891846451982588e-06, "loss": 0.3237, "step": 20989 }, { "epoch": 0.9832763385955872, "grad_norm": 0.6421851582738, "learning_rate": 2.6889955383032985e-06, "loss": 0.3346, "step": 20990 }, { "epoch": 0.9833231835855155, "grad_norm": 0.631879844551887, "learning_rate": 2.688806430320716e-06, "loss": 0.334, "step": 20991 }, { "epoch": 0.9833700285754439, "grad_norm": 0.6327680187610304, "learning_rate": 2.688617321251601e-06, "loss": 0.3162, "step": 20992 }, { "epoch": 0.9834168735653722, "grad_norm": 0.5885305147608094, "learning_rate": 2.6884282110970394e-06, "loss": 0.3131, "step": 20993 }, { "epoch": 0.9834637185553005, "grad_norm": 0.6254337130206932, "learning_rate": 2.688239099858122e-06, "loss": 0.3398, "step": 20994 }, { "epoch": 0.9835105635452288, "grad_norm": 0.6069339889229416, "learning_rate": 2.688049987535935e-06, "loss": 0.3311, "step": 20995 }, { "epoch": 0.9835574085351572, "grad_norm": 0.5788939530481941, "learning_rate": 2.6878608741315682e-06, "loss": 0.3035, "step": 20996 }, { "epoch": 0.9836042535250855, "grad_norm": 0.6411578816671517, "learning_rate": 2.687671759646109e-06, "loss": 0.3095, "step": 20997 }, { "epoch": 0.9836510985150139, "grad_norm": 0.7007881758226676, "learning_rate": 2.687482644080646e-06, "loss": 0.3156, "step": 20998 }, { "epoch": 0.9836979435049421, "grad_norm": 0.7398731697185391, "learning_rate": 2.6872935274362673e-06, "loss": 0.3428, "step": 20999 }, { "epoch": 0.9837447884948705, "grad_norm": 0.5560701941840791, "learning_rate": 2.6871044097140618e-06, "loss": 0.289, "step": 21000 }, { "epoch": 0.9837916334847988, "grad_norm": 0.6125249035454007, "learning_rate": 2.686915290915117e-06, "loss": 0.3074, "step": 21001 }, { "epoch": 0.9838384784747272, "grad_norm": 0.5635127529031223, "learning_rate": 2.6867261710405214e-06, "loss": 0.3175, "step": 21002 }, { "epoch": 0.9838853234646554, "grad_norm": 0.642818081975461, "learning_rate": 2.686537050091365e-06, "loss": 0.3204, "step": 21003 }, { "epoch": 0.9839321684545838, "grad_norm": 0.5763002119633409, "learning_rate": 2.686347928068733e-06, "loss": 0.2932, "step": 21004 }, { "epoch": 0.9839790134445121, "grad_norm": 0.6021929734315139, "learning_rate": 2.6861588049737163e-06, "loss": 0.3106, "step": 21005 }, { "epoch": 0.9840258584344405, "grad_norm": 0.6199782565410443, "learning_rate": 2.685969680807403e-06, "loss": 0.3289, "step": 21006 }, { "epoch": 0.9840727034243688, "grad_norm": 0.638747372147842, "learning_rate": 2.685780555570881e-06, "loss": 0.3152, "step": 21007 }, { "epoch": 0.984119548414297, "grad_norm": 0.6326184185748585, "learning_rate": 2.6855914292652372e-06, "loss": 0.3262, "step": 21008 }, { "epoch": 0.9841663934042254, "grad_norm": 0.6419982545063474, "learning_rate": 2.6854023018915627e-06, "loss": 0.3286, "step": 21009 }, { "epoch": 0.9842132383941538, "grad_norm": 0.6132607196177721, "learning_rate": 2.6852131734509446e-06, "loss": 0.3318, "step": 21010 }, { "epoch": 0.9842600833840821, "grad_norm": 0.5818783182310883, "learning_rate": 2.6850240439444703e-06, "loss": 0.3001, "step": 21011 }, { "epoch": 0.9843069283740103, "grad_norm": 0.6146392563022097, "learning_rate": 2.68483491337323e-06, "loss": 0.3084, "step": 21012 }, { "epoch": 0.9843537733639387, "grad_norm": 0.6184701358657505, "learning_rate": 2.684645781738311e-06, "loss": 0.3021, "step": 21013 }, { "epoch": 0.984400618353867, "grad_norm": 0.7123156496511844, "learning_rate": 2.6844566490408012e-06, "loss": 0.3249, "step": 21014 }, { "epoch": 0.9844474633437954, "grad_norm": 0.6054510757575654, "learning_rate": 2.6842675152817904e-06, "loss": 0.311, "step": 21015 }, { "epoch": 0.9844943083337238, "grad_norm": 0.5715676489099517, "learning_rate": 2.684078380462366e-06, "loss": 0.3063, "step": 21016 }, { "epoch": 0.984541153323652, "grad_norm": 0.5808376826375794, "learning_rate": 2.6838892445836173e-06, "loss": 0.2955, "step": 21017 }, { "epoch": 0.9845879983135803, "grad_norm": 0.6232950212693182, "learning_rate": 2.6837001076466325e-06, "loss": 0.3159, "step": 21018 }, { "epoch": 0.9846348433035087, "grad_norm": 0.5675017412846157, "learning_rate": 2.6835109696524984e-06, "loss": 0.309, "step": 21019 }, { "epoch": 0.984681688293437, "grad_norm": 0.5990719302348039, "learning_rate": 2.683321830602306e-06, "loss": 0.2897, "step": 21020 }, { "epoch": 0.9847285332833653, "grad_norm": 0.576103966194844, "learning_rate": 2.6831326904971423e-06, "loss": 0.311, "step": 21021 }, { "epoch": 0.9847753782732936, "grad_norm": 0.5368081231855356, "learning_rate": 2.682943549338096e-06, "loss": 0.2872, "step": 21022 }, { "epoch": 0.984822223263222, "grad_norm": 0.5619872413041115, "learning_rate": 2.6827544071262548e-06, "loss": 0.3005, "step": 21023 }, { "epoch": 0.9848690682531503, "grad_norm": 0.5478057510659204, "learning_rate": 2.6825652638627092e-06, "loss": 0.2932, "step": 21024 }, { "epoch": 0.9849159132430787, "grad_norm": 0.6466755065923817, "learning_rate": 2.6823761195485455e-06, "loss": 0.3009, "step": 21025 }, { "epoch": 0.9849627582330069, "grad_norm": 0.5812448474233969, "learning_rate": 2.6821869741848525e-06, "loss": 0.2964, "step": 21026 }, { "epoch": 0.9850096032229353, "grad_norm": 0.5747144508660674, "learning_rate": 2.6819978277727204e-06, "loss": 0.3133, "step": 21027 }, { "epoch": 0.9850564482128636, "grad_norm": 0.5846324336585211, "learning_rate": 2.6818086803132366e-06, "loss": 0.3125, "step": 21028 }, { "epoch": 0.985103293202792, "grad_norm": 0.5704628947029045, "learning_rate": 2.681619531807489e-06, "loss": 0.3087, "step": 21029 }, { "epoch": 0.9851501381927202, "grad_norm": 0.5917626616819663, "learning_rate": 2.681430382256567e-06, "loss": 0.3184, "step": 21030 }, { "epoch": 0.9851969831826486, "grad_norm": 0.5505943330705758, "learning_rate": 2.681241231661559e-06, "loss": 0.2957, "step": 21031 }, { "epoch": 0.9852438281725769, "grad_norm": 0.5777788548689424, "learning_rate": 2.6810520800235525e-06, "loss": 0.3041, "step": 21032 }, { "epoch": 0.9852906731625053, "grad_norm": 0.59889838801137, "learning_rate": 2.6808629273436365e-06, "loss": 0.3488, "step": 21033 }, { "epoch": 0.9853375181524336, "grad_norm": 0.602190257628338, "learning_rate": 2.6806737736229005e-06, "loss": 0.326, "step": 21034 }, { "epoch": 0.9853843631423619, "grad_norm": 0.5727176909403755, "learning_rate": 2.6804846188624323e-06, "loss": 0.3048, "step": 21035 }, { "epoch": 0.9854312081322902, "grad_norm": 0.5442469482765416, "learning_rate": 2.68029546306332e-06, "loss": 0.3059, "step": 21036 }, { "epoch": 0.9854780531222186, "grad_norm": 0.5875377886288832, "learning_rate": 2.6801063062266535e-06, "loss": 0.3013, "step": 21037 }, { "epoch": 0.9855248981121469, "grad_norm": 0.618473144187077, "learning_rate": 2.67991714835352e-06, "loss": 0.329, "step": 21038 }, { "epoch": 0.9855717431020752, "grad_norm": 0.6007783579370795, "learning_rate": 2.6797279894450084e-06, "loss": 0.3155, "step": 21039 }, { "epoch": 0.9856185880920035, "grad_norm": 0.6053546646642415, "learning_rate": 2.679538829502208e-06, "loss": 0.3032, "step": 21040 }, { "epoch": 0.9856654330819319, "grad_norm": 0.5686402870787507, "learning_rate": 2.679349668526206e-06, "loss": 0.3148, "step": 21041 }, { "epoch": 0.9857122780718602, "grad_norm": 0.5980124167404663, "learning_rate": 2.679160506518092e-06, "loss": 0.3063, "step": 21042 }, { "epoch": 0.9857591230617886, "grad_norm": 0.5796440176355488, "learning_rate": 2.6789713434789544e-06, "loss": 0.3373, "step": 21043 }, { "epoch": 0.9858059680517168, "grad_norm": 0.632806911868032, "learning_rate": 2.6787821794098813e-06, "loss": 0.3233, "step": 21044 }, { "epoch": 0.9858528130416452, "grad_norm": 0.583865640695673, "learning_rate": 2.6785930143119626e-06, "loss": 0.3159, "step": 21045 }, { "epoch": 0.9858996580315735, "grad_norm": 0.5868879220141852, "learning_rate": 2.6784038481862855e-06, "loss": 0.3, "step": 21046 }, { "epoch": 0.9859465030215019, "grad_norm": 0.5868500516524638, "learning_rate": 2.6782146810339387e-06, "loss": 0.3091, "step": 21047 }, { "epoch": 0.9859933480114301, "grad_norm": 0.541413855919606, "learning_rate": 2.6780255128560117e-06, "loss": 0.3263, "step": 21048 }, { "epoch": 0.9860401930013585, "grad_norm": 0.5777365948722698, "learning_rate": 2.6778363436535925e-06, "loss": 0.3338, "step": 21049 }, { "epoch": 0.9860870379912868, "grad_norm": 0.6704890487986583, "learning_rate": 2.6776471734277698e-06, "loss": 0.3471, "step": 21050 }, { "epoch": 0.9861338829812152, "grad_norm": 0.5788507555395372, "learning_rate": 2.6774580021796317e-06, "loss": 0.3073, "step": 21051 }, { "epoch": 0.9861807279711435, "grad_norm": 0.6008663172346501, "learning_rate": 2.677268829910269e-06, "loss": 0.322, "step": 21052 }, { "epoch": 0.9862275729610718, "grad_norm": 0.6775740332049494, "learning_rate": 2.677079656620767e-06, "loss": 0.357, "step": 21053 }, { "epoch": 0.9862744179510001, "grad_norm": 0.639685344828087, "learning_rate": 2.6768904823122165e-06, "loss": 0.3204, "step": 21054 }, { "epoch": 0.9863212629409285, "grad_norm": 0.6078279928839675, "learning_rate": 2.6767013069857062e-06, "loss": 0.3238, "step": 21055 }, { "epoch": 0.9863681079308568, "grad_norm": 0.6524493389612265, "learning_rate": 2.676512130642324e-06, "loss": 0.3335, "step": 21056 }, { "epoch": 0.9864149529207851, "grad_norm": 0.6040750310398606, "learning_rate": 2.676322953283159e-06, "loss": 0.3163, "step": 21057 }, { "epoch": 0.9864617979107134, "grad_norm": 0.6004716679649786, "learning_rate": 2.6761337749092996e-06, "loss": 0.308, "step": 21058 }, { "epoch": 0.9865086429006418, "grad_norm": 0.5776431393908403, "learning_rate": 2.675944595521835e-06, "loss": 0.3223, "step": 21059 }, { "epoch": 0.9865554878905701, "grad_norm": 0.6151626002409921, "learning_rate": 2.675755415121853e-06, "loss": 0.3191, "step": 21060 }, { "epoch": 0.9866023328804985, "grad_norm": 0.5538345365947062, "learning_rate": 2.675566233710443e-06, "loss": 0.3311, "step": 21061 }, { "epoch": 0.9866491778704267, "grad_norm": 0.6287404680893606, "learning_rate": 2.6753770512886935e-06, "loss": 0.3225, "step": 21062 }, { "epoch": 0.9866960228603551, "grad_norm": 0.5885355860675976, "learning_rate": 2.6751878678576924e-06, "loss": 0.3119, "step": 21063 }, { "epoch": 0.9867428678502834, "grad_norm": 0.6149564967422749, "learning_rate": 2.6749986834185306e-06, "loss": 0.3342, "step": 21064 }, { "epoch": 0.9867897128402118, "grad_norm": 0.62218461731735, "learning_rate": 2.6748094979722937e-06, "loss": 0.3241, "step": 21065 }, { "epoch": 0.98683655783014, "grad_norm": 0.6113830019795509, "learning_rate": 2.6746203115200734e-06, "loss": 0.3177, "step": 21066 }, { "epoch": 0.9868834028200684, "grad_norm": 0.6338497290591397, "learning_rate": 2.6744311240629566e-06, "loss": 0.3203, "step": 21067 }, { "epoch": 0.9869302478099967, "grad_norm": 0.6083137061564127, "learning_rate": 2.674241935602032e-06, "loss": 0.3, "step": 21068 }, { "epoch": 0.9869770927999251, "grad_norm": 0.6235343228689321, "learning_rate": 2.67405274613839e-06, "loss": 0.3166, "step": 21069 }, { "epoch": 0.9870239377898534, "grad_norm": 0.6216161939715388, "learning_rate": 2.673863555673118e-06, "loss": 0.3295, "step": 21070 }, { "epoch": 0.9870707827797817, "grad_norm": 0.5758751044832799, "learning_rate": 2.6736743642073038e-06, "loss": 0.3194, "step": 21071 }, { "epoch": 0.98711762776971, "grad_norm": 0.5818165424861391, "learning_rate": 2.673485171742038e-06, "loss": 0.2974, "step": 21072 }, { "epoch": 0.9871644727596384, "grad_norm": 0.6143692501690872, "learning_rate": 2.673295978278409e-06, "loss": 0.3268, "step": 21073 }, { "epoch": 0.9872113177495667, "grad_norm": 0.6225035809904891, "learning_rate": 2.6731067838175045e-06, "loss": 0.3322, "step": 21074 }, { "epoch": 0.987258162739495, "grad_norm": 0.640229129680489, "learning_rate": 2.6729175883604143e-06, "loss": 0.3035, "step": 21075 }, { "epoch": 0.9873050077294233, "grad_norm": 0.628168356391996, "learning_rate": 2.672728391908227e-06, "loss": 0.3176, "step": 21076 }, { "epoch": 0.9873518527193517, "grad_norm": 0.6442733776628504, "learning_rate": 2.672539194462031e-06, "loss": 0.3084, "step": 21077 }, { "epoch": 0.98739869770928, "grad_norm": 0.6554789515838513, "learning_rate": 2.6723499960229155e-06, "loss": 0.3322, "step": 21078 }, { "epoch": 0.9874455426992084, "grad_norm": 0.595604924777211, "learning_rate": 2.672160796591969e-06, "loss": 0.3152, "step": 21079 }, { "epoch": 0.9874923876891366, "grad_norm": 0.6251085394245933, "learning_rate": 2.67197159617028e-06, "loss": 0.3147, "step": 21080 }, { "epoch": 0.987539232679065, "grad_norm": 0.5815230036402941, "learning_rate": 2.6717823947589384e-06, "loss": 0.3155, "step": 21081 }, { "epoch": 0.9875860776689933, "grad_norm": 0.5472711034599289, "learning_rate": 2.671593192359032e-06, "loss": 0.2917, "step": 21082 }, { "epoch": 0.9876329226589217, "grad_norm": 0.5793293801659912, "learning_rate": 2.6714039889716497e-06, "loss": 0.2934, "step": 21083 }, { "epoch": 0.9876797676488499, "grad_norm": 0.5902703058716382, "learning_rate": 2.67121478459788e-06, "loss": 0.319, "step": 21084 }, { "epoch": 0.9877266126387783, "grad_norm": 0.592529549441887, "learning_rate": 2.6710255792388134e-06, "loss": 0.3026, "step": 21085 }, { "epoch": 0.9877734576287066, "grad_norm": 0.5908513614296614, "learning_rate": 2.6708363728955367e-06, "loss": 0.3184, "step": 21086 }, { "epoch": 0.987820302618635, "grad_norm": 0.6244647241605266, "learning_rate": 2.6706471655691403e-06, "loss": 0.3087, "step": 21087 }, { "epoch": 0.9878671476085633, "grad_norm": 0.6191034599199634, "learning_rate": 2.6704579572607125e-06, "loss": 0.3189, "step": 21088 }, { "epoch": 0.9879139925984916, "grad_norm": 0.5946969040474381, "learning_rate": 2.6702687479713413e-06, "loss": 0.3202, "step": 21089 }, { "epoch": 0.9879608375884199, "grad_norm": 0.5593326908657885, "learning_rate": 2.6700795377021165e-06, "loss": 0.3032, "step": 21090 }, { "epoch": 0.9880076825783483, "grad_norm": 0.5975001305497368, "learning_rate": 2.669890326454127e-06, "loss": 0.3228, "step": 21091 }, { "epoch": 0.9880545275682766, "grad_norm": 0.5425716424344479, "learning_rate": 2.6697011142284605e-06, "loss": 0.3093, "step": 21092 }, { "epoch": 0.9881013725582048, "grad_norm": 0.6316284633059525, "learning_rate": 2.669511901026207e-06, "loss": 0.3258, "step": 21093 }, { "epoch": 0.9881482175481332, "grad_norm": 0.5939683212703444, "learning_rate": 2.6693226868484563e-06, "loss": 0.308, "step": 21094 }, { "epoch": 0.9881950625380616, "grad_norm": 0.6053271617540553, "learning_rate": 2.6691334716962945e-06, "loss": 0.3236, "step": 21095 }, { "epoch": 0.9882419075279899, "grad_norm": 0.539496294035511, "learning_rate": 2.6689442555708126e-06, "loss": 0.3073, "step": 21096 }, { "epoch": 0.9882887525179183, "grad_norm": 0.6365119762681409, "learning_rate": 2.6687550384731e-06, "loss": 0.3405, "step": 21097 }, { "epoch": 0.9883355975078465, "grad_norm": 0.5841694384401551, "learning_rate": 2.6685658204042438e-06, "loss": 0.3236, "step": 21098 }, { "epoch": 0.9883824424977748, "grad_norm": 0.6090789055989307, "learning_rate": 2.668376601365333e-06, "loss": 0.3314, "step": 21099 }, { "epoch": 0.9884292874877032, "grad_norm": 0.6490901292261609, "learning_rate": 2.668187381357458e-06, "loss": 0.3249, "step": 21100 }, { "epoch": 0.9884761324776316, "grad_norm": 0.5794624313314228, "learning_rate": 2.6679981603817067e-06, "loss": 0.311, "step": 21101 }, { "epoch": 0.9885229774675598, "grad_norm": 0.5816480108636728, "learning_rate": 2.667808938439168e-06, "loss": 0.3068, "step": 21102 }, { "epoch": 0.9885698224574881, "grad_norm": 0.5450701378165044, "learning_rate": 2.667619715530931e-06, "loss": 0.317, "step": 21103 }, { "epoch": 0.9886166674474165, "grad_norm": 0.5870763388076309, "learning_rate": 2.667430491658085e-06, "loss": 0.3262, "step": 21104 }, { "epoch": 0.9886635124373448, "grad_norm": 0.5766810945783043, "learning_rate": 2.667241266821719e-06, "loss": 0.3022, "step": 21105 }, { "epoch": 0.9887103574272732, "grad_norm": 0.571633034829416, "learning_rate": 2.6670520410229205e-06, "loss": 0.3071, "step": 21106 }, { "epoch": 0.9887572024172014, "grad_norm": 0.5635789307215182, "learning_rate": 2.66686281426278e-06, "loss": 0.3216, "step": 21107 }, { "epoch": 0.9888040474071298, "grad_norm": 0.6013812040175319, "learning_rate": 2.6666735865423865e-06, "loss": 0.3174, "step": 21108 }, { "epoch": 0.9888508923970581, "grad_norm": 0.6509025707655028, "learning_rate": 2.6664843578628273e-06, "loss": 0.3319, "step": 21109 }, { "epoch": 0.9888977373869865, "grad_norm": 0.5650946810386195, "learning_rate": 2.6662951282251938e-06, "loss": 0.3211, "step": 21110 }, { "epoch": 0.9889445823769147, "grad_norm": 0.5304288722507852, "learning_rate": 2.6661058976305727e-06, "loss": 0.2881, "step": 21111 }, { "epoch": 0.9889914273668431, "grad_norm": 0.633982261900003, "learning_rate": 2.6659166660800538e-06, "loss": 0.3124, "step": 21112 }, { "epoch": 0.9890382723567714, "grad_norm": 0.5774897241346095, "learning_rate": 2.6657274335747267e-06, "loss": 0.3139, "step": 21113 }, { "epoch": 0.9890851173466998, "grad_norm": 0.5577241957816322, "learning_rate": 2.6655382001156795e-06, "loss": 0.3089, "step": 21114 }, { "epoch": 0.9891319623366281, "grad_norm": 0.5811682150382056, "learning_rate": 2.6653489657040015e-06, "loss": 0.306, "step": 21115 }, { "epoch": 0.9891788073265564, "grad_norm": 0.5702716263275392, "learning_rate": 2.6651597303407823e-06, "loss": 0.3076, "step": 21116 }, { "epoch": 0.9892256523164847, "grad_norm": 0.5800090135453646, "learning_rate": 2.66497049402711e-06, "loss": 0.3102, "step": 21117 }, { "epoch": 0.9892724973064131, "grad_norm": 0.5515089424353471, "learning_rate": 2.664781256764074e-06, "loss": 0.2845, "step": 21118 }, { "epoch": 0.9893193422963414, "grad_norm": 0.6212577763733631, "learning_rate": 2.6645920185527635e-06, "loss": 0.3321, "step": 21119 }, { "epoch": 0.9893661872862697, "grad_norm": 0.6194878932146289, "learning_rate": 2.6644027793942666e-06, "loss": 0.3279, "step": 21120 }, { "epoch": 0.989413032276198, "grad_norm": 0.5829028475732352, "learning_rate": 2.6642135392896733e-06, "loss": 0.3007, "step": 21121 }, { "epoch": 0.9894598772661264, "grad_norm": 0.5945988353339076, "learning_rate": 2.6640242982400734e-06, "loss": 0.3359, "step": 21122 }, { "epoch": 0.9895067222560547, "grad_norm": 0.6042555087159086, "learning_rate": 2.663835056246554e-06, "loss": 0.3254, "step": 21123 }, { "epoch": 0.9895535672459831, "grad_norm": 0.5557236514033244, "learning_rate": 2.6636458133102045e-06, "loss": 0.3225, "step": 21124 }, { "epoch": 0.9896004122359113, "grad_norm": 0.592345722375716, "learning_rate": 2.663456569432115e-06, "loss": 0.3055, "step": 21125 }, { "epoch": 0.9896472572258397, "grad_norm": 0.5825083856596287, "learning_rate": 2.6632673246133743e-06, "loss": 0.2998, "step": 21126 }, { "epoch": 0.989694102215768, "grad_norm": 0.6391267498995075, "learning_rate": 2.6630780788550707e-06, "loss": 0.3183, "step": 21127 }, { "epoch": 0.9897409472056964, "grad_norm": 0.6014298162447436, "learning_rate": 2.6628888321582945e-06, "loss": 0.3248, "step": 21128 }, { "epoch": 0.9897877921956246, "grad_norm": 0.6315682700838519, "learning_rate": 2.662699584524134e-06, "loss": 0.3209, "step": 21129 }, { "epoch": 0.989834637185553, "grad_norm": 0.6012302106400665, "learning_rate": 2.6625103359536776e-06, "loss": 0.3402, "step": 21130 }, { "epoch": 0.9898814821754813, "grad_norm": 0.5883545936543472, "learning_rate": 2.662321086448015e-06, "loss": 0.3164, "step": 21131 }, { "epoch": 0.9899283271654097, "grad_norm": 0.5816505942531123, "learning_rate": 2.662131836008236e-06, "loss": 0.3184, "step": 21132 }, { "epoch": 0.989975172155338, "grad_norm": 0.5612701963588743, "learning_rate": 2.6619425846354278e-06, "loss": 0.3056, "step": 21133 }, { "epoch": 0.9900220171452663, "grad_norm": 0.6618597206449404, "learning_rate": 2.6617533323306823e-06, "loss": 0.3278, "step": 21134 }, { "epoch": 0.9900688621351946, "grad_norm": 0.6070583244160579, "learning_rate": 2.6615640790950857e-06, "loss": 0.3143, "step": 21135 }, { "epoch": 0.990115707125123, "grad_norm": 0.5880555354255188, "learning_rate": 2.6613748249297294e-06, "loss": 0.3136, "step": 21136 }, { "epoch": 0.9901625521150513, "grad_norm": 0.6099480626577258, "learning_rate": 2.6611855698357014e-06, "loss": 0.3064, "step": 21137 }, { "epoch": 0.9902093971049796, "grad_norm": 0.6267314016167221, "learning_rate": 2.6609963138140902e-06, "loss": 0.3162, "step": 21138 }, { "epoch": 0.9902562420949079, "grad_norm": 0.6148715725288583, "learning_rate": 2.6608070568659868e-06, "loss": 0.3331, "step": 21139 }, { "epoch": 0.9903030870848363, "grad_norm": 0.5895019617171895, "learning_rate": 2.6606177989924786e-06, "loss": 0.3056, "step": 21140 }, { "epoch": 0.9903499320747646, "grad_norm": 0.5446308558445132, "learning_rate": 2.660428540194655e-06, "loss": 0.2989, "step": 21141 }, { "epoch": 0.990396777064693, "grad_norm": 0.6311565735049138, "learning_rate": 2.6602392804736055e-06, "loss": 0.3257, "step": 21142 }, { "epoch": 0.9904436220546212, "grad_norm": 0.5718555630605293, "learning_rate": 2.6600500198304207e-06, "loss": 0.3101, "step": 21143 }, { "epoch": 0.9904904670445496, "grad_norm": 0.5782476875549514, "learning_rate": 2.6598607582661868e-06, "loss": 0.3179, "step": 21144 }, { "epoch": 0.9905373120344779, "grad_norm": 0.5776698140368595, "learning_rate": 2.659671495781994e-06, "loss": 0.2938, "step": 21145 }, { "epoch": 0.9905841570244063, "grad_norm": 0.5881325291504084, "learning_rate": 2.6594822323789334e-06, "loss": 0.2928, "step": 21146 }, { "epoch": 0.9906310020143345, "grad_norm": 0.7396418687078272, "learning_rate": 2.6592929680580916e-06, "loss": 0.3298, "step": 21147 }, { "epoch": 0.9906778470042629, "grad_norm": 0.6232666454771738, "learning_rate": 2.659103702820559e-06, "loss": 0.3059, "step": 21148 }, { "epoch": 0.9907246919941912, "grad_norm": 0.661546018369013, "learning_rate": 2.6589144366674246e-06, "loss": 0.3116, "step": 21149 }, { "epoch": 0.9907715369841196, "grad_norm": 0.6341308019070315, "learning_rate": 2.658725169599778e-06, "loss": 0.3231, "step": 21150 }, { "epoch": 0.9908183819740479, "grad_norm": 0.6029560005096777, "learning_rate": 2.6585359016187073e-06, "loss": 0.331, "step": 21151 }, { "epoch": 0.9908652269639762, "grad_norm": 0.630652034546029, "learning_rate": 2.6583466327253027e-06, "loss": 0.3244, "step": 21152 }, { "epoch": 0.9909120719539045, "grad_norm": 0.5898669750001445, "learning_rate": 2.658157362920653e-06, "loss": 0.3302, "step": 21153 }, { "epoch": 0.9909589169438329, "grad_norm": 0.5890535563434954, "learning_rate": 2.657968092205847e-06, "loss": 0.3179, "step": 21154 }, { "epoch": 0.9910057619337612, "grad_norm": 0.5967868484941526, "learning_rate": 2.6577788205819748e-06, "loss": 0.3277, "step": 21155 }, { "epoch": 0.9910526069236895, "grad_norm": 0.6124058217199095, "learning_rate": 2.6575895480501244e-06, "loss": 0.3409, "step": 21156 }, { "epoch": 0.9910994519136178, "grad_norm": 0.5719907955854193, "learning_rate": 2.657400274611387e-06, "loss": 0.3044, "step": 21157 }, { "epoch": 0.9911462969035462, "grad_norm": 0.6066484984336638, "learning_rate": 2.65721100026685e-06, "loss": 0.3161, "step": 21158 }, { "epoch": 0.9911931418934745, "grad_norm": 0.5843829745604806, "learning_rate": 2.6570217250176027e-06, "loss": 0.2948, "step": 21159 }, { "epoch": 0.9912399868834029, "grad_norm": 0.5729943337995216, "learning_rate": 2.6568324488647356e-06, "loss": 0.2951, "step": 21160 }, { "epoch": 0.9912868318733311, "grad_norm": 0.5573372917171278, "learning_rate": 2.656643171809337e-06, "loss": 0.3007, "step": 21161 }, { "epoch": 0.9913336768632595, "grad_norm": 0.6172589432804387, "learning_rate": 2.6564538938524957e-06, "loss": 0.326, "step": 21162 }, { "epoch": 0.9913805218531878, "grad_norm": 0.5953791038979879, "learning_rate": 2.656264614995302e-06, "loss": 0.3217, "step": 21163 }, { "epoch": 0.9914273668431162, "grad_norm": 0.6543054390706514, "learning_rate": 2.6560753352388454e-06, "loss": 0.3313, "step": 21164 }, { "epoch": 0.9914742118330444, "grad_norm": 0.5718380089817569, "learning_rate": 2.6558860545842137e-06, "loss": 0.304, "step": 21165 }, { "epoch": 0.9915210568229728, "grad_norm": 0.6353694656080716, "learning_rate": 2.6556967730324967e-06, "loss": 0.3234, "step": 21166 }, { "epoch": 0.9915679018129011, "grad_norm": 0.644421382545504, "learning_rate": 2.6555074905847844e-06, "loss": 0.3371, "step": 21167 }, { "epoch": 0.9916147468028295, "grad_norm": 0.5923830637820217, "learning_rate": 2.6553182072421658e-06, "loss": 0.3303, "step": 21168 }, { "epoch": 0.9916615917927578, "grad_norm": 0.6069193953636077, "learning_rate": 2.655128923005729e-06, "loss": 0.3028, "step": 21169 }, { "epoch": 0.991708436782686, "grad_norm": 0.5802584013218328, "learning_rate": 2.654939637876565e-06, "loss": 0.3101, "step": 21170 }, { "epoch": 0.9917552817726144, "grad_norm": 0.5880230548324874, "learning_rate": 2.654750351855763e-06, "loss": 0.3084, "step": 21171 }, { "epoch": 0.9918021267625428, "grad_norm": 0.5581540508248931, "learning_rate": 2.6545610649444097e-06, "loss": 0.3131, "step": 21172 }, { "epoch": 0.9918489717524711, "grad_norm": 0.6096994317968747, "learning_rate": 2.6543717771435974e-06, "loss": 0.3219, "step": 21173 }, { "epoch": 0.9918958167423993, "grad_norm": 0.5649851475334274, "learning_rate": 2.6541824884544145e-06, "loss": 0.2962, "step": 21174 }, { "epoch": 0.9919426617323277, "grad_norm": 0.6034678270270013, "learning_rate": 2.65399319887795e-06, "loss": 0.3087, "step": 21175 }, { "epoch": 0.991989506722256, "grad_norm": 0.6158840438641433, "learning_rate": 2.653803908415293e-06, "loss": 0.3294, "step": 21176 }, { "epoch": 0.9920363517121844, "grad_norm": 0.5913204112176245, "learning_rate": 2.653614617067534e-06, "loss": 0.3151, "step": 21177 }, { "epoch": 0.9920831967021128, "grad_norm": 0.5934350115049273, "learning_rate": 2.653425324835761e-06, "loss": 0.3022, "step": 21178 }, { "epoch": 0.992130041692041, "grad_norm": 0.6276681286830835, "learning_rate": 2.6532360317210633e-06, "loss": 0.3379, "step": 21179 }, { "epoch": 0.9921768866819693, "grad_norm": 0.5917796184698273, "learning_rate": 2.6530467377245315e-06, "loss": 0.2979, "step": 21180 }, { "epoch": 0.9922237316718977, "grad_norm": 0.6088208038106953, "learning_rate": 2.652857442847254e-06, "loss": 0.3159, "step": 21181 }, { "epoch": 0.992270576661826, "grad_norm": 0.5669248006671506, "learning_rate": 2.6526681470903198e-06, "loss": 0.2892, "step": 21182 }, { "epoch": 0.9923174216517543, "grad_norm": 0.5744949236723377, "learning_rate": 2.65247885045482e-06, "loss": 0.296, "step": 21183 }, { "epoch": 0.9923642666416826, "grad_norm": 0.6099105078641828, "learning_rate": 2.652289552941841e-06, "loss": 0.3063, "step": 21184 }, { "epoch": 0.992411111631611, "grad_norm": 0.5975697730267455, "learning_rate": 2.6521002545524754e-06, "loss": 0.2971, "step": 21185 }, { "epoch": 0.9924579566215394, "grad_norm": 0.6063485867117965, "learning_rate": 2.651910955287811e-06, "loss": 0.3333, "step": 21186 }, { "epoch": 0.9925048016114677, "grad_norm": 0.5489355874639943, "learning_rate": 2.651721655148936e-06, "loss": 0.3062, "step": 21187 }, { "epoch": 0.9925516466013959, "grad_norm": 0.5649137752063657, "learning_rate": 2.651532354136942e-06, "loss": 0.3083, "step": 21188 }, { "epoch": 0.9925984915913243, "grad_norm": 0.6146229482283262, "learning_rate": 2.6513430522529175e-06, "loss": 0.3223, "step": 21189 }, { "epoch": 0.9926453365812526, "grad_norm": 0.5819000265139908, "learning_rate": 2.651153749497951e-06, "loss": 0.307, "step": 21190 }, { "epoch": 0.992692181571181, "grad_norm": 0.5476081283825236, "learning_rate": 2.650964445873133e-06, "loss": 0.3021, "step": 21191 }, { "epoch": 0.9927390265611092, "grad_norm": 0.6277992813269758, "learning_rate": 2.6507751413795534e-06, "loss": 0.316, "step": 21192 }, { "epoch": 0.9927858715510376, "grad_norm": 0.5656911398966792, "learning_rate": 2.6505858360182997e-06, "loss": 0.3135, "step": 21193 }, { "epoch": 0.9928327165409659, "grad_norm": 0.6293856384871953, "learning_rate": 2.6503965297904622e-06, "loss": 0.3296, "step": 21194 }, { "epoch": 0.9928795615308943, "grad_norm": 0.5606985965342974, "learning_rate": 2.6502072226971316e-06, "loss": 0.3003, "step": 21195 }, { "epoch": 0.9929264065208226, "grad_norm": 0.6162025503101837, "learning_rate": 2.650017914739396e-06, "loss": 0.3547, "step": 21196 }, { "epoch": 0.9929732515107509, "grad_norm": 0.6014796324499726, "learning_rate": 2.6498286059183437e-06, "loss": 0.3456, "step": 21197 }, { "epoch": 0.9930200965006792, "grad_norm": 0.6042483101157371, "learning_rate": 2.6496392962350663e-06, "loss": 0.3159, "step": 21198 }, { "epoch": 0.9930669414906076, "grad_norm": 0.577854071791434, "learning_rate": 2.6494499856906524e-06, "loss": 0.3049, "step": 21199 }, { "epoch": 0.9931137864805359, "grad_norm": 0.6879296023423418, "learning_rate": 2.6492606742861913e-06, "loss": 0.344, "step": 21200 }, { "epoch": 0.9931606314704642, "grad_norm": 0.6140936797196644, "learning_rate": 2.6490713620227728e-06, "loss": 0.3114, "step": 21201 }, { "epoch": 0.9932074764603925, "grad_norm": 0.5988218831881669, "learning_rate": 2.648882048901486e-06, "loss": 0.3088, "step": 21202 }, { "epoch": 0.9932543214503209, "grad_norm": 0.5984150931330474, "learning_rate": 2.64869273492342e-06, "loss": 0.3301, "step": 21203 }, { "epoch": 0.9933011664402492, "grad_norm": 0.6386982967705125, "learning_rate": 2.6485034200896655e-06, "loss": 0.3203, "step": 21204 }, { "epoch": 0.9933480114301776, "grad_norm": 0.6302679547979825, "learning_rate": 2.64831410440131e-06, "loss": 0.3367, "step": 21205 }, { "epoch": 0.9933948564201058, "grad_norm": 0.6112526810845805, "learning_rate": 2.6481247878594453e-06, "loss": 0.3285, "step": 21206 }, { "epoch": 0.9934417014100342, "grad_norm": 0.6118090653826478, "learning_rate": 2.647935470465159e-06, "loss": 0.3292, "step": 21207 }, { "epoch": 0.9934885463999625, "grad_norm": 0.6593169130235726, "learning_rate": 2.647746152219541e-06, "loss": 0.3335, "step": 21208 }, { "epoch": 0.9935353913898909, "grad_norm": 0.5674735629082565, "learning_rate": 2.647556833123682e-06, "loss": 0.2929, "step": 21209 }, { "epoch": 0.9935822363798191, "grad_norm": 0.600316227115769, "learning_rate": 2.64736751317867e-06, "loss": 0.2926, "step": 21210 }, { "epoch": 0.9936290813697475, "grad_norm": 0.5785469223746481, "learning_rate": 2.6471781923855943e-06, "loss": 0.3086, "step": 21211 }, { "epoch": 0.9936759263596758, "grad_norm": 0.5956132089397075, "learning_rate": 2.6469888707455447e-06, "loss": 0.3266, "step": 21212 }, { "epoch": 0.9937227713496042, "grad_norm": 0.569578676314079, "learning_rate": 2.6467995482596133e-06, "loss": 0.3129, "step": 21213 }, { "epoch": 0.9937696163395325, "grad_norm": 0.6181853108552666, "learning_rate": 2.6466102249288856e-06, "loss": 0.3415, "step": 21214 }, { "epoch": 0.9938164613294608, "grad_norm": 0.5539387397487274, "learning_rate": 2.6464209007544533e-06, "loss": 0.3087, "step": 21215 }, { "epoch": 0.9938633063193891, "grad_norm": 0.6473179021636172, "learning_rate": 2.6462315757374058e-06, "loss": 0.3162, "step": 21216 }, { "epoch": 0.9939101513093175, "grad_norm": 0.5746934675399749, "learning_rate": 2.6460422498788323e-06, "loss": 0.3329, "step": 21217 }, { "epoch": 0.9939569962992458, "grad_norm": 0.5731023753888831, "learning_rate": 2.6458529231798225e-06, "loss": 0.3039, "step": 21218 }, { "epoch": 0.9940038412891741, "grad_norm": 0.574408543047169, "learning_rate": 2.6456635956414658e-06, "loss": 0.2984, "step": 21219 }, { "epoch": 0.9940506862791024, "grad_norm": 0.6218117128779928, "learning_rate": 2.6454742672648514e-06, "loss": 0.2977, "step": 21220 }, { "epoch": 0.9940975312690308, "grad_norm": 0.649830773840231, "learning_rate": 2.6452849380510686e-06, "loss": 0.3431, "step": 21221 }, { "epoch": 0.9941443762589591, "grad_norm": 0.6526867148896945, "learning_rate": 2.6450956080012088e-06, "loss": 0.3291, "step": 21222 }, { "epoch": 0.9941912212488875, "grad_norm": 0.5811210830892365, "learning_rate": 2.6449062771163595e-06, "loss": 0.3099, "step": 21223 }, { "epoch": 0.9942380662388157, "grad_norm": 0.5709335679134533, "learning_rate": 2.6447169453976106e-06, "loss": 0.3014, "step": 21224 }, { "epoch": 0.9942849112287441, "grad_norm": 0.6448509232251257, "learning_rate": 2.644527612846053e-06, "loss": 0.3434, "step": 21225 }, { "epoch": 0.9943317562186724, "grad_norm": 0.5576034817238849, "learning_rate": 2.6443382794627746e-06, "loss": 0.3064, "step": 21226 }, { "epoch": 0.9943786012086008, "grad_norm": 0.5983743984202411, "learning_rate": 2.644148945248866e-06, "loss": 0.3139, "step": 21227 }, { "epoch": 0.994425446198529, "grad_norm": 0.5734596115205485, "learning_rate": 2.6439596102054167e-06, "loss": 0.307, "step": 21228 }, { "epoch": 0.9944722911884574, "grad_norm": 0.6115455068594645, "learning_rate": 2.6437702743335154e-06, "loss": 0.3256, "step": 21229 }, { "epoch": 0.9945191361783857, "grad_norm": 0.6428069865215267, "learning_rate": 2.6435809376342525e-06, "loss": 0.3228, "step": 21230 }, { "epoch": 0.9945659811683141, "grad_norm": 0.5677273899545865, "learning_rate": 2.643391600108718e-06, "loss": 0.3079, "step": 21231 }, { "epoch": 0.9946128261582424, "grad_norm": 0.5661365947922004, "learning_rate": 2.643202261758e-06, "loss": 0.3151, "step": 21232 }, { "epoch": 0.9946596711481707, "grad_norm": 0.5937828008771194, "learning_rate": 2.643012922583189e-06, "loss": 0.3099, "step": 21233 }, { "epoch": 0.994706516138099, "grad_norm": 0.6650801269837866, "learning_rate": 2.6428235825853753e-06, "loss": 0.3442, "step": 21234 }, { "epoch": 0.9947533611280274, "grad_norm": 0.5627588736420275, "learning_rate": 2.6426342417656474e-06, "loss": 0.3164, "step": 21235 }, { "epoch": 0.9948002061179557, "grad_norm": 0.6921524643714658, "learning_rate": 2.642444900125095e-06, "loss": 0.3245, "step": 21236 }, { "epoch": 0.994847051107884, "grad_norm": 0.6330732001238774, "learning_rate": 2.6422555576648083e-06, "loss": 0.3486, "step": 21237 }, { "epoch": 0.9948938960978123, "grad_norm": 0.6141127624624445, "learning_rate": 2.642066214385877e-06, "loss": 0.3238, "step": 21238 }, { "epoch": 0.9949407410877407, "grad_norm": 0.6013688954129454, "learning_rate": 2.64187687028939e-06, "loss": 0.3082, "step": 21239 }, { "epoch": 0.994987586077669, "grad_norm": 0.5658434718455599, "learning_rate": 2.6416875253764375e-06, "loss": 0.303, "step": 21240 }, { "epoch": 0.9950344310675974, "grad_norm": 0.5728728536919974, "learning_rate": 2.641498179648109e-06, "loss": 0.3171, "step": 21241 }, { "epoch": 0.9950812760575256, "grad_norm": 0.6252142133492414, "learning_rate": 2.6413088331054926e-06, "loss": 0.323, "step": 21242 }, { "epoch": 0.995128121047454, "grad_norm": 0.5593707017344229, "learning_rate": 2.6411194857496803e-06, "loss": 0.3048, "step": 21243 }, { "epoch": 0.9951749660373823, "grad_norm": 0.6358170602213455, "learning_rate": 2.6409301375817612e-06, "loss": 0.329, "step": 21244 }, { "epoch": 0.9952218110273107, "grad_norm": 0.5673873226231925, "learning_rate": 2.6407407886028246e-06, "loss": 0.3029, "step": 21245 }, { "epoch": 0.9952686560172389, "grad_norm": 0.6044986024019859, "learning_rate": 2.6405514388139597e-06, "loss": 0.3271, "step": 21246 }, { "epoch": 0.9953155010071673, "grad_norm": 0.565223596968674, "learning_rate": 2.6403620882162567e-06, "loss": 0.2975, "step": 21247 }, { "epoch": 0.9953623459970956, "grad_norm": 0.5766916850261238, "learning_rate": 2.6401727368108053e-06, "loss": 0.3059, "step": 21248 }, { "epoch": 0.995409190987024, "grad_norm": 0.5822248512859446, "learning_rate": 2.639983384598695e-06, "loss": 0.3235, "step": 21249 }, { "epoch": 0.9954560359769523, "grad_norm": 0.6249299555095076, "learning_rate": 2.6397940315810156e-06, "loss": 0.304, "step": 21250 }, { "epoch": 0.9955028809668806, "grad_norm": 0.586852548154072, "learning_rate": 2.639604677758857e-06, "loss": 0.3122, "step": 21251 }, { "epoch": 0.9955497259568089, "grad_norm": 0.6150377776329865, "learning_rate": 2.6394153231333076e-06, "loss": 0.3168, "step": 21252 }, { "epoch": 0.9955965709467373, "grad_norm": 0.6028005497856509, "learning_rate": 2.639225967705459e-06, "loss": 0.3091, "step": 21253 }, { "epoch": 0.9956434159366656, "grad_norm": 0.6036147672645008, "learning_rate": 2.6390366114763993e-06, "loss": 0.3097, "step": 21254 }, { "epoch": 0.9956902609265939, "grad_norm": 0.5601491190457736, "learning_rate": 2.638847254447219e-06, "loss": 0.3107, "step": 21255 }, { "epoch": 0.9957371059165222, "grad_norm": 0.5914512263641781, "learning_rate": 2.638657896619008e-06, "loss": 0.3081, "step": 21256 }, { "epoch": 0.9957839509064506, "grad_norm": 0.6325997353568664, "learning_rate": 2.6384685379928555e-06, "loss": 0.3128, "step": 21257 }, { "epoch": 0.9958307958963789, "grad_norm": 0.5537862705685342, "learning_rate": 2.6382791785698514e-06, "loss": 0.3077, "step": 21258 }, { "epoch": 0.9958776408863073, "grad_norm": 0.5759609021896352, "learning_rate": 2.638089818351085e-06, "loss": 0.3146, "step": 21259 }, { "epoch": 0.9959244858762355, "grad_norm": 0.6051417321085353, "learning_rate": 2.6379004573376464e-06, "loss": 0.3291, "step": 21260 }, { "epoch": 0.9959713308661639, "grad_norm": 0.5977491923159644, "learning_rate": 2.6377110955306258e-06, "loss": 0.3326, "step": 21261 }, { "epoch": 0.9960181758560922, "grad_norm": 0.5756211773141994, "learning_rate": 2.6375217329311126e-06, "loss": 0.321, "step": 21262 }, { "epoch": 0.9960650208460206, "grad_norm": 0.5770591432347305, "learning_rate": 2.637332369540196e-06, "loss": 0.3243, "step": 21263 }, { "epoch": 0.9961118658359488, "grad_norm": 0.535645184406193, "learning_rate": 2.637143005358965e-06, "loss": 0.2973, "step": 21264 }, { "epoch": 0.9961587108258771, "grad_norm": 0.6448469559886073, "learning_rate": 2.6369536403885127e-06, "loss": 0.3252, "step": 21265 }, { "epoch": 0.9962055558158055, "grad_norm": 0.5678225871484768, "learning_rate": 2.6367642746299254e-06, "loss": 0.2953, "step": 21266 }, { "epoch": 0.9962524008057339, "grad_norm": 0.5912327735776295, "learning_rate": 2.636574908084294e-06, "loss": 0.2872, "step": 21267 }, { "epoch": 0.9962992457956622, "grad_norm": 0.6478360207692264, "learning_rate": 2.636385540752709e-06, "loss": 0.3076, "step": 21268 }, { "epoch": 0.9963460907855904, "grad_norm": 0.6649302785836358, "learning_rate": 2.636196172636259e-06, "loss": 0.3411, "step": 21269 }, { "epoch": 0.9963929357755188, "grad_norm": 0.5985700445806128, "learning_rate": 2.6360068037360344e-06, "loss": 0.3247, "step": 21270 }, { "epoch": 0.9964397807654471, "grad_norm": 0.5672563771824637, "learning_rate": 2.6358174340531244e-06, "loss": 0.3076, "step": 21271 }, { "epoch": 0.9964866257553755, "grad_norm": 0.6220809153301703, "learning_rate": 2.63562806358862e-06, "loss": 0.3229, "step": 21272 }, { "epoch": 0.9965334707453037, "grad_norm": 0.5968907703368161, "learning_rate": 2.6354386923436095e-06, "loss": 0.3202, "step": 21273 }, { "epoch": 0.9965803157352321, "grad_norm": 0.6781182288382551, "learning_rate": 2.6352493203191836e-06, "loss": 0.3085, "step": 21274 }, { "epoch": 0.9966271607251604, "grad_norm": 0.6180236950540221, "learning_rate": 2.635059947516432e-06, "loss": 0.2943, "step": 21275 }, { "epoch": 0.9966740057150888, "grad_norm": 0.6009782590400828, "learning_rate": 2.634870573936444e-06, "loss": 0.3264, "step": 21276 }, { "epoch": 0.9967208507050171, "grad_norm": 0.5841831714154131, "learning_rate": 2.63468119958031e-06, "loss": 0.3207, "step": 21277 }, { "epoch": 0.9967676956949454, "grad_norm": 0.6451728988696295, "learning_rate": 2.6344918244491192e-06, "loss": 0.3258, "step": 21278 }, { "epoch": 0.9968145406848737, "grad_norm": 0.5905742095636636, "learning_rate": 2.6343024485439623e-06, "loss": 0.3032, "step": 21279 }, { "epoch": 0.9968613856748021, "grad_norm": 0.5541534651655476, "learning_rate": 2.6341130718659285e-06, "loss": 0.2878, "step": 21280 }, { "epoch": 0.9969082306647304, "grad_norm": 0.604192499485572, "learning_rate": 2.633923694416107e-06, "loss": 0.321, "step": 21281 }, { "epoch": 0.9969550756546587, "grad_norm": 0.6012447930611844, "learning_rate": 2.6337343161955886e-06, "loss": 0.3332, "step": 21282 }, { "epoch": 0.997001920644587, "grad_norm": 0.5788099859350169, "learning_rate": 2.633544937205464e-06, "loss": 0.3128, "step": 21283 }, { "epoch": 0.9970487656345154, "grad_norm": 0.6943105364627788, "learning_rate": 2.6333555574468206e-06, "loss": 0.3237, "step": 21284 }, { "epoch": 0.9970956106244437, "grad_norm": 0.5685651682857589, "learning_rate": 2.633166176920749e-06, "loss": 0.2989, "step": 21285 }, { "epoch": 0.9971424556143721, "grad_norm": 0.5761202747960795, "learning_rate": 2.6329767956283407e-06, "loss": 0.3107, "step": 21286 }, { "epoch": 0.9971893006043003, "grad_norm": 0.5870123487037111, "learning_rate": 2.632787413570684e-06, "loss": 0.3101, "step": 21287 }, { "epoch": 0.9972361455942287, "grad_norm": 0.6254988621872815, "learning_rate": 2.6325980307488684e-06, "loss": 0.294, "step": 21288 }, { "epoch": 0.997282990584157, "grad_norm": 0.6427108234550561, "learning_rate": 2.632408647163986e-06, "loss": 0.333, "step": 21289 }, { "epoch": 0.9973298355740854, "grad_norm": 0.5642116420432493, "learning_rate": 2.632219262817124e-06, "loss": 0.3202, "step": 21290 }, { "epoch": 0.9973766805640136, "grad_norm": 0.5878724467184347, "learning_rate": 2.6320298777093734e-06, "loss": 0.3249, "step": 21291 }, { "epoch": 0.997423525553942, "grad_norm": 0.5654367518379544, "learning_rate": 2.6318404918418245e-06, "loss": 0.3024, "step": 21292 }, { "epoch": 0.9974703705438703, "grad_norm": 0.5853166834352296, "learning_rate": 2.631651105215566e-06, "loss": 0.2991, "step": 21293 }, { "epoch": 0.9975172155337987, "grad_norm": 0.6083915516302961, "learning_rate": 2.6314617178316894e-06, "loss": 0.3334, "step": 21294 }, { "epoch": 0.997564060523727, "grad_norm": 0.5867529697371738, "learning_rate": 2.631272329691283e-06, "loss": 0.31, "step": 21295 }, { "epoch": 0.9976109055136553, "grad_norm": 0.5559546264171559, "learning_rate": 2.6310829407954377e-06, "loss": 0.2932, "step": 21296 }, { "epoch": 0.9976577505035836, "grad_norm": 0.6521261717906727, "learning_rate": 2.6308935511452425e-06, "loss": 0.3277, "step": 21297 }, { "epoch": 0.997704595493512, "grad_norm": 0.560897384218934, "learning_rate": 2.630704160741788e-06, "loss": 0.3041, "step": 21298 }, { "epoch": 0.9977514404834403, "grad_norm": 0.6027252564202099, "learning_rate": 2.6305147695861643e-06, "loss": 0.3193, "step": 21299 }, { "epoch": 0.9977982854733686, "grad_norm": 0.6014140796681434, "learning_rate": 2.6303253776794607e-06, "loss": 0.3109, "step": 21300 }, { "epoch": 0.9978451304632969, "grad_norm": 0.6364476742854593, "learning_rate": 2.6301359850227674e-06, "loss": 0.3229, "step": 21301 }, { "epoch": 0.9978919754532253, "grad_norm": 0.6158690266103739, "learning_rate": 2.629946591617174e-06, "loss": 0.3254, "step": 21302 }, { "epoch": 0.9979388204431536, "grad_norm": 0.5257795988427705, "learning_rate": 2.6297571974637703e-06, "loss": 0.302, "step": 21303 }, { "epoch": 0.997985665433082, "grad_norm": 0.6130384715949845, "learning_rate": 2.629567802563648e-06, "loss": 0.328, "step": 21304 }, { "epoch": 0.9980325104230102, "grad_norm": 0.6070444678091378, "learning_rate": 2.629378406917894e-06, "loss": 0.3015, "step": 21305 }, { "epoch": 0.9980793554129386, "grad_norm": 0.617220785262487, "learning_rate": 2.6291890105275998e-06, "loss": 0.327, "step": 21306 }, { "epoch": 0.9981262004028669, "grad_norm": 0.5713026204453382, "learning_rate": 2.6289996133938563e-06, "loss": 0.325, "step": 21307 }, { "epoch": 0.9981730453927953, "grad_norm": 0.583625437359098, "learning_rate": 2.628810215517752e-06, "loss": 0.2881, "step": 21308 }, { "epoch": 0.9982198903827235, "grad_norm": 0.5917694320757414, "learning_rate": 2.628620816900377e-06, "loss": 0.3496, "step": 21309 }, { "epoch": 0.9982667353726519, "grad_norm": 0.6028694708273685, "learning_rate": 2.6284314175428215e-06, "loss": 0.3161, "step": 21310 }, { "epoch": 0.9983135803625802, "grad_norm": 0.5902883337535765, "learning_rate": 2.6282420174461767e-06, "loss": 0.3121, "step": 21311 }, { "epoch": 0.9983604253525086, "grad_norm": 0.613016952752628, "learning_rate": 2.6280526166115294e-06, "loss": 0.321, "step": 21312 }, { "epoch": 0.9984072703424369, "grad_norm": 0.5886638029300909, "learning_rate": 2.6278632150399723e-06, "loss": 0.3148, "step": 21313 }, { "epoch": 0.9984541153323652, "grad_norm": 0.5960034657835527, "learning_rate": 2.6276738127325947e-06, "loss": 0.2958, "step": 21314 }, { "epoch": 0.9985009603222935, "grad_norm": 0.5450681301033272, "learning_rate": 2.6274844096904868e-06, "loss": 0.3019, "step": 21315 }, { "epoch": 0.9985478053122219, "grad_norm": 0.5852353134980778, "learning_rate": 2.6272950059147374e-06, "loss": 0.3014, "step": 21316 }, { "epoch": 0.9985946503021502, "grad_norm": 0.6045866974997831, "learning_rate": 2.627105601406438e-06, "loss": 0.3059, "step": 21317 }, { "epoch": 0.9986414952920785, "grad_norm": 0.58736867350428, "learning_rate": 2.626916196166677e-06, "loss": 0.2914, "step": 21318 }, { "epoch": 0.9986883402820068, "grad_norm": 0.5883613312403677, "learning_rate": 2.6267267901965453e-06, "loss": 0.2998, "step": 21319 }, { "epoch": 0.9987351852719352, "grad_norm": 0.6253075184908741, "learning_rate": 2.6265373834971337e-06, "loss": 0.3334, "step": 21320 }, { "epoch": 0.9987820302618635, "grad_norm": 0.6069799400917593, "learning_rate": 2.6263479760695305e-06, "loss": 0.3046, "step": 21321 }, { "epoch": 0.9988288752517919, "grad_norm": 0.6028041179725744, "learning_rate": 2.626158567914826e-06, "loss": 0.3453, "step": 21322 }, { "epoch": 0.9988757202417201, "grad_norm": 0.6349009378554056, "learning_rate": 2.6259691590341117e-06, "loss": 0.3164, "step": 21323 }, { "epoch": 0.9989225652316485, "grad_norm": 0.5746337994764481, "learning_rate": 2.625779749428476e-06, "loss": 0.2974, "step": 21324 }, { "epoch": 0.9989694102215768, "grad_norm": 0.6357363461126505, "learning_rate": 2.6255903390990094e-06, "loss": 0.3206, "step": 21325 }, { "epoch": 0.9990162552115052, "grad_norm": 0.5988970464924327, "learning_rate": 2.6254009280468022e-06, "loss": 0.2976, "step": 21326 }, { "epoch": 0.9990631002014334, "grad_norm": 0.6188267733433048, "learning_rate": 2.6252115162729442e-06, "loss": 0.3392, "step": 21327 }, { "epoch": 0.9991099451913618, "grad_norm": 0.6034348352909391, "learning_rate": 2.625022103778525e-06, "loss": 0.3063, "step": 21328 }, { "epoch": 0.9991567901812901, "grad_norm": 0.6516186337846519, "learning_rate": 2.6248326905646356e-06, "loss": 0.339, "step": 21329 }, { "epoch": 0.9992036351712185, "grad_norm": 0.5917300863371502, "learning_rate": 2.624643276632365e-06, "loss": 0.32, "step": 21330 }, { "epoch": 0.9992504801611468, "grad_norm": 0.5648172151757263, "learning_rate": 2.624453861982803e-06, "loss": 0.2989, "step": 21331 }, { "epoch": 0.999297325151075, "grad_norm": 0.6306752314709015, "learning_rate": 2.624264446617042e-06, "loss": 0.3414, "step": 21332 }, { "epoch": 0.9993441701410034, "grad_norm": 0.5730072662724722, "learning_rate": 2.624075030536169e-06, "loss": 0.3204, "step": 21333 }, { "epoch": 0.9993910151309318, "grad_norm": 0.5702339712020589, "learning_rate": 2.6238856137412756e-06, "loss": 0.3179, "step": 21334 }, { "epoch": 0.9994378601208601, "grad_norm": 0.6254280507968396, "learning_rate": 2.623696196233452e-06, "loss": 0.3276, "step": 21335 }, { "epoch": 0.9994847051107884, "grad_norm": 0.6145178397207334, "learning_rate": 2.6235067780137878e-06, "loss": 0.3009, "step": 21336 }, { "epoch": 0.9995315501007167, "grad_norm": 0.5621264174664204, "learning_rate": 2.6233173590833725e-06, "loss": 0.3088, "step": 21337 }, { "epoch": 0.999578395090645, "grad_norm": 0.608873400578698, "learning_rate": 2.623127939443298e-06, "loss": 0.3204, "step": 21338 }, { "epoch": 0.9996252400805734, "grad_norm": 0.6504355700601218, "learning_rate": 2.6229385190946525e-06, "loss": 0.3416, "step": 21339 }, { "epoch": 0.9996720850705018, "grad_norm": 0.5246786170345673, "learning_rate": 2.6227490980385268e-06, "loss": 0.2884, "step": 21340 }, { "epoch": 0.99971893006043, "grad_norm": 0.654194535953376, "learning_rate": 2.6225596762760107e-06, "loss": 0.347, "step": 21341 }, { "epoch": 0.9997657750503584, "grad_norm": 0.5804934267801942, "learning_rate": 2.6223702538081953e-06, "loss": 0.31, "step": 21342 }, { "epoch": 0.9998126200402867, "grad_norm": 0.6077270488282179, "learning_rate": 2.6221808306361685e-06, "loss": 0.3303, "step": 21343 }, { "epoch": 0.999859465030215, "grad_norm": 0.5627794664847283, "learning_rate": 2.6219914067610227e-06, "loss": 0.3137, "step": 21344 }, { "epoch": 0.9999063100201433, "grad_norm": 0.5914836509135921, "learning_rate": 2.621801982183846e-06, "loss": 0.3243, "step": 21345 }, { "epoch": 0.9999531550100716, "grad_norm": 0.6497334614118613, "learning_rate": 2.6216125569057305e-06, "loss": 0.3455, "step": 21346 }, { "epoch": 1.0, "grad_norm": 0.5925572660636661, "learning_rate": 2.6214231309277656e-06, "loss": 0.3404, "step": 21347 }, { "epoch": 1.0000468449899282, "grad_norm": 0.7621683541790276, "learning_rate": 2.62123370425104e-06, "loss": 0.3065, "step": 21348 }, { "epoch": 1.0000936899798567, "grad_norm": 0.6452814344281902, "learning_rate": 2.6210442768766453e-06, "loss": 0.2751, "step": 21349 }, { "epoch": 1.000140534969785, "grad_norm": 0.6623822476048196, "learning_rate": 2.6208548488056718e-06, "loss": 0.2655, "step": 21350 }, { "epoch": 1.0001873799597134, "grad_norm": 0.6722763794832238, "learning_rate": 2.620665420039208e-06, "loss": 0.2785, "step": 21351 }, { "epoch": 1.0002342249496416, "grad_norm": 0.6301510218092059, "learning_rate": 2.620475990578345e-06, "loss": 0.2907, "step": 21352 }, { "epoch": 1.00028106993957, "grad_norm": 0.6515464775838649, "learning_rate": 2.6202865604241747e-06, "loss": 0.2855, "step": 21353 }, { "epoch": 1.0003279149294984, "grad_norm": 0.5934511416635357, "learning_rate": 2.620097129577784e-06, "loss": 0.2771, "step": 21354 }, { "epoch": 1.0003747599194266, "grad_norm": 0.5929206939460382, "learning_rate": 2.6199076980402644e-06, "loss": 0.2839, "step": 21355 }, { "epoch": 1.0004216049093548, "grad_norm": 0.562104201587405, "learning_rate": 2.6197182658127063e-06, "loss": 0.2689, "step": 21356 }, { "epoch": 1.0004684498992833, "grad_norm": 0.5868383897843054, "learning_rate": 2.6195288328962003e-06, "loss": 0.2791, "step": 21357 }, { "epoch": 1.0005152948892115, "grad_norm": 0.590886450912142, "learning_rate": 2.6193393992918354e-06, "loss": 0.2839, "step": 21358 }, { "epoch": 1.00056213987914, "grad_norm": 0.6360721602139459, "learning_rate": 2.6191499650007025e-06, "loss": 0.2862, "step": 21359 }, { "epoch": 1.0006089848690682, "grad_norm": 0.6083402397294084, "learning_rate": 2.6189605300238914e-06, "loss": 0.2852, "step": 21360 }, { "epoch": 1.0006558298589965, "grad_norm": 0.6409385844575117, "learning_rate": 2.618771094362492e-06, "loss": 0.2859, "step": 21361 }, { "epoch": 1.000702674848925, "grad_norm": 0.6495743725836046, "learning_rate": 2.6185816580175954e-06, "loss": 0.282, "step": 21362 }, { "epoch": 1.0007495198388532, "grad_norm": 0.6721244669185126, "learning_rate": 2.618392220990291e-06, "loss": 0.2802, "step": 21363 }, { "epoch": 1.0007963648287816, "grad_norm": 0.600968557461043, "learning_rate": 2.6182027832816688e-06, "loss": 0.2809, "step": 21364 }, { "epoch": 1.00084320981871, "grad_norm": 0.6081989461590142, "learning_rate": 2.6180133448928196e-06, "loss": 0.2903, "step": 21365 }, { "epoch": 1.0008900548086381, "grad_norm": 0.5962887813623428, "learning_rate": 2.617823905824833e-06, "loss": 0.27, "step": 21366 }, { "epoch": 1.0009368997985666, "grad_norm": 0.596940738614943, "learning_rate": 2.6176344660787995e-06, "loss": 0.2928, "step": 21367 }, { "epoch": 1.0009837447884948, "grad_norm": 0.5516713385228559, "learning_rate": 2.6174450256558096e-06, "loss": 0.2764, "step": 21368 }, { "epoch": 1.0010305897784233, "grad_norm": 0.5924111919806592, "learning_rate": 2.6172555845569527e-06, "loss": 0.2774, "step": 21369 }, { "epoch": 1.0010774347683515, "grad_norm": 0.6600233402729188, "learning_rate": 2.6170661427833192e-06, "loss": 0.2917, "step": 21370 }, { "epoch": 1.0011242797582798, "grad_norm": 0.6083158117031688, "learning_rate": 2.6168767003360002e-06, "loss": 0.2906, "step": 21371 }, { "epoch": 1.0011711247482082, "grad_norm": 0.5781676128053397, "learning_rate": 2.6166872572160846e-06, "loss": 0.2815, "step": 21372 }, { "epoch": 1.0012179697381365, "grad_norm": 0.6363212207314644, "learning_rate": 2.616497813424663e-06, "loss": 0.2863, "step": 21373 }, { "epoch": 1.0012648147280647, "grad_norm": 0.6162337189608341, "learning_rate": 2.616308368962826e-06, "loss": 0.275, "step": 21374 }, { "epoch": 1.0013116597179932, "grad_norm": 0.554226637323728, "learning_rate": 2.6161189238316635e-06, "loss": 0.2684, "step": 21375 }, { "epoch": 1.0013585047079214, "grad_norm": 0.5890759396746714, "learning_rate": 2.615929478032266e-06, "loss": 0.2884, "step": 21376 }, { "epoch": 1.00140534969785, "grad_norm": 0.5915313128838672, "learning_rate": 2.6157400315657234e-06, "loss": 0.2829, "step": 21377 }, { "epoch": 1.0014521946877781, "grad_norm": 0.6281918502988475, "learning_rate": 2.6155505844331264e-06, "loss": 0.2816, "step": 21378 }, { "epoch": 1.0014990396777064, "grad_norm": 0.6171045605629709, "learning_rate": 2.615361136635564e-06, "loss": 0.2738, "step": 21379 }, { "epoch": 1.0015458846676348, "grad_norm": 0.580393423374886, "learning_rate": 2.615171688174128e-06, "loss": 0.2743, "step": 21380 }, { "epoch": 1.001592729657563, "grad_norm": 0.5489444640379922, "learning_rate": 2.6149822390499076e-06, "loss": 0.2645, "step": 21381 }, { "epoch": 1.0016395746474915, "grad_norm": 0.5654956554695513, "learning_rate": 2.614792789263993e-06, "loss": 0.2616, "step": 21382 }, { "epoch": 1.0016864196374198, "grad_norm": 0.609475185296607, "learning_rate": 2.614603338817475e-06, "loss": 0.2758, "step": 21383 }, { "epoch": 1.001733264627348, "grad_norm": 0.6247094932810627, "learning_rate": 2.6144138877114433e-06, "loss": 0.2789, "step": 21384 }, { "epoch": 1.0017801096172765, "grad_norm": 0.5885017359671056, "learning_rate": 2.6142244359469893e-06, "loss": 0.2714, "step": 21385 }, { "epoch": 1.0018269546072047, "grad_norm": 0.5584660381877136, "learning_rate": 2.6140349835252018e-06, "loss": 0.2696, "step": 21386 }, { "epoch": 1.0018737995971332, "grad_norm": 0.612944598430084, "learning_rate": 2.6138455304471716e-06, "loss": 0.292, "step": 21387 }, { "epoch": 1.0019206445870614, "grad_norm": 0.6444917102938598, "learning_rate": 2.613656076713989e-06, "loss": 0.2898, "step": 21388 }, { "epoch": 1.0019674895769897, "grad_norm": 0.638658770240823, "learning_rate": 2.613466622326744e-06, "loss": 0.296, "step": 21389 }, { "epoch": 1.0020143345669181, "grad_norm": 0.6021928898170886, "learning_rate": 2.6132771672865274e-06, "loss": 0.291, "step": 21390 }, { "epoch": 1.0020611795568464, "grad_norm": 0.6063935934336643, "learning_rate": 2.61308771159443e-06, "loss": 0.2802, "step": 21391 }, { "epoch": 1.0021080245467746, "grad_norm": 0.5705442945942418, "learning_rate": 2.6128982552515397e-06, "loss": 0.2732, "step": 21392 }, { "epoch": 1.002154869536703, "grad_norm": 0.5738122801510729, "learning_rate": 2.6127087982589493e-06, "loss": 0.2716, "step": 21393 }, { "epoch": 1.0022017145266313, "grad_norm": 0.5697702433375164, "learning_rate": 2.6125193406177473e-06, "loss": 0.2707, "step": 21394 }, { "epoch": 1.0022485595165598, "grad_norm": 0.583949688425172, "learning_rate": 2.6123298823290254e-06, "loss": 0.2773, "step": 21395 }, { "epoch": 1.002295404506488, "grad_norm": 0.5762072264593651, "learning_rate": 2.6121404233938734e-06, "loss": 0.2621, "step": 21396 }, { "epoch": 1.0023422494964163, "grad_norm": 0.6073203039942336, "learning_rate": 2.611950963813381e-06, "loss": 0.2974, "step": 21397 }, { "epoch": 1.0023890944863447, "grad_norm": 0.5784534531447744, "learning_rate": 2.6117615035886395e-06, "loss": 0.2769, "step": 21398 }, { "epoch": 1.002435939476273, "grad_norm": 0.5825719026729944, "learning_rate": 2.6115720427207388e-06, "loss": 0.2748, "step": 21399 }, { "epoch": 1.0024827844662014, "grad_norm": 0.575690001108276, "learning_rate": 2.611382581210768e-06, "loss": 0.2569, "step": 21400 }, { "epoch": 1.0025296294561297, "grad_norm": 0.589616860493083, "learning_rate": 2.611193119059818e-06, "loss": 0.2879, "step": 21401 }, { "epoch": 1.002576474446058, "grad_norm": 0.5723757948213621, "learning_rate": 2.611003656268982e-06, "loss": 0.2858, "step": 21402 }, { "epoch": 1.0026233194359864, "grad_norm": 0.605695352520867, "learning_rate": 2.6108141928393456e-06, "loss": 0.2841, "step": 21403 }, { "epoch": 1.0026701644259146, "grad_norm": 0.5871127439310732, "learning_rate": 2.6106247287720023e-06, "loss": 0.2639, "step": 21404 }, { "epoch": 1.002717009415843, "grad_norm": 0.5506756708855276, "learning_rate": 2.6104352640680414e-06, "loss": 0.274, "step": 21405 }, { "epoch": 1.0027638544057713, "grad_norm": 0.5812333232331687, "learning_rate": 2.6102457987285533e-06, "loss": 0.2904, "step": 21406 }, { "epoch": 1.0028106993956996, "grad_norm": 0.6189056641065335, "learning_rate": 2.610056332754628e-06, "loss": 0.2816, "step": 21407 }, { "epoch": 1.002857544385628, "grad_norm": 0.6067541019718526, "learning_rate": 2.6098668661473563e-06, "loss": 0.2859, "step": 21408 }, { "epoch": 1.0029043893755563, "grad_norm": 0.580309079045611, "learning_rate": 2.609677398907829e-06, "loss": 0.2835, "step": 21409 }, { "epoch": 1.0029512343654845, "grad_norm": 0.6216073699692082, "learning_rate": 2.6094879310371353e-06, "loss": 0.2888, "step": 21410 }, { "epoch": 1.002998079355413, "grad_norm": 0.5830879352184215, "learning_rate": 2.609298462536366e-06, "loss": 0.2799, "step": 21411 }, { "epoch": 1.0030449243453412, "grad_norm": 0.6076639830122114, "learning_rate": 2.6091089934066123e-06, "loss": 0.2988, "step": 21412 }, { "epoch": 1.0030917693352697, "grad_norm": 0.5898203582107507, "learning_rate": 2.6089195236489622e-06, "loss": 0.2921, "step": 21413 }, { "epoch": 1.003138614325198, "grad_norm": 0.5846424033437457, "learning_rate": 2.6087300532645087e-06, "loss": 0.2818, "step": 21414 }, { "epoch": 1.0031854593151261, "grad_norm": 0.6333824113166386, "learning_rate": 2.608540582254341e-06, "loss": 0.293, "step": 21415 }, { "epoch": 1.0032323043050546, "grad_norm": 0.5384497783843007, "learning_rate": 2.6083511106195497e-06, "loss": 0.2514, "step": 21416 }, { "epoch": 1.0032791492949829, "grad_norm": 0.6494738136148223, "learning_rate": 2.6081616383612247e-06, "loss": 0.2842, "step": 21417 }, { "epoch": 1.0033259942849113, "grad_norm": 0.5678476451266093, "learning_rate": 2.607972165480456e-06, "loss": 0.2805, "step": 21418 }, { "epoch": 1.0033728392748396, "grad_norm": 0.5578010198019967, "learning_rate": 2.607782691978336e-06, "loss": 0.2668, "step": 21419 }, { "epoch": 1.0034196842647678, "grad_norm": 0.5786992978242369, "learning_rate": 2.607593217855953e-06, "loss": 0.2786, "step": 21420 }, { "epoch": 1.0034665292546963, "grad_norm": 0.591157957000169, "learning_rate": 2.6074037431143977e-06, "loss": 0.2907, "step": 21421 }, { "epoch": 1.0035133742446245, "grad_norm": 0.5900154808635065, "learning_rate": 2.6072142677547614e-06, "loss": 0.2737, "step": 21422 }, { "epoch": 1.003560219234553, "grad_norm": 0.580419081366838, "learning_rate": 2.6070247917781345e-06, "loss": 0.2764, "step": 21423 }, { "epoch": 1.0036070642244812, "grad_norm": 0.6162703053662256, "learning_rate": 2.6068353151856057e-06, "loss": 0.2815, "step": 21424 }, { "epoch": 1.0036539092144094, "grad_norm": 0.58839066331712, "learning_rate": 2.6066458379782666e-06, "loss": 0.2723, "step": 21425 }, { "epoch": 1.003700754204338, "grad_norm": 0.5760413242902857, "learning_rate": 2.606456360157208e-06, "loss": 0.2725, "step": 21426 }, { "epoch": 1.0037475991942661, "grad_norm": 0.5938864687877342, "learning_rate": 2.60626688172352e-06, "loss": 0.2838, "step": 21427 }, { "epoch": 1.0037944441841944, "grad_norm": 0.6366007603759142, "learning_rate": 2.606077402678292e-06, "loss": 0.298, "step": 21428 }, { "epoch": 1.0038412891741229, "grad_norm": 0.5691569127246459, "learning_rate": 2.605887923022616e-06, "loss": 0.2875, "step": 21429 }, { "epoch": 1.003888134164051, "grad_norm": 0.6098610320634014, "learning_rate": 2.6056984427575816e-06, "loss": 0.2832, "step": 21430 }, { "epoch": 1.0039349791539796, "grad_norm": 0.5871346153279215, "learning_rate": 2.6055089618842783e-06, "loss": 0.2831, "step": 21431 }, { "epoch": 1.0039818241439078, "grad_norm": 0.5842957251118422, "learning_rate": 2.6053194804037986e-06, "loss": 0.293, "step": 21432 }, { "epoch": 1.004028669133836, "grad_norm": 0.6308502723976379, "learning_rate": 2.6051299983172312e-06, "loss": 0.2852, "step": 21433 }, { "epoch": 1.0040755141237645, "grad_norm": 0.5981065979180445, "learning_rate": 2.6049405156256668e-06, "loss": 0.2802, "step": 21434 }, { "epoch": 1.0041223591136927, "grad_norm": 0.5999552721144881, "learning_rate": 2.6047510323301966e-06, "loss": 0.2841, "step": 21435 }, { "epoch": 1.0041692041036212, "grad_norm": 0.5567591981806329, "learning_rate": 2.6045615484319096e-06, "loss": 0.2622, "step": 21436 }, { "epoch": 1.0042160490935494, "grad_norm": 0.5718803037668873, "learning_rate": 2.604372063931898e-06, "loss": 0.2876, "step": 21437 }, { "epoch": 1.0042628940834777, "grad_norm": 0.6417719842063629, "learning_rate": 2.604182578831252e-06, "loss": 0.2639, "step": 21438 }, { "epoch": 1.0043097390734061, "grad_norm": 0.6054897941397254, "learning_rate": 2.6039930931310604e-06, "loss": 0.2671, "step": 21439 }, { "epoch": 1.0043565840633344, "grad_norm": 0.5500629165717439, "learning_rate": 2.603803606832415e-06, "loss": 0.2644, "step": 21440 }, { "epoch": 1.0044034290532629, "grad_norm": 0.554167661335181, "learning_rate": 2.6036141199364064e-06, "loss": 0.2807, "step": 21441 }, { "epoch": 1.004450274043191, "grad_norm": 0.5915527659481743, "learning_rate": 2.603424632444124e-06, "loss": 0.2797, "step": 21442 }, { "epoch": 1.0044971190331193, "grad_norm": 0.550807902821988, "learning_rate": 2.6032351443566584e-06, "loss": 0.2621, "step": 21443 }, { "epoch": 1.0045439640230478, "grad_norm": 0.5781126009101997, "learning_rate": 2.603045655675102e-06, "loss": 0.2799, "step": 21444 }, { "epoch": 1.004590809012976, "grad_norm": 0.5750630801517375, "learning_rate": 2.6028561664005425e-06, "loss": 0.2684, "step": 21445 }, { "epoch": 1.0046376540029043, "grad_norm": 0.6505500273191692, "learning_rate": 2.6026666765340715e-06, "loss": 0.2875, "step": 21446 }, { "epoch": 1.0046844989928327, "grad_norm": 0.5909704528999197, "learning_rate": 2.60247718607678e-06, "loss": 0.2753, "step": 21447 }, { "epoch": 1.004731343982761, "grad_norm": 0.603126573152084, "learning_rate": 2.602287695029759e-06, "loss": 0.2842, "step": 21448 }, { "epoch": 1.0047781889726894, "grad_norm": 0.6073147691624754, "learning_rate": 2.602098203394096e-06, "loss": 0.2966, "step": 21449 }, { "epoch": 1.0048250339626177, "grad_norm": 0.5496458088509357, "learning_rate": 2.601908711170885e-06, "loss": 0.2597, "step": 21450 }, { "epoch": 1.004871878952546, "grad_norm": 0.5710188017963577, "learning_rate": 2.601719218361215e-06, "loss": 0.2852, "step": 21451 }, { "epoch": 1.0049187239424744, "grad_norm": 0.5993721522448535, "learning_rate": 2.601529724966176e-06, "loss": 0.2488, "step": 21452 }, { "epoch": 1.0049655689324026, "grad_norm": 0.5970863115442636, "learning_rate": 2.6013402309868586e-06, "loss": 0.2639, "step": 21453 }, { "epoch": 1.005012413922331, "grad_norm": 0.6191269758618422, "learning_rate": 2.6011507364243544e-06, "loss": 0.2804, "step": 21454 }, { "epoch": 1.0050592589122593, "grad_norm": 0.6061141095889278, "learning_rate": 2.6009612412797526e-06, "loss": 0.2942, "step": 21455 }, { "epoch": 1.0051061039021876, "grad_norm": 0.6174344531803915, "learning_rate": 2.6007717455541442e-06, "loss": 0.2849, "step": 21456 }, { "epoch": 1.005152948892116, "grad_norm": 0.570935397784869, "learning_rate": 2.60058224924862e-06, "loss": 0.2757, "step": 21457 }, { "epoch": 1.0051997938820443, "grad_norm": 0.5916410568832178, "learning_rate": 2.6003927523642704e-06, "loss": 0.2844, "step": 21458 }, { "epoch": 1.0052466388719727, "grad_norm": 0.5698155882057766, "learning_rate": 2.600203254902185e-06, "loss": 0.2829, "step": 21459 }, { "epoch": 1.005293483861901, "grad_norm": 0.5916554648618924, "learning_rate": 2.600013756863456e-06, "loss": 0.2798, "step": 21460 }, { "epoch": 1.0053403288518292, "grad_norm": 0.553507677834582, "learning_rate": 2.5998242582491727e-06, "loss": 0.2694, "step": 21461 }, { "epoch": 1.0053871738417577, "grad_norm": 0.5777071908712633, "learning_rate": 2.5996347590604253e-06, "loss": 0.27, "step": 21462 }, { "epoch": 1.005434018831686, "grad_norm": 0.5777704171859686, "learning_rate": 2.5994452592983055e-06, "loss": 0.2657, "step": 21463 }, { "epoch": 1.0054808638216142, "grad_norm": 0.5400837702306492, "learning_rate": 2.599255758963903e-06, "loss": 0.2766, "step": 21464 }, { "epoch": 1.0055277088115426, "grad_norm": 0.5636400240790542, "learning_rate": 2.5990662580583085e-06, "loss": 0.2764, "step": 21465 }, { "epoch": 1.0055745538014709, "grad_norm": 0.5986224312336009, "learning_rate": 2.5988767565826127e-06, "loss": 0.28, "step": 21466 }, { "epoch": 1.0056213987913993, "grad_norm": 0.5641099527498186, "learning_rate": 2.598687254537906e-06, "loss": 0.2825, "step": 21467 }, { "epoch": 1.0056682437813276, "grad_norm": 0.5923517659431384, "learning_rate": 2.598497751925279e-06, "loss": 0.2495, "step": 21468 }, { "epoch": 1.0057150887712558, "grad_norm": 0.5734931663637441, "learning_rate": 2.5983082487458227e-06, "loss": 0.275, "step": 21469 }, { "epoch": 1.0057619337611843, "grad_norm": 0.5934667581046824, "learning_rate": 2.5981187450006262e-06, "loss": 0.2753, "step": 21470 }, { "epoch": 1.0058087787511125, "grad_norm": 0.5634181076039196, "learning_rate": 2.5979292406907807e-06, "loss": 0.275, "step": 21471 }, { "epoch": 1.005855623741041, "grad_norm": 0.5938274006623083, "learning_rate": 2.597739735817379e-06, "loss": 0.2891, "step": 21472 }, { "epoch": 1.0059024687309692, "grad_norm": 0.5751178042640662, "learning_rate": 2.5975502303815075e-06, "loss": 0.2632, "step": 21473 }, { "epoch": 1.0059493137208975, "grad_norm": 0.5532023665786715, "learning_rate": 2.5973607243842596e-06, "loss": 0.2586, "step": 21474 }, { "epoch": 1.005996158710826, "grad_norm": 0.5814381381471735, "learning_rate": 2.597171217826726e-06, "loss": 0.2785, "step": 21475 }, { "epoch": 1.0060430037007542, "grad_norm": 0.5670507843890359, "learning_rate": 2.596981710709996e-06, "loss": 0.2728, "step": 21476 }, { "epoch": 1.0060898486906826, "grad_norm": 0.6051742447280076, "learning_rate": 2.59679220303516e-06, "loss": 0.2857, "step": 21477 }, { "epoch": 1.0061366936806109, "grad_norm": 0.5995862266560684, "learning_rate": 2.59660269480331e-06, "loss": 0.2846, "step": 21478 }, { "epoch": 1.0061835386705391, "grad_norm": 0.5608173316171454, "learning_rate": 2.5964131860155354e-06, "loss": 0.2743, "step": 21479 }, { "epoch": 1.0062303836604676, "grad_norm": 0.5708419961014086, "learning_rate": 2.5962236766729276e-06, "loss": 0.2585, "step": 21480 }, { "epoch": 1.0062772286503958, "grad_norm": 0.6311582155446115, "learning_rate": 2.5960341667765766e-06, "loss": 0.2969, "step": 21481 }, { "epoch": 1.006324073640324, "grad_norm": 0.6419563608413341, "learning_rate": 2.595844656327573e-06, "loss": 0.2868, "step": 21482 }, { "epoch": 1.0063709186302525, "grad_norm": 0.5759892311312336, "learning_rate": 2.5956551453270066e-06, "loss": 0.2599, "step": 21483 }, { "epoch": 1.0064177636201808, "grad_norm": 0.5879242616144386, "learning_rate": 2.5954656337759704e-06, "loss": 0.2617, "step": 21484 }, { "epoch": 1.0064646086101092, "grad_norm": 0.6054422610360221, "learning_rate": 2.5952761216755524e-06, "loss": 0.277, "step": 21485 }, { "epoch": 1.0065114536000375, "grad_norm": 0.582636651651883, "learning_rate": 2.5950866090268446e-06, "loss": 0.2668, "step": 21486 }, { "epoch": 1.0065582985899657, "grad_norm": 0.5481302297981152, "learning_rate": 2.5948970958309373e-06, "loss": 0.263, "step": 21487 }, { "epoch": 1.0066051435798942, "grad_norm": 0.5861444797800168, "learning_rate": 2.5947075820889208e-06, "loss": 0.2775, "step": 21488 }, { "epoch": 1.0066519885698224, "grad_norm": 0.5941741211801688, "learning_rate": 2.594518067801887e-06, "loss": 0.2801, "step": 21489 }, { "epoch": 1.0066988335597509, "grad_norm": 0.5646469302714154, "learning_rate": 2.5943285529709243e-06, "loss": 0.2667, "step": 21490 }, { "epoch": 1.0067456785496791, "grad_norm": 0.6024731077392902, "learning_rate": 2.5941390375971247e-06, "loss": 0.2589, "step": 21491 }, { "epoch": 1.0067925235396074, "grad_norm": 0.615909762157642, "learning_rate": 2.5939495216815784e-06, "loss": 0.2796, "step": 21492 }, { "epoch": 1.0068393685295358, "grad_norm": 0.6090296395945998, "learning_rate": 2.5937600052253774e-06, "loss": 0.2781, "step": 21493 }, { "epoch": 1.006886213519464, "grad_norm": 0.5446008836214866, "learning_rate": 2.5935704882296096e-06, "loss": 0.2555, "step": 21494 }, { "epoch": 1.0069330585093925, "grad_norm": 0.6180632598722665, "learning_rate": 2.5933809706953677e-06, "loss": 0.2878, "step": 21495 }, { "epoch": 1.0069799034993208, "grad_norm": 0.5385363643059204, "learning_rate": 2.5931914526237423e-06, "loss": 0.2612, "step": 21496 }, { "epoch": 1.007026748489249, "grad_norm": 0.5954476564405571, "learning_rate": 2.5930019340158234e-06, "loss": 0.2815, "step": 21497 }, { "epoch": 1.0070735934791775, "grad_norm": 0.5697450703440975, "learning_rate": 2.5928124148727013e-06, "loss": 0.288, "step": 21498 }, { "epoch": 1.0071204384691057, "grad_norm": 0.5821671998254192, "learning_rate": 2.5926228951954674e-06, "loss": 0.2675, "step": 21499 }, { "epoch": 1.007167283459034, "grad_norm": 0.6235944643789101, "learning_rate": 2.592433374985212e-06, "loss": 0.2975, "step": 21500 }, { "epoch": 1.0072141284489624, "grad_norm": 0.5713401880992023, "learning_rate": 2.5922438542430256e-06, "loss": 0.283, "step": 21501 }, { "epoch": 1.0072609734388906, "grad_norm": 0.6487494870513466, "learning_rate": 2.592054332969999e-06, "loss": 0.2807, "step": 21502 }, { "epoch": 1.0073078184288191, "grad_norm": 0.6423663741693927, "learning_rate": 2.5918648111672234e-06, "loss": 0.2888, "step": 21503 }, { "epoch": 1.0073546634187474, "grad_norm": 0.5784184295933986, "learning_rate": 2.5916752888357876e-06, "loss": 0.2521, "step": 21504 }, { "epoch": 1.0074015084086756, "grad_norm": 0.5877894180549523, "learning_rate": 2.591485765976785e-06, "loss": 0.2643, "step": 21505 }, { "epoch": 1.007448353398604, "grad_norm": 0.546795214034399, "learning_rate": 2.5912962425913033e-06, "loss": 0.2785, "step": 21506 }, { "epoch": 1.0074951983885323, "grad_norm": 0.5982724328555229, "learning_rate": 2.591106718680436e-06, "loss": 0.2887, "step": 21507 }, { "epoch": 1.0075420433784608, "grad_norm": 0.5595245356330213, "learning_rate": 2.5909171942452727e-06, "loss": 0.2602, "step": 21508 }, { "epoch": 1.007588888368389, "grad_norm": 0.5677393122082051, "learning_rate": 2.5907276692869027e-06, "loss": 0.2721, "step": 21509 }, { "epoch": 1.0076357333583172, "grad_norm": 0.6307518418781827, "learning_rate": 2.590538143806418e-06, "loss": 0.2732, "step": 21510 }, { "epoch": 1.0076825783482457, "grad_norm": 0.6107078502516854, "learning_rate": 2.5903486178049092e-06, "loss": 0.2953, "step": 21511 }, { "epoch": 1.007729423338174, "grad_norm": 0.5682235319563682, "learning_rate": 2.590159091283467e-06, "loss": 0.2664, "step": 21512 }, { "epoch": 1.0077762683281024, "grad_norm": 0.6073738681463896, "learning_rate": 2.589969564243181e-06, "loss": 0.2871, "step": 21513 }, { "epoch": 1.0078231133180307, "grad_norm": 0.5701684278288811, "learning_rate": 2.589780036685144e-06, "loss": 0.2758, "step": 21514 }, { "epoch": 1.007869958307959, "grad_norm": 0.5757123969077393, "learning_rate": 2.589590508610445e-06, "loss": 0.2684, "step": 21515 }, { "epoch": 1.0079168032978874, "grad_norm": 0.60430886539235, "learning_rate": 2.5894009800201745e-06, "loss": 0.2855, "step": 21516 }, { "epoch": 1.0079636482878156, "grad_norm": 0.5437358018834882, "learning_rate": 2.589211450915425e-06, "loss": 0.2667, "step": 21517 }, { "epoch": 1.0080104932777438, "grad_norm": 0.5905541504842265, "learning_rate": 2.5890219212972856e-06, "loss": 0.2743, "step": 21518 }, { "epoch": 1.0080573382676723, "grad_norm": 0.6017004031325847, "learning_rate": 2.588832391166847e-06, "loss": 0.2828, "step": 21519 }, { "epoch": 1.0081041832576005, "grad_norm": 0.5723174376450602, "learning_rate": 2.588642860525201e-06, "loss": 0.2793, "step": 21520 }, { "epoch": 1.008151028247529, "grad_norm": 0.5700429464785224, "learning_rate": 2.5884533293734372e-06, "loss": 0.2719, "step": 21521 }, { "epoch": 1.0081978732374572, "grad_norm": 0.5490990791096536, "learning_rate": 2.5882637977126462e-06, "loss": 0.2646, "step": 21522 }, { "epoch": 1.0082447182273855, "grad_norm": 0.6071188373097215, "learning_rate": 2.5880742655439196e-06, "loss": 0.3033, "step": 21523 }, { "epoch": 1.008291563217314, "grad_norm": 0.603050310761784, "learning_rate": 2.587884732868348e-06, "loss": 0.2839, "step": 21524 }, { "epoch": 1.0083384082072422, "grad_norm": 0.5720579928021144, "learning_rate": 2.5876951996870215e-06, "loss": 0.28, "step": 21525 }, { "epoch": 1.0083852531971707, "grad_norm": 0.537286442881661, "learning_rate": 2.587505666001031e-06, "loss": 0.2744, "step": 21526 }, { "epoch": 1.008432098187099, "grad_norm": 0.6192289802995181, "learning_rate": 2.587316131811468e-06, "loss": 0.2794, "step": 21527 }, { "epoch": 1.0084789431770271, "grad_norm": 0.5998495221489198, "learning_rate": 2.5871265971194227e-06, "loss": 0.2913, "step": 21528 }, { "epoch": 1.0085257881669556, "grad_norm": 0.654339887110915, "learning_rate": 2.5869370619259847e-06, "loss": 0.2927, "step": 21529 }, { "epoch": 1.0085726331568838, "grad_norm": 0.5692729462849858, "learning_rate": 2.586747526232247e-06, "loss": 0.2667, "step": 21530 }, { "epoch": 1.0086194781468123, "grad_norm": 0.5763085419457868, "learning_rate": 2.586557990039299e-06, "loss": 0.3002, "step": 21531 }, { "epoch": 1.0086663231367405, "grad_norm": 0.5329172727434601, "learning_rate": 2.5863684533482306e-06, "loss": 0.2636, "step": 21532 }, { "epoch": 1.0087131681266688, "grad_norm": 0.5995125781696751, "learning_rate": 2.586178916160134e-06, "loss": 0.286, "step": 21533 }, { "epoch": 1.0087600131165972, "grad_norm": 0.5535075599929422, "learning_rate": 2.585989378476099e-06, "loss": 0.2641, "step": 21534 }, { "epoch": 1.0088068581065255, "grad_norm": 0.6187124965215689, "learning_rate": 2.5857998402972172e-06, "loss": 0.3061, "step": 21535 }, { "epoch": 1.0088537030964537, "grad_norm": 0.5631585998219092, "learning_rate": 2.585610301624579e-06, "loss": 0.2708, "step": 21536 }, { "epoch": 1.0089005480863822, "grad_norm": 0.5916523913157558, "learning_rate": 2.585420762459275e-06, "loss": 0.2718, "step": 21537 }, { "epoch": 1.0089473930763104, "grad_norm": 0.5943879921778011, "learning_rate": 2.585231222802396e-06, "loss": 0.2879, "step": 21538 }, { "epoch": 1.008994238066239, "grad_norm": 0.6061696785200017, "learning_rate": 2.585041682655032e-06, "loss": 0.2641, "step": 21539 }, { "epoch": 1.0090410830561671, "grad_norm": 0.5522905508472515, "learning_rate": 2.584852142018275e-06, "loss": 0.2788, "step": 21540 }, { "epoch": 1.0090879280460954, "grad_norm": 0.6221863093344113, "learning_rate": 2.5846626008932147e-06, "loss": 0.284, "step": 21541 }, { "epoch": 1.0091347730360238, "grad_norm": 0.6422300214045638, "learning_rate": 2.584473059280944e-06, "loss": 0.2879, "step": 21542 }, { "epoch": 1.009181618025952, "grad_norm": 0.6102570317998017, "learning_rate": 2.5842835171825502e-06, "loss": 0.2874, "step": 21543 }, { "epoch": 1.0092284630158805, "grad_norm": 0.5632450472298144, "learning_rate": 2.5840939745991266e-06, "loss": 0.2747, "step": 21544 }, { "epoch": 1.0092753080058088, "grad_norm": 0.5696671151114231, "learning_rate": 2.583904431531764e-06, "loss": 0.2681, "step": 21545 }, { "epoch": 1.009322152995737, "grad_norm": 0.6022980881852236, "learning_rate": 2.583714887981552e-06, "loss": 0.281, "step": 21546 }, { "epoch": 1.0093689979856655, "grad_norm": 0.5959589243024137, "learning_rate": 2.5835253439495816e-06, "loss": 0.2639, "step": 21547 }, { "epoch": 1.0094158429755937, "grad_norm": 0.6021802673811641, "learning_rate": 2.583335799436944e-06, "loss": 0.28, "step": 21548 }, { "epoch": 1.0094626879655222, "grad_norm": 0.573254164116356, "learning_rate": 2.5831462544447306e-06, "loss": 0.2709, "step": 21549 }, { "epoch": 1.0095095329554504, "grad_norm": 0.6052384453109538, "learning_rate": 2.5829567089740305e-06, "loss": 0.2772, "step": 21550 }, { "epoch": 1.0095563779453787, "grad_norm": 0.5701083962900574, "learning_rate": 2.5827671630259354e-06, "loss": 0.2708, "step": 21551 }, { "epoch": 1.0096032229353071, "grad_norm": 0.5748570759209107, "learning_rate": 2.5825776166015363e-06, "loss": 0.28, "step": 21552 }, { "epoch": 1.0096500679252354, "grad_norm": 0.5766918665917555, "learning_rate": 2.5823880697019238e-06, "loss": 0.2748, "step": 21553 }, { "epoch": 1.0096969129151636, "grad_norm": 0.5738192546943963, "learning_rate": 2.582198522328189e-06, "loss": 0.2677, "step": 21554 }, { "epoch": 1.009743757905092, "grad_norm": 0.6613941149970992, "learning_rate": 2.5820089744814215e-06, "loss": 0.3016, "step": 21555 }, { "epoch": 1.0097906028950203, "grad_norm": 0.6124518128294726, "learning_rate": 2.5818194261627133e-06, "loss": 0.2941, "step": 21556 }, { "epoch": 1.0098374478849488, "grad_norm": 0.6021160112027285, "learning_rate": 2.581629877373155e-06, "loss": 0.2756, "step": 21557 }, { "epoch": 1.009884292874877, "grad_norm": 0.6269104895579765, "learning_rate": 2.581440328113837e-06, "loss": 0.2759, "step": 21558 }, { "epoch": 1.0099311378648053, "grad_norm": 0.5889431747764353, "learning_rate": 2.5812507783858507e-06, "loss": 0.2704, "step": 21559 }, { "epoch": 1.0099779828547337, "grad_norm": 0.5905630718247242, "learning_rate": 2.581061228190287e-06, "loss": 0.2815, "step": 21560 }, { "epoch": 1.010024827844662, "grad_norm": 0.601792734950296, "learning_rate": 2.5808716775282354e-06, "loss": 0.2962, "step": 21561 }, { "epoch": 1.0100716728345904, "grad_norm": 0.608755389883279, "learning_rate": 2.580682126400788e-06, "loss": 0.293, "step": 21562 }, { "epoch": 1.0101185178245187, "grad_norm": 0.6337054868036596, "learning_rate": 2.580492574809036e-06, "loss": 0.2867, "step": 21563 }, { "epoch": 1.010165362814447, "grad_norm": 0.5433089655111073, "learning_rate": 2.5803030227540684e-06, "loss": 0.278, "step": 21564 }, { "epoch": 1.0102122078043754, "grad_norm": 0.5954750937784717, "learning_rate": 2.5801134702369774e-06, "loss": 0.2863, "step": 21565 }, { "epoch": 1.0102590527943036, "grad_norm": 0.5152682237318426, "learning_rate": 2.5799239172588536e-06, "loss": 0.2475, "step": 21566 }, { "epoch": 1.010305897784232, "grad_norm": 0.5600168861109737, "learning_rate": 2.579734363820788e-06, "loss": 0.2682, "step": 21567 }, { "epoch": 1.0103527427741603, "grad_norm": 0.6232078364952713, "learning_rate": 2.5795448099238705e-06, "loss": 0.263, "step": 21568 }, { "epoch": 1.0103995877640886, "grad_norm": 0.5954607703623643, "learning_rate": 2.5793552555691933e-06, "loss": 0.2923, "step": 21569 }, { "epoch": 1.010446432754017, "grad_norm": 0.5651349036310547, "learning_rate": 2.579165700757846e-06, "loss": 0.2581, "step": 21570 }, { "epoch": 1.0104932777439453, "grad_norm": 0.5889591639553686, "learning_rate": 2.57897614549092e-06, "loss": 0.2665, "step": 21571 }, { "epoch": 1.0105401227338735, "grad_norm": 0.6548733717864943, "learning_rate": 2.5787865897695067e-06, "loss": 0.2831, "step": 21572 }, { "epoch": 1.010586967723802, "grad_norm": 0.6540144158478717, "learning_rate": 2.5785970335946962e-06, "loss": 0.2856, "step": 21573 }, { "epoch": 1.0106338127137302, "grad_norm": 0.6797206730865459, "learning_rate": 2.5784074769675795e-06, "loss": 0.2799, "step": 21574 }, { "epoch": 1.0106806577036587, "grad_norm": 0.5547116911284428, "learning_rate": 2.578217919889247e-06, "loss": 0.265, "step": 21575 }, { "epoch": 1.010727502693587, "grad_norm": 0.6216956459880175, "learning_rate": 2.5780283623607906e-06, "loss": 0.2867, "step": 21576 }, { "epoch": 1.0107743476835152, "grad_norm": 0.5673391191559252, "learning_rate": 2.5778388043833e-06, "loss": 0.277, "step": 21577 }, { "epoch": 1.0108211926734436, "grad_norm": 0.5858058735274785, "learning_rate": 2.577649245957868e-06, "loss": 0.257, "step": 21578 }, { "epoch": 1.0108680376633719, "grad_norm": 0.6160783496934136, "learning_rate": 2.5774596870855824e-06, "loss": 0.278, "step": 21579 }, { "epoch": 1.0109148826533003, "grad_norm": 0.5905505629111903, "learning_rate": 2.5772701277675364e-06, "loss": 0.2869, "step": 21580 }, { "epoch": 1.0109617276432286, "grad_norm": 0.6012177614541873, "learning_rate": 2.5770805680048206e-06, "loss": 0.2598, "step": 21581 }, { "epoch": 1.0110085726331568, "grad_norm": 0.60271656624072, "learning_rate": 2.5768910077985254e-06, "loss": 0.2826, "step": 21582 }, { "epoch": 1.0110554176230853, "grad_norm": 0.5982905783401983, "learning_rate": 2.5767014471497407e-06, "loss": 0.2821, "step": 21583 }, { "epoch": 1.0111022626130135, "grad_norm": 0.5967519010621534, "learning_rate": 2.57651188605956e-06, "loss": 0.2754, "step": 21584 }, { "epoch": 1.011149107602942, "grad_norm": 0.5726098911267379, "learning_rate": 2.576322324529072e-06, "loss": 0.2821, "step": 21585 }, { "epoch": 1.0111959525928702, "grad_norm": 0.6171353776387122, "learning_rate": 2.576132762559368e-06, "loss": 0.2859, "step": 21586 }, { "epoch": 1.0112427975827984, "grad_norm": 0.5634858589334082, "learning_rate": 2.5759432001515395e-06, "loss": 0.2587, "step": 21587 }, { "epoch": 1.011289642572727, "grad_norm": 0.5562023920920972, "learning_rate": 2.575753637306677e-06, "loss": 0.2639, "step": 21588 }, { "epoch": 1.0113364875626552, "grad_norm": 0.5739190808858095, "learning_rate": 2.575564074025871e-06, "loss": 0.2707, "step": 21589 }, { "epoch": 1.0113833325525834, "grad_norm": 0.5702667691555203, "learning_rate": 2.5753745103102127e-06, "loss": 0.2776, "step": 21590 }, { "epoch": 1.0114301775425119, "grad_norm": 0.5509759283183285, "learning_rate": 2.5751849461607937e-06, "loss": 0.2711, "step": 21591 }, { "epoch": 1.01147702253244, "grad_norm": 0.5893988191732122, "learning_rate": 2.574995381578703e-06, "loss": 0.2796, "step": 21592 }, { "epoch": 1.0115238675223686, "grad_norm": 0.6430481260715255, "learning_rate": 2.574805816565033e-06, "loss": 0.282, "step": 21593 }, { "epoch": 1.0115707125122968, "grad_norm": 0.6042958693583571, "learning_rate": 2.574616251120876e-06, "loss": 0.2766, "step": 21594 }, { "epoch": 1.011617557502225, "grad_norm": 0.5797317656312763, "learning_rate": 2.5744266852473192e-06, "loss": 0.2647, "step": 21595 }, { "epoch": 1.0116644024921535, "grad_norm": 0.6351800898252445, "learning_rate": 2.574237118945456e-06, "loss": 0.2972, "step": 21596 }, { "epoch": 1.0117112474820817, "grad_norm": 0.5851310814575389, "learning_rate": 2.5740475522163775e-06, "loss": 0.2644, "step": 21597 }, { "epoch": 1.0117580924720102, "grad_norm": 0.576821323927488, "learning_rate": 2.573857985061174e-06, "loss": 0.273, "step": 21598 }, { "epoch": 1.0118049374619384, "grad_norm": 0.5979376902331479, "learning_rate": 2.5736684174809357e-06, "loss": 0.281, "step": 21599 }, { "epoch": 1.0118517824518667, "grad_norm": 0.5926791531950689, "learning_rate": 2.573478849476755e-06, "loss": 0.2976, "step": 21600 }, { "epoch": 1.0118986274417952, "grad_norm": 0.6237656988645676, "learning_rate": 2.5732892810497212e-06, "loss": 0.2749, "step": 21601 }, { "epoch": 1.0119454724317234, "grad_norm": 0.5300564163171011, "learning_rate": 2.573099712200926e-06, "loss": 0.2606, "step": 21602 }, { "epoch": 1.0119923174216519, "grad_norm": 0.5831435572527719, "learning_rate": 2.5729101429314607e-06, "loss": 0.2667, "step": 21603 }, { "epoch": 1.01203916241158, "grad_norm": 0.5954874122325464, "learning_rate": 2.5727205732424153e-06, "loss": 0.2775, "step": 21604 }, { "epoch": 1.0120860074015083, "grad_norm": 0.5997294855337162, "learning_rate": 2.5725310031348822e-06, "loss": 0.2812, "step": 21605 }, { "epoch": 1.0121328523914368, "grad_norm": 0.5939342094333955, "learning_rate": 2.5723414326099513e-06, "loss": 0.2721, "step": 21606 }, { "epoch": 1.012179697381365, "grad_norm": 0.6034034702727773, "learning_rate": 2.572151861668713e-06, "loss": 0.2908, "step": 21607 }, { "epoch": 1.0122265423712933, "grad_norm": 0.5610502124553578, "learning_rate": 2.5719622903122594e-06, "loss": 0.263, "step": 21608 }, { "epoch": 1.0122733873612217, "grad_norm": 0.6433600203238965, "learning_rate": 2.5717727185416806e-06, "loss": 0.2776, "step": 21609 }, { "epoch": 1.01232023235115, "grad_norm": 0.6043030251110585, "learning_rate": 2.5715831463580676e-06, "loss": 0.2769, "step": 21610 }, { "epoch": 1.0123670773410784, "grad_norm": 0.5996308935717216, "learning_rate": 2.5713935737625113e-06, "loss": 0.2801, "step": 21611 }, { "epoch": 1.0124139223310067, "grad_norm": 0.6046442622263475, "learning_rate": 2.5712040007561045e-06, "loss": 0.2877, "step": 21612 }, { "epoch": 1.012460767320935, "grad_norm": 0.5463843726217708, "learning_rate": 2.571014427339935e-06, "loss": 0.2714, "step": 21613 }, { "epoch": 1.0125076123108634, "grad_norm": 0.5449954945871178, "learning_rate": 2.570824853515096e-06, "loss": 0.2714, "step": 21614 }, { "epoch": 1.0125544573007916, "grad_norm": 0.6113871057036716, "learning_rate": 2.570635279282678e-06, "loss": 0.2913, "step": 21615 }, { "epoch": 1.01260130229072, "grad_norm": 0.6341390862009204, "learning_rate": 2.5704457046437715e-06, "loss": 0.2912, "step": 21616 }, { "epoch": 1.0126481472806483, "grad_norm": 0.5906093302882303, "learning_rate": 2.570256129599467e-06, "loss": 0.2672, "step": 21617 }, { "epoch": 1.0126949922705766, "grad_norm": 0.5724635968949408, "learning_rate": 2.5700665541508568e-06, "loss": 0.2708, "step": 21618 }, { "epoch": 1.012741837260505, "grad_norm": 0.5715145810274338, "learning_rate": 2.5698769782990313e-06, "loss": 0.2924, "step": 21619 }, { "epoch": 1.0127886822504333, "grad_norm": 0.599726737490771, "learning_rate": 2.5696874020450808e-06, "loss": 0.2768, "step": 21620 }, { "epoch": 1.0128355272403617, "grad_norm": 0.6019210015646107, "learning_rate": 2.569497825390098e-06, "loss": 0.2904, "step": 21621 }, { "epoch": 1.01288237223029, "grad_norm": 0.6000219251641167, "learning_rate": 2.5693082483351718e-06, "loss": 0.2907, "step": 21622 }, { "epoch": 1.0129292172202182, "grad_norm": 0.6062257354733186, "learning_rate": 2.569118670881393e-06, "loss": 0.2888, "step": 21623 }, { "epoch": 1.0129760622101467, "grad_norm": 0.5558456868415828, "learning_rate": 2.5689290930298554e-06, "loss": 0.27, "step": 21624 }, { "epoch": 1.013022907200075, "grad_norm": 0.7045334179220958, "learning_rate": 2.5687395147816467e-06, "loss": 0.2958, "step": 21625 }, { "epoch": 1.0130697521900032, "grad_norm": 0.5612340224265813, "learning_rate": 2.568549936137861e-06, "loss": 0.2611, "step": 21626 }, { "epoch": 1.0131165971799316, "grad_norm": 0.5584453092805928, "learning_rate": 2.568360357099587e-06, "loss": 0.2594, "step": 21627 }, { "epoch": 1.0131634421698599, "grad_norm": 0.6340958831492545, "learning_rate": 2.5681707776679156e-06, "loss": 0.2916, "step": 21628 }, { "epoch": 1.0132102871597883, "grad_norm": 0.6046843313181076, "learning_rate": 2.567981197843939e-06, "loss": 0.292, "step": 21629 }, { "epoch": 1.0132571321497166, "grad_norm": 0.62416465956352, "learning_rate": 2.567791617628748e-06, "loss": 0.2934, "step": 21630 }, { "epoch": 1.0133039771396448, "grad_norm": 0.5539577031161593, "learning_rate": 2.5676020370234326e-06, "loss": 0.2666, "step": 21631 }, { "epoch": 1.0133508221295733, "grad_norm": 0.5515640123818222, "learning_rate": 2.5674124560290846e-06, "loss": 0.2765, "step": 21632 }, { "epoch": 1.0133976671195015, "grad_norm": 0.5641850351391231, "learning_rate": 2.567222874646796e-06, "loss": 0.2711, "step": 21633 }, { "epoch": 1.01344451210943, "grad_norm": 0.6126910732876808, "learning_rate": 2.5670332928776555e-06, "loss": 0.2887, "step": 21634 }, { "epoch": 1.0134913570993582, "grad_norm": 0.6104492027841214, "learning_rate": 2.566843710722755e-06, "loss": 0.2789, "step": 21635 }, { "epoch": 1.0135382020892865, "grad_norm": 0.6467606495369552, "learning_rate": 2.566654128183187e-06, "loss": 0.2849, "step": 21636 }, { "epoch": 1.013585047079215, "grad_norm": 0.5673088739732103, "learning_rate": 2.5664645452600407e-06, "loss": 0.2809, "step": 21637 }, { "epoch": 1.0136318920691432, "grad_norm": 0.6666627552837661, "learning_rate": 2.566274961954407e-06, "loss": 0.2811, "step": 21638 }, { "epoch": 1.0136787370590716, "grad_norm": 0.6354815607089214, "learning_rate": 2.5660853782673782e-06, "loss": 0.2962, "step": 21639 }, { "epoch": 1.0137255820489999, "grad_norm": 0.6027255400381653, "learning_rate": 2.5658957942000447e-06, "loss": 0.2708, "step": 21640 }, { "epoch": 1.0137724270389281, "grad_norm": 0.5947733971119908, "learning_rate": 2.565706209753497e-06, "loss": 0.2826, "step": 21641 }, { "epoch": 1.0138192720288566, "grad_norm": 0.5472677335479232, "learning_rate": 2.565516624928827e-06, "loss": 0.2624, "step": 21642 }, { "epoch": 1.0138661170187848, "grad_norm": 0.5854016267504236, "learning_rate": 2.565327039727125e-06, "loss": 0.285, "step": 21643 }, { "epoch": 1.013912962008713, "grad_norm": 0.6027438431510045, "learning_rate": 2.565137454149482e-06, "loss": 0.2883, "step": 21644 }, { "epoch": 1.0139598069986415, "grad_norm": 0.5941592577860794, "learning_rate": 2.5649478681969904e-06, "loss": 0.2918, "step": 21645 }, { "epoch": 1.0140066519885698, "grad_norm": 0.5725393442539393, "learning_rate": 2.564758281870739e-06, "loss": 0.2467, "step": 21646 }, { "epoch": 1.0140534969784982, "grad_norm": 0.6007389673990841, "learning_rate": 2.5645686951718207e-06, "loss": 0.2841, "step": 21647 }, { "epoch": 1.0141003419684265, "grad_norm": 0.6287867504256456, "learning_rate": 2.5643791081013255e-06, "loss": 0.2921, "step": 21648 }, { "epoch": 1.0141471869583547, "grad_norm": 0.5916749590891505, "learning_rate": 2.5641895206603452e-06, "loss": 0.279, "step": 21649 }, { "epoch": 1.0141940319482832, "grad_norm": 0.6546362301816994, "learning_rate": 2.5639999328499697e-06, "loss": 0.2887, "step": 21650 }, { "epoch": 1.0142408769382114, "grad_norm": 0.555770025499186, "learning_rate": 2.5638103446712907e-06, "loss": 0.2734, "step": 21651 }, { "epoch": 1.0142877219281399, "grad_norm": 0.6051203073982923, "learning_rate": 2.5636207561253996e-06, "loss": 0.2918, "step": 21652 }, { "epoch": 1.0143345669180681, "grad_norm": 0.5793440552421141, "learning_rate": 2.5634311672133866e-06, "loss": 0.2723, "step": 21653 }, { "epoch": 1.0143814119079964, "grad_norm": 0.5284568385642646, "learning_rate": 2.5632415779363435e-06, "loss": 0.2707, "step": 21654 }, { "epoch": 1.0144282568979248, "grad_norm": 0.5673368770420776, "learning_rate": 2.563051988295361e-06, "loss": 0.2918, "step": 21655 }, { "epoch": 1.014475101887853, "grad_norm": 0.5440649098941585, "learning_rate": 2.5628623982915295e-06, "loss": 0.2826, "step": 21656 }, { "epoch": 1.0145219468777815, "grad_norm": 0.6064262159477974, "learning_rate": 2.5626728079259415e-06, "loss": 0.2826, "step": 21657 }, { "epoch": 1.0145687918677098, "grad_norm": 0.5807947698319869, "learning_rate": 2.5624832171996876e-06, "loss": 0.2814, "step": 21658 }, { "epoch": 1.014615636857638, "grad_norm": 0.5659423741316337, "learning_rate": 2.5622936261138577e-06, "loss": 0.2736, "step": 21659 }, { "epoch": 1.0146624818475665, "grad_norm": 0.6075819327019991, "learning_rate": 2.5621040346695436e-06, "loss": 0.2741, "step": 21660 }, { "epoch": 1.0147093268374947, "grad_norm": 0.5883933179351566, "learning_rate": 2.561914442867837e-06, "loss": 0.2584, "step": 21661 }, { "epoch": 1.014756171827423, "grad_norm": 0.5884907697396515, "learning_rate": 2.5617248507098273e-06, "loss": 0.287, "step": 21662 }, { "epoch": 1.0148030168173514, "grad_norm": 0.6266725153089039, "learning_rate": 2.5615352581966075e-06, "loss": 0.287, "step": 21663 }, { "epoch": 1.0148498618072797, "grad_norm": 0.5546308013433962, "learning_rate": 2.561345665329268e-06, "loss": 0.2676, "step": 21664 }, { "epoch": 1.0148967067972081, "grad_norm": 0.6007831820264785, "learning_rate": 2.5611560721088993e-06, "loss": 0.2914, "step": 21665 }, { "epoch": 1.0149435517871364, "grad_norm": 0.6168379479332301, "learning_rate": 2.560966478536592e-06, "loss": 0.2917, "step": 21666 }, { "epoch": 1.0149903967770646, "grad_norm": 0.5397417543218644, "learning_rate": 2.5607768846134383e-06, "loss": 0.2627, "step": 21667 }, { "epoch": 1.015037241766993, "grad_norm": 0.6081777373428919, "learning_rate": 2.56058729034053e-06, "loss": 0.2989, "step": 21668 }, { "epoch": 1.0150840867569213, "grad_norm": 0.5866486680724188, "learning_rate": 2.560397695718956e-06, "loss": 0.2798, "step": 21669 }, { "epoch": 1.0151309317468498, "grad_norm": 0.5452288731059104, "learning_rate": 2.5602081007498086e-06, "loss": 0.2711, "step": 21670 }, { "epoch": 1.015177776736778, "grad_norm": 0.5464746420636273, "learning_rate": 2.560018505434179e-06, "loss": 0.2567, "step": 21671 }, { "epoch": 1.0152246217267062, "grad_norm": 0.5962739596000153, "learning_rate": 2.559828909773157e-06, "loss": 0.2771, "step": 21672 }, { "epoch": 1.0152714667166347, "grad_norm": 0.5328959059772101, "learning_rate": 2.559639313767836e-06, "loss": 0.2488, "step": 21673 }, { "epoch": 1.015318311706563, "grad_norm": 0.6183938405367624, "learning_rate": 2.559449717419305e-06, "loss": 0.2939, "step": 21674 }, { "epoch": 1.0153651566964914, "grad_norm": 0.6767518797848762, "learning_rate": 2.5592601207286562e-06, "loss": 0.2988, "step": 21675 }, { "epoch": 1.0154120016864197, "grad_norm": 0.5884856900252861, "learning_rate": 2.5590705236969803e-06, "loss": 0.2908, "step": 21676 }, { "epoch": 1.015458846676348, "grad_norm": 0.5884341356079934, "learning_rate": 2.5588809263253676e-06, "loss": 0.2675, "step": 21677 }, { "epoch": 1.0155056916662764, "grad_norm": 0.597533541013571, "learning_rate": 2.558691328614911e-06, "loss": 0.2733, "step": 21678 }, { "epoch": 1.0155525366562046, "grad_norm": 0.6181256383259526, "learning_rate": 2.5585017305667005e-06, "loss": 0.2881, "step": 21679 }, { "epoch": 1.0155993816461328, "grad_norm": 0.5980408034602156, "learning_rate": 2.558312132181826e-06, "loss": 0.2697, "step": 21680 }, { "epoch": 1.0156462266360613, "grad_norm": 0.6194162459104091, "learning_rate": 2.5581225334613806e-06, "loss": 0.2698, "step": 21681 }, { "epoch": 1.0156930716259895, "grad_norm": 0.5686794169613806, "learning_rate": 2.557932934406455e-06, "loss": 0.2647, "step": 21682 }, { "epoch": 1.015739916615918, "grad_norm": 0.5856280962565504, "learning_rate": 2.5577433350181395e-06, "loss": 0.2833, "step": 21683 }, { "epoch": 1.0157867616058462, "grad_norm": 0.6423541791834575, "learning_rate": 2.5575537352975257e-06, "loss": 0.285, "step": 21684 }, { "epoch": 1.0158336065957745, "grad_norm": 0.6238498604167833, "learning_rate": 2.5573641352457047e-06, "loss": 0.2826, "step": 21685 }, { "epoch": 1.015880451585703, "grad_norm": 0.5736325925121216, "learning_rate": 2.5571745348637678e-06, "loss": 0.2616, "step": 21686 }, { "epoch": 1.0159272965756312, "grad_norm": 0.7148742325470049, "learning_rate": 2.556984934152805e-06, "loss": 0.3068, "step": 21687 }, { "epoch": 1.0159741415655597, "grad_norm": 0.5278870727246775, "learning_rate": 2.556795333113909e-06, "loss": 0.2593, "step": 21688 }, { "epoch": 1.016020986555488, "grad_norm": 0.6644129174229022, "learning_rate": 2.55660573174817e-06, "loss": 0.2593, "step": 21689 }, { "epoch": 1.0160678315454161, "grad_norm": 0.5956603840097737, "learning_rate": 2.5564161300566793e-06, "loss": 0.2703, "step": 21690 }, { "epoch": 1.0161146765353446, "grad_norm": 0.5918381938467919, "learning_rate": 2.5562265280405274e-06, "loss": 0.2848, "step": 21691 }, { "epoch": 1.0161615215252728, "grad_norm": 0.6638001099124291, "learning_rate": 2.5560369257008064e-06, "loss": 0.2728, "step": 21692 }, { "epoch": 1.0162083665152013, "grad_norm": 0.5630091490722329, "learning_rate": 2.5558473230386073e-06, "loss": 0.2798, "step": 21693 }, { "epoch": 1.0162552115051295, "grad_norm": 0.5655911991375363, "learning_rate": 2.55565772005502e-06, "loss": 0.2615, "step": 21694 }, { "epoch": 1.0163020564950578, "grad_norm": 0.5434858635620511, "learning_rate": 2.555468116751137e-06, "loss": 0.2595, "step": 21695 }, { "epoch": 1.0163489014849862, "grad_norm": 0.5821396728742987, "learning_rate": 2.5552785131280496e-06, "loss": 0.2755, "step": 21696 }, { "epoch": 1.0163957464749145, "grad_norm": 0.6657533324824221, "learning_rate": 2.5550889091868476e-06, "loss": 0.2967, "step": 21697 }, { "epoch": 1.0164425914648427, "grad_norm": 0.6259604599959072, "learning_rate": 2.5548993049286224e-06, "loss": 0.276, "step": 21698 }, { "epoch": 1.0164894364547712, "grad_norm": 0.6382848379328047, "learning_rate": 2.554709700354466e-06, "loss": 0.2945, "step": 21699 }, { "epoch": 1.0165362814446994, "grad_norm": 0.6055902751428706, "learning_rate": 2.554520095465469e-06, "loss": 0.2957, "step": 21700 }, { "epoch": 1.016583126434628, "grad_norm": 0.6020027478501768, "learning_rate": 2.5543304902627218e-06, "loss": 0.2864, "step": 21701 }, { "epoch": 1.0166299714245561, "grad_norm": 0.5417181804590481, "learning_rate": 2.554140884747317e-06, "loss": 0.2599, "step": 21702 }, { "epoch": 1.0166768164144844, "grad_norm": 0.619763709041575, "learning_rate": 2.553951278920346e-06, "loss": 0.2937, "step": 21703 }, { "epoch": 1.0167236614044128, "grad_norm": 0.5916871827415524, "learning_rate": 2.553761672782897e-06, "loss": 0.2717, "step": 21704 }, { "epoch": 1.016770506394341, "grad_norm": 0.6161190348503607, "learning_rate": 2.5535720663360634e-06, "loss": 0.2879, "step": 21705 }, { "epoch": 1.0168173513842695, "grad_norm": 0.5573466892196534, "learning_rate": 2.553382459580937e-06, "loss": 0.2797, "step": 21706 }, { "epoch": 1.0168641963741978, "grad_norm": 0.585176457484397, "learning_rate": 2.5531928525186077e-06, "loss": 0.2785, "step": 21707 }, { "epoch": 1.016911041364126, "grad_norm": 0.595093482262348, "learning_rate": 2.553003245150167e-06, "loss": 0.2652, "step": 21708 }, { "epoch": 1.0169578863540545, "grad_norm": 0.6024800866262261, "learning_rate": 2.5528136374767052e-06, "loss": 0.2773, "step": 21709 }, { "epoch": 1.0170047313439827, "grad_norm": 0.5864258179164551, "learning_rate": 2.5526240294993155e-06, "loss": 0.2914, "step": 21710 }, { "epoch": 1.0170515763339112, "grad_norm": 0.5364679330148795, "learning_rate": 2.5524344212190865e-06, "loss": 0.257, "step": 21711 }, { "epoch": 1.0170984213238394, "grad_norm": 0.5518366020329739, "learning_rate": 2.5522448126371113e-06, "loss": 0.2588, "step": 21712 }, { "epoch": 1.0171452663137677, "grad_norm": 0.5649285179720618, "learning_rate": 2.55205520375448e-06, "loss": 0.2808, "step": 21713 }, { "epoch": 1.0171921113036961, "grad_norm": 0.5787405601179647, "learning_rate": 2.551865594572284e-06, "loss": 0.2774, "step": 21714 }, { "epoch": 1.0172389562936244, "grad_norm": 0.58722452888306, "learning_rate": 2.551675985091614e-06, "loss": 0.2775, "step": 21715 }, { "epoch": 1.0172858012835526, "grad_norm": 0.6007431391509497, "learning_rate": 2.5514863753135626e-06, "loss": 0.273, "step": 21716 }, { "epoch": 1.017332646273481, "grad_norm": 0.5745155291005036, "learning_rate": 2.55129676523922e-06, "loss": 0.277, "step": 21717 }, { "epoch": 1.0173794912634093, "grad_norm": 0.6137531201554696, "learning_rate": 2.551107154869677e-06, "loss": 0.2685, "step": 21718 }, { "epoch": 1.0174263362533378, "grad_norm": 0.6453189431883604, "learning_rate": 2.5509175442060256e-06, "loss": 0.3053, "step": 21719 }, { "epoch": 1.017473181243266, "grad_norm": 0.5871753966717352, "learning_rate": 2.5507279332493567e-06, "loss": 0.2794, "step": 21720 }, { "epoch": 1.0175200262331943, "grad_norm": 0.6087568666245128, "learning_rate": 2.5505383220007605e-06, "loss": 0.2732, "step": 21721 }, { "epoch": 1.0175668712231227, "grad_norm": 0.5975402565954407, "learning_rate": 2.5503487104613296e-06, "loss": 0.2699, "step": 21722 }, { "epoch": 1.017613716213051, "grad_norm": 0.5678877787382328, "learning_rate": 2.550159098632154e-06, "loss": 0.2823, "step": 21723 }, { "epoch": 1.0176605612029794, "grad_norm": 0.5924807958514199, "learning_rate": 2.549969486514326e-06, "loss": 0.2631, "step": 21724 }, { "epoch": 1.0177074061929077, "grad_norm": 0.604633065008511, "learning_rate": 2.5497798741089362e-06, "loss": 0.2777, "step": 21725 }, { "epoch": 1.017754251182836, "grad_norm": 0.5739415572050609, "learning_rate": 2.5495902614170746e-06, "loss": 0.2807, "step": 21726 }, { "epoch": 1.0178010961727644, "grad_norm": 0.5892986135783874, "learning_rate": 2.549400648439835e-06, "loss": 0.2881, "step": 21727 }, { "epoch": 1.0178479411626926, "grad_norm": 0.5397735886060043, "learning_rate": 2.5492110351783068e-06, "loss": 0.2581, "step": 21728 }, { "epoch": 1.017894786152621, "grad_norm": 0.5963183925600284, "learning_rate": 2.549021421633581e-06, "loss": 0.2749, "step": 21729 }, { "epoch": 1.0179416311425493, "grad_norm": 0.5704841282407267, "learning_rate": 2.548831807806749e-06, "loss": 0.2598, "step": 21730 }, { "epoch": 1.0179884761324776, "grad_norm": 0.5614631866794818, "learning_rate": 2.5486421936989027e-06, "loss": 0.2624, "step": 21731 }, { "epoch": 1.018035321122406, "grad_norm": 0.5676683355204346, "learning_rate": 2.548452579311132e-06, "loss": 0.2771, "step": 21732 }, { "epoch": 1.0180821661123343, "grad_norm": 0.5282653260895505, "learning_rate": 2.5482629646445295e-06, "loss": 0.2528, "step": 21733 }, { "epoch": 1.0181290111022625, "grad_norm": 0.5864372247292594, "learning_rate": 2.5480733497001862e-06, "loss": 0.2533, "step": 21734 }, { "epoch": 1.018175856092191, "grad_norm": 0.6259370403782539, "learning_rate": 2.5478837344791922e-06, "loss": 0.2909, "step": 21735 }, { "epoch": 1.0182227010821192, "grad_norm": 0.6961931452667106, "learning_rate": 2.5476941189826393e-06, "loss": 0.291, "step": 21736 }, { "epoch": 1.0182695460720477, "grad_norm": 0.604588783611533, "learning_rate": 2.547504503211619e-06, "loss": 0.2847, "step": 21737 }, { "epoch": 1.018316391061976, "grad_norm": 0.5949376664893246, "learning_rate": 2.5473148871672226e-06, "loss": 0.2759, "step": 21738 }, { "epoch": 1.0183632360519042, "grad_norm": 0.5544073215401368, "learning_rate": 2.5471252708505406e-06, "loss": 0.2676, "step": 21739 }, { "epoch": 1.0184100810418326, "grad_norm": 0.6053390874410373, "learning_rate": 2.5469356542626646e-06, "loss": 0.2894, "step": 21740 }, { "epoch": 1.0184569260317609, "grad_norm": 0.5731041957591542, "learning_rate": 2.5467460374046854e-06, "loss": 0.2903, "step": 21741 }, { "epoch": 1.0185037710216893, "grad_norm": 0.6194402618668018, "learning_rate": 2.5465564202776944e-06, "loss": 0.2874, "step": 21742 }, { "epoch": 1.0185506160116176, "grad_norm": 0.5677631477041064, "learning_rate": 2.5463668028827836e-06, "loss": 0.2792, "step": 21743 }, { "epoch": 1.0185974610015458, "grad_norm": 0.6184702742636099, "learning_rate": 2.5461771852210426e-06, "loss": 0.2944, "step": 21744 }, { "epoch": 1.0186443059914743, "grad_norm": 0.5625871051444465, "learning_rate": 2.545987567293564e-06, "loss": 0.2788, "step": 21745 }, { "epoch": 1.0186911509814025, "grad_norm": 0.5559920674087648, "learning_rate": 2.545797949101439e-06, "loss": 0.2666, "step": 21746 }, { "epoch": 1.018737995971331, "grad_norm": 0.5910781440978652, "learning_rate": 2.5456083306457573e-06, "loss": 0.2857, "step": 21747 }, { "epoch": 1.0187848409612592, "grad_norm": 0.6109756114262153, "learning_rate": 2.545418711927612e-06, "loss": 0.2936, "step": 21748 }, { "epoch": 1.0188316859511874, "grad_norm": 0.5821781613237236, "learning_rate": 2.545229092948093e-06, "loss": 0.2675, "step": 21749 }, { "epoch": 1.018878530941116, "grad_norm": 0.5586484094729456, "learning_rate": 2.5450394737082916e-06, "loss": 0.2543, "step": 21750 }, { "epoch": 1.0189253759310442, "grad_norm": 0.5939758522311899, "learning_rate": 2.5448498542092997e-06, "loss": 0.2653, "step": 21751 }, { "epoch": 1.0189722209209724, "grad_norm": 0.6012346425682508, "learning_rate": 2.5446602344522088e-06, "loss": 0.28, "step": 21752 }, { "epoch": 1.0190190659109009, "grad_norm": 0.5821264521743381, "learning_rate": 2.5444706144381086e-06, "loss": 0.2771, "step": 21753 }, { "epoch": 1.019065910900829, "grad_norm": 0.5521137606293011, "learning_rate": 2.5442809941680914e-06, "loss": 0.2805, "step": 21754 }, { "epoch": 1.0191127558907576, "grad_norm": 0.5513330951053863, "learning_rate": 2.544091373643248e-06, "loss": 0.2662, "step": 21755 }, { "epoch": 1.0191596008806858, "grad_norm": 0.5460502082327675, "learning_rate": 2.54390175286467e-06, "loss": 0.2593, "step": 21756 }, { "epoch": 1.019206445870614, "grad_norm": 0.6157648399991309, "learning_rate": 2.543712131833449e-06, "loss": 0.2878, "step": 21757 }, { "epoch": 1.0192532908605425, "grad_norm": 0.5963919724668926, "learning_rate": 2.543522510550675e-06, "loss": 0.2804, "step": 21758 }, { "epoch": 1.0193001358504707, "grad_norm": 0.644234096196462, "learning_rate": 2.543332889017441e-06, "loss": 0.275, "step": 21759 }, { "epoch": 1.0193469808403992, "grad_norm": 0.5923547636541971, "learning_rate": 2.5431432672348354e-06, "loss": 0.2704, "step": 21760 }, { "epoch": 1.0193938258303274, "grad_norm": 0.578044352684706, "learning_rate": 2.5429536452039523e-06, "loss": 0.2675, "step": 21761 }, { "epoch": 1.0194406708202557, "grad_norm": 0.5822022281175502, "learning_rate": 2.5427640229258817e-06, "loss": 0.2785, "step": 21762 }, { "epoch": 1.0194875158101842, "grad_norm": 0.5578882688858394, "learning_rate": 2.5425744004017146e-06, "loss": 0.2573, "step": 21763 }, { "epoch": 1.0195343608001124, "grad_norm": 0.6315341672111039, "learning_rate": 2.5423847776325423e-06, "loss": 0.2769, "step": 21764 }, { "epoch": 1.0195812057900409, "grad_norm": 0.5615271105381987, "learning_rate": 2.5421951546194564e-06, "loss": 0.2687, "step": 21765 }, { "epoch": 1.019628050779969, "grad_norm": 0.5575729207105853, "learning_rate": 2.5420055313635485e-06, "loss": 0.2672, "step": 21766 }, { "epoch": 1.0196748957698973, "grad_norm": 0.6144547891952015, "learning_rate": 2.541815907865909e-06, "loss": 0.2895, "step": 21767 }, { "epoch": 1.0197217407598258, "grad_norm": 0.5550499904148118, "learning_rate": 2.5416262841276292e-06, "loss": 0.2703, "step": 21768 }, { "epoch": 1.019768585749754, "grad_norm": 0.5707018545751888, "learning_rate": 2.5414366601498007e-06, "loss": 0.2787, "step": 21769 }, { "epoch": 1.0198154307396823, "grad_norm": 0.5936925507424133, "learning_rate": 2.541247035933515e-06, "loss": 0.2924, "step": 21770 }, { "epoch": 1.0198622757296107, "grad_norm": 0.5949704587955622, "learning_rate": 2.541057411479862e-06, "loss": 0.2731, "step": 21771 }, { "epoch": 1.019909120719539, "grad_norm": 0.6100973996430641, "learning_rate": 2.5408677867899344e-06, "loss": 0.2789, "step": 21772 }, { "epoch": 1.0199559657094674, "grad_norm": 0.5435489463092835, "learning_rate": 2.540678161864824e-06, "loss": 0.2659, "step": 21773 }, { "epoch": 1.0200028106993957, "grad_norm": 0.5812227102678337, "learning_rate": 2.54048853670562e-06, "loss": 0.2751, "step": 21774 }, { "epoch": 1.020049655689324, "grad_norm": 0.5439524040876517, "learning_rate": 2.540298911313414e-06, "loss": 0.2695, "step": 21775 }, { "epoch": 1.0200965006792524, "grad_norm": 0.6525424969854786, "learning_rate": 2.5401092856892995e-06, "loss": 0.2735, "step": 21776 }, { "epoch": 1.0201433456691806, "grad_norm": 0.5776602117151328, "learning_rate": 2.539919659834365e-06, "loss": 0.2725, "step": 21777 }, { "epoch": 1.020190190659109, "grad_norm": 0.5845436575725871, "learning_rate": 2.5397300337497034e-06, "loss": 0.2807, "step": 21778 }, { "epoch": 1.0202370356490373, "grad_norm": 0.6633305918914674, "learning_rate": 2.5395404074364055e-06, "loss": 0.3044, "step": 21779 }, { "epoch": 1.0202838806389656, "grad_norm": 0.5864460013469297, "learning_rate": 2.5393507808955624e-06, "loss": 0.2762, "step": 21780 }, { "epoch": 1.020330725628894, "grad_norm": 0.5417710756266659, "learning_rate": 2.539161154128265e-06, "loss": 0.2597, "step": 21781 }, { "epoch": 1.0203775706188223, "grad_norm": 0.5557635220291937, "learning_rate": 2.5389715271356052e-06, "loss": 0.2645, "step": 21782 }, { "epoch": 1.0204244156087507, "grad_norm": 0.6369869187990089, "learning_rate": 2.538781899918675e-06, "loss": 0.2821, "step": 21783 }, { "epoch": 1.020471260598679, "grad_norm": 0.5563867096437919, "learning_rate": 2.5385922724785635e-06, "loss": 0.2627, "step": 21784 }, { "epoch": 1.0205181055886072, "grad_norm": 0.5718584746439889, "learning_rate": 2.5384026448163627e-06, "loss": 0.2707, "step": 21785 }, { "epoch": 1.0205649505785357, "grad_norm": 0.6207839087874962, "learning_rate": 2.538213016933166e-06, "loss": 0.289, "step": 21786 }, { "epoch": 1.020611795568464, "grad_norm": 0.6259082882496929, "learning_rate": 2.538023388830062e-06, "loss": 0.2916, "step": 21787 }, { "epoch": 1.0206586405583922, "grad_norm": 0.5670711206470964, "learning_rate": 2.537833760508143e-06, "loss": 0.2646, "step": 21788 }, { "epoch": 1.0207054855483206, "grad_norm": 0.5838943063095567, "learning_rate": 2.5376441319685e-06, "loss": 0.2924, "step": 21789 }, { "epoch": 1.0207523305382489, "grad_norm": 0.609786678230567, "learning_rate": 2.5374545032122255e-06, "loss": 0.2819, "step": 21790 }, { "epoch": 1.0207991755281773, "grad_norm": 0.5486117586641006, "learning_rate": 2.537264874240409e-06, "loss": 0.2823, "step": 21791 }, { "epoch": 1.0208460205181056, "grad_norm": 0.6139647297118027, "learning_rate": 2.5370752450541425e-06, "loss": 0.2887, "step": 21792 }, { "epoch": 1.0208928655080338, "grad_norm": 0.6742666643075691, "learning_rate": 2.5368856156545174e-06, "loss": 0.2793, "step": 21793 }, { "epoch": 1.0209397104979623, "grad_norm": 0.6270457591734119, "learning_rate": 2.536695986042625e-06, "loss": 0.2837, "step": 21794 }, { "epoch": 1.0209865554878905, "grad_norm": 0.6009765605849421, "learning_rate": 2.5365063562195564e-06, "loss": 0.2915, "step": 21795 }, { "epoch": 1.021033400477819, "grad_norm": 0.5745528908277645, "learning_rate": 2.5363167261864024e-06, "loss": 0.2723, "step": 21796 }, { "epoch": 1.0210802454677472, "grad_norm": 0.5863175347685481, "learning_rate": 2.5361270959442553e-06, "loss": 0.2689, "step": 21797 }, { "epoch": 1.0211270904576755, "grad_norm": 0.6507783537765685, "learning_rate": 2.535937465494206e-06, "loss": 0.3093, "step": 21798 }, { "epoch": 1.021173935447604, "grad_norm": 0.6316206794077269, "learning_rate": 2.535747834837345e-06, "loss": 0.2922, "step": 21799 }, { "epoch": 1.0212207804375322, "grad_norm": 0.6184743955269604, "learning_rate": 2.5355582039747647e-06, "loss": 0.281, "step": 21800 }, { "epoch": 1.0212676254274606, "grad_norm": 0.6295204734999597, "learning_rate": 2.5353685729075554e-06, "loss": 0.2844, "step": 21801 }, { "epoch": 1.0213144704173889, "grad_norm": 0.5807757204562413, "learning_rate": 2.5351789416368087e-06, "loss": 0.2426, "step": 21802 }, { "epoch": 1.0213613154073171, "grad_norm": 0.574598142489127, "learning_rate": 2.5349893101636165e-06, "loss": 0.2661, "step": 21803 }, { "epoch": 1.0214081603972456, "grad_norm": 0.5947264965582203, "learning_rate": 2.5347996784890706e-06, "loss": 0.2871, "step": 21804 }, { "epoch": 1.0214550053871738, "grad_norm": 0.6934099409747836, "learning_rate": 2.53461004661426e-06, "loss": 0.2927, "step": 21805 }, { "epoch": 1.021501850377102, "grad_norm": 0.6110074156897946, "learning_rate": 2.534420414540277e-06, "loss": 0.2786, "step": 21806 }, { "epoch": 1.0215486953670305, "grad_norm": 0.5788287343631494, "learning_rate": 2.5342307822682137e-06, "loss": 0.2627, "step": 21807 }, { "epoch": 1.0215955403569588, "grad_norm": 0.6216214576311627, "learning_rate": 2.534041149799161e-06, "loss": 0.2859, "step": 21808 }, { "epoch": 1.0216423853468872, "grad_norm": 0.5774521198268766, "learning_rate": 2.53385151713421e-06, "loss": 0.2742, "step": 21809 }, { "epoch": 1.0216892303368155, "grad_norm": 0.5942079284858665, "learning_rate": 2.533661884274452e-06, "loss": 0.2717, "step": 21810 }, { "epoch": 1.0217360753267437, "grad_norm": 0.5818233293237468, "learning_rate": 2.533472251220978e-06, "loss": 0.2566, "step": 21811 }, { "epoch": 1.0217829203166722, "grad_norm": 0.590986940652247, "learning_rate": 2.5332826179748798e-06, "loss": 0.2792, "step": 21812 }, { "epoch": 1.0218297653066004, "grad_norm": 0.6060632431191245, "learning_rate": 2.5330929845372488e-06, "loss": 0.2832, "step": 21813 }, { "epoch": 1.0218766102965289, "grad_norm": 0.634974675310292, "learning_rate": 2.5329033509091755e-06, "loss": 0.3035, "step": 21814 }, { "epoch": 1.0219234552864571, "grad_norm": 0.547792130694545, "learning_rate": 2.532713717091752e-06, "loss": 0.2737, "step": 21815 }, { "epoch": 1.0219703002763854, "grad_norm": 0.6082567759851151, "learning_rate": 2.5325240830860693e-06, "loss": 0.2725, "step": 21816 }, { "epoch": 1.0220171452663138, "grad_norm": 0.6313259734759501, "learning_rate": 2.532334448893218e-06, "loss": 0.2857, "step": 21817 }, { "epoch": 1.022063990256242, "grad_norm": 0.5886873079586882, "learning_rate": 2.5321448145142913e-06, "loss": 0.2702, "step": 21818 }, { "epoch": 1.0221108352461705, "grad_norm": 0.605135138185015, "learning_rate": 2.531955179950379e-06, "loss": 0.2749, "step": 21819 }, { "epoch": 1.0221576802360988, "grad_norm": 0.5811925291974839, "learning_rate": 2.531765545202572e-06, "loss": 0.2642, "step": 21820 }, { "epoch": 1.022204525226027, "grad_norm": 0.6138467233098471, "learning_rate": 2.5315759102719616e-06, "loss": 0.2834, "step": 21821 }, { "epoch": 1.0222513702159555, "grad_norm": 0.6413816295554747, "learning_rate": 2.531386275159642e-06, "loss": 0.2836, "step": 21822 }, { "epoch": 1.0222982152058837, "grad_norm": 0.6198444798497482, "learning_rate": 2.5311966398667004e-06, "loss": 0.2851, "step": 21823 }, { "epoch": 1.022345060195812, "grad_norm": 0.5633791748776208, "learning_rate": 2.5310070043942303e-06, "loss": 0.2781, "step": 21824 }, { "epoch": 1.0223919051857404, "grad_norm": 0.6168910885955577, "learning_rate": 2.5308173687433234e-06, "loss": 0.2655, "step": 21825 }, { "epoch": 1.0224387501756687, "grad_norm": 0.6057594852263659, "learning_rate": 2.5306277329150698e-06, "loss": 0.271, "step": 21826 }, { "epoch": 1.0224855951655971, "grad_norm": 0.5630661018984642, "learning_rate": 2.5304380969105613e-06, "loss": 0.2599, "step": 21827 }, { "epoch": 1.0225324401555254, "grad_norm": 0.5957230262776195, "learning_rate": 2.530248460730889e-06, "loss": 0.2809, "step": 21828 }, { "epoch": 1.0225792851454536, "grad_norm": 0.5528769603622337, "learning_rate": 2.530058824377145e-06, "loss": 0.2647, "step": 21829 }, { "epoch": 1.022626130135382, "grad_norm": 0.5389313155166933, "learning_rate": 2.5298691878504196e-06, "loss": 0.2668, "step": 21830 }, { "epoch": 1.0226729751253103, "grad_norm": 0.5782153325272136, "learning_rate": 2.5296795511518045e-06, "loss": 0.2703, "step": 21831 }, { "epoch": 1.0227198201152388, "grad_norm": 0.6175290612407575, "learning_rate": 2.529489914282392e-06, "loss": 0.2658, "step": 21832 }, { "epoch": 1.022766665105167, "grad_norm": 0.5881545736611135, "learning_rate": 2.5293002772432714e-06, "loss": 0.2996, "step": 21833 }, { "epoch": 1.0228135100950952, "grad_norm": 0.5822338430416993, "learning_rate": 2.5291106400355353e-06, "loss": 0.2773, "step": 21834 }, { "epoch": 1.0228603550850237, "grad_norm": 0.6117628386401496, "learning_rate": 2.5289210026602747e-06, "loss": 0.2626, "step": 21835 }, { "epoch": 1.022907200074952, "grad_norm": 0.5785477576498466, "learning_rate": 2.5287313651185806e-06, "loss": 0.2752, "step": 21836 }, { "epoch": 1.0229540450648804, "grad_norm": 0.5794565064834026, "learning_rate": 2.528541727411546e-06, "loss": 0.2789, "step": 21837 }, { "epoch": 1.0230008900548087, "grad_norm": 0.6960393142316135, "learning_rate": 2.5283520895402596e-06, "loss": 0.3012, "step": 21838 }, { "epoch": 1.023047735044737, "grad_norm": 0.6003449700177513, "learning_rate": 2.5281624515058147e-06, "loss": 0.2786, "step": 21839 }, { "epoch": 1.0230945800346654, "grad_norm": 0.5582446620707344, "learning_rate": 2.527972813309302e-06, "loss": 0.2688, "step": 21840 }, { "epoch": 1.0231414250245936, "grad_norm": 0.5818411400042861, "learning_rate": 2.527783174951812e-06, "loss": 0.2772, "step": 21841 }, { "epoch": 1.0231882700145218, "grad_norm": 0.5934634370937879, "learning_rate": 2.5275935364344373e-06, "loss": 0.2852, "step": 21842 }, { "epoch": 1.0232351150044503, "grad_norm": 0.5844733882823573, "learning_rate": 2.52740389775827e-06, "loss": 0.2618, "step": 21843 }, { "epoch": 1.0232819599943785, "grad_norm": 0.6171042457077083, "learning_rate": 2.527214258924399e-06, "loss": 0.2866, "step": 21844 }, { "epoch": 1.023328804984307, "grad_norm": 0.550292322047167, "learning_rate": 2.527024619933916e-06, "loss": 0.2621, "step": 21845 }, { "epoch": 1.0233756499742352, "grad_norm": 0.6437884938180509, "learning_rate": 2.526834980787914e-06, "loss": 0.2884, "step": 21846 }, { "epoch": 1.0234224949641635, "grad_norm": 0.5647527546903003, "learning_rate": 2.5266453414874837e-06, "loss": 0.2738, "step": 21847 }, { "epoch": 1.023469339954092, "grad_norm": 0.6231982204538865, "learning_rate": 2.5264557020337156e-06, "loss": 0.283, "step": 21848 }, { "epoch": 1.0235161849440202, "grad_norm": 0.6141698219218208, "learning_rate": 2.526266062427702e-06, "loss": 0.2883, "step": 21849 }, { "epoch": 1.0235630299339487, "grad_norm": 0.6540330472412236, "learning_rate": 2.526076422670534e-06, "loss": 0.2952, "step": 21850 }, { "epoch": 1.023609874923877, "grad_norm": 0.5988311504072582, "learning_rate": 2.5258867827633016e-06, "loss": 0.2879, "step": 21851 }, { "epoch": 1.0236567199138051, "grad_norm": 0.5804647300142597, "learning_rate": 2.5256971427070974e-06, "loss": 0.2675, "step": 21852 }, { "epoch": 1.0237035649037336, "grad_norm": 0.5895588805823547, "learning_rate": 2.525507502503014e-06, "loss": 0.2672, "step": 21853 }, { "epoch": 1.0237504098936618, "grad_norm": 0.569203403831279, "learning_rate": 2.5253178621521407e-06, "loss": 0.2762, "step": 21854 }, { "epoch": 1.0237972548835903, "grad_norm": 0.6164904146342481, "learning_rate": 2.5251282216555687e-06, "loss": 0.2686, "step": 21855 }, { "epoch": 1.0238440998735185, "grad_norm": 0.6037872262410735, "learning_rate": 2.5249385810143913e-06, "loss": 0.2899, "step": 21856 }, { "epoch": 1.0238909448634468, "grad_norm": 0.6006181110269223, "learning_rate": 2.524748940229698e-06, "loss": 0.2775, "step": 21857 }, { "epoch": 1.0239377898533752, "grad_norm": 0.5698704443000254, "learning_rate": 2.5245592993025803e-06, "loss": 0.2762, "step": 21858 }, { "epoch": 1.0239846348433035, "grad_norm": 0.6269966724642403, "learning_rate": 2.524369658234131e-06, "loss": 0.28, "step": 21859 }, { "epoch": 1.0240314798332317, "grad_norm": 0.5377577201454237, "learning_rate": 2.52418001702544e-06, "loss": 0.2519, "step": 21860 }, { "epoch": 1.0240783248231602, "grad_norm": 0.5684679814577313, "learning_rate": 2.5239903756775986e-06, "loss": 0.2809, "step": 21861 }, { "epoch": 1.0241251698130884, "grad_norm": 0.5417205137364886, "learning_rate": 2.5238007341916994e-06, "loss": 0.2597, "step": 21862 }, { "epoch": 1.024172014803017, "grad_norm": 0.5485525807642936, "learning_rate": 2.5236110925688325e-06, "loss": 0.2727, "step": 21863 }, { "epoch": 1.0242188597929451, "grad_norm": 0.6237296931075234, "learning_rate": 2.52342145081009e-06, "loss": 0.268, "step": 21864 }, { "epoch": 1.0242657047828734, "grad_norm": 0.5833357671306936, "learning_rate": 2.523231808916563e-06, "loss": 0.2687, "step": 21865 }, { "epoch": 1.0243125497728018, "grad_norm": 0.5379762198224926, "learning_rate": 2.5230421668893423e-06, "loss": 0.2708, "step": 21866 }, { "epoch": 1.02435939476273, "grad_norm": 0.5483405451308925, "learning_rate": 2.52285252472952e-06, "loss": 0.2677, "step": 21867 }, { "epoch": 1.0244062397526585, "grad_norm": 0.5766213968756954, "learning_rate": 2.5226628824381873e-06, "loss": 0.2634, "step": 21868 }, { "epoch": 1.0244530847425868, "grad_norm": 0.5668672771061537, "learning_rate": 2.522473240016435e-06, "loss": 0.2622, "step": 21869 }, { "epoch": 1.024499929732515, "grad_norm": 0.5973447456385652, "learning_rate": 2.5222835974653553e-06, "loss": 0.2792, "step": 21870 }, { "epoch": 1.0245467747224435, "grad_norm": 0.5543052813170632, "learning_rate": 2.5220939547860392e-06, "loss": 0.2678, "step": 21871 }, { "epoch": 1.0245936197123717, "grad_norm": 0.6036241407605962, "learning_rate": 2.521904311979577e-06, "loss": 0.2982, "step": 21872 }, { "epoch": 1.0246404647023002, "grad_norm": 0.5898205156784402, "learning_rate": 2.5217146690470612e-06, "loss": 0.2718, "step": 21873 }, { "epoch": 1.0246873096922284, "grad_norm": 0.6213093324632294, "learning_rate": 2.521525025989584e-06, "loss": 0.2802, "step": 21874 }, { "epoch": 1.0247341546821567, "grad_norm": 0.5820359025615705, "learning_rate": 2.5213353828082347e-06, "loss": 0.2636, "step": 21875 }, { "epoch": 1.0247809996720851, "grad_norm": 0.6182308207946278, "learning_rate": 2.521145739504106e-06, "loss": 0.2876, "step": 21876 }, { "epoch": 1.0248278446620134, "grad_norm": 0.5534811858434935, "learning_rate": 2.520956096078289e-06, "loss": 0.2798, "step": 21877 }, { "epoch": 1.0248746896519416, "grad_norm": 0.5889966848843138, "learning_rate": 2.5207664525318748e-06, "loss": 0.2788, "step": 21878 }, { "epoch": 1.02492153464187, "grad_norm": 0.575358315162396, "learning_rate": 2.520576808865955e-06, "loss": 0.2601, "step": 21879 }, { "epoch": 1.0249683796317983, "grad_norm": 0.5515264565945164, "learning_rate": 2.520387165081621e-06, "loss": 0.2703, "step": 21880 }, { "epoch": 1.0250152246217268, "grad_norm": 0.5768094815070317, "learning_rate": 2.5201975211799634e-06, "loss": 0.2799, "step": 21881 }, { "epoch": 1.025062069611655, "grad_norm": 0.6026517638387436, "learning_rate": 2.520007877162074e-06, "loss": 0.2833, "step": 21882 }, { "epoch": 1.0251089146015833, "grad_norm": 0.6442490004202832, "learning_rate": 2.519818233029045e-06, "loss": 0.2994, "step": 21883 }, { "epoch": 1.0251557595915117, "grad_norm": 0.6344868529605922, "learning_rate": 2.5196285887819668e-06, "loss": 0.2818, "step": 21884 }, { "epoch": 1.02520260458144, "grad_norm": 0.6257450135969541, "learning_rate": 2.5194389444219306e-06, "loss": 0.2766, "step": 21885 }, { "epoch": 1.0252494495713684, "grad_norm": 0.5882967275507891, "learning_rate": 2.519249299950029e-06, "loss": 0.2675, "step": 21886 }, { "epoch": 1.0252962945612967, "grad_norm": 0.5619641673254425, "learning_rate": 2.519059655367352e-06, "loss": 0.2753, "step": 21887 }, { "epoch": 1.025343139551225, "grad_norm": 0.6229945782232152, "learning_rate": 2.518870010674992e-06, "loss": 0.3005, "step": 21888 }, { "epoch": 1.0253899845411534, "grad_norm": 0.6214437528186119, "learning_rate": 2.5186803658740393e-06, "loss": 0.2895, "step": 21889 }, { "epoch": 1.0254368295310816, "grad_norm": 0.6068508332353566, "learning_rate": 2.5184907209655857e-06, "loss": 0.2879, "step": 21890 }, { "epoch": 1.02548367452101, "grad_norm": 0.5807322353791817, "learning_rate": 2.518301075950722e-06, "loss": 0.2738, "step": 21891 }, { "epoch": 1.0255305195109383, "grad_norm": 0.6168639936964375, "learning_rate": 2.5181114308305423e-06, "loss": 0.2884, "step": 21892 }, { "epoch": 1.0255773645008666, "grad_norm": 0.6039722405014367, "learning_rate": 2.517921785606134e-06, "loss": 0.2731, "step": 21893 }, { "epoch": 1.025624209490795, "grad_norm": 0.5806541391250667, "learning_rate": 2.51773214027859e-06, "loss": 0.282, "step": 21894 }, { "epoch": 1.0256710544807233, "grad_norm": 0.6220787012057019, "learning_rate": 2.5175424948490034e-06, "loss": 0.2887, "step": 21895 }, { "epoch": 1.0257178994706515, "grad_norm": 0.6150215449454521, "learning_rate": 2.517352849318464e-06, "loss": 0.2879, "step": 21896 }, { "epoch": 1.02576474446058, "grad_norm": 0.551313719252999, "learning_rate": 2.517163203688063e-06, "loss": 0.2523, "step": 21897 }, { "epoch": 1.0258115894505082, "grad_norm": 0.5780877287973174, "learning_rate": 2.516973557958892e-06, "loss": 0.2787, "step": 21898 }, { "epoch": 1.0258584344404367, "grad_norm": 0.5550896839569683, "learning_rate": 2.5167839121320426e-06, "loss": 0.2459, "step": 21899 }, { "epoch": 1.025905279430365, "grad_norm": 0.5711054677414404, "learning_rate": 2.5165942662086055e-06, "loss": 0.2628, "step": 21900 }, { "epoch": 1.0259521244202932, "grad_norm": 0.6084059404784115, "learning_rate": 2.5164046201896735e-06, "loss": 0.2673, "step": 21901 }, { "epoch": 1.0259989694102216, "grad_norm": 0.6657249658757153, "learning_rate": 2.5162149740763364e-06, "loss": 0.2948, "step": 21902 }, { "epoch": 1.0260458144001499, "grad_norm": 0.5990648025547626, "learning_rate": 2.516025327869686e-06, "loss": 0.2752, "step": 21903 }, { "epoch": 1.0260926593900783, "grad_norm": 0.6275247626864693, "learning_rate": 2.5158356815708145e-06, "loss": 0.2694, "step": 21904 }, { "epoch": 1.0261395043800066, "grad_norm": 0.5285269763554906, "learning_rate": 2.5156460351808122e-06, "loss": 0.2513, "step": 21905 }, { "epoch": 1.0261863493699348, "grad_norm": 0.564762821600289, "learning_rate": 2.515456388700771e-06, "loss": 0.2695, "step": 21906 }, { "epoch": 1.0262331943598633, "grad_norm": 0.5867748408050623, "learning_rate": 2.5152667421317824e-06, "loss": 0.2738, "step": 21907 }, { "epoch": 1.0262800393497915, "grad_norm": 0.5913520282338701, "learning_rate": 2.515077095474937e-06, "loss": 0.2887, "step": 21908 }, { "epoch": 1.02632688433972, "grad_norm": 0.6031831472834229, "learning_rate": 2.5148874487313274e-06, "loss": 0.2834, "step": 21909 }, { "epoch": 1.0263737293296482, "grad_norm": 0.5987860252871307, "learning_rate": 2.514697801902044e-06, "loss": 0.2595, "step": 21910 }, { "epoch": 1.0264205743195765, "grad_norm": 0.5533867605633613, "learning_rate": 2.514508154988178e-06, "loss": 0.2673, "step": 21911 }, { "epoch": 1.026467419309505, "grad_norm": 0.6076174005366172, "learning_rate": 2.514318507990821e-06, "loss": 0.2618, "step": 21912 }, { "epoch": 1.0265142642994332, "grad_norm": 0.6354700482972357, "learning_rate": 2.514128860911066e-06, "loss": 0.2922, "step": 21913 }, { "epoch": 1.0265611092893614, "grad_norm": 0.6252468625394724, "learning_rate": 2.513939213750002e-06, "loss": 0.2846, "step": 21914 }, { "epoch": 1.0266079542792899, "grad_norm": 0.5531289261501537, "learning_rate": 2.513749566508721e-06, "loss": 0.266, "step": 21915 }, { "epoch": 1.026654799269218, "grad_norm": 0.5625502966790624, "learning_rate": 2.513559919188315e-06, "loss": 0.2855, "step": 21916 }, { "epoch": 1.0267016442591466, "grad_norm": 0.5805928975546224, "learning_rate": 2.513370271789876e-06, "loss": 0.2717, "step": 21917 }, { "epoch": 1.0267484892490748, "grad_norm": 0.6277240024205055, "learning_rate": 2.513180624314493e-06, "loss": 0.2719, "step": 21918 }, { "epoch": 1.026795334239003, "grad_norm": 0.5249959410494311, "learning_rate": 2.5129909767632595e-06, "loss": 0.2562, "step": 21919 }, { "epoch": 1.0268421792289315, "grad_norm": 0.5776996986614021, "learning_rate": 2.512801329137266e-06, "loss": 0.2835, "step": 21920 }, { "epoch": 1.0268890242188597, "grad_norm": 0.6113875425097735, "learning_rate": 2.512611681437604e-06, "loss": 0.285, "step": 21921 }, { "epoch": 1.0269358692087882, "grad_norm": 0.5676006702540136, "learning_rate": 2.5124220336653648e-06, "loss": 0.2738, "step": 21922 }, { "epoch": 1.0269827141987165, "grad_norm": 0.5990366330614395, "learning_rate": 2.5122323858216413e-06, "loss": 0.2775, "step": 21923 }, { "epoch": 1.0270295591886447, "grad_norm": 0.5878164742654908, "learning_rate": 2.512042737907522e-06, "loss": 0.2873, "step": 21924 }, { "epoch": 1.0270764041785732, "grad_norm": 0.5732241574801912, "learning_rate": 2.5118530899241e-06, "loss": 0.2679, "step": 21925 }, { "epoch": 1.0271232491685014, "grad_norm": 0.6049867135630841, "learning_rate": 2.5116634418724667e-06, "loss": 0.2867, "step": 21926 }, { "epoch": 1.0271700941584299, "grad_norm": 0.5606369457605124, "learning_rate": 2.5114737937537138e-06, "loss": 0.2684, "step": 21927 }, { "epoch": 1.027216939148358, "grad_norm": 0.5979764231986039, "learning_rate": 2.511284145568931e-06, "loss": 0.2647, "step": 21928 }, { "epoch": 1.0272637841382863, "grad_norm": 0.6019695818679869, "learning_rate": 2.5110944973192114e-06, "loss": 0.2801, "step": 21929 }, { "epoch": 1.0273106291282148, "grad_norm": 0.5725228414338056, "learning_rate": 2.510904849005646e-06, "loss": 0.2779, "step": 21930 }, { "epoch": 1.027357474118143, "grad_norm": 0.5803296820521676, "learning_rate": 2.510715200629325e-06, "loss": 0.2988, "step": 21931 }, { "epoch": 1.0274043191080713, "grad_norm": 0.5661942182801173, "learning_rate": 2.5105255521913412e-06, "loss": 0.2721, "step": 21932 }, { "epoch": 1.0274511640979997, "grad_norm": 0.5925632517877379, "learning_rate": 2.5103359036927855e-06, "loss": 0.2929, "step": 21933 }, { "epoch": 1.027498009087928, "grad_norm": 0.5481424451386389, "learning_rate": 2.5101462551347495e-06, "loss": 0.2612, "step": 21934 }, { "epoch": 1.0275448540778565, "grad_norm": 0.5548408842104408, "learning_rate": 2.5099566065183245e-06, "loss": 0.2795, "step": 21935 }, { "epoch": 1.0275916990677847, "grad_norm": 0.595642254588714, "learning_rate": 2.509766957844601e-06, "loss": 0.258, "step": 21936 }, { "epoch": 1.027638544057713, "grad_norm": 0.5823729258746617, "learning_rate": 2.509577309114672e-06, "loss": 0.2762, "step": 21937 }, { "epoch": 1.0276853890476414, "grad_norm": 0.6212045962134544, "learning_rate": 2.5093876603296273e-06, "loss": 0.2773, "step": 21938 }, { "epoch": 1.0277322340375696, "grad_norm": 0.6134277335187966, "learning_rate": 2.509198011490559e-06, "loss": 0.2784, "step": 21939 }, { "epoch": 1.027779079027498, "grad_norm": 0.5788337637461887, "learning_rate": 2.509008362598559e-06, "loss": 0.2686, "step": 21940 }, { "epoch": 1.0278259240174263, "grad_norm": 0.5642571097715017, "learning_rate": 2.5088187136547184e-06, "loss": 0.2712, "step": 21941 }, { "epoch": 1.0278727690073546, "grad_norm": 0.6527282383397126, "learning_rate": 2.508629064660127e-06, "loss": 0.293, "step": 21942 }, { "epoch": 1.027919613997283, "grad_norm": 0.613413048558124, "learning_rate": 2.508439415615878e-06, "loss": 0.2799, "step": 21943 }, { "epoch": 1.0279664589872113, "grad_norm": 0.5998651944471993, "learning_rate": 2.508249766523063e-06, "loss": 0.2804, "step": 21944 }, { "epoch": 1.0280133039771397, "grad_norm": 0.6053296619599577, "learning_rate": 2.5080601173827723e-06, "loss": 0.2653, "step": 21945 }, { "epoch": 1.028060148967068, "grad_norm": 0.59757427320398, "learning_rate": 2.5078704681960968e-06, "loss": 0.2724, "step": 21946 }, { "epoch": 1.0281069939569962, "grad_norm": 0.5892398491569665, "learning_rate": 2.50768081896413e-06, "loss": 0.2743, "step": 21947 }, { "epoch": 1.0281538389469247, "grad_norm": 0.5712015918132975, "learning_rate": 2.5074911696879616e-06, "loss": 0.2632, "step": 21948 }, { "epoch": 1.028200683936853, "grad_norm": 0.5835144323434767, "learning_rate": 2.507301520368683e-06, "loss": 0.2694, "step": 21949 }, { "epoch": 1.0282475289267812, "grad_norm": 0.5381180500661656, "learning_rate": 2.5071118710073866e-06, "loss": 0.2554, "step": 21950 }, { "epoch": 1.0282943739167096, "grad_norm": 0.5921494890836625, "learning_rate": 2.5069222216051632e-06, "loss": 0.2788, "step": 21951 }, { "epoch": 1.0283412189066379, "grad_norm": 0.5690087322630797, "learning_rate": 2.506732572163103e-06, "loss": 0.273, "step": 21952 }, { "epoch": 1.0283880638965663, "grad_norm": 0.5923109577741134, "learning_rate": 2.5065429226822996e-06, "loss": 0.2738, "step": 21953 }, { "epoch": 1.0284349088864946, "grad_norm": 0.5900296142075347, "learning_rate": 2.506353273163843e-06, "loss": 0.2785, "step": 21954 }, { "epoch": 1.0284817538764228, "grad_norm": 0.6195647692441957, "learning_rate": 2.506163623608825e-06, "loss": 0.3059, "step": 21955 }, { "epoch": 1.0285285988663513, "grad_norm": 0.6632244639167529, "learning_rate": 2.505973974018337e-06, "loss": 0.2829, "step": 21956 }, { "epoch": 1.0285754438562795, "grad_norm": 0.5604292827853646, "learning_rate": 2.5057843243934698e-06, "loss": 0.2715, "step": 21957 }, { "epoch": 1.028622288846208, "grad_norm": 0.6015489942958175, "learning_rate": 2.5055946747353162e-06, "loss": 0.2887, "step": 21958 }, { "epoch": 1.0286691338361362, "grad_norm": 0.552322391152086, "learning_rate": 2.5054050250449657e-06, "loss": 0.2716, "step": 21959 }, { "epoch": 1.0287159788260645, "grad_norm": 0.5777563381106785, "learning_rate": 2.505215375323511e-06, "loss": 0.2785, "step": 21960 }, { "epoch": 1.028762823815993, "grad_norm": 0.5383622480586276, "learning_rate": 2.5050257255720426e-06, "loss": 0.2688, "step": 21961 }, { "epoch": 1.0288096688059212, "grad_norm": 0.5954201648234007, "learning_rate": 2.504836075791654e-06, "loss": 0.2841, "step": 21962 }, { "epoch": 1.0288565137958496, "grad_norm": 0.6547788548217838, "learning_rate": 2.5046464259834335e-06, "loss": 0.3004, "step": 21963 }, { "epoch": 1.0289033587857779, "grad_norm": 0.5656876195943332, "learning_rate": 2.504456776148474e-06, "loss": 0.2682, "step": 21964 }, { "epoch": 1.0289502037757061, "grad_norm": 0.5952239737736631, "learning_rate": 2.504267126287868e-06, "loss": 0.2812, "step": 21965 }, { "epoch": 1.0289970487656346, "grad_norm": 0.5756926052833363, "learning_rate": 2.5040774764027048e-06, "loss": 0.2523, "step": 21966 }, { "epoch": 1.0290438937555628, "grad_norm": 0.5882631173198773, "learning_rate": 2.503887826494077e-06, "loss": 0.2869, "step": 21967 }, { "epoch": 1.029090738745491, "grad_norm": 0.5478943208357075, "learning_rate": 2.5036981765630757e-06, "loss": 0.2656, "step": 21968 }, { "epoch": 1.0291375837354195, "grad_norm": 0.5874056453677782, "learning_rate": 2.503508526610793e-06, "loss": 0.2734, "step": 21969 }, { "epoch": 1.0291844287253478, "grad_norm": 0.5736532163915578, "learning_rate": 2.503318876638318e-06, "loss": 0.27, "step": 21970 }, { "epoch": 1.0292312737152762, "grad_norm": 0.6328319440989936, "learning_rate": 2.503129226646745e-06, "loss": 0.2802, "step": 21971 }, { "epoch": 1.0292781187052045, "grad_norm": 0.5906915493256357, "learning_rate": 2.502939576637164e-06, "loss": 0.2742, "step": 21972 }, { "epoch": 1.0293249636951327, "grad_norm": 0.577374850581382, "learning_rate": 2.5027499266106657e-06, "loss": 0.2762, "step": 21973 }, { "epoch": 1.0293718086850612, "grad_norm": 0.6301752408128323, "learning_rate": 2.5025602765683433e-06, "loss": 0.2885, "step": 21974 }, { "epoch": 1.0294186536749894, "grad_norm": 0.6112357802915684, "learning_rate": 2.5023706265112864e-06, "loss": 0.2667, "step": 21975 }, { "epoch": 1.0294654986649179, "grad_norm": 0.5490128952331609, "learning_rate": 2.5021809764405875e-06, "loss": 0.2637, "step": 21976 }, { "epoch": 1.0295123436548461, "grad_norm": 0.6574345735088767, "learning_rate": 2.5019913263573374e-06, "loss": 0.2797, "step": 21977 }, { "epoch": 1.0295591886447744, "grad_norm": 0.5980383035349056, "learning_rate": 2.501801676262628e-06, "loss": 0.2642, "step": 21978 }, { "epoch": 1.0296060336347028, "grad_norm": 0.6206564557241113, "learning_rate": 2.5016120261575504e-06, "loss": 0.2815, "step": 21979 }, { "epoch": 1.029652878624631, "grad_norm": 0.6374835626229638, "learning_rate": 2.501422376043196e-06, "loss": 0.2901, "step": 21980 }, { "epoch": 1.0296997236145595, "grad_norm": 0.5783002310545445, "learning_rate": 2.501232725920656e-06, "loss": 0.2668, "step": 21981 }, { "epoch": 1.0297465686044878, "grad_norm": 0.5728927759131056, "learning_rate": 2.5010430757910216e-06, "loss": 0.2733, "step": 21982 }, { "epoch": 1.029793413594416, "grad_norm": 0.5863332753457101, "learning_rate": 2.500853425655386e-06, "loss": 0.2689, "step": 21983 }, { "epoch": 1.0298402585843445, "grad_norm": 0.5770372182656245, "learning_rate": 2.5006637755148372e-06, "loss": 0.2727, "step": 21984 }, { "epoch": 1.0298871035742727, "grad_norm": 0.5674844098146203, "learning_rate": 2.500474125370469e-06, "loss": 0.2673, "step": 21985 }, { "epoch": 1.029933948564201, "grad_norm": 0.6131332455979938, "learning_rate": 2.500284475223373e-06, "loss": 0.2948, "step": 21986 }, { "epoch": 1.0299807935541294, "grad_norm": 0.5882513702003513, "learning_rate": 2.50009482507464e-06, "loss": 0.2874, "step": 21987 }, { "epoch": 1.0300276385440577, "grad_norm": 0.564038886045953, "learning_rate": 2.499905174925361e-06, "loss": 0.2777, "step": 21988 }, { "epoch": 1.0300744835339861, "grad_norm": 0.6132776959367024, "learning_rate": 2.4997155247766275e-06, "loss": 0.2884, "step": 21989 }, { "epoch": 1.0301213285239144, "grad_norm": 0.5853528198157572, "learning_rate": 2.499525874629532e-06, "loss": 0.2724, "step": 21990 }, { "epoch": 1.0301681735138426, "grad_norm": 0.6046533848317046, "learning_rate": 2.4993362244851636e-06, "loss": 0.2807, "step": 21991 }, { "epoch": 1.030215018503771, "grad_norm": 0.6156234885079782, "learning_rate": 2.4991465743446155e-06, "loss": 0.2873, "step": 21992 }, { "epoch": 1.0302618634936993, "grad_norm": 0.5651007289127986, "learning_rate": 2.498956924208979e-06, "loss": 0.2784, "step": 21993 }, { "epoch": 1.0303087084836278, "grad_norm": 0.5817026831214158, "learning_rate": 2.4987672740793456e-06, "loss": 0.2855, "step": 21994 }, { "epoch": 1.030355553473556, "grad_norm": 0.5931274753736484, "learning_rate": 2.4985776239568054e-06, "loss": 0.271, "step": 21995 }, { "epoch": 1.0304023984634842, "grad_norm": 0.5848802049993437, "learning_rate": 2.4983879738424505e-06, "loss": 0.2859, "step": 21996 }, { "epoch": 1.0304492434534127, "grad_norm": 0.5964584829514104, "learning_rate": 2.4981983237373727e-06, "loss": 0.28, "step": 21997 }, { "epoch": 1.030496088443341, "grad_norm": 0.6142923528800784, "learning_rate": 2.498008673642663e-06, "loss": 0.2878, "step": 21998 }, { "epoch": 1.0305429334332694, "grad_norm": 0.5880472721517955, "learning_rate": 2.497819023559413e-06, "loss": 0.2606, "step": 21999 }, { "epoch": 1.0305897784231977, "grad_norm": 0.5885111945588576, "learning_rate": 2.4976293734887145e-06, "loss": 0.2914, "step": 22000 }, { "epoch": 1.030636623413126, "grad_norm": 0.6374627687917714, "learning_rate": 2.497439723431658e-06, "loss": 0.2948, "step": 22001 }, { "epoch": 1.0306834684030544, "grad_norm": 0.5771160813090983, "learning_rate": 2.4972500733893347e-06, "loss": 0.2777, "step": 22002 }, { "epoch": 1.0307303133929826, "grad_norm": 0.5783531850650494, "learning_rate": 2.4970604233628366e-06, "loss": 0.2805, "step": 22003 }, { "epoch": 1.0307771583829108, "grad_norm": 0.5638551771984593, "learning_rate": 2.496870773353256e-06, "loss": 0.2681, "step": 22004 }, { "epoch": 1.0308240033728393, "grad_norm": 0.5898203414631354, "learning_rate": 2.4966811233616818e-06, "loss": 0.2772, "step": 22005 }, { "epoch": 1.0308708483627675, "grad_norm": 0.5875200956400413, "learning_rate": 2.4964914733892087e-06, "loss": 0.2794, "step": 22006 }, { "epoch": 1.030917693352696, "grad_norm": 0.5643131821676672, "learning_rate": 2.496301823436925e-06, "loss": 0.2649, "step": 22007 }, { "epoch": 1.0309645383426242, "grad_norm": 0.5715051630168656, "learning_rate": 2.496112173505924e-06, "loss": 0.2745, "step": 22008 }, { "epoch": 1.0310113833325525, "grad_norm": 0.6142880046747718, "learning_rate": 2.495922523597296e-06, "loss": 0.2781, "step": 22009 }, { "epoch": 1.031058228322481, "grad_norm": 0.6392198729126151, "learning_rate": 2.495732873712133e-06, "loss": 0.2697, "step": 22010 }, { "epoch": 1.0311050733124092, "grad_norm": 0.6401771918461401, "learning_rate": 2.4955432238515263e-06, "loss": 0.2805, "step": 22011 }, { "epoch": 1.0311519183023377, "grad_norm": 0.5957234567466955, "learning_rate": 2.4953535740165678e-06, "loss": 0.2755, "step": 22012 }, { "epoch": 1.031198763292266, "grad_norm": 0.6002483515664147, "learning_rate": 2.4951639242083475e-06, "loss": 0.2814, "step": 22013 }, { "epoch": 1.0312456082821941, "grad_norm": 0.5810476162627463, "learning_rate": 2.4949742744279578e-06, "loss": 0.2891, "step": 22014 }, { "epoch": 1.0312924532721226, "grad_norm": 0.5763513247694138, "learning_rate": 2.49478462467649e-06, "loss": 0.2668, "step": 22015 }, { "epoch": 1.0313392982620508, "grad_norm": 0.5766300514785752, "learning_rate": 2.4945949749550347e-06, "loss": 0.273, "step": 22016 }, { "epoch": 1.0313861432519793, "grad_norm": 0.5681413297688663, "learning_rate": 2.494405325264685e-06, "loss": 0.2797, "step": 22017 }, { "epoch": 1.0314329882419075, "grad_norm": 0.5725855838453113, "learning_rate": 2.4942156756065315e-06, "loss": 0.2641, "step": 22018 }, { "epoch": 1.0314798332318358, "grad_norm": 0.579991792767761, "learning_rate": 2.494026025981664e-06, "loss": 0.2811, "step": 22019 }, { "epoch": 1.0315266782217642, "grad_norm": 0.5623589982731084, "learning_rate": 2.493836376391176e-06, "loss": 0.2618, "step": 22020 }, { "epoch": 1.0315735232116925, "grad_norm": 0.5945033468830521, "learning_rate": 2.493646726836158e-06, "loss": 0.2702, "step": 22021 }, { "epoch": 1.0316203682016207, "grad_norm": 0.5540908283489019, "learning_rate": 2.4934570773177013e-06, "loss": 0.2664, "step": 22022 }, { "epoch": 1.0316672131915492, "grad_norm": 0.5434105839961588, "learning_rate": 2.493267427836897e-06, "loss": 0.2705, "step": 22023 }, { "epoch": 1.0317140581814774, "grad_norm": 0.6060371520432555, "learning_rate": 2.493077778394838e-06, "loss": 0.2912, "step": 22024 }, { "epoch": 1.031760903171406, "grad_norm": 0.6624704218884075, "learning_rate": 2.492888128992615e-06, "loss": 0.2879, "step": 22025 }, { "epoch": 1.0318077481613341, "grad_norm": 0.597663714044621, "learning_rate": 2.4926984796313178e-06, "loss": 0.2713, "step": 22026 }, { "epoch": 1.0318545931512624, "grad_norm": 0.5866377536032479, "learning_rate": 2.492508830312039e-06, "loss": 0.2878, "step": 22027 }, { "epoch": 1.0319014381411908, "grad_norm": 0.6231889994807841, "learning_rate": 2.492319181035871e-06, "loss": 0.2875, "step": 22028 }, { "epoch": 1.031948283131119, "grad_norm": 0.5503185807943016, "learning_rate": 2.492129531803903e-06, "loss": 0.2749, "step": 22029 }, { "epoch": 1.0319951281210475, "grad_norm": 0.5714484295938191, "learning_rate": 2.491939882617228e-06, "loss": 0.2653, "step": 22030 }, { "epoch": 1.0320419731109758, "grad_norm": 0.5901101352545437, "learning_rate": 2.4917502334769382e-06, "loss": 0.2818, "step": 22031 }, { "epoch": 1.032088818100904, "grad_norm": 0.54922950875188, "learning_rate": 2.4915605843841224e-06, "loss": 0.2715, "step": 22032 }, { "epoch": 1.0321356630908325, "grad_norm": 0.5825794225454407, "learning_rate": 2.4913709353398734e-06, "loss": 0.2792, "step": 22033 }, { "epoch": 1.0321825080807607, "grad_norm": 0.5640272755941069, "learning_rate": 2.4911812863452824e-06, "loss": 0.2568, "step": 22034 }, { "epoch": 1.0322293530706892, "grad_norm": 0.6377956137966553, "learning_rate": 2.4909916374014415e-06, "loss": 0.2767, "step": 22035 }, { "epoch": 1.0322761980606174, "grad_norm": 0.6362888735789833, "learning_rate": 2.4908019885094407e-06, "loss": 0.2932, "step": 22036 }, { "epoch": 1.0323230430505457, "grad_norm": 0.5836627199372793, "learning_rate": 2.4906123396703735e-06, "loss": 0.2796, "step": 22037 }, { "epoch": 1.0323698880404741, "grad_norm": 0.5680074663762499, "learning_rate": 2.490422690885329e-06, "loss": 0.2747, "step": 22038 }, { "epoch": 1.0324167330304024, "grad_norm": 0.5863275248393989, "learning_rate": 2.4902330421553997e-06, "loss": 0.2717, "step": 22039 }, { "epoch": 1.0324635780203306, "grad_norm": 0.5601280300046103, "learning_rate": 2.4900433934816764e-06, "loss": 0.2611, "step": 22040 }, { "epoch": 1.032510423010259, "grad_norm": 0.6106323463008838, "learning_rate": 2.4898537448652505e-06, "loss": 0.2892, "step": 22041 }, { "epoch": 1.0325572680001873, "grad_norm": 0.6148119349127981, "learning_rate": 2.489664096307215e-06, "loss": 0.28, "step": 22042 }, { "epoch": 1.0326041129901158, "grad_norm": 0.6454788185263954, "learning_rate": 2.4894744478086596e-06, "loss": 0.2828, "step": 22043 }, { "epoch": 1.032650957980044, "grad_norm": 0.637138369074407, "learning_rate": 2.4892847993706757e-06, "loss": 0.3074, "step": 22044 }, { "epoch": 1.0326978029699723, "grad_norm": 0.6181352099015428, "learning_rate": 2.489095150994355e-06, "loss": 0.289, "step": 22045 }, { "epoch": 1.0327446479599007, "grad_norm": 0.5830473290604988, "learning_rate": 2.4889055026807894e-06, "loss": 0.2814, "step": 22046 }, { "epoch": 1.032791492949829, "grad_norm": 0.6152996694847417, "learning_rate": 2.4887158544310696e-06, "loss": 0.2856, "step": 22047 }, { "epoch": 1.0328383379397574, "grad_norm": 0.6337526766303945, "learning_rate": 2.4885262062462866e-06, "loss": 0.2828, "step": 22048 }, { "epoch": 1.0328851829296857, "grad_norm": 0.6105248337900873, "learning_rate": 2.488336558127534e-06, "loss": 0.2921, "step": 22049 }, { "epoch": 1.032932027919614, "grad_norm": 0.6252209947960073, "learning_rate": 2.488146910075901e-06, "loss": 0.2781, "step": 22050 }, { "epoch": 1.0329788729095424, "grad_norm": 0.5642416034710691, "learning_rate": 2.487957262092479e-06, "loss": 0.2682, "step": 22051 }, { "epoch": 1.0330257178994706, "grad_norm": 0.5686914825453726, "learning_rate": 2.4877676141783595e-06, "loss": 0.2829, "step": 22052 }, { "epoch": 1.033072562889399, "grad_norm": 0.5861318640214587, "learning_rate": 2.4875779663346352e-06, "loss": 0.2705, "step": 22053 }, { "epoch": 1.0331194078793273, "grad_norm": 0.5733032056722964, "learning_rate": 2.4873883185623963e-06, "loss": 0.2578, "step": 22054 }, { "epoch": 1.0331662528692556, "grad_norm": 0.5794434660773686, "learning_rate": 2.4871986708627354e-06, "loss": 0.267, "step": 22055 }, { "epoch": 1.033213097859184, "grad_norm": 0.5805190591853784, "learning_rate": 2.4870090232367414e-06, "loss": 0.2885, "step": 22056 }, { "epoch": 1.0332599428491123, "grad_norm": 0.58172588761621, "learning_rate": 2.486819375685508e-06, "loss": 0.2745, "step": 22057 }, { "epoch": 1.0333067878390405, "grad_norm": 0.5796851728095987, "learning_rate": 2.4866297282101255e-06, "loss": 0.2771, "step": 22058 }, { "epoch": 1.033353632828969, "grad_norm": 0.5768729697289838, "learning_rate": 2.4864400808116856e-06, "loss": 0.2747, "step": 22059 }, { "epoch": 1.0334004778188972, "grad_norm": 0.5641133499724519, "learning_rate": 2.4862504334912794e-06, "loss": 0.2701, "step": 22060 }, { "epoch": 1.0334473228088257, "grad_norm": 0.5623290651076103, "learning_rate": 2.4860607862499997e-06, "loss": 0.2732, "step": 22061 }, { "epoch": 1.033494167798754, "grad_norm": 0.6024166345010837, "learning_rate": 2.4858711390889353e-06, "loss": 0.294, "step": 22062 }, { "epoch": 1.0335410127886822, "grad_norm": 0.5853038620326795, "learning_rate": 2.4856814920091797e-06, "loss": 0.2752, "step": 22063 }, { "epoch": 1.0335878577786106, "grad_norm": 0.610616027109301, "learning_rate": 2.485491845011823e-06, "loss": 0.2863, "step": 22064 }, { "epoch": 1.0336347027685389, "grad_norm": 0.5912092242303513, "learning_rate": 2.485302198097957e-06, "loss": 0.2595, "step": 22065 }, { "epoch": 1.0336815477584673, "grad_norm": 0.5918021693010284, "learning_rate": 2.4851125512686734e-06, "loss": 0.2697, "step": 22066 }, { "epoch": 1.0337283927483956, "grad_norm": 0.5653706375072204, "learning_rate": 2.4849229045250645e-06, "loss": 0.2658, "step": 22067 }, { "epoch": 1.0337752377383238, "grad_norm": 0.5846861080910705, "learning_rate": 2.484733257868219e-06, "loss": 0.2747, "step": 22068 }, { "epoch": 1.0338220827282523, "grad_norm": 0.5779550869703086, "learning_rate": 2.4845436112992296e-06, "loss": 0.2595, "step": 22069 }, { "epoch": 1.0338689277181805, "grad_norm": 0.5776920320537554, "learning_rate": 2.4843539648191886e-06, "loss": 0.2775, "step": 22070 }, { "epoch": 1.033915772708109, "grad_norm": 0.5707246792259972, "learning_rate": 2.484164318429186e-06, "loss": 0.2724, "step": 22071 }, { "epoch": 1.0339626176980372, "grad_norm": 0.5954189068729528, "learning_rate": 2.483974672130314e-06, "loss": 0.2737, "step": 22072 }, { "epoch": 1.0340094626879655, "grad_norm": 0.6238077086708236, "learning_rate": 2.4837850259236645e-06, "loss": 0.2807, "step": 22073 }, { "epoch": 1.034056307677894, "grad_norm": 0.5824603254040336, "learning_rate": 2.483595379810328e-06, "loss": 0.2561, "step": 22074 }, { "epoch": 1.0341031526678222, "grad_norm": 0.5833130260896636, "learning_rate": 2.483405733791395e-06, "loss": 0.2855, "step": 22075 }, { "epoch": 1.0341499976577504, "grad_norm": 0.6239307248078297, "learning_rate": 2.483216087867958e-06, "loss": 0.2841, "step": 22076 }, { "epoch": 1.0341968426476789, "grad_norm": 0.6147328463170249, "learning_rate": 2.483026442041109e-06, "loss": 0.2929, "step": 22077 }, { "epoch": 1.034243687637607, "grad_norm": 0.568352065684242, "learning_rate": 2.4828367963119375e-06, "loss": 0.2675, "step": 22078 }, { "epoch": 1.0342905326275356, "grad_norm": 0.6043907426521464, "learning_rate": 2.4826471506815374e-06, "loss": 0.2928, "step": 22079 }, { "epoch": 1.0343373776174638, "grad_norm": 0.5882704356939944, "learning_rate": 2.482457505150997e-06, "loss": 0.2695, "step": 22080 }, { "epoch": 1.034384222607392, "grad_norm": 0.5912449193536395, "learning_rate": 2.4822678597214103e-06, "loss": 0.2849, "step": 22081 }, { "epoch": 1.0344310675973205, "grad_norm": 0.5884767352288678, "learning_rate": 2.482078214393867e-06, "loss": 0.2736, "step": 22082 }, { "epoch": 1.0344779125872487, "grad_norm": 0.5867856524404451, "learning_rate": 2.4818885691694585e-06, "loss": 0.2932, "step": 22083 }, { "epoch": 1.0345247575771772, "grad_norm": 0.6173178382134364, "learning_rate": 2.4816989240492776e-06, "loss": 0.2817, "step": 22084 }, { "epoch": 1.0345716025671055, "grad_norm": 0.5914346847691803, "learning_rate": 2.481509279034416e-06, "loss": 0.2724, "step": 22085 }, { "epoch": 1.0346184475570337, "grad_norm": 0.6265187062233245, "learning_rate": 2.481319634125962e-06, "loss": 0.2676, "step": 22086 }, { "epoch": 1.0346652925469622, "grad_norm": 0.6143025223353863, "learning_rate": 2.481129989325009e-06, "loss": 0.2697, "step": 22087 }, { "epoch": 1.0347121375368904, "grad_norm": 0.6148034375760073, "learning_rate": 2.480940344632649e-06, "loss": 0.2764, "step": 22088 }, { "epoch": 1.0347589825268189, "grad_norm": 0.5898861395890778, "learning_rate": 2.4807507000499715e-06, "loss": 0.2699, "step": 22089 }, { "epoch": 1.034805827516747, "grad_norm": 0.5839902121435657, "learning_rate": 2.480561055578069e-06, "loss": 0.2947, "step": 22090 }, { "epoch": 1.0348526725066753, "grad_norm": 0.6128811126027671, "learning_rate": 2.4803714112180336e-06, "loss": 0.2878, "step": 22091 }, { "epoch": 1.0348995174966038, "grad_norm": 0.5942273628305846, "learning_rate": 2.480181766970956e-06, "loss": 0.279, "step": 22092 }, { "epoch": 1.034946362486532, "grad_norm": 0.5882918295574192, "learning_rate": 2.4799921228379265e-06, "loss": 0.2998, "step": 22093 }, { "epoch": 1.0349932074764603, "grad_norm": 0.6123278538890542, "learning_rate": 2.4798024788200374e-06, "loss": 0.2811, "step": 22094 }, { "epoch": 1.0350400524663887, "grad_norm": 0.5928182775972539, "learning_rate": 2.47961283491838e-06, "loss": 0.2773, "step": 22095 }, { "epoch": 1.035086897456317, "grad_norm": 0.5680006229483975, "learning_rate": 2.4794231911340454e-06, "loss": 0.2804, "step": 22096 }, { "epoch": 1.0351337424462455, "grad_norm": 0.659585382787904, "learning_rate": 2.479233547468125e-06, "loss": 0.2819, "step": 22097 }, { "epoch": 1.0351805874361737, "grad_norm": 0.58343321353882, "learning_rate": 2.4790439039217116e-06, "loss": 0.2814, "step": 22098 }, { "epoch": 1.035227432426102, "grad_norm": 0.6130903186850535, "learning_rate": 2.478854260495895e-06, "loss": 0.2807, "step": 22099 }, { "epoch": 1.0352742774160304, "grad_norm": 0.585671033170938, "learning_rate": 2.4786646171917657e-06, "loss": 0.2693, "step": 22100 }, { "epoch": 1.0353211224059586, "grad_norm": 0.5600108443672401, "learning_rate": 2.4784749740104165e-06, "loss": 0.2555, "step": 22101 }, { "epoch": 1.035367967395887, "grad_norm": 0.5571516645572006, "learning_rate": 2.4782853309529387e-06, "loss": 0.2702, "step": 22102 }, { "epoch": 1.0354148123858153, "grad_norm": 0.6455808742583234, "learning_rate": 2.4780956880204233e-06, "loss": 0.2948, "step": 22103 }, { "epoch": 1.0354616573757436, "grad_norm": 0.5856037230819846, "learning_rate": 2.477906045213962e-06, "loss": 0.2658, "step": 22104 }, { "epoch": 1.035508502365672, "grad_norm": 0.6060208135553031, "learning_rate": 2.4777164025346455e-06, "loss": 0.2719, "step": 22105 }, { "epoch": 1.0355553473556003, "grad_norm": 0.6168638834154999, "learning_rate": 2.477526759983566e-06, "loss": 0.2574, "step": 22106 }, { "epoch": 1.0356021923455287, "grad_norm": 0.6483246770266302, "learning_rate": 2.4773371175618135e-06, "loss": 0.2891, "step": 22107 }, { "epoch": 1.035649037335457, "grad_norm": 0.6270067086187697, "learning_rate": 2.477147475270481e-06, "loss": 0.2949, "step": 22108 }, { "epoch": 1.0356958823253852, "grad_norm": 0.6160632304485629, "learning_rate": 2.476957833110658e-06, "loss": 0.2895, "step": 22109 }, { "epoch": 1.0357427273153137, "grad_norm": 0.59427193747714, "learning_rate": 2.4767681910834383e-06, "loss": 0.2776, "step": 22110 }, { "epoch": 1.035789572305242, "grad_norm": 0.5582773038409085, "learning_rate": 2.476578549189911e-06, "loss": 0.2837, "step": 22111 }, { "epoch": 1.0358364172951702, "grad_norm": 0.5919835934874929, "learning_rate": 2.4763889074311684e-06, "loss": 0.2879, "step": 22112 }, { "epoch": 1.0358832622850986, "grad_norm": 0.6522508298365082, "learning_rate": 2.476199265808301e-06, "loss": 0.2683, "step": 22113 }, { "epoch": 1.0359301072750269, "grad_norm": 0.5812218233258005, "learning_rate": 2.4760096243224013e-06, "loss": 0.2844, "step": 22114 }, { "epoch": 1.0359769522649553, "grad_norm": 0.5563318572047649, "learning_rate": 2.4758199829745605e-06, "loss": 0.264, "step": 22115 }, { "epoch": 1.0360237972548836, "grad_norm": 0.5482897075595587, "learning_rate": 2.47563034176587e-06, "loss": 0.2715, "step": 22116 }, { "epoch": 1.0360706422448118, "grad_norm": 0.5669730603174528, "learning_rate": 2.47544070069742e-06, "loss": 0.2888, "step": 22117 }, { "epoch": 1.0361174872347403, "grad_norm": 0.6324627494891163, "learning_rate": 2.4752510597703026e-06, "loss": 0.2925, "step": 22118 }, { "epoch": 1.0361643322246685, "grad_norm": 0.5995896395157867, "learning_rate": 2.47506141898561e-06, "loss": 0.2834, "step": 22119 }, { "epoch": 1.036211177214597, "grad_norm": 0.6207312431478527, "learning_rate": 2.4748717783444317e-06, "loss": 0.296, "step": 22120 }, { "epoch": 1.0362580222045252, "grad_norm": 0.6035224951167371, "learning_rate": 2.4746821378478598e-06, "loss": 0.2939, "step": 22121 }, { "epoch": 1.0363048671944535, "grad_norm": 0.6054487963630432, "learning_rate": 2.474492497496987e-06, "loss": 0.2955, "step": 22122 }, { "epoch": 1.036351712184382, "grad_norm": 0.5803176228236406, "learning_rate": 2.474302857292903e-06, "loss": 0.2571, "step": 22123 }, { "epoch": 1.0363985571743102, "grad_norm": 0.6128447884790617, "learning_rate": 2.4741132172366993e-06, "loss": 0.2725, "step": 22124 }, { "epoch": 1.0364454021642386, "grad_norm": 0.6435119464659022, "learning_rate": 2.473923577329467e-06, "loss": 0.2914, "step": 22125 }, { "epoch": 1.0364922471541669, "grad_norm": 0.5892665532752348, "learning_rate": 2.473733937572299e-06, "loss": 0.2853, "step": 22126 }, { "epoch": 1.0365390921440951, "grad_norm": 0.5591800493569603, "learning_rate": 2.4735442979662848e-06, "loss": 0.2687, "step": 22127 }, { "epoch": 1.0365859371340236, "grad_norm": 0.5900689447107347, "learning_rate": 2.4733546585125176e-06, "loss": 0.2843, "step": 22128 }, { "epoch": 1.0366327821239518, "grad_norm": 0.6017455921609595, "learning_rate": 2.4731650192120866e-06, "loss": 0.2925, "step": 22129 }, { "epoch": 1.03667962711388, "grad_norm": 0.5320126890277352, "learning_rate": 2.4729753800660846e-06, "loss": 0.2521, "step": 22130 }, { "epoch": 1.0367264721038085, "grad_norm": 0.5639602983350941, "learning_rate": 2.4727857410756025e-06, "loss": 0.286, "step": 22131 }, { "epoch": 1.0367733170937368, "grad_norm": 0.5824488532252724, "learning_rate": 2.4725961022417312e-06, "loss": 0.2839, "step": 22132 }, { "epoch": 1.0368201620836652, "grad_norm": 0.5828477678181648, "learning_rate": 2.4724064635655627e-06, "loss": 0.2781, "step": 22133 }, { "epoch": 1.0368670070735935, "grad_norm": 0.5862987060298639, "learning_rate": 2.472216825048189e-06, "loss": 0.278, "step": 22134 }, { "epoch": 1.0369138520635217, "grad_norm": 0.5804472837199771, "learning_rate": 2.472027186690699e-06, "loss": 0.2705, "step": 22135 }, { "epoch": 1.0369606970534502, "grad_norm": 0.636812821688927, "learning_rate": 2.471837548494186e-06, "loss": 0.2885, "step": 22136 }, { "epoch": 1.0370075420433784, "grad_norm": 0.5479817259450979, "learning_rate": 2.4716479104597412e-06, "loss": 0.2548, "step": 22137 }, { "epoch": 1.0370543870333069, "grad_norm": 0.6099742447656669, "learning_rate": 2.471458272588455e-06, "loss": 0.2914, "step": 22138 }, { "epoch": 1.0371012320232351, "grad_norm": 0.6026832935835815, "learning_rate": 2.4712686348814194e-06, "loss": 0.2991, "step": 22139 }, { "epoch": 1.0371480770131634, "grad_norm": 0.5889728952722852, "learning_rate": 2.4710789973397266e-06, "loss": 0.2787, "step": 22140 }, { "epoch": 1.0371949220030918, "grad_norm": 0.6028732074098054, "learning_rate": 2.4708893599644664e-06, "loss": 0.2727, "step": 22141 }, { "epoch": 1.03724176699302, "grad_norm": 0.650589302084574, "learning_rate": 2.47069972275673e-06, "loss": 0.2769, "step": 22142 }, { "epoch": 1.0372886119829485, "grad_norm": 0.5619072849034352, "learning_rate": 2.470510085717609e-06, "loss": 0.266, "step": 22143 }, { "epoch": 1.0373354569728768, "grad_norm": 0.5703611175228783, "learning_rate": 2.470320448848196e-06, "loss": 0.2584, "step": 22144 }, { "epoch": 1.037382301962805, "grad_norm": 0.6388367026361141, "learning_rate": 2.4701308121495808e-06, "loss": 0.2803, "step": 22145 }, { "epoch": 1.0374291469527335, "grad_norm": 0.5956645353781996, "learning_rate": 2.469941175622856e-06, "loss": 0.2664, "step": 22146 }, { "epoch": 1.0374759919426617, "grad_norm": 0.5653656204341396, "learning_rate": 2.4697515392691114e-06, "loss": 0.2794, "step": 22147 }, { "epoch": 1.03752283693259, "grad_norm": 0.5941862635318668, "learning_rate": 2.4695619030894395e-06, "loss": 0.2722, "step": 22148 }, { "epoch": 1.0375696819225184, "grad_norm": 0.6338837447018104, "learning_rate": 2.4693722670849306e-06, "loss": 0.2827, "step": 22149 }, { "epoch": 1.0376165269124467, "grad_norm": 0.575747545515467, "learning_rate": 2.4691826312566775e-06, "loss": 0.2694, "step": 22150 }, { "epoch": 1.0376633719023751, "grad_norm": 0.617503824251587, "learning_rate": 2.4689929956057697e-06, "loss": 0.2894, "step": 22151 }, { "epoch": 1.0377102168923034, "grad_norm": 0.6752557003467689, "learning_rate": 2.468803360133301e-06, "loss": 0.2826, "step": 22152 }, { "epoch": 1.0377570618822316, "grad_norm": 0.5845422822021904, "learning_rate": 2.4686137248403594e-06, "loss": 0.2737, "step": 22153 }, { "epoch": 1.03780390687216, "grad_norm": 0.5795360859614509, "learning_rate": 2.4684240897280388e-06, "loss": 0.2703, "step": 22154 }, { "epoch": 1.0378507518620883, "grad_norm": 0.5794633116487602, "learning_rate": 2.468234454797429e-06, "loss": 0.2815, "step": 22155 }, { "epoch": 1.0378975968520168, "grad_norm": 0.6282144936980523, "learning_rate": 2.468044820049622e-06, "loss": 0.2786, "step": 22156 }, { "epoch": 1.037944441841945, "grad_norm": 0.5742034638303867, "learning_rate": 2.4678551854857095e-06, "loss": 0.2719, "step": 22157 }, { "epoch": 1.0379912868318732, "grad_norm": 0.5774276998385935, "learning_rate": 2.467665551106783e-06, "loss": 0.2772, "step": 22158 }, { "epoch": 1.0380381318218017, "grad_norm": 0.5381997175632383, "learning_rate": 2.4674759169139316e-06, "loss": 0.2728, "step": 22159 }, { "epoch": 1.03808497681173, "grad_norm": 0.5623596002764388, "learning_rate": 2.4672862829082485e-06, "loss": 0.2654, "step": 22160 }, { "epoch": 1.0381318218016584, "grad_norm": 0.5912054164910927, "learning_rate": 2.4670966490908253e-06, "loss": 0.2598, "step": 22161 }, { "epoch": 1.0381786667915867, "grad_norm": 0.5782624638839277, "learning_rate": 2.466907015462752e-06, "loss": 0.2775, "step": 22162 }, { "epoch": 1.038225511781515, "grad_norm": 0.5804665613377031, "learning_rate": 2.4667173820251202e-06, "loss": 0.2583, "step": 22163 }, { "epoch": 1.0382723567714434, "grad_norm": 0.6401150634272443, "learning_rate": 2.4665277487790224e-06, "loss": 0.2799, "step": 22164 }, { "epoch": 1.0383192017613716, "grad_norm": 0.6242061246821157, "learning_rate": 2.4663381157255497e-06, "loss": 0.2932, "step": 22165 }, { "epoch": 1.0383660467512998, "grad_norm": 0.5992726603900709, "learning_rate": 2.4661484828657912e-06, "loss": 0.2714, "step": 22166 }, { "epoch": 1.0384128917412283, "grad_norm": 0.5568518405959492, "learning_rate": 2.4659588502008398e-06, "loss": 0.2539, "step": 22167 }, { "epoch": 1.0384597367311565, "grad_norm": 0.6131897604046376, "learning_rate": 2.4657692177317867e-06, "loss": 0.2927, "step": 22168 }, { "epoch": 1.038506581721085, "grad_norm": 0.6222086405081925, "learning_rate": 2.4655795854597234e-06, "loss": 0.2759, "step": 22169 }, { "epoch": 1.0385534267110133, "grad_norm": 0.5913138812506191, "learning_rate": 2.4653899533857405e-06, "loss": 0.281, "step": 22170 }, { "epoch": 1.0386002717009415, "grad_norm": 0.5720409767947715, "learning_rate": 2.465200321510931e-06, "loss": 0.2634, "step": 22171 }, { "epoch": 1.03864711669087, "grad_norm": 0.5703672881150117, "learning_rate": 2.4650106898363843e-06, "loss": 0.2846, "step": 22172 }, { "epoch": 1.0386939616807982, "grad_norm": 0.5926065111216968, "learning_rate": 2.4648210583631917e-06, "loss": 0.282, "step": 22173 }, { "epoch": 1.0387408066707267, "grad_norm": 0.5471374346798407, "learning_rate": 2.464631427092445e-06, "loss": 0.2556, "step": 22174 }, { "epoch": 1.038787651660655, "grad_norm": 0.6331924394773375, "learning_rate": 2.464441796025236e-06, "loss": 0.2923, "step": 22175 }, { "epoch": 1.0388344966505831, "grad_norm": 0.6270609277812775, "learning_rate": 2.464252165162655e-06, "loss": 0.303, "step": 22176 }, { "epoch": 1.0388813416405116, "grad_norm": 0.5933214900679435, "learning_rate": 2.4640625345057952e-06, "loss": 0.282, "step": 22177 }, { "epoch": 1.0389281866304398, "grad_norm": 0.6064534212573368, "learning_rate": 2.4638729040557456e-06, "loss": 0.2976, "step": 22178 }, { "epoch": 1.0389750316203683, "grad_norm": 0.598825364762325, "learning_rate": 2.4636832738135984e-06, "loss": 0.2979, "step": 22179 }, { "epoch": 1.0390218766102965, "grad_norm": 0.5906120291616601, "learning_rate": 2.4634936437804444e-06, "loss": 0.2858, "step": 22180 }, { "epoch": 1.0390687216002248, "grad_norm": 0.5800395196882936, "learning_rate": 2.4633040139573753e-06, "loss": 0.2684, "step": 22181 }, { "epoch": 1.0391155665901533, "grad_norm": 0.553879593768937, "learning_rate": 2.463114384345483e-06, "loss": 0.2513, "step": 22182 }, { "epoch": 1.0391624115800815, "grad_norm": 0.5855315772350074, "learning_rate": 2.462924754945859e-06, "loss": 0.281, "step": 22183 }, { "epoch": 1.0392092565700097, "grad_norm": 0.6206748366127907, "learning_rate": 2.462735125759592e-06, "loss": 0.2881, "step": 22184 }, { "epoch": 1.0392561015599382, "grad_norm": 0.6009841916246558, "learning_rate": 2.462545496787775e-06, "loss": 0.279, "step": 22185 }, { "epoch": 1.0393029465498664, "grad_norm": 0.605163382097917, "learning_rate": 2.4623558680315002e-06, "loss": 0.2777, "step": 22186 }, { "epoch": 1.039349791539795, "grad_norm": 0.5946985886923626, "learning_rate": 2.462166239491857e-06, "loss": 0.2731, "step": 22187 }, { "epoch": 1.0393966365297231, "grad_norm": 0.5935207594956645, "learning_rate": 2.461976611169938e-06, "loss": 0.278, "step": 22188 }, { "epoch": 1.0394434815196514, "grad_norm": 0.6326206382092777, "learning_rate": 2.461786983066835e-06, "loss": 0.2733, "step": 22189 }, { "epoch": 1.0394903265095798, "grad_norm": 0.6001823933749032, "learning_rate": 2.4615973551836377e-06, "loss": 0.2844, "step": 22190 }, { "epoch": 1.039537171499508, "grad_norm": 0.6311544746310611, "learning_rate": 2.4614077275214377e-06, "loss": 0.2914, "step": 22191 }, { "epoch": 1.0395840164894365, "grad_norm": 0.6059480465698357, "learning_rate": 2.461218100081326e-06, "loss": 0.2677, "step": 22192 }, { "epoch": 1.0396308614793648, "grad_norm": 0.5726589044800252, "learning_rate": 2.461028472864395e-06, "loss": 0.2672, "step": 22193 }, { "epoch": 1.039677706469293, "grad_norm": 0.5902832161430289, "learning_rate": 2.4608388458717354e-06, "loss": 0.2793, "step": 22194 }, { "epoch": 1.0397245514592215, "grad_norm": 0.589727808232042, "learning_rate": 2.460649219104439e-06, "loss": 0.2828, "step": 22195 }, { "epoch": 1.0397713964491497, "grad_norm": 0.6020612008008538, "learning_rate": 2.4604595925635953e-06, "loss": 0.2846, "step": 22196 }, { "epoch": 1.0398182414390782, "grad_norm": 0.6372518317861217, "learning_rate": 2.4602699662502975e-06, "loss": 0.2947, "step": 22197 }, { "epoch": 1.0398650864290064, "grad_norm": 0.5948811013805896, "learning_rate": 2.4600803401656354e-06, "loss": 0.2827, "step": 22198 }, { "epoch": 1.0399119314189347, "grad_norm": 0.6371447641557175, "learning_rate": 2.4598907143107014e-06, "loss": 0.2875, "step": 22199 }, { "epoch": 1.0399587764088631, "grad_norm": 0.5706942580904761, "learning_rate": 2.459701088686586e-06, "loss": 0.277, "step": 22200 }, { "epoch": 1.0400056213987914, "grad_norm": 0.6007327445471925, "learning_rate": 2.4595114632943813e-06, "loss": 0.2795, "step": 22201 }, { "epoch": 1.0400524663887196, "grad_norm": 0.6168761252940358, "learning_rate": 2.4593218381351773e-06, "loss": 0.2931, "step": 22202 }, { "epoch": 1.040099311378648, "grad_norm": 0.6337750377405126, "learning_rate": 2.459132213210066e-06, "loss": 0.2894, "step": 22203 }, { "epoch": 1.0401461563685763, "grad_norm": 0.582390603783147, "learning_rate": 2.4589425885201386e-06, "loss": 0.2562, "step": 22204 }, { "epoch": 1.0401930013585048, "grad_norm": 0.5921012097702394, "learning_rate": 2.458752964066486e-06, "loss": 0.2775, "step": 22205 }, { "epoch": 1.040239846348433, "grad_norm": 0.6051759319915211, "learning_rate": 2.4585633398502e-06, "loss": 0.2797, "step": 22206 }, { "epoch": 1.0402866913383613, "grad_norm": 0.5559829938523408, "learning_rate": 2.4583737158723725e-06, "loss": 0.2732, "step": 22207 }, { "epoch": 1.0403335363282897, "grad_norm": 0.6010252013536658, "learning_rate": 2.4581840921340924e-06, "loss": 0.2771, "step": 22208 }, { "epoch": 1.040380381318218, "grad_norm": 0.5486040585402532, "learning_rate": 2.4579944686364528e-06, "loss": 0.2611, "step": 22209 }, { "epoch": 1.0404272263081464, "grad_norm": 0.624931323597741, "learning_rate": 2.4578048453805445e-06, "loss": 0.2864, "step": 22210 }, { "epoch": 1.0404740712980747, "grad_norm": 0.5991135326043626, "learning_rate": 2.4576152223674585e-06, "loss": 0.2749, "step": 22211 }, { "epoch": 1.040520916288003, "grad_norm": 0.5528362325104945, "learning_rate": 2.457425599598286e-06, "loss": 0.2578, "step": 22212 }, { "epoch": 1.0405677612779314, "grad_norm": 0.6147343105973371, "learning_rate": 2.4572359770741195e-06, "loss": 0.2884, "step": 22213 }, { "epoch": 1.0406146062678596, "grad_norm": 0.616973763270423, "learning_rate": 2.457046354796049e-06, "loss": 0.2893, "step": 22214 }, { "epoch": 1.040661451257788, "grad_norm": 0.6061019014671252, "learning_rate": 2.456856732765165e-06, "loss": 0.2853, "step": 22215 }, { "epoch": 1.0407082962477163, "grad_norm": 0.5884699983937648, "learning_rate": 2.4566671109825604e-06, "loss": 0.2756, "step": 22216 }, { "epoch": 1.0407551412376446, "grad_norm": 0.5930791426656855, "learning_rate": 2.4564774894493252e-06, "loss": 0.2959, "step": 22217 }, { "epoch": 1.040801986227573, "grad_norm": 0.6474826300121516, "learning_rate": 2.4562878681665514e-06, "loss": 0.2957, "step": 22218 }, { "epoch": 1.0408488312175013, "grad_norm": 0.5782424814776941, "learning_rate": 2.4560982471353306e-06, "loss": 0.2657, "step": 22219 }, { "epoch": 1.0408956762074295, "grad_norm": 0.5520920655856313, "learning_rate": 2.4559086263567523e-06, "loss": 0.2727, "step": 22220 }, { "epoch": 1.040942521197358, "grad_norm": 0.5689947540624786, "learning_rate": 2.4557190058319095e-06, "loss": 0.277, "step": 22221 }, { "epoch": 1.0409893661872862, "grad_norm": 0.5505960727304701, "learning_rate": 2.455529385561892e-06, "loss": 0.2591, "step": 22222 }, { "epoch": 1.0410362111772147, "grad_norm": 0.5574681178905075, "learning_rate": 2.4553397655477916e-06, "loss": 0.2766, "step": 22223 }, { "epoch": 1.041083056167143, "grad_norm": 0.5906827903823731, "learning_rate": 2.4551501457907003e-06, "loss": 0.275, "step": 22224 }, { "epoch": 1.0411299011570712, "grad_norm": 0.5644340018734452, "learning_rate": 2.4549605262917096e-06, "loss": 0.2769, "step": 22225 }, { "epoch": 1.0411767461469996, "grad_norm": 0.5769012802638543, "learning_rate": 2.454770907051908e-06, "loss": 0.2683, "step": 22226 }, { "epoch": 1.0412235911369279, "grad_norm": 0.5308881551535699, "learning_rate": 2.4545812880723888e-06, "loss": 0.2481, "step": 22227 }, { "epoch": 1.0412704361268563, "grad_norm": 0.5836590140723331, "learning_rate": 2.4543916693542435e-06, "loss": 0.2749, "step": 22228 }, { "epoch": 1.0413172811167846, "grad_norm": 0.6033850822714071, "learning_rate": 2.4542020508985615e-06, "loss": 0.2614, "step": 22229 }, { "epoch": 1.0413641261067128, "grad_norm": 0.5480946932159548, "learning_rate": 2.454012432706436e-06, "loss": 0.2821, "step": 22230 }, { "epoch": 1.0414109710966413, "grad_norm": 0.5793383340604433, "learning_rate": 2.4538228147789574e-06, "loss": 0.2632, "step": 22231 }, { "epoch": 1.0414578160865695, "grad_norm": 0.5342518251815472, "learning_rate": 2.4536331971172176e-06, "loss": 0.2639, "step": 22232 }, { "epoch": 1.041504661076498, "grad_norm": 0.5936448085814473, "learning_rate": 2.453443579722306e-06, "loss": 0.2766, "step": 22233 }, { "epoch": 1.0415515060664262, "grad_norm": 0.6058188008115123, "learning_rate": 2.453253962595315e-06, "loss": 0.278, "step": 22234 }, { "epoch": 1.0415983510563545, "grad_norm": 0.5750909960270143, "learning_rate": 2.4530643457373363e-06, "loss": 0.2683, "step": 22235 }, { "epoch": 1.041645196046283, "grad_norm": 0.6194473797913758, "learning_rate": 2.4528747291494598e-06, "loss": 0.2887, "step": 22236 }, { "epoch": 1.0416920410362112, "grad_norm": 0.5910889143657079, "learning_rate": 2.4526851128327774e-06, "loss": 0.2896, "step": 22237 }, { "epoch": 1.0417388860261394, "grad_norm": 0.5607441996379543, "learning_rate": 2.4524954967883814e-06, "loss": 0.2749, "step": 22238 }, { "epoch": 1.0417857310160679, "grad_norm": 0.6087797049351392, "learning_rate": 2.4523058810173615e-06, "loss": 0.293, "step": 22239 }, { "epoch": 1.041832576005996, "grad_norm": 0.5889624608279648, "learning_rate": 2.452116265520808e-06, "loss": 0.2722, "step": 22240 }, { "epoch": 1.0418794209959246, "grad_norm": 0.5889755982987177, "learning_rate": 2.451926650299814e-06, "loss": 0.2811, "step": 22241 }, { "epoch": 1.0419262659858528, "grad_norm": 0.5935123678016522, "learning_rate": 2.4517370353554705e-06, "loss": 0.2746, "step": 22242 }, { "epoch": 1.041973110975781, "grad_norm": 0.5872245057595986, "learning_rate": 2.451547420688868e-06, "loss": 0.2906, "step": 22243 }, { "epoch": 1.0420199559657095, "grad_norm": 0.5744035264983945, "learning_rate": 2.4513578063010986e-06, "loss": 0.2591, "step": 22244 }, { "epoch": 1.0420668009556378, "grad_norm": 0.5960919233245738, "learning_rate": 2.4511681921932516e-06, "loss": 0.2697, "step": 22245 }, { "epoch": 1.0421136459455662, "grad_norm": 0.5723255020616071, "learning_rate": 2.4509785783664204e-06, "loss": 0.2738, "step": 22246 }, { "epoch": 1.0421604909354945, "grad_norm": 0.5494917845481454, "learning_rate": 2.450788964821694e-06, "loss": 0.2659, "step": 22247 }, { "epoch": 1.0422073359254227, "grad_norm": 0.5572353231176324, "learning_rate": 2.450599351560166e-06, "loss": 0.2558, "step": 22248 }, { "epoch": 1.0422541809153512, "grad_norm": 0.5686306815867692, "learning_rate": 2.450409738582925e-06, "loss": 0.2588, "step": 22249 }, { "epoch": 1.0423010259052794, "grad_norm": 0.5531548778131691, "learning_rate": 2.450220125891065e-06, "loss": 0.2862, "step": 22250 }, { "epoch": 1.0423478708952079, "grad_norm": 0.6193880644367185, "learning_rate": 2.4500305134856746e-06, "loss": 0.2856, "step": 22251 }, { "epoch": 1.042394715885136, "grad_norm": 0.5812746854863885, "learning_rate": 2.4498409013678468e-06, "loss": 0.2728, "step": 22252 }, { "epoch": 1.0424415608750643, "grad_norm": 0.5999611430459872, "learning_rate": 2.449651289538671e-06, "loss": 0.2793, "step": 22253 }, { "epoch": 1.0424884058649928, "grad_norm": 0.5830146663549309, "learning_rate": 2.4494616779992395e-06, "loss": 0.2795, "step": 22254 }, { "epoch": 1.042535250854921, "grad_norm": 0.5866461448790953, "learning_rate": 2.449272066750644e-06, "loss": 0.2809, "step": 22255 }, { "epoch": 1.0425820958448493, "grad_norm": 0.5665504011327142, "learning_rate": 2.4490824557939756e-06, "loss": 0.2787, "step": 22256 }, { "epoch": 1.0426289408347778, "grad_norm": 0.552740759796593, "learning_rate": 2.4488928451303234e-06, "loss": 0.2803, "step": 22257 }, { "epoch": 1.042675785824706, "grad_norm": 0.5486300763715315, "learning_rate": 2.4487032347607805e-06, "loss": 0.263, "step": 22258 }, { "epoch": 1.0427226308146345, "grad_norm": 0.5711027833162352, "learning_rate": 2.448513624686438e-06, "loss": 0.2722, "step": 22259 }, { "epoch": 1.0427694758045627, "grad_norm": 0.5776553843975462, "learning_rate": 2.448324014908386e-06, "loss": 0.2968, "step": 22260 }, { "epoch": 1.042816320794491, "grad_norm": 0.61297033778791, "learning_rate": 2.448134405427716e-06, "loss": 0.2718, "step": 22261 }, { "epoch": 1.0428631657844194, "grad_norm": 0.5707580981954002, "learning_rate": 2.447944796245521e-06, "loss": 0.2824, "step": 22262 }, { "epoch": 1.0429100107743476, "grad_norm": 0.6108107375499785, "learning_rate": 2.4477551873628904e-06, "loss": 0.2729, "step": 22263 }, { "epoch": 1.042956855764276, "grad_norm": 0.5744978693009599, "learning_rate": 2.4475655787809143e-06, "loss": 0.2655, "step": 22264 }, { "epoch": 1.0430037007542043, "grad_norm": 0.5681734521356445, "learning_rate": 2.4473759705006853e-06, "loss": 0.2793, "step": 22265 }, { "epoch": 1.0430505457441326, "grad_norm": 0.5991315455647889, "learning_rate": 2.447186362523295e-06, "loss": 0.2844, "step": 22266 }, { "epoch": 1.043097390734061, "grad_norm": 0.5694994691829532, "learning_rate": 2.4469967548498336e-06, "loss": 0.2636, "step": 22267 }, { "epoch": 1.0431442357239893, "grad_norm": 0.5574505180617201, "learning_rate": 2.4468071474813936e-06, "loss": 0.272, "step": 22268 }, { "epoch": 1.0431910807139178, "grad_norm": 0.6405405279330719, "learning_rate": 2.4466175404190637e-06, "loss": 0.2909, "step": 22269 }, { "epoch": 1.043237925703846, "grad_norm": 0.5758734827151425, "learning_rate": 2.446427933663937e-06, "loss": 0.2662, "step": 22270 }, { "epoch": 1.0432847706937742, "grad_norm": 0.5628975243217529, "learning_rate": 2.4462383272171036e-06, "loss": 0.2694, "step": 22271 }, { "epoch": 1.0433316156837027, "grad_norm": 0.6364455301731952, "learning_rate": 2.4460487210796554e-06, "loss": 0.2768, "step": 22272 }, { "epoch": 1.043378460673631, "grad_norm": 0.5776815626608558, "learning_rate": 2.4458591152526832e-06, "loss": 0.2797, "step": 22273 }, { "epoch": 1.0434253056635592, "grad_norm": 0.5697550681179142, "learning_rate": 2.4456695097372795e-06, "loss": 0.28, "step": 22274 }, { "epoch": 1.0434721506534876, "grad_norm": 0.6135833791126865, "learning_rate": 2.4454799045345325e-06, "loss": 0.2879, "step": 22275 }, { "epoch": 1.0435189956434159, "grad_norm": 0.5931203780261508, "learning_rate": 2.445290299645535e-06, "loss": 0.2713, "step": 22276 }, { "epoch": 1.0435658406333443, "grad_norm": 0.5877827816050988, "learning_rate": 2.4451006950713784e-06, "loss": 0.2809, "step": 22277 }, { "epoch": 1.0436126856232726, "grad_norm": 0.6104088663919076, "learning_rate": 2.444911090813153e-06, "loss": 0.2834, "step": 22278 }, { "epoch": 1.0436595306132008, "grad_norm": 0.6092853828999808, "learning_rate": 2.444721486871951e-06, "loss": 0.2837, "step": 22279 }, { "epoch": 1.0437063756031293, "grad_norm": 0.6144609856060637, "learning_rate": 2.4445318832488636e-06, "loss": 0.2637, "step": 22280 }, { "epoch": 1.0437532205930575, "grad_norm": 0.5918973956087472, "learning_rate": 2.444342279944981e-06, "loss": 0.261, "step": 22281 }, { "epoch": 1.043800065582986, "grad_norm": 0.5661407082418414, "learning_rate": 2.444152676961394e-06, "loss": 0.2862, "step": 22282 }, { "epoch": 1.0438469105729142, "grad_norm": 0.5902411420488363, "learning_rate": 2.443963074299194e-06, "loss": 0.2778, "step": 22283 }, { "epoch": 1.0438937555628425, "grad_norm": 0.569609069904018, "learning_rate": 2.443773471959473e-06, "loss": 0.282, "step": 22284 }, { "epoch": 1.043940600552771, "grad_norm": 0.5808054161315088, "learning_rate": 2.443583869943321e-06, "loss": 0.2628, "step": 22285 }, { "epoch": 1.0439874455426992, "grad_norm": 0.5616201163971111, "learning_rate": 2.443394268251831e-06, "loss": 0.259, "step": 22286 }, { "epoch": 1.0440342905326276, "grad_norm": 0.5802950641111291, "learning_rate": 2.4432046668860913e-06, "loss": 0.2697, "step": 22287 }, { "epoch": 1.0440811355225559, "grad_norm": 0.6044185960350917, "learning_rate": 2.4430150658471956e-06, "loss": 0.2976, "step": 22288 }, { "epoch": 1.0441279805124841, "grad_norm": 0.6312099348219601, "learning_rate": 2.442825465136233e-06, "loss": 0.2782, "step": 22289 }, { "epoch": 1.0441748255024126, "grad_norm": 0.5366344679822127, "learning_rate": 2.4426358647542957e-06, "loss": 0.2623, "step": 22290 }, { "epoch": 1.0442216704923408, "grad_norm": 0.5713511194021933, "learning_rate": 2.4424462647024747e-06, "loss": 0.2798, "step": 22291 }, { "epoch": 1.044268515482269, "grad_norm": 0.5623365538524423, "learning_rate": 2.4422566649818618e-06, "loss": 0.269, "step": 22292 }, { "epoch": 1.0443153604721975, "grad_norm": 0.6299928978075874, "learning_rate": 2.4420670655935458e-06, "loss": 0.2782, "step": 22293 }, { "epoch": 1.0443622054621258, "grad_norm": 0.5949781135607475, "learning_rate": 2.4418774665386203e-06, "loss": 0.2797, "step": 22294 }, { "epoch": 1.0444090504520542, "grad_norm": 0.5788975426209596, "learning_rate": 2.4416878678181745e-06, "loss": 0.2741, "step": 22295 }, { "epoch": 1.0444558954419825, "grad_norm": 0.5570665154949828, "learning_rate": 2.4414982694333008e-06, "loss": 0.2568, "step": 22296 }, { "epoch": 1.0445027404319107, "grad_norm": 0.5922202311019497, "learning_rate": 2.4413086713850896e-06, "loss": 0.2852, "step": 22297 }, { "epoch": 1.0445495854218392, "grad_norm": 0.6103828778219118, "learning_rate": 2.4411190736746324e-06, "loss": 0.2709, "step": 22298 }, { "epoch": 1.0445964304117674, "grad_norm": 0.5524875945194421, "learning_rate": 2.440929476303021e-06, "loss": 0.2737, "step": 22299 }, { "epoch": 1.0446432754016959, "grad_norm": 0.5455070998938133, "learning_rate": 2.4407398792713446e-06, "loss": 0.2501, "step": 22300 }, { "epoch": 1.0446901203916241, "grad_norm": 0.5499520858720063, "learning_rate": 2.440550282580696e-06, "loss": 0.2792, "step": 22301 }, { "epoch": 1.0447369653815524, "grad_norm": 0.5801524662155791, "learning_rate": 2.440360686232165e-06, "loss": 0.2894, "step": 22302 }, { "epoch": 1.0447838103714808, "grad_norm": 0.6023598422166876, "learning_rate": 2.440171090226843e-06, "loss": 0.2703, "step": 22303 }, { "epoch": 1.044830655361409, "grad_norm": 0.5469967723450699, "learning_rate": 2.4399814945658217e-06, "loss": 0.269, "step": 22304 }, { "epoch": 1.0448775003513375, "grad_norm": 0.5644751420715194, "learning_rate": 2.4397918992501926e-06, "loss": 0.2505, "step": 22305 }, { "epoch": 1.0449243453412658, "grad_norm": 0.5817803511759395, "learning_rate": 2.4396023042810453e-06, "loss": 0.2903, "step": 22306 }, { "epoch": 1.044971190331194, "grad_norm": 0.5572345371685882, "learning_rate": 2.439412709659471e-06, "loss": 0.2719, "step": 22307 }, { "epoch": 1.0450180353211225, "grad_norm": 0.5826278431619882, "learning_rate": 2.439223115386562e-06, "loss": 0.2848, "step": 22308 }, { "epoch": 1.0450648803110507, "grad_norm": 0.5219177724623222, "learning_rate": 2.439033521463408e-06, "loss": 0.2556, "step": 22309 }, { "epoch": 1.045111725300979, "grad_norm": 0.5788517806920471, "learning_rate": 2.438843927891101e-06, "loss": 0.2698, "step": 22310 }, { "epoch": 1.0451585702909074, "grad_norm": 0.5852270617583549, "learning_rate": 2.438654334670733e-06, "loss": 0.2646, "step": 22311 }, { "epoch": 1.0452054152808357, "grad_norm": 0.6016641000564191, "learning_rate": 2.4384647418033933e-06, "loss": 0.2916, "step": 22312 }, { "epoch": 1.0452522602707641, "grad_norm": 0.6047871738451335, "learning_rate": 2.438275149290173e-06, "loss": 0.2866, "step": 22313 }, { "epoch": 1.0452991052606924, "grad_norm": 0.5559693450880389, "learning_rate": 2.4380855571321634e-06, "loss": 0.2696, "step": 22314 }, { "epoch": 1.0453459502506206, "grad_norm": 0.5725007722786739, "learning_rate": 2.437895965330457e-06, "loss": 0.2647, "step": 22315 }, { "epoch": 1.045392795240549, "grad_norm": 0.5643852478247491, "learning_rate": 2.4377063738861427e-06, "loss": 0.2657, "step": 22316 }, { "epoch": 1.0454396402304773, "grad_norm": 0.6291940871439119, "learning_rate": 2.4375167828003136e-06, "loss": 0.2763, "step": 22317 }, { "epoch": 1.0454864852204058, "grad_norm": 0.5780510667063049, "learning_rate": 2.437327192074059e-06, "loss": 0.26, "step": 22318 }, { "epoch": 1.045533330210334, "grad_norm": 0.5924136958206546, "learning_rate": 2.437137601708471e-06, "loss": 0.2862, "step": 22319 }, { "epoch": 1.0455801752002623, "grad_norm": 0.607505560109336, "learning_rate": 2.4369480117046396e-06, "loss": 0.2858, "step": 22320 }, { "epoch": 1.0456270201901907, "grad_norm": 0.5730817464230091, "learning_rate": 2.4367584220636565e-06, "loss": 0.2756, "step": 22321 }, { "epoch": 1.045673865180119, "grad_norm": 0.6149602971732681, "learning_rate": 2.4365688327866138e-06, "loss": 0.2926, "step": 22322 }, { "epoch": 1.0457207101700474, "grad_norm": 0.5741026262212751, "learning_rate": 2.4363792438746016e-06, "loss": 0.2609, "step": 22323 }, { "epoch": 1.0457675551599757, "grad_norm": 0.5524763144547652, "learning_rate": 2.43618965532871e-06, "loss": 0.2745, "step": 22324 }, { "epoch": 1.045814400149904, "grad_norm": 0.5641856247223662, "learning_rate": 2.4360000671500307e-06, "loss": 0.2726, "step": 22325 }, { "epoch": 1.0458612451398324, "grad_norm": 0.5488199111138107, "learning_rate": 2.435810479339656e-06, "loss": 0.2596, "step": 22326 }, { "epoch": 1.0459080901297606, "grad_norm": 0.5985594394764973, "learning_rate": 2.435620891898675e-06, "loss": 0.2754, "step": 22327 }, { "epoch": 1.0459549351196888, "grad_norm": 0.6105926746947016, "learning_rate": 2.4354313048281793e-06, "loss": 0.2799, "step": 22328 }, { "epoch": 1.0460017801096173, "grad_norm": 0.6208071273880603, "learning_rate": 2.4352417181292614e-06, "loss": 0.2775, "step": 22329 }, { "epoch": 1.0460486250995455, "grad_norm": 0.5512863440661939, "learning_rate": 2.435052131803011e-06, "loss": 0.2535, "step": 22330 }, { "epoch": 1.046095470089474, "grad_norm": 0.604306820187085, "learning_rate": 2.4348625458505182e-06, "loss": 0.2737, "step": 22331 }, { "epoch": 1.0461423150794023, "grad_norm": 0.6019882426473938, "learning_rate": 2.4346729602728754e-06, "loss": 0.277, "step": 22332 }, { "epoch": 1.0461891600693305, "grad_norm": 0.6204279811366865, "learning_rate": 2.4344833750711737e-06, "loss": 0.2794, "step": 22333 }, { "epoch": 1.046236005059259, "grad_norm": 0.5923439724734889, "learning_rate": 2.4342937902465034e-06, "loss": 0.2761, "step": 22334 }, { "epoch": 1.0462828500491872, "grad_norm": 0.6188768563143566, "learning_rate": 2.4341042057999566e-06, "loss": 0.2999, "step": 22335 }, { "epoch": 1.0463296950391157, "grad_norm": 0.5838386260231239, "learning_rate": 2.4339146217326226e-06, "loss": 0.2723, "step": 22336 }, { "epoch": 1.046376540029044, "grad_norm": 0.5980118588945684, "learning_rate": 2.4337250380455937e-06, "loss": 0.2779, "step": 22337 }, { "epoch": 1.0464233850189721, "grad_norm": 0.5781807292013329, "learning_rate": 2.43353545473996e-06, "loss": 0.2717, "step": 22338 }, { "epoch": 1.0464702300089006, "grad_norm": 0.5608237978894834, "learning_rate": 2.433345871816814e-06, "loss": 0.2691, "step": 22339 }, { "epoch": 1.0465170749988288, "grad_norm": 0.5699388452043436, "learning_rate": 2.433156289277245e-06, "loss": 0.2777, "step": 22340 }, { "epoch": 1.0465639199887573, "grad_norm": 0.5865532900935219, "learning_rate": 2.4329667071223457e-06, "loss": 0.2782, "step": 22341 }, { "epoch": 1.0466107649786855, "grad_norm": 0.5941188549993626, "learning_rate": 2.4327771253532053e-06, "loss": 0.2789, "step": 22342 }, { "epoch": 1.0466576099686138, "grad_norm": 0.6062916755473737, "learning_rate": 2.4325875439709158e-06, "loss": 0.2732, "step": 22343 }, { "epoch": 1.0467044549585423, "grad_norm": 0.5798594384017369, "learning_rate": 2.432397962976568e-06, "loss": 0.2744, "step": 22344 }, { "epoch": 1.0467512999484705, "grad_norm": 0.6160330525140767, "learning_rate": 2.4322083823712523e-06, "loss": 0.2877, "step": 22345 }, { "epoch": 1.0467981449383987, "grad_norm": 0.5789041580440822, "learning_rate": 2.4320188021560616e-06, "loss": 0.2679, "step": 22346 }, { "epoch": 1.0468449899283272, "grad_norm": 0.562125147165797, "learning_rate": 2.4318292223320857e-06, "loss": 0.2649, "step": 22347 }, { "epoch": 1.0468918349182554, "grad_norm": 0.568143174266472, "learning_rate": 2.431639642900415e-06, "loss": 0.2765, "step": 22348 }, { "epoch": 1.046938679908184, "grad_norm": 0.5670349894104019, "learning_rate": 2.43145006386214e-06, "loss": 0.2723, "step": 22349 }, { "epoch": 1.0469855248981121, "grad_norm": 0.5320932534342286, "learning_rate": 2.4312604852183537e-06, "loss": 0.2708, "step": 22350 }, { "epoch": 1.0470323698880404, "grad_norm": 0.6382820568745592, "learning_rate": 2.4310709069701454e-06, "loss": 0.276, "step": 22351 }, { "epoch": 1.0470792148779688, "grad_norm": 0.5464430342233655, "learning_rate": 2.430881329118607e-06, "loss": 0.2673, "step": 22352 }, { "epoch": 1.047126059867897, "grad_norm": 0.6344334814031437, "learning_rate": 2.43069175166483e-06, "loss": 0.2729, "step": 22353 }, { "epoch": 1.0471729048578255, "grad_norm": 0.5929632155053878, "learning_rate": 2.430502174609904e-06, "loss": 0.2714, "step": 22354 }, { "epoch": 1.0472197498477538, "grad_norm": 0.6221476984070133, "learning_rate": 2.43031259795492e-06, "loss": 0.2779, "step": 22355 }, { "epoch": 1.047266594837682, "grad_norm": 0.6206259653846662, "learning_rate": 2.4301230217009695e-06, "loss": 0.3019, "step": 22356 }, { "epoch": 1.0473134398276105, "grad_norm": 0.5824265609395702, "learning_rate": 2.429933445849144e-06, "loss": 0.2836, "step": 22357 }, { "epoch": 1.0473602848175387, "grad_norm": 0.6129572802288434, "learning_rate": 2.4297438704005334e-06, "loss": 0.268, "step": 22358 }, { "epoch": 1.0474071298074672, "grad_norm": 0.5625335349056312, "learning_rate": 2.42955429535623e-06, "loss": 0.2784, "step": 22359 }, { "epoch": 1.0474539747973954, "grad_norm": 0.5860154485511734, "learning_rate": 2.4293647207173233e-06, "loss": 0.2868, "step": 22360 }, { "epoch": 1.0475008197873237, "grad_norm": 0.5835913540702912, "learning_rate": 2.429175146484905e-06, "loss": 0.2815, "step": 22361 }, { "epoch": 1.0475476647772521, "grad_norm": 0.6099007787671114, "learning_rate": 2.4289855726600654e-06, "loss": 0.2712, "step": 22362 }, { "epoch": 1.0475945097671804, "grad_norm": 0.5671893092513196, "learning_rate": 2.4287959992438963e-06, "loss": 0.2666, "step": 22363 }, { "epoch": 1.0476413547571086, "grad_norm": 0.602844912721262, "learning_rate": 2.4286064262374887e-06, "loss": 0.286, "step": 22364 }, { "epoch": 1.047688199747037, "grad_norm": 0.630318100894037, "learning_rate": 2.4284168536419323e-06, "loss": 0.2985, "step": 22365 }, { "epoch": 1.0477350447369653, "grad_norm": 0.5881846761310072, "learning_rate": 2.4282272814583207e-06, "loss": 0.2742, "step": 22366 }, { "epoch": 1.0477818897268938, "grad_norm": 0.5628187109869601, "learning_rate": 2.428037709687742e-06, "loss": 0.2632, "step": 22367 }, { "epoch": 1.047828734716822, "grad_norm": 0.6712994543972376, "learning_rate": 2.427848138331288e-06, "loss": 0.2772, "step": 22368 }, { "epoch": 1.0478755797067503, "grad_norm": 0.6092358302330698, "learning_rate": 2.4276585673900496e-06, "loss": 0.2804, "step": 22369 }, { "epoch": 1.0479224246966787, "grad_norm": 0.5920876035459672, "learning_rate": 2.427468996865118e-06, "loss": 0.2865, "step": 22370 }, { "epoch": 1.047969269686607, "grad_norm": 0.5952409144063338, "learning_rate": 2.4272794267575847e-06, "loss": 0.2704, "step": 22371 }, { "epoch": 1.0480161146765354, "grad_norm": 0.5949934428044322, "learning_rate": 2.42708985706854e-06, "loss": 0.2835, "step": 22372 }, { "epoch": 1.0480629596664637, "grad_norm": 0.5816723471012621, "learning_rate": 2.4269002877990747e-06, "loss": 0.2719, "step": 22373 }, { "epoch": 1.048109804656392, "grad_norm": 0.5795097743354853, "learning_rate": 2.4267107189502796e-06, "loss": 0.2779, "step": 22374 }, { "epoch": 1.0481566496463204, "grad_norm": 0.6567197742893779, "learning_rate": 2.426521150523246e-06, "loss": 0.2922, "step": 22375 }, { "epoch": 1.0482034946362486, "grad_norm": 0.5783496296955539, "learning_rate": 2.4263315825190643e-06, "loss": 0.2748, "step": 22376 }, { "epoch": 1.048250339626177, "grad_norm": 0.5656703524340884, "learning_rate": 2.426142014938827e-06, "loss": 0.2644, "step": 22377 }, { "epoch": 1.0482971846161053, "grad_norm": 0.6397283188197348, "learning_rate": 2.425952447783623e-06, "loss": 0.2797, "step": 22378 }, { "epoch": 1.0483440296060336, "grad_norm": 0.6522565018563337, "learning_rate": 2.4257628810545443e-06, "loss": 0.286, "step": 22379 }, { "epoch": 1.048390874595962, "grad_norm": 0.5615141670872118, "learning_rate": 2.425573314752681e-06, "loss": 0.2506, "step": 22380 }, { "epoch": 1.0484377195858903, "grad_norm": 0.5691754917151777, "learning_rate": 2.425383748879125e-06, "loss": 0.2674, "step": 22381 }, { "epoch": 1.0484845645758185, "grad_norm": 0.6058403632219951, "learning_rate": 2.425194183434967e-06, "loss": 0.2852, "step": 22382 }, { "epoch": 1.048531409565747, "grad_norm": 0.6092419921057363, "learning_rate": 2.425004618421297e-06, "loss": 0.2791, "step": 22383 }, { "epoch": 1.0485782545556752, "grad_norm": 0.5737737570235756, "learning_rate": 2.424815053839208e-06, "loss": 0.2811, "step": 22384 }, { "epoch": 1.0486250995456037, "grad_norm": 0.5841411244019412, "learning_rate": 2.424625489689788e-06, "loss": 0.2824, "step": 22385 }, { "epoch": 1.048671944535532, "grad_norm": 0.6097988785608471, "learning_rate": 2.42443592597413e-06, "loss": 0.2915, "step": 22386 }, { "epoch": 1.0487187895254602, "grad_norm": 0.6285156404450432, "learning_rate": 2.424246362693324e-06, "loss": 0.2825, "step": 22387 }, { "epoch": 1.0487656345153886, "grad_norm": 0.6505442977456961, "learning_rate": 2.4240567998484614e-06, "loss": 0.3009, "step": 22388 }, { "epoch": 1.0488124795053169, "grad_norm": 0.6680001039725205, "learning_rate": 2.4238672374406323e-06, "loss": 0.3083, "step": 22389 }, { "epoch": 1.0488593244952453, "grad_norm": 0.5888652884613195, "learning_rate": 2.423677675470929e-06, "loss": 0.2755, "step": 22390 }, { "epoch": 1.0489061694851736, "grad_norm": 0.5502837023254781, "learning_rate": 2.423488113940441e-06, "loss": 0.2725, "step": 22391 }, { "epoch": 1.0489530144751018, "grad_norm": 0.5797249613241826, "learning_rate": 2.4232985528502597e-06, "loss": 0.2812, "step": 22392 }, { "epoch": 1.0489998594650303, "grad_norm": 0.5830509074187578, "learning_rate": 2.4231089922014754e-06, "loss": 0.2816, "step": 22393 }, { "epoch": 1.0490467044549585, "grad_norm": 0.6229833765753229, "learning_rate": 2.42291943199518e-06, "loss": 0.3043, "step": 22394 }, { "epoch": 1.049093549444887, "grad_norm": 0.5725637732793672, "learning_rate": 2.422729872232464e-06, "loss": 0.2814, "step": 22395 }, { "epoch": 1.0491403944348152, "grad_norm": 0.6049044478866875, "learning_rate": 2.422540312914419e-06, "loss": 0.2805, "step": 22396 }, { "epoch": 1.0491872394247435, "grad_norm": 0.5872969904623365, "learning_rate": 2.4223507540421337e-06, "loss": 0.2796, "step": 22397 }, { "epoch": 1.049234084414672, "grad_norm": 0.6017550055759734, "learning_rate": 2.4221611956167004e-06, "loss": 0.2785, "step": 22398 }, { "epoch": 1.0492809294046002, "grad_norm": 0.590211595227411, "learning_rate": 2.4219716376392102e-06, "loss": 0.2803, "step": 22399 }, { "epoch": 1.0493277743945284, "grad_norm": 0.5700924036949891, "learning_rate": 2.4217820801107534e-06, "loss": 0.2604, "step": 22400 }, { "epoch": 1.0493746193844569, "grad_norm": 0.6220223874805073, "learning_rate": 2.421592523032421e-06, "loss": 0.2933, "step": 22401 }, { "epoch": 1.049421464374385, "grad_norm": 0.6147365928697315, "learning_rate": 2.421402966405305e-06, "loss": 0.2904, "step": 22402 }, { "epoch": 1.0494683093643136, "grad_norm": 0.6390326910997207, "learning_rate": 2.421213410230494e-06, "loss": 0.2834, "step": 22403 }, { "epoch": 1.0495151543542418, "grad_norm": 0.5722750687870128, "learning_rate": 2.4210238545090805e-06, "loss": 0.2788, "step": 22404 }, { "epoch": 1.04956199934417, "grad_norm": 0.6455221090362325, "learning_rate": 2.420834299242154e-06, "loss": 0.2947, "step": 22405 }, { "epoch": 1.0496088443340985, "grad_norm": 0.5907898021639162, "learning_rate": 2.4206447444308076e-06, "loss": 0.2902, "step": 22406 }, { "epoch": 1.0496556893240268, "grad_norm": 0.5494919732395576, "learning_rate": 2.42045519007613e-06, "loss": 0.2758, "step": 22407 }, { "epoch": 1.0497025343139552, "grad_norm": 0.5783268981279392, "learning_rate": 2.4202656361792133e-06, "loss": 0.2796, "step": 22408 }, { "epoch": 1.0497493793038835, "grad_norm": 0.5647718718680316, "learning_rate": 2.4200760827411472e-06, "loss": 0.2711, "step": 22409 }, { "epoch": 1.0497962242938117, "grad_norm": 0.5710835977391034, "learning_rate": 2.4198865297630234e-06, "loss": 0.2652, "step": 22410 }, { "epoch": 1.0498430692837402, "grad_norm": 0.5413306135996754, "learning_rate": 2.419696977245932e-06, "loss": 0.2836, "step": 22411 }, { "epoch": 1.0498899142736684, "grad_norm": 0.633438720359569, "learning_rate": 2.419507425190965e-06, "loss": 0.2869, "step": 22412 }, { "epoch": 1.0499367592635969, "grad_norm": 0.616499588956445, "learning_rate": 2.4193178735992125e-06, "loss": 0.2935, "step": 22413 }, { "epoch": 1.049983604253525, "grad_norm": 0.6176032794120843, "learning_rate": 2.419128322471766e-06, "loss": 0.2677, "step": 22414 }, { "epoch": 1.0500304492434533, "grad_norm": 0.6478799321319675, "learning_rate": 2.418938771809714e-06, "loss": 0.2854, "step": 22415 }, { "epoch": 1.0500772942333818, "grad_norm": 0.5808200145302378, "learning_rate": 2.4187492216141497e-06, "loss": 0.2733, "step": 22416 }, { "epoch": 1.05012413922331, "grad_norm": 0.5849167941818554, "learning_rate": 2.4185596718861633e-06, "loss": 0.2695, "step": 22417 }, { "epoch": 1.0501709842132383, "grad_norm": 0.5999476474275202, "learning_rate": 2.4183701226268457e-06, "loss": 0.2945, "step": 22418 }, { "epoch": 1.0502178292031668, "grad_norm": 0.5928709667871925, "learning_rate": 2.4181805738372867e-06, "loss": 0.284, "step": 22419 }, { "epoch": 1.050264674193095, "grad_norm": 0.5821558375169029, "learning_rate": 2.4179910255185797e-06, "loss": 0.2781, "step": 22420 }, { "epoch": 1.0503115191830235, "grad_norm": 0.5624894115701943, "learning_rate": 2.4178014776718125e-06, "loss": 0.2552, "step": 22421 }, { "epoch": 1.0503583641729517, "grad_norm": 0.6067638291586279, "learning_rate": 2.417611930298077e-06, "loss": 0.2726, "step": 22422 }, { "epoch": 1.05040520916288, "grad_norm": 0.5734819139492239, "learning_rate": 2.417422383398464e-06, "loss": 0.275, "step": 22423 }, { "epoch": 1.0504520541528084, "grad_norm": 0.5775572417813203, "learning_rate": 2.417232836974065e-06, "loss": 0.2719, "step": 22424 }, { "epoch": 1.0504988991427366, "grad_norm": 0.5834305277384075, "learning_rate": 2.41704329102597e-06, "loss": 0.2637, "step": 22425 }, { "epoch": 1.050545744132665, "grad_norm": 0.5834143481039294, "learning_rate": 2.416853745555271e-06, "loss": 0.2911, "step": 22426 }, { "epoch": 1.0505925891225933, "grad_norm": 0.5758057739671573, "learning_rate": 2.4166642005630565e-06, "loss": 0.2594, "step": 22427 }, { "epoch": 1.0506394341125216, "grad_norm": 0.6044039361013904, "learning_rate": 2.416474656050419e-06, "loss": 0.2863, "step": 22428 }, { "epoch": 1.05068627910245, "grad_norm": 0.5991994685012637, "learning_rate": 2.4162851120184486e-06, "loss": 0.2769, "step": 22429 }, { "epoch": 1.0507331240923783, "grad_norm": 0.5920996938315383, "learning_rate": 2.416095568468237e-06, "loss": 0.2806, "step": 22430 }, { "epoch": 1.0507799690823068, "grad_norm": 0.6327682108311621, "learning_rate": 2.4159060254008733e-06, "loss": 0.2953, "step": 22431 }, { "epoch": 1.050826814072235, "grad_norm": 0.6244938662968375, "learning_rate": 2.4157164828174493e-06, "loss": 0.2746, "step": 22432 }, { "epoch": 1.0508736590621632, "grad_norm": 0.6416428580384489, "learning_rate": 2.415526940719057e-06, "loss": 0.2913, "step": 22433 }, { "epoch": 1.0509205040520917, "grad_norm": 0.5735009557629057, "learning_rate": 2.4153373991067857e-06, "loss": 0.29, "step": 22434 }, { "epoch": 1.05096734904202, "grad_norm": 0.592345855827647, "learning_rate": 2.415147857981726e-06, "loss": 0.2794, "step": 22435 }, { "epoch": 1.0510141940319482, "grad_norm": 0.5968986622722254, "learning_rate": 2.414958317344968e-06, "loss": 0.2809, "step": 22436 }, { "epoch": 1.0510610390218766, "grad_norm": 0.5974851406607755, "learning_rate": 2.414768777197605e-06, "loss": 0.2953, "step": 22437 }, { "epoch": 1.0511078840118049, "grad_norm": 0.5714831210468239, "learning_rate": 2.4145792375407256e-06, "loss": 0.2724, "step": 22438 }, { "epoch": 1.0511547290017333, "grad_norm": 0.5841877066378723, "learning_rate": 2.414389698375422e-06, "loss": 0.2786, "step": 22439 }, { "epoch": 1.0512015739916616, "grad_norm": 0.5858105170261552, "learning_rate": 2.414200159702783e-06, "loss": 0.2917, "step": 22440 }, { "epoch": 1.0512484189815898, "grad_norm": 0.5912473845225705, "learning_rate": 2.4140106215239013e-06, "loss": 0.2926, "step": 22441 }, { "epoch": 1.0512952639715183, "grad_norm": 0.5938759732381067, "learning_rate": 2.4138210838398666e-06, "loss": 0.2781, "step": 22442 }, { "epoch": 1.0513421089614465, "grad_norm": 0.625404375146986, "learning_rate": 2.4136315466517698e-06, "loss": 0.2895, "step": 22443 }, { "epoch": 1.051388953951375, "grad_norm": 0.5743242510195513, "learning_rate": 2.413442009960702e-06, "loss": 0.2633, "step": 22444 }, { "epoch": 1.0514357989413032, "grad_norm": 0.5674412870358775, "learning_rate": 2.413252473767754e-06, "loss": 0.2565, "step": 22445 }, { "epoch": 1.0514826439312315, "grad_norm": 0.5769801938885608, "learning_rate": 2.4130629380740157e-06, "loss": 0.2781, "step": 22446 }, { "epoch": 1.05152948892116, "grad_norm": 0.5898065234881181, "learning_rate": 2.412873402880578e-06, "loss": 0.2876, "step": 22447 }, { "epoch": 1.0515763339110882, "grad_norm": 0.5825332834307502, "learning_rate": 2.4126838681885327e-06, "loss": 0.2828, "step": 22448 }, { "epoch": 1.0516231789010166, "grad_norm": 0.5937963049732428, "learning_rate": 2.412494333998969e-06, "loss": 0.2868, "step": 22449 }, { "epoch": 1.0516700238909449, "grad_norm": 0.5789983448561639, "learning_rate": 2.4123048003129785e-06, "loss": 0.2733, "step": 22450 }, { "epoch": 1.0517168688808731, "grad_norm": 0.5647010371632157, "learning_rate": 2.4121152671316532e-06, "loss": 0.2663, "step": 22451 }, { "epoch": 1.0517637138708016, "grad_norm": 0.5782528182827321, "learning_rate": 2.4119257344560816e-06, "loss": 0.2772, "step": 22452 }, { "epoch": 1.0518105588607298, "grad_norm": 0.5585694291800295, "learning_rate": 2.4117362022873546e-06, "loss": 0.26, "step": 22453 }, { "epoch": 1.051857403850658, "grad_norm": 0.5727594970342728, "learning_rate": 2.411546670626564e-06, "loss": 0.2637, "step": 22454 }, { "epoch": 1.0519042488405865, "grad_norm": 0.6327336879158305, "learning_rate": 2.4113571394748005e-06, "loss": 0.2806, "step": 22455 }, { "epoch": 1.0519510938305148, "grad_norm": 0.6238904093242198, "learning_rate": 2.4111676088331536e-06, "loss": 0.2889, "step": 22456 }, { "epoch": 1.0519979388204432, "grad_norm": 0.6289536340456643, "learning_rate": 2.410978078702716e-06, "loss": 0.2858, "step": 22457 }, { "epoch": 1.0520447838103715, "grad_norm": 0.6100772105121874, "learning_rate": 2.410788549084576e-06, "loss": 0.2769, "step": 22458 }, { "epoch": 1.0520916288002997, "grad_norm": 0.6059371030532443, "learning_rate": 2.410599019979826e-06, "loss": 0.2825, "step": 22459 }, { "epoch": 1.0521384737902282, "grad_norm": 0.6176465415451697, "learning_rate": 2.4104094913895556e-06, "loss": 0.2931, "step": 22460 }, { "epoch": 1.0521853187801564, "grad_norm": 0.6328094504980406, "learning_rate": 2.4102199633148565e-06, "loss": 0.292, "step": 22461 }, { "epoch": 1.0522321637700849, "grad_norm": 0.5637310494246182, "learning_rate": 2.410030435756819e-06, "loss": 0.267, "step": 22462 }, { "epoch": 1.0522790087600131, "grad_norm": 0.6331618859436968, "learning_rate": 2.4098409087165343e-06, "loss": 0.2846, "step": 22463 }, { "epoch": 1.0523258537499414, "grad_norm": 0.6179149869321653, "learning_rate": 2.4096513821950916e-06, "loss": 0.2888, "step": 22464 }, { "epoch": 1.0523726987398698, "grad_norm": 0.6378637838587535, "learning_rate": 2.4094618561935827e-06, "loss": 0.3015, "step": 22465 }, { "epoch": 1.052419543729798, "grad_norm": 0.5887098092908541, "learning_rate": 2.409272330713098e-06, "loss": 0.2693, "step": 22466 }, { "epoch": 1.0524663887197265, "grad_norm": 0.5987794716364534, "learning_rate": 2.409082805754728e-06, "loss": 0.264, "step": 22467 }, { "epoch": 1.0525132337096548, "grad_norm": 0.5978229570117002, "learning_rate": 2.408893281319564e-06, "loss": 0.2905, "step": 22468 }, { "epoch": 1.052560078699583, "grad_norm": 0.6053376600930388, "learning_rate": 2.408703757408697e-06, "loss": 0.2878, "step": 22469 }, { "epoch": 1.0526069236895115, "grad_norm": 0.5594687831242506, "learning_rate": 2.4085142340232164e-06, "loss": 0.2662, "step": 22470 }, { "epoch": 1.0526537686794397, "grad_norm": 0.6112640625580176, "learning_rate": 2.408324711164213e-06, "loss": 0.274, "step": 22471 }, { "epoch": 1.052700613669368, "grad_norm": 0.6284970251207345, "learning_rate": 2.4081351888327774e-06, "loss": 0.2863, "step": 22472 }, { "epoch": 1.0527474586592964, "grad_norm": 0.5961581470994527, "learning_rate": 2.4079456670300015e-06, "loss": 0.2791, "step": 22473 }, { "epoch": 1.0527943036492247, "grad_norm": 0.5954179277661164, "learning_rate": 2.407756145756975e-06, "loss": 0.2708, "step": 22474 }, { "epoch": 1.0528411486391531, "grad_norm": 0.5758831211466492, "learning_rate": 2.407566625014789e-06, "loss": 0.2852, "step": 22475 }, { "epoch": 1.0528879936290814, "grad_norm": 0.612902034293673, "learning_rate": 2.4073771048045335e-06, "loss": 0.2959, "step": 22476 }, { "epoch": 1.0529348386190096, "grad_norm": 0.5962541047911797, "learning_rate": 2.4071875851272995e-06, "loss": 0.2757, "step": 22477 }, { "epoch": 1.052981683608938, "grad_norm": 0.6216080894614838, "learning_rate": 2.4069980659841774e-06, "loss": 0.2901, "step": 22478 }, { "epoch": 1.0530285285988663, "grad_norm": 0.5861317059707647, "learning_rate": 2.4068085473762586e-06, "loss": 0.282, "step": 22479 }, { "epoch": 1.0530753735887948, "grad_norm": 0.5630990433015601, "learning_rate": 2.4066190293046323e-06, "loss": 0.2684, "step": 22480 }, { "epoch": 1.053122218578723, "grad_norm": 0.5682412186992313, "learning_rate": 2.4064295117703917e-06, "loss": 0.2676, "step": 22481 }, { "epoch": 1.0531690635686513, "grad_norm": 0.628360963124616, "learning_rate": 2.406239994774624e-06, "loss": 0.2759, "step": 22482 }, { "epoch": 1.0532159085585797, "grad_norm": 0.6024216814945036, "learning_rate": 2.4060504783184224e-06, "loss": 0.2737, "step": 22483 }, { "epoch": 1.053262753548508, "grad_norm": 0.5531280195072991, "learning_rate": 2.405860962402876e-06, "loss": 0.2727, "step": 22484 }, { "epoch": 1.0533095985384364, "grad_norm": 0.5989670786020234, "learning_rate": 2.405671447029076e-06, "loss": 0.2807, "step": 22485 }, { "epoch": 1.0533564435283647, "grad_norm": 0.6025075353874703, "learning_rate": 2.4054819321981144e-06, "loss": 0.2824, "step": 22486 }, { "epoch": 1.053403288518293, "grad_norm": 0.5916667550345748, "learning_rate": 2.4052924179110805e-06, "loss": 0.2826, "step": 22487 }, { "epoch": 1.0534501335082214, "grad_norm": 0.617550880961253, "learning_rate": 2.405102904169064e-06, "loss": 0.2816, "step": 22488 }, { "epoch": 1.0534969784981496, "grad_norm": 0.6086942736458177, "learning_rate": 2.4049133909731563e-06, "loss": 0.27, "step": 22489 }, { "epoch": 1.0535438234880778, "grad_norm": 0.6006209576228985, "learning_rate": 2.404723878324449e-06, "loss": 0.2862, "step": 22490 }, { "epoch": 1.0535906684780063, "grad_norm": 0.5615759170939177, "learning_rate": 2.4045343662240304e-06, "loss": 0.2544, "step": 22491 }, { "epoch": 1.0536375134679345, "grad_norm": 0.581505793727037, "learning_rate": 2.4043448546729934e-06, "loss": 0.2759, "step": 22492 }, { "epoch": 1.053684358457863, "grad_norm": 0.5810863369975535, "learning_rate": 2.4041553436724286e-06, "loss": 0.2729, "step": 22493 }, { "epoch": 1.0537312034477913, "grad_norm": 0.5371558162424038, "learning_rate": 2.403965833223425e-06, "loss": 0.2644, "step": 22494 }, { "epoch": 1.0537780484377195, "grad_norm": 0.5615794408260659, "learning_rate": 2.4037763233270732e-06, "loss": 0.2656, "step": 22495 }, { "epoch": 1.053824893427648, "grad_norm": 0.5750755154184418, "learning_rate": 2.403586813984465e-06, "loss": 0.2784, "step": 22496 }, { "epoch": 1.0538717384175762, "grad_norm": 0.6345307488078564, "learning_rate": 2.4033973051966904e-06, "loss": 0.2808, "step": 22497 }, { "epoch": 1.0539185834075047, "grad_norm": 0.6073830772690382, "learning_rate": 2.40320779696484e-06, "loss": 0.3004, "step": 22498 }, { "epoch": 1.053965428397433, "grad_norm": 0.5952219797627155, "learning_rate": 2.4030182892900053e-06, "loss": 0.2582, "step": 22499 }, { "epoch": 1.0540122733873611, "grad_norm": 0.6000690865970912, "learning_rate": 2.4028287821732747e-06, "loss": 0.2719, "step": 22500 }, { "epoch": 1.0540591183772896, "grad_norm": 0.585508041100718, "learning_rate": 2.402639275615741e-06, "loss": 0.2859, "step": 22501 }, { "epoch": 1.0541059633672178, "grad_norm": 0.6274897700908173, "learning_rate": 2.402449769618493e-06, "loss": 0.2737, "step": 22502 }, { "epoch": 1.0541528083571463, "grad_norm": 0.5588939366712038, "learning_rate": 2.402260264182622e-06, "loss": 0.2737, "step": 22503 }, { "epoch": 1.0541996533470746, "grad_norm": 0.5467473506949558, "learning_rate": 2.4020707593092193e-06, "loss": 0.2679, "step": 22504 }, { "epoch": 1.0542464983370028, "grad_norm": 0.612452229556374, "learning_rate": 2.401881254999374e-06, "loss": 0.2835, "step": 22505 }, { "epoch": 1.0542933433269313, "grad_norm": 0.5997475740468097, "learning_rate": 2.4016917512541786e-06, "loss": 0.2821, "step": 22506 }, { "epoch": 1.0543401883168595, "grad_norm": 0.5672656290020637, "learning_rate": 2.4015022480747218e-06, "loss": 0.2633, "step": 22507 }, { "epoch": 1.0543870333067877, "grad_norm": 0.550265590955623, "learning_rate": 2.4013127454620948e-06, "loss": 0.2702, "step": 22508 }, { "epoch": 1.0544338782967162, "grad_norm": 0.6031409837615344, "learning_rate": 2.4011232434173877e-06, "loss": 0.2846, "step": 22509 }, { "epoch": 1.0544807232866444, "grad_norm": 0.5581807400212142, "learning_rate": 2.4009337419416915e-06, "loss": 0.2613, "step": 22510 }, { "epoch": 1.054527568276573, "grad_norm": 0.579450941716153, "learning_rate": 2.400744241036097e-06, "loss": 0.2683, "step": 22511 }, { "epoch": 1.0545744132665011, "grad_norm": 0.6233650616085781, "learning_rate": 2.4005547407016957e-06, "loss": 0.2778, "step": 22512 }, { "epoch": 1.0546212582564294, "grad_norm": 0.6412218836551908, "learning_rate": 2.400365240939575e-06, "loss": 0.2615, "step": 22513 }, { "epoch": 1.0546681032463578, "grad_norm": 0.6015231224566322, "learning_rate": 2.400175741750828e-06, "loss": 0.2914, "step": 22514 }, { "epoch": 1.054714948236286, "grad_norm": 0.639246619526234, "learning_rate": 2.3999862431365447e-06, "loss": 0.2756, "step": 22515 }, { "epoch": 1.0547617932262146, "grad_norm": 0.5464107764663697, "learning_rate": 2.3997967450978148e-06, "loss": 0.2653, "step": 22516 }, { "epoch": 1.0548086382161428, "grad_norm": 0.6419651936609614, "learning_rate": 2.3996072476357304e-06, "loss": 0.2973, "step": 22517 }, { "epoch": 1.054855483206071, "grad_norm": 0.6265309081077433, "learning_rate": 2.399417750751381e-06, "loss": 0.2899, "step": 22518 }, { "epoch": 1.0549023281959995, "grad_norm": 0.5905619259446546, "learning_rate": 2.3992282544458566e-06, "loss": 0.2887, "step": 22519 }, { "epoch": 1.0549491731859277, "grad_norm": 0.582509537033304, "learning_rate": 2.3990387587202482e-06, "loss": 0.2658, "step": 22520 }, { "epoch": 1.0549960181758562, "grad_norm": 0.5493382160128014, "learning_rate": 2.3988492635756464e-06, "loss": 0.2747, "step": 22521 }, { "epoch": 1.0550428631657844, "grad_norm": 0.6017795991178869, "learning_rate": 2.398659769013142e-06, "loss": 0.2644, "step": 22522 }, { "epoch": 1.0550897081557127, "grad_norm": 0.5580294041103155, "learning_rate": 2.3984702750338244e-06, "loss": 0.2746, "step": 22523 }, { "epoch": 1.0551365531456411, "grad_norm": 0.5949630315269453, "learning_rate": 2.398280781638786e-06, "loss": 0.275, "step": 22524 }, { "epoch": 1.0551833981355694, "grad_norm": 0.6559996870580218, "learning_rate": 2.3980912888291154e-06, "loss": 0.2765, "step": 22525 }, { "epoch": 1.0552302431254976, "grad_norm": 0.6650326397393621, "learning_rate": 2.3979017966059044e-06, "loss": 0.2881, "step": 22526 }, { "epoch": 1.055277088115426, "grad_norm": 0.5828718146698371, "learning_rate": 2.397712304970242e-06, "loss": 0.2612, "step": 22527 }, { "epoch": 1.0553239331053543, "grad_norm": 0.5470673746073965, "learning_rate": 2.3975228139232206e-06, "loss": 0.2688, "step": 22528 }, { "epoch": 1.0553707780952828, "grad_norm": 0.6017202407796315, "learning_rate": 2.397333323465929e-06, "loss": 0.2909, "step": 22529 }, { "epoch": 1.055417623085211, "grad_norm": 0.5308675803705346, "learning_rate": 2.397143833599459e-06, "loss": 0.2583, "step": 22530 }, { "epoch": 1.0554644680751393, "grad_norm": 0.5711535776174408, "learning_rate": 2.3969543443248995e-06, "loss": 0.2755, "step": 22531 }, { "epoch": 1.0555113130650677, "grad_norm": 0.5638378210819333, "learning_rate": 2.3967648556433424e-06, "loss": 0.2596, "step": 22532 }, { "epoch": 1.055558158054996, "grad_norm": 0.5963095231791488, "learning_rate": 2.396575367555877e-06, "loss": 0.2739, "step": 22533 }, { "epoch": 1.0556050030449244, "grad_norm": 0.558783047975154, "learning_rate": 2.3963858800635945e-06, "loss": 0.254, "step": 22534 }, { "epoch": 1.0556518480348527, "grad_norm": 0.5435442076082592, "learning_rate": 2.3961963931675854e-06, "loss": 0.2608, "step": 22535 }, { "epoch": 1.055698693024781, "grad_norm": 0.6138375194466404, "learning_rate": 2.396006906868941e-06, "loss": 0.2827, "step": 22536 }, { "epoch": 1.0557455380147094, "grad_norm": 0.5841050841076524, "learning_rate": 2.3958174211687493e-06, "loss": 0.2721, "step": 22537 }, { "epoch": 1.0557923830046376, "grad_norm": 0.6233635315583187, "learning_rate": 2.3956279360681024e-06, "loss": 0.2899, "step": 22538 }, { "epoch": 1.055839227994566, "grad_norm": 0.5832756869274236, "learning_rate": 2.395438451568091e-06, "loss": 0.2858, "step": 22539 }, { "epoch": 1.0558860729844943, "grad_norm": 0.6212753213600452, "learning_rate": 2.3952489676698042e-06, "loss": 0.2937, "step": 22540 }, { "epoch": 1.0559329179744226, "grad_norm": 0.6213947186062252, "learning_rate": 2.3950594843743337e-06, "loss": 0.2799, "step": 22541 }, { "epoch": 1.055979762964351, "grad_norm": 0.5821969755300802, "learning_rate": 2.39487000168277e-06, "loss": 0.2702, "step": 22542 }, { "epoch": 1.0560266079542793, "grad_norm": 0.5697908148088856, "learning_rate": 2.394680519596203e-06, "loss": 0.2819, "step": 22543 }, { "epoch": 1.0560734529442075, "grad_norm": 0.6024876390653745, "learning_rate": 2.3944910381157225e-06, "loss": 0.2825, "step": 22544 }, { "epoch": 1.056120297934136, "grad_norm": 0.6129940541189965, "learning_rate": 2.3943015572424193e-06, "loss": 0.2597, "step": 22545 }, { "epoch": 1.0561671429240642, "grad_norm": 0.5496428250736992, "learning_rate": 2.394112076977385e-06, "loss": 0.2744, "step": 22546 }, { "epoch": 1.0562139879139927, "grad_norm": 0.5862910181037596, "learning_rate": 2.393922597321708e-06, "loss": 0.2855, "step": 22547 }, { "epoch": 1.056260832903921, "grad_norm": 0.6131290324811555, "learning_rate": 2.3937331182764814e-06, "loss": 0.2899, "step": 22548 }, { "epoch": 1.0563076778938492, "grad_norm": 0.6047486100001115, "learning_rate": 2.3935436398427927e-06, "loss": 0.2725, "step": 22549 }, { "epoch": 1.0563545228837776, "grad_norm": 0.6500759053971981, "learning_rate": 2.3933541620217342e-06, "loss": 0.2832, "step": 22550 }, { "epoch": 1.0564013678737059, "grad_norm": 0.5684185436610253, "learning_rate": 2.393164684814395e-06, "loss": 0.2592, "step": 22551 }, { "epoch": 1.0564482128636343, "grad_norm": 0.5900732513830866, "learning_rate": 2.3929752082218664e-06, "loss": 0.2865, "step": 22552 }, { "epoch": 1.0564950578535626, "grad_norm": 0.5737087083287603, "learning_rate": 2.392785732245239e-06, "loss": 0.2689, "step": 22553 }, { "epoch": 1.0565419028434908, "grad_norm": 0.599229427437449, "learning_rate": 2.392596256885603e-06, "loss": 0.2929, "step": 22554 }, { "epoch": 1.0565887478334193, "grad_norm": 0.6356412003775429, "learning_rate": 2.392406782144048e-06, "loss": 0.2774, "step": 22555 }, { "epoch": 1.0566355928233475, "grad_norm": 0.592821935452919, "learning_rate": 2.3922173080216644e-06, "loss": 0.2758, "step": 22556 }, { "epoch": 1.056682437813276, "grad_norm": 0.5787231315038677, "learning_rate": 2.3920278345195443e-06, "loss": 0.2749, "step": 22557 }, { "epoch": 1.0567292828032042, "grad_norm": 0.5982081460279102, "learning_rate": 2.3918383616387758e-06, "loss": 0.2845, "step": 22558 }, { "epoch": 1.0567761277931325, "grad_norm": 0.5719693378916052, "learning_rate": 2.3916488893804507e-06, "loss": 0.2856, "step": 22559 }, { "epoch": 1.056822972783061, "grad_norm": 0.6053281326426865, "learning_rate": 2.39145941774566e-06, "loss": 0.302, "step": 22560 }, { "epoch": 1.0568698177729892, "grad_norm": 0.6249414050171123, "learning_rate": 2.391269946735492e-06, "loss": 0.2824, "step": 22561 }, { "epoch": 1.0569166627629174, "grad_norm": 0.6110884493305667, "learning_rate": 2.3910804763510386e-06, "loss": 0.2807, "step": 22562 }, { "epoch": 1.0569635077528459, "grad_norm": 0.5979753182971894, "learning_rate": 2.390891006593389e-06, "loss": 0.2828, "step": 22563 }, { "epoch": 1.057010352742774, "grad_norm": 0.6601112404520804, "learning_rate": 2.3907015374636347e-06, "loss": 0.2727, "step": 22564 }, { "epoch": 1.0570571977327026, "grad_norm": 0.612036245794568, "learning_rate": 2.390512068962865e-06, "loss": 0.2894, "step": 22565 }, { "epoch": 1.0571040427226308, "grad_norm": 0.5840971139547835, "learning_rate": 2.3903226010921724e-06, "loss": 0.282, "step": 22566 }, { "epoch": 1.057150887712559, "grad_norm": 0.5688822841371657, "learning_rate": 2.390133133852644e-06, "loss": 0.2622, "step": 22567 }, { "epoch": 1.0571977327024875, "grad_norm": 0.5962478543199095, "learning_rate": 2.389943667245373e-06, "loss": 0.2762, "step": 22568 }, { "epoch": 1.0572445776924158, "grad_norm": 0.6023086378350919, "learning_rate": 2.3897542012714476e-06, "loss": 0.2854, "step": 22569 }, { "epoch": 1.0572914226823442, "grad_norm": 0.6461596679198882, "learning_rate": 2.3895647359319595e-06, "loss": 0.2956, "step": 22570 }, { "epoch": 1.0573382676722725, "grad_norm": 0.5842384705710223, "learning_rate": 2.389375271227998e-06, "loss": 0.2711, "step": 22571 }, { "epoch": 1.0573851126622007, "grad_norm": 0.5475112334768388, "learning_rate": 2.3891858071606544e-06, "loss": 0.2636, "step": 22572 }, { "epoch": 1.0574319576521292, "grad_norm": 0.5597213207837362, "learning_rate": 2.3889963437310194e-06, "loss": 0.2722, "step": 22573 }, { "epoch": 1.0574788026420574, "grad_norm": 0.5849217551938408, "learning_rate": 2.3888068809401823e-06, "loss": 0.2529, "step": 22574 }, { "epoch": 1.0575256476319859, "grad_norm": 0.5492786859033778, "learning_rate": 2.388617418789233e-06, "loss": 0.2645, "step": 22575 }, { "epoch": 1.057572492621914, "grad_norm": 0.5517185313699003, "learning_rate": 2.3884279572792625e-06, "loss": 0.2627, "step": 22576 }, { "epoch": 1.0576193376118423, "grad_norm": 0.5895503100861703, "learning_rate": 2.3882384964113613e-06, "loss": 0.2688, "step": 22577 }, { "epoch": 1.0576661826017708, "grad_norm": 0.5550580246721455, "learning_rate": 2.388049036186619e-06, "loss": 0.2781, "step": 22578 }, { "epoch": 1.057713027591699, "grad_norm": 0.6143752419179888, "learning_rate": 2.387859576606128e-06, "loss": 0.2772, "step": 22579 }, { "epoch": 1.0577598725816273, "grad_norm": 0.605815879578864, "learning_rate": 2.387670117670975e-06, "loss": 0.2793, "step": 22580 }, { "epoch": 1.0578067175715558, "grad_norm": 0.6157427992524696, "learning_rate": 2.3874806593822535e-06, "loss": 0.2739, "step": 22581 }, { "epoch": 1.057853562561484, "grad_norm": 0.6106991679417312, "learning_rate": 2.3872912017410515e-06, "loss": 0.2943, "step": 22582 }, { "epoch": 1.0579004075514125, "grad_norm": 0.6065419798429337, "learning_rate": 2.3871017447484603e-06, "loss": 0.2903, "step": 22583 }, { "epoch": 1.0579472525413407, "grad_norm": 0.5807329206584682, "learning_rate": 2.3869122884055713e-06, "loss": 0.2776, "step": 22584 }, { "epoch": 1.057994097531269, "grad_norm": 0.5726358353403496, "learning_rate": 2.386722832713474e-06, "loss": 0.3044, "step": 22585 }, { "epoch": 1.0580409425211974, "grad_norm": 0.5686709894204502, "learning_rate": 2.3865333776732567e-06, "loss": 0.2685, "step": 22586 }, { "epoch": 1.0580877875111256, "grad_norm": 0.5728938435923216, "learning_rate": 2.386343923286012e-06, "loss": 0.2728, "step": 22587 }, { "epoch": 1.058134632501054, "grad_norm": 0.593904897798539, "learning_rate": 2.386154469552829e-06, "loss": 0.2707, "step": 22588 }, { "epoch": 1.0581814774909823, "grad_norm": 0.5730602985604731, "learning_rate": 2.3859650164747987e-06, "loss": 0.2761, "step": 22589 }, { "epoch": 1.0582283224809106, "grad_norm": 0.6326260354903992, "learning_rate": 2.385775564053011e-06, "loss": 0.2899, "step": 22590 }, { "epoch": 1.058275167470839, "grad_norm": 0.5977226932894097, "learning_rate": 2.3855861122885575e-06, "loss": 0.2712, "step": 22591 }, { "epoch": 1.0583220124607673, "grad_norm": 0.6066274431962583, "learning_rate": 2.385396661182526e-06, "loss": 0.2842, "step": 22592 }, { "epoch": 1.0583688574506958, "grad_norm": 0.5877556684085709, "learning_rate": 2.385207210736008e-06, "loss": 0.2791, "step": 22593 }, { "epoch": 1.058415702440624, "grad_norm": 0.5897181656225553, "learning_rate": 2.385017760950093e-06, "loss": 0.2835, "step": 22594 }, { "epoch": 1.0584625474305522, "grad_norm": 0.6139387392288362, "learning_rate": 2.3848283118258727e-06, "loss": 0.2739, "step": 22595 }, { "epoch": 1.0585093924204807, "grad_norm": 0.604370322935605, "learning_rate": 2.3846388633644364e-06, "loss": 0.29, "step": 22596 }, { "epoch": 1.058556237410409, "grad_norm": 0.623595749856401, "learning_rate": 2.384449415566875e-06, "loss": 0.2895, "step": 22597 }, { "epoch": 1.0586030824003372, "grad_norm": 0.5864225142728349, "learning_rate": 2.384259968434277e-06, "loss": 0.2596, "step": 22598 }, { "epoch": 1.0586499273902656, "grad_norm": 0.5604169509305086, "learning_rate": 2.384070521967735e-06, "loss": 0.287, "step": 22599 }, { "epoch": 1.0586967723801939, "grad_norm": 0.6177997316625056, "learning_rate": 2.383881076168337e-06, "loss": 0.2818, "step": 22600 }, { "epoch": 1.0587436173701223, "grad_norm": 0.5652615323826006, "learning_rate": 2.383691631037174e-06, "loss": 0.2651, "step": 22601 }, { "epoch": 1.0587904623600506, "grad_norm": 0.627727236787238, "learning_rate": 2.3835021865753376e-06, "loss": 0.2762, "step": 22602 }, { "epoch": 1.0588373073499788, "grad_norm": 0.5725812494830761, "learning_rate": 2.3833127427839167e-06, "loss": 0.2635, "step": 22603 }, { "epoch": 1.0588841523399073, "grad_norm": 0.6187229421513782, "learning_rate": 2.383123299664001e-06, "loss": 0.2968, "step": 22604 }, { "epoch": 1.0589309973298355, "grad_norm": 0.5797421208433136, "learning_rate": 2.382933857216681e-06, "loss": 0.2689, "step": 22605 }, { "epoch": 1.058977842319764, "grad_norm": 0.5810707124938606, "learning_rate": 2.382744415443048e-06, "loss": 0.2776, "step": 22606 }, { "epoch": 1.0590246873096922, "grad_norm": 0.5343951646400011, "learning_rate": 2.382554974344191e-06, "loss": 0.2627, "step": 22607 }, { "epoch": 1.0590715322996205, "grad_norm": 0.5722358026139001, "learning_rate": 2.3823655339212005e-06, "loss": 0.2801, "step": 22608 }, { "epoch": 1.059118377289549, "grad_norm": 0.5845507626960996, "learning_rate": 2.382176094175168e-06, "loss": 0.2798, "step": 22609 }, { "epoch": 1.0591652222794772, "grad_norm": 0.6165631846639787, "learning_rate": 2.3819866551071816e-06, "loss": 0.2843, "step": 22610 }, { "epoch": 1.0592120672694056, "grad_norm": 0.5542810986468429, "learning_rate": 2.381797216718332e-06, "loss": 0.271, "step": 22611 }, { "epoch": 1.0592589122593339, "grad_norm": 0.5388370888466724, "learning_rate": 2.3816077790097097e-06, "loss": 0.2709, "step": 22612 }, { "epoch": 1.0593057572492621, "grad_norm": 0.5865653192253261, "learning_rate": 2.381418341982405e-06, "loss": 0.2755, "step": 22613 }, { "epoch": 1.0593526022391906, "grad_norm": 0.644560319381035, "learning_rate": 2.3812289056375082e-06, "loss": 0.2879, "step": 22614 }, { "epoch": 1.0593994472291188, "grad_norm": 0.5900938247320741, "learning_rate": 2.38103946997611e-06, "loss": 0.2634, "step": 22615 }, { "epoch": 1.059446292219047, "grad_norm": 0.5586857756850062, "learning_rate": 2.3808500349992983e-06, "loss": 0.2778, "step": 22616 }, { "epoch": 1.0594931372089755, "grad_norm": 0.5847501505311113, "learning_rate": 2.3806606007081654e-06, "loss": 0.2805, "step": 22617 }, { "epoch": 1.0595399821989038, "grad_norm": 0.6137162841660475, "learning_rate": 2.3804711671038e-06, "loss": 0.2752, "step": 22618 }, { "epoch": 1.0595868271888322, "grad_norm": 0.6472402511489529, "learning_rate": 2.380281734187294e-06, "loss": 0.3018, "step": 22619 }, { "epoch": 1.0596336721787605, "grad_norm": 0.5637752319418293, "learning_rate": 2.380092301959736e-06, "loss": 0.2769, "step": 22620 }, { "epoch": 1.0596805171686887, "grad_norm": 0.5663339702260901, "learning_rate": 2.3799028704222174e-06, "loss": 0.271, "step": 22621 }, { "epoch": 1.0597273621586172, "grad_norm": 0.6314634071863878, "learning_rate": 2.3797134395758266e-06, "loss": 0.2892, "step": 22622 }, { "epoch": 1.0597742071485454, "grad_norm": 0.563682346881688, "learning_rate": 2.3795240094216555e-06, "loss": 0.281, "step": 22623 }, { "epoch": 1.0598210521384739, "grad_norm": 0.558168024355747, "learning_rate": 2.379334579960793e-06, "loss": 0.2732, "step": 22624 }, { "epoch": 1.0598678971284021, "grad_norm": 0.566954520133046, "learning_rate": 2.379145151194329e-06, "loss": 0.2547, "step": 22625 }, { "epoch": 1.0599147421183304, "grad_norm": 0.5539044116089513, "learning_rate": 2.3789557231233555e-06, "loss": 0.2572, "step": 22626 }, { "epoch": 1.0599615871082588, "grad_norm": 0.5871935590758137, "learning_rate": 2.3787662957489616e-06, "loss": 0.269, "step": 22627 }, { "epoch": 1.060008432098187, "grad_norm": 0.5600090733183042, "learning_rate": 2.378576869072236e-06, "loss": 0.2673, "step": 22628 }, { "epoch": 1.0600552770881155, "grad_norm": 0.6004391736330787, "learning_rate": 2.3783874430942704e-06, "loss": 0.2909, "step": 22629 }, { "epoch": 1.0601021220780438, "grad_norm": 0.5834523976515191, "learning_rate": 2.3781980178161546e-06, "loss": 0.2855, "step": 22630 }, { "epoch": 1.060148967067972, "grad_norm": 0.5829570186024671, "learning_rate": 2.378008593238978e-06, "loss": 0.2794, "step": 22631 }, { "epoch": 1.0601958120579005, "grad_norm": 0.5994558579266878, "learning_rate": 2.3778191693638315e-06, "loss": 0.2807, "step": 22632 }, { "epoch": 1.0602426570478287, "grad_norm": 0.5552963392518602, "learning_rate": 2.3776297461918064e-06, "loss": 0.2571, "step": 22633 }, { "epoch": 1.060289502037757, "grad_norm": 0.5583220980726846, "learning_rate": 2.3774403237239906e-06, "loss": 0.2758, "step": 22634 }, { "epoch": 1.0603363470276854, "grad_norm": 0.5902957405095205, "learning_rate": 2.377250901961474e-06, "loss": 0.2697, "step": 22635 }, { "epoch": 1.0603831920176137, "grad_norm": 0.5985178159988604, "learning_rate": 2.377061480905348e-06, "loss": 0.2704, "step": 22636 }, { "epoch": 1.0604300370075421, "grad_norm": 0.5848658373758111, "learning_rate": 2.3768720605567026e-06, "loss": 0.2653, "step": 22637 }, { "epoch": 1.0604768819974704, "grad_norm": 0.6228963207817674, "learning_rate": 2.376682640916627e-06, "loss": 0.281, "step": 22638 }, { "epoch": 1.0605237269873986, "grad_norm": 0.5892979950393387, "learning_rate": 2.3764932219862122e-06, "loss": 0.2737, "step": 22639 }, { "epoch": 1.060570571977327, "grad_norm": 0.5328115985243115, "learning_rate": 2.3763038037665487e-06, "loss": 0.2563, "step": 22640 }, { "epoch": 1.0606174169672553, "grad_norm": 0.6394529237719697, "learning_rate": 2.3761143862587252e-06, "loss": 0.2982, "step": 22641 }, { "epoch": 1.0606642619571838, "grad_norm": 0.5918146755353098, "learning_rate": 2.3759249694638313e-06, "loss": 0.2629, "step": 22642 }, { "epoch": 1.060711106947112, "grad_norm": 0.6039384086587767, "learning_rate": 2.375735553382959e-06, "loss": 0.2652, "step": 22643 }, { "epoch": 1.0607579519370403, "grad_norm": 0.5626240528472366, "learning_rate": 2.3755461380171967e-06, "loss": 0.256, "step": 22644 }, { "epoch": 1.0608047969269687, "grad_norm": 0.5720147494558728, "learning_rate": 2.375356723367635e-06, "loss": 0.2798, "step": 22645 }, { "epoch": 1.060851641916897, "grad_norm": 0.6116887252046355, "learning_rate": 2.3751673094353657e-06, "loss": 0.276, "step": 22646 }, { "epoch": 1.0608984869068254, "grad_norm": 0.607221500843869, "learning_rate": 2.374977896221476e-06, "loss": 0.2891, "step": 22647 }, { "epoch": 1.0609453318967537, "grad_norm": 0.5838770184214952, "learning_rate": 2.374788483727057e-06, "loss": 0.2774, "step": 22648 }, { "epoch": 1.060992176886682, "grad_norm": 0.5900367400711588, "learning_rate": 2.374599071953198e-06, "loss": 0.2739, "step": 22649 }, { "epoch": 1.0610390218766104, "grad_norm": 0.602007970377346, "learning_rate": 2.3744096609009906e-06, "loss": 0.2914, "step": 22650 }, { "epoch": 1.0610858668665386, "grad_norm": 0.6024650189842552, "learning_rate": 2.3742202505715245e-06, "loss": 0.2849, "step": 22651 }, { "epoch": 1.0611327118564668, "grad_norm": 0.5827346767643347, "learning_rate": 2.3740308409658896e-06, "loss": 0.2588, "step": 22652 }, { "epoch": 1.0611795568463953, "grad_norm": 0.6004485751051188, "learning_rate": 2.3738414320851742e-06, "loss": 0.2864, "step": 22653 }, { "epoch": 1.0612264018363236, "grad_norm": 0.5816973296051716, "learning_rate": 2.37365202393047e-06, "loss": 0.2715, "step": 22654 }, { "epoch": 1.061273246826252, "grad_norm": 0.5643691311327356, "learning_rate": 2.3734626165028675e-06, "loss": 0.2724, "step": 22655 }, { "epoch": 1.0613200918161803, "grad_norm": 0.6218558175870142, "learning_rate": 2.3732732098034547e-06, "loss": 0.2926, "step": 22656 }, { "epoch": 1.0613669368061085, "grad_norm": 0.6674665153089724, "learning_rate": 2.3730838038333233e-06, "loss": 0.2929, "step": 22657 }, { "epoch": 1.061413781796037, "grad_norm": 0.5585549481087909, "learning_rate": 2.372894398593563e-06, "loss": 0.2775, "step": 22658 }, { "epoch": 1.0614606267859652, "grad_norm": 0.6166660792712705, "learning_rate": 2.3727049940852635e-06, "loss": 0.2867, "step": 22659 }, { "epoch": 1.0615074717758937, "grad_norm": 0.5749584464340356, "learning_rate": 2.372515590309514e-06, "loss": 0.2729, "step": 22660 }, { "epoch": 1.061554316765822, "grad_norm": 0.6113192092804045, "learning_rate": 2.3723261872674057e-06, "loss": 0.3021, "step": 22661 }, { "epoch": 1.0616011617557501, "grad_norm": 0.57051133599295, "learning_rate": 2.372136784960028e-06, "loss": 0.2634, "step": 22662 }, { "epoch": 1.0616480067456786, "grad_norm": 0.6056766012332689, "learning_rate": 2.3719473833884706e-06, "loss": 0.2691, "step": 22663 }, { "epoch": 1.0616948517356068, "grad_norm": 0.607590490289058, "learning_rate": 2.371757982553825e-06, "loss": 0.2854, "step": 22664 }, { "epoch": 1.0617416967255353, "grad_norm": 0.5794502143156655, "learning_rate": 2.371568582457179e-06, "loss": 0.2779, "step": 22665 }, { "epoch": 1.0617885417154636, "grad_norm": 0.5980887345897982, "learning_rate": 2.371379183099624e-06, "loss": 0.2957, "step": 22666 }, { "epoch": 1.0618353867053918, "grad_norm": 0.6382683034149507, "learning_rate": 2.3711897844822485e-06, "loss": 0.3013, "step": 22667 }, { "epoch": 1.0618822316953203, "grad_norm": 0.5673418649053437, "learning_rate": 2.371000386606145e-06, "loss": 0.2576, "step": 22668 }, { "epoch": 1.0619290766852485, "grad_norm": 0.6610845496260098, "learning_rate": 2.3708109894724006e-06, "loss": 0.2936, "step": 22669 }, { "epoch": 1.0619759216751767, "grad_norm": 0.5934635428079233, "learning_rate": 2.3706215930821077e-06, "loss": 0.2808, "step": 22670 }, { "epoch": 1.0620227666651052, "grad_norm": 0.5477619021086593, "learning_rate": 2.3704321974363537e-06, "loss": 0.2744, "step": 22671 }, { "epoch": 1.0620696116550334, "grad_norm": 0.5849482064789505, "learning_rate": 2.3702428025362305e-06, "loss": 0.2769, "step": 22672 }, { "epoch": 1.062116456644962, "grad_norm": 0.6115276245835032, "learning_rate": 2.370053408382827e-06, "loss": 0.277, "step": 22673 }, { "epoch": 1.0621633016348901, "grad_norm": 0.5867623242498157, "learning_rate": 2.369864014977233e-06, "loss": 0.2962, "step": 22674 }, { "epoch": 1.0622101466248184, "grad_norm": 0.6327699155800627, "learning_rate": 2.36967462232054e-06, "loss": 0.2986, "step": 22675 }, { "epoch": 1.0622569916147468, "grad_norm": 0.6011272432049755, "learning_rate": 2.3694852304138374e-06, "loss": 0.2809, "step": 22676 }, { "epoch": 1.062303836604675, "grad_norm": 0.5843639715928601, "learning_rate": 2.3692958392582127e-06, "loss": 0.2846, "step": 22677 }, { "epoch": 1.0623506815946036, "grad_norm": 0.6179053135258914, "learning_rate": 2.369106448854758e-06, "loss": 0.2855, "step": 22678 }, { "epoch": 1.0623975265845318, "grad_norm": 0.6323642718672997, "learning_rate": 2.3689170592045636e-06, "loss": 0.2946, "step": 22679 }, { "epoch": 1.06244437157446, "grad_norm": 0.6168453953046839, "learning_rate": 2.3687276703087173e-06, "loss": 0.2831, "step": 22680 }, { "epoch": 1.0624912165643885, "grad_norm": 0.5902540338008453, "learning_rate": 2.368538282168311e-06, "loss": 0.2699, "step": 22681 }, { "epoch": 1.0625380615543167, "grad_norm": 0.5565316851976351, "learning_rate": 2.3683488947844346e-06, "loss": 0.263, "step": 22682 }, { "epoch": 1.0625849065442452, "grad_norm": 0.5537631946565859, "learning_rate": 2.3681595081581767e-06, "loss": 0.2643, "step": 22683 }, { "epoch": 1.0626317515341734, "grad_norm": 0.5607737441485616, "learning_rate": 2.3679701222906274e-06, "loss": 0.2736, "step": 22684 }, { "epoch": 1.0626785965241017, "grad_norm": 0.5732922655406225, "learning_rate": 2.3677807371828764e-06, "loss": 0.2652, "step": 22685 }, { "epoch": 1.0627254415140301, "grad_norm": 0.6330438125057295, "learning_rate": 2.367591352836015e-06, "loss": 0.2869, "step": 22686 }, { "epoch": 1.0627722865039584, "grad_norm": 0.6099547263766197, "learning_rate": 2.367401969251131e-06, "loss": 0.2777, "step": 22687 }, { "epoch": 1.0628191314938866, "grad_norm": 0.611691434041569, "learning_rate": 2.367212586429317e-06, "loss": 0.2794, "step": 22688 }, { "epoch": 1.062865976483815, "grad_norm": 0.5555863656044077, "learning_rate": 2.36702320437166e-06, "loss": 0.2722, "step": 22689 }, { "epoch": 1.0629128214737433, "grad_norm": 0.6403220555618956, "learning_rate": 2.3668338230792513e-06, "loss": 0.2677, "step": 22690 }, { "epoch": 1.0629596664636718, "grad_norm": 0.5879002194185136, "learning_rate": 2.3666444425531803e-06, "loss": 0.2681, "step": 22691 }, { "epoch": 1.0630065114536, "grad_norm": 0.5912446698298762, "learning_rate": 2.3664550627945368e-06, "loss": 0.2671, "step": 22692 }, { "epoch": 1.0630533564435283, "grad_norm": 0.5817198017220493, "learning_rate": 2.3662656838044113e-06, "loss": 0.2684, "step": 22693 }, { "epoch": 1.0631002014334567, "grad_norm": 0.6154173925582924, "learning_rate": 2.366076305583894e-06, "loss": 0.2801, "step": 22694 }, { "epoch": 1.063147046423385, "grad_norm": 0.5804004419859887, "learning_rate": 2.3658869281340727e-06, "loss": 0.2665, "step": 22695 }, { "epoch": 1.0631938914133134, "grad_norm": 0.5835985636834675, "learning_rate": 2.3656975514560385e-06, "loss": 0.2761, "step": 22696 }, { "epoch": 1.0632407364032417, "grad_norm": 0.5946677079427304, "learning_rate": 2.3655081755508816e-06, "loss": 0.2883, "step": 22697 }, { "epoch": 1.06328758139317, "grad_norm": 0.6687766333027861, "learning_rate": 2.3653188004196905e-06, "loss": 0.2916, "step": 22698 }, { "epoch": 1.0633344263830984, "grad_norm": 0.6599339579218488, "learning_rate": 2.365129426063556e-06, "loss": 0.2974, "step": 22699 }, { "epoch": 1.0633812713730266, "grad_norm": 0.636362202193036, "learning_rate": 2.364940052483569e-06, "loss": 0.2913, "step": 22700 }, { "epoch": 1.0634281163629549, "grad_norm": 0.6115308526701035, "learning_rate": 2.3647506796808177e-06, "loss": 0.2817, "step": 22701 }, { "epoch": 1.0634749613528833, "grad_norm": 0.6142950064811389, "learning_rate": 2.3645613076563914e-06, "loss": 0.2805, "step": 22702 }, { "epoch": 1.0635218063428116, "grad_norm": 0.6011762597098907, "learning_rate": 2.3643719364113806e-06, "loss": 0.2852, "step": 22703 }, { "epoch": 1.06356865133274, "grad_norm": 0.5618567301134123, "learning_rate": 2.364182565946876e-06, "loss": 0.2719, "step": 22704 }, { "epoch": 1.0636154963226683, "grad_norm": 0.5866975811241578, "learning_rate": 2.363993196263966e-06, "loss": 0.2702, "step": 22705 }, { "epoch": 1.0636623413125965, "grad_norm": 0.5610639990675569, "learning_rate": 2.363803827363742e-06, "loss": 0.2628, "step": 22706 }, { "epoch": 1.063709186302525, "grad_norm": 0.6043453447313621, "learning_rate": 2.363614459247292e-06, "loss": 0.2711, "step": 22707 }, { "epoch": 1.0637560312924532, "grad_norm": 0.5936308737738581, "learning_rate": 2.3634250919157067e-06, "loss": 0.2787, "step": 22708 }, { "epoch": 1.0638028762823817, "grad_norm": 0.6202989257424821, "learning_rate": 2.363235725370075e-06, "loss": 0.2837, "step": 22709 }, { "epoch": 1.06384972127231, "grad_norm": 0.6524432092582224, "learning_rate": 2.363046359611488e-06, "loss": 0.2997, "step": 22710 }, { "epoch": 1.0638965662622382, "grad_norm": 0.6021678787497905, "learning_rate": 2.3628569946410345e-06, "loss": 0.2816, "step": 22711 }, { "epoch": 1.0639434112521666, "grad_norm": 0.5925586346407375, "learning_rate": 2.3626676304598046e-06, "loss": 0.2743, "step": 22712 }, { "epoch": 1.0639902562420949, "grad_norm": 0.5917989302493062, "learning_rate": 2.3624782670688886e-06, "loss": 0.2769, "step": 22713 }, { "epoch": 1.0640371012320233, "grad_norm": 0.60384675507552, "learning_rate": 2.3622889044693755e-06, "loss": 0.2813, "step": 22714 }, { "epoch": 1.0640839462219516, "grad_norm": 0.6161282095714883, "learning_rate": 2.362099542662354e-06, "loss": 0.2746, "step": 22715 }, { "epoch": 1.0641307912118798, "grad_norm": 0.5449542599374921, "learning_rate": 2.3619101816489155e-06, "loss": 0.2592, "step": 22716 }, { "epoch": 1.0641776362018083, "grad_norm": 0.5689052925561549, "learning_rate": 2.3617208214301494e-06, "loss": 0.2605, "step": 22717 }, { "epoch": 1.0642244811917365, "grad_norm": 0.5942760167538277, "learning_rate": 2.361531462007145e-06, "loss": 0.2702, "step": 22718 }, { "epoch": 1.064271326181665, "grad_norm": 0.5667245437148357, "learning_rate": 2.361342103380993e-06, "loss": 0.2477, "step": 22719 }, { "epoch": 1.0643181711715932, "grad_norm": 0.6145115849559875, "learning_rate": 2.3611527455527817e-06, "loss": 0.2803, "step": 22720 }, { "epoch": 1.0643650161615215, "grad_norm": 0.5673264174265379, "learning_rate": 2.3609633885236016e-06, "loss": 0.2873, "step": 22721 }, { "epoch": 1.06441186115145, "grad_norm": 0.6305624204546116, "learning_rate": 2.360774032294542e-06, "loss": 0.2766, "step": 22722 }, { "epoch": 1.0644587061413782, "grad_norm": 0.549474407086829, "learning_rate": 2.3605846768666924e-06, "loss": 0.2823, "step": 22723 }, { "epoch": 1.0645055511313064, "grad_norm": 0.5761447081404428, "learning_rate": 2.360395322241144e-06, "loss": 0.2828, "step": 22724 }, { "epoch": 1.0645523961212349, "grad_norm": 0.539107018100734, "learning_rate": 2.3602059684189856e-06, "loss": 0.2545, "step": 22725 }, { "epoch": 1.064599241111163, "grad_norm": 0.5729277936746545, "learning_rate": 2.3600166154013057e-06, "loss": 0.273, "step": 22726 }, { "epoch": 1.0646460861010916, "grad_norm": 0.5946913064468268, "learning_rate": 2.359827263189195e-06, "loss": 0.2747, "step": 22727 }, { "epoch": 1.0646929310910198, "grad_norm": 0.5848796443287468, "learning_rate": 2.3596379117837437e-06, "loss": 0.2754, "step": 22728 }, { "epoch": 1.064739776080948, "grad_norm": 0.5722971096771886, "learning_rate": 2.3594485611860407e-06, "loss": 0.2547, "step": 22729 }, { "epoch": 1.0647866210708765, "grad_norm": 0.5440724722484581, "learning_rate": 2.3592592113971754e-06, "loss": 0.2542, "step": 22730 }, { "epoch": 1.0648334660608048, "grad_norm": 0.6182921043731567, "learning_rate": 2.3590698624182396e-06, "loss": 0.2631, "step": 22731 }, { "epoch": 1.0648803110507332, "grad_norm": 0.6016840838549019, "learning_rate": 2.3588805142503205e-06, "loss": 0.2683, "step": 22732 }, { "epoch": 1.0649271560406615, "grad_norm": 0.6023266829869894, "learning_rate": 2.3586911668945078e-06, "loss": 0.2738, "step": 22733 }, { "epoch": 1.0649740010305897, "grad_norm": 0.5890067939317777, "learning_rate": 2.3585018203518924e-06, "loss": 0.2682, "step": 22734 }, { "epoch": 1.0650208460205182, "grad_norm": 0.6113245034047631, "learning_rate": 2.3583124746235637e-06, "loss": 0.2942, "step": 22735 }, { "epoch": 1.0650676910104464, "grad_norm": 0.6051841266544041, "learning_rate": 2.3581231297106105e-06, "loss": 0.2819, "step": 22736 }, { "epoch": 1.0651145360003746, "grad_norm": 0.6326122231948214, "learning_rate": 2.3579337856141243e-06, "loss": 0.2748, "step": 22737 }, { "epoch": 1.065161380990303, "grad_norm": 0.6267193522625736, "learning_rate": 2.357744442335192e-06, "loss": 0.2685, "step": 22738 }, { "epoch": 1.0652082259802313, "grad_norm": 0.5277926567880706, "learning_rate": 2.357555099874906e-06, "loss": 0.2558, "step": 22739 }, { "epoch": 1.0652550709701598, "grad_norm": 0.5879454133405361, "learning_rate": 2.3573657582343534e-06, "loss": 0.2892, "step": 22740 }, { "epoch": 1.065301915960088, "grad_norm": 0.5886386375187302, "learning_rate": 2.357176417414625e-06, "loss": 0.2652, "step": 22741 }, { "epoch": 1.0653487609500163, "grad_norm": 0.5767075719541714, "learning_rate": 2.3569870774168114e-06, "loss": 0.2748, "step": 22742 }, { "epoch": 1.0653956059399448, "grad_norm": 0.5557237414693479, "learning_rate": 2.3567977382420012e-06, "loss": 0.2615, "step": 22743 }, { "epoch": 1.065442450929873, "grad_norm": 0.6262428544651821, "learning_rate": 2.3566083998912833e-06, "loss": 0.2542, "step": 22744 }, { "epoch": 1.0654892959198015, "grad_norm": 0.5815365820923494, "learning_rate": 2.356419062365748e-06, "loss": 0.2725, "step": 22745 }, { "epoch": 1.0655361409097297, "grad_norm": 0.5873859491566003, "learning_rate": 2.3562297256664855e-06, "loss": 0.2751, "step": 22746 }, { "epoch": 1.065582985899658, "grad_norm": 0.6160951132056539, "learning_rate": 2.356040389794584e-06, "loss": 0.2742, "step": 22747 }, { "epoch": 1.0656298308895864, "grad_norm": 0.6179069243212476, "learning_rate": 2.355851054751134e-06, "loss": 0.3013, "step": 22748 }, { "epoch": 1.0656766758795146, "grad_norm": 0.5976538709482728, "learning_rate": 2.355661720537226e-06, "loss": 0.2893, "step": 22749 }, { "epoch": 1.065723520869443, "grad_norm": 0.618817150949706, "learning_rate": 2.3554723871539483e-06, "loss": 0.2732, "step": 22750 }, { "epoch": 1.0657703658593713, "grad_norm": 0.6421662479843225, "learning_rate": 2.3552830546023898e-06, "loss": 0.278, "step": 22751 }, { "epoch": 1.0658172108492996, "grad_norm": 0.5446826999001639, "learning_rate": 2.3550937228836413e-06, "loss": 0.2484, "step": 22752 }, { "epoch": 1.065864055839228, "grad_norm": 0.563595600279444, "learning_rate": 2.354904391998792e-06, "loss": 0.2661, "step": 22753 }, { "epoch": 1.0659109008291563, "grad_norm": 0.6345472370296796, "learning_rate": 2.354715061948931e-06, "loss": 0.285, "step": 22754 }, { "epoch": 1.0659577458190848, "grad_norm": 0.5723232027222024, "learning_rate": 2.35452573273515e-06, "loss": 0.2684, "step": 22755 }, { "epoch": 1.066004590809013, "grad_norm": 0.5710882500183999, "learning_rate": 2.3543364043585355e-06, "loss": 0.2565, "step": 22756 }, { "epoch": 1.0660514357989412, "grad_norm": 0.6418735509071131, "learning_rate": 2.3541470768201783e-06, "loss": 0.2867, "step": 22757 }, { "epoch": 1.0660982807888697, "grad_norm": 0.6053169447268665, "learning_rate": 2.353957750121168e-06, "loss": 0.2787, "step": 22758 }, { "epoch": 1.066145125778798, "grad_norm": 0.6134509631181568, "learning_rate": 2.3537684242625947e-06, "loss": 0.2707, "step": 22759 }, { "epoch": 1.0661919707687262, "grad_norm": 0.5948844957157889, "learning_rate": 2.3535790992455467e-06, "loss": 0.2694, "step": 22760 }, { "epoch": 1.0662388157586546, "grad_norm": 0.6675466887483595, "learning_rate": 2.3533897750711153e-06, "loss": 0.2826, "step": 22761 }, { "epoch": 1.0662856607485829, "grad_norm": 0.5659457112671469, "learning_rate": 2.353200451740388e-06, "loss": 0.2722, "step": 22762 }, { "epoch": 1.0663325057385113, "grad_norm": 0.6288012834917823, "learning_rate": 2.3530111292544557e-06, "loss": 0.2821, "step": 22763 }, { "epoch": 1.0663793507284396, "grad_norm": 0.5868369071897727, "learning_rate": 2.3528218076144065e-06, "loss": 0.2816, "step": 22764 }, { "epoch": 1.0664261957183678, "grad_norm": 0.5875769339281426, "learning_rate": 2.3526324868213315e-06, "loss": 0.2896, "step": 22765 }, { "epoch": 1.0664730407082963, "grad_norm": 0.5832848057911852, "learning_rate": 2.3524431668763193e-06, "loss": 0.2718, "step": 22766 }, { "epoch": 1.0665198856982245, "grad_norm": 0.579776775935683, "learning_rate": 2.3522538477804603e-06, "loss": 0.2866, "step": 22767 }, { "epoch": 1.066566730688153, "grad_norm": 0.6178152290766089, "learning_rate": 2.3520645295348423e-06, "loss": 0.2659, "step": 22768 }, { "epoch": 1.0666135756780812, "grad_norm": 0.60385747400925, "learning_rate": 2.351875212140556e-06, "loss": 0.2736, "step": 22769 }, { "epoch": 1.0666604206680095, "grad_norm": 0.614642986808547, "learning_rate": 2.3516858955986906e-06, "loss": 0.2885, "step": 22770 }, { "epoch": 1.066707265657938, "grad_norm": 0.6195146893776577, "learning_rate": 2.3514965799103358e-06, "loss": 0.3053, "step": 22771 }, { "epoch": 1.0667541106478662, "grad_norm": 0.5320432915933431, "learning_rate": 2.35130726507658e-06, "loss": 0.2532, "step": 22772 }, { "epoch": 1.0668009556377944, "grad_norm": 0.6294284293677633, "learning_rate": 2.351117951098515e-06, "loss": 0.277, "step": 22773 }, { "epoch": 1.0668478006277229, "grad_norm": 0.6098193997802864, "learning_rate": 2.3509286379772285e-06, "loss": 0.2928, "step": 22774 }, { "epoch": 1.0668946456176511, "grad_norm": 0.6215065338983131, "learning_rate": 2.3507393257138095e-06, "loss": 0.2864, "step": 22775 }, { "epoch": 1.0669414906075796, "grad_norm": 0.543938964595071, "learning_rate": 2.350550014309348e-06, "loss": 0.2809, "step": 22776 }, { "epoch": 1.0669883355975078, "grad_norm": 0.5988951688670496, "learning_rate": 2.350360703764934e-06, "loss": 0.2795, "step": 22777 }, { "epoch": 1.067035180587436, "grad_norm": 0.559599775108525, "learning_rate": 2.3501713940816563e-06, "loss": 0.2691, "step": 22778 }, { "epoch": 1.0670820255773645, "grad_norm": 0.6152337598186938, "learning_rate": 2.3499820852606046e-06, "loss": 0.2658, "step": 22779 }, { "epoch": 1.0671288705672928, "grad_norm": 0.6261113206824886, "learning_rate": 2.3497927773028696e-06, "loss": 0.3056, "step": 22780 }, { "epoch": 1.0671757155572212, "grad_norm": 0.6199113632686408, "learning_rate": 2.349603470209538e-06, "loss": 0.2751, "step": 22781 }, { "epoch": 1.0672225605471495, "grad_norm": 0.5867790800257519, "learning_rate": 2.349414163981701e-06, "loss": 0.2775, "step": 22782 }, { "epoch": 1.0672694055370777, "grad_norm": 0.6346630355578349, "learning_rate": 2.349224858620447e-06, "loss": 0.2926, "step": 22783 }, { "epoch": 1.0673162505270062, "grad_norm": 0.5954025489367913, "learning_rate": 2.349035554126867e-06, "loss": 0.2718, "step": 22784 }, { "epoch": 1.0673630955169344, "grad_norm": 0.6073673333677233, "learning_rate": 2.348846250502049e-06, "loss": 0.2875, "step": 22785 }, { "epoch": 1.0674099405068629, "grad_norm": 0.6190113024631411, "learning_rate": 2.3486569477470838e-06, "loss": 0.2874, "step": 22786 }, { "epoch": 1.0674567854967911, "grad_norm": 0.6119867169680457, "learning_rate": 2.3484676458630583e-06, "loss": 0.2777, "step": 22787 }, { "epoch": 1.0675036304867194, "grad_norm": 0.5965883376917747, "learning_rate": 2.3482783448510646e-06, "loss": 0.2702, "step": 22788 }, { "epoch": 1.0675504754766478, "grad_norm": 0.5989004203139441, "learning_rate": 2.34808904471219e-06, "loss": 0.279, "step": 22789 }, { "epoch": 1.067597320466576, "grad_norm": 0.5739385068473061, "learning_rate": 2.3478997454475246e-06, "loss": 0.2621, "step": 22790 }, { "epoch": 1.0676441654565045, "grad_norm": 0.6052322936694147, "learning_rate": 2.347710447058159e-06, "loss": 0.2808, "step": 22791 }, { "epoch": 1.0676910104464328, "grad_norm": 0.5777351415615724, "learning_rate": 2.3475211495451817e-06, "loss": 0.272, "step": 22792 }, { "epoch": 1.067737855436361, "grad_norm": 0.5730170700707553, "learning_rate": 2.3473318529096807e-06, "loss": 0.2666, "step": 22793 }, { "epoch": 1.0677847004262895, "grad_norm": 0.562552946169931, "learning_rate": 2.3471425571527463e-06, "loss": 0.2754, "step": 22794 }, { "epoch": 1.0678315454162177, "grad_norm": 0.5886507789155833, "learning_rate": 2.3469532622754693e-06, "loss": 0.2758, "step": 22795 }, { "epoch": 1.067878390406146, "grad_norm": 0.6432148222564426, "learning_rate": 2.3467639682789367e-06, "loss": 0.2801, "step": 22796 }, { "epoch": 1.0679252353960744, "grad_norm": 0.6114217668388914, "learning_rate": 2.34657467516424e-06, "loss": 0.2729, "step": 22797 }, { "epoch": 1.0679720803860027, "grad_norm": 0.6090160072738479, "learning_rate": 2.346385382932467e-06, "loss": 0.2652, "step": 22798 }, { "epoch": 1.0680189253759311, "grad_norm": 0.571673551972172, "learning_rate": 2.3461960915847077e-06, "loss": 0.2617, "step": 22799 }, { "epoch": 1.0680657703658594, "grad_norm": 0.6279636903490677, "learning_rate": 2.3460068011220503e-06, "loss": 0.2865, "step": 22800 }, { "epoch": 1.0681126153557876, "grad_norm": 0.611477646839407, "learning_rate": 2.3458175115455864e-06, "loss": 0.2666, "step": 22801 }, { "epoch": 1.068159460345716, "grad_norm": 0.6305132832332875, "learning_rate": 2.345628222856403e-06, "loss": 0.2787, "step": 22802 }, { "epoch": 1.0682063053356443, "grad_norm": 0.6573190141183423, "learning_rate": 2.3454389350555903e-06, "loss": 0.287, "step": 22803 }, { "epoch": 1.0682531503255728, "grad_norm": 0.5701852674325777, "learning_rate": 2.345249648144239e-06, "loss": 0.2671, "step": 22804 }, { "epoch": 1.068299995315501, "grad_norm": 0.6118311880539465, "learning_rate": 2.3450603621234354e-06, "loss": 0.2772, "step": 22805 }, { "epoch": 1.0683468403054293, "grad_norm": 0.5886271189628165, "learning_rate": 2.3448710769942717e-06, "loss": 0.2763, "step": 22806 }, { "epoch": 1.0683936852953577, "grad_norm": 0.5754532732506007, "learning_rate": 2.344681792757835e-06, "loss": 0.2719, "step": 22807 }, { "epoch": 1.068440530285286, "grad_norm": 0.5569684183051274, "learning_rate": 2.3444925094152164e-06, "loss": 0.2741, "step": 22808 }, { "epoch": 1.0684873752752142, "grad_norm": 0.5863682808446428, "learning_rate": 2.3443032269675037e-06, "loss": 0.2795, "step": 22809 }, { "epoch": 1.0685342202651427, "grad_norm": 0.5813891474050737, "learning_rate": 2.3441139454157875e-06, "loss": 0.2652, "step": 22810 }, { "epoch": 1.068581065255071, "grad_norm": 0.6074489133452617, "learning_rate": 2.343924664761156e-06, "loss": 0.2656, "step": 22811 }, { "epoch": 1.0686279102449994, "grad_norm": 0.5526673796548959, "learning_rate": 2.3437353850046985e-06, "loss": 0.2582, "step": 22812 }, { "epoch": 1.0686747552349276, "grad_norm": 0.5866960780550938, "learning_rate": 2.3435461061475047e-06, "loss": 0.2641, "step": 22813 }, { "epoch": 1.0687216002248558, "grad_norm": 0.545157095133182, "learning_rate": 2.3433568281906637e-06, "loss": 0.267, "step": 22814 }, { "epoch": 1.0687684452147843, "grad_norm": 0.529368310181016, "learning_rate": 2.3431675511352652e-06, "loss": 0.2437, "step": 22815 }, { "epoch": 1.0688152902047126, "grad_norm": 0.6002979907687764, "learning_rate": 2.3429782749823986e-06, "loss": 0.2804, "step": 22816 }, { "epoch": 1.068862135194641, "grad_norm": 0.7233330384526412, "learning_rate": 2.3427889997331513e-06, "loss": 0.3034, "step": 22817 }, { "epoch": 1.0689089801845693, "grad_norm": 0.6120336536063538, "learning_rate": 2.342599725388614e-06, "loss": 0.2758, "step": 22818 }, { "epoch": 1.0689558251744975, "grad_norm": 0.5884507249986014, "learning_rate": 2.342410451949876e-06, "loss": 0.2695, "step": 22819 }, { "epoch": 1.069002670164426, "grad_norm": 0.63399116186943, "learning_rate": 2.342221179418026e-06, "loss": 0.2675, "step": 22820 }, { "epoch": 1.0690495151543542, "grad_norm": 0.6393569491000852, "learning_rate": 2.3420319077941535e-06, "loss": 0.2805, "step": 22821 }, { "epoch": 1.0690963601442827, "grad_norm": 0.6247499414640906, "learning_rate": 2.3418426370793485e-06, "loss": 0.2773, "step": 22822 }, { "epoch": 1.069143205134211, "grad_norm": 0.6178601904694329, "learning_rate": 2.341653367274699e-06, "loss": 0.3, "step": 22823 }, { "epoch": 1.0691900501241391, "grad_norm": 0.6114074565742119, "learning_rate": 2.3414640983812935e-06, "loss": 0.2768, "step": 22824 }, { "epoch": 1.0692368951140676, "grad_norm": 0.6017719589033135, "learning_rate": 2.341274830400223e-06, "loss": 0.275, "step": 22825 }, { "epoch": 1.0692837401039959, "grad_norm": 0.6041816464445097, "learning_rate": 2.3410855633325762e-06, "loss": 0.2924, "step": 22826 }, { "epoch": 1.0693305850939243, "grad_norm": 0.5549038628248519, "learning_rate": 2.3408962971794413e-06, "loss": 0.2697, "step": 22827 }, { "epoch": 1.0693774300838526, "grad_norm": 0.5813060363162598, "learning_rate": 2.3407070319419096e-06, "loss": 0.2601, "step": 22828 }, { "epoch": 1.0694242750737808, "grad_norm": 0.5814908091437496, "learning_rate": 2.340517767621068e-06, "loss": 0.2762, "step": 22829 }, { "epoch": 1.0694711200637093, "grad_norm": 0.6435257609727985, "learning_rate": 2.3403285042180067e-06, "loss": 0.3079, "step": 22830 }, { "epoch": 1.0695179650536375, "grad_norm": 0.6057966664076617, "learning_rate": 2.340139241733814e-06, "loss": 0.2771, "step": 22831 }, { "epoch": 1.0695648100435657, "grad_norm": 0.6175699956508193, "learning_rate": 2.33994998016958e-06, "loss": 0.2893, "step": 22832 }, { "epoch": 1.0696116550334942, "grad_norm": 0.572027858770288, "learning_rate": 2.3397607195263945e-06, "loss": 0.2632, "step": 22833 }, { "epoch": 1.0696585000234224, "grad_norm": 0.5983280029767601, "learning_rate": 2.339571459805346e-06, "loss": 0.2734, "step": 22834 }, { "epoch": 1.069705345013351, "grad_norm": 0.6168394904390853, "learning_rate": 2.339382201007522e-06, "loss": 0.2901, "step": 22835 }, { "epoch": 1.0697521900032791, "grad_norm": 0.581689528117623, "learning_rate": 2.339192943134014e-06, "loss": 0.2803, "step": 22836 }, { "epoch": 1.0697990349932074, "grad_norm": 0.5951408724359661, "learning_rate": 2.33900368618591e-06, "loss": 0.2868, "step": 22837 }, { "epoch": 1.0698458799831359, "grad_norm": 0.5717795846472016, "learning_rate": 2.3388144301642994e-06, "loss": 0.2592, "step": 22838 }, { "epoch": 1.069892724973064, "grad_norm": 0.620994653811009, "learning_rate": 2.3386251750702706e-06, "loss": 0.2967, "step": 22839 }, { "epoch": 1.0699395699629926, "grad_norm": 0.6307653644113138, "learning_rate": 2.3384359209049147e-06, "loss": 0.2775, "step": 22840 }, { "epoch": 1.0699864149529208, "grad_norm": 0.5382427715661174, "learning_rate": 2.338246667669319e-06, "loss": 0.2625, "step": 22841 }, { "epoch": 1.070033259942849, "grad_norm": 0.6131914593121954, "learning_rate": 2.3380574153645726e-06, "loss": 0.2835, "step": 22842 }, { "epoch": 1.0700801049327775, "grad_norm": 0.5860920581883888, "learning_rate": 2.3378681639917645e-06, "loss": 0.2533, "step": 22843 }, { "epoch": 1.0701269499227057, "grad_norm": 0.6117431105246476, "learning_rate": 2.3376789135519857e-06, "loss": 0.2791, "step": 22844 }, { "epoch": 1.070173794912634, "grad_norm": 0.5493498408148416, "learning_rate": 2.337489664046323e-06, "loss": 0.283, "step": 22845 }, { "epoch": 1.0702206399025624, "grad_norm": 0.5507443146616238, "learning_rate": 2.337300415475867e-06, "loss": 0.276, "step": 22846 }, { "epoch": 1.0702674848924907, "grad_norm": 0.5916776789375849, "learning_rate": 2.3371111678417064e-06, "loss": 0.2748, "step": 22847 }, { "epoch": 1.0703143298824191, "grad_norm": 0.5977490550738994, "learning_rate": 2.3369219211449297e-06, "loss": 0.28, "step": 22848 }, { "epoch": 1.0703611748723474, "grad_norm": 0.5702853937320026, "learning_rate": 2.336732675386626e-06, "loss": 0.2739, "step": 22849 }, { "epoch": 1.0704080198622756, "grad_norm": 0.5959877942129009, "learning_rate": 2.3365434305678854e-06, "loss": 0.27, "step": 22850 }, { "epoch": 1.070454864852204, "grad_norm": 0.6324360248216431, "learning_rate": 2.3363541866897955e-06, "loss": 0.2832, "step": 22851 }, { "epoch": 1.0705017098421323, "grad_norm": 0.6144727617830177, "learning_rate": 2.3361649437534466e-06, "loss": 0.2893, "step": 22852 }, { "epoch": 1.0705485548320608, "grad_norm": 0.6224474508922514, "learning_rate": 2.335975701759928e-06, "loss": 0.2916, "step": 22853 }, { "epoch": 1.070595399821989, "grad_norm": 0.5813622289014706, "learning_rate": 2.335786460710327e-06, "loss": 0.271, "step": 22854 }, { "epoch": 1.0706422448119173, "grad_norm": 0.5542950621787394, "learning_rate": 2.335597220605734e-06, "loss": 0.2667, "step": 22855 }, { "epoch": 1.0706890898018457, "grad_norm": 0.5913881404544742, "learning_rate": 2.3354079814472374e-06, "loss": 0.2856, "step": 22856 }, { "epoch": 1.070735934791774, "grad_norm": 0.6237419397245914, "learning_rate": 2.3352187432359266e-06, "loss": 0.2942, "step": 22857 }, { "epoch": 1.0707827797817024, "grad_norm": 0.5372229062544533, "learning_rate": 2.3350295059728905e-06, "loss": 0.2673, "step": 22858 }, { "epoch": 1.0708296247716307, "grad_norm": 0.5886730722260651, "learning_rate": 2.3348402696592186e-06, "loss": 0.2847, "step": 22859 }, { "epoch": 1.070876469761559, "grad_norm": 0.6300371773218103, "learning_rate": 2.334651034295999e-06, "loss": 0.2788, "step": 22860 }, { "epoch": 1.0709233147514874, "grad_norm": 0.6024307561657628, "learning_rate": 2.3344617998843213e-06, "loss": 0.2838, "step": 22861 }, { "epoch": 1.0709701597414156, "grad_norm": 0.5979994651902513, "learning_rate": 2.334272566425274e-06, "loss": 0.2774, "step": 22862 }, { "epoch": 1.071017004731344, "grad_norm": 0.5847857470543956, "learning_rate": 2.3340833339199466e-06, "loss": 0.2661, "step": 22863 }, { "epoch": 1.0710638497212723, "grad_norm": 0.5693029027602557, "learning_rate": 2.3338941023694277e-06, "loss": 0.2734, "step": 22864 }, { "epoch": 1.0711106947112006, "grad_norm": 0.6026867178722314, "learning_rate": 2.333704871774808e-06, "loss": 0.273, "step": 22865 }, { "epoch": 1.071157539701129, "grad_norm": 0.5702201936221263, "learning_rate": 2.333515642137173e-06, "loss": 0.2754, "step": 22866 }, { "epoch": 1.0712043846910573, "grad_norm": 0.5970266432835899, "learning_rate": 2.3333264134576144e-06, "loss": 0.2765, "step": 22867 }, { "epoch": 1.0712512296809855, "grad_norm": 0.5860557381016822, "learning_rate": 2.3331371857372205e-06, "loss": 0.2726, "step": 22868 }, { "epoch": 1.071298074670914, "grad_norm": 0.5663313026401471, "learning_rate": 2.3329479589770794e-06, "loss": 0.2673, "step": 22869 }, { "epoch": 1.0713449196608422, "grad_norm": 0.6142950315483527, "learning_rate": 2.3327587331782814e-06, "loss": 0.2715, "step": 22870 }, { "epoch": 1.0713917646507707, "grad_norm": 0.5735245194199758, "learning_rate": 2.3325695083419157e-06, "loss": 0.2618, "step": 22871 }, { "epoch": 1.071438609640699, "grad_norm": 0.5302982920539787, "learning_rate": 2.3323802844690694e-06, "loss": 0.2652, "step": 22872 }, { "epoch": 1.0714854546306272, "grad_norm": 0.5924900415396857, "learning_rate": 2.3321910615608328e-06, "loss": 0.2947, "step": 22873 }, { "epoch": 1.0715322996205556, "grad_norm": 0.67937980761744, "learning_rate": 2.3320018396182937e-06, "loss": 0.2923, "step": 22874 }, { "epoch": 1.0715791446104839, "grad_norm": 0.6140771415234879, "learning_rate": 2.3318126186425428e-06, "loss": 0.2775, "step": 22875 }, { "epoch": 1.0716259896004123, "grad_norm": 0.5921025057238949, "learning_rate": 2.331623398634667e-06, "loss": 0.2665, "step": 22876 }, { "epoch": 1.0716728345903406, "grad_norm": 0.5720210767382717, "learning_rate": 2.3314341795957575e-06, "loss": 0.2679, "step": 22877 }, { "epoch": 1.0717196795802688, "grad_norm": 0.6106153285695288, "learning_rate": 2.3312449615269012e-06, "loss": 0.2821, "step": 22878 }, { "epoch": 1.0717665245701973, "grad_norm": 0.6379965191293414, "learning_rate": 2.3310557444291878e-06, "loss": 0.2765, "step": 22879 }, { "epoch": 1.0718133695601255, "grad_norm": 0.560823628331192, "learning_rate": 2.330866528303706e-06, "loss": 0.2802, "step": 22880 }, { "epoch": 1.0718602145500538, "grad_norm": 0.5703218150753917, "learning_rate": 2.3306773131515445e-06, "loss": 0.274, "step": 22881 }, { "epoch": 1.0719070595399822, "grad_norm": 0.5620692200486407, "learning_rate": 2.330488098973793e-06, "loss": 0.2631, "step": 22882 }, { "epoch": 1.0719539045299105, "grad_norm": 0.5978456938178949, "learning_rate": 2.3302988857715407e-06, "loss": 0.2668, "step": 22883 }, { "epoch": 1.072000749519839, "grad_norm": 0.6246144001426905, "learning_rate": 2.3301096735458744e-06, "loss": 0.2688, "step": 22884 }, { "epoch": 1.0720475945097672, "grad_norm": 0.6076917400171414, "learning_rate": 2.329920462297884e-06, "loss": 0.2861, "step": 22885 }, { "epoch": 1.0720944394996954, "grad_norm": 0.5825014481507724, "learning_rate": 2.3297312520286596e-06, "loss": 0.2765, "step": 22886 }, { "epoch": 1.0721412844896239, "grad_norm": 0.5699262028616964, "learning_rate": 2.3295420427392884e-06, "loss": 0.2657, "step": 22887 }, { "epoch": 1.072188129479552, "grad_norm": 0.604202767352087, "learning_rate": 2.3293528344308597e-06, "loss": 0.2919, "step": 22888 }, { "epoch": 1.0722349744694806, "grad_norm": 0.6079822361737623, "learning_rate": 2.3291636271044637e-06, "loss": 0.2824, "step": 22889 }, { "epoch": 1.0722818194594088, "grad_norm": 0.5987627835669872, "learning_rate": 2.3289744207611875e-06, "loss": 0.2774, "step": 22890 }, { "epoch": 1.072328664449337, "grad_norm": 0.564511560320584, "learning_rate": 2.3287852154021203e-06, "loss": 0.2637, "step": 22891 }, { "epoch": 1.0723755094392655, "grad_norm": 0.574226382840303, "learning_rate": 2.328596011028351e-06, "loss": 0.2759, "step": 22892 }, { "epoch": 1.0724223544291938, "grad_norm": 0.6018820838968966, "learning_rate": 2.3284068076409687e-06, "loss": 0.2736, "step": 22893 }, { "epoch": 1.0724691994191222, "grad_norm": 0.6164152304808497, "learning_rate": 2.328217605241062e-06, "loss": 0.2903, "step": 22894 }, { "epoch": 1.0725160444090505, "grad_norm": 0.5754406176116983, "learning_rate": 2.3280284038297207e-06, "loss": 0.2636, "step": 22895 }, { "epoch": 1.0725628893989787, "grad_norm": 0.5636835127759627, "learning_rate": 2.327839203408032e-06, "loss": 0.259, "step": 22896 }, { "epoch": 1.0726097343889072, "grad_norm": 0.5201634472228056, "learning_rate": 2.3276500039770857e-06, "loss": 0.2454, "step": 22897 }, { "epoch": 1.0726565793788354, "grad_norm": 0.6297631144549622, "learning_rate": 2.3274608055379694e-06, "loss": 0.2829, "step": 22898 }, { "epoch": 1.0727034243687639, "grad_norm": 0.6044532444155453, "learning_rate": 2.3272716080917735e-06, "loss": 0.2861, "step": 22899 }, { "epoch": 1.072750269358692, "grad_norm": 0.5861588606551489, "learning_rate": 2.327082411639586e-06, "loss": 0.2746, "step": 22900 }, { "epoch": 1.0727971143486204, "grad_norm": 0.5719973524776657, "learning_rate": 2.3268932161824963e-06, "loss": 0.28, "step": 22901 }, { "epoch": 1.0728439593385488, "grad_norm": 0.6060624531171429, "learning_rate": 2.3267040217215918e-06, "loss": 0.2712, "step": 22902 }, { "epoch": 1.072890804328477, "grad_norm": 0.5912328872834004, "learning_rate": 2.3265148282579627e-06, "loss": 0.2664, "step": 22903 }, { "epoch": 1.0729376493184053, "grad_norm": 0.6120671346715149, "learning_rate": 2.3263256357926966e-06, "loss": 0.2821, "step": 22904 }, { "epoch": 1.0729844943083338, "grad_norm": 0.5809645318967062, "learning_rate": 2.326136444326883e-06, "loss": 0.2744, "step": 22905 }, { "epoch": 1.073031339298262, "grad_norm": 0.6050071502530496, "learning_rate": 2.3259472538616106e-06, "loss": 0.2609, "step": 22906 }, { "epoch": 1.0730781842881905, "grad_norm": 0.5866302674490098, "learning_rate": 2.325758064397969e-06, "loss": 0.2852, "step": 22907 }, { "epoch": 1.0731250292781187, "grad_norm": 0.5672903178662855, "learning_rate": 2.3255688759370443e-06, "loss": 0.2733, "step": 22908 }, { "epoch": 1.073171874268047, "grad_norm": 0.6118326870220331, "learning_rate": 2.3253796884799274e-06, "loss": 0.2879, "step": 22909 }, { "epoch": 1.0732187192579754, "grad_norm": 0.5680233354608142, "learning_rate": 2.3251905020277067e-06, "loss": 0.2635, "step": 22910 }, { "epoch": 1.0732655642479036, "grad_norm": 0.5832616439224982, "learning_rate": 2.32500131658147e-06, "loss": 0.27, "step": 22911 }, { "epoch": 1.073312409237832, "grad_norm": 0.6363447215090657, "learning_rate": 2.324812132142307e-06, "loss": 0.2872, "step": 22912 }, { "epoch": 1.0733592542277604, "grad_norm": 0.6400375951958341, "learning_rate": 2.324622948711307e-06, "loss": 0.2865, "step": 22913 }, { "epoch": 1.0734060992176886, "grad_norm": 0.5927245069538863, "learning_rate": 2.3244337662895582e-06, "loss": 0.2857, "step": 22914 }, { "epoch": 1.073452944207617, "grad_norm": 0.6122181059831057, "learning_rate": 2.324244584878148e-06, "loss": 0.2831, "step": 22915 }, { "epoch": 1.0734997891975453, "grad_norm": 0.5946962991595011, "learning_rate": 2.3240554044781653e-06, "loss": 0.2831, "step": 22916 }, { "epoch": 1.0735466341874735, "grad_norm": 0.5451529563616815, "learning_rate": 2.323866225090701e-06, "loss": 0.2747, "step": 22917 }, { "epoch": 1.073593479177402, "grad_norm": 0.6377413280328339, "learning_rate": 2.3236770467168413e-06, "loss": 0.2859, "step": 22918 }, { "epoch": 1.0736403241673302, "grad_norm": 0.5725012158321151, "learning_rate": 2.3234878693576756e-06, "loss": 0.2844, "step": 22919 }, { "epoch": 1.0736871691572587, "grad_norm": 0.5659265283237459, "learning_rate": 2.3232986930142946e-06, "loss": 0.2696, "step": 22920 }, { "epoch": 1.073734014147187, "grad_norm": 0.5980747920596965, "learning_rate": 2.323109517687784e-06, "loss": 0.259, "step": 22921 }, { "epoch": 1.0737808591371152, "grad_norm": 0.5853276296060086, "learning_rate": 2.3229203433792336e-06, "loss": 0.2736, "step": 22922 }, { "epoch": 1.0738277041270436, "grad_norm": 0.6083640388052116, "learning_rate": 2.322731170089732e-06, "loss": 0.2755, "step": 22923 }, { "epoch": 1.0738745491169719, "grad_norm": 0.5745037027162504, "learning_rate": 2.3225419978203683e-06, "loss": 0.2664, "step": 22924 }, { "epoch": 1.0739213941069004, "grad_norm": 0.5682266534642225, "learning_rate": 2.3223528265722307e-06, "loss": 0.2653, "step": 22925 }, { "epoch": 1.0739682390968286, "grad_norm": 0.5849891997733805, "learning_rate": 2.322163656346409e-06, "loss": 0.2786, "step": 22926 }, { "epoch": 1.0740150840867568, "grad_norm": 0.5767445319642317, "learning_rate": 2.321974487143989e-06, "loss": 0.2671, "step": 22927 }, { "epoch": 1.0740619290766853, "grad_norm": 0.5396044702792079, "learning_rate": 2.321785318966062e-06, "loss": 0.262, "step": 22928 }, { "epoch": 1.0741087740666135, "grad_norm": 0.5919130440262808, "learning_rate": 2.3215961518137154e-06, "loss": 0.2718, "step": 22929 }, { "epoch": 1.074155619056542, "grad_norm": 0.6301021533632974, "learning_rate": 2.321406985688038e-06, "loss": 0.2788, "step": 22930 }, { "epoch": 1.0742024640464702, "grad_norm": 0.5901999989392995, "learning_rate": 2.3212178205901187e-06, "loss": 0.2702, "step": 22931 }, { "epoch": 1.0742493090363985, "grad_norm": 0.5694616209788717, "learning_rate": 2.321028656521047e-06, "loss": 0.2712, "step": 22932 }, { "epoch": 1.074296154026327, "grad_norm": 0.6254694499260873, "learning_rate": 2.3208394934819087e-06, "loss": 0.2929, "step": 22933 }, { "epoch": 1.0743429990162552, "grad_norm": 0.5700377756038915, "learning_rate": 2.3206503314737942e-06, "loss": 0.2724, "step": 22934 }, { "epoch": 1.0743898440061836, "grad_norm": 0.6329143041632158, "learning_rate": 2.3204611704977932e-06, "loss": 0.2889, "step": 22935 }, { "epoch": 1.0744366889961119, "grad_norm": 0.5973250920370692, "learning_rate": 2.320272010554992e-06, "loss": 0.281, "step": 22936 }, { "epoch": 1.0744835339860401, "grad_norm": 0.5641792693632899, "learning_rate": 2.3200828516464807e-06, "loss": 0.2626, "step": 22937 }, { "epoch": 1.0745303789759686, "grad_norm": 0.584552200352826, "learning_rate": 2.3198936937733473e-06, "loss": 0.2852, "step": 22938 }, { "epoch": 1.0745772239658968, "grad_norm": 0.6151313201147729, "learning_rate": 2.3197045369366803e-06, "loss": 0.2851, "step": 22939 }, { "epoch": 1.074624068955825, "grad_norm": 0.5762291482470037, "learning_rate": 2.3195153811375685e-06, "loss": 0.283, "step": 22940 }, { "epoch": 1.0746709139457535, "grad_norm": 0.5769772136405428, "learning_rate": 2.3193262263771004e-06, "loss": 0.27, "step": 22941 }, { "epoch": 1.0747177589356818, "grad_norm": 0.5464817261637206, "learning_rate": 2.319137072656364e-06, "loss": 0.2613, "step": 22942 }, { "epoch": 1.0747646039256102, "grad_norm": 0.615565437761555, "learning_rate": 2.318947919976448e-06, "loss": 0.2818, "step": 22943 }, { "epoch": 1.0748114489155385, "grad_norm": 0.5966239514191897, "learning_rate": 2.3187587683384427e-06, "loss": 0.2957, "step": 22944 }, { "epoch": 1.0748582939054667, "grad_norm": 0.555626309756878, "learning_rate": 2.3185696177434344e-06, "loss": 0.2625, "step": 22945 }, { "epoch": 1.0749051388953952, "grad_norm": 0.5737308013433471, "learning_rate": 2.318380468192512e-06, "loss": 0.2698, "step": 22946 }, { "epoch": 1.0749519838853234, "grad_norm": 0.6252587966586931, "learning_rate": 2.3181913196867638e-06, "loss": 0.2728, "step": 22947 }, { "epoch": 1.0749988288752519, "grad_norm": 0.5948573567860543, "learning_rate": 2.31800217222728e-06, "loss": 0.2769, "step": 22948 }, { "epoch": 1.0750456738651801, "grad_norm": 0.6195309786082074, "learning_rate": 2.317813025815147e-06, "loss": 0.2907, "step": 22949 }, { "epoch": 1.0750925188551084, "grad_norm": 0.6328712172775154, "learning_rate": 2.3176238804514557e-06, "loss": 0.2911, "step": 22950 }, { "epoch": 1.0751393638450368, "grad_norm": 0.5765776762031032, "learning_rate": 2.317434736137292e-06, "loss": 0.2745, "step": 22951 }, { "epoch": 1.075186208834965, "grad_norm": 0.5983823549625334, "learning_rate": 2.3172455928737456e-06, "loss": 0.2871, "step": 22952 }, { "epoch": 1.0752330538248933, "grad_norm": 0.5983810527130717, "learning_rate": 2.317056450661905e-06, "loss": 0.2884, "step": 22953 }, { "epoch": 1.0752798988148218, "grad_norm": 0.5474330369288033, "learning_rate": 2.316867309502858e-06, "loss": 0.266, "step": 22954 }, { "epoch": 1.07532674380475, "grad_norm": 0.5690845877919903, "learning_rate": 2.3166781693976948e-06, "loss": 0.261, "step": 22955 }, { "epoch": 1.0753735887946785, "grad_norm": 0.587835833086636, "learning_rate": 2.3164890303475025e-06, "loss": 0.2815, "step": 22956 }, { "epoch": 1.0754204337846067, "grad_norm": 0.5558326372835392, "learning_rate": 2.316299892353369e-06, "loss": 0.2772, "step": 22957 }, { "epoch": 1.075467278774535, "grad_norm": 0.5958069585439116, "learning_rate": 2.3161107554163835e-06, "loss": 0.2933, "step": 22958 }, { "epoch": 1.0755141237644634, "grad_norm": 0.5701843791563281, "learning_rate": 2.3159216195376346e-06, "loss": 0.2732, "step": 22959 }, { "epoch": 1.0755609687543917, "grad_norm": 0.5835414930151455, "learning_rate": 2.31573248471821e-06, "loss": 0.2719, "step": 22960 }, { "epoch": 1.0756078137443201, "grad_norm": 0.5709282574313419, "learning_rate": 2.315543350959199e-06, "loss": 0.2762, "step": 22961 }, { "epoch": 1.0756546587342484, "grad_norm": 0.6027354291383674, "learning_rate": 2.3153542182616905e-06, "loss": 0.2947, "step": 22962 }, { "epoch": 1.0757015037241766, "grad_norm": 0.6057063642528907, "learning_rate": 2.3151650866267714e-06, "loss": 0.2726, "step": 22963 }, { "epoch": 1.075748348714105, "grad_norm": 0.611903677441704, "learning_rate": 2.3149759560555305e-06, "loss": 0.2828, "step": 22964 }, { "epoch": 1.0757951937040333, "grad_norm": 0.5605309562560611, "learning_rate": 2.314786826549056e-06, "loss": 0.2601, "step": 22965 }, { "epoch": 1.0758420386939618, "grad_norm": 0.6277333248062333, "learning_rate": 2.3145976981084377e-06, "loss": 0.2789, "step": 22966 }, { "epoch": 1.07588888368389, "grad_norm": 0.6521597547244459, "learning_rate": 2.3144085707347627e-06, "loss": 0.2937, "step": 22967 }, { "epoch": 1.0759357286738183, "grad_norm": 0.6084272847341825, "learning_rate": 2.3142194444291205e-06, "loss": 0.3011, "step": 22968 }, { "epoch": 1.0759825736637467, "grad_norm": 0.586423420695293, "learning_rate": 2.3140303191925977e-06, "loss": 0.2795, "step": 22969 }, { "epoch": 1.076029418653675, "grad_norm": 0.6061845624467578, "learning_rate": 2.313841195026284e-06, "loss": 0.2684, "step": 22970 }, { "epoch": 1.0760762636436034, "grad_norm": 0.606286952349337, "learning_rate": 2.3136520719312673e-06, "loss": 0.2833, "step": 22971 }, { "epoch": 1.0761231086335317, "grad_norm": 0.577282656024982, "learning_rate": 2.3134629499086357e-06, "loss": 0.2783, "step": 22972 }, { "epoch": 1.07616995362346, "grad_norm": 0.6196341433754627, "learning_rate": 2.3132738289594786e-06, "loss": 0.2856, "step": 22973 }, { "epoch": 1.0762167986133884, "grad_norm": 0.5860491532382582, "learning_rate": 2.3130847090848844e-06, "loss": 0.2805, "step": 22974 }, { "epoch": 1.0762636436033166, "grad_norm": 0.5755818041952826, "learning_rate": 2.3128955902859395e-06, "loss": 0.2805, "step": 22975 }, { "epoch": 1.0763104885932449, "grad_norm": 0.6269145048829469, "learning_rate": 2.3127064725637335e-06, "loss": 0.2783, "step": 22976 }, { "epoch": 1.0763573335831733, "grad_norm": 0.5734109172478953, "learning_rate": 2.3125173559193554e-06, "loss": 0.2732, "step": 22977 }, { "epoch": 1.0764041785731016, "grad_norm": 0.5806290652525029, "learning_rate": 2.312328240353892e-06, "loss": 0.264, "step": 22978 }, { "epoch": 1.07645102356303, "grad_norm": 0.5779874100998554, "learning_rate": 2.312139125868432e-06, "loss": 0.2872, "step": 22979 }, { "epoch": 1.0764978685529583, "grad_norm": 0.5964846784553706, "learning_rate": 2.311950012464065e-06, "loss": 0.2753, "step": 22980 }, { "epoch": 1.0765447135428865, "grad_norm": 0.6356690831793099, "learning_rate": 2.3117609001418794e-06, "loss": 0.2854, "step": 22981 }, { "epoch": 1.076591558532815, "grad_norm": 0.5295017962104976, "learning_rate": 2.311571788902961e-06, "loss": 0.2635, "step": 22982 }, { "epoch": 1.0766384035227432, "grad_norm": 0.5845377437907182, "learning_rate": 2.3113826787484e-06, "loss": 0.2677, "step": 22983 }, { "epoch": 1.0766852485126717, "grad_norm": 0.5972275136711833, "learning_rate": 2.3111935696792843e-06, "loss": 0.2864, "step": 22984 }, { "epoch": 1.0767320935026, "grad_norm": 0.6108760497865954, "learning_rate": 2.311004461696702e-06, "loss": 0.2869, "step": 22985 }, { "epoch": 1.0767789384925281, "grad_norm": 0.5837909633650924, "learning_rate": 2.3108153548017416e-06, "loss": 0.2742, "step": 22986 }, { "epoch": 1.0768257834824566, "grad_norm": 0.6081626676919316, "learning_rate": 2.3106262489954914e-06, "loss": 0.2652, "step": 22987 }, { "epoch": 1.0768726284723849, "grad_norm": 0.6107377126168066, "learning_rate": 2.3104371442790398e-06, "loss": 0.2906, "step": 22988 }, { "epoch": 1.076919473462313, "grad_norm": 0.5601185079872912, "learning_rate": 2.310248040653474e-06, "loss": 0.2744, "step": 22989 }, { "epoch": 1.0769663184522416, "grad_norm": 0.5638715619910646, "learning_rate": 2.310058938119884e-06, "loss": 0.2793, "step": 22990 }, { "epoch": 1.0770131634421698, "grad_norm": 0.6138193447397787, "learning_rate": 2.3098698366793564e-06, "loss": 0.2769, "step": 22991 }, { "epoch": 1.0770600084320983, "grad_norm": 0.5733670601899139, "learning_rate": 2.3096807363329798e-06, "loss": 0.2746, "step": 22992 }, { "epoch": 1.0771068534220265, "grad_norm": 0.6235652625312067, "learning_rate": 2.309491637081844e-06, "loss": 0.2915, "step": 22993 }, { "epoch": 1.0771536984119547, "grad_norm": 0.5843832780497754, "learning_rate": 2.309302538927035e-06, "loss": 0.2549, "step": 22994 }, { "epoch": 1.0772005434018832, "grad_norm": 0.5469649112694994, "learning_rate": 2.309113441869642e-06, "loss": 0.2637, "step": 22995 }, { "epoch": 1.0772473883918114, "grad_norm": 0.5484527232827435, "learning_rate": 2.308924345910753e-06, "loss": 0.2583, "step": 22996 }, { "epoch": 1.07729423338174, "grad_norm": 0.5796021740885438, "learning_rate": 2.3087352510514565e-06, "loss": 0.2774, "step": 22997 }, { "epoch": 1.0773410783716681, "grad_norm": 0.575320617343916, "learning_rate": 2.3085461572928405e-06, "loss": 0.2783, "step": 22998 }, { "epoch": 1.0773879233615964, "grad_norm": 0.5830011029764577, "learning_rate": 2.308357064635994e-06, "loss": 0.2682, "step": 22999 }, { "epoch": 1.0774347683515249, "grad_norm": 0.6264790529278385, "learning_rate": 2.3081679730820038e-06, "loss": 0.2736, "step": 23000 }, { "epoch": 1.077481613341453, "grad_norm": 0.6053444592064838, "learning_rate": 2.3079788826319586e-06, "loss": 0.271, "step": 23001 }, { "epoch": 1.0775284583313816, "grad_norm": 0.5734889039879801, "learning_rate": 2.307789793286947e-06, "loss": 0.2739, "step": 23002 }, { "epoch": 1.0775753033213098, "grad_norm": 0.6384871137053998, "learning_rate": 2.307600705048056e-06, "loss": 0.3003, "step": 23003 }, { "epoch": 1.077622148311238, "grad_norm": 0.5698026636825559, "learning_rate": 2.3074116179163755e-06, "loss": 0.2642, "step": 23004 }, { "epoch": 1.0776689933011665, "grad_norm": 0.5875784649652855, "learning_rate": 2.307222531892993e-06, "loss": 0.2704, "step": 23005 }, { "epoch": 1.0777158382910947, "grad_norm": 0.5735661659037082, "learning_rate": 2.307033446978996e-06, "loss": 0.2831, "step": 23006 }, { "epoch": 1.0777626832810232, "grad_norm": 0.5713122201207532, "learning_rate": 2.3068443631754724e-06, "loss": 0.2652, "step": 23007 }, { "epoch": 1.0778095282709514, "grad_norm": 0.6311093990328717, "learning_rate": 2.3066552804835117e-06, "loss": 0.2794, "step": 23008 }, { "epoch": 1.0778563732608797, "grad_norm": 0.5748713670471255, "learning_rate": 2.3064661989042007e-06, "loss": 0.2792, "step": 23009 }, { "epoch": 1.0779032182508081, "grad_norm": 0.5755339265309455, "learning_rate": 2.306277118438628e-06, "loss": 0.2758, "step": 23010 }, { "epoch": 1.0779500632407364, "grad_norm": 0.5980574724649625, "learning_rate": 2.306088039087883e-06, "loss": 0.2836, "step": 23011 }, { "epoch": 1.0779969082306646, "grad_norm": 0.6010259718951806, "learning_rate": 2.305898960853052e-06, "loss": 0.2878, "step": 23012 }, { "epoch": 1.078043753220593, "grad_norm": 0.5522203408918583, "learning_rate": 2.305709883735223e-06, "loss": 0.2643, "step": 23013 }, { "epoch": 1.0780905982105213, "grad_norm": 0.6423531709758272, "learning_rate": 2.3055208077354846e-06, "loss": 0.2883, "step": 23014 }, { "epoch": 1.0781374432004498, "grad_norm": 0.6041010639198272, "learning_rate": 2.305331732854926e-06, "loss": 0.278, "step": 23015 }, { "epoch": 1.078184288190378, "grad_norm": 0.5343522358004842, "learning_rate": 2.305142659094634e-06, "loss": 0.2581, "step": 23016 }, { "epoch": 1.0782311331803063, "grad_norm": 0.6240634144870024, "learning_rate": 2.3049535864556977e-06, "loss": 0.276, "step": 23017 }, { "epoch": 1.0782779781702347, "grad_norm": 0.5808646324214497, "learning_rate": 2.3047645149392034e-06, "loss": 0.2647, "step": 23018 }, { "epoch": 1.078324823160163, "grad_norm": 0.6190862486499321, "learning_rate": 2.3045754445462407e-06, "loss": 0.2757, "step": 23019 }, { "epoch": 1.0783716681500914, "grad_norm": 0.5853035273168633, "learning_rate": 2.304386375277897e-06, "loss": 0.2694, "step": 23020 }, { "epoch": 1.0784185131400197, "grad_norm": 0.6326232017952179, "learning_rate": 2.3041973071352605e-06, "loss": 0.2796, "step": 23021 }, { "epoch": 1.078465358129948, "grad_norm": 0.5856221738332664, "learning_rate": 2.30400824011942e-06, "loss": 0.2712, "step": 23022 }, { "epoch": 1.0785122031198764, "grad_norm": 0.6455303098031104, "learning_rate": 2.303819174231463e-06, "loss": 0.2895, "step": 23023 }, { "epoch": 1.0785590481098046, "grad_norm": 0.586343931026166, "learning_rate": 2.303630109472476e-06, "loss": 0.2782, "step": 23024 }, { "epoch": 1.0786058930997329, "grad_norm": 0.5882970026311667, "learning_rate": 2.3034410458435486e-06, "loss": 0.2849, "step": 23025 }, { "epoch": 1.0786527380896613, "grad_norm": 0.6323028793790133, "learning_rate": 2.3032519833457695e-06, "loss": 0.2751, "step": 23026 }, { "epoch": 1.0786995830795896, "grad_norm": 0.5998697144439341, "learning_rate": 2.303062921980225e-06, "loss": 0.282, "step": 23027 }, { "epoch": 1.078746428069518, "grad_norm": 0.5596487918739182, "learning_rate": 2.302873861748004e-06, "loss": 0.2573, "step": 23028 }, { "epoch": 1.0787932730594463, "grad_norm": 0.5932340337448239, "learning_rate": 2.3026848026501953e-06, "loss": 0.2646, "step": 23029 }, { "epoch": 1.0788401180493745, "grad_norm": 0.628496609546451, "learning_rate": 2.3024957446878854e-06, "loss": 0.2863, "step": 23030 }, { "epoch": 1.078886963039303, "grad_norm": 0.6112337885769541, "learning_rate": 2.3023066878621624e-06, "loss": 0.2771, "step": 23031 }, { "epoch": 1.0789338080292312, "grad_norm": 0.5912993820372971, "learning_rate": 2.3021176321741144e-06, "loss": 0.2826, "step": 23032 }, { "epoch": 1.0789806530191597, "grad_norm": 0.6072680316816931, "learning_rate": 2.3019285776248305e-06, "loss": 0.2811, "step": 23033 }, { "epoch": 1.079027498009088, "grad_norm": 0.5833137126515201, "learning_rate": 2.3017395242153977e-06, "loss": 0.2779, "step": 23034 }, { "epoch": 1.0790743429990162, "grad_norm": 0.6003327935848816, "learning_rate": 2.3015504719469046e-06, "loss": 0.2855, "step": 23035 }, { "epoch": 1.0791211879889446, "grad_norm": 0.5621082344018921, "learning_rate": 2.301361420820438e-06, "loss": 0.277, "step": 23036 }, { "epoch": 1.0791680329788729, "grad_norm": 0.6099690661706445, "learning_rate": 2.3011723708370867e-06, "loss": 0.2816, "step": 23037 }, { "epoch": 1.0792148779688013, "grad_norm": 0.6040845870337384, "learning_rate": 2.300983321997938e-06, "loss": 0.2697, "step": 23038 }, { "epoch": 1.0792617229587296, "grad_norm": 0.6319132204179848, "learning_rate": 2.300794274304081e-06, "loss": 0.2878, "step": 23039 }, { "epoch": 1.0793085679486578, "grad_norm": 0.6115271051033687, "learning_rate": 2.300605227756602e-06, "loss": 0.2767, "step": 23040 }, { "epoch": 1.0793554129385863, "grad_norm": 0.5437612696135034, "learning_rate": 2.3004161823565908e-06, "loss": 0.2692, "step": 23041 }, { "epoch": 1.0794022579285145, "grad_norm": 0.6072504932268186, "learning_rate": 2.3002271381051337e-06, "loss": 0.2929, "step": 23042 }, { "epoch": 1.079449102918443, "grad_norm": 0.5599285443822524, "learning_rate": 2.3000380950033198e-06, "loss": 0.2623, "step": 23043 }, { "epoch": 1.0794959479083712, "grad_norm": 0.5893092450696567, "learning_rate": 2.2998490530522356e-06, "loss": 0.2789, "step": 23044 }, { "epoch": 1.0795427928982995, "grad_norm": 0.5527553155347517, "learning_rate": 2.2996600122529693e-06, "loss": 0.2634, "step": 23045 }, { "epoch": 1.079589637888228, "grad_norm": 0.5818344809717207, "learning_rate": 2.2994709726066106e-06, "loss": 0.2554, "step": 23046 }, { "epoch": 1.0796364828781562, "grad_norm": 0.5793962213255307, "learning_rate": 2.2992819341142453e-06, "loss": 0.2624, "step": 23047 }, { "epoch": 1.0796833278680844, "grad_norm": 0.5839538930196395, "learning_rate": 2.2990928967769628e-06, "loss": 0.2742, "step": 23048 }, { "epoch": 1.0797301728580129, "grad_norm": 0.569756880863868, "learning_rate": 2.2989038605958493e-06, "loss": 0.2706, "step": 23049 }, { "epoch": 1.0797770178479411, "grad_norm": 0.5758362708002898, "learning_rate": 2.298714825571994e-06, "loss": 0.2722, "step": 23050 }, { "epoch": 1.0798238628378696, "grad_norm": 0.5752261784000604, "learning_rate": 2.298525791706484e-06, "loss": 0.277, "step": 23051 }, { "epoch": 1.0798707078277978, "grad_norm": 0.5654128179015455, "learning_rate": 2.298336759000407e-06, "loss": 0.2819, "step": 23052 }, { "epoch": 1.079917552817726, "grad_norm": 0.5536598596754732, "learning_rate": 2.2981477274548525e-06, "loss": 0.2678, "step": 23053 }, { "epoch": 1.0799643978076545, "grad_norm": 0.6099461370742651, "learning_rate": 2.297958697070907e-06, "loss": 0.2719, "step": 23054 }, { "epoch": 1.0800112427975828, "grad_norm": 0.6133846318164022, "learning_rate": 2.2977696678496575e-06, "loss": 0.2855, "step": 23055 }, { "epoch": 1.0800580877875112, "grad_norm": 0.5901276326918451, "learning_rate": 2.297580639792193e-06, "loss": 0.2646, "step": 23056 }, { "epoch": 1.0801049327774395, "grad_norm": 0.5669723838959455, "learning_rate": 2.2973916128996014e-06, "loss": 0.2723, "step": 23057 }, { "epoch": 1.0801517777673677, "grad_norm": 0.5977105362309919, "learning_rate": 2.2972025871729696e-06, "loss": 0.2953, "step": 23058 }, { "epoch": 1.0801986227572962, "grad_norm": 0.5455499078882746, "learning_rate": 2.2970135626133857e-06, "loss": 0.2709, "step": 23059 }, { "epoch": 1.0802454677472244, "grad_norm": 0.5750637356356161, "learning_rate": 2.296824539221939e-06, "loss": 0.2678, "step": 23060 }, { "epoch": 1.0802923127371526, "grad_norm": 0.5755562440871438, "learning_rate": 2.2966355169997155e-06, "loss": 0.2788, "step": 23061 }, { "epoch": 1.0803391577270811, "grad_norm": 0.6369396579038902, "learning_rate": 2.296446495947803e-06, "loss": 0.2812, "step": 23062 }, { "epoch": 1.0803860027170094, "grad_norm": 0.5749410349213078, "learning_rate": 2.2962574760672894e-06, "loss": 0.2698, "step": 23063 }, { "epoch": 1.0804328477069378, "grad_norm": 0.5843937280925053, "learning_rate": 2.296068457359264e-06, "loss": 0.2704, "step": 23064 }, { "epoch": 1.080479692696866, "grad_norm": 0.5653546481756792, "learning_rate": 2.2958794398248123e-06, "loss": 0.2474, "step": 23065 }, { "epoch": 1.0805265376867943, "grad_norm": 0.6250139079743177, "learning_rate": 2.2956904234650245e-06, "loss": 0.2691, "step": 23066 }, { "epoch": 1.0805733826767228, "grad_norm": 0.6069092160752795, "learning_rate": 2.2955014082809857e-06, "loss": 0.2932, "step": 23067 }, { "epoch": 1.080620227666651, "grad_norm": 0.6283896547544704, "learning_rate": 2.2953123942737853e-06, "loss": 0.2748, "step": 23068 }, { "epoch": 1.0806670726565795, "grad_norm": 0.6095438145790651, "learning_rate": 2.2951233814445105e-06, "loss": 0.2752, "step": 23069 }, { "epoch": 1.0807139176465077, "grad_norm": 0.6552480368399042, "learning_rate": 2.2949343697942487e-06, "loss": 0.2962, "step": 23070 }, { "epoch": 1.080760762636436, "grad_norm": 0.5603817381373508, "learning_rate": 2.294745359324089e-06, "loss": 0.2751, "step": 23071 }, { "epoch": 1.0808076076263644, "grad_norm": 0.5797402922356419, "learning_rate": 2.2945563500351187e-06, "loss": 0.2642, "step": 23072 }, { "epoch": 1.0808544526162926, "grad_norm": 0.6202632379022199, "learning_rate": 2.2943673419284235e-06, "loss": 0.2831, "step": 23073 }, { "epoch": 1.0809012976062211, "grad_norm": 0.6144747707554865, "learning_rate": 2.2941783350050926e-06, "loss": 0.2875, "step": 23074 }, { "epoch": 1.0809481425961494, "grad_norm": 0.6186677805865786, "learning_rate": 2.2939893292662145e-06, "loss": 0.2717, "step": 23075 }, { "epoch": 1.0809949875860776, "grad_norm": 0.6467582543806303, "learning_rate": 2.2938003247128755e-06, "loss": 0.2887, "step": 23076 }, { "epoch": 1.081041832576006, "grad_norm": 0.6418119237966409, "learning_rate": 2.293611321346164e-06, "loss": 0.3136, "step": 23077 }, { "epoch": 1.0810886775659343, "grad_norm": 0.5679808626595355, "learning_rate": 2.293422319167168e-06, "loss": 0.2771, "step": 23078 }, { "epoch": 1.0811355225558628, "grad_norm": 0.5555672436024809, "learning_rate": 2.2932333181769743e-06, "loss": 0.2634, "step": 23079 }, { "epoch": 1.081182367545791, "grad_norm": 0.6643772171984162, "learning_rate": 2.29304431837667e-06, "loss": 0.289, "step": 23080 }, { "epoch": 1.0812292125357192, "grad_norm": 0.6021418936999388, "learning_rate": 2.292855319767345e-06, "loss": 0.2826, "step": 23081 }, { "epoch": 1.0812760575256477, "grad_norm": 0.6055097232805, "learning_rate": 2.2926663223500843e-06, "loss": 0.2872, "step": 23082 }, { "epoch": 1.081322902515576, "grad_norm": 0.6135470279846608, "learning_rate": 2.292477326125977e-06, "loss": 0.2805, "step": 23083 }, { "epoch": 1.0813697475055042, "grad_norm": 0.6258622289189963, "learning_rate": 2.2922883310961117e-06, "loss": 0.2951, "step": 23084 }, { "epoch": 1.0814165924954326, "grad_norm": 0.588074091252959, "learning_rate": 2.2920993372615744e-06, "loss": 0.2749, "step": 23085 }, { "epoch": 1.081463437485361, "grad_norm": 0.6211518134161206, "learning_rate": 2.2919103446234525e-06, "loss": 0.2959, "step": 23086 }, { "epoch": 1.0815102824752894, "grad_norm": 0.6064259604895381, "learning_rate": 2.291721353182834e-06, "loss": 0.2884, "step": 23087 }, { "epoch": 1.0815571274652176, "grad_norm": 0.5501229487732854, "learning_rate": 2.291532362940808e-06, "loss": 0.2795, "step": 23088 }, { "epoch": 1.0816039724551458, "grad_norm": 0.5722005934529286, "learning_rate": 2.29134337389846e-06, "loss": 0.2663, "step": 23089 }, { "epoch": 1.0816508174450743, "grad_norm": 0.6363238953350202, "learning_rate": 2.2911543860568792e-06, "loss": 0.2817, "step": 23090 }, { "epoch": 1.0816976624350025, "grad_norm": 0.5767279451018856, "learning_rate": 2.290965399417151e-06, "loss": 0.2745, "step": 23091 }, { "epoch": 1.081744507424931, "grad_norm": 0.5802656902652296, "learning_rate": 2.290776413980366e-06, "loss": 0.2737, "step": 23092 }, { "epoch": 1.0817913524148592, "grad_norm": 0.5892169171260568, "learning_rate": 2.290587429747609e-06, "loss": 0.2808, "step": 23093 }, { "epoch": 1.0818381974047875, "grad_norm": 0.5568780778902678, "learning_rate": 2.2903984467199685e-06, "loss": 0.2811, "step": 23094 }, { "epoch": 1.081885042394716, "grad_norm": 0.5692693846760543, "learning_rate": 2.2902094648985335e-06, "loss": 0.2561, "step": 23095 }, { "epoch": 1.0819318873846442, "grad_norm": 0.5478411613595724, "learning_rate": 2.2900204842843903e-06, "loss": 0.2672, "step": 23096 }, { "epoch": 1.0819787323745724, "grad_norm": 0.6088898414170827, "learning_rate": 2.2898315048786256e-06, "loss": 0.2796, "step": 23097 }, { "epoch": 1.082025577364501, "grad_norm": 0.6064477274117157, "learning_rate": 2.2896425266823273e-06, "loss": 0.2719, "step": 23098 }, { "epoch": 1.0820724223544291, "grad_norm": 0.5689652192780031, "learning_rate": 2.2894535496965845e-06, "loss": 0.2665, "step": 23099 }, { "epoch": 1.0821192673443576, "grad_norm": 0.5931480063278384, "learning_rate": 2.289264573922483e-06, "loss": 0.2805, "step": 23100 }, { "epoch": 1.0821661123342858, "grad_norm": 0.5109450641205634, "learning_rate": 2.2890755993611104e-06, "loss": 0.254, "step": 23101 }, { "epoch": 1.082212957324214, "grad_norm": 0.591781667898645, "learning_rate": 2.288886626013556e-06, "loss": 0.2937, "step": 23102 }, { "epoch": 1.0822598023141425, "grad_norm": 0.6004169969508284, "learning_rate": 2.288697653880906e-06, "loss": 0.2843, "step": 23103 }, { "epoch": 1.0823066473040708, "grad_norm": 0.5724296664634227, "learning_rate": 2.2885086829642468e-06, "loss": 0.2812, "step": 23104 }, { "epoch": 1.0823534922939992, "grad_norm": 0.599439867064222, "learning_rate": 2.2883197132646673e-06, "loss": 0.2729, "step": 23105 }, { "epoch": 1.0824003372839275, "grad_norm": 0.6250811845313164, "learning_rate": 2.288130744783255e-06, "loss": 0.2924, "step": 23106 }, { "epoch": 1.0824471822738557, "grad_norm": 0.6198474486453691, "learning_rate": 2.2879417775210963e-06, "loss": 0.2932, "step": 23107 }, { "epoch": 1.0824940272637842, "grad_norm": 0.5580521676197051, "learning_rate": 2.287752811479281e-06, "loss": 0.2728, "step": 23108 }, { "epoch": 1.0825408722537124, "grad_norm": 0.5527612109490327, "learning_rate": 2.2875638466588932e-06, "loss": 0.2663, "step": 23109 }, { "epoch": 1.082587717243641, "grad_norm": 0.5569639258391865, "learning_rate": 2.2873748830610227e-06, "loss": 0.2695, "step": 23110 }, { "epoch": 1.0826345622335691, "grad_norm": 0.5770665461152288, "learning_rate": 2.287185920686756e-06, "loss": 0.2794, "step": 23111 }, { "epoch": 1.0826814072234974, "grad_norm": 0.5915334504961819, "learning_rate": 2.286996959537181e-06, "loss": 0.2752, "step": 23112 }, { "epoch": 1.0827282522134258, "grad_norm": 0.6100028540472417, "learning_rate": 2.286807999613385e-06, "loss": 0.2882, "step": 23113 }, { "epoch": 1.082775097203354, "grad_norm": 0.5975356699985724, "learning_rate": 2.2866190409164552e-06, "loss": 0.2983, "step": 23114 }, { "epoch": 1.0828219421932825, "grad_norm": 0.5540514149484407, "learning_rate": 2.2864300834474805e-06, "loss": 0.2728, "step": 23115 }, { "epoch": 1.0828687871832108, "grad_norm": 0.5979178288810433, "learning_rate": 2.2862411272075455e-06, "loss": 0.2746, "step": 23116 }, { "epoch": 1.082915632173139, "grad_norm": 0.5235636440530445, "learning_rate": 2.2860521721977396e-06, "loss": 0.2554, "step": 23117 }, { "epoch": 1.0829624771630675, "grad_norm": 0.5894433806933594, "learning_rate": 2.2858632184191494e-06, "loss": 0.2819, "step": 23118 }, { "epoch": 1.0830093221529957, "grad_norm": 0.5588914397784019, "learning_rate": 2.2856742658728625e-06, "loss": 0.2689, "step": 23119 }, { "epoch": 1.083056167142924, "grad_norm": 0.5478471475924467, "learning_rate": 2.2854853145599666e-06, "loss": 0.2636, "step": 23120 }, { "epoch": 1.0831030121328524, "grad_norm": 0.5877251267133016, "learning_rate": 2.28529636448155e-06, "loss": 0.2774, "step": 23121 }, { "epoch": 1.0831498571227807, "grad_norm": 0.6203987926015502, "learning_rate": 2.285107415638697e-06, "loss": 0.2914, "step": 23122 }, { "epoch": 1.0831967021127091, "grad_norm": 0.563805051792731, "learning_rate": 2.2849184680324973e-06, "loss": 0.2656, "step": 23123 }, { "epoch": 1.0832435471026374, "grad_norm": 0.6045615675933667, "learning_rate": 2.284729521664038e-06, "loss": 0.2712, "step": 23124 }, { "epoch": 1.0832903920925656, "grad_norm": 0.5879966351164496, "learning_rate": 2.284540576534406e-06, "loss": 0.2809, "step": 23125 }, { "epoch": 1.083337237082494, "grad_norm": 0.5876541941579252, "learning_rate": 2.284351632644689e-06, "loss": 0.2739, "step": 23126 }, { "epoch": 1.0833840820724223, "grad_norm": 0.6287749412029476, "learning_rate": 2.284162689995974e-06, "loss": 0.2781, "step": 23127 }, { "epoch": 1.0834309270623508, "grad_norm": 0.6245148029394609, "learning_rate": 2.283973748589349e-06, "loss": 0.2869, "step": 23128 }, { "epoch": 1.083477772052279, "grad_norm": 0.532935624965875, "learning_rate": 2.2837848084259e-06, "loss": 0.2639, "step": 23129 }, { "epoch": 1.0835246170422073, "grad_norm": 0.6490299019056877, "learning_rate": 2.283595869506716e-06, "loss": 0.2956, "step": 23130 }, { "epoch": 1.0835714620321357, "grad_norm": 0.5685137246451504, "learning_rate": 2.2834069318328823e-06, "loss": 0.2642, "step": 23131 }, { "epoch": 1.083618307022064, "grad_norm": 0.5954768635385845, "learning_rate": 2.2832179954054875e-06, "loss": 0.2809, "step": 23132 }, { "epoch": 1.0836651520119922, "grad_norm": 0.5997683979660599, "learning_rate": 2.28302906022562e-06, "loss": 0.2681, "step": 23133 }, { "epoch": 1.0837119970019207, "grad_norm": 0.6057951953708876, "learning_rate": 2.2828401262943643e-06, "loss": 0.2989, "step": 23134 }, { "epoch": 1.083758841991849, "grad_norm": 0.593471100970243, "learning_rate": 2.2826511936128093e-06, "loss": 0.2833, "step": 23135 }, { "epoch": 1.0838056869817774, "grad_norm": 0.5811349540837462, "learning_rate": 2.282462262182042e-06, "loss": 0.2676, "step": 23136 }, { "epoch": 1.0838525319717056, "grad_norm": 0.5604755602684992, "learning_rate": 2.28227333200315e-06, "loss": 0.2737, "step": 23137 }, { "epoch": 1.0838993769616339, "grad_norm": 0.5658062476739111, "learning_rate": 2.2820844030772198e-06, "loss": 0.2696, "step": 23138 }, { "epoch": 1.0839462219515623, "grad_norm": 0.5590768054519897, "learning_rate": 2.2818954754053403e-06, "loss": 0.267, "step": 23139 }, { "epoch": 1.0839930669414906, "grad_norm": 0.5566603292902624, "learning_rate": 2.2817065489885964e-06, "loss": 0.2721, "step": 23140 }, { "epoch": 1.084039911931419, "grad_norm": 0.5850745414644991, "learning_rate": 2.2815176238280765e-06, "loss": 0.279, "step": 23141 }, { "epoch": 1.0840867569213473, "grad_norm": 0.5609606061477715, "learning_rate": 2.2813286999248678e-06, "loss": 0.2644, "step": 23142 }, { "epoch": 1.0841336019112755, "grad_norm": 0.5743720959877953, "learning_rate": 2.2811397772800577e-06, "loss": 0.2909, "step": 23143 }, { "epoch": 1.084180446901204, "grad_norm": 0.6193516284628126, "learning_rate": 2.280950855894733e-06, "loss": 0.2927, "step": 23144 }, { "epoch": 1.0842272918911322, "grad_norm": 0.5731508136796122, "learning_rate": 2.280761935769982e-06, "loss": 0.2638, "step": 23145 }, { "epoch": 1.0842741368810607, "grad_norm": 0.6416664123194844, "learning_rate": 2.28057301690689e-06, "loss": 0.2982, "step": 23146 }, { "epoch": 1.084320981870989, "grad_norm": 0.5382504068415491, "learning_rate": 2.280384099306545e-06, "loss": 0.2723, "step": 23147 }, { "epoch": 1.0843678268609172, "grad_norm": 0.5794315921171492, "learning_rate": 2.2801951829700347e-06, "loss": 0.2888, "step": 23148 }, { "epoch": 1.0844146718508456, "grad_norm": 0.6516319612391036, "learning_rate": 2.2800062678984455e-06, "loss": 0.2874, "step": 23149 }, { "epoch": 1.0844615168407739, "grad_norm": 0.5446099704503835, "learning_rate": 2.2798173540928647e-06, "loss": 0.2717, "step": 23150 }, { "epoch": 1.0845083618307023, "grad_norm": 0.5539070680889819, "learning_rate": 2.279628441554381e-06, "loss": 0.2618, "step": 23151 }, { "epoch": 1.0845552068206306, "grad_norm": 0.6150768510444559, "learning_rate": 2.2794395302840798e-06, "loss": 0.2759, "step": 23152 }, { "epoch": 1.0846020518105588, "grad_norm": 0.5757635490809487, "learning_rate": 2.279250620283048e-06, "loss": 0.2757, "step": 23153 }, { "epoch": 1.0846488968004873, "grad_norm": 0.5799537709686263, "learning_rate": 2.2790617115523735e-06, "loss": 0.2679, "step": 23154 }, { "epoch": 1.0846957417904155, "grad_norm": 0.5508010485603423, "learning_rate": 2.2788728040931435e-06, "loss": 0.2782, "step": 23155 }, { "epoch": 1.0847425867803437, "grad_norm": 0.6102860436582376, "learning_rate": 2.2786838979064445e-06, "loss": 0.2729, "step": 23156 }, { "epoch": 1.0847894317702722, "grad_norm": 0.5816361757744204, "learning_rate": 2.2784949929933654e-06, "loss": 0.2612, "step": 23157 }, { "epoch": 1.0848362767602004, "grad_norm": 0.5622111097250423, "learning_rate": 2.2783060893549905e-06, "loss": 0.2763, "step": 23158 }, { "epoch": 1.084883121750129, "grad_norm": 0.5857838492072583, "learning_rate": 2.278117186992409e-06, "loss": 0.2787, "step": 23159 }, { "epoch": 1.0849299667400572, "grad_norm": 0.6083314726171708, "learning_rate": 2.277928285906707e-06, "loss": 0.2843, "step": 23160 }, { "epoch": 1.0849768117299854, "grad_norm": 0.5578585052256881, "learning_rate": 2.2777393860989717e-06, "loss": 0.2481, "step": 23161 }, { "epoch": 1.0850236567199139, "grad_norm": 0.529570710459911, "learning_rate": 2.277550487570291e-06, "loss": 0.2655, "step": 23162 }, { "epoch": 1.085070501709842, "grad_norm": 0.5623744936856884, "learning_rate": 2.277361590321752e-06, "loss": 0.2596, "step": 23163 }, { "epoch": 1.0851173466997706, "grad_norm": 0.5852216678737571, "learning_rate": 2.27717269435444e-06, "loss": 0.2578, "step": 23164 }, { "epoch": 1.0851641916896988, "grad_norm": 0.5624448579873863, "learning_rate": 2.276983799669443e-06, "loss": 0.2684, "step": 23165 }, { "epoch": 1.085211036679627, "grad_norm": 0.6046546581725335, "learning_rate": 2.2767949062678487e-06, "loss": 0.2787, "step": 23166 }, { "epoch": 1.0852578816695555, "grad_norm": 0.5590780436913952, "learning_rate": 2.2766060141507426e-06, "loss": 0.2653, "step": 23167 }, { "epoch": 1.0853047266594837, "grad_norm": 0.577425038825283, "learning_rate": 2.2764171233192136e-06, "loss": 0.2727, "step": 23168 }, { "epoch": 1.085351571649412, "grad_norm": 0.6526341239503467, "learning_rate": 2.2762282337743484e-06, "loss": 0.2865, "step": 23169 }, { "epoch": 1.0853984166393404, "grad_norm": 0.5980887039416471, "learning_rate": 2.2760393455172333e-06, "loss": 0.2751, "step": 23170 }, { "epoch": 1.0854452616292687, "grad_norm": 0.5952007858371864, "learning_rate": 2.275850458548954e-06, "loss": 0.2705, "step": 23171 }, { "epoch": 1.0854921066191972, "grad_norm": 0.5573811182400291, "learning_rate": 2.2756615728706e-06, "loss": 0.2718, "step": 23172 }, { "epoch": 1.0855389516091254, "grad_norm": 0.5849071699712741, "learning_rate": 2.2754726884832574e-06, "loss": 0.2966, "step": 23173 }, { "epoch": 1.0855857965990536, "grad_norm": 0.5813903506417284, "learning_rate": 2.2752838053880127e-06, "loss": 0.2679, "step": 23174 }, { "epoch": 1.085632641588982, "grad_norm": 0.5522333010546775, "learning_rate": 2.2750949235859545e-06, "loss": 0.2693, "step": 23175 }, { "epoch": 1.0856794865789103, "grad_norm": 0.5848294748419166, "learning_rate": 2.2749060430781668e-06, "loss": 0.2732, "step": 23176 }, { "epoch": 1.0857263315688388, "grad_norm": 0.6101966023200284, "learning_rate": 2.2747171638657394e-06, "loss": 0.274, "step": 23177 }, { "epoch": 1.085773176558767, "grad_norm": 0.5711858270325161, "learning_rate": 2.274528285949757e-06, "loss": 0.2827, "step": 23178 }, { "epoch": 1.0858200215486953, "grad_norm": 0.6011392634090371, "learning_rate": 2.2743394093313086e-06, "loss": 0.279, "step": 23179 }, { "epoch": 1.0858668665386237, "grad_norm": 0.5683904401175828, "learning_rate": 2.2741505340114795e-06, "loss": 0.2661, "step": 23180 }, { "epoch": 1.085913711528552, "grad_norm": 0.558640859490319, "learning_rate": 2.273961659991358e-06, "loss": 0.2773, "step": 23181 }, { "epoch": 1.0859605565184804, "grad_norm": 0.6396363737524082, "learning_rate": 2.27377278727203e-06, "loss": 0.2788, "step": 23182 }, { "epoch": 1.0860074015084087, "grad_norm": 0.5791537437469099, "learning_rate": 2.273583915854583e-06, "loss": 0.2677, "step": 23183 }, { "epoch": 1.086054246498337, "grad_norm": 0.5667615807515035, "learning_rate": 2.273395045740103e-06, "loss": 0.2672, "step": 23184 }, { "epoch": 1.0861010914882654, "grad_norm": 0.587854389130895, "learning_rate": 2.2732061769296776e-06, "loss": 0.2802, "step": 23185 }, { "epoch": 1.0861479364781936, "grad_norm": 0.5758585959721519, "learning_rate": 2.273017309424394e-06, "loss": 0.2712, "step": 23186 }, { "epoch": 1.086194781468122, "grad_norm": 0.6183190610527228, "learning_rate": 2.2728284432253383e-06, "loss": 0.2838, "step": 23187 }, { "epoch": 1.0862416264580503, "grad_norm": 0.5808104875544984, "learning_rate": 2.272639578333599e-06, "loss": 0.2696, "step": 23188 }, { "epoch": 1.0862884714479786, "grad_norm": 0.6166269217318243, "learning_rate": 2.2724507147502607e-06, "loss": 0.27, "step": 23189 }, { "epoch": 1.086335316437907, "grad_norm": 0.5777718067456941, "learning_rate": 2.272261852476412e-06, "loss": 0.276, "step": 23190 }, { "epoch": 1.0863821614278353, "grad_norm": 0.6161314020501145, "learning_rate": 2.2720729915131383e-06, "loss": 0.2851, "step": 23191 }, { "epoch": 1.0864290064177635, "grad_norm": 0.6219121387270374, "learning_rate": 2.2718841318615273e-06, "loss": 0.2633, "step": 23192 }, { "epoch": 1.086475851407692, "grad_norm": 0.6149243047692391, "learning_rate": 2.2716952735226662e-06, "loss": 0.2747, "step": 23193 }, { "epoch": 1.0865226963976202, "grad_norm": 0.6179291875742479, "learning_rate": 2.2715064164976426e-06, "loss": 0.2752, "step": 23194 }, { "epoch": 1.0865695413875487, "grad_norm": 0.576117425142799, "learning_rate": 2.2713175607875402e-06, "loss": 0.259, "step": 23195 }, { "epoch": 1.086616386377477, "grad_norm": 0.5575680474706147, "learning_rate": 2.2711287063934484e-06, "loss": 0.2724, "step": 23196 }, { "epoch": 1.0866632313674052, "grad_norm": 0.6383286844917377, "learning_rate": 2.270939853316454e-06, "loss": 0.2725, "step": 23197 }, { "epoch": 1.0867100763573336, "grad_norm": 0.5983837968855406, "learning_rate": 2.270751001557642e-06, "loss": 0.2573, "step": 23198 }, { "epoch": 1.0867569213472619, "grad_norm": 0.5472997729220979, "learning_rate": 2.2705621511181005e-06, "loss": 0.2659, "step": 23199 }, { "epoch": 1.0868037663371903, "grad_norm": 0.5884933661448127, "learning_rate": 2.2703733019989173e-06, "loss": 0.2806, "step": 23200 }, { "epoch": 1.0868506113271186, "grad_norm": 0.5665058904749777, "learning_rate": 2.270184454201178e-06, "loss": 0.2783, "step": 23201 }, { "epoch": 1.0868974563170468, "grad_norm": 0.6186996182625647, "learning_rate": 2.2699956077259684e-06, "loss": 0.286, "step": 23202 }, { "epoch": 1.0869443013069753, "grad_norm": 0.5749518265684967, "learning_rate": 2.2698067625743765e-06, "loss": 0.2787, "step": 23203 }, { "epoch": 1.0869911462969035, "grad_norm": 0.5755507699428417, "learning_rate": 2.269617918747489e-06, "loss": 0.2766, "step": 23204 }, { "epoch": 1.0870379912868318, "grad_norm": 0.6024212898220094, "learning_rate": 2.2694290762463924e-06, "loss": 0.2899, "step": 23205 }, { "epoch": 1.0870848362767602, "grad_norm": 0.5736382592364133, "learning_rate": 2.269240235072174e-06, "loss": 0.2616, "step": 23206 }, { "epoch": 1.0871316812666885, "grad_norm": 0.5795304639670681, "learning_rate": 2.2690513952259193e-06, "loss": 0.2756, "step": 23207 }, { "epoch": 1.087178526256617, "grad_norm": 0.5849871886428712, "learning_rate": 2.2688625567087164e-06, "loss": 0.2869, "step": 23208 }, { "epoch": 1.0872253712465452, "grad_norm": 0.59832241153938, "learning_rate": 2.268673719521651e-06, "loss": 0.2771, "step": 23209 }, { "epoch": 1.0872722162364734, "grad_norm": 0.5525109998153039, "learning_rate": 2.2684848836658097e-06, "loss": 0.2875, "step": 23210 }, { "epoch": 1.0873190612264019, "grad_norm": 0.6051254656158006, "learning_rate": 2.2682960491422803e-06, "loss": 0.2808, "step": 23211 }, { "epoch": 1.0873659062163301, "grad_norm": 0.5972016030155382, "learning_rate": 2.2681072159521497e-06, "loss": 0.2738, "step": 23212 }, { "epoch": 1.0874127512062586, "grad_norm": 0.6256392385178764, "learning_rate": 2.2679183840965026e-06, "loss": 0.2807, "step": 23213 }, { "epoch": 1.0874595961961868, "grad_norm": 0.5964274192379878, "learning_rate": 2.267729553576427e-06, "loss": 0.2764, "step": 23214 }, { "epoch": 1.087506441186115, "grad_norm": 0.6135697162540231, "learning_rate": 2.26754072439301e-06, "loss": 0.284, "step": 23215 }, { "epoch": 1.0875532861760435, "grad_norm": 0.5496192208502402, "learning_rate": 2.267351896547337e-06, "loss": 0.2545, "step": 23216 }, { "epoch": 1.0876001311659718, "grad_norm": 0.559210648549149, "learning_rate": 2.267163070040496e-06, "loss": 0.2741, "step": 23217 }, { "epoch": 1.0876469761559002, "grad_norm": 0.5895307596271426, "learning_rate": 2.266974244873573e-06, "loss": 0.2845, "step": 23218 }, { "epoch": 1.0876938211458285, "grad_norm": 0.6182923932055252, "learning_rate": 2.266785421047655e-06, "loss": 0.2923, "step": 23219 }, { "epoch": 1.0877406661357567, "grad_norm": 0.5969669977584918, "learning_rate": 2.266596598563827e-06, "loss": 0.2875, "step": 23220 }, { "epoch": 1.0877875111256852, "grad_norm": 0.5405268923454943, "learning_rate": 2.266407777423178e-06, "loss": 0.2501, "step": 23221 }, { "epoch": 1.0878343561156134, "grad_norm": 0.6110288150776204, "learning_rate": 2.2662189576267926e-06, "loss": 0.2664, "step": 23222 }, { "epoch": 1.0878812011055419, "grad_norm": 0.5780216639489837, "learning_rate": 2.2660301391757586e-06, "loss": 0.2842, "step": 23223 }, { "epoch": 1.0879280460954701, "grad_norm": 0.6324965268556814, "learning_rate": 2.265841322071163e-06, "loss": 0.2807, "step": 23224 }, { "epoch": 1.0879748910853984, "grad_norm": 0.584583579731069, "learning_rate": 2.2656525063140917e-06, "loss": 0.2699, "step": 23225 }, { "epoch": 1.0880217360753268, "grad_norm": 0.585955251963359, "learning_rate": 2.265463691905631e-06, "loss": 0.2777, "step": 23226 }, { "epoch": 1.088068581065255, "grad_norm": 0.6376701966087709, "learning_rate": 2.2652748788468672e-06, "loss": 0.2834, "step": 23227 }, { "epoch": 1.0881154260551833, "grad_norm": 0.6095383341798826, "learning_rate": 2.2650860671388885e-06, "loss": 0.2793, "step": 23228 }, { "epoch": 1.0881622710451118, "grad_norm": 0.5966148457263677, "learning_rate": 2.2648972567827795e-06, "loss": 0.3064, "step": 23229 }, { "epoch": 1.08820911603504, "grad_norm": 0.6217843199925553, "learning_rate": 2.264708447779629e-06, "loss": 0.301, "step": 23230 }, { "epoch": 1.0882559610249685, "grad_norm": 0.5947726455820517, "learning_rate": 2.264519640130521e-06, "loss": 0.2772, "step": 23231 }, { "epoch": 1.0883028060148967, "grad_norm": 0.6020369937984877, "learning_rate": 2.264330833836544e-06, "loss": 0.2781, "step": 23232 }, { "epoch": 1.088349651004825, "grad_norm": 0.5928144347046344, "learning_rate": 2.2641420288987833e-06, "loss": 0.2945, "step": 23233 }, { "epoch": 1.0883964959947534, "grad_norm": 0.5939593556739627, "learning_rate": 2.263953225318326e-06, "loss": 0.2641, "step": 23234 }, { "epoch": 1.0884433409846817, "grad_norm": 0.5963146998226493, "learning_rate": 2.263764423096259e-06, "loss": 0.2831, "step": 23235 }, { "epoch": 1.0884901859746101, "grad_norm": 0.5509005251989003, "learning_rate": 2.263575622233669e-06, "loss": 0.2645, "step": 23236 }, { "epoch": 1.0885370309645384, "grad_norm": 0.6437159658266277, "learning_rate": 2.2633868227316404e-06, "loss": 0.2795, "step": 23237 }, { "epoch": 1.0885838759544666, "grad_norm": 0.5731787898538596, "learning_rate": 2.2631980245912614e-06, "loss": 0.2646, "step": 23238 }, { "epoch": 1.088630720944395, "grad_norm": 0.5963894949431711, "learning_rate": 2.263009227813619e-06, "loss": 0.2779, "step": 23239 }, { "epoch": 1.0886775659343233, "grad_norm": 0.6290043775715995, "learning_rate": 2.262820432399798e-06, "loss": 0.2953, "step": 23240 }, { "epoch": 1.0887244109242515, "grad_norm": 0.5952455750230489, "learning_rate": 2.2626316383508863e-06, "loss": 0.2751, "step": 23241 }, { "epoch": 1.08877125591418, "grad_norm": 0.5609827869574102, "learning_rate": 2.2624428456679706e-06, "loss": 0.2636, "step": 23242 }, { "epoch": 1.0888181009041082, "grad_norm": 0.594510676978294, "learning_rate": 2.262254054352136e-06, "loss": 0.2663, "step": 23243 }, { "epoch": 1.0888649458940367, "grad_norm": 0.5811546066504282, "learning_rate": 2.2620652644044694e-06, "loss": 0.2782, "step": 23244 }, { "epoch": 1.088911790883965, "grad_norm": 0.5773576795770543, "learning_rate": 2.261876475826057e-06, "loss": 0.2516, "step": 23245 }, { "epoch": 1.0889586358738932, "grad_norm": 0.5644392989940585, "learning_rate": 2.2616876886179865e-06, "loss": 0.2746, "step": 23246 }, { "epoch": 1.0890054808638217, "grad_norm": 0.5653409803431176, "learning_rate": 2.261498902781343e-06, "loss": 0.2784, "step": 23247 }, { "epoch": 1.08905232585375, "grad_norm": 0.6112448258439878, "learning_rate": 2.2613101183172138e-06, "loss": 0.2842, "step": 23248 }, { "epoch": 1.0890991708436784, "grad_norm": 0.5701568163043079, "learning_rate": 2.261121335226684e-06, "loss": 0.2658, "step": 23249 }, { "epoch": 1.0891460158336066, "grad_norm": 0.5410591733728632, "learning_rate": 2.260932553510842e-06, "loss": 0.2739, "step": 23250 }, { "epoch": 1.0891928608235348, "grad_norm": 0.5967123239345866, "learning_rate": 2.260743773170772e-06, "loss": 0.2898, "step": 23251 }, { "epoch": 1.0892397058134633, "grad_norm": 0.5818012791855501, "learning_rate": 2.2605549942075616e-06, "loss": 0.2778, "step": 23252 }, { "epoch": 1.0892865508033915, "grad_norm": 0.5727693603018154, "learning_rate": 2.2603662166222976e-06, "loss": 0.2834, "step": 23253 }, { "epoch": 1.08933339579332, "grad_norm": 0.6231190092686392, "learning_rate": 2.260177440416065e-06, "loss": 0.2817, "step": 23254 }, { "epoch": 1.0893802407832482, "grad_norm": 0.6122424084179495, "learning_rate": 2.2599886655899523e-06, "loss": 0.2917, "step": 23255 }, { "epoch": 1.0894270857731765, "grad_norm": 0.5826280769751409, "learning_rate": 2.2597998921450434e-06, "loss": 0.2723, "step": 23256 }, { "epoch": 1.089473930763105, "grad_norm": 0.5739662107016821, "learning_rate": 2.259611120082426e-06, "loss": 0.2688, "step": 23257 }, { "epoch": 1.0895207757530332, "grad_norm": 0.6573400087835186, "learning_rate": 2.259422349403186e-06, "loss": 0.3041, "step": 23258 }, { "epoch": 1.0895676207429617, "grad_norm": 0.531085006263983, "learning_rate": 2.25923358010841e-06, "loss": 0.2524, "step": 23259 }, { "epoch": 1.08961446573289, "grad_norm": 0.6038868777618225, "learning_rate": 2.2590448121991847e-06, "loss": 0.29, "step": 23260 }, { "epoch": 1.0896613107228181, "grad_norm": 0.5698703997544798, "learning_rate": 2.258856045676596e-06, "loss": 0.2661, "step": 23261 }, { "epoch": 1.0897081557127466, "grad_norm": 0.5919698285358465, "learning_rate": 2.2586672805417296e-06, "loss": 0.2893, "step": 23262 }, { "epoch": 1.0897550007026748, "grad_norm": 0.5911798260138641, "learning_rate": 2.2584785167956723e-06, "loss": 0.2785, "step": 23263 }, { "epoch": 1.089801845692603, "grad_norm": 0.6012645218826685, "learning_rate": 2.258289754439511e-06, "loss": 0.2978, "step": 23264 }, { "epoch": 1.0898486906825315, "grad_norm": 0.543668574544501, "learning_rate": 2.25810099347433e-06, "loss": 0.249, "step": 23265 }, { "epoch": 1.0898955356724598, "grad_norm": 0.5968529563657838, "learning_rate": 2.2579122339012187e-06, "loss": 0.2628, "step": 23266 }, { "epoch": 1.0899423806623882, "grad_norm": 0.5376596121139327, "learning_rate": 2.2577234757212614e-06, "loss": 0.2536, "step": 23267 }, { "epoch": 1.0899892256523165, "grad_norm": 0.6202898379850159, "learning_rate": 2.2575347189355444e-06, "loss": 0.2752, "step": 23268 }, { "epoch": 1.0900360706422447, "grad_norm": 0.5983909020665111, "learning_rate": 2.2573459635451533e-06, "loss": 0.2862, "step": 23269 }, { "epoch": 1.0900829156321732, "grad_norm": 0.571014922366865, "learning_rate": 2.257157209551176e-06, "loss": 0.2712, "step": 23270 }, { "epoch": 1.0901297606221014, "grad_norm": 0.5641917450109107, "learning_rate": 2.2569684569546973e-06, "loss": 0.2714, "step": 23271 }, { "epoch": 1.09017660561203, "grad_norm": 0.5829605891086327, "learning_rate": 2.2567797057568046e-06, "loss": 0.2859, "step": 23272 }, { "epoch": 1.0902234506019581, "grad_norm": 0.5824810711582898, "learning_rate": 2.2565909559585843e-06, "loss": 0.2843, "step": 23273 }, { "epoch": 1.0902702955918864, "grad_norm": 0.6287922834328624, "learning_rate": 2.256402207561121e-06, "loss": 0.2898, "step": 23274 }, { "epoch": 1.0903171405818148, "grad_norm": 0.5693449660361256, "learning_rate": 2.2562134605655014e-06, "loss": 0.2674, "step": 23275 }, { "epoch": 1.090363985571743, "grad_norm": 0.5801239689562596, "learning_rate": 2.256024714972812e-06, "loss": 0.2803, "step": 23276 }, { "epoch": 1.0904108305616713, "grad_norm": 0.5692549106437046, "learning_rate": 2.25583597078414e-06, "loss": 0.2825, "step": 23277 }, { "epoch": 1.0904576755515998, "grad_norm": 0.5709176745743394, "learning_rate": 2.2556472280005695e-06, "loss": 0.2787, "step": 23278 }, { "epoch": 1.090504520541528, "grad_norm": 0.612619438827832, "learning_rate": 2.2554584866231895e-06, "loss": 0.2722, "step": 23279 }, { "epoch": 1.0905513655314565, "grad_norm": 0.5556289900696939, "learning_rate": 2.2552697466530827e-06, "loss": 0.2733, "step": 23280 }, { "epoch": 1.0905982105213847, "grad_norm": 0.6076058089446565, "learning_rate": 2.255081008091338e-06, "loss": 0.2806, "step": 23281 }, { "epoch": 1.090645055511313, "grad_norm": 0.5723407214156851, "learning_rate": 2.2548922709390397e-06, "loss": 0.2828, "step": 23282 }, { "epoch": 1.0906919005012414, "grad_norm": 0.5552434543704765, "learning_rate": 2.2547035351972752e-06, "loss": 0.2676, "step": 23283 }, { "epoch": 1.0907387454911697, "grad_norm": 0.556144932035873, "learning_rate": 2.25451480086713e-06, "loss": 0.2645, "step": 23284 }, { "epoch": 1.0907855904810981, "grad_norm": 0.587191470407383, "learning_rate": 2.2543260679496916e-06, "loss": 0.2779, "step": 23285 }, { "epoch": 1.0908324354710264, "grad_norm": 0.5579964147586837, "learning_rate": 2.254137336446044e-06, "loss": 0.273, "step": 23286 }, { "epoch": 1.0908792804609546, "grad_norm": 0.5652948602922195, "learning_rate": 2.253948606357274e-06, "loss": 0.2791, "step": 23287 }, { "epoch": 1.090926125450883, "grad_norm": 0.6400091229814359, "learning_rate": 2.2537598776844682e-06, "loss": 0.2999, "step": 23288 }, { "epoch": 1.0909729704408113, "grad_norm": 0.6116600009220226, "learning_rate": 2.253571150428712e-06, "loss": 0.2738, "step": 23289 }, { "epoch": 1.0910198154307398, "grad_norm": 0.5925975360430306, "learning_rate": 2.2533824245910923e-06, "loss": 0.2898, "step": 23290 }, { "epoch": 1.091066660420668, "grad_norm": 0.5768071141322408, "learning_rate": 2.253193700172695e-06, "loss": 0.285, "step": 23291 }, { "epoch": 1.0911135054105963, "grad_norm": 0.6215142637807263, "learning_rate": 2.253004977174606e-06, "loss": 0.2901, "step": 23292 }, { "epoch": 1.0911603504005247, "grad_norm": 0.5944422229271923, "learning_rate": 2.252816255597911e-06, "loss": 0.2757, "step": 23293 }, { "epoch": 1.091207195390453, "grad_norm": 0.5985661911258224, "learning_rate": 2.2526275354436956e-06, "loss": 0.285, "step": 23294 }, { "epoch": 1.0912540403803814, "grad_norm": 0.5833292859885979, "learning_rate": 2.252438816713047e-06, "loss": 0.2733, "step": 23295 }, { "epoch": 1.0913008853703097, "grad_norm": 0.636809276553994, "learning_rate": 2.2522500994070514e-06, "loss": 0.2652, "step": 23296 }, { "epoch": 1.091347730360238, "grad_norm": 0.595525188402677, "learning_rate": 2.252061383526794e-06, "loss": 0.2837, "step": 23297 }, { "epoch": 1.0913945753501664, "grad_norm": 0.560594557044064, "learning_rate": 2.251872669073361e-06, "loss": 0.2706, "step": 23298 }, { "epoch": 1.0914414203400946, "grad_norm": 0.6125236645097077, "learning_rate": 2.251683956047838e-06, "loss": 0.2939, "step": 23299 }, { "epoch": 1.0914882653300229, "grad_norm": 0.5834086349500346, "learning_rate": 2.2514952444513115e-06, "loss": 0.2699, "step": 23300 }, { "epoch": 1.0915351103199513, "grad_norm": 0.6100906395737162, "learning_rate": 2.2513065342848675e-06, "loss": 0.2926, "step": 23301 }, { "epoch": 1.0915819553098796, "grad_norm": 0.6122211033645646, "learning_rate": 2.2511178255495924e-06, "loss": 0.2873, "step": 23302 }, { "epoch": 1.091628800299808, "grad_norm": 0.6108698701767535, "learning_rate": 2.2509291182465716e-06, "loss": 0.2865, "step": 23303 }, { "epoch": 1.0916756452897363, "grad_norm": 0.5908130049387634, "learning_rate": 2.2507404123768907e-06, "loss": 0.2775, "step": 23304 }, { "epoch": 1.0917224902796645, "grad_norm": 0.5665904968130117, "learning_rate": 2.2505517079416355e-06, "loss": 0.2677, "step": 23305 }, { "epoch": 1.091769335269593, "grad_norm": 0.5976023850251372, "learning_rate": 2.2503630049418936e-06, "loss": 0.2849, "step": 23306 }, { "epoch": 1.0918161802595212, "grad_norm": 0.6184484527084833, "learning_rate": 2.2501743033787492e-06, "loss": 0.2858, "step": 23307 }, { "epoch": 1.0918630252494497, "grad_norm": 0.5622252182643414, "learning_rate": 2.2499856032532887e-06, "loss": 0.2719, "step": 23308 }, { "epoch": 1.091909870239378, "grad_norm": 0.5913861542186314, "learning_rate": 2.2497969045665992e-06, "loss": 0.271, "step": 23309 }, { "epoch": 1.0919567152293062, "grad_norm": 0.5662969663115792, "learning_rate": 2.2496082073197658e-06, "loss": 0.264, "step": 23310 }, { "epoch": 1.0920035602192346, "grad_norm": 0.5990390865658816, "learning_rate": 2.249419511513873e-06, "loss": 0.2753, "step": 23311 }, { "epoch": 1.0920504052091629, "grad_norm": 0.5679782719223763, "learning_rate": 2.249230817150008e-06, "loss": 0.2608, "step": 23312 }, { "epoch": 1.092097250199091, "grad_norm": 0.6330009911211547, "learning_rate": 2.249042124229257e-06, "loss": 0.2833, "step": 23313 }, { "epoch": 1.0921440951890196, "grad_norm": 0.5993225039477821, "learning_rate": 2.2488534327527055e-06, "loss": 0.2733, "step": 23314 }, { "epoch": 1.0921909401789478, "grad_norm": 0.5911140695541284, "learning_rate": 2.24866474272144e-06, "loss": 0.2694, "step": 23315 }, { "epoch": 1.0922377851688763, "grad_norm": 0.6108100308368593, "learning_rate": 2.248476054136545e-06, "loss": 0.2838, "step": 23316 }, { "epoch": 1.0922846301588045, "grad_norm": 0.5537552988366115, "learning_rate": 2.2482873669991074e-06, "loss": 0.2627, "step": 23317 }, { "epoch": 1.0923314751487327, "grad_norm": 0.597964065431003, "learning_rate": 2.248098681310212e-06, "loss": 0.2722, "step": 23318 }, { "epoch": 1.0923783201386612, "grad_norm": 0.596385996789428, "learning_rate": 2.2479099970709465e-06, "loss": 0.2681, "step": 23319 }, { "epoch": 1.0924251651285894, "grad_norm": 0.6113090285980785, "learning_rate": 2.2477213142823946e-06, "loss": 0.2891, "step": 23320 }, { "epoch": 1.092472010118518, "grad_norm": 0.5719633630944039, "learning_rate": 2.247532632945643e-06, "loss": 0.2558, "step": 23321 }, { "epoch": 1.0925188551084462, "grad_norm": 0.646819284914732, "learning_rate": 2.2473439530617794e-06, "loss": 0.2789, "step": 23322 }, { "epoch": 1.0925657000983744, "grad_norm": 0.5818668485904563, "learning_rate": 2.2471552746318866e-06, "loss": 0.2702, "step": 23323 }, { "epoch": 1.0926125450883029, "grad_norm": 0.5741166323315208, "learning_rate": 2.2469665976570516e-06, "loss": 0.2733, "step": 23324 }, { "epoch": 1.092659390078231, "grad_norm": 0.5852277095890734, "learning_rate": 2.2467779221383603e-06, "loss": 0.2669, "step": 23325 }, { "epoch": 1.0927062350681596, "grad_norm": 0.5648007171572084, "learning_rate": 2.2465892480768985e-06, "loss": 0.2824, "step": 23326 }, { "epoch": 1.0927530800580878, "grad_norm": 0.5680678055290185, "learning_rate": 2.2464005754737513e-06, "loss": 0.2684, "step": 23327 }, { "epoch": 1.092799925048016, "grad_norm": 0.5422484665885388, "learning_rate": 2.2462119043300063e-06, "loss": 0.26, "step": 23328 }, { "epoch": 1.0928467700379445, "grad_norm": 0.5351695077395979, "learning_rate": 2.2460232346467473e-06, "loss": 0.2583, "step": 23329 }, { "epoch": 1.0928936150278727, "grad_norm": 0.5806422034052698, "learning_rate": 2.245834566425061e-06, "loss": 0.2772, "step": 23330 }, { "epoch": 1.0929404600178012, "grad_norm": 0.564548943852026, "learning_rate": 2.245645899666032e-06, "loss": 0.2679, "step": 23331 }, { "epoch": 1.0929873050077294, "grad_norm": 0.6045467836686695, "learning_rate": 2.245457234370747e-06, "loss": 0.2883, "step": 23332 }, { "epoch": 1.0930341499976577, "grad_norm": 0.6777209471330194, "learning_rate": 2.2452685705402927e-06, "loss": 0.2877, "step": 23333 }, { "epoch": 1.0930809949875862, "grad_norm": 0.6230374863733544, "learning_rate": 2.245079908175754e-06, "loss": 0.2719, "step": 23334 }, { "epoch": 1.0931278399775144, "grad_norm": 0.5948943639370593, "learning_rate": 2.2448912472782156e-06, "loss": 0.2715, "step": 23335 }, { "epoch": 1.0931746849674426, "grad_norm": 0.6721619089337338, "learning_rate": 2.2447025878487635e-06, "loss": 0.2973, "step": 23336 }, { "epoch": 1.093221529957371, "grad_norm": 0.593962339763352, "learning_rate": 2.2445139298884845e-06, "loss": 0.2717, "step": 23337 }, { "epoch": 1.0932683749472993, "grad_norm": 0.6296630029967342, "learning_rate": 2.244325273398463e-06, "loss": 0.2949, "step": 23338 }, { "epoch": 1.0933152199372278, "grad_norm": 0.608243134266549, "learning_rate": 2.2441366183797853e-06, "loss": 0.2703, "step": 23339 }, { "epoch": 1.093362064927156, "grad_norm": 0.6248896118091664, "learning_rate": 2.243947964833538e-06, "loss": 0.2761, "step": 23340 }, { "epoch": 1.0934089099170843, "grad_norm": 0.5511302453870903, "learning_rate": 2.243759312760806e-06, "loss": 0.2633, "step": 23341 }, { "epoch": 1.0934557549070127, "grad_norm": 0.6106617634581181, "learning_rate": 2.2435706621626736e-06, "loss": 0.2866, "step": 23342 }, { "epoch": 1.093502599896941, "grad_norm": 0.536591061089686, "learning_rate": 2.2433820130402276e-06, "loss": 0.2787, "step": 23343 }, { "epoch": 1.0935494448868694, "grad_norm": 0.637424770523656, "learning_rate": 2.2431933653945542e-06, "loss": 0.269, "step": 23344 }, { "epoch": 1.0935962898767977, "grad_norm": 0.6699229550683986, "learning_rate": 2.243004719226738e-06, "loss": 0.2867, "step": 23345 }, { "epoch": 1.093643134866726, "grad_norm": 0.6429632614376668, "learning_rate": 2.242816074537866e-06, "loss": 0.285, "step": 23346 }, { "epoch": 1.0936899798566544, "grad_norm": 0.6415343888465145, "learning_rate": 2.242627431329022e-06, "loss": 0.2866, "step": 23347 }, { "epoch": 1.0937368248465826, "grad_norm": 0.6206510504852488, "learning_rate": 2.242438789601293e-06, "loss": 0.2732, "step": 23348 }, { "epoch": 1.0937836698365109, "grad_norm": 0.5714084615878386, "learning_rate": 2.242250149355763e-06, "loss": 0.2825, "step": 23349 }, { "epoch": 1.0938305148264393, "grad_norm": 0.6015949237473541, "learning_rate": 2.242061510593519e-06, "loss": 0.2764, "step": 23350 }, { "epoch": 1.0938773598163676, "grad_norm": 0.5951733771974599, "learning_rate": 2.2418728733156467e-06, "loss": 0.2796, "step": 23351 }, { "epoch": 1.093924204806296, "grad_norm": 0.6094916962596709, "learning_rate": 2.2416842375232317e-06, "loss": 0.286, "step": 23352 }, { "epoch": 1.0939710497962243, "grad_norm": 0.5785535858398062, "learning_rate": 2.241495603217358e-06, "loss": 0.276, "step": 23353 }, { "epoch": 1.0940178947861525, "grad_norm": 0.6128423034624041, "learning_rate": 2.2413069703991115e-06, "loss": 0.2843, "step": 23354 }, { "epoch": 1.094064739776081, "grad_norm": 0.6037852308534262, "learning_rate": 2.2411183390695797e-06, "loss": 0.2872, "step": 23355 }, { "epoch": 1.0941115847660092, "grad_norm": 0.5893143633821994, "learning_rate": 2.240929709229846e-06, "loss": 0.2628, "step": 23356 }, { "epoch": 1.0941584297559377, "grad_norm": 0.5914984105964101, "learning_rate": 2.2407410808809968e-06, "loss": 0.2765, "step": 23357 }, { "epoch": 1.094205274745866, "grad_norm": 0.6204004688254493, "learning_rate": 2.240552454024118e-06, "loss": 0.2715, "step": 23358 }, { "epoch": 1.0942521197357942, "grad_norm": 0.6073944138287845, "learning_rate": 2.240363828660294e-06, "loss": 0.3141, "step": 23359 }, { "epoch": 1.0942989647257226, "grad_norm": 0.5795015499544665, "learning_rate": 2.2401752047906112e-06, "loss": 0.2746, "step": 23360 }, { "epoch": 1.0943458097156509, "grad_norm": 0.6060466948103775, "learning_rate": 2.2399865824161553e-06, "loss": 0.2818, "step": 23361 }, { "epoch": 1.0943926547055793, "grad_norm": 0.5701359944446384, "learning_rate": 2.23979796153801e-06, "loss": 0.2722, "step": 23362 }, { "epoch": 1.0944394996955076, "grad_norm": 0.5937925084042711, "learning_rate": 2.2396093421572626e-06, "loss": 0.2715, "step": 23363 }, { "epoch": 1.0944863446854358, "grad_norm": 0.5508637388062445, "learning_rate": 2.239420724274999e-06, "loss": 0.2628, "step": 23364 }, { "epoch": 1.0945331896753643, "grad_norm": 0.5367501223076899, "learning_rate": 2.239232107892303e-06, "loss": 0.2581, "step": 23365 }, { "epoch": 1.0945800346652925, "grad_norm": 0.6080688192505314, "learning_rate": 2.23904349301026e-06, "loss": 0.2875, "step": 23366 }, { "epoch": 1.094626879655221, "grad_norm": 0.6242982774858478, "learning_rate": 2.2388548796299564e-06, "loss": 0.2848, "step": 23367 }, { "epoch": 1.0946737246451492, "grad_norm": 0.6257591993284649, "learning_rate": 2.238666267752478e-06, "loss": 0.2875, "step": 23368 }, { "epoch": 1.0947205696350775, "grad_norm": 0.6026110255879751, "learning_rate": 2.2384776573789087e-06, "loss": 0.282, "step": 23369 }, { "epoch": 1.094767414625006, "grad_norm": 0.5486825733424154, "learning_rate": 2.2382890485103356e-06, "loss": 0.259, "step": 23370 }, { "epoch": 1.0948142596149342, "grad_norm": 0.6249947916700328, "learning_rate": 2.2381004411478424e-06, "loss": 0.2933, "step": 23371 }, { "epoch": 1.0948611046048624, "grad_norm": 0.6275704480109551, "learning_rate": 2.237911835292516e-06, "loss": 0.2802, "step": 23372 }, { "epoch": 1.0949079495947909, "grad_norm": 0.6097337095894572, "learning_rate": 2.2377232309454404e-06, "loss": 0.2841, "step": 23373 }, { "epoch": 1.0949547945847191, "grad_norm": 0.5795214147941877, "learning_rate": 2.237534628107702e-06, "loss": 0.2625, "step": 23374 }, { "epoch": 1.0950016395746476, "grad_norm": 0.5788275693892809, "learning_rate": 2.237346026780386e-06, "loss": 0.2655, "step": 23375 }, { "epoch": 1.0950484845645758, "grad_norm": 0.5620517702328047, "learning_rate": 2.2371574269645786e-06, "loss": 0.2662, "step": 23376 }, { "epoch": 1.095095329554504, "grad_norm": 0.596781034769136, "learning_rate": 2.2369688286613624e-06, "loss": 0.2783, "step": 23377 }, { "epoch": 1.0951421745444325, "grad_norm": 0.5116413138299225, "learning_rate": 2.236780231871825e-06, "loss": 0.26, "step": 23378 }, { "epoch": 1.0951890195343608, "grad_norm": 0.6044411655606302, "learning_rate": 2.2365916365970516e-06, "loss": 0.2912, "step": 23379 }, { "epoch": 1.0952358645242892, "grad_norm": 0.5662646054131406, "learning_rate": 2.2364030428381265e-06, "loss": 0.2713, "step": 23380 }, { "epoch": 1.0952827095142175, "grad_norm": 0.573888657372331, "learning_rate": 2.236214450596136e-06, "loss": 0.2698, "step": 23381 }, { "epoch": 1.0953295545041457, "grad_norm": 0.5589829711797426, "learning_rate": 2.236025859872166e-06, "loss": 0.2586, "step": 23382 }, { "epoch": 1.0953763994940742, "grad_norm": 0.6437676768217242, "learning_rate": 2.2358372706673e-06, "loss": 0.2796, "step": 23383 }, { "epoch": 1.0954232444840024, "grad_norm": 0.5662586928409309, "learning_rate": 2.2356486829826235e-06, "loss": 0.2679, "step": 23384 }, { "epoch": 1.0954700894739307, "grad_norm": 0.6115179664976512, "learning_rate": 2.2354600968192224e-06, "loss": 0.2881, "step": 23385 }, { "epoch": 1.0955169344638591, "grad_norm": 0.6005131875438223, "learning_rate": 2.2352715121781827e-06, "loss": 0.2824, "step": 23386 }, { "epoch": 1.0955637794537874, "grad_norm": 0.5743545630479286, "learning_rate": 2.2350829290605885e-06, "loss": 0.265, "step": 23387 }, { "epoch": 1.0956106244437158, "grad_norm": 0.6130498537633153, "learning_rate": 2.2348943474675253e-06, "loss": 0.2876, "step": 23388 }, { "epoch": 1.095657469433644, "grad_norm": 0.5635826533440671, "learning_rate": 2.2347057674000793e-06, "loss": 0.287, "step": 23389 }, { "epoch": 1.0957043144235723, "grad_norm": 0.5723779435907074, "learning_rate": 2.234517188859335e-06, "loss": 0.2643, "step": 23390 }, { "epoch": 1.0957511594135008, "grad_norm": 0.5812793492692275, "learning_rate": 2.2343286118463766e-06, "loss": 0.2552, "step": 23391 }, { "epoch": 1.095798004403429, "grad_norm": 0.572299917076993, "learning_rate": 2.2341400363622903e-06, "loss": 0.2663, "step": 23392 }, { "epoch": 1.0958448493933575, "grad_norm": 0.5632282170040576, "learning_rate": 2.233951462408162e-06, "loss": 0.2693, "step": 23393 }, { "epoch": 1.0958916943832857, "grad_norm": 0.6277633836332397, "learning_rate": 2.2337628899850752e-06, "loss": 0.2727, "step": 23394 }, { "epoch": 1.095938539373214, "grad_norm": 0.6140808731416875, "learning_rate": 2.2335743190941174e-06, "loss": 0.2718, "step": 23395 }, { "epoch": 1.0959853843631424, "grad_norm": 0.6290965695279382, "learning_rate": 2.2333857497363714e-06, "loss": 0.2925, "step": 23396 }, { "epoch": 1.0960322293530707, "grad_norm": 0.6207993817059869, "learning_rate": 2.233197181912924e-06, "loss": 0.2698, "step": 23397 }, { "epoch": 1.0960790743429991, "grad_norm": 0.572094337397942, "learning_rate": 2.2330086156248592e-06, "loss": 0.2579, "step": 23398 }, { "epoch": 1.0961259193329274, "grad_norm": 0.5481193409399355, "learning_rate": 2.2328200508732627e-06, "loss": 0.2715, "step": 23399 }, { "epoch": 1.0961727643228556, "grad_norm": 0.5926659387757477, "learning_rate": 2.23263148765922e-06, "loss": 0.2631, "step": 23400 }, { "epoch": 1.096219609312784, "grad_norm": 0.5888537010745734, "learning_rate": 2.2324429259838167e-06, "loss": 0.2593, "step": 23401 }, { "epoch": 1.0962664543027123, "grad_norm": 0.5825186358517143, "learning_rate": 2.232254365848136e-06, "loss": 0.268, "step": 23402 }, { "epoch": 1.0963132992926408, "grad_norm": 0.5796628784420053, "learning_rate": 2.232065807253264e-06, "loss": 0.2785, "step": 23403 }, { "epoch": 1.096360144282569, "grad_norm": 0.5863590700548408, "learning_rate": 2.231877250200286e-06, "loss": 0.2759, "step": 23404 }, { "epoch": 1.0964069892724972, "grad_norm": 0.5623501997708118, "learning_rate": 2.231688694690287e-06, "loss": 0.2553, "step": 23405 }, { "epoch": 1.0964538342624257, "grad_norm": 0.5580124816667494, "learning_rate": 2.231500140724353e-06, "loss": 0.2563, "step": 23406 }, { "epoch": 1.096500679252354, "grad_norm": 0.5658182779453127, "learning_rate": 2.2313115883035675e-06, "loss": 0.2708, "step": 23407 }, { "epoch": 1.0965475242422822, "grad_norm": 0.5870121492909459, "learning_rate": 2.231123037429017e-06, "loss": 0.2652, "step": 23408 }, { "epoch": 1.0965943692322107, "grad_norm": 0.6125710445061388, "learning_rate": 2.230934488101785e-06, "loss": 0.2815, "step": 23409 }, { "epoch": 1.096641214222139, "grad_norm": 0.6218317344228292, "learning_rate": 2.230745940322958e-06, "loss": 0.2936, "step": 23410 }, { "epoch": 1.0966880592120674, "grad_norm": 0.5888740586214471, "learning_rate": 2.2305573940936197e-06, "loss": 0.2744, "step": 23411 }, { "epoch": 1.0967349042019956, "grad_norm": 0.6197520399185231, "learning_rate": 2.230368849414856e-06, "loss": 0.2935, "step": 23412 }, { "epoch": 1.0967817491919238, "grad_norm": 0.5456733657718588, "learning_rate": 2.2301803062877525e-06, "loss": 0.2682, "step": 23413 }, { "epoch": 1.0968285941818523, "grad_norm": 0.5934303118541078, "learning_rate": 2.2299917647133936e-06, "loss": 0.2809, "step": 23414 }, { "epoch": 1.0968754391717805, "grad_norm": 0.6051218741507297, "learning_rate": 2.229803224692863e-06, "loss": 0.2836, "step": 23415 }, { "epoch": 1.096922284161709, "grad_norm": 0.5652852241812854, "learning_rate": 2.2296146862272474e-06, "loss": 0.2633, "step": 23416 }, { "epoch": 1.0969691291516372, "grad_norm": 0.5385596768863542, "learning_rate": 2.229426149317632e-06, "loss": 0.2533, "step": 23417 }, { "epoch": 1.0970159741415655, "grad_norm": 0.5587725041841631, "learning_rate": 2.2292376139651e-06, "loss": 0.2739, "step": 23418 }, { "epoch": 1.097062819131494, "grad_norm": 0.5836546184519115, "learning_rate": 2.2290490801707393e-06, "loss": 0.2737, "step": 23419 }, { "epoch": 1.0971096641214222, "grad_norm": 0.5437756589566882, "learning_rate": 2.2288605479356314e-06, "loss": 0.2708, "step": 23420 }, { "epoch": 1.0971565091113504, "grad_norm": 0.5783092670946852, "learning_rate": 2.2286720172608637e-06, "loss": 0.265, "step": 23421 }, { "epoch": 1.097203354101279, "grad_norm": 0.5490199656314165, "learning_rate": 2.22848348814752e-06, "loss": 0.2808, "step": 23422 }, { "epoch": 1.0972501990912071, "grad_norm": 0.5718555367978909, "learning_rate": 2.228294960596685e-06, "loss": 0.2808, "step": 23423 }, { "epoch": 1.0972970440811356, "grad_norm": 0.5538453573918481, "learning_rate": 2.228106434609445e-06, "loss": 0.252, "step": 23424 }, { "epoch": 1.0973438890710638, "grad_norm": 0.5525514032130364, "learning_rate": 2.227917910186885e-06, "loss": 0.2632, "step": 23425 }, { "epoch": 1.097390734060992, "grad_norm": 0.5526641513786175, "learning_rate": 2.2277293873300877e-06, "loss": 0.2785, "step": 23426 }, { "epoch": 1.0974375790509205, "grad_norm": 0.6217366772287731, "learning_rate": 2.227540866040139e-06, "loss": 0.2638, "step": 23427 }, { "epoch": 1.0974844240408488, "grad_norm": 0.5655886839632057, "learning_rate": 2.227352346318125e-06, "loss": 0.2685, "step": 23428 }, { "epoch": 1.0975312690307772, "grad_norm": 0.5931416685592072, "learning_rate": 2.227163828165129e-06, "loss": 0.2918, "step": 23429 }, { "epoch": 1.0975781140207055, "grad_norm": 0.6291098105319162, "learning_rate": 2.2269753115822373e-06, "loss": 0.2782, "step": 23430 }, { "epoch": 1.0976249590106337, "grad_norm": 0.5676405778268794, "learning_rate": 2.2267867965705343e-06, "loss": 0.2792, "step": 23431 }, { "epoch": 1.0976718040005622, "grad_norm": 0.5542034433510545, "learning_rate": 2.2265982831311044e-06, "loss": 0.2688, "step": 23432 }, { "epoch": 1.0977186489904904, "grad_norm": 0.6312595904555173, "learning_rate": 2.226409771265032e-06, "loss": 0.2981, "step": 23433 }, { "epoch": 1.097765493980419, "grad_norm": 0.6015947449715058, "learning_rate": 2.226221260973403e-06, "loss": 0.2879, "step": 23434 }, { "epoch": 1.0978123389703471, "grad_norm": 0.5876674477612623, "learning_rate": 2.226032752257302e-06, "loss": 0.2881, "step": 23435 }, { "epoch": 1.0978591839602754, "grad_norm": 0.5754871925662587, "learning_rate": 2.225844245117813e-06, "loss": 0.2768, "step": 23436 }, { "epoch": 1.0979060289502038, "grad_norm": 0.5962133008516396, "learning_rate": 2.225655739556023e-06, "loss": 0.2747, "step": 23437 }, { "epoch": 1.097952873940132, "grad_norm": 0.6192524792392972, "learning_rate": 2.225467235573014e-06, "loss": 0.2909, "step": 23438 }, { "epoch": 1.0979997189300605, "grad_norm": 0.5554108697904779, "learning_rate": 2.2252787331698724e-06, "loss": 0.2759, "step": 23439 }, { "epoch": 1.0980465639199888, "grad_norm": 0.6300454832573096, "learning_rate": 2.2250902323476823e-06, "loss": 0.289, "step": 23440 }, { "epoch": 1.098093408909917, "grad_norm": 0.5958898925289007, "learning_rate": 2.2249017331075286e-06, "loss": 0.2634, "step": 23441 }, { "epoch": 1.0981402538998455, "grad_norm": 0.555322705484872, "learning_rate": 2.224713235450497e-06, "loss": 0.266, "step": 23442 }, { "epoch": 1.0981870988897737, "grad_norm": 0.5571129422509883, "learning_rate": 2.224524739377672e-06, "loss": 0.2779, "step": 23443 }, { "epoch": 1.098233943879702, "grad_norm": 0.5723050635972097, "learning_rate": 2.224336244890137e-06, "loss": 0.2572, "step": 23444 }, { "epoch": 1.0982807888696304, "grad_norm": 0.6005531618949355, "learning_rate": 2.224147751988977e-06, "loss": 0.2793, "step": 23445 }, { "epoch": 1.0983276338595587, "grad_norm": 0.5442088133241345, "learning_rate": 2.223959260675279e-06, "loss": 0.2551, "step": 23446 }, { "epoch": 1.0983744788494871, "grad_norm": 0.5705368321069637, "learning_rate": 2.2237707709501247e-06, "loss": 0.2686, "step": 23447 }, { "epoch": 1.0984213238394154, "grad_norm": 0.5572803464374874, "learning_rate": 2.2235822828146e-06, "loss": 0.2753, "step": 23448 }, { "epoch": 1.0984681688293436, "grad_norm": 0.5891014233598709, "learning_rate": 2.2233937962697915e-06, "loss": 0.284, "step": 23449 }, { "epoch": 1.098515013819272, "grad_norm": 0.5664947726259898, "learning_rate": 2.223205311316782e-06, "loss": 0.2689, "step": 23450 }, { "epoch": 1.0985618588092003, "grad_norm": 0.5431012043567637, "learning_rate": 2.223016827956655e-06, "loss": 0.265, "step": 23451 }, { "epoch": 1.0986087037991288, "grad_norm": 0.5921350250228609, "learning_rate": 2.2228283461904963e-06, "loss": 0.292, "step": 23452 }, { "epoch": 1.098655548789057, "grad_norm": 0.5530562258569385, "learning_rate": 2.222639866019392e-06, "loss": 0.2697, "step": 23453 }, { "epoch": 1.0987023937789853, "grad_norm": 0.5987602633489486, "learning_rate": 2.222451387444425e-06, "loss": 0.2788, "step": 23454 }, { "epoch": 1.0987492387689137, "grad_norm": 0.5149958955693099, "learning_rate": 2.2222629104666807e-06, "loss": 0.2531, "step": 23455 }, { "epoch": 1.098796083758842, "grad_norm": 0.6032285831388803, "learning_rate": 2.222074435087244e-06, "loss": 0.2822, "step": 23456 }, { "epoch": 1.0988429287487702, "grad_norm": 0.616882898640865, "learning_rate": 2.2218859613071986e-06, "loss": 0.2875, "step": 23457 }, { "epoch": 1.0988897737386987, "grad_norm": 0.6006102772018442, "learning_rate": 2.221697489127629e-06, "loss": 0.2768, "step": 23458 }, { "epoch": 1.098936618728627, "grad_norm": 0.5737011087723616, "learning_rate": 2.221509018549621e-06, "loss": 0.2716, "step": 23459 }, { "epoch": 1.0989834637185554, "grad_norm": 0.6042120338688872, "learning_rate": 2.221320549574259e-06, "loss": 0.287, "step": 23460 }, { "epoch": 1.0990303087084836, "grad_norm": 0.591454781947168, "learning_rate": 2.2211320822026263e-06, "loss": 0.2764, "step": 23461 }, { "epoch": 1.0990771536984119, "grad_norm": 0.610737807685174, "learning_rate": 2.2209436164358095e-06, "loss": 0.2764, "step": 23462 }, { "epoch": 1.0991239986883403, "grad_norm": 0.5943433697645355, "learning_rate": 2.2207551522748918e-06, "loss": 0.2658, "step": 23463 }, { "epoch": 1.0991708436782686, "grad_norm": 0.575139438962364, "learning_rate": 2.2205666897209573e-06, "loss": 0.2734, "step": 23464 }, { "epoch": 1.099217688668197, "grad_norm": 0.5423840649870939, "learning_rate": 2.220378228775091e-06, "loss": 0.261, "step": 23465 }, { "epoch": 1.0992645336581253, "grad_norm": 0.5808486945282431, "learning_rate": 2.2201897694383785e-06, "loss": 0.2872, "step": 23466 }, { "epoch": 1.0993113786480535, "grad_norm": 0.5811694713436867, "learning_rate": 2.220001311711903e-06, "loss": 0.2764, "step": 23467 }, { "epoch": 1.099358223637982, "grad_norm": 0.6133415982186115, "learning_rate": 2.219812855596751e-06, "loss": 0.2823, "step": 23468 }, { "epoch": 1.0994050686279102, "grad_norm": 0.6133747543632401, "learning_rate": 2.219624401094004e-06, "loss": 0.2862, "step": 23469 }, { "epoch": 1.0994519136178387, "grad_norm": 0.5872783064407662, "learning_rate": 2.219435948204749e-06, "loss": 0.2789, "step": 23470 }, { "epoch": 1.099498758607767, "grad_norm": 0.5412067803142298, "learning_rate": 2.2192474969300682e-06, "loss": 0.2588, "step": 23471 }, { "epoch": 1.0995456035976952, "grad_norm": 0.5820574994897503, "learning_rate": 2.2190590472710487e-06, "loss": 0.2747, "step": 23472 }, { "epoch": 1.0995924485876236, "grad_norm": 0.5613081007080313, "learning_rate": 2.2188705992287737e-06, "loss": 0.278, "step": 23473 }, { "epoch": 1.0996392935775519, "grad_norm": 0.5666283635883619, "learning_rate": 2.218682152804328e-06, "loss": 0.2682, "step": 23474 }, { "epoch": 1.0996861385674803, "grad_norm": 0.6667133120780898, "learning_rate": 2.218493707998795e-06, "loss": 0.2983, "step": 23475 }, { "epoch": 1.0997329835574086, "grad_norm": 0.5595732946379394, "learning_rate": 2.21830526481326e-06, "loss": 0.2828, "step": 23476 }, { "epoch": 1.0997798285473368, "grad_norm": 0.5776446965511275, "learning_rate": 2.218116823248808e-06, "loss": 0.2742, "step": 23477 }, { "epoch": 1.0998266735372653, "grad_norm": 0.5894068384497636, "learning_rate": 2.2179283833065227e-06, "loss": 0.2813, "step": 23478 }, { "epoch": 1.0998735185271935, "grad_norm": 0.5998174701547511, "learning_rate": 2.217739944987488e-06, "loss": 0.2822, "step": 23479 }, { "epoch": 1.0999203635171217, "grad_norm": 0.6380691930457422, "learning_rate": 2.2175515082927905e-06, "loss": 0.2865, "step": 23480 }, { "epoch": 1.0999672085070502, "grad_norm": 0.5887607539594798, "learning_rate": 2.217363073223512e-06, "loss": 0.2779, "step": 23481 }, { "epoch": 1.1000140534969785, "grad_norm": 0.5484434488931028, "learning_rate": 2.217174639780738e-06, "loss": 0.272, "step": 23482 }, { "epoch": 1.100060898486907, "grad_norm": 0.5965739979393118, "learning_rate": 2.2169862079655525e-06, "loss": 0.2762, "step": 23483 }, { "epoch": 1.1001077434768352, "grad_norm": 0.6175162016029895, "learning_rate": 2.216797777779041e-06, "loss": 0.2866, "step": 23484 }, { "epoch": 1.1001545884667634, "grad_norm": 0.5669254791522766, "learning_rate": 2.2166093492222864e-06, "loss": 0.2642, "step": 23485 }, { "epoch": 1.1002014334566919, "grad_norm": 0.6283646706079555, "learning_rate": 2.216420922296375e-06, "loss": 0.2829, "step": 23486 }, { "epoch": 1.10024827844662, "grad_norm": 0.603423218924244, "learning_rate": 2.2162324970023887e-06, "loss": 0.2781, "step": 23487 }, { "epoch": 1.1002951234365486, "grad_norm": 0.6342518562464177, "learning_rate": 2.2160440733414137e-06, "loss": 0.2812, "step": 23488 }, { "epoch": 1.1003419684264768, "grad_norm": 0.5885593907509029, "learning_rate": 2.2158556513145334e-06, "loss": 0.2767, "step": 23489 }, { "epoch": 1.100388813416405, "grad_norm": 0.572838504313927, "learning_rate": 2.215667230922832e-06, "loss": 0.2646, "step": 23490 }, { "epoch": 1.1004356584063335, "grad_norm": 0.6008259502971413, "learning_rate": 2.215478812167395e-06, "loss": 0.2638, "step": 23491 }, { "epoch": 1.1004825033962617, "grad_norm": 0.6438489311435875, "learning_rate": 2.215290395049306e-06, "loss": 0.2871, "step": 23492 }, { "epoch": 1.10052934838619, "grad_norm": 0.580072830728198, "learning_rate": 2.2151019795696486e-06, "loss": 0.2723, "step": 23493 }, { "epoch": 1.1005761933761185, "grad_norm": 0.570544316016848, "learning_rate": 2.2149135657295077e-06, "loss": 0.2629, "step": 23494 }, { "epoch": 1.1006230383660467, "grad_norm": 0.5795011220240522, "learning_rate": 2.214725153529968e-06, "loss": 0.2782, "step": 23495 }, { "epoch": 1.1006698833559752, "grad_norm": 0.5854714573927722, "learning_rate": 2.214536742972113e-06, "loss": 0.2765, "step": 23496 }, { "epoch": 1.1007167283459034, "grad_norm": 0.5883964435123333, "learning_rate": 2.2143483340570273e-06, "loss": 0.303, "step": 23497 }, { "epoch": 1.1007635733358316, "grad_norm": 0.5990735372299519, "learning_rate": 2.2141599267857954e-06, "loss": 0.2709, "step": 23498 }, { "epoch": 1.10081041832576, "grad_norm": 0.6079470061208707, "learning_rate": 2.2139715211595016e-06, "loss": 0.2736, "step": 23499 }, { "epoch": 1.1008572633156883, "grad_norm": 0.6486680674578352, "learning_rate": 2.2137831171792287e-06, "loss": 0.309, "step": 23500 }, { "epoch": 1.1009041083056168, "grad_norm": 0.5874338104031741, "learning_rate": 2.213594714846063e-06, "loss": 0.2803, "step": 23501 }, { "epoch": 1.100950953295545, "grad_norm": 0.6271866304081793, "learning_rate": 2.213406314161087e-06, "loss": 0.278, "step": 23502 }, { "epoch": 1.1009977982854733, "grad_norm": 0.6057317685567416, "learning_rate": 2.2132179151253856e-06, "loss": 0.2765, "step": 23503 }, { "epoch": 1.1010446432754017, "grad_norm": 0.5605071607717393, "learning_rate": 2.2130295177400443e-06, "loss": 0.2727, "step": 23504 }, { "epoch": 1.10109148826533, "grad_norm": 0.5824877459890557, "learning_rate": 2.2128411220061453e-06, "loss": 0.2656, "step": 23505 }, { "epoch": 1.1011383332552585, "grad_norm": 0.5993842667859891, "learning_rate": 2.212652727924773e-06, "loss": 0.2792, "step": 23506 }, { "epoch": 1.1011851782451867, "grad_norm": 0.6033534025946289, "learning_rate": 2.212464335497012e-06, "loss": 0.2728, "step": 23507 }, { "epoch": 1.101232023235115, "grad_norm": 0.6164487745390765, "learning_rate": 2.212275944723947e-06, "loss": 0.2987, "step": 23508 }, { "epoch": 1.1012788682250434, "grad_norm": 0.5487621050278847, "learning_rate": 2.2120875556066613e-06, "loss": 0.2707, "step": 23509 }, { "epoch": 1.1013257132149716, "grad_norm": 0.5922184953517203, "learning_rate": 2.21189916814624e-06, "loss": 0.2863, "step": 23510 }, { "epoch": 1.1013725582049, "grad_norm": 0.5761362907190499, "learning_rate": 2.2117107823437656e-06, "loss": 0.2703, "step": 23511 }, { "epoch": 1.1014194031948283, "grad_norm": 0.6098968352637831, "learning_rate": 2.211522398200324e-06, "loss": 0.2729, "step": 23512 }, { "epoch": 1.1014662481847566, "grad_norm": 0.6368263875319724, "learning_rate": 2.211334015716998e-06, "loss": 0.2894, "step": 23513 }, { "epoch": 1.101513093174685, "grad_norm": 0.6146426353739589, "learning_rate": 2.211145634894872e-06, "loss": 0.2821, "step": 23514 }, { "epoch": 1.1015599381646133, "grad_norm": 0.563694191490243, "learning_rate": 2.2109572557350308e-06, "loss": 0.2668, "step": 23515 }, { "epoch": 1.1016067831545415, "grad_norm": 0.6185149629176138, "learning_rate": 2.210768878238559e-06, "loss": 0.2866, "step": 23516 }, { "epoch": 1.10165362814447, "grad_norm": 0.5567406335076142, "learning_rate": 2.2105805024065375e-06, "loss": 0.28, "step": 23517 }, { "epoch": 1.1017004731343982, "grad_norm": 0.5866460234260675, "learning_rate": 2.2103921282400532e-06, "loss": 0.2681, "step": 23518 }, { "epoch": 1.1017473181243267, "grad_norm": 0.639697752369047, "learning_rate": 2.2102037557401897e-06, "loss": 0.2815, "step": 23519 }, { "epoch": 1.101794163114255, "grad_norm": 0.6246613840833649, "learning_rate": 2.2100153849080302e-06, "loss": 0.2963, "step": 23520 }, { "epoch": 1.1018410081041832, "grad_norm": 0.5382379287554371, "learning_rate": 2.2098270157446595e-06, "loss": 0.2517, "step": 23521 }, { "epoch": 1.1018878530941116, "grad_norm": 0.5804060879063432, "learning_rate": 2.209638648251162e-06, "loss": 0.2755, "step": 23522 }, { "epoch": 1.1019346980840399, "grad_norm": 0.5937178342909482, "learning_rate": 2.209450282428621e-06, "loss": 0.2805, "step": 23523 }, { "epoch": 1.1019815430739683, "grad_norm": 0.5800252482018221, "learning_rate": 2.2092619182781198e-06, "loss": 0.267, "step": 23524 }, { "epoch": 1.1020283880638966, "grad_norm": 0.5775186140369326, "learning_rate": 2.2090735558007435e-06, "loss": 0.2726, "step": 23525 }, { "epoch": 1.1020752330538248, "grad_norm": 0.6316416012756212, "learning_rate": 2.208885194997576e-06, "loss": 0.2857, "step": 23526 }, { "epoch": 1.1021220780437533, "grad_norm": 0.6061831938734068, "learning_rate": 2.208696835869701e-06, "loss": 0.2618, "step": 23527 }, { "epoch": 1.1021689230336815, "grad_norm": 0.6170467892878254, "learning_rate": 2.208508478418202e-06, "loss": 0.2758, "step": 23528 }, { "epoch": 1.1022157680236098, "grad_norm": 0.594511286637139, "learning_rate": 2.2083201226441644e-06, "loss": 0.2737, "step": 23529 }, { "epoch": 1.1022626130135382, "grad_norm": 0.6038377994962073, "learning_rate": 2.208131768548671e-06, "loss": 0.2652, "step": 23530 }, { "epoch": 1.1023094580034665, "grad_norm": 0.6367835681303686, "learning_rate": 2.207943416132805e-06, "loss": 0.2878, "step": 23531 }, { "epoch": 1.102356302993395, "grad_norm": 0.5558149317138527, "learning_rate": 2.2077550653976517e-06, "loss": 0.2742, "step": 23532 }, { "epoch": 1.1024031479833232, "grad_norm": 0.5754821231273898, "learning_rate": 2.207566716344295e-06, "loss": 0.2674, "step": 23533 }, { "epoch": 1.1024499929732514, "grad_norm": 0.6228295406819622, "learning_rate": 2.2073783689738174e-06, "loss": 0.298, "step": 23534 }, { "epoch": 1.1024968379631799, "grad_norm": 0.5835457497699075, "learning_rate": 2.207190023287305e-06, "loss": 0.2917, "step": 23535 }, { "epoch": 1.1025436829531081, "grad_norm": 0.578794476274086, "learning_rate": 2.2070016792858394e-06, "loss": 0.2949, "step": 23536 }, { "epoch": 1.1025905279430366, "grad_norm": 0.594337318859328, "learning_rate": 2.2068133369705065e-06, "loss": 0.2673, "step": 23537 }, { "epoch": 1.1026373729329648, "grad_norm": 0.6034740130882791, "learning_rate": 2.206624996342388e-06, "loss": 0.2777, "step": 23538 }, { "epoch": 1.102684217922893, "grad_norm": 0.5851895901069283, "learning_rate": 2.206436657402569e-06, "loss": 0.2802, "step": 23539 }, { "epoch": 1.1027310629128215, "grad_norm": 0.5846831131818425, "learning_rate": 2.206248320152134e-06, "loss": 0.2464, "step": 23540 }, { "epoch": 1.1027779079027498, "grad_norm": 0.6137803924759662, "learning_rate": 2.2060599845921667e-06, "loss": 0.2683, "step": 23541 }, { "epoch": 1.1028247528926782, "grad_norm": 0.5936589827565946, "learning_rate": 2.2058716507237488e-06, "loss": 0.2866, "step": 23542 }, { "epoch": 1.1028715978826065, "grad_norm": 0.5759878454808051, "learning_rate": 2.205683318547966e-06, "loss": 0.2623, "step": 23543 }, { "epoch": 1.1029184428725347, "grad_norm": 0.5586267466057371, "learning_rate": 2.205494988065902e-06, "loss": 0.2544, "step": 23544 }, { "epoch": 1.1029652878624632, "grad_norm": 0.6058879926748093, "learning_rate": 2.20530665927864e-06, "loss": 0.2707, "step": 23545 }, { "epoch": 1.1030121328523914, "grad_norm": 0.5986474526859165, "learning_rate": 2.2051183321872645e-06, "loss": 0.2745, "step": 23546 }, { "epoch": 1.1030589778423199, "grad_norm": 0.5830782288453857, "learning_rate": 2.204930006792859e-06, "loss": 0.2818, "step": 23547 }, { "epoch": 1.1031058228322481, "grad_norm": 0.5949513336649862, "learning_rate": 2.2047416830965066e-06, "loss": 0.2807, "step": 23548 }, { "epoch": 1.1031526678221764, "grad_norm": 0.5858745989774559, "learning_rate": 2.204553361099292e-06, "loss": 0.2846, "step": 23549 }, { "epoch": 1.1031995128121048, "grad_norm": 0.5715270693775197, "learning_rate": 2.2043650408022984e-06, "loss": 0.2684, "step": 23550 }, { "epoch": 1.103246357802033, "grad_norm": 0.5202258243912928, "learning_rate": 2.2041767222066096e-06, "loss": 0.259, "step": 23551 }, { "epoch": 1.1032932027919613, "grad_norm": 0.5974718242925062, "learning_rate": 2.203988405313309e-06, "loss": 0.2911, "step": 23552 }, { "epoch": 1.1033400477818898, "grad_norm": 0.5612294253160508, "learning_rate": 2.2038000901234822e-06, "loss": 0.2713, "step": 23553 }, { "epoch": 1.103386892771818, "grad_norm": 0.5945886257908802, "learning_rate": 2.2036117766382104e-06, "loss": 0.2777, "step": 23554 }, { "epoch": 1.1034337377617465, "grad_norm": 0.5689541193974925, "learning_rate": 2.2034234648585784e-06, "loss": 0.2745, "step": 23555 }, { "epoch": 1.1034805827516747, "grad_norm": 0.6453941137323707, "learning_rate": 2.2032351547856694e-06, "loss": 0.2704, "step": 23556 }, { "epoch": 1.103527427741603, "grad_norm": 0.5591624159237225, "learning_rate": 2.203046846420568e-06, "loss": 0.2569, "step": 23557 }, { "epoch": 1.1035742727315314, "grad_norm": 0.5960128700254943, "learning_rate": 2.202858539764357e-06, "loss": 0.281, "step": 23558 }, { "epoch": 1.1036211177214597, "grad_norm": 0.6105242349297174, "learning_rate": 2.2026702348181215e-06, "loss": 0.2818, "step": 23559 }, { "epoch": 1.1036679627113881, "grad_norm": 0.5546202883138408, "learning_rate": 2.202481931582943e-06, "loss": 0.2552, "step": 23560 }, { "epoch": 1.1037148077013164, "grad_norm": 0.5788018400835849, "learning_rate": 2.202293630059907e-06, "loss": 0.2838, "step": 23561 }, { "epoch": 1.1037616526912446, "grad_norm": 0.5389138780804562, "learning_rate": 2.202105330250095e-06, "loss": 0.2677, "step": 23562 }, { "epoch": 1.103808497681173, "grad_norm": 0.5707783324092415, "learning_rate": 2.201917032154593e-06, "loss": 0.27, "step": 23563 }, { "epoch": 1.1038553426711013, "grad_norm": 0.6006478563484788, "learning_rate": 2.201728735774483e-06, "loss": 0.2824, "step": 23564 }, { "epoch": 1.1039021876610295, "grad_norm": 0.5602176888366367, "learning_rate": 2.2015404411108504e-06, "loss": 0.2725, "step": 23565 }, { "epoch": 1.103949032650958, "grad_norm": 0.5948276418031512, "learning_rate": 2.2013521481647766e-06, "loss": 0.2729, "step": 23566 }, { "epoch": 1.1039958776408862, "grad_norm": 0.6080673345084618, "learning_rate": 2.2011638569373457e-06, "loss": 0.2832, "step": 23567 }, { "epoch": 1.1040427226308147, "grad_norm": 0.6148055546838168, "learning_rate": 2.2009755674296424e-06, "loss": 0.2924, "step": 23568 }, { "epoch": 1.104089567620743, "grad_norm": 0.5924430778836749, "learning_rate": 2.200787279642749e-06, "loss": 0.2878, "step": 23569 }, { "epoch": 1.1041364126106712, "grad_norm": 0.5661059935556885, "learning_rate": 2.2005989935777495e-06, "loss": 0.2669, "step": 23570 }, { "epoch": 1.1041832576005997, "grad_norm": 0.5594697318480835, "learning_rate": 2.2004107092357283e-06, "loss": 0.2681, "step": 23571 }, { "epoch": 1.104230102590528, "grad_norm": 0.5223689220942664, "learning_rate": 2.2002224266177686e-06, "loss": 0.2569, "step": 23572 }, { "epoch": 1.1042769475804564, "grad_norm": 0.5948504959550769, "learning_rate": 2.200034145724952e-06, "loss": 0.2894, "step": 23573 }, { "epoch": 1.1043237925703846, "grad_norm": 0.6008553990431397, "learning_rate": 2.199845866558364e-06, "loss": 0.2663, "step": 23574 }, { "epoch": 1.1043706375603128, "grad_norm": 0.5799352606234888, "learning_rate": 2.199657589119088e-06, "loss": 0.2769, "step": 23575 }, { "epoch": 1.1044174825502413, "grad_norm": 0.5768380506289482, "learning_rate": 2.1994693134082066e-06, "loss": 0.2677, "step": 23576 }, { "epoch": 1.1044643275401695, "grad_norm": 0.5765155153936734, "learning_rate": 2.199281039426805e-06, "loss": 0.2702, "step": 23577 }, { "epoch": 1.104511172530098, "grad_norm": 0.5572898598447562, "learning_rate": 2.199092767175964e-06, "loss": 0.2554, "step": 23578 }, { "epoch": 1.1045580175200262, "grad_norm": 0.564268805562858, "learning_rate": 2.198904496656769e-06, "loss": 0.289, "step": 23579 }, { "epoch": 1.1046048625099545, "grad_norm": 0.5518852820736935, "learning_rate": 2.198716227870303e-06, "loss": 0.2476, "step": 23580 }, { "epoch": 1.104651707499883, "grad_norm": 0.6022353603154793, "learning_rate": 2.198527960817649e-06, "loss": 0.2792, "step": 23581 }, { "epoch": 1.1046985524898112, "grad_norm": 0.5857803013196974, "learning_rate": 2.198339695499891e-06, "loss": 0.2789, "step": 23582 }, { "epoch": 1.1047453974797397, "grad_norm": 0.5858472314699945, "learning_rate": 2.198151431918113e-06, "loss": 0.2637, "step": 23583 }, { "epoch": 1.104792242469668, "grad_norm": 0.5569983834902863, "learning_rate": 2.197963170073397e-06, "loss": 0.2736, "step": 23584 }, { "epoch": 1.1048390874595961, "grad_norm": 0.5416990203280752, "learning_rate": 2.1977749099668266e-06, "loss": 0.262, "step": 23585 }, { "epoch": 1.1048859324495246, "grad_norm": 0.6131704991868617, "learning_rate": 2.197586651599486e-06, "loss": 0.2756, "step": 23586 }, { "epoch": 1.1049327774394528, "grad_norm": 0.5914343452530723, "learning_rate": 2.197398394972458e-06, "loss": 0.279, "step": 23587 }, { "epoch": 1.104979622429381, "grad_norm": 0.6197343916706818, "learning_rate": 2.1972101400868266e-06, "loss": 0.2834, "step": 23588 }, { "epoch": 1.1050264674193095, "grad_norm": 0.6203263733755311, "learning_rate": 2.197021886943675e-06, "loss": 0.2956, "step": 23589 }, { "epoch": 1.1050733124092378, "grad_norm": 0.5936178545387883, "learning_rate": 2.196833635544086e-06, "loss": 0.2854, "step": 23590 }, { "epoch": 1.1051201573991662, "grad_norm": 0.6100273419443484, "learning_rate": 2.1966453858891433e-06, "loss": 0.2634, "step": 23591 }, { "epoch": 1.1051670023890945, "grad_norm": 0.6246413016172754, "learning_rate": 2.1964571379799294e-06, "loss": 0.2709, "step": 23592 }, { "epoch": 1.1052138473790227, "grad_norm": 0.5843905504909098, "learning_rate": 2.1962688918175295e-06, "loss": 0.2649, "step": 23593 }, { "epoch": 1.1052606923689512, "grad_norm": 0.5797791480814689, "learning_rate": 2.196080647403025e-06, "loss": 0.2694, "step": 23594 }, { "epoch": 1.1053075373588794, "grad_norm": 0.5701455906017521, "learning_rate": 2.1958924047375e-06, "loss": 0.2658, "step": 23595 }, { "epoch": 1.105354382348808, "grad_norm": 0.5905404755261215, "learning_rate": 2.1957041638220382e-06, "loss": 0.2742, "step": 23596 }, { "epoch": 1.1054012273387361, "grad_norm": 0.5694085537722822, "learning_rate": 2.1955159246577225e-06, "loss": 0.2653, "step": 23597 }, { "epoch": 1.1054480723286644, "grad_norm": 0.6213908828317144, "learning_rate": 2.1953276872456355e-06, "loss": 0.2865, "step": 23598 }, { "epoch": 1.1054949173185928, "grad_norm": 0.5995838680625675, "learning_rate": 2.1951394515868617e-06, "loss": 0.2702, "step": 23599 }, { "epoch": 1.105541762308521, "grad_norm": 0.5970125907252668, "learning_rate": 2.1949512176824834e-06, "loss": 0.2886, "step": 23600 }, { "epoch": 1.1055886072984493, "grad_norm": 0.6140599591866435, "learning_rate": 2.1947629855335838e-06, "loss": 0.2882, "step": 23601 }, { "epoch": 1.1056354522883778, "grad_norm": 0.5378701913926576, "learning_rate": 2.1945747551412475e-06, "loss": 0.2599, "step": 23602 }, { "epoch": 1.105682297278306, "grad_norm": 0.6298624744093647, "learning_rate": 2.1943865265065568e-06, "loss": 0.3036, "step": 23603 }, { "epoch": 1.1057291422682345, "grad_norm": 0.5891094781286704, "learning_rate": 2.194198299630594e-06, "loss": 0.2901, "step": 23604 }, { "epoch": 1.1057759872581627, "grad_norm": 0.6203529119441551, "learning_rate": 2.1940100745144425e-06, "loss": 0.2832, "step": 23605 }, { "epoch": 1.105822832248091, "grad_norm": 0.6070483011652626, "learning_rate": 2.1938218511591874e-06, "loss": 0.2681, "step": 23606 }, { "epoch": 1.1058696772380194, "grad_norm": 0.589037258030725, "learning_rate": 2.19363362956591e-06, "loss": 0.2748, "step": 23607 }, { "epoch": 1.1059165222279477, "grad_norm": 0.6030220753382745, "learning_rate": 2.193445409735695e-06, "loss": 0.3039, "step": 23608 }, { "epoch": 1.1059633672178761, "grad_norm": 0.5632644350418248, "learning_rate": 2.1932571916696237e-06, "loss": 0.289, "step": 23609 }, { "epoch": 1.1060102122078044, "grad_norm": 0.6013121611227352, "learning_rate": 2.1930689753687804e-06, "loss": 0.2908, "step": 23610 }, { "epoch": 1.1060570571977326, "grad_norm": 0.6062037254809772, "learning_rate": 2.1928807608342473e-06, "loss": 0.2913, "step": 23611 }, { "epoch": 1.106103902187661, "grad_norm": 0.5534684253305844, "learning_rate": 2.1926925480671086e-06, "loss": 0.266, "step": 23612 }, { "epoch": 1.1061507471775893, "grad_norm": 0.6378315676823876, "learning_rate": 2.1925043370684476e-06, "loss": 0.2782, "step": 23613 }, { "epoch": 1.1061975921675178, "grad_norm": 0.5943537136942283, "learning_rate": 2.1923161278393473e-06, "loss": 0.2712, "step": 23614 }, { "epoch": 1.106244437157446, "grad_norm": 0.5973371571723828, "learning_rate": 2.1921279203808893e-06, "loss": 0.291, "step": 23615 }, { "epoch": 1.1062912821473743, "grad_norm": 0.6285274792442079, "learning_rate": 2.1919397146941575e-06, "loss": 0.2839, "step": 23616 }, { "epoch": 1.1063381271373027, "grad_norm": 0.5960701883647598, "learning_rate": 2.191751510780236e-06, "loss": 0.2798, "step": 23617 }, { "epoch": 1.106384972127231, "grad_norm": 0.6183038355649823, "learning_rate": 2.1915633086402068e-06, "loss": 0.2635, "step": 23618 }, { "epoch": 1.1064318171171594, "grad_norm": 0.5657071818678234, "learning_rate": 2.1913751082751525e-06, "loss": 0.2754, "step": 23619 }, { "epoch": 1.1064786621070877, "grad_norm": 0.5897815587652344, "learning_rate": 2.1911869096861587e-06, "loss": 0.3, "step": 23620 }, { "epoch": 1.106525507097016, "grad_norm": 0.5689442884825514, "learning_rate": 2.1909987128743056e-06, "loss": 0.2823, "step": 23621 }, { "epoch": 1.1065723520869444, "grad_norm": 0.6129504259919322, "learning_rate": 2.190810517840677e-06, "loss": 0.2995, "step": 23622 }, { "epoch": 1.1066191970768726, "grad_norm": 0.5666428211178571, "learning_rate": 2.190622324586356e-06, "loss": 0.261, "step": 23623 }, { "epoch": 1.1066660420668009, "grad_norm": 0.6147286645474757, "learning_rate": 2.190434133112426e-06, "loss": 0.2655, "step": 23624 }, { "epoch": 1.1067128870567293, "grad_norm": 0.5802900827370138, "learning_rate": 2.1902459434199696e-06, "loss": 0.2727, "step": 23625 }, { "epoch": 1.1067597320466576, "grad_norm": 0.6657666190348609, "learning_rate": 2.190057755510071e-06, "loss": 0.2915, "step": 23626 }, { "epoch": 1.106806577036586, "grad_norm": 0.5999906257750185, "learning_rate": 2.1898695693838114e-06, "loss": 0.2827, "step": 23627 }, { "epoch": 1.1068534220265143, "grad_norm": 0.5483180007684629, "learning_rate": 2.1896813850422742e-06, "loss": 0.2721, "step": 23628 }, { "epoch": 1.1069002670164425, "grad_norm": 0.5786463999958741, "learning_rate": 2.1894932024865426e-06, "loss": 0.2771, "step": 23629 }, { "epoch": 1.106947112006371, "grad_norm": 0.5569097940812893, "learning_rate": 2.1893050217176993e-06, "loss": 0.2761, "step": 23630 }, { "epoch": 1.1069939569962992, "grad_norm": 0.5845595979652445, "learning_rate": 2.1891168427368283e-06, "loss": 0.2815, "step": 23631 }, { "epoch": 1.1070408019862277, "grad_norm": 0.5789583183218253, "learning_rate": 2.1889286655450124e-06, "loss": 0.2769, "step": 23632 }, { "epoch": 1.107087646976156, "grad_norm": 0.5604105540086726, "learning_rate": 2.188740490143333e-06, "loss": 0.2648, "step": 23633 }, { "epoch": 1.1071344919660842, "grad_norm": 0.6125254518362305, "learning_rate": 2.188552316532873e-06, "loss": 0.2845, "step": 23634 }, { "epoch": 1.1071813369560126, "grad_norm": 0.5454514315859798, "learning_rate": 2.188364144714717e-06, "loss": 0.2912, "step": 23635 }, { "epoch": 1.1072281819459409, "grad_norm": 0.5714731901615013, "learning_rate": 2.188175974689947e-06, "loss": 0.2646, "step": 23636 }, { "epoch": 1.107275026935869, "grad_norm": 0.607962476474445, "learning_rate": 2.187987806459646e-06, "loss": 0.277, "step": 23637 }, { "epoch": 1.1073218719257976, "grad_norm": 0.611275682319895, "learning_rate": 2.1877996400248964e-06, "loss": 0.2795, "step": 23638 }, { "epoch": 1.1073687169157258, "grad_norm": 0.6164041136693673, "learning_rate": 2.1876114753867818e-06, "loss": 0.2752, "step": 23639 }, { "epoch": 1.1074155619056543, "grad_norm": 0.6116620059487824, "learning_rate": 2.187423312546384e-06, "loss": 0.275, "step": 23640 }, { "epoch": 1.1074624068955825, "grad_norm": 0.6305578913086843, "learning_rate": 2.1872351515047873e-06, "loss": 0.2724, "step": 23641 }, { "epoch": 1.1075092518855107, "grad_norm": 0.5662644355887011, "learning_rate": 2.1870469922630725e-06, "loss": 0.2809, "step": 23642 }, { "epoch": 1.1075560968754392, "grad_norm": 0.5813194293111688, "learning_rate": 2.1868588348223243e-06, "loss": 0.286, "step": 23643 }, { "epoch": 1.1076029418653675, "grad_norm": 0.5932340959084784, "learning_rate": 2.1866706791836255e-06, "loss": 0.2803, "step": 23644 }, { "epoch": 1.107649786855296, "grad_norm": 0.5864391179421987, "learning_rate": 2.186482525348058e-06, "loss": 0.268, "step": 23645 }, { "epoch": 1.1076966318452242, "grad_norm": 0.677062999717921, "learning_rate": 2.1862943733167043e-06, "loss": 0.3068, "step": 23646 }, { "epoch": 1.1077434768351524, "grad_norm": 0.5660052506521229, "learning_rate": 2.1861062230906473e-06, "loss": 0.2539, "step": 23647 }, { "epoch": 1.1077903218250809, "grad_norm": 0.6275369061501519, "learning_rate": 2.1859180746709706e-06, "loss": 0.2872, "step": 23648 }, { "epoch": 1.107837166815009, "grad_norm": 0.6873141545873142, "learning_rate": 2.1857299280587555e-06, "loss": 0.3062, "step": 23649 }, { "epoch": 1.1078840118049376, "grad_norm": 0.5399025814251176, "learning_rate": 2.1855417832550875e-06, "loss": 0.2695, "step": 23650 }, { "epoch": 1.1079308567948658, "grad_norm": 0.6312558962010875, "learning_rate": 2.185353640261046e-06, "loss": 0.28, "step": 23651 }, { "epoch": 1.107977701784794, "grad_norm": 0.5602064176528223, "learning_rate": 2.185165499077716e-06, "loss": 0.2658, "step": 23652 }, { "epoch": 1.1080245467747225, "grad_norm": 0.5798999904750287, "learning_rate": 2.184977359706179e-06, "loss": 0.2737, "step": 23653 }, { "epoch": 1.1080713917646507, "grad_norm": 0.5932995879913107, "learning_rate": 2.1847892221475175e-06, "loss": 0.2694, "step": 23654 }, { "epoch": 1.1081182367545792, "grad_norm": 0.6029152280130732, "learning_rate": 2.184601086402816e-06, "loss": 0.2678, "step": 23655 }, { "epoch": 1.1081650817445075, "grad_norm": 0.5687707744888039, "learning_rate": 2.184412952473156e-06, "loss": 0.281, "step": 23656 }, { "epoch": 1.1082119267344357, "grad_norm": 0.5833499571934825, "learning_rate": 2.1842248203596193e-06, "loss": 0.2729, "step": 23657 }, { "epoch": 1.1082587717243642, "grad_norm": 0.5668333354513659, "learning_rate": 2.1840366900632895e-06, "loss": 0.2587, "step": 23658 }, { "epoch": 1.1083056167142924, "grad_norm": 0.5581122525907517, "learning_rate": 2.1838485615852494e-06, "loss": 0.2739, "step": 23659 }, { "epoch": 1.1083524617042206, "grad_norm": 0.5774840266237106, "learning_rate": 2.183660434926581e-06, "loss": 0.2666, "step": 23660 }, { "epoch": 1.108399306694149, "grad_norm": 0.6451683771704851, "learning_rate": 2.1834723100883676e-06, "loss": 0.2938, "step": 23661 }, { "epoch": 1.1084461516840773, "grad_norm": 0.6088106484488034, "learning_rate": 2.183284187071692e-06, "loss": 0.2669, "step": 23662 }, { "epoch": 1.1084929966740058, "grad_norm": 0.5552181100017781, "learning_rate": 2.183096065877636e-06, "loss": 0.2582, "step": 23663 }, { "epoch": 1.108539841663934, "grad_norm": 0.5955937581714911, "learning_rate": 2.1829079465072822e-06, "loss": 0.2762, "step": 23664 }, { "epoch": 1.1085866866538623, "grad_norm": 0.5553951928034478, "learning_rate": 2.1827198289617134e-06, "loss": 0.266, "step": 23665 }, { "epoch": 1.1086335316437907, "grad_norm": 0.6071679586691795, "learning_rate": 2.1825317132420126e-06, "loss": 0.2962, "step": 23666 }, { "epoch": 1.108680376633719, "grad_norm": 0.6381207275460317, "learning_rate": 2.1823435993492615e-06, "loss": 0.2825, "step": 23667 }, { "epoch": 1.1087272216236475, "grad_norm": 0.5306592947685497, "learning_rate": 2.182155487284543e-06, "loss": 0.2531, "step": 23668 }, { "epoch": 1.1087740666135757, "grad_norm": 0.6210003682360611, "learning_rate": 2.181967377048941e-06, "loss": 0.2733, "step": 23669 }, { "epoch": 1.108820911603504, "grad_norm": 0.6139153965762277, "learning_rate": 2.1817792686435364e-06, "loss": 0.2931, "step": 23670 }, { "epoch": 1.1088677565934324, "grad_norm": 0.5439856356020981, "learning_rate": 2.1815911620694113e-06, "loss": 0.2638, "step": 23671 }, { "epoch": 1.1089146015833606, "grad_norm": 0.5259060124218946, "learning_rate": 2.1814030573276492e-06, "loss": 0.2511, "step": 23672 }, { "epoch": 1.1089614465732889, "grad_norm": 0.5634388685872392, "learning_rate": 2.1812149544193328e-06, "loss": 0.2867, "step": 23673 }, { "epoch": 1.1090082915632173, "grad_norm": 0.6823347738687885, "learning_rate": 2.1810268533455444e-06, "loss": 0.2924, "step": 23674 }, { "epoch": 1.1090551365531456, "grad_norm": 0.5559516730757208, "learning_rate": 2.180838754107366e-06, "loss": 0.2534, "step": 23675 }, { "epoch": 1.109101981543074, "grad_norm": 0.60751401748568, "learning_rate": 2.1806506567058807e-06, "loss": 0.2671, "step": 23676 }, { "epoch": 1.1091488265330023, "grad_norm": 0.5800833345149957, "learning_rate": 2.1804625611421704e-06, "loss": 0.2754, "step": 23677 }, { "epoch": 1.1091956715229305, "grad_norm": 0.5962913777089289, "learning_rate": 2.1802744674173174e-06, "loss": 0.2843, "step": 23678 }, { "epoch": 1.109242516512859, "grad_norm": 0.6120997392669847, "learning_rate": 2.180086375532404e-06, "loss": 0.2937, "step": 23679 }, { "epoch": 1.1092893615027872, "grad_norm": 0.5687503461933481, "learning_rate": 2.179898285488514e-06, "loss": 0.2603, "step": 23680 }, { "epoch": 1.1093362064927157, "grad_norm": 0.5895388550919572, "learning_rate": 2.17971019728673e-06, "loss": 0.281, "step": 23681 }, { "epoch": 1.109383051482644, "grad_norm": 0.5756278330441058, "learning_rate": 2.1795221109281317e-06, "loss": 0.2779, "step": 23682 }, { "epoch": 1.1094298964725722, "grad_norm": 0.5732755381058328, "learning_rate": 2.179334026413803e-06, "loss": 0.2769, "step": 23683 }, { "epoch": 1.1094767414625006, "grad_norm": 0.5829256313626886, "learning_rate": 2.179145943744827e-06, "loss": 0.2871, "step": 23684 }, { "epoch": 1.1095235864524289, "grad_norm": 0.5568240938165947, "learning_rate": 2.178957862922285e-06, "loss": 0.2709, "step": 23685 }, { "epoch": 1.1095704314423573, "grad_norm": 0.6182533407789098, "learning_rate": 2.1787697839472606e-06, "loss": 0.294, "step": 23686 }, { "epoch": 1.1096172764322856, "grad_norm": 0.545250077509048, "learning_rate": 2.1785817068208348e-06, "loss": 0.2585, "step": 23687 }, { "epoch": 1.1096641214222138, "grad_norm": 0.5688817859411597, "learning_rate": 2.178393631544091e-06, "loss": 0.2739, "step": 23688 }, { "epoch": 1.1097109664121423, "grad_norm": 0.5978189939649036, "learning_rate": 2.17820555811811e-06, "loss": 0.2845, "step": 23689 }, { "epoch": 1.1097578114020705, "grad_norm": 0.5931063980083868, "learning_rate": 2.178017486543976e-06, "loss": 0.2685, "step": 23690 }, { "epoch": 1.109804656391999, "grad_norm": 0.5691968706143082, "learning_rate": 2.17782941682277e-06, "loss": 0.2919, "step": 23691 }, { "epoch": 1.1098515013819272, "grad_norm": 0.5754823017867388, "learning_rate": 2.177641348955574e-06, "loss": 0.2563, "step": 23692 }, { "epoch": 1.1098983463718555, "grad_norm": 0.5709839583316608, "learning_rate": 2.177453282943473e-06, "loss": 0.2847, "step": 23693 }, { "epoch": 1.109945191361784, "grad_norm": 0.6295593268636401, "learning_rate": 2.1772652187875464e-06, "loss": 0.29, "step": 23694 }, { "epoch": 1.1099920363517122, "grad_norm": 0.6183749427666978, "learning_rate": 2.1770771564888765e-06, "loss": 0.2768, "step": 23695 }, { "epoch": 1.1100388813416404, "grad_norm": 0.5902226347452606, "learning_rate": 2.1768890960485465e-06, "loss": 0.2862, "step": 23696 }, { "epoch": 1.1100857263315689, "grad_norm": 0.6559509978072644, "learning_rate": 2.1767010374676394e-06, "loss": 0.2748, "step": 23697 }, { "epoch": 1.1101325713214971, "grad_norm": 0.5725160039758448, "learning_rate": 2.1765129807472363e-06, "loss": 0.2864, "step": 23698 }, { "epoch": 1.1101794163114256, "grad_norm": 0.6059809063025836, "learning_rate": 2.17632492588842e-06, "loss": 0.2853, "step": 23699 }, { "epoch": 1.1102262613013538, "grad_norm": 0.5488971681298522, "learning_rate": 2.176136872892272e-06, "loss": 0.2697, "step": 23700 }, { "epoch": 1.110273106291282, "grad_norm": 0.6004408370728881, "learning_rate": 2.175948821759875e-06, "loss": 0.2838, "step": 23701 }, { "epoch": 1.1103199512812105, "grad_norm": 0.6105214194885563, "learning_rate": 2.1757607724923108e-06, "loss": 0.2746, "step": 23702 }, { "epoch": 1.1103667962711388, "grad_norm": 0.5846307595319872, "learning_rate": 2.1755727250906618e-06, "loss": 0.2742, "step": 23703 }, { "epoch": 1.1104136412610672, "grad_norm": 0.5595447941245473, "learning_rate": 2.1753846795560106e-06, "loss": 0.2906, "step": 23704 }, { "epoch": 1.1104604862509955, "grad_norm": 0.619028287168202, "learning_rate": 2.1751966358894394e-06, "loss": 0.2708, "step": 23705 }, { "epoch": 1.1105073312409237, "grad_norm": 0.5459150898010251, "learning_rate": 2.1750085940920293e-06, "loss": 0.2595, "step": 23706 }, { "epoch": 1.1105541762308522, "grad_norm": 0.5840893261293327, "learning_rate": 2.1748205541648624e-06, "loss": 0.2814, "step": 23707 }, { "epoch": 1.1106010212207804, "grad_norm": 0.6011005781762104, "learning_rate": 2.174632516109023e-06, "loss": 0.2923, "step": 23708 }, { "epoch": 1.1106478662107087, "grad_norm": 0.600323891514902, "learning_rate": 2.1744444799255906e-06, "loss": 0.2799, "step": 23709 }, { "epoch": 1.1106947112006371, "grad_norm": 0.5828225932983757, "learning_rate": 2.174256445615648e-06, "loss": 0.2654, "step": 23710 }, { "epoch": 1.1107415561905654, "grad_norm": 0.5979637863838133, "learning_rate": 2.1740684131802793e-06, "loss": 0.2733, "step": 23711 }, { "epoch": 1.1107884011804938, "grad_norm": 0.6071856906996498, "learning_rate": 2.173880382620564e-06, "loss": 0.2787, "step": 23712 }, { "epoch": 1.110835246170422, "grad_norm": 0.5759263786500379, "learning_rate": 2.173692353937585e-06, "loss": 0.2689, "step": 23713 }, { "epoch": 1.1108820911603503, "grad_norm": 0.5504439386264723, "learning_rate": 2.1735043271324244e-06, "loss": 0.2667, "step": 23714 }, { "epoch": 1.1109289361502788, "grad_norm": 0.5964753805947878, "learning_rate": 2.173316302206165e-06, "loss": 0.2924, "step": 23715 }, { "epoch": 1.110975781140207, "grad_norm": 0.5841916049654287, "learning_rate": 2.1731282791598874e-06, "loss": 0.2966, "step": 23716 }, { "epoch": 1.1110226261301355, "grad_norm": 0.5887726735154503, "learning_rate": 2.1729402579946757e-06, "loss": 0.2799, "step": 23717 }, { "epoch": 1.1110694711200637, "grad_norm": 0.5855280988831121, "learning_rate": 2.1727522387116093e-06, "loss": 0.259, "step": 23718 }, { "epoch": 1.111116316109992, "grad_norm": 0.6128999888611859, "learning_rate": 2.172564221311772e-06, "loss": 0.2883, "step": 23719 }, { "epoch": 1.1111631610999204, "grad_norm": 0.6596018493336226, "learning_rate": 2.172376205796245e-06, "loss": 0.2802, "step": 23720 }, { "epoch": 1.1112100060898487, "grad_norm": 0.5886138625074022, "learning_rate": 2.1721881921661104e-06, "loss": 0.2705, "step": 23721 }, { "epoch": 1.1112568510797771, "grad_norm": 0.5953698200032708, "learning_rate": 2.1720001804224513e-06, "loss": 0.2654, "step": 23722 }, { "epoch": 1.1113036960697054, "grad_norm": 0.5956755152177765, "learning_rate": 2.1718121705663487e-06, "loss": 0.2688, "step": 23723 }, { "epoch": 1.1113505410596336, "grad_norm": 0.642574425823472, "learning_rate": 2.171624162598884e-06, "loss": 0.301, "step": 23724 }, { "epoch": 1.111397386049562, "grad_norm": 0.6128680992439403, "learning_rate": 2.171436156521139e-06, "loss": 0.2834, "step": 23725 }, { "epoch": 1.1114442310394903, "grad_norm": 0.5983623299535312, "learning_rate": 2.1712481523341975e-06, "loss": 0.2654, "step": 23726 }, { "epoch": 1.1114910760294188, "grad_norm": 0.5984701901158769, "learning_rate": 2.1710601500391396e-06, "loss": 0.2635, "step": 23727 }, { "epoch": 1.111537921019347, "grad_norm": 0.5695117233850937, "learning_rate": 2.1708721496370483e-06, "loss": 0.2742, "step": 23728 }, { "epoch": 1.1115847660092752, "grad_norm": 0.5956243700832661, "learning_rate": 2.1706841511290044e-06, "loss": 0.2813, "step": 23729 }, { "epoch": 1.1116316109992037, "grad_norm": 0.6033583166680097, "learning_rate": 2.1704961545160917e-06, "loss": 0.2742, "step": 23730 }, { "epoch": 1.111678455989132, "grad_norm": 0.633453288105841, "learning_rate": 2.1703081597993897e-06, "loss": 0.2975, "step": 23731 }, { "epoch": 1.1117253009790602, "grad_norm": 0.5936566909122574, "learning_rate": 2.1701201669799814e-06, "loss": 0.2779, "step": 23732 }, { "epoch": 1.1117721459689887, "grad_norm": 0.5715143706829854, "learning_rate": 2.1699321760589493e-06, "loss": 0.2677, "step": 23733 }, { "epoch": 1.111818990958917, "grad_norm": 0.6409658644258999, "learning_rate": 2.1697441870373737e-06, "loss": 0.2744, "step": 23734 }, { "epoch": 1.1118658359488454, "grad_norm": 0.6188964479915053, "learning_rate": 2.169556199916338e-06, "loss": 0.2836, "step": 23735 }, { "epoch": 1.1119126809387736, "grad_norm": 0.5650586999458906, "learning_rate": 2.1693682146969235e-06, "loss": 0.2663, "step": 23736 }, { "epoch": 1.1119595259287018, "grad_norm": 0.5942163901159087, "learning_rate": 2.1691802313802114e-06, "loss": 0.2641, "step": 23737 }, { "epoch": 1.1120063709186303, "grad_norm": 0.6348140489835432, "learning_rate": 2.1689922499672837e-06, "loss": 0.2918, "step": 23738 }, { "epoch": 1.1120532159085585, "grad_norm": 0.5620389138621369, "learning_rate": 2.168804270459223e-06, "loss": 0.2596, "step": 23739 }, { "epoch": 1.112100060898487, "grad_norm": 0.6162507439630822, "learning_rate": 2.1686162928571098e-06, "loss": 0.2737, "step": 23740 }, { "epoch": 1.1121469058884152, "grad_norm": 0.571748653557979, "learning_rate": 2.1684283171620264e-06, "loss": 0.2735, "step": 23741 }, { "epoch": 1.1121937508783435, "grad_norm": 0.583365664549642, "learning_rate": 2.168240343375056e-06, "loss": 0.2565, "step": 23742 }, { "epoch": 1.112240595868272, "grad_norm": 0.5787824309506495, "learning_rate": 2.1680523714972784e-06, "loss": 0.2734, "step": 23743 }, { "epoch": 1.1122874408582002, "grad_norm": 0.6123604385662847, "learning_rate": 2.1678644015297754e-06, "loss": 0.2757, "step": 23744 }, { "epoch": 1.1123342858481284, "grad_norm": 0.6004008559985006, "learning_rate": 2.1676764334736295e-06, "loss": 0.2627, "step": 23745 }, { "epoch": 1.112381130838057, "grad_norm": 0.5758870832017193, "learning_rate": 2.1674884673299225e-06, "loss": 0.2665, "step": 23746 }, { "epoch": 1.1124279758279851, "grad_norm": 0.5650647108301335, "learning_rate": 2.167300503099736e-06, "loss": 0.2861, "step": 23747 }, { "epoch": 1.1124748208179136, "grad_norm": 0.5778743608447627, "learning_rate": 2.1671125407841515e-06, "loss": 0.2637, "step": 23748 }, { "epoch": 1.1125216658078418, "grad_norm": 0.618000305244528, "learning_rate": 2.16692458038425e-06, "loss": 0.2889, "step": 23749 }, { "epoch": 1.11256851079777, "grad_norm": 0.6171639894642031, "learning_rate": 2.1667366219011144e-06, "loss": 0.2977, "step": 23750 }, { "epoch": 1.1126153557876985, "grad_norm": 0.5576388283060271, "learning_rate": 2.1665486653358254e-06, "loss": 0.2752, "step": 23751 }, { "epoch": 1.1126622007776268, "grad_norm": 0.5748958318674203, "learning_rate": 2.1663607106894646e-06, "loss": 0.2709, "step": 23752 }, { "epoch": 1.1127090457675552, "grad_norm": 0.5728792633595152, "learning_rate": 2.166172757963115e-06, "loss": 0.2649, "step": 23753 }, { "epoch": 1.1127558907574835, "grad_norm": 0.5555837533220148, "learning_rate": 2.1659848071578574e-06, "loss": 0.2515, "step": 23754 }, { "epoch": 1.1128027357474117, "grad_norm": 0.5815282494054976, "learning_rate": 2.1657968582747723e-06, "loss": 0.2627, "step": 23755 }, { "epoch": 1.1128495807373402, "grad_norm": 0.5634535733737418, "learning_rate": 2.1656089113149424e-06, "loss": 0.256, "step": 23756 }, { "epoch": 1.1128964257272684, "grad_norm": 0.5769352322440804, "learning_rate": 2.1654209662794502e-06, "loss": 0.2667, "step": 23757 }, { "epoch": 1.112943270717197, "grad_norm": 0.6414272048645961, "learning_rate": 2.165233023169375e-06, "loss": 0.2884, "step": 23758 }, { "epoch": 1.1129901157071251, "grad_norm": 0.6145450281570927, "learning_rate": 2.1650450819857998e-06, "loss": 0.2723, "step": 23759 }, { "epoch": 1.1130369606970534, "grad_norm": 0.5977446617697335, "learning_rate": 2.164857142729807e-06, "loss": 0.2678, "step": 23760 }, { "epoch": 1.1130838056869818, "grad_norm": 0.6595877460440873, "learning_rate": 2.164669205402477e-06, "loss": 0.2847, "step": 23761 }, { "epoch": 1.11313065067691, "grad_norm": 0.6241838194059668, "learning_rate": 2.16448127000489e-06, "loss": 0.2816, "step": 23762 }, { "epoch": 1.1131774956668385, "grad_norm": 0.5652842915397277, "learning_rate": 2.1642933365381295e-06, "loss": 0.2739, "step": 23763 }, { "epoch": 1.1132243406567668, "grad_norm": 0.6349229674241862, "learning_rate": 2.1641054050032774e-06, "loss": 0.2935, "step": 23764 }, { "epoch": 1.113271185646695, "grad_norm": 0.5895256264071186, "learning_rate": 2.1639174754014133e-06, "loss": 0.2672, "step": 23765 }, { "epoch": 1.1133180306366235, "grad_norm": 0.6300549304284317, "learning_rate": 2.163729547733621e-06, "loss": 0.2923, "step": 23766 }, { "epoch": 1.1133648756265517, "grad_norm": 0.6065034879645808, "learning_rate": 2.1635416220009793e-06, "loss": 0.2861, "step": 23767 }, { "epoch": 1.11341172061648, "grad_norm": 0.5924374056226653, "learning_rate": 2.1633536982045715e-06, "loss": 0.2969, "step": 23768 }, { "epoch": 1.1134585656064084, "grad_norm": 0.5956189274995003, "learning_rate": 2.163165776345478e-06, "loss": 0.2724, "step": 23769 }, { "epoch": 1.1135054105963367, "grad_norm": 0.6058195123088405, "learning_rate": 2.162977856424781e-06, "loss": 0.2781, "step": 23770 }, { "epoch": 1.1135522555862651, "grad_norm": 0.6088691869168235, "learning_rate": 2.1627899384435623e-06, "loss": 0.2799, "step": 23771 }, { "epoch": 1.1135991005761934, "grad_norm": 0.5853539539517018, "learning_rate": 2.1626020224029033e-06, "loss": 0.2664, "step": 23772 }, { "epoch": 1.1136459455661216, "grad_norm": 0.6635028186892507, "learning_rate": 2.1624141083038834e-06, "loss": 0.2908, "step": 23773 }, { "epoch": 1.11369279055605, "grad_norm": 0.6950910332985105, "learning_rate": 2.1622261961475857e-06, "loss": 0.2923, "step": 23774 }, { "epoch": 1.1137396355459783, "grad_norm": 0.6000880276144435, "learning_rate": 2.162038285935092e-06, "loss": 0.2768, "step": 23775 }, { "epoch": 1.1137864805359068, "grad_norm": 0.5565082824227117, "learning_rate": 2.161850377667482e-06, "loss": 0.2684, "step": 23776 }, { "epoch": 1.113833325525835, "grad_norm": 0.6807121267264666, "learning_rate": 2.1616624713458392e-06, "loss": 0.2929, "step": 23777 }, { "epoch": 1.1138801705157633, "grad_norm": 0.6159930751321968, "learning_rate": 2.161474566971244e-06, "loss": 0.2914, "step": 23778 }, { "epoch": 1.1139270155056917, "grad_norm": 0.6571874253039646, "learning_rate": 2.1612866645447767e-06, "loss": 0.2808, "step": 23779 }, { "epoch": 1.11397386049562, "grad_norm": 0.6466149786510947, "learning_rate": 2.1610987640675197e-06, "loss": 0.2908, "step": 23780 }, { "epoch": 1.1140207054855482, "grad_norm": 0.6615469144701167, "learning_rate": 2.1609108655405543e-06, "loss": 0.3098, "step": 23781 }, { "epoch": 1.1140675504754767, "grad_norm": 0.6043530240977509, "learning_rate": 2.1607229689649613e-06, "loss": 0.268, "step": 23782 }, { "epoch": 1.114114395465405, "grad_norm": 0.617112124675198, "learning_rate": 2.1605350743418225e-06, "loss": 0.277, "step": 23783 }, { "epoch": 1.1141612404553334, "grad_norm": 0.647972435839366, "learning_rate": 2.16034718167222e-06, "loss": 0.2898, "step": 23784 }, { "epoch": 1.1142080854452616, "grad_norm": 0.5947466765257886, "learning_rate": 2.160159290957233e-06, "loss": 0.2935, "step": 23785 }, { "epoch": 1.1142549304351899, "grad_norm": 0.6093690571826877, "learning_rate": 2.159971402197944e-06, "loss": 0.2777, "step": 23786 }, { "epoch": 1.1143017754251183, "grad_norm": 0.6072557162376871, "learning_rate": 2.159783515395434e-06, "loss": 0.2834, "step": 23787 }, { "epoch": 1.1143486204150466, "grad_norm": 0.5759875716644454, "learning_rate": 2.159595630550785e-06, "loss": 0.2714, "step": 23788 }, { "epoch": 1.114395465404975, "grad_norm": 0.5931196156377727, "learning_rate": 2.1594077476650766e-06, "loss": 0.2871, "step": 23789 }, { "epoch": 1.1144423103949033, "grad_norm": 0.5647029815981427, "learning_rate": 2.1592198667393923e-06, "loss": 0.2649, "step": 23790 }, { "epoch": 1.1144891553848315, "grad_norm": 0.5901134809057835, "learning_rate": 2.159031987774811e-06, "loss": 0.2926, "step": 23791 }, { "epoch": 1.11453600037476, "grad_norm": 0.6054343118286786, "learning_rate": 2.158844110772416e-06, "loss": 0.2664, "step": 23792 }, { "epoch": 1.1145828453646882, "grad_norm": 0.5707601251581081, "learning_rate": 2.1586562357332857e-06, "loss": 0.2587, "step": 23793 }, { "epoch": 1.1146296903546167, "grad_norm": 0.5658032541681008, "learning_rate": 2.1584683626585033e-06, "loss": 0.2629, "step": 23794 }, { "epoch": 1.114676535344545, "grad_norm": 0.6019319574300335, "learning_rate": 2.1582804915491507e-06, "loss": 0.2886, "step": 23795 }, { "epoch": 1.1147233803344732, "grad_norm": 0.5698159006964983, "learning_rate": 2.158092622406307e-06, "loss": 0.2762, "step": 23796 }, { "epoch": 1.1147702253244016, "grad_norm": 0.6100893827286017, "learning_rate": 2.157904755231056e-06, "loss": 0.2847, "step": 23797 }, { "epoch": 1.1148170703143299, "grad_norm": 0.5774002090018621, "learning_rate": 2.1577168900244756e-06, "loss": 0.2837, "step": 23798 }, { "epoch": 1.1148639153042583, "grad_norm": 0.5843183746957714, "learning_rate": 2.157529026787649e-06, "loss": 0.2744, "step": 23799 }, { "epoch": 1.1149107602941866, "grad_norm": 0.5977310924933086, "learning_rate": 2.157341165521656e-06, "loss": 0.2742, "step": 23800 }, { "epoch": 1.1149576052841148, "grad_norm": 0.6401111769692921, "learning_rate": 2.1571533062275792e-06, "loss": 0.2984, "step": 23801 }, { "epoch": 1.1150044502740433, "grad_norm": 0.6367942349828961, "learning_rate": 2.1569654489064988e-06, "loss": 0.2965, "step": 23802 }, { "epoch": 1.1150512952639715, "grad_norm": 0.5798005763382732, "learning_rate": 2.156777593559497e-06, "loss": 0.2719, "step": 23803 }, { "epoch": 1.1150981402538998, "grad_norm": 0.5920213086099997, "learning_rate": 2.1565897401876524e-06, "loss": 0.2582, "step": 23804 }, { "epoch": 1.1151449852438282, "grad_norm": 0.6454893645607694, "learning_rate": 2.156401888792048e-06, "loss": 0.3114, "step": 23805 }, { "epoch": 1.1151918302337565, "grad_norm": 0.6014134761583892, "learning_rate": 2.1562140393737647e-06, "loss": 0.2958, "step": 23806 }, { "epoch": 1.115238675223685, "grad_norm": 0.5972622051468293, "learning_rate": 2.1560261919338822e-06, "loss": 0.2765, "step": 23807 }, { "epoch": 1.1152855202136132, "grad_norm": 0.6441118530014416, "learning_rate": 2.155838346473483e-06, "loss": 0.2847, "step": 23808 }, { "epoch": 1.1153323652035414, "grad_norm": 0.6050293627036045, "learning_rate": 2.1556505029936486e-06, "loss": 0.2827, "step": 23809 }, { "epoch": 1.1153792101934699, "grad_norm": 0.5867521738244365, "learning_rate": 2.1554626614954587e-06, "loss": 0.2811, "step": 23810 }, { "epoch": 1.115426055183398, "grad_norm": 0.6164483428729116, "learning_rate": 2.155274821979994e-06, "loss": 0.2815, "step": 23811 }, { "epoch": 1.1154729001733266, "grad_norm": 0.5673628177049518, "learning_rate": 2.155086984448336e-06, "loss": 0.2841, "step": 23812 }, { "epoch": 1.1155197451632548, "grad_norm": 0.580438693764094, "learning_rate": 2.154899148901566e-06, "loss": 0.2745, "step": 23813 }, { "epoch": 1.115566590153183, "grad_norm": 0.5652773423083338, "learning_rate": 2.154711315340764e-06, "loss": 0.2728, "step": 23814 }, { "epoch": 1.1156134351431115, "grad_norm": 0.5593282125466587, "learning_rate": 2.154523483767013e-06, "loss": 0.278, "step": 23815 }, { "epoch": 1.1156602801330398, "grad_norm": 0.5738878407048396, "learning_rate": 2.1543356541813915e-06, "loss": 0.2854, "step": 23816 }, { "epoch": 1.115707125122968, "grad_norm": 0.687504509864402, "learning_rate": 2.1541478265849816e-06, "loss": 0.3023, "step": 23817 }, { "epoch": 1.1157539701128965, "grad_norm": 0.5917924014556926, "learning_rate": 2.1539600009788637e-06, "loss": 0.2717, "step": 23818 }, { "epoch": 1.1158008151028247, "grad_norm": 0.6169135379477088, "learning_rate": 2.153772177364119e-06, "loss": 0.2919, "step": 23819 }, { "epoch": 1.1158476600927532, "grad_norm": 0.6472144077829811, "learning_rate": 2.153584355741829e-06, "loss": 0.2965, "step": 23820 }, { "epoch": 1.1158945050826814, "grad_norm": 0.5918132174183148, "learning_rate": 2.1533965361130744e-06, "loss": 0.2668, "step": 23821 }, { "epoch": 1.1159413500726096, "grad_norm": 0.600416711770205, "learning_rate": 2.153208718478935e-06, "loss": 0.2685, "step": 23822 }, { "epoch": 1.115988195062538, "grad_norm": 0.59711116638092, "learning_rate": 2.153020902840492e-06, "loss": 0.2697, "step": 23823 }, { "epoch": 1.1160350400524663, "grad_norm": 0.5435178516931479, "learning_rate": 2.1528330891988266e-06, "loss": 0.2577, "step": 23824 }, { "epoch": 1.1160818850423948, "grad_norm": 0.6023388774674739, "learning_rate": 2.1526452775550194e-06, "loss": 0.2809, "step": 23825 }, { "epoch": 1.116128730032323, "grad_norm": 0.6367679676452731, "learning_rate": 2.152457467910152e-06, "loss": 0.2815, "step": 23826 }, { "epoch": 1.1161755750222513, "grad_norm": 0.6248176871743605, "learning_rate": 2.1522696602653045e-06, "loss": 0.2801, "step": 23827 }, { "epoch": 1.1162224200121798, "grad_norm": 0.5817129271959994, "learning_rate": 2.152081854621557e-06, "loss": 0.2822, "step": 23828 }, { "epoch": 1.116269265002108, "grad_norm": 0.5512850181030482, "learning_rate": 2.151894050979991e-06, "loss": 0.2643, "step": 23829 }, { "epoch": 1.1163161099920365, "grad_norm": 0.6263130654085856, "learning_rate": 2.1517062493416878e-06, "loss": 0.2853, "step": 23830 }, { "epoch": 1.1163629549819647, "grad_norm": 0.6069925163791146, "learning_rate": 2.151518449707727e-06, "loss": 0.2759, "step": 23831 }, { "epoch": 1.116409799971893, "grad_norm": 0.5791554588754826, "learning_rate": 2.15133065207919e-06, "loss": 0.2602, "step": 23832 }, { "epoch": 1.1164566449618214, "grad_norm": 0.5511982124681253, "learning_rate": 2.1511428564571586e-06, "loss": 0.2627, "step": 23833 }, { "epoch": 1.1165034899517496, "grad_norm": 0.6379652845520063, "learning_rate": 2.1509550628427115e-06, "loss": 0.2914, "step": 23834 }, { "epoch": 1.116550334941678, "grad_norm": 0.5975797462947909, "learning_rate": 2.1507672712369297e-06, "loss": 0.2765, "step": 23835 }, { "epoch": 1.1165971799316063, "grad_norm": 0.539903913701441, "learning_rate": 2.150579481640895e-06, "loss": 0.2532, "step": 23836 }, { "epoch": 1.1166440249215346, "grad_norm": 0.6291821154415352, "learning_rate": 2.1503916940556877e-06, "loss": 0.3036, "step": 23837 }, { "epoch": 1.116690869911463, "grad_norm": 0.5760094399068689, "learning_rate": 2.1502039084823878e-06, "loss": 0.2681, "step": 23838 }, { "epoch": 1.1167377149013913, "grad_norm": 0.5937147321582357, "learning_rate": 2.150016124922078e-06, "loss": 0.2795, "step": 23839 }, { "epoch": 1.1167845598913195, "grad_norm": 0.6102631489339864, "learning_rate": 2.149828343375836e-06, "loss": 0.2776, "step": 23840 }, { "epoch": 1.116831404881248, "grad_norm": 0.5883477206441007, "learning_rate": 2.1496405638447443e-06, "loss": 0.2803, "step": 23841 }, { "epoch": 1.1168782498711762, "grad_norm": 0.5857753378136042, "learning_rate": 2.149452786329883e-06, "loss": 0.2845, "step": 23842 }, { "epoch": 1.1169250948611047, "grad_norm": 0.62344296740885, "learning_rate": 2.149265010832332e-06, "loss": 0.278, "step": 23843 }, { "epoch": 1.116971939851033, "grad_norm": 0.5972099632962907, "learning_rate": 2.149077237353174e-06, "loss": 0.2853, "step": 23844 }, { "epoch": 1.1170187848409612, "grad_norm": 0.6014799603422485, "learning_rate": 2.1488894658934886e-06, "loss": 0.2835, "step": 23845 }, { "epoch": 1.1170656298308896, "grad_norm": 0.5505844975278428, "learning_rate": 2.148701696454355e-06, "loss": 0.2624, "step": 23846 }, { "epoch": 1.1171124748208179, "grad_norm": 0.676770003653777, "learning_rate": 2.1485139290368547e-06, "loss": 0.2931, "step": 23847 }, { "epoch": 1.1171593198107463, "grad_norm": 0.6077051023900701, "learning_rate": 2.148326163642069e-06, "loss": 0.2895, "step": 23848 }, { "epoch": 1.1172061648006746, "grad_norm": 0.5966159397471149, "learning_rate": 2.1481384002710774e-06, "loss": 0.2819, "step": 23849 }, { "epoch": 1.1172530097906028, "grad_norm": 0.6119118282466266, "learning_rate": 2.147950638924961e-06, "loss": 0.2829, "step": 23850 }, { "epoch": 1.1172998547805313, "grad_norm": 0.5998905687743917, "learning_rate": 2.147762879604801e-06, "loss": 0.2821, "step": 23851 }, { "epoch": 1.1173466997704595, "grad_norm": 0.6411300072355222, "learning_rate": 2.1475751223116765e-06, "loss": 0.2933, "step": 23852 }, { "epoch": 1.1173935447603878, "grad_norm": 0.5806183524532349, "learning_rate": 2.147387367046668e-06, "loss": 0.2755, "step": 23853 }, { "epoch": 1.1174403897503162, "grad_norm": 0.5653850329615776, "learning_rate": 2.147199613810857e-06, "loss": 0.2579, "step": 23854 }, { "epoch": 1.1174872347402445, "grad_norm": 0.5784389003242667, "learning_rate": 2.147011862605324e-06, "loss": 0.2797, "step": 23855 }, { "epoch": 1.117534079730173, "grad_norm": 0.5721306297012473, "learning_rate": 2.1468241134311483e-06, "loss": 0.2682, "step": 23856 }, { "epoch": 1.1175809247201012, "grad_norm": 0.5519063003547613, "learning_rate": 2.1466363662894124e-06, "loss": 0.2787, "step": 23857 }, { "epoch": 1.1176277697100294, "grad_norm": 0.5792025118751957, "learning_rate": 2.1464486211811943e-06, "loss": 0.2647, "step": 23858 }, { "epoch": 1.1176746146999579, "grad_norm": 0.5673648599689981, "learning_rate": 2.1462608781075756e-06, "loss": 0.2627, "step": 23859 }, { "epoch": 1.1177214596898861, "grad_norm": 0.5706767588795028, "learning_rate": 2.1460731370696362e-06, "loss": 0.2564, "step": 23860 }, { "epoch": 1.1177683046798146, "grad_norm": 0.5851265875921781, "learning_rate": 2.1458853980684575e-06, "loss": 0.2803, "step": 23861 }, { "epoch": 1.1178151496697428, "grad_norm": 0.5642149748420503, "learning_rate": 2.1456976611051193e-06, "loss": 0.2682, "step": 23862 }, { "epoch": 1.117861994659671, "grad_norm": 0.5679823664851907, "learning_rate": 2.145509926180703e-06, "loss": 0.2534, "step": 23863 }, { "epoch": 1.1179088396495995, "grad_norm": 0.6067779422117647, "learning_rate": 2.1453221932962866e-06, "loss": 0.2778, "step": 23864 }, { "epoch": 1.1179556846395278, "grad_norm": 0.5457445156468158, "learning_rate": 2.1451344624529516e-06, "loss": 0.2664, "step": 23865 }, { "epoch": 1.1180025296294562, "grad_norm": 0.6104493770168943, "learning_rate": 2.144946733651779e-06, "loss": 0.2819, "step": 23866 }, { "epoch": 1.1180493746193845, "grad_norm": 0.601636744986572, "learning_rate": 2.144759006893849e-06, "loss": 0.2766, "step": 23867 }, { "epoch": 1.1180962196093127, "grad_norm": 0.5831189658159913, "learning_rate": 2.144571282180242e-06, "loss": 0.2848, "step": 23868 }, { "epoch": 1.1181430645992412, "grad_norm": 0.6050980334111251, "learning_rate": 2.1443835595120364e-06, "loss": 0.2828, "step": 23869 }, { "epoch": 1.1181899095891694, "grad_norm": 0.5603226175927631, "learning_rate": 2.144195838890316e-06, "loss": 0.2691, "step": 23870 }, { "epoch": 1.1182367545790979, "grad_norm": 0.5802722665091942, "learning_rate": 2.144008120316158e-06, "loss": 0.2673, "step": 23871 }, { "epoch": 1.1182835995690261, "grad_norm": 0.6178545428356499, "learning_rate": 2.143820403790644e-06, "loss": 0.2773, "step": 23872 }, { "epoch": 1.1183304445589544, "grad_norm": 0.5741987548988264, "learning_rate": 2.143632689314854e-06, "loss": 0.2713, "step": 23873 }, { "epoch": 1.1183772895488828, "grad_norm": 0.5977508588693551, "learning_rate": 2.143444976889868e-06, "loss": 0.2848, "step": 23874 }, { "epoch": 1.118424134538811, "grad_norm": 0.5873872651383943, "learning_rate": 2.143257266516767e-06, "loss": 0.2584, "step": 23875 }, { "epoch": 1.1184709795287393, "grad_norm": 0.5799842713758534, "learning_rate": 2.143069558196631e-06, "loss": 0.2682, "step": 23876 }, { "epoch": 1.1185178245186678, "grad_norm": 0.6031103382516005, "learning_rate": 2.1428818519305396e-06, "loss": 0.2956, "step": 23877 }, { "epoch": 1.118564669508596, "grad_norm": 0.586297985116536, "learning_rate": 2.142694147719573e-06, "loss": 0.2826, "step": 23878 }, { "epoch": 1.1186115144985245, "grad_norm": 0.5496349698358561, "learning_rate": 2.1425064455648124e-06, "loss": 0.2663, "step": 23879 }, { "epoch": 1.1186583594884527, "grad_norm": 0.5570536430814056, "learning_rate": 2.1423187454673372e-06, "loss": 0.2524, "step": 23880 }, { "epoch": 1.118705204478381, "grad_norm": 0.6547860433522152, "learning_rate": 2.142131047428227e-06, "loss": 0.2736, "step": 23881 }, { "epoch": 1.1187520494683094, "grad_norm": 0.6225881550460874, "learning_rate": 2.1419433514485645e-06, "loss": 0.2875, "step": 23882 }, { "epoch": 1.1187988944582377, "grad_norm": 0.567341826562716, "learning_rate": 2.141755657529427e-06, "loss": 0.2771, "step": 23883 }, { "epoch": 1.1188457394481661, "grad_norm": 0.5743750969116024, "learning_rate": 2.141567965671895e-06, "loss": 0.2662, "step": 23884 }, { "epoch": 1.1188925844380944, "grad_norm": 0.5761519820822973, "learning_rate": 2.1413802758770493e-06, "loss": 0.2654, "step": 23885 }, { "epoch": 1.1189394294280226, "grad_norm": 0.5600557621803158, "learning_rate": 2.1411925881459706e-06, "loss": 0.2591, "step": 23886 }, { "epoch": 1.118986274417951, "grad_norm": 0.5785378842342512, "learning_rate": 2.141004902479738e-06, "loss": 0.2732, "step": 23887 }, { "epoch": 1.1190331194078793, "grad_norm": 0.5839809127589392, "learning_rate": 2.140817218879433e-06, "loss": 0.2799, "step": 23888 }, { "epoch": 1.1190799643978075, "grad_norm": 0.6341106206198606, "learning_rate": 2.1406295373461334e-06, "loss": 0.2935, "step": 23889 }, { "epoch": 1.119126809387736, "grad_norm": 0.652744621578206, "learning_rate": 2.140441857880921e-06, "loss": 0.2997, "step": 23890 }, { "epoch": 1.1191736543776643, "grad_norm": 0.5988514464588606, "learning_rate": 2.140254180484875e-06, "loss": 0.2904, "step": 23891 }, { "epoch": 1.1192204993675927, "grad_norm": 0.5866627894115589, "learning_rate": 2.1400665051590758e-06, "loss": 0.2862, "step": 23892 }, { "epoch": 1.119267344357521, "grad_norm": 0.5604411207866133, "learning_rate": 2.1398788319046037e-06, "loss": 0.272, "step": 23893 }, { "epoch": 1.1193141893474492, "grad_norm": 0.600967620608498, "learning_rate": 2.139691160722539e-06, "loss": 0.2861, "step": 23894 }, { "epoch": 1.1193610343373777, "grad_norm": 0.6327653817987827, "learning_rate": 2.1395034916139605e-06, "loss": 0.2769, "step": 23895 }, { "epoch": 1.119407879327306, "grad_norm": 0.5769538563693387, "learning_rate": 2.139315824579948e-06, "loss": 0.2725, "step": 23896 }, { "epoch": 1.1194547243172344, "grad_norm": 0.5746930646157368, "learning_rate": 2.1391281596215834e-06, "loss": 0.2719, "step": 23897 }, { "epoch": 1.1195015693071626, "grad_norm": 0.558034214119064, "learning_rate": 2.1389404967399446e-06, "loss": 0.251, "step": 23898 }, { "epoch": 1.1195484142970908, "grad_norm": 0.6072856096132369, "learning_rate": 2.1387528359361127e-06, "loss": 0.2777, "step": 23899 }, { "epoch": 1.1195952592870193, "grad_norm": 0.5957782503600002, "learning_rate": 2.1385651772111686e-06, "loss": 0.2832, "step": 23900 }, { "epoch": 1.1196421042769475, "grad_norm": 0.613619845211929, "learning_rate": 2.1383775205661906e-06, "loss": 0.2727, "step": 23901 }, { "epoch": 1.119688949266876, "grad_norm": 0.5558182453128215, "learning_rate": 2.138189866002258e-06, "loss": 0.2602, "step": 23902 }, { "epoch": 1.1197357942568043, "grad_norm": 0.6090536629622495, "learning_rate": 2.1380022135204526e-06, "loss": 0.2716, "step": 23903 }, { "epoch": 1.1197826392467325, "grad_norm": 0.6038517360872984, "learning_rate": 2.1378145631218534e-06, "loss": 0.2861, "step": 23904 }, { "epoch": 1.119829484236661, "grad_norm": 0.5881711132114715, "learning_rate": 2.13762691480754e-06, "loss": 0.2843, "step": 23905 }, { "epoch": 1.1198763292265892, "grad_norm": 0.6149966378593777, "learning_rate": 2.137439268578594e-06, "loss": 0.2797, "step": 23906 }, { "epoch": 1.1199231742165177, "grad_norm": 0.6015857131083457, "learning_rate": 2.137251624436092e-06, "loss": 0.2946, "step": 23907 }, { "epoch": 1.119970019206446, "grad_norm": 0.5482763310047551, "learning_rate": 2.137063982381117e-06, "loss": 0.2628, "step": 23908 }, { "epoch": 1.1200168641963741, "grad_norm": 0.5658840715430605, "learning_rate": 2.136876342414747e-06, "loss": 0.2845, "step": 23909 }, { "epoch": 1.1200637091863026, "grad_norm": 0.5763535069100895, "learning_rate": 2.136688704538062e-06, "loss": 0.2811, "step": 23910 }, { "epoch": 1.1201105541762308, "grad_norm": 0.6040259163444329, "learning_rate": 2.1365010687521427e-06, "loss": 0.2818, "step": 23911 }, { "epoch": 1.120157399166159, "grad_norm": 0.5746338192772003, "learning_rate": 2.136313435058069e-06, "loss": 0.2774, "step": 23912 }, { "epoch": 1.1202042441560875, "grad_norm": 0.5580032183120142, "learning_rate": 2.1361258034569195e-06, "loss": 0.2783, "step": 23913 }, { "epoch": 1.1202510891460158, "grad_norm": 0.5779868458476832, "learning_rate": 2.135938173949774e-06, "loss": 0.2757, "step": 23914 }, { "epoch": 1.1202979341359443, "grad_norm": 0.5578537332684065, "learning_rate": 2.1357505465377134e-06, "loss": 0.2751, "step": 23915 }, { "epoch": 1.1203447791258725, "grad_norm": 0.5460453038647345, "learning_rate": 2.1355629212218163e-06, "loss": 0.2766, "step": 23916 }, { "epoch": 1.1203916241158007, "grad_norm": 0.5705980516472879, "learning_rate": 2.135375298003164e-06, "loss": 0.2717, "step": 23917 }, { "epoch": 1.1204384691057292, "grad_norm": 0.5969440313011545, "learning_rate": 2.1351876768828346e-06, "loss": 0.2859, "step": 23918 }, { "epoch": 1.1204853140956574, "grad_norm": 0.5781467504751578, "learning_rate": 2.135000057861909e-06, "loss": 0.2767, "step": 23919 }, { "epoch": 1.120532159085586, "grad_norm": 0.579921809010491, "learning_rate": 2.1348124409414654e-06, "loss": 0.2769, "step": 23920 }, { "epoch": 1.1205790040755141, "grad_norm": 0.6264796132900821, "learning_rate": 2.134624826122585e-06, "loss": 0.274, "step": 23921 }, { "epoch": 1.1206258490654424, "grad_norm": 0.6337299315620266, "learning_rate": 2.1344372134063463e-06, "loss": 0.2888, "step": 23922 }, { "epoch": 1.1206726940553708, "grad_norm": 0.5844963826070547, "learning_rate": 2.13424960279383e-06, "loss": 0.273, "step": 23923 }, { "epoch": 1.120719539045299, "grad_norm": 0.5886725665527106, "learning_rate": 2.134061994286116e-06, "loss": 0.2812, "step": 23924 }, { "epoch": 1.1207663840352273, "grad_norm": 0.5504002255504156, "learning_rate": 2.1338743878842825e-06, "loss": 0.2591, "step": 23925 }, { "epoch": 1.1208132290251558, "grad_norm": 0.6236537836065758, "learning_rate": 2.1336867835894096e-06, "loss": 0.2929, "step": 23926 }, { "epoch": 1.120860074015084, "grad_norm": 0.5566403090710842, "learning_rate": 2.1334991814025773e-06, "loss": 0.2503, "step": 23927 }, { "epoch": 1.1209069190050125, "grad_norm": 0.6345377658669171, "learning_rate": 2.1333115813248655e-06, "loss": 0.3004, "step": 23928 }, { "epoch": 1.1209537639949407, "grad_norm": 0.6245751974039797, "learning_rate": 2.133123983357353e-06, "loss": 0.2889, "step": 23929 }, { "epoch": 1.121000608984869, "grad_norm": 0.5818243144012495, "learning_rate": 2.1329363875011207e-06, "loss": 0.2691, "step": 23930 }, { "epoch": 1.1210474539747974, "grad_norm": 0.5662759829878252, "learning_rate": 2.132748793757246e-06, "loss": 0.273, "step": 23931 }, { "epoch": 1.1210942989647257, "grad_norm": 0.5561308806452214, "learning_rate": 2.1325612021268104e-06, "loss": 0.2619, "step": 23932 }, { "epoch": 1.1211411439546541, "grad_norm": 0.5893895791482794, "learning_rate": 2.132373612610892e-06, "loss": 0.2641, "step": 23933 }, { "epoch": 1.1211879889445824, "grad_norm": 0.5734042307416825, "learning_rate": 2.1321860252105712e-06, "loss": 0.2599, "step": 23934 }, { "epoch": 1.1212348339345106, "grad_norm": 0.5825437219905175, "learning_rate": 2.1319984399269277e-06, "loss": 0.2534, "step": 23935 }, { "epoch": 1.121281678924439, "grad_norm": 0.5964272291270656, "learning_rate": 2.1318108567610405e-06, "loss": 0.2856, "step": 23936 }, { "epoch": 1.1213285239143673, "grad_norm": 0.5971821902545167, "learning_rate": 2.1316232757139902e-06, "loss": 0.2801, "step": 23937 }, { "epoch": 1.1213753689042958, "grad_norm": 0.653673888539433, "learning_rate": 2.131435696786854e-06, "loss": 0.2981, "step": 23938 }, { "epoch": 1.121422213894224, "grad_norm": 0.6045801854720506, "learning_rate": 2.131248119980714e-06, "loss": 0.2788, "step": 23939 }, { "epoch": 1.1214690588841523, "grad_norm": 0.5803019864056717, "learning_rate": 2.1310605452966476e-06, "loss": 0.29, "step": 23940 }, { "epoch": 1.1215159038740807, "grad_norm": 0.6373250398893534, "learning_rate": 2.1308729727357345e-06, "loss": 0.2765, "step": 23941 }, { "epoch": 1.121562748864009, "grad_norm": 0.5748681942550449, "learning_rate": 2.130685402299056e-06, "loss": 0.2837, "step": 23942 }, { "epoch": 1.1216095938539374, "grad_norm": 0.5976988712921842, "learning_rate": 2.1304978339876902e-06, "loss": 0.2872, "step": 23943 }, { "epoch": 1.1216564388438657, "grad_norm": 0.562023116019198, "learning_rate": 2.1303102678027156e-06, "loss": 0.2768, "step": 23944 }, { "epoch": 1.121703283833794, "grad_norm": 0.5920303147857707, "learning_rate": 2.1301227037452124e-06, "loss": 0.2703, "step": 23945 }, { "epoch": 1.1217501288237224, "grad_norm": 0.6206402125549904, "learning_rate": 2.1299351418162607e-06, "loss": 0.2837, "step": 23946 }, { "epoch": 1.1217969738136506, "grad_norm": 0.6305626187878501, "learning_rate": 2.1297475820169387e-06, "loss": 0.286, "step": 23947 }, { "epoch": 1.1218438188035789, "grad_norm": 0.593625227103594, "learning_rate": 2.129560024348326e-06, "loss": 0.2773, "step": 23948 }, { "epoch": 1.1218906637935073, "grad_norm": 0.5350457076467551, "learning_rate": 2.1293724688115037e-06, "loss": 0.2616, "step": 23949 }, { "epoch": 1.1219375087834356, "grad_norm": 0.5553594452945178, "learning_rate": 2.129184915407549e-06, "loss": 0.2638, "step": 23950 }, { "epoch": 1.121984353773364, "grad_norm": 0.6105102404030163, "learning_rate": 2.1289973641375412e-06, "loss": 0.2647, "step": 23951 }, { "epoch": 1.1220311987632923, "grad_norm": 0.5963527363158512, "learning_rate": 2.1288098150025606e-06, "loss": 0.2718, "step": 23952 }, { "epoch": 1.1220780437532205, "grad_norm": 0.5851301195392904, "learning_rate": 2.128622268003687e-06, "loss": 0.273, "step": 23953 }, { "epoch": 1.122124888743149, "grad_norm": 0.6093926569409764, "learning_rate": 2.1284347231419975e-06, "loss": 0.2965, "step": 23954 }, { "epoch": 1.1221717337330772, "grad_norm": 0.6054895250109366, "learning_rate": 2.1282471804185747e-06, "loss": 0.2719, "step": 23955 }, { "epoch": 1.1222185787230057, "grad_norm": 0.6267878673630604, "learning_rate": 2.1280596398344946e-06, "loss": 0.2873, "step": 23956 }, { "epoch": 1.122265423712934, "grad_norm": 0.5573544752452084, "learning_rate": 2.1278721013908383e-06, "loss": 0.272, "step": 23957 }, { "epoch": 1.1223122687028622, "grad_norm": 0.5653531835424574, "learning_rate": 2.1276845650886837e-06, "loss": 0.2769, "step": 23958 }, { "epoch": 1.1223591136927906, "grad_norm": 0.5902997257517987, "learning_rate": 2.1274970309291113e-06, "loss": 0.28, "step": 23959 }, { "epoch": 1.1224059586827189, "grad_norm": 0.6219711867542878, "learning_rate": 2.1273094989132e-06, "loss": 0.2813, "step": 23960 }, { "epoch": 1.122452803672647, "grad_norm": 0.598234420420752, "learning_rate": 2.1271219690420297e-06, "loss": 0.281, "step": 23961 }, { "epoch": 1.1224996486625756, "grad_norm": 0.6122907897417647, "learning_rate": 2.1269344413166775e-06, "loss": 0.2988, "step": 23962 }, { "epoch": 1.1225464936525038, "grad_norm": 0.5857413896071332, "learning_rate": 2.1267469157382243e-06, "loss": 0.255, "step": 23963 }, { "epoch": 1.1225933386424323, "grad_norm": 0.598720567901613, "learning_rate": 2.126559392307749e-06, "loss": 0.2849, "step": 23964 }, { "epoch": 1.1226401836323605, "grad_norm": 0.5872513868164163, "learning_rate": 2.12637187102633e-06, "loss": 0.2662, "step": 23965 }, { "epoch": 1.1226870286222888, "grad_norm": 0.5973662137941081, "learning_rate": 2.126184351895047e-06, "loss": 0.2788, "step": 23966 }, { "epoch": 1.1227338736122172, "grad_norm": 0.6019008446279699, "learning_rate": 2.12599683491498e-06, "loss": 0.2665, "step": 23967 }, { "epoch": 1.1227807186021455, "grad_norm": 0.5821808942125055, "learning_rate": 2.1258093200872067e-06, "loss": 0.2812, "step": 23968 }, { "epoch": 1.122827563592074, "grad_norm": 0.584453555212196, "learning_rate": 2.1256218074128058e-06, "loss": 0.277, "step": 23969 }, { "epoch": 1.1228744085820022, "grad_norm": 0.5929635694373822, "learning_rate": 2.1254342968928586e-06, "loss": 0.2767, "step": 23970 }, { "epoch": 1.1229212535719304, "grad_norm": 0.5826268461380817, "learning_rate": 2.125246788528442e-06, "loss": 0.2718, "step": 23971 }, { "epoch": 1.1229680985618589, "grad_norm": 0.607363835808848, "learning_rate": 2.125059282320636e-06, "loss": 0.2835, "step": 23972 }, { "epoch": 1.123014943551787, "grad_norm": 0.6001179805107774, "learning_rate": 2.1248717782705204e-06, "loss": 0.2607, "step": 23973 }, { "epoch": 1.1230617885417156, "grad_norm": 0.6088769380235953, "learning_rate": 2.1246842763791733e-06, "loss": 0.2749, "step": 23974 }, { "epoch": 1.1231086335316438, "grad_norm": 0.5871154806045497, "learning_rate": 2.124496776647673e-06, "loss": 0.2817, "step": 23975 }, { "epoch": 1.123155478521572, "grad_norm": 0.5530298713640704, "learning_rate": 2.1243092790770995e-06, "loss": 0.255, "step": 23976 }, { "epoch": 1.1232023235115005, "grad_norm": 0.6698814161930643, "learning_rate": 2.1241217836685322e-06, "loss": 0.2863, "step": 23977 }, { "epoch": 1.1232491685014288, "grad_norm": 0.54709694996177, "learning_rate": 2.123934290423049e-06, "loss": 0.2622, "step": 23978 }, { "epoch": 1.1232960134913572, "grad_norm": 0.5944314532362023, "learning_rate": 2.123746799341731e-06, "loss": 0.281, "step": 23979 }, { "epoch": 1.1233428584812855, "grad_norm": 0.6647317893773665, "learning_rate": 2.123559310425654e-06, "loss": 0.2783, "step": 23980 }, { "epoch": 1.1233897034712137, "grad_norm": 0.5318410050981525, "learning_rate": 2.123371823675899e-06, "loss": 0.2492, "step": 23981 }, { "epoch": 1.1234365484611422, "grad_norm": 0.5965502536791047, "learning_rate": 2.123184339093544e-06, "loss": 0.2784, "step": 23982 }, { "epoch": 1.1234833934510704, "grad_norm": 0.5958053844121561, "learning_rate": 2.122996856679669e-06, "loss": 0.2699, "step": 23983 }, { "epoch": 1.1235302384409986, "grad_norm": 0.5737709287416719, "learning_rate": 2.122809376435352e-06, "loss": 0.2675, "step": 23984 }, { "epoch": 1.123577083430927, "grad_norm": 0.5970465518532375, "learning_rate": 2.1226218983616736e-06, "loss": 0.286, "step": 23985 }, { "epoch": 1.1236239284208553, "grad_norm": 0.617808461319744, "learning_rate": 2.1224344224597097e-06, "loss": 0.2881, "step": 23986 }, { "epoch": 1.1236707734107838, "grad_norm": 0.6055417108555058, "learning_rate": 2.1222469487305408e-06, "loss": 0.2754, "step": 23987 }, { "epoch": 1.123717618400712, "grad_norm": 0.6505361745630525, "learning_rate": 2.1220594771752463e-06, "loss": 0.2916, "step": 23988 }, { "epoch": 1.1237644633906403, "grad_norm": 0.5876447792214721, "learning_rate": 2.121872007794904e-06, "loss": 0.2713, "step": 23989 }, { "epoch": 1.1238113083805688, "grad_norm": 0.5468091786399704, "learning_rate": 2.1216845405905935e-06, "loss": 0.255, "step": 23990 }, { "epoch": 1.123858153370497, "grad_norm": 0.6009439810019984, "learning_rate": 2.1214970755633947e-06, "loss": 0.2867, "step": 23991 }, { "epoch": 1.1239049983604255, "grad_norm": 0.5786497223597843, "learning_rate": 2.121309612714384e-06, "loss": 0.2834, "step": 23992 }, { "epoch": 1.1239518433503537, "grad_norm": 0.5713356781933729, "learning_rate": 2.121122152044641e-06, "loss": 0.2798, "step": 23993 }, { "epoch": 1.123998688340282, "grad_norm": 0.6139632300622766, "learning_rate": 2.1209346935552448e-06, "loss": 0.2968, "step": 23994 }, { "epoch": 1.1240455333302104, "grad_norm": 0.6294461890730251, "learning_rate": 2.1207472372472743e-06, "loss": 0.2796, "step": 23995 }, { "epoch": 1.1240923783201386, "grad_norm": 0.5997864910408105, "learning_rate": 2.1205597831218084e-06, "loss": 0.2772, "step": 23996 }, { "epoch": 1.1241392233100669, "grad_norm": 0.6060277207921345, "learning_rate": 2.120372331179926e-06, "loss": 0.2811, "step": 23997 }, { "epoch": 1.1241860682999953, "grad_norm": 0.5862954908894794, "learning_rate": 2.1201848814227047e-06, "loss": 0.2671, "step": 23998 }, { "epoch": 1.1242329132899236, "grad_norm": 0.6059549081910376, "learning_rate": 2.119997433851224e-06, "loss": 0.2864, "step": 23999 }, { "epoch": 1.124279758279852, "grad_norm": 0.5817064261842957, "learning_rate": 2.1198099884665625e-06, "loss": 0.2745, "step": 24000 }, { "epoch": 1.1243266032697803, "grad_norm": 0.5598942228406119, "learning_rate": 2.119622545269799e-06, "loss": 0.2568, "step": 24001 }, { "epoch": 1.1243734482597085, "grad_norm": 0.5702877413030005, "learning_rate": 2.119435104262012e-06, "loss": 0.2673, "step": 24002 }, { "epoch": 1.124420293249637, "grad_norm": 0.5858491537040489, "learning_rate": 2.1192476654442806e-06, "loss": 0.2739, "step": 24003 }, { "epoch": 1.1244671382395652, "grad_norm": 0.6288791408729155, "learning_rate": 2.119060228817684e-06, "loss": 0.2852, "step": 24004 }, { "epoch": 1.1245139832294937, "grad_norm": 0.597316432375419, "learning_rate": 2.118872794383299e-06, "loss": 0.2783, "step": 24005 }, { "epoch": 1.124560828219422, "grad_norm": 0.5385705817325696, "learning_rate": 2.1186853621422056e-06, "loss": 0.2688, "step": 24006 }, { "epoch": 1.1246076732093502, "grad_norm": 0.625429376417483, "learning_rate": 2.1184979320954816e-06, "loss": 0.289, "step": 24007 }, { "epoch": 1.1246545181992786, "grad_norm": 0.5632531892055629, "learning_rate": 2.118310504244207e-06, "loss": 0.2713, "step": 24008 }, { "epoch": 1.1247013631892069, "grad_norm": 0.6354760142357722, "learning_rate": 2.1181230785894584e-06, "loss": 0.2972, "step": 24009 }, { "epoch": 1.1247482081791353, "grad_norm": 0.5821504250862968, "learning_rate": 2.117935655132317e-06, "loss": 0.2816, "step": 24010 }, { "epoch": 1.1247950531690636, "grad_norm": 0.6115229704733935, "learning_rate": 2.1177482338738585e-06, "loss": 0.2895, "step": 24011 }, { "epoch": 1.1248418981589918, "grad_norm": 0.5670624217451654, "learning_rate": 2.117560814815163e-06, "loss": 0.2748, "step": 24012 }, { "epoch": 1.1248887431489203, "grad_norm": 0.5261844650476352, "learning_rate": 2.1173733979573097e-06, "loss": 0.2537, "step": 24013 }, { "epoch": 1.1249355881388485, "grad_norm": 0.5921866382655916, "learning_rate": 2.1171859833013755e-06, "loss": 0.285, "step": 24014 }, { "epoch": 1.124982433128777, "grad_norm": 0.6118496845838988, "learning_rate": 2.11699857084844e-06, "loss": 0.2851, "step": 24015 }, { "epoch": 1.1250292781187052, "grad_norm": 0.6165586220748237, "learning_rate": 2.116811160599582e-06, "loss": 0.2807, "step": 24016 }, { "epoch": 1.1250761231086335, "grad_norm": 0.6332864935488398, "learning_rate": 2.1166237525558793e-06, "loss": 0.2886, "step": 24017 }, { "epoch": 1.125122968098562, "grad_norm": 0.532800699809236, "learning_rate": 2.1164363467184103e-06, "loss": 0.2656, "step": 24018 }, { "epoch": 1.1251698130884902, "grad_norm": 0.5979436573384866, "learning_rate": 2.116248943088254e-06, "loss": 0.2763, "step": 24019 }, { "epoch": 1.1252166580784184, "grad_norm": 0.5485398769328848, "learning_rate": 2.1160615416664875e-06, "loss": 0.2665, "step": 24020 }, { "epoch": 1.1252635030683469, "grad_norm": 0.5514747977832594, "learning_rate": 2.115874142454191e-06, "loss": 0.2638, "step": 24021 }, { "epoch": 1.1253103480582751, "grad_norm": 0.5657935973413097, "learning_rate": 2.115686745452443e-06, "loss": 0.2687, "step": 24022 }, { "epoch": 1.1253571930482036, "grad_norm": 0.5981664867475084, "learning_rate": 2.1154993506623208e-06, "loss": 0.2796, "step": 24023 }, { "epoch": 1.1254040380381318, "grad_norm": 0.5737197864167484, "learning_rate": 2.1153119580849026e-06, "loss": 0.2757, "step": 24024 }, { "epoch": 1.12545088302806, "grad_norm": 0.5992192349100635, "learning_rate": 2.1151245677212673e-06, "loss": 0.2675, "step": 24025 }, { "epoch": 1.1254977280179885, "grad_norm": 0.5998134439840792, "learning_rate": 2.1149371795724945e-06, "loss": 0.2717, "step": 24026 }, { "epoch": 1.1255445730079168, "grad_norm": 0.6571581703318992, "learning_rate": 2.11474979363966e-06, "loss": 0.2815, "step": 24027 }, { "epoch": 1.1255914179978452, "grad_norm": 0.5700727146419083, "learning_rate": 2.1145624099238452e-06, "loss": 0.2714, "step": 24028 }, { "epoch": 1.1256382629877735, "grad_norm": 0.5760080305861819, "learning_rate": 2.114375028426126e-06, "loss": 0.2742, "step": 24029 }, { "epoch": 1.1256851079777017, "grad_norm": 0.5806999670403822, "learning_rate": 2.1141876491475815e-06, "loss": 0.2701, "step": 24030 }, { "epoch": 1.1257319529676302, "grad_norm": 0.5402516290944142, "learning_rate": 2.11400027208929e-06, "loss": 0.2565, "step": 24031 }, { "epoch": 1.1257787979575584, "grad_norm": 0.558501367049757, "learning_rate": 2.11381289725233e-06, "loss": 0.2695, "step": 24032 }, { "epoch": 1.1258256429474867, "grad_norm": 0.5464237654896719, "learning_rate": 2.11362552463778e-06, "loss": 0.2584, "step": 24033 }, { "epoch": 1.1258724879374151, "grad_norm": 0.6846381089006508, "learning_rate": 2.1134381542467188e-06, "loss": 0.3149, "step": 24034 }, { "epoch": 1.1259193329273434, "grad_norm": 0.5726805967331434, "learning_rate": 2.1132507860802222e-06, "loss": 0.2757, "step": 24035 }, { "epoch": 1.1259661779172718, "grad_norm": 0.5889507666266826, "learning_rate": 2.113063420139371e-06, "loss": 0.2734, "step": 24036 }, { "epoch": 1.1260130229072, "grad_norm": 0.5413379339074231, "learning_rate": 2.1128760564252424e-06, "loss": 0.256, "step": 24037 }, { "epoch": 1.1260598678971283, "grad_norm": 0.5679865406986275, "learning_rate": 2.112688694938914e-06, "loss": 0.2765, "step": 24038 }, { "epoch": 1.1261067128870568, "grad_norm": 0.571939699688052, "learning_rate": 2.1125013356814655e-06, "loss": 0.2746, "step": 24039 }, { "epoch": 1.126153557876985, "grad_norm": 0.5747039068352839, "learning_rate": 2.1123139786539753e-06, "loss": 0.2592, "step": 24040 }, { "epoch": 1.1262004028669135, "grad_norm": 0.5841783021298427, "learning_rate": 2.1121266238575203e-06, "loss": 0.2751, "step": 24041 }, { "epoch": 1.1262472478568417, "grad_norm": 0.5832074603748995, "learning_rate": 2.1119392712931783e-06, "loss": 0.2744, "step": 24042 }, { "epoch": 1.12629409284677, "grad_norm": 0.5941236611519622, "learning_rate": 2.1117519209620286e-06, "loss": 0.2723, "step": 24043 }, { "epoch": 1.1263409378366984, "grad_norm": 0.5515388718317786, "learning_rate": 2.111564572865149e-06, "loss": 0.2549, "step": 24044 }, { "epoch": 1.1263877828266267, "grad_norm": 0.5900459776004721, "learning_rate": 2.1113772270036176e-06, "loss": 0.2786, "step": 24045 }, { "epoch": 1.126434627816555, "grad_norm": 0.5781413078481219, "learning_rate": 2.1111898833785137e-06, "loss": 0.2689, "step": 24046 }, { "epoch": 1.1264814728064834, "grad_norm": 0.5830760460522254, "learning_rate": 2.111002541990913e-06, "loss": 0.2674, "step": 24047 }, { "epoch": 1.1265283177964116, "grad_norm": 0.5886386219050241, "learning_rate": 2.1108152028418953e-06, "loss": 0.2851, "step": 24048 }, { "epoch": 1.12657516278634, "grad_norm": 0.5765005959843558, "learning_rate": 2.110627865932538e-06, "loss": 0.2724, "step": 24049 }, { "epoch": 1.1266220077762683, "grad_norm": 0.5818689730350222, "learning_rate": 2.1104405312639193e-06, "loss": 0.2832, "step": 24050 }, { "epoch": 1.1266688527661968, "grad_norm": 0.5883061400183646, "learning_rate": 2.110253198837118e-06, "loss": 0.272, "step": 24051 }, { "epoch": 1.126715697756125, "grad_norm": 0.5690082943325918, "learning_rate": 2.110065868653212e-06, "loss": 0.2634, "step": 24052 }, { "epoch": 1.1267625427460533, "grad_norm": 0.5914747526506484, "learning_rate": 2.1098785407132784e-06, "loss": 0.2764, "step": 24053 }, { "epoch": 1.1268093877359817, "grad_norm": 0.601423853195136, "learning_rate": 2.1096912150183953e-06, "loss": 0.2721, "step": 24054 }, { "epoch": 1.12685623272591, "grad_norm": 0.6376665290998002, "learning_rate": 2.1095038915696412e-06, "loss": 0.2892, "step": 24055 }, { "epoch": 1.1269030777158382, "grad_norm": 0.5404457514230476, "learning_rate": 2.1093165703680943e-06, "loss": 0.2607, "step": 24056 }, { "epoch": 1.1269499227057667, "grad_norm": 0.5606067364665694, "learning_rate": 2.1091292514148325e-06, "loss": 0.2601, "step": 24057 }, { "epoch": 1.126996767695695, "grad_norm": 0.5989721670727854, "learning_rate": 2.108941934710934e-06, "loss": 0.2701, "step": 24058 }, { "epoch": 1.1270436126856234, "grad_norm": 0.5568782504255487, "learning_rate": 2.108754620257476e-06, "loss": 0.2742, "step": 24059 }, { "epoch": 1.1270904576755516, "grad_norm": 0.565143885715257, "learning_rate": 2.1085673080555366e-06, "loss": 0.2551, "step": 24060 }, { "epoch": 1.1271373026654798, "grad_norm": 0.5707833988676363, "learning_rate": 2.1083799981061943e-06, "loss": 0.2758, "step": 24061 }, { "epoch": 1.1271841476554083, "grad_norm": 0.6330990084282698, "learning_rate": 2.108192690410526e-06, "loss": 0.3015, "step": 24062 }, { "epoch": 1.1272309926453365, "grad_norm": 0.5640222162699814, "learning_rate": 2.1080053849696105e-06, "loss": 0.2666, "step": 24063 }, { "epoch": 1.127277837635265, "grad_norm": 0.5978807360801842, "learning_rate": 2.107818081784527e-06, "loss": 0.2808, "step": 24064 }, { "epoch": 1.1273246826251933, "grad_norm": 0.5615101606950087, "learning_rate": 2.1076307808563505e-06, "loss": 0.2668, "step": 24065 }, { "epoch": 1.1273715276151215, "grad_norm": 0.5690990801949795, "learning_rate": 2.10744348218616e-06, "loss": 0.2644, "step": 24066 }, { "epoch": 1.12741837260505, "grad_norm": 0.5786760305942167, "learning_rate": 2.1072561857750334e-06, "loss": 0.2785, "step": 24067 }, { "epoch": 1.1274652175949782, "grad_norm": 0.6221263788187339, "learning_rate": 2.10706889162405e-06, "loss": 0.2708, "step": 24068 }, { "epoch": 1.1275120625849064, "grad_norm": 0.6293389521811916, "learning_rate": 2.106881599734285e-06, "loss": 0.2846, "step": 24069 }, { "epoch": 1.127558907574835, "grad_norm": 0.5943486640642724, "learning_rate": 2.106694310106818e-06, "loss": 0.2827, "step": 24070 }, { "epoch": 1.1276057525647631, "grad_norm": 0.6072757864820654, "learning_rate": 2.1065070227427275e-06, "loss": 0.2785, "step": 24071 }, { "epoch": 1.1276525975546916, "grad_norm": 0.5648833419868291, "learning_rate": 2.1063197376430894e-06, "loss": 0.284, "step": 24072 }, { "epoch": 1.1276994425446198, "grad_norm": 0.6010153549149495, "learning_rate": 2.106132454808982e-06, "loss": 0.2705, "step": 24073 }, { "epoch": 1.127746287534548, "grad_norm": 0.5558981167717758, "learning_rate": 2.1059451742414834e-06, "loss": 0.2721, "step": 24074 }, { "epoch": 1.1277931325244765, "grad_norm": 0.6010901652541312, "learning_rate": 2.105757895941671e-06, "loss": 0.2871, "step": 24075 }, { "epoch": 1.1278399775144048, "grad_norm": 0.5528585628659591, "learning_rate": 2.105570619910623e-06, "loss": 0.2639, "step": 24076 }, { "epoch": 1.1278868225043333, "grad_norm": 0.5929174787386966, "learning_rate": 2.1053833461494184e-06, "loss": 0.2757, "step": 24077 }, { "epoch": 1.1279336674942615, "grad_norm": 0.5627405260040763, "learning_rate": 2.1051960746591315e-06, "loss": 0.2686, "step": 24078 }, { "epoch": 1.1279805124841897, "grad_norm": 0.5474938936343587, "learning_rate": 2.1050088054408428e-06, "loss": 0.2598, "step": 24079 }, { "epoch": 1.1280273574741182, "grad_norm": 0.5524754492637638, "learning_rate": 2.1048215384956288e-06, "loss": 0.2625, "step": 24080 }, { "epoch": 1.1280742024640464, "grad_norm": 0.6083239728263478, "learning_rate": 2.104634273824568e-06, "loss": 0.2846, "step": 24081 }, { "epoch": 1.1281210474539747, "grad_norm": 0.6218182687723364, "learning_rate": 2.1044470114287373e-06, "loss": 0.2748, "step": 24082 }, { "epoch": 1.1281678924439031, "grad_norm": 0.6252040515302976, "learning_rate": 2.1042597513092154e-06, "loss": 0.2994, "step": 24083 }, { "epoch": 1.1282147374338314, "grad_norm": 0.552478310162137, "learning_rate": 2.104072493467078e-06, "loss": 0.2705, "step": 24084 }, { "epoch": 1.1282615824237598, "grad_norm": 0.5857099364856313, "learning_rate": 2.1038852379034043e-06, "loss": 0.2726, "step": 24085 }, { "epoch": 1.128308427413688, "grad_norm": 0.5708675980237959, "learning_rate": 2.1036979846192715e-06, "loss": 0.2835, "step": 24086 }, { "epoch": 1.1283552724036165, "grad_norm": 0.6210156589029352, "learning_rate": 2.1035107336157567e-06, "loss": 0.2823, "step": 24087 }, { "epoch": 1.1284021173935448, "grad_norm": 0.5905752300513116, "learning_rate": 2.103323484893938e-06, "loss": 0.2773, "step": 24088 }, { "epoch": 1.128448962383473, "grad_norm": 0.5914439259234958, "learning_rate": 2.1031362384548946e-06, "loss": 0.2836, "step": 24089 }, { "epoch": 1.1284958073734015, "grad_norm": 0.638796042693367, "learning_rate": 2.1029489942997013e-06, "loss": 0.2689, "step": 24090 }, { "epoch": 1.1285426523633297, "grad_norm": 0.5874598291046567, "learning_rate": 2.1027617524294366e-06, "loss": 0.2723, "step": 24091 }, { "epoch": 1.128589497353258, "grad_norm": 0.583144535969907, "learning_rate": 2.102574512845178e-06, "loss": 0.2549, "step": 24092 }, { "epoch": 1.1286363423431864, "grad_norm": 0.6029784308998879, "learning_rate": 2.1023872755480033e-06, "loss": 0.2805, "step": 24093 }, { "epoch": 1.1286831873331147, "grad_norm": 0.5542653855740365, "learning_rate": 2.1022000405389896e-06, "loss": 0.2637, "step": 24094 }, { "epoch": 1.1287300323230431, "grad_norm": 0.5899200636465237, "learning_rate": 2.102012807819216e-06, "loss": 0.2629, "step": 24095 }, { "epoch": 1.1287768773129714, "grad_norm": 0.5962813954170645, "learning_rate": 2.1018255773897574e-06, "loss": 0.2818, "step": 24096 }, { "epoch": 1.1288237223028996, "grad_norm": 0.6110109893478264, "learning_rate": 2.1016383492516933e-06, "loss": 0.2615, "step": 24097 }, { "epoch": 1.128870567292828, "grad_norm": 0.6439444355947423, "learning_rate": 2.1014511234060993e-06, "loss": 0.2671, "step": 24098 }, { "epoch": 1.1289174122827563, "grad_norm": 0.6236672583950349, "learning_rate": 2.1012638998540546e-06, "loss": 0.2635, "step": 24099 }, { "epoch": 1.1289642572726848, "grad_norm": 0.6003718594469113, "learning_rate": 2.101076678596636e-06, "loss": 0.2798, "step": 24100 }, { "epoch": 1.129011102262613, "grad_norm": 0.6155142585097995, "learning_rate": 2.1008894596349216e-06, "loss": 0.2837, "step": 24101 }, { "epoch": 1.1290579472525413, "grad_norm": 0.5937187149132948, "learning_rate": 2.100702242969987e-06, "loss": 0.2766, "step": 24102 }, { "epoch": 1.1291047922424697, "grad_norm": 0.5273972486300466, "learning_rate": 2.1005150286029104e-06, "loss": 0.251, "step": 24103 }, { "epoch": 1.129151637232398, "grad_norm": 0.6195445342871742, "learning_rate": 2.10032781653477e-06, "loss": 0.2559, "step": 24104 }, { "epoch": 1.1291984822223262, "grad_norm": 0.5774139258195493, "learning_rate": 2.100140606766642e-06, "loss": 0.2727, "step": 24105 }, { "epoch": 1.1292453272122547, "grad_norm": 0.5875269050303508, "learning_rate": 2.0999533992996047e-06, "loss": 0.264, "step": 24106 }, { "epoch": 1.129292172202183, "grad_norm": 0.5594215631384042, "learning_rate": 2.0997661941347356e-06, "loss": 0.2664, "step": 24107 }, { "epoch": 1.1293390171921114, "grad_norm": 0.6019205977968359, "learning_rate": 2.0995789912731113e-06, "loss": 0.2852, "step": 24108 }, { "epoch": 1.1293858621820396, "grad_norm": 0.6126387923973728, "learning_rate": 2.0993917907158083e-06, "loss": 0.2747, "step": 24109 }, { "epoch": 1.1294327071719679, "grad_norm": 0.6187528854835821, "learning_rate": 2.099204592463906e-06, "loss": 0.2676, "step": 24110 }, { "epoch": 1.1294795521618963, "grad_norm": 0.6115048135153445, "learning_rate": 2.099017396518479e-06, "loss": 0.2787, "step": 24111 }, { "epoch": 1.1295263971518246, "grad_norm": 0.5487278478473813, "learning_rate": 2.098830202880607e-06, "loss": 0.258, "step": 24112 }, { "epoch": 1.129573242141753, "grad_norm": 0.6020179643210976, "learning_rate": 2.098643011551367e-06, "loss": 0.2639, "step": 24113 }, { "epoch": 1.1296200871316813, "grad_norm": 0.6114734822204483, "learning_rate": 2.0984558225318354e-06, "loss": 0.2797, "step": 24114 }, { "epoch": 1.1296669321216095, "grad_norm": 0.594991037493843, "learning_rate": 2.098268635823089e-06, "loss": 0.272, "step": 24115 }, { "epoch": 1.129713777111538, "grad_norm": 0.5738930978380579, "learning_rate": 2.0980814514262054e-06, "loss": 0.2721, "step": 24116 }, { "epoch": 1.1297606221014662, "grad_norm": 0.5699385566132739, "learning_rate": 2.097894269342263e-06, "loss": 0.2723, "step": 24117 }, { "epoch": 1.1298074670913945, "grad_norm": 0.5983147668478331, "learning_rate": 2.097707089572337e-06, "loss": 0.2746, "step": 24118 }, { "epoch": 1.129854312081323, "grad_norm": 0.599184389471038, "learning_rate": 2.097519912117507e-06, "loss": 0.2722, "step": 24119 }, { "epoch": 1.1299011570712512, "grad_norm": 0.5849822546400467, "learning_rate": 2.0973327369788473e-06, "loss": 0.2671, "step": 24120 }, { "epoch": 1.1299480020611796, "grad_norm": 0.6357189192865431, "learning_rate": 2.0971455641574375e-06, "loss": 0.2851, "step": 24121 }, { "epoch": 1.1299948470511079, "grad_norm": 0.618303095089122, "learning_rate": 2.0969583936543526e-06, "loss": 0.278, "step": 24122 }, { "epoch": 1.1300416920410363, "grad_norm": 0.6190182655201714, "learning_rate": 2.0967712254706713e-06, "loss": 0.2829, "step": 24123 }, { "epoch": 1.1300885370309646, "grad_norm": 0.5598302015297878, "learning_rate": 2.0965840596074707e-06, "loss": 0.273, "step": 24124 }, { "epoch": 1.1301353820208928, "grad_norm": 0.6140391097021177, "learning_rate": 2.0963968960658277e-06, "loss": 0.2958, "step": 24125 }, { "epoch": 1.1301822270108213, "grad_norm": 0.5969542304606539, "learning_rate": 2.0962097348468185e-06, "loss": 0.2694, "step": 24126 }, { "epoch": 1.1302290720007495, "grad_norm": 0.5745471733013702, "learning_rate": 2.0960225759515206e-06, "loss": 0.2696, "step": 24127 }, { "epoch": 1.1302759169906778, "grad_norm": 0.590119014983571, "learning_rate": 2.0958354193810117e-06, "loss": 0.2744, "step": 24128 }, { "epoch": 1.1303227619806062, "grad_norm": 0.6155145721138724, "learning_rate": 2.095648265136368e-06, "loss": 0.2885, "step": 24129 }, { "epoch": 1.1303696069705345, "grad_norm": 0.5692900564019061, "learning_rate": 2.0954611132186666e-06, "loss": 0.2806, "step": 24130 }, { "epoch": 1.130416451960463, "grad_norm": 0.6018024555534538, "learning_rate": 2.095273963628986e-06, "loss": 0.2882, "step": 24131 }, { "epoch": 1.1304632969503912, "grad_norm": 0.6606230800834557, "learning_rate": 2.095086816368402e-06, "loss": 0.3148, "step": 24132 }, { "epoch": 1.1305101419403194, "grad_norm": 0.5942352029833686, "learning_rate": 2.0948996714379904e-06, "loss": 0.2894, "step": 24133 }, { "epoch": 1.1305569869302479, "grad_norm": 0.5557494467641433, "learning_rate": 2.0947125288388296e-06, "loss": 0.2574, "step": 24134 }, { "epoch": 1.130603831920176, "grad_norm": 0.5561587930038482, "learning_rate": 2.094525388571997e-06, "loss": 0.2749, "step": 24135 }, { "epoch": 1.1306506769101046, "grad_norm": 0.6959532650515339, "learning_rate": 2.094338250638568e-06, "loss": 0.2866, "step": 24136 }, { "epoch": 1.1306975219000328, "grad_norm": 0.5686120159681111, "learning_rate": 2.094151115039621e-06, "loss": 0.2744, "step": 24137 }, { "epoch": 1.130744366889961, "grad_norm": 0.6020373538990373, "learning_rate": 2.0939639817762327e-06, "loss": 0.2822, "step": 24138 }, { "epoch": 1.1307912118798895, "grad_norm": 0.652236226482415, "learning_rate": 2.0937768508494795e-06, "loss": 0.2761, "step": 24139 }, { "epoch": 1.1308380568698178, "grad_norm": 0.5831460247216101, "learning_rate": 2.093589722260438e-06, "loss": 0.2765, "step": 24140 }, { "epoch": 1.130884901859746, "grad_norm": 0.5694181430920955, "learning_rate": 2.093402596010185e-06, "loss": 0.2552, "step": 24141 }, { "epoch": 1.1309317468496745, "grad_norm": 0.6057226362673526, "learning_rate": 2.0932154720997986e-06, "loss": 0.2837, "step": 24142 }, { "epoch": 1.1309785918396027, "grad_norm": 0.6047844576420267, "learning_rate": 2.0930283505303547e-06, "loss": 0.2678, "step": 24143 }, { "epoch": 1.1310254368295312, "grad_norm": 0.6101214032991648, "learning_rate": 2.092841231302931e-06, "loss": 0.2702, "step": 24144 }, { "epoch": 1.1310722818194594, "grad_norm": 0.588489485678189, "learning_rate": 2.092654114418603e-06, "loss": 0.2793, "step": 24145 }, { "epoch": 1.1311191268093876, "grad_norm": 0.5872371215835631, "learning_rate": 2.0924669998784487e-06, "loss": 0.2748, "step": 24146 }, { "epoch": 1.131165971799316, "grad_norm": 0.5848358527353184, "learning_rate": 2.0922798876835436e-06, "loss": 0.2714, "step": 24147 }, { "epoch": 1.1312128167892443, "grad_norm": 0.5422911096481483, "learning_rate": 2.092092777834966e-06, "loss": 0.2552, "step": 24148 }, { "epoch": 1.1312596617791728, "grad_norm": 0.5605702779465968, "learning_rate": 2.091905670333791e-06, "loss": 0.2551, "step": 24149 }, { "epoch": 1.131306506769101, "grad_norm": 0.6179815620162455, "learning_rate": 2.091718565181098e-06, "loss": 0.2999, "step": 24150 }, { "epoch": 1.1313533517590293, "grad_norm": 0.5912612826099374, "learning_rate": 2.0915314623779603e-06, "loss": 0.2701, "step": 24151 }, { "epoch": 1.1314001967489578, "grad_norm": 0.5582139582562015, "learning_rate": 2.0913443619254577e-06, "loss": 0.2716, "step": 24152 }, { "epoch": 1.131447041738886, "grad_norm": 0.5606903282159028, "learning_rate": 2.0911572638246646e-06, "loss": 0.2571, "step": 24153 }, { "epoch": 1.1314938867288142, "grad_norm": 0.5794682976288221, "learning_rate": 2.0909701680766584e-06, "loss": 0.29, "step": 24154 }, { "epoch": 1.1315407317187427, "grad_norm": 0.5490934903715026, "learning_rate": 2.090783074682517e-06, "loss": 0.2595, "step": 24155 }, { "epoch": 1.131587576708671, "grad_norm": 0.5979402228191931, "learning_rate": 2.090595983643316e-06, "loss": 0.2872, "step": 24156 }, { "epoch": 1.1316344216985994, "grad_norm": 0.5799955685715005, "learning_rate": 2.0904088949601324e-06, "loss": 0.2787, "step": 24157 }, { "epoch": 1.1316812666885276, "grad_norm": 0.5760002116411427, "learning_rate": 2.0902218086340417e-06, "loss": 0.2842, "step": 24158 }, { "epoch": 1.131728111678456, "grad_norm": 0.5754804654033997, "learning_rate": 2.0900347246661225e-06, "loss": 0.2607, "step": 24159 }, { "epoch": 1.1317749566683843, "grad_norm": 0.5857765785968275, "learning_rate": 2.0898476430574496e-06, "loss": 0.2544, "step": 24160 }, { "epoch": 1.1318218016583126, "grad_norm": 0.577009964570567, "learning_rate": 2.0896605638091006e-06, "loss": 0.2541, "step": 24161 }, { "epoch": 1.131868646648241, "grad_norm": 0.5834910365213334, "learning_rate": 2.0894734869221527e-06, "loss": 0.2747, "step": 24162 }, { "epoch": 1.1319154916381693, "grad_norm": 0.5666301275149628, "learning_rate": 2.0892864123976814e-06, "loss": 0.2666, "step": 24163 }, { "epoch": 1.1319623366280975, "grad_norm": 0.5682934396985924, "learning_rate": 2.0890993402367633e-06, "loss": 0.2704, "step": 24164 }, { "epoch": 1.132009181618026, "grad_norm": 0.5822177015069233, "learning_rate": 2.0889122704404748e-06, "loss": 0.2732, "step": 24165 }, { "epoch": 1.1320560266079542, "grad_norm": 0.5682561301556662, "learning_rate": 2.088725203009894e-06, "loss": 0.2677, "step": 24166 }, { "epoch": 1.1321028715978827, "grad_norm": 0.60347063181186, "learning_rate": 2.088538137946095e-06, "loss": 0.2541, "step": 24167 }, { "epoch": 1.132149716587811, "grad_norm": 0.5593305817841564, "learning_rate": 2.0883510752501566e-06, "loss": 0.261, "step": 24168 }, { "epoch": 1.1321965615777392, "grad_norm": 0.5310344718296356, "learning_rate": 2.088164014923154e-06, "loss": 0.2496, "step": 24169 }, { "epoch": 1.1322434065676676, "grad_norm": 0.622958965995221, "learning_rate": 2.087976956966164e-06, "loss": 0.2886, "step": 24170 }, { "epoch": 1.1322902515575959, "grad_norm": 0.5767497901390205, "learning_rate": 2.0877899013802626e-06, "loss": 0.2685, "step": 24171 }, { "epoch": 1.1323370965475243, "grad_norm": 0.569835668525536, "learning_rate": 2.0876028481665266e-06, "loss": 0.2601, "step": 24172 }, { "epoch": 1.1323839415374526, "grad_norm": 0.6033862240488624, "learning_rate": 2.0874157973260333e-06, "loss": 0.2788, "step": 24173 }, { "epoch": 1.1324307865273808, "grad_norm": 0.5581356973539452, "learning_rate": 2.087228748859859e-06, "loss": 0.2579, "step": 24174 }, { "epoch": 1.1324776315173093, "grad_norm": 0.6094749087714894, "learning_rate": 2.0870417027690783e-06, "loss": 0.2847, "step": 24175 }, { "epoch": 1.1325244765072375, "grad_norm": 0.6083539282260357, "learning_rate": 2.0868546590547686e-06, "loss": 0.2815, "step": 24176 }, { "epoch": 1.1325713214971658, "grad_norm": 0.5571218839795992, "learning_rate": 2.0866676177180074e-06, "loss": 0.264, "step": 24177 }, { "epoch": 1.1326181664870942, "grad_norm": 0.5538280667946848, "learning_rate": 2.086480578759869e-06, "loss": 0.2751, "step": 24178 }, { "epoch": 1.1326650114770225, "grad_norm": 0.6194533002128082, "learning_rate": 2.086293542181432e-06, "loss": 0.2846, "step": 24179 }, { "epoch": 1.132711856466951, "grad_norm": 0.6237265580987308, "learning_rate": 2.086106507983772e-06, "loss": 0.2829, "step": 24180 }, { "epoch": 1.1327587014568792, "grad_norm": 0.5548964553280398, "learning_rate": 2.0859194761679642e-06, "loss": 0.2676, "step": 24181 }, { "epoch": 1.1328055464468074, "grad_norm": 0.5923492755732027, "learning_rate": 2.085732446735086e-06, "loss": 0.2717, "step": 24182 }, { "epoch": 1.1328523914367359, "grad_norm": 0.5647464608212952, "learning_rate": 2.085545419686213e-06, "loss": 0.2791, "step": 24183 }, { "epoch": 1.1328992364266641, "grad_norm": 0.6966987947613429, "learning_rate": 2.0853583950224226e-06, "loss": 0.3006, "step": 24184 }, { "epoch": 1.1329460814165926, "grad_norm": 0.6408819892738283, "learning_rate": 2.0851713727447896e-06, "loss": 0.2933, "step": 24185 }, { "epoch": 1.1329929264065208, "grad_norm": 0.5761339798966849, "learning_rate": 2.0849843528543927e-06, "loss": 0.2672, "step": 24186 }, { "epoch": 1.133039771396449, "grad_norm": 0.5540835771241304, "learning_rate": 2.084797335352305e-06, "loss": 0.2584, "step": 24187 }, { "epoch": 1.1330866163863775, "grad_norm": 0.5782525608506232, "learning_rate": 2.084610320239605e-06, "loss": 0.2676, "step": 24188 }, { "epoch": 1.1331334613763058, "grad_norm": 0.6574291970575621, "learning_rate": 2.0844233075173676e-06, "loss": 0.2921, "step": 24189 }, { "epoch": 1.133180306366234, "grad_norm": 0.5962674376544438, "learning_rate": 2.0842362971866697e-06, "loss": 0.2872, "step": 24190 }, { "epoch": 1.1332271513561625, "grad_norm": 0.5933258901538977, "learning_rate": 2.084049289248588e-06, "loss": 0.2916, "step": 24191 }, { "epoch": 1.1332739963460907, "grad_norm": 0.6860856954033466, "learning_rate": 2.0838622837041984e-06, "loss": 0.3063, "step": 24192 }, { "epoch": 1.1333208413360192, "grad_norm": 0.6383697548149073, "learning_rate": 2.083675280554576e-06, "loss": 0.301, "step": 24193 }, { "epoch": 1.1333676863259474, "grad_norm": 0.6145795670746852, "learning_rate": 2.0834882798007976e-06, "loss": 0.278, "step": 24194 }, { "epoch": 1.1334145313158759, "grad_norm": 0.5848455304791802, "learning_rate": 2.0833012814439396e-06, "loss": 0.2818, "step": 24195 }, { "epoch": 1.1334613763058041, "grad_norm": 0.6181722495781287, "learning_rate": 2.083114285485078e-06, "loss": 0.295, "step": 24196 }, { "epoch": 1.1335082212957324, "grad_norm": 0.5762637726357425, "learning_rate": 2.082927291925289e-06, "loss": 0.2784, "step": 24197 }, { "epoch": 1.1335550662856608, "grad_norm": 0.5819123861112153, "learning_rate": 2.0827403007656492e-06, "loss": 0.2724, "step": 24198 }, { "epoch": 1.133601911275589, "grad_norm": 0.5402073146754376, "learning_rate": 2.0825533120072335e-06, "loss": 0.2584, "step": 24199 }, { "epoch": 1.1336487562655173, "grad_norm": 0.5861215241306911, "learning_rate": 2.0823663256511183e-06, "loss": 0.2829, "step": 24200 }, { "epoch": 1.1336956012554458, "grad_norm": 0.6905088946462138, "learning_rate": 2.082179341698381e-06, "loss": 0.2754, "step": 24201 }, { "epoch": 1.133742446245374, "grad_norm": 0.5808189194139911, "learning_rate": 2.081992360150095e-06, "loss": 0.2719, "step": 24202 }, { "epoch": 1.1337892912353025, "grad_norm": 0.5531785226637956, "learning_rate": 2.0818053810073383e-06, "loss": 0.2667, "step": 24203 }, { "epoch": 1.1338361362252307, "grad_norm": 0.5919418746786649, "learning_rate": 2.0816184042711877e-06, "loss": 0.2874, "step": 24204 }, { "epoch": 1.133882981215159, "grad_norm": 0.5994117507189393, "learning_rate": 2.0814314299427177e-06, "loss": 0.2709, "step": 24205 }, { "epoch": 1.1339298262050874, "grad_norm": 0.5804581860970379, "learning_rate": 2.081244458023004e-06, "loss": 0.2776, "step": 24206 }, { "epoch": 1.1339766711950157, "grad_norm": 0.6020472398980113, "learning_rate": 2.081057488513123e-06, "loss": 0.2908, "step": 24207 }, { "epoch": 1.1340235161849441, "grad_norm": 0.5840210686988793, "learning_rate": 2.080870521414151e-06, "loss": 0.261, "step": 24208 }, { "epoch": 1.1340703611748724, "grad_norm": 0.5359281878138622, "learning_rate": 2.080683556727164e-06, "loss": 0.2491, "step": 24209 }, { "epoch": 1.1341172061648006, "grad_norm": 0.5861191197837367, "learning_rate": 2.0804965944532375e-06, "loss": 0.2696, "step": 24210 }, { "epoch": 1.134164051154729, "grad_norm": 0.5362999440449004, "learning_rate": 2.080309634593449e-06, "loss": 0.2483, "step": 24211 }, { "epoch": 1.1342108961446573, "grad_norm": 0.6049004378215359, "learning_rate": 2.0801226771488717e-06, "loss": 0.2687, "step": 24212 }, { "epoch": 1.1342577411345856, "grad_norm": 0.578834774602949, "learning_rate": 2.0799357221205827e-06, "loss": 0.2737, "step": 24213 }, { "epoch": 1.134304586124514, "grad_norm": 0.5810903330743927, "learning_rate": 2.0797487695096584e-06, "loss": 0.2691, "step": 24214 }, { "epoch": 1.1343514311144423, "grad_norm": 0.5752339463752226, "learning_rate": 2.0795618193171745e-06, "loss": 0.2628, "step": 24215 }, { "epoch": 1.1343982761043707, "grad_norm": 0.6461740963721925, "learning_rate": 2.0793748715442063e-06, "loss": 0.287, "step": 24216 }, { "epoch": 1.134445121094299, "grad_norm": 0.6204203098271808, "learning_rate": 2.0791879261918313e-06, "loss": 0.2973, "step": 24217 }, { "epoch": 1.1344919660842272, "grad_norm": 0.578473792831068, "learning_rate": 2.0790009832611222e-06, "loss": 0.2789, "step": 24218 }, { "epoch": 1.1345388110741557, "grad_norm": 0.5644517701366796, "learning_rate": 2.078814042753158e-06, "loss": 0.2809, "step": 24219 }, { "epoch": 1.134585656064084, "grad_norm": 0.5911614812245981, "learning_rate": 2.078627104669012e-06, "loss": 0.2792, "step": 24220 }, { "epoch": 1.1346325010540124, "grad_norm": 0.6083204087354658, "learning_rate": 2.078440169009762e-06, "loss": 0.2819, "step": 24221 }, { "epoch": 1.1346793460439406, "grad_norm": 0.5498915563492316, "learning_rate": 2.0782532357764822e-06, "loss": 0.2531, "step": 24222 }, { "epoch": 1.1347261910338688, "grad_norm": 0.5590539548153017, "learning_rate": 2.07806630497025e-06, "loss": 0.2646, "step": 24223 }, { "epoch": 1.1347730360237973, "grad_norm": 0.563350983765125, "learning_rate": 2.0778793765921393e-06, "loss": 0.2742, "step": 24224 }, { "epoch": 1.1348198810137256, "grad_norm": 0.5645590454147797, "learning_rate": 2.0776924506432266e-06, "loss": 0.2765, "step": 24225 }, { "epoch": 1.1348667260036538, "grad_norm": 0.5540511822804077, "learning_rate": 2.0775055271245882e-06, "loss": 0.2635, "step": 24226 }, { "epoch": 1.1349135709935823, "grad_norm": 0.6058524348159302, "learning_rate": 2.0773186060372986e-06, "loss": 0.2667, "step": 24227 }, { "epoch": 1.1349604159835105, "grad_norm": 0.603616854528683, "learning_rate": 2.077131687382435e-06, "loss": 0.2758, "step": 24228 }, { "epoch": 1.135007260973439, "grad_norm": 0.5774199938278518, "learning_rate": 2.076944771161072e-06, "loss": 0.2739, "step": 24229 }, { "epoch": 1.1350541059633672, "grad_norm": 0.5830287051787665, "learning_rate": 2.0767578573742857e-06, "loss": 0.2815, "step": 24230 }, { "epoch": 1.1351009509532957, "grad_norm": 0.6021836326545476, "learning_rate": 2.076570946023151e-06, "loss": 0.2927, "step": 24231 }, { "epoch": 1.135147795943224, "grad_norm": 0.5966468399617018, "learning_rate": 2.0763840371087444e-06, "loss": 0.2781, "step": 24232 }, { "epoch": 1.1351946409331521, "grad_norm": 0.5820177036966566, "learning_rate": 2.076197130632141e-06, "loss": 0.2626, "step": 24233 }, { "epoch": 1.1352414859230806, "grad_norm": 0.5509343960117681, "learning_rate": 2.0760102265944165e-06, "loss": 0.2708, "step": 24234 }, { "epoch": 1.1352883309130088, "grad_norm": 0.5638540760575793, "learning_rate": 2.0758233249966478e-06, "loss": 0.268, "step": 24235 }, { "epoch": 1.135335175902937, "grad_norm": 0.5885403014561035, "learning_rate": 2.075636425839908e-06, "loss": 0.2572, "step": 24236 }, { "epoch": 1.1353820208928656, "grad_norm": 0.5923398061958027, "learning_rate": 2.0754495291252745e-06, "loss": 0.2794, "step": 24237 }, { "epoch": 1.1354288658827938, "grad_norm": 0.5512986550137751, "learning_rate": 2.0752626348538215e-06, "loss": 0.2605, "step": 24238 }, { "epoch": 1.1354757108727223, "grad_norm": 0.6240837737829569, "learning_rate": 2.0750757430266257e-06, "loss": 0.2912, "step": 24239 }, { "epoch": 1.1355225558626505, "grad_norm": 0.5669617710724286, "learning_rate": 2.0748888536447623e-06, "loss": 0.2866, "step": 24240 }, { "epoch": 1.1355694008525787, "grad_norm": 0.5818242634179752, "learning_rate": 2.0747019667093075e-06, "loss": 0.2678, "step": 24241 }, { "epoch": 1.1356162458425072, "grad_norm": 0.6201695081486139, "learning_rate": 2.0745150822213344e-06, "loss": 0.2769, "step": 24242 }, { "epoch": 1.1356630908324354, "grad_norm": 0.5510872850553019, "learning_rate": 2.0743282001819207e-06, "loss": 0.2726, "step": 24243 }, { "epoch": 1.135709935822364, "grad_norm": 0.5799578046079991, "learning_rate": 2.0741413205921414e-06, "loss": 0.2694, "step": 24244 }, { "epoch": 1.1357567808122921, "grad_norm": 0.5716924130413933, "learning_rate": 2.0739544434530716e-06, "loss": 0.2641, "step": 24245 }, { "epoch": 1.1358036258022204, "grad_norm": 0.6230323137418079, "learning_rate": 2.0737675687657873e-06, "loss": 0.2822, "step": 24246 }, { "epoch": 1.1358504707921488, "grad_norm": 0.5978703298206686, "learning_rate": 2.073580696531363e-06, "loss": 0.2705, "step": 24247 }, { "epoch": 1.135897315782077, "grad_norm": 0.6012877947983484, "learning_rate": 2.073393826750875e-06, "loss": 0.2935, "step": 24248 }, { "epoch": 1.1359441607720053, "grad_norm": 0.5540021893218458, "learning_rate": 2.073206959425398e-06, "loss": 0.2587, "step": 24249 }, { "epoch": 1.1359910057619338, "grad_norm": 0.5319688434193469, "learning_rate": 2.0730200945560082e-06, "loss": 0.2675, "step": 24250 }, { "epoch": 1.136037850751862, "grad_norm": 0.5854036984186199, "learning_rate": 2.0728332321437793e-06, "loss": 0.2824, "step": 24251 }, { "epoch": 1.1360846957417905, "grad_norm": 0.6145669467546383, "learning_rate": 2.0726463721897883e-06, "loss": 0.2872, "step": 24252 }, { "epoch": 1.1361315407317187, "grad_norm": 0.5957207487925779, "learning_rate": 2.072459514695111e-06, "loss": 0.266, "step": 24253 }, { "epoch": 1.136178385721647, "grad_norm": 0.6325129370904778, "learning_rate": 2.0722726596608213e-06, "loss": 0.3014, "step": 24254 }, { "epoch": 1.1362252307115754, "grad_norm": 0.6195132352604461, "learning_rate": 2.072085807087994e-06, "loss": 0.2747, "step": 24255 }, { "epoch": 1.1362720757015037, "grad_norm": 0.6469960801554658, "learning_rate": 2.0718989569777056e-06, "loss": 0.2901, "step": 24256 }, { "epoch": 1.1363189206914321, "grad_norm": 0.6284876828179738, "learning_rate": 2.0717121093310317e-06, "loss": 0.3045, "step": 24257 }, { "epoch": 1.1363657656813604, "grad_norm": 0.5649556423641205, "learning_rate": 2.071525264149046e-06, "loss": 0.2527, "step": 24258 }, { "epoch": 1.1364126106712886, "grad_norm": 0.6191565342690154, "learning_rate": 2.0713384214328265e-06, "loss": 0.2969, "step": 24259 }, { "epoch": 1.136459455661217, "grad_norm": 0.5701900407632844, "learning_rate": 2.0711515811834455e-06, "loss": 0.2836, "step": 24260 }, { "epoch": 1.1365063006511453, "grad_norm": 0.6166837397245425, "learning_rate": 2.0709647434019792e-06, "loss": 0.2874, "step": 24261 }, { "epoch": 1.1365531456410736, "grad_norm": 0.6412491211328369, "learning_rate": 2.070777908089503e-06, "loss": 0.2856, "step": 24262 }, { "epoch": 1.136599990631002, "grad_norm": 0.5952271515647907, "learning_rate": 2.070591075247092e-06, "loss": 0.2764, "step": 24263 }, { "epoch": 1.1366468356209303, "grad_norm": 0.5883699040088711, "learning_rate": 2.0704042448758217e-06, "loss": 0.2719, "step": 24264 }, { "epoch": 1.1366936806108587, "grad_norm": 0.6199228522658838, "learning_rate": 2.070217416976768e-06, "loss": 0.2853, "step": 24265 }, { "epoch": 1.136740525600787, "grad_norm": 0.6118321772933921, "learning_rate": 2.0700305915510036e-06, "loss": 0.2895, "step": 24266 }, { "epoch": 1.1367873705907154, "grad_norm": 0.595266401174217, "learning_rate": 2.069843768599605e-06, "loss": 0.2722, "step": 24267 }, { "epoch": 1.1368342155806437, "grad_norm": 0.5690004130144083, "learning_rate": 2.0696569481236486e-06, "loss": 0.2851, "step": 24268 }, { "epoch": 1.136881060570572, "grad_norm": 0.5583644854601952, "learning_rate": 2.069470130124207e-06, "loss": 0.2523, "step": 24269 }, { "epoch": 1.1369279055605004, "grad_norm": 0.5363833589846589, "learning_rate": 2.0692833146023565e-06, "loss": 0.2541, "step": 24270 }, { "epoch": 1.1369747505504286, "grad_norm": 0.5964064483769892, "learning_rate": 2.069096501559174e-06, "loss": 0.2707, "step": 24271 }, { "epoch": 1.1370215955403569, "grad_norm": 0.6447953216079552, "learning_rate": 2.0689096909957317e-06, "loss": 0.286, "step": 24272 }, { "epoch": 1.1370684405302853, "grad_norm": 0.6250188957217919, "learning_rate": 2.0687228829131057e-06, "loss": 0.2956, "step": 24273 }, { "epoch": 1.1371152855202136, "grad_norm": 0.601588932446766, "learning_rate": 2.068536077312371e-06, "loss": 0.2756, "step": 24274 }, { "epoch": 1.137162130510142, "grad_norm": 0.6307176970576965, "learning_rate": 2.0683492741946033e-06, "loss": 0.2756, "step": 24275 }, { "epoch": 1.1372089755000703, "grad_norm": 0.5782155756310123, "learning_rate": 2.068162473560876e-06, "loss": 0.2756, "step": 24276 }, { "epoch": 1.1372558204899985, "grad_norm": 0.5925737586360826, "learning_rate": 2.067975675412266e-06, "loss": 0.273, "step": 24277 }, { "epoch": 1.137302665479927, "grad_norm": 0.580979640138191, "learning_rate": 2.067788879749848e-06, "loss": 0.2597, "step": 24278 }, { "epoch": 1.1373495104698552, "grad_norm": 0.5881098339504152, "learning_rate": 2.067602086574696e-06, "loss": 0.278, "step": 24279 }, { "epoch": 1.1373963554597837, "grad_norm": 0.5942093753276748, "learning_rate": 2.0674152958878845e-06, "loss": 0.2867, "step": 24280 }, { "epoch": 1.137443200449712, "grad_norm": 0.5602874596148923, "learning_rate": 2.067228507690489e-06, "loss": 0.269, "step": 24281 }, { "epoch": 1.1374900454396402, "grad_norm": 0.610071164741641, "learning_rate": 2.067041721983586e-06, "loss": 0.2794, "step": 24282 }, { "epoch": 1.1375368904295686, "grad_norm": 0.5899843682093536, "learning_rate": 2.0668549387682477e-06, "loss": 0.2719, "step": 24283 }, { "epoch": 1.1375837354194969, "grad_norm": 0.6108041050127472, "learning_rate": 2.066668158045552e-06, "loss": 0.2837, "step": 24284 }, { "epoch": 1.137630580409425, "grad_norm": 0.6528170194039665, "learning_rate": 2.066481379816571e-06, "loss": 0.3018, "step": 24285 }, { "epoch": 1.1376774253993536, "grad_norm": 0.585528577793981, "learning_rate": 2.066294604082381e-06, "loss": 0.2744, "step": 24286 }, { "epoch": 1.1377242703892818, "grad_norm": 0.5678762944056647, "learning_rate": 2.066107830844056e-06, "loss": 0.2785, "step": 24287 }, { "epoch": 1.1377711153792103, "grad_norm": 0.6179333305638579, "learning_rate": 2.065921060102672e-06, "loss": 0.2775, "step": 24288 }, { "epoch": 1.1378179603691385, "grad_norm": 0.588900646207984, "learning_rate": 2.065734291859303e-06, "loss": 0.2782, "step": 24289 }, { "epoch": 1.1378648053590668, "grad_norm": 0.6472076191166295, "learning_rate": 2.0655475261150247e-06, "loss": 0.2785, "step": 24290 }, { "epoch": 1.1379116503489952, "grad_norm": 0.5880094117169348, "learning_rate": 2.0653607628709103e-06, "loss": 0.2816, "step": 24291 }, { "epoch": 1.1379584953389235, "grad_norm": 0.5909191070071376, "learning_rate": 2.065174002128036e-06, "loss": 0.2808, "step": 24292 }, { "epoch": 1.138005340328852, "grad_norm": 0.5927297168559346, "learning_rate": 2.064987243887475e-06, "loss": 0.2704, "step": 24293 }, { "epoch": 1.1380521853187802, "grad_norm": 0.5909541087333706, "learning_rate": 2.0648004881503037e-06, "loss": 0.2863, "step": 24294 }, { "epoch": 1.1380990303087084, "grad_norm": 0.5660425271745261, "learning_rate": 2.0646137349175965e-06, "loss": 0.2716, "step": 24295 }, { "epoch": 1.1381458752986369, "grad_norm": 0.5902628629819788, "learning_rate": 2.064426984190428e-06, "loss": 0.2665, "step": 24296 }, { "epoch": 1.138192720288565, "grad_norm": 0.5991266997700592, "learning_rate": 2.0642402359698722e-06, "loss": 0.2864, "step": 24297 }, { "epoch": 1.1382395652784933, "grad_norm": 0.639600061612193, "learning_rate": 2.064053490257004e-06, "loss": 0.2797, "step": 24298 }, { "epoch": 1.1382864102684218, "grad_norm": 0.6074632567918897, "learning_rate": 2.0638667470528993e-06, "loss": 0.283, "step": 24299 }, { "epoch": 1.13833325525835, "grad_norm": 0.6850836609942246, "learning_rate": 2.063680006358631e-06, "loss": 0.2955, "step": 24300 }, { "epoch": 1.1383801002482785, "grad_norm": 0.5992092359513448, "learning_rate": 2.063493268175275e-06, "loss": 0.2856, "step": 24301 }, { "epoch": 1.1384269452382068, "grad_norm": 0.6013122235162126, "learning_rate": 2.063306532503906e-06, "loss": 0.2798, "step": 24302 }, { "epoch": 1.1384737902281352, "grad_norm": 0.572715553387042, "learning_rate": 2.063119799345598e-06, "loss": 0.2675, "step": 24303 }, { "epoch": 1.1385206352180635, "grad_norm": 0.6443198550009563, "learning_rate": 2.0629330687014253e-06, "loss": 0.3007, "step": 24304 }, { "epoch": 1.1385674802079917, "grad_norm": 0.5849813684321349, "learning_rate": 2.0627463405724627e-06, "loss": 0.2728, "step": 24305 }, { "epoch": 1.1386143251979202, "grad_norm": 0.6288739255518976, "learning_rate": 2.0625596149597855e-06, "loss": 0.2905, "step": 24306 }, { "epoch": 1.1386611701878484, "grad_norm": 0.6088723768103655, "learning_rate": 2.0623728918644674e-06, "loss": 0.2502, "step": 24307 }, { "epoch": 1.1387080151777766, "grad_norm": 0.6136680881590427, "learning_rate": 2.0621861712875845e-06, "loss": 0.2799, "step": 24308 }, { "epoch": 1.138754860167705, "grad_norm": 0.606088066641531, "learning_rate": 2.0619994532302087e-06, "loss": 0.2706, "step": 24309 }, { "epoch": 1.1388017051576333, "grad_norm": 0.5890765464141988, "learning_rate": 2.061812737693417e-06, "loss": 0.283, "step": 24310 }, { "epoch": 1.1388485501475618, "grad_norm": 0.6366709752465018, "learning_rate": 2.061626024678282e-06, "loss": 0.2901, "step": 24311 }, { "epoch": 1.13889539513749, "grad_norm": 0.5927989693679825, "learning_rate": 2.061439314185879e-06, "loss": 0.2886, "step": 24312 }, { "epoch": 1.1389422401274183, "grad_norm": 0.5927662770953901, "learning_rate": 2.061252606217283e-06, "loss": 0.287, "step": 24313 }, { "epoch": 1.1389890851173468, "grad_norm": 0.6250089429817745, "learning_rate": 2.0610659007735685e-06, "loss": 0.2836, "step": 24314 }, { "epoch": 1.139035930107275, "grad_norm": 0.5981319708859426, "learning_rate": 2.0608791978558085e-06, "loss": 0.2753, "step": 24315 }, { "epoch": 1.1390827750972035, "grad_norm": 0.5747386789788369, "learning_rate": 2.060692497465078e-06, "loss": 0.2634, "step": 24316 }, { "epoch": 1.1391296200871317, "grad_norm": 0.5804054161856468, "learning_rate": 2.060505799602453e-06, "loss": 0.2749, "step": 24317 }, { "epoch": 1.13917646507706, "grad_norm": 0.5871696524040981, "learning_rate": 2.0603191042690056e-06, "loss": 0.2931, "step": 24318 }, { "epoch": 1.1392233100669884, "grad_norm": 0.5775438975234964, "learning_rate": 2.0601324114658117e-06, "loss": 0.278, "step": 24319 }, { "epoch": 1.1392701550569166, "grad_norm": 0.6145301307767376, "learning_rate": 2.0599457211939457e-06, "loss": 0.2757, "step": 24320 }, { "epoch": 1.1393170000468449, "grad_norm": 0.5790351643712435, "learning_rate": 2.0597590334544816e-06, "loss": 0.2665, "step": 24321 }, { "epoch": 1.1393638450367733, "grad_norm": 0.5526847438466546, "learning_rate": 2.0595723482484926e-06, "loss": 0.2709, "step": 24322 }, { "epoch": 1.1394106900267016, "grad_norm": 0.5540417608871376, "learning_rate": 2.059385665577054e-06, "loss": 0.2743, "step": 24323 }, { "epoch": 1.13945753501663, "grad_norm": 0.6071148101855702, "learning_rate": 2.0591989854412408e-06, "loss": 0.2765, "step": 24324 }, { "epoch": 1.1395043800065583, "grad_norm": 0.6098797527605496, "learning_rate": 2.059012307842126e-06, "loss": 0.2943, "step": 24325 }, { "epoch": 1.1395512249964865, "grad_norm": 0.5956663661003522, "learning_rate": 2.058825632780786e-06, "loss": 0.2787, "step": 24326 }, { "epoch": 1.139598069986415, "grad_norm": 0.6308921047447594, "learning_rate": 2.058638960258292e-06, "loss": 0.2827, "step": 24327 }, { "epoch": 1.1396449149763432, "grad_norm": 0.5216096173660258, "learning_rate": 2.058452290275721e-06, "loss": 0.2604, "step": 24328 }, { "epoch": 1.1396917599662717, "grad_norm": 0.6095598249710694, "learning_rate": 2.0582656228341448e-06, "loss": 0.2985, "step": 24329 }, { "epoch": 1.1397386049562, "grad_norm": 0.6078075006287271, "learning_rate": 2.0580789579346395e-06, "loss": 0.2822, "step": 24330 }, { "epoch": 1.1397854499461282, "grad_norm": 0.5895266081338595, "learning_rate": 2.057892295578279e-06, "loss": 0.2852, "step": 24331 }, { "epoch": 1.1398322949360566, "grad_norm": 0.5857182140687527, "learning_rate": 2.057705635766138e-06, "loss": 0.274, "step": 24332 }, { "epoch": 1.1398791399259849, "grad_norm": 0.658430524601869, "learning_rate": 2.0575189784992885e-06, "loss": 0.2869, "step": 24333 }, { "epoch": 1.1399259849159131, "grad_norm": 0.555187312326552, "learning_rate": 2.0573323237788064e-06, "loss": 0.2753, "step": 24334 }, { "epoch": 1.1399728299058416, "grad_norm": 0.5996620397507549, "learning_rate": 2.0571456716057657e-06, "loss": 0.2702, "step": 24335 }, { "epoch": 1.1400196748957698, "grad_norm": 0.6405668498377411, "learning_rate": 2.05695902198124e-06, "loss": 0.2856, "step": 24336 }, { "epoch": 1.1400665198856983, "grad_norm": 0.6013364070566005, "learning_rate": 2.0567723749063044e-06, "loss": 0.2726, "step": 24337 }, { "epoch": 1.1401133648756265, "grad_norm": 0.5840641822348497, "learning_rate": 2.0565857303820322e-06, "loss": 0.2642, "step": 24338 }, { "epoch": 1.140160209865555, "grad_norm": 0.604053572826416, "learning_rate": 2.0563990884094976e-06, "loss": 0.2717, "step": 24339 }, { "epoch": 1.1402070548554832, "grad_norm": 0.6451200493982953, "learning_rate": 2.0562124489897743e-06, "loss": 0.2993, "step": 24340 }, { "epoch": 1.1402538998454115, "grad_norm": 0.5931375416395501, "learning_rate": 2.0560258121239378e-06, "loss": 0.2628, "step": 24341 }, { "epoch": 1.14030074483534, "grad_norm": 0.6628315356906325, "learning_rate": 2.05583917781306e-06, "loss": 0.2788, "step": 24342 }, { "epoch": 1.1403475898252682, "grad_norm": 0.5899506222624258, "learning_rate": 2.0556525460582167e-06, "loss": 0.2763, "step": 24343 }, { "epoch": 1.1403944348151964, "grad_norm": 0.6187646502200286, "learning_rate": 2.0554659168604814e-06, "loss": 0.2917, "step": 24344 }, { "epoch": 1.1404412798051249, "grad_norm": 0.6365628779267932, "learning_rate": 2.0552792902209283e-06, "loss": 0.288, "step": 24345 }, { "epoch": 1.1404881247950531, "grad_norm": 0.5662682562251351, "learning_rate": 2.0550926661406307e-06, "loss": 0.2607, "step": 24346 }, { "epoch": 1.1405349697849816, "grad_norm": 0.5748373627312325, "learning_rate": 2.0549060446206625e-06, "loss": 0.2788, "step": 24347 }, { "epoch": 1.1405818147749098, "grad_norm": 0.6077543247529132, "learning_rate": 2.054719425662099e-06, "loss": 0.2811, "step": 24348 }, { "epoch": 1.140628659764838, "grad_norm": 0.5373561616526984, "learning_rate": 2.0545328092660127e-06, "loss": 0.243, "step": 24349 }, { "epoch": 1.1406755047547665, "grad_norm": 0.5909604714251111, "learning_rate": 2.054346195433478e-06, "loss": 0.2615, "step": 24350 }, { "epoch": 1.1407223497446948, "grad_norm": 0.5588694261299006, "learning_rate": 2.0541595841655696e-06, "loss": 0.2671, "step": 24351 }, { "epoch": 1.1407691947346232, "grad_norm": 0.6126255745475188, "learning_rate": 2.053972975463361e-06, "loss": 0.2769, "step": 24352 }, { "epoch": 1.1408160397245515, "grad_norm": 0.6210878413241949, "learning_rate": 2.0537863693279247e-06, "loss": 0.2703, "step": 24353 }, { "epoch": 1.1408628847144797, "grad_norm": 0.6172991310302877, "learning_rate": 2.053599765760336e-06, "loss": 0.2841, "step": 24354 }, { "epoch": 1.1409097297044082, "grad_norm": 0.5839672083410408, "learning_rate": 2.053413164761669e-06, "loss": 0.2602, "step": 24355 }, { "epoch": 1.1409565746943364, "grad_norm": 0.5754202622283506, "learning_rate": 2.0532265663329963e-06, "loss": 0.2629, "step": 24356 }, { "epoch": 1.1410034196842647, "grad_norm": 0.5989629518390343, "learning_rate": 2.053039970475394e-06, "loss": 0.2704, "step": 24357 }, { "epoch": 1.1410502646741931, "grad_norm": 0.5761350045364457, "learning_rate": 2.0528533771899324e-06, "loss": 0.2708, "step": 24358 }, { "epoch": 1.1410971096641214, "grad_norm": 0.5964388203935321, "learning_rate": 2.052666786477688e-06, "loss": 0.2675, "step": 24359 }, { "epoch": 1.1411439546540498, "grad_norm": 0.5944780541790173, "learning_rate": 2.0524801983397337e-06, "loss": 0.2799, "step": 24360 }, { "epoch": 1.141190799643978, "grad_norm": 0.6016555386931781, "learning_rate": 2.052293612777143e-06, "loss": 0.2806, "step": 24361 }, { "epoch": 1.1412376446339063, "grad_norm": 0.5685687238682638, "learning_rate": 2.0521070297909905e-06, "loss": 0.2711, "step": 24362 }, { "epoch": 1.1412844896238348, "grad_norm": 0.5786199986368148, "learning_rate": 2.0519204493823504e-06, "loss": 0.2892, "step": 24363 }, { "epoch": 1.141331334613763, "grad_norm": 0.5778293724869846, "learning_rate": 2.051733871552294e-06, "loss": 0.263, "step": 24364 }, { "epoch": 1.1413781796036915, "grad_norm": 0.5723947490876099, "learning_rate": 2.0515472963018967e-06, "loss": 0.2622, "step": 24365 }, { "epoch": 1.1414250245936197, "grad_norm": 0.5344745855484268, "learning_rate": 2.051360723632233e-06, "loss": 0.263, "step": 24366 }, { "epoch": 1.141471869583548, "grad_norm": 0.6039099646269619, "learning_rate": 2.051174153544374e-06, "loss": 0.2912, "step": 24367 }, { "epoch": 1.1415187145734764, "grad_norm": 0.5492749317306691, "learning_rate": 2.050987586039396e-06, "loss": 0.2762, "step": 24368 }, { "epoch": 1.1415655595634047, "grad_norm": 0.6323691141654992, "learning_rate": 2.050801021118372e-06, "loss": 0.2776, "step": 24369 }, { "epoch": 1.141612404553333, "grad_norm": 0.5564550601875538, "learning_rate": 2.0506144587823747e-06, "loss": 0.2635, "step": 24370 }, { "epoch": 1.1416592495432614, "grad_norm": 0.6293128992508089, "learning_rate": 2.050427899032478e-06, "loss": 0.2826, "step": 24371 }, { "epoch": 1.1417060945331896, "grad_norm": 0.5776911423015455, "learning_rate": 2.0502413418697557e-06, "loss": 0.2734, "step": 24372 }, { "epoch": 1.141752939523118, "grad_norm": 0.6427961187144893, "learning_rate": 2.050054787295282e-06, "loss": 0.2814, "step": 24373 }, { "epoch": 1.1417997845130463, "grad_norm": 0.5648195896309203, "learning_rate": 2.049868235310129e-06, "loss": 0.2714, "step": 24374 }, { "epoch": 1.1418466295029748, "grad_norm": 0.5838763614809919, "learning_rate": 2.0496816859153724e-06, "loss": 0.2805, "step": 24375 }, { "epoch": 1.141893474492903, "grad_norm": 0.6235927010919684, "learning_rate": 2.049495139112084e-06, "loss": 0.288, "step": 24376 }, { "epoch": 1.1419403194828313, "grad_norm": 0.6037251290964201, "learning_rate": 2.0493085949013376e-06, "loss": 0.2857, "step": 24377 }, { "epoch": 1.1419871644727597, "grad_norm": 0.5907124762715301, "learning_rate": 2.049122053284207e-06, "loss": 0.2875, "step": 24378 }, { "epoch": 1.142034009462688, "grad_norm": 0.6076634966129102, "learning_rate": 2.0489355142617652e-06, "loss": 0.2876, "step": 24379 }, { "epoch": 1.1420808544526162, "grad_norm": 0.5733009878775727, "learning_rate": 2.048748977835087e-06, "loss": 0.2632, "step": 24380 }, { "epoch": 1.1421276994425447, "grad_norm": 0.5739875675981078, "learning_rate": 2.0485624440052456e-06, "loss": 0.2688, "step": 24381 }, { "epoch": 1.142174544432473, "grad_norm": 0.560536735894885, "learning_rate": 2.0483759127733123e-06, "loss": 0.2716, "step": 24382 }, { "epoch": 1.1422213894224014, "grad_norm": 0.5609803633271696, "learning_rate": 2.0481893841403624e-06, "loss": 0.271, "step": 24383 }, { "epoch": 1.1422682344123296, "grad_norm": 0.6067341663640556, "learning_rate": 2.0480028581074695e-06, "loss": 0.2688, "step": 24384 }, { "epoch": 1.1423150794022578, "grad_norm": 0.5966221948681355, "learning_rate": 2.0478163346757064e-06, "loss": 0.2811, "step": 24385 }, { "epoch": 1.1423619243921863, "grad_norm": 0.524765671316622, "learning_rate": 2.0476298138461467e-06, "loss": 0.2506, "step": 24386 }, { "epoch": 1.1424087693821146, "grad_norm": 0.6146628505671036, "learning_rate": 2.0474432956198643e-06, "loss": 0.2729, "step": 24387 }, { "epoch": 1.142455614372043, "grad_norm": 0.6014060325461484, "learning_rate": 2.0472567799979313e-06, "loss": 0.2761, "step": 24388 }, { "epoch": 1.1425024593619713, "grad_norm": 0.5739418151088266, "learning_rate": 2.0470702669814217e-06, "loss": 0.2744, "step": 24389 }, { "epoch": 1.1425493043518995, "grad_norm": 0.5882399103918318, "learning_rate": 2.046883756571409e-06, "loss": 0.2773, "step": 24390 }, { "epoch": 1.142596149341828, "grad_norm": 0.5723848058313317, "learning_rate": 2.046697248768966e-06, "loss": 0.273, "step": 24391 }, { "epoch": 1.1426429943317562, "grad_norm": 0.5770640218591794, "learning_rate": 2.0465107435751666e-06, "loss": 0.2626, "step": 24392 }, { "epoch": 1.1426898393216844, "grad_norm": 0.5978550544789799, "learning_rate": 2.046324240991085e-06, "loss": 0.2866, "step": 24393 }, { "epoch": 1.142736684311613, "grad_norm": 0.5878727179767149, "learning_rate": 2.0461377410177926e-06, "loss": 0.2859, "step": 24394 }, { "epoch": 1.1427835293015411, "grad_norm": 0.5770415262169916, "learning_rate": 2.045951243656363e-06, "loss": 0.2836, "step": 24395 }, { "epoch": 1.1428303742914696, "grad_norm": 0.5867258134667435, "learning_rate": 2.04576474890787e-06, "loss": 0.2767, "step": 24396 }, { "epoch": 1.1428772192813978, "grad_norm": 0.5764325869301549, "learning_rate": 2.045578256773387e-06, "loss": 0.2793, "step": 24397 }, { "epoch": 1.142924064271326, "grad_norm": 0.5841803660440382, "learning_rate": 2.0453917672539865e-06, "loss": 0.2789, "step": 24398 }, { "epoch": 1.1429709092612546, "grad_norm": 0.5855035479211059, "learning_rate": 2.045205280350743e-06, "loss": 0.2606, "step": 24399 }, { "epoch": 1.1430177542511828, "grad_norm": 0.6072609094606795, "learning_rate": 2.0450187960647276e-06, "loss": 0.2816, "step": 24400 }, { "epoch": 1.1430645992411113, "grad_norm": 0.5861773726793561, "learning_rate": 2.0448323143970158e-06, "loss": 0.2816, "step": 24401 }, { "epoch": 1.1431114442310395, "grad_norm": 0.5870800426825408, "learning_rate": 2.044645835348679e-06, "loss": 0.2818, "step": 24402 }, { "epoch": 1.1431582892209677, "grad_norm": 0.5976849752913224, "learning_rate": 2.0444593589207907e-06, "loss": 0.2723, "step": 24403 }, { "epoch": 1.1432051342108962, "grad_norm": 0.5650537413897487, "learning_rate": 2.0442728851144247e-06, "loss": 0.2646, "step": 24404 }, { "epoch": 1.1432519792008244, "grad_norm": 0.6005779669335927, "learning_rate": 2.0440864139306544e-06, "loss": 0.3024, "step": 24405 }, { "epoch": 1.1432988241907527, "grad_norm": 0.5623213325241949, "learning_rate": 2.0438999453705512e-06, "loss": 0.2816, "step": 24406 }, { "epoch": 1.1433456691806811, "grad_norm": 0.5853694942882164, "learning_rate": 2.0437134794351893e-06, "loss": 0.2694, "step": 24407 }, { "epoch": 1.1433925141706094, "grad_norm": 0.6046092736339932, "learning_rate": 2.043527016125642e-06, "loss": 0.293, "step": 24408 }, { "epoch": 1.1434393591605378, "grad_norm": 0.6125888242748042, "learning_rate": 2.0433405554429815e-06, "loss": 0.2647, "step": 24409 }, { "epoch": 1.143486204150466, "grad_norm": 0.5856958962384015, "learning_rate": 2.043154097388281e-06, "loss": 0.2725, "step": 24410 }, { "epoch": 1.1435330491403946, "grad_norm": 0.5852043313665092, "learning_rate": 2.042967641962615e-06, "loss": 0.2727, "step": 24411 }, { "epoch": 1.1435798941303228, "grad_norm": 0.5960706716250359, "learning_rate": 2.0427811891670552e-06, "loss": 0.2808, "step": 24412 }, { "epoch": 1.143626739120251, "grad_norm": 0.6208056996164373, "learning_rate": 2.042594739002674e-06, "loss": 0.2821, "step": 24413 }, { "epoch": 1.1436735841101795, "grad_norm": 0.6070653683950934, "learning_rate": 2.0424082914705453e-06, "loss": 0.2736, "step": 24414 }, { "epoch": 1.1437204291001077, "grad_norm": 0.6257180864488541, "learning_rate": 2.042221846571742e-06, "loss": 0.2712, "step": 24415 }, { "epoch": 1.143767274090036, "grad_norm": 0.5846118176782114, "learning_rate": 2.042035404307337e-06, "loss": 0.2749, "step": 24416 }, { "epoch": 1.1438141190799644, "grad_norm": 0.5757525324802334, "learning_rate": 2.0418489646784025e-06, "loss": 0.2811, "step": 24417 }, { "epoch": 1.1438609640698927, "grad_norm": 0.6321977711296078, "learning_rate": 2.0416625276860137e-06, "loss": 0.2889, "step": 24418 }, { "epoch": 1.1439078090598211, "grad_norm": 0.5418430707898322, "learning_rate": 2.041476093331241e-06, "loss": 0.2623, "step": 24419 }, { "epoch": 1.1439546540497494, "grad_norm": 0.5911509481344231, "learning_rate": 2.041289661615157e-06, "loss": 0.282, "step": 24420 }, { "epoch": 1.1440014990396776, "grad_norm": 0.6471379320306367, "learning_rate": 2.0411032325388367e-06, "loss": 0.2754, "step": 24421 }, { "epoch": 1.144048344029606, "grad_norm": 0.5781312856320938, "learning_rate": 2.040916806103352e-06, "loss": 0.2712, "step": 24422 }, { "epoch": 1.1440951890195343, "grad_norm": 0.5611733407750437, "learning_rate": 2.0407303823097753e-06, "loss": 0.2722, "step": 24423 }, { "epoch": 1.1441420340094628, "grad_norm": 0.5963442431680458, "learning_rate": 2.040543961159181e-06, "loss": 0.2859, "step": 24424 }, { "epoch": 1.144188878999391, "grad_norm": 0.611998006573064, "learning_rate": 2.0403575426526396e-06, "loss": 0.2815, "step": 24425 }, { "epoch": 1.1442357239893193, "grad_norm": 0.5879928602575352, "learning_rate": 2.0401711267912254e-06, "loss": 0.2739, "step": 24426 }, { "epoch": 1.1442825689792477, "grad_norm": 0.55286611371254, "learning_rate": 2.0399847135760102e-06, "loss": 0.271, "step": 24427 }, { "epoch": 1.144329413969176, "grad_norm": 0.5829498960295246, "learning_rate": 2.0397983030080682e-06, "loss": 0.2775, "step": 24428 }, { "epoch": 1.1443762589591042, "grad_norm": 0.5370744338776856, "learning_rate": 2.0396118950884704e-06, "loss": 0.2731, "step": 24429 }, { "epoch": 1.1444231039490327, "grad_norm": 0.6263826641081671, "learning_rate": 2.0394254898182918e-06, "loss": 0.2958, "step": 24430 }, { "epoch": 1.144469948938961, "grad_norm": 0.5969900436507861, "learning_rate": 2.0392390871986025e-06, "loss": 0.2894, "step": 24431 }, { "epoch": 1.1445167939288894, "grad_norm": 0.5800060596702582, "learning_rate": 2.039052687230477e-06, "loss": 0.2707, "step": 24432 }, { "epoch": 1.1445636389188176, "grad_norm": 0.5786944469787072, "learning_rate": 2.038866289914987e-06, "loss": 0.265, "step": 24433 }, { "epoch": 1.1446104839087459, "grad_norm": 0.5674572728042367, "learning_rate": 2.0386798952532053e-06, "loss": 0.2665, "step": 24434 }, { "epoch": 1.1446573288986743, "grad_norm": 0.5695008462224452, "learning_rate": 2.0384935032462057e-06, "loss": 0.2589, "step": 24435 }, { "epoch": 1.1447041738886026, "grad_norm": 0.5879672232724765, "learning_rate": 2.0383071138950605e-06, "loss": 0.2719, "step": 24436 }, { "epoch": 1.144751018878531, "grad_norm": 0.6199825507446468, "learning_rate": 2.0381207272008403e-06, "loss": 0.277, "step": 24437 }, { "epoch": 1.1447978638684593, "grad_norm": 0.5570918045092426, "learning_rate": 2.0379343431646198e-06, "loss": 0.2724, "step": 24438 }, { "epoch": 1.1448447088583875, "grad_norm": 0.5684956059457248, "learning_rate": 2.0377479617874715e-06, "loss": 0.2811, "step": 24439 }, { "epoch": 1.144891553848316, "grad_norm": 0.6268271997980307, "learning_rate": 2.0375615830704666e-06, "loss": 0.2849, "step": 24440 }, { "epoch": 1.1449383988382442, "grad_norm": 0.5975306054476618, "learning_rate": 2.0373752070146792e-06, "loss": 0.2799, "step": 24441 }, { "epoch": 1.1449852438281725, "grad_norm": 0.6158828046679116, "learning_rate": 2.0371888336211816e-06, "loss": 0.2808, "step": 24442 }, { "epoch": 1.145032088818101, "grad_norm": 0.6339390321574737, "learning_rate": 2.0370024628910456e-06, "loss": 0.2803, "step": 24443 }, { "epoch": 1.1450789338080292, "grad_norm": 0.647172961733691, "learning_rate": 2.0368160948253437e-06, "loss": 0.2717, "step": 24444 }, { "epoch": 1.1451257787979576, "grad_norm": 0.6506094415565425, "learning_rate": 2.0366297294251486e-06, "loss": 0.2812, "step": 24445 }, { "epoch": 1.1451726237878859, "grad_norm": 0.5768919622795593, "learning_rate": 2.0364433666915334e-06, "loss": 0.2727, "step": 24446 }, { "epoch": 1.1452194687778143, "grad_norm": 0.5760222421120029, "learning_rate": 2.03625700662557e-06, "loss": 0.2768, "step": 24447 }, { "epoch": 1.1452663137677426, "grad_norm": 0.5830917675376763, "learning_rate": 2.0360706492283317e-06, "loss": 0.2759, "step": 24448 }, { "epoch": 1.1453131587576708, "grad_norm": 0.5931898129963014, "learning_rate": 2.035884294500889e-06, "loss": 0.282, "step": 24449 }, { "epoch": 1.1453600037475993, "grad_norm": 0.5517552793671704, "learning_rate": 2.0356979424443164e-06, "loss": 0.2572, "step": 24450 }, { "epoch": 1.1454068487375275, "grad_norm": 0.6103607381717888, "learning_rate": 2.0355115930596847e-06, "loss": 0.2776, "step": 24451 }, { "epoch": 1.1454536937274558, "grad_norm": 0.5697554203434037, "learning_rate": 2.035325246348067e-06, "loss": 0.2616, "step": 24452 }, { "epoch": 1.1455005387173842, "grad_norm": 0.6226697959207999, "learning_rate": 2.0351389023105363e-06, "loss": 0.2818, "step": 24453 }, { "epoch": 1.1455473837073125, "grad_norm": 0.5760877660953078, "learning_rate": 2.034952560948165e-06, "loss": 0.2851, "step": 24454 }, { "epoch": 1.145594228697241, "grad_norm": 0.5958403793227384, "learning_rate": 2.0347662222620238e-06, "loss": 0.2815, "step": 24455 }, { "epoch": 1.1456410736871692, "grad_norm": 0.5449083901905023, "learning_rate": 2.034579886253186e-06, "loss": 0.2809, "step": 24456 }, { "epoch": 1.1456879186770974, "grad_norm": 0.6595978215531126, "learning_rate": 2.0343935529227245e-06, "loss": 0.2785, "step": 24457 }, { "epoch": 1.1457347636670259, "grad_norm": 0.5749335447035353, "learning_rate": 2.0342072222717105e-06, "loss": 0.2608, "step": 24458 }, { "epoch": 1.145781608656954, "grad_norm": 0.5890874730520642, "learning_rate": 2.034020894301217e-06, "loss": 0.2769, "step": 24459 }, { "epoch": 1.1458284536468826, "grad_norm": 0.5792802321386596, "learning_rate": 2.033834569012317e-06, "loss": 0.2565, "step": 24460 }, { "epoch": 1.1458752986368108, "grad_norm": 0.6087769143014985, "learning_rate": 2.0336482464060814e-06, "loss": 0.2755, "step": 24461 }, { "epoch": 1.145922143626739, "grad_norm": 0.5810498916716045, "learning_rate": 2.0334619264835824e-06, "loss": 0.2896, "step": 24462 }, { "epoch": 1.1459689886166675, "grad_norm": 0.5476242214482102, "learning_rate": 2.033275609245893e-06, "loss": 0.2612, "step": 24463 }, { "epoch": 1.1460158336065958, "grad_norm": 0.6173835147070373, "learning_rate": 2.0330892946940855e-06, "loss": 0.2856, "step": 24464 }, { "epoch": 1.146062678596524, "grad_norm": 0.6167462217136503, "learning_rate": 2.032902982829231e-06, "loss": 0.2737, "step": 24465 }, { "epoch": 1.1461095235864525, "grad_norm": 0.5985381368783221, "learning_rate": 2.0327166736524037e-06, "loss": 0.2908, "step": 24466 }, { "epoch": 1.1461563685763807, "grad_norm": 0.5983754561147887, "learning_rate": 2.0325303671646735e-06, "loss": 0.2741, "step": 24467 }, { "epoch": 1.1462032135663092, "grad_norm": 0.6000805145971341, "learning_rate": 2.032344063367114e-06, "loss": 0.2716, "step": 24468 }, { "epoch": 1.1462500585562374, "grad_norm": 0.6364765501485617, "learning_rate": 2.0321577622607965e-06, "loss": 0.286, "step": 24469 }, { "epoch": 1.1462969035461656, "grad_norm": 0.5785077013133236, "learning_rate": 2.0319714638467934e-06, "loss": 0.2709, "step": 24470 }, { "epoch": 1.146343748536094, "grad_norm": 0.615669790218706, "learning_rate": 2.031785168126177e-06, "loss": 0.2851, "step": 24471 }, { "epoch": 1.1463905935260224, "grad_norm": 0.6248667338209296, "learning_rate": 2.0315988751000206e-06, "loss": 0.2839, "step": 24472 }, { "epoch": 1.1464374385159508, "grad_norm": 0.5442294494541948, "learning_rate": 2.031412584769393e-06, "loss": 0.259, "step": 24473 }, { "epoch": 1.146484283505879, "grad_norm": 0.5835783648157945, "learning_rate": 2.0312262971353687e-06, "loss": 0.2635, "step": 24474 }, { "epoch": 1.1465311284958073, "grad_norm": 0.5669428121384924, "learning_rate": 2.0310400121990197e-06, "loss": 0.2569, "step": 24475 }, { "epoch": 1.1465779734857358, "grad_norm": 0.588665930497518, "learning_rate": 2.030853729961417e-06, "loss": 0.2808, "step": 24476 }, { "epoch": 1.146624818475664, "grad_norm": 0.6003128776982428, "learning_rate": 2.030667450423634e-06, "loss": 0.2872, "step": 24477 }, { "epoch": 1.1466716634655922, "grad_norm": 0.5820087234739683, "learning_rate": 2.0304811735867415e-06, "loss": 0.2612, "step": 24478 }, { "epoch": 1.1467185084555207, "grad_norm": 0.603151727319188, "learning_rate": 2.0302948994518125e-06, "loss": 0.2774, "step": 24479 }, { "epoch": 1.146765353445449, "grad_norm": 0.5921864851806501, "learning_rate": 2.030108628019917e-06, "loss": 0.2786, "step": 24480 }, { "epoch": 1.1468121984353774, "grad_norm": 0.5308582959938165, "learning_rate": 2.0299223592921287e-06, "loss": 0.2602, "step": 24481 }, { "epoch": 1.1468590434253056, "grad_norm": 0.587175858072337, "learning_rate": 2.029736093269519e-06, "loss": 0.2779, "step": 24482 }, { "epoch": 1.146905888415234, "grad_norm": 0.5683442662048648, "learning_rate": 2.0295498299531594e-06, "loss": 0.262, "step": 24483 }, { "epoch": 1.1469527334051624, "grad_norm": 0.5686862812632413, "learning_rate": 2.0293635693441235e-06, "loss": 0.2713, "step": 24484 }, { "epoch": 1.1469995783950906, "grad_norm": 0.5850121164252968, "learning_rate": 2.0291773114434823e-06, "loss": 0.2761, "step": 24485 }, { "epoch": 1.147046423385019, "grad_norm": 0.5824632548418107, "learning_rate": 2.028991056252306e-06, "loss": 0.2784, "step": 24486 }, { "epoch": 1.1470932683749473, "grad_norm": 0.5832713040236118, "learning_rate": 2.028804803771668e-06, "loss": 0.2883, "step": 24487 }, { "epoch": 1.1471401133648755, "grad_norm": 0.5644936637460696, "learning_rate": 2.0286185540026403e-06, "loss": 0.2668, "step": 24488 }, { "epoch": 1.147186958354804, "grad_norm": 0.592588660341425, "learning_rate": 2.0284323069462938e-06, "loss": 0.2884, "step": 24489 }, { "epoch": 1.1472338033447322, "grad_norm": 0.5955558776043507, "learning_rate": 2.028246062603701e-06, "loss": 0.2864, "step": 24490 }, { "epoch": 1.1472806483346607, "grad_norm": 0.6028737406374217, "learning_rate": 2.0280598209759345e-06, "loss": 0.2835, "step": 24491 }, { "epoch": 1.147327493324589, "grad_norm": 0.6124288476456874, "learning_rate": 2.0278735820640647e-06, "loss": 0.29, "step": 24492 }, { "epoch": 1.1473743383145172, "grad_norm": 0.5848100345286065, "learning_rate": 2.027687345869163e-06, "loss": 0.2616, "step": 24493 }, { "epoch": 1.1474211833044456, "grad_norm": 0.5784982983097379, "learning_rate": 2.0275011123923023e-06, "loss": 0.2725, "step": 24494 }, { "epoch": 1.1474680282943739, "grad_norm": 0.6263554409302586, "learning_rate": 2.027314881634554e-06, "loss": 0.2846, "step": 24495 }, { "epoch": 1.1475148732843024, "grad_norm": 0.5710086900930645, "learning_rate": 2.0271286535969894e-06, "loss": 0.249, "step": 24496 }, { "epoch": 1.1475617182742306, "grad_norm": 0.5776823292195786, "learning_rate": 2.026942428280682e-06, "loss": 0.2748, "step": 24497 }, { "epoch": 1.1476085632641588, "grad_norm": 0.5893508992716567, "learning_rate": 2.0267562056867003e-06, "loss": 0.2781, "step": 24498 }, { "epoch": 1.1476554082540873, "grad_norm": 0.6115483175448089, "learning_rate": 2.0265699858161184e-06, "loss": 0.2955, "step": 24499 }, { "epoch": 1.1477022532440155, "grad_norm": 0.5677442481779705, "learning_rate": 2.026383768670007e-06, "loss": 0.2765, "step": 24500 }, { "epoch": 1.1477490982339438, "grad_norm": 0.6260025856816728, "learning_rate": 2.0261975542494376e-06, "loss": 0.2646, "step": 24501 }, { "epoch": 1.1477959432238722, "grad_norm": 0.5662884973226817, "learning_rate": 2.0260113425554834e-06, "loss": 0.2686, "step": 24502 }, { "epoch": 1.1478427882138005, "grad_norm": 0.6074452727280119, "learning_rate": 2.025825133589215e-06, "loss": 0.2685, "step": 24503 }, { "epoch": 1.147889633203729, "grad_norm": 0.5979689691245262, "learning_rate": 2.0256389273517025e-06, "loss": 0.2564, "step": 24504 }, { "epoch": 1.1479364781936572, "grad_norm": 0.6224259224486758, "learning_rate": 2.0254527238440184e-06, "loss": 0.2793, "step": 24505 }, { "epoch": 1.1479833231835854, "grad_norm": 0.652113394151647, "learning_rate": 2.025266523067236e-06, "loss": 0.2914, "step": 24506 }, { "epoch": 1.1480301681735139, "grad_norm": 0.5640102400230935, "learning_rate": 2.0250803250224243e-06, "loss": 0.2519, "step": 24507 }, { "epoch": 1.1480770131634421, "grad_norm": 0.6292643077484039, "learning_rate": 2.024894129710656e-06, "loss": 0.2738, "step": 24508 }, { "epoch": 1.1481238581533706, "grad_norm": 0.5625545681575126, "learning_rate": 2.024707937133004e-06, "loss": 0.2631, "step": 24509 }, { "epoch": 1.1481707031432988, "grad_norm": 0.5908226427879885, "learning_rate": 2.0245217472905374e-06, "loss": 0.2804, "step": 24510 }, { "epoch": 1.148217548133227, "grad_norm": 0.5747080822061855, "learning_rate": 2.0243355601843284e-06, "loss": 0.2821, "step": 24511 }, { "epoch": 1.1482643931231555, "grad_norm": 0.5746371864552401, "learning_rate": 2.0241493758154487e-06, "loss": 0.2825, "step": 24512 }, { "epoch": 1.1483112381130838, "grad_norm": 0.6055457699159326, "learning_rate": 2.02396319418497e-06, "loss": 0.2672, "step": 24513 }, { "epoch": 1.148358083103012, "grad_norm": 0.5795928292503374, "learning_rate": 2.023777015293963e-06, "loss": 0.2589, "step": 24514 }, { "epoch": 1.1484049280929405, "grad_norm": 0.6109968963820455, "learning_rate": 2.0235908391435013e-06, "loss": 0.2896, "step": 24515 }, { "epoch": 1.1484517730828687, "grad_norm": 0.5510085245598629, "learning_rate": 2.0234046657346527e-06, "loss": 0.2696, "step": 24516 }, { "epoch": 1.1484986180727972, "grad_norm": 0.5885662074744398, "learning_rate": 2.0232184950684913e-06, "loss": 0.2758, "step": 24517 }, { "epoch": 1.1485454630627254, "grad_norm": 0.6205374605652051, "learning_rate": 2.023032327146087e-06, "loss": 0.2697, "step": 24518 }, { "epoch": 1.1485923080526539, "grad_norm": 0.5930269130851892, "learning_rate": 2.022846161968512e-06, "loss": 0.2833, "step": 24519 }, { "epoch": 1.1486391530425821, "grad_norm": 0.5961811766678383, "learning_rate": 2.0226599995368376e-06, "loss": 0.2646, "step": 24520 }, { "epoch": 1.1486859980325104, "grad_norm": 0.5794888069121121, "learning_rate": 2.0224738398521357e-06, "loss": 0.2671, "step": 24521 }, { "epoch": 1.1487328430224388, "grad_norm": 0.6112298414725306, "learning_rate": 2.022287682915476e-06, "loss": 0.2945, "step": 24522 }, { "epoch": 1.148779688012367, "grad_norm": 0.608131450032016, "learning_rate": 2.02210152872793e-06, "loss": 0.2779, "step": 24523 }, { "epoch": 1.1488265330022953, "grad_norm": 0.5934032768654149, "learning_rate": 2.0219153772905703e-06, "loss": 0.2798, "step": 24524 }, { "epoch": 1.1488733779922238, "grad_norm": 0.6087523539840022, "learning_rate": 2.021729228604467e-06, "loss": 0.2696, "step": 24525 }, { "epoch": 1.148920222982152, "grad_norm": 0.5720524779434292, "learning_rate": 2.0215430826706924e-06, "loss": 0.2874, "step": 24526 }, { "epoch": 1.1489670679720805, "grad_norm": 0.6138385295641565, "learning_rate": 2.021356939490317e-06, "loss": 0.2678, "step": 24527 }, { "epoch": 1.1490139129620087, "grad_norm": 0.6207695840380709, "learning_rate": 2.0211707990644125e-06, "loss": 0.2876, "step": 24528 }, { "epoch": 1.149060757951937, "grad_norm": 0.6091077607514985, "learning_rate": 2.0209846613940486e-06, "loss": 0.2927, "step": 24529 }, { "epoch": 1.1491076029418654, "grad_norm": 0.5414513087629199, "learning_rate": 2.0207985264802983e-06, "loss": 0.2672, "step": 24530 }, { "epoch": 1.1491544479317937, "grad_norm": 0.5986548866701973, "learning_rate": 2.0206123943242315e-06, "loss": 0.2729, "step": 24531 }, { "epoch": 1.1492012929217221, "grad_norm": 0.6181010038805488, "learning_rate": 2.02042626492692e-06, "loss": 0.2769, "step": 24532 }, { "epoch": 1.1492481379116504, "grad_norm": 0.599946887911511, "learning_rate": 2.020240138289436e-06, "loss": 0.2873, "step": 24533 }, { "epoch": 1.1492949829015786, "grad_norm": 0.6372252939374937, "learning_rate": 2.0200540144128487e-06, "loss": 0.3044, "step": 24534 }, { "epoch": 1.149341827891507, "grad_norm": 0.601631726260714, "learning_rate": 2.0198678932982295e-06, "loss": 0.267, "step": 24535 }, { "epoch": 1.1493886728814353, "grad_norm": 0.5942593114108446, "learning_rate": 2.0196817749466496e-06, "loss": 0.2632, "step": 24536 }, { "epoch": 1.1494355178713636, "grad_norm": 0.5788616510144681, "learning_rate": 2.0194956593591813e-06, "loss": 0.2644, "step": 24537 }, { "epoch": 1.149482362861292, "grad_norm": 0.6157685628448943, "learning_rate": 2.019309546536894e-06, "loss": 0.3027, "step": 24538 }, { "epoch": 1.1495292078512203, "grad_norm": 0.6201959626538872, "learning_rate": 2.0191234364808604e-06, "loss": 0.285, "step": 24539 }, { "epoch": 1.1495760528411487, "grad_norm": 0.6267171620553753, "learning_rate": 2.0189373291921495e-06, "loss": 0.2987, "step": 24540 }, { "epoch": 1.149622897831077, "grad_norm": 0.5567262281824098, "learning_rate": 2.0187512246718336e-06, "loss": 0.2667, "step": 24541 }, { "epoch": 1.1496697428210052, "grad_norm": 0.6151565046370238, "learning_rate": 2.0185651229209835e-06, "loss": 0.285, "step": 24542 }, { "epoch": 1.1497165878109337, "grad_norm": 0.6390805363610007, "learning_rate": 2.01837902394067e-06, "loss": 0.2811, "step": 24543 }, { "epoch": 1.149763432800862, "grad_norm": 0.615281910607002, "learning_rate": 2.0181929277319647e-06, "loss": 0.2665, "step": 24544 }, { "epoch": 1.1498102777907904, "grad_norm": 0.5547900203743986, "learning_rate": 2.0180068342959385e-06, "loss": 0.2645, "step": 24545 }, { "epoch": 1.1498571227807186, "grad_norm": 0.5850408895922882, "learning_rate": 2.0178207436336606e-06, "loss": 0.2808, "step": 24546 }, { "epoch": 1.1499039677706469, "grad_norm": 0.6456038603583942, "learning_rate": 2.017634655746203e-06, "loss": 0.2908, "step": 24547 }, { "epoch": 1.1499508127605753, "grad_norm": 0.6057864408714044, "learning_rate": 2.0174485706346375e-06, "loss": 0.289, "step": 24548 }, { "epoch": 1.1499976577505036, "grad_norm": 0.591654139155432, "learning_rate": 2.0172624883000335e-06, "loss": 0.2783, "step": 24549 }, { "epoch": 1.1500445027404318, "grad_norm": 0.6535211091002818, "learning_rate": 2.017076408743463e-06, "loss": 0.2992, "step": 24550 }, { "epoch": 1.1500913477303603, "grad_norm": 0.5996658252615484, "learning_rate": 2.0168903319659965e-06, "loss": 0.2679, "step": 24551 }, { "epoch": 1.1501381927202885, "grad_norm": 0.5734896173638445, "learning_rate": 2.016704257968705e-06, "loss": 0.2619, "step": 24552 }, { "epoch": 1.150185037710217, "grad_norm": 0.6549812904348725, "learning_rate": 2.0165181867526584e-06, "loss": 0.2801, "step": 24553 }, { "epoch": 1.1502318827001452, "grad_norm": 0.5651289727013464, "learning_rate": 2.016332118318928e-06, "loss": 0.2646, "step": 24554 }, { "epoch": 1.1502787276900737, "grad_norm": 0.5847668047786853, "learning_rate": 2.0161460526685854e-06, "loss": 0.2639, "step": 24555 }, { "epoch": 1.150325572680002, "grad_norm": 0.5899499598555752, "learning_rate": 2.0159599898027e-06, "loss": 0.289, "step": 24556 }, { "epoch": 1.1503724176699301, "grad_norm": 0.5915401435510121, "learning_rate": 2.015773929722343e-06, "loss": 0.2664, "step": 24557 }, { "epoch": 1.1504192626598586, "grad_norm": 0.606474491147016, "learning_rate": 2.0155878724285867e-06, "loss": 0.2923, "step": 24558 }, { "epoch": 1.1504661076497869, "grad_norm": 0.5924651018938014, "learning_rate": 2.0154018179224997e-06, "loss": 0.2876, "step": 24559 }, { "epoch": 1.150512952639715, "grad_norm": 0.5663161756796947, "learning_rate": 2.015215766205153e-06, "loss": 0.2727, "step": 24560 }, { "epoch": 1.1505597976296436, "grad_norm": 0.6307533673900158, "learning_rate": 2.0150297172776175e-06, "loss": 0.2971, "step": 24561 }, { "epoch": 1.1506066426195718, "grad_norm": 0.5659755473842272, "learning_rate": 2.014843671140965e-06, "loss": 0.2803, "step": 24562 }, { "epoch": 1.1506534876095003, "grad_norm": 0.6211257895839308, "learning_rate": 2.0146576277962644e-06, "loss": 0.2831, "step": 24563 }, { "epoch": 1.1507003325994285, "grad_norm": 0.5715959580811503, "learning_rate": 2.0144715872445887e-06, "loss": 0.2793, "step": 24564 }, { "epoch": 1.1507471775893567, "grad_norm": 0.5674476614216857, "learning_rate": 2.0142855494870053e-06, "loss": 0.2763, "step": 24565 }, { "epoch": 1.1507940225792852, "grad_norm": 0.6036887917496896, "learning_rate": 2.0140995145245875e-06, "loss": 0.2809, "step": 24566 }, { "epoch": 1.1508408675692134, "grad_norm": 0.6031650367216127, "learning_rate": 2.013913482358404e-06, "loss": 0.2797, "step": 24567 }, { "epoch": 1.150887712559142, "grad_norm": 0.5632059928226232, "learning_rate": 2.0137274529895267e-06, "loss": 0.2695, "step": 24568 }, { "epoch": 1.1509345575490701, "grad_norm": 0.5915240118368608, "learning_rate": 2.0135414264190255e-06, "loss": 0.2797, "step": 24569 }, { "epoch": 1.1509814025389984, "grad_norm": 0.5839470342748798, "learning_rate": 2.0133554026479716e-06, "loss": 0.2686, "step": 24570 }, { "epoch": 1.1510282475289269, "grad_norm": 0.5491751001502063, "learning_rate": 2.0131693816774343e-06, "loss": 0.2542, "step": 24571 }, { "epoch": 1.151075092518855, "grad_norm": 0.6362272481952426, "learning_rate": 2.0129833635084857e-06, "loss": 0.3016, "step": 24572 }, { "epoch": 1.1511219375087833, "grad_norm": 0.631224799557046, "learning_rate": 2.0127973481421945e-06, "loss": 0.2737, "step": 24573 }, { "epoch": 1.1511687824987118, "grad_norm": 0.5980991875766323, "learning_rate": 2.0126113355796324e-06, "loss": 0.2846, "step": 24574 }, { "epoch": 1.15121562748864, "grad_norm": 0.5830185660642546, "learning_rate": 2.01242532582187e-06, "loss": 0.2714, "step": 24575 }, { "epoch": 1.1512624724785685, "grad_norm": 0.6262675042144377, "learning_rate": 2.012239318869978e-06, "loss": 0.2684, "step": 24576 }, { "epoch": 1.1513093174684967, "grad_norm": 0.5912485679579907, "learning_rate": 2.0120533147250247e-06, "loss": 0.2701, "step": 24577 }, { "epoch": 1.151356162458425, "grad_norm": 0.5790404271372019, "learning_rate": 2.011867313388082e-06, "loss": 0.2858, "step": 24578 }, { "epoch": 1.1514030074483534, "grad_norm": 0.6416287839659003, "learning_rate": 2.011681314860221e-06, "loss": 0.2885, "step": 24579 }, { "epoch": 1.1514498524382817, "grad_norm": 0.6195345989221519, "learning_rate": 2.0114953191425105e-06, "loss": 0.2955, "step": 24580 }, { "epoch": 1.1514966974282101, "grad_norm": 0.646465774102617, "learning_rate": 2.0113093262360218e-06, "loss": 0.2947, "step": 24581 }, { "epoch": 1.1515435424181384, "grad_norm": 0.5820003631567163, "learning_rate": 2.0111233361418264e-06, "loss": 0.2556, "step": 24582 }, { "epoch": 1.1515903874080666, "grad_norm": 0.5616244882150315, "learning_rate": 2.0109373488609925e-06, "loss": 0.2495, "step": 24583 }, { "epoch": 1.151637232397995, "grad_norm": 0.5865006067415395, "learning_rate": 2.0107513643945908e-06, "loss": 0.285, "step": 24584 }, { "epoch": 1.1516840773879233, "grad_norm": 0.5512541969283548, "learning_rate": 2.010565382743692e-06, "loss": 0.2812, "step": 24585 }, { "epoch": 1.1517309223778516, "grad_norm": 0.5539029470003959, "learning_rate": 2.0103794039093667e-06, "loss": 0.2594, "step": 24586 }, { "epoch": 1.15177776736778, "grad_norm": 0.6053360442172638, "learning_rate": 2.010193427892685e-06, "loss": 0.2839, "step": 24587 }, { "epoch": 1.1518246123577083, "grad_norm": 0.5971934941040284, "learning_rate": 2.0100074546947173e-06, "loss": 0.2725, "step": 24588 }, { "epoch": 1.1518714573476367, "grad_norm": 0.5917272265594814, "learning_rate": 2.009821484316533e-06, "loss": 0.279, "step": 24589 }, { "epoch": 1.151918302337565, "grad_norm": 0.6258624064536223, "learning_rate": 2.009635516759203e-06, "loss": 0.3133, "step": 24590 }, { "epoch": 1.1519651473274934, "grad_norm": 0.592507467717065, "learning_rate": 2.0094495520237973e-06, "loss": 0.2776, "step": 24591 }, { "epoch": 1.1520119923174217, "grad_norm": 0.5517026318187495, "learning_rate": 2.009263590111386e-06, "loss": 0.2753, "step": 24592 }, { "epoch": 1.15205883730735, "grad_norm": 0.6221471939790248, "learning_rate": 2.0090776310230395e-06, "loss": 0.2852, "step": 24593 }, { "epoch": 1.1521056822972784, "grad_norm": 0.5962047179315705, "learning_rate": 2.008891674759829e-06, "loss": 0.2745, "step": 24594 }, { "epoch": 1.1521525272872066, "grad_norm": 0.58058177700278, "learning_rate": 2.008705721322822e-06, "loss": 0.2716, "step": 24595 }, { "epoch": 1.1521993722771349, "grad_norm": 0.5980505246165057, "learning_rate": 2.0085197707130898e-06, "loss": 0.2787, "step": 24596 }, { "epoch": 1.1522462172670633, "grad_norm": 0.598261701445197, "learning_rate": 2.0083338229317036e-06, "loss": 0.2937, "step": 24597 }, { "epoch": 1.1522930622569916, "grad_norm": 0.6096376430451811, "learning_rate": 2.0081478779797327e-06, "loss": 0.2772, "step": 24598 }, { "epoch": 1.15233990724692, "grad_norm": 0.6325835058093625, "learning_rate": 2.0079619358582466e-06, "loss": 0.2912, "step": 24599 }, { "epoch": 1.1523867522368483, "grad_norm": 0.595597877358628, "learning_rate": 2.007775996568317e-06, "loss": 0.2691, "step": 24600 }, { "epoch": 1.1524335972267765, "grad_norm": 0.5795706887441269, "learning_rate": 2.007590060111012e-06, "loss": 0.2693, "step": 24601 }, { "epoch": 1.152480442216705, "grad_norm": 0.6245503615351734, "learning_rate": 2.0074041264874022e-06, "loss": 0.2819, "step": 24602 }, { "epoch": 1.1525272872066332, "grad_norm": 0.609246410076591, "learning_rate": 2.0072181956985583e-06, "loss": 0.2862, "step": 24603 }, { "epoch": 1.1525741321965617, "grad_norm": 0.6259624812888944, "learning_rate": 2.0070322677455494e-06, "loss": 0.282, "step": 24604 }, { "epoch": 1.15262097718649, "grad_norm": 0.5851580585589645, "learning_rate": 2.006846342629446e-06, "loss": 0.2741, "step": 24605 }, { "epoch": 1.1526678221764182, "grad_norm": 0.5901799547813917, "learning_rate": 2.006660420351319e-06, "loss": 0.2858, "step": 24606 }, { "epoch": 1.1527146671663466, "grad_norm": 0.6210524080929359, "learning_rate": 2.006474500912236e-06, "loss": 0.2657, "step": 24607 }, { "epoch": 1.1527615121562749, "grad_norm": 0.603802701361813, "learning_rate": 2.006288584313269e-06, "loss": 0.2743, "step": 24608 }, { "epoch": 1.152808357146203, "grad_norm": 0.5760750203160281, "learning_rate": 2.0061026705554866e-06, "loss": 0.2712, "step": 24609 }, { "epoch": 1.1528552021361316, "grad_norm": 0.5357601245550453, "learning_rate": 2.005916759639959e-06, "loss": 0.2614, "step": 24610 }, { "epoch": 1.1529020471260598, "grad_norm": 0.6154976827855837, "learning_rate": 2.0057308515677568e-06, "loss": 0.2864, "step": 24611 }, { "epoch": 1.1529488921159883, "grad_norm": 0.5949698586440028, "learning_rate": 2.0055449463399498e-06, "loss": 0.2775, "step": 24612 }, { "epoch": 1.1529957371059165, "grad_norm": 0.571653702508543, "learning_rate": 2.0053590439576064e-06, "loss": 0.2647, "step": 24613 }, { "epoch": 1.1530425820958448, "grad_norm": 0.6320133929313912, "learning_rate": 2.0051731444217973e-06, "loss": 0.2783, "step": 24614 }, { "epoch": 1.1530894270857732, "grad_norm": 0.596835654161206, "learning_rate": 2.004987247733593e-06, "loss": 0.2917, "step": 24615 }, { "epoch": 1.1531362720757015, "grad_norm": 0.6034875626779167, "learning_rate": 2.004801353894062e-06, "loss": 0.2839, "step": 24616 }, { "epoch": 1.15318311706563, "grad_norm": 0.5725986385182014, "learning_rate": 2.0046154629042757e-06, "loss": 0.2618, "step": 24617 }, { "epoch": 1.1532299620555582, "grad_norm": 0.6264499052348615, "learning_rate": 2.004429574765302e-06, "loss": 0.293, "step": 24618 }, { "epoch": 1.1532768070454864, "grad_norm": 0.626356135235441, "learning_rate": 2.0042436894782126e-06, "loss": 0.2748, "step": 24619 }, { "epoch": 1.1533236520354149, "grad_norm": 0.6056309488523328, "learning_rate": 2.004057807044075e-06, "loss": 0.2798, "step": 24620 }, { "epoch": 1.153370497025343, "grad_norm": 0.5931637636501317, "learning_rate": 2.003871927463961e-06, "loss": 0.2995, "step": 24621 }, { "epoch": 1.1534173420152714, "grad_norm": 0.6246866386736415, "learning_rate": 2.0036860507389384e-06, "loss": 0.2712, "step": 24622 }, { "epoch": 1.1534641870051998, "grad_norm": 0.6482680242475162, "learning_rate": 2.003500176870078e-06, "loss": 0.296, "step": 24623 }, { "epoch": 1.153511031995128, "grad_norm": 0.6017849038287854, "learning_rate": 2.0033143058584497e-06, "loss": 0.2877, "step": 24624 }, { "epoch": 1.1535578769850565, "grad_norm": 0.6093685639880788, "learning_rate": 2.0031284377051237e-06, "loss": 0.2909, "step": 24625 }, { "epoch": 1.1536047219749848, "grad_norm": 0.5608436500247711, "learning_rate": 2.0029425724111673e-06, "loss": 0.269, "step": 24626 }, { "epoch": 1.1536515669649132, "grad_norm": 0.5952992607387875, "learning_rate": 2.0027567099776515e-06, "loss": 0.2697, "step": 24627 }, { "epoch": 1.1536984119548415, "grad_norm": 0.5807726953479274, "learning_rate": 2.0025708504056462e-06, "loss": 0.2702, "step": 24628 }, { "epoch": 1.1537452569447697, "grad_norm": 0.5687287456438427, "learning_rate": 2.00238499369622e-06, "loss": 0.2489, "step": 24629 }, { "epoch": 1.1537921019346982, "grad_norm": 0.5705483909094289, "learning_rate": 2.0021991398504435e-06, "loss": 0.2715, "step": 24630 }, { "epoch": 1.1538389469246264, "grad_norm": 0.6205175446170172, "learning_rate": 2.002013288869387e-06, "loss": 0.266, "step": 24631 }, { "epoch": 1.1538857919145546, "grad_norm": 0.5692673476763354, "learning_rate": 2.001827440754118e-06, "loss": 0.2699, "step": 24632 }, { "epoch": 1.1539326369044831, "grad_norm": 0.5932384142859425, "learning_rate": 2.0016415955057064e-06, "loss": 0.2634, "step": 24633 }, { "epoch": 1.1539794818944114, "grad_norm": 0.5737617603384614, "learning_rate": 2.001455753125222e-06, "loss": 0.277, "step": 24634 }, { "epoch": 1.1540263268843398, "grad_norm": 0.6312188509681917, "learning_rate": 2.0012699136137353e-06, "loss": 0.2756, "step": 24635 }, { "epoch": 1.154073171874268, "grad_norm": 0.5954227404642025, "learning_rate": 2.0010840769723142e-06, "loss": 0.2828, "step": 24636 }, { "epoch": 1.1541200168641963, "grad_norm": 0.6065023969949296, "learning_rate": 2.00089824320203e-06, "loss": 0.2855, "step": 24637 }, { "epoch": 1.1541668618541248, "grad_norm": 0.6092131177973902, "learning_rate": 2.0007124123039496e-06, "loss": 0.2794, "step": 24638 }, { "epoch": 1.154213706844053, "grad_norm": 0.5975951031910971, "learning_rate": 2.000526584279145e-06, "loss": 0.2951, "step": 24639 }, { "epoch": 1.1542605518339815, "grad_norm": 0.5648095935379439, "learning_rate": 2.000340759128683e-06, "loss": 0.2794, "step": 24640 }, { "epoch": 1.1543073968239097, "grad_norm": 0.5880241056626107, "learning_rate": 2.0001549368536347e-06, "loss": 0.294, "step": 24641 }, { "epoch": 1.154354241813838, "grad_norm": 0.6144952920753152, "learning_rate": 1.9999691174550693e-06, "loss": 0.2911, "step": 24642 }, { "epoch": 1.1544010868037664, "grad_norm": 0.5750600489352176, "learning_rate": 1.999783300934057e-06, "loss": 0.272, "step": 24643 }, { "epoch": 1.1544479317936946, "grad_norm": 0.5476251207465176, "learning_rate": 1.999597487291665e-06, "loss": 0.2714, "step": 24644 }, { "epoch": 1.154494776783623, "grad_norm": 0.6081642479236492, "learning_rate": 1.9994116765289635e-06, "loss": 0.2695, "step": 24645 }, { "epoch": 1.1545416217735514, "grad_norm": 0.5812003749777676, "learning_rate": 1.9992258686470223e-06, "loss": 0.2628, "step": 24646 }, { "epoch": 1.1545884667634796, "grad_norm": 0.5666008895957936, "learning_rate": 1.99904006364691e-06, "loss": 0.2584, "step": 24647 }, { "epoch": 1.154635311753408, "grad_norm": 0.6144792159152678, "learning_rate": 1.9988542615296967e-06, "loss": 0.2903, "step": 24648 }, { "epoch": 1.1546821567433363, "grad_norm": 0.569133543205735, "learning_rate": 1.9986684622964515e-06, "loss": 0.2602, "step": 24649 }, { "epoch": 1.1547290017332645, "grad_norm": 0.5363544826111515, "learning_rate": 1.9984826659482433e-06, "loss": 0.2604, "step": 24650 }, { "epoch": 1.154775846723193, "grad_norm": 0.5582427226393664, "learning_rate": 1.9982968724861402e-06, "loss": 0.2526, "step": 24651 }, { "epoch": 1.1548226917131212, "grad_norm": 0.534141465814043, "learning_rate": 1.9981110819112133e-06, "loss": 0.2656, "step": 24652 }, { "epoch": 1.1548695367030497, "grad_norm": 0.5590369487776174, "learning_rate": 1.9979252942245307e-06, "loss": 0.2724, "step": 24653 }, { "epoch": 1.154916381692978, "grad_norm": 0.5956678053116743, "learning_rate": 1.9977395094271617e-06, "loss": 0.2656, "step": 24654 }, { "epoch": 1.1549632266829062, "grad_norm": 0.5497780582591623, "learning_rate": 1.9975537275201766e-06, "loss": 0.2804, "step": 24655 }, { "epoch": 1.1550100716728346, "grad_norm": 0.6033215810292701, "learning_rate": 1.9973679485046427e-06, "loss": 0.2856, "step": 24656 }, { "epoch": 1.155056916662763, "grad_norm": 0.5913834222496508, "learning_rate": 1.99718217238163e-06, "loss": 0.2978, "step": 24657 }, { "epoch": 1.1551037616526911, "grad_norm": 0.5962983415469209, "learning_rate": 1.9969963991522074e-06, "loss": 0.2611, "step": 24658 }, { "epoch": 1.1551506066426196, "grad_norm": 0.5386272681928336, "learning_rate": 1.9968106288174437e-06, "loss": 0.2435, "step": 24659 }, { "epoch": 1.1551974516325478, "grad_norm": 0.5525504490133196, "learning_rate": 1.9966248613784094e-06, "loss": 0.2794, "step": 24660 }, { "epoch": 1.1552442966224763, "grad_norm": 0.5653606506922056, "learning_rate": 1.9964390968361727e-06, "loss": 0.2575, "step": 24661 }, { "epoch": 1.1552911416124045, "grad_norm": 0.5588337168528269, "learning_rate": 1.9962533351918014e-06, "loss": 0.2635, "step": 24662 }, { "epoch": 1.155337986602333, "grad_norm": 0.6706242819658353, "learning_rate": 1.9960675764463656e-06, "loss": 0.299, "step": 24663 }, { "epoch": 1.1553848315922612, "grad_norm": 0.5954899531698533, "learning_rate": 1.9958818206009347e-06, "loss": 0.2917, "step": 24664 }, { "epoch": 1.1554316765821895, "grad_norm": 0.5739392105482048, "learning_rate": 1.9956960676565768e-06, "loss": 0.2781, "step": 24665 }, { "epoch": 1.155478521572118, "grad_norm": 0.603137524280658, "learning_rate": 1.9955103176143613e-06, "loss": 0.2744, "step": 24666 }, { "epoch": 1.1555253665620462, "grad_norm": 0.5909503376697248, "learning_rate": 1.9953245704753576e-06, "loss": 0.2871, "step": 24667 }, { "epoch": 1.1555722115519744, "grad_norm": 0.5838804002066175, "learning_rate": 1.9951388262406342e-06, "loss": 0.272, "step": 24668 }, { "epoch": 1.155619056541903, "grad_norm": 0.606199650349646, "learning_rate": 1.9949530849112592e-06, "loss": 0.2933, "step": 24669 }, { "epoch": 1.1556659015318311, "grad_norm": 0.6025509942877201, "learning_rate": 1.9947673464883033e-06, "loss": 0.2839, "step": 24670 }, { "epoch": 1.1557127465217596, "grad_norm": 0.5999010163411386, "learning_rate": 1.9945816109728334e-06, "loss": 0.2679, "step": 24671 }, { "epoch": 1.1557595915116878, "grad_norm": 0.5629620528234824, "learning_rate": 1.994395878365919e-06, "loss": 0.2841, "step": 24672 }, { "epoch": 1.155806436501616, "grad_norm": 0.5713205473321097, "learning_rate": 1.9942101486686307e-06, "loss": 0.2765, "step": 24673 }, { "epoch": 1.1558532814915445, "grad_norm": 0.5647786840023001, "learning_rate": 1.9940244218820356e-06, "loss": 0.2566, "step": 24674 }, { "epoch": 1.1559001264814728, "grad_norm": 0.6495596678232752, "learning_rate": 1.9938386980072017e-06, "loss": 0.3003, "step": 24675 }, { "epoch": 1.1559469714714012, "grad_norm": 0.5883491464073122, "learning_rate": 1.993652977045199e-06, "loss": 0.2779, "step": 24676 }, { "epoch": 1.1559938164613295, "grad_norm": 0.5980769962827112, "learning_rate": 1.9934672589970968e-06, "loss": 0.2828, "step": 24677 }, { "epoch": 1.1560406614512577, "grad_norm": 0.6157669470366752, "learning_rate": 1.9932815438639627e-06, "loss": 0.2938, "step": 24678 }, { "epoch": 1.1560875064411862, "grad_norm": 0.5380005657146828, "learning_rate": 1.9930958316468666e-06, "loss": 0.2585, "step": 24679 }, { "epoch": 1.1561343514311144, "grad_norm": 0.5811917299968017, "learning_rate": 1.992910122346876e-06, "loss": 0.2757, "step": 24680 }, { "epoch": 1.1561811964210427, "grad_norm": 0.5549659704622896, "learning_rate": 1.9927244159650608e-06, "loss": 0.2708, "step": 24681 }, { "epoch": 1.1562280414109711, "grad_norm": 0.6065112456173627, "learning_rate": 1.9925387125024885e-06, "loss": 0.2797, "step": 24682 }, { "epoch": 1.1562748864008994, "grad_norm": 0.5729733335736129, "learning_rate": 1.992353011960228e-06, "loss": 0.2698, "step": 24683 }, { "epoch": 1.1563217313908278, "grad_norm": 0.635804324355777, "learning_rate": 1.9921673143393493e-06, "loss": 0.2954, "step": 24684 }, { "epoch": 1.156368576380756, "grad_norm": 0.5603507267289615, "learning_rate": 1.991981619640919e-06, "loss": 0.2607, "step": 24685 }, { "epoch": 1.1564154213706843, "grad_norm": 0.606999334639243, "learning_rate": 1.9917959278660085e-06, "loss": 0.2706, "step": 24686 }, { "epoch": 1.1564622663606128, "grad_norm": 0.575530055057591, "learning_rate": 1.9916102390156833e-06, "loss": 0.2768, "step": 24687 }, { "epoch": 1.156509111350541, "grad_norm": 0.536232102538902, "learning_rate": 1.991424553091014e-06, "loss": 0.2661, "step": 24688 }, { "epoch": 1.1565559563404695, "grad_norm": 0.6084161041531487, "learning_rate": 1.991238870093068e-06, "loss": 0.2674, "step": 24689 }, { "epoch": 1.1566028013303977, "grad_norm": 0.6104293122311034, "learning_rate": 1.991053190022914e-06, "loss": 0.2721, "step": 24690 }, { "epoch": 1.156649646320326, "grad_norm": 0.5445593122366393, "learning_rate": 1.990867512881622e-06, "loss": 0.2735, "step": 24691 }, { "epoch": 1.1566964913102544, "grad_norm": 0.5595259650575508, "learning_rate": 1.99068183867026e-06, "loss": 0.2484, "step": 24692 }, { "epoch": 1.1567433363001827, "grad_norm": 0.5757032613341637, "learning_rate": 1.990496167389895e-06, "loss": 0.2788, "step": 24693 }, { "epoch": 1.156790181290111, "grad_norm": 0.6299039814918828, "learning_rate": 1.9903104990415964e-06, "loss": 0.2793, "step": 24694 }, { "epoch": 1.1568370262800394, "grad_norm": 0.6227062400904502, "learning_rate": 1.9901248336264334e-06, "loss": 0.2515, "step": 24695 }, { "epoch": 1.1568838712699676, "grad_norm": 0.6037150422265986, "learning_rate": 1.989939171145473e-06, "loss": 0.275, "step": 24696 }, { "epoch": 1.156930716259896, "grad_norm": 0.6101012211199818, "learning_rate": 1.9897535115997845e-06, "loss": 0.2665, "step": 24697 }, { "epoch": 1.1569775612498243, "grad_norm": 0.6158807455209774, "learning_rate": 1.9895678549904378e-06, "loss": 0.2915, "step": 24698 }, { "epoch": 1.1570244062397528, "grad_norm": 0.5822468110452582, "learning_rate": 1.989382201318499e-06, "loss": 0.2823, "step": 24699 }, { "epoch": 1.157071251229681, "grad_norm": 0.6193988714608034, "learning_rate": 1.9891965505850367e-06, "loss": 0.2715, "step": 24700 }, { "epoch": 1.1571180962196093, "grad_norm": 0.6399734689837979, "learning_rate": 1.9890109027911198e-06, "loss": 0.3098, "step": 24701 }, { "epoch": 1.1571649412095377, "grad_norm": 0.6502065106056688, "learning_rate": 1.9888252579378175e-06, "loss": 0.2763, "step": 24702 }, { "epoch": 1.157211786199466, "grad_norm": 0.5789099350282688, "learning_rate": 1.9886396160261966e-06, "loss": 0.2624, "step": 24703 }, { "epoch": 1.1572586311893942, "grad_norm": 0.6316374343342848, "learning_rate": 1.9884539770573275e-06, "loss": 0.2815, "step": 24704 }, { "epoch": 1.1573054761793227, "grad_norm": 0.5956579442799075, "learning_rate": 1.988268341032276e-06, "loss": 0.2632, "step": 24705 }, { "epoch": 1.157352321169251, "grad_norm": 0.5536262934616253, "learning_rate": 1.988082707952112e-06, "loss": 0.2678, "step": 24706 }, { "epoch": 1.1573991661591794, "grad_norm": 0.5965140586871941, "learning_rate": 1.987897077817903e-06, "loss": 0.2738, "step": 24707 }, { "epoch": 1.1574460111491076, "grad_norm": 0.6461317219456876, "learning_rate": 1.987711450630718e-06, "loss": 0.2944, "step": 24708 }, { "epoch": 1.1574928561390359, "grad_norm": 0.6026447376171814, "learning_rate": 1.9875258263916243e-06, "loss": 0.2709, "step": 24709 }, { "epoch": 1.1575397011289643, "grad_norm": 0.5556200553532162, "learning_rate": 1.987340205101692e-06, "loss": 0.2616, "step": 24710 }, { "epoch": 1.1575865461188926, "grad_norm": 0.600362982413706, "learning_rate": 1.9871545867619866e-06, "loss": 0.264, "step": 24711 }, { "epoch": 1.157633391108821, "grad_norm": 0.5487822766435217, "learning_rate": 1.9869689713735784e-06, "loss": 0.2532, "step": 24712 }, { "epoch": 1.1576802360987493, "grad_norm": 0.5591910774726907, "learning_rate": 1.986783358937534e-06, "loss": 0.2581, "step": 24713 }, { "epoch": 1.1577270810886775, "grad_norm": 0.5521625373646415, "learning_rate": 1.9865977494549223e-06, "loss": 0.2889, "step": 24714 }, { "epoch": 1.157773926078606, "grad_norm": 0.5961467914592673, "learning_rate": 1.9864121429268124e-06, "loss": 0.2743, "step": 24715 }, { "epoch": 1.1578207710685342, "grad_norm": 0.6193407664229569, "learning_rate": 1.986226539354272e-06, "loss": 0.2853, "step": 24716 }, { "epoch": 1.1578676160584624, "grad_norm": 0.570980178966102, "learning_rate": 1.9860409387383677e-06, "loss": 0.2623, "step": 24717 }, { "epoch": 1.157914461048391, "grad_norm": 0.5794867119152672, "learning_rate": 1.9858553410801683e-06, "loss": 0.2707, "step": 24718 }, { "epoch": 1.1579613060383191, "grad_norm": 0.5926090971390712, "learning_rate": 1.985669746380743e-06, "loss": 0.2753, "step": 24719 }, { "epoch": 1.1580081510282476, "grad_norm": 0.6260956512305035, "learning_rate": 1.9854841546411584e-06, "loss": 0.2694, "step": 24720 }, { "epoch": 1.1580549960181759, "grad_norm": 0.6265460343926802, "learning_rate": 1.9852985658624833e-06, "loss": 0.2836, "step": 24721 }, { "epoch": 1.158101841008104, "grad_norm": 0.5941211298019868, "learning_rate": 1.9851129800457867e-06, "loss": 0.2693, "step": 24722 }, { "epoch": 1.1581486859980326, "grad_norm": 0.5964265131119056, "learning_rate": 1.9849273971921346e-06, "loss": 0.2838, "step": 24723 }, { "epoch": 1.1581955309879608, "grad_norm": 0.6734479216146417, "learning_rate": 1.9847418173025955e-06, "loss": 0.2775, "step": 24724 }, { "epoch": 1.1582423759778893, "grad_norm": 0.5652746032480411, "learning_rate": 1.984556240378238e-06, "loss": 0.2698, "step": 24725 }, { "epoch": 1.1582892209678175, "grad_norm": 0.6598412838272354, "learning_rate": 1.98437066642013e-06, "loss": 0.2651, "step": 24726 }, { "epoch": 1.1583360659577457, "grad_norm": 0.5740702399958044, "learning_rate": 1.9841850954293392e-06, "loss": 0.2763, "step": 24727 }, { "epoch": 1.1583829109476742, "grad_norm": 0.5839960059608249, "learning_rate": 1.983999527406934e-06, "loss": 0.2736, "step": 24728 }, { "epoch": 1.1584297559376024, "grad_norm": 0.5855305199102004, "learning_rate": 1.983813962353981e-06, "loss": 0.2722, "step": 24729 }, { "epoch": 1.1584766009275307, "grad_norm": 0.6116535771565469, "learning_rate": 1.9836284002715495e-06, "loss": 0.2818, "step": 24730 }, { "epoch": 1.1585234459174591, "grad_norm": 0.6060066841496929, "learning_rate": 1.983442841160706e-06, "loss": 0.2816, "step": 24731 }, { "epoch": 1.1585702909073874, "grad_norm": 0.6355436583391554, "learning_rate": 1.9832572850225193e-06, "loss": 0.2845, "step": 24732 }, { "epoch": 1.1586171358973159, "grad_norm": 0.6379928414434007, "learning_rate": 1.9830717318580577e-06, "loss": 0.3056, "step": 24733 }, { "epoch": 1.158663980887244, "grad_norm": 0.5787262602429936, "learning_rate": 1.9828861816683885e-06, "loss": 0.2765, "step": 24734 }, { "epoch": 1.1587108258771726, "grad_norm": 0.5953310897397655, "learning_rate": 1.9827006344545785e-06, "loss": 0.2582, "step": 24735 }, { "epoch": 1.1587576708671008, "grad_norm": 0.595571408943761, "learning_rate": 1.9825150902176963e-06, "loss": 0.2711, "step": 24736 }, { "epoch": 1.158804515857029, "grad_norm": 0.6148900964972958, "learning_rate": 1.9823295489588105e-06, "loss": 0.2979, "step": 24737 }, { "epoch": 1.1588513608469575, "grad_norm": 0.6431694118818937, "learning_rate": 1.9821440106789874e-06, "loss": 0.2869, "step": 24738 }, { "epoch": 1.1588982058368857, "grad_norm": 0.6073181937379771, "learning_rate": 1.981958475379295e-06, "loss": 0.2741, "step": 24739 }, { "epoch": 1.158945050826814, "grad_norm": 0.6027724446800229, "learning_rate": 1.9817729430608026e-06, "loss": 0.2752, "step": 24740 }, { "epoch": 1.1589918958167424, "grad_norm": 0.6085856838648382, "learning_rate": 1.9815874137245763e-06, "loss": 0.2689, "step": 24741 }, { "epoch": 1.1590387408066707, "grad_norm": 0.5589278828133368, "learning_rate": 1.9814018873716835e-06, "loss": 0.27, "step": 24742 }, { "epoch": 1.1590855857965991, "grad_norm": 0.5864934934728607, "learning_rate": 1.9812163640031927e-06, "loss": 0.2775, "step": 24743 }, { "epoch": 1.1591324307865274, "grad_norm": 0.5780942551966698, "learning_rate": 1.981030843620171e-06, "loss": 0.2813, "step": 24744 }, { "epoch": 1.1591792757764556, "grad_norm": 0.5967065996068008, "learning_rate": 1.980845326223687e-06, "loss": 0.2763, "step": 24745 }, { "epoch": 1.159226120766384, "grad_norm": 0.5844135733788505, "learning_rate": 1.9806598118148084e-06, "loss": 0.2877, "step": 24746 }, { "epoch": 1.1592729657563123, "grad_norm": 0.5935066980195071, "learning_rate": 1.9804743003946008e-06, "loss": 0.2843, "step": 24747 }, { "epoch": 1.1593198107462408, "grad_norm": 0.6196983260727511, "learning_rate": 1.9802887919641336e-06, "loss": 0.2787, "step": 24748 }, { "epoch": 1.159366655736169, "grad_norm": 0.6061500105921361, "learning_rate": 1.980103286524473e-06, "loss": 0.2801, "step": 24749 }, { "epoch": 1.1594135007260973, "grad_norm": 0.6126009705121211, "learning_rate": 1.9799177840766874e-06, "loss": 0.27, "step": 24750 }, { "epoch": 1.1594603457160257, "grad_norm": 0.5674414569918644, "learning_rate": 1.979732284621845e-06, "loss": 0.2692, "step": 24751 }, { "epoch": 1.159507190705954, "grad_norm": 0.5916409269193379, "learning_rate": 1.979546788161012e-06, "loss": 0.2879, "step": 24752 }, { "epoch": 1.1595540356958822, "grad_norm": 0.6197832361337015, "learning_rate": 1.9793612946952574e-06, "loss": 0.2896, "step": 24753 }, { "epoch": 1.1596008806858107, "grad_norm": 0.6227963641990027, "learning_rate": 1.9791758042256466e-06, "loss": 0.2915, "step": 24754 }, { "epoch": 1.159647725675739, "grad_norm": 0.5648044886086567, "learning_rate": 1.9789903167532487e-06, "loss": 0.2649, "step": 24755 }, { "epoch": 1.1596945706656674, "grad_norm": 0.6412073055847307, "learning_rate": 1.9788048322791297e-06, "loss": 0.2775, "step": 24756 }, { "epoch": 1.1597414156555956, "grad_norm": 0.6297307279380014, "learning_rate": 1.9786193508043587e-06, "loss": 0.28, "step": 24757 }, { "epoch": 1.1597882606455239, "grad_norm": 0.5830808574672409, "learning_rate": 1.978433872330002e-06, "loss": 0.2654, "step": 24758 }, { "epoch": 1.1598351056354523, "grad_norm": 0.6078670907175024, "learning_rate": 1.978248396857128e-06, "loss": 0.2733, "step": 24759 }, { "epoch": 1.1598819506253806, "grad_norm": 0.6143061528004032, "learning_rate": 1.9780629243868026e-06, "loss": 0.2926, "step": 24760 }, { "epoch": 1.159928795615309, "grad_norm": 0.5449347223626181, "learning_rate": 1.9778774549200945e-06, "loss": 0.2626, "step": 24761 }, { "epoch": 1.1599756406052373, "grad_norm": 0.5872686645354632, "learning_rate": 1.9776919884580694e-06, "loss": 0.2691, "step": 24762 }, { "epoch": 1.1600224855951655, "grad_norm": 0.6055575806085908, "learning_rate": 1.9775065250017957e-06, "loss": 0.2735, "step": 24763 }, { "epoch": 1.160069330585094, "grad_norm": 0.5819391344281379, "learning_rate": 1.9773210645523416e-06, "loss": 0.2676, "step": 24764 }, { "epoch": 1.1601161755750222, "grad_norm": 0.6113333194496728, "learning_rate": 1.9771356071107735e-06, "loss": 0.2666, "step": 24765 }, { "epoch": 1.1601630205649505, "grad_norm": 0.6401158593958818, "learning_rate": 1.9769501526781578e-06, "loss": 0.2919, "step": 24766 }, { "epoch": 1.160209865554879, "grad_norm": 0.608595730939218, "learning_rate": 1.9767647012555626e-06, "loss": 0.282, "step": 24767 }, { "epoch": 1.1602567105448072, "grad_norm": 0.6233981128802235, "learning_rate": 1.976579252844055e-06, "loss": 0.265, "step": 24768 }, { "epoch": 1.1603035555347356, "grad_norm": 0.618722861388303, "learning_rate": 1.976393807444702e-06, "loss": 0.2818, "step": 24769 }, { "epoch": 1.1603504005246639, "grad_norm": 0.6278494477938329, "learning_rate": 1.976208365058571e-06, "loss": 0.2841, "step": 24770 }, { "epoch": 1.1603972455145923, "grad_norm": 0.6114210453452326, "learning_rate": 1.97602292568673e-06, "loss": 0.2777, "step": 24771 }, { "epoch": 1.1604440905045206, "grad_norm": 0.6198391379184542, "learning_rate": 1.9758374893302456e-06, "loss": 0.2804, "step": 24772 }, { "epoch": 1.1604909354944488, "grad_norm": 0.6236445672452033, "learning_rate": 1.975652055990184e-06, "loss": 0.2973, "step": 24773 }, { "epoch": 1.1605377804843773, "grad_norm": 0.5493724003444626, "learning_rate": 1.9754666256676127e-06, "loss": 0.2571, "step": 24774 }, { "epoch": 1.1605846254743055, "grad_norm": 0.5707700046255315, "learning_rate": 1.9752811983635996e-06, "loss": 0.2631, "step": 24775 }, { "epoch": 1.1606314704642338, "grad_norm": 0.6182989239216169, "learning_rate": 1.9750957740792108e-06, "loss": 0.2882, "step": 24776 }, { "epoch": 1.1606783154541622, "grad_norm": 0.5848263504580149, "learning_rate": 1.974910352815515e-06, "loss": 0.2583, "step": 24777 }, { "epoch": 1.1607251604440905, "grad_norm": 0.6012897866101155, "learning_rate": 1.9747249345735776e-06, "loss": 0.2638, "step": 24778 }, { "epoch": 1.160772005434019, "grad_norm": 0.599175864516398, "learning_rate": 1.9745395193544657e-06, "loss": 0.2644, "step": 24779 }, { "epoch": 1.1608188504239472, "grad_norm": 0.591207975890178, "learning_rate": 1.974354107159247e-06, "loss": 0.2684, "step": 24780 }, { "epoch": 1.1608656954138754, "grad_norm": 0.6078636151839759, "learning_rate": 1.9741686979889884e-06, "loss": 0.2742, "step": 24781 }, { "epoch": 1.1609125404038039, "grad_norm": 0.5507930007854122, "learning_rate": 1.973983291844757e-06, "loss": 0.2623, "step": 24782 }, { "epoch": 1.1609593853937321, "grad_norm": 0.5856393004535929, "learning_rate": 1.9737978887276204e-06, "loss": 0.2776, "step": 24783 }, { "epoch": 1.1610062303836606, "grad_norm": 0.560248718518979, "learning_rate": 1.9736124886386434e-06, "loss": 0.2725, "step": 24784 }, { "epoch": 1.1610530753735888, "grad_norm": 0.5881686391415795, "learning_rate": 1.973427091578894e-06, "loss": 0.2723, "step": 24785 }, { "epoch": 1.161099920363517, "grad_norm": 0.5802685307079564, "learning_rate": 1.9732416975494397e-06, "loss": 0.2865, "step": 24786 }, { "epoch": 1.1611467653534455, "grad_norm": 0.6255670602892731, "learning_rate": 1.9730563065513468e-06, "loss": 0.2801, "step": 24787 }, { "epoch": 1.1611936103433738, "grad_norm": 0.6196436651985954, "learning_rate": 1.9728709185856826e-06, "loss": 0.2813, "step": 24788 }, { "epoch": 1.161240455333302, "grad_norm": 0.6315829199669413, "learning_rate": 1.972685533653515e-06, "loss": 0.284, "step": 24789 }, { "epoch": 1.1612873003232305, "grad_norm": 0.554362660080094, "learning_rate": 1.9725001517559087e-06, "loss": 0.264, "step": 24790 }, { "epoch": 1.1613341453131587, "grad_norm": 0.5399160738032209, "learning_rate": 1.972314772893931e-06, "loss": 0.2678, "step": 24791 }, { "epoch": 1.1613809903030872, "grad_norm": 0.5532673729417162, "learning_rate": 1.972129397068649e-06, "loss": 0.2738, "step": 24792 }, { "epoch": 1.1614278352930154, "grad_norm": 0.5708796788379124, "learning_rate": 1.9719440242811304e-06, "loss": 0.2848, "step": 24793 }, { "epoch": 1.1614746802829437, "grad_norm": 0.5928324929445783, "learning_rate": 1.9717586545324407e-06, "loss": 0.2862, "step": 24794 }, { "epoch": 1.1615215252728721, "grad_norm": 0.6175257543965513, "learning_rate": 1.971573287823648e-06, "loss": 0.2806, "step": 24795 }, { "epoch": 1.1615683702628004, "grad_norm": 0.578575425851179, "learning_rate": 1.9713879241558173e-06, "loss": 0.2675, "step": 24796 }, { "epoch": 1.1616152152527288, "grad_norm": 0.5530705982244764, "learning_rate": 1.971202563530017e-06, "loss": 0.2559, "step": 24797 }, { "epoch": 1.161662060242657, "grad_norm": 0.592752022750702, "learning_rate": 1.9710172059473122e-06, "loss": 0.2705, "step": 24798 }, { "epoch": 1.1617089052325853, "grad_norm": 0.5707357184535431, "learning_rate": 1.9708318514087703e-06, "loss": 0.2644, "step": 24799 }, { "epoch": 1.1617557502225138, "grad_norm": 0.5853563204664697, "learning_rate": 1.970646499915459e-06, "loss": 0.2836, "step": 24800 }, { "epoch": 1.161802595212442, "grad_norm": 0.5808172511676267, "learning_rate": 1.970461151468444e-06, "loss": 0.2742, "step": 24801 }, { "epoch": 1.1618494402023702, "grad_norm": 0.5871483995757588, "learning_rate": 1.970275806068792e-06, "loss": 0.2831, "step": 24802 }, { "epoch": 1.1618962851922987, "grad_norm": 0.6294300302950551, "learning_rate": 1.970090463717569e-06, "loss": 0.268, "step": 24803 }, { "epoch": 1.161943130182227, "grad_norm": 0.6312709106451756, "learning_rate": 1.9699051244158423e-06, "loss": 0.2844, "step": 24804 }, { "epoch": 1.1619899751721554, "grad_norm": 0.571575340687694, "learning_rate": 1.969719788164678e-06, "loss": 0.277, "step": 24805 }, { "epoch": 1.1620368201620837, "grad_norm": 0.5741532628644489, "learning_rate": 1.9695344549651443e-06, "loss": 0.2728, "step": 24806 }, { "epoch": 1.1620836651520121, "grad_norm": 0.5931749258163184, "learning_rate": 1.9693491248183057e-06, "loss": 0.2746, "step": 24807 }, { "epoch": 1.1621305101419404, "grad_norm": 0.5951611572384492, "learning_rate": 1.9691637977252296e-06, "loss": 0.2742, "step": 24808 }, { "epoch": 1.1621773551318686, "grad_norm": 0.5884185513470406, "learning_rate": 1.968978473686982e-06, "loss": 0.2719, "step": 24809 }, { "epoch": 1.162224200121797, "grad_norm": 0.5895271029377205, "learning_rate": 1.9687931527046304e-06, "loss": 0.2741, "step": 24810 }, { "epoch": 1.1622710451117253, "grad_norm": 0.592434605540092, "learning_rate": 1.96860783477924e-06, "loss": 0.2945, "step": 24811 }, { "epoch": 1.1623178901016535, "grad_norm": 0.5720919505815825, "learning_rate": 1.9684225199118785e-06, "loss": 0.2628, "step": 24812 }, { "epoch": 1.162364735091582, "grad_norm": 0.583385482830347, "learning_rate": 1.9682372081036124e-06, "loss": 0.2733, "step": 24813 }, { "epoch": 1.1624115800815102, "grad_norm": 0.5771864087211371, "learning_rate": 1.968051899355507e-06, "loss": 0.2643, "step": 24814 }, { "epoch": 1.1624584250714387, "grad_norm": 0.559776977639651, "learning_rate": 1.967866593668629e-06, "loss": 0.2621, "step": 24815 }, { "epoch": 1.162505270061367, "grad_norm": 0.5864261767773147, "learning_rate": 1.9676812910440447e-06, "loss": 0.2709, "step": 24816 }, { "epoch": 1.1625521150512952, "grad_norm": 0.5864275354550005, "learning_rate": 1.9674959914828212e-06, "loss": 0.2783, "step": 24817 }, { "epoch": 1.1625989600412237, "grad_norm": 0.576672068085572, "learning_rate": 1.967310694986024e-06, "loss": 0.2835, "step": 24818 }, { "epoch": 1.162645805031152, "grad_norm": 0.6089921768689984, "learning_rate": 1.9671254015547197e-06, "loss": 0.2755, "step": 24819 }, { "epoch": 1.1626926500210804, "grad_norm": 0.5728614571487811, "learning_rate": 1.9669401111899765e-06, "loss": 0.2916, "step": 24820 }, { "epoch": 1.1627394950110086, "grad_norm": 0.5642617337979019, "learning_rate": 1.9667548238928575e-06, "loss": 0.2753, "step": 24821 }, { "epoch": 1.1627863400009368, "grad_norm": 0.6014876186956063, "learning_rate": 1.966569539664431e-06, "loss": 0.2795, "step": 24822 }, { "epoch": 1.1628331849908653, "grad_norm": 0.5876263624004461, "learning_rate": 1.966384258505762e-06, "loss": 0.2665, "step": 24823 }, { "epoch": 1.1628800299807935, "grad_norm": 0.5597629951336333, "learning_rate": 1.966198980417918e-06, "loss": 0.2565, "step": 24824 }, { "epoch": 1.1629268749707218, "grad_norm": 0.5336751404287089, "learning_rate": 1.966013705401964e-06, "loss": 0.2567, "step": 24825 }, { "epoch": 1.1629737199606502, "grad_norm": 0.5532174668580876, "learning_rate": 1.9658284334589686e-06, "loss": 0.2595, "step": 24826 }, { "epoch": 1.1630205649505785, "grad_norm": 0.5980849635172389, "learning_rate": 1.965643164589995e-06, "loss": 0.2643, "step": 24827 }, { "epoch": 1.163067409940507, "grad_norm": 0.6107653910306745, "learning_rate": 1.965457898796111e-06, "loss": 0.2945, "step": 24828 }, { "epoch": 1.1631142549304352, "grad_norm": 0.6099259202500068, "learning_rate": 1.965272636078382e-06, "loss": 0.2909, "step": 24829 }, { "epoch": 1.1631610999203634, "grad_norm": 0.5575858774840331, "learning_rate": 1.965087376437875e-06, "loss": 0.2693, "step": 24830 }, { "epoch": 1.163207944910292, "grad_norm": 0.6028557598664006, "learning_rate": 1.9649021198756557e-06, "loss": 0.2699, "step": 24831 }, { "epoch": 1.1632547899002201, "grad_norm": 0.5665306344553599, "learning_rate": 1.9647168663927908e-06, "loss": 0.2708, "step": 24832 }, { "epoch": 1.1633016348901486, "grad_norm": 0.5748417502213594, "learning_rate": 1.964531615990345e-06, "loss": 0.2571, "step": 24833 }, { "epoch": 1.1633484798800768, "grad_norm": 0.6212820800254211, "learning_rate": 1.964346368669385e-06, "loss": 0.2914, "step": 24834 }, { "epoch": 1.163395324870005, "grad_norm": 0.5818614169913107, "learning_rate": 1.9641611244309778e-06, "loss": 0.2754, "step": 24835 }, { "epoch": 1.1634421698599335, "grad_norm": 0.5846220923736083, "learning_rate": 1.963975883276188e-06, "loss": 0.2808, "step": 24836 }, { "epoch": 1.1634890148498618, "grad_norm": 0.629355933719191, "learning_rate": 1.963790645206082e-06, "loss": 0.2809, "step": 24837 }, { "epoch": 1.16353585983979, "grad_norm": 0.5973894458813906, "learning_rate": 1.963605410221727e-06, "loss": 0.2713, "step": 24838 }, { "epoch": 1.1635827048297185, "grad_norm": 0.6194392488929347, "learning_rate": 1.963420178324188e-06, "loss": 0.2751, "step": 24839 }, { "epoch": 1.1636295498196467, "grad_norm": 0.5544266842198681, "learning_rate": 1.9632349495145303e-06, "loss": 0.2742, "step": 24840 }, { "epoch": 1.1636763948095752, "grad_norm": 0.6294180956903171, "learning_rate": 1.963049723793821e-06, "loss": 0.2898, "step": 24841 }, { "epoch": 1.1637232397995034, "grad_norm": 0.5696210082025983, "learning_rate": 1.962864501163125e-06, "loss": 0.292, "step": 24842 }, { "epoch": 1.163770084789432, "grad_norm": 0.5692928366870348, "learning_rate": 1.962679281623509e-06, "loss": 0.2625, "step": 24843 }, { "epoch": 1.1638169297793601, "grad_norm": 0.5896762490738255, "learning_rate": 1.96249406517604e-06, "loss": 0.271, "step": 24844 }, { "epoch": 1.1638637747692884, "grad_norm": 0.5828703086895237, "learning_rate": 1.962308851821781e-06, "loss": 0.2636, "step": 24845 }, { "epoch": 1.1639106197592168, "grad_norm": 0.6048542262365812, "learning_rate": 1.9621236415618e-06, "loss": 0.2651, "step": 24846 }, { "epoch": 1.163957464749145, "grad_norm": 0.54929409264949, "learning_rate": 1.961938434397162e-06, "loss": 0.2633, "step": 24847 }, { "epoch": 1.1640043097390733, "grad_norm": 0.5878477063293965, "learning_rate": 1.9617532303289334e-06, "loss": 0.2825, "step": 24848 }, { "epoch": 1.1640511547290018, "grad_norm": 0.6260704814793577, "learning_rate": 1.961568029358179e-06, "loss": 0.2814, "step": 24849 }, { "epoch": 1.16409799971893, "grad_norm": 0.5709389068990162, "learning_rate": 1.9613828314859666e-06, "loss": 0.2731, "step": 24850 }, { "epoch": 1.1641448447088585, "grad_norm": 0.5921416111460087, "learning_rate": 1.9611976367133596e-06, "loss": 0.2719, "step": 24851 }, { "epoch": 1.1641916896987867, "grad_norm": 0.5434576922492919, "learning_rate": 1.961012445041425e-06, "loss": 0.2787, "step": 24852 }, { "epoch": 1.164238534688715, "grad_norm": 0.6179011745196693, "learning_rate": 1.960827256471228e-06, "loss": 0.273, "step": 24853 }, { "epoch": 1.1642853796786434, "grad_norm": 0.5827431551672757, "learning_rate": 1.9606420710038347e-06, "loss": 0.2702, "step": 24854 }, { "epoch": 1.1643322246685717, "grad_norm": 0.5891011664698568, "learning_rate": 1.960456888640311e-06, "loss": 0.2783, "step": 24855 }, { "epoch": 1.1643790696585001, "grad_norm": 0.5584025244522773, "learning_rate": 1.960271709381723e-06, "loss": 0.2636, "step": 24856 }, { "epoch": 1.1644259146484284, "grad_norm": 0.5891930098247117, "learning_rate": 1.9600865332291345e-06, "loss": 0.2801, "step": 24857 }, { "epoch": 1.1644727596383566, "grad_norm": 0.5978359847193419, "learning_rate": 1.9599013601836125e-06, "loss": 0.2768, "step": 24858 }, { "epoch": 1.164519604628285, "grad_norm": 0.5714378684045449, "learning_rate": 1.9597161902462224e-06, "loss": 0.2586, "step": 24859 }, { "epoch": 1.1645664496182133, "grad_norm": 0.6202275702868623, "learning_rate": 1.95953102341803e-06, "loss": 0.2751, "step": 24860 }, { "epoch": 1.1646132946081416, "grad_norm": 0.6237592878340146, "learning_rate": 1.9593458597001003e-06, "loss": 0.2922, "step": 24861 }, { "epoch": 1.16466013959807, "grad_norm": 0.5948822573552769, "learning_rate": 1.9591606990935007e-06, "loss": 0.2786, "step": 24862 }, { "epoch": 1.1647069845879983, "grad_norm": 0.568448896311276, "learning_rate": 1.9589755415992943e-06, "loss": 0.2852, "step": 24863 }, { "epoch": 1.1647538295779267, "grad_norm": 0.6116264800189813, "learning_rate": 1.958790387218548e-06, "loss": 0.2729, "step": 24864 }, { "epoch": 1.164800674567855, "grad_norm": 0.587467902620424, "learning_rate": 1.958605235952326e-06, "loss": 0.2732, "step": 24865 }, { "epoch": 1.1648475195577832, "grad_norm": 0.5853196187516683, "learning_rate": 1.958420087801696e-06, "loss": 0.273, "step": 24866 }, { "epoch": 1.1648943645477117, "grad_norm": 0.5896400988932344, "learning_rate": 1.958234942767721e-06, "loss": 0.2794, "step": 24867 }, { "epoch": 1.16494120953764, "grad_norm": 0.5672844529442556, "learning_rate": 1.9580498008514697e-06, "loss": 0.267, "step": 24868 }, { "epoch": 1.1649880545275684, "grad_norm": 0.5924697075553443, "learning_rate": 1.957864662054004e-06, "loss": 0.29, "step": 24869 }, { "epoch": 1.1650348995174966, "grad_norm": 0.6730758906245252, "learning_rate": 1.9576795263763913e-06, "loss": 0.304, "step": 24870 }, { "epoch": 1.1650817445074249, "grad_norm": 0.5962501498136291, "learning_rate": 1.9574943938196964e-06, "loss": 0.2663, "step": 24871 }, { "epoch": 1.1651285894973533, "grad_norm": 0.5593638893525043, "learning_rate": 1.957309264384985e-06, "loss": 0.2614, "step": 24872 }, { "epoch": 1.1651754344872816, "grad_norm": 0.6135921515749108, "learning_rate": 1.957124138073323e-06, "loss": 0.273, "step": 24873 }, { "epoch": 1.1652222794772098, "grad_norm": 0.5886484465719728, "learning_rate": 1.956939014885775e-06, "loss": 0.2738, "step": 24874 }, { "epoch": 1.1652691244671383, "grad_norm": 0.5913175584361693, "learning_rate": 1.956753894823406e-06, "loss": 0.2747, "step": 24875 }, { "epoch": 1.1653159694570665, "grad_norm": 0.6157685962630901, "learning_rate": 1.9565687778872816e-06, "loss": 0.276, "step": 24876 }, { "epoch": 1.165362814446995, "grad_norm": 0.5864917154036386, "learning_rate": 1.956383664078468e-06, "loss": 0.2747, "step": 24877 }, { "epoch": 1.1654096594369232, "grad_norm": 0.5951443147417073, "learning_rate": 1.956198553398029e-06, "loss": 0.273, "step": 24878 }, { "epoch": 1.1654565044268517, "grad_norm": 0.6042311819912416, "learning_rate": 1.9560134458470308e-06, "loss": 0.287, "step": 24879 }, { "epoch": 1.16550334941678, "grad_norm": 0.5836042419655174, "learning_rate": 1.9558283414265393e-06, "loss": 0.2784, "step": 24880 }, { "epoch": 1.1655501944067082, "grad_norm": 0.6112480129961795, "learning_rate": 1.955643240137619e-06, "loss": 0.2746, "step": 24881 }, { "epoch": 1.1655970393966366, "grad_norm": 0.553913980369766, "learning_rate": 1.955458141981334e-06, "loss": 0.2774, "step": 24882 }, { "epoch": 1.1656438843865649, "grad_norm": 0.6126455983442486, "learning_rate": 1.9552730469587507e-06, "loss": 0.2802, "step": 24883 }, { "epoch": 1.165690729376493, "grad_norm": 0.5621811120041147, "learning_rate": 1.955087955070934e-06, "loss": 0.272, "step": 24884 }, { "epoch": 1.1657375743664216, "grad_norm": 0.6039060416342172, "learning_rate": 1.9549028663189496e-06, "loss": 0.2817, "step": 24885 }, { "epoch": 1.1657844193563498, "grad_norm": 0.5909588192522915, "learning_rate": 1.954717780703863e-06, "loss": 0.2827, "step": 24886 }, { "epoch": 1.1658312643462783, "grad_norm": 0.6101059031335453, "learning_rate": 1.954532698226737e-06, "loss": 0.2761, "step": 24887 }, { "epoch": 1.1658781093362065, "grad_norm": 0.6204230459905946, "learning_rate": 1.9543476188886394e-06, "loss": 0.2756, "step": 24888 }, { "epoch": 1.1659249543261347, "grad_norm": 0.6089862806331378, "learning_rate": 1.954162542690633e-06, "loss": 0.2782, "step": 24889 }, { "epoch": 1.1659717993160632, "grad_norm": 0.624159464358987, "learning_rate": 1.9539774696337845e-06, "loss": 0.2873, "step": 24890 }, { "epoch": 1.1660186443059914, "grad_norm": 0.538694049009251, "learning_rate": 1.9537923997191584e-06, "loss": 0.2705, "step": 24891 }, { "epoch": 1.16606548929592, "grad_norm": 0.6020490628386372, "learning_rate": 1.9536073329478194e-06, "loss": 0.2891, "step": 24892 }, { "epoch": 1.1661123342858482, "grad_norm": 0.6004017194389056, "learning_rate": 1.9534222693208345e-06, "loss": 0.2726, "step": 24893 }, { "epoch": 1.1661591792757764, "grad_norm": 0.5797543061258185, "learning_rate": 1.9532372088392653e-06, "loss": 0.2803, "step": 24894 }, { "epoch": 1.1662060242657049, "grad_norm": 0.5598965653042017, "learning_rate": 1.953052151504179e-06, "loss": 0.2737, "step": 24895 }, { "epoch": 1.166252869255633, "grad_norm": 0.6243699844780436, "learning_rate": 1.9528670973166403e-06, "loss": 0.2916, "step": 24896 }, { "epoch": 1.1662997142455613, "grad_norm": 0.6214055812613002, "learning_rate": 1.952682046277714e-06, "loss": 0.2718, "step": 24897 }, { "epoch": 1.1663465592354898, "grad_norm": 0.5784010704655057, "learning_rate": 1.9524969983884644e-06, "loss": 0.273, "step": 24898 }, { "epoch": 1.166393404225418, "grad_norm": 0.6178527500190832, "learning_rate": 1.952311953649958e-06, "loss": 0.2846, "step": 24899 }, { "epoch": 1.1664402492153465, "grad_norm": 0.5452977707612875, "learning_rate": 1.952126912063258e-06, "loss": 0.2728, "step": 24900 }, { "epoch": 1.1664870942052747, "grad_norm": 0.5702355343443375, "learning_rate": 1.95194187362943e-06, "loss": 0.2465, "step": 24901 }, { "epoch": 1.166533939195203, "grad_norm": 0.5667961284126504, "learning_rate": 1.9517568383495383e-06, "loss": 0.2722, "step": 24902 }, { "epoch": 1.1665807841851314, "grad_norm": 0.6494242098625982, "learning_rate": 1.9515718062246485e-06, "loss": 0.2844, "step": 24903 }, { "epoch": 1.1666276291750597, "grad_norm": 0.564518628787006, "learning_rate": 1.951386777255825e-06, "loss": 0.2586, "step": 24904 }, { "epoch": 1.1666744741649882, "grad_norm": 0.6981431705113091, "learning_rate": 1.951201751444134e-06, "loss": 0.2887, "step": 24905 }, { "epoch": 1.1667213191549164, "grad_norm": 0.6026075368906078, "learning_rate": 1.9510167287906372e-06, "loss": 0.2811, "step": 24906 }, { "epoch": 1.1667681641448446, "grad_norm": 0.6138417172599074, "learning_rate": 1.950831709296402e-06, "loss": 0.2831, "step": 24907 }, { "epoch": 1.166815009134773, "grad_norm": 0.5491973820106653, "learning_rate": 1.950646692962492e-06, "loss": 0.2768, "step": 24908 }, { "epoch": 1.1668618541247013, "grad_norm": 0.6125212448528071, "learning_rate": 1.950461679789972e-06, "loss": 0.2825, "step": 24909 }, { "epoch": 1.1669086991146296, "grad_norm": 0.5581701418403464, "learning_rate": 1.950276669779907e-06, "loss": 0.2496, "step": 24910 }, { "epoch": 1.166955544104558, "grad_norm": 0.5990412863703476, "learning_rate": 1.9500916629333628e-06, "loss": 0.2735, "step": 24911 }, { "epoch": 1.1670023890944863, "grad_norm": 0.5975976963498217, "learning_rate": 1.9499066592514018e-06, "loss": 0.2799, "step": 24912 }, { "epoch": 1.1670492340844147, "grad_norm": 0.5724985259994361, "learning_rate": 1.9497216587350897e-06, "loss": 0.2798, "step": 24913 }, { "epoch": 1.167096079074343, "grad_norm": 0.5616761599853465, "learning_rate": 1.949536661385491e-06, "loss": 0.2677, "step": 24914 }, { "epoch": 1.1671429240642714, "grad_norm": 0.5656451177145286, "learning_rate": 1.949351667203671e-06, "loss": 0.2732, "step": 24915 }, { "epoch": 1.1671897690541997, "grad_norm": 0.5720477604677363, "learning_rate": 1.949166676190693e-06, "loss": 0.2851, "step": 24916 }, { "epoch": 1.167236614044128, "grad_norm": 0.5617960529610586, "learning_rate": 1.9489816883476235e-06, "loss": 0.2824, "step": 24917 }, { "epoch": 1.1672834590340564, "grad_norm": 0.5672259319800086, "learning_rate": 1.948796703675525e-06, "loss": 0.2722, "step": 24918 }, { "epoch": 1.1673303040239846, "grad_norm": 0.575166336394109, "learning_rate": 1.948611722175463e-06, "loss": 0.2834, "step": 24919 }, { "epoch": 1.1673771490139129, "grad_norm": 0.6161201640187479, "learning_rate": 1.948426743848502e-06, "loss": 0.2763, "step": 24920 }, { "epoch": 1.1674239940038413, "grad_norm": 0.5801269871364884, "learning_rate": 1.948241768695706e-06, "loss": 0.2859, "step": 24921 }, { "epoch": 1.1674708389937696, "grad_norm": 0.6449182153169797, "learning_rate": 1.9480567967181412e-06, "loss": 0.2889, "step": 24922 }, { "epoch": 1.167517683983698, "grad_norm": 0.5729565431816298, "learning_rate": 1.947871827916871e-06, "loss": 0.2877, "step": 24923 }, { "epoch": 1.1675645289736263, "grad_norm": 0.594180825306521, "learning_rate": 1.947686862292958e-06, "loss": 0.2759, "step": 24924 }, { "epoch": 1.1676113739635545, "grad_norm": 0.6183190595075831, "learning_rate": 1.9475018998474685e-06, "loss": 0.2721, "step": 24925 }, { "epoch": 1.167658218953483, "grad_norm": 0.6058642697668148, "learning_rate": 1.947316940581468e-06, "loss": 0.2967, "step": 24926 }, { "epoch": 1.1677050639434112, "grad_norm": 0.5529604772180962, "learning_rate": 1.947131984496018e-06, "loss": 0.2523, "step": 24927 }, { "epoch": 1.1677519089333397, "grad_norm": 0.5934346960360717, "learning_rate": 1.9469470315921853e-06, "loss": 0.2899, "step": 24928 }, { "epoch": 1.167798753923268, "grad_norm": 0.5371056808996533, "learning_rate": 1.946762081871034e-06, "loss": 0.257, "step": 24929 }, { "epoch": 1.1678455989131962, "grad_norm": 0.5740947229383687, "learning_rate": 1.9465771353336275e-06, "loss": 0.2651, "step": 24930 }, { "epoch": 1.1678924439031246, "grad_norm": 0.6049647604200482, "learning_rate": 1.9463921919810304e-06, "loss": 0.2856, "step": 24931 }, { "epoch": 1.1679392888930529, "grad_norm": 0.5927744772114427, "learning_rate": 1.9462072518143064e-06, "loss": 0.272, "step": 24932 }, { "epoch": 1.1679861338829811, "grad_norm": 0.5798289989105291, "learning_rate": 1.9460223148345214e-06, "loss": 0.2692, "step": 24933 }, { "epoch": 1.1680329788729096, "grad_norm": 0.5798297332903256, "learning_rate": 1.945837381042738e-06, "loss": 0.2667, "step": 24934 }, { "epoch": 1.1680798238628378, "grad_norm": 0.5525973805551697, "learning_rate": 1.945652450440022e-06, "loss": 0.2783, "step": 24935 }, { "epoch": 1.1681266688527663, "grad_norm": 0.5595684043496939, "learning_rate": 1.9454675230274365e-06, "loss": 0.2671, "step": 24936 }, { "epoch": 1.1681735138426945, "grad_norm": 0.636633302679819, "learning_rate": 1.9452825988060463e-06, "loss": 0.2858, "step": 24937 }, { "epoch": 1.1682203588326228, "grad_norm": 0.6066443872145588, "learning_rate": 1.9450976777769145e-06, "loss": 0.2699, "step": 24938 }, { "epoch": 1.1682672038225512, "grad_norm": 0.6720780704145806, "learning_rate": 1.944912759941106e-06, "loss": 0.2886, "step": 24939 }, { "epoch": 1.1683140488124795, "grad_norm": 0.5947895539503602, "learning_rate": 1.944727845299686e-06, "loss": 0.2808, "step": 24940 }, { "epoch": 1.168360893802408, "grad_norm": 0.6433327135516089, "learning_rate": 1.944542933853718e-06, "loss": 0.3022, "step": 24941 }, { "epoch": 1.1684077387923362, "grad_norm": 0.5743252880064986, "learning_rate": 1.944358025604265e-06, "loss": 0.2734, "step": 24942 }, { "epoch": 1.1684545837822644, "grad_norm": 0.5597895605351846, "learning_rate": 1.9441731205523915e-06, "loss": 0.2604, "step": 24943 }, { "epoch": 1.1685014287721929, "grad_norm": 0.5732917940968915, "learning_rate": 1.9439882186991628e-06, "loss": 0.2716, "step": 24944 }, { "epoch": 1.1685482737621211, "grad_norm": 0.59843745034851, "learning_rate": 1.943803320045641e-06, "loss": 0.2841, "step": 24945 }, { "epoch": 1.1685951187520494, "grad_norm": 0.611297127443274, "learning_rate": 1.943618424592893e-06, "loss": 0.2791, "step": 24946 }, { "epoch": 1.1686419637419778, "grad_norm": 0.5817516019511956, "learning_rate": 1.9434335323419802e-06, "loss": 0.2779, "step": 24947 }, { "epoch": 1.168688808731906, "grad_norm": 0.5551799602643162, "learning_rate": 1.9432486432939677e-06, "loss": 0.2715, "step": 24948 }, { "epoch": 1.1687356537218345, "grad_norm": 0.6381482383513993, "learning_rate": 1.943063757449919e-06, "loss": 0.2818, "step": 24949 }, { "epoch": 1.1687824987117628, "grad_norm": 0.6200631420082013, "learning_rate": 1.942878874810899e-06, "loss": 0.2884, "step": 24950 }, { "epoch": 1.1688293437016912, "grad_norm": 0.6241953326580263, "learning_rate": 1.9426939953779706e-06, "loss": 0.2679, "step": 24951 }, { "epoch": 1.1688761886916195, "grad_norm": 0.5782011636426538, "learning_rate": 1.9425091191521976e-06, "loss": 0.2806, "step": 24952 }, { "epoch": 1.1689230336815477, "grad_norm": 0.6180673787277832, "learning_rate": 1.9423242461346457e-06, "loss": 0.2758, "step": 24953 }, { "epoch": 1.1689698786714762, "grad_norm": 0.5747469683194883, "learning_rate": 1.9421393763263776e-06, "loss": 0.2538, "step": 24954 }, { "epoch": 1.1690167236614044, "grad_norm": 0.6038152404507753, "learning_rate": 1.9419545097284566e-06, "loss": 0.2729, "step": 24955 }, { "epoch": 1.1690635686513327, "grad_norm": 0.6280608829912356, "learning_rate": 1.9417696463419468e-06, "loss": 0.279, "step": 24956 }, { "epoch": 1.1691104136412611, "grad_norm": 0.551253531397598, "learning_rate": 1.9415847861679127e-06, "loss": 0.2506, "step": 24957 }, { "epoch": 1.1691572586311894, "grad_norm": 0.5902349641424037, "learning_rate": 1.9413999292074174e-06, "loss": 0.2665, "step": 24958 }, { "epoch": 1.1692041036211178, "grad_norm": 0.5927853253777439, "learning_rate": 1.9412150754615257e-06, "loss": 0.2766, "step": 24959 }, { "epoch": 1.169250948611046, "grad_norm": 0.6046158492639151, "learning_rate": 1.9410302249313014e-06, "loss": 0.2808, "step": 24960 }, { "epoch": 1.1692977936009743, "grad_norm": 0.5462121726477099, "learning_rate": 1.9408453776178066e-06, "loss": 0.2613, "step": 24961 }, { "epoch": 1.1693446385909028, "grad_norm": 0.6207092596257034, "learning_rate": 1.9406605335221062e-06, "loss": 0.2695, "step": 24962 }, { "epoch": 1.169391483580831, "grad_norm": 0.6095135471486226, "learning_rate": 1.940475692645264e-06, "loss": 0.2614, "step": 24963 }, { "epoch": 1.1694383285707595, "grad_norm": 0.580400313321972, "learning_rate": 1.940290854988344e-06, "loss": 0.2617, "step": 24964 }, { "epoch": 1.1694851735606877, "grad_norm": 0.5418646472207782, "learning_rate": 1.9401060205524087e-06, "loss": 0.2567, "step": 24965 }, { "epoch": 1.169532018550616, "grad_norm": 0.6273487939899676, "learning_rate": 1.9399211893385234e-06, "loss": 0.2743, "step": 24966 }, { "epoch": 1.1695788635405444, "grad_norm": 0.6358510237273887, "learning_rate": 1.93973636134775e-06, "loss": 0.275, "step": 24967 }, { "epoch": 1.1696257085304727, "grad_norm": 0.5565305003968345, "learning_rate": 1.939551536581154e-06, "loss": 0.2633, "step": 24968 }, { "epoch": 1.169672553520401, "grad_norm": 0.6376450940013015, "learning_rate": 1.9393667150397967e-06, "loss": 0.2789, "step": 24969 }, { "epoch": 1.1697193985103294, "grad_norm": 0.6327290025071292, "learning_rate": 1.939181896724744e-06, "loss": 0.2875, "step": 24970 }, { "epoch": 1.1697662435002576, "grad_norm": 0.5745704840391277, "learning_rate": 1.938997081637058e-06, "loss": 0.2774, "step": 24971 }, { "epoch": 1.169813088490186, "grad_norm": 0.5687354748348086, "learning_rate": 1.938812269777804e-06, "loss": 0.2642, "step": 24972 }, { "epoch": 1.1698599334801143, "grad_norm": 0.630312510993054, "learning_rate": 1.938627461148043e-06, "loss": 0.2855, "step": 24973 }, { "epoch": 1.1699067784700425, "grad_norm": 0.5663737771618902, "learning_rate": 1.9384426557488404e-06, "loss": 0.2754, "step": 24974 }, { "epoch": 1.169953623459971, "grad_norm": 0.5849865045060371, "learning_rate": 1.9382578535812595e-06, "loss": 0.2865, "step": 24975 }, { "epoch": 1.1700004684498992, "grad_norm": 0.5829339847661497, "learning_rate": 1.9380730546463624e-06, "loss": 0.2794, "step": 24976 }, { "epoch": 1.1700473134398277, "grad_norm": 0.603295547397437, "learning_rate": 1.9378882589452143e-06, "loss": 0.285, "step": 24977 }, { "epoch": 1.170094158429756, "grad_norm": 0.6028428580238612, "learning_rate": 1.9377034664788787e-06, "loss": 0.2615, "step": 24978 }, { "epoch": 1.1701410034196842, "grad_norm": 0.5840834829744737, "learning_rate": 1.937518677248418e-06, "loss": 0.281, "step": 24979 }, { "epoch": 1.1701878484096127, "grad_norm": 0.6013623368473643, "learning_rate": 1.937333891254895e-06, "loss": 0.2865, "step": 24980 }, { "epoch": 1.170234693399541, "grad_norm": 0.6029138918891366, "learning_rate": 1.9371491084993745e-06, "loss": 0.2765, "step": 24981 }, { "epoch": 1.1702815383894691, "grad_norm": 0.5746034131700242, "learning_rate": 1.9369643289829195e-06, "loss": 0.2682, "step": 24982 }, { "epoch": 1.1703283833793976, "grad_norm": 0.6212127881585494, "learning_rate": 1.936779552706593e-06, "loss": 0.2691, "step": 24983 }, { "epoch": 1.1703752283693258, "grad_norm": 0.5990038292999327, "learning_rate": 1.9365947796714596e-06, "loss": 0.2733, "step": 24984 }, { "epoch": 1.1704220733592543, "grad_norm": 0.5922556089897494, "learning_rate": 1.9364100098785803e-06, "loss": 0.2649, "step": 24985 }, { "epoch": 1.1704689183491825, "grad_norm": 0.622733333552413, "learning_rate": 1.9362252433290206e-06, "loss": 0.2667, "step": 24986 }, { "epoch": 1.170515763339111, "grad_norm": 0.612170674254253, "learning_rate": 1.9360404800238423e-06, "loss": 0.2969, "step": 24987 }, { "epoch": 1.1705626083290392, "grad_norm": 0.6159331272423759, "learning_rate": 1.9358557199641102e-06, "loss": 0.2707, "step": 24988 }, { "epoch": 1.1706094533189675, "grad_norm": 0.5351903517027478, "learning_rate": 1.935670963150886e-06, "loss": 0.266, "step": 24989 }, { "epoch": 1.170656298308896, "grad_norm": 0.5787307879846364, "learning_rate": 1.9354862095852343e-06, "loss": 0.2733, "step": 24990 }, { "epoch": 1.1707031432988242, "grad_norm": 0.5887616362447939, "learning_rate": 1.9353014592682166e-06, "loss": 0.2678, "step": 24991 }, { "epoch": 1.1707499882887524, "grad_norm": 0.5354915277356348, "learning_rate": 1.9351167122008975e-06, "loss": 0.2574, "step": 24992 }, { "epoch": 1.170796833278681, "grad_norm": 0.6281304450441593, "learning_rate": 1.9349319683843397e-06, "loss": 0.2966, "step": 24993 }, { "epoch": 1.1708436782686091, "grad_norm": 0.5992862807925275, "learning_rate": 1.934747227819606e-06, "loss": 0.2937, "step": 24994 }, { "epoch": 1.1708905232585374, "grad_norm": 0.5787416404817255, "learning_rate": 1.9345624905077604e-06, "loss": 0.2851, "step": 24995 }, { "epoch": 1.1709373682484658, "grad_norm": 0.6121186199782548, "learning_rate": 1.9343777564498662e-06, "loss": 0.285, "step": 24996 }, { "epoch": 1.170984213238394, "grad_norm": 0.6204768803160108, "learning_rate": 1.934193025646985e-06, "loss": 0.2756, "step": 24997 }, { "epoch": 1.1710310582283225, "grad_norm": 0.6099369410383793, "learning_rate": 1.9340082981001806e-06, "loss": 0.2772, "step": 24998 }, { "epoch": 1.1710779032182508, "grad_norm": 0.6125270309593247, "learning_rate": 1.9338235738105164e-06, "loss": 0.2668, "step": 24999 }, { "epoch": 1.1711247482081792, "grad_norm": 0.58337455050841, "learning_rate": 1.933638852779055e-06, "loss": 0.2741, "step": 25000 }, { "epoch": 1.1711715931981075, "grad_norm": 0.5932144809893668, "learning_rate": 1.9334541350068597e-06, "loss": 0.2691, "step": 25001 }, { "epoch": 1.1712184381880357, "grad_norm": 0.6039868090620155, "learning_rate": 1.9332694204949944e-06, "loss": 0.2784, "step": 25002 }, { "epoch": 1.1712652831779642, "grad_norm": 0.5359888797031285, "learning_rate": 1.9330847092445203e-06, "loss": 0.2465, "step": 25003 }, { "epoch": 1.1713121281678924, "grad_norm": 0.5525922894460592, "learning_rate": 1.9329000012565015e-06, "loss": 0.2641, "step": 25004 }, { "epoch": 1.1713589731578207, "grad_norm": 0.6231200643449353, "learning_rate": 1.932715296532e-06, "loss": 0.2795, "step": 25005 }, { "epoch": 1.1714058181477491, "grad_norm": 0.6322718193342116, "learning_rate": 1.93253059507208e-06, "loss": 0.3039, "step": 25006 }, { "epoch": 1.1714526631376774, "grad_norm": 0.6292196056813358, "learning_rate": 1.9323458968778033e-06, "loss": 0.2991, "step": 25007 }, { "epoch": 1.1714995081276058, "grad_norm": 0.5775147835318251, "learning_rate": 1.9321612019502347e-06, "loss": 0.2673, "step": 25008 }, { "epoch": 1.171546353117534, "grad_norm": 0.599212979899633, "learning_rate": 1.931976510290434e-06, "loss": 0.2967, "step": 25009 }, { "epoch": 1.1715931981074623, "grad_norm": 0.557283261786815, "learning_rate": 1.931791821899466e-06, "loss": 0.2685, "step": 25010 }, { "epoch": 1.1716400430973908, "grad_norm": 0.5861264701700238, "learning_rate": 1.9316071367783935e-06, "loss": 0.2767, "step": 25011 }, { "epoch": 1.171686888087319, "grad_norm": 0.5593225385867485, "learning_rate": 1.931422454928279e-06, "loss": 0.2658, "step": 25012 }, { "epoch": 1.1717337330772475, "grad_norm": 0.6102427392489772, "learning_rate": 1.931237776350186e-06, "loss": 0.2754, "step": 25013 }, { "epoch": 1.1717805780671757, "grad_norm": 0.5842874665938403, "learning_rate": 1.9310531010451766e-06, "loss": 0.2751, "step": 25014 }, { "epoch": 1.171827423057104, "grad_norm": 0.5787145567879493, "learning_rate": 1.930868429014313e-06, "loss": 0.2701, "step": 25015 }, { "epoch": 1.1718742680470324, "grad_norm": 0.6413127578416062, "learning_rate": 1.9306837602586584e-06, "loss": 0.2964, "step": 25016 }, { "epoch": 1.1719211130369607, "grad_norm": 0.6112795727411782, "learning_rate": 1.930499094779276e-06, "loss": 0.2773, "step": 25017 }, { "epoch": 1.171967958026889, "grad_norm": 0.5633915859574071, "learning_rate": 1.930314432577228e-06, "loss": 0.2683, "step": 25018 }, { "epoch": 1.1720148030168174, "grad_norm": 0.6390984664579239, "learning_rate": 1.9301297736535772e-06, "loss": 0.2995, "step": 25019 }, { "epoch": 1.1720616480067456, "grad_norm": 0.6334385569831471, "learning_rate": 1.929945118009387e-06, "loss": 0.2958, "step": 25020 }, { "epoch": 1.172108492996674, "grad_norm": 0.5913471037749745, "learning_rate": 1.9297604656457192e-06, "loss": 0.2775, "step": 25021 }, { "epoch": 1.1721553379866023, "grad_norm": 0.6374060332884135, "learning_rate": 1.929575816563636e-06, "loss": 0.2742, "step": 25022 }, { "epoch": 1.1722021829765308, "grad_norm": 0.5976961254005755, "learning_rate": 1.9293911707642004e-06, "loss": 0.2776, "step": 25023 }, { "epoch": 1.172249027966459, "grad_norm": 0.5799572709777643, "learning_rate": 1.9292065282484763e-06, "loss": 0.2563, "step": 25024 }, { "epoch": 1.1722958729563873, "grad_norm": 0.6199010996262093, "learning_rate": 1.929021889017524e-06, "loss": 0.2866, "step": 25025 }, { "epoch": 1.1723427179463157, "grad_norm": 0.6474354543481837, "learning_rate": 1.9288372530724073e-06, "loss": 0.2881, "step": 25026 }, { "epoch": 1.172389562936244, "grad_norm": 0.5933365802660845, "learning_rate": 1.92865262041419e-06, "loss": 0.2686, "step": 25027 }, { "epoch": 1.1724364079261722, "grad_norm": 0.5815542959474718, "learning_rate": 1.928467991043933e-06, "loss": 0.2673, "step": 25028 }, { "epoch": 1.1724832529161007, "grad_norm": 0.5851374214604678, "learning_rate": 1.928283364962698e-06, "loss": 0.2721, "step": 25029 }, { "epoch": 1.172530097906029, "grad_norm": 0.6099997331863396, "learning_rate": 1.9280987421715484e-06, "loss": 0.2751, "step": 25030 }, { "epoch": 1.1725769428959572, "grad_norm": 0.6163950623353222, "learning_rate": 1.9279141226715476e-06, "loss": 0.2834, "step": 25031 }, { "epoch": 1.1726237878858856, "grad_norm": 0.6056314712492562, "learning_rate": 1.9277295064637568e-06, "loss": 0.2737, "step": 25032 }, { "epoch": 1.1726706328758139, "grad_norm": 0.5480626754642974, "learning_rate": 1.9275448935492397e-06, "loss": 0.2588, "step": 25033 }, { "epoch": 1.1727174778657423, "grad_norm": 0.5582026937355131, "learning_rate": 1.927360283929057e-06, "loss": 0.2621, "step": 25034 }, { "epoch": 1.1727643228556706, "grad_norm": 0.6400768044771071, "learning_rate": 1.9271756776042726e-06, "loss": 0.2813, "step": 25035 }, { "epoch": 1.172811167845599, "grad_norm": 0.575614854862104, "learning_rate": 1.926991074575947e-06, "loss": 0.2668, "step": 25036 }, { "epoch": 1.1728580128355273, "grad_norm": 0.559632131408127, "learning_rate": 1.926806474845145e-06, "loss": 0.2633, "step": 25037 }, { "epoch": 1.1729048578254555, "grad_norm": 0.526611931014152, "learning_rate": 1.9266218784129267e-06, "loss": 0.2502, "step": 25038 }, { "epoch": 1.172951702815384, "grad_norm": 0.6015236599456361, "learning_rate": 1.926437285280357e-06, "loss": 0.2801, "step": 25039 }, { "epoch": 1.1729985478053122, "grad_norm": 0.5857046629038979, "learning_rate": 1.926252695448495e-06, "loss": 0.2853, "step": 25040 }, { "epoch": 1.1730453927952404, "grad_norm": 0.5824425692891475, "learning_rate": 1.926068108918405e-06, "loss": 0.2772, "step": 25041 }, { "epoch": 1.173092237785169, "grad_norm": 0.5979227230862346, "learning_rate": 1.9258835256911483e-06, "loss": 0.2718, "step": 25042 }, { "epoch": 1.1731390827750972, "grad_norm": 0.6296741134724061, "learning_rate": 1.925698945767788e-06, "loss": 0.2999, "step": 25043 }, { "epoch": 1.1731859277650256, "grad_norm": 0.5797494524610162, "learning_rate": 1.925514369149386e-06, "loss": 0.2648, "step": 25044 }, { "epoch": 1.1732327727549539, "grad_norm": 0.6448032423801167, "learning_rate": 1.9253297958370056e-06, "loss": 0.2791, "step": 25045 }, { "epoch": 1.173279617744882, "grad_norm": 0.6130565409800982, "learning_rate": 1.925145225831706e-06, "loss": 0.2543, "step": 25046 }, { "epoch": 1.1733264627348106, "grad_norm": 0.6036442568253169, "learning_rate": 1.924960659134551e-06, "loss": 0.2793, "step": 25047 }, { "epoch": 1.1733733077247388, "grad_norm": 0.6376145595395947, "learning_rate": 1.924776095746604e-06, "loss": 0.2822, "step": 25048 }, { "epoch": 1.1734201527146673, "grad_norm": 0.5926918002396228, "learning_rate": 1.9245915356689253e-06, "loss": 0.279, "step": 25049 }, { "epoch": 1.1734669977045955, "grad_norm": 0.6112614819400339, "learning_rate": 1.924406978902578e-06, "loss": 0.2866, "step": 25050 }, { "epoch": 1.1735138426945237, "grad_norm": 0.6141906470227508, "learning_rate": 1.9242224254486243e-06, "loss": 0.2879, "step": 25051 }, { "epoch": 1.1735606876844522, "grad_norm": 0.5522953199057973, "learning_rate": 1.9240378753081256e-06, "loss": 0.2706, "step": 25052 }, { "epoch": 1.1736075326743804, "grad_norm": 0.5613199915803309, "learning_rate": 1.923853328482144e-06, "loss": 0.2638, "step": 25053 }, { "epoch": 1.1736543776643087, "grad_norm": 0.588921770545065, "learning_rate": 1.923668784971741e-06, "loss": 0.2744, "step": 25054 }, { "epoch": 1.1737012226542372, "grad_norm": 0.5716949843612436, "learning_rate": 1.92348424477798e-06, "loss": 0.2589, "step": 25055 }, { "epoch": 1.1737480676441654, "grad_norm": 0.6263418304831794, "learning_rate": 1.923299707901922e-06, "loss": 0.2737, "step": 25056 }, { "epoch": 1.1737949126340939, "grad_norm": 0.5929043334197739, "learning_rate": 1.92311517434463e-06, "loss": 0.2824, "step": 25057 }, { "epoch": 1.173841757624022, "grad_norm": 0.57354150770007, "learning_rate": 1.9229306441071645e-06, "loss": 0.2674, "step": 25058 }, { "epoch": 1.1738886026139506, "grad_norm": 0.5966352699367269, "learning_rate": 1.922746117190588e-06, "loss": 0.2795, "step": 25059 }, { "epoch": 1.1739354476038788, "grad_norm": 0.6007023675627907, "learning_rate": 1.9225615935959626e-06, "loss": 0.2773, "step": 25060 }, { "epoch": 1.173982292593807, "grad_norm": 0.5538231514386869, "learning_rate": 1.92237707332435e-06, "loss": 0.2716, "step": 25061 }, { "epoch": 1.1740291375837355, "grad_norm": 0.6349155888569106, "learning_rate": 1.9221925563768124e-06, "loss": 0.2776, "step": 25062 }, { "epoch": 1.1740759825736637, "grad_norm": 0.612331775957101, "learning_rate": 1.922008042754412e-06, "loss": 0.2773, "step": 25063 }, { "epoch": 1.174122827563592, "grad_norm": 0.6073274356665692, "learning_rate": 1.921823532458209e-06, "loss": 0.2803, "step": 25064 }, { "epoch": 1.1741696725535204, "grad_norm": 0.6088830748214082, "learning_rate": 1.9216390254892665e-06, "loss": 0.28, "step": 25065 }, { "epoch": 1.1742165175434487, "grad_norm": 0.5936305538927558, "learning_rate": 1.9214545218486464e-06, "loss": 0.2734, "step": 25066 }, { "epoch": 1.174263362533377, "grad_norm": 0.6006529641702539, "learning_rate": 1.92127002153741e-06, "loss": 0.2863, "step": 25067 }, { "epoch": 1.1743102075233054, "grad_norm": 0.6175006216249438, "learning_rate": 1.9210855245566185e-06, "loss": 0.2688, "step": 25068 }, { "epoch": 1.1743570525132336, "grad_norm": 0.6087744539044644, "learning_rate": 1.9209010309073356e-06, "loss": 0.278, "step": 25069 }, { "epoch": 1.174403897503162, "grad_norm": 0.5471706874762761, "learning_rate": 1.920716540590621e-06, "loss": 0.2564, "step": 25070 }, { "epoch": 1.1744507424930903, "grad_norm": 0.5705565600015452, "learning_rate": 1.920532053607537e-06, "loss": 0.2786, "step": 25071 }, { "epoch": 1.1744975874830188, "grad_norm": 0.5435077957541073, "learning_rate": 1.920347569959145e-06, "loss": 0.2531, "step": 25072 }, { "epoch": 1.174544432472947, "grad_norm": 0.5703503380146734, "learning_rate": 1.9201630896465077e-06, "loss": 0.2782, "step": 25073 }, { "epoch": 1.1745912774628753, "grad_norm": 0.5854609616742027, "learning_rate": 1.919978612670686e-06, "loss": 0.269, "step": 25074 }, { "epoch": 1.1746381224528037, "grad_norm": 0.564804956179797, "learning_rate": 1.919794139032742e-06, "loss": 0.2687, "step": 25075 }, { "epoch": 1.174684967442732, "grad_norm": 0.5993462536428957, "learning_rate": 1.919609668733736e-06, "loss": 0.2759, "step": 25076 }, { "epoch": 1.1747318124326602, "grad_norm": 0.5874601236841642, "learning_rate": 1.9194252017747315e-06, "loss": 0.2724, "step": 25077 }, { "epoch": 1.1747786574225887, "grad_norm": 0.551611831615442, "learning_rate": 1.919240738156788e-06, "loss": 0.2645, "step": 25078 }, { "epoch": 1.174825502412517, "grad_norm": 0.5875881402778754, "learning_rate": 1.919056277880968e-06, "loss": 0.2776, "step": 25079 }, { "epoch": 1.1748723474024454, "grad_norm": 0.6406381228302791, "learning_rate": 1.918871820948334e-06, "loss": 0.2756, "step": 25080 }, { "epoch": 1.1749191923923736, "grad_norm": 0.6080366970039549, "learning_rate": 1.918687367359947e-06, "loss": 0.2659, "step": 25081 }, { "epoch": 1.1749660373823019, "grad_norm": 0.6218772963266271, "learning_rate": 1.918502917116867e-06, "loss": 0.2771, "step": 25082 }, { "epoch": 1.1750128823722303, "grad_norm": 0.601120179387674, "learning_rate": 1.9183184702201564e-06, "loss": 0.2646, "step": 25083 }, { "epoch": 1.1750597273621586, "grad_norm": 0.596280154590884, "learning_rate": 1.918134026670878e-06, "loss": 0.2832, "step": 25084 }, { "epoch": 1.175106572352087, "grad_norm": 0.5870823875448016, "learning_rate": 1.917949586470091e-06, "loss": 0.2791, "step": 25085 }, { "epoch": 1.1751534173420153, "grad_norm": 0.5876532792524265, "learning_rate": 1.917765149618858e-06, "loss": 0.2691, "step": 25086 }, { "epoch": 1.1752002623319435, "grad_norm": 0.5900183199453134, "learning_rate": 1.9175807161182407e-06, "loss": 0.2731, "step": 25087 }, { "epoch": 1.175247107321872, "grad_norm": 0.5753973509550008, "learning_rate": 1.9173962859693e-06, "loss": 0.262, "step": 25088 }, { "epoch": 1.1752939523118002, "grad_norm": 0.6274275436294386, "learning_rate": 1.9172118591730967e-06, "loss": 0.2882, "step": 25089 }, { "epoch": 1.1753407973017285, "grad_norm": 0.6151250051807582, "learning_rate": 1.917027435730693e-06, "loss": 0.2753, "step": 25090 }, { "epoch": 1.175387642291657, "grad_norm": 0.6015106415106207, "learning_rate": 1.9168430156431495e-06, "loss": 0.2971, "step": 25091 }, { "epoch": 1.1754344872815852, "grad_norm": 0.638741548876127, "learning_rate": 1.916658598911528e-06, "loss": 0.2862, "step": 25092 }, { "epoch": 1.1754813322715136, "grad_norm": 0.572930497234295, "learning_rate": 1.9164741855368898e-06, "loss": 0.2782, "step": 25093 }, { "epoch": 1.1755281772614419, "grad_norm": 0.6049412426253221, "learning_rate": 1.916289775520297e-06, "loss": 0.2845, "step": 25094 }, { "epoch": 1.1755750222513703, "grad_norm": 0.5942787721555972, "learning_rate": 1.9161053688628083e-06, "loss": 0.2675, "step": 25095 }, { "epoch": 1.1756218672412986, "grad_norm": 0.577260812956703, "learning_rate": 1.9159209655654867e-06, "loss": 0.2753, "step": 25096 }, { "epoch": 1.1756687122312268, "grad_norm": 0.6182124803056929, "learning_rate": 1.9157365656293935e-06, "loss": 0.2817, "step": 25097 }, { "epoch": 1.1757155572211553, "grad_norm": 0.6547544262620154, "learning_rate": 1.915552169055589e-06, "loss": 0.2911, "step": 25098 }, { "epoch": 1.1757624022110835, "grad_norm": 0.5694043755246758, "learning_rate": 1.915367775845135e-06, "loss": 0.2736, "step": 25099 }, { "epoch": 1.1758092472010118, "grad_norm": 0.5837187454183318, "learning_rate": 1.9151833859990936e-06, "loss": 0.2706, "step": 25100 }, { "epoch": 1.1758560921909402, "grad_norm": 0.5922416988481786, "learning_rate": 1.9149989995185245e-06, "loss": 0.276, "step": 25101 }, { "epoch": 1.1759029371808685, "grad_norm": 0.6567255025097306, "learning_rate": 1.9148146164044882e-06, "loss": 0.2795, "step": 25102 }, { "epoch": 1.1759497821707967, "grad_norm": 0.5975503055149056, "learning_rate": 1.914630236658047e-06, "loss": 0.2662, "step": 25103 }, { "epoch": 1.1759966271607252, "grad_norm": 0.5677127982739831, "learning_rate": 1.914445860280262e-06, "loss": 0.2685, "step": 25104 }, { "epoch": 1.1760434721506534, "grad_norm": 0.5890373148433644, "learning_rate": 1.9142614872721934e-06, "loss": 0.2747, "step": 25105 }, { "epoch": 1.1760903171405819, "grad_norm": 0.5872406917626228, "learning_rate": 1.9140771176349036e-06, "loss": 0.2746, "step": 25106 }, { "epoch": 1.1761371621305101, "grad_norm": 0.6371317494098766, "learning_rate": 1.913892751369452e-06, "loss": 0.2745, "step": 25107 }, { "epoch": 1.1761840071204386, "grad_norm": 0.6579990335099453, "learning_rate": 1.913708388476901e-06, "loss": 0.2956, "step": 25108 }, { "epoch": 1.1762308521103668, "grad_norm": 0.6200696642378885, "learning_rate": 1.9135240289583097e-06, "loss": 0.2939, "step": 25109 }, { "epoch": 1.176277697100295, "grad_norm": 0.6220373845468333, "learning_rate": 1.913339672814741e-06, "loss": 0.2853, "step": 25110 }, { "epoch": 1.1763245420902235, "grad_norm": 0.5448383012893414, "learning_rate": 1.9131553200472546e-06, "loss": 0.2564, "step": 25111 }, { "epoch": 1.1763713870801518, "grad_norm": 0.6165693431262627, "learning_rate": 1.912970970656913e-06, "loss": 0.2748, "step": 25112 }, { "epoch": 1.17641823207008, "grad_norm": 0.5791500008395689, "learning_rate": 1.9127866246447745e-06, "loss": 0.2529, "step": 25113 }, { "epoch": 1.1764650770600085, "grad_norm": 0.6239298416418302, "learning_rate": 1.912602282011902e-06, "loss": 0.281, "step": 25114 }, { "epoch": 1.1765119220499367, "grad_norm": 0.6409491075628194, "learning_rate": 1.9124179427593555e-06, "loss": 0.2836, "step": 25115 }, { "epoch": 1.1765587670398652, "grad_norm": 0.6086307158967428, "learning_rate": 1.912233606888196e-06, "loss": 0.2852, "step": 25116 }, { "epoch": 1.1766056120297934, "grad_norm": 0.607615404836057, "learning_rate": 1.9120492743994843e-06, "loss": 0.28, "step": 25117 }, { "epoch": 1.1766524570197217, "grad_norm": 0.6076448727501539, "learning_rate": 1.911864945294282e-06, "loss": 0.2693, "step": 25118 }, { "epoch": 1.1766993020096501, "grad_norm": 0.5678846315020663, "learning_rate": 1.911680619573649e-06, "loss": 0.2761, "step": 25119 }, { "epoch": 1.1767461469995784, "grad_norm": 0.551541274889888, "learning_rate": 1.9114962972386454e-06, "loss": 0.2614, "step": 25120 }, { "epoch": 1.1767929919895068, "grad_norm": 0.6164240986687163, "learning_rate": 1.911311978290333e-06, "loss": 0.2572, "step": 25121 }, { "epoch": 1.176839836979435, "grad_norm": 0.5624415964915191, "learning_rate": 1.9111276627297726e-06, "loss": 0.2778, "step": 25122 }, { "epoch": 1.1768866819693633, "grad_norm": 0.5734854971868903, "learning_rate": 1.9109433505580237e-06, "loss": 0.2784, "step": 25123 }, { "epoch": 1.1769335269592918, "grad_norm": 0.5970790440480793, "learning_rate": 1.910759041776149e-06, "loss": 0.2784, "step": 25124 }, { "epoch": 1.17698037194922, "grad_norm": 0.5784001243028307, "learning_rate": 1.910574736385207e-06, "loss": 0.2927, "step": 25125 }, { "epoch": 1.1770272169391482, "grad_norm": 0.600265526857702, "learning_rate": 1.9103904343862595e-06, "loss": 0.2829, "step": 25126 }, { "epoch": 1.1770740619290767, "grad_norm": 0.5842275013521795, "learning_rate": 1.9102061357803662e-06, "loss": 0.2732, "step": 25127 }, { "epoch": 1.177120906919005, "grad_norm": 0.6151657345330416, "learning_rate": 1.9100218405685895e-06, "loss": 0.2847, "step": 25128 }, { "epoch": 1.1771677519089334, "grad_norm": 0.5839770392090489, "learning_rate": 1.909837548751988e-06, "loss": 0.2723, "step": 25129 }, { "epoch": 1.1772145968988617, "grad_norm": 0.5924257390909966, "learning_rate": 1.909653260331624e-06, "loss": 0.2838, "step": 25130 }, { "epoch": 1.1772614418887901, "grad_norm": 0.5467390658208816, "learning_rate": 1.909468975308556e-06, "loss": 0.2549, "step": 25131 }, { "epoch": 1.1773082868787184, "grad_norm": 0.6349367962961621, "learning_rate": 1.9092846936838465e-06, "loss": 0.2855, "step": 25132 }, { "epoch": 1.1773551318686466, "grad_norm": 0.5813205507701636, "learning_rate": 1.9091004154585544e-06, "loss": 0.2654, "step": 25133 }, { "epoch": 1.177401976858575, "grad_norm": 0.5342551005728752, "learning_rate": 1.9089161406337405e-06, "loss": 0.2506, "step": 25134 }, { "epoch": 1.1774488218485033, "grad_norm": 0.52271552880427, "learning_rate": 1.908731869210467e-06, "loss": 0.2702, "step": 25135 }, { "epoch": 1.1774956668384315, "grad_norm": 0.5915276429227334, "learning_rate": 1.9085476011897928e-06, "loss": 0.268, "step": 25136 }, { "epoch": 1.17754251182836, "grad_norm": 0.5822445239244806, "learning_rate": 1.908363336572778e-06, "loss": 0.2589, "step": 25137 }, { "epoch": 1.1775893568182882, "grad_norm": 0.5811473112388026, "learning_rate": 1.908179075360483e-06, "loss": 0.2626, "step": 25138 }, { "epoch": 1.1776362018082165, "grad_norm": 0.6069729926263944, "learning_rate": 1.9079948175539692e-06, "loss": 0.2685, "step": 25139 }, { "epoch": 1.177683046798145, "grad_norm": 0.6033345861378883, "learning_rate": 1.907810563154296e-06, "loss": 0.2772, "step": 25140 }, { "epoch": 1.1777298917880732, "grad_norm": 0.6039289937273611, "learning_rate": 1.9076263121625243e-06, "loss": 0.2696, "step": 25141 }, { "epoch": 1.1777767367780017, "grad_norm": 0.5779230945815377, "learning_rate": 1.9074420645797156e-06, "loss": 0.2809, "step": 25142 }, { "epoch": 1.17782358176793, "grad_norm": 0.5690255815582951, "learning_rate": 1.9072578204069278e-06, "loss": 0.2773, "step": 25143 }, { "epoch": 1.1778704267578584, "grad_norm": 0.6074941876570457, "learning_rate": 1.907073579645222e-06, "loss": 0.2807, "step": 25144 }, { "epoch": 1.1779172717477866, "grad_norm": 0.6179036881880603, "learning_rate": 1.9068893422956585e-06, "loss": 0.2858, "step": 25145 }, { "epoch": 1.1779641167377148, "grad_norm": 0.568127587324014, "learning_rate": 1.9067051083592987e-06, "loss": 0.2631, "step": 25146 }, { "epoch": 1.1780109617276433, "grad_norm": 0.5990535235660217, "learning_rate": 1.9065208778372011e-06, "loss": 0.2791, "step": 25147 }, { "epoch": 1.1780578067175715, "grad_norm": 0.5666699162308979, "learning_rate": 1.9063366507304277e-06, "loss": 0.271, "step": 25148 }, { "epoch": 1.1781046517074998, "grad_norm": 0.587968429312011, "learning_rate": 1.9061524270400367e-06, "loss": 0.2801, "step": 25149 }, { "epoch": 1.1781514966974282, "grad_norm": 0.5824169760342462, "learning_rate": 1.9059682067670899e-06, "loss": 0.2772, "step": 25150 }, { "epoch": 1.1781983416873565, "grad_norm": 0.598358317503002, "learning_rate": 1.9057839899126459e-06, "loss": 0.267, "step": 25151 }, { "epoch": 1.178245186677285, "grad_norm": 0.5368807238605205, "learning_rate": 1.9055997764777658e-06, "loss": 0.2421, "step": 25152 }, { "epoch": 1.1782920316672132, "grad_norm": 0.5712259384985228, "learning_rate": 1.9054155664635105e-06, "loss": 0.2773, "step": 25153 }, { "epoch": 1.1783388766571414, "grad_norm": 0.5683016348728426, "learning_rate": 1.9052313598709393e-06, "loss": 0.2685, "step": 25154 }, { "epoch": 1.17838572164707, "grad_norm": 0.5826959085383838, "learning_rate": 1.9050471567011111e-06, "loss": 0.2669, "step": 25155 }, { "epoch": 1.1784325666369981, "grad_norm": 0.5873581579338498, "learning_rate": 1.9048629569550869e-06, "loss": 0.2647, "step": 25156 }, { "epoch": 1.1784794116269266, "grad_norm": 0.6583526604301562, "learning_rate": 1.9046787606339277e-06, "loss": 0.2875, "step": 25157 }, { "epoch": 1.1785262566168548, "grad_norm": 0.6084738599916779, "learning_rate": 1.9044945677386917e-06, "loss": 0.2756, "step": 25158 }, { "epoch": 1.178573101606783, "grad_norm": 0.5989049828197486, "learning_rate": 1.90431037827044e-06, "loss": 0.2688, "step": 25159 }, { "epoch": 1.1786199465967115, "grad_norm": 0.5646903129385392, "learning_rate": 1.904126192230233e-06, "loss": 0.2583, "step": 25160 }, { "epoch": 1.1786667915866398, "grad_norm": 0.5940612908879761, "learning_rate": 1.9039420096191302e-06, "loss": 0.2647, "step": 25161 }, { "epoch": 1.178713636576568, "grad_norm": 0.6116037620331564, "learning_rate": 1.9037578304381905e-06, "loss": 0.2744, "step": 25162 }, { "epoch": 1.1787604815664965, "grad_norm": 0.5750521598170638, "learning_rate": 1.9035736546884743e-06, "loss": 0.2728, "step": 25163 }, { "epoch": 1.1788073265564247, "grad_norm": 0.5724064466354366, "learning_rate": 1.9033894823710424e-06, "loss": 0.2741, "step": 25164 }, { "epoch": 1.1788541715463532, "grad_norm": 0.5756722875161261, "learning_rate": 1.9032053134869539e-06, "loss": 0.2626, "step": 25165 }, { "epoch": 1.1789010165362814, "grad_norm": 0.573523479211204, "learning_rate": 1.9030211480372687e-06, "loss": 0.2885, "step": 25166 }, { "epoch": 1.17894786152621, "grad_norm": 0.5703654150055136, "learning_rate": 1.9028369860230477e-06, "loss": 0.2627, "step": 25167 }, { "epoch": 1.1789947065161381, "grad_norm": 0.598451855599688, "learning_rate": 1.9026528274453493e-06, "loss": 0.2611, "step": 25168 }, { "epoch": 1.1790415515060664, "grad_norm": 0.6426472660209593, "learning_rate": 1.9024686723052333e-06, "loss": 0.27, "step": 25169 }, { "epoch": 1.1790883964959948, "grad_norm": 0.5984509057394339, "learning_rate": 1.9022845206037595e-06, "loss": 0.2833, "step": 25170 }, { "epoch": 1.179135241485923, "grad_norm": 0.6218902855553153, "learning_rate": 1.902100372341989e-06, "loss": 0.2698, "step": 25171 }, { "epoch": 1.1791820864758513, "grad_norm": 0.5807772557451746, "learning_rate": 1.9019162275209802e-06, "loss": 0.2781, "step": 25172 }, { "epoch": 1.1792289314657798, "grad_norm": 0.6133518124237395, "learning_rate": 1.9017320861417938e-06, "loss": 0.2741, "step": 25173 }, { "epoch": 1.179275776455708, "grad_norm": 0.589704671345441, "learning_rate": 1.901547948205488e-06, "loss": 0.2646, "step": 25174 }, { "epoch": 1.1793226214456363, "grad_norm": 0.5597465901751167, "learning_rate": 1.9013638137131239e-06, "loss": 0.2546, "step": 25175 }, { "epoch": 1.1793694664355647, "grad_norm": 0.5471933256773344, "learning_rate": 1.90117968266576e-06, "loss": 0.2755, "step": 25176 }, { "epoch": 1.179416311425493, "grad_norm": 0.6245869226040789, "learning_rate": 1.900995555064457e-06, "loss": 0.2858, "step": 25177 }, { "epoch": 1.1794631564154214, "grad_norm": 0.6311772480175891, "learning_rate": 1.9008114309102735e-06, "loss": 0.2844, "step": 25178 }, { "epoch": 1.1795100014053497, "grad_norm": 0.6217876751952423, "learning_rate": 1.9006273102042707e-06, "loss": 0.2926, "step": 25179 }, { "epoch": 1.1795568463952781, "grad_norm": 0.5838799368655828, "learning_rate": 1.9004431929475061e-06, "loss": 0.2638, "step": 25180 }, { "epoch": 1.1796036913852064, "grad_norm": 0.5527816881729031, "learning_rate": 1.9002590791410409e-06, "loss": 0.2701, "step": 25181 }, { "epoch": 1.1796505363751346, "grad_norm": 0.5691126597609232, "learning_rate": 1.9000749687859331e-06, "loss": 0.2758, "step": 25182 }, { "epoch": 1.179697381365063, "grad_norm": 0.5828679717772224, "learning_rate": 1.8998908618832434e-06, "loss": 0.2792, "step": 25183 }, { "epoch": 1.1797442263549913, "grad_norm": 0.5858640030350173, "learning_rate": 1.8997067584340313e-06, "loss": 0.2861, "step": 25184 }, { "epoch": 1.1797910713449196, "grad_norm": 0.610753570883968, "learning_rate": 1.8995226584393564e-06, "loss": 0.289, "step": 25185 }, { "epoch": 1.179837916334848, "grad_norm": 0.5707778445855414, "learning_rate": 1.8993385619002766e-06, "loss": 0.2781, "step": 25186 }, { "epoch": 1.1798847613247763, "grad_norm": 0.6192663430518038, "learning_rate": 1.8991544688178526e-06, "loss": 0.2791, "step": 25187 }, { "epoch": 1.1799316063147047, "grad_norm": 0.5733740563262437, "learning_rate": 1.898970379193144e-06, "loss": 0.268, "step": 25188 }, { "epoch": 1.179978451304633, "grad_norm": 0.5868532073389369, "learning_rate": 1.8987862930272093e-06, "loss": 0.2704, "step": 25189 }, { "epoch": 1.1800252962945612, "grad_norm": 0.6323372901865103, "learning_rate": 1.8986022103211082e-06, "loss": 0.2839, "step": 25190 }, { "epoch": 1.1800721412844897, "grad_norm": 0.5981087359683124, "learning_rate": 1.8984181310759017e-06, "loss": 0.27, "step": 25191 }, { "epoch": 1.180118986274418, "grad_norm": 0.5963946842262425, "learning_rate": 1.8982340552926468e-06, "loss": 0.28, "step": 25192 }, { "epoch": 1.1801658312643464, "grad_norm": 0.6089312695631849, "learning_rate": 1.8980499829724033e-06, "loss": 0.2665, "step": 25193 }, { "epoch": 1.1802126762542746, "grad_norm": 0.5957130979414035, "learning_rate": 1.8978659141162308e-06, "loss": 0.2896, "step": 25194 }, { "epoch": 1.1802595212442029, "grad_norm": 0.6161194147082794, "learning_rate": 1.897681848725189e-06, "loss": 0.28, "step": 25195 }, { "epoch": 1.1803063662341313, "grad_norm": 0.5814244538932066, "learning_rate": 1.8974977868003363e-06, "loss": 0.2768, "step": 25196 }, { "epoch": 1.1803532112240596, "grad_norm": 0.6164665391261311, "learning_rate": 1.8973137283427337e-06, "loss": 0.2708, "step": 25197 }, { "epoch": 1.1804000562139878, "grad_norm": 0.5348366538780231, "learning_rate": 1.8971296733534378e-06, "loss": 0.2625, "step": 25198 }, { "epoch": 1.1804469012039163, "grad_norm": 0.6420307657617721, "learning_rate": 1.8969456218335096e-06, "loss": 0.2722, "step": 25199 }, { "epoch": 1.1804937461938445, "grad_norm": 0.5766041868647341, "learning_rate": 1.8967615737840076e-06, "loss": 0.2722, "step": 25200 }, { "epoch": 1.180540591183773, "grad_norm": 0.5894123371853089, "learning_rate": 1.8965775292059909e-06, "loss": 0.2855, "step": 25201 }, { "epoch": 1.1805874361737012, "grad_norm": 0.6321602427293763, "learning_rate": 1.8963934881005194e-06, "loss": 0.2835, "step": 25202 }, { "epoch": 1.1806342811636297, "grad_norm": 0.5929642668275064, "learning_rate": 1.8962094504686525e-06, "loss": 0.2635, "step": 25203 }, { "epoch": 1.180681126153558, "grad_norm": 0.5824142904728736, "learning_rate": 1.8960254163114466e-06, "loss": 0.2822, "step": 25204 }, { "epoch": 1.1807279711434862, "grad_norm": 0.6073139576098198, "learning_rate": 1.8958413856299632e-06, "loss": 0.2838, "step": 25205 }, { "epoch": 1.1807748161334146, "grad_norm": 0.5910908504280308, "learning_rate": 1.8956573584252614e-06, "loss": 0.2783, "step": 25206 }, { "epoch": 1.1808216611233429, "grad_norm": 0.5941232692567193, "learning_rate": 1.895473334698399e-06, "loss": 0.2856, "step": 25207 }, { "epoch": 1.180868506113271, "grad_norm": 0.5466218563064115, "learning_rate": 1.8952893144504353e-06, "loss": 0.2665, "step": 25208 }, { "epoch": 1.1809153511031996, "grad_norm": 0.5779808043129241, "learning_rate": 1.8951052976824309e-06, "loss": 0.2841, "step": 25209 }, { "epoch": 1.1809621960931278, "grad_norm": 0.6359145075582259, "learning_rate": 1.8949212843954428e-06, "loss": 0.2703, "step": 25210 }, { "epoch": 1.181009041083056, "grad_norm": 0.5840987560137177, "learning_rate": 1.8947372745905301e-06, "loss": 0.2668, "step": 25211 }, { "epoch": 1.1810558860729845, "grad_norm": 0.6066879147940482, "learning_rate": 1.8945532682687527e-06, "loss": 0.2792, "step": 25212 }, { "epoch": 1.1811027310629127, "grad_norm": 0.622081271809417, "learning_rate": 1.894369265431169e-06, "loss": 0.2964, "step": 25213 }, { "epoch": 1.1811495760528412, "grad_norm": 0.5401736916683205, "learning_rate": 1.894185266078838e-06, "loss": 0.257, "step": 25214 }, { "epoch": 1.1811964210427695, "grad_norm": 0.667261587639434, "learning_rate": 1.894001270212819e-06, "loss": 0.2985, "step": 25215 }, { "epoch": 1.181243266032698, "grad_norm": 0.5592099783471108, "learning_rate": 1.8938172778341696e-06, "loss": 0.2577, "step": 25216 }, { "epoch": 1.1812901110226262, "grad_norm": 0.5962476839308644, "learning_rate": 1.8936332889439503e-06, "loss": 0.2881, "step": 25217 }, { "epoch": 1.1813369560125544, "grad_norm": 0.5623273697351786, "learning_rate": 1.8934493035432179e-06, "loss": 0.269, "step": 25218 }, { "epoch": 1.1813838010024829, "grad_norm": 0.6056043318444441, "learning_rate": 1.8932653216330329e-06, "loss": 0.2847, "step": 25219 }, { "epoch": 1.181430645992411, "grad_norm": 0.6015480789331754, "learning_rate": 1.8930813432144532e-06, "loss": 0.2825, "step": 25220 }, { "epoch": 1.1814774909823393, "grad_norm": 0.5586403707872764, "learning_rate": 1.8928973682885387e-06, "loss": 0.2452, "step": 25221 }, { "epoch": 1.1815243359722678, "grad_norm": 0.575903832418167, "learning_rate": 1.8927133968563463e-06, "loss": 0.2716, "step": 25222 }, { "epoch": 1.181571180962196, "grad_norm": 0.5822952647574968, "learning_rate": 1.8925294289189358e-06, "loss": 0.2927, "step": 25223 }, { "epoch": 1.1816180259521245, "grad_norm": 0.562219505753854, "learning_rate": 1.8923454644773662e-06, "loss": 0.2612, "step": 25224 }, { "epoch": 1.1816648709420527, "grad_norm": 0.5583568819785613, "learning_rate": 1.892161503532695e-06, "loss": 0.2564, "step": 25225 }, { "epoch": 1.181711715931981, "grad_norm": 0.6066059827802821, "learning_rate": 1.8919775460859824e-06, "loss": 0.2836, "step": 25226 }, { "epoch": 1.1817585609219095, "grad_norm": 0.5756230211568606, "learning_rate": 1.891793592138286e-06, "loss": 0.2669, "step": 25227 }, { "epoch": 1.1818054059118377, "grad_norm": 0.5921830838824289, "learning_rate": 1.8916096416906645e-06, "loss": 0.27, "step": 25228 }, { "epoch": 1.1818522509017662, "grad_norm": 0.6154012776640382, "learning_rate": 1.891425694744176e-06, "loss": 0.2762, "step": 25229 }, { "epoch": 1.1818990958916944, "grad_norm": 0.5961595635291037, "learning_rate": 1.8912417512998807e-06, "loss": 0.2778, "step": 25230 }, { "epoch": 1.1819459408816226, "grad_norm": 0.6217409264655884, "learning_rate": 1.891057811358835e-06, "loss": 0.2774, "step": 25231 }, { "epoch": 1.181992785871551, "grad_norm": 0.5605699018657474, "learning_rate": 1.8908738749220992e-06, "loss": 0.2618, "step": 25232 }, { "epoch": 1.1820396308614793, "grad_norm": 0.5883996491960322, "learning_rate": 1.8906899419907312e-06, "loss": 0.2683, "step": 25233 }, { "epoch": 1.1820864758514076, "grad_norm": 0.5673061932387995, "learning_rate": 1.89050601256579e-06, "loss": 0.2712, "step": 25234 }, { "epoch": 1.182133320841336, "grad_norm": 0.5559752206920868, "learning_rate": 1.8903220866483326e-06, "loss": 0.2534, "step": 25235 }, { "epoch": 1.1821801658312643, "grad_norm": 0.6136413333645901, "learning_rate": 1.8901381642394184e-06, "loss": 0.2676, "step": 25236 }, { "epoch": 1.1822270108211927, "grad_norm": 0.6807750247696343, "learning_rate": 1.8899542453401062e-06, "loss": 0.2862, "step": 25237 }, { "epoch": 1.182273855811121, "grad_norm": 0.5932156413569071, "learning_rate": 1.8897703299514536e-06, "loss": 0.2602, "step": 25238 }, { "epoch": 1.1823207008010495, "grad_norm": 0.5540334742583316, "learning_rate": 1.8895864180745192e-06, "loss": 0.2546, "step": 25239 }, { "epoch": 1.1823675457909777, "grad_norm": 0.6227251271271056, "learning_rate": 1.8894025097103624e-06, "loss": 0.2668, "step": 25240 }, { "epoch": 1.182414390780906, "grad_norm": 0.5928375615116677, "learning_rate": 1.8892186048600406e-06, "loss": 0.2678, "step": 25241 }, { "epoch": 1.1824612357708344, "grad_norm": 0.5852261993796155, "learning_rate": 1.8890347035246116e-06, "loss": 0.2633, "step": 25242 }, { "epoch": 1.1825080807607626, "grad_norm": 0.5765841473311336, "learning_rate": 1.8888508057051342e-06, "loss": 0.2821, "step": 25243 }, { "epoch": 1.1825549257506909, "grad_norm": 0.5861411722943045, "learning_rate": 1.8886669114026673e-06, "loss": 0.268, "step": 25244 }, { "epoch": 1.1826017707406193, "grad_norm": 0.6123915499671349, "learning_rate": 1.888483020618268e-06, "loss": 0.2728, "step": 25245 }, { "epoch": 1.1826486157305476, "grad_norm": 0.6147383588063097, "learning_rate": 1.8882991333529964e-06, "loss": 0.2719, "step": 25246 }, { "epoch": 1.1826954607204758, "grad_norm": 0.5891246644286687, "learning_rate": 1.8881152496079087e-06, "loss": 0.2713, "step": 25247 }, { "epoch": 1.1827423057104043, "grad_norm": 0.5935472416077097, "learning_rate": 1.8879313693840646e-06, "loss": 0.2763, "step": 25248 }, { "epoch": 1.1827891507003325, "grad_norm": 0.5993005420735382, "learning_rate": 1.8877474926825206e-06, "loss": 0.2888, "step": 25249 }, { "epoch": 1.182835995690261, "grad_norm": 0.636139313435702, "learning_rate": 1.8875636195043361e-06, "loss": 0.2887, "step": 25250 }, { "epoch": 1.1828828406801892, "grad_norm": 0.5574279328313637, "learning_rate": 1.8873797498505698e-06, "loss": 0.2591, "step": 25251 }, { "epoch": 1.1829296856701177, "grad_norm": 0.5758283613818049, "learning_rate": 1.8871958837222793e-06, "loss": 0.2775, "step": 25252 }, { "epoch": 1.182976530660046, "grad_norm": 0.6149223901331093, "learning_rate": 1.8870120211205215e-06, "loss": 0.2687, "step": 25253 }, { "epoch": 1.1830233756499742, "grad_norm": 0.5718221662254565, "learning_rate": 1.8868281620463551e-06, "loss": 0.2638, "step": 25254 }, { "epoch": 1.1830702206399026, "grad_norm": 0.5859935779787817, "learning_rate": 1.8866443065008396e-06, "loss": 0.2741, "step": 25255 }, { "epoch": 1.1831170656298309, "grad_norm": 0.6257026282999808, "learning_rate": 1.8864604544850312e-06, "loss": 0.2695, "step": 25256 }, { "epoch": 1.1831639106197591, "grad_norm": 0.5904851339002404, "learning_rate": 1.8862766059999887e-06, "loss": 0.2775, "step": 25257 }, { "epoch": 1.1832107556096876, "grad_norm": 0.6024305309316478, "learning_rate": 1.8860927610467712e-06, "loss": 0.2849, "step": 25258 }, { "epoch": 1.1832576005996158, "grad_norm": 0.5961796865296791, "learning_rate": 1.885908919626435e-06, "loss": 0.2762, "step": 25259 }, { "epoch": 1.1833044455895443, "grad_norm": 0.6596385874344055, "learning_rate": 1.885725081740038e-06, "loss": 0.3025, "step": 25260 }, { "epoch": 1.1833512905794725, "grad_norm": 0.5920739206287742, "learning_rate": 1.8855412473886386e-06, "loss": 0.2831, "step": 25261 }, { "epoch": 1.1833981355694008, "grad_norm": 0.6076725045757578, "learning_rate": 1.8853574165732957e-06, "loss": 0.2691, "step": 25262 }, { "epoch": 1.1834449805593292, "grad_norm": 0.5951695970818559, "learning_rate": 1.885173589295066e-06, "loss": 0.2668, "step": 25263 }, { "epoch": 1.1834918255492575, "grad_norm": 0.5786410907338352, "learning_rate": 1.8849897655550086e-06, "loss": 0.2812, "step": 25264 }, { "epoch": 1.183538670539186, "grad_norm": 0.6446493779501834, "learning_rate": 1.8848059453541794e-06, "loss": 0.2817, "step": 25265 }, { "epoch": 1.1835855155291142, "grad_norm": 0.5668567044975269, "learning_rate": 1.8846221286936378e-06, "loss": 0.287, "step": 25266 }, { "epoch": 1.1836323605190424, "grad_norm": 0.5664671234225178, "learning_rate": 1.8844383155744409e-06, "loss": 0.2702, "step": 25267 }, { "epoch": 1.1836792055089709, "grad_norm": 0.5755575695979969, "learning_rate": 1.8842545059976472e-06, "loss": 0.2692, "step": 25268 }, { "epoch": 1.1837260504988991, "grad_norm": 0.6265231973215291, "learning_rate": 1.8840706999643136e-06, "loss": 0.2823, "step": 25269 }, { "epoch": 1.1837728954888274, "grad_norm": 0.6152379787092951, "learning_rate": 1.8838868974754992e-06, "loss": 0.2828, "step": 25270 }, { "epoch": 1.1838197404787558, "grad_norm": 0.6136621493935217, "learning_rate": 1.88370309853226e-06, "loss": 0.2904, "step": 25271 }, { "epoch": 1.183866585468684, "grad_norm": 0.5728437659594816, "learning_rate": 1.883519303135655e-06, "loss": 0.2824, "step": 25272 }, { "epoch": 1.1839134304586125, "grad_norm": 0.5603159207290501, "learning_rate": 1.883335511286741e-06, "loss": 0.2573, "step": 25273 }, { "epoch": 1.1839602754485408, "grad_norm": 0.5952211447638147, "learning_rate": 1.883151722986576e-06, "loss": 0.2771, "step": 25274 }, { "epoch": 1.1840071204384692, "grad_norm": 0.5602473032386678, "learning_rate": 1.8829679382362184e-06, "loss": 0.2648, "step": 25275 }, { "epoch": 1.1840539654283975, "grad_norm": 0.5874678429833421, "learning_rate": 1.8827841570367259e-06, "loss": 0.2794, "step": 25276 }, { "epoch": 1.1841008104183257, "grad_norm": 0.5895278953995285, "learning_rate": 1.882600379389154e-06, "loss": 0.2696, "step": 25277 }, { "epoch": 1.1841476554082542, "grad_norm": 0.5763153692224317, "learning_rate": 1.882416605294562e-06, "loss": 0.2828, "step": 25278 }, { "epoch": 1.1841945003981824, "grad_norm": 0.6155855181043154, "learning_rate": 1.8822328347540078e-06, "loss": 0.2874, "step": 25279 }, { "epoch": 1.1842413453881107, "grad_norm": 0.6082595033035616, "learning_rate": 1.8820490677685477e-06, "loss": 0.285, "step": 25280 }, { "epoch": 1.1842881903780391, "grad_norm": 0.5599738933139972, "learning_rate": 1.8818653043392402e-06, "loss": 0.2686, "step": 25281 }, { "epoch": 1.1843350353679674, "grad_norm": 0.6155616586674728, "learning_rate": 1.8816815444671432e-06, "loss": 0.2822, "step": 25282 }, { "epoch": 1.1843818803578956, "grad_norm": 0.5827590730933523, "learning_rate": 1.881497788153313e-06, "loss": 0.279, "step": 25283 }, { "epoch": 1.184428725347824, "grad_norm": 0.5711697036692684, "learning_rate": 1.8813140353988073e-06, "loss": 0.276, "step": 25284 }, { "epoch": 1.1844755703377523, "grad_norm": 0.5804684322519948, "learning_rate": 1.8811302862046836e-06, "loss": 0.2688, "step": 25285 }, { "epoch": 1.1845224153276808, "grad_norm": 0.5926085584163454, "learning_rate": 1.8809465405720002e-06, "loss": 0.2726, "step": 25286 }, { "epoch": 1.184569260317609, "grad_norm": 0.5692784936641959, "learning_rate": 1.8807627985018134e-06, "loss": 0.2532, "step": 25287 }, { "epoch": 1.1846161053075375, "grad_norm": 0.5824540739712136, "learning_rate": 1.8805790599951818e-06, "loss": 0.2543, "step": 25288 }, { "epoch": 1.1846629502974657, "grad_norm": 0.5876154731893664, "learning_rate": 1.8803953250531612e-06, "loss": 0.2622, "step": 25289 }, { "epoch": 1.184709795287394, "grad_norm": 0.5622622604288057, "learning_rate": 1.8802115936768103e-06, "loss": 0.2719, "step": 25290 }, { "epoch": 1.1847566402773224, "grad_norm": 0.5638828245564674, "learning_rate": 1.8800278658671855e-06, "loss": 0.2684, "step": 25291 }, { "epoch": 1.1848034852672507, "grad_norm": 0.6293071036169154, "learning_rate": 1.8798441416253444e-06, "loss": 0.2846, "step": 25292 }, { "epoch": 1.184850330257179, "grad_norm": 0.6542616908571574, "learning_rate": 1.8796604209523447e-06, "loss": 0.2961, "step": 25293 }, { "epoch": 1.1848971752471074, "grad_norm": 0.610084590517013, "learning_rate": 1.879476703849244e-06, "loss": 0.2742, "step": 25294 }, { "epoch": 1.1849440202370356, "grad_norm": 0.5859546977483325, "learning_rate": 1.8792929903170981e-06, "loss": 0.2703, "step": 25295 }, { "epoch": 1.184990865226964, "grad_norm": 0.5690522903276932, "learning_rate": 1.879109280356965e-06, "loss": 0.2673, "step": 25296 }, { "epoch": 1.1850377102168923, "grad_norm": 0.5772849385222623, "learning_rate": 1.8789255739699019e-06, "loss": 0.2833, "step": 25297 }, { "epoch": 1.1850845552068205, "grad_norm": 0.59149489941837, "learning_rate": 1.878741871156966e-06, "loss": 0.2787, "step": 25298 }, { "epoch": 1.185131400196749, "grad_norm": 0.6284942952076461, "learning_rate": 1.8785581719192144e-06, "loss": 0.2926, "step": 25299 }, { "epoch": 1.1851782451866772, "grad_norm": 0.5546840488691154, "learning_rate": 1.878374476257705e-06, "loss": 0.2729, "step": 25300 }, { "epoch": 1.1852250901766057, "grad_norm": 0.6267848507439526, "learning_rate": 1.8781907841734945e-06, "loss": 0.285, "step": 25301 }, { "epoch": 1.185271935166534, "grad_norm": 0.5995751049901157, "learning_rate": 1.8780070956676389e-06, "loss": 0.2763, "step": 25302 }, { "epoch": 1.1853187801564622, "grad_norm": 0.6073407966668856, "learning_rate": 1.877823410741196e-06, "loss": 0.2732, "step": 25303 }, { "epoch": 1.1853656251463907, "grad_norm": 0.6118130671424546, "learning_rate": 1.8776397293952237e-06, "loss": 0.2834, "step": 25304 }, { "epoch": 1.185412470136319, "grad_norm": 0.5619190960924566, "learning_rate": 1.8774560516307778e-06, "loss": 0.2733, "step": 25305 }, { "epoch": 1.1854593151262471, "grad_norm": 0.5399023506553045, "learning_rate": 1.8772723774489155e-06, "loss": 0.2555, "step": 25306 }, { "epoch": 1.1855061601161756, "grad_norm": 0.658077851187525, "learning_rate": 1.8770887068506954e-06, "loss": 0.2804, "step": 25307 }, { "epoch": 1.1855530051061038, "grad_norm": 0.6252801842399404, "learning_rate": 1.876905039837173e-06, "loss": 0.2881, "step": 25308 }, { "epoch": 1.1855998500960323, "grad_norm": 0.5613301937473121, "learning_rate": 1.8767213764094045e-06, "loss": 0.2611, "step": 25309 }, { "epoch": 1.1856466950859605, "grad_norm": 0.5246406340961982, "learning_rate": 1.8765377165684482e-06, "loss": 0.2534, "step": 25310 }, { "epoch": 1.185693540075889, "grad_norm": 0.6185857484991278, "learning_rate": 1.8763540603153607e-06, "loss": 0.2846, "step": 25311 }, { "epoch": 1.1857403850658172, "grad_norm": 0.5746386391729037, "learning_rate": 1.8761704076511988e-06, "loss": 0.2579, "step": 25312 }, { "epoch": 1.1857872300557455, "grad_norm": 0.5747523233449786, "learning_rate": 1.8759867585770204e-06, "loss": 0.269, "step": 25313 }, { "epoch": 1.185834075045674, "grad_norm": 0.5897199749674601, "learning_rate": 1.8758031130938801e-06, "loss": 0.2825, "step": 25314 }, { "epoch": 1.1858809200356022, "grad_norm": 0.6225434929292994, "learning_rate": 1.8756194712028363e-06, "loss": 0.2826, "step": 25315 }, { "epoch": 1.1859277650255304, "grad_norm": 0.6249539608201157, "learning_rate": 1.8754358329049455e-06, "loss": 0.2837, "step": 25316 }, { "epoch": 1.185974610015459, "grad_norm": 0.6422480569100879, "learning_rate": 1.8752521982012647e-06, "loss": 0.2723, "step": 25317 }, { "epoch": 1.1860214550053871, "grad_norm": 0.6227061738276312, "learning_rate": 1.8750685670928501e-06, "loss": 0.2938, "step": 25318 }, { "epoch": 1.1860682999953154, "grad_norm": 0.6165000214741811, "learning_rate": 1.87488493958076e-06, "loss": 0.2868, "step": 25319 }, { "epoch": 1.1861151449852438, "grad_norm": 0.6188363633014534, "learning_rate": 1.8747013156660487e-06, "loss": 0.2876, "step": 25320 }, { "epoch": 1.186161989975172, "grad_norm": 0.5644114438720395, "learning_rate": 1.8745176953497746e-06, "loss": 0.2524, "step": 25321 }, { "epoch": 1.1862088349651005, "grad_norm": 0.6395944218177202, "learning_rate": 1.8743340786329936e-06, "loss": 0.2812, "step": 25322 }, { "epoch": 1.1862556799550288, "grad_norm": 0.6310177764943931, "learning_rate": 1.8741504655167625e-06, "loss": 0.2754, "step": 25323 }, { "epoch": 1.1863025249449572, "grad_norm": 0.5727387529232151, "learning_rate": 1.873966856002139e-06, "loss": 0.2704, "step": 25324 }, { "epoch": 1.1863493699348855, "grad_norm": 0.5441781937184667, "learning_rate": 1.8737832500901793e-06, "loss": 0.26, "step": 25325 }, { "epoch": 1.1863962149248137, "grad_norm": 0.6061124776950314, "learning_rate": 1.8735996477819385e-06, "loss": 0.2809, "step": 25326 }, { "epoch": 1.1864430599147422, "grad_norm": 0.6010980195920002, "learning_rate": 1.8734160490784744e-06, "loss": 0.2707, "step": 25327 }, { "epoch": 1.1864899049046704, "grad_norm": 0.5927869898301937, "learning_rate": 1.8732324539808438e-06, "loss": 0.2794, "step": 25328 }, { "epoch": 1.1865367498945987, "grad_norm": 0.6062287064860806, "learning_rate": 1.8730488624901027e-06, "loss": 0.2791, "step": 25329 }, { "epoch": 1.1865835948845271, "grad_norm": 0.569367966198624, "learning_rate": 1.8728652746073075e-06, "loss": 0.272, "step": 25330 }, { "epoch": 1.1866304398744554, "grad_norm": 0.5656897750949884, "learning_rate": 1.8726816903335162e-06, "loss": 0.2714, "step": 25331 }, { "epoch": 1.1866772848643838, "grad_norm": 0.6388815398075356, "learning_rate": 1.8724981096697836e-06, "loss": 0.2816, "step": 25332 }, { "epoch": 1.186724129854312, "grad_norm": 0.5801738249073599, "learning_rate": 1.872314532617166e-06, "loss": 0.2738, "step": 25333 }, { "epoch": 1.1867709748442403, "grad_norm": 0.534509173314475, "learning_rate": 1.8721309591767205e-06, "loss": 0.2534, "step": 25334 }, { "epoch": 1.1868178198341688, "grad_norm": 0.6007915299678532, "learning_rate": 1.8719473893495043e-06, "loss": 0.2765, "step": 25335 }, { "epoch": 1.186864664824097, "grad_norm": 0.638693409170665, "learning_rate": 1.8717638231365726e-06, "loss": 0.2767, "step": 25336 }, { "epoch": 1.1869115098140255, "grad_norm": 0.57797580735569, "learning_rate": 1.8715802605389827e-06, "loss": 0.2512, "step": 25337 }, { "epoch": 1.1869583548039537, "grad_norm": 0.5844506544266105, "learning_rate": 1.8713967015577896e-06, "loss": 0.2826, "step": 25338 }, { "epoch": 1.187005199793882, "grad_norm": 0.6145882759398403, "learning_rate": 1.8712131461940515e-06, "loss": 0.2901, "step": 25339 }, { "epoch": 1.1870520447838104, "grad_norm": 0.5867049646435929, "learning_rate": 1.8710295944488228e-06, "loss": 0.2853, "step": 25340 }, { "epoch": 1.1870988897737387, "grad_norm": 0.6201996921508973, "learning_rate": 1.8708460463231603e-06, "loss": 0.273, "step": 25341 }, { "epoch": 1.187145734763667, "grad_norm": 0.5691175894119112, "learning_rate": 1.8706625018181218e-06, "loss": 0.2632, "step": 25342 }, { "epoch": 1.1871925797535954, "grad_norm": 0.5932065505284906, "learning_rate": 1.8704789609347625e-06, "loss": 0.2719, "step": 25343 }, { "epoch": 1.1872394247435236, "grad_norm": 0.5845074623589183, "learning_rate": 1.8702954236741378e-06, "loss": 0.2613, "step": 25344 }, { "epoch": 1.187286269733452, "grad_norm": 0.6176261048610733, "learning_rate": 1.8701118900373044e-06, "loss": 0.2818, "step": 25345 }, { "epoch": 1.1873331147233803, "grad_norm": 0.5934961490384753, "learning_rate": 1.8699283600253198e-06, "loss": 0.2789, "step": 25346 }, { "epoch": 1.1873799597133088, "grad_norm": 0.5646260841761703, "learning_rate": 1.8697448336392382e-06, "loss": 0.2595, "step": 25347 }, { "epoch": 1.187426804703237, "grad_norm": 0.5857044588898389, "learning_rate": 1.8695613108801167e-06, "loss": 0.273, "step": 25348 }, { "epoch": 1.1874736496931653, "grad_norm": 0.623549258306682, "learning_rate": 1.8693777917490125e-06, "loss": 0.2858, "step": 25349 }, { "epoch": 1.1875204946830937, "grad_norm": 0.599774668857607, "learning_rate": 1.86919427624698e-06, "loss": 0.2704, "step": 25350 }, { "epoch": 1.187567339673022, "grad_norm": 0.5719249621835665, "learning_rate": 1.8690107643750754e-06, "loss": 0.2742, "step": 25351 }, { "epoch": 1.1876141846629502, "grad_norm": 0.5661806738934911, "learning_rate": 1.8688272561343553e-06, "loss": 0.2676, "step": 25352 }, { "epoch": 1.1876610296528787, "grad_norm": 0.5710235930996164, "learning_rate": 1.8686437515258766e-06, "loss": 0.269, "step": 25353 }, { "epoch": 1.187707874642807, "grad_norm": 0.5923877834865225, "learning_rate": 1.8684602505506932e-06, "loss": 0.2808, "step": 25354 }, { "epoch": 1.1877547196327352, "grad_norm": 0.6228821409474026, "learning_rate": 1.8682767532098639e-06, "loss": 0.2639, "step": 25355 }, { "epoch": 1.1878015646226636, "grad_norm": 0.5767156118035569, "learning_rate": 1.8680932595044417e-06, "loss": 0.2755, "step": 25356 }, { "epoch": 1.1878484096125919, "grad_norm": 0.5799456513517878, "learning_rate": 1.8679097694354847e-06, "loss": 0.2746, "step": 25357 }, { "epoch": 1.1878952546025203, "grad_norm": 0.6277073055786933, "learning_rate": 1.8677262830040476e-06, "loss": 0.2811, "step": 25358 }, { "epoch": 1.1879420995924486, "grad_norm": 0.5816334499233039, "learning_rate": 1.8675428002111873e-06, "loss": 0.274, "step": 25359 }, { "epoch": 1.187988944582377, "grad_norm": 0.5844403939802049, "learning_rate": 1.867359321057959e-06, "loss": 0.251, "step": 25360 }, { "epoch": 1.1880357895723053, "grad_norm": 0.6190384939446323, "learning_rate": 1.8671758455454192e-06, "loss": 0.2827, "step": 25361 }, { "epoch": 1.1880826345622335, "grad_norm": 0.6504712051743679, "learning_rate": 1.8669923736746226e-06, "loss": 0.2771, "step": 25362 }, { "epoch": 1.188129479552162, "grad_norm": 0.5713498796379676, "learning_rate": 1.866808905446626e-06, "loss": 0.2778, "step": 25363 }, { "epoch": 1.1881763245420902, "grad_norm": 0.60438629097798, "learning_rate": 1.8666254408624852e-06, "loss": 0.2755, "step": 25364 }, { "epoch": 1.1882231695320185, "grad_norm": 0.58798652278013, "learning_rate": 1.8664419799232553e-06, "loss": 0.2734, "step": 25365 }, { "epoch": 1.188270014521947, "grad_norm": 0.5823560854066028, "learning_rate": 1.8662585226299934e-06, "loss": 0.2799, "step": 25366 }, { "epoch": 1.1883168595118752, "grad_norm": 0.6616116770446323, "learning_rate": 1.8660750689837536e-06, "loss": 0.2717, "step": 25367 }, { "epoch": 1.1883637045018036, "grad_norm": 0.5752224326414341, "learning_rate": 1.8658916189855936e-06, "loss": 0.2805, "step": 25368 }, { "epoch": 1.1884105494917319, "grad_norm": 0.5916287310048532, "learning_rate": 1.865708172636567e-06, "loss": 0.2765, "step": 25369 }, { "epoch": 1.18845739448166, "grad_norm": 0.5917449228102644, "learning_rate": 1.8655247299377311e-06, "loss": 0.2649, "step": 25370 }, { "epoch": 1.1885042394715886, "grad_norm": 0.5803602551964129, "learning_rate": 1.86534129089014e-06, "loss": 0.2624, "step": 25371 }, { "epoch": 1.1885510844615168, "grad_norm": 0.625810135779301, "learning_rate": 1.8651578554948503e-06, "loss": 0.2885, "step": 25372 }, { "epoch": 1.1885979294514453, "grad_norm": 0.610543358046971, "learning_rate": 1.8649744237529183e-06, "loss": 0.2795, "step": 25373 }, { "epoch": 1.1886447744413735, "grad_norm": 0.5844555105461615, "learning_rate": 1.8647909956653992e-06, "loss": 0.2728, "step": 25374 }, { "epoch": 1.1886916194313017, "grad_norm": 0.5927233138290461, "learning_rate": 1.8646075712333476e-06, "loss": 0.2818, "step": 25375 }, { "epoch": 1.1887384644212302, "grad_norm": 0.6198098778938698, "learning_rate": 1.8644241504578192e-06, "loss": 0.2766, "step": 25376 }, { "epoch": 1.1887853094111585, "grad_norm": 0.5806460220612815, "learning_rate": 1.864240733339871e-06, "loss": 0.2719, "step": 25377 }, { "epoch": 1.1888321544010867, "grad_norm": 0.6126043213549697, "learning_rate": 1.864057319880557e-06, "loss": 0.2676, "step": 25378 }, { "epoch": 1.1888789993910152, "grad_norm": 0.5752170480769081, "learning_rate": 1.8638739100809332e-06, "loss": 0.2682, "step": 25379 }, { "epoch": 1.1889258443809434, "grad_norm": 0.6238366079972432, "learning_rate": 1.863690503942056e-06, "loss": 0.2828, "step": 25380 }, { "epoch": 1.1889726893708719, "grad_norm": 0.5795102547233719, "learning_rate": 1.86350710146498e-06, "loss": 0.2738, "step": 25381 }, { "epoch": 1.1890195343608, "grad_norm": 0.5556875190420828, "learning_rate": 1.86332370265076e-06, "loss": 0.2742, "step": 25382 }, { "epoch": 1.1890663793507286, "grad_norm": 0.57416377871928, "learning_rate": 1.8631403075004516e-06, "loss": 0.2857, "step": 25383 }, { "epoch": 1.1891132243406568, "grad_norm": 0.6184580342659738, "learning_rate": 1.8629569160151117e-06, "loss": 0.285, "step": 25384 }, { "epoch": 1.189160069330585, "grad_norm": 0.6315285547183632, "learning_rate": 1.8627735281957937e-06, "loss": 0.2876, "step": 25385 }, { "epoch": 1.1892069143205135, "grad_norm": 0.5959103047833223, "learning_rate": 1.8625901440435553e-06, "loss": 0.2644, "step": 25386 }, { "epoch": 1.1892537593104417, "grad_norm": 0.535428535970807, "learning_rate": 1.8624067635594494e-06, "loss": 0.2487, "step": 25387 }, { "epoch": 1.18930060430037, "grad_norm": 0.6157228294128527, "learning_rate": 1.8622233867445327e-06, "loss": 0.284, "step": 25388 }, { "epoch": 1.1893474492902985, "grad_norm": 0.6382112838004257, "learning_rate": 1.8620400135998595e-06, "loss": 0.2645, "step": 25389 }, { "epoch": 1.1893942942802267, "grad_norm": 0.6169118964630009, "learning_rate": 1.8618566441264857e-06, "loss": 0.297, "step": 25390 }, { "epoch": 1.189441139270155, "grad_norm": 0.5800159676940382, "learning_rate": 1.861673278325467e-06, "loss": 0.2785, "step": 25391 }, { "epoch": 1.1894879842600834, "grad_norm": 0.5914084861602404, "learning_rate": 1.861489916197859e-06, "loss": 0.2758, "step": 25392 }, { "epoch": 1.1895348292500116, "grad_norm": 0.6101319505739413, "learning_rate": 1.8613065577447146e-06, "loss": 0.2779, "step": 25393 }, { "epoch": 1.18958167423994, "grad_norm": 0.5570339163932002, "learning_rate": 1.8611232029670905e-06, "loss": 0.2721, "step": 25394 }, { "epoch": 1.1896285192298683, "grad_norm": 0.5872040464651507, "learning_rate": 1.8609398518660424e-06, "loss": 0.2797, "step": 25395 }, { "epoch": 1.1896753642197968, "grad_norm": 0.6034115553655898, "learning_rate": 1.8607565044426243e-06, "loss": 0.2736, "step": 25396 }, { "epoch": 1.189722209209725, "grad_norm": 0.5632657884558768, "learning_rate": 1.860573160697892e-06, "loss": 0.2713, "step": 25397 }, { "epoch": 1.1897690541996533, "grad_norm": 0.627958596402919, "learning_rate": 1.8603898206329013e-06, "loss": 0.3083, "step": 25398 }, { "epoch": 1.1898158991895817, "grad_norm": 0.5785179355974792, "learning_rate": 1.8602064842487058e-06, "loss": 0.2691, "step": 25399 }, { "epoch": 1.18986274417951, "grad_norm": 0.5854066047034565, "learning_rate": 1.8600231515463606e-06, "loss": 0.2581, "step": 25400 }, { "epoch": 1.1899095891694382, "grad_norm": 0.6549292205194739, "learning_rate": 1.8598398225269218e-06, "loss": 0.2965, "step": 25401 }, { "epoch": 1.1899564341593667, "grad_norm": 0.5348477042885076, "learning_rate": 1.859656497191444e-06, "loss": 0.2665, "step": 25402 }, { "epoch": 1.190003279149295, "grad_norm": 0.6112222248045743, "learning_rate": 1.8594731755409817e-06, "loss": 0.2862, "step": 25403 }, { "epoch": 1.1900501241392234, "grad_norm": 0.5393721659925366, "learning_rate": 1.8592898575765917e-06, "loss": 0.2659, "step": 25404 }, { "epoch": 1.1900969691291516, "grad_norm": 0.5932932156899251, "learning_rate": 1.859106543299326e-06, "loss": 0.2622, "step": 25405 }, { "epoch": 1.1901438141190799, "grad_norm": 0.5589368402402305, "learning_rate": 1.8589232327102419e-06, "loss": 0.2685, "step": 25406 }, { "epoch": 1.1901906591090083, "grad_norm": 0.5854890394412178, "learning_rate": 1.8587399258103928e-06, "loss": 0.2736, "step": 25407 }, { "epoch": 1.1902375040989366, "grad_norm": 0.574996388824612, "learning_rate": 1.8585566226008344e-06, "loss": 0.2702, "step": 25408 }, { "epoch": 1.190284349088865, "grad_norm": 0.5439737459853601, "learning_rate": 1.8583733230826215e-06, "loss": 0.2509, "step": 25409 }, { "epoch": 1.1903311940787933, "grad_norm": 0.546760149173142, "learning_rate": 1.8581900272568096e-06, "loss": 0.2681, "step": 25410 }, { "epoch": 1.1903780390687215, "grad_norm": 0.611607844976269, "learning_rate": 1.8580067351244518e-06, "loss": 0.2726, "step": 25411 }, { "epoch": 1.19042488405865, "grad_norm": 0.610401991573615, "learning_rate": 1.8578234466866046e-06, "loss": 0.266, "step": 25412 }, { "epoch": 1.1904717290485782, "grad_norm": 0.5674615280555468, "learning_rate": 1.8576401619443214e-06, "loss": 0.2682, "step": 25413 }, { "epoch": 1.1905185740385065, "grad_norm": 0.5784629861755249, "learning_rate": 1.8574568808986574e-06, "loss": 0.2564, "step": 25414 }, { "epoch": 1.190565419028435, "grad_norm": 0.6222299423696774, "learning_rate": 1.8572736035506682e-06, "loss": 0.2938, "step": 25415 }, { "epoch": 1.1906122640183632, "grad_norm": 0.5895752116987607, "learning_rate": 1.8570903299014084e-06, "loss": 0.2782, "step": 25416 }, { "epoch": 1.1906591090082916, "grad_norm": 0.5458230580015921, "learning_rate": 1.8569070599519312e-06, "loss": 0.267, "step": 25417 }, { "epoch": 1.1907059539982199, "grad_norm": 0.5944326649764852, "learning_rate": 1.8567237937032923e-06, "loss": 0.2676, "step": 25418 }, { "epoch": 1.1907527989881483, "grad_norm": 0.5846848074914145, "learning_rate": 1.8565405311565466e-06, "loss": 0.2766, "step": 25419 }, { "epoch": 1.1907996439780766, "grad_norm": 0.6264938053150203, "learning_rate": 1.8563572723127476e-06, "loss": 0.2793, "step": 25420 }, { "epoch": 1.1908464889680048, "grad_norm": 0.606631667595456, "learning_rate": 1.8561740171729514e-06, "loss": 0.285, "step": 25421 }, { "epoch": 1.1908933339579333, "grad_norm": 0.6126005578007022, "learning_rate": 1.8559907657382127e-06, "loss": 0.296, "step": 25422 }, { "epoch": 1.1909401789478615, "grad_norm": 0.5925682442613589, "learning_rate": 1.8558075180095846e-06, "loss": 0.2794, "step": 25423 }, { "epoch": 1.1909870239377898, "grad_norm": 0.592020270085716, "learning_rate": 1.8556242739881219e-06, "loss": 0.2832, "step": 25424 }, { "epoch": 1.1910338689277182, "grad_norm": 0.6368301008031712, "learning_rate": 1.8554410336748796e-06, "loss": 0.2756, "step": 25425 }, { "epoch": 1.1910807139176465, "grad_norm": 0.5982226068435014, "learning_rate": 1.8552577970709127e-06, "loss": 0.2774, "step": 25426 }, { "epoch": 1.1911275589075747, "grad_norm": 0.6251091812943181, "learning_rate": 1.8550745641772743e-06, "loss": 0.2727, "step": 25427 }, { "epoch": 1.1911744038975032, "grad_norm": 0.6259500586617703, "learning_rate": 1.854891334995021e-06, "loss": 0.2945, "step": 25428 }, { "epoch": 1.1912212488874314, "grad_norm": 0.6233432953622003, "learning_rate": 1.854708109525205e-06, "loss": 0.2831, "step": 25429 }, { "epoch": 1.1912680938773599, "grad_norm": 0.6082000320382409, "learning_rate": 1.854524887768882e-06, "loss": 0.2597, "step": 25430 }, { "epoch": 1.1913149388672881, "grad_norm": 0.581081310599786, "learning_rate": 1.8543416697271054e-06, "loss": 0.2724, "step": 25431 }, { "epoch": 1.1913617838572166, "grad_norm": 0.5971532438754995, "learning_rate": 1.8541584554009303e-06, "loss": 0.2899, "step": 25432 }, { "epoch": 1.1914086288471448, "grad_norm": 0.6803229364035508, "learning_rate": 1.8539752447914116e-06, "loss": 0.2931, "step": 25433 }, { "epoch": 1.191455473837073, "grad_norm": 0.6080928715921111, "learning_rate": 1.8537920378996025e-06, "loss": 0.2784, "step": 25434 }, { "epoch": 1.1915023188270015, "grad_norm": 0.5346532877181446, "learning_rate": 1.8536088347265586e-06, "loss": 0.2527, "step": 25435 }, { "epoch": 1.1915491638169298, "grad_norm": 0.632918173659925, "learning_rate": 1.8534256352733327e-06, "loss": 0.2928, "step": 25436 }, { "epoch": 1.191596008806858, "grad_norm": 0.6035370638602298, "learning_rate": 1.8532424395409798e-06, "loss": 0.286, "step": 25437 }, { "epoch": 1.1916428537967865, "grad_norm": 0.5693094990967404, "learning_rate": 1.8530592475305539e-06, "loss": 0.261, "step": 25438 }, { "epoch": 1.1916896987867147, "grad_norm": 0.5311236017471849, "learning_rate": 1.8528760592431094e-06, "loss": 0.2655, "step": 25439 }, { "epoch": 1.1917365437766432, "grad_norm": 0.5754327161613062, "learning_rate": 1.8526928746797012e-06, "loss": 0.2754, "step": 25440 }, { "epoch": 1.1917833887665714, "grad_norm": 0.6136120387293453, "learning_rate": 1.852509693841383e-06, "loss": 0.2884, "step": 25441 }, { "epoch": 1.1918302337564997, "grad_norm": 0.6116095307587276, "learning_rate": 1.8523265167292082e-06, "loss": 0.2705, "step": 25442 }, { "epoch": 1.1918770787464281, "grad_norm": 0.579404037843793, "learning_rate": 1.852143343344231e-06, "loss": 0.2775, "step": 25443 }, { "epoch": 1.1919239237363564, "grad_norm": 0.6121780030421938, "learning_rate": 1.8519601736875069e-06, "loss": 0.2927, "step": 25444 }, { "epoch": 1.1919707687262848, "grad_norm": 0.5817916025111873, "learning_rate": 1.8517770077600883e-06, "loss": 0.2749, "step": 25445 }, { "epoch": 1.192017613716213, "grad_norm": 0.6186649973611923, "learning_rate": 1.85159384556303e-06, "loss": 0.2711, "step": 25446 }, { "epoch": 1.1920644587061413, "grad_norm": 0.572057200045901, "learning_rate": 1.8514106870973875e-06, "loss": 0.267, "step": 25447 }, { "epoch": 1.1921113036960698, "grad_norm": 0.6487493673026903, "learning_rate": 1.8512275323642132e-06, "loss": 0.2904, "step": 25448 }, { "epoch": 1.192158148685998, "grad_norm": 0.5843255988858329, "learning_rate": 1.8510443813645603e-06, "loss": 0.2786, "step": 25449 }, { "epoch": 1.1922049936759263, "grad_norm": 0.5682650214134527, "learning_rate": 1.8508612340994841e-06, "loss": 0.2686, "step": 25450 }, { "epoch": 1.1922518386658547, "grad_norm": 0.5790765099341334, "learning_rate": 1.8506780905700388e-06, "loss": 0.2611, "step": 25451 }, { "epoch": 1.192298683655783, "grad_norm": 0.599003467712413, "learning_rate": 1.8504949507772777e-06, "loss": 0.281, "step": 25452 }, { "epoch": 1.1923455286457114, "grad_norm": 0.6172381248216205, "learning_rate": 1.8503118147222556e-06, "loss": 0.283, "step": 25453 }, { "epoch": 1.1923923736356397, "grad_norm": 0.5790367474042294, "learning_rate": 1.850128682406025e-06, "loss": 0.2823, "step": 25454 }, { "epoch": 1.1924392186255681, "grad_norm": 0.6313913467414606, "learning_rate": 1.8499455538296406e-06, "loss": 0.3028, "step": 25455 }, { "epoch": 1.1924860636154964, "grad_norm": 0.5899905012864815, "learning_rate": 1.849762428994156e-06, "loss": 0.292, "step": 25456 }, { "epoch": 1.1925329086054246, "grad_norm": 0.605906032373125, "learning_rate": 1.8495793079006254e-06, "loss": 0.2726, "step": 25457 }, { "epoch": 1.192579753595353, "grad_norm": 0.5696650276189844, "learning_rate": 1.8493961905501021e-06, "loss": 0.2684, "step": 25458 }, { "epoch": 1.1926265985852813, "grad_norm": 0.6263323764380307, "learning_rate": 1.849213076943641e-06, "loss": 0.2778, "step": 25459 }, { "epoch": 1.1926734435752095, "grad_norm": 0.6187382023552817, "learning_rate": 1.8490299670822945e-06, "loss": 0.2848, "step": 25460 }, { "epoch": 1.192720288565138, "grad_norm": 0.5602364491223432, "learning_rate": 1.848846860967117e-06, "loss": 0.2586, "step": 25461 }, { "epoch": 1.1927671335550663, "grad_norm": 0.6728120880054662, "learning_rate": 1.848663758599162e-06, "loss": 0.2938, "step": 25462 }, { "epoch": 1.1928139785449945, "grad_norm": 0.5432121139323961, "learning_rate": 1.848480659979483e-06, "loss": 0.2591, "step": 25463 }, { "epoch": 1.192860823534923, "grad_norm": 0.6006437236820168, "learning_rate": 1.8482975651091348e-06, "loss": 0.2802, "step": 25464 }, { "epoch": 1.1929076685248512, "grad_norm": 0.5957309383333526, "learning_rate": 1.8481144739891706e-06, "loss": 0.2696, "step": 25465 }, { "epoch": 1.1929545135147797, "grad_norm": 0.6314870056535624, "learning_rate": 1.8479313866206427e-06, "loss": 0.2827, "step": 25466 }, { "epoch": 1.193001358504708, "grad_norm": 0.5700496878681173, "learning_rate": 1.8477483030046063e-06, "loss": 0.2635, "step": 25467 }, { "epoch": 1.1930482034946364, "grad_norm": 0.5981488739352452, "learning_rate": 1.8475652231421146e-06, "loss": 0.2752, "step": 25468 }, { "epoch": 1.1930950484845646, "grad_norm": 0.5795239315337612, "learning_rate": 1.8473821470342208e-06, "loss": 0.2554, "step": 25469 }, { "epoch": 1.1931418934744928, "grad_norm": 0.5794007490079058, "learning_rate": 1.847199074681978e-06, "loss": 0.2747, "step": 25470 }, { "epoch": 1.1931887384644213, "grad_norm": 0.6313053103430919, "learning_rate": 1.8470160060864424e-06, "loss": 0.2785, "step": 25471 }, { "epoch": 1.1932355834543495, "grad_norm": 0.5998278899587044, "learning_rate": 1.8468329412486647e-06, "loss": 0.2759, "step": 25472 }, { "epoch": 1.1932824284442778, "grad_norm": 0.6073975041127359, "learning_rate": 1.8466498801696985e-06, "loss": 0.2693, "step": 25473 }, { "epoch": 1.1933292734342063, "grad_norm": 0.6353035081581055, "learning_rate": 1.846466822850598e-06, "loss": 0.2806, "step": 25474 }, { "epoch": 1.1933761184241345, "grad_norm": 0.6306322223923949, "learning_rate": 1.8462837692924173e-06, "loss": 0.2787, "step": 25475 }, { "epoch": 1.193422963414063, "grad_norm": 0.6279559391085642, "learning_rate": 1.8461007194962088e-06, "loss": 0.2778, "step": 25476 }, { "epoch": 1.1934698084039912, "grad_norm": 0.5676633215389023, "learning_rate": 1.8459176734630274e-06, "loss": 0.2727, "step": 25477 }, { "epoch": 1.1935166533939194, "grad_norm": 0.6036972434701653, "learning_rate": 1.8457346311939245e-06, "loss": 0.2705, "step": 25478 }, { "epoch": 1.193563498383848, "grad_norm": 0.5971659700394247, "learning_rate": 1.8455515926899544e-06, "loss": 0.2907, "step": 25479 }, { "epoch": 1.1936103433737761, "grad_norm": 0.5610662320292571, "learning_rate": 1.8453685579521702e-06, "loss": 0.2749, "step": 25480 }, { "epoch": 1.1936571883637046, "grad_norm": 0.5974510995695623, "learning_rate": 1.8451855269816254e-06, "loss": 0.2705, "step": 25481 }, { "epoch": 1.1937040333536328, "grad_norm": 0.5137374655925464, "learning_rate": 1.8450024997793736e-06, "loss": 0.2609, "step": 25482 }, { "epoch": 1.193750878343561, "grad_norm": 0.5747720536393657, "learning_rate": 1.8448194763464685e-06, "loss": 0.2658, "step": 25483 }, { "epoch": 1.1937977233334895, "grad_norm": 0.5883998127124466, "learning_rate": 1.8446364566839617e-06, "loss": 0.2754, "step": 25484 }, { "epoch": 1.1938445683234178, "grad_norm": 0.5949005424348526, "learning_rate": 1.844453440792907e-06, "loss": 0.2784, "step": 25485 }, { "epoch": 1.193891413313346, "grad_norm": 0.5696647479625347, "learning_rate": 1.844270428674359e-06, "loss": 0.2657, "step": 25486 }, { "epoch": 1.1939382583032745, "grad_norm": 0.6215840052514681, "learning_rate": 1.8440874203293691e-06, "loss": 0.2826, "step": 25487 }, { "epoch": 1.1939851032932027, "grad_norm": 0.5549148858259627, "learning_rate": 1.8439044157589911e-06, "loss": 0.2568, "step": 25488 }, { "epoch": 1.1940319482831312, "grad_norm": 0.5987242001118209, "learning_rate": 1.8437214149642797e-06, "loss": 0.2935, "step": 25489 }, { "epoch": 1.1940787932730594, "grad_norm": 0.5426949283301276, "learning_rate": 1.8435384179462857e-06, "loss": 0.2606, "step": 25490 }, { "epoch": 1.1941256382629877, "grad_norm": 0.5567721239331466, "learning_rate": 1.8433554247060626e-06, "loss": 0.2682, "step": 25491 }, { "epoch": 1.1941724832529161, "grad_norm": 0.5724167110687545, "learning_rate": 1.8431724352446642e-06, "loss": 0.2751, "step": 25492 }, { "epoch": 1.1942193282428444, "grad_norm": 0.5713805834960266, "learning_rate": 1.842989449563144e-06, "loss": 0.2738, "step": 25493 }, { "epoch": 1.1942661732327728, "grad_norm": 0.5419771175172189, "learning_rate": 1.8428064676625534e-06, "loss": 0.2578, "step": 25494 }, { "epoch": 1.194313018222701, "grad_norm": 0.6033017804631059, "learning_rate": 1.8426234895439477e-06, "loss": 0.265, "step": 25495 }, { "epoch": 1.1943598632126293, "grad_norm": 0.5435663146470796, "learning_rate": 1.8424405152083773e-06, "loss": 0.2576, "step": 25496 }, { "epoch": 1.1944067082025578, "grad_norm": 0.5911447225171637, "learning_rate": 1.8422575446568974e-06, "loss": 0.2695, "step": 25497 }, { "epoch": 1.194453553192486, "grad_norm": 0.5636985234598881, "learning_rate": 1.8420745778905598e-06, "loss": 0.2695, "step": 25498 }, { "epoch": 1.1945003981824143, "grad_norm": 0.605203431584135, "learning_rate": 1.8418916149104174e-06, "loss": 0.2671, "step": 25499 }, { "epoch": 1.1945472431723427, "grad_norm": 0.5905314188879953, "learning_rate": 1.841708655717523e-06, "loss": 0.2803, "step": 25500 }, { "epoch": 1.194594088162271, "grad_norm": 0.6157597368726907, "learning_rate": 1.8415257003129312e-06, "loss": 0.2922, "step": 25501 }, { "epoch": 1.1946409331521994, "grad_norm": 0.5661610036054566, "learning_rate": 1.841342748697692e-06, "loss": 0.2689, "step": 25502 }, { "epoch": 1.1946877781421277, "grad_norm": 0.5869349584778428, "learning_rate": 1.8411598008728604e-06, "loss": 0.2934, "step": 25503 }, { "epoch": 1.1947346231320561, "grad_norm": 0.5867593038746892, "learning_rate": 1.8409768568394881e-06, "loss": 0.286, "step": 25504 }, { "epoch": 1.1947814681219844, "grad_norm": 0.5829039897485484, "learning_rate": 1.8407939165986286e-06, "loss": 0.2668, "step": 25505 }, { "epoch": 1.1948283131119126, "grad_norm": 0.5794330664806732, "learning_rate": 1.8406109801513347e-06, "loss": 0.2771, "step": 25506 }, { "epoch": 1.194875158101841, "grad_norm": 0.6274083523564518, "learning_rate": 1.8404280474986587e-06, "loss": 0.2817, "step": 25507 }, { "epoch": 1.1949220030917693, "grad_norm": 0.5918402484352013, "learning_rate": 1.8402451186416539e-06, "loss": 0.2721, "step": 25508 }, { "epoch": 1.1949688480816976, "grad_norm": 0.5983636133850224, "learning_rate": 1.8400621935813718e-06, "loss": 0.2859, "step": 25509 }, { "epoch": 1.195015693071626, "grad_norm": 0.5863987988754137, "learning_rate": 1.8398792723188665e-06, "loss": 0.2658, "step": 25510 }, { "epoch": 1.1950625380615543, "grad_norm": 0.5977080610791129, "learning_rate": 1.8396963548551897e-06, "loss": 0.2933, "step": 25511 }, { "epoch": 1.1951093830514827, "grad_norm": 0.546186437305947, "learning_rate": 1.8395134411913946e-06, "loss": 0.2514, "step": 25512 }, { "epoch": 1.195156228041411, "grad_norm": 0.6104047510702968, "learning_rate": 1.8393305313285336e-06, "loss": 0.3057, "step": 25513 }, { "epoch": 1.1952030730313392, "grad_norm": 0.5554130726742676, "learning_rate": 1.8391476252676603e-06, "loss": 0.2557, "step": 25514 }, { "epoch": 1.1952499180212677, "grad_norm": 0.6034803742075024, "learning_rate": 1.838964723009825e-06, "loss": 0.2687, "step": 25515 }, { "epoch": 1.195296763011196, "grad_norm": 0.5717843239238443, "learning_rate": 1.8387818245560816e-06, "loss": 0.26, "step": 25516 }, { "epoch": 1.1953436080011244, "grad_norm": 0.6260191395453357, "learning_rate": 1.838598929907483e-06, "loss": 0.2935, "step": 25517 }, { "epoch": 1.1953904529910526, "grad_norm": 0.5755009182208878, "learning_rate": 1.8384160390650812e-06, "loss": 0.2725, "step": 25518 }, { "epoch": 1.1954372979809809, "grad_norm": 0.5690795771250806, "learning_rate": 1.8382331520299284e-06, "loss": 0.2662, "step": 25519 }, { "epoch": 1.1954841429709093, "grad_norm": 0.6343250697602265, "learning_rate": 1.8380502688030788e-06, "loss": 0.2991, "step": 25520 }, { "epoch": 1.1955309879608376, "grad_norm": 0.6431174048145055, "learning_rate": 1.8378673893855831e-06, "loss": 0.2939, "step": 25521 }, { "epoch": 1.1955778329507658, "grad_norm": 0.5921799921974504, "learning_rate": 1.8376845137784934e-06, "loss": 0.2844, "step": 25522 }, { "epoch": 1.1956246779406943, "grad_norm": 0.5228258594734824, "learning_rate": 1.8375016419828628e-06, "loss": 0.2628, "step": 25523 }, { "epoch": 1.1956715229306225, "grad_norm": 0.599445674158046, "learning_rate": 1.8373187739997445e-06, "loss": 0.2706, "step": 25524 }, { "epoch": 1.195718367920551, "grad_norm": 0.5724808506899148, "learning_rate": 1.8371359098301894e-06, "loss": 0.2736, "step": 25525 }, { "epoch": 1.1957652129104792, "grad_norm": 0.5809551797763508, "learning_rate": 1.836953049475252e-06, "loss": 0.2625, "step": 25526 }, { "epoch": 1.1958120579004075, "grad_norm": 0.6296771863545085, "learning_rate": 1.8367701929359816e-06, "loss": 0.2896, "step": 25527 }, { "epoch": 1.195858902890336, "grad_norm": 0.627683489160719, "learning_rate": 1.8365873402134326e-06, "loss": 0.2767, "step": 25528 }, { "epoch": 1.1959057478802642, "grad_norm": 0.5869107126263285, "learning_rate": 1.8364044913086565e-06, "loss": 0.2767, "step": 25529 }, { "epoch": 1.1959525928701926, "grad_norm": 0.6094233034505617, "learning_rate": 1.8362216462227056e-06, "loss": 0.2715, "step": 25530 }, { "epoch": 1.1959994378601209, "grad_norm": 0.5977081991517247, "learning_rate": 1.8360388049566325e-06, "loss": 0.2944, "step": 25531 }, { "epoch": 1.196046282850049, "grad_norm": 0.5991716616388887, "learning_rate": 1.83585596751149e-06, "loss": 0.2888, "step": 25532 }, { "epoch": 1.1960931278399776, "grad_norm": 0.5858434784801421, "learning_rate": 1.8356731338883285e-06, "loss": 0.2722, "step": 25533 }, { "epoch": 1.1961399728299058, "grad_norm": 0.588187630896487, "learning_rate": 1.8354903040882011e-06, "loss": 0.284, "step": 25534 }, { "epoch": 1.196186817819834, "grad_norm": 0.6198472790181392, "learning_rate": 1.8353074781121606e-06, "loss": 0.2918, "step": 25535 }, { "epoch": 1.1962336628097625, "grad_norm": 0.5752085127606694, "learning_rate": 1.8351246559612579e-06, "loss": 0.2682, "step": 25536 }, { "epoch": 1.1962805077996908, "grad_norm": 0.5770471142927006, "learning_rate": 1.8349418376365455e-06, "loss": 0.2734, "step": 25537 }, { "epoch": 1.1963273527896192, "grad_norm": 0.6037063732339676, "learning_rate": 1.834759023139077e-06, "loss": 0.2799, "step": 25538 }, { "epoch": 1.1963741977795475, "grad_norm": 0.550803143234568, "learning_rate": 1.8345762124699024e-06, "loss": 0.2683, "step": 25539 }, { "epoch": 1.196421042769476, "grad_norm": 0.5442724725783847, "learning_rate": 1.8343934056300738e-06, "loss": 0.2653, "step": 25540 }, { "epoch": 1.1964678877594042, "grad_norm": 0.5866986772126385, "learning_rate": 1.8342106026206442e-06, "loss": 0.2678, "step": 25541 }, { "epoch": 1.1965147327493324, "grad_norm": 0.5660110833009074, "learning_rate": 1.8340278034426657e-06, "loss": 0.2649, "step": 25542 }, { "epoch": 1.1965615777392609, "grad_norm": 0.5998471042443426, "learning_rate": 1.8338450080971893e-06, "loss": 0.2661, "step": 25543 }, { "epoch": 1.196608422729189, "grad_norm": 0.5798427312667497, "learning_rate": 1.8336622165852683e-06, "loss": 0.2648, "step": 25544 }, { "epoch": 1.1966552677191173, "grad_norm": 0.6576828190499137, "learning_rate": 1.8334794289079526e-06, "loss": 0.2897, "step": 25545 }, { "epoch": 1.1967021127090458, "grad_norm": 0.6364396694296345, "learning_rate": 1.833296645066296e-06, "loss": 0.2952, "step": 25546 }, { "epoch": 1.196748957698974, "grad_norm": 0.5922659383941297, "learning_rate": 1.8331138650613494e-06, "loss": 0.2649, "step": 25547 }, { "epoch": 1.1967958026889025, "grad_norm": 0.5889565936541286, "learning_rate": 1.8329310888941648e-06, "loss": 0.2633, "step": 25548 }, { "epoch": 1.1968426476788308, "grad_norm": 0.585242532463909, "learning_rate": 1.832748316565794e-06, "loss": 0.2622, "step": 25549 }, { "epoch": 1.196889492668759, "grad_norm": 0.5427164967297073, "learning_rate": 1.83256554807729e-06, "loss": 0.2581, "step": 25550 }, { "epoch": 1.1969363376586875, "grad_norm": 0.6055971738391542, "learning_rate": 1.8323827834297025e-06, "loss": 0.2967, "step": 25551 }, { "epoch": 1.1969831826486157, "grad_norm": 0.590185959625524, "learning_rate": 1.8322000226240846e-06, "loss": 0.2684, "step": 25552 }, { "epoch": 1.1970300276385442, "grad_norm": 0.6134876290628207, "learning_rate": 1.832017265661487e-06, "loss": 0.2832, "step": 25553 }, { "epoch": 1.1970768726284724, "grad_norm": 0.630833801763123, "learning_rate": 1.8318345125429625e-06, "loss": 0.2805, "step": 25554 }, { "epoch": 1.1971237176184006, "grad_norm": 0.6037744182420101, "learning_rate": 1.831651763269563e-06, "loss": 0.2749, "step": 25555 }, { "epoch": 1.197170562608329, "grad_norm": 0.6363256528124224, "learning_rate": 1.83146901784234e-06, "loss": 0.2981, "step": 25556 }, { "epoch": 1.1972174075982573, "grad_norm": 0.59702859482333, "learning_rate": 1.831286276262344e-06, "loss": 0.2924, "step": 25557 }, { "epoch": 1.1972642525881856, "grad_norm": 0.597298274245846, "learning_rate": 1.831103538530627e-06, "loss": 0.2976, "step": 25558 }, { "epoch": 1.197311097578114, "grad_norm": 0.613946621008271, "learning_rate": 1.830920804648242e-06, "loss": 0.2626, "step": 25559 }, { "epoch": 1.1973579425680423, "grad_norm": 0.5919533848273718, "learning_rate": 1.8307380746162384e-06, "loss": 0.2688, "step": 25560 }, { "epoch": 1.1974047875579708, "grad_norm": 0.5522967158472606, "learning_rate": 1.8305553484356694e-06, "loss": 0.2648, "step": 25561 }, { "epoch": 1.197451632547899, "grad_norm": 0.6027694441971448, "learning_rate": 1.830372626107587e-06, "loss": 0.2774, "step": 25562 }, { "epoch": 1.1974984775378272, "grad_norm": 0.6024391547519046, "learning_rate": 1.8301899076330415e-06, "loss": 0.2656, "step": 25563 }, { "epoch": 1.1975453225277557, "grad_norm": 0.6431316462783094, "learning_rate": 1.8300071930130841e-06, "loss": 0.2742, "step": 25564 }, { "epoch": 1.197592167517684, "grad_norm": 0.6126550779619301, "learning_rate": 1.8298244822487671e-06, "loss": 0.2847, "step": 25565 }, { "epoch": 1.1976390125076124, "grad_norm": 0.6041848720950462, "learning_rate": 1.8296417753411421e-06, "loss": 0.2842, "step": 25566 }, { "epoch": 1.1976858574975406, "grad_norm": 0.6087497508074478, "learning_rate": 1.8294590722912597e-06, "loss": 0.2696, "step": 25567 }, { "epoch": 1.1977327024874689, "grad_norm": 0.5780229866325217, "learning_rate": 1.8292763731001728e-06, "loss": 0.2719, "step": 25568 }, { "epoch": 1.1977795474773973, "grad_norm": 0.5638143010623241, "learning_rate": 1.8290936777689308e-06, "loss": 0.2612, "step": 25569 }, { "epoch": 1.1978263924673256, "grad_norm": 0.6049703456131434, "learning_rate": 1.8289109862985865e-06, "loss": 0.2946, "step": 25570 }, { "epoch": 1.1978732374572538, "grad_norm": 0.5762088465831445, "learning_rate": 1.8287282986901905e-06, "loss": 0.2721, "step": 25571 }, { "epoch": 1.1979200824471823, "grad_norm": 0.5678923151283325, "learning_rate": 1.8285456149447943e-06, "loss": 0.275, "step": 25572 }, { "epoch": 1.1979669274371105, "grad_norm": 0.5925553570580993, "learning_rate": 1.8283629350634496e-06, "loss": 0.2716, "step": 25573 }, { "epoch": 1.198013772427039, "grad_norm": 0.5831128816640291, "learning_rate": 1.8281802590472073e-06, "loss": 0.2668, "step": 25574 }, { "epoch": 1.1980606174169672, "grad_norm": 0.6001059106179082, "learning_rate": 1.8279975868971195e-06, "loss": 0.2928, "step": 25575 }, { "epoch": 1.1981074624068957, "grad_norm": 0.5699914506372917, "learning_rate": 1.8278149186142357e-06, "loss": 0.2684, "step": 25576 }, { "epoch": 1.198154307396824, "grad_norm": 0.5564177789302394, "learning_rate": 1.8276322541996088e-06, "loss": 0.2624, "step": 25577 }, { "epoch": 1.1982011523867522, "grad_norm": 0.589344014916764, "learning_rate": 1.8274495936542886e-06, "loss": 0.2756, "step": 25578 }, { "epoch": 1.1982479973766806, "grad_norm": 0.6194786448726726, "learning_rate": 1.827266936979327e-06, "loss": 0.2939, "step": 25579 }, { "epoch": 1.1982948423666089, "grad_norm": 0.5952820387575063, "learning_rate": 1.8270842841757754e-06, "loss": 0.2787, "step": 25580 }, { "epoch": 1.1983416873565371, "grad_norm": 0.6196802313499801, "learning_rate": 1.8269016352446855e-06, "loss": 0.2901, "step": 25581 }, { "epoch": 1.1983885323464656, "grad_norm": 0.580848502892328, "learning_rate": 1.8267189901871063e-06, "loss": 0.2803, "step": 25582 }, { "epoch": 1.1984353773363938, "grad_norm": 0.6017526311053337, "learning_rate": 1.82653634900409e-06, "loss": 0.2851, "step": 25583 }, { "epoch": 1.1984822223263223, "grad_norm": 0.5742638835647451, "learning_rate": 1.8263537116966883e-06, "loss": 0.2667, "step": 25584 }, { "epoch": 1.1985290673162505, "grad_norm": 0.6084888202974509, "learning_rate": 1.826171078265951e-06, "loss": 0.2818, "step": 25585 }, { "epoch": 1.1985759123061788, "grad_norm": 0.5630397016615638, "learning_rate": 1.8259884487129303e-06, "loss": 0.2814, "step": 25586 }, { "epoch": 1.1986227572961072, "grad_norm": 0.6034415314745094, "learning_rate": 1.8258058230386772e-06, "loss": 0.2673, "step": 25587 }, { "epoch": 1.1986696022860355, "grad_norm": 0.6057502555390805, "learning_rate": 1.8256232012442419e-06, "loss": 0.2787, "step": 25588 }, { "epoch": 1.198716447275964, "grad_norm": 0.5748972819603473, "learning_rate": 1.825440583330675e-06, "loss": 0.2568, "step": 25589 }, { "epoch": 1.1987632922658922, "grad_norm": 0.6204422195402679, "learning_rate": 1.825257969299028e-06, "loss": 0.285, "step": 25590 }, { "epoch": 1.1988101372558204, "grad_norm": 0.6387779103079021, "learning_rate": 1.8250753591503524e-06, "loss": 0.2797, "step": 25591 }, { "epoch": 1.1988569822457489, "grad_norm": 0.5818109209697513, "learning_rate": 1.8248927528856975e-06, "loss": 0.259, "step": 25592 }, { "epoch": 1.1989038272356771, "grad_norm": 0.6354645164960023, "learning_rate": 1.8247101505061166e-06, "loss": 0.2791, "step": 25593 }, { "epoch": 1.1989506722256054, "grad_norm": 0.5761312509220298, "learning_rate": 1.824527552012658e-06, "loss": 0.269, "step": 25594 }, { "epoch": 1.1989975172155338, "grad_norm": 0.5954111310280482, "learning_rate": 1.8243449574063738e-06, "loss": 0.2748, "step": 25595 }, { "epoch": 1.199044362205462, "grad_norm": 0.6111194901088853, "learning_rate": 1.8241623666883143e-06, "loss": 0.2668, "step": 25596 }, { "epoch": 1.1990912071953905, "grad_norm": 0.608284249738902, "learning_rate": 1.823979779859531e-06, "loss": 0.271, "step": 25597 }, { "epoch": 1.1991380521853188, "grad_norm": 0.6037946691883536, "learning_rate": 1.8237971969210736e-06, "loss": 0.2817, "step": 25598 }, { "epoch": 1.199184897175247, "grad_norm": 0.6125089551795538, "learning_rate": 1.8236146178739944e-06, "loss": 0.2739, "step": 25599 }, { "epoch": 1.1992317421651755, "grad_norm": 0.6127845266424611, "learning_rate": 1.8234320427193418e-06, "loss": 0.2717, "step": 25600 }, { "epoch": 1.1992785871551037, "grad_norm": 0.5986775889124591, "learning_rate": 1.823249471458169e-06, "loss": 0.2695, "step": 25601 }, { "epoch": 1.1993254321450322, "grad_norm": 0.5937869253330268, "learning_rate": 1.8230669040915242e-06, "loss": 0.2865, "step": 25602 }, { "epoch": 1.1993722771349604, "grad_norm": 0.57909108458385, "learning_rate": 1.8228843406204594e-06, "loss": 0.2623, "step": 25603 }, { "epoch": 1.1994191221248887, "grad_norm": 0.6443340227876581, "learning_rate": 1.8227017810460254e-06, "loss": 0.2613, "step": 25604 }, { "epoch": 1.1994659671148171, "grad_norm": 0.5768436633175281, "learning_rate": 1.822519225369273e-06, "loss": 0.277, "step": 25605 }, { "epoch": 1.1995128121047454, "grad_norm": 0.585933181398003, "learning_rate": 1.8223366735912513e-06, "loss": 0.2783, "step": 25606 }, { "epoch": 1.1995596570946736, "grad_norm": 0.5783200044073312, "learning_rate": 1.8221541257130118e-06, "loss": 0.2744, "step": 25607 }, { "epoch": 1.199606502084602, "grad_norm": 0.5394564513816038, "learning_rate": 1.8219715817356054e-06, "loss": 0.2602, "step": 25608 }, { "epoch": 1.1996533470745303, "grad_norm": 0.5983608793688057, "learning_rate": 1.8217890416600814e-06, "loss": 0.2604, "step": 25609 }, { "epoch": 1.1997001920644588, "grad_norm": 0.5923573026033266, "learning_rate": 1.821606505487491e-06, "loss": 0.2668, "step": 25610 }, { "epoch": 1.199747037054387, "grad_norm": 0.6355761472700794, "learning_rate": 1.8214239732188859e-06, "loss": 0.283, "step": 25611 }, { "epoch": 1.1997938820443155, "grad_norm": 0.5855672899467506, "learning_rate": 1.8212414448553148e-06, "loss": 0.285, "step": 25612 }, { "epoch": 1.1998407270342437, "grad_norm": 0.5369069174008785, "learning_rate": 1.821058920397828e-06, "loss": 0.262, "step": 25613 }, { "epoch": 1.199887572024172, "grad_norm": 0.6073852281264203, "learning_rate": 1.8208763998474762e-06, "loss": 0.3008, "step": 25614 }, { "epoch": 1.1999344170141004, "grad_norm": 0.5478515553597112, "learning_rate": 1.820693883205311e-06, "loss": 0.2566, "step": 25615 }, { "epoch": 1.1999812620040287, "grad_norm": 0.6105144521569145, "learning_rate": 1.8205113704723809e-06, "loss": 0.2852, "step": 25616 }, { "epoch": 1.200028106993957, "grad_norm": 0.6074201040891756, "learning_rate": 1.8203288616497384e-06, "loss": 0.2876, "step": 25617 }, { "epoch": 1.2000749519838854, "grad_norm": 0.5760245416169406, "learning_rate": 1.8201463567384308e-06, "loss": 0.2662, "step": 25618 }, { "epoch": 1.2001217969738136, "grad_norm": 0.5561641217697788, "learning_rate": 1.8199638557395114e-06, "loss": 0.2733, "step": 25619 }, { "epoch": 1.200168641963742, "grad_norm": 0.5828328124550203, "learning_rate": 1.819781358654028e-06, "loss": 0.2685, "step": 25620 }, { "epoch": 1.2002154869536703, "grad_norm": 0.5987313781353131, "learning_rate": 1.819598865483032e-06, "loss": 0.2782, "step": 25621 }, { "epoch": 1.2002623319435985, "grad_norm": 0.5464549968266418, "learning_rate": 1.8194163762275743e-06, "loss": 0.2545, "step": 25622 }, { "epoch": 1.200309176933527, "grad_norm": 0.6703540135322512, "learning_rate": 1.8192338908887046e-06, "loss": 0.2775, "step": 25623 }, { "epoch": 1.2003560219234553, "grad_norm": 0.6259050679306182, "learning_rate": 1.8190514094674719e-06, "loss": 0.2809, "step": 25624 }, { "epoch": 1.2004028669133837, "grad_norm": 0.6035188658377739, "learning_rate": 1.8188689319649271e-06, "loss": 0.2757, "step": 25625 }, { "epoch": 1.200449711903312, "grad_norm": 0.6121611098860735, "learning_rate": 1.8186864583821208e-06, "loss": 0.2784, "step": 25626 }, { "epoch": 1.2004965568932402, "grad_norm": 0.5611109201310737, "learning_rate": 1.8185039887201022e-06, "loss": 0.2729, "step": 25627 }, { "epoch": 1.2005434018831687, "grad_norm": 0.5943432477456827, "learning_rate": 1.818321522979922e-06, "loss": 0.284, "step": 25628 }, { "epoch": 1.200590246873097, "grad_norm": 0.5284191686800085, "learning_rate": 1.8181390611626309e-06, "loss": 0.2453, "step": 25629 }, { "epoch": 1.2006370918630251, "grad_norm": 0.6333349485547929, "learning_rate": 1.817956603269278e-06, "loss": 0.3039, "step": 25630 }, { "epoch": 1.2006839368529536, "grad_norm": 0.5874274489139091, "learning_rate": 1.8177741493009126e-06, "loss": 0.2824, "step": 25631 }, { "epoch": 1.2007307818428818, "grad_norm": 0.5965726368814048, "learning_rate": 1.8175916992585854e-06, "loss": 0.2702, "step": 25632 }, { "epoch": 1.2007776268328103, "grad_norm": 0.6019616077600152, "learning_rate": 1.8174092531433474e-06, "loss": 0.2903, "step": 25633 }, { "epoch": 1.2008244718227385, "grad_norm": 0.6561979676198957, "learning_rate": 1.8172268109562466e-06, "loss": 0.2948, "step": 25634 }, { "epoch": 1.2008713168126668, "grad_norm": 0.5541372278515176, "learning_rate": 1.817044372698335e-06, "loss": 0.2628, "step": 25635 }, { "epoch": 1.2009181618025953, "grad_norm": 0.6000946185601199, "learning_rate": 1.8168619383706607e-06, "loss": 0.2777, "step": 25636 }, { "epoch": 1.2009650067925235, "grad_norm": 0.56899066912367, "learning_rate": 1.8166795079742744e-06, "loss": 0.2609, "step": 25637 }, { "epoch": 1.201011851782452, "grad_norm": 0.585550344681444, "learning_rate": 1.8164970815102255e-06, "loss": 0.2631, "step": 25638 }, { "epoch": 1.2010586967723802, "grad_norm": 0.5727249302118349, "learning_rate": 1.8163146589795645e-06, "loss": 0.2749, "step": 25639 }, { "epoch": 1.2011055417623084, "grad_norm": 0.6035644239743501, "learning_rate": 1.8161322403833403e-06, "loss": 0.2772, "step": 25640 }, { "epoch": 1.201152386752237, "grad_norm": 0.6294087822755283, "learning_rate": 1.8159498257226032e-06, "loss": 0.2827, "step": 25641 }, { "epoch": 1.2011992317421651, "grad_norm": 0.5403212843485168, "learning_rate": 1.8157674149984039e-06, "loss": 0.2707, "step": 25642 }, { "epoch": 1.2012460767320934, "grad_norm": 0.6177864135535569, "learning_rate": 1.8155850082117907e-06, "loss": 0.2739, "step": 25643 }, { "epoch": 1.2012929217220218, "grad_norm": 0.5661192491754343, "learning_rate": 1.8154026053638132e-06, "loss": 0.266, "step": 25644 }, { "epoch": 1.20133976671195, "grad_norm": 0.5968950402377875, "learning_rate": 1.8152202064555216e-06, "loss": 0.2875, "step": 25645 }, { "epoch": 1.2013866117018785, "grad_norm": 0.5627295790640957, "learning_rate": 1.8150378114879664e-06, "loss": 0.2598, "step": 25646 }, { "epoch": 1.2014334566918068, "grad_norm": 0.5747319381580468, "learning_rate": 1.8148554204621954e-06, "loss": 0.2743, "step": 25647 }, { "epoch": 1.2014803016817353, "grad_norm": 0.5604241779297059, "learning_rate": 1.8146730333792606e-06, "loss": 0.2786, "step": 25648 }, { "epoch": 1.2015271466716635, "grad_norm": 0.610875631110188, "learning_rate": 1.8144906502402094e-06, "loss": 0.2955, "step": 25649 }, { "epoch": 1.2015739916615917, "grad_norm": 0.5722555305763495, "learning_rate": 1.8143082710460923e-06, "loss": 0.2667, "step": 25650 }, { "epoch": 1.2016208366515202, "grad_norm": 0.6622885532582332, "learning_rate": 1.8141258957979585e-06, "loss": 0.3036, "step": 25651 }, { "epoch": 1.2016676816414484, "grad_norm": 0.6393501351237293, "learning_rate": 1.813943524496858e-06, "loss": 0.3082, "step": 25652 }, { "epoch": 1.2017145266313767, "grad_norm": 0.5487427578688585, "learning_rate": 1.81376115714384e-06, "loss": 0.2691, "step": 25653 }, { "epoch": 1.2017613716213051, "grad_norm": 0.6052876716218267, "learning_rate": 1.8135787937399552e-06, "loss": 0.2845, "step": 25654 }, { "epoch": 1.2018082166112334, "grad_norm": 0.5850493105423534, "learning_rate": 1.8133964342862504e-06, "loss": 0.2629, "step": 25655 }, { "epoch": 1.2018550616011618, "grad_norm": 0.563931527075333, "learning_rate": 1.8132140787837766e-06, "loss": 0.2663, "step": 25656 }, { "epoch": 1.20190190659109, "grad_norm": 0.6002133882006038, "learning_rate": 1.813031727233584e-06, "loss": 0.2762, "step": 25657 }, { "epoch": 1.2019487515810183, "grad_norm": 0.5745767534713228, "learning_rate": 1.8128493796367203e-06, "loss": 0.2757, "step": 25658 }, { "epoch": 1.2019955965709468, "grad_norm": 0.5914266536659822, "learning_rate": 1.8126670359942358e-06, "loss": 0.2809, "step": 25659 }, { "epoch": 1.202042441560875, "grad_norm": 0.5864903662300679, "learning_rate": 1.812484696307181e-06, "loss": 0.2635, "step": 25660 }, { "epoch": 1.2020892865508035, "grad_norm": 0.580510842576406, "learning_rate": 1.8123023605766032e-06, "loss": 0.2825, "step": 25661 }, { "epoch": 1.2021361315407317, "grad_norm": 0.6076565069715241, "learning_rate": 1.8121200288035524e-06, "loss": 0.2588, "step": 25662 }, { "epoch": 1.20218297653066, "grad_norm": 0.6769453260471932, "learning_rate": 1.8119377009890773e-06, "loss": 0.3031, "step": 25663 }, { "epoch": 1.2022298215205884, "grad_norm": 0.596612576493927, "learning_rate": 1.8117553771342287e-06, "loss": 0.2936, "step": 25664 }, { "epoch": 1.2022766665105167, "grad_norm": 0.6141003316078238, "learning_rate": 1.8115730572400542e-06, "loss": 0.2832, "step": 25665 }, { "epoch": 1.202323511500445, "grad_norm": 0.6092054606966528, "learning_rate": 1.8113907413076048e-06, "loss": 0.283, "step": 25666 }, { "epoch": 1.2023703564903734, "grad_norm": 0.6246374693163881, "learning_rate": 1.811208429337928e-06, "loss": 0.2745, "step": 25667 }, { "epoch": 1.2024172014803016, "grad_norm": 0.5693429624525783, "learning_rate": 1.8110261213320734e-06, "loss": 0.2624, "step": 25668 }, { "epoch": 1.20246404647023, "grad_norm": 0.6007766416739381, "learning_rate": 1.8108438172910902e-06, "loss": 0.2722, "step": 25669 }, { "epoch": 1.2025108914601583, "grad_norm": 0.5800559692096556, "learning_rate": 1.8106615172160274e-06, "loss": 0.264, "step": 25670 }, { "epoch": 1.2025577364500866, "grad_norm": 0.5756899875700172, "learning_rate": 1.810479221107935e-06, "loss": 0.274, "step": 25671 }, { "epoch": 1.202604581440015, "grad_norm": 0.6045161068568421, "learning_rate": 1.8102969289678617e-06, "loss": 0.2776, "step": 25672 }, { "epoch": 1.2026514264299433, "grad_norm": 0.586874062330171, "learning_rate": 1.8101146407968556e-06, "loss": 0.272, "step": 25673 }, { "epoch": 1.2026982714198717, "grad_norm": 0.5915948906171465, "learning_rate": 1.809932356595966e-06, "loss": 0.2765, "step": 25674 }, { "epoch": 1.2027451164098, "grad_norm": 0.6012840585009123, "learning_rate": 1.8097500763662428e-06, "loss": 0.2873, "step": 25675 }, { "epoch": 1.2027919613997282, "grad_norm": 0.5828451324106646, "learning_rate": 1.8095678001087341e-06, "loss": 0.276, "step": 25676 }, { "epoch": 1.2028388063896567, "grad_norm": 0.6623276419578654, "learning_rate": 1.809385527824489e-06, "loss": 0.2936, "step": 25677 }, { "epoch": 1.202885651379585, "grad_norm": 0.6045915765218298, "learning_rate": 1.8092032595145575e-06, "loss": 0.2815, "step": 25678 }, { "epoch": 1.2029324963695132, "grad_norm": 0.6431996117109958, "learning_rate": 1.8090209951799875e-06, "loss": 0.3027, "step": 25679 }, { "epoch": 1.2029793413594416, "grad_norm": 0.5421743641122442, "learning_rate": 1.8088387348218272e-06, "loss": 0.2521, "step": 25680 }, { "epoch": 1.2030261863493699, "grad_norm": 0.6090378886788351, "learning_rate": 1.8086564784411265e-06, "loss": 0.2668, "step": 25681 }, { "epoch": 1.2030730313392983, "grad_norm": 0.5610017095992661, "learning_rate": 1.8084742260389343e-06, "loss": 0.2715, "step": 25682 }, { "epoch": 1.2031198763292266, "grad_norm": 0.6261091842913302, "learning_rate": 1.8082919776162986e-06, "loss": 0.2909, "step": 25683 }, { "epoch": 1.203166721319155, "grad_norm": 0.6010456739389162, "learning_rate": 1.8081097331742698e-06, "loss": 0.274, "step": 25684 }, { "epoch": 1.2032135663090833, "grad_norm": 0.5674459269804326, "learning_rate": 1.8079274927138945e-06, "loss": 0.2689, "step": 25685 }, { "epoch": 1.2032604112990115, "grad_norm": 0.5818317537887647, "learning_rate": 1.807745256236223e-06, "loss": 0.273, "step": 25686 }, { "epoch": 1.20330725628894, "grad_norm": 0.5746217848817217, "learning_rate": 1.8075630237423031e-06, "loss": 0.2728, "step": 25687 }, { "epoch": 1.2033541012788682, "grad_norm": 0.5717535620023771, "learning_rate": 1.8073807952331845e-06, "loss": 0.264, "step": 25688 }, { "epoch": 1.2034009462687965, "grad_norm": 0.5631466232213063, "learning_rate": 1.807198570709915e-06, "loss": 0.2635, "step": 25689 }, { "epoch": 1.203447791258725, "grad_norm": 0.6375966919227603, "learning_rate": 1.8070163501735444e-06, "loss": 0.3009, "step": 25690 }, { "epoch": 1.2034946362486532, "grad_norm": 0.6216088846362784, "learning_rate": 1.8068341336251197e-06, "loss": 0.2921, "step": 25691 }, { "epoch": 1.2035414812385816, "grad_norm": 0.604006798267647, "learning_rate": 1.8066519210656908e-06, "loss": 0.2614, "step": 25692 }, { "epoch": 1.2035883262285099, "grad_norm": 0.6307725434818451, "learning_rate": 1.8064697124963052e-06, "loss": 0.2768, "step": 25693 }, { "epoch": 1.203635171218438, "grad_norm": 0.6173540249447492, "learning_rate": 1.806287507918012e-06, "loss": 0.2689, "step": 25694 }, { "epoch": 1.2036820162083666, "grad_norm": 0.5708522912701491, "learning_rate": 1.8061053073318606e-06, "loss": 0.2588, "step": 25695 }, { "epoch": 1.2037288611982948, "grad_norm": 0.597299497690232, "learning_rate": 1.8059231107388992e-06, "loss": 0.278, "step": 25696 }, { "epoch": 1.2037757061882233, "grad_norm": 0.510951053360424, "learning_rate": 1.8057409181401748e-06, "loss": 0.2615, "step": 25697 }, { "epoch": 1.2038225511781515, "grad_norm": 0.5663453584998538, "learning_rate": 1.8055587295367366e-06, "loss": 0.251, "step": 25698 }, { "epoch": 1.2038693961680798, "grad_norm": 0.6340494086383347, "learning_rate": 1.8053765449296343e-06, "loss": 0.28, "step": 25699 }, { "epoch": 1.2039162411580082, "grad_norm": 0.5916496787963184, "learning_rate": 1.805194364319915e-06, "loss": 0.2862, "step": 25700 }, { "epoch": 1.2039630861479365, "grad_norm": 0.5938247600252397, "learning_rate": 1.8050121877086268e-06, "loss": 0.2757, "step": 25701 }, { "epoch": 1.2040099311378647, "grad_norm": 0.6082172378816806, "learning_rate": 1.8048300150968207e-06, "loss": 0.2793, "step": 25702 }, { "epoch": 1.2040567761277932, "grad_norm": 0.6012402406634745, "learning_rate": 1.8046478464855421e-06, "loss": 0.2825, "step": 25703 }, { "epoch": 1.2041036211177214, "grad_norm": 0.6491451588513634, "learning_rate": 1.8044656818758399e-06, "loss": 0.2798, "step": 25704 }, { "epoch": 1.2041504661076499, "grad_norm": 0.6241454640829518, "learning_rate": 1.8042835212687626e-06, "loss": 0.2785, "step": 25705 }, { "epoch": 1.204197311097578, "grad_norm": 0.548598161243461, "learning_rate": 1.80410136466536e-06, "loss": 0.2596, "step": 25706 }, { "epoch": 1.2042441560875063, "grad_norm": 0.6356457473122057, "learning_rate": 1.8039192120666782e-06, "loss": 0.2938, "step": 25707 }, { "epoch": 1.2042910010774348, "grad_norm": 0.5657697006010003, "learning_rate": 1.8037370634737661e-06, "loss": 0.281, "step": 25708 }, { "epoch": 1.204337846067363, "grad_norm": 0.6052412451347492, "learning_rate": 1.803554918887674e-06, "loss": 0.2865, "step": 25709 }, { "epoch": 1.2043846910572915, "grad_norm": 0.5875750560658483, "learning_rate": 1.803372778309447e-06, "loss": 0.2858, "step": 25710 }, { "epoch": 1.2044315360472198, "grad_norm": 0.6292496928192102, "learning_rate": 1.8031906417401346e-06, "loss": 0.2869, "step": 25711 }, { "epoch": 1.204478381037148, "grad_norm": 0.619410127387714, "learning_rate": 1.8030085091807848e-06, "loss": 0.2776, "step": 25712 }, { "epoch": 1.2045252260270765, "grad_norm": 0.572113762490747, "learning_rate": 1.802826380632446e-06, "loss": 0.2675, "step": 25713 }, { "epoch": 1.2045720710170047, "grad_norm": 0.5602081899085635, "learning_rate": 1.802644256096166e-06, "loss": 0.2681, "step": 25714 }, { "epoch": 1.204618916006933, "grad_norm": 0.5730729991830404, "learning_rate": 1.8024621355729937e-06, "loss": 0.2846, "step": 25715 }, { "epoch": 1.2046657609968614, "grad_norm": 0.5652398704312565, "learning_rate": 1.8022800190639761e-06, "loss": 0.2647, "step": 25716 }, { "epoch": 1.2047126059867896, "grad_norm": 0.5513106250070798, "learning_rate": 1.8020979065701617e-06, "loss": 0.2709, "step": 25717 }, { "epoch": 1.204759450976718, "grad_norm": 0.5900513583455216, "learning_rate": 1.8019157980925978e-06, "loss": 0.2649, "step": 25718 }, { "epoch": 1.2048062959666463, "grad_norm": 0.5445074008452774, "learning_rate": 1.8017336936323332e-06, "loss": 0.27, "step": 25719 }, { "epoch": 1.2048531409565748, "grad_norm": 0.5470600847593736, "learning_rate": 1.8015515931904165e-06, "loss": 0.2593, "step": 25720 }, { "epoch": 1.204899985946503, "grad_norm": 0.5659207786528313, "learning_rate": 1.8013694967678952e-06, "loss": 0.2708, "step": 25721 }, { "epoch": 1.2049468309364313, "grad_norm": 0.6349384513942017, "learning_rate": 1.8011874043658156e-06, "loss": 0.2979, "step": 25722 }, { "epoch": 1.2049936759263598, "grad_norm": 0.590495555733982, "learning_rate": 1.801005315985227e-06, "loss": 0.297, "step": 25723 }, { "epoch": 1.205040520916288, "grad_norm": 0.5852452510295938, "learning_rate": 1.8008232316271774e-06, "loss": 0.2695, "step": 25724 }, { "epoch": 1.2050873659062162, "grad_norm": 0.5613647436763877, "learning_rate": 1.800641151292714e-06, "loss": 0.2687, "step": 25725 }, { "epoch": 1.2051342108961447, "grad_norm": 0.6312486738002815, "learning_rate": 1.800459074982885e-06, "loss": 0.2907, "step": 25726 }, { "epoch": 1.205181055886073, "grad_norm": 0.5935143437075312, "learning_rate": 1.8002770026987392e-06, "loss": 0.2915, "step": 25727 }, { "epoch": 1.2052279008760014, "grad_norm": 0.5994073251004659, "learning_rate": 1.8000949344413232e-06, "loss": 0.2832, "step": 25728 }, { "epoch": 1.2052747458659296, "grad_norm": 0.581274823722642, "learning_rate": 1.7999128702116841e-06, "loss": 0.259, "step": 25729 }, { "epoch": 1.2053215908558579, "grad_norm": 0.5659677663239285, "learning_rate": 1.7997308100108705e-06, "loss": 0.2519, "step": 25730 }, { "epoch": 1.2053684358457863, "grad_norm": 0.591064210135153, "learning_rate": 1.7995487538399306e-06, "loss": 0.2796, "step": 25731 }, { "epoch": 1.2054152808357146, "grad_norm": 0.5868941403102211, "learning_rate": 1.799366701699911e-06, "loss": 0.2576, "step": 25732 }, { "epoch": 1.205462125825643, "grad_norm": 0.5575434004409884, "learning_rate": 1.799184653591861e-06, "loss": 0.2548, "step": 25733 }, { "epoch": 1.2055089708155713, "grad_norm": 0.5404265149029956, "learning_rate": 1.7990026095168261e-06, "loss": 0.264, "step": 25734 }, { "epoch": 1.2055558158054995, "grad_norm": 0.603459708207144, "learning_rate": 1.798820569475856e-06, "loss": 0.2766, "step": 25735 }, { "epoch": 1.205602660795428, "grad_norm": 0.6162469883128082, "learning_rate": 1.7986385334699963e-06, "loss": 0.2738, "step": 25736 }, { "epoch": 1.2056495057853562, "grad_norm": 0.534410854120158, "learning_rate": 1.7984565015002959e-06, "loss": 0.246, "step": 25737 }, { "epoch": 1.2056963507752845, "grad_norm": 0.6219691032689301, "learning_rate": 1.7982744735678018e-06, "loss": 0.3062, "step": 25738 }, { "epoch": 1.205743195765213, "grad_norm": 0.61261173510962, "learning_rate": 1.7980924496735627e-06, "loss": 0.2829, "step": 25739 }, { "epoch": 1.2057900407551412, "grad_norm": 0.5839333250038762, "learning_rate": 1.7979104298186239e-06, "loss": 0.2724, "step": 25740 }, { "epoch": 1.2058368857450696, "grad_norm": 0.6130844625645725, "learning_rate": 1.7977284140040352e-06, "loss": 0.2832, "step": 25741 }, { "epoch": 1.2058837307349979, "grad_norm": 0.5329703548002138, "learning_rate": 1.797546402230842e-06, "loss": 0.2447, "step": 25742 }, { "epoch": 1.2059305757249261, "grad_norm": 0.6240942105371035, "learning_rate": 1.7973643945000926e-06, "loss": 0.2779, "step": 25743 }, { "epoch": 1.2059774207148546, "grad_norm": 0.6136825858945336, "learning_rate": 1.7971823908128353e-06, "loss": 0.2959, "step": 25744 }, { "epoch": 1.2060242657047828, "grad_norm": 0.5887161147325782, "learning_rate": 1.7970003911701172e-06, "loss": 0.2659, "step": 25745 }, { "epoch": 1.2060711106947113, "grad_norm": 0.5864322748444932, "learning_rate": 1.796818395572984e-06, "loss": 0.2731, "step": 25746 }, { "epoch": 1.2061179556846395, "grad_norm": 0.620053515163103, "learning_rate": 1.7966364040224843e-06, "loss": 0.2602, "step": 25747 }, { "epoch": 1.2061648006745678, "grad_norm": 0.5973683515535637, "learning_rate": 1.7964544165196656e-06, "loss": 0.2903, "step": 25748 }, { "epoch": 1.2062116456644962, "grad_norm": 0.5825432625116395, "learning_rate": 1.7962724330655744e-06, "loss": 0.2945, "step": 25749 }, { "epoch": 1.2062584906544245, "grad_norm": 0.5369532989637404, "learning_rate": 1.796090453661259e-06, "loss": 0.2545, "step": 25750 }, { "epoch": 1.2063053356443527, "grad_norm": 0.594884444275706, "learning_rate": 1.7959084783077665e-06, "loss": 0.2684, "step": 25751 }, { "epoch": 1.2063521806342812, "grad_norm": 0.5604933185195303, "learning_rate": 1.7957265070061437e-06, "loss": 0.256, "step": 25752 }, { "epoch": 1.2063990256242094, "grad_norm": 0.6307205939164006, "learning_rate": 1.795544539757437e-06, "loss": 0.2926, "step": 25753 }, { "epoch": 1.2064458706141379, "grad_norm": 0.6171303405783335, "learning_rate": 1.7953625765626946e-06, "loss": 0.2766, "step": 25754 }, { "epoch": 1.2064927156040661, "grad_norm": 0.6020070803545768, "learning_rate": 1.7951806174229639e-06, "loss": 0.2759, "step": 25755 }, { "epoch": 1.2065395605939946, "grad_norm": 0.5732287937068083, "learning_rate": 1.7949986623392913e-06, "loss": 0.2669, "step": 25756 }, { "epoch": 1.2065864055839228, "grad_norm": 0.6068705096492646, "learning_rate": 1.7948167113127247e-06, "loss": 0.2818, "step": 25757 }, { "epoch": 1.206633250573851, "grad_norm": 0.5707231886194896, "learning_rate": 1.7946347643443103e-06, "loss": 0.274, "step": 25758 }, { "epoch": 1.2066800955637795, "grad_norm": 0.5669006699511511, "learning_rate": 1.7944528214350954e-06, "loss": 0.2756, "step": 25759 }, { "epoch": 1.2067269405537078, "grad_norm": 0.6436934586412983, "learning_rate": 1.794270882586127e-06, "loss": 0.274, "step": 25760 }, { "epoch": 1.206773785543636, "grad_norm": 0.6059545050629038, "learning_rate": 1.7940889477984524e-06, "loss": 0.2821, "step": 25761 }, { "epoch": 1.2068206305335645, "grad_norm": 0.5865757012835734, "learning_rate": 1.7939070170731187e-06, "loss": 0.2839, "step": 25762 }, { "epoch": 1.2068674755234927, "grad_norm": 0.5803258357903274, "learning_rate": 1.7937250904111735e-06, "loss": 0.2738, "step": 25763 }, { "epoch": 1.2069143205134212, "grad_norm": 0.5785434492649012, "learning_rate": 1.7935431678136616e-06, "loss": 0.2718, "step": 25764 }, { "epoch": 1.2069611655033494, "grad_norm": 0.6117673650701557, "learning_rate": 1.7933612492816317e-06, "loss": 0.2807, "step": 25765 }, { "epoch": 1.2070080104932777, "grad_norm": 0.597969095589059, "learning_rate": 1.7931793348161303e-06, "loss": 0.2776, "step": 25766 }, { "epoch": 1.2070548554832061, "grad_norm": 0.6175759123661769, "learning_rate": 1.7929974244182037e-06, "loss": 0.2743, "step": 25767 }, { "epoch": 1.2071017004731344, "grad_norm": 0.6153412863485799, "learning_rate": 1.792815518088899e-06, "loss": 0.2859, "step": 25768 }, { "epoch": 1.2071485454630628, "grad_norm": 0.5614400144297411, "learning_rate": 1.7926336158292646e-06, "loss": 0.265, "step": 25769 }, { "epoch": 1.207195390452991, "grad_norm": 0.6087649116963377, "learning_rate": 1.7924517176403455e-06, "loss": 0.3014, "step": 25770 }, { "epoch": 1.2072422354429193, "grad_norm": 0.5942267508965304, "learning_rate": 1.7922698235231884e-06, "loss": 0.2693, "step": 25771 }, { "epoch": 1.2072890804328478, "grad_norm": 0.5567178632792564, "learning_rate": 1.7920879334788401e-06, "loss": 0.2722, "step": 25772 }, { "epoch": 1.207335925422776, "grad_norm": 0.6261287616954856, "learning_rate": 1.791906047508349e-06, "loss": 0.2758, "step": 25773 }, { "epoch": 1.2073827704127043, "grad_norm": 0.5540907588131561, "learning_rate": 1.7917241656127595e-06, "loss": 0.2639, "step": 25774 }, { "epoch": 1.2074296154026327, "grad_norm": 0.5912183441760877, "learning_rate": 1.7915422877931205e-06, "loss": 0.2605, "step": 25775 }, { "epoch": 1.207476460392561, "grad_norm": 0.5648026589438379, "learning_rate": 1.791360414050477e-06, "loss": 0.271, "step": 25776 }, { "epoch": 1.2075233053824894, "grad_norm": 0.6317577176199457, "learning_rate": 1.7911785443858764e-06, "loss": 0.2861, "step": 25777 }, { "epoch": 1.2075701503724177, "grad_norm": 0.6234203762345194, "learning_rate": 1.790996678800365e-06, "loss": 0.2828, "step": 25778 }, { "epoch": 1.207616995362346, "grad_norm": 0.6080335557713731, "learning_rate": 1.7908148172949895e-06, "loss": 0.2865, "step": 25779 }, { "epoch": 1.2076638403522744, "grad_norm": 0.5919144853035847, "learning_rate": 1.7906329598707961e-06, "loss": 0.2653, "step": 25780 }, { "epoch": 1.2077106853422026, "grad_norm": 0.619511124199797, "learning_rate": 1.790451106528832e-06, "loss": 0.2702, "step": 25781 }, { "epoch": 1.207757530332131, "grad_norm": 0.5984933471246747, "learning_rate": 1.790269257270144e-06, "loss": 0.2762, "step": 25782 }, { "epoch": 1.2078043753220593, "grad_norm": 0.5887459256364338, "learning_rate": 1.7900874120957781e-06, "loss": 0.2695, "step": 25783 }, { "epoch": 1.2078512203119876, "grad_norm": 0.5737665405862142, "learning_rate": 1.7899055710067799e-06, "loss": 0.2507, "step": 25784 }, { "epoch": 1.207898065301916, "grad_norm": 0.5885330031355873, "learning_rate": 1.7897237340041967e-06, "loss": 0.2795, "step": 25785 }, { "epoch": 1.2079449102918443, "grad_norm": 0.5391768591809005, "learning_rate": 1.7895419010890753e-06, "loss": 0.2551, "step": 25786 }, { "epoch": 1.2079917552817725, "grad_norm": 0.5835530270504031, "learning_rate": 1.7893600722624612e-06, "loss": 0.2719, "step": 25787 }, { "epoch": 1.208038600271701, "grad_norm": 0.5820408677461775, "learning_rate": 1.7891782475254024e-06, "loss": 0.2915, "step": 25788 }, { "epoch": 1.2080854452616292, "grad_norm": 0.6226044348228177, "learning_rate": 1.7889964268789431e-06, "loss": 0.2694, "step": 25789 }, { "epoch": 1.2081322902515577, "grad_norm": 0.621193826522941, "learning_rate": 1.7888146103241311e-06, "loss": 0.2639, "step": 25790 }, { "epoch": 1.208179135241486, "grad_norm": 0.611371264468365, "learning_rate": 1.7886327978620116e-06, "loss": 0.2717, "step": 25791 }, { "epoch": 1.2082259802314144, "grad_norm": 0.6182353506469218, "learning_rate": 1.7884509894936318e-06, "loss": 0.2828, "step": 25792 }, { "epoch": 1.2082728252213426, "grad_norm": 0.6079894138993384, "learning_rate": 1.7882691852200384e-06, "loss": 0.2744, "step": 25793 }, { "epoch": 1.2083196702112708, "grad_norm": 0.5376892166848183, "learning_rate": 1.788087385042277e-06, "loss": 0.2513, "step": 25794 }, { "epoch": 1.2083665152011993, "grad_norm": 0.5761701748266732, "learning_rate": 1.787905588961393e-06, "loss": 0.2749, "step": 25795 }, { "epoch": 1.2084133601911276, "grad_norm": 0.5843670380577735, "learning_rate": 1.7877237969784334e-06, "loss": 0.2834, "step": 25796 }, { "epoch": 1.2084602051810558, "grad_norm": 0.6553328450067375, "learning_rate": 1.7875420090944449e-06, "loss": 0.2815, "step": 25797 }, { "epoch": 1.2085070501709843, "grad_norm": 0.6027461097667849, "learning_rate": 1.7873602253104722e-06, "loss": 0.2779, "step": 25798 }, { "epoch": 1.2085538951609125, "grad_norm": 0.5993488800984211, "learning_rate": 1.7871784456275625e-06, "loss": 0.271, "step": 25799 }, { "epoch": 1.208600740150841, "grad_norm": 0.563662836338163, "learning_rate": 1.7869966700467628e-06, "loss": 0.2662, "step": 25800 }, { "epoch": 1.2086475851407692, "grad_norm": 0.6276113874791002, "learning_rate": 1.7868148985691174e-06, "loss": 0.2721, "step": 25801 }, { "epoch": 1.2086944301306974, "grad_norm": 0.6181574473564809, "learning_rate": 1.7866331311956724e-06, "loss": 0.2774, "step": 25802 }, { "epoch": 1.208741275120626, "grad_norm": 0.6099291128948758, "learning_rate": 1.7864513679274747e-06, "loss": 0.2874, "step": 25803 }, { "epoch": 1.2087881201105541, "grad_norm": 0.5792767173149438, "learning_rate": 1.7862696087655706e-06, "loss": 0.2508, "step": 25804 }, { "epoch": 1.2088349651004826, "grad_norm": 0.5788856276204608, "learning_rate": 1.786087853711005e-06, "loss": 0.2933, "step": 25805 }, { "epoch": 1.2088818100904108, "grad_norm": 0.6209625403447653, "learning_rate": 1.785906102764825e-06, "loss": 0.2866, "step": 25806 }, { "epoch": 1.208928655080339, "grad_norm": 0.6134664175859279, "learning_rate": 1.7857243559280752e-06, "loss": 0.2824, "step": 25807 }, { "epoch": 1.2089755000702676, "grad_norm": 0.5895096955561124, "learning_rate": 1.7855426132018024e-06, "loss": 0.2879, "step": 25808 }, { "epoch": 1.2090223450601958, "grad_norm": 0.5548765767097038, "learning_rate": 1.7853608745870521e-06, "loss": 0.2638, "step": 25809 }, { "epoch": 1.209069190050124, "grad_norm": 0.6022068027467742, "learning_rate": 1.78517914008487e-06, "loss": 0.2624, "step": 25810 }, { "epoch": 1.2091160350400525, "grad_norm": 0.5528924589043439, "learning_rate": 1.7849974096963033e-06, "loss": 0.2674, "step": 25811 }, { "epoch": 1.2091628800299807, "grad_norm": 0.5713133296927067, "learning_rate": 1.784815683422397e-06, "loss": 0.2674, "step": 25812 }, { "epoch": 1.2092097250199092, "grad_norm": 0.6055124438175868, "learning_rate": 1.7846339612641956e-06, "loss": 0.2833, "step": 25813 }, { "epoch": 1.2092565700098374, "grad_norm": 0.5639774696530173, "learning_rate": 1.7844522432227463e-06, "loss": 0.2602, "step": 25814 }, { "epoch": 1.2093034149997657, "grad_norm": 0.552521850212075, "learning_rate": 1.7842705292990947e-06, "loss": 0.2491, "step": 25815 }, { "epoch": 1.2093502599896941, "grad_norm": 0.6083879992189982, "learning_rate": 1.7840888194942856e-06, "loss": 0.2824, "step": 25816 }, { "epoch": 1.2093971049796224, "grad_norm": 0.6073475451523991, "learning_rate": 1.7839071138093659e-06, "loss": 0.2732, "step": 25817 }, { "epoch": 1.2094439499695508, "grad_norm": 0.5947633010234872, "learning_rate": 1.7837254122453817e-06, "loss": 0.2733, "step": 25818 }, { "epoch": 1.209490794959479, "grad_norm": 0.6200297603880797, "learning_rate": 1.7835437148033768e-06, "loss": 0.2766, "step": 25819 }, { "epoch": 1.2095376399494073, "grad_norm": 0.6166839467349566, "learning_rate": 1.7833620214843977e-06, "loss": 0.2856, "step": 25820 }, { "epoch": 1.2095844849393358, "grad_norm": 0.6279986836001482, "learning_rate": 1.7831803322894898e-06, "loss": 0.2818, "step": 25821 }, { "epoch": 1.209631329929264, "grad_norm": 0.6219668143226532, "learning_rate": 1.7829986472196994e-06, "loss": 0.3009, "step": 25822 }, { "epoch": 1.2096781749191923, "grad_norm": 0.6438665967568549, "learning_rate": 1.7828169662760714e-06, "loss": 0.2983, "step": 25823 }, { "epoch": 1.2097250199091207, "grad_norm": 0.6560546262540878, "learning_rate": 1.782635289459652e-06, "loss": 0.2887, "step": 25824 }, { "epoch": 1.209771864899049, "grad_norm": 0.5717656683191495, "learning_rate": 1.7824536167714856e-06, "loss": 0.2712, "step": 25825 }, { "epoch": 1.2098187098889774, "grad_norm": 0.6038701430827542, "learning_rate": 1.7822719482126188e-06, "loss": 0.288, "step": 25826 }, { "epoch": 1.2098655548789057, "grad_norm": 0.5998776778385421, "learning_rate": 1.782090283784096e-06, "loss": 0.2651, "step": 25827 }, { "epoch": 1.2099123998688341, "grad_norm": 0.6128809305994799, "learning_rate": 1.7819086234869634e-06, "loss": 0.2696, "step": 25828 }, { "epoch": 1.2099592448587624, "grad_norm": 0.6122791149400314, "learning_rate": 1.7817269673222659e-06, "loss": 0.2662, "step": 25829 }, { "epoch": 1.2100060898486906, "grad_norm": 0.6088267084145951, "learning_rate": 1.78154531529105e-06, "loss": 0.2777, "step": 25830 }, { "epoch": 1.210052934838619, "grad_norm": 0.5699219904058717, "learning_rate": 1.7813636673943597e-06, "loss": 0.2688, "step": 25831 }, { "epoch": 1.2100997798285473, "grad_norm": 0.5905872074410109, "learning_rate": 1.7811820236332409e-06, "loss": 0.2714, "step": 25832 }, { "epoch": 1.2101466248184756, "grad_norm": 0.567533963039917, "learning_rate": 1.7810003840087387e-06, "loss": 0.268, "step": 25833 }, { "epoch": 1.210193469808404, "grad_norm": 0.5905017560730078, "learning_rate": 1.780818748521898e-06, "loss": 0.2793, "step": 25834 }, { "epoch": 1.2102403147983323, "grad_norm": 0.6201772223280628, "learning_rate": 1.7806371171737656e-06, "loss": 0.2925, "step": 25835 }, { "epoch": 1.2102871597882607, "grad_norm": 0.6264778078787534, "learning_rate": 1.7804554899653865e-06, "loss": 0.2912, "step": 25836 }, { "epoch": 1.210334004778189, "grad_norm": 0.5590090309507277, "learning_rate": 1.7802738668978037e-06, "loss": 0.2602, "step": 25837 }, { "epoch": 1.2103808497681172, "grad_norm": 0.73602677393323, "learning_rate": 1.7800922479720644e-06, "loss": 0.2782, "step": 25838 }, { "epoch": 1.2104276947580457, "grad_norm": 0.5755285718689144, "learning_rate": 1.7799106331892136e-06, "loss": 0.2789, "step": 25839 }, { "epoch": 1.210474539747974, "grad_norm": 0.5827403319375152, "learning_rate": 1.7797290225502952e-06, "loss": 0.2705, "step": 25840 }, { "epoch": 1.2105213847379024, "grad_norm": 0.5691603681287207, "learning_rate": 1.7795474160563558e-06, "loss": 0.2804, "step": 25841 }, { "epoch": 1.2105682297278306, "grad_norm": 0.5988562958297818, "learning_rate": 1.7793658137084408e-06, "loss": 0.2848, "step": 25842 }, { "epoch": 1.2106150747177589, "grad_norm": 0.5804739620189249, "learning_rate": 1.7791842155075937e-06, "loss": 0.276, "step": 25843 }, { "epoch": 1.2106619197076873, "grad_norm": 0.6038929613280208, "learning_rate": 1.77900262145486e-06, "loss": 0.2726, "step": 25844 }, { "epoch": 1.2107087646976156, "grad_norm": 0.5641933393498906, "learning_rate": 1.778821031551285e-06, "loss": 0.2544, "step": 25845 }, { "epoch": 1.2107556096875438, "grad_norm": 0.5584270811401845, "learning_rate": 1.778639445797914e-06, "loss": 0.2753, "step": 25846 }, { "epoch": 1.2108024546774723, "grad_norm": 0.6378312281684124, "learning_rate": 1.778457864195791e-06, "loss": 0.2919, "step": 25847 }, { "epoch": 1.2108492996674005, "grad_norm": 0.6187723131910289, "learning_rate": 1.778276286745962e-06, "loss": 0.2738, "step": 25848 }, { "epoch": 1.210896144657329, "grad_norm": 0.6082897745016821, "learning_rate": 1.7780947134494725e-06, "loss": 0.2617, "step": 25849 }, { "epoch": 1.2109429896472572, "grad_norm": 0.6175471837763356, "learning_rate": 1.777913144307366e-06, "loss": 0.2641, "step": 25850 }, { "epoch": 1.2109898346371855, "grad_norm": 0.6006573445131629, "learning_rate": 1.7777315793206873e-06, "loss": 0.2686, "step": 25851 }, { "epoch": 1.211036679627114, "grad_norm": 0.6049917651844695, "learning_rate": 1.777550018490482e-06, "loss": 0.2846, "step": 25852 }, { "epoch": 1.2110835246170422, "grad_norm": 0.5965278773055184, "learning_rate": 1.777368461817795e-06, "loss": 0.2743, "step": 25853 }, { "epoch": 1.2111303696069706, "grad_norm": 0.5497274116074271, "learning_rate": 1.7771869093036706e-06, "loss": 0.2572, "step": 25854 }, { "epoch": 1.2111772145968989, "grad_norm": 0.5879970444034377, "learning_rate": 1.7770053609491547e-06, "loss": 0.2594, "step": 25855 }, { "epoch": 1.211224059586827, "grad_norm": 0.6187169220385802, "learning_rate": 1.7768238167552905e-06, "loss": 0.2699, "step": 25856 }, { "epoch": 1.2112709045767556, "grad_norm": 0.590566327138651, "learning_rate": 1.7766422767231236e-06, "loss": 0.2609, "step": 25857 }, { "epoch": 1.2113177495666838, "grad_norm": 0.5294743521326073, "learning_rate": 1.7764607408536982e-06, "loss": 0.253, "step": 25858 }, { "epoch": 1.211364594556612, "grad_norm": 0.5543296211940621, "learning_rate": 1.7762792091480596e-06, "loss": 0.271, "step": 25859 }, { "epoch": 1.2114114395465405, "grad_norm": 0.5883238401201765, "learning_rate": 1.7760976816072525e-06, "loss": 0.2733, "step": 25860 }, { "epoch": 1.2114582845364688, "grad_norm": 0.6260953418891841, "learning_rate": 1.775916158232322e-06, "loss": 0.2922, "step": 25861 }, { "epoch": 1.2115051295263972, "grad_norm": 0.5315615352948557, "learning_rate": 1.7757346390243107e-06, "loss": 0.2439, "step": 25862 }, { "epoch": 1.2115519745163255, "grad_norm": 0.6031982164590243, "learning_rate": 1.7755531239842649e-06, "loss": 0.2843, "step": 25863 }, { "epoch": 1.211598819506254, "grad_norm": 0.574882082420888, "learning_rate": 1.775371613113229e-06, "loss": 0.274, "step": 25864 }, { "epoch": 1.2116456644961822, "grad_norm": 0.6158619670410024, "learning_rate": 1.7751901064122468e-06, "loss": 0.2844, "step": 25865 }, { "epoch": 1.2116925094861104, "grad_norm": 0.5679842128277472, "learning_rate": 1.7750086038823639e-06, "loss": 0.2777, "step": 25866 }, { "epoch": 1.2117393544760389, "grad_norm": 0.5974015117579645, "learning_rate": 1.7748271055246246e-06, "loss": 0.28, "step": 25867 }, { "epoch": 1.211786199465967, "grad_norm": 0.5541592368983371, "learning_rate": 1.7746456113400728e-06, "loss": 0.2696, "step": 25868 }, { "epoch": 1.2118330444558953, "grad_norm": 0.6340265327165705, "learning_rate": 1.7744641213297528e-06, "loss": 0.2725, "step": 25869 }, { "epoch": 1.2118798894458238, "grad_norm": 0.5766705581576913, "learning_rate": 1.7742826354947093e-06, "loss": 0.2649, "step": 25870 }, { "epoch": 1.211926734435752, "grad_norm": 0.6207075497742535, "learning_rate": 1.7741011538359876e-06, "loss": 0.2815, "step": 25871 }, { "epoch": 1.2119735794256805, "grad_norm": 0.624583643020588, "learning_rate": 1.7739196763546303e-06, "loss": 0.282, "step": 25872 }, { "epoch": 1.2120204244156088, "grad_norm": 0.6699218681052849, "learning_rate": 1.7737382030516843e-06, "loss": 0.2884, "step": 25873 }, { "epoch": 1.212067269405537, "grad_norm": 0.6197374190697817, "learning_rate": 1.7735567339281911e-06, "loss": 0.2832, "step": 25874 }, { "epoch": 1.2121141143954655, "grad_norm": 0.5671873708116759, "learning_rate": 1.7733752689851968e-06, "loss": 0.266, "step": 25875 }, { "epoch": 1.2121609593853937, "grad_norm": 0.5521375538960656, "learning_rate": 1.7731938082237448e-06, "loss": 0.2685, "step": 25876 }, { "epoch": 1.2122078043753222, "grad_norm": 0.5810634707833259, "learning_rate": 1.7730123516448799e-06, "loss": 0.2704, "step": 25877 }, { "epoch": 1.2122546493652504, "grad_norm": 0.5911663193973035, "learning_rate": 1.7728308992496462e-06, "loss": 0.2903, "step": 25878 }, { "epoch": 1.2123014943551786, "grad_norm": 0.5708835871869694, "learning_rate": 1.7726494510390888e-06, "loss": 0.2638, "step": 25879 }, { "epoch": 1.212348339345107, "grad_norm": 0.5804390100685813, "learning_rate": 1.7724680070142497e-06, "loss": 0.2689, "step": 25880 }, { "epoch": 1.2123951843350353, "grad_norm": 0.6638810091591248, "learning_rate": 1.7722865671761753e-06, "loss": 0.2845, "step": 25881 }, { "epoch": 1.2124420293249636, "grad_norm": 0.5570752485417411, "learning_rate": 1.772105131525908e-06, "loss": 0.2567, "step": 25882 }, { "epoch": 1.212488874314892, "grad_norm": 0.5603999481960111, "learning_rate": 1.7719237000644928e-06, "loss": 0.268, "step": 25883 }, { "epoch": 1.2125357193048203, "grad_norm": 0.610701186936275, "learning_rate": 1.7717422727929742e-06, "loss": 0.2879, "step": 25884 }, { "epoch": 1.2125825642947488, "grad_norm": 0.6129023600371073, "learning_rate": 1.7715608497123965e-06, "loss": 0.2587, "step": 25885 }, { "epoch": 1.212629409284677, "grad_norm": 0.5568767514105365, "learning_rate": 1.7713794308238016e-06, "loss": 0.2487, "step": 25886 }, { "epoch": 1.2126762542746052, "grad_norm": 0.5615652113028895, "learning_rate": 1.7711980161282352e-06, "loss": 0.2671, "step": 25887 }, { "epoch": 1.2127230992645337, "grad_norm": 0.5970126799111046, "learning_rate": 1.7710166056267413e-06, "loss": 0.2909, "step": 25888 }, { "epoch": 1.212769944254462, "grad_norm": 0.6143690190151667, "learning_rate": 1.7708351993203635e-06, "loss": 0.2856, "step": 25889 }, { "epoch": 1.2128167892443904, "grad_norm": 0.6438586142654898, "learning_rate": 1.7706537972101455e-06, "loss": 0.2801, "step": 25890 }, { "epoch": 1.2128636342343186, "grad_norm": 0.5963350066436783, "learning_rate": 1.770472399297133e-06, "loss": 0.2711, "step": 25891 }, { "epoch": 1.2129104792242469, "grad_norm": 0.5714507569073114, "learning_rate": 1.7702910055823677e-06, "loss": 0.2629, "step": 25892 }, { "epoch": 1.2129573242141753, "grad_norm": 0.585762704366766, "learning_rate": 1.7701096160668937e-06, "loss": 0.2529, "step": 25893 }, { "epoch": 1.2130041692041036, "grad_norm": 0.5713904226075605, "learning_rate": 1.7699282307517556e-06, "loss": 0.2744, "step": 25894 }, { "epoch": 1.2130510141940318, "grad_norm": 0.5859312711433073, "learning_rate": 1.7697468496379975e-06, "loss": 0.2802, "step": 25895 }, { "epoch": 1.2130978591839603, "grad_norm": 0.5662152274427963, "learning_rate": 1.769565472726662e-06, "loss": 0.2672, "step": 25896 }, { "epoch": 1.2131447041738885, "grad_norm": 0.6058102474653916, "learning_rate": 1.7693841000187951e-06, "loss": 0.278, "step": 25897 }, { "epoch": 1.213191549163817, "grad_norm": 0.5761226165710576, "learning_rate": 1.7692027315154375e-06, "loss": 0.258, "step": 25898 }, { "epoch": 1.2132383941537452, "grad_norm": 0.6176041322973709, "learning_rate": 1.7690213672176354e-06, "loss": 0.2998, "step": 25899 }, { "epoch": 1.2132852391436737, "grad_norm": 0.5850656495015557, "learning_rate": 1.7688400071264308e-06, "loss": 0.2795, "step": 25900 }, { "epoch": 1.213332084133602, "grad_norm": 0.6230393151251339, "learning_rate": 1.7686586512428682e-06, "loss": 0.2983, "step": 25901 }, { "epoch": 1.2133789291235302, "grad_norm": 0.616910931847539, "learning_rate": 1.7684772995679921e-06, "loss": 0.2825, "step": 25902 }, { "epoch": 1.2134257741134586, "grad_norm": 0.587179563701707, "learning_rate": 1.7682959521028457e-06, "loss": 0.2795, "step": 25903 }, { "epoch": 1.2134726191033869, "grad_norm": 0.6078505688827592, "learning_rate": 1.7681146088484711e-06, "loss": 0.2701, "step": 25904 }, { "epoch": 1.2135194640933151, "grad_norm": 0.5832345962693174, "learning_rate": 1.7679332698059126e-06, "loss": 0.268, "step": 25905 }, { "epoch": 1.2135663090832436, "grad_norm": 0.5668496785266779, "learning_rate": 1.7677519349762152e-06, "loss": 0.27, "step": 25906 }, { "epoch": 1.2136131540731718, "grad_norm": 0.6139096356183223, "learning_rate": 1.7675706043604207e-06, "loss": 0.2727, "step": 25907 }, { "epoch": 1.2136599990631003, "grad_norm": 0.6007988301445893, "learning_rate": 1.767389277959573e-06, "loss": 0.2707, "step": 25908 }, { "epoch": 1.2137068440530285, "grad_norm": 0.5572344030346361, "learning_rate": 1.7672079557747175e-06, "loss": 0.268, "step": 25909 }, { "epoch": 1.2137536890429568, "grad_norm": 0.6185440059767174, "learning_rate": 1.7670266378068952e-06, "loss": 0.2898, "step": 25910 }, { "epoch": 1.2138005340328852, "grad_norm": 0.5815764573218266, "learning_rate": 1.7668453240571496e-06, "loss": 0.2852, "step": 25911 }, { "epoch": 1.2138473790228135, "grad_norm": 0.5851212787086946, "learning_rate": 1.766664014526525e-06, "loss": 0.264, "step": 25912 }, { "epoch": 1.213894224012742, "grad_norm": 0.5544307554555072, "learning_rate": 1.7664827092160652e-06, "loss": 0.2791, "step": 25913 }, { "epoch": 1.2139410690026702, "grad_norm": 0.5717433983711663, "learning_rate": 1.7663014081268126e-06, "loss": 0.2746, "step": 25914 }, { "epoch": 1.2139879139925984, "grad_norm": 0.5512644432551081, "learning_rate": 1.7661201112598116e-06, "loss": 0.2626, "step": 25915 }, { "epoch": 1.2140347589825269, "grad_norm": 0.5676575815270078, "learning_rate": 1.7659388186161048e-06, "loss": 0.26, "step": 25916 }, { "epoch": 1.2140816039724551, "grad_norm": 0.5802621904399384, "learning_rate": 1.7657575301967357e-06, "loss": 0.279, "step": 25917 }, { "epoch": 1.2141284489623834, "grad_norm": 0.6163888205299884, "learning_rate": 1.765576246002747e-06, "loss": 0.2854, "step": 25918 }, { "epoch": 1.2141752939523118, "grad_norm": 0.6291197820958772, "learning_rate": 1.7653949660351826e-06, "loss": 0.2694, "step": 25919 }, { "epoch": 1.21422213894224, "grad_norm": 0.5455636281260386, "learning_rate": 1.7652136902950853e-06, "loss": 0.2567, "step": 25920 }, { "epoch": 1.2142689839321685, "grad_norm": 0.6211319372192632, "learning_rate": 1.7650324187834984e-06, "loss": 0.286, "step": 25921 }, { "epoch": 1.2143158289220968, "grad_norm": 0.5497244548011299, "learning_rate": 1.7648511515014663e-06, "loss": 0.2519, "step": 25922 }, { "epoch": 1.214362673912025, "grad_norm": 0.5511780367245778, "learning_rate": 1.7646698884500308e-06, "loss": 0.2637, "step": 25923 }, { "epoch": 1.2144095189019535, "grad_norm": 0.5320840314964593, "learning_rate": 1.7644886296302344e-06, "loss": 0.2426, "step": 25924 }, { "epoch": 1.2144563638918817, "grad_norm": 0.5532751658800785, "learning_rate": 1.7643073750431211e-06, "loss": 0.2688, "step": 25925 }, { "epoch": 1.2145032088818102, "grad_norm": 0.5956598927016842, "learning_rate": 1.7641261246897346e-06, "loss": 0.2619, "step": 25926 }, { "epoch": 1.2145500538717384, "grad_norm": 0.6184511638806218, "learning_rate": 1.7639448785711165e-06, "loss": 0.282, "step": 25927 }, { "epoch": 1.2145968988616667, "grad_norm": 0.6130468905983847, "learning_rate": 1.763763636688312e-06, "loss": 0.2756, "step": 25928 }, { "epoch": 1.2146437438515951, "grad_norm": 0.6430404116765804, "learning_rate": 1.7635823990423615e-06, "loss": 0.2956, "step": 25929 }, { "epoch": 1.2146905888415234, "grad_norm": 0.6187247172617898, "learning_rate": 1.7634011656343097e-06, "loss": 0.2881, "step": 25930 }, { "epoch": 1.2147374338314516, "grad_norm": 0.6011957617168767, "learning_rate": 1.7632199364651986e-06, "loss": 0.2801, "step": 25931 }, { "epoch": 1.21478427882138, "grad_norm": 0.5958050225662547, "learning_rate": 1.7630387115360715e-06, "loss": 0.2705, "step": 25932 }, { "epoch": 1.2148311238113083, "grad_norm": 0.5692726788678327, "learning_rate": 1.762857490847972e-06, "loss": 0.2743, "step": 25933 }, { "epoch": 1.2148779688012368, "grad_norm": 0.6256947720983513, "learning_rate": 1.762676274401943e-06, "loss": 0.2769, "step": 25934 }, { "epoch": 1.214924813791165, "grad_norm": 0.5765815404242461, "learning_rate": 1.7624950621990256e-06, "loss": 0.2629, "step": 25935 }, { "epoch": 1.2149716587810935, "grad_norm": 0.6091192488754289, "learning_rate": 1.7623138542402636e-06, "loss": 0.2818, "step": 25936 }, { "epoch": 1.2150185037710217, "grad_norm": 0.5953011750068982, "learning_rate": 1.7621326505267006e-06, "loss": 0.273, "step": 25937 }, { "epoch": 1.21506534876095, "grad_norm": 0.6076418558023745, "learning_rate": 1.7619514510593783e-06, "loss": 0.269, "step": 25938 }, { "epoch": 1.2151121937508784, "grad_norm": 0.5789011555001707, "learning_rate": 1.7617702558393396e-06, "loss": 0.2676, "step": 25939 }, { "epoch": 1.2151590387408067, "grad_norm": 0.575021113380517, "learning_rate": 1.761589064867629e-06, "loss": 0.2802, "step": 25940 }, { "epoch": 1.215205883730735, "grad_norm": 0.5498863802914772, "learning_rate": 1.761407878145287e-06, "loss": 0.2545, "step": 25941 }, { "epoch": 1.2152527287206634, "grad_norm": 0.5851337543069867, "learning_rate": 1.7612266956733564e-06, "loss": 0.2572, "step": 25942 }, { "epoch": 1.2152995737105916, "grad_norm": 0.644593738216295, "learning_rate": 1.7610455174528806e-06, "loss": 0.275, "step": 25943 }, { "epoch": 1.21534641870052, "grad_norm": 0.5982238816020119, "learning_rate": 1.7608643434849027e-06, "loss": 0.2654, "step": 25944 }, { "epoch": 1.2153932636904483, "grad_norm": 0.6006433691470121, "learning_rate": 1.7606831737704641e-06, "loss": 0.284, "step": 25945 }, { "epoch": 1.2154401086803766, "grad_norm": 0.6194927803831995, "learning_rate": 1.7605020083106089e-06, "loss": 0.2819, "step": 25946 }, { "epoch": 1.215486953670305, "grad_norm": 0.5844895967679464, "learning_rate": 1.7603208471063783e-06, "loss": 0.2643, "step": 25947 }, { "epoch": 1.2155337986602333, "grad_norm": 0.5735765100246029, "learning_rate": 1.7601396901588152e-06, "loss": 0.2682, "step": 25948 }, { "epoch": 1.2155806436501617, "grad_norm": 0.5703288328840617, "learning_rate": 1.759958537468962e-06, "loss": 0.2695, "step": 25949 }, { "epoch": 1.21562748864009, "grad_norm": 0.5651472261231173, "learning_rate": 1.7597773890378614e-06, "loss": 0.2593, "step": 25950 }, { "epoch": 1.2156743336300182, "grad_norm": 0.565052274376068, "learning_rate": 1.7595962448665562e-06, "loss": 0.259, "step": 25951 }, { "epoch": 1.2157211786199467, "grad_norm": 0.5634008018270054, "learning_rate": 1.7594151049560893e-06, "loss": 0.2629, "step": 25952 }, { "epoch": 1.215768023609875, "grad_norm": 0.6037200924317827, "learning_rate": 1.759233969307501e-06, "loss": 0.2801, "step": 25953 }, { "epoch": 1.2158148685998031, "grad_norm": 0.5989658491645146, "learning_rate": 1.7590528379218354e-06, "loss": 0.2656, "step": 25954 }, { "epoch": 1.2158617135897316, "grad_norm": 0.6177768261326061, "learning_rate": 1.758871710800135e-06, "loss": 0.2789, "step": 25955 }, { "epoch": 1.2159085585796598, "grad_norm": 0.6064918788194681, "learning_rate": 1.7586905879434408e-06, "loss": 0.2767, "step": 25956 }, { "epoch": 1.2159554035695883, "grad_norm": 0.5573564021070276, "learning_rate": 1.758509469352796e-06, "loss": 0.267, "step": 25957 }, { "epoch": 1.2160022485595166, "grad_norm": 0.5970306649159477, "learning_rate": 1.758328355029244e-06, "loss": 0.272, "step": 25958 }, { "epoch": 1.2160490935494448, "grad_norm": 0.591726478467628, "learning_rate": 1.7581472449738255e-06, "loss": 0.2865, "step": 25959 }, { "epoch": 1.2160959385393733, "grad_norm": 0.5738671833976277, "learning_rate": 1.7579661391875825e-06, "loss": 0.2752, "step": 25960 }, { "epoch": 1.2161427835293015, "grad_norm": 0.5834314642328904, "learning_rate": 1.7577850376715578e-06, "loss": 0.2687, "step": 25961 }, { "epoch": 1.21618962851923, "grad_norm": 0.5591124195102896, "learning_rate": 1.757603940426794e-06, "loss": 0.261, "step": 25962 }, { "epoch": 1.2162364735091582, "grad_norm": 0.5662052211474963, "learning_rate": 1.7574228474543329e-06, "loss": 0.2857, "step": 25963 }, { "epoch": 1.2162833184990864, "grad_norm": 0.6276032672357352, "learning_rate": 1.7572417587552173e-06, "loss": 0.282, "step": 25964 }, { "epoch": 1.216330163489015, "grad_norm": 0.5892331028419968, "learning_rate": 1.757060674330488e-06, "loss": 0.2585, "step": 25965 }, { "epoch": 1.2163770084789431, "grad_norm": 0.6028198025202631, "learning_rate": 1.7568795941811877e-06, "loss": 0.2817, "step": 25966 }, { "epoch": 1.2164238534688714, "grad_norm": 0.5698228030648436, "learning_rate": 1.7566985183083587e-06, "loss": 0.2763, "step": 25967 }, { "epoch": 1.2164706984587998, "grad_norm": 0.6634020135322548, "learning_rate": 1.756517446713043e-06, "loss": 0.3022, "step": 25968 }, { "epoch": 1.216517543448728, "grad_norm": 0.6095837830541768, "learning_rate": 1.7563363793962824e-06, "loss": 0.2759, "step": 25969 }, { "epoch": 1.2165643884386566, "grad_norm": 0.616156789938302, "learning_rate": 1.7561553163591199e-06, "loss": 0.2539, "step": 25970 }, { "epoch": 1.2166112334285848, "grad_norm": 0.6278179671839953, "learning_rate": 1.7559742576025954e-06, "loss": 0.2836, "step": 25971 }, { "epoch": 1.2166580784185133, "grad_norm": 0.5393216128483813, "learning_rate": 1.7557932031277525e-06, "loss": 0.2583, "step": 25972 }, { "epoch": 1.2167049234084415, "grad_norm": 0.6048215855358166, "learning_rate": 1.7556121529356324e-06, "loss": 0.2786, "step": 25973 }, { "epoch": 1.2167517683983697, "grad_norm": 0.6141746911548127, "learning_rate": 1.7554311070272772e-06, "loss": 0.2848, "step": 25974 }, { "epoch": 1.2167986133882982, "grad_norm": 0.6166218970464011, "learning_rate": 1.7552500654037293e-06, "loss": 0.277, "step": 25975 }, { "epoch": 1.2168454583782264, "grad_norm": 0.583167740758436, "learning_rate": 1.7550690280660307e-06, "loss": 0.2771, "step": 25976 }, { "epoch": 1.2168923033681547, "grad_norm": 0.5544613130987516, "learning_rate": 1.7548879950152215e-06, "loss": 0.2604, "step": 25977 }, { "epoch": 1.2169391483580831, "grad_norm": 0.5658701034959006, "learning_rate": 1.7547069662523446e-06, "loss": 0.253, "step": 25978 }, { "epoch": 1.2169859933480114, "grad_norm": 0.6032491004117287, "learning_rate": 1.7545259417784424e-06, "loss": 0.2744, "step": 25979 }, { "epoch": 1.2170328383379398, "grad_norm": 0.5988308771325724, "learning_rate": 1.7543449215945554e-06, "loss": 0.2731, "step": 25980 }, { "epoch": 1.217079683327868, "grad_norm": 0.6290633586767145, "learning_rate": 1.754163905701726e-06, "loss": 0.2769, "step": 25981 }, { "epoch": 1.2171265283177963, "grad_norm": 0.576157987323093, "learning_rate": 1.7539828941009962e-06, "loss": 0.2854, "step": 25982 }, { "epoch": 1.2171733733077248, "grad_norm": 0.6437751863564656, "learning_rate": 1.753801886793408e-06, "loss": 0.2904, "step": 25983 }, { "epoch": 1.217220218297653, "grad_norm": 0.5677207191477317, "learning_rate": 1.7536208837800018e-06, "loss": 0.2685, "step": 25984 }, { "epoch": 1.2172670632875815, "grad_norm": 0.6507503075812017, "learning_rate": 1.7534398850618192e-06, "loss": 0.3086, "step": 25985 }, { "epoch": 1.2173139082775097, "grad_norm": 0.6068254620374407, "learning_rate": 1.7532588906399035e-06, "loss": 0.2698, "step": 25986 }, { "epoch": 1.217360753267438, "grad_norm": 0.5825878408576443, "learning_rate": 1.7530779005152943e-06, "loss": 0.2674, "step": 25987 }, { "epoch": 1.2174075982573664, "grad_norm": 0.6269938133197055, "learning_rate": 1.752896914689034e-06, "loss": 0.2916, "step": 25988 }, { "epoch": 1.2174544432472947, "grad_norm": 0.6500784015271419, "learning_rate": 1.7527159331621652e-06, "loss": 0.2758, "step": 25989 }, { "epoch": 1.217501288237223, "grad_norm": 0.5831599006644594, "learning_rate": 1.752534955935728e-06, "loss": 0.2764, "step": 25990 }, { "epoch": 1.2175481332271514, "grad_norm": 0.6158237592900563, "learning_rate": 1.7523539830107639e-06, "loss": 0.2771, "step": 25991 }, { "epoch": 1.2175949782170796, "grad_norm": 0.6699438952401415, "learning_rate": 1.7521730143883143e-06, "loss": 0.283, "step": 25992 }, { "epoch": 1.217641823207008, "grad_norm": 0.5738852740840359, "learning_rate": 1.7519920500694218e-06, "loss": 0.2587, "step": 25993 }, { "epoch": 1.2176886681969363, "grad_norm": 0.5724045276549338, "learning_rate": 1.7518110900551267e-06, "loss": 0.2709, "step": 25994 }, { "epoch": 1.2177355131868646, "grad_norm": 0.5419650259750068, "learning_rate": 1.7516301343464713e-06, "loss": 0.2585, "step": 25995 }, { "epoch": 1.217782358176793, "grad_norm": 0.5865904818493409, "learning_rate": 1.7514491829444957e-06, "loss": 0.2612, "step": 25996 }, { "epoch": 1.2178292031667213, "grad_norm": 0.6097542776060367, "learning_rate": 1.7512682358502425e-06, "loss": 0.2766, "step": 25997 }, { "epoch": 1.2178760481566497, "grad_norm": 0.6028541318072985, "learning_rate": 1.7510872930647517e-06, "loss": 0.2781, "step": 25998 }, { "epoch": 1.217922893146578, "grad_norm": 0.5797134246002382, "learning_rate": 1.7509063545890653e-06, "loss": 0.2701, "step": 25999 }, { "epoch": 1.2179697381365062, "grad_norm": 0.6154876537137826, "learning_rate": 1.7507254204242251e-06, "loss": 0.2779, "step": 26000 }, { "epoch": 1.2180165831264347, "grad_norm": 0.6673873299669136, "learning_rate": 1.7505444905712723e-06, "loss": 0.2776, "step": 26001 }, { "epoch": 1.218063428116363, "grad_norm": 0.6543067940160323, "learning_rate": 1.7503635650312467e-06, "loss": 0.2946, "step": 26002 }, { "epoch": 1.2181102731062912, "grad_norm": 0.5693267499521442, "learning_rate": 1.75018264380519e-06, "loss": 0.2641, "step": 26003 }, { "epoch": 1.2181571180962196, "grad_norm": 0.5381830763246981, "learning_rate": 1.7500017268941446e-06, "loss": 0.2825, "step": 26004 }, { "epoch": 1.2182039630861479, "grad_norm": 0.5844535173503919, "learning_rate": 1.74982081429915e-06, "loss": 0.2654, "step": 26005 }, { "epoch": 1.2182508080760763, "grad_norm": 0.5922052457171687, "learning_rate": 1.7496399060212483e-06, "loss": 0.2825, "step": 26006 }, { "epoch": 1.2182976530660046, "grad_norm": 0.6274637078128609, "learning_rate": 1.7494590020614813e-06, "loss": 0.2848, "step": 26007 }, { "epoch": 1.218344498055933, "grad_norm": 0.615176220152075, "learning_rate": 1.7492781024208884e-06, "loss": 0.2897, "step": 26008 }, { "epoch": 1.2183913430458613, "grad_norm": 0.6058177447625297, "learning_rate": 1.749097207100511e-06, "loss": 0.278, "step": 26009 }, { "epoch": 1.2184381880357895, "grad_norm": 0.5574570537862085, "learning_rate": 1.7489163161013905e-06, "loss": 0.2683, "step": 26010 }, { "epoch": 1.218485033025718, "grad_norm": 0.5921455282753266, "learning_rate": 1.7487354294245685e-06, "loss": 0.2828, "step": 26011 }, { "epoch": 1.2185318780156462, "grad_norm": 0.5978578581907587, "learning_rate": 1.7485545470710841e-06, "loss": 0.2872, "step": 26012 }, { "epoch": 1.2185787230055745, "grad_norm": 0.5466909249036748, "learning_rate": 1.7483736690419812e-06, "loss": 0.2607, "step": 26013 }, { "epoch": 1.218625567995503, "grad_norm": 0.6135266468729494, "learning_rate": 1.7481927953382974e-06, "loss": 0.289, "step": 26014 }, { "epoch": 1.2186724129854312, "grad_norm": 0.6017342179876533, "learning_rate": 1.7480119259610755e-06, "loss": 0.2887, "step": 26015 }, { "epoch": 1.2187192579753596, "grad_norm": 0.6446447682461541, "learning_rate": 1.7478310609113558e-06, "loss": 0.2937, "step": 26016 }, { "epoch": 1.2187661029652879, "grad_norm": 0.6089351200659985, "learning_rate": 1.7476502001901799e-06, "loss": 0.2695, "step": 26017 }, { "epoch": 1.218812947955216, "grad_norm": 0.556787849667404, "learning_rate": 1.7474693437985874e-06, "loss": 0.2657, "step": 26018 }, { "epoch": 1.2188597929451446, "grad_norm": 0.5730532375167555, "learning_rate": 1.7472884917376205e-06, "loss": 0.2566, "step": 26019 }, { "epoch": 1.2189066379350728, "grad_norm": 0.5701409475707367, "learning_rate": 1.7471076440083182e-06, "loss": 0.2613, "step": 26020 }, { "epoch": 1.2189534829250013, "grad_norm": 0.6178851956768098, "learning_rate": 1.746926800611723e-06, "loss": 0.2924, "step": 26021 }, { "epoch": 1.2190003279149295, "grad_norm": 0.6255727503955724, "learning_rate": 1.7467459615488737e-06, "loss": 0.2654, "step": 26022 }, { "epoch": 1.2190471729048578, "grad_norm": 0.5732867800922107, "learning_rate": 1.7465651268208123e-06, "loss": 0.2679, "step": 26023 }, { "epoch": 1.2190940178947862, "grad_norm": 0.5684235062086865, "learning_rate": 1.74638429642858e-06, "loss": 0.2694, "step": 26024 }, { "epoch": 1.2191408628847145, "grad_norm": 0.5839267926901163, "learning_rate": 1.746203470373217e-06, "loss": 0.2675, "step": 26025 }, { "epoch": 1.2191877078746427, "grad_norm": 0.582492772156586, "learning_rate": 1.7460226486557624e-06, "loss": 0.2746, "step": 26026 }, { "epoch": 1.2192345528645712, "grad_norm": 0.5634047456926781, "learning_rate": 1.745841831277258e-06, "loss": 0.2681, "step": 26027 }, { "epoch": 1.2192813978544994, "grad_norm": 0.6319239627496914, "learning_rate": 1.745661018238745e-06, "loss": 0.2744, "step": 26028 }, { "epoch": 1.2193282428444279, "grad_norm": 0.5979102590978349, "learning_rate": 1.7454802095412627e-06, "loss": 0.2862, "step": 26029 }, { "epoch": 1.219375087834356, "grad_norm": 0.5964385309997723, "learning_rate": 1.7452994051858518e-06, "loss": 0.2663, "step": 26030 }, { "epoch": 1.2194219328242843, "grad_norm": 0.6068315941401972, "learning_rate": 1.7451186051735548e-06, "loss": 0.292, "step": 26031 }, { "epoch": 1.2194687778142128, "grad_norm": 0.6345206448580034, "learning_rate": 1.7449378095054092e-06, "loss": 0.3035, "step": 26032 }, { "epoch": 1.219515622804141, "grad_norm": 0.5864351090169142, "learning_rate": 1.744757018182457e-06, "loss": 0.2645, "step": 26033 }, { "epoch": 1.2195624677940695, "grad_norm": 0.6629359546090334, "learning_rate": 1.744576231205738e-06, "loss": 0.3097, "step": 26034 }, { "epoch": 1.2196093127839978, "grad_norm": 0.549960232788721, "learning_rate": 1.7443954485762932e-06, "loss": 0.2543, "step": 26035 }, { "epoch": 1.219656157773926, "grad_norm": 0.5772819162917726, "learning_rate": 1.7442146702951624e-06, "loss": 0.2664, "step": 26036 }, { "epoch": 1.2197030027638545, "grad_norm": 0.6035912057652106, "learning_rate": 1.7440338963633874e-06, "loss": 0.2751, "step": 26037 }, { "epoch": 1.2197498477537827, "grad_norm": 0.569148158021884, "learning_rate": 1.7438531267820058e-06, "loss": 0.2582, "step": 26038 }, { "epoch": 1.219796692743711, "grad_norm": 0.5397430884456861, "learning_rate": 1.7436723615520604e-06, "loss": 0.2484, "step": 26039 }, { "epoch": 1.2198435377336394, "grad_norm": 0.6231248396877014, "learning_rate": 1.7434916006745897e-06, "loss": 0.2773, "step": 26040 }, { "epoch": 1.2198903827235676, "grad_norm": 0.6038278490121461, "learning_rate": 1.7433108441506347e-06, "loss": 0.2716, "step": 26041 }, { "epoch": 1.219937227713496, "grad_norm": 0.5903385240192448, "learning_rate": 1.7431300919812363e-06, "loss": 0.2724, "step": 26042 }, { "epoch": 1.2199840727034243, "grad_norm": 0.6195180891957092, "learning_rate": 1.7429493441674344e-06, "loss": 0.2799, "step": 26043 }, { "epoch": 1.2200309176933528, "grad_norm": 0.6080376709876308, "learning_rate": 1.742768600710268e-06, "loss": 0.2756, "step": 26044 }, { "epoch": 1.220077762683281, "grad_norm": 0.6237354427432007, "learning_rate": 1.7425878616107774e-06, "loss": 0.2801, "step": 26045 }, { "epoch": 1.2201246076732093, "grad_norm": 0.5932828919597548, "learning_rate": 1.7424071268700043e-06, "loss": 0.2864, "step": 26046 }, { "epoch": 1.2201714526631378, "grad_norm": 0.6303306974204309, "learning_rate": 1.7422263964889869e-06, "loss": 0.2774, "step": 26047 }, { "epoch": 1.220218297653066, "grad_norm": 0.5267055145857392, "learning_rate": 1.7420456704687663e-06, "loss": 0.2735, "step": 26048 }, { "epoch": 1.2202651426429942, "grad_norm": 0.6018557161263128, "learning_rate": 1.7418649488103828e-06, "loss": 0.2732, "step": 26049 }, { "epoch": 1.2203119876329227, "grad_norm": 0.5719018832985965, "learning_rate": 1.7416842315148767e-06, "loss": 0.2632, "step": 26050 }, { "epoch": 1.220358832622851, "grad_norm": 0.5698012748925645, "learning_rate": 1.741503518583286e-06, "loss": 0.2666, "step": 26051 }, { "epoch": 1.2204056776127794, "grad_norm": 0.631099368876178, "learning_rate": 1.7413228100166517e-06, "loss": 0.2692, "step": 26052 }, { "epoch": 1.2204525226027076, "grad_norm": 0.5620591320195849, "learning_rate": 1.741142105816015e-06, "loss": 0.2776, "step": 26053 }, { "epoch": 1.2204993675926359, "grad_norm": 0.5807439745016115, "learning_rate": 1.740961405982414e-06, "loss": 0.2645, "step": 26054 }, { "epoch": 1.2205462125825643, "grad_norm": 0.6057032007100354, "learning_rate": 1.7407807105168898e-06, "loss": 0.269, "step": 26055 }, { "epoch": 1.2205930575724926, "grad_norm": 0.5992698855151988, "learning_rate": 1.7406000194204816e-06, "loss": 0.2536, "step": 26056 }, { "epoch": 1.220639902562421, "grad_norm": 0.5944876299841414, "learning_rate": 1.7404193326942298e-06, "loss": 0.2769, "step": 26057 }, { "epoch": 1.2206867475523493, "grad_norm": 0.5964831372647115, "learning_rate": 1.7402386503391733e-06, "loss": 0.2631, "step": 26058 }, { "epoch": 1.2207335925422775, "grad_norm": 0.6133166729500732, "learning_rate": 1.7400579723563526e-06, "loss": 0.2864, "step": 26059 }, { "epoch": 1.220780437532206, "grad_norm": 0.6321366221760154, "learning_rate": 1.7398772987468068e-06, "loss": 0.2975, "step": 26060 }, { "epoch": 1.2208272825221342, "grad_norm": 0.5319718665434336, "learning_rate": 1.7396966295115763e-06, "loss": 0.2504, "step": 26061 }, { "epoch": 1.2208741275120625, "grad_norm": 0.5813200278662313, "learning_rate": 1.7395159646517016e-06, "loss": 0.2753, "step": 26062 }, { "epoch": 1.220920972501991, "grad_norm": 0.6019158798445541, "learning_rate": 1.7393353041682207e-06, "loss": 0.2598, "step": 26063 }, { "epoch": 1.2209678174919192, "grad_norm": 0.5822167310688432, "learning_rate": 1.7391546480621735e-06, "loss": 0.2625, "step": 26064 }, { "epoch": 1.2210146624818476, "grad_norm": 0.5843904732927586, "learning_rate": 1.7389739963346004e-06, "loss": 0.2683, "step": 26065 }, { "epoch": 1.2210615074717759, "grad_norm": 0.6687805895326835, "learning_rate": 1.7387933489865405e-06, "loss": 0.313, "step": 26066 }, { "epoch": 1.2211083524617041, "grad_norm": 0.6109938812944189, "learning_rate": 1.7386127060190334e-06, "loss": 0.2835, "step": 26067 }, { "epoch": 1.2211551974516326, "grad_norm": 0.5754823697197483, "learning_rate": 1.73843206743312e-06, "loss": 0.2729, "step": 26068 }, { "epoch": 1.2212020424415608, "grad_norm": 0.576022857699867, "learning_rate": 1.7382514332298376e-06, "loss": 0.2677, "step": 26069 }, { "epoch": 1.2212488874314893, "grad_norm": 0.5911510512027424, "learning_rate": 1.738070803410227e-06, "loss": 0.2776, "step": 26070 }, { "epoch": 1.2212957324214175, "grad_norm": 0.547029835324453, "learning_rate": 1.737890177975327e-06, "loss": 0.2643, "step": 26071 }, { "epoch": 1.2213425774113458, "grad_norm": 0.5932480712878335, "learning_rate": 1.7377095569261776e-06, "loss": 0.2763, "step": 26072 }, { "epoch": 1.2213894224012742, "grad_norm": 0.594958151997035, "learning_rate": 1.7375289402638184e-06, "loss": 0.2717, "step": 26073 }, { "epoch": 1.2214362673912025, "grad_norm": 0.61394843383219, "learning_rate": 1.7373483279892895e-06, "loss": 0.2879, "step": 26074 }, { "epoch": 1.2214831123811307, "grad_norm": 0.606796812784005, "learning_rate": 1.7371677201036278e-06, "loss": 0.2706, "step": 26075 }, { "epoch": 1.2215299573710592, "grad_norm": 0.5632389774427602, "learning_rate": 1.7369871166078742e-06, "loss": 0.2634, "step": 26076 }, { "epoch": 1.2215768023609874, "grad_norm": 0.59373916226781, "learning_rate": 1.7368065175030684e-06, "loss": 0.2671, "step": 26077 }, { "epoch": 1.2216236473509159, "grad_norm": 0.5677944246009328, "learning_rate": 1.7366259227902487e-06, "loss": 0.2646, "step": 26078 }, { "epoch": 1.2216704923408441, "grad_norm": 0.5410197597201001, "learning_rate": 1.7364453324704556e-06, "loss": 0.2612, "step": 26079 }, { "epoch": 1.2217173373307726, "grad_norm": 0.5960062926691773, "learning_rate": 1.736264746544728e-06, "loss": 0.267, "step": 26080 }, { "epoch": 1.2217641823207008, "grad_norm": 0.6321962311403814, "learning_rate": 1.7360841650141046e-06, "loss": 0.2943, "step": 26081 }, { "epoch": 1.221811027310629, "grad_norm": 0.5930929399661916, "learning_rate": 1.7359035878796244e-06, "loss": 0.2764, "step": 26082 }, { "epoch": 1.2218578723005575, "grad_norm": 0.6182612701720286, "learning_rate": 1.7357230151423269e-06, "loss": 0.2678, "step": 26083 }, { "epoch": 1.2219047172904858, "grad_norm": 0.6513003353464949, "learning_rate": 1.7355424468032517e-06, "loss": 0.2829, "step": 26084 }, { "epoch": 1.221951562280414, "grad_norm": 0.5883931000068057, "learning_rate": 1.7353618828634372e-06, "loss": 0.2849, "step": 26085 }, { "epoch": 1.2219984072703425, "grad_norm": 0.5983888192064767, "learning_rate": 1.735181323323924e-06, "loss": 0.2506, "step": 26086 }, { "epoch": 1.2220452522602707, "grad_norm": 0.6137138790727394, "learning_rate": 1.735000768185749e-06, "loss": 0.2726, "step": 26087 }, { "epoch": 1.2220920972501992, "grad_norm": 0.6049112343493653, "learning_rate": 1.7348202174499529e-06, "loss": 0.2699, "step": 26088 }, { "epoch": 1.2221389422401274, "grad_norm": 0.5577445822466465, "learning_rate": 1.7346396711175734e-06, "loss": 0.266, "step": 26089 }, { "epoch": 1.2221857872300557, "grad_norm": 0.5928668832473202, "learning_rate": 1.7344591291896504e-06, "loss": 0.2708, "step": 26090 }, { "epoch": 1.2222326322199841, "grad_norm": 0.577856733219101, "learning_rate": 1.7342785916672231e-06, "loss": 0.2768, "step": 26091 }, { "epoch": 1.2222794772099124, "grad_norm": 0.5792311196410981, "learning_rate": 1.7340980585513306e-06, "loss": 0.2696, "step": 26092 }, { "epoch": 1.2223263221998408, "grad_norm": 0.5975954933496619, "learning_rate": 1.7339175298430102e-06, "loss": 0.2715, "step": 26093 }, { "epoch": 1.222373167189769, "grad_norm": 0.6149395956139196, "learning_rate": 1.733737005543302e-06, "loss": 0.2805, "step": 26094 }, { "epoch": 1.2224200121796973, "grad_norm": 0.5890894378994781, "learning_rate": 1.733556485653245e-06, "loss": 0.2815, "step": 26095 }, { "epoch": 1.2224668571696258, "grad_norm": 0.6266792422653861, "learning_rate": 1.7333759701738775e-06, "loss": 0.28, "step": 26096 }, { "epoch": 1.222513702159554, "grad_norm": 0.6286790955515754, "learning_rate": 1.7331954591062386e-06, "loss": 0.28, "step": 26097 }, { "epoch": 1.2225605471494823, "grad_norm": 0.6556546166940866, "learning_rate": 1.7330149524513682e-06, "loss": 0.2888, "step": 26098 }, { "epoch": 1.2226073921394107, "grad_norm": 0.6048219871406838, "learning_rate": 1.7328344502103034e-06, "loss": 0.2756, "step": 26099 }, { "epoch": 1.222654237129339, "grad_norm": 0.5953018863474261, "learning_rate": 1.732653952384083e-06, "loss": 0.2741, "step": 26100 }, { "epoch": 1.2227010821192674, "grad_norm": 0.6049052506599802, "learning_rate": 1.7324734589737457e-06, "loss": 0.2814, "step": 26101 }, { "epoch": 1.2227479271091957, "grad_norm": 0.6201632688827275, "learning_rate": 1.732292969980332e-06, "loss": 0.2924, "step": 26102 }, { "epoch": 1.222794772099124, "grad_norm": 0.5691235377866397, "learning_rate": 1.7321124854048786e-06, "loss": 0.2759, "step": 26103 }, { "epoch": 1.2228416170890524, "grad_norm": 0.5793533964876255, "learning_rate": 1.7319320052484257e-06, "loss": 0.2774, "step": 26104 }, { "epoch": 1.2228884620789806, "grad_norm": 0.6189364423250431, "learning_rate": 1.7317515295120102e-06, "loss": 0.2912, "step": 26105 }, { "epoch": 1.222935307068909, "grad_norm": 0.5443263479684236, "learning_rate": 1.7315710581966719e-06, "loss": 0.2695, "step": 26106 }, { "epoch": 1.2229821520588373, "grad_norm": 0.6168091251530761, "learning_rate": 1.7313905913034484e-06, "loss": 0.284, "step": 26107 }, { "epoch": 1.2230289970487656, "grad_norm": 0.5781369846007484, "learning_rate": 1.73121012883338e-06, "loss": 0.2753, "step": 26108 }, { "epoch": 1.223075842038694, "grad_norm": 0.5485476973904571, "learning_rate": 1.731029670787503e-06, "loss": 0.2764, "step": 26109 }, { "epoch": 1.2231226870286223, "grad_norm": 0.6753315376867879, "learning_rate": 1.730849217166858e-06, "loss": 0.2774, "step": 26110 }, { "epoch": 1.2231695320185505, "grad_norm": 0.6314783445103602, "learning_rate": 1.7306687679724816e-06, "loss": 0.2812, "step": 26111 }, { "epoch": 1.223216377008479, "grad_norm": 0.5965417703424409, "learning_rate": 1.7304883232054136e-06, "loss": 0.2741, "step": 26112 }, { "epoch": 1.2232632219984072, "grad_norm": 0.6139547858894436, "learning_rate": 1.7303078828666913e-06, "loss": 0.2892, "step": 26113 }, { "epoch": 1.2233100669883357, "grad_norm": 0.6723097641963008, "learning_rate": 1.7301274469573537e-06, "loss": 0.3018, "step": 26114 }, { "epoch": 1.223356911978264, "grad_norm": 0.6191072524031505, "learning_rate": 1.7299470154784397e-06, "loss": 0.2856, "step": 26115 }, { "epoch": 1.2234037569681924, "grad_norm": 0.6323140047536019, "learning_rate": 1.7297665884309867e-06, "loss": 0.2788, "step": 26116 }, { "epoch": 1.2234506019581206, "grad_norm": 0.6497856368118344, "learning_rate": 1.729586165816034e-06, "loss": 0.3009, "step": 26117 }, { "epoch": 1.2234974469480489, "grad_norm": 0.5936897024654773, "learning_rate": 1.7294057476346188e-06, "loss": 0.2779, "step": 26118 }, { "epoch": 1.2235442919379773, "grad_norm": 0.7091453188329289, "learning_rate": 1.7292253338877799e-06, "loss": 0.286, "step": 26119 }, { "epoch": 1.2235911369279056, "grad_norm": 0.5739043649903267, "learning_rate": 1.729044924576555e-06, "loss": 0.2686, "step": 26120 }, { "epoch": 1.2236379819178338, "grad_norm": 0.6130642335261937, "learning_rate": 1.728864519701983e-06, "loss": 0.2844, "step": 26121 }, { "epoch": 1.2236848269077623, "grad_norm": 0.5714051891948773, "learning_rate": 1.7286841192651022e-06, "loss": 0.2749, "step": 26122 }, { "epoch": 1.2237316718976905, "grad_norm": 0.5795720412999563, "learning_rate": 1.728503723266951e-06, "loss": 0.2605, "step": 26123 }, { "epoch": 1.223778516887619, "grad_norm": 0.5882551853685772, "learning_rate": 1.728323331708566e-06, "loss": 0.2769, "step": 26124 }, { "epoch": 1.2238253618775472, "grad_norm": 0.5625209514950622, "learning_rate": 1.7281429445909865e-06, "loss": 0.271, "step": 26125 }, { "epoch": 1.2238722068674754, "grad_norm": 0.6032725908215312, "learning_rate": 1.7279625619152505e-06, "loss": 0.274, "step": 26126 }, { "epoch": 1.223919051857404, "grad_norm": 0.5882190237205654, "learning_rate": 1.7277821836823954e-06, "loss": 0.268, "step": 26127 }, { "epoch": 1.2239658968473321, "grad_norm": 0.6176965698127627, "learning_rate": 1.72760180989346e-06, "loss": 0.2772, "step": 26128 }, { "epoch": 1.2240127418372606, "grad_norm": 0.6139977150572598, "learning_rate": 1.7274214405494826e-06, "loss": 0.2755, "step": 26129 }, { "epoch": 1.2240595868271889, "grad_norm": 0.6366898467067238, "learning_rate": 1.7272410756515007e-06, "loss": 0.2757, "step": 26130 }, { "epoch": 1.224106431817117, "grad_norm": 0.5792322468121037, "learning_rate": 1.7270607152005514e-06, "loss": 0.2792, "step": 26131 }, { "epoch": 1.2241532768070456, "grad_norm": 0.611144825600717, "learning_rate": 1.7268803591976735e-06, "loss": 0.2845, "step": 26132 }, { "epoch": 1.2242001217969738, "grad_norm": 0.6040165652335414, "learning_rate": 1.726700007643905e-06, "loss": 0.2734, "step": 26133 }, { "epoch": 1.224246966786902, "grad_norm": 0.5733743164978196, "learning_rate": 1.7265196605402834e-06, "loss": 0.2635, "step": 26134 }, { "epoch": 1.2242938117768305, "grad_norm": 0.5987068690612821, "learning_rate": 1.726339317887848e-06, "loss": 0.2795, "step": 26135 }, { "epoch": 1.2243406567667587, "grad_norm": 0.6268566253230896, "learning_rate": 1.7261589796876339e-06, "loss": 0.305, "step": 26136 }, { "epoch": 1.2243875017566872, "grad_norm": 0.6799384250577549, "learning_rate": 1.7259786459406807e-06, "loss": 0.2863, "step": 26137 }, { "epoch": 1.2244343467466154, "grad_norm": 0.6274844023961169, "learning_rate": 1.7257983166480258e-06, "loss": 0.276, "step": 26138 }, { "epoch": 1.2244811917365437, "grad_norm": 0.5799823265846459, "learning_rate": 1.7256179918107069e-06, "loss": 0.2709, "step": 26139 }, { "epoch": 1.2245280367264721, "grad_norm": 0.6260916514476941, "learning_rate": 1.7254376714297625e-06, "loss": 0.2961, "step": 26140 }, { "epoch": 1.2245748817164004, "grad_norm": 0.5962908009931028, "learning_rate": 1.72525735550623e-06, "loss": 0.275, "step": 26141 }, { "epoch": 1.2246217267063289, "grad_norm": 0.5904896212792329, "learning_rate": 1.7250770440411454e-06, "loss": 0.2561, "step": 26142 }, { "epoch": 1.224668571696257, "grad_norm": 0.5769562038491214, "learning_rate": 1.724896737035548e-06, "loss": 0.2878, "step": 26143 }, { "epoch": 1.2247154166861853, "grad_norm": 0.5447008412305546, "learning_rate": 1.7247164344904754e-06, "loss": 0.2559, "step": 26144 }, { "epoch": 1.2247622616761138, "grad_norm": 0.548747848365307, "learning_rate": 1.7245361364069644e-06, "loss": 0.2489, "step": 26145 }, { "epoch": 1.224809106666042, "grad_norm": 0.5932326781996865, "learning_rate": 1.724355842786053e-06, "loss": 0.2838, "step": 26146 }, { "epoch": 1.2248559516559703, "grad_norm": 0.6029765223334512, "learning_rate": 1.7241755536287795e-06, "loss": 0.2772, "step": 26147 }, { "epoch": 1.2249027966458987, "grad_norm": 0.5526298366086472, "learning_rate": 1.7239952689361807e-06, "loss": 0.2758, "step": 26148 }, { "epoch": 1.224949641635827, "grad_norm": 0.6208102424175297, "learning_rate": 1.7238149887092936e-06, "loss": 0.2803, "step": 26149 }, { "epoch": 1.2249964866257554, "grad_norm": 0.5533576544783478, "learning_rate": 1.723634712949156e-06, "loss": 0.2634, "step": 26150 }, { "epoch": 1.2250433316156837, "grad_norm": 0.5901455820938234, "learning_rate": 1.7234544416568058e-06, "loss": 0.2828, "step": 26151 }, { "epoch": 1.2250901766056121, "grad_norm": 0.5895242131425235, "learning_rate": 1.7232741748332799e-06, "loss": 0.2995, "step": 26152 }, { "epoch": 1.2251370215955404, "grad_norm": 0.6372475986456131, "learning_rate": 1.7230939124796165e-06, "loss": 0.2911, "step": 26153 }, { "epoch": 1.2251838665854686, "grad_norm": 0.5957367602777027, "learning_rate": 1.7229136545968516e-06, "loss": 0.2665, "step": 26154 }, { "epoch": 1.225230711575397, "grad_norm": 0.5448530550010983, "learning_rate": 1.7227334011860239e-06, "loss": 0.2636, "step": 26155 }, { "epoch": 1.2252775565653253, "grad_norm": 0.5988050926551179, "learning_rate": 1.7225531522481696e-06, "loss": 0.284, "step": 26156 }, { "epoch": 1.2253244015552536, "grad_norm": 0.5819564390507836, "learning_rate": 1.7223729077843268e-06, "loss": 0.268, "step": 26157 }, { "epoch": 1.225371246545182, "grad_norm": 0.5912750098782839, "learning_rate": 1.722192667795532e-06, "loss": 0.2752, "step": 26158 }, { "epoch": 1.2254180915351103, "grad_norm": 0.5777281270255112, "learning_rate": 1.722012432282824e-06, "loss": 0.2666, "step": 26159 }, { "epoch": 1.2254649365250387, "grad_norm": 0.6689772449103848, "learning_rate": 1.721832201247238e-06, "loss": 0.3112, "step": 26160 }, { "epoch": 1.225511781514967, "grad_norm": 0.600672092724866, "learning_rate": 1.7216519746898124e-06, "loss": 0.3055, "step": 26161 }, { "epoch": 1.2255586265048952, "grad_norm": 0.5916625395823376, "learning_rate": 1.7214717526115838e-06, "loss": 0.274, "step": 26162 }, { "epoch": 1.2256054714948237, "grad_norm": 0.5969124342024791, "learning_rate": 1.7212915350135894e-06, "loss": 0.2824, "step": 26163 }, { "epoch": 1.225652316484752, "grad_norm": 0.6058559061754609, "learning_rate": 1.7211113218968673e-06, "loss": 0.2866, "step": 26164 }, { "epoch": 1.2256991614746804, "grad_norm": 0.5892884806485108, "learning_rate": 1.720931113262454e-06, "loss": 0.2793, "step": 26165 }, { "epoch": 1.2257460064646086, "grad_norm": 0.5804886634211631, "learning_rate": 1.720750909111385e-06, "loss": 0.2771, "step": 26166 }, { "epoch": 1.2257928514545369, "grad_norm": 0.5745565267574974, "learning_rate": 1.7205707094446991e-06, "loss": 0.2739, "step": 26167 }, { "epoch": 1.2258396964444653, "grad_norm": 0.5876748754387379, "learning_rate": 1.7203905142634334e-06, "loss": 0.2642, "step": 26168 }, { "epoch": 1.2258865414343936, "grad_norm": 0.6067512236806797, "learning_rate": 1.7202103235686235e-06, "loss": 0.2693, "step": 26169 }, { "epoch": 1.2259333864243218, "grad_norm": 0.5883646837972769, "learning_rate": 1.7200301373613076e-06, "loss": 0.2628, "step": 26170 }, { "epoch": 1.2259802314142503, "grad_norm": 0.591403508141441, "learning_rate": 1.719849955642523e-06, "loss": 0.2827, "step": 26171 }, { "epoch": 1.2260270764041785, "grad_norm": 0.632004130091583, "learning_rate": 1.7196697784133054e-06, "loss": 0.2728, "step": 26172 }, { "epoch": 1.226073921394107, "grad_norm": 0.6262546639863673, "learning_rate": 1.7194896056746915e-06, "loss": 0.2753, "step": 26173 }, { "epoch": 1.2261207663840352, "grad_norm": 0.600339230990445, "learning_rate": 1.7193094374277186e-06, "loss": 0.2824, "step": 26174 }, { "epoch": 1.2261676113739635, "grad_norm": 0.60022987426717, "learning_rate": 1.7191292736734244e-06, "loss": 0.2666, "step": 26175 }, { "epoch": 1.226214456363892, "grad_norm": 0.5852205333626828, "learning_rate": 1.7189491144128445e-06, "loss": 0.2768, "step": 26176 }, { "epoch": 1.2262613013538202, "grad_norm": 0.5939352222862839, "learning_rate": 1.7187689596470172e-06, "loss": 0.2722, "step": 26177 }, { "epoch": 1.2263081463437486, "grad_norm": 0.6351773270557831, "learning_rate": 1.718588809376977e-06, "loss": 0.2864, "step": 26178 }, { "epoch": 1.2263549913336769, "grad_norm": 0.5623446128634543, "learning_rate": 1.7184086636037622e-06, "loss": 0.2779, "step": 26179 }, { "epoch": 1.226401836323605, "grad_norm": 0.5942636475650195, "learning_rate": 1.718228522328409e-06, "loss": 0.2578, "step": 26180 }, { "epoch": 1.2264486813135336, "grad_norm": 0.6033318004301892, "learning_rate": 1.718048385551954e-06, "loss": 0.2659, "step": 26181 }, { "epoch": 1.2264955263034618, "grad_norm": 0.5276937799567383, "learning_rate": 1.7178682532754342e-06, "loss": 0.2448, "step": 26182 }, { "epoch": 1.22654237129339, "grad_norm": 0.6173093071489364, "learning_rate": 1.7176881254998868e-06, "loss": 0.2842, "step": 26183 }, { "epoch": 1.2265892162833185, "grad_norm": 0.6329832283591216, "learning_rate": 1.7175080022263466e-06, "loss": 0.2822, "step": 26184 }, { "epoch": 1.2266360612732468, "grad_norm": 0.5982476166420899, "learning_rate": 1.717327883455851e-06, "loss": 0.2755, "step": 26185 }, { "epoch": 1.2266829062631752, "grad_norm": 0.5852746712673212, "learning_rate": 1.7171477691894372e-06, "loss": 0.264, "step": 26186 }, { "epoch": 1.2267297512531035, "grad_norm": 0.5357296139139945, "learning_rate": 1.716967659428141e-06, "loss": 0.2685, "step": 26187 }, { "epoch": 1.226776596243032, "grad_norm": 0.560728627115505, "learning_rate": 1.7167875541729988e-06, "loss": 0.2645, "step": 26188 }, { "epoch": 1.2268234412329602, "grad_norm": 0.6258987450086464, "learning_rate": 1.7166074534250482e-06, "loss": 0.2728, "step": 26189 }, { "epoch": 1.2268702862228884, "grad_norm": 0.578128837709396, "learning_rate": 1.7164273571853251e-06, "loss": 0.2664, "step": 26190 }, { "epoch": 1.2269171312128169, "grad_norm": 0.5819740534570652, "learning_rate": 1.7162472654548652e-06, "loss": 0.2697, "step": 26191 }, { "epoch": 1.226963976202745, "grad_norm": 0.6049804592667519, "learning_rate": 1.7160671782347043e-06, "loss": 0.2874, "step": 26192 }, { "epoch": 1.2270108211926734, "grad_norm": 0.6284015057590724, "learning_rate": 1.715887095525881e-06, "loss": 0.294, "step": 26193 }, { "epoch": 1.2270576661826018, "grad_norm": 0.6211948012111409, "learning_rate": 1.7157070173294294e-06, "loss": 0.278, "step": 26194 }, { "epoch": 1.22710451117253, "grad_norm": 0.5913734535572684, "learning_rate": 1.7155269436463875e-06, "loss": 0.2744, "step": 26195 }, { "epoch": 1.2271513561624585, "grad_norm": 0.6206115152002147, "learning_rate": 1.715346874477791e-06, "loss": 0.3031, "step": 26196 }, { "epoch": 1.2271982011523868, "grad_norm": 0.5517149121242922, "learning_rate": 1.715166809824676e-06, "loss": 0.2647, "step": 26197 }, { "epoch": 1.227245046142315, "grad_norm": 0.5726650608808271, "learning_rate": 1.7149867496880779e-06, "loss": 0.2703, "step": 26198 }, { "epoch": 1.2272918911322435, "grad_norm": 0.5802715087922766, "learning_rate": 1.7148066940690345e-06, "loss": 0.2839, "step": 26199 }, { "epoch": 1.2273387361221717, "grad_norm": 0.6100982713355068, "learning_rate": 1.7146266429685808e-06, "loss": 0.282, "step": 26200 }, { "epoch": 1.2273855811121002, "grad_norm": 0.5698896015795866, "learning_rate": 1.7144465963877531e-06, "loss": 0.2751, "step": 26201 }, { "epoch": 1.2274324261020284, "grad_norm": 0.6038541215637606, "learning_rate": 1.7142665543275894e-06, "loss": 0.2627, "step": 26202 }, { "epoch": 1.2274792710919566, "grad_norm": 0.6002913702889735, "learning_rate": 1.714086516789123e-06, "loss": 0.2732, "step": 26203 }, { "epoch": 1.227526116081885, "grad_norm": 0.6037673630269516, "learning_rate": 1.713906483773391e-06, "loss": 0.2797, "step": 26204 }, { "epoch": 1.2275729610718134, "grad_norm": 0.6133356707288397, "learning_rate": 1.7137264552814298e-06, "loss": 0.275, "step": 26205 }, { "epoch": 1.2276198060617416, "grad_norm": 0.5861167657977768, "learning_rate": 1.7135464313142752e-06, "loss": 0.2748, "step": 26206 }, { "epoch": 1.22766665105167, "grad_norm": 0.6148550778323537, "learning_rate": 1.7133664118729632e-06, "loss": 0.2895, "step": 26207 }, { "epoch": 1.2277134960415983, "grad_norm": 0.5654472054934745, "learning_rate": 1.7131863969585306e-06, "loss": 0.2803, "step": 26208 }, { "epoch": 1.2277603410315268, "grad_norm": 0.6031573137804462, "learning_rate": 1.7130063865720115e-06, "loss": 0.2726, "step": 26209 }, { "epoch": 1.227807186021455, "grad_norm": 0.6070382977514817, "learning_rate": 1.7128263807144432e-06, "loss": 0.2894, "step": 26210 }, { "epoch": 1.2278540310113832, "grad_norm": 0.6748236826629964, "learning_rate": 1.7126463793868608e-06, "loss": 0.2819, "step": 26211 }, { "epoch": 1.2279008760013117, "grad_norm": 0.5444041607279878, "learning_rate": 1.712466382590301e-06, "loss": 0.2523, "step": 26212 }, { "epoch": 1.22794772099124, "grad_norm": 0.6102629876671263, "learning_rate": 1.7122863903257992e-06, "loss": 0.2608, "step": 26213 }, { "epoch": 1.2279945659811684, "grad_norm": 0.6062037280830235, "learning_rate": 1.712106402594392e-06, "loss": 0.2648, "step": 26214 }, { "epoch": 1.2280414109710966, "grad_norm": 0.5567007996052048, "learning_rate": 1.7119264193971135e-06, "loss": 0.2659, "step": 26215 }, { "epoch": 1.2280882559610249, "grad_norm": 0.6000937269460009, "learning_rate": 1.711746440735e-06, "loss": 0.284, "step": 26216 }, { "epoch": 1.2281351009509534, "grad_norm": 0.5801472645938522, "learning_rate": 1.7115664666090886e-06, "loss": 0.2529, "step": 26217 }, { "epoch": 1.2281819459408816, "grad_norm": 0.5836594856872072, "learning_rate": 1.711386497020413e-06, "loss": 0.2798, "step": 26218 }, { "epoch": 1.2282287909308098, "grad_norm": 0.5977714851953699, "learning_rate": 1.7112065319700106e-06, "loss": 0.2864, "step": 26219 }, { "epoch": 1.2282756359207383, "grad_norm": 0.5902689591500705, "learning_rate": 1.7110265714589169e-06, "loss": 0.2957, "step": 26220 }, { "epoch": 1.2283224809106665, "grad_norm": 0.5699548147429416, "learning_rate": 1.7108466154881665e-06, "loss": 0.2736, "step": 26221 }, { "epoch": 1.228369325900595, "grad_norm": 0.6073823769349705, "learning_rate": 1.7106666640587949e-06, "loss": 0.2614, "step": 26222 }, { "epoch": 1.2284161708905232, "grad_norm": 0.5871370235585592, "learning_rate": 1.7104867171718382e-06, "loss": 0.2724, "step": 26223 }, { "epoch": 1.2284630158804517, "grad_norm": 0.5805235390181216, "learning_rate": 1.7103067748283328e-06, "loss": 0.2752, "step": 26224 }, { "epoch": 1.22850986087038, "grad_norm": 0.593923310783502, "learning_rate": 1.7101268370293128e-06, "loss": 0.2846, "step": 26225 }, { "epoch": 1.2285567058603082, "grad_norm": 0.5921380558113682, "learning_rate": 1.7099469037758154e-06, "loss": 0.2765, "step": 26226 }, { "epoch": 1.2286035508502366, "grad_norm": 0.5697923238952484, "learning_rate": 1.7097669750688738e-06, "loss": 0.2538, "step": 26227 }, { "epoch": 1.2286503958401649, "grad_norm": 0.5755965047201946, "learning_rate": 1.7095870509095253e-06, "loss": 0.2741, "step": 26228 }, { "epoch": 1.2286972408300931, "grad_norm": 0.613232078724169, "learning_rate": 1.7094071312988042e-06, "loss": 0.2847, "step": 26229 }, { "epoch": 1.2287440858200216, "grad_norm": 0.6123906409650559, "learning_rate": 1.709227216237746e-06, "loss": 0.2653, "step": 26230 }, { "epoch": 1.2287909308099498, "grad_norm": 0.5999310119894861, "learning_rate": 1.7090473057273875e-06, "loss": 0.2636, "step": 26231 }, { "epoch": 1.2288377757998783, "grad_norm": 0.5931065428586266, "learning_rate": 1.708867399768763e-06, "loss": 0.2801, "step": 26232 }, { "epoch": 1.2288846207898065, "grad_norm": 0.6530782319188777, "learning_rate": 1.708687498362907e-06, "loss": 0.2924, "step": 26233 }, { "epoch": 1.2289314657797348, "grad_norm": 0.6042045047097792, "learning_rate": 1.7085076015108553e-06, "loss": 0.2705, "step": 26234 }, { "epoch": 1.2289783107696632, "grad_norm": 0.5930471367732443, "learning_rate": 1.7083277092136442e-06, "loss": 0.2797, "step": 26235 }, { "epoch": 1.2290251557595915, "grad_norm": 0.6190799417296755, "learning_rate": 1.7081478214723074e-06, "loss": 0.2506, "step": 26236 }, { "epoch": 1.22907200074952, "grad_norm": 0.5732682728581014, "learning_rate": 1.707967938287881e-06, "loss": 0.2699, "step": 26237 }, { "epoch": 1.2291188457394482, "grad_norm": 0.5765405166646584, "learning_rate": 1.707788059661401e-06, "loss": 0.2558, "step": 26238 }, { "epoch": 1.2291656907293764, "grad_norm": 0.5701800836355051, "learning_rate": 1.7076081855939015e-06, "loss": 0.2641, "step": 26239 }, { "epoch": 1.2292125357193049, "grad_norm": 0.5789319170109003, "learning_rate": 1.7074283160864166e-06, "loss": 0.2668, "step": 26240 }, { "epoch": 1.2292593807092331, "grad_norm": 0.6042319474088061, "learning_rate": 1.7072484511399828e-06, "loss": 0.2737, "step": 26241 }, { "epoch": 1.2293062256991614, "grad_norm": 0.6090759107041342, "learning_rate": 1.7070685907556357e-06, "loss": 0.2747, "step": 26242 }, { "epoch": 1.2293530706890898, "grad_norm": 0.6089794966451023, "learning_rate": 1.7068887349344088e-06, "loss": 0.2734, "step": 26243 }, { "epoch": 1.229399915679018, "grad_norm": 0.5970997227235268, "learning_rate": 1.7067088836773388e-06, "loss": 0.273, "step": 26244 }, { "epoch": 1.2294467606689465, "grad_norm": 0.569151024082196, "learning_rate": 1.7065290369854586e-06, "loss": 0.2688, "step": 26245 }, { "epoch": 1.2294936056588748, "grad_norm": 0.603372093855421, "learning_rate": 1.7063491948598054e-06, "loss": 0.2584, "step": 26246 }, { "epoch": 1.229540450648803, "grad_norm": 0.5750989238111046, "learning_rate": 1.706169357301412e-06, "loss": 0.2608, "step": 26247 }, { "epoch": 1.2295872956387315, "grad_norm": 0.6394664086311198, "learning_rate": 1.7059895243113157e-06, "loss": 0.2984, "step": 26248 }, { "epoch": 1.2296341406286597, "grad_norm": 0.5959252567837734, "learning_rate": 1.705809695890549e-06, "loss": 0.2858, "step": 26249 }, { "epoch": 1.2296809856185882, "grad_norm": 0.6247225449455128, "learning_rate": 1.7056298720401493e-06, "loss": 0.261, "step": 26250 }, { "epoch": 1.2297278306085164, "grad_norm": 0.6137173296436428, "learning_rate": 1.7054500527611486e-06, "loss": 0.2724, "step": 26251 }, { "epoch": 1.2297746755984447, "grad_norm": 0.6394451094022645, "learning_rate": 1.705270238054584e-06, "loss": 0.3031, "step": 26252 }, { "epoch": 1.2298215205883731, "grad_norm": 0.5732615653306314, "learning_rate": 1.7050904279214885e-06, "loss": 0.2609, "step": 26253 }, { "epoch": 1.2298683655783014, "grad_norm": 0.5843958320568575, "learning_rate": 1.704910622362898e-06, "loss": 0.2695, "step": 26254 }, { "epoch": 1.2299152105682296, "grad_norm": 0.6378192607932984, "learning_rate": 1.704730821379848e-06, "loss": 0.2824, "step": 26255 }, { "epoch": 1.229962055558158, "grad_norm": 0.6234756897431504, "learning_rate": 1.704551024973371e-06, "loss": 0.2764, "step": 26256 }, { "epoch": 1.2300089005480863, "grad_norm": 0.5863693331008871, "learning_rate": 1.7043712331445045e-06, "loss": 0.28, "step": 26257 }, { "epoch": 1.2300557455380148, "grad_norm": 0.5818151720156172, "learning_rate": 1.7041914458942804e-06, "loss": 0.2854, "step": 26258 }, { "epoch": 1.230102590527943, "grad_norm": 0.6048948213280173, "learning_rate": 1.704011663223735e-06, "loss": 0.2782, "step": 26259 }, { "epoch": 1.2301494355178715, "grad_norm": 0.6100201416470767, "learning_rate": 1.703831885133902e-06, "loss": 0.2755, "step": 26260 }, { "epoch": 1.2301962805077997, "grad_norm": 0.5992825028340134, "learning_rate": 1.7036521116258158e-06, "loss": 0.2653, "step": 26261 }, { "epoch": 1.230243125497728, "grad_norm": 0.6078266588251502, "learning_rate": 1.7034723427005129e-06, "loss": 0.2791, "step": 26262 }, { "epoch": 1.2302899704876564, "grad_norm": 0.5778733302162792, "learning_rate": 1.7032925783590266e-06, "loss": 0.2707, "step": 26263 }, { "epoch": 1.2303368154775847, "grad_norm": 0.6320782818923385, "learning_rate": 1.70311281860239e-06, "loss": 0.3052, "step": 26264 }, { "epoch": 1.230383660467513, "grad_norm": 0.6041110618797921, "learning_rate": 1.7029330634316393e-06, "loss": 0.2972, "step": 26265 }, { "epoch": 1.2304305054574414, "grad_norm": 0.5842777021381698, "learning_rate": 1.7027533128478085e-06, "loss": 0.2684, "step": 26266 }, { "epoch": 1.2304773504473696, "grad_norm": 0.5925973816487292, "learning_rate": 1.702573566851932e-06, "loss": 0.2751, "step": 26267 }, { "epoch": 1.230524195437298, "grad_norm": 0.5925862588767601, "learning_rate": 1.702393825445044e-06, "loss": 0.2693, "step": 26268 }, { "epoch": 1.2305710404272263, "grad_norm": 0.6139000484936771, "learning_rate": 1.7022140886281798e-06, "loss": 0.2801, "step": 26269 }, { "epoch": 1.2306178854171546, "grad_norm": 0.6558413158721464, "learning_rate": 1.7020343564023727e-06, "loss": 0.3069, "step": 26270 }, { "epoch": 1.230664730407083, "grad_norm": 0.6178666667474092, "learning_rate": 1.7018546287686571e-06, "loss": 0.2831, "step": 26271 }, { "epoch": 1.2307115753970113, "grad_norm": 0.5579432138860879, "learning_rate": 1.701674905728067e-06, "loss": 0.2707, "step": 26272 }, { "epoch": 1.2307584203869397, "grad_norm": 0.659584907851513, "learning_rate": 1.7014951872816382e-06, "loss": 0.2876, "step": 26273 }, { "epoch": 1.230805265376868, "grad_norm": 0.5342288791021177, "learning_rate": 1.7013154734304029e-06, "loss": 0.2682, "step": 26274 }, { "epoch": 1.2308521103667962, "grad_norm": 0.5919812839379051, "learning_rate": 1.701135764175398e-06, "loss": 0.2739, "step": 26275 }, { "epoch": 1.2308989553567247, "grad_norm": 0.5844848745955261, "learning_rate": 1.700956059517654e-06, "loss": 0.2576, "step": 26276 }, { "epoch": 1.230945800346653, "grad_norm": 0.5863317308529249, "learning_rate": 1.7007763594582084e-06, "loss": 0.2601, "step": 26277 }, { "epoch": 1.2309926453365811, "grad_norm": 0.5799891888227486, "learning_rate": 1.7005966639980932e-06, "loss": 0.2715, "step": 26278 }, { "epoch": 1.2310394903265096, "grad_norm": 0.5616639047565479, "learning_rate": 1.700416973138343e-06, "loss": 0.2791, "step": 26279 }, { "epoch": 1.2310863353164379, "grad_norm": 0.5782212275725975, "learning_rate": 1.7002372868799927e-06, "loss": 0.2643, "step": 26280 }, { "epoch": 1.2311331803063663, "grad_norm": 0.6408712261140826, "learning_rate": 1.7000576052240767e-06, "loss": 0.2863, "step": 26281 }, { "epoch": 1.2311800252962946, "grad_norm": 0.5768858047932888, "learning_rate": 1.6998779281716269e-06, "loss": 0.2799, "step": 26282 }, { "epoch": 1.2312268702862228, "grad_norm": 0.6109597635656903, "learning_rate": 1.6996982557236786e-06, "loss": 0.2571, "step": 26283 }, { "epoch": 1.2312737152761513, "grad_norm": 0.5804157994195362, "learning_rate": 1.699518587881266e-06, "loss": 0.2744, "step": 26284 }, { "epoch": 1.2313205602660795, "grad_norm": 0.6061266431397515, "learning_rate": 1.6993389246454223e-06, "loss": 0.2746, "step": 26285 }, { "epoch": 1.231367405256008, "grad_norm": 0.6095097136914617, "learning_rate": 1.6991592660171819e-06, "loss": 0.2736, "step": 26286 }, { "epoch": 1.2314142502459362, "grad_norm": 0.5627190288617001, "learning_rate": 1.6989796119975795e-06, "loss": 0.2678, "step": 26287 }, { "epoch": 1.2314610952358644, "grad_norm": 0.6003387035809447, "learning_rate": 1.6987999625876478e-06, "loss": 0.2765, "step": 26288 }, { "epoch": 1.231507940225793, "grad_norm": 0.556962790916313, "learning_rate": 1.6986203177884202e-06, "loss": 0.2625, "step": 26289 }, { "epoch": 1.2315547852157211, "grad_norm": 0.5556387807000348, "learning_rate": 1.6984406776009315e-06, "loss": 0.2802, "step": 26290 }, { "epoch": 1.2316016302056494, "grad_norm": 0.6134996082076537, "learning_rate": 1.6982610420262157e-06, "loss": 0.2819, "step": 26291 }, { "epoch": 1.2316484751955779, "grad_norm": 0.6609938704570895, "learning_rate": 1.6980814110653057e-06, "loss": 0.2912, "step": 26292 }, { "epoch": 1.231695320185506, "grad_norm": 0.597241575648566, "learning_rate": 1.6979017847192364e-06, "loss": 0.2874, "step": 26293 }, { "epoch": 1.2317421651754346, "grad_norm": 0.635385826604625, "learning_rate": 1.69772216298904e-06, "loss": 0.2841, "step": 26294 }, { "epoch": 1.2317890101653628, "grad_norm": 0.5612461214724538, "learning_rate": 1.697542545875751e-06, "loss": 0.264, "step": 26295 }, { "epoch": 1.2318358551552913, "grad_norm": 0.6350569994967726, "learning_rate": 1.6973629333804027e-06, "loss": 0.2832, "step": 26296 }, { "epoch": 1.2318827001452195, "grad_norm": 0.6180108214889437, "learning_rate": 1.6971833255040295e-06, "loss": 0.2766, "step": 26297 }, { "epoch": 1.2319295451351477, "grad_norm": 0.6187401473108814, "learning_rate": 1.697003722247664e-06, "loss": 0.2847, "step": 26298 }, { "epoch": 1.2319763901250762, "grad_norm": 0.6293438034821457, "learning_rate": 1.6968241236123412e-06, "loss": 0.2759, "step": 26299 }, { "epoch": 1.2320232351150044, "grad_norm": 0.64075725353136, "learning_rate": 1.6966445295990927e-06, "loss": 0.2856, "step": 26300 }, { "epoch": 1.2320700801049327, "grad_norm": 0.5821369729194041, "learning_rate": 1.6964649402089534e-06, "loss": 0.2888, "step": 26301 }, { "epoch": 1.2321169250948611, "grad_norm": 0.6044950653251148, "learning_rate": 1.696285355442956e-06, "loss": 0.2939, "step": 26302 }, { "epoch": 1.2321637700847894, "grad_norm": 0.6322900264532835, "learning_rate": 1.6961057753021344e-06, "loss": 0.2904, "step": 26303 }, { "epoch": 1.2322106150747179, "grad_norm": 0.6207988979421201, "learning_rate": 1.6959261997875225e-06, "loss": 0.2785, "step": 26304 }, { "epoch": 1.232257460064646, "grad_norm": 0.6355668252354386, "learning_rate": 1.695746628900154e-06, "loss": 0.2787, "step": 26305 }, { "epoch": 1.2323043050545743, "grad_norm": 0.5717908973429392, "learning_rate": 1.69556706264106e-06, "loss": 0.2751, "step": 26306 }, { "epoch": 1.2323511500445028, "grad_norm": 0.6246521126172684, "learning_rate": 1.6953875010112759e-06, "loss": 0.2845, "step": 26307 }, { "epoch": 1.232397995034431, "grad_norm": 0.5936379534324914, "learning_rate": 1.6952079440118346e-06, "loss": 0.2798, "step": 26308 }, { "epoch": 1.2324448400243595, "grad_norm": 0.5899353639201667, "learning_rate": 1.6950283916437688e-06, "loss": 0.2856, "step": 26309 }, { "epoch": 1.2324916850142877, "grad_norm": 0.5535943209360267, "learning_rate": 1.6948488439081124e-06, "loss": 0.2707, "step": 26310 }, { "epoch": 1.232538530004216, "grad_norm": 0.6281790707850984, "learning_rate": 1.6946693008058996e-06, "loss": 0.2893, "step": 26311 }, { "epoch": 1.2325853749941444, "grad_norm": 0.5487805346297393, "learning_rate": 1.6944897623381618e-06, "loss": 0.2651, "step": 26312 }, { "epoch": 1.2326322199840727, "grad_norm": 0.5507195791798336, "learning_rate": 1.6943102285059326e-06, "loss": 0.2632, "step": 26313 }, { "epoch": 1.232679064974001, "grad_norm": 0.6250264683747798, "learning_rate": 1.6941306993102458e-06, "loss": 0.2761, "step": 26314 }, { "epoch": 1.2327259099639294, "grad_norm": 0.558332893264138, "learning_rate": 1.6939511747521345e-06, "loss": 0.2719, "step": 26315 }, { "epoch": 1.2327727549538576, "grad_norm": 0.5749091407076762, "learning_rate": 1.6937716548326311e-06, "loss": 0.2567, "step": 26316 }, { "epoch": 1.232819599943786, "grad_norm": 0.617170033708973, "learning_rate": 1.6935921395527705e-06, "loss": 0.2628, "step": 26317 }, { "epoch": 1.2328664449337143, "grad_norm": 0.5595526109912307, "learning_rate": 1.693412628913583e-06, "loss": 0.2686, "step": 26318 }, { "epoch": 1.2329132899236426, "grad_norm": 0.5999140760250365, "learning_rate": 1.6932331229161036e-06, "loss": 0.2883, "step": 26319 }, { "epoch": 1.232960134913571, "grad_norm": 0.6364094038158918, "learning_rate": 1.6930536215613647e-06, "loss": 0.2929, "step": 26320 }, { "epoch": 1.2330069799034993, "grad_norm": 0.5739915548506144, "learning_rate": 1.6928741248503991e-06, "loss": 0.2744, "step": 26321 }, { "epoch": 1.2330538248934277, "grad_norm": 0.5822044354927342, "learning_rate": 1.6926946327842408e-06, "loss": 0.2732, "step": 26322 }, { "epoch": 1.233100669883356, "grad_norm": 0.5840616695512784, "learning_rate": 1.692515145363921e-06, "loss": 0.2708, "step": 26323 }, { "epoch": 1.2331475148732842, "grad_norm": 0.6060277367019108, "learning_rate": 1.6923356625904752e-06, "loss": 0.2629, "step": 26324 }, { "epoch": 1.2331943598632127, "grad_norm": 0.5857485032506247, "learning_rate": 1.6921561844649332e-06, "loss": 0.2771, "step": 26325 }, { "epoch": 1.233241204853141, "grad_norm": 0.5948035248392232, "learning_rate": 1.69197671098833e-06, "loss": 0.2728, "step": 26326 }, { "epoch": 1.2332880498430692, "grad_norm": 0.6011578566313898, "learning_rate": 1.6917972421616973e-06, "loss": 0.2619, "step": 26327 }, { "epoch": 1.2333348948329976, "grad_norm": 0.640061247770228, "learning_rate": 1.6916177779860682e-06, "loss": 0.2855, "step": 26328 }, { "epoch": 1.2333817398229259, "grad_norm": 0.5972522071108212, "learning_rate": 1.691438318462476e-06, "loss": 0.2694, "step": 26329 }, { "epoch": 1.2334285848128543, "grad_norm": 0.5673202017188016, "learning_rate": 1.6912588635919538e-06, "loss": 0.2739, "step": 26330 }, { "epoch": 1.2334754298027826, "grad_norm": 0.5932185765841342, "learning_rate": 1.6910794133755327e-06, "loss": 0.2563, "step": 26331 }, { "epoch": 1.233522274792711, "grad_norm": 0.5875157473787851, "learning_rate": 1.6908999678142458e-06, "loss": 0.2654, "step": 26332 }, { "epoch": 1.2335691197826393, "grad_norm": 0.5816566461592272, "learning_rate": 1.6907205269091271e-06, "loss": 0.2629, "step": 26333 }, { "epoch": 1.2336159647725675, "grad_norm": 0.6044086782297642, "learning_rate": 1.6905410906612076e-06, "loss": 0.2673, "step": 26334 }, { "epoch": 1.233662809762496, "grad_norm": 0.6132837442346594, "learning_rate": 1.6903616590715216e-06, "loss": 0.2765, "step": 26335 }, { "epoch": 1.2337096547524242, "grad_norm": 0.5635117054211207, "learning_rate": 1.6901822321411005e-06, "loss": 0.26, "step": 26336 }, { "epoch": 1.2337564997423525, "grad_norm": 0.6080602484642015, "learning_rate": 1.690002809870977e-06, "loss": 0.2671, "step": 26337 }, { "epoch": 1.233803344732281, "grad_norm": 0.5686184394586471, "learning_rate": 1.6898233922621832e-06, "loss": 0.2707, "step": 26338 }, { "epoch": 1.2338501897222092, "grad_norm": 0.6246241675962504, "learning_rate": 1.6896439793157526e-06, "loss": 0.2635, "step": 26339 }, { "epoch": 1.2338970347121376, "grad_norm": 0.6143757663172986, "learning_rate": 1.689464571032717e-06, "loss": 0.279, "step": 26340 }, { "epoch": 1.2339438797020659, "grad_norm": 0.5859083252050566, "learning_rate": 1.6892851674141093e-06, "loss": 0.2756, "step": 26341 }, { "epoch": 1.2339907246919941, "grad_norm": 0.6378615880854535, "learning_rate": 1.6891057684609621e-06, "loss": 0.2917, "step": 26342 }, { "epoch": 1.2340375696819226, "grad_norm": 0.6006205182593601, "learning_rate": 1.6889263741743073e-06, "loss": 0.2812, "step": 26343 }, { "epoch": 1.2340844146718508, "grad_norm": 0.597490448550303, "learning_rate": 1.6887469845551765e-06, "loss": 0.2731, "step": 26344 }, { "epoch": 1.2341312596617793, "grad_norm": 0.5640459267680874, "learning_rate": 1.6885675996046031e-06, "loss": 0.2711, "step": 26345 }, { "epoch": 1.2341781046517075, "grad_norm": 0.6106301696399651, "learning_rate": 1.6883882193236199e-06, "loss": 0.2732, "step": 26346 }, { "epoch": 1.2342249496416358, "grad_norm": 0.5978908305240517, "learning_rate": 1.6882088437132576e-06, "loss": 0.2826, "step": 26347 }, { "epoch": 1.2342717946315642, "grad_norm": 0.5608195045434522, "learning_rate": 1.6880294727745508e-06, "loss": 0.2575, "step": 26348 }, { "epoch": 1.2343186396214925, "grad_norm": 0.5760937212596452, "learning_rate": 1.687850106508529e-06, "loss": 0.2713, "step": 26349 }, { "epoch": 1.2343654846114207, "grad_norm": 0.5514724257268077, "learning_rate": 1.6876707449162266e-06, "loss": 0.2664, "step": 26350 }, { "epoch": 1.2344123296013492, "grad_norm": 0.6036782043939185, "learning_rate": 1.6874913879986738e-06, "loss": 0.2842, "step": 26351 }, { "epoch": 1.2344591745912774, "grad_norm": 0.553945997220396, "learning_rate": 1.6873120357569045e-06, "loss": 0.2548, "step": 26352 }, { "epoch": 1.2345060195812059, "grad_norm": 0.566316830144704, "learning_rate": 1.6871326881919503e-06, "loss": 0.2699, "step": 26353 }, { "epoch": 1.2345528645711341, "grad_norm": 0.5972501021805445, "learning_rate": 1.6869533453048437e-06, "loss": 0.2766, "step": 26354 }, { "epoch": 1.2345997095610624, "grad_norm": 0.6113521903098165, "learning_rate": 1.686774007096615e-06, "loss": 0.2832, "step": 26355 }, { "epoch": 1.2346465545509908, "grad_norm": 0.561617075850221, "learning_rate": 1.6865946735682978e-06, "loss": 0.2661, "step": 26356 }, { "epoch": 1.234693399540919, "grad_norm": 0.5686012156547315, "learning_rate": 1.6864153447209242e-06, "loss": 0.2769, "step": 26357 }, { "epoch": 1.2347402445308475, "grad_norm": 0.579848294245655, "learning_rate": 1.6862360205555256e-06, "loss": 0.2741, "step": 26358 }, { "epoch": 1.2347870895207758, "grad_norm": 0.5676923393925454, "learning_rate": 1.6860567010731339e-06, "loss": 0.2594, "step": 26359 }, { "epoch": 1.234833934510704, "grad_norm": 0.5843166144679371, "learning_rate": 1.6858773862747823e-06, "loss": 0.2804, "step": 26360 }, { "epoch": 1.2348807795006325, "grad_norm": 0.5769428306741208, "learning_rate": 1.685698076161501e-06, "loss": 0.2747, "step": 26361 }, { "epoch": 1.2349276244905607, "grad_norm": 0.5658183167405039, "learning_rate": 1.6855187707343226e-06, "loss": 0.2693, "step": 26362 }, { "epoch": 1.234974469480489, "grad_norm": 0.6068933412859683, "learning_rate": 1.6853394699942787e-06, "loss": 0.2672, "step": 26363 }, { "epoch": 1.2350213144704174, "grad_norm": 0.5722237694384129, "learning_rate": 1.685160173942402e-06, "loss": 0.273, "step": 26364 }, { "epoch": 1.2350681594603456, "grad_norm": 0.5991389376757273, "learning_rate": 1.6849808825797232e-06, "loss": 0.2881, "step": 26365 }, { "epoch": 1.2351150044502741, "grad_norm": 0.5839308471985991, "learning_rate": 1.6848015959072755e-06, "loss": 0.2617, "step": 26366 }, { "epoch": 1.2351618494402024, "grad_norm": 0.6323296608260205, "learning_rate": 1.6846223139260887e-06, "loss": 0.2829, "step": 26367 }, { "epoch": 1.2352086944301308, "grad_norm": 0.5640905097853365, "learning_rate": 1.6844430366371962e-06, "loss": 0.2696, "step": 26368 }, { "epoch": 1.235255539420059, "grad_norm": 0.5421259978764811, "learning_rate": 1.6842637640416282e-06, "loss": 0.2662, "step": 26369 }, { "epoch": 1.2353023844099873, "grad_norm": 0.5865507080514356, "learning_rate": 1.6840844961404173e-06, "loss": 0.2912, "step": 26370 }, { "epoch": 1.2353492293999158, "grad_norm": 0.6229146604451284, "learning_rate": 1.6839052329345956e-06, "loss": 0.2714, "step": 26371 }, { "epoch": 1.235396074389844, "grad_norm": 0.6036101758218296, "learning_rate": 1.683725974425195e-06, "loss": 0.2836, "step": 26372 }, { "epoch": 1.2354429193797722, "grad_norm": 0.6287618419701433, "learning_rate": 1.683546720613245e-06, "loss": 0.2977, "step": 26373 }, { "epoch": 1.2354897643697007, "grad_norm": 0.6163725329059032, "learning_rate": 1.6833674714997783e-06, "loss": 0.2895, "step": 26374 }, { "epoch": 1.235536609359629, "grad_norm": 0.6111374336923268, "learning_rate": 1.683188227085827e-06, "loss": 0.274, "step": 26375 }, { "epoch": 1.2355834543495574, "grad_norm": 0.6189547067283826, "learning_rate": 1.683008987372422e-06, "loss": 0.2947, "step": 26376 }, { "epoch": 1.2356302993394856, "grad_norm": 0.6067638619481039, "learning_rate": 1.6828297523605946e-06, "loss": 0.2858, "step": 26377 }, { "epoch": 1.235677144329414, "grad_norm": 0.5610020561796781, "learning_rate": 1.6826505220513777e-06, "loss": 0.2743, "step": 26378 }, { "epoch": 1.2357239893193424, "grad_norm": 0.5509384836700514, "learning_rate": 1.6824712964458011e-06, "loss": 0.2615, "step": 26379 }, { "epoch": 1.2357708343092706, "grad_norm": 0.6455562611583263, "learning_rate": 1.6822920755448962e-06, "loss": 0.2906, "step": 26380 }, { "epoch": 1.235817679299199, "grad_norm": 0.5739757398933995, "learning_rate": 1.682112859349695e-06, "loss": 0.2714, "step": 26381 }, { "epoch": 1.2358645242891273, "grad_norm": 0.6732627954534103, "learning_rate": 1.6819336478612291e-06, "loss": 0.2953, "step": 26382 }, { "epoch": 1.2359113692790555, "grad_norm": 0.6145088326361764, "learning_rate": 1.6817544410805293e-06, "loss": 0.2823, "step": 26383 }, { "epoch": 1.235958214268984, "grad_norm": 0.5697417967781788, "learning_rate": 1.6815752390086277e-06, "loss": 0.2714, "step": 26384 }, { "epoch": 1.2360050592589122, "grad_norm": 0.6360530661601796, "learning_rate": 1.6813960416465538e-06, "loss": 0.2969, "step": 26385 }, { "epoch": 1.2360519042488405, "grad_norm": 0.5802333529831087, "learning_rate": 1.6812168489953406e-06, "loss": 0.269, "step": 26386 }, { "epoch": 1.236098749238769, "grad_norm": 0.6019424500383884, "learning_rate": 1.6810376610560181e-06, "loss": 0.2719, "step": 26387 }, { "epoch": 1.2361455942286972, "grad_norm": 0.5457708636061315, "learning_rate": 1.6808584778296185e-06, "loss": 0.2666, "step": 26388 }, { "epoch": 1.2361924392186256, "grad_norm": 0.566078623858671, "learning_rate": 1.6806792993171722e-06, "loss": 0.2711, "step": 26389 }, { "epoch": 1.236239284208554, "grad_norm": 0.5559116264145335, "learning_rate": 1.6805001255197102e-06, "loss": 0.2672, "step": 26390 }, { "epoch": 1.2362861291984821, "grad_norm": 0.5967296187434855, "learning_rate": 1.6803209564382653e-06, "loss": 0.2664, "step": 26391 }, { "epoch": 1.2363329741884106, "grad_norm": 0.5602402233238348, "learning_rate": 1.6801417920738666e-06, "loss": 0.2723, "step": 26392 }, { "epoch": 1.2363798191783388, "grad_norm": 0.5612380346020465, "learning_rate": 1.6799626324275454e-06, "loss": 0.2535, "step": 26393 }, { "epoch": 1.2364266641682673, "grad_norm": 0.5835357058139202, "learning_rate": 1.6797834775003333e-06, "loss": 0.277, "step": 26394 }, { "epoch": 1.2364735091581955, "grad_norm": 0.611279908949327, "learning_rate": 1.6796043272932613e-06, "loss": 0.2832, "step": 26395 }, { "epoch": 1.2365203541481238, "grad_norm": 0.6045892357664905, "learning_rate": 1.6794251818073598e-06, "loss": 0.2729, "step": 26396 }, { "epoch": 1.2365671991380522, "grad_norm": 0.5751470530623235, "learning_rate": 1.6792460410436614e-06, "loss": 0.2668, "step": 26397 }, { "epoch": 1.2366140441279805, "grad_norm": 0.596744757482888, "learning_rate": 1.6790669050031946e-06, "loss": 0.2524, "step": 26398 }, { "epoch": 1.2366608891179087, "grad_norm": 0.5570498650085012, "learning_rate": 1.678887773686992e-06, "loss": 0.2655, "step": 26399 }, { "epoch": 1.2367077341078372, "grad_norm": 0.5826453725143805, "learning_rate": 1.678708647096083e-06, "loss": 0.2627, "step": 26400 }, { "epoch": 1.2367545790977654, "grad_norm": 0.591673902721073, "learning_rate": 1.6785295252314998e-06, "loss": 0.279, "step": 26401 }, { "epoch": 1.236801424087694, "grad_norm": 0.548205562136069, "learning_rate": 1.678350408094273e-06, "loss": 0.2628, "step": 26402 }, { "epoch": 1.2368482690776221, "grad_norm": 0.5642362083036583, "learning_rate": 1.6781712956854335e-06, "loss": 0.2666, "step": 26403 }, { "epoch": 1.2368951140675506, "grad_norm": 0.5664444572431258, "learning_rate": 1.6779921880060107e-06, "loss": 0.2632, "step": 26404 }, { "epoch": 1.2369419590574788, "grad_norm": 0.582474759264662, "learning_rate": 1.6778130850570362e-06, "loss": 0.2502, "step": 26405 }, { "epoch": 1.236988804047407, "grad_norm": 0.6358035564610857, "learning_rate": 1.6776339868395414e-06, "loss": 0.2837, "step": 26406 }, { "epoch": 1.2370356490373355, "grad_norm": 0.6227227407999345, "learning_rate": 1.677454893354556e-06, "loss": 0.2734, "step": 26407 }, { "epoch": 1.2370824940272638, "grad_norm": 0.5628236591811102, "learning_rate": 1.6772758046031105e-06, "loss": 0.2567, "step": 26408 }, { "epoch": 1.237129339017192, "grad_norm": 0.6113007208497374, "learning_rate": 1.677096720586237e-06, "loss": 0.2834, "step": 26409 }, { "epoch": 1.2371761840071205, "grad_norm": 0.6510128765166956, "learning_rate": 1.6769176413049648e-06, "loss": 0.2851, "step": 26410 }, { "epoch": 1.2372230289970487, "grad_norm": 0.626832885227421, "learning_rate": 1.6767385667603242e-06, "loss": 0.2765, "step": 26411 }, { "epoch": 1.2372698739869772, "grad_norm": 0.5500051573584265, "learning_rate": 1.6765594969533462e-06, "loss": 0.2593, "step": 26412 }, { "epoch": 1.2373167189769054, "grad_norm": 0.604809339721457, "learning_rate": 1.6763804318850616e-06, "loss": 0.2659, "step": 26413 }, { "epoch": 1.2373635639668337, "grad_norm": 0.5571345264128418, "learning_rate": 1.6762013715565002e-06, "loss": 0.2677, "step": 26414 }, { "epoch": 1.2374104089567621, "grad_norm": 0.587360103001398, "learning_rate": 1.6760223159686939e-06, "loss": 0.2607, "step": 26415 }, { "epoch": 1.2374572539466904, "grad_norm": 0.5918660488356655, "learning_rate": 1.6758432651226712e-06, "loss": 0.2856, "step": 26416 }, { "epoch": 1.2375040989366188, "grad_norm": 0.5975363661375673, "learning_rate": 1.6756642190194638e-06, "loss": 0.2757, "step": 26417 }, { "epoch": 1.237550943926547, "grad_norm": 0.6127034057858923, "learning_rate": 1.675485177660101e-06, "loss": 0.2791, "step": 26418 }, { "epoch": 1.2375977889164753, "grad_norm": 0.5692875597362206, "learning_rate": 1.675306141045614e-06, "loss": 0.2738, "step": 26419 }, { "epoch": 1.2376446339064038, "grad_norm": 0.5669981249700395, "learning_rate": 1.6751271091770329e-06, "loss": 0.2738, "step": 26420 }, { "epoch": 1.237691478896332, "grad_norm": 0.6350279458421119, "learning_rate": 1.674948082055389e-06, "loss": 0.2908, "step": 26421 }, { "epoch": 1.2377383238862603, "grad_norm": 0.5418537321613535, "learning_rate": 1.67476905968171e-06, "loss": 0.264, "step": 26422 }, { "epoch": 1.2377851688761887, "grad_norm": 0.5878599688686476, "learning_rate": 1.6745900420570283e-06, "loss": 0.2869, "step": 26423 }, { "epoch": 1.237832013866117, "grad_norm": 0.6247861678488318, "learning_rate": 1.6744110291823735e-06, "loss": 0.2829, "step": 26424 }, { "epoch": 1.2378788588560454, "grad_norm": 0.5768062151709697, "learning_rate": 1.6742320210587754e-06, "loss": 0.2814, "step": 26425 }, { "epoch": 1.2379257038459737, "grad_norm": 0.6492396558903968, "learning_rate": 1.6740530176872649e-06, "loss": 0.2678, "step": 26426 }, { "epoch": 1.237972548835902, "grad_norm": 0.5738470470548863, "learning_rate": 1.6738740190688715e-06, "loss": 0.2682, "step": 26427 }, { "epoch": 1.2380193938258304, "grad_norm": 0.6046186812134605, "learning_rate": 1.6736950252046258e-06, "loss": 0.2759, "step": 26428 }, { "epoch": 1.2380662388157586, "grad_norm": 0.5988827859604149, "learning_rate": 1.6735160360955568e-06, "loss": 0.2868, "step": 26429 }, { "epoch": 1.238113083805687, "grad_norm": 0.6206747267487259, "learning_rate": 1.6733370517426956e-06, "loss": 0.2853, "step": 26430 }, { "epoch": 1.2381599287956153, "grad_norm": 0.6870780662304427, "learning_rate": 1.673158072147072e-06, "loss": 0.2917, "step": 26431 }, { "epoch": 1.2382067737855436, "grad_norm": 0.6771679653097166, "learning_rate": 1.6729790973097155e-06, "loss": 0.2747, "step": 26432 }, { "epoch": 1.238253618775472, "grad_norm": 0.5707536548242311, "learning_rate": 1.6728001272316576e-06, "loss": 0.2722, "step": 26433 }, { "epoch": 1.2383004637654003, "grad_norm": 0.5499179508807938, "learning_rate": 1.672621161913926e-06, "loss": 0.2552, "step": 26434 }, { "epoch": 1.2383473087553285, "grad_norm": 0.5610635223355652, "learning_rate": 1.672442201357552e-06, "loss": 0.2579, "step": 26435 }, { "epoch": 1.238394153745257, "grad_norm": 0.6179879659427732, "learning_rate": 1.672263245563565e-06, "loss": 0.272, "step": 26436 }, { "epoch": 1.2384409987351852, "grad_norm": 0.5707802842803962, "learning_rate": 1.672084294532995e-06, "loss": 0.2641, "step": 26437 }, { "epoch": 1.2384878437251137, "grad_norm": 0.62588192846847, "learning_rate": 1.6719053482668719e-06, "loss": 0.2882, "step": 26438 }, { "epoch": 1.238534688715042, "grad_norm": 0.5779227750407611, "learning_rate": 1.6717264067662259e-06, "loss": 0.2539, "step": 26439 }, { "epoch": 1.2385815337049704, "grad_norm": 0.6066229158398208, "learning_rate": 1.6715474700320852e-06, "loss": 0.2778, "step": 26440 }, { "epoch": 1.2386283786948986, "grad_norm": 0.5694404441765808, "learning_rate": 1.6713685380654812e-06, "loss": 0.2586, "step": 26441 }, { "epoch": 1.2386752236848269, "grad_norm": 0.6043346191008053, "learning_rate": 1.6711896108674424e-06, "loss": 0.2823, "step": 26442 }, { "epoch": 1.2387220686747553, "grad_norm": 0.6180567571899753, "learning_rate": 1.6710106884389994e-06, "loss": 0.2862, "step": 26443 }, { "epoch": 1.2387689136646836, "grad_norm": 0.6022168794662826, "learning_rate": 1.6708317707811816e-06, "loss": 0.2644, "step": 26444 }, { "epoch": 1.2388157586546118, "grad_norm": 0.6126247296051053, "learning_rate": 1.6706528578950196e-06, "loss": 0.2798, "step": 26445 }, { "epoch": 1.2388626036445403, "grad_norm": 0.570665855105203, "learning_rate": 1.6704739497815406e-06, "loss": 0.2653, "step": 26446 }, { "epoch": 1.2389094486344685, "grad_norm": 0.6142561160637651, "learning_rate": 1.6702950464417755e-06, "loss": 0.2953, "step": 26447 }, { "epoch": 1.238956293624397, "grad_norm": 0.6239113679160359, "learning_rate": 1.6701161478767546e-06, "loss": 0.2744, "step": 26448 }, { "epoch": 1.2390031386143252, "grad_norm": 0.601335717266593, "learning_rate": 1.6699372540875058e-06, "loss": 0.278, "step": 26449 }, { "epoch": 1.2390499836042534, "grad_norm": 0.6003270974500481, "learning_rate": 1.6697583650750598e-06, "loss": 0.2983, "step": 26450 }, { "epoch": 1.239096828594182, "grad_norm": 0.5539494872938953, "learning_rate": 1.6695794808404466e-06, "loss": 0.2654, "step": 26451 }, { "epoch": 1.2391436735841102, "grad_norm": 0.6111380802464241, "learning_rate": 1.6694006013846944e-06, "loss": 0.2856, "step": 26452 }, { "epoch": 1.2391905185740386, "grad_norm": 0.5930078072574066, "learning_rate": 1.6692217267088324e-06, "loss": 0.271, "step": 26453 }, { "epoch": 1.2392373635639669, "grad_norm": 0.6011684860570732, "learning_rate": 1.6690428568138905e-06, "loss": 0.2776, "step": 26454 }, { "epoch": 1.239284208553895, "grad_norm": 0.6028216503589803, "learning_rate": 1.6688639917008985e-06, "loss": 0.2706, "step": 26455 }, { "epoch": 1.2393310535438236, "grad_norm": 0.5762501568635213, "learning_rate": 1.6686851313708852e-06, "loss": 0.2522, "step": 26456 }, { "epoch": 1.2393778985337518, "grad_norm": 0.6146765508899776, "learning_rate": 1.6685062758248798e-06, "loss": 0.2948, "step": 26457 }, { "epoch": 1.23942474352368, "grad_norm": 0.5642553031153273, "learning_rate": 1.668327425063913e-06, "loss": 0.2622, "step": 26458 }, { "epoch": 1.2394715885136085, "grad_norm": 0.6348481678207957, "learning_rate": 1.6681485790890118e-06, "loss": 0.275, "step": 26459 }, { "epoch": 1.2395184335035367, "grad_norm": 0.6011191968840769, "learning_rate": 1.6679697379012064e-06, "loss": 0.278, "step": 26460 }, { "epoch": 1.2395652784934652, "grad_norm": 0.5902488388328002, "learning_rate": 1.667790901501526e-06, "loss": 0.2716, "step": 26461 }, { "epoch": 1.2396121234833934, "grad_norm": 0.6376326692108002, "learning_rate": 1.6676120698910003e-06, "loss": 0.2727, "step": 26462 }, { "epoch": 1.2396589684733217, "grad_norm": 0.5493453274892772, "learning_rate": 1.6674332430706573e-06, "loss": 0.2701, "step": 26463 }, { "epoch": 1.2397058134632502, "grad_norm": 0.619377854101626, "learning_rate": 1.6672544210415275e-06, "loss": 0.2835, "step": 26464 }, { "epoch": 1.2397526584531784, "grad_norm": 0.58866997015956, "learning_rate": 1.6670756038046387e-06, "loss": 0.2804, "step": 26465 }, { "epoch": 1.2397995034431069, "grad_norm": 0.5780484025103778, "learning_rate": 1.6668967913610207e-06, "loss": 0.2793, "step": 26466 }, { "epoch": 1.239846348433035, "grad_norm": 0.6534676417359663, "learning_rate": 1.6667179837117018e-06, "loss": 0.2972, "step": 26467 }, { "epoch": 1.2398931934229633, "grad_norm": 0.6035325705002454, "learning_rate": 1.6665391808577114e-06, "loss": 0.2629, "step": 26468 }, { "epoch": 1.2399400384128918, "grad_norm": 0.5820998575534526, "learning_rate": 1.666360382800079e-06, "loss": 0.2745, "step": 26469 }, { "epoch": 1.23998688340282, "grad_norm": 0.5987101101080522, "learning_rate": 1.6661815895398336e-06, "loss": 0.2768, "step": 26470 }, { "epoch": 1.2400337283927483, "grad_norm": 0.6154606345865574, "learning_rate": 1.6660028010780027e-06, "loss": 0.286, "step": 26471 }, { "epoch": 1.2400805733826767, "grad_norm": 0.6862681734499625, "learning_rate": 1.665824017415616e-06, "loss": 0.2897, "step": 26472 }, { "epoch": 1.240127418372605, "grad_norm": 0.5883989741164033, "learning_rate": 1.665645238553703e-06, "loss": 0.2655, "step": 26473 }, { "epoch": 1.2401742633625334, "grad_norm": 0.6018555900287534, "learning_rate": 1.6654664644932913e-06, "loss": 0.2764, "step": 26474 }, { "epoch": 1.2402211083524617, "grad_norm": 0.590894408936874, "learning_rate": 1.6652876952354111e-06, "loss": 0.2755, "step": 26475 }, { "epoch": 1.2402679533423902, "grad_norm": 0.5997851107352646, "learning_rate": 1.6651089307810903e-06, "loss": 0.2654, "step": 26476 }, { "epoch": 1.2403147983323184, "grad_norm": 0.5808775019948442, "learning_rate": 1.6649301711313577e-06, "loss": 0.2525, "step": 26477 }, { "epoch": 1.2403616433222466, "grad_norm": 0.5559148026689165, "learning_rate": 1.6647514162872414e-06, "loss": 0.2414, "step": 26478 }, { "epoch": 1.240408488312175, "grad_norm": 0.589908527585212, "learning_rate": 1.6645726662497712e-06, "loss": 0.2712, "step": 26479 }, { "epoch": 1.2404553333021033, "grad_norm": 0.5927970918635002, "learning_rate": 1.6643939210199753e-06, "loss": 0.2714, "step": 26480 }, { "epoch": 1.2405021782920316, "grad_norm": 0.6248719780943746, "learning_rate": 1.664215180598882e-06, "loss": 0.2851, "step": 26481 }, { "epoch": 1.24054902328196, "grad_norm": 0.592659818194347, "learning_rate": 1.6640364449875215e-06, "loss": 0.2842, "step": 26482 }, { "epoch": 1.2405958682718883, "grad_norm": 0.5605697821693392, "learning_rate": 1.6638577141869205e-06, "loss": 0.2609, "step": 26483 }, { "epoch": 1.2406427132618167, "grad_norm": 0.5578907679880034, "learning_rate": 1.663678988198108e-06, "loss": 0.2598, "step": 26484 }, { "epoch": 1.240689558251745, "grad_norm": 0.6226755645934163, "learning_rate": 1.6635002670221123e-06, "loss": 0.2833, "step": 26485 }, { "epoch": 1.2407364032416732, "grad_norm": 0.6516597225884981, "learning_rate": 1.6633215506599632e-06, "loss": 0.2842, "step": 26486 }, { "epoch": 1.2407832482316017, "grad_norm": 0.628830204891315, "learning_rate": 1.6631428391126874e-06, "loss": 0.2725, "step": 26487 }, { "epoch": 1.24083009322153, "grad_norm": 0.6126424666957655, "learning_rate": 1.6629641323813153e-06, "loss": 0.2781, "step": 26488 }, { "epoch": 1.2408769382114584, "grad_norm": 0.5573507553231827, "learning_rate": 1.6627854304668734e-06, "loss": 0.2668, "step": 26489 }, { "epoch": 1.2409237832013866, "grad_norm": 0.5877718302478039, "learning_rate": 1.6626067333703914e-06, "loss": 0.2769, "step": 26490 }, { "epoch": 1.2409706281913149, "grad_norm": 0.6252564796964443, "learning_rate": 1.6624280410928968e-06, "loss": 0.2753, "step": 26491 }, { "epoch": 1.2410174731812433, "grad_norm": 0.6470987472915976, "learning_rate": 1.6622493536354178e-06, "loss": 0.2859, "step": 26492 }, { "epoch": 1.2410643181711716, "grad_norm": 0.634203082184924, "learning_rate": 1.6620706709989841e-06, "loss": 0.2819, "step": 26493 }, { "epoch": 1.2411111631610998, "grad_norm": 0.5593898147785155, "learning_rate": 1.6618919931846235e-06, "loss": 0.2652, "step": 26494 }, { "epoch": 1.2411580081510283, "grad_norm": 0.5616340803313749, "learning_rate": 1.661713320193363e-06, "loss": 0.2729, "step": 26495 }, { "epoch": 1.2412048531409565, "grad_norm": 0.5731804151176094, "learning_rate": 1.6615346520262316e-06, "loss": 0.266, "step": 26496 }, { "epoch": 1.241251698130885, "grad_norm": 0.6093263592069005, "learning_rate": 1.6613559886842579e-06, "loss": 0.2776, "step": 26497 }, { "epoch": 1.2412985431208132, "grad_norm": 0.5996395338707153, "learning_rate": 1.6611773301684691e-06, "loss": 0.2824, "step": 26498 }, { "epoch": 1.2413453881107415, "grad_norm": 0.5975702613329424, "learning_rate": 1.660998676479894e-06, "loss": 0.2844, "step": 26499 }, { "epoch": 1.24139223310067, "grad_norm": 0.61211947165928, "learning_rate": 1.6608200276195618e-06, "loss": 0.2761, "step": 26500 }, { "epoch": 1.2414390780905982, "grad_norm": 0.6281406633754532, "learning_rate": 1.6606413835884987e-06, "loss": 0.2778, "step": 26501 }, { "epoch": 1.2414859230805266, "grad_norm": 0.6179825510467366, "learning_rate": 1.6604627443877333e-06, "loss": 0.2688, "step": 26502 }, { "epoch": 1.2415327680704549, "grad_norm": 0.6390426943165821, "learning_rate": 1.6602841100182938e-06, "loss": 0.2768, "step": 26503 }, { "epoch": 1.2415796130603831, "grad_norm": 0.5880642823561965, "learning_rate": 1.6601054804812083e-06, "loss": 0.2814, "step": 26504 }, { "epoch": 1.2416264580503116, "grad_norm": 0.6207873174226639, "learning_rate": 1.6599268557775046e-06, "loss": 0.2809, "step": 26505 }, { "epoch": 1.2416733030402398, "grad_norm": 0.5696618003647514, "learning_rate": 1.6597482359082113e-06, "loss": 0.2686, "step": 26506 }, { "epoch": 1.241720148030168, "grad_norm": 0.5912930318444389, "learning_rate": 1.659569620874355e-06, "loss": 0.2838, "step": 26507 }, { "epoch": 1.2417669930200965, "grad_norm": 0.5886927794559996, "learning_rate": 1.6593910106769646e-06, "loss": 0.2673, "step": 26508 }, { "epoch": 1.2418138380100248, "grad_norm": 0.6457413206138136, "learning_rate": 1.6592124053170672e-06, "loss": 0.2819, "step": 26509 }, { "epoch": 1.2418606829999532, "grad_norm": 0.6254505129720371, "learning_rate": 1.659033804795691e-06, "loss": 0.2734, "step": 26510 }, { "epoch": 1.2419075279898815, "grad_norm": 0.5555186535842428, "learning_rate": 1.6588552091138643e-06, "loss": 0.2542, "step": 26511 }, { "epoch": 1.24195437297981, "grad_norm": 0.5748889244584475, "learning_rate": 1.6586766182726154e-06, "loss": 0.267, "step": 26512 }, { "epoch": 1.2420012179697382, "grad_norm": 0.5624362041833648, "learning_rate": 1.6584980322729699e-06, "loss": 0.2568, "step": 26513 }, { "epoch": 1.2420480629596664, "grad_norm": 0.63174375917913, "learning_rate": 1.6583194511159567e-06, "loss": 0.2694, "step": 26514 }, { "epoch": 1.2420949079495949, "grad_norm": 0.6334999199709153, "learning_rate": 1.658140874802604e-06, "loss": 0.2915, "step": 26515 }, { "epoch": 1.2421417529395231, "grad_norm": 0.571593407722221, "learning_rate": 1.657962303333938e-06, "loss": 0.2597, "step": 26516 }, { "epoch": 1.2421885979294514, "grad_norm": 0.6158797075897169, "learning_rate": 1.6577837367109881e-06, "loss": 0.2804, "step": 26517 }, { "epoch": 1.2422354429193798, "grad_norm": 0.6023157250596124, "learning_rate": 1.657605174934782e-06, "loss": 0.2892, "step": 26518 }, { "epoch": 1.242282287909308, "grad_norm": 0.6375554966019789, "learning_rate": 1.6574266180063453e-06, "loss": 0.2979, "step": 26519 }, { "epoch": 1.2423291328992365, "grad_norm": 0.5786335231886988, "learning_rate": 1.6572480659267065e-06, "loss": 0.2702, "step": 26520 }, { "epoch": 1.2423759778891648, "grad_norm": 0.601686943411658, "learning_rate": 1.6570695186968933e-06, "loss": 0.2836, "step": 26521 }, { "epoch": 1.242422822879093, "grad_norm": 0.6210560470494257, "learning_rate": 1.6568909763179337e-06, "loss": 0.268, "step": 26522 }, { "epoch": 1.2424696678690215, "grad_norm": 0.6230087621469463, "learning_rate": 1.6567124387908539e-06, "loss": 0.2637, "step": 26523 }, { "epoch": 1.2425165128589497, "grad_norm": 0.5512627203422857, "learning_rate": 1.6565339061166836e-06, "loss": 0.265, "step": 26524 }, { "epoch": 1.2425633578488782, "grad_norm": 0.5933377309754797, "learning_rate": 1.656355378296447e-06, "loss": 0.2856, "step": 26525 }, { "epoch": 1.2426102028388064, "grad_norm": 0.5992200777975195, "learning_rate": 1.6561768553311741e-06, "loss": 0.2803, "step": 26526 }, { "epoch": 1.2426570478287347, "grad_norm": 0.6030443734385681, "learning_rate": 1.6559983372218908e-06, "loss": 0.2987, "step": 26527 }, { "epoch": 1.2427038928186631, "grad_norm": 0.6006201473473919, "learning_rate": 1.655819823969625e-06, "loss": 0.277, "step": 26528 }, { "epoch": 1.2427507378085914, "grad_norm": 0.6221192371841475, "learning_rate": 1.655641315575404e-06, "loss": 0.2823, "step": 26529 }, { "epoch": 1.2427975827985196, "grad_norm": 0.6242757228093694, "learning_rate": 1.6554628120402545e-06, "loss": 0.2946, "step": 26530 }, { "epoch": 1.242844427788448, "grad_norm": 0.5615688242065827, "learning_rate": 1.6552843133652059e-06, "loss": 0.2519, "step": 26531 }, { "epoch": 1.2428912727783763, "grad_norm": 0.6184684261658058, "learning_rate": 1.6551058195512826e-06, "loss": 0.2894, "step": 26532 }, { "epoch": 1.2429381177683048, "grad_norm": 0.6669026362971541, "learning_rate": 1.6549273305995128e-06, "loss": 0.2951, "step": 26533 }, { "epoch": 1.242984962758233, "grad_norm": 0.6164104216251582, "learning_rate": 1.6547488465109235e-06, "loss": 0.2765, "step": 26534 }, { "epoch": 1.2430318077481612, "grad_norm": 0.6146795666162386, "learning_rate": 1.654570367286543e-06, "loss": 0.2869, "step": 26535 }, { "epoch": 1.2430786527380897, "grad_norm": 0.6209465535189522, "learning_rate": 1.6543918929273967e-06, "loss": 0.2742, "step": 26536 }, { "epoch": 1.243125497728018, "grad_norm": 0.5280141376245514, "learning_rate": 1.654213423434514e-06, "loss": 0.2534, "step": 26537 }, { "epoch": 1.2431723427179464, "grad_norm": 0.5786237324079017, "learning_rate": 1.654034958808919e-06, "loss": 0.2796, "step": 26538 }, { "epoch": 1.2432191877078747, "grad_norm": 0.5646089759130284, "learning_rate": 1.6538564990516409e-06, "loss": 0.2669, "step": 26539 }, { "epoch": 1.243266032697803, "grad_norm": 0.5903971344670423, "learning_rate": 1.6536780441637053e-06, "loss": 0.2767, "step": 26540 }, { "epoch": 1.2433128776877314, "grad_norm": 0.5760525034875109, "learning_rate": 1.6534995941461402e-06, "loss": 0.2815, "step": 26541 }, { "epoch": 1.2433597226776596, "grad_norm": 0.6116575729956267, "learning_rate": 1.6533211489999723e-06, "loss": 0.2816, "step": 26542 }, { "epoch": 1.2434065676675878, "grad_norm": 0.5835730922433223, "learning_rate": 1.6531427087262291e-06, "loss": 0.2711, "step": 26543 }, { "epoch": 1.2434534126575163, "grad_norm": 0.5975873009727739, "learning_rate": 1.652964273325936e-06, "loss": 0.277, "step": 26544 }, { "epoch": 1.2435002576474445, "grad_norm": 0.5722886219326597, "learning_rate": 1.6527858428001204e-06, "loss": 0.2809, "step": 26545 }, { "epoch": 1.243547102637373, "grad_norm": 0.5763904039514222, "learning_rate": 1.6526074171498096e-06, "loss": 0.2792, "step": 26546 }, { "epoch": 1.2435939476273012, "grad_norm": 0.6279466506463264, "learning_rate": 1.6524289963760299e-06, "loss": 0.2915, "step": 26547 }, { "epoch": 1.2436407926172297, "grad_norm": 0.5986602324772659, "learning_rate": 1.6522505804798079e-06, "loss": 0.285, "step": 26548 }, { "epoch": 1.243687637607158, "grad_norm": 0.5423281986562644, "learning_rate": 1.6520721694621723e-06, "loss": 0.2578, "step": 26549 }, { "epoch": 1.2437344825970862, "grad_norm": 0.6161857519298684, "learning_rate": 1.6518937633241472e-06, "loss": 0.2708, "step": 26550 }, { "epoch": 1.2437813275870147, "grad_norm": 0.6320484547224491, "learning_rate": 1.6517153620667602e-06, "loss": 0.29, "step": 26551 }, { "epoch": 1.243828172576943, "grad_norm": 0.5927530605543513, "learning_rate": 1.651536965691038e-06, "loss": 0.2679, "step": 26552 }, { "epoch": 1.2438750175668711, "grad_norm": 0.5814920035570835, "learning_rate": 1.6513585741980076e-06, "loss": 0.2597, "step": 26553 }, { "epoch": 1.2439218625567996, "grad_norm": 0.599057708141295, "learning_rate": 1.651180187588695e-06, "loss": 0.2684, "step": 26554 }, { "epoch": 1.2439687075467278, "grad_norm": 0.5899202056856009, "learning_rate": 1.651001805864128e-06, "loss": 0.27, "step": 26555 }, { "epoch": 1.2440155525366563, "grad_norm": 0.5925191887889616, "learning_rate": 1.6508234290253317e-06, "loss": 0.2746, "step": 26556 }, { "epoch": 1.2440623975265845, "grad_norm": 0.5915672859245196, "learning_rate": 1.650645057073333e-06, "loss": 0.2721, "step": 26557 }, { "epoch": 1.2441092425165128, "grad_norm": 0.6260366866981187, "learning_rate": 1.6504666900091581e-06, "loss": 0.2668, "step": 26558 }, { "epoch": 1.2441560875064412, "grad_norm": 0.6170816810360058, "learning_rate": 1.6502883278338345e-06, "loss": 0.2795, "step": 26559 }, { "epoch": 1.2442029324963695, "grad_norm": 0.5801601096961616, "learning_rate": 1.6501099705483878e-06, "loss": 0.2673, "step": 26560 }, { "epoch": 1.244249777486298, "grad_norm": 0.5898429692659741, "learning_rate": 1.6499316181538455e-06, "loss": 0.2603, "step": 26561 }, { "epoch": 1.2442966224762262, "grad_norm": 0.5860901540171377, "learning_rate": 1.6497532706512317e-06, "loss": 0.2821, "step": 26562 }, { "epoch": 1.2443434674661544, "grad_norm": 0.5735624438926276, "learning_rate": 1.6495749280415747e-06, "loss": 0.2787, "step": 26563 }, { "epoch": 1.244390312456083, "grad_norm": 0.6123420223005672, "learning_rate": 1.6493965903259001e-06, "loss": 0.2793, "step": 26564 }, { "epoch": 1.2444371574460111, "grad_norm": 0.6085145981321638, "learning_rate": 1.649218257505234e-06, "loss": 0.2792, "step": 26565 }, { "epoch": 1.2444840024359394, "grad_norm": 0.6099540658704349, "learning_rate": 1.6490399295806037e-06, "loss": 0.2931, "step": 26566 }, { "epoch": 1.2445308474258678, "grad_norm": 0.607362017580814, "learning_rate": 1.6488616065530347e-06, "loss": 0.2647, "step": 26567 }, { "epoch": 1.244577692415796, "grad_norm": 0.556342709007632, "learning_rate": 1.6486832884235532e-06, "loss": 0.2656, "step": 26568 }, { "epoch": 1.2446245374057245, "grad_norm": 0.5806269338555009, "learning_rate": 1.6485049751931848e-06, "loss": 0.2805, "step": 26569 }, { "epoch": 1.2446713823956528, "grad_norm": 0.5934351537368119, "learning_rate": 1.6483266668629562e-06, "loss": 0.2727, "step": 26570 }, { "epoch": 1.244718227385581, "grad_norm": 0.6487630551604299, "learning_rate": 1.6481483634338943e-06, "loss": 0.2989, "step": 26571 }, { "epoch": 1.2447650723755095, "grad_norm": 0.5732811439083588, "learning_rate": 1.647970064907024e-06, "loss": 0.2759, "step": 26572 }, { "epoch": 1.2448119173654377, "grad_norm": 0.6435153782861751, "learning_rate": 1.6477917712833724e-06, "loss": 0.2833, "step": 26573 }, { "epoch": 1.2448587623553662, "grad_norm": 0.5717508874324624, "learning_rate": 1.6476134825639644e-06, "loss": 0.2613, "step": 26574 }, { "epoch": 1.2449056073452944, "grad_norm": 0.5871399383786923, "learning_rate": 1.6474351987498271e-06, "loss": 0.2721, "step": 26575 }, { "epoch": 1.2449524523352227, "grad_norm": 0.5740437777583671, "learning_rate": 1.6472569198419853e-06, "loss": 0.2619, "step": 26576 }, { "epoch": 1.2449992973251511, "grad_norm": 0.6204085267506632, "learning_rate": 1.6470786458414658e-06, "loss": 0.2882, "step": 26577 }, { "epoch": 1.2450461423150794, "grad_norm": 0.6333245909220784, "learning_rate": 1.6469003767492942e-06, "loss": 0.2886, "step": 26578 }, { "epoch": 1.2450929873050076, "grad_norm": 0.620059839452665, "learning_rate": 1.6467221125664973e-06, "loss": 0.2816, "step": 26579 }, { "epoch": 1.245139832294936, "grad_norm": 0.56428140979336, "learning_rate": 1.6465438532940991e-06, "loss": 0.2622, "step": 26580 }, { "epoch": 1.2451866772848643, "grad_norm": 0.5673916074086911, "learning_rate": 1.6463655989331272e-06, "loss": 0.2711, "step": 26581 }, { "epoch": 1.2452335222747928, "grad_norm": 0.5865164867238811, "learning_rate": 1.6461873494846064e-06, "loss": 0.2958, "step": 26582 }, { "epoch": 1.245280367264721, "grad_norm": 0.5857637455067297, "learning_rate": 1.6460091049495627e-06, "loss": 0.274, "step": 26583 }, { "epoch": 1.2453272122546495, "grad_norm": 0.5814641427287514, "learning_rate": 1.6458308653290224e-06, "loss": 0.2609, "step": 26584 }, { "epoch": 1.2453740572445777, "grad_norm": 0.6046060559529748, "learning_rate": 1.6456526306240112e-06, "loss": 0.2593, "step": 26585 }, { "epoch": 1.245420902234506, "grad_norm": 0.5389781551008745, "learning_rate": 1.6454744008355534e-06, "loss": 0.2617, "step": 26586 }, { "epoch": 1.2454677472244344, "grad_norm": 0.5348402497430808, "learning_rate": 1.6452961759646758e-06, "loss": 0.2645, "step": 26587 }, { "epoch": 1.2455145922143627, "grad_norm": 0.6127618601022358, "learning_rate": 1.6451179560124042e-06, "loss": 0.2665, "step": 26588 }, { "epoch": 1.245561437204291, "grad_norm": 0.5639542833337365, "learning_rate": 1.6449397409797635e-06, "loss": 0.2712, "step": 26589 }, { "epoch": 1.2456082821942194, "grad_norm": 0.5552713954445767, "learning_rate": 1.6447615308677794e-06, "loss": 0.2581, "step": 26590 }, { "epoch": 1.2456551271841476, "grad_norm": 0.6144505572673203, "learning_rate": 1.6445833256774793e-06, "loss": 0.2713, "step": 26591 }, { "epoch": 1.245701972174076, "grad_norm": 0.6053291032367717, "learning_rate": 1.6444051254098864e-06, "loss": 0.2601, "step": 26592 }, { "epoch": 1.2457488171640043, "grad_norm": 0.6133701847687304, "learning_rate": 1.6442269300660264e-06, "loss": 0.2892, "step": 26593 }, { "epoch": 1.2457956621539326, "grad_norm": 0.6023432971536878, "learning_rate": 1.6440487396469252e-06, "loss": 0.2693, "step": 26594 }, { "epoch": 1.245842507143861, "grad_norm": 0.5873114782079936, "learning_rate": 1.643870554153609e-06, "loss": 0.2752, "step": 26595 }, { "epoch": 1.2458893521337893, "grad_norm": 0.6334412507471185, "learning_rate": 1.6436923735871018e-06, "loss": 0.2818, "step": 26596 }, { "epoch": 1.2459361971237177, "grad_norm": 0.6001458295832749, "learning_rate": 1.6435141979484304e-06, "loss": 0.2711, "step": 26597 }, { "epoch": 1.245983042113646, "grad_norm": 0.574310938780641, "learning_rate": 1.6433360272386201e-06, "loss": 0.2681, "step": 26598 }, { "epoch": 1.2460298871035742, "grad_norm": 0.5679013832914848, "learning_rate": 1.6431578614586952e-06, "loss": 0.281, "step": 26599 }, { "epoch": 1.2460767320935027, "grad_norm": 0.5503033393457739, "learning_rate": 1.642979700609681e-06, "loss": 0.2559, "step": 26600 }, { "epoch": 1.246123577083431, "grad_norm": 0.5853752055924502, "learning_rate": 1.6428015446926032e-06, "loss": 0.2659, "step": 26601 }, { "epoch": 1.2461704220733592, "grad_norm": 0.635187682029858, "learning_rate": 1.6426233937084874e-06, "loss": 0.2812, "step": 26602 }, { "epoch": 1.2462172670632876, "grad_norm": 0.5429048030037332, "learning_rate": 1.642445247658358e-06, "loss": 0.2676, "step": 26603 }, { "epoch": 1.2462641120532159, "grad_norm": 0.5903523045434331, "learning_rate": 1.642267106543242e-06, "loss": 0.2798, "step": 26604 }, { "epoch": 1.2463109570431443, "grad_norm": 0.5781538054469004, "learning_rate": 1.6420889703641618e-06, "loss": 0.2692, "step": 26605 }, { "epoch": 1.2463578020330726, "grad_norm": 0.5706750547107221, "learning_rate": 1.641910839122145e-06, "loss": 0.2661, "step": 26606 }, { "epoch": 1.2464046470230008, "grad_norm": 0.6467969740731226, "learning_rate": 1.6417327128182147e-06, "loss": 0.2975, "step": 26607 }, { "epoch": 1.2464514920129293, "grad_norm": 0.5995028462354318, "learning_rate": 1.641554591453397e-06, "loss": 0.2913, "step": 26608 }, { "epoch": 1.2464983370028575, "grad_norm": 0.5631344753290587, "learning_rate": 1.6413764750287174e-06, "loss": 0.2607, "step": 26609 }, { "epoch": 1.246545181992786, "grad_norm": 0.6433277052732054, "learning_rate": 1.6411983635452005e-06, "loss": 0.2753, "step": 26610 }, { "epoch": 1.2465920269827142, "grad_norm": 0.5698166445272629, "learning_rate": 1.6410202570038709e-06, "loss": 0.2698, "step": 26611 }, { "epoch": 1.2466388719726424, "grad_norm": 0.6321569166791494, "learning_rate": 1.6408421554057533e-06, "loss": 0.283, "step": 26612 }, { "epoch": 1.246685716962571, "grad_norm": 0.6514958209814472, "learning_rate": 1.6406640587518738e-06, "loss": 0.2792, "step": 26613 }, { "epoch": 1.2467325619524992, "grad_norm": 0.6266957640994492, "learning_rate": 1.640485967043256e-06, "loss": 0.2761, "step": 26614 }, { "epoch": 1.2467794069424274, "grad_norm": 0.6032879361751603, "learning_rate": 1.6403078802809263e-06, "loss": 0.2633, "step": 26615 }, { "epoch": 1.2468262519323559, "grad_norm": 0.5796697170360401, "learning_rate": 1.6401297984659084e-06, "loss": 0.285, "step": 26616 }, { "epoch": 1.246873096922284, "grad_norm": 0.6048056356971854, "learning_rate": 1.6399517215992276e-06, "loss": 0.2778, "step": 26617 }, { "epoch": 1.2469199419122126, "grad_norm": 0.5994502683760972, "learning_rate": 1.6397736496819079e-06, "loss": 0.2797, "step": 26618 }, { "epoch": 1.2469667869021408, "grad_norm": 0.565316879579416, "learning_rate": 1.6395955827149748e-06, "loss": 0.2713, "step": 26619 }, { "epoch": 1.2470136318920693, "grad_norm": 0.5929010945956854, "learning_rate": 1.6394175206994529e-06, "loss": 0.2928, "step": 26620 }, { "epoch": 1.2470604768819975, "grad_norm": 0.6152483828030106, "learning_rate": 1.6392394636363661e-06, "loss": 0.2715, "step": 26621 }, { "epoch": 1.2471073218719257, "grad_norm": 0.5561254245951586, "learning_rate": 1.6390614115267416e-06, "loss": 0.262, "step": 26622 }, { "epoch": 1.2471541668618542, "grad_norm": 0.613165928601972, "learning_rate": 1.6388833643716017e-06, "loss": 0.2847, "step": 26623 }, { "epoch": 1.2472010118517824, "grad_norm": 0.6260343133700619, "learning_rate": 1.6387053221719707e-06, "loss": 0.2816, "step": 26624 }, { "epoch": 1.2472478568417107, "grad_norm": 0.6189527228778976, "learning_rate": 1.6385272849288745e-06, "loss": 0.2753, "step": 26625 }, { "epoch": 1.2472947018316392, "grad_norm": 0.563457764864793, "learning_rate": 1.6383492526433376e-06, "loss": 0.2784, "step": 26626 }, { "epoch": 1.2473415468215674, "grad_norm": 0.6575834524108674, "learning_rate": 1.6381712253163835e-06, "loss": 0.2885, "step": 26627 }, { "epoch": 1.2473883918114959, "grad_norm": 0.6229082427632857, "learning_rate": 1.6379932029490387e-06, "loss": 0.2776, "step": 26628 }, { "epoch": 1.247435236801424, "grad_norm": 0.5638043606553098, "learning_rate": 1.6378151855423253e-06, "loss": 0.2597, "step": 26629 }, { "epoch": 1.2474820817913523, "grad_norm": 0.5691518769113706, "learning_rate": 1.637637173097269e-06, "loss": 0.2753, "step": 26630 }, { "epoch": 1.2475289267812808, "grad_norm": 0.5478961017089012, "learning_rate": 1.6374591656148936e-06, "loss": 0.2633, "step": 26631 }, { "epoch": 1.247575771771209, "grad_norm": 0.5950398830194596, "learning_rate": 1.6372811630962244e-06, "loss": 0.2778, "step": 26632 }, { "epoch": 1.2476226167611375, "grad_norm": 0.5864995457034576, "learning_rate": 1.6371031655422852e-06, "loss": 0.2649, "step": 26633 }, { "epoch": 1.2476694617510657, "grad_norm": 0.6264416945956975, "learning_rate": 1.6369251729541013e-06, "loss": 0.2884, "step": 26634 }, { "epoch": 1.247716306740994, "grad_norm": 0.5889566639979913, "learning_rate": 1.636747185332695e-06, "loss": 0.2633, "step": 26635 }, { "epoch": 1.2477631517309224, "grad_norm": 0.6003529859645343, "learning_rate": 1.6365692026790913e-06, "loss": 0.2757, "step": 26636 }, { "epoch": 1.2478099967208507, "grad_norm": 0.6278968910329872, "learning_rate": 1.6363912249943157e-06, "loss": 0.2648, "step": 26637 }, { "epoch": 1.247856841710779, "grad_norm": 0.6551056085047143, "learning_rate": 1.6362132522793912e-06, "loss": 0.2983, "step": 26638 }, { "epoch": 1.2479036867007074, "grad_norm": 0.5537968923657777, "learning_rate": 1.6360352845353417e-06, "loss": 0.2717, "step": 26639 }, { "epoch": 1.2479505316906356, "grad_norm": 0.5634089706467696, "learning_rate": 1.6358573217631934e-06, "loss": 0.267, "step": 26640 }, { "epoch": 1.247997376680564, "grad_norm": 0.6110411701971121, "learning_rate": 1.6356793639639687e-06, "loss": 0.2715, "step": 26641 }, { "epoch": 1.2480442216704923, "grad_norm": 0.5652690205083709, "learning_rate": 1.6355014111386913e-06, "loss": 0.2879, "step": 26642 }, { "epoch": 1.2480910666604206, "grad_norm": 0.5838579545181272, "learning_rate": 1.6353234632883858e-06, "loss": 0.2761, "step": 26643 }, { "epoch": 1.248137911650349, "grad_norm": 0.6376981438127219, "learning_rate": 1.6351455204140773e-06, "loss": 0.2913, "step": 26644 }, { "epoch": 1.2481847566402773, "grad_norm": 0.6470744883378523, "learning_rate": 1.6349675825167883e-06, "loss": 0.2968, "step": 26645 }, { "epoch": 1.2482316016302057, "grad_norm": 0.6343360353162558, "learning_rate": 1.6347896495975445e-06, "loss": 0.2717, "step": 26646 }, { "epoch": 1.248278446620134, "grad_norm": 0.599524289580626, "learning_rate": 1.6346117216573675e-06, "loss": 0.2882, "step": 26647 }, { "epoch": 1.2483252916100622, "grad_norm": 0.6639132396119541, "learning_rate": 1.6344337986972836e-06, "loss": 0.2799, "step": 26648 }, { "epoch": 1.2483721365999907, "grad_norm": 0.6030881347008181, "learning_rate": 1.6342558807183149e-06, "loss": 0.2656, "step": 26649 }, { "epoch": 1.248418981589919, "grad_norm": 0.6072610807830768, "learning_rate": 1.6340779677214857e-06, "loss": 0.299, "step": 26650 }, { "epoch": 1.2484658265798472, "grad_norm": 0.6577090455590741, "learning_rate": 1.6339000597078208e-06, "loss": 0.2855, "step": 26651 }, { "epoch": 1.2485126715697756, "grad_norm": 0.6308432486329001, "learning_rate": 1.6337221566783443e-06, "loss": 0.2888, "step": 26652 }, { "epoch": 1.2485595165597039, "grad_norm": 0.6349830872049945, "learning_rate": 1.6335442586340778e-06, "loss": 0.2987, "step": 26653 }, { "epoch": 1.2486063615496323, "grad_norm": 0.5820950694222213, "learning_rate": 1.6333663655760463e-06, "loss": 0.2803, "step": 26654 }, { "epoch": 1.2486532065395606, "grad_norm": 0.5880985729067402, "learning_rate": 1.6331884775052737e-06, "loss": 0.2808, "step": 26655 }, { "epoch": 1.248700051529489, "grad_norm": 0.6102229036875075, "learning_rate": 1.6330105944227833e-06, "loss": 0.2809, "step": 26656 }, { "epoch": 1.2487468965194173, "grad_norm": 0.6117717525550262, "learning_rate": 1.6328327163295993e-06, "loss": 0.2756, "step": 26657 }, { "epoch": 1.2487937415093455, "grad_norm": 0.5600163797197251, "learning_rate": 1.6326548432267459e-06, "loss": 0.2683, "step": 26658 }, { "epoch": 1.248840586499274, "grad_norm": 0.5960320597906225, "learning_rate": 1.632476975115245e-06, "loss": 0.2748, "step": 26659 }, { "epoch": 1.2488874314892022, "grad_norm": 0.6480342319852795, "learning_rate": 1.632299111996121e-06, "loss": 0.2869, "step": 26660 }, { "epoch": 1.2489342764791305, "grad_norm": 0.6374315455744781, "learning_rate": 1.6321212538703974e-06, "loss": 0.2637, "step": 26661 }, { "epoch": 1.248981121469059, "grad_norm": 0.5939837729489464, "learning_rate": 1.6319434007390984e-06, "loss": 0.2599, "step": 26662 }, { "epoch": 1.2490279664589872, "grad_norm": 0.603753813746966, "learning_rate": 1.631765552603246e-06, "loss": 0.2787, "step": 26663 }, { "epoch": 1.2490748114489156, "grad_norm": 0.5853008961354903, "learning_rate": 1.6315877094638657e-06, "loss": 0.2633, "step": 26664 }, { "epoch": 1.2491216564388439, "grad_norm": 0.6171864586483963, "learning_rate": 1.6314098713219795e-06, "loss": 0.2828, "step": 26665 }, { "epoch": 1.2491685014287721, "grad_norm": 0.580579892899765, "learning_rate": 1.6312320381786114e-06, "loss": 0.2663, "step": 26666 }, { "epoch": 1.2492153464187006, "grad_norm": 0.5603768817658312, "learning_rate": 1.6310542100347843e-06, "loss": 0.2649, "step": 26667 }, { "epoch": 1.2492621914086288, "grad_norm": 0.6037717594843576, "learning_rate": 1.6308763868915217e-06, "loss": 0.2909, "step": 26668 }, { "epoch": 1.2493090363985573, "grad_norm": 0.5585064049874722, "learning_rate": 1.6306985687498471e-06, "loss": 0.2662, "step": 26669 }, { "epoch": 1.2493558813884855, "grad_norm": 0.5843684677556354, "learning_rate": 1.6305207556107833e-06, "loss": 0.27, "step": 26670 }, { "epoch": 1.2494027263784138, "grad_norm": 0.5781029678720575, "learning_rate": 1.6303429474753554e-06, "loss": 0.2789, "step": 26671 }, { "epoch": 1.2494495713683422, "grad_norm": 0.5634772621947921, "learning_rate": 1.630165144344585e-06, "loss": 0.2768, "step": 26672 }, { "epoch": 1.2494964163582705, "grad_norm": 0.6082143183231203, "learning_rate": 1.6299873462194943e-06, "loss": 0.2661, "step": 26673 }, { "epoch": 1.2495432613481987, "grad_norm": 0.5572955986363437, "learning_rate": 1.6298095531011083e-06, "loss": 0.2502, "step": 26674 }, { "epoch": 1.2495901063381272, "grad_norm": 0.6057623158788906, "learning_rate": 1.6296317649904497e-06, "loss": 0.2755, "step": 26675 }, { "epoch": 1.2496369513280554, "grad_norm": 0.5945231279502424, "learning_rate": 1.6294539818885412e-06, "loss": 0.2677, "step": 26676 }, { "epoch": 1.2496837963179839, "grad_norm": 0.6211353821034926, "learning_rate": 1.6292762037964075e-06, "loss": 0.2679, "step": 26677 }, { "epoch": 1.2497306413079121, "grad_norm": 0.6191062369786771, "learning_rate": 1.6290984307150693e-06, "loss": 0.2757, "step": 26678 }, { "epoch": 1.2497774862978404, "grad_norm": 0.5266609235899757, "learning_rate": 1.6289206626455509e-06, "loss": 0.2494, "step": 26679 }, { "epoch": 1.2498243312877688, "grad_norm": 0.5719597782413988, "learning_rate": 1.628742899588875e-06, "loss": 0.2518, "step": 26680 }, { "epoch": 1.249871176277697, "grad_norm": 0.5419016048738616, "learning_rate": 1.6285651415460646e-06, "loss": 0.2565, "step": 26681 }, { "epoch": 1.2499180212676255, "grad_norm": 0.5966062678578078, "learning_rate": 1.6283873885181433e-06, "loss": 0.2741, "step": 26682 }, { "epoch": 1.2499648662575538, "grad_norm": 0.6167299440340818, "learning_rate": 1.6282096405061337e-06, "loss": 0.2828, "step": 26683 }, { "epoch": 1.250011711247482, "grad_norm": 0.5890248738536739, "learning_rate": 1.628031897511058e-06, "loss": 0.2835, "step": 26684 }, { "epoch": 1.2500585562374105, "grad_norm": 0.5817392905967184, "learning_rate": 1.6278541595339392e-06, "loss": 0.2794, "step": 26685 }, { "epoch": 1.2501054012273387, "grad_norm": 0.6247251355626727, "learning_rate": 1.6276764265758013e-06, "loss": 0.2922, "step": 26686 }, { "epoch": 1.250152246217267, "grad_norm": 0.588380885153067, "learning_rate": 1.6274986986376657e-06, "loss": 0.2705, "step": 26687 }, { "epoch": 1.2501990912071954, "grad_norm": 0.5794675187405501, "learning_rate": 1.627320975720556e-06, "loss": 0.2688, "step": 26688 }, { "epoch": 1.2502459361971237, "grad_norm": 0.6019586902927229, "learning_rate": 1.6271432578254954e-06, "loss": 0.2784, "step": 26689 }, { "epoch": 1.2502927811870521, "grad_norm": 0.6091599098414455, "learning_rate": 1.6269655449535054e-06, "loss": 0.2737, "step": 26690 }, { "epoch": 1.2503396261769804, "grad_norm": 0.5492233708880335, "learning_rate": 1.626787837105609e-06, "loss": 0.2674, "step": 26691 }, { "epoch": 1.2503864711669088, "grad_norm": 0.5753977447200911, "learning_rate": 1.6266101342828291e-06, "loss": 0.2689, "step": 26692 }, { "epoch": 1.250433316156837, "grad_norm": 0.5464744324445916, "learning_rate": 1.6264324364861888e-06, "loss": 0.2604, "step": 26693 }, { "epoch": 1.2504801611467653, "grad_norm": 0.5965933404142181, "learning_rate": 1.6262547437167098e-06, "loss": 0.2587, "step": 26694 }, { "epoch": 1.2505270061366938, "grad_norm": 0.5816907958792178, "learning_rate": 1.6260770559754163e-06, "loss": 0.2758, "step": 26695 }, { "epoch": 1.250573851126622, "grad_norm": 0.6169149314901825, "learning_rate": 1.6258993732633285e-06, "loss": 0.2706, "step": 26696 }, { "epoch": 1.2506206961165502, "grad_norm": 0.6147310130461697, "learning_rate": 1.625721695581471e-06, "loss": 0.2864, "step": 26697 }, { "epoch": 1.2506675411064787, "grad_norm": 0.6303233493231356, "learning_rate": 1.6255440229308644e-06, "loss": 0.2847, "step": 26698 }, { "epoch": 1.250714386096407, "grad_norm": 0.5767241739231682, "learning_rate": 1.625366355312532e-06, "loss": 0.2706, "step": 26699 }, { "epoch": 1.2507612310863352, "grad_norm": 0.5398478364826595, "learning_rate": 1.6251886927274973e-06, "loss": 0.2568, "step": 26700 }, { "epoch": 1.2508080760762637, "grad_norm": 0.6369468565643746, "learning_rate": 1.6250110351767824e-06, "loss": 0.2819, "step": 26701 }, { "epoch": 1.250854921066192, "grad_norm": 0.6241387228339464, "learning_rate": 1.6248333826614076e-06, "loss": 0.2767, "step": 26702 }, { "epoch": 1.2509017660561204, "grad_norm": 0.5856364861861707, "learning_rate": 1.6246557351823972e-06, "loss": 0.2832, "step": 26703 }, { "epoch": 1.2509486110460486, "grad_norm": 0.5778533684697467, "learning_rate": 1.6244780927407733e-06, "loss": 0.2712, "step": 26704 }, { "epoch": 1.250995456035977, "grad_norm": 0.6008739778161167, "learning_rate": 1.6243004553375574e-06, "loss": 0.29, "step": 26705 }, { "epoch": 1.2510423010259053, "grad_norm": 0.6159892622931888, "learning_rate": 1.624122822973773e-06, "loss": 0.2707, "step": 26706 }, { "epoch": 1.2510891460158335, "grad_norm": 0.6040560235808595, "learning_rate": 1.6239451956504414e-06, "loss": 0.2738, "step": 26707 }, { "epoch": 1.251135991005762, "grad_norm": 0.6030764756487335, "learning_rate": 1.6237675733685847e-06, "loss": 0.2866, "step": 26708 }, { "epoch": 1.2511828359956902, "grad_norm": 0.6206176752147863, "learning_rate": 1.6235899561292257e-06, "loss": 0.2744, "step": 26709 }, { "epoch": 1.2512296809856185, "grad_norm": 0.5635634367706727, "learning_rate": 1.623412343933386e-06, "loss": 0.2779, "step": 26710 }, { "epoch": 1.251276525975547, "grad_norm": 0.521375392159375, "learning_rate": 1.6232347367820877e-06, "loss": 0.2465, "step": 26711 }, { "epoch": 1.2513233709654752, "grad_norm": 0.6342012732415093, "learning_rate": 1.6230571346763532e-06, "loss": 0.2988, "step": 26712 }, { "epoch": 1.2513702159554034, "grad_norm": 0.6027857051634908, "learning_rate": 1.6228795376172055e-06, "loss": 0.2834, "step": 26713 }, { "epoch": 1.251417060945332, "grad_norm": 0.5891968733006251, "learning_rate": 1.6227019456056645e-06, "loss": 0.2845, "step": 26714 }, { "epoch": 1.2514639059352604, "grad_norm": 0.6169843544607497, "learning_rate": 1.6225243586427536e-06, "loss": 0.2926, "step": 26715 }, { "epoch": 1.2515107509251886, "grad_norm": 0.6095386215177415, "learning_rate": 1.6223467767294942e-06, "loss": 0.2833, "step": 26716 }, { "epoch": 1.2515575959151168, "grad_norm": 0.5517568311451316, "learning_rate": 1.6221691998669093e-06, "loss": 0.2522, "step": 26717 }, { "epoch": 1.2516044409050453, "grad_norm": 0.6058858981466945, "learning_rate": 1.6219916280560187e-06, "loss": 0.2654, "step": 26718 }, { "epoch": 1.2516512858949735, "grad_norm": 0.6108599383094158, "learning_rate": 1.6218140612978474e-06, "loss": 0.2927, "step": 26719 }, { "epoch": 1.2516981308849018, "grad_norm": 0.6605954771111968, "learning_rate": 1.6216364995934141e-06, "loss": 0.2747, "step": 26720 }, { "epoch": 1.2517449758748302, "grad_norm": 0.5945928043125636, "learning_rate": 1.6214589429437427e-06, "loss": 0.2767, "step": 26721 }, { "epoch": 1.2517918208647585, "grad_norm": 0.6179257994999848, "learning_rate": 1.6212813913498538e-06, "loss": 0.2811, "step": 26722 }, { "epoch": 1.2518386658546867, "grad_norm": 0.6261079298292525, "learning_rate": 1.6211038448127692e-06, "loss": 0.2974, "step": 26723 }, { "epoch": 1.2518855108446152, "grad_norm": 0.6134073325113012, "learning_rate": 1.6209263033335118e-06, "loss": 0.2772, "step": 26724 }, { "epoch": 1.2519323558345434, "grad_norm": 0.6040204809320304, "learning_rate": 1.6207487669131033e-06, "loss": 0.2823, "step": 26725 }, { "epoch": 1.251979200824472, "grad_norm": 0.5986829279093879, "learning_rate": 1.6205712355525632e-06, "loss": 0.2839, "step": 26726 }, { "epoch": 1.2520260458144001, "grad_norm": 0.6202822522264785, "learning_rate": 1.6203937092529149e-06, "loss": 0.296, "step": 26727 }, { "epoch": 1.2520728908043286, "grad_norm": 0.6674831972586696, "learning_rate": 1.6202161880151801e-06, "loss": 0.2883, "step": 26728 }, { "epoch": 1.2521197357942568, "grad_norm": 0.5674245334046565, "learning_rate": 1.6200386718403799e-06, "loss": 0.2615, "step": 26729 }, { "epoch": 1.252166580784185, "grad_norm": 0.5536228922723304, "learning_rate": 1.6198611607295356e-06, "loss": 0.263, "step": 26730 }, { "epoch": 1.2522134257741135, "grad_norm": 0.5854782898810496, "learning_rate": 1.6196836546836696e-06, "loss": 0.2722, "step": 26731 }, { "epoch": 1.2522602707640418, "grad_norm": 0.6001120506610752, "learning_rate": 1.6195061537038034e-06, "loss": 0.2788, "step": 26732 }, { "epoch": 1.25230711575397, "grad_norm": 0.5868088885102594, "learning_rate": 1.6193286577909572e-06, "loss": 0.2686, "step": 26733 }, { "epoch": 1.2523539607438985, "grad_norm": 0.6263819438530257, "learning_rate": 1.619151166946153e-06, "loss": 0.2812, "step": 26734 }, { "epoch": 1.2524008057338267, "grad_norm": 0.7119468612237051, "learning_rate": 1.6189736811704129e-06, "loss": 0.3161, "step": 26735 }, { "epoch": 1.252447650723755, "grad_norm": 0.5274472452292958, "learning_rate": 1.6187962004647575e-06, "loss": 0.2543, "step": 26736 }, { "epoch": 1.2524944957136834, "grad_norm": 0.6053217657185281, "learning_rate": 1.6186187248302085e-06, "loss": 0.2767, "step": 26737 }, { "epoch": 1.2525413407036117, "grad_norm": 0.5926728347900428, "learning_rate": 1.618441254267788e-06, "loss": 0.2828, "step": 26738 }, { "epoch": 1.2525881856935401, "grad_norm": 0.5795838801093278, "learning_rate": 1.6182637887785162e-06, "loss": 0.2601, "step": 26739 }, { "epoch": 1.2526350306834684, "grad_norm": 0.5975931088555613, "learning_rate": 1.6180863283634142e-06, "loss": 0.2796, "step": 26740 }, { "epoch": 1.2526818756733968, "grad_norm": 0.5735860297232533, "learning_rate": 1.6179088730235038e-06, "loss": 0.2681, "step": 26741 }, { "epoch": 1.252728720663325, "grad_norm": 0.6373914271576372, "learning_rate": 1.6177314227598062e-06, "loss": 0.2916, "step": 26742 }, { "epoch": 1.2527755656532533, "grad_norm": 0.5933042185093993, "learning_rate": 1.6175539775733424e-06, "loss": 0.2738, "step": 26743 }, { "epoch": 1.2528224106431818, "grad_norm": 0.6428098920571426, "learning_rate": 1.6173765374651345e-06, "loss": 0.2963, "step": 26744 }, { "epoch": 1.25286925563311, "grad_norm": 0.5664331994319846, "learning_rate": 1.6171991024362021e-06, "loss": 0.2619, "step": 26745 }, { "epoch": 1.2529161006230383, "grad_norm": 0.6462415633324191, "learning_rate": 1.617021672487567e-06, "loss": 0.2943, "step": 26746 }, { "epoch": 1.2529629456129667, "grad_norm": 0.6112037831063641, "learning_rate": 1.61684424762025e-06, "loss": 0.2619, "step": 26747 }, { "epoch": 1.253009790602895, "grad_norm": 0.6123469628970891, "learning_rate": 1.6166668278352727e-06, "loss": 0.2785, "step": 26748 }, { "epoch": 1.2530566355928232, "grad_norm": 0.5799562616961497, "learning_rate": 1.6164894131336556e-06, "loss": 0.2655, "step": 26749 }, { "epoch": 1.2531034805827517, "grad_norm": 0.5814187720394466, "learning_rate": 1.6163120035164209e-06, "loss": 0.2668, "step": 26750 }, { "epoch": 1.2531503255726801, "grad_norm": 0.5424982155354356, "learning_rate": 1.6161345989845873e-06, "loss": 0.2536, "step": 26751 }, { "epoch": 1.2531971705626084, "grad_norm": 0.6039997098533636, "learning_rate": 1.615957199539177e-06, "loss": 0.2834, "step": 26752 }, { "epoch": 1.2532440155525366, "grad_norm": 0.6183915205149891, "learning_rate": 1.6157798051812112e-06, "loss": 0.2854, "step": 26753 }, { "epoch": 1.253290860542465, "grad_norm": 0.6300420988996912, "learning_rate": 1.61560241591171e-06, "loss": 0.2816, "step": 26754 }, { "epoch": 1.2533377055323933, "grad_norm": 0.5708493870572419, "learning_rate": 1.6154250317316949e-06, "loss": 0.2536, "step": 26755 }, { "epoch": 1.2533845505223216, "grad_norm": 0.6508955446324158, "learning_rate": 1.6152476526421868e-06, "loss": 0.285, "step": 26756 }, { "epoch": 1.25343139551225, "grad_norm": 0.6044269203373461, "learning_rate": 1.6150702786442058e-06, "loss": 0.2773, "step": 26757 }, { "epoch": 1.2534782405021783, "grad_norm": 0.5776851496835315, "learning_rate": 1.6148929097387726e-06, "loss": 0.275, "step": 26758 }, { "epoch": 1.2535250854921065, "grad_norm": 0.6094671608768327, "learning_rate": 1.6147155459269087e-06, "loss": 0.2864, "step": 26759 }, { "epoch": 1.253571930482035, "grad_norm": 0.5956124342917274, "learning_rate": 1.6145381872096336e-06, "loss": 0.2675, "step": 26760 }, { "epoch": 1.2536187754719632, "grad_norm": 0.5995368930061736, "learning_rate": 1.614360833587969e-06, "loss": 0.2879, "step": 26761 }, { "epoch": 1.2536656204618917, "grad_norm": 0.6495641068990862, "learning_rate": 1.6141834850629359e-06, "loss": 0.2773, "step": 26762 }, { "epoch": 1.25371246545182, "grad_norm": 0.6301908440755764, "learning_rate": 1.614006141635554e-06, "loss": 0.2754, "step": 26763 }, { "epoch": 1.2537593104417484, "grad_norm": 0.647671735530261, "learning_rate": 1.6138288033068433e-06, "loss": 0.2852, "step": 26764 }, { "epoch": 1.2538061554316766, "grad_norm": 0.615680613173614, "learning_rate": 1.6136514700778252e-06, "loss": 0.2804, "step": 26765 }, { "epoch": 1.2538530004216049, "grad_norm": 0.5652976570219843, "learning_rate": 1.6134741419495205e-06, "loss": 0.2694, "step": 26766 }, { "epoch": 1.2538998454115333, "grad_norm": 0.578593360653583, "learning_rate": 1.6132968189229493e-06, "loss": 0.2587, "step": 26767 }, { "epoch": 1.2539466904014616, "grad_norm": 0.5715143358444015, "learning_rate": 1.6131195009991324e-06, "loss": 0.2654, "step": 26768 }, { "epoch": 1.2539935353913898, "grad_norm": 0.5606050390886781, "learning_rate": 1.612942188179089e-06, "loss": 0.2651, "step": 26769 }, { "epoch": 1.2540403803813183, "grad_norm": 0.6442255040345787, "learning_rate": 1.6127648804638413e-06, "loss": 0.2903, "step": 26770 }, { "epoch": 1.2540872253712465, "grad_norm": 0.5772537583462516, "learning_rate": 1.6125875778544078e-06, "loss": 0.2709, "step": 26771 }, { "epoch": 1.2541340703611747, "grad_norm": 0.602629621451463, "learning_rate": 1.6124102803518099e-06, "loss": 0.2755, "step": 26772 }, { "epoch": 1.2541809153511032, "grad_norm": 0.5670204682307204, "learning_rate": 1.6122329879570682e-06, "loss": 0.2602, "step": 26773 }, { "epoch": 1.2542277603410315, "grad_norm": 0.5591836326759543, "learning_rate": 1.612055700671203e-06, "loss": 0.2691, "step": 26774 }, { "epoch": 1.25427460533096, "grad_norm": 0.6273925406981343, "learning_rate": 1.6118784184952329e-06, "loss": 0.2865, "step": 26775 }, { "epoch": 1.2543214503208882, "grad_norm": 0.6027832886812037, "learning_rate": 1.6117011414301798e-06, "loss": 0.2641, "step": 26776 }, { "epoch": 1.2543682953108166, "grad_norm": 0.6184549534356087, "learning_rate": 1.6115238694770636e-06, "loss": 0.2631, "step": 26777 }, { "epoch": 1.2544151403007449, "grad_norm": 0.5616924052196883, "learning_rate": 1.6113466026369035e-06, "loss": 0.2655, "step": 26778 }, { "epoch": 1.254461985290673, "grad_norm": 0.5447026243750124, "learning_rate": 1.6111693409107204e-06, "loss": 0.2542, "step": 26779 }, { "epoch": 1.2545088302806016, "grad_norm": 0.6710320597574914, "learning_rate": 1.6109920842995356e-06, "loss": 0.2719, "step": 26780 }, { "epoch": 1.2545556752705298, "grad_norm": 0.6144206882479736, "learning_rate": 1.6108148328043672e-06, "loss": 0.2852, "step": 26781 }, { "epoch": 1.254602520260458, "grad_norm": 0.5809527374783169, "learning_rate": 1.6106375864262355e-06, "loss": 0.265, "step": 26782 }, { "epoch": 1.2546493652503865, "grad_norm": 0.5632280254859104, "learning_rate": 1.610460345166161e-06, "loss": 0.2764, "step": 26783 }, { "epoch": 1.2546962102403147, "grad_norm": 0.6110769387466599, "learning_rate": 1.6102831090251642e-06, "loss": 0.2834, "step": 26784 }, { "epoch": 1.254743055230243, "grad_norm": 0.6018229582523834, "learning_rate": 1.610105878004264e-06, "loss": 0.2762, "step": 26785 }, { "epoch": 1.2547899002201715, "grad_norm": 0.6218114935303786, "learning_rate": 1.6099286521044815e-06, "loss": 0.2804, "step": 26786 }, { "epoch": 1.2548367452101, "grad_norm": 0.5307792841768529, "learning_rate": 1.6097514313268352e-06, "loss": 0.2439, "step": 26787 }, { "epoch": 1.2548835902000282, "grad_norm": 0.6360826265015406, "learning_rate": 1.609574215672346e-06, "loss": 0.2799, "step": 26788 }, { "epoch": 1.2549304351899564, "grad_norm": 0.6041196815303643, "learning_rate": 1.6093970051420329e-06, "loss": 0.283, "step": 26789 }, { "epoch": 1.2549772801798849, "grad_norm": 0.603768579650892, "learning_rate": 1.6092197997369162e-06, "loss": 0.2887, "step": 26790 }, { "epoch": 1.255024125169813, "grad_norm": 0.5865271528862436, "learning_rate": 1.6090425994580161e-06, "loss": 0.2576, "step": 26791 }, { "epoch": 1.2550709701597413, "grad_norm": 0.6229755120247242, "learning_rate": 1.6088654043063528e-06, "loss": 0.2611, "step": 26792 }, { "epoch": 1.2551178151496698, "grad_norm": 0.6491925179322295, "learning_rate": 1.6086882142829435e-06, "loss": 0.2688, "step": 26793 }, { "epoch": 1.255164660139598, "grad_norm": 0.5596730885479173, "learning_rate": 1.6085110293888102e-06, "loss": 0.2582, "step": 26794 }, { "epoch": 1.2552115051295263, "grad_norm": 0.5659226627637672, "learning_rate": 1.6083338496249718e-06, "loss": 0.2532, "step": 26795 }, { "epoch": 1.2552583501194547, "grad_norm": 0.5602914759908114, "learning_rate": 1.6081566749924476e-06, "loss": 0.2666, "step": 26796 }, { "epoch": 1.255305195109383, "grad_norm": 0.5856598786264917, "learning_rate": 1.6079795054922576e-06, "loss": 0.276, "step": 26797 }, { "epoch": 1.2553520400993115, "grad_norm": 0.5973521956309592, "learning_rate": 1.6078023411254219e-06, "loss": 0.2678, "step": 26798 }, { "epoch": 1.2553988850892397, "grad_norm": 0.5939287903142628, "learning_rate": 1.60762518189296e-06, "loss": 0.2899, "step": 26799 }, { "epoch": 1.2554457300791682, "grad_norm": 0.6211563804223321, "learning_rate": 1.6074480277958898e-06, "loss": 0.2947, "step": 26800 }, { "epoch": 1.2554925750690964, "grad_norm": 0.6170120779868132, "learning_rate": 1.607270878835232e-06, "loss": 0.2654, "step": 26801 }, { "epoch": 1.2555394200590246, "grad_norm": 0.6393006575334489, "learning_rate": 1.6070937350120063e-06, "loss": 0.2881, "step": 26802 }, { "epoch": 1.255586265048953, "grad_norm": 0.6070227506168991, "learning_rate": 1.6069165963272316e-06, "loss": 0.2731, "step": 26803 }, { "epoch": 1.2556331100388813, "grad_norm": 0.5638791213223312, "learning_rate": 1.6067394627819272e-06, "loss": 0.2645, "step": 26804 }, { "epoch": 1.2556799550288096, "grad_norm": 0.5861673823121923, "learning_rate": 1.606562334377113e-06, "loss": 0.2712, "step": 26805 }, { "epoch": 1.255726800018738, "grad_norm": 0.5992177639621173, "learning_rate": 1.6063852111138084e-06, "loss": 0.2778, "step": 26806 }, { "epoch": 1.2557736450086663, "grad_norm": 0.5915756543656339, "learning_rate": 1.6062080929930312e-06, "loss": 0.2771, "step": 26807 }, { "epoch": 1.2558204899985945, "grad_norm": 0.5752252965578062, "learning_rate": 1.6060309800158028e-06, "loss": 0.2802, "step": 26808 }, { "epoch": 1.255867334988523, "grad_norm": 0.5739357141102625, "learning_rate": 1.6058538721831407e-06, "loss": 0.2603, "step": 26809 }, { "epoch": 1.2559141799784512, "grad_norm": 0.577744049225492, "learning_rate": 1.6056767694960645e-06, "loss": 0.2784, "step": 26810 }, { "epoch": 1.2559610249683797, "grad_norm": 0.6048830032882571, "learning_rate": 1.6054996719555954e-06, "loss": 0.2934, "step": 26811 }, { "epoch": 1.256007869958308, "grad_norm": 0.6641592890133066, "learning_rate": 1.6053225795627498e-06, "loss": 0.2888, "step": 26812 }, { "epoch": 1.2560547149482364, "grad_norm": 0.5719297401016784, "learning_rate": 1.6051454923185479e-06, "loss": 0.2755, "step": 26813 }, { "epoch": 1.2561015599381646, "grad_norm": 0.6039352119516956, "learning_rate": 1.604968410224008e-06, "loss": 0.2941, "step": 26814 }, { "epoch": 1.2561484049280929, "grad_norm": 0.6234819583760531, "learning_rate": 1.6047913332801511e-06, "loss": 0.2894, "step": 26815 }, { "epoch": 1.2561952499180213, "grad_norm": 0.6787824569209888, "learning_rate": 1.6046142614879945e-06, "loss": 0.2928, "step": 26816 }, { "epoch": 1.2562420949079496, "grad_norm": 0.630002183348034, "learning_rate": 1.6044371948485585e-06, "loss": 0.2996, "step": 26817 }, { "epoch": 1.2562889398978778, "grad_norm": 0.5773427232970112, "learning_rate": 1.6042601333628604e-06, "loss": 0.2725, "step": 26818 }, { "epoch": 1.2563357848878063, "grad_norm": 0.5766278449651383, "learning_rate": 1.604083077031921e-06, "loss": 0.2631, "step": 26819 }, { "epoch": 1.2563826298777345, "grad_norm": 0.599212258418971, "learning_rate": 1.6039060258567575e-06, "loss": 0.2526, "step": 26820 }, { "epoch": 1.2564294748676628, "grad_norm": 0.5917301828969688, "learning_rate": 1.6037289798383892e-06, "loss": 0.2724, "step": 26821 }, { "epoch": 1.2564763198575912, "grad_norm": 0.6021003771094106, "learning_rate": 1.6035519389778364e-06, "loss": 0.2591, "step": 26822 }, { "epoch": 1.2565231648475197, "grad_norm": 0.5655214219306964, "learning_rate": 1.6033749032761174e-06, "loss": 0.2703, "step": 26823 }, { "epoch": 1.256570009837448, "grad_norm": 0.5589287280573029, "learning_rate": 1.6031978727342493e-06, "loss": 0.2711, "step": 26824 }, { "epoch": 1.2566168548273762, "grad_norm": 0.6225155229601045, "learning_rate": 1.6030208473532522e-06, "loss": 0.2726, "step": 26825 }, { "epoch": 1.2566636998173046, "grad_norm": 0.6717940184088862, "learning_rate": 1.6028438271341448e-06, "loss": 0.3067, "step": 26826 }, { "epoch": 1.2567105448072329, "grad_norm": 0.6144899783296411, "learning_rate": 1.6026668120779455e-06, "loss": 0.2728, "step": 26827 }, { "epoch": 1.2567573897971611, "grad_norm": 0.5638972379369056, "learning_rate": 1.602489802185673e-06, "loss": 0.2694, "step": 26828 }, { "epoch": 1.2568042347870896, "grad_norm": 0.550664823545356, "learning_rate": 1.6023127974583471e-06, "loss": 0.2657, "step": 26829 }, { "epoch": 1.2568510797770178, "grad_norm": 0.5986010833799272, "learning_rate": 1.602135797896985e-06, "loss": 0.2674, "step": 26830 }, { "epoch": 1.256897924766946, "grad_norm": 0.5792114696867406, "learning_rate": 1.601958803502605e-06, "loss": 0.2822, "step": 26831 }, { "epoch": 1.2569447697568745, "grad_norm": 0.6079712913017116, "learning_rate": 1.6017818142762271e-06, "loss": 0.2687, "step": 26832 }, { "epoch": 1.2569916147468028, "grad_norm": 0.627280745002791, "learning_rate": 1.6016048302188689e-06, "loss": 0.2807, "step": 26833 }, { "epoch": 1.2570384597367312, "grad_norm": 0.576898948498024, "learning_rate": 1.6014278513315488e-06, "loss": 0.2877, "step": 26834 }, { "epoch": 1.2570853047266595, "grad_norm": 0.6366280407079443, "learning_rate": 1.6012508776152862e-06, "loss": 0.313, "step": 26835 }, { "epoch": 1.257132149716588, "grad_norm": 0.5933405297273193, "learning_rate": 1.6010739090710986e-06, "loss": 0.2836, "step": 26836 }, { "epoch": 1.2571789947065162, "grad_norm": 0.636576908255867, "learning_rate": 1.6008969457000048e-06, "loss": 0.3017, "step": 26837 }, { "epoch": 1.2572258396964444, "grad_norm": 0.5901032028105568, "learning_rate": 1.6007199875030228e-06, "loss": 0.2731, "step": 26838 }, { "epoch": 1.2572726846863729, "grad_norm": 0.6146456425676043, "learning_rate": 1.6005430344811713e-06, "loss": 0.2836, "step": 26839 }, { "epoch": 1.2573195296763011, "grad_norm": 0.5830618171755448, "learning_rate": 1.6003660866354686e-06, "loss": 0.2747, "step": 26840 }, { "epoch": 1.2573663746662294, "grad_norm": 0.6304858995278768, "learning_rate": 1.6001891439669337e-06, "loss": 0.2785, "step": 26841 }, { "epoch": 1.2574132196561578, "grad_norm": 0.5797278300508294, "learning_rate": 1.6000122064765832e-06, "loss": 0.2689, "step": 26842 }, { "epoch": 1.257460064646086, "grad_norm": 0.5652247428472293, "learning_rate": 1.599835274165436e-06, "loss": 0.2567, "step": 26843 }, { "epoch": 1.2575069096360143, "grad_norm": 0.5943234345033725, "learning_rate": 1.5996583470345114e-06, "loss": 0.2574, "step": 26844 }, { "epoch": 1.2575537546259428, "grad_norm": 0.5811267836624041, "learning_rate": 1.5994814250848263e-06, "loss": 0.2797, "step": 26845 }, { "epoch": 1.257600599615871, "grad_norm": 0.6018629733132427, "learning_rate": 1.5993045083173995e-06, "loss": 0.2747, "step": 26846 }, { "epoch": 1.2576474446057995, "grad_norm": 0.6220801303861413, "learning_rate": 1.5991275967332487e-06, "loss": 0.2829, "step": 26847 }, { "epoch": 1.2576942895957277, "grad_norm": 0.6120603082860668, "learning_rate": 1.5989506903333923e-06, "loss": 0.2915, "step": 26848 }, { "epoch": 1.2577411345856562, "grad_norm": 0.5675640689527507, "learning_rate": 1.5987737891188482e-06, "loss": 0.2835, "step": 26849 }, { "epoch": 1.2577879795755844, "grad_norm": 0.6166815698769321, "learning_rate": 1.5985968930906343e-06, "loss": 0.2778, "step": 26850 }, { "epoch": 1.2578348245655127, "grad_norm": 0.5642755424064189, "learning_rate": 1.5984200022497684e-06, "loss": 0.2556, "step": 26851 }, { "epoch": 1.2578816695554411, "grad_norm": 0.618439681190948, "learning_rate": 1.598243116597269e-06, "loss": 0.2836, "step": 26852 }, { "epoch": 1.2579285145453694, "grad_norm": 0.6518459471883353, "learning_rate": 1.5980662361341545e-06, "loss": 0.2965, "step": 26853 }, { "epoch": 1.2579753595352976, "grad_norm": 0.5978556450994656, "learning_rate": 1.597889360861441e-06, "loss": 0.2759, "step": 26854 }, { "epoch": 1.258022204525226, "grad_norm": 0.548924835117192, "learning_rate": 1.5977124907801484e-06, "loss": 0.2604, "step": 26855 }, { "epoch": 1.2580690495151543, "grad_norm": 0.5965415749974488, "learning_rate": 1.597535625891293e-06, "loss": 0.279, "step": 26856 }, { "epoch": 1.2581158945050825, "grad_norm": 0.5905958764705664, "learning_rate": 1.5973587661958936e-06, "loss": 0.273, "step": 26857 }, { "epoch": 1.258162739495011, "grad_norm": 0.5905271375275721, "learning_rate": 1.597181911694967e-06, "loss": 0.2739, "step": 26858 }, { "epoch": 1.2582095844849395, "grad_norm": 0.6159643489364561, "learning_rate": 1.5970050623895334e-06, "loss": 0.2797, "step": 26859 }, { "epoch": 1.2582564294748677, "grad_norm": 0.5939615722490653, "learning_rate": 1.596828218280607e-06, "loss": 0.2754, "step": 26860 }, { "epoch": 1.258303274464796, "grad_norm": 0.6694634243769981, "learning_rate": 1.596651379369208e-06, "loss": 0.2882, "step": 26861 }, { "epoch": 1.2583501194547244, "grad_norm": 0.5328965308461514, "learning_rate": 1.5964745456563525e-06, "loss": 0.2626, "step": 26862 }, { "epoch": 1.2583969644446527, "grad_norm": 0.5853723430487441, "learning_rate": 1.5962977171430587e-06, "loss": 0.2792, "step": 26863 }, { "epoch": 1.258443809434581, "grad_norm": 0.5661085636352663, "learning_rate": 1.5961208938303453e-06, "loss": 0.261, "step": 26864 }, { "epoch": 1.2584906544245094, "grad_norm": 0.6169987057797663, "learning_rate": 1.5959440757192296e-06, "loss": 0.2693, "step": 26865 }, { "epoch": 1.2585374994144376, "grad_norm": 0.561103970409341, "learning_rate": 1.595767262810727e-06, "loss": 0.2739, "step": 26866 }, { "epoch": 1.2585843444043658, "grad_norm": 0.5592731591214584, "learning_rate": 1.5955904551058571e-06, "loss": 0.2548, "step": 26867 }, { "epoch": 1.2586311893942943, "grad_norm": 0.6496750813563559, "learning_rate": 1.5954136526056368e-06, "loss": 0.2923, "step": 26868 }, { "epoch": 1.2586780343842225, "grad_norm": 0.582975087835073, "learning_rate": 1.5952368553110834e-06, "loss": 0.2616, "step": 26869 }, { "epoch": 1.258724879374151, "grad_norm": 0.614955398134667, "learning_rate": 1.5950600632232144e-06, "loss": 0.2903, "step": 26870 }, { "epoch": 1.2587717243640792, "grad_norm": 0.585143121627142, "learning_rate": 1.5948832763430478e-06, "loss": 0.2859, "step": 26871 }, { "epoch": 1.2588185693540077, "grad_norm": 0.5405177402796313, "learning_rate": 1.594706494671601e-06, "loss": 0.2527, "step": 26872 }, { "epoch": 1.258865414343936, "grad_norm": 0.5897868122216907, "learning_rate": 1.5945297182098896e-06, "loss": 0.2771, "step": 26873 }, { "epoch": 1.2589122593338642, "grad_norm": 0.5888218295960608, "learning_rate": 1.5943529469589323e-06, "loss": 0.2719, "step": 26874 }, { "epoch": 1.2589591043237927, "grad_norm": 0.5998149110259344, "learning_rate": 1.5941761809197464e-06, "loss": 0.2864, "step": 26875 }, { "epoch": 1.259005949313721, "grad_norm": 0.5716886885669876, "learning_rate": 1.5939994200933489e-06, "loss": 0.2665, "step": 26876 }, { "epoch": 1.2590527943036491, "grad_norm": 0.6188825148883434, "learning_rate": 1.5938226644807564e-06, "loss": 0.2968, "step": 26877 }, { "epoch": 1.2590996392935776, "grad_norm": 0.6012219845403031, "learning_rate": 1.5936459140829882e-06, "loss": 0.2701, "step": 26878 }, { "epoch": 1.2591464842835058, "grad_norm": 0.5862204881767449, "learning_rate": 1.5934691689010595e-06, "loss": 0.2778, "step": 26879 }, { "epoch": 1.259193329273434, "grad_norm": 0.5684230382644012, "learning_rate": 1.593292428935987e-06, "loss": 0.2615, "step": 26880 }, { "epoch": 1.2592401742633625, "grad_norm": 0.6228081117577906, "learning_rate": 1.5931156941887888e-06, "loss": 0.2806, "step": 26881 }, { "epoch": 1.2592870192532908, "grad_norm": 0.6706792554846066, "learning_rate": 1.5929389646604825e-06, "loss": 0.2798, "step": 26882 }, { "epoch": 1.2593338642432192, "grad_norm": 0.6152711886921444, "learning_rate": 1.5927622403520837e-06, "loss": 0.2694, "step": 26883 }, { "epoch": 1.2593807092331475, "grad_norm": 0.5439243867364331, "learning_rate": 1.5925855212646113e-06, "loss": 0.2557, "step": 26884 }, { "epoch": 1.259427554223076, "grad_norm": 0.608878833914181, "learning_rate": 1.5924088073990803e-06, "loss": 0.2743, "step": 26885 }, { "epoch": 1.2594743992130042, "grad_norm": 0.6184379231471946, "learning_rate": 1.5922320987565088e-06, "loss": 0.26, "step": 26886 }, { "epoch": 1.2595212442029324, "grad_norm": 0.5278464890039399, "learning_rate": 1.592055395337913e-06, "loss": 0.2439, "step": 26887 }, { "epoch": 1.259568089192861, "grad_norm": 0.5770796880623948, "learning_rate": 1.5918786971443103e-06, "loss": 0.2596, "step": 26888 }, { "epoch": 1.2596149341827891, "grad_norm": 0.594411652846992, "learning_rate": 1.5917020041767178e-06, "loss": 0.285, "step": 26889 }, { "epoch": 1.2596617791727174, "grad_norm": 0.5703061113023979, "learning_rate": 1.5915253164361528e-06, "loss": 0.2776, "step": 26890 }, { "epoch": 1.2597086241626458, "grad_norm": 0.6184838334478107, "learning_rate": 1.59134863392363e-06, "loss": 0.2812, "step": 26891 }, { "epoch": 1.259755469152574, "grad_norm": 0.5893962998414656, "learning_rate": 1.5911719566401674e-06, "loss": 0.2608, "step": 26892 }, { "epoch": 1.2598023141425023, "grad_norm": 0.5967762909292104, "learning_rate": 1.590995284586782e-06, "loss": 0.2725, "step": 26893 }, { "epoch": 1.2598491591324308, "grad_norm": 0.5510094217945316, "learning_rate": 1.5908186177644902e-06, "loss": 0.2537, "step": 26894 }, { "epoch": 1.2598960041223592, "grad_norm": 0.5885332825623154, "learning_rate": 1.5906419561743092e-06, "loss": 0.2717, "step": 26895 }, { "epoch": 1.2599428491122875, "grad_norm": 0.6068978839265612, "learning_rate": 1.5904652998172552e-06, "loss": 0.286, "step": 26896 }, { "epoch": 1.2599896941022157, "grad_norm": 0.6478751754715066, "learning_rate": 1.5902886486943448e-06, "loss": 0.2898, "step": 26897 }, { "epoch": 1.2600365390921442, "grad_norm": 0.5580028575766297, "learning_rate": 1.5901120028065936e-06, "loss": 0.277, "step": 26898 }, { "epoch": 1.2600833840820724, "grad_norm": 0.6126642914997599, "learning_rate": 1.5899353621550201e-06, "loss": 0.2635, "step": 26899 }, { "epoch": 1.2601302290720007, "grad_norm": 0.5785180271706991, "learning_rate": 1.5897587267406395e-06, "loss": 0.2724, "step": 26900 }, { "epoch": 1.2601770740619291, "grad_norm": 0.6083411706997168, "learning_rate": 1.5895820965644681e-06, "loss": 0.2825, "step": 26901 }, { "epoch": 1.2602239190518574, "grad_norm": 0.5739905940526379, "learning_rate": 1.5894054716275242e-06, "loss": 0.2683, "step": 26902 }, { "epoch": 1.2602707640417856, "grad_norm": 0.593715971333918, "learning_rate": 1.5892288519308224e-06, "loss": 0.27, "step": 26903 }, { "epoch": 1.260317609031714, "grad_norm": 0.6233647120416126, "learning_rate": 1.5890522374753793e-06, "loss": 0.273, "step": 26904 }, { "epoch": 1.2603644540216423, "grad_norm": 0.5860661284981212, "learning_rate": 1.5888756282622114e-06, "loss": 0.266, "step": 26905 }, { "epoch": 1.2604112990115708, "grad_norm": 0.5938174037026959, "learning_rate": 1.588699024292336e-06, "loss": 0.2936, "step": 26906 }, { "epoch": 1.260458144001499, "grad_norm": 0.6095996629646238, "learning_rate": 1.588522425566768e-06, "loss": 0.2715, "step": 26907 }, { "epoch": 1.2605049889914275, "grad_norm": 0.6093044373667612, "learning_rate": 1.5883458320865252e-06, "loss": 0.2784, "step": 26908 }, { "epoch": 1.2605518339813557, "grad_norm": 0.6146758730720384, "learning_rate": 1.5881692438526225e-06, "loss": 0.2792, "step": 26909 }, { "epoch": 1.260598678971284, "grad_norm": 0.6317592173124298, "learning_rate": 1.5879926608660767e-06, "loss": 0.2834, "step": 26910 }, { "epoch": 1.2606455239612124, "grad_norm": 0.6018804487037523, "learning_rate": 1.5878160831279035e-06, "loss": 0.2639, "step": 26911 }, { "epoch": 1.2606923689511407, "grad_norm": 0.6248394817883354, "learning_rate": 1.5876395106391196e-06, "loss": 0.2841, "step": 26912 }, { "epoch": 1.260739213941069, "grad_norm": 0.5695861180780938, "learning_rate": 1.5874629434007412e-06, "loss": 0.2727, "step": 26913 }, { "epoch": 1.2607860589309974, "grad_norm": 0.6623187539453318, "learning_rate": 1.5872863814137852e-06, "loss": 0.2909, "step": 26914 }, { "epoch": 1.2608329039209256, "grad_norm": 0.6165955509852161, "learning_rate": 1.5871098246792652e-06, "loss": 0.2719, "step": 26915 }, { "epoch": 1.2608797489108539, "grad_norm": 0.5894162268550096, "learning_rate": 1.586933273198199e-06, "loss": 0.2856, "step": 26916 }, { "epoch": 1.2609265939007823, "grad_norm": 0.6334768553960446, "learning_rate": 1.5867567269716028e-06, "loss": 0.2869, "step": 26917 }, { "epoch": 1.2609734388907106, "grad_norm": 0.5650784680005447, "learning_rate": 1.5865801860004914e-06, "loss": 0.277, "step": 26918 }, { "epoch": 1.261020283880639, "grad_norm": 0.5522388952513555, "learning_rate": 1.5864036502858815e-06, "loss": 0.2822, "step": 26919 }, { "epoch": 1.2610671288705673, "grad_norm": 0.6161582188218448, "learning_rate": 1.58622711982879e-06, "loss": 0.2778, "step": 26920 }, { "epoch": 1.2611139738604957, "grad_norm": 0.6309985392869166, "learning_rate": 1.5860505946302313e-06, "loss": 0.2815, "step": 26921 }, { "epoch": 1.261160818850424, "grad_norm": 0.6570626104754155, "learning_rate": 1.585874074691221e-06, "loss": 0.2784, "step": 26922 }, { "epoch": 1.2612076638403522, "grad_norm": 0.605053229364492, "learning_rate": 1.5856975600127762e-06, "loss": 0.278, "step": 26923 }, { "epoch": 1.2612545088302807, "grad_norm": 0.611306954077933, "learning_rate": 1.585521050595912e-06, "loss": 0.2837, "step": 26924 }, { "epoch": 1.261301353820209, "grad_norm": 0.663776659054447, "learning_rate": 1.5853445464416442e-06, "loss": 0.2802, "step": 26925 }, { "epoch": 1.2613481988101372, "grad_norm": 0.6387783385220538, "learning_rate": 1.5851680475509895e-06, "loss": 0.2677, "step": 26926 }, { "epoch": 1.2613950438000656, "grad_norm": 0.5488239107931839, "learning_rate": 1.584991553924962e-06, "loss": 0.2549, "step": 26927 }, { "epoch": 1.2614418887899939, "grad_norm": 0.593322247715522, "learning_rate": 1.5848150655645783e-06, "loss": 0.2623, "step": 26928 }, { "epoch": 1.261488733779922, "grad_norm": 0.6303344409543119, "learning_rate": 1.5846385824708536e-06, "loss": 0.2891, "step": 26929 }, { "epoch": 1.2615355787698506, "grad_norm": 0.6020662922459624, "learning_rate": 1.5844621046448034e-06, "loss": 0.2611, "step": 26930 }, { "epoch": 1.261582423759779, "grad_norm": 0.6204624228753242, "learning_rate": 1.5842856320874445e-06, "loss": 0.2788, "step": 26931 }, { "epoch": 1.2616292687497073, "grad_norm": 0.6044965470823631, "learning_rate": 1.5841091647997922e-06, "loss": 0.2738, "step": 26932 }, { "epoch": 1.2616761137396355, "grad_norm": 0.6142422656191391, "learning_rate": 1.5839327027828605e-06, "loss": 0.2693, "step": 26933 }, { "epoch": 1.261722958729564, "grad_norm": 0.6040359164320702, "learning_rate": 1.5837562460376655e-06, "loss": 0.2783, "step": 26934 }, { "epoch": 1.2617698037194922, "grad_norm": 0.5919869654199911, "learning_rate": 1.5835797945652238e-06, "loss": 0.2472, "step": 26935 }, { "epoch": 1.2618166487094205, "grad_norm": 0.5997179674820153, "learning_rate": 1.5834033483665495e-06, "loss": 0.2796, "step": 26936 }, { "epoch": 1.261863493699349, "grad_norm": 0.5854568157571737, "learning_rate": 1.5832269074426584e-06, "loss": 0.264, "step": 26937 }, { "epoch": 1.2619103386892772, "grad_norm": 0.5877152744673071, "learning_rate": 1.5830504717945666e-06, "loss": 0.2607, "step": 26938 }, { "epoch": 1.2619571836792054, "grad_norm": 0.6151267490901143, "learning_rate": 1.5828740414232896e-06, "loss": 0.2836, "step": 26939 }, { "epoch": 1.2620040286691339, "grad_norm": 0.5750713214528438, "learning_rate": 1.5826976163298407e-06, "loss": 0.2705, "step": 26940 }, { "epoch": 1.262050873659062, "grad_norm": 0.5877547286920252, "learning_rate": 1.5825211965152365e-06, "loss": 0.2702, "step": 26941 }, { "epoch": 1.2620977186489906, "grad_norm": 0.6059851659154584, "learning_rate": 1.5823447819804927e-06, "loss": 0.2778, "step": 26942 }, { "epoch": 1.2621445636389188, "grad_norm": 0.6205400398420255, "learning_rate": 1.5821683727266235e-06, "loss": 0.3038, "step": 26943 }, { "epoch": 1.2621914086288473, "grad_norm": 0.5750494284402561, "learning_rate": 1.5819919687546453e-06, "loss": 0.2576, "step": 26944 }, { "epoch": 1.2622382536187755, "grad_norm": 0.5923337302624871, "learning_rate": 1.5818155700655724e-06, "loss": 0.279, "step": 26945 }, { "epoch": 1.2622850986087037, "grad_norm": 0.6705766114893638, "learning_rate": 1.5816391766604206e-06, "loss": 0.2867, "step": 26946 }, { "epoch": 1.2623319435986322, "grad_norm": 0.5762492998235158, "learning_rate": 1.5814627885402034e-06, "loss": 0.2762, "step": 26947 }, { "epoch": 1.2623787885885605, "grad_norm": 0.6019669326941348, "learning_rate": 1.581286405705938e-06, "loss": 0.2845, "step": 26948 }, { "epoch": 1.2624256335784887, "grad_norm": 0.5262102906686803, "learning_rate": 1.5811100281586371e-06, "loss": 0.2368, "step": 26949 }, { "epoch": 1.2624724785684172, "grad_norm": 0.6109036528980952, "learning_rate": 1.580933655899318e-06, "loss": 0.2845, "step": 26950 }, { "epoch": 1.2625193235583454, "grad_norm": 0.5437191626948048, "learning_rate": 1.580757288928995e-06, "loss": 0.2641, "step": 26951 }, { "epoch": 1.2625661685482736, "grad_norm": 0.5991127172282599, "learning_rate": 1.5805809272486826e-06, "loss": 0.2809, "step": 26952 }, { "epoch": 1.262613013538202, "grad_norm": 0.6341101563562999, "learning_rate": 1.5804045708593956e-06, "loss": 0.272, "step": 26953 }, { "epoch": 1.2626598585281303, "grad_norm": 0.604038360590819, "learning_rate": 1.5802282197621487e-06, "loss": 0.2729, "step": 26954 }, { "epoch": 1.2627067035180588, "grad_norm": 0.6705466068750445, "learning_rate": 1.580051873957958e-06, "loss": 0.2841, "step": 26955 }, { "epoch": 1.262753548507987, "grad_norm": 0.5940677734243541, "learning_rate": 1.579875533447837e-06, "loss": 0.2724, "step": 26956 }, { "epoch": 1.2628003934979155, "grad_norm": 0.672970919490316, "learning_rate": 1.5796991982328019e-06, "loss": 0.2918, "step": 26957 }, { "epoch": 1.2628472384878437, "grad_norm": 0.6610697265108836, "learning_rate": 1.5795228683138658e-06, "loss": 0.2948, "step": 26958 }, { "epoch": 1.262894083477772, "grad_norm": 0.5989461909267245, "learning_rate": 1.5793465436920446e-06, "loss": 0.2857, "step": 26959 }, { "epoch": 1.2629409284677005, "grad_norm": 0.5917847143887757, "learning_rate": 1.579170224368352e-06, "loss": 0.2567, "step": 26960 }, { "epoch": 1.2629877734576287, "grad_norm": 0.6122869216552954, "learning_rate": 1.578993910343804e-06, "loss": 0.2788, "step": 26961 }, { "epoch": 1.263034618447557, "grad_norm": 0.6120970141646598, "learning_rate": 1.5788176016194145e-06, "loss": 0.284, "step": 26962 }, { "epoch": 1.2630814634374854, "grad_norm": 0.5707435708637223, "learning_rate": 1.5786412981961986e-06, "loss": 0.2762, "step": 26963 }, { "epoch": 1.2631283084274136, "grad_norm": 0.602231079823529, "learning_rate": 1.5784650000751694e-06, "loss": 0.2914, "step": 26964 }, { "epoch": 1.2631751534173419, "grad_norm": 0.6272674598120653, "learning_rate": 1.5782887072573428e-06, "loss": 0.2861, "step": 26965 }, { "epoch": 1.2632219984072703, "grad_norm": 0.580877543856037, "learning_rate": 1.5781124197437337e-06, "loss": 0.2549, "step": 26966 }, { "epoch": 1.2632688433971988, "grad_norm": 0.5746987782353952, "learning_rate": 1.577936137535355e-06, "loss": 0.2705, "step": 26967 }, { "epoch": 1.263315688387127, "grad_norm": 0.5469751101652216, "learning_rate": 1.5777598606332224e-06, "loss": 0.2667, "step": 26968 }, { "epoch": 1.2633625333770553, "grad_norm": 0.5644057179468625, "learning_rate": 1.5775835890383507e-06, "loss": 0.2837, "step": 26969 }, { "epoch": 1.2634093783669837, "grad_norm": 0.5680332716957623, "learning_rate": 1.5774073227517534e-06, "loss": 0.2704, "step": 26970 }, { "epoch": 1.263456223356912, "grad_norm": 0.558897547497184, "learning_rate": 1.5772310617744442e-06, "loss": 0.2629, "step": 26971 }, { "epoch": 1.2635030683468402, "grad_norm": 0.613442226405582, "learning_rate": 1.5770548061074386e-06, "loss": 0.2704, "step": 26972 }, { "epoch": 1.2635499133367687, "grad_norm": 0.59326298680493, "learning_rate": 1.576878555751751e-06, "loss": 0.2834, "step": 26973 }, { "epoch": 1.263596758326697, "grad_norm": 0.655709594319533, "learning_rate": 1.5767023107083948e-06, "loss": 0.2901, "step": 26974 }, { "epoch": 1.2636436033166252, "grad_norm": 0.5710041490757846, "learning_rate": 1.576526070978386e-06, "loss": 0.2646, "step": 26975 }, { "epoch": 1.2636904483065536, "grad_norm": 0.6058119540288279, "learning_rate": 1.576349836562736e-06, "loss": 0.2584, "step": 26976 }, { "epoch": 1.2637372932964819, "grad_norm": 0.6001855591617228, "learning_rate": 1.5761736074624615e-06, "loss": 0.2873, "step": 26977 }, { "epoch": 1.2637841382864103, "grad_norm": 0.6327053824047175, "learning_rate": 1.575997383678575e-06, "loss": 0.2616, "step": 26978 }, { "epoch": 1.2638309832763386, "grad_norm": 0.5791244833046655, "learning_rate": 1.5758211652120914e-06, "loss": 0.2588, "step": 26979 }, { "epoch": 1.263877828266267, "grad_norm": 0.6115130299959365, "learning_rate": 1.5756449520640251e-06, "loss": 0.2783, "step": 26980 }, { "epoch": 1.2639246732561953, "grad_norm": 0.5536409886874329, "learning_rate": 1.5754687442353906e-06, "loss": 0.2649, "step": 26981 }, { "epoch": 1.2639715182461235, "grad_norm": 0.6102621131051456, "learning_rate": 1.5752925417271997e-06, "loss": 0.2778, "step": 26982 }, { "epoch": 1.264018363236052, "grad_norm": 0.5649806398103838, "learning_rate": 1.575116344540468e-06, "loss": 0.2695, "step": 26983 }, { "epoch": 1.2640652082259802, "grad_norm": 0.5897060653626136, "learning_rate": 1.5749401526762098e-06, "loss": 0.2527, "step": 26984 }, { "epoch": 1.2641120532159085, "grad_norm": 0.5634871910643353, "learning_rate": 1.5747639661354381e-06, "loss": 0.2708, "step": 26985 }, { "epoch": 1.264158898205837, "grad_norm": 0.5660550558265393, "learning_rate": 1.5745877849191674e-06, "loss": 0.2761, "step": 26986 }, { "epoch": 1.2642057431957652, "grad_norm": 0.5730415484857825, "learning_rate": 1.5744116090284118e-06, "loss": 0.273, "step": 26987 }, { "epoch": 1.2642525881856934, "grad_norm": 0.6183936302602353, "learning_rate": 1.5742354384641846e-06, "loss": 0.2796, "step": 26988 }, { "epoch": 1.2642994331756219, "grad_norm": 0.6087640784790879, "learning_rate": 1.574059273227499e-06, "loss": 0.2604, "step": 26989 }, { "epoch": 1.2643462781655501, "grad_norm": 0.5968110458275517, "learning_rate": 1.5738831133193703e-06, "loss": 0.2908, "step": 26990 }, { "epoch": 1.2643931231554786, "grad_norm": 0.5806799764441567, "learning_rate": 1.573706958740811e-06, "loss": 0.2715, "step": 26991 }, { "epoch": 1.2644399681454068, "grad_norm": 0.5718217909626699, "learning_rate": 1.5735308094928352e-06, "loss": 0.2611, "step": 26992 }, { "epoch": 1.2644868131353353, "grad_norm": 0.5906847447448295, "learning_rate": 1.5733546655764578e-06, "loss": 0.2678, "step": 26993 }, { "epoch": 1.2645336581252635, "grad_norm": 0.575395838525883, "learning_rate": 1.5731785269926907e-06, "loss": 0.2615, "step": 26994 }, { "epoch": 1.2645805031151918, "grad_norm": 0.607932784977069, "learning_rate": 1.5730023937425482e-06, "loss": 0.2609, "step": 26995 }, { "epoch": 1.2646273481051202, "grad_norm": 0.6181522975727306, "learning_rate": 1.5728262658270437e-06, "loss": 0.2811, "step": 26996 }, { "epoch": 1.2646741930950485, "grad_norm": 0.6828071625222966, "learning_rate": 1.5726501432471914e-06, "loss": 0.2876, "step": 26997 }, { "epoch": 1.2647210380849767, "grad_norm": 0.5919122261997886, "learning_rate": 1.5724740260040041e-06, "loss": 0.2698, "step": 26998 }, { "epoch": 1.2647678830749052, "grad_norm": 0.574999609268829, "learning_rate": 1.5722979140984968e-06, "loss": 0.2669, "step": 26999 }, { "epoch": 1.2648147280648334, "grad_norm": 0.6435336858065697, "learning_rate": 1.5721218075316808e-06, "loss": 0.281, "step": 27000 }, { "epoch": 1.2648615730547617, "grad_norm": 0.6406886847795209, "learning_rate": 1.5719457063045707e-06, "loss": 0.2911, "step": 27001 }, { "epoch": 1.2649084180446901, "grad_norm": 0.6003891429919309, "learning_rate": 1.5717696104181795e-06, "loss": 0.2829, "step": 27002 }, { "epoch": 1.2649552630346186, "grad_norm": 0.6131986594241754, "learning_rate": 1.571593519873521e-06, "loss": 0.2766, "step": 27003 }, { "epoch": 1.2650021080245468, "grad_norm": 0.5953197553267172, "learning_rate": 1.5714174346716089e-06, "loss": 0.2775, "step": 27004 }, { "epoch": 1.265048953014475, "grad_norm": 0.5798946765131866, "learning_rate": 1.5712413548134553e-06, "loss": 0.2577, "step": 27005 }, { "epoch": 1.2650957980044035, "grad_norm": 0.6126382703318001, "learning_rate": 1.571065280300076e-06, "loss": 0.2705, "step": 27006 }, { "epoch": 1.2651426429943318, "grad_norm": 0.6073898583977987, "learning_rate": 1.5708892111324808e-06, "loss": 0.2707, "step": 27007 }, { "epoch": 1.26518948798426, "grad_norm": 0.6033432580132908, "learning_rate": 1.5707131473116855e-06, "loss": 0.2829, "step": 27008 }, { "epoch": 1.2652363329741885, "grad_norm": 0.5841112338561916, "learning_rate": 1.5705370888387016e-06, "loss": 0.2685, "step": 27009 }, { "epoch": 1.2652831779641167, "grad_norm": 0.6199087513922964, "learning_rate": 1.5703610357145435e-06, "loss": 0.2943, "step": 27010 }, { "epoch": 1.265330022954045, "grad_norm": 0.5872533023725061, "learning_rate": 1.5701849879402244e-06, "loss": 0.2721, "step": 27011 }, { "epoch": 1.2653768679439734, "grad_norm": 0.6000927147816526, "learning_rate": 1.5700089455167578e-06, "loss": 0.2817, "step": 27012 }, { "epoch": 1.2654237129339017, "grad_norm": 0.5832410916059101, "learning_rate": 1.5698329084451545e-06, "loss": 0.272, "step": 27013 }, { "epoch": 1.2654705579238301, "grad_norm": 0.5781801142493861, "learning_rate": 1.5696568767264293e-06, "loss": 0.2758, "step": 27014 }, { "epoch": 1.2655174029137584, "grad_norm": 0.5984440425687739, "learning_rate": 1.5694808503615951e-06, "loss": 0.2859, "step": 27015 }, { "epoch": 1.2655642479036868, "grad_norm": 0.5911375749021668, "learning_rate": 1.5693048293516644e-06, "loss": 0.2772, "step": 27016 }, { "epoch": 1.265611092893615, "grad_norm": 0.6240202409813419, "learning_rate": 1.5691288136976506e-06, "loss": 0.2797, "step": 27017 }, { "epoch": 1.2656579378835433, "grad_norm": 0.5823350013968439, "learning_rate": 1.5689528034005675e-06, "loss": 0.2696, "step": 27018 }, { "epoch": 1.2657047828734718, "grad_norm": 0.5687590350207451, "learning_rate": 1.5687767984614265e-06, "loss": 0.2652, "step": 27019 }, { "epoch": 1.2657516278634, "grad_norm": 0.5633162062823335, "learning_rate": 1.5686007988812404e-06, "loss": 0.273, "step": 27020 }, { "epoch": 1.2657984728533282, "grad_norm": 0.6377213655177447, "learning_rate": 1.5684248046610229e-06, "loss": 0.2985, "step": 27021 }, { "epoch": 1.2658453178432567, "grad_norm": 0.602105043374863, "learning_rate": 1.5682488158017866e-06, "loss": 0.281, "step": 27022 }, { "epoch": 1.265892162833185, "grad_norm": 0.6147728875426747, "learning_rate": 1.568072832304544e-06, "loss": 0.2752, "step": 27023 }, { "epoch": 1.2659390078231132, "grad_norm": 0.5841515733174619, "learning_rate": 1.5678968541703088e-06, "loss": 0.2722, "step": 27024 }, { "epoch": 1.2659858528130417, "grad_norm": 0.5975356517027255, "learning_rate": 1.5677208814000919e-06, "loss": 0.2718, "step": 27025 }, { "epoch": 1.26603269780297, "grad_norm": 0.5790823372805594, "learning_rate": 1.5675449139949075e-06, "loss": 0.2735, "step": 27026 }, { "epoch": 1.2660795427928984, "grad_norm": 0.5903227068866109, "learning_rate": 1.5673689519557671e-06, "loss": 0.2855, "step": 27027 }, { "epoch": 1.2661263877828266, "grad_norm": 0.5663886145440656, "learning_rate": 1.5671929952836846e-06, "loss": 0.2686, "step": 27028 }, { "epoch": 1.266173232772755, "grad_norm": 0.6165837410923803, "learning_rate": 1.567017043979672e-06, "loss": 0.2742, "step": 27029 }, { "epoch": 1.2662200777626833, "grad_norm": 0.6175761482942956, "learning_rate": 1.5668410980447424e-06, "loss": 0.2717, "step": 27030 }, { "epoch": 1.2662669227526115, "grad_norm": 0.5836928768507846, "learning_rate": 1.566665157479907e-06, "loss": 0.268, "step": 27031 }, { "epoch": 1.26631376774254, "grad_norm": 0.5869986094795541, "learning_rate": 1.5664892222861784e-06, "loss": 0.2813, "step": 27032 }, { "epoch": 1.2663606127324682, "grad_norm": 0.5600292701500069, "learning_rate": 1.5663132924645708e-06, "loss": 0.2686, "step": 27033 }, { "epoch": 1.2664074577223965, "grad_norm": 0.5998654204442471, "learning_rate": 1.5661373680160946e-06, "loss": 0.2883, "step": 27034 }, { "epoch": 1.266454302712325, "grad_norm": 0.6338824995046518, "learning_rate": 1.5659614489417637e-06, "loss": 0.2695, "step": 27035 }, { "epoch": 1.2665011477022532, "grad_norm": 0.595947508563773, "learning_rate": 1.5657855352425903e-06, "loss": 0.2603, "step": 27036 }, { "epoch": 1.2665479926921814, "grad_norm": 0.6003924822537529, "learning_rate": 1.5656096269195858e-06, "loss": 0.2835, "step": 27037 }, { "epoch": 1.26659483768211, "grad_norm": 0.5307384561418319, "learning_rate": 1.565433723973763e-06, "loss": 0.2571, "step": 27038 }, { "epoch": 1.2666416826720384, "grad_norm": 0.5098945333160293, "learning_rate": 1.5652578264061345e-06, "loss": 0.2488, "step": 27039 }, { "epoch": 1.2666885276619666, "grad_norm": 0.6422477947422903, "learning_rate": 1.5650819342177118e-06, "loss": 0.2767, "step": 27040 }, { "epoch": 1.2667353726518948, "grad_norm": 0.5982187529191282, "learning_rate": 1.5649060474095075e-06, "loss": 0.2857, "step": 27041 }, { "epoch": 1.2667822176418233, "grad_norm": 0.6086070226908643, "learning_rate": 1.5647301659825348e-06, "loss": 0.2859, "step": 27042 }, { "epoch": 1.2668290626317515, "grad_norm": 0.5991770923485209, "learning_rate": 1.5645542899378047e-06, "loss": 0.2851, "step": 27043 }, { "epoch": 1.2668759076216798, "grad_norm": 0.5988562844631711, "learning_rate": 1.564378419276329e-06, "loss": 0.2811, "step": 27044 }, { "epoch": 1.2669227526116082, "grad_norm": 0.6300948454147192, "learning_rate": 1.56420255399912e-06, "loss": 0.2765, "step": 27045 }, { "epoch": 1.2669695976015365, "grad_norm": 0.6046872405756538, "learning_rate": 1.5640266941071914e-06, "loss": 0.2707, "step": 27046 }, { "epoch": 1.2670164425914647, "grad_norm": 0.6384569153750091, "learning_rate": 1.5638508396015525e-06, "loss": 0.2899, "step": 27047 }, { "epoch": 1.2670632875813932, "grad_norm": 0.5752669014702195, "learning_rate": 1.5636749904832182e-06, "loss": 0.2805, "step": 27048 }, { "epoch": 1.2671101325713214, "grad_norm": 0.5950859212793856, "learning_rate": 1.563499146753198e-06, "loss": 0.2706, "step": 27049 }, { "epoch": 1.26715697756125, "grad_norm": 0.6746639695369354, "learning_rate": 1.5633233084125052e-06, "loss": 0.2988, "step": 27050 }, { "epoch": 1.2672038225511781, "grad_norm": 0.6065565156383024, "learning_rate": 1.563147475462151e-06, "loss": 0.2711, "step": 27051 }, { "epoch": 1.2672506675411066, "grad_norm": 0.6267694225662555, "learning_rate": 1.5629716479031473e-06, "loss": 0.2807, "step": 27052 }, { "epoch": 1.2672975125310348, "grad_norm": 0.5587736284968823, "learning_rate": 1.5627958257365069e-06, "loss": 0.262, "step": 27053 }, { "epoch": 1.267344357520963, "grad_norm": 0.6048184686142263, "learning_rate": 1.5626200089632414e-06, "loss": 0.2911, "step": 27054 }, { "epoch": 1.2673912025108915, "grad_norm": 0.6213401262056318, "learning_rate": 1.5624441975843612e-06, "loss": 0.2932, "step": 27055 }, { "epoch": 1.2674380475008198, "grad_norm": 0.5993913349712785, "learning_rate": 1.562268391600879e-06, "loss": 0.2779, "step": 27056 }, { "epoch": 1.267484892490748, "grad_norm": 0.5905558852215042, "learning_rate": 1.5620925910138074e-06, "loss": 0.268, "step": 27057 }, { "epoch": 1.2675317374806765, "grad_norm": 0.6291978441400002, "learning_rate": 1.5619167958241564e-06, "loss": 0.2891, "step": 27058 }, { "epoch": 1.2675785824706047, "grad_norm": 0.6719446406410846, "learning_rate": 1.5617410060329383e-06, "loss": 0.301, "step": 27059 }, { "epoch": 1.267625427460533, "grad_norm": 0.6433812846533629, "learning_rate": 1.5615652216411658e-06, "loss": 0.2812, "step": 27060 }, { "epoch": 1.2676722724504614, "grad_norm": 0.555123808167367, "learning_rate": 1.5613894426498494e-06, "loss": 0.2637, "step": 27061 }, { "epoch": 1.2677191174403897, "grad_norm": 0.6161688744092895, "learning_rate": 1.5612136690600005e-06, "loss": 0.2804, "step": 27062 }, { "epoch": 1.2677659624303181, "grad_norm": 0.5903751535163769, "learning_rate": 1.5610379008726307e-06, "loss": 0.2773, "step": 27063 }, { "epoch": 1.2678128074202464, "grad_norm": 0.6198105171508316, "learning_rate": 1.5608621380887524e-06, "loss": 0.2805, "step": 27064 }, { "epoch": 1.2678596524101748, "grad_norm": 0.5936463436036821, "learning_rate": 1.5606863807093758e-06, "loss": 0.2689, "step": 27065 }, { "epoch": 1.267906497400103, "grad_norm": 0.6129719937559751, "learning_rate": 1.5605106287355138e-06, "loss": 0.3047, "step": 27066 }, { "epoch": 1.2679533423900313, "grad_norm": 0.6300010655021784, "learning_rate": 1.5603348821681763e-06, "loss": 0.2775, "step": 27067 }, { "epoch": 1.2680001873799598, "grad_norm": 0.5697331339843872, "learning_rate": 1.5601591410083761e-06, "loss": 0.2669, "step": 27068 }, { "epoch": 1.268047032369888, "grad_norm": 0.6050815327156912, "learning_rate": 1.5599834052571227e-06, "loss": 0.2834, "step": 27069 }, { "epoch": 1.2680938773598163, "grad_norm": 0.5997102860950763, "learning_rate": 1.559807674915429e-06, "loss": 0.2757, "step": 27070 }, { "epoch": 1.2681407223497447, "grad_norm": 0.5892537833473536, "learning_rate": 1.5596319499843063e-06, "loss": 0.2787, "step": 27071 }, { "epoch": 1.268187567339673, "grad_norm": 0.6712807222800456, "learning_rate": 1.5594562304647648e-06, "loss": 0.3026, "step": 27072 }, { "epoch": 1.2682344123296012, "grad_norm": 0.6043770694602504, "learning_rate": 1.5592805163578173e-06, "loss": 0.2584, "step": 27073 }, { "epoch": 1.2682812573195297, "grad_norm": 0.5793942007595603, "learning_rate": 1.5591048076644728e-06, "loss": 0.2592, "step": 27074 }, { "epoch": 1.2683281023094581, "grad_norm": 0.6009924995019017, "learning_rate": 1.5589291043857444e-06, "loss": 0.28, "step": 27075 }, { "epoch": 1.2683749472993864, "grad_norm": 0.6200358911709568, "learning_rate": 1.5587534065226417e-06, "loss": 0.2974, "step": 27076 }, { "epoch": 1.2684217922893146, "grad_norm": 0.5916160643948215, "learning_rate": 1.558577714076177e-06, "loss": 0.2952, "step": 27077 }, { "epoch": 1.268468637279243, "grad_norm": 0.5740248512131083, "learning_rate": 1.558402027047361e-06, "loss": 0.2791, "step": 27078 }, { "epoch": 1.2685154822691713, "grad_norm": 0.6148681948121949, "learning_rate": 1.5582263454372055e-06, "loss": 0.2812, "step": 27079 }, { "epoch": 1.2685623272590996, "grad_norm": 0.6029579930355924, "learning_rate": 1.5580506692467196e-06, "loss": 0.2804, "step": 27080 }, { "epoch": 1.268609172249028, "grad_norm": 0.6012268106159996, "learning_rate": 1.5578749984769154e-06, "loss": 0.2683, "step": 27081 }, { "epoch": 1.2686560172389563, "grad_norm": 0.5802198866133428, "learning_rate": 1.557699333128804e-06, "loss": 0.2728, "step": 27082 }, { "epoch": 1.2687028622288845, "grad_norm": 0.5712218270899727, "learning_rate": 1.557523673203396e-06, "loss": 0.2748, "step": 27083 }, { "epoch": 1.268749707218813, "grad_norm": 0.5972590973867551, "learning_rate": 1.5573480187017026e-06, "loss": 0.2734, "step": 27084 }, { "epoch": 1.2687965522087412, "grad_norm": 0.5660568234942642, "learning_rate": 1.5571723696247344e-06, "loss": 0.2602, "step": 27085 }, { "epoch": 1.2688433971986697, "grad_norm": 0.6143653792611727, "learning_rate": 1.5569967259735025e-06, "loss": 0.2863, "step": 27086 }, { "epoch": 1.268890242188598, "grad_norm": 0.5873561663796132, "learning_rate": 1.5568210877490164e-06, "loss": 0.2691, "step": 27087 }, { "epoch": 1.2689370871785264, "grad_norm": 0.6521900271669919, "learning_rate": 1.556645454952289e-06, "loss": 0.272, "step": 27088 }, { "epoch": 1.2689839321684546, "grad_norm": 0.6193505527227973, "learning_rate": 1.5564698275843292e-06, "loss": 0.2764, "step": 27089 }, { "epoch": 1.2690307771583829, "grad_norm": 0.5481108606289309, "learning_rate": 1.5562942056461484e-06, "loss": 0.2467, "step": 27090 }, { "epoch": 1.2690776221483113, "grad_norm": 0.5627525343207573, "learning_rate": 1.5561185891387582e-06, "loss": 0.2791, "step": 27091 }, { "epoch": 1.2691244671382396, "grad_norm": 0.5763766886916585, "learning_rate": 1.5559429780631677e-06, "loss": 0.2727, "step": 27092 }, { "epoch": 1.2691713121281678, "grad_norm": 0.5854927495621207, "learning_rate": 1.5557673724203875e-06, "loss": 0.2757, "step": 27093 }, { "epoch": 1.2692181571180963, "grad_norm": 0.5771379733207245, "learning_rate": 1.555591772211429e-06, "loss": 0.2845, "step": 27094 }, { "epoch": 1.2692650021080245, "grad_norm": 0.567678099007048, "learning_rate": 1.5554161774373029e-06, "loss": 0.2702, "step": 27095 }, { "epoch": 1.2693118470979528, "grad_norm": 0.6426532430044036, "learning_rate": 1.5552405880990188e-06, "loss": 0.2877, "step": 27096 }, { "epoch": 1.2693586920878812, "grad_norm": 0.5949891492843657, "learning_rate": 1.5550650041975885e-06, "loss": 0.2784, "step": 27097 }, { "epoch": 1.2694055370778095, "grad_norm": 0.5890569747876313, "learning_rate": 1.5548894257340208e-06, "loss": 0.28, "step": 27098 }, { "epoch": 1.269452382067738, "grad_norm": 0.5816949948055468, "learning_rate": 1.5547138527093276e-06, "loss": 0.2842, "step": 27099 }, { "epoch": 1.2694992270576662, "grad_norm": 0.5919244920208638, "learning_rate": 1.5545382851245178e-06, "loss": 0.2755, "step": 27100 }, { "epoch": 1.2695460720475946, "grad_norm": 0.6284661377561898, "learning_rate": 1.5543627229806024e-06, "loss": 0.2851, "step": 27101 }, { "epoch": 1.2695929170375229, "grad_norm": 0.5694523087128073, "learning_rate": 1.5541871662785924e-06, "loss": 0.2668, "step": 27102 }, { "epoch": 1.269639762027451, "grad_norm": 0.5478901567681639, "learning_rate": 1.5540116150194984e-06, "loss": 0.2558, "step": 27103 }, { "epoch": 1.2696866070173796, "grad_norm": 0.5853031617591655, "learning_rate": 1.5538360692043286e-06, "loss": 0.2755, "step": 27104 }, { "epoch": 1.2697334520073078, "grad_norm": 0.6154262219802379, "learning_rate": 1.5536605288340941e-06, "loss": 0.27, "step": 27105 }, { "epoch": 1.269780296997236, "grad_norm": 0.6033742376292432, "learning_rate": 1.5534849939098063e-06, "loss": 0.2845, "step": 27106 }, { "epoch": 1.2698271419871645, "grad_norm": 0.6045871374074973, "learning_rate": 1.5533094644324737e-06, "loss": 0.2942, "step": 27107 }, { "epoch": 1.2698739869770928, "grad_norm": 0.5488761120887152, "learning_rate": 1.5531339404031073e-06, "loss": 0.2713, "step": 27108 }, { "epoch": 1.269920831967021, "grad_norm": 0.5726607620820041, "learning_rate": 1.5529584218227185e-06, "loss": 0.2682, "step": 27109 }, { "epoch": 1.2699676769569495, "grad_norm": 0.5722454038011002, "learning_rate": 1.5527829086923146e-06, "loss": 0.2772, "step": 27110 }, { "epoch": 1.270014521946878, "grad_norm": 0.5904676803877543, "learning_rate": 1.5526074010129071e-06, "loss": 0.2821, "step": 27111 }, { "epoch": 1.2700613669368062, "grad_norm": 0.5642868333634052, "learning_rate": 1.5524318987855058e-06, "loss": 0.2725, "step": 27112 }, { "epoch": 1.2701082119267344, "grad_norm": 0.5772580400645886, "learning_rate": 1.552256402011121e-06, "loss": 0.2707, "step": 27113 }, { "epoch": 1.2701550569166629, "grad_norm": 0.5638431296858532, "learning_rate": 1.5520809106907623e-06, "loss": 0.2604, "step": 27114 }, { "epoch": 1.270201901906591, "grad_norm": 0.6426187634451824, "learning_rate": 1.5519054248254407e-06, "loss": 0.2718, "step": 27115 }, { "epoch": 1.2702487468965193, "grad_norm": 0.5594465321251897, "learning_rate": 1.5517299444161637e-06, "loss": 0.2584, "step": 27116 }, { "epoch": 1.2702955918864478, "grad_norm": 0.5687379412891853, "learning_rate": 1.551554469463943e-06, "loss": 0.2736, "step": 27117 }, { "epoch": 1.270342436876376, "grad_norm": 0.6407992257162531, "learning_rate": 1.551378999969788e-06, "loss": 0.2932, "step": 27118 }, { "epoch": 1.2703892818663043, "grad_norm": 0.5815150121525254, "learning_rate": 1.5512035359347077e-06, "loss": 0.2641, "step": 27119 }, { "epoch": 1.2704361268562328, "grad_norm": 0.628344682273713, "learning_rate": 1.5510280773597137e-06, "loss": 0.277, "step": 27120 }, { "epoch": 1.270482971846161, "grad_norm": 0.6316328020078209, "learning_rate": 1.550852624245815e-06, "loss": 0.2813, "step": 27121 }, { "epoch": 1.2705298168360895, "grad_norm": 0.6469294240197254, "learning_rate": 1.5506771765940196e-06, "loss": 0.3016, "step": 27122 }, { "epoch": 1.2705766618260177, "grad_norm": 0.5938815767363127, "learning_rate": 1.5505017344053387e-06, "loss": 0.2813, "step": 27123 }, { "epoch": 1.2706235068159462, "grad_norm": 0.6368075966611771, "learning_rate": 1.550326297680782e-06, "loss": 0.2843, "step": 27124 }, { "epoch": 1.2706703518058744, "grad_norm": 0.6231189991705989, "learning_rate": 1.5501508664213583e-06, "loss": 0.267, "step": 27125 }, { "epoch": 1.2707171967958026, "grad_norm": 0.5650765489201249, "learning_rate": 1.5499754406280781e-06, "loss": 0.2717, "step": 27126 }, { "epoch": 1.270764041785731, "grad_norm": 0.5907168551225455, "learning_rate": 1.5498000203019506e-06, "loss": 0.2774, "step": 27127 }, { "epoch": 1.2708108867756593, "grad_norm": 0.6251541806781548, "learning_rate": 1.549624605443985e-06, "loss": 0.2847, "step": 27128 }, { "epoch": 1.2708577317655876, "grad_norm": 0.5585233762875256, "learning_rate": 1.5494491960551904e-06, "loss": 0.2575, "step": 27129 }, { "epoch": 1.270904576755516, "grad_norm": 0.5815326818709515, "learning_rate": 1.5492737921365774e-06, "loss": 0.2654, "step": 27130 }, { "epoch": 1.2709514217454443, "grad_norm": 0.6210728867537245, "learning_rate": 1.5490983936891546e-06, "loss": 0.2784, "step": 27131 }, { "epoch": 1.2709982667353725, "grad_norm": 0.597151538974653, "learning_rate": 1.5489230007139311e-06, "loss": 0.2725, "step": 27132 }, { "epoch": 1.271045111725301, "grad_norm": 0.6092793500151644, "learning_rate": 1.5487476132119178e-06, "loss": 0.2782, "step": 27133 }, { "epoch": 1.2710919567152292, "grad_norm": 0.6847900228292334, "learning_rate": 1.5485722311841224e-06, "loss": 0.2965, "step": 27134 }, { "epoch": 1.2711388017051577, "grad_norm": 0.6034014486368183, "learning_rate": 1.5483968546315542e-06, "loss": 0.2739, "step": 27135 }, { "epoch": 1.271185646695086, "grad_norm": 0.5845411902670107, "learning_rate": 1.5482214835552229e-06, "loss": 0.2739, "step": 27136 }, { "epoch": 1.2712324916850144, "grad_norm": 0.5600391654355278, "learning_rate": 1.5480461179561381e-06, "loss": 0.2678, "step": 27137 }, { "epoch": 1.2712793366749426, "grad_norm": 0.6189778027314184, "learning_rate": 1.5478707578353083e-06, "loss": 0.2825, "step": 27138 }, { "epoch": 1.2713261816648709, "grad_norm": 0.5710233155325473, "learning_rate": 1.547695403193743e-06, "loss": 0.2583, "step": 27139 }, { "epoch": 1.2713730266547993, "grad_norm": 0.5714248068912813, "learning_rate": 1.547520054032452e-06, "loss": 0.2707, "step": 27140 }, { "epoch": 1.2714198716447276, "grad_norm": 0.5649982674771915, "learning_rate": 1.5473447103524436e-06, "loss": 0.2745, "step": 27141 }, { "epoch": 1.2714667166346558, "grad_norm": 0.555943819427214, "learning_rate": 1.5471693721547263e-06, "loss": 0.26, "step": 27142 }, { "epoch": 1.2715135616245843, "grad_norm": 0.6446701855139749, "learning_rate": 1.5469940394403098e-06, "loss": 0.292, "step": 27143 }, { "epoch": 1.2715604066145125, "grad_norm": 0.5885432745557387, "learning_rate": 1.5468187122102036e-06, "loss": 0.2762, "step": 27144 }, { "epoch": 1.2716072516044408, "grad_norm": 0.579372281286474, "learning_rate": 1.5466433904654154e-06, "loss": 0.2694, "step": 27145 }, { "epoch": 1.2716540965943692, "grad_norm": 0.6003320380543542, "learning_rate": 1.546468074206956e-06, "loss": 0.2791, "step": 27146 }, { "epoch": 1.2717009415842977, "grad_norm": 0.5736359560988467, "learning_rate": 1.5462927634358321e-06, "loss": 0.284, "step": 27147 }, { "epoch": 1.271747786574226, "grad_norm": 0.6118210488961237, "learning_rate": 1.5461174581530543e-06, "loss": 0.2697, "step": 27148 }, { "epoch": 1.2717946315641542, "grad_norm": 0.5924942784515854, "learning_rate": 1.5459421583596304e-06, "loss": 0.2746, "step": 27149 }, { "epoch": 1.2718414765540826, "grad_norm": 0.6492205794846911, "learning_rate": 1.5457668640565693e-06, "loss": 0.2966, "step": 27150 }, { "epoch": 1.2718883215440109, "grad_norm": 0.5552481444245454, "learning_rate": 1.5455915752448807e-06, "loss": 0.2703, "step": 27151 }, { "epoch": 1.2719351665339391, "grad_norm": 0.6002018154237324, "learning_rate": 1.5454162919255732e-06, "loss": 0.2794, "step": 27152 }, { "epoch": 1.2719820115238676, "grad_norm": 0.5940383284979126, "learning_rate": 1.545241014099654e-06, "loss": 0.27, "step": 27153 }, { "epoch": 1.2720288565137958, "grad_norm": 0.6170453042448623, "learning_rate": 1.5450657417681328e-06, "loss": 0.2567, "step": 27154 }, { "epoch": 1.272075701503724, "grad_norm": 0.6105167674106937, "learning_rate": 1.5448904749320187e-06, "loss": 0.2729, "step": 27155 }, { "epoch": 1.2721225464936525, "grad_norm": 0.6325719454809079, "learning_rate": 1.5447152135923194e-06, "loss": 0.2945, "step": 27156 }, { "epoch": 1.2721693914835808, "grad_norm": 0.5894920562926109, "learning_rate": 1.5445399577500439e-06, "loss": 0.2897, "step": 27157 }, { "epoch": 1.2722162364735092, "grad_norm": 0.6200742102407046, "learning_rate": 1.5443647074062018e-06, "loss": 0.282, "step": 27158 }, { "epoch": 1.2722630814634375, "grad_norm": 0.5935865194252482, "learning_rate": 1.5441894625618004e-06, "loss": 0.2706, "step": 27159 }, { "epoch": 1.272309926453366, "grad_norm": 0.6213856552467812, "learning_rate": 1.5440142232178473e-06, "loss": 0.2848, "step": 27160 }, { "epoch": 1.2723567714432942, "grad_norm": 0.5815300886205298, "learning_rate": 1.5438389893753525e-06, "loss": 0.2727, "step": 27161 }, { "epoch": 1.2724036164332224, "grad_norm": 0.5756293480043204, "learning_rate": 1.5436637610353245e-06, "loss": 0.278, "step": 27162 }, { "epoch": 1.2724504614231509, "grad_norm": 0.5613180527381701, "learning_rate": 1.5434885381987707e-06, "loss": 0.2652, "step": 27163 }, { "epoch": 1.2724973064130791, "grad_norm": 0.579800223736671, "learning_rate": 1.5433133208667006e-06, "loss": 0.2645, "step": 27164 }, { "epoch": 1.2725441514030074, "grad_norm": 0.579229718850036, "learning_rate": 1.5431381090401216e-06, "loss": 0.2792, "step": 27165 }, { "epoch": 1.2725909963929358, "grad_norm": 0.6359261552038152, "learning_rate": 1.5429629027200422e-06, "loss": 0.26, "step": 27166 }, { "epoch": 1.272637841382864, "grad_norm": 0.6127937370356847, "learning_rate": 1.5427877019074703e-06, "loss": 0.3027, "step": 27167 }, { "epoch": 1.2726846863727923, "grad_norm": 0.577178342624638, "learning_rate": 1.5426125066034147e-06, "loss": 0.2753, "step": 27168 }, { "epoch": 1.2727315313627208, "grad_norm": 0.5806345911031187, "learning_rate": 1.5424373168088842e-06, "loss": 0.2748, "step": 27169 }, { "epoch": 1.272778376352649, "grad_norm": 0.5806983413788636, "learning_rate": 1.5422621325248863e-06, "loss": 0.2697, "step": 27170 }, { "epoch": 1.2728252213425775, "grad_norm": 0.6009750359182416, "learning_rate": 1.5420869537524283e-06, "loss": 0.2818, "step": 27171 }, { "epoch": 1.2728720663325057, "grad_norm": 0.6146395777174508, "learning_rate": 1.5419117804925191e-06, "loss": 0.2763, "step": 27172 }, { "epoch": 1.2729189113224342, "grad_norm": 0.5560196867558662, "learning_rate": 1.5417366127461674e-06, "loss": 0.2509, "step": 27173 }, { "epoch": 1.2729657563123624, "grad_norm": 0.6041096459576647, "learning_rate": 1.54156145051438e-06, "loss": 0.2767, "step": 27174 }, { "epoch": 1.2730126013022907, "grad_norm": 0.6821341834986917, "learning_rate": 1.541386293798166e-06, "loss": 0.2904, "step": 27175 }, { "epoch": 1.2730594462922191, "grad_norm": 0.614042074772451, "learning_rate": 1.5412111425985333e-06, "loss": 0.2816, "step": 27176 }, { "epoch": 1.2731062912821474, "grad_norm": 0.63546074231763, "learning_rate": 1.5410359969164895e-06, "loss": 0.2858, "step": 27177 }, { "epoch": 1.2731531362720756, "grad_norm": 0.5706546050264011, "learning_rate": 1.5408608567530418e-06, "loss": 0.2687, "step": 27178 }, { "epoch": 1.273199981262004, "grad_norm": 0.5929417547507007, "learning_rate": 1.5406857221091993e-06, "loss": 0.278, "step": 27179 }, { "epoch": 1.2732468262519323, "grad_norm": 0.6034646849711286, "learning_rate": 1.5405105929859688e-06, "loss": 0.2824, "step": 27180 }, { "epoch": 1.2732936712418605, "grad_norm": 0.6354015824869725, "learning_rate": 1.5403354693843587e-06, "loss": 0.2857, "step": 27181 }, { "epoch": 1.273340516231789, "grad_norm": 0.5588999004783345, "learning_rate": 1.5401603513053776e-06, "loss": 0.2624, "step": 27182 }, { "epoch": 1.2733873612217175, "grad_norm": 0.539181782036852, "learning_rate": 1.5399852387500325e-06, "loss": 0.2584, "step": 27183 }, { "epoch": 1.2734342062116457, "grad_norm": 0.5834370530052179, "learning_rate": 1.5398101317193299e-06, "loss": 0.2751, "step": 27184 }, { "epoch": 1.273481051201574, "grad_norm": 0.6167252223402565, "learning_rate": 1.539635030214279e-06, "loss": 0.2792, "step": 27185 }, { "epoch": 1.2735278961915024, "grad_norm": 0.5853961270062918, "learning_rate": 1.5394599342358876e-06, "loss": 0.2775, "step": 27186 }, { "epoch": 1.2735747411814307, "grad_norm": 0.5956300886416932, "learning_rate": 1.5392848437851623e-06, "loss": 0.2821, "step": 27187 }, { "epoch": 1.273621586171359, "grad_norm": 0.6456820592369436, "learning_rate": 1.5391097588631124e-06, "loss": 0.2846, "step": 27188 }, { "epoch": 1.2736684311612874, "grad_norm": 0.5721682032882648, "learning_rate": 1.5389346794707433e-06, "loss": 0.275, "step": 27189 }, { "epoch": 1.2737152761512156, "grad_norm": 0.5533322346391188, "learning_rate": 1.5387596056090636e-06, "loss": 0.2504, "step": 27190 }, { "epoch": 1.2737621211411438, "grad_norm": 0.6004523320609262, "learning_rate": 1.538584537279081e-06, "loss": 0.287, "step": 27191 }, { "epoch": 1.2738089661310723, "grad_norm": 0.5771026809672848, "learning_rate": 1.5384094744818023e-06, "loss": 0.268, "step": 27192 }, { "epoch": 1.2738558111210005, "grad_norm": 0.5795471149409799, "learning_rate": 1.5382344172182359e-06, "loss": 0.2597, "step": 27193 }, { "epoch": 1.273902656110929, "grad_norm": 0.6157896900084274, "learning_rate": 1.5380593654893894e-06, "loss": 0.265, "step": 27194 }, { "epoch": 1.2739495011008573, "grad_norm": 0.594762680942186, "learning_rate": 1.5378843192962683e-06, "loss": 0.2683, "step": 27195 }, { "epoch": 1.2739963460907857, "grad_norm": 0.601169623826527, "learning_rate": 1.5377092786398812e-06, "loss": 0.2924, "step": 27196 }, { "epoch": 1.274043191080714, "grad_norm": 0.60933201218184, "learning_rate": 1.5375342435212358e-06, "loss": 0.2549, "step": 27197 }, { "epoch": 1.2740900360706422, "grad_norm": 0.5884065237522152, "learning_rate": 1.5373592139413385e-06, "loss": 0.2705, "step": 27198 }, { "epoch": 1.2741368810605707, "grad_norm": 0.6321299754392304, "learning_rate": 1.537184189901197e-06, "loss": 0.2782, "step": 27199 }, { "epoch": 1.274183726050499, "grad_norm": 0.6060447429377976, "learning_rate": 1.5370091714018193e-06, "loss": 0.2621, "step": 27200 }, { "epoch": 1.2742305710404271, "grad_norm": 0.5765789289145181, "learning_rate": 1.5368341584442115e-06, "loss": 0.2649, "step": 27201 }, { "epoch": 1.2742774160303556, "grad_norm": 0.6640642878567071, "learning_rate": 1.5366591510293804e-06, "loss": 0.3001, "step": 27202 }, { "epoch": 1.2743242610202838, "grad_norm": 0.6158857712858814, "learning_rate": 1.5364841491583338e-06, "loss": 0.2836, "step": 27203 }, { "epoch": 1.274371106010212, "grad_norm": 0.6156320406118271, "learning_rate": 1.536309152832079e-06, "loss": 0.2708, "step": 27204 }, { "epoch": 1.2744179510001405, "grad_norm": 0.6357284276687346, "learning_rate": 1.5361341620516227e-06, "loss": 0.3044, "step": 27205 }, { "epoch": 1.2744647959900688, "grad_norm": 0.5947273248897523, "learning_rate": 1.5359591768179726e-06, "loss": 0.2526, "step": 27206 }, { "epoch": 1.2745116409799973, "grad_norm": 0.5891348867448639, "learning_rate": 1.5357841971321347e-06, "loss": 0.2598, "step": 27207 }, { "epoch": 1.2745584859699255, "grad_norm": 0.6111481606658884, "learning_rate": 1.5356092229951167e-06, "loss": 0.2843, "step": 27208 }, { "epoch": 1.274605330959854, "grad_norm": 0.6184497090495766, "learning_rate": 1.5354342544079246e-06, "loss": 0.2674, "step": 27209 }, { "epoch": 1.2746521759497822, "grad_norm": 0.5902010734848684, "learning_rate": 1.5352592913715658e-06, "loss": 0.2701, "step": 27210 }, { "epoch": 1.2746990209397104, "grad_norm": 0.6012090009245341, "learning_rate": 1.535084333887048e-06, "loss": 0.2808, "step": 27211 }, { "epoch": 1.274745865929639, "grad_norm": 0.6009827859156015, "learning_rate": 1.5349093819553767e-06, "loss": 0.2831, "step": 27212 }, { "epoch": 1.2747927109195671, "grad_norm": 0.6308967580897007, "learning_rate": 1.5347344355775606e-06, "loss": 0.2776, "step": 27213 }, { "epoch": 1.2748395559094954, "grad_norm": 0.6233681955688436, "learning_rate": 1.5345594947546042e-06, "loss": 0.2716, "step": 27214 }, { "epoch": 1.2748864008994238, "grad_norm": 0.5203189398133964, "learning_rate": 1.5343845594875156e-06, "loss": 0.2521, "step": 27215 }, { "epoch": 1.274933245889352, "grad_norm": 0.5806847262892025, "learning_rate": 1.5342096297773007e-06, "loss": 0.2706, "step": 27216 }, { "epoch": 1.2749800908792803, "grad_norm": 0.5906720874949982, "learning_rate": 1.5340347056249666e-06, "loss": 0.2886, "step": 27217 }, { "epoch": 1.2750269358692088, "grad_norm": 0.5765588089352621, "learning_rate": 1.5338597870315206e-06, "loss": 0.2555, "step": 27218 }, { "epoch": 1.2750737808591373, "grad_norm": 0.6095159206485803, "learning_rate": 1.533684873997969e-06, "loss": 0.2876, "step": 27219 }, { "epoch": 1.2751206258490655, "grad_norm": 0.5938070200615, "learning_rate": 1.5335099665253173e-06, "loss": 0.2855, "step": 27220 }, { "epoch": 1.2751674708389937, "grad_norm": 0.5688154988261985, "learning_rate": 1.5333350646145725e-06, "loss": 0.2686, "step": 27221 }, { "epoch": 1.2752143158289222, "grad_norm": 0.5540092006673911, "learning_rate": 1.5331601682667425e-06, "loss": 0.2599, "step": 27222 }, { "epoch": 1.2752611608188504, "grad_norm": 0.6042209552264488, "learning_rate": 1.5329852774828317e-06, "loss": 0.2719, "step": 27223 }, { "epoch": 1.2753080058087787, "grad_norm": 0.571976289896342, "learning_rate": 1.5328103922638482e-06, "loss": 0.2619, "step": 27224 }, { "epoch": 1.2753548507987071, "grad_norm": 0.5892359459071581, "learning_rate": 1.5326355126107978e-06, "loss": 0.2842, "step": 27225 }, { "epoch": 1.2754016957886354, "grad_norm": 0.5996045878443289, "learning_rate": 1.532460638524687e-06, "loss": 0.2673, "step": 27226 }, { "epoch": 1.2754485407785636, "grad_norm": 0.5855361876970449, "learning_rate": 1.5322857700065213e-06, "loss": 0.2831, "step": 27227 }, { "epoch": 1.275495385768492, "grad_norm": 0.5695627731973397, "learning_rate": 1.5321109070573087e-06, "loss": 0.2641, "step": 27228 }, { "epoch": 1.2755422307584203, "grad_norm": 0.5867295153730671, "learning_rate": 1.5319360496780538e-06, "loss": 0.2616, "step": 27229 }, { "epoch": 1.2755890757483488, "grad_norm": 0.6121925602298687, "learning_rate": 1.5317611978697636e-06, "loss": 0.2773, "step": 27230 }, { "epoch": 1.275635920738277, "grad_norm": 0.5894451358912784, "learning_rate": 1.5315863516334453e-06, "loss": 0.2729, "step": 27231 }, { "epoch": 1.2756827657282055, "grad_norm": 0.5978901347093925, "learning_rate": 1.531411510970104e-06, "loss": 0.2782, "step": 27232 }, { "epoch": 1.2757296107181337, "grad_norm": 0.5974872440635017, "learning_rate": 1.531236675880745e-06, "loss": 0.27, "step": 27233 }, { "epoch": 1.275776455708062, "grad_norm": 0.5606411749058978, "learning_rate": 1.5310618463663758e-06, "loss": 0.2653, "step": 27234 }, { "epoch": 1.2758233006979904, "grad_norm": 0.635768962542008, "learning_rate": 1.5308870224280023e-06, "loss": 0.2828, "step": 27235 }, { "epoch": 1.2758701456879187, "grad_norm": 0.6019213201208383, "learning_rate": 1.53071220406663e-06, "loss": 0.2703, "step": 27236 }, { "epoch": 1.275916990677847, "grad_norm": 0.6334072768044992, "learning_rate": 1.5305373912832667e-06, "loss": 0.2821, "step": 27237 }, { "epoch": 1.2759638356677754, "grad_norm": 0.5607104621002527, "learning_rate": 1.5303625840789157e-06, "loss": 0.273, "step": 27238 }, { "epoch": 1.2760106806577036, "grad_norm": 0.5824011732269281, "learning_rate": 1.530187782454585e-06, "loss": 0.2639, "step": 27239 }, { "epoch": 1.2760575256476319, "grad_norm": 0.5865887486311716, "learning_rate": 1.5300129864112792e-06, "loss": 0.2612, "step": 27240 }, { "epoch": 1.2761043706375603, "grad_norm": 0.6247946122670519, "learning_rate": 1.529838195950005e-06, "loss": 0.2819, "step": 27241 }, { "epoch": 1.2761512156274886, "grad_norm": 0.6229804278061855, "learning_rate": 1.5296634110717683e-06, "loss": 0.2898, "step": 27242 }, { "epoch": 1.276198060617417, "grad_norm": 0.594545058442663, "learning_rate": 1.529488631777576e-06, "loss": 0.2663, "step": 27243 }, { "epoch": 1.2762449056073453, "grad_norm": 0.6087077297506353, "learning_rate": 1.5293138580684311e-06, "loss": 0.2662, "step": 27244 }, { "epoch": 1.2762917505972737, "grad_norm": 0.6234142225558184, "learning_rate": 1.5291390899453412e-06, "loss": 0.2768, "step": 27245 }, { "epoch": 1.276338595587202, "grad_norm": 0.6032428455093143, "learning_rate": 1.5289643274093123e-06, "loss": 0.2945, "step": 27246 }, { "epoch": 1.2763854405771302, "grad_norm": 0.6223536071267453, "learning_rate": 1.5287895704613491e-06, "loss": 0.2932, "step": 27247 }, { "epoch": 1.2764322855670587, "grad_norm": 0.6108269625348143, "learning_rate": 1.5286148191024577e-06, "loss": 0.2772, "step": 27248 }, { "epoch": 1.276479130556987, "grad_norm": 0.5835234977283822, "learning_rate": 1.5284400733336453e-06, "loss": 0.2614, "step": 27249 }, { "epoch": 1.2765259755469152, "grad_norm": 0.5939036343536054, "learning_rate": 1.528265333155915e-06, "loss": 0.2833, "step": 27250 }, { "epoch": 1.2765728205368436, "grad_norm": 0.5875560967913408, "learning_rate": 1.5280905985702732e-06, "loss": 0.2774, "step": 27251 }, { "epoch": 1.2766196655267719, "grad_norm": 0.5841197373071848, "learning_rate": 1.5279158695777257e-06, "loss": 0.2676, "step": 27252 }, { "epoch": 1.2766665105167, "grad_norm": 0.5491096795662257, "learning_rate": 1.5277411461792787e-06, "loss": 0.2651, "step": 27253 }, { "epoch": 1.2767133555066286, "grad_norm": 0.6120950084201273, "learning_rate": 1.5275664283759367e-06, "loss": 0.2748, "step": 27254 }, { "epoch": 1.276760200496557, "grad_norm": 0.5964604937672044, "learning_rate": 1.5273917161687063e-06, "loss": 0.2579, "step": 27255 }, { "epoch": 1.2768070454864853, "grad_norm": 0.5842232876202046, "learning_rate": 1.5272170095585908e-06, "loss": 0.266, "step": 27256 }, { "epoch": 1.2768538904764135, "grad_norm": 0.5854410815443541, "learning_rate": 1.5270423085465977e-06, "loss": 0.2759, "step": 27257 }, { "epoch": 1.276900735466342, "grad_norm": 0.6104366769800198, "learning_rate": 1.5268676131337311e-06, "loss": 0.2921, "step": 27258 }, { "epoch": 1.2769475804562702, "grad_norm": 0.5966329810578976, "learning_rate": 1.5266929233209969e-06, "loss": 0.2536, "step": 27259 }, { "epoch": 1.2769944254461985, "grad_norm": 0.5919955881930417, "learning_rate": 1.5265182391094006e-06, "loss": 0.2677, "step": 27260 }, { "epoch": 1.277041270436127, "grad_norm": 0.589153344895682, "learning_rate": 1.5263435604999478e-06, "loss": 0.2709, "step": 27261 }, { "epoch": 1.2770881154260552, "grad_norm": 0.6450536962186834, "learning_rate": 1.5261688874936417e-06, "loss": 0.2796, "step": 27262 }, { "epoch": 1.2771349604159834, "grad_norm": 0.586300180210138, "learning_rate": 1.5259942200914894e-06, "loss": 0.29, "step": 27263 }, { "epoch": 1.2771818054059119, "grad_norm": 0.6191364431470991, "learning_rate": 1.5258195582944956e-06, "loss": 0.2698, "step": 27264 }, { "epoch": 1.27722865039584, "grad_norm": 0.6135671700700815, "learning_rate": 1.525644902103665e-06, "loss": 0.2757, "step": 27265 }, { "epoch": 1.2772754953857686, "grad_norm": 0.6019613963145894, "learning_rate": 1.5254702515200037e-06, "loss": 0.2732, "step": 27266 }, { "epoch": 1.2773223403756968, "grad_norm": 0.5936870813929813, "learning_rate": 1.5252956065445162e-06, "loss": 0.2718, "step": 27267 }, { "epoch": 1.2773691853656253, "grad_norm": 0.5950775609435456, "learning_rate": 1.5251209671782076e-06, "loss": 0.2702, "step": 27268 }, { "epoch": 1.2774160303555535, "grad_norm": 0.6112841883057217, "learning_rate": 1.524946333422082e-06, "loss": 0.2801, "step": 27269 }, { "epoch": 1.2774628753454818, "grad_norm": 0.6117030434336244, "learning_rate": 1.524771705277146e-06, "loss": 0.2996, "step": 27270 }, { "epoch": 1.2775097203354102, "grad_norm": 0.558704746578558, "learning_rate": 1.5245970827444032e-06, "loss": 0.2513, "step": 27271 }, { "epoch": 1.2775565653253385, "grad_norm": 0.6147635639598467, "learning_rate": 1.5244224658248591e-06, "loss": 0.2832, "step": 27272 }, { "epoch": 1.2776034103152667, "grad_norm": 0.5972543413086921, "learning_rate": 1.524247854519519e-06, "loss": 0.2879, "step": 27273 }, { "epoch": 1.2776502553051952, "grad_norm": 0.6103615400322663, "learning_rate": 1.5240732488293874e-06, "loss": 0.2646, "step": 27274 }, { "epoch": 1.2776971002951234, "grad_norm": 0.5820229459939472, "learning_rate": 1.523898648755468e-06, "loss": 0.2663, "step": 27275 }, { "epoch": 1.2777439452850516, "grad_norm": 0.5588997566996465, "learning_rate": 1.5237240542987667e-06, "loss": 0.2556, "step": 27276 }, { "epoch": 1.27779079027498, "grad_norm": 0.582169716274845, "learning_rate": 1.5235494654602884e-06, "loss": 0.28, "step": 27277 }, { "epoch": 1.2778376352649083, "grad_norm": 0.5570330833619702, "learning_rate": 1.5233748822410375e-06, "loss": 0.2543, "step": 27278 }, { "epoch": 1.2778844802548368, "grad_norm": 0.5406295698994467, "learning_rate": 1.5232003046420183e-06, "loss": 0.2597, "step": 27279 }, { "epoch": 1.277931325244765, "grad_norm": 0.6052899172820406, "learning_rate": 1.5230257326642367e-06, "loss": 0.2803, "step": 27280 }, { "epoch": 1.2779781702346935, "grad_norm": 0.5903430891921513, "learning_rate": 1.5228511663086964e-06, "loss": 0.2718, "step": 27281 }, { "epoch": 1.2780250152246218, "grad_norm": 0.5693329301870083, "learning_rate": 1.522676605576401e-06, "loss": 0.2736, "step": 27282 }, { "epoch": 1.27807186021455, "grad_norm": 0.6015700681388191, "learning_rate": 1.5225020504683566e-06, "loss": 0.2601, "step": 27283 }, { "epoch": 1.2781187052044785, "grad_norm": 0.6355739544173994, "learning_rate": 1.5223275009855676e-06, "loss": 0.2938, "step": 27284 }, { "epoch": 1.2781655501944067, "grad_norm": 0.5980762622238098, "learning_rate": 1.5221529571290371e-06, "loss": 0.285, "step": 27285 }, { "epoch": 1.278212395184335, "grad_norm": 0.600529940061588, "learning_rate": 1.5219784188997722e-06, "loss": 0.2685, "step": 27286 }, { "epoch": 1.2782592401742634, "grad_norm": 0.6030756929686245, "learning_rate": 1.5218038862987745e-06, "loss": 0.2906, "step": 27287 }, { "epoch": 1.2783060851641916, "grad_norm": 0.5626074869825687, "learning_rate": 1.52162935932705e-06, "loss": 0.2625, "step": 27288 }, { "epoch": 1.2783529301541199, "grad_norm": 0.6159132793982556, "learning_rate": 1.5214548379856024e-06, "loss": 0.2856, "step": 27289 }, { "epoch": 1.2783997751440483, "grad_norm": 0.618154350795558, "learning_rate": 1.5212803222754358e-06, "loss": 0.288, "step": 27290 }, { "epoch": 1.2784466201339768, "grad_norm": 0.6112774151929818, "learning_rate": 1.5211058121975559e-06, "loss": 0.2794, "step": 27291 }, { "epoch": 1.278493465123905, "grad_norm": 0.5729061551771428, "learning_rate": 1.520931307752966e-06, "loss": 0.2793, "step": 27292 }, { "epoch": 1.2785403101138333, "grad_norm": 0.5853612895669833, "learning_rate": 1.5207568089426697e-06, "loss": 0.2677, "step": 27293 }, { "epoch": 1.2785871551037618, "grad_norm": 0.5723419944813023, "learning_rate": 1.520582315767672e-06, "loss": 0.2738, "step": 27294 }, { "epoch": 1.27863400009369, "grad_norm": 0.5953461012890529, "learning_rate": 1.5204078282289768e-06, "loss": 0.2604, "step": 27295 }, { "epoch": 1.2786808450836182, "grad_norm": 0.6025903551413241, "learning_rate": 1.5202333463275884e-06, "loss": 0.2747, "step": 27296 }, { "epoch": 1.2787276900735467, "grad_norm": 0.5702851127670109, "learning_rate": 1.5200588700645108e-06, "loss": 0.272, "step": 27297 }, { "epoch": 1.278774535063475, "grad_norm": 0.5660079008883845, "learning_rate": 1.5198843994407488e-06, "loss": 0.2672, "step": 27298 }, { "epoch": 1.2788213800534032, "grad_norm": 0.6099161170739476, "learning_rate": 1.5197099344573055e-06, "loss": 0.2763, "step": 27299 }, { "epoch": 1.2788682250433316, "grad_norm": 0.5963848315275903, "learning_rate": 1.5195354751151845e-06, "loss": 0.2635, "step": 27300 }, { "epoch": 1.2789150700332599, "grad_norm": 0.6077806627082896, "learning_rate": 1.5193610214153904e-06, "loss": 0.2833, "step": 27301 }, { "epoch": 1.2789619150231883, "grad_norm": 0.5344566429572645, "learning_rate": 1.5191865733589278e-06, "loss": 0.2534, "step": 27302 }, { "epoch": 1.2790087600131166, "grad_norm": 0.6403381631361127, "learning_rate": 1.5190121309467992e-06, "loss": 0.274, "step": 27303 }, { "epoch": 1.279055605003045, "grad_norm": 0.6229052418108401, "learning_rate": 1.5188376941800103e-06, "loss": 0.2787, "step": 27304 }, { "epoch": 1.2791024499929733, "grad_norm": 0.5534694591576896, "learning_rate": 1.5186632630595631e-06, "loss": 0.2685, "step": 27305 }, { "epoch": 1.2791492949829015, "grad_norm": 0.6339623470136561, "learning_rate": 1.5184888375864625e-06, "loss": 0.2916, "step": 27306 }, { "epoch": 1.27919613997283, "grad_norm": 0.5647828163941009, "learning_rate": 1.5183144177617116e-06, "loss": 0.2532, "step": 27307 }, { "epoch": 1.2792429849627582, "grad_norm": 0.6655261094990875, "learning_rate": 1.5181400035863142e-06, "loss": 0.3083, "step": 27308 }, { "epoch": 1.2792898299526865, "grad_norm": 0.5874244067007162, "learning_rate": 1.517965595061275e-06, "loss": 0.2619, "step": 27309 }, { "epoch": 1.279336674942615, "grad_norm": 0.6064916701584988, "learning_rate": 1.5177911921875974e-06, "loss": 0.2912, "step": 27310 }, { "epoch": 1.2793835199325432, "grad_norm": 0.5825385732303927, "learning_rate": 1.5176167949662834e-06, "loss": 0.2628, "step": 27311 }, { "epoch": 1.2794303649224714, "grad_norm": 0.5714199105138346, "learning_rate": 1.517442403398338e-06, "loss": 0.2807, "step": 27312 }, { "epoch": 1.2794772099123999, "grad_norm": 0.606335631252506, "learning_rate": 1.5172680174847654e-06, "loss": 0.289, "step": 27313 }, { "epoch": 1.2795240549023281, "grad_norm": 0.652948140132629, "learning_rate": 1.5170936372265677e-06, "loss": 0.3076, "step": 27314 }, { "epoch": 1.2795708998922566, "grad_norm": 0.6550964385461506, "learning_rate": 1.5169192626247495e-06, "loss": 0.3122, "step": 27315 }, { "epoch": 1.2796177448821848, "grad_norm": 0.6114115037923121, "learning_rate": 1.5167448936803137e-06, "loss": 0.2849, "step": 27316 }, { "epoch": 1.2796645898721133, "grad_norm": 0.5841997384268385, "learning_rate": 1.516570530394264e-06, "loss": 0.242, "step": 27317 }, { "epoch": 1.2797114348620415, "grad_norm": 0.624124240849446, "learning_rate": 1.5163961727676036e-06, "loss": 0.2661, "step": 27318 }, { "epoch": 1.2797582798519698, "grad_norm": 0.6641047896339036, "learning_rate": 1.5162218208013363e-06, "loss": 0.2925, "step": 27319 }, { "epoch": 1.2798051248418982, "grad_norm": 0.5500572148272469, "learning_rate": 1.5160474744964642e-06, "loss": 0.2639, "step": 27320 }, { "epoch": 1.2798519698318265, "grad_norm": 0.5907088637034104, "learning_rate": 1.5158731338539922e-06, "loss": 0.2705, "step": 27321 }, { "epoch": 1.2798988148217547, "grad_norm": 0.6333206920629513, "learning_rate": 1.5156987988749237e-06, "loss": 0.2869, "step": 27322 }, { "epoch": 1.2799456598116832, "grad_norm": 0.6102568587782818, "learning_rate": 1.5155244695602605e-06, "loss": 0.2742, "step": 27323 }, { "epoch": 1.2799925048016114, "grad_norm": 0.5899979846396758, "learning_rate": 1.5153501459110064e-06, "loss": 0.2647, "step": 27324 }, { "epoch": 1.2800393497915397, "grad_norm": 0.640926708979096, "learning_rate": 1.5151758279281645e-06, "loss": 0.2606, "step": 27325 }, { "epoch": 1.2800861947814681, "grad_norm": 0.6330711816348112, "learning_rate": 1.515001515612739e-06, "loss": 0.2903, "step": 27326 }, { "epoch": 1.2801330397713966, "grad_norm": 0.6100407813036894, "learning_rate": 1.5148272089657312e-06, "loss": 0.2818, "step": 27327 }, { "epoch": 1.2801798847613248, "grad_norm": 0.6322939487127188, "learning_rate": 1.5146529079881468e-06, "loss": 0.2772, "step": 27328 }, { "epoch": 1.280226729751253, "grad_norm": 0.5879419532460474, "learning_rate": 1.5144786126809857e-06, "loss": 0.264, "step": 27329 }, { "epoch": 1.2802735747411815, "grad_norm": 0.600315185796651, "learning_rate": 1.5143043230452531e-06, "loss": 0.2546, "step": 27330 }, { "epoch": 1.2803204197311098, "grad_norm": 0.5933540126642978, "learning_rate": 1.5141300390819511e-06, "loss": 0.2827, "step": 27331 }, { "epoch": 1.280367264721038, "grad_norm": 0.6042723350538087, "learning_rate": 1.513955760792083e-06, "loss": 0.2579, "step": 27332 }, { "epoch": 1.2804141097109665, "grad_norm": 0.5855705530297571, "learning_rate": 1.5137814881766517e-06, "loss": 0.2648, "step": 27333 }, { "epoch": 1.2804609547008947, "grad_norm": 0.5614193648250791, "learning_rate": 1.5136072212366608e-06, "loss": 0.2779, "step": 27334 }, { "epoch": 1.280507799690823, "grad_norm": 0.6058594205723883, "learning_rate": 1.5134329599731117e-06, "loss": 0.2894, "step": 27335 }, { "epoch": 1.2805546446807514, "grad_norm": 0.5809765483678129, "learning_rate": 1.5132587043870076e-06, "loss": 0.2786, "step": 27336 }, { "epoch": 1.2806014896706797, "grad_norm": 0.5832185827635123, "learning_rate": 1.5130844544793521e-06, "loss": 0.2857, "step": 27337 }, { "epoch": 1.2806483346606081, "grad_norm": 0.6109019259247072, "learning_rate": 1.5129102102511473e-06, "loss": 0.2719, "step": 27338 }, { "epoch": 1.2806951796505364, "grad_norm": 0.6041605439793651, "learning_rate": 1.5127359717033964e-06, "loss": 0.2706, "step": 27339 }, { "epoch": 1.2807420246404648, "grad_norm": 0.5827895227601495, "learning_rate": 1.5125617388371022e-06, "loss": 0.2786, "step": 27340 }, { "epoch": 1.280788869630393, "grad_norm": 0.5932358982890227, "learning_rate": 1.5123875116532666e-06, "loss": 0.271, "step": 27341 }, { "epoch": 1.2808357146203213, "grad_norm": 0.5792571289755201, "learning_rate": 1.5122132901528924e-06, "loss": 0.2613, "step": 27342 }, { "epoch": 1.2808825596102498, "grad_norm": 0.6108047943829631, "learning_rate": 1.5120390743369823e-06, "loss": 0.2689, "step": 27343 }, { "epoch": 1.280929404600178, "grad_norm": 0.6163532987905797, "learning_rate": 1.5118648642065398e-06, "loss": 0.2758, "step": 27344 }, { "epoch": 1.2809762495901063, "grad_norm": 0.6257557430462245, "learning_rate": 1.5116906597625657e-06, "loss": 0.281, "step": 27345 }, { "epoch": 1.2810230945800347, "grad_norm": 0.6161378618708833, "learning_rate": 1.511516461006064e-06, "loss": 0.278, "step": 27346 }, { "epoch": 1.281069939569963, "grad_norm": 0.5916323702717635, "learning_rate": 1.511342267938037e-06, "loss": 0.2745, "step": 27347 }, { "epoch": 1.2811167845598912, "grad_norm": 0.6052407366565411, "learning_rate": 1.5111680805594867e-06, "loss": 0.2658, "step": 27348 }, { "epoch": 1.2811636295498197, "grad_norm": 0.6034035597471376, "learning_rate": 1.510993898871415e-06, "loss": 0.2816, "step": 27349 }, { "epoch": 1.281210474539748, "grad_norm": 0.5911647099647616, "learning_rate": 1.5108197228748245e-06, "loss": 0.2758, "step": 27350 }, { "epoch": 1.2812573195296764, "grad_norm": 0.5537494178000253, "learning_rate": 1.5106455525707186e-06, "loss": 0.2545, "step": 27351 }, { "epoch": 1.2813041645196046, "grad_norm": 0.5609564836993105, "learning_rate": 1.5104713879600986e-06, "loss": 0.2669, "step": 27352 }, { "epoch": 1.281351009509533, "grad_norm": 0.5901029244834024, "learning_rate": 1.5102972290439678e-06, "loss": 0.2589, "step": 27353 }, { "epoch": 1.2813978544994613, "grad_norm": 0.5814942391384723, "learning_rate": 1.5101230758233267e-06, "loss": 0.2563, "step": 27354 }, { "epoch": 1.2814446994893895, "grad_norm": 0.6006079364289273, "learning_rate": 1.5099489282991792e-06, "loss": 0.28, "step": 27355 }, { "epoch": 1.281491544479318, "grad_norm": 0.5283527442974219, "learning_rate": 1.509774786472526e-06, "loss": 0.246, "step": 27356 }, { "epoch": 1.2815383894692463, "grad_norm": 0.5824249903202315, "learning_rate": 1.5096006503443703e-06, "loss": 0.2713, "step": 27357 }, { "epoch": 1.2815852344591745, "grad_norm": 0.6144153272772095, "learning_rate": 1.5094265199157143e-06, "loss": 0.2843, "step": 27358 }, { "epoch": 1.281632079449103, "grad_norm": 0.5771584044462849, "learning_rate": 1.50925239518756e-06, "loss": 0.276, "step": 27359 }, { "epoch": 1.2816789244390312, "grad_norm": 0.6025555635680591, "learning_rate": 1.5090782761609086e-06, "loss": 0.2649, "step": 27360 }, { "epoch": 1.2817257694289594, "grad_norm": 0.5881159785864232, "learning_rate": 1.5089041628367624e-06, "loss": 0.2842, "step": 27361 }, { "epoch": 1.281772614418888, "grad_norm": 0.5817430043993327, "learning_rate": 1.5087300552161238e-06, "loss": 0.2666, "step": 27362 }, { "epoch": 1.2818194594088164, "grad_norm": 0.6396396786861747, "learning_rate": 1.5085559532999948e-06, "loss": 0.2768, "step": 27363 }, { "epoch": 1.2818663043987446, "grad_norm": 0.5886185091041454, "learning_rate": 1.5083818570893772e-06, "loss": 0.2725, "step": 27364 }, { "epoch": 1.2819131493886728, "grad_norm": 0.6452129252731713, "learning_rate": 1.5082077665852727e-06, "loss": 0.2809, "step": 27365 }, { "epoch": 1.2819599943786013, "grad_norm": 0.5651362408764306, "learning_rate": 1.508033681788683e-06, "loss": 0.255, "step": 27366 }, { "epoch": 1.2820068393685295, "grad_norm": 0.6619084872867474, "learning_rate": 1.5078596027006104e-06, "loss": 0.2773, "step": 27367 }, { "epoch": 1.2820536843584578, "grad_norm": 0.5931456633925692, "learning_rate": 1.5076855293220562e-06, "loss": 0.2706, "step": 27368 }, { "epoch": 1.2821005293483863, "grad_norm": 0.6180682592050283, "learning_rate": 1.5075114616540224e-06, "loss": 0.2658, "step": 27369 }, { "epoch": 1.2821473743383145, "grad_norm": 0.576735245801214, "learning_rate": 1.5073373996975104e-06, "loss": 0.276, "step": 27370 }, { "epoch": 1.2821942193282427, "grad_norm": 0.622542528492815, "learning_rate": 1.5071633434535233e-06, "loss": 0.2752, "step": 27371 }, { "epoch": 1.2822410643181712, "grad_norm": 0.5908823806113349, "learning_rate": 1.506989292923061e-06, "loss": 0.2826, "step": 27372 }, { "epoch": 1.2822879093080994, "grad_norm": 0.5915814636138402, "learning_rate": 1.5068152481071253e-06, "loss": 0.2655, "step": 27373 }, { "epoch": 1.282334754298028, "grad_norm": 0.6463932000801423, "learning_rate": 1.506641209006718e-06, "loss": 0.2811, "step": 27374 }, { "epoch": 1.2823815992879561, "grad_norm": 0.6318816681503793, "learning_rate": 1.5064671756228417e-06, "loss": 0.2734, "step": 27375 }, { "epoch": 1.2824284442778846, "grad_norm": 0.5981811689725985, "learning_rate": 1.5062931479564962e-06, "loss": 0.2695, "step": 27376 }, { "epoch": 1.2824752892678128, "grad_norm": 0.5760841842008938, "learning_rate": 1.5061191260086855e-06, "loss": 0.2691, "step": 27377 }, { "epoch": 1.282522134257741, "grad_norm": 0.5722432744337572, "learning_rate": 1.5059451097804078e-06, "loss": 0.2904, "step": 27378 }, { "epoch": 1.2825689792476695, "grad_norm": 0.5939960405119917, "learning_rate": 1.505771099272667e-06, "loss": 0.2715, "step": 27379 }, { "epoch": 1.2826158242375978, "grad_norm": 0.5723447535131566, "learning_rate": 1.5055970944864627e-06, "loss": 0.2734, "step": 27380 }, { "epoch": 1.282662669227526, "grad_norm": 0.6273883969906269, "learning_rate": 1.5054230954227978e-06, "loss": 0.2879, "step": 27381 }, { "epoch": 1.2827095142174545, "grad_norm": 0.6206495138291177, "learning_rate": 1.5052491020826732e-06, "loss": 0.2776, "step": 27382 }, { "epoch": 1.2827563592073827, "grad_norm": 0.625682739933733, "learning_rate": 1.5050751144670905e-06, "loss": 0.2774, "step": 27383 }, { "epoch": 1.282803204197311, "grad_norm": 0.6092439105085393, "learning_rate": 1.5049011325770495e-06, "loss": 0.2966, "step": 27384 }, { "epoch": 1.2828500491872394, "grad_norm": 0.6006396836771064, "learning_rate": 1.5047271564135524e-06, "loss": 0.2671, "step": 27385 }, { "epoch": 1.2828968941771677, "grad_norm": 0.5882435981125158, "learning_rate": 1.5045531859776008e-06, "loss": 0.2575, "step": 27386 }, { "epoch": 1.2829437391670961, "grad_norm": 0.6026746224581339, "learning_rate": 1.5043792212701949e-06, "loss": 0.3, "step": 27387 }, { "epoch": 1.2829905841570244, "grad_norm": 0.5751296402731559, "learning_rate": 1.5042052622923364e-06, "loss": 0.2831, "step": 27388 }, { "epoch": 1.2830374291469528, "grad_norm": 0.5868139144288941, "learning_rate": 1.5040313090450273e-06, "loss": 0.2787, "step": 27389 }, { "epoch": 1.283084274136881, "grad_norm": 0.53738133801179, "learning_rate": 1.5038573615292674e-06, "loss": 0.2554, "step": 27390 }, { "epoch": 1.2831311191268093, "grad_norm": 0.5824990722192159, "learning_rate": 1.503683419746057e-06, "loss": 0.2747, "step": 27391 }, { "epoch": 1.2831779641167378, "grad_norm": 0.5911862077068534, "learning_rate": 1.5035094836963988e-06, "loss": 0.2566, "step": 27392 }, { "epoch": 1.283224809106666, "grad_norm": 0.6221166017249901, "learning_rate": 1.503335553381293e-06, "loss": 0.2708, "step": 27393 }, { "epoch": 1.2832716540965943, "grad_norm": 0.5732752365823276, "learning_rate": 1.5031616288017404e-06, "loss": 0.2706, "step": 27394 }, { "epoch": 1.2833184990865227, "grad_norm": 0.621110728904487, "learning_rate": 1.5029877099587429e-06, "loss": 0.2794, "step": 27395 }, { "epoch": 1.283365344076451, "grad_norm": 0.596482582972717, "learning_rate": 1.5028137968532996e-06, "loss": 0.2576, "step": 27396 }, { "epoch": 1.2834121890663792, "grad_norm": 0.6077781505359606, "learning_rate": 1.5026398894864125e-06, "loss": 0.2782, "step": 27397 }, { "epoch": 1.2834590340563077, "grad_norm": 0.629995200792319, "learning_rate": 1.502465987859082e-06, "loss": 0.2984, "step": 27398 }, { "epoch": 1.2835058790462361, "grad_norm": 0.6022464341948346, "learning_rate": 1.502292091972309e-06, "loss": 0.2524, "step": 27399 }, { "epoch": 1.2835527240361644, "grad_norm": 0.578679354578485, "learning_rate": 1.5021182018270947e-06, "loss": 0.2793, "step": 27400 }, { "epoch": 1.2835995690260926, "grad_norm": 0.6202883295397962, "learning_rate": 1.5019443174244396e-06, "loss": 0.2915, "step": 27401 }, { "epoch": 1.283646414016021, "grad_norm": 0.5877151351508998, "learning_rate": 1.5017704387653435e-06, "loss": 0.266, "step": 27402 }, { "epoch": 1.2836932590059493, "grad_norm": 0.6197828152936309, "learning_rate": 1.5015965658508074e-06, "loss": 0.2623, "step": 27403 }, { "epoch": 1.2837401039958776, "grad_norm": 0.5619661897569471, "learning_rate": 1.5014226986818326e-06, "loss": 0.2598, "step": 27404 }, { "epoch": 1.283786948985806, "grad_norm": 0.6288015438640165, "learning_rate": 1.5012488372594186e-06, "loss": 0.2686, "step": 27405 }, { "epoch": 1.2838337939757343, "grad_norm": 0.5994743259859638, "learning_rate": 1.5010749815845674e-06, "loss": 0.2867, "step": 27406 }, { "epoch": 1.2838806389656625, "grad_norm": 0.5860540108577584, "learning_rate": 1.5009011316582783e-06, "loss": 0.2773, "step": 27407 }, { "epoch": 1.283927483955591, "grad_norm": 0.5666391083583622, "learning_rate": 1.500727287481552e-06, "loss": 0.2781, "step": 27408 }, { "epoch": 1.2839743289455192, "grad_norm": 0.6093400343088157, "learning_rate": 1.5005534490553889e-06, "loss": 0.2912, "step": 27409 }, { "epoch": 1.2840211739354477, "grad_norm": 0.6266744797654784, "learning_rate": 1.5003796163807898e-06, "loss": 0.2892, "step": 27410 }, { "epoch": 1.284068018925376, "grad_norm": 0.5931682209563139, "learning_rate": 1.500205789458754e-06, "loss": 0.2875, "step": 27411 }, { "epoch": 1.2841148639153044, "grad_norm": 0.6332241115395565, "learning_rate": 1.500031968290283e-06, "loss": 0.2764, "step": 27412 }, { "epoch": 1.2841617089052326, "grad_norm": 0.6191773168275437, "learning_rate": 1.499858152876377e-06, "loss": 0.2777, "step": 27413 }, { "epoch": 1.2842085538951609, "grad_norm": 0.5652234357793671, "learning_rate": 1.4996843432180363e-06, "loss": 0.2726, "step": 27414 }, { "epoch": 1.2842553988850893, "grad_norm": 0.5865024835115843, "learning_rate": 1.49951053931626e-06, "loss": 0.2637, "step": 27415 }, { "epoch": 1.2843022438750176, "grad_norm": 0.5907977116245408, "learning_rate": 1.4993367411720489e-06, "loss": 0.2586, "step": 27416 }, { "epoch": 1.2843490888649458, "grad_norm": 0.6737575082220858, "learning_rate": 1.499162948786404e-06, "loss": 0.2834, "step": 27417 }, { "epoch": 1.2843959338548743, "grad_norm": 0.6154649696492103, "learning_rate": 1.498989162160324e-06, "loss": 0.2808, "step": 27418 }, { "epoch": 1.2844427788448025, "grad_norm": 0.5925483989803826, "learning_rate": 1.4988153812948104e-06, "loss": 0.2731, "step": 27419 }, { "epoch": 1.2844896238347308, "grad_norm": 0.611080453391747, "learning_rate": 1.4986416061908633e-06, "loss": 0.2817, "step": 27420 }, { "epoch": 1.2845364688246592, "grad_norm": 0.6228239561929019, "learning_rate": 1.4984678368494814e-06, "loss": 0.2887, "step": 27421 }, { "epoch": 1.2845833138145875, "grad_norm": 0.6668963688921935, "learning_rate": 1.498294073271665e-06, "loss": 0.2715, "step": 27422 }, { "epoch": 1.284630158804516, "grad_norm": 0.6596608803244877, "learning_rate": 1.4981203154584144e-06, "loss": 0.2994, "step": 27423 }, { "epoch": 1.2846770037944442, "grad_norm": 0.5547006979462646, "learning_rate": 1.4979465634107304e-06, "loss": 0.2651, "step": 27424 }, { "epoch": 1.2847238487843726, "grad_norm": 0.5998875679331666, "learning_rate": 1.4977728171296113e-06, "loss": 0.2779, "step": 27425 }, { "epoch": 1.2847706937743009, "grad_norm": 0.5983081168379797, "learning_rate": 1.4975990766160586e-06, "loss": 0.271, "step": 27426 }, { "epoch": 1.284817538764229, "grad_norm": 0.6005708861441155, "learning_rate": 1.4974253418710704e-06, "loss": 0.2819, "step": 27427 }, { "epoch": 1.2848643837541576, "grad_norm": 0.5861729504225986, "learning_rate": 1.497251612895648e-06, "loss": 0.2815, "step": 27428 }, { "epoch": 1.2849112287440858, "grad_norm": 0.5464770159149501, "learning_rate": 1.49707788969079e-06, "loss": 0.2592, "step": 27429 }, { "epoch": 1.284958073734014, "grad_norm": 0.5929682601791819, "learning_rate": 1.4969041722574964e-06, "loss": 0.2626, "step": 27430 }, { "epoch": 1.2850049187239425, "grad_norm": 0.623519821480276, "learning_rate": 1.4967304605967681e-06, "loss": 0.2941, "step": 27431 }, { "epoch": 1.2850517637138708, "grad_norm": 0.5605409223376161, "learning_rate": 1.4965567547096038e-06, "loss": 0.2804, "step": 27432 }, { "epoch": 1.285098608703799, "grad_norm": 0.5901920535460106, "learning_rate": 1.4963830545970027e-06, "loss": 0.2935, "step": 27433 }, { "epoch": 1.2851454536937275, "grad_norm": 0.613235186359934, "learning_rate": 1.4962093602599644e-06, "loss": 0.2639, "step": 27434 }, { "epoch": 1.285192298683656, "grad_norm": 0.6122346694737458, "learning_rate": 1.4960356716994898e-06, "loss": 0.2663, "step": 27435 }, { "epoch": 1.2852391436735842, "grad_norm": 0.6242681173107151, "learning_rate": 1.4958619889165768e-06, "loss": 0.2819, "step": 27436 }, { "epoch": 1.2852859886635124, "grad_norm": 0.6062289925138787, "learning_rate": 1.4956883119122259e-06, "loss": 0.2639, "step": 27437 }, { "epoch": 1.2853328336534409, "grad_norm": 0.65594747522935, "learning_rate": 1.4955146406874372e-06, "loss": 0.2772, "step": 27438 }, { "epoch": 1.285379678643369, "grad_norm": 0.5895344116102164, "learning_rate": 1.4953409752432088e-06, "loss": 0.2854, "step": 27439 }, { "epoch": 1.2854265236332973, "grad_norm": 0.6670303588200798, "learning_rate": 1.49516731558054e-06, "loss": 0.278, "step": 27440 }, { "epoch": 1.2854733686232258, "grad_norm": 0.554386309368001, "learning_rate": 1.4949936617004309e-06, "loss": 0.2543, "step": 27441 }, { "epoch": 1.285520213613154, "grad_norm": 0.6164319954880366, "learning_rate": 1.4948200136038813e-06, "loss": 0.2683, "step": 27442 }, { "epoch": 1.2855670586030823, "grad_norm": 0.5912476485181134, "learning_rate": 1.494646371291889e-06, "loss": 0.2826, "step": 27443 }, { "epoch": 1.2856139035930108, "grad_norm": 0.5728890207822676, "learning_rate": 1.4944727347654552e-06, "loss": 0.2811, "step": 27444 }, { "epoch": 1.285660748582939, "grad_norm": 0.6167570959553849, "learning_rate": 1.4942991040255774e-06, "loss": 0.2699, "step": 27445 }, { "epoch": 1.2857075935728675, "grad_norm": 0.6471448161519547, "learning_rate": 1.4941254790732559e-06, "loss": 0.2838, "step": 27446 }, { "epoch": 1.2857544385627957, "grad_norm": 0.6368609376047488, "learning_rate": 1.4939518599094887e-06, "loss": 0.2824, "step": 27447 }, { "epoch": 1.2858012835527242, "grad_norm": 0.5796591860823185, "learning_rate": 1.4937782465352762e-06, "loss": 0.2655, "step": 27448 }, { "epoch": 1.2858481285426524, "grad_norm": 0.5928610045129388, "learning_rate": 1.493604638951617e-06, "loss": 0.281, "step": 27449 }, { "epoch": 1.2858949735325806, "grad_norm": 0.5768066428412788, "learning_rate": 1.4934310371595107e-06, "loss": 0.263, "step": 27450 }, { "epoch": 1.285941818522509, "grad_norm": 0.5543335088577545, "learning_rate": 1.4932574411599548e-06, "loss": 0.2631, "step": 27451 }, { "epoch": 1.2859886635124373, "grad_norm": 0.6583774434905071, "learning_rate": 1.4930838509539497e-06, "loss": 0.317, "step": 27452 }, { "epoch": 1.2860355085023656, "grad_norm": 0.5688856501192148, "learning_rate": 1.4929102665424938e-06, "loss": 0.2698, "step": 27453 }, { "epoch": 1.286082353492294, "grad_norm": 0.5894984801424027, "learning_rate": 1.4927366879265862e-06, "loss": 0.2687, "step": 27454 }, { "epoch": 1.2861291984822223, "grad_norm": 0.5950654764166453, "learning_rate": 1.4925631151072262e-06, "loss": 0.2724, "step": 27455 }, { "epoch": 1.2861760434721505, "grad_norm": 0.5860345342611525, "learning_rate": 1.4923895480854124e-06, "loss": 0.2873, "step": 27456 }, { "epoch": 1.286222888462079, "grad_norm": 0.560674362481353, "learning_rate": 1.492215986862143e-06, "loss": 0.264, "step": 27457 }, { "epoch": 1.2862697334520072, "grad_norm": 0.6079605195829969, "learning_rate": 1.4920424314384172e-06, "loss": 0.2878, "step": 27458 }, { "epoch": 1.2863165784419357, "grad_norm": 0.6183215196647187, "learning_rate": 1.491868881815234e-06, "loss": 0.2868, "step": 27459 }, { "epoch": 1.286363423431864, "grad_norm": 0.6467597910779243, "learning_rate": 1.4916953379935919e-06, "loss": 0.2962, "step": 27460 }, { "epoch": 1.2864102684217924, "grad_norm": 0.5763082085631914, "learning_rate": 1.4915217999744896e-06, "loss": 0.2683, "step": 27461 }, { "epoch": 1.2864571134117206, "grad_norm": 0.5564652065137861, "learning_rate": 1.491348267758927e-06, "loss": 0.2624, "step": 27462 }, { "epoch": 1.2865039584016489, "grad_norm": 0.5975795314000065, "learning_rate": 1.4911747413479005e-06, "loss": 0.2776, "step": 27463 }, { "epoch": 1.2865508033915773, "grad_norm": 0.5764455133331392, "learning_rate": 1.49100122074241e-06, "loss": 0.2659, "step": 27464 }, { "epoch": 1.2865976483815056, "grad_norm": 0.5962919515875414, "learning_rate": 1.4908277059434535e-06, "loss": 0.2597, "step": 27465 }, { "epoch": 1.2866444933714338, "grad_norm": 0.6274177078954378, "learning_rate": 1.4906541969520306e-06, "loss": 0.2867, "step": 27466 }, { "epoch": 1.2866913383613623, "grad_norm": 0.5735056565835341, "learning_rate": 1.4904806937691384e-06, "loss": 0.2508, "step": 27467 }, { "epoch": 1.2867381833512905, "grad_norm": 0.6447935265214949, "learning_rate": 1.4903071963957772e-06, "loss": 0.2973, "step": 27468 }, { "epoch": 1.2867850283412188, "grad_norm": 0.6038624074314747, "learning_rate": 1.4901337048329434e-06, "loss": 0.2808, "step": 27469 }, { "epoch": 1.2868318733311472, "grad_norm": 0.5919221637682647, "learning_rate": 1.4899602190816365e-06, "loss": 0.2735, "step": 27470 }, { "epoch": 1.2868787183210757, "grad_norm": 0.6004535432060987, "learning_rate": 1.4897867391428544e-06, "loss": 0.2644, "step": 27471 }, { "epoch": 1.286925563311004, "grad_norm": 0.5930766698839324, "learning_rate": 1.4896132650175959e-06, "loss": 0.2785, "step": 27472 }, { "epoch": 1.2869724083009322, "grad_norm": 0.6292983784438112, "learning_rate": 1.4894397967068592e-06, "loss": 0.2875, "step": 27473 }, { "epoch": 1.2870192532908606, "grad_norm": 0.6292073410366956, "learning_rate": 1.4892663342116432e-06, "loss": 0.2767, "step": 27474 }, { "epoch": 1.2870660982807889, "grad_norm": 0.5737231878534628, "learning_rate": 1.4890928775329444e-06, "loss": 0.2755, "step": 27475 }, { "epoch": 1.2871129432707171, "grad_norm": 0.6105486343733483, "learning_rate": 1.4889194266717621e-06, "loss": 0.2847, "step": 27476 }, { "epoch": 1.2871597882606456, "grad_norm": 0.5514550108522452, "learning_rate": 1.4887459816290948e-06, "loss": 0.2734, "step": 27477 }, { "epoch": 1.2872066332505738, "grad_norm": 0.5650207262478116, "learning_rate": 1.4885725424059394e-06, "loss": 0.2574, "step": 27478 }, { "epoch": 1.287253478240502, "grad_norm": 0.5832799352963021, "learning_rate": 1.488399109003295e-06, "loss": 0.2567, "step": 27479 }, { "epoch": 1.2873003232304305, "grad_norm": 0.6178197214452688, "learning_rate": 1.4882256814221603e-06, "loss": 0.2797, "step": 27480 }, { "epoch": 1.2873471682203588, "grad_norm": 0.6472373234740824, "learning_rate": 1.4880522596635327e-06, "loss": 0.2775, "step": 27481 }, { "epoch": 1.2873940132102872, "grad_norm": 0.6283177841741963, "learning_rate": 1.4878788437284092e-06, "loss": 0.2863, "step": 27482 }, { "epoch": 1.2874408582002155, "grad_norm": 0.6363456995480811, "learning_rate": 1.4877054336177882e-06, "loss": 0.2799, "step": 27483 }, { "epoch": 1.287487703190144, "grad_norm": 0.5801490541894334, "learning_rate": 1.487532029332669e-06, "loss": 0.2786, "step": 27484 }, { "epoch": 1.2875345481800722, "grad_norm": 0.601466857511389, "learning_rate": 1.4873586308740476e-06, "loss": 0.2711, "step": 27485 }, { "epoch": 1.2875813931700004, "grad_norm": 0.586548440700456, "learning_rate": 1.487185238242923e-06, "loss": 0.2704, "step": 27486 }, { "epoch": 1.2876282381599289, "grad_norm": 0.5966537044321186, "learning_rate": 1.4870118514402937e-06, "loss": 0.278, "step": 27487 }, { "epoch": 1.2876750831498571, "grad_norm": 0.61578016005997, "learning_rate": 1.486838470467156e-06, "loss": 0.2735, "step": 27488 }, { "epoch": 1.2877219281397854, "grad_norm": 0.6073151081068612, "learning_rate": 1.4866650953245076e-06, "loss": 0.287, "step": 27489 }, { "epoch": 1.2877687731297138, "grad_norm": 0.6209604808163807, "learning_rate": 1.486491726013347e-06, "loss": 0.2752, "step": 27490 }, { "epoch": 1.287815618119642, "grad_norm": 0.5881587601509205, "learning_rate": 1.4863183625346728e-06, "loss": 0.2638, "step": 27491 }, { "epoch": 1.2878624631095703, "grad_norm": 0.5781135638531094, "learning_rate": 1.4861450048894803e-06, "loss": 0.2655, "step": 27492 }, { "epoch": 1.2879093080994988, "grad_norm": 0.5766519835082732, "learning_rate": 1.48597165307877e-06, "loss": 0.2633, "step": 27493 }, { "epoch": 1.287956153089427, "grad_norm": 0.5607856567279768, "learning_rate": 1.485798307103537e-06, "loss": 0.2583, "step": 27494 }, { "epoch": 1.2880029980793555, "grad_norm": 0.6338698329447637, "learning_rate": 1.4856249669647802e-06, "loss": 0.2871, "step": 27495 }, { "epoch": 1.2880498430692837, "grad_norm": 0.5830440138101033, "learning_rate": 1.4854516326634963e-06, "loss": 0.2689, "step": 27496 }, { "epoch": 1.2880966880592122, "grad_norm": 0.562749607284343, "learning_rate": 1.485278304200683e-06, "loss": 0.2719, "step": 27497 }, { "epoch": 1.2881435330491404, "grad_norm": 0.5420279493072446, "learning_rate": 1.4851049815773389e-06, "loss": 0.2639, "step": 27498 }, { "epoch": 1.2881903780390687, "grad_norm": 0.5887537497712366, "learning_rate": 1.4849316647944608e-06, "loss": 0.2601, "step": 27499 }, { "epoch": 1.2882372230289971, "grad_norm": 0.5765729538217141, "learning_rate": 1.484758353853045e-06, "loss": 0.2691, "step": 27500 }, { "epoch": 1.2882840680189254, "grad_norm": 0.6045082795891134, "learning_rate": 1.4845850487540896e-06, "loss": 0.2776, "step": 27501 }, { "epoch": 1.2883309130088536, "grad_norm": 0.6065278769206637, "learning_rate": 1.4844117494985927e-06, "loss": 0.2756, "step": 27502 }, { "epoch": 1.288377757998782, "grad_norm": 0.5934572180593306, "learning_rate": 1.48423845608755e-06, "loss": 0.2687, "step": 27503 }, { "epoch": 1.2884246029887103, "grad_norm": 0.5349669867843707, "learning_rate": 1.4840651685219607e-06, "loss": 0.2503, "step": 27504 }, { "epoch": 1.2884714479786386, "grad_norm": 0.5573740824014897, "learning_rate": 1.4838918868028207e-06, "loss": 0.2764, "step": 27505 }, { "epoch": 1.288518292968567, "grad_norm": 0.5856065991407268, "learning_rate": 1.4837186109311275e-06, "loss": 0.2628, "step": 27506 }, { "epoch": 1.2885651379584955, "grad_norm": 0.6104021331472596, "learning_rate": 1.4835453409078779e-06, "loss": 0.2721, "step": 27507 }, { "epoch": 1.2886119829484237, "grad_norm": 0.5749995216162671, "learning_rate": 1.4833720767340699e-06, "loss": 0.2661, "step": 27508 }, { "epoch": 1.288658827938352, "grad_norm": 0.5777585282506822, "learning_rate": 1.4831988184106994e-06, "loss": 0.2579, "step": 27509 }, { "epoch": 1.2887056729282804, "grad_norm": 0.5479063574520533, "learning_rate": 1.4830255659387644e-06, "loss": 0.2621, "step": 27510 }, { "epoch": 1.2887525179182087, "grad_norm": 0.6345040935527019, "learning_rate": 1.4828523193192623e-06, "loss": 0.2783, "step": 27511 }, { "epoch": 1.288799362908137, "grad_norm": 0.5548882293093076, "learning_rate": 1.482679078553189e-06, "loss": 0.2729, "step": 27512 }, { "epoch": 1.2888462078980654, "grad_norm": 0.6376195386699858, "learning_rate": 1.4825058436415418e-06, "loss": 0.2974, "step": 27513 }, { "epoch": 1.2888930528879936, "grad_norm": 0.5404613987549954, "learning_rate": 1.4823326145853173e-06, "loss": 0.26, "step": 27514 }, { "epoch": 1.2889398978779218, "grad_norm": 0.5754938155556594, "learning_rate": 1.4821593913855136e-06, "loss": 0.283, "step": 27515 }, { "epoch": 1.2889867428678503, "grad_norm": 0.5981239394374624, "learning_rate": 1.4819861740431262e-06, "loss": 0.266, "step": 27516 }, { "epoch": 1.2890335878577786, "grad_norm": 0.5893652908763285, "learning_rate": 1.4818129625591537e-06, "loss": 0.2915, "step": 27517 }, { "epoch": 1.289080432847707, "grad_norm": 0.5455101963337134, "learning_rate": 1.4816397569345903e-06, "loss": 0.2566, "step": 27518 }, { "epoch": 1.2891272778376353, "grad_norm": 0.5838101620708928, "learning_rate": 1.4814665571704351e-06, "loss": 0.2758, "step": 27519 }, { "epoch": 1.2891741228275637, "grad_norm": 0.5929081540352543, "learning_rate": 1.4812933632676828e-06, "loss": 0.2675, "step": 27520 }, { "epoch": 1.289220967817492, "grad_norm": 0.5696670697369877, "learning_rate": 1.4811201752273317e-06, "loss": 0.2604, "step": 27521 }, { "epoch": 1.2892678128074202, "grad_norm": 0.6021430997543775, "learning_rate": 1.480946993050378e-06, "loss": 0.2812, "step": 27522 }, { "epoch": 1.2893146577973487, "grad_norm": 0.5995163866503584, "learning_rate": 1.4807738167378188e-06, "loss": 0.2736, "step": 27523 }, { "epoch": 1.289361502787277, "grad_norm": 0.608906955229948, "learning_rate": 1.480600646290649e-06, "loss": 0.2839, "step": 27524 }, { "epoch": 1.2894083477772051, "grad_norm": 0.5974002682034616, "learning_rate": 1.4804274817098669e-06, "loss": 0.2821, "step": 27525 }, { "epoch": 1.2894551927671336, "grad_norm": 0.5621036819271923, "learning_rate": 1.4802543229964683e-06, "loss": 0.2585, "step": 27526 }, { "epoch": 1.2895020377570618, "grad_norm": 0.65298884587199, "learning_rate": 1.4800811701514494e-06, "loss": 0.2748, "step": 27527 }, { "epoch": 1.28954888274699, "grad_norm": 0.58729721624668, "learning_rate": 1.479908023175807e-06, "loss": 0.2741, "step": 27528 }, { "epoch": 1.2895957277369186, "grad_norm": 0.5570506054374016, "learning_rate": 1.4797348820705385e-06, "loss": 0.2724, "step": 27529 }, { "epoch": 1.2896425727268468, "grad_norm": 0.5995323485197322, "learning_rate": 1.479561746836639e-06, "loss": 0.2765, "step": 27530 }, { "epoch": 1.2896894177167753, "grad_norm": 0.5655731205862934, "learning_rate": 1.4793886174751042e-06, "loss": 0.2584, "step": 27531 }, { "epoch": 1.2897362627067035, "grad_norm": 0.6024596988528554, "learning_rate": 1.4792154939869318e-06, "loss": 0.274, "step": 27532 }, { "epoch": 1.289783107696632, "grad_norm": 0.6452974115080797, "learning_rate": 1.4790423763731181e-06, "loss": 0.2961, "step": 27533 }, { "epoch": 1.2898299526865602, "grad_norm": 0.6162001371283989, "learning_rate": 1.4788692646346585e-06, "loss": 0.2756, "step": 27534 }, { "epoch": 1.2898767976764884, "grad_norm": 0.6510572520734821, "learning_rate": 1.4786961587725508e-06, "loss": 0.2873, "step": 27535 }, { "epoch": 1.289923642666417, "grad_norm": 0.6103749460700978, "learning_rate": 1.4785230587877886e-06, "loss": 0.281, "step": 27536 }, { "epoch": 1.2899704876563451, "grad_norm": 0.5985283411052517, "learning_rate": 1.4783499646813701e-06, "loss": 0.2811, "step": 27537 }, { "epoch": 1.2900173326462734, "grad_norm": 0.5310302471590997, "learning_rate": 1.4781768764542903e-06, "loss": 0.2543, "step": 27538 }, { "epoch": 1.2900641776362018, "grad_norm": 0.5836291923787786, "learning_rate": 1.4780037941075457e-06, "loss": 0.2722, "step": 27539 }, { "epoch": 1.29011102262613, "grad_norm": 0.5784842167277411, "learning_rate": 1.4778307176421331e-06, "loss": 0.2549, "step": 27540 }, { "epoch": 1.2901578676160583, "grad_norm": 0.5675498198987984, "learning_rate": 1.4776576470590482e-06, "loss": 0.2678, "step": 27541 }, { "epoch": 1.2902047126059868, "grad_norm": 0.6375128862528665, "learning_rate": 1.4774845823592853e-06, "loss": 0.2885, "step": 27542 }, { "epoch": 1.2902515575959153, "grad_norm": 0.5524426468224979, "learning_rate": 1.4773115235438417e-06, "loss": 0.2548, "step": 27543 }, { "epoch": 1.2902984025858435, "grad_norm": 0.5939794180944464, "learning_rate": 1.477138470613714e-06, "loss": 0.2701, "step": 27544 }, { "epoch": 1.2903452475757717, "grad_norm": 0.5887335171723703, "learning_rate": 1.4769654235698965e-06, "loss": 0.2752, "step": 27545 }, { "epoch": 1.2903920925657002, "grad_norm": 0.5816625364524309, "learning_rate": 1.4767923824133867e-06, "loss": 0.2789, "step": 27546 }, { "epoch": 1.2904389375556284, "grad_norm": 0.606071960119756, "learning_rate": 1.4766193471451795e-06, "loss": 0.2684, "step": 27547 }, { "epoch": 1.2904857825455567, "grad_norm": 0.6142330659050066, "learning_rate": 1.4764463177662708e-06, "loss": 0.2832, "step": 27548 }, { "epoch": 1.2905326275354851, "grad_norm": 0.5760757880809566, "learning_rate": 1.4762732942776556e-06, "loss": 0.2749, "step": 27549 }, { "epoch": 1.2905794725254134, "grad_norm": 0.6079569771399932, "learning_rate": 1.4761002766803306e-06, "loss": 0.2894, "step": 27550 }, { "epoch": 1.2906263175153416, "grad_norm": 0.6159121585979717, "learning_rate": 1.475927264975291e-06, "loss": 0.2811, "step": 27551 }, { "epoch": 1.29067316250527, "grad_norm": 0.6021063682771318, "learning_rate": 1.4757542591635327e-06, "loss": 0.2757, "step": 27552 }, { "epoch": 1.2907200074951983, "grad_norm": 0.5977437085952174, "learning_rate": 1.4755812592460514e-06, "loss": 0.2849, "step": 27553 }, { "epoch": 1.2907668524851268, "grad_norm": 0.5973795608861523, "learning_rate": 1.475408265223843e-06, "loss": 0.2754, "step": 27554 }, { "epoch": 1.290813697475055, "grad_norm": 0.5776636162757606, "learning_rate": 1.4752352770979018e-06, "loss": 0.2635, "step": 27555 }, { "epoch": 1.2908605424649835, "grad_norm": 0.6297171257470423, "learning_rate": 1.4750622948692236e-06, "loss": 0.2883, "step": 27556 }, { "epoch": 1.2909073874549117, "grad_norm": 0.5967426239549763, "learning_rate": 1.4748893185388053e-06, "loss": 0.2779, "step": 27557 }, { "epoch": 1.29095423244484, "grad_norm": 0.5994123470249909, "learning_rate": 1.4747163481076404e-06, "loss": 0.2702, "step": 27558 }, { "epoch": 1.2910010774347684, "grad_norm": 0.5960596668130835, "learning_rate": 1.4745433835767254e-06, "loss": 0.276, "step": 27559 }, { "epoch": 1.2910479224246967, "grad_norm": 0.5506051301456535, "learning_rate": 1.4743704249470562e-06, "loss": 0.2611, "step": 27560 }, { "epoch": 1.291094767414625, "grad_norm": 0.5951927397966513, "learning_rate": 1.4741974722196273e-06, "loss": 0.296, "step": 27561 }, { "epoch": 1.2911416124045534, "grad_norm": 0.6065024588964464, "learning_rate": 1.4740245253954332e-06, "loss": 0.2644, "step": 27562 }, { "epoch": 1.2911884573944816, "grad_norm": 0.5892812013934352, "learning_rate": 1.4738515844754702e-06, "loss": 0.2703, "step": 27563 }, { "epoch": 1.2912353023844099, "grad_norm": 0.6263664333398073, "learning_rate": 1.4736786494607342e-06, "loss": 0.286, "step": 27564 }, { "epoch": 1.2912821473743383, "grad_norm": 0.5961640539265688, "learning_rate": 1.4735057203522191e-06, "loss": 0.2785, "step": 27565 }, { "epoch": 1.2913289923642666, "grad_norm": 0.5973540035168032, "learning_rate": 1.4733327971509216e-06, "loss": 0.2695, "step": 27566 }, { "epoch": 1.291375837354195, "grad_norm": 0.5969905542677506, "learning_rate": 1.4731598798578345e-06, "loss": 0.2813, "step": 27567 }, { "epoch": 1.2914226823441233, "grad_norm": 0.6326771415448451, "learning_rate": 1.472986968473955e-06, "loss": 0.2771, "step": 27568 }, { "epoch": 1.2914695273340517, "grad_norm": 0.5735899815211293, "learning_rate": 1.4728140630002765e-06, "loss": 0.2791, "step": 27569 }, { "epoch": 1.29151637232398, "grad_norm": 0.5803430000160277, "learning_rate": 1.4726411634377951e-06, "loss": 0.2784, "step": 27570 }, { "epoch": 1.2915632173139082, "grad_norm": 0.559600946270327, "learning_rate": 1.4724682697875065e-06, "loss": 0.2722, "step": 27571 }, { "epoch": 1.2916100623038367, "grad_norm": 0.5862291657560978, "learning_rate": 1.4722953820504047e-06, "loss": 0.274, "step": 27572 }, { "epoch": 1.291656907293765, "grad_norm": 0.6071334602213591, "learning_rate": 1.472122500227484e-06, "loss": 0.2831, "step": 27573 }, { "epoch": 1.2917037522836932, "grad_norm": 0.6063497005888908, "learning_rate": 1.4719496243197395e-06, "loss": 0.2822, "step": 27574 }, { "epoch": 1.2917505972736216, "grad_norm": 0.6117155105262894, "learning_rate": 1.4717767543281674e-06, "loss": 0.2841, "step": 27575 }, { "epoch": 1.2917974422635499, "grad_norm": 0.589021169850765, "learning_rate": 1.471603890253761e-06, "loss": 0.2695, "step": 27576 }, { "epoch": 1.291844287253478, "grad_norm": 0.5822088626637973, "learning_rate": 1.4714310320975159e-06, "loss": 0.2708, "step": 27577 }, { "epoch": 1.2918911322434066, "grad_norm": 0.5989291478301053, "learning_rate": 1.4712581798604275e-06, "loss": 0.2659, "step": 27578 }, { "epoch": 1.291937977233335, "grad_norm": 0.5735797705715432, "learning_rate": 1.4710853335434894e-06, "loss": 0.2672, "step": 27579 }, { "epoch": 1.2919848222232633, "grad_norm": 0.611789633448181, "learning_rate": 1.470912493147696e-06, "loss": 0.283, "step": 27580 }, { "epoch": 1.2920316672131915, "grad_norm": 0.5592603375679303, "learning_rate": 1.4707396586740424e-06, "loss": 0.2681, "step": 27581 }, { "epoch": 1.29207851220312, "grad_norm": 0.5609405347777896, "learning_rate": 1.4705668301235242e-06, "loss": 0.2652, "step": 27582 }, { "epoch": 1.2921253571930482, "grad_norm": 0.6149876606443955, "learning_rate": 1.4703940074971345e-06, "loss": 0.2819, "step": 27583 }, { "epoch": 1.2921722021829765, "grad_norm": 0.5884710549741751, "learning_rate": 1.4702211907958697e-06, "loss": 0.2692, "step": 27584 }, { "epoch": 1.292219047172905, "grad_norm": 0.5694320250475546, "learning_rate": 1.4700483800207217e-06, "loss": 0.2625, "step": 27585 }, { "epoch": 1.2922658921628332, "grad_norm": 0.5971958546665053, "learning_rate": 1.4698755751726873e-06, "loss": 0.2789, "step": 27586 }, { "epoch": 1.2923127371527614, "grad_norm": 0.591189761900104, "learning_rate": 1.4697027762527593e-06, "loss": 0.268, "step": 27587 }, { "epoch": 1.2923595821426899, "grad_norm": 0.584324979314272, "learning_rate": 1.4695299832619328e-06, "loss": 0.253, "step": 27588 }, { "epoch": 1.292406427132618, "grad_norm": 0.6259575346761218, "learning_rate": 1.469357196201203e-06, "loss": 0.2807, "step": 27589 }, { "epoch": 1.2924532721225466, "grad_norm": 0.5830765018093115, "learning_rate": 1.4691844150715639e-06, "loss": 0.2615, "step": 27590 }, { "epoch": 1.2925001171124748, "grad_norm": 0.6004322324713888, "learning_rate": 1.4690116398740084e-06, "loss": 0.2777, "step": 27591 }, { "epoch": 1.2925469621024033, "grad_norm": 0.5927008681292272, "learning_rate": 1.4688388706095317e-06, "loss": 0.2709, "step": 27592 }, { "epoch": 1.2925938070923315, "grad_norm": 0.6203571858653864, "learning_rate": 1.4686661072791287e-06, "loss": 0.2809, "step": 27593 }, { "epoch": 1.2926406520822598, "grad_norm": 0.6117552983189918, "learning_rate": 1.4684933498837922e-06, "loss": 0.2851, "step": 27594 }, { "epoch": 1.2926874970721882, "grad_norm": 0.6469420781742833, "learning_rate": 1.4683205984245183e-06, "loss": 0.3033, "step": 27595 }, { "epoch": 1.2927343420621165, "grad_norm": 0.5742424018568628, "learning_rate": 1.4681478529022996e-06, "loss": 0.266, "step": 27596 }, { "epoch": 1.2927811870520447, "grad_norm": 0.6861039136832642, "learning_rate": 1.4679751133181308e-06, "loss": 0.2995, "step": 27597 }, { "epoch": 1.2928280320419732, "grad_norm": 0.5663563141143082, "learning_rate": 1.4678023796730056e-06, "loss": 0.2731, "step": 27598 }, { "epoch": 1.2928748770319014, "grad_norm": 0.6181492714035051, "learning_rate": 1.4676296519679183e-06, "loss": 0.2865, "step": 27599 }, { "epoch": 1.2929217220218296, "grad_norm": 0.5397024055229772, "learning_rate": 1.4674569302038628e-06, "loss": 0.2511, "step": 27600 }, { "epoch": 1.292968567011758, "grad_norm": 0.5618824144342051, "learning_rate": 1.467284214381833e-06, "loss": 0.2718, "step": 27601 }, { "epoch": 1.2930154120016863, "grad_norm": 0.5437796179200569, "learning_rate": 1.4671115045028239e-06, "loss": 0.2643, "step": 27602 }, { "epoch": 1.2930622569916148, "grad_norm": 0.5657299826347113, "learning_rate": 1.466938800567828e-06, "loss": 0.2788, "step": 27603 }, { "epoch": 1.293109101981543, "grad_norm": 0.6302159723301134, "learning_rate": 1.4667661025778393e-06, "loss": 0.2869, "step": 27604 }, { "epoch": 1.2931559469714715, "grad_norm": 0.5488901587714218, "learning_rate": 1.466593410533852e-06, "loss": 0.2628, "step": 27605 }, { "epoch": 1.2932027919613998, "grad_norm": 0.5994499763120861, "learning_rate": 1.46642072443686e-06, "loss": 0.2705, "step": 27606 }, { "epoch": 1.293249636951328, "grad_norm": 0.5736739436212254, "learning_rate": 1.4662480442878568e-06, "loss": 0.2718, "step": 27607 }, { "epoch": 1.2932964819412565, "grad_norm": 0.6058419936567712, "learning_rate": 1.4660753700878372e-06, "loss": 0.2754, "step": 27608 }, { "epoch": 1.2933433269311847, "grad_norm": 0.5894527403656131, "learning_rate": 1.465902701837793e-06, "loss": 0.2769, "step": 27609 }, { "epoch": 1.293390171921113, "grad_norm": 0.6059845821747274, "learning_rate": 1.4657300395387195e-06, "loss": 0.2761, "step": 27610 }, { "epoch": 1.2934370169110414, "grad_norm": 0.5443001178657169, "learning_rate": 1.465557383191609e-06, "loss": 0.256, "step": 27611 }, { "epoch": 1.2934838619009696, "grad_norm": 0.621603264934878, "learning_rate": 1.4653847327974558e-06, "loss": 0.2923, "step": 27612 }, { "epoch": 1.2935307068908979, "grad_norm": 0.5688852586160987, "learning_rate": 1.4652120883572537e-06, "loss": 0.2775, "step": 27613 }, { "epoch": 1.2935775518808263, "grad_norm": 0.5941218637645621, "learning_rate": 1.4650394498719967e-06, "loss": 0.2856, "step": 27614 }, { "epoch": 1.2936243968707548, "grad_norm": 0.5512321143673815, "learning_rate": 1.4648668173426765e-06, "loss": 0.2668, "step": 27615 }, { "epoch": 1.293671241860683, "grad_norm": 0.6391014136734996, "learning_rate": 1.4646941907702876e-06, "loss": 0.279, "step": 27616 }, { "epoch": 1.2937180868506113, "grad_norm": 0.5981910068107914, "learning_rate": 1.4645215701558236e-06, "loss": 0.2789, "step": 27617 }, { "epoch": 1.2937649318405398, "grad_norm": 0.5959735965321036, "learning_rate": 1.4643489555002777e-06, "loss": 0.2632, "step": 27618 }, { "epoch": 1.293811776830468, "grad_norm": 0.5749725272484371, "learning_rate": 1.4641763468046426e-06, "loss": 0.2716, "step": 27619 }, { "epoch": 1.2938586218203962, "grad_norm": 0.545207681803951, "learning_rate": 1.4640037440699134e-06, "loss": 0.262, "step": 27620 }, { "epoch": 1.2939054668103247, "grad_norm": 0.6110524871306252, "learning_rate": 1.463831147297083e-06, "loss": 0.2845, "step": 27621 }, { "epoch": 1.293952311800253, "grad_norm": 0.5736384587796282, "learning_rate": 1.4636585564871423e-06, "loss": 0.2693, "step": 27622 }, { "epoch": 1.2939991567901812, "grad_norm": 0.5844768609816504, "learning_rate": 1.4634859716410863e-06, "loss": 0.2654, "step": 27623 }, { "epoch": 1.2940460017801096, "grad_norm": 0.6083167665465654, "learning_rate": 1.4633133927599085e-06, "loss": 0.2808, "step": 27624 }, { "epoch": 1.2940928467700379, "grad_norm": 0.5941153994134127, "learning_rate": 1.4631408198446013e-06, "loss": 0.2696, "step": 27625 }, { "epoch": 1.2941396917599663, "grad_norm": 0.6164480893947863, "learning_rate": 1.4629682528961575e-06, "loss": 0.2652, "step": 27626 }, { "epoch": 1.2941865367498946, "grad_norm": 0.5817215421262889, "learning_rate": 1.4627956919155722e-06, "loss": 0.272, "step": 27627 }, { "epoch": 1.294233381739823, "grad_norm": 0.5814350220373046, "learning_rate": 1.4626231369038365e-06, "loss": 0.2699, "step": 27628 }, { "epoch": 1.2942802267297513, "grad_norm": 0.5601053882590222, "learning_rate": 1.4624505878619432e-06, "loss": 0.2676, "step": 27629 }, { "epoch": 1.2943270717196795, "grad_norm": 0.5679091878089126, "learning_rate": 1.4622780447908862e-06, "loss": 0.2598, "step": 27630 }, { "epoch": 1.294373916709608, "grad_norm": 0.5891258448415921, "learning_rate": 1.462105507691659e-06, "loss": 0.2708, "step": 27631 }, { "epoch": 1.2944207616995362, "grad_norm": 0.5847202580815205, "learning_rate": 1.461932976565253e-06, "loss": 0.2828, "step": 27632 }, { "epoch": 1.2944676066894645, "grad_norm": 0.5813335402679708, "learning_rate": 1.4617604514126627e-06, "loss": 0.2765, "step": 27633 }, { "epoch": 1.294514451679393, "grad_norm": 0.5364391819696017, "learning_rate": 1.4615879322348788e-06, "loss": 0.2627, "step": 27634 }, { "epoch": 1.2945612966693212, "grad_norm": 0.6031983266208569, "learning_rate": 1.461415419032896e-06, "loss": 0.2791, "step": 27635 }, { "epoch": 1.2946081416592494, "grad_norm": 0.6019720313286483, "learning_rate": 1.4612429118077063e-06, "loss": 0.2761, "step": 27636 }, { "epoch": 1.2946549866491779, "grad_norm": 0.6078009179687358, "learning_rate": 1.4610704105603024e-06, "loss": 0.2856, "step": 27637 }, { "epoch": 1.2947018316391061, "grad_norm": 0.6021171636848706, "learning_rate": 1.4608979152916775e-06, "loss": 0.2934, "step": 27638 }, { "epoch": 1.2947486766290346, "grad_norm": 0.6338062588896843, "learning_rate": 1.4607254260028244e-06, "loss": 0.2928, "step": 27639 }, { "epoch": 1.2947955216189628, "grad_norm": 0.5938096836531555, "learning_rate": 1.4605529426947345e-06, "loss": 0.2741, "step": 27640 }, { "epoch": 1.2948423666088913, "grad_norm": 0.6346485798923902, "learning_rate": 1.4603804653684009e-06, "loss": 0.2836, "step": 27641 }, { "epoch": 1.2948892115988195, "grad_norm": 0.565068721620488, "learning_rate": 1.460207994024817e-06, "loss": 0.2638, "step": 27642 }, { "epoch": 1.2949360565887478, "grad_norm": 0.6090577654198017, "learning_rate": 1.4600355286649741e-06, "loss": 0.2745, "step": 27643 }, { "epoch": 1.2949829015786762, "grad_norm": 0.5697421523511644, "learning_rate": 1.4598630692898663e-06, "loss": 0.2689, "step": 27644 }, { "epoch": 1.2950297465686045, "grad_norm": 0.5928469504718281, "learning_rate": 1.4596906159004849e-06, "loss": 0.2771, "step": 27645 }, { "epoch": 1.2950765915585327, "grad_norm": 0.6240751125729452, "learning_rate": 1.4595181684978227e-06, "loss": 0.2772, "step": 27646 }, { "epoch": 1.2951234365484612, "grad_norm": 0.5693237011571073, "learning_rate": 1.4593457270828709e-06, "loss": 0.2757, "step": 27647 }, { "epoch": 1.2951702815383894, "grad_norm": 0.6259761050790782, "learning_rate": 1.459173291656623e-06, "loss": 0.2821, "step": 27648 }, { "epoch": 1.2952171265283177, "grad_norm": 0.6251993679495786, "learning_rate": 1.459000862220072e-06, "loss": 0.2858, "step": 27649 }, { "epoch": 1.2952639715182461, "grad_norm": 0.6051037330367535, "learning_rate": 1.4588284387742097e-06, "loss": 0.275, "step": 27650 }, { "epoch": 1.2953108165081746, "grad_norm": 0.6126145906167023, "learning_rate": 1.458656021320028e-06, "loss": 0.2755, "step": 27651 }, { "epoch": 1.2953576614981028, "grad_norm": 0.6339867033887406, "learning_rate": 1.4584836098585181e-06, "loss": 0.2682, "step": 27652 }, { "epoch": 1.295404506488031, "grad_norm": 0.5962409651811265, "learning_rate": 1.4583112043906737e-06, "loss": 0.2809, "step": 27653 }, { "epoch": 1.2954513514779595, "grad_norm": 0.5460515983875535, "learning_rate": 1.458138804917486e-06, "loss": 0.2474, "step": 27654 }, { "epoch": 1.2954981964678878, "grad_norm": 0.5692917588309722, "learning_rate": 1.4579664114399481e-06, "loss": 0.2695, "step": 27655 }, { "epoch": 1.295545041457816, "grad_norm": 0.556135547894557, "learning_rate": 1.4577940239590512e-06, "loss": 0.2607, "step": 27656 }, { "epoch": 1.2955918864477445, "grad_norm": 0.5864643942130131, "learning_rate": 1.4576216424757888e-06, "loss": 0.2689, "step": 27657 }, { "epoch": 1.2956387314376727, "grad_norm": 0.6193124288134081, "learning_rate": 1.4574492669911522e-06, "loss": 0.2803, "step": 27658 }, { "epoch": 1.295685576427601, "grad_norm": 0.5927587854218368, "learning_rate": 1.4572768975061318e-06, "loss": 0.2773, "step": 27659 }, { "epoch": 1.2957324214175294, "grad_norm": 0.5859291155758158, "learning_rate": 1.4571045340217208e-06, "loss": 0.2778, "step": 27660 }, { "epoch": 1.2957792664074577, "grad_norm": 0.5824810826841231, "learning_rate": 1.456932176538911e-06, "loss": 0.2664, "step": 27661 }, { "epoch": 1.2958261113973861, "grad_norm": 0.5884071128865221, "learning_rate": 1.4567598250586945e-06, "loss": 0.278, "step": 27662 }, { "epoch": 1.2958729563873144, "grad_norm": 0.6104491625390182, "learning_rate": 1.4565874795820638e-06, "loss": 0.2655, "step": 27663 }, { "epoch": 1.2959198013772428, "grad_norm": 0.5839675460685518, "learning_rate": 1.456415140110009e-06, "loss": 0.2621, "step": 27664 }, { "epoch": 1.295966646367171, "grad_norm": 0.6077870984649897, "learning_rate": 1.4562428066435242e-06, "loss": 0.2774, "step": 27665 }, { "epoch": 1.2960134913570993, "grad_norm": 0.5556560926085244, "learning_rate": 1.456070479183598e-06, "loss": 0.2609, "step": 27666 }, { "epoch": 1.2960603363470278, "grad_norm": 0.6107125301923464, "learning_rate": 1.4558981577312242e-06, "loss": 0.2674, "step": 27667 }, { "epoch": 1.296107181336956, "grad_norm": 0.615291067762554, "learning_rate": 1.4557258422873939e-06, "loss": 0.2958, "step": 27668 }, { "epoch": 1.2961540263268843, "grad_norm": 0.5935523642525965, "learning_rate": 1.4555535328531001e-06, "loss": 0.2908, "step": 27669 }, { "epoch": 1.2962008713168127, "grad_norm": 0.5541320201423899, "learning_rate": 1.455381229429332e-06, "loss": 0.2606, "step": 27670 }, { "epoch": 1.296247716306741, "grad_norm": 0.6082276764405095, "learning_rate": 1.4552089320170837e-06, "loss": 0.2689, "step": 27671 }, { "epoch": 1.2962945612966692, "grad_norm": 0.5993685752056497, "learning_rate": 1.4550366406173438e-06, "loss": 0.2765, "step": 27672 }, { "epoch": 1.2963414062865977, "grad_norm": 0.6075425148678859, "learning_rate": 1.454864355231106e-06, "loss": 0.2755, "step": 27673 }, { "epoch": 1.296388251276526, "grad_norm": 0.5781317212926885, "learning_rate": 1.4546920758593608e-06, "loss": 0.2764, "step": 27674 }, { "epoch": 1.2964350962664544, "grad_norm": 0.614860228204433, "learning_rate": 1.4545198025031012e-06, "loss": 0.28, "step": 27675 }, { "epoch": 1.2964819412563826, "grad_norm": 0.622487882741663, "learning_rate": 1.4543475351633164e-06, "loss": 0.2693, "step": 27676 }, { "epoch": 1.296528786246311, "grad_norm": 0.5690594955735987, "learning_rate": 1.4541752738409987e-06, "loss": 0.2556, "step": 27677 }, { "epoch": 1.2965756312362393, "grad_norm": 0.6054238736052412, "learning_rate": 1.4540030185371401e-06, "loss": 0.2778, "step": 27678 }, { "epoch": 1.2966224762261676, "grad_norm": 0.6101286606994611, "learning_rate": 1.4538307692527304e-06, "loss": 0.2815, "step": 27679 }, { "epoch": 1.296669321216096, "grad_norm": 0.5977291839377793, "learning_rate": 1.4536585259887616e-06, "loss": 0.2745, "step": 27680 }, { "epoch": 1.2967161662060243, "grad_norm": 0.6260976927937503, "learning_rate": 1.4534862887462265e-06, "loss": 0.2697, "step": 27681 }, { "epoch": 1.2967630111959525, "grad_norm": 0.6030806955038774, "learning_rate": 1.4533140575261131e-06, "loss": 0.2759, "step": 27682 }, { "epoch": 1.296809856185881, "grad_norm": 0.6044890759841198, "learning_rate": 1.4531418323294143e-06, "loss": 0.2939, "step": 27683 }, { "epoch": 1.2968567011758092, "grad_norm": 0.5786965836735632, "learning_rate": 1.4529696131571217e-06, "loss": 0.2613, "step": 27684 }, { "epoch": 1.2969035461657374, "grad_norm": 0.6010136327645056, "learning_rate": 1.4527974000102262e-06, "loss": 0.2829, "step": 27685 }, { "epoch": 1.296950391155666, "grad_norm": 0.6384430177419302, "learning_rate": 1.4526251928897179e-06, "loss": 0.2865, "step": 27686 }, { "epoch": 1.2969972361455941, "grad_norm": 0.5760692512103524, "learning_rate": 1.4524529917965882e-06, "loss": 0.2699, "step": 27687 }, { "epoch": 1.2970440811355226, "grad_norm": 0.6051085509875783, "learning_rate": 1.4522807967318291e-06, "loss": 0.2717, "step": 27688 }, { "epoch": 1.2970909261254508, "grad_norm": 0.5834824730218842, "learning_rate": 1.4521086076964303e-06, "loss": 0.2653, "step": 27689 }, { "epoch": 1.2971377711153793, "grad_norm": 0.6198839564248965, "learning_rate": 1.4519364246913826e-06, "loss": 0.2753, "step": 27690 }, { "epoch": 1.2971846161053076, "grad_norm": 0.5727844235072612, "learning_rate": 1.451764247717677e-06, "loss": 0.2741, "step": 27691 }, { "epoch": 1.2972314610952358, "grad_norm": 0.6235917341117274, "learning_rate": 1.4515920767763062e-06, "loss": 0.2841, "step": 27692 }, { "epoch": 1.2972783060851643, "grad_norm": 0.6044197045474136, "learning_rate": 1.4514199118682582e-06, "loss": 0.2638, "step": 27693 }, { "epoch": 1.2973251510750925, "grad_norm": 0.6032184935642209, "learning_rate": 1.4512477529945263e-06, "loss": 0.2744, "step": 27694 }, { "epoch": 1.2973719960650207, "grad_norm": 0.5857863327951777, "learning_rate": 1.4510756001560988e-06, "loss": 0.2799, "step": 27695 }, { "epoch": 1.2974188410549492, "grad_norm": 0.638101803770823, "learning_rate": 1.450903453353968e-06, "loss": 0.2814, "step": 27696 }, { "epoch": 1.2974656860448774, "grad_norm": 0.6048512519563495, "learning_rate": 1.4507313125891237e-06, "loss": 0.2631, "step": 27697 }, { "epoch": 1.297512531034806, "grad_norm": 0.5820746914300596, "learning_rate": 1.4505591778625571e-06, "loss": 0.2773, "step": 27698 }, { "epoch": 1.2975593760247341, "grad_norm": 0.5498279214143235, "learning_rate": 1.4503870491752594e-06, "loss": 0.2758, "step": 27699 }, { "epoch": 1.2976062210146626, "grad_norm": 0.5658142099482645, "learning_rate": 1.4502149265282208e-06, "loss": 0.2613, "step": 27700 }, { "epoch": 1.2976530660045908, "grad_norm": 0.5712360371525356, "learning_rate": 1.4500428099224304e-06, "loss": 0.2681, "step": 27701 }, { "epoch": 1.297699910994519, "grad_norm": 0.5961141410436395, "learning_rate": 1.4498706993588796e-06, "loss": 0.2571, "step": 27702 }, { "epoch": 1.2977467559844476, "grad_norm": 0.5914440482848139, "learning_rate": 1.4496985948385589e-06, "loss": 0.2741, "step": 27703 }, { "epoch": 1.2977936009743758, "grad_norm": 0.5513505732194668, "learning_rate": 1.449526496362459e-06, "loss": 0.2566, "step": 27704 }, { "epoch": 1.297840445964304, "grad_norm": 0.5662340656469299, "learning_rate": 1.449354403931571e-06, "loss": 0.2631, "step": 27705 }, { "epoch": 1.2978872909542325, "grad_norm": 0.5814122509337107, "learning_rate": 1.4491823175468834e-06, "loss": 0.2733, "step": 27706 }, { "epoch": 1.2979341359441607, "grad_norm": 0.6164918694606929, "learning_rate": 1.4490102372093884e-06, "loss": 0.2748, "step": 27707 }, { "epoch": 1.297980980934089, "grad_norm": 0.6295720387582336, "learning_rate": 1.4488381629200742e-06, "loss": 0.2735, "step": 27708 }, { "epoch": 1.2980278259240174, "grad_norm": 0.5679083401610839, "learning_rate": 1.4486660946799324e-06, "loss": 0.2654, "step": 27709 }, { "epoch": 1.2980746709139457, "grad_norm": 0.6014000155001181, "learning_rate": 1.448494032489953e-06, "loss": 0.2839, "step": 27710 }, { "epoch": 1.2981215159038741, "grad_norm": 0.6055020282401927, "learning_rate": 1.4483219763511261e-06, "loss": 0.2789, "step": 27711 }, { "epoch": 1.2981683608938024, "grad_norm": 0.6117224840357133, "learning_rate": 1.4481499262644426e-06, "loss": 0.2785, "step": 27712 }, { "epoch": 1.2982152058837308, "grad_norm": 0.6068391189488844, "learning_rate": 1.4479778822308916e-06, "loss": 0.2746, "step": 27713 }, { "epoch": 1.298262050873659, "grad_norm": 0.582051319208891, "learning_rate": 1.4478058442514637e-06, "loss": 0.2752, "step": 27714 }, { "epoch": 1.2983088958635873, "grad_norm": 0.5865826530753034, "learning_rate": 1.4476338123271482e-06, "loss": 0.2833, "step": 27715 }, { "epoch": 1.2983557408535158, "grad_norm": 0.639626011564291, "learning_rate": 1.4474617864589351e-06, "loss": 0.2952, "step": 27716 }, { "epoch": 1.298402585843444, "grad_norm": 0.5978833360568995, "learning_rate": 1.4472897666478153e-06, "loss": 0.281, "step": 27717 }, { "epoch": 1.2984494308333723, "grad_norm": 0.5602267109721171, "learning_rate": 1.4471177528947795e-06, "loss": 0.2693, "step": 27718 }, { "epoch": 1.2984962758233007, "grad_norm": 0.5518211166943968, "learning_rate": 1.446945745200815e-06, "loss": 0.2752, "step": 27719 }, { "epoch": 1.298543120813229, "grad_norm": 0.6010262707555872, "learning_rate": 1.4467737435669143e-06, "loss": 0.2756, "step": 27720 }, { "epoch": 1.2985899658031572, "grad_norm": 0.5236819141291021, "learning_rate": 1.4466017479940647e-06, "loss": 0.2554, "step": 27721 }, { "epoch": 1.2986368107930857, "grad_norm": 0.6365786576178353, "learning_rate": 1.4464297584832577e-06, "loss": 0.2766, "step": 27722 }, { "epoch": 1.298683655783014, "grad_norm": 0.5728414273443795, "learning_rate": 1.4462577750354823e-06, "loss": 0.2584, "step": 27723 }, { "epoch": 1.2987305007729424, "grad_norm": 0.6081029515007632, "learning_rate": 1.4460857976517294e-06, "loss": 0.2802, "step": 27724 }, { "epoch": 1.2987773457628706, "grad_norm": 0.6194543886908355, "learning_rate": 1.4459138263329873e-06, "loss": 0.2777, "step": 27725 }, { "epoch": 1.298824190752799, "grad_norm": 0.6023712993626043, "learning_rate": 1.4457418610802459e-06, "loss": 0.2724, "step": 27726 }, { "epoch": 1.2988710357427273, "grad_norm": 0.588568894377646, "learning_rate": 1.4455699018944964e-06, "loss": 0.277, "step": 27727 }, { "epoch": 1.2989178807326556, "grad_norm": 0.6624126449035779, "learning_rate": 1.4453979487767255e-06, "loss": 0.2775, "step": 27728 }, { "epoch": 1.298964725722584, "grad_norm": 0.6214489489744485, "learning_rate": 1.4452260017279248e-06, "loss": 0.305, "step": 27729 }, { "epoch": 1.2990115707125123, "grad_norm": 0.5798564417322181, "learning_rate": 1.4450540607490843e-06, "loss": 0.2685, "step": 27730 }, { "epoch": 1.2990584157024405, "grad_norm": 0.5869182665952056, "learning_rate": 1.4448821258411916e-06, "loss": 0.2838, "step": 27731 }, { "epoch": 1.299105260692369, "grad_norm": 0.5919564001858949, "learning_rate": 1.444710197005237e-06, "loss": 0.2576, "step": 27732 }, { "epoch": 1.2991521056822972, "grad_norm": 0.5876732381754662, "learning_rate": 1.4445382742422097e-06, "loss": 0.2757, "step": 27733 }, { "epoch": 1.2991989506722257, "grad_norm": 0.5864308493729099, "learning_rate": 1.4443663575531008e-06, "loss": 0.2617, "step": 27734 }, { "epoch": 1.299245795662154, "grad_norm": 0.5987666773923225, "learning_rate": 1.444194446938897e-06, "loss": 0.2625, "step": 27735 }, { "epoch": 1.2992926406520824, "grad_norm": 0.5512854134053596, "learning_rate": 1.4440225424005897e-06, "loss": 0.2599, "step": 27736 }, { "epoch": 1.2993394856420106, "grad_norm": 0.6722494777969632, "learning_rate": 1.4438506439391665e-06, "loss": 0.2815, "step": 27737 }, { "epoch": 1.2993863306319389, "grad_norm": 0.6263980121331653, "learning_rate": 1.4436787515556171e-06, "loss": 0.2837, "step": 27738 }, { "epoch": 1.2994331756218673, "grad_norm": 0.596064484635121, "learning_rate": 1.443506865250931e-06, "loss": 0.2676, "step": 27739 }, { "epoch": 1.2994800206117956, "grad_norm": 0.6232592673366182, "learning_rate": 1.4433349850260975e-06, "loss": 0.278, "step": 27740 }, { "epoch": 1.2995268656017238, "grad_norm": 0.554590848605432, "learning_rate": 1.4431631108821065e-06, "loss": 0.265, "step": 27741 }, { "epoch": 1.2995737105916523, "grad_norm": 0.6079454770994477, "learning_rate": 1.4429912428199463e-06, "loss": 0.2632, "step": 27742 }, { "epoch": 1.2996205555815805, "grad_norm": 0.6016841143916898, "learning_rate": 1.442819380840605e-06, "loss": 0.2669, "step": 27743 }, { "epoch": 1.2996674005715088, "grad_norm": 0.6274682074428429, "learning_rate": 1.442647524945072e-06, "loss": 0.2858, "step": 27744 }, { "epoch": 1.2997142455614372, "grad_norm": 0.6343670760784683, "learning_rate": 1.4424756751343368e-06, "loss": 0.2798, "step": 27745 }, { "epoch": 1.2997610905513655, "grad_norm": 0.5871855580256081, "learning_rate": 1.4423038314093884e-06, "loss": 0.2817, "step": 27746 }, { "epoch": 1.299807935541294, "grad_norm": 0.6328027953646946, "learning_rate": 1.442131993771216e-06, "loss": 0.2772, "step": 27747 }, { "epoch": 1.2998547805312222, "grad_norm": 0.5986422345702656, "learning_rate": 1.4419601622208084e-06, "loss": 0.2721, "step": 27748 }, { "epoch": 1.2999016255211506, "grad_norm": 0.6142185688923133, "learning_rate": 1.4417883367591545e-06, "loss": 0.2983, "step": 27749 }, { "epoch": 1.2999484705110789, "grad_norm": 0.6695697139756387, "learning_rate": 1.4416165173872418e-06, "loss": 0.2878, "step": 27750 }, { "epoch": 1.299995315501007, "grad_norm": 0.5812985980560689, "learning_rate": 1.4414447041060598e-06, "loss": 0.2795, "step": 27751 }, { "epoch": 1.3000421604909356, "grad_norm": 0.5490717687496144, "learning_rate": 1.4412728969165979e-06, "loss": 0.2584, "step": 27752 }, { "epoch": 1.3000890054808638, "grad_norm": 0.5573242886312808, "learning_rate": 1.441101095819844e-06, "loss": 0.2805, "step": 27753 }, { "epoch": 1.300135850470792, "grad_norm": 0.5638247341053707, "learning_rate": 1.4409293008167882e-06, "loss": 0.2631, "step": 27754 }, { "epoch": 1.3001826954607205, "grad_norm": 0.5928200118558713, "learning_rate": 1.4407575119084172e-06, "loss": 0.2806, "step": 27755 }, { "epoch": 1.3002295404506488, "grad_norm": 0.58786716425952, "learning_rate": 1.4405857290957215e-06, "loss": 0.2726, "step": 27756 }, { "epoch": 1.300276385440577, "grad_norm": 0.578377113081704, "learning_rate": 1.4404139523796872e-06, "loss": 0.2685, "step": 27757 }, { "epoch": 1.3003232304305055, "grad_norm": 0.5745656039327495, "learning_rate": 1.440242181761305e-06, "loss": 0.2614, "step": 27758 }, { "epoch": 1.3003700754204337, "grad_norm": 0.5991140954075251, "learning_rate": 1.4400704172415623e-06, "loss": 0.2704, "step": 27759 }, { "epoch": 1.3004169204103622, "grad_norm": 0.5813062165601672, "learning_rate": 1.439898658821448e-06, "loss": 0.2646, "step": 27760 }, { "epoch": 1.3004637654002904, "grad_norm": 0.6088745749314893, "learning_rate": 1.4397269065019514e-06, "loss": 0.2754, "step": 27761 }, { "epoch": 1.3005106103902189, "grad_norm": 0.5949790198466018, "learning_rate": 1.4395551602840603e-06, "loss": 0.2657, "step": 27762 }, { "epoch": 1.300557455380147, "grad_norm": 0.567212086782185, "learning_rate": 1.4393834201687613e-06, "loss": 0.2656, "step": 27763 }, { "epoch": 1.3006043003700754, "grad_norm": 0.5998524809239667, "learning_rate": 1.439211686157044e-06, "loss": 0.2757, "step": 27764 }, { "epoch": 1.3006511453600038, "grad_norm": 0.5954214526181527, "learning_rate": 1.439039958249897e-06, "loss": 0.2664, "step": 27765 }, { "epoch": 1.300697990349932, "grad_norm": 0.6101296906428378, "learning_rate": 1.4388682364483086e-06, "loss": 0.2756, "step": 27766 }, { "epoch": 1.3007448353398603, "grad_norm": 0.5679143500189542, "learning_rate": 1.4386965207532676e-06, "loss": 0.2668, "step": 27767 }, { "epoch": 1.3007916803297888, "grad_norm": 0.608808635412202, "learning_rate": 1.4385248111657607e-06, "loss": 0.2834, "step": 27768 }, { "epoch": 1.300838525319717, "grad_norm": 0.5613932165128299, "learning_rate": 1.4383531076867774e-06, "loss": 0.2732, "step": 27769 }, { "epoch": 1.3008853703096455, "grad_norm": 0.6337530426749451, "learning_rate": 1.4381814103173042e-06, "loss": 0.2895, "step": 27770 }, { "epoch": 1.3009322152995737, "grad_norm": 0.5870660739036729, "learning_rate": 1.4380097190583303e-06, "loss": 0.2728, "step": 27771 }, { "epoch": 1.3009790602895022, "grad_norm": 0.5950311200740502, "learning_rate": 1.4378380339108434e-06, "loss": 0.2692, "step": 27772 }, { "epoch": 1.3010259052794304, "grad_norm": 0.6117625105609246, "learning_rate": 1.4376663548758324e-06, "loss": 0.2952, "step": 27773 }, { "epoch": 1.3010727502693586, "grad_norm": 0.5877328594312221, "learning_rate": 1.4374946819542837e-06, "loss": 0.2713, "step": 27774 }, { "epoch": 1.301119595259287, "grad_norm": 0.5896246370113994, "learning_rate": 1.437323015147186e-06, "loss": 0.2707, "step": 27775 }, { "epoch": 1.3011664402492154, "grad_norm": 0.5507660884680802, "learning_rate": 1.4371513544555285e-06, "loss": 0.2513, "step": 27776 }, { "epoch": 1.3012132852391436, "grad_norm": 0.6423518992031054, "learning_rate": 1.4369796998802965e-06, "loss": 0.294, "step": 27777 }, { "epoch": 1.301260130229072, "grad_norm": 0.6315864760127947, "learning_rate": 1.4368080514224793e-06, "loss": 0.28, "step": 27778 }, { "epoch": 1.3013069752190003, "grad_norm": 0.5970668346257095, "learning_rate": 1.4366364090830655e-06, "loss": 0.2732, "step": 27779 }, { "epoch": 1.3013538202089285, "grad_norm": 0.6373549352030181, "learning_rate": 1.436464772863041e-06, "loss": 0.2766, "step": 27780 }, { "epoch": 1.301400665198857, "grad_norm": 0.5857932133667486, "learning_rate": 1.4362931427633944e-06, "loss": 0.2765, "step": 27781 }, { "epoch": 1.3014475101887852, "grad_norm": 0.5262714232418337, "learning_rate": 1.4361215187851136e-06, "loss": 0.2538, "step": 27782 }, { "epoch": 1.3014943551787137, "grad_norm": 0.6311296420254493, "learning_rate": 1.4359499009291867e-06, "loss": 0.2929, "step": 27783 }, { "epoch": 1.301541200168642, "grad_norm": 0.6407429949290805, "learning_rate": 1.4357782891966e-06, "loss": 0.2789, "step": 27784 }, { "epoch": 1.3015880451585704, "grad_norm": 0.5586917228979101, "learning_rate": 1.4356066835883426e-06, "loss": 0.2542, "step": 27785 }, { "epoch": 1.3016348901484986, "grad_norm": 0.6166898722313122, "learning_rate": 1.4354350841054e-06, "loss": 0.2803, "step": 27786 }, { "epoch": 1.3016817351384269, "grad_norm": 0.626572441782757, "learning_rate": 1.4352634907487612e-06, "loss": 0.278, "step": 27787 }, { "epoch": 1.3017285801283554, "grad_norm": 0.5740020975591081, "learning_rate": 1.4350919035194133e-06, "loss": 0.2568, "step": 27788 }, { "epoch": 1.3017754251182836, "grad_norm": 0.5633102777532749, "learning_rate": 1.434920322418344e-06, "loss": 0.2753, "step": 27789 }, { "epoch": 1.3018222701082118, "grad_norm": 0.54813288356251, "learning_rate": 1.434748747446541e-06, "loss": 0.271, "step": 27790 }, { "epoch": 1.3018691150981403, "grad_norm": 0.6627601084477189, "learning_rate": 1.4345771786049916e-06, "loss": 0.2843, "step": 27791 }, { "epoch": 1.3019159600880685, "grad_norm": 0.5930220353208937, "learning_rate": 1.434405615894682e-06, "loss": 0.2787, "step": 27792 }, { "epoch": 1.3019628050779968, "grad_norm": 0.5726099438050488, "learning_rate": 1.4342340593166e-06, "loss": 0.2767, "step": 27793 }, { "epoch": 1.3020096500679252, "grad_norm": 0.5592843176243757, "learning_rate": 1.434062508871733e-06, "loss": 0.274, "step": 27794 }, { "epoch": 1.3020564950578535, "grad_norm": 0.5813030393356701, "learning_rate": 1.4338909645610689e-06, "loss": 0.2788, "step": 27795 }, { "epoch": 1.302103340047782, "grad_norm": 0.5961051675359342, "learning_rate": 1.4337194263855936e-06, "loss": 0.2739, "step": 27796 }, { "epoch": 1.3021501850377102, "grad_norm": 0.6117991769410582, "learning_rate": 1.4335478943462965e-06, "loss": 0.277, "step": 27797 }, { "epoch": 1.3021970300276386, "grad_norm": 0.5685568959509975, "learning_rate": 1.4333763684441633e-06, "loss": 0.2615, "step": 27798 }, { "epoch": 1.3022438750175669, "grad_norm": 0.6013703745336572, "learning_rate": 1.4332048486801794e-06, "loss": 0.2784, "step": 27799 }, { "epoch": 1.3022907200074951, "grad_norm": 0.6157470481261149, "learning_rate": 1.433033335055334e-06, "loss": 0.2704, "step": 27800 }, { "epoch": 1.3023375649974236, "grad_norm": 0.6274636206555428, "learning_rate": 1.4328618275706135e-06, "loss": 0.2768, "step": 27801 }, { "epoch": 1.3023844099873518, "grad_norm": 0.597924955479411, "learning_rate": 1.4326903262270047e-06, "loss": 0.2627, "step": 27802 }, { "epoch": 1.30243125497728, "grad_norm": 0.5988131735956111, "learning_rate": 1.4325188310254962e-06, "loss": 0.2784, "step": 27803 }, { "epoch": 1.3024780999672085, "grad_norm": 0.5850499836319222, "learning_rate": 1.4323473419670723e-06, "loss": 0.2747, "step": 27804 }, { "epoch": 1.3025249449571368, "grad_norm": 0.5471677003721395, "learning_rate": 1.4321758590527219e-06, "loss": 0.249, "step": 27805 }, { "epoch": 1.3025717899470652, "grad_norm": 0.5611673012272422, "learning_rate": 1.4320043822834304e-06, "loss": 0.2529, "step": 27806 }, { "epoch": 1.3026186349369935, "grad_norm": 0.6050300570379513, "learning_rate": 1.4318329116601854e-06, "loss": 0.2801, "step": 27807 }, { "epoch": 1.302665479926922, "grad_norm": 0.5330391936291076, "learning_rate": 1.431661447183973e-06, "loss": 0.2597, "step": 27808 }, { "epoch": 1.3027123249168502, "grad_norm": 0.5684226907075424, "learning_rate": 1.4314899888557818e-06, "loss": 0.2613, "step": 27809 }, { "epoch": 1.3027591699067784, "grad_norm": 0.6158983506369393, "learning_rate": 1.4313185366765958e-06, "loss": 0.2796, "step": 27810 }, { "epoch": 1.3028060148967069, "grad_norm": 0.5627550894808993, "learning_rate": 1.4311470906474045e-06, "loss": 0.2402, "step": 27811 }, { "epoch": 1.3028528598866351, "grad_norm": 0.5585427820424202, "learning_rate": 1.4309756507691918e-06, "loss": 0.2582, "step": 27812 }, { "epoch": 1.3028997048765634, "grad_norm": 0.5839839152356177, "learning_rate": 1.4308042170429453e-06, "loss": 0.278, "step": 27813 }, { "epoch": 1.3029465498664918, "grad_norm": 0.5687357077695554, "learning_rate": 1.4306327894696522e-06, "loss": 0.2672, "step": 27814 }, { "epoch": 1.30299339485642, "grad_norm": 0.5641128929458408, "learning_rate": 1.4304613680502995e-06, "loss": 0.2613, "step": 27815 }, { "epoch": 1.3030402398463483, "grad_norm": 0.571390076551263, "learning_rate": 1.430289952785872e-06, "loss": 0.2546, "step": 27816 }, { "epoch": 1.3030870848362768, "grad_norm": 0.5842221727843409, "learning_rate": 1.430118543677357e-06, "loss": 0.281, "step": 27817 }, { "epoch": 1.303133929826205, "grad_norm": 0.6238318528995372, "learning_rate": 1.4299471407257414e-06, "loss": 0.2846, "step": 27818 }, { "epoch": 1.3031807748161335, "grad_norm": 0.5865200516480515, "learning_rate": 1.4297757439320103e-06, "loss": 0.2582, "step": 27819 }, { "epoch": 1.3032276198060617, "grad_norm": 0.6397658702384716, "learning_rate": 1.4296043532971507e-06, "loss": 0.2919, "step": 27820 }, { "epoch": 1.3032744647959902, "grad_norm": 0.6406029055332652, "learning_rate": 1.4294329688221492e-06, "loss": 0.293, "step": 27821 }, { "epoch": 1.3033213097859184, "grad_norm": 0.6158230364517264, "learning_rate": 1.4292615905079926e-06, "loss": 0.2727, "step": 27822 }, { "epoch": 1.3033681547758467, "grad_norm": 0.5484861747352657, "learning_rate": 1.4290902183556656e-06, "loss": 0.2536, "step": 27823 }, { "epoch": 1.3034149997657751, "grad_norm": 0.5857525388572967, "learning_rate": 1.4289188523661554e-06, "loss": 0.2754, "step": 27824 }, { "epoch": 1.3034618447557034, "grad_norm": 0.5814811707738791, "learning_rate": 1.4287474925404488e-06, "loss": 0.2697, "step": 27825 }, { "epoch": 1.3035086897456316, "grad_norm": 0.5860204948987147, "learning_rate": 1.4285761388795302e-06, "loss": 0.2708, "step": 27826 }, { "epoch": 1.30355553473556, "grad_norm": 0.5596888038515095, "learning_rate": 1.4284047913843868e-06, "loss": 0.2613, "step": 27827 }, { "epoch": 1.3036023797254883, "grad_norm": 0.6540985200964856, "learning_rate": 1.4282334500560051e-06, "loss": 0.2986, "step": 27828 }, { "epoch": 1.3036492247154166, "grad_norm": 0.6183674407430422, "learning_rate": 1.4280621148953697e-06, "loss": 0.2784, "step": 27829 }, { "epoch": 1.303696069705345, "grad_norm": 0.5863504727384536, "learning_rate": 1.4278907859034674e-06, "loss": 0.2772, "step": 27830 }, { "epoch": 1.3037429146952733, "grad_norm": 0.607827622674293, "learning_rate": 1.4277194630812841e-06, "loss": 0.2748, "step": 27831 }, { "epoch": 1.3037897596852017, "grad_norm": 0.5830523263190932, "learning_rate": 1.4275481464298068e-06, "loss": 0.2818, "step": 27832 }, { "epoch": 1.30383660467513, "grad_norm": 0.5864223610853564, "learning_rate": 1.4273768359500195e-06, "loss": 0.2763, "step": 27833 }, { "epoch": 1.3038834496650584, "grad_norm": 0.6013351485267644, "learning_rate": 1.42720553164291e-06, "loss": 0.2799, "step": 27834 }, { "epoch": 1.3039302946549867, "grad_norm": 0.566756535376819, "learning_rate": 1.4270342335094618e-06, "loss": 0.2601, "step": 27835 }, { "epoch": 1.303977139644915, "grad_norm": 0.5498415887686615, "learning_rate": 1.4268629415506618e-06, "loss": 0.2665, "step": 27836 }, { "epoch": 1.3040239846348434, "grad_norm": 0.5799370238338726, "learning_rate": 1.4266916557674965e-06, "loss": 0.2797, "step": 27837 }, { "epoch": 1.3040708296247716, "grad_norm": 0.6305017524930692, "learning_rate": 1.4265203761609503e-06, "loss": 0.2974, "step": 27838 }, { "epoch": 1.3041176746146999, "grad_norm": 0.565469190339049, "learning_rate": 1.4263491027320108e-06, "loss": 0.2581, "step": 27839 }, { "epoch": 1.3041645196046283, "grad_norm": 0.6429415207217711, "learning_rate": 1.426177835481662e-06, "loss": 0.2799, "step": 27840 }, { "epoch": 1.3042113645945566, "grad_norm": 0.5910829077863168, "learning_rate": 1.4260065744108892e-06, "loss": 0.2777, "step": 27841 }, { "epoch": 1.304258209584485, "grad_norm": 0.6543980263212701, "learning_rate": 1.4258353195206786e-06, "loss": 0.2788, "step": 27842 }, { "epoch": 1.3043050545744133, "grad_norm": 0.6098944274565417, "learning_rate": 1.4256640708120156e-06, "loss": 0.2767, "step": 27843 }, { "epoch": 1.3043518995643417, "grad_norm": 0.5476131153481338, "learning_rate": 1.4254928282858859e-06, "loss": 0.2538, "step": 27844 }, { "epoch": 1.30439874455427, "grad_norm": 0.6084542806108402, "learning_rate": 1.4253215919432762e-06, "loss": 0.2685, "step": 27845 }, { "epoch": 1.3044455895441982, "grad_norm": 0.5978151707510455, "learning_rate": 1.4251503617851694e-06, "loss": 0.2632, "step": 27846 }, { "epoch": 1.3044924345341267, "grad_norm": 0.6520642009559342, "learning_rate": 1.4249791378125533e-06, "loss": 0.2637, "step": 27847 }, { "epoch": 1.304539279524055, "grad_norm": 0.6039566090975422, "learning_rate": 1.424807920026411e-06, "loss": 0.2516, "step": 27848 }, { "epoch": 1.3045861245139831, "grad_norm": 0.6166355512367764, "learning_rate": 1.4246367084277284e-06, "loss": 0.2901, "step": 27849 }, { "epoch": 1.3046329695039116, "grad_norm": 0.5745144912214439, "learning_rate": 1.4244655030174916e-06, "loss": 0.2769, "step": 27850 }, { "epoch": 1.3046798144938399, "grad_norm": 0.5814279341510511, "learning_rate": 1.4242943037966856e-06, "loss": 0.2647, "step": 27851 }, { "epoch": 1.304726659483768, "grad_norm": 0.5800118801276212, "learning_rate": 1.4241231107662965e-06, "loss": 0.2677, "step": 27852 }, { "epoch": 1.3047735044736966, "grad_norm": 0.6046772605714057, "learning_rate": 1.4239519239273076e-06, "loss": 0.2758, "step": 27853 }, { "epoch": 1.3048203494636248, "grad_norm": 0.5688565001344804, "learning_rate": 1.4237807432807054e-06, "loss": 0.2778, "step": 27854 }, { "epoch": 1.3048671944535533, "grad_norm": 0.6198833799032097, "learning_rate": 1.4236095688274743e-06, "loss": 0.2839, "step": 27855 }, { "epoch": 1.3049140394434815, "grad_norm": 0.6092941959509607, "learning_rate": 1.423438400568599e-06, "loss": 0.2825, "step": 27856 }, { "epoch": 1.30496088443341, "grad_norm": 0.5961279954139814, "learning_rate": 1.4232672385050654e-06, "loss": 0.2767, "step": 27857 }, { "epoch": 1.3050077294233382, "grad_norm": 0.549833988719755, "learning_rate": 1.423096082637859e-06, "loss": 0.2657, "step": 27858 }, { "epoch": 1.3050545744132664, "grad_norm": 0.555719131112854, "learning_rate": 1.4229249329679628e-06, "loss": 0.2685, "step": 27859 }, { "epoch": 1.305101419403195, "grad_norm": 0.5585808275221585, "learning_rate": 1.4227537894963644e-06, "loss": 0.2683, "step": 27860 }, { "epoch": 1.3051482643931231, "grad_norm": 0.6041370543586846, "learning_rate": 1.422582652224046e-06, "loss": 0.2765, "step": 27861 }, { "epoch": 1.3051951093830514, "grad_norm": 0.5656877520553683, "learning_rate": 1.4224115211519934e-06, "loss": 0.2685, "step": 27862 }, { "epoch": 1.3052419543729799, "grad_norm": 0.5880511001653389, "learning_rate": 1.422240396281192e-06, "loss": 0.2783, "step": 27863 }, { "epoch": 1.305288799362908, "grad_norm": 0.5579791888284287, "learning_rate": 1.4220692776126271e-06, "loss": 0.2661, "step": 27864 }, { "epoch": 1.3053356443528363, "grad_norm": 0.5836423697717215, "learning_rate": 1.4218981651472816e-06, "loss": 0.2609, "step": 27865 }, { "epoch": 1.3053824893427648, "grad_norm": 0.6237667216480576, "learning_rate": 1.4217270588861409e-06, "loss": 0.2875, "step": 27866 }, { "epoch": 1.305429334332693, "grad_norm": 0.5747183243199914, "learning_rate": 1.4215559588301914e-06, "loss": 0.2685, "step": 27867 }, { "epoch": 1.3054761793226215, "grad_norm": 0.6306841374191376, "learning_rate": 1.4213848649804152e-06, "loss": 0.2913, "step": 27868 }, { "epoch": 1.3055230243125497, "grad_norm": 0.5700409065532558, "learning_rate": 1.421213777337798e-06, "loss": 0.2601, "step": 27869 }, { "epoch": 1.3055698693024782, "grad_norm": 0.6172443116048058, "learning_rate": 1.4210426959033253e-06, "loss": 0.2601, "step": 27870 }, { "epoch": 1.3056167142924064, "grad_norm": 0.6067684309036909, "learning_rate": 1.4208716206779796e-06, "loss": 0.2726, "step": 27871 }, { "epoch": 1.3056635592823347, "grad_norm": 0.5619870683441184, "learning_rate": 1.4207005516627465e-06, "loss": 0.2758, "step": 27872 }, { "epoch": 1.3057104042722631, "grad_norm": 0.5706555470974527, "learning_rate": 1.4205294888586109e-06, "loss": 0.2629, "step": 27873 }, { "epoch": 1.3057572492621914, "grad_norm": 0.5924925149150707, "learning_rate": 1.420358432266557e-06, "loss": 0.2776, "step": 27874 }, { "epoch": 1.3058040942521196, "grad_norm": 0.5994277764829311, "learning_rate": 1.4201873818875684e-06, "loss": 0.2749, "step": 27875 }, { "epoch": 1.305850939242048, "grad_norm": 0.596685298570345, "learning_rate": 1.4200163377226312e-06, "loss": 0.2861, "step": 27876 }, { "epoch": 1.3058977842319763, "grad_norm": 0.565328485731893, "learning_rate": 1.4198452997727272e-06, "loss": 0.2685, "step": 27877 }, { "epoch": 1.3059446292219048, "grad_norm": 0.5567485670928269, "learning_rate": 1.419674268038842e-06, "loss": 0.2611, "step": 27878 }, { "epoch": 1.305991474211833, "grad_norm": 0.5665270327275551, "learning_rate": 1.4195032425219602e-06, "loss": 0.259, "step": 27879 }, { "epoch": 1.3060383192017615, "grad_norm": 0.5571504816722962, "learning_rate": 1.4193322232230655e-06, "loss": 0.2577, "step": 27880 }, { "epoch": 1.3060851641916897, "grad_norm": 0.5917742066154041, "learning_rate": 1.4191612101431433e-06, "loss": 0.2701, "step": 27881 }, { "epoch": 1.306132009181618, "grad_norm": 0.5986487048806918, "learning_rate": 1.4189902032831765e-06, "loss": 0.2657, "step": 27882 }, { "epoch": 1.3061788541715464, "grad_norm": 0.6100628299014067, "learning_rate": 1.4188192026441483e-06, "loss": 0.2754, "step": 27883 }, { "epoch": 1.3062256991614747, "grad_norm": 0.5842782893001659, "learning_rate": 1.4186482082270442e-06, "loss": 0.2661, "step": 27884 }, { "epoch": 1.306272544151403, "grad_norm": 0.5730403185220966, "learning_rate": 1.4184772200328475e-06, "loss": 0.273, "step": 27885 }, { "epoch": 1.3063193891413314, "grad_norm": 0.611320567472519, "learning_rate": 1.4183062380625428e-06, "loss": 0.2764, "step": 27886 }, { "epoch": 1.3063662341312596, "grad_norm": 0.6181683210796206, "learning_rate": 1.4181352623171137e-06, "loss": 0.2913, "step": 27887 }, { "epoch": 1.3064130791211879, "grad_norm": 0.5919054221896666, "learning_rate": 1.4179642927975453e-06, "loss": 0.2766, "step": 27888 }, { "epoch": 1.3064599241111163, "grad_norm": 0.5549513429284589, "learning_rate": 1.4177933295048207e-06, "loss": 0.2577, "step": 27889 }, { "epoch": 1.3065067691010446, "grad_norm": 0.5960549652576554, "learning_rate": 1.417622372439922e-06, "loss": 0.2679, "step": 27890 }, { "epoch": 1.306553614090973, "grad_norm": 0.6124230468406167, "learning_rate": 1.4174514216038348e-06, "loss": 0.2761, "step": 27891 }, { "epoch": 1.3066004590809013, "grad_norm": 0.642245589174932, "learning_rate": 1.4172804769975425e-06, "loss": 0.2881, "step": 27892 }, { "epoch": 1.3066473040708297, "grad_norm": 0.6141677732870888, "learning_rate": 1.417109538622029e-06, "loss": 0.2667, "step": 27893 }, { "epoch": 1.306694149060758, "grad_norm": 0.526374367990353, "learning_rate": 1.4169386064782789e-06, "loss": 0.2584, "step": 27894 }, { "epoch": 1.3067409940506862, "grad_norm": 0.5714114274921687, "learning_rate": 1.4167676805672738e-06, "loss": 0.2595, "step": 27895 }, { "epoch": 1.3067878390406147, "grad_norm": 0.6007454865476454, "learning_rate": 1.4165967608899998e-06, "loss": 0.2728, "step": 27896 }, { "epoch": 1.306834684030543, "grad_norm": 0.6226462990525474, "learning_rate": 1.4164258474474377e-06, "loss": 0.2877, "step": 27897 }, { "epoch": 1.3068815290204712, "grad_norm": 0.6269827527121132, "learning_rate": 1.416254940240573e-06, "loss": 0.2779, "step": 27898 }, { "epoch": 1.3069283740103996, "grad_norm": 0.5839649191999854, "learning_rate": 1.416084039270388e-06, "loss": 0.2831, "step": 27899 }, { "epoch": 1.3069752190003279, "grad_norm": 0.5704230420163032, "learning_rate": 1.4159131445378673e-06, "loss": 0.2616, "step": 27900 }, { "epoch": 1.3070220639902561, "grad_norm": 0.598873571041569, "learning_rate": 1.4157422560439951e-06, "loss": 0.2671, "step": 27901 }, { "epoch": 1.3070689089801846, "grad_norm": 0.6501754152534533, "learning_rate": 1.4155713737897537e-06, "loss": 0.2959, "step": 27902 }, { "epoch": 1.3071157539701128, "grad_norm": 0.5933207111085105, "learning_rate": 1.4154004977761255e-06, "loss": 0.2763, "step": 27903 }, { "epoch": 1.3071625989600413, "grad_norm": 0.6441233906739413, "learning_rate": 1.4152296280040945e-06, "loss": 0.2803, "step": 27904 }, { "epoch": 1.3072094439499695, "grad_norm": 0.5960069143998955, "learning_rate": 1.4150587644746444e-06, "loss": 0.2679, "step": 27905 }, { "epoch": 1.307256288939898, "grad_norm": 0.5580601678540613, "learning_rate": 1.4148879071887586e-06, "loss": 0.2489, "step": 27906 }, { "epoch": 1.3073031339298262, "grad_norm": 0.5652511584376997, "learning_rate": 1.414717056147421e-06, "loss": 0.2676, "step": 27907 }, { "epoch": 1.3073499789197545, "grad_norm": 0.6025358534329642, "learning_rate": 1.4145462113516133e-06, "loss": 0.2754, "step": 27908 }, { "epoch": 1.307396823909683, "grad_norm": 0.6347718170726249, "learning_rate": 1.4143753728023202e-06, "loss": 0.2873, "step": 27909 }, { "epoch": 1.3074436688996112, "grad_norm": 0.5589278884961029, "learning_rate": 1.4142045405005229e-06, "loss": 0.2652, "step": 27910 }, { "epoch": 1.3074905138895394, "grad_norm": 0.5673245861152123, "learning_rate": 1.4140337144472055e-06, "loss": 0.2682, "step": 27911 }, { "epoch": 1.3075373588794679, "grad_norm": 0.6112512369588021, "learning_rate": 1.4138628946433514e-06, "loss": 0.2659, "step": 27912 }, { "epoch": 1.3075842038693961, "grad_norm": 0.6168198780765074, "learning_rate": 1.413692081089944e-06, "loss": 0.2861, "step": 27913 }, { "epoch": 1.3076310488593246, "grad_norm": 0.6607679423747903, "learning_rate": 1.4135212737879648e-06, "loss": 0.2692, "step": 27914 }, { "epoch": 1.3076778938492528, "grad_norm": 0.5897085393045494, "learning_rate": 1.4133504727383974e-06, "loss": 0.2595, "step": 27915 }, { "epoch": 1.3077247388391813, "grad_norm": 0.6079961449535312, "learning_rate": 1.4131796779422263e-06, "loss": 0.2798, "step": 27916 }, { "epoch": 1.3077715838291095, "grad_norm": 0.634697325161298, "learning_rate": 1.4130088894004319e-06, "loss": 0.2873, "step": 27917 }, { "epoch": 1.3078184288190378, "grad_norm": 0.5880481059682022, "learning_rate": 1.412838107113998e-06, "loss": 0.2798, "step": 27918 }, { "epoch": 1.3078652738089662, "grad_norm": 0.5767313366839578, "learning_rate": 1.4126673310839084e-06, "loss": 0.2735, "step": 27919 }, { "epoch": 1.3079121187988945, "grad_norm": 0.5845895090065663, "learning_rate": 1.4124965613111442e-06, "loss": 0.2768, "step": 27920 }, { "epoch": 1.3079589637888227, "grad_norm": 0.5955585025730306, "learning_rate": 1.4123257977966886e-06, "loss": 0.2789, "step": 27921 }, { "epoch": 1.3080058087787512, "grad_norm": 0.6028976295176435, "learning_rate": 1.4121550405415253e-06, "loss": 0.2739, "step": 27922 }, { "epoch": 1.3080526537686794, "grad_norm": 0.5888983349400522, "learning_rate": 1.4119842895466368e-06, "loss": 0.2831, "step": 27923 }, { "epoch": 1.3080994987586076, "grad_norm": 0.6048535257597788, "learning_rate": 1.4118135448130046e-06, "loss": 0.2673, "step": 27924 }, { "epoch": 1.3081463437485361, "grad_norm": 0.5801703566509009, "learning_rate": 1.4116428063416126e-06, "loss": 0.2519, "step": 27925 }, { "epoch": 1.3081931887384644, "grad_norm": 0.5894491161237936, "learning_rate": 1.411472074133442e-06, "loss": 0.2758, "step": 27926 }, { "epoch": 1.3082400337283928, "grad_norm": 0.6262408572629132, "learning_rate": 1.4113013481894761e-06, "loss": 0.2814, "step": 27927 }, { "epoch": 1.308286878718321, "grad_norm": 0.6133205648494019, "learning_rate": 1.4111306285106972e-06, "loss": 0.2774, "step": 27928 }, { "epoch": 1.3083337237082495, "grad_norm": 0.6119481561696878, "learning_rate": 1.4109599150980876e-06, "loss": 0.2801, "step": 27929 }, { "epoch": 1.3083805686981778, "grad_norm": 0.6460649363759282, "learning_rate": 1.4107892079526315e-06, "loss": 0.2743, "step": 27930 }, { "epoch": 1.308427413688106, "grad_norm": 0.5684532457435068, "learning_rate": 1.4106185070753093e-06, "loss": 0.2626, "step": 27931 }, { "epoch": 1.3084742586780345, "grad_norm": 0.5655309601752108, "learning_rate": 1.4104478124671028e-06, "loss": 0.2515, "step": 27932 }, { "epoch": 1.3085211036679627, "grad_norm": 0.5998154659260512, "learning_rate": 1.4102771241289954e-06, "loss": 0.2806, "step": 27933 }, { "epoch": 1.308567948657891, "grad_norm": 0.5657545186797048, "learning_rate": 1.4101064420619693e-06, "loss": 0.2768, "step": 27934 }, { "epoch": 1.3086147936478194, "grad_norm": 0.6210689613050682, "learning_rate": 1.4099357662670066e-06, "loss": 0.2786, "step": 27935 }, { "epoch": 1.3086616386377476, "grad_norm": 0.6047097783832556, "learning_rate": 1.4097650967450896e-06, "loss": 0.2815, "step": 27936 }, { "epoch": 1.308708483627676, "grad_norm": 0.5201310248755401, "learning_rate": 1.4095944334972014e-06, "loss": 0.253, "step": 27937 }, { "epoch": 1.3087553286176044, "grad_norm": 0.5581614108466948, "learning_rate": 1.4094237765243234e-06, "loss": 0.2638, "step": 27938 }, { "epoch": 1.3088021736075326, "grad_norm": 0.5702963504348322, "learning_rate": 1.409253125827436e-06, "loss": 0.2837, "step": 27939 }, { "epoch": 1.308849018597461, "grad_norm": 0.5692122144810553, "learning_rate": 1.4090824814075233e-06, "loss": 0.2643, "step": 27940 }, { "epoch": 1.3088958635873893, "grad_norm": 0.607846050575053, "learning_rate": 1.4089118432655663e-06, "loss": 0.2821, "step": 27941 }, { "epoch": 1.3089427085773178, "grad_norm": 0.5697552875605054, "learning_rate": 1.4087412114025476e-06, "loss": 0.2613, "step": 27942 }, { "epoch": 1.308989553567246, "grad_norm": 0.6047274378009991, "learning_rate": 1.4085705858194498e-06, "loss": 0.2753, "step": 27943 }, { "epoch": 1.3090363985571742, "grad_norm": 0.6017808289982837, "learning_rate": 1.4083999665172532e-06, "loss": 0.2738, "step": 27944 }, { "epoch": 1.3090832435471027, "grad_norm": 0.5554793032067925, "learning_rate": 1.4082293534969413e-06, "loss": 0.2629, "step": 27945 }, { "epoch": 1.309130088537031, "grad_norm": 0.5672757573612994, "learning_rate": 1.4080587467594942e-06, "loss": 0.2712, "step": 27946 }, { "epoch": 1.3091769335269592, "grad_norm": 0.6509499526142885, "learning_rate": 1.4078881463058942e-06, "loss": 0.288, "step": 27947 }, { "epoch": 1.3092237785168876, "grad_norm": 0.6190462878143445, "learning_rate": 1.4077175521371236e-06, "loss": 0.2829, "step": 27948 }, { "epoch": 1.309270623506816, "grad_norm": 0.5999726411887562, "learning_rate": 1.4075469642541651e-06, "loss": 0.2823, "step": 27949 }, { "epoch": 1.3093174684967444, "grad_norm": 0.6110349552756065, "learning_rate": 1.407376382657998e-06, "loss": 0.2849, "step": 27950 }, { "epoch": 1.3093643134866726, "grad_norm": 0.5428930318043151, "learning_rate": 1.4072058073496063e-06, "loss": 0.2614, "step": 27951 }, { "epoch": 1.309411158476601, "grad_norm": 0.6294345933503117, "learning_rate": 1.4070352383299695e-06, "loss": 0.2714, "step": 27952 }, { "epoch": 1.3094580034665293, "grad_norm": 0.5942477430885867, "learning_rate": 1.4068646756000704e-06, "loss": 0.2829, "step": 27953 }, { "epoch": 1.3095048484564575, "grad_norm": 0.6028417043289461, "learning_rate": 1.40669411916089e-06, "loss": 0.2795, "step": 27954 }, { "epoch": 1.309551693446386, "grad_norm": 0.58011430023621, "learning_rate": 1.4065235690134115e-06, "loss": 0.2682, "step": 27955 }, { "epoch": 1.3095985384363142, "grad_norm": 0.5945191232062013, "learning_rate": 1.4063530251586139e-06, "loss": 0.2706, "step": 27956 }, { "epoch": 1.3096453834262425, "grad_norm": 0.5666376259712625, "learning_rate": 1.40618248759748e-06, "loss": 0.2711, "step": 27957 }, { "epoch": 1.309692228416171, "grad_norm": 0.5426598275711091, "learning_rate": 1.4060119563309916e-06, "loss": 0.2505, "step": 27958 }, { "epoch": 1.3097390734060992, "grad_norm": 0.5999510798300043, "learning_rate": 1.4058414313601285e-06, "loss": 0.2594, "step": 27959 }, { "epoch": 1.3097859183960274, "grad_norm": 0.5929418791013262, "learning_rate": 1.4056709126858732e-06, "loss": 0.293, "step": 27960 }, { "epoch": 1.309832763385956, "grad_norm": 0.5577085062398481, "learning_rate": 1.4055004003092063e-06, "loss": 0.2621, "step": 27961 }, { "epoch": 1.3098796083758841, "grad_norm": 0.5679482183723934, "learning_rate": 1.4053298942311105e-06, "loss": 0.2496, "step": 27962 }, { "epoch": 1.3099264533658126, "grad_norm": 0.6248493674064638, "learning_rate": 1.4051593944525655e-06, "loss": 0.2842, "step": 27963 }, { "epoch": 1.3099732983557408, "grad_norm": 0.6444451445957242, "learning_rate": 1.4049889009745526e-06, "loss": 0.2971, "step": 27964 }, { "epoch": 1.3100201433456693, "grad_norm": 0.5674528768899407, "learning_rate": 1.4048184137980548e-06, "loss": 0.2639, "step": 27965 }, { "epoch": 1.3100669883355975, "grad_norm": 0.5859283619094556, "learning_rate": 1.4046479329240504e-06, "loss": 0.2811, "step": 27966 }, { "epoch": 1.3101138333255258, "grad_norm": 0.5734774586867741, "learning_rate": 1.4044774583535217e-06, "loss": 0.2431, "step": 27967 }, { "epoch": 1.3101606783154542, "grad_norm": 0.5930305537034649, "learning_rate": 1.4043069900874513e-06, "loss": 0.2718, "step": 27968 }, { "epoch": 1.3102075233053825, "grad_norm": 0.588391778825993, "learning_rate": 1.4041365281268178e-06, "loss": 0.2633, "step": 27969 }, { "epoch": 1.3102543682953107, "grad_norm": 0.5954360125232578, "learning_rate": 1.4039660724726027e-06, "loss": 0.2614, "step": 27970 }, { "epoch": 1.3103012132852392, "grad_norm": 0.621829304470453, "learning_rate": 1.4037956231257877e-06, "loss": 0.2939, "step": 27971 }, { "epoch": 1.3103480582751674, "grad_norm": 0.5896458848855136, "learning_rate": 1.403625180087354e-06, "loss": 0.2775, "step": 27972 }, { "epoch": 1.3103949032650957, "grad_norm": 0.5723154279901288, "learning_rate": 1.4034547433582812e-06, "loss": 0.2752, "step": 27973 }, { "epoch": 1.3104417482550241, "grad_norm": 0.5737120942429244, "learning_rate": 1.4032843129395516e-06, "loss": 0.274, "step": 27974 }, { "epoch": 1.3104885932449524, "grad_norm": 0.6205423915656182, "learning_rate": 1.4031138888321442e-06, "loss": 0.288, "step": 27975 }, { "epoch": 1.3105354382348808, "grad_norm": 0.5828495406653186, "learning_rate": 1.4029434710370405e-06, "loss": 0.2714, "step": 27976 }, { "epoch": 1.310582283224809, "grad_norm": 0.5558764181955906, "learning_rate": 1.4027730595552216e-06, "loss": 0.2622, "step": 27977 }, { "epoch": 1.3106291282147375, "grad_norm": 0.6012181273034222, "learning_rate": 1.4026026543876682e-06, "loss": 0.2832, "step": 27978 }, { "epoch": 1.3106759732046658, "grad_norm": 0.5747006141412674, "learning_rate": 1.4024322555353612e-06, "loss": 0.2814, "step": 27979 }, { "epoch": 1.310722818194594, "grad_norm": 0.6449015947909095, "learning_rate": 1.4022618629992806e-06, "loss": 0.2676, "step": 27980 }, { "epoch": 1.3107696631845225, "grad_norm": 0.63343193528664, "learning_rate": 1.4020914767804067e-06, "loss": 0.296, "step": 27981 }, { "epoch": 1.3108165081744507, "grad_norm": 0.587601080367697, "learning_rate": 1.4019210968797201e-06, "loss": 0.2631, "step": 27982 }, { "epoch": 1.310863353164379, "grad_norm": 0.6084739429454542, "learning_rate": 1.4017507232982015e-06, "loss": 0.2843, "step": 27983 }, { "epoch": 1.3109101981543074, "grad_norm": 0.661558995565211, "learning_rate": 1.4015803560368315e-06, "loss": 0.3079, "step": 27984 }, { "epoch": 1.3109570431442357, "grad_norm": 0.6026680298947471, "learning_rate": 1.4014099950965914e-06, "loss": 0.2669, "step": 27985 }, { "epoch": 1.3110038881341641, "grad_norm": 0.600891865348145, "learning_rate": 1.4012396404784601e-06, "loss": 0.2661, "step": 27986 }, { "epoch": 1.3110507331240924, "grad_norm": 0.5755227566734965, "learning_rate": 1.401069292183419e-06, "loss": 0.2604, "step": 27987 }, { "epoch": 1.3110975781140208, "grad_norm": 0.621747640870489, "learning_rate": 1.400898950212447e-06, "loss": 0.2721, "step": 27988 }, { "epoch": 1.311144423103949, "grad_norm": 0.5678521993050036, "learning_rate": 1.4007286145665255e-06, "loss": 0.2822, "step": 27989 }, { "epoch": 1.3111912680938773, "grad_norm": 0.5779434045459424, "learning_rate": 1.4005582852466344e-06, "loss": 0.288, "step": 27990 }, { "epoch": 1.3112381130838058, "grad_norm": 0.5943607492103088, "learning_rate": 1.4003879622537543e-06, "loss": 0.2667, "step": 27991 }, { "epoch": 1.311284958073734, "grad_norm": 0.5760776491331151, "learning_rate": 1.4002176455888655e-06, "loss": 0.257, "step": 27992 }, { "epoch": 1.3113318030636623, "grad_norm": 0.616020293860383, "learning_rate": 1.400047335252947e-06, "loss": 0.2703, "step": 27993 }, { "epoch": 1.3113786480535907, "grad_norm": 0.6075554416739314, "learning_rate": 1.399877031246981e-06, "loss": 0.2752, "step": 27994 }, { "epoch": 1.311425493043519, "grad_norm": 0.6181557677424252, "learning_rate": 1.3997067335719447e-06, "loss": 0.2845, "step": 27995 }, { "epoch": 1.3114723380334472, "grad_norm": 0.5706954762184531, "learning_rate": 1.39953644222882e-06, "loss": 0.2542, "step": 27996 }, { "epoch": 1.3115191830233757, "grad_norm": 0.6308174794180446, "learning_rate": 1.399366157218586e-06, "loss": 0.2758, "step": 27997 }, { "epoch": 1.311566028013304, "grad_norm": 0.5838580400881191, "learning_rate": 1.3991958785422243e-06, "loss": 0.2789, "step": 27998 }, { "epoch": 1.3116128730032324, "grad_norm": 0.6109790094054504, "learning_rate": 1.399025606200713e-06, "loss": 0.2651, "step": 27999 }, { "epoch": 1.3116597179931606, "grad_norm": 0.5990631261456437, "learning_rate": 1.3988553401950334e-06, "loss": 0.2776, "step": 28000 }, { "epoch": 1.311706562983089, "grad_norm": 0.5667408639432052, "learning_rate": 1.3986850805261632e-06, "loss": 0.2744, "step": 28001 }, { "epoch": 1.3117534079730173, "grad_norm": 0.639167421201948, "learning_rate": 1.398514827195084e-06, "loss": 0.2817, "step": 28002 }, { "epoch": 1.3118002529629456, "grad_norm": 0.6085911863984125, "learning_rate": 1.398344580202775e-06, "loss": 0.2635, "step": 28003 }, { "epoch": 1.311847097952874, "grad_norm": 0.6387823205173214, "learning_rate": 1.398174339550217e-06, "loss": 0.2838, "step": 28004 }, { "epoch": 1.3118939429428023, "grad_norm": 0.6417114911992788, "learning_rate": 1.3980041052383879e-06, "loss": 0.2732, "step": 28005 }, { "epoch": 1.3119407879327305, "grad_norm": 0.6324603553890441, "learning_rate": 1.3978338772682682e-06, "loss": 0.2845, "step": 28006 }, { "epoch": 1.311987632922659, "grad_norm": 0.6154132635873448, "learning_rate": 1.3976636556408382e-06, "loss": 0.2783, "step": 28007 }, { "epoch": 1.3120344779125872, "grad_norm": 0.5650914805911821, "learning_rate": 1.3974934403570761e-06, "loss": 0.2694, "step": 28008 }, { "epoch": 1.3120813229025154, "grad_norm": 0.6247151868099226, "learning_rate": 1.3973232314179619e-06, "loss": 0.2795, "step": 28009 }, { "epoch": 1.312128167892444, "grad_norm": 0.5821307464004046, "learning_rate": 1.3971530288244767e-06, "loss": 0.2798, "step": 28010 }, { "epoch": 1.3121750128823721, "grad_norm": 0.5652891960611124, "learning_rate": 1.3969828325775972e-06, "loss": 0.2636, "step": 28011 }, { "epoch": 1.3122218578723006, "grad_norm": 0.5882234131374967, "learning_rate": 1.3968126426783046e-06, "loss": 0.2713, "step": 28012 }, { "epoch": 1.3122687028622289, "grad_norm": 0.634837542096158, "learning_rate": 1.3966424591275778e-06, "loss": 0.2634, "step": 28013 }, { "epoch": 1.3123155478521573, "grad_norm": 0.5720300491713498, "learning_rate": 1.3964722819263974e-06, "loss": 0.2692, "step": 28014 }, { "epoch": 1.3123623928420856, "grad_norm": 0.6042624071341146, "learning_rate": 1.3963021110757408e-06, "loss": 0.2738, "step": 28015 }, { "epoch": 1.3124092378320138, "grad_norm": 0.598751317407997, "learning_rate": 1.3961319465765888e-06, "loss": 0.2778, "step": 28016 }, { "epoch": 1.3124560828219423, "grad_norm": 0.5891887763464321, "learning_rate": 1.3959617884299193e-06, "loss": 0.2516, "step": 28017 }, { "epoch": 1.3125029278118705, "grad_norm": 0.6210759790557401, "learning_rate": 1.395791636636712e-06, "loss": 0.2984, "step": 28018 }, { "epoch": 1.3125497728017987, "grad_norm": 0.6044885640364903, "learning_rate": 1.3956214911979465e-06, "loss": 0.281, "step": 28019 }, { "epoch": 1.3125966177917272, "grad_norm": 0.5607428514008747, "learning_rate": 1.3954513521146016e-06, "loss": 0.2582, "step": 28020 }, { "epoch": 1.3126434627816554, "grad_norm": 0.5943740866139128, "learning_rate": 1.3952812193876575e-06, "loss": 0.273, "step": 28021 }, { "epoch": 1.312690307771584, "grad_norm": 0.5881772331516375, "learning_rate": 1.3951110930180925e-06, "loss": 0.2603, "step": 28022 }, { "epoch": 1.3127371527615121, "grad_norm": 0.6345078445753661, "learning_rate": 1.3949409730068843e-06, "loss": 0.2893, "step": 28023 }, { "epoch": 1.3127839977514406, "grad_norm": 0.6112653176722486, "learning_rate": 1.394770859355013e-06, "loss": 0.2788, "step": 28024 }, { "epoch": 1.3128308427413689, "grad_norm": 0.5852269714227338, "learning_rate": 1.394600752063458e-06, "loss": 0.2713, "step": 28025 }, { "epoch": 1.312877687731297, "grad_norm": 0.5687070605001497, "learning_rate": 1.3944306511331977e-06, "loss": 0.2679, "step": 28026 }, { "epoch": 1.3129245327212256, "grad_norm": 0.5978042634309616, "learning_rate": 1.3942605565652107e-06, "loss": 0.2795, "step": 28027 }, { "epoch": 1.3129713777111538, "grad_norm": 0.5713315584301936, "learning_rate": 1.3940904683604778e-06, "loss": 0.2585, "step": 28028 }, { "epoch": 1.313018222701082, "grad_norm": 0.6146077433582503, "learning_rate": 1.3939203865199761e-06, "loss": 0.2722, "step": 28029 }, { "epoch": 1.3130650676910105, "grad_norm": 0.6098688985160157, "learning_rate": 1.3937503110446835e-06, "loss": 0.2747, "step": 28030 }, { "epoch": 1.3131119126809387, "grad_norm": 0.5879051878642237, "learning_rate": 1.3935802419355797e-06, "loss": 0.2697, "step": 28031 }, { "epoch": 1.313158757670867, "grad_norm": 0.6572807475034895, "learning_rate": 1.3934101791936439e-06, "loss": 0.3064, "step": 28032 }, { "epoch": 1.3132056026607954, "grad_norm": 0.5620563746162627, "learning_rate": 1.3932401228198541e-06, "loss": 0.2719, "step": 28033 }, { "epoch": 1.3132524476507237, "grad_norm": 0.5975185737297614, "learning_rate": 1.3930700728151903e-06, "loss": 0.2886, "step": 28034 }, { "epoch": 1.3132992926406521, "grad_norm": 0.5933990119487323, "learning_rate": 1.3929000291806292e-06, "loss": 0.2829, "step": 28035 }, { "epoch": 1.3133461376305804, "grad_norm": 0.5708937287853265, "learning_rate": 1.3927299919171508e-06, "loss": 0.2541, "step": 28036 }, { "epoch": 1.3133929826205089, "grad_norm": 0.6271670228197515, "learning_rate": 1.3925599610257324e-06, "loss": 0.2727, "step": 28037 }, { "epoch": 1.313439827610437, "grad_norm": 0.5879573314621223, "learning_rate": 1.392389936507353e-06, "loss": 0.2635, "step": 28038 }, { "epoch": 1.3134866726003653, "grad_norm": 0.584026364020446, "learning_rate": 1.392219918362991e-06, "loss": 0.2888, "step": 28039 }, { "epoch": 1.3135335175902938, "grad_norm": 0.5992637250720961, "learning_rate": 1.3920499065936246e-06, "loss": 0.2708, "step": 28040 }, { "epoch": 1.313580362580222, "grad_norm": 0.5414211308706164, "learning_rate": 1.3918799012002337e-06, "loss": 0.264, "step": 28041 }, { "epoch": 1.3136272075701503, "grad_norm": 0.6323612267176653, "learning_rate": 1.391709902183796e-06, "loss": 0.3018, "step": 28042 }, { "epoch": 1.3136740525600787, "grad_norm": 0.6179660769140434, "learning_rate": 1.3915399095452876e-06, "loss": 0.2739, "step": 28043 }, { "epoch": 1.313720897550007, "grad_norm": 0.5651175869519961, "learning_rate": 1.3913699232856887e-06, "loss": 0.2636, "step": 28044 }, { "epoch": 1.3137677425399352, "grad_norm": 0.5874203584624627, "learning_rate": 1.3911999434059772e-06, "loss": 0.2811, "step": 28045 }, { "epoch": 1.3138145875298637, "grad_norm": 0.5634373573743474, "learning_rate": 1.3910299699071315e-06, "loss": 0.259, "step": 28046 }, { "epoch": 1.313861432519792, "grad_norm": 0.6071213010260551, "learning_rate": 1.3908600027901304e-06, "loss": 0.2732, "step": 28047 }, { "epoch": 1.3139082775097204, "grad_norm": 0.5749146023354079, "learning_rate": 1.3906900420559503e-06, "loss": 0.2594, "step": 28048 }, { "epoch": 1.3139551224996486, "grad_norm": 0.5841770483004719, "learning_rate": 1.390520087705571e-06, "loss": 0.2765, "step": 28049 }, { "epoch": 1.314001967489577, "grad_norm": 0.5821011025525125, "learning_rate": 1.390350139739969e-06, "loss": 0.2841, "step": 28050 }, { "epoch": 1.3140488124795053, "grad_norm": 0.5971501946590695, "learning_rate": 1.3901801981601231e-06, "loss": 0.2721, "step": 28051 }, { "epoch": 1.3140956574694336, "grad_norm": 0.6065465056066797, "learning_rate": 1.390010262967011e-06, "loss": 0.2936, "step": 28052 }, { "epoch": 1.314142502459362, "grad_norm": 0.577982374926317, "learning_rate": 1.3898403341616118e-06, "loss": 0.288, "step": 28053 }, { "epoch": 1.3141893474492903, "grad_norm": 0.6047401848450629, "learning_rate": 1.3896704117449017e-06, "loss": 0.2827, "step": 28054 }, { "epoch": 1.3142361924392185, "grad_norm": 0.6212832041441885, "learning_rate": 1.3895004957178594e-06, "loss": 0.2973, "step": 28055 }, { "epoch": 1.314283037429147, "grad_norm": 0.5572101784555713, "learning_rate": 1.3893305860814632e-06, "loss": 0.2461, "step": 28056 }, { "epoch": 1.3143298824190752, "grad_norm": 0.6164591709820462, "learning_rate": 1.3891606828366893e-06, "loss": 0.2734, "step": 28057 }, { "epoch": 1.3143767274090037, "grad_norm": 0.5942544599683152, "learning_rate": 1.388990785984517e-06, "loss": 0.2722, "step": 28058 }, { "epoch": 1.314423572398932, "grad_norm": 0.5914785480145813, "learning_rate": 1.388820895525924e-06, "loss": 0.2772, "step": 28059 }, { "epoch": 1.3144704173888604, "grad_norm": 0.6272885498927764, "learning_rate": 1.3886510114618866e-06, "loss": 0.2898, "step": 28060 }, { "epoch": 1.3145172623787886, "grad_norm": 0.5713930796669023, "learning_rate": 1.3884811337933833e-06, "loss": 0.2645, "step": 28061 }, { "epoch": 1.3145641073687169, "grad_norm": 0.5917411000117576, "learning_rate": 1.3883112625213919e-06, "loss": 0.2511, "step": 28062 }, { "epoch": 1.3146109523586453, "grad_norm": 0.619189605055701, "learning_rate": 1.3881413976468905e-06, "loss": 0.2669, "step": 28063 }, { "epoch": 1.3146577973485736, "grad_norm": 0.5720450299076427, "learning_rate": 1.387971539170855e-06, "loss": 0.2657, "step": 28064 }, { "epoch": 1.3147046423385018, "grad_norm": 0.6002270544999775, "learning_rate": 1.3878016870942652e-06, "loss": 0.2569, "step": 28065 }, { "epoch": 1.3147514873284303, "grad_norm": 0.5627131994697535, "learning_rate": 1.3876318414180955e-06, "loss": 0.2678, "step": 28066 }, { "epoch": 1.3147983323183585, "grad_norm": 0.6091246379514973, "learning_rate": 1.3874620021433252e-06, "loss": 0.2827, "step": 28067 }, { "epoch": 1.3148451773082868, "grad_norm": 0.6496904821686064, "learning_rate": 1.3872921692709314e-06, "loss": 0.291, "step": 28068 }, { "epoch": 1.3148920222982152, "grad_norm": 0.5814657791632674, "learning_rate": 1.3871223428018918e-06, "loss": 0.2554, "step": 28069 }, { "epoch": 1.3149388672881435, "grad_norm": 0.5760764137355695, "learning_rate": 1.3869525227371842e-06, "loss": 0.2664, "step": 28070 }, { "epoch": 1.314985712278072, "grad_norm": 0.5971069016508538, "learning_rate": 1.386782709077785e-06, "loss": 0.2877, "step": 28071 }, { "epoch": 1.3150325572680002, "grad_norm": 0.6608093803493097, "learning_rate": 1.3866129018246705e-06, "loss": 0.2952, "step": 28072 }, { "epoch": 1.3150794022579286, "grad_norm": 0.6302893908188287, "learning_rate": 1.386443100978819e-06, "loss": 0.2877, "step": 28073 }, { "epoch": 1.3151262472478569, "grad_norm": 0.5695416273988078, "learning_rate": 1.3862733065412076e-06, "loss": 0.2688, "step": 28074 }, { "epoch": 1.3151730922377851, "grad_norm": 0.6106750607484611, "learning_rate": 1.3861035185128134e-06, "loss": 0.2811, "step": 28075 }, { "epoch": 1.3152199372277136, "grad_norm": 0.601943830461928, "learning_rate": 1.3859337368946135e-06, "loss": 0.2794, "step": 28076 }, { "epoch": 1.3152667822176418, "grad_norm": 0.5709883658212882, "learning_rate": 1.3857639616875857e-06, "loss": 0.2686, "step": 28077 }, { "epoch": 1.31531362720757, "grad_norm": 0.664473494069148, "learning_rate": 1.3855941928927063e-06, "loss": 0.2796, "step": 28078 }, { "epoch": 1.3153604721974985, "grad_norm": 0.6074376325259495, "learning_rate": 1.3854244305109515e-06, "loss": 0.2657, "step": 28079 }, { "epoch": 1.3154073171874268, "grad_norm": 0.628484537208586, "learning_rate": 1.3852546745432988e-06, "loss": 0.2612, "step": 28080 }, { "epoch": 1.315454162177355, "grad_norm": 0.5793283791595741, "learning_rate": 1.3850849249907256e-06, "loss": 0.2899, "step": 28081 }, { "epoch": 1.3155010071672835, "grad_norm": 0.6055625123208626, "learning_rate": 1.3849151818542077e-06, "loss": 0.2789, "step": 28082 }, { "epoch": 1.3155478521572117, "grad_norm": 0.5828840637678042, "learning_rate": 1.3847454451347242e-06, "loss": 0.2634, "step": 28083 }, { "epoch": 1.3155946971471402, "grad_norm": 0.5994117921693419, "learning_rate": 1.3845757148332494e-06, "loss": 0.2754, "step": 28084 }, { "epoch": 1.3156415421370684, "grad_norm": 0.59438936048093, "learning_rate": 1.3844059909507618e-06, "loss": 0.2808, "step": 28085 }, { "epoch": 1.3156883871269969, "grad_norm": 0.634657221778771, "learning_rate": 1.3842362734882363e-06, "loss": 0.2867, "step": 28086 }, { "epoch": 1.3157352321169251, "grad_norm": 0.5818601911397261, "learning_rate": 1.384066562446651e-06, "loss": 0.2606, "step": 28087 }, { "epoch": 1.3157820771068534, "grad_norm": 0.5809619305264332, "learning_rate": 1.3838968578269817e-06, "loss": 0.2723, "step": 28088 }, { "epoch": 1.3158289220967818, "grad_norm": 0.5874588146905688, "learning_rate": 1.3837271596302066e-06, "loss": 0.2572, "step": 28089 }, { "epoch": 1.31587576708671, "grad_norm": 0.5508173961942856, "learning_rate": 1.3835574678573e-06, "loss": 0.2528, "step": 28090 }, { "epoch": 1.3159226120766383, "grad_norm": 0.6743726660119665, "learning_rate": 1.3833877825092408e-06, "loss": 0.2841, "step": 28091 }, { "epoch": 1.3159694570665668, "grad_norm": 0.5601097112720499, "learning_rate": 1.383218103587003e-06, "loss": 0.265, "step": 28092 }, { "epoch": 1.316016302056495, "grad_norm": 0.6235527908801822, "learning_rate": 1.3830484310915646e-06, "loss": 0.2843, "step": 28093 }, { "epoch": 1.3160631470464235, "grad_norm": 0.5970437511785893, "learning_rate": 1.3828787650239017e-06, "loss": 0.2735, "step": 28094 }, { "epoch": 1.3161099920363517, "grad_norm": 0.6162323099382394, "learning_rate": 1.3827091053849907e-06, "loss": 0.2674, "step": 28095 }, { "epoch": 1.3161568370262802, "grad_norm": 0.5547597138342403, "learning_rate": 1.3825394521758087e-06, "loss": 0.2657, "step": 28096 }, { "epoch": 1.3162036820162084, "grad_norm": 0.5625681661777547, "learning_rate": 1.3823698053973306e-06, "loss": 0.2631, "step": 28097 }, { "epoch": 1.3162505270061367, "grad_norm": 0.5586740062618702, "learning_rate": 1.3822001650505343e-06, "loss": 0.2486, "step": 28098 }, { "epoch": 1.3162973719960651, "grad_norm": 0.6287644869921407, "learning_rate": 1.382030531136394e-06, "loss": 0.2804, "step": 28099 }, { "epoch": 1.3163442169859934, "grad_norm": 0.5722557424200644, "learning_rate": 1.3818609036558873e-06, "loss": 0.26, "step": 28100 }, { "epoch": 1.3163910619759216, "grad_norm": 0.5731133962173963, "learning_rate": 1.38169128260999e-06, "loss": 0.2805, "step": 28101 }, { "epoch": 1.31643790696585, "grad_norm": 0.9753583171849168, "learning_rate": 1.381521667999679e-06, "loss": 0.2884, "step": 28102 }, { "epoch": 1.3164847519557783, "grad_norm": 0.6139340532450118, "learning_rate": 1.3813520598259286e-06, "loss": 0.2746, "step": 28103 }, { "epoch": 1.3165315969457065, "grad_norm": 0.5862047227746982, "learning_rate": 1.3811824580897161e-06, "loss": 0.2941, "step": 28104 }, { "epoch": 1.316578441935635, "grad_norm": 0.6059864325481624, "learning_rate": 1.3810128627920186e-06, "loss": 0.2718, "step": 28105 }, { "epoch": 1.3166252869255632, "grad_norm": 0.5944179570930197, "learning_rate": 1.3808432739338095e-06, "loss": 0.2703, "step": 28106 }, { "epoch": 1.3166721319154917, "grad_norm": 0.5556560742860327, "learning_rate": 1.3806736915160662e-06, "loss": 0.2555, "step": 28107 }, { "epoch": 1.31671897690542, "grad_norm": 0.6227849493771976, "learning_rate": 1.3805041155397656e-06, "loss": 0.2653, "step": 28108 }, { "epoch": 1.3167658218953484, "grad_norm": 0.5889464200657597, "learning_rate": 1.3803345460058815e-06, "loss": 0.2774, "step": 28109 }, { "epoch": 1.3168126668852767, "grad_norm": 0.6316180149111333, "learning_rate": 1.3801649829153902e-06, "loss": 0.2852, "step": 28110 }, { "epoch": 1.316859511875205, "grad_norm": 0.6272301130223339, "learning_rate": 1.3799954262692682e-06, "loss": 0.2787, "step": 28111 }, { "epoch": 1.3169063568651334, "grad_norm": 0.5853191332997411, "learning_rate": 1.3798258760684923e-06, "loss": 0.2528, "step": 28112 }, { "epoch": 1.3169532018550616, "grad_norm": 0.5996138881712978, "learning_rate": 1.3796563323140354e-06, "loss": 0.2843, "step": 28113 }, { "epoch": 1.3170000468449898, "grad_norm": 0.584906429100769, "learning_rate": 1.3794867950068764e-06, "loss": 0.2615, "step": 28114 }, { "epoch": 1.3170468918349183, "grad_norm": 0.640742693210019, "learning_rate": 1.379317264147988e-06, "loss": 0.2768, "step": 28115 }, { "epoch": 1.3170937368248465, "grad_norm": 0.6115902159172527, "learning_rate": 1.3791477397383469e-06, "loss": 0.2961, "step": 28116 }, { "epoch": 1.3171405818147748, "grad_norm": 0.5698185280406444, "learning_rate": 1.3789782217789289e-06, "loss": 0.2569, "step": 28117 }, { "epoch": 1.3171874268047032, "grad_norm": 0.5744502689958307, "learning_rate": 1.3788087102707093e-06, "loss": 0.2546, "step": 28118 }, { "epoch": 1.3172342717946315, "grad_norm": 0.6100134020462639, "learning_rate": 1.3786392052146649e-06, "loss": 0.2847, "step": 28119 }, { "epoch": 1.31728111678456, "grad_norm": 0.6460789195986939, "learning_rate": 1.3784697066117702e-06, "loss": 0.2829, "step": 28120 }, { "epoch": 1.3173279617744882, "grad_norm": 0.5709883157861911, "learning_rate": 1.3783002144629992e-06, "loss": 0.2649, "step": 28121 }, { "epoch": 1.3173748067644167, "grad_norm": 0.5825212617878575, "learning_rate": 1.3781307287693288e-06, "loss": 0.2653, "step": 28122 }, { "epoch": 1.317421651754345, "grad_norm": 0.6448354416023723, "learning_rate": 1.377961249531734e-06, "loss": 0.2991, "step": 28123 }, { "epoch": 1.3174684967442731, "grad_norm": 0.613535854382667, "learning_rate": 1.37779177675119e-06, "loss": 0.2795, "step": 28124 }, { "epoch": 1.3175153417342016, "grad_norm": 0.5829737933927157, "learning_rate": 1.3776223104286734e-06, "loss": 0.2622, "step": 28125 }, { "epoch": 1.3175621867241298, "grad_norm": 0.5852554658479043, "learning_rate": 1.3774528505651573e-06, "loss": 0.279, "step": 28126 }, { "epoch": 1.317609031714058, "grad_norm": 0.6222647930784287, "learning_rate": 1.377283397161619e-06, "loss": 0.2687, "step": 28127 }, { "epoch": 1.3176558767039865, "grad_norm": 0.6167998106216055, "learning_rate": 1.3771139502190316e-06, "loss": 0.2883, "step": 28128 }, { "epoch": 1.3177027216939148, "grad_norm": 0.5761250410868698, "learning_rate": 1.376944509738371e-06, "loss": 0.2745, "step": 28129 }, { "epoch": 1.3177495666838432, "grad_norm": 0.604632274448763, "learning_rate": 1.3767750757206129e-06, "loss": 0.2834, "step": 28130 }, { "epoch": 1.3177964116737715, "grad_norm": 0.5955090437753056, "learning_rate": 1.376605648166731e-06, "loss": 0.2803, "step": 28131 }, { "epoch": 1.3178432566637, "grad_norm": 0.5917949166906711, "learning_rate": 1.3764362270777033e-06, "loss": 0.2701, "step": 28132 }, { "epoch": 1.3178901016536282, "grad_norm": 0.6277268950711075, "learning_rate": 1.376266812454501e-06, "loss": 0.2859, "step": 28133 }, { "epoch": 1.3179369466435564, "grad_norm": 0.591650739015368, "learning_rate": 1.376097404298102e-06, "loss": 0.2646, "step": 28134 }, { "epoch": 1.317983791633485, "grad_norm": 0.5790661288495008, "learning_rate": 1.375928002609479e-06, "loss": 0.2545, "step": 28135 }, { "epoch": 1.3180306366234131, "grad_norm": 0.5937176850772108, "learning_rate": 1.375758607389608e-06, "loss": 0.27, "step": 28136 }, { "epoch": 1.3180774816133414, "grad_norm": 0.5548254424782942, "learning_rate": 1.3755892186394631e-06, "loss": 0.2684, "step": 28137 }, { "epoch": 1.3181243266032698, "grad_norm": 0.5982697625670219, "learning_rate": 1.3754198363600214e-06, "loss": 0.2773, "step": 28138 }, { "epoch": 1.318171171593198, "grad_norm": 0.5881598877795124, "learning_rate": 1.3752504605522543e-06, "loss": 0.2556, "step": 28139 }, { "epoch": 1.3182180165831263, "grad_norm": 0.5912383420973832, "learning_rate": 1.3750810912171391e-06, "loss": 0.2668, "step": 28140 }, { "epoch": 1.3182648615730548, "grad_norm": 0.5997189714499225, "learning_rate": 1.3749117283556487e-06, "loss": 0.2667, "step": 28141 }, { "epoch": 1.318311706562983, "grad_norm": 0.5602083396410623, "learning_rate": 1.3747423719687586e-06, "loss": 0.2641, "step": 28142 }, { "epoch": 1.3183585515529115, "grad_norm": 0.5713393311379269, "learning_rate": 1.3745730220574433e-06, "loss": 0.2799, "step": 28143 }, { "epoch": 1.3184053965428397, "grad_norm": 0.5799768274003487, "learning_rate": 1.374403678622678e-06, "loss": 0.2633, "step": 28144 }, { "epoch": 1.3184522415327682, "grad_norm": 0.5907903913918718, "learning_rate": 1.3742343416654357e-06, "loss": 0.2716, "step": 28145 }, { "epoch": 1.3184990865226964, "grad_norm": 0.5642132223976161, "learning_rate": 1.3740650111866921e-06, "loss": 0.2651, "step": 28146 }, { "epoch": 1.3185459315126247, "grad_norm": 0.5744888419159544, "learning_rate": 1.3738956871874226e-06, "loss": 0.2546, "step": 28147 }, { "epoch": 1.3185927765025531, "grad_norm": 0.5814626332978841, "learning_rate": 1.3737263696685988e-06, "loss": 0.2719, "step": 28148 }, { "epoch": 1.3186396214924814, "grad_norm": 0.5605145639590979, "learning_rate": 1.3735570586311968e-06, "loss": 0.2713, "step": 28149 }, { "epoch": 1.3186864664824096, "grad_norm": 0.5738141001819744, "learning_rate": 1.3733877540761922e-06, "loss": 0.2631, "step": 28150 }, { "epoch": 1.318733311472338, "grad_norm": 0.5684879320114822, "learning_rate": 1.3732184560045567e-06, "loss": 0.2803, "step": 28151 }, { "epoch": 1.3187801564622663, "grad_norm": 0.5723615717913718, "learning_rate": 1.3730491644172656e-06, "loss": 0.2611, "step": 28152 }, { "epoch": 1.3188270014521946, "grad_norm": 0.6021727341062213, "learning_rate": 1.3728798793152936e-06, "loss": 0.2828, "step": 28153 }, { "epoch": 1.318873846442123, "grad_norm": 0.6135977283680825, "learning_rate": 1.3727106006996155e-06, "loss": 0.2651, "step": 28154 }, { "epoch": 1.3189206914320513, "grad_norm": 0.5788440522449378, "learning_rate": 1.3725413285712032e-06, "loss": 0.2606, "step": 28155 }, { "epoch": 1.3189675364219797, "grad_norm": 0.6052497617235085, "learning_rate": 1.3723720629310335e-06, "loss": 0.2774, "step": 28156 }, { "epoch": 1.319014381411908, "grad_norm": 0.6353935329532472, "learning_rate": 1.3722028037800782e-06, "loss": 0.2898, "step": 28157 }, { "epoch": 1.3190612264018364, "grad_norm": 0.5749581199596302, "learning_rate": 1.3720335511193122e-06, "loss": 0.2785, "step": 28158 }, { "epoch": 1.3191080713917647, "grad_norm": 0.6159155234901944, "learning_rate": 1.37186430494971e-06, "loss": 0.2835, "step": 28159 }, { "epoch": 1.319154916381693, "grad_norm": 0.5803731403692182, "learning_rate": 1.3716950652722446e-06, "loss": 0.2691, "step": 28160 }, { "epoch": 1.3192017613716214, "grad_norm": 0.6039112403940322, "learning_rate": 1.3715258320878915e-06, "loss": 0.2671, "step": 28161 }, { "epoch": 1.3192486063615496, "grad_norm": 0.5988597065831434, "learning_rate": 1.371356605397623e-06, "loss": 0.277, "step": 28162 }, { "epoch": 1.3192954513514779, "grad_norm": 0.5872639254053849, "learning_rate": 1.3711873852024144e-06, "loss": 0.2589, "step": 28163 }, { "epoch": 1.3193422963414063, "grad_norm": 0.583950509539466, "learning_rate": 1.3710181715032378e-06, "loss": 0.2668, "step": 28164 }, { "epoch": 1.3193891413313346, "grad_norm": 0.6154639740208316, "learning_rate": 1.3708489643010677e-06, "loss": 0.276, "step": 28165 }, { "epoch": 1.319435986321263, "grad_norm": 0.5556204004151839, "learning_rate": 1.3706797635968783e-06, "loss": 0.2492, "step": 28166 }, { "epoch": 1.3194828313111913, "grad_norm": 0.5652329763215599, "learning_rate": 1.3705105693916426e-06, "loss": 0.2783, "step": 28167 }, { "epoch": 1.3195296763011197, "grad_norm": 0.6307889149864387, "learning_rate": 1.3703413816863359e-06, "loss": 0.2733, "step": 28168 }, { "epoch": 1.319576521291048, "grad_norm": 0.621246794646428, "learning_rate": 1.3701722004819306e-06, "loss": 0.2853, "step": 28169 }, { "epoch": 1.3196233662809762, "grad_norm": 0.5544422503957933, "learning_rate": 1.3700030257793991e-06, "loss": 0.2729, "step": 28170 }, { "epoch": 1.3196702112709047, "grad_norm": 0.6018716327908469, "learning_rate": 1.3698338575797164e-06, "loss": 0.2739, "step": 28171 }, { "epoch": 1.319717056260833, "grad_norm": 0.5883231372286171, "learning_rate": 1.3696646958838562e-06, "loss": 0.2759, "step": 28172 }, { "epoch": 1.3197639012507612, "grad_norm": 0.5829880015351822, "learning_rate": 1.3694955406927907e-06, "loss": 0.2642, "step": 28173 }, { "epoch": 1.3198107462406896, "grad_norm": 0.602213125033144, "learning_rate": 1.3693263920074956e-06, "loss": 0.271, "step": 28174 }, { "epoch": 1.3198575912306179, "grad_norm": 0.6404044923852222, "learning_rate": 1.3691572498289424e-06, "loss": 0.2849, "step": 28175 }, { "epoch": 1.319904436220546, "grad_norm": 0.6282207816282489, "learning_rate": 1.3689881141581055e-06, "loss": 0.2845, "step": 28176 }, { "epoch": 1.3199512812104746, "grad_norm": 0.6283845348502701, "learning_rate": 1.3688189849959566e-06, "loss": 0.285, "step": 28177 }, { "epoch": 1.3199981262004028, "grad_norm": 0.5477559263011711, "learning_rate": 1.3686498623434705e-06, "loss": 0.2609, "step": 28178 }, { "epoch": 1.3200449711903313, "grad_norm": 0.6390418760846309, "learning_rate": 1.3684807462016198e-06, "loss": 0.2999, "step": 28179 }, { "epoch": 1.3200918161802595, "grad_norm": 0.6158650598556513, "learning_rate": 1.3683116365713783e-06, "loss": 0.2843, "step": 28180 }, { "epoch": 1.320138661170188, "grad_norm": 0.6328034154630297, "learning_rate": 1.3681425334537197e-06, "loss": 0.2924, "step": 28181 }, { "epoch": 1.3201855061601162, "grad_norm": 0.5863620634570016, "learning_rate": 1.3679734368496165e-06, "loss": 0.2757, "step": 28182 }, { "epoch": 1.3202323511500444, "grad_norm": 0.6413714309457883, "learning_rate": 1.3678043467600405e-06, "loss": 0.2887, "step": 28183 }, { "epoch": 1.320279196139973, "grad_norm": 0.6001322568586421, "learning_rate": 1.3676352631859658e-06, "loss": 0.2728, "step": 28184 }, { "epoch": 1.3203260411299012, "grad_norm": 0.5992136405280952, "learning_rate": 1.367466186128366e-06, "loss": 0.2726, "step": 28185 }, { "epoch": 1.3203728861198294, "grad_norm": 0.5639505892625671, "learning_rate": 1.3672971155882132e-06, "loss": 0.2783, "step": 28186 }, { "epoch": 1.3204197311097579, "grad_norm": 0.6086090389916315, "learning_rate": 1.3671280515664819e-06, "loss": 0.2781, "step": 28187 }, { "epoch": 1.320466576099686, "grad_norm": 0.6101809081839598, "learning_rate": 1.3669589940641432e-06, "loss": 0.2745, "step": 28188 }, { "epoch": 1.3205134210896143, "grad_norm": 0.6092105082628453, "learning_rate": 1.3667899430821713e-06, "loss": 0.2737, "step": 28189 }, { "epoch": 1.3205602660795428, "grad_norm": 0.5990620060677969, "learning_rate": 1.3666208986215377e-06, "loss": 0.2803, "step": 28190 }, { "epoch": 1.320607111069471, "grad_norm": 0.5715220246854343, "learning_rate": 1.3664518606832158e-06, "loss": 0.2639, "step": 28191 }, { "epoch": 1.3206539560593995, "grad_norm": 0.6241409557255737, "learning_rate": 1.3662828292681787e-06, "loss": 0.269, "step": 28192 }, { "epoch": 1.3207008010493277, "grad_norm": 0.6087619452489427, "learning_rate": 1.3661138043774e-06, "loss": 0.2765, "step": 28193 }, { "epoch": 1.3207476460392562, "grad_norm": 0.5519789874397564, "learning_rate": 1.3659447860118502e-06, "loss": 0.2768, "step": 28194 }, { "epoch": 1.3207944910291844, "grad_norm": 0.6364812931318612, "learning_rate": 1.365775774172503e-06, "loss": 0.275, "step": 28195 }, { "epoch": 1.3208413360191127, "grad_norm": 0.613102636075386, "learning_rate": 1.3656067688603325e-06, "loss": 0.2774, "step": 28196 }, { "epoch": 1.3208881810090412, "grad_norm": 0.6085311423924324, "learning_rate": 1.3654377700763089e-06, "loss": 0.2724, "step": 28197 }, { "epoch": 1.3209350259989694, "grad_norm": 0.6314540676209819, "learning_rate": 1.3652687778214058e-06, "loss": 0.281, "step": 28198 }, { "epoch": 1.3209818709888976, "grad_norm": 0.5958191955145968, "learning_rate": 1.3650997920965964e-06, "loss": 0.2671, "step": 28199 }, { "epoch": 1.321028715978826, "grad_norm": 0.6438078855120829, "learning_rate": 1.3649308129028516e-06, "loss": 0.2944, "step": 28200 }, { "epoch": 1.3210755609687543, "grad_norm": 0.5877879114399911, "learning_rate": 1.3647618402411445e-06, "loss": 0.2648, "step": 28201 }, { "epoch": 1.3211224059586828, "grad_norm": 0.6150208837577739, "learning_rate": 1.3645928741124479e-06, "loss": 0.2836, "step": 28202 }, { "epoch": 1.321169250948611, "grad_norm": 0.605461219472512, "learning_rate": 1.3644239145177349e-06, "loss": 0.2813, "step": 28203 }, { "epoch": 1.3212160959385395, "grad_norm": 0.6051801003534516, "learning_rate": 1.3642549614579759e-06, "loss": 0.2911, "step": 28204 }, { "epoch": 1.3212629409284677, "grad_norm": 0.6662776367507769, "learning_rate": 1.364086014934145e-06, "loss": 0.3002, "step": 28205 }, { "epoch": 1.321309785918396, "grad_norm": 0.5680447977216004, "learning_rate": 1.3639170749472125e-06, "loss": 0.2636, "step": 28206 }, { "epoch": 1.3213566309083244, "grad_norm": 0.5834502968848536, "learning_rate": 1.3637481414981517e-06, "loss": 0.2734, "step": 28207 }, { "epoch": 1.3214034758982527, "grad_norm": 0.5932076016039666, "learning_rate": 1.363579214587935e-06, "loss": 0.2716, "step": 28208 }, { "epoch": 1.321450320888181, "grad_norm": 0.5807525104116443, "learning_rate": 1.3634102942175342e-06, "loss": 0.2539, "step": 28209 }, { "epoch": 1.3214971658781094, "grad_norm": 0.5841692355543883, "learning_rate": 1.3632413803879223e-06, "loss": 0.2732, "step": 28210 }, { "epoch": 1.3215440108680376, "grad_norm": 0.5859192128743274, "learning_rate": 1.3630724731000711e-06, "loss": 0.2707, "step": 28211 }, { "epoch": 1.3215908558579659, "grad_norm": 0.6163276374282829, "learning_rate": 1.3629035723549506e-06, "loss": 0.2737, "step": 28212 }, { "epoch": 1.3216377008478943, "grad_norm": 0.6096329209702422, "learning_rate": 1.3627346781535344e-06, "loss": 0.2734, "step": 28213 }, { "epoch": 1.3216845458378226, "grad_norm": 0.639705901188742, "learning_rate": 1.3625657904967942e-06, "loss": 0.2819, "step": 28214 }, { "epoch": 1.321731390827751, "grad_norm": 0.569716826079041, "learning_rate": 1.362396909385702e-06, "loss": 0.258, "step": 28215 }, { "epoch": 1.3217782358176793, "grad_norm": 0.5838549657464646, "learning_rate": 1.3622280348212297e-06, "loss": 0.2626, "step": 28216 }, { "epoch": 1.3218250808076077, "grad_norm": 0.5962161301622027, "learning_rate": 1.3620591668043498e-06, "loss": 0.2847, "step": 28217 }, { "epoch": 1.321871925797536, "grad_norm": 0.5925587646329976, "learning_rate": 1.3618903053360338e-06, "loss": 0.2799, "step": 28218 }, { "epoch": 1.3219187707874642, "grad_norm": 0.604588059411216, "learning_rate": 1.361721450417252e-06, "loss": 0.2903, "step": 28219 }, { "epoch": 1.3219656157773927, "grad_norm": 0.6045254400010929, "learning_rate": 1.3615526020489767e-06, "loss": 0.2758, "step": 28220 }, { "epoch": 1.322012460767321, "grad_norm": 0.5578044461953233, "learning_rate": 1.3613837602321806e-06, "loss": 0.2672, "step": 28221 }, { "epoch": 1.3220593057572492, "grad_norm": 0.5809156367944702, "learning_rate": 1.3612149249678344e-06, "loss": 0.2675, "step": 28222 }, { "epoch": 1.3221061507471776, "grad_norm": 0.5738839758132738, "learning_rate": 1.3610460962569111e-06, "loss": 0.2616, "step": 28223 }, { "epoch": 1.3221529957371059, "grad_norm": 0.6113395553526142, "learning_rate": 1.3608772741003807e-06, "loss": 0.2873, "step": 28224 }, { "epoch": 1.3221998407270341, "grad_norm": 0.6047050641841809, "learning_rate": 1.360708458499216e-06, "loss": 0.2712, "step": 28225 }, { "epoch": 1.3222466857169626, "grad_norm": 0.6013935179405241, "learning_rate": 1.3605396494543865e-06, "loss": 0.2852, "step": 28226 }, { "epoch": 1.3222935307068908, "grad_norm": 0.5939526369457367, "learning_rate": 1.3603708469668653e-06, "loss": 0.2547, "step": 28227 }, { "epoch": 1.3223403756968193, "grad_norm": 0.5983554287068903, "learning_rate": 1.3602020510376231e-06, "loss": 0.2794, "step": 28228 }, { "epoch": 1.3223872206867475, "grad_norm": 0.565900620000808, "learning_rate": 1.3600332616676328e-06, "loss": 0.2623, "step": 28229 }, { "epoch": 1.322434065676676, "grad_norm": 0.559910346331596, "learning_rate": 1.3598644788578635e-06, "loss": 0.2601, "step": 28230 }, { "epoch": 1.3224809106666042, "grad_norm": 0.5687680764595483, "learning_rate": 1.3596957026092888e-06, "loss": 0.2866, "step": 28231 }, { "epoch": 1.3225277556565325, "grad_norm": 0.5927441505491456, "learning_rate": 1.3595269329228777e-06, "loss": 0.2675, "step": 28232 }, { "epoch": 1.322574600646461, "grad_norm": 0.5965273086133357, "learning_rate": 1.3593581697996023e-06, "loss": 0.2711, "step": 28233 }, { "epoch": 1.3226214456363892, "grad_norm": 0.5547041500149873, "learning_rate": 1.3591894132404338e-06, "loss": 0.2626, "step": 28234 }, { "epoch": 1.3226682906263174, "grad_norm": 0.6270183416880603, "learning_rate": 1.3590206632463438e-06, "loss": 0.2875, "step": 28235 }, { "epoch": 1.3227151356162459, "grad_norm": 0.626894979525286, "learning_rate": 1.358851919818304e-06, "loss": 0.2844, "step": 28236 }, { "epoch": 1.3227619806061741, "grad_norm": 0.6011692223696915, "learning_rate": 1.3586831829572834e-06, "loss": 0.2767, "step": 28237 }, { "epoch": 1.3228088255961026, "grad_norm": 0.5862014263106974, "learning_rate": 1.3585144526642554e-06, "loss": 0.2676, "step": 28238 }, { "epoch": 1.3228556705860308, "grad_norm": 0.5908200424001105, "learning_rate": 1.3583457289401888e-06, "loss": 0.2811, "step": 28239 }, { "epoch": 1.3229025155759593, "grad_norm": 0.6024534760347942, "learning_rate": 1.3581770117860554e-06, "loss": 0.2769, "step": 28240 }, { "epoch": 1.3229493605658875, "grad_norm": 0.5573649332355455, "learning_rate": 1.3580083012028266e-06, "loss": 0.2514, "step": 28241 }, { "epoch": 1.3229962055558158, "grad_norm": 0.6017915381637374, "learning_rate": 1.357839597191474e-06, "loss": 0.2905, "step": 28242 }, { "epoch": 1.3230430505457442, "grad_norm": 0.6038420499655871, "learning_rate": 1.3576708997529665e-06, "loss": 0.2745, "step": 28243 }, { "epoch": 1.3230898955356725, "grad_norm": 0.6180199933538196, "learning_rate": 1.3575022088882755e-06, "loss": 0.2795, "step": 28244 }, { "epoch": 1.3231367405256007, "grad_norm": 0.5530317201819236, "learning_rate": 1.3573335245983733e-06, "loss": 0.2459, "step": 28245 }, { "epoch": 1.3231835855155292, "grad_norm": 0.5629959332764147, "learning_rate": 1.3571648468842286e-06, "loss": 0.2639, "step": 28246 }, { "epoch": 1.3232304305054574, "grad_norm": 0.5555109747440206, "learning_rate": 1.3569961757468127e-06, "loss": 0.2728, "step": 28247 }, { "epoch": 1.3232772754953857, "grad_norm": 0.5535070750230027, "learning_rate": 1.3568275111870978e-06, "loss": 0.2611, "step": 28248 }, { "epoch": 1.3233241204853141, "grad_norm": 0.6337814024730835, "learning_rate": 1.356658853206052e-06, "loss": 0.2628, "step": 28249 }, { "epoch": 1.3233709654752424, "grad_norm": 0.6091021849295747, "learning_rate": 1.3564902018046474e-06, "loss": 0.2678, "step": 28250 }, { "epoch": 1.3234178104651708, "grad_norm": 0.5614848269476229, "learning_rate": 1.3563215569838543e-06, "loss": 0.2536, "step": 28251 }, { "epoch": 1.323464655455099, "grad_norm": 0.6138625980656908, "learning_rate": 1.3561529187446437e-06, "loss": 0.2685, "step": 28252 }, { "epoch": 1.3235115004450275, "grad_norm": 0.5858605684187983, "learning_rate": 1.3559842870879852e-06, "loss": 0.2738, "step": 28253 }, { "epoch": 1.3235583454349558, "grad_norm": 0.5609234365746556, "learning_rate": 1.3558156620148502e-06, "loss": 0.261, "step": 28254 }, { "epoch": 1.323605190424884, "grad_norm": 0.6171306049195202, "learning_rate": 1.3556470435262076e-06, "loss": 0.2818, "step": 28255 }, { "epoch": 1.3236520354148125, "grad_norm": 0.6288103226201148, "learning_rate": 1.3554784316230285e-06, "loss": 0.2868, "step": 28256 }, { "epoch": 1.3236988804047407, "grad_norm": 0.6210763396898624, "learning_rate": 1.3553098263062835e-06, "loss": 0.2699, "step": 28257 }, { "epoch": 1.323745725394669, "grad_norm": 0.6044718403272995, "learning_rate": 1.3551412275769426e-06, "loss": 0.2712, "step": 28258 }, { "epoch": 1.3237925703845974, "grad_norm": 0.6194919581935252, "learning_rate": 1.3549726354359775e-06, "loss": 0.2843, "step": 28259 }, { "epoch": 1.3238394153745257, "grad_norm": 0.5692802813983939, "learning_rate": 1.3548040498843567e-06, "loss": 0.2569, "step": 28260 }, { "epoch": 1.323886260364454, "grad_norm": 0.6093060094491209, "learning_rate": 1.3546354709230502e-06, "loss": 0.2859, "step": 28261 }, { "epoch": 1.3239331053543824, "grad_norm": 0.589909054408253, "learning_rate": 1.3544668985530286e-06, "loss": 0.2763, "step": 28262 }, { "epoch": 1.3239799503443106, "grad_norm": 0.5949263634838254, "learning_rate": 1.3542983327752617e-06, "loss": 0.2615, "step": 28263 }, { "epoch": 1.324026795334239, "grad_norm": 0.6267486205982157, "learning_rate": 1.35412977359072e-06, "loss": 0.2944, "step": 28264 }, { "epoch": 1.3240736403241673, "grad_norm": 0.6166331500191592, "learning_rate": 1.3539612210003747e-06, "loss": 0.2726, "step": 28265 }, { "epoch": 1.3241204853140958, "grad_norm": 0.5928721614374225, "learning_rate": 1.3537926750051932e-06, "loss": 0.2777, "step": 28266 }, { "epoch": 1.324167330304024, "grad_norm": 0.6050054910327042, "learning_rate": 1.353624135606148e-06, "loss": 0.281, "step": 28267 }, { "epoch": 1.3242141752939522, "grad_norm": 0.5766472076600303, "learning_rate": 1.3534556028042067e-06, "loss": 0.2669, "step": 28268 }, { "epoch": 1.3242610202838807, "grad_norm": 0.5553177529343367, "learning_rate": 1.3532870766003407e-06, "loss": 0.2668, "step": 28269 }, { "epoch": 1.324307865273809, "grad_norm": 0.6038366505429145, "learning_rate": 1.3531185569955189e-06, "loss": 0.2685, "step": 28270 }, { "epoch": 1.3243547102637372, "grad_norm": 0.5586102237153266, "learning_rate": 1.3529500439907115e-06, "loss": 0.2713, "step": 28271 }, { "epoch": 1.3244015552536657, "grad_norm": 0.6079226047584304, "learning_rate": 1.3527815375868895e-06, "loss": 0.2741, "step": 28272 }, { "epoch": 1.324448400243594, "grad_norm": 0.6371781136816638, "learning_rate": 1.352613037785021e-06, "loss": 0.27, "step": 28273 }, { "epoch": 1.3244952452335224, "grad_norm": 0.6141859408562237, "learning_rate": 1.3524445445860756e-06, "loss": 0.283, "step": 28274 }, { "epoch": 1.3245420902234506, "grad_norm": 0.5847657589093911, "learning_rate": 1.3522760579910233e-06, "loss": 0.2586, "step": 28275 }, { "epoch": 1.324588935213379, "grad_norm": 0.5997534500829009, "learning_rate": 1.3521075780008336e-06, "loss": 0.2761, "step": 28276 }, { "epoch": 1.3246357802033073, "grad_norm": 0.6021194045597836, "learning_rate": 1.3519391046164764e-06, "loss": 0.2663, "step": 28277 }, { "epoch": 1.3246826251932355, "grad_norm": 0.581061768065009, "learning_rate": 1.3517706378389222e-06, "loss": 0.2638, "step": 28278 }, { "epoch": 1.324729470183164, "grad_norm": 0.5659384416981265, "learning_rate": 1.3516021776691383e-06, "loss": 0.2693, "step": 28279 }, { "epoch": 1.3247763151730922, "grad_norm": 0.6547503106979119, "learning_rate": 1.351433724108096e-06, "loss": 0.2989, "step": 28280 }, { "epoch": 1.3248231601630205, "grad_norm": 0.5637670374493259, "learning_rate": 1.351265277156763e-06, "loss": 0.2723, "step": 28281 }, { "epoch": 1.324870005152949, "grad_norm": 0.6035047847438817, "learning_rate": 1.3510968368161097e-06, "loss": 0.274, "step": 28282 }, { "epoch": 1.3249168501428772, "grad_norm": 0.5680860659371458, "learning_rate": 1.3509284030871056e-06, "loss": 0.2621, "step": 28283 }, { "epoch": 1.3249636951328054, "grad_norm": 0.5911010605497911, "learning_rate": 1.3507599759707207e-06, "loss": 0.2771, "step": 28284 }, { "epoch": 1.325010540122734, "grad_norm": 0.601060543714394, "learning_rate": 1.350591555467922e-06, "loss": 0.2761, "step": 28285 }, { "epoch": 1.3250573851126621, "grad_norm": 0.583986010090479, "learning_rate": 1.3504231415796801e-06, "loss": 0.2838, "step": 28286 }, { "epoch": 1.3251042301025906, "grad_norm": 0.583258467140793, "learning_rate": 1.3502547343069649e-06, "loss": 0.28, "step": 28287 }, { "epoch": 1.3251510750925188, "grad_norm": 0.5888401508494769, "learning_rate": 1.3500863336507442e-06, "loss": 0.2707, "step": 28288 }, { "epoch": 1.3251979200824473, "grad_norm": 0.5823026623537698, "learning_rate": 1.3499179396119872e-06, "loss": 0.2762, "step": 28289 }, { "epoch": 1.3252447650723755, "grad_norm": 0.5236284731474834, "learning_rate": 1.3497495521916649e-06, "loss": 0.2453, "step": 28290 }, { "epoch": 1.3252916100623038, "grad_norm": 0.6063397123081052, "learning_rate": 1.349581171390743e-06, "loss": 0.2717, "step": 28291 }, { "epoch": 1.3253384550522322, "grad_norm": 0.5473707209547081, "learning_rate": 1.3494127972101928e-06, "loss": 0.2622, "step": 28292 }, { "epoch": 1.3253853000421605, "grad_norm": 0.6209716930262406, "learning_rate": 1.349244429650983e-06, "loss": 0.2669, "step": 28293 }, { "epoch": 1.3254321450320887, "grad_norm": 0.6221620393557549, "learning_rate": 1.349076068714083e-06, "loss": 0.2623, "step": 28294 }, { "epoch": 1.3254789900220172, "grad_norm": 0.5653616163477861, "learning_rate": 1.3489077144004598e-06, "loss": 0.2784, "step": 28295 }, { "epoch": 1.3255258350119454, "grad_norm": 0.5987234534507457, "learning_rate": 1.3487393667110848e-06, "loss": 0.2713, "step": 28296 }, { "epoch": 1.3255726800018737, "grad_norm": 0.6021297648410928, "learning_rate": 1.3485710256469243e-06, "loss": 0.2657, "step": 28297 }, { "epoch": 1.3256195249918021, "grad_norm": 0.5883478090291459, "learning_rate": 1.3484026912089476e-06, "loss": 0.2603, "step": 28298 }, { "epoch": 1.3256663699817304, "grad_norm": 0.5962132476949683, "learning_rate": 1.3482343633981242e-06, "loss": 0.2684, "step": 28299 }, { "epoch": 1.3257132149716588, "grad_norm": 0.5759754377740465, "learning_rate": 1.3480660422154228e-06, "loss": 0.2544, "step": 28300 }, { "epoch": 1.325760059961587, "grad_norm": 0.6320949461767349, "learning_rate": 1.347897727661813e-06, "loss": 0.2687, "step": 28301 }, { "epoch": 1.3258069049515155, "grad_norm": 0.5780891669825114, "learning_rate": 1.3477294197382607e-06, "loss": 0.2704, "step": 28302 }, { "epoch": 1.3258537499414438, "grad_norm": 0.6163047720637848, "learning_rate": 1.3475611184457373e-06, "loss": 0.286, "step": 28303 }, { "epoch": 1.325900594931372, "grad_norm": 0.5762841833586235, "learning_rate": 1.347392823785209e-06, "loss": 0.2787, "step": 28304 }, { "epoch": 1.3259474399213005, "grad_norm": 0.5991528152683122, "learning_rate": 1.3472245357576453e-06, "loss": 0.2615, "step": 28305 }, { "epoch": 1.3259942849112287, "grad_norm": 0.6270672988056935, "learning_rate": 1.3470562543640144e-06, "loss": 0.2813, "step": 28306 }, { "epoch": 1.326041129901157, "grad_norm": 0.5857480222172243, "learning_rate": 1.3468879796052852e-06, "loss": 0.2741, "step": 28307 }, { "epoch": 1.3260879748910854, "grad_norm": 0.6039199787934633, "learning_rate": 1.3467197114824268e-06, "loss": 0.2696, "step": 28308 }, { "epoch": 1.3261348198810137, "grad_norm": 0.5446545726076247, "learning_rate": 1.3465514499964067e-06, "loss": 0.2709, "step": 28309 }, { "epoch": 1.3261816648709421, "grad_norm": 0.5609904252661001, "learning_rate": 1.346383195148192e-06, "loss": 0.2682, "step": 28310 }, { "epoch": 1.3262285098608704, "grad_norm": 0.5692205916669469, "learning_rate": 1.346214946938752e-06, "loss": 0.2789, "step": 28311 }, { "epoch": 1.3262753548507988, "grad_norm": 0.6089257264499772, "learning_rate": 1.346046705369055e-06, "loss": 0.2881, "step": 28312 }, { "epoch": 1.326322199840727, "grad_norm": 0.5588051631061005, "learning_rate": 1.3458784704400691e-06, "loss": 0.2507, "step": 28313 }, { "epoch": 1.3263690448306553, "grad_norm": 0.6066957987450797, "learning_rate": 1.3457102421527638e-06, "loss": 0.271, "step": 28314 }, { "epoch": 1.3264158898205838, "grad_norm": 0.5810181171049464, "learning_rate": 1.3455420205081048e-06, "loss": 0.272, "step": 28315 }, { "epoch": 1.326462734810512, "grad_norm": 0.5907635117948576, "learning_rate": 1.345373805507062e-06, "loss": 0.2874, "step": 28316 }, { "epoch": 1.3265095798004403, "grad_norm": 0.5727199947625172, "learning_rate": 1.3452055971506024e-06, "loss": 0.2687, "step": 28317 }, { "epoch": 1.3265564247903687, "grad_norm": 0.6103782883379072, "learning_rate": 1.345037395439694e-06, "loss": 0.2826, "step": 28318 }, { "epoch": 1.326603269780297, "grad_norm": 0.5969020778957811, "learning_rate": 1.344869200375305e-06, "loss": 0.2675, "step": 28319 }, { "epoch": 1.3266501147702252, "grad_norm": 0.617646343614558, "learning_rate": 1.3447010119584034e-06, "loss": 0.2648, "step": 28320 }, { "epoch": 1.3266969597601537, "grad_norm": 0.6310643078631222, "learning_rate": 1.344532830189958e-06, "loss": 0.2819, "step": 28321 }, { "epoch": 1.326743804750082, "grad_norm": 0.625110914852347, "learning_rate": 1.344364655070936e-06, "loss": 0.2811, "step": 28322 }, { "epoch": 1.3267906497400104, "grad_norm": 0.5688378127015175, "learning_rate": 1.3441964866023039e-06, "loss": 0.2707, "step": 28323 }, { "epoch": 1.3268374947299386, "grad_norm": 0.6272786527715585, "learning_rate": 1.3440283247850306e-06, "loss": 0.2816, "step": 28324 }, { "epoch": 1.326884339719867, "grad_norm": 0.553080116592239, "learning_rate": 1.3438601696200836e-06, "loss": 0.2573, "step": 28325 }, { "epoch": 1.3269311847097953, "grad_norm": 0.5601144062580493, "learning_rate": 1.343692021108431e-06, "loss": 0.2552, "step": 28326 }, { "epoch": 1.3269780296997236, "grad_norm": 0.5954270804354056, "learning_rate": 1.3435238792510407e-06, "loss": 0.254, "step": 28327 }, { "epoch": 1.327024874689652, "grad_norm": 0.5655980725464981, "learning_rate": 1.3433557440488792e-06, "loss": 0.2534, "step": 28328 }, { "epoch": 1.3270717196795803, "grad_norm": 0.5819665508205506, "learning_rate": 1.3431876155029156e-06, "loss": 0.2559, "step": 28329 }, { "epoch": 1.3271185646695085, "grad_norm": 0.5908347079621403, "learning_rate": 1.3430194936141156e-06, "loss": 0.2727, "step": 28330 }, { "epoch": 1.327165409659437, "grad_norm": 0.6551648835913432, "learning_rate": 1.3428513783834473e-06, "loss": 0.2898, "step": 28331 }, { "epoch": 1.3272122546493652, "grad_norm": 0.5976409128026505, "learning_rate": 1.3426832698118788e-06, "loss": 0.2788, "step": 28332 }, { "epoch": 1.3272590996392934, "grad_norm": 0.5972962777716481, "learning_rate": 1.3425151679003782e-06, "loss": 0.2514, "step": 28333 }, { "epoch": 1.327305944629222, "grad_norm": 0.587229334816796, "learning_rate": 1.3423470726499105e-06, "loss": 0.2715, "step": 28334 }, { "epoch": 1.3273527896191502, "grad_norm": 0.5880475360130296, "learning_rate": 1.3421789840614449e-06, "loss": 0.2766, "step": 28335 }, { "epoch": 1.3273996346090786, "grad_norm": 0.5662024238203703, "learning_rate": 1.342010902135949e-06, "loss": 0.2577, "step": 28336 }, { "epoch": 1.3274464795990069, "grad_norm": 0.6080324102518658, "learning_rate": 1.3418428268743882e-06, "loss": 0.2728, "step": 28337 }, { "epoch": 1.3274933245889353, "grad_norm": 0.5811507936089996, "learning_rate": 1.3416747582777312e-06, "loss": 0.2702, "step": 28338 }, { "epoch": 1.3275401695788636, "grad_norm": 0.5625129458417655, "learning_rate": 1.3415066963469457e-06, "loss": 0.2833, "step": 28339 }, { "epoch": 1.3275870145687918, "grad_norm": 0.718563786159953, "learning_rate": 1.341338641082997e-06, "loss": 0.2858, "step": 28340 }, { "epoch": 1.3276338595587203, "grad_norm": 0.5904834061552554, "learning_rate": 1.341170592486853e-06, "loss": 0.2779, "step": 28341 }, { "epoch": 1.3276807045486485, "grad_norm": 0.5822939240665683, "learning_rate": 1.3410025505594808e-06, "loss": 0.2759, "step": 28342 }, { "epoch": 1.3277275495385767, "grad_norm": 0.6686182170689089, "learning_rate": 1.3408345153018492e-06, "loss": 0.2892, "step": 28343 }, { "epoch": 1.3277743945285052, "grad_norm": 0.5856212517994281, "learning_rate": 1.3406664867149222e-06, "loss": 0.2743, "step": 28344 }, { "epoch": 1.3278212395184334, "grad_norm": 0.5739123632123017, "learning_rate": 1.3404984647996695e-06, "loss": 0.2742, "step": 28345 }, { "epoch": 1.327868084508362, "grad_norm": 0.5985895347351783, "learning_rate": 1.3403304495570554e-06, "loss": 0.2715, "step": 28346 }, { "epoch": 1.3279149294982902, "grad_norm": 0.6196207231793786, "learning_rate": 1.340162440988048e-06, "loss": 0.2816, "step": 28347 }, { "epoch": 1.3279617744882186, "grad_norm": 0.5422873895268039, "learning_rate": 1.3399944390936149e-06, "loss": 0.2628, "step": 28348 }, { "epoch": 1.3280086194781469, "grad_norm": 0.6018272340360176, "learning_rate": 1.3398264438747216e-06, "loss": 0.2847, "step": 28349 }, { "epoch": 1.328055464468075, "grad_norm": 0.593953228301944, "learning_rate": 1.3396584553323366e-06, "loss": 0.2673, "step": 28350 }, { "epoch": 1.3281023094580036, "grad_norm": 0.5648372331162772, "learning_rate": 1.3394904734674257e-06, "loss": 0.2597, "step": 28351 }, { "epoch": 1.3281491544479318, "grad_norm": 0.5760804090797528, "learning_rate": 1.3393224982809544e-06, "loss": 0.2687, "step": 28352 }, { "epoch": 1.32819599943786, "grad_norm": 0.5839176983752905, "learning_rate": 1.3391545297738905e-06, "loss": 0.2598, "step": 28353 }, { "epoch": 1.3282428444277885, "grad_norm": 0.5564016313855287, "learning_rate": 1.3389865679472002e-06, "loss": 0.2597, "step": 28354 }, { "epoch": 1.3282896894177167, "grad_norm": 0.5649535688381413, "learning_rate": 1.338818612801851e-06, "loss": 0.2732, "step": 28355 }, { "epoch": 1.328336534407645, "grad_norm": 0.6039116924737304, "learning_rate": 1.3386506643388083e-06, "loss": 0.2765, "step": 28356 }, { "epoch": 1.3283833793975734, "grad_norm": 0.5763202552905098, "learning_rate": 1.3384827225590402e-06, "loss": 0.2546, "step": 28357 }, { "epoch": 1.3284302243875017, "grad_norm": 0.5770190081424669, "learning_rate": 1.3383147874635122e-06, "loss": 0.2663, "step": 28358 }, { "epoch": 1.3284770693774302, "grad_norm": 0.6398950957396529, "learning_rate": 1.3381468590531896e-06, "loss": 0.2619, "step": 28359 }, { "epoch": 1.3285239143673584, "grad_norm": 0.535563111207199, "learning_rate": 1.3379789373290398e-06, "loss": 0.2635, "step": 28360 }, { "epoch": 1.3285707593572869, "grad_norm": 0.6024354193858443, "learning_rate": 1.3378110222920293e-06, "loss": 0.26, "step": 28361 }, { "epoch": 1.328617604347215, "grad_norm": 0.5974995053013274, "learning_rate": 1.3376431139431239e-06, "loss": 0.2779, "step": 28362 }, { "epoch": 1.3286644493371433, "grad_norm": 0.5835572188634143, "learning_rate": 1.3374752122832914e-06, "loss": 0.2763, "step": 28363 }, { "epoch": 1.3287112943270718, "grad_norm": 0.606158213913463, "learning_rate": 1.3373073173134959e-06, "loss": 0.2627, "step": 28364 }, { "epoch": 1.328758139317, "grad_norm": 0.594641452244491, "learning_rate": 1.3371394290347055e-06, "loss": 0.276, "step": 28365 }, { "epoch": 1.3288049843069283, "grad_norm": 0.5885777007397003, "learning_rate": 1.3369715474478844e-06, "loss": 0.2686, "step": 28366 }, { "epoch": 1.3288518292968567, "grad_norm": 0.5824830061959972, "learning_rate": 1.336803672554e-06, "loss": 0.2789, "step": 28367 }, { "epoch": 1.328898674286785, "grad_norm": 0.5806119349916721, "learning_rate": 1.3366358043540177e-06, "loss": 0.2478, "step": 28368 }, { "epoch": 1.3289455192767132, "grad_norm": 0.6096558753244259, "learning_rate": 1.3364679428489045e-06, "loss": 0.2606, "step": 28369 }, { "epoch": 1.3289923642666417, "grad_norm": 0.5964876323866696, "learning_rate": 1.3363000880396264e-06, "loss": 0.2821, "step": 28370 }, { "epoch": 1.32903920925657, "grad_norm": 0.5940422511144603, "learning_rate": 1.3361322399271489e-06, "loss": 0.2844, "step": 28371 }, { "epoch": 1.3290860542464984, "grad_norm": 0.6231038273629104, "learning_rate": 1.3359643985124367e-06, "loss": 0.2855, "step": 28372 }, { "epoch": 1.3291328992364266, "grad_norm": 0.5831316605552583, "learning_rate": 1.3357965637964571e-06, "loss": 0.2608, "step": 28373 }, { "epoch": 1.329179744226355, "grad_norm": 0.6190026980526552, "learning_rate": 1.3356287357801757e-06, "loss": 0.2892, "step": 28374 }, { "epoch": 1.3292265892162833, "grad_norm": 0.5877320672168799, "learning_rate": 1.335460914464558e-06, "loss": 0.2727, "step": 28375 }, { "epoch": 1.3292734342062116, "grad_norm": 0.6119720448889744, "learning_rate": 1.3352930998505708e-06, "loss": 0.2804, "step": 28376 }, { "epoch": 1.32932027919614, "grad_norm": 0.61114155346204, "learning_rate": 1.3351252919391788e-06, "loss": 0.2772, "step": 28377 }, { "epoch": 1.3293671241860683, "grad_norm": 1.192232709029196, "learning_rate": 1.3349574907313485e-06, "loss": 0.2898, "step": 28378 }, { "epoch": 1.3294139691759965, "grad_norm": 0.5623503687926058, "learning_rate": 1.3347896962280442e-06, "loss": 0.2596, "step": 28379 }, { "epoch": 1.329460814165925, "grad_norm": 0.6236129159744546, "learning_rate": 1.334621908430232e-06, "loss": 0.2822, "step": 28380 }, { "epoch": 1.3295076591558532, "grad_norm": 0.5868997356490927, "learning_rate": 1.334454127338878e-06, "loss": 0.2786, "step": 28381 }, { "epoch": 1.3295545041457817, "grad_norm": 0.5761300830939942, "learning_rate": 1.3342863529549489e-06, "loss": 0.2643, "step": 28382 }, { "epoch": 1.32960134913571, "grad_norm": 0.6318197102828178, "learning_rate": 1.3341185852794072e-06, "loss": 0.2671, "step": 28383 }, { "epoch": 1.3296481941256384, "grad_norm": 0.5643218719614893, "learning_rate": 1.3339508243132205e-06, "loss": 0.2637, "step": 28384 }, { "epoch": 1.3296950391155666, "grad_norm": 0.581214243419309, "learning_rate": 1.3337830700573545e-06, "loss": 0.2691, "step": 28385 }, { "epoch": 1.3297418841054949, "grad_norm": 0.6067017518217221, "learning_rate": 1.333615322512773e-06, "loss": 0.2861, "step": 28386 }, { "epoch": 1.3297887290954233, "grad_norm": 0.5914092379940252, "learning_rate": 1.333447581680442e-06, "loss": 0.2805, "step": 28387 }, { "epoch": 1.3298355740853516, "grad_norm": 0.6238083983027067, "learning_rate": 1.333279847561328e-06, "loss": 0.2744, "step": 28388 }, { "epoch": 1.3298824190752798, "grad_norm": 0.57488031806806, "learning_rate": 1.333112120156394e-06, "loss": 0.2724, "step": 28389 }, { "epoch": 1.3299292640652083, "grad_norm": 0.5878188071099185, "learning_rate": 1.332944399466607e-06, "loss": 0.2587, "step": 28390 }, { "epoch": 1.3299761090551365, "grad_norm": 0.5934780505478645, "learning_rate": 1.3327766854929315e-06, "loss": 0.2772, "step": 28391 }, { "epoch": 1.3300229540450648, "grad_norm": 0.6088521833240879, "learning_rate": 1.3326089782363333e-06, "loss": 0.2641, "step": 28392 }, { "epoch": 1.3300697990349932, "grad_norm": 0.5770780585442296, "learning_rate": 1.3324412776977763e-06, "loss": 0.2617, "step": 28393 }, { "epoch": 1.3301166440249215, "grad_norm": 0.5873287647058122, "learning_rate": 1.3322735838782274e-06, "loss": 0.2873, "step": 28394 }, { "epoch": 1.33016348901485, "grad_norm": 0.597128884485493, "learning_rate": 1.3321058967786496e-06, "loss": 0.2725, "step": 28395 }, { "epoch": 1.3302103340047782, "grad_norm": 0.6429457121599484, "learning_rate": 1.3319382164000088e-06, "loss": 0.2826, "step": 28396 }, { "epoch": 1.3302571789947066, "grad_norm": 0.6279873249099125, "learning_rate": 1.3317705427432697e-06, "loss": 0.2776, "step": 28397 }, { "epoch": 1.3303040239846349, "grad_norm": 0.6402525483551352, "learning_rate": 1.331602875809398e-06, "loss": 0.2801, "step": 28398 }, { "epoch": 1.3303508689745631, "grad_norm": 0.6192967184692235, "learning_rate": 1.3314352155993586e-06, "loss": 0.2858, "step": 28399 }, { "epoch": 1.3303977139644916, "grad_norm": 0.6166344258664112, "learning_rate": 1.331267562114116e-06, "loss": 0.2784, "step": 28400 }, { "epoch": 1.3304445589544198, "grad_norm": 0.6074946742050646, "learning_rate": 1.331099915354634e-06, "loss": 0.2684, "step": 28401 }, { "epoch": 1.330491403944348, "grad_norm": 0.588902755408647, "learning_rate": 1.330932275321878e-06, "loss": 0.2825, "step": 28402 }, { "epoch": 1.3305382489342765, "grad_norm": 0.6486677937518438, "learning_rate": 1.330764642016813e-06, "loss": 0.2871, "step": 28403 }, { "epoch": 1.3305850939242048, "grad_norm": 0.5885098320906103, "learning_rate": 1.3305970154404034e-06, "loss": 0.2795, "step": 28404 }, { "epoch": 1.330631938914133, "grad_norm": 0.6312974393504662, "learning_rate": 1.3304293955936153e-06, "loss": 0.2827, "step": 28405 }, { "epoch": 1.3306787839040615, "grad_norm": 0.6123352736236322, "learning_rate": 1.3302617824774111e-06, "loss": 0.2688, "step": 28406 }, { "epoch": 1.3307256288939897, "grad_norm": 0.5824109114387676, "learning_rate": 1.3300941760927571e-06, "loss": 0.269, "step": 28407 }, { "epoch": 1.3307724738839182, "grad_norm": 0.5758430727062354, "learning_rate": 1.329926576440616e-06, "loss": 0.2714, "step": 28408 }, { "epoch": 1.3308193188738464, "grad_norm": 0.5474064894316909, "learning_rate": 1.3297589835219537e-06, "loss": 0.2609, "step": 28409 }, { "epoch": 1.3308661638637749, "grad_norm": 0.5677727566731151, "learning_rate": 1.3295913973377345e-06, "loss": 0.2565, "step": 28410 }, { "epoch": 1.3309130088537031, "grad_norm": 0.6108260500587, "learning_rate": 1.3294238178889224e-06, "loss": 0.2708, "step": 28411 }, { "epoch": 1.3309598538436314, "grad_norm": 0.575596948966037, "learning_rate": 1.3292562451764828e-06, "loss": 0.2647, "step": 28412 }, { "epoch": 1.3310066988335598, "grad_norm": 0.5952752876457849, "learning_rate": 1.3290886792013794e-06, "loss": 0.2601, "step": 28413 }, { "epoch": 1.331053543823488, "grad_norm": 0.5408990980903959, "learning_rate": 1.3289211199645752e-06, "loss": 0.2527, "step": 28414 }, { "epoch": 1.3311003888134163, "grad_norm": 0.5914922693740994, "learning_rate": 1.328753567467036e-06, "loss": 0.2649, "step": 28415 }, { "epoch": 1.3311472338033448, "grad_norm": 0.592869781186407, "learning_rate": 1.3285860217097252e-06, "loss": 0.2717, "step": 28416 }, { "epoch": 1.331194078793273, "grad_norm": 0.5887305287722505, "learning_rate": 1.3284184826936075e-06, "loss": 0.2737, "step": 28417 }, { "epoch": 1.3312409237832015, "grad_norm": 0.615658887583836, "learning_rate": 1.3282509504196483e-06, "loss": 0.2622, "step": 28418 }, { "epoch": 1.3312877687731297, "grad_norm": 0.6058890699747861, "learning_rate": 1.328083424888809e-06, "loss": 0.2749, "step": 28419 }, { "epoch": 1.3313346137630582, "grad_norm": 0.6201899007561436, "learning_rate": 1.3279159061020564e-06, "loss": 0.278, "step": 28420 }, { "epoch": 1.3313814587529864, "grad_norm": 0.6208118652686149, "learning_rate": 1.327748394060352e-06, "loss": 0.2885, "step": 28421 }, { "epoch": 1.3314283037429147, "grad_norm": 0.6129823254016976, "learning_rate": 1.3275808887646608e-06, "loss": 0.2778, "step": 28422 }, { "epoch": 1.3314751487328431, "grad_norm": 0.5891079855337172, "learning_rate": 1.3274133902159469e-06, "loss": 0.2715, "step": 28423 }, { "epoch": 1.3315219937227714, "grad_norm": 0.5778613500859596, "learning_rate": 1.3272458984151753e-06, "loss": 0.265, "step": 28424 }, { "epoch": 1.3315688387126996, "grad_norm": 0.5824745786295833, "learning_rate": 1.327078413363308e-06, "loss": 0.2913, "step": 28425 }, { "epoch": 1.331615683702628, "grad_norm": 0.5954012686645916, "learning_rate": 1.3269109350613096e-06, "loss": 0.2917, "step": 28426 }, { "epoch": 1.3316625286925563, "grad_norm": 0.6133775217667105, "learning_rate": 1.3267434635101448e-06, "loss": 0.2667, "step": 28427 }, { "epoch": 1.3317093736824845, "grad_norm": 0.5708138509799244, "learning_rate": 1.3265759987107757e-06, "loss": 0.2639, "step": 28428 }, { "epoch": 1.331756218672413, "grad_norm": 0.6007160251586555, "learning_rate": 1.3264085406641663e-06, "loss": 0.2725, "step": 28429 }, { "epoch": 1.3318030636623412, "grad_norm": 0.5980351924712788, "learning_rate": 1.326241089371282e-06, "loss": 0.2642, "step": 28430 }, { "epoch": 1.3318499086522697, "grad_norm": 0.5965978080839796, "learning_rate": 1.3260736448330846e-06, "loss": 0.2635, "step": 28431 }, { "epoch": 1.331896753642198, "grad_norm": 0.6243854133355002, "learning_rate": 1.325906207050538e-06, "loss": 0.2671, "step": 28432 }, { "epoch": 1.3319435986321264, "grad_norm": 0.6112709824558508, "learning_rate": 1.325738776024606e-06, "loss": 0.2797, "step": 28433 }, { "epoch": 1.3319904436220547, "grad_norm": 0.5767941302665753, "learning_rate": 1.3255713517562533e-06, "loss": 0.2554, "step": 28434 }, { "epoch": 1.332037288611983, "grad_norm": 0.5906630377599639, "learning_rate": 1.3254039342464412e-06, "loss": 0.2712, "step": 28435 }, { "epoch": 1.3320841336019114, "grad_norm": 0.7031974145868016, "learning_rate": 1.3252365234961346e-06, "loss": 0.3159, "step": 28436 }, { "epoch": 1.3321309785918396, "grad_norm": 0.6001611570660328, "learning_rate": 1.325069119506297e-06, "loss": 0.2834, "step": 28437 }, { "epoch": 1.3321778235817678, "grad_norm": 0.5582595148143771, "learning_rate": 1.3249017222778905e-06, "loss": 0.2682, "step": 28438 }, { "epoch": 1.3322246685716963, "grad_norm": 0.6263958376125817, "learning_rate": 1.3247343318118793e-06, "loss": 0.267, "step": 28439 }, { "epoch": 1.3322715135616245, "grad_norm": 0.6202806387608711, "learning_rate": 1.3245669481092265e-06, "loss": 0.2811, "step": 28440 }, { "epoch": 1.3323183585515528, "grad_norm": 0.622498948074656, "learning_rate": 1.3243995711708967e-06, "loss": 0.2773, "step": 28441 }, { "epoch": 1.3323652035414812, "grad_norm": 0.6014171440650072, "learning_rate": 1.3242322009978508e-06, "loss": 0.2826, "step": 28442 }, { "epoch": 1.3324120485314095, "grad_norm": 0.5726154501990058, "learning_rate": 1.3240648375910543e-06, "loss": 0.2587, "step": 28443 }, { "epoch": 1.332458893521338, "grad_norm": 0.5658384454402456, "learning_rate": 1.3238974809514676e-06, "loss": 0.2641, "step": 28444 }, { "epoch": 1.3325057385112662, "grad_norm": 0.570355913671634, "learning_rate": 1.3237301310800557e-06, "loss": 0.2795, "step": 28445 }, { "epoch": 1.3325525835011947, "grad_norm": 0.5574035152529503, "learning_rate": 1.3235627879777813e-06, "loss": 0.2722, "step": 28446 }, { "epoch": 1.332599428491123, "grad_norm": 0.6095789060076743, "learning_rate": 1.323395451645607e-06, "loss": 0.2529, "step": 28447 }, { "epoch": 1.3326462734810511, "grad_norm": 0.5893779648656139, "learning_rate": 1.3232281220844973e-06, "loss": 0.2773, "step": 28448 }, { "epoch": 1.3326931184709796, "grad_norm": 0.5987606878684985, "learning_rate": 1.3230607992954138e-06, "loss": 0.2797, "step": 28449 }, { "epoch": 1.3327399634609078, "grad_norm": 0.6366397353299036, "learning_rate": 1.322893483279319e-06, "loss": 0.2907, "step": 28450 }, { "epoch": 1.332786808450836, "grad_norm": 0.622621795098955, "learning_rate": 1.322726174037176e-06, "loss": 0.2839, "step": 28451 }, { "epoch": 1.3328336534407645, "grad_norm": 0.5976877716402381, "learning_rate": 1.3225588715699484e-06, "loss": 0.2764, "step": 28452 }, { "epoch": 1.3328804984306928, "grad_norm": 0.5933820044732895, "learning_rate": 1.3223915758785981e-06, "loss": 0.2568, "step": 28453 }, { "epoch": 1.3329273434206212, "grad_norm": 0.5925187675292292, "learning_rate": 1.3222242869640895e-06, "loss": 0.2684, "step": 28454 }, { "epoch": 1.3329741884105495, "grad_norm": 0.622466227359238, "learning_rate": 1.3220570048273832e-06, "loss": 0.2865, "step": 28455 }, { "epoch": 1.333021033400478, "grad_norm": 0.5887438923827113, "learning_rate": 1.3218897294694436e-06, "loss": 0.2602, "step": 28456 }, { "epoch": 1.3330678783904062, "grad_norm": 0.5662280404629817, "learning_rate": 1.3217224608912315e-06, "loss": 0.2674, "step": 28457 }, { "epoch": 1.3331147233803344, "grad_norm": 0.6368927525548533, "learning_rate": 1.3215551990937106e-06, "loss": 0.295, "step": 28458 }, { "epoch": 1.333161568370263, "grad_norm": 0.6027684760602515, "learning_rate": 1.3213879440778432e-06, "loss": 0.2646, "step": 28459 }, { "epoch": 1.3332084133601911, "grad_norm": 0.619980762025483, "learning_rate": 1.3212206958445922e-06, "loss": 0.282, "step": 28460 }, { "epoch": 1.3332552583501194, "grad_norm": 0.6038911045233761, "learning_rate": 1.3210534543949205e-06, "loss": 0.2862, "step": 28461 }, { "epoch": 1.3333021033400478, "grad_norm": 0.5672210469270988, "learning_rate": 1.32088621972979e-06, "loss": 0.2698, "step": 28462 }, { "epoch": 1.333348948329976, "grad_norm": 0.6172270516736384, "learning_rate": 1.320718991850162e-06, "loss": 0.2864, "step": 28463 }, { "epoch": 1.3333957933199043, "grad_norm": 0.5462396706081376, "learning_rate": 1.3205517707569998e-06, "loss": 0.2595, "step": 28464 }, { "epoch": 1.3334426383098328, "grad_norm": 0.6294875131930353, "learning_rate": 1.3203845564512657e-06, "loss": 0.2999, "step": 28465 }, { "epoch": 1.333489483299761, "grad_norm": 0.5720180448782184, "learning_rate": 1.3202173489339221e-06, "loss": 0.2682, "step": 28466 }, { "epoch": 1.3335363282896895, "grad_norm": 0.5773709870097409, "learning_rate": 1.320050148205932e-06, "loss": 0.2739, "step": 28467 }, { "epoch": 1.3335831732796177, "grad_norm": 0.6173365038492703, "learning_rate": 1.3198829542682556e-06, "loss": 0.2778, "step": 28468 }, { "epoch": 1.3336300182695462, "grad_norm": 0.5661354522718082, "learning_rate": 1.319715767121857e-06, "loss": 0.2503, "step": 28469 }, { "epoch": 1.3336768632594744, "grad_norm": 0.5811062518041166, "learning_rate": 1.3195485867676972e-06, "loss": 0.271, "step": 28470 }, { "epoch": 1.3337237082494027, "grad_norm": 0.6032390089929867, "learning_rate": 1.319381413206738e-06, "loss": 0.2766, "step": 28471 }, { "epoch": 1.3337705532393311, "grad_norm": 0.5782505649855482, "learning_rate": 1.3192142464399422e-06, "loss": 0.2604, "step": 28472 }, { "epoch": 1.3338173982292594, "grad_norm": 0.5920103826360275, "learning_rate": 1.3190470864682725e-06, "loss": 0.2845, "step": 28473 }, { "epoch": 1.3338642432191876, "grad_norm": 0.6549070444629768, "learning_rate": 1.3188799332926894e-06, "loss": 0.295, "step": 28474 }, { "epoch": 1.333911088209116, "grad_norm": 0.5927330106960862, "learning_rate": 1.3187127869141552e-06, "loss": 0.2701, "step": 28475 }, { "epoch": 1.3339579331990443, "grad_norm": 0.6150925421064309, "learning_rate": 1.3185456473336328e-06, "loss": 0.275, "step": 28476 }, { "epoch": 1.3340047781889726, "grad_norm": 0.6020852577649353, "learning_rate": 1.3183785145520825e-06, "loss": 0.2575, "step": 28477 }, { "epoch": 1.334051623178901, "grad_norm": 0.6288863833150048, "learning_rate": 1.3182113885704667e-06, "loss": 0.2892, "step": 28478 }, { "epoch": 1.3340984681688293, "grad_norm": 0.5947856777504655, "learning_rate": 1.3180442693897485e-06, "loss": 0.2681, "step": 28479 }, { "epoch": 1.3341453131587577, "grad_norm": 0.6328528196412677, "learning_rate": 1.3178771570108873e-06, "loss": 0.2656, "step": 28480 }, { "epoch": 1.334192158148686, "grad_norm": 0.5546460242942153, "learning_rate": 1.3177100514348462e-06, "loss": 0.2656, "step": 28481 }, { "epoch": 1.3342390031386144, "grad_norm": 0.5914339205770228, "learning_rate": 1.3175429526625865e-06, "loss": 0.2909, "step": 28482 }, { "epoch": 1.3342858481285427, "grad_norm": 0.5707230191392056, "learning_rate": 1.3173758606950708e-06, "loss": 0.2503, "step": 28483 }, { "epoch": 1.334332693118471, "grad_norm": 0.6037859482119269, "learning_rate": 1.3172087755332585e-06, "loss": 0.2884, "step": 28484 }, { "epoch": 1.3343795381083994, "grad_norm": 0.6142368828067202, "learning_rate": 1.3170416971781139e-06, "loss": 0.2753, "step": 28485 }, { "epoch": 1.3344263830983276, "grad_norm": 0.5827372975592473, "learning_rate": 1.316874625630596e-06, "loss": 0.275, "step": 28486 }, { "epoch": 1.3344732280882559, "grad_norm": 0.6091152795081788, "learning_rate": 1.3167075608916669e-06, "loss": 0.2715, "step": 28487 }, { "epoch": 1.3345200730781843, "grad_norm": 0.5915780094230337, "learning_rate": 1.3165405029622885e-06, "loss": 0.2776, "step": 28488 }, { "epoch": 1.3345669180681126, "grad_norm": 0.6035153853968268, "learning_rate": 1.3163734518434224e-06, "loss": 0.2737, "step": 28489 }, { "epoch": 1.334613763058041, "grad_norm": 0.6802318278428194, "learning_rate": 1.3162064075360304e-06, "loss": 0.2899, "step": 28490 }, { "epoch": 1.3346606080479693, "grad_norm": 0.6151710569040615, "learning_rate": 1.316039370041073e-06, "loss": 0.2911, "step": 28491 }, { "epoch": 1.3347074530378977, "grad_norm": 0.612490517051796, "learning_rate": 1.3158723393595101e-06, "loss": 0.2788, "step": 28492 }, { "epoch": 1.334754298027826, "grad_norm": 0.5555675509656343, "learning_rate": 1.3157053154923043e-06, "loss": 0.2654, "step": 28493 }, { "epoch": 1.3348011430177542, "grad_norm": 0.6284574435885283, "learning_rate": 1.3155382984404173e-06, "loss": 0.2841, "step": 28494 }, { "epoch": 1.3348479880076827, "grad_norm": 0.5916818986436317, "learning_rate": 1.3153712882048091e-06, "loss": 0.2729, "step": 28495 }, { "epoch": 1.334894832997611, "grad_norm": 0.5957073550340451, "learning_rate": 1.3152042847864415e-06, "loss": 0.2692, "step": 28496 }, { "epoch": 1.3349416779875392, "grad_norm": 0.6176256283097907, "learning_rate": 1.315037288186277e-06, "loss": 0.2681, "step": 28497 }, { "epoch": 1.3349885229774676, "grad_norm": 0.5359080185468956, "learning_rate": 1.3148702984052747e-06, "loss": 0.2741, "step": 28498 }, { "epoch": 1.3350353679673959, "grad_norm": 0.5552864843519955, "learning_rate": 1.3147033154443947e-06, "loss": 0.2532, "step": 28499 }, { "epoch": 1.335082212957324, "grad_norm": 0.6121982576220524, "learning_rate": 1.3145363393045995e-06, "loss": 0.2892, "step": 28500 }, { "epoch": 1.3351290579472526, "grad_norm": 0.5672548917068735, "learning_rate": 1.3143693699868497e-06, "loss": 0.2847, "step": 28501 }, { "epoch": 1.3351759029371808, "grad_norm": 0.5574271153755835, "learning_rate": 1.3142024074921062e-06, "loss": 0.2669, "step": 28502 }, { "epoch": 1.3352227479271093, "grad_norm": 0.5386935355898006, "learning_rate": 1.3140354518213306e-06, "loss": 0.2548, "step": 28503 }, { "epoch": 1.3352695929170375, "grad_norm": 0.5285843367289051, "learning_rate": 1.3138685029754822e-06, "loss": 0.2595, "step": 28504 }, { "epoch": 1.335316437906966, "grad_norm": 0.627975070376952, "learning_rate": 1.3137015609555232e-06, "loss": 0.2854, "step": 28505 }, { "epoch": 1.3353632828968942, "grad_norm": 0.5641923967030209, "learning_rate": 1.3135346257624126e-06, "loss": 0.2633, "step": 28506 }, { "epoch": 1.3354101278868225, "grad_norm": 0.6053060768984927, "learning_rate": 1.3133676973971122e-06, "loss": 0.2809, "step": 28507 }, { "epoch": 1.335456972876751, "grad_norm": 0.616059684615228, "learning_rate": 1.3132007758605822e-06, "loss": 0.2699, "step": 28508 }, { "epoch": 1.3355038178666792, "grad_norm": 0.593746634475666, "learning_rate": 1.3130338611537836e-06, "loss": 0.2616, "step": 28509 }, { "epoch": 1.3355506628566074, "grad_norm": 0.6475879055390457, "learning_rate": 1.3128669532776778e-06, "loss": 0.2897, "step": 28510 }, { "epoch": 1.3355975078465359, "grad_norm": 0.6047394048738665, "learning_rate": 1.3127000522332239e-06, "loss": 0.2901, "step": 28511 }, { "epoch": 1.335644352836464, "grad_norm": 0.5784823151759643, "learning_rate": 1.3125331580213824e-06, "loss": 0.26, "step": 28512 }, { "epoch": 1.3356911978263923, "grad_norm": 0.5963328799888407, "learning_rate": 1.312366270643114e-06, "loss": 0.2698, "step": 28513 }, { "epoch": 1.3357380428163208, "grad_norm": 0.5738285941955789, "learning_rate": 1.312199390099379e-06, "loss": 0.27, "step": 28514 }, { "epoch": 1.335784887806249, "grad_norm": 0.6073891464772355, "learning_rate": 1.312032516391138e-06, "loss": 0.278, "step": 28515 }, { "epoch": 1.3358317327961775, "grad_norm": 0.5828367562313961, "learning_rate": 1.3118656495193522e-06, "loss": 0.2721, "step": 28516 }, { "epoch": 1.3358785777861057, "grad_norm": 0.589439716472458, "learning_rate": 1.3116987894849804e-06, "loss": 0.2801, "step": 28517 }, { "epoch": 1.3359254227760342, "grad_norm": 0.5837143504952294, "learning_rate": 1.3115319362889838e-06, "loss": 0.2657, "step": 28518 }, { "epoch": 1.3359722677659625, "grad_norm": 0.6171389369286733, "learning_rate": 1.3113650899323215e-06, "loss": 0.2699, "step": 28519 }, { "epoch": 1.3360191127558907, "grad_norm": 0.6088331454169965, "learning_rate": 1.3111982504159542e-06, "loss": 0.2719, "step": 28520 }, { "epoch": 1.3360659577458192, "grad_norm": 0.615383985966849, "learning_rate": 1.3110314177408423e-06, "loss": 0.2715, "step": 28521 }, { "epoch": 1.3361128027357474, "grad_norm": 0.6054310861200619, "learning_rate": 1.310864591907947e-06, "loss": 0.2694, "step": 28522 }, { "epoch": 1.3361596477256756, "grad_norm": 0.5897278973732197, "learning_rate": 1.3106977729182258e-06, "loss": 0.2784, "step": 28523 }, { "epoch": 1.336206492715604, "grad_norm": 0.5994061981203875, "learning_rate": 1.3105309607726402e-06, "loss": 0.2558, "step": 28524 }, { "epoch": 1.3362533377055323, "grad_norm": 0.5629175112171289, "learning_rate": 1.3103641554721509e-06, "loss": 0.2643, "step": 28525 }, { "epoch": 1.3363001826954608, "grad_norm": 0.5854108719192584, "learning_rate": 1.3101973570177157e-06, "loss": 0.271, "step": 28526 }, { "epoch": 1.336347027685389, "grad_norm": 0.6370494435932358, "learning_rate": 1.3100305654102957e-06, "loss": 0.2749, "step": 28527 }, { "epoch": 1.3363938726753175, "grad_norm": 0.5921166510425103, "learning_rate": 1.309863780650852e-06, "loss": 0.279, "step": 28528 }, { "epoch": 1.3364407176652457, "grad_norm": 0.5666335136034318, "learning_rate": 1.3096970027403416e-06, "loss": 0.264, "step": 28529 }, { "epoch": 1.336487562655174, "grad_norm": 0.5566638376481758, "learning_rate": 1.309530231679726e-06, "loss": 0.2693, "step": 28530 }, { "epoch": 1.3365344076451025, "grad_norm": 0.5652951228418214, "learning_rate": 1.3093634674699646e-06, "loss": 0.2666, "step": 28531 }, { "epoch": 1.3365812526350307, "grad_norm": 0.6024294650815728, "learning_rate": 1.3091967101120184e-06, "loss": 0.2805, "step": 28532 }, { "epoch": 1.336628097624959, "grad_norm": 0.5861190240919697, "learning_rate": 1.3090299596068451e-06, "loss": 0.2582, "step": 28533 }, { "epoch": 1.3366749426148874, "grad_norm": 0.624395772942898, "learning_rate": 1.3088632159554055e-06, "loss": 0.2837, "step": 28534 }, { "epoch": 1.3367217876048156, "grad_norm": 0.6465003955959796, "learning_rate": 1.308696479158658e-06, "loss": 0.3032, "step": 28535 }, { "epoch": 1.3367686325947439, "grad_norm": 0.5868375090962158, "learning_rate": 1.3085297492175628e-06, "loss": 0.2974, "step": 28536 }, { "epoch": 1.3368154775846723, "grad_norm": 0.601193781298603, "learning_rate": 1.3083630261330792e-06, "loss": 0.2836, "step": 28537 }, { "epoch": 1.3368623225746006, "grad_norm": 0.5459487581490192, "learning_rate": 1.3081963099061674e-06, "loss": 0.2598, "step": 28538 }, { "epoch": 1.336909167564529, "grad_norm": 0.6252501692374182, "learning_rate": 1.3080296005377867e-06, "loss": 0.2527, "step": 28539 }, { "epoch": 1.3369560125544573, "grad_norm": 0.5507924634861433, "learning_rate": 1.3078628980288966e-06, "loss": 0.2537, "step": 28540 }, { "epoch": 1.3370028575443857, "grad_norm": 0.6633556775821063, "learning_rate": 1.3076962023804546e-06, "loss": 0.3017, "step": 28541 }, { "epoch": 1.337049702534314, "grad_norm": 0.582006012554237, "learning_rate": 1.3075295135934213e-06, "loss": 0.2662, "step": 28542 }, { "epoch": 1.3370965475242422, "grad_norm": 0.6056694865769542, "learning_rate": 1.307362831668756e-06, "loss": 0.2708, "step": 28543 }, { "epoch": 1.3371433925141707, "grad_norm": 0.5995537384841242, "learning_rate": 1.3071961566074177e-06, "loss": 0.2723, "step": 28544 }, { "epoch": 1.337190237504099, "grad_norm": 0.5872235218197296, "learning_rate": 1.3070294884103669e-06, "loss": 0.2734, "step": 28545 }, { "epoch": 1.3372370824940272, "grad_norm": 0.6464403366884133, "learning_rate": 1.3068628270785603e-06, "loss": 0.2862, "step": 28546 }, { "epoch": 1.3372839274839556, "grad_norm": 0.6085306476833818, "learning_rate": 1.3066961726129596e-06, "loss": 0.2621, "step": 28547 }, { "epoch": 1.3373307724738839, "grad_norm": 0.6487796129022695, "learning_rate": 1.306529525014521e-06, "loss": 0.2979, "step": 28548 }, { "epoch": 1.3373776174638121, "grad_norm": 0.5899775809638604, "learning_rate": 1.3063628842842051e-06, "loss": 0.2784, "step": 28549 }, { "epoch": 1.3374244624537406, "grad_norm": 0.6004182091271387, "learning_rate": 1.3061962504229714e-06, "loss": 0.2801, "step": 28550 }, { "epoch": 1.3374713074436688, "grad_norm": 0.59082001812799, "learning_rate": 1.3060296234317777e-06, "loss": 0.2698, "step": 28551 }, { "epoch": 1.3375181524335973, "grad_norm": 0.5670685948385183, "learning_rate": 1.3058630033115847e-06, "loss": 0.2528, "step": 28552 }, { "epoch": 1.3375649974235255, "grad_norm": 0.5939798370051893, "learning_rate": 1.3056963900633497e-06, "loss": 0.2833, "step": 28553 }, { "epoch": 1.337611842413454, "grad_norm": 0.6122251169224378, "learning_rate": 1.3055297836880309e-06, "loss": 0.274, "step": 28554 }, { "epoch": 1.3376586874033822, "grad_norm": 0.6038266669167076, "learning_rate": 1.3053631841865883e-06, "loss": 0.2885, "step": 28555 }, { "epoch": 1.3377055323933105, "grad_norm": 0.610902688169486, "learning_rate": 1.3051965915599799e-06, "loss": 0.2768, "step": 28556 }, { "epoch": 1.337752377383239, "grad_norm": 0.5782648239608287, "learning_rate": 1.3050300058091647e-06, "loss": 0.2707, "step": 28557 }, { "epoch": 1.3377992223731672, "grad_norm": 0.5468088359499227, "learning_rate": 1.3048634269351029e-06, "loss": 0.2641, "step": 28558 }, { "epoch": 1.3378460673630954, "grad_norm": 0.6277246031171413, "learning_rate": 1.3046968549387507e-06, "loss": 0.2715, "step": 28559 }, { "epoch": 1.3378929123530239, "grad_norm": 0.6180699604096527, "learning_rate": 1.3045302898210682e-06, "loss": 0.2754, "step": 28560 }, { "epoch": 1.3379397573429521, "grad_norm": 0.5900788681872678, "learning_rate": 1.3043637315830126e-06, "loss": 0.2708, "step": 28561 }, { "epoch": 1.3379866023328806, "grad_norm": 0.5992740015545768, "learning_rate": 1.3041971802255436e-06, "loss": 0.271, "step": 28562 }, { "epoch": 1.3380334473228088, "grad_norm": 0.6141567047198782, "learning_rate": 1.3040306357496188e-06, "loss": 0.2894, "step": 28563 }, { "epoch": 1.3380802923127373, "grad_norm": 0.6118206632633213, "learning_rate": 1.3038640981561985e-06, "loss": 0.2743, "step": 28564 }, { "epoch": 1.3381271373026655, "grad_norm": 0.605847684372031, "learning_rate": 1.3036975674462382e-06, "loss": 0.2713, "step": 28565 }, { "epoch": 1.3381739822925938, "grad_norm": 0.6641761880040669, "learning_rate": 1.303531043620698e-06, "loss": 0.2818, "step": 28566 }, { "epoch": 1.3382208272825222, "grad_norm": 0.5694924734043119, "learning_rate": 1.303364526680537e-06, "loss": 0.2626, "step": 28567 }, { "epoch": 1.3382676722724505, "grad_norm": 0.6037624536546157, "learning_rate": 1.303198016626711e-06, "loss": 0.2612, "step": 28568 }, { "epoch": 1.3383145172623787, "grad_norm": 0.5935430739701519, "learning_rate": 1.30303151346018e-06, "loss": 0.2801, "step": 28569 }, { "epoch": 1.3383613622523072, "grad_norm": 0.6136777524986865, "learning_rate": 1.302865017181903e-06, "loss": 0.2747, "step": 28570 }, { "epoch": 1.3384082072422354, "grad_norm": 0.6281047739473512, "learning_rate": 1.3026985277928356e-06, "loss": 0.2735, "step": 28571 }, { "epoch": 1.3384550522321637, "grad_norm": 0.588320047627743, "learning_rate": 1.3025320452939373e-06, "loss": 0.2833, "step": 28572 }, { "epoch": 1.3385018972220921, "grad_norm": 0.5718663694938998, "learning_rate": 1.3023655696861659e-06, "loss": 0.2636, "step": 28573 }, { "epoch": 1.3385487422120204, "grad_norm": 0.5801263158366092, "learning_rate": 1.3021991009704807e-06, "loss": 0.2531, "step": 28574 }, { "epoch": 1.3385955872019488, "grad_norm": 0.6129402627500778, "learning_rate": 1.3020326391478376e-06, "loss": 0.2791, "step": 28575 }, { "epoch": 1.338642432191877, "grad_norm": 0.6115088590989346, "learning_rate": 1.301866184219196e-06, "loss": 0.2777, "step": 28576 }, { "epoch": 1.3386892771818055, "grad_norm": 0.618316272015917, "learning_rate": 1.3016997361855138e-06, "loss": 0.2738, "step": 28577 }, { "epoch": 1.3387361221717338, "grad_norm": 0.5957987968103093, "learning_rate": 1.3015332950477478e-06, "loss": 0.2749, "step": 28578 }, { "epoch": 1.338782967161662, "grad_norm": 0.5629892876876268, "learning_rate": 1.3013668608068563e-06, "loss": 0.2619, "step": 28579 }, { "epoch": 1.3388298121515905, "grad_norm": 0.6119056319136233, "learning_rate": 1.3012004334637972e-06, "loss": 0.2677, "step": 28580 }, { "epoch": 1.3388766571415187, "grad_norm": 0.5823922303835856, "learning_rate": 1.3010340130195296e-06, "loss": 0.2723, "step": 28581 }, { "epoch": 1.338923502131447, "grad_norm": 0.5989111444268276, "learning_rate": 1.3008675994750087e-06, "loss": 0.2715, "step": 28582 }, { "epoch": 1.3389703471213754, "grad_norm": 0.5683024286604641, "learning_rate": 1.3007011928311945e-06, "loss": 0.2634, "step": 28583 }, { "epoch": 1.3390171921113037, "grad_norm": 0.5705271748182003, "learning_rate": 1.3005347930890422e-06, "loss": 0.2487, "step": 28584 }, { "epoch": 1.339064037101232, "grad_norm": 0.5875825360809964, "learning_rate": 1.300368400249511e-06, "loss": 0.2736, "step": 28585 }, { "epoch": 1.3391108820911604, "grad_norm": 0.6146618184437319, "learning_rate": 1.300202014313558e-06, "loss": 0.2794, "step": 28586 }, { "epoch": 1.3391577270810886, "grad_norm": 0.5706897163323391, "learning_rate": 1.3000356352821408e-06, "loss": 0.2557, "step": 28587 }, { "epoch": 1.339204572071017, "grad_norm": 0.5795882497579065, "learning_rate": 1.299869263156218e-06, "loss": 0.2666, "step": 28588 }, { "epoch": 1.3392514170609453, "grad_norm": 0.5710721472534779, "learning_rate": 1.299702897936746e-06, "loss": 0.2845, "step": 28589 }, { "epoch": 1.3392982620508738, "grad_norm": 0.5960002129541495, "learning_rate": 1.2995365396246812e-06, "loss": 0.2571, "step": 28590 }, { "epoch": 1.339345107040802, "grad_norm": 0.6248983160331356, "learning_rate": 1.2993701882209814e-06, "loss": 0.2803, "step": 28591 }, { "epoch": 1.3393919520307302, "grad_norm": 0.5731578126437549, "learning_rate": 1.299203843726605e-06, "loss": 0.2747, "step": 28592 }, { "epoch": 1.3394387970206587, "grad_norm": 0.5652470703496365, "learning_rate": 1.2990375061425085e-06, "loss": 0.2502, "step": 28593 }, { "epoch": 1.339485642010587, "grad_norm": 0.5528214957139215, "learning_rate": 1.29887117546965e-06, "loss": 0.2692, "step": 28594 }, { "epoch": 1.3395324870005152, "grad_norm": 0.6333936468831325, "learning_rate": 1.2987048517089853e-06, "loss": 0.2834, "step": 28595 }, { "epoch": 1.3395793319904437, "grad_norm": 0.6085822280464404, "learning_rate": 1.2985385348614731e-06, "loss": 0.2816, "step": 28596 }, { "epoch": 1.339626176980372, "grad_norm": 0.5559430890980346, "learning_rate": 1.298372224928069e-06, "loss": 0.2594, "step": 28597 }, { "epoch": 1.3396730219703004, "grad_norm": 0.5983293146011912, "learning_rate": 1.2982059219097304e-06, "loss": 0.2715, "step": 28598 }, { "epoch": 1.3397198669602286, "grad_norm": 0.6210448667067205, "learning_rate": 1.2980396258074146e-06, "loss": 0.2878, "step": 28599 }, { "epoch": 1.339766711950157, "grad_norm": 0.6125163344346969, "learning_rate": 1.297873336622079e-06, "loss": 0.2735, "step": 28600 }, { "epoch": 1.3398135569400853, "grad_norm": 0.5803128486314857, "learning_rate": 1.2977070543546807e-06, "loss": 0.2825, "step": 28601 }, { "epoch": 1.3398604019300135, "grad_norm": 0.5766305665702589, "learning_rate": 1.2975407790061764e-06, "loss": 0.2838, "step": 28602 }, { "epoch": 1.339907246919942, "grad_norm": 0.6076972176860203, "learning_rate": 1.2973745105775218e-06, "loss": 0.2744, "step": 28603 }, { "epoch": 1.3399540919098702, "grad_norm": 0.6007314925051093, "learning_rate": 1.2972082490696744e-06, "loss": 0.271, "step": 28604 }, { "epoch": 1.3400009368997985, "grad_norm": 0.6133952869844999, "learning_rate": 1.2970419944835913e-06, "loss": 0.2662, "step": 28605 }, { "epoch": 1.340047781889727, "grad_norm": 0.5935917507497533, "learning_rate": 1.2968757468202291e-06, "loss": 0.2742, "step": 28606 }, { "epoch": 1.3400946268796552, "grad_norm": 0.6263987047018705, "learning_rate": 1.2967095060805456e-06, "loss": 0.2778, "step": 28607 }, { "epoch": 1.3401414718695834, "grad_norm": 0.6052383260026528, "learning_rate": 1.2965432722654958e-06, "loss": 0.2656, "step": 28608 }, { "epoch": 1.340188316859512, "grad_norm": 0.5967338702080762, "learning_rate": 1.2963770453760378e-06, "loss": 0.2697, "step": 28609 }, { "epoch": 1.3402351618494401, "grad_norm": 0.5709762890532645, "learning_rate": 1.296210825413126e-06, "loss": 0.259, "step": 28610 }, { "epoch": 1.3402820068393686, "grad_norm": 0.5967747550123698, "learning_rate": 1.2960446123777187e-06, "loss": 0.265, "step": 28611 }, { "epoch": 1.3403288518292968, "grad_norm": 0.6080230607118947, "learning_rate": 1.2958784062707723e-06, "loss": 0.2678, "step": 28612 }, { "epoch": 1.3403756968192253, "grad_norm": 0.6180968734721477, "learning_rate": 1.2957122070932438e-06, "loss": 0.2685, "step": 28613 }, { "epoch": 1.3404225418091535, "grad_norm": 0.590927258855897, "learning_rate": 1.2955460148460874e-06, "loss": 0.279, "step": 28614 }, { "epoch": 1.3404693867990818, "grad_norm": 0.5905001916688855, "learning_rate": 1.2953798295302616e-06, "loss": 0.2624, "step": 28615 }, { "epoch": 1.3405162317890102, "grad_norm": 0.5864494559298555, "learning_rate": 1.2952136511467228e-06, "loss": 0.2667, "step": 28616 }, { "epoch": 1.3405630767789385, "grad_norm": 0.5717228846181449, "learning_rate": 1.2950474796964257e-06, "loss": 0.2693, "step": 28617 }, { "epoch": 1.3406099217688667, "grad_norm": 0.5769294930505947, "learning_rate": 1.2948813151803273e-06, "loss": 0.277, "step": 28618 }, { "epoch": 1.3406567667587952, "grad_norm": 0.6380514195669006, "learning_rate": 1.2947151575993854e-06, "loss": 0.2826, "step": 28619 }, { "epoch": 1.3407036117487234, "grad_norm": 0.6505089359437155, "learning_rate": 1.2945490069545536e-06, "loss": 0.2935, "step": 28620 }, { "epoch": 1.3407504567386517, "grad_norm": 0.5581795179317851, "learning_rate": 1.2943828632467892e-06, "loss": 0.2562, "step": 28621 }, { "epoch": 1.3407973017285801, "grad_norm": 0.662496529715681, "learning_rate": 1.2942167264770487e-06, "loss": 0.2657, "step": 28622 }, { "epoch": 1.3408441467185084, "grad_norm": 0.5940563471635356, "learning_rate": 1.2940505966462884e-06, "loss": 0.2792, "step": 28623 }, { "epoch": 1.3408909917084368, "grad_norm": 0.5324471137155604, "learning_rate": 1.2938844737554635e-06, "loss": 0.2603, "step": 28624 }, { "epoch": 1.340937836698365, "grad_norm": 0.5771244527558642, "learning_rate": 1.2937183578055307e-06, "loss": 0.2778, "step": 28625 }, { "epoch": 1.3409846816882935, "grad_norm": 0.6491160724850942, "learning_rate": 1.293552248797445e-06, "loss": 0.2809, "step": 28626 }, { "epoch": 1.3410315266782218, "grad_norm": 0.563956440341517, "learning_rate": 1.2933861467321627e-06, "loss": 0.2564, "step": 28627 }, { "epoch": 1.34107837166815, "grad_norm": 0.6020234685502612, "learning_rate": 1.2932200516106398e-06, "loss": 0.2766, "step": 28628 }, { "epoch": 1.3411252166580785, "grad_norm": 0.6042328098980166, "learning_rate": 1.2930539634338322e-06, "loss": 0.2704, "step": 28629 }, { "epoch": 1.3411720616480067, "grad_norm": 0.6179543474802938, "learning_rate": 1.2928878822026967e-06, "loss": 0.2756, "step": 28630 }, { "epoch": 1.341218906637935, "grad_norm": 0.5914850518455724, "learning_rate": 1.292721807918188e-06, "loss": 0.2582, "step": 28631 }, { "epoch": 1.3412657516278634, "grad_norm": 0.5732782652655989, "learning_rate": 1.2925557405812608e-06, "loss": 0.2818, "step": 28632 }, { "epoch": 1.3413125966177917, "grad_norm": 0.5574530335318071, "learning_rate": 1.292389680192872e-06, "loss": 0.2722, "step": 28633 }, { "epoch": 1.34135944160772, "grad_norm": 0.6200945154827882, "learning_rate": 1.292223626753977e-06, "loss": 0.2812, "step": 28634 }, { "epoch": 1.3414062865976484, "grad_norm": 0.5941412173347925, "learning_rate": 1.2920575802655316e-06, "loss": 0.272, "step": 28635 }, { "epoch": 1.3414531315875768, "grad_norm": 0.604210699944704, "learning_rate": 1.291891540728491e-06, "loss": 0.2784, "step": 28636 }, { "epoch": 1.341499976577505, "grad_norm": 0.6041117392548451, "learning_rate": 1.291725508143812e-06, "loss": 0.2696, "step": 28637 }, { "epoch": 1.3415468215674333, "grad_norm": 0.6106587875621201, "learning_rate": 1.2915594825124488e-06, "loss": 0.2635, "step": 28638 }, { "epoch": 1.3415936665573618, "grad_norm": 0.6023371789698011, "learning_rate": 1.2913934638353565e-06, "loss": 0.2661, "step": 28639 }, { "epoch": 1.34164051154729, "grad_norm": 0.588700954590226, "learning_rate": 1.291227452113491e-06, "loss": 0.2661, "step": 28640 }, { "epoch": 1.3416873565372183, "grad_norm": 0.6055799853937454, "learning_rate": 1.2910614473478072e-06, "loss": 0.2976, "step": 28641 }, { "epoch": 1.3417342015271467, "grad_norm": 0.6128950333810627, "learning_rate": 1.2908954495392615e-06, "loss": 0.2705, "step": 28642 }, { "epoch": 1.341781046517075, "grad_norm": 0.6022923283413785, "learning_rate": 1.2907294586888092e-06, "loss": 0.2831, "step": 28643 }, { "epoch": 1.3418278915070032, "grad_norm": 0.5611251720646885, "learning_rate": 1.2905634747974045e-06, "loss": 0.2628, "step": 28644 }, { "epoch": 1.3418747364969317, "grad_norm": 0.6060209145420058, "learning_rate": 1.2903974978660034e-06, "loss": 0.2839, "step": 28645 }, { "epoch": 1.34192158148686, "grad_norm": 0.6060136308486547, "learning_rate": 1.2902315278955595e-06, "loss": 0.264, "step": 28646 }, { "epoch": 1.3419684264767884, "grad_norm": 0.5571867208087404, "learning_rate": 1.2900655648870298e-06, "loss": 0.2663, "step": 28647 }, { "epoch": 1.3420152714667166, "grad_norm": 0.570210836018471, "learning_rate": 1.2898996088413684e-06, "loss": 0.2602, "step": 28648 }, { "epoch": 1.342062116456645, "grad_norm": 0.5625901817993398, "learning_rate": 1.2897336597595306e-06, "loss": 0.2557, "step": 28649 }, { "epoch": 1.3421089614465733, "grad_norm": 0.6013439256953239, "learning_rate": 1.289567717642472e-06, "loss": 0.2731, "step": 28650 }, { "epoch": 1.3421558064365016, "grad_norm": 0.630584828331849, "learning_rate": 1.2894017824911476e-06, "loss": 0.2751, "step": 28651 }, { "epoch": 1.34220265142643, "grad_norm": 0.5878807368429848, "learning_rate": 1.2892358543065104e-06, "loss": 0.28, "step": 28652 }, { "epoch": 1.3422494964163583, "grad_norm": 0.6270573041659971, "learning_rate": 1.2890699330895163e-06, "loss": 0.2845, "step": 28653 }, { "epoch": 1.3422963414062865, "grad_norm": 0.5907208948359439, "learning_rate": 1.2889040188411207e-06, "loss": 0.2805, "step": 28654 }, { "epoch": 1.342343186396215, "grad_norm": 0.5876733350459485, "learning_rate": 1.2887381115622777e-06, "loss": 0.2727, "step": 28655 }, { "epoch": 1.3423900313861432, "grad_norm": 0.6085600053154991, "learning_rate": 1.288572211253944e-06, "loss": 0.2879, "step": 28656 }, { "epoch": 1.3424368763760715, "grad_norm": 0.5641597887543987, "learning_rate": 1.288406317917071e-06, "loss": 0.2699, "step": 28657 }, { "epoch": 1.342483721366, "grad_norm": 0.6470710159240495, "learning_rate": 1.2882404315526168e-06, "loss": 0.2942, "step": 28658 }, { "epoch": 1.3425305663559282, "grad_norm": 0.581790498277443, "learning_rate": 1.2880745521615328e-06, "loss": 0.2759, "step": 28659 }, { "epoch": 1.3425774113458566, "grad_norm": 0.6012331451357757, "learning_rate": 1.2879086797447758e-06, "loss": 0.2722, "step": 28660 }, { "epoch": 1.3426242563357849, "grad_norm": 0.6635783192143772, "learning_rate": 1.2877428143032995e-06, "loss": 0.2841, "step": 28661 }, { "epoch": 1.3426711013257133, "grad_norm": 0.6022563915558388, "learning_rate": 1.2875769558380592e-06, "loss": 0.2671, "step": 28662 }, { "epoch": 1.3427179463156416, "grad_norm": 0.5741326577033027, "learning_rate": 1.2874111043500082e-06, "loss": 0.2792, "step": 28663 }, { "epoch": 1.3427647913055698, "grad_norm": 0.6064884873612186, "learning_rate": 1.2872452598401011e-06, "loss": 0.283, "step": 28664 }, { "epoch": 1.3428116362954983, "grad_norm": 0.6232930556897988, "learning_rate": 1.2870794223092941e-06, "loss": 0.2791, "step": 28665 }, { "epoch": 1.3428584812854265, "grad_norm": 0.5840925693512686, "learning_rate": 1.286913591758539e-06, "loss": 0.2662, "step": 28666 }, { "epoch": 1.3429053262753547, "grad_norm": 0.6180899134646405, "learning_rate": 1.2867477681887913e-06, "loss": 0.2666, "step": 28667 }, { "epoch": 1.3429521712652832, "grad_norm": 0.606755280248863, "learning_rate": 1.2865819516010065e-06, "loss": 0.2726, "step": 28668 }, { "epoch": 1.3429990162552115, "grad_norm": 0.6231648612002215, "learning_rate": 1.2864161419961362e-06, "loss": 0.2832, "step": 28669 }, { "epoch": 1.3430458612451397, "grad_norm": 0.613177614352387, "learning_rate": 1.286250339375136e-06, "loss": 0.2909, "step": 28670 }, { "epoch": 1.3430927062350682, "grad_norm": 0.6159781043442912, "learning_rate": 1.2860845437389602e-06, "loss": 0.2784, "step": 28671 }, { "epoch": 1.3431395512249966, "grad_norm": 0.6207708446614563, "learning_rate": 1.2859187550885639e-06, "loss": 0.2796, "step": 28672 }, { "epoch": 1.3431863962149249, "grad_norm": 0.5774072121861484, "learning_rate": 1.2857529734248986e-06, "loss": 0.2799, "step": 28673 }, { "epoch": 1.343233241204853, "grad_norm": 0.6084078049261011, "learning_rate": 1.285587198748921e-06, "loss": 0.301, "step": 28674 }, { "epoch": 1.3432800861947816, "grad_norm": 0.6003265302141462, "learning_rate": 1.285421431061583e-06, "loss": 0.2697, "step": 28675 }, { "epoch": 1.3433269311847098, "grad_norm": 0.5722637200328177, "learning_rate": 1.2852556703638391e-06, "loss": 0.252, "step": 28676 }, { "epoch": 1.343373776174638, "grad_norm": 0.6045994788971787, "learning_rate": 1.285089916656644e-06, "loss": 0.2717, "step": 28677 }, { "epoch": 1.3434206211645665, "grad_norm": 0.5893269589735242, "learning_rate": 1.2849241699409503e-06, "loss": 0.2791, "step": 28678 }, { "epoch": 1.3434674661544947, "grad_norm": 0.5618787687989106, "learning_rate": 1.284758430217714e-06, "loss": 0.2498, "step": 28679 }, { "epoch": 1.343514311144423, "grad_norm": 0.5812652182046917, "learning_rate": 1.2845926974878874e-06, "loss": 0.2699, "step": 28680 }, { "epoch": 1.3435611561343515, "grad_norm": 0.5689797522702926, "learning_rate": 1.2844269717524239e-06, "loss": 0.2715, "step": 28681 }, { "epoch": 1.3436080011242797, "grad_norm": 0.6440023516231437, "learning_rate": 1.284261253012277e-06, "loss": 0.3049, "step": 28682 }, { "epoch": 1.3436548461142082, "grad_norm": 0.5588509164085572, "learning_rate": 1.2840955412684014e-06, "loss": 0.2508, "step": 28683 }, { "epoch": 1.3437016911041364, "grad_norm": 0.5994661308532094, "learning_rate": 1.28392983652175e-06, "loss": 0.2797, "step": 28684 }, { "epoch": 1.3437485360940649, "grad_norm": 0.575478154960199, "learning_rate": 1.283764138773278e-06, "loss": 0.2697, "step": 28685 }, { "epoch": 1.343795381083993, "grad_norm": 0.563426929716019, "learning_rate": 1.2835984480239367e-06, "loss": 0.2666, "step": 28686 }, { "epoch": 1.3438422260739213, "grad_norm": 0.55893561985093, "learning_rate": 1.2834327642746813e-06, "loss": 0.2689, "step": 28687 }, { "epoch": 1.3438890710638498, "grad_norm": 0.5163059718754708, "learning_rate": 1.2832670875264638e-06, "loss": 0.2378, "step": 28688 }, { "epoch": 1.343935916053778, "grad_norm": 0.609193505511221, "learning_rate": 1.2831014177802385e-06, "loss": 0.2925, "step": 28689 }, { "epoch": 1.3439827610437063, "grad_norm": 0.606800026277877, "learning_rate": 1.2829357550369586e-06, "loss": 0.2783, "step": 28690 }, { "epoch": 1.3440296060336347, "grad_norm": 0.6052002184390458, "learning_rate": 1.2827700992975772e-06, "loss": 0.2703, "step": 28691 }, { "epoch": 1.344076451023563, "grad_norm": 0.6028699826036703, "learning_rate": 1.2826044505630495e-06, "loss": 0.2816, "step": 28692 }, { "epoch": 1.3441232960134912, "grad_norm": 0.6213451335299749, "learning_rate": 1.2824388088343268e-06, "loss": 0.2892, "step": 28693 }, { "epoch": 1.3441701410034197, "grad_norm": 0.5430780155418362, "learning_rate": 1.2822731741123618e-06, "loss": 0.2518, "step": 28694 }, { "epoch": 1.344216985993348, "grad_norm": 0.5876268570392783, "learning_rate": 1.2821075463981087e-06, "loss": 0.281, "step": 28695 }, { "epoch": 1.3442638309832764, "grad_norm": 0.6093110822774352, "learning_rate": 1.281941925692521e-06, "loss": 0.2795, "step": 28696 }, { "epoch": 1.3443106759732046, "grad_norm": 0.5774344564453707, "learning_rate": 1.2817763119965505e-06, "loss": 0.2653, "step": 28697 }, { "epoch": 1.344357520963133, "grad_norm": 0.5881751357815573, "learning_rate": 1.2816107053111526e-06, "loss": 0.26, "step": 28698 }, { "epoch": 1.3444043659530613, "grad_norm": 0.6830527148838588, "learning_rate": 1.281445105637278e-06, "loss": 0.2876, "step": 28699 }, { "epoch": 1.3444512109429896, "grad_norm": 0.584671183217539, "learning_rate": 1.2812795129758812e-06, "loss": 0.2569, "step": 28700 }, { "epoch": 1.344498055932918, "grad_norm": 0.6136931441101683, "learning_rate": 1.281113927327914e-06, "loss": 0.2918, "step": 28701 }, { "epoch": 1.3445449009228463, "grad_norm": 0.5982988268584575, "learning_rate": 1.2809483486943291e-06, "loss": 0.276, "step": 28702 }, { "epoch": 1.3445917459127745, "grad_norm": 0.5930535070081673, "learning_rate": 1.2807827770760805e-06, "loss": 0.2602, "step": 28703 }, { "epoch": 1.344638590902703, "grad_norm": 0.6411056420510514, "learning_rate": 1.2806172124741214e-06, "loss": 0.2965, "step": 28704 }, { "epoch": 1.3446854358926312, "grad_norm": 0.6030605782945991, "learning_rate": 1.280451654889403e-06, "loss": 0.2801, "step": 28705 }, { "epoch": 1.3447322808825595, "grad_norm": 0.5544349618865506, "learning_rate": 1.2802861043228786e-06, "loss": 0.2555, "step": 28706 }, { "epoch": 1.344779125872488, "grad_norm": 0.5554753558827698, "learning_rate": 1.2801205607755022e-06, "loss": 0.2534, "step": 28707 }, { "epoch": 1.3448259708624164, "grad_norm": 0.6142767559495599, "learning_rate": 1.2799550242482242e-06, "loss": 0.2861, "step": 28708 }, { "epoch": 1.3448728158523446, "grad_norm": 0.5720564515431753, "learning_rate": 1.2797894947419986e-06, "loss": 0.268, "step": 28709 }, { "epoch": 1.3449196608422729, "grad_norm": 0.6137504435465746, "learning_rate": 1.2796239722577775e-06, "loss": 0.2707, "step": 28710 }, { "epoch": 1.3449665058322013, "grad_norm": 0.6575628951799584, "learning_rate": 1.2794584567965151e-06, "loss": 0.2872, "step": 28711 }, { "epoch": 1.3450133508221296, "grad_norm": 0.5970523713940236, "learning_rate": 1.2792929483591613e-06, "loss": 0.2761, "step": 28712 }, { "epoch": 1.3450601958120578, "grad_norm": 0.5746742672355029, "learning_rate": 1.2791274469466696e-06, "loss": 0.2644, "step": 28713 }, { "epoch": 1.3451070408019863, "grad_norm": 0.5758425140991562, "learning_rate": 1.2789619525599938e-06, "loss": 0.2606, "step": 28714 }, { "epoch": 1.3451538857919145, "grad_norm": 0.6250532458821655, "learning_rate": 1.278796465200084e-06, "loss": 0.2841, "step": 28715 }, { "epoch": 1.3452007307818428, "grad_norm": 0.5974754440730263, "learning_rate": 1.2786309848678938e-06, "loss": 0.2763, "step": 28716 }, { "epoch": 1.3452475757717712, "grad_norm": 0.5909135796048216, "learning_rate": 1.278465511564376e-06, "loss": 0.2688, "step": 28717 }, { "epoch": 1.3452944207616995, "grad_norm": 0.5830568905110786, "learning_rate": 1.2783000452904814e-06, "loss": 0.2639, "step": 28718 }, { "epoch": 1.345341265751628, "grad_norm": 0.5636704446048252, "learning_rate": 1.2781345860471628e-06, "loss": 0.2746, "step": 28719 }, { "epoch": 1.3453881107415562, "grad_norm": 0.5751698352014526, "learning_rate": 1.2779691338353727e-06, "loss": 0.2706, "step": 28720 }, { "epoch": 1.3454349557314846, "grad_norm": 0.5865991005696979, "learning_rate": 1.277803688656064e-06, "loss": 0.2763, "step": 28721 }, { "epoch": 1.3454818007214129, "grad_norm": 0.6091117060986208, "learning_rate": 1.277638250510187e-06, "loss": 0.267, "step": 28722 }, { "epoch": 1.3455286457113411, "grad_norm": 0.6264918442019026, "learning_rate": 1.2774728193986953e-06, "loss": 0.2827, "step": 28723 }, { "epoch": 1.3455754907012696, "grad_norm": 0.5463775191936153, "learning_rate": 1.2773073953225396e-06, "loss": 0.2441, "step": 28724 }, { "epoch": 1.3456223356911978, "grad_norm": 0.573970823686965, "learning_rate": 1.277141978282672e-06, "loss": 0.2655, "step": 28725 }, { "epoch": 1.345669180681126, "grad_norm": 0.6147493065906556, "learning_rate": 1.2769765682800455e-06, "loss": 0.2615, "step": 28726 }, { "epoch": 1.3457160256710545, "grad_norm": 0.6211001751465357, "learning_rate": 1.2768111653156118e-06, "loss": 0.2743, "step": 28727 }, { "epoch": 1.3457628706609828, "grad_norm": 0.6047065437309422, "learning_rate": 1.2766457693903228e-06, "loss": 0.2728, "step": 28728 }, { "epoch": 1.345809715650911, "grad_norm": 0.6284148121471494, "learning_rate": 1.2764803805051302e-06, "loss": 0.2731, "step": 28729 }, { "epoch": 1.3458565606408395, "grad_norm": 0.5643007037818848, "learning_rate": 1.276314998660984e-06, "loss": 0.262, "step": 28730 }, { "epoch": 1.3459034056307677, "grad_norm": 0.6246411310864927, "learning_rate": 1.2761496238588377e-06, "loss": 0.2842, "step": 28731 }, { "epoch": 1.3459502506206962, "grad_norm": 0.562883614436771, "learning_rate": 1.275984256099643e-06, "loss": 0.2651, "step": 28732 }, { "epoch": 1.3459970956106244, "grad_norm": 0.5733543435015315, "learning_rate": 1.275818895384351e-06, "loss": 0.2679, "step": 28733 }, { "epoch": 1.3460439406005529, "grad_norm": 0.5900201382664215, "learning_rate": 1.2756535417139146e-06, "loss": 0.2622, "step": 28734 }, { "epoch": 1.3460907855904811, "grad_norm": 0.58566533781895, "learning_rate": 1.2754881950892834e-06, "loss": 0.2669, "step": 28735 }, { "epoch": 1.3461376305804094, "grad_norm": 0.662505206174523, "learning_rate": 1.2753228555114109e-06, "loss": 0.28, "step": 28736 }, { "epoch": 1.3461844755703378, "grad_norm": 0.5679281797456612, "learning_rate": 1.2751575229812463e-06, "loss": 0.271, "step": 28737 }, { "epoch": 1.346231320560266, "grad_norm": 0.5525957473068087, "learning_rate": 1.2749921974997425e-06, "loss": 0.2709, "step": 28738 }, { "epoch": 1.3462781655501943, "grad_norm": 0.5865857579407566, "learning_rate": 1.2748268790678508e-06, "loss": 0.2699, "step": 28739 }, { "epoch": 1.3463250105401228, "grad_norm": 0.6378469973284768, "learning_rate": 1.2746615676865224e-06, "loss": 0.2829, "step": 28740 }, { "epoch": 1.346371855530051, "grad_norm": 0.592705632948997, "learning_rate": 1.2744962633567096e-06, "loss": 0.2598, "step": 28741 }, { "epoch": 1.3464187005199793, "grad_norm": 0.5749527200415776, "learning_rate": 1.2743309660793629e-06, "loss": 0.2688, "step": 28742 }, { "epoch": 1.3464655455099077, "grad_norm": 0.6212031418048303, "learning_rate": 1.2741656758554322e-06, "loss": 0.275, "step": 28743 }, { "epoch": 1.3465123904998362, "grad_norm": 0.5717052949424862, "learning_rate": 1.2740003926858703e-06, "loss": 0.2787, "step": 28744 }, { "epoch": 1.3465592354897644, "grad_norm": 0.5769620746193417, "learning_rate": 1.273835116571628e-06, "loss": 0.2531, "step": 28745 }, { "epoch": 1.3466060804796927, "grad_norm": 0.611408460406283, "learning_rate": 1.273669847513656e-06, "loss": 0.2714, "step": 28746 }, { "epoch": 1.3466529254696211, "grad_norm": 0.5819379622859899, "learning_rate": 1.2735045855129068e-06, "loss": 0.2732, "step": 28747 }, { "epoch": 1.3466997704595494, "grad_norm": 0.554507260162585, "learning_rate": 1.2733393305703298e-06, "loss": 0.2598, "step": 28748 }, { "epoch": 1.3467466154494776, "grad_norm": 0.581184729223013, "learning_rate": 1.2731740826868776e-06, "loss": 0.2665, "step": 28749 }, { "epoch": 1.346793460439406, "grad_norm": 0.5961150269207328, "learning_rate": 1.273008841863499e-06, "loss": 0.2735, "step": 28750 }, { "epoch": 1.3468403054293343, "grad_norm": 0.602719629095094, "learning_rate": 1.2728436081011464e-06, "loss": 0.2804, "step": 28751 }, { "epoch": 1.3468871504192625, "grad_norm": 0.5786785383784256, "learning_rate": 1.2726783814007702e-06, "loss": 0.2729, "step": 28752 }, { "epoch": 1.346933995409191, "grad_norm": 0.6291597234843672, "learning_rate": 1.2725131617633225e-06, "loss": 0.2838, "step": 28753 }, { "epoch": 1.3469808403991193, "grad_norm": 0.5966281739678908, "learning_rate": 1.272347949189752e-06, "loss": 0.282, "step": 28754 }, { "epoch": 1.3470276853890477, "grad_norm": 0.5872810888141682, "learning_rate": 1.2721827436810103e-06, "loss": 0.2739, "step": 28755 }, { "epoch": 1.347074530378976, "grad_norm": 0.5539454536411065, "learning_rate": 1.2720175452380496e-06, "loss": 0.2567, "step": 28756 }, { "epoch": 1.3471213753689044, "grad_norm": 0.6062435565455973, "learning_rate": 1.2718523538618182e-06, "loss": 0.2817, "step": 28757 }, { "epoch": 1.3471682203588327, "grad_norm": 0.6251456873000477, "learning_rate": 1.271687169553268e-06, "loss": 0.2868, "step": 28758 }, { "epoch": 1.347215065348761, "grad_norm": 0.5882287681576306, "learning_rate": 1.27152199231335e-06, "loss": 0.2751, "step": 28759 }, { "epoch": 1.3472619103386894, "grad_norm": 0.6165079493462233, "learning_rate": 1.2713568221430133e-06, "loss": 0.272, "step": 28760 }, { "epoch": 1.3473087553286176, "grad_norm": 0.6449095881541741, "learning_rate": 1.2711916590432096e-06, "loss": 0.2848, "step": 28761 }, { "epoch": 1.3473556003185458, "grad_norm": 0.6020714497129324, "learning_rate": 1.2710265030148888e-06, "loss": 0.2763, "step": 28762 }, { "epoch": 1.3474024453084743, "grad_norm": 0.6101209141464011, "learning_rate": 1.2708613540590026e-06, "loss": 0.2647, "step": 28763 }, { "epoch": 1.3474492902984025, "grad_norm": 0.6344699844848186, "learning_rate": 1.2706962121764992e-06, "loss": 0.2774, "step": 28764 }, { "epoch": 1.3474961352883308, "grad_norm": 0.6078659662536848, "learning_rate": 1.2705310773683316e-06, "loss": 0.2772, "step": 28765 }, { "epoch": 1.3475429802782593, "grad_norm": 0.6015977110263798, "learning_rate": 1.2703659496354475e-06, "loss": 0.2697, "step": 28766 }, { "epoch": 1.3475898252681875, "grad_norm": 0.5609148445298037, "learning_rate": 1.2702008289787982e-06, "loss": 0.2605, "step": 28767 }, { "epoch": 1.347636670258116, "grad_norm": 0.6197647584968332, "learning_rate": 1.2700357153993337e-06, "loss": 0.2931, "step": 28768 }, { "epoch": 1.3476835152480442, "grad_norm": 0.5690148322723806, "learning_rate": 1.269870608898005e-06, "loss": 0.2663, "step": 28769 }, { "epoch": 1.3477303602379727, "grad_norm": 0.5397291739649626, "learning_rate": 1.2697055094757626e-06, "loss": 0.2489, "step": 28770 }, { "epoch": 1.347777205227901, "grad_norm": 0.5692270148953845, "learning_rate": 1.2695404171335558e-06, "loss": 0.2634, "step": 28771 }, { "epoch": 1.3478240502178291, "grad_norm": 0.6264141167747462, "learning_rate": 1.2693753318723335e-06, "loss": 0.2648, "step": 28772 }, { "epoch": 1.3478708952077576, "grad_norm": 0.597259683356245, "learning_rate": 1.2692102536930467e-06, "loss": 0.2777, "step": 28773 }, { "epoch": 1.3479177401976858, "grad_norm": 0.5864887797856013, "learning_rate": 1.269045182596646e-06, "loss": 0.2752, "step": 28774 }, { "epoch": 1.347964585187614, "grad_norm": 0.5766885854555018, "learning_rate": 1.2688801185840808e-06, "loss": 0.2681, "step": 28775 }, { "epoch": 1.3480114301775425, "grad_norm": 0.6028024028425569, "learning_rate": 1.268715061656301e-06, "loss": 0.2738, "step": 28776 }, { "epoch": 1.3480582751674708, "grad_norm": 0.5942751791481762, "learning_rate": 1.2685500118142575e-06, "loss": 0.2749, "step": 28777 }, { "epoch": 1.348105120157399, "grad_norm": 0.5933906946965698, "learning_rate": 1.2683849690588989e-06, "loss": 0.2684, "step": 28778 }, { "epoch": 1.3481519651473275, "grad_norm": 0.6236280723137432, "learning_rate": 1.2682199333911746e-06, "loss": 0.2773, "step": 28779 }, { "epoch": 1.348198810137256, "grad_norm": 0.6100871940542117, "learning_rate": 1.268054904812035e-06, "loss": 0.2722, "step": 28780 }, { "epoch": 1.3482456551271842, "grad_norm": 0.605542782845018, "learning_rate": 1.2678898833224297e-06, "loss": 0.283, "step": 28781 }, { "epoch": 1.3482925001171124, "grad_norm": 0.6061520112658403, "learning_rate": 1.2677248689233084e-06, "loss": 0.2725, "step": 28782 }, { "epoch": 1.348339345107041, "grad_norm": 0.5744530439744702, "learning_rate": 1.2675598616156218e-06, "loss": 0.2696, "step": 28783 }, { "epoch": 1.3483861900969691, "grad_norm": 0.5809591403311558, "learning_rate": 1.2673948614003173e-06, "loss": 0.2617, "step": 28784 }, { "epoch": 1.3484330350868974, "grad_norm": 0.5555139093551348, "learning_rate": 1.2672298682783467e-06, "loss": 0.2705, "step": 28785 }, { "epoch": 1.3484798800768258, "grad_norm": 0.5999649870901087, "learning_rate": 1.2670648822506576e-06, "loss": 0.2766, "step": 28786 }, { "epoch": 1.348526725066754, "grad_norm": 0.6092930242281686, "learning_rate": 1.2668999033181996e-06, "loss": 0.2845, "step": 28787 }, { "epoch": 1.3485735700566823, "grad_norm": 0.61322513493941, "learning_rate": 1.2667349314819233e-06, "loss": 0.2902, "step": 28788 }, { "epoch": 1.3486204150466108, "grad_norm": 0.6344962082642042, "learning_rate": 1.2665699667427771e-06, "loss": 0.2788, "step": 28789 }, { "epoch": 1.348667260036539, "grad_norm": 0.5804378726818219, "learning_rate": 1.266405009101712e-06, "loss": 0.271, "step": 28790 }, { "epoch": 1.3487141050264675, "grad_norm": 0.6207112362552234, "learning_rate": 1.2662400585596763e-06, "loss": 0.2869, "step": 28791 }, { "epoch": 1.3487609500163957, "grad_norm": 0.5857280775599105, "learning_rate": 1.2660751151176176e-06, "loss": 0.2669, "step": 28792 }, { "epoch": 1.3488077950063242, "grad_norm": 0.6033015819552209, "learning_rate": 1.2659101787764862e-06, "loss": 0.2717, "step": 28793 }, { "epoch": 1.3488546399962524, "grad_norm": 0.6014284266444685, "learning_rate": 1.2657452495372319e-06, "loss": 0.2748, "step": 28794 }, { "epoch": 1.3489014849861807, "grad_norm": 0.556971787344387, "learning_rate": 1.2655803274008036e-06, "loss": 0.272, "step": 28795 }, { "epoch": 1.3489483299761091, "grad_norm": 0.5566981782541163, "learning_rate": 1.2654154123681512e-06, "loss": 0.252, "step": 28796 }, { "epoch": 1.3489951749660374, "grad_norm": 0.6036923292699126, "learning_rate": 1.2652505044402215e-06, "loss": 0.2736, "step": 28797 }, { "epoch": 1.3490420199559656, "grad_norm": 0.6151241486648885, "learning_rate": 1.265085603617966e-06, "loss": 0.2802, "step": 28798 }, { "epoch": 1.349088864945894, "grad_norm": 0.552388735523642, "learning_rate": 1.2649207099023315e-06, "loss": 0.2713, "step": 28799 }, { "epoch": 1.3491357099358223, "grad_norm": 0.583893304514464, "learning_rate": 1.2647558232942674e-06, "loss": 0.2627, "step": 28800 }, { "epoch": 1.3491825549257506, "grad_norm": 0.6276095216474576, "learning_rate": 1.2645909437947238e-06, "loss": 0.2773, "step": 28801 }, { "epoch": 1.349229399915679, "grad_norm": 0.655461839804451, "learning_rate": 1.2644260714046496e-06, "loss": 0.284, "step": 28802 }, { "epoch": 1.3492762449056073, "grad_norm": 0.61122405519163, "learning_rate": 1.2642612061249916e-06, "loss": 0.2748, "step": 28803 }, { "epoch": 1.3493230898955357, "grad_norm": 0.6003840485628225, "learning_rate": 1.2640963479566999e-06, "loss": 0.2689, "step": 28804 }, { "epoch": 1.349369934885464, "grad_norm": 0.5678201690267503, "learning_rate": 1.263931496900724e-06, "loss": 0.2648, "step": 28805 }, { "epoch": 1.3494167798753924, "grad_norm": 0.5784544318850243, "learning_rate": 1.2637666529580108e-06, "loss": 0.2768, "step": 28806 }, { "epoch": 1.3494636248653207, "grad_norm": 0.593822204571201, "learning_rate": 1.26360181612951e-06, "loss": 0.2804, "step": 28807 }, { "epoch": 1.349510469855249, "grad_norm": 0.6190374569089692, "learning_rate": 1.2634369864161705e-06, "loss": 0.2815, "step": 28808 }, { "epoch": 1.3495573148451774, "grad_norm": 0.5858996363680441, "learning_rate": 1.2632721638189394e-06, "loss": 0.2772, "step": 28809 }, { "epoch": 1.3496041598351056, "grad_norm": 0.5766570148652883, "learning_rate": 1.2631073483387666e-06, "loss": 0.2648, "step": 28810 }, { "epoch": 1.3496510048250339, "grad_norm": 0.5451705603940835, "learning_rate": 1.2629425399765998e-06, "loss": 0.2603, "step": 28811 }, { "epoch": 1.3496978498149623, "grad_norm": 0.6296784632572928, "learning_rate": 1.2627777387333886e-06, "loss": 0.2836, "step": 28812 }, { "epoch": 1.3497446948048906, "grad_norm": 0.6163424687641228, "learning_rate": 1.2626129446100799e-06, "loss": 0.2727, "step": 28813 }, { "epoch": 1.3497915397948188, "grad_norm": 0.586881857350277, "learning_rate": 1.2624481576076236e-06, "loss": 0.2861, "step": 28814 }, { "epoch": 1.3498383847847473, "grad_norm": 0.5928935002419649, "learning_rate": 1.2622833777269662e-06, "loss": 0.269, "step": 28815 }, { "epoch": 1.3498852297746757, "grad_norm": 0.5592823725571054, "learning_rate": 1.2621186049690564e-06, "loss": 0.261, "step": 28816 }, { "epoch": 1.349932074764604, "grad_norm": 0.5576366640575017, "learning_rate": 1.2619538393348435e-06, "loss": 0.2588, "step": 28817 }, { "epoch": 1.3499789197545322, "grad_norm": 0.5967440724587505, "learning_rate": 1.2617890808252747e-06, "loss": 0.2713, "step": 28818 }, { "epoch": 1.3500257647444607, "grad_norm": 0.6270730444839945, "learning_rate": 1.2616243294412997e-06, "loss": 0.2876, "step": 28819 }, { "epoch": 1.350072609734389, "grad_norm": 0.6551777927375947, "learning_rate": 1.2614595851838652e-06, "loss": 0.2735, "step": 28820 }, { "epoch": 1.3501194547243172, "grad_norm": 0.621928198866179, "learning_rate": 1.261294848053919e-06, "loss": 0.279, "step": 28821 }, { "epoch": 1.3501662997142456, "grad_norm": 0.5675850239672734, "learning_rate": 1.2611301180524092e-06, "loss": 0.2584, "step": 28822 }, { "epoch": 1.3502131447041739, "grad_norm": 0.6175339550946318, "learning_rate": 1.2609653951802842e-06, "loss": 0.2698, "step": 28823 }, { "epoch": 1.350259989694102, "grad_norm": 0.5910194911100208, "learning_rate": 1.260800679438492e-06, "loss": 0.2721, "step": 28824 }, { "epoch": 1.3503068346840306, "grad_norm": 0.5768213987420021, "learning_rate": 1.2606359708279814e-06, "loss": 0.2743, "step": 28825 }, { "epoch": 1.3503536796739588, "grad_norm": 0.6027856795217137, "learning_rate": 1.2604712693496985e-06, "loss": 0.2698, "step": 28826 }, { "epoch": 1.3504005246638873, "grad_norm": 0.5793251128284762, "learning_rate": 1.2603065750045927e-06, "loss": 0.2731, "step": 28827 }, { "epoch": 1.3504473696538155, "grad_norm": 0.5905806838033221, "learning_rate": 1.26014188779361e-06, "loss": 0.2973, "step": 28828 }, { "epoch": 1.350494214643744, "grad_norm": 0.6185737204191154, "learning_rate": 1.259977207717699e-06, "loss": 0.2703, "step": 28829 }, { "epoch": 1.3505410596336722, "grad_norm": 0.6136422758978824, "learning_rate": 1.2598125347778076e-06, "loss": 0.2938, "step": 28830 }, { "epoch": 1.3505879046236005, "grad_norm": 0.5971487264861737, "learning_rate": 1.2596478689748834e-06, "loss": 0.2881, "step": 28831 }, { "epoch": 1.350634749613529, "grad_norm": 0.6394409645966107, "learning_rate": 1.2594832103098748e-06, "loss": 0.2836, "step": 28832 }, { "epoch": 1.3506815946034572, "grad_norm": 0.6012484581791677, "learning_rate": 1.2593185587837286e-06, "loss": 0.2853, "step": 28833 }, { "epoch": 1.3507284395933854, "grad_norm": 0.651043609815336, "learning_rate": 1.2591539143973913e-06, "loss": 0.2993, "step": 28834 }, { "epoch": 1.3507752845833139, "grad_norm": 0.6417446796446062, "learning_rate": 1.2589892771518115e-06, "loss": 0.2948, "step": 28835 }, { "epoch": 1.350822129573242, "grad_norm": 0.56229340900354, "learning_rate": 1.2588246470479364e-06, "loss": 0.256, "step": 28836 }, { "epoch": 1.3508689745631703, "grad_norm": 0.5866161294260189, "learning_rate": 1.2586600240867137e-06, "loss": 0.2654, "step": 28837 }, { "epoch": 1.3509158195530988, "grad_norm": 0.5755570051265402, "learning_rate": 1.258495408269091e-06, "loss": 0.2831, "step": 28838 }, { "epoch": 1.350962664543027, "grad_norm": 0.5498740910591091, "learning_rate": 1.2583307995960148e-06, "loss": 0.2497, "step": 28839 }, { "epoch": 1.3510095095329555, "grad_norm": 0.6130644012606192, "learning_rate": 1.2581661980684333e-06, "loss": 0.2828, "step": 28840 }, { "epoch": 1.3510563545228838, "grad_norm": 0.6019334742739118, "learning_rate": 1.2580016036872921e-06, "loss": 0.2798, "step": 28841 }, { "epoch": 1.3511031995128122, "grad_norm": 0.647582469861163, "learning_rate": 1.25783701645354e-06, "loss": 0.2825, "step": 28842 }, { "epoch": 1.3511500445027405, "grad_norm": 0.6220218189792898, "learning_rate": 1.2576724363681237e-06, "loss": 0.2749, "step": 28843 }, { "epoch": 1.3511968894926687, "grad_norm": 0.6009614655415426, "learning_rate": 1.257507863431991e-06, "loss": 0.2742, "step": 28844 }, { "epoch": 1.3512437344825972, "grad_norm": 0.5656662968051349, "learning_rate": 1.257343297646087e-06, "loss": 0.2526, "step": 28845 }, { "epoch": 1.3512905794725254, "grad_norm": 0.6074564752057577, "learning_rate": 1.2571787390113605e-06, "loss": 0.267, "step": 28846 }, { "epoch": 1.3513374244624536, "grad_norm": 0.5816392042978057, "learning_rate": 1.2570141875287587e-06, "loss": 0.2742, "step": 28847 }, { "epoch": 1.351384269452382, "grad_norm": 0.5930724047428065, "learning_rate": 1.2568496431992266e-06, "loss": 0.2726, "step": 28848 }, { "epoch": 1.3514311144423103, "grad_norm": 0.6296469092313888, "learning_rate": 1.2566851060237128e-06, "loss": 0.2893, "step": 28849 }, { "epoch": 1.3514779594322386, "grad_norm": 0.5594256002040087, "learning_rate": 1.2565205760031637e-06, "loss": 0.2748, "step": 28850 }, { "epoch": 1.351524804422167, "grad_norm": 0.5897212300721009, "learning_rate": 1.2563560531385272e-06, "loss": 0.2726, "step": 28851 }, { "epoch": 1.3515716494120955, "grad_norm": 0.587300232864941, "learning_rate": 1.2561915374307476e-06, "loss": 0.2754, "step": 28852 }, { "epoch": 1.3516184944020238, "grad_norm": 0.6380314230626933, "learning_rate": 1.2560270288807736e-06, "loss": 0.2796, "step": 28853 }, { "epoch": 1.351665339391952, "grad_norm": 0.6235019055378033, "learning_rate": 1.255862527489552e-06, "loss": 0.2841, "step": 28854 }, { "epoch": 1.3517121843818805, "grad_norm": 0.5954297331110976, "learning_rate": 1.255698033258028e-06, "loss": 0.264, "step": 28855 }, { "epoch": 1.3517590293718087, "grad_norm": 0.5594945074654005, "learning_rate": 1.255533546187149e-06, "loss": 0.2673, "step": 28856 }, { "epoch": 1.351805874361737, "grad_norm": 0.6301814956655483, "learning_rate": 1.255369066277863e-06, "loss": 0.2648, "step": 28857 }, { "epoch": 1.3518527193516654, "grad_norm": 0.61395934349696, "learning_rate": 1.2552045935311135e-06, "loss": 0.2789, "step": 28858 }, { "epoch": 1.3518995643415936, "grad_norm": 0.6131437993130425, "learning_rate": 1.2550401279478492e-06, "loss": 0.2767, "step": 28859 }, { "epoch": 1.3519464093315219, "grad_norm": 0.5890179100557908, "learning_rate": 1.2548756695290159e-06, "loss": 0.2771, "step": 28860 }, { "epoch": 1.3519932543214503, "grad_norm": 0.560468445921004, "learning_rate": 1.254711218275561e-06, "loss": 0.2671, "step": 28861 }, { "epoch": 1.3520400993113786, "grad_norm": 0.6059467154503473, "learning_rate": 1.2545467741884295e-06, "loss": 0.2683, "step": 28862 }, { "epoch": 1.352086944301307, "grad_norm": 0.5682616762790254, "learning_rate": 1.2543823372685694e-06, "loss": 0.2482, "step": 28863 }, { "epoch": 1.3521337892912353, "grad_norm": 0.6113461637617631, "learning_rate": 1.2542179075169241e-06, "loss": 0.2777, "step": 28864 }, { "epoch": 1.3521806342811638, "grad_norm": 0.5622247150668531, "learning_rate": 1.2540534849344421e-06, "loss": 0.2634, "step": 28865 }, { "epoch": 1.352227479271092, "grad_norm": 0.5686836705062365, "learning_rate": 1.2538890695220692e-06, "loss": 0.2557, "step": 28866 }, { "epoch": 1.3522743242610202, "grad_norm": 0.5540245207630227, "learning_rate": 1.2537246612807513e-06, "loss": 0.2615, "step": 28867 }, { "epoch": 1.3523211692509487, "grad_norm": 0.6201782908149333, "learning_rate": 1.2535602602114362e-06, "loss": 0.2646, "step": 28868 }, { "epoch": 1.352368014240877, "grad_norm": 0.6019402615006303, "learning_rate": 1.253395866315068e-06, "loss": 0.2743, "step": 28869 }, { "epoch": 1.3524148592308052, "grad_norm": 0.5937634366076731, "learning_rate": 1.2532314795925924e-06, "loss": 0.2783, "step": 28870 }, { "epoch": 1.3524617042207336, "grad_norm": 0.5948825837428326, "learning_rate": 1.2530671000449563e-06, "loss": 0.2785, "step": 28871 }, { "epoch": 1.3525085492106619, "grad_norm": 0.5684800248890934, "learning_rate": 1.2529027276731057e-06, "loss": 0.2822, "step": 28872 }, { "epoch": 1.3525553942005901, "grad_norm": 0.6098280498983107, "learning_rate": 1.2527383624779865e-06, "loss": 0.2815, "step": 28873 }, { "epoch": 1.3526022391905186, "grad_norm": 0.5938647479022552, "learning_rate": 1.2525740044605454e-06, "loss": 0.2706, "step": 28874 }, { "epoch": 1.3526490841804468, "grad_norm": 0.5894660547328793, "learning_rate": 1.2524096536217262e-06, "loss": 0.2822, "step": 28875 }, { "epoch": 1.3526959291703753, "grad_norm": 0.6296313792525003, "learning_rate": 1.252245309962477e-06, "loss": 0.2865, "step": 28876 }, { "epoch": 1.3527427741603035, "grad_norm": 0.6264213824123828, "learning_rate": 1.2520809734837415e-06, "loss": 0.2776, "step": 28877 }, { "epoch": 1.352789619150232, "grad_norm": 0.5716051772995067, "learning_rate": 1.2519166441864667e-06, "loss": 0.2716, "step": 28878 }, { "epoch": 1.3528364641401602, "grad_norm": 0.5897802654961616, "learning_rate": 1.2517523220715974e-06, "loss": 0.2748, "step": 28879 }, { "epoch": 1.3528833091300885, "grad_norm": 0.6095555271880807, "learning_rate": 1.2515880071400798e-06, "loss": 0.2773, "step": 28880 }, { "epoch": 1.352930154120017, "grad_norm": 0.5807476915786838, "learning_rate": 1.2514236993928608e-06, "loss": 0.2819, "step": 28881 }, { "epoch": 1.3529769991099452, "grad_norm": 0.6087177221003993, "learning_rate": 1.2512593988308841e-06, "loss": 0.2874, "step": 28882 }, { "epoch": 1.3530238440998734, "grad_norm": 0.6830601880044661, "learning_rate": 1.2510951054550952e-06, "loss": 0.2868, "step": 28883 }, { "epoch": 1.3530706890898019, "grad_norm": 0.5817822414739298, "learning_rate": 1.2509308192664396e-06, "loss": 0.2618, "step": 28884 }, { "epoch": 1.3531175340797301, "grad_norm": 0.6032070726108888, "learning_rate": 1.2507665402658636e-06, "loss": 0.2688, "step": 28885 }, { "epoch": 1.3531643790696584, "grad_norm": 0.5729740240804887, "learning_rate": 1.2506022684543117e-06, "loss": 0.2744, "step": 28886 }, { "epoch": 1.3532112240595868, "grad_norm": 0.6120440624152949, "learning_rate": 1.2504380038327312e-06, "loss": 0.2758, "step": 28887 }, { "epoch": 1.3532580690495153, "grad_norm": 0.5799234670014296, "learning_rate": 1.2502737464020647e-06, "loss": 0.2697, "step": 28888 }, { "epoch": 1.3533049140394435, "grad_norm": 0.5543326221880416, "learning_rate": 1.25010949616326e-06, "loss": 0.2535, "step": 28889 }, { "epoch": 1.3533517590293718, "grad_norm": 0.558143161736855, "learning_rate": 1.2499452531172595e-06, "loss": 0.2633, "step": 28890 }, { "epoch": 1.3533986040193002, "grad_norm": 0.6070017058039586, "learning_rate": 1.24978101726501e-06, "loss": 0.2767, "step": 28891 }, { "epoch": 1.3534454490092285, "grad_norm": 0.5799862248182684, "learning_rate": 1.2496167886074567e-06, "loss": 0.2742, "step": 28892 }, { "epoch": 1.3534922939991567, "grad_norm": 0.5943063288206499, "learning_rate": 1.2494525671455455e-06, "loss": 0.2662, "step": 28893 }, { "epoch": 1.3535391389890852, "grad_norm": 0.5941055905617558, "learning_rate": 1.2492883528802194e-06, "loss": 0.2788, "step": 28894 }, { "epoch": 1.3535859839790134, "grad_norm": 0.6149609451845558, "learning_rate": 1.2491241458124245e-06, "loss": 0.2771, "step": 28895 }, { "epoch": 1.3536328289689417, "grad_norm": 0.5566304231320548, "learning_rate": 1.2489599459431065e-06, "loss": 0.2518, "step": 28896 }, { "epoch": 1.3536796739588701, "grad_norm": 0.5911538140360427, "learning_rate": 1.248795753273209e-06, "loss": 0.2756, "step": 28897 }, { "epoch": 1.3537265189487984, "grad_norm": 0.5932260966977984, "learning_rate": 1.2486315678036772e-06, "loss": 0.2757, "step": 28898 }, { "epoch": 1.3537733639387268, "grad_norm": 0.6067406647809966, "learning_rate": 1.2484673895354571e-06, "loss": 0.2657, "step": 28899 }, { "epoch": 1.353820208928655, "grad_norm": 0.6220310365874268, "learning_rate": 1.2483032184694917e-06, "loss": 0.2888, "step": 28900 }, { "epoch": 1.3538670539185835, "grad_norm": 0.6353554774138369, "learning_rate": 1.2481390546067267e-06, "loss": 0.2814, "step": 28901 }, { "epoch": 1.3539138989085118, "grad_norm": 0.5780576824889563, "learning_rate": 1.2479748979481071e-06, "loss": 0.2727, "step": 28902 }, { "epoch": 1.35396074389844, "grad_norm": 0.6367175989324808, "learning_rate": 1.2478107484945778e-06, "loss": 0.2888, "step": 28903 }, { "epoch": 1.3540075888883685, "grad_norm": 0.6388226861236478, "learning_rate": 1.2476466062470825e-06, "loss": 0.2773, "step": 28904 }, { "epoch": 1.3540544338782967, "grad_norm": 0.5982050823502476, "learning_rate": 1.2474824712065667e-06, "loss": 0.2583, "step": 28905 }, { "epoch": 1.354101278868225, "grad_norm": 0.6499544120677946, "learning_rate": 1.2473183433739736e-06, "loss": 0.3129, "step": 28906 }, { "epoch": 1.3541481238581534, "grad_norm": 0.5956494688338395, "learning_rate": 1.2471542227502487e-06, "loss": 0.29, "step": 28907 }, { "epoch": 1.3541949688480817, "grad_norm": 0.5603300455759563, "learning_rate": 1.2469901093363363e-06, "loss": 0.2767, "step": 28908 }, { "epoch": 1.35424181383801, "grad_norm": 0.6028050685008952, "learning_rate": 1.2468260031331807e-06, "loss": 0.2725, "step": 28909 }, { "epoch": 1.3542886588279384, "grad_norm": 0.6350336811764662, "learning_rate": 1.2466619041417274e-06, "loss": 0.2879, "step": 28910 }, { "epoch": 1.3543355038178666, "grad_norm": 0.584437289870965, "learning_rate": 1.2464978123629201e-06, "loss": 0.2675, "step": 28911 }, { "epoch": 1.354382348807795, "grad_norm": 0.5830827131700843, "learning_rate": 1.246333727797702e-06, "loss": 0.2818, "step": 28912 }, { "epoch": 1.3544291937977233, "grad_norm": 0.5657275267833181, "learning_rate": 1.2461696504470177e-06, "loss": 0.2799, "step": 28913 }, { "epoch": 1.3544760387876518, "grad_norm": 0.5520962750883173, "learning_rate": 1.2460055803118124e-06, "loss": 0.2556, "step": 28914 }, { "epoch": 1.35452288377758, "grad_norm": 0.5754889823063614, "learning_rate": 1.2458415173930296e-06, "loss": 0.2557, "step": 28915 }, { "epoch": 1.3545697287675083, "grad_norm": 0.6362841174407641, "learning_rate": 1.2456774616916137e-06, "loss": 0.2788, "step": 28916 }, { "epoch": 1.3546165737574367, "grad_norm": 0.6169885168284028, "learning_rate": 1.2455134132085097e-06, "loss": 0.2938, "step": 28917 }, { "epoch": 1.354663418747365, "grad_norm": 0.5776237611231464, "learning_rate": 1.2453493719446607e-06, "loss": 0.2763, "step": 28918 }, { "epoch": 1.3547102637372932, "grad_norm": 0.5769950316181947, "learning_rate": 1.2451853379010097e-06, "loss": 0.268, "step": 28919 }, { "epoch": 1.3547571087272217, "grad_norm": 0.5475877403668965, "learning_rate": 1.2450213110785018e-06, "loss": 0.2552, "step": 28920 }, { "epoch": 1.35480395371715, "grad_norm": 0.5911988844399162, "learning_rate": 1.2448572914780808e-06, "loss": 0.2628, "step": 28921 }, { "epoch": 1.3548507987070781, "grad_norm": 0.6165825187629832, "learning_rate": 1.2446932791006904e-06, "loss": 0.2766, "step": 28922 }, { "epoch": 1.3548976436970066, "grad_norm": 0.5571024997326187, "learning_rate": 1.2445292739472758e-06, "loss": 0.2768, "step": 28923 }, { "epoch": 1.354944488686935, "grad_norm": 0.607721629241492, "learning_rate": 1.2443652760187789e-06, "loss": 0.2882, "step": 28924 }, { "epoch": 1.3549913336768633, "grad_norm": 0.6484940215891678, "learning_rate": 1.244201285316145e-06, "loss": 0.2815, "step": 28925 }, { "epoch": 1.3550381786667915, "grad_norm": 0.6144845679909323, "learning_rate": 1.2440373018403162e-06, "loss": 0.2793, "step": 28926 }, { "epoch": 1.35508502365672, "grad_norm": 0.6652552784094198, "learning_rate": 1.243873325592237e-06, "loss": 0.2866, "step": 28927 }, { "epoch": 1.3551318686466483, "grad_norm": 0.5819667868046072, "learning_rate": 1.243709356572851e-06, "loss": 0.2583, "step": 28928 }, { "epoch": 1.3551787136365765, "grad_norm": 0.5923258488913776, "learning_rate": 1.243545394783102e-06, "loss": 0.2677, "step": 28929 }, { "epoch": 1.355225558626505, "grad_norm": 0.5844411400501867, "learning_rate": 1.2433814402239344e-06, "loss": 0.2535, "step": 28930 }, { "epoch": 1.3552724036164332, "grad_norm": 0.6016223987582384, "learning_rate": 1.2432174928962908e-06, "loss": 0.2578, "step": 28931 }, { "epoch": 1.3553192486063614, "grad_norm": 0.5861598209392508, "learning_rate": 1.2430535528011137e-06, "loss": 0.2585, "step": 28932 }, { "epoch": 1.35536609359629, "grad_norm": 0.6175358954780965, "learning_rate": 1.2428896199393472e-06, "loss": 0.2776, "step": 28933 }, { "epoch": 1.3554129385862181, "grad_norm": 0.5674195087123702, "learning_rate": 1.2427256943119353e-06, "loss": 0.2638, "step": 28934 }, { "epoch": 1.3554597835761466, "grad_norm": 0.5635964802633563, "learning_rate": 1.2425617759198211e-06, "loss": 0.2666, "step": 28935 }, { "epoch": 1.3555066285660748, "grad_norm": 0.5913454938435678, "learning_rate": 1.2423978647639486e-06, "loss": 0.2857, "step": 28936 }, { "epoch": 1.3555534735560033, "grad_norm": 0.5999816715292711, "learning_rate": 1.2422339608452594e-06, "loss": 0.2858, "step": 28937 }, { "epoch": 1.3556003185459315, "grad_norm": 0.5744590113846315, "learning_rate": 1.2420700641646985e-06, "loss": 0.2594, "step": 28938 }, { "epoch": 1.3556471635358598, "grad_norm": 0.5177095167939885, "learning_rate": 1.2419061747232072e-06, "loss": 0.2628, "step": 28939 }, { "epoch": 1.3556940085257883, "grad_norm": 0.5975316077972513, "learning_rate": 1.24174229252173e-06, "loss": 0.2948, "step": 28940 }, { "epoch": 1.3557408535157165, "grad_norm": 0.5741536667874083, "learning_rate": 1.2415784175612094e-06, "loss": 0.2666, "step": 28941 }, { "epoch": 1.3557876985056447, "grad_norm": 0.5525861535154464, "learning_rate": 1.2414145498425897e-06, "loss": 0.255, "step": 28942 }, { "epoch": 1.3558345434955732, "grad_norm": 0.6039548097760283, "learning_rate": 1.2412506893668119e-06, "loss": 0.2794, "step": 28943 }, { "epoch": 1.3558813884855014, "grad_norm": 0.5630300059739491, "learning_rate": 1.2410868361348203e-06, "loss": 0.2777, "step": 28944 }, { "epoch": 1.3559282334754297, "grad_norm": 0.604403124761153, "learning_rate": 1.240922990147558e-06, "loss": 0.278, "step": 28945 }, { "epoch": 1.3559750784653581, "grad_norm": 0.5543731685479679, "learning_rate": 1.240759151405967e-06, "loss": 0.2563, "step": 28946 }, { "epoch": 1.3560219234552864, "grad_norm": 0.5579298157297945, "learning_rate": 1.2405953199109902e-06, "loss": 0.2675, "step": 28947 }, { "epoch": 1.3560687684452148, "grad_norm": 0.63910775572447, "learning_rate": 1.2404314956635718e-06, "loss": 0.288, "step": 28948 }, { "epoch": 1.356115613435143, "grad_norm": 0.5675035854885269, "learning_rate": 1.2402676786646529e-06, "loss": 0.2463, "step": 28949 }, { "epoch": 1.3561624584250715, "grad_norm": 0.5688251016318172, "learning_rate": 1.2401038689151768e-06, "loss": 0.2406, "step": 28950 }, { "epoch": 1.3562093034149998, "grad_norm": 0.6012740803680168, "learning_rate": 1.239940066416086e-06, "loss": 0.279, "step": 28951 }, { "epoch": 1.356256148404928, "grad_norm": 0.5667234351132043, "learning_rate": 1.2397762711683245e-06, "loss": 0.2556, "step": 28952 }, { "epoch": 1.3563029933948565, "grad_norm": 0.6094367009037931, "learning_rate": 1.2396124831728329e-06, "loss": 0.2838, "step": 28953 }, { "epoch": 1.3563498383847847, "grad_norm": 0.6065002370979489, "learning_rate": 1.2394487024305557e-06, "loss": 0.2863, "step": 28954 }, { "epoch": 1.356396683374713, "grad_norm": 0.6244509013582542, "learning_rate": 1.2392849289424333e-06, "loss": 0.2804, "step": 28955 }, { "epoch": 1.3564435283646414, "grad_norm": 0.5958918140264663, "learning_rate": 1.2391211627094094e-06, "loss": 0.2847, "step": 28956 }, { "epoch": 1.3564903733545697, "grad_norm": 0.5437136007304492, "learning_rate": 1.238957403732426e-06, "loss": 0.2667, "step": 28957 }, { "epoch": 1.356537218344498, "grad_norm": 0.6146913270363284, "learning_rate": 1.2387936520124261e-06, "loss": 0.2858, "step": 28958 }, { "epoch": 1.3565840633344264, "grad_norm": 0.6201861200517865, "learning_rate": 1.2386299075503527e-06, "loss": 0.2809, "step": 28959 }, { "epoch": 1.3566309083243548, "grad_norm": 0.594801567445389, "learning_rate": 1.2384661703471471e-06, "loss": 0.2803, "step": 28960 }, { "epoch": 1.356677753314283, "grad_norm": 0.6123774065409776, "learning_rate": 1.2383024404037506e-06, "loss": 0.2719, "step": 28961 }, { "epoch": 1.3567245983042113, "grad_norm": 0.6735488341905342, "learning_rate": 1.2381387177211063e-06, "loss": 0.2903, "step": 28962 }, { "epoch": 1.3567714432941398, "grad_norm": 0.6055549089644432, "learning_rate": 1.237975002300157e-06, "loss": 0.2949, "step": 28963 }, { "epoch": 1.356818288284068, "grad_norm": 0.5979745140142813, "learning_rate": 1.2378112941418438e-06, "loss": 0.2489, "step": 28964 }, { "epoch": 1.3568651332739963, "grad_norm": 0.5822630071446047, "learning_rate": 1.2376475932471104e-06, "loss": 0.2673, "step": 28965 }, { "epoch": 1.3569119782639247, "grad_norm": 0.5577650410223055, "learning_rate": 1.2374838996168972e-06, "loss": 0.2547, "step": 28966 }, { "epoch": 1.356958823253853, "grad_norm": 0.58717358189809, "learning_rate": 1.2373202132521475e-06, "loss": 0.2851, "step": 28967 }, { "epoch": 1.3570056682437812, "grad_norm": 0.6139012670279008, "learning_rate": 1.237156534153802e-06, "loss": 0.2729, "step": 28968 }, { "epoch": 1.3570525132337097, "grad_norm": 0.6213158309827317, "learning_rate": 1.236992862322803e-06, "loss": 0.2918, "step": 28969 }, { "epoch": 1.357099358223638, "grad_norm": 0.5640881062484434, "learning_rate": 1.2368291977600923e-06, "loss": 0.274, "step": 28970 }, { "epoch": 1.3571462032135664, "grad_norm": 0.5403104284345225, "learning_rate": 1.2366655404666128e-06, "loss": 0.2585, "step": 28971 }, { "epoch": 1.3571930482034946, "grad_norm": 0.5580770596573736, "learning_rate": 1.236501890443306e-06, "loss": 0.2596, "step": 28972 }, { "epoch": 1.357239893193423, "grad_norm": 0.6197796980962471, "learning_rate": 1.2363382476911135e-06, "loss": 0.2784, "step": 28973 }, { "epoch": 1.3572867381833513, "grad_norm": 0.5968384423002555, "learning_rate": 1.2361746122109757e-06, "loss": 0.2861, "step": 28974 }, { "epoch": 1.3573335831732796, "grad_norm": 0.6022572551404508, "learning_rate": 1.2360109840038353e-06, "loss": 0.2628, "step": 28975 }, { "epoch": 1.357380428163208, "grad_norm": 0.6072305249978007, "learning_rate": 1.2358473630706341e-06, "loss": 0.2777, "step": 28976 }, { "epoch": 1.3574272731531363, "grad_norm": 0.5564462268581949, "learning_rate": 1.2356837494123135e-06, "loss": 0.2608, "step": 28977 }, { "epoch": 1.3574741181430645, "grad_norm": 0.6043469360969868, "learning_rate": 1.2355201430298164e-06, "loss": 0.2815, "step": 28978 }, { "epoch": 1.357520963132993, "grad_norm": 0.6045075005125059, "learning_rate": 1.235356543924082e-06, "loss": 0.2626, "step": 28979 }, { "epoch": 1.3575678081229212, "grad_norm": 0.6296104245642369, "learning_rate": 1.2351929520960534e-06, "loss": 0.2888, "step": 28980 }, { "epoch": 1.3576146531128495, "grad_norm": 0.5962057721695543, "learning_rate": 1.235029367546671e-06, "loss": 0.262, "step": 28981 }, { "epoch": 1.357661498102778, "grad_norm": 0.5762347206591838, "learning_rate": 1.2348657902768762e-06, "loss": 0.2636, "step": 28982 }, { "epoch": 1.3577083430927062, "grad_norm": 0.5606178656565525, "learning_rate": 1.2347022202876113e-06, "loss": 0.271, "step": 28983 }, { "epoch": 1.3577551880826346, "grad_norm": 0.600315586306198, "learning_rate": 1.2345386575798169e-06, "loss": 0.276, "step": 28984 }, { "epoch": 1.3578020330725629, "grad_norm": 0.6201502445462932, "learning_rate": 1.2343751021544354e-06, "loss": 0.2756, "step": 28985 }, { "epoch": 1.3578488780624913, "grad_norm": 0.6002574936930598, "learning_rate": 1.2342115540124058e-06, "loss": 0.2605, "step": 28986 }, { "epoch": 1.3578957230524196, "grad_norm": 0.545458891299663, "learning_rate": 1.234048013154672e-06, "loss": 0.2589, "step": 28987 }, { "epoch": 1.3579425680423478, "grad_norm": 0.5791103622481348, "learning_rate": 1.2338844795821726e-06, "loss": 0.2623, "step": 28988 }, { "epoch": 1.3579894130322763, "grad_norm": 0.6111303558136607, "learning_rate": 1.2337209532958496e-06, "loss": 0.2869, "step": 28989 }, { "epoch": 1.3580362580222045, "grad_norm": 0.63210114412288, "learning_rate": 1.2335574342966444e-06, "loss": 0.2839, "step": 28990 }, { "epoch": 1.3580831030121328, "grad_norm": 0.5914244729275538, "learning_rate": 1.2333939225854987e-06, "loss": 0.2846, "step": 28991 }, { "epoch": 1.3581299480020612, "grad_norm": 0.5948249173684755, "learning_rate": 1.233230418163352e-06, "loss": 0.2613, "step": 28992 }, { "epoch": 1.3581767929919895, "grad_norm": 0.5620293430124744, "learning_rate": 1.2330669210311455e-06, "loss": 0.2744, "step": 28993 }, { "epoch": 1.3582236379819177, "grad_norm": 0.6315531934829719, "learning_rate": 1.2329034311898217e-06, "loss": 0.2835, "step": 28994 }, { "epoch": 1.3582704829718462, "grad_norm": 0.5963942779272847, "learning_rate": 1.2327399486403187e-06, "loss": 0.2593, "step": 28995 }, { "epoch": 1.3583173279617746, "grad_norm": 0.5964970885196956, "learning_rate": 1.2325764733835792e-06, "loss": 0.2716, "step": 28996 }, { "epoch": 1.3583641729517029, "grad_norm": 0.6027605184513377, "learning_rate": 1.2324130054205446e-06, "loss": 0.2818, "step": 28997 }, { "epoch": 1.358411017941631, "grad_norm": 0.5976354109482938, "learning_rate": 1.2322495447521534e-06, "loss": 0.2933, "step": 28998 }, { "epoch": 1.3584578629315596, "grad_norm": 0.5880685444588823, "learning_rate": 1.2320860913793476e-06, "loss": 0.2826, "step": 28999 }, { "epoch": 1.3585047079214878, "grad_norm": 0.5996148750779766, "learning_rate": 1.2319226453030672e-06, "loss": 0.2477, "step": 29000 }, { "epoch": 1.358551552911416, "grad_norm": 0.6003004774968761, "learning_rate": 1.2317592065242548e-06, "loss": 0.2747, "step": 29001 }, { "epoch": 1.3585983979013445, "grad_norm": 0.6473332169787341, "learning_rate": 1.2315957750438482e-06, "loss": 0.2825, "step": 29002 }, { "epoch": 1.3586452428912728, "grad_norm": 0.5977813328471466, "learning_rate": 1.2314323508627899e-06, "loss": 0.2611, "step": 29003 }, { "epoch": 1.358692087881201, "grad_norm": 0.6221152066417629, "learning_rate": 1.231268933982019e-06, "loss": 0.2825, "step": 29004 }, { "epoch": 1.3587389328711295, "grad_norm": 0.652408605344263, "learning_rate": 1.231105524402476e-06, "loss": 0.283, "step": 29005 }, { "epoch": 1.3587857778610577, "grad_norm": 0.5626538756940405, "learning_rate": 1.2309421221251022e-06, "loss": 0.26, "step": 29006 }, { "epoch": 1.3588326228509862, "grad_norm": 0.6075766550161891, "learning_rate": 1.2307787271508372e-06, "loss": 0.2766, "step": 29007 }, { "epoch": 1.3588794678409144, "grad_norm": 0.6043408784931963, "learning_rate": 1.2306153394806228e-06, "loss": 0.2775, "step": 29008 }, { "epoch": 1.3589263128308429, "grad_norm": 0.5708945792256763, "learning_rate": 1.2304519591153983e-06, "loss": 0.2611, "step": 29009 }, { "epoch": 1.358973157820771, "grad_norm": 0.6429961147686402, "learning_rate": 1.2302885860561024e-06, "loss": 0.2878, "step": 29010 }, { "epoch": 1.3590200028106993, "grad_norm": 0.6284877093202198, "learning_rate": 1.2301252203036764e-06, "loss": 0.2823, "step": 29011 }, { "epoch": 1.3590668478006278, "grad_norm": 0.6164918581517657, "learning_rate": 1.229961861859061e-06, "loss": 0.2731, "step": 29012 }, { "epoch": 1.359113692790556, "grad_norm": 0.5833440376473015, "learning_rate": 1.2297985107231954e-06, "loss": 0.2615, "step": 29013 }, { "epoch": 1.3591605377804843, "grad_norm": 0.5842150886481442, "learning_rate": 1.2296351668970216e-06, "loss": 0.2714, "step": 29014 }, { "epoch": 1.3592073827704128, "grad_norm": 0.6160785282153937, "learning_rate": 1.2294718303814767e-06, "loss": 0.2825, "step": 29015 }, { "epoch": 1.359254227760341, "grad_norm": 0.6027305353064473, "learning_rate": 1.2293085011775036e-06, "loss": 0.2763, "step": 29016 }, { "epoch": 1.3593010727502692, "grad_norm": 0.5962644065090122, "learning_rate": 1.2291451792860393e-06, "loss": 0.2724, "step": 29017 }, { "epoch": 1.3593479177401977, "grad_norm": 0.6182439361664948, "learning_rate": 1.2289818647080252e-06, "loss": 0.2616, "step": 29018 }, { "epoch": 1.359394762730126, "grad_norm": 0.6258496942593972, "learning_rate": 1.2288185574444008e-06, "loss": 0.2874, "step": 29019 }, { "epoch": 1.3594416077200544, "grad_norm": 0.5920296896577629, "learning_rate": 1.2286552574961063e-06, "loss": 0.2568, "step": 29020 }, { "epoch": 1.3594884527099826, "grad_norm": 0.5958076288884208, "learning_rate": 1.2284919648640823e-06, "loss": 0.2844, "step": 29021 }, { "epoch": 1.359535297699911, "grad_norm": 0.6119549876563517, "learning_rate": 1.2283286795492675e-06, "loss": 0.2796, "step": 29022 }, { "epoch": 1.3595821426898393, "grad_norm": 0.572409331135026, "learning_rate": 1.2281654015526004e-06, "loss": 0.2742, "step": 29023 }, { "epoch": 1.3596289876797676, "grad_norm": 0.5433253841983403, "learning_rate": 1.2280021308750217e-06, "loss": 0.2584, "step": 29024 }, { "epoch": 1.359675832669696, "grad_norm": 0.618860628429254, "learning_rate": 1.227838867517471e-06, "loss": 0.2744, "step": 29025 }, { "epoch": 1.3597226776596243, "grad_norm": 0.5844806595568021, "learning_rate": 1.227675611480888e-06, "loss": 0.2607, "step": 29026 }, { "epoch": 1.3597695226495525, "grad_norm": 0.5918956720214166, "learning_rate": 1.2275123627662131e-06, "loss": 0.2838, "step": 29027 }, { "epoch": 1.359816367639481, "grad_norm": 0.5850380676919034, "learning_rate": 1.2273491213743836e-06, "loss": 0.2685, "step": 29028 }, { "epoch": 1.3598632126294092, "grad_norm": 0.5868614482364753, "learning_rate": 1.2271858873063411e-06, "loss": 0.2774, "step": 29029 }, { "epoch": 1.3599100576193375, "grad_norm": 0.5999269243749266, "learning_rate": 1.227022660563023e-06, "loss": 0.2488, "step": 29030 }, { "epoch": 1.359956902609266, "grad_norm": 0.6963385205897938, "learning_rate": 1.2268594411453693e-06, "loss": 0.2943, "step": 29031 }, { "epoch": 1.3600037475991944, "grad_norm": 0.6169195787121452, "learning_rate": 1.2266962290543196e-06, "loss": 0.2708, "step": 29032 }, { "epoch": 1.3600505925891226, "grad_norm": 0.5711457048965514, "learning_rate": 1.2265330242908143e-06, "loss": 0.2646, "step": 29033 }, { "epoch": 1.3600974375790509, "grad_norm": 0.5923889762319253, "learning_rate": 1.2263698268557903e-06, "loss": 0.2735, "step": 29034 }, { "epoch": 1.3601442825689793, "grad_norm": 0.6546446722391972, "learning_rate": 1.2262066367501876e-06, "loss": 0.281, "step": 29035 }, { "epoch": 1.3601911275589076, "grad_norm": 0.5874590876016178, "learning_rate": 1.2260434539749465e-06, "loss": 0.2663, "step": 29036 }, { "epoch": 1.3602379725488358, "grad_norm": 0.5710001466349682, "learning_rate": 1.2258802785310045e-06, "loss": 0.2545, "step": 29037 }, { "epoch": 1.3602848175387643, "grad_norm": 0.603889545062353, "learning_rate": 1.225717110419301e-06, "loss": 0.2714, "step": 29038 }, { "epoch": 1.3603316625286925, "grad_norm": 0.5782000179697773, "learning_rate": 1.2255539496407765e-06, "loss": 0.2771, "step": 29039 }, { "epoch": 1.3603785075186208, "grad_norm": 0.6593493053135867, "learning_rate": 1.2253907961963675e-06, "loss": 0.3066, "step": 29040 }, { "epoch": 1.3604253525085492, "grad_norm": 0.6181606312357397, "learning_rate": 1.225227650087014e-06, "loss": 0.2852, "step": 29041 }, { "epoch": 1.3604721974984775, "grad_norm": 0.5602045759093338, "learning_rate": 1.2250645113136553e-06, "loss": 0.2783, "step": 29042 }, { "epoch": 1.360519042488406, "grad_norm": 0.6168119188213312, "learning_rate": 1.2249013798772305e-06, "loss": 0.2702, "step": 29043 }, { "epoch": 1.3605658874783342, "grad_norm": 0.6240177549328556, "learning_rate": 1.2247382557786772e-06, "loss": 0.272, "step": 29044 }, { "epoch": 1.3606127324682626, "grad_norm": 0.6498734012727143, "learning_rate": 1.2245751390189352e-06, "loss": 0.2887, "step": 29045 }, { "epoch": 1.3606595774581909, "grad_norm": 0.5557114769420135, "learning_rate": 1.224412029598942e-06, "loss": 0.2501, "step": 29046 }, { "epoch": 1.3607064224481191, "grad_norm": 0.5780165311571674, "learning_rate": 1.2242489275196371e-06, "loss": 0.2685, "step": 29047 }, { "epoch": 1.3607532674380476, "grad_norm": 0.5846670840127006, "learning_rate": 1.2240858327819587e-06, "loss": 0.2648, "step": 29048 }, { "epoch": 1.3608001124279758, "grad_norm": 0.6172362077338011, "learning_rate": 1.223922745386846e-06, "loss": 0.2701, "step": 29049 }, { "epoch": 1.360846957417904, "grad_norm": 0.5557642669864832, "learning_rate": 1.223759665335238e-06, "loss": 0.2502, "step": 29050 }, { "epoch": 1.3608938024078325, "grad_norm": 0.5566562865123224, "learning_rate": 1.2235965926280713e-06, "loss": 0.2553, "step": 29051 }, { "epoch": 1.3609406473977608, "grad_norm": 0.5854500534403594, "learning_rate": 1.2234335272662862e-06, "loss": 0.2637, "step": 29052 }, { "epoch": 1.360987492387689, "grad_norm": 0.5785600088040952, "learning_rate": 1.2232704692508195e-06, "loss": 0.2596, "step": 29053 }, { "epoch": 1.3610343373776175, "grad_norm": 0.6036951829879581, "learning_rate": 1.2231074185826102e-06, "loss": 0.2702, "step": 29054 }, { "epoch": 1.3610811823675457, "grad_norm": 0.5750763832517009, "learning_rate": 1.2229443752625973e-06, "loss": 0.2611, "step": 29055 }, { "epoch": 1.3611280273574742, "grad_norm": 0.5714313898194142, "learning_rate": 1.222781339291718e-06, "loss": 0.2686, "step": 29056 }, { "epoch": 1.3611748723474024, "grad_norm": 0.6347608834960935, "learning_rate": 1.2226183106709122e-06, "loss": 0.2789, "step": 29057 }, { "epoch": 1.3612217173373309, "grad_norm": 0.576950876480253, "learning_rate": 1.2224552894011172e-06, "loss": 0.2749, "step": 29058 }, { "epoch": 1.3612685623272591, "grad_norm": 0.5881163662082425, "learning_rate": 1.2222922754832695e-06, "loss": 0.2752, "step": 29059 }, { "epoch": 1.3613154073171874, "grad_norm": 0.6009757654410858, "learning_rate": 1.222129268918309e-06, "loss": 0.2705, "step": 29060 }, { "epoch": 1.3613622523071158, "grad_norm": 0.6033989014269793, "learning_rate": 1.2219662697071732e-06, "loss": 0.2648, "step": 29061 }, { "epoch": 1.361409097297044, "grad_norm": 0.5867616191539311, "learning_rate": 1.2218032778508002e-06, "loss": 0.282, "step": 29062 }, { "epoch": 1.3614559422869723, "grad_norm": 0.5908922937837747, "learning_rate": 1.2216402933501291e-06, "loss": 0.2725, "step": 29063 }, { "epoch": 1.3615027872769008, "grad_norm": 0.5956246743216175, "learning_rate": 1.2214773162060962e-06, "loss": 0.2644, "step": 29064 }, { "epoch": 1.361549632266829, "grad_norm": 0.6672765513789662, "learning_rate": 1.2213143464196407e-06, "loss": 0.2863, "step": 29065 }, { "epoch": 1.3615964772567573, "grad_norm": 0.6142891234621681, "learning_rate": 1.2211513839916985e-06, "loss": 0.2701, "step": 29066 }, { "epoch": 1.3616433222466857, "grad_norm": 0.6316464531975047, "learning_rate": 1.2209884289232087e-06, "loss": 0.2799, "step": 29067 }, { "epoch": 1.3616901672366142, "grad_norm": 0.6113369375152158, "learning_rate": 1.2208254812151093e-06, "loss": 0.2494, "step": 29068 }, { "epoch": 1.3617370122265424, "grad_norm": 0.6033084666569863, "learning_rate": 1.2206625408683378e-06, "loss": 0.2636, "step": 29069 }, { "epoch": 1.3617838572164707, "grad_norm": 0.6288628648772099, "learning_rate": 1.2204996078838327e-06, "loss": 0.2792, "step": 29070 }, { "epoch": 1.3618307022063991, "grad_norm": 0.5958394845854641, "learning_rate": 1.2203366822625307e-06, "loss": 0.2848, "step": 29071 }, { "epoch": 1.3618775471963274, "grad_norm": 0.6018629858594488, "learning_rate": 1.2201737640053684e-06, "loss": 0.2791, "step": 29072 }, { "epoch": 1.3619243921862556, "grad_norm": 0.5639948567521378, "learning_rate": 1.2200108531132848e-06, "loss": 0.2745, "step": 29073 }, { "epoch": 1.361971237176184, "grad_norm": 0.6365425172468716, "learning_rate": 1.2198479495872167e-06, "loss": 0.2739, "step": 29074 }, { "epoch": 1.3620180821661123, "grad_norm": 0.6138633879360061, "learning_rate": 1.2196850534281021e-06, "loss": 0.2737, "step": 29075 }, { "epoch": 1.3620649271560406, "grad_norm": 0.5861088334365614, "learning_rate": 1.219522164636879e-06, "loss": 0.2691, "step": 29076 }, { "epoch": 1.362111772145969, "grad_norm": 0.6010146760017357, "learning_rate": 1.2193592832144829e-06, "loss": 0.2697, "step": 29077 }, { "epoch": 1.3621586171358973, "grad_norm": 0.552238223923379, "learning_rate": 1.2191964091618535e-06, "loss": 0.2661, "step": 29078 }, { "epoch": 1.3622054621258257, "grad_norm": 0.5862516873106949, "learning_rate": 1.219033542479926e-06, "loss": 0.2683, "step": 29079 }, { "epoch": 1.362252307115754, "grad_norm": 0.6559966390663781, "learning_rate": 1.2188706831696382e-06, "loss": 0.2853, "step": 29080 }, { "epoch": 1.3622991521056824, "grad_norm": 0.6415018497722904, "learning_rate": 1.2187078312319278e-06, "loss": 0.2977, "step": 29081 }, { "epoch": 1.3623459970956107, "grad_norm": 0.5965184494743412, "learning_rate": 1.2185449866677326e-06, "loss": 0.279, "step": 29082 }, { "epoch": 1.362392842085539, "grad_norm": 0.5979819344433209, "learning_rate": 1.2183821494779879e-06, "loss": 0.2718, "step": 29083 }, { "epoch": 1.3624396870754674, "grad_norm": 0.5910051307053316, "learning_rate": 1.2182193196636322e-06, "loss": 0.2858, "step": 29084 }, { "epoch": 1.3624865320653956, "grad_norm": 0.6222146297649028, "learning_rate": 1.2180564972256027e-06, "loss": 0.2871, "step": 29085 }, { "epoch": 1.3625333770553238, "grad_norm": 0.5995702258528639, "learning_rate": 1.2178936821648351e-06, "loss": 0.2598, "step": 29086 }, { "epoch": 1.3625802220452523, "grad_norm": 0.619834728359861, "learning_rate": 1.2177308744822669e-06, "loss": 0.2835, "step": 29087 }, { "epoch": 1.3626270670351806, "grad_norm": 0.5913139601217284, "learning_rate": 1.2175680741788364e-06, "loss": 0.2575, "step": 29088 }, { "epoch": 1.3626739120251088, "grad_norm": 0.5712049627520698, "learning_rate": 1.2174052812554785e-06, "loss": 0.2763, "step": 29089 }, { "epoch": 1.3627207570150373, "grad_norm": 0.5992539090160777, "learning_rate": 1.2172424957131304e-06, "loss": 0.2731, "step": 29090 }, { "epoch": 1.3627676020049655, "grad_norm": 0.5955592784214904, "learning_rate": 1.2170797175527296e-06, "loss": 0.271, "step": 29091 }, { "epoch": 1.362814446994894, "grad_norm": 0.575874024738685, "learning_rate": 1.2169169467752135e-06, "loss": 0.2686, "step": 29092 }, { "epoch": 1.3628612919848222, "grad_norm": 0.6268128300768678, "learning_rate": 1.2167541833815168e-06, "loss": 0.2842, "step": 29093 }, { "epoch": 1.3629081369747507, "grad_norm": 0.6043125843577943, "learning_rate": 1.2165914273725785e-06, "loss": 0.2806, "step": 29094 }, { "epoch": 1.362954981964679, "grad_norm": 0.5689921548557768, "learning_rate": 1.216428678749333e-06, "loss": 0.265, "step": 29095 }, { "epoch": 1.3630018269546071, "grad_norm": 0.6077500481836824, "learning_rate": 1.2162659375127176e-06, "loss": 0.2693, "step": 29096 }, { "epoch": 1.3630486719445356, "grad_norm": 0.5627811618751886, "learning_rate": 1.2161032036636691e-06, "loss": 0.2548, "step": 29097 }, { "epoch": 1.3630955169344638, "grad_norm": 0.5815481855285465, "learning_rate": 1.2159404772031239e-06, "loss": 0.2759, "step": 29098 }, { "epoch": 1.363142361924392, "grad_norm": 0.5630085249183385, "learning_rate": 1.2157777581320197e-06, "loss": 0.2588, "step": 29099 }, { "epoch": 1.3631892069143206, "grad_norm": 0.5806911075460975, "learning_rate": 1.2156150464512917e-06, "loss": 0.2597, "step": 29100 }, { "epoch": 1.3632360519042488, "grad_norm": 0.5962504267976828, "learning_rate": 1.2154523421618752e-06, "loss": 0.2833, "step": 29101 }, { "epoch": 1.363282896894177, "grad_norm": 0.6316652649784161, "learning_rate": 1.2152896452647076e-06, "loss": 0.2791, "step": 29102 }, { "epoch": 1.3633297418841055, "grad_norm": 0.5750551835797757, "learning_rate": 1.215126955760725e-06, "loss": 0.2513, "step": 29103 }, { "epoch": 1.363376586874034, "grad_norm": 0.5773285800792658, "learning_rate": 1.2149642736508643e-06, "loss": 0.2689, "step": 29104 }, { "epoch": 1.3634234318639622, "grad_norm": 0.6281262713114265, "learning_rate": 1.2148015989360621e-06, "loss": 0.2945, "step": 29105 }, { "epoch": 1.3634702768538904, "grad_norm": 0.5994477569035743, "learning_rate": 1.2146389316172525e-06, "loss": 0.2733, "step": 29106 }, { "epoch": 1.363517121843819, "grad_norm": 0.5510391137642667, "learning_rate": 1.2144762716953736e-06, "loss": 0.2591, "step": 29107 }, { "epoch": 1.3635639668337471, "grad_norm": 0.6068170536579293, "learning_rate": 1.21431361917136e-06, "loss": 0.2881, "step": 29108 }, { "epoch": 1.3636108118236754, "grad_norm": 0.5832317570306352, "learning_rate": 1.2141509740461482e-06, "loss": 0.2742, "step": 29109 }, { "epoch": 1.3636576568136038, "grad_norm": 0.6271984985833686, "learning_rate": 1.2139883363206744e-06, "loss": 0.27, "step": 29110 }, { "epoch": 1.363704501803532, "grad_norm": 0.5805480912510016, "learning_rate": 1.2138257059958747e-06, "loss": 0.27, "step": 29111 }, { "epoch": 1.3637513467934603, "grad_norm": 0.6339265419855021, "learning_rate": 1.2136630830726855e-06, "loss": 0.3035, "step": 29112 }, { "epoch": 1.3637981917833888, "grad_norm": 0.6049020071544814, "learning_rate": 1.213500467552042e-06, "loss": 0.2808, "step": 29113 }, { "epoch": 1.363845036773317, "grad_norm": 0.5836610448846662, "learning_rate": 1.213337859434879e-06, "loss": 0.2872, "step": 29114 }, { "epoch": 1.3638918817632455, "grad_norm": 0.5434374575934473, "learning_rate": 1.2131752587221334e-06, "loss": 0.2641, "step": 29115 }, { "epoch": 1.3639387267531737, "grad_norm": 0.5575949784979847, "learning_rate": 1.2130126654147405e-06, "loss": 0.2546, "step": 29116 }, { "epoch": 1.3639855717431022, "grad_norm": 0.5868349519286773, "learning_rate": 1.2128500795136367e-06, "loss": 0.2663, "step": 29117 }, { "epoch": 1.3640324167330304, "grad_norm": 0.5859648352361219, "learning_rate": 1.2126875010197568e-06, "loss": 0.2743, "step": 29118 }, { "epoch": 1.3640792617229587, "grad_norm": 0.6257302434773089, "learning_rate": 1.2125249299340378e-06, "loss": 0.2781, "step": 29119 }, { "epoch": 1.3641261067128871, "grad_norm": 0.6075618814424998, "learning_rate": 1.212362366257414e-06, "loss": 0.2714, "step": 29120 }, { "epoch": 1.3641729517028154, "grad_norm": 0.6105533477716107, "learning_rate": 1.2121998099908205e-06, "loss": 0.2657, "step": 29121 }, { "epoch": 1.3642197966927436, "grad_norm": 0.6224345421403941, "learning_rate": 1.2120372611351932e-06, "loss": 0.2759, "step": 29122 }, { "epoch": 1.364266641682672, "grad_norm": 0.6226063582096315, "learning_rate": 1.211874719691468e-06, "loss": 0.2715, "step": 29123 }, { "epoch": 1.3643134866726003, "grad_norm": 0.6102526810812713, "learning_rate": 1.2117121856605798e-06, "loss": 0.269, "step": 29124 }, { "epoch": 1.3643603316625286, "grad_norm": 0.6242468808798344, "learning_rate": 1.2115496590434653e-06, "loss": 0.2891, "step": 29125 }, { "epoch": 1.364407176652457, "grad_norm": 0.5961717749703203, "learning_rate": 1.2113871398410575e-06, "loss": 0.2612, "step": 29126 }, { "epoch": 1.3644540216423853, "grad_norm": 0.5659095908944362, "learning_rate": 1.211224628054294e-06, "loss": 0.276, "step": 29127 }, { "epoch": 1.3645008666323137, "grad_norm": 0.6214438720100603, "learning_rate": 1.211062123684108e-06, "loss": 0.2789, "step": 29128 }, { "epoch": 1.364547711622242, "grad_norm": 0.5894700725155911, "learning_rate": 1.2108996267314352e-06, "loss": 0.2724, "step": 29129 }, { "epoch": 1.3645945566121704, "grad_norm": 0.6291829875874929, "learning_rate": 1.2107371371972113e-06, "loss": 0.2705, "step": 29130 }, { "epoch": 1.3646414016020987, "grad_norm": 0.5879984770819248, "learning_rate": 1.2105746550823718e-06, "loss": 0.2701, "step": 29131 }, { "epoch": 1.364688246592027, "grad_norm": 0.5977709801838321, "learning_rate": 1.2104121803878505e-06, "loss": 0.2802, "step": 29132 }, { "epoch": 1.3647350915819554, "grad_norm": 0.5651278228918223, "learning_rate": 1.2102497131145827e-06, "loss": 0.2479, "step": 29133 }, { "epoch": 1.3647819365718836, "grad_norm": 0.5561870136282357, "learning_rate": 1.2100872532635046e-06, "loss": 0.2508, "step": 29134 }, { "epoch": 1.3648287815618119, "grad_norm": 0.6087428222483504, "learning_rate": 1.2099248008355493e-06, "loss": 0.2638, "step": 29135 }, { "epoch": 1.3648756265517403, "grad_norm": 0.6123672210178613, "learning_rate": 1.2097623558316523e-06, "loss": 0.2577, "step": 29136 }, { "epoch": 1.3649224715416686, "grad_norm": 0.6446959218945203, "learning_rate": 1.20959991825275e-06, "loss": 0.2753, "step": 29137 }, { "epoch": 1.3649693165315968, "grad_norm": 0.6414120427226115, "learning_rate": 1.2094374880997744e-06, "loss": 0.285, "step": 29138 }, { "epoch": 1.3650161615215253, "grad_norm": 0.599592224147624, "learning_rate": 1.209275065373662e-06, "loss": 0.2772, "step": 29139 }, { "epoch": 1.3650630065114537, "grad_norm": 0.6488447430318753, "learning_rate": 1.2091126500753473e-06, "loss": 0.2866, "step": 29140 }, { "epoch": 1.365109851501382, "grad_norm": 0.6274379241929482, "learning_rate": 1.2089502422057656e-06, "loss": 0.2871, "step": 29141 }, { "epoch": 1.3651566964913102, "grad_norm": 0.596709294696737, "learning_rate": 1.20878784176585e-06, "loss": 0.2741, "step": 29142 }, { "epoch": 1.3652035414812387, "grad_norm": 0.6196011560803254, "learning_rate": 1.2086254487565365e-06, "loss": 0.2844, "step": 29143 }, { "epoch": 1.365250386471167, "grad_norm": 0.5590903098193261, "learning_rate": 1.2084630631787582e-06, "loss": 0.2526, "step": 29144 }, { "epoch": 1.3652972314610952, "grad_norm": 0.5722076854262839, "learning_rate": 1.2083006850334504e-06, "loss": 0.2679, "step": 29145 }, { "epoch": 1.3653440764510236, "grad_norm": 0.5801707929596815, "learning_rate": 1.2081383143215472e-06, "loss": 0.2571, "step": 29146 }, { "epoch": 1.3653909214409519, "grad_norm": 0.6527592929264646, "learning_rate": 1.2079759510439835e-06, "loss": 0.296, "step": 29147 }, { "epoch": 1.36543776643088, "grad_norm": 0.5984359527513707, "learning_rate": 1.2078135952016944e-06, "loss": 0.2635, "step": 29148 }, { "epoch": 1.3654846114208086, "grad_norm": 0.586702084689599, "learning_rate": 1.2076512467956135e-06, "loss": 0.2672, "step": 29149 }, { "epoch": 1.3655314564107368, "grad_norm": 0.587893664993095, "learning_rate": 1.2074889058266736e-06, "loss": 0.2674, "step": 29150 }, { "epoch": 1.3655783014006653, "grad_norm": 0.603355463873995, "learning_rate": 1.2073265722958103e-06, "loss": 0.2663, "step": 29151 }, { "epoch": 1.3656251463905935, "grad_norm": 0.6247273642514745, "learning_rate": 1.2071642462039577e-06, "loss": 0.2807, "step": 29152 }, { "epoch": 1.365671991380522, "grad_norm": 0.5969199650316911, "learning_rate": 1.2070019275520499e-06, "loss": 0.2768, "step": 29153 }, { "epoch": 1.3657188363704502, "grad_norm": 0.5489678623828514, "learning_rate": 1.206839616341022e-06, "loss": 0.26, "step": 29154 }, { "epoch": 1.3657656813603785, "grad_norm": 0.5862947630806383, "learning_rate": 1.206677312571806e-06, "loss": 0.257, "step": 29155 }, { "epoch": 1.365812526350307, "grad_norm": 0.5845489369400311, "learning_rate": 1.2065150162453383e-06, "loss": 0.2741, "step": 29156 }, { "epoch": 1.3658593713402352, "grad_norm": 0.552775369609799, "learning_rate": 1.2063527273625504e-06, "loss": 0.2542, "step": 29157 }, { "epoch": 1.3659062163301634, "grad_norm": 0.5863919068949246, "learning_rate": 1.206190445924377e-06, "loss": 0.2816, "step": 29158 }, { "epoch": 1.3659530613200919, "grad_norm": 0.6459788061939999, "learning_rate": 1.206028171931753e-06, "loss": 0.3033, "step": 29159 }, { "epoch": 1.36599990631002, "grad_norm": 0.6134047127739195, "learning_rate": 1.2058659053856114e-06, "loss": 0.2924, "step": 29160 }, { "epoch": 1.3660467512999483, "grad_norm": 0.6132760887260504, "learning_rate": 1.2057036462868871e-06, "loss": 0.2795, "step": 29161 }, { "epoch": 1.3660935962898768, "grad_norm": 0.5379270504396045, "learning_rate": 1.2055413946365132e-06, "loss": 0.2401, "step": 29162 }, { "epoch": 1.366140441279805, "grad_norm": 0.5700643337629421, "learning_rate": 1.2053791504354223e-06, "loss": 0.2616, "step": 29163 }, { "epoch": 1.3661872862697335, "grad_norm": 0.588059312328345, "learning_rate": 1.2052169136845487e-06, "loss": 0.2832, "step": 29164 }, { "epoch": 1.3662341312596618, "grad_norm": 0.6047246524414641, "learning_rate": 1.2050546843848266e-06, "loss": 0.2612, "step": 29165 }, { "epoch": 1.3662809762495902, "grad_norm": 0.6101027019654263, "learning_rate": 1.2048924625371894e-06, "loss": 0.2741, "step": 29166 }, { "epoch": 1.3663278212395185, "grad_norm": 0.6106364376774922, "learning_rate": 1.2047302481425712e-06, "loss": 0.283, "step": 29167 }, { "epoch": 1.3663746662294467, "grad_norm": 0.5679439858654054, "learning_rate": 1.2045680412019042e-06, "loss": 0.2561, "step": 29168 }, { "epoch": 1.3664215112193752, "grad_norm": 0.5834479394515271, "learning_rate": 1.2044058417161233e-06, "loss": 0.2742, "step": 29169 }, { "epoch": 1.3664683562093034, "grad_norm": 0.6056859632064653, "learning_rate": 1.2042436496861603e-06, "loss": 0.277, "step": 29170 }, { "epoch": 1.3665152011992316, "grad_norm": 0.5438923244310194, "learning_rate": 1.2040814651129493e-06, "loss": 0.2526, "step": 29171 }, { "epoch": 1.36656204618916, "grad_norm": 0.6350816701905184, "learning_rate": 1.2039192879974237e-06, "loss": 0.2583, "step": 29172 }, { "epoch": 1.3666088911790883, "grad_norm": 0.5975743200406733, "learning_rate": 1.2037571183405182e-06, "loss": 0.2622, "step": 29173 }, { "epoch": 1.3666557361690166, "grad_norm": 0.5639645515958829, "learning_rate": 1.2035949561431633e-06, "loss": 0.2577, "step": 29174 }, { "epoch": 1.366702581158945, "grad_norm": 0.5755292245709924, "learning_rate": 1.2034328014062938e-06, "loss": 0.2662, "step": 29175 }, { "epoch": 1.3667494261488735, "grad_norm": 0.6097813760634664, "learning_rate": 1.2032706541308436e-06, "loss": 0.2731, "step": 29176 }, { "epoch": 1.3667962711388018, "grad_norm": 0.637896142295885, "learning_rate": 1.2031085143177438e-06, "loss": 0.2763, "step": 29177 }, { "epoch": 1.36684311612873, "grad_norm": 0.6574633364232328, "learning_rate": 1.2029463819679284e-06, "loss": 0.2899, "step": 29178 }, { "epoch": 1.3668899611186585, "grad_norm": 0.5973014687059663, "learning_rate": 1.202784257082332e-06, "loss": 0.2801, "step": 29179 }, { "epoch": 1.3669368061085867, "grad_norm": 0.6255234632235842, "learning_rate": 1.2026221396618848e-06, "loss": 0.2902, "step": 29180 }, { "epoch": 1.366983651098515, "grad_norm": 0.6233653331522421, "learning_rate": 1.2024600297075212e-06, "loss": 0.2784, "step": 29181 }, { "epoch": 1.3670304960884434, "grad_norm": 0.5735247193029687, "learning_rate": 1.202297927220174e-06, "loss": 0.2757, "step": 29182 }, { "epoch": 1.3670773410783716, "grad_norm": 0.6195646832915294, "learning_rate": 1.202135832200777e-06, "loss": 0.2931, "step": 29183 }, { "epoch": 1.3671241860682999, "grad_norm": 0.572104001603703, "learning_rate": 1.2019737446502614e-06, "loss": 0.2525, "step": 29184 }, { "epoch": 1.3671710310582283, "grad_norm": 0.5564876246607862, "learning_rate": 1.2018116645695613e-06, "loss": 0.268, "step": 29185 }, { "epoch": 1.3672178760481566, "grad_norm": 0.5978505740109903, "learning_rate": 1.2016495919596077e-06, "loss": 0.2701, "step": 29186 }, { "epoch": 1.367264721038085, "grad_norm": 0.5707323720199817, "learning_rate": 1.2014875268213347e-06, "loss": 0.2794, "step": 29187 }, { "epoch": 1.3673115660280133, "grad_norm": 0.6237888115163098, "learning_rate": 1.2013254691556745e-06, "loss": 0.28, "step": 29188 }, { "epoch": 1.3673584110179418, "grad_norm": 0.592238738909225, "learning_rate": 1.2011634189635598e-06, "loss": 0.2686, "step": 29189 }, { "epoch": 1.36740525600787, "grad_norm": 0.6177727020253391, "learning_rate": 1.201001376245924e-06, "loss": 0.2834, "step": 29190 }, { "epoch": 1.3674521009977982, "grad_norm": 0.6152657733045168, "learning_rate": 1.200839341003698e-06, "loss": 0.2765, "step": 29191 }, { "epoch": 1.3674989459877267, "grad_norm": 0.5943503003617462, "learning_rate": 1.2006773132378161e-06, "loss": 0.2894, "step": 29192 }, { "epoch": 1.367545790977655, "grad_norm": 0.5935268075109127, "learning_rate": 1.2005152929492086e-06, "loss": 0.2716, "step": 29193 }, { "epoch": 1.3675926359675832, "grad_norm": 0.571745631279145, "learning_rate": 1.2003532801388091e-06, "loss": 0.2615, "step": 29194 }, { "epoch": 1.3676394809575116, "grad_norm": 0.5950754380977566, "learning_rate": 1.2001912748075498e-06, "loss": 0.2597, "step": 29195 }, { "epoch": 1.3676863259474399, "grad_norm": 0.5854652545458963, "learning_rate": 1.2000292769563638e-06, "loss": 0.2767, "step": 29196 }, { "epoch": 1.3677331709373681, "grad_norm": 0.617777660219621, "learning_rate": 1.1998672865861816e-06, "loss": 0.2897, "step": 29197 }, { "epoch": 1.3677800159272966, "grad_norm": 0.5757786821678755, "learning_rate": 1.1997053036979376e-06, "loss": 0.2837, "step": 29198 }, { "epoch": 1.3678268609172248, "grad_norm": 0.5773195055668772, "learning_rate": 1.1995433282925617e-06, "loss": 0.2569, "step": 29199 }, { "epoch": 1.3678737059071533, "grad_norm": 0.6075752291008638, "learning_rate": 1.199381360370987e-06, "loss": 0.2542, "step": 29200 }, { "epoch": 1.3679205508970815, "grad_norm": 0.6255233941616745, "learning_rate": 1.1992193999341457e-06, "loss": 0.278, "step": 29201 }, { "epoch": 1.36796739588701, "grad_norm": 0.6299036301913847, "learning_rate": 1.1990574469829698e-06, "loss": 0.2699, "step": 29202 }, { "epoch": 1.3680142408769382, "grad_norm": 0.5874549213202673, "learning_rate": 1.1988955015183924e-06, "loss": 0.2689, "step": 29203 }, { "epoch": 1.3680610858668665, "grad_norm": 0.6288550387665818, "learning_rate": 1.1987335635413434e-06, "loss": 0.2838, "step": 29204 }, { "epoch": 1.368107930856795, "grad_norm": 0.5944048849709795, "learning_rate": 1.1985716330527563e-06, "loss": 0.2643, "step": 29205 }, { "epoch": 1.3681547758467232, "grad_norm": 0.6212572532415427, "learning_rate": 1.1984097100535616e-06, "loss": 0.2775, "step": 29206 }, { "epoch": 1.3682016208366514, "grad_norm": 0.63445845843183, "learning_rate": 1.1982477945446921e-06, "loss": 0.2802, "step": 29207 }, { "epoch": 1.3682484658265799, "grad_norm": 0.6025434774547863, "learning_rate": 1.1980858865270792e-06, "loss": 0.2769, "step": 29208 }, { "epoch": 1.3682953108165081, "grad_norm": 0.6256084829492662, "learning_rate": 1.1979239860016549e-06, "loss": 0.281, "step": 29209 }, { "epoch": 1.3683421558064364, "grad_norm": 0.6324585700456072, "learning_rate": 1.1977620929693514e-06, "loss": 0.2821, "step": 29210 }, { "epoch": 1.3683890007963648, "grad_norm": 0.5925292922277002, "learning_rate": 1.1976002074311e-06, "loss": 0.2773, "step": 29211 }, { "epoch": 1.3684358457862933, "grad_norm": 0.5669562278700194, "learning_rate": 1.1974383293878312e-06, "loss": 0.2717, "step": 29212 }, { "epoch": 1.3684826907762215, "grad_norm": 0.5987289619254369, "learning_rate": 1.1972764588404772e-06, "loss": 0.2793, "step": 29213 }, { "epoch": 1.3685295357661498, "grad_norm": 0.571778199613023, "learning_rate": 1.1971145957899699e-06, "loss": 0.2613, "step": 29214 }, { "epoch": 1.3685763807560782, "grad_norm": 0.6632335756592628, "learning_rate": 1.1969527402372406e-06, "loss": 0.3025, "step": 29215 }, { "epoch": 1.3686232257460065, "grad_norm": 0.5908232149938345, "learning_rate": 1.1967908921832216e-06, "loss": 0.2613, "step": 29216 }, { "epoch": 1.3686700707359347, "grad_norm": 0.5999457480683633, "learning_rate": 1.196629051628843e-06, "loss": 0.2704, "step": 29217 }, { "epoch": 1.3687169157258632, "grad_norm": 0.5395364223727488, "learning_rate": 1.196467218575037e-06, "loss": 0.2424, "step": 29218 }, { "epoch": 1.3687637607157914, "grad_norm": 0.6000318618052557, "learning_rate": 1.1963053930227336e-06, "loss": 0.2617, "step": 29219 }, { "epoch": 1.3688106057057197, "grad_norm": 0.5856698896720611, "learning_rate": 1.1961435749728654e-06, "loss": 0.2549, "step": 29220 }, { "epoch": 1.3688574506956481, "grad_norm": 0.5936649023469431, "learning_rate": 1.195981764426363e-06, "loss": 0.2494, "step": 29221 }, { "epoch": 1.3689042956855764, "grad_norm": 0.5585497162842246, "learning_rate": 1.1958199613841586e-06, "loss": 0.2531, "step": 29222 }, { "epoch": 1.3689511406755048, "grad_norm": 0.5823284809466782, "learning_rate": 1.1956581658471816e-06, "loss": 0.2845, "step": 29223 }, { "epoch": 1.368997985665433, "grad_norm": 0.5668530566223402, "learning_rate": 1.1954963778163641e-06, "loss": 0.2541, "step": 29224 }, { "epoch": 1.3690448306553615, "grad_norm": 0.6275302687992305, "learning_rate": 1.1953345972926383e-06, "loss": 0.2876, "step": 29225 }, { "epoch": 1.3690916756452898, "grad_norm": 0.5682653555502732, "learning_rate": 1.1951728242769329e-06, "loss": 0.2645, "step": 29226 }, { "epoch": 1.369138520635218, "grad_norm": 0.5766492880157145, "learning_rate": 1.1950110587701796e-06, "loss": 0.2701, "step": 29227 }, { "epoch": 1.3691853656251465, "grad_norm": 0.5870746486081769, "learning_rate": 1.1948493007733109e-06, "loss": 0.2648, "step": 29228 }, { "epoch": 1.3692322106150747, "grad_norm": 0.5778338625337012, "learning_rate": 1.1946875502872555e-06, "loss": 0.2715, "step": 29229 }, { "epoch": 1.369279055605003, "grad_norm": 0.5769364132627044, "learning_rate": 1.194525807312945e-06, "loss": 0.2658, "step": 29230 }, { "epoch": 1.3693259005949314, "grad_norm": 0.6468871094251418, "learning_rate": 1.1943640718513106e-06, "loss": 0.2745, "step": 29231 }, { "epoch": 1.3693727455848597, "grad_norm": 0.5823423314574949, "learning_rate": 1.1942023439032834e-06, "loss": 0.2779, "step": 29232 }, { "epoch": 1.369419590574788, "grad_norm": 0.6062954534922744, "learning_rate": 1.1940406234697928e-06, "loss": 0.2928, "step": 29233 }, { "epoch": 1.3694664355647164, "grad_norm": 0.5735776335799389, "learning_rate": 1.193878910551771e-06, "loss": 0.2445, "step": 29234 }, { "epoch": 1.3695132805546446, "grad_norm": 0.5741354803290403, "learning_rate": 1.193717205150147e-06, "loss": 0.2664, "step": 29235 }, { "epoch": 1.369560125544573, "grad_norm": 0.6027050016473662, "learning_rate": 1.1935555072658523e-06, "loss": 0.2788, "step": 29236 }, { "epoch": 1.3696069705345013, "grad_norm": 0.6337970474431951, "learning_rate": 1.193393816899817e-06, "loss": 0.2779, "step": 29237 }, { "epoch": 1.3696538155244298, "grad_norm": 0.6148930000335261, "learning_rate": 1.1932321340529717e-06, "loss": 0.2742, "step": 29238 }, { "epoch": 1.369700660514358, "grad_norm": 0.580470806551401, "learning_rate": 1.1930704587262484e-06, "loss": 0.2617, "step": 29239 }, { "epoch": 1.3697475055042863, "grad_norm": 0.5486759762237845, "learning_rate": 1.1929087909205761e-06, "loss": 0.2621, "step": 29240 }, { "epoch": 1.3697943504942147, "grad_norm": 0.5911491207601487, "learning_rate": 1.192747130636884e-06, "loss": 0.2642, "step": 29241 }, { "epoch": 1.369841195484143, "grad_norm": 0.6258137047178862, "learning_rate": 1.1925854778761037e-06, "loss": 0.2757, "step": 29242 }, { "epoch": 1.3698880404740712, "grad_norm": 0.5645197521300317, "learning_rate": 1.1924238326391654e-06, "loss": 0.2782, "step": 29243 }, { "epoch": 1.3699348854639997, "grad_norm": 0.593771599207421, "learning_rate": 1.192262194926999e-06, "loss": 0.2817, "step": 29244 }, { "epoch": 1.369981730453928, "grad_norm": 0.6121258561029403, "learning_rate": 1.1921005647405365e-06, "loss": 0.2901, "step": 29245 }, { "epoch": 1.3700285754438561, "grad_norm": 0.5886111141996944, "learning_rate": 1.191938942080705e-06, "loss": 0.2791, "step": 29246 }, { "epoch": 1.3700754204337846, "grad_norm": 0.5608583819285513, "learning_rate": 1.1917773269484375e-06, "loss": 0.2592, "step": 29247 }, { "epoch": 1.370122265423713, "grad_norm": 0.6045781300265833, "learning_rate": 1.1916157193446614e-06, "loss": 0.2692, "step": 29248 }, { "epoch": 1.3701691104136413, "grad_norm": 0.5567409132726102, "learning_rate": 1.1914541192703084e-06, "loss": 0.2643, "step": 29249 }, { "epoch": 1.3702159554035696, "grad_norm": 0.5607786359102626, "learning_rate": 1.1912925267263076e-06, "loss": 0.2623, "step": 29250 }, { "epoch": 1.370262800393498, "grad_norm": 0.5951319948453639, "learning_rate": 1.1911309417135897e-06, "loss": 0.2704, "step": 29251 }, { "epoch": 1.3703096453834263, "grad_norm": 0.6713172612498842, "learning_rate": 1.190969364233085e-06, "loss": 0.2861, "step": 29252 }, { "epoch": 1.3703564903733545, "grad_norm": 0.5668870430881304, "learning_rate": 1.1908077942857226e-06, "loss": 0.2661, "step": 29253 }, { "epoch": 1.370403335363283, "grad_norm": 0.6161058745054301, "learning_rate": 1.1906462318724316e-06, "loss": 0.2854, "step": 29254 }, { "epoch": 1.3704501803532112, "grad_norm": 0.6127725775841755, "learning_rate": 1.1904846769941422e-06, "loss": 0.2931, "step": 29255 }, { "epoch": 1.3704970253431394, "grad_norm": 0.5824486740661622, "learning_rate": 1.1903231296517844e-06, "loss": 0.2665, "step": 29256 }, { "epoch": 1.370543870333068, "grad_norm": 0.5638850826276346, "learning_rate": 1.190161589846288e-06, "loss": 0.2746, "step": 29257 }, { "epoch": 1.3705907153229961, "grad_norm": 0.6069733473772663, "learning_rate": 1.1900000575785819e-06, "loss": 0.2691, "step": 29258 }, { "epoch": 1.3706375603129246, "grad_norm": 0.626384561656022, "learning_rate": 1.1898385328495976e-06, "loss": 0.2995, "step": 29259 }, { "epoch": 1.3706844053028528, "grad_norm": 0.6484809043999896, "learning_rate": 1.1896770156602632e-06, "loss": 0.2866, "step": 29260 }, { "epoch": 1.3707312502927813, "grad_norm": 0.607090713937024, "learning_rate": 1.1895155060115073e-06, "loss": 0.2726, "step": 29261 }, { "epoch": 1.3707780952827096, "grad_norm": 0.5666580886435928, "learning_rate": 1.1893540039042603e-06, "loss": 0.2773, "step": 29262 }, { "epoch": 1.3708249402726378, "grad_norm": 0.6260644966364622, "learning_rate": 1.1891925093394516e-06, "loss": 0.2791, "step": 29263 }, { "epoch": 1.3708717852625663, "grad_norm": 0.5453404268856212, "learning_rate": 1.1890310223180105e-06, "loss": 0.2708, "step": 29264 }, { "epoch": 1.3709186302524945, "grad_norm": 0.631433021829474, "learning_rate": 1.1888695428408672e-06, "loss": 0.2784, "step": 29265 }, { "epoch": 1.3709654752424227, "grad_norm": 0.6141110908827953, "learning_rate": 1.1887080709089494e-06, "loss": 0.2798, "step": 29266 }, { "epoch": 1.3710123202323512, "grad_norm": 0.6154376373456023, "learning_rate": 1.1885466065231877e-06, "loss": 0.2765, "step": 29267 }, { "epoch": 1.3710591652222794, "grad_norm": 0.5977238286067982, "learning_rate": 1.18838514968451e-06, "loss": 0.2708, "step": 29268 }, { "epoch": 1.3711060102122077, "grad_norm": 0.6020709348771156, "learning_rate": 1.188223700393846e-06, "loss": 0.2623, "step": 29269 }, { "epoch": 1.3711528552021361, "grad_norm": 0.6000374019302961, "learning_rate": 1.1880622586521247e-06, "loss": 0.2697, "step": 29270 }, { "epoch": 1.3711997001920644, "grad_norm": 0.6035897238478782, "learning_rate": 1.1879008244602766e-06, "loss": 0.2786, "step": 29271 }, { "epoch": 1.3712465451819928, "grad_norm": 0.6323212173254187, "learning_rate": 1.1877393978192286e-06, "loss": 0.2637, "step": 29272 }, { "epoch": 1.371293390171921, "grad_norm": 0.611234163800244, "learning_rate": 1.1875779787299105e-06, "loss": 0.2936, "step": 29273 }, { "epoch": 1.3713402351618496, "grad_norm": 0.6039893873362074, "learning_rate": 1.187416567193252e-06, "loss": 0.261, "step": 29274 }, { "epoch": 1.3713870801517778, "grad_norm": 0.6041662622568069, "learning_rate": 1.1872551632101804e-06, "loss": 0.268, "step": 29275 }, { "epoch": 1.371433925141706, "grad_norm": 0.5773005504245241, "learning_rate": 1.1870937667816254e-06, "loss": 0.2604, "step": 29276 }, { "epoch": 1.3714807701316345, "grad_norm": 0.5930932558227259, "learning_rate": 1.1869323779085168e-06, "loss": 0.2661, "step": 29277 }, { "epoch": 1.3715276151215627, "grad_norm": 0.591171530878943, "learning_rate": 1.186770996591781e-06, "loss": 0.288, "step": 29278 }, { "epoch": 1.371574460111491, "grad_norm": 0.6027480214238115, "learning_rate": 1.1866096228323487e-06, "loss": 0.255, "step": 29279 }, { "epoch": 1.3716213051014194, "grad_norm": 0.5877128619796048, "learning_rate": 1.1864482566311474e-06, "loss": 0.2763, "step": 29280 }, { "epoch": 1.3716681500913477, "grad_norm": 0.6053822726054809, "learning_rate": 1.1862868979891072e-06, "loss": 0.2652, "step": 29281 }, { "epoch": 1.371714995081276, "grad_norm": 0.5651750493277501, "learning_rate": 1.1861255469071552e-06, "loss": 0.2646, "step": 29282 }, { "epoch": 1.3717618400712044, "grad_norm": 0.5753534916705002, "learning_rate": 1.1859642033862212e-06, "loss": 0.2691, "step": 29283 }, { "epoch": 1.3718086850611328, "grad_norm": 0.6047666033169716, "learning_rate": 1.1858028674272318e-06, "loss": 0.2722, "step": 29284 }, { "epoch": 1.371855530051061, "grad_norm": 0.5647830969714991, "learning_rate": 1.1856415390311168e-06, "loss": 0.2638, "step": 29285 }, { "epoch": 1.3719023750409893, "grad_norm": 0.6348107899299443, "learning_rate": 1.1854802181988043e-06, "loss": 0.2795, "step": 29286 }, { "epoch": 1.3719492200309178, "grad_norm": 0.5575642021417377, "learning_rate": 1.1853189049312228e-06, "loss": 0.2512, "step": 29287 }, { "epoch": 1.371996065020846, "grad_norm": 0.6018803320152889, "learning_rate": 1.1851575992293013e-06, "loss": 0.2779, "step": 29288 }, { "epoch": 1.3720429100107743, "grad_norm": 0.5954355950673048, "learning_rate": 1.1849963010939674e-06, "loss": 0.2729, "step": 29289 }, { "epoch": 1.3720897550007027, "grad_norm": 0.6456822752911822, "learning_rate": 1.1848350105261485e-06, "loss": 0.2862, "step": 29290 }, { "epoch": 1.372136599990631, "grad_norm": 0.6203714084998593, "learning_rate": 1.1846737275267735e-06, "loss": 0.2759, "step": 29291 }, { "epoch": 1.3721834449805592, "grad_norm": 0.5665354793979802, "learning_rate": 1.1845124520967707e-06, "loss": 0.2681, "step": 29292 }, { "epoch": 1.3722302899704877, "grad_norm": 0.6197659859022252, "learning_rate": 1.1843511842370678e-06, "loss": 0.287, "step": 29293 }, { "epoch": 1.372277134960416, "grad_norm": 0.608065602479706, "learning_rate": 1.1841899239485943e-06, "loss": 0.2818, "step": 29294 }, { "epoch": 1.3723239799503444, "grad_norm": 0.6430537465370427, "learning_rate": 1.1840286712322763e-06, "loss": 0.2915, "step": 29295 }, { "epoch": 1.3723708249402726, "grad_norm": 0.60849778886861, "learning_rate": 1.183867426089043e-06, "loss": 0.2949, "step": 29296 }, { "epoch": 1.372417669930201, "grad_norm": 0.598451882598282, "learning_rate": 1.1837061885198214e-06, "loss": 0.2784, "step": 29297 }, { "epoch": 1.3724645149201293, "grad_norm": 0.6236065191384246, "learning_rate": 1.18354495852554e-06, "loss": 0.2828, "step": 29298 }, { "epoch": 1.3725113599100576, "grad_norm": 0.5513186021873078, "learning_rate": 1.183383736107126e-06, "loss": 0.2682, "step": 29299 }, { "epoch": 1.372558204899986, "grad_norm": 0.5802987967502945, "learning_rate": 1.1832225212655078e-06, "loss": 0.2704, "step": 29300 }, { "epoch": 1.3726050498899143, "grad_norm": 0.5925604827432129, "learning_rate": 1.183061314001614e-06, "loss": 0.2724, "step": 29301 }, { "epoch": 1.3726518948798425, "grad_norm": 0.5643042462548724, "learning_rate": 1.1829001143163712e-06, "loss": 0.2824, "step": 29302 }, { "epoch": 1.372698739869771, "grad_norm": 0.5852822844814718, "learning_rate": 1.1827389222107067e-06, "loss": 0.2716, "step": 29303 }, { "epoch": 1.3727455848596992, "grad_norm": 0.6040112548530201, "learning_rate": 1.1825777376855482e-06, "loss": 0.2801, "step": 29304 }, { "epoch": 1.3727924298496275, "grad_norm": 0.5947903143673904, "learning_rate": 1.1824165607418239e-06, "loss": 0.2627, "step": 29305 }, { "epoch": 1.372839274839556, "grad_norm": 0.589811322120179, "learning_rate": 1.1822553913804611e-06, "loss": 0.2727, "step": 29306 }, { "epoch": 1.3728861198294842, "grad_norm": 0.5901125299687683, "learning_rate": 1.1820942296023882e-06, "loss": 0.2728, "step": 29307 }, { "epoch": 1.3729329648194126, "grad_norm": 0.5316550826632355, "learning_rate": 1.1819330754085309e-06, "loss": 0.2498, "step": 29308 }, { "epoch": 1.3729798098093409, "grad_norm": 0.6408677297317822, "learning_rate": 1.1817719287998184e-06, "loss": 0.2696, "step": 29309 }, { "epoch": 1.3730266547992693, "grad_norm": 0.5643542037686695, "learning_rate": 1.1816107897771761e-06, "loss": 0.2502, "step": 29310 }, { "epoch": 1.3730734997891976, "grad_norm": 0.5547610936630362, "learning_rate": 1.1814496583415321e-06, "loss": 0.2585, "step": 29311 }, { "epoch": 1.3731203447791258, "grad_norm": 0.5685362250160275, "learning_rate": 1.1812885344938144e-06, "loss": 0.2728, "step": 29312 }, { "epoch": 1.3731671897690543, "grad_norm": 0.5826495201876644, "learning_rate": 1.1811274182349506e-06, "loss": 0.2685, "step": 29313 }, { "epoch": 1.3732140347589825, "grad_norm": 0.5865099846187259, "learning_rate": 1.1809663095658657e-06, "loss": 0.2775, "step": 29314 }, { "epoch": 1.3732608797489108, "grad_norm": 0.6166122320470756, "learning_rate": 1.1808052084874885e-06, "loss": 0.2761, "step": 29315 }, { "epoch": 1.3733077247388392, "grad_norm": 0.5756999114885638, "learning_rate": 1.1806441150007467e-06, "loss": 0.2694, "step": 29316 }, { "epoch": 1.3733545697287675, "grad_norm": 0.5791983286307288, "learning_rate": 1.1804830291065653e-06, "loss": 0.2844, "step": 29317 }, { "epoch": 1.3734014147186957, "grad_norm": 0.6484926843818062, "learning_rate": 1.1803219508058724e-06, "loss": 0.2709, "step": 29318 }, { "epoch": 1.3734482597086242, "grad_norm": 0.61268019049991, "learning_rate": 1.1801608800995962e-06, "loss": 0.2771, "step": 29319 }, { "epoch": 1.3734951046985526, "grad_norm": 0.6246987488607064, "learning_rate": 1.1799998169886612e-06, "loss": 0.2631, "step": 29320 }, { "epoch": 1.3735419496884809, "grad_norm": 0.5943288036475941, "learning_rate": 1.1798387614739956e-06, "loss": 0.2697, "step": 29321 }, { "epoch": 1.373588794678409, "grad_norm": 0.5493031259370326, "learning_rate": 1.1796777135565263e-06, "loss": 0.2647, "step": 29322 }, { "epoch": 1.3736356396683376, "grad_norm": 0.5642841608173951, "learning_rate": 1.1795166732371804e-06, "loss": 0.2722, "step": 29323 }, { "epoch": 1.3736824846582658, "grad_norm": 0.5504818253530753, "learning_rate": 1.1793556405168835e-06, "loss": 0.2635, "step": 29324 }, { "epoch": 1.373729329648194, "grad_norm": 0.600346374937279, "learning_rate": 1.179194615396563e-06, "loss": 0.2821, "step": 29325 }, { "epoch": 1.3737761746381225, "grad_norm": 0.5914306784548526, "learning_rate": 1.1790335978771463e-06, "loss": 0.2678, "step": 29326 }, { "epoch": 1.3738230196280508, "grad_norm": 0.5698141876958615, "learning_rate": 1.1788725879595583e-06, "loss": 0.2718, "step": 29327 }, { "epoch": 1.373869864617979, "grad_norm": 0.6381375630844465, "learning_rate": 1.1787115856447268e-06, "loss": 0.2939, "step": 29328 }, { "epoch": 1.3739167096079075, "grad_norm": 0.5875069147805569, "learning_rate": 1.1785505909335776e-06, "loss": 0.2665, "step": 29329 }, { "epoch": 1.3739635545978357, "grad_norm": 0.5845403958031943, "learning_rate": 1.178389603827039e-06, "loss": 0.2788, "step": 29330 }, { "epoch": 1.3740103995877642, "grad_norm": 0.6301350366874529, "learning_rate": 1.1782286243260347e-06, "loss": 0.2711, "step": 29331 }, { "epoch": 1.3740572445776924, "grad_norm": 0.5827003085638196, "learning_rate": 1.1780676524314936e-06, "loss": 0.2615, "step": 29332 }, { "epoch": 1.3741040895676209, "grad_norm": 0.5702003718248125, "learning_rate": 1.1779066881443398e-06, "loss": 0.2575, "step": 29333 }, { "epoch": 1.374150934557549, "grad_norm": 0.6109957409754826, "learning_rate": 1.177745731465501e-06, "loss": 0.2901, "step": 29334 }, { "epoch": 1.3741977795474773, "grad_norm": 0.6166386008519354, "learning_rate": 1.177584782395903e-06, "loss": 0.287, "step": 29335 }, { "epoch": 1.3742446245374058, "grad_norm": 0.5993780125540096, "learning_rate": 1.1774238409364733e-06, "loss": 0.2838, "step": 29336 }, { "epoch": 1.374291469527334, "grad_norm": 0.6230579095128322, "learning_rate": 1.1772629070881358e-06, "loss": 0.2723, "step": 29337 }, { "epoch": 1.3743383145172623, "grad_norm": 0.6067331446318801, "learning_rate": 1.1771019808518188e-06, "loss": 0.2761, "step": 29338 }, { "epoch": 1.3743851595071908, "grad_norm": 0.581517824932715, "learning_rate": 1.1769410622284467e-06, "loss": 0.2681, "step": 29339 }, { "epoch": 1.374432004497119, "grad_norm": 0.5586776479667236, "learning_rate": 1.176780151218946e-06, "loss": 0.27, "step": 29340 }, { "epoch": 1.3744788494870472, "grad_norm": 0.5635221888322709, "learning_rate": 1.176619247824243e-06, "loss": 0.2576, "step": 29341 }, { "epoch": 1.3745256944769757, "grad_norm": 0.5813429339296078, "learning_rate": 1.1764583520452635e-06, "loss": 0.2757, "step": 29342 }, { "epoch": 1.374572539466904, "grad_norm": 0.6472162196465502, "learning_rate": 1.1762974638829347e-06, "loss": 0.2623, "step": 29343 }, { "epoch": 1.3746193844568324, "grad_norm": 0.6221410150339144, "learning_rate": 1.1761365833381801e-06, "loss": 0.2764, "step": 29344 }, { "epoch": 1.3746662294467606, "grad_norm": 0.6320345581197626, "learning_rate": 1.175975710411928e-06, "loss": 0.2791, "step": 29345 }, { "epoch": 1.374713074436689, "grad_norm": 0.6354656639127472, "learning_rate": 1.1758148451051015e-06, "loss": 0.2786, "step": 29346 }, { "epoch": 1.3747599194266173, "grad_norm": 0.5810442771229986, "learning_rate": 1.1756539874186281e-06, "loss": 0.2685, "step": 29347 }, { "epoch": 1.3748067644165456, "grad_norm": 0.5563850963224471, "learning_rate": 1.1754931373534332e-06, "loss": 0.2589, "step": 29348 }, { "epoch": 1.374853609406474, "grad_norm": 0.6090682619156168, "learning_rate": 1.175332294910442e-06, "loss": 0.2721, "step": 29349 }, { "epoch": 1.3749004543964023, "grad_norm": 0.6434830111990949, "learning_rate": 1.175171460090582e-06, "loss": 0.2858, "step": 29350 }, { "epoch": 1.3749472993863305, "grad_norm": 0.6004871198657262, "learning_rate": 1.1750106328947768e-06, "loss": 0.268, "step": 29351 }, { "epoch": 1.374994144376259, "grad_norm": 0.6039659019647032, "learning_rate": 1.1748498133239517e-06, "loss": 0.2698, "step": 29352 }, { "epoch": 1.3750409893661872, "grad_norm": 0.5993171153666725, "learning_rate": 1.1746890013790326e-06, "loss": 0.2669, "step": 29353 }, { "epoch": 1.3750878343561155, "grad_norm": 0.573300462540321, "learning_rate": 1.1745281970609453e-06, "loss": 0.2694, "step": 29354 }, { "epoch": 1.375134679346044, "grad_norm": 0.5785446506319352, "learning_rate": 1.174367400370615e-06, "loss": 0.2717, "step": 29355 }, { "epoch": 1.3751815243359724, "grad_norm": 0.6044341995152898, "learning_rate": 1.1742066113089685e-06, "loss": 0.2928, "step": 29356 }, { "epoch": 1.3752283693259006, "grad_norm": 0.5881324959403825, "learning_rate": 1.1740458298769284e-06, "loss": 0.2611, "step": 29357 }, { "epoch": 1.3752752143158289, "grad_norm": 0.6120148004834457, "learning_rate": 1.1738850560754222e-06, "loss": 0.2757, "step": 29358 }, { "epoch": 1.3753220593057573, "grad_norm": 0.575268432396064, "learning_rate": 1.1737242899053736e-06, "loss": 0.2667, "step": 29359 }, { "epoch": 1.3753689042956856, "grad_norm": 0.609406915638802, "learning_rate": 1.173563531367708e-06, "loss": 0.2728, "step": 29360 }, { "epoch": 1.3754157492856138, "grad_norm": 0.6360028652554438, "learning_rate": 1.1734027804633511e-06, "loss": 0.2801, "step": 29361 }, { "epoch": 1.3754625942755423, "grad_norm": 0.6237511938792812, "learning_rate": 1.1732420371932285e-06, "loss": 0.2823, "step": 29362 }, { "epoch": 1.3755094392654705, "grad_norm": 0.6237402616424504, "learning_rate": 1.1730813015582636e-06, "loss": 0.2853, "step": 29363 }, { "epoch": 1.3755562842553988, "grad_norm": 0.6302228655384234, "learning_rate": 1.1729205735593827e-06, "loss": 0.3006, "step": 29364 }, { "epoch": 1.3756031292453272, "grad_norm": 0.5649593506843701, "learning_rate": 1.172759853197511e-06, "loss": 0.2714, "step": 29365 }, { "epoch": 1.3756499742352555, "grad_norm": 0.6100686995403848, "learning_rate": 1.1725991404735717e-06, "loss": 0.3036, "step": 29366 }, { "epoch": 1.375696819225184, "grad_norm": 0.6082882005696599, "learning_rate": 1.1724384353884908e-06, "loss": 0.2865, "step": 29367 }, { "epoch": 1.3757436642151122, "grad_norm": 0.6142531987046997, "learning_rate": 1.172277737943194e-06, "loss": 0.2705, "step": 29368 }, { "epoch": 1.3757905092050406, "grad_norm": 0.5640071604287866, "learning_rate": 1.172117048138604e-06, "loss": 0.2557, "step": 29369 }, { "epoch": 1.3758373541949689, "grad_norm": 0.6077290197987556, "learning_rate": 1.171956365975647e-06, "loss": 0.2743, "step": 29370 }, { "epoch": 1.3758841991848971, "grad_norm": 0.6048539841711054, "learning_rate": 1.171795691455247e-06, "loss": 0.2829, "step": 29371 }, { "epoch": 1.3759310441748256, "grad_norm": 0.6245711166487027, "learning_rate": 1.17163502457833e-06, "loss": 0.2778, "step": 29372 }, { "epoch": 1.3759778891647538, "grad_norm": 0.5852624568207437, "learning_rate": 1.1714743653458188e-06, "loss": 0.2662, "step": 29373 }, { "epoch": 1.376024734154682, "grad_norm": 0.5541936874407468, "learning_rate": 1.1713137137586395e-06, "loss": 0.2586, "step": 29374 }, { "epoch": 1.3760715791446105, "grad_norm": 0.5529943260656442, "learning_rate": 1.1711530698177148e-06, "loss": 0.2516, "step": 29375 }, { "epoch": 1.3761184241345388, "grad_norm": 0.5646142153639814, "learning_rate": 1.1709924335239702e-06, "loss": 0.2592, "step": 29376 }, { "epoch": 1.376165269124467, "grad_norm": 0.6112812613091424, "learning_rate": 1.1708318048783304e-06, "loss": 0.2752, "step": 29377 }, { "epoch": 1.3762121141143955, "grad_norm": 0.5798162970586184, "learning_rate": 1.1706711838817192e-06, "loss": 0.2687, "step": 29378 }, { "epoch": 1.3762589591043237, "grad_norm": 0.6012281071101573, "learning_rate": 1.1705105705350623e-06, "loss": 0.2751, "step": 29379 }, { "epoch": 1.3763058040942522, "grad_norm": 0.5792999641020248, "learning_rate": 1.1703499648392826e-06, "loss": 0.2616, "step": 29380 }, { "epoch": 1.3763526490841804, "grad_norm": 0.6093827091893271, "learning_rate": 1.170189366795304e-06, "loss": 0.2671, "step": 29381 }, { "epoch": 1.3763994940741089, "grad_norm": 0.5762890336580926, "learning_rate": 1.1700287764040517e-06, "loss": 0.2675, "step": 29382 }, { "epoch": 1.3764463390640371, "grad_norm": 0.6117257558716032, "learning_rate": 1.1698681936664488e-06, "loss": 0.2768, "step": 29383 }, { "epoch": 1.3764931840539654, "grad_norm": 0.554203774863581, "learning_rate": 1.1697076185834205e-06, "loss": 0.2545, "step": 29384 }, { "epoch": 1.3765400290438938, "grad_norm": 0.5471940373218844, "learning_rate": 1.1695470511558914e-06, "loss": 0.2507, "step": 29385 }, { "epoch": 1.376586874033822, "grad_norm": 0.591268165676421, "learning_rate": 1.1693864913847838e-06, "loss": 0.285, "step": 29386 }, { "epoch": 1.3766337190237503, "grad_norm": 0.5595081799428284, "learning_rate": 1.1692259392710234e-06, "loss": 0.2623, "step": 29387 }, { "epoch": 1.3766805640136788, "grad_norm": 0.5800417068222481, "learning_rate": 1.1690653948155322e-06, "loss": 0.29, "step": 29388 }, { "epoch": 1.376727409003607, "grad_norm": 0.6090635553162176, "learning_rate": 1.1689048580192353e-06, "loss": 0.2836, "step": 29389 }, { "epoch": 1.3767742539935353, "grad_norm": 0.5816596320046601, "learning_rate": 1.168744328883056e-06, "loss": 0.2696, "step": 29390 }, { "epoch": 1.3768210989834637, "grad_norm": 0.5830613122391545, "learning_rate": 1.168583807407919e-06, "loss": 0.2658, "step": 29391 }, { "epoch": 1.3768679439733922, "grad_norm": 0.5775795412665876, "learning_rate": 1.1684232935947482e-06, "loss": 0.2643, "step": 29392 }, { "epoch": 1.3769147889633204, "grad_norm": 0.6712647206210313, "learning_rate": 1.1682627874444667e-06, "loss": 0.2987, "step": 29393 }, { "epoch": 1.3769616339532487, "grad_norm": 0.6072260863325805, "learning_rate": 1.1681022889579971e-06, "loss": 0.2651, "step": 29394 }, { "epoch": 1.3770084789431771, "grad_norm": 0.5798966574510951, "learning_rate": 1.1679417981362642e-06, "loss": 0.2731, "step": 29395 }, { "epoch": 1.3770553239331054, "grad_norm": 0.6073253132500539, "learning_rate": 1.1677813149801914e-06, "loss": 0.2641, "step": 29396 }, { "epoch": 1.3771021689230336, "grad_norm": 0.6147729757841639, "learning_rate": 1.1676208394907024e-06, "loss": 0.2773, "step": 29397 }, { "epoch": 1.377149013912962, "grad_norm": 0.5962795836300332, "learning_rate": 1.1674603716687204e-06, "loss": 0.2839, "step": 29398 }, { "epoch": 1.3771958589028903, "grad_norm": 0.5766521653853537, "learning_rate": 1.1672999115151701e-06, "loss": 0.2653, "step": 29399 }, { "epoch": 1.3772427038928186, "grad_norm": 0.5294406337304405, "learning_rate": 1.167139459030974e-06, "loss": 0.2632, "step": 29400 }, { "epoch": 1.377289548882747, "grad_norm": 0.6284206184153537, "learning_rate": 1.166979014217054e-06, "loss": 0.279, "step": 29401 }, { "epoch": 1.3773363938726753, "grad_norm": 0.5738128288845804, "learning_rate": 1.1668185770743351e-06, "loss": 0.2709, "step": 29402 }, { "epoch": 1.3773832388626037, "grad_norm": 0.6106028203918245, "learning_rate": 1.16665814760374e-06, "loss": 0.2908, "step": 29403 }, { "epoch": 1.377430083852532, "grad_norm": 0.5645696435454672, "learning_rate": 1.166497725806192e-06, "loss": 0.2596, "step": 29404 }, { "epoch": 1.3774769288424604, "grad_norm": 0.5835148369222777, "learning_rate": 1.1663373116826157e-06, "loss": 0.2692, "step": 29405 }, { "epoch": 1.3775237738323887, "grad_norm": 0.5851463102018218, "learning_rate": 1.166176905233932e-06, "loss": 0.2605, "step": 29406 }, { "epoch": 1.377570618822317, "grad_norm": 0.6352696954666687, "learning_rate": 1.1660165064610657e-06, "loss": 0.2735, "step": 29407 }, { "epoch": 1.3776174638122454, "grad_norm": 0.5738887300833669, "learning_rate": 1.165856115364938e-06, "loss": 0.2691, "step": 29408 }, { "epoch": 1.3776643088021736, "grad_norm": 0.5694455577061648, "learning_rate": 1.1656957319464735e-06, "loss": 0.2672, "step": 29409 }, { "epoch": 1.3777111537921019, "grad_norm": 0.5900005718262634, "learning_rate": 1.1655353562065943e-06, "loss": 0.2735, "step": 29410 }, { "epoch": 1.3777579987820303, "grad_norm": 0.5778900460247677, "learning_rate": 1.165374988146225e-06, "loss": 0.2722, "step": 29411 }, { "epoch": 1.3778048437719586, "grad_norm": 0.6156801333814674, "learning_rate": 1.165214627766286e-06, "loss": 0.2565, "step": 29412 }, { "epoch": 1.3778516887618868, "grad_norm": 0.5952274495095509, "learning_rate": 1.1650542750677016e-06, "loss": 0.2635, "step": 29413 }, { "epoch": 1.3778985337518153, "grad_norm": 0.5691049922810187, "learning_rate": 1.164893930051395e-06, "loss": 0.2689, "step": 29414 }, { "epoch": 1.3779453787417435, "grad_norm": 0.5609110491793223, "learning_rate": 1.1647335927182873e-06, "loss": 0.2693, "step": 29415 }, { "epoch": 1.377992223731672, "grad_norm": 0.5483425649861702, "learning_rate": 1.1645732630693025e-06, "loss": 0.2539, "step": 29416 }, { "epoch": 1.3780390687216002, "grad_norm": 0.5566837953509298, "learning_rate": 1.1644129411053638e-06, "loss": 0.2612, "step": 29417 }, { "epoch": 1.3780859137115287, "grad_norm": 0.5637229390642705, "learning_rate": 1.1642526268273916e-06, "loss": 0.2657, "step": 29418 }, { "epoch": 1.378132758701457, "grad_norm": 0.5627599828184606, "learning_rate": 1.16409232023631e-06, "loss": 0.2471, "step": 29419 }, { "epoch": 1.3781796036913851, "grad_norm": 0.591690089405638, "learning_rate": 1.1639320213330415e-06, "loss": 0.2566, "step": 29420 }, { "epoch": 1.3782264486813136, "grad_norm": 0.5919357591303882, "learning_rate": 1.1637717301185091e-06, "loss": 0.2722, "step": 29421 }, { "epoch": 1.3782732936712419, "grad_norm": 0.6096775939406297, "learning_rate": 1.1636114465936337e-06, "loss": 0.2721, "step": 29422 }, { "epoch": 1.37832013866117, "grad_norm": 0.6120208674050518, "learning_rate": 1.1634511707593397e-06, "loss": 0.2817, "step": 29423 }, { "epoch": 1.3783669836510986, "grad_norm": 0.6034016566731673, "learning_rate": 1.163290902616547e-06, "loss": 0.2637, "step": 29424 }, { "epoch": 1.3784138286410268, "grad_norm": 0.5808850590740062, "learning_rate": 1.1631306421661795e-06, "loss": 0.2686, "step": 29425 }, { "epoch": 1.378460673630955, "grad_norm": 0.642490832271982, "learning_rate": 1.162970389409159e-06, "loss": 0.2771, "step": 29426 }, { "epoch": 1.3785075186208835, "grad_norm": 0.6097253017504394, "learning_rate": 1.162810144346408e-06, "loss": 0.2812, "step": 29427 }, { "epoch": 1.378554363610812, "grad_norm": 0.6264318362530853, "learning_rate": 1.1626499069788491e-06, "loss": 0.2868, "step": 29428 }, { "epoch": 1.3786012086007402, "grad_norm": 0.5719410680096874, "learning_rate": 1.162489677307404e-06, "loss": 0.2585, "step": 29429 }, { "epoch": 1.3786480535906684, "grad_norm": 0.6386892528787534, "learning_rate": 1.162329455332994e-06, "loss": 0.272, "step": 29430 }, { "epoch": 1.378694898580597, "grad_norm": 0.6422426978226213, "learning_rate": 1.1621692410565413e-06, "loss": 0.2822, "step": 29431 }, { "epoch": 1.3787417435705251, "grad_norm": 0.5828926314301778, "learning_rate": 1.1620090344789685e-06, "loss": 0.2711, "step": 29432 }, { "epoch": 1.3787885885604534, "grad_norm": 0.6191045044617506, "learning_rate": 1.1618488356011978e-06, "loss": 0.2699, "step": 29433 }, { "epoch": 1.3788354335503819, "grad_norm": 0.6024165510615062, "learning_rate": 1.1616886444241512e-06, "loss": 0.2725, "step": 29434 }, { "epoch": 1.37888227854031, "grad_norm": 0.5682303989869942, "learning_rate": 1.1615284609487495e-06, "loss": 0.272, "step": 29435 }, { "epoch": 1.3789291235302383, "grad_norm": 0.6437814733918497, "learning_rate": 1.1613682851759159e-06, "loss": 0.2869, "step": 29436 }, { "epoch": 1.3789759685201668, "grad_norm": 0.5725001050287325, "learning_rate": 1.1612081171065704e-06, "loss": 0.2515, "step": 29437 }, { "epoch": 1.379022813510095, "grad_norm": 0.6007823648584409, "learning_rate": 1.1610479567416355e-06, "loss": 0.2807, "step": 29438 }, { "epoch": 1.3790696585000235, "grad_norm": 0.6260658023541146, "learning_rate": 1.1608878040820334e-06, "loss": 0.2803, "step": 29439 }, { "epoch": 1.3791165034899517, "grad_norm": 0.6560406156967784, "learning_rate": 1.1607276591286853e-06, "loss": 0.284, "step": 29440 }, { "epoch": 1.3791633484798802, "grad_norm": 0.6320151065705318, "learning_rate": 1.1605675218825139e-06, "loss": 0.2907, "step": 29441 }, { "epoch": 1.3792101934698084, "grad_norm": 0.6257181884271029, "learning_rate": 1.1604073923444395e-06, "loss": 0.2852, "step": 29442 }, { "epoch": 1.3792570384597367, "grad_norm": 0.6104538686962092, "learning_rate": 1.160247270515383e-06, "loss": 0.2896, "step": 29443 }, { "epoch": 1.3793038834496651, "grad_norm": 0.5844685251786934, "learning_rate": 1.1600871563962671e-06, "loss": 0.2785, "step": 29444 }, { "epoch": 1.3793507284395934, "grad_norm": 0.6274416079359278, "learning_rate": 1.1599270499880126e-06, "loss": 0.2757, "step": 29445 }, { "epoch": 1.3793975734295216, "grad_norm": 0.562715019027914, "learning_rate": 1.1597669512915409e-06, "loss": 0.271, "step": 29446 }, { "epoch": 1.37944441841945, "grad_norm": 0.6159195908057136, "learning_rate": 1.1596068603077745e-06, "loss": 0.2823, "step": 29447 }, { "epoch": 1.3794912634093783, "grad_norm": 0.5711602903074904, "learning_rate": 1.159446777037633e-06, "loss": 0.2688, "step": 29448 }, { "epoch": 1.3795381083993066, "grad_norm": 0.6403560677554881, "learning_rate": 1.1592867014820395e-06, "loss": 0.2812, "step": 29449 }, { "epoch": 1.379584953389235, "grad_norm": 0.5925619588090175, "learning_rate": 1.1591266336419127e-06, "loss": 0.2691, "step": 29450 }, { "epoch": 1.3796317983791633, "grad_norm": 0.6030731180367256, "learning_rate": 1.1589665735181752e-06, "loss": 0.2674, "step": 29451 }, { "epoch": 1.3796786433690917, "grad_norm": 0.5973165416610288, "learning_rate": 1.158806521111748e-06, "loss": 0.2663, "step": 29452 }, { "epoch": 1.37972548835902, "grad_norm": 0.573708419661372, "learning_rate": 1.1586464764235534e-06, "loss": 0.2699, "step": 29453 }, { "epoch": 1.3797723333489484, "grad_norm": 0.6113904864972102, "learning_rate": 1.15848643945451e-06, "loss": 0.2687, "step": 29454 }, { "epoch": 1.3798191783388767, "grad_norm": 0.5625946778023255, "learning_rate": 1.15832641020554e-06, "loss": 0.2642, "step": 29455 }, { "epoch": 1.379866023328805, "grad_norm": 0.5895144413670006, "learning_rate": 1.1581663886775652e-06, "loss": 0.2785, "step": 29456 }, { "epoch": 1.3799128683187334, "grad_norm": 0.5545712651729577, "learning_rate": 1.1580063748715048e-06, "loss": 0.2822, "step": 29457 }, { "epoch": 1.3799597133086616, "grad_norm": 0.6358590416463753, "learning_rate": 1.1578463687882802e-06, "loss": 0.2799, "step": 29458 }, { "epoch": 1.3800065582985899, "grad_norm": 0.5804218496591477, "learning_rate": 1.1576863704288124e-06, "loss": 0.2754, "step": 29459 }, { "epoch": 1.3800534032885183, "grad_norm": 0.6400923348188691, "learning_rate": 1.1575263797940233e-06, "loss": 0.2857, "step": 29460 }, { "epoch": 1.3801002482784466, "grad_norm": 0.5709230316163754, "learning_rate": 1.1573663968848314e-06, "loss": 0.2722, "step": 29461 }, { "epoch": 1.3801470932683748, "grad_norm": 0.5636810286999766, "learning_rate": 1.1572064217021586e-06, "loss": 0.2839, "step": 29462 }, { "epoch": 1.3801939382583033, "grad_norm": 0.5915342234477653, "learning_rate": 1.1570464542469263e-06, "loss": 0.2754, "step": 29463 }, { "epoch": 1.3802407832482317, "grad_norm": 0.6105861095598863, "learning_rate": 1.1568864945200528e-06, "loss": 0.2644, "step": 29464 }, { "epoch": 1.38028762823816, "grad_norm": 0.5761922929233422, "learning_rate": 1.1567265425224603e-06, "loss": 0.2758, "step": 29465 }, { "epoch": 1.3803344732280882, "grad_norm": 0.5892831722849764, "learning_rate": 1.1565665982550698e-06, "loss": 0.2729, "step": 29466 }, { "epoch": 1.3803813182180167, "grad_norm": 0.6823671303469252, "learning_rate": 1.1564066617187998e-06, "loss": 0.2749, "step": 29467 }, { "epoch": 1.380428163207945, "grad_norm": 0.563823881167297, "learning_rate": 1.1562467329145715e-06, "loss": 0.2718, "step": 29468 }, { "epoch": 1.3804750081978732, "grad_norm": 0.6430492677356406, "learning_rate": 1.1560868118433062e-06, "loss": 0.2758, "step": 29469 }, { "epoch": 1.3805218531878016, "grad_norm": 0.5992325716768003, "learning_rate": 1.1559268985059239e-06, "loss": 0.2644, "step": 29470 }, { "epoch": 1.3805686981777299, "grad_norm": 0.5272168022666679, "learning_rate": 1.1557669929033438e-06, "loss": 0.2586, "step": 29471 }, { "epoch": 1.380615543167658, "grad_norm": 0.5400684194019882, "learning_rate": 1.1556070950364876e-06, "loss": 0.265, "step": 29472 }, { "epoch": 1.3806623881575866, "grad_norm": 0.6521577935673761, "learning_rate": 1.1554472049062737e-06, "loss": 0.2857, "step": 29473 }, { "epoch": 1.3807092331475148, "grad_norm": 0.5755779309420253, "learning_rate": 1.1552873225136236e-06, "loss": 0.2559, "step": 29474 }, { "epoch": 1.3807560781374433, "grad_norm": 0.6063662697751916, "learning_rate": 1.1551274478594565e-06, "loss": 0.267, "step": 29475 }, { "epoch": 1.3808029231273715, "grad_norm": 0.6190731722704612, "learning_rate": 1.1549675809446938e-06, "loss": 0.2737, "step": 29476 }, { "epoch": 1.3808497681173, "grad_norm": 0.6262590617025299, "learning_rate": 1.1548077217702542e-06, "loss": 0.2812, "step": 29477 }, { "epoch": 1.3808966131072282, "grad_norm": 0.5923033260570624, "learning_rate": 1.1546478703370587e-06, "loss": 0.2691, "step": 29478 }, { "epoch": 1.3809434580971565, "grad_norm": 0.5931919488103087, "learning_rate": 1.1544880266460256e-06, "loss": 0.2772, "step": 29479 }, { "epoch": 1.380990303087085, "grad_norm": 0.5763352668931637, "learning_rate": 1.1543281906980757e-06, "loss": 0.2674, "step": 29480 }, { "epoch": 1.3810371480770132, "grad_norm": 0.609828372963716, "learning_rate": 1.1541683624941289e-06, "loss": 0.2782, "step": 29481 }, { "epoch": 1.3810839930669414, "grad_norm": 0.5515632021930484, "learning_rate": 1.154008542035105e-06, "loss": 0.2536, "step": 29482 }, { "epoch": 1.3811308380568699, "grad_norm": 0.5872380354934906, "learning_rate": 1.1538487293219245e-06, "loss": 0.273, "step": 29483 }, { "epoch": 1.381177683046798, "grad_norm": 0.609378790604922, "learning_rate": 1.1536889243555051e-06, "loss": 0.2865, "step": 29484 }, { "epoch": 1.3812245280367264, "grad_norm": 0.5749525716162, "learning_rate": 1.1535291271367689e-06, "loss": 0.2753, "step": 29485 }, { "epoch": 1.3812713730266548, "grad_norm": 0.6245223906943609, "learning_rate": 1.1533693376666328e-06, "loss": 0.2876, "step": 29486 }, { "epoch": 1.381318218016583, "grad_norm": 0.5556059905127191, "learning_rate": 1.1532095559460177e-06, "loss": 0.2529, "step": 29487 }, { "epoch": 1.3813650630065115, "grad_norm": 0.5855861820975308, "learning_rate": 1.1530497819758434e-06, "loss": 0.2739, "step": 29488 }, { "epoch": 1.3814119079964398, "grad_norm": 0.5499020403209991, "learning_rate": 1.1528900157570288e-06, "loss": 0.2637, "step": 29489 }, { "epoch": 1.3814587529863682, "grad_norm": 0.5836307609963534, "learning_rate": 1.1527302572904948e-06, "loss": 0.2791, "step": 29490 }, { "epoch": 1.3815055979762965, "grad_norm": 0.6308881816015358, "learning_rate": 1.1525705065771595e-06, "loss": 0.2828, "step": 29491 }, { "epoch": 1.3815524429662247, "grad_norm": 0.6259432118269505, "learning_rate": 1.152410763617941e-06, "loss": 0.2798, "step": 29492 }, { "epoch": 1.3815992879561532, "grad_norm": 0.5998389892874366, "learning_rate": 1.1522510284137601e-06, "loss": 0.2826, "step": 29493 }, { "epoch": 1.3816461329460814, "grad_norm": 0.6271303946569384, "learning_rate": 1.1520913009655358e-06, "loss": 0.2565, "step": 29494 }, { "epoch": 1.3816929779360096, "grad_norm": 0.5816359053360362, "learning_rate": 1.1519315812741871e-06, "loss": 0.2752, "step": 29495 }, { "epoch": 1.381739822925938, "grad_norm": 0.570268389895226, "learning_rate": 1.1517718693406346e-06, "loss": 0.2746, "step": 29496 }, { "epoch": 1.3817866679158664, "grad_norm": 0.5911427297488469, "learning_rate": 1.151612165165795e-06, "loss": 0.2654, "step": 29497 }, { "epoch": 1.3818335129057946, "grad_norm": 0.6042369190278174, "learning_rate": 1.1514524687505892e-06, "loss": 0.2802, "step": 29498 }, { "epoch": 1.381880357895723, "grad_norm": 0.5891353748825291, "learning_rate": 1.1512927800959351e-06, "loss": 0.2731, "step": 29499 }, { "epoch": 1.3819272028856515, "grad_norm": 0.5903882748695994, "learning_rate": 1.1511330992027517e-06, "loss": 0.2757, "step": 29500 }, { "epoch": 1.3819740478755798, "grad_norm": 0.6081073024201124, "learning_rate": 1.1509734260719582e-06, "loss": 0.2847, "step": 29501 }, { "epoch": 1.382020892865508, "grad_norm": 0.60438974729331, "learning_rate": 1.1508137607044748e-06, "loss": 0.2706, "step": 29502 }, { "epoch": 1.3820677378554365, "grad_norm": 0.5651779777848197, "learning_rate": 1.1506541031012178e-06, "loss": 0.2807, "step": 29503 }, { "epoch": 1.3821145828453647, "grad_norm": 0.5552027030803123, "learning_rate": 1.1504944532631076e-06, "loss": 0.2537, "step": 29504 }, { "epoch": 1.382161427835293, "grad_norm": 0.5995258990305101, "learning_rate": 1.1503348111910633e-06, "loss": 0.2681, "step": 29505 }, { "epoch": 1.3822082728252214, "grad_norm": 0.6018829264465074, "learning_rate": 1.1501751768860024e-06, "loss": 0.2831, "step": 29506 }, { "epoch": 1.3822551178151496, "grad_norm": 0.6093097081053286, "learning_rate": 1.1500155503488436e-06, "loss": 0.2775, "step": 29507 }, { "epoch": 1.3823019628050779, "grad_norm": 0.5619731152250574, "learning_rate": 1.1498559315805074e-06, "loss": 0.2571, "step": 29508 }, { "epoch": 1.3823488077950064, "grad_norm": 0.5776258366670847, "learning_rate": 1.1496963205819097e-06, "loss": 0.2636, "step": 29509 }, { "epoch": 1.3823956527849346, "grad_norm": 0.5357329789555043, "learning_rate": 1.1495367173539705e-06, "loss": 0.2595, "step": 29510 }, { "epoch": 1.382442497774863, "grad_norm": 0.5486636571374045, "learning_rate": 1.1493771218976079e-06, "loss": 0.2511, "step": 29511 }, { "epoch": 1.3824893427647913, "grad_norm": 0.614947891387888, "learning_rate": 1.1492175342137416e-06, "loss": 0.2646, "step": 29512 }, { "epoch": 1.3825361877547198, "grad_norm": 0.5822890487841749, "learning_rate": 1.1490579543032879e-06, "loss": 0.2743, "step": 29513 }, { "epoch": 1.382583032744648, "grad_norm": 0.5946884554080959, "learning_rate": 1.148898382167167e-06, "loss": 0.2609, "step": 29514 }, { "epoch": 1.3826298777345762, "grad_norm": 0.5723021948877588, "learning_rate": 1.1487388178062953e-06, "loss": 0.2591, "step": 29515 }, { "epoch": 1.3826767227245047, "grad_norm": 0.5947923571570216, "learning_rate": 1.1485792612215924e-06, "loss": 0.2894, "step": 29516 }, { "epoch": 1.382723567714433, "grad_norm": 0.6005540577817977, "learning_rate": 1.148419712413976e-06, "loss": 0.2838, "step": 29517 }, { "epoch": 1.3827704127043612, "grad_norm": 0.6681327954053273, "learning_rate": 1.1482601713843642e-06, "loss": 0.2828, "step": 29518 }, { "epoch": 1.3828172576942896, "grad_norm": 0.5784937103397173, "learning_rate": 1.1481006381336765e-06, "loss": 0.2621, "step": 29519 }, { "epoch": 1.3828641026842179, "grad_norm": 0.6171443024987608, "learning_rate": 1.1479411126628299e-06, "loss": 0.2714, "step": 29520 }, { "epoch": 1.3829109476741461, "grad_norm": 0.5970678776306731, "learning_rate": 1.1477815949727413e-06, "loss": 0.2495, "step": 29521 }, { "epoch": 1.3829577926640746, "grad_norm": 0.6520901367710227, "learning_rate": 1.14762208506433e-06, "loss": 0.2827, "step": 29522 }, { "epoch": 1.3830046376540028, "grad_norm": 0.6262767743226894, "learning_rate": 1.1474625829385136e-06, "loss": 0.2841, "step": 29523 }, { "epoch": 1.3830514826439313, "grad_norm": 0.5892640460820672, "learning_rate": 1.14730308859621e-06, "loss": 0.2708, "step": 29524 }, { "epoch": 1.3830983276338595, "grad_norm": 0.6390754518043755, "learning_rate": 1.1471436020383382e-06, "loss": 0.2902, "step": 29525 }, { "epoch": 1.383145172623788, "grad_norm": 0.5852796999220516, "learning_rate": 1.1469841232658139e-06, "loss": 0.2593, "step": 29526 }, { "epoch": 1.3831920176137162, "grad_norm": 0.6112116479452567, "learning_rate": 1.1468246522795567e-06, "loss": 0.272, "step": 29527 }, { "epoch": 1.3832388626036445, "grad_norm": 0.6261919351035479, "learning_rate": 1.1466651890804827e-06, "loss": 0.2825, "step": 29528 }, { "epoch": 1.383285707593573, "grad_norm": 0.6186196942324605, "learning_rate": 1.1465057336695104e-06, "loss": 0.275, "step": 29529 }, { "epoch": 1.3833325525835012, "grad_norm": 0.5868886123768342, "learning_rate": 1.1463462860475574e-06, "loss": 0.2659, "step": 29530 }, { "epoch": 1.3833793975734294, "grad_norm": 0.6238437690344388, "learning_rate": 1.1461868462155416e-06, "loss": 0.2681, "step": 29531 }, { "epoch": 1.3834262425633579, "grad_norm": 0.5837281515478882, "learning_rate": 1.1460274141743808e-06, "loss": 0.2785, "step": 29532 }, { "epoch": 1.3834730875532861, "grad_norm": 0.5965550841585213, "learning_rate": 1.1458679899249919e-06, "loss": 0.2802, "step": 29533 }, { "epoch": 1.3835199325432144, "grad_norm": 0.5966797666716666, "learning_rate": 1.1457085734682917e-06, "loss": 0.2862, "step": 29534 }, { "epoch": 1.3835667775331428, "grad_norm": 0.6383233083808625, "learning_rate": 1.1455491648051981e-06, "loss": 0.2752, "step": 29535 }, { "epoch": 1.3836136225230713, "grad_norm": 0.6146347190070767, "learning_rate": 1.1453897639366288e-06, "loss": 0.2614, "step": 29536 }, { "epoch": 1.3836604675129995, "grad_norm": 0.6537887520165038, "learning_rate": 1.1452303708635007e-06, "loss": 0.2913, "step": 29537 }, { "epoch": 1.3837073125029278, "grad_norm": 0.5461992868462029, "learning_rate": 1.1450709855867314e-06, "loss": 0.2777, "step": 29538 }, { "epoch": 1.3837541574928562, "grad_norm": 0.6040160946568731, "learning_rate": 1.144911608107239e-06, "loss": 0.2754, "step": 29539 }, { "epoch": 1.3838010024827845, "grad_norm": 0.589657782330908, "learning_rate": 1.1447522384259396e-06, "loss": 0.2774, "step": 29540 }, { "epoch": 1.3838478474727127, "grad_norm": 0.5699613325450944, "learning_rate": 1.1445928765437496e-06, "loss": 0.2714, "step": 29541 }, { "epoch": 1.3838946924626412, "grad_norm": 0.6179354663396305, "learning_rate": 1.1444335224615868e-06, "loss": 0.2821, "step": 29542 }, { "epoch": 1.3839415374525694, "grad_norm": 0.6286871956206675, "learning_rate": 1.1442741761803686e-06, "loss": 0.2786, "step": 29543 }, { "epoch": 1.3839883824424977, "grad_norm": 0.594292963572957, "learning_rate": 1.1441148377010116e-06, "loss": 0.2629, "step": 29544 }, { "epoch": 1.3840352274324261, "grad_norm": 0.5990197523786046, "learning_rate": 1.143955507024434e-06, "loss": 0.2853, "step": 29545 }, { "epoch": 1.3840820724223544, "grad_norm": 0.6046072840556498, "learning_rate": 1.1437961841515504e-06, "loss": 0.2765, "step": 29546 }, { "epoch": 1.3841289174122828, "grad_norm": 0.5697900033330923, "learning_rate": 1.1436368690832802e-06, "loss": 0.2899, "step": 29547 }, { "epoch": 1.384175762402211, "grad_norm": 0.6016597210643391, "learning_rate": 1.1434775618205377e-06, "loss": 0.2868, "step": 29548 }, { "epoch": 1.3842226073921395, "grad_norm": 0.5700019134228287, "learning_rate": 1.143318262364241e-06, "loss": 0.2619, "step": 29549 }, { "epoch": 1.3842694523820678, "grad_norm": 0.6748270505857873, "learning_rate": 1.1431589707153068e-06, "loss": 0.2678, "step": 29550 }, { "epoch": 1.384316297371996, "grad_norm": 0.5707676987601816, "learning_rate": 1.1429996868746524e-06, "loss": 0.2747, "step": 29551 }, { "epoch": 1.3843631423619245, "grad_norm": 0.6223146415179595, "learning_rate": 1.1428404108431929e-06, "loss": 0.2655, "step": 29552 }, { "epoch": 1.3844099873518527, "grad_norm": 0.5512517852535241, "learning_rate": 1.1426811426218457e-06, "loss": 0.2702, "step": 29553 }, { "epoch": 1.384456832341781, "grad_norm": 0.5947927141361464, "learning_rate": 1.1425218822115283e-06, "loss": 0.2576, "step": 29554 }, { "epoch": 1.3845036773317094, "grad_norm": 0.5930053939878986, "learning_rate": 1.1423626296131554e-06, "loss": 0.2781, "step": 29555 }, { "epoch": 1.3845505223216377, "grad_norm": 0.5773925961563153, "learning_rate": 1.1422033848276446e-06, "loss": 0.2565, "step": 29556 }, { "epoch": 1.384597367311566, "grad_norm": 0.5839876236093942, "learning_rate": 1.1420441478559124e-06, "loss": 0.2676, "step": 29557 }, { "epoch": 1.3846442123014944, "grad_norm": 0.5667952024075934, "learning_rate": 1.1418849186988743e-06, "loss": 0.2514, "step": 29558 }, { "epoch": 1.3846910572914226, "grad_norm": 0.6035675276076281, "learning_rate": 1.141725697357447e-06, "loss": 0.2825, "step": 29559 }, { "epoch": 1.384737902281351, "grad_norm": 0.6095296602809338, "learning_rate": 1.141566483832547e-06, "loss": 0.2784, "step": 29560 }, { "epoch": 1.3847847472712793, "grad_norm": 0.5758410718946653, "learning_rate": 1.1414072781250914e-06, "loss": 0.2648, "step": 29561 }, { "epoch": 1.3848315922612078, "grad_norm": 0.6189189173711337, "learning_rate": 1.1412480802359945e-06, "loss": 0.2838, "step": 29562 }, { "epoch": 1.384878437251136, "grad_norm": 0.6048947293402382, "learning_rate": 1.1410888901661742e-06, "loss": 0.2718, "step": 29563 }, { "epoch": 1.3849252822410643, "grad_norm": 0.5628110534149138, "learning_rate": 1.1409297079165452e-06, "loss": 0.2713, "step": 29564 }, { "epoch": 1.3849721272309927, "grad_norm": 0.5876665142911183, "learning_rate": 1.1407705334880236e-06, "loss": 0.2712, "step": 29565 }, { "epoch": 1.385018972220921, "grad_norm": 0.5919772214528836, "learning_rate": 1.1406113668815263e-06, "loss": 0.269, "step": 29566 }, { "epoch": 1.3850658172108492, "grad_norm": 0.6003478348708247, "learning_rate": 1.1404522080979689e-06, "loss": 0.2652, "step": 29567 }, { "epoch": 1.3851126622007777, "grad_norm": 0.5924256220212544, "learning_rate": 1.1402930571382682e-06, "loss": 0.2556, "step": 29568 }, { "epoch": 1.385159507190706, "grad_norm": 0.6063124538450317, "learning_rate": 1.1401339140033393e-06, "loss": 0.2842, "step": 29569 }, { "epoch": 1.3852063521806341, "grad_norm": 0.5696831296016928, "learning_rate": 1.1399747786940968e-06, "loss": 0.2694, "step": 29570 }, { "epoch": 1.3852531971705626, "grad_norm": 0.5956667730347548, "learning_rate": 1.1398156512114578e-06, "loss": 0.2614, "step": 29571 }, { "epoch": 1.385300042160491, "grad_norm": 0.6255536405416806, "learning_rate": 1.1396565315563378e-06, "loss": 0.2936, "step": 29572 }, { "epoch": 1.3853468871504193, "grad_norm": 0.5998951634271198, "learning_rate": 1.1394974197296527e-06, "loss": 0.2818, "step": 29573 }, { "epoch": 1.3853937321403476, "grad_norm": 0.6155700416499746, "learning_rate": 1.1393383157323187e-06, "loss": 0.2662, "step": 29574 }, { "epoch": 1.385440577130276, "grad_norm": 0.5705058158990771, "learning_rate": 1.1391792195652498e-06, "loss": 0.2654, "step": 29575 }, { "epoch": 1.3854874221202043, "grad_norm": 0.600281796773426, "learning_rate": 1.1390201312293637e-06, "loss": 0.2742, "step": 29576 }, { "epoch": 1.3855342671101325, "grad_norm": 0.5940073966340924, "learning_rate": 1.1388610507255733e-06, "loss": 0.2698, "step": 29577 }, { "epoch": 1.385581112100061, "grad_norm": 0.5469122283196516, "learning_rate": 1.1387019780547956e-06, "loss": 0.2683, "step": 29578 }, { "epoch": 1.3856279570899892, "grad_norm": 0.6063702661346432, "learning_rate": 1.138542913217946e-06, "loss": 0.2656, "step": 29579 }, { "epoch": 1.3856748020799174, "grad_norm": 0.6132390400947342, "learning_rate": 1.1383838562159397e-06, "loss": 0.2858, "step": 29580 }, { "epoch": 1.385721647069846, "grad_norm": 0.6406389741940726, "learning_rate": 1.138224807049693e-06, "loss": 0.2731, "step": 29581 }, { "epoch": 1.3857684920597741, "grad_norm": 0.6093495176590259, "learning_rate": 1.13806576572012e-06, "loss": 0.2607, "step": 29582 }, { "epoch": 1.3858153370497026, "grad_norm": 0.6061397498284602, "learning_rate": 1.1379067322281356e-06, "loss": 0.279, "step": 29583 }, { "epoch": 1.3858621820396309, "grad_norm": 0.6311198201065461, "learning_rate": 1.1377477065746557e-06, "loss": 0.2794, "step": 29584 }, { "epoch": 1.3859090270295593, "grad_norm": 0.5863993009108436, "learning_rate": 1.1375886887605949e-06, "loss": 0.2675, "step": 29585 }, { "epoch": 1.3859558720194876, "grad_norm": 0.593404503295178, "learning_rate": 1.137429678786869e-06, "loss": 0.265, "step": 29586 }, { "epoch": 1.3860027170094158, "grad_norm": 0.6006873114455821, "learning_rate": 1.137270676654394e-06, "loss": 0.2882, "step": 29587 }, { "epoch": 1.3860495619993443, "grad_norm": 0.6138077578615114, "learning_rate": 1.1371116823640824e-06, "loss": 0.2734, "step": 29588 }, { "epoch": 1.3860964069892725, "grad_norm": 0.5797243931196109, "learning_rate": 1.1369526959168517e-06, "loss": 0.2594, "step": 29589 }, { "epoch": 1.3861432519792007, "grad_norm": 0.6211384837188899, "learning_rate": 1.1367937173136148e-06, "loss": 0.2897, "step": 29590 }, { "epoch": 1.3861900969691292, "grad_norm": 0.6282045993037115, "learning_rate": 1.1366347465552874e-06, "loss": 0.3053, "step": 29591 }, { "epoch": 1.3862369419590574, "grad_norm": 0.6100187059951802, "learning_rate": 1.1364757836427843e-06, "loss": 0.2679, "step": 29592 }, { "epoch": 1.3862837869489857, "grad_norm": 0.5594625333952602, "learning_rate": 1.136316828577021e-06, "loss": 0.2475, "step": 29593 }, { "epoch": 1.3863306319389141, "grad_norm": 0.6271218776463068, "learning_rate": 1.1361578813589112e-06, "loss": 0.2695, "step": 29594 }, { "epoch": 1.3863774769288424, "grad_norm": 0.5999740433224969, "learning_rate": 1.1359989419893697e-06, "loss": 0.2669, "step": 29595 }, { "epoch": 1.3864243219187709, "grad_norm": 0.6116229603650611, "learning_rate": 1.1358400104693124e-06, "loss": 0.2764, "step": 29596 }, { "epoch": 1.386471166908699, "grad_norm": 0.5838934568929611, "learning_rate": 1.135681086799652e-06, "loss": 0.2722, "step": 29597 }, { "epoch": 1.3865180118986276, "grad_norm": 0.6251550758111724, "learning_rate": 1.1355221709813041e-06, "loss": 0.2723, "step": 29598 }, { "epoch": 1.3865648568885558, "grad_norm": 0.6096083485110525, "learning_rate": 1.135363263015183e-06, "loss": 0.2723, "step": 29599 }, { "epoch": 1.386611701878484, "grad_norm": 0.6223653362416858, "learning_rate": 1.1352043629022043e-06, "loss": 0.2751, "step": 29600 }, { "epoch": 1.3866585468684125, "grad_norm": 0.5752013769929187, "learning_rate": 1.135045470643281e-06, "loss": 0.266, "step": 29601 }, { "epoch": 1.3867053918583407, "grad_norm": 0.628408923147131, "learning_rate": 1.1348865862393274e-06, "loss": 0.2644, "step": 29602 }, { "epoch": 1.386752236848269, "grad_norm": 0.5962237376702215, "learning_rate": 1.1347277096912595e-06, "loss": 0.2751, "step": 29603 }, { "epoch": 1.3867990818381974, "grad_norm": 0.6164791965454609, "learning_rate": 1.1345688409999897e-06, "loss": 0.2679, "step": 29604 }, { "epoch": 1.3868459268281257, "grad_norm": 0.582679559952932, "learning_rate": 1.134409980166433e-06, "loss": 0.2731, "step": 29605 }, { "epoch": 1.386892771818054, "grad_norm": 0.5606201116068154, "learning_rate": 1.1342511271915043e-06, "loss": 0.2749, "step": 29606 }, { "epoch": 1.3869396168079824, "grad_norm": 0.619601756873515, "learning_rate": 1.1340922820761163e-06, "loss": 0.2573, "step": 29607 }, { "epoch": 1.3869864617979106, "grad_norm": 0.5920994087270335, "learning_rate": 1.133933444821184e-06, "loss": 0.2787, "step": 29608 }, { "epoch": 1.387033306787839, "grad_norm": 0.613776969370694, "learning_rate": 1.1337746154276214e-06, "loss": 0.273, "step": 29609 }, { "epoch": 1.3870801517777673, "grad_norm": 0.6106859489561254, "learning_rate": 1.1336157938963432e-06, "loss": 0.2803, "step": 29610 }, { "epoch": 1.3871269967676958, "grad_norm": 0.5810448430084867, "learning_rate": 1.1334569802282619e-06, "loss": 0.2527, "step": 29611 }, { "epoch": 1.387173841757624, "grad_norm": 0.5762468827560091, "learning_rate": 1.1332981744242932e-06, "loss": 0.2724, "step": 29612 }, { "epoch": 1.3872206867475523, "grad_norm": 0.5890123700117568, "learning_rate": 1.1331393764853491e-06, "loss": 0.2739, "step": 29613 }, { "epoch": 1.3872675317374807, "grad_norm": 0.5926361269185855, "learning_rate": 1.1329805864123442e-06, "loss": 0.2773, "step": 29614 }, { "epoch": 1.387314376727409, "grad_norm": 0.6030176674879127, "learning_rate": 1.1328218042061925e-06, "loss": 0.2785, "step": 29615 }, { "epoch": 1.3873612217173372, "grad_norm": 0.6033518614082322, "learning_rate": 1.1326630298678088e-06, "loss": 0.2823, "step": 29616 }, { "epoch": 1.3874080667072657, "grad_norm": 0.657477249272742, "learning_rate": 1.1325042633981043e-06, "loss": 0.297, "step": 29617 }, { "epoch": 1.387454911697194, "grad_norm": 0.595287175901192, "learning_rate": 1.1323455047979953e-06, "loss": 0.2775, "step": 29618 }, { "epoch": 1.3875017566871224, "grad_norm": 0.6293688694096001, "learning_rate": 1.1321867540683934e-06, "loss": 0.2887, "step": 29619 }, { "epoch": 1.3875486016770506, "grad_norm": 0.6056137439612808, "learning_rate": 1.1320280112102124e-06, "loss": 0.2761, "step": 29620 }, { "epoch": 1.387595446666979, "grad_norm": 0.6379636414153179, "learning_rate": 1.1318692762243668e-06, "loss": 0.2983, "step": 29621 }, { "epoch": 1.3876422916569073, "grad_norm": 0.5752183426965224, "learning_rate": 1.1317105491117697e-06, "loss": 0.2773, "step": 29622 }, { "epoch": 1.3876891366468356, "grad_norm": 0.5893309540869257, "learning_rate": 1.1315518298733352e-06, "loss": 0.2845, "step": 29623 }, { "epoch": 1.387735981636764, "grad_norm": 0.6330059824966342, "learning_rate": 1.1313931185099751e-06, "loss": 0.2866, "step": 29624 }, { "epoch": 1.3877828266266923, "grad_norm": 0.6095140421440997, "learning_rate": 1.1312344150226046e-06, "loss": 0.2752, "step": 29625 }, { "epoch": 1.3878296716166205, "grad_norm": 0.5821388357949522, "learning_rate": 1.1310757194121353e-06, "loss": 0.2739, "step": 29626 }, { "epoch": 1.387876516606549, "grad_norm": 0.6133379226756689, "learning_rate": 1.1309170316794806e-06, "loss": 0.2827, "step": 29627 }, { "epoch": 1.3879233615964772, "grad_norm": 0.575044025759396, "learning_rate": 1.1307583518255546e-06, "loss": 0.2816, "step": 29628 }, { "epoch": 1.3879702065864055, "grad_norm": 0.6062586226431045, "learning_rate": 1.1305996798512703e-06, "loss": 0.2741, "step": 29629 }, { "epoch": 1.388017051576334, "grad_norm": 0.6212271349902524, "learning_rate": 1.1304410157575412e-06, "loss": 0.2814, "step": 29630 }, { "epoch": 1.3880638965662622, "grad_norm": 0.634121318481067, "learning_rate": 1.1302823595452803e-06, "loss": 0.2822, "step": 29631 }, { "epoch": 1.3881107415561906, "grad_norm": 0.5777180881539428, "learning_rate": 1.130123711215399e-06, "loss": 0.2577, "step": 29632 }, { "epoch": 1.3881575865461189, "grad_norm": 0.5529122989671017, "learning_rate": 1.1299650707688115e-06, "loss": 0.2672, "step": 29633 }, { "epoch": 1.3882044315360473, "grad_norm": 0.5670821042468378, "learning_rate": 1.1298064382064306e-06, "loss": 0.2531, "step": 29634 }, { "epoch": 1.3882512765259756, "grad_norm": 0.5757498727024591, "learning_rate": 1.129647813529169e-06, "loss": 0.2707, "step": 29635 }, { "epoch": 1.3882981215159038, "grad_norm": 0.6273607525619145, "learning_rate": 1.129489196737941e-06, "loss": 0.2949, "step": 29636 }, { "epoch": 1.3883449665058323, "grad_norm": 0.5809871470974852, "learning_rate": 1.1293305878336575e-06, "loss": 0.2616, "step": 29637 }, { "epoch": 1.3883918114957605, "grad_norm": 0.5445352000914466, "learning_rate": 1.1291719868172324e-06, "loss": 0.2682, "step": 29638 }, { "epoch": 1.3884386564856888, "grad_norm": 0.5740665448630999, "learning_rate": 1.1290133936895775e-06, "loss": 0.2625, "step": 29639 }, { "epoch": 1.3884855014756172, "grad_norm": 0.5755527298056249, "learning_rate": 1.1288548084516056e-06, "loss": 0.2805, "step": 29640 }, { "epoch": 1.3885323464655455, "grad_norm": 0.5859722899403312, "learning_rate": 1.1286962311042298e-06, "loss": 0.2884, "step": 29641 }, { "epoch": 1.3885791914554737, "grad_norm": 0.6620201698154751, "learning_rate": 1.1285376616483635e-06, "loss": 0.2894, "step": 29642 }, { "epoch": 1.3886260364454022, "grad_norm": 0.5954466217188453, "learning_rate": 1.1283791000849172e-06, "loss": 0.2653, "step": 29643 }, { "epoch": 1.3886728814353304, "grad_norm": 0.5833522952607274, "learning_rate": 1.1282205464148044e-06, "loss": 0.2731, "step": 29644 }, { "epoch": 1.3887197264252589, "grad_norm": 0.6110128277910319, "learning_rate": 1.1280620006389386e-06, "loss": 0.2677, "step": 29645 }, { "epoch": 1.3887665714151871, "grad_norm": 0.5882778696423144, "learning_rate": 1.1279034627582303e-06, "loss": 0.2736, "step": 29646 }, { "epoch": 1.3888134164051156, "grad_norm": 0.5847166204958236, "learning_rate": 1.1277449327735926e-06, "loss": 0.2607, "step": 29647 }, { "epoch": 1.3888602613950438, "grad_norm": 0.5757160174074853, "learning_rate": 1.1275864106859392e-06, "loss": 0.2631, "step": 29648 }, { "epoch": 1.388907106384972, "grad_norm": 0.5589609422796669, "learning_rate": 1.1274278964961796e-06, "loss": 0.2672, "step": 29649 }, { "epoch": 1.3889539513749005, "grad_norm": 0.6221770843403761, "learning_rate": 1.1272693902052275e-06, "loss": 0.2729, "step": 29650 }, { "epoch": 1.3890007963648288, "grad_norm": 0.5698171191400194, "learning_rate": 1.1271108918139953e-06, "loss": 0.2606, "step": 29651 }, { "epoch": 1.389047641354757, "grad_norm": 0.6704670327907666, "learning_rate": 1.126952401323396e-06, "loss": 0.2859, "step": 29652 }, { "epoch": 1.3890944863446855, "grad_norm": 0.5854363877613689, "learning_rate": 1.1267939187343391e-06, "loss": 0.2622, "step": 29653 }, { "epoch": 1.3891413313346137, "grad_norm": 0.5845450763220241, "learning_rate": 1.1266354440477392e-06, "loss": 0.2713, "step": 29654 }, { "epoch": 1.3891881763245422, "grad_norm": 0.5681593822211569, "learning_rate": 1.1264769772645063e-06, "loss": 0.2604, "step": 29655 }, { "epoch": 1.3892350213144704, "grad_norm": 0.6153567452949075, "learning_rate": 1.126318518385553e-06, "loss": 0.2824, "step": 29656 }, { "epoch": 1.3892818663043989, "grad_norm": 0.5622580469903189, "learning_rate": 1.1261600674117914e-06, "loss": 0.2594, "step": 29657 }, { "epoch": 1.3893287112943271, "grad_norm": 0.6155263776484958, "learning_rate": 1.1260016243441335e-06, "loss": 0.2723, "step": 29658 }, { "epoch": 1.3893755562842554, "grad_norm": 0.6581053418122248, "learning_rate": 1.1258431891834914e-06, "loss": 0.2933, "step": 29659 }, { "epoch": 1.3894224012741838, "grad_norm": 0.5998494136341928, "learning_rate": 1.1256847619307767e-06, "loss": 0.2765, "step": 29660 }, { "epoch": 1.389469246264112, "grad_norm": 0.5771706563351385, "learning_rate": 1.1255263425868997e-06, "loss": 0.2636, "step": 29661 }, { "epoch": 1.3895160912540403, "grad_norm": 0.5908328283587473, "learning_rate": 1.1253679311527735e-06, "loss": 0.2636, "step": 29662 }, { "epoch": 1.3895629362439688, "grad_norm": 0.5785265106184991, "learning_rate": 1.125209527629309e-06, "loss": 0.2624, "step": 29663 }, { "epoch": 1.389609781233897, "grad_norm": 0.5602570037291069, "learning_rate": 1.1250511320174182e-06, "loss": 0.2642, "step": 29664 }, { "epoch": 1.3896566262238252, "grad_norm": 0.534300075101351, "learning_rate": 1.1248927443180136e-06, "loss": 0.2547, "step": 29665 }, { "epoch": 1.3897034712137537, "grad_norm": 0.5790476908931654, "learning_rate": 1.1247343645320046e-06, "loss": 0.2742, "step": 29666 }, { "epoch": 1.389750316203682, "grad_norm": 0.5441303068163202, "learning_rate": 1.1245759926603045e-06, "loss": 0.2532, "step": 29667 }, { "epoch": 1.3897971611936104, "grad_norm": 0.5828972374438559, "learning_rate": 1.124417628703823e-06, "loss": 0.2716, "step": 29668 }, { "epoch": 1.3898440061835386, "grad_norm": 0.57100193148888, "learning_rate": 1.1242592726634725e-06, "loss": 0.2803, "step": 29669 }, { "epoch": 1.3898908511734671, "grad_norm": 0.6331436809746739, "learning_rate": 1.124100924540164e-06, "loss": 0.2759, "step": 29670 }, { "epoch": 1.3899376961633954, "grad_norm": 0.5984249907505523, "learning_rate": 1.1239425843348087e-06, "loss": 0.2851, "step": 29671 }, { "epoch": 1.3899845411533236, "grad_norm": 0.572403316315139, "learning_rate": 1.1237842520483192e-06, "loss": 0.2791, "step": 29672 }, { "epoch": 1.390031386143252, "grad_norm": 0.5569243388174684, "learning_rate": 1.123625927681605e-06, "loss": 0.257, "step": 29673 }, { "epoch": 1.3900782311331803, "grad_norm": 0.592168299768327, "learning_rate": 1.1234676112355773e-06, "loss": 0.2629, "step": 29674 }, { "epoch": 1.3901250761231085, "grad_norm": 0.6130943954325627, "learning_rate": 1.1233093027111468e-06, "loss": 0.2645, "step": 29675 }, { "epoch": 1.390171921113037, "grad_norm": 0.6264482973095411, "learning_rate": 1.1231510021092258e-06, "loss": 0.2856, "step": 29676 }, { "epoch": 1.3902187661029652, "grad_norm": 0.5636918031551487, "learning_rate": 1.1229927094307247e-06, "loss": 0.2674, "step": 29677 }, { "epoch": 1.3902656110928935, "grad_norm": 0.6075293150537621, "learning_rate": 1.122834424676554e-06, "loss": 0.282, "step": 29678 }, { "epoch": 1.390312456082822, "grad_norm": 0.6109623935483255, "learning_rate": 1.1226761478476263e-06, "loss": 0.2665, "step": 29679 }, { "epoch": 1.3903593010727502, "grad_norm": 0.6285933080873726, "learning_rate": 1.1225178789448512e-06, "loss": 0.2737, "step": 29680 }, { "epoch": 1.3904061460626786, "grad_norm": 0.5961268751200941, "learning_rate": 1.1223596179691387e-06, "loss": 0.2773, "step": 29681 }, { "epoch": 1.390452991052607, "grad_norm": 0.6324945119247162, "learning_rate": 1.1222013649214002e-06, "loss": 0.275, "step": 29682 }, { "epoch": 1.3904998360425354, "grad_norm": 0.6054438873899866, "learning_rate": 1.1220431198025464e-06, "loss": 0.293, "step": 29683 }, { "epoch": 1.3905466810324636, "grad_norm": 0.6367927330419841, "learning_rate": 1.1218848826134884e-06, "loss": 0.2891, "step": 29684 }, { "epoch": 1.3905935260223918, "grad_norm": 0.5870919688464836, "learning_rate": 1.1217266533551372e-06, "loss": 0.2639, "step": 29685 }, { "epoch": 1.3906403710123203, "grad_norm": 0.6508497796497152, "learning_rate": 1.1215684320284017e-06, "loss": 0.2941, "step": 29686 }, { "epoch": 1.3906872160022485, "grad_norm": 0.5806009869525842, "learning_rate": 1.1214102186341947e-06, "loss": 0.2783, "step": 29687 }, { "epoch": 1.3907340609921768, "grad_norm": 0.6526226003256452, "learning_rate": 1.1212520131734241e-06, "loss": 0.279, "step": 29688 }, { "epoch": 1.3907809059821052, "grad_norm": 0.5939348513930253, "learning_rate": 1.121093815647002e-06, "loss": 0.274, "step": 29689 }, { "epoch": 1.3908277509720335, "grad_norm": 0.5961198419794873, "learning_rate": 1.1209356260558383e-06, "loss": 0.2757, "step": 29690 }, { "epoch": 1.390874595961962, "grad_norm": 0.601046381790709, "learning_rate": 1.1207774444008442e-06, "loss": 0.2648, "step": 29691 }, { "epoch": 1.3909214409518902, "grad_norm": 0.5629582615388053, "learning_rate": 1.1206192706829283e-06, "loss": 0.2784, "step": 29692 }, { "epoch": 1.3909682859418186, "grad_norm": 0.5889514269853897, "learning_rate": 1.120461104903002e-06, "loss": 0.2791, "step": 29693 }, { "epoch": 1.391015130931747, "grad_norm": 0.5760345448518087, "learning_rate": 1.1203029470619762e-06, "loss": 0.2784, "step": 29694 }, { "epoch": 1.3910619759216751, "grad_norm": 0.5982304060424258, "learning_rate": 1.1201447971607593e-06, "loss": 0.28, "step": 29695 }, { "epoch": 1.3911088209116036, "grad_norm": 0.569658640689111, "learning_rate": 1.1199866552002621e-06, "loss": 0.2692, "step": 29696 }, { "epoch": 1.3911556659015318, "grad_norm": 0.5759282142491817, "learning_rate": 1.1198285211813958e-06, "loss": 0.2675, "step": 29697 }, { "epoch": 1.39120251089146, "grad_norm": 0.6078886433879973, "learning_rate": 1.1196703951050686e-06, "loss": 0.2737, "step": 29698 }, { "epoch": 1.3912493558813885, "grad_norm": 0.6375403928475794, "learning_rate": 1.1195122769721915e-06, "loss": 0.2878, "step": 29699 }, { "epoch": 1.3912962008713168, "grad_norm": 0.6062675684266833, "learning_rate": 1.119354166783674e-06, "loss": 0.2635, "step": 29700 }, { "epoch": 1.391343045861245, "grad_norm": 0.5678964725062607, "learning_rate": 1.1191960645404273e-06, "loss": 0.2673, "step": 29701 }, { "epoch": 1.3913898908511735, "grad_norm": 0.5863114597327764, "learning_rate": 1.1190379702433592e-06, "loss": 0.2642, "step": 29702 }, { "epoch": 1.3914367358411017, "grad_norm": 0.5817822122176225, "learning_rate": 1.1188798838933815e-06, "loss": 0.2724, "step": 29703 }, { "epoch": 1.3914835808310302, "grad_norm": 0.5743369711990238, "learning_rate": 1.1187218054914021e-06, "loss": 0.2657, "step": 29704 }, { "epoch": 1.3915304258209584, "grad_norm": 0.6642722166366677, "learning_rate": 1.1185637350383312e-06, "loss": 0.2977, "step": 29705 }, { "epoch": 1.391577270810887, "grad_norm": 0.6297119367542823, "learning_rate": 1.1184056725350792e-06, "loss": 0.2722, "step": 29706 }, { "epoch": 1.3916241158008151, "grad_norm": 0.6091103342406484, "learning_rate": 1.1182476179825552e-06, "loss": 0.2856, "step": 29707 }, { "epoch": 1.3916709607907434, "grad_norm": 0.5851484950278812, "learning_rate": 1.1180895713816698e-06, "loss": 0.2499, "step": 29708 }, { "epoch": 1.3917178057806718, "grad_norm": 0.586095879511733, "learning_rate": 1.1179315327333314e-06, "loss": 0.2728, "step": 29709 }, { "epoch": 1.3917646507706, "grad_norm": 0.592119663605037, "learning_rate": 1.1177735020384489e-06, "loss": 0.2716, "step": 29710 }, { "epoch": 1.3918114957605283, "grad_norm": 0.583044505532845, "learning_rate": 1.1176154792979324e-06, "loss": 0.2648, "step": 29711 }, { "epoch": 1.3918583407504568, "grad_norm": 0.5923622926841446, "learning_rate": 1.1174574645126914e-06, "loss": 0.2826, "step": 29712 }, { "epoch": 1.391905185740385, "grad_norm": 0.5755331344792204, "learning_rate": 1.1172994576836351e-06, "loss": 0.2647, "step": 29713 }, { "epoch": 1.3919520307303133, "grad_norm": 0.6196972490098219, "learning_rate": 1.117141458811674e-06, "loss": 0.2823, "step": 29714 }, { "epoch": 1.3919988757202417, "grad_norm": 0.623473144081751, "learning_rate": 1.116983467897715e-06, "loss": 0.2911, "step": 29715 }, { "epoch": 1.39204572071017, "grad_norm": 0.6027367257606576, "learning_rate": 1.11682548494267e-06, "loss": 0.2774, "step": 29716 }, { "epoch": 1.3920925657000984, "grad_norm": 0.6254177242758541, "learning_rate": 1.1166675099474453e-06, "loss": 0.2753, "step": 29717 }, { "epoch": 1.3921394106900267, "grad_norm": 0.5993158571676027, "learning_rate": 1.1165095429129515e-06, "loss": 0.2821, "step": 29718 }, { "epoch": 1.3921862556799551, "grad_norm": 0.5584681396487211, "learning_rate": 1.1163515838400977e-06, "loss": 0.2713, "step": 29719 }, { "epoch": 1.3922331006698834, "grad_norm": 0.590147014215753, "learning_rate": 1.1161936327297926e-06, "loss": 0.2638, "step": 29720 }, { "epoch": 1.3922799456598116, "grad_norm": 0.60386831010198, "learning_rate": 1.1160356895829462e-06, "loss": 0.2622, "step": 29721 }, { "epoch": 1.39232679064974, "grad_norm": 0.5955860090176754, "learning_rate": 1.115877754400467e-06, "loss": 0.2827, "step": 29722 }, { "epoch": 1.3923736356396683, "grad_norm": 0.5764834703937824, "learning_rate": 1.115719827183262e-06, "loss": 0.2679, "step": 29723 }, { "epoch": 1.3924204806295966, "grad_norm": 0.6166239421531254, "learning_rate": 1.1155619079322419e-06, "loss": 0.272, "step": 29724 }, { "epoch": 1.392467325619525, "grad_norm": 0.6016214580496452, "learning_rate": 1.1154039966483146e-06, "loss": 0.2509, "step": 29725 }, { "epoch": 1.3925141706094533, "grad_norm": 0.5933231330336104, "learning_rate": 1.1152460933323897e-06, "loss": 0.2657, "step": 29726 }, { "epoch": 1.3925610155993817, "grad_norm": 0.5927598887850153, "learning_rate": 1.115088197985376e-06, "loss": 0.2673, "step": 29727 }, { "epoch": 1.39260786058931, "grad_norm": 0.6172514407792663, "learning_rate": 1.114930310608181e-06, "loss": 0.2822, "step": 29728 }, { "epoch": 1.3926547055792384, "grad_norm": 0.6012134530697637, "learning_rate": 1.114772431201715e-06, "loss": 0.2573, "step": 29729 }, { "epoch": 1.3927015505691667, "grad_norm": 0.5710853027884022, "learning_rate": 1.1146145597668842e-06, "loss": 0.265, "step": 29730 }, { "epoch": 1.392748395559095, "grad_norm": 0.6149695554572464, "learning_rate": 1.1144566963045985e-06, "loss": 0.2753, "step": 29731 }, { "epoch": 1.3927952405490234, "grad_norm": 0.5810894943544335, "learning_rate": 1.1142988408157665e-06, "loss": 0.2671, "step": 29732 }, { "epoch": 1.3928420855389516, "grad_norm": 0.5823785688600999, "learning_rate": 1.1141409933012962e-06, "loss": 0.2758, "step": 29733 }, { "epoch": 1.3928889305288799, "grad_norm": 0.5697847166017564, "learning_rate": 1.1139831537620972e-06, "loss": 0.2723, "step": 29734 }, { "epoch": 1.3929357755188083, "grad_norm": 0.5796326886805204, "learning_rate": 1.1138253221990758e-06, "loss": 0.281, "step": 29735 }, { "epoch": 1.3929826205087366, "grad_norm": 0.5752279343126686, "learning_rate": 1.113667498613142e-06, "loss": 0.2705, "step": 29736 }, { "epoch": 1.3930294654986648, "grad_norm": 0.6056994097550649, "learning_rate": 1.1135096830052028e-06, "loss": 0.2733, "step": 29737 }, { "epoch": 1.3930763104885933, "grad_norm": 0.6266053227044766, "learning_rate": 1.1133518753761665e-06, "loss": 0.2881, "step": 29738 }, { "epoch": 1.3931231554785215, "grad_norm": 0.5839766624638226, "learning_rate": 1.1131940757269419e-06, "loss": 0.273, "step": 29739 }, { "epoch": 1.39317000046845, "grad_norm": 0.5778754988269362, "learning_rate": 1.1130362840584375e-06, "loss": 0.2854, "step": 29740 }, { "epoch": 1.3932168454583782, "grad_norm": 0.6013681613462137, "learning_rate": 1.11287850037156e-06, "loss": 0.2564, "step": 29741 }, { "epoch": 1.3932636904483067, "grad_norm": 0.6215581801746362, "learning_rate": 1.1127207246672178e-06, "loss": 0.2872, "step": 29742 }, { "epoch": 1.393310535438235, "grad_norm": 0.6209709268882468, "learning_rate": 1.1125629569463203e-06, "loss": 0.279, "step": 29743 }, { "epoch": 1.3933573804281632, "grad_norm": 0.6086879079137073, "learning_rate": 1.1124051972097735e-06, "loss": 0.2622, "step": 29744 }, { "epoch": 1.3934042254180916, "grad_norm": 0.5692291194756435, "learning_rate": 1.1122474454584858e-06, "loss": 0.2718, "step": 29745 }, { "epoch": 1.3934510704080199, "grad_norm": 0.5934254028740725, "learning_rate": 1.1120897016933663e-06, "loss": 0.2743, "step": 29746 }, { "epoch": 1.393497915397948, "grad_norm": 0.6267286150903052, "learning_rate": 1.1119319659153204e-06, "loss": 0.2687, "step": 29747 }, { "epoch": 1.3935447603878766, "grad_norm": 0.5957326481861784, "learning_rate": 1.1117742381252578e-06, "loss": 0.2714, "step": 29748 }, { "epoch": 1.3935916053778048, "grad_norm": 0.5664587929358742, "learning_rate": 1.111616518324085e-06, "loss": 0.2691, "step": 29749 }, { "epoch": 1.393638450367733, "grad_norm": 0.5835583058523302, "learning_rate": 1.1114588065127113e-06, "loss": 0.2676, "step": 29750 }, { "epoch": 1.3936852953576615, "grad_norm": 0.6021022222396937, "learning_rate": 1.1113011026920423e-06, "loss": 0.2752, "step": 29751 }, { "epoch": 1.3937321403475897, "grad_norm": 0.6190814540120342, "learning_rate": 1.1111434068629876e-06, "loss": 0.2687, "step": 29752 }, { "epoch": 1.3937789853375182, "grad_norm": 0.5813892698923326, "learning_rate": 1.1109857190264522e-06, "loss": 0.2681, "step": 29753 }, { "epoch": 1.3938258303274464, "grad_norm": 0.614210309687702, "learning_rate": 1.1108280391833447e-06, "loss": 0.2841, "step": 29754 }, { "epoch": 1.393872675317375, "grad_norm": 0.5868567093877282, "learning_rate": 1.1106703673345731e-06, "loss": 0.2607, "step": 29755 }, { "epoch": 1.3939195203073032, "grad_norm": 0.5505481910878008, "learning_rate": 1.1105127034810453e-06, "loss": 0.2673, "step": 29756 }, { "epoch": 1.3939663652972314, "grad_norm": 0.5660551094108177, "learning_rate": 1.1103550476236666e-06, "loss": 0.2592, "step": 29757 }, { "epoch": 1.3940132102871599, "grad_norm": 0.5832951511339468, "learning_rate": 1.110197399763346e-06, "loss": 0.2835, "step": 29758 }, { "epoch": 1.394060055277088, "grad_norm": 0.5559063912450686, "learning_rate": 1.1100397599009892e-06, "loss": 0.2588, "step": 29759 }, { "epoch": 1.3941069002670163, "grad_norm": 0.6055041624964386, "learning_rate": 1.1098821280375046e-06, "loss": 0.2714, "step": 29760 }, { "epoch": 1.3941537452569448, "grad_norm": 0.6208264401340295, "learning_rate": 1.1097245041737986e-06, "loss": 0.274, "step": 29761 }, { "epoch": 1.394200590246873, "grad_norm": 0.5624387034628998, "learning_rate": 1.1095668883107788e-06, "loss": 0.2655, "step": 29762 }, { "epoch": 1.3942474352368015, "grad_norm": 0.5959906270768272, "learning_rate": 1.109409280449353e-06, "loss": 0.2714, "step": 29763 }, { "epoch": 1.3942942802267297, "grad_norm": 0.5909892916425823, "learning_rate": 1.1092516805904275e-06, "loss": 0.2733, "step": 29764 }, { "epoch": 1.3943411252166582, "grad_norm": 0.5694101591256202, "learning_rate": 1.1090940887349079e-06, "loss": 0.2778, "step": 29765 }, { "epoch": 1.3943879702065864, "grad_norm": 0.5817724869529906, "learning_rate": 1.108936504883702e-06, "loss": 0.2753, "step": 29766 }, { "epoch": 1.3944348151965147, "grad_norm": 0.6027247102299962, "learning_rate": 1.108778929037717e-06, "loss": 0.2915, "step": 29767 }, { "epoch": 1.3944816601864432, "grad_norm": 0.636978789766095, "learning_rate": 1.1086213611978596e-06, "loss": 0.2709, "step": 29768 }, { "epoch": 1.3945285051763714, "grad_norm": 0.6010648998802431, "learning_rate": 1.1084638013650368e-06, "loss": 0.2697, "step": 29769 }, { "epoch": 1.3945753501662996, "grad_norm": 0.6233611026456608, "learning_rate": 1.1083062495401558e-06, "loss": 0.2753, "step": 29770 }, { "epoch": 1.394622195156228, "grad_norm": 0.6521259585427222, "learning_rate": 1.1081487057241225e-06, "loss": 0.2936, "step": 29771 }, { "epoch": 1.3946690401461563, "grad_norm": 0.5844494583875661, "learning_rate": 1.1079911699178429e-06, "loss": 0.2742, "step": 29772 }, { "epoch": 1.3947158851360846, "grad_norm": 0.6013524629994239, "learning_rate": 1.1078336421222237e-06, "loss": 0.2826, "step": 29773 }, { "epoch": 1.394762730126013, "grad_norm": 0.5614254702930778, "learning_rate": 1.1076761223381726e-06, "loss": 0.2552, "step": 29774 }, { "epoch": 1.3948095751159413, "grad_norm": 0.6116457520838161, "learning_rate": 1.1075186105665953e-06, "loss": 0.2706, "step": 29775 }, { "epoch": 1.3948564201058697, "grad_norm": 0.6145480903366007, "learning_rate": 1.1073611068083993e-06, "loss": 0.2918, "step": 29776 }, { "epoch": 1.394903265095798, "grad_norm": 0.5976532374061284, "learning_rate": 1.107203611064489e-06, "loss": 0.2527, "step": 29777 }, { "epoch": 1.3949501100857264, "grad_norm": 0.5764047094401163, "learning_rate": 1.107046123335773e-06, "loss": 0.2617, "step": 29778 }, { "epoch": 1.3949969550756547, "grad_norm": 0.6089483047251991, "learning_rate": 1.106888643623155e-06, "loss": 0.2799, "step": 29779 }, { "epoch": 1.395043800065583, "grad_norm": 0.5314589050066975, "learning_rate": 1.1067311719275435e-06, "loss": 0.245, "step": 29780 }, { "epoch": 1.3950906450555114, "grad_norm": 0.5885715835917817, "learning_rate": 1.1065737082498434e-06, "loss": 0.2791, "step": 29781 }, { "epoch": 1.3951374900454396, "grad_norm": 0.5912464385383314, "learning_rate": 1.1064162525909624e-06, "loss": 0.2767, "step": 29782 }, { "epoch": 1.3951843350353679, "grad_norm": 0.5667376289271685, "learning_rate": 1.106258804951805e-06, "loss": 0.2626, "step": 29783 }, { "epoch": 1.3952311800252963, "grad_norm": 0.6076543763289703, "learning_rate": 1.1061013653332775e-06, "loss": 0.2744, "step": 29784 }, { "epoch": 1.3952780250152246, "grad_norm": 0.575305607940359, "learning_rate": 1.1059439337362873e-06, "loss": 0.2784, "step": 29785 }, { "epoch": 1.3953248700051528, "grad_norm": 0.5548792350072297, "learning_rate": 1.1057865101617384e-06, "loss": 0.2593, "step": 29786 }, { "epoch": 1.3953717149950813, "grad_norm": 0.5441075288668535, "learning_rate": 1.105629094610538e-06, "loss": 0.2605, "step": 29787 }, { "epoch": 1.3954185599850095, "grad_norm": 0.5658139168594952, "learning_rate": 1.1054716870835923e-06, "loss": 0.2579, "step": 29788 }, { "epoch": 1.395465404974938, "grad_norm": 0.5491739380315068, "learning_rate": 1.105314287581806e-06, "loss": 0.2647, "step": 29789 }, { "epoch": 1.3955122499648662, "grad_norm": 0.5552690565947234, "learning_rate": 1.1051568961060849e-06, "loss": 0.2596, "step": 29790 }, { "epoch": 1.3955590949547947, "grad_norm": 0.5731377575590033, "learning_rate": 1.1049995126573356e-06, "loss": 0.2597, "step": 29791 }, { "epoch": 1.395605939944723, "grad_norm": 0.6076881856535457, "learning_rate": 1.1048421372364646e-06, "loss": 0.2904, "step": 29792 }, { "epoch": 1.3956527849346512, "grad_norm": 0.605584298456466, "learning_rate": 1.1046847698443753e-06, "loss": 0.2699, "step": 29793 }, { "epoch": 1.3956996299245796, "grad_norm": 0.5903869139473994, "learning_rate": 1.1045274104819754e-06, "loss": 0.2607, "step": 29794 }, { "epoch": 1.3957464749145079, "grad_norm": 0.5742572023411688, "learning_rate": 1.1043700591501682e-06, "loss": 0.2816, "step": 29795 }, { "epoch": 1.3957933199044361, "grad_norm": 0.5459924024197985, "learning_rate": 1.1042127158498612e-06, "loss": 0.2798, "step": 29796 }, { "epoch": 1.3958401648943646, "grad_norm": 0.5552003954747945, "learning_rate": 1.1040553805819587e-06, "loss": 0.2457, "step": 29797 }, { "epoch": 1.3958870098842928, "grad_norm": 0.5518928626767022, "learning_rate": 1.103898053347367e-06, "loss": 0.2557, "step": 29798 }, { "epoch": 1.3959338548742213, "grad_norm": 0.5928584100303448, "learning_rate": 1.1037407341469916e-06, "loss": 0.2718, "step": 29799 }, { "epoch": 1.3959806998641495, "grad_norm": 0.6128886978058324, "learning_rate": 1.103583422981737e-06, "loss": 0.2799, "step": 29800 }, { "epoch": 1.396027544854078, "grad_norm": 0.6016962557037651, "learning_rate": 1.1034261198525093e-06, "loss": 0.2708, "step": 29801 }, { "epoch": 1.3960743898440062, "grad_norm": 0.5858971886952604, "learning_rate": 1.1032688247602124e-06, "loss": 0.2758, "step": 29802 }, { "epoch": 1.3961212348339345, "grad_norm": 0.5701869748834001, "learning_rate": 1.1031115377057524e-06, "loss": 0.2633, "step": 29803 }, { "epoch": 1.396168079823863, "grad_norm": 0.6330957832302647, "learning_rate": 1.1029542586900343e-06, "loss": 0.2832, "step": 29804 }, { "epoch": 1.3962149248137912, "grad_norm": 0.5766700521136793, "learning_rate": 1.1027969877139643e-06, "loss": 0.2833, "step": 29805 }, { "epoch": 1.3962617698037194, "grad_norm": 0.6284558968474333, "learning_rate": 1.1026397247784457e-06, "loss": 0.2833, "step": 29806 }, { "epoch": 1.3963086147936479, "grad_norm": 0.6356648214693893, "learning_rate": 1.102482469884385e-06, "loss": 0.2844, "step": 29807 }, { "epoch": 1.3963554597835761, "grad_norm": 0.6009141405728191, "learning_rate": 1.1023252230326857e-06, "loss": 0.2773, "step": 29808 }, { "epoch": 1.3964023047735044, "grad_norm": 0.5486098211359712, "learning_rate": 1.1021679842242534e-06, "loss": 0.2596, "step": 29809 }, { "epoch": 1.3964491497634328, "grad_norm": 0.5794727739316722, "learning_rate": 1.1020107534599928e-06, "loss": 0.2742, "step": 29810 }, { "epoch": 1.396495994753361, "grad_norm": 0.5872495175245307, "learning_rate": 1.101853530740809e-06, "loss": 0.2562, "step": 29811 }, { "epoch": 1.3965428397432895, "grad_norm": 0.6430510667828419, "learning_rate": 1.1016963160676078e-06, "loss": 0.2909, "step": 29812 }, { "epoch": 1.3965896847332178, "grad_norm": 0.6084113121160997, "learning_rate": 1.1015391094412928e-06, "loss": 0.2834, "step": 29813 }, { "epoch": 1.3966365297231462, "grad_norm": 0.5837511779212353, "learning_rate": 1.101381910862768e-06, "loss": 0.2632, "step": 29814 }, { "epoch": 1.3966833747130745, "grad_norm": 0.593655115366214, "learning_rate": 1.1012247203329384e-06, "loss": 0.2612, "step": 29815 }, { "epoch": 1.3967302197030027, "grad_norm": 0.5995095507350657, "learning_rate": 1.1010675378527091e-06, "loss": 0.2938, "step": 29816 }, { "epoch": 1.3967770646929312, "grad_norm": 0.5661380229062954, "learning_rate": 1.1009103634229845e-06, "loss": 0.2777, "step": 29817 }, { "epoch": 1.3968239096828594, "grad_norm": 0.5879597509264703, "learning_rate": 1.1007531970446694e-06, "loss": 0.2829, "step": 29818 }, { "epoch": 1.3968707546727877, "grad_norm": 0.6075687567820166, "learning_rate": 1.1005960387186684e-06, "loss": 0.2821, "step": 29819 }, { "epoch": 1.3969175996627161, "grad_norm": 0.6361269620705873, "learning_rate": 1.1004388884458859e-06, "loss": 0.2739, "step": 29820 }, { "epoch": 1.3969644446526444, "grad_norm": 0.6259463897532218, "learning_rate": 1.1002817462272248e-06, "loss": 0.2745, "step": 29821 }, { "epoch": 1.3970112896425726, "grad_norm": 0.5646947552226474, "learning_rate": 1.1001246120635903e-06, "loss": 0.2749, "step": 29822 }, { "epoch": 1.397058134632501, "grad_norm": 0.5759316915174484, "learning_rate": 1.0999674859558868e-06, "loss": 0.2756, "step": 29823 }, { "epoch": 1.3971049796224293, "grad_norm": 0.5817454403979431, "learning_rate": 1.0998103679050188e-06, "loss": 0.2664, "step": 29824 }, { "epoch": 1.3971518246123578, "grad_norm": 0.5952131973953096, "learning_rate": 1.0996532579118907e-06, "loss": 0.2656, "step": 29825 }, { "epoch": 1.397198669602286, "grad_norm": 0.5887349835034271, "learning_rate": 1.0994961559774057e-06, "loss": 0.2669, "step": 29826 }, { "epoch": 1.3972455145922145, "grad_norm": 0.6365282184876435, "learning_rate": 1.0993390621024688e-06, "loss": 0.2852, "step": 29827 }, { "epoch": 1.3972923595821427, "grad_norm": 0.5791915180779351, "learning_rate": 1.099181976287983e-06, "loss": 0.2664, "step": 29828 }, { "epoch": 1.397339204572071, "grad_norm": 0.6056753783686999, "learning_rate": 1.0990248985348525e-06, "loss": 0.2825, "step": 29829 }, { "epoch": 1.3973860495619994, "grad_norm": 0.7026907246357151, "learning_rate": 1.0988678288439819e-06, "loss": 0.2718, "step": 29830 }, { "epoch": 1.3974328945519277, "grad_norm": 0.5733622018868803, "learning_rate": 1.0987107672162756e-06, "loss": 0.2654, "step": 29831 }, { "epoch": 1.397479739541856, "grad_norm": 0.5843682560808866, "learning_rate": 1.0985537136526356e-06, "loss": 0.2618, "step": 29832 }, { "epoch": 1.3975265845317844, "grad_norm": 0.6189691816423445, "learning_rate": 1.0983966681539668e-06, "loss": 0.2723, "step": 29833 }, { "epoch": 1.3975734295217126, "grad_norm": 0.6174549456710114, "learning_rate": 1.098239630721174e-06, "loss": 0.2775, "step": 29834 }, { "epoch": 1.397620274511641, "grad_norm": 0.5890911388145771, "learning_rate": 1.0980826013551584e-06, "loss": 0.2605, "step": 29835 }, { "epoch": 1.3976671195015693, "grad_norm": 0.6201464733066222, "learning_rate": 1.0979255800568257e-06, "loss": 0.2732, "step": 29836 }, { "epoch": 1.3977139644914978, "grad_norm": 0.5720388046342866, "learning_rate": 1.0977685668270794e-06, "loss": 0.2596, "step": 29837 }, { "epoch": 1.397760809481426, "grad_norm": 0.5724876750355405, "learning_rate": 1.0976115616668218e-06, "loss": 0.2534, "step": 29838 }, { "epoch": 1.3978076544713542, "grad_norm": 0.587932547908333, "learning_rate": 1.0974545645769571e-06, "loss": 0.2688, "step": 29839 }, { "epoch": 1.3978544994612827, "grad_norm": 0.5841387561045797, "learning_rate": 1.097297575558389e-06, "loss": 0.2713, "step": 29840 }, { "epoch": 1.397901344451211, "grad_norm": 0.6085772797923472, "learning_rate": 1.0971405946120215e-06, "loss": 0.2721, "step": 29841 }, { "epoch": 1.3979481894411392, "grad_norm": 0.5733965770313525, "learning_rate": 1.0969836217387563e-06, "loss": 0.2699, "step": 29842 }, { "epoch": 1.3979950344310677, "grad_norm": 0.5616144898840822, "learning_rate": 1.0968266569394988e-06, "loss": 0.2715, "step": 29843 }, { "epoch": 1.398041879420996, "grad_norm": 0.5953972569299989, "learning_rate": 1.0966697002151506e-06, "loss": 0.2646, "step": 29844 }, { "epoch": 1.3980887244109241, "grad_norm": 0.5812946018863879, "learning_rate": 1.0965127515666153e-06, "loss": 0.2683, "step": 29845 }, { "epoch": 1.3981355694008526, "grad_norm": 0.6100631562016818, "learning_rate": 1.0963558109947962e-06, "loss": 0.2727, "step": 29846 }, { "epoch": 1.3981824143907808, "grad_norm": 0.5931269912000687, "learning_rate": 1.0961988785005966e-06, "loss": 0.2815, "step": 29847 }, { "epoch": 1.3982292593807093, "grad_norm": 0.626279099452442, "learning_rate": 1.0960419540849207e-06, "loss": 0.2628, "step": 29848 }, { "epoch": 1.3982761043706375, "grad_norm": 0.6185803949347827, "learning_rate": 1.0958850377486708e-06, "loss": 0.2644, "step": 29849 }, { "epoch": 1.398322949360566, "grad_norm": 0.6066158125864481, "learning_rate": 1.0957281294927483e-06, "loss": 0.2752, "step": 29850 }, { "epoch": 1.3983697943504942, "grad_norm": 0.586928153527705, "learning_rate": 1.0955712293180573e-06, "loss": 0.2943, "step": 29851 }, { "epoch": 1.3984166393404225, "grad_norm": 0.5278246056564508, "learning_rate": 1.095414337225501e-06, "loss": 0.2673, "step": 29852 }, { "epoch": 1.398463484330351, "grad_norm": 0.6037846003271276, "learning_rate": 1.0952574532159824e-06, "loss": 0.2819, "step": 29853 }, { "epoch": 1.3985103293202792, "grad_norm": 0.631012865085019, "learning_rate": 1.095100577290405e-06, "loss": 0.28, "step": 29854 }, { "epoch": 1.3985571743102074, "grad_norm": 0.5898768321511466, "learning_rate": 1.0949437094496696e-06, "loss": 0.2886, "step": 29855 }, { "epoch": 1.398604019300136, "grad_norm": 0.5706745944865225, "learning_rate": 1.0947868496946809e-06, "loss": 0.2727, "step": 29856 }, { "epoch": 1.3986508642900641, "grad_norm": 0.6001285472985879, "learning_rate": 1.0946299980263398e-06, "loss": 0.2636, "step": 29857 }, { "epoch": 1.3986977092799924, "grad_norm": 0.6340264397832572, "learning_rate": 1.0944731544455498e-06, "loss": 0.2735, "step": 29858 }, { "epoch": 1.3987445542699208, "grad_norm": 0.6338947873542726, "learning_rate": 1.0943163189532136e-06, "loss": 0.294, "step": 29859 }, { "epoch": 1.398791399259849, "grad_norm": 0.6073055989073678, "learning_rate": 1.0941594915502338e-06, "loss": 0.2863, "step": 29860 }, { "epoch": 1.3988382442497775, "grad_norm": 0.6031528833764671, "learning_rate": 1.0940026722375133e-06, "loss": 0.2696, "step": 29861 }, { "epoch": 1.3988850892397058, "grad_norm": 0.5994162063324009, "learning_rate": 1.0938458610159543e-06, "loss": 0.2715, "step": 29862 }, { "epoch": 1.3989319342296342, "grad_norm": 0.6033262803474633, "learning_rate": 1.0936890578864581e-06, "loss": 0.2611, "step": 29863 }, { "epoch": 1.3989787792195625, "grad_norm": 0.610363912831851, "learning_rate": 1.0935322628499278e-06, "loss": 0.2669, "step": 29864 }, { "epoch": 1.3990256242094907, "grad_norm": 0.6173398530131482, "learning_rate": 1.093375475907266e-06, "loss": 0.2971, "step": 29865 }, { "epoch": 1.3990724691994192, "grad_norm": 0.5939001063439843, "learning_rate": 1.0932186970593744e-06, "loss": 0.2622, "step": 29866 }, { "epoch": 1.3991193141893474, "grad_norm": 0.6150176920697366, "learning_rate": 1.0930619263071571e-06, "loss": 0.2826, "step": 29867 }, { "epoch": 1.3991661591792757, "grad_norm": 0.5786905101236293, "learning_rate": 1.0929051636515133e-06, "loss": 0.2613, "step": 29868 }, { "epoch": 1.3992130041692041, "grad_norm": 0.6306507976963378, "learning_rate": 1.092748409093348e-06, "loss": 0.2584, "step": 29869 }, { "epoch": 1.3992598491591324, "grad_norm": 0.5954619916711915, "learning_rate": 1.0925916626335605e-06, "loss": 0.2622, "step": 29870 }, { "epoch": 1.3993066941490608, "grad_norm": 0.6116393585971188, "learning_rate": 1.0924349242730548e-06, "loss": 0.2886, "step": 29871 }, { "epoch": 1.399353539138989, "grad_norm": 0.5879392858493286, "learning_rate": 1.092278194012732e-06, "loss": 0.2626, "step": 29872 }, { "epoch": 1.3994003841289175, "grad_norm": 0.6030936303758135, "learning_rate": 1.0921214718534945e-06, "loss": 0.2565, "step": 29873 }, { "epoch": 1.3994472291188458, "grad_norm": 0.6039403396168138, "learning_rate": 1.0919647577962451e-06, "loss": 0.2744, "step": 29874 }, { "epoch": 1.399494074108774, "grad_norm": 0.6027071549137933, "learning_rate": 1.0918080518418834e-06, "loss": 0.2833, "step": 29875 }, { "epoch": 1.3995409190987025, "grad_norm": 0.5698056992991231, "learning_rate": 1.0916513539913135e-06, "loss": 0.262, "step": 29876 }, { "epoch": 1.3995877640886307, "grad_norm": 0.621911901410353, "learning_rate": 1.091494664245435e-06, "loss": 0.2792, "step": 29877 }, { "epoch": 1.399634609078559, "grad_norm": 0.5609183873019314, "learning_rate": 1.091337982605151e-06, "loss": 0.2696, "step": 29878 }, { "epoch": 1.3996814540684874, "grad_norm": 0.5739907280310353, "learning_rate": 1.0911813090713626e-06, "loss": 0.2606, "step": 29879 }, { "epoch": 1.3997282990584157, "grad_norm": 0.5761018728814837, "learning_rate": 1.0910246436449725e-06, "loss": 0.2659, "step": 29880 }, { "epoch": 1.399775144048344, "grad_norm": 0.5602720339799977, "learning_rate": 1.090867986326881e-06, "loss": 0.2475, "step": 29881 }, { "epoch": 1.3998219890382724, "grad_norm": 0.5680414924298836, "learning_rate": 1.0907113371179897e-06, "loss": 0.2657, "step": 29882 }, { "epoch": 1.3998688340282006, "grad_norm": 0.5360684696313969, "learning_rate": 1.0905546960192016e-06, "loss": 0.2638, "step": 29883 }, { "epoch": 1.399915679018129, "grad_norm": 0.5584394660514442, "learning_rate": 1.0903980630314156e-06, "loss": 0.2673, "step": 29884 }, { "epoch": 1.3999625240080573, "grad_norm": 0.6438611681479202, "learning_rate": 1.0902414381555349e-06, "loss": 0.2704, "step": 29885 }, { "epoch": 1.4000093689979858, "grad_norm": 0.6299421369356107, "learning_rate": 1.0900848213924611e-06, "loss": 0.2955, "step": 29886 }, { "epoch": 1.400056213987914, "grad_norm": 0.6023963663659594, "learning_rate": 1.0899282127430942e-06, "loss": 0.2727, "step": 29887 }, { "epoch": 1.4001030589778423, "grad_norm": 0.6417280388290211, "learning_rate": 1.0897716122083355e-06, "loss": 0.2749, "step": 29888 }, { "epoch": 1.4001499039677707, "grad_norm": 0.6408430054627938, "learning_rate": 1.089615019789087e-06, "loss": 0.2808, "step": 29889 }, { "epoch": 1.400196748957699, "grad_norm": 0.5972781441242503, "learning_rate": 1.0894584354862505e-06, "loss": 0.2702, "step": 29890 }, { "epoch": 1.4002435939476272, "grad_norm": 0.5803712044077582, "learning_rate": 1.089301859300725e-06, "loss": 0.2578, "step": 29891 }, { "epoch": 1.4002904389375557, "grad_norm": 0.5618065599910546, "learning_rate": 1.089145291233414e-06, "loss": 0.266, "step": 29892 }, { "epoch": 1.400337283927484, "grad_norm": 0.5929546019824415, "learning_rate": 1.0889887312852163e-06, "loss": 0.2657, "step": 29893 }, { "epoch": 1.4003841289174122, "grad_norm": 0.582362695907556, "learning_rate": 1.0888321794570336e-06, "loss": 0.2728, "step": 29894 }, { "epoch": 1.4004309739073406, "grad_norm": 0.5517168147243992, "learning_rate": 1.0886756357497672e-06, "loss": 0.2564, "step": 29895 }, { "epoch": 1.4004778188972689, "grad_norm": 0.5978119412556262, "learning_rate": 1.0885191001643185e-06, "loss": 0.2905, "step": 29896 }, { "epoch": 1.4005246638871973, "grad_norm": 0.5995121930085107, "learning_rate": 1.088362572701587e-06, "loss": 0.2767, "step": 29897 }, { "epoch": 1.4005715088771256, "grad_norm": 0.6053493689796496, "learning_rate": 1.0882060533624749e-06, "loss": 0.2815, "step": 29898 }, { "epoch": 1.400618353867054, "grad_norm": 0.5714430621083604, "learning_rate": 1.0880495421478813e-06, "loss": 0.2726, "step": 29899 }, { "epoch": 1.4006651988569823, "grad_norm": 0.6145318493192538, "learning_rate": 1.0878930390587075e-06, "loss": 0.2705, "step": 29900 }, { "epoch": 1.4007120438469105, "grad_norm": 0.5811654146918113, "learning_rate": 1.0877365440958543e-06, "loss": 0.2663, "step": 29901 }, { "epoch": 1.400758888836839, "grad_norm": 0.6027843757192949, "learning_rate": 1.0875800572602224e-06, "loss": 0.2782, "step": 29902 }, { "epoch": 1.4008057338267672, "grad_norm": 0.594500159882675, "learning_rate": 1.0874235785527135e-06, "loss": 0.2752, "step": 29903 }, { "epoch": 1.4008525788166954, "grad_norm": 0.5905261966489173, "learning_rate": 1.0872671079742263e-06, "loss": 0.2687, "step": 29904 }, { "epoch": 1.400899423806624, "grad_norm": 0.5916300238391805, "learning_rate": 1.0871106455256614e-06, "loss": 0.2665, "step": 29905 }, { "epoch": 1.4009462687965522, "grad_norm": 0.5401243734461024, "learning_rate": 1.0869541912079194e-06, "loss": 0.2565, "step": 29906 }, { "epoch": 1.4009931137864806, "grad_norm": 0.5560145319675829, "learning_rate": 1.0867977450219008e-06, "loss": 0.2645, "step": 29907 }, { "epoch": 1.4010399587764089, "grad_norm": 0.6462796003994082, "learning_rate": 1.0866413069685062e-06, "loss": 0.3027, "step": 29908 }, { "epoch": 1.4010868037663373, "grad_norm": 0.588188897072693, "learning_rate": 1.0864848770486356e-06, "loss": 0.2692, "step": 29909 }, { "epoch": 1.4011336487562656, "grad_norm": 0.6120065928207279, "learning_rate": 1.08632845526319e-06, "loss": 0.2726, "step": 29910 }, { "epoch": 1.4011804937461938, "grad_norm": 0.649251083797211, "learning_rate": 1.0861720416130689e-06, "loss": 0.287, "step": 29911 }, { "epoch": 1.4012273387361223, "grad_norm": 0.6041511618100736, "learning_rate": 1.0860156360991713e-06, "loss": 0.274, "step": 29912 }, { "epoch": 1.4012741837260505, "grad_norm": 0.6086374198857515, "learning_rate": 1.0858592387223984e-06, "loss": 0.27, "step": 29913 }, { "epoch": 1.4013210287159787, "grad_norm": 0.6182103763716541, "learning_rate": 1.0857028494836504e-06, "loss": 0.2772, "step": 29914 }, { "epoch": 1.4013678737059072, "grad_norm": 0.5657913364187437, "learning_rate": 1.0855464683838269e-06, "loss": 0.2614, "step": 29915 }, { "epoch": 1.4014147186958354, "grad_norm": 0.5702337109693462, "learning_rate": 1.0853900954238288e-06, "loss": 0.2817, "step": 29916 }, { "epoch": 1.4014615636857637, "grad_norm": 0.6233646270853656, "learning_rate": 1.0852337306045543e-06, "loss": 0.2842, "step": 29917 }, { "epoch": 1.4015084086756922, "grad_norm": 0.6605837027647271, "learning_rate": 1.085077373926905e-06, "loss": 0.2896, "step": 29918 }, { "epoch": 1.4015552536656204, "grad_norm": 0.5859207397703563, "learning_rate": 1.084921025391779e-06, "loss": 0.2726, "step": 29919 }, { "epoch": 1.4016020986555489, "grad_norm": 0.5731366770302703, "learning_rate": 1.0847646850000765e-06, "loss": 0.2789, "step": 29920 }, { "epoch": 1.401648943645477, "grad_norm": 0.5875976455875452, "learning_rate": 1.0846083527526976e-06, "loss": 0.2677, "step": 29921 }, { "epoch": 1.4016957886354056, "grad_norm": 0.6034556981628566, "learning_rate": 1.0844520286505431e-06, "loss": 0.2629, "step": 29922 }, { "epoch": 1.4017426336253338, "grad_norm": 0.6110889984099547, "learning_rate": 1.0842957126945103e-06, "loss": 0.2622, "step": 29923 }, { "epoch": 1.401789478615262, "grad_norm": 0.5864415672384111, "learning_rate": 1.0841394048854998e-06, "loss": 0.276, "step": 29924 }, { "epoch": 1.4018363236051905, "grad_norm": 0.6018983152096364, "learning_rate": 1.083983105224412e-06, "loss": 0.2894, "step": 29925 }, { "epoch": 1.4018831685951187, "grad_norm": 0.6133524869432797, "learning_rate": 1.0838268137121447e-06, "loss": 0.2771, "step": 29926 }, { "epoch": 1.401930013585047, "grad_norm": 0.5792518664581683, "learning_rate": 1.0836705303495982e-06, "loss": 0.2557, "step": 29927 }, { "epoch": 1.4019768585749754, "grad_norm": 0.6507156149995497, "learning_rate": 1.0835142551376725e-06, "loss": 0.2882, "step": 29928 }, { "epoch": 1.4020237035649037, "grad_norm": 0.5903881579475325, "learning_rate": 1.0833579880772656e-06, "loss": 0.2577, "step": 29929 }, { "epoch": 1.402070548554832, "grad_norm": 0.5825608149107931, "learning_rate": 1.0832017291692775e-06, "loss": 0.2642, "step": 29930 }, { "epoch": 1.4021173935447604, "grad_norm": 0.6264799772212782, "learning_rate": 1.0830454784146072e-06, "loss": 0.2854, "step": 29931 }, { "epoch": 1.4021642385346886, "grad_norm": 0.6230794080512595, "learning_rate": 1.0828892358141548e-06, "loss": 0.2985, "step": 29932 }, { "epoch": 1.402211083524617, "grad_norm": 0.6250500082576854, "learning_rate": 1.082733001368818e-06, "loss": 0.2725, "step": 29933 }, { "epoch": 1.4022579285145453, "grad_norm": 0.5959017468256301, "learning_rate": 1.0825767750794974e-06, "loss": 0.273, "step": 29934 }, { "epoch": 1.4023047735044738, "grad_norm": 0.5930417820053634, "learning_rate": 1.0824205569470902e-06, "loss": 0.2704, "step": 29935 }, { "epoch": 1.402351618494402, "grad_norm": 0.62074151761313, "learning_rate": 1.0822643469724966e-06, "loss": 0.2725, "step": 29936 }, { "epoch": 1.4023984634843303, "grad_norm": 0.6367060714975165, "learning_rate": 1.0821081451566151e-06, "loss": 0.277, "step": 29937 }, { "epoch": 1.4024453084742587, "grad_norm": 0.597980885790349, "learning_rate": 1.0819519515003452e-06, "loss": 0.2714, "step": 29938 }, { "epoch": 1.402492153464187, "grad_norm": 0.5645966358674985, "learning_rate": 1.081795766004586e-06, "loss": 0.267, "step": 29939 }, { "epoch": 1.4025389984541152, "grad_norm": 0.5835115427330893, "learning_rate": 1.081639588670235e-06, "loss": 0.2799, "step": 29940 }, { "epoch": 1.4025858434440437, "grad_norm": 0.5913781544579864, "learning_rate": 1.0814834194981924e-06, "loss": 0.2855, "step": 29941 }, { "epoch": 1.402632688433972, "grad_norm": 0.6449359696960251, "learning_rate": 1.0813272584893552e-06, "loss": 0.2869, "step": 29942 }, { "epoch": 1.4026795334239004, "grad_norm": 0.58591343620565, "learning_rate": 1.0811711056446234e-06, "loss": 0.2779, "step": 29943 }, { "epoch": 1.4027263784138286, "grad_norm": 0.6208303872547956, "learning_rate": 1.0810149609648948e-06, "loss": 0.2731, "step": 29944 }, { "epoch": 1.402773223403757, "grad_norm": 0.5351744720701407, "learning_rate": 1.0808588244510698e-06, "loss": 0.2531, "step": 29945 }, { "epoch": 1.4028200683936853, "grad_norm": 0.6088827320172274, "learning_rate": 1.0807026961040446e-06, "loss": 0.2639, "step": 29946 }, { "epoch": 1.4028669133836136, "grad_norm": 0.5781462980743889, "learning_rate": 1.0805465759247194e-06, "loss": 0.2692, "step": 29947 }, { "epoch": 1.402913758373542, "grad_norm": 0.5551122068880099, "learning_rate": 1.0803904639139912e-06, "loss": 0.2484, "step": 29948 }, { "epoch": 1.4029606033634703, "grad_norm": 0.6000660705875361, "learning_rate": 1.0802343600727588e-06, "loss": 0.2663, "step": 29949 }, { "epoch": 1.4030074483533985, "grad_norm": 0.6152396017427352, "learning_rate": 1.080078264401921e-06, "loss": 0.274, "step": 29950 }, { "epoch": 1.403054293343327, "grad_norm": 0.6066585275434072, "learning_rate": 1.0799221769023758e-06, "loss": 0.2756, "step": 29951 }, { "epoch": 1.4031011383332552, "grad_norm": 0.6232335524698238, "learning_rate": 1.0797660975750227e-06, "loss": 0.2596, "step": 29952 }, { "epoch": 1.4031479833231835, "grad_norm": 0.6464136464414902, "learning_rate": 1.0796100264207585e-06, "loss": 0.2661, "step": 29953 }, { "epoch": 1.403194828313112, "grad_norm": 0.5966463822758288, "learning_rate": 1.0794539634404808e-06, "loss": 0.2755, "step": 29954 }, { "epoch": 1.4032416733030402, "grad_norm": 0.543640076750254, "learning_rate": 1.0792979086350882e-06, "loss": 0.253, "step": 29955 }, { "epoch": 1.4032885182929686, "grad_norm": 0.6249383681266798, "learning_rate": 1.0791418620054794e-06, "loss": 0.2803, "step": 29956 }, { "epoch": 1.4033353632828969, "grad_norm": 0.6016702476448996, "learning_rate": 1.0789858235525519e-06, "loss": 0.2805, "step": 29957 }, { "epoch": 1.4033822082728253, "grad_norm": 0.6101315758914255, "learning_rate": 1.0788297932772042e-06, "loss": 0.281, "step": 29958 }, { "epoch": 1.4034290532627536, "grad_norm": 0.5936482904573351, "learning_rate": 1.0786737711803341e-06, "loss": 0.2776, "step": 29959 }, { "epoch": 1.4034758982526818, "grad_norm": 0.6270980578428192, "learning_rate": 1.0785177572628397e-06, "loss": 0.2792, "step": 29960 }, { "epoch": 1.4035227432426103, "grad_norm": 0.5560879611439304, "learning_rate": 1.0783617515256174e-06, "loss": 0.261, "step": 29961 }, { "epoch": 1.4035695882325385, "grad_norm": 0.6541715746591172, "learning_rate": 1.0782057539695657e-06, "loss": 0.2939, "step": 29962 }, { "epoch": 1.4036164332224668, "grad_norm": 0.5980532905244295, "learning_rate": 1.0780497645955825e-06, "loss": 0.2609, "step": 29963 }, { "epoch": 1.4036632782123952, "grad_norm": 0.5976832728994952, "learning_rate": 1.077893783404566e-06, "loss": 0.2583, "step": 29964 }, { "epoch": 1.4037101232023235, "grad_norm": 0.5727187941555175, "learning_rate": 1.0777378103974135e-06, "loss": 0.2679, "step": 29965 }, { "epoch": 1.4037569681922517, "grad_norm": 0.6151961091601922, "learning_rate": 1.077581845575022e-06, "loss": 0.2747, "step": 29966 }, { "epoch": 1.4038038131821802, "grad_norm": 0.5422101850255969, "learning_rate": 1.07742588893829e-06, "loss": 0.2552, "step": 29967 }, { "epoch": 1.4038506581721084, "grad_norm": 0.5947493531417543, "learning_rate": 1.077269940488114e-06, "loss": 0.261, "step": 29968 }, { "epoch": 1.4038975031620369, "grad_norm": 0.5737567112631664, "learning_rate": 1.0771140002253919e-06, "loss": 0.2698, "step": 29969 }, { "epoch": 1.4039443481519651, "grad_norm": 0.6114601564777744, "learning_rate": 1.0769580681510208e-06, "loss": 0.2692, "step": 29970 }, { "epoch": 1.4039911931418936, "grad_norm": 0.5321610839529107, "learning_rate": 1.0768021442658993e-06, "loss": 0.2619, "step": 29971 }, { "epoch": 1.4040380381318218, "grad_norm": 0.5668867139232086, "learning_rate": 1.076646228570923e-06, "loss": 0.2713, "step": 29972 }, { "epoch": 1.40408488312175, "grad_norm": 0.5825925814082319, "learning_rate": 1.07649032106699e-06, "loss": 0.2841, "step": 29973 }, { "epoch": 1.4041317281116785, "grad_norm": 0.5920117695921628, "learning_rate": 1.0763344217549982e-06, "loss": 0.2696, "step": 29974 }, { "epoch": 1.4041785731016068, "grad_norm": 0.6141750363248867, "learning_rate": 1.0761785306358427e-06, "loss": 0.2713, "step": 29975 }, { "epoch": 1.404225418091535, "grad_norm": 0.550515545515791, "learning_rate": 1.0760226477104225e-06, "loss": 0.2695, "step": 29976 }, { "epoch": 1.4042722630814635, "grad_norm": 0.6456661269253651, "learning_rate": 1.0758667729796346e-06, "loss": 0.2876, "step": 29977 }, { "epoch": 1.4043191080713917, "grad_norm": 0.6141766262633035, "learning_rate": 1.0757109064443745e-06, "loss": 0.2769, "step": 29978 }, { "epoch": 1.4043659530613202, "grad_norm": 0.5627320469112088, "learning_rate": 1.0755550481055404e-06, "loss": 0.2532, "step": 29979 }, { "epoch": 1.4044127980512484, "grad_norm": 0.5953902324260564, "learning_rate": 1.0753991979640288e-06, "loss": 0.2709, "step": 29980 }, { "epoch": 1.4044596430411769, "grad_norm": 0.6351556815835797, "learning_rate": 1.075243356020738e-06, "loss": 0.283, "step": 29981 }, { "epoch": 1.4045064880311051, "grad_norm": 0.6073325883882832, "learning_rate": 1.0750875222765621e-06, "loss": 0.2819, "step": 29982 }, { "epoch": 1.4045533330210334, "grad_norm": 0.6073302198557182, "learning_rate": 1.0749316967324009e-06, "loss": 0.2581, "step": 29983 }, { "epoch": 1.4046001780109618, "grad_norm": 0.6024853365046254, "learning_rate": 1.0747758793891482e-06, "loss": 0.2727, "step": 29984 }, { "epoch": 1.40464702300089, "grad_norm": 0.6155817555417641, "learning_rate": 1.0746200702477025e-06, "loss": 0.2836, "step": 29985 }, { "epoch": 1.4046938679908183, "grad_norm": 0.5881767002413939, "learning_rate": 1.0744642693089598e-06, "loss": 0.2679, "step": 29986 }, { "epoch": 1.4047407129807468, "grad_norm": 0.5888745906327061, "learning_rate": 1.074308476573817e-06, "loss": 0.2726, "step": 29987 }, { "epoch": 1.404787557970675, "grad_norm": 0.580401391293271, "learning_rate": 1.0741526920431713e-06, "loss": 0.2517, "step": 29988 }, { "epoch": 1.4048344029606032, "grad_norm": 0.624636506099638, "learning_rate": 1.073996915717919e-06, "loss": 0.2736, "step": 29989 }, { "epoch": 1.4048812479505317, "grad_norm": 0.5918739374664753, "learning_rate": 1.0738411475989546e-06, "loss": 0.2644, "step": 29990 }, { "epoch": 1.40492809294046, "grad_norm": 0.5711546998095254, "learning_rate": 1.073685387687176e-06, "loss": 0.2614, "step": 29991 }, { "epoch": 1.4049749379303884, "grad_norm": 0.5616338132607158, "learning_rate": 1.0735296359834798e-06, "loss": 0.2632, "step": 29992 }, { "epoch": 1.4050217829203167, "grad_norm": 0.5916401031425889, "learning_rate": 1.0733738924887619e-06, "loss": 0.2872, "step": 29993 }, { "epoch": 1.4050686279102451, "grad_norm": 0.5978066825142454, "learning_rate": 1.0732181572039194e-06, "loss": 0.2734, "step": 29994 }, { "epoch": 1.4051154729001734, "grad_norm": 0.5827010359467174, "learning_rate": 1.073062430129847e-06, "loss": 0.2627, "step": 29995 }, { "epoch": 1.4051623178901016, "grad_norm": 0.5473491777004675, "learning_rate": 1.0729067112674426e-06, "loss": 0.2519, "step": 29996 }, { "epoch": 1.40520916288003, "grad_norm": 0.5915777428531827, "learning_rate": 1.0727510006176006e-06, "loss": 0.2803, "step": 29997 }, { "epoch": 1.4052560078699583, "grad_norm": 0.624241028492728, "learning_rate": 1.0725952981812176e-06, "loss": 0.2708, "step": 29998 }, { "epoch": 1.4053028528598865, "grad_norm": 0.5627528243096219, "learning_rate": 1.0724396039591904e-06, "loss": 0.2514, "step": 29999 }, { "epoch": 1.405349697849815, "grad_norm": 0.5967285523323611, "learning_rate": 1.0722839179524139e-06, "loss": 0.2673, "step": 30000 }, { "epoch": 1.4053965428397432, "grad_norm": 0.5822219540073114, "learning_rate": 1.0721282401617859e-06, "loss": 0.273, "step": 30001 }, { "epoch": 1.4054433878296715, "grad_norm": 0.5925871070728955, "learning_rate": 1.0719725705882005e-06, "loss": 0.2649, "step": 30002 }, { "epoch": 1.4054902328196, "grad_norm": 0.5668272088472605, "learning_rate": 1.0718169092325536e-06, "loss": 0.2526, "step": 30003 }, { "epoch": 1.4055370778095282, "grad_norm": 0.5447562634550698, "learning_rate": 1.0716612560957413e-06, "loss": 0.2752, "step": 30004 }, { "epoch": 1.4055839227994567, "grad_norm": 0.6473485486148474, "learning_rate": 1.0715056111786595e-06, "loss": 0.2731, "step": 30005 }, { "epoch": 1.405630767789385, "grad_norm": 0.6103001395921558, "learning_rate": 1.0713499744822036e-06, "loss": 0.2667, "step": 30006 }, { "epoch": 1.4056776127793134, "grad_norm": 0.5888350063386535, "learning_rate": 1.07119434600727e-06, "loss": 0.271, "step": 30007 }, { "epoch": 1.4057244577692416, "grad_norm": 0.6100607324730436, "learning_rate": 1.0710387257547544e-06, "loss": 0.2677, "step": 30008 }, { "epoch": 1.4057713027591698, "grad_norm": 0.5880903489407249, "learning_rate": 1.0708831137255517e-06, "loss": 0.2768, "step": 30009 }, { "epoch": 1.4058181477490983, "grad_norm": 0.637313694470499, "learning_rate": 1.0707275099205568e-06, "loss": 0.2745, "step": 30010 }, { "epoch": 1.4058649927390265, "grad_norm": 0.5954929857056818, "learning_rate": 1.070571914340666e-06, "loss": 0.2862, "step": 30011 }, { "epoch": 1.4059118377289548, "grad_norm": 0.610600392625391, "learning_rate": 1.0704163269867742e-06, "loss": 0.2899, "step": 30012 }, { "epoch": 1.4059586827188832, "grad_norm": 0.5733735266463345, "learning_rate": 1.0702607478597774e-06, "loss": 0.2708, "step": 30013 }, { "epoch": 1.4060055277088115, "grad_norm": 0.5935385118795604, "learning_rate": 1.0701051769605714e-06, "loss": 0.2723, "step": 30014 }, { "epoch": 1.40605237269874, "grad_norm": 0.5840884255906752, "learning_rate": 1.0699496142900498e-06, "loss": 0.2722, "step": 30015 }, { "epoch": 1.4060992176886682, "grad_norm": 0.5993051657738511, "learning_rate": 1.0697940598491097e-06, "loss": 0.2792, "step": 30016 }, { "epoch": 1.4061460626785967, "grad_norm": 0.5589631970027568, "learning_rate": 1.0696385136386441e-06, "loss": 0.2564, "step": 30017 }, { "epoch": 1.406192907668525, "grad_norm": 0.5992572494494035, "learning_rate": 1.0694829756595499e-06, "loss": 0.2705, "step": 30018 }, { "epoch": 1.4062397526584531, "grad_norm": 0.6241625309619188, "learning_rate": 1.0693274459127212e-06, "loss": 0.2885, "step": 30019 }, { "epoch": 1.4062865976483816, "grad_norm": 0.5747863304127396, "learning_rate": 1.0691719243990547e-06, "loss": 0.2755, "step": 30020 }, { "epoch": 1.4063334426383098, "grad_norm": 0.5963963928107553, "learning_rate": 1.0690164111194428e-06, "loss": 0.2756, "step": 30021 }, { "epoch": 1.406380287628238, "grad_norm": 0.6123361512920805, "learning_rate": 1.0688609060747823e-06, "loss": 0.2831, "step": 30022 }, { "epoch": 1.4064271326181665, "grad_norm": 0.6162472284966346, "learning_rate": 1.0687054092659682e-06, "loss": 0.2731, "step": 30023 }, { "epoch": 1.4064739776080948, "grad_norm": 0.619911677974475, "learning_rate": 1.068549920693894e-06, "loss": 0.2716, "step": 30024 }, { "epoch": 1.406520822598023, "grad_norm": 0.5540756805422408, "learning_rate": 1.0683944403594551e-06, "loss": 0.264, "step": 30025 }, { "epoch": 1.4065676675879515, "grad_norm": 0.553771648077955, "learning_rate": 1.0682389682635472e-06, "loss": 0.259, "step": 30026 }, { "epoch": 1.4066145125778797, "grad_norm": 0.6358600695596476, "learning_rate": 1.0680835044070634e-06, "loss": 0.2718, "step": 30027 }, { "epoch": 1.4066613575678082, "grad_norm": 0.6243873547726275, "learning_rate": 1.067928048790899e-06, "loss": 0.292, "step": 30028 }, { "epoch": 1.4067082025577364, "grad_norm": 0.5988495630119735, "learning_rate": 1.0677726014159487e-06, "loss": 0.2788, "step": 30029 }, { "epoch": 1.406755047547665, "grad_norm": 0.557021029928546, "learning_rate": 1.0676171622831082e-06, "loss": 0.2625, "step": 30030 }, { "epoch": 1.4068018925375931, "grad_norm": 0.6105993620347855, "learning_rate": 1.0674617313932701e-06, "loss": 0.2876, "step": 30031 }, { "epoch": 1.4068487375275214, "grad_norm": 0.549283147151783, "learning_rate": 1.0673063087473303e-06, "loss": 0.2593, "step": 30032 }, { "epoch": 1.4068955825174498, "grad_norm": 0.6138664092980475, "learning_rate": 1.067150894346182e-06, "loss": 0.2733, "step": 30033 }, { "epoch": 1.406942427507378, "grad_norm": 0.5913274474346637, "learning_rate": 1.06699548819072e-06, "loss": 0.2769, "step": 30034 }, { "epoch": 1.4069892724973063, "grad_norm": 0.6079669117259936, "learning_rate": 1.0668400902818384e-06, "loss": 0.2833, "step": 30035 }, { "epoch": 1.4070361174872348, "grad_norm": 0.5500008379660325, "learning_rate": 1.0666847006204333e-06, "loss": 0.2643, "step": 30036 }, { "epoch": 1.407082962477163, "grad_norm": 0.6638489207809204, "learning_rate": 1.0665293192073965e-06, "loss": 0.2759, "step": 30037 }, { "epoch": 1.4071298074670913, "grad_norm": 0.5886596982996823, "learning_rate": 1.066373946043624e-06, "loss": 0.281, "step": 30038 }, { "epoch": 1.4071766524570197, "grad_norm": 0.5873663271762336, "learning_rate": 1.0662185811300083e-06, "loss": 0.2827, "step": 30039 }, { "epoch": 1.407223497446948, "grad_norm": 0.5957341808448466, "learning_rate": 1.0660632244674443e-06, "loss": 0.2671, "step": 30040 }, { "epoch": 1.4072703424368764, "grad_norm": 0.6340653947375259, "learning_rate": 1.065907876056826e-06, "loss": 0.2872, "step": 30041 }, { "epoch": 1.4073171874268047, "grad_norm": 0.528189799950723, "learning_rate": 1.0657525358990472e-06, "loss": 0.2429, "step": 30042 }, { "epoch": 1.4073640324167331, "grad_norm": 0.5672928857214016, "learning_rate": 1.0655972039950032e-06, "loss": 0.2641, "step": 30043 }, { "epoch": 1.4074108774066614, "grad_norm": 0.5916691041285194, "learning_rate": 1.0654418803455868e-06, "loss": 0.2713, "step": 30044 }, { "epoch": 1.4074577223965896, "grad_norm": 0.5904755313757171, "learning_rate": 1.0652865649516908e-06, "loss": 0.2951, "step": 30045 }, { "epoch": 1.407504567386518, "grad_norm": 0.5693996275345385, "learning_rate": 1.06513125781421e-06, "loss": 0.2682, "step": 30046 }, { "epoch": 1.4075514123764463, "grad_norm": 0.6462218292253016, "learning_rate": 1.0649759589340383e-06, "loss": 0.2916, "step": 30047 }, { "epoch": 1.4075982573663746, "grad_norm": 0.5819248651606064, "learning_rate": 1.0648206683120693e-06, "loss": 0.2651, "step": 30048 }, { "epoch": 1.407645102356303, "grad_norm": 0.5770766754854596, "learning_rate": 1.0646653859491964e-06, "loss": 0.2695, "step": 30049 }, { "epoch": 1.4076919473462313, "grad_norm": 0.6657485565056727, "learning_rate": 1.0645101118463147e-06, "loss": 0.3044, "step": 30050 }, { "epoch": 1.4077387923361597, "grad_norm": 0.6403917283144294, "learning_rate": 1.0643548460043163e-06, "loss": 0.2775, "step": 30051 }, { "epoch": 1.407785637326088, "grad_norm": 0.6149721366317309, "learning_rate": 1.064199588424094e-06, "loss": 0.274, "step": 30052 }, { "epoch": 1.4078324823160164, "grad_norm": 0.5619191341273496, "learning_rate": 1.0640443391065422e-06, "loss": 0.2603, "step": 30053 }, { "epoch": 1.4078793273059447, "grad_norm": 0.5862428335134929, "learning_rate": 1.0638890980525545e-06, "loss": 0.2741, "step": 30054 }, { "epoch": 1.407926172295873, "grad_norm": 0.6243825498396769, "learning_rate": 1.0637338652630238e-06, "loss": 0.2887, "step": 30055 }, { "epoch": 1.4079730172858014, "grad_norm": 0.6171398784984391, "learning_rate": 1.063578640738845e-06, "loss": 0.2779, "step": 30056 }, { "epoch": 1.4080198622757296, "grad_norm": 0.630462650589446, "learning_rate": 1.0634234244809088e-06, "loss": 0.2918, "step": 30057 }, { "epoch": 1.4080667072656579, "grad_norm": 0.6546520052024137, "learning_rate": 1.0632682164901106e-06, "loss": 0.2898, "step": 30058 }, { "epoch": 1.4081135522555863, "grad_norm": 0.6647443156503268, "learning_rate": 1.063113016767342e-06, "loss": 0.259, "step": 30059 }, { "epoch": 1.4081603972455146, "grad_norm": 0.6007399048170898, "learning_rate": 1.062957825313497e-06, "loss": 0.2854, "step": 30060 }, { "epoch": 1.4082072422354428, "grad_norm": 0.5793733616804729, "learning_rate": 1.0628026421294682e-06, "loss": 0.2788, "step": 30061 }, { "epoch": 1.4082540872253713, "grad_norm": 0.6180907265499334, "learning_rate": 1.0626474672161502e-06, "loss": 0.2834, "step": 30062 }, { "epoch": 1.4083009322152995, "grad_norm": 0.5748998739588335, "learning_rate": 1.0624923005744336e-06, "loss": 0.2737, "step": 30063 }, { "epoch": 1.408347777205228, "grad_norm": 0.6146265663491051, "learning_rate": 1.0623371422052126e-06, "loss": 0.279, "step": 30064 }, { "epoch": 1.4083946221951562, "grad_norm": 0.5680719351607346, "learning_rate": 1.0621819921093806e-06, "loss": 0.2629, "step": 30065 }, { "epoch": 1.4084414671850847, "grad_norm": 0.5670171452649029, "learning_rate": 1.0620268502878296e-06, "loss": 0.2731, "step": 30066 }, { "epoch": 1.408488312175013, "grad_norm": 0.5723188013685058, "learning_rate": 1.0618717167414522e-06, "loss": 0.2525, "step": 30067 }, { "epoch": 1.4085351571649412, "grad_norm": 0.6133089743475294, "learning_rate": 1.0617165914711425e-06, "loss": 0.2821, "step": 30068 }, { "epoch": 1.4085820021548696, "grad_norm": 0.6246215869635647, "learning_rate": 1.0615614744777915e-06, "loss": 0.2696, "step": 30069 }, { "epoch": 1.4086288471447979, "grad_norm": 0.631991513162819, "learning_rate": 1.0614063657622928e-06, "loss": 0.2936, "step": 30070 }, { "epoch": 1.408675692134726, "grad_norm": 0.5778995693946701, "learning_rate": 1.0612512653255385e-06, "loss": 0.2751, "step": 30071 }, { "epoch": 1.4087225371246546, "grad_norm": 0.5341636483867704, "learning_rate": 1.061096173168423e-06, "loss": 0.2587, "step": 30072 }, { "epoch": 1.4087693821145828, "grad_norm": 0.5908878762730109, "learning_rate": 1.0609410892918363e-06, "loss": 0.2616, "step": 30073 }, { "epoch": 1.408816227104511, "grad_norm": 0.5830856571590102, "learning_rate": 1.0607860136966715e-06, "loss": 0.2656, "step": 30074 }, { "epoch": 1.4088630720944395, "grad_norm": 0.6566190989626799, "learning_rate": 1.060630946383823e-06, "loss": 0.2828, "step": 30075 }, { "epoch": 1.4089099170843677, "grad_norm": 0.5518382737200197, "learning_rate": 1.0604758873541804e-06, "loss": 0.2568, "step": 30076 }, { "epoch": 1.4089567620742962, "grad_norm": 0.582032423912807, "learning_rate": 1.060320836608637e-06, "loss": 0.2589, "step": 30077 }, { "epoch": 1.4090036070642245, "grad_norm": 0.6080764618174204, "learning_rate": 1.0601657941480858e-06, "loss": 0.2696, "step": 30078 }, { "epoch": 1.409050452054153, "grad_norm": 0.5999895143629311, "learning_rate": 1.0600107599734193e-06, "loss": 0.2704, "step": 30079 }, { "epoch": 1.4090972970440812, "grad_norm": 0.5815601722351416, "learning_rate": 1.059855734085528e-06, "loss": 0.2666, "step": 30080 }, { "epoch": 1.4091441420340094, "grad_norm": 0.5872068255189109, "learning_rate": 1.0597007164853059e-06, "loss": 0.2627, "step": 30081 }, { "epoch": 1.4091909870239379, "grad_norm": 0.6213052224921579, "learning_rate": 1.0595457071736433e-06, "loss": 0.2753, "step": 30082 }, { "epoch": 1.409237832013866, "grad_norm": 0.5582560918907581, "learning_rate": 1.0593907061514331e-06, "loss": 0.2632, "step": 30083 }, { "epoch": 1.4092846770037943, "grad_norm": 0.5980222223850067, "learning_rate": 1.0592357134195674e-06, "loss": 0.2692, "step": 30084 }, { "epoch": 1.4093315219937228, "grad_norm": 0.5485199883493475, "learning_rate": 1.0590807289789388e-06, "loss": 0.2586, "step": 30085 }, { "epoch": 1.409378366983651, "grad_norm": 0.5644992848903598, "learning_rate": 1.0589257528304376e-06, "loss": 0.2564, "step": 30086 }, { "epoch": 1.4094252119735795, "grad_norm": 0.5966774972690563, "learning_rate": 1.0587707849749574e-06, "loss": 0.283, "step": 30087 }, { "epoch": 1.4094720569635077, "grad_norm": 0.5654625016774989, "learning_rate": 1.0586158254133883e-06, "loss": 0.2553, "step": 30088 }, { "epoch": 1.4095189019534362, "grad_norm": 0.603667047525032, "learning_rate": 1.0584608741466225e-06, "loss": 0.2918, "step": 30089 }, { "epoch": 1.4095657469433645, "grad_norm": 0.6195811994440614, "learning_rate": 1.0583059311755521e-06, "loss": 0.2791, "step": 30090 }, { "epoch": 1.4096125919332927, "grad_norm": 0.6147781251325652, "learning_rate": 1.0581509965010691e-06, "loss": 0.2817, "step": 30091 }, { "epoch": 1.4096594369232212, "grad_norm": 0.59063710797419, "learning_rate": 1.0579960701240656e-06, "loss": 0.2727, "step": 30092 }, { "epoch": 1.4097062819131494, "grad_norm": 0.5876761698082927, "learning_rate": 1.0578411520454317e-06, "loss": 0.2571, "step": 30093 }, { "epoch": 1.4097531269030776, "grad_norm": 0.6574804412651166, "learning_rate": 1.057686242266059e-06, "loss": 0.2991, "step": 30094 }, { "epoch": 1.409799971893006, "grad_norm": 0.595615524299263, "learning_rate": 1.0575313407868396e-06, "loss": 0.2696, "step": 30095 }, { "epoch": 1.4098468168829343, "grad_norm": 0.5953071354205736, "learning_rate": 1.0573764476086648e-06, "loss": 0.2716, "step": 30096 }, { "epoch": 1.4098936618728626, "grad_norm": 0.6508786587439566, "learning_rate": 1.0572215627324257e-06, "loss": 0.2797, "step": 30097 }, { "epoch": 1.409940506862791, "grad_norm": 0.5898710212421121, "learning_rate": 1.057066686159014e-06, "loss": 0.2637, "step": 30098 }, { "epoch": 1.4099873518527193, "grad_norm": 0.5358099118667842, "learning_rate": 1.0569118178893217e-06, "loss": 0.2639, "step": 30099 }, { "epoch": 1.4100341968426477, "grad_norm": 0.616424127945936, "learning_rate": 1.0567569579242392e-06, "loss": 0.2778, "step": 30100 }, { "epoch": 1.410081041832576, "grad_norm": 0.6323377486971388, "learning_rate": 1.0566021062646567e-06, "loss": 0.2797, "step": 30101 }, { "epoch": 1.4101278868225045, "grad_norm": 0.6252348783395006, "learning_rate": 1.0564472629114664e-06, "loss": 0.2752, "step": 30102 }, { "epoch": 1.4101747318124327, "grad_norm": 0.5936188216031245, "learning_rate": 1.0562924278655592e-06, "loss": 0.2692, "step": 30103 }, { "epoch": 1.410221576802361, "grad_norm": 0.5900370624856294, "learning_rate": 1.0561376011278262e-06, "loss": 0.2674, "step": 30104 }, { "epoch": 1.4102684217922894, "grad_norm": 0.6084806311613954, "learning_rate": 1.0559827826991596e-06, "loss": 0.2669, "step": 30105 }, { "epoch": 1.4103152667822176, "grad_norm": 0.6503470238211941, "learning_rate": 1.0558279725804477e-06, "loss": 0.2882, "step": 30106 }, { "epoch": 1.4103621117721459, "grad_norm": 0.6389322881920979, "learning_rate": 1.0556731707725842e-06, "loss": 0.2819, "step": 30107 }, { "epoch": 1.4104089567620743, "grad_norm": 0.5996924662500086, "learning_rate": 1.0555183772764573e-06, "loss": 0.2715, "step": 30108 }, { "epoch": 1.4104558017520026, "grad_norm": 0.6183625190383548, "learning_rate": 1.0553635920929595e-06, "loss": 0.2748, "step": 30109 }, { "epoch": 1.4105026467419308, "grad_norm": 0.6256256407145827, "learning_rate": 1.0552088152229808e-06, "loss": 0.2621, "step": 30110 }, { "epoch": 1.4105494917318593, "grad_norm": 0.5854770582478587, "learning_rate": 1.0550540466674133e-06, "loss": 0.2662, "step": 30111 }, { "epoch": 1.4105963367217875, "grad_norm": 0.6199525979389416, "learning_rate": 1.0548992864271456e-06, "loss": 0.2818, "step": 30112 }, { "epoch": 1.410643181711716, "grad_norm": 0.589144778771844, "learning_rate": 1.0547445345030694e-06, "loss": 0.2627, "step": 30113 }, { "epoch": 1.4106900267016442, "grad_norm": 0.5860524868317734, "learning_rate": 1.0545897908960762e-06, "loss": 0.262, "step": 30114 }, { "epoch": 1.4107368716915727, "grad_norm": 0.6008731603229612, "learning_rate": 1.0544350556070543e-06, "loss": 0.3006, "step": 30115 }, { "epoch": 1.410783716681501, "grad_norm": 0.631360914622149, "learning_rate": 1.0542803286368953e-06, "loss": 0.2832, "step": 30116 }, { "epoch": 1.4108305616714292, "grad_norm": 0.5830880722711037, "learning_rate": 1.0541256099864908e-06, "loss": 0.2792, "step": 30117 }, { "epoch": 1.4108774066613576, "grad_norm": 0.641804556799447, "learning_rate": 1.0539708996567288e-06, "loss": 0.2661, "step": 30118 }, { "epoch": 1.4109242516512859, "grad_norm": 0.5819557626745984, "learning_rate": 1.0538161976485009e-06, "loss": 0.2755, "step": 30119 }, { "epoch": 1.4109710966412141, "grad_norm": 0.6522614475916768, "learning_rate": 1.0536615039626974e-06, "loss": 0.2851, "step": 30120 }, { "epoch": 1.4110179416311426, "grad_norm": 0.6114703541523916, "learning_rate": 1.0535068186002094e-06, "loss": 0.2805, "step": 30121 }, { "epoch": 1.4110647866210708, "grad_norm": 0.6251500140706181, "learning_rate": 1.053352141561925e-06, "loss": 0.2835, "step": 30122 }, { "epoch": 1.4111116316109993, "grad_norm": 0.5922445690505405, "learning_rate": 1.0531974728487366e-06, "loss": 0.2802, "step": 30123 }, { "epoch": 1.4111584766009275, "grad_norm": 0.6083142495801319, "learning_rate": 1.053042812461532e-06, "loss": 0.3013, "step": 30124 }, { "epoch": 1.411205321590856, "grad_norm": 0.6039381763968262, "learning_rate": 1.0528881604012022e-06, "loss": 0.2811, "step": 30125 }, { "epoch": 1.4112521665807842, "grad_norm": 0.6105383675955273, "learning_rate": 1.0527335166686376e-06, "loss": 0.2866, "step": 30126 }, { "epoch": 1.4112990115707125, "grad_norm": 0.682870929761392, "learning_rate": 1.0525788812647275e-06, "loss": 0.2857, "step": 30127 }, { "epoch": 1.411345856560641, "grad_norm": 0.6898453148438813, "learning_rate": 1.0524242541903634e-06, "loss": 0.2858, "step": 30128 }, { "epoch": 1.4113927015505692, "grad_norm": 0.5906438924011397, "learning_rate": 1.0522696354464337e-06, "loss": 0.2655, "step": 30129 }, { "epoch": 1.4114395465404974, "grad_norm": 0.5716916055513914, "learning_rate": 1.0521150250338274e-06, "loss": 0.2753, "step": 30130 }, { "epoch": 1.4114863915304259, "grad_norm": 0.5881842306898436, "learning_rate": 1.0519604229534356e-06, "loss": 0.2635, "step": 30131 }, { "epoch": 1.4115332365203541, "grad_norm": 0.5781165573786403, "learning_rate": 1.0518058292061472e-06, "loss": 0.2795, "step": 30132 }, { "epoch": 1.4115800815102824, "grad_norm": 0.6189841358217547, "learning_rate": 1.0516512437928526e-06, "loss": 0.264, "step": 30133 }, { "epoch": 1.4116269265002108, "grad_norm": 0.6038406359160307, "learning_rate": 1.0514966667144416e-06, "loss": 0.2728, "step": 30134 }, { "epoch": 1.411673771490139, "grad_norm": 0.5949911922701944, "learning_rate": 1.0513420979718025e-06, "loss": 0.2708, "step": 30135 }, { "epoch": 1.4117206164800675, "grad_norm": 0.5609798486982536, "learning_rate": 1.0511875375658265e-06, "loss": 0.2659, "step": 30136 }, { "epoch": 1.4117674614699958, "grad_norm": 0.6342890189980229, "learning_rate": 1.0510329854974011e-06, "loss": 0.2808, "step": 30137 }, { "epoch": 1.4118143064599242, "grad_norm": 0.5479590080284971, "learning_rate": 1.0508784417674165e-06, "loss": 0.246, "step": 30138 }, { "epoch": 1.4118611514498525, "grad_norm": 0.617879107384979, "learning_rate": 1.0507239063767624e-06, "loss": 0.271, "step": 30139 }, { "epoch": 1.4119079964397807, "grad_norm": 0.5691767590812689, "learning_rate": 1.050569379326328e-06, "loss": 0.2636, "step": 30140 }, { "epoch": 1.4119548414297092, "grad_norm": 0.6115138240775925, "learning_rate": 1.0504148606170034e-06, "loss": 0.2786, "step": 30141 }, { "epoch": 1.4120016864196374, "grad_norm": 0.6192551243707708, "learning_rate": 1.050260350249677e-06, "loss": 0.2693, "step": 30142 }, { "epoch": 1.4120485314095657, "grad_norm": 0.6102648303449318, "learning_rate": 1.0501058482252368e-06, "loss": 0.2782, "step": 30143 }, { "epoch": 1.4120953763994941, "grad_norm": 0.6005396206097885, "learning_rate": 1.0499513545445734e-06, "loss": 0.2818, "step": 30144 }, { "epoch": 1.4121422213894224, "grad_norm": 0.6212387278692346, "learning_rate": 1.0497968692085754e-06, "loss": 0.2567, "step": 30145 }, { "epoch": 1.4121890663793506, "grad_norm": 0.5698978783567733, "learning_rate": 1.0496423922181314e-06, "loss": 0.2589, "step": 30146 }, { "epoch": 1.412235911369279, "grad_norm": 0.6079572842363898, "learning_rate": 1.0494879235741314e-06, "loss": 0.28, "step": 30147 }, { "epoch": 1.4122827563592073, "grad_norm": 0.6004373830975804, "learning_rate": 1.0493334632774647e-06, "loss": 0.2752, "step": 30148 }, { "epoch": 1.4123296013491358, "grad_norm": 0.6459156533342679, "learning_rate": 1.0491790113290192e-06, "loss": 0.2705, "step": 30149 }, { "epoch": 1.412376446339064, "grad_norm": 0.5816384429951913, "learning_rate": 1.0490245677296828e-06, "loss": 0.2747, "step": 30150 }, { "epoch": 1.4124232913289925, "grad_norm": 0.582247485195872, "learning_rate": 1.0488701324803457e-06, "loss": 0.2716, "step": 30151 }, { "epoch": 1.4124701363189207, "grad_norm": 0.5887099500818394, "learning_rate": 1.048715705581896e-06, "loss": 0.2672, "step": 30152 }, { "epoch": 1.412516981308849, "grad_norm": 0.5903502703339728, "learning_rate": 1.0485612870352227e-06, "loss": 0.2708, "step": 30153 }, { "epoch": 1.4125638262987774, "grad_norm": 0.6588883069523567, "learning_rate": 1.0484068768412153e-06, "loss": 0.3009, "step": 30154 }, { "epoch": 1.4126106712887057, "grad_norm": 0.6081456876851156, "learning_rate": 1.0482524750007605e-06, "loss": 0.2841, "step": 30155 }, { "epoch": 1.412657516278634, "grad_norm": 0.6223488927536923, "learning_rate": 1.0480980815147488e-06, "loss": 0.2709, "step": 30156 }, { "epoch": 1.4127043612685624, "grad_norm": 0.5948628744614716, "learning_rate": 1.047943696384067e-06, "loss": 0.2666, "step": 30157 }, { "epoch": 1.4127512062584906, "grad_norm": 0.5814686063305571, "learning_rate": 1.047789319609604e-06, "loss": 0.2671, "step": 30158 }, { "epoch": 1.412798051248419, "grad_norm": 0.6226496489744497, "learning_rate": 1.0476349511922485e-06, "loss": 0.2837, "step": 30159 }, { "epoch": 1.4128448962383473, "grad_norm": 0.5996584632778299, "learning_rate": 1.0474805911328897e-06, "loss": 0.2875, "step": 30160 }, { "epoch": 1.4128917412282758, "grad_norm": 0.5832249533970911, "learning_rate": 1.047326239432414e-06, "loss": 0.2668, "step": 30161 }, { "epoch": 1.412938586218204, "grad_norm": 0.5644928066153111, "learning_rate": 1.047171896091711e-06, "loss": 0.2697, "step": 30162 }, { "epoch": 1.4129854312081322, "grad_norm": 0.6474991305430968, "learning_rate": 1.0470175611116692e-06, "loss": 0.3, "step": 30163 }, { "epoch": 1.4130322761980607, "grad_norm": 0.6081284992200994, "learning_rate": 1.0468632344931755e-06, "loss": 0.2916, "step": 30164 }, { "epoch": 1.413079121187989, "grad_norm": 0.5617463304589992, "learning_rate": 1.0467089162371186e-06, "loss": 0.2691, "step": 30165 }, { "epoch": 1.4131259661779172, "grad_norm": 0.6086314864104192, "learning_rate": 1.0465546063443874e-06, "loss": 0.2876, "step": 30166 }, { "epoch": 1.4131728111678457, "grad_norm": 0.6317679360925258, "learning_rate": 1.0464003048158684e-06, "loss": 0.2681, "step": 30167 }, { "epoch": 1.413219656157774, "grad_norm": 0.6061967658404315, "learning_rate": 1.04624601165245e-06, "loss": 0.2768, "step": 30168 }, { "epoch": 1.4132665011477021, "grad_norm": 0.6001901277412672, "learning_rate": 1.0460917268550206e-06, "loss": 0.2816, "step": 30169 }, { "epoch": 1.4133133461376306, "grad_norm": 0.578905363378371, "learning_rate": 1.0459374504244687e-06, "loss": 0.2656, "step": 30170 }, { "epoch": 1.4133601911275588, "grad_norm": 0.58301568605521, "learning_rate": 1.0457831823616807e-06, "loss": 0.2788, "step": 30171 }, { "epoch": 1.4134070361174873, "grad_norm": 0.6402805151312819, "learning_rate": 1.0456289226675457e-06, "loss": 0.286, "step": 30172 }, { "epoch": 1.4134538811074155, "grad_norm": 0.6055320941179168, "learning_rate": 1.0454746713429498e-06, "loss": 0.2916, "step": 30173 }, { "epoch": 1.413500726097344, "grad_norm": 0.562235743752141, "learning_rate": 1.0453204283887813e-06, "loss": 0.2671, "step": 30174 }, { "epoch": 1.4135475710872722, "grad_norm": 0.5947982584704352, "learning_rate": 1.0451661938059284e-06, "loss": 0.2721, "step": 30175 }, { "epoch": 1.4135944160772005, "grad_norm": 0.5987788653648536, "learning_rate": 1.0450119675952794e-06, "loss": 0.2583, "step": 30176 }, { "epoch": 1.413641261067129, "grad_norm": 0.5830494208302185, "learning_rate": 1.0448577497577198e-06, "loss": 0.2791, "step": 30177 }, { "epoch": 1.4136881060570572, "grad_norm": 0.5996827597466864, "learning_rate": 1.0447035402941388e-06, "loss": 0.2743, "step": 30178 }, { "epoch": 1.4137349510469854, "grad_norm": 0.6175948458513475, "learning_rate": 1.0445493392054227e-06, "loss": 0.2965, "step": 30179 }, { "epoch": 1.413781796036914, "grad_norm": 0.5816387168688386, "learning_rate": 1.044395146492459e-06, "loss": 0.2632, "step": 30180 }, { "epoch": 1.4138286410268421, "grad_norm": 0.5364985760611835, "learning_rate": 1.0442409621561355e-06, "loss": 0.2478, "step": 30181 }, { "epoch": 1.4138754860167704, "grad_norm": 0.6315536268135568, "learning_rate": 1.0440867861973394e-06, "loss": 0.2858, "step": 30182 }, { "epoch": 1.4139223310066988, "grad_norm": 0.6122792833102833, "learning_rate": 1.0439326186169587e-06, "loss": 0.2764, "step": 30183 }, { "epoch": 1.413969175996627, "grad_norm": 0.5731857403485752, "learning_rate": 1.0437784594158798e-06, "loss": 0.2619, "step": 30184 }, { "epoch": 1.4140160209865555, "grad_norm": 0.5849761261293981, "learning_rate": 1.0436243085949891e-06, "loss": 0.2622, "step": 30185 }, { "epoch": 1.4140628659764838, "grad_norm": 0.5678355191771096, "learning_rate": 1.0434701661551744e-06, "loss": 0.2656, "step": 30186 }, { "epoch": 1.4141097109664122, "grad_norm": 0.5764886690058053, "learning_rate": 1.043316032097323e-06, "loss": 0.2687, "step": 30187 }, { "epoch": 1.4141565559563405, "grad_norm": 0.636526993971016, "learning_rate": 1.0431619064223214e-06, "loss": 0.2781, "step": 30188 }, { "epoch": 1.4142034009462687, "grad_norm": 0.573618444611915, "learning_rate": 1.043007789131057e-06, "loss": 0.2547, "step": 30189 }, { "epoch": 1.4142502459361972, "grad_norm": 0.6034866274276095, "learning_rate": 1.0428536802244175e-06, "loss": 0.2705, "step": 30190 }, { "epoch": 1.4142970909261254, "grad_norm": 0.6079877787098691, "learning_rate": 1.0426995797032888e-06, "loss": 0.2819, "step": 30191 }, { "epoch": 1.4143439359160537, "grad_norm": 0.6196894878501032, "learning_rate": 1.0425454875685568e-06, "loss": 0.2869, "step": 30192 }, { "epoch": 1.4143907809059821, "grad_norm": 0.5783391335554909, "learning_rate": 1.042391403821109e-06, "loss": 0.2609, "step": 30193 }, { "epoch": 1.4144376258959104, "grad_norm": 0.576855882499272, "learning_rate": 1.0422373284618325e-06, "loss": 0.2743, "step": 30194 }, { "epoch": 1.4144844708858388, "grad_norm": 0.5619960008847791, "learning_rate": 1.042083261491614e-06, "loss": 0.2641, "step": 30195 }, { "epoch": 1.414531315875767, "grad_norm": 0.5787651462502499, "learning_rate": 1.0419292029113407e-06, "loss": 0.2704, "step": 30196 }, { "epoch": 1.4145781608656955, "grad_norm": 0.5571713227314007, "learning_rate": 1.0417751527218972e-06, "loss": 0.2651, "step": 30197 }, { "epoch": 1.4146250058556238, "grad_norm": 0.5815528033113813, "learning_rate": 1.0416211109241725e-06, "loss": 0.2789, "step": 30198 }, { "epoch": 1.414671850845552, "grad_norm": 0.6011161780113833, "learning_rate": 1.0414670775190506e-06, "loss": 0.2775, "step": 30199 }, { "epoch": 1.4147186958354805, "grad_norm": 0.5844853923901051, "learning_rate": 1.0413130525074191e-06, "loss": 0.2661, "step": 30200 }, { "epoch": 1.4147655408254087, "grad_norm": 0.5932126312257938, "learning_rate": 1.0411590358901643e-06, "loss": 0.2825, "step": 30201 }, { "epoch": 1.414812385815337, "grad_norm": 0.6179644736749529, "learning_rate": 1.0410050276681735e-06, "loss": 0.2825, "step": 30202 }, { "epoch": 1.4148592308052654, "grad_norm": 0.5883866777559456, "learning_rate": 1.0408510278423313e-06, "loss": 0.264, "step": 30203 }, { "epoch": 1.4149060757951937, "grad_norm": 0.5567470197111186, "learning_rate": 1.0406970364135248e-06, "loss": 0.2644, "step": 30204 }, { "epoch": 1.414952920785122, "grad_norm": 0.5953758685824702, "learning_rate": 1.0405430533826406e-06, "loss": 0.2821, "step": 30205 }, { "epoch": 1.4149997657750504, "grad_norm": 0.6017364934371707, "learning_rate": 1.0403890787505635e-06, "loss": 0.2721, "step": 30206 }, { "epoch": 1.4150466107649786, "grad_norm": 0.6011555494161329, "learning_rate": 1.0402351125181804e-06, "loss": 0.2799, "step": 30207 }, { "epoch": 1.415093455754907, "grad_norm": 0.5500119894240202, "learning_rate": 1.0400811546863784e-06, "loss": 0.2423, "step": 30208 }, { "epoch": 1.4151403007448353, "grad_norm": 0.5949870108628668, "learning_rate": 1.0399272052560414e-06, "loss": 0.2842, "step": 30209 }, { "epoch": 1.4151871457347638, "grad_norm": 0.5685316223845513, "learning_rate": 1.0397732642280564e-06, "loss": 0.2586, "step": 30210 }, { "epoch": 1.415233990724692, "grad_norm": 0.5657246985442204, "learning_rate": 1.039619331603309e-06, "loss": 0.2642, "step": 30211 }, { "epoch": 1.4152808357146203, "grad_norm": 0.637048759546313, "learning_rate": 1.0394654073826865e-06, "loss": 0.2657, "step": 30212 }, { "epoch": 1.4153276807045487, "grad_norm": 0.5977474180936513, "learning_rate": 1.0393114915670727e-06, "loss": 0.2751, "step": 30213 }, { "epoch": 1.415374525694477, "grad_norm": 0.591573813843062, "learning_rate": 1.0391575841573538e-06, "loss": 0.2716, "step": 30214 }, { "epoch": 1.4154213706844052, "grad_norm": 0.5850632162583148, "learning_rate": 1.039003685154417e-06, "loss": 0.2878, "step": 30215 }, { "epoch": 1.4154682156743337, "grad_norm": 0.5971453353801435, "learning_rate": 1.0388497945591456e-06, "loss": 0.2642, "step": 30216 }, { "epoch": 1.415515060664262, "grad_norm": 0.5777530309228789, "learning_rate": 1.0386959123724265e-06, "loss": 0.2536, "step": 30217 }, { "epoch": 1.4155619056541902, "grad_norm": 0.6243000518756124, "learning_rate": 1.0385420385951454e-06, "loss": 0.2873, "step": 30218 }, { "epoch": 1.4156087506441186, "grad_norm": 0.619595950659001, "learning_rate": 1.038388173228188e-06, "loss": 0.2765, "step": 30219 }, { "epoch": 1.4156555956340469, "grad_norm": 0.5935207547301452, "learning_rate": 1.0382343162724387e-06, "loss": 0.2582, "step": 30220 }, { "epoch": 1.4157024406239753, "grad_norm": 0.6124975614358217, "learning_rate": 1.0380804677287844e-06, "loss": 0.2583, "step": 30221 }, { "epoch": 1.4157492856139036, "grad_norm": 0.5970438762014723, "learning_rate": 1.0379266275981088e-06, "loss": 0.2732, "step": 30222 }, { "epoch": 1.415796130603832, "grad_norm": 0.5822913421026772, "learning_rate": 1.0377727958812975e-06, "loss": 0.2713, "step": 30223 }, { "epoch": 1.4158429755937603, "grad_norm": 0.5802375293139486, "learning_rate": 1.0376189725792365e-06, "loss": 0.2609, "step": 30224 }, { "epoch": 1.4158898205836885, "grad_norm": 0.6235269417569395, "learning_rate": 1.037465157692812e-06, "loss": 0.274, "step": 30225 }, { "epoch": 1.415936665573617, "grad_norm": 0.5642561577869003, "learning_rate": 1.0373113512229067e-06, "loss": 0.2724, "step": 30226 }, { "epoch": 1.4159835105635452, "grad_norm": 0.6076598738525836, "learning_rate": 1.0371575531704076e-06, "loss": 0.2777, "step": 30227 }, { "epoch": 1.4160303555534735, "grad_norm": 0.6157464257594444, "learning_rate": 1.0370037635361985e-06, "loss": 0.2843, "step": 30228 }, { "epoch": 1.416077200543402, "grad_norm": 0.6163815112794402, "learning_rate": 1.036849982321165e-06, "loss": 0.2703, "step": 30229 }, { "epoch": 1.4161240455333302, "grad_norm": 0.5719369185073663, "learning_rate": 1.036696209526192e-06, "loss": 0.2753, "step": 30230 }, { "epoch": 1.4161708905232586, "grad_norm": 0.6335875912676757, "learning_rate": 1.0365424451521647e-06, "loss": 0.2789, "step": 30231 }, { "epoch": 1.4162177355131869, "grad_norm": 0.5629163347335665, "learning_rate": 1.0363886891999687e-06, "loss": 0.2566, "step": 30232 }, { "epoch": 1.4162645805031153, "grad_norm": 0.5944812675640342, "learning_rate": 1.036234941670488e-06, "loss": 0.2709, "step": 30233 }, { "epoch": 1.4163114254930436, "grad_norm": 0.5874651475230546, "learning_rate": 1.036081202564606e-06, "loss": 0.2794, "step": 30234 }, { "epoch": 1.4163582704829718, "grad_norm": 0.6227345973185393, "learning_rate": 1.035927471883209e-06, "loss": 0.2858, "step": 30235 }, { "epoch": 1.4164051154729003, "grad_norm": 0.5786436417631177, "learning_rate": 1.0357737496271816e-06, "loss": 0.2462, "step": 30236 }, { "epoch": 1.4164519604628285, "grad_norm": 0.5957218051758345, "learning_rate": 1.035620035797408e-06, "loss": 0.2759, "step": 30237 }, { "epoch": 1.4164988054527567, "grad_norm": 0.5639675178350845, "learning_rate": 1.0354663303947732e-06, "loss": 0.2738, "step": 30238 }, { "epoch": 1.4165456504426852, "grad_norm": 0.6050901868459909, "learning_rate": 1.035312633420162e-06, "loss": 0.2782, "step": 30239 }, { "epoch": 1.4165924954326135, "grad_norm": 0.5786607257384455, "learning_rate": 1.0351589448744592e-06, "loss": 0.2497, "step": 30240 }, { "epoch": 1.4166393404225417, "grad_norm": 0.619893708800214, "learning_rate": 1.035005264758547e-06, "loss": 0.2707, "step": 30241 }, { "epoch": 1.4166861854124702, "grad_norm": 0.5968441599096498, "learning_rate": 1.0348515930733116e-06, "loss": 0.265, "step": 30242 }, { "epoch": 1.4167330304023984, "grad_norm": 0.6008533285067653, "learning_rate": 1.034697929819637e-06, "loss": 0.2701, "step": 30243 }, { "epoch": 1.4167798753923269, "grad_norm": 0.564996856002132, "learning_rate": 1.0345442749984076e-06, "loss": 0.2687, "step": 30244 }, { "epoch": 1.416826720382255, "grad_norm": 0.5887503021085883, "learning_rate": 1.0343906286105082e-06, "loss": 0.2747, "step": 30245 }, { "epoch": 1.4168735653721836, "grad_norm": 0.6216882910014278, "learning_rate": 1.0342369906568217e-06, "loss": 0.2841, "step": 30246 }, { "epoch": 1.4169204103621118, "grad_norm": 0.5669926956090235, "learning_rate": 1.0340833611382337e-06, "loss": 0.2659, "step": 30247 }, { "epoch": 1.41696725535204, "grad_norm": 0.5932539788787674, "learning_rate": 1.0339297400556266e-06, "loss": 0.261, "step": 30248 }, { "epoch": 1.4170141003419685, "grad_norm": 0.5585903520390477, "learning_rate": 1.0337761274098855e-06, "loss": 0.2632, "step": 30249 }, { "epoch": 1.4170609453318967, "grad_norm": 0.5920399362879558, "learning_rate": 1.033622523201894e-06, "loss": 0.2693, "step": 30250 }, { "epoch": 1.417107790321825, "grad_norm": 0.5994245549687474, "learning_rate": 1.0334689274325373e-06, "loss": 0.2831, "step": 30251 }, { "epoch": 1.4171546353117535, "grad_norm": 0.6168834096457375, "learning_rate": 1.0333153401026977e-06, "loss": 0.2836, "step": 30252 }, { "epoch": 1.4172014803016817, "grad_norm": 0.5381287463164905, "learning_rate": 1.0331617612132594e-06, "loss": 0.2631, "step": 30253 }, { "epoch": 1.41724832529161, "grad_norm": 0.599398796330554, "learning_rate": 1.0330081907651075e-06, "loss": 0.2734, "step": 30254 }, { "epoch": 1.4172951702815384, "grad_norm": 0.6197082629687605, "learning_rate": 1.0328546287591237e-06, "loss": 0.2861, "step": 30255 }, { "epoch": 1.4173420152714666, "grad_norm": 0.607590421882926, "learning_rate": 1.032701075196193e-06, "loss": 0.2743, "step": 30256 }, { "epoch": 1.417388860261395, "grad_norm": 0.6027536928776198, "learning_rate": 1.0325475300771998e-06, "loss": 0.2966, "step": 30257 }, { "epoch": 1.4174357052513233, "grad_norm": 0.5817376338220376, "learning_rate": 1.0323939934030255e-06, "loss": 0.2613, "step": 30258 }, { "epoch": 1.4174825502412518, "grad_norm": 0.5974116567119168, "learning_rate": 1.0322404651745548e-06, "loss": 0.2533, "step": 30259 }, { "epoch": 1.41752939523118, "grad_norm": 0.6106910360314252, "learning_rate": 1.0320869453926718e-06, "loss": 0.2711, "step": 30260 }, { "epoch": 1.4175762402211083, "grad_norm": 0.5715336238380545, "learning_rate": 1.0319334340582602e-06, "loss": 0.2674, "step": 30261 }, { "epoch": 1.4176230852110367, "grad_norm": 0.5706259766920372, "learning_rate": 1.0317799311722015e-06, "loss": 0.257, "step": 30262 }, { "epoch": 1.417669930200965, "grad_norm": 0.6830480432788281, "learning_rate": 1.0316264367353815e-06, "loss": 0.2836, "step": 30263 }, { "epoch": 1.4177167751908932, "grad_norm": 0.5986700495131065, "learning_rate": 1.0314729507486815e-06, "loss": 0.2918, "step": 30264 }, { "epoch": 1.4177636201808217, "grad_norm": 0.6069532093734196, "learning_rate": 1.0313194732129852e-06, "loss": 0.2835, "step": 30265 }, { "epoch": 1.41781046517075, "grad_norm": 0.6039790201126459, "learning_rate": 1.0311660041291762e-06, "loss": 0.2602, "step": 30266 }, { "epoch": 1.4178573101606784, "grad_norm": 0.6381645610125237, "learning_rate": 1.031012543498138e-06, "loss": 0.2758, "step": 30267 }, { "epoch": 1.4179041551506066, "grad_norm": 0.6142605079572546, "learning_rate": 1.0308590913207541e-06, "loss": 0.2959, "step": 30268 }, { "epoch": 1.417951000140535, "grad_norm": 0.621164510123923, "learning_rate": 1.0307056475979068e-06, "loss": 0.2917, "step": 30269 }, { "epoch": 1.4179978451304633, "grad_norm": 0.6109064023491367, "learning_rate": 1.0305522123304784e-06, "loss": 0.2744, "step": 30270 }, { "epoch": 1.4180446901203916, "grad_norm": 0.6402728578279734, "learning_rate": 1.0303987855193528e-06, "loss": 0.2774, "step": 30271 }, { "epoch": 1.41809153511032, "grad_norm": 0.5917605740161442, "learning_rate": 1.0302453671654129e-06, "loss": 0.2607, "step": 30272 }, { "epoch": 1.4181383801002483, "grad_norm": 0.5539414330319915, "learning_rate": 1.0300919572695412e-06, "loss": 0.2601, "step": 30273 }, { "epoch": 1.4181852250901765, "grad_norm": 0.5725037408419896, "learning_rate": 1.029938555832622e-06, "loss": 0.2629, "step": 30274 }, { "epoch": 1.418232070080105, "grad_norm": 0.5890541275975347, "learning_rate": 1.0297851628555357e-06, "loss": 0.2664, "step": 30275 }, { "epoch": 1.4182789150700332, "grad_norm": 0.6087719690296802, "learning_rate": 1.0296317783391674e-06, "loss": 0.2732, "step": 30276 }, { "epoch": 1.4183257600599615, "grad_norm": 0.6087737722643882, "learning_rate": 1.0294784022843975e-06, "loss": 0.2796, "step": 30277 }, { "epoch": 1.41837260504989, "grad_norm": 0.5986887532621162, "learning_rate": 1.0293250346921102e-06, "loss": 0.2708, "step": 30278 }, { "epoch": 1.4184194500398182, "grad_norm": 0.5992179605336808, "learning_rate": 1.0291716755631876e-06, "loss": 0.2713, "step": 30279 }, { "epoch": 1.4184662950297466, "grad_norm": 0.6132081524130906, "learning_rate": 1.029018324898512e-06, "loss": 0.2933, "step": 30280 }, { "epoch": 1.4185131400196749, "grad_norm": 0.6211722352179071, "learning_rate": 1.0288649826989674e-06, "loss": 0.2925, "step": 30281 }, { "epoch": 1.4185599850096033, "grad_norm": 0.5681989403295756, "learning_rate": 1.028711648965435e-06, "loss": 0.2791, "step": 30282 }, { "epoch": 1.4186068299995316, "grad_norm": 0.6598511929432845, "learning_rate": 1.0285583236987964e-06, "loss": 0.2795, "step": 30283 }, { "epoch": 1.4186536749894598, "grad_norm": 0.5637349587013329, "learning_rate": 1.0284050068999345e-06, "loss": 0.2639, "step": 30284 }, { "epoch": 1.4187005199793883, "grad_norm": 0.598351209307548, "learning_rate": 1.0282516985697324e-06, "loss": 0.2572, "step": 30285 }, { "epoch": 1.4187473649693165, "grad_norm": 0.597655475336741, "learning_rate": 1.0280983987090717e-06, "loss": 0.2715, "step": 30286 }, { "epoch": 1.4187942099592448, "grad_norm": 0.5524367702099322, "learning_rate": 1.0279451073188346e-06, "loss": 0.2662, "step": 30287 }, { "epoch": 1.4188410549491732, "grad_norm": 0.5761810400636028, "learning_rate": 1.0277918243999043e-06, "loss": 0.274, "step": 30288 }, { "epoch": 1.4188878999391015, "grad_norm": 0.6334356535799379, "learning_rate": 1.0276385499531625e-06, "loss": 0.2771, "step": 30289 }, { "epoch": 1.4189347449290297, "grad_norm": 0.5596031670747614, "learning_rate": 1.0274852839794894e-06, "loss": 0.2602, "step": 30290 }, { "epoch": 1.4189815899189582, "grad_norm": 0.6004872026918219, "learning_rate": 1.0273320264797684e-06, "loss": 0.261, "step": 30291 }, { "epoch": 1.4190284349088864, "grad_norm": 0.583170928294683, "learning_rate": 1.0271787774548815e-06, "loss": 0.2744, "step": 30292 }, { "epoch": 1.4190752798988149, "grad_norm": 0.5816510291313409, "learning_rate": 1.027025536905711e-06, "loss": 0.2718, "step": 30293 }, { "epoch": 1.4191221248887431, "grad_norm": 0.622478366058456, "learning_rate": 1.0268723048331386e-06, "loss": 0.3047, "step": 30294 }, { "epoch": 1.4191689698786716, "grad_norm": 0.5641990239298418, "learning_rate": 1.0267190812380454e-06, "loss": 0.2549, "step": 30295 }, { "epoch": 1.4192158148685998, "grad_norm": 0.5465629624204135, "learning_rate": 1.026565866121314e-06, "loss": 0.2582, "step": 30296 }, { "epoch": 1.419262659858528, "grad_norm": 0.645499811229811, "learning_rate": 1.0264126594838253e-06, "loss": 0.2871, "step": 30297 }, { "epoch": 1.4193095048484565, "grad_norm": 0.6147586141307607, "learning_rate": 1.0262594613264611e-06, "loss": 0.2712, "step": 30298 }, { "epoch": 1.4193563498383848, "grad_norm": 0.6118383140431858, "learning_rate": 1.0261062716501033e-06, "loss": 0.2878, "step": 30299 }, { "epoch": 1.419403194828313, "grad_norm": 0.6428449558262902, "learning_rate": 1.0259530904556344e-06, "loss": 0.2731, "step": 30300 }, { "epoch": 1.4194500398182415, "grad_norm": 0.6527489853433939, "learning_rate": 1.025799917743934e-06, "loss": 0.2888, "step": 30301 }, { "epoch": 1.4194968848081697, "grad_norm": 0.6208997097183165, "learning_rate": 1.0256467535158848e-06, "loss": 0.2796, "step": 30302 }, { "epoch": 1.4195437297980982, "grad_norm": 0.6813460248111409, "learning_rate": 1.0254935977723688e-06, "loss": 0.2918, "step": 30303 }, { "epoch": 1.4195905747880264, "grad_norm": 0.6155132837507404, "learning_rate": 1.0253404505142652e-06, "loss": 0.276, "step": 30304 }, { "epoch": 1.4196374197779549, "grad_norm": 0.5950583468687991, "learning_rate": 1.0251873117424572e-06, "loss": 0.2567, "step": 30305 }, { "epoch": 1.4196842647678831, "grad_norm": 0.6166753530293817, "learning_rate": 1.0250341814578263e-06, "loss": 0.2753, "step": 30306 }, { "epoch": 1.4197311097578114, "grad_norm": 0.5856095258269862, "learning_rate": 1.0248810596612519e-06, "loss": 0.2668, "step": 30307 }, { "epoch": 1.4197779547477398, "grad_norm": 0.5783028449351001, "learning_rate": 1.0247279463536164e-06, "loss": 0.2692, "step": 30308 }, { "epoch": 1.419824799737668, "grad_norm": 0.6116195825089398, "learning_rate": 1.0245748415358007e-06, "loss": 0.2653, "step": 30309 }, { "epoch": 1.4198716447275963, "grad_norm": 0.5709274015782104, "learning_rate": 1.0244217452086868e-06, "loss": 0.2591, "step": 30310 }, { "epoch": 1.4199184897175248, "grad_norm": 0.5574759478072925, "learning_rate": 1.0242686573731542e-06, "loss": 0.2613, "step": 30311 }, { "epoch": 1.419965334707453, "grad_norm": 0.578901556174012, "learning_rate": 1.0241155780300852e-06, "loss": 0.2777, "step": 30312 }, { "epoch": 1.4200121796973812, "grad_norm": 0.5526016395166535, "learning_rate": 1.0239625071803595e-06, "loss": 0.2615, "step": 30313 }, { "epoch": 1.4200590246873097, "grad_norm": 0.6174717734116916, "learning_rate": 1.0238094448248585e-06, "loss": 0.2816, "step": 30314 }, { "epoch": 1.420105869677238, "grad_norm": 0.5817105704610138, "learning_rate": 1.0236563909644629e-06, "loss": 0.2829, "step": 30315 }, { "epoch": 1.4201527146671664, "grad_norm": 0.6001506407308554, "learning_rate": 1.0235033456000546e-06, "loss": 0.2656, "step": 30316 }, { "epoch": 1.4201995596570947, "grad_norm": 0.6386951887357595, "learning_rate": 1.0233503087325128e-06, "loss": 0.28, "step": 30317 }, { "epoch": 1.4202464046470231, "grad_norm": 0.5980068963049167, "learning_rate": 1.0231972803627197e-06, "loss": 0.2739, "step": 30318 }, { "epoch": 1.4202932496369514, "grad_norm": 0.6191146095249037, "learning_rate": 1.0230442604915538e-06, "loss": 0.295, "step": 30319 }, { "epoch": 1.4203400946268796, "grad_norm": 0.5660011825395543, "learning_rate": 1.0228912491198972e-06, "loss": 0.2565, "step": 30320 }, { "epoch": 1.420386939616808, "grad_norm": 0.6353751943419964, "learning_rate": 1.0227382462486304e-06, "loss": 0.2785, "step": 30321 }, { "epoch": 1.4204337846067363, "grad_norm": 0.6059618460855498, "learning_rate": 1.0225852518786336e-06, "loss": 0.2816, "step": 30322 }, { "epoch": 1.4204806295966645, "grad_norm": 0.6047733198364628, "learning_rate": 1.0224322660107883e-06, "loss": 0.2797, "step": 30323 }, { "epoch": 1.420527474586593, "grad_norm": 0.5981186536136762, "learning_rate": 1.0222792886459738e-06, "loss": 0.2902, "step": 30324 }, { "epoch": 1.4205743195765212, "grad_norm": 0.6023846613248726, "learning_rate": 1.0221263197850695e-06, "loss": 0.261, "step": 30325 }, { "epoch": 1.4206211645664495, "grad_norm": 0.5895743261883144, "learning_rate": 1.0219733594289571e-06, "loss": 0.2735, "step": 30326 }, { "epoch": 1.420668009556378, "grad_norm": 0.5846369626927634, "learning_rate": 1.0218204075785162e-06, "loss": 0.2709, "step": 30327 }, { "epoch": 1.4207148545463062, "grad_norm": 0.5944493622440961, "learning_rate": 1.0216674642346277e-06, "loss": 0.2813, "step": 30328 }, { "epoch": 1.4207616995362347, "grad_norm": 0.6304589845318868, "learning_rate": 1.0215145293981713e-06, "loss": 0.2711, "step": 30329 }, { "epoch": 1.420808544526163, "grad_norm": 0.5840385125022264, "learning_rate": 1.021361603070028e-06, "loss": 0.275, "step": 30330 }, { "epoch": 1.4208553895160914, "grad_norm": 0.6247036013399504, "learning_rate": 1.021208685251077e-06, "loss": 0.2781, "step": 30331 }, { "epoch": 1.4209022345060196, "grad_norm": 0.5973881036207289, "learning_rate": 1.0210557759421973e-06, "loss": 0.2726, "step": 30332 }, { "epoch": 1.4209490794959478, "grad_norm": 0.610421796507623, "learning_rate": 1.02090287514427e-06, "loss": 0.2851, "step": 30333 }, { "epoch": 1.4209959244858763, "grad_norm": 0.6373623439594187, "learning_rate": 1.020749982858175e-06, "loss": 0.2844, "step": 30334 }, { "epoch": 1.4210427694758045, "grad_norm": 0.6092093760612571, "learning_rate": 1.0205970990847922e-06, "loss": 0.2886, "step": 30335 }, { "epoch": 1.4210896144657328, "grad_norm": 0.5801002520774613, "learning_rate": 1.0204442238250018e-06, "loss": 0.2605, "step": 30336 }, { "epoch": 1.4211364594556612, "grad_norm": 0.5747062539307239, "learning_rate": 1.0202913570796824e-06, "loss": 0.2777, "step": 30337 }, { "epoch": 1.4211833044455895, "grad_norm": 0.6419242325153115, "learning_rate": 1.0201384988497152e-06, "loss": 0.2888, "step": 30338 }, { "epoch": 1.421230149435518, "grad_norm": 0.6208790060128867, "learning_rate": 1.0199856491359777e-06, "loss": 0.2755, "step": 30339 }, { "epoch": 1.4212769944254462, "grad_norm": 0.5744729164548333, "learning_rate": 1.0198328079393513e-06, "loss": 0.2646, "step": 30340 }, { "epoch": 1.4213238394153747, "grad_norm": 0.6162244040014638, "learning_rate": 1.0196799752607147e-06, "loss": 0.2722, "step": 30341 }, { "epoch": 1.421370684405303, "grad_norm": 0.6254518776349348, "learning_rate": 1.019527151100949e-06, "loss": 0.2839, "step": 30342 }, { "epoch": 1.4214175293952311, "grad_norm": 0.6203539749010004, "learning_rate": 1.0193743354609315e-06, "loss": 0.2844, "step": 30343 }, { "epoch": 1.4214643743851596, "grad_norm": 0.6284356026847122, "learning_rate": 1.0192215283415424e-06, "loss": 0.291, "step": 30344 }, { "epoch": 1.4215112193750878, "grad_norm": 0.6070842359046082, "learning_rate": 1.0190687297436622e-06, "loss": 0.2659, "step": 30345 }, { "epoch": 1.421558064365016, "grad_norm": 0.6095945852962699, "learning_rate": 1.0189159396681684e-06, "loss": 0.2689, "step": 30346 }, { "epoch": 1.4216049093549445, "grad_norm": 0.5831127526068185, "learning_rate": 1.0187631581159407e-06, "loss": 0.2715, "step": 30347 }, { "epoch": 1.4216517543448728, "grad_norm": 0.582624383692956, "learning_rate": 1.0186103850878593e-06, "loss": 0.2592, "step": 30348 }, { "epoch": 1.421698599334801, "grad_norm": 0.6047016654870218, "learning_rate": 1.0184576205848035e-06, "loss": 0.2864, "step": 30349 }, { "epoch": 1.4217454443247295, "grad_norm": 0.6203339462593699, "learning_rate": 1.0183048646076508e-06, "loss": 0.2688, "step": 30350 }, { "epoch": 1.4217922893146577, "grad_norm": 0.5839317233243809, "learning_rate": 1.0181521171572812e-06, "loss": 0.2695, "step": 30351 }, { "epoch": 1.4218391343045862, "grad_norm": 0.6099840178642396, "learning_rate": 1.0179993782345742e-06, "loss": 0.2874, "step": 30352 }, { "epoch": 1.4218859792945144, "grad_norm": 0.5829688770401077, "learning_rate": 1.017846647840408e-06, "loss": 0.2559, "step": 30353 }, { "epoch": 1.421932824284443, "grad_norm": 0.6286080658467011, "learning_rate": 1.0176939259756616e-06, "loss": 0.2741, "step": 30354 }, { "epoch": 1.4219796692743711, "grad_norm": 0.6459614010808292, "learning_rate": 1.017541212641215e-06, "loss": 0.3027, "step": 30355 }, { "epoch": 1.4220265142642994, "grad_norm": 0.5861056184357892, "learning_rate": 1.0173885078379453e-06, "loss": 0.2716, "step": 30356 }, { "epoch": 1.4220733592542278, "grad_norm": 0.5903062400907535, "learning_rate": 1.017235811566732e-06, "loss": 0.2742, "step": 30357 }, { "epoch": 1.422120204244156, "grad_norm": 0.5981563297276177, "learning_rate": 1.017083123828454e-06, "loss": 0.2872, "step": 30358 }, { "epoch": 1.4221670492340843, "grad_norm": 0.609048419609286, "learning_rate": 1.0169304446239906e-06, "loss": 0.2684, "step": 30359 }, { "epoch": 1.4222138942240128, "grad_norm": 0.6086125364132517, "learning_rate": 1.016777773954219e-06, "loss": 0.27, "step": 30360 }, { "epoch": 1.422260739213941, "grad_norm": 0.6103852807721243, "learning_rate": 1.0166251118200192e-06, "loss": 0.2762, "step": 30361 }, { "epoch": 1.4223075842038693, "grad_norm": 0.6373900561400445, "learning_rate": 1.0164724582222684e-06, "loss": 0.2908, "step": 30362 }, { "epoch": 1.4223544291937977, "grad_norm": 0.5687026468632567, "learning_rate": 1.0163198131618457e-06, "loss": 0.2651, "step": 30363 }, { "epoch": 1.422401274183726, "grad_norm": 0.6043639580222929, "learning_rate": 1.0161671766396298e-06, "loss": 0.2681, "step": 30364 }, { "epoch": 1.4224481191736544, "grad_norm": 0.6173957399861121, "learning_rate": 1.0160145486564994e-06, "loss": 0.2784, "step": 30365 }, { "epoch": 1.4224949641635827, "grad_norm": 0.5729720351131369, "learning_rate": 1.0158619292133315e-06, "loss": 0.2676, "step": 30366 }, { "epoch": 1.4225418091535111, "grad_norm": 0.6107404614830855, "learning_rate": 1.015709318311006e-06, "loss": 0.2724, "step": 30367 }, { "epoch": 1.4225886541434394, "grad_norm": 0.5938721203649029, "learning_rate": 1.0155567159503992e-06, "loss": 0.2807, "step": 30368 }, { "epoch": 1.4226354991333676, "grad_norm": 0.5519301480287532, "learning_rate": 1.0154041221323908e-06, "loss": 0.2601, "step": 30369 }, { "epoch": 1.422682344123296, "grad_norm": 0.5620589031440105, "learning_rate": 1.0152515368578582e-06, "loss": 0.2658, "step": 30370 }, { "epoch": 1.4227291891132243, "grad_norm": 0.5765743812441999, "learning_rate": 1.0150989601276803e-06, "loss": 0.2548, "step": 30371 }, { "epoch": 1.4227760341031526, "grad_norm": 0.5847342880980108, "learning_rate": 1.0149463919427352e-06, "loss": 0.2662, "step": 30372 }, { "epoch": 1.422822879093081, "grad_norm": 0.5870190079048458, "learning_rate": 1.0147938323039005e-06, "loss": 0.2766, "step": 30373 }, { "epoch": 1.4228697240830093, "grad_norm": 0.5913767611758668, "learning_rate": 1.0146412812120531e-06, "loss": 0.2631, "step": 30374 }, { "epoch": 1.4229165690729377, "grad_norm": 0.5913977620517632, "learning_rate": 1.0144887386680718e-06, "loss": 0.2679, "step": 30375 }, { "epoch": 1.422963414062866, "grad_norm": 0.5599406085276356, "learning_rate": 1.0143362046728345e-06, "loss": 0.2588, "step": 30376 }, { "epoch": 1.4230102590527944, "grad_norm": 0.563177058783829, "learning_rate": 1.0141836792272187e-06, "loss": 0.2471, "step": 30377 }, { "epoch": 1.4230571040427227, "grad_norm": 0.5766688712665711, "learning_rate": 1.0140311623321025e-06, "loss": 0.2658, "step": 30378 }, { "epoch": 1.423103949032651, "grad_norm": 0.590619066300786, "learning_rate": 1.0138786539883643e-06, "loss": 0.2786, "step": 30379 }, { "epoch": 1.4231507940225794, "grad_norm": 0.5811315108911483, "learning_rate": 1.0137261541968809e-06, "loss": 0.2934, "step": 30380 }, { "epoch": 1.4231976390125076, "grad_norm": 0.5626289167738068, "learning_rate": 1.0135736629585295e-06, "loss": 0.2723, "step": 30381 }, { "epoch": 1.4232444840024359, "grad_norm": 0.6329540771637099, "learning_rate": 1.0134211802741876e-06, "loss": 0.2693, "step": 30382 }, { "epoch": 1.4232913289923643, "grad_norm": 0.6106679624517259, "learning_rate": 1.013268706144733e-06, "loss": 0.272, "step": 30383 }, { "epoch": 1.4233381739822926, "grad_norm": 0.6065158019423948, "learning_rate": 1.0131162405710437e-06, "loss": 0.2794, "step": 30384 }, { "epoch": 1.4233850189722208, "grad_norm": 0.6096515488774739, "learning_rate": 1.0129637835539976e-06, "loss": 0.2759, "step": 30385 }, { "epoch": 1.4234318639621493, "grad_norm": 0.59570002996587, "learning_rate": 1.0128113350944702e-06, "loss": 0.2758, "step": 30386 }, { "epoch": 1.4234787089520775, "grad_norm": 0.5991178495103805, "learning_rate": 1.0126588951933405e-06, "loss": 0.2789, "step": 30387 }, { "epoch": 1.423525553942006, "grad_norm": 0.5891238233528824, "learning_rate": 1.0125064638514844e-06, "loss": 0.2583, "step": 30388 }, { "epoch": 1.4235723989319342, "grad_norm": 0.5845779520672293, "learning_rate": 1.0123540410697797e-06, "loss": 0.2616, "step": 30389 }, { "epoch": 1.4236192439218627, "grad_norm": 0.6271769969442776, "learning_rate": 1.0122016268491033e-06, "loss": 0.2795, "step": 30390 }, { "epoch": 1.423666088911791, "grad_norm": 0.5684218958137186, "learning_rate": 1.0120492211903337e-06, "loss": 0.2684, "step": 30391 }, { "epoch": 1.4237129339017192, "grad_norm": 0.5723195145204153, "learning_rate": 1.011896824094346e-06, "loss": 0.2626, "step": 30392 }, { "epoch": 1.4237597788916476, "grad_norm": 0.6049139350460271, "learning_rate": 1.0117444355620181e-06, "loss": 0.2815, "step": 30393 }, { "epoch": 1.4238066238815759, "grad_norm": 0.5914554595596759, "learning_rate": 1.0115920555942278e-06, "loss": 0.2619, "step": 30394 }, { "epoch": 1.423853468871504, "grad_norm": 0.5900969906348388, "learning_rate": 1.01143968419185e-06, "loss": 0.2648, "step": 30395 }, { "epoch": 1.4239003138614326, "grad_norm": 0.6232417340639097, "learning_rate": 1.0112873213557628e-06, "loss": 0.279, "step": 30396 }, { "epoch": 1.4239471588513608, "grad_norm": 0.6356228149424222, "learning_rate": 1.011134967086844e-06, "loss": 0.2696, "step": 30397 }, { "epoch": 1.423994003841289, "grad_norm": 0.6311950415117781, "learning_rate": 1.010982621385968e-06, "loss": 0.2889, "step": 30398 }, { "epoch": 1.4240408488312175, "grad_norm": 0.6098392959332715, "learning_rate": 1.010830284254013e-06, "loss": 0.273, "step": 30399 }, { "epoch": 1.4240876938211458, "grad_norm": 0.5732650575269936, "learning_rate": 1.0106779556918553e-06, "loss": 0.2658, "step": 30400 }, { "epoch": 1.4241345388110742, "grad_norm": 0.6069719021772753, "learning_rate": 1.0105256357003726e-06, "loss": 0.2607, "step": 30401 }, { "epoch": 1.4241813838010025, "grad_norm": 0.5576136605236448, "learning_rate": 1.0103733242804398e-06, "loss": 0.2464, "step": 30402 }, { "epoch": 1.424228228790931, "grad_norm": 0.5879535199654078, "learning_rate": 1.0102210214329346e-06, "loss": 0.262, "step": 30403 }, { "epoch": 1.4242750737808592, "grad_norm": 0.5730846248192997, "learning_rate": 1.0100687271587323e-06, "loss": 0.2601, "step": 30404 }, { "epoch": 1.4243219187707874, "grad_norm": 0.607274392553425, "learning_rate": 1.0099164414587102e-06, "loss": 0.2687, "step": 30405 }, { "epoch": 1.4243687637607159, "grad_norm": 0.6278333856868874, "learning_rate": 1.0097641643337442e-06, "loss": 0.2872, "step": 30406 }, { "epoch": 1.424415608750644, "grad_norm": 0.6014952400115728, "learning_rate": 1.009611895784711e-06, "loss": 0.27, "step": 30407 }, { "epoch": 1.4244624537405723, "grad_norm": 0.5595557333823965, "learning_rate": 1.0094596358124876e-06, "loss": 0.2691, "step": 30408 }, { "epoch": 1.4245092987305008, "grad_norm": 0.6021406815699273, "learning_rate": 1.009307384417949e-06, "loss": 0.2703, "step": 30409 }, { "epoch": 1.424556143720429, "grad_norm": 0.5643768231872023, "learning_rate": 1.0091551416019713e-06, "loss": 0.2632, "step": 30410 }, { "epoch": 1.4246029887103575, "grad_norm": 0.6086221506034918, "learning_rate": 1.0090029073654308e-06, "loss": 0.2627, "step": 30411 }, { "epoch": 1.4246498337002858, "grad_norm": 0.5106085904690385, "learning_rate": 1.0088506817092043e-06, "loss": 0.2385, "step": 30412 }, { "epoch": 1.4246966786902142, "grad_norm": 0.6157976207946249, "learning_rate": 1.008698464634167e-06, "loss": 0.2697, "step": 30413 }, { "epoch": 1.4247435236801425, "grad_norm": 0.6006740056667449, "learning_rate": 1.0085462561411958e-06, "loss": 0.2817, "step": 30414 }, { "epoch": 1.4247903686700707, "grad_norm": 0.5909724189718716, "learning_rate": 1.0083940562311652e-06, "loss": 0.271, "step": 30415 }, { "epoch": 1.4248372136599992, "grad_norm": 0.5619397801705224, "learning_rate": 1.0082418649049532e-06, "loss": 0.2652, "step": 30416 }, { "epoch": 1.4248840586499274, "grad_norm": 0.5468787715267139, "learning_rate": 1.0080896821634333e-06, "loss": 0.2664, "step": 30417 }, { "epoch": 1.4249309036398556, "grad_norm": 0.583744585811549, "learning_rate": 1.0079375080074825e-06, "loss": 0.2743, "step": 30418 }, { "epoch": 1.424977748629784, "grad_norm": 0.6225012095469555, "learning_rate": 1.0077853424379758e-06, "loss": 0.2755, "step": 30419 }, { "epoch": 1.4250245936197123, "grad_norm": 0.5999037738141785, "learning_rate": 1.0076331854557898e-06, "loss": 0.2752, "step": 30420 }, { "epoch": 1.4250714386096406, "grad_norm": 0.6109602774488416, "learning_rate": 1.0074810370618004e-06, "loss": 0.2806, "step": 30421 }, { "epoch": 1.425118283599569, "grad_norm": 0.5577621681817553, "learning_rate": 1.0073288972568826e-06, "loss": 0.2662, "step": 30422 }, { "epoch": 1.4251651285894973, "grad_norm": 0.6266872609742443, "learning_rate": 1.007176766041911e-06, "loss": 0.2731, "step": 30423 }, { "epoch": 1.4252119735794258, "grad_norm": 0.5922869387516914, "learning_rate": 1.007024643417762e-06, "loss": 0.2711, "step": 30424 }, { "epoch": 1.425258818569354, "grad_norm": 0.6432735053192972, "learning_rate": 1.0068725293853107e-06, "loss": 0.2697, "step": 30425 }, { "epoch": 1.4253056635592825, "grad_norm": 0.6200915535050776, "learning_rate": 1.0067204239454329e-06, "loss": 0.2872, "step": 30426 }, { "epoch": 1.4253525085492107, "grad_norm": 0.5802065420709369, "learning_rate": 1.0065683270990033e-06, "loss": 0.2701, "step": 30427 }, { "epoch": 1.425399353539139, "grad_norm": 0.5942466231346781, "learning_rate": 1.006416238846899e-06, "loss": 0.2764, "step": 30428 }, { "epoch": 1.4254461985290674, "grad_norm": 0.6051759639920312, "learning_rate": 1.0062641591899936e-06, "loss": 0.2703, "step": 30429 }, { "epoch": 1.4254930435189956, "grad_norm": 0.5791255812587636, "learning_rate": 1.0061120881291617e-06, "loss": 0.2638, "step": 30430 }, { "epoch": 1.4255398885089239, "grad_norm": 0.6163769578238276, "learning_rate": 1.0059600256652793e-06, "loss": 0.2674, "step": 30431 }, { "epoch": 1.4255867334988523, "grad_norm": 0.6119344404670388, "learning_rate": 1.0058079717992213e-06, "loss": 0.2724, "step": 30432 }, { "epoch": 1.4256335784887806, "grad_norm": 0.5635439791983782, "learning_rate": 1.0056559265318634e-06, "loss": 0.2601, "step": 30433 }, { "epoch": 1.4256804234787088, "grad_norm": 0.5929216121267883, "learning_rate": 1.0055038898640803e-06, "loss": 0.2759, "step": 30434 }, { "epoch": 1.4257272684686373, "grad_norm": 0.6011397019257911, "learning_rate": 1.0053518617967462e-06, "loss": 0.2816, "step": 30435 }, { "epoch": 1.4257741134585655, "grad_norm": 0.5866293633514761, "learning_rate": 1.0051998423307371e-06, "loss": 0.2628, "step": 30436 }, { "epoch": 1.425820958448494, "grad_norm": 0.6080292025056835, "learning_rate": 1.0050478314669262e-06, "loss": 0.268, "step": 30437 }, { "epoch": 1.4258678034384222, "grad_norm": 0.5862756055479731, "learning_rate": 1.0048958292061898e-06, "loss": 0.2674, "step": 30438 }, { "epoch": 1.4259146484283507, "grad_norm": 0.5711044055987586, "learning_rate": 1.0047438355494016e-06, "loss": 0.2646, "step": 30439 }, { "epoch": 1.425961493418279, "grad_norm": 0.5856754094307878, "learning_rate": 1.0045918504974378e-06, "loss": 0.2731, "step": 30440 }, { "epoch": 1.4260083384082072, "grad_norm": 0.6111166864863421, "learning_rate": 1.0044398740511713e-06, "loss": 0.267, "step": 30441 }, { "epoch": 1.4260551833981356, "grad_norm": 0.5935985164254785, "learning_rate": 1.0042879062114775e-06, "loss": 0.2739, "step": 30442 }, { "epoch": 1.4261020283880639, "grad_norm": 0.6190153580428991, "learning_rate": 1.0041359469792317e-06, "loss": 0.2744, "step": 30443 }, { "epoch": 1.4261488733779921, "grad_norm": 0.5939679956234081, "learning_rate": 1.0039839963553066e-06, "loss": 0.2703, "step": 30444 }, { "epoch": 1.4261957183679206, "grad_norm": 0.5833031423774749, "learning_rate": 1.0038320543405774e-06, "loss": 0.2637, "step": 30445 }, { "epoch": 1.4262425633578488, "grad_norm": 0.6235860777568182, "learning_rate": 1.0036801209359199e-06, "loss": 0.2818, "step": 30446 }, { "epoch": 1.4262894083477773, "grad_norm": 0.5502729503644694, "learning_rate": 1.003528196142206e-06, "loss": 0.2495, "step": 30447 }, { "epoch": 1.4263362533377055, "grad_norm": 0.6436925192775247, "learning_rate": 1.0033762799603118e-06, "loss": 0.2775, "step": 30448 }, { "epoch": 1.426383098327634, "grad_norm": 0.5912579278100484, "learning_rate": 1.0032243723911104e-06, "loss": 0.2825, "step": 30449 }, { "epoch": 1.4264299433175622, "grad_norm": 0.5594365714143356, "learning_rate": 1.0030724734354776e-06, "loss": 0.2576, "step": 30450 }, { "epoch": 1.4264767883074905, "grad_norm": 0.6206946410321765, "learning_rate": 1.0029205830942855e-06, "loss": 0.2908, "step": 30451 }, { "epoch": 1.426523633297419, "grad_norm": 0.6999587779863593, "learning_rate": 1.00276870136841e-06, "loss": 0.3015, "step": 30452 }, { "epoch": 1.4265704782873472, "grad_norm": 0.5896796088726167, "learning_rate": 1.0026168282587237e-06, "loss": 0.2635, "step": 30453 }, { "epoch": 1.4266173232772754, "grad_norm": 0.5633445562513099, "learning_rate": 1.002464963766101e-06, "loss": 0.2676, "step": 30454 }, { "epoch": 1.4266641682672039, "grad_norm": 0.6103256158451147, "learning_rate": 1.0023131078914161e-06, "loss": 0.267, "step": 30455 }, { "epoch": 1.4267110132571321, "grad_norm": 0.638871025348931, "learning_rate": 1.0021612606355438e-06, "loss": 0.2892, "step": 30456 }, { "epoch": 1.4267578582470604, "grad_norm": 0.6350383275271353, "learning_rate": 1.0020094219993558e-06, "loss": 0.2794, "step": 30457 }, { "epoch": 1.4268047032369888, "grad_norm": 0.5910833974534692, "learning_rate": 1.0018575919837285e-06, "loss": 0.2777, "step": 30458 }, { "epoch": 1.426851548226917, "grad_norm": 0.5706560901818841, "learning_rate": 1.0017057705895329e-06, "loss": 0.2782, "step": 30459 }, { "epoch": 1.4268983932168455, "grad_norm": 0.6198728647446846, "learning_rate": 1.001553957817644e-06, "loss": 0.2659, "step": 30460 }, { "epoch": 1.4269452382067738, "grad_norm": 0.6194113051243437, "learning_rate": 1.0014021536689355e-06, "loss": 0.2725, "step": 30461 }, { "epoch": 1.4269920831967022, "grad_norm": 0.5662598457991832, "learning_rate": 1.0012503581442808e-06, "loss": 0.2723, "step": 30462 }, { "epoch": 1.4270389281866305, "grad_norm": 0.6219244484672317, "learning_rate": 1.0010985712445548e-06, "loss": 0.2882, "step": 30463 }, { "epoch": 1.4270857731765587, "grad_norm": 0.6156491422065283, "learning_rate": 1.0009467929706298e-06, "loss": 0.2721, "step": 30464 }, { "epoch": 1.4271326181664872, "grad_norm": 0.5886679988476874, "learning_rate": 1.0007950233233782e-06, "loss": 0.2588, "step": 30465 }, { "epoch": 1.4271794631564154, "grad_norm": 0.6099191901874557, "learning_rate": 1.0006432623036745e-06, "loss": 0.2683, "step": 30466 }, { "epoch": 1.4272263081463437, "grad_norm": 0.6709704060045191, "learning_rate": 1.000491509912392e-06, "loss": 0.2932, "step": 30467 }, { "epoch": 1.4272731531362721, "grad_norm": 0.5803266216285364, "learning_rate": 1.0003397661504036e-06, "loss": 0.2754, "step": 30468 }, { "epoch": 1.4273199981262004, "grad_norm": 0.6835055490324794, "learning_rate": 1.0001880310185833e-06, "loss": 0.296, "step": 30469 }, { "epoch": 1.4273668431161286, "grad_norm": 0.5720743241347369, "learning_rate": 1.0000363045178047e-06, "loss": 0.2472, "step": 30470 }, { "epoch": 1.427413688106057, "grad_norm": 0.5878304949893641, "learning_rate": 9.9988458664894e-07, "loss": 0.2711, "step": 30471 }, { "epoch": 1.4274605330959853, "grad_norm": 0.5778950542593744, "learning_rate": 9.99732877412862e-07, "loss": 0.2708, "step": 30472 }, { "epoch": 1.4275073780859138, "grad_norm": 0.6423886159806608, "learning_rate": 9.99581176810444e-07, "loss": 0.2847, "step": 30473 }, { "epoch": 1.427554223075842, "grad_norm": 0.5572359427565046, "learning_rate": 9.99429484842559e-07, "loss": 0.2555, "step": 30474 }, { "epoch": 1.4276010680657705, "grad_norm": 0.5895436190871717, "learning_rate": 9.992778015100804e-07, "loss": 0.2755, "step": 30475 }, { "epoch": 1.4276479130556987, "grad_norm": 0.5881165939549297, "learning_rate": 9.991261268138816e-07, "loss": 0.2714, "step": 30476 }, { "epoch": 1.427694758045627, "grad_norm": 0.5771908485333836, "learning_rate": 9.98974460754834e-07, "loss": 0.2745, "step": 30477 }, { "epoch": 1.4277416030355554, "grad_norm": 0.6408084263710424, "learning_rate": 9.988228033338118e-07, "loss": 0.2957, "step": 30478 }, { "epoch": 1.4277884480254837, "grad_norm": 0.6189024918655494, "learning_rate": 9.986711545516864e-07, "loss": 0.2799, "step": 30479 }, { "epoch": 1.427835293015412, "grad_norm": 0.6105172360685119, "learning_rate": 9.98519514409331e-07, "loss": 0.2694, "step": 30480 }, { "epoch": 1.4278821380053404, "grad_norm": 0.5992379667435422, "learning_rate": 9.983678829076185e-07, "loss": 0.2645, "step": 30481 }, { "epoch": 1.4279289829952686, "grad_norm": 0.6121806578217638, "learning_rate": 9.982162600474213e-07, "loss": 0.2809, "step": 30482 }, { "epoch": 1.427975827985197, "grad_norm": 0.62783863876218, "learning_rate": 9.98064645829613e-07, "loss": 0.2659, "step": 30483 }, { "epoch": 1.4280226729751253, "grad_norm": 0.5648730301941483, "learning_rate": 9.979130402550644e-07, "loss": 0.2691, "step": 30484 }, { "epoch": 1.4280695179650538, "grad_norm": 0.606331959966857, "learning_rate": 9.977614433246493e-07, "loss": 0.2734, "step": 30485 }, { "epoch": 1.428116362954982, "grad_norm": 0.580463440629306, "learning_rate": 9.97609855039239e-07, "loss": 0.2691, "step": 30486 }, { "epoch": 1.4281632079449103, "grad_norm": 0.5602984267348351, "learning_rate": 9.97458275399706e-07, "loss": 0.2651, "step": 30487 }, { "epoch": 1.4282100529348387, "grad_norm": 0.6249569570675054, "learning_rate": 9.973067044069235e-07, "loss": 0.2715, "step": 30488 }, { "epoch": 1.428256897924767, "grad_norm": 0.6082511066153325, "learning_rate": 9.971551420617636e-07, "loss": 0.2608, "step": 30489 }, { "epoch": 1.4283037429146952, "grad_norm": 0.635628516136112, "learning_rate": 9.970035883650975e-07, "loss": 0.2667, "step": 30490 }, { "epoch": 1.4283505879046237, "grad_norm": 0.6341710903445316, "learning_rate": 9.96852043317798e-07, "loss": 0.2863, "step": 30491 }, { "epoch": 1.428397432894552, "grad_norm": 0.633771581756624, "learning_rate": 9.96700506920738e-07, "loss": 0.2786, "step": 30492 }, { "epoch": 1.4284442778844801, "grad_norm": 0.6147970352571395, "learning_rate": 9.965489791747878e-07, "loss": 0.284, "step": 30493 }, { "epoch": 1.4284911228744086, "grad_norm": 0.5858343246641505, "learning_rate": 9.963974600808205e-07, "loss": 0.2586, "step": 30494 }, { "epoch": 1.4285379678643368, "grad_norm": 0.6075035200109051, "learning_rate": 9.962459496397087e-07, "loss": 0.2677, "step": 30495 }, { "epoch": 1.4285848128542653, "grad_norm": 0.5990258632332688, "learning_rate": 9.960944478523224e-07, "loss": 0.2711, "step": 30496 }, { "epoch": 1.4286316578441935, "grad_norm": 0.6538352693846796, "learning_rate": 9.959429547195348e-07, "loss": 0.2934, "step": 30497 }, { "epoch": 1.428678502834122, "grad_norm": 0.6717675105394073, "learning_rate": 9.957914702422176e-07, "loss": 0.2731, "step": 30498 }, { "epoch": 1.4287253478240503, "grad_norm": 0.593559602794528, "learning_rate": 9.956399944212431e-07, "loss": 0.2811, "step": 30499 }, { "epoch": 1.4287721928139785, "grad_norm": 0.571199775527896, "learning_rate": 9.954885272574813e-07, "loss": 0.2598, "step": 30500 }, { "epoch": 1.428819037803907, "grad_norm": 0.6009177063531222, "learning_rate": 9.95337068751806e-07, "loss": 0.2672, "step": 30501 }, { "epoch": 1.4288658827938352, "grad_norm": 0.6049162724474948, "learning_rate": 9.951856189050868e-07, "loss": 0.2649, "step": 30502 }, { "epoch": 1.4289127277837634, "grad_norm": 0.6156129186364078, "learning_rate": 9.95034177718196e-07, "loss": 0.2574, "step": 30503 }, { "epoch": 1.428959572773692, "grad_norm": 0.6002156346562919, "learning_rate": 9.948827451920054e-07, "loss": 0.2699, "step": 30504 }, { "epoch": 1.4290064177636201, "grad_norm": 0.5746716886988975, "learning_rate": 9.947313213273871e-07, "loss": 0.2562, "step": 30505 }, { "epoch": 1.4290532627535484, "grad_norm": 0.6440824363769858, "learning_rate": 9.945799061252109e-07, "loss": 0.2888, "step": 30506 }, { "epoch": 1.4291001077434768, "grad_norm": 0.5826150773648546, "learning_rate": 9.944284995863498e-07, "loss": 0.2702, "step": 30507 }, { "epoch": 1.429146952733405, "grad_norm": 0.5484540977947089, "learning_rate": 9.942771017116735e-07, "loss": 0.2473, "step": 30508 }, { "epoch": 1.4291937977233335, "grad_norm": 0.5946803189653849, "learning_rate": 9.94125712502054e-07, "loss": 0.2732, "step": 30509 }, { "epoch": 1.4292406427132618, "grad_norm": 0.6025693364232168, "learning_rate": 9.939743319583622e-07, "loss": 0.2723, "step": 30510 }, { "epoch": 1.4292874877031903, "grad_norm": 0.5686067212143854, "learning_rate": 9.9382296008147e-07, "loss": 0.2676, "step": 30511 }, { "epoch": 1.4293343326931185, "grad_norm": 0.6573615534159563, "learning_rate": 9.936715968722487e-07, "loss": 0.2842, "step": 30512 }, { "epoch": 1.4293811776830467, "grad_norm": 0.5789354320711977, "learning_rate": 9.93520242331569e-07, "loss": 0.2697, "step": 30513 }, { "epoch": 1.4294280226729752, "grad_norm": 0.6157796406961639, "learning_rate": 9.93368896460301e-07, "loss": 0.292, "step": 30514 }, { "epoch": 1.4294748676629034, "grad_norm": 0.605872554332781, "learning_rate": 9.932175592593157e-07, "loss": 0.2689, "step": 30515 }, { "epoch": 1.4295217126528317, "grad_norm": 0.566326401245861, "learning_rate": 9.93066230729485e-07, "loss": 0.2685, "step": 30516 }, { "epoch": 1.4295685576427601, "grad_norm": 0.585095181946555, "learning_rate": 9.929149108716792e-07, "loss": 0.2643, "step": 30517 }, { "epoch": 1.4296154026326884, "grad_norm": 0.6013263961502122, "learning_rate": 9.927635996867697e-07, "loss": 0.2754, "step": 30518 }, { "epoch": 1.4296622476226168, "grad_norm": 0.582757694659945, "learning_rate": 9.926122971756273e-07, "loss": 0.2652, "step": 30519 }, { "epoch": 1.429709092612545, "grad_norm": 0.5847537782727747, "learning_rate": 9.924610033391225e-07, "loss": 0.2665, "step": 30520 }, { "epoch": 1.4297559376024735, "grad_norm": 0.6195160830722035, "learning_rate": 9.923097181781246e-07, "loss": 0.289, "step": 30521 }, { "epoch": 1.4298027825924018, "grad_norm": 0.597337936178448, "learning_rate": 9.921584416935054e-07, "loss": 0.2879, "step": 30522 }, { "epoch": 1.42984962758233, "grad_norm": 0.57000019527092, "learning_rate": 9.920071738861357e-07, "loss": 0.2783, "step": 30523 }, { "epoch": 1.4298964725722585, "grad_norm": 0.6395666861938423, "learning_rate": 9.918559147568855e-07, "loss": 0.2863, "step": 30524 }, { "epoch": 1.4299433175621867, "grad_norm": 0.6025165340128822, "learning_rate": 9.91704664306626e-07, "loss": 0.2632, "step": 30525 }, { "epoch": 1.429990162552115, "grad_norm": 0.6255612890628661, "learning_rate": 9.915534225362267e-07, "loss": 0.2848, "step": 30526 }, { "epoch": 1.4300370075420434, "grad_norm": 0.5809744617864725, "learning_rate": 9.914021894465589e-07, "loss": 0.2664, "step": 30527 }, { "epoch": 1.4300838525319717, "grad_norm": 0.5685250076938764, "learning_rate": 9.912509650384916e-07, "loss": 0.2684, "step": 30528 }, { "epoch": 1.4301306975219, "grad_norm": 0.6460270208777845, "learning_rate": 9.910997493128956e-07, "loss": 0.265, "step": 30529 }, { "epoch": 1.4301775425118284, "grad_norm": 0.6118997007218212, "learning_rate": 9.909485422706414e-07, "loss": 0.2693, "step": 30530 }, { "epoch": 1.4302243875017566, "grad_norm": 0.56267606992978, "learning_rate": 9.907973439126e-07, "loss": 0.2653, "step": 30531 }, { "epoch": 1.430271232491685, "grad_norm": 0.6030960685012458, "learning_rate": 9.906461542396398e-07, "loss": 0.2602, "step": 30532 }, { "epoch": 1.4303180774816133, "grad_norm": 0.6096150062032962, "learning_rate": 9.904949732526313e-07, "loss": 0.2854, "step": 30533 }, { "epoch": 1.4303649224715418, "grad_norm": 0.5986962664717494, "learning_rate": 9.903438009524457e-07, "loss": 0.2777, "step": 30534 }, { "epoch": 1.43041176746147, "grad_norm": 0.5941385734936717, "learning_rate": 9.901926373399515e-07, "loss": 0.2808, "step": 30535 }, { "epoch": 1.4304586124513983, "grad_norm": 0.5917537333122797, "learning_rate": 9.90041482416019e-07, "loss": 0.2833, "step": 30536 }, { "epoch": 1.4305054574413267, "grad_norm": 0.6249757703303718, "learning_rate": 9.898903361815193e-07, "loss": 0.271, "step": 30537 }, { "epoch": 1.430552302431255, "grad_norm": 0.5899308378941907, "learning_rate": 9.8973919863732e-07, "loss": 0.2566, "step": 30538 }, { "epoch": 1.4305991474211832, "grad_norm": 0.5887270787075773, "learning_rate": 9.895880697842924e-07, "loss": 0.2575, "step": 30539 }, { "epoch": 1.4306459924111117, "grad_norm": 0.6275736490465859, "learning_rate": 9.894369496233056e-07, "loss": 0.2752, "step": 30540 }, { "epoch": 1.43069283740104, "grad_norm": 0.7854109144378447, "learning_rate": 9.892858381552303e-07, "loss": 0.2725, "step": 30541 }, { "epoch": 1.4307396823909682, "grad_norm": 0.5762837702766036, "learning_rate": 9.891347353809347e-07, "loss": 0.2695, "step": 30542 }, { "epoch": 1.4307865273808966, "grad_norm": 0.5524691946413507, "learning_rate": 9.889836413012897e-07, "loss": 0.2571, "step": 30543 }, { "epoch": 1.4308333723708249, "grad_norm": 0.6358615062359321, "learning_rate": 9.88832555917163e-07, "loss": 0.2912, "step": 30544 }, { "epoch": 1.4308802173607533, "grad_norm": 0.6063647480816949, "learning_rate": 9.886814792294252e-07, "loss": 0.2766, "step": 30545 }, { "epoch": 1.4309270623506816, "grad_norm": 0.5988708715902828, "learning_rate": 9.885304112389457e-07, "loss": 0.2652, "step": 30546 }, { "epoch": 1.43097390734061, "grad_norm": 0.6097139894817635, "learning_rate": 9.883793519465948e-07, "loss": 0.2979, "step": 30547 }, { "epoch": 1.4310207523305383, "grad_norm": 0.560687070042441, "learning_rate": 9.882283013532398e-07, "loss": 0.263, "step": 30548 }, { "epoch": 1.4310675973204665, "grad_norm": 0.589209415554287, "learning_rate": 9.880772594597518e-07, "loss": 0.2752, "step": 30549 }, { "epoch": 1.431114442310395, "grad_norm": 0.6564972073646477, "learning_rate": 9.879262262669983e-07, "loss": 0.2783, "step": 30550 }, { "epoch": 1.4311612873003232, "grad_norm": 0.5849130832511544, "learning_rate": 9.877752017758494e-07, "loss": 0.2987, "step": 30551 }, { "epoch": 1.4312081322902515, "grad_norm": 0.6176291516067611, "learning_rate": 9.876241859871737e-07, "loss": 0.2979, "step": 30552 }, { "epoch": 1.43125497728018, "grad_norm": 0.5542713882440843, "learning_rate": 9.874731789018412e-07, "loss": 0.2659, "step": 30553 }, { "epoch": 1.4313018222701082, "grad_norm": 0.5833868476969515, "learning_rate": 9.873221805207212e-07, "loss": 0.2601, "step": 30554 }, { "epoch": 1.4313486672600366, "grad_norm": 0.614751939697991, "learning_rate": 9.871711908446807e-07, "loss": 0.2648, "step": 30555 }, { "epoch": 1.4313955122499649, "grad_norm": 0.6317738404426274, "learning_rate": 9.87020209874591e-07, "loss": 0.2682, "step": 30556 }, { "epoch": 1.4314423572398933, "grad_norm": 0.6180915180768974, "learning_rate": 9.868692376113186e-07, "loss": 0.2836, "step": 30557 }, { "epoch": 1.4314892022298216, "grad_norm": 0.633386118973142, "learning_rate": 9.867182740557335e-07, "loss": 0.2774, "step": 30558 }, { "epoch": 1.4315360472197498, "grad_norm": 0.6163052321360777, "learning_rate": 9.865673192087044e-07, "loss": 0.2696, "step": 30559 }, { "epoch": 1.4315828922096783, "grad_norm": 0.6513836794953483, "learning_rate": 9.864163730711e-07, "loss": 0.2888, "step": 30560 }, { "epoch": 1.4316297371996065, "grad_norm": 0.6118897358745455, "learning_rate": 9.862654356437898e-07, "loss": 0.2636, "step": 30561 }, { "epoch": 1.4316765821895348, "grad_norm": 0.6117330286673504, "learning_rate": 9.861145069276415e-07, "loss": 0.2677, "step": 30562 }, { "epoch": 1.4317234271794632, "grad_norm": 0.6044898827567422, "learning_rate": 9.85963586923523e-07, "loss": 0.2797, "step": 30563 }, { "epoch": 1.4317702721693915, "grad_norm": 0.5835027723098157, "learning_rate": 9.858126756323034e-07, "loss": 0.2807, "step": 30564 }, { "epoch": 1.4318171171593197, "grad_norm": 0.6392866465191948, "learning_rate": 9.856617730548512e-07, "loss": 0.2777, "step": 30565 }, { "epoch": 1.4318639621492482, "grad_norm": 0.6370067002807593, "learning_rate": 9.855108791920352e-07, "loss": 0.2787, "step": 30566 }, { "epoch": 1.4319108071391764, "grad_norm": 0.6047065932993064, "learning_rate": 9.853599940447232e-07, "loss": 0.2699, "step": 30567 }, { "epoch": 1.4319576521291049, "grad_norm": 0.5635163121055373, "learning_rate": 9.852091176137845e-07, "loss": 0.2712, "step": 30568 }, { "epoch": 1.432004497119033, "grad_norm": 0.590988450444146, "learning_rate": 9.850582499000866e-07, "loss": 0.2751, "step": 30569 }, { "epoch": 1.4320513421089616, "grad_norm": 0.596281345905545, "learning_rate": 9.849073909044971e-07, "loss": 0.2704, "step": 30570 }, { "epoch": 1.4320981870988898, "grad_norm": 0.5755542465001015, "learning_rate": 9.847565406278845e-07, "loss": 0.2686, "step": 30571 }, { "epoch": 1.432145032088818, "grad_norm": 0.610129650271886, "learning_rate": 9.846056990711172e-07, "loss": 0.2758, "step": 30572 }, { "epoch": 1.4321918770787465, "grad_norm": 0.616860363299615, "learning_rate": 9.844548662350633e-07, "loss": 0.2718, "step": 30573 }, { "epoch": 1.4322387220686748, "grad_norm": 0.5789968522645915, "learning_rate": 9.843040421205918e-07, "loss": 0.2702, "step": 30574 }, { "epoch": 1.432285567058603, "grad_norm": 0.5537488290642458, "learning_rate": 9.841532267285684e-07, "loss": 0.2507, "step": 30575 }, { "epoch": 1.4323324120485315, "grad_norm": 0.5930810993561595, "learning_rate": 9.840024200598633e-07, "loss": 0.2859, "step": 30576 }, { "epoch": 1.4323792570384597, "grad_norm": 0.6237109328299412, "learning_rate": 9.83851622115342e-07, "loss": 0.2832, "step": 30577 }, { "epoch": 1.432426102028388, "grad_norm": 0.5927033871292373, "learning_rate": 9.837008328958738e-07, "loss": 0.2623, "step": 30578 }, { "epoch": 1.4324729470183164, "grad_norm": 0.5448203663090976, "learning_rate": 9.835500524023263e-07, "loss": 0.2614, "step": 30579 }, { "epoch": 1.4325197920082446, "grad_norm": 0.5783785817931671, "learning_rate": 9.83399280635568e-07, "loss": 0.2621, "step": 30580 }, { "epoch": 1.432566636998173, "grad_norm": 0.5938430660480108, "learning_rate": 9.832485175964646e-07, "loss": 0.2725, "step": 30581 }, { "epoch": 1.4326134819881013, "grad_norm": 0.5848785503634458, "learning_rate": 9.830977632858846e-07, "loss": 0.2636, "step": 30582 }, { "epoch": 1.4326603269780298, "grad_norm": 0.6338062409232432, "learning_rate": 9.829470177046968e-07, "loss": 0.2776, "step": 30583 }, { "epoch": 1.432707171967958, "grad_norm": 0.5650897559170057, "learning_rate": 9.827962808537669e-07, "loss": 0.2627, "step": 30584 }, { "epoch": 1.4327540169578863, "grad_norm": 0.5766029385920676, "learning_rate": 9.82645552733963e-07, "loss": 0.2745, "step": 30585 }, { "epoch": 1.4328008619478148, "grad_norm": 0.5893664720451813, "learning_rate": 9.82494833346153e-07, "loss": 0.2788, "step": 30586 }, { "epoch": 1.432847706937743, "grad_norm": 0.6343887625342998, "learning_rate": 9.823441226912033e-07, "loss": 0.2952, "step": 30587 }, { "epoch": 1.4328945519276712, "grad_norm": 0.5908174194237082, "learning_rate": 9.82193420769982e-07, "loss": 0.2657, "step": 30588 }, { "epoch": 1.4329413969175997, "grad_norm": 0.5910986520027132, "learning_rate": 9.820427275833558e-07, "loss": 0.2854, "step": 30589 }, { "epoch": 1.432988241907528, "grad_norm": 0.6091030548846357, "learning_rate": 9.81892043132193e-07, "loss": 0.2698, "step": 30590 }, { "epoch": 1.4330350868974562, "grad_norm": 0.5749353110122246, "learning_rate": 9.81741367417359e-07, "loss": 0.2672, "step": 30591 }, { "epoch": 1.4330819318873846, "grad_norm": 0.5780090350407927, "learning_rate": 9.81590700439723e-07, "loss": 0.2739, "step": 30592 }, { "epoch": 1.433128776877313, "grad_norm": 0.5840383818791463, "learning_rate": 9.814400422001497e-07, "loss": 0.2796, "step": 30593 }, { "epoch": 1.4331756218672413, "grad_norm": 0.5398861098096405, "learning_rate": 9.812893926995074e-07, "loss": 0.2464, "step": 30594 }, { "epoch": 1.4332224668571696, "grad_norm": 0.6025921170794002, "learning_rate": 9.811387519386627e-07, "loss": 0.2822, "step": 30595 }, { "epoch": 1.433269311847098, "grad_norm": 0.5990887626368113, "learning_rate": 9.80988119918484e-07, "loss": 0.2776, "step": 30596 }, { "epoch": 1.4333161568370263, "grad_norm": 0.5832881406338108, "learning_rate": 9.80837496639836e-07, "loss": 0.2732, "step": 30597 }, { "epoch": 1.4333630018269545, "grad_norm": 0.555067202289595, "learning_rate": 9.806868821035869e-07, "loss": 0.2569, "step": 30598 }, { "epoch": 1.433409846816883, "grad_norm": 0.6171240664117865, "learning_rate": 9.805362763106024e-07, "loss": 0.2796, "step": 30599 }, { "epoch": 1.4334566918068112, "grad_norm": 0.6044150190282858, "learning_rate": 9.803856792617495e-07, "loss": 0.288, "step": 30600 }, { "epoch": 1.4335035367967395, "grad_norm": 0.601656837505605, "learning_rate": 9.802350909578951e-07, "loss": 0.2833, "step": 30601 }, { "epoch": 1.433550381786668, "grad_norm": 0.5712699903332791, "learning_rate": 9.800845113999058e-07, "loss": 0.2532, "step": 30602 }, { "epoch": 1.4335972267765962, "grad_norm": 0.6348620286869169, "learning_rate": 9.79933940588649e-07, "loss": 0.2707, "step": 30603 }, { "epoch": 1.4336440717665246, "grad_norm": 0.5897578666456167, "learning_rate": 9.797833785249906e-07, "loss": 0.2581, "step": 30604 }, { "epoch": 1.4336909167564529, "grad_norm": 0.5739235861217777, "learning_rate": 9.796328252097955e-07, "loss": 0.2649, "step": 30605 }, { "epoch": 1.4337377617463813, "grad_norm": 0.5831348546282558, "learning_rate": 9.794822806439314e-07, "loss": 0.2634, "step": 30606 }, { "epoch": 1.4337846067363096, "grad_norm": 0.6046935657754069, "learning_rate": 9.793317448282647e-07, "loss": 0.2723, "step": 30607 }, { "epoch": 1.4338314517262378, "grad_norm": 0.5740635408883553, "learning_rate": 9.791812177636616e-07, "loss": 0.2766, "step": 30608 }, { "epoch": 1.4338782967161663, "grad_norm": 0.5473955014682778, "learning_rate": 9.790306994509884e-07, "loss": 0.2514, "step": 30609 }, { "epoch": 1.4339251417060945, "grad_norm": 0.6151290989189238, "learning_rate": 9.788801898911122e-07, "loss": 0.276, "step": 30610 }, { "epoch": 1.4339719866960228, "grad_norm": 0.5580921158422175, "learning_rate": 9.78729689084898e-07, "loss": 0.2639, "step": 30611 }, { "epoch": 1.4340188316859512, "grad_norm": 0.5811837534020442, "learning_rate": 9.785791970332112e-07, "loss": 0.2754, "step": 30612 }, { "epoch": 1.4340656766758795, "grad_norm": 0.5762319100546971, "learning_rate": 9.78428713736919e-07, "loss": 0.2661, "step": 30613 }, { "epoch": 1.4341125216658077, "grad_norm": 0.5884037282563265, "learning_rate": 9.782782391968868e-07, "loss": 0.2724, "step": 30614 }, { "epoch": 1.4341593666557362, "grad_norm": 0.5538500827241037, "learning_rate": 9.781277734139806e-07, "loss": 0.2583, "step": 30615 }, { "epoch": 1.4342062116456644, "grad_norm": 0.6263826932690768, "learning_rate": 9.77977316389068e-07, "loss": 0.2873, "step": 30616 }, { "epoch": 1.4342530566355929, "grad_norm": 0.5597517765492007, "learning_rate": 9.778268681230125e-07, "loss": 0.253, "step": 30617 }, { "epoch": 1.4342999016255211, "grad_norm": 0.5770297992182067, "learning_rate": 9.776764286166815e-07, "loss": 0.2644, "step": 30618 }, { "epoch": 1.4343467466154496, "grad_norm": 0.5853780203891575, "learning_rate": 9.775259978709389e-07, "loss": 0.2744, "step": 30619 }, { "epoch": 1.4343935916053778, "grad_norm": 0.5973636869212818, "learning_rate": 9.773755758866518e-07, "loss": 0.2789, "step": 30620 }, { "epoch": 1.434440436595306, "grad_norm": 0.5848888423994063, "learning_rate": 9.772251626646855e-07, "loss": 0.2706, "step": 30621 }, { "epoch": 1.4344872815852345, "grad_norm": 0.5717802747286616, "learning_rate": 9.77074758205906e-07, "loss": 0.2764, "step": 30622 }, { "epoch": 1.4345341265751628, "grad_norm": 0.6342767690829729, "learning_rate": 9.769243625111789e-07, "loss": 0.2947, "step": 30623 }, { "epoch": 1.434580971565091, "grad_norm": 0.5976414770965787, "learning_rate": 9.767739755813685e-07, "loss": 0.279, "step": 30624 }, { "epoch": 1.4346278165550195, "grad_norm": 0.6490208433524667, "learning_rate": 9.766235974173419e-07, "loss": 0.2884, "step": 30625 }, { "epoch": 1.4346746615449477, "grad_norm": 0.6015336384123212, "learning_rate": 9.764732280199628e-07, "loss": 0.2717, "step": 30626 }, { "epoch": 1.434721506534876, "grad_norm": 0.5613246033162033, "learning_rate": 9.763228673900973e-07, "loss": 0.2648, "step": 30627 }, { "epoch": 1.4347683515248044, "grad_norm": 0.5841326197874173, "learning_rate": 9.761725155286108e-07, "loss": 0.2819, "step": 30628 }, { "epoch": 1.4348151965147329, "grad_norm": 0.6259108135552107, "learning_rate": 9.760221724363694e-07, "loss": 0.268, "step": 30629 }, { "epoch": 1.4348620415046611, "grad_norm": 0.571750322651283, "learning_rate": 9.758718381142365e-07, "loss": 0.2787, "step": 30630 }, { "epoch": 1.4349088864945894, "grad_norm": 0.6062973558945188, "learning_rate": 9.757215125630778e-07, "loss": 0.2906, "step": 30631 }, { "epoch": 1.4349557314845178, "grad_norm": 0.5453500422221975, "learning_rate": 9.755711957837598e-07, "loss": 0.254, "step": 30632 }, { "epoch": 1.435002576474446, "grad_norm": 0.5512726715347903, "learning_rate": 9.754208877771456e-07, "loss": 0.2503, "step": 30633 }, { "epoch": 1.4350494214643743, "grad_norm": 0.5744487913176569, "learning_rate": 9.75270588544101e-07, "loss": 0.248, "step": 30634 }, { "epoch": 1.4350962664543028, "grad_norm": 0.6164942448708853, "learning_rate": 9.751202980854916e-07, "loss": 0.2789, "step": 30635 }, { "epoch": 1.435143111444231, "grad_norm": 0.5762269728306514, "learning_rate": 9.749700164021808e-07, "loss": 0.2481, "step": 30636 }, { "epoch": 1.4351899564341593, "grad_norm": 0.6230323939021615, "learning_rate": 9.748197434950343e-07, "loss": 0.2817, "step": 30637 }, { "epoch": 1.4352368014240877, "grad_norm": 0.5971528774212104, "learning_rate": 9.74669479364917e-07, "loss": 0.2733, "step": 30638 }, { "epoch": 1.435283646414016, "grad_norm": 0.5938466832904106, "learning_rate": 9.74519224012694e-07, "loss": 0.2735, "step": 30639 }, { "epoch": 1.4353304914039444, "grad_norm": 0.5707522716686969, "learning_rate": 9.743689774392289e-07, "loss": 0.2674, "step": 30640 }, { "epoch": 1.4353773363938727, "grad_norm": 0.5895944768235305, "learning_rate": 9.742187396453879e-07, "loss": 0.2606, "step": 30641 }, { "epoch": 1.4354241813838011, "grad_norm": 0.5543725082421213, "learning_rate": 9.740685106320334e-07, "loss": 0.2449, "step": 30642 }, { "epoch": 1.4354710263737294, "grad_norm": 0.5404052194663747, "learning_rate": 9.73918290400031e-07, "loss": 0.255, "step": 30643 }, { "epoch": 1.4355178713636576, "grad_norm": 0.6043205773942548, "learning_rate": 9.737680789502455e-07, "loss": 0.2654, "step": 30644 }, { "epoch": 1.435564716353586, "grad_norm": 0.5806899743271993, "learning_rate": 9.736178762835417e-07, "loss": 0.2777, "step": 30645 }, { "epoch": 1.4356115613435143, "grad_norm": 0.5755431794616546, "learning_rate": 9.734676824007827e-07, "loss": 0.277, "step": 30646 }, { "epoch": 1.4356584063334425, "grad_norm": 0.577013124215784, "learning_rate": 9.733174973028345e-07, "loss": 0.2801, "step": 30647 }, { "epoch": 1.435705251323371, "grad_norm": 0.6074355862064051, "learning_rate": 9.731673209905592e-07, "loss": 0.2777, "step": 30648 }, { "epoch": 1.4357520963132993, "grad_norm": 0.5939560821642124, "learning_rate": 9.730171534648226e-07, "loss": 0.2738, "step": 30649 }, { "epoch": 1.4357989413032275, "grad_norm": 0.6293469865765627, "learning_rate": 9.72866994726488e-07, "loss": 0.2868, "step": 30650 }, { "epoch": 1.435845786293156, "grad_norm": 0.6170403442697948, "learning_rate": 9.727168447764204e-07, "loss": 0.2725, "step": 30651 }, { "epoch": 1.4358926312830842, "grad_norm": 0.5613268164000561, "learning_rate": 9.725667036154842e-07, "loss": 0.2616, "step": 30652 }, { "epoch": 1.4359394762730127, "grad_norm": 0.6091881259697824, "learning_rate": 9.724165712445427e-07, "loss": 0.2741, "step": 30653 }, { "epoch": 1.435986321262941, "grad_norm": 0.597245474603728, "learning_rate": 9.722664476644592e-07, "loss": 0.2657, "step": 30654 }, { "epoch": 1.4360331662528694, "grad_norm": 0.5445422130931789, "learning_rate": 9.721163328760982e-07, "loss": 0.2591, "step": 30655 }, { "epoch": 1.4360800112427976, "grad_norm": 0.6063126211693048, "learning_rate": 9.719662268803237e-07, "loss": 0.2813, "step": 30656 }, { "epoch": 1.4361268562327258, "grad_norm": 0.5914894037111129, "learning_rate": 9.718161296779994e-07, "loss": 0.2753, "step": 30657 }, { "epoch": 1.4361737012226543, "grad_norm": 0.5696892885180838, "learning_rate": 9.716660412699893e-07, "loss": 0.2619, "step": 30658 }, { "epoch": 1.4362205462125825, "grad_norm": 0.6344210799801022, "learning_rate": 9.71515961657158e-07, "loss": 0.3088, "step": 30659 }, { "epoch": 1.4362673912025108, "grad_norm": 0.6031441309091512, "learning_rate": 9.713658908403677e-07, "loss": 0.2672, "step": 30660 }, { "epoch": 1.4363142361924393, "grad_norm": 0.5627978042642887, "learning_rate": 9.71215828820482e-07, "loss": 0.2686, "step": 30661 }, { "epoch": 1.4363610811823675, "grad_norm": 0.580178136023124, "learning_rate": 9.71065775598365e-07, "loss": 0.2642, "step": 30662 }, { "epoch": 1.4364079261722957, "grad_norm": 0.6541949178169233, "learning_rate": 9.709157311748798e-07, "loss": 0.2815, "step": 30663 }, { "epoch": 1.4364547711622242, "grad_norm": 0.6254671027030102, "learning_rate": 9.707656955508906e-07, "loss": 0.271, "step": 30664 }, { "epoch": 1.4365016161521527, "grad_norm": 0.6091073561408992, "learning_rate": 9.706156687272614e-07, "loss": 0.263, "step": 30665 }, { "epoch": 1.436548461142081, "grad_norm": 0.5767679955093366, "learning_rate": 9.704656507048536e-07, "loss": 0.2617, "step": 30666 }, { "epoch": 1.4365953061320091, "grad_norm": 0.5612259722391069, "learning_rate": 9.703156414845322e-07, "loss": 0.2604, "step": 30667 }, { "epoch": 1.4366421511219376, "grad_norm": 0.6074109024736278, "learning_rate": 9.701656410671592e-07, "loss": 0.2672, "step": 30668 }, { "epoch": 1.4366889961118658, "grad_norm": 0.5916789940454147, "learning_rate": 9.700156494535984e-07, "loss": 0.2674, "step": 30669 }, { "epoch": 1.436735841101794, "grad_norm": 0.5673410844092189, "learning_rate": 9.69865666644713e-07, "loss": 0.2692, "step": 30670 }, { "epoch": 1.4367826860917225, "grad_norm": 0.5901337916033245, "learning_rate": 9.69715692641367e-07, "loss": 0.2741, "step": 30671 }, { "epoch": 1.4368295310816508, "grad_norm": 0.6032897665629451, "learning_rate": 9.695657274444218e-07, "loss": 0.2623, "step": 30672 }, { "epoch": 1.436876376071579, "grad_norm": 0.5657784888614054, "learning_rate": 9.69415771054741e-07, "loss": 0.264, "step": 30673 }, { "epoch": 1.4369232210615075, "grad_norm": 0.5824699131965, "learning_rate": 9.692658234731886e-07, "loss": 0.2731, "step": 30674 }, { "epoch": 1.4369700660514357, "grad_norm": 0.5806284485655073, "learning_rate": 9.691158847006258e-07, "loss": 0.2622, "step": 30675 }, { "epoch": 1.4370169110413642, "grad_norm": 0.5794534114190774, "learning_rate": 9.689659547379162e-07, "loss": 0.27, "step": 30676 }, { "epoch": 1.4370637560312924, "grad_norm": 0.5896027793703416, "learning_rate": 9.688160335859238e-07, "loss": 0.2657, "step": 30677 }, { "epoch": 1.437110601021221, "grad_norm": 0.6059635059596364, "learning_rate": 9.686661212455093e-07, "loss": 0.2872, "step": 30678 }, { "epoch": 1.4371574460111491, "grad_norm": 0.6032506676310944, "learning_rate": 9.685162177175364e-07, "loss": 0.2673, "step": 30679 }, { "epoch": 1.4372042910010774, "grad_norm": 0.5887833111075783, "learning_rate": 9.683663230028679e-07, "loss": 0.2585, "step": 30680 }, { "epoch": 1.4372511359910058, "grad_norm": 0.6096594606798198, "learning_rate": 9.682164371023666e-07, "loss": 0.2699, "step": 30681 }, { "epoch": 1.437297980980934, "grad_norm": 0.6145359141980873, "learning_rate": 9.680665600168942e-07, "loss": 0.2797, "step": 30682 }, { "epoch": 1.4373448259708623, "grad_norm": 0.6335722290291524, "learning_rate": 9.67916691747315e-07, "loss": 0.2569, "step": 30683 }, { "epoch": 1.4373916709607908, "grad_norm": 0.6220218244931185, "learning_rate": 9.677668322944886e-07, "loss": 0.285, "step": 30684 }, { "epoch": 1.437438515950719, "grad_norm": 0.5886848336695619, "learning_rate": 9.676169816592793e-07, "loss": 0.2777, "step": 30685 }, { "epoch": 1.4374853609406473, "grad_norm": 0.6065066047205536, "learning_rate": 9.67467139842549e-07, "loss": 0.2704, "step": 30686 }, { "epoch": 1.4375322059305757, "grad_norm": 0.5884491168833187, "learning_rate": 9.673173068451611e-07, "loss": 0.2827, "step": 30687 }, { "epoch": 1.437579050920504, "grad_norm": 0.6069773563659725, "learning_rate": 9.671674826679761e-07, "loss": 0.2807, "step": 30688 }, { "epoch": 1.4376258959104324, "grad_norm": 0.5722410277698272, "learning_rate": 9.670176673118566e-07, "loss": 0.2635, "step": 30689 }, { "epoch": 1.4376727409003607, "grad_norm": 0.606135610252528, "learning_rate": 9.668678607776663e-07, "loss": 0.2753, "step": 30690 }, { "epoch": 1.4377195858902891, "grad_norm": 0.5791054322959591, "learning_rate": 9.667180630662653e-07, "loss": 0.2746, "step": 30691 }, { "epoch": 1.4377664308802174, "grad_norm": 0.5975365218388387, "learning_rate": 9.665682741785163e-07, "loss": 0.2781, "step": 30692 }, { "epoch": 1.4378132758701456, "grad_norm": 0.5902723873863909, "learning_rate": 9.664184941152815e-07, "loss": 0.2617, "step": 30693 }, { "epoch": 1.437860120860074, "grad_norm": 0.5765347381899559, "learning_rate": 9.662687228774236e-07, "loss": 0.2684, "step": 30694 }, { "epoch": 1.4379069658500023, "grad_norm": 0.5971777241912186, "learning_rate": 9.66118960465803e-07, "loss": 0.2785, "step": 30695 }, { "epoch": 1.4379538108399306, "grad_norm": 0.5902956530138033, "learning_rate": 9.659692068812832e-07, "loss": 0.272, "step": 30696 }, { "epoch": 1.438000655829859, "grad_norm": 0.6053861254584078, "learning_rate": 9.658194621247241e-07, "loss": 0.2692, "step": 30697 }, { "epoch": 1.4380475008197873, "grad_norm": 0.636025119245371, "learning_rate": 9.656697261969883e-07, "loss": 0.2728, "step": 30698 }, { "epoch": 1.4380943458097155, "grad_norm": 0.5671795560469808, "learning_rate": 9.655199990989376e-07, "loss": 0.2651, "step": 30699 }, { "epoch": 1.438141190799644, "grad_norm": 0.6080392078467228, "learning_rate": 9.653702808314336e-07, "loss": 0.2647, "step": 30700 }, { "epoch": 1.4381880357895724, "grad_norm": 0.5849759163344816, "learning_rate": 9.65220571395339e-07, "loss": 0.2672, "step": 30701 }, { "epoch": 1.4382348807795007, "grad_norm": 0.6215683737434694, "learning_rate": 9.65070870791514e-07, "loss": 0.266, "step": 30702 }, { "epoch": 1.438281725769429, "grad_norm": 0.6144162041392013, "learning_rate": 9.649211790208196e-07, "loss": 0.2795, "step": 30703 }, { "epoch": 1.4383285707593574, "grad_norm": 0.6189790794228854, "learning_rate": 9.647714960841182e-07, "loss": 0.2893, "step": 30704 }, { "epoch": 1.4383754157492856, "grad_norm": 0.5842033410841737, "learning_rate": 9.646218219822706e-07, "loss": 0.2673, "step": 30705 }, { "epoch": 1.4384222607392139, "grad_norm": 0.563028797215941, "learning_rate": 9.644721567161387e-07, "loss": 0.2582, "step": 30706 }, { "epoch": 1.4384691057291423, "grad_norm": 0.639909165843531, "learning_rate": 9.643225002865836e-07, "loss": 0.2802, "step": 30707 }, { "epoch": 1.4385159507190706, "grad_norm": 0.602414615705951, "learning_rate": 9.64172852694467e-07, "loss": 0.2789, "step": 30708 }, { "epoch": 1.4385627957089988, "grad_norm": 0.5738874837393507, "learning_rate": 9.640232139406502e-07, "loss": 0.2697, "step": 30709 }, { "epoch": 1.4386096406989273, "grad_norm": 0.5442226301791022, "learning_rate": 9.638735840259928e-07, "loss": 0.2575, "step": 30710 }, { "epoch": 1.4386564856888555, "grad_norm": 0.5853255405528265, "learning_rate": 9.637239629513568e-07, "loss": 0.2695, "step": 30711 }, { "epoch": 1.438703330678784, "grad_norm": 0.6089366979084546, "learning_rate": 9.635743507176028e-07, "loss": 0.2719, "step": 30712 }, { "epoch": 1.4387501756687122, "grad_norm": 0.5837176397222457, "learning_rate": 9.634247473255927e-07, "loss": 0.2753, "step": 30713 }, { "epoch": 1.4387970206586407, "grad_norm": 0.6031127791963177, "learning_rate": 9.632751527761878e-07, "loss": 0.2657, "step": 30714 }, { "epoch": 1.438843865648569, "grad_norm": 0.5618157054556009, "learning_rate": 9.631255670702472e-07, "loss": 0.2621, "step": 30715 }, { "epoch": 1.4388907106384972, "grad_norm": 0.556156764337022, "learning_rate": 9.629759902086336e-07, "loss": 0.2703, "step": 30716 }, { "epoch": 1.4389375556284256, "grad_norm": 0.6346691261898632, "learning_rate": 9.62826422192206e-07, "loss": 0.2928, "step": 30717 }, { "epoch": 1.4389844006183539, "grad_norm": 0.5455800852531163, "learning_rate": 9.62676863021826e-07, "loss": 0.2575, "step": 30718 }, { "epoch": 1.439031245608282, "grad_norm": 0.6291539390818176, "learning_rate": 9.625273126983542e-07, "loss": 0.2855, "step": 30719 }, { "epoch": 1.4390780905982106, "grad_norm": 0.6117035779005501, "learning_rate": 9.623777712226524e-07, "loss": 0.2938, "step": 30720 }, { "epoch": 1.4391249355881388, "grad_norm": 0.6766551748988493, "learning_rate": 9.62228238595579e-07, "loss": 0.2874, "step": 30721 }, { "epoch": 1.439171780578067, "grad_norm": 0.5670876517405972, "learning_rate": 9.620787148179953e-07, "loss": 0.2666, "step": 30722 }, { "epoch": 1.4392186255679955, "grad_norm": 0.6450969380020763, "learning_rate": 9.619291998907631e-07, "loss": 0.286, "step": 30723 }, { "epoch": 1.4392654705579238, "grad_norm": 0.6098072516017748, "learning_rate": 9.61779693814741e-07, "loss": 0.2837, "step": 30724 }, { "epoch": 1.4393123155478522, "grad_norm": 0.5387786941838941, "learning_rate": 9.616301965907901e-07, "loss": 0.2576, "step": 30725 }, { "epoch": 1.4393591605377805, "grad_norm": 0.5916992935368908, "learning_rate": 9.61480708219772e-07, "loss": 0.2721, "step": 30726 }, { "epoch": 1.439406005527709, "grad_norm": 0.6074827134586848, "learning_rate": 9.613312287025442e-07, "loss": 0.271, "step": 30727 }, { "epoch": 1.4394528505176372, "grad_norm": 0.6172970146108061, "learning_rate": 9.611817580399688e-07, "loss": 0.2938, "step": 30728 }, { "epoch": 1.4394996955075654, "grad_norm": 0.5687239656046184, "learning_rate": 9.610322962329057e-07, "loss": 0.2705, "step": 30729 }, { "epoch": 1.4395465404974939, "grad_norm": 0.5966709895537651, "learning_rate": 9.608828432822156e-07, "loss": 0.2605, "step": 30730 }, { "epoch": 1.439593385487422, "grad_norm": 0.6702818723289476, "learning_rate": 9.607333991887572e-07, "loss": 0.2728, "step": 30731 }, { "epoch": 1.4396402304773503, "grad_norm": 0.6506410207610562, "learning_rate": 9.60583963953392e-07, "loss": 0.2883, "step": 30732 }, { "epoch": 1.4396870754672788, "grad_norm": 0.5752413156973563, "learning_rate": 9.604345375769783e-07, "loss": 0.2645, "step": 30733 }, { "epoch": 1.439733920457207, "grad_norm": 0.5765999128110324, "learning_rate": 9.602851200603769e-07, "loss": 0.2731, "step": 30734 }, { "epoch": 1.4397807654471353, "grad_norm": 0.6135736691546864, "learning_rate": 9.601357114044476e-07, "loss": 0.2778, "step": 30735 }, { "epoch": 1.4398276104370638, "grad_norm": 0.6007102977254005, "learning_rate": 9.59986311610051e-07, "loss": 0.2656, "step": 30736 }, { "epoch": 1.4398744554269922, "grad_norm": 0.583356360644601, "learning_rate": 9.598369206780456e-07, "loss": 0.2657, "step": 30737 }, { "epoch": 1.4399213004169205, "grad_norm": 0.5694114763794671, "learning_rate": 9.596875386092922e-07, "loss": 0.2596, "step": 30738 }, { "epoch": 1.4399681454068487, "grad_norm": 0.60170789334449, "learning_rate": 9.59538165404649e-07, "loss": 0.275, "step": 30739 }, { "epoch": 1.4400149903967772, "grad_norm": 0.608053162103291, "learning_rate": 9.593888010649765e-07, "loss": 0.266, "step": 30740 }, { "epoch": 1.4400618353867054, "grad_norm": 0.6526399020188365, "learning_rate": 9.592394455911345e-07, "loss": 0.2942, "step": 30741 }, { "epoch": 1.4401086803766336, "grad_norm": 0.5607332613915842, "learning_rate": 9.590900989839818e-07, "loss": 0.2517, "step": 30742 }, { "epoch": 1.440155525366562, "grad_norm": 0.5997432563735288, "learning_rate": 9.589407612443794e-07, "loss": 0.2814, "step": 30743 }, { "epoch": 1.4402023703564903, "grad_norm": 0.6223430459682586, "learning_rate": 9.587914323731856e-07, "loss": 0.2761, "step": 30744 }, { "epoch": 1.4402492153464186, "grad_norm": 0.5838528934894024, "learning_rate": 9.586421123712589e-07, "loss": 0.2555, "step": 30745 }, { "epoch": 1.440296060336347, "grad_norm": 0.5987158480613675, "learning_rate": 9.584928012394592e-07, "loss": 0.2729, "step": 30746 }, { "epoch": 1.4403429053262753, "grad_norm": 0.5661154432571025, "learning_rate": 9.58343498978646e-07, "loss": 0.2586, "step": 30747 }, { "epoch": 1.4403897503162038, "grad_norm": 0.5654410502274985, "learning_rate": 9.581942055896788e-07, "loss": 0.2792, "step": 30748 }, { "epoch": 1.440436595306132, "grad_norm": 0.6009686052546809, "learning_rate": 9.580449210734163e-07, "loss": 0.275, "step": 30749 }, { "epoch": 1.4404834402960605, "grad_norm": 0.601705929385633, "learning_rate": 9.578956454307183e-07, "loss": 0.2718, "step": 30750 }, { "epoch": 1.4405302852859887, "grad_norm": 0.6019168916038363, "learning_rate": 9.577463786624436e-07, "loss": 0.2884, "step": 30751 }, { "epoch": 1.440577130275917, "grad_norm": 0.5848706529602856, "learning_rate": 9.575971207694501e-07, "loss": 0.262, "step": 30752 }, { "epoch": 1.4406239752658454, "grad_norm": 0.585104756494361, "learning_rate": 9.574478717525973e-07, "loss": 0.2595, "step": 30753 }, { "epoch": 1.4406708202557736, "grad_norm": 0.6047149285102746, "learning_rate": 9.572986316127442e-07, "loss": 0.2666, "step": 30754 }, { "epoch": 1.4407176652457019, "grad_norm": 0.6128792637473561, "learning_rate": 9.571494003507497e-07, "loss": 0.2848, "step": 30755 }, { "epoch": 1.4407645102356303, "grad_norm": 0.5635094298813509, "learning_rate": 9.570001779674728e-07, "loss": 0.2557, "step": 30756 }, { "epoch": 1.4408113552255586, "grad_norm": 0.6576330504879984, "learning_rate": 9.56850964463773e-07, "loss": 0.2853, "step": 30757 }, { "epoch": 1.4408582002154868, "grad_norm": 0.5935878339741034, "learning_rate": 9.567017598405078e-07, "loss": 0.2676, "step": 30758 }, { "epoch": 1.4409050452054153, "grad_norm": 0.6032441066346133, "learning_rate": 9.565525640985355e-07, "loss": 0.2835, "step": 30759 }, { "epoch": 1.4409518901953435, "grad_norm": 0.5868741127327677, "learning_rate": 9.56403377238715e-07, "loss": 0.2722, "step": 30760 }, { "epoch": 1.440998735185272, "grad_norm": 0.5872582425453066, "learning_rate": 9.562541992619052e-07, "loss": 0.2762, "step": 30761 }, { "epoch": 1.4410455801752002, "grad_norm": 0.5549689739965693, "learning_rate": 9.561050301689645e-07, "loss": 0.2536, "step": 30762 }, { "epoch": 1.4410924251651287, "grad_norm": 0.59908665370992, "learning_rate": 9.559558699607521e-07, "loss": 0.2643, "step": 30763 }, { "epoch": 1.441139270155057, "grad_norm": 0.5462156880090663, "learning_rate": 9.558067186381248e-07, "loss": 0.263, "step": 30764 }, { "epoch": 1.4411861151449852, "grad_norm": 0.5889501173380958, "learning_rate": 9.556575762019427e-07, "loss": 0.2768, "step": 30765 }, { "epoch": 1.4412329601349136, "grad_norm": 0.6236261641095675, "learning_rate": 9.55508442653062e-07, "loss": 0.274, "step": 30766 }, { "epoch": 1.4412798051248419, "grad_norm": 0.5882005916194825, "learning_rate": 9.55359317992342e-07, "loss": 0.27, "step": 30767 }, { "epoch": 1.4413266501147701, "grad_norm": 0.5692464501513751, "learning_rate": 9.55210202220641e-07, "loss": 0.2648, "step": 30768 }, { "epoch": 1.4413734951046986, "grad_norm": 0.5692755129679387, "learning_rate": 9.550610953388178e-07, "loss": 0.264, "step": 30769 }, { "epoch": 1.4414203400946268, "grad_norm": 0.6292145075583326, "learning_rate": 9.549119973477288e-07, "loss": 0.282, "step": 30770 }, { "epoch": 1.441467185084555, "grad_norm": 0.5944144452547605, "learning_rate": 9.547629082482332e-07, "loss": 0.2787, "step": 30771 }, { "epoch": 1.4415140300744835, "grad_norm": 0.6064730583554342, "learning_rate": 9.546138280411892e-07, "loss": 0.2734, "step": 30772 }, { "epoch": 1.441560875064412, "grad_norm": 0.5960538771185265, "learning_rate": 9.544647567274537e-07, "loss": 0.2695, "step": 30773 }, { "epoch": 1.4416077200543402, "grad_norm": 0.6277788153549755, "learning_rate": 9.543156943078847e-07, "loss": 0.2938, "step": 30774 }, { "epoch": 1.4416545650442685, "grad_norm": 0.5593910494911516, "learning_rate": 9.541666407833414e-07, "loss": 0.252, "step": 30775 }, { "epoch": 1.441701410034197, "grad_norm": 0.6171206592517501, "learning_rate": 9.540175961546796e-07, "loss": 0.2788, "step": 30776 }, { "epoch": 1.4417482550241252, "grad_norm": 0.6159809525546203, "learning_rate": 9.538685604227581e-07, "loss": 0.2923, "step": 30777 }, { "epoch": 1.4417951000140534, "grad_norm": 0.5824449898508856, "learning_rate": 9.537195335884345e-07, "loss": 0.2638, "step": 30778 }, { "epoch": 1.4418419450039819, "grad_norm": 0.6001183783334239, "learning_rate": 9.53570515652567e-07, "loss": 0.2668, "step": 30779 }, { "epoch": 1.4418887899939101, "grad_norm": 0.5716057623061536, "learning_rate": 9.534215066160118e-07, "loss": 0.2746, "step": 30780 }, { "epoch": 1.4419356349838384, "grad_norm": 0.5797140505327759, "learning_rate": 9.53272506479628e-07, "loss": 0.2672, "step": 30781 }, { "epoch": 1.4419824799737668, "grad_norm": 0.5519465625663187, "learning_rate": 9.531235152442711e-07, "loss": 0.2615, "step": 30782 }, { "epoch": 1.442029324963695, "grad_norm": 0.6411960514103585, "learning_rate": 9.529745329107998e-07, "loss": 0.2676, "step": 30783 }, { "epoch": 1.4420761699536235, "grad_norm": 0.5598487506239606, "learning_rate": 9.528255594800711e-07, "loss": 0.2676, "step": 30784 }, { "epoch": 1.4421230149435518, "grad_norm": 0.5905461424298418, "learning_rate": 9.526765949529432e-07, "loss": 0.2826, "step": 30785 }, { "epoch": 1.4421698599334802, "grad_norm": 0.6249434884427306, "learning_rate": 9.525276393302721e-07, "loss": 0.282, "step": 30786 }, { "epoch": 1.4422167049234085, "grad_norm": 0.5819539954395728, "learning_rate": 9.523786926129161e-07, "loss": 0.2688, "step": 30787 }, { "epoch": 1.4422635499133367, "grad_norm": 0.5829406009710429, "learning_rate": 9.522297548017307e-07, "loss": 0.2703, "step": 30788 }, { "epoch": 1.4423103949032652, "grad_norm": 0.6337506917453731, "learning_rate": 9.520808258975742e-07, "loss": 0.2645, "step": 30789 }, { "epoch": 1.4423572398931934, "grad_norm": 0.6057159797868485, "learning_rate": 9.519319059013036e-07, "loss": 0.2849, "step": 30790 }, { "epoch": 1.4424040848831217, "grad_norm": 0.571230805583411, "learning_rate": 9.517829948137758e-07, "loss": 0.2729, "step": 30791 }, { "epoch": 1.4424509298730501, "grad_norm": 0.5958805085302485, "learning_rate": 9.516340926358486e-07, "loss": 0.2638, "step": 30792 }, { "epoch": 1.4424977748629784, "grad_norm": 0.6210072855501128, "learning_rate": 9.514851993683777e-07, "loss": 0.2597, "step": 30793 }, { "epoch": 1.4425446198529066, "grad_norm": 0.6215264620148004, "learning_rate": 9.513363150122198e-07, "loss": 0.2847, "step": 30794 }, { "epoch": 1.442591464842835, "grad_norm": 0.6227623991096259, "learning_rate": 9.511874395682319e-07, "loss": 0.2828, "step": 30795 }, { "epoch": 1.4426383098327633, "grad_norm": 0.689594611096841, "learning_rate": 9.510385730372712e-07, "loss": 0.299, "step": 30796 }, { "epoch": 1.4426851548226918, "grad_norm": 0.6450950057118704, "learning_rate": 9.50889715420194e-07, "loss": 0.2678, "step": 30797 }, { "epoch": 1.44273199981262, "grad_norm": 0.5643456052440802, "learning_rate": 9.507408667178575e-07, "loss": 0.2686, "step": 30798 }, { "epoch": 1.4427788448025485, "grad_norm": 0.6082145923504548, "learning_rate": 9.505920269311184e-07, "loss": 0.257, "step": 30799 }, { "epoch": 1.4428256897924767, "grad_norm": 0.5647204332518254, "learning_rate": 9.504431960608329e-07, "loss": 0.2658, "step": 30800 }, { "epoch": 1.442872534782405, "grad_norm": 0.5771298015366366, "learning_rate": 9.502943741078566e-07, "loss": 0.2742, "step": 30801 }, { "epoch": 1.4429193797723334, "grad_norm": 0.6455130786051632, "learning_rate": 9.501455610730464e-07, "loss": 0.2849, "step": 30802 }, { "epoch": 1.4429662247622617, "grad_norm": 0.5736785994704171, "learning_rate": 9.49996756957259e-07, "loss": 0.2752, "step": 30803 }, { "epoch": 1.44301306975219, "grad_norm": 0.6159586930468757, "learning_rate": 9.498479617613509e-07, "loss": 0.2678, "step": 30804 }, { "epoch": 1.4430599147421184, "grad_norm": 0.5855468557382703, "learning_rate": 9.49699175486179e-07, "loss": 0.2659, "step": 30805 }, { "epoch": 1.4431067597320466, "grad_norm": 0.5764532130588093, "learning_rate": 9.495503981325977e-07, "loss": 0.2713, "step": 30806 }, { "epoch": 1.4431536047219748, "grad_norm": 0.5910724288590803, "learning_rate": 9.494016297014649e-07, "loss": 0.277, "step": 30807 }, { "epoch": 1.4432004497119033, "grad_norm": 0.6098064161866245, "learning_rate": 9.492528701936354e-07, "loss": 0.283, "step": 30808 }, { "epoch": 1.4432472947018318, "grad_norm": 0.5778799636690908, "learning_rate": 9.491041196099656e-07, "loss": 0.2659, "step": 30809 }, { "epoch": 1.44329413969176, "grad_norm": 0.6054014041305135, "learning_rate": 9.489553779513122e-07, "loss": 0.2769, "step": 30810 }, { "epoch": 1.4433409846816883, "grad_norm": 0.6056345446465767, "learning_rate": 9.488066452185313e-07, "loss": 0.297, "step": 30811 }, { "epoch": 1.4433878296716167, "grad_norm": 0.6127172902647235, "learning_rate": 9.486579214124775e-07, "loss": 0.2611, "step": 30812 }, { "epoch": 1.443434674661545, "grad_norm": 0.5422764669468063, "learning_rate": 9.485092065340073e-07, "loss": 0.2532, "step": 30813 }, { "epoch": 1.4434815196514732, "grad_norm": 0.6128510183862894, "learning_rate": 9.48360500583978e-07, "loss": 0.2832, "step": 30814 }, { "epoch": 1.4435283646414017, "grad_norm": 0.5754648646167427, "learning_rate": 9.482118035632426e-07, "loss": 0.2673, "step": 30815 }, { "epoch": 1.44357520963133, "grad_norm": 0.558285292884149, "learning_rate": 9.480631154726586e-07, "loss": 0.2647, "step": 30816 }, { "epoch": 1.4436220546212581, "grad_norm": 0.5671544421002637, "learning_rate": 9.479144363130821e-07, "loss": 0.2584, "step": 30817 }, { "epoch": 1.4436688996111866, "grad_norm": 0.5945758908366824, "learning_rate": 9.47765766085367e-07, "loss": 0.271, "step": 30818 }, { "epoch": 1.4437157446011148, "grad_norm": 0.5801556908014578, "learning_rate": 9.476171047903696e-07, "loss": 0.2691, "step": 30819 }, { "epoch": 1.4437625895910433, "grad_norm": 0.5949139005990018, "learning_rate": 9.47468452428946e-07, "loss": 0.2919, "step": 30820 }, { "epoch": 1.4438094345809716, "grad_norm": 0.5558183677762317, "learning_rate": 9.473198090019517e-07, "loss": 0.2505, "step": 30821 }, { "epoch": 1.4438562795709, "grad_norm": 0.6562968076878585, "learning_rate": 9.471711745102411e-07, "loss": 0.2827, "step": 30822 }, { "epoch": 1.4439031245608283, "grad_norm": 0.636655950075526, "learning_rate": 9.470225489546697e-07, "loss": 0.2788, "step": 30823 }, { "epoch": 1.4439499695507565, "grad_norm": 0.5784842302573929, "learning_rate": 9.468739323360945e-07, "loss": 0.26, "step": 30824 }, { "epoch": 1.443996814540685, "grad_norm": 0.6127646792561006, "learning_rate": 9.467253246553684e-07, "loss": 0.2659, "step": 30825 }, { "epoch": 1.4440436595306132, "grad_norm": 0.5593889593202404, "learning_rate": 9.465767259133476e-07, "loss": 0.2658, "step": 30826 }, { "epoch": 1.4440905045205414, "grad_norm": 0.5672891445375322, "learning_rate": 9.464281361108882e-07, "loss": 0.2528, "step": 30827 }, { "epoch": 1.44413734951047, "grad_norm": 0.6194830922778285, "learning_rate": 9.462795552488436e-07, "loss": 0.2774, "step": 30828 }, { "epoch": 1.4441841945003981, "grad_norm": 0.5765570806917136, "learning_rate": 9.461309833280696e-07, "loss": 0.2684, "step": 30829 }, { "epoch": 1.4442310394903264, "grad_norm": 0.6003357884609323, "learning_rate": 9.459824203494223e-07, "loss": 0.281, "step": 30830 }, { "epoch": 1.4442778844802548, "grad_norm": 0.6071237025681263, "learning_rate": 9.458338663137545e-07, "loss": 0.29, "step": 30831 }, { "epoch": 1.444324729470183, "grad_norm": 0.5900275679902912, "learning_rate": 9.456853212219222e-07, "loss": 0.2637, "step": 30832 }, { "epoch": 1.4443715744601116, "grad_norm": 0.5877471987524995, "learning_rate": 9.455367850747801e-07, "loss": 0.2759, "step": 30833 }, { "epoch": 1.4444184194500398, "grad_norm": 0.6092902114402701, "learning_rate": 9.453882578731843e-07, "loss": 0.2902, "step": 30834 }, { "epoch": 1.4444652644399683, "grad_norm": 0.594981864727516, "learning_rate": 9.452397396179871e-07, "loss": 0.2619, "step": 30835 }, { "epoch": 1.4445121094298965, "grad_norm": 0.6465289632350921, "learning_rate": 9.450912303100454e-07, "loss": 0.2828, "step": 30836 }, { "epoch": 1.4445589544198247, "grad_norm": 0.5884988294631791, "learning_rate": 9.449427299502123e-07, "loss": 0.2593, "step": 30837 }, { "epoch": 1.4446057994097532, "grad_norm": 0.6045455020522275, "learning_rate": 9.447942385393427e-07, "loss": 0.2702, "step": 30838 }, { "epoch": 1.4446526443996814, "grad_norm": 0.595271359841523, "learning_rate": 9.446457560782912e-07, "loss": 0.2746, "step": 30839 }, { "epoch": 1.4446994893896097, "grad_norm": 0.5982005088150552, "learning_rate": 9.444972825679127e-07, "loss": 0.2744, "step": 30840 }, { "epoch": 1.4447463343795381, "grad_norm": 0.629771240674017, "learning_rate": 9.44348818009062e-07, "loss": 0.2711, "step": 30841 }, { "epoch": 1.4447931793694664, "grad_norm": 0.603564553591856, "learning_rate": 9.44200362402593e-07, "loss": 0.2729, "step": 30842 }, { "epoch": 1.4448400243593946, "grad_norm": 0.5590742203950239, "learning_rate": 9.440519157493591e-07, "loss": 0.2535, "step": 30843 }, { "epoch": 1.444886869349323, "grad_norm": 0.5783889794392649, "learning_rate": 9.439034780502154e-07, "loss": 0.2694, "step": 30844 }, { "epoch": 1.4449337143392516, "grad_norm": 0.5402316609050782, "learning_rate": 9.43755049306016e-07, "loss": 0.2698, "step": 30845 }, { "epoch": 1.4449805593291798, "grad_norm": 0.5933105490501416, "learning_rate": 9.436066295176149e-07, "loss": 0.2857, "step": 30846 }, { "epoch": 1.445027404319108, "grad_norm": 0.5857169844078295, "learning_rate": 9.434582186858668e-07, "loss": 0.2714, "step": 30847 }, { "epoch": 1.4450742493090365, "grad_norm": 0.6656711166737479, "learning_rate": 9.433098168116262e-07, "loss": 0.2838, "step": 30848 }, { "epoch": 1.4451210942989647, "grad_norm": 0.6117705318485575, "learning_rate": 9.431614238957462e-07, "loss": 0.2835, "step": 30849 }, { "epoch": 1.445167939288893, "grad_norm": 0.5853147207337993, "learning_rate": 9.430130399390802e-07, "loss": 0.2716, "step": 30850 }, { "epoch": 1.4452147842788214, "grad_norm": 0.5924245532082839, "learning_rate": 9.428646649424827e-07, "loss": 0.2711, "step": 30851 }, { "epoch": 1.4452616292687497, "grad_norm": 0.6036249990532583, "learning_rate": 9.427162989068078e-07, "loss": 0.2901, "step": 30852 }, { "epoch": 1.445308474258678, "grad_norm": 0.581179309552143, "learning_rate": 9.425679418329093e-07, "loss": 0.2712, "step": 30853 }, { "epoch": 1.4453553192486064, "grad_norm": 0.5854014617489353, "learning_rate": 9.424195937216413e-07, "loss": 0.278, "step": 30854 }, { "epoch": 1.4454021642385346, "grad_norm": 0.6055508479768443, "learning_rate": 9.422712545738566e-07, "loss": 0.2695, "step": 30855 }, { "epoch": 1.445449009228463, "grad_norm": 0.6010991335393234, "learning_rate": 9.421229243904099e-07, "loss": 0.2835, "step": 30856 }, { "epoch": 1.4454958542183913, "grad_norm": 0.6136501603812594, "learning_rate": 9.419746031721535e-07, "loss": 0.2777, "step": 30857 }, { "epoch": 1.4455426992083198, "grad_norm": 0.5627208026339536, "learning_rate": 9.418262909199416e-07, "loss": 0.2647, "step": 30858 }, { "epoch": 1.445589544198248, "grad_norm": 0.5691066782569404, "learning_rate": 9.416779876346277e-07, "loss": 0.2651, "step": 30859 }, { "epoch": 1.4456363891881763, "grad_norm": 0.609480185794394, "learning_rate": 9.415296933170665e-07, "loss": 0.2673, "step": 30860 }, { "epoch": 1.4456832341781047, "grad_norm": 0.597627232313029, "learning_rate": 9.413814079681091e-07, "loss": 0.2695, "step": 30861 }, { "epoch": 1.445730079168033, "grad_norm": 0.6063341107227659, "learning_rate": 9.412331315886098e-07, "loss": 0.2718, "step": 30862 }, { "epoch": 1.4457769241579612, "grad_norm": 0.6403039395956852, "learning_rate": 9.410848641794232e-07, "loss": 0.277, "step": 30863 }, { "epoch": 1.4458237691478897, "grad_norm": 0.5919771243617493, "learning_rate": 9.409366057414004e-07, "loss": 0.2802, "step": 30864 }, { "epoch": 1.445870614137818, "grad_norm": 0.6053801451672358, "learning_rate": 9.407883562753956e-07, "loss": 0.2639, "step": 30865 }, { "epoch": 1.4459174591277462, "grad_norm": 0.5781804924005332, "learning_rate": 9.406401157822625e-07, "loss": 0.2696, "step": 30866 }, { "epoch": 1.4459643041176746, "grad_norm": 0.5560330378318171, "learning_rate": 9.404918842628533e-07, "loss": 0.263, "step": 30867 }, { "epoch": 1.4460111491076029, "grad_norm": 0.5981441259756644, "learning_rate": 9.403436617180208e-07, "loss": 0.2766, "step": 30868 }, { "epoch": 1.4460579940975313, "grad_norm": 0.6438992795292513, "learning_rate": 9.401954481486189e-07, "loss": 0.2694, "step": 30869 }, { "epoch": 1.4461048390874596, "grad_norm": 0.5918008225224222, "learning_rate": 9.400472435555008e-07, "loss": 0.2749, "step": 30870 }, { "epoch": 1.446151684077388, "grad_norm": 0.6112139690114801, "learning_rate": 9.398990479395179e-07, "loss": 0.2845, "step": 30871 }, { "epoch": 1.4461985290673163, "grad_norm": 0.5857903360381129, "learning_rate": 9.397508613015249e-07, "loss": 0.2679, "step": 30872 }, { "epoch": 1.4462453740572445, "grad_norm": 0.5811661544101941, "learning_rate": 9.396026836423725e-07, "loss": 0.2642, "step": 30873 }, { "epoch": 1.446292219047173, "grad_norm": 0.5841188190398272, "learning_rate": 9.394545149629145e-07, "loss": 0.2649, "step": 30874 }, { "epoch": 1.4463390640371012, "grad_norm": 0.5757026529216784, "learning_rate": 9.393063552640036e-07, "loss": 0.2526, "step": 30875 }, { "epoch": 1.4463859090270295, "grad_norm": 0.6196633100619255, "learning_rate": 9.391582045464931e-07, "loss": 0.2755, "step": 30876 }, { "epoch": 1.446432754016958, "grad_norm": 0.5693335770537765, "learning_rate": 9.390100628112342e-07, "loss": 0.2655, "step": 30877 }, { "epoch": 1.4464795990068862, "grad_norm": 0.5995681622014911, "learning_rate": 9.388619300590806e-07, "loss": 0.2671, "step": 30878 }, { "epoch": 1.4465264439968144, "grad_norm": 0.5890814670811647, "learning_rate": 9.387138062908835e-07, "loss": 0.2724, "step": 30879 }, { "epoch": 1.4465732889867429, "grad_norm": 0.6074428779301466, "learning_rate": 9.385656915074961e-07, "loss": 0.2691, "step": 30880 }, { "epoch": 1.4466201339766713, "grad_norm": 0.650260757433458, "learning_rate": 9.384175857097704e-07, "loss": 0.2734, "step": 30881 }, { "epoch": 1.4466669789665996, "grad_norm": 0.6084160629676499, "learning_rate": 9.382694888985594e-07, "loss": 0.2615, "step": 30882 }, { "epoch": 1.4467138239565278, "grad_norm": 0.5680026464882949, "learning_rate": 9.381214010747158e-07, "loss": 0.2775, "step": 30883 }, { "epoch": 1.4467606689464563, "grad_norm": 0.5568713124060872, "learning_rate": 9.379733222390908e-07, "loss": 0.2666, "step": 30884 }, { "epoch": 1.4468075139363845, "grad_norm": 0.6072446326281905, "learning_rate": 9.378252523925358e-07, "loss": 0.2729, "step": 30885 }, { "epoch": 1.4468543589263128, "grad_norm": 0.6501332400992755, "learning_rate": 9.37677191535904e-07, "loss": 0.2819, "step": 30886 }, { "epoch": 1.4469012039162412, "grad_norm": 0.5964382368496076, "learning_rate": 9.375291396700473e-07, "loss": 0.2694, "step": 30887 }, { "epoch": 1.4469480489061695, "grad_norm": 0.6233946777807393, "learning_rate": 9.373810967958175e-07, "loss": 0.2844, "step": 30888 }, { "epoch": 1.4469948938960977, "grad_norm": 0.6135446754832805, "learning_rate": 9.372330629140669e-07, "loss": 0.275, "step": 30889 }, { "epoch": 1.4470417388860262, "grad_norm": 0.5946207364962914, "learning_rate": 9.37085038025648e-07, "loss": 0.2702, "step": 30890 }, { "epoch": 1.4470885838759544, "grad_norm": 0.5722771124414727, "learning_rate": 9.36937022131412e-07, "loss": 0.2639, "step": 30891 }, { "epoch": 1.4471354288658829, "grad_norm": 0.6016765474195338, "learning_rate": 9.367890152322096e-07, "loss": 0.2785, "step": 30892 }, { "epoch": 1.447182273855811, "grad_norm": 0.5907214673780917, "learning_rate": 9.366410173288937e-07, "loss": 0.2705, "step": 30893 }, { "epoch": 1.4472291188457396, "grad_norm": 0.5878714597044629, "learning_rate": 9.364930284223156e-07, "loss": 0.2513, "step": 30894 }, { "epoch": 1.4472759638356678, "grad_norm": 0.6336385641250444, "learning_rate": 9.363450485133271e-07, "loss": 0.2655, "step": 30895 }, { "epoch": 1.447322808825596, "grad_norm": 0.6082246637685019, "learning_rate": 9.361970776027801e-07, "loss": 0.2755, "step": 30896 }, { "epoch": 1.4473696538155245, "grad_norm": 0.5886737479999435, "learning_rate": 9.360491156915266e-07, "loss": 0.2688, "step": 30897 }, { "epoch": 1.4474164988054528, "grad_norm": 0.604264675133339, "learning_rate": 9.359011627804174e-07, "loss": 0.2793, "step": 30898 }, { "epoch": 1.447463343795381, "grad_norm": 0.6282285916194879, "learning_rate": 9.357532188703028e-07, "loss": 0.278, "step": 30899 }, { "epoch": 1.4475101887853095, "grad_norm": 0.5566704462242363, "learning_rate": 9.356052839620356e-07, "loss": 0.2666, "step": 30900 }, { "epoch": 1.4475570337752377, "grad_norm": 0.5838492339050457, "learning_rate": 9.354573580564666e-07, "loss": 0.2755, "step": 30901 }, { "epoch": 1.447603878765166, "grad_norm": 0.5897336040290244, "learning_rate": 9.353094411544473e-07, "loss": 0.2786, "step": 30902 }, { "epoch": 1.4476507237550944, "grad_norm": 0.621012835563174, "learning_rate": 9.351615332568298e-07, "loss": 0.2795, "step": 30903 }, { "epoch": 1.4476975687450226, "grad_norm": 0.5961355195378625, "learning_rate": 9.350136343644633e-07, "loss": 0.261, "step": 30904 }, { "epoch": 1.447744413734951, "grad_norm": 0.565830821142737, "learning_rate": 9.348657444782011e-07, "loss": 0.2836, "step": 30905 }, { "epoch": 1.4477912587248793, "grad_norm": 0.6110331475795356, "learning_rate": 9.347178635988921e-07, "loss": 0.2804, "step": 30906 }, { "epoch": 1.4478381037148078, "grad_norm": 0.562294431533347, "learning_rate": 9.345699917273885e-07, "loss": 0.2549, "step": 30907 }, { "epoch": 1.447884948704736, "grad_norm": 0.591942680859737, "learning_rate": 9.344221288645411e-07, "loss": 0.2679, "step": 30908 }, { "epoch": 1.4479317936946643, "grad_norm": 0.5633893507191655, "learning_rate": 9.342742750112019e-07, "loss": 0.2441, "step": 30909 }, { "epoch": 1.4479786386845928, "grad_norm": 0.5956702849446655, "learning_rate": 9.341264301682196e-07, "loss": 0.2863, "step": 30910 }, { "epoch": 1.448025483674521, "grad_norm": 0.6293144032657897, "learning_rate": 9.339785943364463e-07, "loss": 0.2837, "step": 30911 }, { "epoch": 1.4480723286644492, "grad_norm": 0.5826030520309341, "learning_rate": 9.338307675167335e-07, "loss": 0.2567, "step": 30912 }, { "epoch": 1.4481191736543777, "grad_norm": 0.5928080708854805, "learning_rate": 9.336829497099298e-07, "loss": 0.282, "step": 30913 }, { "epoch": 1.448166018644306, "grad_norm": 0.5774350382291941, "learning_rate": 9.335351409168875e-07, "loss": 0.2588, "step": 30914 }, { "epoch": 1.4482128636342342, "grad_norm": 0.5903643859316883, "learning_rate": 9.333873411384572e-07, "loss": 0.273, "step": 30915 }, { "epoch": 1.4482597086241626, "grad_norm": 0.5631176138902813, "learning_rate": 9.332395503754885e-07, "loss": 0.2687, "step": 30916 }, { "epoch": 1.448306553614091, "grad_norm": 0.5814351673868788, "learning_rate": 9.330917686288321e-07, "loss": 0.284, "step": 30917 }, { "epoch": 1.4483533986040193, "grad_norm": 0.5827236468993691, "learning_rate": 9.329439958993388e-07, "loss": 0.2719, "step": 30918 }, { "epoch": 1.4484002435939476, "grad_norm": 0.6015976969696403, "learning_rate": 9.327962321878597e-07, "loss": 0.2853, "step": 30919 }, { "epoch": 1.448447088583876, "grad_norm": 0.622897374091437, "learning_rate": 9.326484774952438e-07, "loss": 0.2881, "step": 30920 }, { "epoch": 1.4484939335738043, "grad_norm": 0.616318849679053, "learning_rate": 9.325007318223428e-07, "loss": 0.2768, "step": 30921 }, { "epoch": 1.4485407785637325, "grad_norm": 0.6364949548583905, "learning_rate": 9.32352995170005e-07, "loss": 0.2843, "step": 30922 }, { "epoch": 1.448587623553661, "grad_norm": 0.5760568435717278, "learning_rate": 9.322052675390819e-07, "loss": 0.2614, "step": 30923 }, { "epoch": 1.4486344685435892, "grad_norm": 0.5989579369060382, "learning_rate": 9.320575489304234e-07, "loss": 0.2682, "step": 30924 }, { "epoch": 1.4486813135335175, "grad_norm": 0.6206544665597059, "learning_rate": 9.319098393448805e-07, "loss": 0.2835, "step": 30925 }, { "epoch": 1.448728158523446, "grad_norm": 0.5845461982552782, "learning_rate": 9.317621387833014e-07, "loss": 0.28, "step": 30926 }, { "epoch": 1.4487750035133742, "grad_norm": 0.5968272362503381, "learning_rate": 9.316144472465379e-07, "loss": 0.2696, "step": 30927 }, { "epoch": 1.4488218485033026, "grad_norm": 0.557254740246585, "learning_rate": 9.314667647354384e-07, "loss": 0.2703, "step": 30928 }, { "epoch": 1.4488686934932309, "grad_norm": 0.6156385868815224, "learning_rate": 9.313190912508532e-07, "loss": 0.27, "step": 30929 }, { "epoch": 1.4489155384831593, "grad_norm": 0.5792745642965818, "learning_rate": 9.311714267936323e-07, "loss": 0.2799, "step": 30930 }, { "epoch": 1.4489623834730876, "grad_norm": 0.6083097988168673, "learning_rate": 9.310237713646259e-07, "loss": 0.2962, "step": 30931 }, { "epoch": 1.4490092284630158, "grad_norm": 0.5546418641559203, "learning_rate": 9.308761249646839e-07, "loss": 0.2622, "step": 30932 }, { "epoch": 1.4490560734529443, "grad_norm": 0.6207748973366224, "learning_rate": 9.307284875946554e-07, "loss": 0.279, "step": 30933 }, { "epoch": 1.4491029184428725, "grad_norm": 0.5842176571742598, "learning_rate": 9.305808592553892e-07, "loss": 0.2656, "step": 30934 }, { "epoch": 1.4491497634328008, "grad_norm": 0.5936000267067235, "learning_rate": 9.304332399477358e-07, "loss": 0.2807, "step": 30935 }, { "epoch": 1.4491966084227292, "grad_norm": 0.5986518174985896, "learning_rate": 9.302856296725446e-07, "loss": 0.2724, "step": 30936 }, { "epoch": 1.4492434534126575, "grad_norm": 0.6255793207316298, "learning_rate": 9.301380284306649e-07, "loss": 0.2746, "step": 30937 }, { "epoch": 1.4492902984025857, "grad_norm": 0.5672161263304413, "learning_rate": 9.299904362229464e-07, "loss": 0.2612, "step": 30938 }, { "epoch": 1.4493371433925142, "grad_norm": 0.5504166632102651, "learning_rate": 9.298428530502393e-07, "loss": 0.2607, "step": 30939 }, { "epoch": 1.4493839883824424, "grad_norm": 0.6341695551261389, "learning_rate": 9.296952789133917e-07, "loss": 0.2668, "step": 30940 }, { "epoch": 1.4494308333723709, "grad_norm": 0.6097164897188643, "learning_rate": 9.295477138132525e-07, "loss": 0.2667, "step": 30941 }, { "epoch": 1.4494776783622991, "grad_norm": 0.5971514965393794, "learning_rate": 9.294001577506714e-07, "loss": 0.2816, "step": 30942 }, { "epoch": 1.4495245233522276, "grad_norm": 0.6090043574269847, "learning_rate": 9.292526107264976e-07, "loss": 0.283, "step": 30943 }, { "epoch": 1.4495713683421558, "grad_norm": 0.5846533797638113, "learning_rate": 9.291050727415801e-07, "loss": 0.2622, "step": 30944 }, { "epoch": 1.449618213332084, "grad_norm": 0.6164583324192007, "learning_rate": 9.28957543796769e-07, "loss": 0.2796, "step": 30945 }, { "epoch": 1.4496650583220125, "grad_norm": 0.6258294661655023, "learning_rate": 9.288100238929118e-07, "loss": 0.2577, "step": 30946 }, { "epoch": 1.4497119033119408, "grad_norm": 0.5903052557324256, "learning_rate": 9.286625130308585e-07, "loss": 0.2657, "step": 30947 }, { "epoch": 1.449758748301869, "grad_norm": 0.6268031272802214, "learning_rate": 9.285150112114569e-07, "loss": 0.2834, "step": 30948 }, { "epoch": 1.4498055932917975, "grad_norm": 0.5988615006992261, "learning_rate": 9.283675184355562e-07, "loss": 0.2767, "step": 30949 }, { "epoch": 1.4498524382817257, "grad_norm": 0.6625816902279507, "learning_rate": 9.282200347040055e-07, "loss": 0.2839, "step": 30950 }, { "epoch": 1.449899283271654, "grad_norm": 0.6263219172024802, "learning_rate": 9.280725600176543e-07, "loss": 0.2736, "step": 30951 }, { "epoch": 1.4499461282615824, "grad_norm": 0.5873538036474111, "learning_rate": 9.279250943773496e-07, "loss": 0.2761, "step": 30952 }, { "epoch": 1.4499929732515109, "grad_norm": 0.589828116239532, "learning_rate": 9.277776377839409e-07, "loss": 0.2692, "step": 30953 }, { "epoch": 1.4500398182414391, "grad_norm": 0.5818315872797767, "learning_rate": 9.276301902382775e-07, "loss": 0.2618, "step": 30954 }, { "epoch": 1.4500866632313674, "grad_norm": 0.6476418512004459, "learning_rate": 9.274827517412064e-07, "loss": 0.2895, "step": 30955 }, { "epoch": 1.4501335082212958, "grad_norm": 0.5897652994767875, "learning_rate": 9.273353222935769e-07, "loss": 0.2713, "step": 30956 }, { "epoch": 1.450180353211224, "grad_norm": 0.6023477754537008, "learning_rate": 9.271879018962379e-07, "loss": 0.2738, "step": 30957 }, { "epoch": 1.4502271982011523, "grad_norm": 0.6228807920258239, "learning_rate": 9.270404905500366e-07, "loss": 0.2809, "step": 30958 }, { "epoch": 1.4502740431910808, "grad_norm": 0.5962784660043022, "learning_rate": 9.268930882558219e-07, "loss": 0.2654, "step": 30959 }, { "epoch": 1.450320888181009, "grad_norm": 0.6293457229875302, "learning_rate": 9.267456950144418e-07, "loss": 0.2622, "step": 30960 }, { "epoch": 1.4503677331709373, "grad_norm": 0.5534911924168462, "learning_rate": 9.265983108267459e-07, "loss": 0.2501, "step": 30961 }, { "epoch": 1.4504145781608657, "grad_norm": 0.5949586800702422, "learning_rate": 9.264509356935805e-07, "loss": 0.2706, "step": 30962 }, { "epoch": 1.450461423150794, "grad_norm": 0.585387091809621, "learning_rate": 9.263035696157943e-07, "loss": 0.2669, "step": 30963 }, { "epoch": 1.4505082681407224, "grad_norm": 0.5751578523560879, "learning_rate": 9.261562125942367e-07, "loss": 0.2681, "step": 30964 }, { "epoch": 1.4505551131306507, "grad_norm": 0.6054012726967538, "learning_rate": 9.260088646297533e-07, "loss": 0.269, "step": 30965 }, { "epoch": 1.4506019581205791, "grad_norm": 0.5973307746902033, "learning_rate": 9.258615257231934e-07, "loss": 0.2592, "step": 30966 }, { "epoch": 1.4506488031105074, "grad_norm": 0.5619457045715002, "learning_rate": 9.257141958754057e-07, "loss": 0.2564, "step": 30967 }, { "epoch": 1.4506956481004356, "grad_norm": 0.551351350032344, "learning_rate": 9.255668750872362e-07, "loss": 0.2675, "step": 30968 }, { "epoch": 1.450742493090364, "grad_norm": 0.6028853638209019, "learning_rate": 9.254195633595336e-07, "loss": 0.2653, "step": 30969 }, { "epoch": 1.4507893380802923, "grad_norm": 0.5542804373821877, "learning_rate": 9.252722606931463e-07, "loss": 0.267, "step": 30970 }, { "epoch": 1.4508361830702206, "grad_norm": 0.6002474654010106, "learning_rate": 9.251249670889209e-07, "loss": 0.276, "step": 30971 }, { "epoch": 1.450883028060149, "grad_norm": 0.5957867131573739, "learning_rate": 9.249776825477052e-07, "loss": 0.2615, "step": 30972 }, { "epoch": 1.4509298730500773, "grad_norm": 0.6775192662754173, "learning_rate": 9.248304070703474e-07, "loss": 0.298, "step": 30973 }, { "epoch": 1.4509767180400055, "grad_norm": 0.5726251094620599, "learning_rate": 9.246831406576953e-07, "loss": 0.28, "step": 30974 }, { "epoch": 1.451023563029934, "grad_norm": 0.5699569836262387, "learning_rate": 9.24535883310595e-07, "loss": 0.2611, "step": 30975 }, { "epoch": 1.4510704080198622, "grad_norm": 0.6064300772941027, "learning_rate": 9.243886350298952e-07, "loss": 0.2717, "step": 30976 }, { "epoch": 1.4511172530097907, "grad_norm": 0.581284275633006, "learning_rate": 9.242413958164426e-07, "loss": 0.2628, "step": 30977 }, { "epoch": 1.451164097999719, "grad_norm": 0.5735331219194285, "learning_rate": 9.240941656710842e-07, "loss": 0.2767, "step": 30978 }, { "epoch": 1.4512109429896474, "grad_norm": 0.5885209204233005, "learning_rate": 9.23946944594668e-07, "loss": 0.2668, "step": 30979 }, { "epoch": 1.4512577879795756, "grad_norm": 0.6032618620518992, "learning_rate": 9.23799732588041e-07, "loss": 0.2712, "step": 30980 }, { "epoch": 1.4513046329695038, "grad_norm": 0.5737672848048607, "learning_rate": 9.236525296520513e-07, "loss": 0.2706, "step": 30981 }, { "epoch": 1.4513514779594323, "grad_norm": 0.5837804848826096, "learning_rate": 9.235053357875448e-07, "loss": 0.245, "step": 30982 }, { "epoch": 1.4513983229493606, "grad_norm": 0.5979994936416292, "learning_rate": 9.233581509953682e-07, "loss": 0.2688, "step": 30983 }, { "epoch": 1.4514451679392888, "grad_norm": 0.5997043805388952, "learning_rate": 9.232109752763693e-07, "loss": 0.2851, "step": 30984 }, { "epoch": 1.4514920129292173, "grad_norm": 0.6104962409524493, "learning_rate": 9.230638086313948e-07, "loss": 0.275, "step": 30985 }, { "epoch": 1.4515388579191455, "grad_norm": 0.5607145993553688, "learning_rate": 9.229166510612917e-07, "loss": 0.2582, "step": 30986 }, { "epoch": 1.4515857029090737, "grad_norm": 0.5839474447485856, "learning_rate": 9.227695025669068e-07, "loss": 0.2757, "step": 30987 }, { "epoch": 1.4516325478990022, "grad_norm": 0.607047312667643, "learning_rate": 9.226223631490879e-07, "loss": 0.2797, "step": 30988 }, { "epoch": 1.4516793928889307, "grad_norm": 0.6280449519007617, "learning_rate": 9.224752328086811e-07, "loss": 0.2961, "step": 30989 }, { "epoch": 1.451726237878859, "grad_norm": 0.561367878908164, "learning_rate": 9.223281115465315e-07, "loss": 0.2544, "step": 30990 }, { "epoch": 1.4517730828687871, "grad_norm": 0.5895455311002885, "learning_rate": 9.221809993634875e-07, "loss": 0.2668, "step": 30991 }, { "epoch": 1.4518199278587156, "grad_norm": 0.5729329833227346, "learning_rate": 9.220338962603953e-07, "loss": 0.2652, "step": 30992 }, { "epoch": 1.4518667728486438, "grad_norm": 0.7677378635819372, "learning_rate": 9.218868022381012e-07, "loss": 0.2883, "step": 30993 }, { "epoch": 1.451913617838572, "grad_norm": 0.5905918075797144, "learning_rate": 9.21739717297453e-07, "loss": 0.2804, "step": 30994 }, { "epoch": 1.4519604628285006, "grad_norm": 0.6548187100219889, "learning_rate": 9.215926414392948e-07, "loss": 0.3044, "step": 30995 }, { "epoch": 1.4520073078184288, "grad_norm": 0.5942488862922414, "learning_rate": 9.214455746644755e-07, "loss": 0.2702, "step": 30996 }, { "epoch": 1.452054152808357, "grad_norm": 0.5750824440194265, "learning_rate": 9.21298516973839e-07, "loss": 0.2606, "step": 30997 }, { "epoch": 1.4521009977982855, "grad_norm": 0.5816398887759834, "learning_rate": 9.21151468368233e-07, "loss": 0.2648, "step": 30998 }, { "epoch": 1.4521478427882137, "grad_norm": 0.6171018957304255, "learning_rate": 9.210044288485032e-07, "loss": 0.2597, "step": 30999 }, { "epoch": 1.4521946877781422, "grad_norm": 0.6267087335442737, "learning_rate": 9.20857398415497e-07, "loss": 0.2755, "step": 31000 }, { "epoch": 1.4522415327680704, "grad_norm": 0.6314255185604526, "learning_rate": 9.207103770700587e-07, "loss": 0.27, "step": 31001 }, { "epoch": 1.452288377757999, "grad_norm": 0.6039605704648296, "learning_rate": 9.205633648130352e-07, "loss": 0.276, "step": 31002 }, { "epoch": 1.4523352227479271, "grad_norm": 0.5839126600983225, "learning_rate": 9.204163616452736e-07, "loss": 0.2696, "step": 31003 }, { "epoch": 1.4523820677378554, "grad_norm": 0.561896200504255, "learning_rate": 9.202693675676178e-07, "loss": 0.2649, "step": 31004 }, { "epoch": 1.4524289127277838, "grad_norm": 0.5955220062837374, "learning_rate": 9.20122382580915e-07, "loss": 0.2696, "step": 31005 }, { "epoch": 1.452475757717712, "grad_norm": 0.6214232027913037, "learning_rate": 9.199754066860115e-07, "loss": 0.2717, "step": 31006 }, { "epoch": 1.4525226027076403, "grad_norm": 0.6229180556378198, "learning_rate": 9.198284398837515e-07, "loss": 0.2749, "step": 31007 }, { "epoch": 1.4525694476975688, "grad_norm": 0.5661362505299695, "learning_rate": 9.196814821749817e-07, "loss": 0.2577, "step": 31008 }, { "epoch": 1.452616292687497, "grad_norm": 0.5904789739809971, "learning_rate": 9.195345335605476e-07, "loss": 0.2497, "step": 31009 }, { "epoch": 1.4526631376774253, "grad_norm": 0.5814239460092488, "learning_rate": 9.193875940412961e-07, "loss": 0.2672, "step": 31010 }, { "epoch": 1.4527099826673537, "grad_norm": 0.6204192414730029, "learning_rate": 9.192406636180709e-07, "loss": 0.2735, "step": 31011 }, { "epoch": 1.452756827657282, "grad_norm": 0.5875628768244647, "learning_rate": 9.19093742291719e-07, "loss": 0.2724, "step": 31012 }, { "epoch": 1.4528036726472104, "grad_norm": 0.5679313228683776, "learning_rate": 9.189468300630846e-07, "loss": 0.2621, "step": 31013 }, { "epoch": 1.4528505176371387, "grad_norm": 0.6172720881205802, "learning_rate": 9.187999269330136e-07, "loss": 0.2879, "step": 31014 }, { "epoch": 1.4528973626270671, "grad_norm": 0.5949979079988589, "learning_rate": 9.186530329023519e-07, "loss": 0.2742, "step": 31015 }, { "epoch": 1.4529442076169954, "grad_norm": 0.6356525523660479, "learning_rate": 9.185061479719454e-07, "loss": 0.2724, "step": 31016 }, { "epoch": 1.4529910526069236, "grad_norm": 0.5944166512509487, "learning_rate": 9.183592721426374e-07, "loss": 0.2649, "step": 31017 }, { "epoch": 1.453037897596852, "grad_norm": 0.5466405597866573, "learning_rate": 9.182124054152752e-07, "loss": 0.2695, "step": 31018 }, { "epoch": 1.4530847425867803, "grad_norm": 0.5734116887375742, "learning_rate": 9.180655477907022e-07, "loss": 0.2538, "step": 31019 }, { "epoch": 1.4531315875767086, "grad_norm": 0.616473133968278, "learning_rate": 9.179186992697644e-07, "loss": 0.2725, "step": 31020 }, { "epoch": 1.453178432566637, "grad_norm": 0.5927929280831989, "learning_rate": 9.177718598533069e-07, "loss": 0.2793, "step": 31021 }, { "epoch": 1.4532252775565653, "grad_norm": 0.5569240579072453, "learning_rate": 9.176250295421746e-07, "loss": 0.2633, "step": 31022 }, { "epoch": 1.4532721225464935, "grad_norm": 0.584434469274428, "learning_rate": 9.174782083372133e-07, "loss": 0.277, "step": 31023 }, { "epoch": 1.453318967536422, "grad_norm": 0.5375707555205033, "learning_rate": 9.173313962392674e-07, "loss": 0.2475, "step": 31024 }, { "epoch": 1.4533658125263504, "grad_norm": 0.5938102305890985, "learning_rate": 9.171845932491805e-07, "loss": 0.2775, "step": 31025 }, { "epoch": 1.4534126575162787, "grad_norm": 0.5998933246657562, "learning_rate": 9.170377993677984e-07, "loss": 0.2686, "step": 31026 }, { "epoch": 1.453459502506207, "grad_norm": 0.5850394345900992, "learning_rate": 9.168910145959659e-07, "loss": 0.2668, "step": 31027 }, { "epoch": 1.4535063474961354, "grad_norm": 0.6054892951942077, "learning_rate": 9.16744238934528e-07, "loss": 0.2861, "step": 31028 }, { "epoch": 1.4535531924860636, "grad_norm": 0.6053522288955127, "learning_rate": 9.165974723843288e-07, "loss": 0.269, "step": 31029 }, { "epoch": 1.4536000374759919, "grad_norm": 0.6024192783020733, "learning_rate": 9.16450714946214e-07, "loss": 0.2687, "step": 31030 }, { "epoch": 1.4536468824659203, "grad_norm": 0.6269375026601631, "learning_rate": 9.163039666210274e-07, "loss": 0.2756, "step": 31031 }, { "epoch": 1.4536937274558486, "grad_norm": 0.596384452459604, "learning_rate": 9.161572274096125e-07, "loss": 0.2675, "step": 31032 }, { "epoch": 1.4537405724457768, "grad_norm": 0.6087708849266394, "learning_rate": 9.160104973128147e-07, "loss": 0.2822, "step": 31033 }, { "epoch": 1.4537874174357053, "grad_norm": 0.5963001288780652, "learning_rate": 9.158637763314784e-07, "loss": 0.2574, "step": 31034 }, { "epoch": 1.4538342624256335, "grad_norm": 0.6013524262261766, "learning_rate": 9.157170644664482e-07, "loss": 0.2573, "step": 31035 }, { "epoch": 1.453881107415562, "grad_norm": 0.6091018739478802, "learning_rate": 9.155703617185677e-07, "loss": 0.2804, "step": 31036 }, { "epoch": 1.4539279524054902, "grad_norm": 0.5578002696654516, "learning_rate": 9.154236680886825e-07, "loss": 0.2635, "step": 31037 }, { "epoch": 1.4539747973954187, "grad_norm": 0.5437772338307779, "learning_rate": 9.152769835776357e-07, "loss": 0.2488, "step": 31038 }, { "epoch": 1.454021642385347, "grad_norm": 0.578892822861993, "learning_rate": 9.151303081862709e-07, "loss": 0.2713, "step": 31039 }, { "epoch": 1.4540684873752752, "grad_norm": 0.5798489631097832, "learning_rate": 9.149836419154328e-07, "loss": 0.2714, "step": 31040 }, { "epoch": 1.4541153323652036, "grad_norm": 0.6043492321270953, "learning_rate": 9.148369847659655e-07, "loss": 0.2569, "step": 31041 }, { "epoch": 1.4541621773551319, "grad_norm": 0.5694761480250262, "learning_rate": 9.146903367387128e-07, "loss": 0.2638, "step": 31042 }, { "epoch": 1.45420902234506, "grad_norm": 0.6296168508285798, "learning_rate": 9.145436978345196e-07, "loss": 0.2871, "step": 31043 }, { "epoch": 1.4542558673349886, "grad_norm": 0.6228245745249935, "learning_rate": 9.143970680542283e-07, "loss": 0.2842, "step": 31044 }, { "epoch": 1.4543027123249168, "grad_norm": 0.600386146997337, "learning_rate": 9.142504473986841e-07, "loss": 0.2733, "step": 31045 }, { "epoch": 1.454349557314845, "grad_norm": 0.5602633237670874, "learning_rate": 9.141038358687291e-07, "loss": 0.2597, "step": 31046 }, { "epoch": 1.4543964023047735, "grad_norm": 0.5976800860444256, "learning_rate": 9.139572334652078e-07, "loss": 0.2675, "step": 31047 }, { "epoch": 1.4544432472947018, "grad_norm": 0.6174652432681724, "learning_rate": 9.138106401889644e-07, "loss": 0.295, "step": 31048 }, { "epoch": 1.4544900922846302, "grad_norm": 0.6179595374309826, "learning_rate": 9.136640560408424e-07, "loss": 0.2888, "step": 31049 }, { "epoch": 1.4545369372745585, "grad_norm": 0.5505630203051327, "learning_rate": 9.135174810216846e-07, "loss": 0.2564, "step": 31050 }, { "epoch": 1.454583782264487, "grad_norm": 0.5995262051027769, "learning_rate": 9.133709151323347e-07, "loss": 0.2623, "step": 31051 }, { "epoch": 1.4546306272544152, "grad_norm": 0.5959904020583799, "learning_rate": 9.132243583736375e-07, "loss": 0.2745, "step": 31052 }, { "epoch": 1.4546774722443434, "grad_norm": 0.5728576106674275, "learning_rate": 9.130778107464342e-07, "loss": 0.2711, "step": 31053 }, { "epoch": 1.4547243172342719, "grad_norm": 0.6221450330433285, "learning_rate": 9.129312722515693e-07, "loss": 0.2813, "step": 31054 }, { "epoch": 1.4547711622242, "grad_norm": 0.5776011605729154, "learning_rate": 9.12784742889887e-07, "loss": 0.2647, "step": 31055 }, { "epoch": 1.4548180072141284, "grad_norm": 0.5776375439765256, "learning_rate": 9.126382226622288e-07, "loss": 0.2727, "step": 31056 }, { "epoch": 1.4548648522040568, "grad_norm": 0.5529308689913549, "learning_rate": 9.124917115694385e-07, "loss": 0.2748, "step": 31057 }, { "epoch": 1.454911697193985, "grad_norm": 0.554145463927487, "learning_rate": 9.123452096123592e-07, "loss": 0.259, "step": 31058 }, { "epoch": 1.4549585421839133, "grad_norm": 0.5817523733376632, "learning_rate": 9.121987167918353e-07, "loss": 0.2732, "step": 31059 }, { "epoch": 1.4550053871738418, "grad_norm": 0.5883635439829615, "learning_rate": 9.120522331087079e-07, "loss": 0.2709, "step": 31060 }, { "epoch": 1.4550522321637702, "grad_norm": 0.6070372356336926, "learning_rate": 9.119057585638214e-07, "loss": 0.2649, "step": 31061 }, { "epoch": 1.4550990771536985, "grad_norm": 0.6018367981547411, "learning_rate": 9.117592931580174e-07, "loss": 0.2728, "step": 31062 }, { "epoch": 1.4551459221436267, "grad_norm": 0.5851339074784552, "learning_rate": 9.116128368921395e-07, "loss": 0.2801, "step": 31063 }, { "epoch": 1.4551927671335552, "grad_norm": 0.5358471215503967, "learning_rate": 9.114663897670303e-07, "loss": 0.2481, "step": 31064 }, { "epoch": 1.4552396121234834, "grad_norm": 0.6147484218455589, "learning_rate": 9.113199517835339e-07, "loss": 0.2562, "step": 31065 }, { "epoch": 1.4552864571134116, "grad_norm": 0.5840574221758199, "learning_rate": 9.111735229424909e-07, "loss": 0.2787, "step": 31066 }, { "epoch": 1.45533330210334, "grad_norm": 0.5930638385702337, "learning_rate": 9.110271032447459e-07, "loss": 0.2673, "step": 31067 }, { "epoch": 1.4553801470932684, "grad_norm": 0.6255915001763863, "learning_rate": 9.108806926911396e-07, "loss": 0.2783, "step": 31068 }, { "epoch": 1.4554269920831966, "grad_norm": 0.5801663652743844, "learning_rate": 9.107342912825154e-07, "loss": 0.2615, "step": 31069 }, { "epoch": 1.455473837073125, "grad_norm": 0.6076894229349017, "learning_rate": 9.105878990197159e-07, "loss": 0.2711, "step": 31070 }, { "epoch": 1.4555206820630533, "grad_norm": 0.6663740779625272, "learning_rate": 9.104415159035837e-07, "loss": 0.3002, "step": 31071 }, { "epoch": 1.4555675270529818, "grad_norm": 0.6182401339196744, "learning_rate": 9.10295141934962e-07, "loss": 0.275, "step": 31072 }, { "epoch": 1.45561437204291, "grad_norm": 0.5895501918308615, "learning_rate": 9.10148777114692e-07, "loss": 0.2687, "step": 31073 }, { "epoch": 1.4556612170328385, "grad_norm": 0.6063553509573663, "learning_rate": 9.100024214436154e-07, "loss": 0.2786, "step": 31074 }, { "epoch": 1.4557080620227667, "grad_norm": 0.6432886605455892, "learning_rate": 9.098560749225751e-07, "loss": 0.2979, "step": 31075 }, { "epoch": 1.455754907012695, "grad_norm": 0.623628999583537, "learning_rate": 9.097097375524136e-07, "loss": 0.2788, "step": 31076 }, { "epoch": 1.4558017520026234, "grad_norm": 0.6489670485694955, "learning_rate": 9.095634093339728e-07, "loss": 0.2995, "step": 31077 }, { "epoch": 1.4558485969925516, "grad_norm": 0.6129972935476559, "learning_rate": 9.094170902680949e-07, "loss": 0.2771, "step": 31078 }, { "epoch": 1.4558954419824799, "grad_norm": 0.6239946506119225, "learning_rate": 9.092707803556228e-07, "loss": 0.2744, "step": 31079 }, { "epoch": 1.4559422869724084, "grad_norm": 0.6233205341220283, "learning_rate": 9.091244795973975e-07, "loss": 0.2739, "step": 31080 }, { "epoch": 1.4559891319623366, "grad_norm": 0.6076429127854511, "learning_rate": 9.089781879942599e-07, "loss": 0.2826, "step": 31081 }, { "epoch": 1.4560359769522648, "grad_norm": 0.5647750422365218, "learning_rate": 9.088319055470532e-07, "loss": 0.2791, "step": 31082 }, { "epoch": 1.4560828219421933, "grad_norm": 0.5719927038238829, "learning_rate": 9.086856322566187e-07, "loss": 0.2677, "step": 31083 }, { "epoch": 1.4561296669321215, "grad_norm": 0.6124435531663834, "learning_rate": 9.085393681237986e-07, "loss": 0.2639, "step": 31084 }, { "epoch": 1.45617651192205, "grad_norm": 0.5911836836780021, "learning_rate": 9.083931131494353e-07, "loss": 0.2785, "step": 31085 }, { "epoch": 1.4562233569119782, "grad_norm": 0.6445295222814318, "learning_rate": 9.082468673343686e-07, "loss": 0.2792, "step": 31086 }, { "epoch": 1.4562702019019067, "grad_norm": 0.5917069620713269, "learning_rate": 9.081006306794421e-07, "loss": 0.2772, "step": 31087 }, { "epoch": 1.456317046891835, "grad_norm": 0.5690309586540072, "learning_rate": 9.079544031854954e-07, "loss": 0.2656, "step": 31088 }, { "epoch": 1.4563638918817632, "grad_norm": 0.5958573519701633, "learning_rate": 9.078081848533709e-07, "loss": 0.2849, "step": 31089 }, { "epoch": 1.4564107368716916, "grad_norm": 0.6200191476032771, "learning_rate": 9.076619756839103e-07, "loss": 0.2925, "step": 31090 }, { "epoch": 1.4564575818616199, "grad_norm": 0.6348103244063106, "learning_rate": 9.075157756779557e-07, "loss": 0.294, "step": 31091 }, { "epoch": 1.4565044268515481, "grad_norm": 0.6111431477282403, "learning_rate": 9.073695848363468e-07, "loss": 0.2785, "step": 31092 }, { "epoch": 1.4565512718414766, "grad_norm": 0.618060112272022, "learning_rate": 9.072234031599253e-07, "loss": 0.2859, "step": 31093 }, { "epoch": 1.4565981168314048, "grad_norm": 0.5948614974590676, "learning_rate": 9.070772306495337e-07, "loss": 0.2661, "step": 31094 }, { "epoch": 1.456644961821333, "grad_norm": 0.560083886927611, "learning_rate": 9.069310673060117e-07, "loss": 0.2662, "step": 31095 }, { "epoch": 1.4566918068112615, "grad_norm": 0.6107418565168027, "learning_rate": 9.067849131302007e-07, "loss": 0.2815, "step": 31096 }, { "epoch": 1.45673865180119, "grad_norm": 0.6049866258422817, "learning_rate": 9.066387681229421e-07, "loss": 0.2709, "step": 31097 }, { "epoch": 1.4567854967911182, "grad_norm": 0.6129789793192412, "learning_rate": 9.064926322850781e-07, "loss": 0.2807, "step": 31098 }, { "epoch": 1.4568323417810465, "grad_norm": 0.616489939658819, "learning_rate": 9.063465056174472e-07, "loss": 0.2728, "step": 31099 }, { "epoch": 1.456879186770975, "grad_norm": 0.6059406905261794, "learning_rate": 9.06200388120892e-07, "loss": 0.2683, "step": 31100 }, { "epoch": 1.4569260317609032, "grad_norm": 0.6129510043706216, "learning_rate": 9.060542797962535e-07, "loss": 0.2759, "step": 31101 }, { "epoch": 1.4569728767508314, "grad_norm": 0.6237030633296808, "learning_rate": 9.059081806443712e-07, "loss": 0.2836, "step": 31102 }, { "epoch": 1.4570197217407599, "grad_norm": 0.6230652442567821, "learning_rate": 9.057620906660869e-07, "loss": 0.2643, "step": 31103 }, { "epoch": 1.4570665667306881, "grad_norm": 0.6131846182713635, "learning_rate": 9.056160098622419e-07, "loss": 0.2723, "step": 31104 }, { "epoch": 1.4571134117206164, "grad_norm": 0.5877735904611695, "learning_rate": 9.054699382336749e-07, "loss": 0.2726, "step": 31105 }, { "epoch": 1.4571602567105448, "grad_norm": 0.5989046553230714, "learning_rate": 9.053238757812277e-07, "loss": 0.2622, "step": 31106 }, { "epoch": 1.457207101700473, "grad_norm": 0.5620868159411119, "learning_rate": 9.051778225057417e-07, "loss": 0.2641, "step": 31107 }, { "epoch": 1.4572539466904015, "grad_norm": 0.6204543679169567, "learning_rate": 9.050317784080559e-07, "loss": 0.271, "step": 31108 }, { "epoch": 1.4573007916803298, "grad_norm": 0.5524897265418472, "learning_rate": 9.04885743489011e-07, "loss": 0.262, "step": 31109 }, { "epoch": 1.4573476366702582, "grad_norm": 0.6105629662344627, "learning_rate": 9.047397177494485e-07, "loss": 0.2808, "step": 31110 }, { "epoch": 1.4573944816601865, "grad_norm": 0.6467013519872477, "learning_rate": 9.045937011902076e-07, "loss": 0.2811, "step": 31111 }, { "epoch": 1.4574413266501147, "grad_norm": 0.6504514050549302, "learning_rate": 9.044476938121286e-07, "loss": 0.2809, "step": 31112 }, { "epoch": 1.4574881716400432, "grad_norm": 0.6203470452989739, "learning_rate": 9.043016956160522e-07, "loss": 0.2569, "step": 31113 }, { "epoch": 1.4575350166299714, "grad_norm": 0.6159871883817976, "learning_rate": 9.041557066028192e-07, "loss": 0.2741, "step": 31114 }, { "epoch": 1.4575818616198997, "grad_norm": 0.6490582215427804, "learning_rate": 9.040097267732681e-07, "loss": 0.2701, "step": 31115 }, { "epoch": 1.4576287066098281, "grad_norm": 0.58445438162486, "learning_rate": 9.03863756128241e-07, "loss": 0.2727, "step": 31116 }, { "epoch": 1.4576755515997564, "grad_norm": 0.5816296810533771, "learning_rate": 9.037177946685757e-07, "loss": 0.2796, "step": 31117 }, { "epoch": 1.4577223965896846, "grad_norm": 0.6024418493502226, "learning_rate": 9.035718423951134e-07, "loss": 0.2855, "step": 31118 }, { "epoch": 1.457769241579613, "grad_norm": 0.5910210480500654, "learning_rate": 9.034258993086939e-07, "loss": 0.2646, "step": 31119 }, { "epoch": 1.4578160865695413, "grad_norm": 0.6179739302426759, "learning_rate": 9.03279965410157e-07, "loss": 0.2774, "step": 31120 }, { "epoch": 1.4578629315594698, "grad_norm": 0.582613291332463, "learning_rate": 9.031340407003436e-07, "loss": 0.2598, "step": 31121 }, { "epoch": 1.457909776549398, "grad_norm": 0.5844250750489971, "learning_rate": 9.029881251800923e-07, "loss": 0.2697, "step": 31122 }, { "epoch": 1.4579566215393265, "grad_norm": 0.619489201027917, "learning_rate": 9.028422188502423e-07, "loss": 0.2753, "step": 31123 }, { "epoch": 1.4580034665292547, "grad_norm": 0.6358445461340383, "learning_rate": 9.026963217116338e-07, "loss": 0.2691, "step": 31124 }, { "epoch": 1.458050311519183, "grad_norm": 0.5926383784955178, "learning_rate": 9.025504337651065e-07, "loss": 0.2656, "step": 31125 }, { "epoch": 1.4580971565091114, "grad_norm": 0.6319518444257072, "learning_rate": 9.024045550114999e-07, "loss": 0.2871, "step": 31126 }, { "epoch": 1.4581440014990397, "grad_norm": 0.5864369789141672, "learning_rate": 9.022586854516535e-07, "loss": 0.2776, "step": 31127 }, { "epoch": 1.458190846488968, "grad_norm": 0.5942468623117363, "learning_rate": 9.021128250864075e-07, "loss": 0.2815, "step": 31128 }, { "epoch": 1.4582376914788964, "grad_norm": 0.6277617177764181, "learning_rate": 9.01966973916601e-07, "loss": 0.2939, "step": 31129 }, { "epoch": 1.4582845364688246, "grad_norm": 0.5940773428823781, "learning_rate": 9.018211319430716e-07, "loss": 0.2897, "step": 31130 }, { "epoch": 1.4583313814587529, "grad_norm": 0.6290473055386601, "learning_rate": 9.016752991666602e-07, "loss": 0.285, "step": 31131 }, { "epoch": 1.4583782264486813, "grad_norm": 0.6115371629009017, "learning_rate": 9.015294755882057e-07, "loss": 0.2772, "step": 31132 }, { "epoch": 1.4584250714386098, "grad_norm": 0.6505019488872443, "learning_rate": 9.013836612085472e-07, "loss": 0.2797, "step": 31133 }, { "epoch": 1.458471916428538, "grad_norm": 0.5943538885133998, "learning_rate": 9.012378560285248e-07, "loss": 0.2674, "step": 31134 }, { "epoch": 1.4585187614184663, "grad_norm": 0.6157252238676042, "learning_rate": 9.010920600489759e-07, "loss": 0.2643, "step": 31135 }, { "epoch": 1.4585656064083947, "grad_norm": 0.6318569935126129, "learning_rate": 9.009462732707411e-07, "loss": 0.2836, "step": 31136 }, { "epoch": 1.458612451398323, "grad_norm": 0.6168218492644938, "learning_rate": 9.008004956946579e-07, "loss": 0.2859, "step": 31137 }, { "epoch": 1.4586592963882512, "grad_norm": 0.602857387635998, "learning_rate": 9.006547273215657e-07, "loss": 0.2663, "step": 31138 }, { "epoch": 1.4587061413781797, "grad_norm": 0.5922788049394755, "learning_rate": 9.005089681523039e-07, "loss": 0.2695, "step": 31139 }, { "epoch": 1.458752986368108, "grad_norm": 0.6094111177754357, "learning_rate": 9.003632181877115e-07, "loss": 0.2671, "step": 31140 }, { "epoch": 1.4587998313580361, "grad_norm": 0.5829746951264164, "learning_rate": 9.002174774286259e-07, "loss": 0.2594, "step": 31141 }, { "epoch": 1.4588466763479646, "grad_norm": 0.5979898974951882, "learning_rate": 9.000717458758868e-07, "loss": 0.2552, "step": 31142 }, { "epoch": 1.4588935213378929, "grad_norm": 0.5507702357544956, "learning_rate": 8.999260235303334e-07, "loss": 0.2478, "step": 31143 }, { "epoch": 1.4589403663278213, "grad_norm": 0.5798257463103965, "learning_rate": 8.997803103928026e-07, "loss": 0.2673, "step": 31144 }, { "epoch": 1.4589872113177496, "grad_norm": 0.5993724418894308, "learning_rate": 8.996346064641342e-07, "loss": 0.2678, "step": 31145 }, { "epoch": 1.459034056307678, "grad_norm": 0.6478764421441275, "learning_rate": 8.994889117451672e-07, "loss": 0.2724, "step": 31146 }, { "epoch": 1.4590809012976063, "grad_norm": 0.5744293727916392, "learning_rate": 8.993432262367383e-07, "loss": 0.2719, "step": 31147 }, { "epoch": 1.4591277462875345, "grad_norm": 0.63327001138489, "learning_rate": 8.991975499396868e-07, "loss": 0.2865, "step": 31148 }, { "epoch": 1.459174591277463, "grad_norm": 0.5737259758725641, "learning_rate": 8.990518828548511e-07, "loss": 0.2585, "step": 31149 }, { "epoch": 1.4592214362673912, "grad_norm": 0.6104123935660536, "learning_rate": 8.989062249830702e-07, "loss": 0.2788, "step": 31150 }, { "epoch": 1.4592682812573194, "grad_norm": 0.6225204767926609, "learning_rate": 8.987605763251808e-07, "loss": 0.2812, "step": 31151 }, { "epoch": 1.459315126247248, "grad_norm": 0.5653110950097312, "learning_rate": 8.986149368820226e-07, "loss": 0.259, "step": 31152 }, { "epoch": 1.4593619712371761, "grad_norm": 0.6378965432220931, "learning_rate": 8.984693066544319e-07, "loss": 0.2807, "step": 31153 }, { "epoch": 1.4594088162271044, "grad_norm": 0.5701865085065021, "learning_rate": 8.983236856432483e-07, "loss": 0.2696, "step": 31154 }, { "epoch": 1.4594556612170329, "grad_norm": 0.6379441782687884, "learning_rate": 8.98178073849309e-07, "loss": 0.2902, "step": 31155 }, { "epoch": 1.459502506206961, "grad_norm": 0.6400900912907731, "learning_rate": 8.980324712734531e-07, "loss": 0.2833, "step": 31156 }, { "epoch": 1.4595493511968896, "grad_norm": 0.5514255571266551, "learning_rate": 8.97886877916517e-07, "loss": 0.2498, "step": 31157 }, { "epoch": 1.4595961961868178, "grad_norm": 0.5867757008069505, "learning_rate": 8.977412937793401e-07, "loss": 0.2504, "step": 31158 }, { "epoch": 1.4596430411767463, "grad_norm": 0.5592149882602777, "learning_rate": 8.975957188627585e-07, "loss": 0.2684, "step": 31159 }, { "epoch": 1.4596898861666745, "grad_norm": 0.6235293931665258, "learning_rate": 8.974501531676106e-07, "loss": 0.2808, "step": 31160 }, { "epoch": 1.4597367311566027, "grad_norm": 0.5398618385337233, "learning_rate": 8.973045966947344e-07, "loss": 0.2441, "step": 31161 }, { "epoch": 1.4597835761465312, "grad_norm": 0.6116520467056332, "learning_rate": 8.971590494449675e-07, "loss": 0.265, "step": 31162 }, { "epoch": 1.4598304211364594, "grad_norm": 0.6156805208397904, "learning_rate": 8.97013511419148e-07, "loss": 0.2533, "step": 31163 }, { "epoch": 1.4598772661263877, "grad_norm": 0.6107799150532076, "learning_rate": 8.968679826181123e-07, "loss": 0.2808, "step": 31164 }, { "epoch": 1.4599241111163161, "grad_norm": 0.5820891262251267, "learning_rate": 8.96722463042699e-07, "loss": 0.2699, "step": 31165 }, { "epoch": 1.4599709561062444, "grad_norm": 0.6169808377692028, "learning_rate": 8.965769526937446e-07, "loss": 0.2821, "step": 31166 }, { "epoch": 1.4600178010961726, "grad_norm": 0.5991817158757031, "learning_rate": 8.964314515720862e-07, "loss": 0.2806, "step": 31167 }, { "epoch": 1.460064646086101, "grad_norm": 0.5865529066172068, "learning_rate": 8.962859596785622e-07, "loss": 0.261, "step": 31168 }, { "epoch": 1.4601114910760296, "grad_norm": 0.5685668825340089, "learning_rate": 8.961404770140091e-07, "loss": 0.2733, "step": 31169 }, { "epoch": 1.4601583360659578, "grad_norm": 0.5933399812908647, "learning_rate": 8.959950035792656e-07, "loss": 0.2806, "step": 31170 }, { "epoch": 1.460205181055886, "grad_norm": 0.5875266588845585, "learning_rate": 8.958495393751676e-07, "loss": 0.277, "step": 31171 }, { "epoch": 1.4602520260458145, "grad_norm": 0.6489797888554435, "learning_rate": 8.957040844025514e-07, "loss": 0.2859, "step": 31172 }, { "epoch": 1.4602988710357427, "grad_norm": 0.5806213628400767, "learning_rate": 8.95558638662255e-07, "loss": 0.2598, "step": 31173 }, { "epoch": 1.460345716025671, "grad_norm": 0.5821439838308697, "learning_rate": 8.954132021551154e-07, "loss": 0.2661, "step": 31174 }, { "epoch": 1.4603925610155994, "grad_norm": 0.5827474556242387, "learning_rate": 8.952677748819694e-07, "loss": 0.2676, "step": 31175 }, { "epoch": 1.4604394060055277, "grad_norm": 0.5747242600031567, "learning_rate": 8.951223568436542e-07, "loss": 0.2596, "step": 31176 }, { "epoch": 1.460486250995456, "grad_norm": 0.5787951498817869, "learning_rate": 8.949769480410072e-07, "loss": 0.2637, "step": 31177 }, { "epoch": 1.4605330959853844, "grad_norm": 0.610015953954258, "learning_rate": 8.948315484748645e-07, "loss": 0.2723, "step": 31178 }, { "epoch": 1.4605799409753126, "grad_norm": 0.5512857822728576, "learning_rate": 8.94686158146062e-07, "loss": 0.2516, "step": 31179 }, { "epoch": 1.460626785965241, "grad_norm": 0.6202614710822962, "learning_rate": 8.945407770554371e-07, "loss": 0.2716, "step": 31180 }, { "epoch": 1.4606736309551693, "grad_norm": 0.6221365559141558, "learning_rate": 8.943954052038264e-07, "loss": 0.3061, "step": 31181 }, { "epoch": 1.4607204759450978, "grad_norm": 0.6049178598271646, "learning_rate": 8.942500425920669e-07, "loss": 0.2924, "step": 31182 }, { "epoch": 1.460767320935026, "grad_norm": 0.5889787802523437, "learning_rate": 8.941046892209954e-07, "loss": 0.2538, "step": 31183 }, { "epoch": 1.4608141659249543, "grad_norm": 0.6120203793010343, "learning_rate": 8.939593450914469e-07, "loss": 0.2833, "step": 31184 }, { "epoch": 1.4608610109148827, "grad_norm": 0.5637515802327562, "learning_rate": 8.938140102042597e-07, "loss": 0.2593, "step": 31185 }, { "epoch": 1.460907855904811, "grad_norm": 0.6179498598983172, "learning_rate": 8.936686845602683e-07, "loss": 0.283, "step": 31186 }, { "epoch": 1.4609547008947392, "grad_norm": 0.6000832051810866, "learning_rate": 8.9352336816031e-07, "loss": 0.2709, "step": 31187 }, { "epoch": 1.4610015458846677, "grad_norm": 0.5997338255691105, "learning_rate": 8.933780610052209e-07, "loss": 0.2619, "step": 31188 }, { "epoch": 1.461048390874596, "grad_norm": 0.594122995948063, "learning_rate": 8.932327630958379e-07, "loss": 0.2695, "step": 31189 }, { "epoch": 1.4610952358645242, "grad_norm": 0.6092255649066254, "learning_rate": 8.930874744329957e-07, "loss": 0.2763, "step": 31190 }, { "epoch": 1.4611420808544526, "grad_norm": 0.6369469022432476, "learning_rate": 8.929421950175313e-07, "loss": 0.2779, "step": 31191 }, { "epoch": 1.4611889258443809, "grad_norm": 0.6138800400708562, "learning_rate": 8.927969248502815e-07, "loss": 0.2673, "step": 31192 }, { "epoch": 1.4612357708343093, "grad_norm": 0.530847395688859, "learning_rate": 8.926516639320806e-07, "loss": 0.2409, "step": 31193 }, { "epoch": 1.4612826158242376, "grad_norm": 0.6632626216928409, "learning_rate": 8.925064122637653e-07, "loss": 0.2834, "step": 31194 }, { "epoch": 1.461329460814166, "grad_norm": 0.6162730317723087, "learning_rate": 8.923611698461726e-07, "loss": 0.2866, "step": 31195 }, { "epoch": 1.4613763058040943, "grad_norm": 0.6118822210653265, "learning_rate": 8.922159366801361e-07, "loss": 0.2732, "step": 31196 }, { "epoch": 1.4614231507940225, "grad_norm": 0.6044293252891871, "learning_rate": 8.920707127664932e-07, "loss": 0.2812, "step": 31197 }, { "epoch": 1.461469995783951, "grad_norm": 0.6289861170409735, "learning_rate": 8.919254981060791e-07, "loss": 0.2846, "step": 31198 }, { "epoch": 1.4615168407738792, "grad_norm": 0.607797114449916, "learning_rate": 8.917802926997302e-07, "loss": 0.2795, "step": 31199 }, { "epoch": 1.4615636857638075, "grad_norm": 0.6051443127599869, "learning_rate": 8.916350965482809e-07, "loss": 0.2842, "step": 31200 }, { "epoch": 1.461610530753736, "grad_norm": 0.5580826320710381, "learning_rate": 8.914899096525681e-07, "loss": 0.263, "step": 31201 }, { "epoch": 1.4616573757436642, "grad_norm": 0.5903534792197014, "learning_rate": 8.913447320134256e-07, "loss": 0.2589, "step": 31202 }, { "epoch": 1.4617042207335924, "grad_norm": 0.5927130686603822, "learning_rate": 8.9119956363169e-07, "loss": 0.2705, "step": 31203 }, { "epoch": 1.4617510657235209, "grad_norm": 0.5701327341336201, "learning_rate": 8.910544045081967e-07, "loss": 0.2622, "step": 31204 }, { "epoch": 1.4617979107134493, "grad_norm": 0.6649539885393091, "learning_rate": 8.909092546437814e-07, "loss": 0.2911, "step": 31205 }, { "epoch": 1.4618447557033776, "grad_norm": 0.5398998882356774, "learning_rate": 8.907641140392784e-07, "loss": 0.2614, "step": 31206 }, { "epoch": 1.4618916006933058, "grad_norm": 0.5769434654819803, "learning_rate": 8.906189826955242e-07, "loss": 0.2624, "step": 31207 }, { "epoch": 1.4619384456832343, "grad_norm": 0.6112503301927757, "learning_rate": 8.904738606133523e-07, "loss": 0.2674, "step": 31208 }, { "epoch": 1.4619852906731625, "grad_norm": 0.5601136518568797, "learning_rate": 8.90328747793599e-07, "loss": 0.269, "step": 31209 }, { "epoch": 1.4620321356630908, "grad_norm": 0.6020459125013363, "learning_rate": 8.90183644237099e-07, "loss": 0.2749, "step": 31210 }, { "epoch": 1.4620789806530192, "grad_norm": 0.5404410182638089, "learning_rate": 8.900385499446876e-07, "loss": 0.2612, "step": 31211 }, { "epoch": 1.4621258256429475, "grad_norm": 0.5931759786961351, "learning_rate": 8.898934649172006e-07, "loss": 0.2755, "step": 31212 }, { "epoch": 1.4621726706328757, "grad_norm": 0.6175871640114714, "learning_rate": 8.897483891554721e-07, "loss": 0.3046, "step": 31213 }, { "epoch": 1.4622195156228042, "grad_norm": 0.6116897831759408, "learning_rate": 8.896033226603357e-07, "loss": 0.2827, "step": 31214 }, { "epoch": 1.4622663606127324, "grad_norm": 0.635631366907448, "learning_rate": 8.894582654326275e-07, "loss": 0.2906, "step": 31215 }, { "epoch": 1.4623132056026609, "grad_norm": 0.593016957042912, "learning_rate": 8.893132174731826e-07, "loss": 0.278, "step": 31216 }, { "epoch": 1.4623600505925891, "grad_norm": 0.6012774610083415, "learning_rate": 8.891681787828349e-07, "loss": 0.2721, "step": 31217 }, { "epoch": 1.4624068955825176, "grad_norm": 0.592834963923453, "learning_rate": 8.890231493624197e-07, "loss": 0.2611, "step": 31218 }, { "epoch": 1.4624537405724458, "grad_norm": 0.5862154407177528, "learning_rate": 8.888781292127719e-07, "loss": 0.2695, "step": 31219 }, { "epoch": 1.462500585562374, "grad_norm": 0.6049098357649758, "learning_rate": 8.887331183347256e-07, "loss": 0.2702, "step": 31220 }, { "epoch": 1.4625474305523025, "grad_norm": 0.5786009423898173, "learning_rate": 8.885881167291147e-07, "loss": 0.2653, "step": 31221 }, { "epoch": 1.4625942755422308, "grad_norm": 0.6073938049040206, "learning_rate": 8.884431243967739e-07, "loss": 0.2785, "step": 31222 }, { "epoch": 1.462641120532159, "grad_norm": 0.6292880155910341, "learning_rate": 8.882981413385381e-07, "loss": 0.2754, "step": 31223 }, { "epoch": 1.4626879655220875, "grad_norm": 0.596508566702763, "learning_rate": 8.88153167555241e-07, "loss": 0.2506, "step": 31224 }, { "epoch": 1.4627348105120157, "grad_norm": 0.5787010623497936, "learning_rate": 8.880082030477186e-07, "loss": 0.2645, "step": 31225 }, { "epoch": 1.462781655501944, "grad_norm": 0.5822756167476985, "learning_rate": 8.878632478168025e-07, "loss": 0.2749, "step": 31226 }, { "epoch": 1.4628285004918724, "grad_norm": 0.6491254870569622, "learning_rate": 8.877183018633295e-07, "loss": 0.2796, "step": 31227 }, { "epoch": 1.4628753454818006, "grad_norm": 0.5620565609778215, "learning_rate": 8.875733651881313e-07, "loss": 0.2659, "step": 31228 }, { "epoch": 1.4629221904717291, "grad_norm": 0.5858910192528666, "learning_rate": 8.874284377920434e-07, "loss": 0.2683, "step": 31229 }, { "epoch": 1.4629690354616574, "grad_norm": 0.5707343062956428, "learning_rate": 8.872835196758995e-07, "loss": 0.2645, "step": 31230 }, { "epoch": 1.4630158804515858, "grad_norm": 0.5557055842531198, "learning_rate": 8.871386108405341e-07, "loss": 0.258, "step": 31231 }, { "epoch": 1.463062725441514, "grad_norm": 0.5876277954329382, "learning_rate": 8.869937112867801e-07, "loss": 0.2724, "step": 31232 }, { "epoch": 1.4631095704314423, "grad_norm": 0.6090107114385448, "learning_rate": 8.868488210154719e-07, "loss": 0.2692, "step": 31233 }, { "epoch": 1.4631564154213708, "grad_norm": 0.57721884965505, "learning_rate": 8.867039400274438e-07, "loss": 0.2705, "step": 31234 }, { "epoch": 1.463203260411299, "grad_norm": 0.6233715250765481, "learning_rate": 8.865590683235287e-07, "loss": 0.2822, "step": 31235 }, { "epoch": 1.4632501054012272, "grad_norm": 0.5297944824614943, "learning_rate": 8.864142059045602e-07, "loss": 0.2461, "step": 31236 }, { "epoch": 1.4632969503911557, "grad_norm": 0.624154826854641, "learning_rate": 8.862693527713726e-07, "loss": 0.2668, "step": 31237 }, { "epoch": 1.463343795381084, "grad_norm": 0.5788014745189122, "learning_rate": 8.861245089247999e-07, "loss": 0.2828, "step": 31238 }, { "epoch": 1.4633906403710122, "grad_norm": 0.5882417391025994, "learning_rate": 8.859796743656745e-07, "loss": 0.2734, "step": 31239 }, { "epoch": 1.4634374853609406, "grad_norm": 0.6292330078594514, "learning_rate": 8.858348490948302e-07, "loss": 0.2949, "step": 31240 }, { "epoch": 1.4634843303508691, "grad_norm": 0.5795896533803345, "learning_rate": 8.856900331131016e-07, "loss": 0.2718, "step": 31241 }, { "epoch": 1.4635311753407974, "grad_norm": 0.6064559383489152, "learning_rate": 8.855452264213202e-07, "loss": 0.276, "step": 31242 }, { "epoch": 1.4635780203307256, "grad_norm": 0.6025768354193395, "learning_rate": 8.854004290203202e-07, "loss": 0.2788, "step": 31243 }, { "epoch": 1.463624865320654, "grad_norm": 0.584802217062749, "learning_rate": 8.852556409109359e-07, "loss": 0.2599, "step": 31244 }, { "epoch": 1.4636717103105823, "grad_norm": 0.5782254716413422, "learning_rate": 8.851108620939985e-07, "loss": 0.2598, "step": 31245 }, { "epoch": 1.4637185553005105, "grad_norm": 0.5863658077771369, "learning_rate": 8.849660925703421e-07, "loss": 0.2861, "step": 31246 }, { "epoch": 1.463765400290439, "grad_norm": 0.6036276681598169, "learning_rate": 8.848213323408011e-07, "loss": 0.2736, "step": 31247 }, { "epoch": 1.4638122452803672, "grad_norm": 0.5771794123366505, "learning_rate": 8.846765814062063e-07, "loss": 0.2813, "step": 31248 }, { "epoch": 1.4638590902702955, "grad_norm": 0.5920431853773275, "learning_rate": 8.845318397673919e-07, "loss": 0.2696, "step": 31249 }, { "epoch": 1.463905935260224, "grad_norm": 0.5869740084114536, "learning_rate": 8.843871074251914e-07, "loss": 0.26, "step": 31250 }, { "epoch": 1.4639527802501522, "grad_norm": 0.6189310865884718, "learning_rate": 8.842423843804365e-07, "loss": 0.2779, "step": 31251 }, { "epoch": 1.4639996252400806, "grad_norm": 0.577990621138144, "learning_rate": 8.840976706339605e-07, "loss": 0.2729, "step": 31252 }, { "epoch": 1.464046470230009, "grad_norm": 0.5652227383543332, "learning_rate": 8.839529661865958e-07, "loss": 0.2712, "step": 31253 }, { "epoch": 1.4640933152199374, "grad_norm": 0.5929395190931732, "learning_rate": 8.838082710391771e-07, "loss": 0.2647, "step": 31254 }, { "epoch": 1.4641401602098656, "grad_norm": 0.6272955742086264, "learning_rate": 8.836635851925343e-07, "loss": 0.2776, "step": 31255 }, { "epoch": 1.4641870051997938, "grad_norm": 0.5857661340577675, "learning_rate": 8.835189086475023e-07, "loss": 0.2637, "step": 31256 }, { "epoch": 1.4642338501897223, "grad_norm": 0.604999408134777, "learning_rate": 8.833742414049117e-07, "loss": 0.292, "step": 31257 }, { "epoch": 1.4642806951796505, "grad_norm": 0.576648210877621, "learning_rate": 8.832295834655962e-07, "loss": 0.2648, "step": 31258 }, { "epoch": 1.4643275401695788, "grad_norm": 0.5625336904317032, "learning_rate": 8.830849348303883e-07, "loss": 0.2548, "step": 31259 }, { "epoch": 1.4643743851595072, "grad_norm": 0.5762205479278849, "learning_rate": 8.829402955001198e-07, "loss": 0.2585, "step": 31260 }, { "epoch": 1.4644212301494355, "grad_norm": 0.6133308618931963, "learning_rate": 8.827956654756246e-07, "loss": 0.2745, "step": 31261 }, { "epoch": 1.4644680751393637, "grad_norm": 0.56797000879785, "learning_rate": 8.826510447577336e-07, "loss": 0.2674, "step": 31262 }, { "epoch": 1.4645149201292922, "grad_norm": 0.5617795900654258, "learning_rate": 8.825064333472785e-07, "loss": 0.2531, "step": 31263 }, { "epoch": 1.4645617651192204, "grad_norm": 0.6220171813977414, "learning_rate": 8.823618312450926e-07, "loss": 0.2694, "step": 31264 }, { "epoch": 1.464608610109149, "grad_norm": 0.5861038486158546, "learning_rate": 8.822172384520078e-07, "loss": 0.2743, "step": 31265 }, { "epoch": 1.4646554550990771, "grad_norm": 0.6052625364691225, "learning_rate": 8.82072654968856e-07, "loss": 0.2813, "step": 31266 }, { "epoch": 1.4647023000890056, "grad_norm": 0.5652086415902097, "learning_rate": 8.819280807964695e-07, "loss": 0.2504, "step": 31267 }, { "epoch": 1.4647491450789338, "grad_norm": 0.6153203818559456, "learning_rate": 8.817835159356808e-07, "loss": 0.2706, "step": 31268 }, { "epoch": 1.464795990068862, "grad_norm": 0.6085096505690116, "learning_rate": 8.816389603873215e-07, "loss": 0.2761, "step": 31269 }, { "epoch": 1.4648428350587905, "grad_norm": 0.5693219838062941, "learning_rate": 8.814944141522222e-07, "loss": 0.2748, "step": 31270 }, { "epoch": 1.4648896800487188, "grad_norm": 0.5951816028685919, "learning_rate": 8.81349877231216e-07, "loss": 0.2565, "step": 31271 }, { "epoch": 1.464936525038647, "grad_norm": 0.6676404607205695, "learning_rate": 8.812053496251344e-07, "loss": 0.2647, "step": 31272 }, { "epoch": 1.4649833700285755, "grad_norm": 0.602057980882362, "learning_rate": 8.810608313348091e-07, "loss": 0.2733, "step": 31273 }, { "epoch": 1.4650302150185037, "grad_norm": 0.5649985664100591, "learning_rate": 8.809163223610723e-07, "loss": 0.2554, "step": 31274 }, { "epoch": 1.465077060008432, "grad_norm": 0.571921687088824, "learning_rate": 8.807718227047546e-07, "loss": 0.2674, "step": 31275 }, { "epoch": 1.4651239049983604, "grad_norm": 0.5961722885154586, "learning_rate": 8.80627332366689e-07, "loss": 0.2751, "step": 31276 }, { "epoch": 1.465170749988289, "grad_norm": 0.65096822653985, "learning_rate": 8.804828513477051e-07, "loss": 0.2618, "step": 31277 }, { "epoch": 1.4652175949782171, "grad_norm": 0.6278797085125598, "learning_rate": 8.803383796486353e-07, "loss": 0.2913, "step": 31278 }, { "epoch": 1.4652644399681454, "grad_norm": 0.5723297743116482, "learning_rate": 8.801939172703111e-07, "loss": 0.2773, "step": 31279 }, { "epoch": 1.4653112849580738, "grad_norm": 0.5832401165037119, "learning_rate": 8.800494642135649e-07, "loss": 0.2732, "step": 31280 }, { "epoch": 1.465358129948002, "grad_norm": 0.5681136798326336, "learning_rate": 8.799050204792257e-07, "loss": 0.266, "step": 31281 }, { "epoch": 1.4654049749379303, "grad_norm": 0.5814372591275869, "learning_rate": 8.79760586068126e-07, "loss": 0.2745, "step": 31282 }, { "epoch": 1.4654518199278588, "grad_norm": 0.5839486360792778, "learning_rate": 8.796161609810977e-07, "loss": 0.2804, "step": 31283 }, { "epoch": 1.465498664917787, "grad_norm": 0.5757101930857058, "learning_rate": 8.794717452189702e-07, "loss": 0.2629, "step": 31284 }, { "epoch": 1.4655455099077153, "grad_norm": 0.5651581058890823, "learning_rate": 8.793273387825757e-07, "loss": 0.2545, "step": 31285 }, { "epoch": 1.4655923548976437, "grad_norm": 0.5569279715168746, "learning_rate": 8.791829416727457e-07, "loss": 0.2513, "step": 31286 }, { "epoch": 1.465639199887572, "grad_norm": 0.5881117945685973, "learning_rate": 8.790385538903098e-07, "loss": 0.2635, "step": 31287 }, { "epoch": 1.4656860448775004, "grad_norm": 0.601097339258184, "learning_rate": 8.788941754360997e-07, "loss": 0.2805, "step": 31288 }, { "epoch": 1.4657328898674287, "grad_norm": 0.6033867340752007, "learning_rate": 8.787498063109459e-07, "loss": 0.266, "step": 31289 }, { "epoch": 1.4657797348573571, "grad_norm": 0.6514115915360835, "learning_rate": 8.786054465156804e-07, "loss": 0.2785, "step": 31290 }, { "epoch": 1.4658265798472854, "grad_norm": 0.6032473473038682, "learning_rate": 8.784610960511323e-07, "loss": 0.2754, "step": 31291 }, { "epoch": 1.4658734248372136, "grad_norm": 0.6194151642636078, "learning_rate": 8.783167549181338e-07, "loss": 0.2833, "step": 31292 }, { "epoch": 1.465920269827142, "grad_norm": 0.585614599361064, "learning_rate": 8.781724231175137e-07, "loss": 0.2785, "step": 31293 }, { "epoch": 1.4659671148170703, "grad_norm": 0.6081789412868489, "learning_rate": 8.78028100650104e-07, "loss": 0.2745, "step": 31294 }, { "epoch": 1.4660139598069986, "grad_norm": 0.580638002713417, "learning_rate": 8.778837875167348e-07, "loss": 0.2754, "step": 31295 }, { "epoch": 1.466060804796927, "grad_norm": 0.5347049424873204, "learning_rate": 8.777394837182376e-07, "loss": 0.2488, "step": 31296 }, { "epoch": 1.4661076497868553, "grad_norm": 0.5833775887715849, "learning_rate": 8.77595189255441e-07, "loss": 0.2832, "step": 31297 }, { "epoch": 1.4661544947767835, "grad_norm": 0.6110191291893531, "learning_rate": 8.774509041291774e-07, "loss": 0.268, "step": 31298 }, { "epoch": 1.466201339766712, "grad_norm": 0.5813166243896253, "learning_rate": 8.773066283402748e-07, "loss": 0.2675, "step": 31299 }, { "epoch": 1.4662481847566402, "grad_norm": 0.5923692224506155, "learning_rate": 8.77162361889565e-07, "loss": 0.2729, "step": 31300 }, { "epoch": 1.4662950297465687, "grad_norm": 0.5520161840667827, "learning_rate": 8.770181047778778e-07, "loss": 0.2844, "step": 31301 }, { "epoch": 1.466341874736497, "grad_norm": 0.5628260466796918, "learning_rate": 8.768738570060436e-07, "loss": 0.2536, "step": 31302 }, { "epoch": 1.4663887197264254, "grad_norm": 0.6068350861335721, "learning_rate": 8.76729618574893e-07, "loss": 0.2834, "step": 31303 }, { "epoch": 1.4664355647163536, "grad_norm": 0.6142414840526071, "learning_rate": 8.76585389485255e-07, "loss": 0.2781, "step": 31304 }, { "epoch": 1.4664824097062819, "grad_norm": 0.5743072910348933, "learning_rate": 8.764411697379604e-07, "loss": 0.2666, "step": 31305 }, { "epoch": 1.4665292546962103, "grad_norm": 0.6006924747771917, "learning_rate": 8.762969593338383e-07, "loss": 0.2852, "step": 31306 }, { "epoch": 1.4665760996861386, "grad_norm": 0.6200564923041503, "learning_rate": 8.761527582737189e-07, "loss": 0.264, "step": 31307 }, { "epoch": 1.4666229446760668, "grad_norm": 0.6242643815022735, "learning_rate": 8.760085665584325e-07, "loss": 0.287, "step": 31308 }, { "epoch": 1.4666697896659953, "grad_norm": 0.5787031730466347, "learning_rate": 8.758643841888084e-07, "loss": 0.2659, "step": 31309 }, { "epoch": 1.4667166346559235, "grad_norm": 0.6055571026096992, "learning_rate": 8.757202111656776e-07, "loss": 0.2757, "step": 31310 }, { "epoch": 1.4667634796458517, "grad_norm": 0.5749317974199005, "learning_rate": 8.755760474898687e-07, "loss": 0.2578, "step": 31311 }, { "epoch": 1.4668103246357802, "grad_norm": 0.6116900399694891, "learning_rate": 8.754318931622102e-07, "loss": 0.2821, "step": 31312 }, { "epoch": 1.4668571696257087, "grad_norm": 0.5897065273706178, "learning_rate": 8.752877481835331e-07, "loss": 0.2524, "step": 31313 }, { "epoch": 1.466904014615637, "grad_norm": 0.6064288602698624, "learning_rate": 8.751436125546667e-07, "loss": 0.2766, "step": 31314 }, { "epoch": 1.4669508596055651, "grad_norm": 0.618874129507243, "learning_rate": 8.749994862764402e-07, "loss": 0.2685, "step": 31315 }, { "epoch": 1.4669977045954936, "grad_norm": 0.5435694342219268, "learning_rate": 8.748553693496837e-07, "loss": 0.2613, "step": 31316 }, { "epoch": 1.4670445495854219, "grad_norm": 0.558151895841944, "learning_rate": 8.747112617752262e-07, "loss": 0.2497, "step": 31317 }, { "epoch": 1.46709139457535, "grad_norm": 0.5811868986008044, "learning_rate": 8.745671635538975e-07, "loss": 0.2607, "step": 31318 }, { "epoch": 1.4671382395652786, "grad_norm": 0.5999479597281429, "learning_rate": 8.744230746865251e-07, "loss": 0.2693, "step": 31319 }, { "epoch": 1.4671850845552068, "grad_norm": 0.577029592903233, "learning_rate": 8.742789951739395e-07, "loss": 0.2585, "step": 31320 }, { "epoch": 1.467231929545135, "grad_norm": 0.571677845065441, "learning_rate": 8.741349250169695e-07, "loss": 0.2681, "step": 31321 }, { "epoch": 1.4672787745350635, "grad_norm": 0.6028215567164245, "learning_rate": 8.739908642164444e-07, "loss": 0.28, "step": 31322 }, { "epoch": 1.4673256195249917, "grad_norm": 0.5792955946804069, "learning_rate": 8.738468127731942e-07, "loss": 0.2679, "step": 31323 }, { "epoch": 1.4673724645149202, "grad_norm": 0.6117755297960464, "learning_rate": 8.737027706880458e-07, "loss": 0.2686, "step": 31324 }, { "epoch": 1.4674193095048484, "grad_norm": 0.5665031735117532, "learning_rate": 8.735587379618302e-07, "loss": 0.249, "step": 31325 }, { "epoch": 1.467466154494777, "grad_norm": 0.571872444405599, "learning_rate": 8.734147145953745e-07, "loss": 0.2596, "step": 31326 }, { "epoch": 1.4675129994847051, "grad_norm": 0.5208633765921153, "learning_rate": 8.73270700589508e-07, "loss": 0.244, "step": 31327 }, { "epoch": 1.4675598444746334, "grad_norm": 0.5535733300881807, "learning_rate": 8.731266959450599e-07, "loss": 0.2594, "step": 31328 }, { "epoch": 1.4676066894645619, "grad_norm": 0.6265332577644126, "learning_rate": 8.729827006628597e-07, "loss": 0.2895, "step": 31329 }, { "epoch": 1.46765353445449, "grad_norm": 0.6200325584580766, "learning_rate": 8.728387147437342e-07, "loss": 0.2741, "step": 31330 }, { "epoch": 1.4677003794444183, "grad_norm": 0.582677410438208, "learning_rate": 8.726947381885129e-07, "loss": 0.2734, "step": 31331 }, { "epoch": 1.4677472244343468, "grad_norm": 0.5833161948947689, "learning_rate": 8.725507709980252e-07, "loss": 0.2642, "step": 31332 }, { "epoch": 1.467794069424275, "grad_norm": 0.5972701138916096, "learning_rate": 8.724068131730981e-07, "loss": 0.2685, "step": 31333 }, { "epoch": 1.4678409144142033, "grad_norm": 0.6145352787913478, "learning_rate": 8.722628647145607e-07, "loss": 0.2776, "step": 31334 }, { "epoch": 1.4678877594041317, "grad_norm": 0.5969389535619928, "learning_rate": 8.721189256232421e-07, "loss": 0.2608, "step": 31335 }, { "epoch": 1.46793460439406, "grad_norm": 0.602388149603067, "learning_rate": 8.71974995899969e-07, "loss": 0.2757, "step": 31336 }, { "epoch": 1.4679814493839884, "grad_norm": 0.6100848284958211, "learning_rate": 8.718310755455709e-07, "loss": 0.2877, "step": 31337 }, { "epoch": 1.4680282943739167, "grad_norm": 0.5871158106488137, "learning_rate": 8.716871645608754e-07, "loss": 0.2872, "step": 31338 }, { "epoch": 1.4680751393638451, "grad_norm": 0.6100987094997047, "learning_rate": 8.715432629467122e-07, "loss": 0.268, "step": 31339 }, { "epoch": 1.4681219843537734, "grad_norm": 0.5981377939222565, "learning_rate": 8.71399370703907e-07, "loss": 0.2818, "step": 31340 }, { "epoch": 1.4681688293437016, "grad_norm": 0.5681102576906969, "learning_rate": 8.712554878332902e-07, "loss": 0.2585, "step": 31341 }, { "epoch": 1.46821567433363, "grad_norm": 0.6579719588227452, "learning_rate": 8.711116143356879e-07, "loss": 0.2811, "step": 31342 }, { "epoch": 1.4682625193235583, "grad_norm": 0.6014922650087962, "learning_rate": 8.709677502119287e-07, "loss": 0.2713, "step": 31343 }, { "epoch": 1.4683093643134866, "grad_norm": 0.6178283840199053, "learning_rate": 8.708238954628407e-07, "loss": 0.2836, "step": 31344 }, { "epoch": 1.468356209303415, "grad_norm": 0.5980315833305179, "learning_rate": 8.706800500892523e-07, "loss": 0.2718, "step": 31345 }, { "epoch": 1.4684030542933433, "grad_norm": 0.6063522352129543, "learning_rate": 8.705362140919901e-07, "loss": 0.2859, "step": 31346 }, { "epoch": 1.4684498992832715, "grad_norm": 0.5650226585083854, "learning_rate": 8.703923874718834e-07, "loss": 0.2558, "step": 31347 }, { "epoch": 1.4684967442732, "grad_norm": 0.58444656352567, "learning_rate": 8.702485702297581e-07, "loss": 0.2787, "step": 31348 }, { "epoch": 1.4685435892631284, "grad_norm": 0.5862090488315452, "learning_rate": 8.701047623664424e-07, "loss": 0.2657, "step": 31349 }, { "epoch": 1.4685904342530567, "grad_norm": 0.6281793963403517, "learning_rate": 8.699609638827642e-07, "loss": 0.2873, "step": 31350 }, { "epoch": 1.468637279242985, "grad_norm": 0.6252018499036645, "learning_rate": 8.698171747795512e-07, "loss": 0.2872, "step": 31351 }, { "epoch": 1.4686841242329134, "grad_norm": 0.5728380240147213, "learning_rate": 8.696733950576311e-07, "loss": 0.2613, "step": 31352 }, { "epoch": 1.4687309692228416, "grad_norm": 0.5969694338701913, "learning_rate": 8.695296247178312e-07, "loss": 0.2725, "step": 31353 }, { "epoch": 1.4687778142127699, "grad_norm": 0.5865682877216621, "learning_rate": 8.693858637609773e-07, "loss": 0.2715, "step": 31354 }, { "epoch": 1.4688246592026983, "grad_norm": 0.5800962259557298, "learning_rate": 8.69242112187898e-07, "loss": 0.2647, "step": 31355 }, { "epoch": 1.4688715041926266, "grad_norm": 0.6016149711076385, "learning_rate": 8.690983699994207e-07, "loss": 0.2819, "step": 31356 }, { "epoch": 1.4689183491825548, "grad_norm": 0.6455936131670975, "learning_rate": 8.689546371963722e-07, "loss": 0.2811, "step": 31357 }, { "epoch": 1.4689651941724833, "grad_norm": 0.6041689947848756, "learning_rate": 8.688109137795799e-07, "loss": 0.2728, "step": 31358 }, { "epoch": 1.4690120391624115, "grad_norm": 0.6574576226596044, "learning_rate": 8.686671997498717e-07, "loss": 0.2893, "step": 31359 }, { "epoch": 1.46905888415234, "grad_norm": 0.6142396100322206, "learning_rate": 8.685234951080734e-07, "loss": 0.2783, "step": 31360 }, { "epoch": 1.4691057291422682, "grad_norm": 0.6256490549628424, "learning_rate": 8.68379799855012e-07, "loss": 0.272, "step": 31361 }, { "epoch": 1.4691525741321967, "grad_norm": 0.5865603196063578, "learning_rate": 8.682361139915144e-07, "loss": 0.2676, "step": 31362 }, { "epoch": 1.469199419122125, "grad_norm": 0.5965587339124668, "learning_rate": 8.68092437518408e-07, "loss": 0.2734, "step": 31363 }, { "epoch": 1.4692462641120532, "grad_norm": 0.6159188295066139, "learning_rate": 8.679487704365191e-07, "loss": 0.288, "step": 31364 }, { "epoch": 1.4692931091019816, "grad_norm": 0.5789061464728061, "learning_rate": 8.67805112746676e-07, "loss": 0.2632, "step": 31365 }, { "epoch": 1.4693399540919099, "grad_norm": 0.5678835013525387, "learning_rate": 8.676614644497034e-07, "loss": 0.2758, "step": 31366 }, { "epoch": 1.4693867990818381, "grad_norm": 0.6402382115123101, "learning_rate": 8.675178255464295e-07, "loss": 0.2782, "step": 31367 }, { "epoch": 1.4694336440717666, "grad_norm": 0.6124880207839741, "learning_rate": 8.673741960376797e-07, "loss": 0.2818, "step": 31368 }, { "epoch": 1.4694804890616948, "grad_norm": 0.5920841829816572, "learning_rate": 8.672305759242807e-07, "loss": 0.2675, "step": 31369 }, { "epoch": 1.469527334051623, "grad_norm": 0.5649672708547321, "learning_rate": 8.670869652070596e-07, "loss": 0.2523, "step": 31370 }, { "epoch": 1.4695741790415515, "grad_norm": 0.6390581276052387, "learning_rate": 8.669433638868424e-07, "loss": 0.3028, "step": 31371 }, { "epoch": 1.4696210240314798, "grad_norm": 0.5923080704806634, "learning_rate": 8.667997719644566e-07, "loss": 0.2738, "step": 31372 }, { "epoch": 1.4696678690214082, "grad_norm": 0.5783563870327653, "learning_rate": 8.666561894407266e-07, "loss": 0.2657, "step": 31373 }, { "epoch": 1.4697147140113365, "grad_norm": 0.5523125895678564, "learning_rate": 8.665126163164808e-07, "loss": 0.2773, "step": 31374 }, { "epoch": 1.469761559001265, "grad_norm": 0.5673840353232499, "learning_rate": 8.663690525925433e-07, "loss": 0.2466, "step": 31375 }, { "epoch": 1.4698084039911932, "grad_norm": 0.5787574302965262, "learning_rate": 8.662254982697416e-07, "loss": 0.2634, "step": 31376 }, { "epoch": 1.4698552489811214, "grad_norm": 0.5905145903212814, "learning_rate": 8.660819533489013e-07, "loss": 0.2744, "step": 31377 }, { "epoch": 1.4699020939710499, "grad_norm": 0.6058015140112284, "learning_rate": 8.659384178308494e-07, "loss": 0.2706, "step": 31378 }, { "epoch": 1.4699489389609781, "grad_norm": 0.5541095342577397, "learning_rate": 8.657948917164105e-07, "loss": 0.2559, "step": 31379 }, { "epoch": 1.4699957839509064, "grad_norm": 0.6125763601739053, "learning_rate": 8.656513750064113e-07, "loss": 0.2741, "step": 31380 }, { "epoch": 1.4700426289408348, "grad_norm": 0.6310841226935621, "learning_rate": 8.655078677016787e-07, "loss": 0.2865, "step": 31381 }, { "epoch": 1.470089473930763, "grad_norm": 0.6262766348472432, "learning_rate": 8.653643698030365e-07, "loss": 0.2811, "step": 31382 }, { "epoch": 1.4701363189206913, "grad_norm": 0.590516676454502, "learning_rate": 8.652208813113114e-07, "loss": 0.2592, "step": 31383 }, { "epoch": 1.4701831639106198, "grad_norm": 0.5884962991783182, "learning_rate": 8.6507740222733e-07, "loss": 0.2825, "step": 31384 }, { "epoch": 1.4702300089005482, "grad_norm": 0.6291282912304064, "learning_rate": 8.649339325519165e-07, "loss": 0.2832, "step": 31385 }, { "epoch": 1.4702768538904765, "grad_norm": 0.6290907233555566, "learning_rate": 8.647904722858974e-07, "loss": 0.2757, "step": 31386 }, { "epoch": 1.4703236988804047, "grad_norm": 0.6152502230636488, "learning_rate": 8.646470214300989e-07, "loss": 0.2872, "step": 31387 }, { "epoch": 1.4703705438703332, "grad_norm": 0.6012240426070339, "learning_rate": 8.645035799853449e-07, "loss": 0.2781, "step": 31388 }, { "epoch": 1.4704173888602614, "grad_norm": 0.610094193277291, "learning_rate": 8.643601479524615e-07, "loss": 0.2879, "step": 31389 }, { "epoch": 1.4704642338501897, "grad_norm": 0.6298114851710552, "learning_rate": 8.642167253322756e-07, "loss": 0.2816, "step": 31390 }, { "epoch": 1.4705110788401181, "grad_norm": 0.6086824036818161, "learning_rate": 8.640733121256101e-07, "loss": 0.273, "step": 31391 }, { "epoch": 1.4705579238300464, "grad_norm": 0.6193567132143846, "learning_rate": 8.639299083332917e-07, "loss": 0.2837, "step": 31392 }, { "epoch": 1.4706047688199746, "grad_norm": 0.60363998623435, "learning_rate": 8.637865139561455e-07, "loss": 0.278, "step": 31393 }, { "epoch": 1.470651613809903, "grad_norm": 0.6292653331406309, "learning_rate": 8.636431289949973e-07, "loss": 0.2921, "step": 31394 }, { "epoch": 1.4706984587998313, "grad_norm": 0.6121812301043845, "learning_rate": 8.634997534506709e-07, "loss": 0.2762, "step": 31395 }, { "epoch": 1.4707453037897598, "grad_norm": 0.5600838609463838, "learning_rate": 8.633563873239931e-07, "loss": 0.2566, "step": 31396 }, { "epoch": 1.470792148779688, "grad_norm": 0.572307858680193, "learning_rate": 8.632130306157868e-07, "loss": 0.2683, "step": 31397 }, { "epoch": 1.4708389937696165, "grad_norm": 0.5855442420577871, "learning_rate": 8.630696833268784e-07, "loss": 0.2711, "step": 31398 }, { "epoch": 1.4708858387595447, "grad_norm": 0.6005296010687173, "learning_rate": 8.629263454580925e-07, "loss": 0.2615, "step": 31399 }, { "epoch": 1.470932683749473, "grad_norm": 0.61737211850507, "learning_rate": 8.627830170102539e-07, "loss": 0.2631, "step": 31400 }, { "epoch": 1.4709795287394014, "grad_norm": 0.6518982595502752, "learning_rate": 8.626396979841883e-07, "loss": 0.2789, "step": 31401 }, { "epoch": 1.4710263737293297, "grad_norm": 0.6100396151851503, "learning_rate": 8.6249638838072e-07, "loss": 0.2614, "step": 31402 }, { "epoch": 1.471073218719258, "grad_norm": 0.5951905459129022, "learning_rate": 8.623530882006722e-07, "loss": 0.2785, "step": 31403 }, { "epoch": 1.4711200637091864, "grad_norm": 0.6075465225847028, "learning_rate": 8.62209797444871e-07, "loss": 0.2719, "step": 31404 }, { "epoch": 1.4711669086991146, "grad_norm": 0.5913059403672346, "learning_rate": 8.620665161141409e-07, "loss": 0.2602, "step": 31405 }, { "epoch": 1.4712137536890428, "grad_norm": 0.670051605888644, "learning_rate": 8.619232442093059e-07, "loss": 0.2825, "step": 31406 }, { "epoch": 1.4712605986789713, "grad_norm": 0.619496006826161, "learning_rate": 8.617799817311912e-07, "loss": 0.3018, "step": 31407 }, { "epoch": 1.4713074436688995, "grad_norm": 0.5889564923156428, "learning_rate": 8.616367286806219e-07, "loss": 0.2534, "step": 31408 }, { "epoch": 1.471354288658828, "grad_norm": 0.5776317305015847, "learning_rate": 8.614934850584211e-07, "loss": 0.2698, "step": 31409 }, { "epoch": 1.4714011336487562, "grad_norm": 0.588713135130107, "learning_rate": 8.613502508654129e-07, "loss": 0.2582, "step": 31410 }, { "epoch": 1.4714479786386847, "grad_norm": 0.5825027517758199, "learning_rate": 8.612070261024221e-07, "loss": 0.2739, "step": 31411 }, { "epoch": 1.471494823628613, "grad_norm": 0.6166780633471949, "learning_rate": 8.610638107702729e-07, "loss": 0.275, "step": 31412 }, { "epoch": 1.4715416686185412, "grad_norm": 0.6234385767999502, "learning_rate": 8.609206048697896e-07, "loss": 0.2774, "step": 31413 }, { "epoch": 1.4715885136084697, "grad_norm": 0.5880995534567188, "learning_rate": 8.607774084017973e-07, "loss": 0.2668, "step": 31414 }, { "epoch": 1.471635358598398, "grad_norm": 0.5930725048969069, "learning_rate": 8.606342213671179e-07, "loss": 0.2706, "step": 31415 }, { "epoch": 1.4716822035883261, "grad_norm": 0.6517047573625988, "learning_rate": 8.604910437665773e-07, "loss": 0.2831, "step": 31416 }, { "epoch": 1.4717290485782546, "grad_norm": 0.6790369401409535, "learning_rate": 8.603478756009981e-07, "loss": 0.3032, "step": 31417 }, { "epoch": 1.4717758935681828, "grad_norm": 0.5834862922336305, "learning_rate": 8.602047168712044e-07, "loss": 0.2609, "step": 31418 }, { "epoch": 1.471822738558111, "grad_norm": 0.6143456719964175, "learning_rate": 8.600615675780207e-07, "loss": 0.2817, "step": 31419 }, { "epoch": 1.4718695835480395, "grad_norm": 0.5620053050932514, "learning_rate": 8.599184277222711e-07, "loss": 0.2643, "step": 31420 }, { "epoch": 1.471916428537968, "grad_norm": 0.5870536316878233, "learning_rate": 8.597752973047782e-07, "loss": 0.2585, "step": 31421 }, { "epoch": 1.4719632735278962, "grad_norm": 0.6158091571726412, "learning_rate": 8.59632176326366e-07, "loss": 0.273, "step": 31422 }, { "epoch": 1.4720101185178245, "grad_norm": 0.5676328033808199, "learning_rate": 8.594890647878592e-07, "loss": 0.2622, "step": 31423 }, { "epoch": 1.472056963507753, "grad_norm": 0.6041795019543658, "learning_rate": 8.593459626900796e-07, "loss": 0.2847, "step": 31424 }, { "epoch": 1.4721038084976812, "grad_norm": 0.5946798926550794, "learning_rate": 8.592028700338517e-07, "loss": 0.2522, "step": 31425 }, { "epoch": 1.4721506534876094, "grad_norm": 0.5853701091561027, "learning_rate": 8.590597868199999e-07, "loss": 0.2711, "step": 31426 }, { "epoch": 1.472197498477538, "grad_norm": 0.5973445851565063, "learning_rate": 8.589167130493456e-07, "loss": 0.2663, "step": 31427 }, { "epoch": 1.4722443434674661, "grad_norm": 0.6016765752419041, "learning_rate": 8.58773648722713e-07, "loss": 0.2659, "step": 31428 }, { "epoch": 1.4722911884573944, "grad_norm": 0.5827609014999223, "learning_rate": 8.586305938409257e-07, "loss": 0.2629, "step": 31429 }, { "epoch": 1.4723380334473228, "grad_norm": 0.6018801700265505, "learning_rate": 8.584875484048075e-07, "loss": 0.2739, "step": 31430 }, { "epoch": 1.472384878437251, "grad_norm": 0.5428556261354962, "learning_rate": 8.583445124151801e-07, "loss": 0.2458, "step": 31431 }, { "epoch": 1.4724317234271795, "grad_norm": 0.6345146812617692, "learning_rate": 8.582014858728685e-07, "loss": 0.2955, "step": 31432 }, { "epoch": 1.4724785684171078, "grad_norm": 0.606261685630893, "learning_rate": 8.580584687786936e-07, "loss": 0.2663, "step": 31433 }, { "epoch": 1.4725254134070362, "grad_norm": 0.556844597165385, "learning_rate": 8.579154611334794e-07, "loss": 0.2508, "step": 31434 }, { "epoch": 1.4725722583969645, "grad_norm": 0.5859387184083334, "learning_rate": 8.577724629380493e-07, "loss": 0.2806, "step": 31435 }, { "epoch": 1.4726191033868927, "grad_norm": 0.6120797347832261, "learning_rate": 8.576294741932268e-07, "loss": 0.2791, "step": 31436 }, { "epoch": 1.4726659483768212, "grad_norm": 0.5890120133748413, "learning_rate": 8.574864948998326e-07, "loss": 0.2793, "step": 31437 }, { "epoch": 1.4727127933667494, "grad_norm": 0.6207300451706603, "learning_rate": 8.573435250586912e-07, "loss": 0.2972, "step": 31438 }, { "epoch": 1.4727596383566777, "grad_norm": 0.5727145059730269, "learning_rate": 8.572005646706255e-07, "loss": 0.2667, "step": 31439 }, { "epoch": 1.4728064833466061, "grad_norm": 0.5663399435298105, "learning_rate": 8.570576137364572e-07, "loss": 0.255, "step": 31440 }, { "epoch": 1.4728533283365344, "grad_norm": 0.5945686432657942, "learning_rate": 8.56914672257009e-07, "loss": 0.2587, "step": 31441 }, { "epoch": 1.4729001733264626, "grad_norm": 0.6055068434937991, "learning_rate": 8.567717402331041e-07, "loss": 0.2662, "step": 31442 }, { "epoch": 1.472947018316391, "grad_norm": 0.6125901248641076, "learning_rate": 8.566288176655657e-07, "loss": 0.2706, "step": 31443 }, { "epoch": 1.4729938633063193, "grad_norm": 0.6345693671071212, "learning_rate": 8.564859045552143e-07, "loss": 0.2791, "step": 31444 }, { "epoch": 1.4730407082962478, "grad_norm": 0.569947858380708, "learning_rate": 8.563430009028745e-07, "loss": 0.2482, "step": 31445 }, { "epoch": 1.473087553286176, "grad_norm": 0.6152196582401473, "learning_rate": 8.562001067093667e-07, "loss": 0.2673, "step": 31446 }, { "epoch": 1.4731343982761045, "grad_norm": 0.6114837338903159, "learning_rate": 8.560572219755142e-07, "loss": 0.2678, "step": 31447 }, { "epoch": 1.4731812432660327, "grad_norm": 0.6091844627786127, "learning_rate": 8.559143467021392e-07, "loss": 0.2771, "step": 31448 }, { "epoch": 1.473228088255961, "grad_norm": 0.5911137361412792, "learning_rate": 8.557714808900638e-07, "loss": 0.2607, "step": 31449 }, { "epoch": 1.4732749332458894, "grad_norm": 0.6171438325059966, "learning_rate": 8.556286245401113e-07, "loss": 0.2775, "step": 31450 }, { "epoch": 1.4733217782358177, "grad_norm": 0.5833406324136545, "learning_rate": 8.554857776531025e-07, "loss": 0.2607, "step": 31451 }, { "epoch": 1.473368623225746, "grad_norm": 0.5893150632439984, "learning_rate": 8.553429402298591e-07, "loss": 0.2649, "step": 31452 }, { "epoch": 1.4734154682156744, "grad_norm": 0.5580192490301067, "learning_rate": 8.552001122712036e-07, "loss": 0.2641, "step": 31453 }, { "epoch": 1.4734623132056026, "grad_norm": 0.6187491211080229, "learning_rate": 8.55057293777958e-07, "loss": 0.2722, "step": 31454 }, { "epoch": 1.4735091581955309, "grad_norm": 0.5738886982088409, "learning_rate": 8.549144847509445e-07, "loss": 0.2717, "step": 31455 }, { "epoch": 1.4735560031854593, "grad_norm": 0.6112970543987626, "learning_rate": 8.547716851909843e-07, "loss": 0.2914, "step": 31456 }, { "epoch": 1.4736028481753878, "grad_norm": 0.5683871807926293, "learning_rate": 8.546288950989007e-07, "loss": 0.2641, "step": 31457 }, { "epoch": 1.473649693165316, "grad_norm": 0.6138115822330752, "learning_rate": 8.54486114475514e-07, "loss": 0.2742, "step": 31458 }, { "epoch": 1.4736965381552443, "grad_norm": 0.5546310749790909, "learning_rate": 8.543433433216455e-07, "loss": 0.2579, "step": 31459 }, { "epoch": 1.4737433831451727, "grad_norm": 0.5987890675909051, "learning_rate": 8.542005816381175e-07, "loss": 0.2616, "step": 31460 }, { "epoch": 1.473790228135101, "grad_norm": 0.5766214640823403, "learning_rate": 8.540578294257512e-07, "loss": 0.2599, "step": 31461 }, { "epoch": 1.4738370731250292, "grad_norm": 0.607405521490744, "learning_rate": 8.539150866853685e-07, "loss": 0.2857, "step": 31462 }, { "epoch": 1.4738839181149577, "grad_norm": 0.5691621421409543, "learning_rate": 8.537723534177917e-07, "loss": 0.2641, "step": 31463 }, { "epoch": 1.473930763104886, "grad_norm": 0.6090180235627518, "learning_rate": 8.536296296238403e-07, "loss": 0.2761, "step": 31464 }, { "epoch": 1.4739776080948142, "grad_norm": 0.6011924649238103, "learning_rate": 8.534869153043374e-07, "loss": 0.2898, "step": 31465 }, { "epoch": 1.4740244530847426, "grad_norm": 0.5716402841129837, "learning_rate": 8.533442104601028e-07, "loss": 0.2549, "step": 31466 }, { "epoch": 1.4740712980746709, "grad_norm": 0.5817262267389081, "learning_rate": 8.532015150919582e-07, "loss": 0.2756, "step": 31467 }, { "epoch": 1.4741181430645993, "grad_norm": 0.5944364635011582, "learning_rate": 8.53058829200725e-07, "loss": 0.262, "step": 31468 }, { "epoch": 1.4741649880545276, "grad_norm": 0.625133986935557, "learning_rate": 8.529161527872251e-07, "loss": 0.2838, "step": 31469 }, { "epoch": 1.474211833044456, "grad_norm": 0.564625827045539, "learning_rate": 8.527734858522782e-07, "loss": 0.2649, "step": 31470 }, { "epoch": 1.4742586780343843, "grad_norm": 0.6110435909180584, "learning_rate": 8.526308283967056e-07, "loss": 0.2567, "step": 31471 }, { "epoch": 1.4743055230243125, "grad_norm": 0.588208357659081, "learning_rate": 8.524881804213294e-07, "loss": 0.2662, "step": 31472 }, { "epoch": 1.474352368014241, "grad_norm": 0.5976143990138437, "learning_rate": 8.523455419269688e-07, "loss": 0.277, "step": 31473 }, { "epoch": 1.4743992130041692, "grad_norm": 0.6088051198355886, "learning_rate": 8.522029129144457e-07, "loss": 0.2664, "step": 31474 }, { "epoch": 1.4744460579940974, "grad_norm": 0.579919438563373, "learning_rate": 8.520602933845811e-07, "loss": 0.2669, "step": 31475 }, { "epoch": 1.474492902984026, "grad_norm": 0.5942373396627885, "learning_rate": 8.519176833381948e-07, "loss": 0.2587, "step": 31476 }, { "epoch": 1.4745397479739542, "grad_norm": 0.6193432832090623, "learning_rate": 8.517750827761079e-07, "loss": 0.2656, "step": 31477 }, { "epoch": 1.4745865929638824, "grad_norm": 0.5700762799956538, "learning_rate": 8.516324916991411e-07, "loss": 0.2701, "step": 31478 }, { "epoch": 1.4746334379538109, "grad_norm": 0.6307336772735915, "learning_rate": 8.514899101081162e-07, "loss": 0.2686, "step": 31479 }, { "epoch": 1.474680282943739, "grad_norm": 0.6243244624973935, "learning_rate": 8.513473380038511e-07, "loss": 0.2725, "step": 31480 }, { "epoch": 1.4747271279336676, "grad_norm": 0.6314373864499874, "learning_rate": 8.512047753871691e-07, "loss": 0.2673, "step": 31481 }, { "epoch": 1.4747739729235958, "grad_norm": 0.6256523347553797, "learning_rate": 8.51062222258888e-07, "loss": 0.2686, "step": 31482 }, { "epoch": 1.4748208179135243, "grad_norm": 0.5657422825517902, "learning_rate": 8.509196786198296e-07, "loss": 0.2695, "step": 31483 }, { "epoch": 1.4748676629034525, "grad_norm": 0.6119082198280488, "learning_rate": 8.507771444708138e-07, "loss": 0.2656, "step": 31484 }, { "epoch": 1.4749145078933807, "grad_norm": 0.6629805208288146, "learning_rate": 8.506346198126619e-07, "loss": 0.2648, "step": 31485 }, { "epoch": 1.4749613528833092, "grad_norm": 0.6061725269082509, "learning_rate": 8.504921046461922e-07, "loss": 0.2596, "step": 31486 }, { "epoch": 1.4750081978732374, "grad_norm": 0.6043823788892714, "learning_rate": 8.503495989722268e-07, "loss": 0.2767, "step": 31487 }, { "epoch": 1.4750550428631657, "grad_norm": 0.5657387529474089, "learning_rate": 8.502071027915842e-07, "loss": 0.2635, "step": 31488 }, { "epoch": 1.4751018878530942, "grad_norm": 0.5703762868679481, "learning_rate": 8.500646161050849e-07, "loss": 0.2718, "step": 31489 }, { "epoch": 1.4751487328430224, "grad_norm": 0.6367110582276185, "learning_rate": 8.49922138913549e-07, "loss": 0.2896, "step": 31490 }, { "epoch": 1.4751955778329506, "grad_norm": 0.6153148518825275, "learning_rate": 8.497796712177967e-07, "loss": 0.2897, "step": 31491 }, { "epoch": 1.475242422822879, "grad_norm": 0.6200616868033764, "learning_rate": 8.496372130186481e-07, "loss": 0.2742, "step": 31492 }, { "epoch": 1.4752892678128076, "grad_norm": 0.606433908959152, "learning_rate": 8.494947643169227e-07, "loss": 0.2804, "step": 31493 }, { "epoch": 1.4753361128027358, "grad_norm": 0.6147908676040661, "learning_rate": 8.493523251134391e-07, "loss": 0.2625, "step": 31494 }, { "epoch": 1.475382957792664, "grad_norm": 0.6098404251889221, "learning_rate": 8.492098954090183e-07, "loss": 0.2783, "step": 31495 }, { "epoch": 1.4754298027825925, "grad_norm": 0.6111445897180785, "learning_rate": 8.490674752044795e-07, "loss": 0.2739, "step": 31496 }, { "epoch": 1.4754766477725207, "grad_norm": 0.6104353707851873, "learning_rate": 8.489250645006425e-07, "loss": 0.2732, "step": 31497 }, { "epoch": 1.475523492762449, "grad_norm": 0.6293420073239672, "learning_rate": 8.487826632983267e-07, "loss": 0.3018, "step": 31498 }, { "epoch": 1.4755703377523774, "grad_norm": 0.6083762413848621, "learning_rate": 8.486402715983524e-07, "loss": 0.275, "step": 31499 }, { "epoch": 1.4756171827423057, "grad_norm": 0.6154372683357437, "learning_rate": 8.484978894015386e-07, "loss": 0.2852, "step": 31500 }, { "epoch": 1.475664027732234, "grad_norm": 0.5635632240709921, "learning_rate": 8.483555167087032e-07, "loss": 0.2578, "step": 31501 }, { "epoch": 1.4757108727221624, "grad_norm": 0.5791201904827404, "learning_rate": 8.482131535206667e-07, "loss": 0.2669, "step": 31502 }, { "epoch": 1.4757577177120906, "grad_norm": 0.6188519119386137, "learning_rate": 8.480707998382484e-07, "loss": 0.277, "step": 31503 }, { "epoch": 1.475804562702019, "grad_norm": 0.5965878561801075, "learning_rate": 8.479284556622675e-07, "loss": 0.2692, "step": 31504 }, { "epoch": 1.4758514076919473, "grad_norm": 0.5561604714374645, "learning_rate": 8.477861209935431e-07, "loss": 0.2683, "step": 31505 }, { "epoch": 1.4758982526818758, "grad_norm": 0.613261618625438, "learning_rate": 8.476437958328951e-07, "loss": 0.276, "step": 31506 }, { "epoch": 1.475945097671804, "grad_norm": 0.5464849413117775, "learning_rate": 8.475014801811418e-07, "loss": 0.2587, "step": 31507 }, { "epoch": 1.4759919426617323, "grad_norm": 0.5878247224675731, "learning_rate": 8.473591740391013e-07, "loss": 0.2615, "step": 31508 }, { "epoch": 1.4760387876516607, "grad_norm": 0.5527485996846122, "learning_rate": 8.472168774075934e-07, "loss": 0.2655, "step": 31509 }, { "epoch": 1.476085632641589, "grad_norm": 0.6129791004689982, "learning_rate": 8.47074590287437e-07, "loss": 0.3006, "step": 31510 }, { "epoch": 1.4761324776315172, "grad_norm": 0.6030321932911218, "learning_rate": 8.469323126794507e-07, "loss": 0.2769, "step": 31511 }, { "epoch": 1.4761793226214457, "grad_norm": 0.6302495957470223, "learning_rate": 8.467900445844543e-07, "loss": 0.2927, "step": 31512 }, { "epoch": 1.476226167611374, "grad_norm": 0.5885880304502941, "learning_rate": 8.466477860032649e-07, "loss": 0.2677, "step": 31513 }, { "epoch": 1.4762730126013022, "grad_norm": 0.6014630792828581, "learning_rate": 8.465055369367029e-07, "loss": 0.2802, "step": 31514 }, { "epoch": 1.4763198575912306, "grad_norm": 0.5893858446957585, "learning_rate": 8.463632973855848e-07, "loss": 0.274, "step": 31515 }, { "epoch": 1.4763667025811589, "grad_norm": 0.5634292875024655, "learning_rate": 8.462210673507306e-07, "loss": 0.2646, "step": 31516 }, { "epoch": 1.4764135475710873, "grad_norm": 0.5752598270938232, "learning_rate": 8.460788468329584e-07, "loss": 0.2812, "step": 31517 }, { "epoch": 1.4764603925610156, "grad_norm": 0.6302654346010584, "learning_rate": 8.459366358330875e-07, "loss": 0.2795, "step": 31518 }, { "epoch": 1.476507237550944, "grad_norm": 0.6428877462149655, "learning_rate": 8.457944343519347e-07, "loss": 0.2798, "step": 31519 }, { "epoch": 1.4765540825408723, "grad_norm": 0.6567726313361181, "learning_rate": 8.456522423903193e-07, "loss": 0.2749, "step": 31520 }, { "epoch": 1.4766009275308005, "grad_norm": 0.6051168316614863, "learning_rate": 8.455100599490603e-07, "loss": 0.2893, "step": 31521 }, { "epoch": 1.476647772520729, "grad_norm": 0.5676955348674491, "learning_rate": 8.453678870289741e-07, "loss": 0.2685, "step": 31522 }, { "epoch": 1.4766946175106572, "grad_norm": 0.5985459845579953, "learning_rate": 8.4522572363088e-07, "loss": 0.2724, "step": 31523 }, { "epoch": 1.4767414625005855, "grad_norm": 0.6159274057852373, "learning_rate": 8.450835697555965e-07, "loss": 0.2629, "step": 31524 }, { "epoch": 1.476788307490514, "grad_norm": 0.5676972339538597, "learning_rate": 8.449414254039407e-07, "loss": 0.2549, "step": 31525 }, { "epoch": 1.4768351524804422, "grad_norm": 0.5746515948914295, "learning_rate": 8.447992905767305e-07, "loss": 0.2518, "step": 31526 }, { "epoch": 1.4768819974703704, "grad_norm": 0.6278818381405353, "learning_rate": 8.446571652747854e-07, "loss": 0.2772, "step": 31527 }, { "epoch": 1.4769288424602989, "grad_norm": 0.6473339532767224, "learning_rate": 8.445150494989216e-07, "loss": 0.2877, "step": 31528 }, { "epoch": 1.4769756874502273, "grad_norm": 0.6207390342337905, "learning_rate": 8.443729432499573e-07, "loss": 0.2782, "step": 31529 }, { "epoch": 1.4770225324401556, "grad_norm": 0.5611349100252916, "learning_rate": 8.442308465287116e-07, "loss": 0.254, "step": 31530 }, { "epoch": 1.4770693774300838, "grad_norm": 0.626702509594666, "learning_rate": 8.440887593360003e-07, "loss": 0.2632, "step": 31531 }, { "epoch": 1.4771162224200123, "grad_norm": 0.6047670740642763, "learning_rate": 8.439466816726421e-07, "loss": 0.2649, "step": 31532 }, { "epoch": 1.4771630674099405, "grad_norm": 0.601730396854949, "learning_rate": 8.438046135394545e-07, "loss": 0.259, "step": 31533 }, { "epoch": 1.4772099123998688, "grad_norm": 0.5839329575268914, "learning_rate": 8.43662554937256e-07, "loss": 0.2772, "step": 31534 }, { "epoch": 1.4772567573897972, "grad_norm": 0.563014031713085, "learning_rate": 8.435205058668622e-07, "loss": 0.2531, "step": 31535 }, { "epoch": 1.4773036023797255, "grad_norm": 0.5954898997521874, "learning_rate": 8.433784663290925e-07, "loss": 0.2681, "step": 31536 }, { "epoch": 1.4773504473696537, "grad_norm": 0.5514001530080513, "learning_rate": 8.432364363247625e-07, "loss": 0.2684, "step": 31537 }, { "epoch": 1.4773972923595822, "grad_norm": 0.6047026227449601, "learning_rate": 8.430944158546902e-07, "loss": 0.2832, "step": 31538 }, { "epoch": 1.4774441373495104, "grad_norm": 0.6076244774078117, "learning_rate": 8.429524049196933e-07, "loss": 0.2633, "step": 31539 }, { "epoch": 1.4774909823394389, "grad_norm": 0.6086884220871563, "learning_rate": 8.428104035205886e-07, "loss": 0.269, "step": 31540 }, { "epoch": 1.4775378273293671, "grad_norm": 0.6006620438764659, "learning_rate": 8.426684116581945e-07, "loss": 0.266, "step": 31541 }, { "epoch": 1.4775846723192956, "grad_norm": 0.5570864328656965, "learning_rate": 8.425264293333271e-07, "loss": 0.265, "step": 31542 }, { "epoch": 1.4776315173092238, "grad_norm": 0.6079119671252412, "learning_rate": 8.423844565468029e-07, "loss": 0.2786, "step": 31543 }, { "epoch": 1.477678362299152, "grad_norm": 0.5932758190133393, "learning_rate": 8.422424932994391e-07, "loss": 0.2617, "step": 31544 }, { "epoch": 1.4777252072890805, "grad_norm": 0.5941114823928602, "learning_rate": 8.421005395920534e-07, "loss": 0.2697, "step": 31545 }, { "epoch": 1.4777720522790088, "grad_norm": 0.5683122435870204, "learning_rate": 8.419585954254625e-07, "loss": 0.2703, "step": 31546 }, { "epoch": 1.477818897268937, "grad_norm": 0.6377838715117644, "learning_rate": 8.41816660800483e-07, "loss": 0.264, "step": 31547 }, { "epoch": 1.4778657422588655, "grad_norm": 0.6010573121673704, "learning_rate": 8.416747357179325e-07, "loss": 0.2775, "step": 31548 }, { "epoch": 1.4779125872487937, "grad_norm": 0.592137877915769, "learning_rate": 8.415328201786271e-07, "loss": 0.2694, "step": 31549 }, { "epoch": 1.477959432238722, "grad_norm": 0.6207090079934413, "learning_rate": 8.413909141833828e-07, "loss": 0.2712, "step": 31550 }, { "epoch": 1.4780062772286504, "grad_norm": 0.6012483475796164, "learning_rate": 8.41249017733017e-07, "loss": 0.2804, "step": 31551 }, { "epoch": 1.4780531222185787, "grad_norm": 0.5830185252508879, "learning_rate": 8.411071308283461e-07, "loss": 0.2677, "step": 31552 }, { "epoch": 1.4780999672085071, "grad_norm": 0.6293735594147403, "learning_rate": 8.409652534701867e-07, "loss": 0.265, "step": 31553 }, { "epoch": 1.4781468121984354, "grad_norm": 0.5824005984847385, "learning_rate": 8.408233856593562e-07, "loss": 0.2684, "step": 31554 }, { "epoch": 1.4781936571883638, "grad_norm": 0.5462395744871954, "learning_rate": 8.406815273966693e-07, "loss": 0.2653, "step": 31555 }, { "epoch": 1.478240502178292, "grad_norm": 0.6060046775581815, "learning_rate": 8.405396786829437e-07, "loss": 0.2821, "step": 31556 }, { "epoch": 1.4782873471682203, "grad_norm": 0.6259018300136514, "learning_rate": 8.403978395189946e-07, "loss": 0.2961, "step": 31557 }, { "epoch": 1.4783341921581488, "grad_norm": 0.6121092447655196, "learning_rate": 8.402560099056386e-07, "loss": 0.278, "step": 31558 }, { "epoch": 1.478381037148077, "grad_norm": 0.5541974428137434, "learning_rate": 8.401141898436924e-07, "loss": 0.2717, "step": 31559 }, { "epoch": 1.4784278821380052, "grad_norm": 0.6136014498141569, "learning_rate": 8.399723793339723e-07, "loss": 0.2832, "step": 31560 }, { "epoch": 1.4784747271279337, "grad_norm": 0.6017459273603614, "learning_rate": 8.398305783772934e-07, "loss": 0.278, "step": 31561 }, { "epoch": 1.478521572117862, "grad_norm": 0.5951024411955793, "learning_rate": 8.396887869744722e-07, "loss": 0.2766, "step": 31562 }, { "epoch": 1.4785684171077902, "grad_norm": 0.5786534839980887, "learning_rate": 8.395470051263254e-07, "loss": 0.273, "step": 31563 }, { "epoch": 1.4786152620977187, "grad_norm": 0.6055662510883828, "learning_rate": 8.394052328336674e-07, "loss": 0.2722, "step": 31564 }, { "epoch": 1.478662107087647, "grad_norm": 0.5711647481678297, "learning_rate": 8.392634700973151e-07, "loss": 0.2628, "step": 31565 }, { "epoch": 1.4787089520775754, "grad_norm": 0.5620787183200981, "learning_rate": 8.391217169180849e-07, "loss": 0.2812, "step": 31566 }, { "epoch": 1.4787557970675036, "grad_norm": 0.5811903213075924, "learning_rate": 8.389799732967909e-07, "loss": 0.2669, "step": 31567 }, { "epoch": 1.478802642057432, "grad_norm": 0.5725044889797206, "learning_rate": 8.388382392342497e-07, "loss": 0.2709, "step": 31568 }, { "epoch": 1.4788494870473603, "grad_norm": 0.5926320440225377, "learning_rate": 8.386965147312768e-07, "loss": 0.2767, "step": 31569 }, { "epoch": 1.4788963320372885, "grad_norm": 0.604467455255092, "learning_rate": 8.385547997886889e-07, "loss": 0.2954, "step": 31570 }, { "epoch": 1.478943177027217, "grad_norm": 0.5994821726767913, "learning_rate": 8.384130944072997e-07, "loss": 0.2816, "step": 31571 }, { "epoch": 1.4789900220171452, "grad_norm": 0.6327820759298479, "learning_rate": 8.382713985879262e-07, "loss": 0.268, "step": 31572 }, { "epoch": 1.4790368670070735, "grad_norm": 0.794187412957796, "learning_rate": 8.381297123313825e-07, "loss": 0.2889, "step": 31573 }, { "epoch": 1.479083711997002, "grad_norm": 0.6079725990304293, "learning_rate": 8.379880356384845e-07, "loss": 0.2777, "step": 31574 }, { "epoch": 1.4791305569869302, "grad_norm": 0.6016758967876679, "learning_rate": 8.378463685100477e-07, "loss": 0.273, "step": 31575 }, { "epoch": 1.4791774019768587, "grad_norm": 0.5719032773457708, "learning_rate": 8.37704710946888e-07, "loss": 0.2685, "step": 31576 }, { "epoch": 1.479224246966787, "grad_norm": 0.5715831027087601, "learning_rate": 8.375630629498191e-07, "loss": 0.2679, "step": 31577 }, { "epoch": 1.4792710919567154, "grad_norm": 0.5971554521191287, "learning_rate": 8.374214245196569e-07, "loss": 0.2738, "step": 31578 }, { "epoch": 1.4793179369466436, "grad_norm": 0.5694733737572661, "learning_rate": 8.372797956572173e-07, "loss": 0.2642, "step": 31579 }, { "epoch": 1.4793647819365718, "grad_norm": 0.6278409508390402, "learning_rate": 8.371381763633138e-07, "loss": 0.2543, "step": 31580 }, { "epoch": 1.4794116269265003, "grad_norm": 0.5675131391810542, "learning_rate": 8.369965666387622e-07, "loss": 0.2652, "step": 31581 }, { "epoch": 1.4794584719164285, "grad_norm": 0.6703152748397271, "learning_rate": 8.368549664843775e-07, "loss": 0.3024, "step": 31582 }, { "epoch": 1.4795053169063568, "grad_norm": 0.5796496011095801, "learning_rate": 8.367133759009752e-07, "loss": 0.2724, "step": 31583 }, { "epoch": 1.4795521618962852, "grad_norm": 0.5751447141988251, "learning_rate": 8.365717948893684e-07, "loss": 0.2718, "step": 31584 }, { "epoch": 1.4795990068862135, "grad_norm": 0.5519712403531567, "learning_rate": 8.364302234503738e-07, "loss": 0.2627, "step": 31585 }, { "epoch": 1.4796458518761417, "grad_norm": 0.581591964550035, "learning_rate": 8.362886615848042e-07, "loss": 0.2712, "step": 31586 }, { "epoch": 1.4796926968660702, "grad_norm": 0.6118819376445572, "learning_rate": 8.361471092934753e-07, "loss": 0.2759, "step": 31587 }, { "epoch": 1.4797395418559984, "grad_norm": 0.604732224789574, "learning_rate": 8.360055665772016e-07, "loss": 0.2806, "step": 31588 }, { "epoch": 1.479786386845927, "grad_norm": 0.5678298700593175, "learning_rate": 8.358640334367976e-07, "loss": 0.2611, "step": 31589 }, { "epoch": 1.4798332318358551, "grad_norm": 0.583303528240195, "learning_rate": 8.357225098730784e-07, "loss": 0.2735, "step": 31590 }, { "epoch": 1.4798800768257836, "grad_norm": 0.5870981917391397, "learning_rate": 8.355809958868583e-07, "loss": 0.2536, "step": 31591 }, { "epoch": 1.4799269218157118, "grad_norm": 0.5859790152450477, "learning_rate": 8.354394914789501e-07, "loss": 0.2669, "step": 31592 }, { "epoch": 1.47997376680564, "grad_norm": 0.5875870698654255, "learning_rate": 8.352979966501693e-07, "loss": 0.275, "step": 31593 }, { "epoch": 1.4800206117955685, "grad_norm": 0.602382976587003, "learning_rate": 8.351565114013302e-07, "loss": 0.2641, "step": 31594 }, { "epoch": 1.4800674567854968, "grad_norm": 0.6111683444000218, "learning_rate": 8.35015035733247e-07, "loss": 0.2848, "step": 31595 }, { "epoch": 1.480114301775425, "grad_norm": 0.5821947064521787, "learning_rate": 8.348735696467336e-07, "loss": 0.2761, "step": 31596 }, { "epoch": 1.4801611467653535, "grad_norm": 0.563854203195069, "learning_rate": 8.347321131426054e-07, "loss": 0.2677, "step": 31597 }, { "epoch": 1.4802079917552817, "grad_norm": 0.6990326338899411, "learning_rate": 8.345906662216749e-07, "loss": 0.2886, "step": 31598 }, { "epoch": 1.48025483674521, "grad_norm": 0.5571170209218219, "learning_rate": 8.34449228884756e-07, "loss": 0.2573, "step": 31599 }, { "epoch": 1.4803016817351384, "grad_norm": 0.6259276847619493, "learning_rate": 8.343078011326633e-07, "loss": 0.2772, "step": 31600 }, { "epoch": 1.4803485267250667, "grad_norm": 0.6070035347916507, "learning_rate": 8.341663829662103e-07, "loss": 0.2886, "step": 31601 }, { "epoch": 1.4803953717149951, "grad_norm": 0.5578887295495241, "learning_rate": 8.340249743862111e-07, "loss": 0.2571, "step": 31602 }, { "epoch": 1.4804422167049234, "grad_norm": 0.6219397757664227, "learning_rate": 8.338835753934804e-07, "loss": 0.2924, "step": 31603 }, { "epoch": 1.4804890616948518, "grad_norm": 0.5850125413848336, "learning_rate": 8.337421859888301e-07, "loss": 0.2565, "step": 31604 }, { "epoch": 1.48053590668478, "grad_norm": 0.6146365727657469, "learning_rate": 8.336008061730755e-07, "loss": 0.2953, "step": 31605 }, { "epoch": 1.4805827516747083, "grad_norm": 0.6149754340505844, "learning_rate": 8.334594359470285e-07, "loss": 0.2842, "step": 31606 }, { "epoch": 1.4806295966646368, "grad_norm": 0.5910933436321358, "learning_rate": 8.333180753115036e-07, "loss": 0.2591, "step": 31607 }, { "epoch": 1.480676441654565, "grad_norm": 0.6124713715383666, "learning_rate": 8.331767242673145e-07, "loss": 0.2587, "step": 31608 }, { "epoch": 1.4807232866444933, "grad_norm": 0.5840602992551225, "learning_rate": 8.330353828152752e-07, "loss": 0.2691, "step": 31609 }, { "epoch": 1.4807701316344217, "grad_norm": 0.6475977159302544, "learning_rate": 8.328940509561972e-07, "loss": 0.2735, "step": 31610 }, { "epoch": 1.48081697662435, "grad_norm": 0.625904994813213, "learning_rate": 8.327527286908952e-07, "loss": 0.2864, "step": 31611 }, { "epoch": 1.4808638216142784, "grad_norm": 0.5570348742995429, "learning_rate": 8.326114160201829e-07, "loss": 0.2655, "step": 31612 }, { "epoch": 1.4809106666042067, "grad_norm": 0.5631215035999256, "learning_rate": 8.32470112944872e-07, "loss": 0.262, "step": 31613 }, { "epoch": 1.4809575115941351, "grad_norm": 0.5747112318757157, "learning_rate": 8.323288194657764e-07, "loss": 0.2635, "step": 31614 }, { "epoch": 1.4810043565840634, "grad_norm": 0.5978355362573358, "learning_rate": 8.321875355837103e-07, "loss": 0.2799, "step": 31615 }, { "epoch": 1.4810512015739916, "grad_norm": 0.6054397395965531, "learning_rate": 8.320462612994848e-07, "loss": 0.2844, "step": 31616 }, { "epoch": 1.48109804656392, "grad_norm": 0.6038939860761962, "learning_rate": 8.319049966139136e-07, "loss": 0.2725, "step": 31617 }, { "epoch": 1.4811448915538483, "grad_norm": 0.6221991047549433, "learning_rate": 8.317637415278105e-07, "loss": 0.284, "step": 31618 }, { "epoch": 1.4811917365437766, "grad_norm": 0.5792268635659045, "learning_rate": 8.31622496041988e-07, "loss": 0.2734, "step": 31619 }, { "epoch": 1.481238581533705, "grad_norm": 0.6355698652908267, "learning_rate": 8.314812601572583e-07, "loss": 0.2703, "step": 31620 }, { "epoch": 1.4812854265236333, "grad_norm": 0.6229307283131913, "learning_rate": 8.313400338744351e-07, "loss": 0.2828, "step": 31621 }, { "epoch": 1.4813322715135615, "grad_norm": 0.5931700326349449, "learning_rate": 8.3119881719433e-07, "loss": 0.2894, "step": 31622 }, { "epoch": 1.48137911650349, "grad_norm": 0.6216190918437559, "learning_rate": 8.310576101177562e-07, "loss": 0.2727, "step": 31623 }, { "epoch": 1.4814259614934182, "grad_norm": 0.5958745635623681, "learning_rate": 8.309164126455263e-07, "loss": 0.263, "step": 31624 }, { "epoch": 1.4814728064833467, "grad_norm": 0.5959422620222204, "learning_rate": 8.30775224778454e-07, "loss": 0.2789, "step": 31625 }, { "epoch": 1.481519651473275, "grad_norm": 0.6111521850720921, "learning_rate": 8.306340465173496e-07, "loss": 0.2826, "step": 31626 }, { "epoch": 1.4815664964632034, "grad_norm": 0.587239209763846, "learning_rate": 8.304928778630275e-07, "loss": 0.2676, "step": 31627 }, { "epoch": 1.4816133414531316, "grad_norm": 0.5935304349703661, "learning_rate": 8.303517188162988e-07, "loss": 0.2874, "step": 31628 }, { "epoch": 1.4816601864430599, "grad_norm": 0.58812967274777, "learning_rate": 8.302105693779761e-07, "loss": 0.2664, "step": 31629 }, { "epoch": 1.4817070314329883, "grad_norm": 0.570280368528144, "learning_rate": 8.300694295488718e-07, "loss": 0.2635, "step": 31630 }, { "epoch": 1.4817538764229166, "grad_norm": 0.6024483877621923, "learning_rate": 8.299282993297983e-07, "loss": 0.272, "step": 31631 }, { "epoch": 1.4818007214128448, "grad_norm": 0.6211709035413684, "learning_rate": 8.297871787215686e-07, "loss": 0.2747, "step": 31632 }, { "epoch": 1.4818475664027733, "grad_norm": 0.5986900607868816, "learning_rate": 8.296460677249937e-07, "loss": 0.2688, "step": 31633 }, { "epoch": 1.4818944113927015, "grad_norm": 0.6139118429159708, "learning_rate": 8.295049663408852e-07, "loss": 0.2852, "step": 31634 }, { "epoch": 1.4819412563826297, "grad_norm": 0.5880208037118931, "learning_rate": 8.293638745700555e-07, "loss": 0.2706, "step": 31635 }, { "epoch": 1.4819881013725582, "grad_norm": 0.6603943308064846, "learning_rate": 8.29222792413317e-07, "loss": 0.2912, "step": 31636 }, { "epoch": 1.4820349463624864, "grad_norm": 0.6004742942624328, "learning_rate": 8.290817198714815e-07, "loss": 0.2765, "step": 31637 }, { "epoch": 1.482081791352415, "grad_norm": 0.6071681842979981, "learning_rate": 8.289406569453607e-07, "loss": 0.2767, "step": 31638 }, { "epoch": 1.4821286363423432, "grad_norm": 0.5569652310912153, "learning_rate": 8.287996036357671e-07, "loss": 0.2565, "step": 31639 }, { "epoch": 1.4821754813322716, "grad_norm": 0.6163669168629585, "learning_rate": 8.286585599435118e-07, "loss": 0.2793, "step": 31640 }, { "epoch": 1.4822223263221999, "grad_norm": 0.6565021379022673, "learning_rate": 8.285175258694056e-07, "loss": 0.3022, "step": 31641 }, { "epoch": 1.482269171312128, "grad_norm": 0.6234048634979308, "learning_rate": 8.283765014142608e-07, "loss": 0.2673, "step": 31642 }, { "epoch": 1.4823160163020566, "grad_norm": 0.5980566788501228, "learning_rate": 8.282354865788892e-07, "loss": 0.272, "step": 31643 }, { "epoch": 1.4823628612919848, "grad_norm": 0.6204327730975628, "learning_rate": 8.28094481364102e-07, "loss": 0.2645, "step": 31644 }, { "epoch": 1.482409706281913, "grad_norm": 0.6004436573098167, "learning_rate": 8.279534857707111e-07, "loss": 0.2751, "step": 31645 }, { "epoch": 1.4824565512718415, "grad_norm": 0.591665507567655, "learning_rate": 8.278124997995284e-07, "loss": 0.2412, "step": 31646 }, { "epoch": 1.4825033962617697, "grad_norm": 0.6043270529396775, "learning_rate": 8.276715234513647e-07, "loss": 0.2778, "step": 31647 }, { "epoch": 1.4825502412516982, "grad_norm": 0.6125741816731725, "learning_rate": 8.275305567270298e-07, "loss": 0.2748, "step": 31648 }, { "epoch": 1.4825970862416264, "grad_norm": 0.587060513819759, "learning_rate": 8.273895996273365e-07, "loss": 0.2824, "step": 31649 }, { "epoch": 1.482643931231555, "grad_norm": 0.588411885301978, "learning_rate": 8.272486521530954e-07, "loss": 0.2682, "step": 31650 }, { "epoch": 1.4826907762214832, "grad_norm": 0.6277070256932994, "learning_rate": 8.271077143051181e-07, "loss": 0.2701, "step": 31651 }, { "epoch": 1.4827376212114114, "grad_norm": 0.63511591973133, "learning_rate": 8.269667860842162e-07, "loss": 0.2897, "step": 31652 }, { "epoch": 1.4827844662013399, "grad_norm": 0.5943621238076334, "learning_rate": 8.268258674911992e-07, "loss": 0.2605, "step": 31653 }, { "epoch": 1.482831311191268, "grad_norm": 0.5698762812581848, "learning_rate": 8.266849585268794e-07, "loss": 0.2576, "step": 31654 }, { "epoch": 1.4828781561811963, "grad_norm": 0.5813982548491896, "learning_rate": 8.265440591920665e-07, "loss": 0.2665, "step": 31655 }, { "epoch": 1.4829250011711248, "grad_norm": 0.6138491189799088, "learning_rate": 8.264031694875719e-07, "loss": 0.2769, "step": 31656 }, { "epoch": 1.482971846161053, "grad_norm": 0.5910967045960018, "learning_rate": 8.262622894142061e-07, "loss": 0.2808, "step": 31657 }, { "epoch": 1.4830186911509813, "grad_norm": 0.588277692211348, "learning_rate": 8.261214189727812e-07, "loss": 0.2626, "step": 31658 }, { "epoch": 1.4830655361409097, "grad_norm": 0.598417739351534, "learning_rate": 8.259805581641062e-07, "loss": 0.264, "step": 31659 }, { "epoch": 1.483112381130838, "grad_norm": 0.5587267195723042, "learning_rate": 8.25839706988992e-07, "loss": 0.251, "step": 31660 }, { "epoch": 1.4831592261207664, "grad_norm": 0.6113649970056463, "learning_rate": 8.256988654482506e-07, "loss": 0.2702, "step": 31661 }, { "epoch": 1.4832060711106947, "grad_norm": 0.6018877143012596, "learning_rate": 8.255580335426905e-07, "loss": 0.264, "step": 31662 }, { "epoch": 1.4832529161006232, "grad_norm": 0.6369562560730498, "learning_rate": 8.25417211273123e-07, "loss": 0.2633, "step": 31663 }, { "epoch": 1.4832997610905514, "grad_norm": 0.6324479219199491, "learning_rate": 8.252763986403592e-07, "loss": 0.2742, "step": 31664 }, { "epoch": 1.4833466060804796, "grad_norm": 0.6503377909002144, "learning_rate": 8.251355956452084e-07, "loss": 0.2867, "step": 31665 }, { "epoch": 1.483393451070408, "grad_norm": 0.5603063884378797, "learning_rate": 8.24994802288481e-07, "loss": 0.2725, "step": 31666 }, { "epoch": 1.4834402960603363, "grad_norm": 0.5587148476079508, "learning_rate": 8.248540185709883e-07, "loss": 0.2651, "step": 31667 }, { "epoch": 1.4834871410502646, "grad_norm": 0.5914565444540921, "learning_rate": 8.247132444935391e-07, "loss": 0.2692, "step": 31668 }, { "epoch": 1.483533986040193, "grad_norm": 0.6333283334020211, "learning_rate": 8.245724800569438e-07, "loss": 0.2838, "step": 31669 }, { "epoch": 1.4835808310301213, "grad_norm": 0.5630530889394352, "learning_rate": 8.244317252620137e-07, "loss": 0.2644, "step": 31670 }, { "epoch": 1.4836276760200495, "grad_norm": 0.5635221137258427, "learning_rate": 8.242909801095572e-07, "loss": 0.2565, "step": 31671 }, { "epoch": 1.483674521009978, "grad_norm": 0.5565644331930641, "learning_rate": 8.241502446003849e-07, "loss": 0.272, "step": 31672 }, { "epoch": 1.4837213659999062, "grad_norm": 0.6077479350794787, "learning_rate": 8.240095187353067e-07, "loss": 0.2741, "step": 31673 }, { "epoch": 1.4837682109898347, "grad_norm": 0.569394243606017, "learning_rate": 8.238688025151334e-07, "loss": 0.2564, "step": 31674 }, { "epoch": 1.483815055979763, "grad_norm": 0.5909239507631764, "learning_rate": 8.23728095940673e-07, "loss": 0.2664, "step": 31675 }, { "epoch": 1.4838619009696914, "grad_norm": 0.5787514382161054, "learning_rate": 8.235873990127369e-07, "loss": 0.2769, "step": 31676 }, { "epoch": 1.4839087459596196, "grad_norm": 0.6208455741090926, "learning_rate": 8.234467117321329e-07, "loss": 0.2675, "step": 31677 }, { "epoch": 1.4839555909495479, "grad_norm": 0.6235662702299712, "learning_rate": 8.23306034099672e-07, "loss": 0.2874, "step": 31678 }, { "epoch": 1.4840024359394763, "grad_norm": 0.5437579683033281, "learning_rate": 8.231653661161634e-07, "loss": 0.2634, "step": 31679 }, { "epoch": 1.4840492809294046, "grad_norm": 0.6275360949746239, "learning_rate": 8.230247077824166e-07, "loss": 0.2897, "step": 31680 }, { "epoch": 1.4840961259193328, "grad_norm": 0.5484027086380079, "learning_rate": 8.228840590992417e-07, "loss": 0.2616, "step": 31681 }, { "epoch": 1.4841429709092613, "grad_norm": 0.6137241879801212, "learning_rate": 8.227434200674481e-07, "loss": 0.2853, "step": 31682 }, { "epoch": 1.4841898158991895, "grad_norm": 0.5705556004358723, "learning_rate": 8.226027906878434e-07, "loss": 0.2664, "step": 31683 }, { "epoch": 1.484236660889118, "grad_norm": 0.5668615530945337, "learning_rate": 8.224621709612379e-07, "loss": 0.2603, "step": 31684 }, { "epoch": 1.4842835058790462, "grad_norm": 0.6621607517803662, "learning_rate": 8.22321560888441e-07, "loss": 0.2856, "step": 31685 }, { "epoch": 1.4843303508689747, "grad_norm": 0.584437741200186, "learning_rate": 8.221809604702621e-07, "loss": 0.2548, "step": 31686 }, { "epoch": 1.484377195858903, "grad_norm": 0.58323991837516, "learning_rate": 8.220403697075099e-07, "loss": 0.259, "step": 31687 }, { "epoch": 1.4844240408488312, "grad_norm": 0.5793758792186933, "learning_rate": 8.218997886009944e-07, "loss": 0.2705, "step": 31688 }, { "epoch": 1.4844708858387596, "grad_norm": 0.6381468816148348, "learning_rate": 8.21759217151524e-07, "loss": 0.2938, "step": 31689 }, { "epoch": 1.4845177308286879, "grad_norm": 0.5920148932385799, "learning_rate": 8.216186553599064e-07, "loss": 0.266, "step": 31690 }, { "epoch": 1.4845645758186161, "grad_norm": 0.5968555787116141, "learning_rate": 8.21478103226952e-07, "loss": 0.2644, "step": 31691 }, { "epoch": 1.4846114208085446, "grad_norm": 0.5853956433194355, "learning_rate": 8.21337560753469e-07, "loss": 0.2647, "step": 31692 }, { "epoch": 1.4846582657984728, "grad_norm": 0.6229679559785379, "learning_rate": 8.211970279402665e-07, "loss": 0.2753, "step": 31693 }, { "epoch": 1.484705110788401, "grad_norm": 0.6001540686011914, "learning_rate": 8.210565047881538e-07, "loss": 0.2778, "step": 31694 }, { "epoch": 1.4847519557783295, "grad_norm": 0.6132803023223014, "learning_rate": 8.209159912979384e-07, "loss": 0.2885, "step": 31695 }, { "epoch": 1.4847988007682578, "grad_norm": 0.580648191237294, "learning_rate": 8.207754874704299e-07, "loss": 0.264, "step": 31696 }, { "epoch": 1.4848456457581862, "grad_norm": 0.6028730307066195, "learning_rate": 8.206349933064359e-07, "loss": 0.2634, "step": 31697 }, { "epoch": 1.4848924907481145, "grad_norm": 0.6421338868819015, "learning_rate": 8.204945088067653e-07, "loss": 0.3027, "step": 31698 }, { "epoch": 1.484939335738043, "grad_norm": 0.6429060760440946, "learning_rate": 8.203540339722266e-07, "loss": 0.2868, "step": 31699 }, { "epoch": 1.4849861807279712, "grad_norm": 0.5752405577275771, "learning_rate": 8.202135688036292e-07, "loss": 0.2591, "step": 31700 }, { "epoch": 1.4850330257178994, "grad_norm": 0.575012177575682, "learning_rate": 8.200731133017797e-07, "loss": 0.2712, "step": 31701 }, { "epoch": 1.4850798707078279, "grad_norm": 0.6067096440643469, "learning_rate": 8.199326674674871e-07, "loss": 0.2798, "step": 31702 }, { "epoch": 1.4851267156977561, "grad_norm": 0.5712169933827086, "learning_rate": 8.197922313015607e-07, "loss": 0.2689, "step": 31703 }, { "epoch": 1.4851735606876844, "grad_norm": 0.6360949707973016, "learning_rate": 8.196518048048066e-07, "loss": 0.2911, "step": 31704 }, { "epoch": 1.4852204056776128, "grad_norm": 0.6336518192418337, "learning_rate": 8.195113879780345e-07, "loss": 0.2758, "step": 31705 }, { "epoch": 1.485267250667541, "grad_norm": 0.582023604706903, "learning_rate": 8.193709808220524e-07, "loss": 0.2807, "step": 31706 }, { "epoch": 1.4853140956574693, "grad_norm": 0.608053769553214, "learning_rate": 8.19230583337667e-07, "loss": 0.282, "step": 31707 }, { "epoch": 1.4853609406473978, "grad_norm": 0.5637843633685803, "learning_rate": 8.190901955256874e-07, "loss": 0.263, "step": 31708 }, { "epoch": 1.485407785637326, "grad_norm": 0.5532031974203202, "learning_rate": 8.189498173869215e-07, "loss": 0.2655, "step": 31709 }, { "epoch": 1.4854546306272545, "grad_norm": 0.5905549059662852, "learning_rate": 8.188094489221773e-07, "loss": 0.2581, "step": 31710 }, { "epoch": 1.4855014756171827, "grad_norm": 0.575895715001422, "learning_rate": 8.186690901322613e-07, "loss": 0.2641, "step": 31711 }, { "epoch": 1.4855483206071112, "grad_norm": 0.6031768052997328, "learning_rate": 8.185287410179823e-07, "loss": 0.2601, "step": 31712 }, { "epoch": 1.4855951655970394, "grad_norm": 0.6207194117377417, "learning_rate": 8.183884015801486e-07, "loss": 0.2873, "step": 31713 }, { "epoch": 1.4856420105869677, "grad_norm": 0.6016268710577499, "learning_rate": 8.182480718195663e-07, "loss": 0.2595, "step": 31714 }, { "epoch": 1.4856888555768961, "grad_norm": 0.614874454918007, "learning_rate": 8.181077517370434e-07, "loss": 0.2777, "step": 31715 }, { "epoch": 1.4857357005668244, "grad_norm": 0.6393271866632657, "learning_rate": 8.179674413333886e-07, "loss": 0.2831, "step": 31716 }, { "epoch": 1.4857825455567526, "grad_norm": 0.5661523822243278, "learning_rate": 8.178271406094074e-07, "loss": 0.2597, "step": 31717 }, { "epoch": 1.485829390546681, "grad_norm": 0.565670020980705, "learning_rate": 8.176868495659082e-07, "loss": 0.2691, "step": 31718 }, { "epoch": 1.4858762355366093, "grad_norm": 0.6308089933373149, "learning_rate": 8.175465682036995e-07, "loss": 0.277, "step": 31719 }, { "epoch": 1.4859230805265378, "grad_norm": 0.5881110159902526, "learning_rate": 8.174062965235863e-07, "loss": 0.2663, "step": 31720 }, { "epoch": 1.485969925516466, "grad_norm": 0.5796084294228487, "learning_rate": 8.172660345263772e-07, "loss": 0.2621, "step": 31721 }, { "epoch": 1.4860167705063945, "grad_norm": 0.60824675981038, "learning_rate": 8.171257822128789e-07, "loss": 0.2868, "step": 31722 }, { "epoch": 1.4860636154963227, "grad_norm": 0.6044406686406733, "learning_rate": 8.169855395838997e-07, "loss": 0.2717, "step": 31723 }, { "epoch": 1.486110460486251, "grad_norm": 0.570875750581077, "learning_rate": 8.168453066402449e-07, "loss": 0.2711, "step": 31724 }, { "epoch": 1.4861573054761794, "grad_norm": 0.5572630652087998, "learning_rate": 8.167050833827231e-07, "loss": 0.2609, "step": 31725 }, { "epoch": 1.4862041504661077, "grad_norm": 0.6153662884649476, "learning_rate": 8.165648698121398e-07, "loss": 0.2869, "step": 31726 }, { "epoch": 1.486250995456036, "grad_norm": 0.5794719165203257, "learning_rate": 8.164246659293023e-07, "loss": 0.2706, "step": 31727 }, { "epoch": 1.4862978404459644, "grad_norm": 0.6119159052753129, "learning_rate": 8.162844717350179e-07, "loss": 0.2743, "step": 31728 }, { "epoch": 1.4863446854358926, "grad_norm": 0.5918095059879633, "learning_rate": 8.161442872300932e-07, "loss": 0.2747, "step": 31729 }, { "epoch": 1.4863915304258208, "grad_norm": 0.5555389250209507, "learning_rate": 8.160041124153353e-07, "loss": 0.2545, "step": 31730 }, { "epoch": 1.4864383754157493, "grad_norm": 0.6117394506045692, "learning_rate": 8.158639472915508e-07, "loss": 0.2615, "step": 31731 }, { "epoch": 1.4864852204056775, "grad_norm": 0.5841674915985103, "learning_rate": 8.157237918595454e-07, "loss": 0.2637, "step": 31732 }, { "epoch": 1.486532065395606, "grad_norm": 0.6509594109367698, "learning_rate": 8.15583646120126e-07, "loss": 0.2648, "step": 31733 }, { "epoch": 1.4865789103855342, "grad_norm": 0.6137498811720851, "learning_rate": 8.154435100740993e-07, "loss": 0.2697, "step": 31734 }, { "epoch": 1.4866257553754627, "grad_norm": 0.5627177150381567, "learning_rate": 8.153033837222718e-07, "loss": 0.2645, "step": 31735 }, { "epoch": 1.486672600365391, "grad_norm": 0.5826984598770562, "learning_rate": 8.151632670654499e-07, "loss": 0.2591, "step": 31736 }, { "epoch": 1.4867194453553192, "grad_norm": 0.555086552826246, "learning_rate": 8.150231601044408e-07, "loss": 0.2715, "step": 31737 }, { "epoch": 1.4867662903452477, "grad_norm": 0.6138497584715511, "learning_rate": 8.1488306284005e-07, "loss": 0.2812, "step": 31738 }, { "epoch": 1.486813135335176, "grad_norm": 0.5454686979913091, "learning_rate": 8.147429752730828e-07, "loss": 0.2616, "step": 31739 }, { "epoch": 1.4868599803251041, "grad_norm": 0.5609005722914393, "learning_rate": 8.146028974043462e-07, "loss": 0.2654, "step": 31740 }, { "epoch": 1.4869068253150326, "grad_norm": 0.5661820453582942, "learning_rate": 8.144628292346463e-07, "loss": 0.2786, "step": 31741 }, { "epoch": 1.4869536703049608, "grad_norm": 0.6146388510049957, "learning_rate": 8.143227707647891e-07, "loss": 0.2778, "step": 31742 }, { "epoch": 1.487000515294889, "grad_norm": 0.6120488205074638, "learning_rate": 8.141827219955816e-07, "loss": 0.2848, "step": 31743 }, { "epoch": 1.4870473602848175, "grad_norm": 0.563621008383918, "learning_rate": 8.140426829278278e-07, "loss": 0.2611, "step": 31744 }, { "epoch": 1.4870942052747458, "grad_norm": 0.5981850872791461, "learning_rate": 8.139026535623357e-07, "loss": 0.2861, "step": 31745 }, { "epoch": 1.4871410502646742, "grad_norm": 0.5942650784634513, "learning_rate": 8.137626338999091e-07, "loss": 0.2618, "step": 31746 }, { "epoch": 1.4871878952546025, "grad_norm": 0.6421522539211935, "learning_rate": 8.136226239413548e-07, "loss": 0.2833, "step": 31747 }, { "epoch": 1.487234740244531, "grad_norm": 0.5948815051935751, "learning_rate": 8.134826236874782e-07, "loss": 0.272, "step": 31748 }, { "epoch": 1.4872815852344592, "grad_norm": 0.6220788382707033, "learning_rate": 8.133426331390862e-07, "loss": 0.2733, "step": 31749 }, { "epoch": 1.4873284302243874, "grad_norm": 0.6167557514444576, "learning_rate": 8.132026522969827e-07, "loss": 0.2773, "step": 31750 }, { "epoch": 1.487375275214316, "grad_norm": 0.6302421441153157, "learning_rate": 8.130626811619738e-07, "loss": 0.2958, "step": 31751 }, { "epoch": 1.4874221202042441, "grad_norm": 0.5853415535773333, "learning_rate": 8.12922719734866e-07, "loss": 0.2673, "step": 31752 }, { "epoch": 1.4874689651941724, "grad_norm": 0.5783827651985989, "learning_rate": 8.127827680164632e-07, "loss": 0.2634, "step": 31753 }, { "epoch": 1.4875158101841008, "grad_norm": 0.5991998892403975, "learning_rate": 8.126428260075714e-07, "loss": 0.2821, "step": 31754 }, { "epoch": 1.487562655174029, "grad_norm": 0.583256369361063, "learning_rate": 8.125028937089968e-07, "loss": 0.2541, "step": 31755 }, { "epoch": 1.4876095001639575, "grad_norm": 0.6562603633390286, "learning_rate": 8.123629711215433e-07, "loss": 0.2961, "step": 31756 }, { "epoch": 1.4876563451538858, "grad_norm": 0.5961682386775001, "learning_rate": 8.122230582460166e-07, "loss": 0.2652, "step": 31757 }, { "epoch": 1.4877031901438142, "grad_norm": 0.597461089343786, "learning_rate": 8.12083155083222e-07, "loss": 0.2775, "step": 31758 }, { "epoch": 1.4877500351337425, "grad_norm": 0.6290866866716063, "learning_rate": 8.119432616339656e-07, "loss": 0.2849, "step": 31759 }, { "epoch": 1.4877968801236707, "grad_norm": 0.5814885423822364, "learning_rate": 8.118033778990506e-07, "loss": 0.2658, "step": 31760 }, { "epoch": 1.4878437251135992, "grad_norm": 0.5647897121524976, "learning_rate": 8.116635038792834e-07, "loss": 0.2601, "step": 31761 }, { "epoch": 1.4878905701035274, "grad_norm": 0.552921609579716, "learning_rate": 8.115236395754678e-07, "loss": 0.2704, "step": 31762 }, { "epoch": 1.4879374150934557, "grad_norm": 0.5988626958016016, "learning_rate": 8.11383784988409e-07, "loss": 0.2811, "step": 31763 }, { "epoch": 1.4879842600833841, "grad_norm": 0.5903281081270766, "learning_rate": 8.112439401189123e-07, "loss": 0.2646, "step": 31764 }, { "epoch": 1.4880311050733124, "grad_norm": 0.5776371289410702, "learning_rate": 8.111041049677831e-07, "loss": 0.2552, "step": 31765 }, { "epoch": 1.4880779500632406, "grad_norm": 0.6203054439871, "learning_rate": 8.109642795358244e-07, "loss": 0.2819, "step": 31766 }, { "epoch": 1.488124795053169, "grad_norm": 0.5748953936954239, "learning_rate": 8.108244638238427e-07, "loss": 0.2569, "step": 31767 }, { "epoch": 1.4881716400430973, "grad_norm": 0.5783327030172972, "learning_rate": 8.106846578326408e-07, "loss": 0.2664, "step": 31768 }, { "epoch": 1.4882184850330258, "grad_norm": 0.6040085249625651, "learning_rate": 8.105448615630241e-07, "loss": 0.2547, "step": 31769 }, { "epoch": 1.488265330022954, "grad_norm": 0.6160750429664408, "learning_rate": 8.104050750157968e-07, "loss": 0.2799, "step": 31770 }, { "epoch": 1.4883121750128825, "grad_norm": 0.6166674456252766, "learning_rate": 8.102652981917641e-07, "loss": 0.3, "step": 31771 }, { "epoch": 1.4883590200028107, "grad_norm": 0.5939240539849372, "learning_rate": 8.101255310917305e-07, "loss": 0.2562, "step": 31772 }, { "epoch": 1.488405864992739, "grad_norm": 0.6038321182312679, "learning_rate": 8.099857737164996e-07, "loss": 0.2808, "step": 31773 }, { "epoch": 1.4884527099826674, "grad_norm": 0.5713677190467681, "learning_rate": 8.098460260668753e-07, "loss": 0.255, "step": 31774 }, { "epoch": 1.4884995549725957, "grad_norm": 0.5963074031606223, "learning_rate": 8.097062881436621e-07, "loss": 0.2768, "step": 31775 }, { "epoch": 1.488546399962524, "grad_norm": 0.5601053810937675, "learning_rate": 8.095665599476644e-07, "loss": 0.2453, "step": 31776 }, { "epoch": 1.4885932449524524, "grad_norm": 0.5874705644012317, "learning_rate": 8.094268414796863e-07, "loss": 0.2689, "step": 31777 }, { "epoch": 1.4886400899423806, "grad_norm": 0.5828928662735658, "learning_rate": 8.092871327405316e-07, "loss": 0.2597, "step": 31778 }, { "epoch": 1.4886869349323089, "grad_norm": 0.5850193780848221, "learning_rate": 8.091474337310057e-07, "loss": 0.2771, "step": 31779 }, { "epoch": 1.4887337799222373, "grad_norm": 0.6278171658764196, "learning_rate": 8.090077444519112e-07, "loss": 0.2736, "step": 31780 }, { "epoch": 1.4887806249121656, "grad_norm": 0.620890907022542, "learning_rate": 8.088680649040512e-07, "loss": 0.2875, "step": 31781 }, { "epoch": 1.488827469902094, "grad_norm": 0.6005660768823557, "learning_rate": 8.087283950882305e-07, "loss": 0.2664, "step": 31782 }, { "epoch": 1.4888743148920223, "grad_norm": 0.6274780648602347, "learning_rate": 8.085887350052526e-07, "loss": 0.2872, "step": 31783 }, { "epoch": 1.4889211598819507, "grad_norm": 0.5869294383648432, "learning_rate": 8.084490846559218e-07, "loss": 0.2749, "step": 31784 }, { "epoch": 1.488968004871879, "grad_norm": 0.6126926580662276, "learning_rate": 8.083094440410411e-07, "loss": 0.2655, "step": 31785 }, { "epoch": 1.4890148498618072, "grad_norm": 0.6220370844776573, "learning_rate": 8.081698131614152e-07, "loss": 0.2808, "step": 31786 }, { "epoch": 1.4890616948517357, "grad_norm": 0.6265882271886687, "learning_rate": 8.080301920178469e-07, "loss": 0.2835, "step": 31787 }, { "epoch": 1.489108539841664, "grad_norm": 0.6004333106569558, "learning_rate": 8.078905806111387e-07, "loss": 0.2667, "step": 31788 }, { "epoch": 1.4891553848315922, "grad_norm": 0.5791802001549802, "learning_rate": 8.077509789420948e-07, "loss": 0.2548, "step": 31789 }, { "epoch": 1.4892022298215206, "grad_norm": 0.5998959198504383, "learning_rate": 8.07611387011519e-07, "loss": 0.2822, "step": 31790 }, { "epoch": 1.4892490748114489, "grad_norm": 0.5994078555616982, "learning_rate": 8.074718048202141e-07, "loss": 0.2814, "step": 31791 }, { "epoch": 1.4892959198013773, "grad_norm": 0.6045005032970373, "learning_rate": 8.073322323689844e-07, "loss": 0.265, "step": 31792 }, { "epoch": 1.4893427647913056, "grad_norm": 0.5808886106921495, "learning_rate": 8.071926696586313e-07, "loss": 0.2675, "step": 31793 }, { "epoch": 1.489389609781234, "grad_norm": 0.5905699470050159, "learning_rate": 8.0705311668996e-07, "loss": 0.2786, "step": 31794 }, { "epoch": 1.4894364547711623, "grad_norm": 0.5784401202651308, "learning_rate": 8.069135734637715e-07, "loss": 0.2665, "step": 31795 }, { "epoch": 1.4894832997610905, "grad_norm": 0.6107034594529235, "learning_rate": 8.067740399808699e-07, "loss": 0.2806, "step": 31796 }, { "epoch": 1.489530144751019, "grad_norm": 0.6053547495444, "learning_rate": 8.066345162420583e-07, "loss": 0.2637, "step": 31797 }, { "epoch": 1.4895769897409472, "grad_norm": 0.6450447617752383, "learning_rate": 8.064950022481399e-07, "loss": 0.2808, "step": 31798 }, { "epoch": 1.4896238347308755, "grad_norm": 0.569457305438104, "learning_rate": 8.063554979999166e-07, "loss": 0.2585, "step": 31799 }, { "epoch": 1.489670679720804, "grad_norm": 0.5976398941074855, "learning_rate": 8.062160034981917e-07, "loss": 0.2678, "step": 31800 }, { "epoch": 1.4897175247107322, "grad_norm": 0.5372080672200522, "learning_rate": 8.060765187437686e-07, "loss": 0.2506, "step": 31801 }, { "epoch": 1.4897643697006604, "grad_norm": 0.5699541861698302, "learning_rate": 8.059370437374486e-07, "loss": 0.2639, "step": 31802 }, { "epoch": 1.4898112146905889, "grad_norm": 0.5786760308268905, "learning_rate": 8.057975784800354e-07, "loss": 0.2797, "step": 31803 }, { "epoch": 1.489858059680517, "grad_norm": 0.6112528319996561, "learning_rate": 8.056581229723318e-07, "loss": 0.2568, "step": 31804 }, { "epoch": 1.4899049046704456, "grad_norm": 0.603592519139789, "learning_rate": 8.055186772151391e-07, "loss": 0.2688, "step": 31805 }, { "epoch": 1.4899517496603738, "grad_norm": 0.5705068580322815, "learning_rate": 8.053792412092607e-07, "loss": 0.2785, "step": 31806 }, { "epoch": 1.4899985946503023, "grad_norm": 0.6180673315476262, "learning_rate": 8.052398149554996e-07, "loss": 0.2754, "step": 31807 }, { "epoch": 1.4900454396402305, "grad_norm": 0.6160333180202132, "learning_rate": 8.051003984546566e-07, "loss": 0.2895, "step": 31808 }, { "epoch": 1.4900922846301587, "grad_norm": 0.62928100763714, "learning_rate": 8.049609917075346e-07, "loss": 0.2772, "step": 31809 }, { "epoch": 1.4901391296200872, "grad_norm": 0.6247481972891294, "learning_rate": 8.048215947149371e-07, "loss": 0.2759, "step": 31810 }, { "epoch": 1.4901859746100155, "grad_norm": 0.588655506857825, "learning_rate": 8.046822074776645e-07, "loss": 0.2595, "step": 31811 }, { "epoch": 1.4902328195999437, "grad_norm": 0.6278293634513716, "learning_rate": 8.045428299965194e-07, "loss": 0.2747, "step": 31812 }, { "epoch": 1.4902796645898722, "grad_norm": 0.5864738557256662, "learning_rate": 8.044034622723043e-07, "loss": 0.2786, "step": 31813 }, { "epoch": 1.4903265095798004, "grad_norm": 0.5824740081737256, "learning_rate": 8.042641043058219e-07, "loss": 0.2919, "step": 31814 }, { "epoch": 1.4903733545697286, "grad_norm": 0.6264308685774811, "learning_rate": 8.041247560978726e-07, "loss": 0.2888, "step": 31815 }, { "epoch": 1.490420199559657, "grad_norm": 0.5957585788468929, "learning_rate": 8.039854176492601e-07, "loss": 0.2621, "step": 31816 }, { "epoch": 1.4904670445495853, "grad_norm": 0.5936710454028126, "learning_rate": 8.038460889607841e-07, "loss": 0.2802, "step": 31817 }, { "epoch": 1.4905138895395138, "grad_norm": 0.6336374839482624, "learning_rate": 8.037067700332477e-07, "loss": 0.2885, "step": 31818 }, { "epoch": 1.490560734529442, "grad_norm": 0.6290557600751177, "learning_rate": 8.035674608674523e-07, "loss": 0.2782, "step": 31819 }, { "epoch": 1.4906075795193705, "grad_norm": 0.5485411880979456, "learning_rate": 8.034281614642001e-07, "loss": 0.2617, "step": 31820 }, { "epoch": 1.4906544245092987, "grad_norm": 0.5916141074032661, "learning_rate": 8.032888718242932e-07, "loss": 0.2619, "step": 31821 }, { "epoch": 1.490701269499227, "grad_norm": 0.6146154388957146, "learning_rate": 8.031495919485322e-07, "loss": 0.277, "step": 31822 }, { "epoch": 1.4907481144891555, "grad_norm": 0.5910985587103226, "learning_rate": 8.030103218377181e-07, "loss": 0.2781, "step": 31823 }, { "epoch": 1.4907949594790837, "grad_norm": 0.6254282678360852, "learning_rate": 8.02871061492653e-07, "loss": 0.2835, "step": 31824 }, { "epoch": 1.490841804469012, "grad_norm": 0.5801980265048786, "learning_rate": 8.027318109141385e-07, "loss": 0.265, "step": 31825 }, { "epoch": 1.4908886494589404, "grad_norm": 0.585348166696875, "learning_rate": 8.025925701029758e-07, "loss": 0.2635, "step": 31826 }, { "epoch": 1.4909354944488686, "grad_norm": 0.593391548183927, "learning_rate": 8.024533390599662e-07, "loss": 0.2755, "step": 31827 }, { "epoch": 1.490982339438797, "grad_norm": 0.5973232843547881, "learning_rate": 8.023141177859117e-07, "loss": 0.2604, "step": 31828 }, { "epoch": 1.4910291844287253, "grad_norm": 0.5667605285611779, "learning_rate": 8.021749062816128e-07, "loss": 0.2634, "step": 31829 }, { "epoch": 1.4910760294186538, "grad_norm": 0.5894642503312172, "learning_rate": 8.020357045478699e-07, "loss": 0.2669, "step": 31830 }, { "epoch": 1.491122874408582, "grad_norm": 0.6211021088505294, "learning_rate": 8.018965125854847e-07, "loss": 0.2837, "step": 31831 }, { "epoch": 1.4911697193985103, "grad_norm": 0.5856034691485905, "learning_rate": 8.017573303952581e-07, "loss": 0.2691, "step": 31832 }, { "epoch": 1.4912165643884387, "grad_norm": 0.5675933572276863, "learning_rate": 8.016181579779913e-07, "loss": 0.2584, "step": 31833 }, { "epoch": 1.491263409378367, "grad_norm": 0.6052624485034275, "learning_rate": 8.014789953344862e-07, "loss": 0.2503, "step": 31834 }, { "epoch": 1.4913102543682952, "grad_norm": 0.6535262469097758, "learning_rate": 8.013398424655413e-07, "loss": 0.2781, "step": 31835 }, { "epoch": 1.4913570993582237, "grad_norm": 0.6138448870884837, "learning_rate": 8.012006993719599e-07, "loss": 0.2888, "step": 31836 }, { "epoch": 1.491403944348152, "grad_norm": 0.5686662215881964, "learning_rate": 8.010615660545404e-07, "loss": 0.2658, "step": 31837 }, { "epoch": 1.4914507893380802, "grad_norm": 0.6025306210611684, "learning_rate": 8.00922442514085e-07, "loss": 0.2789, "step": 31838 }, { "epoch": 1.4914976343280086, "grad_norm": 0.6187222143450316, "learning_rate": 8.007833287513933e-07, "loss": 0.291, "step": 31839 }, { "epoch": 1.4915444793179369, "grad_norm": 0.6457416194691186, "learning_rate": 8.006442247672677e-07, "loss": 0.2684, "step": 31840 }, { "epoch": 1.4915913243078653, "grad_norm": 0.6161334689379999, "learning_rate": 8.005051305625064e-07, "loss": 0.2826, "step": 31841 }, { "epoch": 1.4916381692977936, "grad_norm": 0.5949423459087212, "learning_rate": 8.003660461379112e-07, "loss": 0.2774, "step": 31842 }, { "epoch": 1.491685014287722, "grad_norm": 0.5480938649654218, "learning_rate": 8.002269714942829e-07, "loss": 0.2514, "step": 31843 }, { "epoch": 1.4917318592776503, "grad_norm": 0.5654315068183119, "learning_rate": 8.000879066324202e-07, "loss": 0.266, "step": 31844 }, { "epoch": 1.4917787042675785, "grad_norm": 0.6421743371384615, "learning_rate": 7.999488515531245e-07, "loss": 0.2806, "step": 31845 }, { "epoch": 1.491825549257507, "grad_norm": 0.6048612618150953, "learning_rate": 7.998098062571965e-07, "loss": 0.2739, "step": 31846 }, { "epoch": 1.4918723942474352, "grad_norm": 0.5990381247606886, "learning_rate": 7.996707707454352e-07, "loss": 0.2892, "step": 31847 }, { "epoch": 1.4919192392373635, "grad_norm": 0.5985826931205873, "learning_rate": 7.99531745018641e-07, "loss": 0.2955, "step": 31848 }, { "epoch": 1.491966084227292, "grad_norm": 0.6222247476446985, "learning_rate": 7.993927290776144e-07, "loss": 0.2991, "step": 31849 }, { "epoch": 1.4920129292172202, "grad_norm": 0.6176620311043395, "learning_rate": 7.992537229231559e-07, "loss": 0.2796, "step": 31850 }, { "epoch": 1.4920597742071484, "grad_norm": 0.6002163979513662, "learning_rate": 7.99114726556064e-07, "loss": 0.2617, "step": 31851 }, { "epoch": 1.4921066191970769, "grad_norm": 0.5830505748214467, "learning_rate": 7.989757399771394e-07, "loss": 0.2609, "step": 31852 }, { "epoch": 1.4921534641870051, "grad_norm": 0.5622824775224478, "learning_rate": 7.988367631871827e-07, "loss": 0.271, "step": 31853 }, { "epoch": 1.4922003091769336, "grad_norm": 0.6180988104125931, "learning_rate": 7.986977961869919e-07, "loss": 0.2857, "step": 31854 }, { "epoch": 1.4922471541668618, "grad_norm": 0.6236385053097543, "learning_rate": 7.985588389773676e-07, "loss": 0.2886, "step": 31855 }, { "epoch": 1.4922939991567903, "grad_norm": 0.5706590568820854, "learning_rate": 7.984198915591107e-07, "loss": 0.2686, "step": 31856 }, { "epoch": 1.4923408441467185, "grad_norm": 0.6135183236280846, "learning_rate": 7.982809539330185e-07, "loss": 0.2846, "step": 31857 }, { "epoch": 1.4923876891366468, "grad_norm": 0.6222381730105426, "learning_rate": 7.981420260998921e-07, "loss": 0.2905, "step": 31858 }, { "epoch": 1.4924345341265752, "grad_norm": 0.5816834353276322, "learning_rate": 7.980031080605311e-07, "loss": 0.2461, "step": 31859 }, { "epoch": 1.4924813791165035, "grad_norm": 0.5994885837644189, "learning_rate": 7.978641998157336e-07, "loss": 0.2659, "step": 31860 }, { "epoch": 1.4925282241064317, "grad_norm": 0.6631222914435742, "learning_rate": 7.977253013663e-07, "loss": 0.2731, "step": 31861 }, { "epoch": 1.4925750690963602, "grad_norm": 0.6309518834408202, "learning_rate": 7.975864127130292e-07, "loss": 0.2729, "step": 31862 }, { "epoch": 1.4926219140862884, "grad_norm": 0.5833160800972335, "learning_rate": 7.974475338567217e-07, "loss": 0.2825, "step": 31863 }, { "epoch": 1.4926687590762169, "grad_norm": 0.5584892856852777, "learning_rate": 7.973086647981748e-07, "loss": 0.2449, "step": 31864 }, { "epoch": 1.4927156040661451, "grad_norm": 0.6027953956677367, "learning_rate": 7.971698055381896e-07, "loss": 0.2575, "step": 31865 }, { "epoch": 1.4927624490560736, "grad_norm": 0.5712775589893979, "learning_rate": 7.97030956077563e-07, "loss": 0.263, "step": 31866 }, { "epoch": 1.4928092940460018, "grad_norm": 0.6107804568698558, "learning_rate": 7.968921164170956e-07, "loss": 0.2752, "step": 31867 }, { "epoch": 1.49285613903593, "grad_norm": 0.632170287380662, "learning_rate": 7.967532865575858e-07, "loss": 0.2752, "step": 31868 }, { "epoch": 1.4929029840258585, "grad_norm": 0.5755780488835422, "learning_rate": 7.966144664998326e-07, "loss": 0.2749, "step": 31869 }, { "epoch": 1.4929498290157868, "grad_norm": 0.6138860271308841, "learning_rate": 7.964756562446363e-07, "loss": 0.2683, "step": 31870 }, { "epoch": 1.492996674005715, "grad_norm": 0.6382307490756454, "learning_rate": 7.963368557927942e-07, "loss": 0.2827, "step": 31871 }, { "epoch": 1.4930435189956435, "grad_norm": 0.5963570760433102, "learning_rate": 7.961980651451043e-07, "loss": 0.2793, "step": 31872 }, { "epoch": 1.4930903639855717, "grad_norm": 0.53732759240852, "learning_rate": 7.960592843023665e-07, "loss": 0.2646, "step": 31873 }, { "epoch": 1.4931372089755, "grad_norm": 0.6326892635315735, "learning_rate": 7.959205132653791e-07, "loss": 0.2655, "step": 31874 }, { "epoch": 1.4931840539654284, "grad_norm": 0.5910503736868028, "learning_rate": 7.957817520349409e-07, "loss": 0.2838, "step": 31875 }, { "epoch": 1.4932308989553567, "grad_norm": 0.6008267471834053, "learning_rate": 7.956430006118504e-07, "loss": 0.2816, "step": 31876 }, { "epoch": 1.4932777439452851, "grad_norm": 0.6212152273090533, "learning_rate": 7.95504258996907e-07, "loss": 0.2792, "step": 31877 }, { "epoch": 1.4933245889352134, "grad_norm": 0.5946993249276913, "learning_rate": 7.95365527190908e-07, "loss": 0.2657, "step": 31878 }, { "epoch": 1.4933714339251418, "grad_norm": 0.5607217203781211, "learning_rate": 7.95226805194651e-07, "loss": 0.2612, "step": 31879 }, { "epoch": 1.49341827891507, "grad_norm": 0.5755480113010597, "learning_rate": 7.950880930089355e-07, "loss": 0.2961, "step": 31880 }, { "epoch": 1.4934651239049983, "grad_norm": 0.6147305259985546, "learning_rate": 7.949493906345595e-07, "loss": 0.2825, "step": 31881 }, { "epoch": 1.4935119688949268, "grad_norm": 0.6377639030898093, "learning_rate": 7.948106980723211e-07, "loss": 0.2884, "step": 31882 }, { "epoch": 1.493558813884855, "grad_norm": 0.5459227336399128, "learning_rate": 7.946720153230194e-07, "loss": 0.2589, "step": 31883 }, { "epoch": 1.4936056588747832, "grad_norm": 0.6617512901109093, "learning_rate": 7.94533342387451e-07, "loss": 0.296, "step": 31884 }, { "epoch": 1.4936525038647117, "grad_norm": 0.603752883765919, "learning_rate": 7.943946792664153e-07, "loss": 0.272, "step": 31885 }, { "epoch": 1.49369934885464, "grad_norm": 0.6073859486044498, "learning_rate": 7.942560259607085e-07, "loss": 0.2595, "step": 31886 }, { "epoch": 1.4937461938445682, "grad_norm": 0.5911284833589739, "learning_rate": 7.941173824711299e-07, "loss": 0.2816, "step": 31887 }, { "epoch": 1.4937930388344967, "grad_norm": 0.6048624544410192, "learning_rate": 7.939787487984765e-07, "loss": 0.286, "step": 31888 }, { "epoch": 1.493839883824425, "grad_norm": 0.658747260475096, "learning_rate": 7.938401249435479e-07, "loss": 0.2929, "step": 31889 }, { "epoch": 1.4938867288143534, "grad_norm": 0.580786606470136, "learning_rate": 7.937015109071395e-07, "loss": 0.262, "step": 31890 }, { "epoch": 1.4939335738042816, "grad_norm": 0.6194149979531822, "learning_rate": 7.935629066900502e-07, "loss": 0.2739, "step": 31891 }, { "epoch": 1.49398041879421, "grad_norm": 0.5610039061965846, "learning_rate": 7.934243122930782e-07, "loss": 0.2689, "step": 31892 }, { "epoch": 1.4940272637841383, "grad_norm": 0.6372128515600085, "learning_rate": 7.932857277170194e-07, "loss": 0.2698, "step": 31893 }, { "epoch": 1.4940741087740665, "grad_norm": 0.5823856385303874, "learning_rate": 7.931471529626724e-07, "loss": 0.2557, "step": 31894 }, { "epoch": 1.494120953763995, "grad_norm": 0.6082732170606527, "learning_rate": 7.930085880308355e-07, "loss": 0.2599, "step": 31895 }, { "epoch": 1.4941677987539232, "grad_norm": 0.6155556344147578, "learning_rate": 7.928700329223041e-07, "loss": 0.2954, "step": 31896 }, { "epoch": 1.4942146437438515, "grad_norm": 0.5953659300189765, "learning_rate": 7.927314876378766e-07, "loss": 0.2776, "step": 31897 }, { "epoch": 1.49426148873378, "grad_norm": 0.6074016650754375, "learning_rate": 7.925929521783504e-07, "loss": 0.267, "step": 31898 }, { "epoch": 1.4943083337237082, "grad_norm": 0.6257495652030844, "learning_rate": 7.924544265445233e-07, "loss": 0.2859, "step": 31899 }, { "epoch": 1.4943551787136367, "grad_norm": 0.5974008365605433, "learning_rate": 7.923159107371911e-07, "loss": 0.2745, "step": 31900 }, { "epoch": 1.494402023703565, "grad_norm": 0.5964067755751289, "learning_rate": 7.921774047571524e-07, "loss": 0.2866, "step": 31901 }, { "epoch": 1.4944488686934934, "grad_norm": 0.6352632330489685, "learning_rate": 7.920389086052027e-07, "loss": 0.2695, "step": 31902 }, { "epoch": 1.4944957136834216, "grad_norm": 0.5994383655761649, "learning_rate": 7.919004222821397e-07, "loss": 0.2784, "step": 31903 }, { "epoch": 1.4945425586733498, "grad_norm": 0.5923118550404414, "learning_rate": 7.917619457887607e-07, "loss": 0.2724, "step": 31904 }, { "epoch": 1.4945894036632783, "grad_norm": 0.6296551650989952, "learning_rate": 7.916234791258629e-07, "loss": 0.2786, "step": 31905 }, { "epoch": 1.4946362486532065, "grad_norm": 0.5636570822015288, "learning_rate": 7.914850222942419e-07, "loss": 0.266, "step": 31906 }, { "epoch": 1.4946830936431348, "grad_norm": 0.6125315165267149, "learning_rate": 7.913465752946958e-07, "loss": 0.2971, "step": 31907 }, { "epoch": 1.4947299386330632, "grad_norm": 0.7628924077659262, "learning_rate": 7.912081381280202e-07, "loss": 0.2683, "step": 31908 }, { "epoch": 1.4947767836229915, "grad_norm": 0.5602595287581161, "learning_rate": 7.910697107950122e-07, "loss": 0.2569, "step": 31909 }, { "epoch": 1.4948236286129197, "grad_norm": 0.5780992074064094, "learning_rate": 7.909312932964682e-07, "loss": 0.2685, "step": 31910 }, { "epoch": 1.4948704736028482, "grad_norm": 0.6043357768900983, "learning_rate": 7.907928856331853e-07, "loss": 0.2778, "step": 31911 }, { "epoch": 1.4949173185927764, "grad_norm": 0.6397327536602258, "learning_rate": 7.906544878059607e-07, "loss": 0.2779, "step": 31912 }, { "epoch": 1.494964163582705, "grad_norm": 0.6194403764582398, "learning_rate": 7.905160998155898e-07, "loss": 0.2944, "step": 31913 }, { "epoch": 1.4950110085726331, "grad_norm": 0.651125195846037, "learning_rate": 7.903777216628683e-07, "loss": 0.2972, "step": 31914 }, { "epoch": 1.4950578535625616, "grad_norm": 0.5926455627159419, "learning_rate": 7.90239353348593e-07, "loss": 0.258, "step": 31915 }, { "epoch": 1.4951046985524898, "grad_norm": 0.6310235894625102, "learning_rate": 7.901009948735608e-07, "loss": 0.2712, "step": 31916 }, { "epoch": 1.495151543542418, "grad_norm": 0.5743297608147481, "learning_rate": 7.899626462385677e-07, "loss": 0.2716, "step": 31917 }, { "epoch": 1.4951983885323465, "grad_norm": 0.6074830414068382, "learning_rate": 7.898243074444095e-07, "loss": 0.2634, "step": 31918 }, { "epoch": 1.4952452335222748, "grad_norm": 0.5733059337979673, "learning_rate": 7.896859784918834e-07, "loss": 0.2733, "step": 31919 }, { "epoch": 1.495292078512203, "grad_norm": 0.5933043613497596, "learning_rate": 7.895476593817846e-07, "loss": 0.2737, "step": 31920 }, { "epoch": 1.4953389235021315, "grad_norm": 0.5830160807427756, "learning_rate": 7.894093501149086e-07, "loss": 0.2713, "step": 31921 }, { "epoch": 1.4953857684920597, "grad_norm": 0.617363628987705, "learning_rate": 7.892710506920515e-07, "loss": 0.2767, "step": 31922 }, { "epoch": 1.495432613481988, "grad_norm": 0.6059387437322762, "learning_rate": 7.891327611140098e-07, "loss": 0.2741, "step": 31923 }, { "epoch": 1.4954794584719164, "grad_norm": 0.5883998360671775, "learning_rate": 7.889944813815789e-07, "loss": 0.2698, "step": 31924 }, { "epoch": 1.4955263034618447, "grad_norm": 0.5994718246439865, "learning_rate": 7.888562114955548e-07, "loss": 0.2688, "step": 31925 }, { "epoch": 1.4955731484517731, "grad_norm": 0.5312740289928031, "learning_rate": 7.887179514567337e-07, "loss": 0.2554, "step": 31926 }, { "epoch": 1.4956199934417014, "grad_norm": 0.6402248931745592, "learning_rate": 7.885797012659108e-07, "loss": 0.2911, "step": 31927 }, { "epoch": 1.4956668384316298, "grad_norm": 0.6095204791121573, "learning_rate": 7.884414609238808e-07, "loss": 0.2636, "step": 31928 }, { "epoch": 1.495713683421558, "grad_norm": 0.5841433795141, "learning_rate": 7.8830323043144e-07, "loss": 0.2772, "step": 31929 }, { "epoch": 1.4957605284114863, "grad_norm": 0.6157077830435254, "learning_rate": 7.881650097893839e-07, "loss": 0.2735, "step": 31930 }, { "epoch": 1.4958073734014148, "grad_norm": 0.5799254921517647, "learning_rate": 7.88026798998508e-07, "loss": 0.2683, "step": 31931 }, { "epoch": 1.495854218391343, "grad_norm": 0.5891152556580579, "learning_rate": 7.878885980596085e-07, "loss": 0.2616, "step": 31932 }, { "epoch": 1.4959010633812713, "grad_norm": 0.626068776243336, "learning_rate": 7.877504069734788e-07, "loss": 0.2894, "step": 31933 }, { "epoch": 1.4959479083711997, "grad_norm": 0.6018812505549195, "learning_rate": 7.876122257409158e-07, "loss": 0.2694, "step": 31934 }, { "epoch": 1.495994753361128, "grad_norm": 0.5953190968322087, "learning_rate": 7.874740543627135e-07, "loss": 0.2674, "step": 31935 }, { "epoch": 1.4960415983510564, "grad_norm": 0.6061385103159949, "learning_rate": 7.873358928396674e-07, "loss": 0.28, "step": 31936 }, { "epoch": 1.4960884433409847, "grad_norm": 0.5948009750451927, "learning_rate": 7.871977411725728e-07, "loss": 0.2704, "step": 31937 }, { "epoch": 1.4961352883309131, "grad_norm": 0.6345299944396511, "learning_rate": 7.870595993622257e-07, "loss": 0.2784, "step": 31938 }, { "epoch": 1.4961821333208414, "grad_norm": 0.5755904501987288, "learning_rate": 7.869214674094192e-07, "loss": 0.2782, "step": 31939 }, { "epoch": 1.4962289783107696, "grad_norm": 0.6305503586910807, "learning_rate": 7.867833453149489e-07, "loss": 0.288, "step": 31940 }, { "epoch": 1.496275823300698, "grad_norm": 0.5614193247226685, "learning_rate": 7.866452330796109e-07, "loss": 0.2538, "step": 31941 }, { "epoch": 1.4963226682906263, "grad_norm": 0.6110485885683232, "learning_rate": 7.86507130704198e-07, "loss": 0.2956, "step": 31942 }, { "epoch": 1.4963695132805546, "grad_norm": 0.5793379066472432, "learning_rate": 7.863690381895059e-07, "loss": 0.2801, "step": 31943 }, { "epoch": 1.496416358270483, "grad_norm": 0.569741521432525, "learning_rate": 7.862309555363301e-07, "loss": 0.2536, "step": 31944 }, { "epoch": 1.4964632032604113, "grad_norm": 0.575733284929519, "learning_rate": 7.860928827454636e-07, "loss": 0.2752, "step": 31945 }, { "epoch": 1.4965100482503395, "grad_norm": 0.5659749944648187, "learning_rate": 7.859548198177017e-07, "loss": 0.2644, "step": 31946 }, { "epoch": 1.496556893240268, "grad_norm": 0.6190317824957159, "learning_rate": 7.858167667538399e-07, "loss": 0.2642, "step": 31947 }, { "epoch": 1.4966037382301962, "grad_norm": 0.6025206267515157, "learning_rate": 7.856787235546709e-07, "loss": 0.2614, "step": 31948 }, { "epoch": 1.4966505832201247, "grad_norm": 0.588256888452419, "learning_rate": 7.855406902209903e-07, "loss": 0.2719, "step": 31949 }, { "epoch": 1.496697428210053, "grad_norm": 0.5973654001009181, "learning_rate": 7.854026667535925e-07, "loss": 0.2657, "step": 31950 }, { "epoch": 1.4967442731999814, "grad_norm": 0.6194769976115151, "learning_rate": 7.852646531532706e-07, "loss": 0.2792, "step": 31951 }, { "epoch": 1.4967911181899096, "grad_norm": 0.6130899593726999, "learning_rate": 7.851266494208198e-07, "loss": 0.28, "step": 31952 }, { "epoch": 1.4968379631798379, "grad_norm": 0.6051761650476207, "learning_rate": 7.849886555570343e-07, "loss": 0.2721, "step": 31953 }, { "epoch": 1.4968848081697663, "grad_norm": 0.5314491753350781, "learning_rate": 7.848506715627085e-07, "loss": 0.2586, "step": 31954 }, { "epoch": 1.4969316531596946, "grad_norm": 0.60636191376492, "learning_rate": 7.847126974386355e-07, "loss": 0.2672, "step": 31955 }, { "epoch": 1.4969784981496228, "grad_norm": 0.5564611507378445, "learning_rate": 7.845747331856104e-07, "loss": 0.2639, "step": 31956 }, { "epoch": 1.4970253431395513, "grad_norm": 0.5818710497901503, "learning_rate": 7.844367788044258e-07, "loss": 0.2639, "step": 31957 }, { "epoch": 1.4970721881294795, "grad_norm": 0.6154063928111259, "learning_rate": 7.842988342958763e-07, "loss": 0.2789, "step": 31958 }, { "epoch": 1.4971190331194077, "grad_norm": 0.6248016382675222, "learning_rate": 7.841608996607558e-07, "loss": 0.2737, "step": 31959 }, { "epoch": 1.4971658781093362, "grad_norm": 0.6189892346154007, "learning_rate": 7.840229748998582e-07, "loss": 0.2698, "step": 31960 }, { "epoch": 1.4972127230992645, "grad_norm": 0.5824803978625163, "learning_rate": 7.838850600139778e-07, "loss": 0.2574, "step": 31961 }, { "epoch": 1.497259568089193, "grad_norm": 0.5617098246904761, "learning_rate": 7.837471550039074e-07, "loss": 0.2672, "step": 31962 }, { "epoch": 1.4973064130791212, "grad_norm": 0.6001510314499658, "learning_rate": 7.836092598704401e-07, "loss": 0.2777, "step": 31963 }, { "epoch": 1.4973532580690496, "grad_norm": 0.5841754771875568, "learning_rate": 7.834713746143702e-07, "loss": 0.2817, "step": 31964 }, { "epoch": 1.4974001030589779, "grad_norm": 0.5661604680794357, "learning_rate": 7.83333499236491e-07, "loss": 0.268, "step": 31965 }, { "epoch": 1.497446948048906, "grad_norm": 0.6167179888567621, "learning_rate": 7.83195633737596e-07, "loss": 0.2681, "step": 31966 }, { "epoch": 1.4974937930388346, "grad_norm": 0.6427591531073836, "learning_rate": 7.830577781184786e-07, "loss": 0.283, "step": 31967 }, { "epoch": 1.4975406380287628, "grad_norm": 0.5921657226443474, "learning_rate": 7.829199323799331e-07, "loss": 0.2642, "step": 31968 }, { "epoch": 1.497587483018691, "grad_norm": 0.5988977149088431, "learning_rate": 7.827820965227517e-07, "loss": 0.2547, "step": 31969 }, { "epoch": 1.4976343280086195, "grad_norm": 0.5820172422439234, "learning_rate": 7.826442705477269e-07, "loss": 0.2584, "step": 31970 }, { "epoch": 1.4976811729985477, "grad_norm": 0.6387325878543268, "learning_rate": 7.825064544556529e-07, "loss": 0.2752, "step": 31971 }, { "epoch": 1.4977280179884762, "grad_norm": 0.596982659619069, "learning_rate": 7.823686482473222e-07, "loss": 0.2598, "step": 31972 }, { "epoch": 1.4977748629784045, "grad_norm": 0.5681897623939125, "learning_rate": 7.822308519235286e-07, "loss": 0.2701, "step": 31973 }, { "epoch": 1.497821707968333, "grad_norm": 0.6054393809678745, "learning_rate": 7.820930654850653e-07, "loss": 0.2695, "step": 31974 }, { "epoch": 1.4978685529582612, "grad_norm": 0.6578746371860031, "learning_rate": 7.819552889327239e-07, "loss": 0.2747, "step": 31975 }, { "epoch": 1.4979153979481894, "grad_norm": 0.589331490616593, "learning_rate": 7.818175222672988e-07, "loss": 0.2837, "step": 31976 }, { "epoch": 1.4979622429381179, "grad_norm": 0.5452992083029973, "learning_rate": 7.816797654895811e-07, "loss": 0.2556, "step": 31977 }, { "epoch": 1.498009087928046, "grad_norm": 0.5942438493659595, "learning_rate": 7.815420186003647e-07, "loss": 0.265, "step": 31978 }, { "epoch": 1.4980559329179743, "grad_norm": 0.6273359893411664, "learning_rate": 7.814042816004419e-07, "loss": 0.2773, "step": 31979 }, { "epoch": 1.4981027779079028, "grad_norm": 0.5722921283575128, "learning_rate": 7.812665544906062e-07, "loss": 0.2713, "step": 31980 }, { "epoch": 1.498149622897831, "grad_norm": 0.6039153858142221, "learning_rate": 7.811288372716489e-07, "loss": 0.2623, "step": 31981 }, { "epoch": 1.4981964678877593, "grad_norm": 0.6209794754908696, "learning_rate": 7.809911299443629e-07, "loss": 0.29, "step": 31982 }, { "epoch": 1.4982433128776877, "grad_norm": 0.5740377525545696, "learning_rate": 7.808534325095418e-07, "loss": 0.2625, "step": 31983 }, { "epoch": 1.498290157867616, "grad_norm": 0.6341192122181738, "learning_rate": 7.807157449679764e-07, "loss": 0.2896, "step": 31984 }, { "epoch": 1.4983370028575445, "grad_norm": 0.6521638900239353, "learning_rate": 7.805780673204596e-07, "loss": 0.2704, "step": 31985 }, { "epoch": 1.4983838478474727, "grad_norm": 0.6030420793209014, "learning_rate": 7.804403995677843e-07, "loss": 0.2792, "step": 31986 }, { "epoch": 1.4984306928374012, "grad_norm": 0.5902648426932721, "learning_rate": 7.803027417107425e-07, "loss": 0.2729, "step": 31987 }, { "epoch": 1.4984775378273294, "grad_norm": 0.6043241188926269, "learning_rate": 7.801650937501257e-07, "loss": 0.2777, "step": 31988 }, { "epoch": 1.4985243828172576, "grad_norm": 0.604430536464273, "learning_rate": 7.800274556867266e-07, "loss": 0.273, "step": 31989 }, { "epoch": 1.498571227807186, "grad_norm": 0.5812476840577103, "learning_rate": 7.798898275213376e-07, "loss": 0.2518, "step": 31990 }, { "epoch": 1.4986180727971143, "grad_norm": 0.5428197911441714, "learning_rate": 7.797522092547499e-07, "loss": 0.2654, "step": 31991 }, { "epoch": 1.4986649177870426, "grad_norm": 0.6261108833016575, "learning_rate": 7.796146008877559e-07, "loss": 0.2697, "step": 31992 }, { "epoch": 1.498711762776971, "grad_norm": 0.5908524145112365, "learning_rate": 7.794770024211479e-07, "loss": 0.2733, "step": 31993 }, { "epoch": 1.4987586077668993, "grad_norm": 0.6215710445533711, "learning_rate": 7.793394138557168e-07, "loss": 0.2613, "step": 31994 }, { "epoch": 1.4988054527568275, "grad_norm": 0.5612056172065315, "learning_rate": 7.792018351922547e-07, "loss": 0.2637, "step": 31995 }, { "epoch": 1.498852297746756, "grad_norm": 0.5869332517472213, "learning_rate": 7.790642664315546e-07, "loss": 0.266, "step": 31996 }, { "epoch": 1.4988991427366842, "grad_norm": 0.585305378261924, "learning_rate": 7.789267075744061e-07, "loss": 0.2653, "step": 31997 }, { "epoch": 1.4989459877266127, "grad_norm": 0.6047099796646057, "learning_rate": 7.78789158621602e-07, "loss": 0.2657, "step": 31998 }, { "epoch": 1.498992832716541, "grad_norm": 0.5584456940406808, "learning_rate": 7.786516195739344e-07, "loss": 0.2615, "step": 31999 }, { "epoch": 1.4990396777064694, "grad_norm": 0.5674402348838523, "learning_rate": 7.785140904321931e-07, "loss": 0.2597, "step": 32000 }, { "epoch": 1.4990865226963976, "grad_norm": 0.6238678224062715, "learning_rate": 7.783765711971708e-07, "loss": 0.2905, "step": 32001 }, { "epoch": 1.4991333676863259, "grad_norm": 0.606436094074214, "learning_rate": 7.782390618696584e-07, "loss": 0.2672, "step": 32002 }, { "epoch": 1.4991802126762543, "grad_norm": 0.7013711878690981, "learning_rate": 7.781015624504485e-07, "loss": 0.309, "step": 32003 }, { "epoch": 1.4992270576661826, "grad_norm": 0.6053378311745937, "learning_rate": 7.779640729403304e-07, "loss": 0.2851, "step": 32004 }, { "epoch": 1.4992739026561108, "grad_norm": 0.6247947476633164, "learning_rate": 7.77826593340097e-07, "loss": 0.2825, "step": 32005 }, { "epoch": 1.4993207476460393, "grad_norm": 0.5812758166426761, "learning_rate": 7.776891236505379e-07, "loss": 0.2808, "step": 32006 }, { "epoch": 1.4993675926359675, "grad_norm": 0.5766948894196953, "learning_rate": 7.775516638724451e-07, "loss": 0.2549, "step": 32007 }, { "epoch": 1.499414437625896, "grad_norm": 0.6003121660236098, "learning_rate": 7.774142140066094e-07, "loss": 0.2607, "step": 32008 }, { "epoch": 1.4994612826158242, "grad_norm": 0.6467286982223275, "learning_rate": 7.77276774053822e-07, "loss": 0.2803, "step": 32009 }, { "epoch": 1.4995081276057527, "grad_norm": 0.6412592162411664, "learning_rate": 7.771393440148745e-07, "loss": 0.29, "step": 32010 }, { "epoch": 1.499554972595681, "grad_norm": 0.5987731811423378, "learning_rate": 7.770019238905571e-07, "loss": 0.2723, "step": 32011 }, { "epoch": 1.4996018175856092, "grad_norm": 0.5808525378371154, "learning_rate": 7.7686451368166e-07, "loss": 0.2659, "step": 32012 }, { "epoch": 1.4996486625755376, "grad_norm": 0.5596467738855919, "learning_rate": 7.767271133889743e-07, "loss": 0.2627, "step": 32013 }, { "epoch": 1.4996955075654659, "grad_norm": 0.6142350849517965, "learning_rate": 7.76589723013291e-07, "loss": 0.2929, "step": 32014 }, { "epoch": 1.4997423525553941, "grad_norm": 0.5686781961735756, "learning_rate": 7.764523425554008e-07, "loss": 0.2551, "step": 32015 }, { "epoch": 1.4997891975453226, "grad_norm": 0.5824034189384361, "learning_rate": 7.763149720160939e-07, "loss": 0.2576, "step": 32016 }, { "epoch": 1.4998360425352508, "grad_norm": 0.6120373749238809, "learning_rate": 7.761776113961619e-07, "loss": 0.2679, "step": 32017 }, { "epoch": 1.499882887525179, "grad_norm": 0.6068108314273856, "learning_rate": 7.760402606963949e-07, "loss": 0.2762, "step": 32018 }, { "epoch": 1.4999297325151075, "grad_norm": 0.617028834246129, "learning_rate": 7.759029199175816e-07, "loss": 0.2769, "step": 32019 }, { "epoch": 1.4999765775050358, "grad_norm": 0.5850715716631604, "learning_rate": 7.75765589060514e-07, "loss": 0.2878, "step": 32020 }, { "epoch": 1.500023422494964, "grad_norm": 0.6321080687820548, "learning_rate": 7.756282681259819e-07, "loss": 0.2701, "step": 32021 }, { "epoch": 1.5000702674848925, "grad_norm": 0.6244995093456438, "learning_rate": 7.754909571147757e-07, "loss": 0.2781, "step": 32022 }, { "epoch": 1.500117112474821, "grad_norm": 0.6215849473415992, "learning_rate": 7.753536560276864e-07, "loss": 0.2628, "step": 32023 }, { "epoch": 1.5001639574647492, "grad_norm": 0.6462064734622165, "learning_rate": 7.752163648655028e-07, "loss": 0.2701, "step": 32024 }, { "epoch": 1.5002108024546774, "grad_norm": 0.5762546105813376, "learning_rate": 7.750790836290159e-07, "loss": 0.2659, "step": 32025 }, { "epoch": 1.5002576474446059, "grad_norm": 0.6089638222799642, "learning_rate": 7.74941812319015e-07, "loss": 0.2628, "step": 32026 }, { "epoch": 1.5003044924345341, "grad_norm": 0.6147632861581586, "learning_rate": 7.748045509362901e-07, "loss": 0.2849, "step": 32027 }, { "epoch": 1.5003513374244624, "grad_norm": 0.5542842591186986, "learning_rate": 7.746672994816312e-07, "loss": 0.2795, "step": 32028 }, { "epoch": 1.5003981824143908, "grad_norm": 0.6526050705064094, "learning_rate": 7.745300579558296e-07, "loss": 0.2856, "step": 32029 }, { "epoch": 1.500445027404319, "grad_norm": 0.6392155851787171, "learning_rate": 7.743928263596728e-07, "loss": 0.2763, "step": 32030 }, { "epoch": 1.5004918723942473, "grad_norm": 0.6758720245289181, "learning_rate": 7.742556046939515e-07, "loss": 0.2866, "step": 32031 }, { "epoch": 1.5005387173841758, "grad_norm": 0.5886584230999502, "learning_rate": 7.741183929594565e-07, "loss": 0.2699, "step": 32032 }, { "epoch": 1.5005855623741042, "grad_norm": 0.6075095671363223, "learning_rate": 7.739811911569753e-07, "loss": 0.2687, "step": 32033 }, { "epoch": 1.5006324073640323, "grad_norm": 0.5968660713631635, "learning_rate": 7.738439992872984e-07, "loss": 0.2711, "step": 32034 }, { "epoch": 1.5006792523539607, "grad_norm": 0.6029558911562656, "learning_rate": 7.737068173512167e-07, "loss": 0.2747, "step": 32035 }, { "epoch": 1.5007260973438892, "grad_norm": 0.596826911200086, "learning_rate": 7.735696453495172e-07, "loss": 0.2737, "step": 32036 }, { "epoch": 1.5007729423338174, "grad_norm": 0.5937780304612392, "learning_rate": 7.734324832829906e-07, "loss": 0.2651, "step": 32037 }, { "epoch": 1.5008197873237457, "grad_norm": 0.5994692804482543, "learning_rate": 7.732953311524268e-07, "loss": 0.2656, "step": 32038 }, { "epoch": 1.5008666323136741, "grad_norm": 0.5539560752300364, "learning_rate": 7.731581889586137e-07, "loss": 0.2706, "step": 32039 }, { "epoch": 1.5009134773036024, "grad_norm": 0.6036799654553499, "learning_rate": 7.730210567023414e-07, "loss": 0.2767, "step": 32040 }, { "epoch": 1.5009603222935306, "grad_norm": 0.6105170640046357, "learning_rate": 7.728839343843994e-07, "loss": 0.2827, "step": 32041 }, { "epoch": 1.501007167283459, "grad_norm": 0.5808902433073971, "learning_rate": 7.727468220055753e-07, "loss": 0.2541, "step": 32042 }, { "epoch": 1.5010540122733873, "grad_norm": 0.6081806218914233, "learning_rate": 7.726097195666593e-07, "loss": 0.2763, "step": 32043 }, { "epoch": 1.5011008572633155, "grad_norm": 0.5529327865591037, "learning_rate": 7.724726270684402e-07, "loss": 0.2436, "step": 32044 }, { "epoch": 1.501147702253244, "grad_norm": 0.6149060024065109, "learning_rate": 7.723355445117078e-07, "loss": 0.2754, "step": 32045 }, { "epoch": 1.5011945472431725, "grad_norm": 0.5715474895085233, "learning_rate": 7.721984718972491e-07, "loss": 0.2649, "step": 32046 }, { "epoch": 1.5012413922331007, "grad_norm": 0.6384427235154816, "learning_rate": 7.720614092258552e-07, "loss": 0.2966, "step": 32047 }, { "epoch": 1.501288237223029, "grad_norm": 0.6927796341271057, "learning_rate": 7.719243564983123e-07, "loss": 0.2773, "step": 32048 }, { "epoch": 1.5013350822129574, "grad_norm": 0.5740573085003172, "learning_rate": 7.717873137154106e-07, "loss": 0.2665, "step": 32049 }, { "epoch": 1.5013819272028857, "grad_norm": 0.554760738794991, "learning_rate": 7.716502808779385e-07, "loss": 0.2704, "step": 32050 }, { "epoch": 1.501428772192814, "grad_norm": 0.6199530391937669, "learning_rate": 7.715132579866846e-07, "loss": 0.282, "step": 32051 }, { "epoch": 1.5014756171827424, "grad_norm": 0.5817292198348658, "learning_rate": 7.713762450424383e-07, "loss": 0.2728, "step": 32052 }, { "epoch": 1.5015224621726706, "grad_norm": 0.615837394347924, "learning_rate": 7.712392420459863e-07, "loss": 0.2706, "step": 32053 }, { "epoch": 1.5015693071625988, "grad_norm": 0.5783586157458397, "learning_rate": 7.711022489981191e-07, "loss": 0.2637, "step": 32054 }, { "epoch": 1.5016161521525273, "grad_norm": 0.6312659199462984, "learning_rate": 7.709652658996228e-07, "loss": 0.2708, "step": 32055 }, { "epoch": 1.5016629971424558, "grad_norm": 0.5930133596660426, "learning_rate": 7.708282927512869e-07, "loss": 0.2687, "step": 32056 }, { "epoch": 1.5017098421323838, "grad_norm": 0.5783609353700355, "learning_rate": 7.706913295538995e-07, "loss": 0.2577, "step": 32057 }, { "epoch": 1.5017566871223123, "grad_norm": 0.5684170539308069, "learning_rate": 7.70554376308249e-07, "loss": 0.258, "step": 32058 }, { "epoch": 1.5018035321122407, "grad_norm": 0.6385957167141426, "learning_rate": 7.704174330151243e-07, "loss": 0.2829, "step": 32059 }, { "epoch": 1.501850377102169, "grad_norm": 0.5942952333496537, "learning_rate": 7.702804996753122e-07, "loss": 0.2651, "step": 32060 }, { "epoch": 1.5018972220920972, "grad_norm": 0.5891599281869185, "learning_rate": 7.701435762896006e-07, "loss": 0.2731, "step": 32061 }, { "epoch": 1.5019440670820257, "grad_norm": 0.5882733911705014, "learning_rate": 7.700066628587777e-07, "loss": 0.2915, "step": 32062 }, { "epoch": 1.501990912071954, "grad_norm": 0.5972270526294257, "learning_rate": 7.698697593836315e-07, "loss": 0.2569, "step": 32063 }, { "epoch": 1.5020377570618821, "grad_norm": 0.6476705794310911, "learning_rate": 7.697328658649502e-07, "loss": 0.2666, "step": 32064 }, { "epoch": 1.5020846020518106, "grad_norm": 0.6987250477613514, "learning_rate": 7.695959823035215e-07, "loss": 0.2438, "step": 32065 }, { "epoch": 1.5021314470417388, "grad_norm": 0.5825385179170628, "learning_rate": 7.694591087001338e-07, "loss": 0.2718, "step": 32066 }, { "epoch": 1.502178292031667, "grad_norm": 0.5896250287834258, "learning_rate": 7.693222450555737e-07, "loss": 0.2619, "step": 32067 }, { "epoch": 1.5022251370215955, "grad_norm": 0.6847893418300806, "learning_rate": 7.691853913706285e-07, "loss": 0.2892, "step": 32068 }, { "epoch": 1.502271982011524, "grad_norm": 0.5763259319220676, "learning_rate": 7.690485476460862e-07, "loss": 0.2624, "step": 32069 }, { "epoch": 1.502318827001452, "grad_norm": 0.6056762521325627, "learning_rate": 7.689117138827345e-07, "loss": 0.2757, "step": 32070 }, { "epoch": 1.5023656719913805, "grad_norm": 0.5862472266249237, "learning_rate": 7.68774890081361e-07, "loss": 0.2747, "step": 32071 }, { "epoch": 1.502412516981309, "grad_norm": 0.5997071902425799, "learning_rate": 7.686380762427537e-07, "loss": 0.2675, "step": 32072 }, { "epoch": 1.5024593619712372, "grad_norm": 0.6020543004983107, "learning_rate": 7.685012723676979e-07, "loss": 0.2758, "step": 32073 }, { "epoch": 1.5025062069611654, "grad_norm": 0.6835358870266663, "learning_rate": 7.683644784569833e-07, "loss": 0.299, "step": 32074 }, { "epoch": 1.502553051951094, "grad_norm": 0.6124710076542844, "learning_rate": 7.682276945113948e-07, "loss": 0.2739, "step": 32075 }, { "epoch": 1.5025998969410221, "grad_norm": 0.6350728200016567, "learning_rate": 7.680909205317208e-07, "loss": 0.2728, "step": 32076 }, { "epoch": 1.5026467419309504, "grad_norm": 0.5882400251217563, "learning_rate": 7.679541565187482e-07, "loss": 0.2635, "step": 32077 }, { "epoch": 1.5026935869208788, "grad_norm": 0.5457987831973343, "learning_rate": 7.678174024732648e-07, "loss": 0.2522, "step": 32078 }, { "epoch": 1.502740431910807, "grad_norm": 0.6099787624902527, "learning_rate": 7.676806583960564e-07, "loss": 0.2809, "step": 32079 }, { "epoch": 1.5027872769007353, "grad_norm": 0.6288806992320805, "learning_rate": 7.675439242879101e-07, "loss": 0.2799, "step": 32080 }, { "epoch": 1.5028341218906638, "grad_norm": 0.5939243631773383, "learning_rate": 7.674072001496139e-07, "loss": 0.2708, "step": 32081 }, { "epoch": 1.5028809668805923, "grad_norm": 0.5841841848551093, "learning_rate": 7.672704859819529e-07, "loss": 0.2744, "step": 32082 }, { "epoch": 1.5029278118705205, "grad_norm": 0.6180273463748295, "learning_rate": 7.671337817857152e-07, "loss": 0.2816, "step": 32083 }, { "epoch": 1.5029746568604487, "grad_norm": 0.6133014148751019, "learning_rate": 7.669970875616875e-07, "loss": 0.2765, "step": 32084 }, { "epoch": 1.5030215018503772, "grad_norm": 0.5738117739914771, "learning_rate": 7.668604033106552e-07, "loss": 0.2635, "step": 32085 }, { "epoch": 1.5030683468403054, "grad_norm": 0.6239953476465822, "learning_rate": 7.667237290334056e-07, "loss": 0.2785, "step": 32086 }, { "epoch": 1.5031151918302337, "grad_norm": 0.5429861913215867, "learning_rate": 7.665870647307264e-07, "loss": 0.2548, "step": 32087 }, { "epoch": 1.5031620368201621, "grad_norm": 0.627671820631575, "learning_rate": 7.664504104034021e-07, "loss": 0.2729, "step": 32088 }, { "epoch": 1.5032088818100904, "grad_norm": 0.6140630878391743, "learning_rate": 7.663137660522196e-07, "loss": 0.2855, "step": 32089 }, { "epoch": 1.5032557268000186, "grad_norm": 0.6029277889638701, "learning_rate": 7.661771316779668e-07, "loss": 0.2707, "step": 32090 }, { "epoch": 1.503302571789947, "grad_norm": 0.5619431870501089, "learning_rate": 7.660405072814278e-07, "loss": 0.2623, "step": 32091 }, { "epoch": 1.5033494167798755, "grad_norm": 0.6161232188257515, "learning_rate": 7.6590389286339e-07, "loss": 0.2619, "step": 32092 }, { "epoch": 1.5033962617698036, "grad_norm": 0.5612492941282403, "learning_rate": 7.657672884246395e-07, "loss": 0.2577, "step": 32093 }, { "epoch": 1.503443106759732, "grad_norm": 0.669330672100782, "learning_rate": 7.65630693965963e-07, "loss": 0.2839, "step": 32094 }, { "epoch": 1.5034899517496605, "grad_norm": 0.6206989477626017, "learning_rate": 7.654941094881454e-07, "loss": 0.278, "step": 32095 }, { "epoch": 1.5035367967395887, "grad_norm": 0.553712225630426, "learning_rate": 7.65357534991974e-07, "loss": 0.2694, "step": 32096 }, { "epoch": 1.503583641729517, "grad_norm": 0.6233154436407575, "learning_rate": 7.652209704782329e-07, "loss": 0.2718, "step": 32097 }, { "epoch": 1.5036304867194454, "grad_norm": 0.6129354087951671, "learning_rate": 7.650844159477095e-07, "loss": 0.2756, "step": 32098 }, { "epoch": 1.5036773317093737, "grad_norm": 0.5704125802900702, "learning_rate": 7.649478714011888e-07, "loss": 0.2764, "step": 32099 }, { "epoch": 1.503724176699302, "grad_norm": 0.6418989877135156, "learning_rate": 7.648113368394572e-07, "loss": 0.2856, "step": 32100 }, { "epoch": 1.5037710216892304, "grad_norm": 0.6070796011833012, "learning_rate": 7.64674812263301e-07, "loss": 0.2695, "step": 32101 }, { "epoch": 1.5038178666791586, "grad_norm": 0.6017216024851447, "learning_rate": 7.645382976735053e-07, "loss": 0.2905, "step": 32102 }, { "epoch": 1.5038647116690869, "grad_norm": 0.6026462179146225, "learning_rate": 7.644017930708544e-07, "loss": 0.254, "step": 32103 }, { "epoch": 1.5039115566590153, "grad_norm": 0.6075957378104648, "learning_rate": 7.642652984561352e-07, "loss": 0.2673, "step": 32104 }, { "epoch": 1.5039584016489438, "grad_norm": 0.5913833271378846, "learning_rate": 7.641288138301328e-07, "loss": 0.2771, "step": 32105 }, { "epoch": 1.5040052466388718, "grad_norm": 0.6009181893568168, "learning_rate": 7.63992339193633e-07, "loss": 0.2774, "step": 32106 }, { "epoch": 1.5040520916288003, "grad_norm": 0.6247718385095175, "learning_rate": 7.638558745474209e-07, "loss": 0.2721, "step": 32107 }, { "epoch": 1.5040989366187287, "grad_norm": 0.6173434323038959, "learning_rate": 7.637194198922826e-07, "loss": 0.292, "step": 32108 }, { "epoch": 1.504145781608657, "grad_norm": 0.6015119736929118, "learning_rate": 7.635829752290025e-07, "loss": 0.2641, "step": 32109 }, { "epoch": 1.5041926265985852, "grad_norm": 0.6657616200675411, "learning_rate": 7.634465405583652e-07, "loss": 0.2906, "step": 32110 }, { "epoch": 1.5042394715885137, "grad_norm": 0.6351412888438077, "learning_rate": 7.633101158811567e-07, "loss": 0.2698, "step": 32111 }, { "epoch": 1.504286316578442, "grad_norm": 0.5951464855712175, "learning_rate": 7.631737011981618e-07, "loss": 0.2807, "step": 32112 }, { "epoch": 1.5043331615683702, "grad_norm": 0.6194210205972867, "learning_rate": 7.63037296510166e-07, "loss": 0.2613, "step": 32113 }, { "epoch": 1.5043800065582986, "grad_norm": 0.6241292089367464, "learning_rate": 7.629009018179548e-07, "loss": 0.2866, "step": 32114 }, { "epoch": 1.5044268515482269, "grad_norm": 0.6079807559990728, "learning_rate": 7.627645171223114e-07, "loss": 0.2907, "step": 32115 }, { "epoch": 1.504473696538155, "grad_norm": 0.6047560078375686, "learning_rate": 7.626281424240223e-07, "loss": 0.2703, "step": 32116 }, { "epoch": 1.5045205415280836, "grad_norm": 0.5745958738639386, "learning_rate": 7.624917777238708e-07, "loss": 0.2586, "step": 32117 }, { "epoch": 1.504567386518012, "grad_norm": 0.5683597760282509, "learning_rate": 7.623554230226423e-07, "loss": 0.2507, "step": 32118 }, { "epoch": 1.5046142315079403, "grad_norm": 0.5953607864346336, "learning_rate": 7.622190783211217e-07, "loss": 0.2701, "step": 32119 }, { "epoch": 1.5046610764978685, "grad_norm": 0.5938629740437108, "learning_rate": 7.620827436200934e-07, "loss": 0.2758, "step": 32120 }, { "epoch": 1.504707921487797, "grad_norm": 0.5925194094330394, "learning_rate": 7.619464189203432e-07, "loss": 0.2793, "step": 32121 }, { "epoch": 1.5047547664777252, "grad_norm": 0.5478970378797753, "learning_rate": 7.618101042226536e-07, "loss": 0.2464, "step": 32122 }, { "epoch": 1.5048016114676535, "grad_norm": 0.556584986459807, "learning_rate": 7.616737995278107e-07, "loss": 0.2833, "step": 32123 }, { "epoch": 1.504848456457582, "grad_norm": 0.6283135108114626, "learning_rate": 7.615375048365972e-07, "loss": 0.286, "step": 32124 }, { "epoch": 1.5048953014475102, "grad_norm": 0.6433145432186704, "learning_rate": 7.614012201497986e-07, "loss": 0.283, "step": 32125 }, { "epoch": 1.5049421464374384, "grad_norm": 0.5777287101205087, "learning_rate": 7.612649454681992e-07, "loss": 0.2732, "step": 32126 }, { "epoch": 1.5049889914273669, "grad_norm": 0.5753657201688115, "learning_rate": 7.611286807925835e-07, "loss": 0.2672, "step": 32127 }, { "epoch": 1.5050358364172953, "grad_norm": 0.5886926782256641, "learning_rate": 7.609924261237345e-07, "loss": 0.2753, "step": 32128 }, { "epoch": 1.5050826814072233, "grad_norm": 0.6226806019999259, "learning_rate": 7.608561814624371e-07, "loss": 0.2708, "step": 32129 }, { "epoch": 1.5051295263971518, "grad_norm": 0.6450031611293263, "learning_rate": 7.607199468094758e-07, "loss": 0.2904, "step": 32130 }, { "epoch": 1.5051763713870803, "grad_norm": 0.6398416813674475, "learning_rate": 7.605837221656337e-07, "loss": 0.2945, "step": 32131 }, { "epoch": 1.5052232163770085, "grad_norm": 0.6360186149831446, "learning_rate": 7.604475075316947e-07, "loss": 0.28, "step": 32132 }, { "epoch": 1.5052700613669368, "grad_norm": 0.6077063905255374, "learning_rate": 7.603113029084441e-07, "loss": 0.2775, "step": 32133 }, { "epoch": 1.5053169063568652, "grad_norm": 0.538120845385097, "learning_rate": 7.601751082966638e-07, "loss": 0.2521, "step": 32134 }, { "epoch": 1.5053637513467935, "grad_norm": 0.5915376471202974, "learning_rate": 7.600389236971384e-07, "loss": 0.277, "step": 32135 }, { "epoch": 1.5054105963367217, "grad_norm": 0.6065873701157005, "learning_rate": 7.599027491106525e-07, "loss": 0.291, "step": 32136 }, { "epoch": 1.5054574413266502, "grad_norm": 0.5966787283278746, "learning_rate": 7.597665845379881e-07, "loss": 0.2575, "step": 32137 }, { "epoch": 1.5055042863165784, "grad_norm": 0.5849734881888962, "learning_rate": 7.596304299799298e-07, "loss": 0.2719, "step": 32138 }, { "epoch": 1.5055511313065066, "grad_norm": 0.5584986778522544, "learning_rate": 7.594942854372617e-07, "loss": 0.2599, "step": 32139 }, { "epoch": 1.505597976296435, "grad_norm": 0.587794357742277, "learning_rate": 7.593581509107656e-07, "loss": 0.2624, "step": 32140 }, { "epoch": 1.5056448212863636, "grad_norm": 0.5873359660957648, "learning_rate": 7.59222026401226e-07, "loss": 0.2499, "step": 32141 }, { "epoch": 1.5056916662762916, "grad_norm": 0.5810390420893706, "learning_rate": 7.59085911909426e-07, "loss": 0.2692, "step": 32142 }, { "epoch": 1.50573851126622, "grad_norm": 0.5671311996153364, "learning_rate": 7.589498074361499e-07, "loss": 0.2547, "step": 32143 }, { "epoch": 1.5057853562561485, "grad_norm": 0.595477470532725, "learning_rate": 7.588137129821791e-07, "loss": 0.2624, "step": 32144 }, { "epoch": 1.5058322012460768, "grad_norm": 0.5668203453612758, "learning_rate": 7.586776285482986e-07, "loss": 0.274, "step": 32145 }, { "epoch": 1.505879046236005, "grad_norm": 0.6261743285120134, "learning_rate": 7.5854155413529e-07, "loss": 0.2809, "step": 32146 }, { "epoch": 1.5059258912259335, "grad_norm": 0.597892174608525, "learning_rate": 7.584054897439369e-07, "loss": 0.276, "step": 32147 }, { "epoch": 1.5059727362158617, "grad_norm": 0.5861579506867921, "learning_rate": 7.582694353750226e-07, "loss": 0.2929, "step": 32148 }, { "epoch": 1.50601958120579, "grad_norm": 0.5606424747939496, "learning_rate": 7.581333910293298e-07, "loss": 0.2517, "step": 32149 }, { "epoch": 1.5060664261957184, "grad_norm": 0.6049323138007031, "learning_rate": 7.579973567076424e-07, "loss": 0.277, "step": 32150 }, { "epoch": 1.5061132711856466, "grad_norm": 0.6212796840710064, "learning_rate": 7.578613324107426e-07, "loss": 0.2836, "step": 32151 }, { "epoch": 1.5061601161755749, "grad_norm": 0.6246147948921089, "learning_rate": 7.577253181394118e-07, "loss": 0.2724, "step": 32152 }, { "epoch": 1.5062069611655033, "grad_norm": 0.6044493310384773, "learning_rate": 7.57589313894434e-07, "loss": 0.2639, "step": 32153 }, { "epoch": 1.5062538061554318, "grad_norm": 0.5272415403954396, "learning_rate": 7.574533196765918e-07, "loss": 0.2469, "step": 32154 }, { "epoch": 1.50630065114536, "grad_norm": 0.9814159351693912, "learning_rate": 7.573173354866678e-07, "loss": 0.292, "step": 32155 }, { "epoch": 1.5063474961352883, "grad_norm": 0.6148055136304964, "learning_rate": 7.571813613254444e-07, "loss": 0.287, "step": 32156 }, { "epoch": 1.5063943411252168, "grad_norm": 0.584124463970157, "learning_rate": 7.570453971937053e-07, "loss": 0.2686, "step": 32157 }, { "epoch": 1.506441186115145, "grad_norm": 0.5785986746919385, "learning_rate": 7.569094430922317e-07, "loss": 0.2583, "step": 32158 }, { "epoch": 1.5064880311050732, "grad_norm": 0.5976030569190566, "learning_rate": 7.567734990218054e-07, "loss": 0.2658, "step": 32159 }, { "epoch": 1.5065348760950017, "grad_norm": 0.6130081746702815, "learning_rate": 7.566375649832091e-07, "loss": 0.2783, "step": 32160 }, { "epoch": 1.50658172108493, "grad_norm": 0.5726349989137263, "learning_rate": 7.56501640977226e-07, "loss": 0.2679, "step": 32161 }, { "epoch": 1.5066285660748582, "grad_norm": 0.6488203982877337, "learning_rate": 7.563657270046373e-07, "loss": 0.2672, "step": 32162 }, { "epoch": 1.5066754110647866, "grad_norm": 0.6413594129225964, "learning_rate": 7.562298230662268e-07, "loss": 0.2703, "step": 32163 }, { "epoch": 1.506722256054715, "grad_norm": 0.6009699063702678, "learning_rate": 7.560939291627742e-07, "loss": 0.2739, "step": 32164 }, { "epoch": 1.5067691010446431, "grad_norm": 0.6135015457516342, "learning_rate": 7.559580452950638e-07, "loss": 0.2746, "step": 32165 }, { "epoch": 1.5068159460345716, "grad_norm": 0.629639997390189, "learning_rate": 7.558221714638756e-07, "loss": 0.2926, "step": 32166 }, { "epoch": 1.5068627910245, "grad_norm": 0.568131454232803, "learning_rate": 7.556863076699925e-07, "loss": 0.2713, "step": 32167 }, { "epoch": 1.5069096360144283, "grad_norm": 0.608721404996823, "learning_rate": 7.555504539141962e-07, "loss": 0.28, "step": 32168 }, { "epoch": 1.5069564810043565, "grad_norm": 0.5960415068150811, "learning_rate": 7.554146101972693e-07, "loss": 0.2848, "step": 32169 }, { "epoch": 1.507003325994285, "grad_norm": 0.582499521241663, "learning_rate": 7.55278776519992e-07, "loss": 0.2701, "step": 32170 }, { "epoch": 1.5070501709842132, "grad_norm": 0.6827886112900009, "learning_rate": 7.55142952883147e-07, "loss": 0.2799, "step": 32171 }, { "epoch": 1.5070970159741415, "grad_norm": 0.6097373765794517, "learning_rate": 7.550071392875166e-07, "loss": 0.2658, "step": 32172 }, { "epoch": 1.50714386096407, "grad_norm": 0.6276938710933553, "learning_rate": 7.548713357338805e-07, "loss": 0.2901, "step": 32173 }, { "epoch": 1.5071907059539982, "grad_norm": 0.6083789202924729, "learning_rate": 7.547355422230216e-07, "loss": 0.2679, "step": 32174 }, { "epoch": 1.5072375509439264, "grad_norm": 0.5852000187528137, "learning_rate": 7.545997587557213e-07, "loss": 0.2672, "step": 32175 }, { "epoch": 1.5072843959338549, "grad_norm": 0.6094954563714134, "learning_rate": 7.544639853327601e-07, "loss": 0.2664, "step": 32176 }, { "epoch": 1.5073312409237833, "grad_norm": 0.6052963887081858, "learning_rate": 7.543282219549203e-07, "loss": 0.2717, "step": 32177 }, { "epoch": 1.5073780859137114, "grad_norm": 0.5712503507336434, "learning_rate": 7.541924686229832e-07, "loss": 0.2741, "step": 32178 }, { "epoch": 1.5074249309036398, "grad_norm": 0.5894099727120272, "learning_rate": 7.54056725337729e-07, "loss": 0.2727, "step": 32179 }, { "epoch": 1.5074717758935683, "grad_norm": 0.5975308596313039, "learning_rate": 7.539209920999393e-07, "loss": 0.2792, "step": 32180 }, { "epoch": 1.5075186208834965, "grad_norm": 0.5806270249923783, "learning_rate": 7.537852689103966e-07, "loss": 0.2408, "step": 32181 }, { "epoch": 1.5075654658734248, "grad_norm": 0.5943739972826405, "learning_rate": 7.536495557698797e-07, "loss": 0.2603, "step": 32182 }, { "epoch": 1.5076123108633532, "grad_norm": 0.6101138898340296, "learning_rate": 7.535138526791707e-07, "loss": 0.2842, "step": 32183 }, { "epoch": 1.5076591558532815, "grad_norm": 0.5576700100993123, "learning_rate": 7.533781596390507e-07, "loss": 0.266, "step": 32184 }, { "epoch": 1.5077060008432097, "grad_norm": 0.566425741397041, "learning_rate": 7.53242476650301e-07, "loss": 0.2601, "step": 32185 }, { "epoch": 1.5077528458331382, "grad_norm": 0.6254111864298479, "learning_rate": 7.531068037137012e-07, "loss": 0.2908, "step": 32186 }, { "epoch": 1.5077996908230664, "grad_norm": 0.5747489851553089, "learning_rate": 7.529711408300333e-07, "loss": 0.2779, "step": 32187 }, { "epoch": 1.5078465358129947, "grad_norm": 0.6087421213675982, "learning_rate": 7.528354880000769e-07, "loss": 0.2667, "step": 32188 }, { "epoch": 1.5078933808029231, "grad_norm": 0.6431456418611358, "learning_rate": 7.526998452246129e-07, "loss": 0.2885, "step": 32189 }, { "epoch": 1.5079402257928516, "grad_norm": 0.5949087212875597, "learning_rate": 7.525642125044222e-07, "loss": 0.2581, "step": 32190 }, { "epoch": 1.5079870707827798, "grad_norm": 0.6090008738046081, "learning_rate": 7.52428589840285e-07, "loss": 0.2882, "step": 32191 }, { "epoch": 1.508033915772708, "grad_norm": 0.6252650283158789, "learning_rate": 7.522929772329832e-07, "loss": 0.2857, "step": 32192 }, { "epoch": 1.5080807607626365, "grad_norm": 0.631391002843271, "learning_rate": 7.52157374683295e-07, "loss": 0.2698, "step": 32193 }, { "epoch": 1.5081276057525648, "grad_norm": 0.6242964920127096, "learning_rate": 7.520217821920026e-07, "loss": 0.2637, "step": 32194 }, { "epoch": 1.508174450742493, "grad_norm": 0.6339340834394604, "learning_rate": 7.518861997598848e-07, "loss": 0.2755, "step": 32195 }, { "epoch": 1.5082212957324215, "grad_norm": 0.6141047091127971, "learning_rate": 7.517506273877225e-07, "loss": 0.2894, "step": 32196 }, { "epoch": 1.5082681407223497, "grad_norm": 0.5945649027527852, "learning_rate": 7.51615065076296e-07, "loss": 0.2628, "step": 32197 }, { "epoch": 1.508314985712278, "grad_norm": 0.5973525220217, "learning_rate": 7.514795128263852e-07, "loss": 0.2754, "step": 32198 }, { "epoch": 1.5083618307022064, "grad_norm": 0.571923223981887, "learning_rate": 7.513439706387712e-07, "loss": 0.2609, "step": 32199 }, { "epoch": 1.5084086756921349, "grad_norm": 0.5945104279820098, "learning_rate": 7.512084385142332e-07, "loss": 0.2845, "step": 32200 }, { "epoch": 1.508455520682063, "grad_norm": 0.601224484383382, "learning_rate": 7.510729164535505e-07, "loss": 0.2732, "step": 32201 }, { "epoch": 1.5085023656719914, "grad_norm": 0.5851338384031728, "learning_rate": 7.509374044575033e-07, "loss": 0.2841, "step": 32202 }, { "epoch": 1.5085492106619198, "grad_norm": 0.6372433060618368, "learning_rate": 7.508019025268717e-07, "loss": 0.2943, "step": 32203 }, { "epoch": 1.508596055651848, "grad_norm": 0.5973965770853945, "learning_rate": 7.506664106624357e-07, "loss": 0.2628, "step": 32204 }, { "epoch": 1.5086429006417763, "grad_norm": 0.5619923632151592, "learning_rate": 7.505309288649748e-07, "loss": 0.2552, "step": 32205 }, { "epoch": 1.5086897456317048, "grad_norm": 0.572551093119471, "learning_rate": 7.503954571352692e-07, "loss": 0.2627, "step": 32206 }, { "epoch": 1.508736590621633, "grad_norm": 0.5744306698760695, "learning_rate": 7.502599954740983e-07, "loss": 0.2763, "step": 32207 }, { "epoch": 1.5087834356115613, "grad_norm": 0.621035583615824, "learning_rate": 7.501245438822405e-07, "loss": 0.2783, "step": 32208 }, { "epoch": 1.5088302806014897, "grad_norm": 0.5497058860888868, "learning_rate": 7.49989102360476e-07, "loss": 0.2509, "step": 32209 }, { "epoch": 1.508877125591418, "grad_norm": 0.51871986286339, "learning_rate": 7.498536709095846e-07, "loss": 0.2291, "step": 32210 }, { "epoch": 1.5089239705813462, "grad_norm": 0.6548331589482892, "learning_rate": 7.497182495303453e-07, "loss": 0.2662, "step": 32211 }, { "epoch": 1.5089708155712747, "grad_norm": 0.6099963030697598, "learning_rate": 7.495828382235384e-07, "loss": 0.2787, "step": 32212 }, { "epoch": 1.5090176605612031, "grad_norm": 0.6106648627236162, "learning_rate": 7.494474369899416e-07, "loss": 0.27, "step": 32213 }, { "epoch": 1.5090645055511311, "grad_norm": 0.598432313012487, "learning_rate": 7.493120458303354e-07, "loss": 0.27, "step": 32214 }, { "epoch": 1.5091113505410596, "grad_norm": 0.5528678524285235, "learning_rate": 7.491766647454976e-07, "loss": 0.2644, "step": 32215 }, { "epoch": 1.509158195530988, "grad_norm": 0.5731655639074443, "learning_rate": 7.490412937362082e-07, "loss": 0.2599, "step": 32216 }, { "epoch": 1.5092050405209163, "grad_norm": 0.5774714426132416, "learning_rate": 7.489059328032459e-07, "loss": 0.267, "step": 32217 }, { "epoch": 1.5092518855108445, "grad_norm": 0.5992798459990268, "learning_rate": 7.487705819473906e-07, "loss": 0.2741, "step": 32218 }, { "epoch": 1.509298730500773, "grad_norm": 0.5544807607494899, "learning_rate": 7.486352411694198e-07, "loss": 0.2544, "step": 32219 }, { "epoch": 1.5093455754907013, "grad_norm": 0.5586340704315965, "learning_rate": 7.484999104701129e-07, "loss": 0.2478, "step": 32220 }, { "epoch": 1.5093924204806295, "grad_norm": 0.5612445803290915, "learning_rate": 7.483645898502497e-07, "loss": 0.2437, "step": 32221 }, { "epoch": 1.509439265470558, "grad_norm": 0.5605671854270274, "learning_rate": 7.482292793106072e-07, "loss": 0.2653, "step": 32222 }, { "epoch": 1.5094861104604862, "grad_norm": 0.599320845434133, "learning_rate": 7.480939788519647e-07, "loss": 0.2764, "step": 32223 }, { "epoch": 1.5095329554504144, "grad_norm": 0.6120369976910283, "learning_rate": 7.479586884751022e-07, "loss": 0.2766, "step": 32224 }, { "epoch": 1.509579800440343, "grad_norm": 0.581307658842203, "learning_rate": 7.478234081807962e-07, "loss": 0.2708, "step": 32225 }, { "epoch": 1.5096266454302714, "grad_norm": 0.6265776162536276, "learning_rate": 7.476881379698258e-07, "loss": 0.3038, "step": 32226 }, { "epoch": 1.5096734904201996, "grad_norm": 0.5604437515912406, "learning_rate": 7.475528778429708e-07, "loss": 0.2577, "step": 32227 }, { "epoch": 1.5097203354101278, "grad_norm": 0.6422515177442589, "learning_rate": 7.474176278010075e-07, "loss": 0.2937, "step": 32228 }, { "epoch": 1.5097671804000563, "grad_norm": 0.621536570847989, "learning_rate": 7.472823878447156e-07, "loss": 0.2727, "step": 32229 }, { "epoch": 1.5098140253899845, "grad_norm": 0.5788678364964318, "learning_rate": 7.471471579748735e-07, "loss": 0.2692, "step": 32230 }, { "epoch": 1.5098608703799128, "grad_norm": 0.5987662944287555, "learning_rate": 7.470119381922583e-07, "loss": 0.2832, "step": 32231 }, { "epoch": 1.5099077153698413, "grad_norm": 0.631620057164692, "learning_rate": 7.468767284976489e-07, "loss": 0.2861, "step": 32232 }, { "epoch": 1.5099545603597695, "grad_norm": 0.6218936198822897, "learning_rate": 7.467415288918231e-07, "loss": 0.2835, "step": 32233 }, { "epoch": 1.5100014053496977, "grad_norm": 0.5449022555844517, "learning_rate": 7.466063393755599e-07, "loss": 0.2583, "step": 32234 }, { "epoch": 1.5100482503396262, "grad_norm": 0.5775145850337167, "learning_rate": 7.464711599496358e-07, "loss": 0.2738, "step": 32235 }, { "epoch": 1.5100950953295547, "grad_norm": 0.5982183499614084, "learning_rate": 7.463359906148305e-07, "loss": 0.2686, "step": 32236 }, { "epoch": 1.5101419403194827, "grad_norm": 0.6258137493618577, "learning_rate": 7.462008313719197e-07, "loss": 0.2683, "step": 32237 }, { "epoch": 1.5101887853094111, "grad_norm": 0.593781033995096, "learning_rate": 7.460656822216821e-07, "loss": 0.2592, "step": 32238 }, { "epoch": 1.5102356302993396, "grad_norm": 0.5659053278017594, "learning_rate": 7.459305431648961e-07, "loss": 0.2467, "step": 32239 }, { "epoch": 1.5102824752892678, "grad_norm": 0.556396339940828, "learning_rate": 7.457954142023388e-07, "loss": 0.2522, "step": 32240 }, { "epoch": 1.510329320279196, "grad_norm": 0.5798700291695192, "learning_rate": 7.456602953347888e-07, "loss": 0.273, "step": 32241 }, { "epoch": 1.5103761652691245, "grad_norm": 0.604635809124549, "learning_rate": 7.455251865630228e-07, "loss": 0.2786, "step": 32242 }, { "epoch": 1.5104230102590528, "grad_norm": 0.596962455170375, "learning_rate": 7.453900878878176e-07, "loss": 0.2763, "step": 32243 }, { "epoch": 1.510469855248981, "grad_norm": 0.5861278966989513, "learning_rate": 7.452549993099517e-07, "loss": 0.26, "step": 32244 }, { "epoch": 1.5105167002389095, "grad_norm": 0.59198264404077, "learning_rate": 7.451199208302018e-07, "loss": 0.2862, "step": 32245 }, { "epoch": 1.5105635452288377, "grad_norm": 0.5985710275012428, "learning_rate": 7.449848524493461e-07, "loss": 0.2731, "step": 32246 }, { "epoch": 1.510610390218766, "grad_norm": 0.6380803330827095, "learning_rate": 7.448497941681612e-07, "loss": 0.2801, "step": 32247 }, { "epoch": 1.5106572352086944, "grad_norm": 0.6631220453670277, "learning_rate": 7.447147459874254e-07, "loss": 0.2847, "step": 32248 }, { "epoch": 1.510704080198623, "grad_norm": 0.6333113423212402, "learning_rate": 7.44579707907915e-07, "loss": 0.2846, "step": 32249 }, { "epoch": 1.510750925188551, "grad_norm": 0.5674405503868216, "learning_rate": 7.444446799304067e-07, "loss": 0.252, "step": 32250 }, { "epoch": 1.5107977701784794, "grad_norm": 0.6200265147514715, "learning_rate": 7.443096620556778e-07, "loss": 0.2801, "step": 32251 }, { "epoch": 1.5108446151684078, "grad_norm": 0.6266037161241419, "learning_rate": 7.441746542845055e-07, "loss": 0.2673, "step": 32252 }, { "epoch": 1.510891460158336, "grad_norm": 0.573350799375114, "learning_rate": 7.44039656617667e-07, "loss": 0.2576, "step": 32253 }, { "epoch": 1.5109383051482643, "grad_norm": 0.5766706592907807, "learning_rate": 7.439046690559393e-07, "loss": 0.2751, "step": 32254 }, { "epoch": 1.5109851501381928, "grad_norm": 0.6090144995938679, "learning_rate": 7.437696916000983e-07, "loss": 0.2755, "step": 32255 }, { "epoch": 1.511031995128121, "grad_norm": 0.59152639921202, "learning_rate": 7.43634724250922e-07, "loss": 0.2716, "step": 32256 }, { "epoch": 1.5110788401180493, "grad_norm": 0.568192389438318, "learning_rate": 7.434997670091859e-07, "loss": 0.2577, "step": 32257 }, { "epoch": 1.5111256851079777, "grad_norm": 0.593315064921107, "learning_rate": 7.433648198756668e-07, "loss": 0.2713, "step": 32258 }, { "epoch": 1.511172530097906, "grad_norm": 0.6066328849874215, "learning_rate": 7.432298828511419e-07, "loss": 0.2683, "step": 32259 }, { "epoch": 1.5112193750878342, "grad_norm": 0.6220605417874578, "learning_rate": 7.430949559363876e-07, "loss": 0.2902, "step": 32260 }, { "epoch": 1.5112662200777627, "grad_norm": 0.5624985417678389, "learning_rate": 7.429600391321807e-07, "loss": 0.2745, "step": 32261 }, { "epoch": 1.5113130650676911, "grad_norm": 0.5792094913764496, "learning_rate": 7.428251324392965e-07, "loss": 0.2624, "step": 32262 }, { "epoch": 1.5113599100576194, "grad_norm": 0.5735999544743323, "learning_rate": 7.426902358585128e-07, "loss": 0.2677, "step": 32263 }, { "epoch": 1.5114067550475476, "grad_norm": 0.5980950881002497, "learning_rate": 7.425553493906043e-07, "loss": 0.2662, "step": 32264 }, { "epoch": 1.511453600037476, "grad_norm": 0.5997762213533744, "learning_rate": 7.424204730363482e-07, "loss": 0.2651, "step": 32265 }, { "epoch": 1.5115004450274043, "grad_norm": 0.5832413717120554, "learning_rate": 7.422856067965203e-07, "loss": 0.2715, "step": 32266 }, { "epoch": 1.5115472900173326, "grad_norm": 0.5599156257877503, "learning_rate": 7.421507506718978e-07, "loss": 0.2534, "step": 32267 }, { "epoch": 1.511594135007261, "grad_norm": 0.6435518038038661, "learning_rate": 7.420159046632552e-07, "loss": 0.27, "step": 32268 }, { "epoch": 1.5116409799971893, "grad_norm": 0.6100770492451847, "learning_rate": 7.418810687713693e-07, "loss": 0.2658, "step": 32269 }, { "epoch": 1.5116878249871175, "grad_norm": 0.6096534473518234, "learning_rate": 7.417462429970165e-07, "loss": 0.2747, "step": 32270 }, { "epoch": 1.511734669977046, "grad_norm": 0.5932954558977636, "learning_rate": 7.416114273409716e-07, "loss": 0.273, "step": 32271 }, { "epoch": 1.5117815149669744, "grad_norm": 0.5940737695052283, "learning_rate": 7.414766218040109e-07, "loss": 0.2728, "step": 32272 }, { "epoch": 1.5118283599569025, "grad_norm": 0.5578254830281828, "learning_rate": 7.41341826386911e-07, "loss": 0.2516, "step": 32273 }, { "epoch": 1.511875204946831, "grad_norm": 0.5974215404754138, "learning_rate": 7.41207041090446e-07, "loss": 0.2758, "step": 32274 }, { "epoch": 1.5119220499367594, "grad_norm": 0.5934924691179543, "learning_rate": 7.410722659153927e-07, "loss": 0.2782, "step": 32275 }, { "epoch": 1.5119688949266876, "grad_norm": 0.6061781611480249, "learning_rate": 7.409375008625269e-07, "loss": 0.2708, "step": 32276 }, { "epoch": 1.5120157399166159, "grad_norm": 0.5938597326709347, "learning_rate": 7.408027459326231e-07, "loss": 0.2789, "step": 32277 }, { "epoch": 1.5120625849065443, "grad_norm": 0.6068014020305688, "learning_rate": 7.406680011264572e-07, "loss": 0.2689, "step": 32278 }, { "epoch": 1.5121094298964726, "grad_norm": 0.57485656520986, "learning_rate": 7.405332664448053e-07, "loss": 0.2655, "step": 32279 }, { "epoch": 1.5121562748864008, "grad_norm": 0.5724723200517428, "learning_rate": 7.403985418884419e-07, "loss": 0.2695, "step": 32280 }, { "epoch": 1.5122031198763293, "grad_norm": 0.5856390184211671, "learning_rate": 7.402638274581425e-07, "loss": 0.2776, "step": 32281 }, { "epoch": 1.5122499648662575, "grad_norm": 0.584631105667858, "learning_rate": 7.401291231546823e-07, "loss": 0.2659, "step": 32282 }, { "epoch": 1.5122968098561858, "grad_norm": 0.5964965055796451, "learning_rate": 7.399944289788378e-07, "loss": 0.267, "step": 32283 }, { "epoch": 1.5123436548461142, "grad_norm": 0.5740251535452255, "learning_rate": 7.398597449313818e-07, "loss": 0.2747, "step": 32284 }, { "epoch": 1.5123904998360427, "grad_norm": 0.6378664595985001, "learning_rate": 7.397250710130915e-07, "loss": 0.2763, "step": 32285 }, { "epoch": 1.5124373448259707, "grad_norm": 0.5815927958585008, "learning_rate": 7.395904072247403e-07, "loss": 0.2728, "step": 32286 }, { "epoch": 1.5124841898158992, "grad_norm": 0.6510833628713796, "learning_rate": 7.39455753567104e-07, "loss": 0.2813, "step": 32287 }, { "epoch": 1.5125310348058276, "grad_norm": 0.579730721690065, "learning_rate": 7.39321110040957e-07, "loss": 0.2689, "step": 32288 }, { "epoch": 1.5125778797957559, "grad_norm": 0.6037357474032717, "learning_rate": 7.391864766470746e-07, "loss": 0.2541, "step": 32289 }, { "epoch": 1.512624724785684, "grad_norm": 0.6233747274321053, "learning_rate": 7.390518533862323e-07, "loss": 0.29, "step": 32290 }, { "epoch": 1.5126715697756126, "grad_norm": 0.6171558436815675, "learning_rate": 7.38917240259204e-07, "loss": 0.2906, "step": 32291 }, { "epoch": 1.5127184147655408, "grad_norm": 0.6534195935522028, "learning_rate": 7.387826372667636e-07, "loss": 0.2786, "step": 32292 }, { "epoch": 1.512765259755469, "grad_norm": 0.5443961631878677, "learning_rate": 7.386480444096863e-07, "loss": 0.2694, "step": 32293 }, { "epoch": 1.5128121047453975, "grad_norm": 0.5804076761647381, "learning_rate": 7.385134616887471e-07, "loss": 0.272, "step": 32294 }, { "epoch": 1.5128589497353258, "grad_norm": 0.645621065950226, "learning_rate": 7.3837888910472e-07, "loss": 0.2785, "step": 32295 }, { "epoch": 1.512905794725254, "grad_norm": 0.6052757291850127, "learning_rate": 7.382443266583794e-07, "loss": 0.2927, "step": 32296 }, { "epoch": 1.5129526397151825, "grad_norm": 0.6165971389021776, "learning_rate": 7.381097743505011e-07, "loss": 0.2834, "step": 32297 }, { "epoch": 1.512999484705111, "grad_norm": 0.5692839072839407, "learning_rate": 7.379752321818579e-07, "loss": 0.265, "step": 32298 }, { "epoch": 1.5130463296950392, "grad_norm": 0.6092433789747812, "learning_rate": 7.378407001532237e-07, "loss": 0.2809, "step": 32299 }, { "epoch": 1.5130931746849674, "grad_norm": 0.6254022567110522, "learning_rate": 7.377061782653733e-07, "loss": 0.2766, "step": 32300 }, { "epoch": 1.5131400196748959, "grad_norm": 0.6229886484348867, "learning_rate": 7.37571666519081e-07, "loss": 0.2902, "step": 32301 }, { "epoch": 1.513186864664824, "grad_norm": 0.5745087039823137, "learning_rate": 7.374371649151208e-07, "loss": 0.2735, "step": 32302 }, { "epoch": 1.5132337096547523, "grad_norm": 0.6196026305941101, "learning_rate": 7.373026734542673e-07, "loss": 0.2811, "step": 32303 }, { "epoch": 1.5132805546446808, "grad_norm": 0.5888746721163808, "learning_rate": 7.371681921372934e-07, "loss": 0.2543, "step": 32304 }, { "epoch": 1.513327399634609, "grad_norm": 0.6190412845794714, "learning_rate": 7.370337209649742e-07, "loss": 0.2696, "step": 32305 }, { "epoch": 1.5133742446245373, "grad_norm": 0.618892299686632, "learning_rate": 7.36899259938082e-07, "loss": 0.2789, "step": 32306 }, { "epoch": 1.5134210896144658, "grad_norm": 0.6012343981023487, "learning_rate": 7.367648090573915e-07, "loss": 0.2727, "step": 32307 }, { "epoch": 1.5134679346043942, "grad_norm": 0.6794757287442788, "learning_rate": 7.366303683236761e-07, "loss": 0.2427, "step": 32308 }, { "epoch": 1.5135147795943222, "grad_norm": 0.6532799183220005, "learning_rate": 7.364959377377107e-07, "loss": 0.294, "step": 32309 }, { "epoch": 1.5135616245842507, "grad_norm": 0.6147239911763157, "learning_rate": 7.363615173002669e-07, "loss": 0.2581, "step": 32310 }, { "epoch": 1.5136084695741792, "grad_norm": 0.5758977685789117, "learning_rate": 7.362271070121197e-07, "loss": 0.2697, "step": 32311 }, { "epoch": 1.5136553145641074, "grad_norm": 0.6014791525315244, "learning_rate": 7.360927068740429e-07, "loss": 0.274, "step": 32312 }, { "epoch": 1.5137021595540356, "grad_norm": 0.5661635422291049, "learning_rate": 7.359583168868081e-07, "loss": 0.2613, "step": 32313 }, { "epoch": 1.513749004543964, "grad_norm": 0.6114060002848354, "learning_rate": 7.358239370511902e-07, "loss": 0.2854, "step": 32314 }, { "epoch": 1.5137958495338923, "grad_norm": 0.5472441143477143, "learning_rate": 7.356895673679626e-07, "loss": 0.2453, "step": 32315 }, { "epoch": 1.5138426945238206, "grad_norm": 0.5769910785911716, "learning_rate": 7.355552078378975e-07, "loss": 0.267, "step": 32316 }, { "epoch": 1.513889539513749, "grad_norm": 0.601445046367712, "learning_rate": 7.354208584617686e-07, "loss": 0.2707, "step": 32317 }, { "epoch": 1.5139363845036773, "grad_norm": 0.5708813895697513, "learning_rate": 7.352865192403499e-07, "loss": 0.261, "step": 32318 }, { "epoch": 1.5139832294936055, "grad_norm": 0.6061922189782193, "learning_rate": 7.351521901744133e-07, "loss": 0.2896, "step": 32319 }, { "epoch": 1.514030074483534, "grad_norm": 0.5853567667920604, "learning_rate": 7.350178712647319e-07, "loss": 0.2609, "step": 32320 }, { "epoch": 1.5140769194734625, "grad_norm": 0.5621834066152027, "learning_rate": 7.3488356251208e-07, "loss": 0.2648, "step": 32321 }, { "epoch": 1.5141237644633905, "grad_norm": 0.5944756180658046, "learning_rate": 7.347492639172288e-07, "loss": 0.2623, "step": 32322 }, { "epoch": 1.514170609453319, "grad_norm": 0.6108063499805837, "learning_rate": 7.346149754809517e-07, "loss": 0.2868, "step": 32323 }, { "epoch": 1.5142174544432474, "grad_norm": 0.5934184790027861, "learning_rate": 7.34480697204022e-07, "loss": 0.265, "step": 32324 }, { "epoch": 1.5142642994331756, "grad_norm": 0.5935106464657726, "learning_rate": 7.343464290872126e-07, "loss": 0.2689, "step": 32325 }, { "epoch": 1.5143111444231039, "grad_norm": 0.6126048178543709, "learning_rate": 7.34212171131295e-07, "loss": 0.2859, "step": 32326 }, { "epoch": 1.5143579894130323, "grad_norm": 0.5989932191803868, "learning_rate": 7.340779233370426e-07, "loss": 0.2628, "step": 32327 }, { "epoch": 1.5144048344029606, "grad_norm": 0.6058032657153035, "learning_rate": 7.339436857052288e-07, "loss": 0.2564, "step": 32328 }, { "epoch": 1.5144516793928888, "grad_norm": 0.586659558178631, "learning_rate": 7.338094582366242e-07, "loss": 0.2713, "step": 32329 }, { "epoch": 1.5144985243828173, "grad_norm": 0.586726535529272, "learning_rate": 7.336752409320025e-07, "loss": 0.2717, "step": 32330 }, { "epoch": 1.5145453693727455, "grad_norm": 0.55560076565589, "learning_rate": 7.335410337921358e-07, "loss": 0.27, "step": 32331 }, { "epoch": 1.5145922143626738, "grad_norm": 0.6332286412284288, "learning_rate": 7.334068368177971e-07, "loss": 0.2809, "step": 32332 }, { "epoch": 1.5146390593526022, "grad_norm": 0.6061919217728566, "learning_rate": 7.332726500097573e-07, "loss": 0.2726, "step": 32333 }, { "epoch": 1.5146859043425307, "grad_norm": 0.6039480468138957, "learning_rate": 7.331384733687901e-07, "loss": 0.2656, "step": 32334 }, { "epoch": 1.514732749332459, "grad_norm": 0.5841549044473062, "learning_rate": 7.330043068956664e-07, "loss": 0.2669, "step": 32335 }, { "epoch": 1.5147795943223872, "grad_norm": 0.6269548296172469, "learning_rate": 7.328701505911584e-07, "loss": 0.2754, "step": 32336 }, { "epoch": 1.5148264393123156, "grad_norm": 0.6217844834673326, "learning_rate": 7.327360044560389e-07, "loss": 0.2688, "step": 32337 }, { "epoch": 1.5148732843022439, "grad_norm": 0.6452205068968038, "learning_rate": 7.326018684910793e-07, "loss": 0.2988, "step": 32338 }, { "epoch": 1.5149201292921721, "grad_norm": 0.573448456141488, "learning_rate": 7.324677426970525e-07, "loss": 0.2697, "step": 32339 }, { "epoch": 1.5149669742821006, "grad_norm": 0.5770128542736073, "learning_rate": 7.323336270747297e-07, "loss": 0.2793, "step": 32340 }, { "epoch": 1.5150138192720288, "grad_norm": 0.5891635941250771, "learning_rate": 7.321995216248817e-07, "loss": 0.255, "step": 32341 }, { "epoch": 1.515060664261957, "grad_norm": 0.6223895951390123, "learning_rate": 7.32065426348281e-07, "loss": 0.2793, "step": 32342 }, { "epoch": 1.5151075092518855, "grad_norm": 0.6183409800010374, "learning_rate": 7.319313412456996e-07, "loss": 0.2786, "step": 32343 }, { "epoch": 1.515154354241814, "grad_norm": 0.5648244751935562, "learning_rate": 7.31797266317909e-07, "loss": 0.2648, "step": 32344 }, { "epoch": 1.515201199231742, "grad_norm": 0.5948749500020163, "learning_rate": 7.316632015656805e-07, "loss": 0.2807, "step": 32345 }, { "epoch": 1.5152480442216705, "grad_norm": 0.5848668821753529, "learning_rate": 7.315291469897868e-07, "loss": 0.2697, "step": 32346 }, { "epoch": 1.515294889211599, "grad_norm": 0.6119127317196934, "learning_rate": 7.313951025909982e-07, "loss": 0.2797, "step": 32347 }, { "epoch": 1.5153417342015272, "grad_norm": 0.5761811916389665, "learning_rate": 7.312610683700857e-07, "loss": 0.2703, "step": 32348 }, { "epoch": 1.5153885791914554, "grad_norm": 0.6105763807435537, "learning_rate": 7.311270443278213e-07, "loss": 0.2679, "step": 32349 }, { "epoch": 1.5154354241813839, "grad_norm": 0.5764677547772836, "learning_rate": 7.309930304649757e-07, "loss": 0.2685, "step": 32350 }, { "epoch": 1.5154822691713121, "grad_norm": 0.5662798018790403, "learning_rate": 7.30859026782321e-07, "loss": 0.2589, "step": 32351 }, { "epoch": 1.5155291141612404, "grad_norm": 0.5973968820040403, "learning_rate": 7.307250332806285e-07, "loss": 0.2759, "step": 32352 }, { "epoch": 1.5155759591511688, "grad_norm": 0.5954392248165316, "learning_rate": 7.30591049960668e-07, "loss": 0.2737, "step": 32353 }, { "epoch": 1.515622804141097, "grad_norm": 0.6259040992820606, "learning_rate": 7.304570768232122e-07, "loss": 0.2732, "step": 32354 }, { "epoch": 1.5156696491310253, "grad_norm": 0.5746141752188803, "learning_rate": 7.303231138690301e-07, "loss": 0.2699, "step": 32355 }, { "epoch": 1.5157164941209538, "grad_norm": 0.5698261276800005, "learning_rate": 7.301891610988939e-07, "loss": 0.2729, "step": 32356 }, { "epoch": 1.5157633391108822, "grad_norm": 0.6216644708025757, "learning_rate": 7.300552185135743e-07, "loss": 0.2694, "step": 32357 }, { "epoch": 1.5158101841008103, "grad_norm": 0.6038161319341208, "learning_rate": 7.299212861138427e-07, "loss": 0.2708, "step": 32358 }, { "epoch": 1.5158570290907387, "grad_norm": 0.6122971074783234, "learning_rate": 7.297873639004685e-07, "loss": 0.2872, "step": 32359 }, { "epoch": 1.5159038740806672, "grad_norm": 0.5448067340896683, "learning_rate": 7.296534518742229e-07, "loss": 0.2549, "step": 32360 }, { "epoch": 1.5159507190705954, "grad_norm": 0.6397847564644494, "learning_rate": 7.295195500358779e-07, "loss": 0.2886, "step": 32361 }, { "epoch": 1.5159975640605237, "grad_norm": 0.5993883965755035, "learning_rate": 7.293856583862019e-07, "loss": 0.2752, "step": 32362 }, { "epoch": 1.5160444090504521, "grad_norm": 0.6208219992459303, "learning_rate": 7.292517769259663e-07, "loss": 0.2907, "step": 32363 }, { "epoch": 1.5160912540403804, "grad_norm": 0.6158959708232619, "learning_rate": 7.291179056559425e-07, "loss": 0.2679, "step": 32364 }, { "epoch": 1.5161380990303086, "grad_norm": 0.61079138431585, "learning_rate": 7.289840445768995e-07, "loss": 0.2786, "step": 32365 }, { "epoch": 1.516184944020237, "grad_norm": 0.683851105820028, "learning_rate": 7.288501936896078e-07, "loss": 0.2916, "step": 32366 }, { "epoch": 1.5162317890101653, "grad_norm": 0.5968410570350604, "learning_rate": 7.287163529948394e-07, "loss": 0.2736, "step": 32367 }, { "epoch": 1.5162786340000936, "grad_norm": 0.5807813427250401, "learning_rate": 7.285825224933618e-07, "loss": 0.261, "step": 32368 }, { "epoch": 1.516325478990022, "grad_norm": 0.6001312442298087, "learning_rate": 7.284487021859469e-07, "loss": 0.2721, "step": 32369 }, { "epoch": 1.5163723239799505, "grad_norm": 0.567962064072008, "learning_rate": 7.283148920733651e-07, "loss": 0.2517, "step": 32370 }, { "epoch": 1.5164191689698787, "grad_norm": 0.6233777343968938, "learning_rate": 7.281810921563854e-07, "loss": 0.2732, "step": 32371 }, { "epoch": 1.516466013959807, "grad_norm": 0.6276943216232561, "learning_rate": 7.280473024357776e-07, "loss": 0.2892, "step": 32372 }, { "epoch": 1.5165128589497354, "grad_norm": 0.5911528477276996, "learning_rate": 7.279135229123127e-07, "loss": 0.2724, "step": 32373 }, { "epoch": 1.5165597039396637, "grad_norm": 0.6333385628500147, "learning_rate": 7.277797535867607e-07, "loss": 0.2829, "step": 32374 }, { "epoch": 1.516606548929592, "grad_norm": 0.5918873875263058, "learning_rate": 7.276459944598899e-07, "loss": 0.27, "step": 32375 }, { "epoch": 1.5166533939195204, "grad_norm": 0.6117745692065684, "learning_rate": 7.275122455324718e-07, "loss": 0.2888, "step": 32376 }, { "epoch": 1.5167002389094486, "grad_norm": 0.621871279257186, "learning_rate": 7.273785068052744e-07, "loss": 0.2903, "step": 32377 }, { "epoch": 1.5167470838993768, "grad_norm": 0.5595278423536746, "learning_rate": 7.272447782790681e-07, "loss": 0.259, "step": 32378 }, { "epoch": 1.5167939288893053, "grad_norm": 0.5884222876321016, "learning_rate": 7.271110599546227e-07, "loss": 0.2824, "step": 32379 }, { "epoch": 1.5168407738792338, "grad_norm": 0.5478106937143457, "learning_rate": 7.269773518327075e-07, "loss": 0.2567, "step": 32380 }, { "epoch": 1.5168876188691618, "grad_norm": 0.6153927394223273, "learning_rate": 7.268436539140927e-07, "loss": 0.276, "step": 32381 }, { "epoch": 1.5169344638590903, "grad_norm": 0.6504060425271849, "learning_rate": 7.267099661995469e-07, "loss": 0.2775, "step": 32382 }, { "epoch": 1.5169813088490187, "grad_norm": 0.5649216853263053, "learning_rate": 7.265762886898389e-07, "loss": 0.2666, "step": 32383 }, { "epoch": 1.517028153838947, "grad_norm": 0.5946161986444718, "learning_rate": 7.264426213857387e-07, "loss": 0.2731, "step": 32384 }, { "epoch": 1.5170749988288752, "grad_norm": 0.5980707222709721, "learning_rate": 7.263089642880156e-07, "loss": 0.2669, "step": 32385 }, { "epoch": 1.5171218438188037, "grad_norm": 0.6001225072159088, "learning_rate": 7.261753173974384e-07, "loss": 0.2704, "step": 32386 }, { "epoch": 1.517168688808732, "grad_norm": 0.6026290092906538, "learning_rate": 7.260416807147763e-07, "loss": 0.2742, "step": 32387 }, { "epoch": 1.5172155337986601, "grad_norm": 0.6309783188223385, "learning_rate": 7.259080542407995e-07, "loss": 0.2758, "step": 32388 }, { "epoch": 1.5172623787885886, "grad_norm": 0.6173972977977648, "learning_rate": 7.257744379762757e-07, "loss": 0.2914, "step": 32389 }, { "epoch": 1.5173092237785168, "grad_norm": 0.5802596077992422, "learning_rate": 7.256408319219735e-07, "loss": 0.2896, "step": 32390 }, { "epoch": 1.517356068768445, "grad_norm": 0.5984168201750777, "learning_rate": 7.255072360786622e-07, "loss": 0.2752, "step": 32391 }, { "epoch": 1.5174029137583736, "grad_norm": 0.5768000726359688, "learning_rate": 7.25373650447111e-07, "loss": 0.2664, "step": 32392 }, { "epoch": 1.517449758748302, "grad_norm": 0.61171226836263, "learning_rate": 7.25240075028088e-07, "loss": 0.2848, "step": 32393 }, { "epoch": 1.51749660373823, "grad_norm": 0.656679759180959, "learning_rate": 7.251065098223625e-07, "loss": 0.2877, "step": 32394 }, { "epoch": 1.5175434487281585, "grad_norm": 0.5849867522677579, "learning_rate": 7.249729548307038e-07, "loss": 0.2696, "step": 32395 }, { "epoch": 1.517590293718087, "grad_norm": 0.5609842853399982, "learning_rate": 7.248394100538794e-07, "loss": 0.2659, "step": 32396 }, { "epoch": 1.5176371387080152, "grad_norm": 0.5989683271045934, "learning_rate": 7.247058754926575e-07, "loss": 0.2606, "step": 32397 }, { "epoch": 1.5176839836979434, "grad_norm": 0.590663851788473, "learning_rate": 7.24572351147807e-07, "loss": 0.278, "step": 32398 }, { "epoch": 1.517730828687872, "grad_norm": 0.6285699585558956, "learning_rate": 7.244388370200963e-07, "loss": 0.2733, "step": 32399 }, { "epoch": 1.5177776736778001, "grad_norm": 0.5804466110879458, "learning_rate": 7.243053331102939e-07, "loss": 0.2653, "step": 32400 }, { "epoch": 1.5178245186677284, "grad_norm": 0.5911791587784352, "learning_rate": 7.241718394191688e-07, "loss": 0.28, "step": 32401 }, { "epoch": 1.5178713636576568, "grad_norm": 0.600419778663002, "learning_rate": 7.240383559474875e-07, "loss": 0.2886, "step": 32402 }, { "epoch": 1.517918208647585, "grad_norm": 0.5801155943230709, "learning_rate": 7.239048826960199e-07, "loss": 0.2739, "step": 32403 }, { "epoch": 1.5179650536375133, "grad_norm": 0.6106064664780467, "learning_rate": 7.237714196655327e-07, "loss": 0.2741, "step": 32404 }, { "epoch": 1.5180118986274418, "grad_norm": 0.6548723524554259, "learning_rate": 7.236379668567944e-07, "loss": 0.2921, "step": 32405 }, { "epoch": 1.5180587436173703, "grad_norm": 0.5652654284686118, "learning_rate": 7.235045242705732e-07, "loss": 0.2728, "step": 32406 }, { "epoch": 1.5181055886072985, "grad_norm": 0.5681803365611686, "learning_rate": 7.233710919076375e-07, "loss": 0.269, "step": 32407 }, { "epoch": 1.5181524335972267, "grad_norm": 0.6145040654514258, "learning_rate": 7.232376697687543e-07, "loss": 0.2907, "step": 32408 }, { "epoch": 1.5181992785871552, "grad_norm": 0.6359666755085366, "learning_rate": 7.231042578546913e-07, "loss": 0.2806, "step": 32409 }, { "epoch": 1.5182461235770834, "grad_norm": 0.5851894609532874, "learning_rate": 7.229708561662177e-07, "loss": 0.2522, "step": 32410 }, { "epoch": 1.5182929685670117, "grad_norm": 0.6468902043058622, "learning_rate": 7.228374647040995e-07, "loss": 0.2846, "step": 32411 }, { "epoch": 1.5183398135569401, "grad_norm": 0.5749907841890388, "learning_rate": 7.227040834691049e-07, "loss": 0.2613, "step": 32412 }, { "epoch": 1.5183866585468684, "grad_norm": 0.567490733989908, "learning_rate": 7.225707124620021e-07, "loss": 0.2678, "step": 32413 }, { "epoch": 1.5184335035367966, "grad_norm": 0.5825349746976454, "learning_rate": 7.224373516835575e-07, "loss": 0.261, "step": 32414 }, { "epoch": 1.518480348526725, "grad_norm": 0.5955150021362656, "learning_rate": 7.223040011345395e-07, "loss": 0.2653, "step": 32415 }, { "epoch": 1.5185271935166536, "grad_norm": 0.5739338688208916, "learning_rate": 7.221706608157156e-07, "loss": 0.2621, "step": 32416 }, { "epoch": 1.5185740385065816, "grad_norm": 0.5720344921455078, "learning_rate": 7.220373307278519e-07, "loss": 0.2621, "step": 32417 }, { "epoch": 1.51862088349651, "grad_norm": 0.6077622200725378, "learning_rate": 7.219040108717168e-07, "loss": 0.2782, "step": 32418 }, { "epoch": 1.5186677284864385, "grad_norm": 0.5635556445074866, "learning_rate": 7.217707012480777e-07, "loss": 0.2517, "step": 32419 }, { "epoch": 1.5187145734763667, "grad_norm": 0.667309946771076, "learning_rate": 7.216374018577005e-07, "loss": 0.2874, "step": 32420 }, { "epoch": 1.518761418466295, "grad_norm": 0.598255852636638, "learning_rate": 7.215041127013533e-07, "loss": 0.2799, "step": 32421 }, { "epoch": 1.5188082634562234, "grad_norm": 0.6296335064347987, "learning_rate": 7.213708337798028e-07, "loss": 0.2542, "step": 32422 }, { "epoch": 1.5188551084461517, "grad_norm": 0.6361649324032818, "learning_rate": 7.212375650938166e-07, "loss": 0.2897, "step": 32423 }, { "epoch": 1.51890195343608, "grad_norm": 0.6282778129603087, "learning_rate": 7.211043066441608e-07, "loss": 0.2904, "step": 32424 }, { "epoch": 1.5189487984260084, "grad_norm": 0.6204903130090826, "learning_rate": 7.209710584316032e-07, "loss": 0.2698, "step": 32425 }, { "epoch": 1.5189956434159366, "grad_norm": 0.6303696129813885, "learning_rate": 7.208378204569092e-07, "loss": 0.2817, "step": 32426 }, { "epoch": 1.5190424884058649, "grad_norm": 0.623121217102233, "learning_rate": 7.207045927208464e-07, "loss": 0.2814, "step": 32427 }, { "epoch": 1.5190893333957933, "grad_norm": 0.5934672335752155, "learning_rate": 7.205713752241816e-07, "loss": 0.2653, "step": 32428 }, { "epoch": 1.5191361783857218, "grad_norm": 0.6222872713642636, "learning_rate": 7.204381679676811e-07, "loss": 0.2854, "step": 32429 }, { "epoch": 1.5191830233756498, "grad_norm": 0.6075763117996695, "learning_rate": 7.203049709521126e-07, "loss": 0.2748, "step": 32430 }, { "epoch": 1.5192298683655783, "grad_norm": 0.6055111528416303, "learning_rate": 7.201717841782416e-07, "loss": 0.2729, "step": 32431 }, { "epoch": 1.5192767133555067, "grad_norm": 0.6016847325296756, "learning_rate": 7.200386076468338e-07, "loss": 0.2719, "step": 32432 }, { "epoch": 1.519323558345435, "grad_norm": 0.5721468805909791, "learning_rate": 7.199054413586564e-07, "loss": 0.2584, "step": 32433 }, { "epoch": 1.5193704033353632, "grad_norm": 0.6210020597100049, "learning_rate": 7.197722853144759e-07, "loss": 0.2557, "step": 32434 }, { "epoch": 1.5194172483252917, "grad_norm": 0.6267635215283176, "learning_rate": 7.196391395150585e-07, "loss": 0.279, "step": 32435 }, { "epoch": 1.51946409331522, "grad_norm": 0.6192976006577116, "learning_rate": 7.195060039611703e-07, "loss": 0.2806, "step": 32436 }, { "epoch": 1.5195109383051482, "grad_norm": 0.5883759979120271, "learning_rate": 7.193728786535784e-07, "loss": 0.2587, "step": 32437 }, { "epoch": 1.5195577832950766, "grad_norm": 0.5674522690352779, "learning_rate": 7.192397635930479e-07, "loss": 0.2621, "step": 32438 }, { "epoch": 1.5196046282850049, "grad_norm": 0.5467139254268293, "learning_rate": 7.191066587803444e-07, "loss": 0.2544, "step": 32439 }, { "epoch": 1.519651473274933, "grad_norm": 0.5962091056807112, "learning_rate": 7.189735642162343e-07, "loss": 0.2762, "step": 32440 }, { "epoch": 1.5196983182648616, "grad_norm": 0.585627917659474, "learning_rate": 7.188404799014836e-07, "loss": 0.2738, "step": 32441 }, { "epoch": 1.51974516325479, "grad_norm": 0.6233781037750626, "learning_rate": 7.187074058368585e-07, "loss": 0.27, "step": 32442 }, { "epoch": 1.5197920082447183, "grad_norm": 0.6038944781107823, "learning_rate": 7.185743420231254e-07, "loss": 0.2656, "step": 32443 }, { "epoch": 1.5198388532346465, "grad_norm": 0.5741130429764846, "learning_rate": 7.184412884610481e-07, "loss": 0.2741, "step": 32444 }, { "epoch": 1.519885698224575, "grad_norm": 0.6565255025229298, "learning_rate": 7.183082451513942e-07, "loss": 0.2828, "step": 32445 }, { "epoch": 1.5199325432145032, "grad_norm": 0.6391534018412033, "learning_rate": 7.181752120949281e-07, "loss": 0.2803, "step": 32446 }, { "epoch": 1.5199793882044315, "grad_norm": 0.6094364714154682, "learning_rate": 7.180421892924156e-07, "loss": 0.2828, "step": 32447 }, { "epoch": 1.52002623319436, "grad_norm": 0.6191392927959811, "learning_rate": 7.179091767446225e-07, "loss": 0.286, "step": 32448 }, { "epoch": 1.5200730781842882, "grad_norm": 0.6217907891096924, "learning_rate": 7.177761744523149e-07, "loss": 0.253, "step": 32449 }, { "epoch": 1.5201199231742164, "grad_norm": 0.5909584354054949, "learning_rate": 7.176431824162566e-07, "loss": 0.2709, "step": 32450 }, { "epoch": 1.5201667681641449, "grad_norm": 0.6174800996544937, "learning_rate": 7.17510200637214e-07, "loss": 0.284, "step": 32451 }, { "epoch": 1.5202136131540733, "grad_norm": 0.5730881974218152, "learning_rate": 7.173772291159528e-07, "loss": 0.2714, "step": 32452 }, { "epoch": 1.5202604581440013, "grad_norm": 0.5728898912740452, "learning_rate": 7.172442678532368e-07, "loss": 0.2686, "step": 32453 }, { "epoch": 1.5203073031339298, "grad_norm": 0.5756044016236228, "learning_rate": 7.17111316849832e-07, "loss": 0.2707, "step": 32454 }, { "epoch": 1.5203541481238583, "grad_norm": 0.5672287242508808, "learning_rate": 7.169783761065044e-07, "loss": 0.2676, "step": 32455 }, { "epoch": 1.5204009931137865, "grad_norm": 0.6104769848249875, "learning_rate": 7.168454456240173e-07, "loss": 0.2738, "step": 32456 }, { "epoch": 1.5204478381037148, "grad_norm": 0.6082403228634828, "learning_rate": 7.167125254031362e-07, "loss": 0.2746, "step": 32457 }, { "epoch": 1.5204946830936432, "grad_norm": 0.5733451592381043, "learning_rate": 7.165796154446272e-07, "loss": 0.2809, "step": 32458 }, { "epoch": 1.5205415280835715, "grad_norm": 0.5737289380911984, "learning_rate": 7.164467157492539e-07, "loss": 0.281, "step": 32459 }, { "epoch": 1.5205883730734997, "grad_norm": 0.6132089148223066, "learning_rate": 7.16313826317781e-07, "loss": 0.2692, "step": 32460 }, { "epoch": 1.5206352180634282, "grad_norm": 0.6042704024703043, "learning_rate": 7.161809471509737e-07, "loss": 0.2852, "step": 32461 }, { "epoch": 1.5206820630533564, "grad_norm": 0.5916744908343801, "learning_rate": 7.160480782495977e-07, "loss": 0.2578, "step": 32462 }, { "epoch": 1.5207289080432846, "grad_norm": 0.618417289357976, "learning_rate": 7.159152196144159e-07, "loss": 0.2489, "step": 32463 }, { "epoch": 1.520775753033213, "grad_norm": 0.5446176444334467, "learning_rate": 7.157823712461934e-07, "loss": 0.2578, "step": 32464 }, { "epoch": 1.5208225980231416, "grad_norm": 0.5930456352829169, "learning_rate": 7.156495331456959e-07, "loss": 0.2705, "step": 32465 }, { "epoch": 1.5208694430130696, "grad_norm": 0.5234090428251025, "learning_rate": 7.155167053136857e-07, "loss": 0.2496, "step": 32466 }, { "epoch": 1.520916288002998, "grad_norm": 0.5751614596001131, "learning_rate": 7.153838877509287e-07, "loss": 0.2695, "step": 32467 }, { "epoch": 1.5209631329929265, "grad_norm": 0.583959794312962, "learning_rate": 7.152510804581894e-07, "loss": 0.2808, "step": 32468 }, { "epoch": 1.5210099779828548, "grad_norm": 0.6440177212326474, "learning_rate": 7.15118283436231e-07, "loss": 0.2793, "step": 32469 }, { "epoch": 1.521056822972783, "grad_norm": 0.650657670755455, "learning_rate": 7.149854966858183e-07, "loss": 0.2822, "step": 32470 }, { "epoch": 1.5211036679627115, "grad_norm": 0.5950512919278758, "learning_rate": 7.148527202077152e-07, "loss": 0.279, "step": 32471 }, { "epoch": 1.5211505129526397, "grad_norm": 0.6052456414722122, "learning_rate": 7.14719954002687e-07, "loss": 0.2768, "step": 32472 }, { "epoch": 1.521197357942568, "grad_norm": 0.6304823068440454, "learning_rate": 7.145871980714958e-07, "loss": 0.2826, "step": 32473 }, { "epoch": 1.5212442029324964, "grad_norm": 0.6012815009916306, "learning_rate": 7.144544524149075e-07, "loss": 0.2725, "step": 32474 }, { "epoch": 1.5212910479224246, "grad_norm": 0.5498281784355576, "learning_rate": 7.143217170336842e-07, "loss": 0.2578, "step": 32475 }, { "epoch": 1.5213378929123529, "grad_norm": 0.5860333539607073, "learning_rate": 7.141889919285905e-07, "loss": 0.2685, "step": 32476 }, { "epoch": 1.5213847379022813, "grad_norm": 0.60569845700416, "learning_rate": 7.140562771003903e-07, "loss": 0.2786, "step": 32477 }, { "epoch": 1.5214315828922098, "grad_norm": 0.6057272123558055, "learning_rate": 7.139235725498475e-07, "loss": 0.275, "step": 32478 }, { "epoch": 1.521478427882138, "grad_norm": 0.6212812787949333, "learning_rate": 7.137908782777261e-07, "loss": 0.2699, "step": 32479 }, { "epoch": 1.5215252728720663, "grad_norm": 0.6071407760756761, "learning_rate": 7.136581942847895e-07, "loss": 0.2859, "step": 32480 }, { "epoch": 1.5215721178619948, "grad_norm": 0.6385869844729365, "learning_rate": 7.135255205718003e-07, "loss": 0.2646, "step": 32481 }, { "epoch": 1.521618962851923, "grad_norm": 0.5272745506615603, "learning_rate": 7.133928571395227e-07, "loss": 0.2534, "step": 32482 }, { "epoch": 1.5216658078418512, "grad_norm": 0.6071710526691945, "learning_rate": 7.132602039887198e-07, "loss": 0.2619, "step": 32483 }, { "epoch": 1.5217126528317797, "grad_norm": 0.5913211072330614, "learning_rate": 7.131275611201558e-07, "loss": 0.2784, "step": 32484 }, { "epoch": 1.521759497821708, "grad_norm": 0.6097641970216874, "learning_rate": 7.129949285345933e-07, "loss": 0.2807, "step": 32485 }, { "epoch": 1.5218063428116362, "grad_norm": 0.6227506072671143, "learning_rate": 7.128623062327966e-07, "loss": 0.2899, "step": 32486 }, { "epoch": 1.5218531878015646, "grad_norm": 0.589764360586272, "learning_rate": 7.127296942155285e-07, "loss": 0.2705, "step": 32487 }, { "epoch": 1.521900032791493, "grad_norm": 0.5837915856057694, "learning_rate": 7.125970924835507e-07, "loss": 0.2783, "step": 32488 }, { "epoch": 1.5219468777814211, "grad_norm": 0.5783140041892868, "learning_rate": 7.124645010376275e-07, "loss": 0.271, "step": 32489 }, { "epoch": 1.5219937227713496, "grad_norm": 0.5649678331834125, "learning_rate": 7.123319198785217e-07, "loss": 0.2647, "step": 32490 }, { "epoch": 1.522040567761278, "grad_norm": 0.6140579340906493, "learning_rate": 7.121993490069964e-07, "loss": 0.2777, "step": 32491 }, { "epoch": 1.5220874127512063, "grad_norm": 0.6028752603061588, "learning_rate": 7.120667884238153e-07, "loss": 0.2732, "step": 32492 }, { "epoch": 1.5221342577411345, "grad_norm": 0.5698332594238973, "learning_rate": 7.119342381297397e-07, "loss": 0.2622, "step": 32493 }, { "epoch": 1.522181102731063, "grad_norm": 0.6135886732528301, "learning_rate": 7.118016981255341e-07, "loss": 0.2676, "step": 32494 }, { "epoch": 1.5222279477209912, "grad_norm": 0.616217157594594, "learning_rate": 7.116691684119592e-07, "loss": 0.2816, "step": 32495 }, { "epoch": 1.5222747927109195, "grad_norm": 0.6211729395127193, "learning_rate": 7.11536648989779e-07, "loss": 0.2819, "step": 32496 }, { "epoch": 1.522321637700848, "grad_norm": 0.6116874086515396, "learning_rate": 7.114041398597557e-07, "loss": 0.2931, "step": 32497 }, { "epoch": 1.5223684826907762, "grad_norm": 0.5910923738704281, "learning_rate": 7.112716410226527e-07, "loss": 0.2762, "step": 32498 }, { "epoch": 1.5224153276807044, "grad_norm": 0.6320178176408214, "learning_rate": 7.111391524792313e-07, "loss": 0.2821, "step": 32499 }, { "epoch": 1.5224621726706329, "grad_norm": 0.5889101570815793, "learning_rate": 7.110066742302546e-07, "loss": 0.2764, "step": 32500 }, { "epoch": 1.5225090176605613, "grad_norm": 0.5994026626003677, "learning_rate": 7.108742062764854e-07, "loss": 0.2682, "step": 32501 }, { "epoch": 1.5225558626504894, "grad_norm": 0.6079517618702366, "learning_rate": 7.107417486186846e-07, "loss": 0.2646, "step": 32502 }, { "epoch": 1.5226027076404178, "grad_norm": 0.5825902468261231, "learning_rate": 7.106093012576154e-07, "loss": 0.2649, "step": 32503 }, { "epoch": 1.5226495526303463, "grad_norm": 0.5972096930762362, "learning_rate": 7.104768641940407e-07, "loss": 0.2771, "step": 32504 }, { "epoch": 1.5226963976202745, "grad_norm": 0.6230200787592551, "learning_rate": 7.10344437428721e-07, "loss": 0.2699, "step": 32505 }, { "epoch": 1.5227432426102028, "grad_norm": 0.5869167385775468, "learning_rate": 7.102120209624195e-07, "loss": 0.2609, "step": 32506 }, { "epoch": 1.5227900876001312, "grad_norm": 0.567920070858226, "learning_rate": 7.100796147958986e-07, "loss": 0.2703, "step": 32507 }, { "epoch": 1.5228369325900595, "grad_norm": 0.6349231632159305, "learning_rate": 7.099472189299189e-07, "loss": 0.2739, "step": 32508 }, { "epoch": 1.5228837775799877, "grad_norm": 0.598966701730526, "learning_rate": 7.09814833365243e-07, "loss": 0.2827, "step": 32509 }, { "epoch": 1.5229306225699162, "grad_norm": 0.6571973995336461, "learning_rate": 7.096824581026335e-07, "loss": 0.2891, "step": 32510 }, { "epoch": 1.5229774675598444, "grad_norm": 0.5668573496013658, "learning_rate": 7.095500931428509e-07, "loss": 0.2672, "step": 32511 }, { "epoch": 1.5230243125497727, "grad_norm": 0.6192758283067952, "learning_rate": 7.094177384866574e-07, "loss": 0.2745, "step": 32512 }, { "epoch": 1.5230711575397011, "grad_norm": 0.6168367024846134, "learning_rate": 7.092853941348146e-07, "loss": 0.3005, "step": 32513 }, { "epoch": 1.5231180025296296, "grad_norm": 0.6020166921405692, "learning_rate": 7.091530600880853e-07, "loss": 0.2705, "step": 32514 }, { "epoch": 1.5231648475195578, "grad_norm": 0.5776385356208676, "learning_rate": 7.09020736347229e-07, "loss": 0.2672, "step": 32515 }, { "epoch": 1.523211692509486, "grad_norm": 0.6251394686608663, "learning_rate": 7.088884229130091e-07, "loss": 0.2663, "step": 32516 }, { "epoch": 1.5232585374994145, "grad_norm": 0.5999898595922137, "learning_rate": 7.087561197861855e-07, "loss": 0.2885, "step": 32517 }, { "epoch": 1.5233053824893428, "grad_norm": 0.6140663590375136, "learning_rate": 7.086238269675202e-07, "loss": 0.2756, "step": 32518 }, { "epoch": 1.523352227479271, "grad_norm": 0.5830798346588575, "learning_rate": 7.084915444577745e-07, "loss": 0.2786, "step": 32519 }, { "epoch": 1.5233990724691995, "grad_norm": 0.5882461625088066, "learning_rate": 7.083592722577096e-07, "loss": 0.2824, "step": 32520 }, { "epoch": 1.5234459174591277, "grad_norm": 0.6144804265231932, "learning_rate": 7.082270103680875e-07, "loss": 0.2856, "step": 32521 }, { "epoch": 1.523492762449056, "grad_norm": 0.5653074207587052, "learning_rate": 7.080947587896686e-07, "loss": 0.258, "step": 32522 }, { "epoch": 1.5235396074389844, "grad_norm": 0.6118689792989651, "learning_rate": 7.079625175232135e-07, "loss": 0.2616, "step": 32523 }, { "epoch": 1.5235864524289129, "grad_norm": 0.6046979054686741, "learning_rate": 7.078302865694833e-07, "loss": 0.2729, "step": 32524 }, { "epoch": 1.523633297418841, "grad_norm": 0.5716752471739572, "learning_rate": 7.076980659292398e-07, "loss": 0.2689, "step": 32525 }, { "epoch": 1.5236801424087694, "grad_norm": 0.6270961150259297, "learning_rate": 7.07565855603243e-07, "loss": 0.259, "step": 32526 }, { "epoch": 1.5237269873986978, "grad_norm": 0.6256159375836979, "learning_rate": 7.074336555922545e-07, "loss": 0.2679, "step": 32527 }, { "epoch": 1.523773832388626, "grad_norm": 0.634354395458295, "learning_rate": 7.073014658970356e-07, "loss": 0.2934, "step": 32528 }, { "epoch": 1.5238206773785543, "grad_norm": 0.5826019117601045, "learning_rate": 7.07169286518346e-07, "loss": 0.2635, "step": 32529 }, { "epoch": 1.5238675223684828, "grad_norm": 0.5671877544921549, "learning_rate": 7.070371174569457e-07, "loss": 0.2734, "step": 32530 }, { "epoch": 1.523914367358411, "grad_norm": 0.6116090544430786, "learning_rate": 7.069049587135962e-07, "loss": 0.2807, "step": 32531 }, { "epoch": 1.5239612123483393, "grad_norm": 0.5927645450086656, "learning_rate": 7.06772810289058e-07, "loss": 0.268, "step": 32532 }, { "epoch": 1.5240080573382677, "grad_norm": 0.5872501701938249, "learning_rate": 7.066406721840918e-07, "loss": 0.269, "step": 32533 }, { "epoch": 1.524054902328196, "grad_norm": 0.6080410542858113, "learning_rate": 7.065085443994577e-07, "loss": 0.2814, "step": 32534 }, { "epoch": 1.5241017473181242, "grad_norm": 0.579507546111324, "learning_rate": 7.063764269359166e-07, "loss": 0.2712, "step": 32535 }, { "epoch": 1.5241485923080527, "grad_norm": 0.6210209766285257, "learning_rate": 7.062443197942286e-07, "loss": 0.2721, "step": 32536 }, { "epoch": 1.5241954372979811, "grad_norm": 0.5377122973403332, "learning_rate": 7.06112222975153e-07, "loss": 0.2591, "step": 32537 }, { "epoch": 1.5242422822879091, "grad_norm": 0.5779503319983668, "learning_rate": 7.059801364794505e-07, "loss": 0.2642, "step": 32538 }, { "epoch": 1.5242891272778376, "grad_norm": 0.5833580677275021, "learning_rate": 7.058480603078816e-07, "loss": 0.2621, "step": 32539 }, { "epoch": 1.524335972267766, "grad_norm": 0.6227797916185417, "learning_rate": 7.05715994461206e-07, "loss": 0.2735, "step": 32540 }, { "epoch": 1.5243828172576943, "grad_norm": 0.6581457666491817, "learning_rate": 7.055839389401847e-07, "loss": 0.2772, "step": 32541 }, { "epoch": 1.5244296622476226, "grad_norm": 0.6180500854439965, "learning_rate": 7.054518937455759e-07, "loss": 0.2766, "step": 32542 }, { "epoch": 1.524476507237551, "grad_norm": 0.620017352136649, "learning_rate": 7.053198588781413e-07, "loss": 0.2699, "step": 32543 }, { "epoch": 1.5245233522274793, "grad_norm": 0.6172399349146444, "learning_rate": 7.051878343386393e-07, "loss": 0.2825, "step": 32544 }, { "epoch": 1.5245701972174075, "grad_norm": 0.6368182089609957, "learning_rate": 7.050558201278298e-07, "loss": 0.29, "step": 32545 }, { "epoch": 1.524617042207336, "grad_norm": 0.5626343330548362, "learning_rate": 7.04923816246473e-07, "loss": 0.2572, "step": 32546 }, { "epoch": 1.5246638871972642, "grad_norm": 0.5636044672043601, "learning_rate": 7.047918226953295e-07, "loss": 0.2556, "step": 32547 }, { "epoch": 1.5247107321871924, "grad_norm": 0.6042697690697736, "learning_rate": 7.046598394751569e-07, "loss": 0.2698, "step": 32548 }, { "epoch": 1.524757577177121, "grad_norm": 0.5614354071609123, "learning_rate": 7.045278665867159e-07, "loss": 0.2681, "step": 32549 }, { "epoch": 1.5248044221670494, "grad_norm": 0.6882345785400961, "learning_rate": 7.04395904030766e-07, "loss": 0.287, "step": 32550 }, { "epoch": 1.5248512671569776, "grad_norm": 0.6075571611539663, "learning_rate": 7.04263951808066e-07, "loss": 0.29, "step": 32551 }, { "epoch": 1.5248981121469058, "grad_norm": 0.5991883250322996, "learning_rate": 7.041320099193757e-07, "loss": 0.2812, "step": 32552 }, { "epoch": 1.5249449571368343, "grad_norm": 0.6301184231049041, "learning_rate": 7.040000783654549e-07, "loss": 0.2764, "step": 32553 }, { "epoch": 1.5249918021267626, "grad_norm": 0.5841710846304701, "learning_rate": 7.038681571470615e-07, "loss": 0.2645, "step": 32554 }, { "epoch": 1.5250386471166908, "grad_norm": 0.6152807493211119, "learning_rate": 7.037362462649552e-07, "loss": 0.2716, "step": 32555 }, { "epoch": 1.5250854921066193, "grad_norm": 0.6247956978885276, "learning_rate": 7.036043457198963e-07, "loss": 0.2832, "step": 32556 }, { "epoch": 1.5251323370965475, "grad_norm": 0.5657754516937271, "learning_rate": 7.034724555126421e-07, "loss": 0.2525, "step": 32557 }, { "epoch": 1.5251791820864757, "grad_norm": 0.6377516273718645, "learning_rate": 7.033405756439527e-07, "loss": 0.2758, "step": 32558 }, { "epoch": 1.5252260270764042, "grad_norm": 0.5597655338306184, "learning_rate": 7.032087061145871e-07, "loss": 0.2665, "step": 32559 }, { "epoch": 1.5252728720663327, "grad_norm": 0.618290153340702, "learning_rate": 7.03076846925303e-07, "loss": 0.2574, "step": 32560 }, { "epoch": 1.5253197170562607, "grad_norm": 0.5782070532608022, "learning_rate": 7.029449980768601e-07, "loss": 0.268, "step": 32561 }, { "epoch": 1.5253665620461891, "grad_norm": 0.6127960991548727, "learning_rate": 7.028131595700171e-07, "loss": 0.2717, "step": 32562 }, { "epoch": 1.5254134070361176, "grad_norm": 0.5918368593561666, "learning_rate": 7.026813314055333e-07, "loss": 0.2725, "step": 32563 }, { "epoch": 1.5254602520260458, "grad_norm": 0.6315937423156749, "learning_rate": 7.025495135841662e-07, "loss": 0.2933, "step": 32564 }, { "epoch": 1.525507097015974, "grad_norm": 0.5672061260410144, "learning_rate": 7.024177061066753e-07, "loss": 0.2578, "step": 32565 }, { "epoch": 1.5255539420059026, "grad_norm": 0.5725143256063522, "learning_rate": 7.022859089738182e-07, "loss": 0.2711, "step": 32566 }, { "epoch": 1.5256007869958308, "grad_norm": 0.602778205859593, "learning_rate": 7.021541221863538e-07, "loss": 0.2704, "step": 32567 }, { "epoch": 1.525647631985759, "grad_norm": 0.6010646146067168, "learning_rate": 7.020223457450404e-07, "loss": 0.2704, "step": 32568 }, { "epoch": 1.5256944769756875, "grad_norm": 0.5955533014693186, "learning_rate": 7.018905796506364e-07, "loss": 0.2788, "step": 32569 }, { "epoch": 1.5257413219656157, "grad_norm": 0.6036384313410957, "learning_rate": 7.017588239039014e-07, "loss": 0.2789, "step": 32570 }, { "epoch": 1.525788166955544, "grad_norm": 0.5792809833579293, "learning_rate": 7.01627078505592e-07, "loss": 0.275, "step": 32571 }, { "epoch": 1.5258350119454724, "grad_norm": 0.603756327597194, "learning_rate": 7.014953434564662e-07, "loss": 0.2747, "step": 32572 }, { "epoch": 1.525881856935401, "grad_norm": 0.6067243786477572, "learning_rate": 7.013636187572825e-07, "loss": 0.261, "step": 32573 }, { "epoch": 1.525928701925329, "grad_norm": 0.6211555436932432, "learning_rate": 7.012319044087992e-07, "loss": 0.2746, "step": 32574 }, { "epoch": 1.5259755469152574, "grad_norm": 0.6028003124191961, "learning_rate": 7.011002004117742e-07, "loss": 0.2666, "step": 32575 }, { "epoch": 1.5260223919051858, "grad_norm": 0.6074360903923596, "learning_rate": 7.009685067669655e-07, "loss": 0.2658, "step": 32576 }, { "epoch": 1.526069236895114, "grad_norm": 0.6032561472392872, "learning_rate": 7.008368234751315e-07, "loss": 0.259, "step": 32577 }, { "epoch": 1.5261160818850423, "grad_norm": 0.5978124118908886, "learning_rate": 7.007051505370293e-07, "loss": 0.2687, "step": 32578 }, { "epoch": 1.5261629268749708, "grad_norm": 0.5723688339345298, "learning_rate": 7.005734879534162e-07, "loss": 0.2422, "step": 32579 }, { "epoch": 1.526209771864899, "grad_norm": 0.5953535899830065, "learning_rate": 7.004418357250503e-07, "loss": 0.277, "step": 32580 }, { "epoch": 1.5262566168548273, "grad_norm": 0.5960480377681563, "learning_rate": 7.003101938526893e-07, "loss": 0.2723, "step": 32581 }, { "epoch": 1.5263034618447557, "grad_norm": 0.5858420127803194, "learning_rate": 7.001785623370908e-07, "loss": 0.2634, "step": 32582 }, { "epoch": 1.526350306834684, "grad_norm": 0.626773822855191, "learning_rate": 7.000469411790131e-07, "loss": 0.2691, "step": 32583 }, { "epoch": 1.5263971518246122, "grad_norm": 0.573545765116289, "learning_rate": 6.999153303792122e-07, "loss": 0.2553, "step": 32584 }, { "epoch": 1.5264439968145407, "grad_norm": 0.6421159258298773, "learning_rate": 6.997837299384467e-07, "loss": 0.2767, "step": 32585 }, { "epoch": 1.5264908418044691, "grad_norm": 0.5739028856706414, "learning_rate": 6.996521398574727e-07, "loss": 0.2759, "step": 32586 }, { "epoch": 1.5265376867943974, "grad_norm": 0.617173109681711, "learning_rate": 6.995205601370481e-07, "loss": 0.2752, "step": 32587 }, { "epoch": 1.5265845317843256, "grad_norm": 0.6370412141025557, "learning_rate": 6.9938899077793e-07, "loss": 0.3024, "step": 32588 }, { "epoch": 1.526631376774254, "grad_norm": 0.6094653051515906, "learning_rate": 6.992574317808768e-07, "loss": 0.2817, "step": 32589 }, { "epoch": 1.5266782217641823, "grad_norm": 0.5764913320753873, "learning_rate": 6.991258831466435e-07, "loss": 0.265, "step": 32590 }, { "epoch": 1.5267250667541106, "grad_norm": 0.5897447275305895, "learning_rate": 6.989943448759882e-07, "loss": 0.2766, "step": 32591 }, { "epoch": 1.526771911744039, "grad_norm": 0.5928438605981595, "learning_rate": 6.988628169696685e-07, "loss": 0.2828, "step": 32592 }, { "epoch": 1.5268187567339673, "grad_norm": 0.6130433921771231, "learning_rate": 6.987312994284399e-07, "loss": 0.2686, "step": 32593 }, { "epoch": 1.5268656017238955, "grad_norm": 0.5746102645203743, "learning_rate": 6.985997922530596e-07, "loss": 0.2665, "step": 32594 }, { "epoch": 1.526912446713824, "grad_norm": 0.6016898318799245, "learning_rate": 6.984682954442859e-07, "loss": 0.279, "step": 32595 }, { "epoch": 1.5269592917037524, "grad_norm": 0.6117507688408175, "learning_rate": 6.983368090028733e-07, "loss": 0.2942, "step": 32596 }, { "epoch": 1.5270061366936805, "grad_norm": 0.5549118013050989, "learning_rate": 6.982053329295796e-07, "loss": 0.2587, "step": 32597 }, { "epoch": 1.527052981683609, "grad_norm": 0.5600354313251218, "learning_rate": 6.980738672251622e-07, "loss": 0.2671, "step": 32598 }, { "epoch": 1.5270998266735374, "grad_norm": 0.6141700381161648, "learning_rate": 6.979424118903761e-07, "loss": 0.2793, "step": 32599 }, { "epoch": 1.5271466716634656, "grad_norm": 0.6236249504818592, "learning_rate": 6.978109669259783e-07, "loss": 0.2632, "step": 32600 }, { "epoch": 1.5271935166533939, "grad_norm": 0.5912971061953325, "learning_rate": 6.976795323327256e-07, "loss": 0.2736, "step": 32601 }, { "epoch": 1.5272403616433223, "grad_norm": 0.6132965720160657, "learning_rate": 6.97548108111375e-07, "loss": 0.2819, "step": 32602 }, { "epoch": 1.5272872066332506, "grad_norm": 0.5864319092911263, "learning_rate": 6.974166942626812e-07, "loss": 0.2597, "step": 32603 }, { "epoch": 1.5273340516231788, "grad_norm": 0.5309379941241394, "learning_rate": 6.972852907874012e-07, "loss": 0.2515, "step": 32604 }, { "epoch": 1.5273808966131073, "grad_norm": 0.5927960392033982, "learning_rate": 6.971538976862918e-07, "loss": 0.2587, "step": 32605 }, { "epoch": 1.5274277416030355, "grad_norm": 0.5590536693206016, "learning_rate": 6.970225149601081e-07, "loss": 0.2761, "step": 32606 }, { "epoch": 1.5274745865929638, "grad_norm": 0.6314786435614531, "learning_rate": 6.968911426096068e-07, "loss": 0.3022, "step": 32607 }, { "epoch": 1.5275214315828922, "grad_norm": 0.5988605043898182, "learning_rate": 6.967597806355442e-07, "loss": 0.2636, "step": 32608 }, { "epoch": 1.5275682765728207, "grad_norm": 0.6361953299538148, "learning_rate": 6.966284290386751e-07, "loss": 0.2993, "step": 32609 }, { "epoch": 1.5276151215627487, "grad_norm": 0.5792762685732624, "learning_rate": 6.964970878197563e-07, "loss": 0.2663, "step": 32610 }, { "epoch": 1.5276619665526772, "grad_norm": 0.5961835544229028, "learning_rate": 6.963657569795434e-07, "loss": 0.2644, "step": 32611 }, { "epoch": 1.5277088115426056, "grad_norm": 0.5384369206079024, "learning_rate": 6.962344365187932e-07, "loss": 0.2425, "step": 32612 }, { "epoch": 1.5277556565325339, "grad_norm": 0.6429655712995614, "learning_rate": 6.961031264382592e-07, "loss": 0.2783, "step": 32613 }, { "epoch": 1.527802501522462, "grad_norm": 0.6046581360600048, "learning_rate": 6.959718267386994e-07, "loss": 0.2757, "step": 32614 }, { "epoch": 1.5278493465123906, "grad_norm": 0.6277902911068988, "learning_rate": 6.958405374208676e-07, "loss": 0.2799, "step": 32615 }, { "epoch": 1.5278961915023188, "grad_norm": 0.5902082632648337, "learning_rate": 6.957092584855202e-07, "loss": 0.2611, "step": 32616 }, { "epoch": 1.527943036492247, "grad_norm": 0.5579258269544652, "learning_rate": 6.955779899334123e-07, "loss": 0.2536, "step": 32617 }, { "epoch": 1.5279898814821755, "grad_norm": 0.6038089139795682, "learning_rate": 6.954467317652994e-07, "loss": 0.2734, "step": 32618 }, { "epoch": 1.5280367264721038, "grad_norm": 0.6449568339553067, "learning_rate": 6.953154839819379e-07, "loss": 0.2643, "step": 32619 }, { "epoch": 1.528083571462032, "grad_norm": 0.5624178208791902, "learning_rate": 6.951842465840824e-07, "loss": 0.2678, "step": 32620 }, { "epoch": 1.5281304164519605, "grad_norm": 0.5885323018089464, "learning_rate": 6.95053019572487e-07, "loss": 0.2637, "step": 32621 }, { "epoch": 1.528177261441889, "grad_norm": 0.5771491746167753, "learning_rate": 6.949218029479077e-07, "loss": 0.2887, "step": 32622 }, { "epoch": 1.5282241064318172, "grad_norm": 0.5432955166019862, "learning_rate": 6.947905967110999e-07, "loss": 0.2638, "step": 32623 }, { "epoch": 1.5282709514217454, "grad_norm": 0.5840402155368166, "learning_rate": 6.946594008628186e-07, "loss": 0.2528, "step": 32624 }, { "epoch": 1.5283177964116739, "grad_norm": 0.6050160969952668, "learning_rate": 6.945282154038182e-07, "loss": 0.2621, "step": 32625 }, { "epoch": 1.528364641401602, "grad_norm": 0.5794272635554716, "learning_rate": 6.943970403348555e-07, "loss": 0.257, "step": 32626 }, { "epoch": 1.5284114863915303, "grad_norm": 0.5803455228369295, "learning_rate": 6.942658756566836e-07, "loss": 0.2575, "step": 32627 }, { "epoch": 1.5284583313814588, "grad_norm": 0.6060036567596621, "learning_rate": 6.941347213700573e-07, "loss": 0.2732, "step": 32628 }, { "epoch": 1.528505176371387, "grad_norm": 0.6076475535481157, "learning_rate": 6.940035774757314e-07, "loss": 0.2665, "step": 32629 }, { "epoch": 1.5285520213613153, "grad_norm": 0.5699258619655078, "learning_rate": 6.938724439744612e-07, "loss": 0.2556, "step": 32630 }, { "epoch": 1.5285988663512438, "grad_norm": 0.6084685900092807, "learning_rate": 6.937413208670013e-07, "loss": 0.2572, "step": 32631 }, { "epoch": 1.5286457113411722, "grad_norm": 0.6453186926198441, "learning_rate": 6.936102081541065e-07, "loss": 0.2817, "step": 32632 }, { "epoch": 1.5286925563311002, "grad_norm": 0.5696611198721441, "learning_rate": 6.934791058365303e-07, "loss": 0.2519, "step": 32633 }, { "epoch": 1.5287394013210287, "grad_norm": 0.5591930500441517, "learning_rate": 6.933480139150286e-07, "loss": 0.2581, "step": 32634 }, { "epoch": 1.5287862463109572, "grad_norm": 0.6206280704586397, "learning_rate": 6.932169323903545e-07, "loss": 0.2861, "step": 32635 }, { "epoch": 1.5288330913008854, "grad_norm": 0.6096958017750134, "learning_rate": 6.930858612632626e-07, "loss": 0.2746, "step": 32636 }, { "epoch": 1.5288799362908136, "grad_norm": 0.5718440754510856, "learning_rate": 6.929548005345075e-07, "loss": 0.2618, "step": 32637 }, { "epoch": 1.528926781280742, "grad_norm": 0.5934023732377298, "learning_rate": 6.928237502048437e-07, "loss": 0.2694, "step": 32638 }, { "epoch": 1.5289736262706703, "grad_norm": 0.6004018182134157, "learning_rate": 6.926927102750247e-07, "loss": 0.2826, "step": 32639 }, { "epoch": 1.5290204712605986, "grad_norm": 0.5945617533574483, "learning_rate": 6.925616807458046e-07, "loss": 0.2676, "step": 32640 }, { "epoch": 1.529067316250527, "grad_norm": 0.6047435206388759, "learning_rate": 6.924306616179388e-07, "loss": 0.2674, "step": 32641 }, { "epoch": 1.5291141612404553, "grad_norm": 0.6027646119826976, "learning_rate": 6.922996528921794e-07, "loss": 0.2716, "step": 32642 }, { "epoch": 1.5291610062303835, "grad_norm": 0.5809795868031744, "learning_rate": 6.92168654569281e-07, "loss": 0.2758, "step": 32643 }, { "epoch": 1.529207851220312, "grad_norm": 0.5646910792204668, "learning_rate": 6.920376666499984e-07, "loss": 0.2523, "step": 32644 }, { "epoch": 1.5292546962102405, "grad_norm": 0.5632475946916924, "learning_rate": 6.91906689135084e-07, "loss": 0.2585, "step": 32645 }, { "epoch": 1.5293015412001685, "grad_norm": 0.6135709215533915, "learning_rate": 6.917757220252921e-07, "loss": 0.2917, "step": 32646 }, { "epoch": 1.529348386190097, "grad_norm": 0.5851931130473378, "learning_rate": 6.91644765321377e-07, "loss": 0.2843, "step": 32647 }, { "epoch": 1.5293952311800254, "grad_norm": 0.6414818911829103, "learning_rate": 6.915138190240913e-07, "loss": 0.2811, "step": 32648 }, { "epoch": 1.5294420761699536, "grad_norm": 0.6072269721936583, "learning_rate": 6.91382883134189e-07, "loss": 0.2753, "step": 32649 }, { "epoch": 1.5294889211598819, "grad_norm": 0.5867487236409602, "learning_rate": 6.91251957652424e-07, "loss": 0.2733, "step": 32650 }, { "epoch": 1.5295357661498103, "grad_norm": 0.6483435683038259, "learning_rate": 6.91121042579549e-07, "loss": 0.2926, "step": 32651 }, { "epoch": 1.5295826111397386, "grad_norm": 0.5910922931688036, "learning_rate": 6.909901379163178e-07, "loss": 0.267, "step": 32652 }, { "epoch": 1.5296294561296668, "grad_norm": 0.61094841565388, "learning_rate": 6.908592436634834e-07, "loss": 0.2732, "step": 32653 }, { "epoch": 1.5296763011195953, "grad_norm": 0.6229379775799255, "learning_rate": 6.907283598218003e-07, "loss": 0.2689, "step": 32654 }, { "epoch": 1.5297231461095235, "grad_norm": 0.6226030068370995, "learning_rate": 6.905974863920198e-07, "loss": 0.2762, "step": 32655 }, { "epoch": 1.5297699910994518, "grad_norm": 0.6172495565523136, "learning_rate": 6.904666233748969e-07, "loss": 0.2835, "step": 32656 }, { "epoch": 1.5298168360893802, "grad_norm": 0.6182998561587745, "learning_rate": 6.90335770771183e-07, "loss": 0.2737, "step": 32657 }, { "epoch": 1.5298636810793087, "grad_norm": 0.5998858578589741, "learning_rate": 6.902049285816318e-07, "loss": 0.2514, "step": 32658 }, { "epoch": 1.529910526069237, "grad_norm": 0.6401587237916094, "learning_rate": 6.900740968069966e-07, "loss": 0.2894, "step": 32659 }, { "epoch": 1.5299573710591652, "grad_norm": 0.5831451320926355, "learning_rate": 6.899432754480298e-07, "loss": 0.2594, "step": 32660 }, { "epoch": 1.5300042160490936, "grad_norm": 0.6007270934404234, "learning_rate": 6.898124645054855e-07, "loss": 0.2582, "step": 32661 }, { "epoch": 1.5300510610390219, "grad_norm": 0.592494996740161, "learning_rate": 6.896816639801152e-07, "loss": 0.2777, "step": 32662 }, { "epoch": 1.5300979060289501, "grad_norm": 0.5852403492493506, "learning_rate": 6.895508738726714e-07, "loss": 0.2583, "step": 32663 }, { "epoch": 1.5301447510188786, "grad_norm": 0.5873675864538688, "learning_rate": 6.894200941839071e-07, "loss": 0.2848, "step": 32664 }, { "epoch": 1.5301915960088068, "grad_norm": 0.5553566649227336, "learning_rate": 6.892893249145752e-07, "loss": 0.2712, "step": 32665 }, { "epoch": 1.530238440998735, "grad_norm": 0.6331789900529922, "learning_rate": 6.891585660654282e-07, "loss": 0.2785, "step": 32666 }, { "epoch": 1.5302852859886635, "grad_norm": 0.6302359764838588, "learning_rate": 6.890278176372183e-07, "loss": 0.283, "step": 32667 }, { "epoch": 1.530332130978592, "grad_norm": 0.6200317229830218, "learning_rate": 6.88897079630699e-07, "loss": 0.2755, "step": 32668 }, { "epoch": 1.53037897596852, "grad_norm": 0.5737548978500162, "learning_rate": 6.88766352046622e-07, "loss": 0.2505, "step": 32669 }, { "epoch": 1.5304258209584485, "grad_norm": 0.617205069726271, "learning_rate": 6.886356348857384e-07, "loss": 0.2883, "step": 32670 }, { "epoch": 1.530472665948377, "grad_norm": 0.6270630108313339, "learning_rate": 6.885049281488015e-07, "loss": 0.28, "step": 32671 }, { "epoch": 1.5305195109383052, "grad_norm": 0.6307581015608538, "learning_rate": 6.883742318365636e-07, "loss": 0.2707, "step": 32672 }, { "epoch": 1.5305663559282334, "grad_norm": 0.5627698511847453, "learning_rate": 6.882435459497764e-07, "loss": 0.27, "step": 32673 }, { "epoch": 1.5306132009181619, "grad_norm": 0.6216020794894144, "learning_rate": 6.881128704891924e-07, "loss": 0.2679, "step": 32674 }, { "epoch": 1.5306600459080901, "grad_norm": 0.5916507507883475, "learning_rate": 6.87982205455564e-07, "loss": 0.2746, "step": 32675 }, { "epoch": 1.5307068908980184, "grad_norm": 0.5950888335028081, "learning_rate": 6.878515508496428e-07, "loss": 0.241, "step": 32676 }, { "epoch": 1.5307537358879468, "grad_norm": 0.6166940153556744, "learning_rate": 6.877209066721796e-07, "loss": 0.287, "step": 32677 }, { "epoch": 1.530800580877875, "grad_norm": 0.6343903998796155, "learning_rate": 6.875902729239273e-07, "loss": 0.2594, "step": 32678 }, { "epoch": 1.5308474258678033, "grad_norm": 0.6109170957196488, "learning_rate": 6.874596496056371e-07, "loss": 0.2808, "step": 32679 }, { "epoch": 1.5308942708577318, "grad_norm": 0.5774198767430591, "learning_rate": 6.873290367180613e-07, "loss": 0.2639, "step": 32680 }, { "epoch": 1.5309411158476602, "grad_norm": 0.5960771290985972, "learning_rate": 6.87198434261952e-07, "loss": 0.2657, "step": 32681 }, { "epoch": 1.5309879608375883, "grad_norm": 0.5791125063108304, "learning_rate": 6.870678422380591e-07, "loss": 0.2737, "step": 32682 }, { "epoch": 1.5310348058275167, "grad_norm": 0.6402544243394608, "learning_rate": 6.869372606471361e-07, "loss": 0.2827, "step": 32683 }, { "epoch": 1.5310816508174452, "grad_norm": 0.6178577615662024, "learning_rate": 6.868066894899328e-07, "loss": 0.2793, "step": 32684 }, { "epoch": 1.5311284958073734, "grad_norm": 0.5494972328108237, "learning_rate": 6.866761287672011e-07, "loss": 0.2504, "step": 32685 }, { "epoch": 1.5311753407973017, "grad_norm": 0.5554664240107635, "learning_rate": 6.865455784796923e-07, "loss": 0.2772, "step": 32686 }, { "epoch": 1.5312221857872301, "grad_norm": 0.603214426678631, "learning_rate": 6.86415038628159e-07, "loss": 0.2623, "step": 32687 }, { "epoch": 1.5312690307771584, "grad_norm": 0.5889220453827322, "learning_rate": 6.862845092133505e-07, "loss": 0.2675, "step": 32688 }, { "epoch": 1.5313158757670866, "grad_norm": 0.5872648552939335, "learning_rate": 6.861539902360187e-07, "loss": 0.2503, "step": 32689 }, { "epoch": 1.531362720757015, "grad_norm": 0.578000634940039, "learning_rate": 6.860234816969155e-07, "loss": 0.2505, "step": 32690 }, { "epoch": 1.5314095657469433, "grad_norm": 0.5634870993287382, "learning_rate": 6.858929835967906e-07, "loss": 0.26, "step": 32691 }, { "epoch": 1.5314564107368716, "grad_norm": 0.5936108753019805, "learning_rate": 6.857624959363956e-07, "loss": 0.2684, "step": 32692 }, { "epoch": 1.5315032557268, "grad_norm": 0.6236267789909593, "learning_rate": 6.856320187164822e-07, "loss": 0.2634, "step": 32693 }, { "epoch": 1.5315501007167285, "grad_norm": 0.6244926496204447, "learning_rate": 6.855015519378e-07, "loss": 0.2717, "step": 32694 }, { "epoch": 1.5315969457066567, "grad_norm": 0.5513707491207345, "learning_rate": 6.853710956011e-07, "loss": 0.255, "step": 32695 }, { "epoch": 1.531643790696585, "grad_norm": 0.592574692464672, "learning_rate": 6.85240649707134e-07, "loss": 0.2872, "step": 32696 }, { "epoch": 1.5316906356865134, "grad_norm": 0.5591605835092979, "learning_rate": 6.851102142566512e-07, "loss": 0.2552, "step": 32697 }, { "epoch": 1.5317374806764417, "grad_norm": 0.5934333605551797, "learning_rate": 6.849797892504031e-07, "loss": 0.2802, "step": 32698 }, { "epoch": 1.53178432566637, "grad_norm": 0.5791166326776133, "learning_rate": 6.848493746891408e-07, "loss": 0.2657, "step": 32699 }, { "epoch": 1.5318311706562984, "grad_norm": 0.5925018797401861, "learning_rate": 6.847189705736134e-07, "loss": 0.2647, "step": 32700 }, { "epoch": 1.5318780156462266, "grad_norm": 0.5700279482882509, "learning_rate": 6.84588576904572e-07, "loss": 0.2628, "step": 32701 }, { "epoch": 1.5319248606361549, "grad_norm": 0.6579755802829971, "learning_rate": 6.844581936827671e-07, "loss": 0.2868, "step": 32702 }, { "epoch": 1.5319717056260833, "grad_norm": 0.6139668334938293, "learning_rate": 6.843278209089499e-07, "loss": 0.2735, "step": 32703 }, { "epoch": 1.5320185506160118, "grad_norm": 0.6308858280529297, "learning_rate": 6.841974585838687e-07, "loss": 0.2746, "step": 32704 }, { "epoch": 1.5320653956059398, "grad_norm": 0.5907142845025806, "learning_rate": 6.840671067082758e-07, "loss": 0.2829, "step": 32705 }, { "epoch": 1.5321122405958683, "grad_norm": 0.5868155682126078, "learning_rate": 6.83936765282919e-07, "loss": 0.2626, "step": 32706 }, { "epoch": 1.5321590855857967, "grad_norm": 0.6304833925242576, "learning_rate": 6.838064343085501e-07, "loss": 0.2778, "step": 32707 }, { "epoch": 1.532205930575725, "grad_norm": 0.5877799659786542, "learning_rate": 6.836761137859185e-07, "loss": 0.2729, "step": 32708 }, { "epoch": 1.5322527755656532, "grad_norm": 0.644073457497551, "learning_rate": 6.835458037157746e-07, "loss": 0.302, "step": 32709 }, { "epoch": 1.5322996205555817, "grad_norm": 0.6224348927380833, "learning_rate": 6.834155040988686e-07, "loss": 0.2888, "step": 32710 }, { "epoch": 1.53234646554551, "grad_norm": 0.6163490477069101, "learning_rate": 6.832852149359498e-07, "loss": 0.2772, "step": 32711 }, { "epoch": 1.5323933105354381, "grad_norm": 0.5923200619476637, "learning_rate": 6.831549362277673e-07, "loss": 0.2819, "step": 32712 }, { "epoch": 1.5324401555253666, "grad_norm": 0.5943660521480547, "learning_rate": 6.830246679750716e-07, "loss": 0.2732, "step": 32713 }, { "epoch": 1.5324870005152949, "grad_norm": 0.5593946050438676, "learning_rate": 6.828944101786119e-07, "loss": 0.2662, "step": 32714 }, { "epoch": 1.532533845505223, "grad_norm": 0.5900248771598774, "learning_rate": 6.827641628391385e-07, "loss": 0.2692, "step": 32715 }, { "epoch": 1.5325806904951516, "grad_norm": 0.6211600468978652, "learning_rate": 6.826339259574006e-07, "loss": 0.2908, "step": 32716 }, { "epoch": 1.53262753548508, "grad_norm": 0.5608175732457978, "learning_rate": 6.82503699534148e-07, "loss": 0.2606, "step": 32717 }, { "epoch": 1.532674380475008, "grad_norm": 0.5890908991466225, "learning_rate": 6.823734835701301e-07, "loss": 0.2604, "step": 32718 }, { "epoch": 1.5327212254649365, "grad_norm": 0.5223096968071016, "learning_rate": 6.822432780660953e-07, "loss": 0.2529, "step": 32719 }, { "epoch": 1.532768070454865, "grad_norm": 0.5749976842896882, "learning_rate": 6.821130830227935e-07, "loss": 0.2702, "step": 32720 }, { "epoch": 1.5328149154447932, "grad_norm": 0.579717628843916, "learning_rate": 6.81982898440974e-07, "loss": 0.2498, "step": 32721 }, { "epoch": 1.5328617604347214, "grad_norm": 0.5654409427868521, "learning_rate": 6.81852724321386e-07, "loss": 0.2557, "step": 32722 }, { "epoch": 1.53290860542465, "grad_norm": 0.5502629531679396, "learning_rate": 6.817225606647793e-07, "loss": 0.2456, "step": 32723 }, { "epoch": 1.5329554504145781, "grad_norm": 0.5749912173947891, "learning_rate": 6.815924074719013e-07, "loss": 0.2776, "step": 32724 }, { "epoch": 1.5330022954045064, "grad_norm": 0.6003984995647454, "learning_rate": 6.814622647435029e-07, "loss": 0.2748, "step": 32725 }, { "epoch": 1.5330491403944349, "grad_norm": 0.6120539009447042, "learning_rate": 6.813321324803316e-07, "loss": 0.2597, "step": 32726 }, { "epoch": 1.533095985384363, "grad_norm": 0.6178434107457708, "learning_rate": 6.812020106831363e-07, "loss": 0.2775, "step": 32727 }, { "epoch": 1.5331428303742913, "grad_norm": 0.5895477844429023, "learning_rate": 6.810718993526666e-07, "loss": 0.2784, "step": 32728 }, { "epoch": 1.5331896753642198, "grad_norm": 0.6263900077917877, "learning_rate": 6.809417984896716e-07, "loss": 0.2799, "step": 32729 }, { "epoch": 1.5332365203541483, "grad_norm": 0.6021904616414292, "learning_rate": 6.808117080948986e-07, "loss": 0.2738, "step": 32730 }, { "epoch": 1.5332833653440765, "grad_norm": 0.5729006861719117, "learning_rate": 6.80681628169097e-07, "loss": 0.2688, "step": 32731 }, { "epoch": 1.5333302103340047, "grad_norm": 0.624989113375905, "learning_rate": 6.80551558713016e-07, "loss": 0.2754, "step": 32732 }, { "epoch": 1.5333770553239332, "grad_norm": 0.6956041339080756, "learning_rate": 6.804214997274028e-07, "loss": 0.2982, "step": 32733 }, { "epoch": 1.5334239003138614, "grad_norm": 0.5356137104743064, "learning_rate": 6.802914512130065e-07, "loss": 0.2365, "step": 32734 }, { "epoch": 1.5334707453037897, "grad_norm": 0.5874287299133448, "learning_rate": 6.801614131705758e-07, "loss": 0.2764, "step": 32735 }, { "epoch": 1.5335175902937181, "grad_norm": 0.6797353208891066, "learning_rate": 6.800313856008592e-07, "loss": 0.2902, "step": 32736 }, { "epoch": 1.5335644352836464, "grad_norm": 0.5894595019137017, "learning_rate": 6.799013685046038e-07, "loss": 0.2688, "step": 32737 }, { "epoch": 1.5336112802735746, "grad_norm": 0.602334632364298, "learning_rate": 6.797713618825596e-07, "loss": 0.2804, "step": 32738 }, { "epoch": 1.533658125263503, "grad_norm": 0.593005419662457, "learning_rate": 6.796413657354728e-07, "loss": 0.2615, "step": 32739 }, { "epoch": 1.5337049702534316, "grad_norm": 0.5938240868814935, "learning_rate": 6.795113800640923e-07, "loss": 0.2753, "step": 32740 }, { "epoch": 1.5337518152433596, "grad_norm": 0.5722390945144483, "learning_rate": 6.793814048691663e-07, "loss": 0.2596, "step": 32741 }, { "epoch": 1.533798660233288, "grad_norm": 0.5696515846338825, "learning_rate": 6.792514401514436e-07, "loss": 0.2531, "step": 32742 }, { "epoch": 1.5338455052232165, "grad_norm": 0.5720069450808206, "learning_rate": 6.791214859116705e-07, "loss": 0.2524, "step": 32743 }, { "epoch": 1.5338923502131447, "grad_norm": 0.5786696801696463, "learning_rate": 6.789915421505952e-07, "loss": 0.2616, "step": 32744 }, { "epoch": 1.533939195203073, "grad_norm": 0.6202672810160473, "learning_rate": 6.78861608868967e-07, "loss": 0.2641, "step": 32745 }, { "epoch": 1.5339860401930014, "grad_norm": 0.6041414467982045, "learning_rate": 6.787316860675316e-07, "loss": 0.2755, "step": 32746 }, { "epoch": 1.5340328851829297, "grad_norm": 0.5939113320781899, "learning_rate": 6.786017737470377e-07, "loss": 0.2709, "step": 32747 }, { "epoch": 1.534079730172858, "grad_norm": 0.5847543346654532, "learning_rate": 6.784718719082337e-07, "loss": 0.2736, "step": 32748 }, { "epoch": 1.5341265751627864, "grad_norm": 0.5789846585785015, "learning_rate": 6.783419805518651e-07, "loss": 0.2587, "step": 32749 }, { "epoch": 1.5341734201527146, "grad_norm": 0.5838821046217526, "learning_rate": 6.782120996786809e-07, "loss": 0.2652, "step": 32750 }, { "epoch": 1.5342202651426429, "grad_norm": 0.5516800284512638, "learning_rate": 6.780822292894279e-07, "loss": 0.2567, "step": 32751 }, { "epoch": 1.5342671101325713, "grad_norm": 0.5696900903282668, "learning_rate": 6.779523693848547e-07, "loss": 0.263, "step": 32752 }, { "epoch": 1.5343139551224998, "grad_norm": 0.60378457990448, "learning_rate": 6.778225199657068e-07, "loss": 0.2678, "step": 32753 }, { "epoch": 1.5343608001124278, "grad_norm": 0.6640594418301755, "learning_rate": 6.776926810327331e-07, "loss": 0.2929, "step": 32754 }, { "epoch": 1.5344076451023563, "grad_norm": 0.6211458428445582, "learning_rate": 6.775628525866793e-07, "loss": 0.2911, "step": 32755 }, { "epoch": 1.5344544900922847, "grad_norm": 0.6284975238224422, "learning_rate": 6.774330346282931e-07, "loss": 0.2748, "step": 32756 }, { "epoch": 1.534501335082213, "grad_norm": 0.5926099299964457, "learning_rate": 6.773032271583219e-07, "loss": 0.2672, "step": 32757 }, { "epoch": 1.5345481800721412, "grad_norm": 0.5533269896549334, "learning_rate": 6.771734301775124e-07, "loss": 0.2562, "step": 32758 }, { "epoch": 1.5345950250620697, "grad_norm": 0.5672786276780747, "learning_rate": 6.770436436866124e-07, "loss": 0.2629, "step": 32759 }, { "epoch": 1.534641870051998, "grad_norm": 0.5917804935089668, "learning_rate": 6.769138676863682e-07, "loss": 0.2511, "step": 32760 }, { "epoch": 1.5346887150419262, "grad_norm": 0.5604784900284246, "learning_rate": 6.767841021775254e-07, "loss": 0.2588, "step": 32761 }, { "epoch": 1.5347355600318546, "grad_norm": 0.6093153460185421, "learning_rate": 6.76654347160832e-07, "loss": 0.2771, "step": 32762 }, { "epoch": 1.5347824050217829, "grad_norm": 0.6308094943169704, "learning_rate": 6.765246026370345e-07, "loss": 0.2939, "step": 32763 }, { "epoch": 1.534829250011711, "grad_norm": 0.592054909373063, "learning_rate": 6.763948686068797e-07, "loss": 0.2617, "step": 32764 }, { "epoch": 1.5348760950016396, "grad_norm": 0.5888260812416102, "learning_rate": 6.762651450711141e-07, "loss": 0.2605, "step": 32765 }, { "epoch": 1.534922939991568, "grad_norm": 0.5886344166320657, "learning_rate": 6.76135432030485e-07, "loss": 0.2766, "step": 32766 }, { "epoch": 1.5349697849814963, "grad_norm": 0.5626680727072333, "learning_rate": 6.76005729485738e-07, "loss": 0.2632, "step": 32767 }, { "epoch": 1.5350166299714245, "grad_norm": 0.6341381661907913, "learning_rate": 6.758760374376189e-07, "loss": 0.2975, "step": 32768 }, { "epoch": 1.535063474961353, "grad_norm": 0.60351811201678, "learning_rate": 6.757463558868746e-07, "loss": 0.2718, "step": 32769 }, { "epoch": 1.5351103199512812, "grad_norm": 0.6088839481441737, "learning_rate": 6.756166848342515e-07, "loss": 0.2598, "step": 32770 }, { "epoch": 1.5351571649412095, "grad_norm": 0.5981578849525012, "learning_rate": 6.754870242804959e-07, "loss": 0.2716, "step": 32771 }, { "epoch": 1.535204009931138, "grad_norm": 0.5788243343376746, "learning_rate": 6.753573742263547e-07, "loss": 0.2605, "step": 32772 }, { "epoch": 1.5352508549210662, "grad_norm": 0.6061129168459656, "learning_rate": 6.752277346725725e-07, "loss": 0.267, "step": 32773 }, { "epoch": 1.5352976999109944, "grad_norm": 0.6498422053246261, "learning_rate": 6.750981056198966e-07, "loss": 0.2842, "step": 32774 }, { "epoch": 1.5353445449009229, "grad_norm": 0.5759360884595951, "learning_rate": 6.749684870690718e-07, "loss": 0.2682, "step": 32775 }, { "epoch": 1.5353913898908513, "grad_norm": 0.5877643711150641, "learning_rate": 6.748388790208446e-07, "loss": 0.2634, "step": 32776 }, { "epoch": 1.5354382348807794, "grad_norm": 0.5776360455073769, "learning_rate": 6.747092814759609e-07, "loss": 0.2636, "step": 32777 }, { "epoch": 1.5354850798707078, "grad_norm": 0.5725554661386528, "learning_rate": 6.745796944351676e-07, "loss": 0.2537, "step": 32778 }, { "epoch": 1.5355319248606363, "grad_norm": 0.6229055546664493, "learning_rate": 6.744501178992082e-07, "loss": 0.2714, "step": 32779 }, { "epoch": 1.5355787698505645, "grad_norm": 0.5626356103003719, "learning_rate": 6.743205518688298e-07, "loss": 0.2609, "step": 32780 }, { "epoch": 1.5356256148404928, "grad_norm": 0.6522347179831957, "learning_rate": 6.741909963447782e-07, "loss": 0.3042, "step": 32781 }, { "epoch": 1.5356724598304212, "grad_norm": 0.6071200194258187, "learning_rate": 6.740614513277979e-07, "loss": 0.2698, "step": 32782 }, { "epoch": 1.5357193048203495, "grad_norm": 0.5953260935007036, "learning_rate": 6.739319168186351e-07, "loss": 0.2681, "step": 32783 }, { "epoch": 1.5357661498102777, "grad_norm": 0.5537305296034585, "learning_rate": 6.738023928180359e-07, "loss": 0.2579, "step": 32784 }, { "epoch": 1.5358129948002062, "grad_norm": 0.5447631379834474, "learning_rate": 6.736728793267441e-07, "loss": 0.2515, "step": 32785 }, { "epoch": 1.5358598397901344, "grad_norm": 0.6356795703223935, "learning_rate": 6.73543376345506e-07, "loss": 0.2791, "step": 32786 }, { "epoch": 1.5359066847800626, "grad_norm": 0.5591012014202336, "learning_rate": 6.734138838750672e-07, "loss": 0.2595, "step": 32787 }, { "epoch": 1.535953529769991, "grad_norm": 0.5691625612253971, "learning_rate": 6.732844019161719e-07, "loss": 0.2606, "step": 32788 }, { "epoch": 1.5360003747599196, "grad_norm": 0.6123562499638445, "learning_rate": 6.731549304695656e-07, "loss": 0.2645, "step": 32789 }, { "epoch": 1.5360472197498476, "grad_norm": 0.5765100470606078, "learning_rate": 6.730254695359944e-07, "loss": 0.2717, "step": 32790 }, { "epoch": 1.536094064739776, "grad_norm": 0.5859753734347743, "learning_rate": 6.728960191162015e-07, "loss": 0.2595, "step": 32791 }, { "epoch": 1.5361409097297045, "grad_norm": 0.5676921914915636, "learning_rate": 6.727665792109331e-07, "loss": 0.2568, "step": 32792 }, { "epoch": 1.5361877547196328, "grad_norm": 0.5652571586951164, "learning_rate": 6.726371498209333e-07, "loss": 0.2577, "step": 32793 }, { "epoch": 1.536234599709561, "grad_norm": 0.6316194517146225, "learning_rate": 6.725077309469485e-07, "loss": 0.2767, "step": 32794 }, { "epoch": 1.5362814446994895, "grad_norm": 0.6332126382827613, "learning_rate": 6.723783225897215e-07, "loss": 0.2859, "step": 32795 }, { "epoch": 1.5363282896894177, "grad_norm": 0.6169526795280235, "learning_rate": 6.722489247499989e-07, "loss": 0.2882, "step": 32796 }, { "epoch": 1.536375134679346, "grad_norm": 0.6088385832917058, "learning_rate": 6.721195374285233e-07, "loss": 0.2706, "step": 32797 }, { "epoch": 1.5364219796692744, "grad_norm": 0.5827948264555528, "learning_rate": 6.719901606260404e-07, "loss": 0.2659, "step": 32798 }, { "epoch": 1.5364688246592026, "grad_norm": 0.630144392592937, "learning_rate": 6.718607943432948e-07, "loss": 0.2709, "step": 32799 }, { "epoch": 1.5365156696491309, "grad_norm": 0.6279149911487268, "learning_rate": 6.717314385810306e-07, "loss": 0.2896, "step": 32800 }, { "epoch": 1.5365625146390594, "grad_norm": 0.5728268195386734, "learning_rate": 6.716020933399933e-07, "loss": 0.266, "step": 32801 }, { "epoch": 1.5366093596289878, "grad_norm": 0.6054602399576668, "learning_rate": 6.714727586209258e-07, "loss": 0.2823, "step": 32802 }, { "epoch": 1.536656204618916, "grad_norm": 0.5956432568463736, "learning_rate": 6.713434344245737e-07, "loss": 0.2788, "step": 32803 }, { "epoch": 1.5367030496088443, "grad_norm": 0.5825580381576749, "learning_rate": 6.712141207516798e-07, "loss": 0.272, "step": 32804 }, { "epoch": 1.5367498945987728, "grad_norm": 0.6077437860406795, "learning_rate": 6.710848176029888e-07, "loss": 0.2786, "step": 32805 }, { "epoch": 1.536796739588701, "grad_norm": 0.6111437190789247, "learning_rate": 6.709555249792452e-07, "loss": 0.2815, "step": 32806 }, { "epoch": 1.5368435845786292, "grad_norm": 0.6071209019935251, "learning_rate": 6.708262428811926e-07, "loss": 0.2803, "step": 32807 }, { "epoch": 1.5368904295685577, "grad_norm": 0.5985676837330638, "learning_rate": 6.706969713095763e-07, "loss": 0.2567, "step": 32808 }, { "epoch": 1.536937274558486, "grad_norm": 0.5828736798772212, "learning_rate": 6.70567710265139e-07, "loss": 0.2576, "step": 32809 }, { "epoch": 1.5369841195484142, "grad_norm": 0.5622494793358569, "learning_rate": 6.704384597486238e-07, "loss": 0.2554, "step": 32810 }, { "epoch": 1.5370309645383426, "grad_norm": 0.6347018506252936, "learning_rate": 6.703092197607755e-07, "loss": 0.2943, "step": 32811 }, { "epoch": 1.537077809528271, "grad_norm": 0.6280673687629755, "learning_rate": 6.701799903023379e-07, "loss": 0.2958, "step": 32812 }, { "epoch": 1.5371246545181991, "grad_norm": 0.6134160116938272, "learning_rate": 6.700507713740545e-07, "loss": 0.2665, "step": 32813 }, { "epoch": 1.5371714995081276, "grad_norm": 0.5870856107964725, "learning_rate": 6.699215629766689e-07, "loss": 0.2717, "step": 32814 }, { "epoch": 1.537218344498056, "grad_norm": 0.6477617235750207, "learning_rate": 6.697923651109256e-07, "loss": 0.2967, "step": 32815 }, { "epoch": 1.5372651894879843, "grad_norm": 0.6243982874211496, "learning_rate": 6.69663177777567e-07, "loss": 0.2849, "step": 32816 }, { "epoch": 1.5373120344779125, "grad_norm": 0.5920586249141014, "learning_rate": 6.695340009773363e-07, "loss": 0.2698, "step": 32817 }, { "epoch": 1.537358879467841, "grad_norm": 0.5857940331521339, "learning_rate": 6.694048347109772e-07, "loss": 0.2698, "step": 32818 }, { "epoch": 1.5374057244577692, "grad_norm": 0.6728261720936888, "learning_rate": 6.692756789792332e-07, "loss": 0.2846, "step": 32819 }, { "epoch": 1.5374525694476975, "grad_norm": 0.5947647704312968, "learning_rate": 6.691465337828477e-07, "loss": 0.2637, "step": 32820 }, { "epoch": 1.537499414437626, "grad_norm": 0.6108213490705952, "learning_rate": 6.690173991225641e-07, "loss": 0.2753, "step": 32821 }, { "epoch": 1.5375462594275542, "grad_norm": 0.5696909969141443, "learning_rate": 6.688882749991246e-07, "loss": 0.2589, "step": 32822 }, { "epoch": 1.5375931044174824, "grad_norm": 0.6072510933190259, "learning_rate": 6.687591614132738e-07, "loss": 0.2859, "step": 32823 }, { "epoch": 1.5376399494074109, "grad_norm": 0.6335081376763556, "learning_rate": 6.686300583657527e-07, "loss": 0.2724, "step": 32824 }, { "epoch": 1.5376867943973394, "grad_norm": 0.556552852666859, "learning_rate": 6.685009658573055e-07, "loss": 0.2506, "step": 32825 }, { "epoch": 1.5377336393872674, "grad_norm": 0.6162897639947972, "learning_rate": 6.683718838886749e-07, "loss": 0.2799, "step": 32826 }, { "epoch": 1.5377804843771958, "grad_norm": 0.5656808622325099, "learning_rate": 6.682428124606044e-07, "loss": 0.2639, "step": 32827 }, { "epoch": 1.5378273293671243, "grad_norm": 0.6470025787980691, "learning_rate": 6.681137515738356e-07, "loss": 0.296, "step": 32828 }, { "epoch": 1.5378741743570525, "grad_norm": 0.5997394467566304, "learning_rate": 6.679847012291116e-07, "loss": 0.2713, "step": 32829 }, { "epoch": 1.5379210193469808, "grad_norm": 0.6399250662120191, "learning_rate": 6.678556614271759e-07, "loss": 0.2905, "step": 32830 }, { "epoch": 1.5379678643369092, "grad_norm": 0.617667980968896, "learning_rate": 6.677266321687694e-07, "loss": 0.2731, "step": 32831 }, { "epoch": 1.5380147093268375, "grad_norm": 0.6393889143760482, "learning_rate": 6.675976134546358e-07, "loss": 0.2869, "step": 32832 }, { "epoch": 1.5380615543167657, "grad_norm": 0.5858136568179225, "learning_rate": 6.674686052855184e-07, "loss": 0.2884, "step": 32833 }, { "epoch": 1.5381083993066942, "grad_norm": 0.566681869441911, "learning_rate": 6.673396076621575e-07, "loss": 0.2668, "step": 32834 }, { "epoch": 1.5381552442966224, "grad_norm": 0.5881545346413599, "learning_rate": 6.672106205852965e-07, "loss": 0.2752, "step": 32835 }, { "epoch": 1.5382020892865507, "grad_norm": 0.5668427732823627, "learning_rate": 6.670816440556788e-07, "loss": 0.2515, "step": 32836 }, { "epoch": 1.5382489342764791, "grad_norm": 0.5673265156470502, "learning_rate": 6.669526780740445e-07, "loss": 0.2632, "step": 32837 }, { "epoch": 1.5382957792664076, "grad_norm": 0.6138177810022828, "learning_rate": 6.66823722641137e-07, "loss": 0.2717, "step": 32838 }, { "epoch": 1.5383426242563358, "grad_norm": 0.6082939229114794, "learning_rate": 6.66694777757699e-07, "loss": 0.2744, "step": 32839 }, { "epoch": 1.538389469246264, "grad_norm": 0.5953189773435018, "learning_rate": 6.66565843424471e-07, "loss": 0.2704, "step": 32840 }, { "epoch": 1.5384363142361925, "grad_norm": 0.6093800476794722, "learning_rate": 6.664369196421955e-07, "loss": 0.2771, "step": 32841 }, { "epoch": 1.5384831592261208, "grad_norm": 0.5944441076591782, "learning_rate": 6.663080064116148e-07, "loss": 0.2929, "step": 32842 }, { "epoch": 1.538530004216049, "grad_norm": 0.5482030180430896, "learning_rate": 6.661791037334716e-07, "loss": 0.2582, "step": 32843 }, { "epoch": 1.5385768492059775, "grad_norm": 0.6043024845002422, "learning_rate": 6.660502116085057e-07, "loss": 0.2802, "step": 32844 }, { "epoch": 1.5386236941959057, "grad_norm": 0.5770609546972227, "learning_rate": 6.659213300374609e-07, "loss": 0.2715, "step": 32845 }, { "epoch": 1.538670539185834, "grad_norm": 0.6159627507186012, "learning_rate": 6.65792459021077e-07, "loss": 0.2813, "step": 32846 }, { "epoch": 1.5387173841757624, "grad_norm": 0.6345306213551977, "learning_rate": 6.656635985600965e-07, "loss": 0.2821, "step": 32847 }, { "epoch": 1.5387642291656909, "grad_norm": 0.6228677367351566, "learning_rate": 6.655347486552611e-07, "loss": 0.2901, "step": 32848 }, { "epoch": 1.538811074155619, "grad_norm": 0.5854388307935644, "learning_rate": 6.654059093073118e-07, "loss": 0.2704, "step": 32849 }, { "epoch": 1.5388579191455474, "grad_norm": 0.5877849844303609, "learning_rate": 6.652770805169914e-07, "loss": 0.2665, "step": 32850 }, { "epoch": 1.5389047641354758, "grad_norm": 0.6505735149145732, "learning_rate": 6.6514826228504e-07, "loss": 0.2973, "step": 32851 }, { "epoch": 1.538951609125404, "grad_norm": 0.5804899696758602, "learning_rate": 6.650194546121988e-07, "loss": 0.2757, "step": 32852 }, { "epoch": 1.5389984541153323, "grad_norm": 0.5560809222229014, "learning_rate": 6.648906574992092e-07, "loss": 0.2531, "step": 32853 }, { "epoch": 1.5390452991052608, "grad_norm": 0.5949054494120221, "learning_rate": 6.647618709468126e-07, "loss": 0.2742, "step": 32854 }, { "epoch": 1.539092144095189, "grad_norm": 0.61105377350265, "learning_rate": 6.646330949557503e-07, "loss": 0.2694, "step": 32855 }, { "epoch": 1.5391389890851173, "grad_norm": 0.58403257258153, "learning_rate": 6.645043295267631e-07, "loss": 0.2704, "step": 32856 }, { "epoch": 1.5391858340750457, "grad_norm": 0.5822415862799387, "learning_rate": 6.643755746605929e-07, "loss": 0.2572, "step": 32857 }, { "epoch": 1.539232679064974, "grad_norm": 0.5943355657249083, "learning_rate": 6.642468303579799e-07, "loss": 0.2613, "step": 32858 }, { "epoch": 1.5392795240549022, "grad_norm": 0.5847028732679583, "learning_rate": 6.641180966196644e-07, "loss": 0.2562, "step": 32859 }, { "epoch": 1.5393263690448307, "grad_norm": 0.5985762036272165, "learning_rate": 6.639893734463879e-07, "loss": 0.2672, "step": 32860 }, { "epoch": 1.5393732140347591, "grad_norm": 0.5667946275696731, "learning_rate": 6.638606608388909e-07, "loss": 0.2645, "step": 32861 }, { "epoch": 1.5394200590246871, "grad_norm": 0.6326322829917425, "learning_rate": 6.637319587979141e-07, "loss": 0.3015, "step": 32862 }, { "epoch": 1.5394669040146156, "grad_norm": 0.6099057222103913, "learning_rate": 6.636032673241993e-07, "loss": 0.2711, "step": 32863 }, { "epoch": 1.539513749004544, "grad_norm": 0.5522806016896941, "learning_rate": 6.634745864184855e-07, "loss": 0.2627, "step": 32864 }, { "epoch": 1.5395605939944723, "grad_norm": 0.6335750720023782, "learning_rate": 6.633459160815145e-07, "loss": 0.2926, "step": 32865 }, { "epoch": 1.5396074389844006, "grad_norm": 0.6298042797471529, "learning_rate": 6.632172563140255e-07, "loss": 0.2841, "step": 32866 }, { "epoch": 1.539654283974329, "grad_norm": 0.5774862460031619, "learning_rate": 6.630886071167594e-07, "loss": 0.2591, "step": 32867 }, { "epoch": 1.5397011289642573, "grad_norm": 0.5886076470588824, "learning_rate": 6.629599684904566e-07, "loss": 0.2739, "step": 32868 }, { "epoch": 1.5397479739541855, "grad_norm": 0.6316243204879518, "learning_rate": 6.628313404358583e-07, "loss": 0.2864, "step": 32869 }, { "epoch": 1.539794818944114, "grad_norm": 0.6119337484454997, "learning_rate": 6.627027229537034e-07, "loss": 0.2648, "step": 32870 }, { "epoch": 1.5398416639340422, "grad_norm": 0.5305370706652114, "learning_rate": 6.625741160447322e-07, "loss": 0.2639, "step": 32871 }, { "epoch": 1.5398885089239704, "grad_norm": 0.6000423300811482, "learning_rate": 6.62445519709686e-07, "loss": 0.2667, "step": 32872 }, { "epoch": 1.539935353913899, "grad_norm": 0.6072417644208162, "learning_rate": 6.623169339493033e-07, "loss": 0.2701, "step": 32873 }, { "epoch": 1.5399821989038274, "grad_norm": 0.5989043339452207, "learning_rate": 6.621883587643246e-07, "loss": 0.2757, "step": 32874 }, { "epoch": 1.5400290438937556, "grad_norm": 0.5441697451515334, "learning_rate": 6.620597941554899e-07, "loss": 0.2754, "step": 32875 }, { "epoch": 1.5400758888836839, "grad_norm": 0.5810448115267397, "learning_rate": 6.619312401235401e-07, "loss": 0.2711, "step": 32876 }, { "epoch": 1.5401227338736123, "grad_norm": 0.6162210178507951, "learning_rate": 6.618026966692132e-07, "loss": 0.2914, "step": 32877 }, { "epoch": 1.5401695788635406, "grad_norm": 0.5591510976540766, "learning_rate": 6.616741637932505e-07, "loss": 0.2582, "step": 32878 }, { "epoch": 1.5402164238534688, "grad_norm": 0.5977927805734444, "learning_rate": 6.615456414963903e-07, "loss": 0.2841, "step": 32879 }, { "epoch": 1.5402632688433973, "grad_norm": 0.6332899563097978, "learning_rate": 6.614171297793728e-07, "loss": 0.2672, "step": 32880 }, { "epoch": 1.5403101138333255, "grad_norm": 0.6378141216790478, "learning_rate": 6.612886286429377e-07, "loss": 0.276, "step": 32881 }, { "epoch": 1.5403569588232537, "grad_norm": 0.6274676706241934, "learning_rate": 6.611601380878249e-07, "loss": 0.2782, "step": 32882 }, { "epoch": 1.5404038038131822, "grad_norm": 0.5859855750234936, "learning_rate": 6.610316581147727e-07, "loss": 0.2659, "step": 32883 }, { "epoch": 1.5404506488031107, "grad_norm": 0.6120948739975872, "learning_rate": 6.609031887245213e-07, "loss": 0.2765, "step": 32884 }, { "epoch": 1.5404974937930387, "grad_norm": 0.5938781993186025, "learning_rate": 6.607747299178102e-07, "loss": 0.273, "step": 32885 }, { "epoch": 1.5405443387829671, "grad_norm": 0.5821793633284437, "learning_rate": 6.606462816953777e-07, "loss": 0.2603, "step": 32886 }, { "epoch": 1.5405911837728956, "grad_norm": 0.5772471516807296, "learning_rate": 6.605178440579635e-07, "loss": 0.2585, "step": 32887 }, { "epoch": 1.5406380287628239, "grad_norm": 0.5790903941826532, "learning_rate": 6.603894170063078e-07, "loss": 0.2741, "step": 32888 }, { "epoch": 1.540684873752752, "grad_norm": 0.583107348229206, "learning_rate": 6.602610005411475e-07, "loss": 0.2621, "step": 32889 }, { "epoch": 1.5407317187426806, "grad_norm": 0.6397844500676638, "learning_rate": 6.601325946632228e-07, "loss": 0.2831, "step": 32890 }, { "epoch": 1.5407785637326088, "grad_norm": 0.607629361893147, "learning_rate": 6.600041993732725e-07, "loss": 0.268, "step": 32891 }, { "epoch": 1.540825408722537, "grad_norm": 0.5737605287816163, "learning_rate": 6.598758146720366e-07, "loss": 0.2576, "step": 32892 }, { "epoch": 1.5408722537124655, "grad_norm": 0.5689838865086255, "learning_rate": 6.597474405602522e-07, "loss": 0.27, "step": 32893 }, { "epoch": 1.5409190987023937, "grad_norm": 0.5520302879016833, "learning_rate": 6.596190770386593e-07, "loss": 0.2697, "step": 32894 }, { "epoch": 1.540965943692322, "grad_norm": 0.6287884078774685, "learning_rate": 6.594907241079951e-07, "loss": 0.2858, "step": 32895 }, { "epoch": 1.5410127886822504, "grad_norm": 0.5582035777190322, "learning_rate": 6.593623817689995e-07, "loss": 0.2662, "step": 32896 }, { "epoch": 1.541059633672179, "grad_norm": 0.584515134012431, "learning_rate": 6.592340500224106e-07, "loss": 0.2692, "step": 32897 }, { "epoch": 1.541106478662107, "grad_norm": 0.5874216917230697, "learning_rate": 6.591057288689673e-07, "loss": 0.2677, "step": 32898 }, { "epoch": 1.5411533236520354, "grad_norm": 0.6221009158885863, "learning_rate": 6.589774183094083e-07, "loss": 0.2823, "step": 32899 }, { "epoch": 1.5412001686419639, "grad_norm": 0.5716072733364181, "learning_rate": 6.588491183444717e-07, "loss": 0.2536, "step": 32900 }, { "epoch": 1.541247013631892, "grad_norm": 0.5941972693516534, "learning_rate": 6.58720828974895e-07, "loss": 0.2913, "step": 32901 }, { "epoch": 1.5412938586218203, "grad_norm": 0.5664799578244899, "learning_rate": 6.58592550201417e-07, "loss": 0.2666, "step": 32902 }, { "epoch": 1.5413407036117488, "grad_norm": 0.5752163084966532, "learning_rate": 6.584642820247761e-07, "loss": 0.2625, "step": 32903 }, { "epoch": 1.541387548601677, "grad_norm": 0.606884354462278, "learning_rate": 6.583360244457105e-07, "loss": 0.2675, "step": 32904 }, { "epoch": 1.5414343935916053, "grad_norm": 0.5981261094338371, "learning_rate": 6.582077774649578e-07, "loss": 0.2871, "step": 32905 }, { "epoch": 1.5414812385815337, "grad_norm": 0.5980686552883631, "learning_rate": 6.580795410832577e-07, "loss": 0.2699, "step": 32906 }, { "epoch": 1.541528083571462, "grad_norm": 0.6376070944984532, "learning_rate": 6.579513153013467e-07, "loss": 0.266, "step": 32907 }, { "epoch": 1.5415749285613902, "grad_norm": 0.5762121822459247, "learning_rate": 6.57823100119962e-07, "loss": 0.2679, "step": 32908 }, { "epoch": 1.5416217735513187, "grad_norm": 0.5853814163317608, "learning_rate": 6.576948955398423e-07, "loss": 0.2632, "step": 32909 }, { "epoch": 1.5416686185412471, "grad_norm": 0.5537378357945402, "learning_rate": 6.575667015617257e-07, "loss": 0.2582, "step": 32910 }, { "epoch": 1.5417154635311754, "grad_norm": 0.5881052509491496, "learning_rate": 6.574385181863496e-07, "loss": 0.2705, "step": 32911 }, { "epoch": 1.5417623085211036, "grad_norm": 0.5836268157909119, "learning_rate": 6.573103454144525e-07, "loss": 0.2621, "step": 32912 }, { "epoch": 1.541809153511032, "grad_norm": 0.6203570053849811, "learning_rate": 6.571821832467703e-07, "loss": 0.2669, "step": 32913 }, { "epoch": 1.5418559985009603, "grad_norm": 0.651841506684642, "learning_rate": 6.57054031684042e-07, "loss": 0.2902, "step": 32914 }, { "epoch": 1.5419028434908886, "grad_norm": 0.6042899635346621, "learning_rate": 6.569258907270043e-07, "loss": 0.2593, "step": 32915 }, { "epoch": 1.541949688480817, "grad_norm": 0.5633422863498322, "learning_rate": 6.567977603763948e-07, "loss": 0.2617, "step": 32916 }, { "epoch": 1.5419965334707453, "grad_norm": 0.5756201645679431, "learning_rate": 6.566696406329506e-07, "loss": 0.263, "step": 32917 }, { "epoch": 1.5420433784606735, "grad_norm": 0.6123210682426261, "learning_rate": 6.565415314974102e-07, "loss": 0.2904, "step": 32918 }, { "epoch": 1.542090223450602, "grad_norm": 0.6024634021783505, "learning_rate": 6.564134329705091e-07, "loss": 0.2836, "step": 32919 }, { "epoch": 1.5421370684405304, "grad_norm": 0.6399308590691659, "learning_rate": 6.562853450529855e-07, "loss": 0.2948, "step": 32920 }, { "epoch": 1.5421839134304585, "grad_norm": 0.6185027921528871, "learning_rate": 6.561572677455771e-07, "loss": 0.2832, "step": 32921 }, { "epoch": 1.542230758420387, "grad_norm": 0.6135714483458496, "learning_rate": 6.560292010490194e-07, "loss": 0.2644, "step": 32922 }, { "epoch": 1.5422776034103154, "grad_norm": 0.5849725248787956, "learning_rate": 6.559011449640501e-07, "loss": 0.2676, "step": 32923 }, { "epoch": 1.5423244484002436, "grad_norm": 0.6273185360957055, "learning_rate": 6.557730994914069e-07, "loss": 0.2717, "step": 32924 }, { "epoch": 1.5423712933901719, "grad_norm": 0.664721872251476, "learning_rate": 6.556450646318255e-07, "loss": 0.2867, "step": 32925 }, { "epoch": 1.5424181383801003, "grad_norm": 0.6292849585176117, "learning_rate": 6.555170403860431e-07, "loss": 0.278, "step": 32926 }, { "epoch": 1.5424649833700286, "grad_norm": 0.6552702216997393, "learning_rate": 6.553890267547971e-07, "loss": 0.2721, "step": 32927 }, { "epoch": 1.5425118283599568, "grad_norm": 0.6387292020413735, "learning_rate": 6.55261023738823e-07, "loss": 0.2603, "step": 32928 }, { "epoch": 1.5425586733498853, "grad_norm": 0.5979988604316545, "learning_rate": 6.551330313388582e-07, "loss": 0.2784, "step": 32929 }, { "epoch": 1.5426055183398135, "grad_norm": 0.6504068228930856, "learning_rate": 6.550050495556395e-07, "loss": 0.2918, "step": 32930 }, { "epoch": 1.5426523633297418, "grad_norm": 0.5894739537933728, "learning_rate": 6.548770783899025e-07, "loss": 0.2762, "step": 32931 }, { "epoch": 1.5426992083196702, "grad_norm": 0.6134084048847951, "learning_rate": 6.547491178423843e-07, "loss": 0.2766, "step": 32932 }, { "epoch": 1.5427460533095987, "grad_norm": 0.5734075193798451, "learning_rate": 6.546211679138209e-07, "loss": 0.2627, "step": 32933 }, { "epoch": 1.5427928982995267, "grad_norm": 0.5745012937558253, "learning_rate": 6.544932286049496e-07, "loss": 0.2685, "step": 32934 }, { "epoch": 1.5428397432894552, "grad_norm": 0.5676061980978565, "learning_rate": 6.543652999165054e-07, "loss": 0.2637, "step": 32935 }, { "epoch": 1.5428865882793836, "grad_norm": 0.6056523490622686, "learning_rate": 6.542373818492256e-07, "loss": 0.2776, "step": 32936 }, { "epoch": 1.5429334332693119, "grad_norm": 0.6039732537479773, "learning_rate": 6.541094744038451e-07, "loss": 0.2636, "step": 32937 }, { "epoch": 1.5429802782592401, "grad_norm": 0.6257240997914529, "learning_rate": 6.539815775811007e-07, "loss": 0.2898, "step": 32938 }, { "epoch": 1.5430271232491686, "grad_norm": 0.6243805956622822, "learning_rate": 6.538536913817281e-07, "loss": 0.2793, "step": 32939 }, { "epoch": 1.5430739682390968, "grad_norm": 0.5449203378804838, "learning_rate": 6.537258158064636e-07, "loss": 0.2453, "step": 32940 }, { "epoch": 1.543120813229025, "grad_norm": 0.6186720082492756, "learning_rate": 6.535979508560436e-07, "loss": 0.2845, "step": 32941 }, { "epoch": 1.5431676582189535, "grad_norm": 0.6220056028457934, "learning_rate": 6.534700965312026e-07, "loss": 0.2826, "step": 32942 }, { "epoch": 1.5432145032088818, "grad_norm": 0.6262640338169093, "learning_rate": 6.533422528326783e-07, "loss": 0.2743, "step": 32943 }, { "epoch": 1.54326134819881, "grad_norm": 0.5522941754593482, "learning_rate": 6.532144197612039e-07, "loss": 0.2583, "step": 32944 }, { "epoch": 1.5433081931887385, "grad_norm": 0.627883211592979, "learning_rate": 6.530865973175166e-07, "loss": 0.2686, "step": 32945 }, { "epoch": 1.543355038178667, "grad_norm": 0.5621330785549021, "learning_rate": 6.529587855023517e-07, "loss": 0.2688, "step": 32946 }, { "epoch": 1.5434018831685952, "grad_norm": 0.6192734832976377, "learning_rate": 6.528309843164446e-07, "loss": 0.2719, "step": 32947 }, { "epoch": 1.5434487281585234, "grad_norm": 0.5841038474494797, "learning_rate": 6.527031937605319e-07, "loss": 0.2609, "step": 32948 }, { "epoch": 1.5434955731484519, "grad_norm": 0.5834551717463152, "learning_rate": 6.525754138353477e-07, "loss": 0.2719, "step": 32949 }, { "epoch": 1.5435424181383801, "grad_norm": 0.6125384052157476, "learning_rate": 6.524476445416272e-07, "loss": 0.2746, "step": 32950 }, { "epoch": 1.5435892631283084, "grad_norm": 0.568460050776728, "learning_rate": 6.523198858801061e-07, "loss": 0.2549, "step": 32951 }, { "epoch": 1.5436361081182368, "grad_norm": 0.5874447756779683, "learning_rate": 6.521921378515195e-07, "loss": 0.265, "step": 32952 }, { "epoch": 1.543682953108165, "grad_norm": 0.5622133809514658, "learning_rate": 6.52064400456603e-07, "loss": 0.2696, "step": 32953 }, { "epoch": 1.5437297980980933, "grad_norm": 0.5622824729158353, "learning_rate": 6.51936673696091e-07, "loss": 0.265, "step": 32954 }, { "epoch": 1.5437766430880218, "grad_norm": 0.5614989030559784, "learning_rate": 6.518089575707201e-07, "loss": 0.2548, "step": 32955 }, { "epoch": 1.5438234880779502, "grad_norm": 0.5610228850641642, "learning_rate": 6.516812520812241e-07, "loss": 0.2702, "step": 32956 }, { "epoch": 1.5438703330678782, "grad_norm": 0.5786765277366358, "learning_rate": 6.515535572283368e-07, "loss": 0.2748, "step": 32957 }, { "epoch": 1.5439171780578067, "grad_norm": 0.6153528763312456, "learning_rate": 6.514258730127945e-07, "loss": 0.2499, "step": 32958 }, { "epoch": 1.5439640230477352, "grad_norm": 0.5997958643291388, "learning_rate": 6.512981994353315e-07, "loss": 0.2743, "step": 32959 }, { "epoch": 1.5440108680376634, "grad_norm": 0.6064583113940948, "learning_rate": 6.51170536496683e-07, "loss": 0.2839, "step": 32960 }, { "epoch": 1.5440577130275916, "grad_norm": 0.5463439769359961, "learning_rate": 6.510428841975838e-07, "loss": 0.259, "step": 32961 }, { "epoch": 1.5441045580175201, "grad_norm": 0.5462698249821482, "learning_rate": 6.509152425387677e-07, "loss": 0.2403, "step": 32962 }, { "epoch": 1.5441514030074484, "grad_norm": 0.5848120223733329, "learning_rate": 6.507876115209702e-07, "loss": 0.2655, "step": 32963 }, { "epoch": 1.5441982479973766, "grad_norm": 0.6046349326503083, "learning_rate": 6.506599911449244e-07, "loss": 0.2671, "step": 32964 }, { "epoch": 1.544245092987305, "grad_norm": 0.5703919261181859, "learning_rate": 6.505323814113654e-07, "loss": 0.2718, "step": 32965 }, { "epoch": 1.5442919379772333, "grad_norm": 0.6458689320433502, "learning_rate": 6.50404782321028e-07, "loss": 0.2724, "step": 32966 }, { "epoch": 1.5443387829671615, "grad_norm": 0.6249110613606021, "learning_rate": 6.502771938746469e-07, "loss": 0.2699, "step": 32967 }, { "epoch": 1.54438562795709, "grad_norm": 0.5997206429250153, "learning_rate": 6.501496160729548e-07, "loss": 0.2614, "step": 32968 }, { "epoch": 1.5444324729470185, "grad_norm": 0.5865527855482771, "learning_rate": 6.500220489166866e-07, "loss": 0.2511, "step": 32969 }, { "epoch": 1.5444793179369465, "grad_norm": 0.5846374646545953, "learning_rate": 6.498944924065772e-07, "loss": 0.2582, "step": 32970 }, { "epoch": 1.544526162926875, "grad_norm": 0.5845717936662137, "learning_rate": 6.497669465433595e-07, "loss": 0.2754, "step": 32971 }, { "epoch": 1.5445730079168034, "grad_norm": 0.609451590316304, "learning_rate": 6.496394113277679e-07, "loss": 0.2733, "step": 32972 }, { "epoch": 1.5446198529067316, "grad_norm": 0.5923639390321273, "learning_rate": 6.495118867605373e-07, "loss": 0.2763, "step": 32973 }, { "epoch": 1.54466669789666, "grad_norm": 0.6080029477470876, "learning_rate": 6.493843728423998e-07, "loss": 0.2838, "step": 32974 }, { "epoch": 1.5447135428865884, "grad_norm": 0.5864401390781562, "learning_rate": 6.492568695740902e-07, "loss": 0.2688, "step": 32975 }, { "epoch": 1.5447603878765166, "grad_norm": 0.6215517080784334, "learning_rate": 6.491293769563428e-07, "loss": 0.2745, "step": 32976 }, { "epoch": 1.5448072328664448, "grad_norm": 0.594682009482527, "learning_rate": 6.490018949898899e-07, "loss": 0.2762, "step": 32977 }, { "epoch": 1.5448540778563733, "grad_norm": 0.6431908703029534, "learning_rate": 6.48874423675466e-07, "loss": 0.2761, "step": 32978 }, { "epoch": 1.5449009228463015, "grad_norm": 0.5776544322778939, "learning_rate": 6.487469630138052e-07, "loss": 0.2684, "step": 32979 }, { "epoch": 1.5449477678362298, "grad_norm": 0.6216536892318397, "learning_rate": 6.486195130056394e-07, "loss": 0.2656, "step": 32980 }, { "epoch": 1.5449946128261582, "grad_norm": 0.5738436791519911, "learning_rate": 6.484920736517031e-07, "loss": 0.276, "step": 32981 }, { "epoch": 1.5450414578160867, "grad_norm": 0.5832810464278829, "learning_rate": 6.483646449527298e-07, "loss": 0.2639, "step": 32982 }, { "epoch": 1.545088302806015, "grad_norm": 0.5921437573426731, "learning_rate": 6.482372269094531e-07, "loss": 0.2724, "step": 32983 }, { "epoch": 1.5451351477959432, "grad_norm": 0.5753906693348967, "learning_rate": 6.481098195226049e-07, "loss": 0.2804, "step": 32984 }, { "epoch": 1.5451819927858716, "grad_norm": 0.6059929352626834, "learning_rate": 6.479824227929199e-07, "loss": 0.2615, "step": 32985 }, { "epoch": 1.5452288377758, "grad_norm": 0.606362851543813, "learning_rate": 6.4785503672113e-07, "loss": 0.2716, "step": 32986 }, { "epoch": 1.5452756827657281, "grad_norm": 0.5937030939763074, "learning_rate": 6.477276613079689e-07, "loss": 0.2675, "step": 32987 }, { "epoch": 1.5453225277556566, "grad_norm": 0.5973128936640003, "learning_rate": 6.476002965541695e-07, "loss": 0.2662, "step": 32988 }, { "epoch": 1.5453693727455848, "grad_norm": 0.5718723709793163, "learning_rate": 6.474729424604647e-07, "loss": 0.266, "step": 32989 }, { "epoch": 1.545416217735513, "grad_norm": 0.5608404238087121, "learning_rate": 6.473455990275885e-07, "loss": 0.2678, "step": 32990 }, { "epoch": 1.5454630627254415, "grad_norm": 0.6537308931697212, "learning_rate": 6.472182662562726e-07, "loss": 0.2752, "step": 32991 }, { "epoch": 1.54550990771537, "grad_norm": 0.6063366834902404, "learning_rate": 6.470909441472493e-07, "loss": 0.275, "step": 32992 }, { "epoch": 1.545556752705298, "grad_norm": 0.5634283972354205, "learning_rate": 6.469636327012516e-07, "loss": 0.252, "step": 32993 }, { "epoch": 1.5456035976952265, "grad_norm": 0.5912621576271166, "learning_rate": 6.468363319190127e-07, "loss": 0.2593, "step": 32994 }, { "epoch": 1.545650442685155, "grad_norm": 0.6049860626523753, "learning_rate": 6.467090418012648e-07, "loss": 0.2629, "step": 32995 }, { "epoch": 1.5456972876750832, "grad_norm": 0.5660436543878148, "learning_rate": 6.465817623487405e-07, "loss": 0.2638, "step": 32996 }, { "epoch": 1.5457441326650114, "grad_norm": 0.6818971058190154, "learning_rate": 6.464544935621731e-07, "loss": 0.28, "step": 32997 }, { "epoch": 1.54579097765494, "grad_norm": 0.5817343224386681, "learning_rate": 6.463272354422942e-07, "loss": 0.275, "step": 32998 }, { "epoch": 1.5458378226448681, "grad_norm": 0.6047208887444615, "learning_rate": 6.461999879898356e-07, "loss": 0.2789, "step": 32999 }, { "epoch": 1.5458846676347964, "grad_norm": 0.5805005063422352, "learning_rate": 6.460727512055301e-07, "loss": 0.2606, "step": 33000 }, { "epoch": 1.5459315126247248, "grad_norm": 0.6014704690170608, "learning_rate": 6.459455250901098e-07, "loss": 0.2781, "step": 33001 }, { "epoch": 1.545978357614653, "grad_norm": 0.6241948669072914, "learning_rate": 6.458183096443071e-07, "loss": 0.2628, "step": 33002 }, { "epoch": 1.5460252026045813, "grad_norm": 0.5539083281819791, "learning_rate": 6.456911048688547e-07, "loss": 0.2705, "step": 33003 }, { "epoch": 1.5460720475945098, "grad_norm": 0.6072638532232616, "learning_rate": 6.455639107644832e-07, "loss": 0.2845, "step": 33004 }, { "epoch": 1.5461188925844382, "grad_norm": 0.5891289264080033, "learning_rate": 6.454367273319259e-07, "loss": 0.2618, "step": 33005 }, { "epoch": 1.5461657375743663, "grad_norm": 0.6484637807671795, "learning_rate": 6.453095545719135e-07, "loss": 0.2773, "step": 33006 }, { "epoch": 1.5462125825642947, "grad_norm": 0.6183315321084634, "learning_rate": 6.451823924851785e-07, "loss": 0.2713, "step": 33007 }, { "epoch": 1.5462594275542232, "grad_norm": 0.6034923476726773, "learning_rate": 6.450552410724526e-07, "loss": 0.2665, "step": 33008 }, { "epoch": 1.5463062725441514, "grad_norm": 0.5658802290063629, "learning_rate": 6.449281003344676e-07, "loss": 0.2556, "step": 33009 }, { "epoch": 1.5463531175340797, "grad_norm": 0.6011008744630795, "learning_rate": 6.448009702719557e-07, "loss": 0.2638, "step": 33010 }, { "epoch": 1.5463999625240081, "grad_norm": 0.5482767605916973, "learning_rate": 6.446738508856473e-07, "loss": 0.2546, "step": 33011 }, { "epoch": 1.5464468075139364, "grad_norm": 0.5363811971424722, "learning_rate": 6.445467421762755e-07, "loss": 0.253, "step": 33012 }, { "epoch": 1.5464936525038646, "grad_norm": 0.6336115930504036, "learning_rate": 6.444196441445699e-07, "loss": 0.2852, "step": 33013 }, { "epoch": 1.546540497493793, "grad_norm": 0.6041990923910026, "learning_rate": 6.442925567912628e-07, "loss": 0.2784, "step": 33014 }, { "epoch": 1.5465873424837213, "grad_norm": 0.5890909593941775, "learning_rate": 6.441654801170857e-07, "loss": 0.268, "step": 33015 }, { "epoch": 1.5466341874736496, "grad_norm": 0.5841895197959426, "learning_rate": 6.440384141227707e-07, "loss": 0.2701, "step": 33016 }, { "epoch": 1.546681032463578, "grad_norm": 0.5775415060190535, "learning_rate": 6.439113588090473e-07, "loss": 0.2806, "step": 33017 }, { "epoch": 1.5467278774535065, "grad_norm": 0.6032409209971262, "learning_rate": 6.437843141766481e-07, "loss": 0.2755, "step": 33018 }, { "epoch": 1.5467747224434347, "grad_norm": 0.606915406248371, "learning_rate": 6.436572802263031e-07, "loss": 0.2627, "step": 33019 }, { "epoch": 1.546821567433363, "grad_norm": 0.6046954026783276, "learning_rate": 6.435302569587437e-07, "loss": 0.2774, "step": 33020 }, { "epoch": 1.5468684124232914, "grad_norm": 0.6464704331834099, "learning_rate": 6.434032443747013e-07, "loss": 0.3, "step": 33021 }, { "epoch": 1.5469152574132197, "grad_norm": 0.5509920962990322, "learning_rate": 6.432762424749073e-07, "loss": 0.2595, "step": 33022 }, { "epoch": 1.546962102403148, "grad_norm": 0.5807444103919792, "learning_rate": 6.43149251260091e-07, "loss": 0.2656, "step": 33023 }, { "epoch": 1.5470089473930764, "grad_norm": 0.6187207534362082, "learning_rate": 6.430222707309841e-07, "loss": 0.2744, "step": 33024 }, { "epoch": 1.5470557923830046, "grad_norm": 0.5980377803201727, "learning_rate": 6.428953008883179e-07, "loss": 0.2801, "step": 33025 }, { "epoch": 1.5471026373729329, "grad_norm": 0.5981538393482063, "learning_rate": 6.427683417328217e-07, "loss": 0.2633, "step": 33026 }, { "epoch": 1.5471494823628613, "grad_norm": 0.5685206179691221, "learning_rate": 6.426413932652273e-07, "loss": 0.26, "step": 33027 }, { "epoch": 1.5471963273527898, "grad_norm": 0.5865473244961557, "learning_rate": 6.425144554862653e-07, "loss": 0.2791, "step": 33028 }, { "epoch": 1.5472431723427178, "grad_norm": 0.6068455186076884, "learning_rate": 6.423875283966652e-07, "loss": 0.2673, "step": 33029 }, { "epoch": 1.5472900173326463, "grad_norm": 0.5799004961702962, "learning_rate": 6.422606119971578e-07, "loss": 0.2641, "step": 33030 }, { "epoch": 1.5473368623225747, "grad_norm": 0.6013198319303102, "learning_rate": 6.421337062884736e-07, "loss": 0.2655, "step": 33031 }, { "epoch": 1.547383707312503, "grad_norm": 0.5786161525610178, "learning_rate": 6.420068112713437e-07, "loss": 0.2684, "step": 33032 }, { "epoch": 1.5474305523024312, "grad_norm": 0.6063215229449926, "learning_rate": 6.41879926946497e-07, "loss": 0.2796, "step": 33033 }, { "epoch": 1.5474773972923597, "grad_norm": 0.5934479564317129, "learning_rate": 6.417530533146651e-07, "loss": 0.2809, "step": 33034 }, { "epoch": 1.547524242282288, "grad_norm": 0.5946534188383769, "learning_rate": 6.416261903765764e-07, "loss": 0.2663, "step": 33035 }, { "epoch": 1.5475710872722162, "grad_norm": 0.6009292796556196, "learning_rate": 6.414993381329621e-07, "loss": 0.271, "step": 33036 }, { "epoch": 1.5476179322621446, "grad_norm": 0.5471762370163504, "learning_rate": 6.413724965845516e-07, "loss": 0.2453, "step": 33037 }, { "epoch": 1.5476647772520729, "grad_norm": 0.5984122100906046, "learning_rate": 6.412456657320756e-07, "loss": 0.2723, "step": 33038 }, { "epoch": 1.547711622242001, "grad_norm": 0.6260077274309914, "learning_rate": 6.411188455762643e-07, "loss": 0.2677, "step": 33039 }, { "epoch": 1.5477584672319296, "grad_norm": 0.5775376248535098, "learning_rate": 6.409920361178467e-07, "loss": 0.2503, "step": 33040 }, { "epoch": 1.547805312221858, "grad_norm": 0.56715939935576, "learning_rate": 6.408652373575522e-07, "loss": 0.2608, "step": 33041 }, { "epoch": 1.547852157211786, "grad_norm": 0.5943555531256344, "learning_rate": 6.407384492961107e-07, "loss": 0.2611, "step": 33042 }, { "epoch": 1.5478990022017145, "grad_norm": 0.6346189432640842, "learning_rate": 6.406116719342523e-07, "loss": 0.2835, "step": 33043 }, { "epoch": 1.547945847191643, "grad_norm": 0.6029634327619332, "learning_rate": 6.404849052727063e-07, "loss": 0.2839, "step": 33044 }, { "epoch": 1.5479926921815712, "grad_norm": 0.6081553876476494, "learning_rate": 6.403581493122024e-07, "loss": 0.2642, "step": 33045 }, { "epoch": 1.5480395371714994, "grad_norm": 0.5767742986692355, "learning_rate": 6.402314040534705e-07, "loss": 0.2583, "step": 33046 }, { "epoch": 1.548086382161428, "grad_norm": 0.5584534576736515, "learning_rate": 6.401046694972396e-07, "loss": 0.2545, "step": 33047 }, { "epoch": 1.5481332271513562, "grad_norm": 0.5865620716391486, "learning_rate": 6.399779456442379e-07, "loss": 0.2658, "step": 33048 }, { "epoch": 1.5481800721412844, "grad_norm": 0.5549515390470613, "learning_rate": 6.398512324951958e-07, "loss": 0.2721, "step": 33049 }, { "epoch": 1.5482269171312129, "grad_norm": 0.5833444889767647, "learning_rate": 6.397245300508422e-07, "loss": 0.2558, "step": 33050 }, { "epoch": 1.548273762121141, "grad_norm": 0.6263622674541616, "learning_rate": 6.395978383119067e-07, "loss": 0.2671, "step": 33051 }, { "epoch": 1.5483206071110693, "grad_norm": 0.5671918993383941, "learning_rate": 6.394711572791185e-07, "loss": 0.2741, "step": 33052 }, { "epoch": 1.5483674521009978, "grad_norm": 0.5915453551422353, "learning_rate": 6.393444869532056e-07, "loss": 0.2757, "step": 33053 }, { "epoch": 1.5484142970909263, "grad_norm": 0.610689208357877, "learning_rate": 6.392178273348981e-07, "loss": 0.2944, "step": 33054 }, { "epoch": 1.5484611420808545, "grad_norm": 0.5952155852768327, "learning_rate": 6.390911784249234e-07, "loss": 0.2681, "step": 33055 }, { "epoch": 1.5485079870707827, "grad_norm": 0.6116201736733701, "learning_rate": 6.389645402240116e-07, "loss": 0.2624, "step": 33056 }, { "epoch": 1.5485548320607112, "grad_norm": 0.5978293158851707, "learning_rate": 6.388379127328911e-07, "loss": 0.2718, "step": 33057 }, { "epoch": 1.5486016770506394, "grad_norm": 0.6107693356214026, "learning_rate": 6.387112959522912e-07, "loss": 0.2659, "step": 33058 }, { "epoch": 1.5486485220405677, "grad_norm": 0.6113498500972452, "learning_rate": 6.385846898829393e-07, "loss": 0.291, "step": 33059 }, { "epoch": 1.5486953670304962, "grad_norm": 0.6352792495673838, "learning_rate": 6.384580945255647e-07, "loss": 0.2852, "step": 33060 }, { "epoch": 1.5487422120204244, "grad_norm": 0.5823118827282299, "learning_rate": 6.383315098808965e-07, "loss": 0.2765, "step": 33061 }, { "epoch": 1.5487890570103526, "grad_norm": 0.5556558901760613, "learning_rate": 6.38204935949662e-07, "loss": 0.2636, "step": 33062 }, { "epoch": 1.548835902000281, "grad_norm": 0.6055157078936936, "learning_rate": 6.380783727325898e-07, "loss": 0.2728, "step": 33063 }, { "epoch": 1.5488827469902096, "grad_norm": 0.574836101431231, "learning_rate": 6.379518202304097e-07, "loss": 0.262, "step": 33064 }, { "epoch": 1.5489295919801376, "grad_norm": 0.6360837075592962, "learning_rate": 6.37825278443848e-07, "loss": 0.2894, "step": 33065 }, { "epoch": 1.548976436970066, "grad_norm": 0.607900116531243, "learning_rate": 6.376987473736337e-07, "loss": 0.2978, "step": 33066 }, { "epoch": 1.5490232819599945, "grad_norm": 0.5981670797108092, "learning_rate": 6.37572227020496e-07, "loss": 0.2818, "step": 33067 }, { "epoch": 1.5490701269499227, "grad_norm": 0.5729437943402993, "learning_rate": 6.374457173851609e-07, "loss": 0.259, "step": 33068 }, { "epoch": 1.549116971939851, "grad_norm": 0.6251704201538577, "learning_rate": 6.373192184683579e-07, "loss": 0.2786, "step": 33069 }, { "epoch": 1.5491638169297794, "grad_norm": 0.6009481776125337, "learning_rate": 6.371927302708153e-07, "loss": 0.2773, "step": 33070 }, { "epoch": 1.5492106619197077, "grad_norm": 0.604457084532972, "learning_rate": 6.370662527932595e-07, "loss": 0.2722, "step": 33071 }, { "epoch": 1.549257506909636, "grad_norm": 0.583844973987084, "learning_rate": 6.369397860364193e-07, "loss": 0.2703, "step": 33072 }, { "epoch": 1.5493043518995644, "grad_norm": 0.5856304450415007, "learning_rate": 6.368133300010223e-07, "loss": 0.2666, "step": 33073 }, { "epoch": 1.5493511968894926, "grad_norm": 0.6077980103944903, "learning_rate": 6.366868846877968e-07, "loss": 0.2857, "step": 33074 }, { "epoch": 1.5493980418794209, "grad_norm": 0.6162758715947053, "learning_rate": 6.365604500974695e-07, "loss": 0.2713, "step": 33075 }, { "epoch": 1.5494448868693493, "grad_norm": 0.5948464194293059, "learning_rate": 6.364340262307687e-07, "loss": 0.2681, "step": 33076 }, { "epoch": 1.5494917318592778, "grad_norm": 0.5654225062148404, "learning_rate": 6.363076130884219e-07, "loss": 0.2563, "step": 33077 }, { "epoch": 1.5495385768492058, "grad_norm": 0.6836338475264495, "learning_rate": 6.361812106711559e-07, "loss": 0.2834, "step": 33078 }, { "epoch": 1.5495854218391343, "grad_norm": 0.5895623930782998, "learning_rate": 6.360548189796986e-07, "loss": 0.2703, "step": 33079 }, { "epoch": 1.5496322668290627, "grad_norm": 0.5772611926803372, "learning_rate": 6.359284380147773e-07, "loss": 0.2567, "step": 33080 }, { "epoch": 1.549679111818991, "grad_norm": 0.5715229295789507, "learning_rate": 6.358020677771201e-07, "loss": 0.2485, "step": 33081 }, { "epoch": 1.5497259568089192, "grad_norm": 0.5578268687173359, "learning_rate": 6.356757082674525e-07, "loss": 0.2473, "step": 33082 }, { "epoch": 1.5497728017988477, "grad_norm": 0.659176207198834, "learning_rate": 6.355493594865036e-07, "loss": 0.2874, "step": 33083 }, { "epoch": 1.549819646788776, "grad_norm": 0.5798601323554768, "learning_rate": 6.354230214349988e-07, "loss": 0.2481, "step": 33084 }, { "epoch": 1.5498664917787042, "grad_norm": 0.6441976906678452, "learning_rate": 6.352966941136656e-07, "loss": 0.2727, "step": 33085 }, { "epoch": 1.5499133367686326, "grad_norm": 0.5970207302748805, "learning_rate": 6.351703775232315e-07, "loss": 0.2674, "step": 33086 }, { "epoch": 1.5499601817585609, "grad_norm": 0.6560029204065353, "learning_rate": 6.35044071664423e-07, "loss": 0.2708, "step": 33087 }, { "epoch": 1.5500070267484891, "grad_norm": 0.6092729926258481, "learning_rate": 6.349177765379679e-07, "loss": 0.2734, "step": 33088 }, { "epoch": 1.5500538717384176, "grad_norm": 0.6025575893618473, "learning_rate": 6.347914921445924e-07, "loss": 0.2824, "step": 33089 }, { "epoch": 1.550100716728346, "grad_norm": 0.6385971392638292, "learning_rate": 6.346652184850222e-07, "loss": 0.2839, "step": 33090 }, { "epoch": 1.5501475617182743, "grad_norm": 0.6031469673763195, "learning_rate": 6.345389555599849e-07, "loss": 0.264, "step": 33091 }, { "epoch": 1.5501944067082025, "grad_norm": 0.5808202709202842, "learning_rate": 6.344127033702069e-07, "loss": 0.2616, "step": 33092 }, { "epoch": 1.550241251698131, "grad_norm": 0.5763528484883549, "learning_rate": 6.342864619164149e-07, "loss": 0.2604, "step": 33093 }, { "epoch": 1.5502880966880592, "grad_norm": 0.5785375772576946, "learning_rate": 6.341602311993356e-07, "loss": 0.2634, "step": 33094 }, { "epoch": 1.5503349416779875, "grad_norm": 0.5763761071282754, "learning_rate": 6.340340112196958e-07, "loss": 0.2649, "step": 33095 }, { "epoch": 1.550381786667916, "grad_norm": 0.6101850403358607, "learning_rate": 6.339078019782211e-07, "loss": 0.2596, "step": 33096 }, { "epoch": 1.5504286316578442, "grad_norm": 0.6291372634734698, "learning_rate": 6.337816034756375e-07, "loss": 0.2904, "step": 33097 }, { "epoch": 1.5504754766477724, "grad_norm": 0.5790146683068711, "learning_rate": 6.336554157126715e-07, "loss": 0.2672, "step": 33098 }, { "epoch": 1.5505223216377009, "grad_norm": 0.5712578023398313, "learning_rate": 6.335292386900496e-07, "loss": 0.2612, "step": 33099 }, { "epoch": 1.5505691666276293, "grad_norm": 0.585302219951614, "learning_rate": 6.33403072408498e-07, "loss": 0.2566, "step": 33100 }, { "epoch": 1.5506160116175574, "grad_norm": 0.6048244487601966, "learning_rate": 6.332769168687431e-07, "loss": 0.2705, "step": 33101 }, { "epoch": 1.5506628566074858, "grad_norm": 0.5970753490723582, "learning_rate": 6.331507720715097e-07, "loss": 0.274, "step": 33102 }, { "epoch": 1.5507097015974143, "grad_norm": 0.656496199707951, "learning_rate": 6.33024638017525e-07, "loss": 0.2771, "step": 33103 }, { "epoch": 1.5507565465873425, "grad_norm": 0.5726343430907563, "learning_rate": 6.328985147075137e-07, "loss": 0.2702, "step": 33104 }, { "epoch": 1.5508033915772708, "grad_norm": 0.5822341507827223, "learning_rate": 6.327724021422018e-07, "loss": 0.2578, "step": 33105 }, { "epoch": 1.5508502365671992, "grad_norm": 0.5650866187953059, "learning_rate": 6.326463003223157e-07, "loss": 0.2555, "step": 33106 }, { "epoch": 1.5508970815571275, "grad_norm": 0.6383298117526652, "learning_rate": 6.325202092485816e-07, "loss": 0.2745, "step": 33107 }, { "epoch": 1.5509439265470557, "grad_norm": 0.6356413468901586, "learning_rate": 6.323941289217236e-07, "loss": 0.2836, "step": 33108 }, { "epoch": 1.5509907715369842, "grad_norm": 0.5791496378145924, "learning_rate": 6.32268059342468e-07, "loss": 0.259, "step": 33109 }, { "epoch": 1.5510376165269124, "grad_norm": 0.5910493807973601, "learning_rate": 6.321420005115408e-07, "loss": 0.2734, "step": 33110 }, { "epoch": 1.5510844615168407, "grad_norm": 0.5512708228004567, "learning_rate": 6.320159524296662e-07, "loss": 0.2688, "step": 33111 }, { "epoch": 1.5511313065067691, "grad_norm": 0.6289147344342061, "learning_rate": 6.318899150975705e-07, "loss": 0.2819, "step": 33112 }, { "epoch": 1.5511781514966976, "grad_norm": 0.5881785176354798, "learning_rate": 6.317638885159793e-07, "loss": 0.2509, "step": 33113 }, { "epoch": 1.5512249964866256, "grad_norm": 0.6112635918360406, "learning_rate": 6.316378726856168e-07, "loss": 0.278, "step": 33114 }, { "epoch": 1.551271841476554, "grad_norm": 0.5625969981637726, "learning_rate": 6.315118676072088e-07, "loss": 0.2679, "step": 33115 }, { "epoch": 1.5513186864664825, "grad_norm": 0.6316525716313167, "learning_rate": 6.313858732814809e-07, "loss": 0.2863, "step": 33116 }, { "epoch": 1.5513655314564108, "grad_norm": 0.5892395421838613, "learning_rate": 6.312598897091571e-07, "loss": 0.2739, "step": 33117 }, { "epoch": 1.551412376446339, "grad_norm": 0.6312288019073191, "learning_rate": 6.311339168909628e-07, "loss": 0.2892, "step": 33118 }, { "epoch": 1.5514592214362675, "grad_norm": 0.5544152832049385, "learning_rate": 6.310079548276241e-07, "loss": 0.2504, "step": 33119 }, { "epoch": 1.5515060664261957, "grad_norm": 0.5662269462867278, "learning_rate": 6.308820035198637e-07, "loss": 0.2741, "step": 33120 }, { "epoch": 1.551552911416124, "grad_norm": 0.5853350626306014, "learning_rate": 6.307560629684078e-07, "loss": 0.257, "step": 33121 }, { "epoch": 1.5515997564060524, "grad_norm": 0.5762237166284601, "learning_rate": 6.306301331739812e-07, "loss": 0.2614, "step": 33122 }, { "epoch": 1.5516466013959807, "grad_norm": 0.633524504930393, "learning_rate": 6.305042141373086e-07, "loss": 0.2958, "step": 33123 }, { "epoch": 1.551693446385909, "grad_norm": 0.5810924786526499, "learning_rate": 6.30378305859114e-07, "loss": 0.2729, "step": 33124 }, { "epoch": 1.5517402913758374, "grad_norm": 0.5511932709544156, "learning_rate": 6.302524083401229e-07, "loss": 0.2513, "step": 33125 }, { "epoch": 1.5517871363657658, "grad_norm": 0.6238667808523294, "learning_rate": 6.301265215810584e-07, "loss": 0.2629, "step": 33126 }, { "epoch": 1.551833981355694, "grad_norm": 0.559842972140519, "learning_rate": 6.30000645582646e-07, "loss": 0.2604, "step": 33127 }, { "epoch": 1.5518808263456223, "grad_norm": 0.580884311472612, "learning_rate": 6.298747803456099e-07, "loss": 0.267, "step": 33128 }, { "epoch": 1.5519276713355508, "grad_norm": 0.6335787069132043, "learning_rate": 6.297489258706741e-07, "loss": 0.2935, "step": 33129 }, { "epoch": 1.551974516325479, "grad_norm": 0.6536773489124958, "learning_rate": 6.296230821585642e-07, "loss": 0.2829, "step": 33130 }, { "epoch": 1.5520213613154072, "grad_norm": 0.5949479697386917, "learning_rate": 6.294972492100032e-07, "loss": 0.2647, "step": 33131 }, { "epoch": 1.5520682063053357, "grad_norm": 0.5731160542965154, "learning_rate": 6.293714270257148e-07, "loss": 0.258, "step": 33132 }, { "epoch": 1.552115051295264, "grad_norm": 0.6196499636291457, "learning_rate": 6.292456156064236e-07, "loss": 0.2739, "step": 33133 }, { "epoch": 1.5521618962851922, "grad_norm": 0.6088241031574139, "learning_rate": 6.291198149528535e-07, "loss": 0.2615, "step": 33134 }, { "epoch": 1.5522087412751207, "grad_norm": 0.6457339707108825, "learning_rate": 6.28994025065729e-07, "loss": 0.2876, "step": 33135 }, { "epoch": 1.5522555862650491, "grad_norm": 0.6420559659596131, "learning_rate": 6.288682459457734e-07, "loss": 0.2811, "step": 33136 }, { "epoch": 1.5523024312549771, "grad_norm": 0.6030214228763014, "learning_rate": 6.287424775937115e-07, "loss": 0.2678, "step": 33137 }, { "epoch": 1.5523492762449056, "grad_norm": 0.5976029295436267, "learning_rate": 6.286167200102663e-07, "loss": 0.2778, "step": 33138 }, { "epoch": 1.552396121234834, "grad_norm": 0.5671009762383584, "learning_rate": 6.284909731961608e-07, "loss": 0.2583, "step": 33139 }, { "epoch": 1.5524429662247623, "grad_norm": 0.626319259611591, "learning_rate": 6.283652371521196e-07, "loss": 0.2857, "step": 33140 }, { "epoch": 1.5524898112146905, "grad_norm": 0.6418079805186216, "learning_rate": 6.282395118788659e-07, "loss": 0.2954, "step": 33141 }, { "epoch": 1.552536656204619, "grad_norm": 0.6054749053130692, "learning_rate": 6.281137973771231e-07, "loss": 0.2639, "step": 33142 }, { "epoch": 1.5525835011945472, "grad_norm": 0.6069748124071118, "learning_rate": 6.279880936476151e-07, "loss": 0.2692, "step": 33143 }, { "epoch": 1.5526303461844755, "grad_norm": 0.6078437178863252, "learning_rate": 6.278624006910661e-07, "loss": 0.2643, "step": 33144 }, { "epoch": 1.552677191174404, "grad_norm": 0.6129658069633441, "learning_rate": 6.277367185081981e-07, "loss": 0.2801, "step": 33145 }, { "epoch": 1.5527240361643322, "grad_norm": 0.5428079791750863, "learning_rate": 6.27611047099734e-07, "loss": 0.2511, "step": 33146 }, { "epoch": 1.5527708811542604, "grad_norm": 0.5870444174809248, "learning_rate": 6.27485386466398e-07, "loss": 0.2677, "step": 33147 }, { "epoch": 1.552817726144189, "grad_norm": 0.6330427831549978, "learning_rate": 6.273597366089127e-07, "loss": 0.3057, "step": 33148 }, { "epoch": 1.5528645711341174, "grad_norm": 0.5759953554174944, "learning_rate": 6.272340975280014e-07, "loss": 0.2674, "step": 33149 }, { "epoch": 1.5529114161240454, "grad_norm": 0.6615349321386628, "learning_rate": 6.271084692243881e-07, "loss": 0.2919, "step": 33150 }, { "epoch": 1.5529582611139738, "grad_norm": 0.6403299860642325, "learning_rate": 6.269828516987939e-07, "loss": 0.282, "step": 33151 }, { "epoch": 1.5530051061039023, "grad_norm": 0.5851021157868187, "learning_rate": 6.268572449519434e-07, "loss": 0.2876, "step": 33152 }, { "epoch": 1.5530519510938305, "grad_norm": 0.5850286125813307, "learning_rate": 6.26731648984558e-07, "loss": 0.2709, "step": 33153 }, { "epoch": 1.5530987960837588, "grad_norm": 0.5802676526666763, "learning_rate": 6.266060637973612e-07, "loss": 0.2597, "step": 33154 }, { "epoch": 1.5531456410736872, "grad_norm": 0.5948339596054798, "learning_rate": 6.264804893910755e-07, "loss": 0.2662, "step": 33155 }, { "epoch": 1.5531924860636155, "grad_norm": 0.5956977982550697, "learning_rate": 6.263549257664244e-07, "loss": 0.2601, "step": 33156 }, { "epoch": 1.5532393310535437, "grad_norm": 0.588971891386133, "learning_rate": 6.262293729241292e-07, "loss": 0.2757, "step": 33157 }, { "epoch": 1.5532861760434722, "grad_norm": 0.5942195194223113, "learning_rate": 6.261038308649137e-07, "loss": 0.268, "step": 33158 }, { "epoch": 1.5533330210334004, "grad_norm": 0.6003906819192106, "learning_rate": 6.259782995894989e-07, "loss": 0.261, "step": 33159 }, { "epoch": 1.5533798660233287, "grad_norm": 0.5938005082734232, "learning_rate": 6.25852779098608e-07, "loss": 0.2624, "step": 33160 }, { "epoch": 1.5534267110132571, "grad_norm": 0.6053547142131823, "learning_rate": 6.257272693929631e-07, "loss": 0.2857, "step": 33161 }, { "epoch": 1.5534735560031856, "grad_norm": 0.59861593031812, "learning_rate": 6.256017704732876e-07, "loss": 0.2731, "step": 33162 }, { "epoch": 1.5535204009931138, "grad_norm": 0.6320134923023226, "learning_rate": 6.254762823403021e-07, "loss": 0.2853, "step": 33163 }, { "epoch": 1.553567245983042, "grad_norm": 0.6518444744687036, "learning_rate": 6.253508049947294e-07, "loss": 0.2741, "step": 33164 }, { "epoch": 1.5536140909729705, "grad_norm": 0.5965582234261405, "learning_rate": 6.252253384372925e-07, "loss": 0.274, "step": 33165 }, { "epoch": 1.5536609359628988, "grad_norm": 0.595212715129879, "learning_rate": 6.250998826687116e-07, "loss": 0.2756, "step": 33166 }, { "epoch": 1.553707780952827, "grad_norm": 0.6142405265826711, "learning_rate": 6.249744376897096e-07, "loss": 0.2813, "step": 33167 }, { "epoch": 1.5537546259427555, "grad_norm": 0.6210119328626837, "learning_rate": 6.248490035010093e-07, "loss": 0.2615, "step": 33168 }, { "epoch": 1.5538014709326837, "grad_norm": 0.6060782318129591, "learning_rate": 6.24723580103331e-07, "loss": 0.2738, "step": 33169 }, { "epoch": 1.553848315922612, "grad_norm": 0.6347302821887771, "learning_rate": 6.245981674973972e-07, "loss": 0.3035, "step": 33170 }, { "epoch": 1.5538951609125404, "grad_norm": 0.6553287400558648, "learning_rate": 6.244727656839298e-07, "loss": 0.2752, "step": 33171 }, { "epoch": 1.553942005902469, "grad_norm": 0.5979661034667084, "learning_rate": 6.243473746636505e-07, "loss": 0.2723, "step": 33172 }, { "epoch": 1.553988850892397, "grad_norm": 0.5855046956468047, "learning_rate": 6.242219944372802e-07, "loss": 0.2542, "step": 33173 }, { "epoch": 1.5540356958823254, "grad_norm": 0.6034172801251505, "learning_rate": 6.240966250055413e-07, "loss": 0.2845, "step": 33174 }, { "epoch": 1.5540825408722538, "grad_norm": 0.6100740177941276, "learning_rate": 6.239712663691544e-07, "loss": 0.2812, "step": 33175 }, { "epoch": 1.554129385862182, "grad_norm": 0.6377447207665183, "learning_rate": 6.238459185288412e-07, "loss": 0.2766, "step": 33176 }, { "epoch": 1.5541762308521103, "grad_norm": 0.6629304644745196, "learning_rate": 6.237205814853231e-07, "loss": 0.2794, "step": 33177 }, { "epoch": 1.5542230758420388, "grad_norm": 0.6174638432853716, "learning_rate": 6.235952552393218e-07, "loss": 0.2793, "step": 33178 }, { "epoch": 1.554269920831967, "grad_norm": 0.6221383197054039, "learning_rate": 6.234699397915586e-07, "loss": 0.2859, "step": 33179 }, { "epoch": 1.5543167658218953, "grad_norm": 0.5810208029573608, "learning_rate": 6.233446351427544e-07, "loss": 0.27, "step": 33180 }, { "epoch": 1.5543636108118237, "grad_norm": 0.6289803910639198, "learning_rate": 6.232193412936294e-07, "loss": 0.2737, "step": 33181 }, { "epoch": 1.554410455801752, "grad_norm": 0.5669311654948236, "learning_rate": 6.230940582449052e-07, "loss": 0.2568, "step": 33182 }, { "epoch": 1.5544573007916802, "grad_norm": 0.5823845725054906, "learning_rate": 6.22968785997303e-07, "loss": 0.2682, "step": 33183 }, { "epoch": 1.5545041457816087, "grad_norm": 0.581299860338432, "learning_rate": 6.228435245515438e-07, "loss": 0.2606, "step": 33184 }, { "epoch": 1.5545509907715371, "grad_norm": 0.5734537458085559, "learning_rate": 6.227182739083485e-07, "loss": 0.2827, "step": 33185 }, { "epoch": 1.5545978357614652, "grad_norm": 0.6160899253009088, "learning_rate": 6.22593034068438e-07, "loss": 0.2781, "step": 33186 }, { "epoch": 1.5546446807513936, "grad_norm": 0.6370333877231202, "learning_rate": 6.22467805032533e-07, "loss": 0.3, "step": 33187 }, { "epoch": 1.554691525741322, "grad_norm": 0.560605942541627, "learning_rate": 6.22342586801353e-07, "loss": 0.2431, "step": 33188 }, { "epoch": 1.5547383707312503, "grad_norm": 0.6187406143480464, "learning_rate": 6.222173793756195e-07, "loss": 0.2788, "step": 33189 }, { "epoch": 1.5547852157211786, "grad_norm": 0.583669102499441, "learning_rate": 6.220921827560531e-07, "loss": 0.2695, "step": 33190 }, { "epoch": 1.554832060711107, "grad_norm": 0.6172139104993045, "learning_rate": 6.219669969433742e-07, "loss": 0.275, "step": 33191 }, { "epoch": 1.5548789057010353, "grad_norm": 0.5920727409521894, "learning_rate": 6.218418219383038e-07, "loss": 0.2656, "step": 33192 }, { "epoch": 1.5549257506909635, "grad_norm": 0.5929092230057968, "learning_rate": 6.217166577415612e-07, "loss": 0.2732, "step": 33193 }, { "epoch": 1.554972595680892, "grad_norm": 0.5600887788598742, "learning_rate": 6.215915043538676e-07, "loss": 0.2613, "step": 33194 }, { "epoch": 1.5550194406708202, "grad_norm": 0.6083517660315441, "learning_rate": 6.214663617759423e-07, "loss": 0.2653, "step": 33195 }, { "epoch": 1.5550662856607484, "grad_norm": 0.6836803197120714, "learning_rate": 6.213412300085056e-07, "loss": 0.2937, "step": 33196 }, { "epoch": 1.555113130650677, "grad_norm": 0.6381498781124518, "learning_rate": 6.212161090522781e-07, "loss": 0.2645, "step": 33197 }, { "epoch": 1.5551599756406054, "grad_norm": 0.5995380638400983, "learning_rate": 6.210909989079805e-07, "loss": 0.2822, "step": 33198 }, { "epoch": 1.5552068206305336, "grad_norm": 0.5904018258221543, "learning_rate": 6.209658995763312e-07, "loss": 0.2731, "step": 33199 }, { "epoch": 1.5552536656204619, "grad_norm": 0.605226125482961, "learning_rate": 6.208408110580507e-07, "loss": 0.2588, "step": 33200 }, { "epoch": 1.5553005106103903, "grad_norm": 0.6206476353250514, "learning_rate": 6.207157333538599e-07, "loss": 0.275, "step": 33201 }, { "epoch": 1.5553473556003186, "grad_norm": 0.5622556293972569, "learning_rate": 6.20590666464477e-07, "loss": 0.2587, "step": 33202 }, { "epoch": 1.5553942005902468, "grad_norm": 0.6288406492392826, "learning_rate": 6.204656103906223e-07, "loss": 0.2766, "step": 33203 }, { "epoch": 1.5554410455801753, "grad_norm": 0.5999037389352171, "learning_rate": 6.203405651330166e-07, "loss": 0.2736, "step": 33204 }, { "epoch": 1.5554878905701035, "grad_norm": 0.5675181013345935, "learning_rate": 6.202155306923777e-07, "loss": 0.2504, "step": 33205 }, { "epoch": 1.5555347355600317, "grad_norm": 0.5593452869319026, "learning_rate": 6.200905070694258e-07, "loss": 0.2628, "step": 33206 }, { "epoch": 1.5555815805499602, "grad_norm": 0.6144455259274566, "learning_rate": 6.199654942648814e-07, "loss": 0.2758, "step": 33207 }, { "epoch": 1.5556284255398887, "grad_norm": 0.5743819066888279, "learning_rate": 6.198404922794621e-07, "loss": 0.2459, "step": 33208 }, { "epoch": 1.5556752705298167, "grad_norm": 0.5891944176498713, "learning_rate": 6.197155011138883e-07, "loss": 0.2743, "step": 33209 }, { "epoch": 1.5557221155197452, "grad_norm": 0.5727378861735304, "learning_rate": 6.195905207688802e-07, "loss": 0.2592, "step": 33210 }, { "epoch": 1.5557689605096736, "grad_norm": 0.6211427480595088, "learning_rate": 6.19465551245155e-07, "loss": 0.2614, "step": 33211 }, { "epoch": 1.5558158054996019, "grad_norm": 0.5873743862020405, "learning_rate": 6.193405925434332e-07, "loss": 0.2588, "step": 33212 }, { "epoch": 1.55586265048953, "grad_norm": 0.6114739035305731, "learning_rate": 6.192156446644332e-07, "loss": 0.2842, "step": 33213 }, { "epoch": 1.5559094954794586, "grad_norm": 0.6849716684272644, "learning_rate": 6.190907076088753e-07, "loss": 0.276, "step": 33214 }, { "epoch": 1.5559563404693868, "grad_norm": 0.5791239043892071, "learning_rate": 6.189657813774771e-07, "loss": 0.2602, "step": 33215 }, { "epoch": 1.556003185459315, "grad_norm": 0.5901904724771793, "learning_rate": 6.18840865970958e-07, "loss": 0.2718, "step": 33216 }, { "epoch": 1.5560500304492435, "grad_norm": 0.6282361215737253, "learning_rate": 6.187159613900376e-07, "loss": 0.2721, "step": 33217 }, { "epoch": 1.5560968754391717, "grad_norm": 0.6243954207640261, "learning_rate": 6.185910676354331e-07, "loss": 0.2792, "step": 33218 }, { "epoch": 1.5561437204291, "grad_norm": 0.6116990625631264, "learning_rate": 6.184661847078643e-07, "loss": 0.2703, "step": 33219 }, { "epoch": 1.5561905654190284, "grad_norm": 0.6493607536864351, "learning_rate": 6.183413126080495e-07, "loss": 0.2772, "step": 33220 }, { "epoch": 1.556237410408957, "grad_norm": 0.6078343556549815, "learning_rate": 6.182164513367086e-07, "loss": 0.2791, "step": 33221 }, { "epoch": 1.556284255398885, "grad_norm": 0.6670261926947838, "learning_rate": 6.180916008945581e-07, "loss": 0.2752, "step": 33222 }, { "epoch": 1.5563311003888134, "grad_norm": 0.5660176003194414, "learning_rate": 6.179667612823182e-07, "loss": 0.2518, "step": 33223 }, { "epoch": 1.5563779453787419, "grad_norm": 0.56220917696784, "learning_rate": 6.178419325007056e-07, "loss": 0.2725, "step": 33224 }, { "epoch": 1.55642479036867, "grad_norm": 0.5947367776081404, "learning_rate": 6.177171145504399e-07, "loss": 0.2726, "step": 33225 }, { "epoch": 1.5564716353585983, "grad_norm": 0.6212890764727899, "learning_rate": 6.17592307432239e-07, "loss": 0.2669, "step": 33226 }, { "epoch": 1.5565184803485268, "grad_norm": 0.5712429811152528, "learning_rate": 6.174675111468214e-07, "loss": 0.2786, "step": 33227 }, { "epoch": 1.556565325338455, "grad_norm": 0.6070184515631056, "learning_rate": 6.173427256949058e-07, "loss": 0.2791, "step": 33228 }, { "epoch": 1.5566121703283833, "grad_norm": 0.6009410877588298, "learning_rate": 6.172179510772095e-07, "loss": 0.2721, "step": 33229 }, { "epoch": 1.5566590153183117, "grad_norm": 0.5955741456951844, "learning_rate": 6.1709318729445e-07, "loss": 0.2672, "step": 33230 }, { "epoch": 1.55670586030824, "grad_norm": 0.647655803596181, "learning_rate": 6.169684343473461e-07, "loss": 0.2783, "step": 33231 }, { "epoch": 1.5567527052981682, "grad_norm": 0.5848673434961961, "learning_rate": 6.168436922366153e-07, "loss": 0.2692, "step": 33232 }, { "epoch": 1.5567995502880967, "grad_norm": 0.6314903747339065, "learning_rate": 6.167189609629759e-07, "loss": 0.2807, "step": 33233 }, { "epoch": 1.5568463952780252, "grad_norm": 0.5631944085687304, "learning_rate": 6.165942405271455e-07, "loss": 0.2523, "step": 33234 }, { "epoch": 1.5568932402679534, "grad_norm": 0.6110353112936018, "learning_rate": 6.164695309298426e-07, "loss": 0.2661, "step": 33235 }, { "epoch": 1.5569400852578816, "grad_norm": 0.638924264390061, "learning_rate": 6.163448321717843e-07, "loss": 0.2848, "step": 33236 }, { "epoch": 1.55698693024781, "grad_norm": 0.567633452346289, "learning_rate": 6.162201442536871e-07, "loss": 0.2589, "step": 33237 }, { "epoch": 1.5570337752377383, "grad_norm": 0.6788161743069074, "learning_rate": 6.160954671762696e-07, "loss": 0.2853, "step": 33238 }, { "epoch": 1.5570806202276666, "grad_norm": 0.6069682153753699, "learning_rate": 6.15970800940249e-07, "loss": 0.2725, "step": 33239 }, { "epoch": 1.557127465217595, "grad_norm": 0.611956510232538, "learning_rate": 6.158461455463432e-07, "loss": 0.2727, "step": 33240 }, { "epoch": 1.5571743102075233, "grad_norm": 0.6010129814537374, "learning_rate": 6.157215009952699e-07, "loss": 0.266, "step": 33241 }, { "epoch": 1.5572211551974515, "grad_norm": 0.5895863439145884, "learning_rate": 6.15596867287745e-07, "loss": 0.2522, "step": 33242 }, { "epoch": 1.55726800018738, "grad_norm": 0.5881850823532562, "learning_rate": 6.154722444244874e-07, "loss": 0.2742, "step": 33243 }, { "epoch": 1.5573148451773084, "grad_norm": 0.5878661017023886, "learning_rate": 6.153476324062124e-07, "loss": 0.259, "step": 33244 }, { "epoch": 1.5573616901672365, "grad_norm": 0.6949703486261538, "learning_rate": 6.15223031233638e-07, "loss": 0.3045, "step": 33245 }, { "epoch": 1.557408535157165, "grad_norm": 0.6126640930344459, "learning_rate": 6.150984409074818e-07, "loss": 0.2802, "step": 33246 }, { "epoch": 1.5574553801470934, "grad_norm": 0.5824898033418672, "learning_rate": 6.149738614284606e-07, "loss": 0.2656, "step": 33247 }, { "epoch": 1.5575022251370216, "grad_norm": 0.5785574361254421, "learning_rate": 6.148492927972904e-07, "loss": 0.2668, "step": 33248 }, { "epoch": 1.5575490701269499, "grad_norm": 0.6479751612136475, "learning_rate": 6.147247350146887e-07, "loss": 0.3035, "step": 33249 }, { "epoch": 1.5575959151168783, "grad_norm": 0.6107628028645216, "learning_rate": 6.146001880813731e-07, "loss": 0.2814, "step": 33250 }, { "epoch": 1.5576427601068066, "grad_norm": 0.5704074209402803, "learning_rate": 6.144756519980588e-07, "loss": 0.2619, "step": 33251 }, { "epoch": 1.5576896050967348, "grad_norm": 0.5729152203332089, "learning_rate": 6.143511267654634e-07, "loss": 0.2579, "step": 33252 }, { "epoch": 1.5577364500866633, "grad_norm": 0.6257740446669828, "learning_rate": 6.142266123843038e-07, "loss": 0.2821, "step": 33253 }, { "epoch": 1.5577832950765915, "grad_norm": 0.6178361932875098, "learning_rate": 6.141021088552953e-07, "loss": 0.2838, "step": 33254 }, { "epoch": 1.5578301400665198, "grad_norm": 0.571377113337084, "learning_rate": 6.139776161791555e-07, "loss": 0.2658, "step": 33255 }, { "epoch": 1.5578769850564482, "grad_norm": 0.576873915302979, "learning_rate": 6.13853134356601e-07, "loss": 0.2552, "step": 33256 }, { "epoch": 1.5579238300463767, "grad_norm": 0.6257328423537019, "learning_rate": 6.137286633883469e-07, "loss": 0.287, "step": 33257 }, { "epoch": 1.5579706750363047, "grad_norm": 0.6065588616866452, "learning_rate": 6.136042032751102e-07, "loss": 0.2859, "step": 33258 }, { "epoch": 1.5580175200262332, "grad_norm": 0.5712784333735038, "learning_rate": 6.13479754017608e-07, "loss": 0.2725, "step": 33259 }, { "epoch": 1.5580643650161616, "grad_norm": 0.6247371910638033, "learning_rate": 6.13355315616555e-07, "loss": 0.2841, "step": 33260 }, { "epoch": 1.5581112100060899, "grad_norm": 0.6090398365375368, "learning_rate": 6.132308880726678e-07, "loss": 0.2658, "step": 33261 }, { "epoch": 1.5581580549960181, "grad_norm": 0.56270247074045, "learning_rate": 6.131064713866628e-07, "loss": 0.259, "step": 33262 }, { "epoch": 1.5582048999859466, "grad_norm": 0.5516615133975115, "learning_rate": 6.129820655592564e-07, "loss": 0.2551, "step": 33263 }, { "epoch": 1.5582517449758748, "grad_norm": 0.5296247775715759, "learning_rate": 6.128576705911632e-07, "loss": 0.2592, "step": 33264 }, { "epoch": 1.558298589965803, "grad_norm": 0.6183219517850828, "learning_rate": 6.127332864831004e-07, "loss": 0.2803, "step": 33265 }, { "epoch": 1.5583454349557315, "grad_norm": 0.5700448353762213, "learning_rate": 6.126089132357826e-07, "loss": 0.2614, "step": 33266 }, { "epoch": 1.5583922799456598, "grad_norm": 0.6502188866979124, "learning_rate": 6.124845508499261e-07, "loss": 0.2695, "step": 33267 }, { "epoch": 1.558439124935588, "grad_norm": 0.5876635322506318, "learning_rate": 6.123601993262468e-07, "loss": 0.2604, "step": 33268 }, { "epoch": 1.5584859699255165, "grad_norm": 0.5737185979062346, "learning_rate": 6.122358586654598e-07, "loss": 0.2566, "step": 33269 }, { "epoch": 1.558532814915445, "grad_norm": 0.6165852193548126, "learning_rate": 6.121115288682819e-07, "loss": 0.278, "step": 33270 }, { "epoch": 1.5585796599053732, "grad_norm": 0.5915530888527781, "learning_rate": 6.119872099354276e-07, "loss": 0.2629, "step": 33271 }, { "epoch": 1.5586265048953014, "grad_norm": 0.6102219341484919, "learning_rate": 6.118629018676117e-07, "loss": 0.2706, "step": 33272 }, { "epoch": 1.5586733498852299, "grad_norm": 0.5459174563277104, "learning_rate": 6.117386046655502e-07, "loss": 0.2526, "step": 33273 }, { "epoch": 1.5587201948751581, "grad_norm": 0.5538238251685913, "learning_rate": 6.116143183299584e-07, "loss": 0.2665, "step": 33274 }, { "epoch": 1.5587670398650864, "grad_norm": 0.5768621105316392, "learning_rate": 6.114900428615514e-07, "loss": 0.2704, "step": 33275 }, { "epoch": 1.5588138848550148, "grad_norm": 0.6193462314915487, "learning_rate": 6.113657782610447e-07, "loss": 0.2708, "step": 33276 }, { "epoch": 1.558860729844943, "grad_norm": 0.6166298456459033, "learning_rate": 6.112415245291542e-07, "loss": 0.2711, "step": 33277 }, { "epoch": 1.5589075748348713, "grad_norm": 0.636280280864558, "learning_rate": 6.111172816665936e-07, "loss": 0.2759, "step": 33278 }, { "epoch": 1.5589544198247998, "grad_norm": 0.6259782167103716, "learning_rate": 6.109930496740779e-07, "loss": 0.2699, "step": 33279 }, { "epoch": 1.5590012648147282, "grad_norm": 0.5965750713640289, "learning_rate": 6.108688285523223e-07, "loss": 0.2701, "step": 33280 }, { "epoch": 1.5590481098046562, "grad_norm": 0.6443040792828316, "learning_rate": 6.107446183020416e-07, "loss": 0.2731, "step": 33281 }, { "epoch": 1.5590949547945847, "grad_norm": 0.6306559588557149, "learning_rate": 6.106204189239509e-07, "loss": 0.2963, "step": 33282 }, { "epoch": 1.5591417997845132, "grad_norm": 0.6191653603131105, "learning_rate": 6.10496230418765e-07, "loss": 0.2794, "step": 33283 }, { "epoch": 1.5591886447744414, "grad_norm": 0.5962447700534873, "learning_rate": 6.103720527871989e-07, "loss": 0.2642, "step": 33284 }, { "epoch": 1.5592354897643697, "grad_norm": 0.6078955246806578, "learning_rate": 6.102478860299668e-07, "loss": 0.2922, "step": 33285 }, { "epoch": 1.5592823347542981, "grad_norm": 0.604902049121968, "learning_rate": 6.101237301477823e-07, "loss": 0.2822, "step": 33286 }, { "epoch": 1.5593291797442264, "grad_norm": 0.6513740998609896, "learning_rate": 6.099995851413607e-07, "loss": 0.2915, "step": 33287 }, { "epoch": 1.5593760247341546, "grad_norm": 0.608042248920122, "learning_rate": 6.098754510114166e-07, "loss": 0.2599, "step": 33288 }, { "epoch": 1.559422869724083, "grad_norm": 0.6207107337688562, "learning_rate": 6.097513277586642e-07, "loss": 0.279, "step": 33289 }, { "epoch": 1.5594697147140113, "grad_norm": 0.5666058580321875, "learning_rate": 6.096272153838184e-07, "loss": 0.2667, "step": 33290 }, { "epoch": 1.5595165597039395, "grad_norm": 0.6074567296538268, "learning_rate": 6.095031138875923e-07, "loss": 0.2785, "step": 33291 }, { "epoch": 1.559563404693868, "grad_norm": 0.5934876350661696, "learning_rate": 6.093790232707014e-07, "loss": 0.2607, "step": 33292 }, { "epoch": 1.5596102496837965, "grad_norm": 0.6029126576049191, "learning_rate": 6.092549435338579e-07, "loss": 0.2782, "step": 33293 }, { "epoch": 1.5596570946737245, "grad_norm": 0.59189233292875, "learning_rate": 6.091308746777774e-07, "loss": 0.2694, "step": 33294 }, { "epoch": 1.559703939663653, "grad_norm": 0.5950881098830767, "learning_rate": 6.090068167031735e-07, "loss": 0.2811, "step": 33295 }, { "epoch": 1.5597507846535814, "grad_norm": 0.5949198318150548, "learning_rate": 6.088827696107605e-07, "loss": 0.28, "step": 33296 }, { "epoch": 1.5597976296435097, "grad_norm": 0.6012740405685779, "learning_rate": 6.087587334012513e-07, "loss": 0.2811, "step": 33297 }, { "epoch": 1.559844474633438, "grad_norm": 0.6278093828295843, "learning_rate": 6.086347080753607e-07, "loss": 0.2747, "step": 33298 }, { "epoch": 1.5598913196233664, "grad_norm": 0.6530330860026313, "learning_rate": 6.085106936338017e-07, "loss": 0.2848, "step": 33299 }, { "epoch": 1.5599381646132946, "grad_norm": 0.6371767056664016, "learning_rate": 6.08386690077288e-07, "loss": 0.2732, "step": 33300 }, { "epoch": 1.5599850096032228, "grad_norm": 0.5941297268144297, "learning_rate": 6.082626974065334e-07, "loss": 0.2694, "step": 33301 }, { "epoch": 1.5600318545931513, "grad_norm": 0.593195216872475, "learning_rate": 6.081387156222523e-07, "loss": 0.2692, "step": 33302 }, { "epoch": 1.5600786995830795, "grad_norm": 0.5977302056216444, "learning_rate": 6.080147447251566e-07, "loss": 0.2717, "step": 33303 }, { "epoch": 1.5601255445730078, "grad_norm": 0.5875118491143496, "learning_rate": 6.078907847159607e-07, "loss": 0.2668, "step": 33304 }, { "epoch": 1.5601723895629362, "grad_norm": 0.5479996814098278, "learning_rate": 6.077668355953784e-07, "loss": 0.2554, "step": 33305 }, { "epoch": 1.5602192345528647, "grad_norm": 0.6372244732080408, "learning_rate": 6.076428973641216e-07, "loss": 0.2861, "step": 33306 }, { "epoch": 1.560266079542793, "grad_norm": 0.5968955284211686, "learning_rate": 6.075189700229045e-07, "loss": 0.2625, "step": 33307 }, { "epoch": 1.5603129245327212, "grad_norm": 0.591209129379092, "learning_rate": 6.073950535724405e-07, "loss": 0.2748, "step": 33308 }, { "epoch": 1.5603597695226497, "grad_norm": 0.619753616340443, "learning_rate": 6.072711480134416e-07, "loss": 0.2675, "step": 33309 }, { "epoch": 1.560406614512578, "grad_norm": 0.665222354868407, "learning_rate": 6.071472533466216e-07, "loss": 0.2922, "step": 33310 }, { "epoch": 1.5604534595025061, "grad_norm": 0.5483274766591595, "learning_rate": 6.070233695726935e-07, "loss": 0.2659, "step": 33311 }, { "epoch": 1.5605003044924346, "grad_norm": 0.6248668362988903, "learning_rate": 6.068994966923708e-07, "loss": 0.2896, "step": 33312 }, { "epoch": 1.5605471494823628, "grad_norm": 0.5751764138958155, "learning_rate": 6.06775634706365e-07, "loss": 0.2493, "step": 33313 }, { "epoch": 1.560593994472291, "grad_norm": 0.5725980440312394, "learning_rate": 6.066517836153901e-07, "loss": 0.2786, "step": 33314 }, { "epoch": 1.5606408394622195, "grad_norm": 0.5719697473488239, "learning_rate": 6.065279434201576e-07, "loss": 0.2632, "step": 33315 }, { "epoch": 1.560687684452148, "grad_norm": 0.6138273920872083, "learning_rate": 6.064041141213811e-07, "loss": 0.2825, "step": 33316 }, { "epoch": 1.560734529442076, "grad_norm": 0.6196612945161878, "learning_rate": 6.062802957197727e-07, "loss": 0.2831, "step": 33317 }, { "epoch": 1.5607813744320045, "grad_norm": 0.6138370858546114, "learning_rate": 6.061564882160456e-07, "loss": 0.2728, "step": 33318 }, { "epoch": 1.560828219421933, "grad_norm": 0.6122432034895301, "learning_rate": 6.060326916109125e-07, "loss": 0.2576, "step": 33319 }, { "epoch": 1.5608750644118612, "grad_norm": 0.6041632520231371, "learning_rate": 6.059089059050852e-07, "loss": 0.2712, "step": 33320 }, { "epoch": 1.5609219094017894, "grad_norm": 0.6038756529019815, "learning_rate": 6.057851310992752e-07, "loss": 0.2653, "step": 33321 }, { "epoch": 1.560968754391718, "grad_norm": 0.5774062297461304, "learning_rate": 6.056613671941958e-07, "loss": 0.2467, "step": 33322 }, { "epoch": 1.5610155993816461, "grad_norm": 0.5888782690962624, "learning_rate": 6.055376141905592e-07, "loss": 0.2694, "step": 33323 }, { "epoch": 1.5610624443715744, "grad_norm": 0.6037992204963042, "learning_rate": 6.054138720890774e-07, "loss": 0.2583, "step": 33324 }, { "epoch": 1.5611092893615028, "grad_norm": 0.6584064984758109, "learning_rate": 6.052901408904624e-07, "loss": 0.2981, "step": 33325 }, { "epoch": 1.561156134351431, "grad_norm": 0.5450566485106655, "learning_rate": 6.051664205954274e-07, "loss": 0.2574, "step": 33326 }, { "epoch": 1.5612029793413593, "grad_norm": 0.6059274059609893, "learning_rate": 6.050427112046834e-07, "loss": 0.2612, "step": 33327 }, { "epoch": 1.5612498243312878, "grad_norm": 0.6198107819230818, "learning_rate": 6.049190127189414e-07, "loss": 0.2718, "step": 33328 }, { "epoch": 1.5612966693212162, "grad_norm": 0.5857170618308736, "learning_rate": 6.047953251389144e-07, "loss": 0.2722, "step": 33329 }, { "epoch": 1.5613435143111443, "grad_norm": 0.5885139914666259, "learning_rate": 6.046716484653137e-07, "loss": 0.2627, "step": 33330 }, { "epoch": 1.5613903593010727, "grad_norm": 0.5677548546480031, "learning_rate": 6.045479826988515e-07, "loss": 0.253, "step": 33331 }, { "epoch": 1.5614372042910012, "grad_norm": 0.5739785423474013, "learning_rate": 6.044243278402398e-07, "loss": 0.2718, "step": 33332 }, { "epoch": 1.5614840492809294, "grad_norm": 0.6126808089108121, "learning_rate": 6.043006838901891e-07, "loss": 0.2721, "step": 33333 }, { "epoch": 1.5615308942708577, "grad_norm": 0.5388253548189325, "learning_rate": 6.041770508494121e-07, "loss": 0.2627, "step": 33334 }, { "epoch": 1.5615777392607861, "grad_norm": 0.6130987106932316, "learning_rate": 6.04053428718619e-07, "loss": 0.2644, "step": 33335 }, { "epoch": 1.5616245842507144, "grad_norm": 0.5866035445732561, "learning_rate": 6.03929817498522e-07, "loss": 0.2599, "step": 33336 }, { "epoch": 1.5616714292406426, "grad_norm": 0.6214650671924747, "learning_rate": 6.038062171898323e-07, "loss": 0.2829, "step": 33337 }, { "epoch": 1.561718274230571, "grad_norm": 0.6737443466646023, "learning_rate": 6.036826277932617e-07, "loss": 0.2881, "step": 33338 }, { "epoch": 1.5617651192204993, "grad_norm": 0.5831851029584261, "learning_rate": 6.035590493095206e-07, "loss": 0.2678, "step": 33339 }, { "epoch": 1.5618119642104276, "grad_norm": 0.6067335361145105, "learning_rate": 6.034354817393204e-07, "loss": 0.2802, "step": 33340 }, { "epoch": 1.561858809200356, "grad_norm": 0.5997782718235559, "learning_rate": 6.03311925083373e-07, "loss": 0.2542, "step": 33341 }, { "epoch": 1.5619056541902845, "grad_norm": 0.6149102870685574, "learning_rate": 6.031883793423879e-07, "loss": 0.2839, "step": 33342 }, { "epoch": 1.5619524991802127, "grad_norm": 0.5982188219615198, "learning_rate": 6.030648445170769e-07, "loss": 0.2771, "step": 33343 }, { "epoch": 1.561999344170141, "grad_norm": 0.608709803334902, "learning_rate": 6.029413206081519e-07, "loss": 0.2789, "step": 33344 }, { "epoch": 1.5620461891600694, "grad_norm": 0.5843054053311857, "learning_rate": 6.028178076163221e-07, "loss": 0.2665, "step": 33345 }, { "epoch": 1.5620930341499977, "grad_norm": 0.5916222948677812, "learning_rate": 6.026943055422987e-07, "loss": 0.2836, "step": 33346 }, { "epoch": 1.562139879139926, "grad_norm": 0.5710870033553971, "learning_rate": 6.025708143867936e-07, "loss": 0.2648, "step": 33347 }, { "epoch": 1.5621867241298544, "grad_norm": 0.5777664324010355, "learning_rate": 6.024473341505161e-07, "loss": 0.2755, "step": 33348 }, { "epoch": 1.5622335691197826, "grad_norm": 0.5689832705994198, "learning_rate": 6.02323864834177e-07, "loss": 0.2694, "step": 33349 }, { "epoch": 1.5622804141097109, "grad_norm": 0.5798576990056216, "learning_rate": 6.022004064384871e-07, "loss": 0.2567, "step": 33350 }, { "epoch": 1.5623272590996393, "grad_norm": 0.5843088882784828, "learning_rate": 6.020769589641576e-07, "loss": 0.2701, "step": 33351 }, { "epoch": 1.5623741040895678, "grad_norm": 0.6243165853531091, "learning_rate": 6.019535224118972e-07, "loss": 0.2663, "step": 33352 }, { "epoch": 1.5624209490794958, "grad_norm": 0.6415083180809347, "learning_rate": 6.018300967824176e-07, "loss": 0.2853, "step": 33353 }, { "epoch": 1.5624677940694243, "grad_norm": 0.5762316599312814, "learning_rate": 6.017066820764291e-07, "loss": 0.2611, "step": 33354 }, { "epoch": 1.5625146390593527, "grad_norm": 0.6411084802970616, "learning_rate": 6.015832782946413e-07, "loss": 0.2783, "step": 33355 }, { "epoch": 1.562561484049281, "grad_norm": 0.6312754490945865, "learning_rate": 6.01459885437764e-07, "loss": 0.2861, "step": 33356 }, { "epoch": 1.5626083290392092, "grad_norm": 0.6114203205101282, "learning_rate": 6.013365035065089e-07, "loss": 0.2822, "step": 33357 }, { "epoch": 1.5626551740291377, "grad_norm": 0.6006302200961823, "learning_rate": 6.012131325015844e-07, "loss": 0.2698, "step": 33358 }, { "epoch": 1.562702019019066, "grad_norm": 0.584833580695163, "learning_rate": 6.010897724237008e-07, "loss": 0.2685, "step": 33359 }, { "epoch": 1.5627488640089942, "grad_norm": 0.581744540043699, "learning_rate": 6.009664232735685e-07, "loss": 0.2714, "step": 33360 }, { "epoch": 1.5627957089989226, "grad_norm": 0.5938236119643147, "learning_rate": 6.008430850518979e-07, "loss": 0.2686, "step": 33361 }, { "epoch": 1.5628425539888509, "grad_norm": 0.6596011753505097, "learning_rate": 6.00719757759397e-07, "loss": 0.2879, "step": 33362 }, { "epoch": 1.562889398978779, "grad_norm": 0.6454564363582743, "learning_rate": 6.005964413967775e-07, "loss": 0.2825, "step": 33363 }, { "epoch": 1.5629362439687076, "grad_norm": 0.6228178866784051, "learning_rate": 6.004731359647473e-07, "loss": 0.2693, "step": 33364 }, { "epoch": 1.562983088958636, "grad_norm": 0.6253872011784262, "learning_rate": 6.003498414640169e-07, "loss": 0.2929, "step": 33365 }, { "epoch": 1.563029933948564, "grad_norm": 0.6269103321846458, "learning_rate": 6.002265578952954e-07, "loss": 0.263, "step": 33366 }, { "epoch": 1.5630767789384925, "grad_norm": 0.6182685950109476, "learning_rate": 6.001032852592928e-07, "loss": 0.2958, "step": 33367 }, { "epoch": 1.563123623928421, "grad_norm": 0.6141805556037606, "learning_rate": 5.999800235567188e-07, "loss": 0.2664, "step": 33368 }, { "epoch": 1.5631704689183492, "grad_norm": 0.5456484915279357, "learning_rate": 5.998567727882823e-07, "loss": 0.2512, "step": 33369 }, { "epoch": 1.5632173139082775, "grad_norm": 0.6354566138508692, "learning_rate": 5.997335329546919e-07, "loss": 0.2586, "step": 33370 }, { "epoch": 1.563264158898206, "grad_norm": 0.6324076768351857, "learning_rate": 5.996103040566572e-07, "loss": 0.2873, "step": 33371 }, { "epoch": 1.5633110038881342, "grad_norm": 0.6196432470143671, "learning_rate": 5.994870860948879e-07, "loss": 0.2838, "step": 33372 }, { "epoch": 1.5633578488780624, "grad_norm": 0.6125814723867483, "learning_rate": 5.993638790700923e-07, "loss": 0.2836, "step": 33373 }, { "epoch": 1.5634046938679909, "grad_norm": 0.6005223972279475, "learning_rate": 5.9924068298298e-07, "loss": 0.2775, "step": 33374 }, { "epoch": 1.563451538857919, "grad_norm": 0.6006412673525336, "learning_rate": 5.991174978342607e-07, "loss": 0.2689, "step": 33375 }, { "epoch": 1.5634983838478473, "grad_norm": 0.5804876822709741, "learning_rate": 5.989943236246423e-07, "loss": 0.2754, "step": 33376 }, { "epoch": 1.5635452288377758, "grad_norm": 0.5915118456303163, "learning_rate": 5.988711603548333e-07, "loss": 0.2527, "step": 33377 }, { "epoch": 1.5635920738277043, "grad_norm": 0.6346888276159216, "learning_rate": 5.987480080255426e-07, "loss": 0.2795, "step": 33378 }, { "epoch": 1.5636389188176325, "grad_norm": 0.6195425250639003, "learning_rate": 5.986248666374794e-07, "loss": 0.2647, "step": 33379 }, { "epoch": 1.5636857638075607, "grad_norm": 0.5509078840926463, "learning_rate": 5.985017361913523e-07, "loss": 0.2448, "step": 33380 }, { "epoch": 1.5637326087974892, "grad_norm": 0.6050358887067679, "learning_rate": 5.983786166878703e-07, "loss": 0.2891, "step": 33381 }, { "epoch": 1.5637794537874175, "grad_norm": 0.602192087843168, "learning_rate": 5.982555081277408e-07, "loss": 0.2834, "step": 33382 }, { "epoch": 1.5638262987773457, "grad_norm": 0.6000580988746345, "learning_rate": 5.981324105116737e-07, "loss": 0.273, "step": 33383 }, { "epoch": 1.5638731437672742, "grad_norm": 0.5863915599213071, "learning_rate": 5.980093238403756e-07, "loss": 0.2834, "step": 33384 }, { "epoch": 1.5639199887572024, "grad_norm": 0.5556027153984139, "learning_rate": 5.978862481145558e-07, "loss": 0.2505, "step": 33385 }, { "epoch": 1.5639668337471306, "grad_norm": 0.6011049863550253, "learning_rate": 5.977631833349228e-07, "loss": 0.2588, "step": 33386 }, { "epoch": 1.564013678737059, "grad_norm": 0.6005948196204195, "learning_rate": 5.976401295021853e-07, "loss": 0.2701, "step": 33387 }, { "epoch": 1.5640605237269876, "grad_norm": 0.5980812367497931, "learning_rate": 5.975170866170499e-07, "loss": 0.2684, "step": 33388 }, { "epoch": 1.5641073687169156, "grad_norm": 0.58932814010774, "learning_rate": 5.973940546802254e-07, "loss": 0.276, "step": 33389 }, { "epoch": 1.564154213706844, "grad_norm": 0.6100561467053219, "learning_rate": 5.972710336924206e-07, "loss": 0.2569, "step": 33390 }, { "epoch": 1.5642010586967725, "grad_norm": 0.5836021091843506, "learning_rate": 5.97148023654342e-07, "loss": 0.2615, "step": 33391 }, { "epoch": 1.5642479036867007, "grad_norm": 0.5755057015356888, "learning_rate": 5.970250245666986e-07, "loss": 0.2683, "step": 33392 }, { "epoch": 1.564294748676629, "grad_norm": 0.582313620289832, "learning_rate": 5.969020364301983e-07, "loss": 0.2478, "step": 33393 }, { "epoch": 1.5643415936665575, "grad_norm": 0.5737702995265723, "learning_rate": 5.967790592455478e-07, "loss": 0.2845, "step": 33394 }, { "epoch": 1.5643884386564857, "grad_norm": 0.6337594899310632, "learning_rate": 5.966560930134554e-07, "loss": 0.2956, "step": 33395 }, { "epoch": 1.564435283646414, "grad_norm": 0.5875517689489962, "learning_rate": 5.965331377346298e-07, "loss": 0.2707, "step": 33396 }, { "epoch": 1.5644821286363424, "grad_norm": 0.6037152652873508, "learning_rate": 5.964101934097766e-07, "loss": 0.2809, "step": 33397 }, { "epoch": 1.5645289736262706, "grad_norm": 0.6359030550629385, "learning_rate": 5.962872600396044e-07, "loss": 0.2783, "step": 33398 }, { "epoch": 1.5645758186161989, "grad_norm": 0.5433716562834864, "learning_rate": 5.961643376248211e-07, "loss": 0.2585, "step": 33399 }, { "epoch": 1.5646226636061273, "grad_norm": 0.5990575115545164, "learning_rate": 5.960414261661329e-07, "loss": 0.2764, "step": 33400 }, { "epoch": 1.5646695085960558, "grad_norm": 0.5963264256016535, "learning_rate": 5.959185256642481e-07, "loss": 0.2814, "step": 33401 }, { "epoch": 1.5647163535859838, "grad_norm": 0.5763415005095015, "learning_rate": 5.95795636119873e-07, "loss": 0.2622, "step": 33402 }, { "epoch": 1.5647631985759123, "grad_norm": 0.5795757821656683, "learning_rate": 5.956727575337166e-07, "loss": 0.2669, "step": 33403 }, { "epoch": 1.5648100435658407, "grad_norm": 0.6065545731649149, "learning_rate": 5.955498899064837e-07, "loss": 0.2719, "step": 33404 }, { "epoch": 1.564856888555769, "grad_norm": 0.6318273873914741, "learning_rate": 5.954270332388837e-07, "loss": 0.2839, "step": 33405 }, { "epoch": 1.5649037335456972, "grad_norm": 0.5722168223029688, "learning_rate": 5.953041875316215e-07, "loss": 0.2677, "step": 33406 }, { "epoch": 1.5649505785356257, "grad_norm": 0.58700848531659, "learning_rate": 5.951813527854048e-07, "loss": 0.2705, "step": 33407 }, { "epoch": 1.564997423525554, "grad_norm": 0.6344326453270136, "learning_rate": 5.950585290009409e-07, "loss": 0.2857, "step": 33408 }, { "epoch": 1.5650442685154822, "grad_norm": 0.5521266509532717, "learning_rate": 5.949357161789362e-07, "loss": 0.2538, "step": 33409 }, { "epoch": 1.5650911135054106, "grad_norm": 0.6448896629242473, "learning_rate": 5.948129143200985e-07, "loss": 0.2927, "step": 33410 }, { "epoch": 1.5651379584953389, "grad_norm": 0.6255049643626639, "learning_rate": 5.946901234251334e-07, "loss": 0.2805, "step": 33411 }, { "epoch": 1.5651848034852671, "grad_norm": 0.5966088553765562, "learning_rate": 5.945673434947474e-07, "loss": 0.2753, "step": 33412 }, { "epoch": 1.5652316484751956, "grad_norm": 0.6010160194383174, "learning_rate": 5.944445745296474e-07, "loss": 0.2694, "step": 33413 }, { "epoch": 1.565278493465124, "grad_norm": 0.5836811355171342, "learning_rate": 5.943218165305395e-07, "loss": 0.2589, "step": 33414 }, { "epoch": 1.5653253384550523, "grad_norm": 0.6001849413256152, "learning_rate": 5.941990694981308e-07, "loss": 0.2545, "step": 33415 }, { "epoch": 1.5653721834449805, "grad_norm": 0.6343047282726558, "learning_rate": 5.940763334331276e-07, "loss": 0.2778, "step": 33416 }, { "epoch": 1.565419028434909, "grad_norm": 0.6539577665933484, "learning_rate": 5.939536083362365e-07, "loss": 0.2753, "step": 33417 }, { "epoch": 1.5654658734248372, "grad_norm": 0.5797558268942167, "learning_rate": 5.938308942081636e-07, "loss": 0.2631, "step": 33418 }, { "epoch": 1.5655127184147655, "grad_norm": 0.5704532743810704, "learning_rate": 5.93708191049614e-07, "loss": 0.2707, "step": 33419 }, { "epoch": 1.565559563404694, "grad_norm": 0.5802703114512285, "learning_rate": 5.935854988612946e-07, "loss": 0.2755, "step": 33420 }, { "epoch": 1.5656064083946222, "grad_norm": 0.5830109284602373, "learning_rate": 5.934628176439114e-07, "loss": 0.273, "step": 33421 }, { "epoch": 1.5656532533845504, "grad_norm": 0.6529892145964659, "learning_rate": 5.933401473981706e-07, "loss": 0.2859, "step": 33422 }, { "epoch": 1.5657000983744789, "grad_norm": 0.6807041071559136, "learning_rate": 5.932174881247782e-07, "loss": 0.2802, "step": 33423 }, { "epoch": 1.5657469433644073, "grad_norm": 0.5818346749559161, "learning_rate": 5.930948398244405e-07, "loss": 0.2692, "step": 33424 }, { "epoch": 1.5657937883543354, "grad_norm": 0.6065834373639515, "learning_rate": 5.929722024978626e-07, "loss": 0.2787, "step": 33425 }, { "epoch": 1.5658406333442638, "grad_norm": 0.5587561534181463, "learning_rate": 5.928495761457498e-07, "loss": 0.263, "step": 33426 }, { "epoch": 1.5658874783341923, "grad_norm": 0.6363714182593366, "learning_rate": 5.92726960768808e-07, "loss": 0.2894, "step": 33427 }, { "epoch": 1.5659343233241205, "grad_norm": 0.6174791339557041, "learning_rate": 5.926043563677436e-07, "loss": 0.2685, "step": 33428 }, { "epoch": 1.5659811683140488, "grad_norm": 0.6169206927264874, "learning_rate": 5.924817629432614e-07, "loss": 0.2812, "step": 33429 }, { "epoch": 1.5660280133039772, "grad_norm": 0.592827322262281, "learning_rate": 5.923591804960682e-07, "loss": 0.2746, "step": 33430 }, { "epoch": 1.5660748582939055, "grad_norm": 0.6188489396788502, "learning_rate": 5.922366090268675e-07, "loss": 0.2696, "step": 33431 }, { "epoch": 1.5661217032838337, "grad_norm": 0.6371853719402925, "learning_rate": 5.921140485363666e-07, "loss": 0.3071, "step": 33432 }, { "epoch": 1.5661685482737622, "grad_norm": 0.5915510094740828, "learning_rate": 5.919914990252687e-07, "loss": 0.2745, "step": 33433 }, { "epoch": 1.5662153932636904, "grad_norm": 0.6523694629160863, "learning_rate": 5.918689604942806e-07, "loss": 0.2967, "step": 33434 }, { "epoch": 1.5662622382536187, "grad_norm": 0.5460918513430935, "learning_rate": 5.917464329441067e-07, "loss": 0.2682, "step": 33435 }, { "epoch": 1.5663090832435471, "grad_norm": 0.6374075232053136, "learning_rate": 5.916239163754534e-07, "loss": 0.2827, "step": 33436 }, { "epoch": 1.5663559282334756, "grad_norm": 0.6281436161512056, "learning_rate": 5.915014107890241e-07, "loss": 0.2748, "step": 33437 }, { "epoch": 1.5664027732234036, "grad_norm": 0.584769382526658, "learning_rate": 5.913789161855251e-07, "loss": 0.2669, "step": 33438 }, { "epoch": 1.566449618213332, "grad_norm": 0.5689462501316359, "learning_rate": 5.9125643256566e-07, "loss": 0.2646, "step": 33439 }, { "epoch": 1.5664964632032605, "grad_norm": 0.641434028440374, "learning_rate": 5.911339599301344e-07, "loss": 0.2896, "step": 33440 }, { "epoch": 1.5665433081931888, "grad_norm": 0.6257265362086807, "learning_rate": 5.91011498279653e-07, "loss": 0.2978, "step": 33441 }, { "epoch": 1.566590153183117, "grad_norm": 0.6104864898844357, "learning_rate": 5.908890476149215e-07, "loss": 0.2672, "step": 33442 }, { "epoch": 1.5666369981730455, "grad_norm": 0.5701655435535702, "learning_rate": 5.907666079366431e-07, "loss": 0.2505, "step": 33443 }, { "epoch": 1.5666838431629737, "grad_norm": 0.5846630326754674, "learning_rate": 5.906441792455228e-07, "loss": 0.2603, "step": 33444 }, { "epoch": 1.566730688152902, "grad_norm": 0.5708855360725896, "learning_rate": 5.905217615422659e-07, "loss": 0.2471, "step": 33445 }, { "epoch": 1.5667775331428304, "grad_norm": 0.604261488147762, "learning_rate": 5.90399354827576e-07, "loss": 0.2853, "step": 33446 }, { "epoch": 1.5668243781327587, "grad_norm": 0.5674709533599822, "learning_rate": 5.902769591021576e-07, "loss": 0.2417, "step": 33447 }, { "epoch": 1.566871223122687, "grad_norm": 0.6633300370241195, "learning_rate": 5.901545743667162e-07, "loss": 0.2837, "step": 33448 }, { "epoch": 1.5669180681126154, "grad_norm": 0.5982303364380985, "learning_rate": 5.900322006219541e-07, "loss": 0.2735, "step": 33449 }, { "epoch": 1.5669649131025438, "grad_norm": 0.6291672982114588, "learning_rate": 5.899098378685772e-07, "loss": 0.2798, "step": 33450 }, { "epoch": 1.567011758092472, "grad_norm": 0.5666679831789094, "learning_rate": 5.897874861072886e-07, "loss": 0.2625, "step": 33451 }, { "epoch": 1.5670586030824003, "grad_norm": 0.6134668594565151, "learning_rate": 5.896651453387938e-07, "loss": 0.2728, "step": 33452 }, { "epoch": 1.5671054480723288, "grad_norm": 0.608082242576667, "learning_rate": 5.895428155637953e-07, "loss": 0.276, "step": 33453 }, { "epoch": 1.567152293062257, "grad_norm": 0.5812613520548795, "learning_rate": 5.894204967829984e-07, "loss": 0.2689, "step": 33454 }, { "epoch": 1.5671991380521852, "grad_norm": 0.6073046065978395, "learning_rate": 5.892981889971056e-07, "loss": 0.2671, "step": 33455 }, { "epoch": 1.5672459830421137, "grad_norm": 0.6029996198265783, "learning_rate": 5.891758922068216e-07, "loss": 0.2754, "step": 33456 }, { "epoch": 1.567292828032042, "grad_norm": 0.5954708430209666, "learning_rate": 5.8905360641285e-07, "loss": 0.2577, "step": 33457 }, { "epoch": 1.5673396730219702, "grad_norm": 0.5932572000797095, "learning_rate": 5.889313316158945e-07, "loss": 0.2486, "step": 33458 }, { "epoch": 1.5673865180118987, "grad_norm": 0.6092187465239118, "learning_rate": 5.888090678166597e-07, "loss": 0.2751, "step": 33459 }, { "epoch": 1.5674333630018271, "grad_norm": 0.5970560821957357, "learning_rate": 5.886868150158481e-07, "loss": 0.271, "step": 33460 }, { "epoch": 1.5674802079917551, "grad_norm": 0.6617195847155685, "learning_rate": 5.885645732141632e-07, "loss": 0.2917, "step": 33461 }, { "epoch": 1.5675270529816836, "grad_norm": 0.5672546294196165, "learning_rate": 5.884423424123084e-07, "loss": 0.271, "step": 33462 }, { "epoch": 1.567573897971612, "grad_norm": 0.5954771016796717, "learning_rate": 5.883201226109877e-07, "loss": 0.2646, "step": 33463 }, { "epoch": 1.5676207429615403, "grad_norm": 0.6595049595495482, "learning_rate": 5.88197913810904e-07, "loss": 0.288, "step": 33464 }, { "epoch": 1.5676675879514685, "grad_norm": 0.5735695569926376, "learning_rate": 5.880757160127609e-07, "loss": 0.2697, "step": 33465 }, { "epoch": 1.567714432941397, "grad_norm": 0.6739037093167459, "learning_rate": 5.879535292172623e-07, "loss": 0.2816, "step": 33466 }, { "epoch": 1.5677612779313252, "grad_norm": 0.5699780339052631, "learning_rate": 5.878313534251104e-07, "loss": 0.2548, "step": 33467 }, { "epoch": 1.5678081229212535, "grad_norm": 0.6208766298228285, "learning_rate": 5.877091886370078e-07, "loss": 0.2777, "step": 33468 }, { "epoch": 1.567854967911182, "grad_norm": 0.6182949271756933, "learning_rate": 5.875870348536583e-07, "loss": 0.2719, "step": 33469 }, { "epoch": 1.5679018129011102, "grad_norm": 0.6123048285453712, "learning_rate": 5.874648920757648e-07, "loss": 0.2802, "step": 33470 }, { "epoch": 1.5679486578910384, "grad_norm": 0.5700462982149901, "learning_rate": 5.8734276030403e-07, "loss": 0.2642, "step": 33471 }, { "epoch": 1.567995502880967, "grad_norm": 0.6352579885005645, "learning_rate": 5.872206395391575e-07, "loss": 0.2665, "step": 33472 }, { "epoch": 1.5680423478708954, "grad_norm": 0.6370050451814708, "learning_rate": 5.870985297818488e-07, "loss": 0.262, "step": 33473 }, { "epoch": 1.5680891928608234, "grad_norm": 0.6209835304733772, "learning_rate": 5.86976431032808e-07, "loss": 0.2796, "step": 33474 }, { "epoch": 1.5681360378507518, "grad_norm": 0.6239376799127657, "learning_rate": 5.868543432927365e-07, "loss": 0.2742, "step": 33475 }, { "epoch": 1.5681828828406803, "grad_norm": 0.5641358353692824, "learning_rate": 5.867322665623371e-07, "loss": 0.2583, "step": 33476 }, { "epoch": 1.5682297278306085, "grad_norm": 0.5997916973995487, "learning_rate": 5.866102008423127e-07, "loss": 0.2696, "step": 33477 }, { "epoch": 1.5682765728205368, "grad_norm": 0.6548760021257423, "learning_rate": 5.864881461333666e-07, "loss": 0.2785, "step": 33478 }, { "epoch": 1.5683234178104652, "grad_norm": 0.6001583110730608, "learning_rate": 5.863661024361994e-07, "loss": 0.2773, "step": 33479 }, { "epoch": 1.5683702628003935, "grad_norm": 0.5912723227539711, "learning_rate": 5.862440697515143e-07, "loss": 0.264, "step": 33480 }, { "epoch": 1.5684171077903217, "grad_norm": 0.5770206424088226, "learning_rate": 5.861220480800145e-07, "loss": 0.2587, "step": 33481 }, { "epoch": 1.5684639527802502, "grad_norm": 0.6015708311791642, "learning_rate": 5.860000374224004e-07, "loss": 0.2643, "step": 33482 }, { "epoch": 1.5685107977701784, "grad_norm": 0.6161789029031101, "learning_rate": 5.85878037779375e-07, "loss": 0.2899, "step": 33483 }, { "epoch": 1.5685576427601067, "grad_norm": 0.6531220819114579, "learning_rate": 5.857560491516404e-07, "loss": 0.2737, "step": 33484 }, { "epoch": 1.5686044877500351, "grad_norm": 0.6208934885866594, "learning_rate": 5.856340715398992e-07, "loss": 0.2728, "step": 33485 }, { "epoch": 1.5686513327399636, "grad_norm": 0.6328264372567203, "learning_rate": 5.855121049448522e-07, "loss": 0.3027, "step": 33486 }, { "epoch": 1.5686981777298918, "grad_norm": 0.636669312270371, "learning_rate": 5.853901493672026e-07, "loss": 0.2846, "step": 33487 }, { "epoch": 1.56874502271982, "grad_norm": 0.6548069910507012, "learning_rate": 5.852682048076508e-07, "loss": 0.2905, "step": 33488 }, { "epoch": 1.5687918677097485, "grad_norm": 0.6281476991370879, "learning_rate": 5.85146271266899e-07, "loss": 0.2803, "step": 33489 }, { "epoch": 1.5688387126996768, "grad_norm": 0.597181181257627, "learning_rate": 5.850243487456492e-07, "loss": 0.2656, "step": 33490 }, { "epoch": 1.568885557689605, "grad_norm": 0.6425107602730162, "learning_rate": 5.849024372446038e-07, "loss": 0.2882, "step": 33491 }, { "epoch": 1.5689324026795335, "grad_norm": 0.6418865069130754, "learning_rate": 5.847805367644627e-07, "loss": 0.2721, "step": 33492 }, { "epoch": 1.5689792476694617, "grad_norm": 0.5719229075178398, "learning_rate": 5.846586473059282e-07, "loss": 0.2663, "step": 33493 }, { "epoch": 1.56902609265939, "grad_norm": 0.6231851359099786, "learning_rate": 5.845367688697027e-07, "loss": 0.2854, "step": 33494 }, { "epoch": 1.5690729376493184, "grad_norm": 0.5742454065618924, "learning_rate": 5.84414901456486e-07, "loss": 0.2618, "step": 33495 }, { "epoch": 1.569119782639247, "grad_norm": 0.6403664666972535, "learning_rate": 5.842930450669798e-07, "loss": 0.2907, "step": 33496 }, { "epoch": 1.569166627629175, "grad_norm": 0.6257715862414364, "learning_rate": 5.841711997018864e-07, "loss": 0.2626, "step": 33497 }, { "epoch": 1.5692134726191034, "grad_norm": 0.5809648176826591, "learning_rate": 5.840493653619056e-07, "loss": 0.2725, "step": 33498 }, { "epoch": 1.5692603176090318, "grad_norm": 0.5701036500281598, "learning_rate": 5.839275420477392e-07, "loss": 0.2619, "step": 33499 }, { "epoch": 1.56930716259896, "grad_norm": 0.5943942871677697, "learning_rate": 5.838057297600879e-07, "loss": 0.276, "step": 33500 }, { "epoch": 1.5693540075888883, "grad_norm": 0.6139533111220272, "learning_rate": 5.83683928499654e-07, "loss": 0.2857, "step": 33501 }, { "epoch": 1.5694008525788168, "grad_norm": 0.6072829663181271, "learning_rate": 5.835621382671366e-07, "loss": 0.2596, "step": 33502 }, { "epoch": 1.569447697568745, "grad_norm": 0.6321965539681039, "learning_rate": 5.834403590632382e-07, "loss": 0.2818, "step": 33503 }, { "epoch": 1.5694945425586733, "grad_norm": 0.6011690682334161, "learning_rate": 5.833185908886582e-07, "loss": 0.2819, "step": 33504 }, { "epoch": 1.5695413875486017, "grad_norm": 0.6225935609887884, "learning_rate": 5.831968337440979e-07, "loss": 0.258, "step": 33505 }, { "epoch": 1.56958823253853, "grad_norm": 0.583964314616602, "learning_rate": 5.83075087630258e-07, "loss": 0.2776, "step": 33506 }, { "epoch": 1.5696350775284582, "grad_norm": 0.5694226836995784, "learning_rate": 5.829533525478393e-07, "loss": 0.2517, "step": 33507 }, { "epoch": 1.5696819225183867, "grad_norm": 0.678912755119447, "learning_rate": 5.828316284975427e-07, "loss": 0.2939, "step": 33508 }, { "epoch": 1.5697287675083151, "grad_norm": 0.5667354484913069, "learning_rate": 5.827099154800683e-07, "loss": 0.2512, "step": 33509 }, { "epoch": 1.5697756124982432, "grad_norm": 0.5802916308392798, "learning_rate": 5.825882134961158e-07, "loss": 0.2612, "step": 33510 }, { "epoch": 1.5698224574881716, "grad_norm": 0.602700696320932, "learning_rate": 5.824665225463863e-07, "loss": 0.2693, "step": 33511 }, { "epoch": 1.5698693024781, "grad_norm": 0.6174554132605565, "learning_rate": 5.823448426315798e-07, "loss": 0.2639, "step": 33512 }, { "epoch": 1.5699161474680283, "grad_norm": 0.5965826404818985, "learning_rate": 5.82223173752397e-07, "loss": 0.2831, "step": 33513 }, { "epoch": 1.5699629924579566, "grad_norm": 0.5645082014711037, "learning_rate": 5.821015159095375e-07, "loss": 0.2625, "step": 33514 }, { "epoch": 1.570009837447885, "grad_norm": 0.6386682735877903, "learning_rate": 5.819798691037026e-07, "loss": 0.2814, "step": 33515 }, { "epoch": 1.5700566824378133, "grad_norm": 0.5885563059581407, "learning_rate": 5.818582333355916e-07, "loss": 0.2582, "step": 33516 }, { "epoch": 1.5701035274277415, "grad_norm": 0.6170582991796869, "learning_rate": 5.817366086059034e-07, "loss": 0.273, "step": 33517 }, { "epoch": 1.57015037241767, "grad_norm": 0.5865288297191038, "learning_rate": 5.816149949153391e-07, "loss": 0.2644, "step": 33518 }, { "epoch": 1.5701972174075982, "grad_norm": 0.5413872256891016, "learning_rate": 5.814933922645982e-07, "loss": 0.2499, "step": 33519 }, { "epoch": 1.5702440623975265, "grad_norm": 0.5338249840312302, "learning_rate": 5.813718006543806e-07, "loss": 0.2517, "step": 33520 }, { "epoch": 1.570290907387455, "grad_norm": 0.6192369572182563, "learning_rate": 5.812502200853871e-07, "loss": 0.2765, "step": 33521 }, { "epoch": 1.5703377523773834, "grad_norm": 0.5830033593970998, "learning_rate": 5.811286505583152e-07, "loss": 0.2644, "step": 33522 }, { "epoch": 1.5703845973673116, "grad_norm": 0.604566968667846, "learning_rate": 5.810070920738664e-07, "loss": 0.2852, "step": 33523 }, { "epoch": 1.5704314423572399, "grad_norm": 0.6348228612039112, "learning_rate": 5.808855446327391e-07, "loss": 0.2967, "step": 33524 }, { "epoch": 1.5704782873471683, "grad_norm": 0.6064579716114147, "learning_rate": 5.807640082356328e-07, "loss": 0.2772, "step": 33525 }, { "epoch": 1.5705251323370966, "grad_norm": 0.5678386468744304, "learning_rate": 5.806424828832475e-07, "loss": 0.2664, "step": 33526 }, { "epoch": 1.5705719773270248, "grad_norm": 0.5994655279523466, "learning_rate": 5.805209685762828e-07, "loss": 0.2673, "step": 33527 }, { "epoch": 1.5706188223169533, "grad_norm": 0.6420889059519567, "learning_rate": 5.803994653154366e-07, "loss": 0.2873, "step": 33528 }, { "epoch": 1.5706656673068815, "grad_norm": 0.6503204311035072, "learning_rate": 5.8027797310141e-07, "loss": 0.2721, "step": 33529 }, { "epoch": 1.5707125122968097, "grad_norm": 0.6183224920043138, "learning_rate": 5.801564919349003e-07, "loss": 0.2791, "step": 33530 }, { "epoch": 1.5707593572867382, "grad_norm": 0.6087017284323799, "learning_rate": 5.800350218166076e-07, "loss": 0.2806, "step": 33531 }, { "epoch": 1.5708062022766667, "grad_norm": 0.6074751308960238, "learning_rate": 5.799135627472308e-07, "loss": 0.2648, "step": 33532 }, { "epoch": 1.5708530472665947, "grad_norm": 0.6384219586309253, "learning_rate": 5.797921147274696e-07, "loss": 0.2856, "step": 33533 }, { "epoch": 1.5708998922565232, "grad_norm": 0.6245149257867115, "learning_rate": 5.796706777580213e-07, "loss": 0.2828, "step": 33534 }, { "epoch": 1.5709467372464516, "grad_norm": 0.5986160649669445, "learning_rate": 5.795492518395857e-07, "loss": 0.2741, "step": 33535 }, { "epoch": 1.5709935822363799, "grad_norm": 0.5751588220028366, "learning_rate": 5.79427836972862e-07, "loss": 0.261, "step": 33536 }, { "epoch": 1.571040427226308, "grad_norm": 0.6084569050615819, "learning_rate": 5.793064331585477e-07, "loss": 0.2781, "step": 33537 }, { "epoch": 1.5710872722162366, "grad_norm": 0.5988158406738356, "learning_rate": 5.791850403973422e-07, "loss": 0.2646, "step": 33538 }, { "epoch": 1.5711341172061648, "grad_norm": 0.6134539934944334, "learning_rate": 5.790636586899448e-07, "loss": 0.2716, "step": 33539 }, { "epoch": 1.571180962196093, "grad_norm": 0.6682855868847752, "learning_rate": 5.789422880370524e-07, "loss": 0.2877, "step": 33540 }, { "epoch": 1.5712278071860215, "grad_norm": 0.5907542464962157, "learning_rate": 5.788209284393642e-07, "loss": 0.2759, "step": 33541 }, { "epoch": 1.5712746521759497, "grad_norm": 0.5973471927195353, "learning_rate": 5.786995798975789e-07, "loss": 0.2746, "step": 33542 }, { "epoch": 1.571321497165878, "grad_norm": 0.5967268151212383, "learning_rate": 5.785782424123954e-07, "loss": 0.2542, "step": 33543 }, { "epoch": 1.5713683421558065, "grad_norm": 0.5427678547454717, "learning_rate": 5.784569159845102e-07, "loss": 0.2556, "step": 33544 }, { "epoch": 1.571415187145735, "grad_norm": 0.5944201867905118, "learning_rate": 5.783356006146234e-07, "loss": 0.273, "step": 33545 }, { "epoch": 1.571462032135663, "grad_norm": 0.5822563865979109, "learning_rate": 5.782142963034316e-07, "loss": 0.2611, "step": 33546 }, { "epoch": 1.5715088771255914, "grad_norm": 0.628909336568835, "learning_rate": 5.780930030516333e-07, "loss": 0.2786, "step": 33547 }, { "epoch": 1.5715557221155199, "grad_norm": 0.6132879961799006, "learning_rate": 5.779717208599267e-07, "loss": 0.273, "step": 33548 }, { "epoch": 1.571602567105448, "grad_norm": 0.5896216458406899, "learning_rate": 5.778504497290096e-07, "loss": 0.266, "step": 33549 }, { "epoch": 1.5716494120953763, "grad_norm": 0.590960027050907, "learning_rate": 5.777291896595811e-07, "loss": 0.2644, "step": 33550 }, { "epoch": 1.5716962570853048, "grad_norm": 0.6537677988717736, "learning_rate": 5.77607940652338e-07, "loss": 0.2807, "step": 33551 }, { "epoch": 1.571743102075233, "grad_norm": 0.5924547467967922, "learning_rate": 5.774867027079769e-07, "loss": 0.266, "step": 33552 }, { "epoch": 1.5717899470651613, "grad_norm": 0.5940887688187295, "learning_rate": 5.773654758271971e-07, "loss": 0.2809, "step": 33553 }, { "epoch": 1.5718367920550897, "grad_norm": 0.5878012993118511, "learning_rate": 5.772442600106954e-07, "loss": 0.2714, "step": 33554 }, { "epoch": 1.571883637045018, "grad_norm": 0.6306744452181355, "learning_rate": 5.7712305525917e-07, "loss": 0.2692, "step": 33555 }, { "epoch": 1.5719304820349462, "grad_norm": 0.5674407044640128, "learning_rate": 5.770018615733178e-07, "loss": 0.2647, "step": 33556 }, { "epoch": 1.5719773270248747, "grad_norm": 0.5909095950188361, "learning_rate": 5.768806789538375e-07, "loss": 0.2673, "step": 33557 }, { "epoch": 1.5720241720148032, "grad_norm": 0.6017575766487966, "learning_rate": 5.767595074014254e-07, "loss": 0.265, "step": 33558 }, { "epoch": 1.5720710170047314, "grad_norm": 0.6081430637371689, "learning_rate": 5.766383469167783e-07, "loss": 0.2842, "step": 33559 }, { "epoch": 1.5721178619946596, "grad_norm": 0.5672067420377886, "learning_rate": 5.765171975005943e-07, "loss": 0.2678, "step": 33560 }, { "epoch": 1.572164706984588, "grad_norm": 0.5671067975759552, "learning_rate": 5.7639605915357e-07, "loss": 0.2699, "step": 33561 }, { "epoch": 1.5722115519745163, "grad_norm": 0.5625740489548641, "learning_rate": 5.762749318764033e-07, "loss": 0.2515, "step": 33562 }, { "epoch": 1.5722583969644446, "grad_norm": 0.593315139144813, "learning_rate": 5.761538156697904e-07, "loss": 0.2787, "step": 33563 }, { "epoch": 1.572305241954373, "grad_norm": 0.6242489876602568, "learning_rate": 5.760327105344299e-07, "loss": 0.2722, "step": 33564 }, { "epoch": 1.5723520869443013, "grad_norm": 0.5833565531821227, "learning_rate": 5.759116164710174e-07, "loss": 0.2676, "step": 33565 }, { "epoch": 1.5723989319342295, "grad_norm": 0.6895497439667837, "learning_rate": 5.757905334802491e-07, "loss": 0.2868, "step": 33566 }, { "epoch": 1.572445776924158, "grad_norm": 0.616794568722109, "learning_rate": 5.756694615628228e-07, "loss": 0.2848, "step": 33567 }, { "epoch": 1.5724926219140865, "grad_norm": 0.5994324365756121, "learning_rate": 5.755484007194351e-07, "loss": 0.2706, "step": 33568 }, { "epoch": 1.5725394669040145, "grad_norm": 0.6529821005851304, "learning_rate": 5.754273509507827e-07, "loss": 0.2895, "step": 33569 }, { "epoch": 1.572586311893943, "grad_norm": 0.6214900967959017, "learning_rate": 5.753063122575628e-07, "loss": 0.2791, "step": 33570 }, { "epoch": 1.5726331568838714, "grad_norm": 0.6972837686281997, "learning_rate": 5.751852846404704e-07, "loss": 0.2656, "step": 33571 }, { "epoch": 1.5726800018737996, "grad_norm": 0.6248008394313771, "learning_rate": 5.750642681002039e-07, "loss": 0.2685, "step": 33572 }, { "epoch": 1.5727268468637279, "grad_norm": 0.5948508678267729, "learning_rate": 5.74943262637458e-07, "loss": 0.2643, "step": 33573 }, { "epoch": 1.5727736918536563, "grad_norm": 0.6567382114928253, "learning_rate": 5.748222682529297e-07, "loss": 0.2667, "step": 33574 }, { "epoch": 1.5728205368435846, "grad_norm": 0.6134512730406142, "learning_rate": 5.747012849473155e-07, "loss": 0.2676, "step": 33575 }, { "epoch": 1.5728673818335128, "grad_norm": 0.577076408877641, "learning_rate": 5.74580312721312e-07, "loss": 0.2548, "step": 33576 }, { "epoch": 1.5729142268234413, "grad_norm": 0.6069954766549074, "learning_rate": 5.744593515756142e-07, "loss": 0.2753, "step": 33577 }, { "epoch": 1.5729610718133695, "grad_norm": 0.5727994354323402, "learning_rate": 5.743384015109196e-07, "loss": 0.2526, "step": 33578 }, { "epoch": 1.5730079168032978, "grad_norm": 0.603847850327296, "learning_rate": 5.742174625279229e-07, "loss": 0.2614, "step": 33579 }, { "epoch": 1.5730547617932262, "grad_norm": 0.6212828971946335, "learning_rate": 5.740965346273206e-07, "loss": 0.2853, "step": 33580 }, { "epoch": 1.5731016067831547, "grad_norm": 0.648359868652038, "learning_rate": 5.739756178098085e-07, "loss": 0.2835, "step": 33581 }, { "epoch": 1.5731484517730827, "grad_norm": 0.5901519487241846, "learning_rate": 5.738547120760837e-07, "loss": 0.2577, "step": 33582 }, { "epoch": 1.5731952967630112, "grad_norm": 0.6005592443827127, "learning_rate": 5.7373381742684e-07, "loss": 0.2661, "step": 33583 }, { "epoch": 1.5732421417529396, "grad_norm": 0.6278186206571259, "learning_rate": 5.736129338627741e-07, "loss": 0.303, "step": 33584 }, { "epoch": 1.5732889867428679, "grad_norm": 0.6212200578679855, "learning_rate": 5.734920613845821e-07, "loss": 0.2568, "step": 33585 }, { "epoch": 1.5733358317327961, "grad_norm": 0.592748347870144, "learning_rate": 5.733711999929586e-07, "loss": 0.2643, "step": 33586 }, { "epoch": 1.5733826767227246, "grad_norm": 0.6003629748609097, "learning_rate": 5.732503496885994e-07, "loss": 0.2654, "step": 33587 }, { "epoch": 1.5734295217126528, "grad_norm": 0.5830305985995494, "learning_rate": 5.731295104722009e-07, "loss": 0.25, "step": 33588 }, { "epoch": 1.573476366702581, "grad_norm": 0.5658909045619264, "learning_rate": 5.730086823444572e-07, "loss": 0.2625, "step": 33589 }, { "epoch": 1.5735232116925095, "grad_norm": 0.5824341788450458, "learning_rate": 5.728878653060643e-07, "loss": 0.28, "step": 33590 }, { "epoch": 1.5735700566824378, "grad_norm": 0.5999900127560646, "learning_rate": 5.727670593577172e-07, "loss": 0.2863, "step": 33591 }, { "epoch": 1.573616901672366, "grad_norm": 0.5970224746198064, "learning_rate": 5.726462645001121e-07, "loss": 0.2682, "step": 33592 }, { "epoch": 1.5736637466622945, "grad_norm": 0.6016124379216152, "learning_rate": 5.725254807339425e-07, "loss": 0.2785, "step": 33593 }, { "epoch": 1.573710591652223, "grad_norm": 0.6122888796073674, "learning_rate": 5.724047080599052e-07, "loss": 0.259, "step": 33594 }, { "epoch": 1.5737574366421512, "grad_norm": 0.6341175397190838, "learning_rate": 5.722839464786934e-07, "loss": 0.272, "step": 33595 }, { "epoch": 1.5738042816320794, "grad_norm": 0.6099873711475057, "learning_rate": 5.721631959910029e-07, "loss": 0.2784, "step": 33596 }, { "epoch": 1.5738511266220079, "grad_norm": 0.6188562093136896, "learning_rate": 5.720424565975289e-07, "loss": 0.2821, "step": 33597 }, { "epoch": 1.5738979716119361, "grad_norm": 0.5872160387974403, "learning_rate": 5.719217282989659e-07, "loss": 0.2753, "step": 33598 }, { "epoch": 1.5739448166018644, "grad_norm": 0.5923197147673075, "learning_rate": 5.718010110960093e-07, "loss": 0.2826, "step": 33599 }, { "epoch": 1.5739916615917928, "grad_norm": 0.6001798286593079, "learning_rate": 5.716803049893535e-07, "loss": 0.2576, "step": 33600 }, { "epoch": 1.574038506581721, "grad_norm": 0.6166922899712626, "learning_rate": 5.715596099796922e-07, "loss": 0.2624, "step": 33601 }, { "epoch": 1.5740853515716493, "grad_norm": 0.5629627253428036, "learning_rate": 5.714389260677203e-07, "loss": 0.2575, "step": 33602 }, { "epoch": 1.5741321965615778, "grad_norm": 0.5969998904190459, "learning_rate": 5.713182532541328e-07, "loss": 0.267, "step": 33603 }, { "epoch": 1.5741790415515062, "grad_norm": 0.6269916940626116, "learning_rate": 5.711975915396242e-07, "loss": 0.2921, "step": 33604 }, { "epoch": 1.5742258865414342, "grad_norm": 0.6057076694977324, "learning_rate": 5.710769409248887e-07, "loss": 0.2726, "step": 33605 }, { "epoch": 1.5742727315313627, "grad_norm": 0.6100077097778925, "learning_rate": 5.709563014106209e-07, "loss": 0.2818, "step": 33606 }, { "epoch": 1.5743195765212912, "grad_norm": 0.5861444769076568, "learning_rate": 5.70835672997515e-07, "loss": 0.2714, "step": 33607 }, { "epoch": 1.5743664215112194, "grad_norm": 0.5733366422753381, "learning_rate": 5.707150556862643e-07, "loss": 0.2601, "step": 33608 }, { "epoch": 1.5744132665011477, "grad_norm": 0.6157038242183903, "learning_rate": 5.705944494775634e-07, "loss": 0.2829, "step": 33609 }, { "epoch": 1.5744601114910761, "grad_norm": 0.6193428656463345, "learning_rate": 5.704738543721067e-07, "loss": 0.2811, "step": 33610 }, { "epoch": 1.5745069564810044, "grad_norm": 0.5703644445683106, "learning_rate": 5.703532703705878e-07, "loss": 0.262, "step": 33611 }, { "epoch": 1.5745538014709326, "grad_norm": 0.564806506874375, "learning_rate": 5.702326974737016e-07, "loss": 0.2543, "step": 33612 }, { "epoch": 1.574600646460861, "grad_norm": 0.5758282651075801, "learning_rate": 5.701121356821404e-07, "loss": 0.267, "step": 33613 }, { "epoch": 1.5746474914507893, "grad_norm": 0.5642092129864444, "learning_rate": 5.699915849965995e-07, "loss": 0.2666, "step": 33614 }, { "epoch": 1.5746943364407175, "grad_norm": 0.6484752055146454, "learning_rate": 5.698710454177714e-07, "loss": 0.2817, "step": 33615 }, { "epoch": 1.574741181430646, "grad_norm": 0.6078377224333649, "learning_rate": 5.697505169463502e-07, "loss": 0.2628, "step": 33616 }, { "epoch": 1.5747880264205745, "grad_norm": 0.6220953683288922, "learning_rate": 5.696299995830296e-07, "loss": 0.2835, "step": 33617 }, { "epoch": 1.5748348714105025, "grad_norm": 0.6545027648719539, "learning_rate": 5.695094933285039e-07, "loss": 0.2821, "step": 33618 }, { "epoch": 1.574881716400431, "grad_norm": 0.582157800734237, "learning_rate": 5.693889981834652e-07, "loss": 0.2493, "step": 33619 }, { "epoch": 1.5749285613903594, "grad_norm": 0.6295997311565255, "learning_rate": 5.692685141486076e-07, "loss": 0.2884, "step": 33620 }, { "epoch": 1.5749754063802877, "grad_norm": 0.6341087171339284, "learning_rate": 5.691480412246251e-07, "loss": 0.2727, "step": 33621 }, { "epoch": 1.575022251370216, "grad_norm": 0.6049903911284881, "learning_rate": 5.690275794122096e-07, "loss": 0.2788, "step": 33622 }, { "epoch": 1.5750690963601444, "grad_norm": 0.6128710845124018, "learning_rate": 5.689071287120552e-07, "loss": 0.2666, "step": 33623 }, { "epoch": 1.5751159413500726, "grad_norm": 0.5375826556362654, "learning_rate": 5.687866891248547e-07, "loss": 0.246, "step": 33624 }, { "epoch": 1.5751627863400008, "grad_norm": 0.6249191038108208, "learning_rate": 5.686662606513021e-07, "loss": 0.2788, "step": 33625 }, { "epoch": 1.5752096313299293, "grad_norm": 0.603696556856864, "learning_rate": 5.685458432920893e-07, "loss": 0.2762, "step": 33626 }, { "epoch": 1.5752564763198575, "grad_norm": 0.5887394162239493, "learning_rate": 5.684254370479101e-07, "loss": 0.2699, "step": 33627 }, { "epoch": 1.5753033213097858, "grad_norm": 0.6343665072838138, "learning_rate": 5.683050419194566e-07, "loss": 0.2817, "step": 33628 }, { "epoch": 1.5753501662997142, "grad_norm": 0.6011009024702766, "learning_rate": 5.681846579074218e-07, "loss": 0.2624, "step": 33629 }, { "epoch": 1.5753970112896427, "grad_norm": 0.5965131085424035, "learning_rate": 5.68064285012499e-07, "loss": 0.2678, "step": 33630 }, { "epoch": 1.575443856279571, "grad_norm": 0.5997244297855896, "learning_rate": 5.679439232353811e-07, "loss": 0.2741, "step": 33631 }, { "epoch": 1.5754907012694992, "grad_norm": 0.6079847263585353, "learning_rate": 5.678235725767597e-07, "loss": 0.2619, "step": 33632 }, { "epoch": 1.5755375462594277, "grad_norm": 0.5970005914650386, "learning_rate": 5.677032330373283e-07, "loss": 0.277, "step": 33633 }, { "epoch": 1.575584391249356, "grad_norm": 0.5846460991913353, "learning_rate": 5.675829046177797e-07, "loss": 0.2678, "step": 33634 }, { "epoch": 1.5756312362392841, "grad_norm": 0.5977162862510975, "learning_rate": 5.67462587318805e-07, "loss": 0.2772, "step": 33635 }, { "epoch": 1.5756780812292126, "grad_norm": 0.571380228406977, "learning_rate": 5.673422811410973e-07, "loss": 0.2662, "step": 33636 }, { "epoch": 1.5757249262191408, "grad_norm": 0.6133884315164392, "learning_rate": 5.672219860853498e-07, "loss": 0.284, "step": 33637 }, { "epoch": 1.575771771209069, "grad_norm": 0.5914196995812976, "learning_rate": 5.671017021522532e-07, "loss": 0.2731, "step": 33638 }, { "epoch": 1.5758186161989975, "grad_norm": 0.5851419720077868, "learning_rate": 5.669814293425007e-07, "loss": 0.2627, "step": 33639 }, { "epoch": 1.575865461188926, "grad_norm": 0.6439726094830288, "learning_rate": 5.668611676567839e-07, "loss": 0.2875, "step": 33640 }, { "epoch": 1.575912306178854, "grad_norm": 0.6016410178513755, "learning_rate": 5.667409170957964e-07, "loss": 0.2646, "step": 33641 }, { "epoch": 1.5759591511687825, "grad_norm": 0.6059925756469626, "learning_rate": 5.666206776602279e-07, "loss": 0.2684, "step": 33642 }, { "epoch": 1.576005996158711, "grad_norm": 0.6165441790836934, "learning_rate": 5.665004493507723e-07, "loss": 0.2675, "step": 33643 }, { "epoch": 1.5760528411486392, "grad_norm": 0.5941394885554407, "learning_rate": 5.6638023216812e-07, "loss": 0.2665, "step": 33644 }, { "epoch": 1.5760996861385674, "grad_norm": 0.6176131737683535, "learning_rate": 5.662600261129633e-07, "loss": 0.2842, "step": 33645 }, { "epoch": 1.576146531128496, "grad_norm": 0.5684853182061894, "learning_rate": 5.661398311859942e-07, "loss": 0.2513, "step": 33646 }, { "epoch": 1.5761933761184241, "grad_norm": 0.605170439092976, "learning_rate": 5.660196473879043e-07, "loss": 0.2633, "step": 33647 }, { "epoch": 1.5762402211083524, "grad_norm": 0.6021466764820325, "learning_rate": 5.658994747193861e-07, "loss": 0.282, "step": 33648 }, { "epoch": 1.5762870660982808, "grad_norm": 0.5926389719554189, "learning_rate": 5.657793131811301e-07, "loss": 0.2765, "step": 33649 }, { "epoch": 1.576333911088209, "grad_norm": 0.6291246128686104, "learning_rate": 5.656591627738275e-07, "loss": 0.2823, "step": 33650 }, { "epoch": 1.5763807560781373, "grad_norm": 0.5516981170385796, "learning_rate": 5.655390234981701e-07, "loss": 0.2579, "step": 33651 }, { "epoch": 1.5764276010680658, "grad_norm": 0.5831933034790476, "learning_rate": 5.654188953548495e-07, "loss": 0.2629, "step": 33652 }, { "epoch": 1.5764744460579942, "grad_norm": 0.6504961748898517, "learning_rate": 5.652987783445568e-07, "loss": 0.2885, "step": 33653 }, { "epoch": 1.5765212910479223, "grad_norm": 0.6271480018724368, "learning_rate": 5.651786724679834e-07, "loss": 0.2761, "step": 33654 }, { "epoch": 1.5765681360378507, "grad_norm": 0.5685391067778088, "learning_rate": 5.65058577725821e-07, "loss": 0.2595, "step": 33655 }, { "epoch": 1.5766149810277792, "grad_norm": 0.6225464639308168, "learning_rate": 5.649384941187605e-07, "loss": 0.2918, "step": 33656 }, { "epoch": 1.5766618260177074, "grad_norm": 0.5806987198012609, "learning_rate": 5.648184216474917e-07, "loss": 0.2709, "step": 33657 }, { "epoch": 1.5767086710076357, "grad_norm": 0.6091139371247644, "learning_rate": 5.646983603127065e-07, "loss": 0.2816, "step": 33658 }, { "epoch": 1.5767555159975641, "grad_norm": 0.5543621407822017, "learning_rate": 5.645783101150958e-07, "loss": 0.2588, "step": 33659 }, { "epoch": 1.5768023609874924, "grad_norm": 0.6152411238019877, "learning_rate": 5.644582710553506e-07, "loss": 0.2852, "step": 33660 }, { "epoch": 1.5768492059774206, "grad_norm": 0.5866524417984951, "learning_rate": 5.64338243134162e-07, "loss": 0.2725, "step": 33661 }, { "epoch": 1.576896050967349, "grad_norm": 0.5633674743320208, "learning_rate": 5.6421822635222e-07, "loss": 0.2603, "step": 33662 }, { "epoch": 1.5769428959572773, "grad_norm": 0.5751493427913377, "learning_rate": 5.640982207102161e-07, "loss": 0.2563, "step": 33663 }, { "epoch": 1.5769897409472056, "grad_norm": 0.6538509204975778, "learning_rate": 5.639782262088395e-07, "loss": 0.2866, "step": 33664 }, { "epoch": 1.577036585937134, "grad_norm": 0.6305974790112365, "learning_rate": 5.638582428487818e-07, "loss": 0.279, "step": 33665 }, { "epoch": 1.5770834309270625, "grad_norm": 0.609810308143948, "learning_rate": 5.637382706307331e-07, "loss": 0.2639, "step": 33666 }, { "epoch": 1.5771302759169907, "grad_norm": 0.5876209428348916, "learning_rate": 5.636183095553849e-07, "loss": 0.26, "step": 33667 }, { "epoch": 1.577177120906919, "grad_norm": 0.6061351281606174, "learning_rate": 5.634983596234258e-07, "loss": 0.2808, "step": 33668 }, { "epoch": 1.5772239658968474, "grad_norm": 0.5416967610463002, "learning_rate": 5.633784208355478e-07, "loss": 0.2515, "step": 33669 }, { "epoch": 1.5772708108867757, "grad_norm": 0.6276903384208021, "learning_rate": 5.632584931924393e-07, "loss": 0.276, "step": 33670 }, { "epoch": 1.577317655876704, "grad_norm": 0.6406014318697668, "learning_rate": 5.631385766947914e-07, "loss": 0.2825, "step": 33671 }, { "epoch": 1.5773645008666324, "grad_norm": 0.5726570188326998, "learning_rate": 5.630186713432942e-07, "loss": 0.2717, "step": 33672 }, { "epoch": 1.5774113458565606, "grad_norm": 0.5862994528841962, "learning_rate": 5.628987771386385e-07, "loss": 0.2617, "step": 33673 }, { "epoch": 1.5774581908464889, "grad_norm": 0.6312845281528281, "learning_rate": 5.627788940815127e-07, "loss": 0.2691, "step": 33674 }, { "epoch": 1.5775050358364173, "grad_norm": 0.6374779678718586, "learning_rate": 5.626590221726075e-07, "loss": 0.2796, "step": 33675 }, { "epoch": 1.5775518808263458, "grad_norm": 0.623420993709431, "learning_rate": 5.625391614126136e-07, "loss": 0.284, "step": 33676 }, { "epoch": 1.5775987258162738, "grad_norm": 0.6035249607726036, "learning_rate": 5.624193118022189e-07, "loss": 0.2553, "step": 33677 }, { "epoch": 1.5776455708062023, "grad_norm": 0.5661094003504846, "learning_rate": 5.622994733421142e-07, "loss": 0.2609, "step": 33678 }, { "epoch": 1.5776924157961307, "grad_norm": 0.5776944627023212, "learning_rate": 5.621796460329896e-07, "loss": 0.2806, "step": 33679 }, { "epoch": 1.577739260786059, "grad_norm": 0.5839065592767405, "learning_rate": 5.620598298755333e-07, "loss": 0.2606, "step": 33680 }, { "epoch": 1.5777861057759872, "grad_norm": 0.604224613606855, "learning_rate": 5.619400248704357e-07, "loss": 0.2761, "step": 33681 }, { "epoch": 1.5778329507659157, "grad_norm": 0.5853983533700456, "learning_rate": 5.618202310183862e-07, "loss": 0.2581, "step": 33682 }, { "epoch": 1.577879795755844, "grad_norm": 0.5620648925285363, "learning_rate": 5.617004483200747e-07, "loss": 0.2617, "step": 33683 }, { "epoch": 1.5779266407457722, "grad_norm": 0.5861772255410498, "learning_rate": 5.615806767761895e-07, "loss": 0.2655, "step": 33684 }, { "epoch": 1.5779734857357006, "grad_norm": 0.6471323537558051, "learning_rate": 5.614609163874207e-07, "loss": 0.2754, "step": 33685 }, { "epoch": 1.5780203307256289, "grad_norm": 0.6113221550473757, "learning_rate": 5.613411671544566e-07, "loss": 0.2679, "step": 33686 }, { "epoch": 1.578067175715557, "grad_norm": 0.5603521789681554, "learning_rate": 5.612214290779866e-07, "loss": 0.263, "step": 33687 }, { "epoch": 1.5781140207054856, "grad_norm": 0.6510646837462424, "learning_rate": 5.611017021587e-07, "loss": 0.2915, "step": 33688 }, { "epoch": 1.578160865695414, "grad_norm": 0.5735375760222577, "learning_rate": 5.60981986397286e-07, "loss": 0.2777, "step": 33689 }, { "epoch": 1.578207710685342, "grad_norm": 0.5550090061393947, "learning_rate": 5.60862281794434e-07, "loss": 0.2619, "step": 33690 }, { "epoch": 1.5782545556752705, "grad_norm": 0.5990492205732583, "learning_rate": 5.607425883508313e-07, "loss": 0.2581, "step": 33691 }, { "epoch": 1.578301400665199, "grad_norm": 0.5538618638946936, "learning_rate": 5.606229060671684e-07, "loss": 0.2673, "step": 33692 }, { "epoch": 1.5783482456551272, "grad_norm": 0.5928509427080673, "learning_rate": 5.605032349441325e-07, "loss": 0.2563, "step": 33693 }, { "epoch": 1.5783950906450555, "grad_norm": 0.5952080362054801, "learning_rate": 5.603835749824133e-07, "loss": 0.2738, "step": 33694 }, { "epoch": 1.578441935634984, "grad_norm": 0.5854169352692755, "learning_rate": 5.602639261826986e-07, "loss": 0.2626, "step": 33695 }, { "epoch": 1.5784887806249122, "grad_norm": 0.5591704246375568, "learning_rate": 5.601442885456779e-07, "loss": 0.2609, "step": 33696 }, { "epoch": 1.5785356256148404, "grad_norm": 0.6268922601125871, "learning_rate": 5.600246620720399e-07, "loss": 0.2826, "step": 33697 }, { "epoch": 1.5785824706047689, "grad_norm": 0.6550618980748357, "learning_rate": 5.59905046762472e-07, "loss": 0.2718, "step": 33698 }, { "epoch": 1.578629315594697, "grad_norm": 0.6109254592578055, "learning_rate": 5.597854426176624e-07, "loss": 0.2611, "step": 33699 }, { "epoch": 1.5786761605846253, "grad_norm": 0.6030002171203985, "learning_rate": 5.596658496383001e-07, "loss": 0.2655, "step": 33700 }, { "epoch": 1.5787230055745538, "grad_norm": 0.599402698103007, "learning_rate": 5.595462678250732e-07, "loss": 0.2709, "step": 33701 }, { "epoch": 1.5787698505644823, "grad_norm": 0.6153075231495497, "learning_rate": 5.594266971786697e-07, "loss": 0.2785, "step": 33702 }, { "epoch": 1.5788166955544105, "grad_norm": 0.6091803858155481, "learning_rate": 5.593071376997777e-07, "loss": 0.2648, "step": 33703 }, { "epoch": 1.5788635405443388, "grad_norm": 0.648978921148286, "learning_rate": 5.59187589389086e-07, "loss": 0.2752, "step": 33704 }, { "epoch": 1.5789103855342672, "grad_norm": 0.6032904369488071, "learning_rate": 5.590680522472822e-07, "loss": 0.2679, "step": 33705 }, { "epoch": 1.5789572305241955, "grad_norm": 0.597545169198468, "learning_rate": 5.58948526275053e-07, "loss": 0.2863, "step": 33706 }, { "epoch": 1.5790040755141237, "grad_norm": 0.6477083491697324, "learning_rate": 5.588290114730874e-07, "loss": 0.2735, "step": 33707 }, { "epoch": 1.5790509205040522, "grad_norm": 0.5765411736456476, "learning_rate": 5.587095078420726e-07, "loss": 0.2559, "step": 33708 }, { "epoch": 1.5790977654939804, "grad_norm": 0.573884043577552, "learning_rate": 5.58590015382697e-07, "loss": 0.2796, "step": 33709 }, { "epoch": 1.5791446104839086, "grad_norm": 0.5615133374265914, "learning_rate": 5.584705340956484e-07, "loss": 0.2526, "step": 33710 }, { "epoch": 1.579191455473837, "grad_norm": 0.5790129266776661, "learning_rate": 5.583510639816131e-07, "loss": 0.2642, "step": 33711 }, { "epoch": 1.5792383004637656, "grad_norm": 0.5670284948439454, "learning_rate": 5.582316050412804e-07, "loss": 0.2639, "step": 33712 }, { "epoch": 1.5792851454536936, "grad_norm": 0.602198562097049, "learning_rate": 5.58112157275336e-07, "loss": 0.261, "step": 33713 }, { "epoch": 1.579331990443622, "grad_norm": 0.5995700882162667, "learning_rate": 5.579927206844679e-07, "loss": 0.2667, "step": 33714 }, { "epoch": 1.5793788354335505, "grad_norm": 0.6180363458611992, "learning_rate": 5.578732952693636e-07, "loss": 0.2725, "step": 33715 }, { "epoch": 1.5794256804234788, "grad_norm": 0.5846985479083565, "learning_rate": 5.577538810307112e-07, "loss": 0.2643, "step": 33716 }, { "epoch": 1.579472525413407, "grad_norm": 0.623159206257284, "learning_rate": 5.576344779691962e-07, "loss": 0.2745, "step": 33717 }, { "epoch": 1.5795193704033355, "grad_norm": 0.6012909397221166, "learning_rate": 5.575150860855075e-07, "loss": 0.2744, "step": 33718 }, { "epoch": 1.5795662153932637, "grad_norm": 0.5746747698118603, "learning_rate": 5.573957053803303e-07, "loss": 0.269, "step": 33719 }, { "epoch": 1.579613060383192, "grad_norm": 0.5602731471913215, "learning_rate": 5.572763358543525e-07, "loss": 0.2713, "step": 33720 }, { "epoch": 1.5796599053731204, "grad_norm": 0.5601185996124235, "learning_rate": 5.571569775082613e-07, "loss": 0.2536, "step": 33721 }, { "epoch": 1.5797067503630486, "grad_norm": 0.5724240356510475, "learning_rate": 5.570376303427441e-07, "loss": 0.2579, "step": 33722 }, { "epoch": 1.5797535953529769, "grad_norm": 0.617225525733464, "learning_rate": 5.569182943584864e-07, "loss": 0.2798, "step": 33723 }, { "epoch": 1.5798004403429053, "grad_norm": 0.5974514876303444, "learning_rate": 5.567989695561754e-07, "loss": 0.2736, "step": 33724 }, { "epoch": 1.5798472853328338, "grad_norm": 0.5997326362004732, "learning_rate": 5.566796559364985e-07, "loss": 0.2533, "step": 33725 }, { "epoch": 1.5798941303227618, "grad_norm": 0.6475534958247188, "learning_rate": 5.565603535001413e-07, "loss": 0.275, "step": 33726 }, { "epoch": 1.5799409753126903, "grad_norm": 0.5615536701853879, "learning_rate": 5.564410622477906e-07, "loss": 0.2514, "step": 33727 }, { "epoch": 1.5799878203026188, "grad_norm": 0.638969873059889, "learning_rate": 5.563217821801336e-07, "loss": 0.2926, "step": 33728 }, { "epoch": 1.580034665292547, "grad_norm": 0.6213182700768717, "learning_rate": 5.56202513297856e-07, "loss": 0.2647, "step": 33729 }, { "epoch": 1.5800815102824752, "grad_norm": 0.6090609755016113, "learning_rate": 5.560832556016441e-07, "loss": 0.2827, "step": 33730 }, { "epoch": 1.5801283552724037, "grad_norm": 0.6029551956958212, "learning_rate": 5.559640090921844e-07, "loss": 0.2787, "step": 33731 }, { "epoch": 1.580175200262332, "grad_norm": 0.5784234442510636, "learning_rate": 5.558447737701641e-07, "loss": 0.2775, "step": 33732 }, { "epoch": 1.5802220452522602, "grad_norm": 0.5562799837967587, "learning_rate": 5.557255496362676e-07, "loss": 0.264, "step": 33733 }, { "epoch": 1.5802688902421886, "grad_norm": 0.6028694499575948, "learning_rate": 5.556063366911829e-07, "loss": 0.2804, "step": 33734 }, { "epoch": 1.5803157352321169, "grad_norm": 0.6302175435334948, "learning_rate": 5.554871349355939e-07, "loss": 0.2865, "step": 33735 }, { "epoch": 1.5803625802220451, "grad_norm": 0.5981755579518309, "learning_rate": 5.553679443701881e-07, "loss": 0.2652, "step": 33736 }, { "epoch": 1.5804094252119736, "grad_norm": 0.6408828525218272, "learning_rate": 5.552487649956509e-07, "loss": 0.2948, "step": 33737 }, { "epoch": 1.580456270201902, "grad_norm": 0.5897451968567128, "learning_rate": 5.551295968126683e-07, "loss": 0.2773, "step": 33738 }, { "epoch": 1.5805031151918303, "grad_norm": 0.5893289739577107, "learning_rate": 5.550104398219264e-07, "loss": 0.2633, "step": 33739 }, { "epoch": 1.5805499601817585, "grad_norm": 0.5759546225791723, "learning_rate": 5.548912940241108e-07, "loss": 0.2555, "step": 33740 }, { "epoch": 1.580596805171687, "grad_norm": 0.5970742992290997, "learning_rate": 5.547721594199063e-07, "loss": 0.2674, "step": 33741 }, { "epoch": 1.5806436501616152, "grad_norm": 0.6100503688800873, "learning_rate": 5.546530360099992e-07, "loss": 0.2852, "step": 33742 }, { "epoch": 1.5806904951515435, "grad_norm": 0.568806635212096, "learning_rate": 5.545339237950747e-07, "loss": 0.2786, "step": 33743 }, { "epoch": 1.580737340141472, "grad_norm": 0.5986658805759404, "learning_rate": 5.544148227758187e-07, "loss": 0.2694, "step": 33744 }, { "epoch": 1.5807841851314002, "grad_norm": 0.5900138295055416, "learning_rate": 5.542957329529161e-07, "loss": 0.2703, "step": 33745 }, { "epoch": 1.5808310301213284, "grad_norm": 0.5833195084104946, "learning_rate": 5.541766543270535e-07, "loss": 0.2634, "step": 33746 }, { "epoch": 1.5808778751112569, "grad_norm": 0.6193628718816062, "learning_rate": 5.540575868989151e-07, "loss": 0.2821, "step": 33747 }, { "epoch": 1.5809247201011853, "grad_norm": 0.5892327828259993, "learning_rate": 5.539385306691853e-07, "loss": 0.2633, "step": 33748 }, { "epoch": 1.5809715650911134, "grad_norm": 0.589316158126481, "learning_rate": 5.538194856385503e-07, "loss": 0.2881, "step": 33749 }, { "epoch": 1.5810184100810418, "grad_norm": 0.608385359003247, "learning_rate": 5.53700451807695e-07, "loss": 0.2771, "step": 33750 }, { "epoch": 1.5810652550709703, "grad_norm": 0.6166243028889716, "learning_rate": 5.535814291773045e-07, "loss": 0.2841, "step": 33751 }, { "epoch": 1.5811121000608985, "grad_norm": 0.6182417648180694, "learning_rate": 5.534624177480644e-07, "loss": 0.2938, "step": 33752 }, { "epoch": 1.5811589450508268, "grad_norm": 0.6090118778675516, "learning_rate": 5.533434175206578e-07, "loss": 0.275, "step": 33753 }, { "epoch": 1.5812057900407552, "grad_norm": 0.6165885485980742, "learning_rate": 5.532244284957716e-07, "loss": 0.2746, "step": 33754 }, { "epoch": 1.5812526350306835, "grad_norm": 0.5739166315539019, "learning_rate": 5.531054506740887e-07, "loss": 0.2413, "step": 33755 }, { "epoch": 1.5812994800206117, "grad_norm": 0.5399162261815607, "learning_rate": 5.529864840562946e-07, "loss": 0.2653, "step": 33756 }, { "epoch": 1.5813463250105402, "grad_norm": 0.5456934032084378, "learning_rate": 5.528675286430741e-07, "loss": 0.2468, "step": 33757 }, { "epoch": 1.5813931700004684, "grad_norm": 0.5994769528630882, "learning_rate": 5.527485844351113e-07, "loss": 0.2759, "step": 33758 }, { "epoch": 1.5814400149903967, "grad_norm": 0.5703158406527914, "learning_rate": 5.526296514330918e-07, "loss": 0.2763, "step": 33759 }, { "epoch": 1.5814868599803251, "grad_norm": 0.5858484811472315, "learning_rate": 5.525107296376983e-07, "loss": 0.272, "step": 33760 }, { "epoch": 1.5815337049702536, "grad_norm": 0.5514496836292696, "learning_rate": 5.523918190496169e-07, "loss": 0.2555, "step": 33761 }, { "epoch": 1.5815805499601816, "grad_norm": 0.6346846031973041, "learning_rate": 5.522729196695303e-07, "loss": 0.2654, "step": 33762 }, { "epoch": 1.58162739495011, "grad_norm": 0.6453898275422365, "learning_rate": 5.521540314981239e-07, "loss": 0.2741, "step": 33763 }, { "epoch": 1.5816742399400385, "grad_norm": 0.5593178428806093, "learning_rate": 5.520351545360811e-07, "loss": 0.2626, "step": 33764 }, { "epoch": 1.5817210849299668, "grad_norm": 0.6110190982368758, "learning_rate": 5.51916288784087e-07, "loss": 0.267, "step": 33765 }, { "epoch": 1.581767929919895, "grad_norm": 0.5619341707914473, "learning_rate": 5.517974342428245e-07, "loss": 0.2509, "step": 33766 }, { "epoch": 1.5818147749098235, "grad_norm": 0.5346087455557851, "learning_rate": 5.51678590912979e-07, "loss": 0.2533, "step": 33767 }, { "epoch": 1.5818616198997517, "grad_norm": 0.5909883175217899, "learning_rate": 5.515597587952326e-07, "loss": 0.2816, "step": 33768 }, { "epoch": 1.58190846488968, "grad_norm": 0.6042955779137471, "learning_rate": 5.514409378902699e-07, "loss": 0.2735, "step": 33769 }, { "epoch": 1.5819553098796084, "grad_norm": 0.58396098723861, "learning_rate": 5.513221281987752e-07, "loss": 0.2674, "step": 33770 }, { "epoch": 1.5820021548695367, "grad_norm": 0.656012163931025, "learning_rate": 5.512033297214323e-07, "loss": 0.2784, "step": 33771 }, { "epoch": 1.582048999859465, "grad_norm": 0.5934942444323451, "learning_rate": 5.51084542458924e-07, "loss": 0.2708, "step": 33772 }, { "epoch": 1.5820958448493934, "grad_norm": 0.6535956976524323, "learning_rate": 5.509657664119342e-07, "loss": 0.2794, "step": 33773 }, { "epoch": 1.5821426898393218, "grad_norm": 0.5848448808063764, "learning_rate": 5.508470015811471e-07, "loss": 0.2734, "step": 33774 }, { "epoch": 1.58218953482925, "grad_norm": 0.6234994824930321, "learning_rate": 5.507282479672449e-07, "loss": 0.2686, "step": 33775 }, { "epoch": 1.5822363798191783, "grad_norm": 0.6005962213767729, "learning_rate": 5.506095055709118e-07, "loss": 0.2804, "step": 33776 }, { "epoch": 1.5822832248091068, "grad_norm": 0.6077845668216044, "learning_rate": 5.504907743928317e-07, "loss": 0.274, "step": 33777 }, { "epoch": 1.582330069799035, "grad_norm": 0.6163128305845978, "learning_rate": 5.503720544336866e-07, "loss": 0.2926, "step": 33778 }, { "epoch": 1.5823769147889633, "grad_norm": 0.5960923619741506, "learning_rate": 5.502533456941602e-07, "loss": 0.2741, "step": 33779 }, { "epoch": 1.5824237597788917, "grad_norm": 0.7161468422568572, "learning_rate": 5.501346481749359e-07, "loss": 0.2948, "step": 33780 }, { "epoch": 1.58247060476882, "grad_norm": 0.597294849627896, "learning_rate": 5.50015961876697e-07, "loss": 0.2667, "step": 33781 }, { "epoch": 1.5825174497587482, "grad_norm": 0.6116877127147826, "learning_rate": 5.498972868001257e-07, "loss": 0.2614, "step": 33782 }, { "epoch": 1.5825642947486767, "grad_norm": 0.6200457935011426, "learning_rate": 5.497786229459059e-07, "loss": 0.287, "step": 33783 }, { "epoch": 1.5826111397386051, "grad_norm": 0.6224221871886436, "learning_rate": 5.496599703147196e-07, "loss": 0.2885, "step": 33784 }, { "epoch": 1.5826579847285331, "grad_norm": 0.5848951479144803, "learning_rate": 5.495413289072496e-07, "loss": 0.2725, "step": 33785 }, { "epoch": 1.5827048297184616, "grad_norm": 0.5808506468337845, "learning_rate": 5.494226987241791e-07, "loss": 0.2702, "step": 33786 }, { "epoch": 1.58275167470839, "grad_norm": 0.6322337569652625, "learning_rate": 5.493040797661908e-07, "loss": 0.2801, "step": 33787 }, { "epoch": 1.5827985196983183, "grad_norm": 0.6202116817610143, "learning_rate": 5.491854720339679e-07, "loss": 0.2827, "step": 33788 }, { "epoch": 1.5828453646882465, "grad_norm": 0.5882974626197516, "learning_rate": 5.490668755281925e-07, "loss": 0.2678, "step": 33789 }, { "epoch": 1.582892209678175, "grad_norm": 0.59230061948514, "learning_rate": 5.489482902495457e-07, "loss": 0.2705, "step": 33790 }, { "epoch": 1.5829390546681033, "grad_norm": 0.5666207693501769, "learning_rate": 5.488297161987117e-07, "loss": 0.2742, "step": 33791 }, { "epoch": 1.5829858996580315, "grad_norm": 0.6053782372250012, "learning_rate": 5.487111533763717e-07, "loss": 0.2759, "step": 33792 }, { "epoch": 1.58303274464796, "grad_norm": 0.580996431919648, "learning_rate": 5.485926017832089e-07, "loss": 0.2736, "step": 33793 }, { "epoch": 1.5830795896378882, "grad_norm": 0.5623295848993686, "learning_rate": 5.484740614199052e-07, "loss": 0.2531, "step": 33794 }, { "epoch": 1.5831264346278164, "grad_norm": 0.5677689736818692, "learning_rate": 5.483555322871434e-07, "loss": 0.2616, "step": 33795 }, { "epoch": 1.583173279617745, "grad_norm": 0.6572004815496117, "learning_rate": 5.48237014385605e-07, "loss": 0.2946, "step": 33796 }, { "epoch": 1.5832201246076734, "grad_norm": 0.5799316791966999, "learning_rate": 5.481185077159712e-07, "loss": 0.2692, "step": 33797 }, { "epoch": 1.5832669695976014, "grad_norm": 0.5742513732642444, "learning_rate": 5.48000012278925e-07, "loss": 0.2801, "step": 33798 }, { "epoch": 1.5833138145875298, "grad_norm": 0.5961015941768896, "learning_rate": 5.478815280751481e-07, "loss": 0.2712, "step": 33799 }, { "epoch": 1.5833606595774583, "grad_norm": 0.613989771386309, "learning_rate": 5.477630551053223e-07, "loss": 0.2756, "step": 33800 }, { "epoch": 1.5834075045673865, "grad_norm": 0.5782949672192014, "learning_rate": 5.476445933701302e-07, "loss": 0.2673, "step": 33801 }, { "epoch": 1.5834543495573148, "grad_norm": 0.6261488139013767, "learning_rate": 5.475261428702519e-07, "loss": 0.2757, "step": 33802 }, { "epoch": 1.5835011945472433, "grad_norm": 0.6352849184552292, "learning_rate": 5.474077036063707e-07, "loss": 0.2861, "step": 33803 }, { "epoch": 1.5835480395371715, "grad_norm": 0.6039280678123016, "learning_rate": 5.472892755791667e-07, "loss": 0.2674, "step": 33804 }, { "epoch": 1.5835948845270997, "grad_norm": 0.5998795045055879, "learning_rate": 5.47170858789322e-07, "loss": 0.2663, "step": 33805 }, { "epoch": 1.5836417295170282, "grad_norm": 0.631589088030807, "learning_rate": 5.470524532375184e-07, "loss": 0.2721, "step": 33806 }, { "epoch": 1.5836885745069564, "grad_norm": 0.6144645194386468, "learning_rate": 5.469340589244379e-07, "loss": 0.2694, "step": 33807 }, { "epoch": 1.5837354194968847, "grad_norm": 0.5791039346492409, "learning_rate": 5.468156758507601e-07, "loss": 0.2716, "step": 33808 }, { "epoch": 1.5837822644868131, "grad_norm": 0.5818907809370362, "learning_rate": 5.466973040171677e-07, "loss": 0.2707, "step": 33809 }, { "epoch": 1.5838291094767416, "grad_norm": 0.5881011844474828, "learning_rate": 5.46578943424341e-07, "loss": 0.2607, "step": 33810 }, { "epoch": 1.5838759544666698, "grad_norm": 0.5783199856595836, "learning_rate": 5.464605940729612e-07, "loss": 0.2686, "step": 33811 }, { "epoch": 1.583922799456598, "grad_norm": 0.574153707502817, "learning_rate": 5.4634225596371e-07, "loss": 0.2644, "step": 33812 }, { "epoch": 1.5839696444465265, "grad_norm": 0.580551102507281, "learning_rate": 5.462239290972684e-07, "loss": 0.2655, "step": 33813 }, { "epoch": 1.5840164894364548, "grad_norm": 0.5468853813017676, "learning_rate": 5.461056134743167e-07, "loss": 0.2624, "step": 33814 }, { "epoch": 1.584063334426383, "grad_norm": 0.6909345140860187, "learning_rate": 5.459873090955359e-07, "loss": 0.2948, "step": 33815 }, { "epoch": 1.5841101794163115, "grad_norm": 0.6237551668614, "learning_rate": 5.458690159616079e-07, "loss": 0.2637, "step": 33816 }, { "epoch": 1.5841570244062397, "grad_norm": 0.5908439705876698, "learning_rate": 5.457507340732115e-07, "loss": 0.27, "step": 33817 }, { "epoch": 1.584203869396168, "grad_norm": 0.5577773156199916, "learning_rate": 5.456324634310286e-07, "loss": 0.2589, "step": 33818 }, { "epoch": 1.5842507143860964, "grad_norm": 0.5774002945818627, "learning_rate": 5.455142040357405e-07, "loss": 0.2698, "step": 33819 }, { "epoch": 1.584297559376025, "grad_norm": 0.6248854280516937, "learning_rate": 5.453959558880262e-07, "loss": 0.2785, "step": 33820 }, { "epoch": 1.584344404365953, "grad_norm": 0.5668309136522527, "learning_rate": 5.452777189885669e-07, "loss": 0.2657, "step": 33821 }, { "epoch": 1.5843912493558814, "grad_norm": 0.5872172538733675, "learning_rate": 5.451594933380427e-07, "loss": 0.2621, "step": 33822 }, { "epoch": 1.5844380943458098, "grad_norm": 0.572719089303139, "learning_rate": 5.450412789371353e-07, "loss": 0.2652, "step": 33823 }, { "epoch": 1.584484939335738, "grad_norm": 0.5581927272883005, "learning_rate": 5.449230757865235e-07, "loss": 0.2492, "step": 33824 }, { "epoch": 1.5845317843256663, "grad_norm": 0.5679458643977457, "learning_rate": 5.448048838868874e-07, "loss": 0.2542, "step": 33825 }, { "epoch": 1.5845786293155948, "grad_norm": 0.6321438720849217, "learning_rate": 5.446867032389089e-07, "loss": 0.2659, "step": 33826 }, { "epoch": 1.584625474305523, "grad_norm": 0.5800591203923519, "learning_rate": 5.445685338432663e-07, "loss": 0.2488, "step": 33827 }, { "epoch": 1.5846723192954513, "grad_norm": 0.6437960330411477, "learning_rate": 5.444503757006403e-07, "loss": 0.2852, "step": 33828 }, { "epoch": 1.5847191642853797, "grad_norm": 0.6087134438070885, "learning_rate": 5.443322288117107e-07, "loss": 0.2655, "step": 33829 }, { "epoch": 1.584766009275308, "grad_norm": 0.5610686194606536, "learning_rate": 5.442140931771583e-07, "loss": 0.2598, "step": 33830 }, { "epoch": 1.5848128542652362, "grad_norm": 0.5798858808092296, "learning_rate": 5.440959687976616e-07, "loss": 0.2649, "step": 33831 }, { "epoch": 1.5848596992551647, "grad_norm": 0.6131683246903167, "learning_rate": 5.439778556739017e-07, "loss": 0.2764, "step": 33832 }, { "epoch": 1.5849065442450931, "grad_norm": 0.6164488934555733, "learning_rate": 5.438597538065568e-07, "loss": 0.2764, "step": 33833 }, { "epoch": 1.5849533892350212, "grad_norm": 0.6031602822358312, "learning_rate": 5.437416631963074e-07, "loss": 0.2713, "step": 33834 }, { "epoch": 1.5850002342249496, "grad_norm": 0.6178773194895285, "learning_rate": 5.43623583843833e-07, "loss": 0.2791, "step": 33835 }, { "epoch": 1.585047079214878, "grad_norm": 0.5700049692875618, "learning_rate": 5.435055157498134e-07, "loss": 0.246, "step": 33836 }, { "epoch": 1.5850939242048063, "grad_norm": 0.5840374221013375, "learning_rate": 5.433874589149284e-07, "loss": 0.2461, "step": 33837 }, { "epoch": 1.5851407691947346, "grad_norm": 0.5721153004355233, "learning_rate": 5.432694133398567e-07, "loss": 0.2609, "step": 33838 }, { "epoch": 1.585187614184663, "grad_norm": 0.6340670437647523, "learning_rate": 5.431513790252771e-07, "loss": 0.2711, "step": 33839 }, { "epoch": 1.5852344591745913, "grad_norm": 0.5935361342706066, "learning_rate": 5.430333559718695e-07, "loss": 0.2629, "step": 33840 }, { "epoch": 1.5852813041645195, "grad_norm": 0.5907801228321534, "learning_rate": 5.429153441803131e-07, "loss": 0.2836, "step": 33841 }, { "epoch": 1.585328149154448, "grad_norm": 0.5673492753685243, "learning_rate": 5.427973436512868e-07, "loss": 0.2717, "step": 33842 }, { "epoch": 1.5853749941443762, "grad_norm": 0.5841453440227208, "learning_rate": 5.4267935438547e-07, "loss": 0.2632, "step": 33843 }, { "epoch": 1.5854218391343045, "grad_norm": 0.5839144082270276, "learning_rate": 5.425613763835424e-07, "loss": 0.2732, "step": 33844 }, { "epoch": 1.585468684124233, "grad_norm": 0.625905792267494, "learning_rate": 5.42443409646182e-07, "loss": 0.2728, "step": 33845 }, { "epoch": 1.5855155291141614, "grad_norm": 0.5636652886974605, "learning_rate": 5.42325454174067e-07, "loss": 0.2652, "step": 33846 }, { "epoch": 1.5855623741040896, "grad_norm": 0.614852522089311, "learning_rate": 5.422075099678769e-07, "loss": 0.283, "step": 33847 }, { "epoch": 1.5856092190940179, "grad_norm": 0.6289422958637497, "learning_rate": 5.420895770282905e-07, "loss": 0.2944, "step": 33848 }, { "epoch": 1.5856560640839463, "grad_norm": 0.5836922673822798, "learning_rate": 5.419716553559867e-07, "loss": 0.2675, "step": 33849 }, { "epoch": 1.5857029090738746, "grad_norm": 0.5787818254276259, "learning_rate": 5.418537449516445e-07, "loss": 0.265, "step": 33850 }, { "epoch": 1.5857497540638028, "grad_norm": 0.5920939247192443, "learning_rate": 5.417358458159411e-07, "loss": 0.2552, "step": 33851 }, { "epoch": 1.5857965990537313, "grad_norm": 0.5654194840075273, "learning_rate": 5.416179579495562e-07, "loss": 0.2658, "step": 33852 }, { "epoch": 1.5858434440436595, "grad_norm": 0.6441612258686179, "learning_rate": 5.415000813531673e-07, "loss": 0.2875, "step": 33853 }, { "epoch": 1.5858902890335878, "grad_norm": 0.5987591615956254, "learning_rate": 5.413822160274532e-07, "loss": 0.2631, "step": 33854 }, { "epoch": 1.5859371340235162, "grad_norm": 0.6069492925765069, "learning_rate": 5.41264361973092e-07, "loss": 0.2718, "step": 33855 }, { "epoch": 1.5859839790134447, "grad_norm": 0.610181316407393, "learning_rate": 5.41146519190763e-07, "loss": 0.2724, "step": 33856 }, { "epoch": 1.5860308240033727, "grad_norm": 0.5818624763514081, "learning_rate": 5.410286876811427e-07, "loss": 0.2575, "step": 33857 }, { "epoch": 1.5860776689933012, "grad_norm": 0.598859594481811, "learning_rate": 5.409108674449104e-07, "loss": 0.2672, "step": 33858 }, { "epoch": 1.5861245139832296, "grad_norm": 0.6006377378958369, "learning_rate": 5.407930584827431e-07, "loss": 0.2786, "step": 33859 }, { "epoch": 1.5861713589731579, "grad_norm": 0.6503115855226472, "learning_rate": 5.406752607953195e-07, "loss": 0.2951, "step": 33860 }, { "epoch": 1.586218203963086, "grad_norm": 0.6051949521656023, "learning_rate": 5.40557474383317e-07, "loss": 0.2652, "step": 33861 }, { "epoch": 1.5862650489530146, "grad_norm": 0.5628410752187931, "learning_rate": 5.404396992474145e-07, "loss": 0.2521, "step": 33862 }, { "epoch": 1.5863118939429428, "grad_norm": 0.605786879684072, "learning_rate": 5.403219353882886e-07, "loss": 0.2779, "step": 33863 }, { "epoch": 1.586358738932871, "grad_norm": 0.6046401716361223, "learning_rate": 5.40204182806617e-07, "loss": 0.2652, "step": 33864 }, { "epoch": 1.5864055839227995, "grad_norm": 0.628305925807665, "learning_rate": 5.400864415030788e-07, "loss": 0.2894, "step": 33865 }, { "epoch": 1.5864524289127278, "grad_norm": 0.662527190369839, "learning_rate": 5.399687114783497e-07, "loss": 0.3015, "step": 33866 }, { "epoch": 1.586499273902656, "grad_norm": 0.6158090439421614, "learning_rate": 5.398509927331081e-07, "loss": 0.2757, "step": 33867 }, { "epoch": 1.5865461188925845, "grad_norm": 0.6098560425833545, "learning_rate": 5.397332852680321e-07, "loss": 0.2817, "step": 33868 }, { "epoch": 1.586592963882513, "grad_norm": 0.6359791212179158, "learning_rate": 5.396155890837976e-07, "loss": 0.2918, "step": 33869 }, { "epoch": 1.586639808872441, "grad_norm": 0.5866690310546473, "learning_rate": 5.394979041810824e-07, "loss": 0.2654, "step": 33870 }, { "epoch": 1.5866866538623694, "grad_norm": 0.5583474428124928, "learning_rate": 5.393802305605644e-07, "loss": 0.256, "step": 33871 }, { "epoch": 1.5867334988522979, "grad_norm": 0.577924960413826, "learning_rate": 5.392625682229208e-07, "loss": 0.2673, "step": 33872 }, { "epoch": 1.586780343842226, "grad_norm": 0.6303126055571168, "learning_rate": 5.391449171688279e-07, "loss": 0.2789, "step": 33873 }, { "epoch": 1.5868271888321543, "grad_norm": 0.6357283030684469, "learning_rate": 5.390272773989635e-07, "loss": 0.2726, "step": 33874 }, { "epoch": 1.5868740338220828, "grad_norm": 0.6327732837144477, "learning_rate": 5.389096489140039e-07, "loss": 0.2779, "step": 33875 }, { "epoch": 1.586920878812011, "grad_norm": 0.658910104839123, "learning_rate": 5.387920317146262e-07, "loss": 0.2846, "step": 33876 }, { "epoch": 1.5869677238019393, "grad_norm": 0.6410072366431023, "learning_rate": 5.386744258015073e-07, "loss": 0.2762, "step": 33877 }, { "epoch": 1.5870145687918678, "grad_norm": 0.621729818609902, "learning_rate": 5.385568311753242e-07, "loss": 0.2823, "step": 33878 }, { "epoch": 1.587061413781796, "grad_norm": 0.602919229690417, "learning_rate": 5.384392478367542e-07, "loss": 0.2679, "step": 33879 }, { "epoch": 1.5871082587717242, "grad_norm": 0.6192656024237209, "learning_rate": 5.383216757864734e-07, "loss": 0.2821, "step": 33880 }, { "epoch": 1.5871551037616527, "grad_norm": 0.6269235517730323, "learning_rate": 5.382041150251577e-07, "loss": 0.2866, "step": 33881 }, { "epoch": 1.5872019487515812, "grad_norm": 0.5758223557141458, "learning_rate": 5.380865655534842e-07, "loss": 0.267, "step": 33882 }, { "epoch": 1.5872487937415094, "grad_norm": 0.5986835671552292, "learning_rate": 5.379690273721294e-07, "loss": 0.2574, "step": 33883 }, { "epoch": 1.5872956387314376, "grad_norm": 0.5885044141903986, "learning_rate": 5.378515004817697e-07, "loss": 0.2813, "step": 33884 }, { "epoch": 1.587342483721366, "grad_norm": 0.5598786611610158, "learning_rate": 5.377339848830812e-07, "loss": 0.2537, "step": 33885 }, { "epoch": 1.5873893287112943, "grad_norm": 0.5660686896634525, "learning_rate": 5.376164805767415e-07, "loss": 0.2603, "step": 33886 }, { "epoch": 1.5874361737012226, "grad_norm": 0.580588532821478, "learning_rate": 5.374989875634254e-07, "loss": 0.2726, "step": 33887 }, { "epoch": 1.587483018691151, "grad_norm": 0.6204770015828722, "learning_rate": 5.373815058438089e-07, "loss": 0.2726, "step": 33888 }, { "epoch": 1.5875298636810793, "grad_norm": 0.6097497806567361, "learning_rate": 5.372640354185684e-07, "loss": 0.2748, "step": 33889 }, { "epoch": 1.5875767086710075, "grad_norm": 0.6170454835857274, "learning_rate": 5.371465762883801e-07, "loss": 0.2726, "step": 33890 }, { "epoch": 1.587623553660936, "grad_norm": 0.6374946461554868, "learning_rate": 5.3702912845392e-07, "loss": 0.2923, "step": 33891 }, { "epoch": 1.5876703986508645, "grad_norm": 0.5977924658200257, "learning_rate": 5.369116919158646e-07, "loss": 0.2713, "step": 33892 }, { "epoch": 1.5877172436407925, "grad_norm": 0.5517502080156906, "learning_rate": 5.367942666748882e-07, "loss": 0.2615, "step": 33893 }, { "epoch": 1.587764088630721, "grad_norm": 0.5865844822736077, "learning_rate": 5.366768527316682e-07, "loss": 0.2609, "step": 33894 }, { "epoch": 1.5878109336206494, "grad_norm": 0.5889095716104581, "learning_rate": 5.365594500868787e-07, "loss": 0.2571, "step": 33895 }, { "epoch": 1.5878577786105776, "grad_norm": 0.6054744014756461, "learning_rate": 5.364420587411959e-07, "loss": 0.2797, "step": 33896 }, { "epoch": 1.5879046236005059, "grad_norm": 0.5704723051067259, "learning_rate": 5.363246786952958e-07, "loss": 0.2579, "step": 33897 }, { "epoch": 1.5879514685904343, "grad_norm": 0.5796747294143572, "learning_rate": 5.362073099498533e-07, "loss": 0.2559, "step": 33898 }, { "epoch": 1.5879983135803626, "grad_norm": 0.6280506041546876, "learning_rate": 5.360899525055452e-07, "loss": 0.286, "step": 33899 }, { "epoch": 1.5880451585702908, "grad_norm": 0.6430892651918191, "learning_rate": 5.35972606363045e-07, "loss": 0.2869, "step": 33900 }, { "epoch": 1.5880920035602193, "grad_norm": 0.561372444065215, "learning_rate": 5.358552715230292e-07, "loss": 0.2505, "step": 33901 }, { "epoch": 1.5881388485501475, "grad_norm": 0.5844700338658131, "learning_rate": 5.357379479861724e-07, "loss": 0.2798, "step": 33902 }, { "epoch": 1.5881856935400758, "grad_norm": 0.6047471419437036, "learning_rate": 5.356206357531496e-07, "loss": 0.2773, "step": 33903 }, { "epoch": 1.5882325385300042, "grad_norm": 0.6153498272872868, "learning_rate": 5.355033348246366e-07, "loss": 0.3038, "step": 33904 }, { "epoch": 1.5882793835199327, "grad_norm": 0.6131952692966525, "learning_rate": 5.353860452013088e-07, "loss": 0.2768, "step": 33905 }, { "epoch": 1.5883262285098607, "grad_norm": 0.6556112175843255, "learning_rate": 5.352687668838397e-07, "loss": 0.2787, "step": 33906 }, { "epoch": 1.5883730734997892, "grad_norm": 0.5971119454327071, "learning_rate": 5.35151499872906e-07, "loss": 0.2589, "step": 33907 }, { "epoch": 1.5884199184897176, "grad_norm": 0.6217936184335026, "learning_rate": 5.350342441691805e-07, "loss": 0.2674, "step": 33908 }, { "epoch": 1.5884667634796459, "grad_norm": 0.6086527989833374, "learning_rate": 5.349169997733395e-07, "loss": 0.2804, "step": 33909 }, { "epoch": 1.5885136084695741, "grad_norm": 0.5773696399696341, "learning_rate": 5.347997666860569e-07, "loss": 0.2759, "step": 33910 }, { "epoch": 1.5885604534595026, "grad_norm": 0.6362347633314248, "learning_rate": 5.346825449080085e-07, "loss": 0.2793, "step": 33911 }, { "epoch": 1.5886072984494308, "grad_norm": 0.618277266925831, "learning_rate": 5.345653344398675e-07, "loss": 0.2567, "step": 33912 }, { "epoch": 1.588654143439359, "grad_norm": 0.597446834093243, "learning_rate": 5.344481352823089e-07, "loss": 0.2679, "step": 33913 }, { "epoch": 1.5887009884292875, "grad_norm": 0.5916800687154138, "learning_rate": 5.343309474360078e-07, "loss": 0.2852, "step": 33914 }, { "epoch": 1.5887478334192158, "grad_norm": 0.5982125871244794, "learning_rate": 5.342137709016376e-07, "loss": 0.2797, "step": 33915 }, { "epoch": 1.588794678409144, "grad_norm": 0.6254042311878056, "learning_rate": 5.34096605679873e-07, "loss": 0.2945, "step": 33916 }, { "epoch": 1.5888415233990725, "grad_norm": 0.585091220310545, "learning_rate": 5.33979451771389e-07, "loss": 0.2824, "step": 33917 }, { "epoch": 1.588888368389001, "grad_norm": 0.6469584799085399, "learning_rate": 5.338623091768583e-07, "loss": 0.2828, "step": 33918 }, { "epoch": 1.5889352133789292, "grad_norm": 0.5686496424993588, "learning_rate": 5.337451778969561e-07, "loss": 0.2747, "step": 33919 }, { "epoch": 1.5889820583688574, "grad_norm": 0.5629390255685021, "learning_rate": 5.33628057932356e-07, "loss": 0.2644, "step": 33920 }, { "epoch": 1.5890289033587859, "grad_norm": 0.5686485030574627, "learning_rate": 5.335109492837329e-07, "loss": 0.2618, "step": 33921 }, { "epoch": 1.5890757483487141, "grad_norm": 0.6268542120341343, "learning_rate": 5.333938519517596e-07, "loss": 0.2721, "step": 33922 }, { "epoch": 1.5891225933386424, "grad_norm": 0.6231189247757714, "learning_rate": 5.332767659371107e-07, "loss": 0.2868, "step": 33923 }, { "epoch": 1.5891694383285708, "grad_norm": 0.6299145390515666, "learning_rate": 5.331596912404593e-07, "loss": 0.2799, "step": 33924 }, { "epoch": 1.589216283318499, "grad_norm": 0.612233536640737, "learning_rate": 5.330426278624792e-07, "loss": 0.2721, "step": 33925 }, { "epoch": 1.5892631283084273, "grad_norm": 0.5931353010999811, "learning_rate": 5.329255758038449e-07, "loss": 0.2749, "step": 33926 }, { "epoch": 1.5893099732983558, "grad_norm": 0.5822327399337835, "learning_rate": 5.32808535065229e-07, "loss": 0.2669, "step": 33927 }, { "epoch": 1.5893568182882842, "grad_norm": 0.6010846218839779, "learning_rate": 5.326915056473064e-07, "loss": 0.2776, "step": 33928 }, { "epoch": 1.5894036632782123, "grad_norm": 0.5898503276854788, "learning_rate": 5.325744875507499e-07, "loss": 0.2516, "step": 33929 }, { "epoch": 1.5894505082681407, "grad_norm": 0.6548640926653478, "learning_rate": 5.32457480776232e-07, "loss": 0.2813, "step": 33930 }, { "epoch": 1.5894973532580692, "grad_norm": 0.6627069907173199, "learning_rate": 5.323404853244265e-07, "loss": 0.2891, "step": 33931 }, { "epoch": 1.5895441982479974, "grad_norm": 0.5866561058718851, "learning_rate": 5.322235011960072e-07, "loss": 0.2587, "step": 33932 }, { "epoch": 1.5895910432379257, "grad_norm": 0.5952189412296088, "learning_rate": 5.321065283916471e-07, "loss": 0.2672, "step": 33933 }, { "epoch": 1.5896378882278541, "grad_norm": 0.6209489287795386, "learning_rate": 5.319895669120192e-07, "loss": 0.2724, "step": 33934 }, { "epoch": 1.5896847332177824, "grad_norm": 0.6205201714589136, "learning_rate": 5.318726167577975e-07, "loss": 0.2617, "step": 33935 }, { "epoch": 1.5897315782077106, "grad_norm": 0.6132709545156032, "learning_rate": 5.31755677929654e-07, "loss": 0.2706, "step": 33936 }, { "epoch": 1.589778423197639, "grad_norm": 0.6068629770637863, "learning_rate": 5.316387504282613e-07, "loss": 0.2745, "step": 33937 }, { "epoch": 1.5898252681875673, "grad_norm": 0.5791528116295139, "learning_rate": 5.31521834254293e-07, "loss": 0.2689, "step": 33938 }, { "epoch": 1.5898721131774955, "grad_norm": 0.6151992731007363, "learning_rate": 5.314049294084217e-07, "loss": 0.2726, "step": 33939 }, { "epoch": 1.589918958167424, "grad_norm": 0.5840454835179872, "learning_rate": 5.312880358913203e-07, "loss": 0.2704, "step": 33940 }, { "epoch": 1.5899658031573525, "grad_norm": 0.5659798191667047, "learning_rate": 5.311711537036618e-07, "loss": 0.2416, "step": 33941 }, { "epoch": 1.5900126481472805, "grad_norm": 0.6179615381670506, "learning_rate": 5.310542828461182e-07, "loss": 0.2778, "step": 33942 }, { "epoch": 1.590059493137209, "grad_norm": 0.6254484092932081, "learning_rate": 5.309374233193629e-07, "loss": 0.2773, "step": 33943 }, { "epoch": 1.5901063381271374, "grad_norm": 0.5862250918180657, "learning_rate": 5.308205751240669e-07, "loss": 0.2771, "step": 33944 }, { "epoch": 1.5901531831170657, "grad_norm": 0.612864418193199, "learning_rate": 5.307037382609037e-07, "loss": 0.2685, "step": 33945 }, { "epoch": 1.590200028106994, "grad_norm": 0.5802304827656674, "learning_rate": 5.305869127305455e-07, "loss": 0.2638, "step": 33946 }, { "epoch": 1.5902468730969224, "grad_norm": 0.6375713124989676, "learning_rate": 5.304700985336656e-07, "loss": 0.2905, "step": 33947 }, { "epoch": 1.5902937180868506, "grad_norm": 0.5788231309415022, "learning_rate": 5.303532956709343e-07, "loss": 0.2702, "step": 33948 }, { "epoch": 1.5903405630767788, "grad_norm": 0.6416666255301834, "learning_rate": 5.302365041430254e-07, "loss": 0.2771, "step": 33949 }, { "epoch": 1.5903874080667073, "grad_norm": 0.6118141415537965, "learning_rate": 5.301197239506095e-07, "loss": 0.2633, "step": 33950 }, { "epoch": 1.5904342530566355, "grad_norm": 0.5840625795526563, "learning_rate": 5.300029550943597e-07, "loss": 0.2625, "step": 33951 }, { "epoch": 1.5904810980465638, "grad_norm": 0.6527212786834233, "learning_rate": 5.298861975749478e-07, "loss": 0.2808, "step": 33952 }, { "epoch": 1.5905279430364923, "grad_norm": 0.5938490789480702, "learning_rate": 5.297694513930463e-07, "loss": 0.2639, "step": 33953 }, { "epoch": 1.5905747880264207, "grad_norm": 0.6127412268431696, "learning_rate": 5.296527165493256e-07, "loss": 0.2789, "step": 33954 }, { "epoch": 1.590621633016349, "grad_norm": 0.604308303982528, "learning_rate": 5.295359930444585e-07, "loss": 0.2559, "step": 33955 }, { "epoch": 1.5906684780062772, "grad_norm": 0.6344536883661499, "learning_rate": 5.294192808791171e-07, "loss": 0.2845, "step": 33956 }, { "epoch": 1.5907153229962057, "grad_norm": 0.6596143303900489, "learning_rate": 5.293025800539717e-07, "loss": 0.2887, "step": 33957 }, { "epoch": 1.590762167986134, "grad_norm": 0.630268744901826, "learning_rate": 5.291858905696948e-07, "loss": 0.2701, "step": 33958 }, { "epoch": 1.5908090129760621, "grad_norm": 0.5823561319530716, "learning_rate": 5.290692124269584e-07, "loss": 0.2729, "step": 33959 }, { "epoch": 1.5908558579659906, "grad_norm": 0.5443461763895516, "learning_rate": 5.289525456264325e-07, "loss": 0.2507, "step": 33960 }, { "epoch": 1.5909027029559188, "grad_norm": 0.609867399952228, "learning_rate": 5.288358901687893e-07, "loss": 0.2829, "step": 33961 }, { "epoch": 1.590949547945847, "grad_norm": 0.6018426119347585, "learning_rate": 5.287192460547e-07, "loss": 0.2915, "step": 33962 }, { "epoch": 1.5909963929357755, "grad_norm": 0.5907639701823517, "learning_rate": 5.286026132848368e-07, "loss": 0.2596, "step": 33963 }, { "epoch": 1.5910432379257038, "grad_norm": 0.6102743665241894, "learning_rate": 5.284859918598695e-07, "loss": 0.285, "step": 33964 }, { "epoch": 1.591090082915632, "grad_norm": 0.5715910439122197, "learning_rate": 5.283693817804697e-07, "loss": 0.2691, "step": 33965 }, { "epoch": 1.5911369279055605, "grad_norm": 0.5887460837669175, "learning_rate": 5.282527830473091e-07, "loss": 0.2584, "step": 33966 }, { "epoch": 1.591183772895489, "grad_norm": 0.6077927175717488, "learning_rate": 5.281361956610578e-07, "loss": 0.2689, "step": 33967 }, { "epoch": 1.5912306178854172, "grad_norm": 0.6325926519785705, "learning_rate": 5.280196196223869e-07, "loss": 0.2756, "step": 33968 }, { "epoch": 1.5912774628753454, "grad_norm": 0.5771752399940657, "learning_rate": 5.279030549319675e-07, "loss": 0.2488, "step": 33969 }, { "epoch": 1.591324307865274, "grad_norm": 0.6066241507977791, "learning_rate": 5.277865015904709e-07, "loss": 0.2928, "step": 33970 }, { "epoch": 1.5913711528552021, "grad_norm": 0.6103151520841336, "learning_rate": 5.276699595985665e-07, "loss": 0.2591, "step": 33971 }, { "epoch": 1.5914179978451304, "grad_norm": 0.6894256328958411, "learning_rate": 5.275534289569268e-07, "loss": 0.3008, "step": 33972 }, { "epoch": 1.5914648428350588, "grad_norm": 0.5567791406591072, "learning_rate": 5.274369096662205e-07, "loss": 0.2646, "step": 33973 }, { "epoch": 1.591511687824987, "grad_norm": 0.5853040143533286, "learning_rate": 5.273204017271188e-07, "loss": 0.2862, "step": 33974 }, { "epoch": 1.5915585328149153, "grad_norm": 0.5949300411603443, "learning_rate": 5.272039051402928e-07, "loss": 0.2699, "step": 33975 }, { "epoch": 1.5916053778048438, "grad_norm": 0.5728323508789585, "learning_rate": 5.270874199064122e-07, "loss": 0.2725, "step": 33976 }, { "epoch": 1.5916522227947723, "grad_norm": 0.5749821645099468, "learning_rate": 5.269709460261483e-07, "loss": 0.2563, "step": 33977 }, { "epoch": 1.5916990677847003, "grad_norm": 0.6112029048575037, "learning_rate": 5.268544835001707e-07, "loss": 0.2712, "step": 33978 }, { "epoch": 1.5917459127746287, "grad_norm": 0.6384907153195555, "learning_rate": 5.26738032329149e-07, "loss": 0.2945, "step": 33979 }, { "epoch": 1.5917927577645572, "grad_norm": 0.6225477535714133, "learning_rate": 5.266215925137541e-07, "loss": 0.283, "step": 33980 }, { "epoch": 1.5918396027544854, "grad_norm": 0.6320118476704174, "learning_rate": 5.265051640546556e-07, "loss": 0.2699, "step": 33981 }, { "epoch": 1.5918864477444137, "grad_norm": 0.6363917703831127, "learning_rate": 5.263887469525242e-07, "loss": 0.2759, "step": 33982 }, { "epoch": 1.5919332927343421, "grad_norm": 0.5898004073073888, "learning_rate": 5.262723412080295e-07, "loss": 0.2794, "step": 33983 }, { "epoch": 1.5919801377242704, "grad_norm": 0.5903028327722446, "learning_rate": 5.261559468218419e-07, "loss": 0.265, "step": 33984 }, { "epoch": 1.5920269827141986, "grad_norm": 0.5823496994692164, "learning_rate": 5.260395637946308e-07, "loss": 0.2585, "step": 33985 }, { "epoch": 1.592073827704127, "grad_norm": 0.6150688553628485, "learning_rate": 5.259231921270652e-07, "loss": 0.2782, "step": 33986 }, { "epoch": 1.5921206726940553, "grad_norm": 0.6571989450461334, "learning_rate": 5.258068318198154e-07, "loss": 0.2894, "step": 33987 }, { "epoch": 1.5921675176839836, "grad_norm": 0.5991465344709654, "learning_rate": 5.25690482873551e-07, "loss": 0.2782, "step": 33988 }, { "epoch": 1.592214362673912, "grad_norm": 0.6215193190575186, "learning_rate": 5.255741452889418e-07, "loss": 0.2786, "step": 33989 }, { "epoch": 1.5922612076638405, "grad_norm": 0.6378818045371872, "learning_rate": 5.254578190666579e-07, "loss": 0.2845, "step": 33990 }, { "epoch": 1.5923080526537685, "grad_norm": 0.6212961059744645, "learning_rate": 5.25341504207367e-07, "loss": 0.2694, "step": 33991 }, { "epoch": 1.592354897643697, "grad_norm": 0.6138757094501176, "learning_rate": 5.252252007117403e-07, "loss": 0.2888, "step": 33992 }, { "epoch": 1.5924017426336254, "grad_norm": 0.5922970274460708, "learning_rate": 5.251089085804456e-07, "loss": 0.2672, "step": 33993 }, { "epoch": 1.5924485876235537, "grad_norm": 0.6332909274709903, "learning_rate": 5.249926278141526e-07, "loss": 0.2864, "step": 33994 }, { "epoch": 1.592495432613482, "grad_norm": 0.624586862310536, "learning_rate": 5.248763584135307e-07, "loss": 0.2708, "step": 33995 }, { "epoch": 1.5925422776034104, "grad_norm": 0.5641107765601542, "learning_rate": 5.247601003792499e-07, "loss": 0.2709, "step": 33996 }, { "epoch": 1.5925891225933386, "grad_norm": 0.6392422141641574, "learning_rate": 5.246438537119772e-07, "loss": 0.2729, "step": 33997 }, { "epoch": 1.5926359675832669, "grad_norm": 0.5888405448202453, "learning_rate": 5.245276184123834e-07, "loss": 0.2635, "step": 33998 }, { "epoch": 1.5926828125731953, "grad_norm": 0.6234057294517595, "learning_rate": 5.24411394481136e-07, "loss": 0.2751, "step": 33999 }, { "epoch": 1.5927296575631236, "grad_norm": 0.5846268476616998, "learning_rate": 5.242951819189046e-07, "loss": 0.2674, "step": 34000 }, { "epoch": 1.5927765025530518, "grad_norm": 0.5824418414633399, "learning_rate": 5.241789807263575e-07, "loss": 0.2628, "step": 34001 }, { "epoch": 1.5928233475429803, "grad_norm": 0.5658023913034589, "learning_rate": 5.240627909041648e-07, "loss": 0.2532, "step": 34002 }, { "epoch": 1.5928701925329087, "grad_norm": 0.619701769705084, "learning_rate": 5.239466124529932e-07, "loss": 0.2673, "step": 34003 }, { "epoch": 1.592917037522837, "grad_norm": 0.6265586033647711, "learning_rate": 5.238304453735124e-07, "loss": 0.2687, "step": 34004 }, { "epoch": 1.5929638825127652, "grad_norm": 0.6292532287058115, "learning_rate": 5.237142896663913e-07, "loss": 0.2871, "step": 34005 }, { "epoch": 1.5930107275026937, "grad_norm": 0.5910302922109223, "learning_rate": 5.23598145332297e-07, "loss": 0.2737, "step": 34006 }, { "epoch": 1.593057572492622, "grad_norm": 0.6063714746262845, "learning_rate": 5.234820123718986e-07, "loss": 0.2584, "step": 34007 }, { "epoch": 1.5931044174825502, "grad_norm": 0.611274612832089, "learning_rate": 5.233658907858652e-07, "loss": 0.2697, "step": 34008 }, { "epoch": 1.5931512624724786, "grad_norm": 0.602324461564993, "learning_rate": 5.232497805748634e-07, "loss": 0.2603, "step": 34009 }, { "epoch": 1.5931981074624069, "grad_norm": 0.5934911478049706, "learning_rate": 5.231336817395627e-07, "loss": 0.2691, "step": 34010 }, { "epoch": 1.593244952452335, "grad_norm": 0.600075695904838, "learning_rate": 5.230175942806304e-07, "loss": 0.2712, "step": 34011 }, { "epoch": 1.5932917974422636, "grad_norm": 0.6282842313312026, "learning_rate": 5.229015181987356e-07, "loss": 0.2659, "step": 34012 }, { "epoch": 1.593338642432192, "grad_norm": 0.6501158933953377, "learning_rate": 5.227854534945453e-07, "loss": 0.2857, "step": 34013 }, { "epoch": 1.59338548742212, "grad_norm": 0.5943390202911971, "learning_rate": 5.226694001687283e-07, "loss": 0.264, "step": 34014 }, { "epoch": 1.5934323324120485, "grad_norm": 0.6250283742059088, "learning_rate": 5.225533582219511e-07, "loss": 0.2733, "step": 34015 }, { "epoch": 1.593479177401977, "grad_norm": 0.6154186210540828, "learning_rate": 5.224373276548825e-07, "loss": 0.2751, "step": 34016 }, { "epoch": 1.5935260223919052, "grad_norm": 0.5820216398635041, "learning_rate": 5.223213084681899e-07, "loss": 0.2635, "step": 34017 }, { "epoch": 1.5935728673818335, "grad_norm": 0.606176201608408, "learning_rate": 5.222053006625413e-07, "loss": 0.2727, "step": 34018 }, { "epoch": 1.593619712371762, "grad_norm": 0.6393656723415703, "learning_rate": 5.220893042386046e-07, "loss": 0.2698, "step": 34019 }, { "epoch": 1.5936665573616902, "grad_norm": 0.5971657731278998, "learning_rate": 5.219733191970469e-07, "loss": 0.2776, "step": 34020 }, { "epoch": 1.5937134023516184, "grad_norm": 0.6210213064097707, "learning_rate": 5.218573455385348e-07, "loss": 0.2858, "step": 34021 }, { "epoch": 1.5937602473415469, "grad_norm": 0.6032734263803138, "learning_rate": 5.217413832637366e-07, "loss": 0.2654, "step": 34022 }, { "epoch": 1.593807092331475, "grad_norm": 0.623046334614662, "learning_rate": 5.216254323733194e-07, "loss": 0.2754, "step": 34023 }, { "epoch": 1.5938539373214033, "grad_norm": 0.5869121485354771, "learning_rate": 5.215094928679507e-07, "loss": 0.2758, "step": 34024 }, { "epoch": 1.5939007823113318, "grad_norm": 0.5808045194616174, "learning_rate": 5.213935647482976e-07, "loss": 0.2696, "step": 34025 }, { "epoch": 1.5939476273012603, "grad_norm": 0.6215691503890661, "learning_rate": 5.212776480150278e-07, "loss": 0.2837, "step": 34026 }, { "epoch": 1.5939944722911883, "grad_norm": 0.6011108819491869, "learning_rate": 5.211617426688079e-07, "loss": 0.2853, "step": 34027 }, { "epoch": 1.5940413172811168, "grad_norm": 0.6859621312588737, "learning_rate": 5.21045848710304e-07, "loss": 0.2772, "step": 34028 }, { "epoch": 1.5940881622710452, "grad_norm": 0.6016152736688379, "learning_rate": 5.20929966140184e-07, "loss": 0.2827, "step": 34029 }, { "epoch": 1.5941350072609735, "grad_norm": 0.5676719379307712, "learning_rate": 5.208140949591145e-07, "loss": 0.2497, "step": 34030 }, { "epoch": 1.5941818522509017, "grad_norm": 0.632716813464884, "learning_rate": 5.206982351677625e-07, "loss": 0.2781, "step": 34031 }, { "epoch": 1.5942286972408302, "grad_norm": 0.61812591138689, "learning_rate": 5.205823867667948e-07, "loss": 0.2781, "step": 34032 }, { "epoch": 1.5942755422307584, "grad_norm": 0.5786795584087996, "learning_rate": 5.204665497568787e-07, "loss": 0.2603, "step": 34033 }, { "epoch": 1.5943223872206866, "grad_norm": 0.5799451250281479, "learning_rate": 5.203507241386796e-07, "loss": 0.261, "step": 34034 }, { "epoch": 1.594369232210615, "grad_norm": 0.5977848287436669, "learning_rate": 5.202349099128643e-07, "loss": 0.2671, "step": 34035 }, { "epoch": 1.5944160772005433, "grad_norm": 0.5930029641321813, "learning_rate": 5.201191070800993e-07, "loss": 0.2652, "step": 34036 }, { "epoch": 1.5944629221904716, "grad_norm": 0.5619413882859963, "learning_rate": 5.200033156410511e-07, "loss": 0.2493, "step": 34037 }, { "epoch": 1.5945097671804, "grad_norm": 0.6024470725208827, "learning_rate": 5.198875355963865e-07, "loss": 0.272, "step": 34038 }, { "epoch": 1.5945566121703285, "grad_norm": 0.5732289490553566, "learning_rate": 5.19771766946772e-07, "loss": 0.2666, "step": 34039 }, { "epoch": 1.5946034571602568, "grad_norm": 0.5875880588034713, "learning_rate": 5.196560096928724e-07, "loss": 0.2703, "step": 34040 }, { "epoch": 1.594650302150185, "grad_norm": 0.605131732702283, "learning_rate": 5.195402638353555e-07, "loss": 0.2812, "step": 34041 }, { "epoch": 1.5946971471401135, "grad_norm": 0.5846560421493537, "learning_rate": 5.194245293748859e-07, "loss": 0.2548, "step": 34042 }, { "epoch": 1.5947439921300417, "grad_norm": 0.6021549699931559, "learning_rate": 5.193088063121304e-07, "loss": 0.2615, "step": 34043 }, { "epoch": 1.59479083711997, "grad_norm": 0.6601653319078495, "learning_rate": 5.19193094647755e-07, "loss": 0.278, "step": 34044 }, { "epoch": 1.5948376821098984, "grad_norm": 0.5863110070689169, "learning_rate": 5.190773943824259e-07, "loss": 0.2609, "step": 34045 }, { "epoch": 1.5948845270998266, "grad_norm": 0.5804040375539657, "learning_rate": 5.189617055168081e-07, "loss": 0.2524, "step": 34046 }, { "epoch": 1.5949313720897549, "grad_norm": 0.6039759304209003, "learning_rate": 5.188460280515683e-07, "loss": 0.265, "step": 34047 }, { "epoch": 1.5949782170796833, "grad_norm": 0.6110889916909481, "learning_rate": 5.18730361987371e-07, "loss": 0.2747, "step": 34048 }, { "epoch": 1.5950250620696118, "grad_norm": 0.5825666357490591, "learning_rate": 5.186147073248826e-07, "loss": 0.2624, "step": 34049 }, { "epoch": 1.5950719070595398, "grad_norm": 0.5645782423527395, "learning_rate": 5.184990640647683e-07, "loss": 0.2835, "step": 34050 }, { "epoch": 1.5951187520494683, "grad_norm": 0.6132524679023954, "learning_rate": 5.183834322076947e-07, "loss": 0.2639, "step": 34051 }, { "epoch": 1.5951655970393968, "grad_norm": 0.6136827137338383, "learning_rate": 5.182678117543258e-07, "loss": 0.2679, "step": 34052 }, { "epoch": 1.595212442029325, "grad_norm": 0.6297370804934815, "learning_rate": 5.181522027053273e-07, "loss": 0.2861, "step": 34053 }, { "epoch": 1.5952592870192532, "grad_norm": 0.6342686716026, "learning_rate": 5.180366050613658e-07, "loss": 0.293, "step": 34054 }, { "epoch": 1.5953061320091817, "grad_norm": 0.6116634225156133, "learning_rate": 5.179210188231045e-07, "loss": 0.2707, "step": 34055 }, { "epoch": 1.59535297699911, "grad_norm": 0.5961999644823891, "learning_rate": 5.178054439912097e-07, "loss": 0.2625, "step": 34056 }, { "epoch": 1.5953998219890382, "grad_norm": 0.5988737308124271, "learning_rate": 5.176898805663472e-07, "loss": 0.2603, "step": 34057 }, { "epoch": 1.5954466669789666, "grad_norm": 0.6041359956029397, "learning_rate": 5.175743285491802e-07, "loss": 0.2604, "step": 34058 }, { "epoch": 1.5954935119688949, "grad_norm": 0.6306771986375111, "learning_rate": 5.174587879403747e-07, "loss": 0.282, "step": 34059 }, { "epoch": 1.5955403569588231, "grad_norm": 0.5807292065829689, "learning_rate": 5.173432587405958e-07, "loss": 0.2719, "step": 34060 }, { "epoch": 1.5955872019487516, "grad_norm": 0.6179816367772771, "learning_rate": 5.172277409505085e-07, "loss": 0.269, "step": 34061 }, { "epoch": 1.59563404693868, "grad_norm": 0.6167566464370771, "learning_rate": 5.171122345707769e-07, "loss": 0.2699, "step": 34062 }, { "epoch": 1.595680891928608, "grad_norm": 0.5677066052511227, "learning_rate": 5.169967396020664e-07, "loss": 0.2578, "step": 34063 }, { "epoch": 1.5957277369185365, "grad_norm": 0.601509526984389, "learning_rate": 5.168812560450406e-07, "loss": 0.2636, "step": 34064 }, { "epoch": 1.595774581908465, "grad_norm": 0.6340755499936742, "learning_rate": 5.16765783900365e-07, "loss": 0.2635, "step": 34065 }, { "epoch": 1.5958214268983932, "grad_norm": 0.5785479973088604, "learning_rate": 5.166503231687036e-07, "loss": 0.2743, "step": 34066 }, { "epoch": 1.5958682718883215, "grad_norm": 0.562461538338202, "learning_rate": 5.165348738507212e-07, "loss": 0.2577, "step": 34067 }, { "epoch": 1.59591511687825, "grad_norm": 0.5881054586194544, "learning_rate": 5.164194359470825e-07, "loss": 0.2793, "step": 34068 }, { "epoch": 1.5959619618681782, "grad_norm": 0.5618235876928608, "learning_rate": 5.163040094584518e-07, "loss": 0.263, "step": 34069 }, { "epoch": 1.5960088068581064, "grad_norm": 0.6214244205237861, "learning_rate": 5.16188594385492e-07, "loss": 0.2868, "step": 34070 }, { "epoch": 1.5960556518480349, "grad_norm": 0.6412781092946401, "learning_rate": 5.160731907288682e-07, "loss": 0.2814, "step": 34071 }, { "epoch": 1.5961024968379631, "grad_norm": 0.6006224517987054, "learning_rate": 5.159577984892447e-07, "loss": 0.2731, "step": 34072 }, { "epoch": 1.5961493418278914, "grad_norm": 0.6670478712882852, "learning_rate": 5.158424176672855e-07, "loss": 0.2922, "step": 34073 }, { "epoch": 1.5961961868178198, "grad_norm": 0.5702074775264568, "learning_rate": 5.157270482636542e-07, "loss": 0.2644, "step": 34074 }, { "epoch": 1.5962430318077483, "grad_norm": 0.596953359983059, "learning_rate": 5.156116902790159e-07, "loss": 0.2645, "step": 34075 }, { "epoch": 1.5962898767976765, "grad_norm": 0.6147477478589068, "learning_rate": 5.154963437140334e-07, "loss": 0.2849, "step": 34076 }, { "epoch": 1.5963367217876048, "grad_norm": 0.5894463379096635, "learning_rate": 5.1538100856937e-07, "loss": 0.2681, "step": 34077 }, { "epoch": 1.5963835667775332, "grad_norm": 0.5636116733278439, "learning_rate": 5.152656848456902e-07, "loss": 0.268, "step": 34078 }, { "epoch": 1.5964304117674615, "grad_norm": 0.6187522226125762, "learning_rate": 5.151503725436574e-07, "loss": 0.2756, "step": 34079 }, { "epoch": 1.5964772567573897, "grad_norm": 0.6002392351964134, "learning_rate": 5.150350716639354e-07, "loss": 0.2749, "step": 34080 }, { "epoch": 1.5965241017473182, "grad_norm": 0.6128727158386584, "learning_rate": 5.149197822071883e-07, "loss": 0.2601, "step": 34081 }, { "epoch": 1.5965709467372464, "grad_norm": 0.561844211833628, "learning_rate": 5.148045041740785e-07, "loss": 0.2701, "step": 34082 }, { "epoch": 1.5966177917271747, "grad_norm": 0.5647405741429901, "learning_rate": 5.146892375652701e-07, "loss": 0.2466, "step": 34083 }, { "epoch": 1.5966646367171031, "grad_norm": 0.5757640098458237, "learning_rate": 5.145739823814258e-07, "loss": 0.2642, "step": 34084 }, { "epoch": 1.5967114817070316, "grad_norm": 0.6131516747348973, "learning_rate": 5.144587386232089e-07, "loss": 0.2675, "step": 34085 }, { "epoch": 1.5967583266969596, "grad_norm": 0.6110022285850939, "learning_rate": 5.14343506291283e-07, "loss": 0.2784, "step": 34086 }, { "epoch": 1.596805171686888, "grad_norm": 0.6215720016915351, "learning_rate": 5.14228285386312e-07, "loss": 0.2785, "step": 34087 }, { "epoch": 1.5968520166768165, "grad_norm": 0.5913243384383173, "learning_rate": 5.141130759089574e-07, "loss": 0.2706, "step": 34088 }, { "epoch": 1.5968988616667448, "grad_norm": 0.557214286273495, "learning_rate": 5.139978778598834e-07, "loss": 0.2662, "step": 34089 }, { "epoch": 1.596945706656673, "grad_norm": 0.5793156901876624, "learning_rate": 5.138826912397521e-07, "loss": 0.2692, "step": 34090 }, { "epoch": 1.5969925516466015, "grad_norm": 0.6048502636953249, "learning_rate": 5.137675160492264e-07, "loss": 0.2676, "step": 34091 }, { "epoch": 1.5970393966365297, "grad_norm": 0.5645627290874411, "learning_rate": 5.136523522889694e-07, "loss": 0.2599, "step": 34092 }, { "epoch": 1.597086241626458, "grad_norm": 0.6130470355831525, "learning_rate": 5.135371999596447e-07, "loss": 0.2706, "step": 34093 }, { "epoch": 1.5971330866163864, "grad_norm": 0.6489774126008198, "learning_rate": 5.134220590619135e-07, "loss": 0.2883, "step": 34094 }, { "epoch": 1.5971799316063147, "grad_norm": 0.5445464558673142, "learning_rate": 5.133069295964391e-07, "loss": 0.246, "step": 34095 }, { "epoch": 1.597226776596243, "grad_norm": 0.6276257275778745, "learning_rate": 5.131918115638845e-07, "loss": 0.2656, "step": 34096 }, { "epoch": 1.5972736215861714, "grad_norm": 0.6036642155818192, "learning_rate": 5.13076704964911e-07, "loss": 0.2728, "step": 34097 }, { "epoch": 1.5973204665760998, "grad_norm": 0.5924681442009881, "learning_rate": 5.129616098001816e-07, "loss": 0.2529, "step": 34098 }, { "epoch": 1.5973673115660278, "grad_norm": 0.5655199010416935, "learning_rate": 5.128465260703589e-07, "loss": 0.2586, "step": 34099 }, { "epoch": 1.5974141565559563, "grad_norm": 0.6111507440518298, "learning_rate": 5.127314537761055e-07, "loss": 0.2772, "step": 34100 }, { "epoch": 1.5974610015458848, "grad_norm": 0.5701481511683643, "learning_rate": 5.126163929180827e-07, "loss": 0.2592, "step": 34101 }, { "epoch": 1.597507846535813, "grad_norm": 0.6117565088988591, "learning_rate": 5.125013434969528e-07, "loss": 0.2749, "step": 34102 }, { "epoch": 1.5975546915257413, "grad_norm": 0.5800983041102182, "learning_rate": 5.123863055133788e-07, "loss": 0.2573, "step": 34103 }, { "epoch": 1.5976015365156697, "grad_norm": 0.5680808507853451, "learning_rate": 5.122712789680215e-07, "loss": 0.2546, "step": 34104 }, { "epoch": 1.597648381505598, "grad_norm": 0.593208263682426, "learning_rate": 5.121562638615433e-07, "loss": 0.2614, "step": 34105 }, { "epoch": 1.5976952264955262, "grad_norm": 0.5928111477828579, "learning_rate": 5.120412601946068e-07, "loss": 0.2627, "step": 34106 }, { "epoch": 1.5977420714854547, "grad_norm": 0.6144754812385017, "learning_rate": 5.119262679678727e-07, "loss": 0.264, "step": 34107 }, { "epoch": 1.597788916475383, "grad_norm": 0.560469767238487, "learning_rate": 5.11811287182003e-07, "loss": 0.2666, "step": 34108 }, { "epoch": 1.5978357614653111, "grad_norm": 0.5991020990826106, "learning_rate": 5.1169631783766e-07, "loss": 0.2719, "step": 34109 }, { "epoch": 1.5978826064552396, "grad_norm": 0.6208482691574164, "learning_rate": 5.115813599355052e-07, "loss": 0.2785, "step": 34110 }, { "epoch": 1.597929451445168, "grad_norm": 0.62040768234667, "learning_rate": 5.114664134761993e-07, "loss": 0.2846, "step": 34111 }, { "epoch": 1.5979762964350963, "grad_norm": 0.6064312138301571, "learning_rate": 5.113514784604054e-07, "loss": 0.272, "step": 34112 }, { "epoch": 1.5980231414250246, "grad_norm": 0.6138943929415186, "learning_rate": 5.112365548887829e-07, "loss": 0.2744, "step": 34113 }, { "epoch": 1.598069986414953, "grad_norm": 0.6059496264315388, "learning_rate": 5.111216427619941e-07, "loss": 0.2702, "step": 34114 }, { "epoch": 1.5981168314048813, "grad_norm": 0.6170264324220758, "learning_rate": 5.110067420807008e-07, "loss": 0.2837, "step": 34115 }, { "epoch": 1.5981636763948095, "grad_norm": 0.5785904914002855, "learning_rate": 5.108918528455634e-07, "loss": 0.2645, "step": 34116 }, { "epoch": 1.598210521384738, "grad_norm": 0.5580982620673691, "learning_rate": 5.107769750572445e-07, "loss": 0.2557, "step": 34117 }, { "epoch": 1.5982573663746662, "grad_norm": 0.6162666763102974, "learning_rate": 5.106621087164038e-07, "loss": 0.2724, "step": 34118 }, { "epoch": 1.5983042113645944, "grad_norm": 0.5964743047601242, "learning_rate": 5.105472538237019e-07, "loss": 0.2764, "step": 34119 }, { "epoch": 1.598351056354523, "grad_norm": 0.6203094766469108, "learning_rate": 5.104324103798008e-07, "loss": 0.2822, "step": 34120 }, { "epoch": 1.5983979013444514, "grad_norm": 0.6480611971666201, "learning_rate": 5.103175783853609e-07, "loss": 0.2823, "step": 34121 }, { "epoch": 1.5984447463343794, "grad_norm": 0.6066499260442193, "learning_rate": 5.102027578410434e-07, "loss": 0.2692, "step": 34122 }, { "epoch": 1.5984915913243078, "grad_norm": 0.5763825532732839, "learning_rate": 5.100879487475088e-07, "loss": 0.2582, "step": 34123 }, { "epoch": 1.5985384363142363, "grad_norm": 0.6678630218295439, "learning_rate": 5.099731511054188e-07, "loss": 0.2756, "step": 34124 }, { "epoch": 1.5985852813041646, "grad_norm": 0.6157890946443826, "learning_rate": 5.098583649154329e-07, "loss": 0.2682, "step": 34125 }, { "epoch": 1.5986321262940928, "grad_norm": 0.6007592673545581, "learning_rate": 5.097435901782113e-07, "loss": 0.2659, "step": 34126 }, { "epoch": 1.5986789712840213, "grad_norm": 0.5783094185005792, "learning_rate": 5.096288268944153e-07, "loss": 0.2812, "step": 34127 }, { "epoch": 1.5987258162739495, "grad_norm": 0.6556103638141078, "learning_rate": 5.095140750647048e-07, "loss": 0.2874, "step": 34128 }, { "epoch": 1.5987726612638777, "grad_norm": 0.6105139331950619, "learning_rate": 5.093993346897408e-07, "loss": 0.2726, "step": 34129 }, { "epoch": 1.5988195062538062, "grad_norm": 0.5915684067296401, "learning_rate": 5.092846057701839e-07, "loss": 0.2895, "step": 34130 }, { "epoch": 1.5988663512437344, "grad_norm": 0.6074042131875835, "learning_rate": 5.091698883066931e-07, "loss": 0.2689, "step": 34131 }, { "epoch": 1.5989131962336627, "grad_norm": 0.5725424905068209, "learning_rate": 5.090551822999298e-07, "loss": 0.2706, "step": 34132 }, { "epoch": 1.5989600412235911, "grad_norm": 0.6118791492029255, "learning_rate": 5.08940487750553e-07, "loss": 0.2637, "step": 34133 }, { "epoch": 1.5990068862135196, "grad_norm": 0.5660584556581425, "learning_rate": 5.088258046592232e-07, "loss": 0.2512, "step": 34134 }, { "epoch": 1.5990537312034476, "grad_norm": 0.6117555858050562, "learning_rate": 5.087111330266e-07, "loss": 0.2702, "step": 34135 }, { "epoch": 1.599100576193376, "grad_norm": 0.5925230717996549, "learning_rate": 5.085964728533449e-07, "loss": 0.2658, "step": 34136 }, { "epoch": 1.5991474211833046, "grad_norm": 0.6052712170529342, "learning_rate": 5.08481824140116e-07, "loss": 0.2614, "step": 34137 }, { "epoch": 1.5991942661732328, "grad_norm": 0.6163807985509796, "learning_rate": 5.083671868875739e-07, "loss": 0.2771, "step": 34138 }, { "epoch": 1.599241111163161, "grad_norm": 0.5882054714573242, "learning_rate": 5.082525610963776e-07, "loss": 0.2638, "step": 34139 }, { "epoch": 1.5992879561530895, "grad_norm": 0.6649231570479505, "learning_rate": 5.081379467671873e-07, "loss": 0.2895, "step": 34140 }, { "epoch": 1.5993348011430177, "grad_norm": 0.5907652720631265, "learning_rate": 5.080233439006623e-07, "loss": 0.2591, "step": 34141 }, { "epoch": 1.599381646132946, "grad_norm": 0.6320229347935665, "learning_rate": 5.079087524974632e-07, "loss": 0.2879, "step": 34142 }, { "epoch": 1.5994284911228744, "grad_norm": 0.6060750983851854, "learning_rate": 5.077941725582477e-07, "loss": 0.2797, "step": 34143 }, { "epoch": 1.5994753361128027, "grad_norm": 0.5714488526669228, "learning_rate": 5.07679604083676e-07, "loss": 0.2624, "step": 34144 }, { "epoch": 1.599522181102731, "grad_norm": 0.6353920743830173, "learning_rate": 5.075650470744081e-07, "loss": 0.2748, "step": 34145 }, { "epoch": 1.5995690260926594, "grad_norm": 0.6533442066192034, "learning_rate": 5.074505015311021e-07, "loss": 0.2882, "step": 34146 }, { "epoch": 1.5996158710825878, "grad_norm": 0.5923800443368882, "learning_rate": 5.073359674544173e-07, "loss": 0.2585, "step": 34147 }, { "epoch": 1.599662716072516, "grad_norm": 0.5862813875456494, "learning_rate": 5.072214448450141e-07, "loss": 0.2518, "step": 34148 }, { "epoch": 1.5997095610624443, "grad_norm": 0.6093745725942012, "learning_rate": 5.071069337035497e-07, "loss": 0.2881, "step": 34149 }, { "epoch": 1.5997564060523728, "grad_norm": 0.5731482168190369, "learning_rate": 5.069924340306845e-07, "loss": 0.2692, "step": 34150 }, { "epoch": 1.599803251042301, "grad_norm": 0.5736211683257257, "learning_rate": 5.068779458270764e-07, "loss": 0.2531, "step": 34151 }, { "epoch": 1.5998500960322293, "grad_norm": 0.5996260925708913, "learning_rate": 5.067634690933856e-07, "loss": 0.2609, "step": 34152 }, { "epoch": 1.5998969410221577, "grad_norm": 0.6271301034272259, "learning_rate": 5.066490038302696e-07, "loss": 0.2808, "step": 34153 }, { "epoch": 1.599943786012086, "grad_norm": 0.5625453362582323, "learning_rate": 5.065345500383881e-07, "loss": 0.2528, "step": 34154 }, { "epoch": 1.5999906310020142, "grad_norm": 0.5806954380872786, "learning_rate": 5.064201077183983e-07, "loss": 0.2732, "step": 34155 }, { "epoch": 1.6000374759919427, "grad_norm": 0.5667133278144679, "learning_rate": 5.063056768709601e-07, "loss": 0.267, "step": 34156 }, { "epoch": 1.6000843209818711, "grad_norm": 0.6002328794854981, "learning_rate": 5.061912574967315e-07, "loss": 0.2843, "step": 34157 }, { "epoch": 1.6001311659717992, "grad_norm": 0.5866247221426889, "learning_rate": 5.060768495963708e-07, "loss": 0.2651, "step": 34158 }, { "epoch": 1.6001780109617276, "grad_norm": 0.6043868120889481, "learning_rate": 5.059624531705379e-07, "loss": 0.2544, "step": 34159 }, { "epoch": 1.600224855951656, "grad_norm": 0.5806948992710771, "learning_rate": 5.058480682198893e-07, "loss": 0.2614, "step": 34160 }, { "epoch": 1.6002717009415843, "grad_norm": 0.5738960238467357, "learning_rate": 5.057336947450836e-07, "loss": 0.2614, "step": 34161 }, { "epoch": 1.6003185459315126, "grad_norm": 0.6092650707717351, "learning_rate": 5.056193327467793e-07, "loss": 0.2511, "step": 34162 }, { "epoch": 1.600365390921441, "grad_norm": 0.599742892948744, "learning_rate": 5.055049822256342e-07, "loss": 0.2599, "step": 34163 }, { "epoch": 1.6004122359113693, "grad_norm": 0.6419887899940314, "learning_rate": 5.053906431823066e-07, "loss": 0.2834, "step": 34164 }, { "epoch": 1.6004590809012975, "grad_norm": 0.6081128221862497, "learning_rate": 5.052763156174547e-07, "loss": 0.2592, "step": 34165 }, { "epoch": 1.600505925891226, "grad_norm": 0.568970319215392, "learning_rate": 5.05161999531737e-07, "loss": 0.2611, "step": 34166 }, { "epoch": 1.6005527708811542, "grad_norm": 0.6586001407402063, "learning_rate": 5.050476949258104e-07, "loss": 0.2912, "step": 34167 }, { "epoch": 1.6005996158710825, "grad_norm": 0.6416535364316329, "learning_rate": 5.049334018003324e-07, "loss": 0.2773, "step": 34168 }, { "epoch": 1.600646460861011, "grad_norm": 0.5797268628241153, "learning_rate": 5.04819120155961e-07, "loss": 0.2598, "step": 34169 }, { "epoch": 1.6006933058509394, "grad_norm": 0.6006532120248316, "learning_rate": 5.047048499933543e-07, "loss": 0.2768, "step": 34170 }, { "epoch": 1.6007401508408674, "grad_norm": 0.6385445880537148, "learning_rate": 5.045905913131696e-07, "loss": 0.2686, "step": 34171 }, { "epoch": 1.6007869958307959, "grad_norm": 0.6638640813490906, "learning_rate": 5.044763441160646e-07, "loss": 0.2831, "step": 34172 }, { "epoch": 1.6008338408207243, "grad_norm": 0.5573358764559159, "learning_rate": 5.043621084026973e-07, "loss": 0.2621, "step": 34173 }, { "epoch": 1.6008806858106526, "grad_norm": 0.5747509220589184, "learning_rate": 5.042478841737245e-07, "loss": 0.2745, "step": 34174 }, { "epoch": 1.6009275308005808, "grad_norm": 0.6203684768256286, "learning_rate": 5.041336714298029e-07, "loss": 0.281, "step": 34175 }, { "epoch": 1.6009743757905093, "grad_norm": 0.6152118544064585, "learning_rate": 5.040194701715903e-07, "loss": 0.2945, "step": 34176 }, { "epoch": 1.6010212207804375, "grad_norm": 0.61586826408839, "learning_rate": 5.039052803997441e-07, "loss": 0.2798, "step": 34177 }, { "epoch": 1.6010680657703658, "grad_norm": 0.6341213037389216, "learning_rate": 5.037911021149212e-07, "loss": 0.2776, "step": 34178 }, { "epoch": 1.6011149107602942, "grad_norm": 0.6256147289052907, "learning_rate": 5.036769353177793e-07, "loss": 0.2896, "step": 34179 }, { "epoch": 1.6011617557502225, "grad_norm": 0.6012421893659194, "learning_rate": 5.035627800089743e-07, "loss": 0.2752, "step": 34180 }, { "epoch": 1.6012086007401507, "grad_norm": 0.6000123903736002, "learning_rate": 5.034486361891644e-07, "loss": 0.2761, "step": 34181 }, { "epoch": 1.6012554457300792, "grad_norm": 0.5903289343534278, "learning_rate": 5.033345038590054e-07, "loss": 0.2672, "step": 34182 }, { "epoch": 1.6013022907200076, "grad_norm": 0.5670433334800454, "learning_rate": 5.03220383019154e-07, "loss": 0.2678, "step": 34183 }, { "epoch": 1.6013491357099359, "grad_norm": 0.5973975475482839, "learning_rate": 5.031062736702677e-07, "loss": 0.2717, "step": 34184 }, { "epoch": 1.601395980699864, "grad_norm": 0.6275254561779214, "learning_rate": 5.029921758130035e-07, "loss": 0.2696, "step": 34185 }, { "epoch": 1.6014428256897926, "grad_norm": 0.5811499713538623, "learning_rate": 5.028780894480167e-07, "loss": 0.2651, "step": 34186 }, { "epoch": 1.6014896706797208, "grad_norm": 0.5639567218362619, "learning_rate": 5.027640145759654e-07, "loss": 0.2678, "step": 34187 }, { "epoch": 1.601536515669649, "grad_norm": 0.5625908094688239, "learning_rate": 5.026499511975045e-07, "loss": 0.2602, "step": 34188 }, { "epoch": 1.6015833606595775, "grad_norm": 0.6506968680602722, "learning_rate": 5.025358993132909e-07, "loss": 0.2897, "step": 34189 }, { "epoch": 1.6016302056495058, "grad_norm": 0.590945866543796, "learning_rate": 5.024218589239813e-07, "loss": 0.2835, "step": 34190 }, { "epoch": 1.601677050639434, "grad_norm": 0.6343294568806735, "learning_rate": 5.023078300302326e-07, "loss": 0.2812, "step": 34191 }, { "epoch": 1.6017238956293625, "grad_norm": 0.5585570295269423, "learning_rate": 5.021938126326994e-07, "loss": 0.2534, "step": 34192 }, { "epoch": 1.601770740619291, "grad_norm": 0.603961264728167, "learning_rate": 5.020798067320387e-07, "loss": 0.2697, "step": 34193 }, { "epoch": 1.601817585609219, "grad_norm": 0.608146031114681, "learning_rate": 5.019658123289073e-07, "loss": 0.2643, "step": 34194 }, { "epoch": 1.6018644305991474, "grad_norm": 0.521389337646337, "learning_rate": 5.018518294239597e-07, "loss": 0.2482, "step": 34195 }, { "epoch": 1.6019112755890759, "grad_norm": 0.6091940514866839, "learning_rate": 5.017378580178528e-07, "loss": 0.2682, "step": 34196 }, { "epoch": 1.601958120579004, "grad_norm": 0.5808981811071997, "learning_rate": 5.016238981112426e-07, "loss": 0.2649, "step": 34197 }, { "epoch": 1.6020049655689323, "grad_norm": 0.5715174610308539, "learning_rate": 5.015099497047843e-07, "loss": 0.2533, "step": 34198 }, { "epoch": 1.6020518105588608, "grad_norm": 0.6231601079110448, "learning_rate": 5.013960127991338e-07, "loss": 0.2879, "step": 34199 }, { "epoch": 1.602098655548789, "grad_norm": 0.5998988822988134, "learning_rate": 5.012820873949467e-07, "loss": 0.2835, "step": 34200 }, { "epoch": 1.6021455005387173, "grad_norm": 0.5666934194527156, "learning_rate": 5.011681734928797e-07, "loss": 0.2562, "step": 34201 }, { "epoch": 1.6021923455286458, "grad_norm": 0.6200703789499082, "learning_rate": 5.010542710935867e-07, "loss": 0.2736, "step": 34202 }, { "epoch": 1.602239190518574, "grad_norm": 0.5637380312628364, "learning_rate": 5.009403801977247e-07, "loss": 0.2613, "step": 34203 }, { "epoch": 1.6022860355085022, "grad_norm": 0.6058057652647058, "learning_rate": 5.00826500805948e-07, "loss": 0.2711, "step": 34204 }, { "epoch": 1.6023328804984307, "grad_norm": 0.6418880996084321, "learning_rate": 5.007126329189119e-07, "loss": 0.2846, "step": 34205 }, { "epoch": 1.6023797254883592, "grad_norm": 0.6423183219151503, "learning_rate": 5.005987765372722e-07, "loss": 0.2693, "step": 34206 }, { "epoch": 1.6024265704782872, "grad_norm": 0.571799716128048, "learning_rate": 5.004849316616839e-07, "loss": 0.2669, "step": 34207 }, { "epoch": 1.6024734154682156, "grad_norm": 0.5903416252367187, "learning_rate": 5.003710982928031e-07, "loss": 0.2688, "step": 34208 }, { "epoch": 1.602520260458144, "grad_norm": 0.6059702408710438, "learning_rate": 5.002572764312841e-07, "loss": 0.2542, "step": 34209 }, { "epoch": 1.6025671054480723, "grad_norm": 0.6468291092280487, "learning_rate": 5.001434660777809e-07, "loss": 0.2832, "step": 34210 }, { "epoch": 1.6026139504380006, "grad_norm": 0.592688062147597, "learning_rate": 5.000296672329496e-07, "loss": 0.266, "step": 34211 }, { "epoch": 1.602660795427929, "grad_norm": 0.6152174010254504, "learning_rate": 4.999158798974449e-07, "loss": 0.2847, "step": 34212 }, { "epoch": 1.6027076404178573, "grad_norm": 0.5964102192346744, "learning_rate": 4.998021040719218e-07, "loss": 0.2677, "step": 34213 }, { "epoch": 1.6027544854077855, "grad_norm": 0.5866843755215881, "learning_rate": 4.996883397570345e-07, "loss": 0.2557, "step": 34214 }, { "epoch": 1.602801330397714, "grad_norm": 0.5516009596109726, "learning_rate": 4.995745869534388e-07, "loss": 0.2513, "step": 34215 }, { "epoch": 1.6028481753876422, "grad_norm": 0.582809591885351, "learning_rate": 4.994608456617888e-07, "loss": 0.2548, "step": 34216 }, { "epoch": 1.6028950203775705, "grad_norm": 0.603348947236136, "learning_rate": 4.993471158827379e-07, "loss": 0.2772, "step": 34217 }, { "epoch": 1.602941865367499, "grad_norm": 0.6121712845013138, "learning_rate": 4.992333976169417e-07, "loss": 0.2781, "step": 34218 }, { "epoch": 1.6029887103574274, "grad_norm": 0.563711949571632, "learning_rate": 4.991196908650542e-07, "loss": 0.2533, "step": 34219 }, { "epoch": 1.6030355553473556, "grad_norm": 0.6005214407474546, "learning_rate": 4.990059956277301e-07, "loss": 0.2665, "step": 34220 }, { "epoch": 1.6030824003372839, "grad_norm": 0.591504247735999, "learning_rate": 4.988923119056241e-07, "loss": 0.2738, "step": 34221 }, { "epoch": 1.6031292453272123, "grad_norm": 0.6359574656211681, "learning_rate": 4.987786396993893e-07, "loss": 0.2746, "step": 34222 }, { "epoch": 1.6031760903171406, "grad_norm": 0.5729189490399723, "learning_rate": 4.986649790096812e-07, "loss": 0.2672, "step": 34223 }, { "epoch": 1.6032229353070688, "grad_norm": 0.5677188201826455, "learning_rate": 4.985513298371524e-07, "loss": 0.2589, "step": 34224 }, { "epoch": 1.6032697802969973, "grad_norm": 0.5888892721442149, "learning_rate": 4.984376921824577e-07, "loss": 0.2742, "step": 34225 }, { "epoch": 1.6033166252869255, "grad_norm": 0.638126634603089, "learning_rate": 4.98324066046251e-07, "loss": 0.2655, "step": 34226 }, { "epoch": 1.6033634702768538, "grad_norm": 0.6058892096694142, "learning_rate": 4.982104514291869e-07, "loss": 0.2746, "step": 34227 }, { "epoch": 1.6034103152667822, "grad_norm": 0.6371109628625169, "learning_rate": 4.980968483319176e-07, "loss": 0.28, "step": 34228 }, { "epoch": 1.6034571602567107, "grad_norm": 0.6154947046646077, "learning_rate": 4.979832567550988e-07, "loss": 0.2721, "step": 34229 }, { "epoch": 1.6035040052466387, "grad_norm": 0.617343790724848, "learning_rate": 4.978696766993823e-07, "loss": 0.2666, "step": 34230 }, { "epoch": 1.6035508502365672, "grad_norm": 0.6185987763019727, "learning_rate": 4.977561081654225e-07, "loss": 0.2697, "step": 34231 }, { "epoch": 1.6035976952264956, "grad_norm": 0.5630225373799013, "learning_rate": 4.976425511538733e-07, "loss": 0.2514, "step": 34232 }, { "epoch": 1.6036445402164239, "grad_norm": 0.6191933475811776, "learning_rate": 4.975290056653884e-07, "loss": 0.2837, "step": 34233 }, { "epoch": 1.6036913852063521, "grad_norm": 0.6783812681319294, "learning_rate": 4.974154717006202e-07, "loss": 0.2846, "step": 34234 }, { "epoch": 1.6037382301962806, "grad_norm": 0.595287950503938, "learning_rate": 4.973019492602227e-07, "loss": 0.279, "step": 34235 }, { "epoch": 1.6037850751862088, "grad_norm": 0.5653680103791048, "learning_rate": 4.971884383448497e-07, "loss": 0.2609, "step": 34236 }, { "epoch": 1.603831920176137, "grad_norm": 0.5691665790363505, "learning_rate": 4.970749389551532e-07, "loss": 0.2704, "step": 34237 }, { "epoch": 1.6038787651660655, "grad_norm": 0.6143168441507827, "learning_rate": 4.969614510917869e-07, "loss": 0.279, "step": 34238 }, { "epoch": 1.6039256101559938, "grad_norm": 0.6376411789796588, "learning_rate": 4.968479747554042e-07, "loss": 0.2777, "step": 34239 }, { "epoch": 1.603972455145922, "grad_norm": 0.6194229240642736, "learning_rate": 4.967345099466583e-07, "loss": 0.2713, "step": 34240 }, { "epoch": 1.6040193001358505, "grad_norm": 0.6330496148248963, "learning_rate": 4.966210566662013e-07, "loss": 0.2711, "step": 34241 }, { "epoch": 1.604066145125779, "grad_norm": 0.5921923450197718, "learning_rate": 4.965076149146867e-07, "loss": 0.2638, "step": 34242 }, { "epoch": 1.604112990115707, "grad_norm": 0.6651418943096531, "learning_rate": 4.963941846927678e-07, "loss": 0.292, "step": 34243 }, { "epoch": 1.6041598351056354, "grad_norm": 0.6307104727435598, "learning_rate": 4.962807660010963e-07, "loss": 0.2594, "step": 34244 }, { "epoch": 1.6042066800955639, "grad_norm": 0.6064602099704826, "learning_rate": 4.961673588403252e-07, "loss": 0.2744, "step": 34245 }, { "epoch": 1.6042535250854921, "grad_norm": 0.5989951371007431, "learning_rate": 4.960539632111078e-07, "loss": 0.2682, "step": 34246 }, { "epoch": 1.6043003700754204, "grad_norm": 0.618968013473369, "learning_rate": 4.959405791140956e-07, "loss": 0.2624, "step": 34247 }, { "epoch": 1.6043472150653488, "grad_norm": 0.6232983905908569, "learning_rate": 4.958272065499417e-07, "loss": 0.282, "step": 34248 }, { "epoch": 1.604394060055277, "grad_norm": 0.5917519891087761, "learning_rate": 4.957138455192986e-07, "loss": 0.2661, "step": 34249 }, { "epoch": 1.6044409050452053, "grad_norm": 0.6235586122615348, "learning_rate": 4.956004960228191e-07, "loss": 0.2741, "step": 34250 }, { "epoch": 1.6044877500351338, "grad_norm": 0.568983031588924, "learning_rate": 4.954871580611545e-07, "loss": 0.2538, "step": 34251 }, { "epoch": 1.604534595025062, "grad_norm": 0.5675604927446005, "learning_rate": 4.953738316349579e-07, "loss": 0.2665, "step": 34252 }, { "epoch": 1.6045814400149903, "grad_norm": 0.5860039364265145, "learning_rate": 4.952605167448806e-07, "loss": 0.2639, "step": 34253 }, { "epoch": 1.6046282850049187, "grad_norm": 0.5974181166395642, "learning_rate": 4.95147213391575e-07, "loss": 0.265, "step": 34254 }, { "epoch": 1.6046751299948472, "grad_norm": 0.5858937835432976, "learning_rate": 4.950339215756933e-07, "loss": 0.279, "step": 34255 }, { "epoch": 1.6047219749847754, "grad_norm": 0.5659677479195789, "learning_rate": 4.949206412978874e-07, "loss": 0.273, "step": 34256 }, { "epoch": 1.6047688199747037, "grad_norm": 0.6494184639239546, "learning_rate": 4.948073725588102e-07, "loss": 0.2884, "step": 34257 }, { "epoch": 1.6048156649646321, "grad_norm": 0.5753215841714374, "learning_rate": 4.946941153591123e-07, "loss": 0.266, "step": 34258 }, { "epoch": 1.6048625099545604, "grad_norm": 0.5878251452201215, "learning_rate": 4.94580869699445e-07, "loss": 0.2811, "step": 34259 }, { "epoch": 1.6049093549444886, "grad_norm": 0.5754231256773146, "learning_rate": 4.944676355804612e-07, "loss": 0.2666, "step": 34260 }, { "epoch": 1.604956199934417, "grad_norm": 0.5962757404104094, "learning_rate": 4.943544130028116e-07, "loss": 0.2789, "step": 34261 }, { "epoch": 1.6050030449243453, "grad_norm": 0.5827491554205438, "learning_rate": 4.942412019671486e-07, "loss": 0.2727, "step": 34262 }, { "epoch": 1.6050498899142736, "grad_norm": 0.6070386306909048, "learning_rate": 4.941280024741232e-07, "loss": 0.2749, "step": 34263 }, { "epoch": 1.605096734904202, "grad_norm": 0.5749791260217227, "learning_rate": 4.940148145243875e-07, "loss": 0.2738, "step": 34264 }, { "epoch": 1.6051435798941305, "grad_norm": 0.5841771321229431, "learning_rate": 4.939016381185926e-07, "loss": 0.2617, "step": 34265 }, { "epoch": 1.6051904248840585, "grad_norm": 0.5845300359032052, "learning_rate": 4.937884732573889e-07, "loss": 0.2671, "step": 34266 }, { "epoch": 1.605237269873987, "grad_norm": 0.5972258535037342, "learning_rate": 4.936753199414282e-07, "loss": 0.277, "step": 34267 }, { "epoch": 1.6052841148639154, "grad_norm": 0.6014767236509235, "learning_rate": 4.935621781713618e-07, "loss": 0.2662, "step": 34268 }, { "epoch": 1.6053309598538437, "grad_norm": 0.6361391711539498, "learning_rate": 4.934490479478407e-07, "loss": 0.2802, "step": 34269 }, { "epoch": 1.605377804843772, "grad_norm": 0.5462853638883375, "learning_rate": 4.933359292715167e-07, "loss": 0.275, "step": 34270 }, { "epoch": 1.6054246498337004, "grad_norm": 0.6985048009727951, "learning_rate": 4.932228221430394e-07, "loss": 0.2974, "step": 34271 }, { "epoch": 1.6054714948236286, "grad_norm": 0.6274076306370571, "learning_rate": 4.93109726563061e-07, "loss": 0.2802, "step": 34272 }, { "epoch": 1.6055183398135568, "grad_norm": 0.6321541394508898, "learning_rate": 4.92996642532231e-07, "loss": 0.2812, "step": 34273 }, { "epoch": 1.6055651848034853, "grad_norm": 0.5955448133033231, "learning_rate": 4.928835700512011e-07, "loss": 0.2758, "step": 34274 }, { "epoch": 1.6056120297934136, "grad_norm": 0.6276116359464711, "learning_rate": 4.927705091206217e-07, "loss": 0.2801, "step": 34275 }, { "epoch": 1.6056588747833418, "grad_norm": 0.5872268630604252, "learning_rate": 4.926574597411443e-07, "loss": 0.2928, "step": 34276 }, { "epoch": 1.6057057197732703, "grad_norm": 0.572174051243345, "learning_rate": 4.925444219134179e-07, "loss": 0.2636, "step": 34277 }, { "epoch": 1.6057525647631987, "grad_norm": 0.6386997384546487, "learning_rate": 4.924313956380944e-07, "loss": 0.2967, "step": 34278 }, { "epoch": 1.6057994097531267, "grad_norm": 0.6441258350309594, "learning_rate": 4.92318380915823e-07, "loss": 0.2862, "step": 34279 }, { "epoch": 1.6058462547430552, "grad_norm": 0.6186602929986996, "learning_rate": 4.922053777472546e-07, "loss": 0.2687, "step": 34280 }, { "epoch": 1.6058930997329837, "grad_norm": 0.6150579781517628, "learning_rate": 4.920923861330398e-07, "loss": 0.2915, "step": 34281 }, { "epoch": 1.605939944722912, "grad_norm": 0.5542532339340162, "learning_rate": 4.919794060738292e-07, "loss": 0.2523, "step": 34282 }, { "epoch": 1.6059867897128401, "grad_norm": 0.6294828998590426, "learning_rate": 4.918664375702717e-07, "loss": 0.2793, "step": 34283 }, { "epoch": 1.6060336347027686, "grad_norm": 0.5799903618230534, "learning_rate": 4.917534806230184e-07, "loss": 0.2611, "step": 34284 }, { "epoch": 1.6060804796926968, "grad_norm": 0.5801962867992231, "learning_rate": 4.916405352327194e-07, "loss": 0.2715, "step": 34285 }, { "epoch": 1.606127324682625, "grad_norm": 0.5816980582324489, "learning_rate": 4.91527601400024e-07, "loss": 0.2758, "step": 34286 }, { "epoch": 1.6061741696725536, "grad_norm": 0.6296049139631759, "learning_rate": 4.914146791255822e-07, "loss": 0.2822, "step": 34287 }, { "epoch": 1.6062210146624818, "grad_norm": 0.5882940725450155, "learning_rate": 4.913017684100449e-07, "loss": 0.2654, "step": 34288 }, { "epoch": 1.60626785965241, "grad_norm": 0.6267491592078736, "learning_rate": 4.911888692540604e-07, "loss": 0.271, "step": 34289 }, { "epoch": 1.6063147046423385, "grad_norm": 0.611029809660125, "learning_rate": 4.910759816582788e-07, "loss": 0.2745, "step": 34290 }, { "epoch": 1.606361549632267, "grad_norm": 0.5829860173714176, "learning_rate": 4.909631056233502e-07, "loss": 0.2754, "step": 34291 }, { "epoch": 1.6064083946221952, "grad_norm": 0.5924173369164342, "learning_rate": 4.908502411499247e-07, "loss": 0.2641, "step": 34292 }, { "epoch": 1.6064552396121234, "grad_norm": 0.5698111379430942, "learning_rate": 4.907373882386502e-07, "loss": 0.2693, "step": 34293 }, { "epoch": 1.606502084602052, "grad_norm": 0.5518717505709207, "learning_rate": 4.906245468901776e-07, "loss": 0.2624, "step": 34294 }, { "epoch": 1.6065489295919801, "grad_norm": 0.5705150488922778, "learning_rate": 4.905117171051555e-07, "loss": 0.2532, "step": 34295 }, { "epoch": 1.6065957745819084, "grad_norm": 0.6392238279849646, "learning_rate": 4.903988988842332e-07, "loss": 0.2942, "step": 34296 }, { "epoch": 1.6066426195718368, "grad_norm": 0.6046476459698537, "learning_rate": 4.9028609222806e-07, "loss": 0.2868, "step": 34297 }, { "epoch": 1.606689464561765, "grad_norm": 0.5869049642179527, "learning_rate": 4.901732971372852e-07, "loss": 0.2603, "step": 34298 }, { "epoch": 1.6067363095516933, "grad_norm": 0.6268984826838131, "learning_rate": 4.900605136125589e-07, "loss": 0.278, "step": 34299 }, { "epoch": 1.6067831545416218, "grad_norm": 0.5650175165648486, "learning_rate": 4.89947741654529e-07, "loss": 0.2479, "step": 34300 }, { "epoch": 1.6068299995315503, "grad_norm": 0.6272524997311754, "learning_rate": 4.898349812638439e-07, "loss": 0.2629, "step": 34301 }, { "epoch": 1.6068768445214783, "grad_norm": 0.6006217066555637, "learning_rate": 4.897222324411532e-07, "loss": 0.2803, "step": 34302 }, { "epoch": 1.6069236895114067, "grad_norm": 0.5424268720226391, "learning_rate": 4.896094951871058e-07, "loss": 0.2472, "step": 34303 }, { "epoch": 1.6069705345013352, "grad_norm": 0.6006548204865029, "learning_rate": 4.894967695023506e-07, "loss": 0.2619, "step": 34304 }, { "epoch": 1.6070173794912634, "grad_norm": 0.5760397345482259, "learning_rate": 4.893840553875362e-07, "loss": 0.28, "step": 34305 }, { "epoch": 1.6070642244811917, "grad_norm": 0.6071995855142814, "learning_rate": 4.892713528433116e-07, "loss": 0.27, "step": 34306 }, { "epoch": 1.6071110694711201, "grad_norm": 0.6312160272762769, "learning_rate": 4.891586618703254e-07, "loss": 0.2732, "step": 34307 }, { "epoch": 1.6071579144610484, "grad_norm": 0.6331529675386592, "learning_rate": 4.890459824692245e-07, "loss": 0.2738, "step": 34308 }, { "epoch": 1.6072047594509766, "grad_norm": 0.5944036684089542, "learning_rate": 4.889333146406589e-07, "loss": 0.2854, "step": 34309 }, { "epoch": 1.607251604440905, "grad_norm": 0.6020033541314331, "learning_rate": 4.888206583852767e-07, "loss": 0.2729, "step": 34310 }, { "epoch": 1.6072984494308333, "grad_norm": 0.5812794557020291, "learning_rate": 4.887080137037259e-07, "loss": 0.2675, "step": 34311 }, { "epoch": 1.6073452944207616, "grad_norm": 0.6315601358800557, "learning_rate": 4.885953805966557e-07, "loss": 0.2844, "step": 34312 }, { "epoch": 1.60739213941069, "grad_norm": 0.5894382888428437, "learning_rate": 4.884827590647129e-07, "loss": 0.265, "step": 34313 }, { "epoch": 1.6074389844006185, "grad_norm": 0.5872666270114727, "learning_rate": 4.883701491085468e-07, "loss": 0.2617, "step": 34314 }, { "epoch": 1.6074858293905465, "grad_norm": 0.6194031024964152, "learning_rate": 4.882575507288043e-07, "loss": 0.2749, "step": 34315 }, { "epoch": 1.607532674380475, "grad_norm": 0.5911190481787736, "learning_rate": 4.88144963926134e-07, "loss": 0.2576, "step": 34316 }, { "epoch": 1.6075795193704034, "grad_norm": 0.6329440001000411, "learning_rate": 4.880323887011837e-07, "loss": 0.2771, "step": 34317 }, { "epoch": 1.6076263643603317, "grad_norm": 0.5447153329533676, "learning_rate": 4.879198250546014e-07, "loss": 0.2656, "step": 34318 }, { "epoch": 1.60767320935026, "grad_norm": 0.6252799618965355, "learning_rate": 4.878072729870353e-07, "loss": 0.2779, "step": 34319 }, { "epoch": 1.6077200543401884, "grad_norm": 0.5604878426225505, "learning_rate": 4.876947324991321e-07, "loss": 0.2657, "step": 34320 }, { "epoch": 1.6077668993301166, "grad_norm": 0.5628084709980442, "learning_rate": 4.875822035915406e-07, "loss": 0.2788, "step": 34321 }, { "epoch": 1.6078137443200449, "grad_norm": 0.6349978099559371, "learning_rate": 4.874696862649069e-07, "loss": 0.2849, "step": 34322 }, { "epoch": 1.6078605893099733, "grad_norm": 0.5945216163131727, "learning_rate": 4.873571805198793e-07, "loss": 0.2729, "step": 34323 }, { "epoch": 1.6079074342999016, "grad_norm": 0.5921619253823178, "learning_rate": 4.872446863571054e-07, "loss": 0.2726, "step": 34324 }, { "epoch": 1.6079542792898298, "grad_norm": 0.6278105999604601, "learning_rate": 4.871322037772333e-07, "loss": 0.2993, "step": 34325 }, { "epoch": 1.6080011242797583, "grad_norm": 0.5518775706257707, "learning_rate": 4.870197327809084e-07, "loss": 0.2539, "step": 34326 }, { "epoch": 1.6080479692696867, "grad_norm": 0.5892770067662079, "learning_rate": 4.869072733687799e-07, "loss": 0.2574, "step": 34327 }, { "epoch": 1.608094814259615, "grad_norm": 0.600004444202427, "learning_rate": 4.867948255414934e-07, "loss": 0.2804, "step": 34328 }, { "epoch": 1.6081416592495432, "grad_norm": 0.5858811701670971, "learning_rate": 4.866823892996967e-07, "loss": 0.2693, "step": 34329 }, { "epoch": 1.6081885042394717, "grad_norm": 0.585105529189851, "learning_rate": 4.865699646440367e-07, "loss": 0.2608, "step": 34330 }, { "epoch": 1.6082353492294, "grad_norm": 0.6182062433230459, "learning_rate": 4.86457551575161e-07, "loss": 0.2691, "step": 34331 }, { "epoch": 1.6082821942193282, "grad_norm": 1.4000693861584639, "learning_rate": 4.863451500937155e-07, "loss": 0.2887, "step": 34332 }, { "epoch": 1.6083290392092566, "grad_norm": 0.625137729868225, "learning_rate": 4.862327602003478e-07, "loss": 0.2699, "step": 34333 }, { "epoch": 1.6083758841991849, "grad_norm": 0.5935522733908635, "learning_rate": 4.861203818957048e-07, "loss": 0.2662, "step": 34334 }, { "epoch": 1.608422729189113, "grad_norm": 0.5924650715953653, "learning_rate": 4.860080151804323e-07, "loss": 0.2673, "step": 34335 }, { "epoch": 1.6084695741790416, "grad_norm": 0.5860677205028391, "learning_rate": 4.858956600551773e-07, "loss": 0.2701, "step": 34336 }, { "epoch": 1.60851641916897, "grad_norm": 0.5787908978737527, "learning_rate": 4.857833165205875e-07, "loss": 0.2786, "step": 34337 }, { "epoch": 1.608563264158898, "grad_norm": 0.5900693374605317, "learning_rate": 4.856709845773075e-07, "loss": 0.2757, "step": 34338 }, { "epoch": 1.6086101091488265, "grad_norm": 0.5783209476788466, "learning_rate": 4.855586642259849e-07, "loss": 0.2497, "step": 34339 }, { "epoch": 1.608656954138755, "grad_norm": 0.5920870623323375, "learning_rate": 4.854463554672659e-07, "loss": 0.2758, "step": 34340 }, { "epoch": 1.6087037991286832, "grad_norm": 0.5767309736780103, "learning_rate": 4.853340583017973e-07, "loss": 0.2751, "step": 34341 }, { "epoch": 1.6087506441186115, "grad_norm": 0.6196710481630868, "learning_rate": 4.852217727302242e-07, "loss": 0.2606, "step": 34342 }, { "epoch": 1.60879748910854, "grad_norm": 0.5688612088701083, "learning_rate": 4.85109498753194e-07, "loss": 0.2583, "step": 34343 }, { "epoch": 1.6088443340984682, "grad_norm": 0.5816509089005575, "learning_rate": 4.849972363713518e-07, "loss": 0.26, "step": 34344 }, { "epoch": 1.6088911790883964, "grad_norm": 0.5694977188207753, "learning_rate": 4.848849855853438e-07, "loss": 0.2581, "step": 34345 }, { "epoch": 1.6089380240783249, "grad_norm": 0.5917081840569528, "learning_rate": 4.847727463958163e-07, "loss": 0.2661, "step": 34346 }, { "epoch": 1.608984869068253, "grad_norm": 0.5942562164192168, "learning_rate": 4.846605188034151e-07, "loss": 0.2721, "step": 34347 }, { "epoch": 1.6090317140581814, "grad_norm": 0.579711791081355, "learning_rate": 4.845483028087869e-07, "loss": 0.2651, "step": 34348 }, { "epoch": 1.6090785590481098, "grad_norm": 0.6501733541925429, "learning_rate": 4.844360984125765e-07, "loss": 0.2856, "step": 34349 }, { "epoch": 1.6091254040380383, "grad_norm": 0.6317577572634611, "learning_rate": 4.843239056154292e-07, "loss": 0.2846, "step": 34350 }, { "epoch": 1.6091722490279663, "grad_norm": 0.6695638824273066, "learning_rate": 4.842117244179911e-07, "loss": 0.2862, "step": 34351 }, { "epoch": 1.6092190940178948, "grad_norm": 0.5973561905676267, "learning_rate": 4.840995548209079e-07, "loss": 0.2851, "step": 34352 }, { "epoch": 1.6092659390078232, "grad_norm": 0.6364745191508252, "learning_rate": 4.839873968248252e-07, "loss": 0.3006, "step": 34353 }, { "epoch": 1.6093127839977515, "grad_norm": 0.5978918757912179, "learning_rate": 4.838752504303882e-07, "loss": 0.2692, "step": 34354 }, { "epoch": 1.6093596289876797, "grad_norm": 0.6109001596142682, "learning_rate": 4.83763115638243e-07, "loss": 0.2589, "step": 34355 }, { "epoch": 1.6094064739776082, "grad_norm": 0.6161714593095746, "learning_rate": 4.836509924490345e-07, "loss": 0.2726, "step": 34356 }, { "epoch": 1.6094533189675364, "grad_norm": 0.5863775238941543, "learning_rate": 4.83538880863407e-07, "loss": 0.2702, "step": 34357 }, { "epoch": 1.6095001639574646, "grad_norm": 0.5975056734228066, "learning_rate": 4.834267808820065e-07, "loss": 0.2673, "step": 34358 }, { "epoch": 1.609547008947393, "grad_norm": 0.5935090273900858, "learning_rate": 4.83314692505478e-07, "loss": 0.2624, "step": 34359 }, { "epoch": 1.6095938539373214, "grad_norm": 0.5806386506662758, "learning_rate": 4.832026157344663e-07, "loss": 0.252, "step": 34360 }, { "epoch": 1.6096406989272496, "grad_norm": 0.5980853803140257, "learning_rate": 4.830905505696176e-07, "loss": 0.2644, "step": 34361 }, { "epoch": 1.609687543917178, "grad_norm": 0.5925978069050335, "learning_rate": 4.82978497011575e-07, "loss": 0.2689, "step": 34362 }, { "epoch": 1.6097343889071065, "grad_norm": 0.6091868609119849, "learning_rate": 4.828664550609849e-07, "loss": 0.272, "step": 34363 }, { "epoch": 1.6097812338970348, "grad_norm": 0.603624825743742, "learning_rate": 4.827544247184909e-07, "loss": 0.2756, "step": 34364 }, { "epoch": 1.609828078886963, "grad_norm": 0.5506047665722916, "learning_rate": 4.826424059847379e-07, "loss": 0.2497, "step": 34365 }, { "epoch": 1.6098749238768915, "grad_norm": 0.6223513008793194, "learning_rate": 4.82530398860371e-07, "loss": 0.279, "step": 34366 }, { "epoch": 1.6099217688668197, "grad_norm": 0.6020382739776559, "learning_rate": 4.824184033460353e-07, "loss": 0.2695, "step": 34367 }, { "epoch": 1.609968613856748, "grad_norm": 0.614208206351333, "learning_rate": 4.823064194423738e-07, "loss": 0.2832, "step": 34368 }, { "epoch": 1.6100154588466764, "grad_norm": 0.5820593654122734, "learning_rate": 4.821944471500323e-07, "loss": 0.2923, "step": 34369 }, { "epoch": 1.6100623038366046, "grad_norm": 0.6023027996547282, "learning_rate": 4.820824864696542e-07, "loss": 0.2806, "step": 34370 }, { "epoch": 1.6101091488265329, "grad_norm": 0.6024700702357846, "learning_rate": 4.819705374018841e-07, "loss": 0.2568, "step": 34371 }, { "epoch": 1.6101559938164614, "grad_norm": 0.6026175689540297, "learning_rate": 4.81858599947366e-07, "loss": 0.2699, "step": 34372 }, { "epoch": 1.6102028388063898, "grad_norm": 0.570301957364795, "learning_rate": 4.817466741067448e-07, "loss": 0.2643, "step": 34373 }, { "epoch": 1.6102496837963178, "grad_norm": 0.5826161444552644, "learning_rate": 4.816347598806648e-07, "loss": 0.2673, "step": 34374 }, { "epoch": 1.6102965287862463, "grad_norm": 0.568018717644913, "learning_rate": 4.815228572697689e-07, "loss": 0.2663, "step": 34375 }, { "epoch": 1.6103433737761748, "grad_norm": 0.6042804913941041, "learning_rate": 4.814109662747021e-07, "loss": 0.278, "step": 34376 }, { "epoch": 1.610390218766103, "grad_norm": 0.6152690441953403, "learning_rate": 4.812990868961073e-07, "loss": 0.2917, "step": 34377 }, { "epoch": 1.6104370637560312, "grad_norm": 0.5821079463327199, "learning_rate": 4.811872191346286e-07, "loss": 0.2697, "step": 34378 }, { "epoch": 1.6104839087459597, "grad_norm": 0.5949484456165748, "learning_rate": 4.810753629909104e-07, "loss": 0.2786, "step": 34379 }, { "epoch": 1.610530753735888, "grad_norm": 0.6144432548448606, "learning_rate": 4.809635184655967e-07, "loss": 0.2947, "step": 34380 }, { "epoch": 1.6105775987258162, "grad_norm": 0.5820607104054129, "learning_rate": 4.808516855593295e-07, "loss": 0.2759, "step": 34381 }, { "epoch": 1.6106244437157446, "grad_norm": 0.5895311481503825, "learning_rate": 4.807398642727537e-07, "loss": 0.2761, "step": 34382 }, { "epoch": 1.6106712887056729, "grad_norm": 0.6285688678487795, "learning_rate": 4.806280546065129e-07, "loss": 0.2758, "step": 34383 }, { "epoch": 1.6107181336956011, "grad_norm": 0.5727797176726914, "learning_rate": 4.805162565612495e-07, "loss": 0.2653, "step": 34384 }, { "epoch": 1.6107649786855296, "grad_norm": 0.5997375097410014, "learning_rate": 4.804044701376076e-07, "loss": 0.2793, "step": 34385 }, { "epoch": 1.610811823675458, "grad_norm": 0.6156496913392958, "learning_rate": 4.802926953362308e-07, "loss": 0.2759, "step": 34386 }, { "epoch": 1.610858668665386, "grad_norm": 0.5663776085438197, "learning_rate": 4.801809321577613e-07, "loss": 0.2619, "step": 34387 }, { "epoch": 1.6109055136553145, "grad_norm": 0.6909833110047813, "learning_rate": 4.800691806028432e-07, "loss": 0.2843, "step": 34388 }, { "epoch": 1.610952358645243, "grad_norm": 0.5709760360562212, "learning_rate": 4.799574406721189e-07, "loss": 0.2615, "step": 34389 }, { "epoch": 1.6109992036351712, "grad_norm": 0.6361718979220763, "learning_rate": 4.798457123662325e-07, "loss": 0.2967, "step": 34390 }, { "epoch": 1.6110460486250995, "grad_norm": 0.6406405275011615, "learning_rate": 4.797339956858258e-07, "loss": 0.2761, "step": 34391 }, { "epoch": 1.611092893615028, "grad_norm": 0.5867808974897898, "learning_rate": 4.796222906315426e-07, "loss": 0.2833, "step": 34392 }, { "epoch": 1.6111397386049562, "grad_norm": 0.6195740540854706, "learning_rate": 4.795105972040249e-07, "loss": 0.2711, "step": 34393 }, { "epoch": 1.6111865835948844, "grad_norm": 0.5673611536887786, "learning_rate": 4.793989154039158e-07, "loss": 0.2677, "step": 34394 }, { "epoch": 1.6112334285848129, "grad_norm": 0.6067275931959133, "learning_rate": 4.792872452318578e-07, "loss": 0.2688, "step": 34395 }, { "epoch": 1.6112802735747411, "grad_norm": 0.6002358296733116, "learning_rate": 4.791755866884943e-07, "loss": 0.267, "step": 34396 }, { "epoch": 1.6113271185646694, "grad_norm": 0.5958291755307134, "learning_rate": 4.790639397744679e-07, "loss": 0.2608, "step": 34397 }, { "epoch": 1.6113739635545978, "grad_norm": 0.5530364767115674, "learning_rate": 4.789523044904204e-07, "loss": 0.2594, "step": 34398 }, { "epoch": 1.6114208085445263, "grad_norm": 0.6183056451470342, "learning_rate": 4.788406808369939e-07, "loss": 0.2841, "step": 34399 }, { "epoch": 1.6114676535344545, "grad_norm": 0.619976535864086, "learning_rate": 4.78729068814831e-07, "loss": 0.2647, "step": 34400 }, { "epoch": 1.6115144985243828, "grad_norm": 0.5972402252819449, "learning_rate": 4.786174684245745e-07, "loss": 0.2849, "step": 34401 }, { "epoch": 1.6115613435143112, "grad_norm": 0.5666619090311973, "learning_rate": 4.785058796668665e-07, "loss": 0.2622, "step": 34402 }, { "epoch": 1.6116081885042395, "grad_norm": 0.5988895552410981, "learning_rate": 4.783943025423491e-07, "loss": 0.2616, "step": 34403 }, { "epoch": 1.6116550334941677, "grad_norm": 0.5927155670181735, "learning_rate": 4.782827370516649e-07, "loss": 0.2789, "step": 34404 }, { "epoch": 1.6117018784840962, "grad_norm": 0.5663836149531802, "learning_rate": 4.781711831954555e-07, "loss": 0.262, "step": 34405 }, { "epoch": 1.6117487234740244, "grad_norm": 0.576152066010154, "learning_rate": 4.780596409743621e-07, "loss": 0.2664, "step": 34406 }, { "epoch": 1.6117955684639527, "grad_norm": 0.592916222401635, "learning_rate": 4.779481103890271e-07, "loss": 0.249, "step": 34407 }, { "epoch": 1.6118424134538811, "grad_norm": 0.6119458216129009, "learning_rate": 4.778365914400926e-07, "loss": 0.2715, "step": 34408 }, { "epoch": 1.6118892584438096, "grad_norm": 0.6233167620722365, "learning_rate": 4.777250841282005e-07, "loss": 0.2716, "step": 34409 }, { "epoch": 1.6119361034337376, "grad_norm": 0.6268346407051417, "learning_rate": 4.776135884539926e-07, "loss": 0.2778, "step": 34410 }, { "epoch": 1.611982948423666, "grad_norm": 0.6260875342021711, "learning_rate": 4.775021044181096e-07, "loss": 0.278, "step": 34411 }, { "epoch": 1.6120297934135945, "grad_norm": 0.5554106347880313, "learning_rate": 4.773906320211943e-07, "loss": 0.2683, "step": 34412 }, { "epoch": 1.6120766384035228, "grad_norm": 0.6195225165157485, "learning_rate": 4.772791712638872e-07, "loss": 0.2783, "step": 34413 }, { "epoch": 1.612123483393451, "grad_norm": 0.5906975296884368, "learning_rate": 4.771677221468299e-07, "loss": 0.2671, "step": 34414 }, { "epoch": 1.6121703283833795, "grad_norm": 0.5981950061571991, "learning_rate": 4.770562846706636e-07, "loss": 0.2816, "step": 34415 }, { "epoch": 1.6122171733733077, "grad_norm": 0.6078637435476121, "learning_rate": 4.76944858836031e-07, "loss": 0.2947, "step": 34416 }, { "epoch": 1.612264018363236, "grad_norm": 0.6064718715822195, "learning_rate": 4.7683344464357157e-07, "loss": 0.2715, "step": 34417 }, { "epoch": 1.6123108633531644, "grad_norm": 0.6103462027505027, "learning_rate": 4.7672204209392753e-07, "loss": 0.2696, "step": 34418 }, { "epoch": 1.6123577083430927, "grad_norm": 0.5592492606631287, "learning_rate": 4.7661065118773915e-07, "loss": 0.2575, "step": 34419 }, { "epoch": 1.612404553333021, "grad_norm": 0.5471435702435333, "learning_rate": 4.764992719256481e-07, "loss": 0.2672, "step": 34420 }, { "epoch": 1.6124513983229494, "grad_norm": 0.5819336928048737, "learning_rate": 4.7638790430829475e-07, "loss": 0.267, "step": 34421 }, { "epoch": 1.6124982433128778, "grad_norm": 0.6218835277086725, "learning_rate": 4.762765483363213e-07, "loss": 0.279, "step": 34422 }, { "epoch": 1.6125450883028059, "grad_norm": 0.6281119181613123, "learning_rate": 4.76165204010367e-07, "loss": 0.2822, "step": 34423 }, { "epoch": 1.6125919332927343, "grad_norm": 0.6285878225434012, "learning_rate": 4.7605387133107335e-07, "loss": 0.2716, "step": 34424 }, { "epoch": 1.6126387782826628, "grad_norm": 0.649329726580894, "learning_rate": 4.7594255029908135e-07, "loss": 0.2869, "step": 34425 }, { "epoch": 1.612685623272591, "grad_norm": 0.5756914336741605, "learning_rate": 4.758312409150309e-07, "loss": 0.2545, "step": 34426 }, { "epoch": 1.6127324682625193, "grad_norm": 0.5914641436997872, "learning_rate": 4.757199431795628e-07, "loss": 0.2596, "step": 34427 }, { "epoch": 1.6127793132524477, "grad_norm": 0.5987988917316873, "learning_rate": 4.7560865709331826e-07, "loss": 0.2648, "step": 34428 }, { "epoch": 1.612826158242376, "grad_norm": 0.6437826569692822, "learning_rate": 4.7549738265693644e-07, "loss": 0.2761, "step": 34429 }, { "epoch": 1.6128730032323042, "grad_norm": 0.6302236169618565, "learning_rate": 4.7538611987105827e-07, "loss": 0.2728, "step": 34430 }, { "epoch": 1.6129198482222327, "grad_norm": 0.6333889965356148, "learning_rate": 4.752748687363243e-07, "loss": 0.2822, "step": 34431 }, { "epoch": 1.612966693212161, "grad_norm": 0.527381984893805, "learning_rate": 4.7516362925337483e-07, "loss": 0.2482, "step": 34432 }, { "epoch": 1.6130135382020891, "grad_norm": 0.5622623507871521, "learning_rate": 4.750524014228494e-07, "loss": 0.2602, "step": 34433 }, { "epoch": 1.6130603831920176, "grad_norm": 0.6078709111432542, "learning_rate": 4.749411852453892e-07, "loss": 0.2749, "step": 34434 }, { "epoch": 1.613107228181946, "grad_norm": 0.6138590689451918, "learning_rate": 4.748299807216328e-07, "loss": 0.2651, "step": 34435 }, { "epoch": 1.6131540731718743, "grad_norm": 0.5763112217301519, "learning_rate": 4.747187878522208e-07, "loss": 0.2561, "step": 34436 }, { "epoch": 1.6132009181618026, "grad_norm": 0.5756676141752978, "learning_rate": 4.7460760663779303e-07, "loss": 0.2937, "step": 34437 }, { "epoch": 1.613247763151731, "grad_norm": 0.6158408503361087, "learning_rate": 4.744964370789895e-07, "loss": 0.2823, "step": 34438 }, { "epoch": 1.6132946081416593, "grad_norm": 0.5997279057122579, "learning_rate": 4.7438527917645054e-07, "loss": 0.2749, "step": 34439 }, { "epoch": 1.6133414531315875, "grad_norm": 0.6360508966222271, "learning_rate": 4.7427413293081454e-07, "loss": 0.289, "step": 34440 }, { "epoch": 1.613388298121516, "grad_norm": 0.5939838392547914, "learning_rate": 4.741629983427223e-07, "loss": 0.262, "step": 34441 }, { "epoch": 1.6134351431114442, "grad_norm": 0.5625749263083275, "learning_rate": 4.740518754128123e-07, "loss": 0.2686, "step": 34442 }, { "epoch": 1.6134819881013724, "grad_norm": 0.598464623610624, "learning_rate": 4.739407641417246e-07, "loss": 0.2851, "step": 34443 }, { "epoch": 1.613528833091301, "grad_norm": 0.6047288666993756, "learning_rate": 4.738296645300985e-07, "loss": 0.2604, "step": 34444 }, { "epoch": 1.6135756780812294, "grad_norm": 0.6121169001809319, "learning_rate": 4.7371857657857343e-07, "loss": 0.2687, "step": 34445 }, { "epoch": 1.6136225230711574, "grad_norm": 0.622774788728896, "learning_rate": 4.7360750028778934e-07, "loss": 0.2693, "step": 34446 }, { "epoch": 1.6136693680610859, "grad_norm": 0.6170445248583495, "learning_rate": 4.734964356583846e-07, "loss": 0.2721, "step": 34447 }, { "epoch": 1.6137162130510143, "grad_norm": 0.6015502629304038, "learning_rate": 4.7338538269099816e-07, "loss": 0.2603, "step": 34448 }, { "epoch": 1.6137630580409426, "grad_norm": 0.6200694808843086, "learning_rate": 4.7327434138626923e-07, "loss": 0.2682, "step": 34449 }, { "epoch": 1.6138099030308708, "grad_norm": 0.5419296601066351, "learning_rate": 4.731633117448373e-07, "loss": 0.2575, "step": 34450 }, { "epoch": 1.6138567480207993, "grad_norm": 0.6138509003176558, "learning_rate": 4.7305229376734067e-07, "loss": 0.2685, "step": 34451 }, { "epoch": 1.6139035930107275, "grad_norm": 0.594870723606923, "learning_rate": 4.729412874544198e-07, "loss": 0.2733, "step": 34452 }, { "epoch": 1.6139504380006557, "grad_norm": 0.6040860216893288, "learning_rate": 4.7283029280671135e-07, "loss": 0.2644, "step": 34453 }, { "epoch": 1.6139972829905842, "grad_norm": 0.5744929180550468, "learning_rate": 4.7271930982485587e-07, "loss": 0.2631, "step": 34454 }, { "epoch": 1.6140441279805124, "grad_norm": 0.593897802519966, "learning_rate": 4.7260833850949037e-07, "loss": 0.2766, "step": 34455 }, { "epoch": 1.6140909729704407, "grad_norm": 0.5872732710127732, "learning_rate": 4.724973788612547e-07, "loss": 0.2688, "step": 34456 }, { "epoch": 1.6141378179603691, "grad_norm": 0.5797748064522222, "learning_rate": 4.7238643088078655e-07, "loss": 0.2747, "step": 34457 }, { "epoch": 1.6141846629502976, "grad_norm": 0.5582156427389982, "learning_rate": 4.7227549456872524e-07, "loss": 0.2701, "step": 34458 }, { "epoch": 1.6142315079402256, "grad_norm": 0.6056902949636754, "learning_rate": 4.7216456992570936e-07, "loss": 0.2869, "step": 34459 }, { "epoch": 1.614278352930154, "grad_norm": 0.5867935668223738, "learning_rate": 4.720536569523759e-07, "loss": 0.2695, "step": 34460 }, { "epoch": 1.6143251979200826, "grad_norm": 0.5845146725259667, "learning_rate": 4.719427556493647e-07, "loss": 0.2705, "step": 34461 }, { "epoch": 1.6143720429100108, "grad_norm": 0.5895826315739449, "learning_rate": 4.7183186601731267e-07, "loss": 0.2575, "step": 34462 }, { "epoch": 1.614418887899939, "grad_norm": 0.6577777679742207, "learning_rate": 4.7172098805685823e-07, "loss": 0.2966, "step": 34463 }, { "epoch": 1.6144657328898675, "grad_norm": 0.5961321512851183, "learning_rate": 4.7161012176864004e-07, "loss": 0.2634, "step": 34464 }, { "epoch": 1.6145125778797957, "grad_norm": 0.5957685696078245, "learning_rate": 4.714992671532961e-07, "loss": 0.274, "step": 34465 }, { "epoch": 1.614559422869724, "grad_norm": 0.6276975551744315, "learning_rate": 4.7138842421146356e-07, "loss": 0.2782, "step": 34466 }, { "epoch": 1.6146062678596524, "grad_norm": 0.5880668420659879, "learning_rate": 4.712775929437813e-07, "loss": 0.2696, "step": 34467 }, { "epoch": 1.6146531128495807, "grad_norm": 0.6491557284572473, "learning_rate": 4.7116677335088625e-07, "loss": 0.2847, "step": 34468 }, { "epoch": 1.614699957839509, "grad_norm": 0.5889052840046833, "learning_rate": 4.7105596543341624e-07, "loss": 0.2495, "step": 34469 }, { "epoch": 1.6147468028294374, "grad_norm": 0.6012213144502088, "learning_rate": 4.7094516919200915e-07, "loss": 0.2581, "step": 34470 }, { "epoch": 1.6147936478193659, "grad_norm": 0.5813583253909342, "learning_rate": 4.7083438462730335e-07, "loss": 0.2652, "step": 34471 }, { "epoch": 1.614840492809294, "grad_norm": 0.5963594177264487, "learning_rate": 4.70723611739935e-07, "loss": 0.2819, "step": 34472 }, { "epoch": 1.6148873377992223, "grad_norm": 0.607962833507493, "learning_rate": 4.706128505305424e-07, "loss": 0.2731, "step": 34473 }, { "epoch": 1.6149341827891508, "grad_norm": 0.5887079029796812, "learning_rate": 4.705021009997632e-07, "loss": 0.2683, "step": 34474 }, { "epoch": 1.614981027779079, "grad_norm": 0.5882083303928846, "learning_rate": 4.7039136314823377e-07, "loss": 0.2715, "step": 34475 }, { "epoch": 1.6150278727690073, "grad_norm": 0.5972056539980328, "learning_rate": 4.702806369765919e-07, "loss": 0.2734, "step": 34476 }, { "epoch": 1.6150747177589357, "grad_norm": 0.5426953712418779, "learning_rate": 4.7016992248547526e-07, "loss": 0.254, "step": 34477 }, { "epoch": 1.615121562748864, "grad_norm": 0.5978186998041811, "learning_rate": 4.700592196755202e-07, "loss": 0.2932, "step": 34478 }, { "epoch": 1.6151684077387922, "grad_norm": 0.6796553234497156, "learning_rate": 4.69948528547364e-07, "loss": 0.2765, "step": 34479 }, { "epoch": 1.6152152527287207, "grad_norm": 0.5696265219439093, "learning_rate": 4.6983784910164365e-07, "loss": 0.2672, "step": 34480 }, { "epoch": 1.6152620977186491, "grad_norm": 0.5819477211918869, "learning_rate": 4.69727181338997e-07, "loss": 0.2555, "step": 34481 }, { "epoch": 1.6153089427085772, "grad_norm": 0.6226021514573699, "learning_rate": 4.696165252600596e-07, "loss": 0.2968, "step": 34482 }, { "epoch": 1.6153557876985056, "grad_norm": 0.5941817114341006, "learning_rate": 4.6950588086546926e-07, "loss": 0.2529, "step": 34483 }, { "epoch": 1.615402632688434, "grad_norm": 0.5713502509525592, "learning_rate": 4.6939524815586166e-07, "loss": 0.2672, "step": 34484 }, { "epoch": 1.6154494776783623, "grad_norm": 0.6248798848555746, "learning_rate": 4.6928462713187406e-07, "loss": 0.2762, "step": 34485 }, { "epoch": 1.6154963226682906, "grad_norm": 0.6027638177732092, "learning_rate": 4.6917401779414286e-07, "loss": 0.2707, "step": 34486 }, { "epoch": 1.615543167658219, "grad_norm": 0.614291360210959, "learning_rate": 4.69063420143305e-07, "loss": 0.2812, "step": 34487 }, { "epoch": 1.6155900126481473, "grad_norm": 0.5805915615474317, "learning_rate": 4.68952834179997e-07, "loss": 0.2682, "step": 34488 }, { "epoch": 1.6156368576380755, "grad_norm": 0.6184670038595473, "learning_rate": 4.6884225990485533e-07, "loss": 0.2783, "step": 34489 }, { "epoch": 1.615683702628004, "grad_norm": 0.6335041018253571, "learning_rate": 4.687316973185152e-07, "loss": 0.2661, "step": 34490 }, { "epoch": 1.6157305476179322, "grad_norm": 0.5031925983746585, "learning_rate": 4.686211464216134e-07, "loss": 0.2359, "step": 34491 }, { "epoch": 1.6157773926078605, "grad_norm": 0.5874989549386631, "learning_rate": 4.685106072147866e-07, "loss": 0.277, "step": 34492 }, { "epoch": 1.615824237597789, "grad_norm": 0.5369986344372181, "learning_rate": 4.684000796986704e-07, "loss": 0.2523, "step": 34493 }, { "epoch": 1.6158710825877174, "grad_norm": 0.6051383958495734, "learning_rate": 4.682895638739013e-07, "loss": 0.2767, "step": 34494 }, { "epoch": 1.6159179275776454, "grad_norm": 0.6005222394728086, "learning_rate": 4.6817905974111595e-07, "loss": 0.2648, "step": 34495 }, { "epoch": 1.6159647725675739, "grad_norm": 0.587657758684018, "learning_rate": 4.6806856730094914e-07, "loss": 0.2604, "step": 34496 }, { "epoch": 1.6160116175575023, "grad_norm": 0.6173838684664725, "learning_rate": 4.679580865540365e-07, "loss": 0.2696, "step": 34497 }, { "epoch": 1.6160584625474306, "grad_norm": 0.5984000625684084, "learning_rate": 4.678476175010141e-07, "loss": 0.2897, "step": 34498 }, { "epoch": 1.6161053075373588, "grad_norm": 0.5506142513046561, "learning_rate": 4.6773716014251816e-07, "loss": 0.2641, "step": 34499 }, { "epoch": 1.6161521525272873, "grad_norm": 0.5942868184195893, "learning_rate": 4.6762671447918407e-07, "loss": 0.272, "step": 34500 }, { "epoch": 1.6161989975172155, "grad_norm": 0.6029085066062772, "learning_rate": 4.6751628051164787e-07, "loss": 0.2695, "step": 34501 }, { "epoch": 1.6162458425071438, "grad_norm": 0.5925231444910228, "learning_rate": 4.674058582405441e-07, "loss": 0.261, "step": 34502 }, { "epoch": 1.6162926874970722, "grad_norm": 0.5857776371745873, "learning_rate": 4.6729544766650927e-07, "loss": 0.2711, "step": 34503 }, { "epoch": 1.6163395324870005, "grad_norm": 0.5914775705661752, "learning_rate": 4.6718504879017774e-07, "loss": 0.269, "step": 34504 }, { "epoch": 1.6163863774769287, "grad_norm": 0.530729142677947, "learning_rate": 4.6707466161218547e-07, "loss": 0.2452, "step": 34505 }, { "epoch": 1.6164332224668572, "grad_norm": 0.5875531117866681, "learning_rate": 4.669642861331672e-07, "loss": 0.2645, "step": 34506 }, { "epoch": 1.6164800674567856, "grad_norm": 0.5956633095550414, "learning_rate": 4.668539223537588e-07, "loss": 0.2654, "step": 34507 }, { "epoch": 1.6165269124467139, "grad_norm": 0.6062700305910883, "learning_rate": 4.6674357027459537e-07, "loss": 0.2577, "step": 34508 }, { "epoch": 1.6165737574366421, "grad_norm": 0.6178185892755513, "learning_rate": 4.6663322989631195e-07, "loss": 0.2709, "step": 34509 }, { "epoch": 1.6166206024265706, "grad_norm": 0.6624275631037959, "learning_rate": 4.6652290121954243e-07, "loss": 0.2962, "step": 34510 }, { "epoch": 1.6166674474164988, "grad_norm": 0.5639316050279194, "learning_rate": 4.664125842449227e-07, "loss": 0.2762, "step": 34511 }, { "epoch": 1.616714292406427, "grad_norm": 0.6118966329123361, "learning_rate": 4.6630227897308727e-07, "loss": 0.2752, "step": 34512 }, { "epoch": 1.6167611373963555, "grad_norm": 0.5711688952021503, "learning_rate": 4.661919854046712e-07, "loss": 0.2702, "step": 34513 }, { "epoch": 1.6168079823862838, "grad_norm": 0.5617226692513716, "learning_rate": 4.660817035403095e-07, "loss": 0.2626, "step": 34514 }, { "epoch": 1.616854827376212, "grad_norm": 0.5658054066246415, "learning_rate": 4.659714333806359e-07, "loss": 0.2603, "step": 34515 }, { "epoch": 1.6169016723661405, "grad_norm": 0.6336583205875365, "learning_rate": 4.658611749262862e-07, "loss": 0.2949, "step": 34516 }, { "epoch": 1.616948517356069, "grad_norm": 0.7662724462852789, "learning_rate": 4.6575092817789325e-07, "loss": 0.26, "step": 34517 }, { "epoch": 1.616995362345997, "grad_norm": 0.5746129154522507, "learning_rate": 4.656406931360927e-07, "loss": 0.263, "step": 34518 }, { "epoch": 1.6170422073359254, "grad_norm": 0.5609151401032313, "learning_rate": 4.655304698015184e-07, "loss": 0.2637, "step": 34519 }, { "epoch": 1.6170890523258539, "grad_norm": 0.6427614393387642, "learning_rate": 4.654202581748057e-07, "loss": 0.2883, "step": 34520 }, { "epoch": 1.6171358973157821, "grad_norm": 0.611637764922181, "learning_rate": 4.6531005825658727e-07, "loss": 0.2742, "step": 34521 }, { "epoch": 1.6171827423057104, "grad_norm": 0.615171946468297, "learning_rate": 4.651998700474983e-07, "loss": 0.2724, "step": 34522 }, { "epoch": 1.6172295872956388, "grad_norm": 0.5733245200312255, "learning_rate": 4.6508969354817303e-07, "loss": 0.2628, "step": 34523 }, { "epoch": 1.617276432285567, "grad_norm": 0.5922188198539394, "learning_rate": 4.6497952875924455e-07, "loss": 0.2582, "step": 34524 }, { "epoch": 1.6173232772754953, "grad_norm": 0.570140194310772, "learning_rate": 4.648693756813477e-07, "loss": 0.2646, "step": 34525 }, { "epoch": 1.6173701222654238, "grad_norm": 0.6018500812815667, "learning_rate": 4.647592343151164e-07, "loss": 0.281, "step": 34526 }, { "epoch": 1.617416967255352, "grad_norm": 0.5835777310133139, "learning_rate": 4.646491046611834e-07, "loss": 0.2535, "step": 34527 }, { "epoch": 1.6174638122452802, "grad_norm": 0.6169568316243962, "learning_rate": 4.6453898672018355e-07, "loss": 0.2638, "step": 34528 }, { "epoch": 1.6175106572352087, "grad_norm": 0.5891465651646554, "learning_rate": 4.644288804927502e-07, "loss": 0.269, "step": 34529 }, { "epoch": 1.6175575022251372, "grad_norm": 0.596612767317248, "learning_rate": 4.643187859795176e-07, "loss": 0.2732, "step": 34530 }, { "epoch": 1.6176043472150652, "grad_norm": 0.600832387700545, "learning_rate": 4.6420870318111794e-07, "loss": 0.2737, "step": 34531 }, { "epoch": 1.6176511922049936, "grad_norm": 0.6126626476012973, "learning_rate": 4.6409863209818636e-07, "loss": 0.2881, "step": 34532 }, { "epoch": 1.6176980371949221, "grad_norm": 0.6314983571195041, "learning_rate": 4.639885727313548e-07, "loss": 0.2799, "step": 34533 }, { "epoch": 1.6177448821848504, "grad_norm": 0.5626133715331976, "learning_rate": 4.638785250812572e-07, "loss": 0.251, "step": 34534 }, { "epoch": 1.6177917271747786, "grad_norm": 0.6069512189150668, "learning_rate": 4.6376848914852696e-07, "loss": 0.2793, "step": 34535 }, { "epoch": 1.617838572164707, "grad_norm": 0.5919605491208455, "learning_rate": 4.636584649337972e-07, "loss": 0.2794, "step": 34536 }, { "epoch": 1.6178854171546353, "grad_norm": 0.5916343857543302, "learning_rate": 4.635484524377018e-07, "loss": 0.2751, "step": 34537 }, { "epoch": 1.6179322621445635, "grad_norm": 0.6064538034821297, "learning_rate": 4.634384516608734e-07, "loss": 0.2831, "step": 34538 }, { "epoch": 1.617979107134492, "grad_norm": 0.6120363459337513, "learning_rate": 4.6332846260394385e-07, "loss": 0.2697, "step": 34539 }, { "epoch": 1.6180259521244202, "grad_norm": 0.6041988331330296, "learning_rate": 4.6321848526754753e-07, "loss": 0.2776, "step": 34540 }, { "epoch": 1.6180727971143485, "grad_norm": 0.597832099842173, "learning_rate": 4.6310851965231664e-07, "loss": 0.2667, "step": 34541 }, { "epoch": 1.618119642104277, "grad_norm": 0.594295888354646, "learning_rate": 4.629985657588842e-07, "loss": 0.2799, "step": 34542 }, { "epoch": 1.6181664870942054, "grad_norm": 0.5817733260159104, "learning_rate": 4.6288862358788324e-07, "loss": 0.2618, "step": 34543 }, { "epoch": 1.6182133320841336, "grad_norm": 0.6012256738057332, "learning_rate": 4.6277869313994667e-07, "loss": 0.278, "step": 34544 }, { "epoch": 1.618260177074062, "grad_norm": 0.5659459872589272, "learning_rate": 4.6266877441570683e-07, "loss": 0.2686, "step": 34545 }, { "epoch": 1.6183070220639904, "grad_norm": 0.5787822873619274, "learning_rate": 4.6255886741579545e-07, "loss": 0.2688, "step": 34546 }, { "epoch": 1.6183538670539186, "grad_norm": 0.5701706846105891, "learning_rate": 4.6244897214084595e-07, "loss": 0.2644, "step": 34547 }, { "epoch": 1.6184007120438468, "grad_norm": 0.6114180509356725, "learning_rate": 4.623390885914902e-07, "loss": 0.2638, "step": 34548 }, { "epoch": 1.6184475570337753, "grad_norm": 0.6262377938111289, "learning_rate": 4.622292167683609e-07, "loss": 0.2721, "step": 34549 }, { "epoch": 1.6184944020237035, "grad_norm": 0.5933619757641231, "learning_rate": 4.621193566720908e-07, "loss": 0.2647, "step": 34550 }, { "epoch": 1.6185412470136318, "grad_norm": 0.5996442109553473, "learning_rate": 4.62009508303311e-07, "loss": 0.2795, "step": 34551 }, { "epoch": 1.6185880920035602, "grad_norm": 0.6164529653633014, "learning_rate": 4.6189967166265505e-07, "loss": 0.2837, "step": 34552 }, { "epoch": 1.6186349369934887, "grad_norm": 0.6094790430148684, "learning_rate": 4.6178984675075366e-07, "loss": 0.2646, "step": 34553 }, { "epoch": 1.6186817819834167, "grad_norm": 0.6289916008750398, "learning_rate": 4.616800335682392e-07, "loss": 0.269, "step": 34554 }, { "epoch": 1.6187286269733452, "grad_norm": 0.5911738894448605, "learning_rate": 4.6157023211574395e-07, "loss": 0.2671, "step": 34555 }, { "epoch": 1.6187754719632736, "grad_norm": 0.6034866725170913, "learning_rate": 4.6146044239390036e-07, "loss": 0.2766, "step": 34556 }, { "epoch": 1.618822316953202, "grad_norm": 0.6862262020235931, "learning_rate": 4.613506644033389e-07, "loss": 0.2803, "step": 34557 }, { "epoch": 1.6188691619431301, "grad_norm": 0.5767529572470119, "learning_rate": 4.6124089814469254e-07, "loss": 0.2747, "step": 34558 }, { "epoch": 1.6189160069330586, "grad_norm": 0.5639105586433022, "learning_rate": 4.6113114361859167e-07, "loss": 0.2622, "step": 34559 }, { "epoch": 1.6189628519229868, "grad_norm": 0.5803003445165739, "learning_rate": 4.610214008256686e-07, "loss": 0.2524, "step": 34560 }, { "epoch": 1.619009696912915, "grad_norm": 0.6030184712807942, "learning_rate": 4.60911669766555e-07, "loss": 0.2699, "step": 34561 }, { "epoch": 1.6190565419028435, "grad_norm": 0.5941044525676031, "learning_rate": 4.608019504418829e-07, "loss": 0.2602, "step": 34562 }, { "epoch": 1.6191033868927718, "grad_norm": 0.5562087628505925, "learning_rate": 4.6069224285228223e-07, "loss": 0.2613, "step": 34563 }, { "epoch": 1.6191502318827, "grad_norm": 0.5875238447723523, "learning_rate": 4.6058254699838544e-07, "loss": 0.271, "step": 34564 }, { "epoch": 1.6191970768726285, "grad_norm": 0.6214552368343804, "learning_rate": 4.604728628808239e-07, "loss": 0.2746, "step": 34565 }, { "epoch": 1.619243921862557, "grad_norm": 0.6315425709056368, "learning_rate": 4.6036319050022786e-07, "loss": 0.2871, "step": 34566 }, { "epoch": 1.619290766852485, "grad_norm": 0.5505791174150222, "learning_rate": 4.6025352985722916e-07, "loss": 0.2644, "step": 34567 }, { "epoch": 1.6193376118424134, "grad_norm": 0.5924083268337709, "learning_rate": 4.6014388095245915e-07, "loss": 0.275, "step": 34568 }, { "epoch": 1.619384456832342, "grad_norm": 0.5665782918685268, "learning_rate": 4.600342437865479e-07, "loss": 0.2681, "step": 34569 }, { "epoch": 1.6194313018222701, "grad_norm": 0.6141800194702224, "learning_rate": 4.599246183601266e-07, "loss": 0.2657, "step": 34570 }, { "epoch": 1.6194781468121984, "grad_norm": 0.5609131191072771, "learning_rate": 4.598150046738267e-07, "loss": 0.2672, "step": 34571 }, { "epoch": 1.6195249918021268, "grad_norm": 0.5872233948572406, "learning_rate": 4.5970540272827906e-07, "loss": 0.2645, "step": 34572 }, { "epoch": 1.619571836792055, "grad_norm": 0.6263469553080361, "learning_rate": 4.5959581252411346e-07, "loss": 0.2568, "step": 34573 }, { "epoch": 1.6196186817819833, "grad_norm": 0.6429339854446198, "learning_rate": 4.594862340619619e-07, "loss": 0.2849, "step": 34574 }, { "epoch": 1.6196655267719118, "grad_norm": 0.6190693405072859, "learning_rate": 4.593766673424535e-07, "loss": 0.2813, "step": 34575 }, { "epoch": 1.61971237176184, "grad_norm": 0.5723576597648429, "learning_rate": 4.592671123662193e-07, "loss": 0.2695, "step": 34576 }, { "epoch": 1.6197592167517683, "grad_norm": 0.5875324666264999, "learning_rate": 4.591575691338901e-07, "loss": 0.2746, "step": 34577 }, { "epoch": 1.6198060617416967, "grad_norm": 0.5885161765252924, "learning_rate": 4.5904803764609624e-07, "loss": 0.2716, "step": 34578 }, { "epoch": 1.6198529067316252, "grad_norm": 0.5908426677884523, "learning_rate": 4.5893851790346834e-07, "loss": 0.264, "step": 34579 }, { "epoch": 1.6198997517215534, "grad_norm": 0.6099188516405729, "learning_rate": 4.588290099066359e-07, "loss": 0.2772, "step": 34580 }, { "epoch": 1.6199465967114817, "grad_norm": 0.591917982270814, "learning_rate": 4.5871951365623004e-07, "loss": 0.2956, "step": 34581 }, { "epoch": 1.6199934417014101, "grad_norm": 0.6386762100776603, "learning_rate": 4.5861002915287944e-07, "loss": 0.2862, "step": 34582 }, { "epoch": 1.6200402866913384, "grad_norm": 0.5582243167067755, "learning_rate": 4.585005563972153e-07, "loss": 0.2675, "step": 34583 }, { "epoch": 1.6200871316812666, "grad_norm": 0.5956364510222444, "learning_rate": 4.5839109538986737e-07, "loss": 0.2755, "step": 34584 }, { "epoch": 1.620133976671195, "grad_norm": 0.6043302843705961, "learning_rate": 4.5828164613146543e-07, "loss": 0.2772, "step": 34585 }, { "epoch": 1.6201808216611233, "grad_norm": 0.6209009556340603, "learning_rate": 4.5817220862264004e-07, "loss": 0.2678, "step": 34586 }, { "epoch": 1.6202276666510516, "grad_norm": 0.6521992300566185, "learning_rate": 4.580627828640205e-07, "loss": 0.2862, "step": 34587 }, { "epoch": 1.62027451164098, "grad_norm": 0.6071160040883345, "learning_rate": 4.579533688562357e-07, "loss": 0.2596, "step": 34588 }, { "epoch": 1.6203213566309085, "grad_norm": 0.5694354221785598, "learning_rate": 4.5784396659991597e-07, "loss": 0.2731, "step": 34589 }, { "epoch": 1.6203682016208365, "grad_norm": 0.6250499663311585, "learning_rate": 4.577345760956911e-07, "loss": 0.2622, "step": 34590 }, { "epoch": 1.620415046610765, "grad_norm": 0.5901296876819627, "learning_rate": 4.576251973441903e-07, "loss": 0.2713, "step": 34591 }, { "epoch": 1.6204618916006934, "grad_norm": 0.6205123336883875, "learning_rate": 4.575158303460439e-07, "loss": 0.2816, "step": 34592 }, { "epoch": 1.6205087365906217, "grad_norm": 0.6716610119654357, "learning_rate": 4.5740647510187974e-07, "loss": 0.2876, "step": 34593 }, { "epoch": 1.62055558158055, "grad_norm": 0.5621808660395816, "learning_rate": 4.572971316123287e-07, "loss": 0.2603, "step": 34594 }, { "epoch": 1.6206024265704784, "grad_norm": 0.5578200371240738, "learning_rate": 4.5718779987801857e-07, "loss": 0.2521, "step": 34595 }, { "epoch": 1.6206492715604066, "grad_norm": 0.5969765247346915, "learning_rate": 4.570784798995795e-07, "loss": 0.266, "step": 34596 }, { "epoch": 1.6206961165503349, "grad_norm": 0.6270872221637523, "learning_rate": 4.5696917167764e-07, "loss": 0.2829, "step": 34597 }, { "epoch": 1.6207429615402633, "grad_norm": 0.6147949778457396, "learning_rate": 4.5685987521282947e-07, "loss": 0.2899, "step": 34598 }, { "epoch": 1.6207898065301916, "grad_norm": 0.5386628506762768, "learning_rate": 4.5675059050577733e-07, "loss": 0.2522, "step": 34599 }, { "epoch": 1.6208366515201198, "grad_norm": 0.5773040555605224, "learning_rate": 4.566413175571116e-07, "loss": 0.2667, "step": 34600 }, { "epoch": 1.6208834965100483, "grad_norm": 0.5705514692258925, "learning_rate": 4.565320563674619e-07, "loss": 0.2589, "step": 34601 }, { "epoch": 1.6209303414999767, "grad_norm": 0.6197327668534336, "learning_rate": 4.564228069374563e-07, "loss": 0.2757, "step": 34602 }, { "epoch": 1.6209771864899047, "grad_norm": 0.6068858001585725, "learning_rate": 4.563135692677237e-07, "loss": 0.2729, "step": 34603 }, { "epoch": 1.6210240314798332, "grad_norm": 0.6182361169186751, "learning_rate": 4.562043433588925e-07, "loss": 0.2844, "step": 34604 }, { "epoch": 1.6210708764697617, "grad_norm": 0.6236319734104022, "learning_rate": 4.5609512921159256e-07, "loss": 0.27, "step": 34605 }, { "epoch": 1.62111772145969, "grad_norm": 0.6098640096437546, "learning_rate": 4.559859268264508e-07, "loss": 0.282, "step": 34606 }, { "epoch": 1.6211645664496181, "grad_norm": 0.620039309867317, "learning_rate": 4.558767362040969e-07, "loss": 0.2678, "step": 34607 }, { "epoch": 1.6212114114395466, "grad_norm": 0.5642448505071943, "learning_rate": 4.557675573451581e-07, "loss": 0.2658, "step": 34608 }, { "epoch": 1.6212582564294749, "grad_norm": 0.6339173579805805, "learning_rate": 4.5565839025026285e-07, "loss": 0.2953, "step": 34609 }, { "epoch": 1.621305101419403, "grad_norm": 0.5749688681270468, "learning_rate": 4.555492349200402e-07, "loss": 0.273, "step": 34610 }, { "epoch": 1.6213519464093316, "grad_norm": 0.6248045254338701, "learning_rate": 4.5544009135511827e-07, "loss": 0.2708, "step": 34611 }, { "epoch": 1.6213987913992598, "grad_norm": 0.5897231263324401, "learning_rate": 4.5533095955612395e-07, "loss": 0.2601, "step": 34612 }, { "epoch": 1.621445636389188, "grad_norm": 0.6168454516489231, "learning_rate": 4.5522183952368624e-07, "loss": 0.289, "step": 34613 }, { "epoch": 1.6214924813791165, "grad_norm": 0.5596694359599529, "learning_rate": 4.551127312584333e-07, "loss": 0.2634, "step": 34614 }, { "epoch": 1.621539326369045, "grad_norm": 0.6223689722941642, "learning_rate": 4.5500363476099233e-07, "loss": 0.2709, "step": 34615 }, { "epoch": 1.6215861713589732, "grad_norm": 0.5991580656375238, "learning_rate": 4.548945500319912e-07, "loss": 0.262, "step": 34616 }, { "epoch": 1.6216330163489014, "grad_norm": 0.6021851936121868, "learning_rate": 4.547854770720586e-07, "loss": 0.2935, "step": 34617 }, { "epoch": 1.62167986133883, "grad_norm": 0.5902279955704882, "learning_rate": 4.546764158818209e-07, "loss": 0.2616, "step": 34618 }, { "epoch": 1.6217267063287581, "grad_norm": 0.6145023451067276, "learning_rate": 4.545673664619066e-07, "loss": 0.2661, "step": 34619 }, { "epoch": 1.6217735513186864, "grad_norm": 0.593187832487185, "learning_rate": 4.544583288129428e-07, "loss": 0.2747, "step": 34620 }, { "epoch": 1.6218203963086149, "grad_norm": 0.605460387175314, "learning_rate": 4.543493029355578e-07, "loss": 0.2773, "step": 34621 }, { "epoch": 1.621867241298543, "grad_norm": 0.5933468826032572, "learning_rate": 4.5424028883037764e-07, "loss": 0.2607, "step": 34622 }, { "epoch": 1.6219140862884713, "grad_norm": 0.6279076755445965, "learning_rate": 4.541312864980313e-07, "loss": 0.2623, "step": 34623 }, { "epoch": 1.6219609312783998, "grad_norm": 0.6132639960066398, "learning_rate": 4.5402229593914466e-07, "loss": 0.2797, "step": 34624 }, { "epoch": 1.6220077762683283, "grad_norm": 0.6360040173095842, "learning_rate": 4.5391331715434527e-07, "loss": 0.2796, "step": 34625 }, { "epoch": 1.6220546212582563, "grad_norm": 0.589398713366526, "learning_rate": 4.5380435014426046e-07, "loss": 0.2788, "step": 34626 }, { "epoch": 1.6221014662481847, "grad_norm": 0.6204477109165786, "learning_rate": 4.536953949095174e-07, "loss": 0.2848, "step": 34627 }, { "epoch": 1.6221483112381132, "grad_norm": 0.6073735208351646, "learning_rate": 4.5358645145074347e-07, "loss": 0.2554, "step": 34628 }, { "epoch": 1.6221951562280414, "grad_norm": 0.5817635557356673, "learning_rate": 4.5347751976856536e-07, "loss": 0.2711, "step": 34629 }, { "epoch": 1.6222420012179697, "grad_norm": 0.5905265877414051, "learning_rate": 4.533685998636092e-07, "loss": 0.2702, "step": 34630 }, { "epoch": 1.6222888462078981, "grad_norm": 0.607253102831029, "learning_rate": 4.5325969173650204e-07, "loss": 0.2632, "step": 34631 }, { "epoch": 1.6223356911978264, "grad_norm": 0.5809603937747668, "learning_rate": 4.531507953878711e-07, "loss": 0.2864, "step": 34632 }, { "epoch": 1.6223825361877546, "grad_norm": 0.5786024118137554, "learning_rate": 4.5304191081834286e-07, "loss": 0.2591, "step": 34633 }, { "epoch": 1.622429381177683, "grad_norm": 0.5586837820813811, "learning_rate": 4.5293303802854403e-07, "loss": 0.2467, "step": 34634 }, { "epoch": 1.6224762261676113, "grad_norm": 0.5744905093318332, "learning_rate": 4.5282417701910166e-07, "loss": 0.2783, "step": 34635 }, { "epoch": 1.6225230711575396, "grad_norm": 0.6027485902687633, "learning_rate": 4.5271532779064155e-07, "loss": 0.2807, "step": 34636 }, { "epoch": 1.622569916147468, "grad_norm": 0.5955752502265853, "learning_rate": 4.526064903437896e-07, "loss": 0.2738, "step": 34637 }, { "epoch": 1.6226167611373965, "grad_norm": 0.6218675749897515, "learning_rate": 4.524976646791726e-07, "loss": 0.2791, "step": 34638 }, { "epoch": 1.6226636061273245, "grad_norm": 0.6035761680225861, "learning_rate": 4.5238885079741693e-07, "loss": 0.2507, "step": 34639 }, { "epoch": 1.622710451117253, "grad_norm": 0.5471886062790833, "learning_rate": 4.522800486991488e-07, "loss": 0.246, "step": 34640 }, { "epoch": 1.6227572961071814, "grad_norm": 0.6042215868413932, "learning_rate": 4.521712583849952e-07, "loss": 0.2824, "step": 34641 }, { "epoch": 1.6228041410971097, "grad_norm": 0.5622385489147363, "learning_rate": 4.5206247985558034e-07, "loss": 0.2639, "step": 34642 }, { "epoch": 1.622850986087038, "grad_norm": 0.5974745945952427, "learning_rate": 4.5195371311153205e-07, "loss": 0.2738, "step": 34643 }, { "epoch": 1.6228978310769664, "grad_norm": 0.5703263543172449, "learning_rate": 4.518449581534745e-07, "loss": 0.2565, "step": 34644 }, { "epoch": 1.6229446760668946, "grad_norm": 0.6235931245822385, "learning_rate": 4.517362149820345e-07, "loss": 0.2899, "step": 34645 }, { "epoch": 1.6229915210568229, "grad_norm": 0.6116588436975194, "learning_rate": 4.516274835978379e-07, "loss": 0.2863, "step": 34646 }, { "epoch": 1.6230383660467513, "grad_norm": 0.6400223908223549, "learning_rate": 4.5151876400151034e-07, "loss": 0.2937, "step": 34647 }, { "epoch": 1.6230852110366796, "grad_norm": 0.6218638353003056, "learning_rate": 4.5141005619367766e-07, "loss": 0.2825, "step": 34648 }, { "epoch": 1.6231320560266078, "grad_norm": 0.5973978010893093, "learning_rate": 4.513013601749655e-07, "loss": 0.2759, "step": 34649 }, { "epoch": 1.6231789010165363, "grad_norm": 0.5995096461076097, "learning_rate": 4.5119267594599866e-07, "loss": 0.2738, "step": 34650 }, { "epoch": 1.6232257460064647, "grad_norm": 0.5602437533218928, "learning_rate": 4.5108400350740265e-07, "loss": 0.2717, "step": 34651 }, { "epoch": 1.623272590996393, "grad_norm": 0.5743702719493416, "learning_rate": 4.509753428598035e-07, "loss": 0.255, "step": 34652 }, { "epoch": 1.6233194359863212, "grad_norm": 0.6148359848420946, "learning_rate": 4.5086669400382614e-07, "loss": 0.266, "step": 34653 }, { "epoch": 1.6233662809762497, "grad_norm": 0.5544021962144603, "learning_rate": 4.5075805694009655e-07, "loss": 0.2393, "step": 34654 }, { "epoch": 1.623413125966178, "grad_norm": 0.5669851218524112, "learning_rate": 4.5064943166923864e-07, "loss": 0.255, "step": 34655 }, { "epoch": 1.6234599709561062, "grad_norm": 0.5808473679938964, "learning_rate": 4.5054081819187886e-07, "loss": 0.2584, "step": 34656 }, { "epoch": 1.6235068159460346, "grad_norm": 0.6178006030375063, "learning_rate": 4.5043221650864083e-07, "loss": 0.2603, "step": 34657 }, { "epoch": 1.6235536609359629, "grad_norm": 0.5663356135380355, "learning_rate": 4.5032362662015025e-07, "loss": 0.2517, "step": 34658 }, { "epoch": 1.6236005059258911, "grad_norm": 0.5811176765453742, "learning_rate": 4.5021504852703205e-07, "loss": 0.2741, "step": 34659 }, { "epoch": 1.6236473509158196, "grad_norm": 0.5970560400381895, "learning_rate": 4.5010648222991174e-07, "loss": 0.2843, "step": 34660 }, { "epoch": 1.623694195905748, "grad_norm": 0.6547163983045228, "learning_rate": 4.499979277294128e-07, "loss": 0.28, "step": 34661 }, { "epoch": 1.623741040895676, "grad_norm": 0.5883099821586586, "learning_rate": 4.498893850261604e-07, "loss": 0.2801, "step": 34662 }, { "epoch": 1.6237878858856045, "grad_norm": 0.6120463350172247, "learning_rate": 4.4978085412078014e-07, "loss": 0.2634, "step": 34663 }, { "epoch": 1.623834730875533, "grad_norm": 0.6053738736571144, "learning_rate": 4.4967233501389486e-07, "loss": 0.2795, "step": 34664 }, { "epoch": 1.6238815758654612, "grad_norm": 0.5892599308043238, "learning_rate": 4.495638277061301e-07, "loss": 0.2605, "step": 34665 }, { "epoch": 1.6239284208553895, "grad_norm": 0.6293695997067736, "learning_rate": 4.49455332198111e-07, "loss": 0.2803, "step": 34666 }, { "epoch": 1.623975265845318, "grad_norm": 0.5707173133791175, "learning_rate": 4.4934684849046007e-07, "loss": 0.2625, "step": 34667 }, { "epoch": 1.6240221108352462, "grad_norm": 0.5918332427127941, "learning_rate": 4.492383765838029e-07, "loss": 0.2766, "step": 34668 }, { "epoch": 1.6240689558251744, "grad_norm": 0.6096532103435061, "learning_rate": 4.4912991647876326e-07, "loss": 0.2824, "step": 34669 }, { "epoch": 1.6241158008151029, "grad_norm": 0.581223845643586, "learning_rate": 4.490214681759664e-07, "loss": 0.2516, "step": 34670 }, { "epoch": 1.6241626458050311, "grad_norm": 0.5667638339557918, "learning_rate": 4.4891303167603457e-07, "loss": 0.2586, "step": 34671 }, { "epoch": 1.6242094907949594, "grad_norm": 0.5703629589323439, "learning_rate": 4.488046069795934e-07, "loss": 0.2715, "step": 34672 }, { "epoch": 1.6242563357848878, "grad_norm": 0.6396826195192304, "learning_rate": 4.4869619408726557e-07, "loss": 0.2904, "step": 34673 }, { "epoch": 1.6243031807748163, "grad_norm": 0.6186928312065739, "learning_rate": 4.4858779299967544e-07, "loss": 0.2845, "step": 34674 }, { "epoch": 1.6243500257647443, "grad_norm": 0.6157307079109811, "learning_rate": 4.4847940371744724e-07, "loss": 0.2892, "step": 34675 }, { "epoch": 1.6243968707546728, "grad_norm": 0.5806377316908748, "learning_rate": 4.4837102624120417e-07, "loss": 0.2501, "step": 34676 }, { "epoch": 1.6244437157446012, "grad_norm": 0.6072176596461807, "learning_rate": 4.4826266057157093e-07, "loss": 0.2628, "step": 34677 }, { "epoch": 1.6244905607345295, "grad_norm": 0.605766484708152, "learning_rate": 4.481543067091704e-07, "loss": 0.2683, "step": 34678 }, { "epoch": 1.6245374057244577, "grad_norm": 0.6381911997243481, "learning_rate": 4.480459646546256e-07, "loss": 0.2728, "step": 34679 }, { "epoch": 1.6245842507143862, "grad_norm": 0.5972033837646946, "learning_rate": 4.479376344085604e-07, "loss": 0.2635, "step": 34680 }, { "epoch": 1.6246310957043144, "grad_norm": 0.5886093371574006, "learning_rate": 4.4782931597159826e-07, "loss": 0.2805, "step": 34681 }, { "epoch": 1.6246779406942427, "grad_norm": 0.6263033049124366, "learning_rate": 4.4772100934436293e-07, "loss": 0.267, "step": 34682 }, { "epoch": 1.6247247856841711, "grad_norm": 0.6064999745718275, "learning_rate": 4.47612714527477e-07, "loss": 0.2685, "step": 34683 }, { "epoch": 1.6247716306740994, "grad_norm": 0.5846879799477568, "learning_rate": 4.4750443152156486e-07, "loss": 0.2833, "step": 34684 }, { "epoch": 1.6248184756640276, "grad_norm": 0.5962683443972694, "learning_rate": 4.473961603272489e-07, "loss": 0.2741, "step": 34685 }, { "epoch": 1.624865320653956, "grad_norm": 0.5503910540816873, "learning_rate": 4.472879009451514e-07, "loss": 0.2504, "step": 34686 }, { "epoch": 1.6249121656438845, "grad_norm": 0.603393523168875, "learning_rate": 4.4717965337589625e-07, "loss": 0.27, "step": 34687 }, { "epoch": 1.6249590106338128, "grad_norm": 0.5898486724276115, "learning_rate": 4.4707141762010604e-07, "loss": 0.2624, "step": 34688 }, { "epoch": 1.625005855623741, "grad_norm": 0.6012245873857459, "learning_rate": 4.4696319367840384e-07, "loss": 0.2907, "step": 34689 }, { "epoch": 1.6250527006136695, "grad_norm": 0.62799468407177, "learning_rate": 4.468549815514131e-07, "loss": 0.2711, "step": 34690 }, { "epoch": 1.6250995456035977, "grad_norm": 0.6814127345213005, "learning_rate": 4.467467812397555e-07, "loss": 0.2814, "step": 34691 }, { "epoch": 1.625146390593526, "grad_norm": 0.614528803337646, "learning_rate": 4.4663859274405445e-07, "loss": 0.2797, "step": 34692 }, { "epoch": 1.6251932355834544, "grad_norm": 0.6288303787979177, "learning_rate": 4.465304160649317e-07, "loss": 0.2751, "step": 34693 }, { "epoch": 1.6252400805733827, "grad_norm": 0.6047872380442861, "learning_rate": 4.464222512030103e-07, "loss": 0.2571, "step": 34694 }, { "epoch": 1.625286925563311, "grad_norm": 0.6210444186967344, "learning_rate": 4.4631409815891256e-07, "loss": 0.2738, "step": 34695 }, { "epoch": 1.6253337705532394, "grad_norm": 0.629253122099609, "learning_rate": 4.462059569332619e-07, "loss": 0.2874, "step": 34696 }, { "epoch": 1.6253806155431678, "grad_norm": 0.6302484965359514, "learning_rate": 4.460978275266789e-07, "loss": 0.2786, "step": 34697 }, { "epoch": 1.6254274605330958, "grad_norm": 0.6153156725888793, "learning_rate": 4.4598970993978754e-07, "loss": 0.2719, "step": 34698 }, { "epoch": 1.6254743055230243, "grad_norm": 0.5793823931066198, "learning_rate": 4.4588160417320835e-07, "loss": 0.2576, "step": 34699 }, { "epoch": 1.6255211505129528, "grad_norm": 0.592384019631815, "learning_rate": 4.4577351022756427e-07, "loss": 0.2701, "step": 34700 }, { "epoch": 1.625567995502881, "grad_norm": 0.6096087025368014, "learning_rate": 4.4566542810347723e-07, "loss": 0.2667, "step": 34701 }, { "epoch": 1.6256148404928092, "grad_norm": 0.6171242006877777, "learning_rate": 4.455573578015701e-07, "loss": 0.2773, "step": 34702 }, { "epoch": 1.6256616854827377, "grad_norm": 0.6763893557085754, "learning_rate": 4.454492993224632e-07, "loss": 0.2831, "step": 34703 }, { "epoch": 1.625708530472666, "grad_norm": 0.5915469229919695, "learning_rate": 4.453412526667794e-07, "loss": 0.2571, "step": 34704 }, { "epoch": 1.6257553754625942, "grad_norm": 0.5914733291654274, "learning_rate": 4.4523321783514065e-07, "loss": 0.2595, "step": 34705 }, { "epoch": 1.6258022204525227, "grad_norm": 0.579790487759149, "learning_rate": 4.451251948281679e-07, "loss": 0.2655, "step": 34706 }, { "epoch": 1.625849065442451, "grad_norm": 0.6006303998861402, "learning_rate": 4.450171836464831e-07, "loss": 0.2756, "step": 34707 }, { "epoch": 1.6258959104323791, "grad_norm": 0.6495383789960049, "learning_rate": 4.4490918429070857e-07, "loss": 0.2903, "step": 34708 }, { "epoch": 1.6259427554223076, "grad_norm": 0.5744362518522458, "learning_rate": 4.448011967614643e-07, "loss": 0.275, "step": 34709 }, { "epoch": 1.625989600412236, "grad_norm": 0.6241410557982673, "learning_rate": 4.446932210593727e-07, "loss": 0.2787, "step": 34710 }, { "epoch": 1.626036445402164, "grad_norm": 0.5843058532649583, "learning_rate": 4.445852571850551e-07, "loss": 0.2781, "step": 34711 }, { "epoch": 1.6260832903920925, "grad_norm": 0.603925696572302, "learning_rate": 4.444773051391335e-07, "loss": 0.2746, "step": 34712 }, { "epoch": 1.626130135382021, "grad_norm": 0.6138913239537916, "learning_rate": 4.443693649222275e-07, "loss": 0.2765, "step": 34713 }, { "epoch": 1.6261769803719492, "grad_norm": 0.5579941392889347, "learning_rate": 4.442614365349593e-07, "loss": 0.2631, "step": 34714 }, { "epoch": 1.6262238253618775, "grad_norm": 0.5531359272874006, "learning_rate": 4.441535199779504e-07, "loss": 0.2579, "step": 34715 }, { "epoch": 1.626270670351806, "grad_norm": 0.6305810116480176, "learning_rate": 4.4404561525182076e-07, "loss": 0.2726, "step": 34716 }, { "epoch": 1.6263175153417342, "grad_norm": 0.619356324935414, "learning_rate": 4.4393772235719166e-07, "loss": 0.2863, "step": 34717 }, { "epoch": 1.6263643603316624, "grad_norm": 0.5909792665331277, "learning_rate": 4.438298412946843e-07, "loss": 0.2665, "step": 34718 }, { "epoch": 1.626411205321591, "grad_norm": 0.5921200836209592, "learning_rate": 4.4372197206491995e-07, "loss": 0.2724, "step": 34719 }, { "epoch": 1.6264580503115191, "grad_norm": 0.6037800049412552, "learning_rate": 4.436141146685183e-07, "loss": 0.2863, "step": 34720 }, { "epoch": 1.6265048953014474, "grad_norm": 0.6278431227282805, "learning_rate": 4.4350626910610125e-07, "loss": 0.2742, "step": 34721 }, { "epoch": 1.6265517402913758, "grad_norm": 0.5770381951885452, "learning_rate": 4.4339843537828804e-07, "loss": 0.263, "step": 34722 }, { "epoch": 1.6265985852813043, "grad_norm": 0.5697156885045997, "learning_rate": 4.4329061348569986e-07, "loss": 0.2656, "step": 34723 }, { "epoch": 1.6266454302712325, "grad_norm": 0.5813349688786856, "learning_rate": 4.431828034289573e-07, "loss": 0.2623, "step": 34724 }, { "epoch": 1.6266922752611608, "grad_norm": 0.6553283328034777, "learning_rate": 4.43075005208681e-07, "loss": 0.2738, "step": 34725 }, { "epoch": 1.6267391202510892, "grad_norm": 0.5867415489439269, "learning_rate": 4.4296721882549133e-07, "loss": 0.2555, "step": 34726 }, { "epoch": 1.6267859652410175, "grad_norm": 0.5932408856569128, "learning_rate": 4.4285944428000853e-07, "loss": 0.2777, "step": 34727 }, { "epoch": 1.6268328102309457, "grad_norm": 0.6149689913866204, "learning_rate": 4.4275168157285164e-07, "loss": 0.2786, "step": 34728 }, { "epoch": 1.6268796552208742, "grad_norm": 0.6254319640107882, "learning_rate": 4.4264393070464206e-07, "loss": 0.2778, "step": 34729 }, { "epoch": 1.6269265002108024, "grad_norm": 0.6224634323179007, "learning_rate": 4.4253619167599957e-07, "loss": 0.2913, "step": 34730 }, { "epoch": 1.6269733452007307, "grad_norm": 0.6080790323691496, "learning_rate": 4.42428464487544e-07, "loss": 0.2832, "step": 34731 }, { "epoch": 1.6270201901906591, "grad_norm": 0.5734371450571224, "learning_rate": 4.423207491398962e-07, "loss": 0.2708, "step": 34732 }, { "epoch": 1.6270670351805876, "grad_norm": 0.6207299132231737, "learning_rate": 4.422130456336746e-07, "loss": 0.2839, "step": 34733 }, { "epoch": 1.6271138801705156, "grad_norm": 0.5985725442883562, "learning_rate": 4.4210535396950053e-07, "loss": 0.2673, "step": 34734 }, { "epoch": 1.627160725160444, "grad_norm": 0.5993625788846848, "learning_rate": 4.41997674147992e-07, "loss": 0.2784, "step": 34735 }, { "epoch": 1.6272075701503725, "grad_norm": 0.6156017798936008, "learning_rate": 4.4189000616976977e-07, "loss": 0.2789, "step": 34736 }, { "epoch": 1.6272544151403008, "grad_norm": 0.5856914999469699, "learning_rate": 4.417823500354532e-07, "loss": 0.2729, "step": 34737 }, { "epoch": 1.627301260130229, "grad_norm": 0.6220330271552575, "learning_rate": 4.4167470574566193e-07, "loss": 0.2766, "step": 34738 }, { "epoch": 1.6273481051201575, "grad_norm": 0.5522292911752883, "learning_rate": 4.4156707330101605e-07, "loss": 0.25, "step": 34739 }, { "epoch": 1.6273949501100857, "grad_norm": 0.607215150581363, "learning_rate": 4.4145945270213373e-07, "loss": 0.2766, "step": 34740 }, { "epoch": 1.627441795100014, "grad_norm": 0.5684646239437766, "learning_rate": 4.413518439496353e-07, "loss": 0.2584, "step": 34741 }, { "epoch": 1.6274886400899424, "grad_norm": 0.6280191814465814, "learning_rate": 4.4124424704413915e-07, "loss": 0.2824, "step": 34742 }, { "epoch": 1.6275354850798707, "grad_norm": 0.6334937037774906, "learning_rate": 4.4113666198626473e-07, "loss": 0.2916, "step": 34743 }, { "epoch": 1.627582330069799, "grad_norm": 0.6039848792169141, "learning_rate": 4.410290887766314e-07, "loss": 0.2805, "step": 34744 }, { "epoch": 1.6276291750597274, "grad_norm": 0.6009991762690633, "learning_rate": 4.40921527415859e-07, "loss": 0.2842, "step": 34745 }, { "epoch": 1.6276760200496558, "grad_norm": 0.5704685883370648, "learning_rate": 4.4081397790456476e-07, "loss": 0.2513, "step": 34746 }, { "epoch": 1.6277228650395839, "grad_norm": 0.5788953138711717, "learning_rate": 4.4070644024336917e-07, "loss": 0.2626, "step": 34747 }, { "epoch": 1.6277697100295123, "grad_norm": 0.5912231354808789, "learning_rate": 4.405989144328901e-07, "loss": 0.2612, "step": 34748 }, { "epoch": 1.6278165550194408, "grad_norm": 0.5851622012458858, "learning_rate": 4.404914004737465e-07, "loss": 0.2581, "step": 34749 }, { "epoch": 1.627863400009369, "grad_norm": 0.6021902219262237, "learning_rate": 4.4038389836655704e-07, "loss": 0.2788, "step": 34750 }, { "epoch": 1.6279102449992973, "grad_norm": 0.6001671413497197, "learning_rate": 4.402764081119415e-07, "loss": 0.2576, "step": 34751 }, { "epoch": 1.6279570899892257, "grad_norm": 0.59055913190122, "learning_rate": 4.4016892971051663e-07, "loss": 0.277, "step": 34752 }, { "epoch": 1.628003934979154, "grad_norm": 0.6580019519111896, "learning_rate": 4.4006146316290217e-07, "loss": 0.2816, "step": 34753 }, { "epoch": 1.6280507799690822, "grad_norm": 0.5676930918517152, "learning_rate": 4.399540084697168e-07, "loss": 0.2645, "step": 34754 }, { "epoch": 1.6280976249590107, "grad_norm": 0.6210653812735039, "learning_rate": 4.3984656563157756e-07, "loss": 0.2736, "step": 34755 }, { "epoch": 1.628144469948939, "grad_norm": 0.6578336509009336, "learning_rate": 4.3973913464910366e-07, "loss": 0.279, "step": 34756 }, { "epoch": 1.6281913149388672, "grad_norm": 0.62407980226074, "learning_rate": 4.396317155229138e-07, "loss": 0.2803, "step": 34757 }, { "epoch": 1.6282381599287956, "grad_norm": 0.571825526476695, "learning_rate": 4.395243082536249e-07, "loss": 0.269, "step": 34758 }, { "epoch": 1.628285004918724, "grad_norm": 0.5912679049619722, "learning_rate": 4.3941691284185574e-07, "loss": 0.2592, "step": 34759 }, { "epoch": 1.6283318499086523, "grad_norm": 0.6037103504228033, "learning_rate": 4.3930952928822437e-07, "loss": 0.2707, "step": 34760 }, { "epoch": 1.6283786948985806, "grad_norm": 0.5624087321844206, "learning_rate": 4.392021575933492e-07, "loss": 0.2574, "step": 34761 }, { "epoch": 1.628425539888509, "grad_norm": 0.5589960986806402, "learning_rate": 4.390947977578472e-07, "loss": 0.258, "step": 34762 }, { "epoch": 1.6284723848784373, "grad_norm": 0.6084811463724749, "learning_rate": 4.389874497823371e-07, "loss": 0.2792, "step": 34763 }, { "epoch": 1.6285192298683655, "grad_norm": 0.6814226689358576, "learning_rate": 4.3888011366743587e-07, "loss": 0.288, "step": 34764 }, { "epoch": 1.628566074858294, "grad_norm": 0.5919876164878413, "learning_rate": 4.387727894137614e-07, "loss": 0.2811, "step": 34765 }, { "epoch": 1.6286129198482222, "grad_norm": 0.6075217216532052, "learning_rate": 4.386654770219315e-07, "loss": 0.2807, "step": 34766 }, { "epoch": 1.6286597648381504, "grad_norm": 0.6248952517112032, "learning_rate": 4.385581764925634e-07, "loss": 0.2669, "step": 34767 }, { "epoch": 1.628706609828079, "grad_norm": 0.6425313461955562, "learning_rate": 4.384508878262758e-07, "loss": 0.2856, "step": 34768 }, { "epoch": 1.6287534548180074, "grad_norm": 0.614983417963493, "learning_rate": 4.3834361102368496e-07, "loss": 0.2793, "step": 34769 }, { "epoch": 1.6288002998079354, "grad_norm": 0.5912586321666785, "learning_rate": 4.3823634608540804e-07, "loss": 0.2681, "step": 34770 }, { "epoch": 1.6288471447978639, "grad_norm": 0.5874223553970365, "learning_rate": 4.381290930120627e-07, "loss": 0.2852, "step": 34771 }, { "epoch": 1.6288939897877923, "grad_norm": 0.5603274816751843, "learning_rate": 4.3802185180426614e-07, "loss": 0.2589, "step": 34772 }, { "epoch": 1.6289408347777206, "grad_norm": 0.5718579512816886, "learning_rate": 4.379146224626357e-07, "loss": 0.2679, "step": 34773 }, { "epoch": 1.6289876797676488, "grad_norm": 0.6006515452288168, "learning_rate": 4.378074049877881e-07, "loss": 0.2909, "step": 34774 }, { "epoch": 1.6290345247575773, "grad_norm": 0.6129311040047223, "learning_rate": 4.3770019938034145e-07, "loss": 0.2814, "step": 34775 }, { "epoch": 1.6290813697475055, "grad_norm": 0.5940784225132727, "learning_rate": 4.3759300564091165e-07, "loss": 0.2649, "step": 34776 }, { "epoch": 1.6291282147374337, "grad_norm": 0.5839965358628122, "learning_rate": 4.374858237701152e-07, "loss": 0.2492, "step": 34777 }, { "epoch": 1.6291750597273622, "grad_norm": 0.5981148815297415, "learning_rate": 4.3737865376856927e-07, "loss": 0.2692, "step": 34778 }, { "epoch": 1.6292219047172904, "grad_norm": 0.596348048629934, "learning_rate": 4.372714956368909e-07, "loss": 0.2811, "step": 34779 }, { "epoch": 1.6292687497072187, "grad_norm": 0.5571894924416564, "learning_rate": 4.371643493756966e-07, "loss": 0.254, "step": 34780 }, { "epoch": 1.6293155946971472, "grad_norm": 0.5843466932349816, "learning_rate": 4.3705721498560365e-07, "loss": 0.2629, "step": 34781 }, { "epoch": 1.6293624396870756, "grad_norm": 0.5858369547420361, "learning_rate": 4.3695009246722737e-07, "loss": 0.2796, "step": 34782 }, { "epoch": 1.6294092846770036, "grad_norm": 0.5511339853464887, "learning_rate": 4.3684298182118526e-07, "loss": 0.2592, "step": 34783 }, { "epoch": 1.629456129666932, "grad_norm": 0.6029385140971487, "learning_rate": 4.3673588304809276e-07, "loss": 0.27, "step": 34784 }, { "epoch": 1.6295029746568606, "grad_norm": 0.6351176088183729, "learning_rate": 4.3662879614856655e-07, "loss": 0.2883, "step": 34785 }, { "epoch": 1.6295498196467888, "grad_norm": 0.5973624109648208, "learning_rate": 4.365217211232231e-07, "loss": 0.2612, "step": 34786 }, { "epoch": 1.629596664636717, "grad_norm": 0.5804677946247059, "learning_rate": 4.364146579726783e-07, "loss": 0.2786, "step": 34787 }, { "epoch": 1.6296435096266455, "grad_norm": 0.601128628780933, "learning_rate": 4.363076066975494e-07, "loss": 0.2753, "step": 34788 }, { "epoch": 1.6296903546165737, "grad_norm": 0.61593316327986, "learning_rate": 4.362005672984512e-07, "loss": 0.2717, "step": 34789 }, { "epoch": 1.629737199606502, "grad_norm": 0.5970473613866389, "learning_rate": 4.3609353977599966e-07, "loss": 0.2577, "step": 34790 }, { "epoch": 1.6297840445964304, "grad_norm": 0.6037182935588778, "learning_rate": 4.359865241308112e-07, "loss": 0.2859, "step": 34791 }, { "epoch": 1.6298308895863587, "grad_norm": 0.627920116561055, "learning_rate": 4.358795203635011e-07, "loss": 0.2935, "step": 34792 }, { "epoch": 1.629877734576287, "grad_norm": 0.6363442069492327, "learning_rate": 4.3577252847468584e-07, "loss": 0.2815, "step": 34793 }, { "epoch": 1.6299245795662154, "grad_norm": 0.6580557826308742, "learning_rate": 4.356655484649813e-07, "loss": 0.2912, "step": 34794 }, { "epoch": 1.6299714245561439, "grad_norm": 0.5831319895396965, "learning_rate": 4.355585803350021e-07, "loss": 0.2558, "step": 34795 }, { "epoch": 1.630018269546072, "grad_norm": 0.6105040911985117, "learning_rate": 4.354516240853651e-07, "loss": 0.2748, "step": 34796 }, { "epoch": 1.6300651145360003, "grad_norm": 0.5838292696261275, "learning_rate": 4.3534467971668465e-07, "loss": 0.2711, "step": 34797 }, { "epoch": 1.6301119595259288, "grad_norm": 0.5693963546758891, "learning_rate": 4.3523774722957627e-07, "loss": 0.2527, "step": 34798 }, { "epoch": 1.630158804515857, "grad_norm": 0.659168588805518, "learning_rate": 4.351308266246559e-07, "loss": 0.2877, "step": 34799 }, { "epoch": 1.6302056495057853, "grad_norm": 0.6539807260891971, "learning_rate": 4.3502391790253924e-07, "loss": 0.3067, "step": 34800 }, { "epoch": 1.6302524944957137, "grad_norm": 0.576897686646309, "learning_rate": 4.3491702106384037e-07, "loss": 0.261, "step": 34801 }, { "epoch": 1.630299339485642, "grad_norm": 0.5854402406916255, "learning_rate": 4.3481013610917474e-07, "loss": 0.2662, "step": 34802 }, { "epoch": 1.6303461844755702, "grad_norm": 0.6000399561422233, "learning_rate": 4.3470326303915873e-07, "loss": 0.2634, "step": 34803 }, { "epoch": 1.6303930294654987, "grad_norm": 0.6146808501829407, "learning_rate": 4.345964018544055e-07, "loss": 0.2694, "step": 34804 }, { "epoch": 1.6304398744554272, "grad_norm": 0.6046244280174743, "learning_rate": 4.3448955255553067e-07, "loss": 0.267, "step": 34805 }, { "epoch": 1.6304867194453552, "grad_norm": 0.6048964181808977, "learning_rate": 4.343827151431501e-07, "loss": 0.2753, "step": 34806 }, { "epoch": 1.6305335644352836, "grad_norm": 0.5862487106212209, "learning_rate": 4.3427588961787725e-07, "loss": 0.2616, "step": 34807 }, { "epoch": 1.630580409425212, "grad_norm": 0.5548108949985192, "learning_rate": 4.341690759803274e-07, "loss": 0.2569, "step": 34808 }, { "epoch": 1.6306272544151403, "grad_norm": 0.5965135066592562, "learning_rate": 4.3406227423111543e-07, "loss": 0.2866, "step": 34809 }, { "epoch": 1.6306740994050686, "grad_norm": 0.6161655859457469, "learning_rate": 4.3395548437085626e-07, "loss": 0.2623, "step": 34810 }, { "epoch": 1.630720944394997, "grad_norm": 0.625853671209639, "learning_rate": 4.3384870640016367e-07, "loss": 0.2861, "step": 34811 }, { "epoch": 1.6307677893849253, "grad_norm": 0.6124028984958522, "learning_rate": 4.337419403196527e-07, "loss": 0.2768, "step": 34812 }, { "epoch": 1.6308146343748535, "grad_norm": 0.6230393052365434, "learning_rate": 4.3363518612993704e-07, "loss": 0.2895, "step": 34813 }, { "epoch": 1.630861479364782, "grad_norm": 0.5887965041879772, "learning_rate": 4.335284438316317e-07, "loss": 0.2642, "step": 34814 }, { "epoch": 1.6309083243547102, "grad_norm": 0.5963775291879704, "learning_rate": 4.334217134253507e-07, "loss": 0.2814, "step": 34815 }, { "epoch": 1.6309551693446385, "grad_norm": 0.5878245675972794, "learning_rate": 4.3331499491170827e-07, "loss": 0.2678, "step": 34816 }, { "epoch": 1.631002014334567, "grad_norm": 0.6237384250560435, "learning_rate": 4.332082882913191e-07, "loss": 0.2641, "step": 34817 }, { "epoch": 1.6310488593244954, "grad_norm": 0.5919491463162455, "learning_rate": 4.3310159356479695e-07, "loss": 0.2653, "step": 34818 }, { "epoch": 1.6310957043144234, "grad_norm": 0.6207082621914333, "learning_rate": 4.3299491073275495e-07, "loss": 0.2711, "step": 34819 }, { "epoch": 1.6311425493043519, "grad_norm": 0.6077895358948782, "learning_rate": 4.3288823979580786e-07, "loss": 0.2583, "step": 34820 }, { "epoch": 1.6311893942942803, "grad_norm": 0.5965117774786594, "learning_rate": 4.3278158075456903e-07, "loss": 0.2777, "step": 34821 }, { "epoch": 1.6312362392842086, "grad_norm": 0.6389916799511727, "learning_rate": 4.3267493360965305e-07, "loss": 0.2688, "step": 34822 }, { "epoch": 1.6312830842741368, "grad_norm": 0.6439931066996974, "learning_rate": 4.3256829836167304e-07, "loss": 0.2863, "step": 34823 }, { "epoch": 1.6313299292640653, "grad_norm": 0.5192541740678318, "learning_rate": 4.3246167501124345e-07, "loss": 0.2263, "step": 34824 }, { "epoch": 1.6313767742539935, "grad_norm": 0.6018452649640464, "learning_rate": 4.3235506355897713e-07, "loss": 0.2831, "step": 34825 }, { "epoch": 1.6314236192439218, "grad_norm": 0.5930754355775153, "learning_rate": 4.322484640054872e-07, "loss": 0.2811, "step": 34826 }, { "epoch": 1.6314704642338502, "grad_norm": 0.5551852244406469, "learning_rate": 4.321418763513877e-07, "loss": 0.2513, "step": 34827 }, { "epoch": 1.6315173092237785, "grad_norm": 0.5806900755875894, "learning_rate": 4.3203530059729216e-07, "loss": 0.2672, "step": 34828 }, { "epoch": 1.6315641542137067, "grad_norm": 0.5798809969751572, "learning_rate": 4.3192873674381325e-07, "loss": 0.2665, "step": 34829 }, { "epoch": 1.6316109992036352, "grad_norm": 0.6135455874655643, "learning_rate": 4.3182218479156573e-07, "loss": 0.2798, "step": 34830 }, { "epoch": 1.6316578441935636, "grad_norm": 0.6553421074134093, "learning_rate": 4.317156447411608e-07, "loss": 0.2841, "step": 34831 }, { "epoch": 1.6317046891834919, "grad_norm": 0.659499823184804, "learning_rate": 4.316091165932132e-07, "loss": 0.2847, "step": 34832 }, { "epoch": 1.6317515341734201, "grad_norm": 0.599311242096169, "learning_rate": 4.315026003483347e-07, "loss": 0.281, "step": 34833 }, { "epoch": 1.6317983791633486, "grad_norm": 0.5968970177129844, "learning_rate": 4.3139609600713894e-07, "loss": 0.278, "step": 34834 }, { "epoch": 1.6318452241532768, "grad_norm": 0.5657213913144267, "learning_rate": 4.3128960357023855e-07, "loss": 0.264, "step": 34835 }, { "epoch": 1.631892069143205, "grad_norm": 0.5744130690488548, "learning_rate": 4.311831230382471e-07, "loss": 0.2569, "step": 34836 }, { "epoch": 1.6319389141331335, "grad_norm": 0.5940388237170997, "learning_rate": 4.3107665441177644e-07, "loss": 0.2665, "step": 34837 }, { "epoch": 1.6319857591230618, "grad_norm": 0.5828531658454797, "learning_rate": 4.3097019769144017e-07, "loss": 0.2562, "step": 34838 }, { "epoch": 1.63203260411299, "grad_norm": 0.6176565433811899, "learning_rate": 4.308637528778495e-07, "loss": 0.264, "step": 34839 }, { "epoch": 1.6320794491029185, "grad_norm": 0.5537221042362386, "learning_rate": 4.307573199716181e-07, "loss": 0.2575, "step": 34840 }, { "epoch": 1.632126294092847, "grad_norm": 0.6419887348144999, "learning_rate": 4.306508989733582e-07, "loss": 0.2681, "step": 34841 }, { "epoch": 1.632173139082775, "grad_norm": 0.5779746180873803, "learning_rate": 4.3054448988368276e-07, "loss": 0.2631, "step": 34842 }, { "epoch": 1.6322199840727034, "grad_norm": 0.5803728958489681, "learning_rate": 4.3043809270320315e-07, "loss": 0.2623, "step": 34843 }, { "epoch": 1.6322668290626319, "grad_norm": 0.5819863134447523, "learning_rate": 4.303317074325322e-07, "loss": 0.2684, "step": 34844 }, { "epoch": 1.6323136740525601, "grad_norm": 0.6600048938841938, "learning_rate": 4.3022533407228285e-07, "loss": 0.2834, "step": 34845 }, { "epoch": 1.6323605190424884, "grad_norm": 0.5698134849950357, "learning_rate": 4.301189726230656e-07, "loss": 0.2589, "step": 34846 }, { "epoch": 1.6324073640324168, "grad_norm": 0.602338859176613, "learning_rate": 4.3001262308549343e-07, "loss": 0.272, "step": 34847 }, { "epoch": 1.632454209022345, "grad_norm": 0.6230622671271316, "learning_rate": 4.2990628546017906e-07, "loss": 0.2882, "step": 34848 }, { "epoch": 1.6325010540122733, "grad_norm": 0.6162764724977, "learning_rate": 4.2979995974773285e-07, "loss": 0.2983, "step": 34849 }, { "epoch": 1.6325478990022018, "grad_norm": 0.6399942024372899, "learning_rate": 4.29693645948768e-07, "loss": 0.2887, "step": 34850 }, { "epoch": 1.63259474399213, "grad_norm": 0.549710830297036, "learning_rate": 4.2958734406389537e-07, "loss": 0.2497, "step": 34851 }, { "epoch": 1.6326415889820582, "grad_norm": 0.614078292733638, "learning_rate": 4.294810540937283e-07, "loss": 0.277, "step": 34852 }, { "epoch": 1.6326884339719867, "grad_norm": 0.6044379932081342, "learning_rate": 4.293747760388764e-07, "loss": 0.27, "step": 34853 }, { "epoch": 1.6327352789619152, "grad_norm": 0.5661972904469269, "learning_rate": 4.2926850989995247e-07, "loss": 0.2725, "step": 34854 }, { "epoch": 1.6327821239518432, "grad_norm": 0.6043373733642913, "learning_rate": 4.291622556775682e-07, "loss": 0.2775, "step": 34855 }, { "epoch": 1.6328289689417717, "grad_norm": 0.5893309059176348, "learning_rate": 4.2905601337233397e-07, "loss": 0.2716, "step": 34856 }, { "epoch": 1.6328758139317001, "grad_norm": 0.6248631279898987, "learning_rate": 4.2894978298486214e-07, "loss": 0.2852, "step": 34857 }, { "epoch": 1.6329226589216284, "grad_norm": 0.6073939824012957, "learning_rate": 4.2884356451576374e-07, "loss": 0.2736, "step": 34858 }, { "epoch": 1.6329695039115566, "grad_norm": 0.5813003156725423, "learning_rate": 4.2873735796565065e-07, "loss": 0.2675, "step": 34859 }, { "epoch": 1.633016348901485, "grad_norm": 0.5469721356313966, "learning_rate": 4.2863116333513253e-07, "loss": 0.2635, "step": 34860 }, { "epoch": 1.6330631938914133, "grad_norm": 0.6009029370585134, "learning_rate": 4.2852498062482236e-07, "loss": 0.2589, "step": 34861 }, { "epoch": 1.6331100388813415, "grad_norm": 0.6340348993492372, "learning_rate": 4.284188098353298e-07, "loss": 0.2779, "step": 34862 }, { "epoch": 1.63315688387127, "grad_norm": 0.596523589616077, "learning_rate": 4.283126509672661e-07, "loss": 0.2662, "step": 34863 }, { "epoch": 1.6332037288611982, "grad_norm": 0.6052895101064707, "learning_rate": 4.2820650402124244e-07, "loss": 0.2752, "step": 34864 }, { "epoch": 1.6332505738511265, "grad_norm": 0.5902169896984938, "learning_rate": 4.2810036899786965e-07, "loss": 0.2848, "step": 34865 }, { "epoch": 1.633297418841055, "grad_norm": 0.610840691231653, "learning_rate": 4.27994245897759e-07, "loss": 0.2849, "step": 34866 }, { "epoch": 1.6333442638309834, "grad_norm": 0.5754201674893356, "learning_rate": 4.278881347215208e-07, "loss": 0.2733, "step": 34867 }, { "epoch": 1.6333911088209117, "grad_norm": 0.6264628815502601, "learning_rate": 4.277820354697651e-07, "loss": 0.2854, "step": 34868 }, { "epoch": 1.63343795381084, "grad_norm": 0.5602982563914692, "learning_rate": 4.276759481431028e-07, "loss": 0.2611, "step": 34869 }, { "epoch": 1.6334847988007684, "grad_norm": 0.6051228957709164, "learning_rate": 4.2756987274214456e-07, "loss": 0.2882, "step": 34870 }, { "epoch": 1.6335316437906966, "grad_norm": 0.5926048302408372, "learning_rate": 4.2746380926750067e-07, "loss": 0.2611, "step": 34871 }, { "epoch": 1.6335784887806248, "grad_norm": 0.6173630066579572, "learning_rate": 4.273577577197824e-07, "loss": 0.2572, "step": 34872 }, { "epoch": 1.6336253337705533, "grad_norm": 0.5753678985202749, "learning_rate": 4.272517180995986e-07, "loss": 0.2647, "step": 34873 }, { "epoch": 1.6336721787604815, "grad_norm": 0.614781254563498, "learning_rate": 4.2714569040756107e-07, "loss": 0.2827, "step": 34874 }, { "epoch": 1.6337190237504098, "grad_norm": 0.6191647849320455, "learning_rate": 4.2703967464427817e-07, "loss": 0.2858, "step": 34875 }, { "epoch": 1.6337658687403382, "grad_norm": 0.596490075799794, "learning_rate": 4.269336708103611e-07, "loss": 0.2688, "step": 34876 }, { "epoch": 1.6338127137302667, "grad_norm": 0.685120893454687, "learning_rate": 4.268276789064196e-07, "loss": 0.2859, "step": 34877 }, { "epoch": 1.6338595587201947, "grad_norm": 0.52501185662966, "learning_rate": 4.267216989330639e-07, "loss": 0.2497, "step": 34878 }, { "epoch": 1.6339064037101232, "grad_norm": 0.5988533307159017, "learning_rate": 4.2661573089090413e-07, "loss": 0.2661, "step": 34879 }, { "epoch": 1.6339532487000517, "grad_norm": 0.5932588212596801, "learning_rate": 4.2650977478054997e-07, "loss": 0.2615, "step": 34880 }, { "epoch": 1.63400009368998, "grad_norm": 0.5786050202252601, "learning_rate": 4.2640383060260997e-07, "loss": 0.2575, "step": 34881 }, { "epoch": 1.6340469386799081, "grad_norm": 0.6339549013190762, "learning_rate": 4.2629789835769486e-07, "loss": 0.2847, "step": 34882 }, { "epoch": 1.6340937836698366, "grad_norm": 0.5998449657224946, "learning_rate": 4.261919780464144e-07, "loss": 0.2739, "step": 34883 }, { "epoch": 1.6341406286597648, "grad_norm": 0.5918126918944162, "learning_rate": 4.260860696693775e-07, "loss": 0.2751, "step": 34884 }, { "epoch": 1.634187473649693, "grad_norm": 0.5886084889758396, "learning_rate": 4.2598017322719483e-07, "loss": 0.272, "step": 34885 }, { "epoch": 1.6342343186396215, "grad_norm": 0.5739254192133686, "learning_rate": 4.258742887204742e-07, "loss": 0.2667, "step": 34886 }, { "epoch": 1.6342811636295498, "grad_norm": 0.5920842531969834, "learning_rate": 4.257684161498266e-07, "loss": 0.2665, "step": 34887 }, { "epoch": 1.634328008619478, "grad_norm": 0.612386848618441, "learning_rate": 4.256625555158597e-07, "loss": 0.266, "step": 34888 }, { "epoch": 1.6343748536094065, "grad_norm": 0.5532291563962918, "learning_rate": 4.255567068191835e-07, "loss": 0.2544, "step": 34889 }, { "epoch": 1.634421698599335, "grad_norm": 0.6015335356491386, "learning_rate": 4.254508700604068e-07, "loss": 0.2681, "step": 34890 }, { "epoch": 1.634468543589263, "grad_norm": 0.6050201530485528, "learning_rate": 4.253450452401398e-07, "loss": 0.2696, "step": 34891 }, { "epoch": 1.6345153885791914, "grad_norm": 0.5879369415636656, "learning_rate": 4.252392323589899e-07, "loss": 0.2711, "step": 34892 }, { "epoch": 1.63456223356912, "grad_norm": 0.6413131163715099, "learning_rate": 4.2513343141756676e-07, "loss": 0.2757, "step": 34893 }, { "epoch": 1.6346090785590481, "grad_norm": 0.5498389348595532, "learning_rate": 4.250276424164798e-07, "loss": 0.2301, "step": 34894 }, { "epoch": 1.6346559235489764, "grad_norm": 0.5933967329715238, "learning_rate": 4.2492186535633666e-07, "loss": 0.2665, "step": 34895 }, { "epoch": 1.6347027685389048, "grad_norm": 0.6476604647368959, "learning_rate": 4.2481610023774653e-07, "loss": 0.2873, "step": 34896 }, { "epoch": 1.634749613528833, "grad_norm": 0.643520612343823, "learning_rate": 4.24710347061319e-07, "loss": 0.2726, "step": 34897 }, { "epoch": 1.6347964585187613, "grad_norm": 0.584705705838958, "learning_rate": 4.2460460582766094e-07, "loss": 0.266, "step": 34898 }, { "epoch": 1.6348433035086898, "grad_norm": 0.5804609409473815, "learning_rate": 4.2449887653738197e-07, "loss": 0.2689, "step": 34899 }, { "epoch": 1.634890148498618, "grad_norm": 0.6168488403299794, "learning_rate": 4.243931591910902e-07, "loss": 0.2736, "step": 34900 }, { "epoch": 1.6349369934885463, "grad_norm": 0.5853320257407388, "learning_rate": 4.2428745378939486e-07, "loss": 0.2873, "step": 34901 }, { "epoch": 1.6349838384784747, "grad_norm": 0.5902136625603476, "learning_rate": 4.2418176033290264e-07, "loss": 0.269, "step": 34902 }, { "epoch": 1.6350306834684032, "grad_norm": 0.5930197754924441, "learning_rate": 4.240760788222234e-07, "loss": 0.2686, "step": 34903 }, { "epoch": 1.6350775284583314, "grad_norm": 0.6204287620060409, "learning_rate": 4.2397040925796384e-07, "loss": 0.265, "step": 34904 }, { "epoch": 1.6351243734482597, "grad_norm": 0.618259032843439, "learning_rate": 4.2386475164073295e-07, "loss": 0.2789, "step": 34905 }, { "epoch": 1.6351712184381881, "grad_norm": 0.6706124191528932, "learning_rate": 4.237591059711385e-07, "loss": 0.2872, "step": 34906 }, { "epoch": 1.6352180634281164, "grad_norm": 0.6021944767392234, "learning_rate": 4.236534722497887e-07, "loss": 0.2755, "step": 34907 }, { "epoch": 1.6352649084180446, "grad_norm": 0.6051130613973715, "learning_rate": 4.2354785047729194e-07, "loss": 0.2819, "step": 34908 }, { "epoch": 1.635311753407973, "grad_norm": 0.6310270303385743, "learning_rate": 4.2344224065425514e-07, "loss": 0.2769, "step": 34909 }, { "epoch": 1.6353585983979013, "grad_norm": 0.5946371040137681, "learning_rate": 4.23336642781286e-07, "loss": 0.2655, "step": 34910 }, { "epoch": 1.6354054433878296, "grad_norm": 0.6671236223148211, "learning_rate": 4.232310568589923e-07, "loss": 0.2878, "step": 34911 }, { "epoch": 1.635452288377758, "grad_norm": 0.5923309043636328, "learning_rate": 4.231254828879819e-07, "loss": 0.267, "step": 34912 }, { "epoch": 1.6354991333676865, "grad_norm": 0.6021371495196086, "learning_rate": 4.230199208688623e-07, "loss": 0.2851, "step": 34913 }, { "epoch": 1.6355459783576145, "grad_norm": 0.6117923316275518, "learning_rate": 4.2291437080224125e-07, "loss": 0.267, "step": 34914 }, { "epoch": 1.635592823347543, "grad_norm": 0.5825097806613933, "learning_rate": 4.228088326887264e-07, "loss": 0.2668, "step": 34915 }, { "epoch": 1.6356396683374714, "grad_norm": 0.5482511980360423, "learning_rate": 4.227033065289249e-07, "loss": 0.2555, "step": 34916 }, { "epoch": 1.6356865133273997, "grad_norm": 0.5954923221869968, "learning_rate": 4.2259779232344284e-07, "loss": 0.2744, "step": 34917 }, { "epoch": 1.635733358317328, "grad_norm": 0.6071874447324979, "learning_rate": 4.2249229007288837e-07, "loss": 0.2848, "step": 34918 }, { "epoch": 1.6357802033072564, "grad_norm": 0.5876715122770135, "learning_rate": 4.2238679977786857e-07, "loss": 0.2596, "step": 34919 }, { "epoch": 1.6358270482971846, "grad_norm": 0.6181037639633101, "learning_rate": 4.222813214389906e-07, "loss": 0.2682, "step": 34920 }, { "epoch": 1.6358738932871129, "grad_norm": 0.6024442135387718, "learning_rate": 4.221758550568622e-07, "loss": 0.2767, "step": 34921 }, { "epoch": 1.6359207382770413, "grad_norm": 0.6319023885223256, "learning_rate": 4.2207040063208885e-07, "loss": 0.2778, "step": 34922 }, { "epoch": 1.6359675832669696, "grad_norm": 0.545009362809217, "learning_rate": 4.219649581652785e-07, "loss": 0.2526, "step": 34923 }, { "epoch": 1.6360144282568978, "grad_norm": 0.6141852505762085, "learning_rate": 4.2185952765703697e-07, "loss": 0.2853, "step": 34924 }, { "epoch": 1.6360612732468263, "grad_norm": 0.5909935885832954, "learning_rate": 4.217541091079716e-07, "loss": 0.2689, "step": 34925 }, { "epoch": 1.6361081182367547, "grad_norm": 0.6210815200949358, "learning_rate": 4.216487025186891e-07, "loss": 0.2799, "step": 34926 }, { "epoch": 1.6361549632266827, "grad_norm": 0.5712664167374626, "learning_rate": 4.215433078897957e-07, "loss": 0.259, "step": 34927 }, { "epoch": 1.6362018082166112, "grad_norm": 0.6039456396172631, "learning_rate": 4.214379252218989e-07, "loss": 0.2836, "step": 34928 }, { "epoch": 1.6362486532065397, "grad_norm": 0.6241823781751266, "learning_rate": 4.213325545156044e-07, "loss": 0.2745, "step": 34929 }, { "epoch": 1.636295498196468, "grad_norm": 0.620656345317466, "learning_rate": 4.2122719577151806e-07, "loss": 0.2644, "step": 34930 }, { "epoch": 1.6363423431863962, "grad_norm": 0.623910342738556, "learning_rate": 4.211218489902466e-07, "loss": 0.2749, "step": 34931 }, { "epoch": 1.6363891881763246, "grad_norm": 0.5643137284782037, "learning_rate": 4.210165141723965e-07, "loss": 0.2687, "step": 34932 }, { "epoch": 1.6364360331662529, "grad_norm": 0.5874667206350757, "learning_rate": 4.209111913185737e-07, "loss": 0.2696, "step": 34933 }, { "epoch": 1.636482878156181, "grad_norm": 0.574410046648888, "learning_rate": 4.208058804293852e-07, "loss": 0.2616, "step": 34934 }, { "epoch": 1.6365297231461096, "grad_norm": 0.587346395334978, "learning_rate": 4.207005815054357e-07, "loss": 0.2559, "step": 34935 }, { "epoch": 1.6365765681360378, "grad_norm": 0.5881570484698404, "learning_rate": 4.205952945473321e-07, "loss": 0.2772, "step": 34936 }, { "epoch": 1.636623413125966, "grad_norm": 0.6542290402089334, "learning_rate": 4.2049001955567965e-07, "loss": 0.2652, "step": 34937 }, { "epoch": 1.6366702581158945, "grad_norm": 0.6274711156100017, "learning_rate": 4.2038475653108426e-07, "loss": 0.2843, "step": 34938 }, { "epoch": 1.636717103105823, "grad_norm": 0.6361822527959252, "learning_rate": 4.202795054741518e-07, "loss": 0.2851, "step": 34939 }, { "epoch": 1.6367639480957512, "grad_norm": 0.5937808400089929, "learning_rate": 4.201742663854888e-07, "loss": 0.2754, "step": 34940 }, { "epoch": 1.6368107930856794, "grad_norm": 0.5910559794675395, "learning_rate": 4.2006903926569945e-07, "loss": 0.2736, "step": 34941 }, { "epoch": 1.636857638075608, "grad_norm": 0.6228019172039831, "learning_rate": 4.1996382411539025e-07, "loss": 0.264, "step": 34942 }, { "epoch": 1.6369044830655362, "grad_norm": 0.6208949176502565, "learning_rate": 4.198586209351668e-07, "loss": 0.2774, "step": 34943 }, { "epoch": 1.6369513280554644, "grad_norm": 0.5917906375071708, "learning_rate": 4.1975342972563394e-07, "loss": 0.281, "step": 34944 }, { "epoch": 1.6369981730453929, "grad_norm": 0.6480103439689497, "learning_rate": 4.196482504873969e-07, "loss": 0.2943, "step": 34945 }, { "epoch": 1.637045018035321, "grad_norm": 0.6137452653319041, "learning_rate": 4.19543083221062e-07, "loss": 0.2637, "step": 34946 }, { "epoch": 1.6370918630252493, "grad_norm": 0.5941083801565419, "learning_rate": 4.1943792792723315e-07, "loss": 0.2764, "step": 34947 }, { "epoch": 1.6371387080151778, "grad_norm": 0.6337468721927813, "learning_rate": 4.1933278460651595e-07, "loss": 0.2832, "step": 34948 }, { "epoch": 1.6371855530051063, "grad_norm": 0.5824193129334833, "learning_rate": 4.1922765325951574e-07, "loss": 0.2573, "step": 34949 }, { "epoch": 1.6372323979950343, "grad_norm": 0.5904069373738093, "learning_rate": 4.191225338868379e-07, "loss": 0.2834, "step": 34950 }, { "epoch": 1.6372792429849627, "grad_norm": 0.6135408223901793, "learning_rate": 4.1901742648908644e-07, "loss": 0.2773, "step": 34951 }, { "epoch": 1.6373260879748912, "grad_norm": 0.5619841653229527, "learning_rate": 4.189123310668672e-07, "loss": 0.2697, "step": 34952 }, { "epoch": 1.6373729329648194, "grad_norm": 0.5642597341494615, "learning_rate": 4.188072476207838e-07, "loss": 0.2606, "step": 34953 }, { "epoch": 1.6374197779547477, "grad_norm": 0.5963689340280256, "learning_rate": 4.187021761514418e-07, "loss": 0.2756, "step": 34954 }, { "epoch": 1.6374666229446762, "grad_norm": 0.6151594485448725, "learning_rate": 4.1859711665944545e-07, "loss": 0.2613, "step": 34955 }, { "epoch": 1.6375134679346044, "grad_norm": 0.545415055660807, "learning_rate": 4.184920691453995e-07, "loss": 0.252, "step": 34956 }, { "epoch": 1.6375603129245326, "grad_norm": 0.5941311158607352, "learning_rate": 4.183870336099091e-07, "loss": 0.2785, "step": 34957 }, { "epoch": 1.637607157914461, "grad_norm": 0.620943481779303, "learning_rate": 4.182820100535784e-07, "loss": 0.2925, "step": 34958 }, { "epoch": 1.6376540029043893, "grad_norm": 0.5824795932928236, "learning_rate": 4.1817699847701065e-07, "loss": 0.2704, "step": 34959 }, { "epoch": 1.6377008478943176, "grad_norm": 0.6101861397318215, "learning_rate": 4.180719988808113e-07, "loss": 0.2791, "step": 34960 }, { "epoch": 1.637747692884246, "grad_norm": 0.6406029119100525, "learning_rate": 4.17967011265584e-07, "loss": 0.2847, "step": 34961 }, { "epoch": 1.6377945378741745, "grad_norm": 0.5964711490284094, "learning_rate": 4.178620356319335e-07, "loss": 0.2603, "step": 34962 }, { "epoch": 1.6378413828641025, "grad_norm": 0.5841047559459989, "learning_rate": 4.177570719804636e-07, "loss": 0.2677, "step": 34963 }, { "epoch": 1.637888227854031, "grad_norm": 0.5746477970275099, "learning_rate": 4.1765212031177896e-07, "loss": 0.2625, "step": 34964 }, { "epoch": 1.6379350728439594, "grad_norm": 0.5601085177884487, "learning_rate": 4.17547180626483e-07, "loss": 0.2439, "step": 34965 }, { "epoch": 1.6379819178338877, "grad_norm": 0.6008187625285005, "learning_rate": 4.1744225292517896e-07, "loss": 0.2668, "step": 34966 }, { "epoch": 1.638028762823816, "grad_norm": 0.573690714102055, "learning_rate": 4.173373372084713e-07, "loss": 0.2569, "step": 34967 }, { "epoch": 1.6380756078137444, "grad_norm": 0.6179565979822854, "learning_rate": 4.1723243347696396e-07, "loss": 0.2676, "step": 34968 }, { "epoch": 1.6381224528036726, "grad_norm": 0.5764092597263254, "learning_rate": 4.1712754173126033e-07, "loss": 0.2677, "step": 34969 }, { "epoch": 1.6381692977936009, "grad_norm": 0.6109484960838818, "learning_rate": 4.17022661971965e-07, "loss": 0.2723, "step": 34970 }, { "epoch": 1.6382161427835293, "grad_norm": 0.6285519755634468, "learning_rate": 4.169177941996799e-07, "loss": 0.2885, "step": 34971 }, { "epoch": 1.6382629877734576, "grad_norm": 0.6385472880834144, "learning_rate": 4.1681293841500994e-07, "loss": 0.2816, "step": 34972 }, { "epoch": 1.6383098327633858, "grad_norm": 0.6092568551625427, "learning_rate": 4.1670809461855724e-07, "loss": 0.2762, "step": 34973 }, { "epoch": 1.6383566777533143, "grad_norm": 0.6043736784655525, "learning_rate": 4.1660326281092625e-07, "loss": 0.2754, "step": 34974 }, { "epoch": 1.6384035227432427, "grad_norm": 0.5563655160558493, "learning_rate": 4.1649844299271935e-07, "loss": 0.2645, "step": 34975 }, { "epoch": 1.638450367733171, "grad_norm": 0.6011674547011646, "learning_rate": 4.1639363516454117e-07, "loss": 0.2727, "step": 34976 }, { "epoch": 1.6384972127230992, "grad_norm": 0.578736336853253, "learning_rate": 4.1628883932699315e-07, "loss": 0.2657, "step": 34977 }, { "epoch": 1.6385440577130277, "grad_norm": 0.5882685214755647, "learning_rate": 4.161840554806798e-07, "loss": 0.2787, "step": 34978 }, { "epoch": 1.638590902702956, "grad_norm": 0.6101805841365687, "learning_rate": 4.160792836262029e-07, "loss": 0.2797, "step": 34979 }, { "epoch": 1.6386377476928842, "grad_norm": 0.5765399464034817, "learning_rate": 4.15974523764166e-07, "loss": 0.2468, "step": 34980 }, { "epoch": 1.6386845926828126, "grad_norm": 0.5772398524638731, "learning_rate": 4.1586977589517185e-07, "loss": 0.2623, "step": 34981 }, { "epoch": 1.6387314376727409, "grad_norm": 0.6326355290693786, "learning_rate": 4.157650400198238e-07, "loss": 0.3073, "step": 34982 }, { "epoch": 1.6387782826626691, "grad_norm": 0.6307750706904862, "learning_rate": 4.1566031613872376e-07, "loss": 0.2838, "step": 34983 }, { "epoch": 1.6388251276525976, "grad_norm": 0.5832245787137879, "learning_rate": 4.155556042524744e-07, "loss": 0.2591, "step": 34984 }, { "epoch": 1.638871972642526, "grad_norm": 0.5711723624183004, "learning_rate": 4.1545090436167944e-07, "loss": 0.2517, "step": 34985 }, { "epoch": 1.638918817632454, "grad_norm": 0.6468905864559021, "learning_rate": 4.1534621646694e-07, "loss": 0.2842, "step": 34986 }, { "epoch": 1.6389656626223825, "grad_norm": 0.6231880539928931, "learning_rate": 4.1524154056885894e-07, "loss": 0.2578, "step": 34987 }, { "epoch": 1.639012507612311, "grad_norm": 0.6142624258804046, "learning_rate": 4.1513687666803916e-07, "loss": 0.2849, "step": 34988 }, { "epoch": 1.6390593526022392, "grad_norm": 0.6088652953971968, "learning_rate": 4.150322247650829e-07, "loss": 0.2749, "step": 34989 }, { "epoch": 1.6391061975921675, "grad_norm": 0.617714965086987, "learning_rate": 4.149275848605916e-07, "loss": 0.2595, "step": 34990 }, { "epoch": 1.639153042582096, "grad_norm": 0.610453407392588, "learning_rate": 4.1482295695516817e-07, "loss": 0.2658, "step": 34991 }, { "epoch": 1.6391998875720242, "grad_norm": 0.6244739420749066, "learning_rate": 4.147183410494152e-07, "loss": 0.2719, "step": 34992 }, { "epoch": 1.6392467325619524, "grad_norm": 0.6313400995732082, "learning_rate": 4.146137371439332e-07, "loss": 0.2831, "step": 34993 }, { "epoch": 1.6392935775518809, "grad_norm": 0.5785873286612334, "learning_rate": 4.145091452393252e-07, "loss": 0.2488, "step": 34994 }, { "epoch": 1.6393404225418091, "grad_norm": 0.6146032739966439, "learning_rate": 4.1440456533619335e-07, "loss": 0.2897, "step": 34995 }, { "epoch": 1.6393872675317374, "grad_norm": 0.5809395359671501, "learning_rate": 4.1429999743513833e-07, "loss": 0.2715, "step": 34996 }, { "epoch": 1.6394341125216658, "grad_norm": 0.5871157072512958, "learning_rate": 4.1419544153676274e-07, "loss": 0.2703, "step": 34997 }, { "epoch": 1.6394809575115943, "grad_norm": 0.5934352641821639, "learning_rate": 4.1409089764166827e-07, "loss": 0.2723, "step": 34998 }, { "epoch": 1.6395278025015223, "grad_norm": 0.5993943280285128, "learning_rate": 4.1398636575045696e-07, "loss": 0.2687, "step": 34999 }, { "epoch": 1.6395746474914508, "grad_norm": 0.6023533571162009, "learning_rate": 4.138818458637292e-07, "loss": 0.2785, "step": 35000 }, { "epoch": 1.6396214924813792, "grad_norm": 0.6014503746187111, "learning_rate": 4.137773379820875e-07, "loss": 0.2741, "step": 35001 }, { "epoch": 1.6396683374713075, "grad_norm": 0.6119926176106641, "learning_rate": 4.1367284210613255e-07, "loss": 0.2785, "step": 35002 }, { "epoch": 1.6397151824612357, "grad_norm": 0.594054503498825, "learning_rate": 4.135683582364658e-07, "loss": 0.2738, "step": 35003 }, { "epoch": 1.6397620274511642, "grad_norm": 0.5735872901263623, "learning_rate": 4.1346388637368866e-07, "loss": 0.2607, "step": 35004 }, { "epoch": 1.6398088724410924, "grad_norm": 0.6126347052352799, "learning_rate": 4.1335942651840267e-07, "loss": 0.2885, "step": 35005 }, { "epoch": 1.6398557174310207, "grad_norm": 0.6148515337562656, "learning_rate": 4.132549786712092e-07, "loss": 0.2728, "step": 35006 }, { "epoch": 1.6399025624209491, "grad_norm": 0.5843469623399534, "learning_rate": 4.13150542832709e-07, "loss": 0.2623, "step": 35007 }, { "epoch": 1.6399494074108774, "grad_norm": 0.5872433177981327, "learning_rate": 4.1304611900350207e-07, "loss": 0.2742, "step": 35008 }, { "epoch": 1.6399962524008056, "grad_norm": 0.666372382278738, "learning_rate": 4.1294170718419043e-07, "loss": 0.2944, "step": 35009 }, { "epoch": 1.640043097390734, "grad_norm": 0.5613612624731467, "learning_rate": 4.128373073753744e-07, "loss": 0.2583, "step": 35010 }, { "epoch": 1.6400899423806625, "grad_norm": 0.6611549129838924, "learning_rate": 4.1273291957765526e-07, "loss": 0.2846, "step": 35011 }, { "epoch": 1.6401367873705908, "grad_norm": 0.5951933689871162, "learning_rate": 4.126285437916344e-07, "loss": 0.2632, "step": 35012 }, { "epoch": 1.640183632360519, "grad_norm": 0.642343042899691, "learning_rate": 4.125241800179106e-07, "loss": 0.2874, "step": 35013 }, { "epoch": 1.6402304773504475, "grad_norm": 0.5785501648180892, "learning_rate": 4.1241982825708625e-07, "loss": 0.2582, "step": 35014 }, { "epoch": 1.6402773223403757, "grad_norm": 0.6445931731479434, "learning_rate": 4.123154885097605e-07, "loss": 0.2878, "step": 35015 }, { "epoch": 1.640324167330304, "grad_norm": 0.5847544964996687, "learning_rate": 4.1221116077653443e-07, "loss": 0.2777, "step": 35016 }, { "epoch": 1.6403710123202324, "grad_norm": 0.6072514145751593, "learning_rate": 4.1210684505800846e-07, "loss": 0.2663, "step": 35017 }, { "epoch": 1.6404178573101607, "grad_norm": 0.5924176177324438, "learning_rate": 4.1200254135478265e-07, "loss": 0.2756, "step": 35018 }, { "epoch": 1.640464702300089, "grad_norm": 0.6097257552949417, "learning_rate": 4.1189824966745813e-07, "loss": 0.2677, "step": 35019 }, { "epoch": 1.6405115472900174, "grad_norm": 0.6040011893424568, "learning_rate": 4.1179396999663443e-07, "loss": 0.2685, "step": 35020 }, { "epoch": 1.6405583922799458, "grad_norm": 0.59690306079237, "learning_rate": 4.116897023429109e-07, "loss": 0.2703, "step": 35021 }, { "epoch": 1.6406052372698738, "grad_norm": 0.6296655174489743, "learning_rate": 4.1158544670688806e-07, "loss": 0.2865, "step": 35022 }, { "epoch": 1.6406520822598023, "grad_norm": 0.5653623743965266, "learning_rate": 4.1148120308916626e-07, "loss": 0.2694, "step": 35023 }, { "epoch": 1.6406989272497308, "grad_norm": 0.6128243983768741, "learning_rate": 4.113769714903451e-07, "loss": 0.2714, "step": 35024 }, { "epoch": 1.640745772239659, "grad_norm": 0.5808645555127884, "learning_rate": 4.1127275191102515e-07, "loss": 0.2731, "step": 35025 }, { "epoch": 1.6407926172295872, "grad_norm": 0.5533282853529093, "learning_rate": 4.1116854435180514e-07, "loss": 0.2588, "step": 35026 }, { "epoch": 1.6408394622195157, "grad_norm": 0.6283267997934641, "learning_rate": 4.1106434881328565e-07, "loss": 0.2893, "step": 35027 }, { "epoch": 1.640886307209444, "grad_norm": 0.6090234400516774, "learning_rate": 4.1096016529606516e-07, "loss": 0.291, "step": 35028 }, { "epoch": 1.6409331521993722, "grad_norm": 0.6275491771858581, "learning_rate": 4.1085599380074364e-07, "loss": 0.2808, "step": 35029 }, { "epoch": 1.6409799971893007, "grad_norm": 0.5908801892328683, "learning_rate": 4.10751834327921e-07, "loss": 0.2677, "step": 35030 }, { "epoch": 1.641026842179229, "grad_norm": 0.6239264758766261, "learning_rate": 4.1064768687819694e-07, "loss": 0.2716, "step": 35031 }, { "epoch": 1.6410736871691571, "grad_norm": 0.60605046913421, "learning_rate": 4.1054355145216994e-07, "loss": 0.2785, "step": 35032 }, { "epoch": 1.6411205321590856, "grad_norm": 0.5447733482338712, "learning_rate": 4.104394280504395e-07, "loss": 0.2651, "step": 35033 }, { "epoch": 1.641167377149014, "grad_norm": 0.6103751724347641, "learning_rate": 4.1033531667360545e-07, "loss": 0.2744, "step": 35034 }, { "epoch": 1.641214222138942, "grad_norm": 0.5924285315924385, "learning_rate": 4.102312173222658e-07, "loss": 0.2642, "step": 35035 }, { "epoch": 1.6412610671288705, "grad_norm": 0.5943257597094866, "learning_rate": 4.1012712999702056e-07, "loss": 0.2658, "step": 35036 }, { "epoch": 1.641307912118799, "grad_norm": 0.5516660338771388, "learning_rate": 4.1002305469846874e-07, "loss": 0.2637, "step": 35037 }, { "epoch": 1.6413547571087272, "grad_norm": 0.6036872064639541, "learning_rate": 4.0991899142720837e-07, "loss": 0.2633, "step": 35038 }, { "epoch": 1.6414016020986555, "grad_norm": 0.625496977041188, "learning_rate": 4.0981494018383915e-07, "loss": 0.2871, "step": 35039 }, { "epoch": 1.641448447088584, "grad_norm": 0.6243396721425538, "learning_rate": 4.097109009689593e-07, "loss": 0.2934, "step": 35040 }, { "epoch": 1.6414952920785122, "grad_norm": 0.6101939367085799, "learning_rate": 4.096068737831685e-07, "loss": 0.2728, "step": 35041 }, { "epoch": 1.6415421370684404, "grad_norm": 0.5702248047692529, "learning_rate": 4.095028586270641e-07, "loss": 0.2613, "step": 35042 }, { "epoch": 1.641588982058369, "grad_norm": 0.6153548432674409, "learning_rate": 4.0939885550124623e-07, "loss": 0.2659, "step": 35043 }, { "epoch": 1.6416358270482971, "grad_norm": 0.5881135705228461, "learning_rate": 4.092948644063116e-07, "loss": 0.2761, "step": 35044 }, { "epoch": 1.6416826720382254, "grad_norm": 0.5942063271585477, "learning_rate": 4.0919088534285946e-07, "loss": 0.2725, "step": 35045 }, { "epoch": 1.6417295170281538, "grad_norm": 0.5765054480997333, "learning_rate": 4.0908691831148843e-07, "loss": 0.2654, "step": 35046 }, { "epoch": 1.6417763620180823, "grad_norm": 0.598352959560374, "learning_rate": 4.089829633127967e-07, "loss": 0.2658, "step": 35047 }, { "epoch": 1.6418232070080105, "grad_norm": 0.546577095484401, "learning_rate": 4.08879020347383e-07, "loss": 0.257, "step": 35048 }, { "epoch": 1.6418700519979388, "grad_norm": 0.606631832003308, "learning_rate": 4.0877508941584517e-07, "loss": 0.2724, "step": 35049 }, { "epoch": 1.6419168969878672, "grad_norm": 0.6049031659244893, "learning_rate": 4.0867117051878013e-07, "loss": 0.2708, "step": 35050 }, { "epoch": 1.6419637419777955, "grad_norm": 0.6114199459134122, "learning_rate": 4.0856726365678695e-07, "loss": 0.2771, "step": 35051 }, { "epoch": 1.6420105869677237, "grad_norm": 0.7611642288990579, "learning_rate": 4.0846336883046375e-07, "loss": 0.2837, "step": 35052 }, { "epoch": 1.6420574319576522, "grad_norm": 0.5875431573773138, "learning_rate": 4.083594860404078e-07, "loss": 0.2693, "step": 35053 }, { "epoch": 1.6421042769475804, "grad_norm": 0.6291149994925095, "learning_rate": 4.0825561528721744e-07, "loss": 0.2781, "step": 35054 }, { "epoch": 1.6421511219375087, "grad_norm": 0.679629468830314, "learning_rate": 4.08151756571491e-07, "loss": 0.2909, "step": 35055 }, { "epoch": 1.6421979669274371, "grad_norm": 0.6220331873340533, "learning_rate": 4.080479098938253e-07, "loss": 0.2781, "step": 35056 }, { "epoch": 1.6422448119173656, "grad_norm": 0.6238207134384823, "learning_rate": 4.079440752548178e-07, "loss": 0.2724, "step": 35057 }, { "epoch": 1.6422916569072936, "grad_norm": 0.5938512531850246, "learning_rate": 4.0784025265506595e-07, "loss": 0.2841, "step": 35058 }, { "epoch": 1.642338501897222, "grad_norm": 0.5830362642712293, "learning_rate": 4.077364420951677e-07, "loss": 0.2575, "step": 35059 }, { "epoch": 1.6423853468871505, "grad_norm": 0.6328110132497046, "learning_rate": 4.076326435757205e-07, "loss": 0.2752, "step": 35060 }, { "epoch": 1.6424321918770788, "grad_norm": 0.6342558387021009, "learning_rate": 4.0752885709732184e-07, "loss": 0.281, "step": 35061 }, { "epoch": 1.642479036867007, "grad_norm": 0.591929956818403, "learning_rate": 4.0742508266056827e-07, "loss": 0.2779, "step": 35062 }, { "epoch": 1.6425258818569355, "grad_norm": 0.5985659983725256, "learning_rate": 4.0732132026605807e-07, "loss": 0.28, "step": 35063 }, { "epoch": 1.6425727268468637, "grad_norm": 0.6396178478945737, "learning_rate": 4.072175699143871e-07, "loss": 0.2895, "step": 35064 }, { "epoch": 1.642619571836792, "grad_norm": 0.6082684219770338, "learning_rate": 4.071138316061529e-07, "loss": 0.2588, "step": 35065 }, { "epoch": 1.6426664168267204, "grad_norm": 0.5818669746887959, "learning_rate": 4.0701010534195255e-07, "loss": 0.2615, "step": 35066 }, { "epoch": 1.6427132618166487, "grad_norm": 0.5451137523483431, "learning_rate": 4.0690639112238305e-07, "loss": 0.2501, "step": 35067 }, { "epoch": 1.642760106806577, "grad_norm": 0.6001670587892413, "learning_rate": 4.068026889480417e-07, "loss": 0.263, "step": 35068 }, { "epoch": 1.6428069517965054, "grad_norm": 0.6390776235400903, "learning_rate": 4.0669899881952494e-07, "loss": 0.259, "step": 35069 }, { "epoch": 1.6428537967864338, "grad_norm": 0.6155264250867877, "learning_rate": 4.0659532073742875e-07, "loss": 0.2998, "step": 35070 }, { "epoch": 1.6429006417763619, "grad_norm": 0.5995937460483557, "learning_rate": 4.064916547023501e-07, "loss": 0.2736, "step": 35071 }, { "epoch": 1.6429474867662903, "grad_norm": 0.5975999908158158, "learning_rate": 4.06388000714886e-07, "loss": 0.2721, "step": 35072 }, { "epoch": 1.6429943317562188, "grad_norm": 0.6156751897277417, "learning_rate": 4.0628435877563266e-07, "loss": 0.2745, "step": 35073 }, { "epoch": 1.643041176746147, "grad_norm": 0.6091769754049791, "learning_rate": 4.061807288851871e-07, "loss": 0.2729, "step": 35074 }, { "epoch": 1.6430880217360753, "grad_norm": 0.582469702315252, "learning_rate": 4.060771110441447e-07, "loss": 0.2713, "step": 35075 }, { "epoch": 1.6431348667260037, "grad_norm": 0.575072484741886, "learning_rate": 4.0597350525310275e-07, "loss": 0.2746, "step": 35076 }, { "epoch": 1.643181711715932, "grad_norm": 0.6133883874562309, "learning_rate": 4.058699115126566e-07, "loss": 0.27, "step": 35077 }, { "epoch": 1.6432285567058602, "grad_norm": 0.6047030007497001, "learning_rate": 4.057663298234024e-07, "loss": 0.2655, "step": 35078 }, { "epoch": 1.6432754016957887, "grad_norm": 0.5679211942680708, "learning_rate": 4.0566276018593697e-07, "loss": 0.2571, "step": 35079 }, { "epoch": 1.643322246685717, "grad_norm": 0.6030373051363831, "learning_rate": 4.0555920260085617e-07, "loss": 0.2636, "step": 35080 }, { "epoch": 1.6433690916756452, "grad_norm": 0.5987944790696481, "learning_rate": 4.054556570687554e-07, "loss": 0.2586, "step": 35081 }, { "epoch": 1.6434159366655736, "grad_norm": 0.6224902392186823, "learning_rate": 4.0535212359023107e-07, "loss": 0.2812, "step": 35082 }, { "epoch": 1.643462781655502, "grad_norm": 0.5547150794500674, "learning_rate": 4.0524860216587914e-07, "loss": 0.2504, "step": 35083 }, { "epoch": 1.6435096266454303, "grad_norm": 0.5738649056061728, "learning_rate": 4.051450927962944e-07, "loss": 0.2673, "step": 35084 }, { "epoch": 1.6435564716353586, "grad_norm": 0.5373903916132827, "learning_rate": 4.05041595482073e-07, "loss": 0.2671, "step": 35085 }, { "epoch": 1.643603316625287, "grad_norm": 0.6504404824099813, "learning_rate": 4.0493811022381146e-07, "loss": 0.2849, "step": 35086 }, { "epoch": 1.6436501616152153, "grad_norm": 0.6040894789973696, "learning_rate": 4.04834637022104e-07, "loss": 0.2634, "step": 35087 }, { "epoch": 1.6436970066051435, "grad_norm": 0.6049374596393854, "learning_rate": 4.0473117587754654e-07, "loss": 0.2612, "step": 35088 }, { "epoch": 1.643743851595072, "grad_norm": 0.5556846866319561, "learning_rate": 4.046277267907345e-07, "loss": 0.252, "step": 35089 }, { "epoch": 1.6437906965850002, "grad_norm": 0.6075974895112627, "learning_rate": 4.045242897622639e-07, "loss": 0.2723, "step": 35090 }, { "epoch": 1.6438375415749285, "grad_norm": 0.6092389658873023, "learning_rate": 4.0442086479272863e-07, "loss": 0.2652, "step": 35091 }, { "epoch": 1.643884386564857, "grad_norm": 0.6097615597906937, "learning_rate": 4.043174518827253e-07, "loss": 0.274, "step": 35092 }, { "epoch": 1.6439312315547854, "grad_norm": 0.5955883596072256, "learning_rate": 4.0421405103284764e-07, "loss": 0.2762, "step": 35093 }, { "epoch": 1.6439780765447134, "grad_norm": 0.5598009742741733, "learning_rate": 4.0411066224369133e-07, "loss": 0.2618, "step": 35094 }, { "epoch": 1.6440249215346419, "grad_norm": 0.5853985304611654, "learning_rate": 4.040072855158514e-07, "loss": 0.2724, "step": 35095 }, { "epoch": 1.6440717665245703, "grad_norm": 0.6226628662627874, "learning_rate": 4.0390392084992264e-07, "loss": 0.2761, "step": 35096 }, { "epoch": 1.6441186115144986, "grad_norm": 0.5692451383333946, "learning_rate": 4.038005682465007e-07, "loss": 0.2484, "step": 35097 }, { "epoch": 1.6441654565044268, "grad_norm": 0.6460762594005465, "learning_rate": 4.0369722770617957e-07, "loss": 0.2722, "step": 35098 }, { "epoch": 1.6442123014943553, "grad_norm": 0.5935898563722231, "learning_rate": 4.035938992295535e-07, "loss": 0.2727, "step": 35099 }, { "epoch": 1.6442591464842835, "grad_norm": 0.6193500202973656, "learning_rate": 4.0349058281721756e-07, "loss": 0.2793, "step": 35100 }, { "epoch": 1.6443059914742117, "grad_norm": 0.5958129649160098, "learning_rate": 4.0338727846976647e-07, "loss": 0.276, "step": 35101 }, { "epoch": 1.6443528364641402, "grad_norm": 0.6400601995220135, "learning_rate": 4.0328398618779435e-07, "loss": 0.294, "step": 35102 }, { "epoch": 1.6443996814540685, "grad_norm": 0.5751532330505503, "learning_rate": 4.031807059718962e-07, "loss": 0.2528, "step": 35103 }, { "epoch": 1.6444465264439967, "grad_norm": 0.5734243861768666, "learning_rate": 4.030774378226665e-07, "loss": 0.2464, "step": 35104 }, { "epoch": 1.6444933714339252, "grad_norm": 0.5971560122739047, "learning_rate": 4.029741817406993e-07, "loss": 0.273, "step": 35105 }, { "epoch": 1.6445402164238536, "grad_norm": 0.564140919241025, "learning_rate": 4.028709377265877e-07, "loss": 0.2592, "step": 35106 }, { "epoch": 1.6445870614137816, "grad_norm": 0.6117447031113272, "learning_rate": 4.0276770578092704e-07, "loss": 0.2597, "step": 35107 }, { "epoch": 1.64463390640371, "grad_norm": 0.5864937961523783, "learning_rate": 4.0266448590431106e-07, "loss": 0.2703, "step": 35108 }, { "epoch": 1.6446807513936386, "grad_norm": 0.6016285237313238, "learning_rate": 4.0256127809733367e-07, "loss": 0.2887, "step": 35109 }, { "epoch": 1.6447275963835668, "grad_norm": 0.6096205647440002, "learning_rate": 4.0245808236058976e-07, "loss": 0.2764, "step": 35110 }, { "epoch": 1.644774441373495, "grad_norm": 0.5985504210434108, "learning_rate": 4.023548986946718e-07, "loss": 0.2769, "step": 35111 }, { "epoch": 1.6448212863634235, "grad_norm": 0.6258075429913887, "learning_rate": 4.022517271001747e-07, "loss": 0.2806, "step": 35112 }, { "epoch": 1.6448681313533517, "grad_norm": 0.5896593254991096, "learning_rate": 4.0214856757769095e-07, "loss": 0.2749, "step": 35113 }, { "epoch": 1.64491497634328, "grad_norm": 0.611213792876891, "learning_rate": 4.020454201278148e-07, "loss": 0.2858, "step": 35114 }, { "epoch": 1.6449618213332085, "grad_norm": 0.6135914887606629, "learning_rate": 4.0194228475114033e-07, "loss": 0.2722, "step": 35115 }, { "epoch": 1.6450086663231367, "grad_norm": 0.5773171530555194, "learning_rate": 4.0183916144826114e-07, "loss": 0.2713, "step": 35116 }, { "epoch": 1.645055511313065, "grad_norm": 0.5825441791888647, "learning_rate": 4.0173605021976954e-07, "loss": 0.2673, "step": 35117 }, { "epoch": 1.6451023563029934, "grad_norm": 0.6231704579424242, "learning_rate": 4.016329510662603e-07, "loss": 0.2665, "step": 35118 }, { "epoch": 1.6451492012929219, "grad_norm": 0.6045785886513049, "learning_rate": 4.015298639883253e-07, "loss": 0.274, "step": 35119 }, { "epoch": 1.64519604628285, "grad_norm": 0.6111789925654052, "learning_rate": 4.014267889865586e-07, "loss": 0.2793, "step": 35120 }, { "epoch": 1.6452428912727783, "grad_norm": 0.5874432855161058, "learning_rate": 4.013237260615532e-07, "loss": 0.2845, "step": 35121 }, { "epoch": 1.6452897362627068, "grad_norm": 0.6434322058152163, "learning_rate": 4.0122067521390224e-07, "loss": 0.259, "step": 35122 }, { "epoch": 1.645336581252635, "grad_norm": 0.5611655300479313, "learning_rate": 4.011176364441996e-07, "loss": 0.2632, "step": 35123 }, { "epoch": 1.6453834262425633, "grad_norm": 0.5898337695098215, "learning_rate": 4.0101460975303654e-07, "loss": 0.2673, "step": 35124 }, { "epoch": 1.6454302712324917, "grad_norm": 0.6139750235303553, "learning_rate": 4.009115951410078e-07, "loss": 0.2823, "step": 35125 }, { "epoch": 1.64547711622242, "grad_norm": 0.6594525752906395, "learning_rate": 4.008085926087044e-07, "loss": 0.2876, "step": 35126 }, { "epoch": 1.6455239612123482, "grad_norm": 0.6294563518456476, "learning_rate": 4.0070560215671986e-07, "loss": 0.289, "step": 35127 }, { "epoch": 1.6455708062022767, "grad_norm": 0.6037108578707729, "learning_rate": 4.0060262378564695e-07, "loss": 0.2637, "step": 35128 }, { "epoch": 1.6456176511922052, "grad_norm": 0.6152254599696778, "learning_rate": 4.0049965749607897e-07, "loss": 0.2778, "step": 35129 }, { "epoch": 1.6456644961821332, "grad_norm": 0.5741564349915753, "learning_rate": 4.0039670328860717e-07, "loss": 0.2621, "step": 35130 }, { "epoch": 1.6457113411720616, "grad_norm": 0.6259557955908142, "learning_rate": 4.0029376116382467e-07, "loss": 0.283, "step": 35131 }, { "epoch": 1.64575818616199, "grad_norm": 0.5949944765906269, "learning_rate": 4.001908311223246e-07, "loss": 0.2787, "step": 35132 }, { "epoch": 1.6458050311519183, "grad_norm": 0.6432492516763866, "learning_rate": 4.000879131646976e-07, "loss": 0.2748, "step": 35133 }, { "epoch": 1.6458518761418466, "grad_norm": 0.5915677775267761, "learning_rate": 3.9998500729153706e-07, "loss": 0.274, "step": 35134 }, { "epoch": 1.645898721131775, "grad_norm": 0.544668342651379, "learning_rate": 3.9988211350343556e-07, "loss": 0.2523, "step": 35135 }, { "epoch": 1.6459455661217033, "grad_norm": 0.5834988162428977, "learning_rate": 3.997792318009838e-07, "loss": 0.2638, "step": 35136 }, { "epoch": 1.6459924111116315, "grad_norm": 0.5880978790773261, "learning_rate": 3.996763621847752e-07, "loss": 0.2695, "step": 35137 }, { "epoch": 1.64603925610156, "grad_norm": 0.6069112736464384, "learning_rate": 3.995735046554008e-07, "loss": 0.2654, "step": 35138 }, { "epoch": 1.6460861010914882, "grad_norm": 0.6390641222578085, "learning_rate": 3.994706592134537e-07, "loss": 0.2866, "step": 35139 }, { "epoch": 1.6461329460814165, "grad_norm": 0.5563194709600816, "learning_rate": 3.993678258595243e-07, "loss": 0.2469, "step": 35140 }, { "epoch": 1.646179791071345, "grad_norm": 0.5751789650744146, "learning_rate": 3.9926500459420593e-07, "loss": 0.2608, "step": 35141 }, { "epoch": 1.6462266360612734, "grad_norm": 0.6071380023197112, "learning_rate": 3.991621954180888e-07, "loss": 0.2576, "step": 35142 }, { "epoch": 1.6462734810512014, "grad_norm": 0.612006486774168, "learning_rate": 3.9905939833176503e-07, "loss": 0.285, "step": 35143 }, { "epoch": 1.6463203260411299, "grad_norm": 0.5936308693799544, "learning_rate": 3.9895661333582634e-07, "loss": 0.2792, "step": 35144 }, { "epoch": 1.6463671710310583, "grad_norm": 0.5974221856351245, "learning_rate": 3.988538404308645e-07, "loss": 0.2661, "step": 35145 }, { "epoch": 1.6464140160209866, "grad_norm": 0.59683991100089, "learning_rate": 3.9875107961747123e-07, "loss": 0.2707, "step": 35146 }, { "epoch": 1.6464608610109148, "grad_norm": 0.5944163933715104, "learning_rate": 3.986483308962374e-07, "loss": 0.2634, "step": 35147 }, { "epoch": 1.6465077060008433, "grad_norm": 0.5633453598980256, "learning_rate": 3.985455942677535e-07, "loss": 0.2674, "step": 35148 }, { "epoch": 1.6465545509907715, "grad_norm": 0.5369036077606295, "learning_rate": 3.9844286973261176e-07, "loss": 0.2565, "step": 35149 }, { "epoch": 1.6466013959806998, "grad_norm": 0.5955733864759188, "learning_rate": 3.983401572914028e-07, "loss": 0.2627, "step": 35150 }, { "epoch": 1.6466482409706282, "grad_norm": 0.5657563976618891, "learning_rate": 3.982374569447184e-07, "loss": 0.2699, "step": 35151 }, { "epoch": 1.6466950859605565, "grad_norm": 0.6213307476059753, "learning_rate": 3.981347686931494e-07, "loss": 0.2859, "step": 35152 }, { "epoch": 1.6467419309504847, "grad_norm": 0.6248288042507717, "learning_rate": 3.98032092537286e-07, "loss": 0.2509, "step": 35153 }, { "epoch": 1.6467887759404132, "grad_norm": 0.5736530396856566, "learning_rate": 3.979294284777205e-07, "loss": 0.2715, "step": 35154 }, { "epoch": 1.6468356209303416, "grad_norm": 0.59316618733373, "learning_rate": 3.9782677651504173e-07, "loss": 0.2827, "step": 35155 }, { "epoch": 1.6468824659202699, "grad_norm": 0.6334634578980246, "learning_rate": 3.977241366498419e-07, "loss": 0.2824, "step": 35156 }, { "epoch": 1.6469293109101981, "grad_norm": 0.5233315878574242, "learning_rate": 3.9762150888271125e-07, "loss": 0.2435, "step": 35157 }, { "epoch": 1.6469761559001266, "grad_norm": 0.5315979161382178, "learning_rate": 3.9751889321424013e-07, "loss": 0.2546, "step": 35158 }, { "epoch": 1.6470230008900548, "grad_norm": 0.5813691414240959, "learning_rate": 3.974162896450201e-07, "loss": 0.2672, "step": 35159 }, { "epoch": 1.647069845879983, "grad_norm": 0.6442082999243389, "learning_rate": 3.9731369817564093e-07, "loss": 0.2835, "step": 35160 }, { "epoch": 1.6471166908699115, "grad_norm": 0.6023667999168612, "learning_rate": 3.9721111880669213e-07, "loss": 0.2907, "step": 35161 }, { "epoch": 1.6471635358598398, "grad_norm": 0.5772637534340268, "learning_rate": 3.9710855153876486e-07, "loss": 0.2471, "step": 35162 }, { "epoch": 1.647210380849768, "grad_norm": 0.5787028444923734, "learning_rate": 3.9700599637244956e-07, "loss": 0.2627, "step": 35163 }, { "epoch": 1.6472572258396965, "grad_norm": 0.6112172365883185, "learning_rate": 3.96903453308336e-07, "loss": 0.2759, "step": 35164 }, { "epoch": 1.647304070829625, "grad_norm": 0.6118886357500436, "learning_rate": 3.968009223470151e-07, "loss": 0.2856, "step": 35165 }, { "epoch": 1.647350915819553, "grad_norm": 0.6189308750231395, "learning_rate": 3.9669840348907585e-07, "loss": 0.2714, "step": 35166 }, { "epoch": 1.6473977608094814, "grad_norm": 0.5515118622193004, "learning_rate": 3.9659589673510885e-07, "loss": 0.258, "step": 35167 }, { "epoch": 1.6474446057994099, "grad_norm": 0.6126173089349896, "learning_rate": 3.964934020857036e-07, "loss": 0.2821, "step": 35168 }, { "epoch": 1.6474914507893381, "grad_norm": 0.5933652687016996, "learning_rate": 3.9639091954145005e-07, "loss": 0.2783, "step": 35169 }, { "epoch": 1.6475382957792664, "grad_norm": 0.6396958593985702, "learning_rate": 3.9628844910293785e-07, "loss": 0.276, "step": 35170 }, { "epoch": 1.6475851407691948, "grad_norm": 0.5953983488799417, "learning_rate": 3.9618599077075743e-07, "loss": 0.2742, "step": 35171 }, { "epoch": 1.647631985759123, "grad_norm": 0.589868706278052, "learning_rate": 3.9608354454549745e-07, "loss": 0.2774, "step": 35172 }, { "epoch": 1.6476788307490513, "grad_norm": 0.59860746201968, "learning_rate": 3.959811104277478e-07, "loss": 0.2856, "step": 35173 }, { "epoch": 1.6477256757389798, "grad_norm": 0.580121979612222, "learning_rate": 3.958786884180987e-07, "loss": 0.2654, "step": 35174 }, { "epoch": 1.647772520728908, "grad_norm": 0.5987361304113276, "learning_rate": 3.957762785171382e-07, "loss": 0.2533, "step": 35175 }, { "epoch": 1.6478193657188362, "grad_norm": 0.5663574652557596, "learning_rate": 3.956738807254565e-07, "loss": 0.2542, "step": 35176 }, { "epoch": 1.6478662107087647, "grad_norm": 0.5559956417650069, "learning_rate": 3.9557149504364317e-07, "loss": 0.2555, "step": 35177 }, { "epoch": 1.6479130556986932, "grad_norm": 0.5938956462395192, "learning_rate": 3.954691214722864e-07, "loss": 0.2752, "step": 35178 }, { "epoch": 1.6479599006886212, "grad_norm": 0.6237852976151239, "learning_rate": 3.9536676001197596e-07, "loss": 0.2789, "step": 35179 }, { "epoch": 1.6480067456785497, "grad_norm": 0.5917711714845743, "learning_rate": 3.952644106633005e-07, "loss": 0.275, "step": 35180 }, { "epoch": 1.6480535906684781, "grad_norm": 0.5687612075407377, "learning_rate": 3.951620734268502e-07, "loss": 0.2539, "step": 35181 }, { "epoch": 1.6481004356584064, "grad_norm": 0.6230310550266224, "learning_rate": 3.950597483032126e-07, "loss": 0.2892, "step": 35182 }, { "epoch": 1.6481472806483346, "grad_norm": 0.5919312995623052, "learning_rate": 3.9495743529297774e-07, "loss": 0.2605, "step": 35183 }, { "epoch": 1.648194125638263, "grad_norm": 0.6216885607342433, "learning_rate": 3.9485513439673303e-07, "loss": 0.2721, "step": 35184 }, { "epoch": 1.6482409706281913, "grad_norm": 0.6464370796244412, "learning_rate": 3.947528456150679e-07, "loss": 0.2832, "step": 35185 }, { "epoch": 1.6482878156181195, "grad_norm": 0.5967089630011546, "learning_rate": 3.946505689485711e-07, "loss": 0.2648, "step": 35186 }, { "epoch": 1.648334660608048, "grad_norm": 0.614093625528369, "learning_rate": 3.94548304397831e-07, "loss": 0.2701, "step": 35187 }, { "epoch": 1.6483815055979762, "grad_norm": 0.5890298694057343, "learning_rate": 3.944460519634369e-07, "loss": 0.2739, "step": 35188 }, { "epoch": 1.6484283505879045, "grad_norm": 0.6329865593649872, "learning_rate": 3.9434381164597637e-07, "loss": 0.2781, "step": 35189 }, { "epoch": 1.648475195577833, "grad_norm": 0.5979546739120963, "learning_rate": 3.9424158344603753e-07, "loss": 0.2952, "step": 35190 }, { "epoch": 1.6485220405677614, "grad_norm": 0.6055607665086511, "learning_rate": 3.941393673642091e-07, "loss": 0.2775, "step": 35191 }, { "epoch": 1.6485688855576897, "grad_norm": 0.608518169808415, "learning_rate": 3.940371634010795e-07, "loss": 0.2856, "step": 35192 }, { "epoch": 1.648615730547618, "grad_norm": 0.5872569819162515, "learning_rate": 3.9393497155723657e-07, "loss": 0.2697, "step": 35193 }, { "epoch": 1.6486625755375464, "grad_norm": 0.6164837776234589, "learning_rate": 3.938327918332685e-07, "loss": 0.2546, "step": 35194 }, { "epoch": 1.6487094205274746, "grad_norm": 0.603310113016155, "learning_rate": 3.9373062422976394e-07, "loss": 0.2624, "step": 35195 }, { "epoch": 1.6487562655174028, "grad_norm": 0.6149567419057769, "learning_rate": 3.936284687473102e-07, "loss": 0.2706, "step": 35196 }, { "epoch": 1.6488031105073313, "grad_norm": 0.5846375442919699, "learning_rate": 3.935263253864946e-07, "loss": 0.2744, "step": 35197 }, { "epoch": 1.6488499554972595, "grad_norm": 0.6246156757359593, "learning_rate": 3.9342419414790583e-07, "loss": 0.2868, "step": 35198 }, { "epoch": 1.6488968004871878, "grad_norm": 0.582610472622331, "learning_rate": 3.9332207503213123e-07, "loss": 0.2532, "step": 35199 }, { "epoch": 1.6489436454771162, "grad_norm": 0.6118023403482309, "learning_rate": 3.932199680397583e-07, "loss": 0.2726, "step": 35200 }, { "epoch": 1.6489904904670447, "grad_norm": 0.6116371418131411, "learning_rate": 3.9311787317137586e-07, "loss": 0.2691, "step": 35201 }, { "epoch": 1.6490373354569727, "grad_norm": 0.6177025230729034, "learning_rate": 3.930157904275697e-07, "loss": 0.2795, "step": 35202 }, { "epoch": 1.6490841804469012, "grad_norm": 0.608955149407558, "learning_rate": 3.929137198089289e-07, "loss": 0.2575, "step": 35203 }, { "epoch": 1.6491310254368297, "grad_norm": 0.6006451662527097, "learning_rate": 3.928116613160393e-07, "loss": 0.2707, "step": 35204 }, { "epoch": 1.649177870426758, "grad_norm": 0.536739326726214, "learning_rate": 3.927096149494891e-07, "loss": 0.2563, "step": 35205 }, { "epoch": 1.6492247154166861, "grad_norm": 0.6780967788157588, "learning_rate": 3.926075807098653e-07, "loss": 0.2881, "step": 35206 }, { "epoch": 1.6492715604066146, "grad_norm": 0.5624483248338263, "learning_rate": 3.925055585977552e-07, "loss": 0.263, "step": 35207 }, { "epoch": 1.6493184053965428, "grad_norm": 0.5855417421199479, "learning_rate": 3.9240354861374673e-07, "loss": 0.2805, "step": 35208 }, { "epoch": 1.649365250386471, "grad_norm": 0.5731314379568043, "learning_rate": 3.9230155075842573e-07, "loss": 0.2608, "step": 35209 }, { "epoch": 1.6494120953763995, "grad_norm": 0.5308339112351004, "learning_rate": 3.9219956503237925e-07, "loss": 0.2486, "step": 35210 }, { "epoch": 1.6494589403663278, "grad_norm": 0.6054899860539213, "learning_rate": 3.9209759143619435e-07, "loss": 0.2867, "step": 35211 }, { "epoch": 1.649505785356256, "grad_norm": 0.591538686010317, "learning_rate": 3.91995629970458e-07, "loss": 0.2738, "step": 35212 }, { "epoch": 1.6495526303461845, "grad_norm": 0.5742671024650446, "learning_rate": 3.918936806357573e-07, "loss": 0.2617, "step": 35213 }, { "epoch": 1.649599475336113, "grad_norm": 0.5769923103511798, "learning_rate": 3.9179174343267874e-07, "loss": 0.2705, "step": 35214 }, { "epoch": 1.649646320326041, "grad_norm": 0.5565162155134665, "learning_rate": 3.9168981836180844e-07, "loss": 0.2651, "step": 35215 }, { "epoch": 1.6496931653159694, "grad_norm": 0.6017173233708528, "learning_rate": 3.91587905423734e-07, "loss": 0.2586, "step": 35216 }, { "epoch": 1.649740010305898, "grad_norm": 0.6499837073890353, "learning_rate": 3.914860046190405e-07, "loss": 0.2812, "step": 35217 }, { "epoch": 1.6497868552958261, "grad_norm": 0.6346432291444846, "learning_rate": 3.913841159483153e-07, "loss": 0.2763, "step": 35218 }, { "epoch": 1.6498337002857544, "grad_norm": 0.610823540197546, "learning_rate": 3.9128223941214433e-07, "loss": 0.2761, "step": 35219 }, { "epoch": 1.6498805452756828, "grad_norm": 0.6042972014286947, "learning_rate": 3.9118037501111484e-07, "loss": 0.2747, "step": 35220 }, { "epoch": 1.649927390265611, "grad_norm": 0.6084210527890198, "learning_rate": 3.9107852274581136e-07, "loss": 0.2864, "step": 35221 }, { "epoch": 1.6499742352555393, "grad_norm": 0.5593224853419184, "learning_rate": 3.9097668261682126e-07, "loss": 0.2552, "step": 35222 }, { "epoch": 1.6500210802454678, "grad_norm": 0.6213260481094378, "learning_rate": 3.9087485462473067e-07, "loss": 0.2807, "step": 35223 }, { "epoch": 1.650067925235396, "grad_norm": 0.6023233085257377, "learning_rate": 3.907730387701245e-07, "loss": 0.2635, "step": 35224 }, { "epoch": 1.6501147702253243, "grad_norm": 0.6433869372276926, "learning_rate": 3.9067123505358963e-07, "loss": 0.2859, "step": 35225 }, { "epoch": 1.6501616152152527, "grad_norm": 0.5885163944903921, "learning_rate": 3.9056944347571205e-07, "loss": 0.2752, "step": 35226 }, { "epoch": 1.6502084602051812, "grad_norm": 0.6065828293814646, "learning_rate": 3.9046766403707667e-07, "loss": 0.2737, "step": 35227 }, { "epoch": 1.6502553051951094, "grad_norm": 0.5338534684797269, "learning_rate": 3.9036589673826953e-07, "loss": 0.2442, "step": 35228 }, { "epoch": 1.6503021501850377, "grad_norm": 0.5558995465996899, "learning_rate": 3.9026414157987636e-07, "loss": 0.2512, "step": 35229 }, { "epoch": 1.6503489951749661, "grad_norm": 0.5988391068089515, "learning_rate": 3.901623985624836e-07, "loss": 0.2724, "step": 35230 }, { "epoch": 1.6503958401648944, "grad_norm": 0.5994015027890304, "learning_rate": 3.900606676866753e-07, "loss": 0.2804, "step": 35231 }, { "epoch": 1.6504426851548226, "grad_norm": 0.6473187824588948, "learning_rate": 3.899589489530381e-07, "loss": 0.2792, "step": 35232 }, { "epoch": 1.650489530144751, "grad_norm": 0.5834014498967324, "learning_rate": 3.8985724236215606e-07, "loss": 0.2813, "step": 35233 }, { "epoch": 1.6505363751346793, "grad_norm": 0.6189343201960366, "learning_rate": 3.8975554791461533e-07, "loss": 0.2681, "step": 35234 }, { "epoch": 1.6505832201246076, "grad_norm": 0.6150858590878143, "learning_rate": 3.8965386561100107e-07, "loss": 0.2704, "step": 35235 }, { "epoch": 1.650630065114536, "grad_norm": 0.6065063726060205, "learning_rate": 3.8955219545189855e-07, "loss": 0.2711, "step": 35236 }, { "epoch": 1.6506769101044645, "grad_norm": 0.5447354906769147, "learning_rate": 3.894505374378932e-07, "loss": 0.2716, "step": 35237 }, { "epoch": 1.6507237550943925, "grad_norm": 0.6057139734349221, "learning_rate": 3.893488915695695e-07, "loss": 0.2616, "step": 35238 }, { "epoch": 1.650770600084321, "grad_norm": 0.6003570187825313, "learning_rate": 3.8924725784751176e-07, "loss": 0.2698, "step": 35239 }, { "epoch": 1.6508174450742494, "grad_norm": 0.6086273231507419, "learning_rate": 3.891456362723056e-07, "loss": 0.261, "step": 35240 }, { "epoch": 1.6508642900641777, "grad_norm": 0.5848541311967709, "learning_rate": 3.8904402684453583e-07, "loss": 0.2807, "step": 35241 }, { "epoch": 1.650911135054106, "grad_norm": 0.5388385213214932, "learning_rate": 3.88942429564787e-07, "loss": 0.245, "step": 35242 }, { "epoch": 1.6509579800440344, "grad_norm": 0.6471678633724014, "learning_rate": 3.888408444336439e-07, "loss": 0.2796, "step": 35243 }, { "epoch": 1.6510048250339626, "grad_norm": 0.6108848964718003, "learning_rate": 3.887392714516919e-07, "loss": 0.2855, "step": 35244 }, { "epoch": 1.6510516700238909, "grad_norm": 0.577027306076762, "learning_rate": 3.8863771061951436e-07, "loss": 0.2761, "step": 35245 }, { "epoch": 1.6510985150138193, "grad_norm": 0.5789662028812002, "learning_rate": 3.885361619376959e-07, "loss": 0.266, "step": 35246 }, { "epoch": 1.6511453600037476, "grad_norm": 0.5990445602266868, "learning_rate": 3.8843462540682107e-07, "loss": 0.2602, "step": 35247 }, { "epoch": 1.6511922049936758, "grad_norm": 0.5933864677276438, "learning_rate": 3.8833310102747405e-07, "loss": 0.2668, "step": 35248 }, { "epoch": 1.6512390499836043, "grad_norm": 0.5926679930894225, "learning_rate": 3.882315888002394e-07, "loss": 0.2601, "step": 35249 }, { "epoch": 1.6512858949735327, "grad_norm": 0.5882384965273888, "learning_rate": 3.8813008872570196e-07, "loss": 0.2613, "step": 35250 }, { "epoch": 1.6513327399634607, "grad_norm": 0.5956680786474565, "learning_rate": 3.8802860080444425e-07, "loss": 0.2806, "step": 35251 }, { "epoch": 1.6513795849533892, "grad_norm": 0.5810897742004937, "learning_rate": 3.879271250370517e-07, "loss": 0.2735, "step": 35252 }, { "epoch": 1.6514264299433177, "grad_norm": 0.5842738760685687, "learning_rate": 3.8782566142410716e-07, "loss": 0.2745, "step": 35253 }, { "epoch": 1.651473274933246, "grad_norm": 0.5840208302609027, "learning_rate": 3.8772420996619513e-07, "loss": 0.2604, "step": 35254 }, { "epoch": 1.6515201199231742, "grad_norm": 0.5788115021844319, "learning_rate": 3.8762277066389936e-07, "loss": 0.2751, "step": 35255 }, { "epoch": 1.6515669649131026, "grad_norm": 0.5953453670725305, "learning_rate": 3.8752134351780407e-07, "loss": 0.2733, "step": 35256 }, { "epoch": 1.6516138099030309, "grad_norm": 0.6414437277408618, "learning_rate": 3.8741992852849214e-07, "loss": 0.2844, "step": 35257 }, { "epoch": 1.651660654892959, "grad_norm": 0.6366267851560699, "learning_rate": 3.873185256965481e-07, "loss": 0.2776, "step": 35258 }, { "epoch": 1.6517074998828876, "grad_norm": 0.6136486810449329, "learning_rate": 3.8721713502255424e-07, "loss": 0.2708, "step": 35259 }, { "epoch": 1.6517543448728158, "grad_norm": 0.5888661725886717, "learning_rate": 3.871157565070946e-07, "loss": 0.2769, "step": 35260 }, { "epoch": 1.651801189862744, "grad_norm": 0.6045950099929592, "learning_rate": 3.870143901507528e-07, "loss": 0.2763, "step": 35261 }, { "epoch": 1.6518480348526725, "grad_norm": 0.5990074615809187, "learning_rate": 3.869130359541123e-07, "loss": 0.257, "step": 35262 }, { "epoch": 1.651894879842601, "grad_norm": 0.6536316795808497, "learning_rate": 3.868116939177566e-07, "loss": 0.2793, "step": 35263 }, { "epoch": 1.6519417248325292, "grad_norm": 0.59261796347389, "learning_rate": 3.8671036404226816e-07, "loss": 0.2707, "step": 35264 }, { "epoch": 1.6519885698224575, "grad_norm": 0.639994786983041, "learning_rate": 3.866090463282307e-07, "loss": 0.2638, "step": 35265 }, { "epoch": 1.652035414812386, "grad_norm": 0.5621879390661159, "learning_rate": 3.8650774077622633e-07, "loss": 0.2503, "step": 35266 }, { "epoch": 1.6520822598023142, "grad_norm": 0.6042797792619743, "learning_rate": 3.8640644738683895e-07, "loss": 0.2628, "step": 35267 }, { "epoch": 1.6521291047922424, "grad_norm": 0.5607910858809996, "learning_rate": 3.8630516616065124e-07, "loss": 0.2584, "step": 35268 }, { "epoch": 1.6521759497821709, "grad_norm": 0.5932960567658377, "learning_rate": 3.862038970982465e-07, "loss": 0.2698, "step": 35269 }, { "epoch": 1.652222794772099, "grad_norm": 0.556279506411386, "learning_rate": 3.8610264020020634e-07, "loss": 0.2535, "step": 35270 }, { "epoch": 1.6522696397620273, "grad_norm": 0.5798030201044985, "learning_rate": 3.8600139546711434e-07, "loss": 0.2639, "step": 35271 }, { "epoch": 1.6523164847519558, "grad_norm": 0.598725777843934, "learning_rate": 3.8590016289955374e-07, "loss": 0.2751, "step": 35272 }, { "epoch": 1.6523633297418843, "grad_norm": 0.601599649069551, "learning_rate": 3.857989424981054e-07, "loss": 0.2675, "step": 35273 }, { "epoch": 1.6524101747318123, "grad_norm": 0.6460245348654448, "learning_rate": 3.8569773426335303e-07, "loss": 0.2759, "step": 35274 }, { "epoch": 1.6524570197217407, "grad_norm": 0.5778989934448887, "learning_rate": 3.855965381958793e-07, "loss": 0.2728, "step": 35275 }, { "epoch": 1.6525038647116692, "grad_norm": 0.5813110249695671, "learning_rate": 3.8549535429626553e-07, "loss": 0.257, "step": 35276 }, { "epoch": 1.6525507097015975, "grad_norm": 0.6015458383853186, "learning_rate": 3.853941825650945e-07, "loss": 0.2801, "step": 35277 }, { "epoch": 1.6525975546915257, "grad_norm": 0.5478424278059206, "learning_rate": 3.8529302300294835e-07, "loss": 0.255, "step": 35278 }, { "epoch": 1.6526443996814542, "grad_norm": 0.5453395837118368, "learning_rate": 3.8519187561041005e-07, "loss": 0.2668, "step": 35279 }, { "epoch": 1.6526912446713824, "grad_norm": 0.6038418108664348, "learning_rate": 3.850907403880602e-07, "loss": 0.2751, "step": 35280 }, { "epoch": 1.6527380896613106, "grad_norm": 0.5909914427718496, "learning_rate": 3.8498961733648225e-07, "loss": 0.2675, "step": 35281 }, { "epoch": 1.652784934651239, "grad_norm": 0.5800988181991861, "learning_rate": 3.8488850645625714e-07, "loss": 0.285, "step": 35282 }, { "epoch": 1.6528317796411673, "grad_norm": 0.6180270868779211, "learning_rate": 3.847874077479666e-07, "loss": 0.2736, "step": 35283 }, { "epoch": 1.6528786246310956, "grad_norm": 0.5761399309835601, "learning_rate": 3.8468632121219317e-07, "loss": 0.2746, "step": 35284 }, { "epoch": 1.652925469621024, "grad_norm": 0.5914675888081313, "learning_rate": 3.845852468495184e-07, "loss": 0.2739, "step": 35285 }, { "epoch": 1.6529723146109525, "grad_norm": 0.6095500246468408, "learning_rate": 3.8448418466052434e-07, "loss": 0.2713, "step": 35286 }, { "epoch": 1.6530191596008805, "grad_norm": 0.5861629681190559, "learning_rate": 3.8438313464579183e-07, "loss": 0.2733, "step": 35287 }, { "epoch": 1.653066004590809, "grad_norm": 0.5915622451541247, "learning_rate": 3.8428209680590243e-07, "loss": 0.2813, "step": 35288 }, { "epoch": 1.6531128495807375, "grad_norm": 0.6004380877502568, "learning_rate": 3.841810711414376e-07, "loss": 0.2728, "step": 35289 }, { "epoch": 1.6531596945706657, "grad_norm": 0.5878623009328666, "learning_rate": 3.840800576529788e-07, "loss": 0.2645, "step": 35290 }, { "epoch": 1.653206539560594, "grad_norm": 0.574317040427641, "learning_rate": 3.839790563411078e-07, "loss": 0.2732, "step": 35291 }, { "epoch": 1.6532533845505224, "grad_norm": 0.5881059216496118, "learning_rate": 3.838780672064057e-07, "loss": 0.274, "step": 35292 }, { "epoch": 1.6533002295404506, "grad_norm": 0.6168918198678267, "learning_rate": 3.837770902494531e-07, "loss": 0.2838, "step": 35293 }, { "epoch": 1.6533470745303789, "grad_norm": 0.6131827887927851, "learning_rate": 3.836761254708321e-07, "loss": 0.2862, "step": 35294 }, { "epoch": 1.6533939195203073, "grad_norm": 0.6189270888676794, "learning_rate": 3.8357517287112236e-07, "loss": 0.2864, "step": 35295 }, { "epoch": 1.6534407645102356, "grad_norm": 0.5955689336467819, "learning_rate": 3.8347423245090553e-07, "loss": 0.2788, "step": 35296 }, { "epoch": 1.6534876095001638, "grad_norm": 0.5602891250154121, "learning_rate": 3.8337330421076233e-07, "loss": 0.253, "step": 35297 }, { "epoch": 1.6535344544900923, "grad_norm": 0.6087279285343378, "learning_rate": 3.8327238815127426e-07, "loss": 0.2728, "step": 35298 }, { "epoch": 1.6535812994800207, "grad_norm": 0.6035502293309037, "learning_rate": 3.831714842730219e-07, "loss": 0.2759, "step": 35299 }, { "epoch": 1.653628144469949, "grad_norm": 0.5718004097394632, "learning_rate": 3.830705925765854e-07, "loss": 0.2661, "step": 35300 }, { "epoch": 1.6536749894598772, "grad_norm": 0.6076069831852152, "learning_rate": 3.829697130625454e-07, "loss": 0.2733, "step": 35301 }, { "epoch": 1.6537218344498057, "grad_norm": 0.6102620485482947, "learning_rate": 3.828688457314822e-07, "loss": 0.2594, "step": 35302 }, { "epoch": 1.653768679439734, "grad_norm": 0.6119281659534153, "learning_rate": 3.8276799058397707e-07, "loss": 0.283, "step": 35303 }, { "epoch": 1.6538155244296622, "grad_norm": 0.6108555456884401, "learning_rate": 3.826671476206098e-07, "loss": 0.2723, "step": 35304 }, { "epoch": 1.6538623694195906, "grad_norm": 0.6003773861813478, "learning_rate": 3.8256631684196133e-07, "loss": 0.263, "step": 35305 }, { "epoch": 1.6539092144095189, "grad_norm": 0.6063742163748955, "learning_rate": 3.824654982486112e-07, "loss": 0.2976, "step": 35306 }, { "epoch": 1.6539560593994471, "grad_norm": 0.6283403550989353, "learning_rate": 3.8236469184114026e-07, "loss": 0.2829, "step": 35307 }, { "epoch": 1.6540029043893756, "grad_norm": 0.6435282807565601, "learning_rate": 3.822638976201279e-07, "loss": 0.2776, "step": 35308 }, { "epoch": 1.654049749379304, "grad_norm": 0.5893699165658279, "learning_rate": 3.8216311558615463e-07, "loss": 0.2654, "step": 35309 }, { "epoch": 1.654096594369232, "grad_norm": 0.6398192962503414, "learning_rate": 3.820623457398001e-07, "loss": 0.2912, "step": 35310 }, { "epoch": 1.6541434393591605, "grad_norm": 0.6031515239036507, "learning_rate": 3.819615880816452e-07, "loss": 0.2696, "step": 35311 }, { "epoch": 1.654190284349089, "grad_norm": 0.6443098183457545, "learning_rate": 3.818608426122683e-07, "loss": 0.2776, "step": 35312 }, { "epoch": 1.6542371293390172, "grad_norm": 0.594277889251924, "learning_rate": 3.817601093322498e-07, "loss": 0.2701, "step": 35313 }, { "epoch": 1.6542839743289455, "grad_norm": 0.5781306993752917, "learning_rate": 3.8165938824217016e-07, "loss": 0.2642, "step": 35314 }, { "epoch": 1.654330819318874, "grad_norm": 0.638436826302175, "learning_rate": 3.8155867934260763e-07, "loss": 0.2889, "step": 35315 }, { "epoch": 1.6543776643088022, "grad_norm": 0.6675283514787749, "learning_rate": 3.814579826341427e-07, "loss": 0.2934, "step": 35316 }, { "epoch": 1.6544245092987304, "grad_norm": 0.538627580588784, "learning_rate": 3.8135729811735495e-07, "loss": 0.2577, "step": 35317 }, { "epoch": 1.6544713542886589, "grad_norm": 0.5595844319465966, "learning_rate": 3.8125662579282294e-07, "loss": 0.2644, "step": 35318 }, { "epoch": 1.6545181992785871, "grad_norm": 0.60435890890688, "learning_rate": 3.811559656611266e-07, "loss": 0.2826, "step": 35319 }, { "epoch": 1.6545650442685154, "grad_norm": 0.5937434298999018, "learning_rate": 3.810553177228449e-07, "loss": 0.2781, "step": 35320 }, { "epoch": 1.6546118892584438, "grad_norm": 0.5965478158963772, "learning_rate": 3.809546819785581e-07, "loss": 0.277, "step": 35321 }, { "epoch": 1.6546587342483723, "grad_norm": 0.6242765126937757, "learning_rate": 3.808540584288439e-07, "loss": 0.2783, "step": 35322 }, { "epoch": 1.6547055792383003, "grad_norm": 0.6074789889241914, "learning_rate": 3.8075344707428244e-07, "loss": 0.2636, "step": 35323 }, { "epoch": 1.6547524242282288, "grad_norm": 0.5556140281610336, "learning_rate": 3.8065284791545173e-07, "loss": 0.2699, "step": 35324 }, { "epoch": 1.6547992692181572, "grad_norm": 0.6380121572443975, "learning_rate": 3.8055226095293107e-07, "loss": 0.29, "step": 35325 }, { "epoch": 1.6548461142080855, "grad_norm": 0.6005359203705086, "learning_rate": 3.804516861872995e-07, "loss": 0.2736, "step": 35326 }, { "epoch": 1.6548929591980137, "grad_norm": 0.6135227084010331, "learning_rate": 3.8035112361913597e-07, "loss": 0.2748, "step": 35327 }, { "epoch": 1.6549398041879422, "grad_norm": 0.612543475151393, "learning_rate": 3.8025057324901916e-07, "loss": 0.2737, "step": 35328 }, { "epoch": 1.6549866491778704, "grad_norm": 0.6060682641300789, "learning_rate": 3.801500350775272e-07, "loss": 0.2881, "step": 35329 }, { "epoch": 1.6550334941677987, "grad_norm": 0.5665462644960871, "learning_rate": 3.800495091052395e-07, "loss": 0.2515, "step": 35330 }, { "epoch": 1.6550803391577271, "grad_norm": 0.5879089205775994, "learning_rate": 3.799489953327337e-07, "loss": 0.2707, "step": 35331 }, { "epoch": 1.6551271841476554, "grad_norm": 0.6157469643292612, "learning_rate": 3.7984849376058814e-07, "loss": 0.2729, "step": 35332 }, { "epoch": 1.6551740291375836, "grad_norm": 0.6135507675871563, "learning_rate": 3.7974800438938195e-07, "loss": 0.2867, "step": 35333 }, { "epoch": 1.655220874127512, "grad_norm": 0.5770994206986543, "learning_rate": 3.7964752721969284e-07, "loss": 0.2565, "step": 35334 }, { "epoch": 1.6552677191174405, "grad_norm": 0.6015449509815435, "learning_rate": 3.7954706225210026e-07, "loss": 0.2649, "step": 35335 }, { "epoch": 1.6553145641073688, "grad_norm": 0.6214042312417736, "learning_rate": 3.7944660948718096e-07, "loss": 0.2753, "step": 35336 }, { "epoch": 1.655361409097297, "grad_norm": 0.6116222688404593, "learning_rate": 3.793461689255132e-07, "loss": 0.2709, "step": 35337 }, { "epoch": 1.6554082540872255, "grad_norm": 0.6101224983415472, "learning_rate": 3.79245740567675e-07, "loss": 0.2827, "step": 35338 }, { "epoch": 1.6554550990771537, "grad_norm": 0.597148766883964, "learning_rate": 3.7914532441424457e-07, "loss": 0.2669, "step": 35339 }, { "epoch": 1.655501944067082, "grad_norm": 0.6177628559146043, "learning_rate": 3.7904492046579977e-07, "loss": 0.2656, "step": 35340 }, { "epoch": 1.6555487890570104, "grad_norm": 0.5902956210527688, "learning_rate": 3.7894452872291874e-07, "loss": 0.2701, "step": 35341 }, { "epoch": 1.6555956340469387, "grad_norm": 0.5921103372739958, "learning_rate": 3.7884414918617857e-07, "loss": 0.2793, "step": 35342 }, { "epoch": 1.655642479036867, "grad_norm": 0.602126131053842, "learning_rate": 3.7874378185615763e-07, "loss": 0.2866, "step": 35343 }, { "epoch": 1.6556893240267954, "grad_norm": 0.5752025992756606, "learning_rate": 3.7864342673343243e-07, "loss": 0.2562, "step": 35344 }, { "epoch": 1.6557361690167238, "grad_norm": 0.5808013972862334, "learning_rate": 3.7854308381858134e-07, "loss": 0.2646, "step": 35345 }, { "epoch": 1.6557830140066518, "grad_norm": 0.5675834424593883, "learning_rate": 3.784427531121815e-07, "loss": 0.2445, "step": 35346 }, { "epoch": 1.6558298589965803, "grad_norm": 0.5879348140543732, "learning_rate": 3.783424346148104e-07, "loss": 0.2631, "step": 35347 }, { "epoch": 1.6558767039865088, "grad_norm": 0.6122385034358688, "learning_rate": 3.782421283270457e-07, "loss": 0.2849, "step": 35348 }, { "epoch": 1.655923548976437, "grad_norm": 0.602241479336808, "learning_rate": 3.7814183424946444e-07, "loss": 0.27, "step": 35349 }, { "epoch": 1.6559703939663653, "grad_norm": 0.5874311385674379, "learning_rate": 3.7804155238264304e-07, "loss": 0.2692, "step": 35350 }, { "epoch": 1.6560172389562937, "grad_norm": 0.5878130297073113, "learning_rate": 3.779412827271592e-07, "loss": 0.2608, "step": 35351 }, { "epoch": 1.656064083946222, "grad_norm": 0.551265361818106, "learning_rate": 3.778410252835898e-07, "loss": 0.2655, "step": 35352 }, { "epoch": 1.6561109289361502, "grad_norm": 0.6037240150500821, "learning_rate": 3.777407800525121e-07, "loss": 0.2786, "step": 35353 }, { "epoch": 1.6561577739260787, "grad_norm": 0.5855702716754769, "learning_rate": 3.776405470345032e-07, "loss": 0.2661, "step": 35354 }, { "epoch": 1.656204618916007, "grad_norm": 0.612155334161344, "learning_rate": 3.7754032623013885e-07, "loss": 0.2813, "step": 35355 }, { "epoch": 1.6562514639059351, "grad_norm": 0.5853752118748808, "learning_rate": 3.774401176399972e-07, "loss": 0.2629, "step": 35356 }, { "epoch": 1.6562983088958636, "grad_norm": 0.6422391588365648, "learning_rate": 3.7733992126465364e-07, "loss": 0.2891, "step": 35357 }, { "epoch": 1.656345153885792, "grad_norm": 0.6042589864170806, "learning_rate": 3.7723973710468544e-07, "loss": 0.2658, "step": 35358 }, { "epoch": 1.65639199887572, "grad_norm": 0.6337173302088391, "learning_rate": 3.771395651606688e-07, "loss": 0.2787, "step": 35359 }, { "epoch": 1.6564388438656485, "grad_norm": 0.6005182965716424, "learning_rate": 3.7703940543318107e-07, "loss": 0.2503, "step": 35360 }, { "epoch": 1.656485688855577, "grad_norm": 0.6158414757637253, "learning_rate": 3.7693925792279734e-07, "loss": 0.279, "step": 35361 }, { "epoch": 1.6565325338455053, "grad_norm": 0.5998243132973963, "learning_rate": 3.7683912263009465e-07, "loss": 0.2678, "step": 35362 }, { "epoch": 1.6565793788354335, "grad_norm": 0.5499980497079369, "learning_rate": 3.7673899955564974e-07, "loss": 0.2684, "step": 35363 }, { "epoch": 1.656626223825362, "grad_norm": 0.5975654628801501, "learning_rate": 3.766388887000375e-07, "loss": 0.2734, "step": 35364 }, { "epoch": 1.6566730688152902, "grad_norm": 0.5950853986432955, "learning_rate": 3.765387900638348e-07, "loss": 0.2552, "step": 35365 }, { "epoch": 1.6567199138052184, "grad_norm": 0.5999533925742696, "learning_rate": 3.764387036476183e-07, "loss": 0.2556, "step": 35366 }, { "epoch": 1.656766758795147, "grad_norm": 0.6049705820082292, "learning_rate": 3.7633862945196275e-07, "loss": 0.2702, "step": 35367 }, { "epoch": 1.6568136037850751, "grad_norm": 0.5841314777706886, "learning_rate": 3.7623856747744466e-07, "loss": 0.2764, "step": 35368 }, { "epoch": 1.6568604487750034, "grad_norm": 0.632314305199429, "learning_rate": 3.7613851772463986e-07, "loss": 0.2861, "step": 35369 }, { "epoch": 1.6569072937649318, "grad_norm": 0.6050370431688548, "learning_rate": 3.760384801941247e-07, "loss": 0.265, "step": 35370 }, { "epoch": 1.6569541387548603, "grad_norm": 0.5985043141907935, "learning_rate": 3.7593845488647336e-07, "loss": 0.267, "step": 35371 }, { "epoch": 1.6570009837447885, "grad_norm": 0.6306618253899986, "learning_rate": 3.7583844180226315e-07, "loss": 0.2715, "step": 35372 }, { "epoch": 1.6570478287347168, "grad_norm": 0.5925149935429838, "learning_rate": 3.7573844094206836e-07, "loss": 0.2675, "step": 35373 }, { "epoch": 1.6570946737246453, "grad_norm": 0.5785690167838158, "learning_rate": 3.756384523064646e-07, "loss": 0.2706, "step": 35374 }, { "epoch": 1.6571415187145735, "grad_norm": 0.5847925479855095, "learning_rate": 3.755384758960279e-07, "loss": 0.2773, "step": 35375 }, { "epoch": 1.6571883637045017, "grad_norm": 0.5901187595756675, "learning_rate": 3.754385117113332e-07, "loss": 0.2636, "step": 35376 }, { "epoch": 1.6572352086944302, "grad_norm": 0.5851657637480331, "learning_rate": 3.753385597529566e-07, "loss": 0.2685, "step": 35377 }, { "epoch": 1.6572820536843584, "grad_norm": 0.5776469019114154, "learning_rate": 3.752386200214722e-07, "loss": 0.254, "step": 35378 }, { "epoch": 1.6573288986742867, "grad_norm": 0.5893033149464038, "learning_rate": 3.751386925174552e-07, "loss": 0.2563, "step": 35379 }, { "epoch": 1.6573757436642151, "grad_norm": 0.6065785872568645, "learning_rate": 3.7503877724148095e-07, "loss": 0.2518, "step": 35380 }, { "epoch": 1.6574225886541436, "grad_norm": 0.6003576289838717, "learning_rate": 3.7493887419412453e-07, "loss": 0.2749, "step": 35381 }, { "epoch": 1.6574694336440716, "grad_norm": 0.5884550044643918, "learning_rate": 3.7483898337596055e-07, "loss": 0.2621, "step": 35382 }, { "epoch": 1.657516278634, "grad_norm": 0.6356556725220135, "learning_rate": 3.7473910478756426e-07, "loss": 0.2905, "step": 35383 }, { "epoch": 1.6575631236239285, "grad_norm": 0.5516322154831023, "learning_rate": 3.7463923842951083e-07, "loss": 0.2384, "step": 35384 }, { "epoch": 1.6576099686138568, "grad_norm": 0.5810296231341919, "learning_rate": 3.7453938430237425e-07, "loss": 0.2754, "step": 35385 }, { "epoch": 1.657656813603785, "grad_norm": 0.561142671242529, "learning_rate": 3.7443954240672875e-07, "loss": 0.252, "step": 35386 }, { "epoch": 1.6577036585937135, "grad_norm": 0.5572141089384605, "learning_rate": 3.7433971274314945e-07, "loss": 0.2677, "step": 35387 }, { "epoch": 1.6577505035836417, "grad_norm": 0.5854636950886081, "learning_rate": 3.742398953122109e-07, "loss": 0.268, "step": 35388 }, { "epoch": 1.65779734857357, "grad_norm": 0.5953810696858435, "learning_rate": 3.741400901144876e-07, "loss": 0.2711, "step": 35389 }, { "epoch": 1.6578441935634984, "grad_norm": 0.610473002361994, "learning_rate": 3.740402971505541e-07, "loss": 0.2709, "step": 35390 }, { "epoch": 1.6578910385534267, "grad_norm": 0.5725535053244489, "learning_rate": 3.739405164209839e-07, "loss": 0.2729, "step": 35391 }, { "epoch": 1.657937883543355, "grad_norm": 0.6421247132750499, "learning_rate": 3.7384074792635226e-07, "loss": 0.2799, "step": 35392 }, { "epoch": 1.6579847285332834, "grad_norm": 0.6212452377256118, "learning_rate": 3.7374099166723216e-07, "loss": 0.2907, "step": 35393 }, { "epoch": 1.6580315735232118, "grad_norm": 0.5752147081493559, "learning_rate": 3.7364124764419807e-07, "loss": 0.2682, "step": 35394 }, { "epoch": 1.6580784185131399, "grad_norm": 0.6095236775617684, "learning_rate": 3.735415158578243e-07, "loss": 0.266, "step": 35395 }, { "epoch": 1.6581252635030683, "grad_norm": 0.5731388965236213, "learning_rate": 3.7344179630868447e-07, "loss": 0.2702, "step": 35396 }, { "epoch": 1.6581721084929968, "grad_norm": 0.5681794208221917, "learning_rate": 3.7334208899735354e-07, "loss": 0.2651, "step": 35397 }, { "epoch": 1.658218953482925, "grad_norm": 0.6047524626998041, "learning_rate": 3.73242393924404e-07, "loss": 0.2721, "step": 35398 }, { "epoch": 1.6582657984728533, "grad_norm": 0.5673442045404516, "learning_rate": 3.7314271109040955e-07, "loss": 0.2652, "step": 35399 }, { "epoch": 1.6583126434627817, "grad_norm": 0.5750575537514911, "learning_rate": 3.730430404959443e-07, "loss": 0.2555, "step": 35400 }, { "epoch": 1.65835948845271, "grad_norm": 0.5849959362700068, "learning_rate": 3.729433821415815e-07, "loss": 0.2675, "step": 35401 }, { "epoch": 1.6584063334426382, "grad_norm": 0.6116063715410976, "learning_rate": 3.728437360278953e-07, "loss": 0.2633, "step": 35402 }, { "epoch": 1.6584531784325667, "grad_norm": 0.6043388641000841, "learning_rate": 3.7274410215545897e-07, "loss": 0.2576, "step": 35403 }, { "epoch": 1.658500023422495, "grad_norm": 0.5740440271214028, "learning_rate": 3.7264448052484526e-07, "loss": 0.2724, "step": 35404 }, { "epoch": 1.6585468684124232, "grad_norm": 0.5886274828175168, "learning_rate": 3.7254487113662833e-07, "loss": 0.2581, "step": 35405 }, { "epoch": 1.6585937134023516, "grad_norm": 0.6454116349848783, "learning_rate": 3.724452739913806e-07, "loss": 0.2852, "step": 35406 }, { "epoch": 1.65864055839228, "grad_norm": 0.6119375917401794, "learning_rate": 3.723456890896754e-07, "loss": 0.2845, "step": 35407 }, { "epoch": 1.6586874033822083, "grad_norm": 0.5881112808283432, "learning_rate": 3.722461164320859e-07, "loss": 0.2581, "step": 35408 }, { "epoch": 1.6587342483721366, "grad_norm": 0.5646594380065953, "learning_rate": 3.7214655601918615e-07, "loss": 0.2508, "step": 35409 }, { "epoch": 1.658781093362065, "grad_norm": 0.6702483268205062, "learning_rate": 3.720470078515473e-07, "loss": 0.2907, "step": 35410 }, { "epoch": 1.6588279383519933, "grad_norm": 0.6455884143446835, "learning_rate": 3.7194747192974304e-07, "loss": 0.2923, "step": 35411 }, { "epoch": 1.6588747833419215, "grad_norm": 0.6559734797510555, "learning_rate": 3.718479482543469e-07, "loss": 0.2817, "step": 35412 }, { "epoch": 1.65892162833185, "grad_norm": 0.5689824010647243, "learning_rate": 3.717484368259305e-07, "loss": 0.2482, "step": 35413 }, { "epoch": 1.6589684733217782, "grad_norm": 0.6022502219032726, "learning_rate": 3.716489376450666e-07, "loss": 0.2678, "step": 35414 }, { "epoch": 1.6590153183117065, "grad_norm": 0.6241100027130968, "learning_rate": 3.7154945071232906e-07, "loss": 0.2713, "step": 35415 }, { "epoch": 1.659062163301635, "grad_norm": 0.622636169637902, "learning_rate": 3.714499760282886e-07, "loss": 0.2851, "step": 35416 }, { "epoch": 1.6591090082915634, "grad_norm": 0.5939755083473279, "learning_rate": 3.713505135935186e-07, "loss": 0.2704, "step": 35417 }, { "epoch": 1.6591558532814914, "grad_norm": 0.615149054112751, "learning_rate": 3.712510634085914e-07, "loss": 0.2702, "step": 35418 }, { "epoch": 1.6592026982714199, "grad_norm": 0.5959522652731278, "learning_rate": 3.711516254740799e-07, "loss": 0.2723, "step": 35419 }, { "epoch": 1.6592495432613483, "grad_norm": 0.5769301458117054, "learning_rate": 3.7105219979055503e-07, "loss": 0.2737, "step": 35420 }, { "epoch": 1.6592963882512766, "grad_norm": 0.5816595018966849, "learning_rate": 3.7095278635859046e-07, "loss": 0.2692, "step": 35421 }, { "epoch": 1.6593432332412048, "grad_norm": 0.585039069040915, "learning_rate": 3.7085338517875684e-07, "loss": 0.254, "step": 35422 }, { "epoch": 1.6593900782311333, "grad_norm": 0.6048751208517334, "learning_rate": 3.7075399625162686e-07, "loss": 0.2818, "step": 35423 }, { "epoch": 1.6594369232210615, "grad_norm": 0.5889575427439512, "learning_rate": 3.7065461957777216e-07, "loss": 0.2741, "step": 35424 }, { "epoch": 1.6594837682109898, "grad_norm": 0.5535435692085082, "learning_rate": 3.705552551577654e-07, "loss": 0.2597, "step": 35425 }, { "epoch": 1.6595306132009182, "grad_norm": 0.6805685694932654, "learning_rate": 3.704559029921781e-07, "loss": 0.2906, "step": 35426 }, { "epoch": 1.6595774581908465, "grad_norm": 0.6207733583645243, "learning_rate": 3.7035656308158194e-07, "loss": 0.2811, "step": 35427 }, { "epoch": 1.6596243031807747, "grad_norm": 0.5684855165259903, "learning_rate": 3.702572354265482e-07, "loss": 0.2766, "step": 35428 }, { "epoch": 1.6596711481707032, "grad_norm": 0.5537921953749284, "learning_rate": 3.7015792002764837e-07, "loss": 0.2869, "step": 35429 }, { "epoch": 1.6597179931606316, "grad_norm": 0.6072242599336771, "learning_rate": 3.7005861688545476e-07, "loss": 0.2704, "step": 35430 }, { "epoch": 1.6597648381505596, "grad_norm": 0.5997446773820836, "learning_rate": 3.6995932600053824e-07, "loss": 0.2677, "step": 35431 }, { "epoch": 1.659811683140488, "grad_norm": 0.5837761064031987, "learning_rate": 3.698600473734712e-07, "loss": 0.2687, "step": 35432 }, { "epoch": 1.6598585281304166, "grad_norm": 0.5813049954893571, "learning_rate": 3.6976078100482345e-07, "loss": 0.2691, "step": 35433 }, { "epoch": 1.6599053731203448, "grad_norm": 0.5635128293697305, "learning_rate": 3.696615268951675e-07, "loss": 0.2656, "step": 35434 }, { "epoch": 1.659952218110273, "grad_norm": 0.6148232137362896, "learning_rate": 3.6956228504507365e-07, "loss": 0.277, "step": 35435 }, { "epoch": 1.6599990631002015, "grad_norm": 0.5388318479492977, "learning_rate": 3.694630554551132e-07, "loss": 0.2533, "step": 35436 }, { "epoch": 1.6600459080901298, "grad_norm": 0.594335487537456, "learning_rate": 3.693638381258577e-07, "loss": 0.2771, "step": 35437 }, { "epoch": 1.660092753080058, "grad_norm": 0.655056182949989, "learning_rate": 3.6926463305787753e-07, "loss": 0.2765, "step": 35438 }, { "epoch": 1.6601395980699865, "grad_norm": 0.5930461817467947, "learning_rate": 3.6916544025174443e-07, "loss": 0.2653, "step": 35439 }, { "epoch": 1.6601864430599147, "grad_norm": 0.5814737873790109, "learning_rate": 3.690662597080286e-07, "loss": 0.2697, "step": 35440 }, { "epoch": 1.660233288049843, "grad_norm": 0.6024600192035857, "learning_rate": 3.689670914273005e-07, "loss": 0.2725, "step": 35441 }, { "epoch": 1.6602801330397714, "grad_norm": 0.5733918848528521, "learning_rate": 3.68867935410131e-07, "loss": 0.2584, "step": 35442 }, { "epoch": 1.6603269780296999, "grad_norm": 0.5721297084175097, "learning_rate": 3.687687916570909e-07, "loss": 0.269, "step": 35443 }, { "epoch": 1.660373823019628, "grad_norm": 0.5538627368043814, "learning_rate": 3.686696601687509e-07, "loss": 0.2455, "step": 35444 }, { "epoch": 1.6604206680095563, "grad_norm": 0.583115788954521, "learning_rate": 3.685705409456816e-07, "loss": 0.275, "step": 35445 }, { "epoch": 1.6604675129994848, "grad_norm": 0.6669324108687696, "learning_rate": 3.684714339884529e-07, "loss": 0.2653, "step": 35446 }, { "epoch": 1.660514357989413, "grad_norm": 0.602568366922815, "learning_rate": 3.683723392976357e-07, "loss": 0.2678, "step": 35447 }, { "epoch": 1.6605612029793413, "grad_norm": 0.5955827027203164, "learning_rate": 3.682732568737993e-07, "loss": 0.2576, "step": 35448 }, { "epoch": 1.6606080479692698, "grad_norm": 0.6083345196573404, "learning_rate": 3.681741867175148e-07, "loss": 0.2564, "step": 35449 }, { "epoch": 1.660654892959198, "grad_norm": 0.6048582793062767, "learning_rate": 3.680751288293519e-07, "loss": 0.2797, "step": 35450 }, { "epoch": 1.6607017379491262, "grad_norm": 0.5895239198628535, "learning_rate": 3.679760832098811e-07, "loss": 0.2705, "step": 35451 }, { "epoch": 1.6607485829390547, "grad_norm": 0.6277443863514989, "learning_rate": 3.6787704985967176e-07, "loss": 0.2749, "step": 35452 }, { "epoch": 1.6607954279289832, "grad_norm": 0.5672564634384558, "learning_rate": 3.6777802877929426e-07, "loss": 0.26, "step": 35453 }, { "epoch": 1.6608422729189112, "grad_norm": 0.5843076441714179, "learning_rate": 3.6767901996931863e-07, "loss": 0.2725, "step": 35454 }, { "epoch": 1.6608891179088396, "grad_norm": 0.5931583410779535, "learning_rate": 3.675800234303137e-07, "loss": 0.2703, "step": 35455 }, { "epoch": 1.660935962898768, "grad_norm": 0.576693734074921, "learning_rate": 3.674810391628497e-07, "loss": 0.2718, "step": 35456 }, { "epoch": 1.6609828078886963, "grad_norm": 0.6732200630100033, "learning_rate": 3.6738206716749714e-07, "loss": 0.2891, "step": 35457 }, { "epoch": 1.6610296528786246, "grad_norm": 0.5599866925266577, "learning_rate": 3.672831074448241e-07, "loss": 0.2756, "step": 35458 }, { "epoch": 1.661076497868553, "grad_norm": 0.5851306913345322, "learning_rate": 3.671841599954007e-07, "loss": 0.2615, "step": 35459 }, { "epoch": 1.6611233428584813, "grad_norm": 0.5813578025738033, "learning_rate": 3.6708522481979647e-07, "loss": 0.271, "step": 35460 }, { "epoch": 1.6611701878484095, "grad_norm": 0.5947238772578736, "learning_rate": 3.6698630191858125e-07, "loss": 0.2674, "step": 35461 }, { "epoch": 1.661217032838338, "grad_norm": 0.6240782728859853, "learning_rate": 3.668873912923232e-07, "loss": 0.2728, "step": 35462 }, { "epoch": 1.6612638778282662, "grad_norm": 0.5677675022260164, "learning_rate": 3.6678849294159216e-07, "loss": 0.2546, "step": 35463 }, { "epoch": 1.6613107228181945, "grad_norm": 0.621174832850324, "learning_rate": 3.666896068669576e-07, "loss": 0.2753, "step": 35464 }, { "epoch": 1.661357567808123, "grad_norm": 0.5897881024829856, "learning_rate": 3.6659073306898746e-07, "loss": 0.2737, "step": 35465 }, { "epoch": 1.6614044127980514, "grad_norm": 0.5750133928906984, "learning_rate": 3.664918715482518e-07, "loss": 0.2534, "step": 35466 }, { "epoch": 1.6614512577879794, "grad_norm": 0.5783883717945945, "learning_rate": 3.663930223053189e-07, "loss": 0.2664, "step": 35467 }, { "epoch": 1.6614981027779079, "grad_norm": 0.602303384813075, "learning_rate": 3.662941853407587e-07, "loss": 0.2733, "step": 35468 }, { "epoch": 1.6615449477678363, "grad_norm": 0.6386473577991957, "learning_rate": 3.6619536065513826e-07, "loss": 0.2649, "step": 35469 }, { "epoch": 1.6615917927577646, "grad_norm": 0.6197791459466634, "learning_rate": 3.6609654824902806e-07, "loss": 0.2698, "step": 35470 }, { "epoch": 1.6616386377476928, "grad_norm": 0.5786258582486845, "learning_rate": 3.659977481229951e-07, "loss": 0.2581, "step": 35471 }, { "epoch": 1.6616854827376213, "grad_norm": 0.5774988474897362, "learning_rate": 3.658989602776089e-07, "loss": 0.261, "step": 35472 }, { "epoch": 1.6617323277275495, "grad_norm": 0.6170567591468161, "learning_rate": 3.6580018471343794e-07, "loss": 0.2903, "step": 35473 }, { "epoch": 1.6617791727174778, "grad_norm": 0.5901350859086685, "learning_rate": 3.657014214310503e-07, "loss": 0.273, "step": 35474 }, { "epoch": 1.6618260177074062, "grad_norm": 0.6310979750834479, "learning_rate": 3.6560267043101503e-07, "loss": 0.2724, "step": 35475 }, { "epoch": 1.6618728626973345, "grad_norm": 0.5721577045024935, "learning_rate": 3.655039317139e-07, "loss": 0.2602, "step": 35476 }, { "epoch": 1.6619197076872627, "grad_norm": 0.6272848779503012, "learning_rate": 3.6540520528027284e-07, "loss": 0.2827, "step": 35477 }, { "epoch": 1.6619665526771912, "grad_norm": 0.57798747541122, "learning_rate": 3.653064911307022e-07, "loss": 0.2565, "step": 35478 }, { "epoch": 1.6620133976671196, "grad_norm": 0.5870890010135014, "learning_rate": 3.6520778926575627e-07, "loss": 0.2694, "step": 35479 }, { "epoch": 1.6620602426570479, "grad_norm": 0.6297605772003152, "learning_rate": 3.6510909968600296e-07, "loss": 0.28, "step": 35480 }, { "epoch": 1.6621070876469761, "grad_norm": 0.6081269857430482, "learning_rate": 3.650104223920106e-07, "loss": 0.291, "step": 35481 }, { "epoch": 1.6621539326369046, "grad_norm": 0.6013768106944674, "learning_rate": 3.649117573843461e-07, "loss": 0.2727, "step": 35482 }, { "epoch": 1.6622007776268328, "grad_norm": 0.593002136400352, "learning_rate": 3.6481310466357835e-07, "loss": 0.2651, "step": 35483 }, { "epoch": 1.662247622616761, "grad_norm": 0.5936107625756675, "learning_rate": 3.6471446423027415e-07, "loss": 0.2696, "step": 35484 }, { "epoch": 1.6622944676066895, "grad_norm": 0.6232579641346095, "learning_rate": 3.646158360850013e-07, "loss": 0.2781, "step": 35485 }, { "epoch": 1.6623413125966178, "grad_norm": 0.604738546969848, "learning_rate": 3.6451722022832785e-07, "loss": 0.2668, "step": 35486 }, { "epoch": 1.662388157586546, "grad_norm": 0.5972540253259856, "learning_rate": 3.6441861666082073e-07, "loss": 0.2687, "step": 35487 }, { "epoch": 1.6624350025764745, "grad_norm": 0.5805003595930414, "learning_rate": 3.643200253830487e-07, "loss": 0.2655, "step": 35488 }, { "epoch": 1.662481847566403, "grad_norm": 0.5383566864029278, "learning_rate": 3.642214463955779e-07, "loss": 0.2509, "step": 35489 }, { "epoch": 1.662528692556331, "grad_norm": 0.5720819392250696, "learning_rate": 3.6412287969897516e-07, "loss": 0.268, "step": 35490 }, { "epoch": 1.6625755375462594, "grad_norm": 0.5996922813638239, "learning_rate": 3.640243252938086e-07, "loss": 0.2812, "step": 35491 }, { "epoch": 1.6626223825361879, "grad_norm": 0.6393996194103279, "learning_rate": 3.6392578318064503e-07, "loss": 0.2784, "step": 35492 }, { "epoch": 1.6626692275261161, "grad_norm": 0.5979866309306485, "learning_rate": 3.6382725336005204e-07, "loss": 0.2774, "step": 35493 }, { "epoch": 1.6627160725160444, "grad_norm": 0.5574466263154332, "learning_rate": 3.6372873583259665e-07, "loss": 0.2593, "step": 35494 }, { "epoch": 1.6627629175059728, "grad_norm": 0.5686590550262325, "learning_rate": 3.6363023059884485e-07, "loss": 0.2734, "step": 35495 }, { "epoch": 1.662809762495901, "grad_norm": 0.6156781967460835, "learning_rate": 3.635317376593647e-07, "loss": 0.269, "step": 35496 }, { "epoch": 1.6628566074858293, "grad_norm": 0.5739050074921811, "learning_rate": 3.634332570147217e-07, "loss": 0.2598, "step": 35497 }, { "epoch": 1.6629034524757578, "grad_norm": 0.594320470918168, "learning_rate": 3.633347886654837e-07, "loss": 0.2782, "step": 35498 }, { "epoch": 1.662950297465686, "grad_norm": 0.6391056928819617, "learning_rate": 3.632363326122165e-07, "loss": 0.2799, "step": 35499 }, { "epoch": 1.6629971424556143, "grad_norm": 0.5701091120625847, "learning_rate": 3.6313788885548816e-07, "loss": 0.2533, "step": 35500 }, { "epoch": 1.6630439874455427, "grad_norm": 0.592041070233731, "learning_rate": 3.6303945739586343e-07, "loss": 0.2725, "step": 35501 }, { "epoch": 1.6630908324354712, "grad_norm": 0.605253763589187, "learning_rate": 3.6294103823390937e-07, "loss": 0.2649, "step": 35502 }, { "epoch": 1.6631376774253992, "grad_norm": 0.6456588895495993, "learning_rate": 3.6284263137019326e-07, "loss": 0.2829, "step": 35503 }, { "epoch": 1.6631845224153277, "grad_norm": 0.6038612236154751, "learning_rate": 3.6274423680528e-07, "loss": 0.2842, "step": 35504 }, { "epoch": 1.6632313674052561, "grad_norm": 0.588885815464342, "learning_rate": 3.6264585453973657e-07, "loss": 0.2616, "step": 35505 }, { "epoch": 1.6632782123951844, "grad_norm": 0.6005091741818319, "learning_rate": 3.6254748457412947e-07, "loss": 0.287, "step": 35506 }, { "epoch": 1.6633250573851126, "grad_norm": 0.5765593663987593, "learning_rate": 3.624491269090238e-07, "loss": 0.2623, "step": 35507 }, { "epoch": 1.663371902375041, "grad_norm": 0.6246220510383524, "learning_rate": 3.6235078154498644e-07, "loss": 0.262, "step": 35508 }, { "epoch": 1.6634187473649693, "grad_norm": 0.6093166359880178, "learning_rate": 3.622524484825826e-07, "loss": 0.2751, "step": 35509 }, { "epoch": 1.6634655923548975, "grad_norm": 0.5823421405005209, "learning_rate": 3.6215412772237944e-07, "loss": 0.2641, "step": 35510 }, { "epoch": 1.663512437344826, "grad_norm": 0.6536577532932503, "learning_rate": 3.620558192649415e-07, "loss": 0.2789, "step": 35511 }, { "epoch": 1.6635592823347543, "grad_norm": 0.5831409969535462, "learning_rate": 3.619575231108355e-07, "loss": 0.2716, "step": 35512 }, { "epoch": 1.6636061273246825, "grad_norm": 0.571926798407001, "learning_rate": 3.618592392606257e-07, "loss": 0.26, "step": 35513 }, { "epoch": 1.663652972314611, "grad_norm": 0.5808904427947077, "learning_rate": 3.6176096771487896e-07, "loss": 0.265, "step": 35514 }, { "epoch": 1.6636998173045394, "grad_norm": 0.5694531429913187, "learning_rate": 3.6166270847416004e-07, "loss": 0.2732, "step": 35515 }, { "epoch": 1.6637466622944677, "grad_norm": 0.5945467388333644, "learning_rate": 3.6156446153903514e-07, "loss": 0.2756, "step": 35516 }, { "epoch": 1.663793507284396, "grad_norm": 0.6241578467557992, "learning_rate": 3.6146622691006966e-07, "loss": 0.2665, "step": 35517 }, { "epoch": 1.6638403522743244, "grad_norm": 0.6107439007177471, "learning_rate": 3.613680045878287e-07, "loss": 0.271, "step": 35518 }, { "epoch": 1.6638871972642526, "grad_norm": 0.6526968825148783, "learning_rate": 3.612697945728766e-07, "loss": 0.2881, "step": 35519 }, { "epoch": 1.6639340422541808, "grad_norm": 0.6147698846284263, "learning_rate": 3.6117159686577944e-07, "loss": 0.2833, "step": 35520 }, { "epoch": 1.6639808872441093, "grad_norm": 0.5779057318033659, "learning_rate": 3.610734114671019e-07, "loss": 0.256, "step": 35521 }, { "epoch": 1.6640277322340375, "grad_norm": 0.6532729881936319, "learning_rate": 3.6097523837740954e-07, "loss": 0.2883, "step": 35522 }, { "epoch": 1.6640745772239658, "grad_norm": 0.6261490767541463, "learning_rate": 3.60877077597267e-07, "loss": 0.2792, "step": 35523 }, { "epoch": 1.6641214222138943, "grad_norm": 0.5892313995290297, "learning_rate": 3.607789291272398e-07, "loss": 0.2588, "step": 35524 }, { "epoch": 1.6641682672038227, "grad_norm": 0.5972102355982861, "learning_rate": 3.606807929678921e-07, "loss": 0.2703, "step": 35525 }, { "epoch": 1.6642151121937507, "grad_norm": 0.6025146426550042, "learning_rate": 3.6058266911978836e-07, "loss": 0.2698, "step": 35526 }, { "epoch": 1.6642619571836792, "grad_norm": 0.6289898159849303, "learning_rate": 3.604845575834934e-07, "loss": 0.2759, "step": 35527 }, { "epoch": 1.6643088021736077, "grad_norm": 0.5482012584990557, "learning_rate": 3.603864583595723e-07, "loss": 0.2526, "step": 35528 }, { "epoch": 1.664355647163536, "grad_norm": 0.615035152143419, "learning_rate": 3.6028837144858915e-07, "loss": 0.2776, "step": 35529 }, { "epoch": 1.6644024921534641, "grad_norm": 0.5882094866284258, "learning_rate": 3.6019029685110925e-07, "loss": 0.2858, "step": 35530 }, { "epoch": 1.6644493371433926, "grad_norm": 0.5761083362598147, "learning_rate": 3.6009223456769577e-07, "loss": 0.2751, "step": 35531 }, { "epoch": 1.6644961821333208, "grad_norm": 0.6097334596010258, "learning_rate": 3.599941845989144e-07, "loss": 0.2715, "step": 35532 }, { "epoch": 1.664543027123249, "grad_norm": 0.569031426985242, "learning_rate": 3.59896146945328e-07, "loss": 0.2624, "step": 35533 }, { "epoch": 1.6645898721131775, "grad_norm": 0.6402864360157121, "learning_rate": 3.5979812160750113e-07, "loss": 0.2796, "step": 35534 }, { "epoch": 1.6646367171031058, "grad_norm": 0.6077397488443866, "learning_rate": 3.597001085859983e-07, "loss": 0.2733, "step": 35535 }, { "epoch": 1.664683562093034, "grad_norm": 0.6325208396171202, "learning_rate": 3.5960210788138356e-07, "loss": 0.2745, "step": 35536 }, { "epoch": 1.6647304070829625, "grad_norm": 0.6230540426191531, "learning_rate": 3.5950411949422114e-07, "loss": 0.2836, "step": 35537 }, { "epoch": 1.664777252072891, "grad_norm": 0.5752441458685343, "learning_rate": 3.5940614342507454e-07, "loss": 0.2617, "step": 35538 }, { "epoch": 1.664824097062819, "grad_norm": 0.6149025235362475, "learning_rate": 3.5930817967450707e-07, "loss": 0.2882, "step": 35539 }, { "epoch": 1.6648709420527474, "grad_norm": 0.576177678626939, "learning_rate": 3.5921022824308287e-07, "loss": 0.2564, "step": 35540 }, { "epoch": 1.664917787042676, "grad_norm": 0.6215393176943067, "learning_rate": 3.591122891313656e-07, "loss": 0.276, "step": 35541 }, { "epoch": 1.6649646320326041, "grad_norm": 0.6151818749418081, "learning_rate": 3.590143623399192e-07, "loss": 0.2749, "step": 35542 }, { "epoch": 1.6650114770225324, "grad_norm": 0.5695700502130384, "learning_rate": 3.5891644786930776e-07, "loss": 0.2443, "step": 35543 }, { "epoch": 1.6650583220124608, "grad_norm": 0.5920416792153337, "learning_rate": 3.5881854572009325e-07, "loss": 0.2658, "step": 35544 }, { "epoch": 1.665105167002389, "grad_norm": 0.5779524772654353, "learning_rate": 3.587206558928405e-07, "loss": 0.2743, "step": 35545 }, { "epoch": 1.6651520119923173, "grad_norm": 0.5694686856676926, "learning_rate": 3.586227783881116e-07, "loss": 0.2619, "step": 35546 }, { "epoch": 1.6651988569822458, "grad_norm": 0.662345941327054, "learning_rate": 3.5852491320647025e-07, "loss": 0.2845, "step": 35547 }, { "epoch": 1.665245701972174, "grad_norm": 0.5547849538848308, "learning_rate": 3.584270603484799e-07, "loss": 0.2712, "step": 35548 }, { "epoch": 1.6652925469621023, "grad_norm": 0.5750173505378431, "learning_rate": 3.5832921981470396e-07, "loss": 0.2632, "step": 35549 }, { "epoch": 1.6653393919520307, "grad_norm": 0.6017074141919346, "learning_rate": 3.582313916057048e-07, "loss": 0.2593, "step": 35550 }, { "epoch": 1.6653862369419592, "grad_norm": 0.593118531642266, "learning_rate": 3.5813357572204555e-07, "loss": 0.2677, "step": 35551 }, { "epoch": 1.6654330819318874, "grad_norm": 0.6071277910470746, "learning_rate": 3.580357721642896e-07, "loss": 0.2583, "step": 35552 }, { "epoch": 1.6654799269218157, "grad_norm": 0.6183211742168895, "learning_rate": 3.5793798093299913e-07, "loss": 0.2804, "step": 35553 }, { "epoch": 1.6655267719117441, "grad_norm": 0.6333700480869578, "learning_rate": 3.578402020287372e-07, "loss": 0.2904, "step": 35554 }, { "epoch": 1.6655736169016724, "grad_norm": 0.5741476542889771, "learning_rate": 3.57742435452067e-07, "loss": 0.2542, "step": 35555 }, { "epoch": 1.6656204618916006, "grad_norm": 0.6183481214286597, "learning_rate": 3.5764468120355e-07, "loss": 0.2911, "step": 35556 }, { "epoch": 1.665667306881529, "grad_norm": 0.5524708509835917, "learning_rate": 3.575469392837494e-07, "loss": 0.2509, "step": 35557 }, { "epoch": 1.6657141518714573, "grad_norm": 0.5564341151753407, "learning_rate": 3.5744920969322776e-07, "loss": 0.2491, "step": 35558 }, { "epoch": 1.6657609968613856, "grad_norm": 0.6459528451568556, "learning_rate": 3.573514924325477e-07, "loss": 0.2734, "step": 35559 }, { "epoch": 1.665807841851314, "grad_norm": 0.6168566176173238, "learning_rate": 3.572537875022711e-07, "loss": 0.2653, "step": 35560 }, { "epoch": 1.6658546868412425, "grad_norm": 0.590496885003506, "learning_rate": 3.5715609490296065e-07, "loss": 0.2781, "step": 35561 }, { "epoch": 1.6659015318311705, "grad_norm": 0.6053208851668135, "learning_rate": 3.57058414635178e-07, "loss": 0.2683, "step": 35562 }, { "epoch": 1.665948376821099, "grad_norm": 0.584625124445643, "learning_rate": 3.5696074669948536e-07, "loss": 0.2474, "step": 35563 }, { "epoch": 1.6659952218110274, "grad_norm": 0.6049651399131494, "learning_rate": 3.568630910964452e-07, "loss": 0.2761, "step": 35564 }, { "epoch": 1.6660420668009557, "grad_norm": 0.6192502886453387, "learning_rate": 3.5676544782661886e-07, "loss": 0.2888, "step": 35565 }, { "epoch": 1.666088911790884, "grad_norm": 0.6130831489929085, "learning_rate": 3.5666781689056964e-07, "loss": 0.2627, "step": 35566 }, { "epoch": 1.6661357567808124, "grad_norm": 0.5557487880897422, "learning_rate": 3.5657019828885835e-07, "loss": 0.2582, "step": 35567 }, { "epoch": 1.6661826017707406, "grad_norm": 0.5907969961583835, "learning_rate": 3.564725920220463e-07, "loss": 0.2683, "step": 35568 }, { "epoch": 1.6662294467606689, "grad_norm": 0.6154918221061838, "learning_rate": 3.5637499809069556e-07, "loss": 0.2699, "step": 35569 }, { "epoch": 1.6662762917505973, "grad_norm": 0.6430568002702106, "learning_rate": 3.5627741649536806e-07, "loss": 0.2785, "step": 35570 }, { "epoch": 1.6663231367405256, "grad_norm": 0.5803608167082216, "learning_rate": 3.561798472366254e-07, "loss": 0.2702, "step": 35571 }, { "epoch": 1.6663699817304538, "grad_norm": 0.6120121384473404, "learning_rate": 3.5608229031502907e-07, "loss": 0.2747, "step": 35572 }, { "epoch": 1.6664168267203823, "grad_norm": 0.6243331532970782, "learning_rate": 3.5598474573113994e-07, "loss": 0.2725, "step": 35573 }, { "epoch": 1.6664636717103107, "grad_norm": 0.5814930184961047, "learning_rate": 3.558872134855204e-07, "loss": 0.2713, "step": 35574 }, { "epoch": 1.6665105167002388, "grad_norm": 0.6115672532641627, "learning_rate": 3.5578969357873027e-07, "loss": 0.2783, "step": 35575 }, { "epoch": 1.6665573616901672, "grad_norm": 0.6193786834431213, "learning_rate": 3.5569218601133157e-07, "loss": 0.2835, "step": 35576 }, { "epoch": 1.6666042066800957, "grad_norm": 0.5969522926425674, "learning_rate": 3.5559469078388557e-07, "loss": 0.2771, "step": 35577 }, { "epoch": 1.666651051670024, "grad_norm": 0.6179966886459293, "learning_rate": 3.554972078969529e-07, "loss": 0.2646, "step": 35578 }, { "epoch": 1.6666978966599522, "grad_norm": 0.5987497042517627, "learning_rate": 3.553997373510956e-07, "loss": 0.27, "step": 35579 }, { "epoch": 1.6667447416498806, "grad_norm": 0.6053946515312013, "learning_rate": 3.553022791468735e-07, "loss": 0.2715, "step": 35580 }, { "epoch": 1.6667915866398089, "grad_norm": 0.5913719136138087, "learning_rate": 3.5520483328484735e-07, "loss": 0.2684, "step": 35581 }, { "epoch": 1.666838431629737, "grad_norm": 0.6194416633257899, "learning_rate": 3.5510739976557823e-07, "loss": 0.2723, "step": 35582 }, { "epoch": 1.6668852766196656, "grad_norm": 0.5831187039839729, "learning_rate": 3.550099785896272e-07, "loss": 0.2677, "step": 35583 }, { "epoch": 1.6669321216095938, "grad_norm": 0.6286629747154524, "learning_rate": 3.5491256975755423e-07, "loss": 0.2748, "step": 35584 }, { "epoch": 1.666978966599522, "grad_norm": 0.6040580998873841, "learning_rate": 3.548151732699212e-07, "loss": 0.2727, "step": 35585 }, { "epoch": 1.6670258115894505, "grad_norm": 0.6683243226955334, "learning_rate": 3.5471778912728683e-07, "loss": 0.2917, "step": 35586 }, { "epoch": 1.667072656579379, "grad_norm": 0.5934094118077973, "learning_rate": 3.546204173302131e-07, "loss": 0.2664, "step": 35587 }, { "epoch": 1.6671195015693072, "grad_norm": 0.6077953196299646, "learning_rate": 3.545230578792591e-07, "loss": 0.2653, "step": 35588 }, { "epoch": 1.6671663465592355, "grad_norm": 0.5801655779185663, "learning_rate": 3.544257107749854e-07, "loss": 0.2681, "step": 35589 }, { "epoch": 1.667213191549164, "grad_norm": 0.6027152038514262, "learning_rate": 3.5432837601795277e-07, "loss": 0.2614, "step": 35590 }, { "epoch": 1.6672600365390922, "grad_norm": 0.5733722971729518, "learning_rate": 3.542310536087215e-07, "loss": 0.2617, "step": 35591 }, { "epoch": 1.6673068815290204, "grad_norm": 0.6034007149589833, "learning_rate": 3.541337435478506e-07, "loss": 0.2767, "step": 35592 }, { "epoch": 1.6673537265189489, "grad_norm": 0.6502134595451922, "learning_rate": 3.5403644583590053e-07, "loss": 0.2868, "step": 35593 }, { "epoch": 1.667400571508877, "grad_norm": 0.5818940271326754, "learning_rate": 3.5393916047343215e-07, "loss": 0.2579, "step": 35594 }, { "epoch": 1.6674474164988053, "grad_norm": 0.5982923312697369, "learning_rate": 3.538418874610039e-07, "loss": 0.279, "step": 35595 }, { "epoch": 1.6674942614887338, "grad_norm": 0.5842969331909175, "learning_rate": 3.537446267991762e-07, "loss": 0.28, "step": 35596 }, { "epoch": 1.6675411064786623, "grad_norm": 0.5911047768356972, "learning_rate": 3.5364737848850916e-07, "loss": 0.2934, "step": 35597 }, { "epoch": 1.6675879514685903, "grad_norm": 0.6071187186092494, "learning_rate": 3.535501425295615e-07, "loss": 0.2784, "step": 35598 }, { "epoch": 1.6676347964585188, "grad_norm": 0.567493217771707, "learning_rate": 3.534529189228933e-07, "loss": 0.2487, "step": 35599 }, { "epoch": 1.6676816414484472, "grad_norm": 0.6452635453089655, "learning_rate": 3.5335570766906383e-07, "loss": 0.2813, "step": 35600 }, { "epoch": 1.6677284864383755, "grad_norm": 0.5862532212116226, "learning_rate": 3.5325850876863355e-07, "loss": 0.2682, "step": 35601 }, { "epoch": 1.6677753314283037, "grad_norm": 0.620691849923157, "learning_rate": 3.5316132222216054e-07, "loss": 0.2766, "step": 35602 }, { "epoch": 1.6678221764182322, "grad_norm": 0.6248193787224321, "learning_rate": 3.530641480302041e-07, "loss": 0.2865, "step": 35603 }, { "epoch": 1.6678690214081604, "grad_norm": 0.6450734533769444, "learning_rate": 3.5296698619332494e-07, "loss": 0.2777, "step": 35604 }, { "epoch": 1.6679158663980886, "grad_norm": 0.5775899178812035, "learning_rate": 3.5286983671208e-07, "loss": 0.2657, "step": 35605 }, { "epoch": 1.667962711388017, "grad_norm": 0.6492914951026615, "learning_rate": 3.527726995870301e-07, "loss": 0.2674, "step": 35606 }, { "epoch": 1.6680095563779453, "grad_norm": 0.6105077508936726, "learning_rate": 3.5267557481873325e-07, "loss": 0.2703, "step": 35607 }, { "epoch": 1.6680564013678736, "grad_norm": 0.6394632184920849, "learning_rate": 3.525784624077494e-07, "loss": 0.3031, "step": 35608 }, { "epoch": 1.668103246357802, "grad_norm": 0.593059519772336, "learning_rate": 3.524813623546361e-07, "loss": 0.2634, "step": 35609 }, { "epoch": 1.6681500913477305, "grad_norm": 0.5987752611030298, "learning_rate": 3.5238427465995347e-07, "loss": 0.2631, "step": 35610 }, { "epoch": 1.6681969363376585, "grad_norm": 0.5888450197119888, "learning_rate": 3.522871993242591e-07, "loss": 0.259, "step": 35611 }, { "epoch": 1.668243781327587, "grad_norm": 0.6009524787621175, "learning_rate": 3.52190136348112e-07, "loss": 0.2755, "step": 35612 }, { "epoch": 1.6682906263175155, "grad_norm": 0.6244530418840355, "learning_rate": 3.520930857320706e-07, "loss": 0.2859, "step": 35613 }, { "epoch": 1.6683374713074437, "grad_norm": 0.6077476830348257, "learning_rate": 3.51996047476694e-07, "loss": 0.2768, "step": 35614 }, { "epoch": 1.668384316297372, "grad_norm": 0.5604192946897972, "learning_rate": 3.5189902158254073e-07, "loss": 0.2496, "step": 35615 }, { "epoch": 1.6684311612873004, "grad_norm": 0.5934329854206547, "learning_rate": 3.518020080501686e-07, "loss": 0.2526, "step": 35616 }, { "epoch": 1.6684780062772286, "grad_norm": 0.6127478175588523, "learning_rate": 3.517050068801353e-07, "loss": 0.2654, "step": 35617 }, { "epoch": 1.6685248512671569, "grad_norm": 0.6114975434652963, "learning_rate": 3.5160801807299997e-07, "loss": 0.2706, "step": 35618 }, { "epoch": 1.6685716962570853, "grad_norm": 0.6086100958333406, "learning_rate": 3.5151104162932046e-07, "loss": 0.2804, "step": 35619 }, { "epoch": 1.6686185412470136, "grad_norm": 0.5763542384742505, "learning_rate": 3.5141407754965494e-07, "loss": 0.283, "step": 35620 }, { "epoch": 1.6686653862369418, "grad_norm": 0.5887874768130659, "learning_rate": 3.5131712583456187e-07, "loss": 0.2678, "step": 35621 }, { "epoch": 1.6687122312268703, "grad_norm": 0.574949132902316, "learning_rate": 3.5122018648459796e-07, "loss": 0.2619, "step": 35622 }, { "epoch": 1.6687590762167988, "grad_norm": 0.598600293410417, "learning_rate": 3.511232595003228e-07, "loss": 0.2784, "step": 35623 }, { "epoch": 1.668805921206727, "grad_norm": 0.5431976930676137, "learning_rate": 3.510263448822923e-07, "loss": 0.2546, "step": 35624 }, { "epoch": 1.6688527661966552, "grad_norm": 0.5590591732307628, "learning_rate": 3.50929442631065e-07, "loss": 0.2632, "step": 35625 }, { "epoch": 1.6688996111865837, "grad_norm": 0.6039594389001618, "learning_rate": 3.508325527471987e-07, "loss": 0.2886, "step": 35626 }, { "epoch": 1.668946456176512, "grad_norm": 0.5645399350749923, "learning_rate": 3.50735675231251e-07, "loss": 0.237, "step": 35627 }, { "epoch": 1.6689933011664402, "grad_norm": 0.5912076154221936, "learning_rate": 3.506388100837796e-07, "loss": 0.2743, "step": 35628 }, { "epoch": 1.6690401461563686, "grad_norm": 0.5852152003583491, "learning_rate": 3.50541957305342e-07, "loss": 0.2567, "step": 35629 }, { "epoch": 1.6690869911462969, "grad_norm": 0.5730988872819339, "learning_rate": 3.5044511689649425e-07, "loss": 0.2815, "step": 35630 }, { "epoch": 1.6691338361362251, "grad_norm": 0.5929924657097914, "learning_rate": 3.503482888577947e-07, "loss": 0.2578, "step": 35631 }, { "epoch": 1.6691806811261536, "grad_norm": 0.6093088067473852, "learning_rate": 3.5025147318980076e-07, "loss": 0.2683, "step": 35632 }, { "epoch": 1.669227526116082, "grad_norm": 0.6281743114553553, "learning_rate": 3.5015466989306886e-07, "loss": 0.2919, "step": 35633 }, { "epoch": 1.66927437110601, "grad_norm": 0.5690963224008279, "learning_rate": 3.500578789681572e-07, "loss": 0.2594, "step": 35634 }, { "epoch": 1.6693212160959385, "grad_norm": 0.6499467474386526, "learning_rate": 3.499611004156217e-07, "loss": 0.2646, "step": 35635 }, { "epoch": 1.669368061085867, "grad_norm": 0.6269518032950075, "learning_rate": 3.4986433423602e-07, "loss": 0.2734, "step": 35636 }, { "epoch": 1.6694149060757952, "grad_norm": 0.582277108655193, "learning_rate": 3.49767580429908e-07, "loss": 0.2558, "step": 35637 }, { "epoch": 1.6694617510657235, "grad_norm": 0.6228748555485407, "learning_rate": 3.496708389978434e-07, "loss": 0.2738, "step": 35638 }, { "epoch": 1.669508596055652, "grad_norm": 0.653140168948152, "learning_rate": 3.495741099403824e-07, "loss": 0.2929, "step": 35639 }, { "epoch": 1.6695554410455802, "grad_norm": 0.6247042979258073, "learning_rate": 3.494773932580828e-07, "loss": 0.2959, "step": 35640 }, { "epoch": 1.6696022860355084, "grad_norm": 0.5880851244944942, "learning_rate": 3.4938068895149946e-07, "loss": 0.267, "step": 35641 }, { "epoch": 1.6696491310254369, "grad_norm": 0.6698018624342081, "learning_rate": 3.4928399702118976e-07, "loss": 0.3005, "step": 35642 }, { "epoch": 1.6696959760153651, "grad_norm": 0.6319796912623373, "learning_rate": 3.491873174677107e-07, "loss": 0.2859, "step": 35643 }, { "epoch": 1.6697428210052934, "grad_norm": 0.6189152188111471, "learning_rate": 3.490906502916175e-07, "loss": 0.2822, "step": 35644 }, { "epoch": 1.6697896659952218, "grad_norm": 0.546419541385114, "learning_rate": 3.489939954934668e-07, "loss": 0.2583, "step": 35645 }, { "epoch": 1.6698365109851503, "grad_norm": 0.6356183768769671, "learning_rate": 3.48897353073816e-07, "loss": 0.2825, "step": 35646 }, { "epoch": 1.6698833559750783, "grad_norm": 0.6201564976022025, "learning_rate": 3.488007230332194e-07, "loss": 0.2857, "step": 35647 }, { "epoch": 1.6699302009650068, "grad_norm": 0.5772474308591246, "learning_rate": 3.48704105372234e-07, "loss": 0.2645, "step": 35648 }, { "epoch": 1.6699770459549352, "grad_norm": 0.5949397752783853, "learning_rate": 3.4860750009141575e-07, "loss": 0.2814, "step": 35649 }, { "epoch": 1.6700238909448635, "grad_norm": 0.5751154474152379, "learning_rate": 3.485109071913212e-07, "loss": 0.2644, "step": 35650 }, { "epoch": 1.6700707359347917, "grad_norm": 0.6122167568994964, "learning_rate": 3.4841432667250515e-07, "loss": 0.2627, "step": 35651 }, { "epoch": 1.6701175809247202, "grad_norm": 0.5741952054929004, "learning_rate": 3.483177585355241e-07, "loss": 0.2625, "step": 35652 }, { "epoch": 1.6701644259146484, "grad_norm": 0.6422589364082355, "learning_rate": 3.482212027809334e-07, "loss": 0.2895, "step": 35653 }, { "epoch": 1.6702112709045767, "grad_norm": 0.6231329514347103, "learning_rate": 3.481246594092885e-07, "loss": 0.2741, "step": 35654 }, { "epoch": 1.6702581158945051, "grad_norm": 0.6118272743297252, "learning_rate": 3.4802812842114564e-07, "loss": 0.2673, "step": 35655 }, { "epoch": 1.6703049608844334, "grad_norm": 0.5840389000740692, "learning_rate": 3.4793160981705987e-07, "loss": 0.2717, "step": 35656 }, { "epoch": 1.6703518058743616, "grad_norm": 0.6003923213930458, "learning_rate": 3.4783510359758746e-07, "loss": 0.2652, "step": 35657 }, { "epoch": 1.67039865086429, "grad_norm": 0.6476832505060073, "learning_rate": 3.477386097632829e-07, "loss": 0.285, "step": 35658 }, { "epoch": 1.6704454958542185, "grad_norm": 0.6257744592984854, "learning_rate": 3.4764212831470106e-07, "loss": 0.2704, "step": 35659 }, { "epoch": 1.6704923408441468, "grad_norm": 0.577064398799492, "learning_rate": 3.475456592523982e-07, "loss": 0.2671, "step": 35660 }, { "epoch": 1.670539185834075, "grad_norm": 0.6033815121515899, "learning_rate": 3.474492025769288e-07, "loss": 0.2762, "step": 35661 }, { "epoch": 1.6705860308240035, "grad_norm": 0.6071814755630202, "learning_rate": 3.473527582888481e-07, "loss": 0.2593, "step": 35662 }, { "epoch": 1.6706328758139317, "grad_norm": 0.5807766792520492, "learning_rate": 3.472563263887116e-07, "loss": 0.2681, "step": 35663 }, { "epoch": 1.67067972080386, "grad_norm": 0.5829718595443307, "learning_rate": 3.4715990687707395e-07, "loss": 0.2601, "step": 35664 }, { "epoch": 1.6707265657937884, "grad_norm": 0.5737814365499159, "learning_rate": 3.470634997544903e-07, "loss": 0.2701, "step": 35665 }, { "epoch": 1.6707734107837167, "grad_norm": 0.612332184637012, "learning_rate": 3.4696710502151455e-07, "loss": 0.2713, "step": 35666 }, { "epoch": 1.670820255773645, "grad_norm": 0.5772655257449429, "learning_rate": 3.468707226787019e-07, "loss": 0.266, "step": 35667 }, { "epoch": 1.6708671007635734, "grad_norm": 0.6288051502416407, "learning_rate": 3.467743527266068e-07, "loss": 0.2845, "step": 35668 }, { "epoch": 1.6709139457535018, "grad_norm": 0.6000921391963773, "learning_rate": 3.466779951657845e-07, "loss": 0.2575, "step": 35669 }, { "epoch": 1.6709607907434298, "grad_norm": 0.6549574611709786, "learning_rate": 3.4658164999678945e-07, "loss": 0.2661, "step": 35670 }, { "epoch": 1.6710076357333583, "grad_norm": 0.6037902097314463, "learning_rate": 3.4648531722017513e-07, "loss": 0.2669, "step": 35671 }, { "epoch": 1.6710544807232868, "grad_norm": 0.6279345450912326, "learning_rate": 3.463889968364972e-07, "loss": 0.2738, "step": 35672 }, { "epoch": 1.671101325713215, "grad_norm": 0.5645826577327413, "learning_rate": 3.462926888463089e-07, "loss": 0.2539, "step": 35673 }, { "epoch": 1.6711481707031433, "grad_norm": 0.6121896894432639, "learning_rate": 3.461963932501647e-07, "loss": 0.2695, "step": 35674 }, { "epoch": 1.6711950156930717, "grad_norm": 0.5883550998704712, "learning_rate": 3.461001100486189e-07, "loss": 0.2682, "step": 35675 }, { "epoch": 1.671241860683, "grad_norm": 0.6188815226341997, "learning_rate": 3.4600383924222575e-07, "loss": 0.2837, "step": 35676 }, { "epoch": 1.6712887056729282, "grad_norm": 0.5891161820362327, "learning_rate": 3.4590758083153956e-07, "loss": 0.2621, "step": 35677 }, { "epoch": 1.6713355506628567, "grad_norm": 0.6284751788049957, "learning_rate": 3.458113348171138e-07, "loss": 0.2764, "step": 35678 }, { "epoch": 1.671382395652785, "grad_norm": 0.6300502923469673, "learning_rate": 3.457151011995019e-07, "loss": 0.27, "step": 35679 }, { "epoch": 1.6714292406427131, "grad_norm": 0.6484530591373916, "learning_rate": 3.4561887997925813e-07, "loss": 0.2664, "step": 35680 }, { "epoch": 1.6714760856326416, "grad_norm": 0.5421606444281526, "learning_rate": 3.4552267115693624e-07, "loss": 0.2711, "step": 35681 }, { "epoch": 1.67152293062257, "grad_norm": 0.5551744119264831, "learning_rate": 3.454264747330899e-07, "loss": 0.2508, "step": 35682 }, { "epoch": 1.671569775612498, "grad_norm": 0.5863087310268648, "learning_rate": 3.453302907082731e-07, "loss": 0.255, "step": 35683 }, { "epoch": 1.6716166206024266, "grad_norm": 0.6107595015811206, "learning_rate": 3.4523411908303855e-07, "loss": 0.2763, "step": 35684 }, { "epoch": 1.671663465592355, "grad_norm": 0.5953096079963826, "learning_rate": 3.4513795985794044e-07, "loss": 0.2601, "step": 35685 }, { "epoch": 1.6717103105822833, "grad_norm": 0.6142720661524396, "learning_rate": 3.4504181303353116e-07, "loss": 0.2485, "step": 35686 }, { "epoch": 1.6717571555722115, "grad_norm": 0.5905953653918071, "learning_rate": 3.449456786103647e-07, "loss": 0.2723, "step": 35687 }, { "epoch": 1.67180400056214, "grad_norm": 0.6863218594872752, "learning_rate": 3.448495565889942e-07, "loss": 0.3056, "step": 35688 }, { "epoch": 1.6718508455520682, "grad_norm": 0.591875428541445, "learning_rate": 3.4475344696997345e-07, "loss": 0.2781, "step": 35689 }, { "epoch": 1.6718976905419964, "grad_norm": 0.6267363706594433, "learning_rate": 3.446573497538544e-07, "loss": 0.2893, "step": 35690 }, { "epoch": 1.671944535531925, "grad_norm": 0.579333393383056, "learning_rate": 3.445612649411903e-07, "loss": 0.2672, "step": 35691 }, { "epoch": 1.6719913805218531, "grad_norm": 0.6133682937682001, "learning_rate": 3.444651925325351e-07, "loss": 0.2807, "step": 35692 }, { "epoch": 1.6720382255117814, "grad_norm": 0.5799465701121046, "learning_rate": 3.4436913252844065e-07, "loss": 0.2724, "step": 35693 }, { "epoch": 1.6720850705017098, "grad_norm": 0.5798036243903487, "learning_rate": 3.442730849294598e-07, "loss": 0.2631, "step": 35694 }, { "epoch": 1.6721319154916383, "grad_norm": 0.6072788812072382, "learning_rate": 3.44177049736146e-07, "loss": 0.2765, "step": 35695 }, { "epoch": 1.6721787604815666, "grad_norm": 0.5530062523774661, "learning_rate": 3.4408102694905104e-07, "loss": 0.2579, "step": 35696 }, { "epoch": 1.6722256054714948, "grad_norm": 0.6149271361892575, "learning_rate": 3.439850165687278e-07, "loss": 0.2599, "step": 35697 }, { "epoch": 1.6722724504614233, "grad_norm": 0.6198464235637006, "learning_rate": 3.438890185957289e-07, "loss": 0.2738, "step": 35698 }, { "epoch": 1.6723192954513515, "grad_norm": 0.6092007896816923, "learning_rate": 3.437930330306072e-07, "loss": 0.2639, "step": 35699 }, { "epoch": 1.6723661404412797, "grad_norm": 0.5545172280323474, "learning_rate": 3.4369705987391424e-07, "loss": 0.2532, "step": 35700 }, { "epoch": 1.6724129854312082, "grad_norm": 0.5899490501401125, "learning_rate": 3.436010991262034e-07, "loss": 0.2702, "step": 35701 }, { "epoch": 1.6724598304211364, "grad_norm": 0.6235290520116304, "learning_rate": 3.435051507880255e-07, "loss": 0.2767, "step": 35702 }, { "epoch": 1.6725066754110647, "grad_norm": 0.5625047231867462, "learning_rate": 3.4340921485993356e-07, "loss": 0.2665, "step": 35703 }, { "epoch": 1.6725535204009931, "grad_norm": 0.5727390225562584, "learning_rate": 3.4331329134247945e-07, "loss": 0.2702, "step": 35704 }, { "epoch": 1.6726003653909216, "grad_norm": 0.6088734124359265, "learning_rate": 3.432173802362154e-07, "loss": 0.265, "step": 35705 }, { "epoch": 1.6726472103808496, "grad_norm": 0.5771054741530918, "learning_rate": 3.4312148154169387e-07, "loss": 0.2581, "step": 35706 }, { "epoch": 1.672694055370778, "grad_norm": 0.5644638884316355, "learning_rate": 3.4302559525946576e-07, "loss": 0.2594, "step": 35707 }, { "epoch": 1.6727409003607066, "grad_norm": 0.5731174152147265, "learning_rate": 3.4292972139008314e-07, "loss": 0.249, "step": 35708 }, { "epoch": 1.6727877453506348, "grad_norm": 0.6186733637570404, "learning_rate": 3.428338599340975e-07, "loss": 0.2983, "step": 35709 }, { "epoch": 1.672834590340563, "grad_norm": 0.5899464551342734, "learning_rate": 3.4273801089206084e-07, "loss": 0.2531, "step": 35710 }, { "epoch": 1.6728814353304915, "grad_norm": 0.6253228138899192, "learning_rate": 3.4264217426452474e-07, "loss": 0.2721, "step": 35711 }, { "epoch": 1.6729282803204197, "grad_norm": 0.6154393582997921, "learning_rate": 3.4254635005204123e-07, "loss": 0.28, "step": 35712 }, { "epoch": 1.672975125310348, "grad_norm": 0.6097859504188367, "learning_rate": 3.4245053825516073e-07, "loss": 0.2709, "step": 35713 }, { "epoch": 1.6730219703002764, "grad_norm": 0.6400459691407446, "learning_rate": 3.4235473887443555e-07, "loss": 0.2902, "step": 35714 }, { "epoch": 1.6730688152902047, "grad_norm": 0.6106426638489074, "learning_rate": 3.422589519104161e-07, "loss": 0.2714, "step": 35715 }, { "epoch": 1.673115660280133, "grad_norm": 0.6098239741667411, "learning_rate": 3.421631773636538e-07, "loss": 0.29, "step": 35716 }, { "epoch": 1.6731625052700614, "grad_norm": 0.6407413645464073, "learning_rate": 3.420674152347003e-07, "loss": 0.2784, "step": 35717 }, { "epoch": 1.6732093502599898, "grad_norm": 0.5786087584471835, "learning_rate": 3.419716655241065e-07, "loss": 0.2751, "step": 35718 }, { "epoch": 1.6732561952499179, "grad_norm": 0.6125977230157602, "learning_rate": 3.418759282324238e-07, "loss": 0.2899, "step": 35719 }, { "epoch": 1.6733030402398463, "grad_norm": 0.6122687116676322, "learning_rate": 3.4178020336020275e-07, "loss": 0.2636, "step": 35720 }, { "epoch": 1.6733498852297748, "grad_norm": 0.5910696965082557, "learning_rate": 3.416844909079936e-07, "loss": 0.2599, "step": 35721 }, { "epoch": 1.673396730219703, "grad_norm": 0.6102526950921063, "learning_rate": 3.4158879087634795e-07, "loss": 0.2791, "step": 35722 }, { "epoch": 1.6734435752096313, "grad_norm": 0.6118004264019318, "learning_rate": 3.4149310326581615e-07, "loss": 0.269, "step": 35723 }, { "epoch": 1.6734904201995597, "grad_norm": 0.654509085167267, "learning_rate": 3.413974280769489e-07, "loss": 0.2915, "step": 35724 }, { "epoch": 1.673537265189488, "grad_norm": 0.6000731077181312, "learning_rate": 3.413017653102976e-07, "loss": 0.2734, "step": 35725 }, { "epoch": 1.6735841101794162, "grad_norm": 0.5793034687721771, "learning_rate": 3.4120611496641145e-07, "loss": 0.2665, "step": 35726 }, { "epoch": 1.6736309551693447, "grad_norm": 0.5643911415296644, "learning_rate": 3.4111047704584206e-07, "loss": 0.2584, "step": 35727 }, { "epoch": 1.673677800159273, "grad_norm": 0.5753377805296531, "learning_rate": 3.410148515491388e-07, "loss": 0.2708, "step": 35728 }, { "epoch": 1.6737246451492012, "grad_norm": 0.5364938789702841, "learning_rate": 3.409192384768523e-07, "loss": 0.255, "step": 35729 }, { "epoch": 1.6737714901391296, "grad_norm": 0.6332830129763374, "learning_rate": 3.4082363782953294e-07, "loss": 0.2769, "step": 35730 }, { "epoch": 1.673818335129058, "grad_norm": 0.5935019515000475, "learning_rate": 3.4072804960773145e-07, "loss": 0.2597, "step": 35731 }, { "epoch": 1.6738651801189863, "grad_norm": 0.5472247969968035, "learning_rate": 3.406324738119968e-07, "loss": 0.2608, "step": 35732 }, { "epoch": 1.6739120251089146, "grad_norm": 0.5838819490326731, "learning_rate": 3.405369104428796e-07, "loss": 0.2502, "step": 35733 }, { "epoch": 1.673958870098843, "grad_norm": 0.6132039602464152, "learning_rate": 3.404413595009301e-07, "loss": 0.2871, "step": 35734 }, { "epoch": 1.6740057150887713, "grad_norm": 0.5752660813298124, "learning_rate": 3.403458209866972e-07, "loss": 0.2693, "step": 35735 }, { "epoch": 1.6740525600786995, "grad_norm": 0.6270060992401272, "learning_rate": 3.4025029490073166e-07, "loss": 0.2869, "step": 35736 }, { "epoch": 1.674099405068628, "grad_norm": 0.6024650088297397, "learning_rate": 3.4015478124358234e-07, "loss": 0.271, "step": 35737 }, { "epoch": 1.6741462500585562, "grad_norm": 0.5686390331702175, "learning_rate": 3.4005928001580037e-07, "loss": 0.2619, "step": 35738 }, { "epoch": 1.6741930950484845, "grad_norm": 0.6534811112694657, "learning_rate": 3.399637912179335e-07, "loss": 0.2789, "step": 35739 }, { "epoch": 1.674239940038413, "grad_norm": 0.6901701870262923, "learning_rate": 3.3986831485053216e-07, "loss": 0.2845, "step": 35740 }, { "epoch": 1.6742867850283414, "grad_norm": 0.5573348502952183, "learning_rate": 3.3977285091414654e-07, "loss": 0.2478, "step": 35741 }, { "epoch": 1.6743336300182694, "grad_norm": 0.6060769325490851, "learning_rate": 3.396773994093244e-07, "loss": 0.2562, "step": 35742 }, { "epoch": 1.6743804750081979, "grad_norm": 0.583909884637559, "learning_rate": 3.39581960336616e-07, "loss": 0.2689, "step": 35743 }, { "epoch": 1.6744273199981263, "grad_norm": 0.6391180694146172, "learning_rate": 3.3948653369657083e-07, "loss": 0.2826, "step": 35744 }, { "epoch": 1.6744741649880546, "grad_norm": 0.5757771975370369, "learning_rate": 3.393911194897373e-07, "loss": 0.2537, "step": 35745 }, { "epoch": 1.6745210099779828, "grad_norm": 0.6018846874564097, "learning_rate": 3.3929571771666447e-07, "loss": 0.258, "step": 35746 }, { "epoch": 1.6745678549679113, "grad_norm": 0.6481394802661745, "learning_rate": 3.3920032837790215e-07, "loss": 0.286, "step": 35747 }, { "epoch": 1.6746146999578395, "grad_norm": 0.6049923121946738, "learning_rate": 3.3910495147399913e-07, "loss": 0.2696, "step": 35748 }, { "epoch": 1.6746615449477678, "grad_norm": 0.5882683407313881, "learning_rate": 3.390095870055035e-07, "loss": 0.2678, "step": 35749 }, { "epoch": 1.6747083899376962, "grad_norm": 0.6147778488685886, "learning_rate": 3.3891423497296516e-07, "loss": 0.2717, "step": 35750 }, { "epoch": 1.6747552349276245, "grad_norm": 0.6033413257213206, "learning_rate": 3.3881889537693144e-07, "loss": 0.2789, "step": 35751 }, { "epoch": 1.6748020799175527, "grad_norm": 0.6106090133389213, "learning_rate": 3.387235682179521e-07, "loss": 0.2828, "step": 35752 }, { "epoch": 1.6748489249074812, "grad_norm": 0.60009336863994, "learning_rate": 3.3862825349657515e-07, "loss": 0.2734, "step": 35753 }, { "epoch": 1.6748957698974096, "grad_norm": 0.6044168864603985, "learning_rate": 3.385329512133495e-07, "loss": 0.2745, "step": 35754 }, { "epoch": 1.6749426148873376, "grad_norm": 0.602989764802025, "learning_rate": 3.3843766136882397e-07, "loss": 0.2707, "step": 35755 }, { "epoch": 1.674989459877266, "grad_norm": 0.5452230030383496, "learning_rate": 3.3834238396354637e-07, "loss": 0.2624, "step": 35756 }, { "epoch": 1.6750363048671946, "grad_norm": 0.6171347895487299, "learning_rate": 3.382471189980646e-07, "loss": 0.2717, "step": 35757 }, { "epoch": 1.6750831498571228, "grad_norm": 0.6244806583895877, "learning_rate": 3.381518664729272e-07, "loss": 0.2559, "step": 35758 }, { "epoch": 1.675129994847051, "grad_norm": 0.5966463724291075, "learning_rate": 3.3805662638868255e-07, "loss": 0.2752, "step": 35759 }, { "epoch": 1.6751768398369795, "grad_norm": 0.6055841283238376, "learning_rate": 3.379613987458785e-07, "loss": 0.2679, "step": 35760 }, { "epoch": 1.6752236848269078, "grad_norm": 0.6067652776652652, "learning_rate": 3.378661835450639e-07, "loss": 0.2647, "step": 35761 }, { "epoch": 1.675270529816836, "grad_norm": 0.6188467331244998, "learning_rate": 3.3777098078678545e-07, "loss": 0.26, "step": 35762 }, { "epoch": 1.6753173748067645, "grad_norm": 0.5946177379278123, "learning_rate": 3.3767579047159186e-07, "loss": 0.2783, "step": 35763 }, { "epoch": 1.6753642197966927, "grad_norm": 0.5868277998976864, "learning_rate": 3.375806126000303e-07, "loss": 0.2771, "step": 35764 }, { "epoch": 1.675411064786621, "grad_norm": 0.5809851266744273, "learning_rate": 3.374854471726488e-07, "loss": 0.2596, "step": 35765 }, { "epoch": 1.6754579097765494, "grad_norm": 0.6051154689088721, "learning_rate": 3.373902941899948e-07, "loss": 0.2564, "step": 35766 }, { "epoch": 1.6755047547664779, "grad_norm": 0.5729833073942245, "learning_rate": 3.3729515365261615e-07, "loss": 0.2654, "step": 35767 }, { "epoch": 1.675551599756406, "grad_norm": 0.5545448013424775, "learning_rate": 3.372000255610611e-07, "loss": 0.2503, "step": 35768 }, { "epoch": 1.6755984447463343, "grad_norm": 0.6535831724654111, "learning_rate": 3.371049099158763e-07, "loss": 0.2784, "step": 35769 }, { "epoch": 1.6756452897362628, "grad_norm": 0.5917051221078341, "learning_rate": 3.3700980671760833e-07, "loss": 0.2697, "step": 35770 }, { "epoch": 1.675692134726191, "grad_norm": 0.6125720516976787, "learning_rate": 3.3691471596680544e-07, "loss": 0.2718, "step": 35771 }, { "epoch": 1.6757389797161193, "grad_norm": 0.5748425557525203, "learning_rate": 3.3681963766401457e-07, "loss": 0.2574, "step": 35772 }, { "epoch": 1.6757858247060478, "grad_norm": 0.5943631351339093, "learning_rate": 3.3672457180978306e-07, "loss": 0.2653, "step": 35773 }, { "epoch": 1.675832669695976, "grad_norm": 0.6031687174537642, "learning_rate": 3.366295184046586e-07, "loss": 0.2589, "step": 35774 }, { "epoch": 1.6758795146859042, "grad_norm": 0.6180124176201367, "learning_rate": 3.3653447744918686e-07, "loss": 0.2828, "step": 35775 }, { "epoch": 1.6759263596758327, "grad_norm": 0.578091726965997, "learning_rate": 3.364394489439157e-07, "loss": 0.2632, "step": 35776 }, { "epoch": 1.6759732046657612, "grad_norm": 0.6042152823667967, "learning_rate": 3.363444328893917e-07, "loss": 0.2627, "step": 35777 }, { "epoch": 1.6760200496556892, "grad_norm": 0.6165508265333729, "learning_rate": 3.3624942928616124e-07, "loss": 0.2753, "step": 35778 }, { "epoch": 1.6760668946456176, "grad_norm": 0.5992289361036417, "learning_rate": 3.3615443813477174e-07, "loss": 0.2757, "step": 35779 }, { "epoch": 1.676113739635546, "grad_norm": 0.6015449431726446, "learning_rate": 3.3605945943576997e-07, "loss": 0.2525, "step": 35780 }, { "epoch": 1.6761605846254743, "grad_norm": 0.5913808456282721, "learning_rate": 3.359644931897016e-07, "loss": 0.2402, "step": 35781 }, { "epoch": 1.6762074296154026, "grad_norm": 0.6194601967563087, "learning_rate": 3.358695393971137e-07, "loss": 0.2807, "step": 35782 }, { "epoch": 1.676254274605331, "grad_norm": 0.6381237166178421, "learning_rate": 3.357745980585533e-07, "loss": 0.2866, "step": 35783 }, { "epoch": 1.6763011195952593, "grad_norm": 0.6411413808756025, "learning_rate": 3.356796691745656e-07, "loss": 0.2764, "step": 35784 }, { "epoch": 1.6763479645851875, "grad_norm": 0.5740965480707336, "learning_rate": 3.3558475274569734e-07, "loss": 0.258, "step": 35785 }, { "epoch": 1.676394809575116, "grad_norm": 0.6313668174701393, "learning_rate": 3.354898487724953e-07, "loss": 0.286, "step": 35786 }, { "epoch": 1.6764416545650442, "grad_norm": 0.5666386314231027, "learning_rate": 3.353949572555046e-07, "loss": 0.2565, "step": 35787 }, { "epoch": 1.6764884995549725, "grad_norm": 0.5754722267994652, "learning_rate": 3.3530007819527197e-07, "loss": 0.2584, "step": 35788 }, { "epoch": 1.676535344544901, "grad_norm": 0.6259344915329595, "learning_rate": 3.352052115923432e-07, "loss": 0.2699, "step": 35789 }, { "epoch": 1.6765821895348294, "grad_norm": 0.5992322213515257, "learning_rate": 3.35110357447265e-07, "loss": 0.2645, "step": 35790 }, { "epoch": 1.6766290345247574, "grad_norm": 0.5960780826931561, "learning_rate": 3.3501551576058193e-07, "loss": 0.2706, "step": 35791 }, { "epoch": 1.6766758795146859, "grad_norm": 0.6397624497365261, "learning_rate": 3.349206865328411e-07, "loss": 0.2927, "step": 35792 }, { "epoch": 1.6767227245046143, "grad_norm": 0.6212904127536272, "learning_rate": 3.3482586976458703e-07, "loss": 0.2912, "step": 35793 }, { "epoch": 1.6767695694945426, "grad_norm": 0.6122201519893079, "learning_rate": 3.347310654563657e-07, "loss": 0.2887, "step": 35794 }, { "epoch": 1.6768164144844708, "grad_norm": 0.5618628081656779, "learning_rate": 3.3463627360872273e-07, "loss": 0.2637, "step": 35795 }, { "epoch": 1.6768632594743993, "grad_norm": 0.5777601213945647, "learning_rate": 3.3454149422220416e-07, "loss": 0.2425, "step": 35796 }, { "epoch": 1.6769101044643275, "grad_norm": 0.6483806360087441, "learning_rate": 3.3444672729735533e-07, "loss": 0.2646, "step": 35797 }, { "epoch": 1.6769569494542558, "grad_norm": 0.6016740302348412, "learning_rate": 3.3435197283472136e-07, "loss": 0.267, "step": 35798 }, { "epoch": 1.6770037944441842, "grad_norm": 0.5974830391602239, "learning_rate": 3.3425723083484683e-07, "loss": 0.2599, "step": 35799 }, { "epoch": 1.6770506394341125, "grad_norm": 0.6096030023449638, "learning_rate": 3.341625012982777e-07, "loss": 0.2967, "step": 35800 }, { "epoch": 1.6770974844240407, "grad_norm": 0.5624057629264291, "learning_rate": 3.340677842255591e-07, "loss": 0.2581, "step": 35801 }, { "epoch": 1.6771443294139692, "grad_norm": 0.6084648795154064, "learning_rate": 3.339730796172358e-07, "loss": 0.2837, "step": 35802 }, { "epoch": 1.6771911744038976, "grad_norm": 0.5910632537388149, "learning_rate": 3.3387838747385384e-07, "loss": 0.2574, "step": 35803 }, { "epoch": 1.6772380193938259, "grad_norm": 0.5893809326083036, "learning_rate": 3.337837077959566e-07, "loss": 0.2641, "step": 35804 }, { "epoch": 1.6772848643837541, "grad_norm": 0.5913378091234655, "learning_rate": 3.336890405840901e-07, "loss": 0.2567, "step": 35805 }, { "epoch": 1.6773317093736826, "grad_norm": 0.5974913534253204, "learning_rate": 3.335943858387983e-07, "loss": 0.2714, "step": 35806 }, { "epoch": 1.6773785543636108, "grad_norm": 0.5728053301440443, "learning_rate": 3.334997435606263e-07, "loss": 0.2605, "step": 35807 }, { "epoch": 1.677425399353539, "grad_norm": 0.6142592913547684, "learning_rate": 3.334051137501185e-07, "loss": 0.2819, "step": 35808 }, { "epoch": 1.6774722443434675, "grad_norm": 0.5837918866367384, "learning_rate": 3.3331049640781985e-07, "loss": 0.2656, "step": 35809 }, { "epoch": 1.6775190893333958, "grad_norm": 0.5708171718470014, "learning_rate": 3.3321589153427565e-07, "loss": 0.262, "step": 35810 }, { "epoch": 1.677565934323324, "grad_norm": 0.5903500937617597, "learning_rate": 3.331212991300284e-07, "loss": 0.2634, "step": 35811 }, { "epoch": 1.6776127793132525, "grad_norm": 0.5833781027625116, "learning_rate": 3.330267191956241e-07, "loss": 0.2484, "step": 35812 }, { "epoch": 1.677659624303181, "grad_norm": 0.5889886035178199, "learning_rate": 3.329321517316059e-07, "loss": 0.2568, "step": 35813 }, { "epoch": 1.677706469293109, "grad_norm": 0.6265024608644801, "learning_rate": 3.3283759673851836e-07, "loss": 0.27, "step": 35814 }, { "epoch": 1.6777533142830374, "grad_norm": 0.6105258974402435, "learning_rate": 3.3274305421690604e-07, "loss": 0.2872, "step": 35815 }, { "epoch": 1.6778001592729659, "grad_norm": 0.5751169124457781, "learning_rate": 3.3264852416731246e-07, "loss": 0.2688, "step": 35816 }, { "epoch": 1.6778470042628941, "grad_norm": 0.5834479898648073, "learning_rate": 3.325540065902824e-07, "loss": 0.2709, "step": 35817 }, { "epoch": 1.6778938492528224, "grad_norm": 0.5959857846610442, "learning_rate": 3.324595014863594e-07, "loss": 0.2767, "step": 35818 }, { "epoch": 1.6779406942427508, "grad_norm": 0.5773396898943672, "learning_rate": 3.323650088560867e-07, "loss": 0.274, "step": 35819 }, { "epoch": 1.677987539232679, "grad_norm": 0.5537057360398047, "learning_rate": 3.322705287000083e-07, "loss": 0.2513, "step": 35820 }, { "epoch": 1.6780343842226073, "grad_norm": 0.5922672572968299, "learning_rate": 3.3217606101866835e-07, "loss": 0.2666, "step": 35821 }, { "epoch": 1.6780812292125358, "grad_norm": 0.5474641674269911, "learning_rate": 3.3208160581261026e-07, "loss": 0.2551, "step": 35822 }, { "epoch": 1.678128074202464, "grad_norm": 0.6301374647460786, "learning_rate": 3.3198716308237843e-07, "loss": 0.28, "step": 35823 }, { "epoch": 1.6781749191923923, "grad_norm": 0.5803144048725101, "learning_rate": 3.318927328285146e-07, "loss": 0.2612, "step": 35824 }, { "epoch": 1.6782217641823207, "grad_norm": 0.5899470118450441, "learning_rate": 3.317983150515641e-07, "loss": 0.2464, "step": 35825 }, { "epoch": 1.6782686091722492, "grad_norm": 0.616798349425378, "learning_rate": 3.3170390975206857e-07, "loss": 0.2726, "step": 35826 }, { "epoch": 1.6783154541621772, "grad_norm": 0.5825081335858777, "learning_rate": 3.316095169305722e-07, "loss": 0.2589, "step": 35827 }, { "epoch": 1.6783622991521057, "grad_norm": 0.6484750432656133, "learning_rate": 3.315151365876179e-07, "loss": 0.2644, "step": 35828 }, { "epoch": 1.6784091441420341, "grad_norm": 0.6161823861481901, "learning_rate": 3.3142076872374944e-07, "loss": 0.2702, "step": 35829 }, { "epoch": 1.6784559891319624, "grad_norm": 0.5909557029106952, "learning_rate": 3.3132641333950886e-07, "loss": 0.2727, "step": 35830 }, { "epoch": 1.6785028341218906, "grad_norm": 0.6287794067252648, "learning_rate": 3.312320704354399e-07, "loss": 0.28, "step": 35831 }, { "epoch": 1.678549679111819, "grad_norm": 0.5598975287015616, "learning_rate": 3.311377400120858e-07, "loss": 0.2676, "step": 35832 }, { "epoch": 1.6785965241017473, "grad_norm": 0.578242119823152, "learning_rate": 3.3104342206998817e-07, "loss": 0.2582, "step": 35833 }, { "epoch": 1.6786433690916756, "grad_norm": 0.5888644388187743, "learning_rate": 3.3094911660969067e-07, "loss": 0.2671, "step": 35834 }, { "epoch": 1.678690214081604, "grad_norm": 0.6219121697900626, "learning_rate": 3.3085482363173634e-07, "loss": 0.2825, "step": 35835 }, { "epoch": 1.6787370590715323, "grad_norm": 0.6100134896317299, "learning_rate": 3.3076054313666674e-07, "loss": 0.2822, "step": 35836 }, { "epoch": 1.6787839040614605, "grad_norm": 0.5606577700703089, "learning_rate": 3.3066627512502505e-07, "loss": 0.2578, "step": 35837 }, { "epoch": 1.678830749051389, "grad_norm": 0.557387508290154, "learning_rate": 3.3057201959735357e-07, "loss": 0.2616, "step": 35838 }, { "epoch": 1.6788775940413174, "grad_norm": 0.6257776257405498, "learning_rate": 3.3047777655419556e-07, "loss": 0.2879, "step": 35839 }, { "epoch": 1.6789244390312457, "grad_norm": 0.6390360195050807, "learning_rate": 3.303835459960919e-07, "loss": 0.2648, "step": 35840 }, { "epoch": 1.678971284021174, "grad_norm": 0.6046015534849687, "learning_rate": 3.302893279235864e-07, "loss": 0.2737, "step": 35841 }, { "epoch": 1.6790181290111024, "grad_norm": 0.604207467959209, "learning_rate": 3.301951223372196e-07, "loss": 0.2661, "step": 35842 }, { "epoch": 1.6790649740010306, "grad_norm": 0.5712851015837351, "learning_rate": 3.3010092923753483e-07, "loss": 0.2717, "step": 35843 }, { "epoch": 1.6791118189909588, "grad_norm": 0.5968172912282569, "learning_rate": 3.300067486250738e-07, "loss": 0.2677, "step": 35844 }, { "epoch": 1.6791586639808873, "grad_norm": 0.5970308134888441, "learning_rate": 3.299125805003783e-07, "loss": 0.2558, "step": 35845 }, { "epoch": 1.6792055089708156, "grad_norm": 0.6134229832325437, "learning_rate": 3.298184248639913e-07, "loss": 0.2895, "step": 35846 }, { "epoch": 1.6792523539607438, "grad_norm": 0.57527207349523, "learning_rate": 3.2972428171645364e-07, "loss": 0.2615, "step": 35847 }, { "epoch": 1.6792991989506723, "grad_norm": 0.6305532327214863, "learning_rate": 3.2963015105830666e-07, "loss": 0.2653, "step": 35848 }, { "epoch": 1.6793460439406007, "grad_norm": 0.6343603025727252, "learning_rate": 3.295360328900929e-07, "loss": 0.2759, "step": 35849 }, { "epoch": 1.6793928889305287, "grad_norm": 0.5873097714851746, "learning_rate": 3.294419272123536e-07, "loss": 0.2647, "step": 35850 }, { "epoch": 1.6794397339204572, "grad_norm": 0.5926145798436172, "learning_rate": 3.2934783402563037e-07, "loss": 0.2597, "step": 35851 }, { "epoch": 1.6794865789103857, "grad_norm": 0.604599249290606, "learning_rate": 3.292537533304654e-07, "loss": 0.2653, "step": 35852 }, { "epoch": 1.679533423900314, "grad_norm": 0.5868868798906507, "learning_rate": 3.29159685127399e-07, "loss": 0.2549, "step": 35853 }, { "epoch": 1.6795802688902421, "grad_norm": 0.6239775633647591, "learning_rate": 3.2906562941697335e-07, "loss": 0.2682, "step": 35854 }, { "epoch": 1.6796271138801706, "grad_norm": 0.6471804693049406, "learning_rate": 3.289715861997292e-07, "loss": 0.285, "step": 35855 }, { "epoch": 1.6796739588700988, "grad_norm": 0.5768529594935351, "learning_rate": 3.2887755547620747e-07, "loss": 0.2596, "step": 35856 }, { "epoch": 1.679720803860027, "grad_norm": 0.5856527165470137, "learning_rate": 3.2878353724695e-07, "loss": 0.2577, "step": 35857 }, { "epoch": 1.6797676488499556, "grad_norm": 0.6100104960211682, "learning_rate": 3.2868953151249745e-07, "loss": 0.2627, "step": 35858 }, { "epoch": 1.6798144938398838, "grad_norm": 0.6484948114928301, "learning_rate": 3.2859553827339156e-07, "loss": 0.2964, "step": 35859 }, { "epoch": 1.679861338829812, "grad_norm": 0.5663065289030454, "learning_rate": 3.2850155753017226e-07, "loss": 0.2758, "step": 35860 }, { "epoch": 1.6799081838197405, "grad_norm": 0.5895111973731426, "learning_rate": 3.2840758928338043e-07, "loss": 0.2689, "step": 35861 }, { "epoch": 1.679955028809669, "grad_norm": 0.6196250766692356, "learning_rate": 3.283136335335571e-07, "loss": 0.2708, "step": 35862 }, { "epoch": 1.680001873799597, "grad_norm": 0.6269861142783736, "learning_rate": 3.282196902812429e-07, "loss": 0.2709, "step": 35863 }, { "epoch": 1.6800487187895254, "grad_norm": 0.5938464146761392, "learning_rate": 3.281257595269788e-07, "loss": 0.2574, "step": 35864 }, { "epoch": 1.680095563779454, "grad_norm": 0.6121687760387362, "learning_rate": 3.2803184127130546e-07, "loss": 0.2605, "step": 35865 }, { "epoch": 1.6801424087693821, "grad_norm": 0.5938754086958614, "learning_rate": 3.279379355147622e-07, "loss": 0.2636, "step": 35866 }, { "epoch": 1.6801892537593104, "grad_norm": 0.6024978724516857, "learning_rate": 3.2784404225789104e-07, "loss": 0.2808, "step": 35867 }, { "epoch": 1.6802360987492388, "grad_norm": 0.6155480541762519, "learning_rate": 3.2775016150123075e-07, "loss": 0.2906, "step": 35868 }, { "epoch": 1.680282943739167, "grad_norm": 0.5929953194724911, "learning_rate": 3.276562932453223e-07, "loss": 0.2576, "step": 35869 }, { "epoch": 1.6803297887290953, "grad_norm": 0.5447790495829894, "learning_rate": 3.275624374907058e-07, "loss": 0.2541, "step": 35870 }, { "epoch": 1.6803766337190238, "grad_norm": 0.5917922759890056, "learning_rate": 3.2746859423792217e-07, "loss": 0.2854, "step": 35871 }, { "epoch": 1.680423478708952, "grad_norm": 0.5856056480496803, "learning_rate": 3.273747634875102e-07, "loss": 0.2852, "step": 35872 }, { "epoch": 1.6804703236988803, "grad_norm": 0.6599251457169649, "learning_rate": 3.2728094524001027e-07, "loss": 0.2972, "step": 35873 }, { "epoch": 1.6805171686888087, "grad_norm": 0.6330762551279352, "learning_rate": 3.2718713949596305e-07, "loss": 0.2723, "step": 35874 }, { "epoch": 1.6805640136787372, "grad_norm": 0.5815349858925246, "learning_rate": 3.2709334625590725e-07, "loss": 0.2629, "step": 35875 }, { "epoch": 1.6806108586686654, "grad_norm": 0.6040994768074779, "learning_rate": 3.269995655203828e-07, "loss": 0.2626, "step": 35876 }, { "epoch": 1.6806577036585937, "grad_norm": 0.578702306885852, "learning_rate": 3.2690579728993007e-07, "loss": 0.2711, "step": 35877 }, { "epoch": 1.6807045486485221, "grad_norm": 0.5667560933009507, "learning_rate": 3.2681204156508855e-07, "loss": 0.2562, "step": 35878 }, { "epoch": 1.6807513936384504, "grad_norm": 0.6010738632291257, "learning_rate": 3.2671829834639706e-07, "loss": 0.2806, "step": 35879 }, { "epoch": 1.6807982386283786, "grad_norm": 0.5979881418366332, "learning_rate": 3.266245676343957e-07, "loss": 0.2598, "step": 35880 }, { "epoch": 1.680845083618307, "grad_norm": 0.6144740833692504, "learning_rate": 3.2653084942962407e-07, "loss": 0.2717, "step": 35881 }, { "epoch": 1.6808919286082353, "grad_norm": 0.6089710611829091, "learning_rate": 3.264371437326205e-07, "loss": 0.2763, "step": 35882 }, { "epoch": 1.6809387735981636, "grad_norm": 0.5971108490800137, "learning_rate": 3.2634345054392503e-07, "loss": 0.2784, "step": 35883 }, { "epoch": 1.680985618588092, "grad_norm": 0.5567010146317445, "learning_rate": 3.2624976986407706e-07, "loss": 0.2507, "step": 35884 }, { "epoch": 1.6810324635780205, "grad_norm": 0.5964408111298316, "learning_rate": 3.261561016936149e-07, "loss": 0.2714, "step": 35885 }, { "epoch": 1.6810793085679485, "grad_norm": 0.5894940198749796, "learning_rate": 3.2606244603307804e-07, "loss": 0.2681, "step": 35886 }, { "epoch": 1.681126153557877, "grad_norm": 0.5964075943803426, "learning_rate": 3.259688028830052e-07, "loss": 0.2599, "step": 35887 }, { "epoch": 1.6811729985478054, "grad_norm": 0.6132941989927202, "learning_rate": 3.258751722439363e-07, "loss": 0.2914, "step": 35888 }, { "epoch": 1.6812198435377337, "grad_norm": 0.6157572553383905, "learning_rate": 3.257815541164086e-07, "loss": 0.2807, "step": 35889 }, { "epoch": 1.681266688527662, "grad_norm": 0.5729925326647649, "learning_rate": 3.2568794850096204e-07, "loss": 0.2535, "step": 35890 }, { "epoch": 1.6813135335175904, "grad_norm": 0.6260042064472231, "learning_rate": 3.2559435539813446e-07, "loss": 0.2865, "step": 35891 }, { "epoch": 1.6813603785075186, "grad_norm": 0.6188446496942646, "learning_rate": 3.255007748084649e-07, "loss": 0.2654, "step": 35892 }, { "epoch": 1.6814072234974469, "grad_norm": 0.591981132307, "learning_rate": 3.254072067324918e-07, "loss": 0.2774, "step": 35893 }, { "epoch": 1.6814540684873753, "grad_norm": 0.6206560168188957, "learning_rate": 3.253136511707536e-07, "loss": 0.2565, "step": 35894 }, { "epoch": 1.6815009134773036, "grad_norm": 0.625702723974957, "learning_rate": 3.2522010812378933e-07, "loss": 0.2725, "step": 35895 }, { "epoch": 1.6815477584672318, "grad_norm": 0.6011376202511136, "learning_rate": 3.2512657759213667e-07, "loss": 0.2741, "step": 35896 }, { "epoch": 1.6815946034571603, "grad_norm": 0.5767047192327822, "learning_rate": 3.2503305957633347e-07, "loss": 0.2485, "step": 35897 }, { "epoch": 1.6816414484470887, "grad_norm": 0.60233066108361, "learning_rate": 3.249395540769182e-07, "loss": 0.2611, "step": 35898 }, { "epoch": 1.6816882934370168, "grad_norm": 0.57543961765228, "learning_rate": 3.248460610944293e-07, "loss": 0.2501, "step": 35899 }, { "epoch": 1.6817351384269452, "grad_norm": 0.6523101562352089, "learning_rate": 3.247525806294044e-07, "loss": 0.2796, "step": 35900 }, { "epoch": 1.6817819834168737, "grad_norm": 0.6130811397737593, "learning_rate": 3.2465911268238256e-07, "loss": 0.2631, "step": 35901 }, { "epoch": 1.681828828406802, "grad_norm": 0.6079955015824217, "learning_rate": 3.2456565725389995e-07, "loss": 0.2535, "step": 35902 }, { "epoch": 1.6818756733967302, "grad_norm": 0.5690526478363842, "learning_rate": 3.244722143444959e-07, "loss": 0.2583, "step": 35903 }, { "epoch": 1.6819225183866586, "grad_norm": 0.6208589088424716, "learning_rate": 3.2437878395470664e-07, "loss": 0.2821, "step": 35904 }, { "epoch": 1.6819693633765869, "grad_norm": 0.6446140875042997, "learning_rate": 3.242853660850709e-07, "loss": 0.2773, "step": 35905 }, { "epoch": 1.682016208366515, "grad_norm": 0.6088814618842872, "learning_rate": 3.24191960736126e-07, "loss": 0.2682, "step": 35906 }, { "epoch": 1.6820630533564436, "grad_norm": 0.5910083537803821, "learning_rate": 3.2409856790840937e-07, "loss": 0.254, "step": 35907 }, { "epoch": 1.6821098983463718, "grad_norm": 0.5743278057437153, "learning_rate": 3.240051876024594e-07, "loss": 0.2636, "step": 35908 }, { "epoch": 1.6821567433363, "grad_norm": 0.5712176086231323, "learning_rate": 3.239118198188124e-07, "loss": 0.252, "step": 35909 }, { "epoch": 1.6822035883262285, "grad_norm": 0.5973633507815772, "learning_rate": 3.238184645580056e-07, "loss": 0.2633, "step": 35910 }, { "epoch": 1.682250433316157, "grad_norm": 0.6095440257885844, "learning_rate": 3.2372512182057644e-07, "loss": 0.267, "step": 35911 }, { "epoch": 1.682297278306085, "grad_norm": 0.6161979218603619, "learning_rate": 3.236317916070622e-07, "loss": 0.282, "step": 35912 }, { "epoch": 1.6823441232960135, "grad_norm": 0.6557194839206153, "learning_rate": 3.2353847391800003e-07, "loss": 0.297, "step": 35913 }, { "epoch": 1.682390968285942, "grad_norm": 0.6033838115236838, "learning_rate": 3.234451687539275e-07, "loss": 0.2832, "step": 35914 }, { "epoch": 1.6824378132758702, "grad_norm": 0.6193711729861012, "learning_rate": 3.233518761153803e-07, "loss": 0.2872, "step": 35915 }, { "epoch": 1.6824846582657984, "grad_norm": 0.6148617868698505, "learning_rate": 3.232585960028967e-07, "loss": 0.2661, "step": 35916 }, { "epoch": 1.6825315032557269, "grad_norm": 0.5945373765634285, "learning_rate": 3.231653284170122e-07, "loss": 0.2704, "step": 35917 }, { "epoch": 1.682578348245655, "grad_norm": 0.5810717474849645, "learning_rate": 3.23072073358264e-07, "loss": 0.2629, "step": 35918 }, { "epoch": 1.6826251932355833, "grad_norm": 0.6043405225804621, "learning_rate": 3.229788308271889e-07, "loss": 0.2664, "step": 35919 }, { "epoch": 1.6826720382255118, "grad_norm": 0.6010736586666487, "learning_rate": 3.228856008243242e-07, "loss": 0.2512, "step": 35920 }, { "epoch": 1.68271888321544, "grad_norm": 0.5677525414078974, "learning_rate": 3.227923833502047e-07, "loss": 0.2529, "step": 35921 }, { "epoch": 1.6827657282053683, "grad_norm": 0.6496174012287136, "learning_rate": 3.226991784053682e-07, "loss": 0.2624, "step": 35922 }, { "epoch": 1.6828125731952968, "grad_norm": 0.6041662374898815, "learning_rate": 3.22605985990351e-07, "loss": 0.2661, "step": 35923 }, { "epoch": 1.6828594181852252, "grad_norm": 0.6404980041674481, "learning_rate": 3.225128061056887e-07, "loss": 0.2861, "step": 35924 }, { "epoch": 1.6829062631751535, "grad_norm": 0.6563639392157763, "learning_rate": 3.2241963875191767e-07, "loss": 0.2846, "step": 35925 }, { "epoch": 1.6829531081650817, "grad_norm": 0.5407104502157145, "learning_rate": 3.2232648392957504e-07, "loss": 0.2626, "step": 35926 }, { "epoch": 1.6829999531550102, "grad_norm": 0.600141711170765, "learning_rate": 3.2223334163919567e-07, "loss": 0.2644, "step": 35927 }, { "epoch": 1.6830467981449384, "grad_norm": 0.5839077542019249, "learning_rate": 3.2214021188131605e-07, "loss": 0.2626, "step": 35928 }, { "epoch": 1.6830936431348666, "grad_norm": 0.6125237722029477, "learning_rate": 3.2204709465647186e-07, "loss": 0.2616, "step": 35929 }, { "epoch": 1.683140488124795, "grad_norm": 0.5739506816252452, "learning_rate": 3.2195398996519993e-07, "loss": 0.273, "step": 35930 }, { "epoch": 1.6831873331147233, "grad_norm": 0.5503009868952877, "learning_rate": 3.218608978080351e-07, "loss": 0.2568, "step": 35931 }, { "epoch": 1.6832341781046516, "grad_norm": 0.6155479617755557, "learning_rate": 3.217678181855136e-07, "loss": 0.2814, "step": 35932 }, { "epoch": 1.68328102309458, "grad_norm": 0.5937264202694159, "learning_rate": 3.216747510981702e-07, "loss": 0.2686, "step": 35933 }, { "epoch": 1.6833278680845085, "grad_norm": 0.5853871696704099, "learning_rate": 3.2158169654654124e-07, "loss": 0.2559, "step": 35934 }, { "epoch": 1.6833747130744365, "grad_norm": 0.5793236439316642, "learning_rate": 3.214886545311621e-07, "loss": 0.2789, "step": 35935 }, { "epoch": 1.683421558064365, "grad_norm": 0.6189766255364779, "learning_rate": 3.213956250525682e-07, "loss": 0.2841, "step": 35936 }, { "epoch": 1.6834684030542935, "grad_norm": 0.586062589716001, "learning_rate": 3.213026081112955e-07, "loss": 0.2739, "step": 35937 }, { "epoch": 1.6835152480442217, "grad_norm": 0.6028587518758934, "learning_rate": 3.212096037078785e-07, "loss": 0.2749, "step": 35938 }, { "epoch": 1.68356209303415, "grad_norm": 0.5840592981703076, "learning_rate": 3.2111661184285215e-07, "loss": 0.2642, "step": 35939 }, { "epoch": 1.6836089380240784, "grad_norm": 0.6221928750818517, "learning_rate": 3.210236325167521e-07, "loss": 0.2654, "step": 35940 }, { "epoch": 1.6836557830140066, "grad_norm": 0.7117074925874012, "learning_rate": 3.2093066573011337e-07, "loss": 0.2965, "step": 35941 }, { "epoch": 1.6837026280039349, "grad_norm": 0.6006135573847889, "learning_rate": 3.2083771148347096e-07, "loss": 0.277, "step": 35942 }, { "epoch": 1.6837494729938633, "grad_norm": 0.6312035472093355, "learning_rate": 3.207447697773605e-07, "loss": 0.2843, "step": 35943 }, { "epoch": 1.6837963179837916, "grad_norm": 0.6009572655862002, "learning_rate": 3.2065184061231547e-07, "loss": 0.2697, "step": 35944 }, { "epoch": 1.6838431629737198, "grad_norm": 0.5832309785036659, "learning_rate": 3.205589239888718e-07, "loss": 0.2721, "step": 35945 }, { "epoch": 1.6838900079636483, "grad_norm": 0.5983478091070149, "learning_rate": 3.204660199075635e-07, "loss": 0.2558, "step": 35946 }, { "epoch": 1.6839368529535768, "grad_norm": 0.6081393406517043, "learning_rate": 3.2037312836892517e-07, "loss": 0.2925, "step": 35947 }, { "epoch": 1.6839836979435048, "grad_norm": 0.5899759408291451, "learning_rate": 3.202802493734916e-07, "loss": 0.2722, "step": 35948 }, { "epoch": 1.6840305429334332, "grad_norm": 0.6558553581621781, "learning_rate": 3.2018738292179746e-07, "loss": 0.2796, "step": 35949 }, { "epoch": 1.6840773879233617, "grad_norm": 0.5450702517359414, "learning_rate": 3.200945290143778e-07, "loss": 0.2664, "step": 35950 }, { "epoch": 1.68412423291329, "grad_norm": 0.5888382365315191, "learning_rate": 3.200016876517656e-07, "loss": 0.2629, "step": 35951 }, { "epoch": 1.6841710779032182, "grad_norm": 0.5975612349761815, "learning_rate": 3.1990885883449617e-07, "loss": 0.2864, "step": 35952 }, { "epoch": 1.6842179228931466, "grad_norm": 0.6470811960252982, "learning_rate": 3.198160425631028e-07, "loss": 0.2847, "step": 35953 }, { "epoch": 1.6842647678830749, "grad_norm": 0.6392703047871693, "learning_rate": 3.1972323883811994e-07, "loss": 0.2795, "step": 35954 }, { "epoch": 1.6843116128730031, "grad_norm": 0.5551986555325001, "learning_rate": 3.196304476600823e-07, "loss": 0.259, "step": 35955 }, { "epoch": 1.6843584578629316, "grad_norm": 0.6513977290332166, "learning_rate": 3.1953766902952294e-07, "loss": 0.2769, "step": 35956 }, { "epoch": 1.6844053028528598, "grad_norm": 0.616827216822831, "learning_rate": 3.1944490294697707e-07, "loss": 0.2752, "step": 35957 }, { "epoch": 1.684452147842788, "grad_norm": 0.6056960252565894, "learning_rate": 3.193521494129778e-07, "loss": 0.2655, "step": 35958 }, { "epoch": 1.6844989928327165, "grad_norm": 0.5931768071809443, "learning_rate": 3.1925940842805814e-07, "loss": 0.2698, "step": 35959 }, { "epoch": 1.684545837822645, "grad_norm": 0.601193365955295, "learning_rate": 3.1916667999275257e-07, "loss": 0.283, "step": 35960 }, { "epoch": 1.6845926828125732, "grad_norm": 0.6207819583968285, "learning_rate": 3.1907396410759435e-07, "loss": 0.2577, "step": 35961 }, { "epoch": 1.6846395278025015, "grad_norm": 0.5991677484804117, "learning_rate": 3.189812607731177e-07, "loss": 0.2685, "step": 35962 }, { "epoch": 1.68468637279243, "grad_norm": 0.6017311013413247, "learning_rate": 3.1888856998985585e-07, "loss": 0.2725, "step": 35963 }, { "epoch": 1.6847332177823582, "grad_norm": 0.6105500274452403, "learning_rate": 3.1879589175834197e-07, "loss": 0.267, "step": 35964 }, { "epoch": 1.6847800627722864, "grad_norm": 0.5944037089947802, "learning_rate": 3.187032260791098e-07, "loss": 0.2586, "step": 35965 }, { "epoch": 1.6848269077622149, "grad_norm": 0.6019220935197992, "learning_rate": 3.186105729526917e-07, "loss": 0.2708, "step": 35966 }, { "epoch": 1.6848737527521431, "grad_norm": 0.5870345968596073, "learning_rate": 3.185179323796217e-07, "loss": 0.2667, "step": 35967 }, { "epoch": 1.6849205977420714, "grad_norm": 0.5949845545095069, "learning_rate": 3.184253043604327e-07, "loss": 0.2871, "step": 35968 }, { "epoch": 1.6849674427319998, "grad_norm": 0.5614127184491621, "learning_rate": 3.183326888956581e-07, "loss": 0.2683, "step": 35969 }, { "epoch": 1.6850142877219283, "grad_norm": 0.6122483999070097, "learning_rate": 3.1824008598583006e-07, "loss": 0.2733, "step": 35970 }, { "epoch": 1.6850611327118563, "grad_norm": 0.623830684646676, "learning_rate": 3.18147495631482e-07, "loss": 0.2835, "step": 35971 }, { "epoch": 1.6851079777017848, "grad_norm": 0.5665204777830747, "learning_rate": 3.1805491783314713e-07, "loss": 0.2636, "step": 35972 }, { "epoch": 1.6851548226917132, "grad_norm": 0.5865284777405068, "learning_rate": 3.179623525913575e-07, "loss": 0.2652, "step": 35973 }, { "epoch": 1.6852016676816415, "grad_norm": 0.6414401793123906, "learning_rate": 3.17869799906646e-07, "loss": 0.2749, "step": 35974 }, { "epoch": 1.6852485126715697, "grad_norm": 0.587491294772635, "learning_rate": 3.177772597795459e-07, "loss": 0.2811, "step": 35975 }, { "epoch": 1.6852953576614982, "grad_norm": 0.5793290054519918, "learning_rate": 3.1768473221058857e-07, "loss": 0.2642, "step": 35976 }, { "epoch": 1.6853422026514264, "grad_norm": 0.5976177028529561, "learning_rate": 3.1759221720030706e-07, "loss": 0.2676, "step": 35977 }, { "epoch": 1.6853890476413547, "grad_norm": 0.5449129082464897, "learning_rate": 3.17499714749234e-07, "loss": 0.251, "step": 35978 }, { "epoch": 1.6854358926312831, "grad_norm": 0.6234481474787898, "learning_rate": 3.174072248579019e-07, "loss": 0.2726, "step": 35979 }, { "epoch": 1.6854827376212114, "grad_norm": 0.620135371932485, "learning_rate": 3.1731474752684204e-07, "loss": 0.274, "step": 35980 }, { "epoch": 1.6855295826111396, "grad_norm": 0.5987167547951934, "learning_rate": 3.1722228275658793e-07, "loss": 0.2692, "step": 35981 }, { "epoch": 1.685576427601068, "grad_norm": 0.6091692551759282, "learning_rate": 3.171298305476703e-07, "loss": 0.2676, "step": 35982 }, { "epoch": 1.6856232725909965, "grad_norm": 0.5609940496104672, "learning_rate": 3.170373909006219e-07, "loss": 0.2603, "step": 35983 }, { "epoch": 1.6856701175809246, "grad_norm": 0.6283324913238619, "learning_rate": 3.1694496381597467e-07, "loss": 0.2855, "step": 35984 }, { "epoch": 1.685716962570853, "grad_norm": 0.6237951451001208, "learning_rate": 3.1685254929426064e-07, "loss": 0.2696, "step": 35985 }, { "epoch": 1.6857638075607815, "grad_norm": 0.5827233087433435, "learning_rate": 3.1676014733601187e-07, "loss": 0.2649, "step": 35986 }, { "epoch": 1.6858106525507097, "grad_norm": 0.5894365452111274, "learning_rate": 3.1666775794175956e-07, "loss": 0.2629, "step": 35987 }, { "epoch": 1.685857497540638, "grad_norm": 0.6200958160038503, "learning_rate": 3.1657538111203506e-07, "loss": 0.268, "step": 35988 }, { "epoch": 1.6859043425305664, "grad_norm": 0.6298534847244517, "learning_rate": 3.1648301684737067e-07, "loss": 0.2916, "step": 35989 }, { "epoch": 1.6859511875204947, "grad_norm": 0.569340308005531, "learning_rate": 3.163906651482976e-07, "loss": 0.27, "step": 35990 }, { "epoch": 1.685998032510423, "grad_norm": 0.5918381325874889, "learning_rate": 3.162983260153474e-07, "loss": 0.2738, "step": 35991 }, { "epoch": 1.6860448775003514, "grad_norm": 0.593434316150968, "learning_rate": 3.1620599944905186e-07, "loss": 0.2718, "step": 35992 }, { "epoch": 1.6860917224902796, "grad_norm": 0.6313871101468876, "learning_rate": 3.1611368544994164e-07, "loss": 0.2897, "step": 35993 }, { "epoch": 1.6861385674802079, "grad_norm": 0.5635229843543673, "learning_rate": 3.160213840185486e-07, "loss": 0.2645, "step": 35994 }, { "epoch": 1.6861854124701363, "grad_norm": 0.5774376219517979, "learning_rate": 3.159290951554034e-07, "loss": 0.2798, "step": 35995 }, { "epoch": 1.6862322574600648, "grad_norm": 0.5574824873849268, "learning_rate": 3.1583681886103694e-07, "loss": 0.2537, "step": 35996 }, { "epoch": 1.686279102449993, "grad_norm": 0.5969364987829181, "learning_rate": 3.1574455513598055e-07, "loss": 0.2675, "step": 35997 }, { "epoch": 1.6863259474399213, "grad_norm": 0.5798947194535179, "learning_rate": 3.1565230398076546e-07, "loss": 0.2712, "step": 35998 }, { "epoch": 1.6863727924298497, "grad_norm": 0.5647243040376749, "learning_rate": 3.1556006539592287e-07, "loss": 0.2678, "step": 35999 }, { "epoch": 1.686419637419778, "grad_norm": 0.6092033986759917, "learning_rate": 3.154678393819832e-07, "loss": 0.2876, "step": 36000 }, { "epoch": 1.6864664824097062, "grad_norm": 0.6257668801087582, "learning_rate": 3.1537562593947634e-07, "loss": 0.2686, "step": 36001 }, { "epoch": 1.6865133273996347, "grad_norm": 0.631744243769355, "learning_rate": 3.152834250689338e-07, "loss": 0.2961, "step": 36002 }, { "epoch": 1.686560172389563, "grad_norm": 0.5737617389029162, "learning_rate": 3.1519123677088563e-07, "loss": 0.2589, "step": 36003 }, { "epoch": 1.6866070173794911, "grad_norm": 0.6275481400317763, "learning_rate": 3.1509906104586317e-07, "loss": 0.2622, "step": 36004 }, { "epoch": 1.6866538623694196, "grad_norm": 0.6134332092730189, "learning_rate": 3.150068978943968e-07, "loss": 0.2779, "step": 36005 }, { "epoch": 1.686700707359348, "grad_norm": 0.6020503364508487, "learning_rate": 3.149147473170164e-07, "loss": 0.2854, "step": 36006 }, { "epoch": 1.686747552349276, "grad_norm": 0.641811293093624, "learning_rate": 3.1482260931425256e-07, "loss": 0.277, "step": 36007 }, { "epoch": 1.6867943973392046, "grad_norm": 0.5522351601399763, "learning_rate": 3.14730483886635e-07, "loss": 0.2412, "step": 36008 }, { "epoch": 1.686841242329133, "grad_norm": 0.6781873286528539, "learning_rate": 3.1463837103469424e-07, "loss": 0.2937, "step": 36009 }, { "epoch": 1.6868880873190613, "grad_norm": 0.616899539631724, "learning_rate": 3.1454627075896025e-07, "loss": 0.2717, "step": 36010 }, { "epoch": 1.6869349323089895, "grad_norm": 0.6301872237441718, "learning_rate": 3.1445418305996315e-07, "loss": 0.2708, "step": 36011 }, { "epoch": 1.686981777298918, "grad_norm": 0.5926620571397971, "learning_rate": 3.143621079382336e-07, "loss": 0.2752, "step": 36012 }, { "epoch": 1.6870286222888462, "grad_norm": 0.5996395803571168, "learning_rate": 3.142700453943004e-07, "loss": 0.2724, "step": 36013 }, { "epoch": 1.6870754672787744, "grad_norm": 0.5934381920624207, "learning_rate": 3.141779954286941e-07, "loss": 0.269, "step": 36014 }, { "epoch": 1.687122312268703, "grad_norm": 0.5842459921297976, "learning_rate": 3.1408595804194336e-07, "loss": 0.2601, "step": 36015 }, { "epoch": 1.6871691572586311, "grad_norm": 0.6072265470075688, "learning_rate": 3.1399393323457873e-07, "loss": 0.2716, "step": 36016 }, { "epoch": 1.6872160022485594, "grad_norm": 0.6484605398513028, "learning_rate": 3.1390192100712953e-07, "loss": 0.2995, "step": 36017 }, { "epoch": 1.6872628472384879, "grad_norm": 0.5730322473334997, "learning_rate": 3.138099213601259e-07, "loss": 0.2672, "step": 36018 }, { "epoch": 1.6873096922284163, "grad_norm": 0.6199027509843195, "learning_rate": 3.1371793429409604e-07, "loss": 0.277, "step": 36019 }, { "epoch": 1.6873565372183443, "grad_norm": 0.619518881729746, "learning_rate": 3.1362595980957005e-07, "loss": 0.2762, "step": 36020 }, { "epoch": 1.6874033822082728, "grad_norm": 0.5986690368969806, "learning_rate": 3.1353399790707755e-07, "loss": 0.2783, "step": 36021 }, { "epoch": 1.6874502271982013, "grad_norm": 0.587210891584412, "learning_rate": 3.13442048587147e-07, "loss": 0.2656, "step": 36022 }, { "epoch": 1.6874970721881295, "grad_norm": 0.5888745144321731, "learning_rate": 3.1335011185030796e-07, "loss": 0.2698, "step": 36023 }, { "epoch": 1.6875439171780577, "grad_norm": 0.6212186545983929, "learning_rate": 3.132581876970897e-07, "loss": 0.2781, "step": 36024 }, { "epoch": 1.6875907621679862, "grad_norm": 0.6307143262888301, "learning_rate": 3.131662761280205e-07, "loss": 0.2943, "step": 36025 }, { "epoch": 1.6876376071579144, "grad_norm": 0.6067439768372443, "learning_rate": 3.130743771436298e-07, "loss": 0.2668, "step": 36026 }, { "epoch": 1.6876844521478427, "grad_norm": 0.6015170020132075, "learning_rate": 3.129824907444462e-07, "loss": 0.2731, "step": 36027 }, { "epoch": 1.6877312971377711, "grad_norm": 0.6378505871473541, "learning_rate": 3.128906169309995e-07, "loss": 0.2843, "step": 36028 }, { "epoch": 1.6877781421276994, "grad_norm": 0.5807166865910475, "learning_rate": 3.12798755703817e-07, "loss": 0.2626, "step": 36029 }, { "epoch": 1.6878249871176276, "grad_norm": 0.5851012073333194, "learning_rate": 3.1270690706342837e-07, "loss": 0.2409, "step": 36030 }, { "epoch": 1.687871832107556, "grad_norm": 0.6152407352908393, "learning_rate": 3.126150710103612e-07, "loss": 0.2824, "step": 36031 }, { "epoch": 1.6879186770974846, "grad_norm": 0.5906046641647504, "learning_rate": 3.125232475451445e-07, "loss": 0.267, "step": 36032 }, { "epoch": 1.6879655220874128, "grad_norm": 0.5832369081473615, "learning_rate": 3.124314366683068e-07, "loss": 0.2712, "step": 36033 }, { "epoch": 1.688012367077341, "grad_norm": 0.5796134496983459, "learning_rate": 3.123396383803762e-07, "loss": 0.268, "step": 36034 }, { "epoch": 1.6880592120672695, "grad_norm": 0.5915807595839943, "learning_rate": 3.12247852681882e-07, "loss": 0.2715, "step": 36035 }, { "epoch": 1.6881060570571977, "grad_norm": 0.6477955088155566, "learning_rate": 3.1215607957335107e-07, "loss": 0.2797, "step": 36036 }, { "epoch": 1.688152902047126, "grad_norm": 0.5911170282855467, "learning_rate": 3.120643190553119e-07, "loss": 0.2832, "step": 36037 }, { "epoch": 1.6881997470370544, "grad_norm": 0.5823402871695356, "learning_rate": 3.119725711282925e-07, "loss": 0.2667, "step": 36038 }, { "epoch": 1.6882465920269827, "grad_norm": 0.6191667986786913, "learning_rate": 3.11880835792821e-07, "loss": 0.2774, "step": 36039 }, { "epoch": 1.688293437016911, "grad_norm": 0.5668236102843156, "learning_rate": 3.117891130494252e-07, "loss": 0.26, "step": 36040 }, { "epoch": 1.6883402820068394, "grad_norm": 0.6302484430550445, "learning_rate": 3.116974028986339e-07, "loss": 0.2793, "step": 36041 }, { "epoch": 1.6883871269967679, "grad_norm": 0.5503513840629137, "learning_rate": 3.116057053409732e-07, "loss": 0.27, "step": 36042 }, { "epoch": 1.6884339719866959, "grad_norm": 0.5769341671549914, "learning_rate": 3.115140203769723e-07, "loss": 0.2619, "step": 36043 }, { "epoch": 1.6884808169766243, "grad_norm": 0.5720188441834549, "learning_rate": 3.1142234800715765e-07, "loss": 0.2622, "step": 36044 }, { "epoch": 1.6885276619665528, "grad_norm": 0.6623359936282242, "learning_rate": 3.1133068823205715e-07, "loss": 0.2884, "step": 36045 }, { "epoch": 1.688574506956481, "grad_norm": 0.6095996610166468, "learning_rate": 3.112390410521987e-07, "loss": 0.2863, "step": 36046 }, { "epoch": 1.6886213519464093, "grad_norm": 0.5855207574334016, "learning_rate": 3.1114740646810915e-07, "loss": 0.2607, "step": 36047 }, { "epoch": 1.6886681969363377, "grad_norm": 0.5801378733084978, "learning_rate": 3.110557844803169e-07, "loss": 0.2469, "step": 36048 }, { "epoch": 1.688715041926266, "grad_norm": 0.5552995614167837, "learning_rate": 3.109641750893483e-07, "loss": 0.2578, "step": 36049 }, { "epoch": 1.6887618869161942, "grad_norm": 0.6551859770544829, "learning_rate": 3.108725782957303e-07, "loss": 0.2862, "step": 36050 }, { "epoch": 1.6888087319061227, "grad_norm": 0.6114713033794592, "learning_rate": 3.1078099409999037e-07, "loss": 0.2817, "step": 36051 }, { "epoch": 1.688855576896051, "grad_norm": 0.6416774204613898, "learning_rate": 3.106894225026555e-07, "loss": 0.2802, "step": 36052 }, { "epoch": 1.6889024218859792, "grad_norm": 0.6638878968789796, "learning_rate": 3.1059786350425255e-07, "loss": 0.2856, "step": 36053 }, { "epoch": 1.6889492668759076, "grad_norm": 0.5966658147131552, "learning_rate": 3.1050631710530936e-07, "loss": 0.2738, "step": 36054 }, { "epoch": 1.688996111865836, "grad_norm": 0.6161865499810032, "learning_rate": 3.1041478330635146e-07, "loss": 0.2414, "step": 36055 }, { "epoch": 1.689042956855764, "grad_norm": 0.5839951885971055, "learning_rate": 3.1032326210790667e-07, "loss": 0.264, "step": 36056 }, { "epoch": 1.6890898018456926, "grad_norm": 0.6011574040533554, "learning_rate": 3.1023175351050067e-07, "loss": 0.2729, "step": 36057 }, { "epoch": 1.689136646835621, "grad_norm": 0.5930608499033382, "learning_rate": 3.101402575146606e-07, "loss": 0.2721, "step": 36058 }, { "epoch": 1.6891834918255493, "grad_norm": 0.6020467392983465, "learning_rate": 3.100487741209127e-07, "loss": 0.2589, "step": 36059 }, { "epoch": 1.6892303368154775, "grad_norm": 0.5796845045228342, "learning_rate": 3.099573033297845e-07, "loss": 0.2623, "step": 36060 }, { "epoch": 1.689277181805406, "grad_norm": 0.5956651533695484, "learning_rate": 3.0986584514180075e-07, "loss": 0.2766, "step": 36061 }, { "epoch": 1.6893240267953342, "grad_norm": 0.5875975633537666, "learning_rate": 3.0977439955748895e-07, "loss": 0.2675, "step": 36062 }, { "epoch": 1.6893708717852625, "grad_norm": 0.6252163769568363, "learning_rate": 3.096829665773751e-07, "loss": 0.2768, "step": 36063 }, { "epoch": 1.689417716775191, "grad_norm": 0.5801431476930723, "learning_rate": 3.0959154620198496e-07, "loss": 0.2729, "step": 36064 }, { "epoch": 1.6894645617651192, "grad_norm": 0.6251743765245839, "learning_rate": 3.0950013843184496e-07, "loss": 0.2864, "step": 36065 }, { "epoch": 1.6895114067550474, "grad_norm": 0.574640232524361, "learning_rate": 3.094087432674817e-07, "loss": 0.2669, "step": 36066 }, { "epoch": 1.6895582517449759, "grad_norm": 0.5712275147348178, "learning_rate": 3.093173607094199e-07, "loss": 0.256, "step": 36067 }, { "epoch": 1.6896050967349043, "grad_norm": 0.6280139747532463, "learning_rate": 3.09225990758186e-07, "loss": 0.2713, "step": 36068 }, { "epoch": 1.6896519417248326, "grad_norm": 0.5882285180611888, "learning_rate": 3.091346334143061e-07, "loss": 0.2783, "step": 36069 }, { "epoch": 1.6896987867147608, "grad_norm": 0.6544403092599872, "learning_rate": 3.09043288678306e-07, "loss": 0.2828, "step": 36070 }, { "epoch": 1.6897456317046893, "grad_norm": 0.6040407591731144, "learning_rate": 3.0895195655071067e-07, "loss": 0.2738, "step": 36071 }, { "epoch": 1.6897924766946175, "grad_norm": 0.6010175403002527, "learning_rate": 3.088606370320468e-07, "loss": 0.2696, "step": 36072 }, { "epoch": 1.6898393216845458, "grad_norm": 0.6727896912193604, "learning_rate": 3.0876933012283866e-07, "loss": 0.2891, "step": 36073 }, { "epoch": 1.6898861666744742, "grad_norm": 0.5976035025315901, "learning_rate": 3.0867803582361217e-07, "loss": 0.2594, "step": 36074 }, { "epoch": 1.6899330116644025, "grad_norm": 0.6029320317630479, "learning_rate": 3.0858675413489303e-07, "loss": 0.2574, "step": 36075 }, { "epoch": 1.6899798566543307, "grad_norm": 0.6023827912875942, "learning_rate": 3.084954850572061e-07, "loss": 0.2815, "step": 36076 }, { "epoch": 1.6900267016442592, "grad_norm": 0.5798233630781647, "learning_rate": 3.084042285910774e-07, "loss": 0.2713, "step": 36077 }, { "epoch": 1.6900735466341876, "grad_norm": 0.5783881914021547, "learning_rate": 3.083129847370311e-07, "loss": 0.2653, "step": 36078 }, { "epoch": 1.6901203916241156, "grad_norm": 0.6193468243561269, "learning_rate": 3.082217534955931e-07, "loss": 0.268, "step": 36079 }, { "epoch": 1.690167236614044, "grad_norm": 0.6219334169800773, "learning_rate": 3.081305348672875e-07, "loss": 0.2922, "step": 36080 }, { "epoch": 1.6902140816039726, "grad_norm": 0.5850341922592103, "learning_rate": 3.080393288526401e-07, "loss": 0.2735, "step": 36081 }, { "epoch": 1.6902609265939008, "grad_norm": 0.6368935411623627, "learning_rate": 3.0794813545217516e-07, "loss": 0.2705, "step": 36082 }, { "epoch": 1.690307771583829, "grad_norm": 0.6165453918831041, "learning_rate": 3.078569546664181e-07, "loss": 0.2703, "step": 36083 }, { "epoch": 1.6903546165737575, "grad_norm": 0.6482173497075102, "learning_rate": 3.07765786495893e-07, "loss": 0.2732, "step": 36084 }, { "epoch": 1.6904014615636858, "grad_norm": 0.6386813425463279, "learning_rate": 3.076746309411252e-07, "loss": 0.2872, "step": 36085 }, { "epoch": 1.690448306553614, "grad_norm": 0.5624859368662466, "learning_rate": 3.0758348800263846e-07, "loss": 0.2395, "step": 36086 }, { "epoch": 1.6904951515435425, "grad_norm": 0.6010499909040099, "learning_rate": 3.074923576809577e-07, "loss": 0.2811, "step": 36087 }, { "epoch": 1.6905419965334707, "grad_norm": 0.5894990595201652, "learning_rate": 3.0740123997660745e-07, "loss": 0.2644, "step": 36088 }, { "epoch": 1.690588841523399, "grad_norm": 0.5983256993687674, "learning_rate": 3.0731013489011196e-07, "loss": 0.2796, "step": 36089 }, { "epoch": 1.6906356865133274, "grad_norm": 0.590973228096219, "learning_rate": 3.0721904242199593e-07, "loss": 0.2695, "step": 36090 }, { "epoch": 1.6906825315032559, "grad_norm": 0.6102028822183283, "learning_rate": 3.0712796257278273e-07, "loss": 0.2636, "step": 36091 }, { "epoch": 1.6907293764931839, "grad_norm": 0.5897243791653661, "learning_rate": 3.0703689534299725e-07, "loss": 0.2854, "step": 36092 }, { "epoch": 1.6907762214831124, "grad_norm": 0.5880206686817223, "learning_rate": 3.0694584073316295e-07, "loss": 0.2758, "step": 36093 }, { "epoch": 1.6908230664730408, "grad_norm": 0.5863631477320526, "learning_rate": 3.068547987438042e-07, "loss": 0.2631, "step": 36094 }, { "epoch": 1.690869911462969, "grad_norm": 0.5884320972346324, "learning_rate": 3.0676376937544464e-07, "loss": 0.263, "step": 36095 }, { "epoch": 1.6909167564528973, "grad_norm": 0.6199741354039955, "learning_rate": 3.066727526286084e-07, "loss": 0.2867, "step": 36096 }, { "epoch": 1.6909636014428258, "grad_norm": 0.5669273589203375, "learning_rate": 3.065817485038197e-07, "loss": 0.2653, "step": 36097 }, { "epoch": 1.691010446432754, "grad_norm": 0.6035218702969097, "learning_rate": 3.064907570016021e-07, "loss": 0.265, "step": 36098 }, { "epoch": 1.6910572914226822, "grad_norm": 0.575114587777447, "learning_rate": 3.063997781224778e-07, "loss": 0.2705, "step": 36099 }, { "epoch": 1.6911041364126107, "grad_norm": 0.6221578284583227, "learning_rate": 3.0630881186697186e-07, "loss": 0.269, "step": 36100 }, { "epoch": 1.691150981402539, "grad_norm": 0.562964855168202, "learning_rate": 3.0621785823560704e-07, "loss": 0.2595, "step": 36101 }, { "epoch": 1.6911978263924672, "grad_norm": 0.6123758162955502, "learning_rate": 3.061269172289072e-07, "loss": 0.2788, "step": 36102 }, { "epoch": 1.6912446713823956, "grad_norm": 0.555322417883215, "learning_rate": 3.060359888473963e-07, "loss": 0.2617, "step": 36103 }, { "epoch": 1.691291516372324, "grad_norm": 0.5818000451635498, "learning_rate": 3.059450730915961e-07, "loss": 0.2592, "step": 36104 }, { "epoch": 1.6913383613622524, "grad_norm": 0.57209631332979, "learning_rate": 3.05854169962031e-07, "loss": 0.265, "step": 36105 }, { "epoch": 1.6913852063521806, "grad_norm": 0.5855477158349985, "learning_rate": 3.057632794592233e-07, "loss": 0.2761, "step": 36106 }, { "epoch": 1.691432051342109, "grad_norm": 0.5712945890881309, "learning_rate": 3.056724015836962e-07, "loss": 0.2705, "step": 36107 }, { "epoch": 1.6914788963320373, "grad_norm": 0.5652897686198978, "learning_rate": 3.0558153633597295e-07, "loss": 0.2451, "step": 36108 }, { "epoch": 1.6915257413219655, "grad_norm": 0.5881778598396842, "learning_rate": 3.054906837165772e-07, "loss": 0.2592, "step": 36109 }, { "epoch": 1.691572586311894, "grad_norm": 0.6261292022998545, "learning_rate": 3.0539984372603025e-07, "loss": 0.2806, "step": 36110 }, { "epoch": 1.6916194313018222, "grad_norm": 0.6017156789168987, "learning_rate": 3.053090163648556e-07, "loss": 0.2798, "step": 36111 }, { "epoch": 1.6916662762917505, "grad_norm": 0.6643310735079101, "learning_rate": 3.052182016335767e-07, "loss": 0.2811, "step": 36112 }, { "epoch": 1.691713121281679, "grad_norm": 0.595307544534785, "learning_rate": 3.0512739953271476e-07, "loss": 0.2621, "step": 36113 }, { "epoch": 1.6917599662716074, "grad_norm": 0.6570304346881615, "learning_rate": 3.0503661006279274e-07, "loss": 0.3022, "step": 36114 }, { "epoch": 1.6918068112615354, "grad_norm": 0.5955264362687674, "learning_rate": 3.049458332243341e-07, "loss": 0.262, "step": 36115 }, { "epoch": 1.6918536562514639, "grad_norm": 0.6117254963827429, "learning_rate": 3.0485506901786006e-07, "loss": 0.2799, "step": 36116 }, { "epoch": 1.6919005012413924, "grad_norm": 0.6191522974534369, "learning_rate": 3.04764317443893e-07, "loss": 0.2821, "step": 36117 }, { "epoch": 1.6919473462313206, "grad_norm": 0.6100704528338666, "learning_rate": 3.046735785029559e-07, "loss": 0.2641, "step": 36118 }, { "epoch": 1.6919941912212488, "grad_norm": 0.5996731616468541, "learning_rate": 3.04582852195571e-07, "loss": 0.27, "step": 36119 }, { "epoch": 1.6920410362111773, "grad_norm": 0.6052872676809328, "learning_rate": 3.0449213852225936e-07, "loss": 0.2663, "step": 36120 }, { "epoch": 1.6920878812011055, "grad_norm": 0.6031296402777507, "learning_rate": 3.044014374835444e-07, "loss": 0.2891, "step": 36121 }, { "epoch": 1.6921347261910338, "grad_norm": 0.5814733023466383, "learning_rate": 3.043107490799466e-07, "loss": 0.2566, "step": 36122 }, { "epoch": 1.6921815711809622, "grad_norm": 0.5819969937145284, "learning_rate": 3.0422007331198876e-07, "loss": 0.2635, "step": 36123 }, { "epoch": 1.6922284161708905, "grad_norm": 0.6157863951226898, "learning_rate": 3.0412941018019227e-07, "loss": 0.2673, "step": 36124 }, { "epoch": 1.6922752611608187, "grad_norm": 0.6185935886088018, "learning_rate": 3.040387596850791e-07, "loss": 0.273, "step": 36125 }, { "epoch": 1.6923221061507472, "grad_norm": 0.6071264707655785, "learning_rate": 3.039481218271717e-07, "loss": 0.2744, "step": 36126 }, { "epoch": 1.6923689511406756, "grad_norm": 0.6011314957393615, "learning_rate": 3.038574966069907e-07, "loss": 0.2583, "step": 36127 }, { "epoch": 1.6924157961306037, "grad_norm": 0.6257591883106902, "learning_rate": 3.037668840250574e-07, "loss": 0.2849, "step": 36128 }, { "epoch": 1.6924626411205321, "grad_norm": 0.6403269408950132, "learning_rate": 3.036762840818938e-07, "loss": 0.2734, "step": 36129 }, { "epoch": 1.6925094861104606, "grad_norm": 0.589679517878468, "learning_rate": 3.03585696778021e-07, "loss": 0.2609, "step": 36130 }, { "epoch": 1.6925563311003888, "grad_norm": 0.5702706329005306, "learning_rate": 3.0349512211396037e-07, "loss": 0.2627, "step": 36131 }, { "epoch": 1.692603176090317, "grad_norm": 0.5764887551231392, "learning_rate": 3.0340456009023384e-07, "loss": 0.2634, "step": 36132 }, { "epoch": 1.6926500210802455, "grad_norm": 0.5854462043603623, "learning_rate": 3.0331401070736147e-07, "loss": 0.2832, "step": 36133 }, { "epoch": 1.6926968660701738, "grad_norm": 0.6018277565952593, "learning_rate": 3.0322347396586546e-07, "loss": 0.2804, "step": 36134 }, { "epoch": 1.692743711060102, "grad_norm": 0.5911887590940952, "learning_rate": 3.031329498662658e-07, "loss": 0.2731, "step": 36135 }, { "epoch": 1.6927905560500305, "grad_norm": 0.6234306275689279, "learning_rate": 3.0304243840908356e-07, "loss": 0.2785, "step": 36136 }, { "epoch": 1.6928374010399587, "grad_norm": 0.6294431771804753, "learning_rate": 3.029519395948402e-07, "loss": 0.2666, "step": 36137 }, { "epoch": 1.692884246029887, "grad_norm": 0.6496885642693929, "learning_rate": 3.028614534240559e-07, "loss": 0.2826, "step": 36138 }, { "epoch": 1.6929310910198154, "grad_norm": 0.5873778543198425, "learning_rate": 3.0277097989725253e-07, "loss": 0.2704, "step": 36139 }, { "epoch": 1.6929779360097439, "grad_norm": 0.5630392132340523, "learning_rate": 3.0268051901494953e-07, "loss": 0.2637, "step": 36140 }, { "epoch": 1.6930247809996721, "grad_norm": 0.6008601086307, "learning_rate": 3.025900707776677e-07, "loss": 0.2654, "step": 36141 }, { "epoch": 1.6930716259896004, "grad_norm": 0.5835059646821277, "learning_rate": 3.024996351859277e-07, "loss": 0.2687, "step": 36142 }, { "epoch": 1.6931184709795288, "grad_norm": 0.5809275465034568, "learning_rate": 3.024092122402497e-07, "loss": 0.2708, "step": 36143 }, { "epoch": 1.693165315969457, "grad_norm": 0.5741676382816608, "learning_rate": 3.0231880194115463e-07, "loss": 0.2628, "step": 36144 }, { "epoch": 1.6932121609593853, "grad_norm": 0.5751648783800236, "learning_rate": 3.022284042891624e-07, "loss": 0.2612, "step": 36145 }, { "epoch": 1.6932590059493138, "grad_norm": 0.5945384220775697, "learning_rate": 3.0213801928479364e-07, "loss": 0.2773, "step": 36146 }, { "epoch": 1.693305850939242, "grad_norm": 0.5970296280513894, "learning_rate": 3.020476469285685e-07, "loss": 0.2822, "step": 36147 }, { "epoch": 1.6933526959291703, "grad_norm": 0.6141148807478449, "learning_rate": 3.019572872210061e-07, "loss": 0.2621, "step": 36148 }, { "epoch": 1.6933995409190987, "grad_norm": 0.5846846001035167, "learning_rate": 3.0186694016262673e-07, "loss": 0.2657, "step": 36149 }, { "epoch": 1.6934463859090272, "grad_norm": 0.588699317191996, "learning_rate": 3.017766057539509e-07, "loss": 0.2619, "step": 36150 }, { "epoch": 1.6934932308989552, "grad_norm": 0.6208599520454092, "learning_rate": 3.016862839954984e-07, "loss": 0.2716, "step": 36151 }, { "epoch": 1.6935400758888837, "grad_norm": 0.5742610625335758, "learning_rate": 3.0159597488778916e-07, "loss": 0.2586, "step": 36152 }, { "epoch": 1.6935869208788121, "grad_norm": 0.566601324145463, "learning_rate": 3.0150567843134194e-07, "loss": 0.2715, "step": 36153 }, { "epoch": 1.6936337658687404, "grad_norm": 0.6368065490752579, "learning_rate": 3.014153946266776e-07, "loss": 0.2689, "step": 36154 }, { "epoch": 1.6936806108586686, "grad_norm": 0.5721314596428396, "learning_rate": 3.0132512347431447e-07, "loss": 0.257, "step": 36155 }, { "epoch": 1.693727455848597, "grad_norm": 0.5742443776306966, "learning_rate": 3.0123486497477263e-07, "loss": 0.2704, "step": 36156 }, { "epoch": 1.6937743008385253, "grad_norm": 0.6727850154459931, "learning_rate": 3.011446191285713e-07, "loss": 0.2878, "step": 36157 }, { "epoch": 1.6938211458284536, "grad_norm": 0.630803607689654, "learning_rate": 3.010543859362308e-07, "loss": 0.2849, "step": 36158 }, { "epoch": 1.693867990818382, "grad_norm": 0.5822127735644902, "learning_rate": 3.009641653982692e-07, "loss": 0.2572, "step": 36159 }, { "epoch": 1.6939148358083103, "grad_norm": 0.6528789237056234, "learning_rate": 3.0087395751520586e-07, "loss": 0.2885, "step": 36160 }, { "epoch": 1.6939616807982385, "grad_norm": 0.5985568726150787, "learning_rate": 3.007837622875609e-07, "loss": 0.2538, "step": 36161 }, { "epoch": 1.694008525788167, "grad_norm": 0.6211589536642883, "learning_rate": 3.006935797158522e-07, "loss": 0.2666, "step": 36162 }, { "epoch": 1.6940553707780954, "grad_norm": 0.5701744578610893, "learning_rate": 3.006034098005989e-07, "loss": 0.2766, "step": 36163 }, { "epoch": 1.6941022157680234, "grad_norm": 0.5856613862364547, "learning_rate": 3.005132525423207e-07, "loss": 0.2618, "step": 36164 }, { "epoch": 1.694149060757952, "grad_norm": 0.5670485429973946, "learning_rate": 3.0042310794153565e-07, "loss": 0.2683, "step": 36165 }, { "epoch": 1.6941959057478804, "grad_norm": 0.5928377170711387, "learning_rate": 3.0033297599876247e-07, "loss": 0.2638, "step": 36166 }, { "epoch": 1.6942427507378086, "grad_norm": 0.548542557308616, "learning_rate": 3.0024285671452044e-07, "loss": 0.2608, "step": 36167 }, { "epoch": 1.6942895957277369, "grad_norm": 0.6028386745801464, "learning_rate": 3.001527500893284e-07, "loss": 0.2744, "step": 36168 }, { "epoch": 1.6943364407176653, "grad_norm": 0.668123603102523, "learning_rate": 3.0006265612370355e-07, "loss": 0.2899, "step": 36169 }, { "epoch": 1.6943832857075936, "grad_norm": 0.5900506235312725, "learning_rate": 2.999725748181659e-07, "loss": 0.2675, "step": 36170 }, { "epoch": 1.6944301306975218, "grad_norm": 0.6639837292669857, "learning_rate": 2.9988250617323247e-07, "loss": 0.2942, "step": 36171 }, { "epoch": 1.6944769756874503, "grad_norm": 0.6129591104630397, "learning_rate": 2.9979245018942257e-07, "loss": 0.2768, "step": 36172 }, { "epoch": 1.6945238206773785, "grad_norm": 0.5304105582964518, "learning_rate": 2.997024068672538e-07, "loss": 0.2556, "step": 36173 }, { "epoch": 1.6945706656673067, "grad_norm": 0.64840056534303, "learning_rate": 2.996123762072448e-07, "loss": 0.2728, "step": 36174 }, { "epoch": 1.6946175106572352, "grad_norm": 0.6114298756793063, "learning_rate": 2.995223582099138e-07, "loss": 0.2672, "step": 36175 }, { "epoch": 1.6946643556471637, "grad_norm": 0.5739720366194421, "learning_rate": 2.9943235287577895e-07, "loss": 0.2678, "step": 36176 }, { "epoch": 1.694711200637092, "grad_norm": 0.5544624952716409, "learning_rate": 2.993423602053569e-07, "loss": 0.2566, "step": 36177 }, { "epoch": 1.6947580456270201, "grad_norm": 0.590498961290238, "learning_rate": 2.9925238019916676e-07, "loss": 0.2778, "step": 36178 }, { "epoch": 1.6948048906169486, "grad_norm": 0.6333830405601946, "learning_rate": 2.9916241285772584e-07, "loss": 0.2786, "step": 36179 }, { "epoch": 1.6948517356068769, "grad_norm": 0.5834922592032052, "learning_rate": 2.990724581815521e-07, "loss": 0.2618, "step": 36180 }, { "epoch": 1.694898580596805, "grad_norm": 0.5769458663518608, "learning_rate": 2.9898251617116404e-07, "loss": 0.2593, "step": 36181 }, { "epoch": 1.6949454255867336, "grad_norm": 0.5951506793266533, "learning_rate": 2.9889258682707755e-07, "loss": 0.2572, "step": 36182 }, { "epoch": 1.6949922705766618, "grad_norm": 0.6128350277744053, "learning_rate": 2.9880267014981143e-07, "loss": 0.2763, "step": 36183 }, { "epoch": 1.69503911556659, "grad_norm": 0.5874638342628494, "learning_rate": 2.987127661398823e-07, "loss": 0.2613, "step": 36184 }, { "epoch": 1.6950859605565185, "grad_norm": 0.5907260004668204, "learning_rate": 2.986228747978079e-07, "loss": 0.2644, "step": 36185 }, { "epoch": 1.695132805546447, "grad_norm": 0.5713268196232987, "learning_rate": 2.985329961241057e-07, "loss": 0.268, "step": 36186 }, { "epoch": 1.695179650536375, "grad_norm": 0.6081121422919128, "learning_rate": 2.984431301192928e-07, "loss": 0.2644, "step": 36187 }, { "epoch": 1.6952264955263034, "grad_norm": 0.5850730026263091, "learning_rate": 2.983532767838865e-07, "loss": 0.2726, "step": 36188 }, { "epoch": 1.695273340516232, "grad_norm": 0.6068353398000516, "learning_rate": 2.982634361184042e-07, "loss": 0.2722, "step": 36189 }, { "epoch": 1.6953201855061601, "grad_norm": 0.6048921397066563, "learning_rate": 2.981736081233616e-07, "loss": 0.2857, "step": 36190 }, { "epoch": 1.6953670304960884, "grad_norm": 0.5912377031785755, "learning_rate": 2.980837927992766e-07, "loss": 0.2653, "step": 36191 }, { "epoch": 1.6954138754860169, "grad_norm": 0.551585131290671, "learning_rate": 2.979939901466658e-07, "loss": 0.2664, "step": 36192 }, { "epoch": 1.695460720475945, "grad_norm": 0.5866179645295098, "learning_rate": 2.9790420016604623e-07, "loss": 0.2659, "step": 36193 }, { "epoch": 1.6955075654658733, "grad_norm": 0.5785835535659336, "learning_rate": 2.978144228579352e-07, "loss": 0.2636, "step": 36194 }, { "epoch": 1.6955544104558018, "grad_norm": 0.6001291451831724, "learning_rate": 2.9772465822284797e-07, "loss": 0.2736, "step": 36195 }, { "epoch": 1.69560125544573, "grad_norm": 0.5613889095838571, "learning_rate": 2.9763490626130237e-07, "loss": 0.2609, "step": 36196 }, { "epoch": 1.6956481004356583, "grad_norm": 0.5749316785439895, "learning_rate": 2.975451669738139e-07, "loss": 0.2737, "step": 36197 }, { "epoch": 1.6956949454255867, "grad_norm": 0.5883405715945964, "learning_rate": 2.9745544036089924e-07, "loss": 0.2626, "step": 36198 }, { "epoch": 1.6957417904155152, "grad_norm": 0.629403222776839, "learning_rate": 2.9736572642307504e-07, "loss": 0.2837, "step": 36199 }, { "epoch": 1.6957886354054432, "grad_norm": 0.607657549029781, "learning_rate": 2.972760251608578e-07, "loss": 0.2782, "step": 36200 }, { "epoch": 1.6958354803953717, "grad_norm": 0.6585334909720489, "learning_rate": 2.971863365747632e-07, "loss": 0.2702, "step": 36201 }, { "epoch": 1.6958823253853001, "grad_norm": 0.5982044127572083, "learning_rate": 2.970966606653075e-07, "loss": 0.2674, "step": 36202 }, { "epoch": 1.6959291703752284, "grad_norm": 0.606960626426604, "learning_rate": 2.9700699743300726e-07, "loss": 0.2836, "step": 36203 }, { "epoch": 1.6959760153651566, "grad_norm": 0.5978502877455989, "learning_rate": 2.969173468783776e-07, "loss": 0.2607, "step": 36204 }, { "epoch": 1.696022860355085, "grad_norm": 0.5565844103145687, "learning_rate": 2.968277090019348e-07, "loss": 0.2512, "step": 36205 }, { "epoch": 1.6960697053450133, "grad_norm": 0.5854628722733918, "learning_rate": 2.967380838041953e-07, "loss": 0.281, "step": 36206 }, { "epoch": 1.6961165503349416, "grad_norm": 0.6334091959949814, "learning_rate": 2.9664847128567405e-07, "loss": 0.2893, "step": 36207 }, { "epoch": 1.69616339532487, "grad_norm": 0.5901421925933649, "learning_rate": 2.9655887144688673e-07, "loss": 0.2758, "step": 36208 }, { "epoch": 1.6962102403147983, "grad_norm": 0.5984700817065747, "learning_rate": 2.964692842883496e-07, "loss": 0.2667, "step": 36209 }, { "epoch": 1.6962570853047265, "grad_norm": 0.6456163726848946, "learning_rate": 2.9637970981057804e-07, "loss": 0.2679, "step": 36210 }, { "epoch": 1.696303930294655, "grad_norm": 0.5866832853515109, "learning_rate": 2.962901480140873e-07, "loss": 0.2692, "step": 36211 }, { "epoch": 1.6963507752845834, "grad_norm": 0.6161550547443451, "learning_rate": 2.962005988993932e-07, "loss": 0.2624, "step": 36212 }, { "epoch": 1.6963976202745117, "grad_norm": 0.5982660507811906, "learning_rate": 2.961110624670102e-07, "loss": 0.2833, "step": 36213 }, { "epoch": 1.69644446526444, "grad_norm": 0.6240198160325972, "learning_rate": 2.960215387174542e-07, "loss": 0.2781, "step": 36214 }, { "epoch": 1.6964913102543684, "grad_norm": 0.5665776154948778, "learning_rate": 2.9593202765124035e-07, "loss": 0.261, "step": 36215 }, { "epoch": 1.6965381552442966, "grad_norm": 0.6007223979132046, "learning_rate": 2.958425292688835e-07, "loss": 0.2618, "step": 36216 }, { "epoch": 1.6965850002342249, "grad_norm": 0.6026734410100066, "learning_rate": 2.957530435708994e-07, "loss": 0.2717, "step": 36217 }, { "epoch": 1.6966318452241533, "grad_norm": 0.6072274119496206, "learning_rate": 2.956635705578023e-07, "loss": 0.2689, "step": 36218 }, { "epoch": 1.6966786902140816, "grad_norm": 0.6070593517384396, "learning_rate": 2.955741102301077e-07, "loss": 0.2803, "step": 36219 }, { "epoch": 1.6967255352040098, "grad_norm": 0.5792110696733043, "learning_rate": 2.9548466258832925e-07, "loss": 0.2578, "step": 36220 }, { "epoch": 1.6967723801939383, "grad_norm": 0.6234694566584076, "learning_rate": 2.9539522763298275e-07, "loss": 0.2712, "step": 36221 }, { "epoch": 1.6968192251838667, "grad_norm": 0.6301978110914713, "learning_rate": 2.953058053645827e-07, "loss": 0.2856, "step": 36222 }, { "epoch": 1.6968660701737948, "grad_norm": 0.5790859933256747, "learning_rate": 2.9521639578364404e-07, "loss": 0.2674, "step": 36223 }, { "epoch": 1.6969129151637232, "grad_norm": 0.6322973151709224, "learning_rate": 2.9512699889068017e-07, "loss": 0.2636, "step": 36224 }, { "epoch": 1.6969597601536517, "grad_norm": 0.6020985793090692, "learning_rate": 2.950376146862069e-07, "loss": 0.2645, "step": 36225 }, { "epoch": 1.69700660514358, "grad_norm": 0.609490741203259, "learning_rate": 2.949482431707376e-07, "loss": 0.2803, "step": 36226 }, { "epoch": 1.6970534501335082, "grad_norm": 0.6041736115062075, "learning_rate": 2.948588843447867e-07, "loss": 0.2661, "step": 36227 }, { "epoch": 1.6971002951234366, "grad_norm": 0.6211424743501965, "learning_rate": 2.947695382088686e-07, "loss": 0.2757, "step": 36228 }, { "epoch": 1.6971471401133649, "grad_norm": 0.5684847847913618, "learning_rate": 2.946802047634978e-07, "loss": 0.2703, "step": 36229 }, { "epoch": 1.6971939851032931, "grad_norm": 0.612401595364737, "learning_rate": 2.9459088400918853e-07, "loss": 0.2771, "step": 36230 }, { "epoch": 1.6972408300932216, "grad_norm": 0.6340861191833204, "learning_rate": 2.9450157594645396e-07, "loss": 0.2863, "step": 36231 }, { "epoch": 1.6972876750831498, "grad_norm": 0.5998174703183352, "learning_rate": 2.94412280575809e-07, "loss": 0.2867, "step": 36232 }, { "epoch": 1.697334520073078, "grad_norm": 0.5964980287484962, "learning_rate": 2.943229978977666e-07, "loss": 0.273, "step": 36233 }, { "epoch": 1.6973813650630065, "grad_norm": 0.6028177763139378, "learning_rate": 2.942337279128407e-07, "loss": 0.2722, "step": 36234 }, { "epoch": 1.697428210052935, "grad_norm": 0.597495355871772, "learning_rate": 2.9414447062154545e-07, "loss": 0.2703, "step": 36235 }, { "epoch": 1.697475055042863, "grad_norm": 0.6362486473592446, "learning_rate": 2.9405522602439425e-07, "loss": 0.2865, "step": 36236 }, { "epoch": 1.6975219000327915, "grad_norm": 0.570757541578886, "learning_rate": 2.939659941219014e-07, "loss": 0.2742, "step": 36237 }, { "epoch": 1.69756874502272, "grad_norm": 0.5905306173915901, "learning_rate": 2.938767749145799e-07, "loss": 0.269, "step": 36238 }, { "epoch": 1.6976155900126482, "grad_norm": 0.5675151521692688, "learning_rate": 2.937875684029423e-07, "loss": 0.2619, "step": 36239 }, { "epoch": 1.6976624350025764, "grad_norm": 0.6048240672800785, "learning_rate": 2.9369837458750297e-07, "loss": 0.2636, "step": 36240 }, { "epoch": 1.6977092799925049, "grad_norm": 0.5813653206581598, "learning_rate": 2.9360919346877485e-07, "loss": 0.2612, "step": 36241 }, { "epoch": 1.6977561249824331, "grad_norm": 0.602272159005436, "learning_rate": 2.9352002504727114e-07, "loss": 0.2625, "step": 36242 }, { "epoch": 1.6978029699723614, "grad_norm": 0.6318171509336931, "learning_rate": 2.934308693235058e-07, "loss": 0.2775, "step": 36243 }, { "epoch": 1.6978498149622898, "grad_norm": 0.6173105609086665, "learning_rate": 2.9334172629799066e-07, "loss": 0.276, "step": 36244 }, { "epoch": 1.697896659952218, "grad_norm": 0.6457595813585062, "learning_rate": 2.9325259597123985e-07, "loss": 0.2951, "step": 36245 }, { "epoch": 1.6979435049421463, "grad_norm": 0.6058188742501261, "learning_rate": 2.9316347834376507e-07, "loss": 0.2785, "step": 36246 }, { "epoch": 1.6979903499320748, "grad_norm": 0.5932718702764644, "learning_rate": 2.9307437341607984e-07, "loss": 0.2694, "step": 36247 }, { "epoch": 1.6980371949220032, "grad_norm": 0.5726601342163179, "learning_rate": 2.929852811886968e-07, "loss": 0.2621, "step": 36248 }, { "epoch": 1.6980840399119315, "grad_norm": 0.5693077639166481, "learning_rate": 2.928962016621295e-07, "loss": 0.2607, "step": 36249 }, { "epoch": 1.6981308849018597, "grad_norm": 0.610227885510809, "learning_rate": 2.928071348368891e-07, "loss": 0.2795, "step": 36250 }, { "epoch": 1.6981777298917882, "grad_norm": 0.5695864808796695, "learning_rate": 2.9271808071348885e-07, "loss": 0.2532, "step": 36251 }, { "epoch": 1.6982245748817164, "grad_norm": 0.592860797247388, "learning_rate": 2.926290392924419e-07, "loss": 0.2625, "step": 36252 }, { "epoch": 1.6982714198716446, "grad_norm": 0.6588543976242733, "learning_rate": 2.9254001057425957e-07, "loss": 0.2877, "step": 36253 }, { "epoch": 1.6983182648615731, "grad_norm": 0.5981592626481109, "learning_rate": 2.9245099455945473e-07, "loss": 0.2661, "step": 36254 }, { "epoch": 1.6983651098515014, "grad_norm": 0.5774534567000736, "learning_rate": 2.923619912485398e-07, "loss": 0.2683, "step": 36255 }, { "epoch": 1.6984119548414296, "grad_norm": 0.5635942730270026, "learning_rate": 2.922730006420263e-07, "loss": 0.2704, "step": 36256 }, { "epoch": 1.698458799831358, "grad_norm": 0.5965878045584431, "learning_rate": 2.9218402274042685e-07, "loss": 0.262, "step": 36257 }, { "epoch": 1.6985056448212865, "grad_norm": 0.592133371314166, "learning_rate": 2.9209505754425355e-07, "loss": 0.2614, "step": 36258 }, { "epoch": 1.6985524898112145, "grad_norm": 0.649517196744356, "learning_rate": 2.9200610505401877e-07, "loss": 0.2895, "step": 36259 }, { "epoch": 1.698599334801143, "grad_norm": 0.6004629281631674, "learning_rate": 2.9191716527023323e-07, "loss": 0.2726, "step": 36260 }, { "epoch": 1.6986461797910715, "grad_norm": 0.5811456804903516, "learning_rate": 2.918282381934098e-07, "loss": 0.2703, "step": 36261 }, { "epoch": 1.6986930247809997, "grad_norm": 0.6180782828016412, "learning_rate": 2.917393238240596e-07, "loss": 0.2792, "step": 36262 }, { "epoch": 1.698739869770928, "grad_norm": 0.5896942503976698, "learning_rate": 2.9165042216269453e-07, "loss": 0.2841, "step": 36263 }, { "epoch": 1.6987867147608564, "grad_norm": 0.5907612696027954, "learning_rate": 2.915615332098262e-07, "loss": 0.2694, "step": 36264 }, { "epoch": 1.6988335597507846, "grad_norm": 0.5498783992390388, "learning_rate": 2.914726569659662e-07, "loss": 0.2585, "step": 36265 }, { "epoch": 1.698880404740713, "grad_norm": 0.5758447270184767, "learning_rate": 2.9138379343162634e-07, "loss": 0.2685, "step": 36266 }, { "epoch": 1.6989272497306414, "grad_norm": 0.6180588750652912, "learning_rate": 2.912949426073178e-07, "loss": 0.2781, "step": 36267 }, { "epoch": 1.6989740947205696, "grad_norm": 0.5878057277163398, "learning_rate": 2.912061044935513e-07, "loss": 0.2736, "step": 36268 }, { "epoch": 1.6990209397104978, "grad_norm": 0.5721158460896016, "learning_rate": 2.911172790908384e-07, "loss": 0.2485, "step": 36269 }, { "epoch": 1.6990677847004263, "grad_norm": 0.6309390771580153, "learning_rate": 2.910284663996901e-07, "loss": 0.2679, "step": 36270 }, { "epoch": 1.6991146296903548, "grad_norm": 0.6407700469362478, "learning_rate": 2.9093966642061823e-07, "loss": 0.2825, "step": 36271 }, { "epoch": 1.6991614746802828, "grad_norm": 0.5964101727325815, "learning_rate": 2.908508791541337e-07, "loss": 0.2711, "step": 36272 }, { "epoch": 1.6992083196702112, "grad_norm": 0.623569386907135, "learning_rate": 2.9076210460074637e-07, "loss": 0.2709, "step": 36273 }, { "epoch": 1.6992551646601397, "grad_norm": 0.6202236457701324, "learning_rate": 2.906733427609687e-07, "loss": 0.2742, "step": 36274 }, { "epoch": 1.699302009650068, "grad_norm": 0.6057700113535884, "learning_rate": 2.9058459363530995e-07, "loss": 0.2681, "step": 36275 }, { "epoch": 1.6993488546399962, "grad_norm": 0.6144893360217785, "learning_rate": 2.9049585722428164e-07, "loss": 0.2843, "step": 36276 }, { "epoch": 1.6993956996299246, "grad_norm": 0.6398714129756802, "learning_rate": 2.9040713352839423e-07, "loss": 0.2909, "step": 36277 }, { "epoch": 1.699442544619853, "grad_norm": 0.600826470888636, "learning_rate": 2.903184225481584e-07, "loss": 0.2687, "step": 36278 }, { "epoch": 1.6994893896097811, "grad_norm": 0.5600522081394493, "learning_rate": 2.9022972428408514e-07, "loss": 0.2579, "step": 36279 }, { "epoch": 1.6995362345997096, "grad_norm": 0.6230028009728872, "learning_rate": 2.901410387366846e-07, "loss": 0.2643, "step": 36280 }, { "epoch": 1.6995830795896378, "grad_norm": 0.5671546374478874, "learning_rate": 2.9005236590646637e-07, "loss": 0.2741, "step": 36281 }, { "epoch": 1.699629924579566, "grad_norm": 0.65119875626123, "learning_rate": 2.899637057939414e-07, "loss": 0.2683, "step": 36282 }, { "epoch": 1.6996767695694945, "grad_norm": 0.6127529879612883, "learning_rate": 2.8987505839961963e-07, "loss": 0.2867, "step": 36283 }, { "epoch": 1.699723614559423, "grad_norm": 0.6039643597795453, "learning_rate": 2.897864237240114e-07, "loss": 0.2706, "step": 36284 }, { "epoch": 1.6997704595493512, "grad_norm": 0.5695836511169218, "learning_rate": 2.896978017676269e-07, "loss": 0.2708, "step": 36285 }, { "epoch": 1.6998173045392795, "grad_norm": 0.5740197697231639, "learning_rate": 2.8960919253097657e-07, "loss": 0.265, "step": 36286 }, { "epoch": 1.699864149529208, "grad_norm": 0.6078947166541949, "learning_rate": 2.895205960145697e-07, "loss": 0.2722, "step": 36287 }, { "epoch": 1.6999109945191362, "grad_norm": 0.6070528390079918, "learning_rate": 2.8943201221891594e-07, "loss": 0.2628, "step": 36288 }, { "epoch": 1.6999578395090644, "grad_norm": 0.6253234942368335, "learning_rate": 2.8934344114452506e-07, "loss": 0.2815, "step": 36289 }, { "epoch": 1.700004684498993, "grad_norm": 0.5971196085148487, "learning_rate": 2.89254882791907e-07, "loss": 0.2724, "step": 36290 }, { "epoch": 1.7000515294889211, "grad_norm": 0.6262719899538622, "learning_rate": 2.8916633716157163e-07, "loss": 0.2828, "step": 36291 }, { "epoch": 1.7000983744788494, "grad_norm": 0.6050395566427099, "learning_rate": 2.8907780425402875e-07, "loss": 0.2753, "step": 36292 }, { "epoch": 1.7001452194687778, "grad_norm": 0.5686345240972632, "learning_rate": 2.8898928406978693e-07, "loss": 0.2577, "step": 36293 }, { "epoch": 1.7001920644587063, "grad_norm": 0.6128568688088972, "learning_rate": 2.8890077660935657e-07, "loss": 0.268, "step": 36294 }, { "epoch": 1.7002389094486343, "grad_norm": 0.5847574038714748, "learning_rate": 2.888122818732461e-07, "loss": 0.2675, "step": 36295 }, { "epoch": 1.7002857544385628, "grad_norm": 0.5398904537129029, "learning_rate": 2.887237998619649e-07, "loss": 0.258, "step": 36296 }, { "epoch": 1.7003325994284912, "grad_norm": 0.5929564670397697, "learning_rate": 2.8863533057602255e-07, "loss": 0.2725, "step": 36297 }, { "epoch": 1.7003794444184195, "grad_norm": 0.6264096611087119, "learning_rate": 2.885468740159289e-07, "loss": 0.2885, "step": 36298 }, { "epoch": 1.7004262894083477, "grad_norm": 0.5976437928063156, "learning_rate": 2.8845843018219133e-07, "loss": 0.2564, "step": 36299 }, { "epoch": 1.7004731343982762, "grad_norm": 0.6368954481682435, "learning_rate": 2.883699990753197e-07, "loss": 0.2806, "step": 36300 }, { "epoch": 1.7005199793882044, "grad_norm": 0.6009992602898594, "learning_rate": 2.882815806958233e-07, "loss": 0.264, "step": 36301 }, { "epoch": 1.7005668243781327, "grad_norm": 0.6067283055518845, "learning_rate": 2.8819317504421016e-07, "loss": 0.2669, "step": 36302 }, { "epoch": 1.7006136693680611, "grad_norm": 0.5739247508803681, "learning_rate": 2.8810478212098926e-07, "loss": 0.2684, "step": 36303 }, { "epoch": 1.7006605143579894, "grad_norm": 0.615808186402752, "learning_rate": 2.8801640192666985e-07, "loss": 0.2759, "step": 36304 }, { "epoch": 1.7007073593479176, "grad_norm": 0.583937770998211, "learning_rate": 2.8792803446175967e-07, "loss": 0.2692, "step": 36305 }, { "epoch": 1.700754204337846, "grad_norm": 0.5739801214326995, "learning_rate": 2.878396797267677e-07, "loss": 0.258, "step": 36306 }, { "epoch": 1.7008010493277745, "grad_norm": 0.6180403218957568, "learning_rate": 2.877513377222024e-07, "loss": 0.2668, "step": 36307 }, { "epoch": 1.7008478943177026, "grad_norm": 0.583050901555242, "learning_rate": 2.8766300844857264e-07, "loss": 0.2738, "step": 36308 }, { "epoch": 1.700894739307631, "grad_norm": 0.6007344210813848, "learning_rate": 2.8757469190638544e-07, "loss": 0.268, "step": 36309 }, { "epoch": 1.7009415842975595, "grad_norm": 0.5618828779780043, "learning_rate": 2.8748638809615064e-07, "loss": 0.2626, "step": 36310 }, { "epoch": 1.7009884292874877, "grad_norm": 0.5719804098076209, "learning_rate": 2.8739809701837487e-07, "loss": 0.2626, "step": 36311 }, { "epoch": 1.701035274277416, "grad_norm": 0.6657652276846578, "learning_rate": 2.8730981867356703e-07, "loss": 0.2918, "step": 36312 }, { "epoch": 1.7010821192673444, "grad_norm": 0.5741931187062903, "learning_rate": 2.872215530622349e-07, "loss": 0.2577, "step": 36313 }, { "epoch": 1.7011289642572727, "grad_norm": 0.5792251758620782, "learning_rate": 2.871333001848869e-07, "loss": 0.2645, "step": 36314 }, { "epoch": 1.701175809247201, "grad_norm": 0.5767690983639526, "learning_rate": 2.870450600420308e-07, "loss": 0.2601, "step": 36315 }, { "epoch": 1.7012226542371294, "grad_norm": 0.6075744174852779, "learning_rate": 2.869568326341743e-07, "loss": 0.2597, "step": 36316 }, { "epoch": 1.7012694992270576, "grad_norm": 0.5781418066655729, "learning_rate": 2.868686179618244e-07, "loss": 0.2567, "step": 36317 }, { "epoch": 1.7013163442169859, "grad_norm": 0.5701194132190549, "learning_rate": 2.8678041602548945e-07, "loss": 0.252, "step": 36318 }, { "epoch": 1.7013631892069143, "grad_norm": 0.6073204777291372, "learning_rate": 2.8669222682567693e-07, "loss": 0.2659, "step": 36319 }, { "epoch": 1.7014100341968428, "grad_norm": 0.5817099760522971, "learning_rate": 2.866040503628942e-07, "loss": 0.274, "step": 36320 }, { "epoch": 1.701456879186771, "grad_norm": 0.6062177695982643, "learning_rate": 2.8651588663764937e-07, "loss": 0.2727, "step": 36321 }, { "epoch": 1.7015037241766993, "grad_norm": 0.5943029383816502, "learning_rate": 2.8642773565044914e-07, "loss": 0.2606, "step": 36322 }, { "epoch": 1.7015505691666277, "grad_norm": 0.6207081400392075, "learning_rate": 2.86339597401801e-07, "loss": 0.2757, "step": 36323 }, { "epoch": 1.701597414156556, "grad_norm": 0.5949681108826542, "learning_rate": 2.862514718922119e-07, "loss": 0.2712, "step": 36324 }, { "epoch": 1.7016442591464842, "grad_norm": 0.538090445177019, "learning_rate": 2.8616335912218876e-07, "loss": 0.2441, "step": 36325 }, { "epoch": 1.7016911041364127, "grad_norm": 0.6190254471591321, "learning_rate": 2.8607525909223944e-07, "loss": 0.2787, "step": 36326 }, { "epoch": 1.701737949126341, "grad_norm": 0.557241609338996, "learning_rate": 2.8598717180287033e-07, "loss": 0.2821, "step": 36327 }, { "epoch": 1.7017847941162692, "grad_norm": 0.6179030232747089, "learning_rate": 2.858990972545894e-07, "loss": 0.2703, "step": 36328 }, { "epoch": 1.7018316391061976, "grad_norm": 0.6016215330702591, "learning_rate": 2.858110354479024e-07, "loss": 0.2666, "step": 36329 }, { "epoch": 1.701878484096126, "grad_norm": 0.5782707182031441, "learning_rate": 2.857229863833158e-07, "loss": 0.269, "step": 36330 }, { "epoch": 1.701925329086054, "grad_norm": 0.5657240091181537, "learning_rate": 2.85634950061337e-07, "loss": 0.2538, "step": 36331 }, { "epoch": 1.7019721740759826, "grad_norm": 0.6270135974392901, "learning_rate": 2.8554692648247254e-07, "loss": 0.2749, "step": 36332 }, { "epoch": 1.702019019065911, "grad_norm": 0.5920409310987423, "learning_rate": 2.85458915647229e-07, "loss": 0.2762, "step": 36333 }, { "epoch": 1.7020658640558393, "grad_norm": 0.5514089806175919, "learning_rate": 2.8537091755611314e-07, "loss": 0.2417, "step": 36334 }, { "epoch": 1.7021127090457675, "grad_norm": 0.6220941483497644, "learning_rate": 2.8528293220963067e-07, "loss": 0.2834, "step": 36335 }, { "epoch": 1.702159554035696, "grad_norm": 0.613459711034588, "learning_rate": 2.851949596082887e-07, "loss": 0.2724, "step": 36336 }, { "epoch": 1.7022063990256242, "grad_norm": 0.5807037267541654, "learning_rate": 2.851069997525924e-07, "loss": 0.2594, "step": 36337 }, { "epoch": 1.7022532440155524, "grad_norm": 0.5629995868428491, "learning_rate": 2.850190526430488e-07, "loss": 0.266, "step": 36338 }, { "epoch": 1.702300089005481, "grad_norm": 0.6079896661797198, "learning_rate": 2.8493111828016396e-07, "loss": 0.2737, "step": 36339 }, { "epoch": 1.7023469339954092, "grad_norm": 0.6134946754613577, "learning_rate": 2.848431966644441e-07, "loss": 0.2726, "step": 36340 }, { "epoch": 1.7023937789853374, "grad_norm": 0.59985406484913, "learning_rate": 2.8475528779639437e-07, "loss": 0.259, "step": 36341 }, { "epoch": 1.7024406239752659, "grad_norm": 0.6277440566027666, "learning_rate": 2.8466739167652133e-07, "loss": 0.2743, "step": 36342 }, { "epoch": 1.7024874689651943, "grad_norm": 0.6065702745990028, "learning_rate": 2.84579508305331e-07, "loss": 0.2748, "step": 36343 }, { "epoch": 1.7025343139551223, "grad_norm": 0.615871430248117, "learning_rate": 2.8449163768332846e-07, "loss": 0.2812, "step": 36344 }, { "epoch": 1.7025811589450508, "grad_norm": 0.5762254382602299, "learning_rate": 2.844037798110194e-07, "loss": 0.2692, "step": 36345 }, { "epoch": 1.7026280039349793, "grad_norm": 0.5734210988956614, "learning_rate": 2.8431593468891044e-07, "loss": 0.2575, "step": 36346 }, { "epoch": 1.7026748489249075, "grad_norm": 0.5840919911613703, "learning_rate": 2.8422810231750585e-07, "loss": 0.2561, "step": 36347 }, { "epoch": 1.7027216939148357, "grad_norm": 0.5868354398338991, "learning_rate": 2.8414028269731163e-07, "loss": 0.2781, "step": 36348 }, { "epoch": 1.7027685389047642, "grad_norm": 0.6038675307430065, "learning_rate": 2.840524758288332e-07, "loss": 0.2525, "step": 36349 }, { "epoch": 1.7028153838946924, "grad_norm": 0.6012139199492997, "learning_rate": 2.839646817125763e-07, "loss": 0.2589, "step": 36350 }, { "epoch": 1.7028622288846207, "grad_norm": 0.6101315554868847, "learning_rate": 2.838769003490449e-07, "loss": 0.2766, "step": 36351 }, { "epoch": 1.7029090738745492, "grad_norm": 0.6129808444216363, "learning_rate": 2.8378913173874534e-07, "loss": 0.2872, "step": 36352 }, { "epoch": 1.7029559188644774, "grad_norm": 0.6103703574579207, "learning_rate": 2.837013758821824e-07, "loss": 0.2703, "step": 36353 }, { "epoch": 1.7030027638544056, "grad_norm": 0.5907849642193072, "learning_rate": 2.8361363277986083e-07, "loss": 0.261, "step": 36354 }, { "epoch": 1.703049608844334, "grad_norm": 0.6238488626129131, "learning_rate": 2.835259024322856e-07, "loss": 0.273, "step": 36355 }, { "epoch": 1.7030964538342626, "grad_norm": 0.6142314529208664, "learning_rate": 2.8343818483996174e-07, "loss": 0.2804, "step": 36356 }, { "epoch": 1.7031432988241908, "grad_norm": 0.6088354806605787, "learning_rate": 2.8335048000339454e-07, "loss": 0.2755, "step": 36357 }, { "epoch": 1.703190143814119, "grad_norm": 0.6319281153711271, "learning_rate": 2.8326278792308757e-07, "loss": 0.2976, "step": 36358 }, { "epoch": 1.7032369888040475, "grad_norm": 0.6160419812370178, "learning_rate": 2.831751085995468e-07, "loss": 0.2662, "step": 36359 }, { "epoch": 1.7032838337939757, "grad_norm": 0.5961809801192619, "learning_rate": 2.830874420332755e-07, "loss": 0.2667, "step": 36360 }, { "epoch": 1.703330678783904, "grad_norm": 0.6572342242944422, "learning_rate": 2.8299978822477867e-07, "loss": 0.3018, "step": 36361 }, { "epoch": 1.7033775237738324, "grad_norm": 0.6351100776652809, "learning_rate": 2.8291214717456104e-07, "loss": 0.274, "step": 36362 }, { "epoch": 1.7034243687637607, "grad_norm": 0.6479938658222273, "learning_rate": 2.8282451888312716e-07, "loss": 0.268, "step": 36363 }, { "epoch": 1.703471213753689, "grad_norm": 0.6169022095967774, "learning_rate": 2.8273690335098027e-07, "loss": 0.2836, "step": 36364 }, { "epoch": 1.7035180587436174, "grad_norm": 0.5872039252591006, "learning_rate": 2.82649300578626e-07, "loss": 0.2537, "step": 36365 }, { "epoch": 1.7035649037335459, "grad_norm": 0.5898965458384784, "learning_rate": 2.8256171056656707e-07, "loss": 0.2601, "step": 36366 }, { "epoch": 1.7036117487234739, "grad_norm": 0.634972065847191, "learning_rate": 2.8247413331530804e-07, "loss": 0.2707, "step": 36367 }, { "epoch": 1.7036585937134023, "grad_norm": 0.5588160093408331, "learning_rate": 2.82386568825353e-07, "loss": 0.2652, "step": 36368 }, { "epoch": 1.7037054387033308, "grad_norm": 0.5755269650466921, "learning_rate": 2.8229901709720594e-07, "loss": 0.2561, "step": 36369 }, { "epoch": 1.703752283693259, "grad_norm": 0.6503219307426753, "learning_rate": 2.8221147813137116e-07, "loss": 0.2801, "step": 36370 }, { "epoch": 1.7037991286831873, "grad_norm": 0.5785164722595415, "learning_rate": 2.8212395192835195e-07, "loss": 0.2775, "step": 36371 }, { "epoch": 1.7038459736731157, "grad_norm": 0.5890103951115123, "learning_rate": 2.8203643848865145e-07, "loss": 0.2746, "step": 36372 }, { "epoch": 1.703892818663044, "grad_norm": 0.5854866436928231, "learning_rate": 2.8194893781277376e-07, "loss": 0.2601, "step": 36373 }, { "epoch": 1.7039396636529722, "grad_norm": 0.6485713245073748, "learning_rate": 2.818614499012226e-07, "loss": 0.2719, "step": 36374 }, { "epoch": 1.7039865086429007, "grad_norm": 0.6098045445473631, "learning_rate": 2.817739747545012e-07, "loss": 0.2614, "step": 36375 }, { "epoch": 1.704033353632829, "grad_norm": 0.5554566374153901, "learning_rate": 2.8168651237311275e-07, "loss": 0.2556, "step": 36376 }, { "epoch": 1.7040801986227572, "grad_norm": 0.6239531997259918, "learning_rate": 2.8159906275756183e-07, "loss": 0.2656, "step": 36377 }, { "epoch": 1.7041270436126856, "grad_norm": 0.6012783490702315, "learning_rate": 2.815116259083503e-07, "loss": 0.2715, "step": 36378 }, { "epoch": 1.704173888602614, "grad_norm": 0.597731649212397, "learning_rate": 2.814242018259816e-07, "loss": 0.2724, "step": 36379 }, { "epoch": 1.7042207335925421, "grad_norm": 0.6457751712779002, "learning_rate": 2.8133679051095876e-07, "loss": 0.273, "step": 36380 }, { "epoch": 1.7042675785824706, "grad_norm": 0.5916693388713451, "learning_rate": 2.8124939196378513e-07, "loss": 0.2679, "step": 36381 }, { "epoch": 1.704314423572399, "grad_norm": 0.5621139419258134, "learning_rate": 2.8116200618496376e-07, "loss": 0.2544, "step": 36382 }, { "epoch": 1.7043612685623273, "grad_norm": 0.6125058426914135, "learning_rate": 2.810746331749978e-07, "loss": 0.2872, "step": 36383 }, { "epoch": 1.7044081135522555, "grad_norm": 0.5801606775703178, "learning_rate": 2.80987272934389e-07, "loss": 0.2711, "step": 36384 }, { "epoch": 1.704454958542184, "grad_norm": 0.6044159854224581, "learning_rate": 2.808999254636413e-07, "loss": 0.2826, "step": 36385 }, { "epoch": 1.7045018035321122, "grad_norm": 0.5646035332144567, "learning_rate": 2.808125907632561e-07, "loss": 0.2631, "step": 36386 }, { "epoch": 1.7045486485220405, "grad_norm": 0.5987705960063142, "learning_rate": 2.8072526883373695e-07, "loss": 0.2662, "step": 36387 }, { "epoch": 1.704595493511969, "grad_norm": 0.5854976641118053, "learning_rate": 2.806379596755859e-07, "loss": 0.2688, "step": 36388 }, { "epoch": 1.7046423385018972, "grad_norm": 0.5969096041556408, "learning_rate": 2.80550663289306e-07, "loss": 0.2642, "step": 36389 }, { "epoch": 1.7046891834918254, "grad_norm": 0.565855321321352, "learning_rate": 2.8046337967539886e-07, "loss": 0.2736, "step": 36390 }, { "epoch": 1.7047360284817539, "grad_norm": 0.570801834139619, "learning_rate": 2.8037610883436704e-07, "loss": 0.2622, "step": 36391 }, { "epoch": 1.7047828734716823, "grad_norm": 0.5686516652147195, "learning_rate": 2.802888507667134e-07, "loss": 0.2773, "step": 36392 }, { "epoch": 1.7048297184616106, "grad_norm": 0.6550633298355193, "learning_rate": 2.8020160547293897e-07, "loss": 0.2733, "step": 36393 }, { "epoch": 1.7048765634515388, "grad_norm": 0.5763323660753135, "learning_rate": 2.801143729535463e-07, "loss": 0.2705, "step": 36394 }, { "epoch": 1.7049234084414673, "grad_norm": 0.5886984765381088, "learning_rate": 2.8002715320903815e-07, "loss": 0.2545, "step": 36395 }, { "epoch": 1.7049702534313955, "grad_norm": 0.6073047280445574, "learning_rate": 2.7993994623991496e-07, "loss": 0.272, "step": 36396 }, { "epoch": 1.7050170984213238, "grad_norm": 0.5864014730350694, "learning_rate": 2.798527520466798e-07, "loss": 0.2631, "step": 36397 }, { "epoch": 1.7050639434112522, "grad_norm": 0.5828327819187872, "learning_rate": 2.7976557062983384e-07, "loss": 0.262, "step": 36398 }, { "epoch": 1.7051107884011805, "grad_norm": 0.6274454462083624, "learning_rate": 2.7967840198987933e-07, "loss": 0.2799, "step": 36399 }, { "epoch": 1.7051576333911087, "grad_norm": 0.5862129814983668, "learning_rate": 2.795912461273173e-07, "loss": 0.2698, "step": 36400 }, { "epoch": 1.7052044783810372, "grad_norm": 0.6227171601168586, "learning_rate": 2.7950410304264985e-07, "loss": 0.2681, "step": 36401 }, { "epoch": 1.7052513233709656, "grad_norm": 0.6340086520609703, "learning_rate": 2.7941697273637796e-07, "loss": 0.2827, "step": 36402 }, { "epoch": 1.7052981683608937, "grad_norm": 0.5973019864151762, "learning_rate": 2.793298552090029e-07, "loss": 0.2682, "step": 36403 }, { "epoch": 1.7053450133508221, "grad_norm": 0.5767266651349332, "learning_rate": 2.7924275046102677e-07, "loss": 0.2658, "step": 36404 }, { "epoch": 1.7053918583407506, "grad_norm": 0.5688125983164857, "learning_rate": 2.791556584929503e-07, "loss": 0.2658, "step": 36405 }, { "epoch": 1.7054387033306788, "grad_norm": 0.6167581759402667, "learning_rate": 2.7906857930527523e-07, "loss": 0.2766, "step": 36406 }, { "epoch": 1.705485548320607, "grad_norm": 0.653204463343577, "learning_rate": 2.789815128985024e-07, "loss": 0.2953, "step": 36407 }, { "epoch": 1.7055323933105355, "grad_norm": 0.627246890792119, "learning_rate": 2.7889445927313213e-07, "loss": 0.269, "step": 36408 }, { "epoch": 1.7055792383004638, "grad_norm": 0.6016337280587916, "learning_rate": 2.7880741842966604e-07, "loss": 0.2746, "step": 36409 }, { "epoch": 1.705626083290392, "grad_norm": 0.5490769357975542, "learning_rate": 2.7872039036860506e-07, "loss": 0.2491, "step": 36410 }, { "epoch": 1.7056729282803205, "grad_norm": 0.5808438852038005, "learning_rate": 2.7863337509044963e-07, "loss": 0.2685, "step": 36411 }, { "epoch": 1.7057197732702487, "grad_norm": 0.5459420735071125, "learning_rate": 2.7854637259570163e-07, "loss": 0.2648, "step": 36412 }, { "epoch": 1.705766618260177, "grad_norm": 0.6649444460731004, "learning_rate": 2.784593828848603e-07, "loss": 0.2935, "step": 36413 }, { "epoch": 1.7058134632501054, "grad_norm": 0.5766245543236763, "learning_rate": 2.7837240595842753e-07, "loss": 0.274, "step": 36414 }, { "epoch": 1.7058603082400339, "grad_norm": 0.6052510612141234, "learning_rate": 2.782854418169023e-07, "loss": 0.2708, "step": 36415 }, { "epoch": 1.705907153229962, "grad_norm": 0.5744562747486884, "learning_rate": 2.781984904607862e-07, "loss": 0.2611, "step": 36416 }, { "epoch": 1.7059539982198904, "grad_norm": 0.5583546705130857, "learning_rate": 2.7811155189057945e-07, "loss": 0.2504, "step": 36417 }, { "epoch": 1.7060008432098188, "grad_norm": 0.644864615827539, "learning_rate": 2.7802462610678205e-07, "loss": 0.2826, "step": 36418 }, { "epoch": 1.706047688199747, "grad_norm": 0.578452162622265, "learning_rate": 2.779377131098951e-07, "loss": 0.2719, "step": 36419 }, { "epoch": 1.7060945331896753, "grad_norm": 0.6265453967715977, "learning_rate": 2.7785081290041815e-07, "loss": 0.2905, "step": 36420 }, { "epoch": 1.7061413781796038, "grad_norm": 0.6102955398651954, "learning_rate": 2.7776392547885056e-07, "loss": 0.2683, "step": 36421 }, { "epoch": 1.706188223169532, "grad_norm": 0.594969046768498, "learning_rate": 2.77677050845693e-07, "loss": 0.2758, "step": 36422 }, { "epoch": 1.7062350681594602, "grad_norm": 0.638233122590943, "learning_rate": 2.775901890014457e-07, "loss": 0.2849, "step": 36423 }, { "epoch": 1.7062819131493887, "grad_norm": 0.5747151302705298, "learning_rate": 2.7750333994660816e-07, "loss": 0.265, "step": 36424 }, { "epoch": 1.706328758139317, "grad_norm": 0.6168182032486007, "learning_rate": 2.7741650368168034e-07, "loss": 0.2846, "step": 36425 }, { "epoch": 1.7063756031292452, "grad_norm": 0.6165191556131059, "learning_rate": 2.7732968020716235e-07, "loss": 0.2776, "step": 36426 }, { "epoch": 1.7064224481191737, "grad_norm": 0.5759331625882428, "learning_rate": 2.772428695235535e-07, "loss": 0.248, "step": 36427 }, { "epoch": 1.7064692931091021, "grad_norm": 0.5602434386287334, "learning_rate": 2.771560716313529e-07, "loss": 0.2588, "step": 36428 }, { "epoch": 1.7065161380990304, "grad_norm": 0.5899292527027843, "learning_rate": 2.770692865310601e-07, "loss": 0.2615, "step": 36429 }, { "epoch": 1.7065629830889586, "grad_norm": 0.6254687319422348, "learning_rate": 2.7698251422317497e-07, "loss": 0.2728, "step": 36430 }, { "epoch": 1.706609828078887, "grad_norm": 0.5755473199014149, "learning_rate": 2.7689575470819684e-07, "loss": 0.2458, "step": 36431 }, { "epoch": 1.7066566730688153, "grad_norm": 0.6644329965556178, "learning_rate": 2.768090079866251e-07, "loss": 0.2804, "step": 36432 }, { "epoch": 1.7067035180587435, "grad_norm": 0.6271142434604333, "learning_rate": 2.767222740589584e-07, "loss": 0.2751, "step": 36433 }, { "epoch": 1.706750363048672, "grad_norm": 0.5898331797247781, "learning_rate": 2.766355529256967e-07, "loss": 0.2652, "step": 36434 }, { "epoch": 1.7067972080386002, "grad_norm": 0.5771879995127859, "learning_rate": 2.765488445873382e-07, "loss": 0.2602, "step": 36435 }, { "epoch": 1.7068440530285285, "grad_norm": 0.5700372968481245, "learning_rate": 2.7646214904438224e-07, "loss": 0.2658, "step": 36436 }, { "epoch": 1.706890898018457, "grad_norm": 0.6632481310274024, "learning_rate": 2.763754662973275e-07, "loss": 0.2812, "step": 36437 }, { "epoch": 1.7069377430083854, "grad_norm": 0.5669801861565208, "learning_rate": 2.76288796346674e-07, "loss": 0.2629, "step": 36438 }, { "epoch": 1.7069845879983134, "grad_norm": 0.5833896031206368, "learning_rate": 2.762021391929187e-07, "loss": 0.2575, "step": 36439 }, { "epoch": 1.707031432988242, "grad_norm": 0.6249039252621802, "learning_rate": 2.761154948365613e-07, "loss": 0.2636, "step": 36440 }, { "epoch": 1.7070782779781704, "grad_norm": 0.6412850694154097, "learning_rate": 2.760288632781008e-07, "loss": 0.2637, "step": 36441 }, { "epoch": 1.7071251229680986, "grad_norm": 0.5654950954266079, "learning_rate": 2.7594224451803456e-07, "loss": 0.2601, "step": 36442 }, { "epoch": 1.7071719679580268, "grad_norm": 0.6513360832347649, "learning_rate": 2.758556385568617e-07, "loss": 0.2917, "step": 36443 }, { "epoch": 1.7072188129479553, "grad_norm": 0.581023910838023, "learning_rate": 2.757690453950815e-07, "loss": 0.2726, "step": 36444 }, { "epoch": 1.7072656579378835, "grad_norm": 0.5761582522896518, "learning_rate": 2.7568246503319044e-07, "loss": 0.2749, "step": 36445 }, { "epoch": 1.7073125029278118, "grad_norm": 0.6109539492565466, "learning_rate": 2.75595897471688e-07, "loss": 0.2668, "step": 36446 }, { "epoch": 1.7073593479177402, "grad_norm": 0.5892018034007845, "learning_rate": 2.7550934271107194e-07, "loss": 0.2541, "step": 36447 }, { "epoch": 1.7074061929076685, "grad_norm": 0.6144963075897607, "learning_rate": 2.754228007518411e-07, "loss": 0.2647, "step": 36448 }, { "epoch": 1.7074530378975967, "grad_norm": 0.5668041044299326, "learning_rate": 2.753362715944924e-07, "loss": 0.2683, "step": 36449 }, { "epoch": 1.7074998828875252, "grad_norm": 0.5637630216853565, "learning_rate": 2.7524975523952476e-07, "loss": 0.2639, "step": 36450 }, { "epoch": 1.7075467278774537, "grad_norm": 0.5683501946932917, "learning_rate": 2.751632516874353e-07, "loss": 0.2554, "step": 36451 }, { "epoch": 1.7075935728673817, "grad_norm": 0.6396086618364413, "learning_rate": 2.75076760938722e-07, "loss": 0.2798, "step": 36452 }, { "epoch": 1.7076404178573101, "grad_norm": 0.579124970898476, "learning_rate": 2.749902829938825e-07, "loss": 0.2782, "step": 36453 }, { "epoch": 1.7076872628472386, "grad_norm": 0.6089018119926346, "learning_rate": 2.7490381785341495e-07, "loss": 0.2696, "step": 36454 }, { "epoch": 1.7077341078371668, "grad_norm": 0.5826291753293527, "learning_rate": 2.748173655178171e-07, "loss": 0.2623, "step": 36455 }, { "epoch": 1.707780952827095, "grad_norm": 0.6174167683382275, "learning_rate": 2.7473092598758604e-07, "loss": 0.286, "step": 36456 }, { "epoch": 1.7078277978170235, "grad_norm": 0.5780507961050898, "learning_rate": 2.7464449926321887e-07, "loss": 0.2593, "step": 36457 }, { "epoch": 1.7078746428069518, "grad_norm": 0.6275706001509653, "learning_rate": 2.745580853452132e-07, "loss": 0.2863, "step": 36458 }, { "epoch": 1.70792148779688, "grad_norm": 0.6282527806211167, "learning_rate": 2.7447168423406614e-07, "loss": 0.2859, "step": 36459 }, { "epoch": 1.7079683327868085, "grad_norm": 0.5902262042895529, "learning_rate": 2.7438529593027536e-07, "loss": 0.2655, "step": 36460 }, { "epoch": 1.7080151777767367, "grad_norm": 0.6110032779690711, "learning_rate": 2.7429892043433855e-07, "loss": 0.2797, "step": 36461 }, { "epoch": 1.708062022766665, "grad_norm": 0.6388972435215298, "learning_rate": 2.742125577467511e-07, "loss": 0.2689, "step": 36462 }, { "epoch": 1.7081088677565934, "grad_norm": 0.5914575943429221, "learning_rate": 2.741262078680118e-07, "loss": 0.271, "step": 36463 }, { "epoch": 1.708155712746522, "grad_norm": 0.5891699386771995, "learning_rate": 2.740398707986161e-07, "loss": 0.2656, "step": 36464 }, { "epoch": 1.7082025577364501, "grad_norm": 0.6117741123064488, "learning_rate": 2.739535465390614e-07, "loss": 0.2864, "step": 36465 }, { "epoch": 1.7082494027263784, "grad_norm": 0.6126841383203939, "learning_rate": 2.738672350898447e-07, "loss": 0.2853, "step": 36466 }, { "epoch": 1.7082962477163068, "grad_norm": 0.6064354888208541, "learning_rate": 2.737809364514624e-07, "loss": 0.2821, "step": 36467 }, { "epoch": 1.708343092706235, "grad_norm": 0.5395320601640505, "learning_rate": 2.7369465062441177e-07, "loss": 0.2651, "step": 36468 }, { "epoch": 1.7083899376961633, "grad_norm": 0.5978487722084348, "learning_rate": 2.7360837760918886e-07, "loss": 0.2733, "step": 36469 }, { "epoch": 1.7084367826860918, "grad_norm": 0.5693137699867626, "learning_rate": 2.7352211740628966e-07, "loss": 0.263, "step": 36470 }, { "epoch": 1.70848362767602, "grad_norm": 0.6118496603503971, "learning_rate": 2.7343587001621125e-07, "loss": 0.2731, "step": 36471 }, { "epoch": 1.7085304726659483, "grad_norm": 0.5898683846236011, "learning_rate": 2.7334963543944964e-07, "loss": 0.2712, "step": 36472 }, { "epoch": 1.7085773176558767, "grad_norm": 0.5940575535197395, "learning_rate": 2.732634136765011e-07, "loss": 0.2844, "step": 36473 }, { "epoch": 1.7086241626458052, "grad_norm": 0.6055014777285762, "learning_rate": 2.7317720472786275e-07, "loss": 0.266, "step": 36474 }, { "epoch": 1.7086710076357332, "grad_norm": 0.6241708379116175, "learning_rate": 2.7309100859402915e-07, "loss": 0.2789, "step": 36475 }, { "epoch": 1.7087178526256617, "grad_norm": 0.5817676995571809, "learning_rate": 2.7300482527549773e-07, "loss": 0.2631, "step": 36476 }, { "epoch": 1.7087646976155901, "grad_norm": 0.635524065403271, "learning_rate": 2.72918654772763e-07, "loss": 0.2925, "step": 36477 }, { "epoch": 1.7088115426055184, "grad_norm": 0.6001237417198758, "learning_rate": 2.728324970863219e-07, "loss": 0.2769, "step": 36478 }, { "epoch": 1.7088583875954466, "grad_norm": 0.6083012009111691, "learning_rate": 2.7274635221667013e-07, "loss": 0.2653, "step": 36479 }, { "epoch": 1.708905232585375, "grad_norm": 0.6644056812210782, "learning_rate": 2.726602201643036e-07, "loss": 0.2874, "step": 36480 }, { "epoch": 1.7089520775753033, "grad_norm": 0.580712423324927, "learning_rate": 2.7257410092971724e-07, "loss": 0.2694, "step": 36481 }, { "epoch": 1.7089989225652316, "grad_norm": 0.6339468031518422, "learning_rate": 2.7248799451340705e-07, "loss": 0.2609, "step": 36482 }, { "epoch": 1.70904576755516, "grad_norm": 0.6125268324271139, "learning_rate": 2.7240190091586934e-07, "loss": 0.2688, "step": 36483 }, { "epoch": 1.7090926125450883, "grad_norm": 0.604655912341006, "learning_rate": 2.7231582013759804e-07, "loss": 0.2497, "step": 36484 }, { "epoch": 1.7091394575350165, "grad_norm": 0.6486233473841423, "learning_rate": 2.7222975217908923e-07, "loss": 0.2695, "step": 36485 }, { "epoch": 1.709186302524945, "grad_norm": 0.5871107800757197, "learning_rate": 2.7214369704083865e-07, "loss": 0.2828, "step": 36486 }, { "epoch": 1.7092331475148734, "grad_norm": 0.5745946273383132, "learning_rate": 2.720576547233411e-07, "loss": 0.2704, "step": 36487 }, { "epoch": 1.7092799925048014, "grad_norm": 0.5715748478410934, "learning_rate": 2.719716252270918e-07, "loss": 0.2708, "step": 36488 }, { "epoch": 1.70932683749473, "grad_norm": 0.5953773234494144, "learning_rate": 2.7188560855258533e-07, "loss": 0.2645, "step": 36489 }, { "epoch": 1.7093736824846584, "grad_norm": 0.6510094513633511, "learning_rate": 2.71799604700318e-07, "loss": 0.2829, "step": 36490 }, { "epoch": 1.7094205274745866, "grad_norm": 0.595694231886342, "learning_rate": 2.717136136707832e-07, "loss": 0.2702, "step": 36491 }, { "epoch": 1.7094673724645149, "grad_norm": 0.592312924092361, "learning_rate": 2.716276354644767e-07, "loss": 0.2696, "step": 36492 }, { "epoch": 1.7095142174544433, "grad_norm": 0.6211543190955917, "learning_rate": 2.7154167008189345e-07, "loss": 0.2659, "step": 36493 }, { "epoch": 1.7095610624443716, "grad_norm": 0.6094569193579826, "learning_rate": 2.7145571752352745e-07, "loss": 0.2894, "step": 36494 }, { "epoch": 1.7096079074342998, "grad_norm": 0.5559640342475485, "learning_rate": 2.7136977778987354e-07, "loss": 0.2644, "step": 36495 }, { "epoch": 1.7096547524242283, "grad_norm": 0.5969060682082631, "learning_rate": 2.7128385088142637e-07, "loss": 0.2719, "step": 36496 }, { "epoch": 1.7097015974141565, "grad_norm": 0.5983319200337135, "learning_rate": 2.7119793679868084e-07, "loss": 0.2666, "step": 36497 }, { "epoch": 1.7097484424040847, "grad_norm": 0.5753674254984936, "learning_rate": 2.7111203554213065e-07, "loss": 0.2673, "step": 36498 }, { "epoch": 1.7097952873940132, "grad_norm": 0.5904218224082953, "learning_rate": 2.7102614711227105e-07, "loss": 0.2822, "step": 36499 }, { "epoch": 1.7098421323839417, "grad_norm": 0.5813990333398887, "learning_rate": 2.709402715095952e-07, "loss": 0.2627, "step": 36500 }, { "epoch": 1.70988897737387, "grad_norm": 0.637550657177247, "learning_rate": 2.70854408734598e-07, "loss": 0.297, "step": 36501 }, { "epoch": 1.7099358223637982, "grad_norm": 0.6153487244196539, "learning_rate": 2.7076855878777314e-07, "loss": 0.2772, "step": 36502 }, { "epoch": 1.7099826673537266, "grad_norm": 0.6358972367799747, "learning_rate": 2.7068272166961534e-07, "loss": 0.2839, "step": 36503 }, { "epoch": 1.7100295123436549, "grad_norm": 0.5792474502690578, "learning_rate": 2.705968973806181e-07, "loss": 0.2671, "step": 36504 }, { "epoch": 1.710076357333583, "grad_norm": 0.587747991886093, "learning_rate": 2.705110859212756e-07, "loss": 0.2609, "step": 36505 }, { "epoch": 1.7101232023235116, "grad_norm": 0.5754053575818345, "learning_rate": 2.7042528729208097e-07, "loss": 0.2645, "step": 36506 }, { "epoch": 1.7101700473134398, "grad_norm": 0.5592529192306366, "learning_rate": 2.7033950149352837e-07, "loss": 0.2589, "step": 36507 }, { "epoch": 1.710216892303368, "grad_norm": 0.5763411327152239, "learning_rate": 2.7025372852611143e-07, "loss": 0.2734, "step": 36508 }, { "epoch": 1.7102637372932965, "grad_norm": 0.6216860385537891, "learning_rate": 2.701679683903241e-07, "loss": 0.2796, "step": 36509 }, { "epoch": 1.710310582283225, "grad_norm": 0.5983690328631612, "learning_rate": 2.7008222108665994e-07, "loss": 0.2611, "step": 36510 }, { "epoch": 1.710357427273153, "grad_norm": 0.5952294002605223, "learning_rate": 2.699964866156124e-07, "loss": 0.2723, "step": 36511 }, { "epoch": 1.7104042722630814, "grad_norm": 0.5674542872577527, "learning_rate": 2.699107649776739e-07, "loss": 0.2679, "step": 36512 }, { "epoch": 1.71045111725301, "grad_norm": 0.5786879113707146, "learning_rate": 2.6982505617333845e-07, "loss": 0.2497, "step": 36513 }, { "epoch": 1.7104979622429382, "grad_norm": 0.5803984569954438, "learning_rate": 2.6973936020309927e-07, "loss": 0.2673, "step": 36514 }, { "epoch": 1.7105448072328664, "grad_norm": 0.6049143004331117, "learning_rate": 2.696536770674496e-07, "loss": 0.278, "step": 36515 }, { "epoch": 1.7105916522227949, "grad_norm": 0.604587130477442, "learning_rate": 2.6956800676688237e-07, "loss": 0.279, "step": 36516 }, { "epoch": 1.710638497212723, "grad_norm": 0.5770457621140651, "learning_rate": 2.694823493018911e-07, "loss": 0.2557, "step": 36517 }, { "epoch": 1.7106853422026513, "grad_norm": 0.5942338870467245, "learning_rate": 2.693967046729684e-07, "loss": 0.2733, "step": 36518 }, { "epoch": 1.7107321871925798, "grad_norm": 0.5811141552610966, "learning_rate": 2.693110728806064e-07, "loss": 0.2601, "step": 36519 }, { "epoch": 1.710779032182508, "grad_norm": 0.6387953767781209, "learning_rate": 2.6922545392529864e-07, "loss": 0.2839, "step": 36520 }, { "epoch": 1.7108258771724363, "grad_norm": 0.6101863342495442, "learning_rate": 2.691398478075377e-07, "loss": 0.2862, "step": 36521 }, { "epoch": 1.7108727221623647, "grad_norm": 0.5877363553214043, "learning_rate": 2.690542545278163e-07, "loss": 0.2624, "step": 36522 }, { "epoch": 1.7109195671522932, "grad_norm": 0.6135322592283159, "learning_rate": 2.6896867408662734e-07, "loss": 0.2744, "step": 36523 }, { "epoch": 1.7109664121422212, "grad_norm": 0.5964560634109239, "learning_rate": 2.688831064844624e-07, "loss": 0.2569, "step": 36524 }, { "epoch": 1.7110132571321497, "grad_norm": 0.6279623961729939, "learning_rate": 2.68797551721815e-07, "loss": 0.2915, "step": 36525 }, { "epoch": 1.7110601021220782, "grad_norm": 0.5406919850653168, "learning_rate": 2.6871200979917634e-07, "loss": 0.2536, "step": 36526 }, { "epoch": 1.7111069471120064, "grad_norm": 0.5838150494566989, "learning_rate": 2.686264807170391e-07, "loss": 0.261, "step": 36527 }, { "epoch": 1.7111537921019346, "grad_norm": 0.6134136015646521, "learning_rate": 2.685409644758957e-07, "loss": 0.2724, "step": 36528 }, { "epoch": 1.711200637091863, "grad_norm": 0.5845464704142849, "learning_rate": 2.684554610762388e-07, "loss": 0.272, "step": 36529 }, { "epoch": 1.7112474820817913, "grad_norm": 0.5915427074621011, "learning_rate": 2.683699705185594e-07, "loss": 0.2764, "step": 36530 }, { "epoch": 1.7112943270717196, "grad_norm": 0.5985656167600962, "learning_rate": 2.682844928033498e-07, "loss": 0.2789, "step": 36531 }, { "epoch": 1.711341172061648, "grad_norm": 0.5676159594373247, "learning_rate": 2.6819902793110255e-07, "loss": 0.257, "step": 36532 }, { "epoch": 1.7113880170515763, "grad_norm": 0.6015376881064137, "learning_rate": 2.6811357590230845e-07, "loss": 0.2677, "step": 36533 }, { "epoch": 1.7114348620415045, "grad_norm": 0.5715309303223589, "learning_rate": 2.6802813671745974e-07, "loss": 0.2684, "step": 36534 }, { "epoch": 1.711481707031433, "grad_norm": 0.6862477242839169, "learning_rate": 2.6794271037704876e-07, "loss": 0.3085, "step": 36535 }, { "epoch": 1.7115285520213614, "grad_norm": 0.6259800859174487, "learning_rate": 2.678572968815657e-07, "loss": 0.2695, "step": 36536 }, { "epoch": 1.7115753970112897, "grad_norm": 0.6163099758085916, "learning_rate": 2.677718962315032e-07, "loss": 0.2793, "step": 36537 }, { "epoch": 1.711622242001218, "grad_norm": 0.6035736823383457, "learning_rate": 2.6768650842735197e-07, "loss": 0.2692, "step": 36538 }, { "epoch": 1.7116690869911464, "grad_norm": 0.6434220842563202, "learning_rate": 2.6760113346960467e-07, "loss": 0.2831, "step": 36539 }, { "epoch": 1.7117159319810746, "grad_norm": 0.6321045010358074, "learning_rate": 2.675157713587512e-07, "loss": 0.28, "step": 36540 }, { "epoch": 1.7117627769710029, "grad_norm": 0.6013836211844038, "learning_rate": 2.6743042209528364e-07, "loss": 0.2674, "step": 36541 }, { "epoch": 1.7118096219609313, "grad_norm": 0.5916111826497592, "learning_rate": 2.6734508567969246e-07, "loss": 0.2774, "step": 36542 }, { "epoch": 1.7118564669508596, "grad_norm": 0.550532051542982, "learning_rate": 2.6725976211246926e-07, "loss": 0.2637, "step": 36543 }, { "epoch": 1.7119033119407878, "grad_norm": 0.6028085803032378, "learning_rate": 2.671744513941049e-07, "loss": 0.2715, "step": 36544 }, { "epoch": 1.7119501569307163, "grad_norm": 0.6045359995788154, "learning_rate": 2.6708915352509054e-07, "loss": 0.2802, "step": 36545 }, { "epoch": 1.7119970019206447, "grad_norm": 0.5952603782138156, "learning_rate": 2.6700386850591705e-07, "loss": 0.2867, "step": 36546 }, { "epoch": 1.7120438469105728, "grad_norm": 0.5963934011423273, "learning_rate": 2.6691859633707517e-07, "loss": 0.2752, "step": 36547 }, { "epoch": 1.7120906919005012, "grad_norm": 0.5940361333801305, "learning_rate": 2.668333370190551e-07, "loss": 0.2597, "step": 36548 }, { "epoch": 1.7121375368904297, "grad_norm": 0.6234052264045532, "learning_rate": 2.6674809055234784e-07, "loss": 0.2663, "step": 36549 }, { "epoch": 1.712184381880358, "grad_norm": 0.6575453939572188, "learning_rate": 2.666628569374441e-07, "loss": 0.2934, "step": 36550 }, { "epoch": 1.7122312268702862, "grad_norm": 0.5948123598575874, "learning_rate": 2.66577636174834e-07, "loss": 0.2668, "step": 36551 }, { "epoch": 1.7122780718602146, "grad_norm": 0.6133674092942597, "learning_rate": 2.6649242826500915e-07, "loss": 0.2732, "step": 36552 }, { "epoch": 1.7123249168501429, "grad_norm": 0.5926175439205614, "learning_rate": 2.664072332084583e-07, "loss": 0.2573, "step": 36553 }, { "epoch": 1.7123717618400711, "grad_norm": 0.5952136081732193, "learning_rate": 2.66322051005673e-07, "loss": 0.2638, "step": 36554 }, { "epoch": 1.7124186068299996, "grad_norm": 0.579702722398526, "learning_rate": 2.662368816571423e-07, "loss": 0.2759, "step": 36555 }, { "epoch": 1.7124654518199278, "grad_norm": 0.6270592224455113, "learning_rate": 2.661517251633569e-07, "loss": 0.2936, "step": 36556 }, { "epoch": 1.712512296809856, "grad_norm": 0.6390634912690435, "learning_rate": 2.660665815248067e-07, "loss": 0.2728, "step": 36557 }, { "epoch": 1.7125591417997845, "grad_norm": 0.5714927038102308, "learning_rate": 2.6598145074198225e-07, "loss": 0.2778, "step": 36558 }, { "epoch": 1.712605986789713, "grad_norm": 0.6207072008903797, "learning_rate": 2.6589633281537324e-07, "loss": 0.2677, "step": 36559 }, { "epoch": 1.712652831779641, "grad_norm": 0.6047622695808227, "learning_rate": 2.6581122774546944e-07, "loss": 0.2639, "step": 36560 }, { "epoch": 1.7126996767695695, "grad_norm": 0.6298817910691461, "learning_rate": 2.6572613553276005e-07, "loss": 0.2701, "step": 36561 }, { "epoch": 1.712746521759498, "grad_norm": 0.5909866893496757, "learning_rate": 2.6564105617773505e-07, "loss": 0.2679, "step": 36562 }, { "epoch": 1.7127933667494262, "grad_norm": 0.5807898013722346, "learning_rate": 2.655559896808843e-07, "loss": 0.2879, "step": 36563 }, { "epoch": 1.7128402117393544, "grad_norm": 0.5826527586906748, "learning_rate": 2.6547093604269715e-07, "loss": 0.2783, "step": 36564 }, { "epoch": 1.7128870567292829, "grad_norm": 0.6134603630386747, "learning_rate": 2.6538589526366315e-07, "loss": 0.2631, "step": 36565 }, { "epoch": 1.7129339017192111, "grad_norm": 0.6402640830876944, "learning_rate": 2.65300867344272e-07, "loss": 0.2836, "step": 36566 }, { "epoch": 1.7129807467091394, "grad_norm": 0.624296660769992, "learning_rate": 2.652158522850129e-07, "loss": 0.273, "step": 36567 }, { "epoch": 1.7130275916990678, "grad_norm": 0.6161068897835226, "learning_rate": 2.6513085008637444e-07, "loss": 0.2862, "step": 36568 }, { "epoch": 1.713074436688996, "grad_norm": 0.595802933415837, "learning_rate": 2.6504586074884597e-07, "loss": 0.2672, "step": 36569 }, { "epoch": 1.7131212816789243, "grad_norm": 0.6069219562369729, "learning_rate": 2.649608842729171e-07, "loss": 0.2643, "step": 36570 }, { "epoch": 1.7131681266688528, "grad_norm": 0.6126871477065955, "learning_rate": 2.648759206590765e-07, "loss": 0.2837, "step": 36571 }, { "epoch": 1.7132149716587812, "grad_norm": 0.5932519123795301, "learning_rate": 2.6479096990781354e-07, "loss": 0.2689, "step": 36572 }, { "epoch": 1.7132618166487095, "grad_norm": 0.5956761991815847, "learning_rate": 2.6470603201961654e-07, "loss": 0.2843, "step": 36573 }, { "epoch": 1.7133086616386377, "grad_norm": 0.5921583370122221, "learning_rate": 2.6462110699497475e-07, "loss": 0.2809, "step": 36574 }, { "epoch": 1.7133555066285662, "grad_norm": 0.5913391611907045, "learning_rate": 2.645361948343761e-07, "loss": 0.2666, "step": 36575 }, { "epoch": 1.7134023516184944, "grad_norm": 0.6639597944436976, "learning_rate": 2.6445129553830993e-07, "loss": 0.2894, "step": 36576 }, { "epoch": 1.7134491966084227, "grad_norm": 0.5829487034273876, "learning_rate": 2.643664091072648e-07, "loss": 0.2536, "step": 36577 }, { "epoch": 1.7134960415983511, "grad_norm": 0.5981196407174605, "learning_rate": 2.6428153554172944e-07, "loss": 0.276, "step": 36578 }, { "epoch": 1.7135428865882794, "grad_norm": 0.5975771415135357, "learning_rate": 2.6419667484219147e-07, "loss": 0.2672, "step": 36579 }, { "epoch": 1.7135897315782076, "grad_norm": 0.622839871485256, "learning_rate": 2.6411182700913944e-07, "loss": 0.2763, "step": 36580 }, { "epoch": 1.713636576568136, "grad_norm": 0.6039012369217633, "learning_rate": 2.640269920430627e-07, "loss": 0.2723, "step": 36581 }, { "epoch": 1.7136834215580645, "grad_norm": 0.622695786783136, "learning_rate": 2.6394216994444804e-07, "loss": 0.2698, "step": 36582 }, { "epoch": 1.7137302665479925, "grad_norm": 0.5663070281340424, "learning_rate": 2.638573607137843e-07, "loss": 0.2632, "step": 36583 }, { "epoch": 1.713777111537921, "grad_norm": 0.5837333481270316, "learning_rate": 2.6377256435155956e-07, "loss": 0.2524, "step": 36584 }, { "epoch": 1.7138239565278495, "grad_norm": 0.6224461832124187, "learning_rate": 2.6368778085826137e-07, "loss": 0.2637, "step": 36585 }, { "epoch": 1.7138708015177777, "grad_norm": 0.5643517156201322, "learning_rate": 2.6360301023437785e-07, "loss": 0.2493, "step": 36586 }, { "epoch": 1.713917646507706, "grad_norm": 0.6100482826852875, "learning_rate": 2.63518252480397e-07, "loss": 0.2886, "step": 36587 }, { "epoch": 1.7139644914976344, "grad_norm": 0.6207416227008227, "learning_rate": 2.6343350759680696e-07, "loss": 0.2523, "step": 36588 }, { "epoch": 1.7140113364875627, "grad_norm": 0.58802281929198, "learning_rate": 2.633487755840944e-07, "loss": 0.2707, "step": 36589 }, { "epoch": 1.714058181477491, "grad_norm": 0.6081202895403176, "learning_rate": 2.6326405644274797e-07, "loss": 0.2728, "step": 36590 }, { "epoch": 1.7141050264674194, "grad_norm": 0.6439469117720747, "learning_rate": 2.6317935017325437e-07, "loss": 0.2828, "step": 36591 }, { "epoch": 1.7141518714573476, "grad_norm": 0.6034272846428615, "learning_rate": 2.630946567761014e-07, "loss": 0.2705, "step": 36592 }, { "epoch": 1.7141987164472758, "grad_norm": 0.6145714356058999, "learning_rate": 2.6300997625177625e-07, "loss": 0.3078, "step": 36593 }, { "epoch": 1.7142455614372043, "grad_norm": 0.5659599632853963, "learning_rate": 2.629253086007666e-07, "loss": 0.2693, "step": 36594 }, { "epoch": 1.7142924064271328, "grad_norm": 0.576758820602805, "learning_rate": 2.6284065382356005e-07, "loss": 0.2791, "step": 36595 }, { "epoch": 1.7143392514170608, "grad_norm": 0.6404843426634454, "learning_rate": 2.627560119206432e-07, "loss": 0.2784, "step": 36596 }, { "epoch": 1.7143860964069892, "grad_norm": 0.6130601924378438, "learning_rate": 2.626713828925026e-07, "loss": 0.2888, "step": 36597 }, { "epoch": 1.7144329413969177, "grad_norm": 0.6479106350651718, "learning_rate": 2.6258676673962593e-07, "loss": 0.2874, "step": 36598 }, { "epoch": 1.714479786386846, "grad_norm": 0.6290265318820559, "learning_rate": 2.6250216346249993e-07, "loss": 0.2761, "step": 36599 }, { "epoch": 1.7145266313767742, "grad_norm": 0.9974607161750985, "learning_rate": 2.624175730616119e-07, "loss": 0.2727, "step": 36600 }, { "epoch": 1.7145734763667027, "grad_norm": 0.5923774276910562, "learning_rate": 2.623329955374487e-07, "loss": 0.2757, "step": 36601 }, { "epoch": 1.714620321356631, "grad_norm": 0.5723761655305601, "learning_rate": 2.62248430890496e-07, "loss": 0.2507, "step": 36602 }, { "epoch": 1.7146671663465591, "grad_norm": 0.6058292876663804, "learning_rate": 2.621638791212419e-07, "loss": 0.2833, "step": 36603 }, { "epoch": 1.7147140113364876, "grad_norm": 0.6137855845468234, "learning_rate": 2.6207934023017164e-07, "loss": 0.2727, "step": 36604 }, { "epoch": 1.7147608563264158, "grad_norm": 0.6574257817563817, "learning_rate": 2.6199481421777225e-07, "loss": 0.2912, "step": 36605 }, { "epoch": 1.714807701316344, "grad_norm": 0.5934898637574059, "learning_rate": 2.619103010845303e-07, "loss": 0.2666, "step": 36606 }, { "epoch": 1.7148545463062725, "grad_norm": 0.5913058188975971, "learning_rate": 2.6182580083093214e-07, "loss": 0.2711, "step": 36607 }, { "epoch": 1.714901391296201, "grad_norm": 0.6080035507215218, "learning_rate": 2.6174131345746424e-07, "loss": 0.2782, "step": 36608 }, { "epoch": 1.7149482362861292, "grad_norm": 0.5726308286315801, "learning_rate": 2.6165683896461234e-07, "loss": 0.2566, "step": 36609 }, { "epoch": 1.7149950812760575, "grad_norm": 0.6278163915311109, "learning_rate": 2.615723773528625e-07, "loss": 0.2717, "step": 36610 }, { "epoch": 1.715041926265986, "grad_norm": 0.5732649086232079, "learning_rate": 2.6148792862270124e-07, "loss": 0.2674, "step": 36611 }, { "epoch": 1.7150887712559142, "grad_norm": 0.5978956562673364, "learning_rate": 2.6140349277461397e-07, "loss": 0.2637, "step": 36612 }, { "epoch": 1.7151356162458424, "grad_norm": 0.6103736813359298, "learning_rate": 2.61319069809087e-07, "loss": 0.2646, "step": 36613 }, { "epoch": 1.715182461235771, "grad_norm": 0.669936346835793, "learning_rate": 2.612346597266066e-07, "loss": 0.2903, "step": 36614 }, { "epoch": 1.7152293062256991, "grad_norm": 0.6173988296752382, "learning_rate": 2.6115026252765767e-07, "loss": 0.2782, "step": 36615 }, { "epoch": 1.7152761512156274, "grad_norm": 0.5919501943930668, "learning_rate": 2.610658782127268e-07, "loss": 0.2561, "step": 36616 }, { "epoch": 1.7153229962055558, "grad_norm": 0.6397897864030051, "learning_rate": 2.609815067822985e-07, "loss": 0.2871, "step": 36617 }, { "epoch": 1.7153698411954843, "grad_norm": 0.5741903238885602, "learning_rate": 2.6089714823685883e-07, "loss": 0.2634, "step": 36618 }, { "epoch": 1.7154166861854123, "grad_norm": 0.5700270655060247, "learning_rate": 2.608128025768933e-07, "loss": 0.2625, "step": 36619 }, { "epoch": 1.7154635311753408, "grad_norm": 0.6196278395135003, "learning_rate": 2.607284698028878e-07, "loss": 0.2676, "step": 36620 }, { "epoch": 1.7155103761652692, "grad_norm": 0.5824843541810116, "learning_rate": 2.606441499153267e-07, "loss": 0.2604, "step": 36621 }, { "epoch": 1.7155572211551975, "grad_norm": 0.6116860084669035, "learning_rate": 2.605598429146955e-07, "loss": 0.2828, "step": 36622 }, { "epoch": 1.7156040661451257, "grad_norm": 0.5912391608285131, "learning_rate": 2.604755488014801e-07, "loss": 0.2778, "step": 36623 }, { "epoch": 1.7156509111350542, "grad_norm": 0.5976402483906711, "learning_rate": 2.603912675761647e-07, "loss": 0.2764, "step": 36624 }, { "epoch": 1.7156977561249824, "grad_norm": 0.603717860321007, "learning_rate": 2.6030699923923456e-07, "loss": 0.2618, "step": 36625 }, { "epoch": 1.7157446011149107, "grad_norm": 0.573808326776171, "learning_rate": 2.6022274379117476e-07, "loss": 0.2593, "step": 36626 }, { "epoch": 1.7157914461048391, "grad_norm": 0.5729193745716082, "learning_rate": 2.601385012324706e-07, "loss": 0.2645, "step": 36627 }, { "epoch": 1.7158382910947674, "grad_norm": 0.5604509810716704, "learning_rate": 2.6005427156360565e-07, "loss": 0.2514, "step": 36628 }, { "epoch": 1.7158851360846956, "grad_norm": 0.6073791806091134, "learning_rate": 2.599700547850656e-07, "loss": 0.2799, "step": 36629 }, { "epoch": 1.715931981074624, "grad_norm": 0.6271352579963912, "learning_rate": 2.598858508973354e-07, "loss": 0.277, "step": 36630 }, { "epoch": 1.7159788260645525, "grad_norm": 0.6127283832800384, "learning_rate": 2.598016599008987e-07, "loss": 0.2834, "step": 36631 }, { "epoch": 1.7160256710544806, "grad_norm": 0.6300988994097935, "learning_rate": 2.5971748179624027e-07, "loss": 0.2842, "step": 36632 }, { "epoch": 1.716072516044409, "grad_norm": 0.5803473677610036, "learning_rate": 2.5963331658384495e-07, "loss": 0.2658, "step": 36633 }, { "epoch": 1.7161193610343375, "grad_norm": 0.5966217856538476, "learning_rate": 2.5954916426419646e-07, "loss": 0.2538, "step": 36634 }, { "epoch": 1.7161662060242657, "grad_norm": 0.6117068580221533, "learning_rate": 2.5946502483777947e-07, "loss": 0.2694, "step": 36635 }, { "epoch": 1.716213051014194, "grad_norm": 0.5917013858944344, "learning_rate": 2.593808983050783e-07, "loss": 0.258, "step": 36636 }, { "epoch": 1.7162598960041224, "grad_norm": 0.6019576409770989, "learning_rate": 2.59296784666577e-07, "loss": 0.2681, "step": 36637 }, { "epoch": 1.7163067409940507, "grad_norm": 0.6217314007343101, "learning_rate": 2.5921268392275933e-07, "loss": 0.2829, "step": 36638 }, { "epoch": 1.716353585983979, "grad_norm": 0.6086097960558481, "learning_rate": 2.5912859607410994e-07, "loss": 0.2657, "step": 36639 }, { "epoch": 1.7164004309739074, "grad_norm": 0.5690803502411086, "learning_rate": 2.590445211211115e-07, "loss": 0.2611, "step": 36640 }, { "epoch": 1.7164472759638356, "grad_norm": 0.5771543707463742, "learning_rate": 2.589604590642489e-07, "loss": 0.2652, "step": 36641 }, { "epoch": 1.7164941209537639, "grad_norm": 0.5479660603580516, "learning_rate": 2.5887640990400533e-07, "loss": 0.2427, "step": 36642 }, { "epoch": 1.7165409659436923, "grad_norm": 0.6364664950045892, "learning_rate": 2.5879237364086543e-07, "loss": 0.2875, "step": 36643 }, { "epoch": 1.7165878109336208, "grad_norm": 0.6654557326058829, "learning_rate": 2.5870835027531156e-07, "loss": 0.2956, "step": 36644 }, { "epoch": 1.716634655923549, "grad_norm": 0.6026840626589981, "learning_rate": 2.586243398078284e-07, "loss": 0.2669, "step": 36645 }, { "epoch": 1.7166815009134773, "grad_norm": 0.6105592344952632, "learning_rate": 2.5854034223889824e-07, "loss": 0.2807, "step": 36646 }, { "epoch": 1.7167283459034057, "grad_norm": 0.636371050659116, "learning_rate": 2.5845635756900494e-07, "loss": 0.2882, "step": 36647 }, { "epoch": 1.716775190893334, "grad_norm": 0.618551924070124, "learning_rate": 2.58372385798632e-07, "loss": 0.2642, "step": 36648 }, { "epoch": 1.7168220358832622, "grad_norm": 0.6122108512061237, "learning_rate": 2.5828842692826264e-07, "loss": 0.2619, "step": 36649 }, { "epoch": 1.7168688808731907, "grad_norm": 0.615779533970002, "learning_rate": 2.582044809583803e-07, "loss": 0.2713, "step": 36650 }, { "epoch": 1.716915725863119, "grad_norm": 0.644292184576928, "learning_rate": 2.581205478894677e-07, "loss": 0.2668, "step": 36651 }, { "epoch": 1.7169625708530472, "grad_norm": 0.6094711066760571, "learning_rate": 2.5803662772200754e-07, "loss": 0.2664, "step": 36652 }, { "epoch": 1.7170094158429756, "grad_norm": 0.5445273695118622, "learning_rate": 2.57952720456483e-07, "loss": 0.2552, "step": 36653 }, { "epoch": 1.717056260832904, "grad_norm": 0.6007754956179114, "learning_rate": 2.5786882609337707e-07, "loss": 0.2696, "step": 36654 }, { "epoch": 1.717103105822832, "grad_norm": 0.6198622711357193, "learning_rate": 2.5778494463317233e-07, "loss": 0.2688, "step": 36655 }, { "epoch": 1.7171499508127606, "grad_norm": 0.5930881133563313, "learning_rate": 2.577010760763518e-07, "loss": 0.2718, "step": 36656 }, { "epoch": 1.717196795802689, "grad_norm": 0.6180600438932227, "learning_rate": 2.576172204233987e-07, "loss": 0.2839, "step": 36657 }, { "epoch": 1.7172436407926173, "grad_norm": 0.6035460833532639, "learning_rate": 2.575333776747946e-07, "loss": 0.2558, "step": 36658 }, { "epoch": 1.7172904857825455, "grad_norm": 0.6273867970528769, "learning_rate": 2.574495478310218e-07, "loss": 0.296, "step": 36659 }, { "epoch": 1.717337330772474, "grad_norm": 0.5933992819750615, "learning_rate": 2.573657308925634e-07, "loss": 0.2633, "step": 36660 }, { "epoch": 1.7173841757624022, "grad_norm": 0.5656828834403166, "learning_rate": 2.5728192685990137e-07, "loss": 0.2547, "step": 36661 }, { "epoch": 1.7174310207523305, "grad_norm": 0.5779222032181188, "learning_rate": 2.5719813573351847e-07, "loss": 0.2585, "step": 36662 }, { "epoch": 1.717477865742259, "grad_norm": 0.5800864832126776, "learning_rate": 2.571143575138968e-07, "loss": 0.2684, "step": 36663 }, { "epoch": 1.7175247107321872, "grad_norm": 0.5823088831722119, "learning_rate": 2.5703059220151787e-07, "loss": 0.2746, "step": 36664 }, { "epoch": 1.7175715557221154, "grad_norm": 0.6093031442686551, "learning_rate": 2.5694683979686467e-07, "loss": 0.2618, "step": 36665 }, { "epoch": 1.7176184007120439, "grad_norm": 0.5449337579530961, "learning_rate": 2.568631003004179e-07, "loss": 0.2487, "step": 36666 }, { "epoch": 1.7176652457019723, "grad_norm": 0.6060610098401235, "learning_rate": 2.567793737126606e-07, "loss": 0.2763, "step": 36667 }, { "epoch": 1.7177120906919003, "grad_norm": 0.5850260821498573, "learning_rate": 2.566956600340739e-07, "loss": 0.2565, "step": 36668 }, { "epoch": 1.7177589356818288, "grad_norm": 0.5754885943635767, "learning_rate": 2.566119592651403e-07, "loss": 0.269, "step": 36669 }, { "epoch": 1.7178057806717573, "grad_norm": 0.5684114869645421, "learning_rate": 2.5652827140634077e-07, "loss": 0.2634, "step": 36670 }, { "epoch": 1.7178526256616855, "grad_norm": 0.5776692355192482, "learning_rate": 2.564445964581572e-07, "loss": 0.2528, "step": 36671 }, { "epoch": 1.7178994706516137, "grad_norm": 0.611541521595477, "learning_rate": 2.563609344210713e-07, "loss": 0.2641, "step": 36672 }, { "epoch": 1.7179463156415422, "grad_norm": 0.6182001739082973, "learning_rate": 2.562772852955639e-07, "loss": 0.2818, "step": 36673 }, { "epoch": 1.7179931606314705, "grad_norm": 0.688314018418899, "learning_rate": 2.561936490821168e-07, "loss": 0.3003, "step": 36674 }, { "epoch": 1.7180400056213987, "grad_norm": 0.6115049619663325, "learning_rate": 2.5611002578121184e-07, "loss": 0.2651, "step": 36675 }, { "epoch": 1.7180868506113272, "grad_norm": 0.5837265713963861, "learning_rate": 2.56026415393329e-07, "loss": 0.2572, "step": 36676 }, { "epoch": 1.7181336956012554, "grad_norm": 0.6113557071143576, "learning_rate": 2.559428179189505e-07, "loss": 0.2948, "step": 36677 }, { "epoch": 1.7181805405911836, "grad_norm": 0.6114051628498672, "learning_rate": 2.558592333585566e-07, "loss": 0.2723, "step": 36678 }, { "epoch": 1.718227385581112, "grad_norm": 0.5981787142210103, "learning_rate": 2.5577566171262974e-07, "loss": 0.2582, "step": 36679 }, { "epoch": 1.7182742305710406, "grad_norm": 0.6214095271620264, "learning_rate": 2.5569210298164923e-07, "loss": 0.2633, "step": 36680 }, { "epoch": 1.7183210755609688, "grad_norm": 0.5974769450973266, "learning_rate": 2.556085571660968e-07, "loss": 0.2674, "step": 36681 }, { "epoch": 1.718367920550897, "grad_norm": 0.5811605310269233, "learning_rate": 2.555250242664528e-07, "loss": 0.2631, "step": 36682 }, { "epoch": 1.7184147655408255, "grad_norm": 0.6101242714662394, "learning_rate": 2.5544150428319813e-07, "loss": 0.2702, "step": 36683 }, { "epoch": 1.7184616105307537, "grad_norm": 0.5513472136956644, "learning_rate": 2.5535799721681324e-07, "loss": 0.2414, "step": 36684 }, { "epoch": 1.718508455520682, "grad_norm": 0.6213809318558533, "learning_rate": 2.5527450306777916e-07, "loss": 0.2702, "step": 36685 }, { "epoch": 1.7185553005106105, "grad_norm": 0.6282063417287611, "learning_rate": 2.5519102183657664e-07, "loss": 0.2776, "step": 36686 }, { "epoch": 1.7186021455005387, "grad_norm": 0.6019453843837983, "learning_rate": 2.551075535236855e-07, "loss": 0.2639, "step": 36687 }, { "epoch": 1.718648990490467, "grad_norm": 0.5522241044017034, "learning_rate": 2.5502409812958567e-07, "loss": 0.2596, "step": 36688 }, { "epoch": 1.7186958354803954, "grad_norm": 0.601556529844573, "learning_rate": 2.5494065565475787e-07, "loss": 0.271, "step": 36689 }, { "epoch": 1.7187426804703239, "grad_norm": 0.621737110636812, "learning_rate": 2.548572260996823e-07, "loss": 0.2905, "step": 36690 }, { "epoch": 1.7187895254602519, "grad_norm": 0.6279254646103828, "learning_rate": 2.5477380946483913e-07, "loss": 0.2616, "step": 36691 }, { "epoch": 1.7188363704501803, "grad_norm": 0.598450613530055, "learning_rate": 2.546904057507088e-07, "loss": 0.2648, "step": 36692 }, { "epoch": 1.7188832154401088, "grad_norm": 0.6032851600541621, "learning_rate": 2.546070149577703e-07, "loss": 0.2873, "step": 36693 }, { "epoch": 1.718930060430037, "grad_norm": 0.5823584801183245, "learning_rate": 2.545236370865048e-07, "loss": 0.2641, "step": 36694 }, { "epoch": 1.7189769054199653, "grad_norm": 0.6179979898045492, "learning_rate": 2.544402721373906e-07, "loss": 0.2652, "step": 36695 }, { "epoch": 1.7190237504098937, "grad_norm": 0.570213498004522, "learning_rate": 2.5435692011090865e-07, "loss": 0.2584, "step": 36696 }, { "epoch": 1.719070595399822, "grad_norm": 0.6213929420693828, "learning_rate": 2.542735810075378e-07, "loss": 0.2807, "step": 36697 }, { "epoch": 1.7191174403897502, "grad_norm": 0.5422761112489294, "learning_rate": 2.5419025482775834e-07, "loss": 0.2589, "step": 36698 }, { "epoch": 1.7191642853796787, "grad_norm": 0.5693289218568139, "learning_rate": 2.5410694157204985e-07, "loss": 0.2533, "step": 36699 }, { "epoch": 1.719211130369607, "grad_norm": 0.5961670291390412, "learning_rate": 2.540236412408914e-07, "loss": 0.2784, "step": 36700 }, { "epoch": 1.7192579753595352, "grad_norm": 0.6182514932821864, "learning_rate": 2.539403538347618e-07, "loss": 0.2736, "step": 36701 }, { "epoch": 1.7193048203494636, "grad_norm": 0.6028892819034785, "learning_rate": 2.5385707935414116e-07, "loss": 0.266, "step": 36702 }, { "epoch": 1.719351665339392, "grad_norm": 0.556019220155107, "learning_rate": 2.537738177995083e-07, "loss": 0.2607, "step": 36703 }, { "epoch": 1.7193985103293201, "grad_norm": 0.5709097628575063, "learning_rate": 2.536905691713429e-07, "loss": 0.2515, "step": 36704 }, { "epoch": 1.7194453553192486, "grad_norm": 0.5694605484029867, "learning_rate": 2.536073334701233e-07, "loss": 0.2613, "step": 36705 }, { "epoch": 1.719492200309177, "grad_norm": 0.5953350873295042, "learning_rate": 2.535241106963296e-07, "loss": 0.2695, "step": 36706 }, { "epoch": 1.7195390452991053, "grad_norm": 0.6030256652950048, "learning_rate": 2.534409008504399e-07, "loss": 0.2782, "step": 36707 }, { "epoch": 1.7195858902890335, "grad_norm": 0.5932528468128356, "learning_rate": 2.533577039329327e-07, "loss": 0.2487, "step": 36708 }, { "epoch": 1.719632735278962, "grad_norm": 0.617281015872936, "learning_rate": 2.5327451994428707e-07, "loss": 0.2692, "step": 36709 }, { "epoch": 1.7196795802688902, "grad_norm": 0.6330495706369383, "learning_rate": 2.5319134888498217e-07, "loss": 0.2866, "step": 36710 }, { "epoch": 1.7197264252588185, "grad_norm": 0.5752085112336397, "learning_rate": 2.531081907554961e-07, "loss": 0.2766, "step": 36711 }, { "epoch": 1.719773270248747, "grad_norm": 0.8207988312266218, "learning_rate": 2.5302504555630827e-07, "loss": 0.2715, "step": 36712 }, { "epoch": 1.7198201152386752, "grad_norm": 0.622489665248846, "learning_rate": 2.52941913287896e-07, "loss": 0.2808, "step": 36713 }, { "epoch": 1.7198669602286034, "grad_norm": 0.5956453459151325, "learning_rate": 2.52858793950739e-07, "loss": 0.2727, "step": 36714 }, { "epoch": 1.7199138052185319, "grad_norm": 0.5742055146515264, "learning_rate": 2.5277568754531414e-07, "loss": 0.2788, "step": 36715 }, { "epoch": 1.7199606502084603, "grad_norm": 0.5896104572120728, "learning_rate": 2.5269259407210035e-07, "loss": 0.2685, "step": 36716 }, { "epoch": 1.7200074951983886, "grad_norm": 0.6251177697592587, "learning_rate": 2.526095135315759e-07, "loss": 0.2712, "step": 36717 }, { "epoch": 1.7200543401883168, "grad_norm": 0.5743460452974485, "learning_rate": 2.525264459242194e-07, "loss": 0.2562, "step": 36718 }, { "epoch": 1.7201011851782453, "grad_norm": 0.6022920067247988, "learning_rate": 2.5244339125050753e-07, "loss": 0.267, "step": 36719 }, { "epoch": 1.7201480301681735, "grad_norm": 0.6278991915780822, "learning_rate": 2.523603495109192e-07, "loss": 0.277, "step": 36720 }, { "epoch": 1.7201948751581018, "grad_norm": 0.5853054321298881, "learning_rate": 2.522773207059329e-07, "loss": 0.2631, "step": 36721 }, { "epoch": 1.7202417201480302, "grad_norm": 0.586897806424307, "learning_rate": 2.521943048360248e-07, "loss": 0.2684, "step": 36722 }, { "epoch": 1.7202885651379585, "grad_norm": 0.611798541546766, "learning_rate": 2.5211130190167383e-07, "loss": 0.2702, "step": 36723 }, { "epoch": 1.7203354101278867, "grad_norm": 0.6013139869906454, "learning_rate": 2.520283119033576e-07, "loss": 0.2692, "step": 36724 }, { "epoch": 1.7203822551178152, "grad_norm": 0.6381090354641817, "learning_rate": 2.519453348415529e-07, "loss": 0.299, "step": 36725 }, { "epoch": 1.7204291001077436, "grad_norm": 0.591743583998651, "learning_rate": 2.518623707167378e-07, "loss": 0.2625, "step": 36726 }, { "epoch": 1.7204759450976717, "grad_norm": 0.60836807479607, "learning_rate": 2.517794195293899e-07, "loss": 0.2641, "step": 36727 }, { "epoch": 1.7205227900876001, "grad_norm": 0.6352374229838528, "learning_rate": 2.5169648127998685e-07, "loss": 0.2793, "step": 36728 }, { "epoch": 1.7205696350775286, "grad_norm": 0.573596429206286, "learning_rate": 2.5161355596900473e-07, "loss": 0.2673, "step": 36729 }, { "epoch": 1.7206164800674568, "grad_norm": 0.5771644453468339, "learning_rate": 2.5153064359692226e-07, "loss": 0.2592, "step": 36730 }, { "epoch": 1.720663325057385, "grad_norm": 0.6107083414069988, "learning_rate": 2.5144774416421493e-07, "loss": 0.2784, "step": 36731 }, { "epoch": 1.7207101700473135, "grad_norm": 0.593935368143471, "learning_rate": 2.5136485767136095e-07, "loss": 0.2677, "step": 36732 }, { "epoch": 1.7207570150372418, "grad_norm": 0.6656411195254534, "learning_rate": 2.5128198411883714e-07, "loss": 0.2808, "step": 36733 }, { "epoch": 1.72080386002717, "grad_norm": 0.6430751499005984, "learning_rate": 2.5119912350712013e-07, "loss": 0.2704, "step": 36734 }, { "epoch": 1.7208507050170985, "grad_norm": 0.573485991499584, "learning_rate": 2.5111627583668753e-07, "loss": 0.2569, "step": 36735 }, { "epoch": 1.7208975500070267, "grad_norm": 0.603976553248648, "learning_rate": 2.5103344110801537e-07, "loss": 0.2738, "step": 36736 }, { "epoch": 1.720944394996955, "grad_norm": 0.5912482156237586, "learning_rate": 2.509506193215802e-07, "loss": 0.2857, "step": 36737 }, { "epoch": 1.7209912399868834, "grad_norm": 0.5964658611745828, "learning_rate": 2.508678104778589e-07, "loss": 0.2774, "step": 36738 }, { "epoch": 1.7210380849768119, "grad_norm": 0.5491975911423672, "learning_rate": 2.50785014577328e-07, "loss": 0.2563, "step": 36739 }, { "epoch": 1.72108492996674, "grad_norm": 0.5967784434971286, "learning_rate": 2.507022316204641e-07, "loss": 0.2707, "step": 36740 }, { "epoch": 1.7211317749566684, "grad_norm": 0.5748214337967298, "learning_rate": 2.506194616077437e-07, "loss": 0.2514, "step": 36741 }, { "epoch": 1.7211786199465968, "grad_norm": 0.631631653746163, "learning_rate": 2.5053670453964286e-07, "loss": 0.2796, "step": 36742 }, { "epoch": 1.721225464936525, "grad_norm": 0.5771499054178116, "learning_rate": 2.5045396041663813e-07, "loss": 0.2597, "step": 36743 }, { "epoch": 1.7212723099264533, "grad_norm": 0.622619274916246, "learning_rate": 2.503712292392052e-07, "loss": 0.2762, "step": 36744 }, { "epoch": 1.7213191549163818, "grad_norm": 0.5610499038552632, "learning_rate": 2.502885110078201e-07, "loss": 0.2658, "step": 36745 }, { "epoch": 1.72136599990631, "grad_norm": 0.6232895948823362, "learning_rate": 2.502058057229595e-07, "loss": 0.2799, "step": 36746 }, { "epoch": 1.7214128448962382, "grad_norm": 0.5852447355798699, "learning_rate": 2.5012311338509896e-07, "loss": 0.2711, "step": 36747 }, { "epoch": 1.7214596898861667, "grad_norm": 0.5681106466104856, "learning_rate": 2.5004043399471464e-07, "loss": 0.2659, "step": 36748 }, { "epoch": 1.721506534876095, "grad_norm": 0.5460192407231403, "learning_rate": 2.499577675522824e-07, "loss": 0.2617, "step": 36749 }, { "epoch": 1.7215533798660232, "grad_norm": 0.572560196621632, "learning_rate": 2.4987511405827695e-07, "loss": 0.2585, "step": 36750 }, { "epoch": 1.7216002248559517, "grad_norm": 0.6085223141046936, "learning_rate": 2.497924735131749e-07, "loss": 0.2905, "step": 36751 }, { "epoch": 1.7216470698458801, "grad_norm": 0.600617886650288, "learning_rate": 2.4970984591745156e-07, "loss": 0.2732, "step": 36752 }, { "epoch": 1.7216939148358084, "grad_norm": 0.5983261624037398, "learning_rate": 2.496272312715825e-07, "loss": 0.2667, "step": 36753 }, { "epoch": 1.7217407598257366, "grad_norm": 0.6074768173574883, "learning_rate": 2.495446295760434e-07, "loss": 0.2691, "step": 36754 }, { "epoch": 1.721787604815665, "grad_norm": 0.5526976432195136, "learning_rate": 2.4946204083130895e-07, "loss": 0.27, "step": 36755 }, { "epoch": 1.7218344498055933, "grad_norm": 0.5733432636161683, "learning_rate": 2.493794650378553e-07, "loss": 0.2639, "step": 36756 }, { "epoch": 1.7218812947955215, "grad_norm": 0.6169366218679275, "learning_rate": 2.4929690219615666e-07, "loss": 0.2697, "step": 36757 }, { "epoch": 1.72192813978545, "grad_norm": 0.5995634197833613, "learning_rate": 2.4921435230668866e-07, "loss": 0.2656, "step": 36758 }, { "epoch": 1.7219749847753782, "grad_norm": 0.6019581678241013, "learning_rate": 2.491318153699265e-07, "loss": 0.2778, "step": 36759 }, { "epoch": 1.7220218297653065, "grad_norm": 0.5925423111658231, "learning_rate": 2.4904929138634485e-07, "loss": 0.2773, "step": 36760 }, { "epoch": 1.722068674755235, "grad_norm": 0.5709364094345794, "learning_rate": 2.489667803564194e-07, "loss": 0.256, "step": 36761 }, { "epoch": 1.7221155197451634, "grad_norm": 0.6197972436683398, "learning_rate": 2.4888428228062367e-07, "loss": 0.2644, "step": 36762 }, { "epoch": 1.7221623647350914, "grad_norm": 0.5952734688403606, "learning_rate": 2.4880179715943363e-07, "loss": 0.2677, "step": 36763 }, { "epoch": 1.72220920972502, "grad_norm": 0.6154081530036281, "learning_rate": 2.4871932499332285e-07, "loss": 0.2731, "step": 36764 }, { "epoch": 1.7222560547149484, "grad_norm": 0.640059743426991, "learning_rate": 2.4863686578276675e-07, "loss": 0.274, "step": 36765 }, { "epoch": 1.7223028997048766, "grad_norm": 0.5938622480907175, "learning_rate": 2.4855441952823937e-07, "loss": 0.2707, "step": 36766 }, { "epoch": 1.7223497446948048, "grad_norm": 0.6312575610486243, "learning_rate": 2.4847198623021625e-07, "loss": 0.2833, "step": 36767 }, { "epoch": 1.7223965896847333, "grad_norm": 0.6185918871527646, "learning_rate": 2.483895658891702e-07, "loss": 0.2874, "step": 36768 }, { "epoch": 1.7224434346746615, "grad_norm": 0.5741683523469066, "learning_rate": 2.483071585055763e-07, "loss": 0.2785, "step": 36769 }, { "epoch": 1.7224902796645898, "grad_norm": 0.6125162354083332, "learning_rate": 2.482247640799093e-07, "loss": 0.2708, "step": 36770 }, { "epoch": 1.7225371246545182, "grad_norm": 0.6010460431516208, "learning_rate": 2.4814238261264253e-07, "loss": 0.2793, "step": 36771 }, { "epoch": 1.7225839696444465, "grad_norm": 0.5735937523352161, "learning_rate": 2.4806001410424997e-07, "loss": 0.2699, "step": 36772 }, { "epoch": 1.7226308146343747, "grad_norm": 0.5969135516652511, "learning_rate": 2.4797765855520686e-07, "loss": 0.2721, "step": 36773 }, { "epoch": 1.7226776596243032, "grad_norm": 0.5786071526323188, "learning_rate": 2.4789531596598553e-07, "loss": 0.2607, "step": 36774 }, { "epoch": 1.7227245046142317, "grad_norm": 0.6351616896794535, "learning_rate": 2.478129863370607e-07, "loss": 0.2688, "step": 36775 }, { "epoch": 1.7227713496041597, "grad_norm": 0.5954820863363528, "learning_rate": 2.477306696689061e-07, "loss": 0.2853, "step": 36776 }, { "epoch": 1.7228181945940881, "grad_norm": 0.5948285795449307, "learning_rate": 2.4764836596199604e-07, "loss": 0.263, "step": 36777 }, { "epoch": 1.7228650395840166, "grad_norm": 0.6739221578595118, "learning_rate": 2.47566075216803e-07, "loss": 0.2995, "step": 36778 }, { "epoch": 1.7229118845739448, "grad_norm": 0.5836846944965304, "learning_rate": 2.4748379743380125e-07, "loss": 0.2645, "step": 36779 }, { "epoch": 1.722958729563873, "grad_norm": 0.5830131350383719, "learning_rate": 2.474015326134638e-07, "loss": 0.272, "step": 36780 }, { "epoch": 1.7230055745538015, "grad_norm": 0.5815675625574707, "learning_rate": 2.4731928075626437e-07, "loss": 0.2536, "step": 36781 }, { "epoch": 1.7230524195437298, "grad_norm": 0.6364249961499833, "learning_rate": 2.4723704186267653e-07, "loss": 0.2904, "step": 36782 }, { "epoch": 1.723099264533658, "grad_norm": 0.5851589263760062, "learning_rate": 2.4715481593317353e-07, "loss": 0.2522, "step": 36783 }, { "epoch": 1.7231461095235865, "grad_norm": 0.6091796411972681, "learning_rate": 2.470726029682277e-07, "loss": 0.2695, "step": 36784 }, { "epoch": 1.7231929545135147, "grad_norm": 0.6241127661728524, "learning_rate": 2.4699040296831345e-07, "loss": 0.2721, "step": 36785 }, { "epoch": 1.723239799503443, "grad_norm": 0.5652452249904331, "learning_rate": 2.4690821593390286e-07, "loss": 0.2562, "step": 36786 }, { "epoch": 1.7232866444933714, "grad_norm": 0.5742421816777827, "learning_rate": 2.4682604186546886e-07, "loss": 0.267, "step": 36787 }, { "epoch": 1.7233334894833, "grad_norm": 0.563675618344998, "learning_rate": 2.467438807634848e-07, "loss": 0.2564, "step": 36788 }, { "epoch": 1.7233803344732281, "grad_norm": 0.5961022533540044, "learning_rate": 2.466617326284235e-07, "loss": 0.2673, "step": 36789 }, { "epoch": 1.7234271794631564, "grad_norm": 0.5769580927628082, "learning_rate": 2.465795974607579e-07, "loss": 0.2679, "step": 36790 }, { "epoch": 1.7234740244530848, "grad_norm": 0.5819366470282072, "learning_rate": 2.464974752609603e-07, "loss": 0.2595, "step": 36791 }, { "epoch": 1.723520869443013, "grad_norm": 0.5840242027225206, "learning_rate": 2.4641536602950293e-07, "loss": 0.2628, "step": 36792 }, { "epoch": 1.7235677144329413, "grad_norm": 0.5826761205156619, "learning_rate": 2.4633326976685885e-07, "loss": 0.2511, "step": 36793 }, { "epoch": 1.7236145594228698, "grad_norm": 0.6518011090369669, "learning_rate": 2.462511864735004e-07, "loss": 0.2649, "step": 36794 }, { "epoch": 1.723661404412798, "grad_norm": 0.6215822100576143, "learning_rate": 2.461691161498997e-07, "loss": 0.2685, "step": 36795 }, { "epoch": 1.7237082494027263, "grad_norm": 0.5831075639745096, "learning_rate": 2.4608705879652916e-07, "loss": 0.2627, "step": 36796 }, { "epoch": 1.7237550943926547, "grad_norm": 0.5742647509784075, "learning_rate": 2.4600501441386176e-07, "loss": 0.2645, "step": 36797 }, { "epoch": 1.7238019393825832, "grad_norm": 0.6156285790245587, "learning_rate": 2.4592298300236903e-07, "loss": 0.2701, "step": 36798 }, { "epoch": 1.7238487843725112, "grad_norm": 0.5959749821823426, "learning_rate": 2.458409645625223e-07, "loss": 0.2674, "step": 36799 }, { "epoch": 1.7238956293624397, "grad_norm": 0.6197198393455758, "learning_rate": 2.457589590947945e-07, "loss": 0.2856, "step": 36800 }, { "epoch": 1.7239424743523681, "grad_norm": 0.5998692154372695, "learning_rate": 2.456769665996572e-07, "loss": 0.2725, "step": 36801 }, { "epoch": 1.7239893193422964, "grad_norm": 0.5930884540575941, "learning_rate": 2.455949870775823e-07, "loss": 0.2724, "step": 36802 }, { "epoch": 1.7240361643322246, "grad_norm": 0.6106346949594768, "learning_rate": 2.455130205290421e-07, "loss": 0.2732, "step": 36803 }, { "epoch": 1.724083009322153, "grad_norm": 0.5973182202266809, "learning_rate": 2.4543106695450734e-07, "loss": 0.272, "step": 36804 }, { "epoch": 1.7241298543120813, "grad_norm": 0.596932692149252, "learning_rate": 2.453491263544508e-07, "loss": 0.2685, "step": 36805 }, { "epoch": 1.7241766993020096, "grad_norm": 0.6290034602595561, "learning_rate": 2.4526719872934283e-07, "loss": 0.2672, "step": 36806 }, { "epoch": 1.724223544291938, "grad_norm": 0.6401011862017592, "learning_rate": 2.451852840796554e-07, "loss": 0.2747, "step": 36807 }, { "epoch": 1.7242703892818663, "grad_norm": 0.5689666563095744, "learning_rate": 2.451033824058596e-07, "loss": 0.2746, "step": 36808 }, { "epoch": 1.7243172342717945, "grad_norm": 0.6237871501568278, "learning_rate": 2.4502149370842805e-07, "loss": 0.2671, "step": 36809 }, { "epoch": 1.724364079261723, "grad_norm": 0.5813072411714836, "learning_rate": 2.449396179878302e-07, "loss": 0.2726, "step": 36810 }, { "epoch": 1.7244109242516514, "grad_norm": 0.6294142526447571, "learning_rate": 2.448577552445383e-07, "loss": 0.2863, "step": 36811 }, { "epoch": 1.7244577692415795, "grad_norm": 0.5726120890599191, "learning_rate": 2.4477590547902357e-07, "loss": 0.2598, "step": 36812 }, { "epoch": 1.724504614231508, "grad_norm": 0.6361908515854349, "learning_rate": 2.4469406869175623e-07, "loss": 0.2717, "step": 36813 }, { "epoch": 1.7245514592214364, "grad_norm": 0.628360642711585, "learning_rate": 2.446122448832075e-07, "loss": 0.281, "step": 36814 }, { "epoch": 1.7245983042113646, "grad_norm": 0.6748482169456917, "learning_rate": 2.445304340538493e-07, "loss": 0.309, "step": 36815 }, { "epoch": 1.7246451492012929, "grad_norm": 0.6094525889867404, "learning_rate": 2.4444863620415063e-07, "loss": 0.269, "step": 36816 }, { "epoch": 1.7246919941912213, "grad_norm": 0.6383439182426442, "learning_rate": 2.4436685133458344e-07, "loss": 0.2863, "step": 36817 }, { "epoch": 1.7247388391811496, "grad_norm": 0.6046250491545812, "learning_rate": 2.4428507944561807e-07, "loss": 0.2816, "step": 36818 }, { "epoch": 1.7247856841710778, "grad_norm": 0.6207190111982965, "learning_rate": 2.4420332053772563e-07, "loss": 0.2651, "step": 36819 }, { "epoch": 1.7248325291610063, "grad_norm": 0.5839184777096343, "learning_rate": 2.4412157461137534e-07, "loss": 0.2642, "step": 36820 }, { "epoch": 1.7248793741509345, "grad_norm": 0.6402041300878364, "learning_rate": 2.440398416670392e-07, "loss": 0.2859, "step": 36821 }, { "epoch": 1.7249262191408627, "grad_norm": 0.5385146955614173, "learning_rate": 2.439581217051862e-07, "loss": 0.2512, "step": 36822 }, { "epoch": 1.7249730641307912, "grad_norm": 0.597366502414977, "learning_rate": 2.43876414726287e-07, "loss": 0.2706, "step": 36823 }, { "epoch": 1.7250199091207197, "grad_norm": 0.6350804185397226, "learning_rate": 2.4379472073081226e-07, "loss": 0.2888, "step": 36824 }, { "epoch": 1.725066754110648, "grad_norm": 0.6035689797142744, "learning_rate": 2.437130397192317e-07, "loss": 0.2821, "step": 36825 }, { "epoch": 1.7251135991005762, "grad_norm": 0.6340084835684621, "learning_rate": 2.4363137169201577e-07, "loss": 0.2618, "step": 36826 }, { "epoch": 1.7251604440905046, "grad_norm": 0.5831077996295418, "learning_rate": 2.4354971664963394e-07, "loss": 0.2751, "step": 36827 }, { "epoch": 1.7252072890804329, "grad_norm": 0.5759318398075528, "learning_rate": 2.434680745925569e-07, "loss": 0.2484, "step": 36828 }, { "epoch": 1.725254134070361, "grad_norm": 0.6122557589980338, "learning_rate": 2.433864455212531e-07, "loss": 0.2923, "step": 36829 }, { "epoch": 1.7253009790602896, "grad_norm": 0.6533243717756205, "learning_rate": 2.433048294361934e-07, "loss": 0.2734, "step": 36830 }, { "epoch": 1.7253478240502178, "grad_norm": 0.5878128605883048, "learning_rate": 2.432232263378473e-07, "loss": 0.2542, "step": 36831 }, { "epoch": 1.725394669040146, "grad_norm": 0.6620724860706432, "learning_rate": 2.4314163622668444e-07, "loss": 0.2861, "step": 36832 }, { "epoch": 1.7254415140300745, "grad_norm": 0.6192176061490595, "learning_rate": 2.4306005910317397e-07, "loss": 0.2718, "step": 36833 }, { "epoch": 1.725488359020003, "grad_norm": 0.6450208849023248, "learning_rate": 2.429784949677863e-07, "loss": 0.2761, "step": 36834 }, { "epoch": 1.725535204009931, "grad_norm": 0.6391619424682281, "learning_rate": 2.428969438209894e-07, "loss": 0.2722, "step": 36835 }, { "epoch": 1.7255820489998595, "grad_norm": 0.5762866537234843, "learning_rate": 2.428154056632531e-07, "loss": 0.2636, "step": 36836 }, { "epoch": 1.725628893989788, "grad_norm": 0.604030620826477, "learning_rate": 2.4273388049504713e-07, "loss": 0.2726, "step": 36837 }, { "epoch": 1.7256757389797162, "grad_norm": 0.5876821105122301, "learning_rate": 2.426523683168402e-07, "loss": 0.264, "step": 36838 }, { "epoch": 1.7257225839696444, "grad_norm": 0.5988603211438306, "learning_rate": 2.4257086912910207e-07, "loss": 0.2645, "step": 36839 }, { "epoch": 1.7257694289595729, "grad_norm": 0.6095991448398249, "learning_rate": 2.424893829323011e-07, "loss": 0.2727, "step": 36840 }, { "epoch": 1.725816273949501, "grad_norm": 0.6190176620386758, "learning_rate": 2.4240790972690583e-07, "loss": 0.2947, "step": 36841 }, { "epoch": 1.7258631189394293, "grad_norm": 0.5777178965689604, "learning_rate": 2.423264495133856e-07, "loss": 0.2532, "step": 36842 }, { "epoch": 1.7259099639293578, "grad_norm": 0.613498002174869, "learning_rate": 2.4224500229220934e-07, "loss": 0.2656, "step": 36843 }, { "epoch": 1.725956808919286, "grad_norm": 0.5805441663231452, "learning_rate": 2.4216356806384566e-07, "loss": 0.2706, "step": 36844 }, { "epoch": 1.7260036539092143, "grad_norm": 0.5656027567726821, "learning_rate": 2.4208214682876293e-07, "loss": 0.2676, "step": 36845 }, { "epoch": 1.7260504988991427, "grad_norm": 0.5979295909027225, "learning_rate": 2.420007385874307e-07, "loss": 0.2755, "step": 36846 }, { "epoch": 1.7260973438890712, "grad_norm": 0.5906590725687689, "learning_rate": 2.4191934334031665e-07, "loss": 0.2728, "step": 36847 }, { "epoch": 1.7261441888789992, "grad_norm": 0.5957013736394587, "learning_rate": 2.4183796108788847e-07, "loss": 0.2616, "step": 36848 }, { "epoch": 1.7261910338689277, "grad_norm": 0.6012006204946426, "learning_rate": 2.417565918306156e-07, "loss": 0.2764, "step": 36849 }, { "epoch": 1.7262378788588562, "grad_norm": 0.6386082815222084, "learning_rate": 2.4167523556896585e-07, "loss": 0.2729, "step": 36850 }, { "epoch": 1.7262847238487844, "grad_norm": 0.6371123533498398, "learning_rate": 2.415938923034072e-07, "loss": 0.278, "step": 36851 }, { "epoch": 1.7263315688387126, "grad_norm": 0.6233451110438387, "learning_rate": 2.415125620344089e-07, "loss": 0.2879, "step": 36852 }, { "epoch": 1.726378413828641, "grad_norm": 0.5848086041068188, "learning_rate": 2.4143124476243757e-07, "loss": 0.2735, "step": 36853 }, { "epoch": 1.7264252588185693, "grad_norm": 0.6427934729583678, "learning_rate": 2.413499404879624e-07, "loss": 0.2835, "step": 36854 }, { "epoch": 1.7264721038084976, "grad_norm": 0.5483225953442223, "learning_rate": 2.4126864921144993e-07, "loss": 0.2601, "step": 36855 }, { "epoch": 1.726518948798426, "grad_norm": 0.5995142064921392, "learning_rate": 2.411873709333687e-07, "loss": 0.2714, "step": 36856 }, { "epoch": 1.7265657937883543, "grad_norm": 0.6515662710199254, "learning_rate": 2.4110610565418665e-07, "loss": 0.2588, "step": 36857 }, { "epoch": 1.7266126387782825, "grad_norm": 0.6321760957852891, "learning_rate": 2.410248533743714e-07, "loss": 0.2841, "step": 36858 }, { "epoch": 1.726659483768211, "grad_norm": 0.6237039315911548, "learning_rate": 2.4094361409438993e-07, "loss": 0.2628, "step": 36859 }, { "epoch": 1.7267063287581395, "grad_norm": 0.5992167328826469, "learning_rate": 2.408623878147101e-07, "loss": 0.2699, "step": 36860 }, { "epoch": 1.7267531737480677, "grad_norm": 0.631127338577135, "learning_rate": 2.407811745358002e-07, "loss": 0.2698, "step": 36861 }, { "epoch": 1.726800018737996, "grad_norm": 0.5896173657165998, "learning_rate": 2.406999742581262e-07, "loss": 0.2681, "step": 36862 }, { "epoch": 1.7268468637279244, "grad_norm": 0.6352081926698185, "learning_rate": 2.406187869821558e-07, "loss": 0.2753, "step": 36863 }, { "epoch": 1.7268937087178526, "grad_norm": 0.5454053472534885, "learning_rate": 2.4053761270835695e-07, "loss": 0.2591, "step": 36864 }, { "epoch": 1.7269405537077809, "grad_norm": 0.5389477156923076, "learning_rate": 2.4045645143719596e-07, "loss": 0.2403, "step": 36865 }, { "epoch": 1.7269873986977093, "grad_norm": 0.6108523694082281, "learning_rate": 2.4037530316914e-07, "loss": 0.2678, "step": 36866 }, { "epoch": 1.7270342436876376, "grad_norm": 0.6114934562506085, "learning_rate": 2.4029416790465635e-07, "loss": 0.2823, "step": 36867 }, { "epoch": 1.7270810886775658, "grad_norm": 0.6242496269772775, "learning_rate": 2.4021304564421227e-07, "loss": 0.2632, "step": 36868 }, { "epoch": 1.7271279336674943, "grad_norm": 0.5846413940807679, "learning_rate": 2.4013193638827345e-07, "loss": 0.2476, "step": 36869 }, { "epoch": 1.7271747786574227, "grad_norm": 0.5884497284679326, "learning_rate": 2.400508401373081e-07, "loss": 0.2604, "step": 36870 }, { "epoch": 1.7272216236473508, "grad_norm": 0.6299107628787107, "learning_rate": 2.399697568917814e-07, "loss": 0.2771, "step": 36871 }, { "epoch": 1.7272684686372792, "grad_norm": 0.6090436175886674, "learning_rate": 2.39888686652161e-07, "loss": 0.2725, "step": 36872 }, { "epoch": 1.7273153136272077, "grad_norm": 0.6060367760858548, "learning_rate": 2.3980762941891306e-07, "loss": 0.281, "step": 36873 }, { "epoch": 1.727362158617136, "grad_norm": 0.5992149038851456, "learning_rate": 2.39726585192504e-07, "loss": 0.268, "step": 36874 }, { "epoch": 1.7274090036070642, "grad_norm": 0.5608978172861038, "learning_rate": 2.3964555397340075e-07, "loss": 0.2674, "step": 36875 }, { "epoch": 1.7274558485969926, "grad_norm": 0.6324260374086115, "learning_rate": 2.395645357620696e-07, "loss": 0.279, "step": 36876 }, { "epoch": 1.7275026935869209, "grad_norm": 0.5928485778036193, "learning_rate": 2.394835305589757e-07, "loss": 0.2692, "step": 36877 }, { "epoch": 1.7275495385768491, "grad_norm": 0.6686524526352207, "learning_rate": 2.3940253836458594e-07, "loss": 0.274, "step": 36878 }, { "epoch": 1.7275963835667776, "grad_norm": 0.5963147324765615, "learning_rate": 2.3932155917936626e-07, "loss": 0.2719, "step": 36879 }, { "epoch": 1.7276432285567058, "grad_norm": 0.5328570464770723, "learning_rate": 2.3924059300378306e-07, "loss": 0.2652, "step": 36880 }, { "epoch": 1.727690073546634, "grad_norm": 0.5720637810824231, "learning_rate": 2.3915963983830225e-07, "loss": 0.2487, "step": 36881 }, { "epoch": 1.7277369185365625, "grad_norm": 0.569954232856525, "learning_rate": 2.390786996833891e-07, "loss": 0.2556, "step": 36882 }, { "epoch": 1.727783763526491, "grad_norm": 0.6336552033399752, "learning_rate": 2.3899777253951015e-07, "loss": 0.2705, "step": 36883 }, { "epoch": 1.727830608516419, "grad_norm": 0.6025844122941467, "learning_rate": 2.389168584071305e-07, "loss": 0.277, "step": 36884 }, { "epoch": 1.7278774535063475, "grad_norm": 0.629406088472734, "learning_rate": 2.3883595728671577e-07, "loss": 0.2884, "step": 36885 }, { "epoch": 1.727924298496276, "grad_norm": 0.6146151579137772, "learning_rate": 2.3875506917873217e-07, "loss": 0.2586, "step": 36886 }, { "epoch": 1.7279711434862042, "grad_norm": 0.591597403755273, "learning_rate": 2.3867419408364426e-07, "loss": 0.2595, "step": 36887 }, { "epoch": 1.7280179884761324, "grad_norm": 0.5736353058842741, "learning_rate": 2.3859333200191904e-07, "loss": 0.2585, "step": 36888 }, { "epoch": 1.7280648334660609, "grad_norm": 0.5895229124863677, "learning_rate": 2.3851248293402046e-07, "loss": 0.2727, "step": 36889 }, { "epoch": 1.7281116784559891, "grad_norm": 0.6083303681341463, "learning_rate": 2.3843164688041376e-07, "loss": 0.2746, "step": 36890 }, { "epoch": 1.7281585234459174, "grad_norm": 0.5913140900222589, "learning_rate": 2.3835082384156438e-07, "loss": 0.2599, "step": 36891 }, { "epoch": 1.7282053684358458, "grad_norm": 0.5929177829098705, "learning_rate": 2.3827001381793779e-07, "loss": 0.2698, "step": 36892 }, { "epoch": 1.728252213425774, "grad_norm": 0.6079355544069721, "learning_rate": 2.3818921680999863e-07, "loss": 0.2617, "step": 36893 }, { "epoch": 1.7282990584157023, "grad_norm": 0.5789950074810514, "learning_rate": 2.381084328182126e-07, "loss": 0.2688, "step": 36894 }, { "epoch": 1.7283459034056308, "grad_norm": 0.5942280498818517, "learning_rate": 2.3802766184304353e-07, "loss": 0.2627, "step": 36895 }, { "epoch": 1.7283927483955592, "grad_norm": 0.5922534611912763, "learning_rate": 2.3794690388495718e-07, "loss": 0.2778, "step": 36896 }, { "epoch": 1.7284395933854875, "grad_norm": 0.5997474629744508, "learning_rate": 2.378661589444173e-07, "loss": 0.2742, "step": 36897 }, { "epoch": 1.7284864383754157, "grad_norm": 0.6060581800021657, "learning_rate": 2.3778542702188934e-07, "loss": 0.2777, "step": 36898 }, { "epoch": 1.7285332833653442, "grad_norm": 0.5848677395720395, "learning_rate": 2.3770470811783742e-07, "loss": 0.2646, "step": 36899 }, { "epoch": 1.7285801283552724, "grad_norm": 0.5740156904512093, "learning_rate": 2.376240022327267e-07, "loss": 0.2771, "step": 36900 }, { "epoch": 1.7286269733452007, "grad_norm": 0.5461740982989706, "learning_rate": 2.3754330936702124e-07, "loss": 0.2454, "step": 36901 }, { "epoch": 1.7286738183351291, "grad_norm": 0.5447904200761134, "learning_rate": 2.3746262952118516e-07, "loss": 0.2454, "step": 36902 }, { "epoch": 1.7287206633250574, "grad_norm": 0.5987108340969907, "learning_rate": 2.3738196269568332e-07, "loss": 0.2815, "step": 36903 }, { "epoch": 1.7287675083149856, "grad_norm": 0.6214374070596896, "learning_rate": 2.3730130889097923e-07, "loss": 0.2697, "step": 36904 }, { "epoch": 1.728814353304914, "grad_norm": 0.645460535057547, "learning_rate": 2.372206681075373e-07, "loss": 0.3043, "step": 36905 }, { "epoch": 1.7288611982948425, "grad_norm": 0.6174937218918977, "learning_rate": 2.3714004034582182e-07, "loss": 0.2722, "step": 36906 }, { "epoch": 1.7289080432847705, "grad_norm": 0.6230778425205646, "learning_rate": 2.370594256062972e-07, "loss": 0.2666, "step": 36907 }, { "epoch": 1.728954888274699, "grad_norm": 0.6580747195473056, "learning_rate": 2.3697882388942606e-07, "loss": 0.2559, "step": 36908 }, { "epoch": 1.7290017332646275, "grad_norm": 0.5753440100017001, "learning_rate": 2.3689823519567307e-07, "loss": 0.2749, "step": 36909 }, { "epoch": 1.7290485782545557, "grad_norm": 0.6440083068219589, "learning_rate": 2.3681765952550255e-07, "loss": 0.2816, "step": 36910 }, { "epoch": 1.729095423244484, "grad_norm": 0.6362371216310269, "learning_rate": 2.3673709687937697e-07, "loss": 0.2782, "step": 36911 }, { "epoch": 1.7291422682344124, "grad_norm": 0.5932798871898964, "learning_rate": 2.366565472577606e-07, "loss": 0.264, "step": 36912 }, { "epoch": 1.7291891132243407, "grad_norm": 0.59763950424498, "learning_rate": 2.3657601066111758e-07, "loss": 0.2635, "step": 36913 }, { "epoch": 1.729235958214269, "grad_norm": 0.6192968869885725, "learning_rate": 2.3649548708991e-07, "loss": 0.2714, "step": 36914 }, { "epoch": 1.7292828032041974, "grad_norm": 0.5707980307831185, "learning_rate": 2.3641497654460222e-07, "loss": 0.2697, "step": 36915 }, { "epoch": 1.7293296481941256, "grad_norm": 0.5765454967420615, "learning_rate": 2.3633447902565691e-07, "loss": 0.2544, "step": 36916 }, { "epoch": 1.7293764931840538, "grad_norm": 0.6254821142582087, "learning_rate": 2.3625399453353848e-07, "loss": 0.2585, "step": 36917 }, { "epoch": 1.7294233381739823, "grad_norm": 0.6209500440236988, "learning_rate": 2.36173523068709e-07, "loss": 0.2815, "step": 36918 }, { "epoch": 1.7294701831639108, "grad_norm": 0.615095536936906, "learning_rate": 2.3609306463163227e-07, "loss": 0.2875, "step": 36919 }, { "epoch": 1.7295170281538388, "grad_norm": 0.6544835665537018, "learning_rate": 2.360126192227702e-07, "loss": 0.2726, "step": 36920 }, { "epoch": 1.7295638731437672, "grad_norm": 0.6128376339110304, "learning_rate": 2.3593218684258678e-07, "loss": 0.2594, "step": 36921 }, { "epoch": 1.7296107181336957, "grad_norm": 0.6093112080675419, "learning_rate": 2.3585176749154448e-07, "loss": 0.2706, "step": 36922 }, { "epoch": 1.729657563123624, "grad_norm": 0.6410237774120588, "learning_rate": 2.3577136117010678e-07, "loss": 0.274, "step": 36923 }, { "epoch": 1.7297044081135522, "grad_norm": 0.6472654136597144, "learning_rate": 2.35690967878735e-07, "loss": 0.2899, "step": 36924 }, { "epoch": 1.7297512531034807, "grad_norm": 0.5864897739302383, "learning_rate": 2.356105876178935e-07, "loss": 0.2726, "step": 36925 }, { "epoch": 1.729798098093409, "grad_norm": 0.6064098027632568, "learning_rate": 2.3553022038804302e-07, "loss": 0.2683, "step": 36926 }, { "epoch": 1.7298449430833371, "grad_norm": 0.5856894748778022, "learning_rate": 2.354498661896473e-07, "loss": 0.2716, "step": 36927 }, { "epoch": 1.7298917880732656, "grad_norm": 0.6034420665733339, "learning_rate": 2.3536952502316828e-07, "loss": 0.2681, "step": 36928 }, { "epoch": 1.7299386330631938, "grad_norm": 0.6091185489048094, "learning_rate": 2.352891968890686e-07, "loss": 0.2698, "step": 36929 }, { "epoch": 1.729985478053122, "grad_norm": 0.6200445404103323, "learning_rate": 2.3520888178781064e-07, "loss": 0.2812, "step": 36930 }, { "epoch": 1.7300323230430505, "grad_norm": 0.5956807641052988, "learning_rate": 2.3512857971985632e-07, "loss": 0.2775, "step": 36931 }, { "epoch": 1.730079168032979, "grad_norm": 0.5505357590723546, "learning_rate": 2.3504829068566742e-07, "loss": 0.2442, "step": 36932 }, { "epoch": 1.7301260130229072, "grad_norm": 0.5953255887175574, "learning_rate": 2.349680146857064e-07, "loss": 0.253, "step": 36933 }, { "epoch": 1.7301728580128355, "grad_norm": 0.6054527864613203, "learning_rate": 2.348877517204351e-07, "loss": 0.2736, "step": 36934 }, { "epoch": 1.730219703002764, "grad_norm": 0.5573321271207312, "learning_rate": 2.3480750179031537e-07, "loss": 0.2535, "step": 36935 }, { "epoch": 1.7302665479926922, "grad_norm": 0.6314910532350818, "learning_rate": 2.3472726489580933e-07, "loss": 0.2765, "step": 36936 }, { "epoch": 1.7303133929826204, "grad_norm": 0.5823201369502129, "learning_rate": 2.3464704103737884e-07, "loss": 0.2565, "step": 36937 }, { "epoch": 1.730360237972549, "grad_norm": 0.6059674027580471, "learning_rate": 2.345668302154852e-07, "loss": 0.2758, "step": 36938 }, { "epoch": 1.7304070829624771, "grad_norm": 0.6215734999814869, "learning_rate": 2.3448663243058972e-07, "loss": 0.2809, "step": 36939 }, { "epoch": 1.7304539279524054, "grad_norm": 0.6189934803914835, "learning_rate": 2.3440644768315425e-07, "loss": 0.2558, "step": 36940 }, { "epoch": 1.7305007729423338, "grad_norm": 0.5859682775764524, "learning_rate": 2.3432627597364004e-07, "loss": 0.2623, "step": 36941 }, { "epoch": 1.7305476179322623, "grad_norm": 0.5812401632341471, "learning_rate": 2.3424611730250902e-07, "loss": 0.261, "step": 36942 }, { "epoch": 1.7305944629221903, "grad_norm": 0.6118042793093719, "learning_rate": 2.3416597167022242e-07, "loss": 0.2698, "step": 36943 }, { "epoch": 1.7306413079121188, "grad_norm": 0.5780770476141107, "learning_rate": 2.3408583907724048e-07, "loss": 0.254, "step": 36944 }, { "epoch": 1.7306881529020472, "grad_norm": 0.5885696212191275, "learning_rate": 2.3400571952402585e-07, "loss": 0.2618, "step": 36945 }, { "epoch": 1.7307349978919755, "grad_norm": 0.59818757167335, "learning_rate": 2.339256130110379e-07, "loss": 0.2693, "step": 36946 }, { "epoch": 1.7307818428819037, "grad_norm": 0.5802737611152424, "learning_rate": 2.3384551953873875e-07, "loss": 0.275, "step": 36947 }, { "epoch": 1.7308286878718322, "grad_norm": 0.6337207187614149, "learning_rate": 2.3376543910758915e-07, "loss": 0.2715, "step": 36948 }, { "epoch": 1.7308755328617604, "grad_norm": 0.6659925108810291, "learning_rate": 2.3368537171805012e-07, "loss": 0.268, "step": 36949 }, { "epoch": 1.7309223778516887, "grad_norm": 0.6576270191485535, "learning_rate": 2.3360531737058183e-07, "loss": 0.2935, "step": 36950 }, { "epoch": 1.7309692228416171, "grad_norm": 0.6595460977135675, "learning_rate": 2.3352527606564507e-07, "loss": 0.2741, "step": 36951 }, { "epoch": 1.7310160678315454, "grad_norm": 0.5922087676402703, "learning_rate": 2.3344524780370137e-07, "loss": 0.2714, "step": 36952 }, { "epoch": 1.7310629128214736, "grad_norm": 0.6086343140714816, "learning_rate": 2.333652325852101e-07, "loss": 0.2669, "step": 36953 }, { "epoch": 1.731109757811402, "grad_norm": 0.6300883701461337, "learning_rate": 2.33285230410632e-07, "loss": 0.271, "step": 36954 }, { "epoch": 1.7311566028013305, "grad_norm": 0.6072903213585227, "learning_rate": 2.3320524128042837e-07, "loss": 0.2758, "step": 36955 }, { "epoch": 1.7312034477912586, "grad_norm": 0.5577591158129052, "learning_rate": 2.331252651950583e-07, "loss": 0.2527, "step": 36956 }, { "epoch": 1.731250292781187, "grad_norm": 0.6056087390336699, "learning_rate": 2.3304530215498277e-07, "loss": 0.2728, "step": 36957 }, { "epoch": 1.7312971377711155, "grad_norm": 0.6238046083902028, "learning_rate": 2.3296535216066145e-07, "loss": 0.2799, "step": 36958 }, { "epoch": 1.7313439827610437, "grad_norm": 0.5689667481646826, "learning_rate": 2.3288541521255535e-07, "loss": 0.2558, "step": 36959 }, { "epoch": 1.731390827750972, "grad_norm": 0.6204350751759604, "learning_rate": 2.3280549131112357e-07, "loss": 0.2798, "step": 36960 }, { "epoch": 1.7314376727409004, "grad_norm": 0.5637665970998093, "learning_rate": 2.3272558045682652e-07, "loss": 0.2524, "step": 36961 }, { "epoch": 1.7314845177308287, "grad_norm": 0.5587405956871946, "learning_rate": 2.326456826501236e-07, "loss": 0.2547, "step": 36962 }, { "epoch": 1.731531362720757, "grad_norm": 0.5335166553520214, "learning_rate": 2.325657978914747e-07, "loss": 0.2567, "step": 36963 }, { "epoch": 1.7315782077106854, "grad_norm": 0.5661344388121708, "learning_rate": 2.3248592618134002e-07, "loss": 0.2685, "step": 36964 }, { "epoch": 1.7316250527006136, "grad_norm": 0.5699121247695409, "learning_rate": 2.3240606752017863e-07, "loss": 0.2657, "step": 36965 }, { "epoch": 1.7316718976905419, "grad_norm": 0.5570267298310202, "learning_rate": 2.32326221908451e-07, "loss": 0.2524, "step": 36966 }, { "epoch": 1.7317187426804703, "grad_norm": 0.5931753954163894, "learning_rate": 2.3224638934661563e-07, "loss": 0.2646, "step": 36967 }, { "epoch": 1.7317655876703988, "grad_norm": 0.5719071800412854, "learning_rate": 2.3216656983513247e-07, "loss": 0.2751, "step": 36968 }, { "epoch": 1.731812432660327, "grad_norm": 0.5857113482267824, "learning_rate": 2.3208676337446055e-07, "loss": 0.2576, "step": 36969 }, { "epoch": 1.7318592776502553, "grad_norm": 0.5926368730810625, "learning_rate": 2.3200696996505927e-07, "loss": 0.2585, "step": 36970 }, { "epoch": 1.7319061226401837, "grad_norm": 0.6095372536952821, "learning_rate": 2.319271896073877e-07, "loss": 0.2766, "step": 36971 }, { "epoch": 1.731952967630112, "grad_norm": 0.5971171666513645, "learning_rate": 2.3184742230190543e-07, "loss": 0.2628, "step": 36972 }, { "epoch": 1.7319998126200402, "grad_norm": 0.6176934917049401, "learning_rate": 2.3176766804907103e-07, "loss": 0.259, "step": 36973 }, { "epoch": 1.7320466576099687, "grad_norm": 0.5666781252956044, "learning_rate": 2.3168792684934384e-07, "loss": 0.2474, "step": 36974 }, { "epoch": 1.732093502599897, "grad_norm": 0.6042716740189954, "learning_rate": 2.3160819870318208e-07, "loss": 0.2663, "step": 36975 }, { "epoch": 1.7321403475898252, "grad_norm": 0.6119092360489488, "learning_rate": 2.3152848361104513e-07, "loss": 0.2818, "step": 36976 }, { "epoch": 1.7321871925797536, "grad_norm": 0.6470567209225945, "learning_rate": 2.314487815733915e-07, "loss": 0.2742, "step": 36977 }, { "epoch": 1.732234037569682, "grad_norm": 0.6187924286407421, "learning_rate": 2.3136909259068002e-07, "loss": 0.2689, "step": 36978 }, { "epoch": 1.73228088255961, "grad_norm": 0.6085150193670336, "learning_rate": 2.3128941666336947e-07, "loss": 0.2605, "step": 36979 }, { "epoch": 1.7323277275495386, "grad_norm": 0.6279545198243253, "learning_rate": 2.3120975379191833e-07, "loss": 0.2666, "step": 36980 }, { "epoch": 1.732374572539467, "grad_norm": 0.612371987981109, "learning_rate": 2.3113010397678437e-07, "loss": 0.2771, "step": 36981 }, { "epoch": 1.7324214175293953, "grad_norm": 0.6032204557877764, "learning_rate": 2.3105046721842634e-07, "loss": 0.2729, "step": 36982 }, { "epoch": 1.7324682625193235, "grad_norm": 0.5976260241081653, "learning_rate": 2.3097084351730247e-07, "loss": 0.2716, "step": 36983 }, { "epoch": 1.732515107509252, "grad_norm": 0.5282029935564186, "learning_rate": 2.3089123287387105e-07, "loss": 0.2488, "step": 36984 }, { "epoch": 1.7325619524991802, "grad_norm": 0.6385437987151199, "learning_rate": 2.3081163528859057e-07, "loss": 0.2868, "step": 36985 }, { "epoch": 1.7326087974891085, "grad_norm": 0.616588112986855, "learning_rate": 2.30732050761919e-07, "loss": 0.2794, "step": 36986 }, { "epoch": 1.732655642479037, "grad_norm": 0.6471278341911723, "learning_rate": 2.3065247929431434e-07, "loss": 0.272, "step": 36987 }, { "epoch": 1.7327024874689652, "grad_norm": 0.5881434440639863, "learning_rate": 2.3057292088623367e-07, "loss": 0.2622, "step": 36988 }, { "epoch": 1.7327493324588934, "grad_norm": 0.5712527729902871, "learning_rate": 2.3049337553813529e-07, "loss": 0.2696, "step": 36989 }, { "epoch": 1.7327961774488219, "grad_norm": 0.6643837884538953, "learning_rate": 2.3041384325047738e-07, "loss": 0.2743, "step": 36990 }, { "epoch": 1.7328430224387503, "grad_norm": 0.6294828976491356, "learning_rate": 2.303343240237174e-07, "loss": 0.2838, "step": 36991 }, { "epoch": 1.7328898674286783, "grad_norm": 0.5752579550575414, "learning_rate": 2.3025481785831306e-07, "loss": 0.2725, "step": 36992 }, { "epoch": 1.7329367124186068, "grad_norm": 0.5452986700851101, "learning_rate": 2.301753247547217e-07, "loss": 0.246, "step": 36993 }, { "epoch": 1.7329835574085353, "grad_norm": 0.5829753711642527, "learning_rate": 2.3009584471340107e-07, "loss": 0.2672, "step": 36994 }, { "epoch": 1.7330304023984635, "grad_norm": 0.620006660579596, "learning_rate": 2.3001637773480773e-07, "loss": 0.2667, "step": 36995 }, { "epoch": 1.7330772473883918, "grad_norm": 0.5972665095657187, "learning_rate": 2.2993692381939991e-07, "loss": 0.2644, "step": 36996 }, { "epoch": 1.7331240923783202, "grad_norm": 0.5759382155797329, "learning_rate": 2.298574829676345e-07, "loss": 0.2653, "step": 36997 }, { "epoch": 1.7331709373682485, "grad_norm": 0.5834153671566003, "learning_rate": 2.2977805517996887e-07, "loss": 0.2602, "step": 36998 }, { "epoch": 1.7332177823581767, "grad_norm": 0.5637305999155393, "learning_rate": 2.2969864045685962e-07, "loss": 0.2467, "step": 36999 }, { "epoch": 1.7332646273481052, "grad_norm": 0.5854980828700049, "learning_rate": 2.296192387987642e-07, "loss": 0.2605, "step": 37000 }, { "epoch": 1.7333114723380334, "grad_norm": 0.5939065345700043, "learning_rate": 2.2953985020613993e-07, "loss": 0.2567, "step": 37001 }, { "epoch": 1.7333583173279616, "grad_norm": 0.5778390397286144, "learning_rate": 2.2946047467944239e-07, "loss": 0.2693, "step": 37002 }, { "epoch": 1.73340516231789, "grad_norm": 0.593885874851111, "learning_rate": 2.2938111221912945e-07, "loss": 0.2626, "step": 37003 }, { "epoch": 1.7334520073078186, "grad_norm": 0.5730909554438418, "learning_rate": 2.2930176282565776e-07, "loss": 0.2545, "step": 37004 }, { "epoch": 1.7334988522977468, "grad_norm": 0.6002365691138093, "learning_rate": 2.2922242649948328e-07, "loss": 0.2703, "step": 37005 }, { "epoch": 1.733545697287675, "grad_norm": 0.5818094487709148, "learning_rate": 2.291431032410632e-07, "loss": 0.2655, "step": 37006 }, { "epoch": 1.7335925422776035, "grad_norm": 0.532566745061951, "learning_rate": 2.290637930508535e-07, "loss": 0.2528, "step": 37007 }, { "epoch": 1.7336393872675318, "grad_norm": 0.6032528551859532, "learning_rate": 2.2898449592931167e-07, "loss": 0.2769, "step": 37008 }, { "epoch": 1.73368623225746, "grad_norm": 0.6098032621700533, "learning_rate": 2.2890521187689252e-07, "loss": 0.273, "step": 37009 }, { "epoch": 1.7337330772473885, "grad_norm": 0.5994542119917806, "learning_rate": 2.2882594089405353e-07, "loss": 0.2728, "step": 37010 }, { "epoch": 1.7337799222373167, "grad_norm": 0.5957385453294171, "learning_rate": 2.2874668298125012e-07, "loss": 0.2577, "step": 37011 }, { "epoch": 1.733826767227245, "grad_norm": 0.6445879224048577, "learning_rate": 2.2866743813893865e-07, "loss": 0.2815, "step": 37012 }, { "epoch": 1.7338736122171734, "grad_norm": 0.5688175191370787, "learning_rate": 2.2858820636757512e-07, "loss": 0.2658, "step": 37013 }, { "epoch": 1.7339204572071019, "grad_norm": 0.6229491408186785, "learning_rate": 2.285089876676158e-07, "loss": 0.2701, "step": 37014 }, { "epoch": 1.7339673021970299, "grad_norm": 0.5992154393849337, "learning_rate": 2.284297820395165e-07, "loss": 0.2626, "step": 37015 }, { "epoch": 1.7340141471869583, "grad_norm": 0.6102397579443769, "learning_rate": 2.283505894837329e-07, "loss": 0.261, "step": 37016 }, { "epoch": 1.7340609921768868, "grad_norm": 0.5682863077113115, "learning_rate": 2.282714100007205e-07, "loss": 0.2483, "step": 37017 }, { "epoch": 1.734107837166815, "grad_norm": 0.6288008716143861, "learning_rate": 2.2819224359093507e-07, "loss": 0.2686, "step": 37018 }, { "epoch": 1.7341546821567433, "grad_norm": 0.59141283591168, "learning_rate": 2.2811309025483201e-07, "loss": 0.2648, "step": 37019 }, { "epoch": 1.7342015271466718, "grad_norm": 0.5828712034753718, "learning_rate": 2.2803394999286742e-07, "loss": 0.2743, "step": 37020 }, { "epoch": 1.7342483721366, "grad_norm": 0.6320661205493182, "learning_rate": 2.2795482280549675e-07, "loss": 0.2736, "step": 37021 }, { "epoch": 1.7342952171265282, "grad_norm": 0.6806719719983864, "learning_rate": 2.2787570869317433e-07, "loss": 0.2929, "step": 37022 }, { "epoch": 1.7343420621164567, "grad_norm": 0.6138031788276478, "learning_rate": 2.2779660765635674e-07, "loss": 0.2658, "step": 37023 }, { "epoch": 1.734388907106385, "grad_norm": 0.6093878376146941, "learning_rate": 2.2771751969549839e-07, "loss": 0.2533, "step": 37024 }, { "epoch": 1.7344357520963132, "grad_norm": 0.5975642933896131, "learning_rate": 2.2763844481105441e-07, "loss": 0.268, "step": 37025 }, { "epoch": 1.7344825970862416, "grad_norm": 0.6273300369618685, "learning_rate": 2.2755938300348003e-07, "loss": 0.2935, "step": 37026 }, { "epoch": 1.73452944207617, "grad_norm": 0.5663976278186923, "learning_rate": 2.2748033427323013e-07, "loss": 0.2651, "step": 37027 }, { "epoch": 1.7345762870660981, "grad_norm": 0.6235254106587895, "learning_rate": 2.2740129862076023e-07, "loss": 0.2908, "step": 37028 }, { "epoch": 1.7346231320560266, "grad_norm": 0.6163980955632647, "learning_rate": 2.273222760465249e-07, "loss": 0.2847, "step": 37029 }, { "epoch": 1.734669977045955, "grad_norm": 0.646600067504891, "learning_rate": 2.27243266550978e-07, "loss": 0.2628, "step": 37030 }, { "epoch": 1.7347168220358833, "grad_norm": 0.5728829416772606, "learning_rate": 2.271642701345747e-07, "loss": 0.2649, "step": 37031 }, { "epoch": 1.7347636670258115, "grad_norm": 0.6167337000030431, "learning_rate": 2.270852867977699e-07, "loss": 0.2795, "step": 37032 }, { "epoch": 1.73481051201574, "grad_norm": 0.5995458705130035, "learning_rate": 2.2700631654101828e-07, "loss": 0.289, "step": 37033 }, { "epoch": 1.7348573570056682, "grad_norm": 0.5669191214352964, "learning_rate": 2.269273593647736e-07, "loss": 0.265, "step": 37034 }, { "epoch": 1.7349042019955965, "grad_norm": 0.6839801869442781, "learning_rate": 2.2684841526949136e-07, "loss": 0.2771, "step": 37035 }, { "epoch": 1.734951046985525, "grad_norm": 0.600957121891906, "learning_rate": 2.2676948425562506e-07, "loss": 0.2703, "step": 37036 }, { "epoch": 1.7349978919754532, "grad_norm": 0.5978489703202811, "learning_rate": 2.2669056632362851e-07, "loss": 0.2854, "step": 37037 }, { "epoch": 1.7350447369653814, "grad_norm": 0.5884661018027748, "learning_rate": 2.2661166147395663e-07, "loss": 0.2654, "step": 37038 }, { "epoch": 1.7350915819553099, "grad_norm": 0.6094092704631456, "learning_rate": 2.2653276970706294e-07, "loss": 0.2764, "step": 37039 }, { "epoch": 1.7351384269452383, "grad_norm": 0.5746903757305654, "learning_rate": 2.2645389102340183e-07, "loss": 0.2561, "step": 37040 }, { "epoch": 1.7351852719351666, "grad_norm": 0.5796933986799802, "learning_rate": 2.2637502542342788e-07, "loss": 0.2566, "step": 37041 }, { "epoch": 1.7352321169250948, "grad_norm": 0.6449732252701228, "learning_rate": 2.2629617290759325e-07, "loss": 0.272, "step": 37042 }, { "epoch": 1.7352789619150233, "grad_norm": 0.6385134828586967, "learning_rate": 2.2621733347635345e-07, "loss": 0.2861, "step": 37043 }, { "epoch": 1.7353258069049515, "grad_norm": 0.6305330644676473, "learning_rate": 2.2613850713016111e-07, "loss": 0.2836, "step": 37044 }, { "epoch": 1.7353726518948798, "grad_norm": 0.5954077493606199, "learning_rate": 2.260596938694698e-07, "loss": 0.2589, "step": 37045 }, { "epoch": 1.7354194968848082, "grad_norm": 0.6180998804641102, "learning_rate": 2.259808936947336e-07, "loss": 0.277, "step": 37046 }, { "epoch": 1.7354663418747365, "grad_norm": 0.6915202384963837, "learning_rate": 2.2590210660640626e-07, "loss": 0.2769, "step": 37047 }, { "epoch": 1.7355131868646647, "grad_norm": 0.6018745292745068, "learning_rate": 2.2582333260493998e-07, "loss": 0.2755, "step": 37048 }, { "epoch": 1.7355600318545932, "grad_norm": 0.6138746058428783, "learning_rate": 2.257445716907891e-07, "loss": 0.2848, "step": 37049 }, { "epoch": 1.7356068768445216, "grad_norm": 0.6206684740541584, "learning_rate": 2.2566582386440715e-07, "loss": 0.2914, "step": 37050 }, { "epoch": 1.7356537218344497, "grad_norm": 0.6048482158670929, "learning_rate": 2.25587089126246e-07, "loss": 0.2606, "step": 37051 }, { "epoch": 1.7357005668243781, "grad_norm": 0.5507292177447318, "learning_rate": 2.2550836747675969e-07, "loss": 0.2603, "step": 37052 }, { "epoch": 1.7357474118143066, "grad_norm": 0.6151772890449962, "learning_rate": 2.254296589164015e-07, "loss": 0.2688, "step": 37053 }, { "epoch": 1.7357942568042348, "grad_norm": 0.6479090182042484, "learning_rate": 2.2535096344562357e-07, "loss": 0.2697, "step": 37054 }, { "epoch": 1.735841101794163, "grad_norm": 0.5992090509146281, "learning_rate": 2.2527228106487885e-07, "loss": 0.2757, "step": 37055 }, { "epoch": 1.7358879467840915, "grad_norm": 0.650376911456055, "learning_rate": 2.2519361177462062e-07, "loss": 0.2893, "step": 37056 }, { "epoch": 1.7359347917740198, "grad_norm": 0.624836582312179, "learning_rate": 2.2511495557530182e-07, "loss": 0.2778, "step": 37057 }, { "epoch": 1.735981636763948, "grad_norm": 0.597942695754453, "learning_rate": 2.2503631246737433e-07, "loss": 0.2615, "step": 37058 }, { "epoch": 1.7360284817538765, "grad_norm": 0.5823064177574736, "learning_rate": 2.249576824512914e-07, "loss": 0.2809, "step": 37059 }, { "epoch": 1.7360753267438047, "grad_norm": 0.6042142340720361, "learning_rate": 2.248790655275049e-07, "loss": 0.2778, "step": 37060 }, { "epoch": 1.736122171733733, "grad_norm": 0.5915822301243687, "learning_rate": 2.2480046169646718e-07, "loss": 0.279, "step": 37061 }, { "epoch": 1.7361690167236614, "grad_norm": 0.6128661668658635, "learning_rate": 2.2472187095863128e-07, "loss": 0.2846, "step": 37062 }, { "epoch": 1.7362158617135899, "grad_norm": 0.5850020936640027, "learning_rate": 2.246432933144496e-07, "loss": 0.2705, "step": 37063 }, { "epoch": 1.736262706703518, "grad_norm": 0.6364182652147066, "learning_rate": 2.245647287643732e-07, "loss": 0.2652, "step": 37064 }, { "epoch": 1.7363095516934464, "grad_norm": 0.6408866972971411, "learning_rate": 2.2448617730885553e-07, "loss": 0.2933, "step": 37065 }, { "epoch": 1.7363563966833748, "grad_norm": 0.614588156557217, "learning_rate": 2.244076389483474e-07, "loss": 0.2742, "step": 37066 }, { "epoch": 1.736403241673303, "grad_norm": 0.6390798519745112, "learning_rate": 2.2432911368330146e-07, "loss": 0.2739, "step": 37067 }, { "epoch": 1.7364500866632313, "grad_norm": 0.6465094614285826, "learning_rate": 2.2425060151416932e-07, "loss": 0.2897, "step": 37068 }, { "epoch": 1.7364969316531598, "grad_norm": 0.5902932256637934, "learning_rate": 2.2417210244140314e-07, "loss": 0.2652, "step": 37069 }, { "epoch": 1.736543776643088, "grad_norm": 0.6392795489705965, "learning_rate": 2.24093616465455e-07, "loss": 0.2724, "step": 37070 }, { "epoch": 1.7365906216330163, "grad_norm": 0.5788059553712595, "learning_rate": 2.24015143586776e-07, "loss": 0.243, "step": 37071 }, { "epoch": 1.7366374666229447, "grad_norm": 0.6130322120821586, "learning_rate": 2.239366838058174e-07, "loss": 0.2827, "step": 37072 }, { "epoch": 1.736684311612873, "grad_norm": 0.5593900469579587, "learning_rate": 2.2385823712303106e-07, "loss": 0.2536, "step": 37073 }, { "epoch": 1.7367311566028012, "grad_norm": 0.612153037968893, "learning_rate": 2.2377980353886858e-07, "loss": 0.2662, "step": 37074 }, { "epoch": 1.7367780015927297, "grad_norm": 0.5889802050001878, "learning_rate": 2.2370138305378097e-07, "loss": 0.2591, "step": 37075 }, { "epoch": 1.7368248465826581, "grad_norm": 0.6097380243395327, "learning_rate": 2.2362297566822012e-07, "loss": 0.2848, "step": 37076 }, { "epoch": 1.7368716915725864, "grad_norm": 0.5530411535309351, "learning_rate": 2.2354458138263702e-07, "loss": 0.2558, "step": 37077 }, { "epoch": 1.7369185365625146, "grad_norm": 0.7477392449970603, "learning_rate": 2.23466200197483e-07, "loss": 0.288, "step": 37078 }, { "epoch": 1.736965381552443, "grad_norm": 0.5993471882978364, "learning_rate": 2.2338783211320798e-07, "loss": 0.2763, "step": 37079 }, { "epoch": 1.7370122265423713, "grad_norm": 0.5974122296782248, "learning_rate": 2.2330947713026407e-07, "loss": 0.2586, "step": 37080 }, { "epoch": 1.7370590715322995, "grad_norm": 0.5585729364388728, "learning_rate": 2.2323113524910178e-07, "loss": 0.2565, "step": 37081 }, { "epoch": 1.737105916522228, "grad_norm": 0.5869069174060775, "learning_rate": 2.2315280647017213e-07, "loss": 0.2708, "step": 37082 }, { "epoch": 1.7371527615121563, "grad_norm": 0.6273360739895552, "learning_rate": 2.230744907939264e-07, "loss": 0.2825, "step": 37083 }, { "epoch": 1.7371996065020845, "grad_norm": 0.6071771124878842, "learning_rate": 2.2299618822081399e-07, "loss": 0.2737, "step": 37084 }, { "epoch": 1.737246451492013, "grad_norm": 0.5828636015416512, "learning_rate": 2.229178987512867e-07, "loss": 0.2668, "step": 37085 }, { "epoch": 1.7372932964819414, "grad_norm": 0.5928084738780636, "learning_rate": 2.2283962238579427e-07, "loss": 0.2797, "step": 37086 }, { "epoch": 1.7373401414718694, "grad_norm": 0.5323472364794214, "learning_rate": 2.2276135912478735e-07, "loss": 0.2602, "step": 37087 }, { "epoch": 1.737386986461798, "grad_norm": 0.5868939331865827, "learning_rate": 2.226831089687165e-07, "loss": 0.2619, "step": 37088 }, { "epoch": 1.7374338314517264, "grad_norm": 0.5877833292923407, "learning_rate": 2.2260487191803237e-07, "loss": 0.2581, "step": 37089 }, { "epoch": 1.7374806764416546, "grad_norm": 0.6197726634233319, "learning_rate": 2.225266479731844e-07, "loss": 0.2671, "step": 37090 }, { "epoch": 1.7375275214315828, "grad_norm": 0.6011412303727519, "learning_rate": 2.2244843713462305e-07, "loss": 0.2545, "step": 37091 }, { "epoch": 1.7375743664215113, "grad_norm": 0.5699125125744788, "learning_rate": 2.2237023940279907e-07, "loss": 0.2585, "step": 37092 }, { "epoch": 1.7376212114114395, "grad_norm": 0.5853046260560001, "learning_rate": 2.2229205477816152e-07, "loss": 0.2585, "step": 37093 }, { "epoch": 1.7376680564013678, "grad_norm": 0.6123738760330257, "learning_rate": 2.2221388326116062e-07, "loss": 0.2784, "step": 37094 }, { "epoch": 1.7377149013912963, "grad_norm": 0.6598929185275413, "learning_rate": 2.2213572485224683e-07, "loss": 0.2812, "step": 37095 }, { "epoch": 1.7377617463812245, "grad_norm": 0.6279781847054864, "learning_rate": 2.2205757955186896e-07, "loss": 0.2721, "step": 37096 }, { "epoch": 1.7378085913711527, "grad_norm": 0.5669227790821357, "learning_rate": 2.2197944736047693e-07, "loss": 0.26, "step": 37097 }, { "epoch": 1.7378554363610812, "grad_norm": 0.5836140262289662, "learning_rate": 2.2190132827852095e-07, "loss": 0.267, "step": 37098 }, { "epoch": 1.7379022813510097, "grad_norm": 0.6125348971057568, "learning_rate": 2.2182322230645064e-07, "loss": 0.2768, "step": 37099 }, { "epoch": 1.7379491263409377, "grad_norm": 0.5633795523176941, "learning_rate": 2.2174512944471455e-07, "loss": 0.2553, "step": 37100 }, { "epoch": 1.7379959713308661, "grad_norm": 0.6198603636447133, "learning_rate": 2.2166704969376256e-07, "loss": 0.277, "step": 37101 }, { "epoch": 1.7380428163207946, "grad_norm": 0.5605666333528486, "learning_rate": 2.2158898305404465e-07, "loss": 0.2456, "step": 37102 }, { "epoch": 1.7380896613107228, "grad_norm": 0.5849776246218767, "learning_rate": 2.21510929526009e-07, "loss": 0.2769, "step": 37103 }, { "epoch": 1.738136506300651, "grad_norm": 0.5935276652101641, "learning_rate": 2.214328891101053e-07, "loss": 0.2718, "step": 37104 }, { "epoch": 1.7381833512905795, "grad_norm": 0.6421264289319114, "learning_rate": 2.2135486180678235e-07, "loss": 0.2833, "step": 37105 }, { "epoch": 1.7382301962805078, "grad_norm": 0.6038328124791328, "learning_rate": 2.2127684761649033e-07, "loss": 0.262, "step": 37106 }, { "epoch": 1.738277041270436, "grad_norm": 0.6269169561183404, "learning_rate": 2.2119884653967666e-07, "loss": 0.2839, "step": 37107 }, { "epoch": 1.7383238862603645, "grad_norm": 0.6050196612693566, "learning_rate": 2.2112085857679127e-07, "loss": 0.2685, "step": 37108 }, { "epoch": 1.7383707312502927, "grad_norm": 0.5808352809627344, "learning_rate": 2.2104288372828213e-07, "loss": 0.2615, "step": 37109 }, { "epoch": 1.738417576240221, "grad_norm": 0.5755561655924009, "learning_rate": 2.2096492199459863e-07, "loss": 0.2618, "step": 37110 }, { "epoch": 1.7384644212301494, "grad_norm": 0.5683831736391446, "learning_rate": 2.2088697337618898e-07, "loss": 0.2625, "step": 37111 }, { "epoch": 1.738511266220078, "grad_norm": 0.6023339549368236, "learning_rate": 2.2080903787350228e-07, "loss": 0.2709, "step": 37112 }, { "epoch": 1.7385581112100061, "grad_norm": 0.5465769523254411, "learning_rate": 2.207311154869865e-07, "loss": 0.2546, "step": 37113 }, { "epoch": 1.7386049561999344, "grad_norm": 0.6318867485217301, "learning_rate": 2.2065320621709075e-07, "loss": 0.2756, "step": 37114 }, { "epoch": 1.7386518011898628, "grad_norm": 0.5345341552319822, "learning_rate": 2.2057531006426213e-07, "loss": 0.2475, "step": 37115 }, { "epoch": 1.738698646179791, "grad_norm": 0.5920755797943688, "learning_rate": 2.2049742702895006e-07, "loss": 0.2605, "step": 37116 }, { "epoch": 1.7387454911697193, "grad_norm": 0.6225422696503579, "learning_rate": 2.2041955711160217e-07, "loss": 0.2928, "step": 37117 }, { "epoch": 1.7387923361596478, "grad_norm": 0.5757595856727465, "learning_rate": 2.2034170031266678e-07, "loss": 0.2499, "step": 37118 }, { "epoch": 1.738839181149576, "grad_norm": 0.6157152782228725, "learning_rate": 2.2026385663259263e-07, "loss": 0.2745, "step": 37119 }, { "epoch": 1.7388860261395043, "grad_norm": 0.6211471538514866, "learning_rate": 2.2018602607182665e-07, "loss": 0.2715, "step": 37120 }, { "epoch": 1.7389328711294327, "grad_norm": 0.6074687423962525, "learning_rate": 2.201082086308168e-07, "loss": 0.2681, "step": 37121 }, { "epoch": 1.7389797161193612, "grad_norm": 0.6556464421405678, "learning_rate": 2.2003040431001128e-07, "loss": 0.2741, "step": 37122 }, { "epoch": 1.7390265611092892, "grad_norm": 0.5738352349546094, "learning_rate": 2.1995261310985754e-07, "loss": 0.2651, "step": 37123 }, { "epoch": 1.7390734060992177, "grad_norm": 0.565412347034742, "learning_rate": 2.1987483503080358e-07, "loss": 0.2606, "step": 37124 }, { "epoch": 1.7391202510891461, "grad_norm": 0.5784513907762049, "learning_rate": 2.1979707007329681e-07, "loss": 0.258, "step": 37125 }, { "epoch": 1.7391670960790744, "grad_norm": 0.6097808279272618, "learning_rate": 2.197193182377852e-07, "loss": 0.2885, "step": 37126 }, { "epoch": 1.7392139410690026, "grad_norm": 0.6179801971163716, "learning_rate": 2.1964157952471588e-07, "loss": 0.2752, "step": 37127 }, { "epoch": 1.739260786058931, "grad_norm": 0.6282298825835898, "learning_rate": 2.1956385393453544e-07, "loss": 0.2836, "step": 37128 }, { "epoch": 1.7393076310488593, "grad_norm": 0.5986939453521369, "learning_rate": 2.1948614146769215e-07, "loss": 0.2644, "step": 37129 }, { "epoch": 1.7393544760387876, "grad_norm": 0.5584137715164201, "learning_rate": 2.1940844212463287e-07, "loss": 0.2502, "step": 37130 }, { "epoch": 1.739401321028716, "grad_norm": 0.5903938622899083, "learning_rate": 2.193307559058047e-07, "loss": 0.277, "step": 37131 }, { "epoch": 1.7394481660186443, "grad_norm": 0.5793123304002453, "learning_rate": 2.1925308281165512e-07, "loss": 0.2613, "step": 37132 }, { "epoch": 1.7394950110085725, "grad_norm": 0.5891549852098715, "learning_rate": 2.191754228426307e-07, "loss": 0.2489, "step": 37133 }, { "epoch": 1.739541855998501, "grad_norm": 0.5940405653887559, "learning_rate": 2.190977759991786e-07, "loss": 0.2703, "step": 37134 }, { "epoch": 1.7395887009884294, "grad_norm": 0.6221430466990683, "learning_rate": 2.1902014228174534e-07, "loss": 0.2744, "step": 37135 }, { "epoch": 1.7396355459783575, "grad_norm": 0.5729743188420469, "learning_rate": 2.1894252169077756e-07, "loss": 0.257, "step": 37136 }, { "epoch": 1.739682390968286, "grad_norm": 0.6216501416646364, "learning_rate": 2.188649142267224e-07, "loss": 0.2852, "step": 37137 }, { "epoch": 1.7397292359582144, "grad_norm": 0.5615529854524967, "learning_rate": 2.1878731989002673e-07, "loss": 0.2545, "step": 37138 }, { "epoch": 1.7397760809481426, "grad_norm": 0.5858480494075906, "learning_rate": 2.1870973868113626e-07, "loss": 0.2675, "step": 37139 }, { "epoch": 1.7398229259380709, "grad_norm": 0.5758940718823233, "learning_rate": 2.186321706004979e-07, "loss": 0.2692, "step": 37140 }, { "epoch": 1.7398697709279993, "grad_norm": 0.5968813300216885, "learning_rate": 2.185546156485585e-07, "loss": 0.2772, "step": 37141 }, { "epoch": 1.7399166159179276, "grad_norm": 0.5880375486964994, "learning_rate": 2.1847707382576327e-07, "loss": 0.2733, "step": 37142 }, { "epoch": 1.7399634609078558, "grad_norm": 0.5585336425318718, "learning_rate": 2.1839954513255906e-07, "loss": 0.2518, "step": 37143 }, { "epoch": 1.7400103058977843, "grad_norm": 0.5983040445848476, "learning_rate": 2.1832202956939247e-07, "loss": 0.2597, "step": 37144 }, { "epoch": 1.7400571508877125, "grad_norm": 0.601153693354498, "learning_rate": 2.182445271367087e-07, "loss": 0.2692, "step": 37145 }, { "epoch": 1.7401039958776408, "grad_norm": 0.5973961696261849, "learning_rate": 2.181670378349543e-07, "loss": 0.2593, "step": 37146 }, { "epoch": 1.7401508408675692, "grad_norm": 0.5961979462831365, "learning_rate": 2.1808956166457507e-07, "loss": 0.284, "step": 37147 }, { "epoch": 1.7401976858574977, "grad_norm": 0.5977086140363528, "learning_rate": 2.1801209862601703e-07, "loss": 0.2595, "step": 37148 }, { "epoch": 1.740244530847426, "grad_norm": 0.6013484819959058, "learning_rate": 2.1793464871972563e-07, "loss": 0.2738, "step": 37149 }, { "epoch": 1.7402913758373542, "grad_norm": 0.6264732603961216, "learning_rate": 2.1785721194614696e-07, "loss": 0.2754, "step": 37150 }, { "epoch": 1.7403382208272826, "grad_norm": 0.5937187709134594, "learning_rate": 2.1777978830572615e-07, "loss": 0.265, "step": 37151 }, { "epoch": 1.7403850658172109, "grad_norm": 0.5894075870747579, "learning_rate": 2.1770237779890903e-07, "loss": 0.2576, "step": 37152 }, { "epoch": 1.740431910807139, "grad_norm": 0.590862857562147, "learning_rate": 2.1762498042614127e-07, "loss": 0.272, "step": 37153 }, { "epoch": 1.7404787557970676, "grad_norm": 0.6264878843368791, "learning_rate": 2.1754759618786786e-07, "loss": 0.2804, "step": 37154 }, { "epoch": 1.7405256007869958, "grad_norm": 0.611676266525999, "learning_rate": 2.174702250845348e-07, "loss": 0.2581, "step": 37155 }, { "epoch": 1.740572445776924, "grad_norm": 0.605926215133827, "learning_rate": 2.1739286711658703e-07, "loss": 0.2653, "step": 37156 }, { "epoch": 1.7406192907668525, "grad_norm": 0.6007059125843466, "learning_rate": 2.1731552228446918e-07, "loss": 0.273, "step": 37157 }, { "epoch": 1.740666135756781, "grad_norm": 0.6171075961289993, "learning_rate": 2.1723819058862673e-07, "loss": 0.2742, "step": 37158 }, { "epoch": 1.740712980746709, "grad_norm": 0.6039841023788115, "learning_rate": 2.171608720295046e-07, "loss": 0.2691, "step": 37159 }, { "epoch": 1.7407598257366375, "grad_norm": 0.5860598171158855, "learning_rate": 2.17083566607548e-07, "loss": 0.2555, "step": 37160 }, { "epoch": 1.740806670726566, "grad_norm": 0.5808604678375705, "learning_rate": 2.170062743232021e-07, "loss": 0.2645, "step": 37161 }, { "epoch": 1.7408535157164942, "grad_norm": 0.5722472088430395, "learning_rate": 2.1692899517691075e-07, "loss": 0.2619, "step": 37162 }, { "epoch": 1.7409003607064224, "grad_norm": 0.5917861160002398, "learning_rate": 2.1685172916911996e-07, "loss": 0.2736, "step": 37163 }, { "epoch": 1.7409472056963509, "grad_norm": 0.5750936168338082, "learning_rate": 2.1677447630027272e-07, "loss": 0.2577, "step": 37164 }, { "epoch": 1.740994050686279, "grad_norm": 0.6030415563637428, "learning_rate": 2.1669723657081477e-07, "loss": 0.271, "step": 37165 }, { "epoch": 1.7410408956762073, "grad_norm": 0.6041951842244303, "learning_rate": 2.166200099811905e-07, "loss": 0.2784, "step": 37166 }, { "epoch": 1.7410877406661358, "grad_norm": 0.6039821489326519, "learning_rate": 2.1654279653184395e-07, "loss": 0.2667, "step": 37167 }, { "epoch": 1.741134585656064, "grad_norm": 0.6126503754483128, "learning_rate": 2.1646559622322012e-07, "loss": 0.2772, "step": 37168 }, { "epoch": 1.7411814306459923, "grad_norm": 0.6233534728238347, "learning_rate": 2.1638840905576274e-07, "loss": 0.2763, "step": 37169 }, { "epoch": 1.7412282756359208, "grad_norm": 0.6240351758123011, "learning_rate": 2.1631123502991598e-07, "loss": 0.2696, "step": 37170 }, { "epoch": 1.7412751206258492, "grad_norm": 0.5622073378797912, "learning_rate": 2.1623407414612386e-07, "loss": 0.2558, "step": 37171 }, { "epoch": 1.7413219656157772, "grad_norm": 0.6145944893395674, "learning_rate": 2.161569264048305e-07, "loss": 0.2872, "step": 37172 }, { "epoch": 1.7413688106057057, "grad_norm": 0.5709981370472956, "learning_rate": 2.1607979180648026e-07, "loss": 0.253, "step": 37173 }, { "epoch": 1.7414156555956342, "grad_norm": 0.5989922340669517, "learning_rate": 2.160026703515167e-07, "loss": 0.2765, "step": 37174 }, { "epoch": 1.7414625005855624, "grad_norm": 0.6058521328021886, "learning_rate": 2.1592556204038412e-07, "loss": 0.2541, "step": 37175 }, { "epoch": 1.7415093455754906, "grad_norm": 0.6312614498541561, "learning_rate": 2.1584846687352585e-07, "loss": 0.2748, "step": 37176 }, { "epoch": 1.741556190565419, "grad_norm": 0.6126404672021917, "learning_rate": 2.157713848513851e-07, "loss": 0.2671, "step": 37177 }, { "epoch": 1.7416030355553473, "grad_norm": 0.5547949286799967, "learning_rate": 2.1569431597440598e-07, "loss": 0.2705, "step": 37178 }, { "epoch": 1.7416498805452756, "grad_norm": 0.6619645794831577, "learning_rate": 2.1561726024303202e-07, "loss": 0.2805, "step": 37179 }, { "epoch": 1.741696725535204, "grad_norm": 0.5653864542743903, "learning_rate": 2.155402176577065e-07, "loss": 0.2578, "step": 37180 }, { "epoch": 1.7417435705251323, "grad_norm": 0.5740474919848466, "learning_rate": 2.1546318821887318e-07, "loss": 0.2686, "step": 37181 }, { "epoch": 1.7417904155150605, "grad_norm": 0.5601312197515625, "learning_rate": 2.1538617192697452e-07, "loss": 0.2516, "step": 37182 }, { "epoch": 1.741837260504989, "grad_norm": 0.6233041346246034, "learning_rate": 2.153091687824549e-07, "loss": 0.2722, "step": 37183 }, { "epoch": 1.7418841054949175, "grad_norm": 0.6511791474286582, "learning_rate": 2.1523217878575614e-07, "loss": 0.2666, "step": 37184 }, { "epoch": 1.7419309504848457, "grad_norm": 0.5668865199199036, "learning_rate": 2.151552019373221e-07, "loss": 0.2642, "step": 37185 }, { "epoch": 1.741977795474774, "grad_norm": 0.5587779841264965, "learning_rate": 2.1507823823759545e-07, "loss": 0.2669, "step": 37186 }, { "epoch": 1.7420246404647024, "grad_norm": 0.6203777535585459, "learning_rate": 2.1500128768701973e-07, "loss": 0.2851, "step": 37187 }, { "epoch": 1.7420714854546306, "grad_norm": 0.5535320740482786, "learning_rate": 2.149243502860368e-07, "loss": 0.2527, "step": 37188 }, { "epoch": 1.7421183304445589, "grad_norm": 0.6118570389006988, "learning_rate": 2.1484742603508996e-07, "loss": 0.2759, "step": 37189 }, { "epoch": 1.7421651754344873, "grad_norm": 0.5782936702716456, "learning_rate": 2.1477051493462242e-07, "loss": 0.2627, "step": 37190 }, { "epoch": 1.7422120204244156, "grad_norm": 0.5847450845612828, "learning_rate": 2.146936169850755e-07, "loss": 0.2635, "step": 37191 }, { "epoch": 1.7422588654143438, "grad_norm": 0.5698834946049566, "learning_rate": 2.146167321868925e-07, "loss": 0.2696, "step": 37192 }, { "epoch": 1.7423057104042723, "grad_norm": 0.5821447143973384, "learning_rate": 2.1453986054051635e-07, "loss": 0.2518, "step": 37193 }, { "epoch": 1.7423525553942008, "grad_norm": 0.6237355512305586, "learning_rate": 2.1446300204638838e-07, "loss": 0.2861, "step": 37194 }, { "epoch": 1.7423994003841288, "grad_norm": 0.6407812090495508, "learning_rate": 2.1438615670495128e-07, "loss": 0.2961, "step": 37195 }, { "epoch": 1.7424462453740572, "grad_norm": 0.5994233870257536, "learning_rate": 2.1430932451664748e-07, "loss": 0.2621, "step": 37196 }, { "epoch": 1.7424930903639857, "grad_norm": 0.6314378070817847, "learning_rate": 2.1423250548191944e-07, "loss": 0.2924, "step": 37197 }, { "epoch": 1.742539935353914, "grad_norm": 0.5822187449990451, "learning_rate": 2.1415569960120842e-07, "loss": 0.26, "step": 37198 }, { "epoch": 1.7425867803438422, "grad_norm": 0.5893849038911527, "learning_rate": 2.1407890687495713e-07, "loss": 0.2743, "step": 37199 }, { "epoch": 1.7426336253337706, "grad_norm": 0.6385478262159108, "learning_rate": 2.140021273036069e-07, "loss": 0.2672, "step": 37200 }, { "epoch": 1.7426804703236989, "grad_norm": 0.5850841288114048, "learning_rate": 2.139253608875999e-07, "loss": 0.2632, "step": 37201 }, { "epoch": 1.7427273153136271, "grad_norm": 0.5957657650963707, "learning_rate": 2.138486076273777e-07, "loss": 0.2563, "step": 37202 }, { "epoch": 1.7427741603035556, "grad_norm": 0.5720121099785483, "learning_rate": 2.137718675233827e-07, "loss": 0.2584, "step": 37203 }, { "epoch": 1.7428210052934838, "grad_norm": 0.6502751386245891, "learning_rate": 2.1369514057605566e-07, "loss": 0.2761, "step": 37204 }, { "epoch": 1.742867850283412, "grad_norm": 0.625807170679859, "learning_rate": 2.1361842678583877e-07, "loss": 0.2802, "step": 37205 }, { "epoch": 1.7429146952733405, "grad_norm": 0.5917365952992366, "learning_rate": 2.1354172615317276e-07, "loss": 0.2668, "step": 37206 }, { "epoch": 1.742961540263269, "grad_norm": 0.5696866385806544, "learning_rate": 2.1346503867849922e-07, "loss": 0.2499, "step": 37207 }, { "epoch": 1.743008385253197, "grad_norm": 0.6197533637186762, "learning_rate": 2.1338836436226e-07, "loss": 0.2682, "step": 37208 }, { "epoch": 1.7430552302431255, "grad_norm": 0.6154679084195821, "learning_rate": 2.1331170320489593e-07, "loss": 0.2718, "step": 37209 }, { "epoch": 1.743102075233054, "grad_norm": 0.6199579355408662, "learning_rate": 2.132350552068485e-07, "loss": 0.2836, "step": 37210 }, { "epoch": 1.7431489202229822, "grad_norm": 0.6226560918736574, "learning_rate": 2.1315842036855883e-07, "loss": 0.2779, "step": 37211 }, { "epoch": 1.7431957652129104, "grad_norm": 0.5454880161751428, "learning_rate": 2.1308179869046707e-07, "loss": 0.2467, "step": 37212 }, { "epoch": 1.7432426102028389, "grad_norm": 0.5990434747159873, "learning_rate": 2.1300519017301487e-07, "loss": 0.2776, "step": 37213 }, { "epoch": 1.7432894551927671, "grad_norm": 0.5909500171079274, "learning_rate": 2.129285948166429e-07, "loss": 0.2686, "step": 37214 }, { "epoch": 1.7433363001826954, "grad_norm": 0.5973423452717842, "learning_rate": 2.12852012621792e-07, "loss": 0.2651, "step": 37215 }, { "epoch": 1.7433831451726238, "grad_norm": 0.6164617197088293, "learning_rate": 2.1277544358890317e-07, "loss": 0.2589, "step": 37216 }, { "epoch": 1.743429990162552, "grad_norm": 0.5933959009797407, "learning_rate": 2.126988877184169e-07, "loss": 0.2645, "step": 37217 }, { "epoch": 1.7434768351524803, "grad_norm": 0.5656029299869781, "learning_rate": 2.1262234501077393e-07, "loss": 0.2644, "step": 37218 }, { "epoch": 1.7435236801424088, "grad_norm": 0.586474744907226, "learning_rate": 2.1254581546641368e-07, "loss": 0.2512, "step": 37219 }, { "epoch": 1.7435705251323372, "grad_norm": 0.6303305988543813, "learning_rate": 2.1246929908577767e-07, "loss": 0.2744, "step": 37220 }, { "epoch": 1.7436173701222655, "grad_norm": 0.6158130774516171, "learning_rate": 2.123927958693056e-07, "loss": 0.2631, "step": 37221 }, { "epoch": 1.7436642151121937, "grad_norm": 0.6640856678797056, "learning_rate": 2.1231630581743823e-07, "loss": 0.2809, "step": 37222 }, { "epoch": 1.7437110601021222, "grad_norm": 0.5828879165711733, "learning_rate": 2.12239828930616e-07, "loss": 0.2574, "step": 37223 }, { "epoch": 1.7437579050920504, "grad_norm": 0.5997850942650218, "learning_rate": 2.1216336520927804e-07, "loss": 0.2586, "step": 37224 }, { "epoch": 1.7438047500819787, "grad_norm": 0.596454942129048, "learning_rate": 2.1208691465386538e-07, "loss": 0.2671, "step": 37225 }, { "epoch": 1.7438515950719071, "grad_norm": 0.6026095714222047, "learning_rate": 2.1201047726481682e-07, "loss": 0.2792, "step": 37226 }, { "epoch": 1.7438984400618354, "grad_norm": 0.616569321640817, "learning_rate": 2.1193405304257315e-07, "loss": 0.2631, "step": 37227 }, { "epoch": 1.7439452850517636, "grad_norm": 0.5684790016627954, "learning_rate": 2.118576419875737e-07, "loss": 0.2718, "step": 37228 }, { "epoch": 1.743992130041692, "grad_norm": 0.5798175978073321, "learning_rate": 2.1178124410025897e-07, "loss": 0.262, "step": 37229 }, { "epoch": 1.7440389750316205, "grad_norm": 0.5822990757756982, "learning_rate": 2.1170485938106778e-07, "loss": 0.2849, "step": 37230 }, { "epoch": 1.7440858200215485, "grad_norm": 0.5828483325515724, "learning_rate": 2.1162848783043977e-07, "loss": 0.2755, "step": 37231 }, { "epoch": 1.744132665011477, "grad_norm": 0.6030991651915991, "learning_rate": 2.1155212944881516e-07, "loss": 0.2689, "step": 37232 }, { "epoch": 1.7441795100014055, "grad_norm": 0.6257020535819042, "learning_rate": 2.1147578423663246e-07, "loss": 0.2796, "step": 37233 }, { "epoch": 1.7442263549913337, "grad_norm": 0.6340893557927114, "learning_rate": 2.1139945219433133e-07, "loss": 0.2859, "step": 37234 }, { "epoch": 1.744273199981262, "grad_norm": 0.5760673294773857, "learning_rate": 2.1132313332235173e-07, "loss": 0.2605, "step": 37235 }, { "epoch": 1.7443200449711904, "grad_norm": 0.577354964099884, "learning_rate": 2.1124682762113158e-07, "loss": 0.2612, "step": 37236 }, { "epoch": 1.7443668899611187, "grad_norm": 0.593556416837446, "learning_rate": 2.1117053509111085e-07, "loss": 0.2746, "step": 37237 }, { "epoch": 1.744413734951047, "grad_norm": 0.5728823534001636, "learning_rate": 2.1109425573272834e-07, "loss": 0.2696, "step": 37238 }, { "epoch": 1.7444605799409754, "grad_norm": 0.6405474011439103, "learning_rate": 2.110179895464237e-07, "loss": 0.2743, "step": 37239 }, { "epoch": 1.7445074249309036, "grad_norm": 0.5876784130401643, "learning_rate": 2.1094173653263468e-07, "loss": 0.2499, "step": 37240 }, { "epoch": 1.7445542699208318, "grad_norm": 0.6435858782863816, "learning_rate": 2.1086549669180056e-07, "loss": 0.2964, "step": 37241 }, { "epoch": 1.7446011149107603, "grad_norm": 0.6110087221651532, "learning_rate": 2.107892700243605e-07, "loss": 0.2681, "step": 37242 }, { "epoch": 1.7446479599006888, "grad_norm": 0.6038547078392338, "learning_rate": 2.1071305653075247e-07, "loss": 0.2855, "step": 37243 }, { "epoch": 1.7446948048906168, "grad_norm": 0.6035642733219576, "learning_rate": 2.1063685621141555e-07, "loss": 0.2636, "step": 37244 }, { "epoch": 1.7447416498805453, "grad_norm": 0.5856589130824292, "learning_rate": 2.10560669066788e-07, "loss": 0.2791, "step": 37245 }, { "epoch": 1.7447884948704737, "grad_norm": 0.5715915105635918, "learning_rate": 2.1048449509730896e-07, "loss": 0.2622, "step": 37246 }, { "epoch": 1.744835339860402, "grad_norm": 0.6195474770646128, "learning_rate": 2.104083343034155e-07, "loss": 0.276, "step": 37247 }, { "epoch": 1.7448821848503302, "grad_norm": 0.6052259813824394, "learning_rate": 2.1033218668554733e-07, "loss": 0.2765, "step": 37248 }, { "epoch": 1.7449290298402587, "grad_norm": 0.6063430740117646, "learning_rate": 2.1025605224414132e-07, "loss": 0.2557, "step": 37249 }, { "epoch": 1.744975874830187, "grad_norm": 0.6156603876735111, "learning_rate": 2.1017993097963625e-07, "loss": 0.2789, "step": 37250 }, { "epoch": 1.7450227198201151, "grad_norm": 0.5800209894367266, "learning_rate": 2.101038228924701e-07, "loss": 0.2668, "step": 37251 }, { "epoch": 1.7450695648100436, "grad_norm": 0.608845196655709, "learning_rate": 2.1002772798308147e-07, "loss": 0.2775, "step": 37252 }, { "epoch": 1.7451164097999718, "grad_norm": 0.6043091543732294, "learning_rate": 2.0995164625190716e-07, "loss": 0.264, "step": 37253 }, { "epoch": 1.7451632547899, "grad_norm": 0.5979608635425181, "learning_rate": 2.0987557769938605e-07, "loss": 0.2723, "step": 37254 }, { "epoch": 1.7452100997798285, "grad_norm": 0.6092534427386028, "learning_rate": 2.097995223259547e-07, "loss": 0.2618, "step": 37255 }, { "epoch": 1.745256944769757, "grad_norm": 0.5580134711013266, "learning_rate": 2.097234801320519e-07, "loss": 0.2504, "step": 37256 }, { "epoch": 1.7453037897596853, "grad_norm": 0.5597569828934065, "learning_rate": 2.0964745111811458e-07, "loss": 0.2577, "step": 37257 }, { "epoch": 1.7453506347496135, "grad_norm": 0.6505113492128922, "learning_rate": 2.095714352845804e-07, "loss": 0.2874, "step": 37258 }, { "epoch": 1.745397479739542, "grad_norm": 0.5899351302867797, "learning_rate": 2.0949543263188765e-07, "loss": 0.265, "step": 37259 }, { "epoch": 1.7454443247294702, "grad_norm": 0.6219161470797415, "learning_rate": 2.094194431604729e-07, "loss": 0.2778, "step": 37260 }, { "epoch": 1.7454911697193984, "grad_norm": 0.552164741603176, "learning_rate": 2.0934346687077305e-07, "loss": 0.2595, "step": 37261 }, { "epoch": 1.745538014709327, "grad_norm": 0.6510883295450488, "learning_rate": 2.092675037632258e-07, "loss": 0.2632, "step": 37262 }, { "epoch": 1.7455848596992551, "grad_norm": 0.643191036780734, "learning_rate": 2.0919155383826828e-07, "loss": 0.2858, "step": 37263 }, { "epoch": 1.7456317046891834, "grad_norm": 0.6040845995824601, "learning_rate": 2.0911561709633792e-07, "loss": 0.2561, "step": 37264 }, { "epoch": 1.7456785496791118, "grad_norm": 0.5975320937802203, "learning_rate": 2.0903969353787108e-07, "loss": 0.2655, "step": 37265 }, { "epoch": 1.7457253946690403, "grad_norm": 0.6210780449746307, "learning_rate": 2.089637831633054e-07, "loss": 0.2777, "step": 37266 }, { "epoch": 1.7457722396589683, "grad_norm": 0.5953657797748712, "learning_rate": 2.0888788597307753e-07, "loss": 0.2685, "step": 37267 }, { "epoch": 1.7458190846488968, "grad_norm": 0.6292089367449846, "learning_rate": 2.0881200196762347e-07, "loss": 0.2784, "step": 37268 }, { "epoch": 1.7458659296388253, "grad_norm": 0.584694924967009, "learning_rate": 2.087361311473804e-07, "loss": 0.2725, "step": 37269 }, { "epoch": 1.7459127746287535, "grad_norm": 0.5626910980972617, "learning_rate": 2.0866027351278517e-07, "loss": 0.2555, "step": 37270 }, { "epoch": 1.7459596196186817, "grad_norm": 0.6151862386108011, "learning_rate": 2.0858442906427384e-07, "loss": 0.2619, "step": 37271 }, { "epoch": 1.7460064646086102, "grad_norm": 0.5827305583286213, "learning_rate": 2.0850859780228382e-07, "loss": 0.2546, "step": 37272 }, { "epoch": 1.7460533095985384, "grad_norm": 0.5639280622188182, "learning_rate": 2.0843277972725062e-07, "loss": 0.258, "step": 37273 }, { "epoch": 1.7461001545884667, "grad_norm": 0.6043152772765642, "learning_rate": 2.083569748396111e-07, "loss": 0.2632, "step": 37274 }, { "epoch": 1.7461469995783951, "grad_norm": 0.6177922024373167, "learning_rate": 2.0828118313980074e-07, "loss": 0.2793, "step": 37275 }, { "epoch": 1.7461938445683234, "grad_norm": 0.6110988791177417, "learning_rate": 2.082054046282561e-07, "loss": 0.278, "step": 37276 }, { "epoch": 1.7462406895582516, "grad_norm": 0.6175989143345666, "learning_rate": 2.0812963930541357e-07, "loss": 0.2799, "step": 37277 }, { "epoch": 1.74628753454818, "grad_norm": 0.591999098850692, "learning_rate": 2.0805388717170915e-07, "loss": 0.2601, "step": 37278 }, { "epoch": 1.7463343795381085, "grad_norm": 0.6119191698098855, "learning_rate": 2.0797814822757834e-07, "loss": 0.276, "step": 37279 }, { "epoch": 1.7463812245280366, "grad_norm": 0.5715149373399308, "learning_rate": 2.0790242247345687e-07, "loss": 0.2598, "step": 37280 }, { "epoch": 1.746428069517965, "grad_norm": 0.5761348421588001, "learning_rate": 2.0782670990978165e-07, "loss": 0.2534, "step": 37281 }, { "epoch": 1.7464749145078935, "grad_norm": 0.5829795653052509, "learning_rate": 2.0775101053698705e-07, "loss": 0.257, "step": 37282 }, { "epoch": 1.7465217594978217, "grad_norm": 0.5887181451330651, "learning_rate": 2.0767532435550907e-07, "loss": 0.2687, "step": 37283 }, { "epoch": 1.74656860448775, "grad_norm": 0.5610433802676694, "learning_rate": 2.075996513657841e-07, "loss": 0.2651, "step": 37284 }, { "epoch": 1.7466154494776784, "grad_norm": 0.6015115531272729, "learning_rate": 2.0752399156824642e-07, "loss": 0.2772, "step": 37285 }, { "epoch": 1.7466622944676067, "grad_norm": 0.574676339773783, "learning_rate": 2.074483449633319e-07, "loss": 0.2812, "step": 37286 }, { "epoch": 1.746709139457535, "grad_norm": 0.6153856698963965, "learning_rate": 2.073727115514762e-07, "loss": 0.2771, "step": 37287 }, { "epoch": 1.7467559844474634, "grad_norm": 0.533297479347129, "learning_rate": 2.072970913331146e-07, "loss": 0.2481, "step": 37288 }, { "epoch": 1.7468028294373916, "grad_norm": 0.5795463590429493, "learning_rate": 2.0722148430868145e-07, "loss": 0.2689, "step": 37289 }, { "epoch": 1.7468496744273199, "grad_norm": 0.6298671067968737, "learning_rate": 2.0714589047861278e-07, "loss": 0.2744, "step": 37290 }, { "epoch": 1.7468965194172483, "grad_norm": 0.5904974128666463, "learning_rate": 2.0707030984334298e-07, "loss": 0.2719, "step": 37291 }, { "epoch": 1.7469433644071768, "grad_norm": 0.6077220829700035, "learning_rate": 2.0699474240330725e-07, "loss": 0.2631, "step": 37292 }, { "epoch": 1.746990209397105, "grad_norm": 0.6014737802232304, "learning_rate": 2.0691918815894025e-07, "loss": 0.2577, "step": 37293 }, { "epoch": 1.7470370543870333, "grad_norm": 0.6198949502223662, "learning_rate": 2.0684364711067773e-07, "loss": 0.2604, "step": 37294 }, { "epoch": 1.7470838993769617, "grad_norm": 0.62347239322742, "learning_rate": 2.0676811925895295e-07, "loss": 0.2791, "step": 37295 }, { "epoch": 1.74713074436689, "grad_norm": 0.5896027292047704, "learning_rate": 2.066926046042017e-07, "loss": 0.2746, "step": 37296 }, { "epoch": 1.7471775893568182, "grad_norm": 0.5767927358446445, "learning_rate": 2.0661710314685778e-07, "loss": 0.2894, "step": 37297 }, { "epoch": 1.7472244343467467, "grad_norm": 0.5917676915857762, "learning_rate": 2.0654161488735614e-07, "loss": 0.2755, "step": 37298 }, { "epoch": 1.747271279336675, "grad_norm": 0.6159955176076148, "learning_rate": 2.0646613982613084e-07, "loss": 0.2772, "step": 37299 }, { "epoch": 1.7473181243266032, "grad_norm": 0.651659781094744, "learning_rate": 2.0639067796361655e-07, "loss": 0.2902, "step": 37300 }, { "epoch": 1.7473649693165316, "grad_norm": 0.5930629799424264, "learning_rate": 2.0631522930024794e-07, "loss": 0.269, "step": 37301 }, { "epoch": 1.74741181430646, "grad_norm": 0.6232611802920576, "learning_rate": 2.0623979383645797e-07, "loss": 0.2705, "step": 37302 }, { "epoch": 1.747458659296388, "grad_norm": 0.5853270048047349, "learning_rate": 2.0616437157268217e-07, "loss": 0.2586, "step": 37303 }, { "epoch": 1.7475055042863166, "grad_norm": 0.6014579413417582, "learning_rate": 2.0608896250935345e-07, "loss": 0.2703, "step": 37304 }, { "epoch": 1.747552349276245, "grad_norm": 0.6067512538311088, "learning_rate": 2.0601356664690598e-07, "loss": 0.2676, "step": 37305 }, { "epoch": 1.7475991942661733, "grad_norm": 0.5553725657801938, "learning_rate": 2.059381839857741e-07, "loss": 0.2533, "step": 37306 }, { "epoch": 1.7476460392561015, "grad_norm": 0.6140142224737343, "learning_rate": 2.0586281452639133e-07, "loss": 0.2929, "step": 37307 }, { "epoch": 1.74769288424603, "grad_norm": 0.5655636828044885, "learning_rate": 2.0578745826919184e-07, "loss": 0.2602, "step": 37308 }, { "epoch": 1.7477397292359582, "grad_norm": 0.6015686010056045, "learning_rate": 2.0571211521460882e-07, "loss": 0.2655, "step": 37309 }, { "epoch": 1.7477865742258865, "grad_norm": 0.621241366277532, "learning_rate": 2.0563678536307558e-07, "loss": 0.2918, "step": 37310 }, { "epoch": 1.747833419215815, "grad_norm": 0.604878798587381, "learning_rate": 2.055614687150259e-07, "loss": 0.2645, "step": 37311 }, { "epoch": 1.7478802642057432, "grad_norm": 0.625983792640847, "learning_rate": 2.0548616527089337e-07, "loss": 0.2809, "step": 37312 }, { "epoch": 1.7479271091956714, "grad_norm": 0.5758250051574108, "learning_rate": 2.0541087503111124e-07, "loss": 0.2685, "step": 37313 }, { "epoch": 1.7479739541855999, "grad_norm": 0.6458863209130141, "learning_rate": 2.0533559799611276e-07, "loss": 0.2814, "step": 37314 }, { "epoch": 1.7480207991755283, "grad_norm": 0.6444052000373631, "learning_rate": 2.0526033416633172e-07, "loss": 0.2638, "step": 37315 }, { "epoch": 1.7480676441654563, "grad_norm": 0.6033875302995693, "learning_rate": 2.0518508354220062e-07, "loss": 0.2753, "step": 37316 }, { "epoch": 1.7481144891553848, "grad_norm": 0.613396840591976, "learning_rate": 2.0510984612415215e-07, "loss": 0.2843, "step": 37317 }, { "epoch": 1.7481613341453133, "grad_norm": 0.6288383757261042, "learning_rate": 2.050346219126198e-07, "loss": 0.2871, "step": 37318 }, { "epoch": 1.7482081791352415, "grad_norm": 0.5887905953108679, "learning_rate": 2.0495941090803635e-07, "loss": 0.2634, "step": 37319 }, { "epoch": 1.7482550241251698, "grad_norm": 0.6232079787078308, "learning_rate": 2.0488421311083445e-07, "loss": 0.2873, "step": 37320 }, { "epoch": 1.7483018691150982, "grad_norm": 0.5967193800947647, "learning_rate": 2.0480902852144768e-07, "loss": 0.2682, "step": 37321 }, { "epoch": 1.7483487141050265, "grad_norm": 0.5661814563934324, "learning_rate": 2.0473385714030764e-07, "loss": 0.2685, "step": 37322 }, { "epoch": 1.7483955590949547, "grad_norm": 0.6039115641171707, "learning_rate": 2.0465869896784758e-07, "loss": 0.2828, "step": 37323 }, { "epoch": 1.7484424040848832, "grad_norm": 0.6385280516142802, "learning_rate": 2.0458355400449965e-07, "loss": 0.2843, "step": 37324 }, { "epoch": 1.7484892490748114, "grad_norm": 0.620822893982889, "learning_rate": 2.0450842225069628e-07, "loss": 0.2724, "step": 37325 }, { "epoch": 1.7485360940647396, "grad_norm": 0.5943935289424989, "learning_rate": 2.0443330370686993e-07, "loss": 0.2634, "step": 37326 }, { "epoch": 1.748582939054668, "grad_norm": 0.5761992612659675, "learning_rate": 2.0435819837345356e-07, "loss": 0.2662, "step": 37327 }, { "epoch": 1.7486297840445966, "grad_norm": 0.6007634190679565, "learning_rate": 2.042831062508782e-07, "loss": 0.2744, "step": 37328 }, { "epoch": 1.7486766290345248, "grad_norm": 0.6118264703457807, "learning_rate": 2.042080273395766e-07, "loss": 0.2758, "step": 37329 }, { "epoch": 1.748723474024453, "grad_norm": 0.6147904355383936, "learning_rate": 2.041329616399812e-07, "loss": 0.2764, "step": 37330 }, { "epoch": 1.7487703190143815, "grad_norm": 0.519966262195658, "learning_rate": 2.0405790915252326e-07, "loss": 0.2451, "step": 37331 }, { "epoch": 1.7488171640043098, "grad_norm": 0.6409000992633833, "learning_rate": 2.0398286987763472e-07, "loss": 0.275, "step": 37332 }, { "epoch": 1.748864008994238, "grad_norm": 0.5837110318777384, "learning_rate": 2.0390784381574825e-07, "loss": 0.2609, "step": 37333 }, { "epoch": 1.7489108539841665, "grad_norm": 0.5772373434492429, "learning_rate": 2.0383283096729462e-07, "loss": 0.262, "step": 37334 }, { "epoch": 1.7489576989740947, "grad_norm": 0.6214116416340015, "learning_rate": 2.0375783133270598e-07, "loss": 0.2786, "step": 37335 }, { "epoch": 1.749004543964023, "grad_norm": 0.613674226745364, "learning_rate": 2.0368284491241397e-07, "loss": 0.2835, "step": 37336 }, { "epoch": 1.7490513889539514, "grad_norm": 0.5657605008652815, "learning_rate": 2.0360787170685015e-07, "loss": 0.2625, "step": 37337 }, { "epoch": 1.7490982339438799, "grad_norm": 0.5557538710555013, "learning_rate": 2.0353291171644558e-07, "loss": 0.2668, "step": 37338 }, { "epoch": 1.7491450789338079, "grad_norm": 0.580571129125531, "learning_rate": 2.034579649416324e-07, "loss": 0.2653, "step": 37339 }, { "epoch": 1.7491919239237363, "grad_norm": 0.5930516644680158, "learning_rate": 2.033830313828411e-07, "loss": 0.2688, "step": 37340 }, { "epoch": 1.7492387689136648, "grad_norm": 0.5975308968982134, "learning_rate": 2.0330811104050301e-07, "loss": 0.2591, "step": 37341 }, { "epoch": 1.749285613903593, "grad_norm": 0.617297926873573, "learning_rate": 2.0323320391504948e-07, "loss": 0.269, "step": 37342 }, { "epoch": 1.7493324588935213, "grad_norm": 0.5976142861836282, "learning_rate": 2.0315831000691206e-07, "loss": 0.2788, "step": 37343 }, { "epoch": 1.7493793038834498, "grad_norm": 0.6360043029052487, "learning_rate": 2.0308342931652098e-07, "loss": 0.2713, "step": 37344 }, { "epoch": 1.749426148873378, "grad_norm": 0.5611728368369422, "learning_rate": 2.0300856184430784e-07, "loss": 0.2558, "step": 37345 }, { "epoch": 1.7494729938633062, "grad_norm": 0.6559938719514494, "learning_rate": 2.0293370759070257e-07, "loss": 0.2843, "step": 37346 }, { "epoch": 1.7495198388532347, "grad_norm": 0.6036788989185559, "learning_rate": 2.0285886655613652e-07, "loss": 0.2795, "step": 37347 }, { "epoch": 1.749566683843163, "grad_norm": 0.6423095161952485, "learning_rate": 2.0278403874104014e-07, "loss": 0.2903, "step": 37348 }, { "epoch": 1.7496135288330912, "grad_norm": 0.5940806081429604, "learning_rate": 2.0270922414584447e-07, "loss": 0.2793, "step": 37349 }, { "epoch": 1.7496603738230196, "grad_norm": 0.5795916246726779, "learning_rate": 2.0263442277098006e-07, "loss": 0.2634, "step": 37350 }, { "epoch": 1.749707218812948, "grad_norm": 0.5859959794333534, "learning_rate": 2.0255963461687734e-07, "loss": 0.2568, "step": 37351 }, { "epoch": 1.7497540638028761, "grad_norm": 0.6271510398673515, "learning_rate": 2.0248485968396597e-07, "loss": 0.2765, "step": 37352 }, { "epoch": 1.7498009087928046, "grad_norm": 0.6051254934247389, "learning_rate": 2.0241009797267674e-07, "loss": 0.2717, "step": 37353 }, { "epoch": 1.749847753782733, "grad_norm": 0.5779140349095213, "learning_rate": 2.0233534948343986e-07, "loss": 0.2589, "step": 37354 }, { "epoch": 1.7498945987726613, "grad_norm": 0.6102172341187335, "learning_rate": 2.0226061421668581e-07, "loss": 0.2648, "step": 37355 }, { "epoch": 1.7499414437625895, "grad_norm": 0.6238402531688345, "learning_rate": 2.0218589217284423e-07, "loss": 0.2738, "step": 37356 }, { "epoch": 1.749988288752518, "grad_norm": 0.5953657103004575, "learning_rate": 2.0211118335234565e-07, "loss": 0.263, "step": 37357 }, { "epoch": 1.7500351337424462, "grad_norm": 0.5613745450856636, "learning_rate": 2.0203648775561969e-07, "loss": 0.2744, "step": 37358 }, { "epoch": 1.7500819787323745, "grad_norm": 0.6402380679049363, "learning_rate": 2.0196180538309574e-07, "loss": 0.2716, "step": 37359 }, { "epoch": 1.750128823722303, "grad_norm": 0.5743352029977117, "learning_rate": 2.0188713623520427e-07, "loss": 0.2663, "step": 37360 }, { "epoch": 1.7501756687122312, "grad_norm": 0.6049847914234083, "learning_rate": 2.0181248031237445e-07, "loss": 0.2652, "step": 37361 }, { "epoch": 1.7502225137021594, "grad_norm": 0.5642655820266292, "learning_rate": 2.0173783761503612e-07, "loss": 0.2513, "step": 37362 }, { "epoch": 1.7502693586920879, "grad_norm": 0.6126919297310462, "learning_rate": 2.0166320814361956e-07, "loss": 0.2685, "step": 37363 }, { "epoch": 1.7503162036820163, "grad_norm": 0.6246070182256256, "learning_rate": 2.01588591898553e-07, "loss": 0.2802, "step": 37364 }, { "epoch": 1.7503630486719446, "grad_norm": 0.6508318493498177, "learning_rate": 2.0151398888026668e-07, "loss": 0.2871, "step": 37365 }, { "epoch": 1.7504098936618728, "grad_norm": 0.5725594784785424, "learning_rate": 2.0143939908918942e-07, "loss": 0.2476, "step": 37366 }, { "epoch": 1.7504567386518013, "grad_norm": 0.6167302251398258, "learning_rate": 2.0136482252575086e-07, "loss": 0.2648, "step": 37367 }, { "epoch": 1.7505035836417295, "grad_norm": 0.5816178533528482, "learning_rate": 2.012902591903798e-07, "loss": 0.2723, "step": 37368 }, { "epoch": 1.7505504286316578, "grad_norm": 0.6195443127925558, "learning_rate": 2.0121570908350596e-07, "loss": 0.2656, "step": 37369 }, { "epoch": 1.7505972736215862, "grad_norm": 0.6553576081181612, "learning_rate": 2.0114117220555756e-07, "loss": 0.2837, "step": 37370 }, { "epoch": 1.7506441186115145, "grad_norm": 0.6033962071564706, "learning_rate": 2.0106664855696374e-07, "loss": 0.269, "step": 37371 }, { "epoch": 1.7506909636014427, "grad_norm": 0.6415533894735251, "learning_rate": 2.0099213813815438e-07, "loss": 0.2872, "step": 37372 }, { "epoch": 1.7507378085913712, "grad_norm": 0.6539071683488862, "learning_rate": 2.0091764094955667e-07, "loss": 0.2614, "step": 37373 }, { "epoch": 1.7507846535812996, "grad_norm": 0.5898852496512839, "learning_rate": 2.008431569916e-07, "loss": 0.2641, "step": 37374 }, { "epoch": 1.7508314985712277, "grad_norm": 0.6080774470194656, "learning_rate": 2.0076868626471345e-07, "loss": 0.2633, "step": 37375 }, { "epoch": 1.7508783435611561, "grad_norm": 0.5757190017109457, "learning_rate": 2.0069422876932531e-07, "loss": 0.2736, "step": 37376 }, { "epoch": 1.7509251885510846, "grad_norm": 0.5709011711395565, "learning_rate": 2.0061978450586384e-07, "loss": 0.2612, "step": 37377 }, { "epoch": 1.7509720335410128, "grad_norm": 0.5638491957146334, "learning_rate": 2.005453534747573e-07, "loss": 0.2693, "step": 37378 }, { "epoch": 1.751018878530941, "grad_norm": 0.6075866133795808, "learning_rate": 2.0047093567643506e-07, "loss": 0.283, "step": 37379 }, { "epoch": 1.7510657235208695, "grad_norm": 0.6222704848890851, "learning_rate": 2.0039653111132372e-07, "loss": 0.2699, "step": 37380 }, { "epoch": 1.7511125685107978, "grad_norm": 0.5925028950323695, "learning_rate": 2.003221397798527e-07, "loss": 0.2684, "step": 37381 }, { "epoch": 1.751159413500726, "grad_norm": 0.607664590952167, "learning_rate": 2.0024776168245025e-07, "loss": 0.2569, "step": 37382 }, { "epoch": 1.7512062584906545, "grad_norm": 0.5667779477612597, "learning_rate": 2.0017339681954324e-07, "loss": 0.257, "step": 37383 }, { "epoch": 1.7512531034805827, "grad_norm": 0.6200052009802611, "learning_rate": 2.0009904519156047e-07, "loss": 0.2724, "step": 37384 }, { "epoch": 1.751299948470511, "grad_norm": 0.6164039502385917, "learning_rate": 2.0002470679892972e-07, "loss": 0.2812, "step": 37385 }, { "epoch": 1.7513467934604394, "grad_norm": 0.620705833021777, "learning_rate": 1.9995038164207892e-07, "loss": 0.2603, "step": 37386 }, { "epoch": 1.7513936384503679, "grad_norm": 0.6105673478879579, "learning_rate": 1.9987606972143524e-07, "loss": 0.2734, "step": 37387 }, { "epoch": 1.751440483440296, "grad_norm": 0.5786956687634482, "learning_rate": 1.99801771037427e-07, "loss": 0.2565, "step": 37388 }, { "epoch": 1.7514873284302244, "grad_norm": 0.6097799419705714, "learning_rate": 1.99727485590481e-07, "loss": 0.2767, "step": 37389 }, { "epoch": 1.7515341734201528, "grad_norm": 0.5544859840497662, "learning_rate": 1.9965321338102528e-07, "loss": 0.2577, "step": 37390 }, { "epoch": 1.751581018410081, "grad_norm": 0.5933354133258747, "learning_rate": 1.9957895440948726e-07, "loss": 0.2788, "step": 37391 }, { "epoch": 1.7516278634000093, "grad_norm": 0.6663571422384981, "learning_rate": 1.995047086762944e-07, "loss": 0.2859, "step": 37392 }, { "epoch": 1.7516747083899378, "grad_norm": 0.5742370785873236, "learning_rate": 1.9943047618187329e-07, "loss": 0.257, "step": 37393 }, { "epoch": 1.751721553379866, "grad_norm": 0.5965440493508084, "learning_rate": 1.9935625692665217e-07, "loss": 0.2638, "step": 37394 }, { "epoch": 1.7517683983697943, "grad_norm": 0.5819772439267761, "learning_rate": 1.9928205091105685e-07, "loss": 0.2582, "step": 37395 }, { "epoch": 1.7518152433597227, "grad_norm": 0.6153431934647695, "learning_rate": 1.992078581355153e-07, "loss": 0.2809, "step": 37396 }, { "epoch": 1.751862088349651, "grad_norm": 0.6610746861501076, "learning_rate": 1.9913367860045412e-07, "loss": 0.2951, "step": 37397 }, { "epoch": 1.7519089333395792, "grad_norm": 0.5897878448519138, "learning_rate": 1.990595123063005e-07, "loss": 0.2757, "step": 37398 }, { "epoch": 1.7519557783295077, "grad_norm": 0.6233902805921743, "learning_rate": 1.989853592534813e-07, "loss": 0.2801, "step": 37399 }, { "epoch": 1.7520026233194361, "grad_norm": 0.618288185082217, "learning_rate": 1.989112194424231e-07, "loss": 0.2654, "step": 37400 }, { "epoch": 1.7520494683093644, "grad_norm": 0.6064486351391429, "learning_rate": 1.9883709287355197e-07, "loss": 0.2709, "step": 37401 }, { "epoch": 1.7520963132992926, "grad_norm": 0.5702854988803708, "learning_rate": 1.9876297954729506e-07, "loss": 0.2433, "step": 37402 }, { "epoch": 1.752143158289221, "grad_norm": 0.5975708184726891, "learning_rate": 1.986888794640787e-07, "loss": 0.2584, "step": 37403 }, { "epoch": 1.7521900032791493, "grad_norm": 0.6546038500866668, "learning_rate": 1.9861479262432953e-07, "loss": 0.2907, "step": 37404 }, { "epoch": 1.7522368482690776, "grad_norm": 0.6007854671026884, "learning_rate": 1.9854071902847382e-07, "loss": 0.2672, "step": 37405 }, { "epoch": 1.752283693259006, "grad_norm": 0.7935913353523744, "learning_rate": 1.984666586769382e-07, "loss": 0.2967, "step": 37406 }, { "epoch": 1.7523305382489343, "grad_norm": 0.673687376514438, "learning_rate": 1.9839261157014872e-07, "loss": 0.2943, "step": 37407 }, { "epoch": 1.7523773832388625, "grad_norm": 0.6120767841975697, "learning_rate": 1.9831857770853058e-07, "loss": 0.267, "step": 37408 }, { "epoch": 1.752424228228791, "grad_norm": 0.6071789040043833, "learning_rate": 1.982445570925104e-07, "loss": 0.2818, "step": 37409 }, { "epoch": 1.7524710732187194, "grad_norm": 0.5737907665112499, "learning_rate": 1.9817054972251449e-07, "loss": 0.2663, "step": 37410 }, { "epoch": 1.7525179182086474, "grad_norm": 0.6176445563328762, "learning_rate": 1.9809655559896863e-07, "loss": 0.2926, "step": 37411 }, { "epoch": 1.752564763198576, "grad_norm": 0.5607031682876794, "learning_rate": 1.9802257472229886e-07, "loss": 0.2551, "step": 37412 }, { "epoch": 1.7526116081885044, "grad_norm": 0.6007269721509012, "learning_rate": 1.9794860709292984e-07, "loss": 0.2722, "step": 37413 }, { "epoch": 1.7526584531784326, "grad_norm": 0.5845261798046308, "learning_rate": 1.9787465271128874e-07, "loss": 0.2577, "step": 37414 }, { "epoch": 1.7527052981683608, "grad_norm": 0.6204834921714919, "learning_rate": 1.9780071157779995e-07, "loss": 0.2555, "step": 37415 }, { "epoch": 1.7527521431582893, "grad_norm": 0.5992896217442925, "learning_rate": 1.977267836928895e-07, "loss": 0.277, "step": 37416 }, { "epoch": 1.7527989881482176, "grad_norm": 0.6002751840494601, "learning_rate": 1.976528690569826e-07, "loss": 0.2559, "step": 37417 }, { "epoch": 1.7528458331381458, "grad_norm": 0.6458324706562795, "learning_rate": 1.9757896767050534e-07, "loss": 0.2922, "step": 37418 }, { "epoch": 1.7528926781280743, "grad_norm": 0.5736415349125681, "learning_rate": 1.9750507953388176e-07, "loss": 0.2744, "step": 37419 }, { "epoch": 1.7529395231180025, "grad_norm": 0.6530596417572199, "learning_rate": 1.9743120464753796e-07, "loss": 0.2704, "step": 37420 }, { "epoch": 1.7529863681079307, "grad_norm": 0.6059684143177634, "learning_rate": 1.9735734301189913e-07, "loss": 0.2836, "step": 37421 }, { "epoch": 1.7530332130978592, "grad_norm": 0.5672883523756093, "learning_rate": 1.9728349462738965e-07, "loss": 0.2583, "step": 37422 }, { "epoch": 1.7530800580877877, "grad_norm": 0.6728643099171626, "learning_rate": 1.9720965949443506e-07, "loss": 0.2847, "step": 37423 }, { "epoch": 1.7531269030777157, "grad_norm": 0.5934922498804108, "learning_rate": 1.9713583761346021e-07, "loss": 0.2639, "step": 37424 }, { "epoch": 1.7531737480676441, "grad_norm": 0.5953281816563678, "learning_rate": 1.970620289848896e-07, "loss": 0.2749, "step": 37425 }, { "epoch": 1.7532205930575726, "grad_norm": 0.5986229925631471, "learning_rate": 1.969882336091483e-07, "loss": 0.2755, "step": 37426 }, { "epoch": 1.7532674380475008, "grad_norm": 0.6351627894087079, "learning_rate": 1.9691445148666056e-07, "loss": 0.2687, "step": 37427 }, { "epoch": 1.753314283037429, "grad_norm": 0.6063225001614205, "learning_rate": 1.9684068261785206e-07, "loss": 0.2852, "step": 37428 }, { "epoch": 1.7533611280273576, "grad_norm": 0.5947931538743726, "learning_rate": 1.967669270031458e-07, "loss": 0.2599, "step": 37429 }, { "epoch": 1.7534079730172858, "grad_norm": 0.5744331673983658, "learning_rate": 1.966931846429676e-07, "loss": 0.2557, "step": 37430 }, { "epoch": 1.753454818007214, "grad_norm": 0.6087580655139764, "learning_rate": 1.966194555377407e-07, "loss": 0.2804, "step": 37431 }, { "epoch": 1.7535016629971425, "grad_norm": 0.5786402085638005, "learning_rate": 1.9654573968789004e-07, "loss": 0.2616, "step": 37432 }, { "epoch": 1.7535485079870707, "grad_norm": 0.6025522641877501, "learning_rate": 1.9647203709383973e-07, "loss": 0.2589, "step": 37433 }, { "epoch": 1.753595352976999, "grad_norm": 0.6538156366675928, "learning_rate": 1.9639834775601412e-07, "loss": 0.2809, "step": 37434 }, { "epoch": 1.7536421979669274, "grad_norm": 0.5993190803459081, "learning_rate": 1.9632467167483655e-07, "loss": 0.2669, "step": 37435 }, { "epoch": 1.753689042956856, "grad_norm": 0.6209714515113475, "learning_rate": 1.9625100885073218e-07, "loss": 0.2694, "step": 37436 }, { "epoch": 1.7537358879467841, "grad_norm": 0.5766530029732091, "learning_rate": 1.9617735928412347e-07, "loss": 0.2733, "step": 37437 }, { "epoch": 1.7537827329367124, "grad_norm": 0.6255203776783523, "learning_rate": 1.9610372297543535e-07, "loss": 0.2764, "step": 37438 }, { "epoch": 1.7538295779266408, "grad_norm": 0.6413940585111653, "learning_rate": 1.960300999250911e-07, "loss": 0.2822, "step": 37439 }, { "epoch": 1.753876422916569, "grad_norm": 0.6274001372527989, "learning_rate": 1.9595649013351454e-07, "loss": 0.2768, "step": 37440 }, { "epoch": 1.7539232679064973, "grad_norm": 0.6172936728295014, "learning_rate": 1.958828936011295e-07, "loss": 0.2861, "step": 37441 }, { "epoch": 1.7539701128964258, "grad_norm": 0.5857462609769011, "learning_rate": 1.9580931032835898e-07, "loss": 0.2726, "step": 37442 }, { "epoch": 1.754016957886354, "grad_norm": 0.6303163186161975, "learning_rate": 1.9573574031562736e-07, "loss": 0.2803, "step": 37443 }, { "epoch": 1.7540638028762823, "grad_norm": 0.5041343969106875, "learning_rate": 1.9566218356335682e-07, "loss": 0.2433, "step": 37444 }, { "epoch": 1.7541106478662107, "grad_norm": 0.5965947321066043, "learning_rate": 1.9558864007197142e-07, "loss": 0.2649, "step": 37445 }, { "epoch": 1.7541574928561392, "grad_norm": 0.6099180783255398, "learning_rate": 1.9551510984189393e-07, "loss": 0.2956, "step": 37446 }, { "epoch": 1.7542043378460672, "grad_norm": 0.6062380815440783, "learning_rate": 1.9544159287354787e-07, "loss": 0.2849, "step": 37447 }, { "epoch": 1.7542511828359957, "grad_norm": 0.6257993114870493, "learning_rate": 1.9536808916735678e-07, "loss": 0.2823, "step": 37448 }, { "epoch": 1.7542980278259241, "grad_norm": 0.6293086420528354, "learning_rate": 1.9529459872374286e-07, "loss": 0.2759, "step": 37449 }, { "epoch": 1.7543448728158524, "grad_norm": 0.6034163024845784, "learning_rate": 1.9522112154312905e-07, "loss": 0.2552, "step": 37450 }, { "epoch": 1.7543917178057806, "grad_norm": 0.6311614162972156, "learning_rate": 1.9514765762593813e-07, "loss": 0.2727, "step": 37451 }, { "epoch": 1.754438562795709, "grad_norm": 0.5710992108939325, "learning_rate": 1.9507420697259332e-07, "loss": 0.2693, "step": 37452 }, { "epoch": 1.7544854077856373, "grad_norm": 0.6067917204888036, "learning_rate": 1.9500076958351737e-07, "loss": 0.272, "step": 37453 }, { "epoch": 1.7545322527755656, "grad_norm": 0.606889340196652, "learning_rate": 1.9492734545913216e-07, "loss": 0.2626, "step": 37454 }, { "epoch": 1.754579097765494, "grad_norm": 0.6046412865325846, "learning_rate": 1.948539345998615e-07, "loss": 0.2516, "step": 37455 }, { "epoch": 1.7546259427554223, "grad_norm": 0.5897892518389214, "learning_rate": 1.9478053700612703e-07, "loss": 0.2653, "step": 37456 }, { "epoch": 1.7546727877453505, "grad_norm": 0.6244261418403725, "learning_rate": 1.9470715267835062e-07, "loss": 0.2846, "step": 37457 }, { "epoch": 1.754719632735279, "grad_norm": 0.5980580598648316, "learning_rate": 1.9463378161695523e-07, "loss": 0.2827, "step": 37458 }, { "epoch": 1.7547664777252074, "grad_norm": 0.5584675700375974, "learning_rate": 1.9456042382236278e-07, "loss": 0.2597, "step": 37459 }, { "epoch": 1.7548133227151355, "grad_norm": 0.5760420878788635, "learning_rate": 1.94487079294996e-07, "loss": 0.2601, "step": 37460 }, { "epoch": 1.754860167705064, "grad_norm": 0.6073262433508831, "learning_rate": 1.9441374803527675e-07, "loss": 0.2933, "step": 37461 }, { "epoch": 1.7549070126949924, "grad_norm": 0.6341290327345772, "learning_rate": 1.9434043004362662e-07, "loss": 0.2789, "step": 37462 }, { "epoch": 1.7549538576849206, "grad_norm": 0.6240667870753247, "learning_rate": 1.942671253204681e-07, "loss": 0.271, "step": 37463 }, { "epoch": 1.7550007026748489, "grad_norm": 0.5935414837014596, "learning_rate": 1.9419383386622248e-07, "loss": 0.2629, "step": 37464 }, { "epoch": 1.7550475476647773, "grad_norm": 0.6125386930860081, "learning_rate": 1.9412055568131166e-07, "loss": 0.269, "step": 37465 }, { "epoch": 1.7550943926547056, "grad_norm": 0.6332109744234418, "learning_rate": 1.9404729076615754e-07, "loss": 0.2642, "step": 37466 }, { "epoch": 1.7551412376446338, "grad_norm": 0.5900821242812139, "learning_rate": 1.9397403912118222e-07, "loss": 0.2723, "step": 37467 }, { "epoch": 1.7551880826345623, "grad_norm": 0.5558798486805566, "learning_rate": 1.93900800746806e-07, "loss": 0.2639, "step": 37468 }, { "epoch": 1.7552349276244905, "grad_norm": 0.6413421139389343, "learning_rate": 1.9382757564345128e-07, "loss": 0.2968, "step": 37469 }, { "epoch": 1.7552817726144188, "grad_norm": 0.5981873584910629, "learning_rate": 1.9375436381153967e-07, "loss": 0.2806, "step": 37470 }, { "epoch": 1.7553286176043472, "grad_norm": 0.595721555626101, "learning_rate": 1.9368116525149167e-07, "loss": 0.268, "step": 37471 }, { "epoch": 1.7553754625942757, "grad_norm": 0.6005239050774033, "learning_rate": 1.936079799637286e-07, "loss": 0.2682, "step": 37472 }, { "epoch": 1.755422307584204, "grad_norm": 0.6373450902985741, "learning_rate": 1.9353480794867262e-07, "loss": 0.2779, "step": 37473 }, { "epoch": 1.7554691525741322, "grad_norm": 0.5884157194843789, "learning_rate": 1.9346164920674342e-07, "loss": 0.2693, "step": 37474 }, { "epoch": 1.7555159975640606, "grad_norm": 0.6027367249587917, "learning_rate": 1.9338850373836286e-07, "loss": 0.2695, "step": 37475 }, { "epoch": 1.7555628425539889, "grad_norm": 0.6416999534885954, "learning_rate": 1.9331537154395175e-07, "loss": 0.2746, "step": 37476 }, { "epoch": 1.755609687543917, "grad_norm": 0.6168909667828942, "learning_rate": 1.9324225262393137e-07, "loss": 0.2714, "step": 37477 }, { "epoch": 1.7556565325338456, "grad_norm": 0.5857329615020954, "learning_rate": 1.931691469787217e-07, "loss": 0.2608, "step": 37478 }, { "epoch": 1.7557033775237738, "grad_norm": 0.5951647081704657, "learning_rate": 1.9309605460874403e-07, "loss": 0.2765, "step": 37479 }, { "epoch": 1.755750222513702, "grad_norm": 0.607243593538072, "learning_rate": 1.9302297551441834e-07, "loss": 0.2453, "step": 37480 }, { "epoch": 1.7557970675036305, "grad_norm": 0.5834430967017323, "learning_rate": 1.9294990969616566e-07, "loss": 0.2565, "step": 37481 }, { "epoch": 1.755843912493559, "grad_norm": 0.6463292826549735, "learning_rate": 1.9287685715440624e-07, "loss": 0.2842, "step": 37482 }, { "epoch": 1.755890757483487, "grad_norm": 0.6240373383112868, "learning_rate": 1.928038178895611e-07, "loss": 0.2699, "step": 37483 }, { "epoch": 1.7559376024734155, "grad_norm": 0.5970111148455637, "learning_rate": 1.927307919020499e-07, "loss": 0.2653, "step": 37484 }, { "epoch": 1.755984447463344, "grad_norm": 0.6034597618903776, "learning_rate": 1.9265777919229318e-07, "loss": 0.268, "step": 37485 }, { "epoch": 1.7560312924532722, "grad_norm": 0.6392118770110058, "learning_rate": 1.9258477976071083e-07, "loss": 0.2692, "step": 37486 }, { "epoch": 1.7560781374432004, "grad_norm": 0.6416846086279216, "learning_rate": 1.925117936077231e-07, "loss": 0.2935, "step": 37487 }, { "epoch": 1.7561249824331289, "grad_norm": 0.608853186228418, "learning_rate": 1.9243882073374992e-07, "loss": 0.2961, "step": 37488 }, { "epoch": 1.756171827423057, "grad_norm": 0.5541211394962702, "learning_rate": 1.9236586113921152e-07, "loss": 0.2447, "step": 37489 }, { "epoch": 1.7562186724129853, "grad_norm": 0.5866638729669534, "learning_rate": 1.9229291482452783e-07, "loss": 0.271, "step": 37490 }, { "epoch": 1.7562655174029138, "grad_norm": 0.6308712589245308, "learning_rate": 1.9221998179011852e-07, "loss": 0.2816, "step": 37491 }, { "epoch": 1.756312362392842, "grad_norm": 0.6223855839615833, "learning_rate": 1.9214706203640272e-07, "loss": 0.2737, "step": 37492 }, { "epoch": 1.7563592073827703, "grad_norm": 0.5721154112620033, "learning_rate": 1.9207415556380033e-07, "loss": 0.2645, "step": 37493 }, { "epoch": 1.7564060523726988, "grad_norm": 0.6086719581054527, "learning_rate": 1.9200126237273131e-07, "loss": 0.2616, "step": 37494 }, { "epoch": 1.7564528973626272, "grad_norm": 0.5769931997592325, "learning_rate": 1.9192838246361478e-07, "loss": 0.2608, "step": 37495 }, { "epoch": 1.7564997423525552, "grad_norm": 0.6242405999722203, "learning_rate": 1.9185551583687039e-07, "loss": 0.2745, "step": 37496 }, { "epoch": 1.7565465873424837, "grad_norm": 0.5881151537296873, "learning_rate": 1.9178266249291784e-07, "loss": 0.2673, "step": 37497 }, { "epoch": 1.7565934323324122, "grad_norm": 0.5919438112234862, "learning_rate": 1.917098224321759e-07, "loss": 0.2779, "step": 37498 }, { "epoch": 1.7566402773223404, "grad_norm": 0.5902154740921786, "learning_rate": 1.9163699565506343e-07, "loss": 0.2631, "step": 37499 }, { "epoch": 1.7566871223122686, "grad_norm": 0.6302487735573765, "learning_rate": 1.9156418216199956e-07, "loss": 0.2791, "step": 37500 }, { "epoch": 1.756733967302197, "grad_norm": 0.6230143413113668, "learning_rate": 1.9149138195340394e-07, "loss": 0.2775, "step": 37501 }, { "epoch": 1.7567808122921253, "grad_norm": 0.6170683501988937, "learning_rate": 1.914185950296951e-07, "loss": 0.2719, "step": 37502 }, { "epoch": 1.7568276572820536, "grad_norm": 0.5713706869309747, "learning_rate": 1.913458213912925e-07, "loss": 0.2773, "step": 37503 }, { "epoch": 1.756874502271982, "grad_norm": 0.6145445547312253, "learning_rate": 1.9127306103861375e-07, "loss": 0.2799, "step": 37504 }, { "epoch": 1.7569213472619103, "grad_norm": 0.6167179483042121, "learning_rate": 1.912003139720789e-07, "loss": 0.2781, "step": 37505 }, { "epoch": 1.7569681922518385, "grad_norm": 0.6573643458658407, "learning_rate": 1.911275801921056e-07, "loss": 0.2867, "step": 37506 }, { "epoch": 1.757015037241767, "grad_norm": 0.6223538036615329, "learning_rate": 1.910548596991124e-07, "loss": 0.2776, "step": 37507 }, { "epoch": 1.7570618822316955, "grad_norm": 0.6200769514246949, "learning_rate": 1.9098215249351848e-07, "loss": 0.2681, "step": 37508 }, { "epoch": 1.7571087272216237, "grad_norm": 0.6043715858189904, "learning_rate": 1.9090945857574177e-07, "loss": 0.2712, "step": 37509 }, { "epoch": 1.757155572211552, "grad_norm": 0.5687975721259981, "learning_rate": 1.9083677794620144e-07, "loss": 0.2736, "step": 37510 }, { "epoch": 1.7572024172014804, "grad_norm": 0.5591949906017778, "learning_rate": 1.907641106053143e-07, "loss": 0.2612, "step": 37511 }, { "epoch": 1.7572492621914086, "grad_norm": 0.5525224816994462, "learning_rate": 1.9069145655350007e-07, "loss": 0.2681, "step": 37512 }, { "epoch": 1.7572961071813369, "grad_norm": 0.6211662126815787, "learning_rate": 1.9061881579117537e-07, "loss": 0.2812, "step": 37513 }, { "epoch": 1.7573429521712653, "grad_norm": 0.6513811496657995, "learning_rate": 1.90546188318759e-07, "loss": 0.2813, "step": 37514 }, { "epoch": 1.7573897971611936, "grad_norm": 0.62849651082412, "learning_rate": 1.9047357413666896e-07, "loss": 0.2862, "step": 37515 }, { "epoch": 1.7574366421511218, "grad_norm": 0.5798876091799151, "learning_rate": 1.9040097324532326e-07, "loss": 0.2706, "step": 37516 }, { "epoch": 1.7574834871410503, "grad_norm": 0.5604238382733508, "learning_rate": 1.9032838564513934e-07, "loss": 0.2531, "step": 37517 }, { "epoch": 1.7575303321309788, "grad_norm": 0.5897183823657386, "learning_rate": 1.9025581133653464e-07, "loss": 0.2607, "step": 37518 }, { "epoch": 1.7575771771209068, "grad_norm": 0.5767828800203963, "learning_rate": 1.90183250319928e-07, "loss": 0.257, "step": 37519 }, { "epoch": 1.7576240221108352, "grad_norm": 0.6243406633237816, "learning_rate": 1.901107025957355e-07, "loss": 0.2674, "step": 37520 }, { "epoch": 1.7576708671007637, "grad_norm": 0.57385637419771, "learning_rate": 1.9003816816437564e-07, "loss": 0.2608, "step": 37521 }, { "epoch": 1.757717712090692, "grad_norm": 0.6226589901304038, "learning_rate": 1.8996564702626563e-07, "loss": 0.2857, "step": 37522 }, { "epoch": 1.7577645570806202, "grad_norm": 0.6546001620869242, "learning_rate": 1.8989313918182263e-07, "loss": 0.2871, "step": 37523 }, { "epoch": 1.7578114020705486, "grad_norm": 0.6352271523804297, "learning_rate": 1.8982064463146377e-07, "loss": 0.2837, "step": 37524 }, { "epoch": 1.7578582470604769, "grad_norm": 0.6246518421452363, "learning_rate": 1.8974816337560654e-07, "loss": 0.2771, "step": 37525 }, { "epoch": 1.7579050920504051, "grad_norm": 0.6020243760721283, "learning_rate": 1.8967569541466836e-07, "loss": 0.2806, "step": 37526 }, { "epoch": 1.7579519370403336, "grad_norm": 0.5999472899098965, "learning_rate": 1.8960324074906554e-07, "loss": 0.2694, "step": 37527 }, { "epoch": 1.7579987820302618, "grad_norm": 0.6283792011947323, "learning_rate": 1.8953079937921558e-07, "loss": 0.2951, "step": 37528 }, { "epoch": 1.75804562702019, "grad_norm": 0.5705237252181214, "learning_rate": 1.894583713055348e-07, "loss": 0.259, "step": 37529 }, { "epoch": 1.7580924720101185, "grad_norm": 0.577567569927588, "learning_rate": 1.8938595652844061e-07, "loss": 0.2735, "step": 37530 }, { "epoch": 1.758139317000047, "grad_norm": 0.6263763322555486, "learning_rate": 1.893135550483491e-07, "loss": 0.284, "step": 37531 }, { "epoch": 1.758186161989975, "grad_norm": 0.6503082700948469, "learning_rate": 1.8924116686567796e-07, "loss": 0.2851, "step": 37532 }, { "epoch": 1.7582330069799035, "grad_norm": 0.5717464561467885, "learning_rate": 1.8916879198084271e-07, "loss": 0.2586, "step": 37533 }, { "epoch": 1.758279851969832, "grad_norm": 0.5844190457693798, "learning_rate": 1.890964303942608e-07, "loss": 0.2473, "step": 37534 }, { "epoch": 1.7583266969597602, "grad_norm": 0.5756070160117042, "learning_rate": 1.8902408210634744e-07, "loss": 0.2651, "step": 37535 }, { "epoch": 1.7583735419496884, "grad_norm": 0.6051294230281213, "learning_rate": 1.8895174711751978e-07, "loss": 0.273, "step": 37536 }, { "epoch": 1.7584203869396169, "grad_norm": 0.5994102814919734, "learning_rate": 1.8887942542819394e-07, "loss": 0.2699, "step": 37537 }, { "epoch": 1.7584672319295451, "grad_norm": 0.5728042462240374, "learning_rate": 1.888071170387859e-07, "loss": 0.2618, "step": 37538 }, { "epoch": 1.7585140769194734, "grad_norm": 0.5970022573746848, "learning_rate": 1.887348219497126e-07, "loss": 0.2676, "step": 37539 }, { "epoch": 1.7585609219094018, "grad_norm": 0.6199448431264735, "learning_rate": 1.8866254016138953e-07, "loss": 0.2743, "step": 37540 }, { "epoch": 1.75860776689933, "grad_norm": 0.6080258313746091, "learning_rate": 1.8859027167423216e-07, "loss": 0.2762, "step": 37541 }, { "epoch": 1.7586546118892583, "grad_norm": 0.6055344387144395, "learning_rate": 1.8851801648865685e-07, "loss": 0.2759, "step": 37542 }, { "epoch": 1.7587014568791868, "grad_norm": 0.5778351350169842, "learning_rate": 1.884457746050794e-07, "loss": 0.2754, "step": 37543 }, { "epoch": 1.7587483018691152, "grad_norm": 0.6090680998203853, "learning_rate": 1.8837354602391556e-07, "loss": 0.2728, "step": 37544 }, { "epoch": 1.7587951468590435, "grad_norm": 0.6020273009536109, "learning_rate": 1.883013307455808e-07, "loss": 0.2865, "step": 37545 }, { "epoch": 1.7588419918489717, "grad_norm": 0.591096465600434, "learning_rate": 1.8822912877049155e-07, "loss": 0.2775, "step": 37546 }, { "epoch": 1.7588888368389002, "grad_norm": 0.639790346001871, "learning_rate": 1.8815694009906239e-07, "loss": 0.2861, "step": 37547 }, { "epoch": 1.7589356818288284, "grad_norm": 0.6038430110821775, "learning_rate": 1.8808476473170856e-07, "loss": 0.257, "step": 37548 }, { "epoch": 1.7589825268187567, "grad_norm": 0.6292518051831347, "learning_rate": 1.8801260266884585e-07, "loss": 0.2844, "step": 37549 }, { "epoch": 1.7590293718086851, "grad_norm": 0.6029384924014425, "learning_rate": 1.879404539108895e-07, "loss": 0.2694, "step": 37550 }, { "epoch": 1.7590762167986134, "grad_norm": 0.5767312579078085, "learning_rate": 1.8786831845825498e-07, "loss": 0.2698, "step": 37551 }, { "epoch": 1.7591230617885416, "grad_norm": 0.5672308494952201, "learning_rate": 1.8779619631135726e-07, "loss": 0.2675, "step": 37552 }, { "epoch": 1.75916990677847, "grad_norm": 0.6015872820137919, "learning_rate": 1.87724087470611e-07, "loss": 0.2721, "step": 37553 }, { "epoch": 1.7592167517683985, "grad_norm": 0.553741735044516, "learning_rate": 1.8765199193643168e-07, "loss": 0.2466, "step": 37554 }, { "epoch": 1.7592635967583266, "grad_norm": 0.5790652173129098, "learning_rate": 1.8757990970923374e-07, "loss": 0.2581, "step": 37555 }, { "epoch": 1.759310441748255, "grad_norm": 0.6265684940975804, "learning_rate": 1.8750784078943234e-07, "loss": 0.2847, "step": 37556 }, { "epoch": 1.7593572867381835, "grad_norm": 0.5912631964030465, "learning_rate": 1.8743578517744194e-07, "loss": 0.2693, "step": 37557 }, { "epoch": 1.7594041317281117, "grad_norm": 0.6255869881926394, "learning_rate": 1.873637428736777e-07, "loss": 0.2595, "step": 37558 }, { "epoch": 1.75945097671804, "grad_norm": 0.6375728151591897, "learning_rate": 1.872917138785535e-07, "loss": 0.2744, "step": 37559 }, { "epoch": 1.7594978217079684, "grad_norm": 0.5601991645545847, "learning_rate": 1.8721969819248426e-07, "loss": 0.2579, "step": 37560 }, { "epoch": 1.7595446666978967, "grad_norm": 0.5973970201246022, "learning_rate": 1.8714769581588467e-07, "loss": 0.2732, "step": 37561 }, { "epoch": 1.759591511687825, "grad_norm": 0.6108079812400823, "learning_rate": 1.8707570674916826e-07, "loss": 0.2727, "step": 37562 }, { "epoch": 1.7596383566777534, "grad_norm": 0.600834673762464, "learning_rate": 1.8700373099274998e-07, "loss": 0.2654, "step": 37563 }, { "epoch": 1.7596852016676816, "grad_norm": 0.5941023062901073, "learning_rate": 1.8693176854704453e-07, "loss": 0.2666, "step": 37564 }, { "epoch": 1.7597320466576098, "grad_norm": 0.6351811884037848, "learning_rate": 1.8685981941246462e-07, "loss": 0.2774, "step": 37565 }, { "epoch": 1.7597788916475383, "grad_norm": 0.5919332654114706, "learning_rate": 1.8678788358942518e-07, "loss": 0.2688, "step": 37566 }, { "epoch": 1.7598257366374668, "grad_norm": 0.5775541277421736, "learning_rate": 1.8671596107834005e-07, "loss": 0.2554, "step": 37567 }, { "epoch": 1.7598725816273948, "grad_norm": 0.5804415891776805, "learning_rate": 1.8664405187962364e-07, "loss": 0.2691, "step": 37568 }, { "epoch": 1.7599194266173233, "grad_norm": 0.5969877907004799, "learning_rate": 1.8657215599368867e-07, "loss": 0.2627, "step": 37569 }, { "epoch": 1.7599662716072517, "grad_norm": 0.6279364281995484, "learning_rate": 1.8650027342095006e-07, "loss": 0.2652, "step": 37570 }, { "epoch": 1.76001311659718, "grad_norm": 0.5610582795867045, "learning_rate": 1.8642840416182055e-07, "loss": 0.2618, "step": 37571 }, { "epoch": 1.7600599615871082, "grad_norm": 0.6370352760344209, "learning_rate": 1.86356548216714e-07, "loss": 0.2731, "step": 37572 }, { "epoch": 1.7601068065770367, "grad_norm": 0.5838439034028524, "learning_rate": 1.8628470558604396e-07, "loss": 0.2682, "step": 37573 }, { "epoch": 1.760153651566965, "grad_norm": 0.6135408518079956, "learning_rate": 1.862128762702242e-07, "loss": 0.2729, "step": 37574 }, { "epoch": 1.7602004965568931, "grad_norm": 0.6186459351115288, "learning_rate": 1.8614106026966755e-07, "loss": 0.2695, "step": 37575 }, { "epoch": 1.7602473415468216, "grad_norm": 0.6108244354718698, "learning_rate": 1.8606925758478806e-07, "loss": 0.2759, "step": 37576 }, { "epoch": 1.7602941865367498, "grad_norm": 0.564073613164164, "learning_rate": 1.8599746821599763e-07, "loss": 0.2675, "step": 37577 }, { "epoch": 1.760341031526678, "grad_norm": 0.6631368973704627, "learning_rate": 1.8592569216371036e-07, "loss": 0.2892, "step": 37578 }, { "epoch": 1.7603878765166066, "grad_norm": 0.577697723287697, "learning_rate": 1.858539294283393e-07, "loss": 0.2619, "step": 37579 }, { "epoch": 1.760434721506535, "grad_norm": 0.621322070489391, "learning_rate": 1.8578218001029686e-07, "loss": 0.281, "step": 37580 }, { "epoch": 1.7604815664964633, "grad_norm": 0.634146387246039, "learning_rate": 1.8571044390999715e-07, "loss": 0.2902, "step": 37581 }, { "epoch": 1.7605284114863915, "grad_norm": 0.5864087453179814, "learning_rate": 1.8563872112785153e-07, "loss": 0.2681, "step": 37582 }, { "epoch": 1.76057525647632, "grad_norm": 0.5825199782477954, "learning_rate": 1.8556701166427383e-07, "loss": 0.2696, "step": 37583 }, { "epoch": 1.7606221014662482, "grad_norm": 0.6735953942155768, "learning_rate": 1.8549531551967564e-07, "loss": 0.2847, "step": 37584 }, { "epoch": 1.7606689464561764, "grad_norm": 0.5843148958073785, "learning_rate": 1.8542363269447056e-07, "loss": 0.2737, "step": 37585 }, { "epoch": 1.760715791446105, "grad_norm": 0.5485682300237176, "learning_rate": 1.8535196318907046e-07, "loss": 0.2615, "step": 37586 }, { "epoch": 1.7607626364360331, "grad_norm": 0.5858859145912761, "learning_rate": 1.8528030700388804e-07, "loss": 0.2678, "step": 37587 }, { "epoch": 1.7608094814259614, "grad_norm": 0.5733059853390117, "learning_rate": 1.8520866413933607e-07, "loss": 0.2612, "step": 37588 }, { "epoch": 1.7608563264158898, "grad_norm": 0.5895199820634255, "learning_rate": 1.851370345958267e-07, "loss": 0.273, "step": 37589 }, { "epoch": 1.7609031714058183, "grad_norm": 0.5830963165764237, "learning_rate": 1.850654183737713e-07, "loss": 0.2521, "step": 37590 }, { "epoch": 1.7609500163957463, "grad_norm": 0.587681122212795, "learning_rate": 1.8499381547358254e-07, "loss": 0.2642, "step": 37591 }, { "epoch": 1.7609968613856748, "grad_norm": 0.6576533567979801, "learning_rate": 1.849222258956726e-07, "loss": 0.2869, "step": 37592 }, { "epoch": 1.7610437063756033, "grad_norm": 0.5978350284323355, "learning_rate": 1.8485064964045312e-07, "loss": 0.2548, "step": 37593 }, { "epoch": 1.7610905513655315, "grad_norm": 0.5950729143908331, "learning_rate": 1.8477908670833655e-07, "loss": 0.269, "step": 37594 }, { "epoch": 1.7611373963554597, "grad_norm": 0.618106333042174, "learning_rate": 1.8470753709973477e-07, "loss": 0.2773, "step": 37595 }, { "epoch": 1.7611842413453882, "grad_norm": 0.5736422793164835, "learning_rate": 1.846360008150591e-07, "loss": 0.2695, "step": 37596 }, { "epoch": 1.7612310863353164, "grad_norm": 0.6168196523641565, "learning_rate": 1.8456447785472093e-07, "loss": 0.269, "step": 37597 }, { "epoch": 1.7612779313252447, "grad_norm": 0.6109827583104266, "learning_rate": 1.8449296821913237e-07, "loss": 0.2781, "step": 37598 }, { "epoch": 1.7613247763151731, "grad_norm": 0.6316717886570602, "learning_rate": 1.844214719087048e-07, "loss": 0.2578, "step": 37599 }, { "epoch": 1.7613716213051014, "grad_norm": 0.6336333189001037, "learning_rate": 1.8434998892384953e-07, "loss": 0.2799, "step": 37600 }, { "epoch": 1.7614184662950296, "grad_norm": 0.6561247048567678, "learning_rate": 1.8427851926497846e-07, "loss": 0.2809, "step": 37601 }, { "epoch": 1.761465311284958, "grad_norm": 0.5847760303561884, "learning_rate": 1.8420706293250213e-07, "loss": 0.271, "step": 37602 }, { "epoch": 1.7615121562748866, "grad_norm": 0.6087594931280287, "learning_rate": 1.8413561992683237e-07, "loss": 0.2646, "step": 37603 }, { "epoch": 1.7615590012648146, "grad_norm": 0.594946016407738, "learning_rate": 1.8406419024837973e-07, "loss": 0.2496, "step": 37604 }, { "epoch": 1.761605846254743, "grad_norm": 0.6236886179824696, "learning_rate": 1.8399277389755555e-07, "loss": 0.2774, "step": 37605 }, { "epoch": 1.7616526912446715, "grad_norm": 0.6263898156511032, "learning_rate": 1.8392137087477086e-07, "loss": 0.2966, "step": 37606 }, { "epoch": 1.7616995362345997, "grad_norm": 0.6106150695541527, "learning_rate": 1.83849981180437e-07, "loss": 0.2922, "step": 37607 }, { "epoch": 1.761746381224528, "grad_norm": 0.6288840838901079, "learning_rate": 1.8377860481496395e-07, "loss": 0.2607, "step": 37608 }, { "epoch": 1.7617932262144564, "grad_norm": 0.5969195836353923, "learning_rate": 1.8370724177876275e-07, "loss": 0.279, "step": 37609 }, { "epoch": 1.7618400712043847, "grad_norm": 0.6116597249671915, "learning_rate": 1.8363589207224476e-07, "loss": 0.2767, "step": 37610 }, { "epoch": 1.761886916194313, "grad_norm": 0.610226047088021, "learning_rate": 1.835645556958196e-07, "loss": 0.271, "step": 37611 }, { "epoch": 1.7619337611842414, "grad_norm": 0.5744817645269045, "learning_rate": 1.834932326498981e-07, "loss": 0.2675, "step": 37612 }, { "epoch": 1.7619806061741696, "grad_norm": 0.619006661472654, "learning_rate": 1.834219229348913e-07, "loss": 0.2882, "step": 37613 }, { "epoch": 1.7620274511640979, "grad_norm": 0.5808524092387967, "learning_rate": 1.833506265512089e-07, "loss": 0.2692, "step": 37614 }, { "epoch": 1.7620742961540263, "grad_norm": 0.6012104293687176, "learning_rate": 1.8327934349926108e-07, "loss": 0.2759, "step": 37615 }, { "epoch": 1.7621211411439548, "grad_norm": 0.5865990276058489, "learning_rate": 1.8320807377945836e-07, "loss": 0.2603, "step": 37616 }, { "epoch": 1.762167986133883, "grad_norm": 0.6005196819870009, "learning_rate": 1.8313681739221128e-07, "loss": 0.287, "step": 37617 }, { "epoch": 1.7622148311238113, "grad_norm": 0.6024996430567156, "learning_rate": 1.8306557433792922e-07, "loss": 0.2829, "step": 37618 }, { "epoch": 1.7622616761137397, "grad_norm": 0.6096667861087017, "learning_rate": 1.8299434461702264e-07, "loss": 0.2629, "step": 37619 }, { "epoch": 1.762308521103668, "grad_norm": 0.5733736600185098, "learning_rate": 1.8292312822990099e-07, "loss": 0.2667, "step": 37620 }, { "epoch": 1.7623553660935962, "grad_norm": 0.5945860638498931, "learning_rate": 1.8285192517697448e-07, "loss": 0.2612, "step": 37621 }, { "epoch": 1.7624022110835247, "grad_norm": 0.6155987511566944, "learning_rate": 1.827807354586525e-07, "loss": 0.2538, "step": 37622 }, { "epoch": 1.762449056073453, "grad_norm": 0.5760969105240229, "learning_rate": 1.8270955907534555e-07, "loss": 0.2566, "step": 37623 }, { "epoch": 1.7624959010633812, "grad_norm": 0.630785711811605, "learning_rate": 1.826383960274622e-07, "loss": 0.2891, "step": 37624 }, { "epoch": 1.7625427460533096, "grad_norm": 0.5755277218834098, "learning_rate": 1.8256724631541296e-07, "loss": 0.2848, "step": 37625 }, { "epoch": 1.762589591043238, "grad_norm": 0.6385465504845507, "learning_rate": 1.824961099396061e-07, "loss": 0.2966, "step": 37626 }, { "epoch": 1.762636436033166, "grad_norm": 0.6064304290763222, "learning_rate": 1.8242498690045184e-07, "loss": 0.265, "step": 37627 }, { "epoch": 1.7626832810230946, "grad_norm": 0.5927580987513973, "learning_rate": 1.8235387719835907e-07, "loss": 0.2644, "step": 37628 }, { "epoch": 1.762730126013023, "grad_norm": 0.6174088416333232, "learning_rate": 1.822827808337374e-07, "loss": 0.2683, "step": 37629 }, { "epoch": 1.7627769710029513, "grad_norm": 0.6312494177454451, "learning_rate": 1.8221169780699627e-07, "loss": 0.2761, "step": 37630 }, { "epoch": 1.7628238159928795, "grad_norm": 0.6193540661129449, "learning_rate": 1.821406281185442e-07, "loss": 0.2772, "step": 37631 }, { "epoch": 1.762870660982808, "grad_norm": 0.6410337116343394, "learning_rate": 1.8206957176878954e-07, "loss": 0.2808, "step": 37632 }, { "epoch": 1.7629175059727362, "grad_norm": 0.5541806274698349, "learning_rate": 1.8199852875814216e-07, "loss": 0.2646, "step": 37633 }, { "epoch": 1.7629643509626645, "grad_norm": 0.5987453598085362, "learning_rate": 1.819274990870107e-07, "loss": 0.2643, "step": 37634 }, { "epoch": 1.763011195952593, "grad_norm": 0.6410137201753341, "learning_rate": 1.818564827558039e-07, "loss": 0.2752, "step": 37635 }, { "epoch": 1.7630580409425212, "grad_norm": 0.6097591821086233, "learning_rate": 1.8178547976493015e-07, "loss": 0.264, "step": 37636 }, { "epoch": 1.7631048859324494, "grad_norm": 0.583014292922214, "learning_rate": 1.817144901147988e-07, "loss": 0.2622, "step": 37637 }, { "epoch": 1.7631517309223779, "grad_norm": 0.608556715981906, "learning_rate": 1.8164351380581814e-07, "loss": 0.2778, "step": 37638 }, { "epoch": 1.7631985759123063, "grad_norm": 0.5705510231078205, "learning_rate": 1.8157255083839558e-07, "loss": 0.2649, "step": 37639 }, { "epoch": 1.7632454209022344, "grad_norm": 0.6492160887077548, "learning_rate": 1.8150160121294058e-07, "loss": 0.2749, "step": 37640 }, { "epoch": 1.7632922658921628, "grad_norm": 0.6010356016684216, "learning_rate": 1.8143066492986084e-07, "loss": 0.2798, "step": 37641 }, { "epoch": 1.7633391108820913, "grad_norm": 0.5910592208290242, "learning_rate": 1.8135974198956492e-07, "loss": 0.2777, "step": 37642 }, { "epoch": 1.7633859558720195, "grad_norm": 0.6214873634301501, "learning_rate": 1.8128883239246165e-07, "loss": 0.2726, "step": 37643 }, { "epoch": 1.7634328008619478, "grad_norm": 0.5891144728226251, "learning_rate": 1.8121793613895767e-07, "loss": 0.2628, "step": 37644 }, { "epoch": 1.7634796458518762, "grad_norm": 0.5948360058886449, "learning_rate": 1.8114705322946208e-07, "loss": 0.2793, "step": 37645 }, { "epoch": 1.7635264908418045, "grad_norm": 0.6121899579374546, "learning_rate": 1.8107618366438207e-07, "loss": 0.2713, "step": 37646 }, { "epoch": 1.7635733358317327, "grad_norm": 0.6066921073723713, "learning_rate": 1.8100532744412563e-07, "loss": 0.2769, "step": 37647 }, { "epoch": 1.7636201808216612, "grad_norm": 0.6032468316593385, "learning_rate": 1.809344845691008e-07, "loss": 0.2697, "step": 37648 }, { "epoch": 1.7636670258115894, "grad_norm": 0.5677861122560921, "learning_rate": 1.8086365503971525e-07, "loss": 0.2618, "step": 37649 }, { "epoch": 1.7637138708015176, "grad_norm": 0.562521405263422, "learning_rate": 1.807928388563765e-07, "loss": 0.2583, "step": 37650 }, { "epoch": 1.763760715791446, "grad_norm": 0.6232911108111883, "learning_rate": 1.8072203601949194e-07, "loss": 0.2871, "step": 37651 }, { "epoch": 1.7638075607813746, "grad_norm": 0.6140163141661072, "learning_rate": 1.8065124652946964e-07, "loss": 0.2597, "step": 37652 }, { "epoch": 1.7638544057713028, "grad_norm": 0.5996273102084965, "learning_rate": 1.805804703867159e-07, "loss": 0.2611, "step": 37653 }, { "epoch": 1.763901250761231, "grad_norm": 1.0012173238388271, "learning_rate": 1.8050970759163845e-07, "loss": 0.2712, "step": 37654 }, { "epoch": 1.7639480957511595, "grad_norm": 0.6395621336174189, "learning_rate": 1.8043895814464474e-07, "loss": 0.2951, "step": 37655 }, { "epoch": 1.7639949407410878, "grad_norm": 0.582726700386224, "learning_rate": 1.8036822204614252e-07, "loss": 0.2687, "step": 37656 }, { "epoch": 1.764041785731016, "grad_norm": 0.5598668739048411, "learning_rate": 1.8029749929653755e-07, "loss": 0.2656, "step": 37657 }, { "epoch": 1.7640886307209445, "grad_norm": 0.6227852569118281, "learning_rate": 1.8022678989623732e-07, "loss": 0.2652, "step": 37658 }, { "epoch": 1.7641354757108727, "grad_norm": 0.5551133192602701, "learning_rate": 1.8015609384564925e-07, "loss": 0.2573, "step": 37659 }, { "epoch": 1.764182320700801, "grad_norm": 0.624309867635248, "learning_rate": 1.800854111451797e-07, "loss": 0.2641, "step": 37660 }, { "epoch": 1.7642291656907294, "grad_norm": 0.5675490907631151, "learning_rate": 1.8001474179523527e-07, "loss": 0.2582, "step": 37661 }, { "epoch": 1.7642760106806579, "grad_norm": 0.6068305670615035, "learning_rate": 1.7994408579622312e-07, "loss": 0.2769, "step": 37662 }, { "epoch": 1.7643228556705859, "grad_norm": 0.6035237306166797, "learning_rate": 1.7987344314854965e-07, "loss": 0.252, "step": 37663 }, { "epoch": 1.7643697006605144, "grad_norm": 0.646962792662346, "learning_rate": 1.7980281385262116e-07, "loss": 0.2605, "step": 37664 }, { "epoch": 1.7644165456504428, "grad_norm": 0.6166953889859348, "learning_rate": 1.797321979088443e-07, "loss": 0.2699, "step": 37665 }, { "epoch": 1.764463390640371, "grad_norm": 0.6212468784428115, "learning_rate": 1.796615953176259e-07, "loss": 0.2733, "step": 37666 }, { "epoch": 1.7645102356302993, "grad_norm": 0.6274342311608397, "learning_rate": 1.7959100607937152e-07, "loss": 0.2726, "step": 37667 }, { "epoch": 1.7645570806202278, "grad_norm": 0.6023358314052195, "learning_rate": 1.7952043019448777e-07, "loss": 0.2704, "step": 37668 }, { "epoch": 1.764603925610156, "grad_norm": 0.5704678407956854, "learning_rate": 1.7944986766338075e-07, "loss": 0.2626, "step": 37669 }, { "epoch": 1.7646507706000842, "grad_norm": 0.6271590245834431, "learning_rate": 1.7937931848645618e-07, "loss": 0.2797, "step": 37670 }, { "epoch": 1.7646976155900127, "grad_norm": 0.5870260375171295, "learning_rate": 1.7930878266412043e-07, "loss": 0.2663, "step": 37671 }, { "epoch": 1.764744460579941, "grad_norm": 0.6241495572010753, "learning_rate": 1.7923826019677986e-07, "loss": 0.2843, "step": 37672 }, { "epoch": 1.7647913055698692, "grad_norm": 0.5642812337644382, "learning_rate": 1.7916775108483914e-07, "loss": 0.2608, "step": 37673 }, { "epoch": 1.7648381505597976, "grad_norm": 0.5731403133208741, "learning_rate": 1.7909725532870543e-07, "loss": 0.2669, "step": 37674 }, { "epoch": 1.764884995549726, "grad_norm": 0.6062134069684214, "learning_rate": 1.7902677292878311e-07, "loss": 0.2735, "step": 37675 }, { "epoch": 1.7649318405396541, "grad_norm": 0.6645043373401404, "learning_rate": 1.789563038854783e-07, "loss": 0.2859, "step": 37676 }, { "epoch": 1.7649786855295826, "grad_norm": 0.6054506382396826, "learning_rate": 1.7888584819919675e-07, "loss": 0.2632, "step": 37677 }, { "epoch": 1.765025530519511, "grad_norm": 0.5909793345897165, "learning_rate": 1.7881540587034368e-07, "loss": 0.273, "step": 37678 }, { "epoch": 1.7650723755094393, "grad_norm": 0.6080653881237573, "learning_rate": 1.7874497689932492e-07, "loss": 0.2784, "step": 37679 }, { "epoch": 1.7651192204993675, "grad_norm": 0.6224482914035581, "learning_rate": 1.7867456128654537e-07, "loss": 0.2766, "step": 37680 }, { "epoch": 1.765166065489296, "grad_norm": 0.5757760015117767, "learning_rate": 1.7860415903241002e-07, "loss": 0.273, "step": 37681 }, { "epoch": 1.7652129104792242, "grad_norm": 0.5921663932257932, "learning_rate": 1.785337701373241e-07, "loss": 0.2785, "step": 37682 }, { "epoch": 1.7652597554691525, "grad_norm": 0.6765967377630961, "learning_rate": 1.7846339460169282e-07, "loss": 0.2981, "step": 37683 }, { "epoch": 1.765306600459081, "grad_norm": 0.5961105459088575, "learning_rate": 1.7839303242592142e-07, "loss": 0.2797, "step": 37684 }, { "epoch": 1.7653534454490092, "grad_norm": 0.605720906262216, "learning_rate": 1.7832268361041434e-07, "loss": 0.2779, "step": 37685 }, { "epoch": 1.7654002904389374, "grad_norm": 0.5784932714084586, "learning_rate": 1.7825234815557729e-07, "loss": 0.2682, "step": 37686 }, { "epoch": 1.7654471354288659, "grad_norm": 0.6093037200484518, "learning_rate": 1.7818202606181444e-07, "loss": 0.2695, "step": 37687 }, { "epoch": 1.7654939804187944, "grad_norm": 0.5981694118451762, "learning_rate": 1.7811171732952992e-07, "loss": 0.2747, "step": 37688 }, { "epoch": 1.7655408254087226, "grad_norm": 0.5341176281996224, "learning_rate": 1.7804142195912893e-07, "loss": 0.2581, "step": 37689 }, { "epoch": 1.7655876703986508, "grad_norm": 0.606671050441968, "learning_rate": 1.779711399510159e-07, "loss": 0.2742, "step": 37690 }, { "epoch": 1.7656345153885793, "grad_norm": 0.5829081082688372, "learning_rate": 1.7790087130559547e-07, "loss": 0.271, "step": 37691 }, { "epoch": 1.7656813603785075, "grad_norm": 0.5600739523535434, "learning_rate": 1.7783061602327235e-07, "loss": 0.2431, "step": 37692 }, { "epoch": 1.7657282053684358, "grad_norm": 0.6099828205287127, "learning_rate": 1.777603741044498e-07, "loss": 0.2684, "step": 37693 }, { "epoch": 1.7657750503583642, "grad_norm": 0.5746685324252166, "learning_rate": 1.7769014554953305e-07, "loss": 0.2573, "step": 37694 }, { "epoch": 1.7658218953482925, "grad_norm": 0.6003711887234665, "learning_rate": 1.7761993035892543e-07, "loss": 0.2767, "step": 37695 }, { "epoch": 1.7658687403382207, "grad_norm": 0.5742231882364561, "learning_rate": 1.775497285330316e-07, "loss": 0.2538, "step": 37696 }, { "epoch": 1.7659155853281492, "grad_norm": 0.5843905638316806, "learning_rate": 1.7747954007225508e-07, "loss": 0.2664, "step": 37697 }, { "epoch": 1.7659624303180776, "grad_norm": 0.5923536613585425, "learning_rate": 1.7740936497700062e-07, "loss": 0.2593, "step": 37698 }, { "epoch": 1.7660092753080057, "grad_norm": 0.646187000718491, "learning_rate": 1.773392032476709e-07, "loss": 0.2771, "step": 37699 }, { "epoch": 1.7660561202979341, "grad_norm": 0.6000881036945639, "learning_rate": 1.772690548846706e-07, "loss": 0.2763, "step": 37700 }, { "epoch": 1.7661029652878626, "grad_norm": 0.5564836114493119, "learning_rate": 1.7719891988840333e-07, "loss": 0.2486, "step": 37701 }, { "epoch": 1.7661498102777908, "grad_norm": 0.6018385965847904, "learning_rate": 1.7712879825927203e-07, "loss": 0.2615, "step": 37702 }, { "epoch": 1.766196655267719, "grad_norm": 0.6261447038873985, "learning_rate": 1.7705868999768062e-07, "loss": 0.2867, "step": 37703 }, { "epoch": 1.7662435002576475, "grad_norm": 0.5694014027623149, "learning_rate": 1.7698859510403315e-07, "loss": 0.258, "step": 37704 }, { "epoch": 1.7662903452475758, "grad_norm": 0.6187776797475281, "learning_rate": 1.7691851357873185e-07, "loss": 0.2848, "step": 37705 }, { "epoch": 1.766337190237504, "grad_norm": 0.6233213437270958, "learning_rate": 1.7684844542218083e-07, "loss": 0.2732, "step": 37706 }, { "epoch": 1.7663840352274325, "grad_norm": 0.5795764030128654, "learning_rate": 1.7677839063478307e-07, "loss": 0.2545, "step": 37707 }, { "epoch": 1.7664308802173607, "grad_norm": 0.625706976913832, "learning_rate": 1.7670834921694218e-07, "loss": 0.2665, "step": 37708 }, { "epoch": 1.766477725207289, "grad_norm": 0.5552523726017132, "learning_rate": 1.7663832116906033e-07, "loss": 0.2478, "step": 37709 }, { "epoch": 1.7665245701972174, "grad_norm": 0.6339881737935911, "learning_rate": 1.7656830649154134e-07, "loss": 0.2793, "step": 37710 }, { "epoch": 1.7665714151871459, "grad_norm": 0.6415763022273129, "learning_rate": 1.7649830518478738e-07, "loss": 0.2883, "step": 37711 }, { "epoch": 1.766618260177074, "grad_norm": 0.609908678913314, "learning_rate": 1.7642831724920179e-07, "loss": 0.2848, "step": 37712 }, { "epoch": 1.7666651051670024, "grad_norm": 0.6028463193058707, "learning_rate": 1.7635834268518697e-07, "loss": 0.2687, "step": 37713 }, { "epoch": 1.7667119501569308, "grad_norm": 0.5778535134963422, "learning_rate": 1.7628838149314653e-07, "loss": 0.2647, "step": 37714 }, { "epoch": 1.766758795146859, "grad_norm": 0.5900947382025917, "learning_rate": 1.762184336734818e-07, "loss": 0.2759, "step": 37715 }, { "epoch": 1.7668056401367873, "grad_norm": 0.5999083117686289, "learning_rate": 1.7614849922659604e-07, "loss": 0.2835, "step": 37716 }, { "epoch": 1.7668524851267158, "grad_norm": 0.5933511302916119, "learning_rate": 1.7607857815289204e-07, "loss": 0.2819, "step": 37717 }, { "epoch": 1.766899330116644, "grad_norm": 0.58032694824666, "learning_rate": 1.7600867045277137e-07, "loss": 0.2669, "step": 37718 }, { "epoch": 1.7669461751065723, "grad_norm": 0.6282259963859196, "learning_rate": 1.7593877612663651e-07, "loss": 0.2826, "step": 37719 }, { "epoch": 1.7669930200965007, "grad_norm": 0.6492868117701609, "learning_rate": 1.758688951748902e-07, "loss": 0.2633, "step": 37720 }, { "epoch": 1.767039865086429, "grad_norm": 0.5603206746644358, "learning_rate": 1.757990275979343e-07, "loss": 0.2467, "step": 37721 }, { "epoch": 1.7670867100763572, "grad_norm": 0.5847231167535918, "learning_rate": 1.757291733961705e-07, "loss": 0.2671, "step": 37722 }, { "epoch": 1.7671335550662857, "grad_norm": 0.6102775531940848, "learning_rate": 1.7565933257000152e-07, "loss": 0.2702, "step": 37723 }, { "epoch": 1.7671804000562141, "grad_norm": 0.5833030363465209, "learning_rate": 1.7558950511982864e-07, "loss": 0.2603, "step": 37724 }, { "epoch": 1.7672272450461424, "grad_norm": 0.6251658590438837, "learning_rate": 1.755196910460538e-07, "loss": 0.2744, "step": 37725 }, { "epoch": 1.7672740900360706, "grad_norm": 0.5737040824032036, "learning_rate": 1.7544989034907895e-07, "loss": 0.2677, "step": 37726 }, { "epoch": 1.767320935025999, "grad_norm": 0.536260434514003, "learning_rate": 1.7538010302930565e-07, "loss": 0.2576, "step": 37727 }, { "epoch": 1.7673677800159273, "grad_norm": 0.6081752368837481, "learning_rate": 1.7531032908713608e-07, "loss": 0.2727, "step": 37728 }, { "epoch": 1.7674146250058556, "grad_norm": 0.6128086544744299, "learning_rate": 1.7524056852297135e-07, "loss": 0.2772, "step": 37729 }, { "epoch": 1.767461469995784, "grad_norm": 0.5763856997306424, "learning_rate": 1.751708213372122e-07, "loss": 0.2567, "step": 37730 }, { "epoch": 1.7675083149857123, "grad_norm": 0.5820205398909072, "learning_rate": 1.7510108753026083e-07, "loss": 0.2654, "step": 37731 }, { "epoch": 1.7675551599756405, "grad_norm": 0.6038699747107464, "learning_rate": 1.7503136710251834e-07, "loss": 0.2472, "step": 37732 }, { "epoch": 1.767602004965569, "grad_norm": 0.5789983868801306, "learning_rate": 1.7496166005438575e-07, "loss": 0.2606, "step": 37733 }, { "epoch": 1.7676488499554974, "grad_norm": 0.6030588799226808, "learning_rate": 1.7489196638626472e-07, "loss": 0.2626, "step": 37734 }, { "epoch": 1.7676956949454254, "grad_norm": 0.6064966773803651, "learning_rate": 1.748222860985563e-07, "loss": 0.28, "step": 37735 }, { "epoch": 1.767742539935354, "grad_norm": 0.6095438808532931, "learning_rate": 1.7475261919166127e-07, "loss": 0.2611, "step": 37736 }, { "epoch": 1.7677893849252824, "grad_norm": 0.5803853756828686, "learning_rate": 1.7468296566598014e-07, "loss": 0.2705, "step": 37737 }, { "epoch": 1.7678362299152106, "grad_norm": 0.6262843951448113, "learning_rate": 1.74613325521914e-07, "loss": 0.2723, "step": 37738 }, { "epoch": 1.7678830749051389, "grad_norm": 0.6155318760283148, "learning_rate": 1.7454369875986364e-07, "loss": 0.2819, "step": 37739 }, { "epoch": 1.7679299198950673, "grad_norm": 0.5864324154950538, "learning_rate": 1.7447408538022986e-07, "loss": 0.2733, "step": 37740 }, { "epoch": 1.7679767648849956, "grad_norm": 0.6501662380719311, "learning_rate": 1.744044853834137e-07, "loss": 0.2752, "step": 37741 }, { "epoch": 1.7680236098749238, "grad_norm": 0.5792896226418501, "learning_rate": 1.7433489876981486e-07, "loss": 0.2627, "step": 37742 }, { "epoch": 1.7680704548648523, "grad_norm": 0.6031933241599738, "learning_rate": 1.742653255398344e-07, "loss": 0.2801, "step": 37743 }, { "epoch": 1.7681172998547805, "grad_norm": 0.5969253279675166, "learning_rate": 1.7419576569387197e-07, "loss": 0.2667, "step": 37744 }, { "epoch": 1.7681641448447087, "grad_norm": 0.6066551249234265, "learning_rate": 1.741262192323284e-07, "loss": 0.2706, "step": 37745 }, { "epoch": 1.7682109898346372, "grad_norm": 0.5851072813714787, "learning_rate": 1.7405668615560396e-07, "loss": 0.2772, "step": 37746 }, { "epoch": 1.7682578348245657, "grad_norm": 0.583617475519784, "learning_rate": 1.7398716646409907e-07, "loss": 0.2615, "step": 37747 }, { "epoch": 1.7683046798144937, "grad_norm": 0.5824546465486892, "learning_rate": 1.7391766015821294e-07, "loss": 0.2715, "step": 37748 }, { "epoch": 1.7683515248044221, "grad_norm": 0.579795584521778, "learning_rate": 1.738481672383463e-07, "loss": 0.2565, "step": 37749 }, { "epoch": 1.7683983697943506, "grad_norm": 0.6145579246645816, "learning_rate": 1.7377868770489887e-07, "loss": 0.2692, "step": 37750 }, { "epoch": 1.7684452147842789, "grad_norm": 0.5917491650353303, "learning_rate": 1.7370922155827004e-07, "loss": 0.2767, "step": 37751 }, { "epoch": 1.768492059774207, "grad_norm": 0.5911066117970856, "learning_rate": 1.7363976879886003e-07, "loss": 0.2648, "step": 37752 }, { "epoch": 1.7685389047641356, "grad_norm": 0.5896662890812511, "learning_rate": 1.7357032942706908e-07, "loss": 0.2691, "step": 37753 }, { "epoch": 1.7685857497540638, "grad_norm": 0.6041041145696817, "learning_rate": 1.7350090344329552e-07, "loss": 0.2668, "step": 37754 }, { "epoch": 1.768632594743992, "grad_norm": 0.641599801065578, "learning_rate": 1.7343149084793953e-07, "loss": 0.269, "step": 37755 }, { "epoch": 1.7686794397339205, "grad_norm": 0.6332660572818395, "learning_rate": 1.7336209164140056e-07, "loss": 0.2705, "step": 37756 }, { "epoch": 1.7687262847238487, "grad_norm": 0.6120884722990853, "learning_rate": 1.7329270582407825e-07, "loss": 0.2647, "step": 37757 }, { "epoch": 1.768773129713777, "grad_norm": 0.6008540051993166, "learning_rate": 1.7322333339637122e-07, "loss": 0.2749, "step": 37758 }, { "epoch": 1.7688199747037054, "grad_norm": 0.5539671496201494, "learning_rate": 1.7315397435867965e-07, "loss": 0.2585, "step": 37759 }, { "epoch": 1.768866819693634, "grad_norm": 0.5937526911018959, "learning_rate": 1.7308462871140158e-07, "loss": 0.2665, "step": 37760 }, { "epoch": 1.7689136646835621, "grad_norm": 0.6197255286634942, "learning_rate": 1.7301529645493698e-07, "loss": 0.2791, "step": 37761 }, { "epoch": 1.7689605096734904, "grad_norm": 0.5867176696024347, "learning_rate": 1.7294597758968413e-07, "loss": 0.273, "step": 37762 }, { "epoch": 1.7690073546634189, "grad_norm": 0.5910872526400132, "learning_rate": 1.728766721160427e-07, "loss": 0.2738, "step": 37763 }, { "epoch": 1.769054199653347, "grad_norm": 0.5567810899057897, "learning_rate": 1.72807380034411e-07, "loss": 0.2525, "step": 37764 }, { "epoch": 1.7691010446432753, "grad_norm": 0.5950873638291703, "learning_rate": 1.7273810134518815e-07, "loss": 0.2737, "step": 37765 }, { "epoch": 1.7691478896332038, "grad_norm": 0.5833071918805612, "learning_rate": 1.7266883604877217e-07, "loss": 0.2659, "step": 37766 }, { "epoch": 1.769194734623132, "grad_norm": 0.5885596344131705, "learning_rate": 1.725995841455619e-07, "loss": 0.2669, "step": 37767 }, { "epoch": 1.7692415796130603, "grad_norm": 0.5634976139626038, "learning_rate": 1.7253034563595615e-07, "loss": 0.255, "step": 37768 }, { "epoch": 1.7692884246029887, "grad_norm": 0.5808196119037012, "learning_rate": 1.7246112052035358e-07, "loss": 0.2608, "step": 37769 }, { "epoch": 1.7693352695929172, "grad_norm": 0.6004719831061734, "learning_rate": 1.723919087991524e-07, "loss": 0.2736, "step": 37770 }, { "epoch": 1.7693821145828452, "grad_norm": 0.6122960222948233, "learning_rate": 1.7232271047275067e-07, "loss": 0.2638, "step": 37771 }, { "epoch": 1.7694289595727737, "grad_norm": 0.5737373259845798, "learning_rate": 1.7225352554154638e-07, "loss": 0.2576, "step": 37772 }, { "epoch": 1.7694758045627021, "grad_norm": 0.5685866442323522, "learning_rate": 1.7218435400593814e-07, "loss": 0.2707, "step": 37773 }, { "epoch": 1.7695226495526304, "grad_norm": 0.5995534423964354, "learning_rate": 1.721151958663239e-07, "loss": 0.2729, "step": 37774 }, { "epoch": 1.7695694945425586, "grad_norm": 0.5815218557553103, "learning_rate": 1.7204605112310147e-07, "loss": 0.2522, "step": 37775 }, { "epoch": 1.769616339532487, "grad_norm": 0.6095542812272884, "learning_rate": 1.719769197766688e-07, "loss": 0.2575, "step": 37776 }, { "epoch": 1.7696631845224153, "grad_norm": 0.6264939703206748, "learning_rate": 1.719078018274245e-07, "loss": 0.266, "step": 37777 }, { "epoch": 1.7697100295123436, "grad_norm": 0.630728592954959, "learning_rate": 1.7183869727576547e-07, "loss": 0.2677, "step": 37778 }, { "epoch": 1.769756874502272, "grad_norm": 0.5612289903942851, "learning_rate": 1.7176960612208914e-07, "loss": 0.2659, "step": 37779 }, { "epoch": 1.7698037194922003, "grad_norm": 0.5736711105552579, "learning_rate": 1.7170052836679358e-07, "loss": 0.2617, "step": 37780 }, { "epoch": 1.7698505644821285, "grad_norm": 0.6058266084716344, "learning_rate": 1.7163146401027647e-07, "loss": 0.2733, "step": 37781 }, { "epoch": 1.769897409472057, "grad_norm": 0.6332926170569825, "learning_rate": 1.7156241305293474e-07, "loss": 0.2865, "step": 37782 }, { "epoch": 1.7699442544619854, "grad_norm": 0.5830973728840433, "learning_rate": 1.7149337549516643e-07, "loss": 0.2569, "step": 37783 }, { "epoch": 1.7699910994519135, "grad_norm": 0.5802883987084421, "learning_rate": 1.7142435133736869e-07, "loss": 0.2779, "step": 37784 }, { "epoch": 1.770037944441842, "grad_norm": 0.5709080330313806, "learning_rate": 1.713553405799387e-07, "loss": 0.2734, "step": 37785 }, { "epoch": 1.7700847894317704, "grad_norm": 0.6064121529494115, "learning_rate": 1.7128634322327282e-07, "loss": 0.2695, "step": 37786 }, { "epoch": 1.7701316344216986, "grad_norm": 0.6117490404059681, "learning_rate": 1.712173592677688e-07, "loss": 0.2661, "step": 37787 }, { "epoch": 1.7701784794116269, "grad_norm": 0.5706667884285958, "learning_rate": 1.7114838871382378e-07, "loss": 0.2664, "step": 37788 }, { "epoch": 1.7702253244015553, "grad_norm": 0.5796244778078173, "learning_rate": 1.7107943156183414e-07, "loss": 0.2675, "step": 37789 }, { "epoch": 1.7702721693914836, "grad_norm": 0.5947728805249781, "learning_rate": 1.7101048781219765e-07, "loss": 0.2632, "step": 37790 }, { "epoch": 1.7703190143814118, "grad_norm": 0.614881356287348, "learning_rate": 1.7094155746531004e-07, "loss": 0.2739, "step": 37791 }, { "epoch": 1.7703658593713403, "grad_norm": 0.6229306597747484, "learning_rate": 1.7087264052156855e-07, "loss": 0.2744, "step": 37792 }, { "epoch": 1.7704127043612685, "grad_norm": 0.5815433687001282, "learning_rate": 1.7080373698136948e-07, "loss": 0.2615, "step": 37793 }, { "epoch": 1.7704595493511968, "grad_norm": 0.5960532524245942, "learning_rate": 1.7073484684510923e-07, "loss": 0.2504, "step": 37794 }, { "epoch": 1.7705063943411252, "grad_norm": 0.5726822863346669, "learning_rate": 1.7066597011318464e-07, "loss": 0.2674, "step": 37795 }, { "epoch": 1.7705532393310537, "grad_norm": 0.5789784376735368, "learning_rate": 1.705971067859924e-07, "loss": 0.2596, "step": 37796 }, { "epoch": 1.770600084320982, "grad_norm": 0.5891784352511342, "learning_rate": 1.705282568639277e-07, "loss": 0.2676, "step": 37797 }, { "epoch": 1.7706469293109102, "grad_norm": 0.6255590568295598, "learning_rate": 1.704594203473875e-07, "loss": 0.2892, "step": 37798 }, { "epoch": 1.7706937743008386, "grad_norm": 0.6433950809602725, "learning_rate": 1.7039059723676837e-07, "loss": 0.2831, "step": 37799 }, { "epoch": 1.7707406192907669, "grad_norm": 0.6063281868869338, "learning_rate": 1.7032178753246532e-07, "loss": 0.2771, "step": 37800 }, { "epoch": 1.7707874642806951, "grad_norm": 0.5459564273881048, "learning_rate": 1.7025299123487493e-07, "loss": 0.2617, "step": 37801 }, { "epoch": 1.7708343092706236, "grad_norm": 0.5783980946873398, "learning_rate": 1.701842083443933e-07, "loss": 0.2746, "step": 37802 }, { "epoch": 1.7708811542605518, "grad_norm": 0.5622730538807841, "learning_rate": 1.7011543886141568e-07, "loss": 0.2539, "step": 37803 }, { "epoch": 1.77092799925048, "grad_norm": 0.6366896175811178, "learning_rate": 1.7004668278633785e-07, "loss": 0.2853, "step": 37804 }, { "epoch": 1.7709748442404085, "grad_norm": 0.5698767434518006, "learning_rate": 1.6997794011955588e-07, "loss": 0.2611, "step": 37805 }, { "epoch": 1.771021689230337, "grad_norm": 0.5775308872890581, "learning_rate": 1.6990921086146584e-07, "loss": 0.2548, "step": 37806 }, { "epoch": 1.771068534220265, "grad_norm": 0.587382007814429, "learning_rate": 1.6984049501246214e-07, "loss": 0.2692, "step": 37807 }, { "epoch": 1.7711153792101935, "grad_norm": 0.5755757242867603, "learning_rate": 1.6977179257294114e-07, "loss": 0.2734, "step": 37808 }, { "epoch": 1.771162224200122, "grad_norm": 0.5975778172891778, "learning_rate": 1.6970310354329723e-07, "loss": 0.2734, "step": 37809 }, { "epoch": 1.7712090691900502, "grad_norm": 0.6792624111382158, "learning_rate": 1.6963442792392648e-07, "loss": 0.2952, "step": 37810 }, { "epoch": 1.7712559141799784, "grad_norm": 0.5876861528235061, "learning_rate": 1.695657657152236e-07, "loss": 0.2569, "step": 37811 }, { "epoch": 1.7713027591699069, "grad_norm": 0.6000349709420935, "learning_rate": 1.6949711691758465e-07, "loss": 0.2763, "step": 37812 }, { "epoch": 1.7713496041598351, "grad_norm": 0.6491736053670245, "learning_rate": 1.6942848153140346e-07, "loss": 0.2751, "step": 37813 }, { "epoch": 1.7713964491497634, "grad_norm": 0.5946223186326013, "learning_rate": 1.6935985955707612e-07, "loss": 0.2692, "step": 37814 }, { "epoch": 1.7714432941396918, "grad_norm": 0.6062445340401086, "learning_rate": 1.6929125099499678e-07, "loss": 0.2768, "step": 37815 }, { "epoch": 1.77149013912962, "grad_norm": 0.6359143018331438, "learning_rate": 1.692226558455601e-07, "loss": 0.2747, "step": 37816 }, { "epoch": 1.7715369841195483, "grad_norm": 0.5893929315110338, "learning_rate": 1.6915407410916157e-07, "loss": 0.2686, "step": 37817 }, { "epoch": 1.7715838291094768, "grad_norm": 0.5534353616740595, "learning_rate": 1.6908550578619538e-07, "loss": 0.2559, "step": 37818 }, { "epoch": 1.7716306740994052, "grad_norm": 0.5894920540054985, "learning_rate": 1.6901695087705644e-07, "loss": 0.2683, "step": 37819 }, { "epoch": 1.7716775190893332, "grad_norm": 0.5704570926590001, "learning_rate": 1.689484093821392e-07, "loss": 0.2458, "step": 37820 }, { "epoch": 1.7717243640792617, "grad_norm": 0.6025059355679444, "learning_rate": 1.6887988130183773e-07, "loss": 0.2737, "step": 37821 }, { "epoch": 1.7717712090691902, "grad_norm": 0.5991727734285042, "learning_rate": 1.6881136663654652e-07, "loss": 0.2657, "step": 37822 }, { "epoch": 1.7718180540591184, "grad_norm": 0.6131862000414404, "learning_rate": 1.687428653866599e-07, "loss": 0.2874, "step": 37823 }, { "epoch": 1.7718648990490466, "grad_norm": 0.6101730210160597, "learning_rate": 1.6867437755257233e-07, "loss": 0.2795, "step": 37824 }, { "epoch": 1.7719117440389751, "grad_norm": 0.6143893971211316, "learning_rate": 1.6860590313467762e-07, "loss": 0.2723, "step": 37825 }, { "epoch": 1.7719585890289034, "grad_norm": 0.5925902636943456, "learning_rate": 1.6853744213337048e-07, "loss": 0.2753, "step": 37826 }, { "epoch": 1.7720054340188316, "grad_norm": 0.5763993879989862, "learning_rate": 1.6846899454904448e-07, "loss": 0.2599, "step": 37827 }, { "epoch": 1.77205227900876, "grad_norm": 0.5613430349472698, "learning_rate": 1.6840056038209264e-07, "loss": 0.2427, "step": 37828 }, { "epoch": 1.7720991239986883, "grad_norm": 0.5817905268698457, "learning_rate": 1.683321396329099e-07, "loss": 0.2587, "step": 37829 }, { "epoch": 1.7721459689886165, "grad_norm": 0.6035825149807565, "learning_rate": 1.682637323018893e-07, "loss": 0.257, "step": 37830 }, { "epoch": 1.772192813978545, "grad_norm": 0.5644750666730017, "learning_rate": 1.6819533838942526e-07, "loss": 0.2749, "step": 37831 }, { "epoch": 1.7722396589684735, "grad_norm": 0.6111621882741107, "learning_rate": 1.6812695789591105e-07, "loss": 0.2699, "step": 37832 }, { "epoch": 1.7722865039584017, "grad_norm": 0.5827424047808537, "learning_rate": 1.6805859082173997e-07, "loss": 0.2773, "step": 37833 }, { "epoch": 1.77233334894833, "grad_norm": 0.6572558792987646, "learning_rate": 1.6799023716730563e-07, "loss": 0.2612, "step": 37834 }, { "epoch": 1.7723801939382584, "grad_norm": 0.591870100226032, "learning_rate": 1.6792189693300127e-07, "loss": 0.2753, "step": 37835 }, { "epoch": 1.7724270389281866, "grad_norm": 0.5722738410491166, "learning_rate": 1.6785357011922022e-07, "loss": 0.2688, "step": 37836 }, { "epoch": 1.772473883918115, "grad_norm": 0.5890908160913112, "learning_rate": 1.677852567263555e-07, "loss": 0.2758, "step": 37837 }, { "epoch": 1.7725207289080434, "grad_norm": 0.6380430920001733, "learning_rate": 1.6771695675480092e-07, "loss": 0.2883, "step": 37838 }, { "epoch": 1.7725675738979716, "grad_norm": 0.6170694537040943, "learning_rate": 1.6764867020494874e-07, "loss": 0.2777, "step": 37839 }, { "epoch": 1.7726144188878998, "grad_norm": 0.5774276224747064, "learning_rate": 1.675803970771922e-07, "loss": 0.2589, "step": 37840 }, { "epoch": 1.7726612638778283, "grad_norm": 0.6042119995136176, "learning_rate": 1.6751213737192462e-07, "loss": 0.2627, "step": 37841 }, { "epoch": 1.7727081088677568, "grad_norm": 0.6314317213684905, "learning_rate": 1.674438910895382e-07, "loss": 0.2678, "step": 37842 }, { "epoch": 1.7727549538576848, "grad_norm": 0.609979394478226, "learning_rate": 1.673756582304259e-07, "loss": 0.2885, "step": 37843 }, { "epoch": 1.7728017988476132, "grad_norm": 0.622079404518883, "learning_rate": 1.6730743879498053e-07, "loss": 0.2723, "step": 37844 }, { "epoch": 1.7728486438375417, "grad_norm": 0.6244793409019498, "learning_rate": 1.672392327835945e-07, "loss": 0.2515, "step": 37845 }, { "epoch": 1.77289548882747, "grad_norm": 0.5943569542481268, "learning_rate": 1.6717104019666003e-07, "loss": 0.2578, "step": 37846 }, { "epoch": 1.7729423338173982, "grad_norm": 0.5938048705233281, "learning_rate": 1.6710286103457012e-07, "loss": 0.2697, "step": 37847 }, { "epoch": 1.7729891788073266, "grad_norm": 0.639225685276674, "learning_rate": 1.6703469529771728e-07, "loss": 0.2898, "step": 37848 }, { "epoch": 1.773036023797255, "grad_norm": 0.6092429888098009, "learning_rate": 1.669665429864928e-07, "loss": 0.279, "step": 37849 }, { "epoch": 1.7730828687871831, "grad_norm": 0.5783410203686101, "learning_rate": 1.668984041012897e-07, "loss": 0.2582, "step": 37850 }, { "epoch": 1.7731297137771116, "grad_norm": 0.6196496732060505, "learning_rate": 1.6683027864250024e-07, "loss": 0.2826, "step": 37851 }, { "epoch": 1.7731765587670398, "grad_norm": 0.5954748387418556, "learning_rate": 1.667621666105157e-07, "loss": 0.2741, "step": 37852 }, { "epoch": 1.773223403756968, "grad_norm": 0.5920947602526128, "learning_rate": 1.666940680057283e-07, "loss": 0.2748, "step": 37853 }, { "epoch": 1.7732702487468965, "grad_norm": 0.6267252840922974, "learning_rate": 1.6662598282853077e-07, "loss": 0.2827, "step": 37854 }, { "epoch": 1.773317093736825, "grad_norm": 0.5728869793231456, "learning_rate": 1.6655791107931362e-07, "loss": 0.2683, "step": 37855 }, { "epoch": 1.773363938726753, "grad_norm": 0.650927761264853, "learning_rate": 1.6648985275846935e-07, "loss": 0.3034, "step": 37856 }, { "epoch": 1.7734107837166815, "grad_norm": 0.5765529655857268, "learning_rate": 1.6642180786638984e-07, "loss": 0.2624, "step": 37857 }, { "epoch": 1.77345762870661, "grad_norm": 0.5876187476799513, "learning_rate": 1.663537764034656e-07, "loss": 0.2637, "step": 37858 }, { "epoch": 1.7735044736965382, "grad_norm": 0.6109441275285842, "learning_rate": 1.6628575837008914e-07, "loss": 0.2955, "step": 37859 }, { "epoch": 1.7735513186864664, "grad_norm": 0.6422941105266311, "learning_rate": 1.662177537666515e-07, "loss": 0.2797, "step": 37860 }, { "epoch": 1.773598163676395, "grad_norm": 0.6071853908641984, "learning_rate": 1.661497625935446e-07, "loss": 0.2618, "step": 37861 }, { "epoch": 1.7736450086663231, "grad_norm": 0.534851832632137, "learning_rate": 1.6608178485115866e-07, "loss": 0.2407, "step": 37862 }, { "epoch": 1.7736918536562514, "grad_norm": 0.6000393779217544, "learning_rate": 1.6601382053988618e-07, "loss": 0.2771, "step": 37863 }, { "epoch": 1.7737386986461798, "grad_norm": 0.6161459139274725, "learning_rate": 1.6594586966011683e-07, "loss": 0.2754, "step": 37864 }, { "epoch": 1.773785543636108, "grad_norm": 0.6125643799502517, "learning_rate": 1.6587793221224252e-07, "loss": 0.2807, "step": 37865 }, { "epoch": 1.7738323886260363, "grad_norm": 0.5800864977194662, "learning_rate": 1.6581000819665376e-07, "loss": 0.2626, "step": 37866 }, { "epoch": 1.7738792336159648, "grad_norm": 0.5812867512005229, "learning_rate": 1.6574209761374193e-07, "loss": 0.2748, "step": 37867 }, { "epoch": 1.7739260786058932, "grad_norm": 0.5935013039712974, "learning_rate": 1.656742004638981e-07, "loss": 0.2749, "step": 37868 }, { "epoch": 1.7739729235958213, "grad_norm": 0.6073077234832308, "learning_rate": 1.656063167475125e-07, "loss": 0.2713, "step": 37869 }, { "epoch": 1.7740197685857497, "grad_norm": 0.608291785138091, "learning_rate": 1.6553844646497536e-07, "loss": 0.2691, "step": 37870 }, { "epoch": 1.7740666135756782, "grad_norm": 0.5625842099536085, "learning_rate": 1.654705896166778e-07, "loss": 0.257, "step": 37871 }, { "epoch": 1.7741134585656064, "grad_norm": 0.5933952360458067, "learning_rate": 1.6540274620301e-07, "loss": 0.275, "step": 37872 }, { "epoch": 1.7741603035555347, "grad_norm": 0.585869743333646, "learning_rate": 1.653349162243628e-07, "loss": 0.2765, "step": 37873 }, { "epoch": 1.7742071485454631, "grad_norm": 0.6572425465768822, "learning_rate": 1.6526709968112647e-07, "loss": 0.2956, "step": 37874 }, { "epoch": 1.7742539935353914, "grad_norm": 0.6423370577987821, "learning_rate": 1.6519929657369148e-07, "loss": 0.2846, "step": 37875 }, { "epoch": 1.7743008385253196, "grad_norm": 0.5501928547944742, "learning_rate": 1.6513150690244757e-07, "loss": 0.2509, "step": 37876 }, { "epoch": 1.774347683515248, "grad_norm": 0.5443535458949583, "learning_rate": 1.6506373066778492e-07, "loss": 0.2442, "step": 37877 }, { "epoch": 1.7743945285051763, "grad_norm": 0.5990382245209955, "learning_rate": 1.6499596787009325e-07, "loss": 0.2836, "step": 37878 }, { "epoch": 1.7744413734951046, "grad_norm": 0.602599992754074, "learning_rate": 1.649282185097631e-07, "loss": 0.2747, "step": 37879 }, { "epoch": 1.774488218485033, "grad_norm": 0.5873620243388812, "learning_rate": 1.6486048258718413e-07, "loss": 0.2695, "step": 37880 }, { "epoch": 1.7745350634749615, "grad_norm": 0.5812535000698495, "learning_rate": 1.647927601027466e-07, "loss": 0.2538, "step": 37881 }, { "epoch": 1.7745819084648897, "grad_norm": 0.6150987192995312, "learning_rate": 1.6472505105683934e-07, "loss": 0.2726, "step": 37882 }, { "epoch": 1.774628753454818, "grad_norm": 0.635653386996737, "learning_rate": 1.646573554498529e-07, "loss": 0.2774, "step": 37883 }, { "epoch": 1.7746755984447464, "grad_norm": 0.6518480714008458, "learning_rate": 1.6458967328217613e-07, "loss": 0.286, "step": 37884 }, { "epoch": 1.7747224434346747, "grad_norm": 0.6019921736729879, "learning_rate": 1.6452200455419842e-07, "loss": 0.2736, "step": 37885 }, { "epoch": 1.774769288424603, "grad_norm": 0.6189652665771533, "learning_rate": 1.6445434926631004e-07, "loss": 0.2979, "step": 37886 }, { "epoch": 1.7748161334145314, "grad_norm": 0.627698511814142, "learning_rate": 1.6438670741889985e-07, "loss": 0.2656, "step": 37887 }, { "epoch": 1.7748629784044596, "grad_norm": 0.5915377585528719, "learning_rate": 1.6431907901235694e-07, "loss": 0.2719, "step": 37888 }, { "epoch": 1.7749098233943879, "grad_norm": 0.5251736520083281, "learning_rate": 1.6425146404707075e-07, "loss": 0.2525, "step": 37889 }, { "epoch": 1.7749566683843163, "grad_norm": 0.5893602241175879, "learning_rate": 1.6418386252343072e-07, "loss": 0.2542, "step": 37890 }, { "epoch": 1.7750035133742448, "grad_norm": 0.597019428303946, "learning_rate": 1.6411627444182483e-07, "loss": 0.2758, "step": 37891 }, { "epoch": 1.7750503583641728, "grad_norm": 0.6726573647859903, "learning_rate": 1.6404869980264303e-07, "loss": 0.2855, "step": 37892 }, { "epoch": 1.7750972033541013, "grad_norm": 0.5966350275766984, "learning_rate": 1.6398113860627396e-07, "loss": 0.2788, "step": 37893 }, { "epoch": 1.7751440483440297, "grad_norm": 0.5280921081344113, "learning_rate": 1.6391359085310587e-07, "loss": 0.2486, "step": 37894 }, { "epoch": 1.775190893333958, "grad_norm": 0.616504165038833, "learning_rate": 1.638460565435282e-07, "loss": 0.2706, "step": 37895 }, { "epoch": 1.7752377383238862, "grad_norm": 0.5887856819020405, "learning_rate": 1.6377853567792923e-07, "loss": 0.2671, "step": 37896 }, { "epoch": 1.7752845833138147, "grad_norm": 0.6045085496403326, "learning_rate": 1.6371102825669783e-07, "loss": 0.2714, "step": 37897 }, { "epoch": 1.775331428303743, "grad_norm": 0.634071193344318, "learning_rate": 1.6364353428022201e-07, "loss": 0.2819, "step": 37898 }, { "epoch": 1.7753782732936711, "grad_norm": 0.5672275467905638, "learning_rate": 1.6357605374889062e-07, "loss": 0.2581, "step": 37899 }, { "epoch": 1.7754251182835996, "grad_norm": 0.6248442615085186, "learning_rate": 1.6350858666309145e-07, "loss": 0.2727, "step": 37900 }, { "epoch": 1.7754719632735279, "grad_norm": 0.6099207714116869, "learning_rate": 1.634411330232133e-07, "loss": 0.2766, "step": 37901 }, { "epoch": 1.775518808263456, "grad_norm": 0.6304947641872413, "learning_rate": 1.6337369282964393e-07, "loss": 0.2775, "step": 37902 }, { "epoch": 1.7755656532533846, "grad_norm": 0.6287716820951599, "learning_rate": 1.6330626608277222e-07, "loss": 0.2768, "step": 37903 }, { "epoch": 1.775612498243313, "grad_norm": 0.6569055012443367, "learning_rate": 1.6323885278298533e-07, "loss": 0.2996, "step": 37904 }, { "epoch": 1.775659343233241, "grad_norm": 0.5903150913095817, "learning_rate": 1.631714529306719e-07, "loss": 0.2684, "step": 37905 }, { "epoch": 1.7757061882231695, "grad_norm": 0.6324902902062566, "learning_rate": 1.6310406652621875e-07, "loss": 0.2741, "step": 37906 }, { "epoch": 1.775753033213098, "grad_norm": 0.6135096275590167, "learning_rate": 1.630366935700145e-07, "loss": 0.2636, "step": 37907 }, { "epoch": 1.7757998782030262, "grad_norm": 0.6034717915496528, "learning_rate": 1.6296933406244692e-07, "loss": 0.2778, "step": 37908 }, { "epoch": 1.7758467231929544, "grad_norm": 0.5935567308325921, "learning_rate": 1.6290198800390318e-07, "loss": 0.2753, "step": 37909 }, { "epoch": 1.775893568182883, "grad_norm": 0.5845121810845227, "learning_rate": 1.6283465539477154e-07, "loss": 0.2734, "step": 37910 }, { "epoch": 1.7759404131728111, "grad_norm": 0.6073708042311975, "learning_rate": 1.62767336235439e-07, "loss": 0.2524, "step": 37911 }, { "epoch": 1.7759872581627394, "grad_norm": 0.5834150311086045, "learning_rate": 1.627000305262927e-07, "loss": 0.2687, "step": 37912 }, { "epoch": 1.7760341031526679, "grad_norm": 0.6389840274160755, "learning_rate": 1.626327382677201e-07, "loss": 0.2777, "step": 37913 }, { "epoch": 1.776080948142596, "grad_norm": 0.5822605319158737, "learning_rate": 1.6256545946010867e-07, "loss": 0.2653, "step": 37914 }, { "epoch": 1.7761277931325243, "grad_norm": 0.55204292675602, "learning_rate": 1.624981941038456e-07, "loss": 0.2545, "step": 37915 }, { "epoch": 1.7761746381224528, "grad_norm": 0.5987847258188335, "learning_rate": 1.6243094219931782e-07, "loss": 0.2683, "step": 37916 }, { "epoch": 1.7762214831123813, "grad_norm": 0.6038685133591254, "learning_rate": 1.6236370374691306e-07, "loss": 0.2563, "step": 37917 }, { "epoch": 1.7762683281023095, "grad_norm": 0.6053759513766405, "learning_rate": 1.6229647874701744e-07, "loss": 0.2755, "step": 37918 }, { "epoch": 1.7763151730922377, "grad_norm": 0.6113962191251889, "learning_rate": 1.622292672000178e-07, "loss": 0.2612, "step": 37919 }, { "epoch": 1.7763620180821662, "grad_norm": 0.6079891670393678, "learning_rate": 1.6216206910630084e-07, "loss": 0.2667, "step": 37920 }, { "epoch": 1.7764088630720944, "grad_norm": 0.6075340494358235, "learning_rate": 1.620948844662537e-07, "loss": 0.2607, "step": 37921 }, { "epoch": 1.7764557080620227, "grad_norm": 0.6222705200922883, "learning_rate": 1.6202771328026308e-07, "loss": 0.3013, "step": 37922 }, { "epoch": 1.7765025530519511, "grad_norm": 0.6134997445835139, "learning_rate": 1.6196055554871525e-07, "loss": 0.2564, "step": 37923 }, { "epoch": 1.7765493980418794, "grad_norm": 0.6157114216630549, "learning_rate": 1.618934112719972e-07, "loss": 0.2755, "step": 37924 }, { "epoch": 1.7765962430318076, "grad_norm": 0.6245997522396304, "learning_rate": 1.618262804504947e-07, "loss": 0.2724, "step": 37925 }, { "epoch": 1.776643088021736, "grad_norm": 0.5747900459609078, "learning_rate": 1.617591630845941e-07, "loss": 0.2545, "step": 37926 }, { "epoch": 1.7766899330116646, "grad_norm": 0.561773816570675, "learning_rate": 1.6169205917468177e-07, "loss": 0.2747, "step": 37927 }, { "epoch": 1.7767367780015926, "grad_norm": 0.5946029349602042, "learning_rate": 1.616249687211438e-07, "loss": 0.2654, "step": 37928 }, { "epoch": 1.776783622991521, "grad_norm": 0.5916173615356457, "learning_rate": 1.6155789172436653e-07, "loss": 0.2609, "step": 37929 }, { "epoch": 1.7768304679814495, "grad_norm": 0.6042823540600197, "learning_rate": 1.6149082818473634e-07, "loss": 0.2731, "step": 37930 }, { "epoch": 1.7768773129713777, "grad_norm": 0.5963889450218373, "learning_rate": 1.6142377810263815e-07, "loss": 0.2614, "step": 37931 }, { "epoch": 1.776924157961306, "grad_norm": 0.6542795040091743, "learning_rate": 1.6135674147845892e-07, "loss": 0.2929, "step": 37932 }, { "epoch": 1.7769710029512344, "grad_norm": 0.6052528555378328, "learning_rate": 1.6128971831258305e-07, "loss": 0.2503, "step": 37933 }, { "epoch": 1.7770178479411627, "grad_norm": 0.6211704716871033, "learning_rate": 1.6122270860539746e-07, "loss": 0.277, "step": 37934 }, { "epoch": 1.777064692931091, "grad_norm": 0.6075229635634197, "learning_rate": 1.611557123572871e-07, "loss": 0.2841, "step": 37935 }, { "epoch": 1.7771115379210194, "grad_norm": 0.5932371464248594, "learning_rate": 1.6108872956863835e-07, "loss": 0.2577, "step": 37936 }, { "epoch": 1.7771583829109476, "grad_norm": 0.6326282607353688, "learning_rate": 1.6102176023983534e-07, "loss": 0.2667, "step": 37937 }, { "epoch": 1.7772052279008759, "grad_norm": 0.6281391871935897, "learning_rate": 1.609548043712647e-07, "loss": 0.278, "step": 37938 }, { "epoch": 1.7772520728908043, "grad_norm": 0.5581771336327183, "learning_rate": 1.608878619633114e-07, "loss": 0.2701, "step": 37939 }, { "epoch": 1.7772989178807328, "grad_norm": 0.6090458314977749, "learning_rate": 1.6082093301636015e-07, "loss": 0.2669, "step": 37940 }, { "epoch": 1.7773457628706608, "grad_norm": 0.56811731106069, "learning_rate": 1.6075401753079645e-07, "loss": 0.269, "step": 37941 }, { "epoch": 1.7773926078605893, "grad_norm": 0.5986014887475023, "learning_rate": 1.6068711550700582e-07, "loss": 0.3006, "step": 37942 }, { "epoch": 1.7774394528505177, "grad_norm": 0.5838917985643465, "learning_rate": 1.6062022694537272e-07, "loss": 0.2812, "step": 37943 }, { "epoch": 1.777486297840446, "grad_norm": 0.6044126222265839, "learning_rate": 1.6055335184628208e-07, "loss": 0.2715, "step": 37944 }, { "epoch": 1.7775331428303742, "grad_norm": 0.5985787216431941, "learning_rate": 1.6048649021011886e-07, "loss": 0.265, "step": 37945 }, { "epoch": 1.7775799878203027, "grad_norm": 0.6013064627893623, "learning_rate": 1.6041964203726834e-07, "loss": 0.2744, "step": 37946 }, { "epoch": 1.777626832810231, "grad_norm": 0.5878801899765634, "learning_rate": 1.603528073281141e-07, "loss": 0.2683, "step": 37947 }, { "epoch": 1.7776736778001592, "grad_norm": 0.6218757347531121, "learning_rate": 1.602859860830422e-07, "loss": 0.2785, "step": 37948 }, { "epoch": 1.7777205227900876, "grad_norm": 0.6251858877571866, "learning_rate": 1.6021917830243565e-07, "loss": 0.2766, "step": 37949 }, { "epoch": 1.7777673677800159, "grad_norm": 0.5627418243978226, "learning_rate": 1.6015238398667975e-07, "loss": 0.2487, "step": 37950 }, { "epoch": 1.7778142127699441, "grad_norm": 0.6550097697707132, "learning_rate": 1.600856031361589e-07, "loss": 0.2825, "step": 37951 }, { "epoch": 1.7778610577598726, "grad_norm": 0.6050335387501906, "learning_rate": 1.6001883575125775e-07, "loss": 0.2864, "step": 37952 }, { "epoch": 1.777907902749801, "grad_norm": 0.6173878402034086, "learning_rate": 1.5995208183235938e-07, "loss": 0.275, "step": 37953 }, { "epoch": 1.7779547477397293, "grad_norm": 0.5751024448128396, "learning_rate": 1.598853413798493e-07, "loss": 0.2518, "step": 37954 }, { "epoch": 1.7780015927296575, "grad_norm": 0.5663729407895611, "learning_rate": 1.598186143941105e-07, "loss": 0.2556, "step": 37955 }, { "epoch": 1.778048437719586, "grad_norm": 0.6025490786814031, "learning_rate": 1.5975190087552716e-07, "loss": 0.2732, "step": 37956 }, { "epoch": 1.7780952827095142, "grad_norm": 0.555664491803194, "learning_rate": 1.596852008244837e-07, "loss": 0.2562, "step": 37957 }, { "epoch": 1.7781421276994425, "grad_norm": 0.6448857040430768, "learning_rate": 1.5961851424136337e-07, "loss": 0.2678, "step": 37958 }, { "epoch": 1.778188972689371, "grad_norm": 0.5944984316769057, "learning_rate": 1.595518411265509e-07, "loss": 0.2636, "step": 37959 }, { "epoch": 1.7782358176792992, "grad_norm": 0.5882706355887463, "learning_rate": 1.5948518148042936e-07, "loss": 0.2912, "step": 37960 }, { "epoch": 1.7782826626692274, "grad_norm": 0.5836227463591972, "learning_rate": 1.594185353033817e-07, "loss": 0.2814, "step": 37961 }, { "epoch": 1.7783295076591559, "grad_norm": 0.6092964072325927, "learning_rate": 1.5935190259579236e-07, "loss": 0.2672, "step": 37962 }, { "epoch": 1.7783763526490843, "grad_norm": 0.6062494297530214, "learning_rate": 1.5928528335804438e-07, "loss": 0.2847, "step": 37963 }, { "epoch": 1.7784231976390124, "grad_norm": 0.5850115024444774, "learning_rate": 1.5921867759052134e-07, "loss": 0.2592, "step": 37964 }, { "epoch": 1.7784700426289408, "grad_norm": 0.6170973029516706, "learning_rate": 1.5915208529360654e-07, "loss": 0.276, "step": 37965 }, { "epoch": 1.7785168876188693, "grad_norm": 0.537748813932748, "learning_rate": 1.590855064676833e-07, "loss": 0.2526, "step": 37966 }, { "epoch": 1.7785637326087975, "grad_norm": 0.5921729937343525, "learning_rate": 1.5901894111313488e-07, "loss": 0.2589, "step": 37967 }, { "epoch": 1.7786105775987258, "grad_norm": 0.5875952635222228, "learning_rate": 1.589523892303435e-07, "loss": 0.2675, "step": 37968 }, { "epoch": 1.7786574225886542, "grad_norm": 0.6253639965624977, "learning_rate": 1.5888585081969276e-07, "loss": 0.2713, "step": 37969 }, { "epoch": 1.7787042675785825, "grad_norm": 0.5489215755456472, "learning_rate": 1.588193258815654e-07, "loss": 0.2607, "step": 37970 }, { "epoch": 1.7787511125685107, "grad_norm": 0.5960042019350618, "learning_rate": 1.5875281441634444e-07, "loss": 0.2761, "step": 37971 }, { "epoch": 1.7787979575584392, "grad_norm": 0.6131201516864264, "learning_rate": 1.5868631642441317e-07, "loss": 0.2746, "step": 37972 }, { "epoch": 1.7788448025483674, "grad_norm": 0.6212964184194977, "learning_rate": 1.5861983190615328e-07, "loss": 0.2702, "step": 37973 }, { "epoch": 1.7788916475382957, "grad_norm": 0.6346339526946518, "learning_rate": 1.58553360861948e-07, "loss": 0.2682, "step": 37974 }, { "epoch": 1.7789384925282241, "grad_norm": 0.6228800047586196, "learning_rate": 1.584869032921793e-07, "loss": 0.2837, "step": 37975 }, { "epoch": 1.7789853375181526, "grad_norm": 0.6322770910885381, "learning_rate": 1.5842045919722994e-07, "loss": 0.262, "step": 37976 }, { "epoch": 1.7790321825080806, "grad_norm": 0.5924575656551945, "learning_rate": 1.5835402857748233e-07, "loss": 0.2603, "step": 37977 }, { "epoch": 1.779079027498009, "grad_norm": 0.6006356662408132, "learning_rate": 1.58287611433319e-07, "loss": 0.2718, "step": 37978 }, { "epoch": 1.7791258724879375, "grad_norm": 0.5925794487456139, "learning_rate": 1.5822120776512156e-07, "loss": 0.2683, "step": 37979 }, { "epoch": 1.7791727174778658, "grad_norm": 0.6080445582322626, "learning_rate": 1.581548175732725e-07, "loss": 0.275, "step": 37980 }, { "epoch": 1.779219562467794, "grad_norm": 0.5843818369302138, "learning_rate": 1.580884408581543e-07, "loss": 0.2652, "step": 37981 }, { "epoch": 1.7792664074577225, "grad_norm": 0.6216186644609523, "learning_rate": 1.5802207762014826e-07, "loss": 0.2709, "step": 37982 }, { "epoch": 1.7793132524476507, "grad_norm": 0.6366854707181099, "learning_rate": 1.5795572785963638e-07, "loss": 0.2837, "step": 37983 }, { "epoch": 1.779360097437579, "grad_norm": 0.619227305944626, "learning_rate": 1.5788939157700078e-07, "loss": 0.2724, "step": 37984 }, { "epoch": 1.7794069424275074, "grad_norm": 0.5575737250524688, "learning_rate": 1.5782306877262315e-07, "loss": 0.2642, "step": 37985 }, { "epoch": 1.7794537874174357, "grad_norm": 0.5444287172545742, "learning_rate": 1.5775675944688484e-07, "loss": 0.2514, "step": 37986 }, { "epoch": 1.779500632407364, "grad_norm": 0.5801776742327316, "learning_rate": 1.576904636001675e-07, "loss": 0.256, "step": 37987 }, { "epoch": 1.7795474773972924, "grad_norm": 0.578825614829115, "learning_rate": 1.5762418123285357e-07, "loss": 0.2655, "step": 37988 }, { "epoch": 1.7795943223872208, "grad_norm": 0.668234356367765, "learning_rate": 1.5755791234532304e-07, "loss": 0.2783, "step": 37989 }, { "epoch": 1.779641167377149, "grad_norm": 0.5885937236001076, "learning_rate": 1.5749165693795808e-07, "loss": 0.2608, "step": 37990 }, { "epoch": 1.7796880123670773, "grad_norm": 0.6088984201836148, "learning_rate": 1.5742541501114038e-07, "loss": 0.277, "step": 37991 }, { "epoch": 1.7797348573570058, "grad_norm": 0.5787177136644616, "learning_rate": 1.5735918656525017e-07, "loss": 0.2522, "step": 37992 }, { "epoch": 1.779781702346934, "grad_norm": 0.5492666460042986, "learning_rate": 1.5729297160066908e-07, "loss": 0.2477, "step": 37993 }, { "epoch": 1.7798285473368622, "grad_norm": 0.6144201195236911, "learning_rate": 1.572267701177782e-07, "loss": 0.2878, "step": 37994 }, { "epoch": 1.7798753923267907, "grad_norm": 0.5915890356907045, "learning_rate": 1.5716058211695834e-07, "loss": 0.2723, "step": 37995 }, { "epoch": 1.779922237316719, "grad_norm": 0.6456989095894828, "learning_rate": 1.5709440759859057e-07, "loss": 0.2866, "step": 37996 }, { "epoch": 1.7799690823066472, "grad_norm": 0.6030088890706798, "learning_rate": 1.5702824656305572e-07, "loss": 0.2755, "step": 37997 }, { "epoch": 1.7800159272965757, "grad_norm": 0.5983845998340355, "learning_rate": 1.5696209901073428e-07, "loss": 0.2657, "step": 37998 }, { "epoch": 1.7800627722865041, "grad_norm": 0.5928858326875779, "learning_rate": 1.5689596494200681e-07, "loss": 0.2648, "step": 37999 }, { "epoch": 1.7801096172764321, "grad_norm": 0.6166189682057953, "learning_rate": 1.568298443572544e-07, "loss": 0.2761, "step": 38000 }, { "epoch": 1.7801564622663606, "grad_norm": 0.580079412016338, "learning_rate": 1.5676373725685757e-07, "loss": 0.261, "step": 38001 }, { "epoch": 1.780203307256289, "grad_norm": 0.60213193593446, "learning_rate": 1.56697643641196e-07, "loss": 0.2697, "step": 38002 }, { "epoch": 1.7802501522462173, "grad_norm": 0.5757642454329598, "learning_rate": 1.5663156351065107e-07, "loss": 0.2615, "step": 38003 }, { "epoch": 1.7802969972361455, "grad_norm": 0.6101480153227226, "learning_rate": 1.5656549686560218e-07, "loss": 0.274, "step": 38004 }, { "epoch": 1.780343842226074, "grad_norm": 0.6401110825431335, "learning_rate": 1.564994437064296e-07, "loss": 0.2826, "step": 38005 }, { "epoch": 1.7803906872160022, "grad_norm": 0.5934208505598997, "learning_rate": 1.5643340403351386e-07, "loss": 0.2602, "step": 38006 }, { "epoch": 1.7804375322059305, "grad_norm": 0.5975284187046815, "learning_rate": 1.5636737784723494e-07, "loss": 0.2741, "step": 38007 }, { "epoch": 1.780484377195859, "grad_norm": 0.5931255515371775, "learning_rate": 1.5630136514797305e-07, "loss": 0.2745, "step": 38008 }, { "epoch": 1.7805312221857872, "grad_norm": 0.6295799759216628, "learning_rate": 1.5623536593610767e-07, "loss": 0.2835, "step": 38009 }, { "epoch": 1.7805780671757154, "grad_norm": 0.6143423274329367, "learning_rate": 1.5616938021201817e-07, "loss": 0.2769, "step": 38010 }, { "epoch": 1.780624912165644, "grad_norm": 0.6105267570576068, "learning_rate": 1.5610340797608509e-07, "loss": 0.2776, "step": 38011 }, { "epoch": 1.7806717571555724, "grad_norm": 0.6192699031025695, "learning_rate": 1.5603744922868756e-07, "loss": 0.2695, "step": 38012 }, { "epoch": 1.7807186021455004, "grad_norm": 0.5795005829068207, "learning_rate": 1.5597150397020534e-07, "loss": 0.2492, "step": 38013 }, { "epoch": 1.7807654471354288, "grad_norm": 0.5793664396936741, "learning_rate": 1.5590557220101805e-07, "loss": 0.2586, "step": 38014 }, { "epoch": 1.7808122921253573, "grad_norm": 0.5950969818553541, "learning_rate": 1.5583965392150542e-07, "loss": 0.2703, "step": 38015 }, { "epoch": 1.7808591371152855, "grad_norm": 0.6150221296067603, "learning_rate": 1.5577374913204658e-07, "loss": 0.2716, "step": 38016 }, { "epoch": 1.7809059821052138, "grad_norm": 0.6181197381250935, "learning_rate": 1.5570785783301984e-07, "loss": 0.2776, "step": 38017 }, { "epoch": 1.7809528270951422, "grad_norm": 0.5943529728510302, "learning_rate": 1.5564198002480546e-07, "loss": 0.271, "step": 38018 }, { "epoch": 1.7809996720850705, "grad_norm": 0.5978042978268069, "learning_rate": 1.5557611570778204e-07, "loss": 0.2754, "step": 38019 }, { "epoch": 1.7810465170749987, "grad_norm": 0.5969549444102797, "learning_rate": 1.5551026488232894e-07, "loss": 0.2686, "step": 38020 }, { "epoch": 1.7810933620649272, "grad_norm": 0.6289129900202873, "learning_rate": 1.5544442754882538e-07, "loss": 0.2693, "step": 38021 }, { "epoch": 1.7811402070548554, "grad_norm": 0.608646464399686, "learning_rate": 1.5537860370764962e-07, "loss": 0.2761, "step": 38022 }, { "epoch": 1.7811870520447837, "grad_norm": 0.5636033159290336, "learning_rate": 1.5531279335918083e-07, "loss": 0.2541, "step": 38023 }, { "epoch": 1.7812338970347121, "grad_norm": 0.5900539310907715, "learning_rate": 1.5524699650379728e-07, "loss": 0.2516, "step": 38024 }, { "epoch": 1.7812807420246406, "grad_norm": 0.6484713710323217, "learning_rate": 1.5518121314187812e-07, "loss": 0.2773, "step": 38025 }, { "epoch": 1.7813275870145688, "grad_norm": 0.5575344930435725, "learning_rate": 1.5511544327380167e-07, "loss": 0.2758, "step": 38026 }, { "epoch": 1.781374432004497, "grad_norm": 0.591698687338854, "learning_rate": 1.5504968689994654e-07, "loss": 0.2647, "step": 38027 }, { "epoch": 1.7814212769944255, "grad_norm": 0.5947312716100012, "learning_rate": 1.54983944020691e-07, "loss": 0.2819, "step": 38028 }, { "epoch": 1.7814681219843538, "grad_norm": 0.5672881928311581, "learning_rate": 1.5491821463641338e-07, "loss": 0.2518, "step": 38029 }, { "epoch": 1.781514966974282, "grad_norm": 0.5643104539334964, "learning_rate": 1.5485249874749254e-07, "loss": 0.2759, "step": 38030 }, { "epoch": 1.7815618119642105, "grad_norm": 0.6151668619686615, "learning_rate": 1.5478679635430565e-07, "loss": 0.2719, "step": 38031 }, { "epoch": 1.7816086569541387, "grad_norm": 0.6079525906888819, "learning_rate": 1.5472110745723134e-07, "loss": 0.2535, "step": 38032 }, { "epoch": 1.781655501944067, "grad_norm": 0.5965951677459553, "learning_rate": 1.546554320566479e-07, "loss": 0.2713, "step": 38033 }, { "epoch": 1.7817023469339954, "grad_norm": 0.5806070518493215, "learning_rate": 1.5458977015293254e-07, "loss": 0.2553, "step": 38034 }, { "epoch": 1.781749191923924, "grad_norm": 0.5602976221159381, "learning_rate": 1.5452412174646354e-07, "loss": 0.263, "step": 38035 }, { "epoch": 1.781796036913852, "grad_norm": 0.5725018625602981, "learning_rate": 1.5445848683761893e-07, "loss": 0.2663, "step": 38036 }, { "epoch": 1.7818428819037804, "grad_norm": 0.5697245241093667, "learning_rate": 1.543928654267765e-07, "loss": 0.255, "step": 38037 }, { "epoch": 1.7818897268937088, "grad_norm": 0.6346203333407517, "learning_rate": 1.5432725751431315e-07, "loss": 0.2641, "step": 38038 }, { "epoch": 1.781936571883637, "grad_norm": 0.6290273077801325, "learning_rate": 1.542616631006072e-07, "loss": 0.2861, "step": 38039 }, { "epoch": 1.7819834168735653, "grad_norm": 0.6606936623074205, "learning_rate": 1.5419608218603553e-07, "loss": 0.268, "step": 38040 }, { "epoch": 1.7820302618634938, "grad_norm": 0.6125867936001722, "learning_rate": 1.541305147709757e-07, "loss": 0.2571, "step": 38041 }, { "epoch": 1.782077106853422, "grad_norm": 0.6512276955543073, "learning_rate": 1.5406496085580507e-07, "loss": 0.29, "step": 38042 }, { "epoch": 1.7821239518433503, "grad_norm": 0.6056231594979246, "learning_rate": 1.539994204409015e-07, "loss": 0.2573, "step": 38043 }, { "epoch": 1.7821707968332787, "grad_norm": 0.5568724945930658, "learning_rate": 1.539338935266413e-07, "loss": 0.2522, "step": 38044 }, { "epoch": 1.782217641823207, "grad_norm": 0.6191429772783594, "learning_rate": 1.5386838011340221e-07, "loss": 0.274, "step": 38045 }, { "epoch": 1.7822644868131352, "grad_norm": 0.5943786519084684, "learning_rate": 1.5380288020156036e-07, "loss": 0.2725, "step": 38046 }, { "epoch": 1.7823113318030637, "grad_norm": 0.6237245427602531, "learning_rate": 1.537373937914935e-07, "loss": 0.2657, "step": 38047 }, { "epoch": 1.7823581767929921, "grad_norm": 0.6083077668847642, "learning_rate": 1.5367192088357797e-07, "loss": 0.2671, "step": 38048 }, { "epoch": 1.7824050217829202, "grad_norm": 0.6166667566904392, "learning_rate": 1.5360646147819098e-07, "loss": 0.2667, "step": 38049 }, { "epoch": 1.7824518667728486, "grad_norm": 0.6078005503696343, "learning_rate": 1.5354101557570943e-07, "loss": 0.269, "step": 38050 }, { "epoch": 1.782498711762777, "grad_norm": 0.599319399993954, "learning_rate": 1.5347558317650945e-07, "loss": 0.262, "step": 38051 }, { "epoch": 1.7825455567527053, "grad_norm": 0.524727349130041, "learning_rate": 1.5341016428096767e-07, "loss": 0.2504, "step": 38052 }, { "epoch": 1.7825924017426336, "grad_norm": 0.590655499908429, "learning_rate": 1.5334475888946016e-07, "loss": 0.2714, "step": 38053 }, { "epoch": 1.782639246732562, "grad_norm": 0.6265935726407427, "learning_rate": 1.532793670023641e-07, "loss": 0.2768, "step": 38054 }, { "epoch": 1.7826860917224903, "grad_norm": 0.5721034152977097, "learning_rate": 1.532139886200551e-07, "loss": 0.2585, "step": 38055 }, { "epoch": 1.7827329367124185, "grad_norm": 0.596400391342132, "learning_rate": 1.5314862374291002e-07, "loss": 0.2676, "step": 38056 }, { "epoch": 1.782779781702347, "grad_norm": 0.6016224064351876, "learning_rate": 1.530832723713052e-07, "loss": 0.2716, "step": 38057 }, { "epoch": 1.7828266266922752, "grad_norm": 0.628737608609407, "learning_rate": 1.5301793450561596e-07, "loss": 0.2667, "step": 38058 }, { "epoch": 1.7828734716822034, "grad_norm": 0.5848002476685906, "learning_rate": 1.5295261014621866e-07, "loss": 0.2545, "step": 38059 }, { "epoch": 1.782920316672132, "grad_norm": 0.5673546524984748, "learning_rate": 1.5288729929348877e-07, "loss": 0.2548, "step": 38060 }, { "epoch": 1.7829671616620604, "grad_norm": 0.6526781681299405, "learning_rate": 1.5282200194780273e-07, "loss": 0.2808, "step": 38061 }, { "epoch": 1.7830140066519886, "grad_norm": 0.6080325505213761, "learning_rate": 1.527567181095363e-07, "loss": 0.2674, "step": 38062 }, { "epoch": 1.7830608516419169, "grad_norm": 0.6168339472041814, "learning_rate": 1.5269144777906475e-07, "loss": 0.2716, "step": 38063 }, { "epoch": 1.7831076966318453, "grad_norm": 0.5919876750851688, "learning_rate": 1.5262619095676446e-07, "loss": 0.2708, "step": 38064 }, { "epoch": 1.7831545416217736, "grad_norm": 0.6219312590566947, "learning_rate": 1.5256094764301038e-07, "loss": 0.2747, "step": 38065 }, { "epoch": 1.7832013866117018, "grad_norm": 0.6315632705764648, "learning_rate": 1.524957178381775e-07, "loss": 0.2683, "step": 38066 }, { "epoch": 1.7832482316016303, "grad_norm": 0.5674111449870948, "learning_rate": 1.5243050154264193e-07, "loss": 0.2669, "step": 38067 }, { "epoch": 1.7832950765915585, "grad_norm": 0.5869298191805978, "learning_rate": 1.5236529875677865e-07, "loss": 0.2649, "step": 38068 }, { "epoch": 1.7833419215814867, "grad_norm": 0.697651076934788, "learning_rate": 1.5230010948096313e-07, "loss": 0.2921, "step": 38069 }, { "epoch": 1.7833887665714152, "grad_norm": 0.583607720022069, "learning_rate": 1.5223493371557095e-07, "loss": 0.2737, "step": 38070 }, { "epoch": 1.7834356115613437, "grad_norm": 0.5883712205685228, "learning_rate": 1.52169771460976e-07, "loss": 0.2527, "step": 38071 }, { "epoch": 1.7834824565512717, "grad_norm": 0.6017195002277139, "learning_rate": 1.5210462271755433e-07, "loss": 0.2724, "step": 38072 }, { "epoch": 1.7835293015412002, "grad_norm": 0.6337800959931402, "learning_rate": 1.5203948748568008e-07, "loss": 0.2907, "step": 38073 }, { "epoch": 1.7835761465311286, "grad_norm": 0.6006914327496085, "learning_rate": 1.5197436576572854e-07, "loss": 0.2836, "step": 38074 }, { "epoch": 1.7836229915210569, "grad_norm": 0.5917978339571701, "learning_rate": 1.519092575580744e-07, "loss": 0.2752, "step": 38075 }, { "epoch": 1.783669836510985, "grad_norm": 0.565151148854537, "learning_rate": 1.518441628630926e-07, "loss": 0.2601, "step": 38076 }, { "epoch": 1.7837166815009136, "grad_norm": 0.5757303468241534, "learning_rate": 1.5177908168115706e-07, "loss": 0.2749, "step": 38077 }, { "epoch": 1.7837635264908418, "grad_norm": 0.6218451225570539, "learning_rate": 1.5171401401264274e-07, "loss": 0.2653, "step": 38078 }, { "epoch": 1.78381037148077, "grad_norm": 0.5788509943584452, "learning_rate": 1.5164895985792433e-07, "loss": 0.2593, "step": 38079 }, { "epoch": 1.7838572164706985, "grad_norm": 0.5661023544399589, "learning_rate": 1.515839192173757e-07, "loss": 0.2706, "step": 38080 }, { "epoch": 1.7839040614606267, "grad_norm": 0.5886718883921805, "learning_rate": 1.5151889209137127e-07, "loss": 0.2704, "step": 38081 }, { "epoch": 1.783950906450555, "grad_norm": 0.6160746284532619, "learning_rate": 1.5145387848028574e-07, "loss": 0.2738, "step": 38082 }, { "epoch": 1.7839977514404834, "grad_norm": 0.5345392368073162, "learning_rate": 1.5138887838449246e-07, "loss": 0.2548, "step": 38083 }, { "epoch": 1.784044596430412, "grad_norm": 0.5778556537589319, "learning_rate": 1.513238918043658e-07, "loss": 0.2524, "step": 38084 }, { "epoch": 1.78409144142034, "grad_norm": 0.6031757530596413, "learning_rate": 1.5125891874027991e-07, "loss": 0.2814, "step": 38085 }, { "epoch": 1.7841382864102684, "grad_norm": 0.5918311876645221, "learning_rate": 1.5119395919260897e-07, "loss": 0.2721, "step": 38086 }, { "epoch": 1.7841851314001969, "grad_norm": 0.582379290794651, "learning_rate": 1.51129013161726e-07, "loss": 0.2766, "step": 38087 }, { "epoch": 1.784231976390125, "grad_norm": 0.6092442541404469, "learning_rate": 1.510640806480057e-07, "loss": 0.2676, "step": 38088 }, { "epoch": 1.7842788213800533, "grad_norm": 0.6223955110306344, "learning_rate": 1.5099916165182083e-07, "loss": 0.2975, "step": 38089 }, { "epoch": 1.7843256663699818, "grad_norm": 0.5805139797102253, "learning_rate": 1.509342561735455e-07, "loss": 0.2742, "step": 38090 }, { "epoch": 1.78437251135991, "grad_norm": 0.5377801435511714, "learning_rate": 1.508693642135528e-07, "loss": 0.2538, "step": 38091 }, { "epoch": 1.7844193563498383, "grad_norm": 0.5864933638666162, "learning_rate": 1.508044857722171e-07, "loss": 0.2755, "step": 38092 }, { "epoch": 1.7844662013397667, "grad_norm": 0.6087443501789791, "learning_rate": 1.5073962084991067e-07, "loss": 0.2649, "step": 38093 }, { "epoch": 1.784513046329695, "grad_norm": 0.5986746392202507, "learning_rate": 1.506747694470076e-07, "loss": 0.2566, "step": 38094 }, { "epoch": 1.7845598913196232, "grad_norm": 0.5673952662429722, "learning_rate": 1.5060993156388064e-07, "loss": 0.2709, "step": 38095 }, { "epoch": 1.7846067363095517, "grad_norm": 0.5670551457306957, "learning_rate": 1.505451072009026e-07, "loss": 0.2595, "step": 38096 }, { "epoch": 1.7846535812994802, "grad_norm": 0.5982152530863853, "learning_rate": 1.504802963584473e-07, "loss": 0.2842, "step": 38097 }, { "epoch": 1.7847004262894084, "grad_norm": 0.612029063532133, "learning_rate": 1.5041549903688722e-07, "loss": 0.277, "step": 38098 }, { "epoch": 1.7847472712793366, "grad_norm": 0.6261259723930435, "learning_rate": 1.5035071523659572e-07, "loss": 0.2799, "step": 38099 }, { "epoch": 1.784794116269265, "grad_norm": 0.5617749464228248, "learning_rate": 1.5028594495794551e-07, "loss": 0.2566, "step": 38100 }, { "epoch": 1.7848409612591933, "grad_norm": 0.6083079622965107, "learning_rate": 1.5022118820130855e-07, "loss": 0.2829, "step": 38101 }, { "epoch": 1.7848878062491216, "grad_norm": 0.6259246661811191, "learning_rate": 1.501564449670581e-07, "loss": 0.2742, "step": 38102 }, { "epoch": 1.78493465123905, "grad_norm": 0.5672802870039051, "learning_rate": 1.5009171525556672e-07, "loss": 0.2678, "step": 38103 }, { "epoch": 1.7849814962289783, "grad_norm": 0.6295507144613856, "learning_rate": 1.500269990672068e-07, "loss": 0.2826, "step": 38104 }, { "epoch": 1.7850283412189065, "grad_norm": 0.6251612062564228, "learning_rate": 1.4996229640235089e-07, "loss": 0.2734, "step": 38105 }, { "epoch": 1.785075186208835, "grad_norm": 0.6058901452405481, "learning_rate": 1.498976072613717e-07, "loss": 0.2758, "step": 38106 }, { "epoch": 1.7851220311987634, "grad_norm": 0.5534336544955621, "learning_rate": 1.4983293164464118e-07, "loss": 0.2402, "step": 38107 }, { "epoch": 1.7851688761886915, "grad_norm": 0.5917344810810686, "learning_rate": 1.49768269552531e-07, "loss": 0.2722, "step": 38108 }, { "epoch": 1.78521572117862, "grad_norm": 0.5810495313958781, "learning_rate": 1.4970362098541357e-07, "loss": 0.2636, "step": 38109 }, { "epoch": 1.7852625661685484, "grad_norm": 0.597170066126833, "learning_rate": 1.4963898594366144e-07, "loss": 0.2729, "step": 38110 }, { "epoch": 1.7853094111584766, "grad_norm": 0.6023018747686129, "learning_rate": 1.4957436442764595e-07, "loss": 0.276, "step": 38111 }, { "epoch": 1.7853562561484049, "grad_norm": 0.600680043366601, "learning_rate": 1.4950975643773957e-07, "loss": 0.271, "step": 38112 }, { "epoch": 1.7854031011383333, "grad_norm": 0.5979528736162424, "learning_rate": 1.494451619743134e-07, "loss": 0.27, "step": 38113 }, { "epoch": 1.7854499461282616, "grad_norm": 0.5650137150404628, "learning_rate": 1.4938058103774024e-07, "loss": 0.2475, "step": 38114 }, { "epoch": 1.7854967911181898, "grad_norm": 0.641268520286322, "learning_rate": 1.4931601362839027e-07, "loss": 0.2859, "step": 38115 }, { "epoch": 1.7855436361081183, "grad_norm": 0.6447582691983563, "learning_rate": 1.4925145974663603e-07, "loss": 0.2594, "step": 38116 }, { "epoch": 1.7855904810980465, "grad_norm": 0.6089686016368033, "learning_rate": 1.4918691939284858e-07, "loss": 0.2838, "step": 38117 }, { "epoch": 1.7856373260879748, "grad_norm": 0.6344974550729728, "learning_rate": 1.4912239256740013e-07, "loss": 0.281, "step": 38118 }, { "epoch": 1.7856841710779032, "grad_norm": 0.5731864191751028, "learning_rate": 1.4905787927066096e-07, "loss": 0.2777, "step": 38119 }, { "epoch": 1.7857310160678317, "grad_norm": 0.5668410266403027, "learning_rate": 1.4899337950300296e-07, "loss": 0.2595, "step": 38120 }, { "epoch": 1.7857778610577597, "grad_norm": 0.6045453342671694, "learning_rate": 1.4892889326479725e-07, "loss": 0.2676, "step": 38121 }, { "epoch": 1.7858247060476882, "grad_norm": 0.6179764957644859, "learning_rate": 1.4886442055641464e-07, "loss": 0.2739, "step": 38122 }, { "epoch": 1.7858715510376166, "grad_norm": 0.5818723610714286, "learning_rate": 1.487999613782265e-07, "loss": 0.261, "step": 38123 }, { "epoch": 1.7859183960275449, "grad_norm": 0.621353129190716, "learning_rate": 1.4873551573060335e-07, "loss": 0.2777, "step": 38124 }, { "epoch": 1.7859652410174731, "grad_norm": 0.6755179258902105, "learning_rate": 1.4867108361391686e-07, "loss": 0.2716, "step": 38125 }, { "epoch": 1.7860120860074016, "grad_norm": 0.5887001546499039, "learning_rate": 1.48606665028537e-07, "loss": 0.2785, "step": 38126 }, { "epoch": 1.7860589309973298, "grad_norm": 0.5945582347266183, "learning_rate": 1.4854225997483457e-07, "loss": 0.2501, "step": 38127 }, { "epoch": 1.786105775987258, "grad_norm": 0.5986905268653766, "learning_rate": 1.4847786845318096e-07, "loss": 0.2587, "step": 38128 }, { "epoch": 1.7861526209771865, "grad_norm": 0.5903997214974628, "learning_rate": 1.4841349046394588e-07, "loss": 0.2839, "step": 38129 }, { "epoch": 1.7861994659671148, "grad_norm": 0.5880573657609403, "learning_rate": 1.4834912600750011e-07, "loss": 0.2915, "step": 38130 }, { "epoch": 1.786246310957043, "grad_norm": 0.6338451681224033, "learning_rate": 1.4828477508421425e-07, "loss": 0.279, "step": 38131 }, { "epoch": 1.7862931559469715, "grad_norm": 0.5627646248820745, "learning_rate": 1.4822043769445792e-07, "loss": 0.2594, "step": 38132 }, { "epoch": 1.7863400009369, "grad_norm": 0.5648959910447136, "learning_rate": 1.481561138386023e-07, "loss": 0.2599, "step": 38133 }, { "epoch": 1.7863868459268282, "grad_norm": 0.6316715678722116, "learning_rate": 1.480918035170173e-07, "loss": 0.2923, "step": 38134 }, { "epoch": 1.7864336909167564, "grad_norm": 0.5725293772094721, "learning_rate": 1.4802750673007238e-07, "loss": 0.2543, "step": 38135 }, { "epoch": 1.7864805359066849, "grad_norm": 0.6220136564489561, "learning_rate": 1.4796322347813808e-07, "loss": 0.2897, "step": 38136 }, { "epoch": 1.7865273808966131, "grad_norm": 0.5944780389681924, "learning_rate": 1.4789895376158464e-07, "loss": 0.2618, "step": 38137 }, { "epoch": 1.7865742258865414, "grad_norm": 0.5853880204789174, "learning_rate": 1.4783469758078123e-07, "loss": 0.2817, "step": 38138 }, { "epoch": 1.7866210708764698, "grad_norm": 0.5859505301338098, "learning_rate": 1.477704549360978e-07, "loss": 0.2567, "step": 38139 }, { "epoch": 1.786667915866398, "grad_norm": 0.6121891065967556, "learning_rate": 1.4770622582790438e-07, "loss": 0.2527, "step": 38140 }, { "epoch": 1.7867147608563263, "grad_norm": 0.5827750244997932, "learning_rate": 1.4764201025657065e-07, "loss": 0.27, "step": 38141 }, { "epoch": 1.7867616058462548, "grad_norm": 0.6104412916798498, "learning_rate": 1.475778082224655e-07, "loss": 0.2734, "step": 38142 }, { "epoch": 1.7868084508361832, "grad_norm": 0.5848805802982899, "learning_rate": 1.4751361972595912e-07, "loss": 0.2702, "step": 38143 }, { "epoch": 1.7868552958261112, "grad_norm": 0.5941222181169032, "learning_rate": 1.4744944476742046e-07, "loss": 0.2697, "step": 38144 }, { "epoch": 1.7869021408160397, "grad_norm": 0.5868291993894216, "learning_rate": 1.473852833472189e-07, "loss": 0.275, "step": 38145 }, { "epoch": 1.7869489858059682, "grad_norm": 0.6142009184136545, "learning_rate": 1.4732113546572364e-07, "loss": 0.2769, "step": 38146 }, { "epoch": 1.7869958307958964, "grad_norm": 0.6724721977071298, "learning_rate": 1.4725700112330376e-07, "loss": 0.2978, "step": 38147 }, { "epoch": 1.7870426757858247, "grad_norm": 0.5842531311770223, "learning_rate": 1.4719288032032902e-07, "loss": 0.2807, "step": 38148 }, { "epoch": 1.7870895207757531, "grad_norm": 0.6030654091072631, "learning_rate": 1.47128773057168e-07, "loss": 0.2683, "step": 38149 }, { "epoch": 1.7871363657656814, "grad_norm": 0.5932609549306033, "learning_rate": 1.47064679334189e-07, "loss": 0.2617, "step": 38150 }, { "epoch": 1.7871832107556096, "grad_norm": 0.6063859550277789, "learning_rate": 1.4700059915176145e-07, "loss": 0.2649, "step": 38151 }, { "epoch": 1.787230055745538, "grad_norm": 0.5906646103005865, "learning_rate": 1.4693653251025398e-07, "loss": 0.2711, "step": 38152 }, { "epoch": 1.7872769007354663, "grad_norm": 0.6292424237257144, "learning_rate": 1.468724794100354e-07, "loss": 0.2965, "step": 38153 }, { "epoch": 1.7873237457253945, "grad_norm": 0.5955939332738113, "learning_rate": 1.4680843985147437e-07, "loss": 0.2645, "step": 38154 }, { "epoch": 1.787370590715323, "grad_norm": 0.6285264143640309, "learning_rate": 1.467444138349397e-07, "loss": 0.2682, "step": 38155 }, { "epoch": 1.7874174357052515, "grad_norm": 0.6269999903536947, "learning_rate": 1.466804013607992e-07, "loss": 0.2601, "step": 38156 }, { "epoch": 1.7874642806951795, "grad_norm": 0.5844173637287513, "learning_rate": 1.4661640242942143e-07, "loss": 0.2766, "step": 38157 }, { "epoch": 1.787511125685108, "grad_norm": 0.5934337376456005, "learning_rate": 1.4655241704117446e-07, "loss": 0.2619, "step": 38158 }, { "epoch": 1.7875579706750364, "grad_norm": 0.5621041375129663, "learning_rate": 1.4648844519642713e-07, "loss": 0.2513, "step": 38159 }, { "epoch": 1.7876048156649647, "grad_norm": 0.5358539628558043, "learning_rate": 1.4642448689554695e-07, "loss": 0.2573, "step": 38160 }, { "epoch": 1.787651660654893, "grad_norm": 0.5956496330557508, "learning_rate": 1.4636054213890276e-07, "loss": 0.2764, "step": 38161 }, { "epoch": 1.7876985056448214, "grad_norm": 0.5727413132400645, "learning_rate": 1.4629661092686182e-07, "loss": 0.2517, "step": 38162 }, { "epoch": 1.7877453506347496, "grad_norm": 0.5918486563474233, "learning_rate": 1.4623269325979266e-07, "loss": 0.2585, "step": 38163 }, { "epoch": 1.7877921956246778, "grad_norm": 0.5754679965507244, "learning_rate": 1.4616878913806255e-07, "loss": 0.2524, "step": 38164 }, { "epoch": 1.7878390406146063, "grad_norm": 0.6155830375515075, "learning_rate": 1.4610489856203918e-07, "loss": 0.2706, "step": 38165 }, { "epoch": 1.7878858856045345, "grad_norm": 0.6040931896606411, "learning_rate": 1.4604102153209038e-07, "loss": 0.2703, "step": 38166 }, { "epoch": 1.7879327305944628, "grad_norm": 0.5713555964724653, "learning_rate": 1.4597715804858443e-07, "loss": 0.2631, "step": 38167 }, { "epoch": 1.7879795755843912, "grad_norm": 0.611750664274985, "learning_rate": 1.4591330811188797e-07, "loss": 0.2694, "step": 38168 }, { "epoch": 1.7880264205743197, "grad_norm": 0.6173263604300309, "learning_rate": 1.4584947172236853e-07, "loss": 0.2672, "step": 38169 }, { "epoch": 1.788073265564248, "grad_norm": 0.615906927711723, "learning_rate": 1.4578564888039437e-07, "loss": 0.281, "step": 38170 }, { "epoch": 1.7881201105541762, "grad_norm": 0.6639012730600784, "learning_rate": 1.4572183958633136e-07, "loss": 0.2833, "step": 38171 }, { "epoch": 1.7881669555441047, "grad_norm": 0.5851226921729239, "learning_rate": 1.4565804384054776e-07, "loss": 0.2592, "step": 38172 }, { "epoch": 1.788213800534033, "grad_norm": 0.5816310328413197, "learning_rate": 1.4559426164341056e-07, "loss": 0.2569, "step": 38173 }, { "epoch": 1.7882606455239611, "grad_norm": 0.5938793148646632, "learning_rate": 1.455304929952864e-07, "loss": 0.2697, "step": 38174 }, { "epoch": 1.7883074905138896, "grad_norm": 0.6334595563079577, "learning_rate": 1.4546673789654214e-07, "loss": 0.2677, "step": 38175 }, { "epoch": 1.7883543355038178, "grad_norm": 0.5805948323126355, "learning_rate": 1.4540299634754535e-07, "loss": 0.2583, "step": 38176 }, { "epoch": 1.788401180493746, "grad_norm": 0.6257902527630008, "learning_rate": 1.4533926834866292e-07, "loss": 0.2771, "step": 38177 }, { "epoch": 1.7884480254836745, "grad_norm": 0.5748163736533609, "learning_rate": 1.4527555390026066e-07, "loss": 0.2637, "step": 38178 }, { "epoch": 1.788494870473603, "grad_norm": 0.6298630002776514, "learning_rate": 1.4521185300270607e-07, "loss": 0.2865, "step": 38179 }, { "epoch": 1.788541715463531, "grad_norm": 0.6123594981995926, "learning_rate": 1.4514816565636497e-07, "loss": 0.2678, "step": 38180 }, { "epoch": 1.7885885604534595, "grad_norm": 0.5583844085754976, "learning_rate": 1.4508449186160457e-07, "loss": 0.2645, "step": 38181 }, { "epoch": 1.788635405443388, "grad_norm": 0.6706842165116021, "learning_rate": 1.4502083161879093e-07, "loss": 0.2906, "step": 38182 }, { "epoch": 1.7886822504333162, "grad_norm": 0.6095420961965806, "learning_rate": 1.4495718492829075e-07, "loss": 0.2713, "step": 38183 }, { "epoch": 1.7887290954232444, "grad_norm": 0.5712443422213028, "learning_rate": 1.4489355179046982e-07, "loss": 0.2681, "step": 38184 }, { "epoch": 1.788775940413173, "grad_norm": 0.6169789515537465, "learning_rate": 1.448299322056948e-07, "loss": 0.2768, "step": 38185 }, { "epoch": 1.7888227854031011, "grad_norm": 0.5866904221248316, "learning_rate": 1.4476632617433122e-07, "loss": 0.2605, "step": 38186 }, { "epoch": 1.7888696303930294, "grad_norm": 0.5585690052427009, "learning_rate": 1.4470273369674575e-07, "loss": 0.2618, "step": 38187 }, { "epoch": 1.7889164753829578, "grad_norm": 0.6450432260029512, "learning_rate": 1.4463915477330366e-07, "loss": 0.2778, "step": 38188 }, { "epoch": 1.788963320372886, "grad_norm": 0.66104164991771, "learning_rate": 1.4457558940437155e-07, "loss": 0.3036, "step": 38189 }, { "epoch": 1.7890101653628143, "grad_norm": 0.5954917193363027, "learning_rate": 1.4451203759031501e-07, "loss": 0.2745, "step": 38190 }, { "epoch": 1.7890570103527428, "grad_norm": 0.6153899647514498, "learning_rate": 1.4444849933149985e-07, "loss": 0.2783, "step": 38191 }, { "epoch": 1.7891038553426712, "grad_norm": 0.6041459646797862, "learning_rate": 1.4438497462829105e-07, "loss": 0.2666, "step": 38192 }, { "epoch": 1.7891507003325993, "grad_norm": 0.6167700278258006, "learning_rate": 1.4432146348105497e-07, "loss": 0.273, "step": 38193 }, { "epoch": 1.7891975453225277, "grad_norm": 0.6398072349328133, "learning_rate": 1.4425796589015635e-07, "loss": 0.2865, "step": 38194 }, { "epoch": 1.7892443903124562, "grad_norm": 0.5731097186471589, "learning_rate": 1.4419448185596126e-07, "loss": 0.2767, "step": 38195 }, { "epoch": 1.7892912353023844, "grad_norm": 0.6168068128810653, "learning_rate": 1.4413101137883495e-07, "loss": 0.2695, "step": 38196 }, { "epoch": 1.7893380802923127, "grad_norm": 0.6655693729018489, "learning_rate": 1.4406755445914272e-07, "loss": 0.2826, "step": 38197 }, { "epoch": 1.7893849252822411, "grad_norm": 0.6300481612013218, "learning_rate": 1.4400411109724955e-07, "loss": 0.2887, "step": 38198 }, { "epoch": 1.7894317702721694, "grad_norm": 0.6065389920819819, "learning_rate": 1.439406812935204e-07, "loss": 0.2752, "step": 38199 }, { "epoch": 1.7894786152620976, "grad_norm": 0.5515054206512532, "learning_rate": 1.4387726504832029e-07, "loss": 0.2502, "step": 38200 }, { "epoch": 1.789525460252026, "grad_norm": 0.610230166433642, "learning_rate": 1.4381386236201417e-07, "loss": 0.2809, "step": 38201 }, { "epoch": 1.7895723052419543, "grad_norm": 0.647154512877947, "learning_rate": 1.4375047323496733e-07, "loss": 0.259, "step": 38202 }, { "epoch": 1.7896191502318826, "grad_norm": 0.6027310860903048, "learning_rate": 1.436870976675442e-07, "loss": 0.268, "step": 38203 }, { "epoch": 1.789665995221811, "grad_norm": 0.6287371915814893, "learning_rate": 1.4362373566011002e-07, "loss": 0.2762, "step": 38204 }, { "epoch": 1.7897128402117395, "grad_norm": 0.6186739174826897, "learning_rate": 1.4356038721302894e-07, "loss": 0.2577, "step": 38205 }, { "epoch": 1.7897596852016677, "grad_norm": 0.5812818643501799, "learning_rate": 1.4349705232666517e-07, "loss": 0.2487, "step": 38206 }, { "epoch": 1.789806530191596, "grad_norm": 0.6067169986596368, "learning_rate": 1.4343373100138364e-07, "loss": 0.2944, "step": 38207 }, { "epoch": 1.7898533751815244, "grad_norm": 0.6141906338528396, "learning_rate": 1.433704232375488e-07, "loss": 0.2777, "step": 38208 }, { "epoch": 1.7899002201714527, "grad_norm": 0.5831422815303263, "learning_rate": 1.4330712903552453e-07, "loss": 0.2732, "step": 38209 }, { "epoch": 1.789947065161381, "grad_norm": 0.5728037815650567, "learning_rate": 1.4324384839567608e-07, "loss": 0.2678, "step": 38210 }, { "epoch": 1.7899939101513094, "grad_norm": 0.6017346876843201, "learning_rate": 1.431805813183665e-07, "loss": 0.2691, "step": 38211 }, { "epoch": 1.7900407551412376, "grad_norm": 0.6400181344100502, "learning_rate": 1.431173278039605e-07, "loss": 0.2742, "step": 38212 }, { "epoch": 1.7900876001311659, "grad_norm": 0.6366448187211478, "learning_rate": 1.4305408785282165e-07, "loss": 0.283, "step": 38213 }, { "epoch": 1.7901344451210943, "grad_norm": 0.6185073178229309, "learning_rate": 1.4299086146531415e-07, "loss": 0.2632, "step": 38214 }, { "epoch": 1.7901812901110228, "grad_norm": 0.6517751609927769, "learning_rate": 1.4292764864180186e-07, "loss": 0.2808, "step": 38215 }, { "epoch": 1.7902281351009508, "grad_norm": 0.6197637485772909, "learning_rate": 1.4286444938264866e-07, "loss": 0.2919, "step": 38216 }, { "epoch": 1.7902749800908793, "grad_norm": 0.6169125080183249, "learning_rate": 1.4280126368821812e-07, "loss": 0.2742, "step": 38217 }, { "epoch": 1.7903218250808077, "grad_norm": 0.6114997953954889, "learning_rate": 1.4273809155887359e-07, "loss": 0.2767, "step": 38218 }, { "epoch": 1.790368670070736, "grad_norm": 0.6054771331009269, "learning_rate": 1.426749329949792e-07, "loss": 0.265, "step": 38219 }, { "epoch": 1.7904155150606642, "grad_norm": 0.584269243938498, "learning_rate": 1.4261178799689772e-07, "loss": 0.2671, "step": 38220 }, { "epoch": 1.7904623600505927, "grad_norm": 0.5831932836241789, "learning_rate": 1.4254865656499306e-07, "loss": 0.2701, "step": 38221 }, { "epoch": 1.790509205040521, "grad_norm": 0.6252977836104819, "learning_rate": 1.4248553869962877e-07, "loss": 0.2893, "step": 38222 }, { "epoch": 1.7905560500304492, "grad_norm": 0.5887103710915442, "learning_rate": 1.424224344011671e-07, "loss": 0.2494, "step": 38223 }, { "epoch": 1.7906028950203776, "grad_norm": 0.6051304910277379, "learning_rate": 1.423593436699719e-07, "loss": 0.2659, "step": 38224 }, { "epoch": 1.7906497400103059, "grad_norm": 0.5753822124143629, "learning_rate": 1.4229626650640593e-07, "loss": 0.2551, "step": 38225 }, { "epoch": 1.790696585000234, "grad_norm": 0.5657056240247893, "learning_rate": 1.422332029108328e-07, "loss": 0.2564, "step": 38226 }, { "epoch": 1.7907434299901626, "grad_norm": 0.5976923266213081, "learning_rate": 1.4217015288361474e-07, "loss": 0.2654, "step": 38227 }, { "epoch": 1.790790274980091, "grad_norm": 0.6111394770972937, "learning_rate": 1.4210711642511505e-07, "loss": 0.2719, "step": 38228 }, { "epoch": 1.790837119970019, "grad_norm": 0.5975862542255846, "learning_rate": 1.4204409353569592e-07, "loss": 0.2529, "step": 38229 }, { "epoch": 1.7908839649599475, "grad_norm": 0.5905073999879383, "learning_rate": 1.4198108421572044e-07, "loss": 0.2592, "step": 38230 }, { "epoch": 1.790930809949876, "grad_norm": 0.5788446984411537, "learning_rate": 1.4191808846555132e-07, "loss": 0.2757, "step": 38231 }, { "epoch": 1.7909776549398042, "grad_norm": 0.6000293318696687, "learning_rate": 1.418551062855511e-07, "loss": 0.2656, "step": 38232 }, { "epoch": 1.7910244999297324, "grad_norm": 0.6001363943772063, "learning_rate": 1.4179213767608168e-07, "loss": 0.2708, "step": 38233 }, { "epoch": 1.791071344919661, "grad_norm": 0.561629326484542, "learning_rate": 1.4172918263750612e-07, "loss": 0.2604, "step": 38234 }, { "epoch": 1.7911181899095892, "grad_norm": 0.6012200847743949, "learning_rate": 1.4166624117018607e-07, "loss": 0.2681, "step": 38235 }, { "epoch": 1.7911650348995174, "grad_norm": 0.6195913185707937, "learning_rate": 1.4160331327448402e-07, "loss": 0.2802, "step": 38236 }, { "epoch": 1.7912118798894459, "grad_norm": 0.5787156533722257, "learning_rate": 1.4154039895076216e-07, "loss": 0.2769, "step": 38237 }, { "epoch": 1.791258724879374, "grad_norm": 0.5747892387308066, "learning_rate": 1.4147749819938245e-07, "loss": 0.2764, "step": 38238 }, { "epoch": 1.7913055698693023, "grad_norm": 0.5963304222732339, "learning_rate": 1.4141461102070764e-07, "loss": 0.2595, "step": 38239 }, { "epoch": 1.7913524148592308, "grad_norm": 0.5984877475901651, "learning_rate": 1.4135173741509856e-07, "loss": 0.2694, "step": 38240 }, { "epoch": 1.7913992598491593, "grad_norm": 0.5738469522069445, "learning_rate": 1.4128887738291714e-07, "loss": 0.2646, "step": 38241 }, { "epoch": 1.7914461048390875, "grad_norm": 0.6216078652617514, "learning_rate": 1.4122603092452558e-07, "loss": 0.2817, "step": 38242 }, { "epoch": 1.7914929498290157, "grad_norm": 0.5590577500955229, "learning_rate": 1.4116319804028527e-07, "loss": 0.263, "step": 38243 }, { "epoch": 1.7915397948189442, "grad_norm": 0.5894761317940482, "learning_rate": 1.4110037873055759e-07, "loss": 0.2507, "step": 38244 }, { "epoch": 1.7915866398088724, "grad_norm": 0.5906004799248169, "learning_rate": 1.4103757299570476e-07, "loss": 0.284, "step": 38245 }, { "epoch": 1.7916334847988007, "grad_norm": 0.615719743928019, "learning_rate": 1.4097478083608785e-07, "loss": 0.274, "step": 38246 }, { "epoch": 1.7916803297887292, "grad_norm": 0.6251636342207562, "learning_rate": 1.4091200225206824e-07, "loss": 0.2774, "step": 38247 }, { "epoch": 1.7917271747786574, "grad_norm": 0.6071933161231508, "learning_rate": 1.408492372440068e-07, "loss": 0.271, "step": 38248 }, { "epoch": 1.7917740197685856, "grad_norm": 0.6025777031572314, "learning_rate": 1.4078648581226512e-07, "loss": 0.2717, "step": 38249 }, { "epoch": 1.791820864758514, "grad_norm": 0.639016915813361, "learning_rate": 1.4072374795720434e-07, "loss": 0.2718, "step": 38250 }, { "epoch": 1.7918677097484426, "grad_norm": 0.6105604508659229, "learning_rate": 1.40661023679185e-07, "loss": 0.2751, "step": 38251 }, { "epoch": 1.7919145547383706, "grad_norm": 0.579738062266376, "learning_rate": 1.405983129785693e-07, "loss": 0.2554, "step": 38252 }, { "epoch": 1.791961399728299, "grad_norm": 0.5640255006211045, "learning_rate": 1.405356158557167e-07, "loss": 0.2688, "step": 38253 }, { "epoch": 1.7920082447182275, "grad_norm": 0.5681449804233683, "learning_rate": 1.404729323109888e-07, "loss": 0.2479, "step": 38254 }, { "epoch": 1.7920550897081557, "grad_norm": 0.6134266564647639, "learning_rate": 1.404102623447462e-07, "loss": 0.2666, "step": 38255 }, { "epoch": 1.792101934698084, "grad_norm": 0.5704894588104956, "learning_rate": 1.4034760595734915e-07, "loss": 0.2631, "step": 38256 }, { "epoch": 1.7921487796880124, "grad_norm": 0.5972064586983569, "learning_rate": 1.4028496314915847e-07, "loss": 0.2663, "step": 38257 }, { "epoch": 1.7921956246779407, "grad_norm": 0.6016094763255411, "learning_rate": 1.4022233392053525e-07, "loss": 0.2832, "step": 38258 }, { "epoch": 1.792242469667869, "grad_norm": 0.6136242822060871, "learning_rate": 1.4015971827183923e-07, "loss": 0.2619, "step": 38259 }, { "epoch": 1.7922893146577974, "grad_norm": 0.5756833510804933, "learning_rate": 1.4009711620343064e-07, "loss": 0.2637, "step": 38260 }, { "epoch": 1.7923361596477256, "grad_norm": 0.5975151643464544, "learning_rate": 1.4003452771567034e-07, "loss": 0.2648, "step": 38261 }, { "epoch": 1.7923830046376539, "grad_norm": 0.5571089384057317, "learning_rate": 1.3997195280891802e-07, "loss": 0.263, "step": 38262 }, { "epoch": 1.7924298496275823, "grad_norm": 0.5763724684866591, "learning_rate": 1.3990939148353395e-07, "loss": 0.2584, "step": 38263 }, { "epoch": 1.7924766946175108, "grad_norm": 0.5915253921870145, "learning_rate": 1.3984684373987783e-07, "loss": 0.2847, "step": 38264 }, { "epoch": 1.7925235396074388, "grad_norm": 0.6104994974315815, "learning_rate": 1.397843095783105e-07, "loss": 0.2859, "step": 38265 }, { "epoch": 1.7925703845973673, "grad_norm": 0.5588993042864071, "learning_rate": 1.3972178899919086e-07, "loss": 0.2625, "step": 38266 }, { "epoch": 1.7926172295872957, "grad_norm": 0.6097420103929034, "learning_rate": 1.3965928200287915e-07, "loss": 0.253, "step": 38267 }, { "epoch": 1.792664074577224, "grad_norm": 0.5789561114870108, "learning_rate": 1.3959678858973536e-07, "loss": 0.2701, "step": 38268 }, { "epoch": 1.7927109195671522, "grad_norm": 0.5792939858646233, "learning_rate": 1.3953430876011836e-07, "loss": 0.2625, "step": 38269 }, { "epoch": 1.7927577645570807, "grad_norm": 0.5869271247873528, "learning_rate": 1.3947184251438816e-07, "loss": 0.2681, "step": 38270 }, { "epoch": 1.792804609547009, "grad_norm": 0.6566102893668612, "learning_rate": 1.3940938985290475e-07, "loss": 0.2939, "step": 38271 }, { "epoch": 1.7928514545369372, "grad_norm": 0.6263730103561256, "learning_rate": 1.3934695077602645e-07, "loss": 0.2767, "step": 38272 }, { "epoch": 1.7928982995268656, "grad_norm": 0.5894015614292626, "learning_rate": 1.3928452528411295e-07, "loss": 0.2688, "step": 38273 }, { "epoch": 1.7929451445167939, "grad_norm": 0.6019273869056322, "learning_rate": 1.392221133775243e-07, "loss": 0.2702, "step": 38274 }, { "epoch": 1.7929919895067221, "grad_norm": 0.6712522180691357, "learning_rate": 1.3915971505661873e-07, "loss": 0.2915, "step": 38275 }, { "epoch": 1.7930388344966506, "grad_norm": 0.5931499559835554, "learning_rate": 1.390973303217555e-07, "loss": 0.2639, "step": 38276 }, { "epoch": 1.793085679486579, "grad_norm": 0.5338161009106636, "learning_rate": 1.3903495917329395e-07, "loss": 0.2642, "step": 38277 }, { "epoch": 1.7931325244765073, "grad_norm": 0.5650536912598126, "learning_rate": 1.3897260161159248e-07, "loss": 0.2676, "step": 38278 }, { "epoch": 1.7931793694664355, "grad_norm": 0.5760206110177978, "learning_rate": 1.389102576370105e-07, "loss": 0.259, "step": 38279 }, { "epoch": 1.793226214456364, "grad_norm": 0.5567613032667884, "learning_rate": 1.3884792724990632e-07, "loss": 0.2606, "step": 38280 }, { "epoch": 1.7932730594462922, "grad_norm": 0.5701183587814297, "learning_rate": 1.3878561045063937e-07, "loss": 0.26, "step": 38281 }, { "epoch": 1.7933199044362205, "grad_norm": 0.5785516810404674, "learning_rate": 1.3872330723956746e-07, "loss": 0.2672, "step": 38282 }, { "epoch": 1.793366749426149, "grad_norm": 0.592929611248769, "learning_rate": 1.386610176170497e-07, "loss": 0.2809, "step": 38283 }, { "epoch": 1.7934135944160772, "grad_norm": 0.6417226272292913, "learning_rate": 1.385987415834439e-07, "loss": 0.2951, "step": 38284 }, { "epoch": 1.7934604394060054, "grad_norm": 0.5676345655379442, "learning_rate": 1.3853647913910863e-07, "loss": 0.2587, "step": 38285 }, { "epoch": 1.7935072843959339, "grad_norm": 0.5780958786808575, "learning_rate": 1.3847423028440276e-07, "loss": 0.2737, "step": 38286 }, { "epoch": 1.7935541293858623, "grad_norm": 0.621261464660309, "learning_rate": 1.3841199501968382e-07, "loss": 0.2765, "step": 38287 }, { "epoch": 1.7936009743757904, "grad_norm": 0.575535032519594, "learning_rate": 1.3834977334531097e-07, "loss": 0.2654, "step": 38288 }, { "epoch": 1.7936478193657188, "grad_norm": 0.6073026748967321, "learning_rate": 1.382875652616414e-07, "loss": 0.2752, "step": 38289 }, { "epoch": 1.7936946643556473, "grad_norm": 0.5935465426852489, "learning_rate": 1.3822537076903286e-07, "loss": 0.2666, "step": 38290 }, { "epoch": 1.7937415093455755, "grad_norm": 0.5769168245565848, "learning_rate": 1.3816318986784371e-07, "loss": 0.2463, "step": 38291 }, { "epoch": 1.7937883543355038, "grad_norm": 0.601447346874883, "learning_rate": 1.38101022558432e-07, "loss": 0.2702, "step": 38292 }, { "epoch": 1.7938351993254322, "grad_norm": 0.5522410109984931, "learning_rate": 1.3803886884115518e-07, "loss": 0.2609, "step": 38293 }, { "epoch": 1.7938820443153605, "grad_norm": 0.5846968263403324, "learning_rate": 1.379767287163708e-07, "loss": 0.27, "step": 38294 }, { "epoch": 1.7939288893052887, "grad_norm": 0.6085097825286448, "learning_rate": 1.3791460218443741e-07, "loss": 0.2856, "step": 38295 }, { "epoch": 1.7939757342952172, "grad_norm": 0.5762331192611492, "learning_rate": 1.3785248924571143e-07, "loss": 0.262, "step": 38296 }, { "epoch": 1.7940225792851454, "grad_norm": 0.5596979852195038, "learning_rate": 1.377903899005506e-07, "loss": 0.2525, "step": 38297 }, { "epoch": 1.7940694242750737, "grad_norm": 0.6425265688711813, "learning_rate": 1.3772830414931215e-07, "loss": 0.2944, "step": 38298 }, { "epoch": 1.7941162692650021, "grad_norm": 0.5812141249176416, "learning_rate": 1.3766623199235384e-07, "loss": 0.2653, "step": 38299 }, { "epoch": 1.7941631142549306, "grad_norm": 0.6027469015114699, "learning_rate": 1.3760417343003234e-07, "loss": 0.2743, "step": 38300 }, { "epoch": 1.7942099592448586, "grad_norm": 0.6021915310021884, "learning_rate": 1.3754212846270542e-07, "loss": 0.2774, "step": 38301 }, { "epoch": 1.794256804234787, "grad_norm": 0.5857052360241701, "learning_rate": 1.3748009709072945e-07, "loss": 0.2754, "step": 38302 }, { "epoch": 1.7943036492247155, "grad_norm": 0.5715092580240317, "learning_rate": 1.3741807931446221e-07, "loss": 0.2761, "step": 38303 }, { "epoch": 1.7943504942146438, "grad_norm": 0.6044178053242866, "learning_rate": 1.3735607513425981e-07, "loss": 0.2856, "step": 38304 }, { "epoch": 1.794397339204572, "grad_norm": 0.558774943761141, "learning_rate": 1.3729408455047917e-07, "loss": 0.2582, "step": 38305 }, { "epoch": 1.7944441841945005, "grad_norm": 0.5953097209432688, "learning_rate": 1.3723210756347722e-07, "loss": 0.2661, "step": 38306 }, { "epoch": 1.7944910291844287, "grad_norm": 0.6240858421562561, "learning_rate": 1.3717014417361118e-07, "loss": 0.2877, "step": 38307 }, { "epoch": 1.794537874174357, "grad_norm": 0.5715467790830157, "learning_rate": 1.3710819438123661e-07, "loss": 0.2477, "step": 38308 }, { "epoch": 1.7945847191642854, "grad_norm": 0.5862599722312377, "learning_rate": 1.3704625818671047e-07, "loss": 0.2843, "step": 38309 }, { "epoch": 1.7946315641542137, "grad_norm": 0.5876148052098324, "learning_rate": 1.3698433559038938e-07, "loss": 0.275, "step": 38310 }, { "epoch": 1.794678409144142, "grad_norm": 0.6336540658947675, "learning_rate": 1.3692242659262943e-07, "loss": 0.2624, "step": 38311 }, { "epoch": 1.7947252541340704, "grad_norm": 0.6544445725028435, "learning_rate": 1.3686053119378707e-07, "loss": 0.2672, "step": 38312 }, { "epoch": 1.7947720991239988, "grad_norm": 0.581269446388141, "learning_rate": 1.3679864939421861e-07, "loss": 0.2721, "step": 38313 }, { "epoch": 1.794818944113927, "grad_norm": 0.6139586913822823, "learning_rate": 1.3673678119427936e-07, "loss": 0.2539, "step": 38314 }, { "epoch": 1.7948657891038553, "grad_norm": 0.599589156864659, "learning_rate": 1.3667492659432625e-07, "loss": 0.2825, "step": 38315 }, { "epoch": 1.7949126340937838, "grad_norm": 0.5572050015335738, "learning_rate": 1.366130855947148e-07, "loss": 0.2503, "step": 38316 }, { "epoch": 1.794959479083712, "grad_norm": 0.5756929780494473, "learning_rate": 1.3655125819580146e-07, "loss": 0.2673, "step": 38317 }, { "epoch": 1.7950063240736402, "grad_norm": 0.6353740275230512, "learning_rate": 1.3648944439794142e-07, "loss": 0.2644, "step": 38318 }, { "epoch": 1.7950531690635687, "grad_norm": 0.569799520295325, "learning_rate": 1.3642764420149057e-07, "loss": 0.2605, "step": 38319 }, { "epoch": 1.795100014053497, "grad_norm": 0.5831354779873955, "learning_rate": 1.3636585760680443e-07, "loss": 0.2734, "step": 38320 }, { "epoch": 1.7951468590434252, "grad_norm": 0.6231807347354756, "learning_rate": 1.3630408461423882e-07, "loss": 0.2755, "step": 38321 }, { "epoch": 1.7951937040333537, "grad_norm": 0.5643335824124729, "learning_rate": 1.3624232522414876e-07, "loss": 0.253, "step": 38322 }, { "epoch": 1.7952405490232821, "grad_norm": 0.6554116747822343, "learning_rate": 1.3618057943689062e-07, "loss": 0.2806, "step": 38323 }, { "epoch": 1.7952873940132101, "grad_norm": 0.6180555654342033, "learning_rate": 1.3611884725281882e-07, "loss": 0.2719, "step": 38324 }, { "epoch": 1.7953342390031386, "grad_norm": 0.5316592305903749, "learning_rate": 1.3605712867228894e-07, "loss": 0.2538, "step": 38325 }, { "epoch": 1.795381083993067, "grad_norm": 0.5735568927190644, "learning_rate": 1.3599542369565622e-07, "loss": 0.2663, "step": 38326 }, { "epoch": 1.7954279289829953, "grad_norm": 0.6037977436485118, "learning_rate": 1.359337323232754e-07, "loss": 0.2748, "step": 38327 }, { "epoch": 1.7954747739729235, "grad_norm": 0.6018801237141734, "learning_rate": 1.3587205455550173e-07, "loss": 0.2731, "step": 38328 }, { "epoch": 1.795521618962852, "grad_norm": 0.5814876122826662, "learning_rate": 1.3581039039269023e-07, "loss": 0.2581, "step": 38329 }, { "epoch": 1.7955684639527802, "grad_norm": 0.5954860410133239, "learning_rate": 1.3574873983519615e-07, "loss": 0.2638, "step": 38330 }, { "epoch": 1.7956153089427085, "grad_norm": 0.5636490669277574, "learning_rate": 1.356871028833734e-07, "loss": 0.257, "step": 38331 }, { "epoch": 1.795662153932637, "grad_norm": 0.5534496424106817, "learning_rate": 1.356254795375775e-07, "loss": 0.247, "step": 38332 }, { "epoch": 1.7957089989225652, "grad_norm": 0.5552730605247196, "learning_rate": 1.3556386979816233e-07, "loss": 0.2521, "step": 38333 }, { "epoch": 1.7957558439124934, "grad_norm": 0.6147805436840018, "learning_rate": 1.355022736654829e-07, "loss": 0.279, "step": 38334 }, { "epoch": 1.795802688902422, "grad_norm": 0.6136382577166716, "learning_rate": 1.3544069113989339e-07, "loss": 0.2921, "step": 38335 }, { "epoch": 1.7958495338923504, "grad_norm": 0.5498841228090792, "learning_rate": 1.3537912222174876e-07, "loss": 0.2572, "step": 38336 }, { "epoch": 1.7958963788822784, "grad_norm": 0.6737840144118845, "learning_rate": 1.353175669114029e-07, "loss": 0.2958, "step": 38337 }, { "epoch": 1.7959432238722068, "grad_norm": 0.5620086662507583, "learning_rate": 1.3525602520921027e-07, "loss": 0.2607, "step": 38338 }, { "epoch": 1.7959900688621353, "grad_norm": 0.5924311386144091, "learning_rate": 1.3519449711552446e-07, "loss": 0.2539, "step": 38339 }, { "epoch": 1.7960369138520635, "grad_norm": 0.6133315551327765, "learning_rate": 1.351329826307002e-07, "loss": 0.2612, "step": 38340 }, { "epoch": 1.7960837588419918, "grad_norm": 0.561005533610624, "learning_rate": 1.3507148175509104e-07, "loss": 0.2591, "step": 38341 }, { "epoch": 1.7961306038319202, "grad_norm": 0.5563599841546116, "learning_rate": 1.3500999448905096e-07, "loss": 0.2683, "step": 38342 }, { "epoch": 1.7961774488218485, "grad_norm": 0.6219685917929333, "learning_rate": 1.349485208329343e-07, "loss": 0.2674, "step": 38343 }, { "epoch": 1.7962242938117767, "grad_norm": 0.5789960607449743, "learning_rate": 1.3488706078709445e-07, "loss": 0.2835, "step": 38344 }, { "epoch": 1.7962711388017052, "grad_norm": 0.5994430990369809, "learning_rate": 1.348256143518853e-07, "loss": 0.2673, "step": 38345 }, { "epoch": 1.7963179837916334, "grad_norm": 0.6086054621166697, "learning_rate": 1.3476418152765986e-07, "loss": 0.2849, "step": 38346 }, { "epoch": 1.7963648287815617, "grad_norm": 0.5807363838822183, "learning_rate": 1.3470276231477231e-07, "loss": 0.2527, "step": 38347 }, { "epoch": 1.7964116737714901, "grad_norm": 0.6637228199884344, "learning_rate": 1.3464135671357543e-07, "loss": 0.2912, "step": 38348 }, { "epoch": 1.7964585187614186, "grad_norm": 0.6197189964979968, "learning_rate": 1.3457996472442337e-07, "loss": 0.2779, "step": 38349 }, { "epoch": 1.7965053637513468, "grad_norm": 0.5893374907177016, "learning_rate": 1.3451858634766946e-07, "loss": 0.2535, "step": 38350 }, { "epoch": 1.796552208741275, "grad_norm": 0.6197606837642075, "learning_rate": 1.3445722158366592e-07, "loss": 0.2761, "step": 38351 }, { "epoch": 1.7965990537312035, "grad_norm": 0.5784689194795148, "learning_rate": 1.343958704327672e-07, "loss": 0.2664, "step": 38352 }, { "epoch": 1.7966458987211318, "grad_norm": 0.6056926308789492, "learning_rate": 1.3433453289532493e-07, "loss": 0.2748, "step": 38353 }, { "epoch": 1.79669274371106, "grad_norm": 0.5975054850445207, "learning_rate": 1.3427320897169305e-07, "loss": 0.2748, "step": 38354 }, { "epoch": 1.7967395887009885, "grad_norm": 0.6979708454607152, "learning_rate": 1.342118986622243e-07, "loss": 0.2971, "step": 38355 }, { "epoch": 1.7967864336909167, "grad_norm": 0.6326776562697315, "learning_rate": 1.3415060196727169e-07, "loss": 0.2814, "step": 38356 }, { "epoch": 1.796833278680845, "grad_norm": 0.5964953159534649, "learning_rate": 1.3408931888718752e-07, "loss": 0.2625, "step": 38357 }, { "epoch": 1.7968801236707734, "grad_norm": 0.6118630189448707, "learning_rate": 1.340280494223245e-07, "loss": 0.2761, "step": 38358 }, { "epoch": 1.796926968660702, "grad_norm": 0.6257019397986024, "learning_rate": 1.33966793573036e-07, "loss": 0.2679, "step": 38359 }, { "epoch": 1.79697381365063, "grad_norm": 0.6279163497967942, "learning_rate": 1.3390555133967336e-07, "loss": 0.2766, "step": 38360 }, { "epoch": 1.7970206586405584, "grad_norm": 0.6158663402186423, "learning_rate": 1.338443227225897e-07, "loss": 0.2589, "step": 38361 }, { "epoch": 1.7970675036304868, "grad_norm": 0.5793810683149325, "learning_rate": 1.3378310772213743e-07, "loss": 0.2695, "step": 38362 }, { "epoch": 1.797114348620415, "grad_norm": 0.5973888406062262, "learning_rate": 1.3372190633866856e-07, "loss": 0.2526, "step": 38363 }, { "epoch": 1.7971611936103433, "grad_norm": 0.643711617427548, "learning_rate": 1.3366071857253498e-07, "loss": 0.266, "step": 38364 }, { "epoch": 1.7972080386002718, "grad_norm": 0.5730591946396331, "learning_rate": 1.335995444240895e-07, "loss": 0.2583, "step": 38365 }, { "epoch": 1.7972548835902, "grad_norm": 0.6203697352401577, "learning_rate": 1.3353838389368435e-07, "loss": 0.2762, "step": 38366 }, { "epoch": 1.7973017285801283, "grad_norm": 0.6293480987291845, "learning_rate": 1.3347723698167032e-07, "loss": 0.2886, "step": 38367 }, { "epoch": 1.7973485735700567, "grad_norm": 0.560741276257405, "learning_rate": 1.3341610368840047e-07, "loss": 0.2693, "step": 38368 }, { "epoch": 1.797395418559985, "grad_norm": 0.5775168794841472, "learning_rate": 1.3335498401422593e-07, "loss": 0.2644, "step": 38369 }, { "epoch": 1.7974422635499132, "grad_norm": 0.6062046973541761, "learning_rate": 1.3329387795949834e-07, "loss": 0.278, "step": 38370 }, { "epoch": 1.7974891085398417, "grad_norm": 0.6292847357347249, "learning_rate": 1.3323278552456991e-07, "loss": 0.2798, "step": 38371 }, { "epoch": 1.7975359535297701, "grad_norm": 0.6365573775814549, "learning_rate": 1.3317170670979208e-07, "loss": 0.2704, "step": 38372 }, { "epoch": 1.7975827985196982, "grad_norm": 0.6224837797891615, "learning_rate": 1.331106415155159e-07, "loss": 0.2723, "step": 38373 }, { "epoch": 1.7976296435096266, "grad_norm": 0.5690488243161824, "learning_rate": 1.330495899420936e-07, "loss": 0.2639, "step": 38374 }, { "epoch": 1.797676488499555, "grad_norm": 0.6286877800431945, "learning_rate": 1.3298855198987548e-07, "loss": 0.2561, "step": 38375 }, { "epoch": 1.7977233334894833, "grad_norm": 0.5926836259633501, "learning_rate": 1.3292752765921317e-07, "loss": 0.2776, "step": 38376 }, { "epoch": 1.7977701784794116, "grad_norm": 0.6201893433903841, "learning_rate": 1.3286651695045838e-07, "loss": 0.2811, "step": 38377 }, { "epoch": 1.79781702346934, "grad_norm": 0.6009819083902838, "learning_rate": 1.3280551986396162e-07, "loss": 0.2688, "step": 38378 }, { "epoch": 1.7978638684592683, "grad_norm": 0.5651061306634572, "learning_rate": 1.3274453640007428e-07, "loss": 0.2589, "step": 38379 }, { "epoch": 1.7979107134491965, "grad_norm": 0.6283328000638099, "learning_rate": 1.326835665591475e-07, "loss": 0.272, "step": 38380 }, { "epoch": 1.797957558439125, "grad_norm": 0.5764830463886775, "learning_rate": 1.3262261034153123e-07, "loss": 0.2522, "step": 38381 }, { "epoch": 1.7980044034290532, "grad_norm": 0.5895292718400466, "learning_rate": 1.3256166774757688e-07, "loss": 0.2606, "step": 38382 }, { "epoch": 1.7980512484189815, "grad_norm": 0.5940184148262008, "learning_rate": 1.3250073877763504e-07, "loss": 0.2642, "step": 38383 }, { "epoch": 1.79809809340891, "grad_norm": 0.5541385792153176, "learning_rate": 1.3243982343205648e-07, "loss": 0.2581, "step": 38384 }, { "epoch": 1.7981449383988384, "grad_norm": 0.6448030268295781, "learning_rate": 1.323789217111915e-07, "loss": 0.2897, "step": 38385 }, { "epoch": 1.7981917833887666, "grad_norm": 0.5961671645798949, "learning_rate": 1.3231803361539148e-07, "loss": 0.2729, "step": 38386 }, { "epoch": 1.7982386283786949, "grad_norm": 0.5841259383069823, "learning_rate": 1.322571591450056e-07, "loss": 0.264, "step": 38387 }, { "epoch": 1.7982854733686233, "grad_norm": 0.5850250736744305, "learning_rate": 1.3219629830038467e-07, "loss": 0.2566, "step": 38388 }, { "epoch": 1.7983323183585516, "grad_norm": 0.5729182688753075, "learning_rate": 1.3213545108187868e-07, "loss": 0.2614, "step": 38389 }, { "epoch": 1.7983791633484798, "grad_norm": 0.6125238517980383, "learning_rate": 1.3207461748983824e-07, "loss": 0.2889, "step": 38390 }, { "epoch": 1.7984260083384083, "grad_norm": 0.5977173620250107, "learning_rate": 1.3201379752461302e-07, "loss": 0.269, "step": 38391 }, { "epoch": 1.7984728533283365, "grad_norm": 0.578010270178775, "learning_rate": 1.3195299118655386e-07, "loss": 0.2633, "step": 38392 }, { "epoch": 1.7985196983182647, "grad_norm": 0.5611517610688863, "learning_rate": 1.3189219847600936e-07, "loss": 0.2584, "step": 38393 }, { "epoch": 1.7985665433081932, "grad_norm": 0.5855358705045574, "learning_rate": 1.3183141939333065e-07, "loss": 0.2507, "step": 38394 }, { "epoch": 1.7986133882981217, "grad_norm": 0.608874341824411, "learning_rate": 1.3177065393886633e-07, "loss": 0.2634, "step": 38395 }, { "epoch": 1.7986602332880497, "grad_norm": 0.597707520456687, "learning_rate": 1.3170990211296698e-07, "loss": 0.2727, "step": 38396 }, { "epoch": 1.7987070782779782, "grad_norm": 0.6086369764001638, "learning_rate": 1.316491639159817e-07, "loss": 0.2662, "step": 38397 }, { "epoch": 1.7987539232679066, "grad_norm": 0.5497370169861179, "learning_rate": 1.3158843934826027e-07, "loss": 0.2398, "step": 38398 }, { "epoch": 1.7988007682578349, "grad_norm": 0.6352298540252368, "learning_rate": 1.315277284101524e-07, "loss": 0.2579, "step": 38399 }, { "epoch": 1.798847613247763, "grad_norm": 0.5866807198472548, "learning_rate": 1.3146703110200694e-07, "loss": 0.2679, "step": 38400 }, { "epoch": 1.7988944582376916, "grad_norm": 0.5773932573447034, "learning_rate": 1.3140634742417363e-07, "loss": 0.2543, "step": 38401 }, { "epoch": 1.7989413032276198, "grad_norm": 0.6231714711456625, "learning_rate": 1.3134567737700111e-07, "loss": 0.2668, "step": 38402 }, { "epoch": 1.798988148217548, "grad_norm": 0.5658161513704685, "learning_rate": 1.3128502096083878e-07, "loss": 0.2617, "step": 38403 }, { "epoch": 1.7990349932074765, "grad_norm": 0.6365067041486339, "learning_rate": 1.3122437817603585e-07, "loss": 0.2762, "step": 38404 }, { "epoch": 1.7990818381974047, "grad_norm": 0.617681921388483, "learning_rate": 1.3116374902294171e-07, "loss": 0.2591, "step": 38405 }, { "epoch": 1.799128683187333, "grad_norm": 0.5852759200211627, "learning_rate": 1.3110313350190446e-07, "loss": 0.2672, "step": 38406 }, { "epoch": 1.7991755281772615, "grad_norm": 0.5640827807747198, "learning_rate": 1.3104253161327325e-07, "loss": 0.2532, "step": 38407 }, { "epoch": 1.79922237316719, "grad_norm": 0.6394409275223062, "learning_rate": 1.3098194335739722e-07, "loss": 0.2766, "step": 38408 }, { "epoch": 1.799269218157118, "grad_norm": 0.5894244660074459, "learning_rate": 1.3092136873462418e-07, "loss": 0.255, "step": 38409 }, { "epoch": 1.7993160631470464, "grad_norm": 0.5634977982540024, "learning_rate": 1.308608077453033e-07, "loss": 0.2608, "step": 38410 }, { "epoch": 1.7993629081369749, "grad_norm": 0.5856399137085911, "learning_rate": 1.3080026038978344e-07, "loss": 0.2756, "step": 38411 }, { "epoch": 1.799409753126903, "grad_norm": 0.6155152125562658, "learning_rate": 1.307397266684124e-07, "loss": 0.2736, "step": 38412 }, { "epoch": 1.7994565981168313, "grad_norm": 0.6044687726200215, "learning_rate": 1.306792065815385e-07, "loss": 0.2518, "step": 38413 }, { "epoch": 1.7995034431067598, "grad_norm": 0.5560839149883742, "learning_rate": 1.306187001295106e-07, "loss": 0.2528, "step": 38414 }, { "epoch": 1.799550288096688, "grad_norm": 0.5930977486190927, "learning_rate": 1.3055820731267626e-07, "loss": 0.2731, "step": 38415 }, { "epoch": 1.7995971330866163, "grad_norm": 0.699737581723133, "learning_rate": 1.3049772813138378e-07, "loss": 0.2809, "step": 38416 }, { "epoch": 1.7996439780765447, "grad_norm": 0.5707503285493931, "learning_rate": 1.3043726258598204e-07, "loss": 0.2611, "step": 38417 }, { "epoch": 1.799690823066473, "grad_norm": 0.59978895742893, "learning_rate": 1.3037681067681744e-07, "loss": 0.2696, "step": 38418 }, { "epoch": 1.7997376680564012, "grad_norm": 0.636707849214421, "learning_rate": 1.3031637240423916e-07, "loss": 0.2826, "step": 38419 }, { "epoch": 1.7997845130463297, "grad_norm": 0.5729000684967372, "learning_rate": 1.302559477685944e-07, "loss": 0.266, "step": 38420 }, { "epoch": 1.7998313580362582, "grad_norm": 0.6044950708758785, "learning_rate": 1.3019553677023122e-07, "loss": 0.2575, "step": 38421 }, { "epoch": 1.7998782030261864, "grad_norm": 0.5973085424323751, "learning_rate": 1.3013513940949686e-07, "loss": 0.2725, "step": 38422 }, { "epoch": 1.7999250480161146, "grad_norm": 0.6560285672736771, "learning_rate": 1.300747556867396e-07, "loss": 0.2836, "step": 38423 }, { "epoch": 1.799971893006043, "grad_norm": 0.5930271572175968, "learning_rate": 1.3001438560230616e-07, "loss": 0.2777, "step": 38424 }, { "epoch": 1.8000187379959713, "grad_norm": 0.6339707178672866, "learning_rate": 1.2995402915654405e-07, "loss": 0.2809, "step": 38425 }, { "epoch": 1.8000655829858996, "grad_norm": 0.5558931951351072, "learning_rate": 1.2989368634980098e-07, "loss": 0.2646, "step": 38426 }, { "epoch": 1.800112427975828, "grad_norm": 0.5965588206936402, "learning_rate": 1.2983335718242397e-07, "loss": 0.2833, "step": 38427 }, { "epoch": 1.8001592729657563, "grad_norm": 0.5873255258443735, "learning_rate": 1.2977304165476046e-07, "loss": 0.2643, "step": 38428 }, { "epoch": 1.8002061179556845, "grad_norm": 0.571179793606713, "learning_rate": 1.2971273976715742e-07, "loss": 0.2805, "step": 38429 }, { "epoch": 1.800252962945613, "grad_norm": 0.6234320590411239, "learning_rate": 1.2965245151996152e-07, "loss": 0.2645, "step": 38430 }, { "epoch": 1.8002998079355415, "grad_norm": 0.5711940447524307, "learning_rate": 1.2959217691352029e-07, "loss": 0.2632, "step": 38431 }, { "epoch": 1.8003466529254695, "grad_norm": 0.6470518829105544, "learning_rate": 1.2953191594817975e-07, "loss": 0.2729, "step": 38432 }, { "epoch": 1.800393497915398, "grad_norm": 0.5932909703214561, "learning_rate": 1.294716686242878e-07, "loss": 0.2726, "step": 38433 }, { "epoch": 1.8004403429053264, "grad_norm": 0.5775410785358082, "learning_rate": 1.2941143494219017e-07, "loss": 0.2442, "step": 38434 }, { "epoch": 1.8004871878952546, "grad_norm": 0.5899422939277678, "learning_rate": 1.293512149022344e-07, "loss": 0.2604, "step": 38435 }, { "epoch": 1.8005340328851829, "grad_norm": 0.6179801934869176, "learning_rate": 1.2929100850476662e-07, "loss": 0.2682, "step": 38436 }, { "epoch": 1.8005808778751113, "grad_norm": 0.5895010633996108, "learning_rate": 1.2923081575013263e-07, "loss": 0.2513, "step": 38437 }, { "epoch": 1.8006277228650396, "grad_norm": 0.6022126168707184, "learning_rate": 1.291706366386797e-07, "loss": 0.2717, "step": 38438 }, { "epoch": 1.8006745678549678, "grad_norm": 0.6138256084219139, "learning_rate": 1.2911047117075386e-07, "loss": 0.2844, "step": 38439 }, { "epoch": 1.8007214128448963, "grad_norm": 0.602630183722802, "learning_rate": 1.290503193467013e-07, "loss": 0.28, "step": 38440 }, { "epoch": 1.8007682578348245, "grad_norm": 0.5898921844652135, "learning_rate": 1.289901811668684e-07, "loss": 0.2726, "step": 38441 }, { "epoch": 1.8008151028247528, "grad_norm": 0.5694713042859375, "learning_rate": 1.2893005663160096e-07, "loss": 0.2553, "step": 38442 }, { "epoch": 1.8008619478146812, "grad_norm": 0.5692608807598005, "learning_rate": 1.288699457412454e-07, "loss": 0.2661, "step": 38443 }, { "epoch": 1.8009087928046097, "grad_norm": 0.6188563673431994, "learning_rate": 1.2880984849614696e-07, "loss": 0.2884, "step": 38444 }, { "epoch": 1.8009556377945377, "grad_norm": 0.5704210330663414, "learning_rate": 1.287497648966518e-07, "loss": 0.2781, "step": 38445 }, { "epoch": 1.8010024827844662, "grad_norm": 0.5883545350387259, "learning_rate": 1.2868969494310573e-07, "loss": 0.2776, "step": 38446 }, { "epoch": 1.8010493277743946, "grad_norm": 0.5921191680708884, "learning_rate": 1.2862963863585516e-07, "loss": 0.2681, "step": 38447 }, { "epoch": 1.8010961727643229, "grad_norm": 0.5986442747879296, "learning_rate": 1.285695959752442e-07, "loss": 0.2713, "step": 38448 }, { "epoch": 1.8011430177542511, "grad_norm": 0.5867702954202648, "learning_rate": 1.2850956696161932e-07, "loss": 0.2666, "step": 38449 }, { "epoch": 1.8011898627441796, "grad_norm": 0.6028375946733, "learning_rate": 1.2844955159532628e-07, "loss": 0.2715, "step": 38450 }, { "epoch": 1.8012367077341078, "grad_norm": 0.6212092796034099, "learning_rate": 1.283895498767096e-07, "loss": 0.2725, "step": 38451 }, { "epoch": 1.801283552724036, "grad_norm": 0.5904207143427189, "learning_rate": 1.2832956180611504e-07, "loss": 0.2657, "step": 38452 }, { "epoch": 1.8013303977139645, "grad_norm": 0.5794263355201523, "learning_rate": 1.2826958738388794e-07, "loss": 0.2732, "step": 38453 }, { "epoch": 1.8013772427038928, "grad_norm": 0.5923152202797966, "learning_rate": 1.28209626610373e-07, "loss": 0.2706, "step": 38454 }, { "epoch": 1.801424087693821, "grad_norm": 0.6087701658378318, "learning_rate": 1.2814967948591577e-07, "loss": 0.2754, "step": 38455 }, { "epoch": 1.8014709326837495, "grad_norm": 0.5996564127383306, "learning_rate": 1.2808974601086072e-07, "loss": 0.2827, "step": 38456 }, { "epoch": 1.801517777673678, "grad_norm": 0.576158078876884, "learning_rate": 1.2802982618555339e-07, "loss": 0.2599, "step": 38457 }, { "epoch": 1.8015646226636062, "grad_norm": 0.6120168300237052, "learning_rate": 1.2796992001033793e-07, "loss": 0.2767, "step": 38458 }, { "epoch": 1.8016114676535344, "grad_norm": 0.5826769938345954, "learning_rate": 1.2791002748555963e-07, "loss": 0.2629, "step": 38459 }, { "epoch": 1.8016583126434629, "grad_norm": 0.6106299930664213, "learning_rate": 1.278501486115627e-07, "loss": 0.2642, "step": 38460 }, { "epoch": 1.8017051576333911, "grad_norm": 0.6179073739153386, "learning_rate": 1.2779028338869183e-07, "loss": 0.2599, "step": 38461 }, { "epoch": 1.8017520026233194, "grad_norm": 0.5893213088233066, "learning_rate": 1.277304318172917e-07, "loss": 0.2698, "step": 38462 }, { "epoch": 1.8017988476132478, "grad_norm": 0.7181040677076277, "learning_rate": 1.2767059389770713e-07, "loss": 0.278, "step": 38463 }, { "epoch": 1.801845692603176, "grad_norm": 0.5406762080382043, "learning_rate": 1.276107696302817e-07, "loss": 0.2568, "step": 38464 }, { "epoch": 1.8018925375931043, "grad_norm": 0.5809106743536647, "learning_rate": 1.275509590153598e-07, "loss": 0.2628, "step": 38465 }, { "epoch": 1.8019393825830328, "grad_norm": 0.5591861801939, "learning_rate": 1.274911620532865e-07, "loss": 0.2482, "step": 38466 }, { "epoch": 1.8019862275729612, "grad_norm": 0.6134423903914746, "learning_rate": 1.2743137874440452e-07, "loss": 0.2558, "step": 38467 }, { "epoch": 1.8020330725628892, "grad_norm": 0.6153936048683368, "learning_rate": 1.2737160908905895e-07, "loss": 0.2746, "step": 38468 }, { "epoch": 1.8020799175528177, "grad_norm": 0.5583082153110467, "learning_rate": 1.273118530875933e-07, "loss": 0.2605, "step": 38469 }, { "epoch": 1.8021267625427462, "grad_norm": 0.59423488606176, "learning_rate": 1.272521107403521e-07, "loss": 0.2697, "step": 38470 }, { "epoch": 1.8021736075326744, "grad_norm": 0.586093906308464, "learning_rate": 1.2719238204767808e-07, "loss": 0.2725, "step": 38471 }, { "epoch": 1.8022204525226027, "grad_norm": 0.5651462273756852, "learning_rate": 1.2713266700991573e-07, "loss": 0.263, "step": 38472 }, { "epoch": 1.8022672975125311, "grad_norm": 0.6030092173414571, "learning_rate": 1.2707296562740834e-07, "loss": 0.2538, "step": 38473 }, { "epoch": 1.8023141425024594, "grad_norm": 0.6129806948792065, "learning_rate": 1.2701327790049984e-07, "loss": 0.2586, "step": 38474 }, { "epoch": 1.8023609874923876, "grad_norm": 0.608645169336044, "learning_rate": 1.2695360382953325e-07, "loss": 0.2763, "step": 38475 }, { "epoch": 1.802407832482316, "grad_norm": 0.6551680419481071, "learning_rate": 1.2689394341485223e-07, "loss": 0.266, "step": 38476 }, { "epoch": 1.8024546774722443, "grad_norm": 0.6283486110715166, "learning_rate": 1.2683429665680036e-07, "loss": 0.2578, "step": 38477 }, { "epoch": 1.8025015224621725, "grad_norm": 0.5963855288675363, "learning_rate": 1.267746635557207e-07, "loss": 0.2829, "step": 38478 }, { "epoch": 1.802548367452101, "grad_norm": 0.5724939747147546, "learning_rate": 1.2671504411195606e-07, "loss": 0.2643, "step": 38479 }, { "epoch": 1.8025952124420295, "grad_norm": 0.6478720276135349, "learning_rate": 1.2665543832585002e-07, "loss": 0.2676, "step": 38480 }, { "epoch": 1.8026420574319575, "grad_norm": 0.6030204444844666, "learning_rate": 1.265958461977451e-07, "loss": 0.2742, "step": 38481 }, { "epoch": 1.802688902421886, "grad_norm": 0.5329996991380964, "learning_rate": 1.2653626772798467e-07, "loss": 0.2459, "step": 38482 }, { "epoch": 1.8027357474118144, "grad_norm": 0.6031845050803654, "learning_rate": 1.2647670291691145e-07, "loss": 0.2666, "step": 38483 }, { "epoch": 1.8027825924017427, "grad_norm": 0.6156480440840896, "learning_rate": 1.264171517648688e-07, "loss": 0.2757, "step": 38484 }, { "epoch": 1.802829437391671, "grad_norm": 0.6126029094607015, "learning_rate": 1.2635761427219867e-07, "loss": 0.2662, "step": 38485 }, { "epoch": 1.8028762823815994, "grad_norm": 0.5870242400307093, "learning_rate": 1.2629809043924357e-07, "loss": 0.2601, "step": 38486 }, { "epoch": 1.8029231273715276, "grad_norm": 0.6461507897325618, "learning_rate": 1.262385802663463e-07, "loss": 0.2852, "step": 38487 }, { "epoch": 1.8029699723614558, "grad_norm": 0.6264976781169654, "learning_rate": 1.261790837538493e-07, "loss": 0.2885, "step": 38488 }, { "epoch": 1.8030168173513843, "grad_norm": 0.5765647755392627, "learning_rate": 1.2611960090209512e-07, "loss": 0.2584, "step": 38489 }, { "epoch": 1.8030636623413125, "grad_norm": 0.5892358048301953, "learning_rate": 1.2606013171142655e-07, "loss": 0.2593, "step": 38490 }, { "epoch": 1.8031105073312408, "grad_norm": 0.5979179280684779, "learning_rate": 1.260006761821847e-07, "loss": 0.277, "step": 38491 }, { "epoch": 1.8031573523211692, "grad_norm": 0.5805107292351279, "learning_rate": 1.259412343147126e-07, "loss": 0.2537, "step": 38492 }, { "epoch": 1.8032041973110977, "grad_norm": 0.6215362385804083, "learning_rate": 1.258818061093517e-07, "loss": 0.2612, "step": 38493 }, { "epoch": 1.803251042301026, "grad_norm": 0.5811690165025146, "learning_rate": 1.2582239156644417e-07, "loss": 0.2731, "step": 38494 }, { "epoch": 1.8032978872909542, "grad_norm": 0.6058491067230148, "learning_rate": 1.2576299068633224e-07, "loss": 0.2755, "step": 38495 }, { "epoch": 1.8033447322808827, "grad_norm": 0.5842564501480445, "learning_rate": 1.2570360346935788e-07, "loss": 0.2515, "step": 38496 }, { "epoch": 1.803391577270811, "grad_norm": 0.6484906576216636, "learning_rate": 1.2564422991586223e-07, "loss": 0.2782, "step": 38497 }, { "epoch": 1.8034384222607391, "grad_norm": 0.5950373908073883, "learning_rate": 1.2558487002618692e-07, "loss": 0.266, "step": 38498 }, { "epoch": 1.8034852672506676, "grad_norm": 0.6297201049698132, "learning_rate": 1.2552552380067473e-07, "loss": 0.289, "step": 38499 }, { "epoch": 1.8035321122405958, "grad_norm": 0.6100438494920198, "learning_rate": 1.254661912396657e-07, "loss": 0.2744, "step": 38500 }, { "epoch": 1.803578957230524, "grad_norm": 0.5691848133383002, "learning_rate": 1.2540687234350203e-07, "loss": 0.2474, "step": 38501 }, { "epoch": 1.8036258022204525, "grad_norm": 0.6105503698068858, "learning_rate": 1.253475671125251e-07, "loss": 0.2689, "step": 38502 }, { "epoch": 1.803672647210381, "grad_norm": 0.6254725787490513, "learning_rate": 1.2528827554707608e-07, "loss": 0.2752, "step": 38503 }, { "epoch": 1.803719492200309, "grad_norm": 0.5806520352823769, "learning_rate": 1.2522899764749602e-07, "loss": 0.2693, "step": 38504 }, { "epoch": 1.8037663371902375, "grad_norm": 0.5955322819957359, "learning_rate": 1.251697334141261e-07, "loss": 0.2629, "step": 38505 }, { "epoch": 1.803813182180166, "grad_norm": 0.5935496150515518, "learning_rate": 1.2511048284730793e-07, "loss": 0.2547, "step": 38506 }, { "epoch": 1.8038600271700942, "grad_norm": 0.5863029402970328, "learning_rate": 1.2505124594738154e-07, "loss": 0.2739, "step": 38507 }, { "epoch": 1.8039068721600224, "grad_norm": 0.5988300738641251, "learning_rate": 1.249920227146889e-07, "loss": 0.2582, "step": 38508 }, { "epoch": 1.803953717149951, "grad_norm": 0.615165167122043, "learning_rate": 1.2493281314956972e-07, "loss": 0.2771, "step": 38509 }, { "epoch": 1.8040005621398791, "grad_norm": 0.6079897515043311, "learning_rate": 1.2487361725236513e-07, "loss": 0.2632, "step": 38510 }, { "epoch": 1.8040474071298074, "grad_norm": 0.5671474302639556, "learning_rate": 1.2481443502341624e-07, "loss": 0.2668, "step": 38511 }, { "epoch": 1.8040942521197358, "grad_norm": 0.6602700162447581, "learning_rate": 1.247552664630633e-07, "loss": 0.2826, "step": 38512 }, { "epoch": 1.804141097109664, "grad_norm": 0.6296705652578903, "learning_rate": 1.2469611157164663e-07, "loss": 0.2823, "step": 38513 }, { "epoch": 1.8041879420995923, "grad_norm": 0.635530730178548, "learning_rate": 1.2463697034950734e-07, "loss": 0.2838, "step": 38514 }, { "epoch": 1.8042347870895208, "grad_norm": 0.551006405393528, "learning_rate": 1.245778427969846e-07, "loss": 0.2506, "step": 38515 }, { "epoch": 1.8042816320794492, "grad_norm": 0.5802573669874654, "learning_rate": 1.2451872891441952e-07, "loss": 0.2661, "step": 38516 }, { "epoch": 1.8043284770693773, "grad_norm": 0.6031040661004007, "learning_rate": 1.244596287021521e-07, "loss": 0.2638, "step": 38517 }, { "epoch": 1.8043753220593057, "grad_norm": 0.6489524304757689, "learning_rate": 1.244005421605224e-07, "loss": 0.291, "step": 38518 }, { "epoch": 1.8044221670492342, "grad_norm": 0.6842586788189374, "learning_rate": 1.2434146928987063e-07, "loss": 0.2885, "step": 38519 }, { "epoch": 1.8044690120391624, "grad_norm": 0.6405803371611531, "learning_rate": 1.2428241009053682e-07, "loss": 0.2663, "step": 38520 }, { "epoch": 1.8045158570290907, "grad_norm": 0.5517248037215582, "learning_rate": 1.2422336456286044e-07, "loss": 0.2612, "step": 38521 }, { "epoch": 1.8045627020190191, "grad_norm": 0.5913729940850249, "learning_rate": 1.241643327071812e-07, "loss": 0.2576, "step": 38522 }, { "epoch": 1.8046095470089474, "grad_norm": 0.5798293397749088, "learning_rate": 1.2410531452383912e-07, "loss": 0.2791, "step": 38523 }, { "epoch": 1.8046563919988756, "grad_norm": 0.626814733530415, "learning_rate": 1.2404631001317391e-07, "loss": 0.2776, "step": 38524 }, { "epoch": 1.804703236988804, "grad_norm": 0.6029810857557167, "learning_rate": 1.2398731917552503e-07, "loss": 0.2701, "step": 38525 }, { "epoch": 1.8047500819787323, "grad_norm": 0.6135185930324288, "learning_rate": 1.239283420112322e-07, "loss": 0.2833, "step": 38526 }, { "epoch": 1.8047969269686606, "grad_norm": 0.6462481083235428, "learning_rate": 1.2386937852063463e-07, "loss": 0.2976, "step": 38527 }, { "epoch": 1.804843771958589, "grad_norm": 0.5823970346482452, "learning_rate": 1.2381042870407116e-07, "loss": 0.2688, "step": 38528 }, { "epoch": 1.8048906169485175, "grad_norm": 0.6155154573515683, "learning_rate": 1.2375149256188157e-07, "loss": 0.2687, "step": 38529 }, { "epoch": 1.8049374619384457, "grad_norm": 0.5479695754639746, "learning_rate": 1.2369257009440473e-07, "loss": 0.2496, "step": 38530 }, { "epoch": 1.804984306928374, "grad_norm": 0.5658458539605369, "learning_rate": 1.236336613019798e-07, "loss": 0.2722, "step": 38531 }, { "epoch": 1.8050311519183024, "grad_norm": 0.5630593092647403, "learning_rate": 1.235747661849465e-07, "loss": 0.2579, "step": 38532 }, { "epoch": 1.8050779969082307, "grad_norm": 0.5504509311934759, "learning_rate": 1.235158847436424e-07, "loss": 0.2488, "step": 38533 }, { "epoch": 1.805124841898159, "grad_norm": 0.5904627548964226, "learning_rate": 1.2345701697840773e-07, "loss": 0.2708, "step": 38534 }, { "epoch": 1.8051716868880874, "grad_norm": 0.5811814353532745, "learning_rate": 1.233981628895803e-07, "loss": 0.2509, "step": 38535 }, { "epoch": 1.8052185318780156, "grad_norm": 0.6062244152518426, "learning_rate": 1.2333932247749896e-07, "loss": 0.275, "step": 38536 }, { "epoch": 1.8052653768679439, "grad_norm": 0.6471583197860549, "learning_rate": 1.2328049574250238e-07, "loss": 0.2763, "step": 38537 }, { "epoch": 1.8053122218578723, "grad_norm": 0.6179632791170099, "learning_rate": 1.2322168268492945e-07, "loss": 0.2783, "step": 38538 }, { "epoch": 1.8053590668478008, "grad_norm": 0.587607473215347, "learning_rate": 1.231628833051185e-07, "loss": 0.2768, "step": 38539 }, { "epoch": 1.8054059118377288, "grad_norm": 0.594573926207886, "learning_rate": 1.2310409760340758e-07, "loss": 0.2734, "step": 38540 }, { "epoch": 1.8054527568276573, "grad_norm": 0.6201607262573476, "learning_rate": 1.2304532558013532e-07, "loss": 0.2845, "step": 38541 }, { "epoch": 1.8054996018175857, "grad_norm": 0.5971865923685902, "learning_rate": 1.2298656723563952e-07, "loss": 0.2702, "step": 38542 }, { "epoch": 1.805546446807514, "grad_norm": 0.60420338170753, "learning_rate": 1.229278225702585e-07, "loss": 0.2615, "step": 38543 }, { "epoch": 1.8055932917974422, "grad_norm": 0.6252052868789911, "learning_rate": 1.2286909158433057e-07, "loss": 0.2856, "step": 38544 }, { "epoch": 1.8056401367873707, "grad_norm": 0.6101143608914951, "learning_rate": 1.2281037427819387e-07, "loss": 0.2807, "step": 38545 }, { "epoch": 1.805686981777299, "grad_norm": 0.5747289547723883, "learning_rate": 1.2275167065218557e-07, "loss": 0.2668, "step": 38546 }, { "epoch": 1.8057338267672272, "grad_norm": 0.6209608363001711, "learning_rate": 1.2269298070664375e-07, "loss": 0.2678, "step": 38547 }, { "epoch": 1.8057806717571556, "grad_norm": 0.5683375637375703, "learning_rate": 1.2263430444190678e-07, "loss": 0.2531, "step": 38548 }, { "epoch": 1.8058275167470839, "grad_norm": 0.5954492758494898, "learning_rate": 1.2257564185831155e-07, "loss": 0.2699, "step": 38549 }, { "epoch": 1.805874361737012, "grad_norm": 0.6363337253282249, "learning_rate": 1.2251699295619618e-07, "loss": 0.2948, "step": 38550 }, { "epoch": 1.8059212067269406, "grad_norm": 0.5783971027913135, "learning_rate": 1.2245835773589814e-07, "loss": 0.2569, "step": 38551 }, { "epoch": 1.805968051716869, "grad_norm": 0.6147280813877266, "learning_rate": 1.2239973619775414e-07, "loss": 0.2665, "step": 38552 }, { "epoch": 1.806014896706797, "grad_norm": 0.6112026741921343, "learning_rate": 1.2234112834210222e-07, "loss": 0.282, "step": 38553 }, { "epoch": 1.8060617416967255, "grad_norm": 0.6252357181625205, "learning_rate": 1.2228253416927987e-07, "loss": 0.2681, "step": 38554 }, { "epoch": 1.806108586686654, "grad_norm": 0.6053635254812242, "learning_rate": 1.2222395367962354e-07, "loss": 0.2689, "step": 38555 }, { "epoch": 1.8061554316765822, "grad_norm": 0.6086441903411505, "learning_rate": 1.2216538687347068e-07, "loss": 0.2738, "step": 38556 }, { "epoch": 1.8062022766665105, "grad_norm": 0.5805494563922, "learning_rate": 1.2210683375115885e-07, "loss": 0.2667, "step": 38557 }, { "epoch": 1.806249121656439, "grad_norm": 0.5880737255241102, "learning_rate": 1.220482943130241e-07, "loss": 0.2638, "step": 38558 }, { "epoch": 1.8062959666463672, "grad_norm": 0.5918452748139381, "learning_rate": 1.2198976855940375e-07, "loss": 0.2655, "step": 38559 }, { "epoch": 1.8063428116362954, "grad_norm": 0.6108940042684035, "learning_rate": 1.2193125649063469e-07, "loss": 0.278, "step": 38560 }, { "epoch": 1.8063896566262239, "grad_norm": 0.6125521504292074, "learning_rate": 1.2187275810705363e-07, "loss": 0.2715, "step": 38561 }, { "epoch": 1.806436501616152, "grad_norm": 0.5832992982547087, "learning_rate": 1.2181427340899692e-07, "loss": 0.2752, "step": 38562 }, { "epoch": 1.8064833466060803, "grad_norm": 0.6603978549113695, "learning_rate": 1.2175580239680184e-07, "loss": 0.2867, "step": 38563 }, { "epoch": 1.8065301915960088, "grad_norm": 0.5552309690703244, "learning_rate": 1.2169734507080366e-07, "loss": 0.246, "step": 38564 }, { "epoch": 1.8065770365859373, "grad_norm": 0.5645962267780413, "learning_rate": 1.216389014313399e-07, "loss": 0.2624, "step": 38565 }, { "epoch": 1.8066238815758655, "grad_norm": 0.6098283945946066, "learning_rate": 1.2158047147874635e-07, "loss": 0.2709, "step": 38566 }, { "epoch": 1.8066707265657937, "grad_norm": 0.6120098107221115, "learning_rate": 1.215220552133592e-07, "loss": 0.2686, "step": 38567 }, { "epoch": 1.8067175715557222, "grad_norm": 0.6245422838246488, "learning_rate": 1.2146365263551534e-07, "loss": 0.2972, "step": 38568 }, { "epoch": 1.8067644165456505, "grad_norm": 0.5778079719556342, "learning_rate": 1.214052637455504e-07, "loss": 0.2697, "step": 38569 }, { "epoch": 1.8068112615355787, "grad_norm": 0.6200276795451863, "learning_rate": 1.213468885437999e-07, "loss": 0.2727, "step": 38570 }, { "epoch": 1.8068581065255072, "grad_norm": 0.5480457868882572, "learning_rate": 1.2128852703059996e-07, "loss": 0.2393, "step": 38571 }, { "epoch": 1.8069049515154354, "grad_norm": 0.592447945987567, "learning_rate": 1.2123017920628698e-07, "loss": 0.2785, "step": 38572 }, { "epoch": 1.8069517965053636, "grad_norm": 0.6010246077869491, "learning_rate": 1.2117184507119628e-07, "loss": 0.2793, "step": 38573 }, { "epoch": 1.806998641495292, "grad_norm": 0.5469444104646617, "learning_rate": 1.2111352462566396e-07, "loss": 0.2569, "step": 38574 }, { "epoch": 1.8070454864852206, "grad_norm": 0.5991870675299051, "learning_rate": 1.2105521787002556e-07, "loss": 0.2681, "step": 38575 }, { "epoch": 1.8070923314751486, "grad_norm": 0.621458497678896, "learning_rate": 1.209969248046164e-07, "loss": 0.2762, "step": 38576 }, { "epoch": 1.807139176465077, "grad_norm": 0.6082996562410604, "learning_rate": 1.2093864542977174e-07, "loss": 0.2643, "step": 38577 }, { "epoch": 1.8071860214550055, "grad_norm": 0.6094547295759317, "learning_rate": 1.2088037974582718e-07, "loss": 0.2589, "step": 38578 }, { "epoch": 1.8072328664449337, "grad_norm": 0.6063902191541676, "learning_rate": 1.2082212775311825e-07, "loss": 0.2615, "step": 38579 }, { "epoch": 1.807279711434862, "grad_norm": 0.6460153586761699, "learning_rate": 1.207638894519797e-07, "loss": 0.3013, "step": 38580 }, { "epoch": 1.8073265564247905, "grad_norm": 0.5653745362370034, "learning_rate": 1.2070566484274764e-07, "loss": 0.2589, "step": 38581 }, { "epoch": 1.8073734014147187, "grad_norm": 0.6028776258359226, "learning_rate": 1.2064745392575572e-07, "loss": 0.271, "step": 38582 }, { "epoch": 1.807420246404647, "grad_norm": 0.6074278292426176, "learning_rate": 1.2058925670134031e-07, "loss": 0.289, "step": 38583 }, { "epoch": 1.8074670913945754, "grad_norm": 0.6170530342449099, "learning_rate": 1.205310731698353e-07, "loss": 0.2676, "step": 38584 }, { "epoch": 1.8075139363845036, "grad_norm": 0.5892814309952464, "learning_rate": 1.2047290333157575e-07, "loss": 0.2612, "step": 38585 }, { "epoch": 1.8075607813744319, "grad_norm": 0.6094070441064412, "learning_rate": 1.2041474718689689e-07, "loss": 0.2778, "step": 38586 }, { "epoch": 1.8076076263643603, "grad_norm": 0.5956136575248769, "learning_rate": 1.203566047361332e-07, "loss": 0.2645, "step": 38587 }, { "epoch": 1.8076544713542888, "grad_norm": 0.5798294396646936, "learning_rate": 1.2029847597961887e-07, "loss": 0.2737, "step": 38588 }, { "epoch": 1.8077013163442168, "grad_norm": 0.618708362529504, "learning_rate": 1.202403609176886e-07, "loss": 0.2962, "step": 38589 }, { "epoch": 1.8077481613341453, "grad_norm": 0.6152218474579478, "learning_rate": 1.2018225955067713e-07, "loss": 0.2787, "step": 38590 }, { "epoch": 1.8077950063240737, "grad_norm": 0.5903510241441021, "learning_rate": 1.201241718789184e-07, "loss": 0.2813, "step": 38591 }, { "epoch": 1.807841851314002, "grad_norm": 0.5962079370674112, "learning_rate": 1.200660979027471e-07, "loss": 0.2686, "step": 38592 }, { "epoch": 1.8078886963039302, "grad_norm": 0.6053376746440202, "learning_rate": 1.200080376224974e-07, "loss": 0.2968, "step": 38593 }, { "epoch": 1.8079355412938587, "grad_norm": 0.6312619161872168, "learning_rate": 1.1994999103850297e-07, "loss": 0.2721, "step": 38594 }, { "epoch": 1.807982386283787, "grad_norm": 0.6109618801665194, "learning_rate": 1.1989195815109795e-07, "loss": 0.2708, "step": 38595 }, { "epoch": 1.8080292312737152, "grad_norm": 0.6025386605609762, "learning_rate": 1.1983393896061678e-07, "loss": 0.2786, "step": 38596 }, { "epoch": 1.8080760762636436, "grad_norm": 0.5889924543722993, "learning_rate": 1.1977593346739315e-07, "loss": 0.2587, "step": 38597 }, { "epoch": 1.8081229212535719, "grad_norm": 0.6303212482583628, "learning_rate": 1.1971794167176059e-07, "loss": 0.2655, "step": 38598 }, { "epoch": 1.8081697662435001, "grad_norm": 0.6025464964227909, "learning_rate": 1.1965996357405334e-07, "loss": 0.2662, "step": 38599 }, { "epoch": 1.8082166112334286, "grad_norm": 0.5820500591500244, "learning_rate": 1.1960199917460447e-07, "loss": 0.2704, "step": 38600 }, { "epoch": 1.808263456223357, "grad_norm": 0.5681949769059959, "learning_rate": 1.1954404847374756e-07, "loss": 0.2473, "step": 38601 }, { "epoch": 1.8083103012132853, "grad_norm": 0.6108040994848606, "learning_rate": 1.1948611147181654e-07, "loss": 0.2751, "step": 38602 }, { "epoch": 1.8083571462032135, "grad_norm": 0.6203901046771426, "learning_rate": 1.1942818816914504e-07, "loss": 0.2699, "step": 38603 }, { "epoch": 1.808403991193142, "grad_norm": 0.5844504071345197, "learning_rate": 1.1937027856606555e-07, "loss": 0.277, "step": 38604 }, { "epoch": 1.8084508361830702, "grad_norm": 0.6064135266828247, "learning_rate": 1.193123826629117e-07, "loss": 0.2681, "step": 38605 }, { "epoch": 1.8084976811729985, "grad_norm": 0.5815532945607088, "learning_rate": 1.1925450046001709e-07, "loss": 0.264, "step": 38606 }, { "epoch": 1.808544526162927, "grad_norm": 0.6102090764426494, "learning_rate": 1.1919663195771396e-07, "loss": 0.2826, "step": 38607 }, { "epoch": 1.8085913711528552, "grad_norm": 0.6061392037769039, "learning_rate": 1.1913877715633599e-07, "loss": 0.2653, "step": 38608 }, { "epoch": 1.8086382161427834, "grad_norm": 0.6124589528222756, "learning_rate": 1.190809360562159e-07, "loss": 0.2795, "step": 38609 }, { "epoch": 1.8086850611327119, "grad_norm": 0.5721779031092747, "learning_rate": 1.1902310865768707e-07, "loss": 0.2591, "step": 38610 }, { "epoch": 1.8087319061226403, "grad_norm": 0.603713848659284, "learning_rate": 1.1896529496108145e-07, "loss": 0.2822, "step": 38611 }, { "epoch": 1.8087787511125684, "grad_norm": 0.6210150376113027, "learning_rate": 1.1890749496673237e-07, "loss": 0.2727, "step": 38612 }, { "epoch": 1.8088255961024968, "grad_norm": 0.5845384654021629, "learning_rate": 1.188497086749718e-07, "loss": 0.254, "step": 38613 }, { "epoch": 1.8088724410924253, "grad_norm": 0.5822249892015184, "learning_rate": 1.1879193608613282e-07, "loss": 0.2776, "step": 38614 }, { "epoch": 1.8089192860823535, "grad_norm": 0.6208628880357036, "learning_rate": 1.1873417720054764e-07, "loss": 0.2688, "step": 38615 }, { "epoch": 1.8089661310722818, "grad_norm": 0.5665078343186495, "learning_rate": 1.1867643201854878e-07, "loss": 0.276, "step": 38616 }, { "epoch": 1.8090129760622102, "grad_norm": 0.5793448496136031, "learning_rate": 1.1861870054046876e-07, "loss": 0.2662, "step": 38617 }, { "epoch": 1.8090598210521385, "grad_norm": 0.5917939467069614, "learning_rate": 1.1856098276663979e-07, "loss": 0.2683, "step": 38618 }, { "epoch": 1.8091066660420667, "grad_norm": 0.5287773776920587, "learning_rate": 1.185032786973933e-07, "loss": 0.2482, "step": 38619 }, { "epoch": 1.8091535110319952, "grad_norm": 0.5779594484050108, "learning_rate": 1.1844558833306208e-07, "loss": 0.2619, "step": 38620 }, { "epoch": 1.8092003560219234, "grad_norm": 0.6249597302526705, "learning_rate": 1.1838791167397779e-07, "loss": 0.2963, "step": 38621 }, { "epoch": 1.8092472010118517, "grad_norm": 0.6054519156460414, "learning_rate": 1.1833024872047238e-07, "loss": 0.2696, "step": 38622 }, { "epoch": 1.8092940460017801, "grad_norm": 0.5557531721174861, "learning_rate": 1.1827259947287784e-07, "loss": 0.2584, "step": 38623 }, { "epoch": 1.8093408909917086, "grad_norm": 0.6253132055277117, "learning_rate": 1.1821496393152638e-07, "loss": 0.2727, "step": 38624 }, { "epoch": 1.8093877359816366, "grad_norm": 0.5734696619236138, "learning_rate": 1.1815734209674884e-07, "loss": 0.2717, "step": 38625 }, { "epoch": 1.809434580971565, "grad_norm": 0.6019236716441301, "learning_rate": 1.1809973396887692e-07, "loss": 0.2725, "step": 38626 }, { "epoch": 1.8094814259614935, "grad_norm": 0.5683119953758118, "learning_rate": 1.1804213954824256e-07, "loss": 0.2583, "step": 38627 }, { "epoch": 1.8095282709514218, "grad_norm": 0.6207090716712401, "learning_rate": 1.179845588351769e-07, "loss": 0.2819, "step": 38628 }, { "epoch": 1.80957511594135, "grad_norm": 0.6283724433070949, "learning_rate": 1.1792699183001133e-07, "loss": 0.2843, "step": 38629 }, { "epoch": 1.8096219609312785, "grad_norm": 0.5994239697223567, "learning_rate": 1.1786943853307753e-07, "loss": 0.2561, "step": 38630 }, { "epoch": 1.8096688059212067, "grad_norm": 0.59644726621446, "learning_rate": 1.1781189894470607e-07, "loss": 0.2738, "step": 38631 }, { "epoch": 1.809715650911135, "grad_norm": 0.6133747005218321, "learning_rate": 1.1775437306522891e-07, "loss": 0.2657, "step": 38632 }, { "epoch": 1.8097624959010634, "grad_norm": 0.6076479075299196, "learning_rate": 1.1769686089497606e-07, "loss": 0.2583, "step": 38633 }, { "epoch": 1.8098093408909917, "grad_norm": 0.5270378626264295, "learning_rate": 1.1763936243427921e-07, "loss": 0.2346, "step": 38634 }, { "epoch": 1.80985618588092, "grad_norm": 0.5626095155347579, "learning_rate": 1.175818776834689e-07, "loss": 0.2629, "step": 38635 }, { "epoch": 1.8099030308708484, "grad_norm": 0.6070858206865206, "learning_rate": 1.1752440664287629e-07, "loss": 0.2678, "step": 38636 }, { "epoch": 1.8099498758607768, "grad_norm": 0.5962605006274043, "learning_rate": 1.1746694931283165e-07, "loss": 0.2539, "step": 38637 }, { "epoch": 1.809996720850705, "grad_norm": 0.5739501261274813, "learning_rate": 1.1740950569366582e-07, "loss": 0.2631, "step": 38638 }, { "epoch": 1.8100435658406333, "grad_norm": 0.6152034803510387, "learning_rate": 1.1735207578570996e-07, "loss": 0.2687, "step": 38639 }, { "epoch": 1.8100904108305618, "grad_norm": 0.6015797265192795, "learning_rate": 1.1729465958929375e-07, "loss": 0.2631, "step": 38640 }, { "epoch": 1.81013725582049, "grad_norm": 0.5873560019322364, "learning_rate": 1.1723725710474781e-07, "loss": 0.28, "step": 38641 }, { "epoch": 1.8101841008104183, "grad_norm": 0.6132439255666493, "learning_rate": 1.1717986833240297e-07, "loss": 0.281, "step": 38642 }, { "epoch": 1.8102309458003467, "grad_norm": 0.6116017748806857, "learning_rate": 1.171224932725884e-07, "loss": 0.2744, "step": 38643 }, { "epoch": 1.810277790790275, "grad_norm": 0.5917164038934942, "learning_rate": 1.1706513192563551e-07, "loss": 0.272, "step": 38644 }, { "epoch": 1.8103246357802032, "grad_norm": 0.5814006070405355, "learning_rate": 1.1700778429187376e-07, "loss": 0.2597, "step": 38645 }, { "epoch": 1.8103714807701317, "grad_norm": 0.590822560892829, "learning_rate": 1.1695045037163317e-07, "loss": 0.2763, "step": 38646 }, { "epoch": 1.8104183257600601, "grad_norm": 0.5502580971060388, "learning_rate": 1.1689313016524374e-07, "loss": 0.2608, "step": 38647 }, { "epoch": 1.8104651707499881, "grad_norm": 0.5880188051030057, "learning_rate": 1.1683582367303547e-07, "loss": 0.259, "step": 38648 }, { "epoch": 1.8105120157399166, "grad_norm": 0.6337543542784543, "learning_rate": 1.1677853089533814e-07, "loss": 0.2849, "step": 38649 }, { "epoch": 1.810558860729845, "grad_norm": 0.5884763630655947, "learning_rate": 1.1672125183248117e-07, "loss": 0.2587, "step": 38650 }, { "epoch": 1.8106057057197733, "grad_norm": 0.617204590389752, "learning_rate": 1.1666398648479432e-07, "loss": 0.2637, "step": 38651 }, { "epoch": 1.8106525507097015, "grad_norm": 0.5787324999994664, "learning_rate": 1.166067348526076e-07, "loss": 0.2691, "step": 38652 }, { "epoch": 1.81069939569963, "grad_norm": 0.5917593229766643, "learning_rate": 1.1654949693624962e-07, "loss": 0.278, "step": 38653 }, { "epoch": 1.8107462406895583, "grad_norm": 0.6026840893353945, "learning_rate": 1.1649227273605068e-07, "loss": 0.2765, "step": 38654 }, { "epoch": 1.8107930856794865, "grad_norm": 0.6242728246020544, "learning_rate": 1.1643506225233913e-07, "loss": 0.2763, "step": 38655 }, { "epoch": 1.810839930669415, "grad_norm": 0.5795401689219276, "learning_rate": 1.16377865485445e-07, "loss": 0.2598, "step": 38656 }, { "epoch": 1.8108867756593432, "grad_norm": 0.6100881345410013, "learning_rate": 1.1632068243569688e-07, "loss": 0.2699, "step": 38657 }, { "epoch": 1.8109336206492714, "grad_norm": 0.6055254153228429, "learning_rate": 1.1626351310342427e-07, "loss": 0.2814, "step": 38658 }, { "epoch": 1.8109804656392, "grad_norm": 0.6458617863941629, "learning_rate": 1.162063574889563e-07, "loss": 0.2839, "step": 38659 }, { "epoch": 1.8110273106291284, "grad_norm": 0.620859045816916, "learning_rate": 1.1614921559262165e-07, "loss": 0.277, "step": 38660 }, { "epoch": 1.8110741556190564, "grad_norm": 0.5681633281559567, "learning_rate": 1.1609208741474892e-07, "loss": 0.2637, "step": 38661 }, { "epoch": 1.8111210006089848, "grad_norm": 0.6019821895911645, "learning_rate": 1.1603497295566673e-07, "loss": 0.252, "step": 38662 }, { "epoch": 1.8111678455989133, "grad_norm": 0.5825686969782685, "learning_rate": 1.1597787221570428e-07, "loss": 0.268, "step": 38663 }, { "epoch": 1.8112146905888415, "grad_norm": 0.596294971008741, "learning_rate": 1.159207851951899e-07, "loss": 0.2647, "step": 38664 }, { "epoch": 1.8112615355787698, "grad_norm": 0.5789293007293417, "learning_rate": 1.1586371189445223e-07, "loss": 0.2682, "step": 38665 }, { "epoch": 1.8113083805686983, "grad_norm": 0.5445154090943732, "learning_rate": 1.1580665231381988e-07, "loss": 0.2474, "step": 38666 }, { "epoch": 1.8113552255586265, "grad_norm": 0.6175567756326561, "learning_rate": 1.1574960645362122e-07, "loss": 0.2665, "step": 38667 }, { "epoch": 1.8114020705485547, "grad_norm": 0.6136378408623423, "learning_rate": 1.1569257431418402e-07, "loss": 0.2778, "step": 38668 }, { "epoch": 1.8114489155384832, "grad_norm": 0.60253592962761, "learning_rate": 1.1563555589583663e-07, "loss": 0.2811, "step": 38669 }, { "epoch": 1.8114957605284114, "grad_norm": 0.605826141153534, "learning_rate": 1.1557855119890715e-07, "loss": 0.2788, "step": 38670 }, { "epoch": 1.8115426055183397, "grad_norm": 0.6000736804952465, "learning_rate": 1.155215602237239e-07, "loss": 0.2658, "step": 38671 }, { "epoch": 1.8115894505082681, "grad_norm": 0.6150443947020656, "learning_rate": 1.1546458297061496e-07, "loss": 0.274, "step": 38672 }, { "epoch": 1.8116362954981966, "grad_norm": 0.5752585049166964, "learning_rate": 1.1540761943990814e-07, "loss": 0.2479, "step": 38673 }, { "epoch": 1.8116831404881248, "grad_norm": 0.5498169746057532, "learning_rate": 1.153506696319312e-07, "loss": 0.2504, "step": 38674 }, { "epoch": 1.811729985478053, "grad_norm": 0.6077055001822231, "learning_rate": 1.1529373354701168e-07, "loss": 0.2689, "step": 38675 }, { "epoch": 1.8117768304679815, "grad_norm": 0.5589434208580608, "learning_rate": 1.1523681118547708e-07, "loss": 0.2609, "step": 38676 }, { "epoch": 1.8118236754579098, "grad_norm": 0.6304593509139057, "learning_rate": 1.151799025476552e-07, "loss": 0.266, "step": 38677 }, { "epoch": 1.811870520447838, "grad_norm": 0.59451215778334, "learning_rate": 1.1512300763387385e-07, "loss": 0.2535, "step": 38678 }, { "epoch": 1.8119173654377665, "grad_norm": 0.6004724208528944, "learning_rate": 1.1506612644446025e-07, "loss": 0.2706, "step": 38679 }, { "epoch": 1.8119642104276947, "grad_norm": 0.6075669330550264, "learning_rate": 1.1500925897974136e-07, "loss": 0.2752, "step": 38680 }, { "epoch": 1.812011055417623, "grad_norm": 0.5953692487272042, "learning_rate": 1.1495240524004526e-07, "loss": 0.2689, "step": 38681 }, { "epoch": 1.8120579004075514, "grad_norm": 0.5788854721179875, "learning_rate": 1.1489556522569805e-07, "loss": 0.265, "step": 38682 }, { "epoch": 1.81210474539748, "grad_norm": 0.5584660494672747, "learning_rate": 1.1483873893702757e-07, "loss": 0.2629, "step": 38683 }, { "epoch": 1.812151590387408, "grad_norm": 0.5894431438910374, "learning_rate": 1.1478192637436048e-07, "loss": 0.2667, "step": 38684 }, { "epoch": 1.8121984353773364, "grad_norm": 0.9225334560401202, "learning_rate": 1.1472512753802456e-07, "loss": 0.2824, "step": 38685 }, { "epoch": 1.8122452803672648, "grad_norm": 0.6620999754702507, "learning_rate": 1.146683424283454e-07, "loss": 0.2869, "step": 38686 }, { "epoch": 1.812292125357193, "grad_norm": 0.6289485862553823, "learning_rate": 1.1461157104565051e-07, "loss": 0.2643, "step": 38687 }, { "epoch": 1.8123389703471213, "grad_norm": 0.5891203582273431, "learning_rate": 1.1455481339026658e-07, "loss": 0.2686, "step": 38688 }, { "epoch": 1.8123858153370498, "grad_norm": 0.6200492857633568, "learning_rate": 1.1449806946252001e-07, "loss": 0.2663, "step": 38689 }, { "epoch": 1.812432660326978, "grad_norm": 0.6208430497685786, "learning_rate": 1.1444133926273748e-07, "loss": 0.2795, "step": 38690 }, { "epoch": 1.8124795053169063, "grad_norm": 0.5941709132202461, "learning_rate": 1.1438462279124568e-07, "loss": 0.2584, "step": 38691 }, { "epoch": 1.8125263503068347, "grad_norm": 0.5566260664143979, "learning_rate": 1.1432792004837073e-07, "loss": 0.2665, "step": 38692 }, { "epoch": 1.812573195296763, "grad_norm": 0.6146428013665227, "learning_rate": 1.1427123103443877e-07, "loss": 0.2536, "step": 38693 }, { "epoch": 1.8126200402866912, "grad_norm": 0.6054223522058778, "learning_rate": 1.1421455574977647e-07, "loss": 0.2944, "step": 38694 }, { "epoch": 1.8126668852766197, "grad_norm": 0.6091276845232473, "learning_rate": 1.1415789419470968e-07, "loss": 0.2658, "step": 38695 }, { "epoch": 1.8127137302665481, "grad_norm": 0.5633942833397384, "learning_rate": 1.1410124636956426e-07, "loss": 0.2631, "step": 38696 }, { "epoch": 1.8127605752564762, "grad_norm": 0.6349467052297851, "learning_rate": 1.1404461227466717e-07, "loss": 0.2685, "step": 38697 }, { "epoch": 1.8128074202464046, "grad_norm": 0.6144864448377259, "learning_rate": 1.1398799191034315e-07, "loss": 0.2806, "step": 38698 }, { "epoch": 1.812854265236333, "grad_norm": 0.6025887803462452, "learning_rate": 1.1393138527691832e-07, "loss": 0.2648, "step": 38699 }, { "epoch": 1.8129011102262613, "grad_norm": 0.6058157944677729, "learning_rate": 1.1387479237471882e-07, "loss": 0.268, "step": 38700 }, { "epoch": 1.8129479552161896, "grad_norm": 0.5806679334920289, "learning_rate": 1.138182132040705e-07, "loss": 0.2778, "step": 38701 }, { "epoch": 1.812994800206118, "grad_norm": 0.572098884144273, "learning_rate": 1.1376164776529836e-07, "loss": 0.2566, "step": 38702 }, { "epoch": 1.8130416451960463, "grad_norm": 0.6610828582346667, "learning_rate": 1.1370509605872854e-07, "loss": 0.2848, "step": 38703 }, { "epoch": 1.8130884901859745, "grad_norm": 0.6213406105122623, "learning_rate": 1.1364855808468578e-07, "loss": 0.2773, "step": 38704 }, { "epoch": 1.813135335175903, "grad_norm": 0.5592932401478692, "learning_rate": 1.1359203384349565e-07, "loss": 0.2512, "step": 38705 }, { "epoch": 1.8131821801658312, "grad_norm": 0.6315662842505109, "learning_rate": 1.1353552333548346e-07, "loss": 0.2882, "step": 38706 }, { "epoch": 1.8132290251557595, "grad_norm": 0.5903511535201144, "learning_rate": 1.1347902656097476e-07, "loss": 0.2604, "step": 38707 }, { "epoch": 1.813275870145688, "grad_norm": 0.6244059536382879, "learning_rate": 1.1342254352029459e-07, "loss": 0.2652, "step": 38708 }, { "epoch": 1.8133227151356164, "grad_norm": 0.5737678976728554, "learning_rate": 1.1336607421376794e-07, "loss": 0.272, "step": 38709 }, { "epoch": 1.8133695601255446, "grad_norm": 0.6237813173835682, "learning_rate": 1.133096186417193e-07, "loss": 0.2776, "step": 38710 }, { "epoch": 1.8134164051154729, "grad_norm": 0.6564753622030277, "learning_rate": 1.1325317680447395e-07, "loss": 0.2774, "step": 38711 }, { "epoch": 1.8134632501054013, "grad_norm": 0.6024752926938257, "learning_rate": 1.1319674870235692e-07, "loss": 0.2646, "step": 38712 }, { "epoch": 1.8135100950953296, "grad_norm": 0.5813322066557999, "learning_rate": 1.1314033433569238e-07, "loss": 0.2674, "step": 38713 }, { "epoch": 1.8135569400852578, "grad_norm": 0.6278986625769345, "learning_rate": 1.1308393370480536e-07, "loss": 0.278, "step": 38714 }, { "epoch": 1.8136037850751863, "grad_norm": 0.5778334931561199, "learning_rate": 1.1302754681002087e-07, "loss": 0.2602, "step": 38715 }, { "epoch": 1.8136506300651145, "grad_norm": 0.5763614716487224, "learning_rate": 1.1297117365166255e-07, "loss": 0.2597, "step": 38716 }, { "epoch": 1.8136974750550428, "grad_norm": 0.5823624571744096, "learning_rate": 1.1291481423005513e-07, "loss": 0.2533, "step": 38717 }, { "epoch": 1.8137443200449712, "grad_norm": 0.5562728730510941, "learning_rate": 1.1285846854552307e-07, "loss": 0.2623, "step": 38718 }, { "epoch": 1.8137911650348997, "grad_norm": 0.6179413642853651, "learning_rate": 1.1280213659839029e-07, "loss": 0.2673, "step": 38719 }, { "epoch": 1.8138380100248277, "grad_norm": 0.5908767929264281, "learning_rate": 1.1274581838898124e-07, "loss": 0.2676, "step": 38720 }, { "epoch": 1.8138848550147562, "grad_norm": 0.5752436414950006, "learning_rate": 1.1268951391762039e-07, "loss": 0.2622, "step": 38721 }, { "epoch": 1.8139317000046846, "grad_norm": 0.6205274640426263, "learning_rate": 1.1263322318463111e-07, "loss": 0.2714, "step": 38722 }, { "epoch": 1.8139785449946129, "grad_norm": 0.6035742632033739, "learning_rate": 1.1257694619033782e-07, "loss": 0.2653, "step": 38723 }, { "epoch": 1.814025389984541, "grad_norm": 0.6011653937818572, "learning_rate": 1.1252068293506391e-07, "loss": 0.2637, "step": 38724 }, { "epoch": 1.8140722349744696, "grad_norm": 0.5850236731616792, "learning_rate": 1.1246443341913327e-07, "loss": 0.279, "step": 38725 }, { "epoch": 1.8141190799643978, "grad_norm": 0.6127235913374038, "learning_rate": 1.1240819764286981e-07, "loss": 0.2864, "step": 38726 }, { "epoch": 1.814165924954326, "grad_norm": 0.6038105667810034, "learning_rate": 1.1235197560659744e-07, "loss": 0.2593, "step": 38727 }, { "epoch": 1.8142127699442545, "grad_norm": 0.556958478476123, "learning_rate": 1.1229576731063896e-07, "loss": 0.2657, "step": 38728 }, { "epoch": 1.8142596149341828, "grad_norm": 0.5763460886484449, "learning_rate": 1.12239572755318e-07, "loss": 0.2727, "step": 38729 }, { "epoch": 1.814306459924111, "grad_norm": 0.5778122038008762, "learning_rate": 1.1218339194095873e-07, "loss": 0.2566, "step": 38730 }, { "epoch": 1.8143533049140395, "grad_norm": 0.6067434716829136, "learning_rate": 1.1212722486788368e-07, "loss": 0.2767, "step": 38731 }, { "epoch": 1.814400149903968, "grad_norm": 0.612895417213469, "learning_rate": 1.1207107153641594e-07, "loss": 0.2759, "step": 38732 }, { "epoch": 1.814446994893896, "grad_norm": 0.5560896161912704, "learning_rate": 1.1201493194687968e-07, "loss": 0.246, "step": 38733 }, { "epoch": 1.8144938398838244, "grad_norm": 0.6392770954485356, "learning_rate": 1.1195880609959658e-07, "loss": 0.2833, "step": 38734 }, { "epoch": 1.8145406848737529, "grad_norm": 0.6590295178753603, "learning_rate": 1.1190269399489056e-07, "loss": 0.2867, "step": 38735 }, { "epoch": 1.814587529863681, "grad_norm": 0.6124469197157096, "learning_rate": 1.1184659563308442e-07, "loss": 0.2616, "step": 38736 }, { "epoch": 1.8146343748536093, "grad_norm": 0.5675989772821096, "learning_rate": 1.1179051101450095e-07, "loss": 0.2643, "step": 38737 }, { "epoch": 1.8146812198435378, "grad_norm": 0.6034706696280752, "learning_rate": 1.1173444013946266e-07, "loss": 0.2611, "step": 38738 }, { "epoch": 1.814728064833466, "grad_norm": 0.5789436842484654, "learning_rate": 1.1167838300829236e-07, "loss": 0.2639, "step": 38739 }, { "epoch": 1.8147749098233943, "grad_norm": 0.5823239152200633, "learning_rate": 1.1162233962131314e-07, "loss": 0.264, "step": 38740 }, { "epoch": 1.8148217548133228, "grad_norm": 0.6168389923152572, "learning_rate": 1.1156630997884666e-07, "loss": 0.2606, "step": 38741 }, { "epoch": 1.814868599803251, "grad_norm": 0.5679532272158712, "learning_rate": 1.1151029408121572e-07, "loss": 0.2709, "step": 38742 }, { "epoch": 1.8149154447931792, "grad_norm": 0.6227134509276817, "learning_rate": 1.1145429192874285e-07, "loss": 0.2719, "step": 38743 }, { "epoch": 1.8149622897831077, "grad_norm": 0.5831845290698806, "learning_rate": 1.1139830352175029e-07, "loss": 0.248, "step": 38744 }, { "epoch": 1.8150091347730362, "grad_norm": 0.6216138759633003, "learning_rate": 1.1134232886055974e-07, "loss": 0.2821, "step": 38745 }, { "epoch": 1.8150559797629644, "grad_norm": 0.5897786419400868, "learning_rate": 1.1128636794549424e-07, "loss": 0.2597, "step": 38746 }, { "epoch": 1.8151028247528926, "grad_norm": 0.6070917465085665, "learning_rate": 1.1123042077687495e-07, "loss": 0.2669, "step": 38747 }, { "epoch": 1.815149669742821, "grad_norm": 0.6245570821911738, "learning_rate": 1.1117448735502413e-07, "loss": 0.277, "step": 38748 }, { "epoch": 1.8151965147327493, "grad_norm": 0.5848775004761269, "learning_rate": 1.1111856768026396e-07, "loss": 0.2664, "step": 38749 }, { "epoch": 1.8152433597226776, "grad_norm": 0.5590697597966958, "learning_rate": 1.1106266175291619e-07, "loss": 0.2567, "step": 38750 }, { "epoch": 1.815290204712606, "grad_norm": 0.6047725667309353, "learning_rate": 1.1100676957330192e-07, "loss": 0.2759, "step": 38751 }, { "epoch": 1.8153370497025343, "grad_norm": 0.601465658172591, "learning_rate": 1.1095089114174367e-07, "loss": 0.261, "step": 38752 }, { "epoch": 1.8153838946924625, "grad_norm": 0.5871123748297136, "learning_rate": 1.1089502645856232e-07, "loss": 0.2639, "step": 38753 }, { "epoch": 1.815430739682391, "grad_norm": 0.5535984963762278, "learning_rate": 1.1083917552407952e-07, "loss": 0.2592, "step": 38754 }, { "epoch": 1.8154775846723195, "grad_norm": 0.5780309877063936, "learning_rate": 1.1078333833861671e-07, "loss": 0.2863, "step": 38755 }, { "epoch": 1.8155244296622475, "grad_norm": 0.6477005937520627, "learning_rate": 1.1072751490249556e-07, "loss": 0.2795, "step": 38756 }, { "epoch": 1.815571274652176, "grad_norm": 0.6209689348310853, "learning_rate": 1.1067170521603693e-07, "loss": 0.2749, "step": 38757 }, { "epoch": 1.8156181196421044, "grad_norm": 0.5787489421459142, "learning_rate": 1.106159092795625e-07, "loss": 0.2714, "step": 38758 }, { "epoch": 1.8156649646320326, "grad_norm": 0.5846083846550542, "learning_rate": 1.1056012709339231e-07, "loss": 0.2658, "step": 38759 }, { "epoch": 1.8157118096219609, "grad_norm": 0.611654703723256, "learning_rate": 1.1050435865784831e-07, "loss": 0.2824, "step": 38760 }, { "epoch": 1.8157586546118893, "grad_norm": 0.5945254606818525, "learning_rate": 1.1044860397325107e-07, "loss": 0.2549, "step": 38761 }, { "epoch": 1.8158054996018176, "grad_norm": 0.5786365990650462, "learning_rate": 1.1039286303992148e-07, "loss": 0.2717, "step": 38762 }, { "epoch": 1.8158523445917458, "grad_norm": 0.6188971899585356, "learning_rate": 1.1033713585818034e-07, "loss": 0.2851, "step": 38763 }, { "epoch": 1.8158991895816743, "grad_norm": 0.5882510184620804, "learning_rate": 1.1028142242834883e-07, "loss": 0.2533, "step": 38764 }, { "epoch": 1.8159460345716025, "grad_norm": 0.6150569432659002, "learning_rate": 1.1022572275074695e-07, "loss": 0.2727, "step": 38765 }, { "epoch": 1.8159928795615308, "grad_norm": 0.5680848748974648, "learning_rate": 1.10170036825695e-07, "loss": 0.2613, "step": 38766 }, { "epoch": 1.8160397245514592, "grad_norm": 0.6155286821994553, "learning_rate": 1.1011436465351411e-07, "loss": 0.2833, "step": 38767 }, { "epoch": 1.8160865695413877, "grad_norm": 0.5989228286453377, "learning_rate": 1.1005870623452403e-07, "loss": 0.2695, "step": 38768 }, { "epoch": 1.8161334145313157, "grad_norm": 0.6293011931973141, "learning_rate": 1.1000306156904561e-07, "loss": 0.2683, "step": 38769 }, { "epoch": 1.8161802595212442, "grad_norm": 0.5866776632512419, "learning_rate": 1.0994743065739915e-07, "loss": 0.2652, "step": 38770 }, { "epoch": 1.8162271045111726, "grad_norm": 0.5898442890417954, "learning_rate": 1.0989181349990413e-07, "loss": 0.2624, "step": 38771 }, { "epoch": 1.8162739495011009, "grad_norm": 0.6317333082224066, "learning_rate": 1.0983621009688139e-07, "loss": 0.2747, "step": 38772 }, { "epoch": 1.8163207944910291, "grad_norm": 0.6288607956106168, "learning_rate": 1.0978062044865012e-07, "loss": 0.2879, "step": 38773 }, { "epoch": 1.8163676394809576, "grad_norm": 0.6455721684898857, "learning_rate": 1.0972504455553062e-07, "loss": 0.277, "step": 38774 }, { "epoch": 1.8164144844708858, "grad_norm": 0.6012668956695569, "learning_rate": 1.0966948241784292e-07, "loss": 0.2589, "step": 38775 }, { "epoch": 1.816461329460814, "grad_norm": 0.5774506971630714, "learning_rate": 1.0961393403590675e-07, "loss": 0.2726, "step": 38776 }, { "epoch": 1.8165081744507425, "grad_norm": 0.6100434418276017, "learning_rate": 1.0955839941004132e-07, "loss": 0.2764, "step": 38777 }, { "epoch": 1.8165550194406708, "grad_norm": 0.6075262736495318, "learning_rate": 1.0950287854056635e-07, "loss": 0.2702, "step": 38778 }, { "epoch": 1.816601864430599, "grad_norm": 0.5754106666022903, "learning_rate": 1.0944737142780187e-07, "loss": 0.2661, "step": 38779 }, { "epoch": 1.8166487094205275, "grad_norm": 0.5759554535433634, "learning_rate": 1.0939187807206653e-07, "loss": 0.2576, "step": 38780 }, { "epoch": 1.816695554410456, "grad_norm": 0.6080162323867373, "learning_rate": 1.0933639847368033e-07, "loss": 0.2838, "step": 38781 }, { "epoch": 1.8167423994003842, "grad_norm": 0.620182608944764, "learning_rate": 1.092809326329622e-07, "loss": 0.2778, "step": 38782 }, { "epoch": 1.8167892443903124, "grad_norm": 0.5929275362970283, "learning_rate": 1.0922548055023158e-07, "loss": 0.2849, "step": 38783 }, { "epoch": 1.8168360893802409, "grad_norm": 0.6318933519676779, "learning_rate": 1.0917004222580712e-07, "loss": 0.2918, "step": 38784 }, { "epoch": 1.8168829343701691, "grad_norm": 0.5716850798764448, "learning_rate": 1.091146176600083e-07, "loss": 0.2665, "step": 38785 }, { "epoch": 1.8169297793600974, "grad_norm": 0.6361695488942104, "learning_rate": 1.0905920685315402e-07, "loss": 0.2915, "step": 38786 }, { "epoch": 1.8169766243500258, "grad_norm": 0.5812535099702422, "learning_rate": 1.0900380980556291e-07, "loss": 0.2639, "step": 38787 }, { "epoch": 1.817023469339954, "grad_norm": 0.5658937455799314, "learning_rate": 1.0894842651755416e-07, "loss": 0.2587, "step": 38788 }, { "epoch": 1.8170703143298823, "grad_norm": 0.5776209359966357, "learning_rate": 1.0889305698944585e-07, "loss": 0.2723, "step": 38789 }, { "epoch": 1.8171171593198108, "grad_norm": 0.6315558323192678, "learning_rate": 1.0883770122155718e-07, "loss": 0.2808, "step": 38790 }, { "epoch": 1.8171640043097392, "grad_norm": 0.6141776006585972, "learning_rate": 1.0878235921420649e-07, "loss": 0.2832, "step": 38791 }, { "epoch": 1.8172108492996673, "grad_norm": 0.6055028684356978, "learning_rate": 1.087270309677127e-07, "loss": 0.2794, "step": 38792 }, { "epoch": 1.8172576942895957, "grad_norm": 0.586987859969744, "learning_rate": 1.0867171648239333e-07, "loss": 0.26, "step": 38793 }, { "epoch": 1.8173045392795242, "grad_norm": 0.5763845018929578, "learning_rate": 1.0861641575856785e-07, "loss": 0.2587, "step": 38794 }, { "epoch": 1.8173513842694524, "grad_norm": 0.5867630262012772, "learning_rate": 1.0856112879655323e-07, "loss": 0.2588, "step": 38795 }, { "epoch": 1.8173982292593807, "grad_norm": 0.6083120294853012, "learning_rate": 1.0850585559666837e-07, "loss": 0.2662, "step": 38796 }, { "epoch": 1.8174450742493091, "grad_norm": 0.5979091210059907, "learning_rate": 1.0845059615923109e-07, "loss": 0.2801, "step": 38797 }, { "epoch": 1.8174919192392374, "grad_norm": 0.6079247794160216, "learning_rate": 1.0839535048455974e-07, "loss": 0.2767, "step": 38798 }, { "epoch": 1.8175387642291656, "grad_norm": 0.5802070611569713, "learning_rate": 1.0834011857297211e-07, "loss": 0.2687, "step": 38799 }, { "epoch": 1.817585609219094, "grad_norm": 0.6152587714712167, "learning_rate": 1.0828490042478628e-07, "loss": 0.2643, "step": 38800 }, { "epoch": 1.8176324542090223, "grad_norm": 0.5720219637533385, "learning_rate": 1.0822969604031924e-07, "loss": 0.2458, "step": 38801 }, { "epoch": 1.8176792991989505, "grad_norm": 0.6311270337016621, "learning_rate": 1.0817450541988905e-07, "loss": 0.2709, "step": 38802 }, { "epoch": 1.817726144188879, "grad_norm": 0.5629898089280669, "learning_rate": 1.0811932856381352e-07, "loss": 0.2453, "step": 38803 }, { "epoch": 1.8177729891788075, "grad_norm": 0.6113414035005831, "learning_rate": 1.0806416547241016e-07, "loss": 0.2755, "step": 38804 }, { "epoch": 1.8178198341687355, "grad_norm": 0.5774612178768513, "learning_rate": 1.0800901614599624e-07, "loss": 0.2786, "step": 38805 }, { "epoch": 1.817866679158664, "grad_norm": 0.6501989726730262, "learning_rate": 1.0795388058488954e-07, "loss": 0.2661, "step": 38806 }, { "epoch": 1.8179135241485924, "grad_norm": 0.6027702715241342, "learning_rate": 1.0789875878940703e-07, "loss": 0.2824, "step": 38807 }, { "epoch": 1.8179603691385207, "grad_norm": 0.5653352406435367, "learning_rate": 1.078436507598657e-07, "loss": 0.2583, "step": 38808 }, { "epoch": 1.818007214128449, "grad_norm": 0.6459674162403468, "learning_rate": 1.0778855649658276e-07, "loss": 0.2885, "step": 38809 }, { "epoch": 1.8180540591183774, "grad_norm": 0.6096380197746234, "learning_rate": 1.077334759998755e-07, "loss": 0.2775, "step": 38810 }, { "epoch": 1.8181009041083056, "grad_norm": 0.6231247463105614, "learning_rate": 1.0767840927006085e-07, "loss": 0.2761, "step": 38811 }, { "epoch": 1.8181477490982338, "grad_norm": 0.5901206559887558, "learning_rate": 1.0762335630745552e-07, "loss": 0.277, "step": 38812 }, { "epoch": 1.8181945940881623, "grad_norm": 0.5592274631806647, "learning_rate": 1.0756831711237703e-07, "loss": 0.2602, "step": 38813 }, { "epoch": 1.8182414390780905, "grad_norm": 0.6259094682837821, "learning_rate": 1.0751329168514124e-07, "loss": 0.2659, "step": 38814 }, { "epoch": 1.8182882840680188, "grad_norm": 0.5771581917152878, "learning_rate": 1.0745828002606511e-07, "loss": 0.2741, "step": 38815 }, { "epoch": 1.8183351290579473, "grad_norm": 0.6320659646974619, "learning_rate": 1.0740328213546508e-07, "loss": 0.2647, "step": 38816 }, { "epoch": 1.8183819740478757, "grad_norm": 0.5663612008182343, "learning_rate": 1.0734829801365781e-07, "loss": 0.2557, "step": 38817 }, { "epoch": 1.818428819037804, "grad_norm": 0.5587998976554248, "learning_rate": 1.0729332766095973e-07, "loss": 0.2697, "step": 38818 }, { "epoch": 1.8184756640277322, "grad_norm": 0.6106560670070564, "learning_rate": 1.0723837107768753e-07, "loss": 0.2784, "step": 38819 }, { "epoch": 1.8185225090176607, "grad_norm": 0.5995585845861763, "learning_rate": 1.0718342826415679e-07, "loss": 0.2759, "step": 38820 }, { "epoch": 1.818569354007589, "grad_norm": 0.5718668333191312, "learning_rate": 1.0712849922068447e-07, "loss": 0.2638, "step": 38821 }, { "epoch": 1.8186161989975171, "grad_norm": 0.600072246815348, "learning_rate": 1.070735839475856e-07, "loss": 0.2711, "step": 38822 }, { "epoch": 1.8186630439874456, "grad_norm": 0.6184441802632955, "learning_rate": 1.0701868244517716e-07, "loss": 0.2753, "step": 38823 }, { "epoch": 1.8187098889773738, "grad_norm": 0.6012741253902518, "learning_rate": 1.0696379471377444e-07, "loss": 0.2776, "step": 38824 }, { "epoch": 1.818756733967302, "grad_norm": 0.6514523877359806, "learning_rate": 1.0690892075369413e-07, "loss": 0.2746, "step": 38825 }, { "epoch": 1.8188035789572305, "grad_norm": 0.5934731019799366, "learning_rate": 1.0685406056525099e-07, "loss": 0.2696, "step": 38826 }, { "epoch": 1.818850423947159, "grad_norm": 0.5627924782594871, "learning_rate": 1.0679921414876115e-07, "loss": 0.2572, "step": 38827 }, { "epoch": 1.818897268937087, "grad_norm": 0.6445435227695574, "learning_rate": 1.0674438150454102e-07, "loss": 0.2816, "step": 38828 }, { "epoch": 1.8189441139270155, "grad_norm": 0.584409124483138, "learning_rate": 1.066895626329048e-07, "loss": 0.2628, "step": 38829 }, { "epoch": 1.818990958916944, "grad_norm": 0.5947532910976688, "learning_rate": 1.0663475753416891e-07, "loss": 0.2646, "step": 38830 }, { "epoch": 1.8190378039068722, "grad_norm": 0.6074111078802291, "learning_rate": 1.0657996620864863e-07, "loss": 0.2796, "step": 38831 }, { "epoch": 1.8190846488968004, "grad_norm": 0.5612096249282094, "learning_rate": 1.0652518865665874e-07, "loss": 0.2564, "step": 38832 }, { "epoch": 1.819131493886729, "grad_norm": 0.5782477757733583, "learning_rate": 1.0647042487851478e-07, "loss": 0.2688, "step": 38833 }, { "epoch": 1.8191783388766571, "grad_norm": 0.6103728244396102, "learning_rate": 1.0641567487453208e-07, "loss": 0.2889, "step": 38834 }, { "epoch": 1.8192251838665854, "grad_norm": 0.603176059804986, "learning_rate": 1.0636093864502539e-07, "loss": 0.2725, "step": 38835 }, { "epoch": 1.8192720288565138, "grad_norm": 0.6178078805779372, "learning_rate": 1.0630621619031e-07, "loss": 0.2805, "step": 38836 }, { "epoch": 1.819318873846442, "grad_norm": 0.6743217425787559, "learning_rate": 1.0625150751070096e-07, "loss": 0.2906, "step": 38837 }, { "epoch": 1.8193657188363703, "grad_norm": 0.629574426070385, "learning_rate": 1.0619681260651244e-07, "loss": 0.2824, "step": 38838 }, { "epoch": 1.8194125638262988, "grad_norm": 0.6072889176856893, "learning_rate": 1.0614213147805974e-07, "loss": 0.2794, "step": 38839 }, { "epoch": 1.8194594088162273, "grad_norm": 0.5315919549200624, "learning_rate": 1.0608746412565734e-07, "loss": 0.2435, "step": 38840 }, { "epoch": 1.8195062538061553, "grad_norm": 0.5821268624344682, "learning_rate": 1.0603281054962028e-07, "loss": 0.2808, "step": 38841 }, { "epoch": 1.8195530987960837, "grad_norm": 0.6283190523236544, "learning_rate": 1.0597817075026246e-07, "loss": 0.2741, "step": 38842 }, { "epoch": 1.8195999437860122, "grad_norm": 0.6002953862223037, "learning_rate": 1.0592354472789861e-07, "loss": 0.2902, "step": 38843 }, { "epoch": 1.8196467887759404, "grad_norm": 0.6483959436039123, "learning_rate": 1.0586893248284269e-07, "loss": 0.2712, "step": 38844 }, { "epoch": 1.8196936337658687, "grad_norm": 0.5868403431493772, "learning_rate": 1.0581433401540969e-07, "loss": 0.2691, "step": 38845 }, { "epoch": 1.8197404787557971, "grad_norm": 0.5960308062754703, "learning_rate": 1.0575974932591299e-07, "loss": 0.2681, "step": 38846 }, { "epoch": 1.8197873237457254, "grad_norm": 0.6341647257466225, "learning_rate": 1.057051784146676e-07, "loss": 0.2741, "step": 38847 }, { "epoch": 1.8198341687356536, "grad_norm": 0.5791857786250036, "learning_rate": 1.0565062128198717e-07, "loss": 0.255, "step": 38848 }, { "epoch": 1.819881013725582, "grad_norm": 0.5793684519945864, "learning_rate": 1.055960779281856e-07, "loss": 0.2674, "step": 38849 }, { "epoch": 1.8199278587155103, "grad_norm": 0.6341530043925389, "learning_rate": 1.0554154835357655e-07, "loss": 0.2891, "step": 38850 }, { "epoch": 1.8199747037054386, "grad_norm": 0.6458912376961686, "learning_rate": 1.054870325584742e-07, "loss": 0.2717, "step": 38851 }, { "epoch": 1.820021548695367, "grad_norm": 0.5942261405775117, "learning_rate": 1.0543253054319191e-07, "loss": 0.2743, "step": 38852 }, { "epoch": 1.8200683936852955, "grad_norm": 0.6205550101376993, "learning_rate": 1.0537804230804361e-07, "loss": 0.279, "step": 38853 }, { "epoch": 1.8201152386752237, "grad_norm": 0.6000794144615629, "learning_rate": 1.0532356785334291e-07, "loss": 0.2622, "step": 38854 }, { "epoch": 1.820162083665152, "grad_norm": 0.5670337261059806, "learning_rate": 1.0526910717940348e-07, "loss": 0.258, "step": 38855 }, { "epoch": 1.8202089286550804, "grad_norm": 0.5955397802064294, "learning_rate": 1.0521466028653837e-07, "loss": 0.2591, "step": 38856 }, { "epoch": 1.8202557736450087, "grad_norm": 0.599494222809529, "learning_rate": 1.0516022717506069e-07, "loss": 0.2634, "step": 38857 }, { "epoch": 1.820302618634937, "grad_norm": 0.6111050303809861, "learning_rate": 1.0510580784528407e-07, "loss": 0.283, "step": 38858 }, { "epoch": 1.8203494636248654, "grad_norm": 0.5259393560289003, "learning_rate": 1.0505140229752159e-07, "loss": 0.2529, "step": 38859 }, { "epoch": 1.8203963086147936, "grad_norm": 0.5596913489994986, "learning_rate": 1.0499701053208605e-07, "loss": 0.2507, "step": 38860 }, { "epoch": 1.8204431536047219, "grad_norm": 0.6793716025389026, "learning_rate": 1.0494263254929138e-07, "loss": 0.2924, "step": 38861 }, { "epoch": 1.8204899985946503, "grad_norm": 0.5603842655182484, "learning_rate": 1.0488826834944954e-07, "loss": 0.2501, "step": 38862 }, { "epoch": 1.8205368435845788, "grad_norm": 0.6084967104739357, "learning_rate": 1.048339179328739e-07, "loss": 0.2685, "step": 38863 }, { "epoch": 1.8205836885745068, "grad_norm": 0.594650485525024, "learning_rate": 1.0477958129987697e-07, "loss": 0.2799, "step": 38864 }, { "epoch": 1.8206305335644353, "grad_norm": 0.5968157109768357, "learning_rate": 1.0472525845077131e-07, "loss": 0.2709, "step": 38865 }, { "epoch": 1.8206773785543637, "grad_norm": 0.6175075042357745, "learning_rate": 1.0467094938586997e-07, "loss": 0.2739, "step": 38866 }, { "epoch": 1.820724223544292, "grad_norm": 0.6427806273921328, "learning_rate": 1.0461665410548549e-07, "loss": 0.2744, "step": 38867 }, { "epoch": 1.8207710685342202, "grad_norm": 0.6376507185379213, "learning_rate": 1.0456237260992957e-07, "loss": 0.2846, "step": 38868 }, { "epoch": 1.8208179135241487, "grad_norm": 0.6155178274134265, "learning_rate": 1.045081048995153e-07, "loss": 0.2793, "step": 38869 }, { "epoch": 1.820864758514077, "grad_norm": 0.5670519543071606, "learning_rate": 1.0445385097455518e-07, "loss": 0.2723, "step": 38870 }, { "epoch": 1.8209116035040052, "grad_norm": 0.5968544250798661, "learning_rate": 1.0439961083536066e-07, "loss": 0.2609, "step": 38871 }, { "epoch": 1.8209584484939336, "grad_norm": 0.6317046788743567, "learning_rate": 1.0434538448224451e-07, "loss": 0.2719, "step": 38872 }, { "epoch": 1.8210052934838619, "grad_norm": 0.6063524561541053, "learning_rate": 1.0429117191551846e-07, "loss": 0.2845, "step": 38873 }, { "epoch": 1.82105213847379, "grad_norm": 0.6183618996751259, "learning_rate": 1.0423697313549446e-07, "loss": 0.2685, "step": 38874 }, { "epoch": 1.8210989834637186, "grad_norm": 0.5746272379067774, "learning_rate": 1.0418278814248451e-07, "loss": 0.258, "step": 38875 }, { "epoch": 1.821145828453647, "grad_norm": 0.6132317765152236, "learning_rate": 1.0412861693680055e-07, "loss": 0.278, "step": 38876 }, { "epoch": 1.821192673443575, "grad_norm": 0.6016096127442041, "learning_rate": 1.0407445951875456e-07, "loss": 0.254, "step": 38877 }, { "epoch": 1.8212395184335035, "grad_norm": 0.6164871871413307, "learning_rate": 1.0402031588865741e-07, "loss": 0.2674, "step": 38878 }, { "epoch": 1.821286363423432, "grad_norm": 0.5928457977887306, "learning_rate": 1.0396618604682108e-07, "loss": 0.2717, "step": 38879 }, { "epoch": 1.8213332084133602, "grad_norm": 0.5674302990946273, "learning_rate": 1.0391206999355752e-07, "loss": 0.2701, "step": 38880 }, { "epoch": 1.8213800534032885, "grad_norm": 0.5865089479353824, "learning_rate": 1.038579677291776e-07, "loss": 0.2659, "step": 38881 }, { "epoch": 1.821426898393217, "grad_norm": 0.5543281024342217, "learning_rate": 1.038038792539925e-07, "loss": 0.269, "step": 38882 }, { "epoch": 1.8214737433831452, "grad_norm": 0.60177696606514, "learning_rate": 1.037498045683144e-07, "loss": 0.2681, "step": 38883 }, { "epoch": 1.8215205883730734, "grad_norm": 0.5755437931050555, "learning_rate": 1.0369574367245338e-07, "loss": 0.2568, "step": 38884 }, { "epoch": 1.8215674333630019, "grad_norm": 0.5703705351921222, "learning_rate": 1.0364169656672112e-07, "loss": 0.2667, "step": 38885 }, { "epoch": 1.82161427835293, "grad_norm": 0.5797222557944376, "learning_rate": 1.0358766325142905e-07, "loss": 0.262, "step": 38886 }, { "epoch": 1.8216611233428583, "grad_norm": 0.5899998286505355, "learning_rate": 1.0353364372688746e-07, "loss": 0.2701, "step": 38887 }, { "epoch": 1.8217079683327868, "grad_norm": 0.6180180371631073, "learning_rate": 1.034796379934072e-07, "loss": 0.2795, "step": 38888 }, { "epoch": 1.8217548133227153, "grad_norm": 0.55314342518387, "learning_rate": 1.0342564605129918e-07, "loss": 0.2416, "step": 38889 }, { "epoch": 1.8218016583126435, "grad_norm": 0.5567484440058947, "learning_rate": 1.0337166790087477e-07, "loss": 0.261, "step": 38890 }, { "epoch": 1.8218485033025718, "grad_norm": 0.6160571939829528, "learning_rate": 1.0331770354244347e-07, "loss": 0.2764, "step": 38891 }, { "epoch": 1.8218953482925002, "grad_norm": 0.6019398690990642, "learning_rate": 1.0326375297631697e-07, "loss": 0.2739, "step": 38892 }, { "epoch": 1.8219421932824285, "grad_norm": 0.577155411925289, "learning_rate": 1.0320981620280473e-07, "loss": 0.2585, "step": 38893 }, { "epoch": 1.8219890382723567, "grad_norm": 0.6151343234909689, "learning_rate": 1.0315589322221763e-07, "loss": 0.2917, "step": 38894 }, { "epoch": 1.8220358832622852, "grad_norm": 0.6268554984840394, "learning_rate": 1.0310198403486598e-07, "loss": 0.2748, "step": 38895 }, { "epoch": 1.8220827282522134, "grad_norm": 0.5661711430063542, "learning_rate": 1.0304808864105981e-07, "loss": 0.2663, "step": 38896 }, { "epoch": 1.8221295732421416, "grad_norm": 0.5820952921318612, "learning_rate": 1.0299420704110996e-07, "loss": 0.2747, "step": 38897 }, { "epoch": 1.82217641823207, "grad_norm": 0.6021295471417717, "learning_rate": 1.0294033923532565e-07, "loss": 0.2862, "step": 38898 }, { "epoch": 1.8222232632219986, "grad_norm": 0.5479041185028831, "learning_rate": 1.0288648522401717e-07, "loss": 0.253, "step": 38899 }, { "epoch": 1.8222701082119266, "grad_norm": 0.580516461047927, "learning_rate": 1.0283264500749458e-07, "loss": 0.2627, "step": 38900 }, { "epoch": 1.822316953201855, "grad_norm": 0.6705499635525859, "learning_rate": 1.0277881858606731e-07, "loss": 0.2834, "step": 38901 }, { "epoch": 1.8223637981917835, "grad_norm": 0.6275019328363864, "learning_rate": 1.027250059600457e-07, "loss": 0.2796, "step": 38902 }, { "epoch": 1.8224106431817118, "grad_norm": 0.6635973807858428, "learning_rate": 1.0267120712973894e-07, "loss": 0.2774, "step": 38903 }, { "epoch": 1.82245748817164, "grad_norm": 0.5519476144404879, "learning_rate": 1.0261742209545732e-07, "loss": 0.257, "step": 38904 }, { "epoch": 1.8225043331615685, "grad_norm": 0.5988531128278777, "learning_rate": 1.025636508575098e-07, "loss": 0.2751, "step": 38905 }, { "epoch": 1.8225511781514967, "grad_norm": 0.6210174511979855, "learning_rate": 1.0250989341620554e-07, "loss": 0.2915, "step": 38906 }, { "epoch": 1.822598023141425, "grad_norm": 0.5725484983195388, "learning_rate": 1.0245614977185431e-07, "loss": 0.2643, "step": 38907 }, { "epoch": 1.8226448681313534, "grad_norm": 0.6157701698338869, "learning_rate": 1.024024199247653e-07, "loss": 0.2785, "step": 38908 }, { "epoch": 1.8226917131212816, "grad_norm": 0.6740299262216561, "learning_rate": 1.023487038752477e-07, "loss": 0.2658, "step": 38909 }, { "epoch": 1.8227385581112099, "grad_norm": 0.5853267375441908, "learning_rate": 1.02295001623611e-07, "loss": 0.2839, "step": 38910 }, { "epoch": 1.8227854031011383, "grad_norm": 0.6288343013898757, "learning_rate": 1.0224131317016383e-07, "loss": 0.2706, "step": 38911 }, { "epoch": 1.8228322480910668, "grad_norm": 0.6034927696142861, "learning_rate": 1.021876385152154e-07, "loss": 0.2704, "step": 38912 }, { "epoch": 1.8228790930809948, "grad_norm": 0.5742114571316876, "learning_rate": 1.0213397765907406e-07, "loss": 0.2655, "step": 38913 }, { "epoch": 1.8229259380709233, "grad_norm": 0.571603974607148, "learning_rate": 1.0208033060204903e-07, "loss": 0.2522, "step": 38914 }, { "epoch": 1.8229727830608518, "grad_norm": 0.5828433401606513, "learning_rate": 1.0202669734444892e-07, "loss": 0.2799, "step": 38915 }, { "epoch": 1.82301962805078, "grad_norm": 0.5758742138527417, "learning_rate": 1.0197307788658295e-07, "loss": 0.2589, "step": 38916 }, { "epoch": 1.8230664730407082, "grad_norm": 0.5940134945614914, "learning_rate": 1.0191947222875892e-07, "loss": 0.2525, "step": 38917 }, { "epoch": 1.8231133180306367, "grad_norm": 0.6040616932886574, "learning_rate": 1.0186588037128548e-07, "loss": 0.27, "step": 38918 }, { "epoch": 1.823160163020565, "grad_norm": 0.5892986580349932, "learning_rate": 1.0181230231447154e-07, "loss": 0.2585, "step": 38919 }, { "epoch": 1.8232070080104932, "grad_norm": 0.5951789320404598, "learning_rate": 1.0175873805862463e-07, "loss": 0.2586, "step": 38920 }, { "epoch": 1.8232538530004216, "grad_norm": 0.5780462067589814, "learning_rate": 1.0170518760405341e-07, "loss": 0.2443, "step": 38921 }, { "epoch": 1.8233006979903499, "grad_norm": 0.6391259788890004, "learning_rate": 1.0165165095106649e-07, "loss": 0.2979, "step": 38922 }, { "epoch": 1.8233475429802781, "grad_norm": 0.6551244877420704, "learning_rate": 1.0159812809997116e-07, "loss": 0.2805, "step": 38923 }, { "epoch": 1.8233943879702066, "grad_norm": 0.6131141683818893, "learning_rate": 1.0154461905107549e-07, "loss": 0.2685, "step": 38924 }, { "epoch": 1.823441232960135, "grad_norm": 0.584930824002958, "learning_rate": 1.014911238046884e-07, "loss": 0.2763, "step": 38925 }, { "epoch": 1.8234880779500633, "grad_norm": 0.6092550378348562, "learning_rate": 1.0143764236111659e-07, "loss": 0.27, "step": 38926 }, { "epoch": 1.8235349229399915, "grad_norm": 0.6033862742538818, "learning_rate": 1.0138417472066842e-07, "loss": 0.2904, "step": 38927 }, { "epoch": 1.82358176792992, "grad_norm": 0.6028600367551087, "learning_rate": 1.0133072088365142e-07, "loss": 0.2887, "step": 38928 }, { "epoch": 1.8236286129198482, "grad_norm": 0.5750076768991342, "learning_rate": 1.0127728085037314e-07, "loss": 0.2598, "step": 38929 }, { "epoch": 1.8236754579097765, "grad_norm": 0.587708763810947, "learning_rate": 1.0122385462114138e-07, "loss": 0.2672, "step": 38930 }, { "epoch": 1.823722302899705, "grad_norm": 0.6262702167987592, "learning_rate": 1.0117044219626337e-07, "loss": 0.2746, "step": 38931 }, { "epoch": 1.8237691478896332, "grad_norm": 0.577957782771191, "learning_rate": 1.0111704357604667e-07, "loss": 0.239, "step": 38932 }, { "epoch": 1.8238159928795614, "grad_norm": 0.5900592480719821, "learning_rate": 1.0106365876079827e-07, "loss": 0.2787, "step": 38933 }, { "epoch": 1.8238628378694899, "grad_norm": 0.5930725974973083, "learning_rate": 1.0101028775082566e-07, "loss": 0.2685, "step": 38934 }, { "epoch": 1.8239096828594183, "grad_norm": 0.5750695004133843, "learning_rate": 1.0095693054643585e-07, "loss": 0.2625, "step": 38935 }, { "epoch": 1.8239565278493464, "grad_norm": 0.5867126518860131, "learning_rate": 1.009035871479358e-07, "loss": 0.2657, "step": 38936 }, { "epoch": 1.8240033728392748, "grad_norm": 0.667057026273192, "learning_rate": 1.0085025755563277e-07, "loss": 0.3162, "step": 38937 }, { "epoch": 1.8240502178292033, "grad_norm": 0.6185011682562986, "learning_rate": 1.0079694176983318e-07, "loss": 0.2756, "step": 38938 }, { "epoch": 1.8240970628191315, "grad_norm": 0.6567353157682047, "learning_rate": 1.0074363979084484e-07, "loss": 0.2767, "step": 38939 }, { "epoch": 1.8241439078090598, "grad_norm": 0.6069825933610864, "learning_rate": 1.0069035161897361e-07, "loss": 0.2692, "step": 38940 }, { "epoch": 1.8241907527989882, "grad_norm": 0.6160067972919295, "learning_rate": 1.006370772545262e-07, "loss": 0.2747, "step": 38941 }, { "epoch": 1.8242375977889165, "grad_norm": 0.675283620079249, "learning_rate": 1.0058381669780904e-07, "loss": 0.286, "step": 38942 }, { "epoch": 1.8242844427788447, "grad_norm": 0.5999964979598394, "learning_rate": 1.0053056994912935e-07, "loss": 0.2712, "step": 38943 }, { "epoch": 1.8243312877687732, "grad_norm": 0.6139798411564964, "learning_rate": 1.0047733700879276e-07, "loss": 0.2615, "step": 38944 }, { "epoch": 1.8243781327587014, "grad_norm": 0.5497241205940558, "learning_rate": 1.004241178771062e-07, "loss": 0.2453, "step": 38945 }, { "epoch": 1.8244249777486297, "grad_norm": 0.5715015008172257, "learning_rate": 1.0037091255437615e-07, "loss": 0.2684, "step": 38946 }, { "epoch": 1.8244718227385581, "grad_norm": 0.5924028379065388, "learning_rate": 1.0031772104090815e-07, "loss": 0.27, "step": 38947 }, { "epoch": 1.8245186677284866, "grad_norm": 0.6535330288169512, "learning_rate": 1.0026454333700835e-07, "loss": 0.2878, "step": 38948 }, { "epoch": 1.8245655127184146, "grad_norm": 0.601489852063654, "learning_rate": 1.0021137944298293e-07, "loss": 0.2657, "step": 38949 }, { "epoch": 1.824612357708343, "grad_norm": 0.5956166342170643, "learning_rate": 1.00158229359138e-07, "loss": 0.2754, "step": 38950 }, { "epoch": 1.8246592026982715, "grad_norm": 0.6001256525373315, "learning_rate": 1.0010509308577915e-07, "loss": 0.2717, "step": 38951 }, { "epoch": 1.8247060476881998, "grad_norm": 0.6070287466097186, "learning_rate": 1.0005197062321226e-07, "loss": 0.2679, "step": 38952 }, { "epoch": 1.824752892678128, "grad_norm": 0.6107160709618626, "learning_rate": 9.999886197174347e-08, "loss": 0.2748, "step": 38953 }, { "epoch": 1.8247997376680565, "grad_norm": 0.5942241953419025, "learning_rate": 9.994576713167781e-08, "loss": 0.2551, "step": 38954 }, { "epoch": 1.8248465826579847, "grad_norm": 0.5477814915807695, "learning_rate": 9.989268610332087e-08, "loss": 0.2458, "step": 38955 }, { "epoch": 1.824893427647913, "grad_norm": 0.5985461477927799, "learning_rate": 9.983961888697824e-08, "loss": 0.281, "step": 38956 }, { "epoch": 1.8249402726378414, "grad_norm": 0.6096788072899071, "learning_rate": 9.97865654829555e-08, "loss": 0.2572, "step": 38957 }, { "epoch": 1.8249871176277697, "grad_norm": 0.61854511372398, "learning_rate": 9.973352589155772e-08, "loss": 0.2656, "step": 38958 }, { "epoch": 1.825033962617698, "grad_norm": 0.5884939338455778, "learning_rate": 9.968050011309044e-08, "loss": 0.2733, "step": 38959 }, { "epoch": 1.8250808076076264, "grad_norm": 0.6096671653045341, "learning_rate": 9.962748814785817e-08, "loss": 0.2785, "step": 38960 }, { "epoch": 1.8251276525975548, "grad_norm": 0.6252483277881801, "learning_rate": 9.957448999616704e-08, "loss": 0.2707, "step": 38961 }, { "epoch": 1.825174497587483, "grad_norm": 0.636018356424198, "learning_rate": 9.952150565832069e-08, "loss": 0.2725, "step": 38962 }, { "epoch": 1.8252213425774113, "grad_norm": 0.5935223812581026, "learning_rate": 9.9468535134625e-08, "loss": 0.2742, "step": 38963 }, { "epoch": 1.8252681875673398, "grad_norm": 0.6235035034420835, "learning_rate": 9.941557842538446e-08, "loss": 0.2722, "step": 38964 }, { "epoch": 1.825315032557268, "grad_norm": 0.5752023394048453, "learning_rate": 9.936263553090436e-08, "loss": 0.2683, "step": 38965 }, { "epoch": 1.8253618775471963, "grad_norm": 0.5549152644737225, "learning_rate": 9.930970645148835e-08, "loss": 0.2503, "step": 38966 }, { "epoch": 1.8254087225371247, "grad_norm": 0.5740124144216032, "learning_rate": 9.925679118744174e-08, "loss": 0.2547, "step": 38967 }, { "epoch": 1.825455567527053, "grad_norm": 0.6398080253929985, "learning_rate": 9.920388973906931e-08, "loss": 0.2657, "step": 38968 }, { "epoch": 1.8255024125169812, "grad_norm": 0.5689558641090914, "learning_rate": 9.915100210667466e-08, "loss": 0.2712, "step": 38969 }, { "epoch": 1.8255492575069097, "grad_norm": 0.6027000473545415, "learning_rate": 9.909812829056231e-08, "loss": 0.2714, "step": 38970 }, { "epoch": 1.8255961024968381, "grad_norm": 0.6106126311854376, "learning_rate": 9.904526829103756e-08, "loss": 0.267, "step": 38971 }, { "epoch": 1.8256429474867661, "grad_norm": 0.5596321936831279, "learning_rate": 9.899242210840321e-08, "loss": 0.2551, "step": 38972 }, { "epoch": 1.8256897924766946, "grad_norm": 0.5661946369011067, "learning_rate": 9.893958974296402e-08, "loss": 0.257, "step": 38973 }, { "epoch": 1.825736637466623, "grad_norm": 0.6531850933504064, "learning_rate": 9.888677119502449e-08, "loss": 0.2758, "step": 38974 }, { "epoch": 1.8257834824565513, "grad_norm": 0.590268494365804, "learning_rate": 9.883396646488769e-08, "loss": 0.256, "step": 38975 }, { "epoch": 1.8258303274464796, "grad_norm": 0.6051611266353017, "learning_rate": 9.878117555285782e-08, "loss": 0.2803, "step": 38976 }, { "epoch": 1.825877172436408, "grad_norm": 0.5978872613198623, "learning_rate": 9.872839845923909e-08, "loss": 0.2646, "step": 38977 }, { "epoch": 1.8259240174263363, "grad_norm": 0.5841747986156036, "learning_rate": 9.86756351843346e-08, "loss": 0.2616, "step": 38978 }, { "epoch": 1.8259708624162645, "grad_norm": 0.5499690693190471, "learning_rate": 9.862288572844824e-08, "loss": 0.2593, "step": 38979 }, { "epoch": 1.826017707406193, "grad_norm": 0.5865456980609942, "learning_rate": 9.85701500918837e-08, "loss": 0.2789, "step": 38980 }, { "epoch": 1.8260645523961212, "grad_norm": 0.5588407475567231, "learning_rate": 9.851742827494487e-08, "loss": 0.2759, "step": 38981 }, { "epoch": 1.8261113973860494, "grad_norm": 0.5485861686352822, "learning_rate": 9.846472027793403e-08, "loss": 0.2667, "step": 38982 }, { "epoch": 1.826158242375978, "grad_norm": 0.5777759360504074, "learning_rate": 9.841202610115564e-08, "loss": 0.2664, "step": 38983 }, { "epoch": 1.8262050873659064, "grad_norm": 0.6232615740852825, "learning_rate": 9.835934574491196e-08, "loss": 0.278, "step": 38984 }, { "epoch": 1.8262519323558344, "grad_norm": 0.5828212720204724, "learning_rate": 9.830667920950665e-08, "loss": 0.2562, "step": 38985 }, { "epoch": 1.8262987773457628, "grad_norm": 0.5953087960946088, "learning_rate": 9.825402649524279e-08, "loss": 0.2494, "step": 38986 }, { "epoch": 1.8263456223356913, "grad_norm": 0.6127614996078028, "learning_rate": 9.820138760242321e-08, "loss": 0.271, "step": 38987 }, { "epoch": 1.8263924673256196, "grad_norm": 0.5630127878258178, "learning_rate": 9.814876253135152e-08, "loss": 0.2511, "step": 38988 }, { "epoch": 1.8264393123155478, "grad_norm": 0.5855823316371118, "learning_rate": 9.809615128232974e-08, "loss": 0.2574, "step": 38989 }, { "epoch": 1.8264861573054763, "grad_norm": 0.6441072537736838, "learning_rate": 9.804355385566094e-08, "loss": 0.2713, "step": 38990 }, { "epoch": 1.8265330022954045, "grad_norm": 0.5974225953286123, "learning_rate": 9.799097025164739e-08, "loss": 0.2867, "step": 38991 }, { "epoch": 1.8265798472853327, "grad_norm": 0.5447157120473829, "learning_rate": 9.793840047059244e-08, "loss": 0.2459, "step": 38992 }, { "epoch": 1.8266266922752612, "grad_norm": 0.5920425566588895, "learning_rate": 9.788584451279808e-08, "loss": 0.2647, "step": 38993 }, { "epoch": 1.8266735372651894, "grad_norm": 0.5842611534309009, "learning_rate": 9.783330237856687e-08, "loss": 0.268, "step": 38994 }, { "epoch": 1.8267203822551177, "grad_norm": 0.6105350015863403, "learning_rate": 9.778077406820186e-08, "loss": 0.2818, "step": 38995 }, { "epoch": 1.8267672272450461, "grad_norm": 0.584853771217353, "learning_rate": 9.772825958200449e-08, "loss": 0.2711, "step": 38996 }, { "epoch": 1.8268140722349746, "grad_norm": 0.5843140623740661, "learning_rate": 9.767575892027675e-08, "loss": 0.2777, "step": 38997 }, { "epoch": 1.8268609172249028, "grad_norm": 0.5589573630635952, "learning_rate": 9.762327208332145e-08, "loss": 0.2518, "step": 38998 }, { "epoch": 1.826907762214831, "grad_norm": 0.6195411608528432, "learning_rate": 9.757079907144029e-08, "loss": 0.2786, "step": 38999 }, { "epoch": 1.8269546072047596, "grad_norm": 0.6337202498681312, "learning_rate": 9.751833988493553e-08, "loss": 0.2713, "step": 39000 }, { "epoch": 1.8270014521946878, "grad_norm": 0.5711971262069745, "learning_rate": 9.746589452410887e-08, "loss": 0.2571, "step": 39001 }, { "epoch": 1.827048297184616, "grad_norm": 0.5654296796889065, "learning_rate": 9.741346298926202e-08, "loss": 0.2483, "step": 39002 }, { "epoch": 1.8270951421745445, "grad_norm": 0.644324779630704, "learning_rate": 9.736104528069723e-08, "loss": 0.268, "step": 39003 }, { "epoch": 1.8271419871644727, "grad_norm": 0.6606228068340891, "learning_rate": 9.730864139871537e-08, "loss": 0.2756, "step": 39004 }, { "epoch": 1.827188832154401, "grad_norm": 0.6045394058200504, "learning_rate": 9.725625134361844e-08, "loss": 0.259, "step": 39005 }, { "epoch": 1.8272356771443294, "grad_norm": 0.6012838671603052, "learning_rate": 9.720387511570756e-08, "loss": 0.2699, "step": 39006 }, { "epoch": 1.827282522134258, "grad_norm": 0.6171268449510846, "learning_rate": 9.7151512715285e-08, "loss": 0.3001, "step": 39007 }, { "epoch": 1.827329367124186, "grad_norm": 0.5695896339700532, "learning_rate": 9.709916414265136e-08, "loss": 0.264, "step": 39008 }, { "epoch": 1.8273762121141144, "grad_norm": 0.5959801913426728, "learning_rate": 9.704682939810777e-08, "loss": 0.2834, "step": 39009 }, { "epoch": 1.8274230571040428, "grad_norm": 0.5844069533197657, "learning_rate": 9.699450848195625e-08, "loss": 0.2594, "step": 39010 }, { "epoch": 1.827469902093971, "grad_norm": 0.6186674253189335, "learning_rate": 9.694220139449706e-08, "loss": 0.2694, "step": 39011 }, { "epoch": 1.8275167470838993, "grad_norm": 0.6293923865625918, "learning_rate": 9.688990813603139e-08, "loss": 0.2762, "step": 39012 }, { "epoch": 1.8275635920738278, "grad_norm": 0.593082758671668, "learning_rate": 9.683762870686064e-08, "loss": 0.2665, "step": 39013 }, { "epoch": 1.827610437063756, "grad_norm": 0.5628419591171059, "learning_rate": 9.678536310728515e-08, "loss": 0.2731, "step": 39014 }, { "epoch": 1.8276572820536843, "grad_norm": 0.6017353312113028, "learning_rate": 9.673311133760576e-08, "loss": 0.2669, "step": 39015 }, { "epoch": 1.8277041270436127, "grad_norm": 0.6021598733911355, "learning_rate": 9.668087339812337e-08, "loss": 0.2581, "step": 39016 }, { "epoch": 1.827750972033541, "grad_norm": 0.6623732941919689, "learning_rate": 9.662864928913885e-08, "loss": 0.2873, "step": 39017 }, { "epoch": 1.8277978170234692, "grad_norm": 0.5738246264574184, "learning_rate": 9.657643901095193e-08, "loss": 0.2628, "step": 39018 }, { "epoch": 1.8278446620133977, "grad_norm": 0.5904217113168957, "learning_rate": 9.65242425638635e-08, "loss": 0.2845, "step": 39019 }, { "epoch": 1.8278915070033261, "grad_norm": 0.6152222997707997, "learning_rate": 9.647205994817443e-08, "loss": 0.2728, "step": 39020 }, { "epoch": 1.8279383519932542, "grad_norm": 0.5952919140253159, "learning_rate": 9.641989116418421e-08, "loss": 0.2637, "step": 39021 }, { "epoch": 1.8279851969831826, "grad_norm": 0.61658870265459, "learning_rate": 9.636773621219314e-08, "loss": 0.2769, "step": 39022 }, { "epoch": 1.828032041973111, "grad_norm": 0.5866870157398985, "learning_rate": 9.63155950925021e-08, "loss": 0.2661, "step": 39023 }, { "epoch": 1.8280788869630393, "grad_norm": 0.595768055899774, "learning_rate": 9.626346780541058e-08, "loss": 0.2611, "step": 39024 }, { "epoch": 1.8281257319529676, "grad_norm": 0.5897111086537745, "learning_rate": 9.621135435121831e-08, "loss": 0.2762, "step": 39025 }, { "epoch": 1.828172576942896, "grad_norm": 0.6068412878289994, "learning_rate": 9.615925473022592e-08, "loss": 0.2779, "step": 39026 }, { "epoch": 1.8282194219328243, "grad_norm": 0.5875209912173559, "learning_rate": 9.610716894273258e-08, "loss": 0.2569, "step": 39027 }, { "epoch": 1.8282662669227525, "grad_norm": 0.5790711090874665, "learning_rate": 9.605509698903836e-08, "loss": 0.2536, "step": 39028 }, { "epoch": 1.828313111912681, "grad_norm": 0.580430215011247, "learning_rate": 9.600303886944273e-08, "loss": 0.2757, "step": 39029 }, { "epoch": 1.8283599569026092, "grad_norm": 0.5737352984706866, "learning_rate": 9.595099458424573e-08, "loss": 0.2617, "step": 39030 }, { "epoch": 1.8284068018925375, "grad_norm": 0.5961318756180707, "learning_rate": 9.589896413374627e-08, "loss": 0.2725, "step": 39031 }, { "epoch": 1.828453646882466, "grad_norm": 0.5523806514655826, "learning_rate": 9.584694751824442e-08, "loss": 0.2598, "step": 39032 }, { "epoch": 1.8285004918723944, "grad_norm": 0.5954385118008024, "learning_rate": 9.579494473803852e-08, "loss": 0.2866, "step": 39033 }, { "epoch": 1.8285473368623226, "grad_norm": 0.5822191134163283, "learning_rate": 9.574295579342862e-08, "loss": 0.2625, "step": 39034 }, { "epoch": 1.8285941818522509, "grad_norm": 0.6861803624410571, "learning_rate": 9.569098068471367e-08, "loss": 0.2632, "step": 39035 }, { "epoch": 1.8286410268421793, "grad_norm": 0.6156621070523024, "learning_rate": 9.563901941219312e-08, "loss": 0.2671, "step": 39036 }, { "epoch": 1.8286878718321076, "grad_norm": 0.5804484554954485, "learning_rate": 9.558707197616562e-08, "loss": 0.2808, "step": 39037 }, { "epoch": 1.8287347168220358, "grad_norm": 0.6223919436175899, "learning_rate": 9.55351383769304e-08, "loss": 0.2713, "step": 39038 }, { "epoch": 1.8287815618119643, "grad_norm": 0.6223908542136548, "learning_rate": 9.548321861478554e-08, "loss": 0.272, "step": 39039 }, { "epoch": 1.8288284068018925, "grad_norm": 0.586988221877286, "learning_rate": 9.543131269003081e-08, "loss": 0.2639, "step": 39040 }, { "epoch": 1.8288752517918208, "grad_norm": 0.5712679244099982, "learning_rate": 9.537942060296429e-08, "loss": 0.2672, "step": 39041 }, { "epoch": 1.8289220967817492, "grad_norm": 0.6132145036039136, "learning_rate": 9.532754235388463e-08, "loss": 0.254, "step": 39042 }, { "epoch": 1.8289689417716777, "grad_norm": 0.5675628244583023, "learning_rate": 9.527567794309078e-08, "loss": 0.2533, "step": 39043 }, { "epoch": 1.8290157867616057, "grad_norm": 0.582002204147845, "learning_rate": 9.522382737088137e-08, "loss": 0.2847, "step": 39044 }, { "epoch": 1.8290626317515342, "grad_norm": 0.607688485346482, "learning_rate": 9.51719906375545e-08, "loss": 0.2736, "step": 39045 }, { "epoch": 1.8291094767414626, "grad_norm": 0.6517033346900525, "learning_rate": 9.512016774340799e-08, "loss": 0.2753, "step": 39046 }, { "epoch": 1.8291563217313909, "grad_norm": 0.6075830396363459, "learning_rate": 9.50683586887402e-08, "loss": 0.2778, "step": 39047 }, { "epoch": 1.829203166721319, "grad_norm": 0.6135880541112586, "learning_rate": 9.50165634738498e-08, "loss": 0.2658, "step": 39048 }, { "epoch": 1.8292500117112476, "grad_norm": 0.6219687852117306, "learning_rate": 9.49647820990346e-08, "loss": 0.2748, "step": 39049 }, { "epoch": 1.8292968567011758, "grad_norm": 0.5959318078705428, "learning_rate": 9.491301456459296e-08, "loss": 0.2496, "step": 39050 }, { "epoch": 1.829343701691104, "grad_norm": 0.5655197842720606, "learning_rate": 9.486126087082188e-08, "loss": 0.2582, "step": 39051 }, { "epoch": 1.8293905466810325, "grad_norm": 0.6169575200186403, "learning_rate": 9.480952101802026e-08, "loss": 0.2899, "step": 39052 }, { "epoch": 1.8294373916709608, "grad_norm": 0.5825143601688088, "learning_rate": 9.475779500648485e-08, "loss": 0.2674, "step": 39053 }, { "epoch": 1.829484236660889, "grad_norm": 0.5840535740324608, "learning_rate": 9.4706082836514e-08, "loss": 0.2664, "step": 39054 }, { "epoch": 1.8295310816508175, "grad_norm": 0.6101829889871603, "learning_rate": 9.465438450840498e-08, "loss": 0.2768, "step": 39055 }, { "epoch": 1.829577926640746, "grad_norm": 0.6178762571232345, "learning_rate": 9.460270002245586e-08, "loss": 0.281, "step": 39056 }, { "epoch": 1.829624771630674, "grad_norm": 0.5658073048200775, "learning_rate": 9.455102937896309e-08, "loss": 0.265, "step": 39057 }, { "epoch": 1.8296716166206024, "grad_norm": 0.6742800019530155, "learning_rate": 9.449937257822478e-08, "loss": 0.2684, "step": 39058 }, { "epoch": 1.8297184616105309, "grad_norm": 0.644681659477363, "learning_rate": 9.444772962053816e-08, "loss": 0.2767, "step": 39059 }, { "epoch": 1.829765306600459, "grad_norm": 0.6303776283762874, "learning_rate": 9.439610050619996e-08, "loss": 0.2781, "step": 39060 }, { "epoch": 1.8298121515903873, "grad_norm": 0.5746182654047449, "learning_rate": 9.434448523550743e-08, "loss": 0.2584, "step": 39061 }, { "epoch": 1.8298589965803158, "grad_norm": 0.626139780743503, "learning_rate": 9.42928838087584e-08, "loss": 0.2883, "step": 39062 }, { "epoch": 1.829905841570244, "grad_norm": 0.5494247314345538, "learning_rate": 9.424129622624845e-08, "loss": 0.265, "step": 39063 }, { "epoch": 1.8299526865601723, "grad_norm": 0.56663368150324, "learning_rate": 9.418972248827541e-08, "loss": 0.2667, "step": 39064 }, { "epoch": 1.8299995315501008, "grad_norm": 0.6159569361103553, "learning_rate": 9.413816259513625e-08, "loss": 0.2791, "step": 39065 }, { "epoch": 1.830046376540029, "grad_norm": 0.6291272443449841, "learning_rate": 9.408661654712686e-08, "loss": 0.2906, "step": 39066 }, { "epoch": 1.8300932215299572, "grad_norm": 0.5778956998978277, "learning_rate": 9.403508434454423e-08, "loss": 0.2597, "step": 39067 }, { "epoch": 1.8301400665198857, "grad_norm": 0.6232526670749364, "learning_rate": 9.398356598768532e-08, "loss": 0.2655, "step": 39068 }, { "epoch": 1.8301869115098142, "grad_norm": 0.5988963732065109, "learning_rate": 9.393206147684575e-08, "loss": 0.2883, "step": 39069 }, { "epoch": 1.8302337564997424, "grad_norm": 0.6147301256126455, "learning_rate": 9.388057081232278e-08, "loss": 0.2747, "step": 39070 }, { "epoch": 1.8302806014896706, "grad_norm": 0.6251943980865136, "learning_rate": 9.382909399441225e-08, "loss": 0.2796, "step": 39071 }, { "epoch": 1.830327446479599, "grad_norm": 0.6084085139311921, "learning_rate": 9.377763102341065e-08, "loss": 0.2529, "step": 39072 }, { "epoch": 1.8303742914695273, "grad_norm": 0.6032129727264073, "learning_rate": 9.37261818996138e-08, "loss": 0.2718, "step": 39073 }, { "epoch": 1.8304211364594556, "grad_norm": 0.6200192540123476, "learning_rate": 9.367474662331843e-08, "loss": 0.2773, "step": 39074 }, { "epoch": 1.830467981449384, "grad_norm": 0.6259728900108285, "learning_rate": 9.36233251948196e-08, "loss": 0.2741, "step": 39075 }, { "epoch": 1.8305148264393123, "grad_norm": 0.6004166681708183, "learning_rate": 9.35719176144137e-08, "loss": 0.2579, "step": 39076 }, { "epoch": 1.8305616714292405, "grad_norm": 0.6060789024893484, "learning_rate": 9.35205238823969e-08, "loss": 0.2675, "step": 39077 }, { "epoch": 1.830608516419169, "grad_norm": 0.5590638500510995, "learning_rate": 9.346914399906426e-08, "loss": 0.2507, "step": 39078 }, { "epoch": 1.8306553614090975, "grad_norm": 0.6192572250324517, "learning_rate": 9.341777796471218e-08, "loss": 0.2769, "step": 39079 }, { "epoch": 1.8307022063990255, "grad_norm": 0.6137988524023754, "learning_rate": 9.336642577963572e-08, "loss": 0.2759, "step": 39080 }, { "epoch": 1.830749051388954, "grad_norm": 0.5983656517544081, "learning_rate": 9.331508744413104e-08, "loss": 0.2604, "step": 39081 }, { "epoch": 1.8307958963788824, "grad_norm": 0.5842526813180016, "learning_rate": 9.32637629584926e-08, "loss": 0.2546, "step": 39082 }, { "epoch": 1.8308427413688106, "grad_norm": 0.5903627897384712, "learning_rate": 9.321245232301629e-08, "loss": 0.2808, "step": 39083 }, { "epoch": 1.8308895863587389, "grad_norm": 0.5509499291004517, "learning_rate": 9.31611555379977e-08, "loss": 0.2609, "step": 39084 }, { "epoch": 1.8309364313486673, "grad_norm": 0.5810804666556204, "learning_rate": 9.310987260373134e-08, "loss": 0.2575, "step": 39085 }, { "epoch": 1.8309832763385956, "grad_norm": 0.6755888729279431, "learning_rate": 9.305860352051333e-08, "loss": 0.3006, "step": 39086 }, { "epoch": 1.8310301213285238, "grad_norm": 0.5720727394318939, "learning_rate": 9.300734828863788e-08, "loss": 0.2627, "step": 39087 }, { "epoch": 1.8310769663184523, "grad_norm": 0.6013485488599294, "learning_rate": 9.295610690839979e-08, "loss": 0.2516, "step": 39088 }, { "epoch": 1.8311238113083805, "grad_norm": 0.6172138605328544, "learning_rate": 9.290487938009462e-08, "loss": 0.26, "step": 39089 }, { "epoch": 1.8311706562983088, "grad_norm": 0.5888435113097071, "learning_rate": 9.28536657040166e-08, "loss": 0.2714, "step": 39090 }, { "epoch": 1.8312175012882372, "grad_norm": 0.5907277128919524, "learning_rate": 9.280246588046076e-08, "loss": 0.2707, "step": 39091 }, { "epoch": 1.8312643462781657, "grad_norm": 0.5905491272386033, "learning_rate": 9.275127990972188e-08, "loss": 0.2779, "step": 39092 }, { "epoch": 1.8313111912680937, "grad_norm": 0.5966628283804075, "learning_rate": 9.270010779209442e-08, "loss": 0.2535, "step": 39093 }, { "epoch": 1.8313580362580222, "grad_norm": 0.6112868506358445, "learning_rate": 9.26489495278729e-08, "loss": 0.2868, "step": 39094 }, { "epoch": 1.8314048812479506, "grad_norm": 0.6202073928113114, "learning_rate": 9.25978051173515e-08, "loss": 0.2729, "step": 39095 }, { "epoch": 1.8314517262378789, "grad_norm": 0.5782801003263528, "learning_rate": 9.254667456082444e-08, "loss": 0.2683, "step": 39096 }, { "epoch": 1.8314985712278071, "grad_norm": 0.5524433049165137, "learning_rate": 9.249555785858622e-08, "loss": 0.2573, "step": 39097 }, { "epoch": 1.8315454162177356, "grad_norm": 0.665754649733698, "learning_rate": 9.244445501093075e-08, "loss": 0.2937, "step": 39098 }, { "epoch": 1.8315922612076638, "grad_norm": 0.6600654205237665, "learning_rate": 9.239336601815279e-08, "loss": 0.2806, "step": 39099 }, { "epoch": 1.831639106197592, "grad_norm": 0.5815189943070878, "learning_rate": 9.234229088054575e-08, "loss": 0.2568, "step": 39100 }, { "epoch": 1.8316859511875205, "grad_norm": 0.5982085150634356, "learning_rate": 9.22912295984038e-08, "loss": 0.2699, "step": 39101 }, { "epoch": 1.8317327961774488, "grad_norm": 0.6187301072508955, "learning_rate": 9.224018217202036e-08, "loss": 0.281, "step": 39102 }, { "epoch": 1.831779641167377, "grad_norm": 0.5744624248283166, "learning_rate": 9.218914860168959e-08, "loss": 0.2746, "step": 39103 }, { "epoch": 1.8318264861573055, "grad_norm": 0.5792149807275203, "learning_rate": 9.213812888770518e-08, "loss": 0.2543, "step": 39104 }, { "epoch": 1.831873331147234, "grad_norm": 0.6061379908329779, "learning_rate": 9.208712303036078e-08, "loss": 0.2768, "step": 39105 }, { "epoch": 1.8319201761371622, "grad_norm": 0.5789206622292278, "learning_rate": 9.203613102994946e-08, "loss": 0.2574, "step": 39106 }, { "epoch": 1.8319670211270904, "grad_norm": 0.6279607860582864, "learning_rate": 9.19851528867649e-08, "loss": 0.2781, "step": 39107 }, { "epoch": 1.8320138661170189, "grad_norm": 0.6147851426203278, "learning_rate": 9.193418860110104e-08, "loss": 0.2628, "step": 39108 }, { "epoch": 1.8320607111069471, "grad_norm": 0.594686607934151, "learning_rate": 9.18832381732504e-08, "loss": 0.2683, "step": 39109 }, { "epoch": 1.8321075560968754, "grad_norm": 0.6093651300164602, "learning_rate": 9.183230160350637e-08, "loss": 0.2714, "step": 39110 }, { "epoch": 1.8321544010868038, "grad_norm": 0.6148857700781398, "learning_rate": 9.17813788921626e-08, "loss": 0.2843, "step": 39111 }, { "epoch": 1.832201246076732, "grad_norm": 0.6211185610481332, "learning_rate": 9.173047003951136e-08, "loss": 0.2645, "step": 39112 }, { "epoch": 1.8322480910666603, "grad_norm": 0.5588060358890656, "learning_rate": 9.167957504584574e-08, "loss": 0.2593, "step": 39113 }, { "epoch": 1.8322949360565888, "grad_norm": 0.5456215914007415, "learning_rate": 9.16286939114594e-08, "loss": 0.254, "step": 39114 }, { "epoch": 1.8323417810465172, "grad_norm": 0.544493746407112, "learning_rate": 9.157782663664433e-08, "loss": 0.2543, "step": 39115 }, { "epoch": 1.8323886260364453, "grad_norm": 0.5923745083409679, "learning_rate": 9.152697322169336e-08, "loss": 0.2655, "step": 39116 }, { "epoch": 1.8324354710263737, "grad_norm": 0.5677013031075325, "learning_rate": 9.147613366689983e-08, "loss": 0.2551, "step": 39117 }, { "epoch": 1.8324823160163022, "grad_norm": 0.6330135323831343, "learning_rate": 9.142530797255522e-08, "loss": 0.2725, "step": 39118 }, { "epoch": 1.8325291610062304, "grad_norm": 0.6143348504712327, "learning_rate": 9.137449613895289e-08, "loss": 0.278, "step": 39119 }, { "epoch": 1.8325760059961587, "grad_norm": 0.6165930252336952, "learning_rate": 9.132369816638481e-08, "loss": 0.2832, "step": 39120 }, { "epoch": 1.8326228509860871, "grad_norm": 0.5815400771233894, "learning_rate": 9.127291405514354e-08, "loss": 0.2735, "step": 39121 }, { "epoch": 1.8326696959760154, "grad_norm": 0.5545320892032541, "learning_rate": 9.122214380552136e-08, "loss": 0.2511, "step": 39122 }, { "epoch": 1.8327165409659436, "grad_norm": 0.5828458308915273, "learning_rate": 9.117138741781023e-08, "loss": 0.2621, "step": 39123 }, { "epoch": 1.832763385955872, "grad_norm": 0.614782126393993, "learning_rate": 9.112064489230215e-08, "loss": 0.2692, "step": 39124 }, { "epoch": 1.8328102309458003, "grad_norm": 0.5871829256423349, "learning_rate": 9.10699162292894e-08, "loss": 0.2532, "step": 39125 }, { "epoch": 1.8328570759357286, "grad_norm": 0.5808196999599172, "learning_rate": 9.101920142906395e-08, "loss": 0.2776, "step": 39126 }, { "epoch": 1.832903920925657, "grad_norm": 0.6129464425606221, "learning_rate": 9.096850049191724e-08, "loss": 0.2678, "step": 39127 }, { "epoch": 1.8329507659155855, "grad_norm": 0.5675021971471769, "learning_rate": 9.091781341814182e-08, "loss": 0.2544, "step": 39128 }, { "epoch": 1.8329976109055135, "grad_norm": 0.5705303332648742, "learning_rate": 9.086714020802856e-08, "loss": 0.2631, "step": 39129 }, { "epoch": 1.833044455895442, "grad_norm": 0.6146621455373645, "learning_rate": 9.081648086186945e-08, "loss": 0.2771, "step": 39130 }, { "epoch": 1.8330913008853704, "grad_norm": 0.6206012461796403, "learning_rate": 9.076583537995593e-08, "loss": 0.259, "step": 39131 }, { "epoch": 1.8331381458752987, "grad_norm": 0.6386180907211549, "learning_rate": 9.071520376257942e-08, "loss": 0.2742, "step": 39132 }, { "epoch": 1.833184990865227, "grad_norm": 0.597019989213581, "learning_rate": 9.066458601003136e-08, "loss": 0.2697, "step": 39133 }, { "epoch": 1.8332318358551554, "grad_norm": 0.5984598764705275, "learning_rate": 9.061398212260292e-08, "loss": 0.262, "step": 39134 }, { "epoch": 1.8332786808450836, "grad_norm": 0.6316693688836766, "learning_rate": 9.056339210058579e-08, "loss": 0.2858, "step": 39135 }, { "epoch": 1.8333255258350118, "grad_norm": 0.6282980553419022, "learning_rate": 9.051281594427086e-08, "loss": 0.2701, "step": 39136 }, { "epoch": 1.8333723708249403, "grad_norm": 0.5518750749697491, "learning_rate": 9.046225365394873e-08, "loss": 0.2593, "step": 39137 }, { "epoch": 1.8334192158148686, "grad_norm": 0.5717797117901308, "learning_rate": 9.041170522991055e-08, "loss": 0.2615, "step": 39138 }, { "epoch": 1.8334660608047968, "grad_norm": 0.636505477313721, "learning_rate": 9.036117067244776e-08, "loss": 0.2549, "step": 39139 }, { "epoch": 1.8335129057947253, "grad_norm": 0.637475125387662, "learning_rate": 9.031064998185041e-08, "loss": 0.2734, "step": 39140 }, { "epoch": 1.8335597507846537, "grad_norm": 0.5930440736424305, "learning_rate": 9.02601431584102e-08, "loss": 0.2719, "step": 39141 }, { "epoch": 1.833606595774582, "grad_norm": 0.6053403552193958, "learning_rate": 9.020965020241663e-08, "loss": 0.2715, "step": 39142 }, { "epoch": 1.8336534407645102, "grad_norm": 0.6054924744806703, "learning_rate": 9.01591711141614e-08, "loss": 0.2667, "step": 39143 }, { "epoch": 1.8337002857544387, "grad_norm": 0.5964749052585528, "learning_rate": 9.0108705893934e-08, "loss": 0.2804, "step": 39144 }, { "epoch": 1.833747130744367, "grad_norm": 0.6192695810557065, "learning_rate": 9.005825454202532e-08, "loss": 0.2826, "step": 39145 }, { "epoch": 1.8337939757342951, "grad_norm": 0.6282409635091311, "learning_rate": 9.000781705872569e-08, "loss": 0.2688, "step": 39146 }, { "epoch": 1.8338408207242236, "grad_norm": 0.6153640224663861, "learning_rate": 8.995739344432541e-08, "loss": 0.2444, "step": 39147 }, { "epoch": 1.8338876657141518, "grad_norm": 0.563060313982264, "learning_rate": 8.990698369911482e-08, "loss": 0.2511, "step": 39148 }, { "epoch": 1.83393451070408, "grad_norm": 0.6293918009590298, "learning_rate": 8.985658782338341e-08, "loss": 0.2818, "step": 39149 }, { "epoch": 1.8339813556940086, "grad_norm": 0.6310461498297442, "learning_rate": 8.980620581742178e-08, "loss": 0.2826, "step": 39150 }, { "epoch": 1.834028200683937, "grad_norm": 0.61580907703378, "learning_rate": 8.975583768151968e-08, "loss": 0.2622, "step": 39151 }, { "epoch": 1.834075045673865, "grad_norm": 0.6604121534451834, "learning_rate": 8.970548341596663e-08, "loss": 0.2846, "step": 39152 }, { "epoch": 1.8341218906637935, "grad_norm": 0.60286647138619, "learning_rate": 8.965514302105293e-08, "loss": 0.2799, "step": 39153 }, { "epoch": 1.834168735653722, "grad_norm": 0.614826448939202, "learning_rate": 8.960481649706838e-08, "loss": 0.2732, "step": 39154 }, { "epoch": 1.8342155806436502, "grad_norm": 0.6425914227503731, "learning_rate": 8.955450384430187e-08, "loss": 0.2662, "step": 39155 }, { "epoch": 1.8342624256335784, "grad_norm": 0.6152471897548929, "learning_rate": 8.950420506304347e-08, "loss": 0.2762, "step": 39156 }, { "epoch": 1.834309270623507, "grad_norm": 0.6087283777827583, "learning_rate": 8.945392015358268e-08, "loss": 0.259, "step": 39157 }, { "epoch": 1.8343561156134351, "grad_norm": 0.5855951707285189, "learning_rate": 8.940364911620842e-08, "loss": 0.2739, "step": 39158 }, { "epoch": 1.8344029606033634, "grad_norm": 0.6279183846893946, "learning_rate": 8.935339195121017e-08, "loss": 0.282, "step": 39159 }, { "epoch": 1.8344498055932918, "grad_norm": 0.6356546853596498, "learning_rate": 8.930314865887773e-08, "loss": 0.2652, "step": 39160 }, { "epoch": 1.83449665058322, "grad_norm": 0.6269666768090469, "learning_rate": 8.925291923949947e-08, "loss": 0.2771, "step": 39161 }, { "epoch": 1.8345434955731483, "grad_norm": 0.6138329947715446, "learning_rate": 8.920270369336459e-08, "loss": 0.265, "step": 39162 }, { "epoch": 1.8345903405630768, "grad_norm": 0.5776156767325757, "learning_rate": 8.915250202076287e-08, "loss": 0.2585, "step": 39163 }, { "epoch": 1.8346371855530053, "grad_norm": 0.5880570273855027, "learning_rate": 8.910231422198185e-08, "loss": 0.2608, "step": 39164 }, { "epoch": 1.8346840305429333, "grad_norm": 0.6028268642031739, "learning_rate": 8.905214029731129e-08, "loss": 0.2761, "step": 39165 }, { "epoch": 1.8347308755328617, "grad_norm": 0.5651473026507349, "learning_rate": 8.900198024703987e-08, "loss": 0.2636, "step": 39166 }, { "epoch": 1.8347777205227902, "grad_norm": 0.6585021960671943, "learning_rate": 8.895183407145597e-08, "loss": 0.2795, "step": 39167 }, { "epoch": 1.8348245655127184, "grad_norm": 0.5508656581193941, "learning_rate": 8.890170177084794e-08, "loss": 0.2546, "step": 39168 }, { "epoch": 1.8348714105026467, "grad_norm": 0.5715669511473397, "learning_rate": 8.885158334550503e-08, "loss": 0.2542, "step": 39169 }, { "epoch": 1.8349182554925751, "grad_norm": 0.5813500918591831, "learning_rate": 8.880147879571505e-08, "loss": 0.2653, "step": 39170 }, { "epoch": 1.8349651004825034, "grad_norm": 0.6252266117439212, "learning_rate": 8.875138812176664e-08, "loss": 0.2841, "step": 39171 }, { "epoch": 1.8350119454724316, "grad_norm": 0.5759877890427186, "learning_rate": 8.870131132394793e-08, "loss": 0.272, "step": 39172 }, { "epoch": 1.83505879046236, "grad_norm": 0.6037349240352563, "learning_rate": 8.865124840254703e-08, "loss": 0.2732, "step": 39173 }, { "epoch": 1.8351056354522883, "grad_norm": 0.5854884919117254, "learning_rate": 8.860119935785228e-08, "loss": 0.2645, "step": 39174 }, { "epoch": 1.8351524804422166, "grad_norm": 0.6007672738804548, "learning_rate": 8.855116419015126e-08, "loss": 0.2551, "step": 39175 }, { "epoch": 1.835199325432145, "grad_norm": 0.6061207484606632, "learning_rate": 8.850114289973232e-08, "loss": 0.2626, "step": 39176 }, { "epoch": 1.8352461704220735, "grad_norm": 0.595417429619405, "learning_rate": 8.845113548688333e-08, "loss": 0.271, "step": 39177 }, { "epoch": 1.8352930154120017, "grad_norm": 0.5425695760294234, "learning_rate": 8.840114195189209e-08, "loss": 0.2548, "step": 39178 }, { "epoch": 1.83533986040193, "grad_norm": 0.6126027973724948, "learning_rate": 8.835116229504586e-08, "loss": 0.2854, "step": 39179 }, { "epoch": 1.8353867053918584, "grad_norm": 0.6086000146916399, "learning_rate": 8.830119651663249e-08, "loss": 0.2603, "step": 39180 }, { "epoch": 1.8354335503817867, "grad_norm": 0.6077466713728192, "learning_rate": 8.825124461693952e-08, "loss": 0.2817, "step": 39181 }, { "epoch": 1.835480395371715, "grad_norm": 0.6175500266310152, "learning_rate": 8.820130659625448e-08, "loss": 0.298, "step": 39182 }, { "epoch": 1.8355272403616434, "grad_norm": 0.591040871843826, "learning_rate": 8.815138245486493e-08, "loss": 0.2692, "step": 39183 }, { "epoch": 1.8355740853515716, "grad_norm": 0.5633490455314051, "learning_rate": 8.810147219305815e-08, "loss": 0.2641, "step": 39184 }, { "epoch": 1.8356209303414999, "grad_norm": 0.6655876954188705, "learning_rate": 8.805157581112111e-08, "loss": 0.305, "step": 39185 }, { "epoch": 1.8356677753314283, "grad_norm": 0.6170005826329988, "learning_rate": 8.800169330934083e-08, "loss": 0.2877, "step": 39186 }, { "epoch": 1.8357146203213568, "grad_norm": 0.6033520856511433, "learning_rate": 8.795182468800457e-08, "loss": 0.2688, "step": 39187 }, { "epoch": 1.8357614653112848, "grad_norm": 0.6041259248031671, "learning_rate": 8.79019699473993e-08, "loss": 0.2785, "step": 39188 }, { "epoch": 1.8358083103012133, "grad_norm": 0.6688806690229673, "learning_rate": 8.785212908781176e-08, "loss": 0.2893, "step": 39189 }, { "epoch": 1.8358551552911417, "grad_norm": 0.5628768146847709, "learning_rate": 8.78023021095295e-08, "loss": 0.2524, "step": 39190 }, { "epoch": 1.83590200028107, "grad_norm": 0.5724524819636625, "learning_rate": 8.775248901283839e-08, "loss": 0.2589, "step": 39191 }, { "epoch": 1.8359488452709982, "grad_norm": 0.6348984542054947, "learning_rate": 8.770268979802543e-08, "loss": 0.2837, "step": 39192 }, { "epoch": 1.8359956902609267, "grad_norm": 0.5929054705231777, "learning_rate": 8.765290446537705e-08, "loss": 0.2788, "step": 39193 }, { "epoch": 1.836042535250855, "grad_norm": 0.6120799398408063, "learning_rate": 8.760313301517998e-08, "loss": 0.2637, "step": 39194 }, { "epoch": 1.8360893802407832, "grad_norm": 0.626882758849027, "learning_rate": 8.755337544772064e-08, "loss": 0.2758, "step": 39195 }, { "epoch": 1.8361362252307116, "grad_norm": 0.5685475708904489, "learning_rate": 8.75036317632852e-08, "loss": 0.2716, "step": 39196 }, { "epoch": 1.8361830702206399, "grad_norm": 0.6152026958306268, "learning_rate": 8.745390196216009e-08, "loss": 0.2773, "step": 39197 }, { "epoch": 1.836229915210568, "grad_norm": 0.5505725853902366, "learning_rate": 8.740418604463119e-08, "loss": 0.2544, "step": 39198 }, { "epoch": 1.8362767602004966, "grad_norm": 0.5715762004295019, "learning_rate": 8.735448401098523e-08, "loss": 0.2658, "step": 39199 }, { "epoch": 1.836323605190425, "grad_norm": 0.6351492169702596, "learning_rate": 8.730479586150726e-08, "loss": 0.2909, "step": 39200 }, { "epoch": 1.836370450180353, "grad_norm": 0.6020549371221692, "learning_rate": 8.725512159648397e-08, "loss": 0.2548, "step": 39201 }, { "epoch": 1.8364172951702815, "grad_norm": 0.6036668128429065, "learning_rate": 8.720546121620154e-08, "loss": 0.2642, "step": 39202 }, { "epoch": 1.83646414016021, "grad_norm": 0.6066332255855136, "learning_rate": 8.715581472094475e-08, "loss": 0.2742, "step": 39203 }, { "epoch": 1.8365109851501382, "grad_norm": 0.5525026481174924, "learning_rate": 8.710618211099975e-08, "loss": 0.2497, "step": 39204 }, { "epoch": 1.8365578301400665, "grad_norm": 0.5982573093655829, "learning_rate": 8.705656338665242e-08, "loss": 0.2727, "step": 39205 }, { "epoch": 1.836604675129995, "grad_norm": 0.5235868856980862, "learning_rate": 8.700695854818808e-08, "loss": 0.2478, "step": 39206 }, { "epoch": 1.8366515201199232, "grad_norm": 0.656649369104556, "learning_rate": 8.695736759589207e-08, "loss": 0.2812, "step": 39207 }, { "epoch": 1.8366983651098514, "grad_norm": 0.6170308352629328, "learning_rate": 8.690779053005027e-08, "loss": 0.2853, "step": 39208 }, { "epoch": 1.8367452100997799, "grad_norm": 0.573597720391666, "learning_rate": 8.685822735094718e-08, "loss": 0.2688, "step": 39209 }, { "epoch": 1.836792055089708, "grad_norm": 0.6050825002910603, "learning_rate": 8.680867805886866e-08, "loss": 0.259, "step": 39210 }, { "epoch": 1.8368389000796363, "grad_norm": 0.6242721530910207, "learning_rate": 8.675914265409952e-08, "loss": 0.2774, "step": 39211 }, { "epoch": 1.8368857450695648, "grad_norm": 0.5729327544169749, "learning_rate": 8.670962113692533e-08, "loss": 0.2535, "step": 39212 }, { "epoch": 1.8369325900594933, "grad_norm": 0.5863878664294437, "learning_rate": 8.666011350763032e-08, "loss": 0.2632, "step": 39213 }, { "epoch": 1.8369794350494215, "grad_norm": 0.606326656619359, "learning_rate": 8.661061976650009e-08, "loss": 0.2762, "step": 39214 }, { "epoch": 1.8370262800393498, "grad_norm": 0.6140033787251786, "learning_rate": 8.656113991381887e-08, "loss": 0.2765, "step": 39215 }, { "epoch": 1.8370731250292782, "grad_norm": 0.5634658171328897, "learning_rate": 8.651167394987198e-08, "loss": 0.2554, "step": 39216 }, { "epoch": 1.8371199700192065, "grad_norm": 0.6323393785837788, "learning_rate": 8.646222187494363e-08, "loss": 0.2861, "step": 39217 }, { "epoch": 1.8371668150091347, "grad_norm": 0.6004161051505119, "learning_rate": 8.641278368931833e-08, "loss": 0.2624, "step": 39218 }, { "epoch": 1.8372136599990632, "grad_norm": 0.6787658161185259, "learning_rate": 8.636335939328139e-08, "loss": 0.2725, "step": 39219 }, { "epoch": 1.8372605049889914, "grad_norm": 0.6352969805164056, "learning_rate": 8.631394898711621e-08, "loss": 0.2769, "step": 39220 }, { "epoch": 1.8373073499789196, "grad_norm": 0.6123230660370704, "learning_rate": 8.626455247110809e-08, "loss": 0.2757, "step": 39221 }, { "epoch": 1.837354194968848, "grad_norm": 0.5573371224116583, "learning_rate": 8.621516984554046e-08, "loss": 0.2505, "step": 39222 }, { "epoch": 1.8374010399587766, "grad_norm": 0.6055373806941765, "learning_rate": 8.616580111069778e-08, "loss": 0.252, "step": 39223 }, { "epoch": 1.8374478849487046, "grad_norm": 0.5811259407918197, "learning_rate": 8.611644626686427e-08, "loss": 0.2716, "step": 39224 }, { "epoch": 1.837494729938633, "grad_norm": 0.6290424252386899, "learning_rate": 8.606710531432361e-08, "loss": 0.2753, "step": 39225 }, { "epoch": 1.8375415749285615, "grad_norm": 0.5752658879385572, "learning_rate": 8.601777825336083e-08, "loss": 0.2641, "step": 39226 }, { "epoch": 1.8375884199184898, "grad_norm": 0.5817813768852754, "learning_rate": 8.596846508425877e-08, "loss": 0.2681, "step": 39227 }, { "epoch": 1.837635264908418, "grad_norm": 0.6124326777835224, "learning_rate": 8.591916580730109e-08, "loss": 0.2821, "step": 39228 }, { "epoch": 1.8376821098983465, "grad_norm": 0.5736039567222767, "learning_rate": 8.586988042277172e-08, "loss": 0.2505, "step": 39229 }, { "epoch": 1.8377289548882747, "grad_norm": 0.618588177986457, "learning_rate": 8.582060893095462e-08, "loss": 0.2748, "step": 39230 }, { "epoch": 1.837775799878203, "grad_norm": 0.5887142585160369, "learning_rate": 8.577135133213316e-08, "loss": 0.258, "step": 39231 }, { "epoch": 1.8378226448681314, "grad_norm": 0.6133342412420386, "learning_rate": 8.572210762659073e-08, "loss": 0.2771, "step": 39232 }, { "epoch": 1.8378694898580596, "grad_norm": 0.5613141473241355, "learning_rate": 8.567287781461126e-08, "loss": 0.2799, "step": 39233 }, { "epoch": 1.8379163348479879, "grad_norm": 0.612583447707381, "learning_rate": 8.56236618964773e-08, "loss": 0.2741, "step": 39234 }, { "epoch": 1.8379631798379163, "grad_norm": 0.5997265852257331, "learning_rate": 8.557445987247199e-08, "loss": 0.2708, "step": 39235 }, { "epoch": 1.8380100248278448, "grad_norm": 0.6066710765731662, "learning_rate": 8.552527174287894e-08, "loss": 0.2673, "step": 39236 }, { "epoch": 1.8380568698177728, "grad_norm": 0.6048293797742237, "learning_rate": 8.54760975079813e-08, "loss": 0.2764, "step": 39237 }, { "epoch": 1.8381037148077013, "grad_norm": 0.6117859194989529, "learning_rate": 8.54269371680616e-08, "loss": 0.2798, "step": 39238 }, { "epoch": 1.8381505597976298, "grad_norm": 0.6330813294190154, "learning_rate": 8.537779072340324e-08, "loss": 0.2754, "step": 39239 }, { "epoch": 1.838197404787558, "grad_norm": 0.6384651795123738, "learning_rate": 8.532865817428875e-08, "loss": 0.2734, "step": 39240 }, { "epoch": 1.8382442497774862, "grad_norm": 0.6088585533206277, "learning_rate": 8.527953952100098e-08, "loss": 0.28, "step": 39241 }, { "epoch": 1.8382910947674147, "grad_norm": 0.6373435072868454, "learning_rate": 8.52304347638222e-08, "loss": 0.2905, "step": 39242 }, { "epoch": 1.838337939757343, "grad_norm": 0.6304708473115734, "learning_rate": 8.518134390303551e-08, "loss": 0.2751, "step": 39243 }, { "epoch": 1.8383847847472712, "grad_norm": 0.5798547745864623, "learning_rate": 8.513226693892346e-08, "loss": 0.2686, "step": 39244 }, { "epoch": 1.8384316297371996, "grad_norm": 0.57315475627235, "learning_rate": 8.508320387176805e-08, "loss": 0.2718, "step": 39245 }, { "epoch": 1.8384784747271279, "grad_norm": 0.6446542701303907, "learning_rate": 8.503415470185184e-08, "loss": 0.2801, "step": 39246 }, { "epoch": 1.8385253197170561, "grad_norm": 0.6118666403323388, "learning_rate": 8.498511942945708e-08, "loss": 0.2759, "step": 39247 }, { "epoch": 1.8385721647069846, "grad_norm": 0.6209567942572012, "learning_rate": 8.493609805486636e-08, "loss": 0.2815, "step": 39248 }, { "epoch": 1.838619009696913, "grad_norm": 0.5692766280335105, "learning_rate": 8.488709057836081e-08, "loss": 0.2687, "step": 39249 }, { "epoch": 1.8386658546868413, "grad_norm": 0.5989896696449984, "learning_rate": 8.483809700022328e-08, "loss": 0.2785, "step": 39250 }, { "epoch": 1.8387126996767695, "grad_norm": 0.6357515569380557, "learning_rate": 8.478911732073574e-08, "loss": 0.2783, "step": 39251 }, { "epoch": 1.838759544666698, "grad_norm": 0.5896247672805208, "learning_rate": 8.474015154017939e-08, "loss": 0.262, "step": 39252 }, { "epoch": 1.8388063896566262, "grad_norm": 0.6186102932604896, "learning_rate": 8.469119965883649e-08, "loss": 0.2831, "step": 39253 }, { "epoch": 1.8388532346465545, "grad_norm": 0.5889781954699658, "learning_rate": 8.464226167698902e-08, "loss": 0.2509, "step": 39254 }, { "epoch": 1.838900079636483, "grad_norm": 0.5891922737131362, "learning_rate": 8.459333759491789e-08, "loss": 0.2708, "step": 39255 }, { "epoch": 1.8389469246264112, "grad_norm": 0.6361655301490706, "learning_rate": 8.454442741290509e-08, "loss": 0.2747, "step": 39256 }, { "epoch": 1.8389937696163394, "grad_norm": 0.5635664906609266, "learning_rate": 8.449553113123204e-08, "loss": 0.2602, "step": 39257 }, { "epoch": 1.8390406146062679, "grad_norm": 0.5844037975188139, "learning_rate": 8.444664875017994e-08, "loss": 0.2674, "step": 39258 }, { "epoch": 1.8390874595961964, "grad_norm": 0.6087747246084989, "learning_rate": 8.439778027003049e-08, "loss": 0.2795, "step": 39259 }, { "epoch": 1.8391343045861244, "grad_norm": 0.6873918244401783, "learning_rate": 8.434892569106429e-08, "loss": 0.2971, "step": 39260 }, { "epoch": 1.8391811495760528, "grad_norm": 0.602328407911905, "learning_rate": 8.430008501356335e-08, "loss": 0.2618, "step": 39261 }, { "epoch": 1.8392279945659813, "grad_norm": 0.610309649825345, "learning_rate": 8.4251258237808e-08, "loss": 0.2751, "step": 39262 }, { "epoch": 1.8392748395559095, "grad_norm": 0.6517540810543472, "learning_rate": 8.420244536407968e-08, "loss": 0.2698, "step": 39263 }, { "epoch": 1.8393216845458378, "grad_norm": 0.6210781121485711, "learning_rate": 8.4153646392659e-08, "loss": 0.2818, "step": 39264 }, { "epoch": 1.8393685295357662, "grad_norm": 0.5626320034495187, "learning_rate": 8.410486132382683e-08, "loss": 0.2635, "step": 39265 }, { "epoch": 1.8394153745256945, "grad_norm": 0.5989654022472161, "learning_rate": 8.405609015786381e-08, "loss": 0.2751, "step": 39266 }, { "epoch": 1.8394622195156227, "grad_norm": 0.5719688918155459, "learning_rate": 8.400733289505081e-08, "loss": 0.2575, "step": 39267 }, { "epoch": 1.8395090645055512, "grad_norm": 0.5990107681443388, "learning_rate": 8.395858953566871e-08, "loss": 0.2718, "step": 39268 }, { "epoch": 1.8395559094954794, "grad_norm": 0.6288801193246455, "learning_rate": 8.390986007999757e-08, "loss": 0.28, "step": 39269 }, { "epoch": 1.8396027544854077, "grad_norm": 0.6767519425435479, "learning_rate": 8.386114452831801e-08, "loss": 0.2891, "step": 39270 }, { "epoch": 1.8396495994753361, "grad_norm": 0.6183628694014285, "learning_rate": 8.381244288090978e-08, "loss": 0.2759, "step": 39271 }, { "epoch": 1.8396964444652646, "grad_norm": 0.576871148344616, "learning_rate": 8.376375513805407e-08, "loss": 0.2632, "step": 39272 }, { "epoch": 1.8397432894551926, "grad_norm": 0.6018285378065094, "learning_rate": 8.371508130003037e-08, "loss": 0.2615, "step": 39273 }, { "epoch": 1.839790134445121, "grad_norm": 0.6018582964434288, "learning_rate": 8.366642136711928e-08, "loss": 0.2671, "step": 39274 }, { "epoch": 1.8398369794350495, "grad_norm": 0.5911720735171458, "learning_rate": 8.361777533960058e-08, "loss": 0.2771, "step": 39275 }, { "epoch": 1.8398838244249778, "grad_norm": 0.5635984719436308, "learning_rate": 8.356914321775461e-08, "loss": 0.2596, "step": 39276 }, { "epoch": 1.839930669414906, "grad_norm": 0.660381420530069, "learning_rate": 8.35205250018603e-08, "loss": 0.2827, "step": 39277 }, { "epoch": 1.8399775144048345, "grad_norm": 0.5880639526265469, "learning_rate": 8.347192069219828e-08, "loss": 0.2644, "step": 39278 }, { "epoch": 1.8400243593947627, "grad_norm": 0.6100204212347949, "learning_rate": 8.342333028904775e-08, "loss": 0.2805, "step": 39279 }, { "epoch": 1.840071204384691, "grad_norm": 0.6356172077020898, "learning_rate": 8.337475379268878e-08, "loss": 0.2809, "step": 39280 }, { "epoch": 1.8401180493746194, "grad_norm": 0.5832792786917523, "learning_rate": 8.332619120340085e-08, "loss": 0.2473, "step": 39281 }, { "epoch": 1.8401648943645477, "grad_norm": 0.5730155583214359, "learning_rate": 8.327764252146291e-08, "loss": 0.2471, "step": 39282 }, { "epoch": 1.840211739354476, "grad_norm": 0.560013532840179, "learning_rate": 8.322910774715531e-08, "loss": 0.2443, "step": 39283 }, { "epoch": 1.8402585843444044, "grad_norm": 0.5925240401041552, "learning_rate": 8.318058688075614e-08, "loss": 0.2563, "step": 39284 }, { "epoch": 1.8403054293343328, "grad_norm": 0.6087292550817053, "learning_rate": 8.313207992254546e-08, "loss": 0.274, "step": 39285 }, { "epoch": 1.840352274324261, "grad_norm": 0.5787719104385308, "learning_rate": 8.308358687280222e-08, "loss": 0.2704, "step": 39286 }, { "epoch": 1.8403991193141893, "grad_norm": 0.5649773892041453, "learning_rate": 8.303510773180534e-08, "loss": 0.2531, "step": 39287 }, { "epoch": 1.8404459643041178, "grad_norm": 0.5989157008735965, "learning_rate": 8.298664249983435e-08, "loss": 0.2639, "step": 39288 }, { "epoch": 1.840492809294046, "grad_norm": 0.6081369138244093, "learning_rate": 8.293819117716733e-08, "loss": 0.2762, "step": 39289 }, { "epoch": 1.8405396542839743, "grad_norm": 0.6152438540481807, "learning_rate": 8.288975376408382e-08, "loss": 0.2672, "step": 39290 }, { "epoch": 1.8405864992739027, "grad_norm": 0.5955804198039614, "learning_rate": 8.284133026086189e-08, "loss": 0.2659, "step": 39291 }, { "epoch": 1.840633344263831, "grad_norm": 0.6161197728876113, "learning_rate": 8.279292066778077e-08, "loss": 0.2925, "step": 39292 }, { "epoch": 1.8406801892537592, "grad_norm": 0.6006497983324159, "learning_rate": 8.274452498511859e-08, "loss": 0.2553, "step": 39293 }, { "epoch": 1.8407270342436877, "grad_norm": 0.6428755974880959, "learning_rate": 8.269614321315456e-08, "loss": 0.2692, "step": 39294 }, { "epoch": 1.8407738792336161, "grad_norm": 0.5621596323438631, "learning_rate": 8.26477753521665e-08, "loss": 0.254, "step": 39295 }, { "epoch": 1.8408207242235441, "grad_norm": 0.5678411081754273, "learning_rate": 8.259942140243282e-08, "loss": 0.2569, "step": 39296 }, { "epoch": 1.8408675692134726, "grad_norm": 0.5908445613348149, "learning_rate": 8.255108136423217e-08, "loss": 0.2651, "step": 39297 }, { "epoch": 1.840914414203401, "grad_norm": 0.5767554350574229, "learning_rate": 8.250275523784212e-08, "loss": 0.2666, "step": 39298 }, { "epoch": 1.8409612591933293, "grad_norm": 0.6037031547282591, "learning_rate": 8.245444302354105e-08, "loss": 0.2676, "step": 39299 }, { "epoch": 1.8410081041832576, "grad_norm": 0.5781438846215178, "learning_rate": 8.240614472160735e-08, "loss": 0.2653, "step": 39300 }, { "epoch": 1.841054949173186, "grad_norm": 0.5931144382747422, "learning_rate": 8.23578603323183e-08, "loss": 0.2637, "step": 39301 }, { "epoch": 1.8411017941631143, "grad_norm": 0.614659902190194, "learning_rate": 8.230958985595228e-08, "loss": 0.2804, "step": 39302 }, { "epoch": 1.8411486391530425, "grad_norm": 0.6035517217618762, "learning_rate": 8.226133329278713e-08, "loss": 0.2685, "step": 39303 }, { "epoch": 1.841195484142971, "grad_norm": 0.599658522696257, "learning_rate": 8.221309064310012e-08, "loss": 0.263, "step": 39304 }, { "epoch": 1.8412423291328992, "grad_norm": 0.6315848478260557, "learning_rate": 8.216486190716883e-08, "loss": 0.2947, "step": 39305 }, { "epoch": 1.8412891741228274, "grad_norm": 0.6042123816765266, "learning_rate": 8.211664708527161e-08, "loss": 0.2552, "step": 39306 }, { "epoch": 1.841336019112756, "grad_norm": 0.6121300955101288, "learning_rate": 8.206844617768494e-08, "loss": 0.2722, "step": 39307 }, { "epoch": 1.8413828641026844, "grad_norm": 0.5981954596555709, "learning_rate": 8.202025918468664e-08, "loss": 0.2706, "step": 39308 }, { "epoch": 1.8414297090926124, "grad_norm": 0.5835176611593883, "learning_rate": 8.1972086106554e-08, "loss": 0.2641, "step": 39309 }, { "epoch": 1.8414765540825409, "grad_norm": 0.6211496161289772, "learning_rate": 8.192392694356483e-08, "loss": 0.2782, "step": 39310 }, { "epoch": 1.8415233990724693, "grad_norm": 0.6277076797610798, "learning_rate": 8.187578169599503e-08, "loss": 0.2719, "step": 39311 }, { "epoch": 1.8415702440623976, "grad_norm": 0.6210846107431394, "learning_rate": 8.18276503641227e-08, "loss": 0.2712, "step": 39312 }, { "epoch": 1.8416170890523258, "grad_norm": 0.6257086807586474, "learning_rate": 8.177953294822433e-08, "loss": 0.2831, "step": 39313 }, { "epoch": 1.8416639340422543, "grad_norm": 0.5580127479108008, "learning_rate": 8.173142944857687e-08, "loss": 0.2491, "step": 39314 }, { "epoch": 1.8417107790321825, "grad_norm": 0.5864491046767624, "learning_rate": 8.168333986545735e-08, "loss": 0.2773, "step": 39315 }, { "epoch": 1.8417576240221107, "grad_norm": 0.5491265828725956, "learning_rate": 8.163526419914219e-08, "loss": 0.2411, "step": 39316 }, { "epoch": 1.8418044690120392, "grad_norm": 0.6740520944477877, "learning_rate": 8.158720244990842e-08, "loss": 0.3036, "step": 39317 }, { "epoch": 1.8418513140019674, "grad_norm": 0.6172907939987232, "learning_rate": 8.153915461803275e-08, "loss": 0.2592, "step": 39318 }, { "epoch": 1.8418981589918957, "grad_norm": 0.6040396765298813, "learning_rate": 8.149112070379106e-08, "loss": 0.2647, "step": 39319 }, { "epoch": 1.8419450039818241, "grad_norm": 0.6541103384819088, "learning_rate": 8.144310070746009e-08, "loss": 0.2706, "step": 39320 }, { "epoch": 1.8419918489717526, "grad_norm": 0.6083895630975251, "learning_rate": 8.1395094629316e-08, "loss": 0.2758, "step": 39321 }, { "epoch": 1.8420386939616809, "grad_norm": 0.6293964176684781, "learning_rate": 8.13471024696355e-08, "loss": 0.2762, "step": 39322 }, { "epoch": 1.842085538951609, "grad_norm": 0.562017219068532, "learning_rate": 8.129912422869424e-08, "loss": 0.2585, "step": 39323 }, { "epoch": 1.8421323839415376, "grad_norm": 0.6231560975660333, "learning_rate": 8.125115990676919e-08, "loss": 0.303, "step": 39324 }, { "epoch": 1.8421792289314658, "grad_norm": 0.6593705724342723, "learning_rate": 8.120320950413569e-08, "loss": 0.2922, "step": 39325 }, { "epoch": 1.842226073921394, "grad_norm": 0.6092105949202516, "learning_rate": 8.115527302106935e-08, "loss": 0.2798, "step": 39326 }, { "epoch": 1.8422729189113225, "grad_norm": 0.5791287323172735, "learning_rate": 8.110735045784663e-08, "loss": 0.263, "step": 39327 }, { "epoch": 1.8423197639012507, "grad_norm": 0.5489781792175296, "learning_rate": 8.105944181474284e-08, "loss": 0.2523, "step": 39328 }, { "epoch": 1.842366608891179, "grad_norm": 0.6435238483227627, "learning_rate": 8.101154709203445e-08, "loss": 0.2871, "step": 39329 }, { "epoch": 1.8424134538811074, "grad_norm": 0.5918466854347265, "learning_rate": 8.096366628999653e-08, "loss": 0.275, "step": 39330 }, { "epoch": 1.842460298871036, "grad_norm": 0.6019024378294637, "learning_rate": 8.091579940890465e-08, "loss": 0.2672, "step": 39331 }, { "epoch": 1.842507143860964, "grad_norm": 0.5712506672378999, "learning_rate": 8.086794644903445e-08, "loss": 0.2663, "step": 39332 }, { "epoch": 1.8425539888508924, "grad_norm": 0.6167976879557165, "learning_rate": 8.082010741066098e-08, "loss": 0.2782, "step": 39333 }, { "epoch": 1.8426008338408209, "grad_norm": 0.6099753248149441, "learning_rate": 8.077228229405987e-08, "loss": 0.2705, "step": 39334 }, { "epoch": 1.842647678830749, "grad_norm": 0.5824985959560774, "learning_rate": 8.072447109950615e-08, "loss": 0.2695, "step": 39335 }, { "epoch": 1.8426945238206773, "grad_norm": 0.5729376226559663, "learning_rate": 8.067667382727518e-08, "loss": 0.2507, "step": 39336 }, { "epoch": 1.8427413688106058, "grad_norm": 0.5631892111552494, "learning_rate": 8.062889047764172e-08, "loss": 0.2596, "step": 39337 }, { "epoch": 1.842788213800534, "grad_norm": 0.60235030950406, "learning_rate": 8.058112105088083e-08, "loss": 0.2601, "step": 39338 }, { "epoch": 1.8428350587904623, "grad_norm": 0.5933748957884731, "learning_rate": 8.053336554726787e-08, "loss": 0.2666, "step": 39339 }, { "epoch": 1.8428819037803907, "grad_norm": 0.6040771907913983, "learning_rate": 8.048562396707704e-08, "loss": 0.2746, "step": 39340 }, { "epoch": 1.842928748770319, "grad_norm": 0.6276549608853932, "learning_rate": 8.043789631058313e-08, "loss": 0.269, "step": 39341 }, { "epoch": 1.8429755937602472, "grad_norm": 0.6623689261551494, "learning_rate": 8.039018257806147e-08, "loss": 0.3002, "step": 39342 }, { "epoch": 1.8430224387501757, "grad_norm": 0.6339368992248933, "learning_rate": 8.034248276978573e-08, "loss": 0.2915, "step": 39343 }, { "epoch": 1.8430692837401041, "grad_norm": 0.5881704151781827, "learning_rate": 8.029479688603097e-08, "loss": 0.2656, "step": 39344 }, { "epoch": 1.8431161287300322, "grad_norm": 0.5996766400933783, "learning_rate": 8.02471249270717e-08, "loss": 0.2646, "step": 39345 }, { "epoch": 1.8431629737199606, "grad_norm": 0.573819457794182, "learning_rate": 8.019946689318159e-08, "loss": 0.2503, "step": 39346 }, { "epoch": 1.843209818709889, "grad_norm": 0.5610173267584462, "learning_rate": 8.015182278463567e-08, "loss": 0.2645, "step": 39347 }, { "epoch": 1.8432566636998173, "grad_norm": 0.5815471520434028, "learning_rate": 8.010419260170793e-08, "loss": 0.2637, "step": 39348 }, { "epoch": 1.8433035086897456, "grad_norm": 0.5986255667390884, "learning_rate": 8.005657634467201e-08, "loss": 0.2691, "step": 39349 }, { "epoch": 1.843350353679674, "grad_norm": 0.602439193976143, "learning_rate": 8.000897401380243e-08, "loss": 0.2674, "step": 39350 }, { "epoch": 1.8433971986696023, "grad_norm": 0.6090437222414973, "learning_rate": 7.996138560937283e-08, "loss": 0.2582, "step": 39351 }, { "epoch": 1.8434440436595305, "grad_norm": 0.6026518127823444, "learning_rate": 7.991381113165747e-08, "loss": 0.2767, "step": 39352 }, { "epoch": 1.843490888649459, "grad_norm": 0.622644229178924, "learning_rate": 7.986625058092973e-08, "loss": 0.2838, "step": 39353 }, { "epoch": 1.8435377336393872, "grad_norm": 0.5679852794658553, "learning_rate": 7.981870395746327e-08, "loss": 0.2792, "step": 39354 }, { "epoch": 1.8435845786293155, "grad_norm": 0.6119467742320464, "learning_rate": 7.977117126153233e-08, "loss": 0.27, "step": 39355 }, { "epoch": 1.843631423619244, "grad_norm": 0.5855139317224416, "learning_rate": 7.972365249340946e-08, "loss": 0.264, "step": 39356 }, { "epoch": 1.8436782686091724, "grad_norm": 0.5717151879535494, "learning_rate": 7.967614765336889e-08, "loss": 0.2668, "step": 39357 }, { "epoch": 1.8437251135991006, "grad_norm": 0.6328960067298537, "learning_rate": 7.962865674168374e-08, "loss": 0.279, "step": 39358 }, { "epoch": 1.8437719585890289, "grad_norm": 0.5920746942784848, "learning_rate": 7.95811797586274e-08, "loss": 0.2686, "step": 39359 }, { "epoch": 1.8438188035789573, "grad_norm": 0.5844914848200486, "learning_rate": 7.953371670447297e-08, "loss": 0.2701, "step": 39360 }, { "epoch": 1.8438656485688856, "grad_norm": 0.5940789993008679, "learning_rate": 7.948626757949385e-08, "loss": 0.2618, "step": 39361 }, { "epoch": 1.8439124935588138, "grad_norm": 0.6128278267731061, "learning_rate": 7.94388323839626e-08, "loss": 0.2517, "step": 39362 }, { "epoch": 1.8439593385487423, "grad_norm": 0.5390737376980974, "learning_rate": 7.939141111815236e-08, "loss": 0.2546, "step": 39363 }, { "epoch": 1.8440061835386705, "grad_norm": 0.6339341320119529, "learning_rate": 7.934400378233647e-08, "loss": 0.2572, "step": 39364 }, { "epoch": 1.8440530285285988, "grad_norm": 0.5592198015012312, "learning_rate": 7.929661037678727e-08, "loss": 0.2601, "step": 39365 }, { "epoch": 1.8440998735185272, "grad_norm": 0.6469987538599365, "learning_rate": 7.924923090177784e-08, "loss": 0.2791, "step": 39366 }, { "epoch": 1.8441467185084557, "grad_norm": 0.5807984121317711, "learning_rate": 7.920186535758073e-08, "loss": 0.2681, "step": 39367 }, { "epoch": 1.8441935634983837, "grad_norm": 0.5722410061812728, "learning_rate": 7.915451374446798e-08, "loss": 0.258, "step": 39368 }, { "epoch": 1.8442404084883122, "grad_norm": 0.6380266463903428, "learning_rate": 7.910717606271295e-08, "loss": 0.2669, "step": 39369 }, { "epoch": 1.8442872534782406, "grad_norm": 0.5988650523786898, "learning_rate": 7.90598523125874e-08, "loss": 0.2634, "step": 39370 }, { "epoch": 1.8443340984681689, "grad_norm": 0.5848542223765598, "learning_rate": 7.901254249436386e-08, "loss": 0.2593, "step": 39371 }, { "epoch": 1.844380943458097, "grad_norm": 0.5825063854511652, "learning_rate": 7.896524660831462e-08, "loss": 0.2659, "step": 39372 }, { "epoch": 1.8444277884480256, "grad_norm": 0.6213313517699504, "learning_rate": 7.891796465471224e-08, "loss": 0.2712, "step": 39373 }, { "epoch": 1.8444746334379538, "grad_norm": 0.5574040857981989, "learning_rate": 7.887069663382873e-08, "loss": 0.2529, "step": 39374 }, { "epoch": 1.844521478427882, "grad_norm": 0.6094353644085951, "learning_rate": 7.882344254593527e-08, "loss": 0.2626, "step": 39375 }, { "epoch": 1.8445683234178105, "grad_norm": 0.6494981893834697, "learning_rate": 7.87762023913044e-08, "loss": 0.2908, "step": 39376 }, { "epoch": 1.8446151684077388, "grad_norm": 0.5591028028765099, "learning_rate": 7.872897617020786e-08, "loss": 0.2663, "step": 39377 }, { "epoch": 1.844662013397667, "grad_norm": 0.6245527061735259, "learning_rate": 7.868176388291765e-08, "loss": 0.2807, "step": 39378 }, { "epoch": 1.8447088583875955, "grad_norm": 0.6294629528540596, "learning_rate": 7.86345655297055e-08, "loss": 0.2713, "step": 39379 }, { "epoch": 1.844755703377524, "grad_norm": 0.5798011704338676, "learning_rate": 7.858738111084258e-08, "loss": 0.272, "step": 39380 }, { "epoch": 1.844802548367452, "grad_norm": 0.640982841213555, "learning_rate": 7.854021062660089e-08, "loss": 0.2872, "step": 39381 }, { "epoch": 1.8448493933573804, "grad_norm": 0.6728794083295611, "learning_rate": 7.849305407725133e-08, "loss": 0.2611, "step": 39382 }, { "epoch": 1.8448962383473089, "grad_norm": 0.6188516014861493, "learning_rate": 7.84459114630659e-08, "loss": 0.2654, "step": 39383 }, { "epoch": 1.844943083337237, "grad_norm": 0.5801011326788557, "learning_rate": 7.839878278431551e-08, "loss": 0.2662, "step": 39384 }, { "epoch": 1.8449899283271654, "grad_norm": 0.5944161830128721, "learning_rate": 7.835166804127159e-08, "loss": 0.2614, "step": 39385 }, { "epoch": 1.8450367733170938, "grad_norm": 0.5986876453386291, "learning_rate": 7.830456723420504e-08, "loss": 0.2877, "step": 39386 }, { "epoch": 1.845083618307022, "grad_norm": 0.6226112085737663, "learning_rate": 7.825748036338704e-08, "loss": 0.2651, "step": 39387 }, { "epoch": 1.8451304632969503, "grad_norm": 0.6506855051307557, "learning_rate": 7.821040742908875e-08, "loss": 0.2747, "step": 39388 }, { "epoch": 1.8451773082868788, "grad_norm": 0.5832047110108942, "learning_rate": 7.816334843158052e-08, "loss": 0.2507, "step": 39389 }, { "epoch": 1.845224153276807, "grad_norm": 0.5740308606373421, "learning_rate": 7.81163033711338e-08, "loss": 0.2672, "step": 39390 }, { "epoch": 1.8452709982667352, "grad_norm": 0.580489082395037, "learning_rate": 7.806927224801919e-08, "loss": 0.2818, "step": 39391 }, { "epoch": 1.8453178432566637, "grad_norm": 0.6159327531853638, "learning_rate": 7.802225506250676e-08, "loss": 0.2602, "step": 39392 }, { "epoch": 1.8453646882465922, "grad_norm": 0.6140662069331312, "learning_rate": 7.79752518148677e-08, "loss": 0.2766, "step": 39393 }, { "epoch": 1.8454115332365204, "grad_norm": 0.5724893660212746, "learning_rate": 7.792826250537233e-08, "loss": 0.282, "step": 39394 }, { "epoch": 1.8454583782264486, "grad_norm": 0.62920568682208, "learning_rate": 7.788128713429099e-08, "loss": 0.2739, "step": 39395 }, { "epoch": 1.845505223216377, "grad_norm": 0.5764947414648023, "learning_rate": 7.783432570189403e-08, "loss": 0.2579, "step": 39396 }, { "epoch": 1.8455520682063054, "grad_norm": 0.5802517524237124, "learning_rate": 7.778737820845206e-08, "loss": 0.2543, "step": 39397 }, { "epoch": 1.8455989131962336, "grad_norm": 0.6280702202109377, "learning_rate": 7.774044465423458e-08, "loss": 0.2551, "step": 39398 }, { "epoch": 1.845645758186162, "grad_norm": 0.5925774711992141, "learning_rate": 7.769352503951221e-08, "loss": 0.2695, "step": 39399 }, { "epoch": 1.8456926031760903, "grad_norm": 0.6029788152736658, "learning_rate": 7.764661936455447e-08, "loss": 0.2664, "step": 39400 }, { "epoch": 1.8457394481660185, "grad_norm": 0.6161830937659513, "learning_rate": 7.759972762963197e-08, "loss": 0.2754, "step": 39401 }, { "epoch": 1.845786293155947, "grad_norm": 0.5879175871723855, "learning_rate": 7.755284983501393e-08, "loss": 0.274, "step": 39402 }, { "epoch": 1.8458331381458755, "grad_norm": 0.5740313335087639, "learning_rate": 7.75059859809707e-08, "loss": 0.2744, "step": 39403 }, { "epoch": 1.8458799831358035, "grad_norm": 0.635387630291161, "learning_rate": 7.74591360677715e-08, "loss": 0.2776, "step": 39404 }, { "epoch": 1.845926828125732, "grad_norm": 0.5910199757569361, "learning_rate": 7.741230009568584e-08, "loss": 0.2724, "step": 39405 }, { "epoch": 1.8459736731156604, "grad_norm": 0.5969667765101515, "learning_rate": 7.736547806498379e-08, "loss": 0.2758, "step": 39406 }, { "epoch": 1.8460205181055886, "grad_norm": 0.5913612875218859, "learning_rate": 7.731866997593428e-08, "loss": 0.2703, "step": 39407 }, { "epoch": 1.8460673630955169, "grad_norm": 0.6443313815888564, "learning_rate": 7.727187582880713e-08, "loss": 0.2854, "step": 39408 }, { "epoch": 1.8461142080854454, "grad_norm": 0.5898015244137477, "learning_rate": 7.722509562387153e-08, "loss": 0.2532, "step": 39409 }, { "epoch": 1.8461610530753736, "grad_norm": 0.5825894032963149, "learning_rate": 7.717832936139646e-08, "loss": 0.2733, "step": 39410 }, { "epoch": 1.8462078980653018, "grad_norm": 0.6048974246053911, "learning_rate": 7.713157704165087e-08, "loss": 0.2791, "step": 39411 }, { "epoch": 1.8462547430552303, "grad_norm": 0.5960229452407035, "learning_rate": 7.708483866490424e-08, "loss": 0.2634, "step": 39412 }, { "epoch": 1.8463015880451585, "grad_norm": 0.5679948652676543, "learning_rate": 7.703811423142526e-08, "loss": 0.2685, "step": 39413 }, { "epoch": 1.8463484330350868, "grad_norm": 0.5575393225066844, "learning_rate": 7.69914037414829e-08, "loss": 0.2722, "step": 39414 }, { "epoch": 1.8463952780250152, "grad_norm": 0.5811404615037872, "learning_rate": 7.694470719534635e-08, "loss": 0.2661, "step": 39415 }, { "epoch": 1.8464421230149437, "grad_norm": 0.574030352691785, "learning_rate": 7.689802459328404e-08, "loss": 0.258, "step": 39416 }, { "epoch": 1.8464889680048717, "grad_norm": 0.5882621414495572, "learning_rate": 7.685135593556436e-08, "loss": 0.2683, "step": 39417 }, { "epoch": 1.8465358129948002, "grad_norm": 0.6065014328602756, "learning_rate": 7.680470122245598e-08, "loss": 0.2782, "step": 39418 }, { "epoch": 1.8465826579847286, "grad_norm": 0.5919771052717493, "learning_rate": 7.675806045422757e-08, "loss": 0.2729, "step": 39419 }, { "epoch": 1.846629502974657, "grad_norm": 0.5868233038003602, "learning_rate": 7.671143363114753e-08, "loss": 0.2755, "step": 39420 }, { "epoch": 1.8466763479645851, "grad_norm": 0.6326633990965017, "learning_rate": 7.666482075348397e-08, "loss": 0.2845, "step": 39421 }, { "epoch": 1.8467231929545136, "grad_norm": 0.6395466771916519, "learning_rate": 7.661822182150558e-08, "loss": 0.2701, "step": 39422 }, { "epoch": 1.8467700379444418, "grad_norm": 0.5649719816252285, "learning_rate": 7.657163683548047e-08, "loss": 0.2632, "step": 39423 }, { "epoch": 1.84681688293437, "grad_norm": 0.6190789354786039, "learning_rate": 7.652506579567593e-08, "loss": 0.2856, "step": 39424 }, { "epoch": 1.8468637279242985, "grad_norm": 0.5634616198304693, "learning_rate": 7.647850870236061e-08, "loss": 0.2566, "step": 39425 }, { "epoch": 1.8469105729142268, "grad_norm": 0.5818403591437968, "learning_rate": 7.64319655558024e-08, "loss": 0.2624, "step": 39426 }, { "epoch": 1.846957417904155, "grad_norm": 0.5711261079013953, "learning_rate": 7.638543635626883e-08, "loss": 0.2573, "step": 39427 }, { "epoch": 1.8470042628940835, "grad_norm": 0.6007583583120982, "learning_rate": 7.633892110402857e-08, "loss": 0.2656, "step": 39428 }, { "epoch": 1.847051107884012, "grad_norm": 0.6050429182445415, "learning_rate": 7.629241979934809e-08, "loss": 0.2613, "step": 39429 }, { "epoch": 1.8470979528739402, "grad_norm": 0.619929631264408, "learning_rate": 7.624593244249606e-08, "loss": 0.2637, "step": 39430 }, { "epoch": 1.8471447978638684, "grad_norm": 0.5686562167629775, "learning_rate": 7.619945903373893e-08, "loss": 0.2651, "step": 39431 }, { "epoch": 1.847191642853797, "grad_norm": 0.6412859342928919, "learning_rate": 7.615299957334483e-08, "loss": 0.2787, "step": 39432 }, { "epoch": 1.8472384878437251, "grad_norm": 0.564765691682898, "learning_rate": 7.61065540615813e-08, "loss": 0.2449, "step": 39433 }, { "epoch": 1.8472853328336534, "grad_norm": 0.6161584199646599, "learning_rate": 7.606012249871536e-08, "loss": 0.2695, "step": 39434 }, { "epoch": 1.8473321778235818, "grad_norm": 0.5941561199872227, "learning_rate": 7.601370488501375e-08, "loss": 0.2888, "step": 39435 }, { "epoch": 1.84737902281351, "grad_norm": 0.5569882736405318, "learning_rate": 7.59673012207443e-08, "loss": 0.2595, "step": 39436 }, { "epoch": 1.8474258678034383, "grad_norm": 0.6205059609812609, "learning_rate": 7.592091150617375e-08, "loss": 0.2743, "step": 39437 }, { "epoch": 1.8474727127933668, "grad_norm": 0.5867808351867159, "learning_rate": 7.587453574156912e-08, "loss": 0.2643, "step": 39438 }, { "epoch": 1.8475195577832952, "grad_norm": 0.6150906865731722, "learning_rate": 7.58281739271971e-08, "loss": 0.2578, "step": 39439 }, { "epoch": 1.8475664027732233, "grad_norm": 0.6049702211851585, "learning_rate": 7.578182606332502e-08, "loss": 0.277, "step": 39440 }, { "epoch": 1.8476132477631517, "grad_norm": 0.6175164358516273, "learning_rate": 7.573549215021875e-08, "loss": 0.2769, "step": 39441 }, { "epoch": 1.8476600927530802, "grad_norm": 0.6306372179390276, "learning_rate": 7.568917218814559e-08, "loss": 0.2873, "step": 39442 }, { "epoch": 1.8477069377430084, "grad_norm": 0.5936366642021063, "learning_rate": 7.564286617737226e-08, "loss": 0.2723, "step": 39443 }, { "epoch": 1.8477537827329367, "grad_norm": 0.5524883097235113, "learning_rate": 7.55965741181644e-08, "loss": 0.2692, "step": 39444 }, { "epoch": 1.8478006277228651, "grad_norm": 0.6076953084779493, "learning_rate": 7.5550296010789e-08, "loss": 0.2695, "step": 39445 }, { "epoch": 1.8478474727127934, "grad_norm": 0.5737690697116145, "learning_rate": 7.550403185551253e-08, "loss": 0.2658, "step": 39446 }, { "epoch": 1.8478943177027216, "grad_norm": 0.5945662034223931, "learning_rate": 7.545778165260087e-08, "loss": 0.2644, "step": 39447 }, { "epoch": 1.84794116269265, "grad_norm": 0.5820760702633136, "learning_rate": 7.54115454023202e-08, "loss": 0.264, "step": 39448 }, { "epoch": 1.8479880076825783, "grad_norm": 0.5822643270819166, "learning_rate": 7.536532310493672e-08, "loss": 0.2667, "step": 39449 }, { "epoch": 1.8480348526725066, "grad_norm": 0.5152133687508853, "learning_rate": 7.531911476071658e-08, "loss": 0.2476, "step": 39450 }, { "epoch": 1.848081697662435, "grad_norm": 0.5943847868918846, "learning_rate": 7.527292036992511e-08, "loss": 0.2791, "step": 39451 }, { "epoch": 1.8481285426523635, "grad_norm": 0.5717204752540839, "learning_rate": 7.522673993282908e-08, "loss": 0.2646, "step": 39452 }, { "epoch": 1.8481753876422915, "grad_norm": 0.647177911830244, "learning_rate": 7.518057344969326e-08, "loss": 0.2936, "step": 39453 }, { "epoch": 1.84822223263222, "grad_norm": 0.6048876343724235, "learning_rate": 7.513442092078382e-08, "loss": 0.2823, "step": 39454 }, { "epoch": 1.8482690776221484, "grad_norm": 0.6505252794617788, "learning_rate": 7.508828234636639e-08, "loss": 0.2901, "step": 39455 }, { "epoch": 1.8483159226120767, "grad_norm": 0.6295782268474667, "learning_rate": 7.504215772670631e-08, "loss": 0.2773, "step": 39456 }, { "epoch": 1.848362767602005, "grad_norm": 0.5664578662868921, "learning_rate": 7.499604706206948e-08, "loss": 0.2643, "step": 39457 }, { "epoch": 1.8484096125919334, "grad_norm": 0.6247812935693488, "learning_rate": 7.494995035272095e-08, "loss": 0.2677, "step": 39458 }, { "epoch": 1.8484564575818616, "grad_norm": 0.6175428457238548, "learning_rate": 7.490386759892581e-08, "loss": 0.2757, "step": 39459 }, { "epoch": 1.8485033025717899, "grad_norm": 0.5960728205879587, "learning_rate": 7.485779880094912e-08, "loss": 0.2558, "step": 39460 }, { "epoch": 1.8485501475617183, "grad_norm": 0.6072940667399217, "learning_rate": 7.481174395905622e-08, "loss": 0.2659, "step": 39461 }, { "epoch": 1.8485969925516466, "grad_norm": 0.6143035100044986, "learning_rate": 7.476570307351244e-08, "loss": 0.272, "step": 39462 }, { "epoch": 1.8486438375415748, "grad_norm": 0.6212139260183035, "learning_rate": 7.47196761445823e-08, "loss": 0.2746, "step": 39463 }, { "epoch": 1.8486906825315033, "grad_norm": 0.5603638067478824, "learning_rate": 7.467366317253117e-08, "loss": 0.2626, "step": 39464 }, { "epoch": 1.8487375275214317, "grad_norm": 0.6245187688132471, "learning_rate": 7.462766415762351e-08, "loss": 0.2767, "step": 39465 }, { "epoch": 1.84878437251136, "grad_norm": 0.6418025235693684, "learning_rate": 7.458167910012387e-08, "loss": 0.2805, "step": 39466 }, { "epoch": 1.8488312175012882, "grad_norm": 0.6406542564008556, "learning_rate": 7.453570800029675e-08, "loss": 0.283, "step": 39467 }, { "epoch": 1.8488780624912167, "grad_norm": 0.6743402991615769, "learning_rate": 7.448975085840748e-08, "loss": 0.2989, "step": 39468 }, { "epoch": 1.848924907481145, "grad_norm": 0.5797974887363886, "learning_rate": 7.444380767471975e-08, "loss": 0.2731, "step": 39469 }, { "epoch": 1.8489717524710731, "grad_norm": 0.5872536167336538, "learning_rate": 7.439787844949864e-08, "loss": 0.2554, "step": 39470 }, { "epoch": 1.8490185974610016, "grad_norm": 0.6042713215468334, "learning_rate": 7.435196318300781e-08, "loss": 0.2764, "step": 39471 }, { "epoch": 1.8490654424509299, "grad_norm": 0.5863236850425214, "learning_rate": 7.430606187551203e-08, "loss": 0.2598, "step": 39472 }, { "epoch": 1.849112287440858, "grad_norm": 0.5979999387021666, "learning_rate": 7.426017452727474e-08, "loss": 0.285, "step": 39473 }, { "epoch": 1.8491591324307866, "grad_norm": 0.6162414693962355, "learning_rate": 7.42143011385607e-08, "loss": 0.2705, "step": 39474 }, { "epoch": 1.849205977420715, "grad_norm": 0.5768906933566543, "learning_rate": 7.41684417096336e-08, "loss": 0.2585, "step": 39475 }, { "epoch": 1.849252822410643, "grad_norm": 0.6394411328690112, "learning_rate": 7.412259624075768e-08, "loss": 0.2936, "step": 39476 }, { "epoch": 1.8492996674005715, "grad_norm": 0.5940405829009485, "learning_rate": 7.407676473219604e-08, "loss": 0.2629, "step": 39477 }, { "epoch": 1.8493465123905, "grad_norm": 0.6041196409165098, "learning_rate": 7.403094718421322e-08, "loss": 0.2748, "step": 39478 }, { "epoch": 1.8493933573804282, "grad_norm": 0.6000050477485769, "learning_rate": 7.39851435970726e-08, "loss": 0.2828, "step": 39479 }, { "epoch": 1.8494402023703564, "grad_norm": 0.6230610983749396, "learning_rate": 7.393935397103757e-08, "loss": 0.2861, "step": 39480 }, { "epoch": 1.849487047360285, "grad_norm": 0.5984404656543352, "learning_rate": 7.389357830637156e-08, "loss": 0.2771, "step": 39481 }, { "epoch": 1.8495338923502131, "grad_norm": 0.6326696075566243, "learning_rate": 7.384781660333878e-08, "loss": 0.287, "step": 39482 }, { "epoch": 1.8495807373401414, "grad_norm": 0.5889032915875988, "learning_rate": 7.380206886220153e-08, "loss": 0.2552, "step": 39483 }, { "epoch": 1.8496275823300699, "grad_norm": 0.6382110168774038, "learning_rate": 7.375633508322378e-08, "loss": 0.2758, "step": 39484 }, { "epoch": 1.849674427319998, "grad_norm": 0.6019545856870262, "learning_rate": 7.371061526666861e-08, "loss": 0.2655, "step": 39485 }, { "epoch": 1.8497212723099263, "grad_norm": 0.566689332808872, "learning_rate": 7.366490941279892e-08, "loss": 0.253, "step": 39486 }, { "epoch": 1.8497681172998548, "grad_norm": 0.660845740584634, "learning_rate": 7.361921752187779e-08, "loss": 0.2969, "step": 39487 }, { "epoch": 1.8498149622897833, "grad_norm": 0.593541325566154, "learning_rate": 7.357353959416807e-08, "loss": 0.2677, "step": 39488 }, { "epoch": 1.8498618072797113, "grad_norm": 0.6144063069382514, "learning_rate": 7.352787562993319e-08, "loss": 0.2767, "step": 39489 }, { "epoch": 1.8499086522696397, "grad_norm": 0.5565965746846385, "learning_rate": 7.348222562943514e-08, "loss": 0.248, "step": 39490 }, { "epoch": 1.8499554972595682, "grad_norm": 0.6148145911487449, "learning_rate": 7.343658959293704e-08, "loss": 0.2798, "step": 39491 }, { "epoch": 1.8500023422494964, "grad_norm": 0.637031687383985, "learning_rate": 7.339096752070201e-08, "loss": 0.2938, "step": 39492 }, { "epoch": 1.8500491872394247, "grad_norm": 0.6294252246621375, "learning_rate": 7.334535941299154e-08, "loss": 0.281, "step": 39493 }, { "epoch": 1.8500960322293531, "grad_norm": 0.5779227237794051, "learning_rate": 7.329976527006843e-08, "loss": 0.2677, "step": 39494 }, { "epoch": 1.8501428772192814, "grad_norm": 0.6409525740739577, "learning_rate": 7.325418509219584e-08, "loss": 0.2771, "step": 39495 }, { "epoch": 1.8501897222092096, "grad_norm": 0.6346908689993104, "learning_rate": 7.320861887963494e-08, "loss": 0.268, "step": 39496 }, { "epoch": 1.850236567199138, "grad_norm": 0.6314190595857633, "learning_rate": 7.316306663264854e-08, "loss": 0.281, "step": 39497 }, { "epoch": 1.8502834121890663, "grad_norm": 0.6054908839411194, "learning_rate": 7.3117528351499e-08, "loss": 0.2836, "step": 39498 }, { "epoch": 1.8503302571789946, "grad_norm": 0.5641695015644208, "learning_rate": 7.307200403644798e-08, "loss": 0.265, "step": 39499 }, { "epoch": 1.850377102168923, "grad_norm": 0.6032876496791242, "learning_rate": 7.302649368775754e-08, "loss": 0.274, "step": 39500 }, { "epoch": 1.8504239471588515, "grad_norm": 0.6171037906530604, "learning_rate": 7.298099730568997e-08, "loss": 0.2597, "step": 39501 }, { "epoch": 1.8504707921487797, "grad_norm": 0.5736892675299857, "learning_rate": 7.293551489050643e-08, "loss": 0.2557, "step": 39502 }, { "epoch": 1.850517637138708, "grad_norm": 0.5601659526584142, "learning_rate": 7.289004644246894e-08, "loss": 0.2567, "step": 39503 }, { "epoch": 1.8505644821286364, "grad_norm": 0.6047060751836904, "learning_rate": 7.284459196183924e-08, "loss": 0.2666, "step": 39504 }, { "epoch": 1.8506113271185647, "grad_norm": 0.6095013816327175, "learning_rate": 7.279915144887878e-08, "loss": 0.2625, "step": 39505 }, { "epoch": 1.850658172108493, "grad_norm": 0.5883703295404438, "learning_rate": 7.275372490384929e-08, "loss": 0.2723, "step": 39506 }, { "epoch": 1.8507050170984214, "grad_norm": 0.5722668558868003, "learning_rate": 7.270831232701225e-08, "loss": 0.2678, "step": 39507 }, { "epoch": 1.8507518620883496, "grad_norm": 0.6027172836349758, "learning_rate": 7.266291371862854e-08, "loss": 0.2564, "step": 39508 }, { "epoch": 1.8507987070782779, "grad_norm": 0.608623916474929, "learning_rate": 7.261752907895964e-08, "loss": 0.2778, "step": 39509 }, { "epoch": 1.8508455520682063, "grad_norm": 0.5792080542304664, "learning_rate": 7.25721584082667e-08, "loss": 0.261, "step": 39510 }, { "epoch": 1.8508923970581348, "grad_norm": 0.6041982172482336, "learning_rate": 7.252680170681092e-08, "loss": 0.2656, "step": 39511 }, { "epoch": 1.8509392420480628, "grad_norm": 0.6211926767755735, "learning_rate": 7.248145897485348e-08, "loss": 0.2646, "step": 39512 }, { "epoch": 1.8509860870379913, "grad_norm": 0.6061644030191177, "learning_rate": 7.2436130212655e-08, "loss": 0.2757, "step": 39513 }, { "epoch": 1.8510329320279197, "grad_norm": 0.5587004805782818, "learning_rate": 7.239081542047665e-08, "loss": 0.2479, "step": 39514 }, { "epoch": 1.851079777017848, "grad_norm": 0.6001792581298481, "learning_rate": 7.23455145985788e-08, "loss": 0.2585, "step": 39515 }, { "epoch": 1.8511266220077762, "grad_norm": 0.5815829219330337, "learning_rate": 7.230022774722234e-08, "loss": 0.2785, "step": 39516 }, { "epoch": 1.8511734669977047, "grad_norm": 0.6120272064601382, "learning_rate": 7.22549548666679e-08, "loss": 0.2765, "step": 39517 }, { "epoch": 1.851220311987633, "grad_norm": 0.6205373783824059, "learning_rate": 7.22096959571758e-08, "loss": 0.2729, "step": 39518 }, { "epoch": 1.8512671569775612, "grad_norm": 0.5986158318588756, "learning_rate": 7.216445101900699e-08, "loss": 0.2683, "step": 39519 }, { "epoch": 1.8513140019674896, "grad_norm": 0.5868427018357506, "learning_rate": 7.21192200524215e-08, "loss": 0.2743, "step": 39520 }, { "epoch": 1.8513608469574179, "grad_norm": 0.6407218501444583, "learning_rate": 7.20740030576797e-08, "loss": 0.2774, "step": 39521 }, { "epoch": 1.8514076919473461, "grad_norm": 0.5949070058141417, "learning_rate": 7.202880003504165e-08, "loss": 0.266, "step": 39522 }, { "epoch": 1.8514545369372746, "grad_norm": 0.6042772697887147, "learning_rate": 7.19836109847677e-08, "loss": 0.2816, "step": 39523 }, { "epoch": 1.851501381927203, "grad_norm": 0.6260751582991873, "learning_rate": 7.193843590711763e-08, "loss": 0.2664, "step": 39524 }, { "epoch": 1.851548226917131, "grad_norm": 0.5880240323536248, "learning_rate": 7.189327480235181e-08, "loss": 0.2708, "step": 39525 }, { "epoch": 1.8515950719070595, "grad_norm": 0.6116905665812855, "learning_rate": 7.184812767072974e-08, "loss": 0.2659, "step": 39526 }, { "epoch": 1.851641916896988, "grad_norm": 0.5695153761651883, "learning_rate": 7.180299451251149e-08, "loss": 0.2591, "step": 39527 }, { "epoch": 1.8516887618869162, "grad_norm": 0.6216368356465883, "learning_rate": 7.175787532795686e-08, "loss": 0.2752, "step": 39528 }, { "epoch": 1.8517356068768445, "grad_norm": 0.6124899066719964, "learning_rate": 7.171277011732508e-08, "loss": 0.2693, "step": 39529 }, { "epoch": 1.851782451866773, "grad_norm": 0.5924156280114784, "learning_rate": 7.166767888087623e-08, "loss": 0.2815, "step": 39530 }, { "epoch": 1.8518292968567012, "grad_norm": 0.586301163421329, "learning_rate": 7.162260161886952e-08, "loss": 0.2687, "step": 39531 }, { "epoch": 1.8518761418466294, "grad_norm": 0.6311037114345344, "learning_rate": 7.157753833156422e-08, "loss": 0.2878, "step": 39532 }, { "epoch": 1.8519229868365579, "grad_norm": 0.6084759114210562, "learning_rate": 7.153248901921983e-08, "loss": 0.2751, "step": 39533 }, { "epoch": 1.8519698318264861, "grad_norm": 0.5969467152306445, "learning_rate": 7.148745368209586e-08, "loss": 0.2769, "step": 39534 }, { "epoch": 1.8520166768164144, "grad_norm": 0.602427664439061, "learning_rate": 7.144243232045073e-08, "loss": 0.2778, "step": 39535 }, { "epoch": 1.8520635218063428, "grad_norm": 0.5875507928413162, "learning_rate": 7.139742493454421e-08, "loss": 0.269, "step": 39536 }, { "epoch": 1.8521103667962713, "grad_norm": 0.6158462830605652, "learning_rate": 7.135243152463556e-08, "loss": 0.2676, "step": 39537 }, { "epoch": 1.8521572117861995, "grad_norm": 0.5905470800376807, "learning_rate": 7.13074520909826e-08, "loss": 0.273, "step": 39538 }, { "epoch": 1.8522040567761278, "grad_norm": 0.6384692736523824, "learning_rate": 7.126248663384517e-08, "loss": 0.2704, "step": 39539 }, { "epoch": 1.8522509017660562, "grad_norm": 0.5681011900609668, "learning_rate": 7.121753515348134e-08, "loss": 0.2403, "step": 39540 }, { "epoch": 1.8522977467559845, "grad_norm": 0.6206228157263535, "learning_rate": 7.117259765015067e-08, "loss": 0.2606, "step": 39541 }, { "epoch": 1.8523445917459127, "grad_norm": 0.5993306842119903, "learning_rate": 7.112767412411098e-08, "loss": 0.2725, "step": 39542 }, { "epoch": 1.8523914367358412, "grad_norm": 0.5613822156633455, "learning_rate": 7.108276457562124e-08, "loss": 0.2599, "step": 39543 }, { "epoch": 1.8524382817257694, "grad_norm": 0.5837812493179144, "learning_rate": 7.10378690049393e-08, "loss": 0.2696, "step": 39544 }, { "epoch": 1.8524851267156976, "grad_norm": 0.5926115869494963, "learning_rate": 7.099298741232414e-08, "loss": 0.2579, "step": 39545 }, { "epoch": 1.8525319717056261, "grad_norm": 0.5897558597289474, "learning_rate": 7.094811979803356e-08, "loss": 0.2798, "step": 39546 }, { "epoch": 1.8525788166955546, "grad_norm": 0.5857601052566354, "learning_rate": 7.090326616232629e-08, "loss": 0.2618, "step": 39547 }, { "epoch": 1.8526256616854826, "grad_norm": 0.6148212838043826, "learning_rate": 7.085842650546043e-08, "loss": 0.2595, "step": 39548 }, { "epoch": 1.852672506675411, "grad_norm": 0.6095120086432837, "learning_rate": 7.081360082769384e-08, "loss": 0.265, "step": 39549 }, { "epoch": 1.8527193516653395, "grad_norm": 0.6092277984158816, "learning_rate": 7.076878912928409e-08, "loss": 0.2922, "step": 39550 }, { "epoch": 1.8527661966552678, "grad_norm": 0.5854857547156909, "learning_rate": 7.07239914104893e-08, "loss": 0.2712, "step": 39551 }, { "epoch": 1.852813041645196, "grad_norm": 0.6050316857622582, "learning_rate": 7.067920767156732e-08, "loss": 0.2722, "step": 39552 }, { "epoch": 1.8528598866351245, "grad_norm": 0.5982209164544204, "learning_rate": 7.063443791277602e-08, "loss": 0.2587, "step": 39553 }, { "epoch": 1.8529067316250527, "grad_norm": 0.6396335170098089, "learning_rate": 7.058968213437295e-08, "loss": 0.2722, "step": 39554 }, { "epoch": 1.852953576614981, "grad_norm": 0.6167758203643283, "learning_rate": 7.054494033661596e-08, "loss": 0.2825, "step": 39555 }, { "epoch": 1.8530004216049094, "grad_norm": 0.5783896795087945, "learning_rate": 7.050021251976208e-08, "loss": 0.276, "step": 39556 }, { "epoch": 1.8530472665948376, "grad_norm": 0.5827153204399215, "learning_rate": 7.04554986840686e-08, "loss": 0.2721, "step": 39557 }, { "epoch": 1.853094111584766, "grad_norm": 0.6964657157622278, "learning_rate": 7.041079882979334e-08, "loss": 0.2846, "step": 39558 }, { "epoch": 1.8531409565746944, "grad_norm": 0.6043354020908565, "learning_rate": 7.03661129571931e-08, "loss": 0.2776, "step": 39559 }, { "epoch": 1.8531878015646228, "grad_norm": 0.603975976193684, "learning_rate": 7.03214410665251e-08, "loss": 0.2708, "step": 39560 }, { "epoch": 1.8532346465545508, "grad_norm": 0.577158037686676, "learning_rate": 7.027678315804671e-08, "loss": 0.2633, "step": 39561 }, { "epoch": 1.8532814915444793, "grad_norm": 0.5872524912830956, "learning_rate": 7.023213923201489e-08, "loss": 0.248, "step": 39562 }, { "epoch": 1.8533283365344078, "grad_norm": 0.598019773526494, "learning_rate": 7.018750928868639e-08, "loss": 0.2699, "step": 39563 }, { "epoch": 1.853375181524336, "grad_norm": 0.6087337791668455, "learning_rate": 7.014289332831769e-08, "loss": 0.2841, "step": 39564 }, { "epoch": 1.8534220265142642, "grad_norm": 0.597212348166132, "learning_rate": 7.009829135116608e-08, "loss": 0.2577, "step": 39565 }, { "epoch": 1.8534688715041927, "grad_norm": 0.6006372714657028, "learning_rate": 7.005370335748829e-08, "loss": 0.2789, "step": 39566 }, { "epoch": 1.853515716494121, "grad_norm": 0.584781622854406, "learning_rate": 7.000912934754023e-08, "loss": 0.2788, "step": 39567 }, { "epoch": 1.8535625614840492, "grad_norm": 0.5986968305020728, "learning_rate": 6.996456932157946e-08, "loss": 0.2757, "step": 39568 }, { "epoch": 1.8536094064739776, "grad_norm": 0.5904556580513599, "learning_rate": 6.992002327986164e-08, "loss": 0.2583, "step": 39569 }, { "epoch": 1.853656251463906, "grad_norm": 0.6207168940396118, "learning_rate": 6.98754912226432e-08, "loss": 0.2935, "step": 39570 }, { "epoch": 1.8537030964538341, "grad_norm": 0.6226201220335236, "learning_rate": 6.983097315018062e-08, "loss": 0.2753, "step": 39571 }, { "epoch": 1.8537499414437626, "grad_norm": 0.6188017607650281, "learning_rate": 6.978646906272979e-08, "loss": 0.2709, "step": 39572 }, { "epoch": 1.853796786433691, "grad_norm": 0.5630620040725778, "learning_rate": 6.974197896054718e-08, "loss": 0.2546, "step": 39573 }, { "epoch": 1.8538436314236193, "grad_norm": 0.584571126243313, "learning_rate": 6.969750284388871e-08, "loss": 0.2677, "step": 39574 }, { "epoch": 1.8538904764135475, "grad_norm": 0.6146744985847318, "learning_rate": 6.965304071300999e-08, "loss": 0.2827, "step": 39575 }, { "epoch": 1.853937321403476, "grad_norm": 0.59079308741108, "learning_rate": 6.960859256816749e-08, "loss": 0.2543, "step": 39576 }, { "epoch": 1.8539841663934042, "grad_norm": 0.5639157586809821, "learning_rate": 6.956415840961656e-08, "loss": 0.2648, "step": 39577 }, { "epoch": 1.8540310113833325, "grad_norm": 0.5740216704632933, "learning_rate": 6.95197382376131e-08, "loss": 0.2549, "step": 39578 }, { "epoch": 1.854077856373261, "grad_norm": 0.5801708126907605, "learning_rate": 6.947533205241247e-08, "loss": 0.2859, "step": 39579 }, { "epoch": 1.8541247013631892, "grad_norm": 0.6520051432193458, "learning_rate": 6.943093985427086e-08, "loss": 0.2826, "step": 39580 }, { "epoch": 1.8541715463531174, "grad_norm": 0.5743544791013578, "learning_rate": 6.938656164344276e-08, "loss": 0.2532, "step": 39581 }, { "epoch": 1.854218391343046, "grad_norm": 0.5534248051131517, "learning_rate": 6.934219742018439e-08, "loss": 0.2663, "step": 39582 }, { "epoch": 1.8542652363329744, "grad_norm": 0.6271282143141302, "learning_rate": 6.92978471847508e-08, "loss": 0.2739, "step": 39583 }, { "epoch": 1.8543120813229024, "grad_norm": 0.5968311441002466, "learning_rate": 6.925351093739679e-08, "loss": 0.2783, "step": 39584 }, { "epoch": 1.8543589263128308, "grad_norm": 0.5993855470937063, "learning_rate": 6.920918867837829e-08, "loss": 0.2629, "step": 39585 }, { "epoch": 1.8544057713027593, "grad_norm": 0.5970884783547901, "learning_rate": 6.916488040794978e-08, "loss": 0.2743, "step": 39586 }, { "epoch": 1.8544526162926875, "grad_norm": 0.6249666061305126, "learning_rate": 6.912058612636635e-08, "loss": 0.2561, "step": 39587 }, { "epoch": 1.8544994612826158, "grad_norm": 0.6415164170290262, "learning_rate": 6.907630583388309e-08, "loss": 0.2769, "step": 39588 }, { "epoch": 1.8545463062725442, "grad_norm": 0.5663840232129315, "learning_rate": 6.903203953075449e-08, "loss": 0.2693, "step": 39589 }, { "epoch": 1.8545931512624725, "grad_norm": 0.5798479294967711, "learning_rate": 6.898778721723592e-08, "loss": 0.2737, "step": 39590 }, { "epoch": 1.8546399962524007, "grad_norm": 0.5776354560235063, "learning_rate": 6.894354889358135e-08, "loss": 0.2614, "step": 39591 }, { "epoch": 1.8546868412423292, "grad_norm": 0.6076173250389637, "learning_rate": 6.889932456004583e-08, "loss": 0.2724, "step": 39592 }, { "epoch": 1.8547336862322574, "grad_norm": 0.5709507061499272, "learning_rate": 6.885511421688335e-08, "loss": 0.2815, "step": 39593 }, { "epoch": 1.8547805312221857, "grad_norm": 0.6290979784169688, "learning_rate": 6.881091786434868e-08, "loss": 0.2929, "step": 39594 }, { "epoch": 1.8548273762121141, "grad_norm": 0.6234836773779745, "learning_rate": 6.876673550269608e-08, "loss": 0.281, "step": 39595 }, { "epoch": 1.8548742212020426, "grad_norm": 0.5410608326420943, "learning_rate": 6.872256713218006e-08, "loss": 0.245, "step": 39596 }, { "epoch": 1.8549210661919706, "grad_norm": 0.5571055842881272, "learning_rate": 6.867841275305459e-08, "loss": 0.2469, "step": 39597 }, { "epoch": 1.854967911181899, "grad_norm": 0.6139775475787916, "learning_rate": 6.863427236557391e-08, "loss": 0.2517, "step": 39598 }, { "epoch": 1.8550147561718275, "grad_norm": 0.5785081685028779, "learning_rate": 6.85901459699917e-08, "loss": 0.2511, "step": 39599 }, { "epoch": 1.8550616011617558, "grad_norm": 0.6173432264301845, "learning_rate": 6.854603356656193e-08, "loss": 0.2806, "step": 39600 }, { "epoch": 1.855108446151684, "grad_norm": 0.5816684450946272, "learning_rate": 6.850193515553855e-08, "loss": 0.2619, "step": 39601 }, { "epoch": 1.8551552911416125, "grad_norm": 0.624936831129078, "learning_rate": 6.845785073717553e-08, "loss": 0.2711, "step": 39602 }, { "epoch": 1.8552021361315407, "grad_norm": 0.5873907822726616, "learning_rate": 6.841378031172658e-08, "loss": 0.2659, "step": 39603 }, { "epoch": 1.855248981121469, "grad_norm": 0.6024381145920386, "learning_rate": 6.836972387944507e-08, "loss": 0.2724, "step": 39604 }, { "epoch": 1.8552958261113974, "grad_norm": 0.5878227787751542, "learning_rate": 6.832568144058499e-08, "loss": 0.2564, "step": 39605 }, { "epoch": 1.8553426711013257, "grad_norm": 0.6028965430240335, "learning_rate": 6.82816529953989e-08, "loss": 0.2623, "step": 39606 }, { "epoch": 1.855389516091254, "grad_norm": 0.555044958574678, "learning_rate": 6.823763854414078e-08, "loss": 0.2449, "step": 39607 }, { "epoch": 1.8554363610811824, "grad_norm": 0.5397537311541905, "learning_rate": 6.819363808706403e-08, "loss": 0.2557, "step": 39608 }, { "epoch": 1.8554832060711108, "grad_norm": 0.5662900165652641, "learning_rate": 6.81496516244215e-08, "loss": 0.2666, "step": 39609 }, { "epoch": 1.855530051061039, "grad_norm": 0.5517662195496283, "learning_rate": 6.810567915646688e-08, "loss": 0.2559, "step": 39610 }, { "epoch": 1.8555768960509673, "grad_norm": 0.6230085934210909, "learning_rate": 6.806172068345246e-08, "loss": 0.2607, "step": 39611 }, { "epoch": 1.8556237410408958, "grad_norm": 0.6279951616422674, "learning_rate": 6.801777620563194e-08, "loss": 0.2796, "step": 39612 }, { "epoch": 1.855670586030824, "grad_norm": 0.551387885281356, "learning_rate": 6.79738457232576e-08, "loss": 0.2468, "step": 39613 }, { "epoch": 1.8557174310207523, "grad_norm": 0.5740432130124057, "learning_rate": 6.792992923658232e-08, "loss": 0.2673, "step": 39614 }, { "epoch": 1.8557642760106807, "grad_norm": 0.6609819531373378, "learning_rate": 6.788602674585921e-08, "loss": 0.29, "step": 39615 }, { "epoch": 1.855811121000609, "grad_norm": 0.6589948676278089, "learning_rate": 6.784213825134112e-08, "loss": 0.2795, "step": 39616 }, { "epoch": 1.8558579659905372, "grad_norm": 0.6094180652944969, "learning_rate": 6.77982637532798e-08, "loss": 0.2816, "step": 39617 }, { "epoch": 1.8559048109804657, "grad_norm": 0.5778710685802934, "learning_rate": 6.775440325192812e-08, "loss": 0.2717, "step": 39618 }, { "epoch": 1.8559516559703941, "grad_norm": 0.5722678421371655, "learning_rate": 6.771055674753862e-08, "loss": 0.2593, "step": 39619 }, { "epoch": 1.8559985009603222, "grad_norm": 0.5997477554026972, "learning_rate": 6.766672424036364e-08, "loss": 0.2751, "step": 39620 }, { "epoch": 1.8560453459502506, "grad_norm": 0.6045511674508732, "learning_rate": 6.762290573065517e-08, "loss": 0.2663, "step": 39621 }, { "epoch": 1.856092190940179, "grad_norm": 0.5791384826985786, "learning_rate": 6.757910121866579e-08, "loss": 0.2559, "step": 39622 }, { "epoch": 1.8561390359301073, "grad_norm": 0.5890631766836639, "learning_rate": 6.753531070464698e-08, "loss": 0.2891, "step": 39623 }, { "epoch": 1.8561858809200356, "grad_norm": 0.6222500112831026, "learning_rate": 6.749153418885102e-08, "loss": 0.2946, "step": 39624 }, { "epoch": 1.856232725909964, "grad_norm": 0.6026161838454108, "learning_rate": 6.744777167153022e-08, "loss": 0.2793, "step": 39625 }, { "epoch": 1.8562795708998923, "grad_norm": 0.5978501216113227, "learning_rate": 6.740402315293604e-08, "loss": 0.2779, "step": 39626 }, { "epoch": 1.8563264158898205, "grad_norm": 0.5928341765094238, "learning_rate": 6.736028863332022e-08, "loss": 0.2679, "step": 39627 }, { "epoch": 1.856373260879749, "grad_norm": 0.615175992138107, "learning_rate": 6.731656811293424e-08, "loss": 0.2744, "step": 39628 }, { "epoch": 1.8564201058696772, "grad_norm": 0.5846849119528365, "learning_rate": 6.727286159203039e-08, "loss": 0.2735, "step": 39629 }, { "epoch": 1.8564669508596054, "grad_norm": 0.5960813383532633, "learning_rate": 6.722916907085958e-08, "loss": 0.2541, "step": 39630 }, { "epoch": 1.856513795849534, "grad_norm": 0.6226932956002871, "learning_rate": 6.718549054967355e-08, "loss": 0.2706, "step": 39631 }, { "epoch": 1.8565606408394624, "grad_norm": 0.588245305718597, "learning_rate": 6.714182602872377e-08, "loss": 0.2674, "step": 39632 }, { "epoch": 1.8566074858293904, "grad_norm": 0.606102825952645, "learning_rate": 6.709817550826087e-08, "loss": 0.2713, "step": 39633 }, { "epoch": 1.8566543308193189, "grad_norm": 0.5879991973779698, "learning_rate": 6.705453898853658e-08, "loss": 0.2713, "step": 39634 }, { "epoch": 1.8567011758092473, "grad_norm": 0.5827461346768338, "learning_rate": 6.701091646980213e-08, "loss": 0.2587, "step": 39635 }, { "epoch": 1.8567480207991756, "grad_norm": 0.6084127630928512, "learning_rate": 6.696730795230838e-08, "loss": 0.2759, "step": 39636 }, { "epoch": 1.8567948657891038, "grad_norm": 0.6133394195771262, "learning_rate": 6.6923713436306e-08, "loss": 0.2664, "step": 39637 }, { "epoch": 1.8568417107790323, "grad_norm": 0.569292383655261, "learning_rate": 6.688013292204615e-08, "loss": 0.2641, "step": 39638 }, { "epoch": 1.8568885557689605, "grad_norm": 0.5795371572943869, "learning_rate": 6.683656640977976e-08, "loss": 0.2731, "step": 39639 }, { "epoch": 1.8569354007588887, "grad_norm": 0.6203711602052037, "learning_rate": 6.679301389975718e-08, "loss": 0.2699, "step": 39640 }, { "epoch": 1.8569822457488172, "grad_norm": 0.6507806676423101, "learning_rate": 6.674947539222959e-08, "loss": 0.2963, "step": 39641 }, { "epoch": 1.8570290907387454, "grad_norm": 0.6706358678311848, "learning_rate": 6.67059508874468e-08, "loss": 0.2805, "step": 39642 }, { "epoch": 1.8570759357286737, "grad_norm": 0.5917964127549386, "learning_rate": 6.666244038565973e-08, "loss": 0.2764, "step": 39643 }, { "epoch": 1.8571227807186022, "grad_norm": 0.5998314272850069, "learning_rate": 6.66189438871187e-08, "loss": 0.2763, "step": 39644 }, { "epoch": 1.8571696257085306, "grad_norm": 0.6056222464004971, "learning_rate": 6.65754613920741e-08, "loss": 0.2757, "step": 39645 }, { "epoch": 1.8572164706984589, "grad_norm": 0.595116227681567, "learning_rate": 6.653199290077628e-08, "loss": 0.2677, "step": 39646 }, { "epoch": 1.857263315688387, "grad_norm": 0.5662115932906773, "learning_rate": 6.648853841347531e-08, "loss": 0.2648, "step": 39647 }, { "epoch": 1.8573101606783156, "grad_norm": 0.5942972336476318, "learning_rate": 6.64450979304207e-08, "loss": 0.2672, "step": 39648 }, { "epoch": 1.8573570056682438, "grad_norm": 0.6278311541543808, "learning_rate": 6.64016714518631e-08, "loss": 0.2763, "step": 39649 }, { "epoch": 1.857403850658172, "grad_norm": 0.58609606241488, "learning_rate": 6.635825897805231e-08, "loss": 0.2684, "step": 39650 }, { "epoch": 1.8574506956481005, "grad_norm": 0.6187587491456366, "learning_rate": 6.631486050923785e-08, "loss": 0.2759, "step": 39651 }, { "epoch": 1.8574975406380287, "grad_norm": 0.5823012107576324, "learning_rate": 6.62714760456698e-08, "loss": 0.2558, "step": 39652 }, { "epoch": 1.857544385627957, "grad_norm": 0.6182158363915861, "learning_rate": 6.622810558759795e-08, "loss": 0.2738, "step": 39653 }, { "epoch": 1.8575912306178854, "grad_norm": 0.6430044649935912, "learning_rate": 6.618474913527157e-08, "loss": 0.2759, "step": 39654 }, { "epoch": 1.857638075607814, "grad_norm": 0.6053443614619766, "learning_rate": 6.614140668894015e-08, "loss": 0.288, "step": 39655 }, { "epoch": 1.857684920597742, "grad_norm": 0.6309486609186897, "learning_rate": 6.609807824885295e-08, "loss": 0.2643, "step": 39656 }, { "epoch": 1.8577317655876704, "grad_norm": 0.5660994287293308, "learning_rate": 6.605476381525977e-08, "loss": 0.2632, "step": 39657 }, { "epoch": 1.8577786105775989, "grad_norm": 0.6224366953235739, "learning_rate": 6.601146338840959e-08, "loss": 0.2694, "step": 39658 }, { "epoch": 1.857825455567527, "grad_norm": 0.6150391646756416, "learning_rate": 6.596817696855217e-08, "loss": 0.2916, "step": 39659 }, { "epoch": 1.8578723005574553, "grad_norm": 0.5457181012825273, "learning_rate": 6.592490455593542e-08, "loss": 0.2566, "step": 39660 }, { "epoch": 1.8579191455473838, "grad_norm": 0.6463831516109937, "learning_rate": 6.588164615080967e-08, "loss": 0.2842, "step": 39661 }, { "epoch": 1.857965990537312, "grad_norm": 0.6266914503991864, "learning_rate": 6.583840175342304e-08, "loss": 0.2631, "step": 39662 }, { "epoch": 1.8580128355272403, "grad_norm": 0.5932197728069837, "learning_rate": 6.579517136402424e-08, "loss": 0.2717, "step": 39663 }, { "epoch": 1.8580596805171687, "grad_norm": 0.5706373745868145, "learning_rate": 6.57519549828628e-08, "loss": 0.2479, "step": 39664 }, { "epoch": 1.858106525507097, "grad_norm": 0.5910469438458428, "learning_rate": 6.570875261018711e-08, "loss": 0.2573, "step": 39665 }, { "epoch": 1.8581533704970252, "grad_norm": 0.589026652049614, "learning_rate": 6.566556424624532e-08, "loss": 0.2617, "step": 39666 }, { "epoch": 1.8582002154869537, "grad_norm": 0.5816857722609781, "learning_rate": 6.562238989128666e-08, "loss": 0.2826, "step": 39667 }, { "epoch": 1.8582470604768822, "grad_norm": 0.6334090803888587, "learning_rate": 6.557922954555929e-08, "loss": 0.2786, "step": 39668 }, { "epoch": 1.8582939054668102, "grad_norm": 0.5922006515974246, "learning_rate": 6.553608320931159e-08, "loss": 0.2596, "step": 39669 }, { "epoch": 1.8583407504567386, "grad_norm": 0.5775201404317425, "learning_rate": 6.549295088279173e-08, "loss": 0.2697, "step": 39670 }, { "epoch": 1.858387595446667, "grad_norm": 0.5848650537928517, "learning_rate": 6.54498325662481e-08, "loss": 0.2702, "step": 39671 }, { "epoch": 1.8584344404365953, "grad_norm": 0.5721578974094256, "learning_rate": 6.540672825992883e-08, "loss": 0.2741, "step": 39672 }, { "epoch": 1.8584812854265236, "grad_norm": 0.5931416414311982, "learning_rate": 6.536363796408179e-08, "loss": 0.2643, "step": 39673 }, { "epoch": 1.858528130416452, "grad_norm": 0.5712812269686717, "learning_rate": 6.532056167895512e-08, "loss": 0.2506, "step": 39674 }, { "epoch": 1.8585749754063803, "grad_norm": 0.6031202046379933, "learning_rate": 6.527749940479667e-08, "loss": 0.2834, "step": 39675 }, { "epoch": 1.8586218203963085, "grad_norm": 0.6325241074941808, "learning_rate": 6.523445114185428e-08, "loss": 0.2771, "step": 39676 }, { "epoch": 1.858668665386237, "grad_norm": 0.6399542818749147, "learning_rate": 6.519141689037612e-08, "loss": 0.2627, "step": 39677 }, { "epoch": 1.8587155103761652, "grad_norm": 0.5502152376805238, "learning_rate": 6.514839665060891e-08, "loss": 0.2491, "step": 39678 }, { "epoch": 1.8587623553660935, "grad_norm": 0.6024093838888595, "learning_rate": 6.51053904228005e-08, "loss": 0.2666, "step": 39679 }, { "epoch": 1.858809200356022, "grad_norm": 0.5869996584984444, "learning_rate": 6.506239820719878e-08, "loss": 0.2572, "step": 39680 }, { "epoch": 1.8588560453459504, "grad_norm": 0.6250262230548349, "learning_rate": 6.501942000405132e-08, "loss": 0.2686, "step": 39681 }, { "epoch": 1.8589028903358786, "grad_norm": 0.5784286215694607, "learning_rate": 6.497645581360456e-08, "loss": 0.2568, "step": 39682 }, { "epoch": 1.8589497353258069, "grad_norm": 0.6182154173439499, "learning_rate": 6.493350563610696e-08, "loss": 0.288, "step": 39683 }, { "epoch": 1.8589965803157353, "grad_norm": 0.5823945177068696, "learning_rate": 6.489056947180439e-08, "loss": 0.263, "step": 39684 }, { "epoch": 1.8590434253056636, "grad_norm": 0.6212882312656776, "learning_rate": 6.484764732094473e-08, "loss": 0.2885, "step": 39685 }, { "epoch": 1.8590902702955918, "grad_norm": 0.546910435357108, "learning_rate": 6.480473918377473e-08, "loss": 0.2508, "step": 39686 }, { "epoch": 1.8591371152855203, "grad_norm": 0.5733481516617225, "learning_rate": 6.476184506054139e-08, "loss": 0.2724, "step": 39687 }, { "epoch": 1.8591839602754485, "grad_norm": 0.5967919368509833, "learning_rate": 6.471896495149176e-08, "loss": 0.2681, "step": 39688 }, { "epoch": 1.8592308052653768, "grad_norm": 0.6089892723326911, "learning_rate": 6.467609885687232e-08, "loss": 0.2695, "step": 39689 }, { "epoch": 1.8592776502553052, "grad_norm": 0.5733611631697544, "learning_rate": 6.463324677692979e-08, "loss": 0.2615, "step": 39690 }, { "epoch": 1.8593244952452337, "grad_norm": 0.6085998994764282, "learning_rate": 6.459040871191063e-08, "loss": 0.2691, "step": 39691 }, { "epoch": 1.8593713402351617, "grad_norm": 0.5910570869617273, "learning_rate": 6.45475846620619e-08, "loss": 0.2692, "step": 39692 }, { "epoch": 1.8594181852250902, "grad_norm": 0.6707324563049485, "learning_rate": 6.450477462762949e-08, "loss": 0.2725, "step": 39693 }, { "epoch": 1.8594650302150186, "grad_norm": 0.5733894687138108, "learning_rate": 6.44619786088599e-08, "loss": 0.2735, "step": 39694 }, { "epoch": 1.8595118752049469, "grad_norm": 0.6214892511358975, "learning_rate": 6.441919660599954e-08, "loss": 0.279, "step": 39695 }, { "epoch": 1.8595587201948751, "grad_norm": 0.6139800834361985, "learning_rate": 6.437642861929494e-08, "loss": 0.2687, "step": 39696 }, { "epoch": 1.8596055651848036, "grad_norm": 0.5564179960496944, "learning_rate": 6.433367464899142e-08, "loss": 0.2729, "step": 39697 }, { "epoch": 1.8596524101747318, "grad_norm": 0.5924610975665789, "learning_rate": 6.429093469533521e-08, "loss": 0.2817, "step": 39698 }, { "epoch": 1.85969925516466, "grad_norm": 0.6138676820506188, "learning_rate": 6.424820875857274e-08, "loss": 0.2805, "step": 39699 }, { "epoch": 1.8597461001545885, "grad_norm": 0.609474763037427, "learning_rate": 6.420549683894967e-08, "loss": 0.2743, "step": 39700 }, { "epoch": 1.8597929451445168, "grad_norm": 0.5346681606184198, "learning_rate": 6.416279893671162e-08, "loss": 0.2536, "step": 39701 }, { "epoch": 1.859839790134445, "grad_norm": 0.6065691722344145, "learning_rate": 6.412011505210453e-08, "loss": 0.2784, "step": 39702 }, { "epoch": 1.8598866351243735, "grad_norm": 0.6016037674035466, "learning_rate": 6.407744518537428e-08, "loss": 0.2581, "step": 39703 }, { "epoch": 1.859933480114302, "grad_norm": 0.5594218261371645, "learning_rate": 6.403478933676543e-08, "loss": 0.2462, "step": 39704 }, { "epoch": 1.85998032510423, "grad_norm": 0.6020972847708708, "learning_rate": 6.399214750652444e-08, "loss": 0.2802, "step": 39705 }, { "epoch": 1.8600271700941584, "grad_norm": 0.5608616759518084, "learning_rate": 6.394951969489638e-08, "loss": 0.2567, "step": 39706 }, { "epoch": 1.8600740150840869, "grad_norm": 0.5936688605730772, "learning_rate": 6.390690590212634e-08, "loss": 0.2664, "step": 39707 }, { "epoch": 1.8601208600740151, "grad_norm": 0.637895850054575, "learning_rate": 6.386430612846023e-08, "loss": 0.2611, "step": 39708 }, { "epoch": 1.8601677050639434, "grad_norm": 0.6265799400187745, "learning_rate": 6.38217203741423e-08, "loss": 0.2804, "step": 39709 }, { "epoch": 1.8602145500538718, "grad_norm": 0.5942847680303156, "learning_rate": 6.377914863941847e-08, "loss": 0.2754, "step": 39710 }, { "epoch": 1.8602613950438, "grad_norm": 0.6143862348998596, "learning_rate": 6.373659092453299e-08, "loss": 0.2868, "step": 39711 }, { "epoch": 1.8603082400337283, "grad_norm": 0.5665406677530296, "learning_rate": 6.369404722973122e-08, "loss": 0.2557, "step": 39712 }, { "epoch": 1.8603550850236568, "grad_norm": 0.608403043254855, "learning_rate": 6.365151755525767e-08, "loss": 0.2705, "step": 39713 }, { "epoch": 1.860401930013585, "grad_norm": 0.6138613745466412, "learning_rate": 6.360900190135743e-08, "loss": 0.2787, "step": 39714 }, { "epoch": 1.8604487750035132, "grad_norm": 0.6322258559105468, "learning_rate": 6.356650026827504e-08, "loss": 0.2892, "step": 39715 }, { "epoch": 1.8604956199934417, "grad_norm": 0.555605224235935, "learning_rate": 6.352401265625501e-08, "loss": 0.2419, "step": 39716 }, { "epoch": 1.8605424649833702, "grad_norm": 0.6515103518562687, "learning_rate": 6.348153906554216e-08, "loss": 0.2844, "step": 39717 }, { "epoch": 1.8605893099732984, "grad_norm": 0.6109371510669886, "learning_rate": 6.343907949638046e-08, "loss": 0.278, "step": 39718 }, { "epoch": 1.8606361549632267, "grad_norm": 0.5655100604686264, "learning_rate": 6.339663394901413e-08, "loss": 0.2522, "step": 39719 }, { "epoch": 1.8606829999531551, "grad_norm": 0.5623348567253061, "learning_rate": 6.335420242368828e-08, "loss": 0.2555, "step": 39720 }, { "epoch": 1.8607298449430834, "grad_norm": 0.5953845602181281, "learning_rate": 6.331178492064632e-08, "loss": 0.2758, "step": 39721 }, { "epoch": 1.8607766899330116, "grad_norm": 0.6042725655936992, "learning_rate": 6.326938144013251e-08, "loss": 0.2746, "step": 39722 }, { "epoch": 1.86082353492294, "grad_norm": 0.5875720393190625, "learning_rate": 6.322699198239135e-08, "loss": 0.2612, "step": 39723 }, { "epoch": 1.8608703799128683, "grad_norm": 0.5978828352414657, "learning_rate": 6.3184616547666e-08, "loss": 0.2703, "step": 39724 }, { "epoch": 1.8609172249027965, "grad_norm": 0.5800672367633489, "learning_rate": 6.31422551362007e-08, "loss": 0.2636, "step": 39725 }, { "epoch": 1.860964069892725, "grad_norm": 0.5785383664515741, "learning_rate": 6.30999077482397e-08, "loss": 0.2736, "step": 39726 }, { "epoch": 1.8610109148826535, "grad_norm": 0.5729015584639156, "learning_rate": 6.305757438402588e-08, "loss": 0.2654, "step": 39727 }, { "epoch": 1.8610577598725815, "grad_norm": 0.6068998855822192, "learning_rate": 6.301525504380318e-08, "loss": 0.2801, "step": 39728 }, { "epoch": 1.86110460486251, "grad_norm": 0.6293766379214428, "learning_rate": 6.297294972781504e-08, "loss": 0.2841, "step": 39729 }, { "epoch": 1.8611514498524384, "grad_norm": 0.5761146110940428, "learning_rate": 6.293065843630541e-08, "loss": 0.2707, "step": 39730 }, { "epoch": 1.8611982948423667, "grad_norm": 0.619111752210402, "learning_rate": 6.288838116951717e-08, "loss": 0.2876, "step": 39731 }, { "epoch": 1.861245139832295, "grad_norm": 0.6053081048044917, "learning_rate": 6.284611792769374e-08, "loss": 0.269, "step": 39732 }, { "epoch": 1.8612919848222234, "grad_norm": 0.628851845356747, "learning_rate": 6.280386871107824e-08, "loss": 0.2781, "step": 39733 }, { "epoch": 1.8613388298121516, "grad_norm": 0.5660096988867774, "learning_rate": 6.27616335199141e-08, "loss": 0.2571, "step": 39734 }, { "epoch": 1.8613856748020798, "grad_norm": 0.6381141037414666, "learning_rate": 6.27194123544439e-08, "loss": 0.2729, "step": 39735 }, { "epoch": 1.8614325197920083, "grad_norm": 0.6033130380112275, "learning_rate": 6.267720521491105e-08, "loss": 0.2758, "step": 39736 }, { "epoch": 1.8614793647819365, "grad_norm": 0.6172686298654652, "learning_rate": 6.263501210155843e-08, "loss": 0.2566, "step": 39737 }, { "epoch": 1.8615262097718648, "grad_norm": 0.5628000091205432, "learning_rate": 6.259283301462887e-08, "loss": 0.2586, "step": 39738 }, { "epoch": 1.8615730547617932, "grad_norm": 0.6100212692904944, "learning_rate": 6.255066795436443e-08, "loss": 0.2832, "step": 39739 }, { "epoch": 1.8616198997517217, "grad_norm": 0.5625444287877568, "learning_rate": 6.250851692100852e-08, "loss": 0.2564, "step": 39740 }, { "epoch": 1.8616667447416497, "grad_norm": 0.5927216661697121, "learning_rate": 6.246637991480341e-08, "loss": 0.2781, "step": 39741 }, { "epoch": 1.8617135897315782, "grad_norm": 0.5975223631115124, "learning_rate": 6.242425693599146e-08, "loss": 0.2551, "step": 39742 }, { "epoch": 1.8617604347215067, "grad_norm": 0.6113251578746957, "learning_rate": 6.238214798481551e-08, "loss": 0.2766, "step": 39743 }, { "epoch": 1.861807279711435, "grad_norm": 0.616214937330539, "learning_rate": 6.234005306151758e-08, "loss": 0.2677, "step": 39744 }, { "epoch": 1.8618541247013631, "grad_norm": 0.6638854156302241, "learning_rate": 6.229797216634026e-08, "loss": 0.2976, "step": 39745 }, { "epoch": 1.8619009696912916, "grad_norm": 0.5637488083114004, "learning_rate": 6.225590529952502e-08, "loss": 0.2634, "step": 39746 }, { "epoch": 1.8619478146812198, "grad_norm": 0.6112016210394124, "learning_rate": 6.221385246131418e-08, "loss": 0.276, "step": 39747 }, { "epoch": 1.861994659671148, "grad_norm": 0.6272540605667075, "learning_rate": 6.21718136519503e-08, "loss": 0.2733, "step": 39748 }, { "epoch": 1.8620415046610765, "grad_norm": 0.5813442130293263, "learning_rate": 6.212978887167459e-08, "loss": 0.2522, "step": 39749 }, { "epoch": 1.8620883496510048, "grad_norm": 0.6469980034613824, "learning_rate": 6.208777812072964e-08, "loss": 0.27, "step": 39750 }, { "epoch": 1.862135194640933, "grad_norm": 0.5832352333089756, "learning_rate": 6.204578139935635e-08, "loss": 0.2658, "step": 39751 }, { "epoch": 1.8621820396308615, "grad_norm": 0.5827640654101767, "learning_rate": 6.200379870779705e-08, "loss": 0.2649, "step": 39752 }, { "epoch": 1.86222888462079, "grad_norm": 0.5658635636052052, "learning_rate": 6.19618300462932e-08, "loss": 0.2463, "step": 39753 }, { "epoch": 1.8622757296107182, "grad_norm": 0.6172708870811476, "learning_rate": 6.191987541508598e-08, "loss": 0.2757, "step": 39754 }, { "epoch": 1.8623225746006464, "grad_norm": 0.6064028743015398, "learning_rate": 6.187793481441717e-08, "loss": 0.2571, "step": 39755 }, { "epoch": 1.862369419590575, "grad_norm": 0.5860268204356301, "learning_rate": 6.183600824452824e-08, "loss": 0.2588, "step": 39756 }, { "epoch": 1.8624162645805031, "grad_norm": 0.5591380493754606, "learning_rate": 6.179409570566008e-08, "loss": 0.2673, "step": 39757 }, { "epoch": 1.8624631095704314, "grad_norm": 0.5987102937554246, "learning_rate": 6.175219719805391e-08, "loss": 0.2629, "step": 39758 }, { "epoch": 1.8625099545603598, "grad_norm": 0.6017500772085208, "learning_rate": 6.17103127219515e-08, "loss": 0.2816, "step": 39759 }, { "epoch": 1.862556799550288, "grad_norm": 0.6028521316117846, "learning_rate": 6.166844227759289e-08, "loss": 0.2805, "step": 39760 }, { "epoch": 1.8626036445402163, "grad_norm": 0.576860797454601, "learning_rate": 6.162658586521986e-08, "loss": 0.2633, "step": 39761 }, { "epoch": 1.8626504895301448, "grad_norm": 0.5766445169365971, "learning_rate": 6.158474348507277e-08, "loss": 0.2702, "step": 39762 }, { "epoch": 1.8626973345200732, "grad_norm": 0.6082904790238822, "learning_rate": 6.15429151373928e-08, "loss": 0.2744, "step": 39763 }, { "epoch": 1.8627441795100013, "grad_norm": 0.619933463450353, "learning_rate": 6.150110082242034e-08, "loss": 0.2829, "step": 39764 }, { "epoch": 1.8627910244999297, "grad_norm": 0.5678794433152707, "learning_rate": 6.145930054039628e-08, "loss": 0.2512, "step": 39765 }, { "epoch": 1.8628378694898582, "grad_norm": 0.5815040215312693, "learning_rate": 6.141751429156101e-08, "loss": 0.2646, "step": 39766 }, { "epoch": 1.8628847144797864, "grad_norm": 0.5815586848692865, "learning_rate": 6.137574207615488e-08, "loss": 0.2586, "step": 39767 }, { "epoch": 1.8629315594697147, "grad_norm": 0.5789425202128087, "learning_rate": 6.133398389441853e-08, "loss": 0.2729, "step": 39768 }, { "epoch": 1.8629784044596431, "grad_norm": 0.5722368205190657, "learning_rate": 6.129223974659232e-08, "loss": 0.2582, "step": 39769 }, { "epoch": 1.8630252494495714, "grad_norm": 0.6132505603446604, "learning_rate": 6.125050963291607e-08, "loss": 0.2611, "step": 39770 }, { "epoch": 1.8630720944394996, "grad_norm": 0.6267197017485646, "learning_rate": 6.120879355363012e-08, "loss": 0.2832, "step": 39771 }, { "epoch": 1.863118939429428, "grad_norm": 0.6229629203272116, "learning_rate": 6.116709150897516e-08, "loss": 0.2799, "step": 39772 }, { "epoch": 1.8631657844193563, "grad_norm": 0.6054878848482577, "learning_rate": 6.112540349919011e-08, "loss": 0.271, "step": 39773 }, { "epoch": 1.8632126294092846, "grad_norm": 0.5887188949184218, "learning_rate": 6.108372952451536e-08, "loss": 0.2742, "step": 39774 }, { "epoch": 1.863259474399213, "grad_norm": 0.5939795833553155, "learning_rate": 6.104206958519099e-08, "loss": 0.2664, "step": 39775 }, { "epoch": 1.8633063193891415, "grad_norm": 0.5831637249296463, "learning_rate": 6.100042368145626e-08, "loss": 0.2758, "step": 39776 }, { "epoch": 1.8633531643790695, "grad_norm": 0.6274932674167851, "learning_rate": 6.095879181355125e-08, "loss": 0.2772, "step": 39777 }, { "epoch": 1.863400009368998, "grad_norm": 0.5631756723624735, "learning_rate": 6.09171739817152e-08, "loss": 0.2571, "step": 39778 }, { "epoch": 1.8634468543589264, "grad_norm": 0.5906082791039677, "learning_rate": 6.087557018618795e-08, "loss": 0.253, "step": 39779 }, { "epoch": 1.8634936993488547, "grad_norm": 0.5788374674072542, "learning_rate": 6.083398042720872e-08, "loss": 0.2733, "step": 39780 }, { "epoch": 1.863540544338783, "grad_norm": 0.6095491710730755, "learning_rate": 6.07924047050168e-08, "loss": 0.2803, "step": 39781 }, { "epoch": 1.8635873893287114, "grad_norm": 0.6210529780524904, "learning_rate": 6.075084301985168e-08, "loss": 0.2888, "step": 39782 }, { "epoch": 1.8636342343186396, "grad_norm": 0.5726421787544265, "learning_rate": 6.070929537195209e-08, "loss": 0.2739, "step": 39783 }, { "epoch": 1.8636810793085679, "grad_norm": 0.5671081353029604, "learning_rate": 6.066776176155753e-08, "loss": 0.2681, "step": 39784 }, { "epoch": 1.8637279242984963, "grad_norm": 0.6284041975630525, "learning_rate": 6.062624218890672e-08, "loss": 0.2652, "step": 39785 }, { "epoch": 1.8637747692884246, "grad_norm": 0.6293050872386676, "learning_rate": 6.05847366542392e-08, "loss": 0.2837, "step": 39786 }, { "epoch": 1.8638216142783528, "grad_norm": 0.5637272495449801, "learning_rate": 6.054324515779336e-08, "loss": 0.2565, "step": 39787 }, { "epoch": 1.8638684592682813, "grad_norm": 0.5975719865903567, "learning_rate": 6.050176769980765e-08, "loss": 0.2623, "step": 39788 }, { "epoch": 1.8639153042582097, "grad_norm": 0.5972784298982715, "learning_rate": 6.04603042805213e-08, "loss": 0.2751, "step": 39789 }, { "epoch": 1.8639621492481377, "grad_norm": 0.5837195335695038, "learning_rate": 6.041885490017274e-08, "loss": 0.2627, "step": 39790 }, { "epoch": 1.8640089942380662, "grad_norm": 0.6485297525006202, "learning_rate": 6.03774195590004e-08, "loss": 0.2802, "step": 39791 }, { "epoch": 1.8640558392279947, "grad_norm": 0.5658029519036787, "learning_rate": 6.033599825724295e-08, "loss": 0.2529, "step": 39792 }, { "epoch": 1.864102684217923, "grad_norm": 0.6223925976455138, "learning_rate": 6.029459099513857e-08, "loss": 0.2644, "step": 39793 }, { "epoch": 1.8641495292078512, "grad_norm": 0.6221121351521457, "learning_rate": 6.025319777292593e-08, "loss": 0.2841, "step": 39794 }, { "epoch": 1.8641963741977796, "grad_norm": 0.6126622713721142, "learning_rate": 6.021181859084263e-08, "loss": 0.2724, "step": 39795 }, { "epoch": 1.8642432191877079, "grad_norm": 0.6014522389684273, "learning_rate": 6.017045344912709e-08, "loss": 0.2785, "step": 39796 }, { "epoch": 1.864290064177636, "grad_norm": 0.6077282133421699, "learning_rate": 6.012910234801744e-08, "loss": 0.2861, "step": 39797 }, { "epoch": 1.8643369091675646, "grad_norm": 0.6525052433742864, "learning_rate": 6.008776528775129e-08, "loss": 0.2815, "step": 39798 }, { "epoch": 1.8643837541574928, "grad_norm": 0.5225563085009453, "learning_rate": 6.004644226856732e-08, "loss": 0.242, "step": 39799 }, { "epoch": 1.864430599147421, "grad_norm": 0.6268505245424586, "learning_rate": 6.000513329070256e-08, "loss": 0.279, "step": 39800 }, { "epoch": 1.8644774441373495, "grad_norm": 0.5535116517933585, "learning_rate": 5.996383835439517e-08, "loss": 0.2417, "step": 39801 }, { "epoch": 1.864524289127278, "grad_norm": 0.593711508176025, "learning_rate": 5.992255745988219e-08, "loss": 0.2584, "step": 39802 }, { "epoch": 1.8645711341172062, "grad_norm": 0.6019683037184591, "learning_rate": 5.988129060740173e-08, "loss": 0.2801, "step": 39803 }, { "epoch": 1.8646179791071344, "grad_norm": 0.557651995991983, "learning_rate": 5.984003779719138e-08, "loss": 0.2595, "step": 39804 }, { "epoch": 1.864664824097063, "grad_norm": 0.5999032374643121, "learning_rate": 5.979879902948821e-08, "loss": 0.2643, "step": 39805 }, { "epoch": 1.8647116690869912, "grad_norm": 0.5911088886908352, "learning_rate": 5.97575743045295e-08, "loss": 0.2606, "step": 39806 }, { "epoch": 1.8647585140769194, "grad_norm": 0.5784208731499616, "learning_rate": 5.971636362255256e-08, "loss": 0.2494, "step": 39807 }, { "epoch": 1.8648053590668479, "grad_norm": 0.616746648853565, "learning_rate": 5.9675166983795e-08, "loss": 0.2699, "step": 39808 }, { "epoch": 1.864852204056776, "grad_norm": 0.6121305889027517, "learning_rate": 5.963398438849327e-08, "loss": 0.2741, "step": 39809 }, { "epoch": 1.8648990490467043, "grad_norm": 0.6086620823275485, "learning_rate": 5.959281583688442e-08, "loss": 0.2768, "step": 39810 }, { "epoch": 1.8649458940366328, "grad_norm": 0.6500179931728909, "learning_rate": 5.9551661329206036e-08, "loss": 0.2722, "step": 39811 }, { "epoch": 1.8649927390265613, "grad_norm": 0.6187806600060425, "learning_rate": 5.951052086569403e-08, "loss": 0.2803, "step": 39812 }, { "epoch": 1.8650395840164893, "grad_norm": 0.5732789164532931, "learning_rate": 5.946939444658545e-08, "loss": 0.2515, "step": 39813 }, { "epoch": 1.8650864290064177, "grad_norm": 0.603030011342488, "learning_rate": 5.9428282072117595e-08, "loss": 0.2679, "step": 39814 }, { "epoch": 1.8651332739963462, "grad_norm": 0.6056141181539415, "learning_rate": 5.938718374252611e-08, "loss": 0.2787, "step": 39815 }, { "epoch": 1.8651801189862744, "grad_norm": 0.5956527267491767, "learning_rate": 5.934609945804804e-08, "loss": 0.2728, "step": 39816 }, { "epoch": 1.8652269639762027, "grad_norm": 0.5897076989778889, "learning_rate": 5.930502921892012e-08, "loss": 0.2651, "step": 39817 }, { "epoch": 1.8652738089661312, "grad_norm": 0.5839718234715431, "learning_rate": 5.926397302537801e-08, "loss": 0.2638, "step": 39818 }, { "epoch": 1.8653206539560594, "grad_norm": 0.593194332886877, "learning_rate": 5.922293087765818e-08, "loss": 0.265, "step": 39819 }, { "epoch": 1.8653674989459876, "grad_norm": 0.5768270281587328, "learning_rate": 5.9181902775996836e-08, "loss": 0.2733, "step": 39820 }, { "epoch": 1.865414343935916, "grad_norm": 0.586350732350135, "learning_rate": 5.914088872063073e-08, "loss": 0.2568, "step": 39821 }, { "epoch": 1.8654611889258443, "grad_norm": 0.6085533811073279, "learning_rate": 5.909988871179467e-08, "loss": 0.2713, "step": 39822 }, { "epoch": 1.8655080339157726, "grad_norm": 0.5974426876105553, "learning_rate": 5.90589027497257e-08, "loss": 0.2679, "step": 39823 }, { "epoch": 1.865554878905701, "grad_norm": 0.5874917790307832, "learning_rate": 5.9017930834659176e-08, "loss": 0.2591, "step": 39824 }, { "epoch": 1.8656017238956295, "grad_norm": 0.6073316471986953, "learning_rate": 5.8976972966830746e-08, "loss": 0.2798, "step": 39825 }, { "epoch": 1.8656485688855575, "grad_norm": 0.5881270864456045, "learning_rate": 5.8936029146476334e-08, "loss": 0.2628, "step": 39826 }, { "epoch": 1.865695413875486, "grad_norm": 0.5933412218518548, "learning_rate": 5.8895099373831586e-08, "loss": 0.2708, "step": 39827 }, { "epoch": 1.8657422588654144, "grad_norm": 0.6103761020240545, "learning_rate": 5.8854183649132144e-08, "loss": 0.2682, "step": 39828 }, { "epoch": 1.8657891038553427, "grad_norm": 0.5448820812801939, "learning_rate": 5.88132819726131e-08, "loss": 0.2497, "step": 39829 }, { "epoch": 1.865835948845271, "grad_norm": 0.6045014232472673, "learning_rate": 5.8772394344510096e-08, "loss": 0.2573, "step": 39830 }, { "epoch": 1.8658827938351994, "grad_norm": 0.6280341581321155, "learning_rate": 5.87315207650585e-08, "loss": 0.2777, "step": 39831 }, { "epoch": 1.8659296388251276, "grad_norm": 0.5944237905472531, "learning_rate": 5.869066123449313e-08, "loss": 0.2762, "step": 39832 }, { "epoch": 1.8659764838150559, "grad_norm": 0.6211043649053947, "learning_rate": 5.864981575304962e-08, "loss": 0.2758, "step": 39833 }, { "epoch": 1.8660233288049843, "grad_norm": 0.6084081147311489, "learning_rate": 5.8608984320962794e-08, "loss": 0.2801, "step": 39834 }, { "epoch": 1.8660701737949126, "grad_norm": 0.6244682877940532, "learning_rate": 5.856816693846773e-08, "loss": 0.2682, "step": 39835 }, { "epoch": 1.8661170187848408, "grad_norm": 0.5801277764804135, "learning_rate": 5.8527363605799246e-08, "loss": 0.2614, "step": 39836 }, { "epoch": 1.8661638637747693, "grad_norm": 0.5312065694991374, "learning_rate": 5.848657432319216e-08, "loss": 0.2593, "step": 39837 }, { "epoch": 1.8662107087646977, "grad_norm": 0.6235013160505957, "learning_rate": 5.8445799090880995e-08, "loss": 0.2779, "step": 39838 }, { "epoch": 1.866257553754626, "grad_norm": 0.6090877730592741, "learning_rate": 5.840503790910057e-08, "loss": 0.2728, "step": 39839 }, { "epoch": 1.8663043987445542, "grad_norm": 0.5695833263694403, "learning_rate": 5.836429077808542e-08, "loss": 0.2643, "step": 39840 }, { "epoch": 1.8663512437344827, "grad_norm": 0.5587511619406865, "learning_rate": 5.832355769807035e-08, "loss": 0.2554, "step": 39841 }, { "epoch": 1.866398088724411, "grad_norm": 0.6469212234758391, "learning_rate": 5.828283866928935e-08, "loss": 0.2823, "step": 39842 }, { "epoch": 1.8664449337143392, "grad_norm": 0.5747440014879961, "learning_rate": 5.824213369197723e-08, "loss": 0.2594, "step": 39843 }, { "epoch": 1.8664917787042676, "grad_norm": 0.6273991676745823, "learning_rate": 5.820144276636769e-08, "loss": 0.2685, "step": 39844 }, { "epoch": 1.8665386236941959, "grad_norm": 0.6406179183030124, "learning_rate": 5.816076589269498e-08, "loss": 0.2745, "step": 39845 }, { "epoch": 1.8665854686841241, "grad_norm": 0.6153269083622798, "learning_rate": 5.812010307119337e-08, "loss": 0.2669, "step": 39846 }, { "epoch": 1.8666323136740526, "grad_norm": 0.6055944753965339, "learning_rate": 5.807945430209683e-08, "loss": 0.2814, "step": 39847 }, { "epoch": 1.866679158663981, "grad_norm": 0.6411262951165394, "learning_rate": 5.803881958563962e-08, "loss": 0.2986, "step": 39848 }, { "epoch": 1.866726003653909, "grad_norm": 0.6209089997830213, "learning_rate": 5.7998198922054885e-08, "loss": 0.2745, "step": 39849 }, { "epoch": 1.8667728486438375, "grad_norm": 0.5878302353117615, "learning_rate": 5.7957592311576885e-08, "loss": 0.2622, "step": 39850 }, { "epoch": 1.866819693633766, "grad_norm": 0.592606646915443, "learning_rate": 5.791699975443904e-08, "loss": 0.2636, "step": 39851 }, { "epoch": 1.8668665386236942, "grad_norm": 0.5884860007251949, "learning_rate": 5.787642125087506e-08, "loss": 0.2708, "step": 39852 }, { "epoch": 1.8669133836136225, "grad_norm": 0.6377113995758157, "learning_rate": 5.7835856801118365e-08, "loss": 0.2807, "step": 39853 }, { "epoch": 1.866960228603551, "grad_norm": 0.5875981720561058, "learning_rate": 5.7795306405402926e-08, "loss": 0.269, "step": 39854 }, { "epoch": 1.8670070735934792, "grad_norm": 0.5958995581508946, "learning_rate": 5.775477006396135e-08, "loss": 0.2653, "step": 39855 }, { "epoch": 1.8670539185834074, "grad_norm": 0.6134261854937969, "learning_rate": 5.7714247777027334e-08, "loss": 0.2632, "step": 39856 }, { "epoch": 1.8671007635733359, "grad_norm": 0.6224532826797038, "learning_rate": 5.7673739544834015e-08, "loss": 0.2701, "step": 39857 }, { "epoch": 1.8671476085632641, "grad_norm": 0.579034704713894, "learning_rate": 5.763324536761428e-08, "loss": 0.2544, "step": 39858 }, { "epoch": 1.8671944535531924, "grad_norm": 0.6394706467733267, "learning_rate": 5.759276524560154e-08, "loss": 0.2738, "step": 39859 }, { "epoch": 1.8672412985431208, "grad_norm": 0.5901220925326638, "learning_rate": 5.7552299179028656e-08, "loss": 0.2663, "step": 39860 }, { "epoch": 1.8672881435330493, "grad_norm": 0.6102884809882163, "learning_rate": 5.7511847168128245e-08, "loss": 0.2589, "step": 39861 }, { "epoch": 1.8673349885229773, "grad_norm": 0.606464879817508, "learning_rate": 5.747140921313316e-08, "loss": 0.2767, "step": 39862 }, { "epoch": 1.8673818335129058, "grad_norm": 0.5885924956530123, "learning_rate": 5.743098531427627e-08, "loss": 0.2793, "step": 39863 }, { "epoch": 1.8674286785028342, "grad_norm": 0.6241663661586363, "learning_rate": 5.7390575471790166e-08, "loss": 0.2721, "step": 39864 }, { "epoch": 1.8674755234927625, "grad_norm": 0.5812406851425312, "learning_rate": 5.735017968590745e-08, "loss": 0.2524, "step": 39865 }, { "epoch": 1.8675223684826907, "grad_norm": 0.5876192759046249, "learning_rate": 5.7309797956860424e-08, "loss": 0.2612, "step": 39866 }, { "epoch": 1.8675692134726192, "grad_norm": 0.6058933907018291, "learning_rate": 5.726943028488169e-08, "loss": 0.2596, "step": 39867 }, { "epoch": 1.8676160584625474, "grad_norm": 0.5610980605940575, "learning_rate": 5.722907667020328e-08, "loss": 0.2702, "step": 39868 }, { "epoch": 1.8676629034524757, "grad_norm": 0.5639000426026418, "learning_rate": 5.71887371130575e-08, "loss": 0.2483, "step": 39869 }, { "epoch": 1.8677097484424041, "grad_norm": 0.6296529046216625, "learning_rate": 5.714841161367668e-08, "loss": 0.2794, "step": 39870 }, { "epoch": 1.8677565934323324, "grad_norm": 0.5548945800494127, "learning_rate": 5.7108100172292844e-08, "loss": 0.2533, "step": 39871 }, { "epoch": 1.8678034384222606, "grad_norm": 0.5820934187727189, "learning_rate": 5.7067802789138035e-08, "loss": 0.269, "step": 39872 }, { "epoch": 1.867850283412189, "grad_norm": 0.5715169858484663, "learning_rate": 5.702751946444346e-08, "loss": 0.258, "step": 39873 }, { "epoch": 1.8678971284021175, "grad_norm": 0.599624361963056, "learning_rate": 5.698725019844198e-08, "loss": 0.2579, "step": 39874 }, { "epoch": 1.8679439733920458, "grad_norm": 0.5469595336222085, "learning_rate": 5.694699499136452e-08, "loss": 0.261, "step": 39875 }, { "epoch": 1.867990818381974, "grad_norm": 0.5843541668496272, "learning_rate": 5.690675384344313e-08, "loss": 0.2539, "step": 39876 }, { "epoch": 1.8680376633719025, "grad_norm": 0.6201396932031646, "learning_rate": 5.6866526754909555e-08, "loss": 0.2864, "step": 39877 }, { "epoch": 1.8680845083618307, "grad_norm": 0.6216548795706797, "learning_rate": 5.682631372599501e-08, "loss": 0.2669, "step": 39878 }, { "epoch": 1.868131353351759, "grad_norm": 0.5779610468282725, "learning_rate": 5.678611475693097e-08, "loss": 0.2547, "step": 39879 }, { "epoch": 1.8681781983416874, "grad_norm": 0.5690119564005346, "learning_rate": 5.6745929847948634e-08, "loss": 0.2601, "step": 39880 }, { "epoch": 1.8682250433316157, "grad_norm": 0.5813541839713758, "learning_rate": 5.670575899927921e-08, "loss": 0.2637, "step": 39881 }, { "epoch": 1.868271888321544, "grad_norm": 0.6060907460545261, "learning_rate": 5.6665602211154195e-08, "loss": 0.2719, "step": 39882 }, { "epoch": 1.8683187333114724, "grad_norm": 0.6110066655692382, "learning_rate": 5.662545948380449e-08, "loss": 0.2685, "step": 39883 }, { "epoch": 1.8683655783014008, "grad_norm": 0.6040863866047703, "learning_rate": 5.658533081746159e-08, "loss": 0.2673, "step": 39884 }, { "epoch": 1.8684124232913288, "grad_norm": 0.5422905666736223, "learning_rate": 5.654521621235559e-08, "loss": 0.2455, "step": 39885 }, { "epoch": 1.8684592682812573, "grad_norm": 0.603030091390137, "learning_rate": 5.650511566871769e-08, "loss": 0.2773, "step": 39886 }, { "epoch": 1.8685061132711858, "grad_norm": 0.6043249083976637, "learning_rate": 5.646502918677882e-08, "loss": 0.2785, "step": 39887 }, { "epoch": 1.868552958261114, "grad_norm": 0.6115403392609813, "learning_rate": 5.6424956766769346e-08, "loss": 0.2741, "step": 39888 }, { "epoch": 1.8685998032510422, "grad_norm": 0.5968140768393106, "learning_rate": 5.6384898408920196e-08, "loss": 0.2699, "step": 39889 }, { "epoch": 1.8686466482409707, "grad_norm": 0.5956358374906432, "learning_rate": 5.634485411346202e-08, "loss": 0.2603, "step": 39890 }, { "epoch": 1.868693493230899, "grad_norm": 0.5708303158532395, "learning_rate": 5.630482388062492e-08, "loss": 0.2573, "step": 39891 }, { "epoch": 1.8687403382208272, "grad_norm": 0.6399328112902396, "learning_rate": 5.6264807710639245e-08, "loss": 0.3036, "step": 39892 }, { "epoch": 1.8687871832107557, "grad_norm": 0.5574969001929403, "learning_rate": 5.622480560373539e-08, "loss": 0.2485, "step": 39893 }, { "epoch": 1.868834028200684, "grad_norm": 0.6176237760321281, "learning_rate": 5.618481756014343e-08, "loss": 0.2804, "step": 39894 }, { "epoch": 1.8688808731906121, "grad_norm": 0.6506223409004686, "learning_rate": 5.6144843580093754e-08, "loss": 0.2869, "step": 39895 }, { "epoch": 1.8689277181805406, "grad_norm": 0.6191565755629251, "learning_rate": 5.610488366381644e-08, "loss": 0.266, "step": 39896 }, { "epoch": 1.868974563170469, "grad_norm": 0.5702605904051721, "learning_rate": 5.6064937811541045e-08, "loss": 0.2668, "step": 39897 }, { "epoch": 1.869021408160397, "grad_norm": 0.6066278405665086, "learning_rate": 5.602500602349764e-08, "loss": 0.2685, "step": 39898 }, { "epoch": 1.8690682531503255, "grad_norm": 0.5922161776686602, "learning_rate": 5.598508829991634e-08, "loss": 0.2564, "step": 39899 }, { "epoch": 1.869115098140254, "grad_norm": 0.6189148298769723, "learning_rate": 5.594518464102638e-08, "loss": 0.2821, "step": 39900 }, { "epoch": 1.8691619431301822, "grad_norm": 0.5945101744099548, "learning_rate": 5.590529504705733e-08, "loss": 0.2781, "step": 39901 }, { "epoch": 1.8692087881201105, "grad_norm": 0.6323552968901253, "learning_rate": 5.5865419518239264e-08, "loss": 0.2891, "step": 39902 }, { "epoch": 1.869255633110039, "grad_norm": 0.6123057861857996, "learning_rate": 5.582555805480172e-08, "loss": 0.2644, "step": 39903 }, { "epoch": 1.8693024780999672, "grad_norm": 0.589517473829712, "learning_rate": 5.578571065697341e-08, "loss": 0.2602, "step": 39904 }, { "epoch": 1.8693493230898954, "grad_norm": 0.5887660498043051, "learning_rate": 5.5745877324984154e-08, "loss": 0.2722, "step": 39905 }, { "epoch": 1.869396168079824, "grad_norm": 0.5935328372704655, "learning_rate": 5.570605805906293e-08, "loss": 0.287, "step": 39906 }, { "epoch": 1.8694430130697521, "grad_norm": 0.6344297669795466, "learning_rate": 5.5666252859438715e-08, "loss": 0.2845, "step": 39907 }, { "epoch": 1.8694898580596804, "grad_norm": 0.6146478360652233, "learning_rate": 5.5626461726341065e-08, "loss": 0.2722, "step": 39908 }, { "epoch": 1.8695367030496088, "grad_norm": 0.6099472836619109, "learning_rate": 5.5586684659998946e-08, "loss": 0.2631, "step": 39909 }, { "epoch": 1.8695835480395373, "grad_norm": 0.5883550185413751, "learning_rate": 5.5546921660641076e-08, "loss": 0.2712, "step": 39910 }, { "epoch": 1.8696303930294655, "grad_norm": 0.586474816412384, "learning_rate": 5.550717272849587e-08, "loss": 0.2828, "step": 39911 }, { "epoch": 1.8696772380193938, "grad_norm": 0.570236904237221, "learning_rate": 5.546743786379316e-08, "loss": 0.2484, "step": 39912 }, { "epoch": 1.8697240830093222, "grad_norm": 0.6118502549973148, "learning_rate": 5.5427717066760534e-08, "loss": 0.2738, "step": 39913 }, { "epoch": 1.8697709279992505, "grad_norm": 0.6133232599176438, "learning_rate": 5.5388010337626693e-08, "loss": 0.2712, "step": 39914 }, { "epoch": 1.8698177729891787, "grad_norm": 0.6137016039029733, "learning_rate": 5.534831767662091e-08, "loss": 0.2833, "step": 39915 }, { "epoch": 1.8698646179791072, "grad_norm": 0.5898315574048995, "learning_rate": 5.530863908397105e-08, "loss": 0.2669, "step": 39916 }, { "epoch": 1.8699114629690354, "grad_norm": 0.5906873176706755, "learning_rate": 5.5268974559905264e-08, "loss": 0.273, "step": 39917 }, { "epoch": 1.8699583079589637, "grad_norm": 0.6248762369858709, "learning_rate": 5.522932410465198e-08, "loss": 0.2759, "step": 39918 }, { "epoch": 1.8700051529488921, "grad_norm": 0.6067812915042606, "learning_rate": 5.518968771843991e-08, "loss": 0.2614, "step": 39919 }, { "epoch": 1.8700519979388206, "grad_norm": 0.6046276451369229, "learning_rate": 5.515006540149637e-08, "loss": 0.2734, "step": 39920 }, { "epoch": 1.8700988429287486, "grad_norm": 0.5980339864361176, "learning_rate": 5.511045715405006e-08, "loss": 0.2849, "step": 39921 }, { "epoch": 1.870145687918677, "grad_norm": 0.5501615828747792, "learning_rate": 5.50708629763283e-08, "loss": 0.2456, "step": 39922 }, { "epoch": 1.8701925329086055, "grad_norm": 0.6360292031655772, "learning_rate": 5.5031282868559246e-08, "loss": 0.2734, "step": 39923 }, { "epoch": 1.8702393778985338, "grad_norm": 0.6359504377244428, "learning_rate": 5.4991716830970485e-08, "loss": 0.2678, "step": 39924 }, { "epoch": 1.870286222888462, "grad_norm": 0.5716313870517211, "learning_rate": 5.49521648637899e-08, "loss": 0.2635, "step": 39925 }, { "epoch": 1.8703330678783905, "grad_norm": 0.6210055682500999, "learning_rate": 5.491262696724536e-08, "loss": 0.2716, "step": 39926 }, { "epoch": 1.8703799128683187, "grad_norm": 0.5659766189255091, "learning_rate": 5.487310314156419e-08, "loss": 0.2672, "step": 39927 }, { "epoch": 1.870426757858247, "grad_norm": 0.6101447409714098, "learning_rate": 5.4833593386973413e-08, "loss": 0.2676, "step": 39928 }, { "epoch": 1.8704736028481754, "grad_norm": 0.5949881230465259, "learning_rate": 5.479409770370092e-08, "loss": 0.2747, "step": 39929 }, { "epoch": 1.8705204478381037, "grad_norm": 0.6135159705517017, "learning_rate": 5.4754616091973464e-08, "loss": 0.2668, "step": 39930 }, { "epoch": 1.870567292828032, "grad_norm": 0.5855489698980153, "learning_rate": 5.471514855201893e-08, "loss": 0.2755, "step": 39931 }, { "epoch": 1.8706141378179604, "grad_norm": 0.6382905895195514, "learning_rate": 5.467569508406378e-08, "loss": 0.2818, "step": 39932 }, { "epoch": 1.8706609828078888, "grad_norm": 0.5713818738001848, "learning_rate": 5.463625568833592e-08, "loss": 0.2546, "step": 39933 }, { "epoch": 1.8707078277978169, "grad_norm": 0.5722232299295509, "learning_rate": 5.4596830365061805e-08, "loss": 0.2754, "step": 39934 }, { "epoch": 1.8707546727877453, "grad_norm": 0.6120981456036346, "learning_rate": 5.4557419114467935e-08, "loss": 0.2819, "step": 39935 }, { "epoch": 1.8708015177776738, "grad_norm": 0.5789799219026265, "learning_rate": 5.451802193678135e-08, "loss": 0.2614, "step": 39936 }, { "epoch": 1.870848362767602, "grad_norm": 0.6012617509659117, "learning_rate": 5.447863883222909e-08, "loss": 0.274, "step": 39937 }, { "epoch": 1.8708952077575303, "grad_norm": 0.6057546354683048, "learning_rate": 5.443926980103764e-08, "loss": 0.2696, "step": 39938 }, { "epoch": 1.8709420527474587, "grad_norm": 0.5842071698746099, "learning_rate": 5.439991484343377e-08, "loss": 0.2628, "step": 39939 }, { "epoch": 1.870988897737387, "grad_norm": 0.6352801243531832, "learning_rate": 5.436057395964339e-08, "loss": 0.2686, "step": 39940 }, { "epoch": 1.8710357427273152, "grad_norm": 0.6675755995399728, "learning_rate": 5.432124714989328e-08, "loss": 0.2938, "step": 39941 }, { "epoch": 1.8710825877172437, "grad_norm": 0.6353503453030461, "learning_rate": 5.428193441440965e-08, "loss": 0.2688, "step": 39942 }, { "epoch": 1.871129432707172, "grad_norm": 0.5905348582551082, "learning_rate": 5.4242635753418705e-08, "loss": 0.2679, "step": 39943 }, { "epoch": 1.8711762776971002, "grad_norm": 0.5925118571953915, "learning_rate": 5.4203351167146926e-08, "loss": 0.2582, "step": 39944 }, { "epoch": 1.8712231226870286, "grad_norm": 0.572367552689428, "learning_rate": 5.4164080655819965e-08, "loss": 0.2722, "step": 39945 }, { "epoch": 1.871269967676957, "grad_norm": 0.6419302510955636, "learning_rate": 5.412482421966403e-08, "loss": 0.2784, "step": 39946 }, { "epoch": 1.8713168126668853, "grad_norm": 0.5763801831541207, "learning_rate": 5.4085581858905055e-08, "loss": 0.2635, "step": 39947 }, { "epoch": 1.8713636576568136, "grad_norm": 0.5979947787523424, "learning_rate": 5.404635357376869e-08, "loss": 0.2763, "step": 39948 }, { "epoch": 1.871410502646742, "grad_norm": 0.5763877666848625, "learning_rate": 5.4007139364480874e-08, "loss": 0.2755, "step": 39949 }, { "epoch": 1.8714573476366703, "grad_norm": 0.5919735474837305, "learning_rate": 5.3967939231266975e-08, "loss": 0.2691, "step": 39950 }, { "epoch": 1.8715041926265985, "grad_norm": 0.6312951123709332, "learning_rate": 5.39287531743532e-08, "loss": 0.2699, "step": 39951 }, { "epoch": 1.871551037616527, "grad_norm": 0.6222265722650305, "learning_rate": 5.388958119396437e-08, "loss": 0.2623, "step": 39952 }, { "epoch": 1.8715978826064552, "grad_norm": 0.6244033429990506, "learning_rate": 5.3850423290326136e-08, "loss": 0.275, "step": 39953 }, { "epoch": 1.8716447275963835, "grad_norm": 0.611674167733722, "learning_rate": 5.381127946366416e-08, "loss": 0.2735, "step": 39954 }, { "epoch": 1.871691572586312, "grad_norm": 0.5480256386342579, "learning_rate": 5.377214971420325e-08, "loss": 0.269, "step": 39955 }, { "epoch": 1.8717384175762404, "grad_norm": 0.585786544001593, "learning_rate": 5.373303404216879e-08, "loss": 0.2546, "step": 39956 }, { "epoch": 1.8717852625661684, "grad_norm": 0.6215970653145346, "learning_rate": 5.369393244778615e-08, "loss": 0.2817, "step": 39957 }, { "epoch": 1.8718321075560969, "grad_norm": 0.6149981113670634, "learning_rate": 5.3654844931279595e-08, "loss": 0.275, "step": 39958 }, { "epoch": 1.8718789525460253, "grad_norm": 0.6235634649842682, "learning_rate": 5.361577149287478e-08, "loss": 0.2716, "step": 39959 }, { "epoch": 1.8719257975359536, "grad_norm": 0.6308204366406802, "learning_rate": 5.357671213279625e-08, "loss": 0.2911, "step": 39960 }, { "epoch": 1.8719726425258818, "grad_norm": 0.5846165779200676, "learning_rate": 5.35376668512691e-08, "loss": 0.2715, "step": 39961 }, { "epoch": 1.8720194875158103, "grad_norm": 0.5606530576495776, "learning_rate": 5.3498635648517595e-08, "loss": 0.2721, "step": 39962 }, { "epoch": 1.8720663325057385, "grad_norm": 0.5770545469447326, "learning_rate": 5.345961852476655e-08, "loss": 0.2757, "step": 39963 }, { "epoch": 1.8721131774956667, "grad_norm": 0.5655728522744934, "learning_rate": 5.342061548024052e-08, "loss": 0.27, "step": 39964 }, { "epoch": 1.8721600224855952, "grad_norm": 0.5686389579039981, "learning_rate": 5.3381626515163765e-08, "loss": 0.2674, "step": 39965 }, { "epoch": 1.8722068674755235, "grad_norm": 0.637249177673216, "learning_rate": 5.3342651629760825e-08, "loss": 0.2718, "step": 39966 }, { "epoch": 1.8722537124654517, "grad_norm": 0.6449148969178796, "learning_rate": 5.330369082425624e-08, "loss": 0.285, "step": 39967 }, { "epoch": 1.8723005574553802, "grad_norm": 0.6216104315903824, "learning_rate": 5.3264744098874e-08, "loss": 0.2672, "step": 39968 }, { "epoch": 1.8723474024453086, "grad_norm": 0.614480625401912, "learning_rate": 5.322581145383782e-08, "loss": 0.2907, "step": 39969 }, { "epoch": 1.8723942474352366, "grad_norm": 0.6205834569028671, "learning_rate": 5.318689288937251e-08, "loss": 0.2652, "step": 39970 }, { "epoch": 1.872441092425165, "grad_norm": 0.6248293649523032, "learning_rate": 5.3147988405701237e-08, "loss": 0.2685, "step": 39971 }, { "epoch": 1.8724879374150936, "grad_norm": 0.5327790707064712, "learning_rate": 5.310909800304853e-08, "loss": 0.2442, "step": 39972 }, { "epoch": 1.8725347824050218, "grad_norm": 0.5621110308529141, "learning_rate": 5.307022168163783e-08, "loss": 0.2558, "step": 39973 }, { "epoch": 1.87258162739495, "grad_norm": 0.6424948966491468, "learning_rate": 5.303135944169313e-08, "loss": 0.2884, "step": 39974 }, { "epoch": 1.8726284723848785, "grad_norm": 0.6667154616613085, "learning_rate": 5.299251128343813e-08, "loss": 0.2746, "step": 39975 }, { "epoch": 1.8726753173748067, "grad_norm": 0.6350487664154773, "learning_rate": 5.295367720709599e-08, "loss": 0.2788, "step": 39976 }, { "epoch": 1.872722162364735, "grad_norm": 0.5259026264937557, "learning_rate": 5.2914857212890414e-08, "loss": 0.2431, "step": 39977 }, { "epoch": 1.8727690073546635, "grad_norm": 0.5906609681632158, "learning_rate": 5.2876051301044565e-08, "loss": 0.2609, "step": 39978 }, { "epoch": 1.8728158523445917, "grad_norm": 0.5819608159694963, "learning_rate": 5.283725947178214e-08, "loss": 0.2737, "step": 39979 }, { "epoch": 1.87286269733452, "grad_norm": 0.5775778964489282, "learning_rate": 5.279848172532631e-08, "loss": 0.2569, "step": 39980 }, { "epoch": 1.8729095423244484, "grad_norm": 0.5827149715858408, "learning_rate": 5.2759718061899944e-08, "loss": 0.2608, "step": 39981 }, { "epoch": 1.8729563873143769, "grad_norm": 0.5626695166276542, "learning_rate": 5.272096848172647e-08, "loss": 0.2677, "step": 39982 }, { "epoch": 1.873003232304305, "grad_norm": 0.6234377120298229, "learning_rate": 5.268223298502878e-08, "loss": 0.284, "step": 39983 }, { "epoch": 1.8730500772942333, "grad_norm": 0.587344257243864, "learning_rate": 5.2643511572029736e-08, "loss": 0.274, "step": 39984 }, { "epoch": 1.8730969222841618, "grad_norm": 0.6445606214954402, "learning_rate": 5.260480424295194e-08, "loss": 0.2744, "step": 39985 }, { "epoch": 1.87314376727409, "grad_norm": 0.6542748482398394, "learning_rate": 5.256611099801856e-08, "loss": 0.2996, "step": 39986 }, { "epoch": 1.8731906122640183, "grad_norm": 0.6003945962396113, "learning_rate": 5.2527431837451906e-08, "loss": 0.2748, "step": 39987 }, { "epoch": 1.8732374572539467, "grad_norm": 0.5653902350341775, "learning_rate": 5.248876676147485e-08, "loss": 0.2544, "step": 39988 }, { "epoch": 1.873284302243875, "grad_norm": 0.5763138780604743, "learning_rate": 5.2450115770309725e-08, "loss": 0.269, "step": 39989 }, { "epoch": 1.8733311472338032, "grad_norm": 0.5940750252481447, "learning_rate": 5.241147886417913e-08, "loss": 0.2716, "step": 39990 }, { "epoch": 1.8733779922237317, "grad_norm": 0.5768770560667146, "learning_rate": 5.237285604330511e-08, "loss": 0.2744, "step": 39991 }, { "epoch": 1.8734248372136602, "grad_norm": 0.6124636452593872, "learning_rate": 5.233424730791026e-08, "loss": 0.2559, "step": 39992 }, { "epoch": 1.8734716822035882, "grad_norm": 0.5687334123795865, "learning_rate": 5.229565265821662e-08, "loss": 0.2606, "step": 39993 }, { "epoch": 1.8735185271935166, "grad_norm": 0.6029883027680925, "learning_rate": 5.225707209444625e-08, "loss": 0.2793, "step": 39994 }, { "epoch": 1.873565372183445, "grad_norm": 0.5661936820311774, "learning_rate": 5.221850561682118e-08, "loss": 0.2593, "step": 39995 }, { "epoch": 1.8736122171733733, "grad_norm": 0.5880030361895403, "learning_rate": 5.217995322556346e-08, "loss": 0.2605, "step": 39996 }, { "epoch": 1.8736590621633016, "grad_norm": 0.5760831389406971, "learning_rate": 5.214141492089486e-08, "loss": 0.2722, "step": 39997 }, { "epoch": 1.87370590715323, "grad_norm": 0.5817030616574543, "learning_rate": 5.2102890703037146e-08, "loss": 0.2728, "step": 39998 }, { "epoch": 1.8737527521431583, "grad_norm": 0.5531715926075538, "learning_rate": 5.20643805722118e-08, "loss": 0.2571, "step": 39999 }, { "epoch": 1.8737995971330865, "grad_norm": 0.5263506232990599, "learning_rate": 5.202588452864116e-08, "loss": 0.2401, "step": 40000 }, { "epoch": 1.873846442123015, "grad_norm": 0.5955737163266291, "learning_rate": 5.198740257254586e-08, "loss": 0.2623, "step": 40001 }, { "epoch": 1.8738932871129432, "grad_norm": 0.6077279466418876, "learning_rate": 5.194893470414797e-08, "loss": 0.2803, "step": 40002 }, { "epoch": 1.8739401321028715, "grad_norm": 0.6047883038804607, "learning_rate": 5.1910480923668684e-08, "loss": 0.2756, "step": 40003 }, { "epoch": 1.8739869770928, "grad_norm": 0.5940420338846372, "learning_rate": 5.187204123132922e-08, "loss": 0.2777, "step": 40004 }, { "epoch": 1.8740338220827284, "grad_norm": 0.6286381752095076, "learning_rate": 5.183361562735079e-08, "loss": 0.2866, "step": 40005 }, { "epoch": 1.8740806670726564, "grad_norm": 0.5749487684174347, "learning_rate": 5.179520411195488e-08, "loss": 0.2597, "step": 40006 }, { "epoch": 1.8741275120625849, "grad_norm": 0.6078539458399054, "learning_rate": 5.175680668536187e-08, "loss": 0.2665, "step": 40007 }, { "epoch": 1.8741743570525133, "grad_norm": 0.616150828158688, "learning_rate": 5.1718423347793256e-08, "loss": 0.2768, "step": 40008 }, { "epoch": 1.8742212020424416, "grad_norm": 0.5889095871170551, "learning_rate": 5.168005409946969e-08, "loss": 0.2693, "step": 40009 }, { "epoch": 1.8742680470323698, "grad_norm": 0.6181129611411336, "learning_rate": 5.1641698940612105e-08, "loss": 0.2818, "step": 40010 }, { "epoch": 1.8743148920222983, "grad_norm": 0.5950839931267169, "learning_rate": 5.160335787144116e-08, "loss": 0.2735, "step": 40011 }, { "epoch": 1.8743617370122265, "grad_norm": 0.6205218816824242, "learning_rate": 5.1565030892177793e-08, "loss": 0.2748, "step": 40012 }, { "epoch": 1.8744085820021548, "grad_norm": 0.575006238625284, "learning_rate": 5.152671800304182e-08, "loss": 0.2587, "step": 40013 }, { "epoch": 1.8744554269920832, "grad_norm": 0.5505584999986579, "learning_rate": 5.148841920425446e-08, "loss": 0.2623, "step": 40014 }, { "epoch": 1.8745022719820115, "grad_norm": 0.5391174335239738, "learning_rate": 5.1450134496035534e-08, "loss": 0.2509, "step": 40015 }, { "epoch": 1.8745491169719397, "grad_norm": 0.5856276062584358, "learning_rate": 5.1411863878605693e-08, "loss": 0.2626, "step": 40016 }, { "epoch": 1.8745959619618682, "grad_norm": 0.6294558079116686, "learning_rate": 5.1373607352185603e-08, "loss": 0.2851, "step": 40017 }, { "epoch": 1.8746428069517966, "grad_norm": 0.594912980402242, "learning_rate": 5.133536491699481e-08, "loss": 0.2612, "step": 40018 }, { "epoch": 1.8746896519417249, "grad_norm": 0.5698890506453667, "learning_rate": 5.129713657325341e-08, "loss": 0.2697, "step": 40019 }, { "epoch": 1.8747364969316531, "grad_norm": 0.5837926786911366, "learning_rate": 5.125892232118152e-08, "loss": 0.2465, "step": 40020 }, { "epoch": 1.8747833419215816, "grad_norm": 0.5854568054582281, "learning_rate": 5.122072216099894e-08, "loss": 0.2658, "step": 40021 }, { "epoch": 1.8748301869115098, "grad_norm": 0.6004298873725181, "learning_rate": 5.118253609292578e-08, "loss": 0.2761, "step": 40022 }, { "epoch": 1.874877031901438, "grad_norm": 0.6497285189784882, "learning_rate": 5.1144364117181597e-08, "loss": 0.285, "step": 40023 }, { "epoch": 1.8749238768913665, "grad_norm": 0.6285459110912972, "learning_rate": 5.110620623398621e-08, "loss": 0.2813, "step": 40024 }, { "epoch": 1.8749707218812948, "grad_norm": 0.6264026188916977, "learning_rate": 5.1068062443559163e-08, "loss": 0.2702, "step": 40025 }, { "epoch": 1.875017566871223, "grad_norm": 0.5947001706835328, "learning_rate": 5.1029932746119724e-08, "loss": 0.254, "step": 40026 }, { "epoch": 1.8750644118611515, "grad_norm": 0.5908479050174248, "learning_rate": 5.0991817141887444e-08, "loss": 0.2623, "step": 40027 }, { "epoch": 1.87511125685108, "grad_norm": 0.5790783498381733, "learning_rate": 5.095371563108159e-08, "loss": 0.2426, "step": 40028 }, { "epoch": 1.875158101841008, "grad_norm": 0.6457175379601203, "learning_rate": 5.091562821392171e-08, "loss": 0.2877, "step": 40029 }, { "epoch": 1.8752049468309364, "grad_norm": 0.637500053952122, "learning_rate": 5.087755489062707e-08, "loss": 0.2826, "step": 40030 }, { "epoch": 1.8752517918208649, "grad_norm": 0.5812216098910478, "learning_rate": 5.083949566141638e-08, "loss": 0.2767, "step": 40031 }, { "epoch": 1.8752986368107931, "grad_norm": 0.6037466804942594, "learning_rate": 5.080145052650892e-08, "loss": 0.2838, "step": 40032 }, { "epoch": 1.8753454818007214, "grad_norm": 0.5889942594043169, "learning_rate": 5.07634194861234e-08, "loss": 0.2768, "step": 40033 }, { "epoch": 1.8753923267906498, "grad_norm": 0.601935111947522, "learning_rate": 5.072540254047881e-08, "loss": 0.2599, "step": 40034 }, { "epoch": 1.875439171780578, "grad_norm": 0.6011019850162795, "learning_rate": 5.068739968979386e-08, "loss": 0.2756, "step": 40035 }, { "epoch": 1.8754860167705063, "grad_norm": 0.6069770330517362, "learning_rate": 5.064941093428727e-08, "loss": 0.2623, "step": 40036 }, { "epoch": 1.8755328617604348, "grad_norm": 0.642528469283594, "learning_rate": 5.061143627417803e-08, "loss": 0.2819, "step": 40037 }, { "epoch": 1.875579706750363, "grad_norm": 0.614274057368157, "learning_rate": 5.0573475709684015e-08, "loss": 0.274, "step": 40038 }, { "epoch": 1.8756265517402912, "grad_norm": 0.5776706019122054, "learning_rate": 5.0535529241024226e-08, "loss": 0.2533, "step": 40039 }, { "epoch": 1.8756733967302197, "grad_norm": 0.590499422388232, "learning_rate": 5.049759686841682e-08, "loss": 0.2588, "step": 40040 }, { "epoch": 1.8757202417201482, "grad_norm": 0.5906449427937229, "learning_rate": 5.0459678592079954e-08, "loss": 0.263, "step": 40041 }, { "epoch": 1.8757670867100762, "grad_norm": 0.6031522160431761, "learning_rate": 5.0421774412231785e-08, "loss": 0.2815, "step": 40042 }, { "epoch": 1.8758139317000047, "grad_norm": 0.61317515722346, "learning_rate": 5.0383884329091027e-08, "loss": 0.2708, "step": 40043 }, { "epoch": 1.8758607766899331, "grad_norm": 0.5592424128404064, "learning_rate": 5.034600834287501e-08, "loss": 0.2569, "step": 40044 }, { "epoch": 1.8759076216798614, "grad_norm": 0.6022196081661763, "learning_rate": 5.0308146453802444e-08, "loss": 0.27, "step": 40045 }, { "epoch": 1.8759544666697896, "grad_norm": 0.5997973016139277, "learning_rate": 5.027029866209038e-08, "loss": 0.2552, "step": 40046 }, { "epoch": 1.876001311659718, "grad_norm": 0.6390483665977145, "learning_rate": 5.023246496795697e-08, "loss": 0.2852, "step": 40047 }, { "epoch": 1.8760481566496463, "grad_norm": 0.5792757541969904, "learning_rate": 5.019464537162011e-08, "loss": 0.2798, "step": 40048 }, { "epoch": 1.8760950016395745, "grad_norm": 0.6171520946736935, "learning_rate": 5.015683987329767e-08, "loss": 0.2762, "step": 40049 }, { "epoch": 1.876141846629503, "grad_norm": 0.6254502277823403, "learning_rate": 5.011904847320642e-08, "loss": 0.2798, "step": 40050 }, { "epoch": 1.8761886916194312, "grad_norm": 0.5884100028920956, "learning_rate": 5.0081271171564526e-08, "loss": 0.2814, "step": 40051 }, { "epoch": 1.8762355366093595, "grad_norm": 0.5300396380914364, "learning_rate": 5.004350796858931e-08, "loss": 0.234, "step": 40052 }, { "epoch": 1.876282381599288, "grad_norm": 0.6297595179279009, "learning_rate": 5.000575886449754e-08, "loss": 0.2782, "step": 40053 }, { "epoch": 1.8763292265892164, "grad_norm": 0.5924200052383066, "learning_rate": 4.996802385950711e-08, "loss": 0.2719, "step": 40054 }, { "epoch": 1.8763760715791447, "grad_norm": 0.5876872425595457, "learning_rate": 4.993030295383478e-08, "loss": 0.2762, "step": 40055 }, { "epoch": 1.876422916569073, "grad_norm": 0.5926460281924688, "learning_rate": 4.989259614769787e-08, "loss": 0.2599, "step": 40056 }, { "epoch": 1.8764697615590014, "grad_norm": 0.577345731968222, "learning_rate": 4.985490344131316e-08, "loss": 0.27, "step": 40057 }, { "epoch": 1.8765166065489296, "grad_norm": 0.6402503417070755, "learning_rate": 4.981722483489743e-08, "loss": 0.2758, "step": 40058 }, { "epoch": 1.8765634515388578, "grad_norm": 0.6253448491509732, "learning_rate": 4.9779560328668264e-08, "loss": 0.2842, "step": 40059 }, { "epoch": 1.8766102965287863, "grad_norm": 0.623070062270877, "learning_rate": 4.974190992284161e-08, "loss": 0.2769, "step": 40060 }, { "epoch": 1.8766571415187145, "grad_norm": 0.6062104255143234, "learning_rate": 4.9704273617634525e-08, "loss": 0.2749, "step": 40061 }, { "epoch": 1.8767039865086428, "grad_norm": 0.6223384181009971, "learning_rate": 4.9666651413263214e-08, "loss": 0.2726, "step": 40062 }, { "epoch": 1.8767508314985712, "grad_norm": 0.5819194063354651, "learning_rate": 4.9629043309944725e-08, "loss": 0.2651, "step": 40063 }, { "epoch": 1.8767976764884997, "grad_norm": 0.5980821789078744, "learning_rate": 4.9591449307895e-08, "loss": 0.2606, "step": 40064 }, { "epoch": 1.8768445214784277, "grad_norm": 0.5412330622123782, "learning_rate": 4.955386940733054e-08, "loss": 0.2618, "step": 40065 }, { "epoch": 1.8768913664683562, "grad_norm": 0.5839282168283799, "learning_rate": 4.9516303608468095e-08, "loss": 0.2718, "step": 40066 }, { "epoch": 1.8769382114582847, "grad_norm": 0.6521404183608468, "learning_rate": 4.947875191152335e-08, "loss": 0.2953, "step": 40067 }, { "epoch": 1.876985056448213, "grad_norm": 0.6295940142194131, "learning_rate": 4.9441214316712224e-08, "loss": 0.2844, "step": 40068 }, { "epoch": 1.8770319014381411, "grad_norm": 0.639076254588082, "learning_rate": 4.940369082425095e-08, "loss": 0.2644, "step": 40069 }, { "epoch": 1.8770787464280696, "grad_norm": 0.6031930582869021, "learning_rate": 4.936618143435545e-08, "loss": 0.2765, "step": 40070 }, { "epoch": 1.8771255914179978, "grad_norm": 0.5606080794097035, "learning_rate": 4.932868614724168e-08, "loss": 0.2571, "step": 40071 }, { "epoch": 1.877172436407926, "grad_norm": 0.6474688686357064, "learning_rate": 4.929120496312556e-08, "loss": 0.2984, "step": 40072 }, { "epoch": 1.8772192813978545, "grad_norm": 0.56549878597615, "learning_rate": 4.9253737882222485e-08, "loss": 0.256, "step": 40073 }, { "epoch": 1.8772661263877828, "grad_norm": 0.5876966250999395, "learning_rate": 4.921628490474839e-08, "loss": 0.2641, "step": 40074 }, { "epoch": 1.877312971377711, "grad_norm": 0.5739217896744427, "learning_rate": 4.917884603091838e-08, "loss": 0.2759, "step": 40075 }, { "epoch": 1.8773598163676395, "grad_norm": 0.591055869594857, "learning_rate": 4.914142126094812e-08, "loss": 0.2644, "step": 40076 }, { "epoch": 1.877406661357568, "grad_norm": 0.6621543630548484, "learning_rate": 4.9104010595052706e-08, "loss": 0.2937, "step": 40077 }, { "epoch": 1.877453506347496, "grad_norm": 0.6383569844320507, "learning_rate": 4.906661403344809e-08, "loss": 0.2793, "step": 40078 }, { "epoch": 1.8775003513374244, "grad_norm": 0.5910754200292981, "learning_rate": 4.902923157634909e-08, "loss": 0.2545, "step": 40079 }, { "epoch": 1.877547196327353, "grad_norm": 0.6444411745515216, "learning_rate": 4.899186322397054e-08, "loss": 0.2821, "step": 40080 }, { "epoch": 1.8775940413172811, "grad_norm": 0.5951088474023088, "learning_rate": 4.895450897652837e-08, "loss": 0.2747, "step": 40081 }, { "epoch": 1.8776408863072094, "grad_norm": 0.5877818231003391, "learning_rate": 4.8917168834236306e-08, "loss": 0.263, "step": 40082 }, { "epoch": 1.8776877312971378, "grad_norm": 0.5852100820738859, "learning_rate": 4.887984279731001e-08, "loss": 0.2666, "step": 40083 }, { "epoch": 1.877734576287066, "grad_norm": 0.6085517504579824, "learning_rate": 4.88425308659643e-08, "loss": 0.2785, "step": 40084 }, { "epoch": 1.8777814212769943, "grad_norm": 0.6086198949029267, "learning_rate": 4.880523304041401e-08, "loss": 0.2814, "step": 40085 }, { "epoch": 1.8778282662669228, "grad_norm": 0.5870735452405296, "learning_rate": 4.8767949320873144e-08, "loss": 0.264, "step": 40086 }, { "epoch": 1.877875111256851, "grad_norm": 0.6045684507680915, "learning_rate": 4.8730679707556513e-08, "loss": 0.2668, "step": 40087 }, { "epoch": 1.8779219562467793, "grad_norm": 0.6156621711138465, "learning_rate": 4.869342420067924e-08, "loss": 0.2903, "step": 40088 }, { "epoch": 1.8779688012367077, "grad_norm": 0.5914407429524118, "learning_rate": 4.865618280045475e-08, "loss": 0.2733, "step": 40089 }, { "epoch": 1.8780156462266362, "grad_norm": 0.5406427140743707, "learning_rate": 4.861895550709789e-08, "loss": 0.2588, "step": 40090 }, { "epoch": 1.8780624912165644, "grad_norm": 0.5879548670136235, "learning_rate": 4.858174232082291e-08, "loss": 0.2667, "step": 40091 }, { "epoch": 1.8781093362064927, "grad_norm": 0.5945999096130352, "learning_rate": 4.854454324184382e-08, "loss": 0.2578, "step": 40092 }, { "epoch": 1.8781561811964211, "grad_norm": 0.6143364898522808, "learning_rate": 4.850735827037462e-08, "loss": 0.282, "step": 40093 }, { "epoch": 1.8782030261863494, "grad_norm": 0.6209259846352128, "learning_rate": 4.8470187406629846e-08, "loss": 0.2758, "step": 40094 }, { "epoch": 1.8782498711762776, "grad_norm": 0.5933507987172609, "learning_rate": 4.843303065082239e-08, "loss": 0.2681, "step": 40095 }, { "epoch": 1.878296716166206, "grad_norm": 0.5954662174490374, "learning_rate": 4.83958880031668e-08, "loss": 0.2756, "step": 40096 }, { "epoch": 1.8783435611561343, "grad_norm": 0.5945004958187109, "learning_rate": 4.835875946387708e-08, "loss": 0.2626, "step": 40097 }, { "epoch": 1.8783904061460626, "grad_norm": 0.5979902735063864, "learning_rate": 4.832164503316611e-08, "loss": 0.2757, "step": 40098 }, { "epoch": 1.878437251135991, "grad_norm": 0.5824140996166453, "learning_rate": 4.828454471124788e-08, "loss": 0.2508, "step": 40099 }, { "epoch": 1.8784840961259195, "grad_norm": 0.6296236646586612, "learning_rate": 4.824745849833612e-08, "loss": 0.2699, "step": 40100 }, { "epoch": 1.8785309411158475, "grad_norm": 0.6037403782452606, "learning_rate": 4.821038639464398e-08, "loss": 0.2581, "step": 40101 }, { "epoch": 1.878577786105776, "grad_norm": 0.6589456221514912, "learning_rate": 4.8173328400384635e-08, "loss": 0.2804, "step": 40102 }, { "epoch": 1.8786246310957044, "grad_norm": 0.6038790420537468, "learning_rate": 4.8136284515771517e-08, "loss": 0.2775, "step": 40103 }, { "epoch": 1.8786714760856327, "grad_norm": 0.6043610048462662, "learning_rate": 4.8099254741018066e-08, "loss": 0.2743, "step": 40104 }, { "epoch": 1.878718321075561, "grad_norm": 0.5782685127763997, "learning_rate": 4.806223907633717e-08, "loss": 0.2641, "step": 40105 }, { "epoch": 1.8787651660654894, "grad_norm": 0.60534417347771, "learning_rate": 4.802523752194144e-08, "loss": 0.2526, "step": 40106 }, { "epoch": 1.8788120110554176, "grad_norm": 0.632473722008837, "learning_rate": 4.7988250078044305e-08, "loss": 0.2899, "step": 40107 }, { "epoch": 1.8788588560453459, "grad_norm": 0.6072429376561188, "learning_rate": 4.795127674485894e-08, "loss": 0.261, "step": 40108 }, { "epoch": 1.8789057010352743, "grad_norm": 0.5722015587244673, "learning_rate": 4.791431752259712e-08, "loss": 0.256, "step": 40109 }, { "epoch": 1.8789525460252026, "grad_norm": 0.5872701631881891, "learning_rate": 4.787737241147256e-08, "loss": 0.2658, "step": 40110 }, { "epoch": 1.8789993910151308, "grad_norm": 0.5893819144211289, "learning_rate": 4.7840441411697024e-08, "loss": 0.2709, "step": 40111 }, { "epoch": 1.8790462360050593, "grad_norm": 0.6526071344633545, "learning_rate": 4.780352452348369e-08, "loss": 0.2777, "step": 40112 }, { "epoch": 1.8790930809949877, "grad_norm": 0.603121671540779, "learning_rate": 4.776662174704461e-08, "loss": 0.2543, "step": 40113 }, { "epoch": 1.8791399259849157, "grad_norm": 0.5989753496548583, "learning_rate": 4.77297330825921e-08, "loss": 0.2656, "step": 40114 }, { "epoch": 1.8791867709748442, "grad_norm": 0.5847491271642911, "learning_rate": 4.769285853033906e-08, "loss": 0.2633, "step": 40115 }, { "epoch": 1.8792336159647727, "grad_norm": 0.6302831495764352, "learning_rate": 4.765599809049698e-08, "loss": 0.2687, "step": 40116 }, { "epoch": 1.879280460954701, "grad_norm": 0.6037387194598192, "learning_rate": 4.7619151763278193e-08, "loss": 0.2596, "step": 40117 }, { "epoch": 1.8793273059446292, "grad_norm": 0.5778953295310384, "learning_rate": 4.758231954889475e-08, "loss": 0.267, "step": 40118 }, { "epoch": 1.8793741509345576, "grad_norm": 0.6287584936279459, "learning_rate": 4.7545501447558985e-08, "loss": 0.2914, "step": 40119 }, { "epoch": 1.8794209959244859, "grad_norm": 0.6039826212060172, "learning_rate": 4.750869745948211e-08, "loss": 0.2733, "step": 40120 }, { "epoch": 1.879467840914414, "grad_norm": 0.5969715570952491, "learning_rate": 4.7471907584876465e-08, "loss": 0.2759, "step": 40121 }, { "epoch": 1.8795146859043426, "grad_norm": 0.5781050179325355, "learning_rate": 4.743513182395354e-08, "loss": 0.272, "step": 40122 }, { "epoch": 1.8795615308942708, "grad_norm": 0.5883894719873619, "learning_rate": 4.739837017692511e-08, "loss": 0.2767, "step": 40123 }, { "epoch": 1.879608375884199, "grad_norm": 0.6047012489931327, "learning_rate": 4.736162264400268e-08, "loss": 0.296, "step": 40124 }, { "epoch": 1.8796552208741275, "grad_norm": 0.6205122493821817, "learning_rate": 4.7324889225397184e-08, "loss": 0.2812, "step": 40125 }, { "epoch": 1.879702065864056, "grad_norm": 0.6295907226045182, "learning_rate": 4.728816992132096e-08, "loss": 0.2835, "step": 40126 }, { "epoch": 1.8797489108539842, "grad_norm": 0.6065042286561886, "learning_rate": 4.7251464731984664e-08, "loss": 0.2793, "step": 40127 }, { "epoch": 1.8797957558439125, "grad_norm": 0.6147891261159427, "learning_rate": 4.72147736575998e-08, "loss": 0.278, "step": 40128 }, { "epoch": 1.879842600833841, "grad_norm": 0.5977716096771677, "learning_rate": 4.7178096698377307e-08, "loss": 0.2837, "step": 40129 }, { "epoch": 1.8798894458237692, "grad_norm": 0.6414397541306356, "learning_rate": 4.7141433854528687e-08, "loss": 0.2842, "step": 40130 }, { "epoch": 1.8799362908136974, "grad_norm": 0.5880228861519824, "learning_rate": 4.710478512626432e-08, "loss": 0.2731, "step": 40131 }, { "epoch": 1.8799831358036259, "grad_norm": 0.6211466694979526, "learning_rate": 4.706815051379543e-08, "loss": 0.2732, "step": 40132 }, { "epoch": 1.880029980793554, "grad_norm": 0.6202970233031146, "learning_rate": 4.7031530017332686e-08, "loss": 0.2673, "step": 40133 }, { "epoch": 1.8800768257834823, "grad_norm": 0.6545910357798845, "learning_rate": 4.69949236370873e-08, "loss": 0.2856, "step": 40134 }, { "epoch": 1.8801236707734108, "grad_norm": 0.5931357512276378, "learning_rate": 4.6958331373269386e-08, "loss": 0.274, "step": 40135 }, { "epoch": 1.8801705157633393, "grad_norm": 0.6204705051989882, "learning_rate": 4.692175322608988e-08, "loss": 0.28, "step": 40136 }, { "epoch": 1.8802173607532673, "grad_norm": 0.6101593813699488, "learning_rate": 4.6885189195758906e-08, "loss": 0.2726, "step": 40137 }, { "epoch": 1.8802642057431957, "grad_norm": 0.6039217306493588, "learning_rate": 4.684863928248712e-08, "loss": 0.2687, "step": 40138 }, { "epoch": 1.8803110507331242, "grad_norm": 0.5958863658146586, "learning_rate": 4.681210348648463e-08, "loss": 0.2789, "step": 40139 }, { "epoch": 1.8803578957230525, "grad_norm": 0.5572946612856772, "learning_rate": 4.677558180796238e-08, "loss": 0.258, "step": 40140 }, { "epoch": 1.8804047407129807, "grad_norm": 0.6069045192385857, "learning_rate": 4.6739074247129925e-08, "loss": 0.271, "step": 40141 }, { "epoch": 1.8804515857029092, "grad_norm": 0.568945290360588, "learning_rate": 4.6702580804197376e-08, "loss": 0.256, "step": 40142 }, { "epoch": 1.8804984306928374, "grad_norm": 0.6351533829523488, "learning_rate": 4.6666101479374834e-08, "loss": 0.3008, "step": 40143 }, { "epoch": 1.8805452756827656, "grad_norm": 0.6139350365556814, "learning_rate": 4.662963627287242e-08, "loss": 0.273, "step": 40144 }, { "epoch": 1.880592120672694, "grad_norm": 0.6064211786061176, "learning_rate": 4.659318518489969e-08, "loss": 0.2753, "step": 40145 }, { "epoch": 1.8806389656626223, "grad_norm": 0.6000198275803004, "learning_rate": 4.655674821566675e-08, "loss": 0.2729, "step": 40146 }, { "epoch": 1.8806858106525506, "grad_norm": 0.5553581145551153, "learning_rate": 4.652032536538259e-08, "loss": 0.2485, "step": 40147 }, { "epoch": 1.880732655642479, "grad_norm": 0.5749372661521535, "learning_rate": 4.648391663425761e-08, "loss": 0.2531, "step": 40148 }, { "epoch": 1.8807795006324075, "grad_norm": 0.6238743936555499, "learning_rate": 4.644752202250108e-08, "loss": 0.2718, "step": 40149 }, { "epoch": 1.8808263456223355, "grad_norm": 0.6047620084723331, "learning_rate": 4.641114153032228e-08, "loss": 0.285, "step": 40150 }, { "epoch": 1.880873190612264, "grad_norm": 0.6365142877573727, "learning_rate": 4.637477515793076e-08, "loss": 0.273, "step": 40151 }, { "epoch": 1.8809200356021925, "grad_norm": 0.5877596002180872, "learning_rate": 4.633842290553581e-08, "loss": 0.2681, "step": 40152 }, { "epoch": 1.8809668805921207, "grad_norm": 0.607682947973624, "learning_rate": 4.6302084773346415e-08, "loss": 0.2779, "step": 40153 }, { "epoch": 1.881013725582049, "grad_norm": 0.5943012800331143, "learning_rate": 4.626576076157158e-08, "loss": 0.2773, "step": 40154 }, { "epoch": 1.8810605705719774, "grad_norm": 0.5808672863319337, "learning_rate": 4.622945087042086e-08, "loss": 0.2582, "step": 40155 }, { "epoch": 1.8811074155619056, "grad_norm": 0.590305946997166, "learning_rate": 4.619315510010269e-08, "loss": 0.2692, "step": 40156 }, { "epoch": 1.8811542605518339, "grad_norm": 0.6085253035626742, "learning_rate": 4.615687345082637e-08, "loss": 0.2683, "step": 40157 }, { "epoch": 1.8812011055417623, "grad_norm": 0.5524665698438276, "learning_rate": 4.6120605922800597e-08, "loss": 0.2484, "step": 40158 }, { "epoch": 1.8812479505316906, "grad_norm": 0.5956880700920304, "learning_rate": 4.6084352516233836e-08, "loss": 0.2703, "step": 40159 }, { "epoch": 1.8812947955216188, "grad_norm": 0.5941373843564682, "learning_rate": 4.604811323133479e-08, "loss": 0.2677, "step": 40160 }, { "epoch": 1.8813416405115473, "grad_norm": 0.6275050864552129, "learning_rate": 4.6011888068311916e-08, "loss": 0.2809, "step": 40161 }, { "epoch": 1.8813884855014757, "grad_norm": 0.571376217328298, "learning_rate": 4.5975677027373935e-08, "loss": 0.2587, "step": 40162 }, { "epoch": 1.881435330491404, "grad_norm": 0.5873089584324417, "learning_rate": 4.5939480108729284e-08, "loss": 0.2669, "step": 40163 }, { "epoch": 1.8814821754813322, "grad_norm": 0.5843634668831713, "learning_rate": 4.5903297312586137e-08, "loss": 0.2586, "step": 40164 }, { "epoch": 1.8815290204712607, "grad_norm": 0.5990167625708339, "learning_rate": 4.5867128639152934e-08, "loss": 0.27, "step": 40165 }, { "epoch": 1.881575865461189, "grad_norm": 0.5894580125482536, "learning_rate": 4.583097408863729e-08, "loss": 0.2675, "step": 40166 }, { "epoch": 1.8816227104511172, "grad_norm": 0.5891623524338039, "learning_rate": 4.579483366124737e-08, "loss": 0.2745, "step": 40167 }, { "epoch": 1.8816695554410456, "grad_norm": 0.6021212880113042, "learning_rate": 4.5758707357191626e-08, "loss": 0.2762, "step": 40168 }, { "epoch": 1.8817164004309739, "grad_norm": 0.6335129951867011, "learning_rate": 4.572259517667737e-08, "loss": 0.2924, "step": 40169 }, { "epoch": 1.8817632454209021, "grad_norm": 0.5918521474875961, "learning_rate": 4.56864971199128e-08, "loss": 0.2706, "step": 40170 }, { "epoch": 1.8818100904108306, "grad_norm": 0.6336917583382032, "learning_rate": 4.5650413187106055e-08, "loss": 0.2699, "step": 40171 }, { "epoch": 1.881856935400759, "grad_norm": 0.6131197688911068, "learning_rate": 4.561434337846393e-08, "loss": 0.293, "step": 40172 }, { "epoch": 1.881903780390687, "grad_norm": 0.5908860321356837, "learning_rate": 4.557828769419431e-08, "loss": 0.2598, "step": 40173 }, { "epoch": 1.8819506253806155, "grad_norm": 0.6393252480406437, "learning_rate": 4.5542246134504806e-08, "loss": 0.2746, "step": 40174 }, { "epoch": 1.881997470370544, "grad_norm": 0.6033193867631802, "learning_rate": 4.5506218699602754e-08, "loss": 0.2705, "step": 40175 }, { "epoch": 1.8820443153604722, "grad_norm": 0.6009880529218058, "learning_rate": 4.5470205389695486e-08, "loss": 0.276, "step": 40176 }, { "epoch": 1.8820911603504005, "grad_norm": 0.6171190225843488, "learning_rate": 4.543420620499034e-08, "loss": 0.2639, "step": 40177 }, { "epoch": 1.882138005340329, "grad_norm": 0.6092626726964524, "learning_rate": 4.539822114569437e-08, "loss": 0.2842, "step": 40178 }, { "epoch": 1.8821848503302572, "grad_norm": 0.6135707248749895, "learning_rate": 4.53622502120149e-08, "loss": 0.2629, "step": 40179 }, { "epoch": 1.8822316953201854, "grad_norm": 0.6402767282460163, "learning_rate": 4.532629340415845e-08, "loss": 0.2832, "step": 40180 }, { "epoch": 1.8822785403101139, "grad_norm": 0.6082146305037045, "learning_rate": 4.529035072233234e-08, "loss": 0.2739, "step": 40181 }, { "epoch": 1.8823253853000421, "grad_norm": 0.6290183193583895, "learning_rate": 4.5254422166743086e-08, "loss": 0.2681, "step": 40182 }, { "epoch": 1.8823722302899704, "grad_norm": 0.5837109876795125, "learning_rate": 4.5218507737598006e-08, "loss": 0.2727, "step": 40183 }, { "epoch": 1.8824190752798988, "grad_norm": 0.6416153916388332, "learning_rate": 4.5182607435103334e-08, "loss": 0.2764, "step": 40184 }, { "epoch": 1.8824659202698273, "grad_norm": 0.6374985162383711, "learning_rate": 4.514672125946556e-08, "loss": 0.2788, "step": 40185 }, { "epoch": 1.8825127652597553, "grad_norm": 0.6231686221304712, "learning_rate": 4.5110849210891485e-08, "loss": 0.2788, "step": 40186 }, { "epoch": 1.8825596102496838, "grad_norm": 0.6371380253086493, "learning_rate": 4.5074991289587587e-08, "loss": 0.2806, "step": 40187 }, { "epoch": 1.8826064552396122, "grad_norm": 0.6076799498327249, "learning_rate": 4.503914749575983e-08, "loss": 0.2803, "step": 40188 }, { "epoch": 1.8826533002295405, "grad_norm": 0.5619849480997056, "learning_rate": 4.500331782961526e-08, "loss": 0.2643, "step": 40189 }, { "epoch": 1.8827001452194687, "grad_norm": 0.5690862096607059, "learning_rate": 4.496750229135899e-08, "loss": 0.2534, "step": 40190 }, { "epoch": 1.8827469902093972, "grad_norm": 0.6387863860888957, "learning_rate": 4.49317008811978e-08, "loss": 0.2735, "step": 40191 }, { "epoch": 1.8827938351993254, "grad_norm": 0.6211570694034694, "learning_rate": 4.4895913599337924e-08, "loss": 0.2678, "step": 40192 }, { "epoch": 1.8828406801892537, "grad_norm": 0.6115475041047452, "learning_rate": 4.486014044598475e-08, "loss": 0.2668, "step": 40193 }, { "epoch": 1.8828875251791821, "grad_norm": 0.5974255454342503, "learning_rate": 4.4824381421344487e-08, "loss": 0.2546, "step": 40194 }, { "epoch": 1.8829343701691104, "grad_norm": 0.6120411349014865, "learning_rate": 4.478863652562282e-08, "loss": 0.277, "step": 40195 }, { "epoch": 1.8829812151590386, "grad_norm": 0.588857730004493, "learning_rate": 4.4752905759025413e-08, "loss": 0.2704, "step": 40196 }, { "epoch": 1.883028060148967, "grad_norm": 0.5750786073061348, "learning_rate": 4.471718912175793e-08, "loss": 0.2558, "step": 40197 }, { "epoch": 1.8830749051388955, "grad_norm": 0.5940987669140199, "learning_rate": 4.468148661402577e-08, "loss": 0.2862, "step": 40198 }, { "epoch": 1.8831217501288238, "grad_norm": 0.6408823593471281, "learning_rate": 4.464579823603488e-08, "loss": 0.3059, "step": 40199 }, { "epoch": 1.883168595118752, "grad_norm": 0.5830786599096363, "learning_rate": 4.4610123987990086e-08, "loss": 0.2669, "step": 40200 }, { "epoch": 1.8832154401086805, "grad_norm": 0.5985662257949145, "learning_rate": 4.4574463870097063e-08, "loss": 0.2744, "step": 40201 }, { "epoch": 1.8832622850986087, "grad_norm": 0.6059273842427011, "learning_rate": 4.453881788256065e-08, "loss": 0.2695, "step": 40202 }, { "epoch": 1.883309130088537, "grad_norm": 0.5999543125516668, "learning_rate": 4.450318602558595e-08, "loss": 0.2698, "step": 40203 }, { "epoch": 1.8833559750784654, "grad_norm": 0.6487907878555009, "learning_rate": 4.4467568299378646e-08, "loss": 0.2629, "step": 40204 }, { "epoch": 1.8834028200683937, "grad_norm": 0.640005758692839, "learning_rate": 4.4431964704143014e-08, "loss": 0.2759, "step": 40205 }, { "epoch": 1.883449665058322, "grad_norm": 0.6368704481531156, "learning_rate": 4.439637524008444e-08, "loss": 0.2728, "step": 40206 }, { "epoch": 1.8834965100482504, "grad_norm": 0.5620528256143139, "learning_rate": 4.436079990740777e-08, "loss": 0.2576, "step": 40207 }, { "epoch": 1.8835433550381788, "grad_norm": 0.6275393496477006, "learning_rate": 4.4325238706317274e-08, "loss": 0.2804, "step": 40208 }, { "epoch": 1.8835902000281068, "grad_norm": 0.5712708410404167, "learning_rate": 4.428969163701752e-08, "loss": 0.2515, "step": 40209 }, { "epoch": 1.8836370450180353, "grad_norm": 0.6475140099366858, "learning_rate": 4.425415869971361e-08, "loss": 0.2635, "step": 40210 }, { "epoch": 1.8836838900079638, "grad_norm": 0.5926017229357954, "learning_rate": 4.4218639894609836e-08, "loss": 0.2616, "step": 40211 }, { "epoch": 1.883730734997892, "grad_norm": 0.5891853444705435, "learning_rate": 4.4183135221910475e-08, "loss": 0.2677, "step": 40212 }, { "epoch": 1.8837775799878202, "grad_norm": 0.6208600927757234, "learning_rate": 4.414764468182009e-08, "loss": 0.2598, "step": 40213 }, { "epoch": 1.8838244249777487, "grad_norm": 0.5915045435112316, "learning_rate": 4.411216827454268e-08, "loss": 0.265, "step": 40214 }, { "epoch": 1.883871269967677, "grad_norm": 0.5983072820112888, "learning_rate": 4.407670600028252e-08, "loss": 0.274, "step": 40215 }, { "epoch": 1.8839181149576052, "grad_norm": 0.6359541497880805, "learning_rate": 4.404125785924335e-08, "loss": 0.2778, "step": 40216 }, { "epoch": 1.8839649599475337, "grad_norm": 0.5983174965406857, "learning_rate": 4.400582385162971e-08, "loss": 0.2609, "step": 40217 }, { "epoch": 1.884011804937462, "grad_norm": 0.6298282984135716, "learning_rate": 4.397040397764507e-08, "loss": 0.2807, "step": 40218 }, { "epoch": 1.8840586499273901, "grad_norm": 0.6032272435491696, "learning_rate": 4.39349982374937e-08, "loss": 0.2778, "step": 40219 }, { "epoch": 1.8841054949173186, "grad_norm": 0.572072517921984, "learning_rate": 4.389960663137904e-08, "loss": 0.2672, "step": 40220 }, { "epoch": 1.884152339907247, "grad_norm": 0.5976488895027556, "learning_rate": 4.386422915950483e-08, "loss": 0.2763, "step": 40221 }, { "epoch": 1.884199184897175, "grad_norm": 0.58280778550312, "learning_rate": 4.382886582207452e-08, "loss": 0.2604, "step": 40222 }, { "epoch": 1.8842460298871035, "grad_norm": 0.6089217925232456, "learning_rate": 4.379351661929182e-08, "loss": 0.2696, "step": 40223 }, { "epoch": 1.884292874877032, "grad_norm": 0.5623843344773922, "learning_rate": 4.375818155136019e-08, "loss": 0.2548, "step": 40224 }, { "epoch": 1.8843397198669602, "grad_norm": 0.6042380363194655, "learning_rate": 4.37228606184828e-08, "loss": 0.2711, "step": 40225 }, { "epoch": 1.8843865648568885, "grad_norm": 0.5760583541592427, "learning_rate": 4.368755382086309e-08, "loss": 0.2613, "step": 40226 }, { "epoch": 1.884433409846817, "grad_norm": 0.6041388207846082, "learning_rate": 4.365226115870397e-08, "loss": 0.274, "step": 40227 }, { "epoch": 1.8844802548367452, "grad_norm": 0.6109037991615882, "learning_rate": 4.361698263220887e-08, "loss": 0.2794, "step": 40228 }, { "epoch": 1.8845270998266734, "grad_norm": 0.6513219200232592, "learning_rate": 4.3581718241580416e-08, "loss": 0.2658, "step": 40229 }, { "epoch": 1.884573944816602, "grad_norm": 0.601410112599377, "learning_rate": 4.3546467987021776e-08, "loss": 0.2747, "step": 40230 }, { "epoch": 1.8846207898065301, "grad_norm": 0.5959415280127137, "learning_rate": 4.3511231868736124e-08, "loss": 0.271, "step": 40231 }, { "epoch": 1.8846676347964584, "grad_norm": 0.5787588734495955, "learning_rate": 4.3476009886925795e-08, "loss": 0.2695, "step": 40232 }, { "epoch": 1.8847144797863868, "grad_norm": 0.5838971306677115, "learning_rate": 4.344080204179341e-08, "loss": 0.2561, "step": 40233 }, { "epoch": 1.8847613247763153, "grad_norm": 0.6350680358199913, "learning_rate": 4.3405608333542125e-08, "loss": 0.2825, "step": 40234 }, { "epoch": 1.8848081697662435, "grad_norm": 0.5726193571083245, "learning_rate": 4.337042876237374e-08, "loss": 0.262, "step": 40235 }, { "epoch": 1.8848550147561718, "grad_norm": 0.6125532809239183, "learning_rate": 4.333526332849114e-08, "loss": 0.2779, "step": 40236 }, { "epoch": 1.8849018597461002, "grad_norm": 0.5875039025384211, "learning_rate": 4.3300112032096944e-08, "loss": 0.2781, "step": 40237 }, { "epoch": 1.8849487047360285, "grad_norm": 0.5831401971754864, "learning_rate": 4.3264974873392926e-08, "loss": 0.2604, "step": 40238 }, { "epoch": 1.8849955497259567, "grad_norm": 0.5535605544414935, "learning_rate": 4.3229851852581436e-08, "loss": 0.2487, "step": 40239 }, { "epoch": 1.8850423947158852, "grad_norm": 0.6157652994003439, "learning_rate": 4.319474296986481e-08, "loss": 0.2577, "step": 40240 }, { "epoch": 1.8850892397058134, "grad_norm": 0.6304561381580361, "learning_rate": 4.315964822544483e-08, "loss": 0.2774, "step": 40241 }, { "epoch": 1.8851360846957417, "grad_norm": 0.586284903759854, "learning_rate": 4.312456761952355e-08, "loss": 0.26, "step": 40242 }, { "epoch": 1.8851829296856701, "grad_norm": 0.5704214202806811, "learning_rate": 4.3089501152303036e-08, "loss": 0.2617, "step": 40243 }, { "epoch": 1.8852297746755986, "grad_norm": 0.5974455977225603, "learning_rate": 4.30544488239848e-08, "loss": 0.2643, "step": 40244 }, { "epoch": 1.8852766196655266, "grad_norm": 0.5429020700355328, "learning_rate": 4.301941063477061e-08, "loss": 0.2476, "step": 40245 }, { "epoch": 1.885323464655455, "grad_norm": 0.6638817516885434, "learning_rate": 4.298438658486226e-08, "loss": 0.2828, "step": 40246 }, { "epoch": 1.8853703096453835, "grad_norm": 0.6131329734755407, "learning_rate": 4.294937667446125e-08, "loss": 0.2753, "step": 40247 }, { "epoch": 1.8854171546353118, "grad_norm": 0.6022392763921998, "learning_rate": 4.291438090376909e-08, "loss": 0.2828, "step": 40248 }, { "epoch": 1.88546399962524, "grad_norm": 0.617203380990746, "learning_rate": 4.287939927298701e-08, "loss": 0.2923, "step": 40249 }, { "epoch": 1.8855108446151685, "grad_norm": 0.5933173991059078, "learning_rate": 4.284443178231651e-08, "loss": 0.2781, "step": 40250 }, { "epoch": 1.8855576896050967, "grad_norm": 0.5819105563539039, "learning_rate": 4.280947843195854e-08, "loss": 0.2699, "step": 40251 }, { "epoch": 1.885604534595025, "grad_norm": 0.5655307607209074, "learning_rate": 4.277453922211433e-08, "loss": 0.2576, "step": 40252 }, { "epoch": 1.8856513795849534, "grad_norm": 0.5995415244851192, "learning_rate": 4.273961415298539e-08, "loss": 0.2606, "step": 40253 }, { "epoch": 1.8856982245748817, "grad_norm": 0.592358746427583, "learning_rate": 4.27047032247721e-08, "loss": 0.2753, "step": 40254 }, { "epoch": 1.88574506956481, "grad_norm": 0.5904452844855785, "learning_rate": 4.266980643767571e-08, "loss": 0.2647, "step": 40255 }, { "epoch": 1.8857919145547384, "grad_norm": 0.548220845973998, "learning_rate": 4.2634923791897154e-08, "loss": 0.2613, "step": 40256 }, { "epoch": 1.8858387595446668, "grad_norm": 0.6002384852727, "learning_rate": 4.260005528763683e-08, "loss": 0.2793, "step": 40257 }, { "epoch": 1.8858856045345949, "grad_norm": 0.6473344304988704, "learning_rate": 4.256520092509542e-08, "loss": 0.287, "step": 40258 }, { "epoch": 1.8859324495245233, "grad_norm": 0.5613228507172874, "learning_rate": 4.253036070447358e-08, "loss": 0.2532, "step": 40259 }, { "epoch": 1.8859792945144518, "grad_norm": 0.6472713110322843, "learning_rate": 4.2495534625972e-08, "loss": 0.2749, "step": 40260 }, { "epoch": 1.88602613950438, "grad_norm": 0.5985732321033989, "learning_rate": 4.246072268979079e-08, "loss": 0.2645, "step": 40261 }, { "epoch": 1.8860729844943083, "grad_norm": 0.5589828764064674, "learning_rate": 4.24259248961309e-08, "loss": 0.2653, "step": 40262 }, { "epoch": 1.8861198294842367, "grad_norm": 0.5956980897823214, "learning_rate": 4.239114124519189e-08, "loss": 0.2666, "step": 40263 }, { "epoch": 1.886166674474165, "grad_norm": 0.5837691936571514, "learning_rate": 4.235637173717389e-08, "loss": 0.271, "step": 40264 }, { "epoch": 1.8862135194640932, "grad_norm": 0.5746471793236156, "learning_rate": 4.2321616372277555e-08, "loss": 0.265, "step": 40265 }, { "epoch": 1.8862603644540217, "grad_norm": 0.6318377808088146, "learning_rate": 4.228687515070246e-08, "loss": 0.2775, "step": 40266 }, { "epoch": 1.88630720944395, "grad_norm": 0.5809127274828967, "learning_rate": 4.225214807264871e-08, "loss": 0.2686, "step": 40267 }, { "epoch": 1.8863540544338782, "grad_norm": 0.645463990967441, "learning_rate": 4.221743513831616e-08, "loss": 0.2886, "step": 40268 }, { "epoch": 1.8864008994238066, "grad_norm": 0.6165028217553471, "learning_rate": 4.2182736347904373e-08, "loss": 0.2875, "step": 40269 }, { "epoch": 1.886447744413735, "grad_norm": 0.5991139791731289, "learning_rate": 4.214805170161346e-08, "loss": 0.2815, "step": 40270 }, { "epoch": 1.8864945894036633, "grad_norm": 0.5701874340021481, "learning_rate": 4.21133811996427e-08, "loss": 0.2627, "step": 40271 }, { "epoch": 1.8865414343935916, "grad_norm": 0.618820356117372, "learning_rate": 4.207872484219139e-08, "loss": 0.281, "step": 40272 }, { "epoch": 1.88658827938352, "grad_norm": 0.6253601235021615, "learning_rate": 4.204408262945936e-08, "loss": 0.2969, "step": 40273 }, { "epoch": 1.8866351243734483, "grad_norm": 0.6227203996143739, "learning_rate": 4.200945456164618e-08, "loss": 0.2673, "step": 40274 }, { "epoch": 1.8866819693633765, "grad_norm": 0.6560742960336623, "learning_rate": 4.1974840638950296e-08, "loss": 0.2769, "step": 40275 }, { "epoch": 1.886728814353305, "grad_norm": 0.6022687412433155, "learning_rate": 4.194024086157183e-08, "loss": 0.2698, "step": 40276 }, { "epoch": 1.8867756593432332, "grad_norm": 0.6250523006825512, "learning_rate": 4.190565522970924e-08, "loss": 0.2746, "step": 40277 }, { "epoch": 1.8868225043331615, "grad_norm": 0.6461457627778003, "learning_rate": 4.18710837435618e-08, "loss": 0.2965, "step": 40278 }, { "epoch": 1.88686934932309, "grad_norm": 0.6107670894677012, "learning_rate": 4.183652640332852e-08, "loss": 0.2661, "step": 40279 }, { "epoch": 1.8869161943130184, "grad_norm": 0.6223079556128666, "learning_rate": 4.180198320920814e-08, "loss": 0.273, "step": 40280 }, { "epoch": 1.8869630393029464, "grad_norm": 0.603914583904149, "learning_rate": 4.176745416139938e-08, "loss": 0.2846, "step": 40281 }, { "epoch": 1.8870098842928749, "grad_norm": 0.5979481643482186, "learning_rate": 4.1732939260101244e-08, "loss": 0.2806, "step": 40282 }, { "epoch": 1.8870567292828033, "grad_norm": 0.6070683190897754, "learning_rate": 4.1698438505512196e-08, "loss": 0.2721, "step": 40283 }, { "epoch": 1.8871035742727316, "grad_norm": 0.5983377715547368, "learning_rate": 4.166395189783068e-08, "loss": 0.2641, "step": 40284 }, { "epoch": 1.8871504192626598, "grad_norm": 0.5691426688325157, "learning_rate": 4.162947943725515e-08, "loss": 0.2561, "step": 40285 }, { "epoch": 1.8871972642525883, "grad_norm": 0.6055881616891486, "learning_rate": 4.1595021123984335e-08, "loss": 0.2751, "step": 40286 }, { "epoch": 1.8872441092425165, "grad_norm": 0.627419630741509, "learning_rate": 4.1560576958216134e-08, "loss": 0.2741, "step": 40287 }, { "epoch": 1.8872909542324448, "grad_norm": 0.6043962259964, "learning_rate": 4.1526146940148726e-08, "loss": 0.2874, "step": 40288 }, { "epoch": 1.8873377992223732, "grad_norm": 0.5412372151886323, "learning_rate": 4.1491731069980554e-08, "loss": 0.2653, "step": 40289 }, { "epoch": 1.8873846442123015, "grad_norm": 0.5699878627401409, "learning_rate": 4.1457329347909804e-08, "loss": 0.2626, "step": 40290 }, { "epoch": 1.8874314892022297, "grad_norm": 0.6180187232792778, "learning_rate": 4.1422941774134086e-08, "loss": 0.2708, "step": 40291 }, { "epoch": 1.8874783341921582, "grad_norm": 0.6589593983267731, "learning_rate": 4.13885683488513e-08, "loss": 0.2762, "step": 40292 }, { "epoch": 1.8875251791820866, "grad_norm": 0.6099314851822026, "learning_rate": 4.1354209072259346e-08, "loss": 0.2791, "step": 40293 }, { "epoch": 1.8875720241720146, "grad_norm": 0.5911105362320422, "learning_rate": 4.1319863944555836e-08, "loss": 0.276, "step": 40294 }, { "epoch": 1.887618869161943, "grad_norm": 0.5864646577921003, "learning_rate": 4.128553296593868e-08, "loss": 0.2725, "step": 40295 }, { "epoch": 1.8876657141518716, "grad_norm": 0.5819839137639047, "learning_rate": 4.125121613660521e-08, "loss": 0.2596, "step": 40296 }, { "epoch": 1.8877125591417998, "grad_norm": 0.6160306384835145, "learning_rate": 4.1216913456753325e-08, "loss": 0.2814, "step": 40297 }, { "epoch": 1.887759404131728, "grad_norm": 0.6290771070171015, "learning_rate": 4.1182624926579815e-08, "loss": 0.2906, "step": 40298 }, { "epoch": 1.8878062491216565, "grad_norm": 0.6079832097596122, "learning_rate": 4.1148350546282304e-08, "loss": 0.2581, "step": 40299 }, { "epoch": 1.8878530941115848, "grad_norm": 0.5951358205083127, "learning_rate": 4.111409031605812e-08, "loss": 0.2743, "step": 40300 }, { "epoch": 1.887899939101513, "grad_norm": 0.6428683194833741, "learning_rate": 4.107984423610434e-08, "loss": 0.2652, "step": 40301 }, { "epoch": 1.8879467840914415, "grad_norm": 0.5525831181973202, "learning_rate": 4.1045612306617746e-08, "loss": 0.2613, "step": 40302 }, { "epoch": 1.8879936290813697, "grad_norm": 0.6263012779559914, "learning_rate": 4.101139452779595e-08, "loss": 0.2682, "step": 40303 }, { "epoch": 1.888040474071298, "grad_norm": 0.6138508196109363, "learning_rate": 4.0977190899835484e-08, "loss": 0.2639, "step": 40304 }, { "epoch": 1.8880873190612264, "grad_norm": 0.6011992219588144, "learning_rate": 4.094300142293339e-08, "loss": 0.2705, "step": 40305 }, { "epoch": 1.8881341640511549, "grad_norm": 0.6652507676978932, "learning_rate": 4.090882609728591e-08, "loss": 0.2813, "step": 40306 }, { "epoch": 1.888181009041083, "grad_norm": 0.5684527520610536, "learning_rate": 4.087466492309011e-08, "loss": 0.2614, "step": 40307 }, { "epoch": 1.8882278540310113, "grad_norm": 0.5555399769267504, "learning_rate": 4.0840517900542774e-08, "loss": 0.2493, "step": 40308 }, { "epoch": 1.8882746990209398, "grad_norm": 0.6016669707745519, "learning_rate": 4.0806385029839854e-08, "loss": 0.2743, "step": 40309 }, { "epoch": 1.888321544010868, "grad_norm": 0.5893688616752382, "learning_rate": 4.077226631117842e-08, "loss": 0.2648, "step": 40310 }, { "epoch": 1.8883683890007963, "grad_norm": 0.59800493265255, "learning_rate": 4.0738161744754426e-08, "loss": 0.2601, "step": 40311 }, { "epoch": 1.8884152339907248, "grad_norm": 0.6085676235878135, "learning_rate": 4.0704071330764094e-08, "loss": 0.2775, "step": 40312 }, { "epoch": 1.888462078980653, "grad_norm": 0.5950483558716807, "learning_rate": 4.0669995069403664e-08, "loss": 0.2892, "step": 40313 }, { "epoch": 1.8885089239705812, "grad_norm": 0.5936056022207269, "learning_rate": 4.063593296086937e-08, "loss": 0.2626, "step": 40314 }, { "epoch": 1.8885557689605097, "grad_norm": 0.5736307067134802, "learning_rate": 4.060188500535717e-08, "loss": 0.2599, "step": 40315 }, { "epoch": 1.8886026139504382, "grad_norm": 0.5666019803804977, "learning_rate": 4.056785120306273e-08, "loss": 0.2634, "step": 40316 }, { "epoch": 1.8886494589403662, "grad_norm": 0.597711090602163, "learning_rate": 4.0533831554182555e-08, "loss": 0.2734, "step": 40317 }, { "epoch": 1.8886963039302946, "grad_norm": 0.5870493151133179, "learning_rate": 4.0499826058911786e-08, "loss": 0.2598, "step": 40318 }, { "epoch": 1.888743148920223, "grad_norm": 0.5549533779287056, "learning_rate": 4.046583471744664e-08, "loss": 0.2518, "step": 40319 }, { "epoch": 1.8887899939101513, "grad_norm": 0.5617233995914077, "learning_rate": 4.043185752998197e-08, "loss": 0.2665, "step": 40320 }, { "epoch": 1.8888368389000796, "grad_norm": 0.5939426131828438, "learning_rate": 4.0397894496714e-08, "loss": 0.2558, "step": 40321 }, { "epoch": 1.888883683890008, "grad_norm": 0.5829800174455141, "learning_rate": 4.036394561783785e-08, "loss": 0.2736, "step": 40322 }, { "epoch": 1.8889305288799363, "grad_norm": 0.6119572697465688, "learning_rate": 4.033001089354921e-08, "loss": 0.2798, "step": 40323 }, { "epoch": 1.8889773738698645, "grad_norm": 0.5961453324238569, "learning_rate": 4.029609032404291e-08, "loss": 0.2653, "step": 40324 }, { "epoch": 1.889024218859793, "grad_norm": 0.5432496111461272, "learning_rate": 4.026218390951464e-08, "loss": 0.2405, "step": 40325 }, { "epoch": 1.8890710638497212, "grad_norm": 0.5955489669120012, "learning_rate": 4.022829165015896e-08, "loss": 0.2541, "step": 40326 }, { "epoch": 1.8891179088396495, "grad_norm": 0.6125755145702741, "learning_rate": 4.019441354617126e-08, "loss": 0.2782, "step": 40327 }, { "epoch": 1.889164753829578, "grad_norm": 0.5622891639764037, "learning_rate": 4.016054959774668e-08, "loss": 0.26, "step": 40328 }, { "epoch": 1.8892115988195064, "grad_norm": 0.5810217090834674, "learning_rate": 4.012669980507977e-08, "loss": 0.2619, "step": 40329 }, { "epoch": 1.8892584438094344, "grad_norm": 0.6013241660177073, "learning_rate": 4.0092864168365384e-08, "loss": 0.2747, "step": 40330 }, { "epoch": 1.8893052887993629, "grad_norm": 0.6294335331164342, "learning_rate": 4.0059042687798365e-08, "loss": 0.2865, "step": 40331 }, { "epoch": 1.8893521337892913, "grad_norm": 0.6138042665830638, "learning_rate": 4.002523536357328e-08, "loss": 0.2702, "step": 40332 }, { "epoch": 1.8893989787792196, "grad_norm": 0.6082054861135616, "learning_rate": 3.9991442195884686e-08, "loss": 0.2753, "step": 40333 }, { "epoch": 1.8894458237691478, "grad_norm": 0.5781017432341083, "learning_rate": 3.995766318492689e-08, "loss": 0.2601, "step": 40334 }, { "epoch": 1.8894926687590763, "grad_norm": 0.6251053197226489, "learning_rate": 3.992389833089472e-08, "loss": 0.2762, "step": 40335 }, { "epoch": 1.8895395137490045, "grad_norm": 0.5718793307720003, "learning_rate": 3.989014763398191e-08, "loss": 0.255, "step": 40336 }, { "epoch": 1.8895863587389328, "grad_norm": 0.5678439939225505, "learning_rate": 3.985641109438304e-08, "loss": 0.2666, "step": 40337 }, { "epoch": 1.8896332037288612, "grad_norm": 0.6373330349566904, "learning_rate": 3.982268871229211e-08, "loss": 0.2792, "step": 40338 }, { "epoch": 1.8896800487187895, "grad_norm": 0.5664366455617353, "learning_rate": 3.9788980487903696e-08, "loss": 0.2641, "step": 40339 }, { "epoch": 1.8897268937087177, "grad_norm": 0.5931474187700934, "learning_rate": 3.975528642141097e-08, "loss": 0.2564, "step": 40340 }, { "epoch": 1.8897737386986462, "grad_norm": 0.6008998975763916, "learning_rate": 3.9721606513008214e-08, "loss": 0.2788, "step": 40341 }, { "epoch": 1.8898205836885746, "grad_norm": 0.6177481134382778, "learning_rate": 3.9687940762889454e-08, "loss": 0.2838, "step": 40342 }, { "epoch": 1.8898674286785029, "grad_norm": 0.5617723514715379, "learning_rate": 3.965428917124786e-08, "loss": 0.2429, "step": 40343 }, { "epoch": 1.8899142736684311, "grad_norm": 0.6161675164067651, "learning_rate": 3.962065173827773e-08, "loss": 0.2762, "step": 40344 }, { "epoch": 1.8899611186583596, "grad_norm": 0.5504435952485653, "learning_rate": 3.958702846417223e-08, "loss": 0.2426, "step": 40345 }, { "epoch": 1.8900079636482878, "grad_norm": 0.5947027603114893, "learning_rate": 3.95534193491251e-08, "loss": 0.2566, "step": 40346 }, { "epoch": 1.890054808638216, "grad_norm": 0.6423076428968003, "learning_rate": 3.95198243933298e-08, "loss": 0.2908, "step": 40347 }, { "epoch": 1.8901016536281445, "grad_norm": 0.6068581080237563, "learning_rate": 3.948624359697922e-08, "loss": 0.2639, "step": 40348 }, { "epoch": 1.8901484986180728, "grad_norm": 0.6520575199179479, "learning_rate": 3.945267696026683e-08, "loss": 0.2817, "step": 40349 }, { "epoch": 1.890195343608001, "grad_norm": 0.5602202468228512, "learning_rate": 3.9419124483385805e-08, "loss": 0.257, "step": 40350 }, { "epoch": 1.8902421885979295, "grad_norm": 0.6233778298908084, "learning_rate": 3.9385586166529324e-08, "loss": 0.2808, "step": 40351 }, { "epoch": 1.890289033587858, "grad_norm": 0.5644657060841499, "learning_rate": 3.935206200989056e-08, "loss": 0.2616, "step": 40352 }, { "epoch": 1.890335878577786, "grad_norm": 0.6512245659301484, "learning_rate": 3.931855201366214e-08, "loss": 0.2887, "step": 40353 }, { "epoch": 1.8903827235677144, "grad_norm": 0.5903228249299588, "learning_rate": 3.9285056178036964e-08, "loss": 0.2841, "step": 40354 }, { "epoch": 1.8904295685576429, "grad_norm": 0.6152306974876374, "learning_rate": 3.925157450320794e-08, "loss": 0.2753, "step": 40355 }, { "epoch": 1.8904764135475711, "grad_norm": 0.5388875351033188, "learning_rate": 3.92181069893674e-08, "loss": 0.2501, "step": 40356 }, { "epoch": 1.8905232585374994, "grad_norm": 0.6213459521054912, "learning_rate": 3.918465363670798e-08, "loss": 0.2824, "step": 40357 }, { "epoch": 1.8905701035274278, "grad_norm": 0.5690569757532786, "learning_rate": 3.915121444542258e-08, "loss": 0.2569, "step": 40358 }, { "epoch": 1.890616948517356, "grad_norm": 0.5759715641458328, "learning_rate": 3.911778941570354e-08, "loss": 0.2668, "step": 40359 }, { "epoch": 1.8906637935072843, "grad_norm": 0.5722125119170957, "learning_rate": 3.9084378547742944e-08, "loss": 0.266, "step": 40360 }, { "epoch": 1.8907106384972128, "grad_norm": 0.6083636451981531, "learning_rate": 3.905098184173367e-08, "loss": 0.2578, "step": 40361 }, { "epoch": 1.890757483487141, "grad_norm": 0.6097239015890487, "learning_rate": 3.901759929786697e-08, "loss": 0.2848, "step": 40362 }, { "epoch": 1.8908043284770693, "grad_norm": 0.5884615663465517, "learning_rate": 3.898423091633546e-08, "loss": 0.2707, "step": 40363 }, { "epoch": 1.8908511734669977, "grad_norm": 0.6177206658565015, "learning_rate": 3.895087669733122e-08, "loss": 0.2821, "step": 40364 }, { "epoch": 1.8908980184569262, "grad_norm": 0.5622871740192593, "learning_rate": 3.89175366410463e-08, "loss": 0.2594, "step": 40365 }, { "epoch": 1.8909448634468542, "grad_norm": 0.5898904423559164, "learning_rate": 3.888421074767224e-08, "loss": 0.2856, "step": 40366 }, { "epoch": 1.8909917084367827, "grad_norm": 0.5706556960018855, "learning_rate": 3.88508990174008e-08, "loss": 0.254, "step": 40367 }, { "epoch": 1.8910385534267111, "grad_norm": 0.5736384236580369, "learning_rate": 3.881760145042435e-08, "loss": 0.249, "step": 40368 }, { "epoch": 1.8910853984166394, "grad_norm": 0.6124634350430472, "learning_rate": 3.8784318046933554e-08, "loss": 0.2767, "step": 40369 }, { "epoch": 1.8911322434065676, "grad_norm": 0.5865550032909472, "learning_rate": 3.875104880712049e-08, "loss": 0.2525, "step": 40370 }, { "epoch": 1.891179088396496, "grad_norm": 0.6053376498552681, "learning_rate": 3.871779373117668e-08, "loss": 0.2706, "step": 40371 }, { "epoch": 1.8912259333864243, "grad_norm": 0.5966941657517731, "learning_rate": 3.868455281929306e-08, "loss": 0.2625, "step": 40372 }, { "epoch": 1.8912727783763525, "grad_norm": 0.5797577948781261, "learning_rate": 3.865132607166145e-08, "loss": 0.271, "step": 40373 }, { "epoch": 1.891319623366281, "grad_norm": 0.5863674654666592, "learning_rate": 3.861811348847277e-08, "loss": 0.2618, "step": 40374 }, { "epoch": 1.8913664683562093, "grad_norm": 0.5880975024854411, "learning_rate": 3.8584915069918295e-08, "loss": 0.2603, "step": 40375 }, { "epoch": 1.8914133133461375, "grad_norm": 0.6676627661196817, "learning_rate": 3.855173081618868e-08, "loss": 0.3041, "step": 40376 }, { "epoch": 1.891460158336066, "grad_norm": 0.6193423059741557, "learning_rate": 3.851856072747545e-08, "loss": 0.2713, "step": 40377 }, { "epoch": 1.8915070033259944, "grad_norm": 0.5957885863698499, "learning_rate": 3.848540480396928e-08, "loss": 0.2507, "step": 40378 }, { "epoch": 1.8915538483159227, "grad_norm": 0.6079739295169242, "learning_rate": 3.845226304586058e-08, "loss": 0.2629, "step": 40379 }, { "epoch": 1.891600693305851, "grad_norm": 0.6177446381797697, "learning_rate": 3.841913545334086e-08, "loss": 0.2797, "step": 40380 }, { "epoch": 1.8916475382957794, "grad_norm": 0.6093253437581266, "learning_rate": 3.8386022026600247e-08, "loss": 0.2602, "step": 40381 }, { "epoch": 1.8916943832857076, "grad_norm": 0.6232386761648034, "learning_rate": 3.835292276582914e-08, "loss": 0.2711, "step": 40382 }, { "epoch": 1.8917412282756358, "grad_norm": 0.6116905546215793, "learning_rate": 3.8319837671218215e-08, "loss": 0.2631, "step": 40383 }, { "epoch": 1.8917880732655643, "grad_norm": 0.6003800702228456, "learning_rate": 3.828676674295817e-08, "loss": 0.274, "step": 40384 }, { "epoch": 1.8918349182554925, "grad_norm": 0.6049432386297338, "learning_rate": 3.825370998123884e-08, "loss": 0.2767, "step": 40385 }, { "epoch": 1.8918817632454208, "grad_norm": 0.6337651746287757, "learning_rate": 3.822066738625063e-08, "loss": 0.27, "step": 40386 }, { "epoch": 1.8919286082353493, "grad_norm": 0.5935671832870868, "learning_rate": 3.818763895818395e-08, "loss": 0.2735, "step": 40387 }, { "epoch": 1.8919754532252777, "grad_norm": 0.6071022658601368, "learning_rate": 3.815462469722864e-08, "loss": 0.273, "step": 40388 }, { "epoch": 1.8920222982152057, "grad_norm": 0.6155000946244367, "learning_rate": 3.812162460357455e-08, "loss": 0.2631, "step": 40389 }, { "epoch": 1.8920691432051342, "grad_norm": 0.6001615120701504, "learning_rate": 3.808863867741208e-08, "loss": 0.2594, "step": 40390 }, { "epoch": 1.8921159881950627, "grad_norm": 0.5723967411395292, "learning_rate": 3.8055666918930255e-08, "loss": 0.2644, "step": 40391 }, { "epoch": 1.892162833184991, "grad_norm": 0.597719042376822, "learning_rate": 3.802270932831947e-08, "loss": 0.273, "step": 40392 }, { "epoch": 1.8922096781749191, "grad_norm": 0.6050013173757057, "learning_rate": 3.79897659057693e-08, "loss": 0.2769, "step": 40393 }, { "epoch": 1.8922565231648476, "grad_norm": 0.5812071977949202, "learning_rate": 3.795683665146904e-08, "loss": 0.2612, "step": 40394 }, { "epoch": 1.8923033681547758, "grad_norm": 0.5956982185182372, "learning_rate": 3.7923921565608534e-08, "loss": 0.2849, "step": 40395 }, { "epoch": 1.892350213144704, "grad_norm": 0.6316014677662394, "learning_rate": 3.789102064837708e-08, "loss": 0.2849, "step": 40396 }, { "epoch": 1.8923970581346325, "grad_norm": 0.6370527981224171, "learning_rate": 3.7858133899963957e-08, "loss": 0.2716, "step": 40397 }, { "epoch": 1.8924439031245608, "grad_norm": 0.6056983940928748, "learning_rate": 3.782526132055819e-08, "loss": 0.27, "step": 40398 }, { "epoch": 1.892490748114489, "grad_norm": 0.6037587309061349, "learning_rate": 3.7792402910349356e-08, "loss": 0.2652, "step": 40399 }, { "epoch": 1.8925375931044175, "grad_norm": 0.594171338399054, "learning_rate": 3.7759558669526466e-08, "loss": 0.2647, "step": 40400 }, { "epoch": 1.892584438094346, "grad_norm": 0.6546258484875942, "learning_rate": 3.7726728598278254e-08, "loss": 0.2845, "step": 40401 }, { "epoch": 1.892631283084274, "grad_norm": 0.6246261582946657, "learning_rate": 3.769391269679401e-08, "loss": 0.2892, "step": 40402 }, { "epoch": 1.8926781280742024, "grad_norm": 0.6029495019964927, "learning_rate": 3.766111096526248e-08, "loss": 0.2668, "step": 40403 }, { "epoch": 1.892724973064131, "grad_norm": 0.5733090096547426, "learning_rate": 3.76283234038724e-08, "loss": 0.2797, "step": 40404 }, { "epoch": 1.8927718180540591, "grad_norm": 0.6100958766909199, "learning_rate": 3.759555001281223e-08, "loss": 0.2761, "step": 40405 }, { "epoch": 1.8928186630439874, "grad_norm": 0.5942019509458, "learning_rate": 3.756279079227071e-08, "loss": 0.2661, "step": 40406 }, { "epoch": 1.8928655080339158, "grad_norm": 0.605770950840022, "learning_rate": 3.753004574243657e-08, "loss": 0.2751, "step": 40407 }, { "epoch": 1.892912353023844, "grad_norm": 0.6476580818196057, "learning_rate": 3.749731486349828e-08, "loss": 0.2705, "step": 40408 }, { "epoch": 1.8929591980137723, "grad_norm": 0.6093399739636984, "learning_rate": 3.7464598155643736e-08, "loss": 0.2656, "step": 40409 }, { "epoch": 1.8930060430037008, "grad_norm": 0.599374213591509, "learning_rate": 3.743189561906169e-08, "loss": 0.2768, "step": 40410 }, { "epoch": 1.893052887993629, "grad_norm": 0.5832208185266539, "learning_rate": 3.739920725394003e-08, "loss": 0.2617, "step": 40411 }, { "epoch": 1.8930997329835573, "grad_norm": 0.6075459193031452, "learning_rate": 3.736653306046695e-08, "loss": 0.2598, "step": 40412 }, { "epoch": 1.8931465779734857, "grad_norm": 0.5580512503263682, "learning_rate": 3.733387303883035e-08, "loss": 0.2576, "step": 40413 }, { "epoch": 1.8931934229634142, "grad_norm": 0.6032305869572588, "learning_rate": 3.730122718921869e-08, "loss": 0.2765, "step": 40414 }, { "epoch": 1.8932402679533424, "grad_norm": 0.5734041505754289, "learning_rate": 3.726859551181933e-08, "loss": 0.2605, "step": 40415 }, { "epoch": 1.8932871129432707, "grad_norm": 0.6292938569295998, "learning_rate": 3.723597800682016e-08, "loss": 0.2841, "step": 40416 }, { "epoch": 1.8933339579331991, "grad_norm": 0.6111068106651929, "learning_rate": 3.72033746744091e-08, "loss": 0.254, "step": 40417 }, { "epoch": 1.8933808029231274, "grad_norm": 0.632560431034984, "learning_rate": 3.717078551477349e-08, "loss": 0.2739, "step": 40418 }, { "epoch": 1.8934276479130556, "grad_norm": 0.5927621856040881, "learning_rate": 3.713821052810096e-08, "loss": 0.2622, "step": 40419 }, { "epoch": 1.893474492902984, "grad_norm": 0.6188661799728498, "learning_rate": 3.710564971457914e-08, "loss": 0.2624, "step": 40420 }, { "epoch": 1.8935213378929123, "grad_norm": 0.5803760317096844, "learning_rate": 3.707310307439538e-08, "loss": 0.2507, "step": 40421 }, { "epoch": 1.8935681828828406, "grad_norm": 0.6186371705201255, "learning_rate": 3.704057060773647e-08, "loss": 0.2804, "step": 40422 }, { "epoch": 1.893615027872769, "grad_norm": 0.5703354563442944, "learning_rate": 3.7008052314790596e-08, "loss": 0.2625, "step": 40423 }, { "epoch": 1.8936618728626975, "grad_norm": 0.621691883950183, "learning_rate": 3.6975548195744e-08, "loss": 0.2793, "step": 40424 }, { "epoch": 1.8937087178526255, "grad_norm": 0.611346155156504, "learning_rate": 3.6943058250784036e-08, "loss": 0.2903, "step": 40425 }, { "epoch": 1.893755562842554, "grad_norm": 0.5850566163358809, "learning_rate": 3.691058248009777e-08, "loss": 0.2616, "step": 40426 }, { "epoch": 1.8938024078324824, "grad_norm": 0.5937943144946671, "learning_rate": 3.687812088387199e-08, "loss": 0.2693, "step": 40427 }, { "epoch": 1.8938492528224107, "grad_norm": 0.5696349287937594, "learning_rate": 3.6845673462293506e-08, "loss": 0.2628, "step": 40428 }, { "epoch": 1.893896097812339, "grad_norm": 0.6048210114171999, "learning_rate": 3.6813240215549104e-08, "loss": 0.2759, "step": 40429 }, { "epoch": 1.8939429428022674, "grad_norm": 0.5915077038177651, "learning_rate": 3.678082114382558e-08, "loss": 0.2521, "step": 40430 }, { "epoch": 1.8939897877921956, "grad_norm": 0.5849496878092286, "learning_rate": 3.674841624730918e-08, "loss": 0.2695, "step": 40431 }, { "epoch": 1.8940366327821239, "grad_norm": 0.6263068516787061, "learning_rate": 3.6716025526186694e-08, "loss": 0.2771, "step": 40432 }, { "epoch": 1.8940834777720523, "grad_norm": 0.6195009626602951, "learning_rate": 3.668364898064408e-08, "loss": 0.2816, "step": 40433 }, { "epoch": 1.8941303227619806, "grad_norm": 0.5666469525795406, "learning_rate": 3.6651286610867867e-08, "loss": 0.273, "step": 40434 }, { "epoch": 1.8941771677519088, "grad_norm": 0.5849426020713621, "learning_rate": 3.661893841704456e-08, "loss": 0.2725, "step": 40435 }, { "epoch": 1.8942240127418373, "grad_norm": 0.5659429532189867, "learning_rate": 3.658660439936013e-08, "loss": 0.2619, "step": 40436 }, { "epoch": 1.8942708577317657, "grad_norm": 0.553018060427613, "learning_rate": 3.655428455800081e-08, "loss": 0.2588, "step": 40437 }, { "epoch": 1.8943177027216938, "grad_norm": 0.5908549852344982, "learning_rate": 3.652197889315229e-08, "loss": 0.2571, "step": 40438 }, { "epoch": 1.8943645477116222, "grad_norm": 0.5707964055344344, "learning_rate": 3.6489687405000526e-08, "loss": 0.2545, "step": 40439 }, { "epoch": 1.8944113927015507, "grad_norm": 0.5923222892557564, "learning_rate": 3.6457410093731215e-08, "loss": 0.2723, "step": 40440 }, { "epoch": 1.894458237691479, "grad_norm": 0.5778456724422667, "learning_rate": 3.6425146959530586e-08, "loss": 0.265, "step": 40441 }, { "epoch": 1.8945050826814072, "grad_norm": 0.5635931037962618, "learning_rate": 3.6392898002584054e-08, "loss": 0.2709, "step": 40442 }, { "epoch": 1.8945519276713356, "grad_norm": 0.5752335089428154, "learning_rate": 3.636066322307702e-08, "loss": 0.2697, "step": 40443 }, { "epoch": 1.8945987726612639, "grad_norm": 0.6008593232915459, "learning_rate": 3.632844262119545e-08, "loss": 0.2656, "step": 40444 }, { "epoch": 1.894645617651192, "grad_norm": 0.63397710625271, "learning_rate": 3.629623619712447e-08, "loss": 0.2851, "step": 40445 }, { "epoch": 1.8946924626411206, "grad_norm": 0.576850110251982, "learning_rate": 3.626404395104921e-08, "loss": 0.2899, "step": 40446 }, { "epoch": 1.8947393076310488, "grad_norm": 0.6376295024445303, "learning_rate": 3.623186588315508e-08, "loss": 0.2786, "step": 40447 }, { "epoch": 1.894786152620977, "grad_norm": 0.6478532205342769, "learning_rate": 3.619970199362749e-08, "loss": 0.266, "step": 40448 }, { "epoch": 1.8948329976109055, "grad_norm": 0.5835228504614267, "learning_rate": 3.616755228265101e-08, "loss": 0.2656, "step": 40449 }, { "epoch": 1.894879842600834, "grad_norm": 0.6019797980082924, "learning_rate": 3.613541675041132e-08, "loss": 0.2693, "step": 40450 }, { "epoch": 1.8949266875907622, "grad_norm": 0.5966339634777272, "learning_rate": 3.610329539709329e-08, "loss": 0.2775, "step": 40451 }, { "epoch": 1.8949735325806905, "grad_norm": 0.6287604886476529, "learning_rate": 3.6071188222881195e-08, "loss": 0.2585, "step": 40452 }, { "epoch": 1.895020377570619, "grad_norm": 0.6380359081465409, "learning_rate": 3.6039095227960174e-08, "loss": 0.2801, "step": 40453 }, { "epoch": 1.8950672225605472, "grad_norm": 0.6191379117298327, "learning_rate": 3.6007016412514804e-08, "loss": 0.2751, "step": 40454 }, { "epoch": 1.8951140675504754, "grad_norm": 0.5970032292557679, "learning_rate": 3.597495177672966e-08, "loss": 0.2671, "step": 40455 }, { "epoch": 1.8951609125404039, "grad_norm": 0.5894250186429267, "learning_rate": 3.594290132078959e-08, "loss": 0.2693, "step": 40456 }, { "epoch": 1.895207757530332, "grad_norm": 0.6301815858287377, "learning_rate": 3.591086504487889e-08, "loss": 0.2732, "step": 40457 }, { "epoch": 1.8952546025202603, "grad_norm": 0.6109845140989941, "learning_rate": 3.587884294918159e-08, "loss": 0.2908, "step": 40458 }, { "epoch": 1.8953014475101888, "grad_norm": 0.590964983708423, "learning_rate": 3.584683503388226e-08, "loss": 0.2598, "step": 40459 }, { "epoch": 1.8953482925001173, "grad_norm": 0.5526030068352805, "learning_rate": 3.581484129916518e-08, "loss": 0.2568, "step": 40460 }, { "epoch": 1.8953951374900453, "grad_norm": 0.5129313234445719, "learning_rate": 3.5782861745214117e-08, "loss": 0.2403, "step": 40461 }, { "epoch": 1.8954419824799738, "grad_norm": 0.6142522424569574, "learning_rate": 3.575089637221335e-08, "loss": 0.2772, "step": 40462 }, { "epoch": 1.8954888274699022, "grad_norm": 0.5678253900035861, "learning_rate": 3.571894518034719e-08, "loss": 0.2615, "step": 40463 }, { "epoch": 1.8955356724598305, "grad_norm": 0.5695811655404052, "learning_rate": 3.568700816979881e-08, "loss": 0.2547, "step": 40464 }, { "epoch": 1.8955825174497587, "grad_norm": 0.5878162741845271, "learning_rate": 3.5655085340752515e-08, "loss": 0.2629, "step": 40465 }, { "epoch": 1.8956293624396872, "grad_norm": 0.5848584563784696, "learning_rate": 3.562317669339149e-08, "loss": 0.2671, "step": 40466 }, { "epoch": 1.8956762074296154, "grad_norm": 0.5757145393030564, "learning_rate": 3.559128222789976e-08, "loss": 0.2593, "step": 40467 }, { "epoch": 1.8957230524195436, "grad_norm": 0.6101198965120694, "learning_rate": 3.555940194446078e-08, "loss": 0.27, "step": 40468 }, { "epoch": 1.895769897409472, "grad_norm": 0.5814664393245068, "learning_rate": 3.55275358432583e-08, "loss": 0.2578, "step": 40469 }, { "epoch": 1.8958167423994003, "grad_norm": 0.5983981296120195, "learning_rate": 3.549568392447522e-08, "loss": 0.2514, "step": 40470 }, { "epoch": 1.8958635873893286, "grad_norm": 0.5947609110834132, "learning_rate": 3.546384618829502e-08, "loss": 0.2692, "step": 40471 }, { "epoch": 1.895910432379257, "grad_norm": 0.6176962805281249, "learning_rate": 3.543202263490114e-08, "loss": 0.2799, "step": 40472 }, { "epoch": 1.8959572773691855, "grad_norm": 0.5838018456169529, "learning_rate": 3.540021326447651e-08, "loss": 0.2508, "step": 40473 }, { "epoch": 1.8960041223591135, "grad_norm": 0.591405867476178, "learning_rate": 3.536841807720404e-08, "loss": 0.2586, "step": 40474 }, { "epoch": 1.896050967349042, "grad_norm": 0.6098297037033712, "learning_rate": 3.53366370732669e-08, "loss": 0.2696, "step": 40475 }, { "epoch": 1.8960978123389705, "grad_norm": 0.6287457523318524, "learning_rate": 3.530487025284801e-08, "loss": 0.2619, "step": 40476 }, { "epoch": 1.8961446573288987, "grad_norm": 0.5635868831638976, "learning_rate": 3.527311761613e-08, "loss": 0.2646, "step": 40477 }, { "epoch": 1.896191502318827, "grad_norm": 0.5959156742380277, "learning_rate": 3.5241379163295785e-08, "loss": 0.2774, "step": 40478 }, { "epoch": 1.8962383473087554, "grad_norm": 0.631945506843569, "learning_rate": 3.520965489452799e-08, "loss": 0.289, "step": 40479 }, { "epoch": 1.8962851922986836, "grad_norm": 0.5597994228096355, "learning_rate": 3.5177944810008966e-08, "loss": 0.2651, "step": 40480 }, { "epoch": 1.8963320372886119, "grad_norm": 0.573401310913741, "learning_rate": 3.514624890992163e-08, "loss": 0.2606, "step": 40481 }, { "epoch": 1.8963788822785403, "grad_norm": 0.5972984439327881, "learning_rate": 3.511456719444778e-08, "loss": 0.2834, "step": 40482 }, { "epoch": 1.8964257272684686, "grad_norm": 0.6198045191155844, "learning_rate": 3.508289966377032e-08, "loss": 0.2717, "step": 40483 }, { "epoch": 1.8964725722583968, "grad_norm": 0.6003258366122559, "learning_rate": 3.5051246318071056e-08, "loss": 0.2719, "step": 40484 }, { "epoch": 1.8965194172483253, "grad_norm": 0.6405589341307696, "learning_rate": 3.5019607157532335e-08, "loss": 0.2923, "step": 40485 }, { "epoch": 1.8965662622382538, "grad_norm": 0.5956598513258891, "learning_rate": 3.498798218233651e-08, "loss": 0.2718, "step": 40486 }, { "epoch": 1.896613107228182, "grad_norm": 0.5607549945752932, "learning_rate": 3.495637139266511e-08, "loss": 0.2561, "step": 40487 }, { "epoch": 1.8966599522181102, "grad_norm": 0.5564531628268413, "learning_rate": 3.492477478870021e-08, "loss": 0.2705, "step": 40488 }, { "epoch": 1.8967067972080387, "grad_norm": 0.6791161508002894, "learning_rate": 3.489319237062361e-08, "loss": 0.2973, "step": 40489 }, { "epoch": 1.896753642197967, "grad_norm": 0.5511874060702443, "learning_rate": 3.48616241386171e-08, "loss": 0.2439, "step": 40490 }, { "epoch": 1.8968004871878952, "grad_norm": 0.6025456951930541, "learning_rate": 3.483007009286249e-08, "loss": 0.2676, "step": 40491 }, { "epoch": 1.8968473321778236, "grad_norm": 0.594358831442709, "learning_rate": 3.4798530233541014e-08, "loss": 0.2755, "step": 40492 }, { "epoch": 1.8968941771677519, "grad_norm": 0.613684440661602, "learning_rate": 3.476700456083476e-08, "loss": 0.2792, "step": 40493 }, { "epoch": 1.8969410221576801, "grad_norm": 0.5757983105789904, "learning_rate": 3.4735493074924684e-08, "loss": 0.267, "step": 40494 }, { "epoch": 1.8969878671476086, "grad_norm": 0.6334466734628889, "learning_rate": 3.470399577599204e-08, "loss": 0.2728, "step": 40495 }, { "epoch": 1.897034712137537, "grad_norm": 0.5773193162275604, "learning_rate": 3.467251266421806e-08, "loss": 0.262, "step": 40496 }, { "epoch": 1.897081557127465, "grad_norm": 0.5839260922425357, "learning_rate": 3.464104373978455e-08, "loss": 0.2765, "step": 40497 }, { "epoch": 1.8971284021173935, "grad_norm": 0.6134951650472504, "learning_rate": 3.4609589002871926e-08, "loss": 0.276, "step": 40498 }, { "epoch": 1.897175247107322, "grad_norm": 0.6152275725532781, "learning_rate": 3.4578148453661696e-08, "loss": 0.2779, "step": 40499 }, { "epoch": 1.8972220920972502, "grad_norm": 0.5741814207547361, "learning_rate": 3.454672209233428e-08, "loss": 0.2699, "step": 40500 }, { "epoch": 1.8972689370871785, "grad_norm": 0.592352890859809, "learning_rate": 3.4515309919071204e-08, "loss": 0.2653, "step": 40501 }, { "epoch": 1.897315782077107, "grad_norm": 0.604060362967585, "learning_rate": 3.4483911934052594e-08, "loss": 0.2757, "step": 40502 }, { "epoch": 1.8973626270670352, "grad_norm": 0.5903266365759893, "learning_rate": 3.445252813745914e-08, "loss": 0.2586, "step": 40503 }, { "epoch": 1.8974094720569634, "grad_norm": 0.6326677454900663, "learning_rate": 3.442115852947209e-08, "loss": 0.2975, "step": 40504 }, { "epoch": 1.8974563170468919, "grad_norm": 0.5630236699821555, "learning_rate": 3.438980311027157e-08, "loss": 0.2599, "step": 40505 }, { "epoch": 1.8975031620368201, "grad_norm": 0.5893072266258751, "learning_rate": 3.435846188003772e-08, "loss": 0.2591, "step": 40506 }, { "epoch": 1.8975500070267484, "grad_norm": 0.5662495324656281, "learning_rate": 3.4327134838951224e-08, "loss": 0.2612, "step": 40507 }, { "epoch": 1.8975968520166768, "grad_norm": 0.6142130929545951, "learning_rate": 3.429582198719278e-08, "loss": 0.2732, "step": 40508 }, { "epoch": 1.8976436970066053, "grad_norm": 0.5735999249417734, "learning_rate": 3.426452332494168e-08, "loss": 0.2512, "step": 40509 }, { "epoch": 1.8976905419965333, "grad_norm": 0.5746186745334212, "learning_rate": 3.423323885237889e-08, "loss": 0.2506, "step": 40510 }, { "epoch": 1.8977373869864618, "grad_norm": 0.57444923581532, "learning_rate": 3.420196856968372e-08, "loss": 0.2548, "step": 40511 }, { "epoch": 1.8977842319763902, "grad_norm": 0.568571282505093, "learning_rate": 3.417071247703685e-08, "loss": 0.2632, "step": 40512 }, { "epoch": 1.8978310769663185, "grad_norm": 0.5958369686141443, "learning_rate": 3.413947057461731e-08, "loss": 0.263, "step": 40513 }, { "epoch": 1.8978779219562467, "grad_norm": 0.5922510074448625, "learning_rate": 3.410824286260578e-08, "loss": 0.2622, "step": 40514 }, { "epoch": 1.8979247669461752, "grad_norm": 0.6076650193903238, "learning_rate": 3.407702934118129e-08, "loss": 0.2744, "step": 40515 }, { "epoch": 1.8979716119361034, "grad_norm": 0.5921493200811712, "learning_rate": 3.4045830010523694e-08, "loss": 0.2863, "step": 40516 }, { "epoch": 1.8980184569260317, "grad_norm": 0.622855822617305, "learning_rate": 3.401464487081257e-08, "loss": 0.2802, "step": 40517 }, { "epoch": 1.8980653019159601, "grad_norm": 0.6570794336462636, "learning_rate": 3.3983473922227506e-08, "loss": 0.282, "step": 40518 }, { "epoch": 1.8981121469058884, "grad_norm": 0.6202595597410355, "learning_rate": 3.3952317164947514e-08, "loss": 0.2817, "step": 40519 }, { "epoch": 1.8981589918958166, "grad_norm": 0.5797511178570913, "learning_rate": 3.3921174599152175e-08, "loss": 0.2598, "step": 40520 }, { "epoch": 1.898205836885745, "grad_norm": 0.5692580717008316, "learning_rate": 3.389004622502079e-08, "loss": 0.2766, "step": 40521 }, { "epoch": 1.8982526818756735, "grad_norm": 0.5887010660377185, "learning_rate": 3.385893204273211e-08, "loss": 0.2909, "step": 40522 }, { "epoch": 1.8982995268656018, "grad_norm": 0.6381867343215035, "learning_rate": 3.38278320524657e-08, "loss": 0.2871, "step": 40523 }, { "epoch": 1.89834637185553, "grad_norm": 0.6346603299605174, "learning_rate": 3.3796746254400324e-08, "loss": 0.2651, "step": 40524 }, { "epoch": 1.8983932168454585, "grad_norm": 0.616112677658672, "learning_rate": 3.3765674648714445e-08, "loss": 0.2705, "step": 40525 }, { "epoch": 1.8984400618353867, "grad_norm": 0.5605274723067794, "learning_rate": 3.373461723558763e-08, "loss": 0.2635, "step": 40526 }, { "epoch": 1.898486906825315, "grad_norm": 0.5942983506536559, "learning_rate": 3.37035740151978e-08, "loss": 0.2788, "step": 40527 }, { "epoch": 1.8985337518152434, "grad_norm": 0.5471275870060774, "learning_rate": 3.3672544987724544e-08, "loss": 0.2533, "step": 40528 }, { "epoch": 1.8985805968051717, "grad_norm": 0.6123309167728385, "learning_rate": 3.364153015334548e-08, "loss": 0.268, "step": 40529 }, { "epoch": 1.8986274417951, "grad_norm": 0.598692339359727, "learning_rate": 3.361052951223992e-08, "loss": 0.2671, "step": 40530 }, { "epoch": 1.8986742867850284, "grad_norm": 0.6341242562182761, "learning_rate": 3.35795430645855e-08, "loss": 0.2871, "step": 40531 }, { "epoch": 1.8987211317749568, "grad_norm": 0.5527652946589181, "learning_rate": 3.354857081056095e-08, "loss": 0.2675, "step": 40532 }, { "epoch": 1.8987679767648848, "grad_norm": 0.5659899494399846, "learning_rate": 3.3517612750344485e-08, "loss": 0.2654, "step": 40533 }, { "epoch": 1.8988148217548133, "grad_norm": 0.5996899961756766, "learning_rate": 3.3486668884114006e-08, "loss": 0.2715, "step": 40534 }, { "epoch": 1.8988616667447418, "grad_norm": 0.55650183494721, "learning_rate": 3.345573921204826e-08, "loss": 0.2492, "step": 40535 }, { "epoch": 1.89890851173467, "grad_norm": 0.6138209078333118, "learning_rate": 3.3424823734324596e-08, "loss": 0.2705, "step": 40536 }, { "epoch": 1.8989553567245983, "grad_norm": 0.5840236862695513, "learning_rate": 3.339392245112122e-08, "loss": 0.2501, "step": 40537 }, { "epoch": 1.8990022017145267, "grad_norm": 0.5790830031413249, "learning_rate": 3.336303536261576e-08, "loss": 0.2664, "step": 40538 }, { "epoch": 1.899049046704455, "grad_norm": 0.6276083129148109, "learning_rate": 3.333216246898585e-08, "loss": 0.2879, "step": 40539 }, { "epoch": 1.8990958916943832, "grad_norm": 0.5955036661673241, "learning_rate": 3.330130377040969e-08, "loss": 0.2703, "step": 40540 }, { "epoch": 1.8991427366843117, "grad_norm": 0.5944233648718912, "learning_rate": 3.327045926706435e-08, "loss": 0.2847, "step": 40541 }, { "epoch": 1.89918958167424, "grad_norm": 0.5798681440929592, "learning_rate": 3.3239628959127745e-08, "loss": 0.2725, "step": 40542 }, { "epoch": 1.8992364266641681, "grad_norm": 0.5925924092217668, "learning_rate": 3.320881284677696e-08, "loss": 0.2605, "step": 40543 }, { "epoch": 1.8992832716540966, "grad_norm": 0.6106053702381464, "learning_rate": 3.317801093018935e-08, "loss": 0.274, "step": 40544 }, { "epoch": 1.899330116644025, "grad_norm": 0.5937573192940775, "learning_rate": 3.314722320954228e-08, "loss": 0.262, "step": 40545 }, { "epoch": 1.899376961633953, "grad_norm": 0.5771559258104123, "learning_rate": 3.31164496850131e-08, "loss": 0.2591, "step": 40546 }, { "epoch": 1.8994238066238815, "grad_norm": 0.5602456165751384, "learning_rate": 3.308569035677861e-08, "loss": 0.2476, "step": 40547 }, { "epoch": 1.89947065161381, "grad_norm": 0.5700121624636355, "learning_rate": 3.30549452250159e-08, "loss": 0.2727, "step": 40548 }, { "epoch": 1.8995174966037383, "grad_norm": 0.5860190486362723, "learning_rate": 3.302421428990204e-08, "loss": 0.2645, "step": 40549 }, { "epoch": 1.8995643415936665, "grad_norm": 0.5794638184543769, "learning_rate": 3.299349755161385e-08, "loss": 0.2631, "step": 40550 }, { "epoch": 1.899611186583595, "grad_norm": 0.568321593796018, "learning_rate": 3.296279501032784e-08, "loss": 0.2657, "step": 40551 }, { "epoch": 1.8996580315735232, "grad_norm": 0.6064396930035496, "learning_rate": 3.293210666622082e-08, "loss": 0.2719, "step": 40552 }, { "epoch": 1.8997048765634514, "grad_norm": 0.6252944770995515, "learning_rate": 3.290143251946959e-08, "loss": 0.2783, "step": 40553 }, { "epoch": 1.89975172155338, "grad_norm": 0.5995999057702359, "learning_rate": 3.2870772570250675e-08, "loss": 0.2744, "step": 40554 }, { "epoch": 1.8997985665433081, "grad_norm": 0.6507032399102647, "learning_rate": 3.284012681874005e-08, "loss": 0.2804, "step": 40555 }, { "epoch": 1.8998454115332364, "grad_norm": 0.6070705750000148, "learning_rate": 3.280949526511479e-08, "loss": 0.2781, "step": 40556 }, { "epoch": 1.8998922565231648, "grad_norm": 0.6059365848654165, "learning_rate": 3.2778877909550585e-08, "loss": 0.2661, "step": 40557 }, { "epoch": 1.8999391015130933, "grad_norm": 0.6025229142647467, "learning_rate": 3.2748274752223694e-08, "loss": 0.2599, "step": 40558 }, { "epoch": 1.8999859465030215, "grad_norm": 0.6421681165382492, "learning_rate": 3.2717685793310363e-08, "loss": 0.2737, "step": 40559 }, { "epoch": 1.9000327914929498, "grad_norm": 0.6064912935423797, "learning_rate": 3.268711103298683e-08, "loss": 0.2617, "step": 40560 }, { "epoch": 1.9000796364828783, "grad_norm": 0.6172497991151394, "learning_rate": 3.2656550471428794e-08, "loss": 0.2607, "step": 40561 }, { "epoch": 1.9001264814728065, "grad_norm": 0.6341819239229363, "learning_rate": 3.262600410881195e-08, "loss": 0.2783, "step": 40562 }, { "epoch": 1.9001733264627347, "grad_norm": 0.6614334589828302, "learning_rate": 3.259547194531254e-08, "loss": 0.2867, "step": 40563 }, { "epoch": 1.9002201714526632, "grad_norm": 0.5898462221902951, "learning_rate": 3.256495398110598e-08, "loss": 0.2681, "step": 40564 }, { "epoch": 1.9002670164425914, "grad_norm": 0.6380324898485961, "learning_rate": 3.253445021636797e-08, "loss": 0.2887, "step": 40565 }, { "epoch": 1.9003138614325197, "grad_norm": 0.5805072442763345, "learning_rate": 3.2503960651273924e-08, "loss": 0.2764, "step": 40566 }, { "epoch": 1.9003607064224481, "grad_norm": 0.6442953115232606, "learning_rate": 3.247348528599953e-08, "loss": 0.2861, "step": 40567 }, { "epoch": 1.9004075514123766, "grad_norm": 0.6427157484208, "learning_rate": 3.244302412071965e-08, "loss": 0.2696, "step": 40568 }, { "epoch": 1.9004543964023046, "grad_norm": 0.6038429016361506, "learning_rate": 3.241257715561025e-08, "loss": 0.2614, "step": 40569 }, { "epoch": 1.900501241392233, "grad_norm": 0.5715266964069481, "learning_rate": 3.238214439084647e-08, "loss": 0.2783, "step": 40570 }, { "epoch": 1.9005480863821616, "grad_norm": 0.6161829751883378, "learning_rate": 3.2351725826602896e-08, "loss": 0.2492, "step": 40571 }, { "epoch": 1.9005949313720898, "grad_norm": 0.5659195289854964, "learning_rate": 3.232132146305522e-08, "loss": 0.2743, "step": 40572 }, { "epoch": 1.900641776362018, "grad_norm": 0.6416235978608833, "learning_rate": 3.229093130037803e-08, "loss": 0.2811, "step": 40573 }, { "epoch": 1.9006886213519465, "grad_norm": 0.5447650683515701, "learning_rate": 3.226055533874617e-08, "loss": 0.264, "step": 40574 }, { "epoch": 1.9007354663418747, "grad_norm": 0.6078064898118204, "learning_rate": 3.223019357833451e-08, "loss": 0.2764, "step": 40575 }, { "epoch": 1.900782311331803, "grad_norm": 0.567274176855584, "learning_rate": 3.219984601931792e-08, "loss": 0.2779, "step": 40576 }, { "epoch": 1.9008291563217314, "grad_norm": 0.5990921985924028, "learning_rate": 3.216951266187124e-08, "loss": 0.2681, "step": 40577 }, { "epoch": 1.9008760013116597, "grad_norm": 0.6440744706037167, "learning_rate": 3.213919350616851e-08, "loss": 0.2692, "step": 40578 }, { "epoch": 1.900922846301588, "grad_norm": 0.5868745563296807, "learning_rate": 3.210888855238431e-08, "loss": 0.2701, "step": 40579 }, { "epoch": 1.9009696912915164, "grad_norm": 0.6363263741571303, "learning_rate": 3.2078597800693223e-08, "loss": 0.2884, "step": 40580 }, { "epoch": 1.9010165362814448, "grad_norm": 0.5928872440199964, "learning_rate": 3.2048321251269554e-08, "loss": 0.2712, "step": 40581 }, { "epoch": 1.9010633812713729, "grad_norm": 0.5602905273994042, "learning_rate": 3.2018058904287604e-08, "loss": 0.2605, "step": 40582 }, { "epoch": 1.9011102262613013, "grad_norm": 0.5526048002910603, "learning_rate": 3.198781075992141e-08, "loss": 0.2513, "step": 40583 }, { "epoch": 1.9011570712512298, "grad_norm": 0.5822855565812152, "learning_rate": 3.195757681834499e-08, "loss": 0.2762, "step": 40584 }, { "epoch": 1.901203916241158, "grad_norm": 0.6321715305715531, "learning_rate": 3.192735707973266e-08, "loss": 0.279, "step": 40585 }, { "epoch": 1.9012507612310863, "grad_norm": 0.584251412678831, "learning_rate": 3.1897151544257595e-08, "loss": 0.2591, "step": 40586 }, { "epoch": 1.9012976062210147, "grad_norm": 0.6030729769828233, "learning_rate": 3.18669602120944e-08, "loss": 0.2709, "step": 40587 }, { "epoch": 1.901344451210943, "grad_norm": 0.5995806146646969, "learning_rate": 3.183678308341626e-08, "loss": 0.2727, "step": 40588 }, { "epoch": 1.9013912962008712, "grad_norm": 0.5867184113809892, "learning_rate": 3.180662015839719e-08, "loss": 0.2622, "step": 40589 }, { "epoch": 1.9014381411907997, "grad_norm": 0.5616117344836669, "learning_rate": 3.177647143721069e-08, "loss": 0.2639, "step": 40590 }, { "epoch": 1.901484986180728, "grad_norm": 0.6179242519128824, "learning_rate": 3.1746336920030485e-08, "loss": 0.2878, "step": 40591 }, { "epoch": 1.9015318311706562, "grad_norm": 0.6052992283829417, "learning_rate": 3.1716216607029504e-08, "loss": 0.2786, "step": 40592 }, { "epoch": 1.9015786761605846, "grad_norm": 0.5830429696068474, "learning_rate": 3.1686110498381215e-08, "loss": 0.2709, "step": 40593 }, { "epoch": 1.901625521150513, "grad_norm": 0.5804460685549956, "learning_rate": 3.16560185942591e-08, "loss": 0.2814, "step": 40594 }, { "epoch": 1.9016723661404413, "grad_norm": 0.592916407067423, "learning_rate": 3.162594089483606e-08, "loss": 0.2722, "step": 40595 }, { "epoch": 1.9017192111303696, "grad_norm": 0.5769035452572931, "learning_rate": 3.159587740028558e-08, "loss": 0.2631, "step": 40596 }, { "epoch": 1.901766056120298, "grad_norm": 0.5798907683484181, "learning_rate": 3.1565828110780294e-08, "loss": 0.2728, "step": 40597 }, { "epoch": 1.9018129011102263, "grad_norm": 0.5526491472305609, "learning_rate": 3.153579302649312e-08, "loss": 0.2516, "step": 40598 }, { "epoch": 1.9018597461001545, "grad_norm": 0.5677308021811994, "learning_rate": 3.1505772147597256e-08, "loss": 0.2752, "step": 40599 }, { "epoch": 1.901906591090083, "grad_norm": 0.5898651843091209, "learning_rate": 3.147576547426506e-08, "loss": 0.2618, "step": 40600 }, { "epoch": 1.9019534360800112, "grad_norm": 0.5364783288423991, "learning_rate": 3.144577300666946e-08, "loss": 0.2525, "step": 40601 }, { "epoch": 1.9020002810699395, "grad_norm": 0.5692469729903558, "learning_rate": 3.1415794744983075e-08, "loss": 0.2501, "step": 40602 }, { "epoch": 1.902047126059868, "grad_norm": 0.5667117037349474, "learning_rate": 3.1385830689378284e-08, "loss": 0.253, "step": 40603 }, { "epoch": 1.9020939710497964, "grad_norm": 0.5876024371676458, "learning_rate": 3.135588084002744e-08, "loss": 0.255, "step": 40604 }, { "epoch": 1.9021408160397244, "grad_norm": 0.5920749090973825, "learning_rate": 3.1325945197103466e-08, "loss": 0.2573, "step": 40605 }, { "epoch": 1.9021876610296529, "grad_norm": 0.603007809217537, "learning_rate": 3.129602376077762e-08, "loss": 0.2644, "step": 40606 }, { "epoch": 1.9022345060195813, "grad_norm": 0.6023222793622621, "learning_rate": 3.126611653122308e-08, "loss": 0.2829, "step": 40607 }, { "epoch": 1.9022813510095096, "grad_norm": 0.6058937795650283, "learning_rate": 3.123622350861111e-08, "loss": 0.278, "step": 40608 }, { "epoch": 1.9023281959994378, "grad_norm": 0.6014572396322015, "learning_rate": 3.120634469311462e-08, "loss": 0.2652, "step": 40609 }, { "epoch": 1.9023750409893663, "grad_norm": 0.5922115637970489, "learning_rate": 3.117648008490487e-08, "loss": 0.2607, "step": 40610 }, { "epoch": 1.9024218859792945, "grad_norm": 0.5770824252506104, "learning_rate": 3.1146629684154214e-08, "loss": 0.2699, "step": 40611 }, { "epoch": 1.9024687309692228, "grad_norm": 0.5719601162187603, "learning_rate": 3.111679349103392e-08, "loss": 0.2675, "step": 40612 }, { "epoch": 1.9025155759591512, "grad_norm": 0.5741735084894312, "learning_rate": 3.1086971505716045e-08, "loss": 0.2605, "step": 40613 }, { "epoch": 1.9025624209490795, "grad_norm": 0.5623822405275505, "learning_rate": 3.105716372837214e-08, "loss": 0.2401, "step": 40614 }, { "epoch": 1.9026092659390077, "grad_norm": 0.6166366552595738, "learning_rate": 3.102737015917373e-08, "loss": 0.2724, "step": 40615 }, { "epoch": 1.9026561109289362, "grad_norm": 0.6036490348975507, "learning_rate": 3.099759079829206e-08, "loss": 0.2706, "step": 40616 }, { "epoch": 1.9027029559188646, "grad_norm": 0.5950254310889868, "learning_rate": 3.0967825645898956e-08, "loss": 0.2622, "step": 40617 }, { "epoch": 1.9027498009087926, "grad_norm": 0.5931945959334805, "learning_rate": 3.093807470216509e-08, "loss": 0.2482, "step": 40618 }, { "epoch": 1.902796645898721, "grad_norm": 0.6661834211890025, "learning_rate": 3.090833796726256e-08, "loss": 0.2943, "step": 40619 }, { "epoch": 1.9028434908886496, "grad_norm": 0.6239755447434938, "learning_rate": 3.087861544136151e-08, "loss": 0.277, "step": 40620 }, { "epoch": 1.9028903358785778, "grad_norm": 0.6487139401267448, "learning_rate": 3.084890712463373e-08, "loss": 0.277, "step": 40621 }, { "epoch": 1.902937180868506, "grad_norm": 0.577451190378543, "learning_rate": 3.081921301724966e-08, "loss": 0.272, "step": 40622 }, { "epoch": 1.9029840258584345, "grad_norm": 0.5802145403592244, "learning_rate": 3.0789533119380545e-08, "loss": 0.2676, "step": 40623 }, { "epoch": 1.9030308708483628, "grad_norm": 0.5840476149309137, "learning_rate": 3.07598674311968e-08, "loss": 0.26, "step": 40624 }, { "epoch": 1.903077715838291, "grad_norm": 0.6268830771913781, "learning_rate": 3.073021595286968e-08, "loss": 0.2793, "step": 40625 }, { "epoch": 1.9031245608282195, "grad_norm": 0.5821295551846887, "learning_rate": 3.07005786845696e-08, "loss": 0.2624, "step": 40626 }, { "epoch": 1.9031714058181477, "grad_norm": 0.5863032712870258, "learning_rate": 3.067095562646699e-08, "loss": 0.275, "step": 40627 }, { "epoch": 1.903218250808076, "grad_norm": 0.5440496801366418, "learning_rate": 3.064134677873226e-08, "loss": 0.2404, "step": 40628 }, { "epoch": 1.9032650957980044, "grad_norm": 0.6064106345882099, "learning_rate": 3.0611752141535834e-08, "loss": 0.2708, "step": 40629 }, { "epoch": 1.9033119407879329, "grad_norm": 0.6010485993966553, "learning_rate": 3.0582171715048406e-08, "loss": 0.2636, "step": 40630 }, { "epoch": 1.903358785777861, "grad_norm": 0.6161145624837692, "learning_rate": 3.0552605499439556e-08, "loss": 0.2746, "step": 40631 }, { "epoch": 1.9034056307677893, "grad_norm": 0.5799236722239237, "learning_rate": 3.0523053494879994e-08, "loss": 0.2521, "step": 40632 }, { "epoch": 1.9034524757577178, "grad_norm": 0.5342771410017237, "learning_rate": 3.0493515701539854e-08, "loss": 0.2505, "step": 40633 }, { "epoch": 1.903499320747646, "grad_norm": 0.5964180279847522, "learning_rate": 3.046399211958873e-08, "loss": 0.2594, "step": 40634 }, { "epoch": 1.9035461657375743, "grad_norm": 0.6300016449189028, "learning_rate": 3.043448274919647e-08, "loss": 0.2711, "step": 40635 }, { "epoch": 1.9035930107275028, "grad_norm": 0.6117545093422395, "learning_rate": 3.0404987590532954e-08, "loss": 0.2629, "step": 40636 }, { "epoch": 1.903639855717431, "grad_norm": 0.6175077065009447, "learning_rate": 3.037550664376804e-08, "loss": 0.2703, "step": 40637 }, { "epoch": 1.9036867007073592, "grad_norm": 0.6190256563315999, "learning_rate": 3.034603990907159e-08, "loss": 0.2858, "step": 40638 }, { "epoch": 1.9037335456972877, "grad_norm": 0.5953417885154719, "learning_rate": 3.031658738661292e-08, "loss": 0.2706, "step": 40639 }, { "epoch": 1.9037803906872162, "grad_norm": 0.5915708376730521, "learning_rate": 3.0287149076561326e-08, "loss": 0.2856, "step": 40640 }, { "epoch": 1.9038272356771442, "grad_norm": 0.5207016672186268, "learning_rate": 3.025772497908669e-08, "loss": 0.2571, "step": 40641 }, { "epoch": 1.9038740806670726, "grad_norm": 0.5953040860142632, "learning_rate": 3.022831509435803e-08, "loss": 0.2767, "step": 40642 }, { "epoch": 1.903920925657001, "grad_norm": 0.5603666100307382, "learning_rate": 3.019891942254466e-08, "loss": 0.2543, "step": 40643 }, { "epoch": 1.9039677706469293, "grad_norm": 0.5706047363515458, "learning_rate": 3.016953796381561e-08, "loss": 0.2583, "step": 40644 }, { "epoch": 1.9040146156368576, "grad_norm": 0.5879813349410252, "learning_rate": 3.0140170718340464e-08, "loss": 0.2536, "step": 40645 }, { "epoch": 1.904061460626786, "grad_norm": 0.6056168667102182, "learning_rate": 3.011081768628743e-08, "loss": 0.2643, "step": 40646 }, { "epoch": 1.9041083056167143, "grad_norm": 0.650482119158659, "learning_rate": 3.008147886782609e-08, "loss": 0.2924, "step": 40647 }, { "epoch": 1.9041551506066425, "grad_norm": 0.641950342766223, "learning_rate": 3.0052154263125186e-08, "loss": 0.2811, "step": 40648 }, { "epoch": 1.904201995596571, "grad_norm": 0.625295989094081, "learning_rate": 3.0022843872353214e-08, "loss": 0.2633, "step": 40649 }, { "epoch": 1.9042488405864992, "grad_norm": 0.6055941241326787, "learning_rate": 2.9993547695678915e-08, "loss": 0.2649, "step": 40650 }, { "epoch": 1.9042956855764275, "grad_norm": 0.5961220298443889, "learning_rate": 2.996426573327077e-08, "loss": 0.2679, "step": 40651 }, { "epoch": 1.904342530566356, "grad_norm": 0.6354795853710984, "learning_rate": 2.9934997985297807e-08, "loss": 0.2917, "step": 40652 }, { "epoch": 1.9043893755562844, "grad_norm": 0.5712879710238606, "learning_rate": 2.9905744451927674e-08, "loss": 0.2712, "step": 40653 }, { "epoch": 1.9044362205462124, "grad_norm": 0.6235533346260929, "learning_rate": 2.9876505133329684e-08, "loss": 0.2711, "step": 40654 }, { "epoch": 1.9044830655361409, "grad_norm": 0.5977991726236117, "learning_rate": 2.9847280029671186e-08, "loss": 0.268, "step": 40655 }, { "epoch": 1.9045299105260693, "grad_norm": 0.5841443725696291, "learning_rate": 2.9818069141120673e-08, "loss": 0.2604, "step": 40656 }, { "epoch": 1.9045767555159976, "grad_norm": 0.5668254821661558, "learning_rate": 2.978887246784662e-08, "loss": 0.2542, "step": 40657 }, { "epoch": 1.9046236005059258, "grad_norm": 0.6284978794353209, "learning_rate": 2.975969001001666e-08, "loss": 0.2834, "step": 40658 }, { "epoch": 1.9046704454958543, "grad_norm": 0.6388070264010249, "learning_rate": 2.9730521767798725e-08, "loss": 0.2643, "step": 40659 }, { "epoch": 1.9047172904857825, "grad_norm": 0.928855588375785, "learning_rate": 2.970136774136073e-08, "loss": 0.2674, "step": 40660 }, { "epoch": 1.9047641354757108, "grad_norm": 0.5823405398657913, "learning_rate": 2.96722279308706e-08, "loss": 0.2615, "step": 40661 }, { "epoch": 1.9048109804656392, "grad_norm": 0.6312647397551273, "learning_rate": 2.96431023364957e-08, "loss": 0.256, "step": 40662 }, { "epoch": 1.9048578254555675, "grad_norm": 0.5380129422287306, "learning_rate": 2.961399095840395e-08, "loss": 0.2496, "step": 40663 }, { "epoch": 1.9049046704454957, "grad_norm": 0.5846924996013393, "learning_rate": 2.9584893796762993e-08, "loss": 0.2681, "step": 40664 }, { "epoch": 1.9049515154354242, "grad_norm": 0.6052579750117119, "learning_rate": 2.955581085173992e-08, "loss": 0.2799, "step": 40665 }, { "epoch": 1.9049983604253526, "grad_norm": 0.6139795408272342, "learning_rate": 2.9526742123502093e-08, "loss": 0.2756, "step": 40666 }, { "epoch": 1.9050452054152809, "grad_norm": 0.6056075097293439, "learning_rate": 2.9497687612216885e-08, "loss": 0.2902, "step": 40667 }, { "epoch": 1.9050920504052091, "grad_norm": 0.6103281565217247, "learning_rate": 2.9468647318051936e-08, "loss": 0.2869, "step": 40668 }, { "epoch": 1.9051388953951376, "grad_norm": 0.571238057889081, "learning_rate": 2.94396212411735e-08, "loss": 0.2522, "step": 40669 }, { "epoch": 1.9051857403850658, "grad_norm": 0.5463517325039071, "learning_rate": 2.94106093817495e-08, "loss": 0.2369, "step": 40670 }, { "epoch": 1.905232585374994, "grad_norm": 0.5861258014935451, "learning_rate": 2.938161173994619e-08, "loss": 0.2625, "step": 40671 }, { "epoch": 1.9052794303649225, "grad_norm": 0.6354240094895324, "learning_rate": 2.9352628315930943e-08, "loss": 0.2876, "step": 40672 }, { "epoch": 1.9053262753548508, "grad_norm": 0.604410405335194, "learning_rate": 2.9323659109870284e-08, "loss": 0.2876, "step": 40673 }, { "epoch": 1.905373120344779, "grad_norm": 0.6327116882004191, "learning_rate": 2.929470412193075e-08, "loss": 0.2848, "step": 40674 }, { "epoch": 1.9054199653347075, "grad_norm": 0.6322325667886988, "learning_rate": 2.926576335227971e-08, "loss": 0.2903, "step": 40675 }, { "epoch": 1.905466810324636, "grad_norm": 0.6061258453633063, "learning_rate": 2.923683680108286e-08, "loss": 0.2712, "step": 40676 }, { "epoch": 1.905513655314564, "grad_norm": 0.5889140406338029, "learning_rate": 2.920792446850701e-08, "loss": 0.266, "step": 40677 }, { "epoch": 1.9055605003044924, "grad_norm": 0.5858680609076233, "learning_rate": 2.91790263547187e-08, "loss": 0.2709, "step": 40678 }, { "epoch": 1.9056073452944209, "grad_norm": 0.5878546457073087, "learning_rate": 2.91501424598839e-08, "loss": 0.2553, "step": 40679 }, { "epoch": 1.9056541902843491, "grad_norm": 0.6336505296445473, "learning_rate": 2.9121272784168876e-08, "loss": 0.2858, "step": 40680 }, { "epoch": 1.9057010352742774, "grad_norm": 0.5568217908688454, "learning_rate": 2.9092417327740153e-08, "loss": 0.2531, "step": 40681 }, { "epoch": 1.9057478802642058, "grad_norm": 0.6101868715645042, "learning_rate": 2.9063576090763434e-08, "loss": 0.2746, "step": 40682 }, { "epoch": 1.905794725254134, "grad_norm": 0.5909798311533516, "learning_rate": 2.9034749073404978e-08, "loss": 0.2628, "step": 40683 }, { "epoch": 1.9058415702440623, "grad_norm": 0.5887409813546467, "learning_rate": 2.900593627583048e-08, "loss": 0.2793, "step": 40684 }, { "epoch": 1.9058884152339908, "grad_norm": 0.5837977989688271, "learning_rate": 2.897713769820537e-08, "loss": 0.2605, "step": 40685 }, { "epoch": 1.905935260223919, "grad_norm": 0.6239026249142479, "learning_rate": 2.89483533406959e-08, "loss": 0.2895, "step": 40686 }, { "epoch": 1.9059821052138473, "grad_norm": 0.6185981483255761, "learning_rate": 2.8919583203467772e-08, "loss": 0.2685, "step": 40687 }, { "epoch": 1.9060289502037757, "grad_norm": 0.5939649661903987, "learning_rate": 2.8890827286686406e-08, "loss": 0.2584, "step": 40688 }, { "epoch": 1.9060757951937042, "grad_norm": 0.6103293230707773, "learning_rate": 2.886208559051695e-08, "loss": 0.2711, "step": 40689 }, { "epoch": 1.9061226401836322, "grad_norm": 0.6102497381545334, "learning_rate": 2.883335811512539e-08, "loss": 0.2658, "step": 40690 }, { "epoch": 1.9061694851735607, "grad_norm": 0.6395116949105492, "learning_rate": 2.8804644860676578e-08, "loss": 0.2838, "step": 40691 }, { "epoch": 1.9062163301634891, "grad_norm": 0.591720292401607, "learning_rate": 2.877594582733595e-08, "loss": 0.2797, "step": 40692 }, { "epoch": 1.9062631751534174, "grad_norm": 0.6218024067295547, "learning_rate": 2.8747261015268645e-08, "loss": 0.2714, "step": 40693 }, { "epoch": 1.9063100201433456, "grad_norm": 0.604732990501903, "learning_rate": 2.8718590424639814e-08, "loss": 0.275, "step": 40694 }, { "epoch": 1.906356865133274, "grad_norm": 0.5650242054369434, "learning_rate": 2.868993405561432e-08, "loss": 0.2592, "step": 40695 }, { "epoch": 1.9064037101232023, "grad_norm": 0.5850300569023048, "learning_rate": 2.8661291908357035e-08, "loss": 0.2644, "step": 40696 }, { "epoch": 1.9064505551131306, "grad_norm": 0.5866309508988515, "learning_rate": 2.8632663983032826e-08, "loss": 0.2689, "step": 40697 }, { "epoch": 1.906497400103059, "grad_norm": 0.6386050169454741, "learning_rate": 2.860405027980656e-08, "loss": 0.2881, "step": 40698 }, { "epoch": 1.9065442450929873, "grad_norm": 0.6557578313671321, "learning_rate": 2.857545079884283e-08, "loss": 0.2814, "step": 40699 }, { "epoch": 1.9065910900829155, "grad_norm": 0.6117528753146027, "learning_rate": 2.8546865540306224e-08, "loss": 0.2735, "step": 40700 }, { "epoch": 1.906637935072844, "grad_norm": 0.5610656777368338, "learning_rate": 2.8518294504361056e-08, "loss": 0.2645, "step": 40701 }, { "epoch": 1.9066847800627724, "grad_norm": 0.5776834391341666, "learning_rate": 2.8489737691172193e-08, "loss": 0.2683, "step": 40702 }, { "epoch": 1.9067316250527007, "grad_norm": 0.5849877882206418, "learning_rate": 2.8461195100903672e-08, "loss": 0.2731, "step": 40703 }, { "epoch": 1.906778470042629, "grad_norm": 0.5812626312627491, "learning_rate": 2.8432666733719527e-08, "loss": 0.255, "step": 40704 }, { "epoch": 1.9068253150325574, "grad_norm": 0.6460672285079506, "learning_rate": 2.840415258978435e-08, "loss": 0.2804, "step": 40705 }, { "epoch": 1.9068721600224856, "grad_norm": 0.564004511669994, "learning_rate": 2.8375652669262176e-08, "loss": 0.2585, "step": 40706 }, { "epoch": 1.9069190050124138, "grad_norm": 0.5971322006163157, "learning_rate": 2.8347166972316766e-08, "loss": 0.2606, "step": 40707 }, { "epoch": 1.9069658500023423, "grad_norm": 0.5896072867461368, "learning_rate": 2.8318695499112148e-08, "loss": 0.2794, "step": 40708 }, { "epoch": 1.9070126949922706, "grad_norm": 0.5661137091879053, "learning_rate": 2.8290238249812086e-08, "loss": 0.2593, "step": 40709 }, { "epoch": 1.9070595399821988, "grad_norm": 0.6002261764441882, "learning_rate": 2.826179522458089e-08, "loss": 0.2681, "step": 40710 }, { "epoch": 1.9071063849721273, "grad_norm": 0.5356626177112579, "learning_rate": 2.8233366423581486e-08, "loss": 0.2551, "step": 40711 }, { "epoch": 1.9071532299620557, "grad_norm": 0.5640372471326465, "learning_rate": 2.8204951846977914e-08, "loss": 0.2524, "step": 40712 }, { "epoch": 1.9072000749519837, "grad_norm": 0.5750312782891351, "learning_rate": 2.817655149493337e-08, "loss": 0.2575, "step": 40713 }, { "epoch": 1.9072469199419122, "grad_norm": 0.6151723815155705, "learning_rate": 2.8148165367611612e-08, "loss": 0.2752, "step": 40714 }, { "epoch": 1.9072937649318407, "grad_norm": 0.6165058826142644, "learning_rate": 2.811979346517585e-08, "loss": 0.2792, "step": 40715 }, { "epoch": 1.907340609921769, "grad_norm": 0.6321378455979938, "learning_rate": 2.8091435787789557e-08, "loss": 0.272, "step": 40716 }, { "epoch": 1.9073874549116971, "grad_norm": 0.5969458883335995, "learning_rate": 2.806309233561566e-08, "loss": 0.2511, "step": 40717 }, { "epoch": 1.9074342999016256, "grad_norm": 0.6119409142183244, "learning_rate": 2.8034763108817363e-08, "loss": 0.2801, "step": 40718 }, { "epoch": 1.9074811448915538, "grad_norm": 0.6071998017270933, "learning_rate": 2.800644810755787e-08, "loss": 0.2621, "step": 40719 }, { "epoch": 1.907527989881482, "grad_norm": 0.5817695893246353, "learning_rate": 2.797814733199983e-08, "loss": 0.2693, "step": 40720 }, { "epoch": 1.9075748348714106, "grad_norm": 0.5798070913870371, "learning_rate": 2.794986078230616e-08, "loss": 0.2499, "step": 40721 }, { "epoch": 1.9076216798613388, "grad_norm": 0.5730928876122041, "learning_rate": 2.792158845863979e-08, "loss": 0.2573, "step": 40722 }, { "epoch": 1.907668524851267, "grad_norm": 0.6010946845263879, "learning_rate": 2.7893330361163096e-08, "loss": 0.2714, "step": 40723 }, { "epoch": 1.9077153698411955, "grad_norm": 0.5701326906686955, "learning_rate": 2.7865086490039274e-08, "loss": 0.2586, "step": 40724 }, { "epoch": 1.907762214831124, "grad_norm": 0.6059537287653325, "learning_rate": 2.7836856845430692e-08, "loss": 0.2615, "step": 40725 }, { "epoch": 1.907809059821052, "grad_norm": 0.567589535028514, "learning_rate": 2.7808641427499172e-08, "loss": 0.2577, "step": 40726 }, { "epoch": 1.9078559048109804, "grad_norm": 0.5894388203408438, "learning_rate": 2.7780440236407914e-08, "loss": 0.2669, "step": 40727 }, { "epoch": 1.907902749800909, "grad_norm": 0.6025465649938397, "learning_rate": 2.7752253272318453e-08, "loss": 0.2754, "step": 40728 }, { "epoch": 1.9079495947908371, "grad_norm": 0.5878485862395396, "learning_rate": 2.7724080535393717e-08, "loss": 0.2621, "step": 40729 }, { "epoch": 1.9079964397807654, "grad_norm": 0.5481367508762373, "learning_rate": 2.769592202579552e-08, "loss": 0.2648, "step": 40730 }, { "epoch": 1.9080432847706938, "grad_norm": 0.6106279511439628, "learning_rate": 2.7667777743685953e-08, "loss": 0.2692, "step": 40731 }, { "epoch": 1.908090129760622, "grad_norm": 0.6530178698098531, "learning_rate": 2.7639647689226833e-08, "loss": 0.2796, "step": 40732 }, { "epoch": 1.9081369747505503, "grad_norm": 0.61141496896607, "learning_rate": 2.7611531862579978e-08, "loss": 0.2794, "step": 40733 }, { "epoch": 1.9081838197404788, "grad_norm": 0.6005361112517992, "learning_rate": 2.7583430263907472e-08, "loss": 0.2838, "step": 40734 }, { "epoch": 1.908230664730407, "grad_norm": 0.5685689106989488, "learning_rate": 2.755534289337086e-08, "loss": 0.2658, "step": 40735 }, { "epoch": 1.9082775097203353, "grad_norm": 0.5883530060855064, "learning_rate": 2.7527269751131958e-08, "loss": 0.2563, "step": 40736 }, { "epoch": 1.9083243547102637, "grad_norm": 0.573030307456287, "learning_rate": 2.749921083735202e-08, "loss": 0.2725, "step": 40737 }, { "epoch": 1.9083711997001922, "grad_norm": 0.6154088678421895, "learning_rate": 2.7471166152192863e-08, "loss": 0.2809, "step": 40738 }, { "epoch": 1.9084180446901204, "grad_norm": 0.6270679377583713, "learning_rate": 2.7443135695815748e-08, "loss": 0.2928, "step": 40739 }, { "epoch": 1.9084648896800487, "grad_norm": 0.5935225851532495, "learning_rate": 2.7415119468381657e-08, "loss": 0.2634, "step": 40740 }, { "epoch": 1.9085117346699771, "grad_norm": 0.5921478297729064, "learning_rate": 2.7387117470052126e-08, "loss": 0.2825, "step": 40741 }, { "epoch": 1.9085585796599054, "grad_norm": 0.5805022530344868, "learning_rate": 2.7359129700988418e-08, "loss": 0.2705, "step": 40742 }, { "epoch": 1.9086054246498336, "grad_norm": 0.5753323853614715, "learning_rate": 2.7331156161351513e-08, "loss": 0.2716, "step": 40743 }, { "epoch": 1.908652269639762, "grad_norm": 0.6605928672101556, "learning_rate": 2.7303196851302115e-08, "loss": 0.2836, "step": 40744 }, { "epoch": 1.9086991146296903, "grad_norm": 0.5813706883470264, "learning_rate": 2.7275251771001488e-08, "loss": 0.2533, "step": 40745 }, { "epoch": 1.9087459596196186, "grad_norm": 0.6046307155862222, "learning_rate": 2.7247320920610056e-08, "loss": 0.2633, "step": 40746 }, { "epoch": 1.908792804609547, "grad_norm": 0.5551545240652889, "learning_rate": 2.7219404300289075e-08, "loss": 0.2586, "step": 40747 }, { "epoch": 1.9088396495994755, "grad_norm": 0.555700406069411, "learning_rate": 2.7191501910198705e-08, "loss": 0.2614, "step": 40748 }, { "epoch": 1.9088864945894035, "grad_norm": 0.6088501464270789, "learning_rate": 2.7163613750499918e-08, "loss": 0.2755, "step": 40749 }, { "epoch": 1.908933339579332, "grad_norm": 0.5907264382192821, "learning_rate": 2.713573982135287e-08, "loss": 0.2563, "step": 40750 }, { "epoch": 1.9089801845692604, "grad_norm": 0.6270946585824931, "learning_rate": 2.7107880122917985e-08, "loss": 0.2844, "step": 40751 }, { "epoch": 1.9090270295591887, "grad_norm": 0.6059305507332442, "learning_rate": 2.7080034655355968e-08, "loss": 0.2785, "step": 40752 }, { "epoch": 1.909073874549117, "grad_norm": 0.5994658816995503, "learning_rate": 2.7052203418826695e-08, "loss": 0.2618, "step": 40753 }, { "epoch": 1.9091207195390454, "grad_norm": 0.606255550527834, "learning_rate": 2.7024386413490312e-08, "loss": 0.2857, "step": 40754 }, { "epoch": 1.9091675645289736, "grad_norm": 0.5731058634115723, "learning_rate": 2.6996583639507244e-08, "loss": 0.2503, "step": 40755 }, { "epoch": 1.9092144095189019, "grad_norm": 0.5733386153095397, "learning_rate": 2.696879509703737e-08, "loss": 0.2503, "step": 40756 }, { "epoch": 1.9092612545088303, "grad_norm": 0.6584474462784765, "learning_rate": 2.694102078624028e-08, "loss": 0.2774, "step": 40757 }, { "epoch": 1.9093080994987586, "grad_norm": 0.6228579098711551, "learning_rate": 2.6913260707275847e-08, "loss": 0.2801, "step": 40758 }, { "epoch": 1.9093549444886868, "grad_norm": 0.580592500713264, "learning_rate": 2.6885514860304495e-08, "loss": 0.2664, "step": 40759 }, { "epoch": 1.9094017894786153, "grad_norm": 0.569934281891308, "learning_rate": 2.6857783245484993e-08, "loss": 0.2558, "step": 40760 }, { "epoch": 1.9094486344685437, "grad_norm": 0.5549817425535555, "learning_rate": 2.683006586297776e-08, "loss": 0.2482, "step": 40761 }, { "epoch": 1.9094954794584718, "grad_norm": 0.5485076416408728, "learning_rate": 2.6802362712941564e-08, "loss": 0.2503, "step": 40762 }, { "epoch": 1.9095423244484002, "grad_norm": 0.5830476006722238, "learning_rate": 2.6774673795536277e-08, "loss": 0.259, "step": 40763 }, { "epoch": 1.9095891694383287, "grad_norm": 0.6103162639928952, "learning_rate": 2.6746999110920934e-08, "loss": 0.2674, "step": 40764 }, { "epoch": 1.909636014428257, "grad_norm": 0.5849019591839945, "learning_rate": 2.6719338659255134e-08, "loss": 0.2704, "step": 40765 }, { "epoch": 1.9096828594181852, "grad_norm": 0.5969507880963315, "learning_rate": 2.6691692440697914e-08, "loss": 0.2797, "step": 40766 }, { "epoch": 1.9097297044081136, "grad_norm": 0.6551063637067668, "learning_rate": 2.666406045540859e-08, "loss": 0.2946, "step": 40767 }, { "epoch": 1.9097765493980419, "grad_norm": 0.6711721651142346, "learning_rate": 2.6636442703545652e-08, "loss": 0.2858, "step": 40768 }, { "epoch": 1.90982339438797, "grad_norm": 0.5983839190419138, "learning_rate": 2.660883918526813e-08, "loss": 0.2809, "step": 40769 }, { "epoch": 1.9098702393778986, "grad_norm": 0.5649853759473185, "learning_rate": 2.658124990073535e-08, "loss": 0.2669, "step": 40770 }, { "epoch": 1.9099170843678268, "grad_norm": 0.6732402461350419, "learning_rate": 2.6553674850105794e-08, "loss": 0.2777, "step": 40771 }, { "epoch": 1.909963929357755, "grad_norm": 0.5848393775917243, "learning_rate": 2.652611403353794e-08, "loss": 0.2691, "step": 40772 }, { "epoch": 1.9100107743476835, "grad_norm": 0.5993638484391304, "learning_rate": 2.6498567451191113e-08, "loss": 0.2576, "step": 40773 }, { "epoch": 1.910057619337612, "grad_norm": 0.5744457170348704, "learning_rate": 2.647103510322324e-08, "loss": 0.2621, "step": 40774 }, { "epoch": 1.9101044643275402, "grad_norm": 0.6024471024164276, "learning_rate": 2.644351698979253e-08, "loss": 0.2761, "step": 40775 }, { "epoch": 1.9101513093174685, "grad_norm": 0.6294098008295718, "learning_rate": 2.6416013111057736e-08, "loss": 0.2937, "step": 40776 }, { "epoch": 1.910198154307397, "grad_norm": 0.6143814272259998, "learning_rate": 2.6388523467177074e-08, "loss": 0.272, "step": 40777 }, { "epoch": 1.9102449992973252, "grad_norm": 0.6454038231749791, "learning_rate": 2.6361048058308748e-08, "loss": 0.2779, "step": 40778 }, { "epoch": 1.9102918442872534, "grad_norm": 0.6334268640306171, "learning_rate": 2.633358688461124e-08, "loss": 0.2916, "step": 40779 }, { "epoch": 1.9103386892771819, "grad_norm": 0.6374609231379029, "learning_rate": 2.630613994624165e-08, "loss": 0.2804, "step": 40780 }, { "epoch": 1.91038553426711, "grad_norm": 0.6590352950784666, "learning_rate": 2.6278707243359014e-08, "loss": 0.2828, "step": 40781 }, { "epoch": 1.9104323792570383, "grad_norm": 0.6107939495383049, "learning_rate": 2.6251288776120153e-08, "loss": 0.2869, "step": 40782 }, { "epoch": 1.9104792242469668, "grad_norm": 0.6459923584632811, "learning_rate": 2.6223884544683832e-08, "loss": 0.2727, "step": 40783 }, { "epoch": 1.9105260692368953, "grad_norm": 0.5966873267472671, "learning_rate": 2.6196494549206863e-08, "loss": 0.2805, "step": 40784 }, { "epoch": 1.9105729142268233, "grad_norm": 0.601473826663684, "learning_rate": 2.616911878984746e-08, "loss": 0.2605, "step": 40785 }, { "epoch": 1.9106197592167518, "grad_norm": 0.6007427092644357, "learning_rate": 2.6141757266763268e-08, "loss": 0.278, "step": 40786 }, { "epoch": 1.9106666042066802, "grad_norm": 0.6445395074294287, "learning_rate": 2.611440998011111e-08, "loss": 0.2747, "step": 40787 }, { "epoch": 1.9107134491966085, "grad_norm": 0.6029952226269726, "learning_rate": 2.6087076930049192e-08, "loss": 0.2735, "step": 40788 }, { "epoch": 1.9107602941865367, "grad_norm": 0.6125992224719055, "learning_rate": 2.6059758116734056e-08, "loss": 0.2635, "step": 40789 }, { "epoch": 1.9108071391764652, "grad_norm": 0.617135444427265, "learning_rate": 2.6032453540323078e-08, "loss": 0.2812, "step": 40790 }, { "epoch": 1.9108539841663934, "grad_norm": 0.5757423295396046, "learning_rate": 2.6005163200973627e-08, "loss": 0.2658, "step": 40791 }, { "epoch": 1.9109008291563216, "grad_norm": 0.6191720775180038, "learning_rate": 2.5977887098842802e-08, "loss": 0.2672, "step": 40792 }, { "epoch": 1.91094767414625, "grad_norm": 0.5756989054597796, "learning_rate": 2.5950625234087145e-08, "loss": 0.2575, "step": 40793 }, { "epoch": 1.9109945191361783, "grad_norm": 0.6090520414376234, "learning_rate": 2.5923377606864032e-08, "loss": 0.2915, "step": 40794 }, { "epoch": 1.9110413641261066, "grad_norm": 0.5753143738735156, "learning_rate": 2.589614421733e-08, "loss": 0.2669, "step": 40795 }, { "epoch": 1.911088209116035, "grad_norm": 0.6133113810009984, "learning_rate": 2.586892506564187e-08, "loss": 0.2687, "step": 40796 }, { "epoch": 1.9111350541059635, "grad_norm": 0.5840344812004744, "learning_rate": 2.5841720151955908e-08, "loss": 0.2587, "step": 40797 }, { "epoch": 1.9111818990958915, "grad_norm": 0.6435118075253334, "learning_rate": 2.5814529476429486e-08, "loss": 0.2721, "step": 40798 }, { "epoch": 1.91122874408582, "grad_norm": 0.6087740708637882, "learning_rate": 2.578735303921831e-08, "loss": 0.2652, "step": 40799 }, { "epoch": 1.9112755890757485, "grad_norm": 0.5441751413275626, "learning_rate": 2.5760190840478925e-08, "loss": 0.2464, "step": 40800 }, { "epoch": 1.9113224340656767, "grad_norm": 0.5930501786379321, "learning_rate": 2.5733042880367876e-08, "loss": 0.2784, "step": 40801 }, { "epoch": 1.911369279055605, "grad_norm": 0.5966411137573739, "learning_rate": 2.570590915904142e-08, "loss": 0.2799, "step": 40802 }, { "epoch": 1.9114161240455334, "grad_norm": 0.5859584730555071, "learning_rate": 2.5678789676655268e-08, "loss": 0.2603, "step": 40803 }, { "epoch": 1.9114629690354616, "grad_norm": 0.611655932307087, "learning_rate": 2.565168443336624e-08, "loss": 0.2954, "step": 40804 }, { "epoch": 1.9115098140253899, "grad_norm": 0.6405947507837432, "learning_rate": 2.562459342932949e-08, "loss": 0.269, "step": 40805 }, { "epoch": 1.9115566590153183, "grad_norm": 0.5983866219086865, "learning_rate": 2.5597516664701283e-08, "loss": 0.2731, "step": 40806 }, { "epoch": 1.9116035040052466, "grad_norm": 0.6123528068735113, "learning_rate": 2.5570454139637602e-08, "loss": 0.2787, "step": 40807 }, { "epoch": 1.9116503489951748, "grad_norm": 0.5995415997055565, "learning_rate": 2.5543405854294156e-08, "loss": 0.2582, "step": 40808 }, { "epoch": 1.9116971939851033, "grad_norm": 0.6149157550482932, "learning_rate": 2.55163718088261e-08, "loss": 0.2722, "step": 40809 }, { "epoch": 1.9117440389750318, "grad_norm": 0.5976944323587344, "learning_rate": 2.54893520033897e-08, "loss": 0.2573, "step": 40810 }, { "epoch": 1.91179088396496, "grad_norm": 0.5977223325233494, "learning_rate": 2.5462346438140105e-08, "loss": 0.263, "step": 40811 }, { "epoch": 1.9118377289548882, "grad_norm": 0.6358584822662108, "learning_rate": 2.543535511323275e-08, "loss": 0.2782, "step": 40812 }, { "epoch": 1.9118845739448167, "grad_norm": 0.6132680047537736, "learning_rate": 2.5408378028822787e-08, "loss": 0.2781, "step": 40813 }, { "epoch": 1.911931418934745, "grad_norm": 0.5654949140084226, "learning_rate": 2.538141518506565e-08, "loss": 0.2624, "step": 40814 }, { "epoch": 1.9119782639246732, "grad_norm": 0.6201111375293744, "learning_rate": 2.5354466582116765e-08, "loss": 0.2587, "step": 40815 }, { "epoch": 1.9120251089146016, "grad_norm": 0.576299597730008, "learning_rate": 2.5327532220130734e-08, "loss": 0.2586, "step": 40816 }, { "epoch": 1.9120719539045299, "grad_norm": 0.6138630188548562, "learning_rate": 2.530061209926299e-08, "loss": 0.2627, "step": 40817 }, { "epoch": 1.9121187988944581, "grad_norm": 0.5709392223771051, "learning_rate": 2.5273706219667848e-08, "loss": 0.253, "step": 40818 }, { "epoch": 1.9121656438843866, "grad_norm": 0.6278444585796772, "learning_rate": 2.5246814581500746e-08, "loss": 0.2683, "step": 40819 }, { "epoch": 1.912212488874315, "grad_norm": 0.575783912325006, "learning_rate": 2.5219937184916276e-08, "loss": 0.272, "step": 40820 }, { "epoch": 1.912259333864243, "grad_norm": 0.6127281621739419, "learning_rate": 2.5193074030068766e-08, "loss": 0.2751, "step": 40821 }, { "epoch": 1.9123061788541715, "grad_norm": 0.5758905318703004, "learning_rate": 2.516622511711364e-08, "loss": 0.2606, "step": 40822 }, { "epoch": 1.9123530238441, "grad_norm": 0.6265709528936352, "learning_rate": 2.513939044620467e-08, "loss": 0.2825, "step": 40823 }, { "epoch": 1.9123998688340282, "grad_norm": 0.5716664456774567, "learning_rate": 2.5112570017496173e-08, "loss": 0.2714, "step": 40824 }, { "epoch": 1.9124467138239565, "grad_norm": 0.5495441066024346, "learning_rate": 2.5085763831143027e-08, "loss": 0.246, "step": 40825 }, { "epoch": 1.912493558813885, "grad_norm": 0.5792432419159068, "learning_rate": 2.5058971887298998e-08, "loss": 0.2719, "step": 40826 }, { "epoch": 1.9125404038038132, "grad_norm": 0.5895731315708191, "learning_rate": 2.5032194186118686e-08, "loss": 0.2612, "step": 40827 }, { "epoch": 1.9125872487937414, "grad_norm": 0.6086191755547508, "learning_rate": 2.5005430727756407e-08, "loss": 0.2648, "step": 40828 }, { "epoch": 1.9126340937836699, "grad_norm": 0.569512914827929, "learning_rate": 2.4978681512365378e-08, "loss": 0.2573, "step": 40829 }, { "epoch": 1.9126809387735981, "grad_norm": 0.6203081302105421, "learning_rate": 2.4951946540100193e-08, "loss": 0.289, "step": 40830 }, { "epoch": 1.9127277837635264, "grad_norm": 0.5876599954224011, "learning_rate": 2.492522581111434e-08, "loss": 0.2672, "step": 40831 }, { "epoch": 1.9127746287534548, "grad_norm": 0.6218601612034678, "learning_rate": 2.489851932556159e-08, "loss": 0.2887, "step": 40832 }, { "epoch": 1.9128214737433833, "grad_norm": 0.6384553699399451, "learning_rate": 2.4871827083595978e-08, "loss": 0.2987, "step": 40833 }, { "epoch": 1.9128683187333113, "grad_norm": 0.6239842437301665, "learning_rate": 2.4845149085371e-08, "loss": 0.2772, "step": 40834 }, { "epoch": 1.9129151637232398, "grad_norm": 0.6065214170674341, "learning_rate": 2.4818485331039865e-08, "loss": 0.279, "step": 40835 }, { "epoch": 1.9129620087131682, "grad_norm": 0.6099454791296015, "learning_rate": 2.479183582075634e-08, "loss": 0.2676, "step": 40836 }, { "epoch": 1.9130088537030965, "grad_norm": 0.6194719248380529, "learning_rate": 2.476520055467363e-08, "loss": 0.2882, "step": 40837 }, { "epoch": 1.9130556986930247, "grad_norm": 0.6000665080346371, "learning_rate": 2.4738579532945228e-08, "loss": 0.2672, "step": 40838 }, { "epoch": 1.9131025436829532, "grad_norm": 0.6386954175875379, "learning_rate": 2.471197275572379e-08, "loss": 0.2746, "step": 40839 }, { "epoch": 1.9131493886728814, "grad_norm": 0.6251502112531506, "learning_rate": 2.468538022316308e-08, "loss": 0.2671, "step": 40840 }, { "epoch": 1.9131962336628097, "grad_norm": 0.6296143005480063, "learning_rate": 2.4658801935415755e-08, "loss": 0.2675, "step": 40841 }, { "epoch": 1.9132430786527381, "grad_norm": 0.5796068969107014, "learning_rate": 2.4632237892635025e-08, "loss": 0.2727, "step": 40842 }, { "epoch": 1.9132899236426664, "grad_norm": 0.6333734446888832, "learning_rate": 2.4605688094973545e-08, "loss": 0.2741, "step": 40843 }, { "epoch": 1.9133367686325946, "grad_norm": 0.6224481837313385, "learning_rate": 2.457915254258425e-08, "loss": 0.2725, "step": 40844 }, { "epoch": 1.913383613622523, "grad_norm": 0.5745024970836489, "learning_rate": 2.455263123561952e-08, "loss": 0.2594, "step": 40845 }, { "epoch": 1.9134304586124515, "grad_norm": 0.5766210736314075, "learning_rate": 2.4526124174232557e-08, "loss": 0.2624, "step": 40846 }, { "epoch": 1.9134773036023798, "grad_norm": 0.6175758570911548, "learning_rate": 2.4499631358575195e-08, "loss": 0.2669, "step": 40847 }, { "epoch": 1.913524148592308, "grad_norm": 0.5604222383982808, "learning_rate": 2.4473152788800358e-08, "loss": 0.2675, "step": 40848 }, { "epoch": 1.9135709935822365, "grad_norm": 0.6137524305927577, "learning_rate": 2.4446688465060432e-08, "loss": 0.2539, "step": 40849 }, { "epoch": 1.9136178385721647, "grad_norm": 0.609797284026779, "learning_rate": 2.442023838750751e-08, "loss": 0.2746, "step": 40850 }, { "epoch": 1.913664683562093, "grad_norm": 0.6139001170214572, "learning_rate": 2.4393802556293977e-08, "loss": 0.2965, "step": 40851 }, { "epoch": 1.9137115285520214, "grad_norm": 0.5858145192146899, "learning_rate": 2.4367380971571653e-08, "loss": 0.2767, "step": 40852 }, { "epoch": 1.9137583735419497, "grad_norm": 0.5860944944584093, "learning_rate": 2.4340973633493192e-08, "loss": 0.2477, "step": 40853 }, { "epoch": 1.913805218531878, "grad_norm": 0.6324195535240604, "learning_rate": 2.4314580542210143e-08, "loss": 0.2768, "step": 40854 }, { "epoch": 1.9138520635218064, "grad_norm": 0.5782772076620796, "learning_rate": 2.4288201697874326e-08, "loss": 0.2553, "step": 40855 }, { "epoch": 1.9138989085117348, "grad_norm": 0.5881063082948661, "learning_rate": 2.4261837100637563e-08, "loss": 0.2566, "step": 40856 }, { "epoch": 1.9139457535016628, "grad_norm": 0.6037633618313596, "learning_rate": 2.4235486750651683e-08, "loss": 0.2797, "step": 40857 }, { "epoch": 1.9139925984915913, "grad_norm": 0.5732620531406393, "learning_rate": 2.42091506480685e-08, "loss": 0.2547, "step": 40858 }, { "epoch": 1.9140394434815198, "grad_norm": 0.5781457087500173, "learning_rate": 2.4182828793039292e-08, "loss": 0.2578, "step": 40859 }, { "epoch": 1.914086288471448, "grad_norm": 0.5980233101186583, "learning_rate": 2.41565211857156e-08, "loss": 0.2629, "step": 40860 }, { "epoch": 1.9141331334613763, "grad_norm": 0.5340531064444631, "learning_rate": 2.4130227826248964e-08, "loss": 0.2335, "step": 40861 }, { "epoch": 1.9141799784513047, "grad_norm": 0.5910881000562718, "learning_rate": 2.410394871479038e-08, "loss": 0.2643, "step": 40862 }, { "epoch": 1.914226823441233, "grad_norm": 0.6322853751372811, "learning_rate": 2.4077683851491395e-08, "loss": 0.2823, "step": 40863 }, { "epoch": 1.9142736684311612, "grad_norm": 0.5702270325910809, "learning_rate": 2.4051433236503276e-08, "loss": 0.2553, "step": 40864 }, { "epoch": 1.9143205134210897, "grad_norm": 0.5917822565587687, "learning_rate": 2.402519686997673e-08, "loss": 0.261, "step": 40865 }, { "epoch": 1.914367358411018, "grad_norm": 0.6216313149107504, "learning_rate": 2.3998974752062753e-08, "loss": 0.2793, "step": 40866 }, { "epoch": 1.9144142034009461, "grad_norm": 0.5721827069184271, "learning_rate": 2.397276688291206e-08, "loss": 0.2622, "step": 40867 }, { "epoch": 1.9144610483908746, "grad_norm": 0.6086068769073915, "learning_rate": 2.394657326267619e-08, "loss": 0.271, "step": 40868 }, { "epoch": 1.914507893380803, "grad_norm": 0.5962668887880661, "learning_rate": 2.3920393891505302e-08, "loss": 0.2778, "step": 40869 }, { "epoch": 1.914554738370731, "grad_norm": 0.5877600470384993, "learning_rate": 2.389422876955011e-08, "loss": 0.2651, "step": 40870 }, { "epoch": 1.9146015833606596, "grad_norm": 0.5945753566133526, "learning_rate": 2.3868077896961606e-08, "loss": 0.2731, "step": 40871 }, { "epoch": 1.914648428350588, "grad_norm": 0.5767750946172887, "learning_rate": 2.3841941273889947e-08, "loss": 0.2526, "step": 40872 }, { "epoch": 1.9146952733405163, "grad_norm": 0.625649694268692, "learning_rate": 2.3815818900485287e-08, "loss": 0.2857, "step": 40873 }, { "epoch": 1.9147421183304445, "grad_norm": 0.6145135108233805, "learning_rate": 2.3789710776898345e-08, "loss": 0.2808, "step": 40874 }, { "epoch": 1.914788963320373, "grad_norm": 0.6386883241353704, "learning_rate": 2.3763616903278996e-08, "loss": 0.2974, "step": 40875 }, { "epoch": 1.9148358083103012, "grad_norm": 0.5507369274782214, "learning_rate": 2.3737537279777956e-08, "loss": 0.2419, "step": 40876 }, { "epoch": 1.9148826533002294, "grad_norm": 0.6220226092719059, "learning_rate": 2.3711471906545103e-08, "loss": 0.2717, "step": 40877 }, { "epoch": 1.914929498290158, "grad_norm": 0.6076574495541346, "learning_rate": 2.368542078373004e-08, "loss": 0.2751, "step": 40878 }, { "epoch": 1.9149763432800861, "grad_norm": 0.6338428979766615, "learning_rate": 2.36593839114832e-08, "loss": 0.2727, "step": 40879 }, { "epoch": 1.9150231882700144, "grad_norm": 0.5689695310134463, "learning_rate": 2.3633361289953916e-08, "loss": 0.2692, "step": 40880 }, { "epoch": 1.9150700332599428, "grad_norm": 0.585429682892876, "learning_rate": 2.3607352919292336e-08, "loss": 0.2688, "step": 40881 }, { "epoch": 1.9151168782498713, "grad_norm": 0.6153540954088818, "learning_rate": 2.3581358799648067e-08, "loss": 0.273, "step": 40882 }, { "epoch": 1.9151637232397996, "grad_norm": 0.5653133322328223, "learning_rate": 2.3555378931170993e-08, "loss": 0.2634, "step": 40883 }, { "epoch": 1.9152105682297278, "grad_norm": 0.573136910382805, "learning_rate": 2.3529413314009875e-08, "loss": 0.2541, "step": 40884 }, { "epoch": 1.9152574132196563, "grad_norm": 0.5905454203786864, "learning_rate": 2.35034619483146e-08, "loss": 0.2571, "step": 40885 }, { "epoch": 1.9153042582095845, "grad_norm": 0.5806835434129729, "learning_rate": 2.347752483423449e-08, "loss": 0.2622, "step": 40886 }, { "epoch": 1.9153511031995127, "grad_norm": 0.584925913970786, "learning_rate": 2.345160197191887e-08, "loss": 0.2692, "step": 40887 }, { "epoch": 1.9153979481894412, "grad_norm": 0.6058818876287295, "learning_rate": 2.3425693361516788e-08, "loss": 0.2643, "step": 40888 }, { "epoch": 1.9154447931793694, "grad_norm": 0.5983662172529338, "learning_rate": 2.3399799003177293e-08, "loss": 0.2653, "step": 40889 }, { "epoch": 1.9154916381692977, "grad_norm": 0.6206587571084187, "learning_rate": 2.337391889704943e-08, "loss": 0.2748, "step": 40890 }, { "epoch": 1.9155384831592261, "grad_norm": 0.5646745908917353, "learning_rate": 2.3348053043282247e-08, "loss": 0.265, "step": 40891 }, { "epoch": 1.9155853281491546, "grad_norm": 0.5860217001885025, "learning_rate": 2.3322201442024795e-08, "loss": 0.286, "step": 40892 }, { "epoch": 1.9156321731390826, "grad_norm": 0.5929347268404442, "learning_rate": 2.3296364093425285e-08, "loss": 0.2661, "step": 40893 }, { "epoch": 1.915679018129011, "grad_norm": 0.6268499678353572, "learning_rate": 2.327054099763276e-08, "loss": 0.2761, "step": 40894 }, { "epoch": 1.9157258631189396, "grad_norm": 0.6177125764915952, "learning_rate": 2.3244732154795722e-08, "loss": 0.2752, "step": 40895 }, { "epoch": 1.9157727081088678, "grad_norm": 0.6264591520042885, "learning_rate": 2.321893756506294e-08, "loss": 0.3005, "step": 40896 }, { "epoch": 1.915819553098796, "grad_norm": 0.5785872270935346, "learning_rate": 2.319315722858234e-08, "loss": 0.2707, "step": 40897 }, { "epoch": 1.9158663980887245, "grad_norm": 0.6426102632696632, "learning_rate": 2.3167391145502705e-08, "loss": 0.2628, "step": 40898 }, { "epoch": 1.9159132430786527, "grad_norm": 0.6013361939212688, "learning_rate": 2.314163931597252e-08, "loss": 0.281, "step": 40899 }, { "epoch": 1.915960088068581, "grad_norm": 0.6237247208406022, "learning_rate": 2.3115901740139446e-08, "loss": 0.2663, "step": 40900 }, { "epoch": 1.9160069330585094, "grad_norm": 0.6108625953755975, "learning_rate": 2.30901784181517e-08, "loss": 0.2799, "step": 40901 }, { "epoch": 1.9160537780484377, "grad_norm": 0.5893555356288593, "learning_rate": 2.3064469350157492e-08, "loss": 0.2592, "step": 40902 }, { "epoch": 1.916100623038366, "grad_norm": 0.644336825778218, "learning_rate": 2.3038774536304763e-08, "loss": 0.2707, "step": 40903 }, { "epoch": 1.9161474680282944, "grad_norm": 0.5741289026984081, "learning_rate": 2.3013093976741176e-08, "loss": 0.2628, "step": 40904 }, { "epoch": 1.9161943130182229, "grad_norm": 0.5950184550024439, "learning_rate": 2.2987427671614938e-08, "loss": 0.2664, "step": 40905 }, { "epoch": 1.9162411580081509, "grad_norm": 0.5659528365467789, "learning_rate": 2.2961775621073434e-08, "loss": 0.2546, "step": 40906 }, { "epoch": 1.9162880029980793, "grad_norm": 0.5592113975206677, "learning_rate": 2.2936137825264328e-08, "loss": 0.263, "step": 40907 }, { "epoch": 1.9163348479880078, "grad_norm": 0.5689863012342851, "learning_rate": 2.2910514284334994e-08, "loss": 0.2626, "step": 40908 }, { "epoch": 1.916381692977936, "grad_norm": 0.5818354320213286, "learning_rate": 2.28849049984331e-08, "loss": 0.2416, "step": 40909 }, { "epoch": 1.9164285379678643, "grad_norm": 0.6301003952267662, "learning_rate": 2.285930996770602e-08, "loss": 0.2835, "step": 40910 }, { "epoch": 1.9164753829577927, "grad_norm": 0.6093751037734492, "learning_rate": 2.283372919230087e-08, "loss": 0.2814, "step": 40911 }, { "epoch": 1.916522227947721, "grad_norm": 0.6021870254363768, "learning_rate": 2.280816267236502e-08, "loss": 0.2781, "step": 40912 }, { "epoch": 1.9165690729376492, "grad_norm": 0.6752440203402802, "learning_rate": 2.2782610408045858e-08, "loss": 0.2836, "step": 40913 }, { "epoch": 1.9166159179275777, "grad_norm": 0.5982172609555401, "learning_rate": 2.2757072399489934e-08, "loss": 0.2816, "step": 40914 }, { "epoch": 1.916662762917506, "grad_norm": 0.5787946785882687, "learning_rate": 2.2731548646844624e-08, "loss": 0.2801, "step": 40915 }, { "epoch": 1.9167096079074342, "grad_norm": 0.6116162483224009, "learning_rate": 2.270603915025621e-08, "loss": 0.2836, "step": 40916 }, { "epoch": 1.9167564528973626, "grad_norm": 0.5799214111416465, "learning_rate": 2.2680543909872065e-08, "loss": 0.2835, "step": 40917 }, { "epoch": 1.916803297887291, "grad_norm": 0.616985191450778, "learning_rate": 2.2655062925838745e-08, "loss": 0.2712, "step": 40918 }, { "epoch": 1.9168501428772193, "grad_norm": 0.5703487572275977, "learning_rate": 2.262959619830307e-08, "loss": 0.2583, "step": 40919 }, { "epoch": 1.9168969878671476, "grad_norm": 0.573167770991848, "learning_rate": 2.260414372741132e-08, "loss": 0.2541, "step": 40920 }, { "epoch": 1.916943832857076, "grad_norm": 0.5826930834632794, "learning_rate": 2.257870551331004e-08, "loss": 0.2679, "step": 40921 }, { "epoch": 1.9169906778470043, "grad_norm": 0.6339912234958388, "learning_rate": 2.255328155614578e-08, "loss": 0.2804, "step": 40922 }, { "epoch": 1.9170375228369325, "grad_norm": 0.6180577352110386, "learning_rate": 2.2527871856064254e-08, "loss": 0.2963, "step": 40923 }, { "epoch": 1.917084367826861, "grad_norm": 0.6444359398636558, "learning_rate": 2.250247641321257e-08, "loss": 0.2763, "step": 40924 }, { "epoch": 1.9171312128167892, "grad_norm": 0.5615720516633202, "learning_rate": 2.2477095227736167e-08, "loss": 0.2437, "step": 40925 }, { "epoch": 1.9171780578067175, "grad_norm": 0.6141183932327057, "learning_rate": 2.2451728299781594e-08, "loss": 0.2676, "step": 40926 }, { "epoch": 1.917224902796646, "grad_norm": 0.5673911466897507, "learning_rate": 2.2426375629494566e-08, "loss": 0.2655, "step": 40927 }, { "epoch": 1.9172717477865744, "grad_norm": 0.5932745393605128, "learning_rate": 2.2401037217021083e-08, "loss": 0.272, "step": 40928 }, { "epoch": 1.9173185927765024, "grad_norm": 0.6139458860115984, "learning_rate": 2.2375713062506577e-08, "loss": 0.2818, "step": 40929 }, { "epoch": 1.9173654377664309, "grad_norm": 0.6045724092627003, "learning_rate": 2.2350403166097322e-08, "loss": 0.2576, "step": 40930 }, { "epoch": 1.9174122827563593, "grad_norm": 0.5824538957483987, "learning_rate": 2.2325107527938762e-08, "loss": 0.2705, "step": 40931 }, { "epoch": 1.9174591277462876, "grad_norm": 0.5885074057147921, "learning_rate": 2.2299826148176607e-08, "loss": 0.2639, "step": 40932 }, { "epoch": 1.9175059727362158, "grad_norm": 0.5438024133270435, "learning_rate": 2.227455902695602e-08, "loss": 0.2501, "step": 40933 }, { "epoch": 1.9175528177261443, "grad_norm": 0.5844717779985277, "learning_rate": 2.2249306164422725e-08, "loss": 0.2818, "step": 40934 }, { "epoch": 1.9175996627160725, "grad_norm": 0.6172394204838194, "learning_rate": 2.2224067560721874e-08, "loss": 0.2789, "step": 40935 }, { "epoch": 1.9176465077060008, "grad_norm": 0.5892849094506695, "learning_rate": 2.2198843215998635e-08, "loss": 0.2595, "step": 40936 }, { "epoch": 1.9176933526959292, "grad_norm": 0.5809682865884758, "learning_rate": 2.2173633130398442e-08, "loss": 0.2689, "step": 40937 }, { "epoch": 1.9177401976858575, "grad_norm": 0.6069935620172406, "learning_rate": 2.2148437304066185e-08, "loss": 0.2817, "step": 40938 }, { "epoch": 1.9177870426757857, "grad_norm": 0.6020721778594458, "learning_rate": 2.2123255737146742e-08, "loss": 0.2813, "step": 40939 }, { "epoch": 1.9178338876657142, "grad_norm": 0.5935892874508228, "learning_rate": 2.209808842978528e-08, "loss": 0.2731, "step": 40940 }, { "epoch": 1.9178807326556426, "grad_norm": 0.599223209663805, "learning_rate": 2.2072935382126683e-08, "loss": 0.2642, "step": 40941 }, { "epoch": 1.9179275776455706, "grad_norm": 0.58480017890016, "learning_rate": 2.2047796594315275e-08, "loss": 0.2531, "step": 40942 }, { "epoch": 1.917974422635499, "grad_norm": 0.6176552535711793, "learning_rate": 2.2022672066495944e-08, "loss": 0.2721, "step": 40943 }, { "epoch": 1.9180212676254276, "grad_norm": 0.5783249460499339, "learning_rate": 2.199756179881357e-08, "loss": 0.2563, "step": 40944 }, { "epoch": 1.9180681126153558, "grad_norm": 0.6345686067582207, "learning_rate": 2.197246579141221e-08, "loss": 0.2882, "step": 40945 }, { "epoch": 1.918114957605284, "grad_norm": 0.6322632877194302, "learning_rate": 2.194738404443675e-08, "loss": 0.2593, "step": 40946 }, { "epoch": 1.9181618025952125, "grad_norm": 0.5874958432481399, "learning_rate": 2.1922316558030954e-08, "loss": 0.2587, "step": 40947 }, { "epoch": 1.9182086475851408, "grad_norm": 0.5681669726804844, "learning_rate": 2.1897263332339437e-08, "loss": 0.2655, "step": 40948 }, { "epoch": 1.918255492575069, "grad_norm": 0.5862002695179226, "learning_rate": 2.1872224367506523e-08, "loss": 0.2792, "step": 40949 }, { "epoch": 1.9183023375649975, "grad_norm": 0.5957688882676556, "learning_rate": 2.1847199663675987e-08, "loss": 0.2615, "step": 40950 }, { "epoch": 1.9183491825549257, "grad_norm": 0.6283453741408959, "learning_rate": 2.1822189220991884e-08, "loss": 0.2862, "step": 40951 }, { "epoch": 1.918396027544854, "grad_norm": 0.5981744877646125, "learning_rate": 2.1797193039598263e-08, "loss": 0.2705, "step": 40952 }, { "epoch": 1.9184428725347824, "grad_norm": 0.6440825526294092, "learning_rate": 2.1772211119638896e-08, "loss": 0.2901, "step": 40953 }, { "epoch": 1.9184897175247109, "grad_norm": 0.5993424945205612, "learning_rate": 2.1747243461257563e-08, "loss": 0.27, "step": 40954 }, { "epoch": 1.918536562514639, "grad_norm": 0.5799121490196016, "learning_rate": 2.172229006459803e-08, "loss": 0.2435, "step": 40955 }, { "epoch": 1.9185834075045674, "grad_norm": 0.6245137950626996, "learning_rate": 2.1697350929804072e-08, "loss": 0.2694, "step": 40956 }, { "epoch": 1.9186302524944958, "grad_norm": 0.5701036439243355, "learning_rate": 2.1672426057018636e-08, "loss": 0.2594, "step": 40957 }, { "epoch": 1.918677097484424, "grad_norm": 0.6204260159914141, "learning_rate": 2.1647515446385494e-08, "loss": 0.2644, "step": 40958 }, { "epoch": 1.9187239424743523, "grad_norm": 0.6025978560973343, "learning_rate": 2.1622619098047858e-08, "loss": 0.2821, "step": 40959 }, { "epoch": 1.9187707874642808, "grad_norm": 0.6311290487342648, "learning_rate": 2.1597737012149233e-08, "loss": 0.2887, "step": 40960 }, { "epoch": 1.918817632454209, "grad_norm": 0.5931629170870182, "learning_rate": 2.1572869188832835e-08, "loss": 0.2648, "step": 40961 }, { "epoch": 1.9188644774441372, "grad_norm": 0.5979582514123751, "learning_rate": 2.1548015628241604e-08, "loss": 0.2899, "step": 40962 }, { "epoch": 1.9189113224340657, "grad_norm": 0.5684216366561097, "learning_rate": 2.1523176330518758e-08, "loss": 0.2613, "step": 40963 }, { "epoch": 1.9189581674239942, "grad_norm": 0.5993828807989798, "learning_rate": 2.149835129580696e-08, "loss": 0.2724, "step": 40964 }, { "epoch": 1.9190050124139222, "grad_norm": 0.6115783422265666, "learning_rate": 2.147354052424916e-08, "loss": 0.2611, "step": 40965 }, { "epoch": 1.9190518574038506, "grad_norm": 0.5979238204342598, "learning_rate": 2.144874401598801e-08, "loss": 0.2635, "step": 40966 }, { "epoch": 1.919098702393779, "grad_norm": 0.5562140981830223, "learning_rate": 2.1423961771166457e-08, "loss": 0.2452, "step": 40967 }, { "epoch": 1.9191455473837074, "grad_norm": 0.5802644148589889, "learning_rate": 2.1399193789927442e-08, "loss": 0.2585, "step": 40968 }, { "epoch": 1.9191923923736356, "grad_norm": 0.6050621560929728, "learning_rate": 2.1374440072412795e-08, "loss": 0.2802, "step": 40969 }, { "epoch": 1.919239237363564, "grad_norm": 0.5912933599827617, "learning_rate": 2.1349700618765178e-08, "loss": 0.2686, "step": 40970 }, { "epoch": 1.9192860823534923, "grad_norm": 0.6164631383107901, "learning_rate": 2.132497542912726e-08, "loss": 0.27, "step": 40971 }, { "epoch": 1.9193329273434205, "grad_norm": 0.6265644749460612, "learning_rate": 2.1300264503640867e-08, "loss": 0.2747, "step": 40972 }, { "epoch": 1.919379772333349, "grad_norm": 0.6124717418813177, "learning_rate": 2.1275567842448665e-08, "loss": 0.2705, "step": 40973 }, { "epoch": 1.9194266173232772, "grad_norm": 0.5745149322679571, "learning_rate": 2.125088544569248e-08, "loss": 0.2598, "step": 40974 }, { "epoch": 1.9194734623132055, "grad_norm": 0.5615584868014458, "learning_rate": 2.122621731351443e-08, "loss": 0.258, "step": 40975 }, { "epoch": 1.919520307303134, "grad_norm": 0.5815896741596533, "learning_rate": 2.1201563446056616e-08, "loss": 0.254, "step": 40976 }, { "epoch": 1.9195671522930624, "grad_norm": 0.640721455610965, "learning_rate": 2.1176923843460596e-08, "loss": 0.2852, "step": 40977 }, { "epoch": 1.9196139972829904, "grad_norm": 0.6071006932611999, "learning_rate": 2.1152298505868195e-08, "loss": 0.261, "step": 40978 }, { "epoch": 1.9196608422729189, "grad_norm": 0.5842062302063855, "learning_rate": 2.112768743342153e-08, "loss": 0.2606, "step": 40979 }, { "epoch": 1.9197076872628474, "grad_norm": 0.6080804994326944, "learning_rate": 2.1103090626262147e-08, "loss": 0.2674, "step": 40980 }, { "epoch": 1.9197545322527756, "grad_norm": 0.5558917066215284, "learning_rate": 2.1078508084531048e-08, "loss": 0.2501, "step": 40981 }, { "epoch": 1.9198013772427038, "grad_norm": 0.5609211513738067, "learning_rate": 2.105393980837034e-08, "loss": 0.2591, "step": 40982 }, { "epoch": 1.9198482222326323, "grad_norm": 0.592299858371521, "learning_rate": 2.102938579792102e-08, "loss": 0.2565, "step": 40983 }, { "epoch": 1.9198950672225605, "grad_norm": 0.5973990316955997, "learning_rate": 2.1004846053324647e-08, "loss": 0.2645, "step": 40984 }, { "epoch": 1.9199419122124888, "grad_norm": 0.6034403977716818, "learning_rate": 2.0980320574722214e-08, "loss": 0.2852, "step": 40985 }, { "epoch": 1.9199887572024172, "grad_norm": 0.6428829270509188, "learning_rate": 2.0955809362255e-08, "loss": 0.287, "step": 40986 }, { "epoch": 1.9200356021923455, "grad_norm": 0.6279098514875756, "learning_rate": 2.0931312416063997e-08, "loss": 0.2745, "step": 40987 }, { "epoch": 1.9200824471822737, "grad_norm": 0.5893155626239456, "learning_rate": 2.0906829736290214e-08, "loss": 0.2699, "step": 40988 }, { "epoch": 1.9201292921722022, "grad_norm": 0.6007167184592043, "learning_rate": 2.0882361323074364e-08, "loss": 0.2777, "step": 40989 }, { "epoch": 1.9201761371621306, "grad_norm": 0.602125745121152, "learning_rate": 2.0857907176557722e-08, "loss": 0.2602, "step": 40990 }, { "epoch": 1.9202229821520589, "grad_norm": 0.5808293019028278, "learning_rate": 2.083346729688074e-08, "loss": 0.2731, "step": 40991 }, { "epoch": 1.9202698271419871, "grad_norm": 0.5795730671028811, "learning_rate": 2.0809041684183572e-08, "loss": 0.2545, "step": 40992 }, { "epoch": 1.9203166721319156, "grad_norm": 0.6123273073072028, "learning_rate": 2.0784630338607782e-08, "loss": 0.2712, "step": 40993 }, { "epoch": 1.9203635171218438, "grad_norm": 0.6080509271966964, "learning_rate": 2.0760233260292973e-08, "loss": 0.2782, "step": 40994 }, { "epoch": 1.920410362111772, "grad_norm": 0.6151061368049713, "learning_rate": 2.0735850449380145e-08, "loss": 0.288, "step": 40995 }, { "epoch": 1.9204572071017005, "grad_norm": 0.5883383754127356, "learning_rate": 2.0711481906009188e-08, "loss": 0.2552, "step": 40996 }, { "epoch": 1.9205040520916288, "grad_norm": 0.6002413684852445, "learning_rate": 2.0687127630320546e-08, "loss": 0.2695, "step": 40997 }, { "epoch": 1.920550897081557, "grad_norm": 0.6301465213040814, "learning_rate": 2.0662787622454105e-08, "loss": 0.2824, "step": 40998 }, { "epoch": 1.9205977420714855, "grad_norm": 0.6160614546905535, "learning_rate": 2.0638461882550588e-08, "loss": 0.2632, "step": 40999 }, { "epoch": 1.920644587061414, "grad_norm": 0.6390753510733046, "learning_rate": 2.061415041074932e-08, "loss": 0.275, "step": 41000 }, { "epoch": 1.920691432051342, "grad_norm": 0.5550946443288934, "learning_rate": 2.058985320719048e-08, "loss": 0.2662, "step": 41001 }, { "epoch": 1.9207382770412704, "grad_norm": 0.5884525601772088, "learning_rate": 2.0565570272013668e-08, "loss": 0.2654, "step": 41002 }, { "epoch": 1.9207851220311989, "grad_norm": 0.6094527133129488, "learning_rate": 2.0541301605359054e-08, "loss": 0.2577, "step": 41003 }, { "epoch": 1.9208319670211271, "grad_norm": 0.5643496377336938, "learning_rate": 2.051704720736597e-08, "loss": 0.2578, "step": 41004 }, { "epoch": 1.9208788120110554, "grad_norm": 0.5635860833396735, "learning_rate": 2.0492807078174304e-08, "loss": 0.256, "step": 41005 }, { "epoch": 1.9209256570009838, "grad_norm": 0.5820729122851848, "learning_rate": 2.0468581217923113e-08, "loss": 0.2598, "step": 41006 }, { "epoch": 1.920972501990912, "grad_norm": 0.5441892356124235, "learning_rate": 2.0444369626752003e-08, "loss": 0.2554, "step": 41007 }, { "epoch": 1.9210193469808403, "grad_norm": 0.6082193392769242, "learning_rate": 2.042017230480031e-08, "loss": 0.2726, "step": 41008 }, { "epoch": 1.9210661919707688, "grad_norm": 0.6151022684448636, "learning_rate": 2.039598925220737e-08, "loss": 0.2658, "step": 41009 }, { "epoch": 1.921113036960697, "grad_norm": 0.5748975000745341, "learning_rate": 2.0371820469112235e-08, "loss": 0.2613, "step": 41010 }, { "epoch": 1.9211598819506253, "grad_norm": 0.6683075959420434, "learning_rate": 2.034766595565424e-08, "loss": 0.2812, "step": 41011 }, { "epoch": 1.9212067269405537, "grad_norm": 0.6403899575110242, "learning_rate": 2.032352571197216e-08, "loss": 0.2775, "step": 41012 }, { "epoch": 1.9212535719304822, "grad_norm": 0.6435291921896796, "learning_rate": 2.029939973820505e-08, "loss": 0.2883, "step": 41013 }, { "epoch": 1.9213004169204102, "grad_norm": 0.5442052993199664, "learning_rate": 2.0275288034491414e-08, "loss": 0.2469, "step": 41014 }, { "epoch": 1.9213472619103387, "grad_norm": 0.5671907217581036, "learning_rate": 2.0251190600970026e-08, "loss": 0.241, "step": 41015 }, { "epoch": 1.9213941069002671, "grad_norm": 0.5514536167484335, "learning_rate": 2.0227107437779947e-08, "loss": 0.249, "step": 41016 }, { "epoch": 1.9214409518901954, "grad_norm": 0.5803857574663019, "learning_rate": 2.020303854505995e-08, "loss": 0.2689, "step": 41017 }, { "epoch": 1.9214877968801236, "grad_norm": 0.6021489499964391, "learning_rate": 2.0178983922947702e-08, "loss": 0.2652, "step": 41018 }, { "epoch": 1.921534641870052, "grad_norm": 0.6074072987184717, "learning_rate": 2.015494357158254e-08, "loss": 0.2659, "step": 41019 }, { "epoch": 1.9215814868599803, "grad_norm": 0.6038551852498979, "learning_rate": 2.0130917491102133e-08, "loss": 0.2718, "step": 41020 }, { "epoch": 1.9216283318499086, "grad_norm": 0.6212788361700992, "learning_rate": 2.0106905681644972e-08, "loss": 0.2744, "step": 41021 }, { "epoch": 1.921675176839837, "grad_norm": 0.6275829145591083, "learning_rate": 2.0082908143349287e-08, "loss": 0.2785, "step": 41022 }, { "epoch": 1.9217220218297653, "grad_norm": 0.6477219017255025, "learning_rate": 2.0058924876353304e-08, "loss": 0.3009, "step": 41023 }, { "epoch": 1.9217688668196935, "grad_norm": 0.6234971189515923, "learning_rate": 2.003495588079496e-08, "loss": 0.2886, "step": 41024 }, { "epoch": 1.921815711809622, "grad_norm": 0.6126969959182379, "learning_rate": 2.0011001156811927e-08, "loss": 0.2639, "step": 41025 }, { "epoch": 1.9218625567995504, "grad_norm": 0.5942992505644977, "learning_rate": 1.9987060704542426e-08, "loss": 0.2738, "step": 41026 }, { "epoch": 1.9219094017894787, "grad_norm": 0.5380599143858774, "learning_rate": 1.9963134524123852e-08, "loss": 0.2492, "step": 41027 }, { "epoch": 1.921956246779407, "grad_norm": 0.5927169373111736, "learning_rate": 1.9939222615694143e-08, "loss": 0.2738, "step": 41028 }, { "epoch": 1.9220030917693354, "grad_norm": 0.5935790050156166, "learning_rate": 1.991532497939125e-08, "loss": 0.2754, "step": 41029 }, { "epoch": 1.9220499367592636, "grad_norm": 0.5869168627474326, "learning_rate": 1.9891441615351725e-08, "loss": 0.2672, "step": 41030 }, { "epoch": 1.9220967817491919, "grad_norm": 0.5947144098897786, "learning_rate": 1.9867572523714074e-08, "loss": 0.2718, "step": 41031 }, { "epoch": 1.9221436267391203, "grad_norm": 0.6161501097902969, "learning_rate": 1.984371770461513e-08, "loss": 0.2521, "step": 41032 }, { "epoch": 1.9221904717290486, "grad_norm": 0.6399841869451083, "learning_rate": 1.9819877158192e-08, "loss": 0.2857, "step": 41033 }, { "epoch": 1.9222373167189768, "grad_norm": 0.6327014343458638, "learning_rate": 1.9796050884582087e-08, "loss": 0.2919, "step": 41034 }, { "epoch": 1.9222841617089053, "grad_norm": 0.6376000808618971, "learning_rate": 1.9772238883922767e-08, "loss": 0.2676, "step": 41035 }, { "epoch": 1.9223310066988337, "grad_norm": 0.5716064909252292, "learning_rate": 1.9748441156350606e-08, "loss": 0.2629, "step": 41036 }, { "epoch": 1.9223778516887617, "grad_norm": 0.6408826780460407, "learning_rate": 1.9724657702002993e-08, "loss": 0.2755, "step": 41037 }, { "epoch": 1.9224246966786902, "grad_norm": 0.6365002040645351, "learning_rate": 1.9700888521016202e-08, "loss": 0.2673, "step": 41038 }, { "epoch": 1.9224715416686187, "grad_norm": 0.6300040733928788, "learning_rate": 1.967713361352791e-08, "loss": 0.2764, "step": 41039 }, { "epoch": 1.922518386658547, "grad_norm": 0.6150254102621414, "learning_rate": 1.9653392979674115e-08, "loss": 0.2676, "step": 41040 }, { "epoch": 1.9225652316484751, "grad_norm": 0.6243803940435064, "learning_rate": 1.9629666619591648e-08, "loss": 0.2691, "step": 41041 }, { "epoch": 1.9226120766384036, "grad_norm": 0.6212798064393378, "learning_rate": 1.9605954533416794e-08, "loss": 0.2775, "step": 41042 }, { "epoch": 1.9226589216283319, "grad_norm": 0.6051259837593339, "learning_rate": 1.9582256721286385e-08, "loss": 0.2666, "step": 41043 }, { "epoch": 1.92270576661826, "grad_norm": 0.5855865478895063, "learning_rate": 1.9558573183336704e-08, "loss": 0.2783, "step": 41044 }, { "epoch": 1.9227526116081886, "grad_norm": 0.5473540792207621, "learning_rate": 1.953490391970375e-08, "loss": 0.2664, "step": 41045 }, { "epoch": 1.9227994565981168, "grad_norm": 0.6133144203409914, "learning_rate": 1.951124893052436e-08, "loss": 0.2711, "step": 41046 }, { "epoch": 1.922846301588045, "grad_norm": 0.6373448477563816, "learning_rate": 1.948760821593426e-08, "loss": 0.2925, "step": 41047 }, { "epoch": 1.9228931465779735, "grad_norm": 0.5925592613424099, "learning_rate": 1.9463981776069452e-08, "loss": 0.2712, "step": 41048 }, { "epoch": 1.922939991567902, "grad_norm": 0.6040877249983267, "learning_rate": 1.9440369611065936e-08, "loss": 0.2741, "step": 41049 }, { "epoch": 1.92298683655783, "grad_norm": 0.6157550746554936, "learning_rate": 1.9416771721059436e-08, "loss": 0.2747, "step": 41050 }, { "epoch": 1.9230336815477584, "grad_norm": 0.5357898807170222, "learning_rate": 1.9393188106186235e-08, "loss": 0.25, "step": 41051 }, { "epoch": 1.923080526537687, "grad_norm": 0.5934496312438485, "learning_rate": 1.9369618766581498e-08, "loss": 0.2705, "step": 41052 }, { "epoch": 1.9231273715276151, "grad_norm": 0.5996478626760278, "learning_rate": 1.9346063702381233e-08, "loss": 0.2688, "step": 41053 }, { "epoch": 1.9231742165175434, "grad_norm": 0.6084248595876398, "learning_rate": 1.9322522913721165e-08, "loss": 0.2674, "step": 41054 }, { "epoch": 1.9232210615074719, "grad_norm": 0.5873059345575284, "learning_rate": 1.929899640073618e-08, "loss": 0.2736, "step": 41055 }, { "epoch": 1.9232679064974, "grad_norm": 0.5921089093888972, "learning_rate": 1.9275484163562007e-08, "loss": 0.2658, "step": 41056 }, { "epoch": 1.9233147514873283, "grad_norm": 0.6042117018598997, "learning_rate": 1.9251986202333815e-08, "loss": 0.2627, "step": 41057 }, { "epoch": 1.9233615964772568, "grad_norm": 0.6500474410334491, "learning_rate": 1.9228502517187054e-08, "loss": 0.2791, "step": 41058 }, { "epoch": 1.923408441467185, "grad_norm": 0.5895243642759675, "learning_rate": 1.9205033108256888e-08, "loss": 0.2491, "step": 41059 }, { "epoch": 1.9234552864571133, "grad_norm": 0.5871730215720229, "learning_rate": 1.9181577975677936e-08, "loss": 0.265, "step": 41060 }, { "epoch": 1.9235021314470417, "grad_norm": 0.5566221259810931, "learning_rate": 1.915813711958564e-08, "loss": 0.2571, "step": 41061 }, { "epoch": 1.9235489764369702, "grad_norm": 0.5872273811275844, "learning_rate": 1.91347105401149e-08, "loss": 0.2472, "step": 41062 }, { "epoch": 1.9235958214268984, "grad_norm": 0.5673077173053706, "learning_rate": 1.9111298237400046e-08, "loss": 0.2636, "step": 41063 }, { "epoch": 1.9236426664168267, "grad_norm": 0.5693614696899165, "learning_rate": 1.908790021157625e-08, "loss": 0.26, "step": 41064 }, { "epoch": 1.9236895114067551, "grad_norm": 0.5850515509273165, "learning_rate": 1.906451646277785e-08, "loss": 0.2673, "step": 41065 }, { "epoch": 1.9237363563966834, "grad_norm": 0.6026410955266599, "learning_rate": 1.9041146991140014e-08, "loss": 0.294, "step": 41066 }, { "epoch": 1.9237832013866116, "grad_norm": 0.5625630777928468, "learning_rate": 1.9017791796796526e-08, "loss": 0.2633, "step": 41067 }, { "epoch": 1.92383004637654, "grad_norm": 0.5628315628381418, "learning_rate": 1.8994450879882277e-08, "loss": 0.2585, "step": 41068 }, { "epoch": 1.9238768913664683, "grad_norm": 0.6493771297072712, "learning_rate": 1.8971124240531047e-08, "loss": 0.2685, "step": 41069 }, { "epoch": 1.9239237363563966, "grad_norm": 0.5911396073201802, "learning_rate": 1.8947811878877453e-08, "loss": 0.2752, "step": 41070 }, { "epoch": 1.923970581346325, "grad_norm": 0.6039414493288692, "learning_rate": 1.8924513795055832e-08, "loss": 0.2651, "step": 41071 }, { "epoch": 1.9240174263362535, "grad_norm": 0.6188020984711409, "learning_rate": 1.8901229989199965e-08, "loss": 0.2833, "step": 41072 }, { "epoch": 1.9240642713261815, "grad_norm": 0.5997510239035931, "learning_rate": 1.887796046144391e-08, "loss": 0.2739, "step": 41073 }, { "epoch": 1.92411111631611, "grad_norm": 0.5626588509089606, "learning_rate": 1.8854705211921732e-08, "loss": 0.2568, "step": 41074 }, { "epoch": 1.9241579613060384, "grad_norm": 0.5778966365895953, "learning_rate": 1.883146424076693e-08, "loss": 0.2571, "step": 41075 }, { "epoch": 1.9242048062959667, "grad_norm": 0.6105087274648013, "learning_rate": 1.8808237548113563e-08, "loss": 0.2693, "step": 41076 }, { "epoch": 1.924251651285895, "grad_norm": 0.6209667383882685, "learning_rate": 1.878502513409486e-08, "loss": 0.27, "step": 41077 }, { "epoch": 1.9242984962758234, "grad_norm": 0.5975749582029163, "learning_rate": 1.8761826998845157e-08, "loss": 0.271, "step": 41078 }, { "epoch": 1.9243453412657516, "grad_norm": 0.6144430894382419, "learning_rate": 1.8738643142497126e-08, "loss": 0.2629, "step": 41079 }, { "epoch": 1.9243921862556799, "grad_norm": 0.6030231128500012, "learning_rate": 1.8715473565184826e-08, "loss": 0.2629, "step": 41080 }, { "epoch": 1.9244390312456083, "grad_norm": 0.5669864593797356, "learning_rate": 1.869231826704121e-08, "loss": 0.2612, "step": 41081 }, { "epoch": 1.9244858762355366, "grad_norm": 0.6507197149683118, "learning_rate": 1.86691772481995e-08, "loss": 0.2821, "step": 41082 }, { "epoch": 1.9245327212254648, "grad_norm": 0.5747707258888521, "learning_rate": 1.86460505087932e-08, "loss": 0.2628, "step": 41083 }, { "epoch": 1.9245795662153933, "grad_norm": 0.6371704014684646, "learning_rate": 1.8622938048955263e-08, "loss": 0.2869, "step": 41084 }, { "epoch": 1.9246264112053217, "grad_norm": 0.617088093985166, "learning_rate": 1.8599839868818637e-08, "loss": 0.2709, "step": 41085 }, { "epoch": 1.9246732561952498, "grad_norm": 0.5969236351511831, "learning_rate": 1.857675596851599e-08, "loss": 0.2841, "step": 41086 }, { "epoch": 1.9247201011851782, "grad_norm": 0.5537044990452905, "learning_rate": 1.8553686348180554e-08, "loss": 0.2511, "step": 41087 }, { "epoch": 1.9247669461751067, "grad_norm": 0.5769384893799308, "learning_rate": 1.8530631007945275e-08, "loss": 0.2702, "step": 41088 }, { "epoch": 1.924813791165035, "grad_norm": 0.5906550880160077, "learning_rate": 1.8507589947942273e-08, "loss": 0.2727, "step": 41089 }, { "epoch": 1.9248606361549632, "grad_norm": 0.6011454804730835, "learning_rate": 1.8484563168304493e-08, "loss": 0.2696, "step": 41090 }, { "epoch": 1.9249074811448916, "grad_norm": 0.6097226591492106, "learning_rate": 1.846155066916405e-08, "loss": 0.2654, "step": 41091 }, { "epoch": 1.9249543261348199, "grad_norm": 0.6587713110257502, "learning_rate": 1.84385524506539e-08, "loss": 0.2945, "step": 41092 }, { "epoch": 1.9250011711247481, "grad_norm": 0.6422794026730891, "learning_rate": 1.8415568512906156e-08, "loss": 0.2741, "step": 41093 }, { "epoch": 1.9250480161146766, "grad_norm": 0.5854199645375328, "learning_rate": 1.8392598856052934e-08, "loss": 0.2533, "step": 41094 }, { "epoch": 1.9250948611046048, "grad_norm": 0.5845562193981516, "learning_rate": 1.8369643480226905e-08, "loss": 0.2709, "step": 41095 }, { "epoch": 1.925141706094533, "grad_norm": 0.6422104856748384, "learning_rate": 1.8346702385559633e-08, "loss": 0.2862, "step": 41096 }, { "epoch": 1.9251885510844615, "grad_norm": 0.5752956113639621, "learning_rate": 1.832377557218351e-08, "loss": 0.2599, "step": 41097 }, { "epoch": 1.92523539607439, "grad_norm": 0.5856824945620409, "learning_rate": 1.83008630402301e-08, "loss": 0.2841, "step": 41098 }, { "epoch": 1.9252822410643182, "grad_norm": 0.5648227710168467, "learning_rate": 1.8277964789831514e-08, "loss": 0.258, "step": 41099 }, { "epoch": 1.9253290860542465, "grad_norm": 0.62901904846166, "learning_rate": 1.825508082111932e-08, "loss": 0.2829, "step": 41100 }, { "epoch": 1.925375931044175, "grad_norm": 0.5540363822510953, "learning_rate": 1.823221113422563e-08, "loss": 0.26, "step": 41101 }, { "epoch": 1.9254227760341032, "grad_norm": 0.5797294969003174, "learning_rate": 1.820935572928173e-08, "loss": 0.2607, "step": 41102 }, { "epoch": 1.9254696210240314, "grad_norm": 0.5737480751048908, "learning_rate": 1.8186514606419458e-08, "loss": 0.2675, "step": 41103 }, { "epoch": 1.9255164660139599, "grad_norm": 0.5727557870263645, "learning_rate": 1.8163687765769545e-08, "loss": 0.2433, "step": 41104 }, { "epoch": 1.9255633110038881, "grad_norm": 0.6188003520079102, "learning_rate": 1.8140875207464104e-08, "loss": 0.2894, "step": 41105 }, { "epoch": 1.9256101559938164, "grad_norm": 0.5920799892777254, "learning_rate": 1.8118076931634143e-08, "loss": 0.2802, "step": 41106 }, { "epoch": 1.9256570009837448, "grad_norm": 0.5838735345192874, "learning_rate": 1.8095292938410668e-08, "loss": 0.2569, "step": 41107 }, { "epoch": 1.9257038459736733, "grad_norm": 0.5886995812869597, "learning_rate": 1.8072523227925243e-08, "loss": 0.2747, "step": 41108 }, { "epoch": 1.9257506909636013, "grad_norm": 0.6083061648887399, "learning_rate": 1.8049767800308315e-08, "loss": 0.268, "step": 41109 }, { "epoch": 1.9257975359535298, "grad_norm": 0.5936340290086367, "learning_rate": 1.802702665569145e-08, "loss": 0.275, "step": 41110 }, { "epoch": 1.9258443809434582, "grad_norm": 0.6073240687032642, "learning_rate": 1.8004299794205093e-08, "loss": 0.273, "step": 41111 }, { "epoch": 1.9258912259333865, "grad_norm": 0.6150153136011081, "learning_rate": 1.7981587215980255e-08, "loss": 0.277, "step": 41112 }, { "epoch": 1.9259380709233147, "grad_norm": 0.5701948797511294, "learning_rate": 1.7958888921147387e-08, "loss": 0.2561, "step": 41113 }, { "epoch": 1.9259849159132432, "grad_norm": 0.6074049993451084, "learning_rate": 1.7936204909837497e-08, "loss": 0.2692, "step": 41114 }, { "epoch": 1.9260317609031714, "grad_norm": 0.6179963707116852, "learning_rate": 1.791353518218075e-08, "loss": 0.2876, "step": 41115 }, { "epoch": 1.9260786058930996, "grad_norm": 0.614791165154929, "learning_rate": 1.7890879738307886e-08, "loss": 0.2741, "step": 41116 }, { "epoch": 1.9261254508830281, "grad_norm": 0.5979428407178479, "learning_rate": 1.7868238578349072e-08, "loss": 0.2811, "step": 41117 }, { "epoch": 1.9261722958729564, "grad_norm": 0.6109364183002263, "learning_rate": 1.7845611702434485e-08, "loss": 0.2592, "step": 41118 }, { "epoch": 1.9262191408628846, "grad_norm": 0.6120564353232323, "learning_rate": 1.782299911069485e-08, "loss": 0.2767, "step": 41119 }, { "epoch": 1.926265985852813, "grad_norm": 0.5896617959508288, "learning_rate": 1.780040080325979e-08, "loss": 0.2583, "step": 41120 }, { "epoch": 1.9263128308427415, "grad_norm": 0.6203406072284576, "learning_rate": 1.7777816780259748e-08, "loss": 0.2636, "step": 41121 }, { "epoch": 1.9263596758326695, "grad_norm": 0.5999607488636448, "learning_rate": 1.7755247041824077e-08, "loss": 0.2715, "step": 41122 }, { "epoch": 1.926406520822598, "grad_norm": 0.6023716323137905, "learning_rate": 1.7732691588083495e-08, "loss": 0.2725, "step": 41123 }, { "epoch": 1.9264533658125265, "grad_norm": 0.5829726790209739, "learning_rate": 1.771015041916735e-08, "loss": 0.2571, "step": 41124 }, { "epoch": 1.9265002108024547, "grad_norm": 0.6072069313292419, "learning_rate": 1.768762353520498e-08, "loss": 0.2689, "step": 41125 }, { "epoch": 1.926547055792383, "grad_norm": 0.6744040646092797, "learning_rate": 1.7665110936326835e-08, "loss": 0.2823, "step": 41126 }, { "epoch": 1.9265939007823114, "grad_norm": 0.6086333683043854, "learning_rate": 1.764261262266198e-08, "loss": 0.2621, "step": 41127 }, { "epoch": 1.9266407457722396, "grad_norm": 0.5809409348197985, "learning_rate": 1.7620128594340036e-08, "loss": 0.2636, "step": 41128 }, { "epoch": 1.926687590762168, "grad_norm": 0.5774761889985198, "learning_rate": 1.759765885149006e-08, "loss": 0.2772, "step": 41129 }, { "epoch": 1.9267344357520964, "grad_norm": 0.6065622470516892, "learning_rate": 1.7575203394241958e-08, "loss": 0.2812, "step": 41130 }, { "epoch": 1.9267812807420246, "grad_norm": 0.5878165867269087, "learning_rate": 1.7552762222724506e-08, "loss": 0.2645, "step": 41131 }, { "epoch": 1.9268281257319528, "grad_norm": 0.5843487023738079, "learning_rate": 1.7530335337066772e-08, "loss": 0.2661, "step": 41132 }, { "epoch": 1.9268749707218813, "grad_norm": 0.5586439786486755, "learning_rate": 1.7507922737398375e-08, "loss": 0.2631, "step": 41133 }, { "epoch": 1.9269218157118098, "grad_norm": 0.6197870823347282, "learning_rate": 1.7485524423847543e-08, "loss": 0.2701, "step": 41134 }, { "epoch": 1.926968660701738, "grad_norm": 0.604588109055076, "learning_rate": 1.746314039654362e-08, "loss": 0.2808, "step": 41135 }, { "epoch": 1.9270155056916662, "grad_norm": 0.5623778134238897, "learning_rate": 1.7440770655615667e-08, "loss": 0.2588, "step": 41136 }, { "epoch": 1.9270623506815947, "grad_norm": 0.6049827548557424, "learning_rate": 1.741841520119192e-08, "loss": 0.276, "step": 41137 }, { "epoch": 1.927109195671523, "grad_norm": 0.6028535679632259, "learning_rate": 1.7396074033401156e-08, "loss": 0.2658, "step": 41138 }, { "epoch": 1.9271560406614512, "grad_norm": 0.6522915844244996, "learning_rate": 1.7373747152372167e-08, "loss": 0.2745, "step": 41139 }, { "epoch": 1.9272028856513796, "grad_norm": 0.650070959726814, "learning_rate": 1.7351434558233183e-08, "loss": 0.2836, "step": 41140 }, { "epoch": 1.927249730641308, "grad_norm": 0.6586873484134862, "learning_rate": 1.732913625111271e-08, "loss": 0.2825, "step": 41141 }, { "epoch": 1.9272965756312361, "grad_norm": 0.6270491120646535, "learning_rate": 1.7306852231138982e-08, "loss": 0.28, "step": 41142 }, { "epoch": 1.9273434206211646, "grad_norm": 0.6369276328827888, "learning_rate": 1.7284582498440506e-08, "loss": 0.2696, "step": 41143 }, { "epoch": 1.927390265611093, "grad_norm": 0.6594567453918053, "learning_rate": 1.7262327053145234e-08, "loss": 0.283, "step": 41144 }, { "epoch": 1.927437110601021, "grad_norm": 0.6324848546535308, "learning_rate": 1.724008589538112e-08, "loss": 0.2717, "step": 41145 }, { "epoch": 1.9274839555909495, "grad_norm": 0.6048752351517543, "learning_rate": 1.7217859025276397e-08, "loss": 0.2774, "step": 41146 }, { "epoch": 1.927530800580878, "grad_norm": 0.5692269517083869, "learning_rate": 1.7195646442958735e-08, "loss": 0.2593, "step": 41147 }, { "epoch": 1.9275776455708062, "grad_norm": 0.5474661436758018, "learning_rate": 1.7173448148556094e-08, "loss": 0.2608, "step": 41148 }, { "epoch": 1.9276244905607345, "grad_norm": 0.5823912951798675, "learning_rate": 1.715126414219642e-08, "loss": 0.2509, "step": 41149 }, { "epoch": 1.927671335550663, "grad_norm": 0.6000861084788749, "learning_rate": 1.7129094424006844e-08, "loss": 0.2879, "step": 41150 }, { "epoch": 1.9277181805405912, "grad_norm": 0.6216159946320532, "learning_rate": 1.7106938994115585e-08, "loss": 0.2722, "step": 41151 }, { "epoch": 1.9277650255305194, "grad_norm": 0.6374392747132381, "learning_rate": 1.7084797852650048e-08, "loss": 0.2681, "step": 41152 }, { "epoch": 1.927811870520448, "grad_norm": 0.6479787153436982, "learning_rate": 1.7062670999737075e-08, "loss": 0.2785, "step": 41153 }, { "epoch": 1.9278587155103761, "grad_norm": 0.6095997307741474, "learning_rate": 1.704055843550434e-08, "loss": 0.2727, "step": 41154 }, { "epoch": 1.9279055605003044, "grad_norm": 0.5962890422630455, "learning_rate": 1.7018460160079243e-08, "loss": 0.2617, "step": 41155 }, { "epoch": 1.9279524054902328, "grad_norm": 0.5752210497737343, "learning_rate": 1.6996376173588904e-08, "loss": 0.2619, "step": 41156 }, { "epoch": 1.9279992504801613, "grad_norm": 0.5977106052250682, "learning_rate": 1.6974306476160162e-08, "loss": 0.2677, "step": 41157 }, { "epoch": 1.9280460954700893, "grad_norm": 0.5811680606384756, "learning_rate": 1.695225106792042e-08, "loss": 0.2545, "step": 41158 }, { "epoch": 1.9280929404600178, "grad_norm": 0.547782434782335, "learning_rate": 1.6930209948996246e-08, "loss": 0.2428, "step": 41159 }, { "epoch": 1.9281397854499462, "grad_norm": 0.5907808172739444, "learning_rate": 1.6908183119514754e-08, "loss": 0.2631, "step": 41160 }, { "epoch": 1.9281866304398745, "grad_norm": 0.5974885902049274, "learning_rate": 1.6886170579602234e-08, "loss": 0.2709, "step": 41161 }, { "epoch": 1.9282334754298027, "grad_norm": 0.5590732607230486, "learning_rate": 1.6864172329385808e-08, "loss": 0.2489, "step": 41162 }, { "epoch": 1.9282803204197312, "grad_norm": 0.5595868323059154, "learning_rate": 1.6842188368992318e-08, "loss": 0.2609, "step": 41163 }, { "epoch": 1.9283271654096594, "grad_norm": 0.574513631666438, "learning_rate": 1.6820218698547497e-08, "loss": 0.2701, "step": 41164 }, { "epoch": 1.9283740103995877, "grad_norm": 0.5863585237959621, "learning_rate": 1.6798263318178464e-08, "loss": 0.2736, "step": 41165 }, { "epoch": 1.9284208553895161, "grad_norm": 0.589598410468906, "learning_rate": 1.677632222801123e-08, "loss": 0.2563, "step": 41166 }, { "epoch": 1.9284677003794444, "grad_norm": 0.5712703517352605, "learning_rate": 1.6754395428172087e-08, "loss": 0.2539, "step": 41167 }, { "epoch": 1.9285145453693726, "grad_norm": 0.6626597351974592, "learning_rate": 1.673248291878704e-08, "loss": 0.2802, "step": 41168 }, { "epoch": 1.928561390359301, "grad_norm": 0.5836364454402725, "learning_rate": 1.6710584699982935e-08, "loss": 0.2623, "step": 41169 }, { "epoch": 1.9286082353492295, "grad_norm": 0.6231369361813186, "learning_rate": 1.668870077188467e-08, "loss": 0.2713, "step": 41170 }, { "epoch": 1.9286550803391578, "grad_norm": 0.5955984182735452, "learning_rate": 1.6666831134619087e-08, "loss": 0.2791, "step": 41171 }, { "epoch": 1.928701925329086, "grad_norm": 0.6175041632924791, "learning_rate": 1.6644975788311925e-08, "loss": 0.2727, "step": 41172 }, { "epoch": 1.9287487703190145, "grad_norm": 0.5782029033972412, "learning_rate": 1.6623134733088354e-08, "loss": 0.2641, "step": 41173 }, { "epoch": 1.9287956153089427, "grad_norm": 0.6025929201483576, "learning_rate": 1.6601307969074665e-08, "loss": 0.2693, "step": 41174 }, { "epoch": 1.928842460298871, "grad_norm": 0.6303679740713353, "learning_rate": 1.6579495496396315e-08, "loss": 0.2839, "step": 41175 }, { "epoch": 1.9288893052887994, "grad_norm": 0.636038455106684, "learning_rate": 1.6557697315178757e-08, "loss": 0.2843, "step": 41176 }, { "epoch": 1.9289361502787277, "grad_norm": 0.6159633068979354, "learning_rate": 1.6535913425547446e-08, "loss": 0.2738, "step": 41177 }, { "epoch": 1.928982995268656, "grad_norm": 0.6067554432456068, "learning_rate": 1.651414382762756e-08, "loss": 0.2766, "step": 41178 }, { "epoch": 1.9290298402585844, "grad_norm": 0.6407711094303495, "learning_rate": 1.6492388521544832e-08, "loss": 0.287, "step": 41179 }, { "epoch": 1.9290766852485128, "grad_norm": 0.57454189017261, "learning_rate": 1.6470647507424165e-08, "loss": 0.2716, "step": 41180 }, { "epoch": 1.9291235302384409, "grad_norm": 0.5785074020812455, "learning_rate": 1.644892078539073e-08, "loss": 0.2607, "step": 41181 }, { "epoch": 1.9291703752283693, "grad_norm": 0.6114805488734794, "learning_rate": 1.6427208355569435e-08, "loss": 0.2689, "step": 41182 }, { "epoch": 1.9292172202182978, "grad_norm": 0.6025391209017004, "learning_rate": 1.640551021808545e-08, "loss": 0.2747, "step": 41183 }, { "epoch": 1.929264065208226, "grad_norm": 0.5689965797475479, "learning_rate": 1.6383826373063407e-08, "loss": 0.2516, "step": 41184 }, { "epoch": 1.9293109101981543, "grad_norm": 0.6153000495168548, "learning_rate": 1.6362156820628195e-08, "loss": 0.2729, "step": 41185 }, { "epoch": 1.9293577551880827, "grad_norm": 0.570655064689598, "learning_rate": 1.6340501560904722e-08, "loss": 0.2616, "step": 41186 }, { "epoch": 1.929404600178011, "grad_norm": 0.5678432508285091, "learning_rate": 1.6318860594017328e-08, "loss": 0.2644, "step": 41187 }, { "epoch": 1.9294514451679392, "grad_norm": 0.5475923987361849, "learning_rate": 1.629723392009036e-08, "loss": 0.2478, "step": 41188 }, { "epoch": 1.9294982901578677, "grad_norm": 0.5869327325535171, "learning_rate": 1.6275621539249e-08, "loss": 0.2604, "step": 41189 }, { "epoch": 1.929545135147796, "grad_norm": 0.561479477138429, "learning_rate": 1.625402345161675e-08, "loss": 0.2657, "step": 41190 }, { "epoch": 1.9295919801377241, "grad_norm": 0.6788561428555787, "learning_rate": 1.623243965731852e-08, "loss": 0.2722, "step": 41191 }, { "epoch": 1.9296388251276526, "grad_norm": 0.5950810111269159, "learning_rate": 1.6210870156478375e-08, "loss": 0.263, "step": 41192 }, { "epoch": 1.929685670117581, "grad_norm": 0.6138800230412852, "learning_rate": 1.618931494922038e-08, "loss": 0.2703, "step": 41193 }, { "epoch": 1.929732515107509, "grad_norm": 0.5842199751421726, "learning_rate": 1.6167774035668882e-08, "loss": 0.2654, "step": 41194 }, { "epoch": 1.9297793600974376, "grad_norm": 0.629503891624515, "learning_rate": 1.6146247415947113e-08, "loss": 0.2918, "step": 41195 }, { "epoch": 1.929826205087366, "grad_norm": 0.6087600978199104, "learning_rate": 1.612473509017942e-08, "loss": 0.2697, "step": 41196 }, { "epoch": 1.9298730500772943, "grad_norm": 0.6032940932589449, "learning_rate": 1.6103237058489873e-08, "loss": 0.2591, "step": 41197 }, { "epoch": 1.9299198950672225, "grad_norm": 0.6108362708048469, "learning_rate": 1.6081753321001703e-08, "loss": 0.2535, "step": 41198 }, { "epoch": 1.929966740057151, "grad_norm": 0.5758223140706787, "learning_rate": 1.60602838778387e-08, "loss": 0.264, "step": 41199 }, { "epoch": 1.9300135850470792, "grad_norm": 0.5966012502923838, "learning_rate": 1.6038828729124654e-08, "loss": 0.2666, "step": 41200 }, { "epoch": 1.9300604300370074, "grad_norm": 0.6026977372705636, "learning_rate": 1.6017387874982528e-08, "loss": 0.2548, "step": 41201 }, { "epoch": 1.930107275026936, "grad_norm": 0.5974980867742695, "learning_rate": 1.599596131553638e-08, "loss": 0.2647, "step": 41202 }, { "epoch": 1.9301541200168641, "grad_norm": 0.6126807153858692, "learning_rate": 1.5974549050908895e-08, "loss": 0.2813, "step": 41203 }, { "epoch": 1.9302009650067924, "grad_norm": 0.5858794007988908, "learning_rate": 1.5953151081223305e-08, "loss": 0.2549, "step": 41204 }, { "epoch": 1.9302478099967209, "grad_norm": 0.5658030689084109, "learning_rate": 1.5931767406603405e-08, "loss": 0.2637, "step": 41205 }, { "epoch": 1.9302946549866493, "grad_norm": 0.6348763576134868, "learning_rate": 1.5910398027171592e-08, "loss": 0.2858, "step": 41206 }, { "epoch": 1.9303414999765776, "grad_norm": 0.5683771558159049, "learning_rate": 1.588904294305138e-08, "loss": 0.2601, "step": 41207 }, { "epoch": 1.9303883449665058, "grad_norm": 0.5899654866625245, "learning_rate": 1.586770215436517e-08, "loss": 0.261, "step": 41208 }, { "epoch": 1.9304351899564343, "grad_norm": 0.5669187067922428, "learning_rate": 1.5846375661235925e-08, "loss": 0.268, "step": 41209 }, { "epoch": 1.9304820349463625, "grad_norm": 0.5823641548253157, "learning_rate": 1.5825063463786317e-08, "loss": 0.2663, "step": 41210 }, { "epoch": 1.9305288799362907, "grad_norm": 0.617803130091256, "learning_rate": 1.5803765562139307e-08, "loss": 0.288, "step": 41211 }, { "epoch": 1.9305757249262192, "grad_norm": 0.573880680740867, "learning_rate": 1.5782481956417296e-08, "loss": 0.2611, "step": 41212 }, { "epoch": 1.9306225699161474, "grad_norm": 0.6061056775624122, "learning_rate": 1.5761212646742686e-08, "loss": 0.2627, "step": 41213 }, { "epoch": 1.9306694149060757, "grad_norm": 0.5499472424426487, "learning_rate": 1.5739957633237878e-08, "loss": 0.2493, "step": 41214 }, { "epoch": 1.9307162598960041, "grad_norm": 0.5863829724922386, "learning_rate": 1.5718716916025e-08, "loss": 0.2564, "step": 41215 }, { "epoch": 1.9307631048859326, "grad_norm": 0.5648938477873362, "learning_rate": 1.5697490495226732e-08, "loss": 0.2501, "step": 41216 }, { "epoch": 1.9308099498758606, "grad_norm": 0.5945025782361261, "learning_rate": 1.5676278370964915e-08, "loss": 0.2709, "step": 41217 }, { "epoch": 1.930856794865789, "grad_norm": 0.5926985179687323, "learning_rate": 1.565508054336168e-08, "loss": 0.2529, "step": 41218 }, { "epoch": 1.9309036398557176, "grad_norm": 0.5779613644039676, "learning_rate": 1.5633897012539145e-08, "loss": 0.2689, "step": 41219 }, { "epoch": 1.9309504848456458, "grad_norm": 0.5790609232181422, "learning_rate": 1.5612727778619163e-08, "loss": 0.2693, "step": 41220 }, { "epoch": 1.930997329835574, "grad_norm": 0.6063048399278519, "learning_rate": 1.5591572841723302e-08, "loss": 0.2764, "step": 41221 }, { "epoch": 1.9310441748255025, "grad_norm": 0.6413571452814164, "learning_rate": 1.5570432201973685e-08, "loss": 0.2766, "step": 41222 }, { "epoch": 1.9310910198154307, "grad_norm": 0.5677642746018804, "learning_rate": 1.5549305859491605e-08, "loss": 0.2466, "step": 41223 }, { "epoch": 1.931137864805359, "grad_norm": 0.5967046620038992, "learning_rate": 1.5528193814399184e-08, "loss": 0.2695, "step": 41224 }, { "epoch": 1.9311847097952874, "grad_norm": 0.5929352832247119, "learning_rate": 1.5507096066817164e-08, "loss": 0.2754, "step": 41225 }, { "epoch": 1.9312315547852157, "grad_norm": 0.5843063120938866, "learning_rate": 1.5486012616867385e-08, "loss": 0.2731, "step": 41226 }, { "epoch": 1.931278399775144, "grad_norm": 0.6652952563544451, "learning_rate": 1.5464943464671424e-08, "loss": 0.2839, "step": 41227 }, { "epoch": 1.9313252447650724, "grad_norm": 0.6076282182791501, "learning_rate": 1.544388861035001e-08, "loss": 0.2682, "step": 41228 }, { "epoch": 1.9313720897550009, "grad_norm": 0.6764509579257216, "learning_rate": 1.542284805402472e-08, "loss": 0.2928, "step": 41229 }, { "epoch": 1.9314189347449289, "grad_norm": 0.5594985759148218, "learning_rate": 1.5401821795816287e-08, "loss": 0.2599, "step": 41230 }, { "epoch": 1.9314657797348573, "grad_norm": 0.6145902925109208, "learning_rate": 1.5380809835846e-08, "loss": 0.2626, "step": 41231 }, { "epoch": 1.9315126247247858, "grad_norm": 0.6221226143071734, "learning_rate": 1.53598121742346e-08, "loss": 0.2701, "step": 41232 }, { "epoch": 1.931559469714714, "grad_norm": 0.5817490281707222, "learning_rate": 1.53388288111031e-08, "loss": 0.2711, "step": 41233 }, { "epoch": 1.9316063147046423, "grad_norm": 0.5661606401053425, "learning_rate": 1.531785974657196e-08, "loss": 0.2577, "step": 41234 }, { "epoch": 1.9316531596945707, "grad_norm": 0.5957810230137338, "learning_rate": 1.5296904980762195e-08, "loss": 0.2873, "step": 41235 }, { "epoch": 1.931700004684499, "grad_norm": 0.5958198349676398, "learning_rate": 1.527596451379426e-08, "loss": 0.2607, "step": 41236 }, { "epoch": 1.9317468496744272, "grad_norm": 0.6118643681156564, "learning_rate": 1.525503834578862e-08, "loss": 0.2812, "step": 41237 }, { "epoch": 1.9317936946643557, "grad_norm": 0.601043023361597, "learning_rate": 1.523412647686545e-08, "loss": 0.2845, "step": 41238 }, { "epoch": 1.931840539654284, "grad_norm": 0.578369141566432, "learning_rate": 1.5213228907145494e-08, "loss": 0.2677, "step": 41239 }, { "epoch": 1.9318873846442122, "grad_norm": 0.5993120085839669, "learning_rate": 1.5192345636748927e-08, "loss": 0.2762, "step": 41240 }, { "epoch": 1.9319342296341406, "grad_norm": 0.6250524821484791, "learning_rate": 1.5171476665795938e-08, "loss": 0.2732, "step": 41241 }, { "epoch": 1.931981074624069, "grad_norm": 0.571365164862776, "learning_rate": 1.51506219944067e-08, "loss": 0.2744, "step": 41242 }, { "epoch": 1.9320279196139973, "grad_norm": 0.6595553537136182, "learning_rate": 1.5129781622700844e-08, "loss": 0.2916, "step": 41243 }, { "epoch": 1.9320747646039256, "grad_norm": 0.5690961949157879, "learning_rate": 1.5108955550798833e-08, "loss": 0.2663, "step": 41244 }, { "epoch": 1.932121609593854, "grad_norm": 0.6113164563705422, "learning_rate": 1.508814377881973e-08, "loss": 0.2804, "step": 41245 }, { "epoch": 1.9321684545837823, "grad_norm": 0.6337021637281199, "learning_rate": 1.5067346306884278e-08, "loss": 0.2798, "step": 41246 }, { "epoch": 1.9322152995737105, "grad_norm": 0.6187355737344158, "learning_rate": 1.504656313511127e-08, "loss": 0.2818, "step": 41247 }, { "epoch": 1.932262144563639, "grad_norm": 0.6042237879658341, "learning_rate": 1.5025794263621163e-08, "loss": 0.2663, "step": 41248 }, { "epoch": 1.9323089895535672, "grad_norm": 0.618885114739099, "learning_rate": 1.500503969253275e-08, "loss": 0.2675, "step": 41249 }, { "epoch": 1.9323558345434955, "grad_norm": 0.6027079788170097, "learning_rate": 1.4984299421965943e-08, "loss": 0.2704, "step": 41250 }, { "epoch": 1.932402679533424, "grad_norm": 0.6029637315076951, "learning_rate": 1.4963573452039802e-08, "loss": 0.269, "step": 41251 }, { "epoch": 1.9324495245233524, "grad_norm": 0.6323704141984107, "learning_rate": 1.4942861782873407e-08, "loss": 0.2508, "step": 41252 }, { "epoch": 1.9324963695132804, "grad_norm": 0.5939590471987491, "learning_rate": 1.492216441458666e-08, "loss": 0.2691, "step": 41253 }, { "epoch": 1.9325432145032089, "grad_norm": 0.557858897924052, "learning_rate": 1.4901481347298073e-08, "loss": 0.263, "step": 41254 }, { "epoch": 1.9325900594931373, "grad_norm": 0.589185421501096, "learning_rate": 1.4880812581127002e-08, "loss": 0.2676, "step": 41255 }, { "epoch": 1.9326369044830656, "grad_norm": 0.5958134266871847, "learning_rate": 1.4860158116192235e-08, "loss": 0.2813, "step": 41256 }, { "epoch": 1.9326837494729938, "grad_norm": 0.6419750048473497, "learning_rate": 1.4839517952612569e-08, "loss": 0.284, "step": 41257 }, { "epoch": 1.9327305944629223, "grad_norm": 0.562356395592547, "learning_rate": 1.4818892090506798e-08, "loss": 0.2589, "step": 41258 }, { "epoch": 1.9327774394528505, "grad_norm": 0.6345530657023947, "learning_rate": 1.4798280529993714e-08, "loss": 0.2678, "step": 41259 }, { "epoch": 1.9328242844427788, "grad_norm": 0.6161661162772512, "learning_rate": 1.4777683271191834e-08, "loss": 0.289, "step": 41260 }, { "epoch": 1.9328711294327072, "grad_norm": 0.5571018470893161, "learning_rate": 1.4757100314219953e-08, "loss": 0.2531, "step": 41261 }, { "epoch": 1.9329179744226355, "grad_norm": 0.579966616125706, "learning_rate": 1.473653165919603e-08, "loss": 0.2605, "step": 41262 }, { "epoch": 1.9329648194125637, "grad_norm": 0.5654070087349817, "learning_rate": 1.471597730623886e-08, "loss": 0.2532, "step": 41263 }, { "epoch": 1.9330116644024922, "grad_norm": 0.624877787745103, "learning_rate": 1.4695437255466683e-08, "loss": 0.2738, "step": 41264 }, { "epoch": 1.9330585093924206, "grad_norm": 0.5859285172569729, "learning_rate": 1.4674911506997457e-08, "loss": 0.2626, "step": 41265 }, { "epoch": 1.9331053543823487, "grad_norm": 0.5819120237141749, "learning_rate": 1.4654400060949426e-08, "loss": 0.2552, "step": 41266 }, { "epoch": 1.9331521993722771, "grad_norm": 0.5929886341421481, "learning_rate": 1.4633902917440823e-08, "loss": 0.2601, "step": 41267 }, { "epoch": 1.9331990443622056, "grad_norm": 0.5840108303886431, "learning_rate": 1.4613420076589336e-08, "loss": 0.266, "step": 41268 }, { "epoch": 1.9332458893521338, "grad_norm": 0.5930981474388652, "learning_rate": 1.4592951538512922e-08, "loss": 0.2701, "step": 41269 }, { "epoch": 1.933292734342062, "grad_norm": 0.5834889349814536, "learning_rate": 1.4572497303329547e-08, "loss": 0.2847, "step": 41270 }, { "epoch": 1.9333395793319905, "grad_norm": 0.6094974247341077, "learning_rate": 1.4552057371156614e-08, "loss": 0.2814, "step": 41271 }, { "epoch": 1.9333864243219188, "grad_norm": 0.5998917307464505, "learning_rate": 1.4531631742111807e-08, "loss": 0.2695, "step": 41272 }, { "epoch": 1.933433269311847, "grad_norm": 0.6170378719157993, "learning_rate": 1.4511220416313088e-08, "loss": 0.2629, "step": 41273 }, { "epoch": 1.9334801143017755, "grad_norm": 0.6288309414428954, "learning_rate": 1.4490823393877307e-08, "loss": 0.2562, "step": 41274 }, { "epoch": 1.9335269592917037, "grad_norm": 0.599588504783558, "learning_rate": 1.4470440674922148e-08, "loss": 0.2642, "step": 41275 }, { "epoch": 1.933573804281632, "grad_norm": 0.639546091827621, "learning_rate": 1.445007225956474e-08, "loss": 0.2724, "step": 41276 }, { "epoch": 1.9336206492715604, "grad_norm": 0.6118356142909832, "learning_rate": 1.4429718147922767e-08, "loss": 0.267, "step": 41277 }, { "epoch": 1.9336674942614889, "grad_norm": 0.5842329785560096, "learning_rate": 1.4409378340113078e-08, "loss": 0.2571, "step": 41278 }, { "epoch": 1.9337143392514171, "grad_norm": 0.6134937162873177, "learning_rate": 1.4389052836252526e-08, "loss": 0.2808, "step": 41279 }, { "epoch": 1.9337611842413454, "grad_norm": 0.5699597300251195, "learning_rate": 1.4368741636458238e-08, "loss": 0.2639, "step": 41280 }, { "epoch": 1.9338080292312738, "grad_norm": 0.6118465233559531, "learning_rate": 1.4348444740847067e-08, "loss": 0.2816, "step": 41281 }, { "epoch": 1.933854874221202, "grad_norm": 0.5902682360822127, "learning_rate": 1.4328162149535863e-08, "loss": 0.2774, "step": 41282 }, { "epoch": 1.9339017192111303, "grad_norm": 0.5967315295213735, "learning_rate": 1.4307893862641197e-08, "loss": 0.2588, "step": 41283 }, { "epoch": 1.9339485642010588, "grad_norm": 0.5780772568989351, "learning_rate": 1.4287639880280202e-08, "loss": 0.267, "step": 41284 }, { "epoch": 1.933995409190987, "grad_norm": 0.5777016813899538, "learning_rate": 1.4267400202568894e-08, "loss": 0.269, "step": 41285 }, { "epoch": 1.9340422541809152, "grad_norm": 0.648628577797196, "learning_rate": 1.4247174829623844e-08, "loss": 0.2786, "step": 41286 }, { "epoch": 1.9340890991708437, "grad_norm": 0.5965159901346441, "learning_rate": 1.4226963761561629e-08, "loss": 0.2657, "step": 41287 }, { "epoch": 1.9341359441607722, "grad_norm": 0.5977526472510416, "learning_rate": 1.4206766998498544e-08, "loss": 0.2751, "step": 41288 }, { "epoch": 1.9341827891507002, "grad_norm": 0.6014803025973564, "learning_rate": 1.4186584540550607e-08, "loss": 0.2544, "step": 41289 }, { "epoch": 1.9342296341406287, "grad_norm": 0.6492525797071905, "learning_rate": 1.4166416387834115e-08, "loss": 0.2834, "step": 41290 }, { "epoch": 1.9342764791305571, "grad_norm": 0.5622829878163498, "learning_rate": 1.4146262540465084e-08, "loss": 0.2689, "step": 41291 }, { "epoch": 1.9343233241204854, "grad_norm": 0.599694917440185, "learning_rate": 1.412612299855981e-08, "loss": 0.2605, "step": 41292 }, { "epoch": 1.9343701691104136, "grad_norm": 0.6027724506980763, "learning_rate": 1.4105997762233481e-08, "loss": 0.2656, "step": 41293 }, { "epoch": 1.934417014100342, "grad_norm": 0.5957152724433652, "learning_rate": 1.408588683160239e-08, "loss": 0.2844, "step": 41294 }, { "epoch": 1.9344638590902703, "grad_norm": 0.6047085068762023, "learning_rate": 1.4065790206782281e-08, "loss": 0.2665, "step": 41295 }, { "epoch": 1.9345107040801985, "grad_norm": 0.5946115070617253, "learning_rate": 1.4045707887888615e-08, "loss": 0.2575, "step": 41296 }, { "epoch": 1.934557549070127, "grad_norm": 0.5777778575888324, "learning_rate": 1.4025639875037412e-08, "loss": 0.2457, "step": 41297 }, { "epoch": 1.9346043940600552, "grad_norm": 0.6177391161763498, "learning_rate": 1.4005586168343577e-08, "loss": 0.273, "step": 41298 }, { "epoch": 1.9346512390499835, "grad_norm": 0.6200586407728379, "learning_rate": 1.3985546767922852e-08, "loss": 0.2839, "step": 41299 }, { "epoch": 1.934698084039912, "grad_norm": 0.570223064164053, "learning_rate": 1.3965521673890425e-08, "loss": 0.2519, "step": 41300 }, { "epoch": 1.9347449290298404, "grad_norm": 0.6043282837727377, "learning_rate": 1.3945510886361758e-08, "loss": 0.2779, "step": 41301 }, { "epoch": 1.9347917740197684, "grad_norm": 0.5877072821633544, "learning_rate": 1.3925514405451478e-08, "loss": 0.2806, "step": 41302 }, { "epoch": 1.934838619009697, "grad_norm": 0.6082627391623032, "learning_rate": 1.3905532231275332e-08, "loss": 0.2869, "step": 41303 }, { "epoch": 1.9348854639996254, "grad_norm": 0.5900256684210475, "learning_rate": 1.3885564363947668e-08, "loss": 0.2706, "step": 41304 }, { "epoch": 1.9349323089895536, "grad_norm": 0.5767146321944463, "learning_rate": 1.3865610803584228e-08, "loss": 0.253, "step": 41305 }, { "epoch": 1.9349791539794818, "grad_norm": 0.5517590215146139, "learning_rate": 1.3845671550298811e-08, "loss": 0.2601, "step": 41306 }, { "epoch": 1.9350259989694103, "grad_norm": 0.5751219447942814, "learning_rate": 1.382574660420688e-08, "loss": 0.2673, "step": 41307 }, { "epoch": 1.9350728439593385, "grad_norm": 0.6228140219659251, "learning_rate": 1.3805835965422787e-08, "loss": 0.2701, "step": 41308 }, { "epoch": 1.9351196889492668, "grad_norm": 0.5545023269712129, "learning_rate": 1.378593963406144e-08, "loss": 0.2575, "step": 41309 }, { "epoch": 1.9351665339391952, "grad_norm": 0.6233053050970587, "learning_rate": 1.3766057610236915e-08, "loss": 0.2826, "step": 41310 }, { "epoch": 1.9352133789291235, "grad_norm": 0.599852323194331, "learning_rate": 1.3746189894064121e-08, "loss": 0.2633, "step": 41311 }, { "epoch": 1.9352602239190517, "grad_norm": 0.5949923075215325, "learning_rate": 1.3726336485656855e-08, "loss": 0.2611, "step": 41312 }, { "epoch": 1.9353070689089802, "grad_norm": 0.5976273729572379, "learning_rate": 1.3706497385129746e-08, "loss": 0.2707, "step": 41313 }, { "epoch": 1.9353539138989087, "grad_norm": 0.637414210384351, "learning_rate": 1.3686672592596873e-08, "loss": 0.3007, "step": 41314 }, { "epoch": 1.935400758888837, "grad_norm": 0.6123727753681399, "learning_rate": 1.3666862108172308e-08, "loss": 0.2704, "step": 41315 }, { "epoch": 1.9354476038787651, "grad_norm": 0.6434253685849217, "learning_rate": 1.364706593196985e-08, "loss": 0.2848, "step": 41316 }, { "epoch": 1.9354944488686936, "grad_norm": 0.6188190795089419, "learning_rate": 1.3627284064103574e-08, "loss": 0.2868, "step": 41317 }, { "epoch": 1.9355412938586218, "grad_norm": 0.6001478853493931, "learning_rate": 1.3607516504687556e-08, "loss": 0.2657, "step": 41318 }, { "epoch": 1.93558813884855, "grad_norm": 0.561459753196544, "learning_rate": 1.3587763253835318e-08, "loss": 0.2603, "step": 41319 }, { "epoch": 1.9356349838384785, "grad_norm": 0.5863398193545426, "learning_rate": 1.3568024311660377e-08, "loss": 0.2751, "step": 41320 }, { "epoch": 1.9356818288284068, "grad_norm": 0.6162686197286902, "learning_rate": 1.354829967827681e-08, "loss": 0.2672, "step": 41321 }, { "epoch": 1.935728673818335, "grad_norm": 0.600158759140803, "learning_rate": 1.3528589353797583e-08, "loss": 0.2663, "step": 41322 }, { "epoch": 1.9357755188082635, "grad_norm": 0.5933888601091302, "learning_rate": 1.3508893338336492e-08, "loss": 0.2687, "step": 41323 }, { "epoch": 1.935822363798192, "grad_norm": 0.6243429369529473, "learning_rate": 1.3489211632006505e-08, "loss": 0.276, "step": 41324 }, { "epoch": 1.93586920878812, "grad_norm": 0.6422536099903544, "learning_rate": 1.3469544234921416e-08, "loss": 0.2691, "step": 41325 }, { "epoch": 1.9359160537780484, "grad_norm": 0.5953407675672127, "learning_rate": 1.3449891147194194e-08, "loss": 0.2737, "step": 41326 }, { "epoch": 1.935962898767977, "grad_norm": 0.6040990464373681, "learning_rate": 1.3430252368937803e-08, "loss": 0.2646, "step": 41327 }, { "epoch": 1.9360097437579051, "grad_norm": 0.6075469752470704, "learning_rate": 1.3410627900265205e-08, "loss": 0.2674, "step": 41328 }, { "epoch": 1.9360565887478334, "grad_norm": 0.5828940558984809, "learning_rate": 1.3391017741289646e-08, "loss": 0.2617, "step": 41329 }, { "epoch": 1.9361034337377618, "grad_norm": 0.5751203363475101, "learning_rate": 1.3371421892123814e-08, "loss": 0.2641, "step": 41330 }, { "epoch": 1.93615027872769, "grad_norm": 0.6304427548789633, "learning_rate": 1.335184035288012e-08, "loss": 0.2785, "step": 41331 }, { "epoch": 1.9361971237176183, "grad_norm": 0.611755224141276, "learning_rate": 1.3332273123671802e-08, "loss": 0.2733, "step": 41332 }, { "epoch": 1.9362439687075468, "grad_norm": 0.5585486528235851, "learning_rate": 1.3312720204611274e-08, "loss": 0.2512, "step": 41333 }, { "epoch": 1.936290813697475, "grad_norm": 0.5768051171072845, "learning_rate": 1.3293181595811222e-08, "loss": 0.2779, "step": 41334 }, { "epoch": 1.9363376586874033, "grad_norm": 0.6011118520144818, "learning_rate": 1.327365729738378e-08, "loss": 0.2854, "step": 41335 }, { "epoch": 1.9363845036773317, "grad_norm": 0.575799583035925, "learning_rate": 1.3254147309441357e-08, "loss": 0.2685, "step": 41336 }, { "epoch": 1.9364313486672602, "grad_norm": 0.5885542509964875, "learning_rate": 1.3234651632096086e-08, "loss": 0.2624, "step": 41337 }, { "epoch": 1.9364781936571882, "grad_norm": 0.5801119189560691, "learning_rate": 1.3215170265460653e-08, "loss": 0.27, "step": 41338 }, { "epoch": 1.9365250386471167, "grad_norm": 0.5895609514811461, "learning_rate": 1.3195703209646915e-08, "loss": 0.2736, "step": 41339 }, { "epoch": 1.9365718836370451, "grad_norm": 0.6248188841449763, "learning_rate": 1.3176250464766727e-08, "loss": 0.2634, "step": 41340 }, { "epoch": 1.9366187286269734, "grad_norm": 0.5565474179570029, "learning_rate": 1.315681203093222e-08, "loss": 0.2531, "step": 41341 }, { "epoch": 1.9366655736169016, "grad_norm": 0.5627494121231758, "learning_rate": 1.3137387908255251e-08, "loss": 0.2486, "step": 41342 }, { "epoch": 1.93671241860683, "grad_norm": 0.5937223555500093, "learning_rate": 1.3117978096847395e-08, "loss": 0.2691, "step": 41343 }, { "epoch": 1.9367592635967583, "grad_norm": 0.5524030509408968, "learning_rate": 1.3098582596820786e-08, "loss": 0.2567, "step": 41344 }, { "epoch": 1.9368061085866866, "grad_norm": 0.5815591498904742, "learning_rate": 1.3079201408286446e-08, "loss": 0.2687, "step": 41345 }, { "epoch": 1.936852953576615, "grad_norm": 0.6150999802981346, "learning_rate": 1.3059834531356507e-08, "loss": 0.2648, "step": 41346 }, { "epoch": 1.9368997985665433, "grad_norm": 0.5812050169194136, "learning_rate": 1.3040481966141993e-08, "loss": 0.2644, "step": 41347 }, { "epoch": 1.9369466435564715, "grad_norm": 0.6053729519601703, "learning_rate": 1.3021143712754759e-08, "loss": 0.2861, "step": 41348 }, { "epoch": 1.9369934885464, "grad_norm": 0.597834675810876, "learning_rate": 1.3001819771305546e-08, "loss": 0.2645, "step": 41349 }, { "epoch": 1.9370403335363284, "grad_norm": 0.6109084401312156, "learning_rate": 1.2982510141905657e-08, "loss": 0.2714, "step": 41350 }, { "epoch": 1.9370871785262567, "grad_norm": 0.5653001389868214, "learning_rate": 1.2963214824666393e-08, "loss": 0.2535, "step": 41351 }, { "epoch": 1.937134023516185, "grad_norm": 0.5985028233120001, "learning_rate": 1.2943933819698772e-08, "loss": 0.2775, "step": 41352 }, { "epoch": 1.9371808685061134, "grad_norm": 0.57824454722799, "learning_rate": 1.2924667127113822e-08, "loss": 0.2664, "step": 41353 }, { "epoch": 1.9372277134960416, "grad_norm": 0.6193485261868877, "learning_rate": 1.2905414747022282e-08, "loss": 0.2746, "step": 41354 }, { "epoch": 1.9372745584859699, "grad_norm": 0.5998137231981232, "learning_rate": 1.2886176679534624e-08, "loss": 0.279, "step": 41355 }, { "epoch": 1.9373214034758983, "grad_norm": 0.609159140317301, "learning_rate": 1.2866952924762143e-08, "loss": 0.2591, "step": 41356 }, { "epoch": 1.9373682484658266, "grad_norm": 0.5821639999739511, "learning_rate": 1.284774348281531e-08, "loss": 0.252, "step": 41357 }, { "epoch": 1.9374150934557548, "grad_norm": 0.5739080982382263, "learning_rate": 1.2828548353804593e-08, "loss": 0.2696, "step": 41358 }, { "epoch": 1.9374619384456833, "grad_norm": 0.6246770956042573, "learning_rate": 1.2809367537840456e-08, "loss": 0.2799, "step": 41359 }, { "epoch": 1.9375087834356117, "grad_norm": 0.5667364593085369, "learning_rate": 1.279020103503309e-08, "loss": 0.2595, "step": 41360 }, { "epoch": 1.9375556284255397, "grad_norm": 0.6210797838904168, "learning_rate": 1.277104884549324e-08, "loss": 0.2683, "step": 41361 }, { "epoch": 1.9376024734154682, "grad_norm": 0.6438030320913574, "learning_rate": 1.2751910969330817e-08, "loss": 0.2777, "step": 41362 }, { "epoch": 1.9376493184053967, "grad_norm": 0.6319807337618353, "learning_rate": 1.2732787406655734e-08, "loss": 0.2789, "step": 41363 }, { "epoch": 1.937696163395325, "grad_norm": 0.5883407742019544, "learning_rate": 1.2713678157578734e-08, "loss": 0.2658, "step": 41364 }, { "epoch": 1.9377430083852532, "grad_norm": 0.6258644582470424, "learning_rate": 1.2694583222208901e-08, "loss": 0.2824, "step": 41365 }, { "epoch": 1.9377898533751816, "grad_norm": 0.5884501913729854, "learning_rate": 1.2675502600656974e-08, "loss": 0.2759, "step": 41366 }, { "epoch": 1.9378366983651099, "grad_norm": 0.5532327140666095, "learning_rate": 1.2656436293032037e-08, "loss": 0.2464, "step": 41367 }, { "epoch": 1.937883543355038, "grad_norm": 0.5474203484224683, "learning_rate": 1.2637384299444278e-08, "loss": 0.2474, "step": 41368 }, { "epoch": 1.9379303883449666, "grad_norm": 0.604397208086283, "learning_rate": 1.261834662000333e-08, "loss": 0.2726, "step": 41369 }, { "epoch": 1.9379772333348948, "grad_norm": 0.5775253870279476, "learning_rate": 1.2599323254818553e-08, "loss": 0.2546, "step": 41370 }, { "epoch": 1.938024078324823, "grad_norm": 0.6006165767297663, "learning_rate": 1.25803142039993e-08, "loss": 0.2619, "step": 41371 }, { "epoch": 1.9380709233147515, "grad_norm": 0.5694050362283694, "learning_rate": 1.2561319467655208e-08, "loss": 0.2709, "step": 41372 }, { "epoch": 1.93811776830468, "grad_norm": 0.5735073777394292, "learning_rate": 1.2542339045895634e-08, "loss": 0.2679, "step": 41373 }, { "epoch": 1.938164613294608, "grad_norm": 0.5825085932851506, "learning_rate": 1.2523372938829658e-08, "loss": 0.2725, "step": 41374 }, { "epoch": 1.9382114582845364, "grad_norm": 0.6074987384436669, "learning_rate": 1.2504421146566636e-08, "loss": 0.2898, "step": 41375 }, { "epoch": 1.938258303274465, "grad_norm": 0.6887347590145048, "learning_rate": 1.2485483669215371e-08, "loss": 0.3027, "step": 41376 }, { "epoch": 1.9383051482643932, "grad_norm": 0.5708954009730554, "learning_rate": 1.2466560506884939e-08, "loss": 0.2473, "step": 41377 }, { "epoch": 1.9383519932543214, "grad_norm": 0.6139100021169825, "learning_rate": 1.2447651659683869e-08, "loss": 0.2692, "step": 41378 }, { "epoch": 1.9383988382442499, "grad_norm": 0.61119174903358, "learning_rate": 1.242875712772179e-08, "loss": 0.2765, "step": 41379 }, { "epoch": 1.938445683234178, "grad_norm": 0.603141605859602, "learning_rate": 1.2409876911106677e-08, "loss": 0.2699, "step": 41380 }, { "epoch": 1.9384925282241063, "grad_norm": 0.6153811097650479, "learning_rate": 1.2391011009947606e-08, "loss": 0.2764, "step": 41381 }, { "epoch": 1.9385393732140348, "grad_norm": 0.5915227853411541, "learning_rate": 1.2372159424353103e-08, "loss": 0.2821, "step": 41382 }, { "epoch": 1.938586218203963, "grad_norm": 0.5928686322286588, "learning_rate": 1.235332215443169e-08, "loss": 0.2677, "step": 41383 }, { "epoch": 1.9386330631938913, "grad_norm": 0.5725379647284895, "learning_rate": 1.2334499200291616e-08, "loss": 0.2741, "step": 41384 }, { "epoch": 1.9386799081838197, "grad_norm": 0.6032673453964179, "learning_rate": 1.2315690562041127e-08, "loss": 0.2617, "step": 41385 }, { "epoch": 1.9387267531737482, "grad_norm": 0.5888626360911676, "learning_rate": 1.229689623978847e-08, "loss": 0.2746, "step": 41386 }, { "epoch": 1.9387735981636764, "grad_norm": 0.6422539781872116, "learning_rate": 1.227811623364189e-08, "loss": 0.2811, "step": 41387 }, { "epoch": 1.9388204431536047, "grad_norm": 0.5740678434952571, "learning_rate": 1.2259350543709914e-08, "loss": 0.2503, "step": 41388 }, { "epoch": 1.9388672881435332, "grad_norm": 0.5840016175262875, "learning_rate": 1.2240599170099676e-08, "loss": 0.264, "step": 41389 }, { "epoch": 1.9389141331334614, "grad_norm": 0.5986246676782371, "learning_rate": 1.2221862112919702e-08, "loss": 0.2544, "step": 41390 }, { "epoch": 1.9389609781233896, "grad_norm": 0.587167270899548, "learning_rate": 1.2203139372277684e-08, "loss": 0.2586, "step": 41391 }, { "epoch": 1.939007823113318, "grad_norm": 0.5727304064197424, "learning_rate": 1.2184430948281034e-08, "loss": 0.2628, "step": 41392 }, { "epoch": 1.9390546681032463, "grad_norm": 0.6247494153828991, "learning_rate": 1.2165736841038e-08, "loss": 0.2751, "step": 41393 }, { "epoch": 1.9391015130931746, "grad_norm": 0.5902950593957649, "learning_rate": 1.214705705065572e-08, "loss": 0.2495, "step": 41394 }, { "epoch": 1.939148358083103, "grad_norm": 0.5846531320028606, "learning_rate": 1.2128391577241883e-08, "loss": 0.2673, "step": 41395 }, { "epoch": 1.9391952030730315, "grad_norm": 0.572127407038913, "learning_rate": 1.2109740420903903e-08, "loss": 0.2668, "step": 41396 }, { "epoch": 1.9392420480629595, "grad_norm": 0.594092002368428, "learning_rate": 1.209110358174892e-08, "loss": 0.2707, "step": 41397 }, { "epoch": 1.939288893052888, "grad_norm": 0.5745308698265348, "learning_rate": 1.2072481059884344e-08, "loss": 0.2556, "step": 41398 }, { "epoch": 1.9393357380428164, "grad_norm": 0.556244062878364, "learning_rate": 1.2053872855417315e-08, "loss": 0.2615, "step": 41399 }, { "epoch": 1.9393825830327447, "grad_norm": 0.5819811855530136, "learning_rate": 1.2035278968454967e-08, "loss": 0.2601, "step": 41400 }, { "epoch": 1.939429428022673, "grad_norm": 0.6170995971307153, "learning_rate": 1.2016699399104437e-08, "loss": 0.253, "step": 41401 }, { "epoch": 1.9394762730126014, "grad_norm": 0.593726211163491, "learning_rate": 1.1998134147472306e-08, "loss": 0.2714, "step": 41402 }, { "epoch": 1.9395231180025296, "grad_norm": 0.5902610940847909, "learning_rate": 1.1979583213665713e-08, "loss": 0.2707, "step": 41403 }, { "epoch": 1.9395699629924579, "grad_norm": 0.639471969436233, "learning_rate": 1.196104659779096e-08, "loss": 0.2891, "step": 41404 }, { "epoch": 1.9396168079823863, "grad_norm": 0.5528675123608568, "learning_rate": 1.1942524299955182e-08, "loss": 0.2521, "step": 41405 }, { "epoch": 1.9396636529723146, "grad_norm": 0.5634073121101821, "learning_rate": 1.1924016320264964e-08, "loss": 0.2571, "step": 41406 }, { "epoch": 1.9397104979622428, "grad_norm": 0.5913957190000964, "learning_rate": 1.1905522658826608e-08, "loss": 0.263, "step": 41407 }, { "epoch": 1.9397573429521713, "grad_norm": 0.6368607423747507, "learning_rate": 1.1887043315746417e-08, "loss": 0.2643, "step": 41408 }, { "epoch": 1.9398041879420997, "grad_norm": 0.5994289897539674, "learning_rate": 1.1868578291131249e-08, "loss": 0.2903, "step": 41409 }, { "epoch": 1.9398510329320278, "grad_norm": 0.6066318897234645, "learning_rate": 1.1850127585086856e-08, "loss": 0.2667, "step": 41410 }, { "epoch": 1.9398978779219562, "grad_norm": 0.6047915980709069, "learning_rate": 1.1831691197719541e-08, "loss": 0.2738, "step": 41411 }, { "epoch": 1.9399447229118847, "grad_norm": 0.5841535215729222, "learning_rate": 1.1813269129135607e-08, "loss": 0.2627, "step": 41412 }, { "epoch": 1.939991567901813, "grad_norm": 0.6126432607066091, "learning_rate": 1.1794861379440803e-08, "loss": 0.2615, "step": 41413 }, { "epoch": 1.9400384128917412, "grad_norm": 0.6031015254425449, "learning_rate": 1.1776467948741432e-08, "loss": 0.2721, "step": 41414 }, { "epoch": 1.9400852578816696, "grad_norm": 0.6115166857962401, "learning_rate": 1.1758088837142967e-08, "loss": 0.2795, "step": 41415 }, { "epoch": 1.9401321028715979, "grad_norm": 0.6532965075472086, "learning_rate": 1.1739724044751155e-08, "loss": 0.2855, "step": 41416 }, { "epoch": 1.9401789478615261, "grad_norm": 0.562867214036642, "learning_rate": 1.1721373571672023e-08, "loss": 0.261, "step": 41417 }, { "epoch": 1.9402257928514546, "grad_norm": 0.5953657346061414, "learning_rate": 1.1703037418010765e-08, "loss": 0.2692, "step": 41418 }, { "epoch": 1.9402726378413828, "grad_norm": 0.6065873708533948, "learning_rate": 1.1684715583873408e-08, "loss": 0.2609, "step": 41419 }, { "epoch": 1.940319482831311, "grad_norm": 0.6485068817168416, "learning_rate": 1.1666408069364866e-08, "loss": 0.2695, "step": 41420 }, { "epoch": 1.9403663278212395, "grad_norm": 0.5410384782472479, "learning_rate": 1.1648114874590887e-08, "loss": 0.255, "step": 41421 }, { "epoch": 1.940413172811168, "grad_norm": 0.5877893574781218, "learning_rate": 1.1629835999656392e-08, "loss": 0.2773, "step": 41422 }, { "epoch": 1.9404600178010962, "grad_norm": 0.6057411749949332, "learning_rate": 1.1611571444666847e-08, "loss": 0.2734, "step": 41423 }, { "epoch": 1.9405068627910245, "grad_norm": 0.5855890746036976, "learning_rate": 1.159332120972717e-08, "loss": 0.2652, "step": 41424 }, { "epoch": 1.940553707780953, "grad_norm": 0.5795384015945206, "learning_rate": 1.1575085294942835e-08, "loss": 0.2716, "step": 41425 }, { "epoch": 1.9406005527708812, "grad_norm": 0.5929978204594679, "learning_rate": 1.1556863700418198e-08, "loss": 0.258, "step": 41426 }, { "epoch": 1.9406473977608094, "grad_norm": 0.6193753924482055, "learning_rate": 1.1538656426258454e-08, "loss": 0.2737, "step": 41427 }, { "epoch": 1.9406942427507379, "grad_norm": 0.6083300820619991, "learning_rate": 1.1520463472567967e-08, "loss": 0.2755, "step": 41428 }, { "epoch": 1.9407410877406661, "grad_norm": 0.6515194190155941, "learning_rate": 1.1502284839452205e-08, "loss": 0.3044, "step": 41429 }, { "epoch": 1.9407879327305944, "grad_norm": 0.5877040928378816, "learning_rate": 1.1484120527014975e-08, "loss": 0.2719, "step": 41430 }, { "epoch": 1.9408347777205228, "grad_norm": 0.636675264660817, "learning_rate": 1.146597053536147e-08, "loss": 0.2707, "step": 41431 }, { "epoch": 1.9408816227104513, "grad_norm": 0.6231607179644064, "learning_rate": 1.1447834864595774e-08, "loss": 0.2695, "step": 41432 }, { "epoch": 1.9409284677003793, "grad_norm": 0.5991310734690727, "learning_rate": 1.142971351482225e-08, "loss": 0.2621, "step": 41433 }, { "epoch": 1.9409753126903078, "grad_norm": 0.6419131058161834, "learning_rate": 1.1411606486145254e-08, "loss": 0.2576, "step": 41434 }, { "epoch": 1.9410221576802362, "grad_norm": 0.6416120137590366, "learning_rate": 1.1393513778668875e-08, "loss": 0.2681, "step": 41435 }, { "epoch": 1.9410690026701645, "grad_norm": 0.6251838668377125, "learning_rate": 1.137543539249747e-08, "loss": 0.2834, "step": 41436 }, { "epoch": 1.9411158476600927, "grad_norm": 0.6014235343875348, "learning_rate": 1.1357371327734844e-08, "loss": 0.2808, "step": 41437 }, { "epoch": 1.9411626926500212, "grad_norm": 0.5406246655319777, "learning_rate": 1.1339321584485086e-08, "loss": 0.252, "step": 41438 }, { "epoch": 1.9412095376399494, "grad_norm": 0.62641296976782, "learning_rate": 1.1321286162851996e-08, "loss": 0.2769, "step": 41439 }, { "epoch": 1.9412563826298777, "grad_norm": 0.5493728941164323, "learning_rate": 1.130326506293966e-08, "loss": 0.2683, "step": 41440 }, { "epoch": 1.9413032276198061, "grad_norm": 0.5898526254111278, "learning_rate": 1.1285258284851053e-08, "loss": 0.2665, "step": 41441 }, { "epoch": 1.9413500726097344, "grad_norm": 0.6177358775762889, "learning_rate": 1.1267265828690533e-08, "loss": 0.2505, "step": 41442 }, { "epoch": 1.9413969175996626, "grad_norm": 0.6099144921232097, "learning_rate": 1.1249287694561628e-08, "loss": 0.2709, "step": 41443 }, { "epoch": 1.941443762589591, "grad_norm": 0.5743787645310576, "learning_rate": 1.1231323882567035e-08, "loss": 0.2597, "step": 41444 }, { "epoch": 1.9414906075795195, "grad_norm": 0.6281196076969783, "learning_rate": 1.1213374392811117e-08, "loss": 0.2673, "step": 41445 }, { "epoch": 1.9415374525694475, "grad_norm": 0.5706284751060651, "learning_rate": 1.1195439225396287e-08, "loss": 0.2644, "step": 41446 }, { "epoch": 1.941584297559376, "grad_norm": 0.6404052130936416, "learning_rate": 1.1177518380426355e-08, "loss": 0.2656, "step": 41447 }, { "epoch": 1.9416311425493045, "grad_norm": 0.5942320130407504, "learning_rate": 1.1159611858004294e-08, "loss": 0.281, "step": 41448 }, { "epoch": 1.9416779875392327, "grad_norm": 0.5716749138653526, "learning_rate": 1.1141719658233353e-08, "loss": 0.2613, "step": 41449 }, { "epoch": 1.941724832529161, "grad_norm": 0.6003211323984987, "learning_rate": 1.112384178121595e-08, "loss": 0.2741, "step": 41450 }, { "epoch": 1.9417716775190894, "grad_norm": 0.5770550316170178, "learning_rate": 1.1105978227055336e-08, "loss": 0.2656, "step": 41451 }, { "epoch": 1.9418185225090177, "grad_norm": 0.6109080215823305, "learning_rate": 1.1088128995854208e-08, "loss": 0.2744, "step": 41452 }, { "epoch": 1.941865367498946, "grad_norm": 0.6024583611728667, "learning_rate": 1.1070294087715261e-08, "loss": 0.2618, "step": 41453 }, { "epoch": 1.9419122124888744, "grad_norm": 0.5797651335804217, "learning_rate": 1.1052473502741467e-08, "loss": 0.2538, "step": 41454 }, { "epoch": 1.9419590574788026, "grad_norm": 0.551840776081418, "learning_rate": 1.1034667241034968e-08, "loss": 0.2741, "step": 41455 }, { "epoch": 1.9420059024687308, "grad_norm": 0.604563497947849, "learning_rate": 1.101687530269846e-08, "loss": 0.2774, "step": 41456 }, { "epoch": 1.9420527474586593, "grad_norm": 0.6110262997760012, "learning_rate": 1.0999097687834082e-08, "loss": 0.2769, "step": 41457 }, { "epoch": 1.9420995924485878, "grad_norm": 0.6157915818392687, "learning_rate": 1.0981334396544253e-08, "loss": 0.2736, "step": 41458 }, { "epoch": 1.942146437438516, "grad_norm": 0.5824457093025268, "learning_rate": 1.0963585428931668e-08, "loss": 0.2796, "step": 41459 }, { "epoch": 1.9421932824284442, "grad_norm": 0.5932380240519255, "learning_rate": 1.0945850785097633e-08, "loss": 0.2683, "step": 41460 }, { "epoch": 1.9422401274183727, "grad_norm": 0.6066754706402677, "learning_rate": 1.0928130465144848e-08, "loss": 0.2745, "step": 41461 }, { "epoch": 1.942286972408301, "grad_norm": 0.6094809580742857, "learning_rate": 1.0910424469175173e-08, "loss": 0.2648, "step": 41462 }, { "epoch": 1.9423338173982292, "grad_norm": 0.6209952302455577, "learning_rate": 1.0892732797290196e-08, "loss": 0.2558, "step": 41463 }, { "epoch": 1.9423806623881577, "grad_norm": 0.5718129873247831, "learning_rate": 1.0875055449591777e-08, "loss": 0.2748, "step": 41464 }, { "epoch": 1.942427507378086, "grad_norm": 0.5769829999488085, "learning_rate": 1.0857392426182057e-08, "loss": 0.2615, "step": 41465 }, { "epoch": 1.9424743523680141, "grad_norm": 0.6153410430027014, "learning_rate": 1.0839743727162344e-08, "loss": 0.2784, "step": 41466 }, { "epoch": 1.9425211973579426, "grad_norm": 0.5831636977883787, "learning_rate": 1.0822109352634225e-08, "loss": 0.2731, "step": 41467 }, { "epoch": 1.942568042347871, "grad_norm": 0.5933757409384014, "learning_rate": 1.0804489302699283e-08, "loss": 0.2632, "step": 41468 }, { "epoch": 1.942614887337799, "grad_norm": 0.6047320973997086, "learning_rate": 1.0786883577458829e-08, "loss": 0.2769, "step": 41469 }, { "epoch": 1.9426617323277275, "grad_norm": 0.6017198940500853, "learning_rate": 1.0769292177014445e-08, "loss": 0.2782, "step": 41470 }, { "epoch": 1.942708577317656, "grad_norm": 0.6108582681959153, "learning_rate": 1.0751715101466887e-08, "loss": 0.2834, "step": 41471 }, { "epoch": 1.9427554223075842, "grad_norm": 0.6071672850152829, "learning_rate": 1.073415235091746e-08, "loss": 0.2693, "step": 41472 }, { "epoch": 1.9428022672975125, "grad_norm": 0.5742367524097332, "learning_rate": 1.071660392546775e-08, "loss": 0.2478, "step": 41473 }, { "epoch": 1.942849112287441, "grad_norm": 0.6286657745012056, "learning_rate": 1.0699069825218233e-08, "loss": 0.2803, "step": 41474 }, { "epoch": 1.9428959572773692, "grad_norm": 0.592074372636775, "learning_rate": 1.0681550050269662e-08, "loss": 0.2688, "step": 41475 }, { "epoch": 1.9429428022672974, "grad_norm": 0.6230192490464276, "learning_rate": 1.0664044600723066e-08, "loss": 0.2712, "step": 41476 }, { "epoch": 1.942989647257226, "grad_norm": 0.6684110450655779, "learning_rate": 1.0646553476679478e-08, "loss": 0.2875, "step": 41477 }, { "epoch": 1.9430364922471541, "grad_norm": 0.5837396861768955, "learning_rate": 1.0629076678239092e-08, "loss": 0.2632, "step": 41478 }, { "epoch": 1.9430833372370824, "grad_norm": 0.6485359211033697, "learning_rate": 1.0611614205502662e-08, "loss": 0.2837, "step": 41479 }, { "epoch": 1.9431301822270108, "grad_norm": 0.5808027263354336, "learning_rate": 1.0594166058570942e-08, "loss": 0.2617, "step": 41480 }, { "epoch": 1.9431770272169393, "grad_norm": 0.6199359468820483, "learning_rate": 1.057673223754413e-08, "loss": 0.2807, "step": 41481 }, { "epoch": 1.9432238722068673, "grad_norm": 0.5627876881530959, "learning_rate": 1.0559312742522143e-08, "loss": 0.2553, "step": 41482 }, { "epoch": 1.9432707171967958, "grad_norm": 0.5899032600276446, "learning_rate": 1.0541907573606015e-08, "loss": 0.2684, "step": 41483 }, { "epoch": 1.9433175621867242, "grad_norm": 0.5834679099564664, "learning_rate": 1.0524516730895384e-08, "loss": 0.2711, "step": 41484 }, { "epoch": 1.9433644071766525, "grad_norm": 0.5779970266873452, "learning_rate": 1.0507140214490174e-08, "loss": 0.2705, "step": 41485 }, { "epoch": 1.9434112521665807, "grad_norm": 0.5508077240845161, "learning_rate": 1.0489778024491137e-08, "loss": 0.2436, "step": 41486 }, { "epoch": 1.9434580971565092, "grad_norm": 0.6234656642317568, "learning_rate": 1.047243016099736e-08, "loss": 0.2851, "step": 41487 }, { "epoch": 1.9435049421464374, "grad_norm": 0.606167448966346, "learning_rate": 1.0455096624109318e-08, "loss": 0.2715, "step": 41488 }, { "epoch": 1.9435517871363657, "grad_norm": 0.5613478750336007, "learning_rate": 1.0437777413926098e-08, "loss": 0.2531, "step": 41489 }, { "epoch": 1.9435986321262941, "grad_norm": 0.6611692828897123, "learning_rate": 1.0420472530548175e-08, "loss": 0.2748, "step": 41490 }, { "epoch": 1.9436454771162224, "grad_norm": 0.6228959625962915, "learning_rate": 1.040318197407436e-08, "loss": 0.2775, "step": 41491 }, { "epoch": 1.9436923221061506, "grad_norm": 0.5978624181939818, "learning_rate": 1.0385905744604574e-08, "loss": 0.2664, "step": 41492 }, { "epoch": 1.943739167096079, "grad_norm": 0.5815600735710438, "learning_rate": 1.036864384223818e-08, "loss": 0.2753, "step": 41493 }, { "epoch": 1.9437860120860075, "grad_norm": 0.5863255886559691, "learning_rate": 1.0351396267074542e-08, "loss": 0.2477, "step": 41494 }, { "epoch": 1.9438328570759358, "grad_norm": 0.6290317647849702, "learning_rate": 1.0334163019213027e-08, "loss": 0.2831, "step": 41495 }, { "epoch": 1.943879702065864, "grad_norm": 0.6321286603989876, "learning_rate": 1.031694409875217e-08, "loss": 0.2766, "step": 41496 }, { "epoch": 1.9439265470557925, "grad_norm": 0.6297329457108896, "learning_rate": 1.0299739505791883e-08, "loss": 0.2779, "step": 41497 }, { "epoch": 1.9439733920457207, "grad_norm": 0.5965685616808131, "learning_rate": 1.0282549240430984e-08, "loss": 0.2717, "step": 41498 }, { "epoch": 1.944020237035649, "grad_norm": 0.6324325830523385, "learning_rate": 1.0265373302768e-08, "loss": 0.2561, "step": 41499 }, { "epoch": 1.9440670820255774, "grad_norm": 0.5418278041434126, "learning_rate": 1.0248211692902021e-08, "loss": 0.2457, "step": 41500 }, { "epoch": 1.9441139270155057, "grad_norm": 0.612675747659721, "learning_rate": 1.0231064410931857e-08, "loss": 0.2809, "step": 41501 }, { "epoch": 1.944160772005434, "grad_norm": 0.6067465435451205, "learning_rate": 1.0213931456956038e-08, "loss": 0.2766, "step": 41502 }, { "epoch": 1.9442076169953624, "grad_norm": 0.6544583049633654, "learning_rate": 1.0196812831073377e-08, "loss": 0.2696, "step": 41503 }, { "epoch": 1.9442544619852908, "grad_norm": 0.6477825694209282, "learning_rate": 1.0179708533382404e-08, "loss": 0.2955, "step": 41504 }, { "epoch": 1.9443013069752189, "grad_norm": 0.6087133030064084, "learning_rate": 1.0162618563981097e-08, "loss": 0.2786, "step": 41505 }, { "epoch": 1.9443481519651473, "grad_norm": 0.6366341542913827, "learning_rate": 1.0145542922968266e-08, "loss": 0.2769, "step": 41506 }, { "epoch": 1.9443949969550758, "grad_norm": 0.5784022127492304, "learning_rate": 1.0128481610442165e-08, "loss": 0.2607, "step": 41507 }, { "epoch": 1.944441841945004, "grad_norm": 0.6082947705568879, "learning_rate": 1.011143462650077e-08, "loss": 0.2756, "step": 41508 }, { "epoch": 1.9444886869349323, "grad_norm": 0.5898049187244542, "learning_rate": 1.0094401971242063e-08, "loss": 0.2742, "step": 41509 }, { "epoch": 1.9445355319248607, "grad_norm": 0.594822872472372, "learning_rate": 1.0077383644764571e-08, "loss": 0.265, "step": 41510 }, { "epoch": 1.944582376914789, "grad_norm": 0.6355102081942401, "learning_rate": 1.0060379647165718e-08, "loss": 0.2697, "step": 41511 }, { "epoch": 1.9446292219047172, "grad_norm": 0.6233619306008298, "learning_rate": 1.0043389978543484e-08, "loss": 0.2773, "step": 41512 }, { "epoch": 1.9446760668946457, "grad_norm": 0.5932715482960041, "learning_rate": 1.002641463899584e-08, "loss": 0.2749, "step": 41513 }, { "epoch": 1.944722911884574, "grad_norm": 0.6302549548836152, "learning_rate": 1.0009453628620492e-08, "loss": 0.2648, "step": 41514 }, { "epoch": 1.9447697568745022, "grad_norm": 0.7035923880587642, "learning_rate": 9.992506947514858e-09, "loss": 0.284, "step": 41515 }, { "epoch": 1.9448166018644306, "grad_norm": 0.6087374007859132, "learning_rate": 9.97557459577636e-09, "loss": 0.2742, "step": 41516 }, { "epoch": 1.944863446854359, "grad_norm": 0.6055933944245796, "learning_rate": 9.958656573502702e-09, "loss": 0.2697, "step": 41517 }, { "epoch": 1.944910291844287, "grad_norm": 0.5813578270412301, "learning_rate": 9.941752880791023e-09, "loss": 0.2615, "step": 41518 }, { "epoch": 1.9449571368342156, "grad_norm": 0.5673236987144584, "learning_rate": 9.92486351773847e-09, "loss": 0.2406, "step": 41519 }, { "epoch": 1.945003981824144, "grad_norm": 0.6142952037708808, "learning_rate": 9.907988484442742e-09, "loss": 0.2648, "step": 41520 }, { "epoch": 1.9450508268140723, "grad_norm": 0.611505284801608, "learning_rate": 9.891127781000709e-09, "loss": 0.2838, "step": 41521 }, { "epoch": 1.9450976718040005, "grad_norm": 0.592990490521992, "learning_rate": 9.874281407509511e-09, "loss": 0.2659, "step": 41522 }, { "epoch": 1.945144516793929, "grad_norm": 0.5780075560337392, "learning_rate": 9.85744936406574e-09, "loss": 0.2761, "step": 41523 }, { "epoch": 1.9451913617838572, "grad_norm": 0.6387434559642104, "learning_rate": 9.840631650766541e-09, "loss": 0.2612, "step": 41524 }, { "epoch": 1.9452382067737854, "grad_norm": 0.6511747397149698, "learning_rate": 9.823828267708502e-09, "loss": 0.2698, "step": 41525 }, { "epoch": 1.945285051763714, "grad_norm": 0.5821295312676453, "learning_rate": 9.80703921498849e-09, "loss": 0.2571, "step": 41526 }, { "epoch": 1.9453318967536422, "grad_norm": 0.6013063474038667, "learning_rate": 9.790264492703094e-09, "loss": 0.2649, "step": 41527 }, { "epoch": 1.9453787417435704, "grad_norm": 0.6165458154477355, "learning_rate": 9.773504100948905e-09, "loss": 0.285, "step": 41528 }, { "epoch": 1.9454255867334989, "grad_norm": 0.5706972971901396, "learning_rate": 9.756758039822233e-09, "loss": 0.2753, "step": 41529 }, { "epoch": 1.9454724317234273, "grad_norm": 0.6110550111975088, "learning_rate": 9.74002630941967e-09, "loss": 0.2791, "step": 41530 }, { "epoch": 1.9455192767133556, "grad_norm": 0.5887121726241086, "learning_rate": 9.723308909837248e-09, "loss": 0.2585, "step": 41531 }, { "epoch": 1.9455661217032838, "grad_norm": 0.6204701574562487, "learning_rate": 9.70660584117128e-09, "loss": 0.2858, "step": 41532 }, { "epoch": 1.9456129666932123, "grad_norm": 0.5499744130700682, "learning_rate": 9.689917103517798e-09, "loss": 0.2547, "step": 41533 }, { "epoch": 1.9456598116831405, "grad_norm": 0.6241700242150846, "learning_rate": 9.673242696973117e-09, "loss": 0.2827, "step": 41534 }, { "epoch": 1.9457066566730687, "grad_norm": 0.5980138778884968, "learning_rate": 9.656582621632993e-09, "loss": 0.2746, "step": 41535 }, { "epoch": 1.9457535016629972, "grad_norm": 0.5836995701018092, "learning_rate": 9.639936877593181e-09, "loss": 0.2527, "step": 41536 }, { "epoch": 1.9458003466529254, "grad_norm": 0.6135824751504816, "learning_rate": 9.623305464949716e-09, "loss": 0.2686, "step": 41537 }, { "epoch": 1.9458471916428537, "grad_norm": 0.5565409998137827, "learning_rate": 9.606688383798079e-09, "loss": 0.2645, "step": 41538 }, { "epoch": 1.9458940366327822, "grad_norm": 0.6010004598539191, "learning_rate": 9.590085634234025e-09, "loss": 0.2841, "step": 41539 }, { "epoch": 1.9459408816227106, "grad_norm": 0.5998174484841943, "learning_rate": 9.573497216353034e-09, "loss": 0.2661, "step": 41540 }, { "epoch": 1.9459877266126386, "grad_norm": 0.5919665449509339, "learning_rate": 9.556923130250583e-09, "loss": 0.2666, "step": 41541 }, { "epoch": 1.946034571602567, "grad_norm": 0.5944761246194588, "learning_rate": 9.540363376022155e-09, "loss": 0.2744, "step": 41542 }, { "epoch": 1.9460814165924956, "grad_norm": 0.6009708751438888, "learning_rate": 9.523817953763226e-09, "loss": 0.2695, "step": 41543 }, { "epoch": 1.9461282615824238, "grad_norm": 0.6381883465216426, "learning_rate": 9.507286863568444e-09, "loss": 0.2846, "step": 41544 }, { "epoch": 1.946175106572352, "grad_norm": 0.5817114085564468, "learning_rate": 9.490770105533564e-09, "loss": 0.2537, "step": 41545 }, { "epoch": 1.9462219515622805, "grad_norm": 0.6571459624744335, "learning_rate": 9.474267679752958e-09, "loss": 0.2632, "step": 41546 }, { "epoch": 1.9462687965522087, "grad_norm": 0.6045048950634042, "learning_rate": 9.45777958632238e-09, "loss": 0.2756, "step": 41547 }, { "epoch": 1.946315641542137, "grad_norm": 0.5926065450741586, "learning_rate": 9.441305825335922e-09, "loss": 0.2788, "step": 41548 }, { "epoch": 1.9463624865320655, "grad_norm": 0.5869830973414666, "learning_rate": 9.424846396889065e-09, "loss": 0.2627, "step": 41549 }, { "epoch": 1.9464093315219937, "grad_norm": 0.6236753164885062, "learning_rate": 9.408401301076175e-09, "loss": 0.2904, "step": 41550 }, { "epoch": 1.946456176511922, "grad_norm": 0.6232556196172875, "learning_rate": 9.3919705379919e-09, "loss": 0.278, "step": 41551 }, { "epoch": 1.9465030215018504, "grad_norm": 0.5699312212387732, "learning_rate": 9.375554107730889e-09, "loss": 0.2483, "step": 41552 }, { "epoch": 1.9465498664917789, "grad_norm": 0.560250119792656, "learning_rate": 9.359152010387506e-09, "loss": 0.2511, "step": 41553 }, { "epoch": 1.9465967114817069, "grad_norm": 0.5746509024242059, "learning_rate": 9.342764246056123e-09, "loss": 0.2766, "step": 41554 }, { "epoch": 1.9466435564716353, "grad_norm": 0.5681173932149675, "learning_rate": 9.32639081483111e-09, "loss": 0.2606, "step": 41555 }, { "epoch": 1.9466904014615638, "grad_norm": 0.608812004920662, "learning_rate": 9.310031716806833e-09, "loss": 0.2643, "step": 41556 }, { "epoch": 1.946737246451492, "grad_norm": 0.6275448121980206, "learning_rate": 9.293686952077385e-09, "loss": 0.2804, "step": 41557 }, { "epoch": 1.9467840914414203, "grad_norm": 0.6233592839451629, "learning_rate": 9.27735652073658e-09, "loss": 0.2698, "step": 41558 }, { "epoch": 1.9468309364313487, "grad_norm": 0.6089297242063711, "learning_rate": 9.261040422878786e-09, "loss": 0.2672, "step": 41559 }, { "epoch": 1.946877781421277, "grad_norm": 0.5628404159162709, "learning_rate": 9.244738658597541e-09, "loss": 0.2642, "step": 41560 }, { "epoch": 1.9469246264112052, "grad_norm": 0.6035699968537805, "learning_rate": 9.228451227986935e-09, "loss": 0.2761, "step": 41561 }, { "epoch": 1.9469714714011337, "grad_norm": 0.6276657239050463, "learning_rate": 9.212178131140226e-09, "loss": 0.2675, "step": 41562 }, { "epoch": 1.947018316391062, "grad_norm": 0.6100252386230978, "learning_rate": 9.195919368151784e-09, "loss": 0.27, "step": 41563 }, { "epoch": 1.9470651613809902, "grad_norm": 0.5813765833348487, "learning_rate": 9.179674939114591e-09, "loss": 0.2654, "step": 41564 }, { "epoch": 1.9471120063709186, "grad_norm": 0.6166347874257011, "learning_rate": 9.16344484412246e-09, "loss": 0.2609, "step": 41565 }, { "epoch": 1.947158851360847, "grad_norm": 0.6115293494324819, "learning_rate": 9.147229083268372e-09, "loss": 0.2642, "step": 41566 }, { "epoch": 1.9472056963507753, "grad_norm": 0.5728894016575952, "learning_rate": 9.13102765664614e-09, "loss": 0.2512, "step": 41567 }, { "epoch": 1.9472525413407036, "grad_norm": 0.5658302268572785, "learning_rate": 9.114840564348749e-09, "loss": 0.2582, "step": 41568 }, { "epoch": 1.947299386330632, "grad_norm": 0.611273726309578, "learning_rate": 9.098667806469452e-09, "loss": 0.2597, "step": 41569 }, { "epoch": 1.9473462313205603, "grad_norm": 0.6094047565923911, "learning_rate": 9.082509383100957e-09, "loss": 0.2677, "step": 41570 }, { "epoch": 1.9473930763104885, "grad_norm": 0.612937621227923, "learning_rate": 9.066365294336798e-09, "loss": 0.283, "step": 41571 }, { "epoch": 1.947439921300417, "grad_norm": 0.5764786025975204, "learning_rate": 9.05023554026968e-09, "loss": 0.2633, "step": 41572 }, { "epoch": 1.9474867662903452, "grad_norm": 0.6264597899127043, "learning_rate": 9.034120120992307e-09, "loss": 0.2824, "step": 41573 }, { "epoch": 1.9475336112802735, "grad_norm": 0.6339484562882556, "learning_rate": 9.018019036597658e-09, "loss": 0.272, "step": 41574 }, { "epoch": 1.947580456270202, "grad_norm": 0.6325726528680307, "learning_rate": 9.001932287177883e-09, "loss": 0.2559, "step": 41575 }, { "epoch": 1.9476273012601304, "grad_norm": 0.6141467110001886, "learning_rate": 8.985859872826242e-09, "loss": 0.269, "step": 41576 }, { "epoch": 1.9476741462500584, "grad_norm": 0.5833008302832281, "learning_rate": 8.969801793634602e-09, "loss": 0.2599, "step": 41577 }, { "epoch": 1.9477209912399869, "grad_norm": 0.6040803246831209, "learning_rate": 8.953758049695948e-09, "loss": 0.2742, "step": 41578 }, { "epoch": 1.9477678362299153, "grad_norm": 0.5906152293814315, "learning_rate": 8.93772864110215e-09, "loss": 0.2779, "step": 41579 }, { "epoch": 1.9478146812198436, "grad_norm": 0.5785136196030191, "learning_rate": 8.921713567945633e-09, "loss": 0.2645, "step": 41580 }, { "epoch": 1.9478615262097718, "grad_norm": 0.5828629080918851, "learning_rate": 8.905712830318547e-09, "loss": 0.2503, "step": 41581 }, { "epoch": 1.9479083711997003, "grad_norm": 0.6351724234325491, "learning_rate": 8.889726428312762e-09, "loss": 0.2772, "step": 41582 }, { "epoch": 1.9479552161896285, "grad_norm": 0.569722613687044, "learning_rate": 8.873754362020704e-09, "loss": 0.2612, "step": 41583 }, { "epoch": 1.9480020611795568, "grad_norm": 0.6061378974782177, "learning_rate": 8.857796631534243e-09, "loss": 0.2676, "step": 41584 }, { "epoch": 1.9480489061694852, "grad_norm": 0.6011492271249227, "learning_rate": 8.841853236944697e-09, "loss": 0.2842, "step": 41585 }, { "epoch": 1.9480957511594135, "grad_norm": 0.6010534611361702, "learning_rate": 8.825924178344492e-09, "loss": 0.2804, "step": 41586 }, { "epoch": 1.9481425961493417, "grad_norm": 0.6447504749408903, "learning_rate": 8.810009455824665e-09, "loss": 0.2848, "step": 41587 }, { "epoch": 1.9481894411392702, "grad_norm": 0.5759787407859508, "learning_rate": 8.794109069477086e-09, "loss": 0.2733, "step": 41588 }, { "epoch": 1.9482362861291986, "grad_norm": 0.5801972173897948, "learning_rate": 8.778223019393351e-09, "loss": 0.2636, "step": 41589 }, { "epoch": 1.9482831311191267, "grad_norm": 0.6199817455631468, "learning_rate": 8.762351305664773e-09, "loss": 0.2656, "step": 41590 }, { "epoch": 1.9483299761090551, "grad_norm": 0.6119746872585979, "learning_rate": 8.746493928382949e-09, "loss": 0.287, "step": 41591 }, { "epoch": 1.9483768210989836, "grad_norm": 0.6500683974870801, "learning_rate": 8.730650887638636e-09, "loss": 0.2654, "step": 41592 }, { "epoch": 1.9484236660889118, "grad_norm": 0.6155557980333246, "learning_rate": 8.714822183523431e-09, "loss": 0.2692, "step": 41593 }, { "epoch": 1.94847051107884, "grad_norm": 0.5605588726687256, "learning_rate": 8.699007816128091e-09, "loss": 0.2497, "step": 41594 }, { "epoch": 1.9485173560687685, "grad_norm": 0.6089067729145147, "learning_rate": 8.683207785543934e-09, "loss": 0.2779, "step": 41595 }, { "epoch": 1.9485642010586968, "grad_norm": 0.5931610310024265, "learning_rate": 8.667422091861722e-09, "loss": 0.2684, "step": 41596 }, { "epoch": 1.948611046048625, "grad_norm": 0.586107228491228, "learning_rate": 8.651650735172212e-09, "loss": 0.2858, "step": 41597 }, { "epoch": 1.9486578910385535, "grad_norm": 0.5773492827276006, "learning_rate": 8.635893715566445e-09, "loss": 0.255, "step": 41598 }, { "epoch": 1.9487047360284817, "grad_norm": 0.5533654902233672, "learning_rate": 8.620151033134905e-09, "loss": 0.245, "step": 41599 }, { "epoch": 1.94875158101841, "grad_norm": 0.6332170964669367, "learning_rate": 8.604422687968072e-09, "loss": 0.2718, "step": 41600 }, { "epoch": 1.9487984260083384, "grad_norm": 0.5803038141952891, "learning_rate": 8.58870868015671e-09, "loss": 0.2762, "step": 41601 }, { "epoch": 1.9488452709982669, "grad_norm": 0.5600863822112246, "learning_rate": 8.573009009791022e-09, "loss": 0.2565, "step": 41602 }, { "epoch": 1.9488921159881951, "grad_norm": 0.6209865070341865, "learning_rate": 8.55732367696177e-09, "loss": 0.2721, "step": 41603 }, { "epoch": 1.9489389609781234, "grad_norm": 0.5855047072510602, "learning_rate": 8.541652681758606e-09, "loss": 0.2755, "step": 41604 }, { "epoch": 1.9489858059680518, "grad_norm": 0.5782884999372816, "learning_rate": 8.52599602427201e-09, "loss": 0.278, "step": 41605 }, { "epoch": 1.94903265095798, "grad_norm": 0.6188049448242934, "learning_rate": 8.510353704592189e-09, "loss": 0.2709, "step": 41606 }, { "epoch": 1.9490794959479083, "grad_norm": 0.5760987919923508, "learning_rate": 8.494725722809071e-09, "loss": 0.2758, "step": 41607 }, { "epoch": 1.9491263409378368, "grad_norm": 0.5955623350638511, "learning_rate": 8.479112079012863e-09, "loss": 0.2854, "step": 41608 }, { "epoch": 1.949173185927765, "grad_norm": 0.6310822305276685, "learning_rate": 8.463512773292936e-09, "loss": 0.2894, "step": 41609 }, { "epoch": 1.9492200309176932, "grad_norm": 0.6023930881493671, "learning_rate": 8.44792780573922e-09, "loss": 0.2631, "step": 41610 }, { "epoch": 1.9492668759076217, "grad_norm": 0.5749555328519219, "learning_rate": 8.432357176441364e-09, "loss": 0.252, "step": 41611 }, { "epoch": 1.9493137208975502, "grad_norm": 0.570466031560433, "learning_rate": 8.416800885489018e-09, "loss": 0.2798, "step": 41612 }, { "epoch": 1.9493605658874782, "grad_norm": 0.6692769620636079, "learning_rate": 8.401258932972112e-09, "loss": 0.2833, "step": 41613 }, { "epoch": 1.9494074108774067, "grad_norm": 0.605613270612999, "learning_rate": 8.385731318979462e-09, "loss": 0.2751, "step": 41614 }, { "epoch": 1.9494542558673351, "grad_norm": 0.6467530711586538, "learning_rate": 8.37021804360072e-09, "loss": 0.2818, "step": 41615 }, { "epoch": 1.9495011008572634, "grad_norm": 0.578123961884406, "learning_rate": 8.35471910692498e-09, "loss": 0.2629, "step": 41616 }, { "epoch": 1.9495479458471916, "grad_norm": 0.6518963350074524, "learning_rate": 8.339234509041893e-09, "loss": 0.2838, "step": 41617 }, { "epoch": 1.94959479083712, "grad_norm": 0.606980537684437, "learning_rate": 8.32376425004e-09, "loss": 0.2686, "step": 41618 }, { "epoch": 1.9496416358270483, "grad_norm": 0.5685832089473793, "learning_rate": 8.308308330008675e-09, "loss": 0.2513, "step": 41619 }, { "epoch": 1.9496884808169765, "grad_norm": 0.6026850234575035, "learning_rate": 8.292866749036732e-09, "loss": 0.2673, "step": 41620 }, { "epoch": 1.949735325806905, "grad_norm": 0.5918762641384354, "learning_rate": 8.27743950721327e-09, "loss": 0.2603, "step": 41621 }, { "epoch": 1.9497821707968332, "grad_norm": 0.6067897005071407, "learning_rate": 8.26202660462655e-09, "loss": 0.2731, "step": 41622 }, { "epoch": 1.9498290157867615, "grad_norm": 0.6147425843375923, "learning_rate": 8.246628041365668e-09, "loss": 0.271, "step": 41623 }, { "epoch": 1.94987586077669, "grad_norm": 0.5572510334175013, "learning_rate": 8.231243817519442e-09, "loss": 0.254, "step": 41624 }, { "epoch": 1.9499227057666184, "grad_norm": 0.5726177228561818, "learning_rate": 8.215873933175855e-09, "loss": 0.2715, "step": 41625 }, { "epoch": 1.9499695507565464, "grad_norm": 0.5876857068262672, "learning_rate": 8.200518388423729e-09, "loss": 0.2686, "step": 41626 }, { "epoch": 1.950016395746475, "grad_norm": 0.650088850664109, "learning_rate": 8.185177183351323e-09, "loss": 0.2881, "step": 41627 }, { "epoch": 1.9500632407364034, "grad_norm": 0.5935212251307085, "learning_rate": 8.169850318046902e-09, "loss": 0.2699, "step": 41628 }, { "epoch": 1.9501100857263316, "grad_norm": 0.6011214241016947, "learning_rate": 8.154537792598726e-09, "loss": 0.2631, "step": 41629 }, { "epoch": 1.9501569307162598, "grad_norm": 0.5948584702755534, "learning_rate": 8.139239607094784e-09, "loss": 0.2829, "step": 41630 }, { "epoch": 1.9502037757061883, "grad_norm": 0.5766899269059309, "learning_rate": 8.123955761623337e-09, "loss": 0.2562, "step": 41631 }, { "epoch": 1.9502506206961165, "grad_norm": 0.6103202366337032, "learning_rate": 8.108686256272092e-09, "loss": 0.2745, "step": 41632 }, { "epoch": 1.9502974656860448, "grad_norm": 0.578569187844867, "learning_rate": 8.093431091129034e-09, "loss": 0.2658, "step": 41633 }, { "epoch": 1.9503443106759732, "grad_norm": 0.602420211278726, "learning_rate": 8.078190266282149e-09, "loss": 0.2837, "step": 41634 }, { "epoch": 1.9503911556659015, "grad_norm": 0.6429170478350825, "learning_rate": 8.062963781818867e-09, "loss": 0.2853, "step": 41635 }, { "epoch": 1.9504380006558297, "grad_norm": 0.5699801033418576, "learning_rate": 8.047751637826896e-09, "loss": 0.2517, "step": 41636 }, { "epoch": 1.9504848456457582, "grad_norm": 0.6062749690036693, "learning_rate": 8.032553834393664e-09, "loss": 0.2639, "step": 41637 }, { "epoch": 1.9505316906356867, "grad_norm": 0.606011002878068, "learning_rate": 8.017370371607158e-09, "loss": 0.2638, "step": 41638 }, { "epoch": 1.950578535625615, "grad_norm": 0.6110131257484915, "learning_rate": 8.002201249553976e-09, "loss": 0.2789, "step": 41639 }, { "epoch": 1.9506253806155431, "grad_norm": 0.6235985254786494, "learning_rate": 7.987046468321824e-09, "loss": 0.2934, "step": 41640 }, { "epoch": 1.9506722256054716, "grad_norm": 0.5583604149588923, "learning_rate": 7.971906027997855e-09, "loss": 0.2389, "step": 41641 }, { "epoch": 1.9507190705953998, "grad_norm": 0.5894017696488854, "learning_rate": 7.9567799286695e-09, "loss": 0.2697, "step": 41642 }, { "epoch": 1.950765915585328, "grad_norm": 0.6382525169322512, "learning_rate": 7.941668170423078e-09, "loss": 0.2822, "step": 41643 }, { "epoch": 1.9508127605752565, "grad_norm": 0.6082617067429987, "learning_rate": 7.926570753346296e-09, "loss": 0.2582, "step": 41644 }, { "epoch": 1.9508596055651848, "grad_norm": 0.6144205273587758, "learning_rate": 7.911487677525753e-09, "loss": 0.2895, "step": 41645 }, { "epoch": 1.950906450555113, "grad_norm": 0.5706360390920511, "learning_rate": 7.896418943048045e-09, "loss": 0.2592, "step": 41646 }, { "epoch": 1.9509532955450415, "grad_norm": 0.6182058188922203, "learning_rate": 7.881364550000325e-09, "loss": 0.2955, "step": 41647 }, { "epoch": 1.95100014053497, "grad_norm": 0.607161319265655, "learning_rate": 7.866324498468913e-09, "loss": 0.2791, "step": 41648 }, { "epoch": 1.951046985524898, "grad_norm": 0.6317085243974653, "learning_rate": 7.851298788540406e-09, "loss": 0.2821, "step": 41649 }, { "epoch": 1.9510938305148264, "grad_norm": 0.5549217814813416, "learning_rate": 7.836287420301125e-09, "loss": 0.2547, "step": 41650 }, { "epoch": 1.951140675504755, "grad_norm": 0.6025277711854238, "learning_rate": 7.821290393837944e-09, "loss": 0.2604, "step": 41651 }, { "epoch": 1.9511875204946831, "grad_norm": 0.6327150227072997, "learning_rate": 7.806307709236627e-09, "loss": 0.2619, "step": 41652 }, { "epoch": 1.9512343654846114, "grad_norm": 0.5674355368428418, "learning_rate": 7.791339366583494e-09, "loss": 0.2645, "step": 41653 }, { "epoch": 1.9512812104745398, "grad_norm": 0.5974229509208955, "learning_rate": 7.776385365964866e-09, "loss": 0.271, "step": 41654 }, { "epoch": 1.951328055464468, "grad_norm": 0.6011975692898919, "learning_rate": 7.761445707467064e-09, "loss": 0.2896, "step": 41655 }, { "epoch": 1.9513749004543963, "grad_norm": 0.6135510954203571, "learning_rate": 7.746520391175572e-09, "loss": 0.2629, "step": 41656 }, { "epoch": 1.9514217454443248, "grad_norm": 0.6024983699853368, "learning_rate": 7.731609417176434e-09, "loss": 0.2778, "step": 41657 }, { "epoch": 1.951468590434253, "grad_norm": 0.5631968223777399, "learning_rate": 7.716712785555413e-09, "loss": 0.2594, "step": 41658 }, { "epoch": 1.9515154354241813, "grad_norm": 0.622707037494778, "learning_rate": 7.701830496398278e-09, "loss": 0.273, "step": 41659 }, { "epoch": 1.9515622804141097, "grad_norm": 0.5847861993883601, "learning_rate": 7.686962549790788e-09, "loss": 0.2714, "step": 41660 }, { "epoch": 1.9516091254040382, "grad_norm": 0.6548489983820505, "learning_rate": 7.672108945818157e-09, "loss": 0.2783, "step": 41661 }, { "epoch": 1.9516559703939662, "grad_norm": 0.613836352929806, "learning_rate": 7.657269684566426e-09, "loss": 0.2637, "step": 41662 }, { "epoch": 1.9517028153838947, "grad_norm": 0.6221945941496745, "learning_rate": 7.642444766120804e-09, "loss": 0.2916, "step": 41663 }, { "epoch": 1.9517496603738231, "grad_norm": 0.5908628659830919, "learning_rate": 7.627634190566225e-09, "loss": 0.2428, "step": 41664 }, { "epoch": 1.9517965053637514, "grad_norm": 0.6060981694814013, "learning_rate": 7.612837957988173e-09, "loss": 0.2815, "step": 41665 }, { "epoch": 1.9518433503536796, "grad_norm": 0.6177811950360038, "learning_rate": 7.59805606847186e-09, "loss": 0.2685, "step": 41666 }, { "epoch": 1.951890195343608, "grad_norm": 0.6075392314363113, "learning_rate": 7.583288522102216e-09, "loss": 0.2659, "step": 41667 }, { "epoch": 1.9519370403335363, "grad_norm": 0.6236811768455104, "learning_rate": 7.568535318964454e-09, "loss": 0.2844, "step": 41668 }, { "epoch": 1.9519838853234646, "grad_norm": 0.5701376040070911, "learning_rate": 7.553796459143226e-09, "loss": 0.2658, "step": 41669 }, { "epoch": 1.952030730313393, "grad_norm": 0.6150334264545049, "learning_rate": 7.539071942723464e-09, "loss": 0.2811, "step": 41670 }, { "epoch": 1.9520775753033213, "grad_norm": 0.5677811846508246, "learning_rate": 7.524361769789823e-09, "loss": 0.2541, "step": 41671 }, { "epoch": 1.9521244202932495, "grad_norm": 0.6366827029157962, "learning_rate": 7.509665940426959e-09, "loss": 0.2756, "step": 41672 }, { "epoch": 1.952171265283178, "grad_norm": 0.6085199877034849, "learning_rate": 7.494984454719801e-09, "loss": 0.2767, "step": 41673 }, { "epoch": 1.9522181102731064, "grad_norm": 0.6216190742338134, "learning_rate": 7.480317312752172e-09, "loss": 0.2703, "step": 41674 }, { "epoch": 1.9522649552630347, "grad_norm": 0.5825246765607311, "learning_rate": 7.465664514609283e-09, "loss": 0.2685, "step": 41675 }, { "epoch": 1.952311800252963, "grad_norm": 0.61657099159876, "learning_rate": 7.451026060374677e-09, "loss": 0.2768, "step": 41676 }, { "epoch": 1.9523586452428914, "grad_norm": 0.5822277551698682, "learning_rate": 7.43640195013301e-09, "loss": 0.271, "step": 41677 }, { "epoch": 1.9524054902328196, "grad_norm": 0.5980140813237275, "learning_rate": 7.42179218396838e-09, "loss": 0.285, "step": 41678 }, { "epoch": 1.9524523352227479, "grad_norm": 0.6213240557519142, "learning_rate": 7.407196761964885e-09, "loss": 0.2731, "step": 41679 }, { "epoch": 1.9524991802126763, "grad_norm": 0.5615802252737655, "learning_rate": 7.39261568420635e-09, "loss": 0.259, "step": 41680 }, { "epoch": 1.9525460252026046, "grad_norm": 0.5777950329073483, "learning_rate": 7.378048950776873e-09, "loss": 0.2485, "step": 41681 }, { "epoch": 1.9525928701925328, "grad_norm": 0.5903607161566664, "learning_rate": 7.363496561760275e-09, "loss": 0.266, "step": 41682 }, { "epoch": 1.9526397151824613, "grad_norm": 0.6306201529527439, "learning_rate": 7.348958517240379e-09, "loss": 0.2736, "step": 41683 }, { "epoch": 1.9526865601723897, "grad_norm": 0.602948017885359, "learning_rate": 7.334434817300728e-09, "loss": 0.2629, "step": 41684 }, { "epoch": 1.9527334051623177, "grad_norm": 0.6075947807163977, "learning_rate": 7.319925462024591e-09, "loss": 0.2635, "step": 41685 }, { "epoch": 1.9527802501522462, "grad_norm": 0.5859894340326913, "learning_rate": 7.305430451496065e-09, "loss": 0.2564, "step": 41686 }, { "epoch": 1.9528270951421747, "grad_norm": 0.5884079000998103, "learning_rate": 7.2909497857981405e-09, "loss": 0.2546, "step": 41687 }, { "epoch": 1.952873940132103, "grad_norm": 0.5761081250328843, "learning_rate": 7.276483465014361e-09, "loss": 0.2655, "step": 41688 }, { "epoch": 1.9529207851220312, "grad_norm": 0.6055067910398872, "learning_rate": 7.262031489227716e-09, "loss": 0.2788, "step": 41689 }, { "epoch": 1.9529676301119596, "grad_norm": 0.5897973499480226, "learning_rate": 7.2475938585217505e-09, "loss": 0.262, "step": 41690 }, { "epoch": 1.9530144751018879, "grad_norm": 0.639329701960578, "learning_rate": 7.233170572979176e-09, "loss": 0.2778, "step": 41691 }, { "epoch": 1.953061320091816, "grad_norm": 0.6007134029926039, "learning_rate": 7.218761632683258e-09, "loss": 0.2807, "step": 41692 }, { "epoch": 1.9531081650817446, "grad_norm": 0.6137503809414725, "learning_rate": 7.204367037716986e-09, "loss": 0.2718, "step": 41693 }, { "epoch": 1.9531550100716728, "grad_norm": 0.6011259795200338, "learning_rate": 7.189986788162795e-09, "loss": 0.2764, "step": 41694 }, { "epoch": 1.953201855061601, "grad_norm": 0.6314894803929826, "learning_rate": 7.175620884103673e-09, "loss": 0.2769, "step": 41695 }, { "epoch": 1.9532487000515295, "grad_norm": 0.6075198753188011, "learning_rate": 7.161269325622333e-09, "loss": 0.2838, "step": 41696 }, { "epoch": 1.953295545041458, "grad_norm": 0.5439966201055402, "learning_rate": 7.146932112801485e-09, "loss": 0.2403, "step": 41697 }, { "epoch": 1.953342390031386, "grad_norm": 0.6373046085599989, "learning_rate": 7.132609245723565e-09, "loss": 0.2751, "step": 41698 }, { "epoch": 1.9533892350213145, "grad_norm": 0.6313854658393147, "learning_rate": 7.1183007244707284e-09, "loss": 0.2847, "step": 41699 }, { "epoch": 1.953436080011243, "grad_norm": 0.6024399768560635, "learning_rate": 7.104006549125686e-09, "loss": 0.263, "step": 41700 }, { "epoch": 1.9534829250011712, "grad_norm": 0.6027732675495595, "learning_rate": 7.089726719770318e-09, "loss": 0.2798, "step": 41701 }, { "epoch": 1.9535297699910994, "grad_norm": 0.5872124019117035, "learning_rate": 7.075461236487058e-09, "loss": 0.257, "step": 41702 }, { "epoch": 1.9535766149810279, "grad_norm": 0.6430740990110321, "learning_rate": 7.061210099357785e-09, "loss": 0.2703, "step": 41703 }, { "epoch": 1.953623459970956, "grad_norm": 0.576712062193696, "learning_rate": 7.046973308464933e-09, "loss": 0.2493, "step": 41704 }, { "epoch": 1.9536703049608843, "grad_norm": 0.6257338830675403, "learning_rate": 7.032750863890103e-09, "loss": 0.2777, "step": 41705 }, { "epoch": 1.9537171499508128, "grad_norm": 0.6234843646945892, "learning_rate": 7.018542765715175e-09, "loss": 0.2871, "step": 41706 }, { "epoch": 1.953763994940741, "grad_norm": 0.6050274595506652, "learning_rate": 7.004349014022027e-09, "loss": 0.2613, "step": 41707 }, { "epoch": 1.9538108399306693, "grad_norm": 0.623309177780887, "learning_rate": 6.9901696088922614e-09, "loss": 0.2745, "step": 41708 }, { "epoch": 1.9538576849205977, "grad_norm": 0.6281791331251702, "learning_rate": 6.976004550407201e-09, "loss": 0.2828, "step": 41709 }, { "epoch": 1.9539045299105262, "grad_norm": 0.6637400348600667, "learning_rate": 6.961853838649002e-09, "loss": 0.2765, "step": 41710 }, { "epoch": 1.9539513749004545, "grad_norm": 0.5766372086609972, "learning_rate": 6.947717473698435e-09, "loss": 0.2501, "step": 41711 }, { "epoch": 1.9539982198903827, "grad_norm": 0.6200980616059568, "learning_rate": 6.933595455637376e-09, "loss": 0.2919, "step": 41712 }, { "epoch": 1.9540450648803112, "grad_norm": 0.6058299443649218, "learning_rate": 6.919487784546875e-09, "loss": 0.2756, "step": 41713 }, { "epoch": 1.9540919098702394, "grad_norm": 0.6100426526510749, "learning_rate": 6.905394460507974e-09, "loss": 0.2911, "step": 41714 }, { "epoch": 1.9541387548601676, "grad_norm": 0.5961635875659715, "learning_rate": 6.891315483602001e-09, "loss": 0.2691, "step": 41715 }, { "epoch": 1.954185599850096, "grad_norm": 0.6131270631143658, "learning_rate": 6.877250853909723e-09, "loss": 0.27, "step": 41716 }, { "epoch": 1.9542324448400243, "grad_norm": 0.5928001862610573, "learning_rate": 6.8632005715124624e-09, "loss": 0.2697, "step": 41717 }, { "epoch": 1.9542792898299526, "grad_norm": 0.6304615747034121, "learning_rate": 6.849164636490713e-09, "loss": 0.2804, "step": 41718 }, { "epoch": 1.954326134819881, "grad_norm": 0.6016579063341493, "learning_rate": 6.835143048925519e-09, "loss": 0.2787, "step": 41719 }, { "epoch": 1.9543729798098095, "grad_norm": 0.6062267075525472, "learning_rate": 6.821135808897372e-09, "loss": 0.2773, "step": 41720 }, { "epoch": 1.9544198247997375, "grad_norm": 0.6214021154948561, "learning_rate": 6.8071429164867645e-09, "loss": 0.2955, "step": 41721 }, { "epoch": 1.954466669789666, "grad_norm": 0.5587989668036304, "learning_rate": 6.793164371774464e-09, "loss": 0.2448, "step": 41722 }, { "epoch": 1.9545135147795945, "grad_norm": 0.6258405346535579, "learning_rate": 6.779200174841239e-09, "loss": 0.2703, "step": 41723 }, { "epoch": 1.9545603597695227, "grad_norm": 0.6047555758651922, "learning_rate": 6.765250325766748e-09, "loss": 0.2717, "step": 41724 }, { "epoch": 1.954607204759451, "grad_norm": 0.5746428556110996, "learning_rate": 6.751314824631761e-09, "loss": 0.267, "step": 41725 }, { "epoch": 1.9546540497493794, "grad_norm": 0.614216594893617, "learning_rate": 6.737393671515935e-09, "loss": 0.2878, "step": 41726 }, { "epoch": 1.9547008947393076, "grad_norm": 0.60859467529903, "learning_rate": 6.7234868665000394e-09, "loss": 0.2684, "step": 41727 }, { "epoch": 1.9547477397292359, "grad_norm": 0.557382974467946, "learning_rate": 6.7095944096637335e-09, "loss": 0.25, "step": 41728 }, { "epoch": 1.9547945847191643, "grad_norm": 0.6096288543708075, "learning_rate": 6.6957163010872294e-09, "loss": 0.2697, "step": 41729 }, { "epoch": 1.9548414297090926, "grad_norm": 0.62311793976332, "learning_rate": 6.681852540849909e-09, "loss": 0.2711, "step": 41730 }, { "epoch": 1.9548882746990208, "grad_norm": 0.5713125462594563, "learning_rate": 6.6680031290319855e-09, "loss": 0.2667, "step": 41731 }, { "epoch": 1.9549351196889493, "grad_norm": 0.5963296228823026, "learning_rate": 6.654168065713118e-09, "loss": 0.2768, "step": 41732 }, { "epoch": 1.9549819646788777, "grad_norm": 0.6291694709164966, "learning_rate": 6.640347350972965e-09, "loss": 0.2727, "step": 41733 }, { "epoch": 1.9550288096688058, "grad_norm": 0.6204945702087064, "learning_rate": 6.6265409848906284e-09, "loss": 0.2771, "step": 41734 }, { "epoch": 1.9550756546587342, "grad_norm": 0.6604711205708107, "learning_rate": 6.6127489675460456e-09, "loss": 0.2831, "step": 41735 }, { "epoch": 1.9551224996486627, "grad_norm": 0.6349398697489056, "learning_rate": 6.5989712990183194e-09, "loss": 0.2697, "step": 41736 }, { "epoch": 1.955169344638591, "grad_norm": 0.5850119823421946, "learning_rate": 6.585207979386832e-09, "loss": 0.2546, "step": 41737 }, { "epoch": 1.9552161896285192, "grad_norm": 0.6173092523185119, "learning_rate": 6.571459008730685e-09, "loss": 0.2917, "step": 41738 }, { "epoch": 1.9552630346184476, "grad_norm": 0.5918065432631512, "learning_rate": 6.5577243871292605e-09, "loss": 0.2666, "step": 41739 }, { "epoch": 1.9553098796083759, "grad_norm": 0.6265707467708935, "learning_rate": 6.5440041146613845e-09, "loss": 0.2798, "step": 41740 }, { "epoch": 1.9553567245983041, "grad_norm": 0.5753497649393905, "learning_rate": 6.530298191405882e-09, "loss": 0.2725, "step": 41741 }, { "epoch": 1.9554035695882326, "grad_norm": 0.5752706065870393, "learning_rate": 6.516606617442134e-09, "loss": 0.2671, "step": 41742 }, { "epoch": 1.9554504145781608, "grad_norm": 0.6033690619057871, "learning_rate": 6.502929392848412e-09, "loss": 0.244, "step": 41743 }, { "epoch": 1.955497259568089, "grad_norm": 0.5991305856681468, "learning_rate": 6.489266517703541e-09, "loss": 0.2636, "step": 41744 }, { "epoch": 1.9555441045580175, "grad_norm": 0.6237789234790219, "learning_rate": 6.475617992086347e-09, "loss": 0.2757, "step": 41745 }, { "epoch": 1.955590949547946, "grad_norm": 0.6643761250387119, "learning_rate": 6.461983816075101e-09, "loss": 0.2861, "step": 41746 }, { "epoch": 1.955637794537874, "grad_norm": 0.6094455460630975, "learning_rate": 6.448363989748629e-09, "loss": 0.2766, "step": 41747 }, { "epoch": 1.9556846395278025, "grad_norm": 0.5891798804885, "learning_rate": 6.434758513184924e-09, "loss": 0.2777, "step": 41748 }, { "epoch": 1.955731484517731, "grad_norm": 0.5630935364049304, "learning_rate": 6.421167386462535e-09, "loss": 0.2599, "step": 41749 }, { "epoch": 1.9557783295076592, "grad_norm": 0.6394355287349657, "learning_rate": 6.407590609659453e-09, "loss": 0.2732, "step": 41750 }, { "epoch": 1.9558251744975874, "grad_norm": 0.6073174086124631, "learning_rate": 6.394028182854228e-09, "loss": 0.2872, "step": 41751 }, { "epoch": 1.9558720194875159, "grad_norm": 0.5748832042169395, "learning_rate": 6.380480106124298e-09, "loss": 0.2733, "step": 41752 }, { "epoch": 1.9559188644774441, "grad_norm": 0.6266416757790844, "learning_rate": 6.366946379548211e-09, "loss": 0.2813, "step": 41753 }, { "epoch": 1.9559657094673724, "grad_norm": 0.5645872443025638, "learning_rate": 6.353427003203405e-09, "loss": 0.2679, "step": 41754 }, { "epoch": 1.9560125544573008, "grad_norm": 0.5737780500270866, "learning_rate": 6.339921977168151e-09, "loss": 0.257, "step": 41755 }, { "epoch": 1.956059399447229, "grad_norm": 0.5920809581588249, "learning_rate": 6.3264313015196086e-09, "loss": 0.2673, "step": 41756 }, { "epoch": 1.9561062444371573, "grad_norm": 0.5737051688976762, "learning_rate": 6.312954976335772e-09, "loss": 0.2497, "step": 41757 }, { "epoch": 1.9561530894270858, "grad_norm": 0.6062327157896802, "learning_rate": 6.299493001694079e-09, "loss": 0.2765, "step": 41758 }, { "epoch": 1.9561999344170142, "grad_norm": 0.608505530535984, "learning_rate": 6.286045377671968e-09, "loss": 0.2681, "step": 41759 }, { "epoch": 1.9562467794069425, "grad_norm": 0.6101820596039447, "learning_rate": 6.272612104347153e-09, "loss": 0.2571, "step": 41760 }, { "epoch": 1.9562936243968707, "grad_norm": 0.5547638539742469, "learning_rate": 6.259193181796519e-09, "loss": 0.2696, "step": 41761 }, { "epoch": 1.9563404693867992, "grad_norm": 0.6173932525461615, "learning_rate": 6.245788610097503e-09, "loss": 0.2737, "step": 41762 }, { "epoch": 1.9563873143767274, "grad_norm": 0.6082054931722817, "learning_rate": 6.232398389326988e-09, "loss": 0.2723, "step": 41763 }, { "epoch": 1.9564341593666557, "grad_norm": 0.568960251056512, "learning_rate": 6.219022519562412e-09, "loss": 0.2545, "step": 41764 }, { "epoch": 1.9564810043565841, "grad_norm": 0.6101944325190831, "learning_rate": 6.2056610008806584e-09, "loss": 0.2773, "step": 41765 }, { "epoch": 1.9565278493465124, "grad_norm": 0.6171522351310353, "learning_rate": 6.192313833358332e-09, "loss": 0.2759, "step": 41766 }, { "epoch": 1.9565746943364406, "grad_norm": 0.6139214296325088, "learning_rate": 6.178981017072594e-09, "loss": 0.2643, "step": 41767 }, { "epoch": 1.956621539326369, "grad_norm": 0.6045825466688746, "learning_rate": 6.165662552100049e-09, "loss": 0.2674, "step": 41768 }, { "epoch": 1.9566683843162975, "grad_norm": 0.5929794784537457, "learning_rate": 6.152358438517303e-09, "loss": 0.2569, "step": 41769 }, { "epoch": 1.9567152293062255, "grad_norm": 0.6281794341038581, "learning_rate": 6.1390686764006835e-09, "loss": 0.2791, "step": 41770 }, { "epoch": 1.956762074296154, "grad_norm": 0.6263448198991832, "learning_rate": 6.125793265827074e-09, "loss": 0.2721, "step": 41771 }, { "epoch": 1.9568089192860825, "grad_norm": 0.6209614408218523, "learning_rate": 6.112532206872801e-09, "loss": 0.2766, "step": 41772 }, { "epoch": 1.9568557642760107, "grad_norm": 0.5853950997869448, "learning_rate": 6.099285499614194e-09, "loss": 0.2801, "step": 41773 }, { "epoch": 1.956902609265939, "grad_norm": 0.6172632390881442, "learning_rate": 6.086053144127302e-09, "loss": 0.2693, "step": 41774 }, { "epoch": 1.9569494542558674, "grad_norm": 0.5730993888760267, "learning_rate": 6.072835140488176e-09, "loss": 0.2573, "step": 41775 }, { "epoch": 1.9569962992457957, "grad_norm": 0.548205179094567, "learning_rate": 6.059631488773143e-09, "loss": 0.248, "step": 41776 }, { "epoch": 1.957043144235724, "grad_norm": 0.5931012052657103, "learning_rate": 6.0464421890579775e-09, "loss": 0.2604, "step": 41777 }, { "epoch": 1.9570899892256524, "grad_norm": 0.6000542402078749, "learning_rate": 6.0332672414190055e-09, "loss": 0.2843, "step": 41778 }, { "epoch": 1.9571368342155806, "grad_norm": 0.5795150215401191, "learning_rate": 6.020106645931445e-09, "loss": 0.2622, "step": 41779 }, { "epoch": 1.9571836792055088, "grad_norm": 0.5849801449267463, "learning_rate": 6.006960402671347e-09, "loss": 0.2556, "step": 41780 }, { "epoch": 1.9572305241954373, "grad_norm": 0.5683704481839023, "learning_rate": 5.993828511714761e-09, "loss": 0.2565, "step": 41781 }, { "epoch": 1.9572773691853658, "grad_norm": 0.5622240492766541, "learning_rate": 5.98071097313635e-09, "loss": 0.2681, "step": 41782 }, { "epoch": 1.9573242141752938, "grad_norm": 0.5373105252567175, "learning_rate": 5.967607787012441e-09, "loss": 0.2541, "step": 41783 }, { "epoch": 1.9573710591652222, "grad_norm": 0.5682385510788637, "learning_rate": 5.9545189534179755e-09, "loss": 0.2553, "step": 41784 }, { "epoch": 1.9574179041551507, "grad_norm": 0.6009145136587702, "learning_rate": 5.941444472428448e-09, "loss": 0.2622, "step": 41785 }, { "epoch": 1.957464749145079, "grad_norm": 0.5938703140624351, "learning_rate": 5.928384344118798e-09, "loss": 0.2742, "step": 41786 }, { "epoch": 1.9575115941350072, "grad_norm": 0.6311001561046513, "learning_rate": 5.915338568564521e-09, "loss": 0.2528, "step": 41787 }, { "epoch": 1.9575584391249357, "grad_norm": 0.596559134450917, "learning_rate": 5.902307145840836e-09, "loss": 0.2799, "step": 41788 }, { "epoch": 1.957605284114864, "grad_norm": 0.6418866566871377, "learning_rate": 5.889290076022403e-09, "loss": 0.2813, "step": 41789 }, { "epoch": 1.9576521291047921, "grad_norm": 0.6227083433959897, "learning_rate": 5.876287359184163e-09, "loss": 0.2673, "step": 41790 }, { "epoch": 1.9576989740947206, "grad_norm": 0.5746219281456282, "learning_rate": 5.8632989954010564e-09, "loss": 0.2557, "step": 41791 }, { "epoch": 1.9577458190846488, "grad_norm": 0.6079997254241032, "learning_rate": 5.850324984747746e-09, "loss": 0.2664, "step": 41792 }, { "epoch": 1.957792664074577, "grad_norm": 0.5521488877393592, "learning_rate": 5.8373653272988936e-09, "loss": 0.2662, "step": 41793 }, { "epoch": 1.9578395090645055, "grad_norm": 0.6005539600206022, "learning_rate": 5.82442002312944e-09, "loss": 0.27, "step": 41794 }, { "epoch": 1.957886354054434, "grad_norm": 0.5973397171133255, "learning_rate": 5.811489072313215e-09, "loss": 0.27, "step": 41795 }, { "epoch": 1.9579331990443622, "grad_norm": 0.6141051847443548, "learning_rate": 5.798572474925157e-09, "loss": 0.2677, "step": 41796 }, { "epoch": 1.9579800440342905, "grad_norm": 0.6364484914340034, "learning_rate": 5.7856702310393755e-09, "loss": 0.2783, "step": 41797 }, { "epoch": 1.958026889024219, "grad_norm": 0.5941296312848332, "learning_rate": 5.772782340729976e-09, "loss": 0.2667, "step": 41798 }, { "epoch": 1.9580737340141472, "grad_norm": 0.6667470088354295, "learning_rate": 5.7599088040716235e-09, "loss": 0.2601, "step": 41799 }, { "epoch": 1.9581205790040754, "grad_norm": 0.6269084310704485, "learning_rate": 5.747049621137868e-09, "loss": 0.2746, "step": 41800 }, { "epoch": 1.958167423994004, "grad_norm": 0.5873952225641262, "learning_rate": 5.734204792003095e-09, "loss": 0.2704, "step": 41801 }, { "epoch": 1.9582142689839321, "grad_norm": 0.6113780317241363, "learning_rate": 5.721374316740857e-09, "loss": 0.2608, "step": 41802 }, { "epoch": 1.9582611139738604, "grad_norm": 0.583785988838063, "learning_rate": 5.70855819542554e-09, "loss": 0.2644, "step": 41803 }, { "epoch": 1.9583079589637888, "grad_norm": 0.597690354554665, "learning_rate": 5.695756428130139e-09, "loss": 0.264, "step": 41804 }, { "epoch": 1.9583548039537173, "grad_norm": 0.5610037093462742, "learning_rate": 5.6829690149290404e-09, "loss": 0.2592, "step": 41805 }, { "epoch": 1.9584016489436453, "grad_norm": 0.5990027035645049, "learning_rate": 5.670195955895241e-09, "loss": 0.2696, "step": 41806 }, { "epoch": 1.9584484939335738, "grad_norm": 0.6006256340688871, "learning_rate": 5.657437251102849e-09, "loss": 0.2751, "step": 41807 }, { "epoch": 1.9584953389235022, "grad_norm": 0.5718074779417794, "learning_rate": 5.64469290062486e-09, "loss": 0.2694, "step": 41808 }, { "epoch": 1.9585421839134305, "grad_norm": 0.5822782767965595, "learning_rate": 5.631962904534827e-09, "loss": 0.2695, "step": 41809 }, { "epoch": 1.9585890289033587, "grad_norm": 0.6145721371767237, "learning_rate": 5.619247262905747e-09, "loss": 0.2769, "step": 41810 }, { "epoch": 1.9586358738932872, "grad_norm": 0.6113929699267913, "learning_rate": 5.606545975810896e-09, "loss": 0.2595, "step": 41811 }, { "epoch": 1.9586827188832154, "grad_norm": 0.5393912489732435, "learning_rate": 5.5938590433235466e-09, "loss": 0.2483, "step": 41812 }, { "epoch": 1.9587295638731437, "grad_norm": 0.5512614323475205, "learning_rate": 5.581186465516697e-09, "loss": 0.2497, "step": 41813 }, { "epoch": 1.9587764088630721, "grad_norm": 0.6006811361489169, "learning_rate": 5.568528242463067e-09, "loss": 0.2602, "step": 41814 }, { "epoch": 1.9588232538530004, "grad_norm": 0.5965912879131594, "learning_rate": 5.555884374235654e-09, "loss": 0.2501, "step": 41815 }, { "epoch": 1.9588700988429286, "grad_norm": 0.5993624666368387, "learning_rate": 5.543254860907177e-09, "loss": 0.26, "step": 41816 }, { "epoch": 1.958916943832857, "grad_norm": 0.5420459478097566, "learning_rate": 5.530639702550355e-09, "loss": 0.2563, "step": 41817 }, { "epoch": 1.9589637888227855, "grad_norm": 0.6317729998780689, "learning_rate": 5.5180388992376325e-09, "loss": 0.2769, "step": 41818 }, { "epoch": 1.9590106338127136, "grad_norm": 0.5664563318347828, "learning_rate": 5.505452451041448e-09, "loss": 0.2874, "step": 41819 }, { "epoch": 1.959057478802642, "grad_norm": 0.6722643970223212, "learning_rate": 5.492880358034802e-09, "loss": 0.2829, "step": 41820 }, { "epoch": 1.9591043237925705, "grad_norm": 0.6011021315077705, "learning_rate": 5.480322620289302e-09, "loss": 0.2718, "step": 41821 }, { "epoch": 1.9591511687824987, "grad_norm": 0.6172064397335226, "learning_rate": 5.467779237877946e-09, "loss": 0.247, "step": 41822 }, { "epoch": 1.959198013772427, "grad_norm": 0.5882920718319818, "learning_rate": 5.455250210872342e-09, "loss": 0.2467, "step": 41823 }, { "epoch": 1.9592448587623554, "grad_norm": 0.6416213280277356, "learning_rate": 5.442735539344657e-09, "loss": 0.2765, "step": 41824 }, { "epoch": 1.9592917037522837, "grad_norm": 0.6010461174160621, "learning_rate": 5.430235223367053e-09, "loss": 0.2681, "step": 41825 }, { "epoch": 1.959338548742212, "grad_norm": 0.6049691857439614, "learning_rate": 5.417749263011695e-09, "loss": 0.2901, "step": 41826 }, { "epoch": 1.9593853937321404, "grad_norm": 0.5971956173245078, "learning_rate": 5.405277658350194e-09, "loss": 0.2753, "step": 41827 }, { "epoch": 1.9594322387220686, "grad_norm": 0.5720866877881375, "learning_rate": 5.392820409454158e-09, "loss": 0.2845, "step": 41828 }, { "epoch": 1.9594790837119969, "grad_norm": 0.5978811032589961, "learning_rate": 5.380377516395474e-09, "loss": 0.2669, "step": 41829 }, { "epoch": 1.9595259287019253, "grad_norm": 0.6699133812626537, "learning_rate": 5.367948979245751e-09, "loss": 0.2551, "step": 41830 }, { "epoch": 1.9595727736918538, "grad_norm": 0.586096919981953, "learning_rate": 5.355534798076601e-09, "loss": 0.2849, "step": 41831 }, { "epoch": 1.959619618681782, "grad_norm": 0.5802329575442559, "learning_rate": 5.343134972959074e-09, "loss": 0.2492, "step": 41832 }, { "epoch": 1.9596664636717103, "grad_norm": 0.5974226364613119, "learning_rate": 5.33074950396506e-09, "loss": 0.2701, "step": 41833 }, { "epoch": 1.9597133086616387, "grad_norm": 0.56016418552872, "learning_rate": 5.318378391165335e-09, "loss": 0.2643, "step": 41834 }, { "epoch": 1.959760153651567, "grad_norm": 0.5788371460052338, "learning_rate": 5.306021634631509e-09, "loss": 0.2609, "step": 41835 }, { "epoch": 1.9598069986414952, "grad_norm": 0.6036993657657459, "learning_rate": 5.293679234434357e-09, "loss": 0.2706, "step": 41836 }, { "epoch": 1.9598538436314237, "grad_norm": 0.5595236690637801, "learning_rate": 5.281351190645212e-09, "loss": 0.2725, "step": 41837 }, { "epoch": 1.959900688621352, "grad_norm": 0.577574003811645, "learning_rate": 5.269037503334851e-09, "loss": 0.2692, "step": 41838 }, { "epoch": 1.9599475336112802, "grad_norm": 0.6032360791110892, "learning_rate": 5.256738172574327e-09, "loss": 0.2725, "step": 41839 }, { "epoch": 1.9599943786012086, "grad_norm": 0.6921647402465563, "learning_rate": 5.244453198433863e-09, "loss": 0.2781, "step": 41840 }, { "epoch": 1.960041223591137, "grad_norm": 0.6158394343214084, "learning_rate": 5.232182580984791e-09, "loss": 0.2718, "step": 41841 }, { "epoch": 1.960088068581065, "grad_norm": 0.5699957302051588, "learning_rate": 5.219926320297331e-09, "loss": 0.2685, "step": 41842 }, { "epoch": 1.9601349135709936, "grad_norm": 0.6411916951250018, "learning_rate": 5.20768441644226e-09, "loss": 0.292, "step": 41843 }, { "epoch": 1.960181758560922, "grad_norm": 0.5909094417774412, "learning_rate": 5.195456869489801e-09, "loss": 0.2466, "step": 41844 }, { "epoch": 1.9602286035508503, "grad_norm": 0.6364628918222331, "learning_rate": 5.1832436795104524e-09, "loss": 0.2845, "step": 41845 }, { "epoch": 1.9602754485407785, "grad_norm": 0.5826381914331117, "learning_rate": 5.171044846574713e-09, "loss": 0.2632, "step": 41846 }, { "epoch": 1.960322293530707, "grad_norm": 0.6134846610299685, "learning_rate": 5.15886037075225e-09, "loss": 0.2747, "step": 41847 }, { "epoch": 1.9603691385206352, "grad_norm": 0.5584240928091895, "learning_rate": 5.146690252113562e-09, "loss": 0.2565, "step": 41848 }, { "epoch": 1.9604159835105635, "grad_norm": 0.6238603182854491, "learning_rate": 5.134534490728871e-09, "loss": 0.2791, "step": 41849 }, { "epoch": 1.960462828500492, "grad_norm": 0.5933469131406883, "learning_rate": 5.122393086667566e-09, "loss": 0.2567, "step": 41850 }, { "epoch": 1.9605096734904202, "grad_norm": 0.6169966411282629, "learning_rate": 5.1102660400001445e-09, "loss": 0.2656, "step": 41851 }, { "epoch": 1.9605565184803484, "grad_norm": 0.6527343586832198, "learning_rate": 5.09815335079572e-09, "loss": 0.2641, "step": 41852 }, { "epoch": 1.9606033634702769, "grad_norm": 0.6024101761080789, "learning_rate": 5.0860550191245135e-09, "loss": 0.2662, "step": 41853 }, { "epoch": 1.9606502084602053, "grad_norm": 0.6400243257132644, "learning_rate": 5.0739710450559145e-09, "loss": 0.2824, "step": 41854 }, { "epoch": 1.9606970534501333, "grad_norm": 0.6409025947739582, "learning_rate": 5.061901428659588e-09, "loss": 0.2815, "step": 41855 }, { "epoch": 1.9607438984400618, "grad_norm": 0.5801004786016476, "learning_rate": 5.049846170005201e-09, "loss": 0.2591, "step": 41856 }, { "epoch": 1.9607907434299903, "grad_norm": 0.581562406859693, "learning_rate": 5.0378052691615886e-09, "loss": 0.2513, "step": 41857 }, { "epoch": 1.9608375884199185, "grad_norm": 0.5827500357266905, "learning_rate": 5.025778726198416e-09, "loss": 0.2651, "step": 41858 }, { "epoch": 1.9608844334098467, "grad_norm": 0.6016252966263486, "learning_rate": 5.013766541184794e-09, "loss": 0.2573, "step": 41859 }, { "epoch": 1.9609312783997752, "grad_norm": 0.5838799190128496, "learning_rate": 5.0017687141895586e-09, "loss": 0.2647, "step": 41860 }, { "epoch": 1.9609781233897035, "grad_norm": 0.5496805026794067, "learning_rate": 4.9897852452823744e-09, "loss": 0.2552, "step": 41861 }, { "epoch": 1.9610249683796317, "grad_norm": 0.6020106359022516, "learning_rate": 4.977816134531799e-09, "loss": 0.2555, "step": 41862 }, { "epoch": 1.9610718133695602, "grad_norm": 0.6247950460814347, "learning_rate": 4.965861382006942e-09, "loss": 0.276, "step": 41863 }, { "epoch": 1.9611186583594884, "grad_norm": 0.5779173412985561, "learning_rate": 4.953920987776084e-09, "loss": 0.2692, "step": 41864 }, { "epoch": 1.9611655033494166, "grad_norm": 0.6121773129283088, "learning_rate": 4.941994951908613e-09, "loss": 0.2711, "step": 41865 }, { "epoch": 1.961212348339345, "grad_norm": 0.6407252536044664, "learning_rate": 4.930083274472808e-09, "loss": 0.2881, "step": 41866 }, { "epoch": 1.9612591933292736, "grad_norm": 0.6008428989672326, "learning_rate": 4.918185955537224e-09, "loss": 0.2692, "step": 41867 }, { "epoch": 1.9613060383192018, "grad_norm": 0.6297803546451645, "learning_rate": 4.906302995170142e-09, "loss": 0.2919, "step": 41868 }, { "epoch": 1.96135288330913, "grad_norm": 0.5734157889550828, "learning_rate": 4.894434393440672e-09, "loss": 0.2442, "step": 41869 }, { "epoch": 1.9613997282990585, "grad_norm": 0.6112656366518464, "learning_rate": 4.88258015041626e-09, "loss": 0.2599, "step": 41870 }, { "epoch": 1.9614465732889867, "grad_norm": 0.6141064426709696, "learning_rate": 4.8707402661654635e-09, "loss": 0.2456, "step": 41871 }, { "epoch": 1.961493418278915, "grad_norm": 0.5793390355004112, "learning_rate": 4.858914740756559e-09, "loss": 0.2568, "step": 41872 }, { "epoch": 1.9615402632688435, "grad_norm": 0.640252844393563, "learning_rate": 4.847103574257273e-09, "loss": 0.2765, "step": 41873 }, { "epoch": 1.9615871082587717, "grad_norm": 0.5981696512394569, "learning_rate": 4.835306766735881e-09, "loss": 0.2764, "step": 41874 }, { "epoch": 1.9616339532487, "grad_norm": 0.6196741820489386, "learning_rate": 4.823524318260386e-09, "loss": 0.2749, "step": 41875 }, { "epoch": 1.9616807982386284, "grad_norm": 0.5779233782624318, "learning_rate": 4.811756228897957e-09, "loss": 0.2603, "step": 41876 }, { "epoch": 1.9617276432285569, "grad_norm": 0.5706069219212236, "learning_rate": 4.800002498717149e-09, "loss": 0.2729, "step": 41877 }, { "epoch": 1.9617744882184849, "grad_norm": 0.6184044176504899, "learning_rate": 4.788263127784854e-09, "loss": 0.2891, "step": 41878 }, { "epoch": 1.9618213332084133, "grad_norm": 0.6038230284538936, "learning_rate": 4.776538116169072e-09, "loss": 0.2824, "step": 41879 }, { "epoch": 1.9618681781983418, "grad_norm": 0.6015322009976147, "learning_rate": 4.764827463937249e-09, "loss": 0.2491, "step": 41880 }, { "epoch": 1.96191502318827, "grad_norm": 0.5777967608290737, "learning_rate": 4.753131171156555e-09, "loss": 0.2699, "step": 41881 }, { "epoch": 1.9619618681781983, "grad_norm": 0.6732495350553276, "learning_rate": 4.741449237894435e-09, "loss": 0.3006, "step": 41882 }, { "epoch": 1.9620087131681268, "grad_norm": 0.6555775931534454, "learning_rate": 4.7297816642183355e-09, "loss": 0.2726, "step": 41883 }, { "epoch": 1.962055558158055, "grad_norm": 0.5773565734782425, "learning_rate": 4.71812845019487e-09, "loss": 0.2603, "step": 41884 }, { "epoch": 1.9621024031479832, "grad_norm": 0.6029676075239646, "learning_rate": 4.706489595891484e-09, "loss": 0.2765, "step": 41885 }, { "epoch": 1.9621492481379117, "grad_norm": 0.6599494478897501, "learning_rate": 4.694865101375346e-09, "loss": 0.3115, "step": 41886 }, { "epoch": 1.96219609312784, "grad_norm": 0.5901301897613541, "learning_rate": 4.683254966712791e-09, "loss": 0.2753, "step": 41887 }, { "epoch": 1.9622429381177682, "grad_norm": 0.609483261661625, "learning_rate": 4.671659191971268e-09, "loss": 0.2713, "step": 41888 }, { "epoch": 1.9622897831076966, "grad_norm": 0.5795190993579697, "learning_rate": 4.660077777216831e-09, "loss": 0.2749, "step": 41889 }, { "epoch": 1.962336628097625, "grad_norm": 0.602328631817799, "learning_rate": 4.648510722516375e-09, "loss": 0.2592, "step": 41890 }, { "epoch": 1.9623834730875531, "grad_norm": 0.5883528260943632, "learning_rate": 4.636958027936789e-09, "loss": 0.2809, "step": 41891 }, { "epoch": 1.9624303180774816, "grad_norm": 0.57771436413541, "learning_rate": 4.6254196935444085e-09, "loss": 0.2843, "step": 41892 }, { "epoch": 1.96247716306741, "grad_norm": 0.5836003152137075, "learning_rate": 4.613895719405292e-09, "loss": 0.2589, "step": 41893 }, { "epoch": 1.9625240080573383, "grad_norm": 0.556758442791052, "learning_rate": 4.6023861055863315e-09, "loss": 0.2592, "step": 41894 }, { "epoch": 1.9625708530472665, "grad_norm": 0.5673793706510112, "learning_rate": 4.590890852153307e-09, "loss": 0.2544, "step": 41895 }, { "epoch": 1.962617698037195, "grad_norm": 0.62346027286744, "learning_rate": 4.579409959172277e-09, "loss": 0.286, "step": 41896 }, { "epoch": 1.9626645430271232, "grad_norm": 0.6135928122580245, "learning_rate": 4.567943426709853e-09, "loss": 0.274, "step": 41897 }, { "epoch": 1.9627113880170515, "grad_norm": 0.5817505909844962, "learning_rate": 4.556491254831541e-09, "loss": 0.2406, "step": 41898 }, { "epoch": 1.96275823300698, "grad_norm": 0.6033368834238955, "learning_rate": 4.5450534436033974e-09, "loss": 0.2673, "step": 41899 }, { "epoch": 1.9628050779969082, "grad_norm": 0.5715386139496674, "learning_rate": 4.533629993091482e-09, "loss": 0.2643, "step": 41900 }, { "epoch": 1.9628519229868364, "grad_norm": 0.6004458856674961, "learning_rate": 4.5222209033610185e-09, "loss": 0.2786, "step": 41901 }, { "epoch": 1.9628987679767649, "grad_norm": 0.634342100847371, "learning_rate": 4.510826174478067e-09, "loss": 0.2697, "step": 41902 }, { "epoch": 1.9629456129666933, "grad_norm": 0.6240445131459766, "learning_rate": 4.499445806508129e-09, "loss": 0.2822, "step": 41903 }, { "epoch": 1.9629924579566216, "grad_norm": 0.6809054988697267, "learning_rate": 4.488079799516709e-09, "loss": 0.286, "step": 41904 }, { "epoch": 1.9630393029465498, "grad_norm": 0.582126964488681, "learning_rate": 4.476728153569032e-09, "loss": 0.2566, "step": 41905 }, { "epoch": 1.9630861479364783, "grad_norm": 0.5905232591368151, "learning_rate": 4.465390868730879e-09, "loss": 0.2758, "step": 41906 }, { "epoch": 1.9631329929264065, "grad_norm": 0.5706904626697967, "learning_rate": 4.45406794506692e-09, "loss": 0.2509, "step": 41907 }, { "epoch": 1.9631798379163348, "grad_norm": 0.6396683186752982, "learning_rate": 4.442759382642381e-09, "loss": 0.2742, "step": 41908 }, { "epoch": 1.9632266829062632, "grad_norm": 0.5692754692337609, "learning_rate": 4.431465181522765e-09, "loss": 0.2513, "step": 41909 }, { "epoch": 1.9632735278961915, "grad_norm": 0.5733834195533076, "learning_rate": 4.420185341772743e-09, "loss": 0.2635, "step": 41910 }, { "epoch": 1.9633203728861197, "grad_norm": 0.6157515677138932, "learning_rate": 4.40891986345754e-09, "loss": 0.2639, "step": 41911 }, { "epoch": 1.9633672178760482, "grad_norm": 0.599993365060881, "learning_rate": 4.397668746641548e-09, "loss": 0.2651, "step": 41912 }, { "epoch": 1.9634140628659766, "grad_norm": 0.5634543625652728, "learning_rate": 4.3864319913897165e-09, "loss": 0.2731, "step": 41913 }, { "epoch": 1.9634609078559047, "grad_norm": 0.5776896507380489, "learning_rate": 4.375209597766717e-09, "loss": 0.2674, "step": 41914 }, { "epoch": 1.9635077528458331, "grad_norm": 0.6193059296945759, "learning_rate": 4.364001565837217e-09, "loss": 0.281, "step": 41915 }, { "epoch": 1.9635545978357616, "grad_norm": 0.5958806994595404, "learning_rate": 4.352807895665612e-09, "loss": 0.2869, "step": 41916 }, { "epoch": 1.9636014428256898, "grad_norm": 0.5897632876591274, "learning_rate": 4.341628587316294e-09, "loss": 0.2595, "step": 41917 }, { "epoch": 1.963648287815618, "grad_norm": 0.6134786432127354, "learning_rate": 4.330463640853655e-09, "loss": 0.2791, "step": 41918 }, { "epoch": 1.9636951328055465, "grad_norm": 0.641468513926397, "learning_rate": 4.319313056342089e-09, "loss": 0.2753, "step": 41919 }, { "epoch": 1.9637419777954748, "grad_norm": 0.611077216721197, "learning_rate": 4.3081768338454345e-09, "loss": 0.2759, "step": 41920 }, { "epoch": 1.963788822785403, "grad_norm": 0.5835842025471722, "learning_rate": 4.297054973428083e-09, "loss": 0.2684, "step": 41921 }, { "epoch": 1.9638356677753315, "grad_norm": 0.5587974795305005, "learning_rate": 4.2859474751535955e-09, "loss": 0.2654, "step": 41922 }, { "epoch": 1.9638825127652597, "grad_norm": 0.5752448689608631, "learning_rate": 4.274854339086642e-09, "loss": 0.2542, "step": 41923 }, { "epoch": 1.963929357755188, "grad_norm": 0.6009457759578303, "learning_rate": 4.263775565290507e-09, "loss": 0.2842, "step": 41924 }, { "epoch": 1.9639762027451164, "grad_norm": 0.6320890452680212, "learning_rate": 4.252711153828748e-09, "loss": 0.2826, "step": 41925 }, { "epoch": 1.9640230477350449, "grad_norm": 0.6075082186434769, "learning_rate": 4.241661104765759e-09, "loss": 0.2827, "step": 41926 }, { "epoch": 1.964069892724973, "grad_norm": 0.6162486025240477, "learning_rate": 4.230625418164269e-09, "loss": 0.2808, "step": 41927 }, { "epoch": 1.9641167377149014, "grad_norm": 0.5864456077726159, "learning_rate": 4.21960409408867e-09, "loss": 0.2798, "step": 41928 }, { "epoch": 1.9641635827048298, "grad_norm": 0.5383438474276424, "learning_rate": 4.208597132601688e-09, "loss": 0.2478, "step": 41929 }, { "epoch": 1.964210427694758, "grad_norm": 0.6407610847872478, "learning_rate": 4.197604533767163e-09, "loss": 0.2726, "step": 41930 }, { "epoch": 1.9642572726846863, "grad_norm": 0.6401270475437502, "learning_rate": 4.186626297648099e-09, "loss": 0.2834, "step": 41931 }, { "epoch": 1.9643041176746148, "grad_norm": 0.5646877882150211, "learning_rate": 4.1756624243075026e-09, "loss": 0.2554, "step": 41932 }, { "epoch": 1.964350962664543, "grad_norm": 0.5856605742260341, "learning_rate": 4.164712913808932e-09, "loss": 0.2827, "step": 41933 }, { "epoch": 1.9643978076544713, "grad_norm": 0.6471460028019387, "learning_rate": 4.153777766214839e-09, "loss": 0.2962, "step": 41934 }, { "epoch": 1.9644446526443997, "grad_norm": 0.604114831153639, "learning_rate": 4.142856981588783e-09, "loss": 0.2687, "step": 41935 }, { "epoch": 1.964491497634328, "grad_norm": 0.6619430956055355, "learning_rate": 4.131950559992937e-09, "loss": 0.2847, "step": 41936 }, { "epoch": 1.9645383426242562, "grad_norm": 0.6175916360260241, "learning_rate": 4.1210585014905825e-09, "loss": 0.2797, "step": 41937 }, { "epoch": 1.9645851876141847, "grad_norm": 0.6107820295286296, "learning_rate": 4.110180806144171e-09, "loss": 0.2785, "step": 41938 }, { "epoch": 1.9646320326041131, "grad_norm": 0.6062484542052511, "learning_rate": 4.099317474016428e-09, "loss": 0.2781, "step": 41939 }, { "epoch": 1.9646788775940414, "grad_norm": 0.5959960012685844, "learning_rate": 4.088468505169807e-09, "loss": 0.2617, "step": 41940 }, { "epoch": 1.9647257225839696, "grad_norm": 0.6309615807582766, "learning_rate": 4.0776338996664776e-09, "loss": 0.2839, "step": 41941 }, { "epoch": 1.964772567573898, "grad_norm": 0.6071128222438477, "learning_rate": 4.0668136575691686e-09, "loss": 0.2767, "step": 41942 }, { "epoch": 1.9648194125638263, "grad_norm": 0.5646396096451876, "learning_rate": 4.05600777894033e-09, "loss": 0.2546, "step": 41943 }, { "epoch": 1.9648662575537545, "grad_norm": 0.6012613788528397, "learning_rate": 4.045216263841578e-09, "loss": 0.2635, "step": 41944 }, { "epoch": 1.964913102543683, "grad_norm": 0.5835585556419186, "learning_rate": 4.034439112335087e-09, "loss": 0.2669, "step": 41945 }, { "epoch": 1.9649599475336113, "grad_norm": 0.5581111365755053, "learning_rate": 4.023676324483306e-09, "loss": 0.2529, "step": 41946 }, { "epoch": 1.9650067925235395, "grad_norm": 0.547840941963122, "learning_rate": 4.012927900347851e-09, "loss": 0.2431, "step": 41947 }, { "epoch": 1.965053637513468, "grad_norm": 0.6185984553060285, "learning_rate": 4.0021938399906206e-09, "loss": 0.2587, "step": 41948 }, { "epoch": 1.9651004825033964, "grad_norm": 0.6001644210659665, "learning_rate": 3.991474143473506e-09, "loss": 0.2647, "step": 41949 }, { "epoch": 1.9651473274933244, "grad_norm": 0.5985420877526901, "learning_rate": 3.980768810858126e-09, "loss": 0.2729, "step": 41950 }, { "epoch": 1.965194172483253, "grad_norm": 0.5652480109953385, "learning_rate": 3.970077842205821e-09, "loss": 0.2566, "step": 41951 }, { "epoch": 1.9652410174731814, "grad_norm": 0.5628730275903615, "learning_rate": 3.959401237578764e-09, "loss": 0.2633, "step": 41952 }, { "epoch": 1.9652878624631096, "grad_norm": 0.5785760533234816, "learning_rate": 3.948738997037738e-09, "loss": 0.2669, "step": 41953 }, { "epoch": 1.9653347074530378, "grad_norm": 0.6056965712211806, "learning_rate": 3.9380911206443605e-09, "loss": 0.2737, "step": 41954 }, { "epoch": 1.9653815524429663, "grad_norm": 0.6012328729773238, "learning_rate": 3.927457608459973e-09, "loss": 0.2696, "step": 41955 }, { "epoch": 1.9654283974328945, "grad_norm": 0.6436996950247326, "learning_rate": 3.916838460545636e-09, "loss": 0.2822, "step": 41956 }, { "epoch": 1.9654752424228228, "grad_norm": 0.6186304504257653, "learning_rate": 3.9062336769626895e-09, "loss": 0.2763, "step": 41957 }, { "epoch": 1.9655220874127513, "grad_norm": 0.545768393317864, "learning_rate": 3.8956432577719196e-09, "loss": 0.2487, "step": 41958 }, { "epoch": 1.9655689324026795, "grad_norm": 0.6623951811544592, "learning_rate": 3.88506720303411e-09, "loss": 0.2844, "step": 41959 }, { "epoch": 1.9656157773926077, "grad_norm": 0.6089603692387087, "learning_rate": 3.8745055128106e-09, "loss": 0.2638, "step": 41960 }, { "epoch": 1.9656626223825362, "grad_norm": 0.5972938012899084, "learning_rate": 3.863958187161898e-09, "loss": 0.271, "step": 41961 }, { "epoch": 1.9657094673724647, "grad_norm": 0.604461240120759, "learning_rate": 3.853425226148788e-09, "loss": 0.2704, "step": 41962 }, { "epoch": 1.9657563123623927, "grad_norm": 0.5724075096906773, "learning_rate": 3.8429066298317775e-09, "loss": 0.2646, "step": 41963 }, { "epoch": 1.9658031573523211, "grad_norm": 0.6017580465041723, "learning_rate": 3.832402398271373e-09, "loss": 0.2638, "step": 41964 }, { "epoch": 1.9658500023422496, "grad_norm": 0.597821116557017, "learning_rate": 3.82191253152836e-09, "loss": 0.279, "step": 41965 }, { "epoch": 1.9658968473321778, "grad_norm": 0.6044086125133608, "learning_rate": 3.81143702966269e-09, "loss": 0.269, "step": 41966 }, { "epoch": 1.965943692322106, "grad_norm": 0.6139776933837267, "learning_rate": 3.800975892734593e-09, "loss": 0.2806, "step": 41967 }, { "epoch": 1.9659905373120345, "grad_norm": 0.6353975096876284, "learning_rate": 3.790529120804853e-09, "loss": 0.2839, "step": 41968 }, { "epoch": 1.9660373823019628, "grad_norm": 0.5965408395952793, "learning_rate": 3.780096713932868e-09, "loss": 0.276, "step": 41969 }, { "epoch": 1.966084227291891, "grad_norm": 0.6172331631865654, "learning_rate": 3.769678672179422e-09, "loss": 0.282, "step": 41970 }, { "epoch": 1.9661310722818195, "grad_norm": 0.548802577730136, "learning_rate": 3.759274995603912e-09, "loss": 0.2544, "step": 41971 }, { "epoch": 1.9661779172717477, "grad_norm": 0.6050923676450917, "learning_rate": 3.74888568426629e-09, "loss": 0.2661, "step": 41972 }, { "epoch": 1.966224762261676, "grad_norm": 0.6054189010305854, "learning_rate": 3.738510738226508e-09, "loss": 0.2807, "step": 41973 }, { "epoch": 1.9662716072516044, "grad_norm": 0.5861884891453412, "learning_rate": 3.7281501575439635e-09, "loss": 0.2627, "step": 41974 }, { "epoch": 1.966318452241533, "grad_norm": 0.5938723879689226, "learning_rate": 3.7178039422786083e-09, "loss": 0.263, "step": 41975 }, { "epoch": 1.9663652972314611, "grad_norm": 0.5783516780524065, "learning_rate": 3.7074720924898387e-09, "loss": 0.2671, "step": 41976 }, { "epoch": 1.9664121422213894, "grad_norm": 0.6228839051704717, "learning_rate": 3.69715460823733e-09, "loss": 0.2633, "step": 41977 }, { "epoch": 1.9664589872113178, "grad_norm": 0.5601629875281875, "learning_rate": 3.686851489580201e-09, "loss": 0.2797, "step": 41978 }, { "epoch": 1.966505832201246, "grad_norm": 0.5950755820081316, "learning_rate": 3.676562736577849e-09, "loss": 0.2558, "step": 41979 }, { "epoch": 1.9665526771911743, "grad_norm": 0.5903524466293474, "learning_rate": 3.666288349289393e-09, "loss": 0.2809, "step": 41980 }, { "epoch": 1.9665995221811028, "grad_norm": 0.6099248154170994, "learning_rate": 3.6560283277739528e-09, "loss": 0.2594, "step": 41981 }, { "epoch": 1.966646367171031, "grad_norm": 0.5967263893277068, "learning_rate": 3.6457826720906476e-09, "loss": 0.266, "step": 41982 }, { "epoch": 1.9666932121609593, "grad_norm": 0.6039145674933226, "learning_rate": 3.6355513822985966e-09, "loss": 0.2787, "step": 41983 }, { "epoch": 1.9667400571508877, "grad_norm": 0.6161413523231543, "learning_rate": 3.6253344584563643e-09, "loss": 0.2641, "step": 41984 }, { "epoch": 1.9667869021408162, "grad_norm": 0.5911621035011715, "learning_rate": 3.61513190062307e-09, "loss": 0.265, "step": 41985 }, { "epoch": 1.9668337471307442, "grad_norm": 0.5857755231882917, "learning_rate": 3.6049437088570005e-09, "loss": 0.268, "step": 41986 }, { "epoch": 1.9668805921206727, "grad_norm": 0.6062729328539752, "learning_rate": 3.594769883217275e-09, "loss": 0.2794, "step": 41987 }, { "epoch": 1.9669274371106011, "grad_norm": 0.6063773409254256, "learning_rate": 3.5846104237619028e-09, "loss": 0.2714, "step": 41988 }, { "epoch": 1.9669742821005294, "grad_norm": 0.6057229838259744, "learning_rate": 3.574465330550003e-09, "loss": 0.2806, "step": 41989 }, { "epoch": 1.9670211270904576, "grad_norm": 0.5387182387044085, "learning_rate": 3.5643346036393077e-09, "loss": 0.2533, "step": 41990 }, { "epoch": 1.967067972080386, "grad_norm": 0.6372243601990134, "learning_rate": 3.5542182430886587e-09, "loss": 0.2837, "step": 41991 }, { "epoch": 1.9671148170703143, "grad_norm": 0.601970758564685, "learning_rate": 3.5441162489557866e-09, "loss": 0.2759, "step": 41992 }, { "epoch": 1.9671616620602426, "grad_norm": 0.6067237672697086, "learning_rate": 3.534028621299257e-09, "loss": 0.2791, "step": 41993 }, { "epoch": 1.967208507050171, "grad_norm": 0.5833848666142751, "learning_rate": 3.5239553601768007e-09, "loss": 0.2612, "step": 41994 }, { "epoch": 1.9672553520400993, "grad_norm": 0.5577017126597952, "learning_rate": 3.513896465646427e-09, "loss": 0.2682, "step": 41995 }, { "epoch": 1.9673021970300275, "grad_norm": 0.5625228876801592, "learning_rate": 3.503851937766145e-09, "loss": 0.2559, "step": 41996 }, { "epoch": 1.967349042019956, "grad_norm": 0.5802158548168642, "learning_rate": 3.493821776593964e-09, "loss": 0.2475, "step": 41997 }, { "epoch": 1.9673958870098844, "grad_norm": 0.6412700359004158, "learning_rate": 3.4838059821870606e-09, "loss": 0.2805, "step": 41998 }, { "epoch": 1.9674427319998125, "grad_norm": 0.5919641363859758, "learning_rate": 3.473804554603721e-09, "loss": 0.2664, "step": 41999 }, { "epoch": 1.967489576989741, "grad_norm": 0.5777543560257997, "learning_rate": 3.463817493900845e-09, "loss": 0.2708, "step": 42000 }, { "epoch": 1.9675364219796694, "grad_norm": 0.5825612902715172, "learning_rate": 3.453844800136441e-09, "loss": 0.2751, "step": 42001 }, { "epoch": 1.9675832669695976, "grad_norm": 0.5980137420205017, "learning_rate": 3.4438864733674084e-09, "loss": 0.2693, "step": 42002 }, { "epoch": 1.9676301119595259, "grad_norm": 0.6321081032760596, "learning_rate": 3.4339425136517555e-09, "loss": 0.276, "step": 42003 }, { "epoch": 1.9676769569494543, "grad_norm": 0.6282145092032856, "learning_rate": 3.4240129210458274e-09, "loss": 0.2823, "step": 42004 }, { "epoch": 1.9677238019393826, "grad_norm": 0.5826648168258162, "learning_rate": 3.414097695607632e-09, "loss": 0.2452, "step": 42005 }, { "epoch": 1.9677706469293108, "grad_norm": 0.580865268090276, "learning_rate": 3.404196837393514e-09, "loss": 0.2705, "step": 42006 }, { "epoch": 1.9678174919192393, "grad_norm": 0.6298592329667682, "learning_rate": 3.39431034646065e-09, "loss": 0.2749, "step": 42007 }, { "epoch": 1.9678643369091675, "grad_norm": 0.5809718451992779, "learning_rate": 3.3844382228662154e-09, "loss": 0.2596, "step": 42008 }, { "epoch": 1.9679111818990958, "grad_norm": 0.6149766667653525, "learning_rate": 3.3745804666668327e-09, "loss": 0.2767, "step": 42009 }, { "epoch": 1.9679580268890242, "grad_norm": 0.6143587971286817, "learning_rate": 3.3647370779194e-09, "loss": 0.2741, "step": 42010 }, { "epoch": 1.9680048718789527, "grad_norm": 0.591764977099131, "learning_rate": 3.354908056679984e-09, "loss": 0.2663, "step": 42011 }, { "epoch": 1.968051716868881, "grad_norm": 0.5995980863879622, "learning_rate": 3.3450934030060388e-09, "loss": 0.2744, "step": 42012 }, { "epoch": 1.9680985618588092, "grad_norm": 0.6107522291629454, "learning_rate": 3.3352931169530755e-09, "loss": 0.2749, "step": 42013 }, { "epoch": 1.9681454068487376, "grad_norm": 0.6346665867013417, "learning_rate": 3.3255071985782705e-09, "loss": 0.2801, "step": 42014 }, { "epoch": 1.9681922518386659, "grad_norm": 0.6360616587145608, "learning_rate": 3.31573564793769e-09, "loss": 0.2795, "step": 42015 }, { "epoch": 1.968239096828594, "grad_norm": 0.5972742027011401, "learning_rate": 3.3059784650874006e-09, "loss": 0.2684, "step": 42016 }, { "epoch": 1.9682859418185226, "grad_norm": 0.6119120749670005, "learning_rate": 3.2962356500837455e-09, "loss": 0.2782, "step": 42017 }, { "epoch": 1.9683327868084508, "grad_norm": 0.6062640382368761, "learning_rate": 3.2865072029827918e-09, "loss": 0.2693, "step": 42018 }, { "epoch": 1.968379631798379, "grad_norm": 0.652717269751401, "learning_rate": 3.2767931238406046e-09, "loss": 0.2646, "step": 42019 }, { "epoch": 1.9684264767883075, "grad_norm": 0.604997832805181, "learning_rate": 3.2670934127126965e-09, "loss": 0.2637, "step": 42020 }, { "epoch": 1.968473321778236, "grad_norm": 0.603542826697499, "learning_rate": 3.2574080696551324e-09, "loss": 0.2839, "step": 42021 }, { "epoch": 1.968520166768164, "grad_norm": 0.6142591340788958, "learning_rate": 3.247737094723702e-09, "loss": 0.2792, "step": 42022 }, { "epoch": 1.9685670117580925, "grad_norm": 0.6361210592043814, "learning_rate": 3.2380804879739158e-09, "loss": 0.2819, "step": 42023 }, { "epoch": 1.968613856748021, "grad_norm": 0.6069255865801485, "learning_rate": 3.228438249461563e-09, "loss": 0.2899, "step": 42024 }, { "epoch": 1.9686607017379492, "grad_norm": 0.5926950562575272, "learning_rate": 3.2188103792421545e-09, "loss": 0.266, "step": 42025 }, { "epoch": 1.9687075467278774, "grad_norm": 0.5945535411910158, "learning_rate": 3.209196877370646e-09, "loss": 0.2871, "step": 42026 }, { "epoch": 1.9687543917178059, "grad_norm": 0.582857619814824, "learning_rate": 3.199597743902827e-09, "loss": 0.2756, "step": 42027 }, { "epoch": 1.968801236707734, "grad_norm": 0.5899090412607172, "learning_rate": 3.1900129788939303e-09, "loss": 0.2794, "step": 42028 }, { "epoch": 1.9688480816976623, "grad_norm": 0.6409478742391544, "learning_rate": 3.1804425823986353e-09, "loss": 0.2879, "step": 42029 }, { "epoch": 1.9688949266875908, "grad_norm": 0.6287978762788208, "learning_rate": 3.170886554472452e-09, "loss": 0.2687, "step": 42030 }, { "epoch": 1.968941771677519, "grad_norm": 0.6099895650528823, "learning_rate": 3.1613448951703373e-09, "loss": 0.2548, "step": 42031 }, { "epoch": 1.9689886166674473, "grad_norm": 0.5748250400539697, "learning_rate": 3.1518176045469697e-09, "loss": 0.2605, "step": 42032 }, { "epoch": 1.9690354616573758, "grad_norm": 0.5756166869818594, "learning_rate": 3.142304682657582e-09, "loss": 0.2734, "step": 42033 }, { "epoch": 1.9690823066473042, "grad_norm": 0.6259607681739592, "learning_rate": 3.1328061295565758e-09, "loss": 0.2727, "step": 42034 }, { "epoch": 1.9691291516372322, "grad_norm": 0.637291775864714, "learning_rate": 3.1233219452986298e-09, "loss": 0.2702, "step": 42035 }, { "epoch": 1.9691759966271607, "grad_norm": 0.6204061187452561, "learning_rate": 3.1138521299384215e-09, "loss": 0.2656, "step": 42036 }, { "epoch": 1.9692228416170892, "grad_norm": 0.6538022392908641, "learning_rate": 3.104396683530353e-09, "loss": 0.2953, "step": 42037 }, { "epoch": 1.9692696866070174, "grad_norm": 0.5596186216065996, "learning_rate": 3.094955606129102e-09, "loss": 0.2502, "step": 42038 }, { "epoch": 1.9693165315969456, "grad_norm": 0.614471840940702, "learning_rate": 3.0855288977887922e-09, "loss": 0.2629, "step": 42039 }, { "epoch": 1.969363376586874, "grad_norm": 0.5733231233270036, "learning_rate": 3.0761165585635466e-09, "loss": 0.2534, "step": 42040 }, { "epoch": 1.9694102215768023, "grad_norm": 0.6561503049172805, "learning_rate": 3.0667185885077666e-09, "loss": 0.2818, "step": 42041 }, { "epoch": 1.9694570665667306, "grad_norm": 0.6117934509761414, "learning_rate": 3.0573349876752977e-09, "loss": 0.2795, "step": 42042 }, { "epoch": 1.969503911556659, "grad_norm": 0.5716897424817853, "learning_rate": 3.0479657561202635e-09, "loss": 0.2723, "step": 42043 }, { "epoch": 1.9695507565465873, "grad_norm": 0.6446215489229494, "learning_rate": 3.0386108938967872e-09, "loss": 0.2753, "step": 42044 }, { "epoch": 1.9695976015365155, "grad_norm": 0.6199377473356923, "learning_rate": 3.029270401058437e-09, "loss": 0.2746, "step": 42045 }, { "epoch": 1.969644446526444, "grad_norm": 0.5738609448184682, "learning_rate": 3.0199442776590593e-09, "loss": 0.277, "step": 42046 }, { "epoch": 1.9696912915163725, "grad_norm": 0.6678701323168468, "learning_rate": 3.010632523752499e-09, "loss": 0.2721, "step": 42047 }, { "epoch": 1.9697381365063007, "grad_norm": 0.5840556159850749, "learning_rate": 3.00133513939177e-09, "loss": 0.2796, "step": 42048 }, { "epoch": 1.969784981496229, "grad_norm": 0.603855304105125, "learning_rate": 2.9920521246309952e-09, "loss": 0.272, "step": 42049 }, { "epoch": 1.9698318264861574, "grad_norm": 0.5920190877579894, "learning_rate": 2.9827834795234655e-09, "loss": 0.2698, "step": 42050 }, { "epoch": 1.9698786714760856, "grad_norm": 0.6187267029140373, "learning_rate": 2.973529204122194e-09, "loss": 0.2769, "step": 42051 }, { "epoch": 1.9699255164660139, "grad_norm": 0.601880534633269, "learning_rate": 2.9642892984807493e-09, "loss": 0.2758, "step": 42052 }, { "epoch": 1.9699723614559423, "grad_norm": 0.6089187675284042, "learning_rate": 2.955063762652144e-09, "loss": 0.2754, "step": 42053 }, { "epoch": 1.9700192064458706, "grad_norm": 0.559909082400049, "learning_rate": 2.945852596689669e-09, "loss": 0.258, "step": 42054 }, { "epoch": 1.9700660514357988, "grad_norm": 0.5721477336483436, "learning_rate": 2.93665580064606e-09, "loss": 0.2565, "step": 42055 }, { "epoch": 1.9701128964257273, "grad_norm": 0.6198549713406272, "learning_rate": 2.9274733745746078e-09, "loss": 0.2665, "step": 42056 }, { "epoch": 1.9701597414156558, "grad_norm": 0.6344638922245229, "learning_rate": 2.9183053185274922e-09, "loss": 0.2696, "step": 42057 }, { "epoch": 1.9702065864055838, "grad_norm": 0.6000843924845508, "learning_rate": 2.9091516325582823e-09, "loss": 0.2604, "step": 42058 }, { "epoch": 1.9702534313955122, "grad_norm": 0.5628502008390529, "learning_rate": 2.9000123167191587e-09, "loss": 0.2548, "step": 42059 }, { "epoch": 1.9703002763854407, "grad_norm": 0.5736911491782956, "learning_rate": 2.890887371062856e-09, "loss": 0.2594, "step": 42060 }, { "epoch": 1.970347121375369, "grad_norm": 0.5983219742414028, "learning_rate": 2.8817767956421104e-09, "loss": 0.2637, "step": 42061 }, { "epoch": 1.9703939663652972, "grad_norm": 0.5808309236322344, "learning_rate": 2.8726805905088253e-09, "loss": 0.2659, "step": 42062 }, { "epoch": 1.9704408113552256, "grad_norm": 0.6212594057132387, "learning_rate": 2.8635987557157352e-09, "loss": 0.2744, "step": 42063 }, { "epoch": 1.9704876563451539, "grad_norm": 0.6007979175658387, "learning_rate": 2.8545312913150214e-09, "loss": 0.2682, "step": 42064 }, { "epoch": 1.9705345013350821, "grad_norm": 0.6290852407623096, "learning_rate": 2.8454781973585867e-09, "loss": 0.2825, "step": 42065 }, { "epoch": 1.9705813463250106, "grad_norm": 0.5828082747246874, "learning_rate": 2.836439473899166e-09, "loss": 0.2581, "step": 42066 }, { "epoch": 1.9706281913149388, "grad_norm": 0.5596933304617702, "learning_rate": 2.8274151209881085e-09, "loss": 0.2451, "step": 42067 }, { "epoch": 1.970675036304867, "grad_norm": 0.6395210892030792, "learning_rate": 2.818405138677871e-09, "loss": 0.2812, "step": 42068 }, { "epoch": 1.9707218812947955, "grad_norm": 0.5808966319931969, "learning_rate": 2.8094095270198017e-09, "loss": 0.2556, "step": 42069 }, { "epoch": 1.970768726284724, "grad_norm": 0.6172229403824621, "learning_rate": 2.8004282860660813e-09, "loss": 0.2699, "step": 42070 }, { "epoch": 1.970815571274652, "grad_norm": 0.6240186937451162, "learning_rate": 2.791461415868335e-09, "loss": 0.2681, "step": 42071 }, { "epoch": 1.9708624162645805, "grad_norm": 0.5795483305386971, "learning_rate": 2.7825089164779107e-09, "loss": 0.2789, "step": 42072 }, { "epoch": 1.970909261254509, "grad_norm": 0.5978684693881561, "learning_rate": 2.773570787946711e-09, "loss": 0.2699, "step": 42073 }, { "epoch": 1.9709561062444372, "grad_norm": 0.5819520905031602, "learning_rate": 2.7646470303258068e-09, "loss": 0.2563, "step": 42074 }, { "epoch": 1.9710029512343654, "grad_norm": 0.5486246407101713, "learning_rate": 2.7557376436668227e-09, "loss": 0.2523, "step": 42075 }, { "epoch": 1.9710497962242939, "grad_norm": 0.6147798064216609, "learning_rate": 2.7468426280208293e-09, "loss": 0.2651, "step": 42076 }, { "epoch": 1.9710966412142221, "grad_norm": 0.5907005450416335, "learning_rate": 2.737961983438897e-09, "loss": 0.2697, "step": 42077 }, { "epoch": 1.9711434862041504, "grad_norm": 0.6399591669110729, "learning_rate": 2.729095709972651e-09, "loss": 0.2948, "step": 42078 }, { "epoch": 1.9711903311940788, "grad_norm": 0.6483499520739543, "learning_rate": 2.720243807672607e-09, "loss": 0.3051, "step": 42079 }, { "epoch": 1.971237176184007, "grad_norm": 0.6047076297612345, "learning_rate": 2.711406276589834e-09, "loss": 0.2578, "step": 42080 }, { "epoch": 1.9712840211739353, "grad_norm": 0.577165771656802, "learning_rate": 2.7025831167754037e-09, "loss": 0.269, "step": 42081 }, { "epoch": 1.9713308661638638, "grad_norm": 0.5856657415306132, "learning_rate": 2.6937743282798303e-09, "loss": 0.2609, "step": 42082 }, { "epoch": 1.9713777111537922, "grad_norm": 0.5825248764334808, "learning_rate": 2.684979911154184e-09, "loss": 0.2664, "step": 42083 }, { "epoch": 1.9714245561437205, "grad_norm": 0.6013311585778373, "learning_rate": 2.6761998654484254e-09, "loss": 0.2739, "step": 42084 }, { "epoch": 1.9714714011336487, "grad_norm": 0.5628032267628144, "learning_rate": 2.667434191213625e-09, "loss": 0.258, "step": 42085 }, { "epoch": 1.9715182461235772, "grad_norm": 0.6460151289845425, "learning_rate": 2.6586828885000192e-09, "loss": 0.265, "step": 42086 }, { "epoch": 1.9715650911135054, "grad_norm": 0.601393276924848, "learning_rate": 2.6499459573581242e-09, "loss": 0.2786, "step": 42087 }, { "epoch": 1.9716119361034337, "grad_norm": 0.5957813526235121, "learning_rate": 2.6412233978379e-09, "loss": 0.2712, "step": 42088 }, { "epoch": 1.9716587810933621, "grad_norm": 0.5673013263315752, "learning_rate": 2.632515209989861e-09, "loss": 0.2599, "step": 42089 }, { "epoch": 1.9717056260832904, "grad_norm": 0.5450560370178444, "learning_rate": 2.6238213938642453e-09, "loss": 0.2444, "step": 42090 }, { "epoch": 1.9717524710732186, "grad_norm": 0.6045950711066048, "learning_rate": 2.615141949510458e-09, "loss": 0.2709, "step": 42091 }, { "epoch": 1.971799316063147, "grad_norm": 0.5894940814069901, "learning_rate": 2.606476876979014e-09, "loss": 0.2622, "step": 42092 }, { "epoch": 1.9718461610530755, "grad_norm": 0.5559682374643375, "learning_rate": 2.5978261763195954e-09, "loss": 0.2544, "step": 42093 }, { "epoch": 1.9718930060430035, "grad_norm": 0.5987460685385262, "learning_rate": 2.5891898475818853e-09, "loss": 0.2589, "step": 42094 }, { "epoch": 1.971939851032932, "grad_norm": 0.5962680958120464, "learning_rate": 2.580567890815844e-09, "loss": 0.2777, "step": 42095 }, { "epoch": 1.9719866960228605, "grad_norm": 0.5876454000666027, "learning_rate": 2.5719603060708754e-09, "loss": 0.2588, "step": 42096 }, { "epoch": 1.9720335410127887, "grad_norm": 0.6133834100128955, "learning_rate": 2.5633670933966626e-09, "loss": 0.2677, "step": 42097 }, { "epoch": 1.972080386002717, "grad_norm": 0.5925735079729725, "learning_rate": 2.554788252842333e-09, "loss": 0.2838, "step": 42098 }, { "epoch": 1.9721272309926454, "grad_norm": 0.5873555004498437, "learning_rate": 2.5462237844575686e-09, "loss": 0.2723, "step": 42099 }, { "epoch": 1.9721740759825737, "grad_norm": 0.5845299974547853, "learning_rate": 2.5376736882917753e-09, "loss": 0.2718, "step": 42100 }, { "epoch": 1.972220920972502, "grad_norm": 0.5961865137733381, "learning_rate": 2.529137964393802e-09, "loss": 0.2616, "step": 42101 }, { "epoch": 1.9722677659624304, "grad_norm": 0.6256638092110924, "learning_rate": 2.5206166128127763e-09, "loss": 0.2628, "step": 42102 }, { "epoch": 1.9723146109523586, "grad_norm": 0.6005423784393672, "learning_rate": 2.512109633598103e-09, "loss": 0.2647, "step": 42103 }, { "epoch": 1.9723614559422868, "grad_norm": 0.5877580394073, "learning_rate": 2.5036170267986327e-09, "loss": 0.2736, "step": 42104 }, { "epoch": 1.9724083009322153, "grad_norm": 0.6157759994534139, "learning_rate": 2.4951387924626593e-09, "loss": 0.2674, "step": 42105 }, { "epoch": 1.9724551459221438, "grad_norm": 0.5496934586521288, "learning_rate": 2.486674930639865e-09, "loss": 0.252, "step": 42106 }, { "epoch": 1.9725019909120718, "grad_norm": 0.6934168188886656, "learning_rate": 2.478225441378268e-09, "loss": 0.2749, "step": 42107 }, { "epoch": 1.9725488359020003, "grad_norm": 0.5922222941309578, "learning_rate": 2.469790324726995e-09, "loss": 0.2599, "step": 42108 }, { "epoch": 1.9725956808919287, "grad_norm": 0.6119154928484786, "learning_rate": 2.461369580734063e-09, "loss": 0.281, "step": 42109 }, { "epoch": 1.972642525881857, "grad_norm": 0.6119056598384598, "learning_rate": 2.4529632094486e-09, "loss": 0.2644, "step": 42110 }, { "epoch": 1.9726893708717852, "grad_norm": 0.6424985295478016, "learning_rate": 2.444571210918345e-09, "loss": 0.2915, "step": 42111 }, { "epoch": 1.9727362158617137, "grad_norm": 0.574842838150189, "learning_rate": 2.4361935851918705e-09, "loss": 0.264, "step": 42112 }, { "epoch": 1.972783060851642, "grad_norm": 0.5931599901166827, "learning_rate": 2.4278303323174713e-09, "loss": 0.2762, "step": 42113 }, { "epoch": 1.9728299058415701, "grad_norm": 0.5757879615306577, "learning_rate": 2.4194814523428866e-09, "loss": 0.2669, "step": 42114 }, { "epoch": 1.9728767508314986, "grad_norm": 0.5913277761596293, "learning_rate": 2.4111469453166892e-09, "loss": 0.2786, "step": 42115 }, { "epoch": 1.9729235958214268, "grad_norm": 0.6006613010893745, "learning_rate": 2.4028268112866184e-09, "loss": 0.2749, "step": 42116 }, { "epoch": 1.972970440811355, "grad_norm": 0.6069239960172499, "learning_rate": 2.3945210503004134e-09, "loss": 0.2801, "step": 42117 }, { "epoch": 1.9730172858012835, "grad_norm": 0.6300931328838076, "learning_rate": 2.3862296624058144e-09, "loss": 0.2843, "step": 42118 }, { "epoch": 1.973064130791212, "grad_norm": 0.6083591110780963, "learning_rate": 2.377952647651116e-09, "loss": 0.2717, "step": 42119 }, { "epoch": 1.9731109757811403, "grad_norm": 0.6139715610846311, "learning_rate": 2.369690006083225e-09, "loss": 0.2848, "step": 42120 }, { "epoch": 1.9731578207710685, "grad_norm": 0.6211076360508531, "learning_rate": 2.3614417377501584e-09, "loss": 0.2566, "step": 42121 }, { "epoch": 1.973204665760997, "grad_norm": 0.598399429905467, "learning_rate": 2.3532078426991014e-09, "loss": 0.2658, "step": 42122 }, { "epoch": 1.9732515107509252, "grad_norm": 0.597788251770631, "learning_rate": 2.3449883209777926e-09, "loss": 0.2739, "step": 42123 }, { "epoch": 1.9732983557408534, "grad_norm": 0.5890079763033284, "learning_rate": 2.336783172633139e-09, "loss": 0.2663, "step": 42124 }, { "epoch": 1.973345200730782, "grad_norm": 0.6322330566223562, "learning_rate": 2.3285923977126033e-09, "loss": 0.2802, "step": 42125 }, { "epoch": 1.9733920457207101, "grad_norm": 0.5506146460941528, "learning_rate": 2.3204159962633697e-09, "loss": 0.2449, "step": 42126 }, { "epoch": 1.9734388907106384, "grad_norm": 0.6217721851033522, "learning_rate": 2.3122539683323454e-09, "loss": 0.2661, "step": 42127 }, { "epoch": 1.9734857357005668, "grad_norm": 0.5932943757810272, "learning_rate": 2.3041063139664366e-09, "loss": 0.2788, "step": 42128 }, { "epoch": 1.9735325806904953, "grad_norm": 0.5696938791760023, "learning_rate": 2.2959730332125506e-09, "loss": 0.2589, "step": 42129 }, { "epoch": 1.9735794256804233, "grad_norm": 0.5520581919760433, "learning_rate": 2.2878541261175944e-09, "loss": 0.2463, "step": 42130 }, { "epoch": 1.9736262706703518, "grad_norm": 0.5706824285935875, "learning_rate": 2.2797495927281977e-09, "loss": 0.2484, "step": 42131 }, { "epoch": 1.9736731156602803, "grad_norm": 0.5886827700474099, "learning_rate": 2.2716594330912667e-09, "loss": 0.2638, "step": 42132 }, { "epoch": 1.9737199606502085, "grad_norm": 0.623520238400063, "learning_rate": 2.2635836472531537e-09, "loss": 0.2681, "step": 42133 }, { "epoch": 1.9737668056401367, "grad_norm": 0.5779730426204996, "learning_rate": 2.25552223526021e-09, "loss": 0.2648, "step": 42134 }, { "epoch": 1.9738136506300652, "grad_norm": 0.6632903562652117, "learning_rate": 2.2474751971587883e-09, "loss": 0.257, "step": 42135 }, { "epoch": 1.9738604956199934, "grad_norm": 0.5778009799433177, "learning_rate": 2.239442532995795e-09, "loss": 0.2645, "step": 42136 }, { "epoch": 1.9739073406099217, "grad_norm": 0.6484374274669663, "learning_rate": 2.231424242816749e-09, "loss": 0.2857, "step": 42137 }, { "epoch": 1.9739541855998501, "grad_norm": 0.5552246235750226, "learning_rate": 2.223420326668002e-09, "loss": 0.2471, "step": 42138 }, { "epoch": 1.9740010305897784, "grad_norm": 0.5913653018230798, "learning_rate": 2.2154307845959067e-09, "loss": 0.2688, "step": 42139 }, { "epoch": 1.9740478755797066, "grad_norm": 0.5949289519141822, "learning_rate": 2.2074556166459816e-09, "loss": 0.2694, "step": 42140 }, { "epoch": 1.974094720569635, "grad_norm": 0.6117648207835977, "learning_rate": 2.1994948228645784e-09, "loss": 0.2852, "step": 42141 }, { "epoch": 1.9741415655595635, "grad_norm": 0.6180778438848937, "learning_rate": 2.191548403296939e-09, "loss": 0.2729, "step": 42142 }, { "epoch": 1.9741884105494916, "grad_norm": 0.6256359324920072, "learning_rate": 2.1836163579894154e-09, "loss": 0.2728, "step": 42143 }, { "epoch": 1.97423525553942, "grad_norm": 0.541957167268977, "learning_rate": 2.1756986869872486e-09, "loss": 0.2435, "step": 42144 }, { "epoch": 1.9742821005293485, "grad_norm": 0.6545350682008266, "learning_rate": 2.167795390336236e-09, "loss": 0.282, "step": 42145 }, { "epoch": 1.9743289455192767, "grad_norm": 0.5908615482308585, "learning_rate": 2.159906468081896e-09, "loss": 0.2742, "step": 42146 }, { "epoch": 1.974375790509205, "grad_norm": 0.6308272097268883, "learning_rate": 2.152031920269193e-09, "loss": 0.2768, "step": 42147 }, { "epoch": 1.9744226354991334, "grad_norm": 0.6291355962489648, "learning_rate": 2.144171746943924e-09, "loss": 0.2848, "step": 42148 }, { "epoch": 1.9744694804890617, "grad_norm": 0.5801215957419303, "learning_rate": 2.136325948151052e-09, "loss": 0.2501, "step": 42149 }, { "epoch": 1.97451632547899, "grad_norm": 0.5847614358607159, "learning_rate": 2.1284945239358203e-09, "loss": 0.2718, "step": 42150 }, { "epoch": 1.9745631704689184, "grad_norm": 0.6276006474059027, "learning_rate": 2.120677474343191e-09, "loss": 0.2741, "step": 42151 }, { "epoch": 1.9746100154588466, "grad_norm": 0.5842718342186167, "learning_rate": 2.1128747994184075e-09, "loss": 0.2659, "step": 42152 }, { "epoch": 1.9746568604487749, "grad_norm": 0.5979185183630593, "learning_rate": 2.1050864992061547e-09, "loss": 0.2625, "step": 42153 }, { "epoch": 1.9747037054387033, "grad_norm": 0.6163261758676168, "learning_rate": 2.0973125737511203e-09, "loss": 0.2596, "step": 42154 }, { "epoch": 1.9747505504286318, "grad_norm": 0.5422552963424215, "learning_rate": 2.089553023098545e-09, "loss": 0.2517, "step": 42155 }, { "epoch": 1.97479739541856, "grad_norm": 0.583206039414556, "learning_rate": 2.0818078472925605e-09, "loss": 0.2619, "step": 42156 }, { "epoch": 1.9748442404084883, "grad_norm": 0.6024718895848786, "learning_rate": 2.074077046378131e-09, "loss": 0.2634, "step": 42157 }, { "epoch": 1.9748910853984167, "grad_norm": 0.5526847754412106, "learning_rate": 2.066360620399388e-09, "loss": 0.2566, "step": 42158 }, { "epoch": 1.974937930388345, "grad_norm": 0.5967473596167608, "learning_rate": 2.0586585694010175e-09, "loss": 0.2786, "step": 42159 }, { "epoch": 1.9749847753782732, "grad_norm": 0.6129566111452466, "learning_rate": 2.050970893427151e-09, "loss": 0.2749, "step": 42160 }, { "epoch": 1.9750316203682017, "grad_norm": 0.7040678969214874, "learning_rate": 2.0432975925219202e-09, "loss": 0.2658, "step": 42161 }, { "epoch": 1.97507846535813, "grad_norm": 0.6456536414661954, "learning_rate": 2.035638666730011e-09, "loss": 0.2827, "step": 42162 }, { "epoch": 1.9751253103480582, "grad_norm": 0.5782253545740585, "learning_rate": 2.027994116095e-09, "loss": 0.2694, "step": 42163 }, { "epoch": 1.9751721553379866, "grad_norm": 0.5697158278039895, "learning_rate": 2.0203639406610187e-09, "loss": 0.2459, "step": 42164 }, { "epoch": 1.975219000327915, "grad_norm": 0.6030774723162836, "learning_rate": 2.0127481404721982e-09, "loss": 0.2611, "step": 42165 }, { "epoch": 1.975265845317843, "grad_norm": 0.582519290912395, "learning_rate": 2.005146715571837e-09, "loss": 0.2693, "step": 42166 }, { "epoch": 1.9753126903077716, "grad_norm": 0.6110599829693157, "learning_rate": 1.997559666004345e-09, "loss": 0.2637, "step": 42167 }, { "epoch": 1.9753595352977, "grad_norm": 0.607862865380688, "learning_rate": 1.98998699181302e-09, "loss": 0.2676, "step": 42168 }, { "epoch": 1.9754063802876283, "grad_norm": 0.5910994585270853, "learning_rate": 1.982428693041161e-09, "loss": 0.2692, "step": 42169 }, { "epoch": 1.9754532252775565, "grad_norm": 0.5727241535747477, "learning_rate": 1.974884769732899e-09, "loss": 0.2579, "step": 42170 }, { "epoch": 1.975500070267485, "grad_norm": 0.6514790642538241, "learning_rate": 1.9673552219309778e-09, "loss": 0.2737, "step": 42171 }, { "epoch": 1.9755469152574132, "grad_norm": 0.5865082324657059, "learning_rate": 1.9598400496792514e-09, "loss": 0.2707, "step": 42172 }, { "epoch": 1.9755937602473415, "grad_norm": 0.5509938713072875, "learning_rate": 1.9523392530210184e-09, "loss": 0.2555, "step": 42173 }, { "epoch": 1.97564060523727, "grad_norm": 0.5833093771565623, "learning_rate": 1.944852831999022e-09, "loss": 0.2769, "step": 42174 }, { "epoch": 1.9756874502271982, "grad_norm": 0.5882587744932405, "learning_rate": 1.9373807866562843e-09, "loss": 0.2704, "step": 42175 }, { "epoch": 1.9757342952171264, "grad_norm": 0.6269340351619132, "learning_rate": 1.9299231170363807e-09, "loss": 0.2666, "step": 42176 }, { "epoch": 1.9757811402070549, "grad_norm": 0.5702460264573154, "learning_rate": 1.9224798231817777e-09, "loss": 0.2731, "step": 42177 }, { "epoch": 1.9758279851969833, "grad_norm": 0.6083524383713617, "learning_rate": 1.915050905135496e-09, "loss": 0.2483, "step": 42178 }, { "epoch": 1.9758748301869113, "grad_norm": 0.6099613113261216, "learning_rate": 1.9076363629402794e-09, "loss": 0.2754, "step": 42179 }, { "epoch": 1.9759216751768398, "grad_norm": 0.577185703198822, "learning_rate": 1.900236196638594e-09, "loss": 0.2629, "step": 42180 }, { "epoch": 1.9759685201667683, "grad_norm": 0.5877000758188958, "learning_rate": 1.8928504062731833e-09, "loss": 0.2643, "step": 42181 }, { "epoch": 1.9760153651566965, "grad_norm": 0.5860468234556494, "learning_rate": 1.8854789918865134e-09, "loss": 0.2618, "step": 42182 }, { "epoch": 1.9760622101466248, "grad_norm": 0.5946294598991846, "learning_rate": 1.878121953521328e-09, "loss": 0.2616, "step": 42183 }, { "epoch": 1.9761090551365532, "grad_norm": 0.5588266613853797, "learning_rate": 1.87077929121926e-09, "loss": 0.2577, "step": 42184 }, { "epoch": 1.9761559001264815, "grad_norm": 0.575563982387106, "learning_rate": 1.863451005023331e-09, "loss": 0.2681, "step": 42185 }, { "epoch": 1.9762027451164097, "grad_norm": 0.601312165447798, "learning_rate": 1.8561370949751744e-09, "loss": 0.2751, "step": 42186 }, { "epoch": 1.9762495901063382, "grad_norm": 0.570030408526158, "learning_rate": 1.848837561117256e-09, "loss": 0.2628, "step": 42187 }, { "epoch": 1.9762964350962664, "grad_norm": 0.5580451584682944, "learning_rate": 1.841552403491209e-09, "loss": 0.2555, "step": 42188 }, { "epoch": 1.9763432800861946, "grad_norm": 0.5357538178092003, "learning_rate": 1.8342816221389447e-09, "loss": 0.2507, "step": 42189 }, { "epoch": 1.976390125076123, "grad_norm": 0.5967625454880748, "learning_rate": 1.8270252171026515e-09, "loss": 0.2558, "step": 42190 }, { "epoch": 1.9764369700660516, "grad_norm": 0.6401868648527059, "learning_rate": 1.81978318842424e-09, "loss": 0.2838, "step": 42191 }, { "epoch": 1.9764838150559798, "grad_norm": 0.611017833740856, "learning_rate": 1.8125555361447889e-09, "loss": 0.2714, "step": 42192 }, { "epoch": 1.976530660045908, "grad_norm": 0.6219563856873626, "learning_rate": 1.8053422603062086e-09, "loss": 0.2777, "step": 42193 }, { "epoch": 1.9765775050358365, "grad_norm": 0.5518741493346677, "learning_rate": 1.7981433609501332e-09, "loss": 0.2529, "step": 42194 }, { "epoch": 1.9766243500257648, "grad_norm": 0.5903307471923976, "learning_rate": 1.79095883811764e-09, "loss": 0.2574, "step": 42195 }, { "epoch": 1.976671195015693, "grad_norm": 0.5864190135392352, "learning_rate": 1.7837886918503634e-09, "loss": 0.2609, "step": 42196 }, { "epoch": 1.9767180400056215, "grad_norm": 0.56615253465701, "learning_rate": 1.7766329221893807e-09, "loss": 0.2421, "step": 42197 }, { "epoch": 1.9767648849955497, "grad_norm": 0.5740902371151853, "learning_rate": 1.7694915291760483e-09, "loss": 0.2703, "step": 42198 }, { "epoch": 1.976811729985478, "grad_norm": 0.5800206939262381, "learning_rate": 1.7623645128514443e-09, "loss": 0.248, "step": 42199 }, { "epoch": 1.9768585749754064, "grad_norm": 0.6150845945026517, "learning_rate": 1.755251873256647e-09, "loss": 0.2864, "step": 42200 }, { "epoch": 1.9769054199653349, "grad_norm": 0.5922784834715553, "learning_rate": 1.7481536104321795e-09, "loss": 0.2739, "step": 42201 }, { "epoch": 1.9769522649552629, "grad_norm": 0.589158846550512, "learning_rate": 1.7410697244193976e-09, "loss": 0.2612, "step": 42202 }, { "epoch": 1.9769991099451913, "grad_norm": 0.6474915426746045, "learning_rate": 1.7340002152585467e-09, "loss": 0.2748, "step": 42203 }, { "epoch": 1.9770459549351198, "grad_norm": 0.5665401396848777, "learning_rate": 1.7269450829909829e-09, "loss": 0.2639, "step": 42204 }, { "epoch": 1.977092799925048, "grad_norm": 0.6116093490294785, "learning_rate": 1.7199043276566741e-09, "loss": 0.2717, "step": 42205 }, { "epoch": 1.9771396449149763, "grad_norm": 0.5478226457681884, "learning_rate": 1.712877949296421e-09, "loss": 0.2541, "step": 42206 }, { "epoch": 1.9771864899049048, "grad_norm": 0.6211485069907443, "learning_rate": 1.7058659479507467e-09, "loss": 0.2734, "step": 42207 }, { "epoch": 1.977233334894833, "grad_norm": 0.6418713394654011, "learning_rate": 1.698868323659897e-09, "loss": 0.2849, "step": 42208 }, { "epoch": 1.9772801798847612, "grad_norm": 0.5934790956285065, "learning_rate": 1.691885076464117e-09, "loss": 0.2688, "step": 42209 }, { "epoch": 1.9773270248746897, "grad_norm": 0.5710252108315762, "learning_rate": 1.6849162064036529e-09, "loss": 0.2578, "step": 42210 }, { "epoch": 1.977373869864618, "grad_norm": 0.6067048084373878, "learning_rate": 1.6779617135184723e-09, "loss": 0.2787, "step": 42211 }, { "epoch": 1.9774207148545462, "grad_norm": 0.6501885510051959, "learning_rate": 1.6710215978485433e-09, "loss": 0.2938, "step": 42212 }, { "epoch": 1.9774675598444746, "grad_norm": 0.5534894460006808, "learning_rate": 1.6640958594341117e-09, "loss": 0.2737, "step": 42213 }, { "epoch": 1.977514404834403, "grad_norm": 0.5607526508267325, "learning_rate": 1.6571844983148677e-09, "loss": 0.2566, "step": 42214 }, { "epoch": 1.9775612498243311, "grad_norm": 0.6121047468031727, "learning_rate": 1.6502875145307794e-09, "loss": 0.2896, "step": 42215 }, { "epoch": 1.9776080948142596, "grad_norm": 0.6191811703485537, "learning_rate": 1.64340490812126e-09, "loss": 0.2834, "step": 42216 }, { "epoch": 1.977654939804188, "grad_norm": 0.624331576963108, "learning_rate": 1.636536679125722e-09, "loss": 0.2829, "step": 42217 }, { "epoch": 1.9777017847941163, "grad_norm": 0.5685188948336865, "learning_rate": 1.6296828275841337e-09, "loss": 0.2555, "step": 42218 }, { "epoch": 1.9777486297840445, "grad_norm": 0.7054308905240104, "learning_rate": 1.6228433535359078e-09, "loss": 0.2838, "step": 42219 }, { "epoch": 1.977795474773973, "grad_norm": 0.5691292370299809, "learning_rate": 1.61601825702018e-09, "loss": 0.2515, "step": 42220 }, { "epoch": 1.9778423197639012, "grad_norm": 0.5882406753012278, "learning_rate": 1.6092075380763628e-09, "loss": 0.2704, "step": 42221 }, { "epoch": 1.9778891647538295, "grad_norm": 0.5753960040210326, "learning_rate": 1.6024111967435918e-09, "loss": 0.255, "step": 42222 }, { "epoch": 1.977936009743758, "grad_norm": 0.6021104011255435, "learning_rate": 1.5956292330610024e-09, "loss": 0.2726, "step": 42223 }, { "epoch": 1.9779828547336862, "grad_norm": 0.5985235894047284, "learning_rate": 1.5888616470674523e-09, "loss": 0.2649, "step": 42224 }, { "epoch": 1.9780296997236144, "grad_norm": 0.6679590111596431, "learning_rate": 1.5821084388023545e-09, "loss": 0.2913, "step": 42225 }, { "epoch": 1.9780765447135429, "grad_norm": 0.5928010774644377, "learning_rate": 1.5753696083040116e-09, "loss": 0.2621, "step": 42226 }, { "epoch": 1.9781233897034713, "grad_norm": 0.5998521720407287, "learning_rate": 1.568645155611559e-09, "loss": 0.2495, "step": 42227 }, { "epoch": 1.9781702346933996, "grad_norm": 0.6155361962526561, "learning_rate": 1.561935080763577e-09, "loss": 0.2814, "step": 42228 }, { "epoch": 1.9782170796833278, "grad_norm": 0.6114317196800184, "learning_rate": 1.5552393837989232e-09, "loss": 0.2602, "step": 42229 }, { "epoch": 1.9782639246732563, "grad_norm": 0.5970681312273509, "learning_rate": 1.5485580647556232e-09, "loss": 0.2656, "step": 42230 }, { "epoch": 1.9783107696631845, "grad_norm": 0.6149595225922155, "learning_rate": 1.5418911236725342e-09, "loss": 0.2833, "step": 42231 }, { "epoch": 1.9783576146531128, "grad_norm": 0.5855897853942769, "learning_rate": 1.5352385605876818e-09, "loss": 0.2663, "step": 42232 }, { "epoch": 1.9784044596430412, "grad_norm": 0.5697228767807007, "learning_rate": 1.5286003755399237e-09, "loss": 0.2567, "step": 42233 }, { "epoch": 1.9784513046329695, "grad_norm": 0.6097841925618619, "learning_rate": 1.5219765685667299e-09, "loss": 0.2732, "step": 42234 }, { "epoch": 1.9784981496228977, "grad_norm": 0.6386366567531002, "learning_rate": 1.515367139706958e-09, "loss": 0.2864, "step": 42235 }, { "epoch": 1.9785449946128262, "grad_norm": 0.6046835607516797, "learning_rate": 1.5087720889980783e-09, "loss": 0.2744, "step": 42236 }, { "epoch": 1.9785918396027546, "grad_norm": 0.6036385236632104, "learning_rate": 1.5021914164781159e-09, "loss": 0.2619, "step": 42237 }, { "epoch": 1.9786386845926827, "grad_norm": 0.5949651020981281, "learning_rate": 1.4956251221853734e-09, "loss": 0.265, "step": 42238 }, { "epoch": 1.9786855295826111, "grad_norm": 0.5933486545716294, "learning_rate": 1.4890732061570435e-09, "loss": 0.2448, "step": 42239 }, { "epoch": 1.9787323745725396, "grad_norm": 0.5549591877740964, "learning_rate": 1.4825356684314284e-09, "loss": 0.2699, "step": 42240 }, { "epoch": 1.9787792195624678, "grad_norm": 0.5767843468432726, "learning_rate": 1.4760125090457212e-09, "loss": 0.2666, "step": 42241 }, { "epoch": 1.978826064552396, "grad_norm": 0.5683235537545701, "learning_rate": 1.4695037280373914e-09, "loss": 0.2695, "step": 42242 }, { "epoch": 1.9788729095423245, "grad_norm": 0.5813634456313205, "learning_rate": 1.4630093254441868e-09, "loss": 0.2658, "step": 42243 }, { "epoch": 1.9789197545322528, "grad_norm": 0.612604508990656, "learning_rate": 1.4565293013035774e-09, "loss": 0.2872, "step": 42244 }, { "epoch": 1.978966599522181, "grad_norm": 0.6475590480294261, "learning_rate": 1.4500636556524783e-09, "loss": 0.2953, "step": 42245 }, { "epoch": 1.9790134445121095, "grad_norm": 0.6169414139931974, "learning_rate": 1.4436123885280818e-09, "loss": 0.2665, "step": 42246 }, { "epoch": 1.9790602895020377, "grad_norm": 0.5995022782180037, "learning_rate": 1.4371754999681354e-09, "loss": 0.273, "step": 42247 }, { "epoch": 1.979107134491966, "grad_norm": 0.6206531609395951, "learning_rate": 1.4307529900089988e-09, "loss": 0.2957, "step": 42248 }, { "epoch": 1.9791539794818944, "grad_norm": 0.6616131845191118, "learning_rate": 1.424344858687865e-09, "loss": 0.3042, "step": 42249 }, { "epoch": 1.9792008244718229, "grad_norm": 0.6399753967264232, "learning_rate": 1.4179511060416484e-09, "loss": 0.2898, "step": 42250 }, { "epoch": 1.979247669461751, "grad_norm": 0.5917282776037711, "learning_rate": 1.411571732107264e-09, "loss": 0.2692, "step": 42251 }, { "epoch": 1.9792945144516794, "grad_norm": 0.5544685773556014, "learning_rate": 1.405206736921072e-09, "loss": 0.2675, "step": 42252 }, { "epoch": 1.9793413594416078, "grad_norm": 0.5694084092143408, "learning_rate": 1.3988561205199868e-09, "loss": 0.2725, "step": 42253 }, { "epoch": 1.979388204431536, "grad_norm": 0.5800415987952203, "learning_rate": 1.392519882940646e-09, "loss": 0.2618, "step": 42254 }, { "epoch": 1.9794350494214643, "grad_norm": 0.5983149573699629, "learning_rate": 1.386198024219132e-09, "loss": 0.276, "step": 42255 }, { "epoch": 1.9794818944113928, "grad_norm": 0.6199107325285401, "learning_rate": 1.3798905443923595e-09, "loss": 0.2759, "step": 42256 }, { "epoch": 1.979528739401321, "grad_norm": 0.589656115472818, "learning_rate": 1.373597443496133e-09, "loss": 0.2612, "step": 42257 }, { "epoch": 1.9795755843912493, "grad_norm": 0.5796884739328086, "learning_rate": 1.3673187215668127e-09, "loss": 0.2614, "step": 42258 }, { "epoch": 1.9796224293811777, "grad_norm": 0.5985575031052478, "learning_rate": 1.3610543786404805e-09, "loss": 0.2498, "step": 42259 }, { "epoch": 1.979669274371106, "grad_norm": 0.6068141363168922, "learning_rate": 1.3548044147534967e-09, "loss": 0.2546, "step": 42260 }, { "epoch": 1.9797161193610342, "grad_norm": 0.5734656333846744, "learning_rate": 1.348568829941388e-09, "loss": 0.2594, "step": 42261 }, { "epoch": 1.9797629643509627, "grad_norm": 0.5815721831737652, "learning_rate": 1.3423476242402368e-09, "loss": 0.2581, "step": 42262 }, { "epoch": 1.9798098093408911, "grad_norm": 0.5653453830628754, "learning_rate": 1.3361407976861251e-09, "loss": 0.2552, "step": 42263 }, { "epoch": 1.9798566543308194, "grad_norm": 0.6170172626294179, "learning_rate": 1.3299483503143028e-09, "loss": 0.2604, "step": 42264 }, { "epoch": 1.9799034993207476, "grad_norm": 0.6071229264568749, "learning_rate": 1.3237702821605747e-09, "loss": 0.253, "step": 42265 }, { "epoch": 1.979950344310676, "grad_norm": 0.5812081096007403, "learning_rate": 1.3176065932607452e-09, "loss": 0.2697, "step": 42266 }, { "epoch": 1.9799971893006043, "grad_norm": 0.5536890281723558, "learning_rate": 1.3114572836497864e-09, "loss": 0.2521, "step": 42267 }, { "epoch": 1.9800440342905326, "grad_norm": 0.6150199830006312, "learning_rate": 1.3053223533635028e-09, "loss": 0.2825, "step": 42268 }, { "epoch": 1.980090879280461, "grad_norm": 0.5362108602309529, "learning_rate": 1.2992018024371443e-09, "loss": 0.2584, "step": 42269 }, { "epoch": 1.9801377242703893, "grad_norm": 0.5768174848099383, "learning_rate": 1.293095630905683e-09, "loss": 0.2517, "step": 42270 }, { "epoch": 1.9801845692603175, "grad_norm": 0.5685531132508924, "learning_rate": 1.2870038388046458e-09, "loss": 0.2656, "step": 42271 }, { "epoch": 1.980231414250246, "grad_norm": 0.6686613005715348, "learning_rate": 1.2809264261687271e-09, "loss": 0.2884, "step": 42272 }, { "epoch": 1.9802782592401744, "grad_norm": 0.5841838982106501, "learning_rate": 1.2748633930331767e-09, "loss": 0.2649, "step": 42273 }, { "epoch": 1.9803251042301024, "grad_norm": 0.5466190123486107, "learning_rate": 1.2688147394326888e-09, "loss": 0.2549, "step": 42274 }, { "epoch": 1.980371949220031, "grad_norm": 0.6083567371747264, "learning_rate": 1.2627804654019582e-09, "loss": 0.2743, "step": 42275 }, { "epoch": 1.9804187942099594, "grad_norm": 0.5882068782873615, "learning_rate": 1.2567605709762342e-09, "loss": 0.27, "step": 42276 }, { "epoch": 1.9804656391998876, "grad_norm": 0.5818243223576628, "learning_rate": 1.2507550561896565e-09, "loss": 0.2855, "step": 42277 }, { "epoch": 1.9805124841898158, "grad_norm": 0.6140186900534821, "learning_rate": 1.244763921076919e-09, "loss": 0.267, "step": 42278 }, { "epoch": 1.9805593291797443, "grad_norm": 0.5640536314617379, "learning_rate": 1.2387871656724392e-09, "loss": 0.2576, "step": 42279 }, { "epoch": 1.9806061741696726, "grad_norm": 0.594429621952248, "learning_rate": 1.2328247900106337e-09, "loss": 0.2779, "step": 42280 }, { "epoch": 1.9806530191596008, "grad_norm": 0.5939927488612671, "learning_rate": 1.2268767941261971e-09, "loss": 0.2701, "step": 42281 }, { "epoch": 1.9806998641495293, "grad_norm": 0.5973577214399449, "learning_rate": 1.2209431780527137e-09, "loss": 0.2749, "step": 42282 }, { "epoch": 1.9807467091394575, "grad_norm": 0.6048603114260084, "learning_rate": 1.2150239418248777e-09, "loss": 0.282, "step": 42283 }, { "epoch": 1.9807935541293857, "grad_norm": 0.5927897199256749, "learning_rate": 1.2091190854765512e-09, "loss": 0.2726, "step": 42284 }, { "epoch": 1.9808403991193142, "grad_norm": 0.6002605875048641, "learning_rate": 1.2032286090415957e-09, "loss": 0.2701, "step": 42285 }, { "epoch": 1.9808872441092427, "grad_norm": 0.6261587361473233, "learning_rate": 1.1973525125541507e-09, "loss": 0.2788, "step": 42286 }, { "epoch": 1.9809340890991707, "grad_norm": 0.6093771039385586, "learning_rate": 1.1914907960478006e-09, "loss": 0.2841, "step": 42287 }, { "epoch": 1.9809809340890991, "grad_norm": 0.5795923842895867, "learning_rate": 1.1856434595561295e-09, "loss": 0.2654, "step": 42288 }, { "epoch": 1.9810277790790276, "grad_norm": 0.6084518556861759, "learning_rate": 1.1798105031132767e-09, "loss": 0.2798, "step": 42289 }, { "epoch": 1.9810746240689558, "grad_norm": 0.5897926827741907, "learning_rate": 1.173991926752549e-09, "loss": 0.2721, "step": 42290 }, { "epoch": 1.981121469058884, "grad_norm": 0.5682316604939859, "learning_rate": 1.1681877305075306e-09, "loss": 0.2695, "step": 42291 }, { "epoch": 1.9811683140488126, "grad_norm": 0.5967617769170703, "learning_rate": 1.1623979144115284e-09, "loss": 0.2807, "step": 42292 }, { "epoch": 1.9812151590387408, "grad_norm": 0.6236578560323836, "learning_rate": 1.156622478497571e-09, "loss": 0.285, "step": 42293 }, { "epoch": 1.981262004028669, "grad_norm": 0.6049455297668792, "learning_rate": 1.1508614227995208e-09, "loss": 0.2875, "step": 42294 }, { "epoch": 1.9813088490185975, "grad_norm": 0.6002083898033373, "learning_rate": 1.145114747350129e-09, "loss": 0.2658, "step": 42295 }, { "epoch": 1.9813556940085257, "grad_norm": 0.5869546178820623, "learning_rate": 1.1393824521824249e-09, "loss": 0.2751, "step": 42296 }, { "epoch": 1.981402538998454, "grad_norm": 0.7141338357586754, "learning_rate": 1.1336645373294374e-09, "loss": 0.2648, "step": 42297 }, { "epoch": 1.9814493839883824, "grad_norm": 0.6656516195054495, "learning_rate": 1.127961002824196e-09, "loss": 0.2956, "step": 42298 }, { "epoch": 1.981496228978311, "grad_norm": 0.6247597477153684, "learning_rate": 1.1222718486997296e-09, "loss": 0.2633, "step": 42299 }, { "epoch": 1.9815430739682391, "grad_norm": 0.5984193120967456, "learning_rate": 1.116597074988235e-09, "loss": 0.2696, "step": 42300 }, { "epoch": 1.9815899189581674, "grad_norm": 0.5618524753381974, "learning_rate": 1.1109366817227407e-09, "loss": 0.246, "step": 42301 }, { "epoch": 1.9816367639480958, "grad_norm": 0.6329648031700572, "learning_rate": 1.1052906689357212e-09, "loss": 0.2825, "step": 42302 }, { "epoch": 1.981683608938024, "grad_norm": 0.5720606974193686, "learning_rate": 1.0996590366596505e-09, "loss": 0.273, "step": 42303 }, { "epoch": 1.9817304539279523, "grad_norm": 0.6194817689306541, "learning_rate": 1.0940417849270024e-09, "loss": 0.2647, "step": 42304 }, { "epoch": 1.9817772989178808, "grad_norm": 0.5845658557186851, "learning_rate": 1.088438913770251e-09, "loss": 0.2712, "step": 42305 }, { "epoch": 1.981824143907809, "grad_norm": 0.5843176040578159, "learning_rate": 1.0828504232213156e-09, "loss": 0.271, "step": 42306 }, { "epoch": 1.9818709888977373, "grad_norm": 0.6208213530859927, "learning_rate": 1.077276313312392e-09, "loss": 0.2854, "step": 42307 }, { "epoch": 1.9819178338876657, "grad_norm": 0.611579161827028, "learning_rate": 1.071716584075677e-09, "loss": 0.2796, "step": 42308 }, { "epoch": 1.9819646788775942, "grad_norm": 0.5834997750327213, "learning_rate": 1.0661712355433674e-09, "loss": 0.2734, "step": 42309 }, { "epoch": 1.9820115238675222, "grad_norm": 0.5941777201334393, "learning_rate": 1.0606402677468263e-09, "loss": 0.2724, "step": 42310 }, { "epoch": 1.9820583688574507, "grad_norm": 0.60491503993643, "learning_rate": 1.0551236807185284e-09, "loss": 0.2778, "step": 42311 }, { "epoch": 1.9821052138473791, "grad_norm": 0.6154447360316461, "learning_rate": 1.049621474489837e-09, "loss": 0.2772, "step": 42312 }, { "epoch": 1.9821520588373074, "grad_norm": 0.5800309772395289, "learning_rate": 1.0441336490923938e-09, "loss": 0.2642, "step": 42313 }, { "epoch": 1.9821989038272356, "grad_norm": 0.5502083091113291, "learning_rate": 1.0386602045578397e-09, "loss": 0.2511, "step": 42314 }, { "epoch": 1.982245748817164, "grad_norm": 0.6001887766814862, "learning_rate": 1.0332011409178167e-09, "loss": 0.2643, "step": 42315 }, { "epoch": 1.9822925938070923, "grad_norm": 0.5995851377023487, "learning_rate": 1.0277564582034106e-09, "loss": 0.2728, "step": 42316 }, { "epoch": 1.9823394387970206, "grad_norm": 0.5556120606040957, "learning_rate": 1.0223261564465402e-09, "loss": 0.2555, "step": 42317 }, { "epoch": 1.982386283786949, "grad_norm": 0.6344757184875278, "learning_rate": 1.016910235677737e-09, "loss": 0.2801, "step": 42318 }, { "epoch": 1.9824331287768773, "grad_norm": 0.6109048556018676, "learning_rate": 1.0115086959286424e-09, "loss": 0.2681, "step": 42319 }, { "epoch": 1.9824799737668055, "grad_norm": 0.6476409376542118, "learning_rate": 1.0061215372303423e-09, "loss": 0.2902, "step": 42320 }, { "epoch": 1.982526818756734, "grad_norm": 0.5805793660325665, "learning_rate": 1.0007487596136456e-09, "loss": 0.2811, "step": 42321 }, { "epoch": 1.9825736637466624, "grad_norm": 0.6866738410186204, "learning_rate": 9.953903631096384e-10, "loss": 0.2888, "step": 42322 }, { "epoch": 1.9826205087365905, "grad_norm": 0.5844298515979205, "learning_rate": 9.90046347748852e-10, "loss": 0.2571, "step": 42323 }, { "epoch": 1.982667353726519, "grad_norm": 0.6316561867221576, "learning_rate": 9.847167135623725e-10, "loss": 0.275, "step": 42324 }, { "epoch": 1.9827141987164474, "grad_norm": 0.650360625241153, "learning_rate": 9.79401460580731e-10, "loss": 0.2616, "step": 42325 }, { "epoch": 1.9827610437063756, "grad_norm": 0.6125917732687761, "learning_rate": 9.74100588834459e-10, "loss": 0.2726, "step": 42326 }, { "epoch": 1.9828078886963039, "grad_norm": 0.6141336115513726, "learning_rate": 9.688140983540873e-10, "loss": 0.2745, "step": 42327 }, { "epoch": 1.9828547336862323, "grad_norm": 0.5718487005357198, "learning_rate": 9.63541989170147e-10, "loss": 0.2589, "step": 42328 }, { "epoch": 1.9829015786761606, "grad_norm": 0.5623222934656056, "learning_rate": 9.582842613128918e-10, "loss": 0.2607, "step": 42329 }, { "epoch": 1.9829484236660888, "grad_norm": 0.6098761500270987, "learning_rate": 9.530409148125752e-10, "loss": 0.2675, "step": 42330 }, { "epoch": 1.9829952686560173, "grad_norm": 0.6295250590830331, "learning_rate": 9.478119496994508e-10, "loss": 0.2684, "step": 42331 }, { "epoch": 1.9830421136459455, "grad_norm": 0.5987698835335967, "learning_rate": 9.425973660037725e-10, "loss": 0.2679, "step": 42332 }, { "epoch": 1.9830889586358738, "grad_norm": 0.6057997949897012, "learning_rate": 9.373971637549606e-10, "loss": 0.2704, "step": 42333 }, { "epoch": 1.9831358036258022, "grad_norm": 0.6263375420236916, "learning_rate": 9.322113429835466e-10, "loss": 0.2828, "step": 42334 }, { "epoch": 1.9831826486157307, "grad_norm": 0.640142380190416, "learning_rate": 9.27039903719229e-10, "loss": 0.2723, "step": 42335 }, { "epoch": 1.983229493605659, "grad_norm": 0.5573276528741568, "learning_rate": 9.218828459914287e-10, "loss": 0.2644, "step": 42336 }, { "epoch": 1.9832763385955872, "grad_norm": 0.6279135018952234, "learning_rate": 9.167401698301215e-10, "loss": 0.2737, "step": 42337 }, { "epoch": 1.9833231835855156, "grad_norm": 0.5897254649833567, "learning_rate": 9.11611875265006e-10, "loss": 0.2653, "step": 42338 }, { "epoch": 1.9833700285754439, "grad_norm": 0.5764095211069415, "learning_rate": 9.064979623252257e-10, "loss": 0.2738, "step": 42339 }, { "epoch": 1.983416873565372, "grad_norm": 0.6127969103625377, "learning_rate": 9.013984310404789e-10, "loss": 0.278, "step": 42340 }, { "epoch": 1.9834637185553006, "grad_norm": 0.5881076619196871, "learning_rate": 8.963132814401865e-10, "loss": 0.2483, "step": 42341 }, { "epoch": 1.9835105635452288, "grad_norm": 0.6236477223193921, "learning_rate": 8.912425135534919e-10, "loss": 0.2842, "step": 42342 }, { "epoch": 1.983557408535157, "grad_norm": 0.582407445173274, "learning_rate": 8.861861274095385e-10, "loss": 0.2619, "step": 42343 }, { "epoch": 1.9836042535250855, "grad_norm": 0.6075887957926029, "learning_rate": 8.81144123037192e-10, "loss": 0.2609, "step": 42344 }, { "epoch": 1.983651098515014, "grad_norm": 0.6460367948678439, "learning_rate": 8.761165004661509e-10, "loss": 0.263, "step": 42345 }, { "epoch": 1.983697943504942, "grad_norm": 0.5861859541580406, "learning_rate": 8.711032597247259e-10, "loss": 0.2618, "step": 42346 }, { "epoch": 1.9837447884948705, "grad_norm": 0.5492085624600578, "learning_rate": 8.661044008417829e-10, "loss": 0.2625, "step": 42347 }, { "epoch": 1.983791633484799, "grad_norm": 0.594883938100924, "learning_rate": 8.611199238464651e-10, "loss": 0.2735, "step": 42348 }, { "epoch": 1.9838384784747272, "grad_norm": 0.6244036578710043, "learning_rate": 8.561498287673608e-10, "loss": 0.2706, "step": 42349 }, { "epoch": 1.9838853234646554, "grad_norm": 0.6245175118553862, "learning_rate": 8.511941156327807e-10, "loss": 0.2673, "step": 42350 }, { "epoch": 1.9839321684545839, "grad_norm": 0.6070840569768522, "learning_rate": 8.462527844715907e-10, "loss": 0.2731, "step": 42351 }, { "epoch": 1.983979013444512, "grad_norm": 0.6098065436207029, "learning_rate": 8.413258353121012e-10, "loss": 0.2753, "step": 42352 }, { "epoch": 1.9840258584344403, "grad_norm": 0.5964982486561324, "learning_rate": 8.364132681826232e-10, "loss": 0.2528, "step": 42353 }, { "epoch": 1.9840727034243688, "grad_norm": 0.6232709853238799, "learning_rate": 8.315150831111896e-10, "loss": 0.2674, "step": 42354 }, { "epoch": 1.984119548414297, "grad_norm": 0.6590038152815761, "learning_rate": 8.266312801266663e-10, "loss": 0.292, "step": 42355 }, { "epoch": 1.9841663934042253, "grad_norm": 0.6281333546652083, "learning_rate": 8.217618592562538e-10, "loss": 0.2781, "step": 42356 }, { "epoch": 1.9842132383941538, "grad_norm": 0.6282148761800054, "learning_rate": 8.169068205288177e-10, "loss": 0.2773, "step": 42357 }, { "epoch": 1.9842600833840822, "grad_norm": 0.6134575570830695, "learning_rate": 8.120661639718364e-10, "loss": 0.2685, "step": 42358 }, { "epoch": 1.9843069283740102, "grad_norm": 0.6469784335347649, "learning_rate": 8.072398896133426e-10, "loss": 0.2743, "step": 42359 }, { "epoch": 1.9843537733639387, "grad_norm": 0.5773756956014774, "learning_rate": 8.024279974808147e-10, "loss": 0.2665, "step": 42360 }, { "epoch": 1.9844006183538672, "grad_norm": 0.5573565859596485, "learning_rate": 7.976304876022856e-10, "loss": 0.2638, "step": 42361 }, { "epoch": 1.9844474633437954, "grad_norm": 0.6414927587724037, "learning_rate": 7.928473600055109e-10, "loss": 0.2916, "step": 42362 }, { "epoch": 1.9844943083337236, "grad_norm": 0.6180554975423066, "learning_rate": 7.880786147174136e-10, "loss": 0.2642, "step": 42363 }, { "epoch": 1.984541153323652, "grad_norm": 0.5744923661924801, "learning_rate": 7.833242517657491e-10, "loss": 0.2606, "step": 42364 }, { "epoch": 1.9845879983135803, "grad_norm": 0.6227197972123051, "learning_rate": 7.785842711779956e-10, "loss": 0.2846, "step": 42365 }, { "epoch": 1.9846348433035086, "grad_norm": 0.6192086943095273, "learning_rate": 7.738586729813535e-10, "loss": 0.2782, "step": 42366 }, { "epoch": 1.984681688293437, "grad_norm": 0.5445216253894724, "learning_rate": 7.691474572030233e-10, "loss": 0.2571, "step": 42367 }, { "epoch": 1.9847285332833653, "grad_norm": 0.6015130886461536, "learning_rate": 7.644506238702054e-10, "loss": 0.2641, "step": 42368 }, { "epoch": 1.9847753782732935, "grad_norm": 0.6108962589021072, "learning_rate": 7.597681730095451e-10, "loss": 0.2711, "step": 42369 }, { "epoch": 1.984822223263222, "grad_norm": 0.5825055782789682, "learning_rate": 7.551001046485207e-10, "loss": 0.2523, "step": 42370 }, { "epoch": 1.9848690682531505, "grad_norm": 0.5977528915948566, "learning_rate": 7.504464188134997e-10, "loss": 0.258, "step": 42371 }, { "epoch": 1.9849159132430787, "grad_norm": 0.6182237564367689, "learning_rate": 7.458071155316826e-10, "loss": 0.2631, "step": 42372 }, { "epoch": 1.984962758233007, "grad_norm": 0.6147444649816801, "learning_rate": 7.411821948294373e-10, "loss": 0.272, "step": 42373 }, { "epoch": 1.9850096032229354, "grad_norm": 0.6099554660035397, "learning_rate": 7.365716567334091e-10, "loss": 0.2793, "step": 42374 }, { "epoch": 1.9850564482128636, "grad_norm": 0.6220126698589822, "learning_rate": 7.31975501270521e-10, "loss": 0.2779, "step": 42375 }, { "epoch": 1.9851032932027919, "grad_norm": 0.613882015229163, "learning_rate": 7.273937284668631e-10, "loss": 0.2736, "step": 42376 }, { "epoch": 1.9851501381927203, "grad_norm": 0.5564689114282204, "learning_rate": 7.228263383488033e-10, "loss": 0.2669, "step": 42377 }, { "epoch": 1.9851969831826486, "grad_norm": 0.5563317110236272, "learning_rate": 7.182733309427092e-10, "loss": 0.2563, "step": 42378 }, { "epoch": 1.9852438281725768, "grad_norm": 0.5859315329431718, "learning_rate": 7.137347062749489e-10, "loss": 0.2557, "step": 42379 }, { "epoch": 1.9852906731625053, "grad_norm": 0.5657388721821034, "learning_rate": 7.092104643713349e-10, "loss": 0.2581, "step": 42380 }, { "epoch": 1.9853375181524338, "grad_norm": 0.6136504890027514, "learning_rate": 7.047006052579575e-10, "loss": 0.2745, "step": 42381 }, { "epoch": 1.9853843631423618, "grad_norm": 0.6210272407992244, "learning_rate": 7.002051289611844e-10, "loss": 0.2756, "step": 42382 }, { "epoch": 1.9854312081322902, "grad_norm": 0.6160134476387575, "learning_rate": 6.957240355062733e-10, "loss": 0.2691, "step": 42383 }, { "epoch": 1.9854780531222187, "grad_norm": 0.5921736904173626, "learning_rate": 6.912573249193145e-10, "loss": 0.2712, "step": 42384 }, { "epoch": 1.985524898112147, "grad_norm": 0.6221197335980753, "learning_rate": 6.868049972261204e-10, "loss": 0.2667, "step": 42385 }, { "epoch": 1.9855717431020752, "grad_norm": 0.6215641719802417, "learning_rate": 6.823670524519488e-10, "loss": 0.2794, "step": 42386 }, { "epoch": 1.9856185880920036, "grad_norm": 0.5929985581328833, "learning_rate": 6.779434906228899e-10, "loss": 0.2757, "step": 42387 }, { "epoch": 1.9856654330819319, "grad_norm": 0.5601992371752761, "learning_rate": 6.735343117639237e-10, "loss": 0.2702, "step": 42388 }, { "epoch": 1.9857122780718601, "grad_norm": 0.6385786221706461, "learning_rate": 6.691395159005854e-10, "loss": 0.2873, "step": 42389 }, { "epoch": 1.9857591230617886, "grad_norm": 0.6477161610165669, "learning_rate": 6.647591030581324e-10, "loss": 0.2986, "step": 42390 }, { "epoch": 1.9858059680517168, "grad_norm": 0.5947823835195013, "learning_rate": 6.603930732618224e-10, "loss": 0.2749, "step": 42391 }, { "epoch": 1.985852813041645, "grad_norm": 0.6213494145535269, "learning_rate": 6.560414265366355e-10, "loss": 0.2755, "step": 42392 }, { "epoch": 1.9858996580315735, "grad_norm": 0.6461509655539696, "learning_rate": 6.517041629081066e-10, "loss": 0.2858, "step": 42393 }, { "epoch": 1.985946503021502, "grad_norm": 0.5790801161974348, "learning_rate": 6.473812824003833e-10, "loss": 0.2643, "step": 42394 }, { "epoch": 1.98599334801143, "grad_norm": 0.6051293047415944, "learning_rate": 6.430727850390006e-10, "loss": 0.2717, "step": 42395 }, { "epoch": 1.9860401930013585, "grad_norm": 0.5926154373524712, "learning_rate": 6.38778670848661e-10, "loss": 0.2736, "step": 42396 }, { "epoch": 1.986087037991287, "grad_norm": 0.558232931533007, "learning_rate": 6.344989398537893e-10, "loss": 0.2598, "step": 42397 }, { "epoch": 1.9861338829812152, "grad_norm": 0.5901684828443876, "learning_rate": 6.302335920793657e-10, "loss": 0.2709, "step": 42398 }, { "epoch": 1.9861807279711434, "grad_norm": 0.6048835116495881, "learning_rate": 6.259826275495373e-10, "loss": 0.2634, "step": 42399 }, { "epoch": 1.9862275729610719, "grad_norm": 0.5849557239382731, "learning_rate": 6.217460462892843e-10, "loss": 0.2733, "step": 42400 }, { "epoch": 1.9862744179510001, "grad_norm": 0.5923345077779305, "learning_rate": 6.175238483224766e-10, "loss": 0.2708, "step": 42401 }, { "epoch": 1.9863212629409284, "grad_norm": 0.6171430324179717, "learning_rate": 6.133160336735389e-10, "loss": 0.2608, "step": 42402 }, { "epoch": 1.9863681079308568, "grad_norm": 0.5585844680586021, "learning_rate": 6.091226023668961e-10, "loss": 0.2559, "step": 42403 }, { "epoch": 1.986414952920785, "grad_norm": 0.5713531159823595, "learning_rate": 6.049435544264182e-10, "loss": 0.2602, "step": 42404 }, { "epoch": 1.9864617979107133, "grad_norm": 0.6065071119154972, "learning_rate": 6.007788898765298e-10, "loss": 0.287, "step": 42405 }, { "epoch": 1.9865086429006418, "grad_norm": 0.6331519723752782, "learning_rate": 5.966286087408235e-10, "loss": 0.2873, "step": 42406 }, { "epoch": 1.9865554878905702, "grad_norm": 0.5998745264681153, "learning_rate": 5.924927110434464e-10, "loss": 0.2603, "step": 42407 }, { "epoch": 1.9866023328804985, "grad_norm": 0.6242145322791207, "learning_rate": 5.883711968079908e-10, "loss": 0.2943, "step": 42408 }, { "epoch": 1.9866491778704267, "grad_norm": 0.5744080613769126, "learning_rate": 5.84264066058049e-10, "loss": 0.2691, "step": 42409 }, { "epoch": 1.9866960228603552, "grad_norm": 0.6133742132523197, "learning_rate": 5.801713188177682e-10, "loss": 0.2795, "step": 42410 }, { "epoch": 1.9867428678502834, "grad_norm": 0.6145758859043849, "learning_rate": 5.760929551101857e-10, "loss": 0.2742, "step": 42411 }, { "epoch": 1.9867897128402117, "grad_norm": 0.5723856795195723, "learning_rate": 5.720289749591712e-10, "loss": 0.2713, "step": 42412 }, { "epoch": 1.9868365578301401, "grad_norm": 0.6091776909676593, "learning_rate": 5.679793783877619e-10, "loss": 0.2701, "step": 42413 }, { "epoch": 1.9868834028200684, "grad_norm": 0.5992015552479649, "learning_rate": 5.639441654192724e-10, "loss": 0.2639, "step": 42414 }, { "epoch": 1.9869302478099966, "grad_norm": 0.5667262635800789, "learning_rate": 5.599233360772948e-10, "loss": 0.2655, "step": 42415 }, { "epoch": 1.986977092799925, "grad_norm": 0.6477488385659871, "learning_rate": 5.559168903848666e-10, "loss": 0.2486, "step": 42416 }, { "epoch": 1.9870239377898535, "grad_norm": 0.5357715652334315, "learning_rate": 5.519248283647472e-10, "loss": 0.2469, "step": 42417 }, { "epoch": 1.9870707827797816, "grad_norm": 0.5927499440940156, "learning_rate": 5.479471500399736e-10, "loss": 0.2678, "step": 42418 }, { "epoch": 1.98711762776971, "grad_norm": 0.5818567663780181, "learning_rate": 5.439838554338606e-10, "loss": 0.2621, "step": 42419 }, { "epoch": 1.9871644727596385, "grad_norm": 0.5909174768885461, "learning_rate": 5.400349445686126e-10, "loss": 0.2551, "step": 42420 }, { "epoch": 1.9872113177495667, "grad_norm": 0.6141331400980358, "learning_rate": 5.361004174675444e-10, "loss": 0.2778, "step": 42421 }, { "epoch": 1.987258162739495, "grad_norm": 0.6011867906485521, "learning_rate": 5.321802741528603e-10, "loss": 0.2595, "step": 42422 }, { "epoch": 1.9873050077294234, "grad_norm": 0.5402072986741803, "learning_rate": 5.282745146470425e-10, "loss": 0.2567, "step": 42423 }, { "epoch": 1.9873518527193517, "grad_norm": 0.6274443612870205, "learning_rate": 5.24383138973128e-10, "loss": 0.2694, "step": 42424 }, { "epoch": 1.98739869770928, "grad_norm": 0.6394692483302333, "learning_rate": 5.205061471530437e-10, "loss": 0.2935, "step": 42425 }, { "epoch": 1.9874455426992084, "grad_norm": 0.6050989282322334, "learning_rate": 5.166435392092717e-10, "loss": 0.2656, "step": 42426 }, { "epoch": 1.9874923876891366, "grad_norm": 0.6045571139233483, "learning_rate": 5.127953151637388e-10, "loss": 0.2513, "step": 42427 }, { "epoch": 1.9875392326790648, "grad_norm": 0.5672565317171232, "learning_rate": 5.089614750392047e-10, "loss": 0.2548, "step": 42428 }, { "epoch": 1.9875860776689933, "grad_norm": 0.5913124941921784, "learning_rate": 5.051420188570411e-10, "loss": 0.269, "step": 42429 }, { "epoch": 1.9876329226589218, "grad_norm": 0.6628180360637016, "learning_rate": 5.013369466397299e-10, "loss": 0.2716, "step": 42430 }, { "epoch": 1.9876797676488498, "grad_norm": 0.6469033977510104, "learning_rate": 4.975462584091984e-10, "loss": 0.2838, "step": 42431 }, { "epoch": 1.9877266126387783, "grad_norm": 0.589310645436833, "learning_rate": 4.937699541868179e-10, "loss": 0.2613, "step": 42432 }, { "epoch": 1.9877734576287067, "grad_norm": 0.6256313789353609, "learning_rate": 4.900080339945157e-10, "loss": 0.2758, "step": 42433 }, { "epoch": 1.987820302618635, "grad_norm": 0.587447264282205, "learning_rate": 4.862604978539409e-10, "loss": 0.2735, "step": 42434 }, { "epoch": 1.9878671476085632, "grad_norm": 0.5918963576974742, "learning_rate": 4.825273457870205e-10, "loss": 0.2678, "step": 42435 }, { "epoch": 1.9879139925984917, "grad_norm": 0.6627263697916805, "learning_rate": 4.788085778148488e-10, "loss": 0.2865, "step": 42436 }, { "epoch": 1.98796083758842, "grad_norm": 0.6296563487515964, "learning_rate": 4.751041939587975e-10, "loss": 0.2944, "step": 42437 }, { "epoch": 1.9880076825783481, "grad_norm": 0.5970421765485887, "learning_rate": 4.714141942402383e-10, "loss": 0.2681, "step": 42438 }, { "epoch": 1.9880545275682766, "grad_norm": 0.6005558496706774, "learning_rate": 4.677385786805433e-10, "loss": 0.2881, "step": 42439 }, { "epoch": 1.9881013725582048, "grad_norm": 0.5667631538457001, "learning_rate": 4.640773473005289e-10, "loss": 0.2609, "step": 42440 }, { "epoch": 1.988148217548133, "grad_norm": 0.5831946196140719, "learning_rate": 4.604305001218445e-10, "loss": 0.2546, "step": 42441 }, { "epoch": 1.9881950625380616, "grad_norm": 0.5757213414203662, "learning_rate": 4.5679803716502934e-10, "loss": 0.2554, "step": 42442 }, { "epoch": 1.98824190752799, "grad_norm": 0.5838746411584304, "learning_rate": 4.5317995845117757e-10, "loss": 0.2615, "step": 42443 }, { "epoch": 1.9882887525179183, "grad_norm": 0.5953532255253845, "learning_rate": 4.4957626400082835e-10, "loss": 0.2744, "step": 42444 }, { "epoch": 1.9883355975078465, "grad_norm": 0.5823840127199926, "learning_rate": 4.4598695383507587e-10, "loss": 0.2719, "step": 42445 }, { "epoch": 1.988382442497775, "grad_norm": 0.6023182734185089, "learning_rate": 4.424120279744593e-10, "loss": 0.2932, "step": 42446 }, { "epoch": 1.9884292874877032, "grad_norm": 0.5682784232254109, "learning_rate": 4.388514864395177e-10, "loss": 0.2617, "step": 42447 }, { "epoch": 1.9884761324776314, "grad_norm": 0.5852998523776691, "learning_rate": 4.3530532925079026e-10, "loss": 0.2823, "step": 42448 }, { "epoch": 1.98852297746756, "grad_norm": 0.5816785377297274, "learning_rate": 4.3177355642853856e-10, "loss": 0.2522, "step": 42449 }, { "epoch": 1.9885698224574881, "grad_norm": 0.6120328573937196, "learning_rate": 4.2825616799330174e-10, "loss": 0.2877, "step": 42450 }, { "epoch": 1.9886166674474164, "grad_norm": 0.6160001295506597, "learning_rate": 4.247531639650637e-10, "loss": 0.2774, "step": 42451 }, { "epoch": 1.9886635124373448, "grad_norm": 0.6093838236291013, "learning_rate": 4.2126454436408617e-10, "loss": 0.2641, "step": 42452 }, { "epoch": 1.9887103574272733, "grad_norm": 0.5836607985444418, "learning_rate": 4.177903092106306e-10, "loss": 0.263, "step": 42453 }, { "epoch": 1.9887572024172013, "grad_norm": 0.6235684630042173, "learning_rate": 4.1433045852440345e-10, "loss": 0.2759, "step": 42454 }, { "epoch": 1.9888040474071298, "grad_norm": 0.5688218559030892, "learning_rate": 4.1088499232566635e-10, "loss": 0.2652, "step": 42455 }, { "epoch": 1.9888508923970583, "grad_norm": 0.5707124490895122, "learning_rate": 4.074539106338482e-10, "loss": 0.2617, "step": 42456 }, { "epoch": 1.9888977373869865, "grad_norm": 0.6186849681654386, "learning_rate": 4.04037213468933e-10, "loss": 0.2755, "step": 42457 }, { "epoch": 1.9889445823769147, "grad_norm": 0.6278470781246508, "learning_rate": 4.0063490085062715e-10, "loss": 0.2757, "step": 42458 }, { "epoch": 1.9889914273668432, "grad_norm": 0.6154691922121084, "learning_rate": 3.9724697279835966e-10, "loss": 0.2632, "step": 42459 }, { "epoch": 1.9890382723567714, "grad_norm": 0.565242065124324, "learning_rate": 3.9387342933155936e-10, "loss": 0.2611, "step": 42460 }, { "epoch": 1.9890851173466997, "grad_norm": 0.6651635203449358, "learning_rate": 3.9051427046993276e-10, "loss": 0.2872, "step": 42461 }, { "epoch": 1.9891319623366281, "grad_norm": 0.5975345884906555, "learning_rate": 3.871694962326311e-10, "loss": 0.2752, "step": 42462 }, { "epoch": 1.9891788073265564, "grad_norm": 0.5764611699575914, "learning_rate": 3.8383910663880587e-10, "loss": 0.2717, "step": 42463 }, { "epoch": 1.9892256523164846, "grad_norm": 0.6274468250727311, "learning_rate": 3.8052310170788586e-10, "loss": 0.2877, "step": 42464 }, { "epoch": 1.989272497306413, "grad_norm": 0.6148593298216263, "learning_rate": 3.7722148145874494e-10, "loss": 0.2832, "step": 42465 }, { "epoch": 1.9893193422963416, "grad_norm": 0.5933465952319066, "learning_rate": 3.7393424591025685e-10, "loss": 0.2614, "step": 42466 }, { "epoch": 1.9893661872862696, "grad_norm": 0.5868234789472915, "learning_rate": 3.706613950818505e-10, "loss": 0.2706, "step": 42467 }, { "epoch": 1.989413032276198, "grad_norm": 0.609120546991648, "learning_rate": 3.674029289918446e-10, "loss": 0.2853, "step": 42468 }, { "epoch": 1.9894598772661265, "grad_norm": 0.5766042555604682, "learning_rate": 3.641588476591129e-10, "loss": 0.2594, "step": 42469 }, { "epoch": 1.9895067222560547, "grad_norm": 0.5689982248653216, "learning_rate": 3.6092915110252924e-10, "loss": 0.2614, "step": 42470 }, { "epoch": 1.989553567245983, "grad_norm": 0.6050681184091784, "learning_rate": 3.5771383934041227e-10, "loss": 0.2938, "step": 42471 }, { "epoch": 1.9896004122359114, "grad_norm": 0.60038928292431, "learning_rate": 3.545129123913582e-10, "loss": 0.2714, "step": 42472 }, { "epoch": 1.9896472572258397, "grad_norm": 0.5706210844735651, "learning_rate": 3.5132637027396333e-10, "loss": 0.2619, "step": 42473 }, { "epoch": 1.989694102215768, "grad_norm": 0.611539922519122, "learning_rate": 3.4815421300626874e-10, "loss": 0.2785, "step": 42474 }, { "epoch": 1.9897409472056964, "grad_norm": 0.6103583710813919, "learning_rate": 3.4499644060687063e-10, "loss": 0.2661, "step": 42475 }, { "epoch": 1.9897877921956246, "grad_norm": 0.5905683499841569, "learning_rate": 3.4185305309353266e-10, "loss": 0.2795, "step": 42476 }, { "epoch": 1.9898346371855529, "grad_norm": 0.6110111426683461, "learning_rate": 3.3872405048485103e-10, "loss": 0.282, "step": 42477 }, { "epoch": 1.9898814821754813, "grad_norm": 0.6051328741731685, "learning_rate": 3.356094327983117e-10, "loss": 0.2766, "step": 42478 }, { "epoch": 1.9899283271654098, "grad_norm": 0.6027205260583457, "learning_rate": 3.3250920005223343e-10, "loss": 0.2823, "step": 42479 }, { "epoch": 1.989975172155338, "grad_norm": 0.6086902241730094, "learning_rate": 3.294233522641022e-10, "loss": 0.2689, "step": 42480 }, { "epoch": 1.9900220171452663, "grad_norm": 0.5645687930166533, "learning_rate": 3.2635188945195904e-10, "loss": 0.2552, "step": 42481 }, { "epoch": 1.9900688621351947, "grad_norm": 0.5763996260195948, "learning_rate": 3.2329481163356766e-10, "loss": 0.2591, "step": 42482 }, { "epoch": 1.990115707125123, "grad_norm": 0.5353062629069707, "learning_rate": 3.202521188261365e-10, "loss": 0.2419, "step": 42483 }, { "epoch": 1.9901625521150512, "grad_norm": 0.6269669827848082, "learning_rate": 3.1722381104742906e-10, "loss": 0.2748, "step": 42484 }, { "epoch": 1.9902093971049797, "grad_norm": 0.6298976929808835, "learning_rate": 3.1420988831493137e-10, "loss": 0.2855, "step": 42485 }, { "epoch": 1.990256242094908, "grad_norm": 0.5925152978340856, "learning_rate": 3.1121035064557436e-10, "loss": 0.2731, "step": 42486 }, { "epoch": 1.9903030870848362, "grad_norm": 0.6145050318080959, "learning_rate": 3.082251980571216e-10, "loss": 0.2872, "step": 42487 }, { "epoch": 1.9903499320747646, "grad_norm": 0.5991719169566357, "learning_rate": 3.052544305667815e-10, "loss": 0.2549, "step": 42488 }, { "epoch": 1.990396777064693, "grad_norm": 0.5797747460916616, "learning_rate": 3.0229804819120744e-10, "loss": 0.2554, "step": 42489 }, { "epoch": 1.990443622054621, "grad_norm": 0.5789214994934792, "learning_rate": 2.993560509476079e-10, "loss": 0.2649, "step": 42490 }, { "epoch": 1.9904904670445496, "grad_norm": 0.6142758079078369, "learning_rate": 2.964284388529137e-10, "loss": 0.2739, "step": 42491 }, { "epoch": 1.990537312034478, "grad_norm": 0.5917222719211608, "learning_rate": 2.9351521192405586e-10, "loss": 0.2712, "step": 42492 }, { "epoch": 1.9905841570244063, "grad_norm": 0.6189746201252061, "learning_rate": 2.9061637017768764e-10, "loss": 0.263, "step": 42493 }, { "epoch": 1.9906310020143345, "grad_norm": 0.6033307599740582, "learning_rate": 2.877319136307399e-10, "loss": 0.27, "step": 42494 }, { "epoch": 1.990677847004263, "grad_norm": 0.6463689131657399, "learning_rate": 2.84861842299311e-10, "loss": 0.2882, "step": 42495 }, { "epoch": 1.9907246919941912, "grad_norm": 0.6361623223480789, "learning_rate": 2.8200615620060936e-10, "loss": 0.2604, "step": 42496 }, { "epoch": 1.9907715369841195, "grad_norm": 0.5958558853825677, "learning_rate": 2.7916485535045555e-10, "loss": 0.2723, "step": 42497 }, { "epoch": 1.990818381974048, "grad_norm": 0.609517555240366, "learning_rate": 2.763379397652255e-10, "loss": 0.2682, "step": 42498 }, { "epoch": 1.9908652269639762, "grad_norm": 0.5992876048164079, "learning_rate": 2.735254094615725e-10, "loss": 0.2642, "step": 42499 }, { "epoch": 1.9909120719539044, "grad_norm": 0.6409050629479603, "learning_rate": 2.707272644555947e-10, "loss": 0.2699, "step": 42500 }, { "epoch": 1.9909589169438329, "grad_norm": 0.5853660282515414, "learning_rate": 2.6794350476339047e-10, "loss": 0.2643, "step": 42501 }, { "epoch": 1.9910057619337613, "grad_norm": 0.5804747895850846, "learning_rate": 2.651741304005029e-10, "loss": 0.2583, "step": 42502 }, { "epoch": 1.9910526069236893, "grad_norm": 0.6279054924489662, "learning_rate": 2.6241914138358526e-10, "loss": 0.27, "step": 42503 }, { "epoch": 1.9910994519136178, "grad_norm": 0.624780437601725, "learning_rate": 2.5967853772818075e-10, "loss": 0.2873, "step": 42504 }, { "epoch": 1.9911462969035463, "grad_norm": 0.5999141269130843, "learning_rate": 2.569523194498325e-10, "loss": 0.2667, "step": 42505 }, { "epoch": 1.9911931418934745, "grad_norm": 0.5832787280597764, "learning_rate": 2.5424048656436106e-10, "loss": 0.2693, "step": 42506 }, { "epoch": 1.9912399868834028, "grad_norm": 0.5706764521027318, "learning_rate": 2.5154303908758725e-10, "loss": 0.2615, "step": 42507 }, { "epoch": 1.9912868318733312, "grad_norm": 0.5751465792989094, "learning_rate": 2.4885997703505414e-10, "loss": 0.2752, "step": 42508 }, { "epoch": 1.9913336768632595, "grad_norm": 0.5684682451965556, "learning_rate": 2.461913004217498e-10, "loss": 0.2571, "step": 42509 }, { "epoch": 1.9913805218531877, "grad_norm": 0.6700160962637134, "learning_rate": 2.4353700926349476e-10, "loss": 0.3003, "step": 42510 }, { "epoch": 1.9914273668431162, "grad_norm": 0.5566307283908735, "learning_rate": 2.4089710357555476e-10, "loss": 0.2618, "step": 42511 }, { "epoch": 1.9914742118330444, "grad_norm": 0.5599170995220021, "learning_rate": 2.382715833726401e-10, "loss": 0.2692, "step": 42512 }, { "epoch": 1.9915210568229726, "grad_norm": 0.609480302360934, "learning_rate": 2.35660448670294e-10, "loss": 0.288, "step": 42513 }, { "epoch": 1.991567901812901, "grad_norm": 0.6500856489990022, "learning_rate": 2.330636994835045e-10, "loss": 0.2859, "step": 42514 }, { "epoch": 1.9916147468028296, "grad_norm": 0.5872408688274128, "learning_rate": 2.3048133582698195e-10, "loss": 0.2715, "step": 42515 }, { "epoch": 1.9916615917927578, "grad_norm": 0.5755309462487385, "learning_rate": 2.2791335771599199e-10, "loss": 0.2621, "step": 42516 }, { "epoch": 1.991708436782686, "grad_norm": 0.6165281819040321, "learning_rate": 2.253597651649675e-10, "loss": 0.2833, "step": 42517 }, { "epoch": 1.9917552817726145, "grad_norm": 0.5487166510540056, "learning_rate": 2.2282055818861893e-10, "loss": 0.2567, "step": 42518 }, { "epoch": 1.9918021267625428, "grad_norm": 0.6133757476401819, "learning_rate": 2.202957368016567e-10, "loss": 0.2705, "step": 42519 }, { "epoch": 1.991848971752471, "grad_norm": 0.6130877567407501, "learning_rate": 2.177853010185138e-10, "loss": 0.2642, "step": 42520 }, { "epoch": 1.9918958167423995, "grad_norm": 0.6125821292473104, "learning_rate": 2.1528925085390062e-10, "loss": 0.2789, "step": 42521 }, { "epoch": 1.9919426617323277, "grad_norm": 0.609502570780935, "learning_rate": 2.128075863219725e-10, "loss": 0.2757, "step": 42522 }, { "epoch": 1.991989506722256, "grad_norm": 0.6079596902218902, "learning_rate": 2.1034030743688482e-10, "loss": 0.2657, "step": 42523 }, { "epoch": 1.9920363517121844, "grad_norm": 0.6157864084090339, "learning_rate": 2.07887414213348e-10, "loss": 0.2699, "step": 42524 }, { "epoch": 1.9920831967021129, "grad_norm": 0.5945166279202868, "learning_rate": 2.0544890666468476e-10, "loss": 0.2716, "step": 42525 }, { "epoch": 1.9921300416920409, "grad_norm": 0.5768916316527818, "learning_rate": 2.030247848056055e-10, "loss": 0.249, "step": 42526 }, { "epoch": 1.9921768866819693, "grad_norm": 0.658951889042525, "learning_rate": 2.006150486497105e-10, "loss": 0.2888, "step": 42527 }, { "epoch": 1.9922237316718978, "grad_norm": 0.5807868175944203, "learning_rate": 1.9821969821115505e-10, "loss": 0.2684, "step": 42528 }, { "epoch": 1.992270576661826, "grad_norm": 0.6137036039261912, "learning_rate": 1.9583873350326188e-10, "loss": 0.275, "step": 42529 }, { "epoch": 1.9923174216517543, "grad_norm": 0.5842374980336873, "learning_rate": 1.9347215454018631e-10, "loss": 0.2595, "step": 42530 }, { "epoch": 1.9923642666416828, "grad_norm": 0.6368595942881083, "learning_rate": 1.9111996133552858e-10, "loss": 0.2766, "step": 42531 }, { "epoch": 1.992411111631611, "grad_norm": 0.5357418424524784, "learning_rate": 1.8878215390233379e-10, "loss": 0.2539, "step": 42532 }, { "epoch": 1.9924579566215392, "grad_norm": 0.6134893299044004, "learning_rate": 1.864587322547573e-10, "loss": 0.2777, "step": 42533 }, { "epoch": 1.9925048016114677, "grad_norm": 0.5532984179348518, "learning_rate": 1.841496964055667e-10, "loss": 0.2551, "step": 42534 }, { "epoch": 1.992551646601396, "grad_norm": 0.624394984730982, "learning_rate": 1.8185504636836214e-10, "loss": 0.2736, "step": 42535 }, { "epoch": 1.9925984915913242, "grad_norm": 0.6401594669128329, "learning_rate": 1.7957478215618885e-10, "loss": 0.2789, "step": 42536 }, { "epoch": 1.9926453365812526, "grad_norm": 0.5842225788497895, "learning_rate": 1.7730890378236942e-10, "loss": 0.2756, "step": 42537 }, { "epoch": 1.992692181571181, "grad_norm": 0.6009008621361236, "learning_rate": 1.7505741125967146e-10, "loss": 0.2776, "step": 42538 }, { "epoch": 1.9927390265611091, "grad_norm": 0.6450605322203129, "learning_rate": 1.7282030460141764e-10, "loss": 0.2797, "step": 42539 }, { "epoch": 1.9927858715510376, "grad_norm": 0.6466829315762578, "learning_rate": 1.7059758382009795e-10, "loss": 0.2991, "step": 42540 }, { "epoch": 1.992832716540966, "grad_norm": 0.6003616590524211, "learning_rate": 1.6838924892875752e-10, "loss": 0.2584, "step": 42541 }, { "epoch": 1.9928795615308943, "grad_norm": 0.5727337435752186, "learning_rate": 1.6619529994016393e-10, "loss": 0.2709, "step": 42542 }, { "epoch": 1.9929264065208225, "grad_norm": 0.5794160002108076, "learning_rate": 1.640157368665296e-10, "loss": 0.2689, "step": 42543 }, { "epoch": 1.992973251510751, "grad_norm": 0.5804662941330923, "learning_rate": 1.618505597208997e-10, "loss": 0.265, "step": 42544 }, { "epoch": 1.9930200965006792, "grad_norm": 0.5775939640380662, "learning_rate": 1.5969976851548662e-10, "loss": 0.2826, "step": 42545 }, { "epoch": 1.9930669414906075, "grad_norm": 0.5822567889078247, "learning_rate": 1.5756336326250288e-10, "loss": 0.265, "step": 42546 }, { "epoch": 1.993113786480536, "grad_norm": 0.6332156918501939, "learning_rate": 1.55441343974716e-10, "loss": 0.2713, "step": 42547 }, { "epoch": 1.9931606314704642, "grad_norm": 0.6228145972239919, "learning_rate": 1.5333371066378332e-10, "loss": 0.2889, "step": 42548 }, { "epoch": 1.9932074764603924, "grad_norm": 0.6597928095043407, "learning_rate": 1.5124046334219488e-10, "loss": 0.2921, "step": 42549 }, { "epoch": 1.9932543214503209, "grad_norm": 0.6573006740343205, "learning_rate": 1.4916160202188556e-10, "loss": 0.2881, "step": 42550 }, { "epoch": 1.9933011664402494, "grad_norm": 0.662482482792523, "learning_rate": 1.4709712671479026e-10, "loss": 0.2753, "step": 42551 }, { "epoch": 1.9933480114301776, "grad_norm": 0.5886503132141403, "learning_rate": 1.4504703743312143e-10, "loss": 0.263, "step": 42552 }, { "epoch": 1.9933948564201058, "grad_norm": 0.5687750201628693, "learning_rate": 1.430113341879813e-10, "loss": 0.2622, "step": 42553 }, { "epoch": 1.9934417014100343, "grad_norm": 0.624080614821908, "learning_rate": 1.4099001699185987e-10, "loss": 0.2826, "step": 42554 }, { "epoch": 1.9934885463999625, "grad_norm": 0.5402758970904378, "learning_rate": 1.389830858558594e-10, "loss": 0.2497, "step": 42555 }, { "epoch": 1.9935353913898908, "grad_norm": 0.5946358810660821, "learning_rate": 1.3699054079163722e-10, "loss": 0.2662, "step": 42556 }, { "epoch": 1.9935822363798192, "grad_norm": 0.5926573134884894, "learning_rate": 1.3501238181085063e-10, "loss": 0.277, "step": 42557 }, { "epoch": 1.9936290813697475, "grad_norm": 0.5942485850461373, "learning_rate": 1.330486089246019e-10, "loss": 0.2639, "step": 42558 }, { "epoch": 1.9936759263596757, "grad_norm": 0.6062187609096089, "learning_rate": 1.310992221445484e-10, "loss": 0.2716, "step": 42559 }, { "epoch": 1.9937227713496042, "grad_norm": 0.5628478037277278, "learning_rate": 1.2916422148151476e-10, "loss": 0.2645, "step": 42560 }, { "epoch": 1.9937696163395326, "grad_norm": 0.589975684727608, "learning_rate": 1.2724360694688075e-10, "loss": 0.2695, "step": 42561 }, { "epoch": 1.9938164613294607, "grad_norm": 0.5764011392199507, "learning_rate": 1.2533737855174865e-10, "loss": 0.2733, "step": 42562 }, { "epoch": 1.9938633063193891, "grad_norm": 0.583711453448934, "learning_rate": 1.2344553630666556e-10, "loss": 0.2653, "step": 42563 }, { "epoch": 1.9939101513093176, "grad_norm": 0.6202854846364143, "learning_rate": 1.2156808022301126e-10, "loss": 0.2592, "step": 42564 }, { "epoch": 1.9939569962992458, "grad_norm": 0.5781277465071312, "learning_rate": 1.197050103116104e-10, "loss": 0.2575, "step": 42565 }, { "epoch": 1.994003841289174, "grad_norm": 0.5477400286882369, "learning_rate": 1.1785632658273262e-10, "loss": 0.2559, "step": 42566 }, { "epoch": 1.9940506862791025, "grad_norm": 0.5712509067329313, "learning_rate": 1.1602202904748006e-10, "loss": 0.2677, "step": 42567 }, { "epoch": 1.9940975312690308, "grad_norm": 0.627702396978015, "learning_rate": 1.1420211771612233e-10, "loss": 0.2724, "step": 42568 }, { "epoch": 1.994144376258959, "grad_norm": 0.615780909209503, "learning_rate": 1.1239659259892899e-10, "loss": 0.289, "step": 42569 }, { "epoch": 1.9941912212488875, "grad_norm": 0.6385374639313223, "learning_rate": 1.1060545370700227e-10, "loss": 0.2654, "step": 42570 }, { "epoch": 1.9942380662388157, "grad_norm": 0.6195368247082288, "learning_rate": 1.0882870105005661e-10, "loss": 0.2804, "step": 42571 }, { "epoch": 1.994284911228744, "grad_norm": 0.6170537303060879, "learning_rate": 1.0706633463836158e-10, "loss": 0.291, "step": 42572 }, { "epoch": 1.9943317562186724, "grad_norm": 0.6083943730648005, "learning_rate": 1.0531835448218675e-10, "loss": 0.2736, "step": 42573 }, { "epoch": 1.9943786012086009, "grad_norm": 0.6054610232958143, "learning_rate": 1.0358476059152412e-10, "loss": 0.2763, "step": 42574 }, { "epoch": 1.994425446198529, "grad_norm": 0.568971645152949, "learning_rate": 1.0186555297664324e-10, "loss": 0.258, "step": 42575 }, { "epoch": 1.9944722911884574, "grad_norm": 0.6043115895891861, "learning_rate": 1.0016073164698103e-10, "loss": 0.2724, "step": 42576 }, { "epoch": 1.9945191361783858, "grad_norm": 0.5850511148853884, "learning_rate": 9.847029661280705e-11, "loss": 0.2582, "step": 42577 }, { "epoch": 1.994565981168314, "grad_norm": 0.6012397196123325, "learning_rate": 9.679424788355818e-11, "loss": 0.2739, "step": 42578 }, { "epoch": 1.9946128261582423, "grad_norm": 0.7171137328513731, "learning_rate": 9.513258546867132e-11, "loss": 0.2919, "step": 42579 }, { "epoch": 1.9946596711481708, "grad_norm": 0.5781005123788928, "learning_rate": 9.348530937813849e-11, "loss": 0.2605, "step": 42580 }, { "epoch": 1.994706516138099, "grad_norm": 0.5700615699887863, "learning_rate": 9.185241962139658e-11, "loss": 0.2514, "step": 42581 }, { "epoch": 1.9947533611280273, "grad_norm": 0.5926910201821369, "learning_rate": 9.02339162076049e-11, "loss": 0.2526, "step": 42582 }, { "epoch": 1.9948002061179557, "grad_norm": 0.6056028640572838, "learning_rate": 8.86297991462004e-11, "loss": 0.2759, "step": 42583 }, { "epoch": 1.994847051107884, "grad_norm": 0.6464384663713225, "learning_rate": 8.704006844634238e-11, "loss": 0.2877, "step": 42584 }, { "epoch": 1.9948938960978122, "grad_norm": 0.5875671534786063, "learning_rate": 8.54647241171902e-11, "loss": 0.2764, "step": 42585 }, { "epoch": 1.9949407410877407, "grad_norm": 0.6123897918807686, "learning_rate": 8.39037661679032e-11, "loss": 0.2771, "step": 42586 }, { "epoch": 1.9949875860776691, "grad_norm": 0.5742883770851598, "learning_rate": 8.235719460764068e-11, "loss": 0.2673, "step": 42587 }, { "epoch": 1.9950344310675974, "grad_norm": 0.6178254691557042, "learning_rate": 8.082500944472938e-11, "loss": 0.2796, "step": 42588 }, { "epoch": 1.9950812760575256, "grad_norm": 0.6309730371403978, "learning_rate": 7.930721068860613e-11, "loss": 0.2666, "step": 42589 }, { "epoch": 1.995128121047454, "grad_norm": 0.5884360853631915, "learning_rate": 7.780379834759766e-11, "loss": 0.2741, "step": 42590 }, { "epoch": 1.9951749660373823, "grad_norm": 0.6044023580700943, "learning_rate": 7.631477243058571e-11, "loss": 0.2745, "step": 42591 }, { "epoch": 1.9952218110273106, "grad_norm": 0.5938452916494995, "learning_rate": 7.484013294589699e-11, "loss": 0.2675, "step": 42592 }, { "epoch": 1.995268656017239, "grad_norm": 0.6543863717841031, "learning_rate": 7.337987990241324e-11, "loss": 0.2858, "step": 42593 }, { "epoch": 1.9953155010071673, "grad_norm": 0.6058242801711768, "learning_rate": 7.193401330818361e-11, "loss": 0.2621, "step": 42594 }, { "epoch": 1.9953623459970955, "grad_norm": 0.607390960000491, "learning_rate": 7.050253317153477e-11, "loss": 0.2561, "step": 42595 }, { "epoch": 1.995409190987024, "grad_norm": 0.5495830151442884, "learning_rate": 6.908543950079338e-11, "loss": 0.2416, "step": 42596 }, { "epoch": 1.9954560359769524, "grad_norm": 0.6277320895736285, "learning_rate": 6.768273230428612e-11, "loss": 0.2763, "step": 42597 }, { "epoch": 1.9955028809668804, "grad_norm": 0.566208343841006, "learning_rate": 6.629441159006211e-11, "loss": 0.2858, "step": 42598 }, { "epoch": 1.995549725956809, "grad_norm": 0.6151254395162219, "learning_rate": 6.492047736561535e-11, "loss": 0.2672, "step": 42599 }, { "epoch": 1.9955965709467374, "grad_norm": 0.5933741639709738, "learning_rate": 6.356092963955007e-11, "loss": 0.2693, "step": 42600 }, { "epoch": 1.9956434159366656, "grad_norm": 0.5807991484578755, "learning_rate": 6.221576841908273e-11, "loss": 0.2591, "step": 42601 }, { "epoch": 1.9956902609265939, "grad_norm": 0.6109554941079841, "learning_rate": 6.088499371226242e-11, "loss": 0.2686, "step": 42602 }, { "epoch": 1.9957371059165223, "grad_norm": 0.5989177002071683, "learning_rate": 5.956860552686072e-11, "loss": 0.2636, "step": 42603 }, { "epoch": 1.9957839509064506, "grad_norm": 0.586890358576786, "learning_rate": 5.826660387037163e-11, "loss": 0.2585, "step": 42604 }, { "epoch": 1.9958307958963788, "grad_norm": 0.5432208783053355, "learning_rate": 5.697898875001162e-11, "loss": 0.2538, "step": 42605 }, { "epoch": 1.9958776408863073, "grad_norm": 0.5690833618971376, "learning_rate": 5.570576017355223e-11, "loss": 0.2545, "step": 42606 }, { "epoch": 1.9959244858762355, "grad_norm": 0.6215571286147479, "learning_rate": 5.444691814793235e-11, "loss": 0.2739, "step": 42607 }, { "epoch": 1.9959713308661637, "grad_norm": 0.6103103354024216, "learning_rate": 5.320246268092355e-11, "loss": 0.2694, "step": 42608 }, { "epoch": 1.9960181758560922, "grad_norm": 0.5992470340759409, "learning_rate": 5.197239377918717e-11, "loss": 0.2662, "step": 42609 }, { "epoch": 1.9960650208460207, "grad_norm": 0.6090762614530397, "learning_rate": 5.0756711450217213e-11, "loss": 0.2651, "step": 42610 }, { "epoch": 1.9961118658359487, "grad_norm": 0.5874521546592536, "learning_rate": 4.955541570067501e-11, "loss": 0.2706, "step": 42611 }, { "epoch": 1.9961587108258771, "grad_norm": 0.5858390095210475, "learning_rate": 4.836850653749947e-11, "loss": 0.265, "step": 42612 }, { "epoch": 1.9962055558158056, "grad_norm": 0.5858528133700789, "learning_rate": 4.719598396790703e-11, "loss": 0.2765, "step": 42613 }, { "epoch": 1.9962524008057339, "grad_norm": 0.6247805350907267, "learning_rate": 4.6037847998003925e-11, "loss": 0.2687, "step": 42614 }, { "epoch": 1.996299245795662, "grad_norm": 0.6489415178067108, "learning_rate": 4.48940986350066e-11, "loss": 0.2917, "step": 42615 }, { "epoch": 1.9963460907855906, "grad_norm": 0.6305832485635487, "learning_rate": 4.376473588529884e-11, "loss": 0.2828, "step": 42616 }, { "epoch": 1.9963929357755188, "grad_norm": 0.6084718459582632, "learning_rate": 4.2649759755541976e-11, "loss": 0.2644, "step": 42617 }, { "epoch": 1.996439780765447, "grad_norm": 0.5597842277164599, "learning_rate": 4.154917025184224e-11, "loss": 0.2645, "step": 42618 }, { "epoch": 1.9964866257553755, "grad_norm": 0.612832645083769, "learning_rate": 4.046296738058342e-11, "loss": 0.2775, "step": 42619 }, { "epoch": 1.9965334707453037, "grad_norm": 0.5600020294160452, "learning_rate": 3.9391151148149286e-11, "loss": 0.2565, "step": 42620 }, { "epoch": 1.996580315735232, "grad_norm": 0.5989164317459603, "learning_rate": 3.8333721560923634e-11, "loss": 0.2511, "step": 42621 }, { "epoch": 1.9966271607251604, "grad_norm": 0.6171395112465782, "learning_rate": 3.7290678624457565e-11, "loss": 0.2716, "step": 42622 }, { "epoch": 1.996674005715089, "grad_norm": 0.6179015308960836, "learning_rate": 3.626202234513487e-11, "loss": 0.2914, "step": 42623 }, { "epoch": 1.9967208507050171, "grad_norm": 0.6000937826224775, "learning_rate": 3.5247752728784224e-11, "loss": 0.279, "step": 42624 }, { "epoch": 1.9967676956949454, "grad_norm": 1.4803577716940417, "learning_rate": 3.4247869781234286e-11, "loss": 0.2828, "step": 42625 }, { "epoch": 1.9968145406848739, "grad_norm": 0.6212429267377264, "learning_rate": 3.326237350831374e-11, "loss": 0.2917, "step": 42626 }, { "epoch": 1.996861385674802, "grad_norm": 0.6009575163803148, "learning_rate": 3.2291263915573687e-11, "loss": 0.2819, "step": 42627 }, { "epoch": 1.9969082306647303, "grad_norm": 0.6501692781599675, "learning_rate": 3.1334541008565256e-11, "loss": 0.3021, "step": 42628 }, { "epoch": 1.9969550756546588, "grad_norm": 0.5997678408764334, "learning_rate": 3.0392204793117105e-11, "loss": 0.2695, "step": 42629 }, { "epoch": 1.997001920644587, "grad_norm": 0.6238010985517677, "learning_rate": 2.946425527422525e-11, "loss": 0.2661, "step": 42630 }, { "epoch": 1.9970487656345153, "grad_norm": 0.5984522002245639, "learning_rate": 2.8550692457440798e-11, "loss": 0.2713, "step": 42631 }, { "epoch": 1.9970956106244437, "grad_norm": 0.5789694930752792, "learning_rate": 2.765151634803731e-11, "loss": 0.2643, "step": 42632 }, { "epoch": 1.9971424556143722, "grad_norm": 0.642273457874385, "learning_rate": 2.676672695101079e-11, "loss": 0.2614, "step": 42633 }, { "epoch": 1.9971893006043002, "grad_norm": 0.5796627210007053, "learning_rate": 2.5896324271912355e-11, "loss": 0.2626, "step": 42634 }, { "epoch": 1.9972361455942287, "grad_norm": 0.5793159046701026, "learning_rate": 2.5040308315182892e-11, "loss": 0.2655, "step": 42635 }, { "epoch": 1.9972829905841571, "grad_norm": 0.5971411127115264, "learning_rate": 2.4198679086095967e-11, "loss": 0.271, "step": 42636 }, { "epoch": 1.9973298355740854, "grad_norm": 0.5915042003518448, "learning_rate": 2.3371436589370023e-11, "loss": 0.2792, "step": 42637 }, { "epoch": 1.9973766805640136, "grad_norm": 0.5631553435177137, "learning_rate": 2.255858082972351e-11, "loss": 0.2525, "step": 42638 }, { "epoch": 1.997423525553942, "grad_norm": 0.5725835027914723, "learning_rate": 2.1760111812152428e-11, "loss": 0.2659, "step": 42639 }, { "epoch": 1.9974703705438703, "grad_norm": 0.6036809599500517, "learning_rate": 2.097602954082012e-11, "loss": 0.2761, "step": 42640 }, { "epoch": 1.9975172155337986, "grad_norm": 0.6383210046780988, "learning_rate": 2.020633402044503e-11, "loss": 0.2751, "step": 42641 }, { "epoch": 1.997564060523727, "grad_norm": 0.5781290527938695, "learning_rate": 1.945102525546805e-11, "loss": 0.263, "step": 42642 }, { "epoch": 1.9976109055136553, "grad_norm": 0.5869648830480293, "learning_rate": 1.8710103250052512e-11, "loss": 0.2593, "step": 42643 }, { "epoch": 1.9976577505035835, "grad_norm": 0.618111246409027, "learning_rate": 1.7983568008639317e-11, "loss": 0.2461, "step": 42644 }, { "epoch": 1.997704595493512, "grad_norm": 0.5940206779803724, "learning_rate": 1.7271419535669354e-11, "loss": 0.2861, "step": 42645 }, { "epoch": 1.9977514404834404, "grad_norm": 0.5789650138214149, "learning_rate": 1.6573657834750845e-11, "loss": 0.2507, "step": 42646 }, { "epoch": 1.9977982854733685, "grad_norm": 0.5980686298809759, "learning_rate": 1.5890282910047128e-11, "loss": 0.2728, "step": 42647 }, { "epoch": 1.997845130463297, "grad_norm": 0.6145650345761163, "learning_rate": 1.522129476572154e-11, "loss": 0.2735, "step": 42648 }, { "epoch": 1.9978919754532254, "grad_norm": 0.6139896702668912, "learning_rate": 1.4566693405382305e-11, "loss": 0.263, "step": 42649 }, { "epoch": 1.9979388204431536, "grad_norm": 0.5850771907020262, "learning_rate": 1.3926478832637647e-11, "loss": 0.278, "step": 42650 }, { "epoch": 1.9979856654330819, "grad_norm": 0.6129396743858738, "learning_rate": 1.3300651051650904e-11, "loss": 0.2688, "step": 42651 }, { "epoch": 1.9980325104230103, "grad_norm": 0.627350113324727, "learning_rate": 1.2689210065752743e-11, "loss": 0.2759, "step": 42652 }, { "epoch": 1.9980793554129386, "grad_norm": 0.5856337821388942, "learning_rate": 1.2092155878551393e-11, "loss": 0.2752, "step": 42653 }, { "epoch": 1.9981262004028668, "grad_norm": 0.5717786455793525, "learning_rate": 1.1509488493099963e-11, "loss": 0.2656, "step": 42654 }, { "epoch": 1.9981730453927953, "grad_norm": 0.6318984614462267, "learning_rate": 1.0941207913284235e-11, "loss": 0.2836, "step": 42655 }, { "epoch": 1.9982198903827235, "grad_norm": 0.5484295720487737, "learning_rate": 1.0387314142157323e-11, "loss": 0.2535, "step": 42656 }, { "epoch": 1.9982667353726518, "grad_norm": 0.578552907185465, "learning_rate": 9.84780718277234e-12, "loss": 0.26, "step": 42657 }, { "epoch": 1.9983135803625802, "grad_norm": 0.5757510495852265, "learning_rate": 9.32268703873751e-12, "loss": 0.248, "step": 42658 }, { "epoch": 1.9983604253525087, "grad_norm": 0.6032086867745604, "learning_rate": 8.81195371227328e-12, "loss": 0.2607, "step": 42659 }, { "epoch": 1.998407270342437, "grad_norm": 0.5374791204630749, "learning_rate": 8.315607206987874e-12, "loss": 0.2532, "step": 42660 }, { "epoch": 1.9984541153323652, "grad_norm": 0.567934339087134, "learning_rate": 7.833647525379295e-12, "loss": 0.2629, "step": 42661 }, { "epoch": 1.9985009603222936, "grad_norm": 0.6065988796245896, "learning_rate": 7.366074670223101e-12, "loss": 0.2672, "step": 42662 }, { "epoch": 1.9985478053122219, "grad_norm": 0.615421531407821, "learning_rate": 6.9128886445724016e-12, "loss": 0.277, "step": 42663 }, { "epoch": 1.99859465030215, "grad_norm": 0.5847810721281181, "learning_rate": 6.474089450647647e-12, "loss": 0.2805, "step": 42664 }, { "epoch": 1.9986414952920786, "grad_norm": 0.594796393903766, "learning_rate": 6.049677091224393e-12, "loss": 0.2653, "step": 42665 }, { "epoch": 1.9986883402820068, "grad_norm": 0.6163324229000838, "learning_rate": 5.6396515685230855e-12, "loss": 0.272, "step": 42666 }, { "epoch": 1.998735185271935, "grad_norm": 0.6414868428786016, "learning_rate": 5.244012885041727e-12, "loss": 0.2881, "step": 42667 }, { "epoch": 1.9987820302618635, "grad_norm": 0.5811714064166205, "learning_rate": 4.8627610430007636e-12, "loss": 0.2757, "step": 42668 }, { "epoch": 1.998828875251792, "grad_norm": 0.6191586626402131, "learning_rate": 4.4958960448981956e-12, "loss": 0.259, "step": 42669 }, { "epoch": 1.99887572024172, "grad_norm": 0.6209272081281508, "learning_rate": 4.14341789239936e-12, "loss": 0.272, "step": 42670 }, { "epoch": 1.9989225652316485, "grad_norm": 0.6265855775093583, "learning_rate": 3.805326587447144e-12, "loss": 0.2725, "step": 42671 }, { "epoch": 1.998969410221577, "grad_norm": 0.5874970562900451, "learning_rate": 3.481622132539553e-12, "loss": 0.2716, "step": 42672 }, { "epoch": 1.9990162552115052, "grad_norm": 0.561853667621646, "learning_rate": 3.1723045290643628e-12, "loss": 0.243, "step": 42673 }, { "epoch": 1.9990631002014334, "grad_norm": 0.6337232949863664, "learning_rate": 2.877373779242021e-12, "loss": 0.2665, "step": 42674 }, { "epoch": 1.9991099451913619, "grad_norm": 0.5994641311615381, "learning_rate": 2.59682988418275e-12, "loss": 0.2618, "step": 42675 }, { "epoch": 1.99915679018129, "grad_norm": 0.6569828009120615, "learning_rate": 2.3306728461069963e-12, "loss": 0.2904, "step": 42676 }, { "epoch": 1.9992036351712184, "grad_norm": 0.6199397848464381, "learning_rate": 2.0789026661249824e-12, "loss": 0.2779, "step": 42677 }, { "epoch": 1.9992504801611468, "grad_norm": 0.6304231140737662, "learning_rate": 1.841519345902043e-12, "loss": 0.2624, "step": 42678 }, { "epoch": 1.999297325151075, "grad_norm": 0.5771093114797315, "learning_rate": 1.6185228865484015e-12, "loss": 0.2552, "step": 42679 }, { "epoch": 1.9993441701410033, "grad_norm": 0.6080988071677794, "learning_rate": 1.4099132897293922e-12, "loss": 0.2816, "step": 42680 }, { "epoch": 1.9993910151309318, "grad_norm": 0.6150521399364919, "learning_rate": 1.2156905562776822e-12, "loss": 0.2664, "step": 42681 }, { "epoch": 1.9994378601208602, "grad_norm": 0.5960918410952083, "learning_rate": 1.0358546875810505e-12, "loss": 0.2771, "step": 42682 }, { "epoch": 1.9994847051107882, "grad_norm": 0.6161401839411397, "learning_rate": 8.704056847497199e-13, "loss": 0.2722, "step": 42683 }, { "epoch": 1.9995315501007167, "grad_norm": 0.5909515487517819, "learning_rate": 7.193435483388023e-13, "loss": 0.276, "step": 42684 }, { "epoch": 1.9995783950906452, "grad_norm": 0.628595368427143, "learning_rate": 5.826682794585204e-13, "loss": 0.2786, "step": 42685 }, { "epoch": 1.9996252400805734, "grad_norm": 0.6292391668209067, "learning_rate": 4.603798789415415e-13, "loss": 0.2605, "step": 42686 }, { "epoch": 1.9996720850705016, "grad_norm": 0.5759611095544911, "learning_rate": 3.5247834734297716e-13, "loss": 0.2657, "step": 42687 }, { "epoch": 1.99971893006043, "grad_norm": 0.596295249926667, "learning_rate": 2.589636852179389e-13, "loss": 0.2686, "step": 42688 }, { "epoch": 1.9997657750503584, "grad_norm": 0.5670874675160223, "learning_rate": 1.7983589339909402e-13, "loss": 0.2654, "step": 42689 }, { "epoch": 1.9998126200402866, "grad_norm": 0.5898473214908776, "learning_rate": 1.1509497244155398e-13, "loss": 0.272, "step": 42690 }, { "epoch": 1.999859465030215, "grad_norm": 0.5717304450441908, "learning_rate": 6.474092206776306e-14, "loss": 0.2683, "step": 42691 }, { "epoch": 1.9999063100201433, "grad_norm": 0.5854867450162711, "learning_rate": 2.877374338794425e-14, "loss": 0.2654, "step": 42692 }, { "epoch": 1.9999531550100715, "grad_norm": 0.559714501038162, "learning_rate": 7.193435846986063e-15, "loss": 0.2514, "step": 42693 }, { "epoch": 2.0, "grad_norm": 0.5678060447658159, "learning_rate": 0.0, "loss": 0.2521, "step": 42694 }, { "epoch": 2.0, "step": 42694, "total_flos": 1.18789275810005e+17, "train_loss": 0.04862895955609489, "train_runtime": 106793.1001, "train_samples_per_second": 204.685, "train_steps_per_second": 0.4 } ], "logging_steps": 1.0, "max_steps": 42694, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "total_flos": 1.18789275810005e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }